@openbmb/clawxrouter 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.example.json +204 -0
- package/index.ts +398 -0
- package/openclaw.plugin.json +97 -0
- package/package.json +48 -0
- package/prompts/detection-system.md +50 -0
- package/prompts/token-saver-judge.md +25 -0
- package/src/config-schema.ts +210 -0
- package/src/dashboard-config-io.ts +25 -0
- package/src/detector.ts +230 -0
- package/src/guard-agent.ts +86 -0
- package/src/hooks.ts +1428 -0
- package/src/live-config.ts +75 -0
- package/src/llm-desensitize-worker.ts +7 -0
- package/src/llm-detect-worker.ts +7 -0
- package/src/local-model.ts +723 -0
- package/src/memory-isolation.ts +403 -0
- package/src/privacy-proxy.ts +683 -0
- package/src/prompt-loader.ts +101 -0
- package/src/provider.ts +268 -0
- package/src/router-pipeline.ts +380 -0
- package/src/routers/configurable.ts +208 -0
- package/src/routers/privacy.ts +102 -0
- package/src/routers/token-saver.ts +273 -0
- package/src/rules.ts +320 -0
- package/src/session-manager.ts +377 -0
- package/src/session-state.ts +471 -0
- package/src/stats-dashboard.ts +3402 -0
- package/src/sync-desensitize.ts +48 -0
- package/src/sync-detect.ts +49 -0
- package/src/token-stats.ts +358 -0
- package/src/types.ts +269 -0
- package/src/utils.ts +283 -0
- package/src/worker-loader.mjs +25 -0
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import type { ClawXrouterRouter, DetectionContext, EdgeProviderType, RouterDecision } from "../types.js";
|
|
3
|
+
import { callChatCompletion } from "../local-model.js";
|
|
4
|
+
import { loadPrompt } from "../prompt-loader.js";
|
|
5
|
+
import { getGlobalCollector } from "../token-stats.js";
|
|
6
|
+
|
|
7
|
+
// ── Types ──
|
|
8
|
+
|
|
9
|
+
/** Provider/model pair that a complexity tier is routed to. */
type TierTarget = {
  provider: string;
  model: string;
  /** Optional tier description; when present it is rendered into the generated judge prompt. */
  description?: string;
};

/** Fully resolved settings for the token-saver router. */
type TokenSaverConfig = {
  enabled: boolean;
  /** Endpoint, model and provider type of the "judge" model that classifies each prompt. */
  judgeEndpoint: string;
  judgeModel: string;
  judgeProviderType: EdgeProviderType;
  judgeCustomModule?: string;
  judgeApiKey?: string;
  /** Tier name → redirect target. Key order is significant: it is the order shown to the judge. */
  tiers: Record<string, TierTarget>;
  /** Tier used when the judge reply cannot be parsed. */
  defaultTier?: string;
  /** Extra classification rules appended to the built-in ones in the generated prompt. */
  rules?: string[];
  /** How long (ms) a cached classification may be reused. */
  cacheTtlMs: number;
};

/** Built-in defaults: router disabled, five tiers, five-minute classification cache. */
const DEFAULT_CONFIG: TokenSaverConfig = {
  enabled: false,
  judgeEndpoint: "http://localhost:11434",
  judgeModel: "openbmb/minicpm4.1",
  judgeProviderType: "openai-compatible",
  tiers: {
    SIMPLE: { provider: "zhipu", model: "glm-4.5-air" },
    MEDIUM: { provider: "minimax", model: "minimax-m2.5" },
    COMPLEX: { provider: "deepseek", model: "deepseek-v3.2" },
    RESEARCH: { provider: "zhipu", model: "glm-5" },
    REASONING: { provider: "moonshot", model: "kimi-k2.5" },
  },
  cacheTtlMs: 5 * 60 * 1_000,
};
|
|
42
|
+
|
|
43
|
+
// ── Prompt generation ──
|
|
44
|
+
|
|
45
|
+
/**
 * Render the judge system prompt for a set of tiers.
 *
 * Layout: an instruction line, one line per tier (`NAME = description`, or
 * just `NAME` when the tier has no description), a bulleted rules section
 * (two built-in rules followed by any caller-supplied ones), and finally the
 * expected JSON output shape listing every tier name separated by `|`.
 *
 * @param tiers Tier name → target; key order is preserved in the prompt.
 * @param rules Optional extra rules appended after the built-in ones.
 * @returns The prompt text, lines joined with `\n`.
 */
function generateJudgePrompt(tiers: Record<string, TierTarget>, rules?: string[]): string {
  const definitions = Object.entries(tiers).map(([name, target]) => {
    const description = target.description;
    return description ? `${name} = ${description}` : name;
  });

  const builtInRules = [
    "When unsure, pick the LOWER tier (save tokens).",
    "Short prompts (< 20 words) with no technical depth → the lowest tier.",
  ];
  const bullets = builtInRules.concat(rules ?? []).map((r) => `- ${r}`);

  const tierList = Object.keys(tiers).join("|");

  const sections: string[] = [];
  sections.push("You are a task complexity classifier. Classify the user's task into exactly one tier.");
  sections.push("");
  sections.push(definitions.join("\n"));
  sections.push("");
  sections.push("Rules:");
  sections.push(bullets.join("\n"));
  sections.push("");
  sections.push(`CRITICAL: Output ONLY the raw JSON object. Do NOT wrap in markdown code blocks. Do NOT add any text before or after.`);
  sections.push(`{"tier":"${tierList}"}`);
  return sections.join("\n");
}

/** Minimal system prompt used when no prompt file is loaded and no tier carries a description. */
const FALLBACK_JUDGE_PROMPT = `You are a task complexity classifier. Output ONLY a JSON object: {"tier":"MEDIUM"}`;
|
|
78
|
+
|
|
79
|
+
// ── Cache ──
|
|
80
|
+
|
|
81
|
+
/** A cached classification: the chosen tier and the time (ms since epoch) it was stored. */
type CacheEntry = { tier: string; ts: number };

/** Prompt-hash → classification. Shared by every call in this module. */
const classificationCache = new Map<string, CacheEntry>();

/** How often the sweeper runs, and how old an entry must be before it is dropped. */
const CACHE_CLEANUP_INTERVAL_MS = 60_000;
const CACHE_MAX_AGE_MS = 600_000;

/** Handle of the sweeper interval; `null` until the first `startCacheCleanup()` call. */
let cleanupTimer: ReturnType<typeof setInterval> | null = null;

/**
 * Start the periodic cache sweeper. Calling it again while a sweeper is
 * already registered does nothing.
 *
 * Each run removes entries older than `CACHE_MAX_AGE_MS`. When the timer
 * handle exposes `unref`, it is called on the handle.
 */
function startCacheCleanup(): void {
  if (cleanupTimer) return;

  const sweep = (): void => {
    const cutoff = Date.now() - CACHE_MAX_AGE_MS;
    classificationCache.forEach((entry, key) => {
      if (entry.ts < cutoff) classificationCache.delete(key);
    });
  };

  const handle = setInterval(sweep, CACHE_CLEANUP_INTERVAL_MS);
  cleanupTimer = handle;

  const supportsUnref = Boolean(handle) && typeof handle === "object" && "unref" in handle;
  if (supportsUnref) {
    (handle as NodeJS.Timeout).unref();
  }
}
|
|
101
|
+
|
|
102
|
+
// ── Helpers ──
|
|
103
|
+
|
|
104
|
+
/**
 * Derive the cache key for a prompt: the first 16 hex characters of its
 * SHA-256 digest.
 */
function hashPrompt(prompt: string): string {
  const hasher = createHash("sha256");
  hasher.update(prompt);
  const hex = hasher.digest("hex");
  return hex.slice(0, 16);
}
|
|
107
|
+
|
|
108
|
+
/**
 * Extract the tier chosen by the judge model from its raw reply.
 *
 * `<think>…</think>` sections are removed first. Every `"tier": "<value>"`
 * pair that sits between a `{` and a `}` is then considered in order, and the
 * first one naming a configured tier wins.
 *
 * Tier names are user-configured keys, so the lookup is case-insensitive and
 * returns the tier exactly as it is spelled in `validTiers`. Forcing the
 * reply to upper case (or restricting it to letters) would make tiers such
 * as `fast` or `tier-1` unmatchable and silently route everything to the
 * default tier.
 *
 * @param response    Raw text returned by the judge model.
 * @param validTiers  Configured tier names.
 * @param defaultTier Returned when no configured tier can be extracted.
 * @returns A member of `validTiers`, or `defaultTier`.
 */
function parseTier(response: string, validTiers: Set<string>, defaultTier: string): string {
  // Guard against a non-string reply instead of throwing on `.replace`.
  if (typeof response !== "string") return defaultTier;

  const cleaned = response.replace(/<think>[\s\S]*?<\/think>/g, "").trim();

  for (const candidate of cleaned.matchAll(/"tier"\s*:\s*"([^"]+)"/g)) {
    const start = candidate.index ?? 0;
    const end = start + candidate[0].length;

    // Only accept pairs that are inside something brace-delimited.
    const hasOpeningBrace = cleaned.slice(0, start).includes("{");
    const hasClosingBrace = cleaned.indexOf("}", end) !== -1;
    if (!hasOpeningBrace || !hasClosingBrace) continue;

    const raw = (candidate[1] ?? "").trim();
    const upper = raw.toUpperCase();

    // Upper-case spelling first, so all-caps tier sets resolve as before.
    if (validTiers.has(upper)) return upper;
    if (validTiers.has(raw)) return raw;
    for (const name of validTiers) {
      if (name.toUpperCase() === upper) return name;
    }
  }

  return defaultTier;
}
|
|
121
|
+
|
|
122
|
+
/**
 * Turn a tier name into a routing decision.
 *
 * A tier with a configured target yields a `redirect` to that provider/model
 * (confidence 0.8); a tier without one yields a `passthrough` explaining
 * that no mapping exists. The level is always `S1`.
 */
function buildDecision(tier: string, config: TokenSaverConfig): RouterDecision {
  const mapping = config.tiers[tier];

  if (mapping) {
    const { provider, model } = mapping;
    return {
      level: "S1",
      action: "redirect",
      target: { provider, model },
      reason: `tier=${tier}`,
      confidence: 0.8,
    };
  }

  return { level: "S1", action: "passthrough", reason: `no model mapping for tier ${tier}` };
}
|
|
135
|
+
|
|
136
|
+
/**
 * Resolve the token-saver settings from the plugin configuration.
 *
 * Reads `privacy.routers["token-saver"]` (its `enabled` flag and `options`)
 * and `privacy.localModel`. Judge settings are taken from the router options
 * first, then from the shared local-model settings, then from
 * `DEFAULT_CONFIG`. `tiers` falls back to an empty object rather than the
 * default tier table.
 *
 * Values are cast, not validated.
 */
function resolveConfig(pluginConfig: Record<string, unknown>): TokenSaverConfig {
  type RouterEntry = { options?: Record<string, unknown>; enabled?: boolean };
  type LocalModel = {
    endpoint?: string;
    model?: string;
    type?: EdgeProviderType;
    module?: string;
    apiKey?: string;
  };

  const privacy = pluginConfig?.privacy as Record<string, unknown>;
  const routerTable = privacy?.routers as Record<string, RouterEntry> | undefined;
  const entry = routerTable?.["token-saver"];
  const opts = (entry?.options ?? {}) as Record<string, unknown>;
  const localModel = privacy?.localModel as LocalModel | undefined;

  const enabled = entry?.enabled ?? DEFAULT_CONFIG.enabled;
  const judgeEndpoint =
    (opts.judgeEndpoint as string) ?? localModel?.endpoint ?? DEFAULT_CONFIG.judgeEndpoint;
  const judgeModel =
    (opts.judgeModel as string) ?? localModel?.model ?? DEFAULT_CONFIG.judgeModel;
  const judgeProviderType =
    (opts.judgeProviderType as EdgeProviderType) ?? localModel?.type ?? DEFAULT_CONFIG.judgeProviderType;
  const judgeCustomModule = (opts.judgeCustomModule as string) ?? localModel?.module;
  const judgeApiKey = (opts.judgeApiKey as string) ?? localModel?.apiKey;

  return {
    enabled,
    judgeEndpoint,
    judgeModel,
    judgeProviderType,
    judgeCustomModule,
    judgeApiKey,
    tiers: (opts.tiers as Record<string, TierTarget>) ?? {},
    defaultTier: (opts.defaultTier as string) ?? undefined,
    rules: (opts.rules as string[]) ?? undefined,
    cacheTtlMs: (opts.cacheTtlMs as number) ?? DEFAULT_CONFIG.cacheTtlMs,
  };
}
|
|
173
|
+
|
|
174
|
+
/** True when at least one tier has a non-empty `description`. */
function hasAnyDescription(tiers: Record<string, TierTarget>): boolean {
  for (const target of Object.values(tiers)) {
    if (target.description) return true;
  }
  return false;
}
|
|
177
|
+
|
|
178
|
+
// ── Router ──
|
|
179
|
+
|
|
180
|
+
/**
 * Router that asks a "judge" model to classify each prompt into a tier and
 * redirects the request to that tier's provider/model.
 */
export const tokenSaverRouter: ClawXrouterRouter = {
  id: "token-saver",

  /**
   * Classify `context.message` and return a routing decision.
   *
   * Returns a passthrough decision when the router is disabled (unless
   * `context.dryRun` is set), when the session key contains `:subagent:`,
   * when no tiers are configured, when the message is blank, or when the
   * judge call throws. Otherwise a fresh cached tier is reused if there is
   * one, else the judge is queried, its token usage is recorded when
   * reported, and the parsed tier is cached.
   */
  async detect(
    context: DetectionContext,
    pluginConfig: Record<string, unknown>,
  ): Promise<RouterDecision> {
    const settings = resolveConfig(pluginConfig);

    // Passthrough decisions carry a `reason` key only when one is given.
    const passthrough = (reason?: string): RouterDecision =>
      reason === undefined
        ? { level: "S1", action: "passthrough" }
        : { level: "S1", action: "passthrough", reason };

    if (!(settings.enabled || context.dryRun)) {
      return passthrough();
    }

    if (context.sessionKey?.includes(":subagent:") ?? false) {
      return passthrough("subagent — skipped");
    }

    const tierNames = Object.keys(settings.tiers);
    if (tierNames.length === 0) {
      return passthrough("no tiers configured");
    }

    const prompt = context.message ?? "";
    if (prompt.trim() === "") {
      return passthrough();
    }

    startCacheCleanup();

    const validTiers = new Set(tierNames);

    // Fallback tier: the configured one when valid, else the middle tier.
    let defaultTier: string;
    if (settings.defaultTier && validTiers.has(settings.defaultTier)) {
      defaultTier = settings.defaultTier;
    } else {
      defaultTier = tierNames[Math.floor(tierNames.length / 2)] ?? "MEDIUM";
    }

    const cacheKey = hashPrompt(prompt);
    const hit = classificationCache.get(cacheKey);
    if (hit && Date.now() - hit.ts < settings.cacheTtlMs && validTiers.has(hit.tier)) {
      return buildDecision(hit.tier, settings);
    }

    try {
      // Prompt precedence: prompt file, then generated prompt, then fallback.
      const promptFromFile = loadPrompt("token-saver-judge", "");
      let systemPrompt: string;
      if (promptFromFile) {
        systemPrompt = promptFromFile;
      } else {
        systemPrompt = hasAnyDescription(settings.tiers)
          ? generateJudgePrompt(settings.tiers, settings.rules)
          : FALLBACK_JUDGE_PROMPT;
      }

      const messages = [
        { role: "system", content: systemPrompt },
        { role: "user", content: prompt },
      ];
      const callOptions = {
        temperature: 0,
        maxTokens: 1024,
        providerType: settings.judgeProviderType,
        customModule: settings.judgeCustomModule,
        apiKey: settings.judgeApiKey,
      };
      const reply = await callChatCompletion(
        settings.judgeEndpoint,
        settings.judgeModel,
        messages,
        callOptions,
      );

      if (reply.usage) {
        getGlobalCollector()?.record({
          sessionKey: context.sessionKey ?? "",
          provider: "edge",
          model: settings.judgeModel,
          source: "router",
          usage: reply.usage,
        });
      }

      const tier = parseTier(reply.text, validTiers, defaultTier);
      classificationCache.set(cacheKey, { tier, ts: Date.now() });
      return buildDecision(tier, settings);
    } catch (err) {
      console.error(`[ClawXrouter] [TokenSaver] judge call failed:`, err);
      return passthrough("judge call failed — passthrough");
    }
  },
};
|
|
269
|
+
|
|
270
|
+
// ── Exports for testing ──
|
|
271
|
+
|
|
272
|
+
export { parseTier, hashPrompt, classificationCache, resolveConfig, generateJudgePrompt, DEFAULT_CONFIG, FALLBACK_JUDGE_PROMPT as DEFAULT_JUDGE_PROMPT };
|
|
273
|
+
export type { TierTarget, TokenSaverConfig };
|
package/src/rules.ts
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
import type { DetectionContext, DetectionResult, PrivacyConfig, SensitivityLevel } from "./types.js";
|
|
2
|
+
import { levelToNumeric, maxLevel } from "./types.js";
|
|
3
|
+
import { extractPathsFromParams, matchesPathPattern } from "./utils.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Cache of compiled regex patterns, keyed by the pattern source as written
 * in the configuration. A `null` value records a pattern that failed to
 * compile, so it is neither recompiled nor re-logged on later calls.
 */
const PATTERN_CACHE_MAX = 500;
const patternCache = new Map<string, RegExp | null>();

/**
 * Compile a configured pattern into a `RegExp`, with caching.
 *
 * A leading Python-style inline flag group such as `(?i)` or `(?ms)` is
 * stripped and translated into JS flags. Only `i`, `s` and `m` are carried
 * over; `g`, `u` and `y` are accepted in the group but dropped. Patterns
 * without a flag group are compiled case-insensitively.
 *
 * When the cache is full, the oldest inserted entry is evicted.
 *
 * @returns The compiled regex, or `null` when the pattern is invalid. A
 *          warning is logged the first time an invalid pattern is seen.
 */
function getOrCompileRegex(pattern: string): RegExp | null {
  // `undefined` means "never seen"; `null` is a cached compile failure.
  const cached = patternCache.get(pattern);
  if (cached !== undefined) return cached;

  let compiled: RegExp | null;
  try {
    let flags = "i";
    const cleaned = pattern.replace(/^\(\?([gimsuy]+)\)/, (_m, f: string) => {
      flags = f.includes("i") ? "i" : "";
      if (f.includes("s")) flags += "s";
      if (f.includes("m")) flags += "m";
      return "";
    });
    compiled = new RegExp(cleaned, flags);
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    console.warn(`[ClawXrouter] Invalid regex pattern: ${pattern} — ${detail}`);
    compiled = null;
  }

  if (patternCache.size >= PATTERN_CACHE_MAX) {
    const firstKey = patternCache.keys().next().value;
    if (firstKey !== undefined) patternCache.delete(firstKey);
  }
  patternCache.set(pattern, compiled);
  return compiled;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Run every rule-based check against a detection context and report the
 * highest sensitivity level found.
 *
 * Checks run in this order: message keywords, message patterns, tool name,
 * tool parameters, then tool result keywords and patterns. A non-string tool
 * result is checked in its JSON-serialised form. Reasons from all findings
 * are joined with "; "; reasons from the tool result carry a "Result: "
 * prefix.
 */
export function detectByRules(
  context: DetectionContext,
  config: PrivacyConfig
): DetectionResult {
  const levels: SensitivityLevel[] = [];
  const reasons: string[] = [];

  // Record a finding unless it is the baseline level.
  const note = (finding: { level: SensitivityLevel; reason?: string }, prefix = ""): void => {
    if (finding.level === "S1") return;
    levels.push(finding.level);
    if (finding.reason) reasons.push(`${prefix}${finding.reason}`);
  };

  const { message, toolName, toolParams, toolResult } = context;

  if (message) {
    note(checkKeywords(message, config));
    note(checkPatterns(message, config));
  }

  if (toolName) {
    note(checkToolType(toolName, config));
  }

  if (toolParams) {
    note(checkToolParams(toolParams, config));
  }

  if (toolResult) {
    const resultText = typeof toolResult === "string" ? toolResult : JSON.stringify(toolResult);
    note(checkKeywords(resultText, config), "Result: ");
    note(checkPatterns(resultText, config), "Result: ");
  }

  const finalLevel = levels.length > 0 ? maxLevel(...levels) : "S1";

  return {
    level: finalLevel,
    levelNumeric: levelToNumeric(finalLevel),
    reason: reasons.length > 0 ? reasons.join("; ") : undefined,
    detectorType: "ruleDetector",
    confidence: 1.0, // rule hits are reported with full confidence
  };
}
|
|
121
|
+
|
|
122
|
+
/** Compiled keyword regexes, keyed by the keyword exactly as configured. */
const keywordRegexCache = new Map<string, RegExp>();

/**
 * Build (and cache) a case-insensitive regex that matches `keyword` at
 * word-like boundaries.
 *
 * For keywords starting with "." (file extensions like ".env", ".key"):
 *   the "." itself is a boundary, so only the tail is checked: it must NOT
 *   be followed by an alphanumeric char. "file.env" matches, ".envelope"
 *   does not.
 *
 * For plain-word keywords:
 *   negative lookbehind/lookahead on alphanumeric chars, so "token" matches
 *   "auth_token" and "the token" but NOT "tokenize".
 *
 * A blank keyword (empty or whitespace-only) yields a regex that never
 * matches. Compiled normally, an empty keyword would reduce to two bare
 * lookarounds and match at any position flanked by non-alphanumerics (for
 * example right after a trailing period), so a single blank config entry
 * would flag almost any text.
 *
 * @param keyword Literal keyword; regex metacharacters are escaped.
 */
export function getKeywordRegex(keyword: string): RegExp {
  const cached = keywordRegexCache.get(keyword);
  if (cached) return cached;

  let pattern: string;
  if (keyword.trim() === "") {
    // Empty negative lookahead: fails at every position.
    pattern = "(?!)";
  } else {
    const escaped = keyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    pattern = keyword.startsWith(".")
      ? `${escaped}(?![a-zA-Z0-9])`
      : `(?<![a-zA-Z0-9])${escaped}(?![a-zA-Z0-9])`;
  }

  const re = new RegExp(pattern, "i");
  keywordRegexCache.set(keyword, re);
  return re;
}
|
|
150
|
+
|
|
151
|
+
/**
 * Scan `text` for configured sensitive keywords.
 *
 * S3 keywords are tried before S2 ones, each list in its configured order.
 * The first keyword that matches decides the result; with no match the
 * level is S1 and no reason is given.
 */
function checkKeywords(
  text: string,
  config: PrivacyConfig
): { level: SensitivityLevel; reason?: string } {
  for (const level of ["S3", "S2"] as const) {
    for (const keyword of config.rules?.keywords?.[level] ?? []) {
      if (getKeywordRegex(keyword).test(text)) {
        return { level, reason: `${level} keyword detected: ${keyword}` };
      }
    }
  }

  return { level: "S1" };
}
|
|
182
|
+
|
|
183
|
+
/**
 * Scan `text` against the configured sensitive regex patterns.
 *
 * S3 patterns are tried before S2 ones, each list in its configured order.
 * Patterns that fail to compile are skipped. The first matching pattern
 * decides the result; with no match the level is S1.
 */
function checkPatterns(
  text: string,
  config: PrivacyConfig
): { level: SensitivityLevel; reason?: string } {
  for (const level of ["S3", "S2"] as const) {
    for (const pattern of config.rules?.patterns?.[level] ?? []) {
      const compiled = getOrCompileRegex(pattern);
      if (compiled === null) continue;
      if (compiled.test(text)) {
        return { level, reason: `${level} pattern matched: ${pattern}` };
      }
    }
  }

  return { level: "S1" };
}
|
|
216
|
+
|
|
217
|
+
/**
 * Report whether `segment` occurs in `name` as a complete component, i.e.
 * bounded on each side by a tool-name separator (`.`, `_`, `-`) or by the
 * start/end of the string. The comparison is case-insensitive.
 *
 * So "sudo" is not found in "pseudocode_generator", and "shell" is not
 * found in "powershell", but "shell" is found in "run_shell".
 */
function toolNameContainsSegment(name: string, segment: string): boolean {
  // Treat the segment as literal text.
  const literal = segment.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const bounded = `(?:^|[._\\-])${literal}(?:$|[._\\-])`;
  return new RegExp(bounded, "i").test(name);
}
|
|
227
|
+
|
|
228
|
+
/**
 * Classify a tool by name against the configured sensitive-tool lists.
 *
 * S3 tools are tried before S2 ones. A configured entry matches when it
 * equals the lower-cased tool name or appears in it as a separator-bounded
 * segment. The reason reports the tool name as it was passed in.
 */
function checkToolType(
  toolName: string,
  config: PrivacyConfig
): { level: SensitivityLevel; reason?: string } {
  const lowerName = toolName.toLowerCase();

  for (const level of ["S3", "S2"] as const) {
    for (const entry of config.rules?.tools?.[level]?.tools ?? []) {
      const wanted = entry.toLowerCase();
      const matches = lowerName === wanted || toolNameContainsSegment(lowerName, wanted);
      if (matches) {
        return { level, reason: `${level} tool detected: ${toolName}` };
      }
    }
  }

  return { level: "S1" };
}
|
|
263
|
+
|
|
264
|
+
/**
 * Classify tool parameters by the file paths they contain.
 *
 * Paths are extracted from `params`; with none, the result is S1. Every
 * path is checked against the configured S3 path patterns, then every path
 * against the S2 patterns. Finally a built-in check flags as S3 any path
 * that ends in a key/certificate extension (.pem, .key, .p12, .pfx) or
 * contains an SSH private-key file name (id_rsa, id_dsa, id_ecdsa,
 * id_ed25519), compared case-insensitively.
 */
function checkToolParams(
  params: Record<string, unknown>,
  config: PrivacyConfig
): { level: SensitivityLevel; reason?: string } {
  const paths = extractPathsFromParams(params);
  if (paths.length === 0) {
    return { level: "S1" };
  }

  // Configured path patterns, higher level first.
  for (const level of ["S3", "S2"] as const) {
    const patterns = config.rules?.tools?.[level]?.paths ?? [];
    for (const path of paths) {
      if (matchesPathPattern(path, patterns)) {
        return { level, reason: `${level} path detected: ${path}` };
      }
    }
  }

  // Built-in key-material check.
  const keySuffixes = [".pem", ".key", ".p12", ".pfx"];
  const keyFileNames = ["id_rsa", "id_dsa", "id_ecdsa", "id_ed25519"];
  for (const path of paths) {
    const lowerPath = path.toLowerCase();
    const looksLikeKey =
      keySuffixes.some((suffix) => lowerPath.endsWith(suffix)) ||
      keyFileNames.some((fileName) => lowerPath.includes(fileName));
    if (looksLikeKey) {
      return { level: "S3", reason: `Sensitive file extension detected: ${path}` };
    }
  }

  return { level: "S1" };
}
|