@openbmb/clawxrouter 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,273 @@
1
+ import { createHash } from "node:crypto";
2
+ import type { ClawXrouterRouter, DetectionContext, EdgeProviderType, RouterDecision } from "../types.js";
3
+ import { callChatCompletion } from "../local-model.js";
4
+ import { loadPrompt } from "../prompt-loader.js";
5
+ import { getGlobalCollector } from "../token-stats.js";
6
+
7
+ // ── Types ──
8
+
9
/**
 * Redirect target for one complexity tier.
 */
type TierTarget = {
  /** Provider id placed into the redirect decision's `target.provider`. */
  provider: string;
  /** Model id placed into the redirect decision's `target.model`. */
  model: string;
  /**
   * Optional human-readable description of the tier. When present it is
   * rendered into the generated judge prompt as `NAME = description`.
   */
  description?: string;
};
14
+
15
/**
 * Resolved settings for the token-saver router.
 */
type TokenSaverConfig = {
  /** When false, `detect` passes through unless the detection context is a dry run. */
  enabled: boolean;
  /** Endpoint handed to `callChatCompletion` for the judge model. */
  judgeEndpoint: string;
  /** Model name handed to `callChatCompletion`; also reported to the token-stats collector. */
  judgeModel: string;
  /** Forwarded to `callChatCompletion` as `providerType`. */
  judgeProviderType: EdgeProviderType;
  /** Forwarded to `callChatCompletion` as `customModule`. */
  judgeCustomModule?: string;
  /** Forwarded to `callChatCompletion` as `apiKey`. */
  judgeApiKey?: string;
  /** Tier name → redirect target. Key order matters: the middle key is the implicit default tier. */
  tiers: Record<string, TierTarget>;
  /** Tier used when the judge's answer cannot be parsed; ignored unless it is a key of `tiers`. */
  defaultTier?: string;
  /** Extra rules appended after the built-in ones in the generated judge prompt. */
  rules?: string[];
  /** Maximum age, in milliseconds, at which a cached classification is still reused. */
  cacheTtlMs: number;
};
27
+
28
/** Built-in tier table, listed from cheapest to most capable tier name. */
const DEFAULT_TIERS: Record<string, TierTarget> = {
  SIMPLE: { provider: "zhipu", model: "glm-4.5-air" },
  MEDIUM: { provider: "minimax", model: "minimax-m2.5" },
  COMPLEX: { provider: "deepseek", model: "deepseek-v3.2" },
  RESEARCH: { provider: "zhipu", model: "glm-5" },
  REASONING: { provider: "moonshot", model: "kimi-k2.5" },
};

/**
 * Baseline settings for the token-saver router.
 *
 * The router is disabled by default, the judge points at a local
 * OpenAI-compatible endpoint, and classifications are cached for five minutes.
 */
const DEFAULT_CONFIG: TokenSaverConfig = {
  enabled: false,
  judgeEndpoint: "http://localhost:11434",
  judgeModel: "openbmb/minicpm4.1",
  judgeProviderType: "openai-compatible",
  tiers: DEFAULT_TIERS,
  cacheTtlMs: 300_000,
};
42
+
43
+ // ── Prompt generation ──
44
+
45
/**
 * Build the judge model's system prompt from the configured tiers.
 *
 * The prompt lists every tier (with its description when one is set), then the
 * two built-in rules followed by any caller-supplied rules, and finally the
 * required output shape `{"tier":"A|B|..."}` using the tier names in key order.
 *
 * @param tiers Tier name → target; only the names and descriptions are used.
 * @param rules Optional extra rules appended after the built-in ones.
 * @returns The newline-joined prompt text.
 */
function generateJudgePrompt(tiers: Record<string, TierTarget>, rules?: string[]): string {
  const definitionLines: string[] = [];
  const names: string[] = [];
  for (const [name, target] of Object.entries(tiers)) {
    names.push(name);
    const description = target.description;
    definitionLines.push(description ? `${name} = ${description}` : name);
  }

  const ruleLines = [
    "When unsure, pick the LOWER tier (save tokens).",
    "Short prompts (< 20 words) with no technical depth → the lowest tier.",
    ...(rules ?? []),
  ].map((rule) => `- ${rule}`);

  const sections = [
    "You are a task complexity classifier. Classify the user's task into exactly one tier.",
    "",
    definitionLines.join("\n"),
    "",
    "Rules:",
    ruleLines.join("\n"),
    "",
    "CRITICAL: Output ONLY the raw JSON object. Do NOT wrap in markdown code blocks. Do NOT add any text before or after.",
    `{"tier":"${names.join("|")}"}`,
  ];
  return sections.join("\n");
}
76
+
77
/**
 * Judge prompt used when no prompt file is found and no tier carries a
 * description. It does not enumerate the configured tier names.
 */
const FALLBACK_JUDGE_PROMPT =
  'You are a task complexity classifier. Output ONLY a JSON object: {"tier":"MEDIUM"}';
78
+
79
+ // ── Cache ──
80
+
81
/** One cached classification: the tier name and the `Date.now()` value at which it was stored. */
type CacheEntry = { tier: string; ts: number };

/** Classification results keyed by prompt hash. */
const classificationCache = new Map<string, CacheEntry>();

/** How often the sweep runs, in milliseconds. */
const CACHE_CLEANUP_INTERVAL_MS = 60_000;
/** Entries older than this many milliseconds are removed by the sweep. */
const CACHE_MAX_AGE_MS = 600_000;

/** Handle of the running sweep interval, or null while no sweep has been started. */
let cleanupTimer: ReturnType<typeof setInterval> | null = null;

/**
 * Start the periodic cache sweep if it is not already running.
 *
 * Safe to call repeatedly: only the first call creates the interval. Where the
 * timer handle exposes `unref`, it is called so the interval alone does not
 * keep the process alive.
 */
function startCacheCleanup(): void {
  if (cleanupTimer) return;

  const evictExpired = (): void => {
    const cutoff = Date.now() - CACHE_MAX_AGE_MS;
    classificationCache.forEach((entry, key) => {
      if (entry.ts < cutoff) classificationCache.delete(key);
    });
  };

  const timer = setInterval(evictExpired, CACHE_CLEANUP_INTERVAL_MS);
  cleanupTimer = timer;
  if (timer && typeof timer === "object" && "unref" in timer) {
    (timer as NodeJS.Timeout).unref();
  }
}
101
+
102
+ // ── Helpers ──
103
+
104
/**
 * Derive the cache key for a prompt.
 *
 * @param prompt Prompt text to fingerprint.
 * @returns The first 16 hex characters of the prompt's SHA-256 digest.
 */
function hashPrompt(prompt: string): string {
  const digest = createHash("sha256").update(prompt).digest("hex");
  return digest.substring(0, 16);
}
107
+
108
/**
 * Extract the tier chosen by the judge model from its raw response.
 *
 * `<think>…</think>` sections are removed first. Every `"tier": "<value>"`
 * field that sits between a `{` and a `}` is then considered in order, and the
 * first value naming a configured tier wins. Tier names are user-defined
 * config keys, so matching is case-insensitive and the returned value is always
 * the name exactly as it appears in `validTiers`. Names may contain any
 * character except a double quote (digits, hyphens, ...).
 *
 * @param response Raw text returned by the judge model.
 * @param validTiers Configured tier names.
 * @param defaultTier Value returned when no configured tier can be extracted.
 * @returns A member of `validTiers`, or `defaultTier`.
 */
function parseTier(response: string, validTiers: Set<string>, defaultTier: string): string {
  // Callers hand over whatever the judge backend produced; never throw on it.
  if (typeof response !== "string") return defaultTier;

  const cleaned = response.replace(/<think>[\s\S]*?<\/think>/g, "").trim();
  const tierField = /"tier"\s*:\s*"([^"]+)"/g;

  for (const match of cleaned.matchAll(tierField)) {
    const start = match.index ?? 0;
    const end = start + match[0].length;
    // Only accept the field when it is enclosed in something brace-delimited.
    if (cleaned.lastIndexOf("{", start) === -1 || cleaned.indexOf("}", end) === -1) continue;

    const raw = (match[1] ?? "").trim();
    const upper = raw.toUpperCase();
    // Upper-cased lookup first keeps the result identical for upper-case tier names.
    if (validTiers.has(upper)) return upper;
    if (validTiers.has(raw)) return raw;
    for (const name of validTiers) {
      if (name.toUpperCase() === upper) return name;
    }
  }
  return defaultTier;
}
121
+
122
/**
 * Turn a tier name into a routing decision.
 *
 * @param tier Tier name to look up in `config.tiers`.
 * @param config Resolved token-saver configuration.
 * @returns A redirect to the tier's provider/model with confidence 0.8, or a
 *   passthrough decision when the tier has no mapping.
 */
function buildDecision(tier: string, config: TokenSaverConfig): RouterDecision {
  const mapping = config.tiers[tier];
  if (mapping) {
    const { provider, model } = mapping;
    return {
      level: "S1",
      action: "redirect",
      target: { provider, model },
      reason: `tier=${tier}`,
      confidence: 0.8,
    };
  }
  return { level: "S1", action: "passthrough", reason: `no model mapping for tier ${tier}` };
}
135
+
136
/**
 * Resolve the token-saver settings from the raw plugin configuration.
 *
 * Router settings are read from `privacy.routers["token-saver"]` (`enabled`
 * and `options`). Judge connection settings fall back to `privacy.localModel`
 * and then to `DEFAULT_CONFIG`. `tiers` never falls back to the built-in tier
 * table: when none are configured the result has no tiers.
 *
 * The input is untyped, so every value is checked before use. A value of the
 * wrong type is treated as absent and the next fallback applies. Tier entries
 * without a string `provider` and `model` are dropped, and non-string `rules`
 * entries are dropped.
 *
 * @param pluginConfig Raw plugin configuration object (may be missing sections).
 * @returns A fully populated configuration.
 */
function resolveConfig(pluginConfig: Record<string, unknown>): TokenSaverConfig {
  const asRecord = (value: unknown): Record<string, unknown> | undefined =>
    typeof value === "object" && value !== null && !Array.isArray(value)
      ? (value as Record<string, unknown>)
      : undefined;
  const asString = (value: unknown): string | undefined =>
    typeof value === "string" ? value : undefined;

  const privacy = asRecord(pluginConfig?.privacy);
  const routerEntry = asRecord(asRecord(privacy?.routers)?.["token-saver"]);
  const options = asRecord(routerEntry?.options) ?? {};
  const localModel = asRecord(privacy?.localModel);

  // Keep only tier entries that can actually be turned into a redirect target.
  const tiers: Record<string, TierTarget> = {};
  for (const [name, candidate] of Object.entries(asRecord(options.tiers) ?? {})) {
    const target = asRecord(candidate);
    if (typeof target?.provider === "string" && typeof target?.model === "string") {
      tiers[name] = candidate as TierTarget;
    }
  }

  const rawRules = options.rules;
  const rules = Array.isArray(rawRules)
    ? rawRules.filter((rule): rule is string => typeof rule === "string")
    : undefined;

  const rawEnabled = routerEntry?.enabled;
  const rawTtl = options.cacheTtlMs;

  return {
    enabled: typeof rawEnabled === "boolean" ? rawEnabled : DEFAULT_CONFIG.enabled,
    judgeEndpoint:
      asString(options.judgeEndpoint) ??
      asString(localModel?.endpoint) ??
      DEFAULT_CONFIG.judgeEndpoint,
    judgeModel:
      asString(options.judgeModel) ??
      asString(localModel?.model) ??
      DEFAULT_CONFIG.judgeModel,
    judgeProviderType: (asString(options.judgeProviderType) ??
      asString(localModel?.type) ??
      DEFAULT_CONFIG.judgeProviderType) as EdgeProviderType,
    judgeCustomModule: asString(options.judgeCustomModule) ?? asString(localModel?.module),
    judgeApiKey: asString(options.judgeApiKey) ?? asString(localModel?.apiKey),
    tiers,
    defaultTier: asString(options.defaultTier),
    rules,
    cacheTtlMs:
      typeof rawTtl === "number" && !Number.isNaN(rawTtl) ? rawTtl : DEFAULT_CONFIG.cacheTtlMs,
  };
}
173
+
174
/**
 * Report whether at least one tier has a non-empty description.
 *
 * @param tiers Tier name → target.
 * @returns True when any target's `description` is truthy.
 */
function hasAnyDescription(tiers: Record<string, TierTarget>): boolean {
  for (const target of Object.values(tiers)) {
    if (target.description) return true;
  }
  return false;
}
177
+
178
+ // ── Router ──
179
+
180
/**
 * Choose the system prompt for the judge model.
 *
 * Precedence: the `token-saver-judge` prompt file when it yields content, then
 * a prompt generated from the tier descriptions, then the fixed fallback.
 */
function selectJudgeSystemPrompt(config: TokenSaverConfig): string {
  const fromFile = loadPrompt("token-saver-judge", "");
  if (fromFile) return fromFile;
  if (hasAnyDescription(config.tiers)) {
    return generateJudgePrompt(config.tiers, config.rules);
  }
  return FALLBACK_JUDGE_PROMPT;
}

/**
 * Router that asks a judge model to classify the user's message into one of
 * the configured tiers and redirects the request to that tier's model.
 *
 * It passes through without calling the judge when the router is disabled
 * (and this is not a dry run), when the session key marks a subagent, when no
 * tiers are configured, or when the message is blank. Classifications are
 * cached per prompt hash for `cacheTtlMs`. A failed judge call is logged and
 * results in a passthrough.
 */
export const tokenSaverRouter: ClawXrouterRouter = {
  id: "token-saver",

  async detect(
    context: DetectionContext,
    pluginConfig: Record<string, unknown>,
  ): Promise<RouterDecision> {
    const config = resolveConfig(pluginConfig);
    const active = config.enabled || context.dryRun;
    if (!active) {
      return { level: "S1", action: "passthrough" };
    }

    if (context.sessionKey?.includes(":subagent:")) {
      return { level: "S1", action: "passthrough", reason: "subagent — skipped" };
    }

    const tierNames = Object.keys(config.tiers);
    if (tierNames.length === 0) {
      return { level: "S1", action: "passthrough", reason: "no tiers configured" };
    }

    const prompt = context.message ?? "";
    if (prompt.trim() === "") {
      return { level: "S1", action: "passthrough" };
    }

    startCacheCleanup();
    const validTiers = new Set(tierNames);

    // Fall back to the middle tier when no usable default is configured.
    let defaultTier: string;
    if (config.defaultTier && validTiers.has(config.defaultTier)) {
      defaultTier = config.defaultTier;
    } else {
      defaultTier = tierNames[Math.floor(tierNames.length / 2)] ?? "MEDIUM";
    }

    // Reuse a fresh cached classification, but only if its tier still exists.
    const cacheKey = hashPrompt(prompt);
    const hit = classificationCache.get(cacheKey);
    if (hit && Date.now() - hit.ts < config.cacheTtlMs && validTiers.has(hit.tier)) {
      return buildDecision(hit.tier, config);
    }

    try {
      const judgeSystemPrompt = selectJudgeSystemPrompt(config);
      const messages = [
        { role: "system", content: judgeSystemPrompt },
        { role: "user", content: prompt },
      ];
      const result = await callChatCompletion(config.judgeEndpoint, config.judgeModel, messages, {
        temperature: 0,
        maxTokens: 1024,
        providerType: config.judgeProviderType,
        customModule: config.judgeCustomModule,
        apiKey: config.judgeApiKey,
      });

      // Account for the judge call's own token usage.
      if (result.usage) {
        getGlobalCollector()?.record({
          sessionKey: context.sessionKey ?? "",
          provider: "edge",
          model: config.judgeModel,
          source: "router",
          usage: result.usage,
        });
      }

      const tier = parseTier(result.text, validTiers, defaultTier);
      classificationCache.set(cacheKey, { tier, ts: Date.now() });
      return buildDecision(tier, config);
    } catch (err) {
      console.error(`[ClawXrouter] [TokenSaver] judge call failed:`, err);
      return { level: "S1", action: "passthrough", reason: "judge call failed — passthrough" };
    }
  },
};
269
+
270
+ // ── Exports for testing ──
271
+
272
+ export { parseTier, hashPrompt, classificationCache, resolveConfig, generateJudgePrompt, DEFAULT_CONFIG, FALLBACK_JUDGE_PROMPT as DEFAULT_JUDGE_PROMPT };
273
+ export type { TierTarget, TokenSaverConfig };
package/src/rules.ts ADDED
@@ -0,0 +1,320 @@
1
+ import type { DetectionContext, DetectionResult, PrivacyConfig, SensitivityLevel } from "./types.js";
2
+ import { levelToNumeric, maxLevel } from "./types.js";
3
+ import { extractPathsFromParams, matchesPathPattern } from "./utils.js";
4
+
5
/** Maximum number of patterns kept in the cache before the oldest entry is evicted. */
const PATTERN_CACHE_MAX = 500;

/**
 * Compiled patterns keyed by their source text. A `null` value records a
 * pattern that failed to compile, so it is neither recompiled nor reported again.
 */
const patternCache = new Map<string, RegExp | null>();

/**
 * Compile a configured pattern into a RegExp, with caching.
 *
 * A leading Python-style inline flag group such as `(?i)` or `(?s)` is stripped
 * and translated into JS flags; only `i`, `s` and `m` are carried over.
 * Patterns without an inline flag group are compiled case-insensitively.
 *
 * @param pattern Pattern source from configuration.
 * @returns The compiled RegExp, or null when the pattern is invalid. An invalid
 *   pattern is logged once; later calls return the cached null silently until
 *   the entry is evicted.
 */
function getOrCompileRegex(pattern: string): RegExp | null {
  const cached = patternCache.get(pattern);
  if (cached !== undefined) return cached;

  let compiled: RegExp | null;
  try {
    let flags = "i";
    const cleaned = pattern.replace(/^\(\?([gimsuy]+)\)/, (_m, f: string) => {
      flags = f.includes("i") ? "i" : "";
      if (f.includes("s")) flags += "s";
      if (f.includes("m")) flags += "m";
      return "";
    });
    compiled = new RegExp(cleaned, flags);
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    console.warn(`[ClawXrouter] Invalid regex pattern: ${pattern} — ${message}`);
    compiled = null;
  }

  // Evict the oldest insertion once the cache is full.
  if (patternCache.size >= PATTERN_CACHE_MAX) {
    const firstKey = patternCache.keys().next().value;
    if (firstKey !== undefined) patternCache.delete(firstKey);
  }
  patternCache.set(pattern, compiled);
  return compiled;
}
33
+
34
/**
 * Classify a detection context with the configured rules.
 *
 * Checks run in a fixed order: message keywords, message patterns, tool name,
 * tool parameters, then tool-result keywords and patterns (a non-string tool
 * result is JSON-stringified first). Every check that reports something above
 * S1 contributes its level and reason; the final level is the maximum of those
 * and the reasons are joined with "; ". Reasons from the tool result carry a
 * "Result: " prefix.
 *
 * @param context What is being inspected (message, tool name/params/result).
 * @param config Privacy configuration holding the rules.
 * @returns The detection result; confidence is always 1.0.
 */
export function detectByRules(
  context: DetectionContext,
  config: PrivacyConfig
): DetectionResult {
  const levels: SensitivityLevel[] = [];
  const reasons: string[] = [];

  // Record one check's outcome; S1 outcomes contribute nothing.
  const collect = (
    finding: { level: SensitivityLevel; reason?: string },
    prefix = ""
  ): void => {
    if (finding.level === "S1") return;
    levels.push(finding.level);
    if (finding.reason) reasons.push(`${prefix}${finding.reason}`);
  };

  // 1–2. Keywords, then regex patterns, in the message
  if (context.message) {
    collect(checkKeywords(context.message, config));
    collect(checkPatterns(context.message, config));
  }

  // 3. Tool name
  if (context.toolName) {
    collect(checkToolType(context.toolName, config));
  }

  // 4. Tool parameters (paths, etc.)
  if (context.toolParams) {
    collect(checkToolParams(context.toolParams, config));
  }

  // 5. Tool result content (keywords + patterns)
  if (context.toolResult) {
    const resultText =
      typeof context.toolResult === "string"
        ? context.toolResult
        : JSON.stringify(context.toolResult);
    collect(checkKeywords(resultText, config), "Result: ");
    collect(checkPatterns(resultText, config), "Result: ");
  }

  // The strictest level reported by any check wins.
  const finalLevel = levels.length > 0 ? maxLevel(...levels) : "S1";

  return {
    level: finalLevel,
    levelNumeric: levelToNumeric(finalLevel),
    reason: reasons.length > 0 ? reasons.join("; ") : undefined,
    detectorType: "ruleDetector",
    confidence: 1.0, // Rules have high confidence
  };
}
121
+
122
/** Compiled keyword regexes keyed by the keyword text. */
const keywordRegexCache = new Map<string, RegExp>();

/**
 * Build (and cache) a case-insensitive regex that matches a keyword at
 * word-like boundaries.
 *
 * For keywords starting with "." (file extensions like ".env", ".key"):
 *   The "." itself is a boundary, so only the tail is checked: it must NOT be
 *   followed by an ASCII alphanumeric char. "file.env" matches, ".envelope"
 *   does not.
 *
 * For plain-word keywords:
 *   Negative lookbehind/lookahead on ASCII alphanumeric chars, so "token"
 *   matches "auth_token" and "the token" but NOT "tokenize".
 *
 * An empty or whitespace-only keyword yields a regex that never matches.
 * Without this guard the pattern would consist of the boundary assertions
 * alone and would match arbitrary text depending on its punctuation.
 *
 * @param keyword Keyword from configuration; regex metacharacters are escaped.
 * @returns The compiled regex; repeated calls return the same instance.
 */
export function getKeywordRegex(keyword: string): RegExp {
  const cached = keywordRegexCache.get(keyword);
  if (cached) return cached;

  let pattern: string;
  if (keyword.trim() === "") {
    // Empty negative lookahead: fails at every position.
    pattern = "(?!)";
  } else {
    const escaped = keyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    pattern = keyword.startsWith(".")
      ? `${escaped}(?![a-zA-Z0-9])`
      : `(?<![a-zA-Z0-9])${escaped}(?![a-zA-Z0-9])`;
  }

  const re = new RegExp(pattern, "i");
  keywordRegexCache.set(keyword, re);
  return re;
}
150
+
151
/**
 * Look for configured sensitive keywords in a piece of text.
 *
 * S3 keywords are tried before S2 keywords, each list in configured order, and
 * the first hit decides the outcome.
 *
 * @param text Text to scan.
 * @param config Privacy configuration holding `rules.keywords`.
 * @returns The level of the first matching keyword with a reason naming it, or
 *   S1 without a reason when nothing matches.
 */
function checkKeywords(
  text: string,
  config: PrivacyConfig
): { level: SensitivityLevel; reason?: string } {
  // Higher-priority level first.
  for (const level of ["S3", "S2"] as const) {
    const keywords = config.rules?.keywords?.[level] ?? [];
    for (const keyword of keywords) {
      if (getKeywordRegex(keyword).test(text)) {
        return { level, reason: `${level} keyword detected: ${keyword}` };
      }
    }
  }
  return { level: "S1" };
}
182
+
183
/**
 * Look for sensitive content in text using the configured regex patterns.
 *
 * S3 patterns are tried before S2 patterns, each list in configured order.
 * Patterns that fail to compile are skipped.
 *
 * @param text Text to scan.
 * @param config Privacy configuration holding `rules.patterns`.
 * @returns The level of the first matching pattern with a reason naming it, or
 *   S1 without a reason when nothing matches.
 */
function checkPatterns(
  text: string,
  config: PrivacyConfig
): { level: SensitivityLevel; reason?: string } {
  // Higher-priority level first.
  for (const level of ["S3", "S2"] as const) {
    for (const pattern of config.rules?.patterns?.[level] ?? []) {
      const regex = getOrCompileRegex(pattern);
      if (regex?.test(text)) {
        return { level, reason: `${level} pattern matched: ${pattern}` };
      }
    }
  }
  return { level: "S1" };
}
216
+
217
/**
 * Test whether `segment` occurs in `name` as a complete segment, i.e. bounded
 * on each side by a tool-name separator (`.`, `_`, `-`) or by the start/end of
 * the string. The comparison is case-insensitive.
 * This keeps "pseudocode_generator" from matching "sudo" and "powershell" from
 * matching "shell".
 */
function toolNameContainsSegment(name: string, segment: string): boolean {
  const literal = segment.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const separator = "[._\\-]";
  const bounded = new RegExp(`(?:^|${separator})${literal}(?:$|${separator})`, "i");
  return bounded.test(name);
}
227
+
228
/**
 * Classify a tool by name against the configured sensitive tools.
 *
 * A configured tool matches when it equals the tool name or appears in it as a
 * separator-delimited segment, ignoring case. S3 tools are checked before S2.
 *
 * @param toolName Name of the tool being invoked.
 * @param config Privacy configuration holding `rules.tools`.
 * @returns The matching level with a reason naming the tool, or S1 without a
 *   reason when no configured tool matches.
 */
function checkToolType(
  toolName: string,
  config: PrivacyConfig
): { level: SensitivityLevel; reason?: string } {
  const lowered = toolName.toLowerCase();

  // Higher-priority level first.
  for (const level of ["S3", "S2"] as const) {
    const configured = config.rules?.tools?.[level]?.tools ?? [];
    const matched = configured.some((tool) => {
      const candidate = tool.toLowerCase();
      return lowered === candidate || toolNameContainsSegment(lowered, candidate);
    });
    if (matched) {
      return { level, reason: `${level} tool detected: ${toolName}` };
    }
  }

  return { level: "S1" };
}
263
+
264
/** Path suffixes that mark key/certificate containers. */
const SENSITIVE_PATH_SUFFIXES = [".pem", ".key", ".p12", ".pfx"];

/** Substrings of conventional SSH private-key file names. */
const SENSITIVE_PATH_MARKERS = ["id_rsa", "id_dsa", "id_ecdsa", "id_ed25519"];

/**
 * Classify tool parameters by the file paths they contain.
 *
 * All S3 criteria are evaluated before any S2 criterion, so the strictest
 * applicable level is always returned:
 *   1. configured S3 path patterns,
 *   2. built-in key-material check (suffix or SSH key file name, ignoring case),
 *   3. configured S2 path patterns.
 * The built-in check deliberately runs before the S2 patterns. Otherwise a
 * private key under an S2-configured directory, or passed alongside an
 * S2-matching path, would be reported as S2.
 *
 * @param params Tool call parameters; paths are extracted with `extractPathsFromParams`.
 * @param config Privacy configuration holding `rules.tools.S2/S3.paths`.
 * @returns The matching level with a reason naming the path, or S1 without a
 *   reason when no path is sensitive.
 */
function checkToolParams(
  params: Record<string, unknown>,
  config: PrivacyConfig
): { level: SensitivityLevel; reason?: string } {
  const paths = extractPathsFromParams(params);

  if (paths.length === 0) {
    return { level: "S1" };
  }

  // 1. Configured S3 paths (highest priority)
  const s3Paths = config.rules?.tools?.S3?.paths ?? [];
  for (const path of paths) {
    if (matchesPathPattern(path, s3Paths)) {
      return {
        level: "S3",
        reason: `S3 path detected: ${path}`,
      };
    }
  }

  // 2. Built-in S3 check for common key/certificate files
  for (const path of paths) {
    const lowerPath = path.toLowerCase();
    const looksLikeKeyMaterial =
      SENSITIVE_PATH_SUFFIXES.some((suffix) => lowerPath.endsWith(suffix)) ||
      SENSITIVE_PATH_MARKERS.some((marker) => lowerPath.includes(marker));
    if (looksLikeKeyMaterial) {
      return {
        level: "S3",
        reason: `Sensitive file extension detected: ${path}`,
      };
    }
  }

  // 3. Configured S2 paths
  const s2Paths = config.rules?.tools?.S2?.paths ?? [];
  for (const path of paths) {
    if (matchesPathPattern(path, s2Paths)) {
      return {
        level: "S2",
        reason: `S2 path detected: ${path}`,
      };
    }
  }

  return { level: "S1" };
}