@openbmb/clawxrouter 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,723 @@
1
+ import type {
2
+ DetectionContext,
3
+ DetectionResult,
4
+ EdgeProviderType,
5
+ PrivacyConfig,
6
+ SensitivityLevel,
7
+ } from "./types.js";
8
+ import { loadPrompt, loadPromptWithVars } from "./prompt-loader.js";
9
+ import { levelToNumeric } from "./types.js";
10
+ import { getGlobalCollector } from "./token-stats.js";
11
+
12
/** A single chat message in the OpenAI-style role/content format. */
export type ChatMessage = { role: "system" | "user" | "assistant"; content: string };

/** Tunables forwarded to the edge model backend for a completion call. */
export type ChatCompletionOptions = {
  // Sampling temperature; callers in this module default to 0.1.
  temperature?: number;
  // Generation cap (sent as max_tokens, or num_predict on Ollama native).
  maxTokens?: number;
  // Stop sequences that end generation early.
  stop?: string[];
  // OpenAI-style frequency penalty (mapped to repeat_penalty on Ollama native).
  frequencyPenalty?: number;
  // Optional bearer token, sent as an Authorization header.
  apiKey?: string;
};

/** Token accounting parsed from the backend response. */
export type LlmUsageInfo = {
  // Prompt-side token count.
  input: number;
  // Completion-side token count.
  output: number;
  // input + output (or the backend-reported total when present).
  total: number;
};

/** Result of a chat completion: response text plus optional usage info. */
export type ChatCompletionResult = {
  text: string;
  usage?: LlmUsageInfo;
};

/**
 * Custom edge provider module interface.
 * Users implementing type="custom" must export a module matching this shape.
 */
export interface CustomEdgeProvider {
  callChat(
    endpoint: string,
    model: string,
    messages: ChatMessage[],
    options?: ChatCompletionOptions,
  ): Promise<string>;
}
45
+
46
+ const _customProviderCache: Map<string, CustomEdgeProvider> = new Map();
47
+
48
+ async function loadCustomProvider(modulePath: string): Promise<CustomEdgeProvider> {
49
+ const cached = _customProviderCache.get(modulePath);
50
+ if (cached) return cached;
51
+ const mod = await import(modulePath) as CustomEdgeProvider;
52
+ if (typeof mod.callChat !== "function") {
53
+ throw new Error(`Custom edge provider at "${modulePath}" must export a callChat() function`);
54
+ }
55
+ _customProviderCache.set(modulePath, mod);
56
+ return mod;
57
+ }
58
+
59
+ /**
60
+ * Dispatch a chat completion call based on the configured edge provider type.
61
+ * This is the single entry point for all edge model calls.
62
+ *
63
+ * Returns a ChatCompletionResult with the response text and optional usage info
64
+ * parsed from the API response (for token accounting).
65
+ */
66
+ export async function callChatCompletion(
67
+ endpoint: string,
68
+ model: string,
69
+ messages: ChatMessage[],
70
+ options?: ChatCompletionOptions & { providerType?: EdgeProviderType; customModule?: string },
71
+ ): Promise<ChatCompletionResult> {
72
+ const providerType = options?.providerType ?? "openai-compatible";
73
+
74
+ let result: ChatCompletionResult;
75
+ switch (providerType) {
76
+ case "ollama-native":
77
+ result = await callOllamaNative(endpoint, model, messages, options);
78
+ break;
79
+ case "custom": {
80
+ if (!options?.customModule) {
81
+ throw new Error("Custom edge provider requires a 'module' path in localModel config");
82
+ }
83
+ const provider = await loadCustomProvider(options.customModule);
84
+ const text = await provider.callChat(endpoint, model, messages, options);
85
+ result = { text };
86
+ break;
87
+ }
88
+ case "openai-compatible":
89
+ default:
90
+ result = await callOpenAICompatible(endpoint, model, messages, options);
91
+ break;
92
+ }
93
+ return result;
94
+ }
95
+
96
+ /**
97
+ * OpenAI-compatible chat completions call.
98
+ * POST ${endpoint}/v1/chat/completions — works with Ollama, vLLM, LiteLLM, LocalAI, LMStudio, SGLang, TGI, etc.
99
+ */
100
+ const CLAWXROUTER_FETCH_TIMEOUT_MS = 60_000;
101
+
102
+ async function callOpenAICompatible(
103
+ endpoint: string,
104
+ model: string,
105
+ messages: ChatMessage[],
106
+ options?: ChatCompletionOptions,
107
+ ): Promise<ChatCompletionResult> {
108
+ const base = endpoint.replace(/\/v1\/?$/, "");
109
+ const url = `${base}/v1/chat/completions`;
110
+
111
+ const headers: Record<string, string> = { "Content-Type": "application/json" };
112
+ if (options?.apiKey) {
113
+ headers["Authorization"] = `Bearer ${options.apiKey}`;
114
+ }
115
+
116
+ const response = await fetch(url, {
117
+ method: "POST",
118
+ headers,
119
+ body: JSON.stringify({
120
+ model,
121
+ messages,
122
+ temperature: options?.temperature ?? 0.1,
123
+ max_tokens: options?.maxTokens ?? 800,
124
+ stream: true,
125
+ ...(options?.stop ? { stop: options.stop } : {}),
126
+ ...(options?.frequencyPenalty != null ? { frequency_penalty: options.frequencyPenalty } : {}),
127
+ }),
128
+ signal: AbortSignal.timeout(CLAWXROUTER_FETCH_TIMEOUT_MS),
129
+ });
130
+
131
+ if (!response.ok) {
132
+ let body = "";
133
+ try { body = (await response.text()).slice(0, 300); } catch { /* ignore */ }
134
+ throw new Error(`Chat completions API error: ${response.status} ${response.statusText}${body ? ` – ${body}` : ""} (url=${url})`);
135
+ }
136
+
137
+ const contentType = response.headers.get("content-type") ?? "";
138
+ if (contentType.includes("text/event-stream") && response.body) {
139
+ return await consumeSSEStream(response.body);
140
+ }
141
+
142
+ const data = (await response.json()) as {
143
+ choices?: Array<{ message?: { content?: string } }>;
144
+ usage?: { prompt_tokens?: number; completion_tokens?: number; total_tokens?: number };
145
+ };
146
+ let text = data.choices?.[0]?.message?.content ?? "";
147
+ text = stripThinkingTags(text);
148
+
149
+ const usage: LlmUsageInfo | undefined = data.usage
150
+ ? {
151
+ input: data.usage.prompt_tokens ?? 0,
152
+ output: data.usage.completion_tokens ?? 0,
153
+ total: data.usage.total_tokens ?? (data.usage.prompt_tokens ?? 0) + (data.usage.completion_tokens ?? 0),
154
+ }
155
+ : undefined;
156
+
157
+ return { text, usage };
158
+ }
159
+
160
+ async function consumeSSEStream(
161
+ body: ReadableStream<Uint8Array>,
162
+ ): Promise<ChatCompletionResult> {
163
+ const decoder = new TextDecoder();
164
+ const reader = body.getReader();
165
+ let textParts: string[] = [];
166
+ let usage: LlmUsageInfo | undefined;
167
+ let buffer = "";
168
+
169
+ try {
170
+ while (true) {
171
+ const { done, value } = await reader.read();
172
+ if (done) break;
173
+ buffer += decoder.decode(value, { stream: true });
174
+
175
+ const lines = buffer.split("\n");
176
+ buffer = lines.pop() ?? "";
177
+
178
+ for (const line of lines) {
179
+ const trimmed = line.trim();
180
+ if (!trimmed.startsWith("data:")) continue;
181
+ const payload = trimmed.slice(5).trim();
182
+ if (payload === "[DONE]") continue;
183
+
184
+ try {
185
+ const chunk = JSON.parse(payload) as {
186
+ choices?: Array<{ delta?: { content?: string; reasoning_content?: string } }>;
187
+ usage?: { prompt_tokens?: number; completion_tokens?: number; total_tokens?: number };
188
+ };
189
+ const delta = chunk.choices?.[0]?.delta;
190
+ if (delta?.content) {
191
+ textParts.push(delta.content);
192
+ }
193
+ if (chunk.usage) {
194
+ usage = {
195
+ input: chunk.usage.prompt_tokens ?? 0,
196
+ output: chunk.usage.completion_tokens ?? 0,
197
+ total: chunk.usage.total_tokens ?? 0,
198
+ };
199
+ }
200
+ } catch {
201
+ // skip malformed SSE chunks
202
+ }
203
+ }
204
+ }
205
+ } finally {
206
+ reader.releaseLock();
207
+ }
208
+
209
+ let text = textParts.join("");
210
+ text = stripThinkingTags(text);
211
+ return { text, usage };
212
+ }
213
+
214
+ /**
215
+ * Ollama native API call.
216
+ * POST ${endpoint}/api/chat — Ollama's own protocol (non-streaming).
217
+ */
218
+ async function callOllamaNative(
219
+ endpoint: string,
220
+ model: string,
221
+ messages: ChatMessage[],
222
+ options?: ChatCompletionOptions,
223
+ ): Promise<ChatCompletionResult> {
224
+ const url = `${endpoint}/api/chat`;
225
+
226
+ const response = await fetch(url, {
227
+ method: "POST",
228
+ headers: { "Content-Type": "application/json" },
229
+ body: JSON.stringify({
230
+ model,
231
+ messages,
232
+ stream: false,
233
+ options: {
234
+ temperature: options?.temperature ?? 0.1,
235
+ num_predict: options?.maxTokens ?? 800,
236
+ ...(options?.stop ? { stop: options.stop } : {}),
237
+ ...(options?.frequencyPenalty != null ? { repeat_penalty: 1.0 + (options.frequencyPenalty ?? 0) } : {}),
238
+ },
239
+ }),
240
+ });
241
+
242
+ if (!response.ok) {
243
+ throw new Error(`Ollama native API error: ${response.status} ${response.statusText}`);
244
+ }
245
+
246
+ const data = (await response.json()) as {
247
+ message?: { content?: string };
248
+ prompt_eval_count?: number;
249
+ eval_count?: number;
250
+ };
251
+ let text = data.message?.content ?? "";
252
+ text = stripThinkingTags(text);
253
+
254
+ const promptTokens = data.prompt_eval_count ?? 0;
255
+ const outputTokens = data.eval_count ?? 0;
256
+ const usage: LlmUsageInfo | undefined = (promptTokens || outputTokens)
257
+ ? { input: promptTokens, output: outputTokens, total: promptTokens + outputTokens }
258
+ : undefined;
259
+
260
+ return { text, usage };
261
+ }
262
+
263
+ /** Strip <think>...</think> blocks emitted by reasoning models (MiniCPM, Qwen3, etc.) */
264
+ function stripThinkingTags(text: string): string {
265
+ let result = text.replace(/<think>[\s\S]*?<\/think>/g, "").trim();
266
+ const lastThinkClose = result.lastIndexOf("</think>");
267
+ if (lastThinkClose !== -1) {
268
+ result = result.slice(lastThinkClose + "</think>".length).trim();
269
+ }
270
+ return result;
271
+ }
272
+
273
+ /**
274
+ * Detect sensitivity level using a local model
275
+ */
276
+ export async function detectByLocalModel(
277
+ context: DetectionContext,
278
+ config: PrivacyConfig,
279
+ ): Promise<DetectionResult> {
280
+ // Check if local model is enabled
281
+ if (!config.localModel?.enabled) {
282
+ return {
283
+ level: "S1",
284
+ levelNumeric: 1,
285
+ reason: "Local model detection disabled",
286
+ detectorType: "localModelDetector",
287
+ confidence: 0,
288
+ };
289
+ }
290
+
291
+ try {
292
+ const { system, user } = buildDetectionMessages(context);
293
+ const result = await callLocalModel(system, user, config);
294
+ const parsed = parseModelResponse(result.text);
295
+
296
+ if (result.usage) {
297
+ const collector = getGlobalCollector();
298
+ collector?.record({
299
+ sessionKey: context.sessionKey ?? "",
300
+ provider: "edge",
301
+ model: config.localModel?.model ?? "unknown",
302
+ source: "router",
303
+ usage: result.usage,
304
+ });
305
+ }
306
+
307
+ return {
308
+ level: parsed.level,
309
+ levelNumeric: levelToNumeric(parsed.level),
310
+ reason: parsed.reason,
311
+ detectorType: "localModelDetector",
312
+ confidence: parsed.confidence ?? 0.8,
313
+ };
314
+ } catch (err) {
315
+ // If local model fails, return S1 (safe) but log the error
316
+ console.error("[ClawXrouter] Local model detection failed:", err);
317
+ return {
318
+ level: "S1",
319
+ levelNumeric: 1,
320
+ reason: `Local model error: ${String(err)}`,
321
+ detectorType: "localModelDetector",
322
+ confidence: 0,
323
+ };
324
+ }
325
+ }
326
+
327
/**
 * Default detection system prompt — fallback when prompts/detection-system.md
 * is missing. Defines the S1/S2/S3 classification contract; the model's reply
 * ({"level":"S1|S2|S3","reason":"brief"}) is parsed by parseModelResponse().
 * Do NOT edit casually: the level keywords double as the text-analysis fallback.
 */
export const DEFAULT_DETECTION_SYSTEM_PROMPT = `[SYSTEM] You are a strict privacy classifier. Output ONLY a single JSON object — nothing else.

Classify by BOTH actual data AND intent. If the user asks to read/analyze a file, classify based on what the file WILL contain.

S3 = PRIVATE (local only, never cloud):
- Financial: payslip, salary, tax, bank account, SSN, 工资单, 报销单, 税表
- Medical: health records, diagnoses, prescriptions, lab results, 病历, 体检报告
- Credentials: passwords, API keys, secrets, tokens, private keys
- ANY request to read/analyze a file about the above topics → S3
"evaluate these passwords" → S3
"check my payslip" → S3
"summarize the medical record" → S3

S2 = SENSITIVE (redact PII, then send to cloud):
- Addresses (ANY physical address, 地址, 住址, street, road, apartment, 路, 街, 小区, 弄, 号)
- Gate/door codes, pickup codes, delivery tracking numbers
- Phone numbers, email addresses, real names used as contact PII
- License plates, SSN/ID mixed with personal context, chat logs with PII
- File content containing the above PII → S2
- ANY mention of "address"/"地址" with actual location data → S2
"1847 Elm St, gate code 4523#" → S2
"我的地址是北京市朝阳区xxx" → S2
"张伟 手机13912345678" → S2
"my address is 123 Main St" → S2

S1 = SAFE: No sensitive data or intent.
"write a poem about spring" → S1
"how to read Excel with pandas" → S1

Rules:
- Passwords/credentials → ALWAYS S3 (never S2)
- Medical data → ALWAYS S3 (never S2)
- Gate/access/pickup codes → S2 (not S3)
- If file content is provided and contains PII → at least S2
- When unsure → pick higher level

Output format: {"level":"S1|S2|S3","reason":"brief"}`;
365
+
366
+ /**
367
+ * Build separate system/user messages for the detection prompt.
368
+ *
369
+ * System instruction is loaded from prompts/detection-system.md (editable by users).
370
+ * The dynamic [CONTENT] block becomes the user message.
371
+ */
372
+ function buildDetectionMessages(context: DetectionContext): { system: string; user: string } {
373
+ const system = loadPrompt("detection-system", DEFAULT_DETECTION_SYSTEM_PROMPT);
374
+
375
+ const parts: string[] = ["[CONTENT]"];
376
+
377
+ if (context.message) {
378
+ parts.push(`Message: ${context.message.slice(0, 1500)}`);
379
+ }
380
+
381
+ if (context.toolName) {
382
+ parts.push(`Tool: ${context.toolName}`);
383
+ }
384
+
385
+ if (context.toolParams) {
386
+ const paramsStr = JSON.stringify(context.toolParams, null, 2);
387
+ parts.push(`Tool Parameters: ${paramsStr.slice(0, 800)}`);
388
+ }
389
+
390
+ if (context.toolResult) {
391
+ const resultStr =
392
+ typeof context.toolResult === "string"
393
+ ? context.toolResult
394
+ : JSON.stringify(context.toolResult);
395
+ parts.push(`Tool Result: ${resultStr.slice(0, 800)}`);
396
+ }
397
+
398
+ if (context.recentContext && context.recentContext.length > 0) {
399
+ parts.push(`Recent Context: ${context.recentContext.slice(-3).join(" | ")}`);
400
+ }
401
+
402
+ parts.push("[/CONTENT]");
403
+
404
+ return { system, user: parts.join("\n") };
405
+ }
406
+
407
+ /**
408
+ * Call local/edge model via the configured provider protocol.
409
+ * Dispatches to the correct API based on localModel.type.
410
+ * Returns both the text response and optional usage info for router overhead tracking.
411
+ */
412
+ async function callLocalModel(
413
+ systemPrompt: string,
414
+ userContent: string,
415
+ config: PrivacyConfig,
416
+ ): Promise<ChatCompletionResult> {
417
+ const model = config.localModel?.model ?? "openbmb/minicpm4.1";
418
+ const endpoint = config.localModel?.endpoint ?? "http://localhost:11434";
419
+ const providerType = config.localModel?.type ?? "openai-compatible";
420
+
421
+ const modelLower = model.toLowerCase();
422
+ const finalUser = modelLower.includes("qwen") ? `/no_think\n${userContent}` : userContent;
423
+
424
+ return await callChatCompletion(
425
+ endpoint,
426
+ model,
427
+ [
428
+ { role: "system", content: systemPrompt },
429
+ { role: "user", content: finalUser },
430
+ ],
431
+ {
432
+ temperature: 0.1,
433
+ maxTokens: 800,
434
+ apiKey: config.localModel?.apiKey,
435
+ providerType,
436
+ customModule: config.localModel?.module,
437
+ },
438
+ );
439
+ }
440
+
441
+ /**
442
+ * Two-step desensitization using a local model:
443
+ * Step 1: Model identifies PII items as a JSON array
444
+ * Step 2: Programmatic string replacement using the model's output
445
+ *
446
+ * Falls back to rule-based redaction if the local model is unavailable.
447
+ */
448
+ export async function desensitizeWithLocalModel(
449
+ content: string,
450
+ config: PrivacyConfig,
451
+ sessionKey?: string,
452
+ ): Promise<{ desensitized: string; wasModelUsed: boolean; failed?: boolean }> {
453
+ if (!config.localModel?.enabled) {
454
+ return { desensitized: content, wasModelUsed: false, failed: true };
455
+ }
456
+
457
+ try {
458
+ const endpoint = config.localModel?.endpoint ?? "http://localhost:11434";
459
+ const model = config.localModel?.model ?? "openbmb/minicpm4.1";
460
+ const providerType = config.localModel?.type ?? "openai-compatible";
461
+ const customModule = config.localModel?.module;
462
+
463
+ const piiItems = await extractPiiWithModel(endpoint, model, content, {
464
+ apiKey: config.localModel?.apiKey,
465
+ providerType,
466
+ customModule,
467
+ sessionKey,
468
+ });
469
+
470
+ if (piiItems.length === 0) {
471
+ return { desensitized: content, wasModelUsed: true };
472
+ }
473
+
474
+ // Step 2: Programmatic replacement
475
+ let redacted = content;
476
+ // Sort by value length descending to avoid partial replacements
477
+ const sorted = [...piiItems].sort((a, b) => b.value.length - a.value.length);
478
+ for (const item of sorted) {
479
+ if (!item.value || item.value.length < 2) continue;
480
+ const tag = mapPiiTypeToTag(item.type);
481
+ // Replace all occurrences of this value
482
+ redacted = replaceAll(redacted, item.value, tag);
483
+ }
484
+
485
+ return { desensitized: redacted, wasModelUsed: true };
486
+ } catch (err) {
487
+ console.error("[ClawXrouter] Local model desensitization failed:", err);
488
+ return { desensitized: content, wasModelUsed: false, failed: true };
489
+ }
490
+ }
491
+
492
+ /** Map model PII types to [REDACTED:...] tags */
493
+ function mapPiiTypeToTag(type: string): string {
494
+ const t = type.toUpperCase().replace(/\s+/g, "_");
495
+ const mapping: Record<string, string> = {
496
+ ADDRESS: "[REDACTED:ADDRESS]",
497
+ ACCESS_CODE: "[REDACTED:ACCESS_CODE]",
498
+ DELIVERY: "[REDACTED:DELIVERY]",
499
+ COURIER_NUMBER: "[REDACTED:DELIVERY]",
500
+ COURIER_NO: "[REDACTED:DELIVERY]",
501
+ COURIER_CODE: "[REDACTED:DELIVERY]",
502
+ TRACKING_NUMBER: "[REDACTED:DELIVERY]",
503
+ NAME: "[REDACTED:NAME]",
504
+ SENDER_NAME: "[REDACTED:NAME]",
505
+ RECIPIENT_NAME: "[REDACTED:NAME]",
506
+ PHONE: "[REDACTED:PHONE]",
507
+ SENDER_PHONE: "[REDACTED:PHONE]",
508
+ FACILITY_PHONE: "[REDACTED:PHONE]",
509
+ LANDLINE: "[REDACTED:PHONE]",
510
+ MOBILE: "[REDACTED:PHONE]",
511
+ EMAIL: "[REDACTED:EMAIL]",
512
+ ID: "[REDACTED:ID]",
513
+ ID_CARD: "[REDACTED:ID]",
514
+ ID_NUMBER: "[REDACTED:ID]",
515
+ CARD: "[REDACTED:CARD]",
516
+ BANK_CARD: "[REDACTED:CARD]",
517
+ CARD_NUMBER: "[REDACTED:CARD]",
518
+ SECRET: "[REDACTED:SECRET]",
519
+ PASSWORD: "[REDACTED:SECRET]",
520
+ API_KEY: "[REDACTED:SECRET]",
521
+ TOKEN: "[REDACTED:SECRET]",
522
+ IP: "[REDACTED:IP]",
523
+ LICENSE_PLATE: "[REDACTED:LICENSE]",
524
+ PLATE: "[REDACTED:LICENSE]",
525
+ TIME: "[REDACTED:TIME]",
526
+ DATE: "[REDACTED:DATE]",
527
+ SALARY: "[REDACTED:SALARY]",
528
+ AMOUNT: "[REDACTED:AMOUNT]",
529
+ };
530
+ return mapping[t] ?? `[REDACTED:${t}]`;
531
+ }
532
+
533
+ /** Simple replaceAll polyfill for older Node */
534
+ function replaceAll(str: string, search: string, replacement: string): string {
535
+ // Escape regex special chars in search string
536
+ const escaped = search.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
537
+ return str.replace(new RegExp(escaped, "g"), replacement);
538
+ }
539
+
540
/**
 * Default PII extraction system prompt — fallback when prompts/pii-extraction.md
 * is missing. The model must reply with a bare JSON array of {type, value}
 * items, which parsePiiJson() then parses; the type names here feed
 * mapPiiTypeToTag() for redaction tagging.
 */
export const DEFAULT_PII_EXTRACTION_PROMPT = `You are a PII extraction engine. Extract ALL PII (personally identifiable information) from the given text as a JSON array.

Types: NAME (every person), PHONE, ADDRESS (all variants including shortened), ACCESS_CODE (gate/door/门禁码), DELIVERY (tracking numbers, pickup codes/取件码), ID (SSN/身份证), CARD (bank/medical/insurance), LICENSE_PLATE (plate numbers/车牌), EMAIL, PASSWORD, PAYMENT (Venmo/PayPal/支付宝), BIRTHDAY, TIME (appointment/delivery times), NOTE (private instructions)

Important: Extract EVERY person's name and EVERY address variant.

Example:
Input: Alex lives at 123 Main St. Li Na phone 13912345678, gate code 1234#, card YB330-123, plate 京A12345, tracking SF123, Venmo @alex99
Output: [{"type":"NAME","value":"Alex"},{"type":"NAME","value":"Li Na"},{"type":"ADDRESS","value":"123 Main St"},{"type":"PHONE","value":"13912345678"},{"type":"ACCESS_CODE","value":"1234#"},{"type":"CARD","value":"YB330-123"},{"type":"LICENSE_PLATE","value":"京A12345"},{"type":"DELIVERY","value":"SF123"},{"type":"PAYMENT","value":"@alex99"}]

Output ONLY the JSON array — no explanation, no markdown fences.`;
552
+
553
+ /**
554
+ * Extract PII from content using local model via chat completions.
555
+ *
556
+ * Two-step approach: model identifies PII items as JSON, then we do
557
+ * programmatic string replacement. More reliable than asking models to rewrite.
558
+ */
559
+ async function extractPiiWithModel(
560
+ endpoint: string,
561
+ model: string,
562
+ content: string,
563
+ opts?: { apiKey?: string; providerType?: EdgeProviderType; customModule?: string; sessionKey?: string },
564
+ ): Promise<Array<{ type: string; value: string }>> {
565
+ const textSnippet = content.slice(0, 3000);
566
+
567
+ const systemPrompt = loadPromptWithVars("pii-extraction", DEFAULT_PII_EXTRACTION_PROMPT, {
568
+ CONTENT: textSnippet,
569
+ });
570
+
571
+ const promptHasContent = systemPrompt.includes(textSnippet) && textSnippet.length > 10;
572
+ const userMessage = promptHasContent
573
+ ? "Extract all PII from the text above. Output ONLY the JSON array."
574
+ : textSnippet;
575
+
576
+ const result = await callChatCompletion(
577
+ endpoint,
578
+ model,
579
+ [
580
+ { role: "system", content: systemPrompt },
581
+ { role: "user", content: userMessage },
582
+ ],
583
+ {
584
+ temperature: 0.0,
585
+ maxTokens: 2500,
586
+ stop: ["Input:", "Task:"],
587
+ apiKey: opts?.apiKey,
588
+ providerType: opts?.providerType,
589
+ customModule: opts?.customModule,
590
+ },
591
+ );
592
+
593
+ if (result.usage) {
594
+ const collector = getGlobalCollector();
595
+ collector?.record({
596
+ sessionKey: opts?.sessionKey ?? "",
597
+ provider: "edge",
598
+ model,
599
+ source: "router",
600
+ usage: result.usage,
601
+ });
602
+ }
603
+
604
+ return parsePiiJson(result.text);
605
+ }
606
+
607
+ /** Parse the model's PII extraction output into structured items */
608
+ function parsePiiJson(raw: string): Array<{ type: string; value: string }> {
609
+ // Normalize whitespace (model may use newlines between items)
610
+ let cleaned = raw.replace(/\s+/g, " ").trim();
611
+
612
+ // Strip markdown code fences if present
613
+ cleaned = cleaned
614
+ .replace(/^```(?:json)?\s*/i, "")
615
+ .replace(/\s*```$/i, "")
616
+ .trim();
617
+
618
+ // Find the JSON array in the output
619
+ const arrayStart = cleaned.indexOf("[");
620
+ if (arrayStart < 0) return [];
621
+ let jsonStr = cleaned.slice(arrayStart);
622
+
623
+ // Find the last ] to cut off any trailing garbage
624
+ const lastBracket = jsonStr.lastIndexOf("]");
625
+ if (lastBracket >= 0) {
626
+ jsonStr = jsonStr.slice(0, lastBracket + 1);
627
+ } else {
628
+ const lastCloseBrace = jsonStr.lastIndexOf("}");
629
+ if (lastCloseBrace >= 0) {
630
+ jsonStr = jsonStr.slice(0, lastCloseBrace + 1) + "]";
631
+ } else {
632
+ return [];
633
+ }
634
+ }
635
+
636
+ // Fix trailing commas before ]
637
+ jsonStr = jsonStr.replace(/,\s*\]/g, "]");
638
+
639
+ // Normalize Python-style single-quoted JSON to double-quoted JSON.
640
+ // Some local models output {'key': 'value'} instead of {"key": "value"}.
641
+ jsonStr = jsonStr
642
+ .replace(/(?<=[\[,{]\s*)'([^']+?)'(?=\s*:)/g, '"$1"')
643
+ .replace(/(?<=:\s*)'([^']*?)'(?=\s*[,}\]])/g, '"$1"');
644
+
645
+ try {
646
+ const arr = JSON.parse(jsonStr);
647
+ if (!Array.isArray(arr)) return [];
648
+ const items = arr.filter(
649
+ (item: unknown) =>
650
+ item &&
651
+ typeof item === "object" &&
652
+ typeof (item as Record<string, unknown>).type === "string" &&
653
+ typeof (item as Record<string, unknown>).value === "string",
654
+ ) as Array<{ type: string; value: string }>;
655
+ return items;
656
+ } catch {
657
+ console.error("[ClawXrouter] Failed to parse PII extraction JSON:", jsonStr.slice(0, 300));
658
+ return [];
659
+ }
660
+ }
661
+
662
+ /**
663
+ * Parse model response to extract sensitivity level
664
+ */
665
+ function parseModelResponse(response: string): {
666
+ level: SensitivityLevel;
667
+ reason?: string;
668
+ confidence?: number;
669
+ } {
670
+ try {
671
+ // Try to find JSON in the response
672
+ const jsonMatch = response.match(/\{[\s\S]*?\}/);
673
+ if (jsonMatch) {
674
+ const parsed = JSON.parse(jsonMatch[0]) as {
675
+ level?: string;
676
+ reason?: string;
677
+ confidence?: number;
678
+ };
679
+
680
+ // Validate level
681
+ const level = parsed.level?.toUpperCase();
682
+ if (level === "S1" || level === "S2" || level === "S3") {
683
+ return {
684
+ level: level as SensitivityLevel,
685
+ reason: parsed.reason,
686
+ confidence: parsed.confidence,
687
+ };
688
+ }
689
+ }
690
+
691
+ // Fallback: look for level mentions in text
692
+ const upperResponse = response.toUpperCase();
693
+ if (upperResponse.includes("S3") || upperResponse.includes("PRIVATE")) {
694
+ return {
695
+ level: "S3",
696
+ reason: "Detected from text analysis",
697
+ confidence: 0.6,
698
+ };
699
+ }
700
+ if (upperResponse.includes("S2") || upperResponse.includes("SENSITIVE")) {
701
+ return {
702
+ level: "S2",
703
+ reason: "Detected from text analysis",
704
+ confidence: 0.6,
705
+ };
706
+ }
707
+
708
+ // Default to S1 if unable to parse
709
+ return {
710
+ level: "S1",
711
+ reason: "Unable to parse model response",
712
+ confidence: 0.3,
713
+ };
714
+ } catch (err) {
715
+ console.error("[ClawXrouter] Error parsing model response:", err);
716
+ return {
717
+ level: "S1",
718
+ reason: "Parse error",
719
+ confidence: 0,
720
+ };
721
+ }
722
+ }
723
+