@openbmb/clawxrouter 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/hooks.ts ADDED
@@ -0,0 +1,1428 @@
1
+ import type { OpenClawPluginApi } from "openclaw/plugin-sdk/plugin-entry";
2
+ import * as fs from "node:fs";
3
+ import * as path from "node:path";
4
+ import type { PrivacyConfig } from "./types.js";
5
+ import {
6
+ buildMainSessionPlaceholder,
7
+ getGuardAgentConfig,
8
+ isGuardSessionKey,
9
+ } from "./guard-agent.js";
10
+ import { desensitizeWithLocalModel } from "./local-model.js";
11
+ import { syncDetectByLocalModel } from "./sync-detect.js";
12
+ import { syncDesensitizeWithLocalModel } from "./sync-desensitize.js";
13
+ import { getDefaultMemoryManager, GUARD_SECTION_BEGIN, GUARD_SECTION_END } from "./memory-isolation.js";
14
+ import { loadPrompt } from "./prompt-loader.js";
15
+ import { DualSessionManager, getDefaultSessionManager, type SessionMessage } from "./session-manager.js";
16
+ import {
17
+ markSessionAsPrivate,
18
+ trackSessionLevel,
19
+ recordDetection,
20
+ notifyDetectionStart,
21
+ notifyGenerating,
22
+ notifyLlmComplete,
23
+ notifyInputEstimate,
24
+ isSessionMarkedPrivate,
25
+ stashDetection,
26
+ getPendingDetection,
27
+ consumeDetection,
28
+ setActiveLocalRouting,
29
+ clearActiveLocalRouting,
30
+ clearSessionState,
31
+ isActiveLocalRouting,
32
+ resetTurnLevel,
33
+ setSessionRouteLevel,
34
+ getSessionRouteLevel,
35
+ startNewLoop,
36
+ getCurrentLoopId,
37
+ stashDesensitizedToolResult,
38
+ setLoopRouting,
39
+ } from "./session-state.js";
40
+ import { detectByRules } from "./rules.js";
41
+ import { isProtectedMemoryPath, redactSensitiveInfo, extractPathsFromParams } from "./utils.js";
42
+ import {
43
+ CLAWXROUTER_S2_OPEN,
44
+ CLAWXROUTER_S2_CLOSE,
45
+ } from "./privacy-proxy.js";
46
+ import { getGlobalPipeline } from "./router-pipeline.js";
47
+ import { getGlobalCollector, lookupPricing } from "./token-stats.js";
48
+ import { getLiveConfig } from "./live-config.js";
49
+ import { ensureModelMirrored, resolveOriginalProvider } from "./provider.js";
50
+
51
/**
 * Build the config object handed to the router pipeline.
 *
 * The live privacy config is read fresh on every call and wrapped under the
 * `privacy` key.
 */
function getPipelineConfig(): Record<string, unknown> {
  const privacy = getLiveConfig();
  return { privacy };
}
54
+
55
/**
 * Decide whether a session may read the full (unredacted) memory track.
 *
 * MEMORY-FULL.md is reserved for sessions whose data never leaves the device:
 *   - sessions with S3 active local routing (Guard Agent turn),
 *   - Guard sub-sessions (always local),
 *   - sessions marked private while `s2Policy` is "local".
 *
 * S2-proxy sessions forward (desensitised) data to the cloud, so they MUST
 * stay on the clean, already-redacted MEMORY.md; otherwise PII that the
 * regex-based tool_result_persist redaction missed could leak.
 *
 * @param sessionKey - Key identifying the session being checked.
 * @returns `true` when the session should use the full memory track.
 */
function shouldUseFullMemoryTrack(sessionKey: string): boolean {
  // Local-only sessions qualify unconditionally.
  const alwaysLocal = isActiveLocalRouting(sessionKey) || isGuardSessionKey(sessionKey);
  if (alwaysLocal) {
    return true;
  }

  // Sessions not marked private never get the full track.
  if (!isSessionMarkedPrivate(sessionKey)) {
    return false;
  }

  // Private sessions qualify only when S2 is handled locally ("proxy" is the default).
  const s2Policy = getLiveConfig().s2Policy ?? "proxy";
  return s2Policy === "local";
}
76
+
77
// Built-in system prompt for the guard agent; passed to loadPrompt() as the
// second argument by getGuardAgentSystemPrompt(). One array entry per line
// of the prompt, joined with "\n".
const DEFAULT_GUARD_AGENT_SYSTEM_PROMPT = [
  "You are a privacy-aware analyst. Analyze the data the user provides. Do your job.",
  "",
  "RULES:",
  "1. Analyze the data directly. Do NOT write code. Do NOT generate programming examples or tutorials.",
  '2. NEVER echo raw sensitive values (exact salary, SSN, bank account, password). Use generic references like "your base salary", "the SSN on file", etc.',
  "3. You MAY discuss percentages, ratios, whether deductions are correct, anomalies, and recommendations.",
  "4. Reply ONCE, then stop. No [message_id:] tags. No multi-turn simulation.",
  "5. **Language rule: Reply in the SAME language the user writes in.** If the user writes in Chinese, reply entirely in Chinese. If the user writes in English, reply entirely in English.",
  "6. Be concise and professional.",
  "",
  "语言规则:必须使用与用户相同的语言回复。如果用户用中文提问,你必须用中文回答。",
].join("\n");
88
+
89
/**
 * Resolve the system prompt for the guard agent.
 *
 * Asks the prompt loader for the "guard-agent-system" prompt, supplying the
 * built-in DEFAULT_GUARD_AGENT_SYSTEM_PROMPT as the second argument
 * (presumably the fallback when no custom prompt exists — confirm in
 * prompt-loader).
 */
function getGuardAgentSystemPrompt(): string {
  const systemPrompt = loadPrompt("guard-agent-system", DEFAULT_GUARD_AGENT_SYSTEM_PROMPT);
  return systemPrompt;
}
92
+
93
/**
 * Report whether a tool is exempt from privacy pipeline detection and PII
 * redaction.
 *
 * The exemption list is the `toolAllowlist` entry of the live config; when it
 * is missing or empty, no tool is exempt.
 *
 * @param toolName - Name of the tool to look up.
 * @returns `true` only when `toolName` appears in the configured allowlist.
 */
function isToolAllowlisted(toolName: string): boolean {
  const exemptTools = getLiveConfig().toolAllowlist;
  // A missing list means no exemptions; an empty list can never match.
  return exemptTools ? exemptTools.includes(toolName) : false;
}
102
+
103
/**
 * Pick a usable session key out of a hook context.
 *
 * OpenClaw fills in `ctx.sessionKey` when the session was resolved from a
 * channel sender (--to / Telegram / Discord …). Callers that only pass
 * `--session-id` (e.g. the `openclaw agent` CLI) may leave it `undefined`,
 * so `sessionId` serves as the fallback to keep ClawXrouter detection
 * running in that scenario.
 *
 * @param ctx - Hook context carrying the optional session identifiers.
 * @returns The session key, else the session id, else an empty string.
 */
function resolveHookSessionKey(ctx: { sessionKey?: string; sessionId?: string }): string {
  const primaryKey = ctx.sessionKey;
  if (primaryKey) {
    return primaryKey;
  }

  // Empty or missing key: fall back to the session id, then to "".
  const fallbackId = ctx.sessionId;
  return fallbackId ? fallbackId : "";
}
115
+
116
// Cached workspace directory. Starts out unset; before_model_resolve stores
// ctx.workspaceDir here when the hook context provides one, and
// tool_result_persist reads it (falling back to process.cwd()).
let _cachedWorkspaceDir: string | undefined = undefined;
118
+
119
+ export function registerHooks(api: OpenClawPluginApi): void {
120
+ const privacyCfgInit = getLiveConfig();
121
+ const sessionBaseDir = privacyCfgInit.session?.baseDir;
122
+
123
+ const memoryManager = getDefaultMemoryManager();
124
+ memoryManager.initializeDirectories().catch((err) => {
125
+ api.logger.error(`[ClawXrouter] Failed to initialize memory directories: ${String(err)}`);
126
+ });
127
+
128
+ getDefaultSessionManager(sessionBaseDir);
129
+
130
+ api.on("before_model_resolve", async (event, ctx) => {
131
+ try {
132
+ const { prompt } = event;
133
+ const sessionKey = resolveHookSessionKey(ctx);
134
+ if (!sessionKey || !prompt) return;
135
+
136
+ clearActiveLocalRouting(sessionKey);
137
+ resetTurnLevel(sessionKey);
138
+ consumeDetection(sessionKey);
139
+ const loopId = startNewLoop(sessionKey, String(prompt));
140
+ notifyDetectionStart(sessionKey, "onUserMessage", loopId);
141
+
142
+ const privacyConfig = getLiveConfig();
143
+ if (!privacyConfig.enabled) return;
144
+
145
+ if (isGuardSessionKey(sessionKey)) {
146
+ const guardCfg = getGuardAgentConfig(privacyConfig);
147
+ if (guardCfg) {
148
+ return { providerOverride: guardCfg.provider, modelOverride: guardCfg.modelName };
149
+ }
150
+ return;
151
+ }
152
+
153
+ if (ctx.workspaceDir) _cachedWorkspaceDir = ctx.workspaceDir;
154
+
155
+ const rawMsg = String(prompt);
156
+ if (shouldSkipMessage(rawMsg)) return;
157
+ const msgStr = stripTimestampPrefix(rawMsg);
158
+
159
+ // ── S3 fast path: rule-based pre-check ──────────────────────────
160
+ // Rules are synchronous and deterministic. When they detect S3 we
161
+ // can route to the local model immediately — no need to run the
162
+ // full pipeline (LLM detector, token-saver, custom routers, etc.)
163
+ // which would waste compute and needlessly expose sensitive content.
164
+ const rulePreCheck = detectByRules(
165
+ { checkpoint: "onUserMessage", message: msgStr, sessionKey },
166
+ privacyConfig,
167
+ );
168
+
169
+ if (rulePreCheck.level === "S3") {
170
+ recordDetection(sessionKey, "S3", "onUserMessage", rulePreCheck.reason);
171
+ trackSessionLevel(sessionKey, "S3");
172
+ setActiveLocalRouting(sessionKey);
173
+ stashDetection(sessionKey, {
174
+ level: "S3",
175
+ reason: rulePreCheck.reason,
176
+ originalPrompt: msgStr,
177
+ timestamp: Date.now(),
178
+ });
179
+
180
+ const guardCfg = getGuardAgentConfig(privacyConfig);
181
+ const defaultProvider = privacyConfig.localModel?.provider ?? "ollama";
182
+ const provider = guardCfg?.provider ?? defaultProvider;
183
+ const model = guardCfg?.modelName ?? privacyConfig.localModel?.model ?? "openbmb/minicpm4.1";
184
+ api.logger.info(`[ClawXrouter] S3 (rule fast-path) — routing to ${provider}/${model}`);
185
+ return { providerOverride: provider, modelOverride: model };
186
+ }
187
+
188
+ // ── Normal path: run the full router pipeline ──────────────────
189
+ const pipeline = getGlobalPipeline();
190
+ if (!pipeline) {
191
+ api.logger.warn("[ClawXrouter] Router pipeline not initialized");
192
+ return;
193
+ }
194
+
195
+ const defaults = api.config.agents?.defaults as Record<string, unknown> | undefined;
196
+ const primaryModel = (defaults?.model as Record<string, unknown> | undefined)?.primary as string ?? "";
197
+ const defaultProvider = (defaults?.provider as string) || primaryModel.split("/")[0] || "openai";
198
+
199
+ const decision = await pipeline.run(
200
+ "onUserMessage",
201
+ {
202
+ checkpoint: "onUserMessage",
203
+ message: msgStr,
204
+ sessionKey,
205
+ agentId: ctx.agentId,
206
+ },
207
+ getPipelineConfig(),
208
+ );
209
+
210
+ recordDetection(sessionKey, decision.level, "onUserMessage", decision.reason,
211
+ decision.routerId, decision.action, decision.target ? `${decision.target.provider}/${decision.target.model}` : undefined);
212
+ setSessionRouteLevel(sessionKey, decision.level);
213
+
214
+ if (decision.routerId === "token-saver" && decision.reason?.startsWith("tier=")) {
215
+ const tier = decision.reason.split("=")[1];
216
+ setLoopRouting(sessionKey, tier,
217
+ decision.target ? `${decision.target.provider}/${decision.target.model}` : undefined,
218
+ decision.action ?? "passthrough");
219
+ }
220
+ api.logger.info(`[ClawXrouter] ROUTE: session=${sessionKey} level=${decision.level} action=${decision.action} target=${JSON.stringify(decision.target)} reason=${decision.reason}`);
221
+
222
+ if (decision.action !== "block") {
223
+ notifyGenerating(sessionKey, "onUserMessage", decision.level, decision.routerId, decision.action,
224
+ decision.target ? `${decision.target.provider}/${decision.target.model}` : undefined, decision.reason);
225
+ }
226
+
227
+ // S1: ALL S1 traffic routes through proxy for defense-in-depth
228
+ // (schema cleaning, regex PII scan). Token-saver may redirect to a
229
+ // different model — we honour the model choice but still proxy.
230
+ if (decision.level === "S1") {
231
+ const targetModel = decision.target?.model;
232
+ const targetOriginalProvider = decision.target?.provider !== "clawxrouter-privacy"
233
+ ? decision.target?.provider : undefined;
234
+ const originalProv = targetOriginalProvider
235
+ ?? (targetModel ? resolveOriginalProvider(api.config as Record<string, unknown>, targetModel, defaultProvider) : defaultProvider);
236
+ if (targetModel) {
237
+ ensureModelMirrored(
238
+ api.config as Record<string, unknown>,
239
+ targetModel,
240
+ originalProv,
241
+ () => { try { return api.runtime.config.loadConfig(); } catch { return undefined; } },
242
+ );
243
+ }
244
+ return {
245
+ providerOverride: "clawxrouter-privacy",
246
+ ...(targetModel ? { modelOverride: targetModel } : {}),
247
+ };
248
+ }
249
+
250
+ // S3 from LLM detector (rules didn't catch it above): route to local
251
+ if (decision.level === "S3") {
252
+ trackSessionLevel(sessionKey, "S3");
253
+ setActiveLocalRouting(sessionKey);
254
+ stashDetection(sessionKey, {
255
+ level: "S3",
256
+ reason: decision.reason,
257
+ originalPrompt: msgStr,
258
+ timestamp: Date.now(),
259
+ });
260
+ if (decision.target) {
261
+ api.logger.info(`[ClawXrouter] S3 — routing to ${decision.target.provider}/${decision.target.model} [${decision.routerId}]`);
262
+ return {
263
+ providerOverride: decision.target.provider,
264
+ ...(decision.target.model ? { modelOverride: decision.target.model } : {}),
265
+ };
266
+ }
267
+ const guardCfg = getGuardAgentConfig(privacyConfig);
268
+ const defaultProvider = privacyConfig.localModel?.provider ?? "ollama";
269
+ api.logger.info(`[ClawXrouter] S3 — routing to ${guardCfg?.provider ?? defaultProvider}/${guardCfg?.modelName ?? privacyConfig.localModel?.model ?? "openbmb/minicpm4.1"} [${decision.routerId}]`);
270
+ return {
271
+ providerOverride: guardCfg?.provider ?? defaultProvider,
272
+ modelOverride: guardCfg?.modelName ?? privacyConfig.localModel?.model ?? "openbmb/minicpm4.1",
273
+ };
274
+ }
275
+
276
+ // Desensitize for S2 (needed for both proxy markers and local prompt).
277
+ // If desensitization fails (local model down), escalate to S3 so the
278
+ // message stays entirely local — never send raw PII to cloud.
279
+ let desensitized: string | undefined;
280
+ if (decision.level === "S2") {
281
+ const result = await desensitizeWithLocalModel(msgStr, privacyConfig, sessionKey);
282
+ if (result.failed) {
283
+ api.logger.warn("[ClawXrouter] S2 desensitization failed — escalating to S3 (local-only) to prevent PII leak");
284
+ trackSessionLevel(sessionKey, "S3");
285
+ setActiveLocalRouting(sessionKey);
286
+ stashDetection(sessionKey, {
287
+ level: "S3",
288
+ reason: `${decision.reason}; desensitization failed — escalated to S3`,
289
+ originalPrompt: msgStr,
290
+ timestamp: Date.now(),
291
+ });
292
+ const guardCfg = getGuardAgentConfig(privacyConfig);
293
+ const fallbackProvider = privacyConfig.localModel?.provider ?? "ollama";
294
+ return {
295
+ providerOverride: guardCfg?.provider ?? fallbackProvider,
296
+ modelOverride: guardCfg?.modelName ?? privacyConfig.localModel?.model ?? "openbmb/minicpm4.1",
297
+ };
298
+ }
299
+ desensitized = result.desensitized;
300
+ }
301
+
302
+ // Stash decision for before_prompt_build / before_message_write
303
+ stashDetection(sessionKey, {
304
+ level: decision.level,
305
+ reason: decision.reason,
306
+ desensitized,
307
+ originalPrompt: msgStr,
308
+ timestamp: Date.now(),
309
+ });
310
+
311
+ // S2-local: route to edge model
312
+ if (decision.level === "S2" && decision.action === "redirect" && decision.target?.provider !== "clawxrouter-privacy") {
313
+ markSessionAsPrivate(sessionKey, decision.level);
314
+ if (decision.target) {
315
+ api.logger.info(`[ClawXrouter] S2 — routing to ${decision.target.provider}/${decision.target.model} [${decision.routerId}]`);
316
+ return {
317
+ providerOverride: decision.target.provider,
318
+ ...(decision.target.model ? { modelOverride: decision.target.model } : {}),
319
+ };
320
+ }
321
+ }
322
+
323
+ // S2-proxy: route through privacy proxy (model-keyed map handles upstream)
324
+ if (decision.level === "S2" && decision.target?.provider === "clawxrouter-privacy") {
325
+ markSessionAsPrivate(sessionKey, "S2");
326
+ const targetModel = decision.target.model;
327
+ const actualProvider = decision.target.originalProvider
328
+ ?? (targetModel ? resolveOriginalProvider(api.config as Record<string, unknown>, targetModel, defaultProvider) : defaultProvider);
329
+ if (targetModel) {
330
+ ensureModelMirrored(
331
+ api.config as Record<string, unknown>,
332
+ targetModel,
333
+ actualProvider,
334
+ () => { try { return api.runtime.config.loadConfig(); } catch { return undefined; } },
335
+ );
336
+ }
337
+ api.logger.info(`[ClawXrouter] S2 — routing through privacy proxy${targetModel ? ` (model=${targetModel})` : ""} [${decision.routerId}]`);
338
+ return {
339
+ providerOverride: "clawxrouter-privacy",
340
+ ...(targetModel ? { modelOverride: targetModel } : {}),
341
+ };
342
+ }
343
+
344
+ // Non-privacy routers may return redirect with a custom target
345
+ if (decision.action === "redirect" && decision.target) {
346
+ api.logger.info(`[ClawXrouter] ${decision.level} — custom route to ${decision.target.provider}/${decision.target.model} [${decision.routerId}]`);
347
+ return {
348
+ providerOverride: decision.target.provider,
349
+ ...(decision.target.model ? { modelOverride: decision.target.model } : {}),
350
+ };
351
+ }
352
+
353
+ // Block action at model resolve level → route to edge model as safeguard
354
+ if (decision.action === "block") {
355
+ if (decision.level === "S3") {
356
+ trackSessionLevel(sessionKey, "S3");
357
+ setActiveLocalRouting(sessionKey);
358
+ } else {
359
+ markSessionAsPrivate(sessionKey, decision.level);
360
+ }
361
+ const guardCfg = getGuardAgentConfig(privacyConfig);
362
+ const defaultProvider = privacyConfig.localModel?.provider ?? "ollama";
363
+ api.logger.warn(`[ClawXrouter] ${decision.level} BLOCK — redirecting to edge model [${decision.routerId}]`);
364
+ return {
365
+ providerOverride: guardCfg?.provider ?? defaultProvider,
366
+ modelOverride: guardCfg?.modelName ?? privacyConfig.localModel?.model ?? "openbmb/minicpm4.1",
367
+ };
368
+ }
369
+
370
+ // Transform action: the router rewrote the prompt content.
371
+ // For S2/S3 we must still route safely — use the transformed content
372
+ // as the desensitized payload and route through the appropriate path.
373
+ if (decision.action === "transform") {
374
+ if (decision.level === "S3") {
375
+ trackSessionLevel(sessionKey, "S3");
376
+ setActiveLocalRouting(sessionKey);
377
+ stashDetection(sessionKey, {
378
+ level: "S3",
379
+ reason: decision.reason,
380
+ originalPrompt: msgStr,
381
+ timestamp: Date.now(),
382
+ });
383
+ const guardCfg = getGuardAgentConfig(privacyConfig);
384
+ const defaultProvider = privacyConfig.localModel?.provider ?? "ollama";
385
+ api.logger.info(`[ClawXrouter] S3 TRANSFORM — routing to edge model [${decision.routerId}]`);
386
+ return {
387
+ providerOverride: guardCfg?.provider ?? defaultProvider,
388
+ modelOverride: guardCfg?.modelName ?? privacyConfig.localModel?.model ?? "openbmb/minicpm4.1",
389
+ };
390
+ }
391
+
392
+ if (decision.level === "S2") {
393
+ const transformedText = decision.transformedContent ?? desensitized ?? msgStr;
394
+ stashDetection(sessionKey, {
395
+ level: "S2",
396
+ reason: decision.reason,
397
+ desensitized: transformedText,
398
+ originalPrompt: msgStr,
399
+ timestamp: Date.now(),
400
+ });
401
+ markSessionAsPrivate(sessionKey, "S2");
402
+
403
+ const s2Policy = privacyConfig.s2Policy ?? "proxy";
404
+ if (s2Policy === "local") {
405
+ const guardCfg = getGuardAgentConfig(privacyConfig);
406
+ const defaultProvider = privacyConfig.localModel?.provider ?? "ollama";
407
+ api.logger.info(`[ClawXrouter] S2 TRANSFORM — routing to local ${guardCfg?.provider ?? defaultProvider} [${decision.routerId}]`);
408
+ return {
409
+ providerOverride: guardCfg?.provider ?? defaultProvider,
410
+ modelOverride: guardCfg?.modelName ?? privacyConfig.localModel?.model ?? "openbmb/minicpm4.1",
411
+ };
412
+ }
413
+
414
+ const transformModel = decision.target?.model;
415
+ const transformActualProvider = decision.target?.originalProvider
416
+ ?? (transformModel ? resolveOriginalProvider(api.config as Record<string, unknown>, transformModel, defaultProvider) : defaultProvider);
417
+ if (transformModel) {
418
+ ensureModelMirrored(
419
+ api.config as Record<string, unknown>,
420
+ transformModel,
421
+ transformActualProvider,
422
+ () => { try { return api.runtime.config.loadConfig(); } catch { return undefined; } },
423
+ );
424
+ }
425
+ const transformModelInfo = transformModel ? ` (model=${transformModel})` : "";
426
+ api.logger.info(`[ClawXrouter] S2 TRANSFORM — routing through privacy proxy${transformModelInfo} [${decision.routerId}]`);
427
+ return {
428
+ providerOverride: "clawxrouter-privacy",
429
+ ...(transformModel ? { modelOverride: transformModel } : {}),
430
+ };
431
+ }
432
+
433
+ // S1 + transform: route through proxy for defense-in-depth
434
+ return { providerOverride: "clawxrouter-privacy" };
435
+ }
436
+
437
+ // Default: route through proxy for defense-in-depth
438
+ return { providerOverride: "clawxrouter-privacy" };
439
+ } catch (err) {
440
+ api.logger.error(`[ClawXrouter] Error in before_model_resolve hook: ${String(err)}`);
441
+ }
442
+ });
443
+
444
+ api.on("before_prompt_build", async (_event, ctx) => {
445
+ try {
446
+ const sessionKey = resolveHookSessionKey(ctx);
447
+ if (!sessionKey) return;
448
+
449
+ const pending = getPendingDetection(sessionKey);
450
+ if (!pending || pending.level === "S1") return;
451
+
452
+ const privacyConfig = getLiveConfig();
453
+ const sessionCfg = privacyConfig.session ?? {};
454
+ const shouldInject = sessionCfg.injectDualHistory !== false
455
+ && sessionCfg.isolateGuardHistory !== false;
456
+ const historyLimit = sessionCfg.historyLimit ?? 20;
457
+
458
+ // S3: data processed entirely locally. Inject full-track history
459
+ // so the local model sees previous S3 interactions that were replaced
460
+ // by "🔒 [Private content]" placeholders in the main transcript.
461
+ if (pending.level === "S3") {
462
+ if (shouldInject) {
463
+ const context = await loadDualTrackContext(sessionKey, ctx.agentId, historyLimit);
464
+ if (context) {
465
+ api.logger.info(`[ClawXrouter] Injected dual-track history context for S3 turn`);
466
+ return { prependContext: context };
467
+ }
468
+ }
469
+ return;
470
+ }
471
+
472
+ const s2Policy = privacyConfig.s2Policy ?? "proxy";
473
+
474
+ // S2-local: data stays on-device — inject full-track history for richer context.
475
+ if (pending.level === "S2" && s2Policy === "local") {
476
+ if (shouldInject) {
477
+ const context = await loadDualTrackContext(sessionKey, ctx.agentId, historyLimit);
478
+ if (context) {
479
+ api.logger.info(`[ClawXrouter] Injected dual-track history context for S2-local turn`);
480
+ return { prependContext: context };
481
+ }
482
+ }
483
+ return;
484
+ }
485
+
486
+ // S2-proxy: inject desensitized content wrapped in markers for privacy-proxy to strip.
487
+ //
488
+ // SAFETY CONTRACT: OpenClaw's before_prompt_build `prependContext` prepends
489
+ // text directly to the user prompt string (see plugin.md §Prompt build order).
490
+ // The resulting message content becomes:
491
+ // "<clawxrouter-s2>\n{desensitized}\n</clawxrouter-s2>\n\n{original PII}"
492
+ // The proxy's stripPiiMarkers() replaces the ENTIRE content with only the text
493
+ // between markers, effectively discarding the original PII that follows.
494
+ // If OpenClaw ever changes prependContext semantics (e.g. to a separate message),
495
+ // the proxy's fallback regex redaction provides defense-in-depth.
496
+ if (pending.level === "S2" && pending.desensitized) {
497
+ return {
498
+ prependContext: `${CLAWXROUTER_S2_OPEN}\n${pending.desensitized}\n${CLAWXROUTER_S2_CLOSE}`,
499
+ };
500
+ }
501
+ } catch (err) {
502
+ api.logger.error(`[ClawXrouter] Error in before_prompt_build hook: ${String(err)}`);
503
+ }
504
+ });
505
+
506
+ api.on("before_tool_call", async (event, ctx) => {
507
+ try {
508
+ const { toolName, params } = event;
509
+ const sessionKey = resolveHookSessionKey(ctx);
510
+ if (!toolName) return;
511
+
512
+ const typedParams = params as Record<string, unknown>;
513
+ const privacyConfig = getLiveConfig();
514
+ if (!privacyConfig.enabled || !privacyConfig.routers?.privacy?.enabled) {
515
+ recordDetection(sessionKey, "S1", "onToolCallProposed", `tool: ${toolName}`);
516
+ return;
517
+ }
518
+ const baseDir = privacyConfig.session?.baseDir ?? "~/.openclaw";
519
+
520
+ // File-access guard for cloud models only — local models (Guard Agent
521
+ // sessions and S3 active routing) are trusted to read full history.
522
+ if (!isGuardSessionKey(sessionKey) && !isActiveLocalRouting(sessionKey)) {
523
+ const pathValues = extractPathsFromParams(typedParams);
524
+ for (const p of pathValues) {
525
+ if (isProtectedMemoryPath(p, baseDir)) {
526
+ api.logger.warn(`[ClawXrouter] BLOCKED: cloud model tried to access protected path: ${p}`);
527
+ return { block: true, blockReason: `ClawXrouter: access to full history/memory is restricted for cloud models (${p})` };
528
+ }
529
+ }
530
+ }
531
+
532
+ // Memory read routing: only fully-local sessions read from MEMORY-FULL.md.
533
+ // S2-proxy sessions stay on the clean track to avoid leaking PII to cloud.
534
+ if (toolName === "memory_get" && shouldUseFullMemoryTrack(sessionKey)) {
535
+ const p = String(typedParams.path ?? "");
536
+ if (p === "MEMORY.md" || p === "memory.md") {
537
+ return { params: { ...typedParams, path: "MEMORY-FULL.md" } };
538
+ }
539
+ if (p.startsWith("memory/")) {
540
+ return { params: { ...typedParams, path: p.replace(/^memory\//, "memory-full/") } };
541
+ }
542
+ }
543
+
544
+ // Subagent / A2A guard (rule-based only — no LLM detector overhead)
545
+ const isSpawn = toolName === "sessions_spawn";
546
+ const isSend = toolName === "sessions_send";
547
+ if (isSpawn || isSend) {
548
+ const contentField = isSpawn ? String(typedParams?.task ?? "") : String(typedParams?.message ?? "");
549
+ if (contentField.trim()) {
550
+ const ruleResult = detectByRules(
551
+ { checkpoint: "onToolCallProposed", message: contentField, toolName, toolParams: typedParams, sessionKey },
552
+ privacyConfig,
553
+ );
554
+ recordDetection(sessionKey, ruleResult.level, "onToolCallProposed", ruleResult.reason);
555
+
556
+ if (ruleResult.level === "S3") {
557
+ trackSessionLevel(sessionKey, "S3");
558
+ return { block: true, blockReason: `ClawXrouter: ${isSpawn ? "subagent task" : "A2A message"} blocked — S3 (${ruleResult.reason ?? "sensitive"})` };
559
+ }
560
+ if (ruleResult.level === "S2") {
561
+ markSessionAsPrivate(sessionKey, "S2");
562
+ }
563
+ }
564
+ }
565
+
566
+ // General tool call detection.
567
+ // S3 local routing: the model is already local — re-running detection
568
+ // would block the very tool calls the local model needs.
569
+ // Internal infrastructure tools are also exempt from detection.
570
+ //
571
+ // Detection method is config-driven: when onToolCallProposed includes
572
+ // "localModelDetector" the full pipeline runs (LLM + rules); otherwise
573
+ // only fast rule-based detection is used (default).
574
+ if (!isActiveLocalRouting(sessionKey) && !isToolAllowlisted(toolName)) {
575
+ const detectors = privacyConfig.checkpoints?.onToolCallProposed ?? ["ruleDetector"];
576
+ const usePipeline = detectors.includes("localModelDetector");
577
+ let level: "S1" | "S2" | "S3" = "S1";
578
+ let reason: string | undefined;
579
+
580
+ if (usePipeline) {
581
+ const pipeline = getGlobalPipeline();
582
+ if (pipeline) {
583
+ const decision = await pipeline.run(
584
+ "onToolCallProposed",
585
+ { checkpoint: "onToolCallProposed", toolName, toolParams: typedParams, sessionKey },
586
+ getPipelineConfig(),
587
+ );
588
+ level = decision.level;
589
+ reason = decision.reason;
590
+ }
591
+ } else {
592
+ const ruleResult = detectByRules(
593
+ { checkpoint: "onToolCallProposed", toolName, toolParams: typedParams, sessionKey },
594
+ privacyConfig,
595
+ );
596
+ level = ruleResult.level;
597
+ reason = ruleResult.reason;
598
+ }
599
+
600
+ recordDetection(sessionKey, level, "onToolCallProposed", reason);
601
+
602
+ if (level === "S3") {
603
+ trackSessionLevel(sessionKey, "S3");
604
+ return { block: true, blockReason: `ClawXrouter: tool "${toolName}" blocked — S3 (${reason ?? "sensitive"})` };
605
+ }
606
+ if (level === "S2") {
607
+ markSessionAsPrivate(sessionKey, "S2");
608
+ }
609
+ }
610
+ } catch (err) {
611
+ api.logger.error(`[ClawXrouter] Error in before_tool_call hook: ${String(err)}`);
612
+ }
613
+ });
614
+
615
+ api.on("tool_result_persist", (event, ctx) => {
616
+ try {
617
+ const sessionKey = resolveHookSessionKey(ctx) || `anon-${Date.now()}`;
618
+ const msg = event.message;
619
+ if (!msg) return;
620
+
621
+ // ── Memory dual-write sync ──
622
+ // When Agent writes to memory files, sync the other track.
623
+ if (ctx.toolName === "write" || ctx.toolName === "write_file") {
624
+ const writePath = String(((event as Record<string, unknown>).params as Record<string, unknown> | undefined)?.path ?? "");
625
+ if (writePath && isMemoryWritePath(writePath)) {
626
+ const workspaceDir = _cachedWorkspaceDir ?? process.cwd();
627
+ const privacyConfig = getLiveConfig();
628
+ syncMemoryWrite(writePath, workspaceDir, privacyConfig, api.logger, isGuardSessionKey(sessionKey)).catch((err) => {
629
+ api.logger.warn(`[ClawXrouter] Memory dual-write sync failed: ${String(err)}`);
630
+ });
631
+ }
632
+ }
633
+
634
+ // ── memory_search result filtering ──
635
+ // QMD indexes both MEMORY.md and MEMORY-FULL.md (via extraPaths).
636
+ // Filter out the wrong track so each session type only sees its own.
637
+ if (ctx.toolName === "memory_search") {
638
+ const filtered = filterMemorySearchResults(msg, shouldUseFullMemoryTrack(sessionKey));
639
+ if (filtered) return { message: filtered };
640
+ return;
641
+ }
642
+
643
+ // ── S3 local routing: dual-track split ──
644
+ // The local model sees full content (via dual-track history injection),
645
+ // but the main transcript must be redacted so future S1 turns don't
646
+ // leak S3 tool results to cloud models.
647
+ if (isActiveLocalRouting(sessionKey)) {
648
+ const textContent = extractMessageText(msg);
649
+ if (textContent && textContent.length >= 10) {
650
+ const sessionManager = getDefaultSessionManager();
651
+ sessionManager.writeToFull(sessionKey, {
652
+ role: "tool", content: textContent, timestamp: Date.now(), sessionKey,
653
+ }).catch(() => {});
654
+ const redacted = redactSensitiveInfo(textContent, getLiveConfig().redaction);
655
+ if (redacted !== textContent) {
656
+ api.logger.info(`[ClawXrouter] S3 tool result PII-redacted for transcript (tool=${ctx.toolName ?? "unknown"})`);
657
+ sessionManager.writeToClean(sessionKey, {
658
+ role: "tool", content: redacted, timestamp: Date.now(), sessionKey,
659
+ }).catch(() => {});
660
+ const modified = replaceMessageText(msg, redacted);
661
+ if (modified) return { message: modified };
662
+ } else {
663
+ sessionManager.writeToClean(sessionKey, {
664
+ role: "tool", content: textContent, timestamp: Date.now(), sessionKey,
665
+ }).catch(() => {});
666
+ }
667
+ }
668
+ return;
669
+ }
670
+
671
+ // Internal infrastructure tools (gateway, web_fetch, etc.) naturally contain
672
+ // auth headers/tokens that must NOT be redacted or the tool breaks.
673
+ if (ctx.toolName && isToolAllowlisted(ctx.toolName)) return;
674
+
675
+ const textContent = extractMessageText(msg);
676
+ if (!textContent || textContent.length < 10) return;
677
+
678
+ // ── Detection + PII redaction + state tracking + dual-track writing ──
679
+ // This sync hook is the single handler for tool result privacy:
680
+ // it is the only hook that can modify the persisted transcript.
681
+ const privacyConfig = getLiveConfig();
682
+ if (!privacyConfig.enabled || !privacyConfig.routers?.privacy?.enabled) {
683
+ recordDetection(sessionKey, "S1", "onToolCallExecuted", `result: ${ctx.toolName ?? "unknown"}`);
684
+ return;
685
+ }
686
+
687
+ // Snapshot the turn-level privacy state BEFORE detection runs.
688
+ // markSessionAsPrivate() updates currentTurnLevel immediately, so
689
+ // checking isSessionMarkedPrivate() later would always be true
690
+ // after any S2/S3 detection — causing the LLM dual-write fallback
691
+ // (below) to incorrectly skip.
692
+ const wasPrivateBefore = isSessionMarkedPrivate(sessionKey);
693
+
694
+ const ruleCheck = detectByRules(
695
+ {
696
+ checkpoint: "onToolCallExecuted",
697
+ toolName: ctx.toolName,
698
+ toolResult: textContent,
699
+ sessionKey,
700
+ },
701
+ privacyConfig,
702
+ );
703
+
704
+ const detectedSensitive = ruleCheck.level === "S3" || ruleCheck.level === "S2";
705
+
706
+ // S3 detected at tool_result_persist is TOO LATE for local routing:
707
+ // the cloud model is already processing this turn and has seen prior
708
+ // context. Setting activeLocalRouting here would be misleading.
709
+ // Instead, degrade to S2 behaviour: record S3 for audit, but apply
710
+ // S2-level treatment (PII redaction) since that is the strongest
711
+ // mitigation still available at this stage.
712
+ const effectiveLevel = ruleCheck.level === "S3" ? "S2" as const : ruleCheck.level;
713
+
714
+ if (detectedSensitive) {
715
+ trackSessionLevel(sessionKey, ruleCheck.level); // audit: record true S3
716
+ markSessionAsPrivate(sessionKey, effectiveLevel);
717
+ recordDetection(sessionKey, ruleCheck.level, "onToolCallExecuted", ruleCheck.reason);
718
+ if (ruleCheck.level === "S3") {
719
+ api.logger.warn(
720
+ `[ClawXrouter] S3 detected in tool result AFTER cloud model already active — ` +
721
+ `degrading to S2 (PII redaction). tool=${ctx.toolName ?? "unknown"}, reason=${ruleCheck.reason ?? "rule-match"}`,
722
+ );
723
+ }
724
+ }
725
+
726
+ let redacted = redactSensitiveInfo(textContent, getLiveConfig().redaction);
727
+ let wasRedacted = redacted !== textContent;
728
+
729
+ // S2 detected by rules but regex missed PII → fall back to LLM
730
+ // semantic desensitization (sync Worker, same as syncDetect pattern).
731
+ if (detectedSensitive && !wasRedacted && effectiveLevel === "S2" && privacyConfig.localModel?.enabled) {
732
+ const desenResult = syncDesensitizeWithLocalModel(textContent, privacyConfig, sessionKey);
733
+ if (desenResult.wasModelUsed && !desenResult.failed && desenResult.desensitized !== textContent) {
734
+ redacted = desenResult.desensitized;
735
+ wasRedacted = true;
736
+ api.logger.info(`[ClawXrouter] S2 tool result LLM-desensitized (regex missed, tool=${ctx.toolName ?? "unknown"})`);
737
+ }
738
+ }
739
+
740
+ // Session already S2-private but rules didn't flag this specific result:
741
+ // the conversation is known to involve sensitive data, so tool results
742
+ // (e.g. reading the same file that triggered S2) very likely contain PII.
743
+ // Proactively desensitize to prevent leaking through the proxy / clean track.
744
+ if (!detectedSensitive && !wasRedacted && wasPrivateBefore && privacyConfig.localModel?.enabled) {
745
+ const desenResult = syncDesensitizeWithLocalModel(textContent, privacyConfig, sessionKey);
746
+ if (desenResult.wasModelUsed && !desenResult.failed && desenResult.desensitized !== textContent) {
747
+ redacted = desenResult.desensitized;
748
+ wasRedacted = true;
749
+ api.logger.info(`[ClawXrouter] Proactive tool result desensitized for S2-private session (tool=${ctx.toolName ?? "unknown"})`);
750
+ }
751
+ }
752
+
753
+ if (detectedSensitive || wasRedacted || wasPrivateBefore) {
754
+ const sessionManager = getDefaultSessionManager();
755
+ sessionManager.writeToFull(sessionKey, {
756
+ role: "tool", content: textContent, timestamp: Date.now(), sessionKey,
757
+ }).catch(() => {});
758
+ sessionManager.writeToClean(sessionKey, {
759
+ role: "tool", content: wasRedacted ? redacted : textContent, timestamp: Date.now(), sessionKey,
760
+ }).catch(() => {});
761
+ }
762
+
763
+ if (wasRedacted) {
764
+ if (!detectedSensitive) markSessionAsPrivate(sessionKey, "S2");
765
+ stashDesensitizedToolResult(textContent, redacted);
766
+ api.logger.info(`[ClawXrouter] PII-redacted tool result for transcript (tool=${ctx.toolName ?? "unknown"})`);
767
+ const modified = replaceMessageText(msg, redacted);
768
+ if (modified) return { message: modified };
769
+ }
770
+
771
+ // ── Sync LLM detection via worker thread ──
772
+ // Rules cover keywords/regex but miss semantic sensitivity.
773
+ // synckit blocks the main thread (via Atomics.wait) for the LLM
774
+ // inference on a Worker, letting us use the result before returning.
775
+ // Timeout (20s) gracefully falls back to rules-only result.
776
+ if (privacyConfig.localModel?.enabled && ruleCheck.level !== "S3") {
777
+ const llmResult = syncDetectByLocalModel(
778
+ { checkpoint: "onToolCallExecuted", toolName: ctx.toolName, toolResult: textContent, sessionKey },
779
+ privacyConfig,
780
+ );
781
+
782
+ if (llmResult.level !== "S1" && llmResult.levelNumeric > ruleCheck.levelNumeric) {
783
+ // LLM-detected S3: PII redaction below will prevent the raw content
784
+ // from reaching the cloud model (sync hook blocks). Model routing
785
+ // cannot change mid-turn, so session marking stays at S2.
786
+ const llmEffective = llmResult.level === "S3" ? "S2" as const : llmResult.level;
787
+ trackSessionLevel(sessionKey, llmResult.level); // audit: true level
788
+ if (!detectedSensitive) {
789
+ markSessionAsPrivate(sessionKey, llmEffective);
790
+ }
791
+ recordDetection(sessionKey, llmResult.level, "onToolCallExecuted", llmResult.reason);
792
+ if (llmResult.level === "S3") {
793
+ api.logger.warn(
794
+ `[ClawXrouter] LLM elevated tool result to S3 — PII redacted before reaching cloud model. ` +
795
+ `tool=${ctx.toolName ?? "unknown"}, reason=${llmResult.reason ?? "semantic"}`,
796
+ );
797
+ } else {
798
+ api.logger.info(`[ClawXrouter] LLM elevated tool result to ${llmResult.level} (tool=${ctx.toolName ?? "unknown"}, reason=${llmResult.reason ?? "semantic"})`);
799
+ }
800
+
801
+ // LLM-elevated S2: desensitize before dual-write / transcript so
802
+ // the clean track and persisted message contain redacted content.
803
+ let llmDesensitized: string | undefined;
804
+ if (llmResult.level === "S2" && !wasRedacted && privacyConfig.localModel?.enabled) {
805
+ const desenResult = syncDesensitizeWithLocalModel(textContent, privacyConfig, sessionKey);
806
+ if (desenResult.wasModelUsed && !desenResult.failed && desenResult.desensitized !== textContent) {
807
+ llmDesensitized = desenResult.desensitized;
808
+ api.logger.info(`[ClawXrouter] LLM-elevated S2 tool result desensitized (tool=${ctx.toolName ?? "unknown"})`);
809
+ }
810
+ }
811
+
812
+ // Dual-write: ensure both full and clean tracks reflect the LLM's
813
+ // finding. When the earlier dual-write block already fired (because
814
+ // wasPrivateBefore was true), it wrote *unredacted* content to the
815
+ // clean track — overwrite it now with the desensitized version.
816
+ if (!detectedSensitive && !wasRedacted) {
817
+ const sessionManager = getDefaultSessionManager();
818
+ const ts = Date.now();
819
+ if (!wasPrivateBefore) {
820
+ sessionManager.writeToFull(sessionKey, { role: "tool", content: textContent, timestamp: ts, sessionKey }).catch(() => {});
821
+ }
822
+ sessionManager.writeToClean(sessionKey, { role: "tool", content: llmDesensitized ?? redacted, timestamp: ts, sessionKey }).catch(() => {});
823
+ }
824
+
825
+ // S3 at persist time: redact before the result enters the model
826
+ // context and the persisted transcript.
827
+ if (llmResult.level === "S3") {
828
+ const s3Redacted = wasRedacted ? redacted : redactSensitiveInfo(textContent, getLiveConfig().redaction);
829
+ stashDesensitizedToolResult(textContent, s3Redacted);
830
+ const modified = replaceMessageText(msg, s3Redacted);
831
+ if (modified) return { message: modified };
832
+ }
833
+
834
+ // LLM-elevated S2: modify the persisted transcript message.
835
+ // If LLM desensitization succeeded, use it; otherwise fall back
836
+ // to regex redaction so PII doesn't leak to the cloud unmodified.
837
+ const s2Content = llmDesensitized ?? redactSensitiveInfo(textContent, getLiveConfig().redaction);
838
+ if (s2Content !== textContent) {
839
+ stashDesensitizedToolResult(textContent, s2Content);
840
+ const modified = replaceMessageText(msg, s2Content);
841
+ if (modified) return { message: modified };
842
+ }
843
+ }
844
+ }
845
+ } catch (err) {
846
+ api.logger.error(`[ClawXrouter] Error in tool_result_persist hook: ${String(err)}`);
847
+ }
848
+ });
849
+
850
// ── Hook: before_message_write ──
// Synchronous hook with three responsibilities, in this order:
//   1. Mirror non-tool messages into the full/clean session tracks when the
//      session is private, the pending detection is S3, or local routing is
//      active. Tool messages are skipped here.
//   2. Regex-redact assistant replies produced while local routing is active
//      before they are written to the transcript.
//   3. Replace S2/S3 user messages with the desensitized text or the S3
//      placeholder, consuming the pending detection when it does so.
api.on("before_message_write", (event, ctx) => {
  try {
    const sessionKey = resolveHookSessionKey(ctx);
    if (!sessionKey) return;

    const msg = event.message;
    if (!msg) return;

    const role = (msg as { role?: string }).role ?? "";
    const pending = getPendingDetection(sessionKey);

    // Track writes are fire-and-forget; failures are only reported on the console.
    const reportFailure = (prefix: string) => (err: unknown) => {
      console.error(prefix, err);
    };

    // ── 1. Dual session history persistence ──
    const dualHistoryWanted =
      isSessionMarkedPrivate(sessionKey) || pending?.level === "S3" || isActiveLocalRouting(sessionKey);

    if (dualHistoryWanted && role !== "tool") {
      const tracks = getDefaultSessionManager();
      const plainText = extractMessageText(msg);
      const now = Date.now();

      if (role === "user" && pending && pending.level !== "S1") {
        // Sensitive user turn: original prompt → full track,
        // placeholder (S3) or desensitized text (S2) → clean track.
        tracks
          .writeToFull(sessionKey, {
            role: "user",
            content: pending.originalPrompt ?? plainText,
            timestamp: now,
            sessionKey,
          })
          .catch(reportFailure("[ClawXrouter] Failed to persist user message to full history:"));

        const cleanContent =
          pending.level === "S3" ? buildMainSessionPlaceholder("S3") : (pending.desensitized ?? plainText);
        tracks
          .writeToClean(sessionKey, {
            role: "user",
            content: cleanContent,
            timestamp: now,
            sessionKey,
          })
          .catch(reportFailure("[ClawXrouter] Failed to persist user message to clean history:"));
      } else if (plainText) {
        if (role === "assistant" && isActiveLocalRouting(sessionKey)) {
          // A locally routed reply may echo PII: raw text → full track,
          // regex-redacted text → clean track.
          const scrubbed = redactSensitiveInfo(plainText, getLiveConfig().redaction);
          tracks
            .writeToFull(sessionKey, {
              role: "assistant",
              content: plainText,
              timestamp: now,
              sessionKey,
            })
            .catch(reportFailure("[ClawXrouter] Failed to persist assistant message to full history:"));
          tracks
            .writeToClean(sessionKey, {
              role: "assistant",
              content: scrubbed,
              timestamp: now,
              sessionKey,
            })
            .catch(reportFailure("[ClawXrouter] Failed to persist assistant message to clean history:"));
        } else {
          // Everything else (system, S1 user, non-local assistant) goes
          // through persistMessage, which decides which track(s) to write.
          tracks
            .persistMessage(sessionKey, {
              role: (role as SessionMessage["role"]) || "assistant",
              content: plainText,
              timestamp: now,
              sessionKey,
            })
            .catch(reportFailure("[ClawXrouter] Failed to persist message to dual history:"));
        }
      }
    }

    // ── 2. PII-redact assistant responses produced under local routing ──
    if (role === "assistant" && isActiveLocalRouting(sessionKey)) {
      const replyText = extractMessageText(msg);
      if (replyText.length >= 10) {
        const scrubbedReply = redactSensitiveInfo(replyText, getLiveConfig().redaction);
        if (scrubbedReply !== replyText) {
          api.logger.info("[ClawXrouter] PII-redacted local model response before transcript write");
          return { message: { ...(msg as Record<string, unknown>), content: [{ type: "text", text: scrubbedReply }] } };
        }
      }
    }

    // ── 3. Sanitize user messages for the session transcript ──
    if (role !== "user" || !pending || pending.level === "S1") return;

    if (pending.level === "S3") {
      consumeDetection(sessionKey);
      return { message: { ...msg, content: [{ type: "text", text: buildMainSessionPlaceholder("S3") }] } };
    }
    if (pending.level === "S2" && pending.desensitized) {
      consumeDetection(sessionKey);
      return { message: { ...msg, content: [{ type: "text", text: pending.desensitized }] } };
    }
  } catch (err) {
    api.logger.error(`[ClawXrouter] Error in before_message_write hook: ${String(err)}`);
  }
});
951
+
952
// ── Hook: session_end ──
// Syncs memory to the clean track, then drops the per-session privacy state
// and flushes the usage collector.
//
// The memory sync is isolated in its own try/catch: a failing sync must not
// prevent clearSessionState() and the collector flush from running, otherwise
// an ended session would keep its state and unflushed usage records.
api.on("session_end", async (event, ctx) => {
  try {
    const sessionKey = event.sessionKey ?? resolveHookSessionKey(ctx);
    if (!sessionKey) return;

    const wasPrivate = isSessionMarkedPrivate(sessionKey);
    api.logger.info(`[ClawXrouter] ${wasPrivate ? "private" : "cloud"} session ${sessionKey} ended. Syncing memory…`);

    try {
      const memMgr = getDefaultMemoryManager();
      const privacyConfig = getLiveConfig();
      await memMgr.syncAllMemoryToClean(privacyConfig);
    } catch (err) {
      // Report the failure but continue with cleanup below.
      api.logger.error(`[ClawXrouter] Error in session_end hook: ${String(err)}`);
    }

    clearSessionState(sessionKey);

    const collector = getGlobalCollector();
    if (collector) await collector.flush();
  } catch (err) {
    api.logger.error(`[ClawXrouter] Error in session_end hook: ${String(err)}`);
  }
});
972
+
973
// ── Hook: after_compaction ──
// Remembers the workspace directory when the context provides one, then
// re-syncs memory to the clean track. Errors are logged, never rethrown.
api.on("after_compaction", async (_event, ctx) => {
  try {
    const { workspaceDir } = ctx;
    if (workspaceDir) {
      _cachedWorkspaceDir = workspaceDir;
    }
    await getDefaultMemoryManager().syncAllMemoryToClean(getLiveConfig());
    api.logger.info("[ClawXrouter] Memory synced after compaction");
  } catch (err) {
    api.logger.error(`[ClawXrouter] Error in after_compaction hook: ${String(err)}`);
  }
});
984
+
985
// ── Hook: llm_output ──
// Logs the completed LLM call, records its usage with the global collector
// (when one exists) and, for sessions with a key, records a summary
// detection entry and signals completion.
api.on("llm_output", async (event, ctx) => {
  try {
    const sessionKey = resolveHookSessionKey(ctx) || event.sessionId || "";
    api.logger.info(`[ClawXrouter] llm_output fired: session=${sessionKey} model=${event.model} usage=${JSON.stringify(event.usage)}`);

    const collector = getGlobalCollector();
    if (!collector) return;

    collector.record({
      sessionKey,
      provider: event.provider ?? "unknown",
      model: event.model ?? "unknown",
      source: "task",
      usage: event.usage,
      loopId: getCurrentLoopId(sessionKey),
    });

    if (!sessionKey) return;

    // Usage may arrive in camelCase or snake_case; accept either spelling.
    const usage = event.usage ?? {};
    const tokensIn = usage.inputTokens ?? usage.prompt_tokens ?? 0;
    const tokensOut = usage.outputTokens ?? usage.completion_tokens ?? 0;
    const tokensCached = usage.cacheReadTokens ?? usage.cache_read_input_tokens ?? 0;

    let summary = `${event.model ?? "unknown"} — in:${tokensIn} out:${tokensOut}`;
    if (tokensCached) {
      summary += ` cache:${tokensCached}`;
    }

    recordDetection(sessionKey, "S1", "onLlmOutput" as any, summary);
    notifyLlmComplete(sessionKey, "onUserMessage");
  } catch (err) {
    api.logger.error(`[ClawXrouter] Error in llm_output hook: ${String(err)}`);
  }
});
1012
+
1013
// ── Hook: llm_input ──
// 1. Diagnostic: logs a sample of every tool-result message about to be sent
//    to the LLM, with flags showing whether it matches a fixed list of names
//    and whether it already contains a "[REDACTED:" marker.
// 2. Unless the session route level is S3, estimates input tokens
//    (≈ characters / 4) and cost, and publishes them via notifyInputEstimate.
//
// NOTE(review): the diagnostic writes up to 500 raw characters of tool output
// to the log at warn level — confirm this is acceptable outside debugging.
api.on("llm_input", async (event, ctx) => {
  try {
    const sessionKey = resolveHookSessionKey(ctx) || event.sessionId || "";
    const routeLevel = getSessionRouteLevel(sessionKey);

    // ── DIAGNOSTIC: dump actual message content going to the LLM ──
    if (Array.isArray(event.historyMessages)) {
      // Hoisted so the pattern is built once, not once per message.
      const knownNamePattern = /何涛|张伟|李强|王芳|刘洋|陈明|林峰|赵磊|周杰|吴敏|孙浩|马丽/;

      // Flatten message content to a string. Always returns a string:
      // JSON.stringify yields undefined for undefined input, which previously
      // made the later .slice() throw and aborted the whole hook.
      const flattenContent = (content: unknown): string => {
        if (typeof content === "string") return content;
        if (Array.isArray(content)) {
          return (content as Array<Record<string, unknown> | null | undefined>)
            .filter((p) => p?.type === "text")
            .map((p) => p?.text as string)
            .join("");
        }
        return JSON.stringify(content) ?? "";
      };

      for (let i = 0; i < event.historyMessages.length; i++) {
        const m = event.historyMessages[i] as Record<string, unknown> | undefined;
        if (!m) continue;
        const role = m.role as string | undefined;
        if (role === "toolResult" || role === "tool") {
          const raw = flattenContent(m.content);
          const first500 = raw.slice(0, 500);
          const hasPII = knownNamePattern.test(raw);
          const hasRedacted = raw.includes("[REDACTED:");
          api.logger.warn(
            `[ClawXrouter][DIAG-LLM-INPUT] msg[${i}] role=${role} hasPII=${hasPII} hasRedacted=${hasRedacted} len=${raw.length} provider=${event.provider} model=${event.model} sample="${first500}"`,
          );
        }
      }
    }

    // No estimate is published for S3-routed sessions.
    if (routeLevel === "S3") return;

    const estimateTokens = (s: string | undefined) => Math.ceil((s?.length ?? 0) / 4);
    let inputTokens = estimateTokens(event.systemPrompt) + estimateTokens(event.prompt);
    if (Array.isArray(event.historyMessages)) {
      for (const m of event.historyMessages) {
        inputTokens += estimateTokens(typeof m === "string" ? m : JSON.stringify(m));
      }
    }

    const pricing = lookupPricing(event.model);
    const estimatedCost = (inputTokens * pricing.inputPer1M) / 1_000_000;

    notifyInputEstimate(sessionKey, {
      estimatedInputTokens: inputTokens,
      estimatedCost,
      model: event.model,
      provider: event.provider,
    });
  } catch (err) {
    api.logger.error(`[ClawXrouter] Error in llm_input hook: ${String(err)}`);
  }
});
1066
+
1067
// ── Hook: before_reset ──
// Remembers the workspace directory when the context provides one, then
// syncs memory to the clean track before the reset proceeds. Errors are
// logged, never rethrown.
api.on("before_reset", async (_event, ctx) => {
  try {
    const { workspaceDir } = ctx;
    if (workspaceDir) {
      _cachedWorkspaceDir = workspaceDir;
    }
    await getDefaultMemoryManager().syncAllMemoryToClean(getLiveConfig());
    api.logger.info("[ClawXrouter] Memory synced before reset");
  } catch (err) {
    api.logger.error(`[ClawXrouter] Error in before_reset hook: ${String(err)}`);
  }
});
1078
+
1079
// ── Hook: message_sending ──
// Runs the outbound message through the pipeline at the "onUserMessage"
// checkpoint:
//   • S3 or a "block" action → the message is cancelled;
//   • S2 → the message is desensitized by the local model, or cancelled if
//     desensitization fails;
//   • anything else → the message is left untouched.
api.on("message_sending", async (event, ctx) => {
  try {
    const outbound = event.content;
    if (!outbound?.trim()) return;

    const privacyConfig = getLiveConfig();
    if (!privacyConfig.enabled) return;

    const pipeline = getGlobalPipeline();
    if (!pipeline) return;

    const sessionKey = resolveHookSessionKey(ctx);
    const decision = await pipeline.run(
      "onUserMessage",
      { checkpoint: "onUserMessage", message: outbound, sessionKey },
      getPipelineConfig(),
    );

    const mustBlock = decision.level === "S3" || decision.action === "block";
    if (mustBlock) {
      api.logger.warn("[ClawXrouter] BLOCKED outbound message: S3/block detected");
      return { cancel: true };
    }

    if (decision.level !== "S2") return;

    const desenResult = await desensitizeWithLocalModel(
      outbound,
      privacyConfig,
      resolveHookSessionKey(ctx) || undefined,
    );
    if (desenResult.failed) {
      // Fail closed: never send the raw text when redaction did not work.
      api.logger.warn("[ClawXrouter] S2 desensitization failed — cancelling outbound message to prevent PII leak");
      return { cancel: true };
    }
    return { content: desenResult.desensitized };
  } catch (err) {
    api.logger.error(`[ClawXrouter] Error in message_sending hook: ${String(err)}`);
  }
});
1113
+
1114
// ── Hook: before_agent_start ──
// Only acts on sessions whose key contains ":subagent:" and that have a
// non-blank prompt. The prompt is classified by the pipeline, then:
//   • S3 / block        → provider + model overridden to the local model;
//   • S2, desensitization failed → same local override;
//   • S2, s2Policy "local"       → same local override;
//   • S2 otherwise      → session marked private, target model mirrored when
//                         the decision names one, and the desensitized prompt
//                         is routed through the "clawxrouter-privacy" provider.
api.on("before_agent_start", async (event, ctx) => {
  try {
    const { prompt } = event;
    const sessionKey = resolveHookSessionKey(ctx);
    if (!sessionKey.includes(":subagent:") || !prompt?.trim()) return;

    const privacyConfig = getLiveConfig();
    if (!privacyConfig.enabled) return;

    const pipeline = getGlobalPipeline();
    if (!pipeline) return;

    // Local provider/model override: guard-agent settings win, then the
    // localModel settings, then the built-in defaults.
    const localRouteFor = (cfg: ReturnType<typeof getLiveConfig>) => {
      const guardCfg = getGuardAgentConfig(cfg);
      return {
        providerOverride: guardCfg?.provider ?? cfg.localModel?.provider ?? "ollama",
        modelOverride: guardCfg?.modelName ?? cfg.localModel?.model ?? "openbmb/minicpm4.1",
      };
    };

    const decision = await pipeline.run(
      "onUserMessage",
      { checkpoint: "onUserMessage", message: prompt, sessionKey, agentId: ctx.agentId },
      getPipelineConfig(),
    );

    // S3 / block: route the subagent to a local model rather than altering
    // the system prompt, so the data stays local.
    if (decision.level === "S3" || decision.action === "block") {
      const route = localRouteFor(privacyConfig);
      api.logger.info(`[ClawXrouter] Subagent ${decision.level} — routing to ${route.providerOverride}/${route.modelOverride}`);
      return route;
    }

    if (decision.level !== "S2") return;

    const privacyCfg = getLiveConfig();
    const desenResult = await desensitizeWithLocalModel(prompt, privacyCfg, sessionKey);
    if (desenResult.failed) {
      const route = localRouteFor(privacyCfg);
      api.logger.warn(`[ClawXrouter] Subagent S2 desensitization failed — routing to local ${route.providerOverride}/${route.modelOverride}`);
      return route;
    }

    markSessionAsPrivate(sessionKey, "S2");

    const s2Policy = privacyCfg.s2Policy ?? "proxy";
    if (s2Policy === "local") {
      const route = localRouteFor(privacyCfg);
      api.logger.info(`[ClawXrouter] Subagent S2 — routing to local ${route.providerOverride}`);
      return route;
    }

    // Proxy policy: mirror the target model first when the decision names one.
    const subTargetModel = decision.target?.model;
    if (subTargetModel) {
      const subDefaults = api.config.agents?.defaults as Record<string, unknown> | undefined;
      const subPrimaryModel = (subDefaults?.model as Record<string, unknown> | undefined)?.primary as string ?? "";
      const subDefaultProvider = (subDefaults?.provider as string) || subPrimaryModel.split("/")[0] || "openai";
      const subActualProvider = decision.target?.originalProvider
        ?? resolveOriginalProvider(api.config as Record<string, unknown>, subTargetModel, subDefaultProvider);
      ensureModelMirrored(
        api.config as Record<string, unknown>,
        subTargetModel,
        subActualProvider,
        () => {
          try {
            return api.runtime.config.loadConfig();
          } catch {
            return undefined;
          }
        },
      );
    }

    api.logger.info("[ClawXrouter] Subagent S2 — routing through privacy proxy");
    return {
      prependContext: `${CLAWXROUTER_S2_OPEN}\n${desenResult.desensitized}\n${CLAWXROUTER_S2_CLOSE}`,
      providerOverride: "clawxrouter-privacy",
    };
  } catch (err) {
    api.logger.error(`[ClawXrouter] Error in before_agent_start hook: ${String(err)}`);
  }
});
1197
+
1198
// ── Hook: message_received ──
// Purely observational: logs the sender when the plugin is enabled. Any
// failure is swallowed on purpose so this hook can never disturb delivery.
api.on("message_received", async (event, _ctx) => {
  try {
    if (getLiveConfig().enabled) {
      api.logger.info?.(`[ClawXrouter] Message received from ${event.from ?? "unknown"}`);
    }
  } catch {
    /* observational only */
  }
});
1205
+
1206
+ api.logger.info("[ClawXrouter] All hooks registered (13 hooks, pipeline-driven)");
1207
+ }
1208
+
1209
+ // ==========================================================================
1210
+ // Helpers
1211
+ // ==========================================================================
1212
+
1213
/**
 * Matches the timestamp OpenClaw prepends to user messages, e.g.
 * "[Thu 2026-03-19 20:19 GMT+8] ", including any whitespace that follows it.
 * Shared by both helpers below so the two can never drift apart.
 */
const TIMESTAMP_PREFIX_PATTERN = /^\[(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s+\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}[^\]]*\]\s*/;

/**
 * Decide whether a message should be skipped.
 *
 * A message is skipped when it already contains a "[REDACTED:" marker, when
 * it starts with "[SYSTEM]", or when nothing remains after removing the
 * timestamp prefix (a bare timestamp or an empty string).
 *
 * @param msg Raw message text.
 * @returns `true` when the message should be skipped.
 */
function shouldSkipMessage(msg: string): boolean {
  if (msg.includes("[REDACTED:") || msg.startsWith("[SYSTEM]")) return true;
  // Only skip when the ENTIRE message is a bare timestamp (no real content).
  return stripTimestampPrefix(msg).length === 0;
}

/**
 * Strip OpenClaw's timestamp prefix from a user message, if present.
 *
 * @param msg Raw message text.
 * @returns The message without the leading timestamp; unchanged when no
 *   timestamp prefix is present.
 */
function stripTimestampPrefix(msg: string): string {
  return msg.replace(TIMESTAMP_PREFIX_PATTERN, "");
}
1226
+
1227
/**
 * Pull the plain text out of an agent message.
 *
 * Accepts a bare string, an object whose `content` is a string, or an object
 * whose `content` is an array of parts. For arrays, string parts and the
 * string `text` field of object parts are collected, empty pieces are
 * dropped, and the rest is joined with newlines. Anything else yields "".
 */
function extractMessageText(msg: unknown): string {
  if (typeof msg === "string") return msg;
  if (typeof msg !== "object" || msg === null) return "";

  const { content } = msg as { content?: unknown };
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  const pieces: string[] = [];
  for (const part of content as unknown[]) {
    let piece = "";
    if (typeof part === "string") {
      piece = part;
    } else if (part && typeof part === "object") {
      const candidate = (part as Record<string, unknown>).text;
      if (typeof candidate === "string") piece = candidate;
    }
    // Skip empty pieces so they do not produce blank lines.
    if (piece) pieces.push(piece);
  }
  return pieces.join("\n");
}
1252
+
1253
/**
 * Replace the text of an agent message while keeping its structure.
 *
 * - A bare string message becomes `newText`.
 * - An object with string `content` gets `content: newText`.
 * - An object with array `content` gets ONE `{ type: "text", text: newText }`
 *   part at the position of the first text-bearing part; every other
 *   text-bearing part is removed and all remaining parts keep their order.
 *   If the array had no text-bearing part, the new part is put first.
 *
 * A part is "text-bearing" when it is a string, has `type === "text"`, or has
 * a string `text` field — the same parts extractMessageText reads. Matching
 * that definition matters: otherwise text that was extracted and redacted
 * would also survive, unredacted, in the rewritten message.
 *
 * The input message is never mutated.
 *
 * @param msg     Message to rewrite.
 * @param newText Replacement text.
 * @returns The rewritten message, or `null` when `msg` has no string/array
 *   content to replace.
 */
function replaceMessageText(msg: unknown, newText: string): unknown | null {
  if (typeof msg === "string") return newText;
  if (!msg || typeof msg !== "object") return null;
  const m = msg as Record<string, unknown>;

  if (typeof m.content === "string") {
    return { ...m, content: newText };
  }

  if (Array.isArray(m.content)) {
    const isTextBearing = (part: unknown): boolean => {
      if (typeof part === "string") return true;
      if (!part || typeof part !== "object") return false;
      const p = part as Record<string, unknown>;
      return p.type === "text" || typeof p.text === "string";
    };

    let textReplaced = false;
    const newContent: unknown[] = [];
    for (const part of m.content as unknown[]) {
      if (isTextBearing(part)) {
        if (!textReplaced) {
          newContent.push({ type: "text", text: newText });
          textReplaced = true;
        }
      } else {
        newContent.push(part);
      }
    }
    if (!textReplaced) {
      newContent.unshift({ type: "text", text: newText });
    }
    return { ...m, content: newContent };
  }

  return null;
}
1289
+
1290
+ // ── Dual-track history injection helper ───────────────────────────────────
1291
+
1292
/**
 * Fetch the history delta for a session from the default session manager and
 * render it as conversation context.
 *
 * @param sessionKey Session whose history delta is loaded.
 * @param agentId    Agent identifier; "main" is used when omitted.
 * @param limit      Optional limit, passed through to the session manager.
 * @returns The formatted context, or `null` when the delta is empty or
 *   loading it fails.
 */
async function loadDualTrackContext(
  sessionKey: string,
  agentId?: string,
  limit?: number,
): Promise<string | null> {
  try {
    const history = await getDefaultSessionManager().loadHistoryDelta(sessionKey, agentId ?? "main", limit);
    return history.length === 0 ? null : DualSessionManager.formatAsContext(history);
  } catch {
    // Best-effort: a missing or unreadable history simply means no context.
    return null;
  }
}
1311
+
1312
+ // ── Memory dual-write helpers ─────────────────────────────────────────────
1313
+
1314
// Workspace-relative paths that count as memory files: the top-level
// MEMORY.md / memory.md, and anything under memory/.
const MEMORY_WRITE_PATTERNS = [/^MEMORY\.md$/, /^memory\.md$/, /^memory\//];

/**
 * Tell whether a write targets a memory file.
 *
 * A single leading "./" is ignored; the remainder must match one of
 * MEMORY_WRITE_PATTERNS.
 */
function isMemoryWritePath(writePath: string): boolean {
  const candidate = writePath.replace(/^\.\//, "");
  for (const pattern of MEMORY_WRITE_PATTERNS) {
    if (pattern.test(candidate)) return true;
  }
  return false;
}
1324
+
1325
/**
 * After the agent writes a memory file, mirror it to the other track:
 *   MEMORY.md (or memory.md) → full copy in MEMORY-FULL.md, redacted copy in place
 *   memory/X.md              → full copy in memory-full/X.md, redacted copy in place
 *
 * The written path is first normalised relative to `workspaceDir`, so
 * absolute paths inside the workspace are recognised and paths containing
 * ".." segments are classified by the file they really point at. Paths that
 * do not resolve to a memory file are ignored.
 *
 * @param writePath      Path the agent wrote (relative to the workspace, or absolute).
 * @param workspaceDir   Workspace root used to resolve both tracks.
 * @param privacyConfig  Passed to the memory manager's redaction.
 * @param logger         Receives one info line describing the outcome.
 * @param isGuardSession When true, the full-track copy is wrapped in the
 *   guard section markers.
 */
async function syncMemoryWrite(
  writePath: string,
  workspaceDir: string,
  privacyConfig: PrivacyConfig,
  logger: { info: (msg: string) => void; warn: (msg: string) => void },
  isGuardSession: boolean = false,
): Promise<void> {
  const absPath = path.isAbsolute(writePath)
    ? writePath
    : path.resolve(workspaceDir, writePath.replace(/^\.\//, ""));

  // Workspace-relative form with forward slashes; starts with ".." when the
  // file lies outside the workspace, which then matches nothing below.
  const rel = path.relative(workspaceDir, absPath).split(path.sep).join("/");

  // Determine the counterpart path on the full track.
  let fullRelPath: string;
  if (rel === "MEMORY.md" || rel === "memory.md") {
    fullRelPath = "MEMORY-FULL.md";
  } else if (rel.startsWith("memory/")) {
    fullRelPath = rel.replace(/^memory\//, "memory-full/");
  } else {
    return;
  }

  let content: string;
  try {
    content = await fs.promises.readFile(absPath, "utf-8");
  } catch {
    // Nothing to mirror if the file cannot be read.
    return;
  }

  if (!content.trim()) return;

  const fullAbsPath = path.resolve(workspaceDir, fullRelPath);

  // Ensure the target directory exists (needed for files under memory-full/).
  await fs.promises.mkdir(path.dirname(fullAbsPath), { recursive: true });

  // Wrap guard-session content in explicit begin/end markers on the full track.
  const fullContent = isGuardSession
    ? `${GUARD_SECTION_BEGIN}\n${content}\n${GUARD_SECTION_END}`
    : content;
  await fs.promises.writeFile(fullAbsPath, fullContent, "utf-8");

  // Redact PII and overwrite the clean version only when redaction changed it.
  const memMgr = getDefaultMemoryManager();
  const redacted = await memMgr.redactContentPublic(content, privacyConfig);
  if (redacted !== content) {
    await fs.promises.writeFile(absPath, redacted, "utf-8");
    logger.info(`[ClawXrouter] Memory dual-write: ${rel} → ${fullRelPath} (redacted clean copy)`);
  } else {
    logger.info(`[ClawXrouter] Memory dual-write: ${rel} → ${fullRelPath} (no PII found)`);
  }
}
1383
+
1384
/**
 * Remove memory_search hits that belong to the wrong memory track.
 *
 * The message text is parsed as JSON with a `results` array. With
 * `useFullTrack` set, hits from MEMORY.md / memory.md / memory/ are dropped;
 * otherwise hits from MEMORY-FULL.md / memory-full/ are dropped. Entries that
 * are not objects are always kept.
 *
 * @returns The message with the filtered results serialised back into its
 *   text, or `null` when nothing was removed or the message could not be
 *   interpreted (no text, invalid JSON, no `results` array).
 */
function filterMemorySearchResults(msg: unknown, useFullTrack: boolean): unknown | null {
  if (!msg || typeof msg !== "object") return null;

  const textContent = extractMessageText(msg);
  if (!textContent) return null;

  // True when a hit's path belongs to the track this session must not see.
  const isOnExcludedTrack = (hitPath: string): boolean =>
    useFullTrack
      ? hitPath === "MEMORY.md" || hitPath === "memory.md" || hitPath.startsWith("memory/")
      : hitPath === "MEMORY-FULL.md" || hitPath.startsWith("memory-full/");

  try {
    const parsed = JSON.parse(textContent);
    if (!parsed || typeof parsed !== "object") return null;

    const results = (parsed as Record<string, unknown>).results;
    if (!Array.isArray(results)) return null;

    const kept = results.filter((entry: unknown) => {
      if (!entry || typeof entry !== "object") return true;
      const entryPath = String((entry as Record<string, unknown>).path ?? "");
      return !isOnExcludedTrack(entryPath);
    });

    // Nothing removed → signal "no change" to the caller.
    if (kept.length === results.length) return null;

    const rewritten = { ...parsed as Record<string, unknown>, results: kept };
    return replaceMessageText(msg, JSON.stringify(rewritten));
  } catch {
    return null;
  }
}