@datafog/fogclaw 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +31 -0
- package/README.md +83 -4
- package/dist/config.d.ts +1 -1
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +100 -1
- package/dist/config.js.map +1 -1
- package/dist/extract.d.ts +28 -0
- package/dist/extract.d.ts.map +1 -0
- package/dist/extract.js +91 -0
- package/dist/extract.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +135 -30
- package/dist/index.js.map +1 -1
- package/dist/message-sending-handler.d.ts +40 -0
- package/dist/message-sending-handler.d.ts.map +1 -0
- package/dist/message-sending-handler.js +50 -0
- package/dist/message-sending-handler.js.map +1 -0
- package/dist/scanner.d.ts +13 -2
- package/dist/scanner.d.ts.map +1 -1
- package/dist/scanner.js +76 -2
- package/dist/scanner.js.map +1 -1
- package/dist/tool-result-handler.d.ts +36 -0
- package/dist/tool-result-handler.d.ts.map +1 -0
- package/dist/tool-result-handler.js +91 -0
- package/dist/tool-result-handler.js.map +1 -0
- package/dist/types.d.ts +17 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -1
- package/docs/OBSERVABILITY.md +22 -15
- package/docs/SECURITY.md +6 -4
- package/docs/plans/active/2026-02-17-feat-tool-result-pii-scanning-plan.md +293 -0
- package/docs/specs/2026-02-17-feat-outbound-message-pii-scanning-spec.md +93 -0
- package/docs/specs/2026-02-17-feat-tool-result-pii-scanning-spec.md +122 -0
- package/fogclaw.config.example.json +19 -1
- package/openclaw.plugin.json +63 -2
- package/package.json +9 -9
- package/scripts/ci/he-docs-drift.sh +0 -0
- package/scripts/ci/he-docs-lint.sh +0 -0
- package/scripts/ci/he-plans-lint.sh +0 -0
- package/scripts/ci/he-runbooks-lint.sh +0 -0
- package/scripts/ci/he-specs-lint.sh +0 -0
- package/scripts/ci/he-spikes-lint.sh +0 -0
- package/scripts/runbooks/select-runbooks.sh +0 -0
- package/src/config.ts +139 -2
- package/src/extract.ts +98 -0
- package/src/index.ts +194 -36
- package/src/message-sending-handler.ts +87 -0
- package/src/scanner.ts +114 -8
- package/src/tool-result-handler.ts +133 -0
- package/src/types.ts +23 -0
- package/tests/config.test.ts +55 -81
- package/tests/extract.test.ts +185 -0
- package/tests/message-sending-handler.test.ts +244 -0
- package/tests/plugin-smoke.test.ts +139 -3
- package/tests/scanner.test.ts +61 -1
- package/tests/tool-result-handler.test.ts +329 -0
package/src/index.ts
CHANGED
|
@@ -1,7 +1,18 @@
|
|
|
1
1
|
import { Scanner } from "./scanner.js";
|
|
2
2
|
import { redact } from "./redactor.js";
|
|
3
3
|
import { loadConfig } from "./config.js";
|
|
4
|
-
import
|
|
4
|
+
import { RegexEngine } from "./engines/regex.js";
|
|
5
|
+
import { createToolResultHandler } from "./tool-result-handler.js";
|
|
6
|
+
import { createMessageSendingHandler } from "./message-sending-handler.js";
|
|
7
|
+
import { resolveAction } from "./types.js";
|
|
8
|
+
import type {
|
|
9
|
+
Entity,
|
|
10
|
+
FogClawConfig,
|
|
11
|
+
GuardrailAction,
|
|
12
|
+
RedactResult,
|
|
13
|
+
RedactStrategy,
|
|
14
|
+
ScanResult,
|
|
15
|
+
} from "./types.js";
|
|
5
16
|
|
|
6
17
|
export { Scanner } from "./scanner.js";
|
|
7
18
|
export { redact } from "./redactor.js";
|
|
@@ -15,12 +26,80 @@ export type {
|
|
|
15
26
|
GuardrailAction,
|
|
16
27
|
} from "./types.js";
|
|
17
28
|
|
|
29
|
+
/**
 * Sort detected entities into buckets based on the action that
 * `resolveAction` returns for each entity under the given config.
 *
 * @param entities - Entities to classify.
 * @param config - Plugin configuration forwarded to `resolveAction`.
 * @returns Three arrays: `blocked` (action "block"), `warned` (action
 *   "warn"), and `redacted` (any other action). Input order is preserved
 *   within each bucket.
 */
function buildGuardrailPlan(entities: Entity[], config: FogClawConfig) {
  const plan: { blocked: Entity[]; warned: Entity[]; redacted: Entity[] } = {
    blocked: [],
    warned: [],
    redacted: [],
  };

  entities.forEach((candidate) => {
    switch (resolveAction(candidate, config)) {
      case "block":
        plan.blocked.push(candidate);
        break;
      case "warn":
        plan.warned.push(candidate);
        break;
      default:
        // Anything that is neither "block" nor "warn" is treated as redact.
        plan.redacted.push(candidate);
    }
  });

  return plan;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Reduce a guardrail plan to counts and de-duplicated label lists.
 *
 * @param plan - Output of `buildGuardrailPlan`.
 * @returns Per-bucket entity counts, their sum as `total`, and for each
 *   bucket the distinct entity labels in first-seen order.
 */
function planToSummary(plan: ReturnType<typeof buildGuardrailPlan>): {
  total: number;
  blocked: number;
  warned: number;
  redacted: number;
  labels: {
    blocked: string[];
    warned: string[];
    redacted: string[];
  };
} {
  // Distinct labels of one bucket; Set iteration keeps insertion order.
  const distinctLabels = (bucket: Entity[]): string[] =>
    Array.from(new Set(bucket.map((item) => item.label)));

  const { blocked, warned, redacted } = plan;
  const blockedCount = blocked.length;
  const warnedCount = warned.length;
  const redactedCount = redacted.length;

  return {
    total: blockedCount + warnedCount + redactedCount,
    blocked: blockedCount,
    warned: warnedCount,
    redacted: redactedCount,
    labels: {
      blocked: distinctLabels(blocked),
      warned: distinctLabels(warned),
      redacted: distinctLabels(redacted),
    },
  };
}
|
|
67
|
+
|
|
68
|
+
/**
 * Build the context strings that describe a guardrail plan.
 *
 * One string is produced per non-empty bucket, always in the order
 * blocked, warned, redacted. Each string lists the distinct labels of
 * that bucket joined with ", ".
 *
 * @param plan - Output of `buildGuardrailPlan`.
 * @param config - Plugin configuration; only `redactStrategy` is read, and
 *   only when the redacted bucket is non-empty.
 * @returns The context strings (empty when every bucket is empty).
 */
function buildGuardrailContext(plan: ReturnType<typeof buildGuardrailPlan>, config: FogClawConfig): string[] {
  // Distinct labels of one bucket, comma-separated, in first-seen order.
  const joinLabels = (bucket: Entity[]): string =>
    Array.from(new Set(bucket.map((item) => item.label))).join(", ");

  const { blocked, warned, redacted } = plan;
  const notices: string[] = [];

  if (blocked.length > 0) {
    const types = joinLabels(blocked);
    notices.push(
      `[FOGCLAW GUARDRAIL — BLOCKED] The user's message contains sensitive information (${types}). ` +
        `Do NOT process or repeat this information. Ask the user to rephrase without sensitive data.`,
    );
  }

  if (warned.length > 0) {
    const types = joinLabels(warned);
    notices.push(`[FOGCLAW NOTICE] PII detected in user message: ${types}. Handle with care.`);
  }

  if (redacted.length > 0) {
    const labels = joinLabels(redacted);
    notices.push(
      `[FOGCLAW REDACTED] ${redacted.length} entity(ies) prepared for ${config.redactStrategy} redaction (${labels}).`,
    );
  }

  return notices;
}
|
|
95
|
+
|
|
18
96
|
/**
|
|
19
97
|
* OpenClaw plugin definition.
|
|
20
98
|
*
|
|
21
99
|
* Registers:
|
|
22
100
|
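* - `before_agent_start` hook for automatic PII guardrail
* - `tool_result_persist` hook for redacting PII in tool results
* - `message_sending` hook for redacting PII in outbound messages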
|
|
23
101
|
* - `fogclaw_scan` tool for on-demand entity detection
|
|
102
|
+
* - `fogclaw_preview` tool for dry-run policy simulation
|
|
24
103
|
* - `fogclaw_redact` tool for on-demand redaction
|
|
25
104
|
*/
|
|
26
105
|
const fogclaw = {
|
|
@@ -48,47 +127,35 @@ const fogclaw = {
|
|
|
48
127
|
const message = event.prompt ?? "";
|
|
49
128
|
if (!message) return;
|
|
50
129
|
|
|
51
|
-
const result = await scanner.scan(message);
|
|
52
|
-
|
|
130
|
+
const result: ScanResult = await scanner.scan(message);
|
|
53
131
|
if (result.entities.length === 0) return;
|
|
54
132
|
|
|
55
|
-
|
|
56
|
-
const
|
|
57
|
-
const warned: typeof result.entities = [];
|
|
58
|
-
const toRedact: typeof result.entities = [];
|
|
59
|
-
|
|
60
|
-
for (const entity of result.entities) {
|
|
61
|
-
const action: GuardrailAction =
|
|
62
|
-
config.entityActions[entity.label] ?? config.guardrail_mode;
|
|
63
|
-
if (action === "block") blocked.push(entity);
|
|
64
|
-
else if (action === "warn") warned.push(entity);
|
|
65
|
-
else if (action === "redact") toRedact.push(entity);
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
const contextParts: string[] = [];
|
|
133
|
+
const plan = buildGuardrailPlan(result.entities, config);
|
|
134
|
+
const contextParts = buildGuardrailContext(plan, config);
|
|
69
135
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
136
|
+
if (config.auditEnabled) {
|
|
137
|
+
const summary = planToSummary(plan);
|
|
138
|
+
api.logger?.info(
|
|
139
|
+
`[FOGCLAW AUDIT] guardrail_scan ${JSON.stringify({
|
|
140
|
+
totalEntities: summary.total,
|
|
141
|
+
blocked: summary.blocked,
|
|
142
|
+
warned: summary.warned,
|
|
143
|
+
redacted: summary.redacted,
|
|
144
|
+
blockedLabels: summary.labels.blocked,
|
|
145
|
+
warnedLabels: summary.labels.warned,
|
|
146
|
+
redactedLabels: summary.labels.redacted,
|
|
147
|
+
})}`,
|
|
76
148
|
);
|
|
77
149
|
}
|
|
78
150
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
151
|
+
if (plan.redacted.length > 0) {
|
|
152
|
+
const redactedResult: RedactResult = redact(
|
|
153
|
+
message,
|
|
154
|
+
plan.redacted,
|
|
155
|
+
config.redactStrategy,
|
|
84
156
|
);
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
// "redact" — replace PII with tokens
|
|
88
|
-
if (toRedact.length > 0) {
|
|
89
|
-
const redacted = redact(message, toRedact, config.redactStrategy);
|
|
90
157
|
contextParts.push(
|
|
91
|
-
`[FOGCLAW REDACTED] The following is the user's message with PII redacted:\n${
|
|
158
|
+
`[FOGCLAW REDACTED] The following is the user's message with PII redacted:\n${redactedResult.redacted_text}`,
|
|
92
159
|
);
|
|
93
160
|
}
|
|
94
161
|
|
|
@@ -97,6 +164,15 @@ const fogclaw = {
|
|
|
97
164
|
}
|
|
98
165
|
});
|
|
99
166
|
|
|
167
|
+
// --- HOOK: Scan tool results for PII before persistence ---
|
|
168
|
+
const toolResultRegex = new RegexEngine();
|
|
169
|
+
const toolResultHandler = createToolResultHandler(config, toolResultRegex, api.logger);
|
|
170
|
+
api.on("tool_result_persist", toolResultHandler);
|
|
171
|
+
|
|
172
|
+
// --- HOOK: Scan outbound messages for PII before delivery ---
|
|
173
|
+
const messageSendingHandler = createMessageSendingHandler(config, scanner, api.logger);
|
|
174
|
+
api.on("message_sending", messageSendingHandler);
|
|
175
|
+
|
|
100
176
|
// --- TOOL: On-demand scan ---
|
|
101
177
|
api.registerTool(
|
|
102
178
|
{
|
|
@@ -138,7 +214,7 @@ const fogclaw = {
|
|
|
138
214
|
count: result.entities.length,
|
|
139
215
|
summary:
|
|
140
216
|
result.entities.length > 0
|
|
141
|
-
? `Found ${result.entities.length} entities: ${[...new Set(result.entities.map((
|
|
217
|
+
? `Found ${result.entities.length} entities: ${[...new Set(result.entities.map((entity) => entity.label))].join(", ")}`
|
|
142
218
|
: "No entities detected",
|
|
143
219
|
},
|
|
144
220
|
null,
|
|
@@ -151,6 +227,88 @@ const fogclaw = {
|
|
|
151
227
|
}
|
|
152
228
|
);
|
|
153
229
|
|
|
230
|
+
// --- TOOL: Policy preview ---
|
|
231
|
+
api.registerTool(
|
|
232
|
+
{
|
|
233
|
+
name: "fogclaw_preview",
|
|
234
|
+
id: "fogclaw_preview",
|
|
235
|
+
description:
|
|
236
|
+
"Preview which entities will be blocked, warned, or redacted and the redacted message, without changing runtime behavior.",
|
|
237
|
+
schema: {
|
|
238
|
+
type: "object",
|
|
239
|
+
properties: {
|
|
240
|
+
text: {
|
|
241
|
+
type: "string",
|
|
242
|
+
description: "Text to run through FogClaw policy preview",
|
|
243
|
+
},
|
|
244
|
+
strategy: {
|
|
245
|
+
type: "string",
|
|
246
|
+
description:
|
|
247
|
+
'Override redaction strategy for the preview: "token" ([EMAIL_1]), "mask" (****), or "hash" ([EMAIL_a1b2c3...]).',
|
|
248
|
+
enum: ["token", "mask", "hash"],
|
|
249
|
+
},
|
|
250
|
+
custom_labels: {
|
|
251
|
+
type: "array",
|
|
252
|
+
items: { type: "string" },
|
|
253
|
+
description: "Additional entity labels for zero-shot detection",
|
|
254
|
+
},
|
|
255
|
+
},
|
|
256
|
+
required: ["text"],
|
|
257
|
+
},
|
|
258
|
+
handler: async ({
|
|
259
|
+
text,
|
|
260
|
+
strategy,
|
|
261
|
+
custom_labels,
|
|
262
|
+
}: {
|
|
263
|
+
text: string;
|
|
264
|
+
strategy?: "token" | "mask" | "hash";
|
|
265
|
+
custom_labels?: string[];
|
|
266
|
+
}) => {
|
|
267
|
+
const result = await scanner.scan(text, custom_labels);
|
|
268
|
+
const plan = buildGuardrailPlan(result.entities, config);
|
|
269
|
+
const summary = planToSummary(plan);
|
|
270
|
+
const redacted = redact(
|
|
271
|
+
text,
|
|
272
|
+
plan.redacted,
|
|
273
|
+
strategy ?? config.redactStrategy,
|
|
274
|
+
);
|
|
275
|
+
|
|
276
|
+
return {
|
|
277
|
+
content: [
|
|
278
|
+
{
|
|
279
|
+
type: "text",
|
|
280
|
+
text: JSON.stringify(
|
|
281
|
+
{
|
|
282
|
+
entities: result.entities,
|
|
283
|
+
totalEntities: summary.total,
|
|
284
|
+
actionPlan: {
|
|
285
|
+
blocked: {
|
|
286
|
+
count: summary.blocked,
|
|
287
|
+
labels: summary.labels.blocked,
|
|
288
|
+
},
|
|
289
|
+
warned: {
|
|
290
|
+
count: summary.warned,
|
|
291
|
+
labels: summary.labels.warned,
|
|
292
|
+
},
|
|
293
|
+
redacted: {
|
|
294
|
+
count: summary.redacted,
|
|
295
|
+
labels: summary.labels.redacted,
|
|
296
|
+
},
|
|
297
|
+
},
|
|
298
|
+
redactedText: redacted.redacted_text,
|
|
299
|
+
redactionStrategy: strategy ?? config.redactStrategy,
|
|
300
|
+
mapping: redacted.mapping,
|
|
301
|
+
},
|
|
302
|
+
null,
|
|
303
|
+
2,
|
|
304
|
+
),
|
|
305
|
+
},
|
|
306
|
+
],
|
|
307
|
+
};
|
|
308
|
+
},
|
|
309
|
+
}
|
|
310
|
+
);
|
|
311
|
+
|
|
154
312
|
// --- TOOL: On-demand redact ---
|
|
155
313
|
api.registerTool(
|
|
156
314
|
{
|
|
@@ -215,7 +373,7 @@ const fogclaw = {
|
|
|
215
373
|
);
|
|
216
374
|
|
|
217
375
|
api.logger?.info(
|
|
218
|
-
`[fogclaw] Plugin registered — guardrail: ${config.guardrail_mode}, model: ${config.model}, custom entities: ${config.custom_entities.length}`,
|
|
376
|
+
`[fogclaw] Plugin registered — guardrail: ${config.guardrail_mode}, model: ${config.model}, custom entities: ${config.custom_entities.length}, audit: ${config.auditEnabled}`,
|
|
219
377
|
);
|
|
220
378
|
},
|
|
221
379
|
};
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Async message_sending hook handler for FogClaw.
|
|
3
|
+
*
|
|
4
|
+
* Scans outbound message text for PII using the full Scanner
|
|
5
|
+
* (regex + GLiNER), redacts detected entities, and returns
|
|
6
|
+
* modified content. Never cancels message delivery.
|
|
7
|
+
*
|
|
8
|
+
* Note: message_sending is defined in OpenClaw but not yet invoked
|
|
9
|
+
* upstream. This handler activates automatically when wired.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import type { Scanner } from "./scanner.js";
|
|
13
|
+
import { redact } from "./redactor.js";
|
|
14
|
+
import { resolveAction } from "./types.js";
|
|
15
|
+
import type { Entity, FogClawConfig } from "./types.js";
|
|
16
|
+
|
|
17
|
+
interface Logger {
|
|
18
|
+
info(msg: string): void;
|
|
19
|
+
warn(msg: string): void;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
export interface MessageSendingEvent {
|
|
23
|
+
to: string;
|
|
24
|
+
content: string;
|
|
25
|
+
metadata?: Record<string, unknown>;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
export interface MessageSendingContext {
|
|
29
|
+
channelId: string;
|
|
30
|
+
accountId?: string;
|
|
31
|
+
conversationId?: string;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
export interface MessageSendingResult {
|
|
35
|
+
content?: string;
|
|
36
|
+
cancel?: boolean;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
/**
 * Build the async handler for the `message_sending` hook.
 *
 * The handler scans `event.content` with the supplied Scanner, keeps the
 * entities whose resolved action is "redact", "block" or "warn", and
 * returns the content with those spans redacted using
 * `config.redactStrategy`. It returns nothing when the content is empty,
 * when no entities are found, or when none are actionable. It never
 * returns `cancel`.
 *
 * @param config - Plugin configuration (actions, redact strategy, audit flag).
 * @param scanner - Scanner used to detect entities in the outbound text.
 * @param logger - Optional logger; receives one audit line per redacted
 *   message when `config.auditEnabled` is set.
 * @returns The hook handler.
 */
export function createMessageSendingHandler(
  config: FogClawConfig,
  scanner: Scanner,
  logger?: Logger,
): (event: MessageSendingEvent, ctx: MessageSendingContext) => Promise<MessageSendingResult | void> {
  // For outbound messages every guardrail mode ends in span-level redaction.
  const redactingActions = new Set<string>(["redact", "block", "warn"]);

  return async (
    event: MessageSendingEvent,
    ctx: MessageSendingContext,
  ): Promise<MessageSendingResult | void> => {
    const outbound = event.content;
    if (!outbound) return;

    const { entities: detected } = await scanner.scan(outbound);
    if (detected.length === 0) return;

    const toRedact = detected.filter((candidate) =>
      redactingActions.has(resolveAction(candidate, config)),
    );
    if (toRedact.length === 0) return;

    const outcome = redact(outbound, toRedact, config.redactStrategy);

    // Audit line carries counts and labels only, never the matched text.
    if (config.auditEnabled && logger) {
      const labels = Array.from(new Set(toRedact.map((item) => item.label)));
      const payload = {
        totalEntities: toRedact.length,
        labels,
        channelId: ctx.channelId ?? null,
        source: "outbound",
      };
      logger.info(`[FOGCLAW AUDIT] outbound_scan ${JSON.stringify(payload)}`);
    }

    // Delivery is never cancelled; the redacted text replaces the content.
    return { content: outcome.redacted_text };
  };
}
|
package/src/scanner.ts
CHANGED
|
@@ -1,23 +1,44 @@
|
|
|
1
|
-
import type { Entity, FogClawConfig
|
|
1
|
+
import type { Entity, FogClawConfig } from "./types.js";
|
|
2
|
+
import { canonicalType } from "./types.js";
|
|
2
3
|
import { RegexEngine } from "./engines/regex.js";
|
|
3
4
|
import { GlinerEngine } from "./engines/gliner.js";
|
|
4
5
|
|
|
6
|
+
type AllowlistPatternCache = {
|
|
7
|
+
values: Set<string>;
|
|
8
|
+
patterns: RegExp[];
|
|
9
|
+
entityValues: Map<string, Set<string>>;
|
|
10
|
+
};
|
|
11
|
+
|
|
12
|
+
/**
 * Normalize a string for allowlist comparison: surrounding whitespace is
 * removed and the remainder is lower-cased.
 *
 * @param value - Raw allowlist entry or entity text.
 * @returns The trimmed, lower-cased form.
 */
function normalizeAllowlistValue(value: string): string {
  const trimmed = value.trim();
  return trimmed.toLowerCase();
}
|
|
15
|
+
|
|
16
|
+
/**
 * Compile allowlist pattern strings into case-insensitive regular
 * expressions.
 *
 * Empty strings are skipped: an empty pattern compiles to a regex that
 * matches every input, which would allowlist every detected entity.
 *
 * @param value - Pattern sources, or `undefined` when none are configured.
 * @returns One `RegExp` (flag `i`) per non-empty pattern, in input order.
 * @throws SyntaxError if a pattern is not a valid regular expression
 *   (raised by the `RegExp` constructor).
 */
function buildPatternMaps(value: string[] | undefined): RegExp[] {
  if (!value || value.length === 0) {
    return [];
  }

  return value
    .filter((pattern) => pattern.length > 0)
    .map((pattern) => new RegExp(pattern, "i"));
}
|
|
23
|
+
|
|
5
24
|
export class Scanner {
|
|
6
25
|
private regexEngine: RegexEngine;
|
|
7
26
|
private glinerEngine: GlinerEngine;
|
|
8
27
|
private glinerAvailable = false;
|
|
9
28
|
private config: FogClawConfig;
|
|
29
|
+
private allowlist: AllowlistPatternCache;
|
|
10
30
|
|
|
11
31
|
constructor(config: FogClawConfig) {
|
|
12
32
|
this.config = config;
|
|
13
33
|
this.regexEngine = new RegexEngine();
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
);
|
|
34
|
+
|
|
35
|
+
const glinerThreshold = this.computeGlinerThreshold(config);
|
|
36
|
+
this.glinerEngine = new GlinerEngine(config.model, glinerThreshold);
|
|
18
37
|
if (config.custom_entities.length > 0) {
|
|
19
38
|
this.glinerEngine.setCustomLabels(config.custom_entities);
|
|
20
39
|
}
|
|
40
|
+
|
|
41
|
+
this.allowlist = this.buildAllowlistCache(config.allowlist);
|
|
21
42
|
}
|
|
22
43
|
|
|
23
44
|
async initialize(): Promise<void> {
|
|
@@ -32,19 +53,25 @@ export class Scanner {
|
|
|
32
53
|
}
|
|
33
54
|
}
|
|
34
55
|
|
|
35
|
-
async scan(text: string, extraLabels?: string[]): Promise<
|
|
56
|
+
async scan(text: string, extraLabels?: string[]): Promise<{ entities: Entity[]; text: string }> {
|
|
36
57
|
if (!text) return { entities: [], text };
|
|
37
58
|
|
|
38
59
|
// Step 1: Regex pass (always runs, synchronous)
|
|
39
|
-
const regexEntities = this.regexEngine.scan(text);
|
|
60
|
+
const regexEntities = this.filterByPolicy(this.regexEngine.scan(text));
|
|
40
61
|
|
|
41
62
|
// Step 2: GLiNER pass (if available)
|
|
42
63
|
let glinerEntities: Entity[] = [];
|
|
43
64
|
if (this.glinerAvailable) {
|
|
44
65
|
try {
|
|
45
66
|
glinerEntities = await this.glinerEngine.scan(text, extraLabels);
|
|
67
|
+
glinerEntities = this.filterByConfidence(glinerEntities);
|
|
68
|
+
glinerEntities = this.filterByPolicy(glinerEntities);
|
|
46
69
|
} catch (err) {
|
|
47
|
-
console.warn(
|
|
70
|
+
console.warn(
|
|
71
|
+
`[fogclaw] GLiNER scan failed, using regex results only: ${
|
|
72
|
+
err instanceof Error ? err.message : String(err)
|
|
73
|
+
}`,
|
|
74
|
+
);
|
|
48
75
|
}
|
|
49
76
|
}
|
|
50
77
|
|
|
@@ -53,6 +80,85 @@ export class Scanner {
|
|
|
53
80
|
|
|
54
81
|
return { entities: merged, text };
|
|
55
82
|
}
|
|
83
|
+
|
|
84
|
+
/**
 * Keep only the entities whose confidence meets the threshold that
 * applies to their label (see `getThresholdForLabel`).
 *
 * @param entities - Candidate entities.
 * @returns A new array with the entities at or above their threshold.
 */
private filterByConfidence(entities: Entity[]): Entity[] {
  const kept: Entity[] = [];
  for (const candidate of entities) {
    const minimum = this.getThresholdForLabel(candidate.label);
    if (candidate.confidence >= minimum) {
      kept.push(candidate);
    }
  }
  return kept;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Remove entities that the allowlist exempts.
 *
 * @param entities - Candidate entities.
 * @returns The input array itself when the allowlist has no values,
 *   patterns or per-entity sets; otherwise a new array without the
 *   allowlisted entities.
 */
private filterByPolicy(entities: Entity[]): Entity[] {
  const { values, patterns, entityValues } = this.allowlist;
  const hasRules = values.size > 0 || patterns.length > 0 || entityValues.size > 0;

  // Nothing configured: skip the per-entity checks entirely.
  if (!hasRules) {
    return entities;
  }

  return entities.filter((candidate) => !this.shouldAllowlistEntity(candidate));
}
|
|
102
|
+
|
|
103
|
+
/**
 * Decide whether the allowlist exempts a detected entity.
 *
 * An entity is allowlisted when any of these holds:
 * - its normalized text is in the global value set,
 * - a global pattern matches its raw text, or
 * - its normalized text is in the value set registered for its label.
 *
 * @param entity - The detected entity to check.
 * @returns `true` if the entity should be dropped from scan results.
 */
private shouldAllowlistEntity(entity: Entity): boolean {
  const normalizedText = normalizeAllowlistValue(entity.text);

  if (this.allowlist.values.has(normalizedText)) {
    return true;
  }

  if (this.allowlist.patterns.some((pattern) => pattern.test(entity.text))) {
    return true;
  }

  // buildAllowlistCache keys the per-entity sets by canonicalType(...), so the
  // label must be canonicalized the same way or the lookup can miss.
  const entityValues = this.allowlist.entityValues.get(canonicalType(entity.label));
  return entityValues !== undefined && entityValues.has(normalizedText);
}
|
|
121
|
+
|
|
122
|
+
/**
 * Look up the confidence threshold for a label.
 *
 * @param label - Entity label; canonicalized with `canonicalType` before
 *   the lookup.
 * @returns The entry in `config.entityConfidenceThresholds` for the
 *   canonical label, or `config.confidence_threshold` when that entry is
 *   null or undefined.
 */
private getThresholdForLabel(label: string): number {
  const override = this.config.entityConfidenceThresholds[canonicalType(label)];
  return override ?? this.config.confidence_threshold;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Compute the threshold handed to the GLiNER engine: the smallest of the
 * global threshold and every per-entity threshold.
 *
 * @param config - Plugin configuration.
 * @returns `config.confidence_threshold` when no per-entity thresholds
 *   exist, otherwise the minimum across the global and per-entity values.
 */
private computeGlinerThreshold(config: FogClawConfig): number {
  const perEntity = Object.values(config.entityConfidenceThresholds);

  return perEntity.length > 0
    ? Math.min(config.confidence_threshold, ...perEntity)
    : config.confidence_threshold;
}
|
|
135
|
+
|
|
136
|
+
/**
 * Precompute the lookup structures used by the allowlist checks.
 *
 * @param allowlist - The `allowlist` section of the plugin configuration.
 * @returns A cache holding the normalized global values, the compiled
 *   global patterns, and a map from canonical entity type to its set of
 *   normalized, non-empty values.
 */
private buildAllowlistCache(allowlist: FogClawConfig["allowlist"]): AllowlistPatternCache {
  const values = new Set(allowlist.values.map((raw) => normalizeAllowlistValue(raw)));
  const patterns = buildPatternMaps(allowlist.patterns);

  const entityValues = new Map<string, Set<string>>();
  Object.entries(allowlist.entities).forEach(([entityType, rawValues]) => {
    const normalized = new Set<string>();
    for (const raw of rawValues) {
      const cleaned = normalizeAllowlistValue(raw);
      // Entries that are empty after normalization are dropped.
      if (cleaned.length > 0) {
        normalized.add(cleaned);
      }
    }
    entityValues.set(canonicalType(entityType), normalized);
  });

  return { values, patterns, entityValues };
}
|
|
158
|
+
|
|
159
|
+
/** Read-only view of the private `glinerAvailable` flag. */
get isGlinerAvailable(): boolean {
  const available = this.glinerAvailable;
  return available;
}
|
|
56
162
|
}
|
|
57
163
|
|
|
58
164
|
/**
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Synchronous tool_result_persist hook handler for FogClaw.
|
|
3
|
+
*
|
|
4
|
+
* Scans tool result text for PII using the regex engine (synchronous),
|
|
5
|
+
* redacts detected entities, and returns the transformed message.
|
|
6
|
+
* GLiNER is not used here because tool_result_persist is synchronous-only.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { RegexEngine } from "./engines/regex.js";
|
|
10
|
+
import { redact } from "./redactor.js";
|
|
11
|
+
import { extractText, replaceText } from "./extract.js";
|
|
12
|
+
import { canonicalType, resolveAction } from "./types.js";
|
|
13
|
+
import type { Entity, FogClawConfig } from "./types.js";
|
|
14
|
+
|
|
15
|
+
interface Logger {
|
|
16
|
+
info(msg: string): void;
|
|
17
|
+
warn(msg: string): void;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
export interface ToolResultPersistEvent {
|
|
21
|
+
toolName?: string;
|
|
22
|
+
toolCallId?: string;
|
|
23
|
+
message: unknown;
|
|
24
|
+
isSynthetic?: boolean;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
export interface ToolResultPersistContext {
|
|
28
|
+
agentId?: string;
|
|
29
|
+
sessionKey?: string;
|
|
30
|
+
toolName?: string;
|
|
31
|
+
toolCallId?: string;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
/**
 * Build a synchronous allowlist predicate from the plugin configuration.
 *
 * The predicate answers "should this entity be KEPT?". An entity is
 * dropped when its normalized text (trimmed, lower-cased) is in the
 * global value set, when a global pattern matches its raw text, or when
 * its normalized text is in the value set registered for its label.
 *
 * @param config - Plugin configuration; only `config.allowlist` is read.
 * @returns A predicate returning `true` for entities that are not
 *   allowlisted. With no values, patterns or per-entity sets configured,
 *   the predicate keeps everything.
 * @throws SyntaxError if an allowlist pattern is not a valid regular
 *   expression (raised by the `RegExp` constructor).
 */
function buildAllowlistFilter(config: FogClawConfig): (entity: Entity) => boolean {
  const globalValues = new Set(
    config.allowlist.values.map((v) => v.trim().toLowerCase()),
  );

  // Empty patterns are skipped: an empty regex would match every entity.
  const globalPatterns = config.allowlist.patterns
    .filter((p) => p.length > 0)
    .map((p) => new RegExp(p, "i"));

  const entityValues = new Map<string, Set<string>>();
  for (const [entityType, values] of Object.entries(config.allowlist.entities)) {
    const canonical = canonicalType(entityType);
    const set = new Set(
      values
        .map((v) => v.trim().toLowerCase())
        .filter((v) => v.length > 0),
    );
    entityValues.set(canonical, set);
  }

  // Short-circuit: if no allowlist entries, keep everything.
  if (globalValues.size === 0 && globalPatterns.length === 0 && entityValues.size === 0) {
    return () => true;
  }

  return (entity: Entity): boolean => {
    const normalizedText = entity.text.trim().toLowerCase();

    if (globalValues.has(normalizedText)) return false;
    if (globalPatterns.some((pattern) => pattern.test(entity.text))) return false;

    // The map is keyed by canonical type, so canonicalize the label the same
    // way before looking it up; a raw label could otherwise miss its entry.
    const perEntity = entityValues.get(canonicalType(entity.label));
    if (perEntity && perEntity.has(normalizedText)) return false;

    return true;
  };
}
|
|
76
|
+
|
|
77
|
+
/**
 * Build the synchronous handler for the `tool_result_persist` hook.
 *
 * The handler extracts text from `event.message`, scans it with the regex
 * engine, drops allowlisted entities, and redacts every entity whose
 * resolved action is "redact", "block" or "warn" using
 * `config.redactStrategy`. It returns nothing when there is no text or
 * nothing left to redact.
 *
 * The returned function must stay synchronous (no Promise): OpenClaw
 * rejects async `tool_result_persist` handlers.
 *
 * @param config - Plugin configuration (allowlist, actions, redact
 *   strategy, audit flag).
 * @param regexEngine - Regex engine used for the synchronous scan.
 * @param logger - Optional logger; receives one audit line per redacted
 *   tool result when `config.auditEnabled` is set.
 * @returns The hook handler, which yields `{ message }` with the redacted
 *   text substituted in, or nothing when the message is left untouched.
 */
export function createToolResultHandler(
  config: FogClawConfig,
  regexEngine: RegexEngine,
  logger?: Logger,
): (event: ToolResultPersistEvent, ctx: ToolResultPersistContext) => { message: unknown } | void {
  const shouldKeep = buildAllowlistFilter(config);
  // For tool results every guardrail mode ends in span-level redaction.
  const redactingActions = new Set<string>(["redact", "block", "warn"]);

  return (event: ToolResultPersistEvent, _ctx: ToolResultPersistContext): { message: unknown } | void => {
    const text = extractText(event.message);
    if (!text) return;

    // Regex-only scan keeps this path synchronous.
    const detected = regexEngine.scan(text);
    if (detected.length === 0) return;

    const afterAllowlist = detected.filter(shouldKeep);
    if (afterAllowlist.length === 0) return;

    const toRedact = afterAllowlist.filter((candidate) =>
      redactingActions.has(resolveAction(candidate, config)),
    );
    if (toRedact.length === 0) return;

    const outcome = redact(text, toRedact, config.redactStrategy);
    const modifiedMessage = replaceText(event.message, outcome.redacted_text);

    // Audit line carries counts and labels only, never the matched text.
    if (config.auditEnabled && logger) {
      const labels = Array.from(new Set(toRedact.map((item) => item.label)));
      const payload = {
        totalEntities: toRedact.length,
        labels,
        toolName: event.toolName ?? null,
        source: "tool_result",
      };
      logger.info(`[FOGCLAW AUDIT] tool_result_scan ${JSON.stringify(payload)}`);
    }

    return { message: modifiedMessage };
  };
}
|