@datafog/fogclaw 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -4
- package/dist/config.d.ts +1 -1
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +100 -1
- package/dist/config.js.map +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +127 -30
- package/dist/index.js.map +1 -1
- package/dist/scanner.d.ts +13 -2
- package/dist/scanner.d.ts.map +1 -1
- package/dist/scanner.js +76 -2
- package/dist/scanner.js.map +1 -1
- package/dist/types.d.ts +16 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/docs/plans/active/2026-02-17-feat-release-fogclaw-via-datafog-package-plan.md +24 -21
- package/docs/plugins/fogclaw.md +2 -0
- package/fogclaw.config.example.json +19 -1
- package/openclaw.plugin.json +103 -4
- package/package.json +1 -1
- package/src/config.ts +139 -2
- package/src/index.ts +185 -36
- package/src/scanner.ts +114 -8
- package/src/types.ts +19 -0
- package/tests/config.test.ts +55 -81
- package/tests/plugin-smoke.test.ts +30 -1
- package/tests/scanner.test.ts +61 -1
package/src/index.ts
CHANGED
|
@@ -1,7 +1,14 @@
|
|
|
1
1
|
import { Scanner } from "./scanner.js";
|
|
2
2
|
import { redact } from "./redactor.js";
|
|
3
3
|
import { loadConfig } from "./config.js";
|
|
4
|
-
import type {
|
|
4
|
+
import type {
|
|
5
|
+
Entity,
|
|
6
|
+
FogClawConfig,
|
|
7
|
+
GuardrailAction,
|
|
8
|
+
RedactResult,
|
|
9
|
+
RedactStrategy,
|
|
10
|
+
ScanResult,
|
|
11
|
+
} from "./types.js";
|
|
5
12
|
|
|
6
13
|
export { Scanner } from "./scanner.js";
|
|
7
14
|
export { redact } from "./redactor.js";
|
|
@@ -15,12 +22,84 @@ export type {
|
|
|
15
22
|
GuardrailAction,
|
|
16
23
|
} from "./types.js";
|
|
17
24
|
|
|
25
|
+
/**
 * Picks the guardrail action for a single detected entity.
 *
 * A per-label override in `config.entityActions` wins; when there is none the
 * global `config.guardrail_mode` is used.
 *
 * NOTE(review): the lookup uses `entity.label` verbatim — presumably labels
 * arrive already canonicalized; confirm against the detection engines.
 *
 * @param entity - Detected entity whose label selects the override.
 * @param config - Active FogClaw configuration.
 * @returns The action to apply to this entity.
 */
function resolveAction(entity: Entity, config: FogClawConfig): GuardrailAction {
  // Own-property check: labels can be caller-supplied, and a label such as
  // "constructor" or "toString" must not resolve to a member inherited from
  // Object.prototype (which would bypass the guardrail_mode fallback).
  const hasOverride = Object.prototype.hasOwnProperty.call(config.entityActions, entity.label);
  const override = hasOverride ? config.entityActions[entity.label] : undefined;
  return override ?? config.guardrail_mode;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Sorts detected entities into the three guardrail buckets.
 *
 * Each entity's action comes from `resolveAction`. "block" and "warn" go to
 * their own buckets; every other action ends up in `redacted`.
 *
 * @param entities - Entities produced by a scan.
 * @param config - Active FogClaw configuration.
 * @returns The entities grouped as `blocked`, `warned` and `redacted`, each
 *   bucket preserving input order.
 */
function buildGuardrailPlan(entities: Entity[], config: FogClawConfig) {
  const plan: { blocked: Entity[]; warned: Entity[]; redacted: Entity[] } = {
    blocked: [],
    warned: [],
    redacted: [],
  };

  entities.forEach((candidate) => {
    switch (resolveAction(candidate, config)) {
      case "block":
        plan.blocked.push(candidate);
        break;
      case "warn":
        plan.warned.push(candidate);
        break;
      default:
        // "redact" — and anything unexpected — is redacted.
        plan.redacted.push(candidate);
        break;
    }
  });

  return plan;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Reduces a guardrail plan to counts and distinct labels per bucket.
 *
 * @param plan - Result of `buildGuardrailPlan`.
 * @returns Per-bucket entity counts, their sum as `total`, and for each
 *   bucket the de-duplicated labels in first-seen order.
 */
function planToSummary(plan: ReturnType<typeof buildGuardrailPlan>): {
  total: number;
  blocked: number;
  warned: number;
  redacted: number;
  labels: {
    blocked: string[];
    warned: string[];
    redacted: string[];
  };
} {
  // A Set keeps insertion order, so labels come out in first-seen order.
  const distinctLabels = (bucket: Entity[]): string[] =>
    Array.from(new Set(bucket.map(({ label }) => label)));

  const blockedCount = plan.blocked.length;
  const warnedCount = plan.warned.length;
  const redactedCount = plan.redacted.length;

  return {
    total: blockedCount + warnedCount + redactedCount,
    blocked: blockedCount,
    warned: warnedCount,
    redacted: redactedCount,
    labels: {
      blocked: distinctLabels(plan.blocked),
      warned: distinctLabels(plan.warned),
      redacted: distinctLabels(plan.redacted),
    },
  };
}
|
|
67
|
+
|
|
68
|
+
/**
 * Builds the context messages that describe a guardrail plan.
 *
 * One message is produced per non-empty bucket, always in the order
 * blocked, warned, redacted. Each message lists the bucket's distinct
 * labels; the redacted message also carries the entity count and
 * `config.redactStrategy`.
 *
 * @param plan - Result of `buildGuardrailPlan`.
 * @param config - Active FogClaw configuration (only `redactStrategy` is read).
 * @returns Zero to three messages; empty when every bucket is empty.
 */
function buildGuardrailContext(plan: ReturnType<typeof buildGuardrailPlan>, config: FogClawConfig): string[] {
  const joinDistinctLabels = (bucket: Entity[]): string =>
    Array.from(new Set(bucket.map((item) => item.label))).join(", ");

  const notes: string[] = [];

  if (plan.blocked.length > 0) {
    const blockedTypes = joinDistinctLabels(plan.blocked);
    notes.push(
      `[FOGCLAW GUARDRAIL — BLOCKED] The user's message contains sensitive information (${blockedTypes}). ` +
        `Do NOT process or repeat this information. Ask the user to rephrase without sensitive data.`,
    );
  }

  if (plan.warned.length > 0) {
    const warnedTypes = joinDistinctLabels(plan.warned);
    notes.push(`[FOGCLAW NOTICE] PII detected in user message: ${warnedTypes}. Handle with care.`);
  }

  if (plan.redacted.length > 0) {
    const redactedTypes = joinDistinctLabels(plan.redacted);
    notes.push(
      `[FOGCLAW REDACTED] ${plan.redacted.length} entity(ies) prepared for ${config.redactStrategy} redaction (${redactedTypes}).`,
    );
  }

  return notes;
}
|
|
95
|
+
|
|
18
96
|
/**
|
|
19
97
|
* OpenClaw plugin definition.
|
|
20
98
|
*
|
|
21
99
|
* Registers:
|
|
22
100
|
* - `before_agent_start` hook for automatic PII guardrail
|
|
23
101
|
* - `fogclaw_scan` tool for on-demand entity detection
|
|
102
|
+
* - `fogclaw_preview` tool for dry-run policy simulation
|
|
24
103
|
* - `fogclaw_redact` tool for on-demand redaction
|
|
25
104
|
*/
|
|
26
105
|
const fogclaw = {
|
|
@@ -48,47 +127,35 @@ const fogclaw = {
|
|
|
48
127
|
const message = event.prompt ?? "";
|
|
49
128
|
if (!message) return;
|
|
50
129
|
|
|
51
|
-
const result = await scanner.scan(message);
|
|
52
|
-
|
|
130
|
+
const result: ScanResult = await scanner.scan(message);
|
|
53
131
|
if (result.entities.length === 0) return;
|
|
54
132
|
|
|
55
|
-
|
|
56
|
-
const
|
|
57
|
-
const warned: typeof result.entities = [];
|
|
58
|
-
const toRedact: typeof result.entities = [];
|
|
59
|
-
|
|
60
|
-
for (const entity of result.entities) {
|
|
61
|
-
const action: GuardrailAction =
|
|
62
|
-
config.entityActions[entity.label] ?? config.guardrail_mode;
|
|
63
|
-
if (action === "block") blocked.push(entity);
|
|
64
|
-
else if (action === "warn") warned.push(entity);
|
|
65
|
-
else if (action === "redact") toRedact.push(entity);
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
const contextParts: string[] = [];
|
|
133
|
+
const plan = buildGuardrailPlan(result.entities, config);
|
|
134
|
+
const contextParts = buildGuardrailContext(plan, config);
|
|
69
135
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
136
|
+
if (config.auditEnabled) {
|
|
137
|
+
const summary = planToSummary(plan);
|
|
138
|
+
api.logger?.info(
|
|
139
|
+
`[FOGCLAW AUDIT] guardrail_scan ${JSON.stringify({
|
|
140
|
+
totalEntities: summary.total,
|
|
141
|
+
blocked: summary.blocked,
|
|
142
|
+
warned: summary.warned,
|
|
143
|
+
redacted: summary.redacted,
|
|
144
|
+
blockedLabels: summary.labels.blocked,
|
|
145
|
+
warnedLabels: summary.labels.warned,
|
|
146
|
+
redactedLabels: summary.labels.redacted,
|
|
147
|
+
})}`,
|
|
76
148
|
);
|
|
77
149
|
}
|
|
78
150
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
151
|
+
if (plan.redacted.length > 0) {
|
|
152
|
+
const redactedResult: RedactResult = redact(
|
|
153
|
+
message,
|
|
154
|
+
plan.redacted,
|
|
155
|
+
config.redactStrategy,
|
|
84
156
|
);
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
// "redact" — replace PII with tokens
|
|
88
|
-
if (toRedact.length > 0) {
|
|
89
|
-
const redacted = redact(message, toRedact, config.redactStrategy);
|
|
90
157
|
contextParts.push(
|
|
91
|
-
`[FOGCLAW REDACTED] The following is the user's message with PII redacted:\n${
|
|
158
|
+
`[FOGCLAW REDACTED] The following is the user's message with PII redacted:\n${redactedResult.redacted_text}`,
|
|
92
159
|
);
|
|
93
160
|
}
|
|
94
161
|
|
|
@@ -138,7 +205,7 @@ const fogclaw = {
|
|
|
138
205
|
count: result.entities.length,
|
|
139
206
|
summary:
|
|
140
207
|
result.entities.length > 0
|
|
141
|
-
? `Found ${result.entities.length} entities: ${[...new Set(result.entities.map((
|
|
208
|
+
? `Found ${result.entities.length} entities: ${[...new Set(result.entities.map((entity) => entity.label))].join(", ")}`
|
|
142
209
|
: "No entities detected",
|
|
143
210
|
},
|
|
144
211
|
null,
|
|
@@ -151,6 +218,88 @@ const fogclaw = {
|
|
|
151
218
|
}
|
|
152
219
|
);
|
|
153
220
|
|
|
221
|
+
// --- TOOL: Policy preview ---
|
|
222
|
+
api.registerTool(
|
|
223
|
+
{
|
|
224
|
+
name: "fogclaw_preview",
|
|
225
|
+
id: "fogclaw_preview",
|
|
226
|
+
description:
|
|
227
|
+
"Preview which entities will be blocked, warned, or redacted and the redacted message, without changing runtime behavior.",
|
|
228
|
+
schema: {
|
|
229
|
+
type: "object",
|
|
230
|
+
properties: {
|
|
231
|
+
text: {
|
|
232
|
+
type: "string",
|
|
233
|
+
description: "Text to run through FogClaw policy preview",
|
|
234
|
+
},
|
|
235
|
+
strategy: {
|
|
236
|
+
type: "string",
|
|
237
|
+
description:
|
|
238
|
+
'Override redaction strategy for the preview: "token" ([EMAIL_1]), "mask" (****), or "hash" ([EMAIL_a1b2c3...]).',
|
|
239
|
+
enum: ["token", "mask", "hash"],
|
|
240
|
+
},
|
|
241
|
+
custom_labels: {
|
|
242
|
+
type: "array",
|
|
243
|
+
items: { type: "string" },
|
|
244
|
+
description: "Additional entity labels for zero-shot detection",
|
|
245
|
+
},
|
|
246
|
+
},
|
|
247
|
+
required: ["text"],
|
|
248
|
+
},
|
|
249
|
+
handler: async ({
|
|
250
|
+
text,
|
|
251
|
+
strategy,
|
|
252
|
+
custom_labels,
|
|
253
|
+
}: {
|
|
254
|
+
text: string;
|
|
255
|
+
strategy?: "token" | "mask" | "hash";
|
|
256
|
+
custom_labels?: string[];
|
|
257
|
+
}) => {
|
|
258
|
+
const result = await scanner.scan(text, custom_labels);
|
|
259
|
+
const plan = buildGuardrailPlan(result.entities, config);
|
|
260
|
+
const summary = planToSummary(plan);
|
|
261
|
+
const redacted = redact(
|
|
262
|
+
text,
|
|
263
|
+
plan.redacted,
|
|
264
|
+
strategy ?? config.redactStrategy,
|
|
265
|
+
);
|
|
266
|
+
|
|
267
|
+
return {
|
|
268
|
+
content: [
|
|
269
|
+
{
|
|
270
|
+
type: "text",
|
|
271
|
+
text: JSON.stringify(
|
|
272
|
+
{
|
|
273
|
+
entities: result.entities,
|
|
274
|
+
totalEntities: summary.total,
|
|
275
|
+
actionPlan: {
|
|
276
|
+
blocked: {
|
|
277
|
+
count: summary.blocked,
|
|
278
|
+
labels: summary.labels.blocked,
|
|
279
|
+
},
|
|
280
|
+
warned: {
|
|
281
|
+
count: summary.warned,
|
|
282
|
+
labels: summary.labels.warned,
|
|
283
|
+
},
|
|
284
|
+
redacted: {
|
|
285
|
+
count: summary.redacted,
|
|
286
|
+
labels: summary.labels.redacted,
|
|
287
|
+
},
|
|
288
|
+
},
|
|
289
|
+
redactedText: redacted.redacted_text,
|
|
290
|
+
redactionStrategy: strategy ?? config.redactStrategy,
|
|
291
|
+
mapping: redacted.mapping,
|
|
292
|
+
},
|
|
293
|
+
null,
|
|
294
|
+
2,
|
|
295
|
+
),
|
|
296
|
+
},
|
|
297
|
+
],
|
|
298
|
+
};
|
|
299
|
+
},
|
|
300
|
+
}
|
|
301
|
+
);
|
|
302
|
+
|
|
154
303
|
// --- TOOL: On-demand redact ---
|
|
155
304
|
api.registerTool(
|
|
156
305
|
{
|
|
@@ -215,7 +364,7 @@ const fogclaw = {
|
|
|
215
364
|
);
|
|
216
365
|
|
|
217
366
|
api.logger?.info(
|
|
218
|
-
`[fogclaw] Plugin registered — guardrail: ${config.guardrail_mode}, model: ${config.model}, custom entities: ${config.custom_entities.length}`,
|
|
367
|
+
`[fogclaw] Plugin registered — guardrail: ${config.guardrail_mode}, model: ${config.model}, custom entities: ${config.custom_entities.length}, audit: ${config.auditEnabled}`,
|
|
219
368
|
);
|
|
220
369
|
},
|
|
221
370
|
};
|
package/src/scanner.ts
CHANGED
|
@@ -1,23 +1,44 @@
|
|
|
1
|
-
import type { Entity, FogClawConfig
|
|
1
|
+
import type { Entity, FogClawConfig } from "./types.js";
|
|
2
|
+
import { canonicalType } from "./types.js";
|
|
2
3
|
import { RegexEngine } from "./engines/regex.js";
|
|
3
4
|
import { GlinerEngine } from "./engines/gliner.js";
|
|
4
5
|
|
|
6
|
+
/**
 * Pre-processed form of the configured allowlist, built by the Scanner so
 * that checking an entity only needs set lookups and regex tests.
 */
type AllowlistPatternCache = {
  // Globally allowlisted values in normalized (trimmed, lowercased) form.
  values: Set<string>;
  // Compiled global allowlist patterns.
  patterns: RegExp[];
  // Allowlisted values per entity type, in normalized form.
  entityValues: Map<string, Set<string>>;
};
|
|
11
|
+
|
|
12
|
+
/**
 * Produces the comparison form of an allowlist value or entity text:
 * surrounding whitespace removed, then lowercased.
 *
 * @param value - Raw value.
 * @returns The trimmed, lowercased value.
 */
function normalizeAllowlistValue(value: string): string {
  const trimmed = value.trim();
  return trimmed.toLowerCase();
}
|
|
15
|
+
|
|
16
|
+
/**
 * Compiles allowlist pattern sources into case-insensitive regular
 * expressions.
 *
 * @param value - Regex source strings; `undefined` or an empty list yields
 *   an empty result.
 * @returns One `RegExp` (flag "i") per source, in input order.
 * @throws SyntaxError if a source is not a valid regular expression.
 */
function buildPatternMaps(value: string[] | undefined): RegExp[] {
  const compiled: RegExp[] = [];
  for (const source of value ?? []) {
    compiled.push(new RegExp(source, "i"));
  }
  return compiled;
}
|
|
23
|
+
|
|
5
24
|
export class Scanner {
|
|
6
25
|
private regexEngine: RegexEngine;
|
|
7
26
|
private glinerEngine: GlinerEngine;
|
|
8
27
|
private glinerAvailable = false;
|
|
9
28
|
private config: FogClawConfig;
|
|
29
|
+
private allowlist: AllowlistPatternCache;
|
|
10
30
|
|
|
11
31
|
constructor(config: FogClawConfig) {
|
|
12
32
|
this.config = config;
|
|
13
33
|
this.regexEngine = new RegexEngine();
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
);
|
|
34
|
+
|
|
35
|
+
const glinerThreshold = this.computeGlinerThreshold(config);
|
|
36
|
+
this.glinerEngine = new GlinerEngine(config.model, glinerThreshold);
|
|
18
37
|
if (config.custom_entities.length > 0) {
|
|
19
38
|
this.glinerEngine.setCustomLabels(config.custom_entities);
|
|
20
39
|
}
|
|
40
|
+
|
|
41
|
+
this.allowlist = this.buildAllowlistCache(config.allowlist);
|
|
21
42
|
}
|
|
22
43
|
|
|
23
44
|
async initialize(): Promise<void> {
|
|
@@ -32,19 +53,25 @@ export class Scanner {
|
|
|
32
53
|
}
|
|
33
54
|
}
|
|
34
55
|
|
|
35
|
-
async scan(text: string, extraLabels?: string[]): Promise<
|
|
56
|
+
async scan(text: string, extraLabels?: string[]): Promise<{ entities: Entity[]; text: string }> {
|
|
36
57
|
if (!text) return { entities: [], text };
|
|
37
58
|
|
|
38
59
|
// Step 1: Regex pass (always runs, synchronous)
|
|
39
|
-
const regexEntities = this.regexEngine.scan(text);
|
|
60
|
+
const regexEntities = this.filterByPolicy(this.regexEngine.scan(text));
|
|
40
61
|
|
|
41
62
|
// Step 2: GLiNER pass (if available)
|
|
42
63
|
let glinerEntities: Entity[] = [];
|
|
43
64
|
if (this.glinerAvailable) {
|
|
44
65
|
try {
|
|
45
66
|
glinerEntities = await this.glinerEngine.scan(text, extraLabels);
|
|
67
|
+
glinerEntities = this.filterByConfidence(glinerEntities);
|
|
68
|
+
glinerEntities = this.filterByPolicy(glinerEntities);
|
|
46
69
|
} catch (err) {
|
|
47
|
-
console.warn(
|
|
70
|
+
console.warn(
|
|
71
|
+
`[fogclaw] GLiNER scan failed, using regex results only: ${
|
|
72
|
+
err instanceof Error ? err.message : String(err)
|
|
73
|
+
}`,
|
|
74
|
+
);
|
|
48
75
|
}
|
|
49
76
|
}
|
|
50
77
|
|
|
@@ -53,6 +80,85 @@ export class Scanner {
|
|
|
53
80
|
|
|
54
81
|
return { entities: merged, text };
|
|
55
82
|
}
|
|
83
|
+
|
|
84
|
+
/**
 * Keeps only the entities whose confidence reaches the threshold that
 * applies to their label.
 *
 * @param entities - Candidate entities.
 * @returns A new array with the entities that pass, in input order.
 */
private filterByConfidence(entities: Entity[]): Entity[] {
  const kept: Entity[] = [];
  for (const candidate of entities) {
    const minimum = this.getThresholdForLabel(candidate.label);
    if (candidate.confidence >= minimum) {
      kept.push(candidate);
    }
  }
  return kept;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Removes allowlisted entities from a detection result.
 *
 * @param entities - Candidate entities.
 * @returns The input array itself when the allowlist is completely empty;
 *   otherwise a new array without the allowlisted entities.
 */
private filterByPolicy(entities: Entity[]): Entity[] {
  const { values, patterns, entityValues } = this.allowlist;

  // Nothing configured: skip the per-entity checks entirely.
  const allowlistIsEmpty = values.size === 0 && patterns.length === 0 && entityValues.size === 0;
  if (allowlistIsEmpty) {
    return entities;
  }

  return entities.filter((candidate) => !this.shouldAllowlistEntity(candidate));
}
|
|
102
|
+
|
|
103
|
+
/**
 * Decides whether an entity is exempt from guardrails because it matches
 * the allowlist.
 *
 * Checks, in order: the global value set (normalized text), the global
 * patterns (raw text), then the value set scoped to the entity's type.
 *
 * @param entity - Detected entity.
 * @returns `true` when any allowlist rule matches.
 */
private shouldAllowlistEntity(entity: Entity): boolean {
  const normalizedText = normalizeAllowlistValue(entity.text);

  if (this.allowlist.values.has(normalizedText)) {
    return true;
  }

  if (this.allowlist.patterns.some((pattern) => pattern.test(entity.text))) {
    return true;
  }

  // The scoped map is keyed by canonical entity type, so look the label up
  // in canonical form as well; the raw label is still checked so existing
  // matches keep working.
  const matchesScoped = (label: string): boolean =>
    this.allowlist.entityValues.get(label)?.has(normalizedText) === true;

  return matchesScoped(entity.label) || matchesScoped(canonicalType(entity.label));
}
|
|
121
|
+
|
|
122
|
+
/**
 * Resolves the confidence threshold for a label.
 *
 * @param label - Entity label; it is canonicalized before the lookup.
 * @returns The per-entity threshold when one is configured, otherwise the
 *   global `confidence_threshold`.
 */
private getThresholdForLabel(label: string): number {
  const key = canonicalType(label);
  const override = this.config.entityConfidenceThresholds[key];
  return override ?? this.config.confidence_threshold;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Computes the single threshold handed to the GLiNER engine: the lowest of
 * the global threshold and all per-entity thresholds.
 *
 * NOTE(review): presumably the minimum is used so the engine's own cut-off
 * never drops an entity that a lower per-entity threshold would accept —
 * confirm against GlinerEngine.
 *
 * @param config - Active FogClaw configuration.
 * @returns The global threshold when no per-entity thresholds exist,
 *   otherwise the minimum over all of them.
 */
private computeGlinerThreshold(config: FogClawConfig): number {
  const perEntity = Object.values(config.entityConfidenceThresholds);
  return perEntity.length > 0
    ? Math.min(config.confidence_threshold, ...perEntity)
    : config.confidence_threshold;
}
|
|
135
|
+
|
|
136
|
+
/**
 * Converts the configured allowlist into lookup structures.
 *
 * Values are normalized (trimmed, lowercased) and blanks are dropped, both
 * for the global list and for the per-entity lists. Patterns are compiled
 * once. Per-entity lists are keyed by canonical entity type; keys that
 * canonicalize to the same type are merged rather than overwriting each
 * other.
 *
 * @param allowlist - The `allowlist` section of the configuration.
 * @returns The cache used by the allowlist checks.
 * @throws SyntaxError if a pattern is not a valid regular expression.
 */
private buildAllowlistCache(allowlist: FogClawConfig["allowlist"]): AllowlistPatternCache {
  const normalizeAll = (raw: string[]): string[] =>
    raw.map((value) => normalizeAllowlistValue(value)).filter((value) => value.length > 0);

  const globalValues = new Set(normalizeAll(allowlist.values));
  const globalPatterns = buildPatternMaps(allowlist.patterns);

  const entityValues = new Map<string, Set<string>>();
  for (const [entityType, values] of Object.entries(allowlist.entities)) {
    const canonical = canonicalType(entityType);
    // Reuse the bucket when another key already mapped to this canonical
    // type, so neither key's values are lost.
    const bucket = entityValues.get(canonical) ?? new Set<string>();
    for (const value of normalizeAll(values)) {
      bucket.add(value);
    }
    entityValues.set(canonical, bucket);
  }

  return {
    values: globalValues,
    patterns: globalPatterns,
    entityValues,
  };
}
|
|
158
|
+
|
|
159
|
+
/** Read-only view of the scanner's `glinerAvailable` flag. */
get isGlinerAvailable(): boolean {
  return this.glinerAvailable;
}
|
|
56
162
|
}
|
|
57
163
|
|
|
58
164
|
/**
|
package/src/types.ts
CHANGED
|
@@ -11,6 +11,16 @@ export type RedactStrategy = "token" | "mask" | "hash";
|
|
|
11
11
|
|
|
12
12
|
export type GuardrailAction = "redact" | "block" | "warn";
|
|
13
13
|
|
|
14
|
+
/**
 * Per-entity-type confidence thresholds, keyed by entity type.
 * The scanner uses the entry for an entity's canonical type when present and
 * falls back to the global `confidence_threshold` otherwise.
 */
export interface EntityConfidenceThresholds {
  [entityType: string]: number;
}
|
|
17
|
+
|
|
18
|
+
/** Values and patterns that are exempt from guardrail handling. */
export interface EntityAllowlist {
  /** Exact values allowlisted for every entity type. */
  values: string[];
  /** Regular-expression sources allowlisted for every entity type. */
  patterns: string[];
  /** Exact values allowlisted only for the given entity type (key). */
  entities: Record<string, string[]>;
}
|
|
23
|
+
|
|
14
24
|
export interface FogClawConfig {
|
|
15
25
|
enabled: boolean;
|
|
16
26
|
guardrail_mode: GuardrailAction;
|
|
@@ -19,6 +29,9 @@ export interface FogClawConfig {
|
|
|
19
29
|
confidence_threshold: number;
|
|
20
30
|
custom_entities: string[];
|
|
21
31
|
entityActions: Record<string, GuardrailAction>;
|
|
32
|
+
entityConfidenceThresholds: EntityConfidenceThresholds;
|
|
33
|
+
allowlist: EntityAllowlist;
|
|
34
|
+
auditEnabled: boolean;
|
|
22
35
|
}
|
|
23
36
|
|
|
24
37
|
export interface ScanResult {
|
|
@@ -32,6 +45,12 @@ export interface RedactResult {
|
|
|
32
45
|
entities: Entity[];
|
|
33
46
|
}
|
|
34
47
|
|
|
48
|
+
/** Detected entities grouped by the guardrail action assigned to them. */
export interface GuardrailPlan {
  /** Entities whose action is "block". */
  blocked: Entity[];
  /** Entities whose action is "warn". */
  warned: Entity[];
  /** Entities to be redacted. */
  redacted: Entity[];
}
|
|
53
|
+
|
|
35
54
|
export const CANONICAL_TYPE_MAP: Record<string, string> = {
|
|
36
55
|
DOB: "DATE",
|
|
37
56
|
ZIP: "ZIP_CODE",
|
package/tests/config.test.ts
CHANGED
|
@@ -1,104 +1,78 @@
|
|
|
1
1
|
import { describe, it, expect } from "vitest";
|
|
2
|
-
import { loadConfig, DEFAULT_CONFIG } from "../src/config.js";
|
|
3
2
|
|
|
4
|
-
|
|
5
|
-
it("returns defaults when no overrides are provided", () => {
|
|
6
|
-
const config = loadConfig({});
|
|
7
|
-
expect(config).toEqual(DEFAULT_CONFIG);
|
|
8
|
-
});
|
|
9
|
-
|
|
10
|
-
it("merges partial overrides with defaults", () => {
|
|
11
|
-
const config = loadConfig({ guardrail_mode: "block", confidence_threshold: 0.8 });
|
|
12
|
-
|
|
13
|
-
expect(config.guardrail_mode).toBe("block");
|
|
14
|
-
expect(config.confidence_threshold).toBe(0.8);
|
|
15
|
-
// Unset defaults are preserved
|
|
16
|
-
expect(config.enabled).toBe(true);
|
|
17
|
-
expect(config.redactStrategy).toBe("token");
|
|
18
|
-
expect(config.model).toBe("onnx-community/gliner_large-v2.1");
|
|
19
|
-
expect(config.custom_entities).toEqual([]);
|
|
20
|
-
expect(config.entityActions).toEqual({});
|
|
21
|
-
});
|
|
3
|
+
import { loadConfig } from "../src/config.js";
|
|
22
4
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
expect(() => loadConfig({ guardrail_mode: "warn" })).not.toThrow();
|
|
27
|
-
});
|
|
28
|
-
|
|
29
|
-
it("rejects invalid guardrail_mode", () => {
|
|
30
|
-
expect(() =>
|
|
31
|
-
loadConfig({ guardrail_mode: "invalid" as never }),
|
|
32
|
-
).toThrowError(
|
|
33
|
-
'Invalid guardrail_mode "invalid". Must be one of: redact, block, warn',
|
|
34
|
-
);
|
|
35
|
-
});
|
|
5
|
+
describe("FogClaw config", () => {
|
|
6
|
+
it("loads defaults for new policy fields", () => {
|
|
7
|
+
const config = loadConfig({});
|
|
36
8
|
|
|
37
|
-
|
|
38
|
-
expect(
|
|
39
|
-
|
|
40
|
-
|
|
9
|
+
expect(config.entityConfidenceThresholds).toEqual({});
|
|
10
|
+
expect(config.allowlist).toMatchObject({
|
|
11
|
+
values: [],
|
|
12
|
+
patterns: [],
|
|
13
|
+
entities: {},
|
|
14
|
+
});
|
|
41
15
|
});
|
|
42
16
|
|
|
43
|
-
it("
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
);
|
|
49
|
-
});
|
|
17
|
+
it("canonicalizes per-entity confidence threshold keys", () => {
|
|
18
|
+
const config = loadConfig({
|
|
19
|
+
entityConfidenceThresholds: {
|
|
20
|
+
person: 0.7,
|
|
21
|
+
},
|
|
22
|
+
});
|
|
50
23
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
expect(() => loadConfig({ confidence_threshold: 0.5 })).not.toThrow();
|
|
24
|
+
expect(config.entityConfidenceThresholds).toEqual({
|
|
25
|
+
PERSON: 0.7,
|
|
26
|
+
});
|
|
55
27
|
});
|
|
56
28
|
|
|
57
|
-
it("rejects
|
|
29
|
+
it("rejects invalid per-entity confidence thresholds", () => {
|
|
58
30
|
expect(() =>
|
|
59
|
-
loadConfig({
|
|
60
|
-
|
|
31
|
+
loadConfig({
|
|
32
|
+
entityConfidenceThresholds: {
|
|
33
|
+
PERSON: 1.2,
|
|
34
|
+
},
|
|
35
|
+
}),
|
|
36
|
+
).toThrow('entityConfidenceThresholds["PERSON"] must be between 0 and 1, got 1.2');
|
|
61
37
|
});
|
|
62
38
|
|
|
63
|
-
it("
|
|
39
|
+
it("validates allowlist regex patterns", () => {
|
|
64
40
|
expect(() =>
|
|
65
|
-
loadConfig({
|
|
66
|
-
|
|
41
|
+
loadConfig({
|
|
42
|
+
allowlist: {
|
|
43
|
+
values: ["ok@example.com"],
|
|
44
|
+
patterns: ["["],
|
|
45
|
+
entities: {
|
|
46
|
+
PERSON: ["John"],
|
|
47
|
+
},
|
|
48
|
+
},
|
|
49
|
+
}),
|
|
50
|
+
).toThrow(/invalid regex pattern/);
|
|
67
51
|
});
|
|
68
52
|
|
|
69
|
-
it("
|
|
53
|
+
it("canonicalizes allowlist entity keys", () => {
|
|
70
54
|
const config = loadConfig({
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
SSN: "warn",
|
|
55
|
+
allowlist: {
|
|
56
|
+
entities: {
|
|
57
|
+
person: ["John"],
|
|
58
|
+
},
|
|
59
|
+
},
|
|
77
60
|
});
|
|
78
|
-
});
|
|
79
61
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
entityActions: { EMAIL: "delete" as never },
|
|
84
|
-
}),
|
|
85
|
-
).toThrowError(
|
|
86
|
-
'Invalid action "delete" for entity type "EMAIL". Must be one of: redact, block, warn',
|
|
87
|
-
);
|
|
88
|
-
});
|
|
89
|
-
|
|
90
|
-
it("preserves custom_entities from overrides", () => {
|
|
91
|
-
const config = loadConfig({ custom_entities: ["EMPLOYEE_ID", "PROJECT_CODE"] });
|
|
92
|
-
expect(config.custom_entities).toEqual(["EMPLOYEE_ID", "PROJECT_CODE"]);
|
|
62
|
+
expect(config.allowlist.entities).toEqual({
|
|
63
|
+
PERSON: ["John"],
|
|
64
|
+
});
|
|
93
65
|
});
|
|
94
66
|
|
|
95
|
-
it("
|
|
96
|
-
const config = loadConfig({
|
|
97
|
-
|
|
98
|
-
|
|
67
|
+
it("canonicalizes entity action labels", () => {
|
|
68
|
+
const config = loadConfig({
|
|
69
|
+
entityActions: {
|
|
70
|
+
person: "block",
|
|
71
|
+
},
|
|
72
|
+
});
|
|
99
73
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
74
|
+
expect(config.entityActions).toEqual({
|
|
75
|
+
PERSON: "block",
|
|
76
|
+
});
|
|
103
77
|
});
|
|
104
78
|
});
|