@datafog/fogclaw 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/CHANGELOG.md +31 -0
  2. package/README.md +83 -4
  3. package/dist/config.d.ts +1 -1
  4. package/dist/config.d.ts.map +1 -1
  5. package/dist/config.js +100 -1
  6. package/dist/config.js.map +1 -1
  7. package/dist/extract.d.ts +28 -0
  8. package/dist/extract.d.ts.map +1 -0
  9. package/dist/extract.js +91 -0
  10. package/dist/extract.js.map +1 -0
  11. package/dist/index.d.ts +1 -0
  12. package/dist/index.d.ts.map +1 -1
  13. package/dist/index.js +135 -30
  14. package/dist/index.js.map +1 -1
  15. package/dist/message-sending-handler.d.ts +40 -0
  16. package/dist/message-sending-handler.d.ts.map +1 -0
  17. package/dist/message-sending-handler.js +50 -0
  18. package/dist/message-sending-handler.js.map +1 -0
  19. package/dist/scanner.d.ts +13 -2
  20. package/dist/scanner.d.ts.map +1 -1
  21. package/dist/scanner.js +76 -2
  22. package/dist/scanner.js.map +1 -1
  23. package/dist/tool-result-handler.d.ts +36 -0
  24. package/dist/tool-result-handler.d.ts.map +1 -0
  25. package/dist/tool-result-handler.js +91 -0
  26. package/dist/tool-result-handler.js.map +1 -0
  27. package/dist/types.d.ts +17 -0
  28. package/dist/types.d.ts.map +1 -1
  29. package/dist/types.js +3 -0
  30. package/dist/types.js.map +1 -1
  31. package/docs/OBSERVABILITY.md +22 -15
  32. package/docs/SECURITY.md +6 -4
  33. package/docs/plans/active/2026-02-17-feat-tool-result-pii-scanning-plan.md +293 -0
  34. package/docs/specs/2026-02-17-feat-outbound-message-pii-scanning-spec.md +93 -0
  35. package/docs/specs/2026-02-17-feat-tool-result-pii-scanning-spec.md +122 -0
  36. package/fogclaw.config.example.json +19 -1
  37. package/openclaw.plugin.json +63 -2
  38. package/package.json +9 -9
  39. package/scripts/ci/he-docs-drift.sh +0 -0
  40. package/scripts/ci/he-docs-lint.sh +0 -0
  41. package/scripts/ci/he-plans-lint.sh +0 -0
  42. package/scripts/ci/he-runbooks-lint.sh +0 -0
  43. package/scripts/ci/he-specs-lint.sh +0 -0
  44. package/scripts/ci/he-spikes-lint.sh +0 -0
  45. package/scripts/runbooks/select-runbooks.sh +0 -0
  46. package/src/config.ts +139 -2
  47. package/src/extract.ts +98 -0
  48. package/src/index.ts +194 -36
  49. package/src/message-sending-handler.ts +87 -0
  50. package/src/scanner.ts +114 -8
  51. package/src/tool-result-handler.ts +133 -0
  52. package/src/types.ts +23 -0
  53. package/tests/config.test.ts +55 -81
  54. package/tests/extract.test.ts +185 -0
  55. package/tests/message-sending-handler.test.ts +244 -0
  56. package/tests/plugin-smoke.test.ts +139 -3
  57. package/tests/scanner.test.ts +61 -1
  58. package/tests/tool-result-handler.test.ts +329 -0
@@ -0,0 +1,122 @@
1
+ ---
2
+ slug: 2026-02-17-feat-tool-result-pii-scanning
3
+ status: intake-complete
4
+ date: 2026-02-17T00:00:00Z
5
+ owner: sidmohan
6
+ plan_mode: lightweight
7
+ spike_recommended: no
8
+ priority: high
9
+ ---
10
+
11
+ # feat: Add PII scanning to tool results via tool_result_persist hook
12
+
13
+ ## Purpose / Big Picture
14
+
15
+ FogClaw currently only scans the user prompt text (`before_agent_start`). The majority of PII entering an agent's context comes from **tool results** — file reads, web fetches, API calls, database queries. This content bypasses FogClaw entirely today.
16
+
17
+ By hooking into OpenClaw's `tool_result_persist` lifecycle, FogClaw can scan and redact PII in tool results **before they are persisted to the session transcript**, closing the largest gap in FogClaw's coverage.
18
+
19
+ ## Scope
20
+
21
+ ### In Scope
22
+
23
+ - Register a `tool_result_persist` hook handler in FogClaw's plugin registration
24
+ - Extract text content from `AgentMessage` tool result payloads
25
+ - Scan extracted text using the **regex engine only** (synchronous constraint)
26
+ - Apply the existing `guardrail_mode`, `entityActions`, `redactStrategy`, and `allowlist` config to detected entities
27
+ - Redact PII spans in tool result text content (all modes — redact, block, warn — produce span-level redaction in tool results)
28
+ - Emit audit log entries for tool result detections when `auditEnabled: true`
29
+ - Add unit tests for the new hook handler
30
+ - Add integration test confirming the hook registers and transforms tool results
31
+
32
+ ### Boundaries
33
+
34
+ - **No GLiNER on this path.** The `tool_result_persist` hook is synchronous-only; async handlers are rejected by OpenClaw. Regex covers structured PII (SSN, email, phone, credit card, IP, date, zip). Unstructured entity detection (person names, organizations) is out of scope for this hook.
35
+ - **No `before_tool_call` hook.** This hook exists in OpenClaw's type system but has zero active invocation sites upstream. It will be addressed in a future initiative once OpenClaw wires it in.
36
+ - **No `message_sending` hook.** Outbound message scanning is a separate priority.
37
+ - **No scanning of `event.messages` history.** Historical message scanning is a separate priority.
38
+ - **No new config surface.** Reuse existing FogClaw config — no `toolResultScanning` sub-object.
39
+ - **No changes to OpenClaw upstream.** This initiative is FogClaw-only.
40
+
41
+ ## Non-Goals
42
+
43
+ - Blocking tool execution (requires `before_tool_call`, which is not wired upstream)
44
+ - Modifying files on disk
45
+ - Scanning binary/image content in tool results
46
+ - Real-time GLiNER inference on tool results
47
+
48
+ ## Risks
49
+
50
+ - **Performance on hot path.** `tool_result_persist` runs synchronously on every tool result. Regex scanning is sub-millisecond for typical payloads, but very large tool results (e.g., reading a 10K-line file) could add measurable latency. Mitigation: benchmark and consider a size cap with configurable threshold.
51
+ - **AgentMessage structure varies.** Tool results are typed as `AgentMessage`, whose internal structure depends on the tool and provider. Text extraction must handle multiple content formats without crashing on unexpected shapes. Mitigation: defensive extraction with fallback to no-op.
52
+ - **Redaction alters tool output semantics.** Replacing `123-45-6789` with `[SSN_1]` in a tool result changes what the model sees. This is the intended behavior, but could cause unexpected downstream effects if the model tries to use the redacted value literally. Mitigation: this is inherent to the feature and matches existing `before_agent_start` behavior.
53
+
54
+ ## Rollout
55
+
56
+ - Ship as part of the next FogClaw release (a 0.1.7 patch or a 0.2.0 minor release)
57
+ - Enabled by default when FogClaw is enabled (no separate toggle)
58
+ - Audit logging captures tool result scans for observability
59
+
60
+ ## Validation and Acceptance Signals
61
+
62
+ - Unit tests pass for text extraction from various `AgentMessage` shapes
63
+ - Unit tests pass for regex scanning + redaction of tool result content
64
+ - Integration test confirms `tool_result_persist` hook registers via `api.on()`
65
+ - Integration test confirms a tool result containing PII is transformed before persistence
66
+ - Audit log entries are emitted for tool result detections
67
+ - Existing `before_agent_start` tests continue to pass (no regression)
68
+ - Manual verification: install FogClaw in OpenClaw, have agent read a file with PII, confirm session transcript shows redacted content
69
+
70
+ ## Requirements
71
+
72
+ | ID | Priority | Requirement |
73
+ |---|---|---|
74
+ | R1 | critical | Register a `tool_result_persist` hook handler that scans tool result text for PII using the regex engine |
75
+ | R2 | critical | Redact detected PII spans in tool result messages using the configured `redactStrategy` (token/mask/hash) |
76
+ | R3 | critical | Handler must be synchronous (no Promises returned) — OpenClaw rejects async `tool_result_persist` handlers |
77
+ | R4 | high | Apply existing `entityActions` and `guardrail_mode` config to determine per-entity action; all actions produce span-level redaction in tool results |
78
+ | R5 | high | Respect existing `allowlist` config (global values, patterns, per-entity lists) |
79
+ | R6 | high | Extract text content defensively from `AgentMessage` payloads — handle string content, array-of-content-blocks, and unexpected shapes without throwing |
80
+ | R7 | medium | Emit audit log entry per tool result scan when `auditEnabled: true`, including tool name, entity count, and labels (no raw PII values in logs) |
81
+ | R8 | medium | Skip scanning for tool results with no extractable text content (binary, empty, non-string) |
82
+ | R9 | low | Include `source: "tool_result"` in audit log entries to distinguish from prompt-level scans |
83
+
84
+ ## Key Decisions
85
+
86
+ - **Regex-only on hot path**: GLiNER is async and cannot run in a synchronous hook. Regex covers the 7 structured PII types (SSN, email, phone, credit card, IP, date, zip) at sub-millisecond latency. This is a deliberate tradeoff — unstructured entities (person names, orgs) are not scanned in tool results.
87
+ - **Reuse existing config**: No separate config section for tool result scanning. The same `guardrail_mode`, `entityActions`, `redactStrategy`, and `allowlist` apply everywhere. Simpler mental model for users.
88
+ - **Span-level redaction for all modes**: Even when `entityActions` says `block` for an entity type, the tool result is redacted at the span level (not replaced entirely). This preserves non-PII context for the agent while removing sensitive values.
89
+
90
+ ## Success Criteria
91
+
92
+ - PII in tool results (file reads, web fetches, etc.) is redacted before entering the session transcript
93
+ - Regex engine detects SSN, email, phone, credit card, IP, date, and zip in tool result content
94
+ - No measurable latency impact for typical tool results (<1KB text)
95
+ - Audit log captures tool result scan events with entity counts and labels
96
+ - All existing tests pass; new tests cover the hook handler, text extraction, and edge cases
97
+
98
+ ## Constraints
99
+
100
+ - `tool_result_persist` handler MUST be synchronous (OpenClaw constraint)
101
+ - Must not introduce new dependencies
102
+ - Must not change the existing `FogClawConfig` type (reuse existing fields)
103
+ - Regex engine only — no ONNX/GLiNER on this path
104
+
105
+ ## Priority
106
+
107
+ - priority: high
108
+ - rationale: This closes the single largest gap in FogClaw's PII coverage. Tool results are the primary vector for PII entering agent context, and this hook is the only active interception point OpenClaw provides for that data flow.
109
+
110
+ ## Initial Milestone Candidates
111
+
112
+ - M1: Text extraction utility — defensively extract text from `AgentMessage` tool result payloads, handling string content, content block arrays, and edge cases. Likely files: `src/extract.ts`, `tests/extract.test.ts`.
113
+ - M2: `tool_result_persist` hook handler — register the hook, wire in regex scanning + redaction + audit logging, return transformed message. Likely files: `src/index.ts`, `tests/tool-result-hook.test.ts`.
114
+ - M3: Integration smoke test — end-to-end test confirming a registered FogClaw plugin transforms a tool result containing PII. Likely files: `tests/plugin-smoke.test.ts` (extend existing).
115
+
116
+ ## Handoff
117
+
118
+ After spec approval, proceed to `he-plan` for implementation breakdown. No spike needed — the OpenClaw hook contract is well-documented and the regex engine + redactor already exist in FogClaw.
119
+
120
+ ## Revision Notes
121
+
122
+ - 2026-02-17T00:00:00Z: Initialized spec from template. Reason: establish intake baseline for tool result PII scanning via `tool_result_persist` hook.
@@ -4,6 +4,10 @@
4
4
  "redactStrategy": "token",
5
5
  "model": "onnx-community/gliner_large-v2.1",
6
6
  "confidence_threshold": 0.5,
7
+ "entityConfidenceThresholds": {
8
+ "PERSON": 0.6,
9
+ "ORGANIZATION": 0.7
10
+ },
7
11
  "custom_entities": ["project codename", "internal tool name"],
8
12
  "entityActions": {
9
13
  "SSN": "block",
@@ -11,5 +15,19 @@
11
15
  "EMAIL": "redact",
12
16
  "PHONE": "redact",
13
17
  "PERSON": "warn"
14
- }
18
+ },
19
+ "allowlist": {
20
+ "values": [
21
+ "noreply@example.com"
22
+ ],
23
+ "patterns": [
24
+ "^internal-"
25
+ ],
26
+ "entities": {
27
+ "PERSON": [
28
+ "john doe"
29
+ ]
30
+ }
31
+ },
32
+ "auditEnabled": true
15
33
  }
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "id": "fogclaw",
3
3
  "name": "FogClaw",
4
- "version": "0.1.4",
5
- "description": "PII detection & custom entity redaction powered by DataFog",
4
+ "version": "0.2.0",
5
+ "description": "PII detection & custom entity redaction plugin powered by DataFog",
6
6
  "configSchema": {
7
7
  "type": "object",
8
8
  "properties": {
@@ -30,6 +30,15 @@
30
30
  "minimum": 0,
31
31
  "maximum": 1
32
32
  },
33
+ "entityConfidenceThresholds": {
34
+ "type": "object",
35
+ "additionalProperties": {
36
+ "type": "number",
37
+ "minimum": 0,
38
+ "maximum": 1
39
+ },
40
+ "default": {}
41
+ },
33
42
  "custom_entities": {
34
43
  "type": "array",
35
44
  "items": {
@@ -44,6 +53,43 @@
44
53
  "enum": ["redact", "block", "warn"]
45
54
  },
46
55
  "default": {}
56
+ },
57
+ "allowlist": {
58
+ "type": "object",
59
+ "properties": {
60
+ "values": {
61
+ "type": "array",
62
+ "items": {
63
+ "type": "string"
64
+ },
65
+ "default": []
66
+ },
67
+ "patterns": {
68
+ "type": "array",
69
+ "items": {
70
+ "type": "string"
71
+ },
72
+ "default": []
73
+ },
74
+ "entities": {
75
+ "type": "object",
76
+ "additionalProperties": {
77
+ "type": "array",
78
+ "items": {
79
+ "type": "string"
80
+ }
81
+ }
82
+ }
83
+ },
84
+ "default": {
85
+ "values": [],
86
+ "patterns": [],
87
+ "entities": {}
88
+ }
89
+ },
90
+ "auditEnabled": {
91
+ "type": "boolean",
92
+ "default": true
47
93
  }
48
94
  }
49
95
  },
@@ -70,6 +116,11 @@
70
116
  "help": "Minimum GLiNER score (0-1) required before an entity is treated as a detection.",
71
117
  "advanced": true
72
118
  },
119
+ "entityConfidenceThresholds": {
120
+ "label": "Per-Entity Confidence Thresholds",
121
+ "help": "Override confidence thresholds by entity label (for example: {\"PERSON\": 0.95, \"ORGANIZATION\": 0.7}).",
122
+ "advanced": true
123
+ },
73
124
  "custom_entities": {
74
125
  "label": "Custom Entity Labels",
75
126
  "help": "Extra labels to detect as sensitive entities (for example: `project code`, `competitor name`)."
@@ -78,6 +129,16 @@
78
129
  "label": "Entity Actions",
79
130
  "help": "Map specific entity labels to per-entity behavior (for example: {\"EMAIL\": \"block\", \"PHONE\": \"redact\"}).",
80
131
  "advanced": true
132
+ },
133
+ "allowlist": {
134
+ "label": "Allowlist / Exemptions",
135
+ "help": "Global and per-entity allowlist entries to exclude from enforcement. Supports exact values and regex patterns.",
136
+ "advanced": true
137
+ },
138
+ "auditEnabled": {
139
+ "label": "Audit Logging",
140
+ "help": "Emit structured audit summaries for guardrail decisions.",
141
+ "advanced": true
81
142
  }
82
143
  }
83
144
  }
package/package.json CHANGED
@@ -1,10 +1,17 @@
1
1
  {
2
2
  "name": "@datafog/fogclaw",
3
- "version": "0.1.5",
3
+ "version": "0.2.0",
4
4
  "description": "OpenClaw plugin for PII detection & custom entity redaction powered by DataFog",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
7
7
  "types": "dist/index.d.ts",
8
+ "scripts": {
9
+ "build": "tsc",
10
+ "test": "vitest run",
11
+ "test:watch": "vitest",
12
+ "test:plugin-smoke": "vitest run tests/plugin-smoke.test.ts",
13
+ "lint": "tsc --noEmit"
14
+ },
8
15
  "dependencies": {
9
16
  "gliner": "^0.0.19",
10
17
  "onnxruntime-node": "1.19.2",
@@ -32,12 +39,5 @@
32
39
  "overrides": {
33
40
  "onnxruntime-web": "1.21.0",
34
41
  "sharp": "0.34.5"
35
- },
36
- "scripts": {
37
- "build": "tsc",
38
- "test": "vitest run",
39
- "test:watch": "vitest",
40
- "test:plugin-smoke": "vitest run tests/plugin-smoke.test.ts",
41
- "lint": "tsc --noEmit"
42
42
  }
43
- }
43
+ }
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
package/src/config.ts CHANGED
@@ -1,8 +1,109 @@
1
- import type { FogClawConfig, GuardrailAction, RedactStrategy } from "./types.js";
1
+ import {
2
+ canonicalType,
3
+ type EntityAllowlist,
4
+ type FogClawConfig,
5
+ type GuardrailAction,
6
+ type RedactStrategy,
7
+ } from "./types.js";
2
8
 
3
9
  const VALID_GUARDRAIL_MODES: GuardrailAction[] = ["redact", "block", "warn"];
4
10
  const VALID_REDACT_STRATEGIES: RedactStrategy[] = ["token", "mask", "hash"];
5
11
 
12
/**
 * Validate that `value` is an array of strings and return a cleaned copy.
 *
 * Each entry is trimmed, and entries that are empty after trimming are dropped.
 * Array holes are read as `undefined` and therefore rejected as non-strings.
 *
 * @param value - Untrusted config value to validate.
 * @param path - Config path used as the prefix of error messages.
 * @returns Trimmed, non-empty strings in their original order.
 * @throws Error if `value` is not an array or contains a non-string entry.
 */
function ensureStringList(value: unknown, path: string): string[] {
  if (!Array.isArray(value)) {
    throw new Error(`${path} must be an array of strings`);
  }

  const cleaned: string[] = [];
  for (const entry of value as unknown[]) {
    if (typeof entry !== "string") {
      throw new Error(`${path} must contain only strings`);
    }

    const trimmed = entry.trim();
    if (trimmed.length > 0) {
      cleaned.push(trimmed);
    }
  }

  return cleaned;
}
27
+
28
/**
 * Validate and normalize the `allowlist` config section.
 *
 * - `null`/`undefined` yields an empty allowlist.
 * - `values` and `patterns` must be string arrays; both are de-duplicated.
 * - Every pattern must compile as a `RegExp`.
 * - `entities` maps entity labels to string arrays. Labels are normalized
 *   with `canonicalType`; when several labels normalize to the same key their
 *   lists are merged (and de-duplicated) instead of the last one winning.
 *
 * @param value - Untrusted `allowlist` value from user config.
 * @returns A fully populated allowlist object.
 * @throws Error if any part of the structure has the wrong shape or a
 *   pattern is not a valid regular expression.
 */
function ensureEntityAllowlist(value: unknown): EntityAllowlist {
  if (value == null) {
    return { values: [], patterns: [], entities: {} };
  }

  if (typeof value !== "object" || Array.isArray(value)) {
    throw new Error("allowlist must be an object");
  }

  const raw = value as Record<string, unknown>;
  const values = ensureStringList(raw.values ?? [], "allowlist.values");
  const patterns = ensureStringList(raw.patterns ?? [], "allowlist.patterns");

  // Compile each pattern once up front so a bad regex fails at config load.
  for (const pattern of patterns) {
    try {
      new RegExp(pattern);
    } catch {
      throw new Error(`allowlist.patterns contains invalid regex pattern: "${pattern}"`);
    }
  }

  const entitiesValue = raw.entities ?? {};
  if (
    typeof entitiesValue !== "object" ||
    Array.isArray(entitiesValue) ||
    entitiesValue === null
  ) {
    throw new Error("allowlist.entities must be an object mapping entity labels to string arrays");
  }

  // Accumulate in a Map so labels that normalize to the same canonical type
  // are merged rather than overwritten.
  const merged = new Map<string, Set<string>>();
  for (const [entityType, entryValue] of Object.entries(entitiesValue)) {
    const normalizedType = canonicalType(entityType);
    const entries = ensureStringList(entryValue, `allowlist.entities.${entityType}`);

    const bucket = merged.get(normalizedType) ?? new Set<string>();
    for (const entry of entries) {
      bucket.add(entry);
    }
    merged.set(normalizedType, bucket);
  }

  const entities: Record<string, string[]> = {};
  for (const [normalizedType, bucket] of merged) {
    entities[normalizedType] = [...bucket];
  }

  return {
    values: [...new Set(values)],
    patterns: [...new Set(patterns)],
    entities,
  };
}
70
+
71
/**
 * Validate and normalize per-entity confidence thresholds.
 *
 * `null`/`undefined` yields an empty map. Any other non-object value
 * (including `0`, `""` and `false`) is rejected, matching how the allowlist
 * section treats wrongly typed input. Keys are normalized with
 * `canonicalType`; if two keys normalize to the same label the later entry
 * wins.
 *
 * @param value - Untrusted `entityConfidenceThresholds` value from config.
 * @returns Map from canonical entity label to a threshold in [0, 1].
 * @throws Error if `value` is not an object, or a threshold is not a number
 *   within [0, 1].
 */
function ensureEntityConfidenceThresholds(
  value: unknown,
): Record<string, number> {
  if (value == null) {
    return {};
  }

  if (typeof value !== "object" || Array.isArray(value)) {
    throw new Error("entityConfidenceThresholds must be an object");
  }

  const raw = value as Record<string, unknown>;
  const normalized: Record<string, number> = {};

  for (const [entityType, rawThreshold] of Object.entries(raw)) {
    if (typeof rawThreshold !== "number" || Number.isNaN(rawThreshold)) {
      throw new Error(
        `entityConfidenceThresholds["${entityType}"] must be a number between 0 and 1, got ${String(
          rawThreshold,
        )}`,
      );
    }

    // Infinity passes the typeof check above and is rejected here.
    if (rawThreshold < 0 || rawThreshold > 1) {
      throw new Error(
        `entityConfidenceThresholds["${entityType}"] must be between 0 and 1, got ${rawThreshold}`,
      );
    }

    normalized[canonicalType(entityType)] = rawThreshold;
  }

  return normalized;
}
106
+
6
107
  export const DEFAULT_CONFIG: FogClawConfig = {
7
108
  enabled: true,
8
109
  guardrail_mode: "redact",
@@ -11,10 +112,37 @@ export const DEFAULT_CONFIG: FogClawConfig = {
11
112
  confidence_threshold: 0.5,
12
113
  custom_entities: [],
13
114
  entityActions: {},
115
+ entityConfidenceThresholds: {},
116
+ allowlist: {
117
+ values: [],
118
+ patterns: [],
119
+ entities: {},
120
+ },
121
+ auditEnabled: true,
14
122
  };
15
123
 
16
124
  export function loadConfig(overrides: Partial<FogClawConfig>): FogClawConfig {
17
- const config: FogClawConfig = { ...DEFAULT_CONFIG, ...overrides };
125
+ const config: FogClawConfig = {
126
+ ...DEFAULT_CONFIG,
127
+ ...overrides,
128
+ entityActions: {
129
+ ...DEFAULT_CONFIG.entityActions,
130
+ ...(overrides.entityActions ?? {}),
131
+ },
132
+ entityConfidenceThresholds: {
133
+ ...DEFAULT_CONFIG.entityConfidenceThresholds,
134
+ ...(overrides.entityConfidenceThresholds ?? {}),
135
+ },
136
+ };
137
+
138
+ config.allowlist = ensureEntityAllowlist(overrides.allowlist ?? DEFAULT_CONFIG.allowlist);
139
+ config.entityConfidenceThresholds = ensureEntityConfidenceThresholds(
140
+ config.entityConfidenceThresholds,
141
+ );
142
+
143
+ if (typeof config.enabled !== "boolean") {
144
+ throw new Error(`enabled must be true or false`);
145
+ }
18
146
 
19
147
  if (!VALID_GUARDRAIL_MODES.includes(config.guardrail_mode)) {
20
148
  throw new Error(
@@ -34,13 +162,22 @@ export function loadConfig(overrides: Partial<FogClawConfig>): FogClawConfig {
34
162
  );
35
163
  }
36
164
 
165
+ if (typeof config.auditEnabled !== "boolean") {
166
+ throw new Error(`auditEnabled must be true or false`);
167
+ }
168
+
169
+ const normalizedActions: Record<string, GuardrailAction> = {};
37
170
  for (const [entityType, action] of Object.entries(config.entityActions)) {
38
171
  if (!VALID_GUARDRAIL_MODES.includes(action)) {
39
172
  throw new Error(
40
173
  `Invalid action "${action}" for entity type "${entityType}". Must be one of: ${VALID_GUARDRAIL_MODES.join(", ")}`,
41
174
  );
42
175
  }
176
+
177
+ const normalizedType = canonicalType(entityType);
178
+ normalizedActions[normalizedType] = action;
43
179
  }
180
+ config.entityActions = normalizedActions;
44
181
 
45
182
  return config;
46
183
  }
package/src/extract.ts ADDED
@@ -0,0 +1,98 @@
1
/**
 * Utilities for extracting text from AgentMessage tool result payloads
 * and writing text back into them after redaction.
 *
 * AgentMessage shapes handled:
 *   - Plain string
 *   - Object with `content: string`
 *   - Object with `content: [{ type: "text", text: "..." }, ...]`
 *
 * When a content array holds several text blocks, their texts are joined
 * with a null byte (\0) into one string. replaceText splits on the same
 * separator to map the redacted string back onto the individual blocks.
 */

// Separator placed between the texts of consecutive text blocks.
// A block's own text may already contain this character; replaceText
// accounts for that by counting the separators inside each original block.
const SEGMENT_SEP = "\0";

/** A content block of the form `{ type: "text", text: string, ... }`. */
type TextBlock = Record<string, unknown> & { type: "text"; text: string };

/** Runtime check for the text-block shape; safe on any input value. */
function isTextBlock(block: unknown): block is TextBlock {
  if (block == null || typeof block !== "object") return false;
  const candidate = block as Record<string, unknown>;
  return candidate.type === "text" && typeof candidate.text === "string";
}

/**
 * Extract all text content from an AgentMessage tool result payload.
 *
 * @param message - A string, an object with a `content` field, or anything else.
 * @returns The message itself if it is a string, `content` if that is a
 *   string, the texts of all text blocks joined with the separator if
 *   `content` is an array, and an empty string for every other shape.
 */
export function extractText(message: unknown): string {
  if (message == null) return "";
  if (typeof message === "string") return message;
  if (typeof message !== "object") return "";

  const content = (message as Record<string, unknown>).content;
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  const blocks: unknown[] = content;
  return blocks
    .filter(isTextBlock)
    .map((block) => block.text)
    .join(SEGMENT_SEP);
}

/**
 * Replace the text content of an AgentMessage tool result payload with its
 * redacted version. Returns a shallow copy; the input is not mutated.
 *
 * For content arrays, `redactedText` is split on the separator and handed
 * back to the text blocks in order. A block whose original text itself
 * contains N separators receives N + 1 segments (rejoined with the
 * separator), so embedded null bytes do not shift later blocks. This relies
 * on the redacted string keeping every separator of the extracted string;
 * if it has fewer segments than expected, the blocks left over are returned
 * unchanged.
 *
 * @param message - The original payload, in any shape extractText accepts.
 * @param redactedText - Redacted form of `extractText(message)`.
 * @returns `redactedText` for a string message, a copied object with the
 *   replaced content for recognized object shapes, and the original message
 *   for unrecognized shapes.
 */
export function replaceText(message: unknown, redactedText: string): unknown {
  if (message == null) return message;
  if (typeof message === "string") return redactedText;
  if (typeof message !== "object") return message;

  const msg = message as Record<string, unknown>;
  const content = msg.content;

  if (typeof content === "string") {
    return { ...msg, content: redactedText };
  }
  if (!Array.isArray(content)) return message;

  const segments = redactedText.split(SEGMENT_SEP);
  let cursor = 0;

  const blocks: unknown[] = content;
  const newContent = blocks.map((block) => {
    if (!isTextBlock(block) || cursor >= segments.length) return block;

    // Number of segments this block contributed to the joined string.
    const span = block.text.split(SEGMENT_SEP).length;
    const text = segments.slice(cursor, cursor + span).join(SEGMENT_SEP);
    cursor += span;
    return { ...block, text };
  });

  return { ...msg, content: newContent };
}