@framers/agentos 0.1.54 → 0.1.56
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/extensions/ExtensionManager.d.ts +1 -0
- package/dist/extensions/ExtensionManager.d.ts.map +1 -1
- package/dist/extensions/ExtensionManager.js +8 -0
- package/dist/extensions/ExtensionManager.js.map +1 -1
- package/dist/extensions/ISharedServiceRegistry.d.ts +35 -0
- package/dist/extensions/ISharedServiceRegistry.d.ts.map +1 -0
- package/dist/extensions/ISharedServiceRegistry.js +2 -0
- package/dist/extensions/ISharedServiceRegistry.js.map +1 -0
- package/dist/extensions/SharedServiceRegistry.d.ts +15 -0
- package/dist/extensions/SharedServiceRegistry.d.ts.map +1 -0
- package/dist/extensions/SharedServiceRegistry.js +63 -0
- package/dist/extensions/SharedServiceRegistry.js.map +1 -0
- package/dist/extensions/index.d.ts +3 -0
- package/dist/extensions/index.d.ts.map +1 -1
- package/dist/extensions/index.js +4 -0
- package/dist/extensions/index.js.map +1 -1
- package/dist/extensions/manifest.d.ts +2 -0
- package/dist/extensions/manifest.d.ts.map +1 -1
- package/dist/extensions/packs/pii-redaction/EntityMerger.d.ts +127 -0
- package/dist/extensions/packs/pii-redaction/EntityMerger.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/EntityMerger.js +263 -0
- package/dist/extensions/packs/pii-redaction/EntityMerger.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/PiiDetectionPipeline.d.ts +199 -0
- package/dist/extensions/packs/pii-redaction/PiiDetectionPipeline.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/PiiDetectionPipeline.js +456 -0
- package/dist/extensions/packs/pii-redaction/PiiDetectionPipeline.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/PiiRedactionGuardrail.d.ts +121 -0
- package/dist/extensions/packs/pii-redaction/PiiRedactionGuardrail.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/PiiRedactionGuardrail.js +271 -0
- package/dist/extensions/packs/pii-redaction/PiiRedactionGuardrail.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/RedactionEngine.d.ts +61 -0
- package/dist/extensions/packs/pii-redaction/RedactionEngine.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/RedactionEngine.js +207 -0
- package/dist/extensions/packs/pii-redaction/RedactionEngine.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/index.d.ts +90 -0
- package/dist/extensions/packs/pii-redaction/index.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/index.js +195 -0
- package/dist/extensions/packs/pii-redaction/index.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/recognizers/IEntityRecognizer.d.ts +151 -0
- package/dist/extensions/packs/pii-redaction/recognizers/IEntityRecognizer.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/recognizers/IEntityRecognizer.js +14 -0
- package/dist/extensions/packs/pii-redaction/recognizers/IEntityRecognizer.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/recognizers/LlmJudgeRecognizer.d.ts +177 -0
- package/dist/extensions/packs/pii-redaction/recognizers/LlmJudgeRecognizer.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/recognizers/LlmJudgeRecognizer.js +420 -0
- package/dist/extensions/packs/pii-redaction/recognizers/LlmJudgeRecognizer.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/recognizers/NerModelRecognizer.d.ts +145 -0
- package/dist/extensions/packs/pii-redaction/recognizers/NerModelRecognizer.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/recognizers/NerModelRecognizer.js +299 -0
- package/dist/extensions/packs/pii-redaction/recognizers/NerModelRecognizer.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/recognizers/NlpPrefilterRecognizer.d.ts +102 -0
- package/dist/extensions/packs/pii-redaction/recognizers/NlpPrefilterRecognizer.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/recognizers/NlpPrefilterRecognizer.js +228 -0
- package/dist/extensions/packs/pii-redaction/recognizers/NlpPrefilterRecognizer.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/recognizers/RegexRecognizer.d.ts +103 -0
- package/dist/extensions/packs/pii-redaction/recognizers/RegexRecognizer.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/recognizers/RegexRecognizer.js +275 -0
- package/dist/extensions/packs/pii-redaction/recognizers/RegexRecognizer.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/tools/PiiRedactTool.d.ts +118 -0
- package/dist/extensions/packs/pii-redaction/tools/PiiRedactTool.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/tools/PiiRedactTool.js +152 -0
- package/dist/extensions/packs/pii-redaction/tools/PiiRedactTool.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/tools/PiiScanTool.d.ts +98 -0
- package/dist/extensions/packs/pii-redaction/tools/PiiScanTool.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/tools/PiiScanTool.js +153 -0
- package/dist/extensions/packs/pii-redaction/tools/PiiScanTool.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/types.d.ts +332 -0
- package/dist/extensions/packs/pii-redaction/types.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/types.js +83 -0
- package/dist/extensions/packs/pii-redaction/types.js.map +1 -0
- package/dist/extensions/types.d.ts +5 -0
- package/dist/extensions/types.d.ts.map +1 -1
- package/dist/extensions/types.js.map +1 -1
- package/package.json +11 -1
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file PiiRedactionGuardrail.ts
|
|
3
|
+
* @description Guardrail service that intercepts agent input and/or output to
|
|
4
|
+
* detect and redact PII (Personally Identifiable Information) in real time.
|
|
5
|
+
*
|
|
6
|
+
* The guardrail integrates with the AgentOS guardrail hook pipeline via the
|
|
7
|
+
* {@link IGuardrailService} interface, providing two evaluation paths:
|
|
8
|
+
*
|
|
9
|
+
* - **Input evaluation** (`evaluateInput`): Scans the user's text input before
|
|
10
|
+
* it enters the orchestration pipeline and returns a SANITIZE action with
|
|
11
|
+
* redacted text when PII is found.
|
|
12
|
+
*
|
|
13
|
+
* - **Output evaluation** (`evaluateOutput`): Uses a sentence-boundary buffer
|
|
14
|
+
* keyed by `streamId` to accumulate streaming text deltas. When a sentence
|
|
15
|
+
* boundary is detected (`. `, `? `, `! `, or `\n`) the buffer is scanned
|
|
16
|
+
* for PII and redacted text is returned as a SANITIZE action. Entity
|
|
17
|
+
* offsets are always relative to the buffer, not individual chunks.
|
|
18
|
+
*
|
|
19
|
+
* Which evaluation path(s) are active is controlled by the
|
|
20
|
+
* {@link PiiRedactionPackOptions.guardrailScope} option:
|
|
21
|
+
* - `'input'` -- only `evaluateInput` is active
|
|
22
|
+
* - `'output'` -- only `evaluateOutput` is active
|
|
23
|
+
* - `'both'` -- both paths are active (default)
|
|
24
|
+
*
|
|
25
|
+
* @module pii-redaction/PiiRedactionGuardrail
|
|
26
|
+
*/
|
|
27
|
+
import { GuardrailAction } from '../../../core/guardrails/IGuardrailService.js';
|
|
28
|
+
import { AgentOSResponseChunkType } from '../../../api/types/AgentOSResponse.js';
|
|
29
|
+
import { PiiDetectionPipeline } from './PiiDetectionPipeline.js';
|
|
30
|
+
import { redactText } from './RedactionEngine.js';
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
// Constants
|
|
33
|
+
// ---------------------------------------------------------------------------
|
|
34
|
+
/**
|
|
35
|
+
* Default maximum number of streaming evaluations (sentence flushes) allowed
|
|
36
|
+
* per stream before the guardrail stops scanning. Prevents unbounded CPU
|
|
37
|
+
* usage on very long streams.
|
|
38
|
+
*/
|
|
39
|
+
const DEFAULT_MAX_STREAMING_EVALUATIONS = 50;
|
|
40
|
+
/**
|
|
41
|
+
* Regex matching sentence-boundary positions in accumulated buffer text.
|
|
42
|
+
*
|
|
43
|
+
* Matches:
|
|
44
|
+
* - `. ` (period + space)
|
|
45
|
+
* - `? ` (question mark + space)
|
|
46
|
+
* - `! ` (exclamation mark + space)
|
|
47
|
+
* - `\n` (newline)
|
|
48
|
+
*
|
|
49
|
+
 * The `g` flag makes this regex stateful: `test()` and `exec()` advance
|
|
50
|
+
 * `lastIndex`, so callers must reset it to 0 after each use.
|
|
51
|
+
*/
|
|
52
|
+
const SENTENCE_BOUNDARY_RE = /[.?!]\s|\n/g;
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
// PiiRedactionGuardrail
|
|
55
|
+
// ---------------------------------------------------------------------------
|
|
56
|
+
/**
 * AgentOS guardrail service that detects and redacts PII from both inbound
 * user messages and outbound agent responses.
 *
 * ### Construction
 * ```ts
 * const guardrail = new PiiRedactionGuardrail(registry, options, getSecret);
 * ```
 *
 * ### Per-stream state
 * Output evaluation keeps one mutable state object per `streamId` in
 * `this.streamBuffers`:
 *
 * - `buffer`        -- text accumulated since the last redaction
 * - `scannedLength` -- how much of `buffer` was already scanned and found clean
 * - `evaluations`   -- number of detection runs performed for the stream
 * - `lastSeenAt`    -- timestamp (ms) of the most recent chunk
 *
 * ### Thread safety
 * Calls with **different** `streamId` values touch different state objects.
 * NOTE(review): the original author states that calls with the **same**
 * `streamId` are serialised by the AgentOS streaming pipeline; that cannot be
 * confirmed from this file, and no locking is done here.
 *
 * @implements {IGuardrailService}
 */
export class PiiRedactionGuardrail {
    // -----------------------------------------------------------------------
    // Constructor
    // -----------------------------------------------------------------------
    /**
     * Construct a new PiiRedactionGuardrail.
     *
     * @param services  - Shared service registry, forwarded unchanged to the
     *                    detection pipeline.
     * @param options   - Pack-level configuration. Read here:
     *                    `redactionStyle` (default `'placeholder'`),
     *                    `guardrailScope` (default `'both'`),
     *                    `maxStreamingEvaluations` (default
     *                    `DEFAULT_MAX_STREAMING_EVALUATIONS`) and
     *                    `evaluateStreamingChunks` (default `false`).
     * @param getSecret - Optional secret resolver, forwarded unchanged to the
     *                    detection pipeline.
     */
    constructor(services, options, getSecret) {
        /**
         * Per-stream sentence-boundary buffers for output evaluation.
         * Keys are the `streamId` values read from response chunks.
         */
        this.streamBuffers = new Map();
        // Build the detection pipeline with the provided options.
        this.pipeline = new PiiDetectionPipeline(services, options, getSecret);
        // Resolve configuration with defaults.
        this.redactionStyle = options.redactionStyle ?? 'placeholder';
        this.scope = options.guardrailScope ?? 'both';
        this.maxStreamingEvaluations =
            options.maxStreamingEvaluations ?? DEFAULT_MAX_STREAMING_EVALUATIONS;
        // Guardrail configuration object derived from the pack options.
        this.config = {
            evaluateStreamingChunks: options.evaluateStreamingChunks ?? false,
            maxStreamingEvaluations: this.maxStreamingEvaluations,
        };
    }

    // -----------------------------------------------------------------------
    // IGuardrailService — evaluateInput
    // -----------------------------------------------------------------------
    /**
     * Evaluate inbound user text for PII.
     *
     * Returns a {@link GuardrailAction.SANITIZE} result carrying the redacted
     * text when the pipeline reports at least one entity. Returns `null` when
     * `guardrailScope` is `'output'`, when the payload has no text, or when
     * no entity is detected.
     *
     * @param payload - Input payload; the text is read from
     *                  `payload.input.textInput`.
     * @returns Evaluation result with redacted text, or `null` if nothing
     *          needs to change.
     */
    async evaluateInput(payload) {
        // Skip input evaluation when scope is output-only.
        if (this.scope === 'output') {
            return null;
        }
        const text = payload.input?.textInput;
        // Nothing to scan if the input has no text content.
        if (!text) {
            return null;
        }
        const detectionResult = await this.pipeline.detect(text);
        // No PII found — allow the input through unchanged.
        if (detectionResult.entities.length === 0) {
            return null;
        }
        return {
            action: GuardrailAction.SANITIZE,
            modifiedText: redactText(text, detectionResult.entities, this.redactionStyle),
            reason: detectionResult.summary,
            reasonCode: 'PII_REDACTED',
            metadata: {
                entityCount: detectionResult.entities.length,
                tiersExecuted: detectionResult.tiersExecuted,
                processingTimeMs: detectionResult.processingTimeMs,
            },
        };
    }

    // -----------------------------------------------------------------------
    // IGuardrailService — evaluateOutput
    // -----------------------------------------------------------------------
    /**
     * Evaluate outbound response chunks for PII using a per-stream buffer.
     *
     * ### Buffering strategy
     *
     * - `TEXT_DELTA` text is appended to the stream's buffer.
     * - `FINAL_RESPONSE` text **replaces** the buffer instead of being
     *   appended: the final chunk carries the complete response text, so
     *   appending it to already-buffered deltas would scan (and return) the
     *   same text twice.
     * - Detection runs only when a sentence boundary that has not been
     *   scanned yet appears in the buffer, or when the stream ends
     *   (`chunk.isFinal` or a `FINAL_RESPONSE` chunk).
     * - Detection always runs over the whole buffer, so entity offsets are
     *   buffer-relative and `modifiedText` is the redacted buffer.
     * - After a redaction on a non-final chunk the buffer is cleared; after a
     *   clean scan the buffer is kept and only the scanned length is recorded.
     *
     * At most `maxStreamingEvaluations` detection runs are performed per
     * stream for non-final chunks; the stream-end scan always runs.
     *
     * Returns `null` when `guardrailScope` is `'input'`.
     *
     * @param payload - Output payload; the chunk is read from `payload.chunk`.
     * @returns Evaluation result with redacted buffer text, or `null` if
     *          nothing needs to change.
     */
    async evaluateOutput(payload) {
        // Skip output evaluation when scope is input-only.
        if (this.scope === 'input') {
            return null;
        }
        const { chunk } = payload;
        const isFinalResponse = chunk.type === AgentOSResponseChunkType.FINAL_RESPONSE;

        // Pick the text carried by this chunk, if any.
        let chunkText = null;
        if (chunk.type === AgentOSResponseChunkType.TEXT_DELTA) {
            chunkText = chunk.textDelta ?? null;
        }
        else if (isFinalResponse) {
            chunkText = chunk.finalResponseText ?? null;
        }
        // The stream ends on a FINAL_RESPONSE chunk or whenever isFinal is set.
        const isStreamEnd = isFinalResponse || Boolean(chunk.isFinal);

        // Nothing to evaluate if there is no text and the stream is not ending.
        if (!chunkText && !isStreamEnd) {
            return null;
        }

        // Retrieve or create the per-stream buffer state.
        const streamId = chunk.streamId;
        let state = this.streamBuffers.get(streamId);
        if (!state) {
            state = { buffer: '', evaluations: 0, lastSeenAt: Date.now(), scannedLength: 0 };
            this.streamBuffers.set(streamId, state);
        }
        state.lastSeenAt = Date.now();

        if (chunkText) {
            if (isFinalResponse) {
                // Complete response text: scan it on its own rather than
                // gluing it onto deltas that contain the same text.
                state.buffer = chunkText;
                state.scannedLength = 0;
            }
            else {
                state.buffer += chunkText;
            }
        }

        if (isStreamEnd) {
            // Drop the state before awaiting detection so a rejected
            // detection cannot leave a stale entry in the map.
            this.streamBuffers.delete(streamId);
            // Nothing was buffered — there is nothing to scan.
            if (!state.buffer) {
                return null;
            }
        }
        else {
            // Evaluation budget exhausted — pass through without evaluation.
            if (state.evaluations >= this.maxStreamingEvaluations) {
                return null;
            }
            // Wait for a sentence boundary that has not been scanned yet.
            if (!this.hasUnscannedSentenceBoundary(state)) {
                return null;
            }
        }

        state.evaluations++;
        const scannedText = state.buffer;
        const detectionResult = await this.pipeline.detect(scannedText);

        // No PII found — allow the chunk through unchanged, and remember how
        // far the buffer has been scanned so the same boundary does not
        // trigger another full scan on the next chunk.
        if (detectionResult.entities.length === 0) {
            if (!isStreamEnd) {
                state.scannedLength = scannedText.length;
            }
            return null;
        }

        // PII was found — redact against the BUFFER text (not the individual
        // chunk delta) since entity offsets are buffer-relative.
        const redacted = redactText(scannedText, detectionResult.entities, this.redactionStyle);

        // The sanitised text stands in for the whole accumulated buffer, so
        // start accumulating afresh.
        if (!isStreamEnd) {
            state.buffer = '';
            state.scannedLength = 0;
        }
        return {
            action: GuardrailAction.SANITIZE,
            modifiedText: redacted,
            reason: detectionResult.summary,
            reasonCode: 'PII_REDACTED',
            metadata: {
                entityCount: detectionResult.entities.length,
                tiersExecuted: detectionResult.tiersExecuted,
                processingTimeMs: detectionResult.processingTimeMs,
                streamId,
                evaluationNumber: state.evaluations,
            },
        };
    }

    // -----------------------------------------------------------------------
    // Internal helpers
    // -----------------------------------------------------------------------
    /**
     * Report whether `state.buffer` contains a sentence boundary that ends
     * beyond the already-scanned prefix (`state.scannedLength`).
     *
     * Uses a private global copy of `SENTENCE_BOUNDARY_RE` so the shared
     * regex's `lastIndex` is never touched.
     *
     * @param state - Per-stream state holding `buffer` and `scannedLength`.
     * @returns `true` when a new, unscanned boundary is present.
     */
    hasUnscannedSentenceBoundary(state) {
        const { buffer } = state;
        const scannedLength = state.scannedLength ?? 0;
        const boundaryRe = new RegExp(SENTENCE_BOUNDARY_RE.source, 'g');
        // Start one character early: a two-character boundary such as ". "
        // may straddle the scanned/unscanned edge.
        boundaryRe.lastIndex = Math.max(0, scannedLength - 1);
        for (let match = boundaryRe.exec(buffer); match !== null; match = boundaryRe.exec(buffer)) {
            // Only boundaries that extend into unscanned text count as new.
            if (match.index + match[0].length > scannedLength) {
                return true;
            }
        }
        return false;
    }
}
|
|
271
|
+
//# sourceMappingURL=PiiRedactionGuardrail.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PiiRedactionGuardrail.js","sourceRoot":"","sources":["../../../../src/extensions/packs/pii-redaction/PiiRedactionGuardrail.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAUH,OAAO,EAAE,eAAe,EAAE,MAAM,4CAA4C,CAAC;AAC7E,OAAO,EAAE,wBAAwB,EAAE,MAAM,oCAAoC,CAAC;AAE9E,OAAO,EAAE,oBAAoB,EAAE,MAAM,wBAAwB,CAAC;AAC9D,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAqB/C,8EAA8E;AAC9E,YAAY;AACZ,8EAA8E;AAE9E;;;;GAIG;AACH,MAAM,iCAAiC,GAAG,EAAE,CAAC;AAE7C;;;;;;;;;;;GAWG;AACH,MAAM,oBAAoB,GAAG,aAAa,CAAC;AAE3C,8EAA8E;AAC9E,wBAAwB;AACxB,8EAA8E;AAE9E;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,OAAO,qBAAqB;IAkChC,0EAA0E;IAC1E,cAAc;IACd,0EAA0E;IAE1E;;;;;;;;;OASG;IACH,YACE,QAAgC,EAChC,OAAgC,EAChC,SAA8C;QAvBhD;;;WAGG;QACc,kBAAa,GAAG,IAAI,GAAG,EAAuB,CAAC;QAqB9D,0DAA0D;QAC1D,IAAI,CAAC,QAAQ,GAAG,IAAI,oBAAoB,CAAC,QAAQ,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;QAEvE,gDAAgD;QAChD,IAAI,CAAC,cAAc,GAAG,OAAO,CAAC,cAAc,IAAI,aAAa,CAAC;QAC9D,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,cAAc,IAAI,MAAM,CAAC;QAC9C,IAAI,CAAC,uBAAuB;YAC1B,OAAO,CAAC,uBAAuB,IAAI,iCAAiC,CAAC;QAEvE,+CAA+C;QAC/C,IAAI,CAAC,MAAM,GAAG;YACZ,uBAAuB,EAAE,OAAO,CAAC,uBAAuB,IAAI,KAAK;YACjE,uBAAuB,EAAE,IAAI,CAAC,uBAAuB;SACtD,CAAC;IACJ,CAAC;IAED,0EAA0E;IAC1E,oCAAoC;IACpC,0EAA0E;IAE1E;;;;;;;;;;;;;;OAcG;IACH,KAAK,CAAC,aAAa,CACjB,OAA8B;QAE9B,mDAAmD;QACnD,IAAI,IAAI,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC5B,OAAO,IAAI,CAAC;QACd,CAAC;QAED,2CAA2C;QAC3C,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC;QAErC,oDAAoD;QACpD,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,OAAO,IAAI,CAAC;QACd,CAAC;QAED,uDAAuD;QACvD,MAAM,eAAe,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAEzD,oDAAoD;QACpD,IAAI,eAAe,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1C,OAAO,IAAI,CAAC;QACd,CAAC;QAED,sEAAsE;QACtE,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,EAAE,eAAe,CAAC,QAAQ,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;QAEjF,OAAO;YACL,MAAM,EAAE,eAAe,CAAC,QAAQ;YAChC,YAAY,EAAE,QAAQ;YACtB,MAAM,EAAE,eAAe,CAAC,OAAO;YAC/B,UAAU,EAAE,cAAc;YAC1B,QAAQ,EAAE;gBACR,WAAW,EAAE,eAAe,CAAC,QAAQ,CAAC,MAAM;gBAC5C,aAAa,EAAE,eAAe,CAAC,aAAa;gBAC5C,gBAAgB,EAAE,eAAe
,CAAC,gBAAgB;aACnD;SACF,CAAC;IACJ,CAAC;IAED,0EAA0E;IAC1E,qCAAqC;IACrC,0EAA0E;IAE1E;;;;;;;;;;;;;;;;;;;;;;;;OAwBG;IACH,KAAK,CAAC,cAAc,CAClB,OAA+B;QAE/B,mDAAmD;QACnD,IAAI,IAAI,CAAC,KAAK,KAAK,OAAO,EAAE,CAAC;YAC3B,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,EAAE,KAAK,EAAE,GAAG,OAAO,CAAC;QAE1B,4DAA4D;QAC5D,IAAI,YAAY,GAAkB,IAAI,CAAC;QACvC,IAAI,WAAW,GAAG,KAAK,CAAC;QAExB,IAAI,KAAK,CAAC,IAAI,KAAK,wBAAwB,CAAC,UAAU,EAAE,CAAC;YACvD,2DAA2D;YAC3D,YAAY,GAAI,KAAgC,CAAC,SAAS,IAAI,IAAI,CAAC;QACrE,CAAC;aAAM,IAAI,KAAK,CAAC,IAAI,KAAK,wBAAwB,CAAC,cAAc,EAAE,CAAC;YAClE,uDAAuD;YACvD,YAAY;gBACT,KAA+C,CAAC,iBAAiB,IAAI,IAAI,CAAC;YAC7E,WAAW,GAAG,IAAI,CAAC;QACrB,CAAC;QAED,gEAAgE;QAChE,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;YAClB,WAAW,GAAG,IAAI,CAAC;QACrB,CAAC;QAED,4EAA4E;QAC5E,IAAI,CAAC,YAAY,IAAI,CAAC,WAAW,EAAE,CAAC;YAClC,OAAO,IAAI,CAAC;QACd,CAAC;QAED,kDAAkD;QAClD,MAAM,QAAQ,GAAG,KAAK,CAAC,QAAQ,CAAC;QAChC,IAAI,KAAK,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAE7C,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,KAAK,GAAG,EAAE,MAAM,EAAE,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC;YAC/D,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAC1C,CAAC;QAED,kCAAkC;QAClC,KAAK,CAAC,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE9B,iCAAiC;QACjC,IAAI,YAAY,EAAE,CAAC;YACjB,KAAK,CAAC,MAAM,IAAI,YAAY,CAAC;QAC/B,CAAC;QAED,uDAAuD;QACvD,IAAI,KAAK,CAAC,WAAW,IAAI,IAAI,CAAC,uBAAuB,IAAI,CAAC,WAAW,EAAE,CAAC;YACtE,mDAAmD;YACnD,OAAO,IAAI,CAAC;QACd,CAAC;QAED,8DAA8D;QAC9D,MAAM,mBAAmB,GAAG,oBAAoB,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAEpE,sEAAsE;QACtE,oBAAoB,CAAC,SAAS,GAAG,CAAC,CAAC;QAEnC,0EAA0E;QAC1E,IAAI,CAAC,mBAAmB,IAAI,CAAC,WAAW,EAAE,CAAC;YACzC,OAAO,IAAI,CAAC;QACd,CAAC;QAED,oCAAoC;QACpC,KAAK,CAAC,WAAW,EAAE,CAAC;QAEpB,8CAA8C;QAC9C,MAAM,eAAe,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QAEjE,iCAAiC;QACjC,IAAI,WAAW,EAAE,CAAC;YAChB,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QACtC,CAAC;QAED,oDAAoD;QACpD,IAAI,eAAe,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1C,OAAO,IAAI,CAAC;QACd,CAAC;QAED,qEAAqE;QACrE,yDAAyD;QAC
zD,MAAM,QAAQ,GAAG,UAAU,CACzB,KAAK,CAAC,MAAM,EACZ,eAAe,CAAC,QAAQ,EACxB,IAAI,CAAC,cAAc,CACpB,CAAC;QAEF,qEAAqE;QACrE,0CAA0C;QAC1C,IAAI,CAAC,WAAW,EAAE,CAAC;YACjB,KAAK,CAAC,MAAM,GAAG,EAAE,CAAC;QACpB,CAAC;QAED,OAAO;YACL,MAAM,EAAE,eAAe,CAAC,QAAQ;YAChC,YAAY,EAAE,QAAQ;YACtB,MAAM,EAAE,eAAe,CAAC,OAAO;YAC/B,UAAU,EAAE,cAAc;YAC1B,QAAQ,EAAE;gBACR,WAAW,EAAE,eAAe,CAAC,QAAQ,CAAC,MAAM;gBAC5C,aAAa,EAAE,eAAe,CAAC,aAAa;gBAC5C,gBAAgB,EAAE,eAAe,CAAC,gBAAgB;gBAClD,QAAQ;gBACR,gBAAgB,EAAE,KAAK,EAAE,WAAW;aACrC;SACF,CAAC;IACJ,CAAC;CACF"}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file RedactionEngine.ts
|
|
3
|
+
* @description Applies configurable redaction transformations to PII entity
|
|
4
|
+
* spans detected in a text string.
|
|
5
|
+
*
|
|
6
|
+
* The engine processes entity spans in **reverse order** (highest start offset
|
|
7
|
+
* first) so that replacing a span never invalidates the offsets of earlier
|
|
8
|
+
* spans that have not yet been processed. This is the standard slice-and-join
|
|
9
|
+
* pattern for in-place text replacement with absolute positions.
|
|
10
|
+
*
|
|
11
|
+
* ## Supported redaction styles
|
|
12
|
+
*
|
|
13
|
+
* | Style | Example input | Example output |
|
|
14
|
+
* |---|---|---|
|
|
15
|
+
* | `placeholder` | `John Smith` (PERSON) | `[PERSON]` |
|
|
16
|
+
* | `mask` | `John Smith` | `J*** S****` |
|
|
17
|
+
* | `hash` | `John Smith` (PERSON) | `[PERSON:a1b2c3d4e5]` |
|
|
18
|
+
* | `category-tag` | `John Smith` (PERSON) | `<PII type="PERSON">REDACTED</PII>` |
|
|
19
|
+
*
|
|
20
|
+
* @module pii-redaction/RedactionEngine
|
|
21
|
+
*/
|
|
22
|
+
import type { PiiEntity, RedactionStyle } from './types';
|
|
23
|
+
/**
|
|
24
|
+
* Applies PII redaction to `text` by replacing each entity span with a token
|
|
25
|
+
* generated according to the specified {@link RedactionStyle}.
|
|
26
|
+
*
|
|
27
|
+
* ### Algorithm
|
|
28
|
+
*
|
|
29
|
+
* 1. If `entities` is empty, return `text` unchanged.
|
|
30
|
+
* 2. Sort entities by `start` offset in **descending** order (right to left).
|
|
31
|
+
* 3. Iteratively replace each span via `String.prototype.slice` — because
|
|
32
|
+
* processing proceeds right-to-left, earlier spans retain their original
|
|
33
|
+
* offsets and can be replaced without adjustment.
|
|
34
|
+
* 4. Return the final redacted string.
|
|
35
|
+
*
|
|
36
|
+
* The function assumes that the provided `entities` are **non-overlapping**.
|
|
37
|
+
* Overlapping spans must be resolved by {@link mergeEntities} before calling
|
|
38
|
+
* this function. Overlapping spans produce undefined output.
|
|
39
|
+
*
|
|
40
|
+
* @param text - The original input string to redact.
|
|
41
|
+
* @param entities - Non-overlapping PII entities to redact. May be unsorted;
|
|
42
|
+
* the function sorts internally.
|
|
43
|
+
* @param style - How each PII span should be replaced.
|
|
44
|
+
* @returns The redacted string with all PII spans replaced according to
|
|
45
|
+
* `style`.
|
|
46
|
+
*
|
|
47
|
+
* @example
|
|
48
|
+
* ```ts
|
|
49
|
+
* const redacted = redactText(
|
|
50
|
+
* 'Contact John Smith at john@example.com please',
|
|
51
|
+
* [
|
|
52
|
+
* { entityType: 'PERSON', text: 'John Smith', start: 8, end: 18, score: 0.95, source: 'ner-model' },
|
|
53
|
+
* { entityType: 'EMAIL', text: 'john@example.com', start: 22, end: 38, score: 1.0, source: 'regex' },
|
|
54
|
+
* ],
|
|
55
|
+
* 'placeholder',
|
|
56
|
+
* );
|
|
57
|
+
* // → 'Contact [PERSON] at [EMAIL] please'
|
|
58
|
+
* ```
|
|
59
|
+
*/
|
|
60
|
+
export declare function redactText(text: string, entities: PiiEntity[], style: RedactionStyle): string;
|
|
61
|
+
//# sourceMappingURL=RedactionEngine.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"RedactionEngine.d.ts","sourceRoot":"","sources":["../../../../src/extensions/packs/pii-redaction/RedactionEngine.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAGH,OAAO,KAAK,EAAE,SAAS,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AAgJzD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoCG;AACH,wBAAgB,UAAU,CACxB,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,SAAS,EAAE,EACrB,KAAK,EAAE,cAAc,GACpB,MAAM,CAwBR"}
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file RedactionEngine.ts
|
|
3
|
+
* @description Applies configurable redaction transformations to PII entity
|
|
4
|
+
* spans detected in a text string.
|
|
5
|
+
*
|
|
6
|
+
* The engine processes entity spans in **reverse order** (highest start offset
|
|
7
|
+
* first) so that replacing a span never invalidates the offsets of earlier
|
|
8
|
+
* spans that have not yet been processed. This is the standard slice-and-join
|
|
9
|
+
* pattern for in-place text replacement with absolute positions.
|
|
10
|
+
*
|
|
11
|
+
* ## Supported redaction styles
|
|
12
|
+
*
|
|
13
|
+
* | Style | Example input | Example output |
|
|
14
|
+
* |---|---|---|
|
|
15
|
+
* | `placeholder` | `John Smith` (PERSON) | `[PERSON]` |
|
|
16
|
+
* | `mask` | `John Smith` | `J*** S****` |
|
|
17
|
+
* | `hash` | `John Smith` (PERSON) | `[PERSON:a1b2c3d4e5]` |
|
|
18
|
+
* | `category-tag` | `John Smith` (PERSON) | `<PII type="PERSON">REDACTED</PII>` |
|
|
19
|
+
*
|
|
20
|
+
* @module pii-redaction/RedactionEngine
|
|
21
|
+
*/
|
|
22
|
+
import { createHash } from 'crypto';
|
|
23
|
+
// ---------------------------------------------------------------------------
|
|
24
|
+
// Style implementation helpers
|
|
25
|
+
// ---------------------------------------------------------------------------
|
|
26
|
+
/**
 * Builds the `[TYPE]` placeholder that stands in for a redacted entity.
 *
 * Nothing of the matched text survives; only the entity's category is
 * written into the token.
 *
 * @example `[PERSON]`, `[EMAIL]`, `[SSN]`
 *
 * @param entity - The detected PII entity; only `entityType` is read.
 * @returns The placeholder token.
 */
function applyPlaceholder(entity) {
    const { entityType } = entity;
    return `[${entityType}]`;
}
|
|
41
|
+
/**
 * Produces a word-masked version of the entity's matched text.
 *
 * The text is split on runs of whitespace (`\s+`). Whitespace runs are kept
 * verbatim; every other segment keeps its first character and has each
 * following character replaced with `*`. Segments of one character (or empty
 * segments) are returned unchanged.
 *
 * Characters are counted as Unicode code points, not UTF-16 code units, so a
 * leading character outside the Basic Multilingual Plane (e.g. an emoji) is
 * preserved whole instead of being cut into a lone surrogate.
 *
 * @example
 * - `'John Smith'`        → `'J*** S****'`
 * - `'john@example.com'`  → `'j***************'` (no whitespace → one word;
 *   punctuation such as `@` and `.` is masked like any other character)
 *
 * @param entity - The detected PII entity; only `text` is read.
 * @returns Replacement string with per-word masking applied.
 */
function applyMask(entity) {
    // The capture group keeps the whitespace separators in the result array.
    return entity.text
        .split(/(\s+)/)
        .map((segment) => {
            // Preserve whitespace segments as-is so spacing is maintained.
            if (/^\s+$/.test(segment)) {
                return segment;
            }
            // Iterate by code point so surrogate pairs stay intact.
            const chars = Array.from(segment);
            if (chars.length <= 1) {
                // Empty or single-character word — nothing to mask.
                return segment;
            }
            // Keep the first character; replace the rest with '*'.
            return chars[0] + '*'.repeat(chars.length - 1);
        })
        .join('');
}
|
|
76
|
+
/**
 * Builds a deterministic, content-derived token for the given entity.
 *
 * The token has the form `[TYPE:xxxxxxxxxx]`, where the suffix is the first
 * 10 hex characters of the SHA-256 digest of the entity's matched text
 * (encoded as UTF-8). Identical text always yields an identical token, so
 * repeated occurrences of the same value can be recognised as duplicates.
 *
 * NOTE(review): the digest is unsalted, so short or low-entropy values may be
 * guessable by enumerating candidates — confirm this is acceptable for the
 * deployment.
 *
 * @example `[PERSON:a1b2c3d4e5]` (exact hash depends on input text)
 *
 * @param entity - The detected PII entity; `text` and `entityType` are read.
 * @returns Replacement string of the form `[TYPE:xxxxxxxxxx]`.
 */
function applyHash(entity) {
    const hasher = createHash('sha256');
    hasher.update(entity.text, 'utf8');
    // Only a 10-character prefix of the hex digest goes into the token.
    const fingerprint = hasher.digest('hex').slice(0, 10);
    return `[${entity.entityType}:${fingerprint}]`;
}
|
|
98
|
+
/**
 * Replaces an entity with an XML-style `<PII>` element whose `type`
 * attribute carries the entity type and whose content is the fixed text
 * `REDACTED`.
 *
 * The tag keeps the redaction visible to downstream parsers that understand
 * the `<PII>` schema. The entity type is interpolated as-is, without escaping.
 *
 * @example `<PII type="PERSON">REDACTED</PII>`
 *
 * @param entity - The detected PII entity to redact; only `entityType` is read.
 * @returns Replacement string.
 */
function applyCategoryTag(entity) {
    const { entityType } = entity;
    return `<PII type="${entityType}">REDACTED</PII>`;
}
|
|
114
|
+
// ---------------------------------------------------------------------------
|
|
115
|
+
// Router
|
|
116
|
+
// ---------------------------------------------------------------------------
|
|
117
|
+
/**
 * Dispatches to the replacement strategy that matches the requested
 * {@link RedactionStyle} and returns the text to splice in for one entity.
 *
 * @param entity - The entity span to redact.
 * @param style - How the PII value should be replaced: `'placeholder'`,
 *   `'mask'`, `'hash'` or `'category-tag'`.
 * @returns The replacement text to splice into the output string.
 *
 * @throws {Error} When an unrecognised style value is passed.
 */
function getReplacementText(entity, style) {
    if (style === 'placeholder') {
        return applyPlaceholder(entity);
    }
    if (style === 'mask') {
        return applyMask(entity);
    }
    if (style === 'hash') {
        return applyHash(entity);
    }
    if (style === 'category-tag') {
        return applyCategoryTag(entity);
    }
    // Only reachable when a caller bypasses the type system and supplies a
    // style outside the supported set.
    throw new Error(`Unknown redaction style: ${String(style)}`);
}
|
|
146
|
+
// ---------------------------------------------------------------------------
|
|
147
|
+
// Public API
|
|
148
|
+
// ---------------------------------------------------------------------------
|
|
149
|
+
/**
 * Applies PII redaction to `text` by replacing each entity span with a token
 * generated according to the specified {@link RedactionStyle}.
 *
 * ### Algorithm
 *
 * 1. If `entities` is empty, return `text` unchanged.
 * 2. Sort a copy of the entities by `start` ascending (longer span first on
 *    ties); the caller's array is never mutated.
 * 3. Walk the spans left to right, collecting the untouched text between
 *    spans and the replacement for each span, then join once at the end.
 *    Every offset refers to the original `text`, so no adjustment is needed.
 *
 * Callers should pass **non-overlapping** entities (resolve overlaps with
 * {@link mergeEntities} first). As a safety net, a span that starts inside
 * an already-redacted region is coalesced into it: its characters are
 * removed but no extra token is emitted, so no part of an overlapping span
 * leaks into the output. Negative offsets are clamped to 0 and an `end`
 * smaller than `start` is treated as an empty span.
 *
 * @param text - The original input string to redact.
 * @param entities - PII entities to redact. May be unsorted; the function
 *   sorts internally.
 * @param style - How each PII span should be replaced.
 * @returns The redacted string with all PII spans replaced according to
 *   `style`.
 * @throws {Error} Propagated from `getReplacementText` when `style` is not a
 *   recognised redaction style.
 *
 * @example
 * ```ts
 * const redacted = redactText(
 *   'Contact John Smith at john@example.com please',
 *   [
 *     { entityType: 'PERSON', text: 'John Smith', start: 8, end: 18, score: 0.95, source: 'ner-model' },
 *     { entityType: 'EMAIL', text: 'john@example.com', start: 22, end: 38, score: 1.0, source: 'regex' },
 *   ],
 *   'placeholder',
 * );
 * // → 'Contact [PERSON] at [EMAIL] please'
 * ```
 */
export function redactText(text, entities, style) {
    // Fast path: nothing to redact.
    if (entities.length === 0) {
        return text;
    }
    // Ascending by start; on ties the longer span goes first so that the
    // enclosing span is the one that produces the token.
    const ordered = entities
        .slice()
        .sort((a, b) => a.start - b.start || b.end - a.end);
    const pieces = [];
    // Index in the ORIGINAL text up to which output has been produced.
    let cursor = 0;
    for (const entity of ordered) {
        // Normalise malformed offsets instead of letting String#slice
        // reinterpret negatives as "from the end".
        const start = Math.max(0, entity.start);
        const end = Math.max(start, entity.end);
        if (start < cursor) {
            // Overlaps a span that already emitted a token: swallow any tail
            // that sticks out so its characters cannot leak.
            cursor = Math.max(cursor, end);
            continue;
        }
        pieces.push(text.slice(cursor, start), getReplacementText(entity, style));
        cursor = end;
    }
    // Whatever follows the last span is kept verbatim.
    pieces.push(text.slice(cursor));
    return pieces.join('');
}
|
|
207
|
+
//# sourceMappingURL=RedactionEngine.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"RedactionEngine.js","sourceRoot":"","sources":["../../../../src/extensions/packs/pii-redaction/RedactionEngine.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAGpC,8EAA8E;AAC9E,+BAA+B;AAC/B,8EAA8E;AAE9E;;;;;;;;;;;GAWG;AACH,SAAS,gBAAgB,CAAC,MAAiB;IACzC,OAAO,IAAI,MAAM,CAAC,UAAU,GAAG,CAAC;AAClC,CAAC;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,SAAS,SAAS,CAAC,MAAiB;IAClC,0EAA0E;IAC1E,OAAO,MAAM,CAAC,IAAI;SACf,KAAK,CAAC,OAAO,CAAC;SACd,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE;QACf,+DAA+D;QAC/D,IAAI,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC1B,OAAO,OAAO,CAAC;QACjB,CAAC;QAED,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YACxB,2CAA2C;YAC3C,OAAO,OAAO,CAAC;QACjB,CAAC;QAED,uDAAuD;QACvD,OAAO,OAAO,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACrD,CAAC,CAAC;SACD,IAAI,CAAC,EAAE,CAAC,CAAC;AACd,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,SAAS,SAAS,CAAC,MAAiB;IAClC,yDAAyD;IACzD,MAAM,MAAM,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAE9E,wEAAwE;IACxE,mCAAmC;IACnC,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAEtC,OAAO,IAAI,MAAM,CAAC,UAAU,IAAI,SAAS,GAAG,CAAC;AAC/C,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,SAAS,gBAAgB,CAAC,MAAiB;IACzC,OAAO,cAAc,MAAM,CAAC,UAAU,kBAAkB,CAAC;AAC3D,CAAC;AAED,8EAA8E;AAC9E,SAAS;AACT,8EAA8E;AAE9E;;;;;;;;;;GAUG;AACH,SAAS,kBAAkB,CAAC,MAAiB,EAAE,KAAqB;IAClE,QAAQ,KAAK,EAAE,CAAC;QACd,KAAK,aAAa;YAChB,OAAO,gBAAgB,CAAC,MAAM,CAAC,CAAC;QAElC,KAAK,MAAM;YACT,OAAO,SAAS,CAAC,MAAM,CAAC,CAAC;QAE3B,KAAK,MAAM;YACT,OAAO,SAAS,CAAC,MAAM,CAAC,CAAC;QAE3B,KAAK,cAAc;YACjB,OAAO,gBAAgB,CAAC,MAAM,CAAC,CAAC;QAElC,OAAO,CAAC,CAAC,CAAC;YACR,uEAAuE;YACvE,gCAAgC;YAChC,MAAM,WAAW,GAAU,KAAK,CAAC;YACjC,MAAM,IAAI,KAAK,CAAC,4BAA4B,MAAM,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC;QACrE,CAAC;IACH,CAAC;AACH,CAAC;AAED,8EAA8E;AAC9E,aAAa;AACb,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoCG;AACH,MAAM,UAAU,UAAU,CACxB,IAAY,EACZ,QAAqB,EACrB,KAAqB;IAErB,gCAAgC;IAChC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,C
AAC;QAC1B,OAAO,IAAI,CAAC;IACd,CAAC;IAED,wEAAwE;IACxE,qDAAqD;IACrD,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAElE,0EAA0E;IAC1E,IAAI,MAAM,GAAG,IAAI,CAAC;IAClB,KAAK,MAAM,MAAM,IAAI,MAAM,EAAE,CAAC;QAC5B,MAAM,WAAW,GAAG,kBAAkB,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;QAEtD,oEAAoE;QACpE,6BAA6B;QAC7B,MAAM;YACJ,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC;gBAC7B,WAAW;gBACX,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file index.ts
|
|
3
|
+
* @description Pack factory for the PII Redaction extension pack.
|
|
4
|
+
*
|
|
5
|
+
* This module exports the main `createPiiRedactionPack()` factory function
|
|
6
|
+
* that assembles the guardrail, scan tool, and redact tool into a single
|
|
7
|
+
* {@link ExtensionPack} ready for registration with the AgentOS extension
|
|
8
|
+
* manager.
|
|
9
|
+
*
|
|
10
|
+
* It also exports a `createExtensionPack()` bridge function that conforms to
|
|
11
|
+
* the AgentOS manifest factory convention, delegating to
|
|
12
|
+
* `createPiiRedactionPack()` with options extracted from the
|
|
13
|
+
* {@link ExtensionPackContext}.
|
|
14
|
+
*
|
|
15
|
+
* ### Lifecycle
|
|
16
|
+
*
|
|
17
|
+
* Components are built eagerly at pack creation time for direct programmatic
|
|
18
|
+
* use. When the pack is activated by the extension manager, the `onActivate`
|
|
19
|
+
* hook rebuilds all components with the manager's shared service registry and
|
|
20
|
+
* secret resolver, ensuring heavyweight services (NLP models, NER weights)
|
|
21
|
+
* are shared across the agent.
|
|
22
|
+
*
|
|
23
|
+
* @module pii-redaction
|
|
24
|
+
*/
|
|
25
|
+
import type { ExtensionPack, ExtensionPackContext } from '../../manifest';
|
|
26
|
+
import type { PiiRedactionPackOptions } from './types';
|
|
27
|
+
/**
|
|
28
|
+
* Re-export all types from the PII redaction type definitions so consumers
|
|
29
|
+
* can import everything from a single entry point:
|
|
30
|
+
* ```ts
|
|
31
|
+
* import { createPiiRedactionPack, PiiEntityType } from './pii-redaction';
|
|
32
|
+
* ```
|
|
33
|
+
*/
|
|
34
|
+
export * from './types';
|
|
35
|
+
/**
 * Create an {@link ExtensionPack} that bundles the PII redaction guardrail
 * with the `pii_scan` and `pii_redact` tools.
 *
 * ### Default behaviour (zero-config)
 * When called without options, the pack detects all 18 PII entity types at
 * a confidence threshold of 0.5, redacts using the `placeholder` style
 * (e.g. `[EMAIL]`), and evaluates both input and output.
 *
 * ### Activation lifecycle
 * The pack uses mutable internal state (`state.services`, `state.getSecret`)
 * that is upgraded when the extension manager calls `onActivate` with its
 * shared service registry and secret resolver. All three components
 * (guardrail, scan tool, redact tool) are rebuilt at that point so they
 * share NLP/NER model instances with other extensions.
 *
 * @param options - Optional pack-level configuration; may be omitted
 *   entirely. All properties have sensible defaults; see
 *   {@link PiiRedactionPackOptions}.
 * @returns A fully-configured {@link ExtensionPack} with one guardrail and
 *   two tools.
 *
 * @example
 * ```ts
 * import { createPiiRedactionPack } from './pii-redaction';
 *
 * const pack = createPiiRedactionPack({
 *   entityTypes: ['EMAIL', 'PHONE', 'SSN'],
 *   redactionStyle: 'mask',
 *   guardrailScope: 'both',
 * });
 * ```
 */
|
|
67
|
+
export declare function createPiiRedactionPack(options?: PiiRedactionPackOptions): ExtensionPack;
|
|
68
|
+
/**
 * AgentOS manifest factory function.
 *
 * This function conforms to the convention expected by the extension loader
 * when resolving packs from manifests. It extracts `options` from the
 * {@link ExtensionPackContext} and delegates to {@link createPiiRedactionPack}.
 *
 * @param context - Manifest context containing optional pack options, secret
 *   resolver, and shared service registry.
 * @returns A fully-configured {@link ExtensionPack}.
 *
 * @example
 * Manifest entry that loads this pack with the `hash` redaction style:
 * ```jsonc
 * // In an AgentOS manifest:
 * {
 *   "packs": [
 *     { "module": "./pii-redaction", "options": { "redactionStyle": "hash" } }
 *   ]
 * }
 * ```
 */
|
|
89
|
+
export declare function createExtensionPack(context: ExtensionPackContext): ExtensionPack;
|
|
90
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../../src/extensions/packs/pii-redaction/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAIH,OAAO,KAAK,EAAE,aAAa,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAC;AAG1E,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,SAAS,CAAC;AASvD;;;;;;GAMG;AACH,cAAc,SAAS,CAAC;AAMxB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,wBAAgB,sBAAsB,CACpC,OAAO,CAAC,EAAE,uBAAuB,GAChC,aAAa,CA2Gf;AAMD;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,oBAAoB,GAAG,aAAa,CAEhF"}
|