@framers/agentos 0.1.55 → 0.1.56
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/extensions/ExtensionManager.d.ts +1 -0
- package/dist/extensions/ExtensionManager.d.ts.map +1 -1
- package/dist/extensions/ExtensionManager.js +8 -0
- package/dist/extensions/ExtensionManager.js.map +1 -1
- package/dist/extensions/ISharedServiceRegistry.d.ts +35 -0
- package/dist/extensions/ISharedServiceRegistry.d.ts.map +1 -0
- package/dist/extensions/ISharedServiceRegistry.js +2 -0
- package/dist/extensions/ISharedServiceRegistry.js.map +1 -0
- package/dist/extensions/SharedServiceRegistry.d.ts +15 -0
- package/dist/extensions/SharedServiceRegistry.d.ts.map +1 -0
- package/dist/extensions/SharedServiceRegistry.js +63 -0
- package/dist/extensions/SharedServiceRegistry.js.map +1 -0
- package/dist/extensions/index.d.ts +3 -0
- package/dist/extensions/index.d.ts.map +1 -1
- package/dist/extensions/index.js +4 -0
- package/dist/extensions/index.js.map +1 -1
- package/dist/extensions/manifest.d.ts +2 -0
- package/dist/extensions/manifest.d.ts.map +1 -1
- package/dist/extensions/packs/pii-redaction/EntityMerger.d.ts +127 -0
- package/dist/extensions/packs/pii-redaction/EntityMerger.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/EntityMerger.js +263 -0
- package/dist/extensions/packs/pii-redaction/EntityMerger.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/PiiDetectionPipeline.d.ts +199 -0
- package/dist/extensions/packs/pii-redaction/PiiDetectionPipeline.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/PiiDetectionPipeline.js +456 -0
- package/dist/extensions/packs/pii-redaction/PiiDetectionPipeline.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/PiiRedactionGuardrail.d.ts +121 -0
- package/dist/extensions/packs/pii-redaction/PiiRedactionGuardrail.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/PiiRedactionGuardrail.js +271 -0
- package/dist/extensions/packs/pii-redaction/PiiRedactionGuardrail.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/RedactionEngine.d.ts +61 -0
- package/dist/extensions/packs/pii-redaction/RedactionEngine.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/RedactionEngine.js +207 -0
- package/dist/extensions/packs/pii-redaction/RedactionEngine.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/index.d.ts +90 -0
- package/dist/extensions/packs/pii-redaction/index.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/index.js +195 -0
- package/dist/extensions/packs/pii-redaction/index.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/recognizers/IEntityRecognizer.d.ts +151 -0
- package/dist/extensions/packs/pii-redaction/recognizers/IEntityRecognizer.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/recognizers/IEntityRecognizer.js +14 -0
- package/dist/extensions/packs/pii-redaction/recognizers/IEntityRecognizer.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/recognizers/LlmJudgeRecognizer.d.ts +177 -0
- package/dist/extensions/packs/pii-redaction/recognizers/LlmJudgeRecognizer.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/recognizers/LlmJudgeRecognizer.js +420 -0
- package/dist/extensions/packs/pii-redaction/recognizers/LlmJudgeRecognizer.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/recognizers/NerModelRecognizer.d.ts +145 -0
- package/dist/extensions/packs/pii-redaction/recognizers/NerModelRecognizer.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/recognizers/NerModelRecognizer.js +299 -0
- package/dist/extensions/packs/pii-redaction/recognizers/NerModelRecognizer.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/recognizers/NlpPrefilterRecognizer.d.ts +102 -0
- package/dist/extensions/packs/pii-redaction/recognizers/NlpPrefilterRecognizer.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/recognizers/NlpPrefilterRecognizer.js +228 -0
- package/dist/extensions/packs/pii-redaction/recognizers/NlpPrefilterRecognizer.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/recognizers/RegexRecognizer.d.ts +103 -0
- package/dist/extensions/packs/pii-redaction/recognizers/RegexRecognizer.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/recognizers/RegexRecognizer.js +275 -0
- package/dist/extensions/packs/pii-redaction/recognizers/RegexRecognizer.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/tools/PiiRedactTool.d.ts +118 -0
- package/dist/extensions/packs/pii-redaction/tools/PiiRedactTool.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/tools/PiiRedactTool.js +152 -0
- package/dist/extensions/packs/pii-redaction/tools/PiiRedactTool.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/tools/PiiScanTool.d.ts +98 -0
- package/dist/extensions/packs/pii-redaction/tools/PiiScanTool.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/tools/PiiScanTool.js +153 -0
- package/dist/extensions/packs/pii-redaction/tools/PiiScanTool.js.map +1 -0
- package/dist/extensions/packs/pii-redaction/types.d.ts +332 -0
- package/dist/extensions/packs/pii-redaction/types.d.ts.map +1 -0
- package/dist/extensions/packs/pii-redaction/types.js +83 -0
- package/dist/extensions/packs/pii-redaction/types.js.map +1 -0
- package/dist/extensions/types.d.ts +5 -0
- package/dist/extensions/types.d.ts.map +1 -1
- package/dist/extensions/types.js.map +1 -1
- package/package.json +11 -1
|
@@ -0,0 +1,456 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file PiiDetectionPipeline.ts
|
|
3
|
+
* @description Orchestrates the four-tier PII detection pipeline: Regex →
|
|
4
|
+
* NLP pre-filter → NER model → LLM judge. Each tier is gated by the
|
|
5
|
+
* previous tier's output or configuration flags so that only the work needed
|
|
6
|
+
* is performed, keeping median latency low for clean or simple inputs.
|
|
7
|
+
*
|
|
8
|
+
* ## Tier overview
|
|
9
|
+
*
|
|
10
|
+
* | Tier | Class | Always runs? |
|
|
11
|
+
* |------|------------------------|-----------------------------------------------|
|
|
12
|
+
* | 1 | RegexRecognizer | Yes |
|
|
13
|
+
* | 2 | NlpPrefilterRecognizer | Yes (when available / compromise installed) |
|
|
14
|
+
* | 3 | NerModelRecognizer | Only when Tier 2 found PERSON/ORG/LOC |
|
|
15
|
+
* | 4 | LlmJudgeRecognizer | Only for ambiguous entities (0.3 < score < 0.7)|
|
|
16
|
+
*
|
|
17
|
+
* After tiers 1–3 produce raw candidates, {@link mergeEntities} collapses
|
|
18
|
+
* overlapping spans. Tier 4 then re-examines the ambiguous slice. Finally
|
|
19
|
+
* the merged list is threshold-filtered and sorted by start offset.
|
|
20
|
+
*
|
|
21
|
+
* @module pii-redaction/PiiDetectionPipeline
|
|
22
|
+
*/
|
|
23
|
+
import { ALL_PII_ENTITY_TYPES } from './types.js';
|
|
24
|
+
import { RegexRecognizer } from './recognizers/RegexRecognizer.js';
|
|
25
|
+
import { NlpPrefilterRecognizer } from './recognizers/NlpPrefilterRecognizer.js';
|
|
26
|
+
import { NerModelRecognizer } from './recognizers/NerModelRecognizer.js';
|
|
27
|
+
import { LlmJudgeRecognizer } from './recognizers/LlmJudgeRecognizer.js';
|
|
28
|
+
import { mergeEntities } from './EntityMerger.js';
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
// Constants
|
|
31
|
+
// ---------------------------------------------------------------------------
|
|
32
|
+
/**
 * Number of characters scanned on each side of an entity when performing
 * context enhancement (Step 2 of `detect()`). The window is clamped to the
 * bounds of the input text by the caller.
 */
const CONTEXT_WINDOW_CHARS = 50;
/**
 * Score boost contributed by a "strong" (more specific) context keyword group
 * when one of its keywords is found in the context window.
 */
const CONTEXT_BOOST_STRONG = 0.2;
/**
 * Score boost contributed by a "weak" (more generic) context keyword group.
 */
const CONTEXT_BOOST_WEAK = 0.15;
/**
 * Lower bound (exclusive) of the ambiguous score band that is routed to the
 * LLM judge. Entities with `score <= LLM_JUDGE_SCORE_LOW` bypass the judge.
 */
const LLM_JUDGE_SCORE_LOW = 0.3;
/**
 * Upper bound (exclusive) of the ambiguous score band. Entities with
 * `score >= LLM_JUDGE_SCORE_HIGH` bypass the judge.
 */
const LLM_JUDGE_SCORE_HIGH = 0.7;
/**
 * Confidence threshold used when the pack options do not supply
 * `confidenceThreshold`.
 */
const DEFAULT_CONFIDENCE_THRESHOLD = 0.5;
// ---------------------------------------------------------------------------
// Context-keyword definitions
// ---------------------------------------------------------------------------
/**
 * Builds the immutable lookup table behind {@link CONTEXT_KEYWORDS}.
 *
 * The result has a `null` prototype, so looking up an arbitrary entity-type
 * string (for example `'constructor'` or `'toString'`) yields `undefined`
 * instead of an inherited `Object.prototype` member. The table, every group
 * list, every group and every keyword list are frozen so this module-level
 * state cannot be modified after load.
 *
 * @param table - Plain object mapping an entity type to its keyword groups.
 * @returns A frozen, prototype-less copy of `table`.
 */
function buildContextKeywordTable(table) {
    const frozenTable = Object.create(null);
    for (const [entityType, groups] of Object.entries(table)) {
        frozenTable[entityType] = Object.freeze(groups.map((group) => Object.freeze({
            keywords: Object.freeze([...group.keywords]),
            boost: group.boost,
        })));
    }
    return Object.freeze(frozenTable);
}
/**
 * Mapping of entity type to the context keyword groups that signal the entity
 * is genuine PII in context.
 *
 * Each entry is an object `{ keywords, boost }`:
 * - `keywords` — lower-case substrings searched for in the (lower-cased)
 *   context window, which makes matching case-insensitive.
 * - `boost` — {@link CONTEXT_BOOST_STRONG} for specific phrases or
 *   {@link CONTEXT_BOOST_WEAK} for generic labels.
 *
 * Within every entity type the strong group is listed before the weak group;
 * `applyContextEnhancement` stops at the first matching strong group, so the
 * order of the groups is significant.
 *
 * NOTE(review): `applyContextEnhancement` scans a window that includes the
 * entity's own text, so the EMAIL weak keyword `'@'` matches whenever the
 * detected span itself contains `'@'` — confirm this is intended.
 */
const CONTEXT_KEYWORDS = buildContextKeywordTable({
    SSN: [
        { keywords: ['social security', 'ssn:', 'ss#'], boost: CONTEXT_BOOST_STRONG },
        { keywords: ['tax id', 'government id'], boost: CONTEXT_BOOST_WEAK },
    ],
    PERSON: [
        { keywords: ['name:', 'full name', 'first name', 'last name', 'patient', 'employee'], boost: CONTEXT_BOOST_STRONG },
        { keywords: ['mr.', 'mrs.', 'ms.', 'dr.', 'prof.'], boost: CONTEXT_BOOST_WEAK },
    ],
    DATE_OF_BIRTH: [
        { keywords: ['date of birth', 'dob:', 'born on', 'birthday'], boost: CONTEXT_BOOST_STRONG },
        { keywords: ['birth', 'age:'], boost: CONTEXT_BOOST_WEAK },
    ],
    LOCATION: [
        { keywords: ['address:', 'mailing address', 'home address', 'street', 'city', 'zip code', 'postal code'], boost: CONTEXT_BOOST_STRONG },
        { keywords: ['lives at', 'resides at', 'located at'], boost: CONTEXT_BOOST_WEAK },
    ],
    PHONE: [
        { keywords: ['phone:', 'tel:', 'mobile:', 'cell:', 'fax:', 'telephone', 'contact number'], boost: CONTEXT_BOOST_STRONG },
        { keywords: ['call', 'reach me at'], boost: CONTEXT_BOOST_WEAK },
    ],
    EMAIL: [
        { keywords: ['email:', 'e-mail:', 'contact:', 'email address'], boost: CONTEXT_BOOST_STRONG },
        { keywords: ['send to', 'reply to', '@'], boost: CONTEXT_BOOST_WEAK },
    ],
    CREDIT_CARD: [
        { keywords: ['credit card', 'card number', 'cc:', 'visa', 'mastercard', 'amex'], boost: CONTEXT_BOOST_STRONG },
        { keywords: ['payment', 'billing', 'expires'], boost: CONTEXT_BOOST_WEAK },
    ],
    PASSPORT: [
        { keywords: ['passport', 'passport number', 'passport no.', 'travel document'], boost: CONTEXT_BOOST_STRONG },
        { keywords: ['identity', 'nationality', 'issued by'], boost: CONTEXT_BOOST_WEAK },
    ],
});
|
|
110
|
+
// ---------------------------------------------------------------------------
|
|
111
|
+
// PiiDetectionPipeline
|
|
112
|
+
// ---------------------------------------------------------------------------
|
|
113
|
+
/**
 * Entity types whose presence in the Tier 2 (NLP pre-filter) output gates
 * Tier 3 (NER model) execution. Kept at module scope so the set is built once
 * rather than on every `detect()` call.
 */
const NER_GATE_TYPES = new Set(['PERSON', 'ORGANIZATION', 'LOCATION']);
/**
 * Locale passed to `localeCompare` when ordering entity types in the summary
 * string. Pinning it keeps the summary independent of the host's default
 * locale.
 */
const SUMMARY_SORT_LOCALE = 'en';
/**
 * Four-tier PII detection pipeline that orchestrates the Regex, NLP
 * pre-filter, NER model and LLM judge recognisers behind a single
 * `detect()` call.
 *
 * ### Construction
 * ```ts
 * const pipeline = new PiiDetectionPipeline(serviceRegistry, packOptions, getSecret);
 * const result = await pipeline.detect('Call me at 555-123-4567');
 * ```
 *
 * ### Lifecycle
 * All recognisers are instantiated in the constructor, so one pipeline can be
 * reused across many `detect()` calls. Whether the recognisers defer loading
 * their heavy dependencies until first use is decided by the recogniser
 * implementations, which are not part of this file.
 *
 * ### Concurrency
 * `detect()` keeps all per-call state in local variables; the only instance
 * state it reads is the configuration and recognisers assigned in the
 * constructor. Safety of concurrent calls therefore depends on the
 * recognisers themselves (the LLM judge is expected to throttle its own
 * concurrency — see `runLlmJudge`).
 */
export class PiiDetectionPipeline {
    // -----------------------------------------------------------------------
    // Constructor
    // -----------------------------------------------------------------------
    /**
     * Construct a new PiiDetectionPipeline.
     *
     * @param services  - Shared service registry handed to the NLP pre-filter
     *                    and NER recognisers.
     * @param options   - Pack-level configuration: `entityTypes`,
     *                    `confidenceThreshold`, `enableNerModel`, `allowlist`,
     *                    `denylist` and the optional `llmJudge` config. A
     *                    missing value is treated as `{}` (all defaults).
     * @param getSecret - Optional function that looks up a secret by ID
     *                    (e.g. `'openai.apiKey'`, `'pii.llm.apiKey'`). Used to
     *                    resolve the LLM judge API key when `llmJudge.apiKey`
     *                    is not set.
     */
    constructor(services, options, getSecret) {
        const opts = options ?? {};
        // ---- Entity types: default to every known type. The list is copied so
        // that later mutation of the caller's array cannot change this pipeline.
        this.entityTypes = [...(opts.entityTypes ?? ALL_PII_ENTITY_TYPES)];
        // ---- Confidence threshold applied as the final filter in detect().
        this.confidenceThreshold = opts.confidenceThreshold ?? DEFAULT_CONFIDENCE_THRESHOLD;
        // ---- NER tier is enabled unless explicitly set to `false`.
        this.enableNerModel = opts.enableNerModel !== false;
        // ---- Allow/denylist: only string entries are forwarded to the merger;
        // non-string entries (e.g. RegExp) are dropped here.
        const isString = (value) => typeof value === 'string';
        this.allowlist = (opts.allowlist ?? []).filter(isString);
        this.denylist = (opts.denylist ?? []).filter(isString);
        // ---- Recognisers for tiers 1-3.
        this.regexRecognizer = new RegexRecognizer();
        this.nlpPrefilter = new NlpPrefilterRecognizer(services);
        this.nerRecognizer = new NerModelRecognizer(services);
        // ---- Tier 4 exists only when an LLM judge config was supplied.
        this.llmJudge = opts.llmJudge
            ? new LlmJudgeRecognizer(this.resolveJudgeApiKey(opts.llmJudge, getSecret))
            : null;
    }
    // -----------------------------------------------------------------------
    // Public API
    // -----------------------------------------------------------------------
    /**
     * Run all applicable detection tiers over `text` and return the merged,
     * threshold-filtered entity list together with pipeline metadata.
     *
     * ### Processing steps
     * 1. **Tier 1 (Regex)** — always executed; recorded as `'regex'`.
     * 2. **Context enhancement** — Tier 1 scores are boosted by
     *    {@link applyContextEnhancement}.
     * 3. **Tier 2 (NLP pre-filter)** — always attempted. It is not recorded in
     *    `tiersExecuted`.
     * 4. **Tier 3 (NER)** — runs only when `enableNerModel` is on AND Tier 2
     *    produced at least one PERSON, ORGANIZATION or LOCATION candidate;
     *    recorded as `'ner'`.
     * 5. **Merge** — `mergeEntities` receives the combined candidates plus the
     *    allow/denylists. No confidence threshold is passed at this point.
     * 6. **Tier 4 (LLM judge)** — only when a judge is configured; recorded as
     *    `'llm'` whenever a judge is configured, even if no entity fell into
     *    the ambiguous band.
     * 7. **Threshold filter** — entities with `score < confidenceThreshold`
     *    are removed.
     * 8. **Sort + summary** — sorted by `start`; summary string built.
     *
     * @param text - The raw input text to analyse.
     * @returns An object with `entities`, `inputLength`, `processingTimeMs`,
     *          `tiersExecuted` and `summary`.
     */
    async detect(text) {
        // Wall-clock start, used for processingTimeMs.
        const startTime = Date.now();
        // Labels of the tiers that actually ran (for observability).
        const tiersExecuted = [];
        const recognizeOptions = { entityTypes: this.entityTypes };
        // Tier 1: regex recogniser, followed by the context-keyword score boost.
        const tier1Entities = await this.regexRecognizer.recognize(text, recognizeOptions);
        tiersExecuted.push('regex');
        const tier1Enhanced = this.applyContextEnhancement(tier1Entities, text);
        // Tier 2: NLP pre-filter.
        const tier2Entities = await this.nlpPrefilter.recognize(text, {
            entityTypes: this.entityTypes,
        });
        // Tier 3: NER model, gated on Tier 2 having produced a gate-type candidate.
        let tier3Entities = [];
        const tier2HasNerCandidates = tier2Entities.some((e) => NER_GATE_TYPES.has(e.entityType));
        if (this.enableNerModel && tier2HasNerCandidates) {
            tier3Entities = await this.nerRecognizer.recognize(text, {
                entityTypes: this.entityTypes,
                // Tier 2 results are handed to the NER recogniser as prior context.
                priorEntities: tier2Entities,
            });
            tiersExecuted.push('ner');
        }
        // Candidate order is: enhanced Tier 1, Tier 2, Tier 3. The list is built
        // with array spread (not `push(...items)`) so very large tiers cannot
        // exceed the engine's argument-count limit.
        const rawEntities = [...tier1Enhanced, ...tier2Entities, ...tier3Entities];
        // Step 5: merge. The threshold is deliberately withheld so the LLM judge
        // still sees low-scoring candidates.
        const mergedEntities = mergeEntities(rawEntities, {
            allowlist: this.allowlist,
            denylist: this.denylist,
        }, text);
        // Tier 4: LLM judge, only when configured.
        let judgedEntities = mergedEntities;
        if (this.llmJudge !== null) {
            judgedEntities = await this.runLlmJudge(mergedEntities, text);
            tiersExecuted.push('llm');
        }
        // Step 7: final threshold filter.
        const thresholded = judgedEntities.filter((e) => e.score >= this.confidenceThreshold);
        // Step 8: sort a copy by start offset and build the summary.
        const sorted = thresholded.slice().sort((a, b) => a.start - b.start);
        return {
            entities: sorted,
            inputLength: text.length,
            processingTimeMs: Date.now() - startTime,
            tiersExecuted,
            summary: this.buildSummary(sorted),
        };
    }
    // -----------------------------------------------------------------------
    // Private helpers
    // -----------------------------------------------------------------------
    /**
     * Applies context enhancement to Tier 1 regex entities.
     *
     * For each entity, the text from `start - CONTEXT_WINDOW_CHARS` to
     * `end + CONTEXT_WINDOW_CHARS` (clamped to the input) is lower-cased and
     * searched for the keywords registered for the entity's type. Each matching
     * group contributes its boost once; scanning stops at the first matching
     * group whose boost is at least `CONTEXT_BOOST_STRONG`. The boosted score is
     * capped at 1.0.
     *
     * Entities with no keyword mapping, or with no keyword match, are returned
     * as the same object. Boosted entities are returned as shallow copies, so
     * the inputs are never mutated.
     *
     * NOTE(review): the scanned window includes the entity's own text, so a
     * keyword that occurs inside the entity (e.g. `'@'` for EMAIL) matches by
     * itself — confirm this is intended.
     *
     * @param entities - Raw Tier 1 entities to enhance.
     * @param text     - Full input text (used to extract context windows).
     * @returns New array of entities with potentially boosted scores.
     */
    applyContextEnhancement(entities, text) {
        return entities.map((entity) => {
            // Own-property lookup only: an entity type such as 'constructor' must
            // not resolve to an inherited Object.prototype member.
            const keywordGroups = Object.prototype.hasOwnProperty.call(CONTEXT_KEYWORDS, entity.entityType)
                ? CONTEXT_KEYWORDS[entity.entityType]
                : undefined;
            if (!Array.isArray(keywordGroups) || keywordGroups.length === 0) {
                return entity;
            }
            // Lower-cased context window around (and including) the entity.
            const windowStart = Math.max(0, entity.start - CONTEXT_WINDOW_CHARS);
            const windowEnd = Math.min(text.length, entity.end + CONTEXT_WINDOW_CHARS);
            const contextSlice = text.slice(windowStart, windowEnd).toLowerCase();
            let totalBoost = 0;
            for (const group of keywordGroups) {
                const matched = group.keywords.some((kw) => contextSlice.includes(kw));
                if (!matched) {
                    continue;
                }
                // A group contributes at most once, however many keywords match.
                totalBoost += group.boost;
                // A strong match ends the scan so later groups cannot stack on it.
                if (group.boost >= CONTEXT_BOOST_STRONG) {
                    break;
                }
            }
            if (totalBoost === 0) {
                return entity;
            }
            // Keep the score inside the valid [0, 1] range.
            return { ...entity, score: Math.min(1.0, entity.score + totalBoost) };
        });
    }
    /**
     * Runs the LLM judge over entities in the ambiguous score band
     * (`LLM_JUDGE_SCORE_LOW < score < LLM_JUDGE_SCORE_HIGH`).
     *
     * Entities outside the band pass through untouched. Every ambiguous entity
     * is handed to `judge.judge(entity, text)`; all calls are started at once
     * and awaited with `Promise.all` (the judge is expected to throttle its own
     * concurrency). A `null` result discards the entity. The returned list
     * holds the pass-through entities first, then the judged ones.
     *
     * When no judge is configured the entities are returned unchanged (as a
     * new array) instead of failing.
     *
     * @param entities - Merged entity list from Step 5.
     * @param text     - Full input text passed to the judge for context.
     * @returns Updated entity list after LLM judgement.
     */
    async runLlmJudge(entities, text) {
        const judge = this.llmJudge;
        // Defensive: detect() only calls this with a judge, but a direct caller
        // without one should get a pass-through rather than a TypeError.
        if (judge === null || judge === undefined) {
            return [...entities];
        }
        const isAmbiguous = (entity) => entity.score > LLM_JUDGE_SCORE_LOW && entity.score < LLM_JUDGE_SCORE_HIGH;
        const clearEntities = entities.filter((entity) => !isAmbiguous(entity));
        const ambiguousEntities = entities.filter(isAmbiguous);
        // Nothing to judge — skip the LLM entirely.
        if (ambiguousEntities.length === 0) {
            return clearEntities;
        }
        const judgeResults = await Promise.all(ambiguousEntities.map((entity) => judge.judge(entity, text)));
        // `null` means the judge rejected the candidate.
        const confirmedEntities = judgeResults.filter((r) => r !== null);
        return [...clearEntities, ...confirmedEntities];
    }
    /**
     * Resolves the API key for the LLM judge using a three-level fallback:
     *
     * 1. Explicit `config.apiKey` — returned as-is (same object).
     * 2. Provider-specific secret via `getSecret('<provider>.apiKey')`. Skipped
     *    when `config.provider` is not set, so no `'undefined.apiKey'` lookup is
     *    issued.
     * 3. Pack-specific generic secret via `getSecret('pii.llm.apiKey')`.
     *
     * When a secret is found a shallow copy of `config` with `apiKey` set is
     * returned; otherwise the original `config` object is returned unchanged.
     *
     * @param config    - Original LLM judge configuration.
     * @param getSecret - Optional secret resolver function.
     * @returns Config with `apiKey` resolved to the best available value.
     */
    resolveJudgeApiKey(config, getSecret) {
        // An explicit key wins; without a resolver there is nothing to look up.
        if (config.apiKey || !getSecret) {
            return config;
        }
        // Secret IDs in priority order.
        const secretIds = [];
        if (config.provider) {
            secretIds.push(`${config.provider}.apiKey`);
        }
        secretIds.push('pii.llm.apiKey');
        for (const secretId of secretIds) {
            const apiKey = getSecret(secretId);
            if (apiKey) {
                return { ...config, apiKey };
            }
        }
        // No key available: hand the config back untouched and let the judge
        // recogniser decide how to behave without credentials.
        return config;
    }
    /**
     * Builds a human-readable summary string from the final entity list.
     *
     * Format: `"<n> entities found: <count>×<TYPE>, ..."` (singular `entity`
     * for exactly one) or `"No PII detected"` when the list is empty.
     *
     * Entity types are ordered with `localeCompare` under a fixed `'en'` locale
     * so the output does not vary with the host's default locale. Only types
     * that are present appear in the summary.
     *
     * @example
     * ```
     * "3 entities found: 1×EMAIL, 1×PERSON, 1×PHONE"
     * "No PII detected"
     * ```
     *
     * @param entities - Final threshold-filtered, sorted entity list.
     * @returns Human-readable summary string.
     */
    buildSummary(entities) {
        if (entities.length === 0) {
            return 'No PII detected';
        }
        // Tally occurrences per entity type.
        const typeCounts = new Map();
        for (const { entityType } of entities) {
            typeCounts.set(entityType, (typeCounts.get(entityType) ?? 0) + 1);
        }
        const countParts = [...typeCounts.entries()]
            .sort(([a], [b]) => a.localeCompare(b, SUMMARY_SORT_LOCALE))
            .map(([type, count]) => `${count}×${type}`);
        const noun = entities.length === 1 ? 'entity' : 'entities';
        return `${entities.length} ${noun} found: ${countParts.join(', ')}`;
    }
}
|
|
456
|
+
//# sourceMappingURL=PiiDetectionPipeline.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PiiDetectionPipeline.js","sourceRoot":"","sources":["../../../../src/extensions/packs/pii-redaction/PiiDetectionPipeline.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAUH,OAAO,EAAE,oBAAoB,EAAE,MAAM,SAAS,CAAC;AAC/C,OAAO,EAAE,eAAe,EAAE,MAAM,+BAA+B,CAAC;AAChE,OAAO,EAAE,sBAAsB,EAAE,MAAM,sCAAsC,CAAC;AAC9E,OAAO,EAAE,kBAAkB,EAAE,MAAM,kCAAkC,CAAC;AACtE,OAAO,EAAE,kBAAkB,EAAE,MAAM,kCAAkC,CAAC;AACtE,OAAO,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAE/C,8EAA8E;AAC9E,YAAY;AACZ,8EAA8E;AAE9E;;;;GAIG;AACH,MAAM,oBAAoB,GAAG,EAAE,CAAC;AAEhC;;;GAGG;AACH,MAAM,oBAAoB,GAAG,GAAG,CAAC;AAEjC;;GAEG;AACH,MAAM,kBAAkB,GAAG,IAAI,CAAC;AAEhC;;;;GAIG;AACH,MAAM,mBAAmB,GAAG,GAAG,CAAC;AAEhC;;;;GAIG;AACH,MAAM,oBAAoB,GAAG,GAAG,CAAC;AAEjC;;;GAGG;AACH,MAAM,4BAA4B,GAAG,GAAG,CAAC;AAEzC,8EAA8E;AAC9E,8BAA8B;AAC9B,8EAA8E;AAE9E;;;;;;;;GAQG;AACH,MAAM,gBAAgB,GAElB;IACF,GAAG,EAAE;QACH,EAAE,QAAQ,EAAE,CAAC,iBAAiB,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,KAAK,EAAE,oBAAoB,EAAE;QAC7E,EAAE,QAAQ,EAAE,CAAC,QAAQ,EAAE,eAAe,CAAC,EAAE,KAAK,EAAE,kBAAkB,EAAE;KACrE;IACD,MAAM,EAAE;QACN,EAAE,QAAQ,EAAE,CAAC,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,WAAW,EAAE,SAAS,EAAE,UAAU,CAAC,EAAE,KAAK,EAAE,oBAAoB,EAAE;QACnH,EAAE,QAAQ,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,OAAO,CAAC,EAAE,KAAK,EAAE,kBAAkB,EAAE;KAChF;IACD,aAAa,EAAE;QACb,EAAE,QAAQ,EAAE,CAAC,eAAe,EAAE,MAAM,EAAE,SAAS,EAAE,UAAU,CAAC,EAAE,KAAK,EAAE,oBAAoB,EAAE;QAC3F,EAAE,QAAQ,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,EAAE,KAAK,EAAE,kBAAkB,EAAE;KAC3D;IACD,QAAQ,EAAE;QACR,EAAE,QAAQ,EAAE,CAAC,UAAU,EAAE,iBAAiB,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,EAAE,UAAU,EAAE,aAAa,CAAC,EAAE,KAAK,EAAE,oBAAoB,EAAE;QACvI,EAAE,QAAQ,EAAE,CAAC,UAAU,EAAE,YAAY,EAAE,YAAY,CAAC,EAAE,KAAK,EAAE,kBAAkB,EAAE;KAClF;IACD,KAAK,EAAE;QACL,EAAE,QAAQ,EAAE,CAAC,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,MAAM,EAAE,WAAW,EAAE,gBAAgB,CAAC,EAAE,KAAK,EAAE,oBAAoB,EAAE;QACxH,EAAE,QAAQ,EAAE,CAAC,MAAM,EAAE,aAAa,CAAC,EAAE,KAAK,EAAE,kBAAkB,EAAE;KACjE;IACD,KAAK,EAAE;QACL,EAAE,QAAQ,EAAE,CAAC,QAAQ,EAAE,SAAS,EAAE,UAAU,EAAE,eAAe,CAAC,EAAE,KAAK,EAAE,oBAAoB,
EAAE;QAC7F,EAAE,QAAQ,EAAE,CAAC,SAAS,EAAE,UAAU,EAAE,GAAG,CAAC,EAAE,KAAK,EAAE,kBAAkB,EAAE;KACtE;IACD,WAAW,EAAE;QACX,EAAE,QAAQ,EAAE,CAAC,aAAa,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,CAAC,EAAE,KAAK,EAAE,oBAAoB,EAAE;QAC9G,EAAE,QAAQ,EAAE,CAAC,SAAS,EAAE,SAAS,EAAE,SAAS,CAAC,EAAE,KAAK,EAAE,kBAAkB,EAAE;KAC3E;IACD,QAAQ,EAAE;QACR,EAAE,QAAQ,EAAE,CAAC,UAAU,EAAE,iBAAiB,EAAE,cAAc,EAAE,iBAAiB,CAAC,EAAE,KAAK,EAAE,oBAAoB,EAAE;QAC7G,EAAE,QAAQ,EAAE,CAAC,UAAU,EAAE,aAAa,EAAE,WAAW,CAAC,EAAE,KAAK,EAAE,kBAAkB,EAAE;KAClF;CACF,CAAC;AAEF,8EAA8E;AAC9E,uBAAuB;AACvB,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,MAAM,OAAO,oBAAoB;IAyD/B,0EAA0E;IAC1E,cAAc;IACd,0EAA0E;IAE1E;;;;;;;;;;;;;;;;OAgBG;IACH,YACE,QAAgC,EAChC,OAAgC,EAChC,SAA8C;QAE9C,iCAAiC;QACjC,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,CAAC,GAAG,oBAAoB,CAAC,CAAC;QAEpE,yCAAyC;QACzC,IAAI,CAAC,mBAAmB,GAAG,OAAO,CAAC,mBAAmB,IAAI,4BAA4B,CAAC;QAEvF,mCAAmC;QACnC,sEAAsE;QACtE,IAAI,CAAC,cAAc,GAAG,OAAO,CAAC,cAAc,KAAK,KAAK,CAAC;QAEvD,yEAAyE;QACzE,qEAAqE;QACrE,uEAAuE;QACvE,sDAAsD;QACtD,IAAI,CAAC,SAAS,GAAG,CAAC,OAAO,CAAC,SAAS,IAAI,EAAE,CAAC;aACvC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC;QAErD,IAAI,CAAC,QAAQ,GAAG,CAAC,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;aACrC,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC;QAErD,oCAAoC;QACpC,IAAI,CAAC,eAAe,GAAG,IAAI,eAAe,EAAE,CAAC;QAC7C,IAAI,CAAC,YAAY,GAAG,IAAI,sBAAsB,CAAC,QAAQ,CAAC,CAAC;QACzD,IAAI,CAAC,aAAa,GAAG,IAAI,kBAAkB,CAAC,QAAQ,CAAC,CAAC;QAEtD,gDAAgD;QAChD,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;YACrB,MAAM,cAAc,GAAG,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;YAC5E,IAAI,CAAC,QAAQ,GAAG,IAAI,kBAAkB,CAAC,cAAc,CAAC,CAAC;QACzD,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;QACvB,CAAC;IACH,CAAC;IAED,0EAA0E;IAC1E,aAAa;IACb,0EAA0E;IAE1E;;;;;;;;;;;;;;;;;;;;;;;;;;OA0BG;IACI,KAAK,CAAC,MAAM,CAAC,IAAY;QAC9B,0DAA0D;QAC1D,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,8DAA8D;QAC9D,MAAM,WAAW,GAAgB,EAAE,CAAC;QAEpC,iEAAiE;QACjE,MAAM,aAAa,GAAmC,EAAE,CAAC;QAEzD,0EAA0E;QAC1E,0CAA0C;QAC1C,0EAA0E
;QAC1E,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,SAAS,CAAC,IAAI,EAAE;YAC/D,WAAW,EAAE,IAAI,CAAC,WAAW;SAC9B,CAAC,CAAC;QAEH,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC5B,WAAW,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC,CAAC;QAEnC,0EAA0E;QAC1E,4EAA4E;QAC5E,0EAA0E;QAC1E,MAAM,aAAa,GAAG,IAAI,CAAC,uBAAuB,CAAC,aAAa,EAAE,IAAI,CAAC,CAAC;QAExE,0EAA0E;QAC1E,2DAA2D;QAC3D,WAAW,CAAC,MAAM,CAAC,CAAC,EAAE,aAAa,CAAC,MAAM,EAAE,GAAG,aAAa,CAAC,CAAC;QAE9D,0EAA0E;QAC1E,6CAA6C;QAC7C,0EAA0E;QAC1E,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,IAAI,EAAE;YAC5D,WAAW,EAAE,IAAI,CAAC,WAAW;SAC9B,CAAC,CAAC;QAEH,yEAAyE;QACzE,WAAW,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC,CAAC;QAEnC,0EAA0E;QAC1E,oEAAoE;QACpE,0EAA0E;QAE1E,+CAA+C;QAC/C,MAAM,cAAc,GAAG,IAAI,GAAG,CAAgB,CAAC,QAAQ,EAAE,cAAc,EAAE,UAAU,CAAC,CAAC,CAAC;QAEtF,MAAM,qBAAqB,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CACrD,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,CACjC,CAAC;QAEF,IAAI,IAAI,CAAC,cAAc,IAAI,qBAAqB,EAAE,CAAC;YACjD,sEAAsE;YACtE,gCAAgC;YAChC,MAAM,aAAa,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,IAAI,EAAE;gBAC7D,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,2EAA2E;gBAC3E,aAAa,EAAE,aAAa;aAC7B,CAAC,CAAC;YAEH,WAAW,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC,CAAC;YAEnC,sEAAsE;YACtE,uEAAuE;YACvE,yCAAyC;YACzC,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC5B,CAAC;QAED,0EAA0E;QAC1E,oEAAoE;QACpE,sEAAsE;QACtE,0EAA0E;QAC1E,MAAM,cAAc,GAAG,aAAa,CAClC,WAAW,EACX;YACE,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,sEAAsE;SACvE,EACD,IAAI,CACL,CAAC;QAEF,0EAA0E;QAC1E,yDAAyD;QACzD,0EAA0E;QAC1E,IAAI,cAA2B,CAAC;QAEhC,IAAI,IAAI,CAAC,QAAQ,KAAK,IAAI,EAAE,CAAC;YAC3B,cAAc,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,cAAc,EAAE,IAAI,CAAC,CAAC;YAC9D,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC5B,CAAC;aAAM,CAAC;YACN,cAAc,GAAG,cAAc,CAAC;QAClC,CAAC;QAED,0EAA0E;QAC1E,kCAAkC;QAClC,0EAA0E;QAC1E,MAAM,WAAW,GAAG,cAAc,CAAC,MAAM,CACvC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,CAAC,mBAAmB,CAC3C,CAAC;QAEF,0EAA0E;QAC1E,kDAAkD;QAClD,0EAA0E;QAC1E,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAA
C,CAAC,KAAK,CAAC,CAAC;QACrE,MAAM,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;QAE1C,OAAO;YACL,QAAQ,EAAE,MAAM;YAChB,WAAW,EAAE,IAAI,CAAC,MAAM;YACxB,gBAAgB,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;YACxC,aAAa;YACb,OAAO;SACR,CAAC;IACJ,CAAC;IAED,0EAA0E;IAC1E,kBAAkB;IAClB,0EAA0E;IAE1E;;;;;;;;;;;;;;;;OAgBG;IACK,uBAAuB,CAC7B,QAAqB,EACrB,IAAY;QAEZ,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE;YAC7B,MAAM,aAAa,GAAG,gBAAgB,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC;YAE1D,mEAAmE;YACnE,IAAI,CAAC,aAAa,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,MAAM,CAAC;YAEhE,iDAAiD;YACjD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,KAAK,GAAG,oBAAoB,CAAC,CAAC;YAClE,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,GAAG,oBAAoB,CAAC,CAAC;YACxE,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC,WAAW,EAAE,CAAC;YAEhE,8DAA8D;YAC9D,IAAI,UAAU,GAAG,CAAC,CAAC;YAEnB,KAAK,MAAM,KAAK,IAAI,aAAa,EAAE,CAAC;gBAClC,oEAAoE;gBACpE,MAAM,OAAO,GAAG,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,YAAY,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC;gBAEvE,IAAI,OAAO,EAAE,CAAC;oBACZ,qEAAqE;oBACrE,UAAU,IAAI,KAAK,CAAC,KAAK,CAAC;oBAC1B,qEAAqE;oBACrE,oEAAoE;oBACpE,IAAI,KAAK,CAAC,KAAK,IAAI,oBAAoB;wBAAE,MAAM;gBACjD,CAAC;YACH,CAAC;YAED,IAAI,UAAU,KAAK,CAAC;gBAAE,OAAO,MAAM,CAAC;YAEpC,6DAA6D;YAC7D,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,KAAK,GAAG,UAAU,CAAC,CAAC;YAE9D,OAAO,EAAE,GAAG,MAAM,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC;QAC5C,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;;;OAYG;IACK,KAAK,CAAC,WAAW,CACvB,QAAqB,EACrB,IAAY;QAEZ,kEAAkE;QAClE,MAAM,KAAK,GAAG,IAAI,CAAC,QAAS,CAAC;QAE7B,6EAA6E;QAC7E,MAAM,aAAa,GAAgB,EAAE,CAAC;QACtC,MAAM,iBAAiB,GAAgB,EAAE,CAAC;QAE1C,KAAK,MAAM,MAAM,IAAI,QAAQ,EAAE,CAAC;YAC9B,IAAI,MAAM,CAAC,KAAK,GAAG,mBAAmB,IAAI,MAAM,CAAC,KAAK,GAAG,oBAAoB,EAAE,CAAC;gBAC9E,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACjC,CAAC;iBAAM,CAAC;gBACN,aAAa,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC7B,CAAC;QACH,CAAC;QAED,6CAA6C;QAC7C,IAAI,iBAAiB,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,aAAa,CAAC;QAEzD,8EAA8E;QAC9E,iEAAiE;QACjE,MAAM,YAAY,GAAG,MAAM,OAAO,C
AAC,GAAG,CACpC,iBAAiB,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,CAC7D,CAAC;QAEF,wEAAwE;QACxE,MAAM,iBAAiB,GAAgB,YAAY,CAAC,MAAM,CACxD,CAAC,CAAC,EAAkB,EAAE,CAAC,CAAC,KAAK,IAAI,CAClC,CAAC;QAEF,4EAA4E;QAC5E,OAAO,CAAC,GAAG,aAAa,EAAE,GAAG,iBAAiB,CAAC,CAAC;IAClD,CAAC;IAED;;;;;;;;;;;;;OAaG;IACK,kBAAkB,CACxB,MAAsB,EACtB,SAA8C;QAE9C,gEAAgE;QAChE,IAAI,MAAM,CAAC,MAAM;YAAE,OAAO,MAAM,CAAC;QAEjC,IAAI,CAAC,SAAS;YAAE,OAAO,MAAM,CAAC;QAE9B,2EAA2E;QAC3E,MAAM,WAAW,GAAG,SAAS,CAAC,GAAG,MAAM,CAAC,QAAQ,SAAS,CAAC,CAAC;QAC3D,IAAI,WAAW;YAAE,OAAO,EAAE,GAAG,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;QAE3D,mCAAmC;QACnC,MAAM,OAAO,GAAG,SAAS,CAAC,gBAAgB,CAAC,CAAC;QAC5C,IAAI,OAAO;YAAE,OAAO,EAAE,GAAG,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC;QAEnD,0EAA0E;QAC1E,4EAA4E;QAC5E,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;;;;;;;;;;;;;;;OAiBG;IACK,YAAY,CAAC,QAAqB;QACxC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,iBAAiB,CAAC;QAEpD,yCAAyC;QACzC,MAAM,UAAU,GAAG,IAAI,GAAG,EAAyB,CAAC;QACpD,KAAK,MAAM,MAAM,IAAI,QAAQ,EAAE,CAAC;YAC9B,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,UAAU,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAClF,CAAC;QAED,gDAAgD;QAChD,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,CAAC;aAChD,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;aACtC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,GAAG,KAAK,IAAI,IAAI,EAAE,CAAC,CAAC;QAE9C,MAAM,IAAI,GAAG,QAAQ,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC;QAC3D,OAAO,GAAG,QAAQ,CAAC,MAAM,IAAI,IAAI,WAAW,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;IACtE,CAAC;CACF"}
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file PiiRedactionGuardrail.ts
|
|
3
|
+
* @description Guardrail service that intercepts agent input and/or output to
|
|
4
|
+
* detect and redact PII (Personally Identifiable Information) in real time.
|
|
5
|
+
*
|
|
6
|
+
* The guardrail integrates with the AgentOS guardrail hook pipeline via the
|
|
7
|
+
* {@link IGuardrailService} interface, providing two evaluation paths:
|
|
8
|
+
*
|
|
9
|
+
* - **Input evaluation** (`evaluateInput`): Scans the user's text input before
|
|
10
|
+
* it enters the orchestration pipeline and returns a SANITIZE action with
|
|
11
|
+
* redacted text when PII is found.
|
|
12
|
+
*
|
|
13
|
+
* - **Output evaluation** (`evaluateOutput`): Uses a sentence-boundary buffer
|
|
14
|
+
* keyed by `streamId` to accumulate streaming text deltas. When a sentence
|
|
15
|
+
* boundary is detected (`. `, `? `, `! `, or `\n`) the buffer is scanned
|
|
16
|
+
* for PII and redacted text is returned as a SANITIZE action. Entity
|
|
17
|
+
* offsets are always relative to the buffer, not individual chunks.
|
|
18
|
+
*
|
|
19
|
+
* Which evaluation path(s) are active is controlled by the
|
|
20
|
+
* {@link PiiRedactionPackOptions.guardrailScope} option:
|
|
21
|
+
* - `'input'` -- only `evaluateInput` is active
|
|
22
|
+
* - `'output'` -- only `evaluateOutput` is active
|
|
23
|
+
* - `'both'` -- both paths are active (default)
|
|
24
|
+
*
|
|
25
|
+
* @module pii-redaction/PiiRedactionGuardrail
|
|
26
|
+
*/
|
|
27
|
+
import type { ISharedServiceRegistry } from '../../ISharedServiceRegistry';
|
|
28
|
+
import type { IGuardrailService, GuardrailConfig, GuardrailInputPayload, GuardrailOutputPayload, GuardrailEvaluationResult } from '../../../core/guardrails/IGuardrailService';
|
|
29
|
+
import type { PiiRedactionPackOptions } from './types';
|
|
30
|
+
/**
|
|
31
|
+
* AgentOS guardrail service that detects and redacts PII from both inbound
|
|
32
|
+
* user messages and outbound agent responses.
|
|
33
|
+
*
|
|
34
|
+
* ### Construction
|
|
35
|
+
* ```ts
|
|
36
|
+
* const guardrail = new PiiRedactionGuardrail(registry, options, getSecret);
|
|
37
|
+
* ```
|
|
38
|
+
*
|
|
39
|
+
* ### Thread safety
|
|
40
|
+
* The guardrail maintains per-stream mutable state for output evaluation.
|
|
41
|
+
* Concurrent calls with **different** `streamId` values are safe. Concurrent
|
|
42
|
+
* calls with the **same** `streamId` are serialised by the AgentOS streaming
|
|
43
|
+
* pipeline so no additional locking is required.
|
|
44
|
+
*
|
|
45
|
+
* @implements {IGuardrailService}
|
|
46
|
+
*/
|
|
47
|
+
export declare class PiiRedactionGuardrail implements IGuardrailService {
|
|
48
|
+
/**
|
|
49
|
+
* Guardrail configuration exposed to the AgentOS hook pipeline.
|
|
50
|
+
* Controls whether streaming chunks are evaluated and the per-request
|
|
51
|
+
* evaluation cap.
|
|
52
|
+
*/
|
|
53
|
+
readonly config: GuardrailConfig;
|
|
54
|
+
/** Detection pipeline shared across all evaluations. */
|
|
55
|
+
private readonly pipeline;
|
|
56
|
+
/** Redaction style applied when replacing detected PII spans. */
|
|
57
|
+
private readonly redactionStyle;
|
|
58
|
+
/** Which evaluation paths are active: 'input', 'output', or 'both'. */
|
|
59
|
+
private readonly scope;
|
|
60
|
+
/** Maximum sentence-boundary evaluations per stream. */
|
|
61
|
+
private readonly maxStreamingEvaluations;
|
|
62
|
+
/**
|
|
63
|
+
* Per-stream sentence-boundary buffers for output evaluation.
|
|
64
|
+
* Keys are `AgentOSResponseChunk.streamId` strings.
|
|
65
|
+
*/
|
|
66
|
+
private readonly streamBuffers;
|
|
67
|
+
/**
|
|
68
|
+
* Construct a new PiiRedactionGuardrail.
|
|
69
|
+
*
|
|
70
|
+
* @param services - Shared service registry forwarded to the detection
|
|
71
|
+
* pipeline for lazy-loading NLP/NER models.
|
|
72
|
+
* @param options - Pack-level configuration controlling entity types,
|
|
73
|
+
* confidence threshold, redaction style, guardrail scope,
|
|
74
|
+
* and streaming behaviour.
|
|
75
|
+
* @param getSecret - Optional secret resolver for the LLM judge API key.
|
|
76
|
+
*/
|
|
77
|
+
constructor(services: ISharedServiceRegistry, options: PiiRedactionPackOptions, getSecret?: (id: string) => string | undefined);
|
|
78
|
+
/**
|
|
79
|
+
* Evaluate inbound user text for PII before the orchestration pipeline
|
|
80
|
+
* processes it.
|
|
81
|
+
*
|
|
82
|
+
* When PII is found the method returns a {@link GuardrailAction.SANITIZE}
|
|
83
|
+
* result containing the redacted text. When no PII is found (or the
|
|
84
|
+
* input has no text) it returns `null` to signal the content should pass
|
|
85
|
+
* through unchanged.
|
|
86
|
+
*
|
|
87
|
+
* This method is a no-op (returns `null`) when `guardrailScope` is set
|
|
88
|
+
* to `'output'`.
|
|
89
|
+
*
|
|
90
|
+
* @param payload - Input payload containing the user's text and context.
|
|
91
|
+
* @returns Evaluation result with redacted text, or `null` if clean.
|
|
92
|
+
*/
|
|
93
|
+
evaluateInput(payload: GuardrailInputPayload): Promise<GuardrailEvaluationResult | null>;
|
|
94
|
+
/**
|
|
95
|
+
* Evaluate outbound agent response chunks for PII using a sentence-boundary
|
|
96
|
+
* buffer.
|
|
97
|
+
*
|
|
98
|
+
* ### Buffering strategy
|
|
99
|
+
*
|
|
100
|
+
* Text deltas are accumulated per-stream in an internal buffer. The buffer
|
|
101
|
+
* is scanned for PII only when a sentence boundary is detected (`. `, `? `,
|
|
102
|
+
* `! `, or `\n`) or when the stream ends (`isFinal === true` or chunk type
|
|
103
|
+
* is `FINAL_RESPONSE`).
|
|
104
|
+
*
|
|
105
|
+
* Entity offsets from the detection pipeline are relative to the **buffer**
|
|
106
|
+
* text, not individual chunk deltas, so redaction replacement is always
|
|
107
|
+
* positionally correct.
|
|
108
|
+
*
|
|
109
|
+
* An internal evaluation counter enforces {@link maxStreamingEvaluations}
|
|
110
|
+
* per stream. Once the limit is reached subsequent chunks pass through
|
|
111
|
+
* unevaluated.
|
|
112
|
+
*
|
|
113
|
+
* This method is a no-op (returns `null`) when `guardrailScope` is set
|
|
114
|
+
* to `'input'`.
|
|
115
|
+
*
|
|
116
|
+
* @param payload - Output payload containing the response chunk and context.
|
|
117
|
+
* @returns Evaluation result with redacted buffer text, or `null` if clean.
|
|
118
|
+
*/
|
|
119
|
+
evaluateOutput(payload: GuardrailOutputPayload): Promise<GuardrailEvaluationResult | null>;
|
|
120
|
+
}
|
|
121
|
+
//# sourceMappingURL=PiiRedactionGuardrail.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"PiiRedactionGuardrail.d.ts","sourceRoot":"","sources":["../../../../src/extensions/packs/pii-redaction/PiiRedactionGuardrail.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AAEH,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,8BAA8B,CAAC;AAC3E,OAAO,KAAK,EACV,iBAAiB,EACjB,eAAe,EACf,qBAAqB,EACrB,sBAAsB,EACtB,yBAAyB,EAC1B,MAAM,4CAA4C,CAAC;AAGpD,OAAO,KAAK,EAAE,uBAAuB,EAAkB,MAAM,SAAS,CAAC;AAoDvE;;;;;;;;;;;;;;;;GAgBG;AACH,qBAAa,qBAAsB,YAAW,iBAAiB;IAK7D;;;;OAIG;IACH,QAAQ,CAAC,MAAM,EAAE,eAAe,CAAC;IAMjC,wDAAwD;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAuB;IAEhD,iEAAiE;IACjE,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAiB;IAEhD,uEAAuE;IACvE,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA8B;IAEpD,wDAAwD;IACxD,OAAO,CAAC,QAAQ,CAAC,uBAAuB,CAAS;IAEjD;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAkC;IAMhE;;;;;;;;;OASG;gBAED,QAAQ,EAAE,sBAAsB,EAChC,OAAO,EAAE,uBAAuB,EAChC,SAAS,CAAC,EAAE,CAAC,EAAE,EAAE,MAAM,KAAK,MAAM,GAAG,SAAS;IAsBhD;;;;;;;;;;;;;;OAcG;IACG,aAAa,CACjB,OAAO,EAAE,qBAAqB,GAC7B,OAAO,CAAC,yBAAyB,GAAG,IAAI,CAAC;IA0C5C;;;;;;;;;;;;;;;;;;;;;;;;OAwBG;IACG,cAAc,CAClB,OAAO,EAAE,sBAAsB,GAC9B,OAAO,CAAC,yBAAyB,GAAG,IAAI,CAAC;CA8G7C"}
|