@framers/agentos 0.1.56 → 0.1.57
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +67 -16
- package/dist/api/types/AgentOSResponse.d.ts +7 -0
- package/dist/api/types/AgentOSResponse.d.ts.map +1 -1
- package/dist/core/guardrails/IGuardrailService.d.ts +39 -0
- package/dist/core/guardrails/IGuardrailService.d.ts.map +1 -1
- package/dist/core/guardrails/ParallelGuardrailDispatcher.d.ts +92 -0
- package/dist/core/guardrails/ParallelGuardrailDispatcher.d.ts.map +1 -0
- package/dist/core/guardrails/ParallelGuardrailDispatcher.js +463 -0
- package/dist/core/guardrails/ParallelGuardrailDispatcher.js.map +1 -0
- package/dist/core/guardrails/guardrailDispatcher.d.ts +59 -1
- package/dist/core/guardrails/guardrailDispatcher.d.ts.map +1 -1
- package/dist/core/guardrails/guardrailDispatcher.js +49 -154
- package/dist/core/guardrails/guardrailDispatcher.js.map +1 -1
- package/dist/core/guardrails/index.d.ts +1 -0
- package/dist/core/guardrails/index.d.ts.map +1 -1
- package/dist/core/guardrails/index.js +2 -0
- package/dist/core/guardrails/index.js.map +1 -1
- package/dist/core/utils/index.d.ts +13 -0
- package/dist/core/utils/index.d.ts.map +1 -0
- package/dist/core/utils/index.js +13 -0
- package/dist/core/utils/index.js.map +1 -0
- package/dist/core/utils/text-utils.d.ts +164 -0
- package/dist/core/utils/text-utils.d.ts.map +1 -0
- package/dist/core/utils/text-utils.js +254 -0
- package/dist/core/utils/text-utils.js.map +1 -0
- package/dist/extensions/index.d.ts +0 -1
- package/dist/extensions/index.d.ts.map +1 -1
- package/dist/extensions/index.js +0 -2
- package/dist/extensions/index.js.map +1 -1
- package/package.json +1 -6
- package/dist/extensions/packs/pii-redaction/EntityMerger.d.ts +0 -127
- package/dist/extensions/packs/pii-redaction/EntityMerger.d.ts.map +0 -1
- package/dist/extensions/packs/pii-redaction/EntityMerger.js +0 -263
- package/dist/extensions/packs/pii-redaction/EntityMerger.js.map +0 -1
- package/dist/extensions/packs/pii-redaction/PiiDetectionPipeline.d.ts +0 -199
- package/dist/extensions/packs/pii-redaction/PiiDetectionPipeline.d.ts.map +0 -1
- package/dist/extensions/packs/pii-redaction/PiiDetectionPipeline.js +0 -456
- package/dist/extensions/packs/pii-redaction/PiiDetectionPipeline.js.map +0 -1
- package/dist/extensions/packs/pii-redaction/PiiRedactionGuardrail.d.ts +0 -121
- package/dist/extensions/packs/pii-redaction/PiiRedactionGuardrail.d.ts.map +0 -1
- package/dist/extensions/packs/pii-redaction/PiiRedactionGuardrail.js +0 -271
- package/dist/extensions/packs/pii-redaction/PiiRedactionGuardrail.js.map +0 -1
- package/dist/extensions/packs/pii-redaction/RedactionEngine.d.ts +0 -61
- package/dist/extensions/packs/pii-redaction/RedactionEngine.d.ts.map +0 -1
- package/dist/extensions/packs/pii-redaction/RedactionEngine.js +0 -207
- package/dist/extensions/packs/pii-redaction/RedactionEngine.js.map +0 -1
- package/dist/extensions/packs/pii-redaction/index.d.ts +0 -90
- package/dist/extensions/packs/pii-redaction/index.d.ts.map +0 -1
- package/dist/extensions/packs/pii-redaction/index.js +0 -195
- package/dist/extensions/packs/pii-redaction/index.js.map +0 -1
- package/dist/extensions/packs/pii-redaction/recognizers/IEntityRecognizer.d.ts +0 -151
- package/dist/extensions/packs/pii-redaction/recognizers/IEntityRecognizer.d.ts.map +0 -1
- package/dist/extensions/packs/pii-redaction/recognizers/IEntityRecognizer.js +0 -14
- package/dist/extensions/packs/pii-redaction/recognizers/IEntityRecognizer.js.map +0 -1
- package/dist/extensions/packs/pii-redaction/recognizers/LlmJudgeRecognizer.d.ts +0 -177
- package/dist/extensions/packs/pii-redaction/recognizers/LlmJudgeRecognizer.d.ts.map +0 -1
- package/dist/extensions/packs/pii-redaction/recognizers/LlmJudgeRecognizer.js +0 -420
- package/dist/extensions/packs/pii-redaction/recognizers/LlmJudgeRecognizer.js.map +0 -1
- package/dist/extensions/packs/pii-redaction/recognizers/NerModelRecognizer.d.ts +0 -145
- package/dist/extensions/packs/pii-redaction/recognizers/NerModelRecognizer.d.ts.map +0 -1
- package/dist/extensions/packs/pii-redaction/recognizers/NerModelRecognizer.js +0 -299
- package/dist/extensions/packs/pii-redaction/recognizers/NerModelRecognizer.js.map +0 -1
- package/dist/extensions/packs/pii-redaction/recognizers/NlpPrefilterRecognizer.d.ts +0 -102
- package/dist/extensions/packs/pii-redaction/recognizers/NlpPrefilterRecognizer.d.ts.map +0 -1
- package/dist/extensions/packs/pii-redaction/recognizers/NlpPrefilterRecognizer.js +0 -228
- package/dist/extensions/packs/pii-redaction/recognizers/NlpPrefilterRecognizer.js.map +0 -1
- package/dist/extensions/packs/pii-redaction/recognizers/RegexRecognizer.d.ts +0 -103
- package/dist/extensions/packs/pii-redaction/recognizers/RegexRecognizer.d.ts.map +0 -1
- package/dist/extensions/packs/pii-redaction/recognizers/RegexRecognizer.js +0 -275
- package/dist/extensions/packs/pii-redaction/recognizers/RegexRecognizer.js.map +0 -1
- package/dist/extensions/packs/pii-redaction/tools/PiiRedactTool.d.ts +0 -118
- package/dist/extensions/packs/pii-redaction/tools/PiiRedactTool.d.ts.map +0 -1
- package/dist/extensions/packs/pii-redaction/tools/PiiRedactTool.js +0 -152
- package/dist/extensions/packs/pii-redaction/tools/PiiRedactTool.js.map +0 -1
- package/dist/extensions/packs/pii-redaction/tools/PiiScanTool.d.ts +0 -98
- package/dist/extensions/packs/pii-redaction/tools/PiiScanTool.d.ts.map +0 -1
- package/dist/extensions/packs/pii-redaction/tools/PiiScanTool.js +0 -153
- package/dist/extensions/packs/pii-redaction/tools/PiiScanTool.js.map +0 -1
- package/dist/extensions/packs/pii-redaction/types.d.ts +0 -332
- package/dist/extensions/packs/pii-redaction/types.d.ts.map +0 -1
- package/dist/extensions/packs/pii-redaction/types.js +0 -83
- package/dist/extensions/packs/pii-redaction/types.js.map +0 -1
|
@@ -1,420 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @file LlmJudgeRecognizer.ts
|
|
3
|
-
* @description Tier 4 LLM-powered judge that re-examines individual PII
|
|
4
|
-
* entity candidates using a chain-of-thought (CoT) prompt.
|
|
5
|
-
*
|
|
6
|
-
* Unlike the other recognisers, this class does **not** implement
|
|
7
|
-
* {@link IEntityRecognizer} because it operates on already-detected entities
|
|
8
|
-
* rather than raw text. Its primary entry point is
|
|
9
|
-
* {@link LlmJudgeRecognizer.judge}, which takes a single {@link PiiEntity}
|
|
10
|
-
* plus the surrounding full text and returns either a confirmed/reclassified
|
|
11
|
-
* entity or `null` if the LLM determines the span is not PII.
|
|
12
|
-
*
|
|
13
|
-
* ### Key features
|
|
14
|
-
* - **Chain-of-thought prompt**: Forces the LLM to reason before classifying,
|
|
15
|
-
* improving accuracy on ambiguous spans.
|
|
16
|
-
* - **LRU cache**: Keyed by `(span_text, context_hash)` to avoid redundant
|
|
17
|
-
* calls for identical spans in similar contexts.
|
|
18
|
-
* - **Semaphore**: Limits concurrent LLM requests to prevent rate-limit
|
|
19
|
-
* exhaustion on high-throughput agents.
|
|
20
|
-
* - **Fail-open**: If the LLM call fails for any reason, the original entity
|
|
21
|
-
* is returned unchanged (conservative — prefer false positive over leak).
|
|
22
|
-
*
|
|
23
|
-
* @module pii-redaction/recognizers
|
|
24
|
-
*/
|
|
25
|
-
// ---------------------------------------------------------------------------
|
|
26
|
-
// LRU cache implementation
|
|
27
|
-
// ---------------------------------------------------------------------------
|
|
28
|
-
/**
 * Bounded least-recently-used (LRU) cache backed by a `Map`.
 *
 * A `Map` iterates its keys in insertion order, so the first key in
 * iteration order is always the least recently used one. Every successful
 * `get` and every `set` removes the key and inserts it again, which moves it
 * to the "most recently used" end of that order.
 */
class LruCache {
    /**
     * @param maxSize - Maximum number of entries kept. Once a `set` pushes
     *   the entry count above this value, the least recently used entry is
     *   evicted.
     */
    constructor(maxSize) {
        /** Internal ordered map storage (least recently used entry first). */
        this.map = new Map();
        this.maxSize = maxSize;
    }
    /**
     * Retrieve a cached value, promoting it to most-recently-used.
     *
     * @param key - Cache key.
     * @returns The cached value, or `undefined` if the key is not present.
     */
    get(key) {
        // Test membership with has() instead of comparing the value to
        // `undefined`: an entry whose stored value is `undefined` is still a
        // hit and must be promoted, otherwise it is evicted out of LRU order.
        if (!this.map.has(key)) {
            return undefined;
        }
        const value = this.map.get(key);
        // Delete + re-insert moves the key to the most-recently-used end.
        this.map.delete(key);
        this.map.set(key, value);
        return value;
    }
    /**
     * Insert or update a value, evicting the least recently used entry if the
     * cache grows beyond its capacity.
     *
     * @param key - Cache key.
     * @param value - Value to cache.
     */
    set(key, value) {
        // Removing first resets the key's position; delete() is a no-op for
        // keys that are not present.
        this.map.delete(key);
        this.map.set(key, value);
        // At most one entry is added per call, so one eviction is enough.
        if (this.map.size > this.maxSize) {
            const oldestKey = this.map.keys().next().value;
            this.map.delete(oldestKey);
        }
    }
    /** Current number of entries in the cache. */
    get size() {
        return this.map.size;
    }
}
|
|
84
|
-
// ---------------------------------------------------------------------------
|
|
85
|
-
// Semaphore implementation
|
|
86
|
-
// ---------------------------------------------------------------------------
|
|
87
|
-
/**
 * Counting semaphore for limiting concurrent async operations.
 *
 * Callers `acquire()` a permit before starting work and `release()` it when
 * done. If all permits are taken, `acquire()` returns a promise that
 * resolves once another caller releases a permit. Waiters are served in the
 * order in which they called `acquire()`.
 */
class Semaphore {
    /**
     * @param maxConcurrency - Maximum number of simultaneous permits. Values
     *   that would leave no usable permit (zero, negative, `NaN`) are
     *   treated as `1`, because otherwise every `acquire()` would wait
     *   forever.
     */
    constructor(maxConcurrency) {
        /** Queue of resolve callbacks for callers blocked in `acquire()`. */
        this.waiters = [];
        /** Upper bound for `permits`; guards against unmatched releases. */
        this.maxPermits = maxConcurrency >= 1 ? maxConcurrency : 1;
        /** Number of permits currently available. */
        this.permits = this.maxPermits;
    }
    /**
     * Acquire a permit. Resolves immediately if a permit is available,
     * otherwise resolves once one is handed over by `release()`.
     */
    async acquire() {
        if (this.permits > 0) {
            this.permits--;
            return;
        }
        // No permits available — park the caller until release() wakes it.
        return new Promise((resolve) => {
            this.waiters.push(resolve);
        });
    }
    /**
     * Release a permit, waking the longest-waiting caller if there is one.
     */
    release() {
        const next = this.waiters.shift();
        if (next) {
            // Hand the permit directly to the waiter; the available count
            // stays unchanged because the permit never becomes free.
            next();
            return;
        }
        // Never exceed the configured limit, even if release() is called
        // more often than acquire().
        if (this.permits < this.maxPermits) {
            this.permits++;
        }
    }
}
|
|
131
|
-
// ---------------------------------------------------------------------------
|
|
132
|
-
// System prompt
|
|
133
|
-
// ---------------------------------------------------------------------------
|
|
134
|
-
/**
 * System prompt for the PII judge. It is sent as the `system` message of
 * every chat-completions request made by `LlmJudgeRecognizer.callLlm`.
 *
 * The prompt asks the model to reason step by step, but to answer with a
 * single JSON object only (`isPii`, `entityType`, `confidence`,
 * `reasoning`), so the explanation travels inside the `reasoning` field.
 *
 * The `entityType` alternatives listed here are the same labels accepted by
 * the allow-list in `LlmJudgeRecognizer.mapLlmEntityType`, plus `NOT_PII`,
 * which `processJudgement` treats as "discard the candidate". Keep both
 * lists in sync when adding or removing a type.
 */
const SYSTEM_PROMPT = `You are a PII (Personally Identifiable Information) classification expert.

Your task: given a text span that was flagged as potential PII, determine whether it truly contains PII that should be redacted.

Think step-by-step:
1. What is the span text?
2. What is the surrounding context?
3. Is this genuinely identifying information about a real person, or is it generic/fictional/public knowledge?
4. What specific PII type does it represent?

Respond with ONLY a valid JSON object (no markdown, no explanation outside the JSON):
{
"isPii": true/false,
"entityType": "PERSON" | "ORGANIZATION" | "LOCATION" | "EMAIL" | "PHONE" | "SSN" | "CREDIT_CARD" | "IP_ADDRESS" | "DATE_OF_BIRTH" | "API_KEY" | "AWS_KEY" | "CRYPTO_ADDRESS" | "IBAN" | "PASSPORT" | "DRIVERS_LICENSE" | "GOV_ID" | "MEDICAL_TERM" | "UNKNOWN_PII" | "NOT_PII",
"confidence": 0.0-1.0,
"reasoning": "brief explanation of your classification"
}`;
|
|
155
|
-
// ---------------------------------------------------------------------------
|
|
156
|
-
// LlmJudgeRecognizer
|
|
157
|
-
// ---------------------------------------------------------------------------
|
|
158
|
-
/**
 * Entity-type labels the judge accepts from the LLM. Any other label falls
 * back to the candidate's original type (see `mapLlmEntityType`).
 *
 * Built once at module load instead of on every classification.
 */
const VALID_PII_ENTITY_TYPES = new Set([
    'SSN', 'CREDIT_CARD', 'EMAIL', 'PHONE', 'IP_ADDRESS', 'IBAN',
    'PASSPORT', 'DRIVERS_LICENSE', 'GOV_ID', 'DATE_OF_BIRTH', 'API_KEY',
    'AWS_KEY', 'CRYPTO_ADDRESS', 'PERSON', 'ORGANIZATION', 'LOCATION',
    'MEDICAL_TERM', 'UNKNOWN_PII',
]);
/**
 * Tier 4 LLM-powered judge that confirms or reclassifies individual PII
 * entity candidates using chain-of-thought reasoning.
 *
 * ### Usage pattern
 * ```ts
 * const judge = new LlmJudgeRecognizer(config);
 * const result = await judge.judge(candidateEntity, fullText);
 * if (result === null) {
 *   // LLM says it's not PII — discard the candidate.
 * } else {
 *   // Use the confirmed/reclassified entity.
 * }
 * ```
 *
 * ### Caching
 * The LLM's verdict is cached under a composite key of the span text and a
 * hash of the full text. A cache hit re-applies the stored verdict to the
 * entity passed in, so a span that occurs several times in one text triggers
 * a single LLM call while every occurrence keeps its own offsets.
 *
 * ### Concurrency control
 * A counting semaphore limits the number of in-flight LLM requests to
 * `config.maxConcurrency` (default 3).
 *
 * ### Failure mode
 * The judge is **fail-open**: if the LLM call fails (network error, non-2xx
 * status, unparseable or malformed response), the original entity is
 * returned unchanged. A false positive (over-redaction) is preferred over
 * leaking real PII.
 */
export class LlmJudgeRecognizer {
    /**
     * Construct a new LlmJudgeRecognizer.
     *
     * @param config - LLM provider/model configuration. Reads `model`,
     *   `baseUrl`, `apiKey`, `cacheSize` (default 256) and `maxConcurrency`
     *   (default 3).
     * @param fetchImpl - Optional injectable fetch function for testing.
     *   Defaults to the global `fetch`.
     */
    constructor(config, fetchImpl) {
        /** Human-readable name for logging/diagnostics. */
        this.name = 'LlmJudgeRecognizer';
        this.config = config;
        this.cache = new LruCache(config.cacheSize ?? 256);
        this.semaphore = new Semaphore(config.maxConcurrency ?? 3);
        this.fetchImpl = fetchImpl ?? globalThis.fetch;
    }
    /**
     * Judge a single PII entity candidate in context.
     *
     * @param entity - The candidate entity to evaluate.
     * @param fullText - The full text from which the entity was extracted,
     *   providing the LLM with surrounding context.
     * @returns The confirmed/reclassified entity, `null` if the LLM
     *   determines the span is not PII, or the unchanged `entity` if the LLM
     *   could not be consulted (fail-open).
     */
    async judge(entity, fullText) {
        const cacheKey = this.buildCacheKey(entity.text, fullText);
        // The cache stores the LLM verdict, not a finished entity: the key
        // does not include the span offsets, so a cached entity would carry
        // the offsets and metadata of whichever occurrence was judged first.
        const cachedJudgement = this.cache.get(cacheKey);
        if (cachedJudgement !== undefined) {
            return this.processJudgement(cachedJudgement, entity);
        }
        // Respect the concurrency limit for real LLM calls.
        await this.semaphore.acquire();
        try {
            const userPrompt = this.buildUserPrompt(entity, fullText);
            const judgement = await this.callLlm(userPrompt);
            const result = this.processJudgement(judgement, entity);
            // Only verdicts that were processed successfully are cached.
            this.cache.set(cacheKey, judgement);
            return result;
        }
        catch {
            // Fail-open: if anything goes wrong, keep the candidate as it
            // is rather than risk dropping real PII.
            return entity;
        }
        finally {
            // Always release the semaphore permit.
            this.semaphore.release();
        }
    }
    // -----------------------------------------------------------------------
    // Private helpers
    // -----------------------------------------------------------------------
    /**
     * Builds the cache key from the span text and a DJB2 hash of the given
     * context string.
     *
     * @param spanText - The matched text span.
     * @param context - The text the span was found in.
     * @returns A composite cache key string of the form `span::hash`.
     */
    buildCacheKey(spanText, context) {
        const contextHash = this.djb2Hash(context);
        return `${spanText}::${contextHash}`;
    }
    /**
     * DJB2 hash function by Daniel J. Bernstein.
     * Fast, non-cryptographic hash suitable for cache keys.
     *
     * @param str - String to hash.
     * @returns Lower-case hex representation of the unsigned 32-bit hash.
     */
    djb2Hash(str) {
        let hash = 5381;
        for (let i = 0; i < str.length; i++) {
            // hash * 33 + charCode, truncated to a signed 32-bit integer.
            hash = ((hash << 5) + hash + str.charCodeAt(i)) | 0;
        }
        // `>>> 0` reinterprets the value as unsigned before formatting.
        return (hash >>> 0).toString(16);
    }
    /**
     * Builds the user prompt containing the span text, its current
     * classification and a window of surrounding text.
     *
     * @param entity - The candidate entity.
     * @param fullText - The full text for context.
     * @returns The formatted user prompt string.
     */
    buildUserPrompt(entity, fullText) {
        // Up to 200 characters of context on each side of the span.
        const contextWindow = 200;
        const ctxStart = Math.max(0, entity.start - contextWindow);
        const ctxEnd = Math.min(fullText.length, entity.end + contextWindow);
        const surroundingContext = fullText.slice(ctxStart, ctxEnd);
        return [
            `Span text: "${entity.text}"`,
            `Current classification: ${entity.entityType} (confidence: ${entity.score.toFixed(2)})`,
            `Source tier: ${entity.source}`,
            `Surrounding context: "${surroundingContext}"`,
        ].join('\n');
    }
    /**
     * Calls the LLM via an OpenAI-compatible chat completions API.
     *
     * Uses raw `fetch` (no SDK dependency) so that any OpenAI-compatible
     * endpoint can be targeted through `config.baseUrl`.
     *
     * @param userPrompt - The user message content.
     * @returns Parsed LLM judgement response.
     * @throws If the API call fails, the status is not OK, or the response
     *   content is not a valid judgement object.
     */
    async callLlm(userPrompt) {
        // Strip trailing slashes so a base URL such as ".../v1/" does not
        // produce ".../v1//chat/completions".
        const baseUrl = (this.config.baseUrl ?? 'https://api.openai.com/v1').replace(/\/+$/, '');
        const apiKey = this.config.apiKey ?? '';
        // Call through a local variable: invoking the platform `fetch` as a
        // method of this object throws "Illegal invocation" in browser-like
        // runtimes, which would silently disable the judge via fail-open.
        const fetchFn = this.fetchImpl;
        const response = await fetchFn(`${baseUrl}/chat/completions`, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                Authorization: `Bearer ${apiKey}`,
            },
            body: JSON.stringify({
                model: this.config.model,
                messages: [
                    { role: 'system', content: SYSTEM_PROMPT },
                    { role: 'user', content: userPrompt },
                ],
                temperature: 0.1, // Low temperature for near-deterministic classification
                max_tokens: 300,
            }),
        });
        if (!response.ok) {
            throw new Error(`LLM API returned status ${response.status}`);
        }
        const data = await response.json();
        // A missing choice/message yields '', which parseJudgement rejects.
        const content = data.choices?.[0]?.message?.content ?? '';
        return this.parseJudgement(content);
    }
    /**
     * Parses the LLM's response content into a structured judgement object.
     *
     * Handles common LLM response quirks:
     * - Markdown code fences around JSON
     * - Leading/trailing whitespace
     *
     * @param content - Raw response content string from the LLM.
     * @returns The parsed judgement object.
     * @throws If the content is not valid JSON, or is valid JSON but not an
     *   object with a boolean `isPii` field.
     */
    parseJudgement(content) {
        let cleaned = content.trim();
        if (cleaned.startsWith('```')) {
            // Remove opening fence (```json or ```)
            cleaned = cleaned.replace(/^```(?:json)?\s*/, '');
            // Remove closing fence
            cleaned = cleaned.replace(/\s*```$/, '');
        }
        const parsed = JSON.parse(cleaned);
        // Reject well-formed JSON that is not a verdict (e.g. `42`, `[]`,
        // `{}`). Without this check a missing `isPii` reads as "not PII" and
        // the candidate would be discarded instead of failing open.
        if (parsed === null ||
            typeof parsed !== 'object' ||
            Array.isArray(parsed) ||
            typeof parsed.isPii !== 'boolean') {
            throw new Error('LLM judgement is not an object with a boolean "isPii" field');
        }
        return parsed;
    }
    /**
     * Applies the LLM's judgement to a candidate entity.
     *
     * - If the LLM says the span is NOT PII, returns `null`.
     * - If the LLM confirms PII, returns a new entity with the LLM's
     *   classification and confidence, preserving the original values in
     *   `metadata` for audit.
     *
     * @param judgement - The parsed LLM response.
     * @param original - The original candidate entity.
     * @returns Updated entity or `null`.
     */
    processJudgement(judgement, original) {
        if (!judgement.isPii || judgement.entityType === 'NOT_PII') {
            return null;
        }
        // Unknown labels fall back to the candidate's original type.
        const newEntityType = this.mapLlmEntityType(judgement.entityType, original.entityType);
        // The confidence comes from free-form model output: keep it only if
        // it is a finite number, clamped to [0, 1]; otherwise retain the
        // score of the original detection.
        const confidence = judgement.confidence;
        const score = typeof confidence === 'number' && Number.isFinite(confidence)
            ? Math.min(1, Math.max(0, confidence))
            : original.score;
        return {
            entityType: newEntityType,
            text: original.text,
            start: original.start,
            end: original.end,
            score,
            source: 'llm',
            metadata: {
                ...original.metadata,
                llmReasoning: judgement.reasoning,
                llmModel: this.config.model,
                originalEntityType: original.entityType,
                originalScore: original.score,
                originalSource: original.source,
            },
        };
    }
    /**
     * Maps the LLM's entity type string to one of the accepted entity types.
     *
     * @param llmType - Entity type string from the LLM response.
     * @param fallback - Original entity type to use as fallback.
     * @returns `llmType` if it is an accepted label, otherwise `fallback`.
     */
    mapLlmEntityType(llmType, fallback) {
        return VALID_PII_ENTITY_TYPES.has(llmType) ? llmType : fallback;
    }
}
|
|
420
|
-
//# sourceMappingURL=LlmJudgeRecognizer.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"LlmJudgeRecognizer.js","sourceRoot":"","sources":["../../../../../src/extensions/packs/pii-redaction/recognizers/LlmJudgeRecognizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAIH,8EAA8E;AAC9E,2BAA2B;AAC3B,8EAA8E;AAE9E;;;;;;;;GAQG;AACH,MAAM,QAAQ;IAOZ;;OAEG;IACH,YAAY,OAAe;QAT3B,oCAAoC;QACnB,QAAG,GAAG,IAAI,GAAG,EAAa,CAAC;QAS1C,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;IACzB,CAAC;IAED;;;;;OAKG;IACH,GAAG,CAAC,GAAW;QACb,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAChC,IAAI,KAAK,KAAK,SAAS;YAAE,OAAO,SAAS,CAAC;QAE1C,oEAAoE;QACpE,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACrB,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QACzB,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;;;;OAKG;IACH,GAAG,CAAC,GAAW,EAAE,KAAQ;QACvB,oEAAoE;QACpE,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YACtB,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACvB,CAAC;QAED,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;QAEzB,qDAAqD;QACrD,IAAI,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC;YACjC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,KAAe,CAAC;YACzD,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAC7B,CAAC;IACH,CAAC;IAED,8CAA8C;IAC9C,IAAI,IAAI;QACN,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC;IACvB,CAAC;CACF;AAED,8EAA8E;AAC9E,2BAA2B;AAC3B,8EAA8E;AAE9E;;;;;;GAMG;AACH,MAAM,SAAS;IAOb;;OAEG;IACH,YAAY,cAAsB;QANlC,sDAAsD;QACrC,YAAO,GAAsB,EAAE,CAAC;QAM/C,IAAI,CAAC,OAAO,GAAG,cAAc,CAAC;IAChC,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC;YACrB,IAAI,CAAC,OAAO,EAAE,CAAC;YACf,OAAO;QACT,CAAC;QAED,yCAAyC;QACzC,OAAO,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE;YACnC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC7B,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACH,OAAO;QACL,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;QAClC,IAAI,IAAI,EAAE,CAAC;YACT,+CAA+C;YAC/C,IAAI,EAAE,CAAC;QACT,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,OAAO,EAAE,CAAC;QACjB,CAAC;IACH,CAAC;CACF;AAuBD,8EAA8E;AAC9E,gBAAgB;AAChB,8EAA8E;AAE9E;;;GAGG;AACH,MAAM,aAAa,GAAG;;;
;;;;;;;;;;;;;EAgBpB,CAAC;AAgBH,8EAA8E;AAC9E,qBAAqB;AACrB,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AACH,MAAM,OAAO,kBAAkB;IAgB7B;;;;;;OAMG;IACH,YAAY,MAAsB,EAAE,SAAmB;QAtBvD,mDAAmD;QACnC,SAAI,GAAG,oBAAoB,CAAC;QAsB1C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,KAAK,GAAG,IAAI,QAAQ,CAAC,MAAM,CAAC,SAAS,IAAI,GAAG,CAAC,CAAC;QACnD,IAAI,CAAC,SAAS,GAAG,IAAI,SAAS,CAAC,MAAM,CAAC,cAAc,IAAI,CAAC,CAAC,CAAC;QAC3D,IAAI,CAAC,SAAS,GAAG,SAAS,IAAK,UAAU,CAAC,KAA4B,CAAC;IACzE,CAAC;IAED;;;;;;;;OAQG;IACI,KAAK,CAAC,KAAK,CAAC,MAAiB,EAAE,QAAgB;QACpD,oEAAoE;QACpE,MAAM,QAAQ,GAAG,IAAI,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QAE3D,qBAAqB;QACrB,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACxC,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACzB,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,4DAA4D;QAC5D,MAAM,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC;QAE/B,IAAI,CAAC;YACH,uDAAuD;YACvD,MAAM,UAAU,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;YAE1D,2DAA2D;YAC3D,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;YAEjD,+BAA+B;YAC/B,MAAM,MAAM,GAAG,IAAI,CAAC,gBAAgB,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;YAExD,oBAAoB;YACpB,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;YAEjC,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,MAAM,CAAC;YACP,+DAA+D;YAC/D,+CAA+C;YAC/C,OAAO,MAAM,CAAC;QAChB,CAAC;gBAAS,CAAC;YACT,uCAAuC;YACvC,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC;QAC3B,CAAC;IACH,CAAC;IAED,0EAA0E;IAC1E,kBAAkB;IAClB,0EAA0E;IAE1E;;;;;;;;;;OAUG;IACK,aAAa,CAAC,QAAgB,EAAE,OAAe;QACrD,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAC3C,OAAO,GAAG,QAAQ,KAAK,WAAW,EAAE,CAAC;IACvC,CAAC;IAED;;;;;;OAMG;IACK,QAAQ,CAAC,GAAW;QAC1B,IAAI,IAAI,GAAG,IAAI,CAAC;QAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACpC,uBAAuB;YACvB,IAAI,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QACtD,CAAC;QACD,OAAO,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;IACnC,CAAC;IAED;;;;;;;OAOG;IACK,eAAe,CAAC,MAAiB,EAAE,QAAgB;QACzD,6DAA6D;QAC7D,MAAM,aAAa,GAAG,GAAG,CAAC,CAAC,0BAA0B;Q
ACrD,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,KAAK,GAAG,aAAa,CAAC,CAAC;QAC3D,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,GAAG,aAAa,CAAC,CAAC;QACrE,MAAM,kBAAkB,GAAG,QAAQ,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;QAE5D,OAAO;YACL,eAAe,MAAM,CAAC,IAAI,GAAG;YAC7B,2BAA2B,MAAM,CAAC,UAAU,iBAAiB,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;YACvF,gBAAgB,MAAM,CAAC,MAAM,EAAE;YAC/B,yBAAyB,kBAAkB,GAAG;SAC/C,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACf,CAAC;IAED;;;;;;;;;OASG;IACK,KAAK,CAAC,OAAO,CAAC,UAAkB;QACtC,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,2BAA2B,CAAC;QACnE,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC;QAExC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,OAAO,mBAAmB,EAAE;YACnE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,MAAM,EAAE;aAClC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;gBACnB,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK;gBACxB,QAAQ,EAAE;oBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,aAAa,EAAE;oBAC1C,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE;iBACtC;gBACD,WAAW,EAAE,GAAG,EAAE,mDAAmD;gBACrE,UAAU,EAAE,GAAG;aAChB,CAAC;SACH,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,2BAA2B,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAChE,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAE/B,CAAC;QAEF,gEAAgE;QAChE,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,IAAI,EAAE,CAAC;QAE1D,+DAA+D;QAC/D,OAAO,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;IACtC,CAAC;IAED;;;;;;;;;;OAUG;IACK,cAAc,CAAC,OAAe;QACpC,yCAAyC;QACzC,IAAI,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;QAC7B,IAAI,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;YAC9B,wCAAwC;YACxC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC;YAClD,uBAAuB;YACvB,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;QAC3C,CAAC;QAED,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAiB,CAAC;IAC7C,CAAC;IAED;;;;;;;;;;;OAWG;IACK,gBAAgB,CACtB,SAAuB,EACvB,QAAmB;QAEnB,oDAAoD;QACpD,IAAI,CAAC,SAAS,CAAC,KAAK,IAAI,SAAS,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;YAC3D,OAAO,IAAI,CAAC;QACd,CAAC;QAED,sEAAsE;QACtE,gEAAgE;QA
ChE,MAAM,aAAa,GAAG,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,UAAU,EAAE,QAAQ,CAAC,UAAU,CAAC,CAAC;QAEvF,OAAO;YACL,UAAU,EAAE,aAAa;YACzB,IAAI,EAAE,QAAQ,CAAC,IAAI;YACnB,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,GAAG,EAAE,QAAQ,CAAC,GAAG;YACjB,KAAK,EAAE,SAAS,CAAC,UAAU;YAC3B,MAAM,EAAE,KAAK;YACb,QAAQ,EAAE;gBACR,GAAG,QAAQ,CAAC,QAAQ;gBACpB,YAAY,EAAE,SAAS,CAAC,SAAS;gBACjC,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK;gBAC3B,kBAAkB,EAAE,QAAQ,CAAC,UAAU;gBACvC,aAAa,EAAE,QAAQ,CAAC,KAAK;gBAC7B,cAAc,EAAE,QAAQ,CAAC,MAAM;aAChC;SACF,CAAC;IACJ,CAAC;IAED;;;;;;;;;OASG;IACK,gBAAgB,CAAC,OAAe,EAAE,QAAuB;QAC/D,yDAAyD;QACzD,MAAM,UAAU,GAAgB,IAAI,GAAG,CAAC;YACtC,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM;YAC5D,UAAU,EAAE,iBAAiB,EAAE,QAAQ,EAAE,eAAe,EAAE,SAAS;YACnE,SAAS,EAAE,gBAAgB,EAAE,QAAQ,EAAE,cAAc,EAAE,UAAU;YACjE,cAAc,EAAE,aAAa;SAC9B,CAAC,CAAC;QAEH,IAAI,UAAU,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YAC5B,OAAO,OAAwB,CAAC;QAClC,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;CACF"}
|
|
@@ -1,145 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* @file NerModelRecognizer.ts
|
|
3
|
-
* @description Tier 3 NER-model recogniser that uses a HuggingFace
|
|
4
|
-
* Transformers pipeline for high-accuracy named-entity recognition.
|
|
5
|
-
*
|
|
6
|
-
* This recogniser loads a pre-trained BERT-style NER model via the
|
|
7
|
-
* `@huggingface/transformers` library and maps BIO-tagged outputs
|
|
8
|
-
* (B-PER, I-PER, B-LOC, I-LOC, B-ORG, I-ORG, B-MISC, I-MISC) to the
|
|
9
|
-
* pipeline's {@link PiiEntityType} values.
|
|
10
|
-
*
|
|
11
|
-
* The model is lazily loaded through the {@link ISharedServiceRegistry} so
|
|
12
|
-
* that only one instance exists per agent, and it is shared across any
|
|
13
|
-
* extensions that need NER capabilities.
|
|
14
|
-
*
|
|
15
|
-
* @module pii-redaction/recognizers
|
|
16
|
-
*/
|
|
17
|
-
import type { PiiEntity, PiiEntityType } from '../types';
|
|
18
|
-
import type { IEntityRecognizer, RecognizeOptions } from './IEntityRecognizer';
|
|
19
|
-
import type { ISharedServiceRegistry } from '../../../ISharedServiceRegistry';
|
|
20
|
-
/**
|
|
21
|
-
* Shape of a single token-level NER result from the HuggingFace
|
|
22
|
-
* transformers pipeline.
|
|
23
|
-
*/
|
|
24
|
-
export interface NerToken {
|
|
25
|
-
/** BIO-tagged entity label, e.g. `'B-PER'`, `'I-LOC'`, `'O'`. */
|
|
26
|
-
entity: string;
|
|
27
|
-
/** The sub-word or word text for this token. */
|
|
28
|
-
word: string;
|
|
29
|
-
/** Confidence score from the model (0–1). */
|
|
30
|
-
score: number;
|
|
31
|
-
/** Character start offset in the original input. */
|
|
32
|
-
start: number;
|
|
33
|
-
/** Character end offset in the original input. */
|
|
34
|
-
end: number;
|
|
35
|
-
}
|
|
36
|
-
/**
|
|
37
|
-
* Tier 3 entity recogniser that runs a HuggingFace BERT NER model for
|
|
38
|
-
* high-accuracy named-entity recognition.
|
|
39
|
-
*
|
|
40
|
-
* ### How it works
|
|
41
|
-
* 1. On first `recognize()` call, the `@huggingface/transformers` library is
|
|
42
|
-
* loaded and a `token-classification` pipeline is created via the shared
|
|
43
|
-
* service registry.
|
|
44
|
-
* 2. The pipeline tokenises the input and runs it through the NER model,
|
|
45
|
-
* returning BIO-tagged token predictions.
|
|
46
|
-
* 3. Contiguous BIO tokens are merged: a `B-PER` followed by `I-PER` tokens
|
|
47
|
-
* becomes a single PERSON entity. The final score is the average of the
|
|
48
|
-
* constituent token scores.
|
|
49
|
-
* 4. Merged entities are mapped to {@link PiiEntity} objects.
|
|
50
|
-
*
|
|
51
|
-
* ### Graceful degradation
|
|
52
|
-
* If `@huggingface/transformers` is not installed or the model fails to load,
|
|
53
|
-
* the recogniser sets `unavailable = true` and returns empty arrays on all
|
|
54
|
-
* subsequent calls, ensuring the pipeline degrades without crashing.
|
|
55
|
-
*
|
|
56
|
-
* @example
|
|
57
|
-
* ```ts
|
|
58
|
-
* const registry = new SharedServiceRegistry();
|
|
59
|
-
* const recognizer = new NerModelRecognizer(registry);
|
|
60
|
-
* const entities = await recognizer.recognize('John Smith lives in London');
|
|
61
|
-
* // entities: [{ entityType: 'PERSON', text: 'John Smith', ... },
|
|
62
|
-
* // { entityType: 'LOCATION', text: 'London', ... }]
|
|
63
|
-
* ```
|
|
64
|
-
*/
|
|
65
|
-
export declare class NerModelRecognizer implements IEntityRecognizer {
|
|
66
|
-
/** @inheritdoc */
|
|
67
|
-
readonly name = "NerModelRecognizer";
|
|
68
|
-
/** @inheritdoc */
|
|
69
|
-
readonly supportedEntities: PiiEntityType[];
|
|
70
|
-
/**
|
|
71
|
-
* When `true`, the transformers library or model failed to load and all
|
|
72
|
-
* future calls will return empty arrays.
|
|
73
|
-
*/
|
|
74
|
-
private unavailable;
|
|
75
|
-
/**
|
|
76
|
-
* Reference to the shared service registry for lazy-loading the NER
|
|
77
|
-
* pipeline.
|
|
78
|
-
*/
|
|
79
|
-
private readonly services;
|
|
80
|
-
/**
|
|
81
|
-
* Construct a new NerModelRecognizer.
|
|
82
|
-
*
|
|
83
|
-
* @param services - Shared service registry for lazy-loading the
|
|
84
|
-
* HuggingFace NER pipeline.
|
|
85
|
-
*/
|
|
86
|
-
constructor(services: ISharedServiceRegistry);
|
|
87
|
-
/**
|
|
88
|
-
* Scan the input text for named entities using a BERT NER model.
|
|
89
|
-
*
|
|
90
|
-
* BIO-tagged tokens are merged into contiguous entity spans and mapped
|
|
91
|
-
* to {@link PiiEntity} objects.
|
|
92
|
-
*
|
|
93
|
-
* @param input - Raw text to analyse.
|
|
94
|
-
* @param options - Optional filtering and context hints.
|
|
95
|
-
* @returns Array of detected {@link PiiEntity} objects.
|
|
96
|
-
*/
|
|
97
|
-
recognize(input: string, options?: RecognizeOptions): Promise<PiiEntity[]>;
|
|
98
|
-
/** @inheritdoc */
|
|
99
|
-
dispose(): Promise<void>;
|
|
100
|
-
/**
|
|
101
|
-
* Determines which of our supported entity types the caller wants.
|
|
102
|
-
*
|
|
103
|
-
* @param entityTypes - Optional entity-type filter from the caller.
|
|
104
|
-
* @returns Set of wanted types intersected with our supported types.
|
|
105
|
-
*/
|
|
106
|
-
private resolveWantedTypes;
|
|
107
|
-
/**
|
|
108
|
-
* Merges BIO-tagged tokens into contiguous entity spans.
|
|
109
|
-
*
|
|
110
|
-
* The BIO tagging scheme works as follows:
|
|
111
|
-
* - `B-XXX` — Beginning of a new entity of type XXX.
|
|
112
|
-
* - `I-XXX` — Inside/continuation of the current entity of type XXX.
|
|
113
|
-
* - `O` — Outside any entity (ignored).
|
|
114
|
-
*
|
|
115
|
-
* A `B-PER` followed by one or more `I-PER` tokens produces a single
|
|
116
|
-
* merged span. When a `B-XXX` appears while another entity is open,
|
|
117
|
-
* the previous entity is flushed and a new one begins.
|
|
118
|
-
*
|
|
119
|
-
* @param tokens - Raw BIO-tagged token array from the NER pipeline.
|
|
120
|
-
* @returns Array of merged entity spans with aggregated metadata.
|
|
121
|
-
*/
|
|
122
|
-
private mergeBioTokens;
|
|
123
|
-
/**
|
|
124
|
-
* Parses a BIO label string like `'B-PER'` or `'I-LOC'` into its
|
|
125
|
-
* tag component (`'B'`, `'I'`, `'O'`) and entity label (`'PER'`, `'LOC'`).
|
|
126
|
-
*
|
|
127
|
-
* @param bioLabel - The raw BIO label from the NER model.
|
|
128
|
-
* @returns Parsed tag and label.
|
|
129
|
-
*/
|
|
130
|
-
private parseBioLabel;
|
|
131
|
-
/**
|
|
132
|
-
* Maps merged entity spans to {@link PiiEntity} objects, filtering by
|
|
133
|
-
* the set of wanted entity types.
|
|
134
|
-
*
|
|
135
|
-
* The score for each entity is the arithmetic mean of its constituent
|
|
136
|
-
* token scores, reflecting the model's average confidence across the
|
|
137
|
-
* full span.
|
|
138
|
-
*
|
|
139
|
-
* @param merged - Array of merged BIO entity spans.
|
|
140
|
-
* @param wantedTypes - Set of entity types the caller is interested in.
|
|
141
|
-
* @returns Filtered array of {@link PiiEntity} objects.
|
|
142
|
-
*/
|
|
143
|
-
private mapToEntities;
|
|
144
|
-
}
|
|
145
|
-
//# sourceMappingURL=NerModelRecognizer.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"NerModelRecognizer.d.ts","sourceRoot":"","sources":["../../../../../src/extensions/packs/pii-redaction/recognizers/NerModelRecognizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzD,OAAO,KAAK,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AAC/E,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,iCAAiC,CAAC;AAsC9E;;;GAGG;AACH,MAAM,WAAW,QAAQ;IACvB,iEAAiE;IACjE,MAAM,EAAE,MAAM,CAAC;IACf,gDAAgD;IAChD,IAAI,EAAE,MAAM,CAAC;IACb,6CAA6C;IAC7C,KAAK,EAAE,MAAM,CAAC;IACd,oDAAoD;IACpD,KAAK,EAAE,MAAM,CAAC;IACd,kDAAkD;IAClD,GAAG,EAAE,MAAM,CAAC;CACb;AAYD;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AACH,qBAAa,kBAAmB,YAAW,iBAAiB;IAC1D,kBAAkB;IAClB,SAAgB,IAAI,wBAAwB;IAE5C,kBAAkB;IAClB,SAAgB,iBAAiB,EAAE,aAAa,EAAE,CAKhD;IAEF;;;OAGG;IACH,OAAO,CAAC,WAAW,CAAS;IAE5B;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAyB;IAElD;;;;;OAKG;gBACS,QAAQ,EAAE,sBAAsB;IAI5C;;;;;;;;;OASG;IACU,SAAS,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC;IAgDvF,kBAAkB;IACL,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IASrC;;;;;OAKG;IACH,OAAO,CAAC,kBAAkB;IAQ1B;;;;;;;;;;;;;;OAcG;IACH,OAAO,CAAC,cAAc;IAuDtB;;;;;;OAMG;IACH,OAAO,CAAC,aAAa;IAerB;;;;;;;;;;;OAWG;IACH,OAAO,CAAC,aAAa;CAmCtB"}
|