@framers/agentos 0.1.55 → 0.1.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. package/dist/extensions/ExtensionManager.d.ts +1 -0
  2. package/dist/extensions/ExtensionManager.d.ts.map +1 -1
  3. package/dist/extensions/ExtensionManager.js +8 -0
  4. package/dist/extensions/ExtensionManager.js.map +1 -1
  5. package/dist/extensions/ISharedServiceRegistry.d.ts +35 -0
  6. package/dist/extensions/ISharedServiceRegistry.d.ts.map +1 -0
  7. package/dist/extensions/ISharedServiceRegistry.js +2 -0
  8. package/dist/extensions/ISharedServiceRegistry.js.map +1 -0
  9. package/dist/extensions/SharedServiceRegistry.d.ts +15 -0
  10. package/dist/extensions/SharedServiceRegistry.d.ts.map +1 -0
  11. package/dist/extensions/SharedServiceRegistry.js +63 -0
  12. package/dist/extensions/SharedServiceRegistry.js.map +1 -0
  13. package/dist/extensions/index.d.ts +3 -0
  14. package/dist/extensions/index.d.ts.map +1 -1
  15. package/dist/extensions/index.js +4 -0
  16. package/dist/extensions/index.js.map +1 -1
  17. package/dist/extensions/manifest.d.ts +2 -0
  18. package/dist/extensions/manifest.d.ts.map +1 -1
  19. package/dist/extensions/packs/pii-redaction/EntityMerger.d.ts +127 -0
  20. package/dist/extensions/packs/pii-redaction/EntityMerger.d.ts.map +1 -0
  21. package/dist/extensions/packs/pii-redaction/EntityMerger.js +263 -0
  22. package/dist/extensions/packs/pii-redaction/EntityMerger.js.map +1 -0
  23. package/dist/extensions/packs/pii-redaction/PiiDetectionPipeline.d.ts +199 -0
  24. package/dist/extensions/packs/pii-redaction/PiiDetectionPipeline.d.ts.map +1 -0
  25. package/dist/extensions/packs/pii-redaction/PiiDetectionPipeline.js +456 -0
  26. package/dist/extensions/packs/pii-redaction/PiiDetectionPipeline.js.map +1 -0
  27. package/dist/extensions/packs/pii-redaction/PiiRedactionGuardrail.d.ts +121 -0
  28. package/dist/extensions/packs/pii-redaction/PiiRedactionGuardrail.d.ts.map +1 -0
  29. package/dist/extensions/packs/pii-redaction/PiiRedactionGuardrail.js +271 -0
  30. package/dist/extensions/packs/pii-redaction/PiiRedactionGuardrail.js.map +1 -0
  31. package/dist/extensions/packs/pii-redaction/RedactionEngine.d.ts +61 -0
  32. package/dist/extensions/packs/pii-redaction/RedactionEngine.d.ts.map +1 -0
  33. package/dist/extensions/packs/pii-redaction/RedactionEngine.js +207 -0
  34. package/dist/extensions/packs/pii-redaction/RedactionEngine.js.map +1 -0
  35. package/dist/extensions/packs/pii-redaction/index.d.ts +90 -0
  36. package/dist/extensions/packs/pii-redaction/index.d.ts.map +1 -0
  37. package/dist/extensions/packs/pii-redaction/index.js +195 -0
  38. package/dist/extensions/packs/pii-redaction/index.js.map +1 -0
  39. package/dist/extensions/packs/pii-redaction/recognizers/IEntityRecognizer.d.ts +151 -0
  40. package/dist/extensions/packs/pii-redaction/recognizers/IEntityRecognizer.d.ts.map +1 -0
  41. package/dist/extensions/packs/pii-redaction/recognizers/IEntityRecognizer.js +14 -0
  42. package/dist/extensions/packs/pii-redaction/recognizers/IEntityRecognizer.js.map +1 -0
  43. package/dist/extensions/packs/pii-redaction/recognizers/LlmJudgeRecognizer.d.ts +177 -0
  44. package/dist/extensions/packs/pii-redaction/recognizers/LlmJudgeRecognizer.d.ts.map +1 -0
  45. package/dist/extensions/packs/pii-redaction/recognizers/LlmJudgeRecognizer.js +420 -0
  46. package/dist/extensions/packs/pii-redaction/recognizers/LlmJudgeRecognizer.js.map +1 -0
  47. package/dist/extensions/packs/pii-redaction/recognizers/NerModelRecognizer.d.ts +145 -0
  48. package/dist/extensions/packs/pii-redaction/recognizers/NerModelRecognizer.d.ts.map +1 -0
  49. package/dist/extensions/packs/pii-redaction/recognizers/NerModelRecognizer.js +299 -0
  50. package/dist/extensions/packs/pii-redaction/recognizers/NerModelRecognizer.js.map +1 -0
  51. package/dist/extensions/packs/pii-redaction/recognizers/NlpPrefilterRecognizer.d.ts +102 -0
  52. package/dist/extensions/packs/pii-redaction/recognizers/NlpPrefilterRecognizer.d.ts.map +1 -0
  53. package/dist/extensions/packs/pii-redaction/recognizers/NlpPrefilterRecognizer.js +228 -0
  54. package/dist/extensions/packs/pii-redaction/recognizers/NlpPrefilterRecognizer.js.map +1 -0
  55. package/dist/extensions/packs/pii-redaction/recognizers/RegexRecognizer.d.ts +103 -0
  56. package/dist/extensions/packs/pii-redaction/recognizers/RegexRecognizer.d.ts.map +1 -0
  57. package/dist/extensions/packs/pii-redaction/recognizers/RegexRecognizer.js +275 -0
  58. package/dist/extensions/packs/pii-redaction/recognizers/RegexRecognizer.js.map +1 -0
  59. package/dist/extensions/packs/pii-redaction/tools/PiiRedactTool.d.ts +118 -0
  60. package/dist/extensions/packs/pii-redaction/tools/PiiRedactTool.d.ts.map +1 -0
  61. package/dist/extensions/packs/pii-redaction/tools/PiiRedactTool.js +152 -0
  62. package/dist/extensions/packs/pii-redaction/tools/PiiRedactTool.js.map +1 -0
  63. package/dist/extensions/packs/pii-redaction/tools/PiiScanTool.d.ts +98 -0
  64. package/dist/extensions/packs/pii-redaction/tools/PiiScanTool.d.ts.map +1 -0
  65. package/dist/extensions/packs/pii-redaction/tools/PiiScanTool.js +153 -0
  66. package/dist/extensions/packs/pii-redaction/tools/PiiScanTool.js.map +1 -0
  67. package/dist/extensions/packs/pii-redaction/types.d.ts +332 -0
  68. package/dist/extensions/packs/pii-redaction/types.d.ts.map +1 -0
  69. package/dist/extensions/packs/pii-redaction/types.js +83 -0
  70. package/dist/extensions/packs/pii-redaction/types.js.map +1 -0
  71. package/dist/extensions/types.d.ts +5 -0
  72. package/dist/extensions/types.d.ts.map +1 -1
  73. package/dist/extensions/types.js.map +1 -1
  74. package/package.json +11 -1
@@ -0,0 +1,263 @@
1
+ /**
2
+ * @file EntityMerger.ts
3
+ * @description Post-processing step that de-duplicates, filters, and merges
4
+ * {@link PiiEntity} spans emitted by the multi-tier detection pipeline.
5
+ *
6
+ * When multiple recognisers (regex, NER, LLM judge) run over the same text
7
+ * they frequently emit overlapping or duplicate spans. This module resolves
8
+ * those conflicts deterministically so that downstream redaction always
9
+ * receives a clean, non-overlapping, sorted list of entities.
10
+ *
11
+ * ## Processing pipeline
12
+ * 1. **Denylist boost** — entities whose text matches a denylist entry are
13
+ * promoted to score `1.0`, guaranteeing they survive threshold filtering.
14
+ * 2. **Allowlist filter** — entities whose text matches an allowlist entry are
15
+ * removed, unless the denylist boost in step 1 already claimed them.
16
+ * 3. **Sort** — remaining entities are sorted by start offset; ties are broken
17
+ * by span length descending so that longer (more specific) spans are
18
+ * processed first in the overlap-resolution pass.
19
+ * 4. **Overlap resolution** — a single-pass scan collapses overlapping and
20
+ * adjacent spans into the best representative entity (details below).
21
+ * 5. **Threshold filter** — entities whose final score is below
22
+ * `confidenceThreshold` are removed.
23
+ * 6. **Final sort** — output is sorted by start offset for stable iteration.
24
+ *
25
+ * @module pii-redaction/EntityMerger
26
+ */
27
+ // ---------------------------------------------------------------------------
28
+ // Helpers
29
+ // ---------------------------------------------------------------------------
30
/**
 * Half-open interval overlap test for two entity spans.
 *
 * `[a.start, a.end)` and `[b.start, b.end)` overlap exactly when each span
 * begins before the other one ends. Full containment of one span inside the
 * other counts as an overlap; spans that merely touch (`a.end === b.start`)
 * do not.
 *
 * @param a - First entity.
 * @param b - Second entity (callers pass the later-sorted entity here).
 * @returns `true` when the two spans share at least one position.
 */
function overlaps(a, b) {
    const aBeginsBeforeBEnds = a.start < b.end;
    const bBeginsBeforeAEnds = b.start < a.end;
    return aBeginsBeforeBEnds && bBeginsBeforeAEnds;
}
43
/**
 * Containment test: does span `a` cover span `b` completely?
 *
 * Identical spans count as containing each other, because both bounds are
 * compared inclusively.
 *
 * @param a - The candidate superset entity.
 * @param b - The candidate subset entity.
 * @returns `true` when `a.start <= b.start` and `a.end >= b.end`.
 */
function isSubset(a, b) {
    // `a` must begin at or before `b` ...
    if (!(a.start <= b.start)) {
        return false;
    }
    // ... and finish at or after it.
    return a.end >= b.end;
}
53
/**
 * Computes how many UTF-16 code units an entity span covers.
 *
 * @param entity - Entity whose `start` / `end` offsets delimit the span.
 * @returns `entity.end - entity.start`.
 */
function spanLength(entity) {
    const { start, end } = entity;
    return end - start;
}
61
/**
 * Creates a merged entity from two adjacent (or near-adjacent) entities of
 * the same type, bridging any gap between them.
 *
 * The merged `text` is normally the contiguous slice of `text` covering both
 * spans, so the real gap characters are preserved. When `text` cannot supply
 * that slice (it was omitted, is not a string, or is shorter than `b.end`),
 * the merged text is rebuilt from the two entity texts with the gap filled by
 * spaces. This keeps the result the same length as the span instead of
 * silently producing an empty or truncated string.
 *
 * The merged entity inherits the **maximum** score of the two inputs and the
 * `source` of the higher-scoring input (`a` wins on a tie). Neither input is
 * mutated.
 *
 * @param a - The earlier (lower start offset) entity.
 * @param b - The later entity.
 * @param text - The original full input text used to fill the gap chars.
 * @returns A new entity spanning from `a.start` to `b.end`, typed as `a`.
 */
function mergeAdjacent(a, b, text) {
    const expectedLength = b.end - a.start;
    const sliced = typeof text === 'string' ? text.slice(a.start, b.end) : '';
    // Only trust the slice when it covers the whole merged span; otherwise
    // fall back to the entity texts joined by one space per gap character.
    const mergedText = sliced.length === expectedLength
        ? sliced
        : `${a.text}${' '.repeat(Math.max(0, b.start - a.end))}${b.text}`;
    const mergedScore = Math.max(a.score, b.score);
    // Preserve the source of the higher-confidence detector; tie goes to `a`.
    const mergedSource = a.score >= b.score ? a.source : b.source;
    return {
        entityType: a.entityType,
        text: mergedText,
        start: a.start,
        end: b.end,
        score: mergedScore,
        source: mergedSource,
        // Merge metadata objects shallowly; keys from `b` overwrite keys from `a`.
        metadata: a.metadata || b.metadata
            ? { ...(a.metadata ?? {}), ...(b.metadata ?? {}) }
            : undefined,
    };
}
94
+ // ---------------------------------------------------------------------------
95
+ // Public API
96
+ // ---------------------------------------------------------------------------
97
/**
 * Merges a raw list of PII entity detections produced by one or more
 * recognisers into a clean, threshold-filtered list sorted by start offset.
 *
 * The function does not mutate its inputs: every retained entity is
 * shallow-copied before any further processing.
 *
 * ### Merge rules (applied in order)
 *
 * 1. **Denylist boost**: an entity whose lowercased `.text` is in
 *    `options.denylist` (compared case-insensitively) gets `score = 1.0`.
 *    The denylist is checked first, so it takes precedence over the
 *    allowlist when a value appears in both.
 *
 * 2. **Allowlist filter**: any other entity whose lowercased `.text` is in
 *    `options.allowlist` is dropped.
 *
 * 3. **Sort**: by `start` ascending; ties broken by span length descending.
 *
 * 4. **Overlap resolution** (single pass, each entity is compared against the
 *    most recently accepted one):
 *    - *Identical or nested spans*: the accepted span is replaced only when
 *      the incoming span has a strictly higher score.
 *    - *Partial overlap*: the incoming span replaces the accepted one only
 *      when it is longer **and** scores at least as high; otherwise the
 *      incoming span is discarded.
 *    - *Adjacent spans* (gap of 0–2 characters and the same `entityType`):
 *      merged into one entity via `mergeAdjacent`.
 *    - *Otherwise*: both spans are kept.
 *
 * 5. **Confidence threshold filter**: entities with
 *    `score < options.confidenceThreshold` are removed. Skipped when the
 *    threshold is `undefined`.
 *
 * 6. **Final sort**: output is a fresh array sorted by `start` ascending.
 *
 * @param entities - Raw (possibly overlapping, unsorted) entity list.
 * @param options  - Optional `allowlist`, `denylist`, `confidenceThreshold`.
 * @param text     - The original input string. Pass it whenever adjacent
 *                   merging may occur so the merged entity's `text` can be
 *                   read from the real characters spanning the gap.
 * @returns A new array of entities sorted by `start` offset.
 *
 * @example
 * ```ts
 * const clean = mergeEntities(rawEntities, {
 *   allowlist: ['support@example.com'],
 *   denylist: ['secret-project'],
 *   confidenceThreshold: 0.6,
 * }, originalText);
 * ```
 */
export function mergeEntities(entities, options = {}, text = '') {
    const { allowlist = [], denylist = [], confidenceThreshold } = options;
    // Normalise allow/denylist values once for O(1) lookup per entity.
    const allowSet = new Set(allowlist.map((s) => s.toLowerCase()));
    const denySet = new Set(denylist.map((s) => s.toLowerCase()));

    // Steps 1 & 2: denylist boost and allowlist filter in a single pass.
    const working = [];
    for (const entity of entities) {
        const lower = entity.text.toLowerCase();
        if (denySet.has(lower)) {
            // Denylisted text always scores 1.0 (and bypasses the allowlist).
            working.push({ ...entity, score: 1.0 });
            continue;
        }
        if (allowSet.has(lower)) {
            // Allowlisted text is never reported.
            continue;
        }
        working.push({ ...entity });
    }

    // Step 3: start offset ascending; wider span first when starts are equal.
    working.sort((a, b) => {
        if (a.start !== b.start) {
            return a.start - b.start;
        }
        return spanLength(b) - spanLength(a);
    });

    // Step 4: overlap resolution — single left-to-right pass.
    const resolved = [];
    for (const current of working) {
        if (resolved.length === 0) {
            // First entity — nothing to compare against yet.
            resolved.push(current);
            continue;
        }
        // Always compare against the most recently accepted entity.
        const lastIndex = resolved.length - 1;
        const last = resolved[lastIndex];

        if (overlaps(last, current)) {
            // One span lies entirely inside the other (identical spans included).
            const nested = isSubset(current, last) || isSubset(last, current);
            // Nested: only a strictly better score displaces the accepted span.
            // Partial overlap: the incoming span must be longer and at least as
            // confident. In every other case the accepted span stays.
            const currentWins = nested
                ? current.score > last.score
                : spanLength(current) > spanLength(last) && current.score >= last.score;
            if (currentWins) {
                resolved[lastIndex] = current;
            }
            continue;
        }

        // No overlap. Spans of the same type separated by at most two
        // characters (e.g. a name split across a hyphen or a space) are fused.
        const gap = current.start - last.end;
        if (gap >= 0 && gap <= 2 && current.entityType === last.entityType) {
            resolved[lastIndex] = mergeAdjacent(last, current, text);
        }
        else {
            // Genuinely separate entities — keep both.
            resolved.push(current);
        }
    }

    // Step 5: confidence threshold filter (skipped when no threshold is set).
    const thresholded = confidenceThreshold === undefined
        ? resolved
        : resolved.filter((e) => e.score >= confidenceThreshold);

    // Step 6: return a fresh array ordered by start offset. The sort is kept
    // as a defensive guarantee for callers.
    return thresholded.slice().sort((a, b) => a.start - b.start);
}
263
+ //# sourceMappingURL=EntityMerger.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"EntityMerger.js","sourceRoot":"","sources":["../../../../src/extensions/packs/pii-redaction/EntityMerger.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AA0DH,8EAA8E;AAC9E,UAAU;AACV,8EAA8E;AAE9E;;;;;;;;;GASG;AACH,SAAS,QAAQ,CAAC,CAAY,EAAE,CAAY;IAC1C,OAAO,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,CAAC;AAC5C,CAAC;AAED;;;;;;GAMG;AACH,SAAS,QAAQ,CAAC,CAAY,EAAE,CAAY;IAC1C,OAAO,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,GAAG,CAAC;AAC9C,CAAC;AAED;;;;GAIG;AACH,SAAS,UAAU,CAAC,MAAiB;IACnC,OAAO,MAAM,CAAC,GAAG,GAAG,MAAM,CAAC,KAAK,CAAC;AACnC,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,SAAS,aAAa,CAAC,CAAY,EAAE,CAAY,EAAE,IAAY;IAC7D,6EAA6E;IAC7E,yDAAyD;IACzD,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC;IAC9C,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC;IAC/C,0EAA0E;IAC1E,MAAM,YAAY,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IAE9D,OAAO;QACL,UAAU,EAAE,CAAC,CAAC,UAAU;QACxB,IAAI,EAAE,UAAU;QAChB,KAAK,EAAE,CAAC,CAAC,KAAK;QACd,GAAG,EAAE,CAAC,CAAC,GAAG;QACV,KAAK,EAAE,WAAW;QAClB,MAAM,EAAE,YAAY;QACpB,uEAAuE;QACvE,QAAQ,EACN,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,QAAQ;YACtB,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,QAAQ,IAAI,EAAE,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,QAAQ,IAAI,EAAE,CAAC,EAAE;YAClD,CAAC,CAAC,SAAS;KAChB,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,aAAa;AACb,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkDG;AACH,MAAM,UAAU,aAAa,CAC3B,QAAqB,EACrB,UAAwB,EAAE,EAC1B,IAAI,GAAG,EAAE;IAET,MAAM,EAAE,SAAS,GAAG,EAAE,EAAE,QAAQ,GAAG,EAAE,EAAE,mBAAmB,EAAE,GAAG,OAAO,CAAC;IAEvE,mEAAmE;IACnE,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;IAChE,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;IAE9D,4EAA4E;IAC5E,0EAA0E;IAC1E,4EAA4E;IAE5E,IAAI,OAAO,GAAgB,EAAE,CAAC;IAC9B,KAAK,MAAM,MAAM,
IAAI,QAAQ,EAAE,CAAC;QAC9B,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;QAExC,sEAAsE;QACtE,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,CAAC,IAAI,CAAC,EAAE,GAAG,MAAM,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;YACxC,SAAS;QACX,CAAC;QAED,oEAAoE;QACpE,IAAI,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;YACxB,SAAS;QACX,CAAC;QAED,6CAA6C;QAC7C,OAAO,CAAC,IAAI,CAAC,EAAE,GAAG,MAAM,EAAE,CAAC,CAAC;IAC9B,CAAC;IAED,4EAA4E;IAC5E,yEAAyE;IACzE,6DAA6D;IAC7D,4EAA4E;IAE5E,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACpB,IAAI,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,KAAK;YAAE,OAAO,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;QAClD,8BAA8B;QAC9B,OAAO,UAAU,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC,CAAC,CAAC;IAEH,4EAA4E;IAC5E,0DAA0D;IAC1D,4EAA4E;IAE5E,MAAM,QAAQ,GAAgB,EAAE,CAAC;IAEjC,KAAK,MAAM,OAAO,IAAI,OAAO,EAAE,CAAC;QAC9B,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,iDAAiD;YACjD,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACvB,SAAS;QACX,CAAC;QAED,+DAA+D;QAC/D,MAAM,IAAI,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAE3C,IAAI,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,EAAE,CAAC;YAC5B,yBAAyB;YAEzB,IAAI,QAAQ,CAAC,OAAO,EAAE,IAAI,CAAC,EAAE,CAAC;gBAC5B,mEAAmE;gBACnE,gEAAgE;gBAChE,oEAAoE;gBACpE,IAAI,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;oBAC/B,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,OAAO,CAAC;gBAC1C,CAAC;gBACD,sEAAsE;YACxE,CAAC;iBAAM,IAAI,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,EAAE,CAAC;gBACnC,iEAAiE;gBACjE,mEAAmE;gBACnE,iEAAiE;gBACjE,mEAAmE;gBACnE,uBAAuB;gBACvB,EAAE;gBACF,kBAAkB;gBAClB,uEAAuE;gBACvE,uEAAuE;gBACvE,MAAM,cAAc,GAAG,CAAC,OAAO,CAAC,GAAG,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;gBAC/E,IAAI,cAAc,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,EAAE,CAAC;oBAC9E,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,OAAO,CAAC;gBAC1C,CAAC;gBACD,qEAAqE;gBACrE,8CAA8C;YAChD,CAAC;iBAAM,CAAC;gBACN,6DAA6D;gBAC7D,6DAA6D;gBAC7D,IACE,UAAU,CAAC,OAAO,CAAC,GAAG,UAAU,CAAC,IAAI,CAAC;oBACtC,OAAO,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,EAC3B,CA
AC;oBACD,+DAA+D;oBAC/D,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,OAAO,CAAC;gBAC1C,CAAC;gBACD,yBAAyB;YAC3B,CAAC;QACH,CAAC;aAAM,CAAC;YACN,wEAAwE;YACxE,oEAAoE;YACpE,sEAAsE;YACtE,MAAM,GAAG,GAAG,OAAO,CAAC,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC;YAErC,IACE,GAAG,IAAI,CAAC;gBACR,GAAG,IAAI,CAAC;gBACR,OAAO,CAAC,UAAU,KAAK,IAAI,CAAC,UAAU,EACtC,CAAC;gBACD,mEAAmE;gBACnE,4BAA4B;gBAC5B,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;gBAClD,QAAQ,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC;YACzC,CAAC;iBAAM,CAAC;gBACN,2CAA2C;gBAC3C,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACzB,CAAC;QACH,CAAC;IACH,CAAC;IAED,4EAA4E;IAC5E,uCAAuC;IACvC,4EAA4E;IAE5E,MAAM,WAAW,GACf,mBAAmB,KAAK,SAAS;QAC/B,CAAC,CAAC,QAAQ;QACV,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,mBAAmB,CAAC,CAAC;IAE7D,4EAA4E;IAC5E,yEAAyE;IACzE,sEAAsE;IACtE,4EAA4E;IAE5E,OAAO,WAAW,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;AAC/D,CAAC"}
@@ -0,0 +1,199 @@
1
+ /**
2
+ * @file PiiDetectionPipeline.ts
3
+ * @description Orchestrates the four-tier PII detection pipeline: Regex →
4
+ * NLP pre-filter → NER model → LLM judge. Each tier is gated by the
5
+ * previous tier's output or configuration flags so that only the work needed
6
+ * is performed, keeping median latency low for clean or simple inputs.
7
+ *
8
+ * ## Tier overview
9
+ *
10
+ * | Tier | Class | Always runs? |
11
+ * |------|------------------------|-----------------------------------------------|
12
+ * | 1 | RegexRecognizer | Yes |
13
+ * | 2 | NlpPrefilterRecognizer | Yes (when available / compromise installed) |
14
+ * | 3 | NerModelRecognizer | Only when Tier 2 found PERSON/ORG/LOC |
15
+ * | 4 | LlmJudgeRecognizer | Only for ambiguous entities (0.3 < score < 0.7)|
16
+ *
17
+ * After tiers 1–3 produce raw candidates, {@link mergeEntities} collapses
18
+ * overlapping spans. Tier 4 then re-examines the ambiguous slice. Finally
19
+ * the merged list is threshold-filtered and sorted by start offset.
20
+ *
21
+ * @module pii-redaction/PiiDetectionPipeline
22
+ */
23
+ import type { ISharedServiceRegistry } from '../../ISharedServiceRegistry';
24
+ import type { PiiDetectionResult, PiiRedactionPackOptions } from './types';
25
/**
 * Orchestrates the four PII detection tiers — Regex, NLP pre-filter, NER
 * model, and LLM judge — behind a single `detect()` call.
 *
 * ### Construction
 * ```ts
 * const pipeline = new PiiDetectionPipeline(serviceRegistry, packOptions, getSecret);
 * const result = await pipeline.detect('Call me at 555-123-4567');
 * ```
 *
 * ### Lifecycle
 * Intended to be built once at pack startup and reused for many `detect()`
 * calls. Recognisers are created eagerly; their heavy dependencies (NLP
 * models, NER weights) are loaded lazily on first use.
 *
 * ### Concurrency
 * `detect()` is documented as safe to call concurrently from multiple async
 * contexts:
 * - Regex and NLP recognisers create fresh scoped instances per call.
 * - The NER model pipeline is shared through the service registry.
 * - The LLM judge caps concurrent requests with an internal semaphore.
 */
export declare class PiiDetectionPipeline {
    /**
     * Tier 1 — regex recogniser backed by the `openredaction` library.
     * Runs on every call regardless of configuration.
     */
    private readonly regexRecognizer;
    /**
     * Tier 2 — NLP pre-filter built on the `compromise` library. Picks up
     * person names, locations, and organisations that regex misses, and
     * emits low-confidence candidates (0.3–0.6) for higher tiers to confirm.
     */
    private readonly nlpPrefilter;
    /**
     * Tier 3 — HuggingFace BERT NER model. Runs only when Tier 2 produced at
     * least one PERSON, ORGANIZATION, or LOCATION candidate.
     */
    private readonly nerRecognizer;
    /**
     * Tier 4 — LLM-powered judge for ambiguous entities. Only created when
     * {@link PiiRedactionPackOptions.llmJudge} is provided.
     */
    private readonly llmJudge;
    /** Entity types to detect after option resolution (defaults to all types). */
    private readonly entityTypes;
    /** String allowlist forwarded to {@link mergeEntities}. */
    private readonly allowlist;
    /** String denylist forwarded to {@link mergeEntities}. */
    private readonly denylist;
    /**
     * Minimum confidence score an entity needs to appear in the final output.
     * @default {@link DEFAULT_CONFIDENCE_THRESHOLD}
     */
    private readonly confidenceThreshold;
    /**
     * Gate for the NER model tier; `false` skips Tier 3 unconditionally.
     * @default true (when the option is absent, NER is allowed to run)
     */
    private readonly enableNerModel;
    /**
     * Construct a new PiiDetectionPipeline.
     *
     * Every recogniser is instantiated here, but none of them loads its heavy
     * dependencies (NLP libraries, transformer models) before the first call
     * to {@link detect}.
     *
     * @param services  - Shared service registry used to lazy-load NLP/NER
     *                    models so they are shared across the agent.
     * @param options   - Pack-level configuration: entity type filter,
     *                    confidence threshold, allow/denylists, and optional
     *                    LLM judge config.
     * @param getSecret - Optional lookup for credential secrets by ID
     *                    (e.g. `'openai.apiKey'`, `'pii.llm.apiKey'`). Used to
     *                    resolve the LLM judge API key when it is not given
     *                    explicitly in {@link LlmJudgeConfig.apiKey}.
     */
    constructor(services: ISharedServiceRegistry, options: PiiRedactionPackOptions, getSecret?: (id: string) => string | undefined);
    /**
     * Runs every applicable detection tier over `text` and returns the
     * merged, threshold-filtered entity list together with pipeline metadata.
     *
     * ### Processing steps
     * 1. **Tier 1 (Regex)** — always executed; deterministic pattern matching.
     * 2. **Context enhancement** — scans ±{@link CONTEXT_WINDOW_CHARS} chars
     *    around each Tier 1 entity for keyword signals and boosts scores.
     * 3. **Tier 2 (NLP)** — always attempted; degrades gracefully when
     *    `compromise` is not installed.
     * 4. **Tier 3 (NER)** — runs only when `enableNerModel !== false` AND
     *    Tier 2 found at least one PERSON, ORGANIZATION, or LOCATION candidate.
     * 5. **Merge** — {@link mergeEntities} collapses overlapping spans and
     *    applies allow/denylists. The threshold is NOT applied yet.
     * 6. **Tier 4 (LLM judge)** — applied only to entities in the ambiguous
     *    score band (0.3 < score < 0.7) and only when `llmJudge` is
     *    configured. `null` results (NOT_PII) are discarded.
     * 7. **Threshold filter** — entities with `score < confidenceThreshold`
     *    are removed from the final output.
     * 8. **Sort + summary** — sorted by start offset; summary string built.
     *
     * @param text - The raw input text to analyse.
     * @returns A {@link PiiDetectionResult} containing detected entities and
     *          pipeline execution metadata.
     */
    detect(text: string): Promise<PiiDetectionResult>;
    /**
     * Applies context enhancement to Tier 1 regex entities.
     *
     * A window of ±{@link CONTEXT_WINDOW_CHARS} characters around each entity
     * is scanned for known context keywords. A keyword that matches the
     * entity's type boosts its score by {@link CONTEXT_BOOST_STRONG} or
     * {@link CONTEXT_BOOST_WEAK}, capped at 1.0.
     *
     * Entities whose type has no context-keyword mapping come back unchanged.
     * The result is a new array; the original entities are not mutated.
     *
     * @param entities - Raw Tier 1 entities to enhance.
     * @param text     - Full input text (used to extract context windows).
     * @returns New array of entities with potentially boosted scores.
     */
    private applyContextEnhancement;
    /**
     * Runs the LLM judge over entities in the ambiguous score band
     * (LLM_JUDGE_SCORE_LOW < score < LLM_JUDGE_SCORE_HIGH).
     *
     * Entities outside that band pass through as-is. For ambiguous entities,
     * `judge()` is awaited in parallel (up to the semaphore limit configured
     * in LlmJudgeRecognizer). Entities judged NOT_PII (null return) are
     * discarded.
     *
     * @param entities - Merged entity list from step 5 of {@link detect}.
     * @param text     - Full input text passed to the judge for context.
     * @returns Updated entity list after LLM judgement.
     */
    private runLlmJudge;
    /**
     * Resolves the API key for the LLM judge through a three-level fallback:
     *
     * 1. Explicit `config.apiKey` (highest priority — caller-supplied).
     * 2. Provider-specific secret via `getSecret('<provider>.apiKey')`.
     * 3. Pack-specific generic secret via `getSecret('pii.llm.apiKey')`.
     *
     * Returns a new {@link LlmJudgeConfig} whose `apiKey` is the resolved
     * value (or the original value when a key was already present).
     *
     * @param config    - Original LLM judge configuration.
     * @param getSecret - Optional secret resolver function.
     * @returns Config with `apiKey` resolved to the best available value.
     */
    private resolveJudgeApiKey;
    /**
     * Builds a human-readable summary string from the final entity list.
     *
     * Format: `"<n> entities found: <count>×<TYPE>, ..."`, or
     * `"No PII detected"` when the list is empty.
     *
     * Entity types are sorted alphabetically for deterministic output, and
     * only types that are actually present appear in the summary.
     *
     * @example
     * ```
     * "3 entities found: 1×EMAIL, 1×PERSON, 1×PHONE"
     * "No PII detected"
     * ```
     *
     * @param entities - Final threshold-filtered, sorted entity list.
     * @returns Human-readable summary string.
     */
    private buildSummary;
}
199
+ //# sourceMappingURL=PiiDetectionPipeline.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"PiiDetectionPipeline.d.ts","sourceRoot":"","sources":["../../../../src/extensions/packs/pii-redaction/PiiDetectionPipeline.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,8BAA8B,CAAC;AAC3E,OAAO,KAAK,EAGV,kBAAkB,EAClB,uBAAuB,EAExB,MAAM,SAAS,CAAC;AAwGjB;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,qBAAa,oBAAoB;IAK/B;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAElD;;;;OAIG;IACH,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAyB;IAEtD;;;;OAIG;IACH,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAqB;IAEnD;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA4B;IAMrD,+DAA+D;IAC/D,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAkB;IAE9C,yEAAyE;IACzE,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAW;IAErC,wEAAwE;IACxE,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAW;IAEpC;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,mBAAmB,CAAS;IAE7C;;;;OAIG;IACH,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAU;IAMzC;;;;;;;;;;;;;;;;OAgBG;gBAED,QAAQ,EAAE,sBAAsB,EAChC,OAAO,EAAE,uBAAuB,EAChC,SAAS,CAAC,EAAE,CAAC,EAAE,EAAE,MAAM,KAAK,MAAM,GAAG,SAAS;IAwChD;;;;;;;;;;;;;;;;;;;;;;;;;;OA0BG;IACU,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAuH9D;;;;;;;;;;;;;;;;OAgBG;IACH,OAAO,CAAC,uBAAuB;IAwC/B;;;;;;;;;;;;OAYG;YACW,WAAW;IAqCzB;;;;;;;;;;;;;OAaG;IACH,OAAO,CAAC,kBAAkB;IAsB1B;;;;;;;;;;;;;;;;;OAiBG;IACH,OAAO,CAAC,YAAY;CAiBrB"}