@elanlanguages/bridge-anonymization 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/README.md +382 -0
  2. package/dist/crypto/index.d.ts +6 -0
  3. package/dist/crypto/index.d.ts.map +1 -0
  4. package/dist/crypto/index.js +6 -0
  5. package/dist/crypto/index.js.map +1 -0
  6. package/dist/crypto/pii-map-crypto.d.ts +100 -0
  7. package/dist/crypto/pii-map-crypto.d.ts.map +1 -0
  8. package/dist/crypto/pii-map-crypto.js +163 -0
  9. package/dist/crypto/pii-map-crypto.js.map +1 -0
  10. package/dist/index.d.ts +173 -0
  11. package/dist/index.d.ts.map +1 -0
  12. package/dist/index.js +294 -0
  13. package/dist/index.js.map +1 -0
  14. package/dist/ner/bio-decoder.d.ts +64 -0
  15. package/dist/ner/bio-decoder.d.ts.map +1 -0
  16. package/dist/ner/bio-decoder.js +216 -0
  17. package/dist/ner/bio-decoder.js.map +1 -0
  18. package/dist/ner/index.d.ts +10 -0
  19. package/dist/ner/index.d.ts.map +1 -0
  20. package/dist/ner/index.js +10 -0
  21. package/dist/ner/index.js.map +1 -0
  22. package/dist/ner/model-manager.d.ts +102 -0
  23. package/dist/ner/model-manager.d.ts.map +1 -0
  24. package/dist/ner/model-manager.js +253 -0
  25. package/dist/ner/model-manager.js.map +1 -0
  26. package/dist/ner/ner-model.d.ts +114 -0
  27. package/dist/ner/ner-model.d.ts.map +1 -0
  28. package/dist/ner/ner-model.js +240 -0
  29. package/dist/ner/ner-model.js.map +1 -0
  30. package/dist/ner/onnx-runtime.d.ts +45 -0
  31. package/dist/ner/onnx-runtime.d.ts.map +1 -0
  32. package/dist/ner/onnx-runtime.js +99 -0
  33. package/dist/ner/onnx-runtime.js.map +1 -0
  34. package/dist/ner/tokenizer.d.ts +140 -0
  35. package/dist/ner/tokenizer.d.ts.map +1 -0
  36. package/dist/ner/tokenizer.js +341 -0
  37. package/dist/ner/tokenizer.js.map +1 -0
  38. package/dist/pipeline/index.d.ts +9 -0
  39. package/dist/pipeline/index.d.ts.map +1 -0
  40. package/dist/pipeline/index.js +9 -0
  41. package/dist/pipeline/index.js.map +1 -0
  42. package/dist/pipeline/prenormalize.d.ts +48 -0
  43. package/dist/pipeline/prenormalize.d.ts.map +1 -0
  44. package/dist/pipeline/prenormalize.js +94 -0
  45. package/dist/pipeline/prenormalize.js.map +1 -0
  46. package/dist/pipeline/resolver.d.ts +56 -0
  47. package/dist/pipeline/resolver.d.ts.map +1 -0
  48. package/dist/pipeline/resolver.js +238 -0
  49. package/dist/pipeline/resolver.js.map +1 -0
  50. package/dist/pipeline/tagger.d.ts +74 -0
  51. package/dist/pipeline/tagger.d.ts.map +1 -0
  52. package/dist/pipeline/tagger.js +169 -0
  53. package/dist/pipeline/tagger.js.map +1 -0
  54. package/dist/pipeline/validator.d.ts +65 -0
  55. package/dist/pipeline/validator.d.ts.map +1 -0
  56. package/dist/pipeline/validator.js +264 -0
  57. package/dist/pipeline/validator.js.map +1 -0
  58. package/dist/recognizers/base.d.ts +78 -0
  59. package/dist/recognizers/base.d.ts.map +1 -0
  60. package/dist/recognizers/base.js +100 -0
  61. package/dist/recognizers/base.js.map +1 -0
  62. package/dist/recognizers/bic-swift.d.ts +10 -0
  63. package/dist/recognizers/bic-swift.d.ts.map +1 -0
  64. package/dist/recognizers/bic-swift.js +107 -0
  65. package/dist/recognizers/bic-swift.js.map +1 -0
  66. package/dist/recognizers/credit-card.d.ts +32 -0
  67. package/dist/recognizers/credit-card.d.ts.map +1 -0
  68. package/dist/recognizers/credit-card.js +160 -0
  69. package/dist/recognizers/credit-card.js.map +1 -0
  70. package/dist/recognizers/custom-id.d.ts +28 -0
  71. package/dist/recognizers/custom-id.d.ts.map +1 -0
  72. package/dist/recognizers/custom-id.js +116 -0
  73. package/dist/recognizers/custom-id.js.map +1 -0
  74. package/dist/recognizers/email.d.ts +10 -0
  75. package/dist/recognizers/email.d.ts.map +1 -0
  76. package/dist/recognizers/email.js +75 -0
  77. package/dist/recognizers/email.js.map +1 -0
  78. package/dist/recognizers/iban.d.ts +14 -0
  79. package/dist/recognizers/iban.d.ts.map +1 -0
  80. package/dist/recognizers/iban.js +67 -0
  81. package/dist/recognizers/iban.js.map +1 -0
  82. package/dist/recognizers/index.d.ts +20 -0
  83. package/dist/recognizers/index.d.ts.map +1 -0
  84. package/dist/recognizers/index.js +42 -0
  85. package/dist/recognizers/index.js.map +1 -0
  86. package/dist/recognizers/ip-address.d.ts +14 -0
  87. package/dist/recognizers/ip-address.d.ts.map +1 -0
  88. package/dist/recognizers/ip-address.js +183 -0
  89. package/dist/recognizers/ip-address.js.map +1 -0
  90. package/dist/recognizers/phone.d.ts +10 -0
  91. package/dist/recognizers/phone.d.ts.map +1 -0
  92. package/dist/recognizers/phone.js +145 -0
  93. package/dist/recognizers/phone.js.map +1 -0
  94. package/dist/recognizers/registry.d.ts +59 -0
  95. package/dist/recognizers/registry.d.ts.map +1 -0
  96. package/dist/recognizers/registry.js +113 -0
  97. package/dist/recognizers/registry.js.map +1 -0
  98. package/dist/recognizers/url.d.ts +14 -0
  99. package/dist/recognizers/url.d.ts.map +1 -0
  100. package/dist/recognizers/url.js +121 -0
  101. package/dist/recognizers/url.js.map +1 -0
  102. package/dist/types/index.d.ts +134 -0
  103. package/dist/types/index.d.ts.map +1 -0
  104. package/dist/types/index.js +69 -0
  105. package/dist/types/index.js.map +1 -0
  106. package/dist/types/pii-types.d.ts +50 -0
  107. package/dist/types/pii-types.d.ts.map +1 -0
  108. package/dist/types/pii-types.js +114 -0
  109. package/dist/types/pii-types.js.map +1 -0
  110. package/dist/utils/iban-checksum.d.ts +23 -0
  111. package/dist/utils/iban-checksum.d.ts.map +1 -0
  112. package/dist/utils/iban-checksum.js +106 -0
  113. package/dist/utils/iban-checksum.js.map +1 -0
  114. package/dist/utils/index.d.ts +8 -0
  115. package/dist/utils/index.d.ts.map +1 -0
  116. package/dist/utils/index.js +8 -0
  117. package/dist/utils/index.js.map +1 -0
  118. package/dist/utils/luhn.d.ts +17 -0
  119. package/dist/utils/luhn.d.ts.map +1 -0
  120. package/dist/utils/luhn.js +55 -0
  121. package/dist/utils/luhn.js.map +1 -0
  122. package/dist/utils/offsets.d.ts +86 -0
  123. package/dist/utils/offsets.d.ts.map +1 -0
  124. package/dist/utils/offsets.js +124 -0
  125. package/dist/utils/offsets.js.map +1 -0
  126. package/package.json +62 -0
@@ -0,0 +1,238 @@
1
+ /**
2
+ * Entity Resolver
3
+ * Merges, deduplicates, and resolves overlapping entity detections
4
+ */
5
+ import { PIIType, DetectionSource, DEFAULT_TYPE_PRIORITY, } from '../types/index.js';
6
+ import { spansOverlap, spanLength, sortSpansByPosition } from '../utils/offsets.js';
7
/**
 * Strategies for choosing between two overlapping detections.
 * Every member's value is its own name as a string.
 */
export var OverlapStrategy;
(function (strategy) {
    /** Regex matches always win over NER */
    strategy.REGEX_PRIORITY = "REGEX_PRIORITY";
    /** Longer span wins */
    strategy.LONGER_SPAN = "LONGER_SPAN";
    /** Higher confidence wins */
    strategy.HIGHER_CONFIDENCE = "HIGHER_CONFIDENCE";
    /** Use type priority from policy */
    strategy.TYPE_PRIORITY = "TYPE_PRIORITY";
})(OverlapStrategy || (OverlapStrategy = {}));
21
+ /**
22
+ * Default resolver configuration; resolveEntities spreads the caller-supplied config on top of these values
23
+ */
24
+ export const DEFAULT_RESOLVER_CONFIG = {
25
+ overlapStrategy: OverlapStrategy.REGEX_PRIORITY,
26
+ regexPriority: true, // checked by resolveOverlap before overlapStrategy is consulted
27
+ minConfidence: 0.5,
28
+ };
29
+ /**
30
+ * Resolves and merges entity detections from regex and NER
31
+ */
32
+ export function resolveEntities(regexMatches, nerMatches, policy, originalText, config = {}) {
33
+ const resolverConfig = { ...DEFAULT_RESOLVER_CONFIG, ...config };
34
+ // Step 1: Filter by enabled types and confidence thresholds
35
+ const filteredRegex = filterByPolicy(regexMatches, policy);
36
+ const filteredNER = filterByPolicy(nerMatches, policy);
37
+ // Step 2: Apply allowlist filtering
38
+ const allowlistFilteredRegex = applyAllowlist(filteredRegex, policy, originalText);
39
+ const allowlistFilteredNER = applyAllowlist(filteredNER, policy, originalText);
40
+ // Step 3: Combine all matches
41
+ const allMatches = [...allowlistFilteredRegex, ...allowlistFilteredNER];
42
+ // Step 4: Remove overlaps based on strategy
43
+ const resolved = removeOverlaps(allMatches, policy, resolverConfig);
44
+ // Step 5: Apply denylist patterns (force include)
45
+ const withDenylist = applyDenylist(resolved, policy, originalText);
46
+ // Step 6: Final deduplication
47
+ const deduplicated = deduplicateExact(withDenylist);
48
+ // Step 7: Sort by position
49
+ return sortSpansByPosition(deduplicated);
50
+ }
51
+ /**
52
+ * Filters matches by policy (enabled types and confidence thresholds)
53
+ */
54
+ function filterByPolicy(matches, policy) {
55
+ return matches.filter((match) => {
56
+ // Check if type is enabled
57
+ if (!policy.enabledTypes.has(match.type)) {
58
+ return false;
59
+ }
60
+ // Check confidence threshold
61
+ const threshold = policy.confidenceThresholds.get(match.type) ?? 0.5;
62
+ if (match.confidence < threshold) {
63
+ return false;
64
+ }
65
+ return true;
66
+ });
67
+ }
68
+ /**
69
+ * Filters out matches that are in the allowlist (known non-PII terms)
70
+ */
71
+ function applyAllowlist(matches, policy, _originalText) {
72
+ if (policy.allowlistTerms.size === 0) {
73
+ return matches;
74
+ }
75
+ return matches.filter((match) => {
76
+ const matchText = match.text.toLowerCase().trim();
77
+ return !policy.allowlistTerms.has(matchText);
78
+ });
79
+ }
80
+ /**
81
+ * Adds matches from denylist patterns (patterns that must always be PII)
82
+ */
83
+ function applyDenylist(matches, policy, originalText) {
84
+ if (policy.denylistPatterns.length === 0) {
85
+ return matches;
86
+ }
87
+ const denylistMatches = [];
88
+ for (const pattern of policy.denylistPatterns) {
89
+ const globalPattern = pattern.global
90
+ ? pattern
91
+ : new RegExp(pattern.source, pattern.flags + 'g');
92
+ for (const match of originalText.matchAll(globalPattern)) {
93
+ if (match.index === undefined)
94
+ continue;
95
+ // Check if this is already covered by existing matches
96
+ const alreadyCovered = matches.some((existing) => existing.start <= match.index &&
97
+ existing.end >= match.index + match[0].length);
98
+ if (!alreadyCovered) {
99
+ denylistMatches.push({
100
+ type: PIIType.EMAIL, // Default type for denylist; could be configurable
101
+ start: match.index,
102
+ end: match.index + match[0].length,
103
+ confidence: 1.0,
104
+ source: DetectionSource.REGEX,
105
+ text: match[0],
106
+ });
107
+ }
108
+ }
109
+ }
110
+ return [...matches, ...denylistMatches];
111
+ }
112
+ /**
113
+ * Removes overlapping spans based on resolution strategy
114
+ */
115
+ function removeOverlaps(matches, policy, config) {
116
+ if (matches.length <= 1) {
117
+ return matches;
118
+ }
119
+ // Sort by start position
120
+ const sorted = sortSpansByPosition(matches);
121
+ const result = [];
122
+ for (const match of sorted) {
123
+ // Find overlapping matches in result
124
+ const overlappingIdx = result.findIndex((existing) => spansOverlap(match, existing));
125
+ if (overlappingIdx === -1) {
126
+ // No overlap, add directly
127
+ result.push(match);
128
+ }
129
+ else {
130
+ // Has overlap, resolve
131
+ const existing = result[overlappingIdx];
132
+ const winner = resolveOverlap(existing, match, policy, config);
133
+ if (winner === match) {
134
+ // New match wins, replace existing
135
+ result[overlappingIdx] = match;
136
+ }
137
+ // Otherwise keep existing (do nothing)
138
+ }
139
+ }
140
+ return result;
141
+ }
142
+ /**
143
+ * Resolves overlap between two spans
144
+ * Returns the winner
145
+ */
146
+ function resolveOverlap(a, b, policy, config) {
147
+ // Rule 1: Regex always beats NER if configured
148
+ if (config.regexPriority) {
149
+ if (a.source === DetectionSource.REGEX && b.source !== DetectionSource.REGEX) {
150
+ return a;
151
+ }
152
+ if (b.source === DetectionSource.REGEX && a.source !== DetectionSource.REGEX) {
153
+ return b;
154
+ }
155
+ }
156
+ // Rule 2: Apply overlap strategy
157
+ switch (config.overlapStrategy) {
158
+ case OverlapStrategy.LONGER_SPAN: {
159
+ const lenA = spanLength(a);
160
+ const lenB = spanLength(b);
161
+ if (lenA !== lenB) {
162
+ return lenA > lenB ? a : b;
163
+ }
164
+ break;
165
+ }
166
+ case OverlapStrategy.HIGHER_CONFIDENCE: {
167
+ if (a.confidence !== b.confidence) {
168
+ return a.confidence > b.confidence ? a : b;
169
+ }
170
+ break;
171
+ }
172
+ case OverlapStrategy.TYPE_PRIORITY: {
173
+ const priorityA = getTypePriority(a.type, policy);
174
+ const priorityB = getTypePriority(b.type, policy);
175
+ if (priorityA !== priorityB) {
176
+ return priorityA > priorityB ? a : b;
177
+ }
178
+ break;
179
+ }
180
+ case OverlapStrategy.REGEX_PRIORITY:
181
+ default:
182
+ // Already handled above
183
+ break;
184
+ }
185
+ // Tiebreakers: longer span > higher confidence > type priority
186
+ const lenA = spanLength(a);
187
+ const lenB = spanLength(b);
188
+ if (lenA !== lenB) {
189
+ return lenA > lenB ? a : b;
190
+ }
191
+ if (a.confidence !== b.confidence) {
192
+ return a.confidence > b.confidence ? a : b;
193
+ }
194
+ const priorityA = getTypePriority(a.type, policy);
195
+ const priorityB = getTypePriority(b.type, policy);
196
+ if (priorityA !== priorityB) {
197
+ return priorityA > priorityB ? a : b;
198
+ }
199
+ // Final tiebreaker: keep first one
200
+ return a;
201
+ }
202
+ /**
203
+ * Gets type priority from policy (higher = more important)
204
+ */
205
+ function getTypePriority(type, policy) {
206
+ const priorityList = policy.typePriority.length > 0 ? policy.typePriority : [...DEFAULT_TYPE_PRIORITY];
207
+ const index = priorityList.indexOf(type);
208
+ return index >= 0 ? index : -1;
209
+ }
210
+ /**
211
+ * Removes exact duplicate spans
212
+ */
213
+ function deduplicateExact(matches) {
214
+ const seen = new Set();
215
+ const result = [];
216
+ for (const match of matches) {
217
+ const key = `${match.start}:${match.end}:${match.type}`;
218
+ if (!seen.has(key)) {
219
+ seen.add(key);
220
+ result.push(match);
221
+ }
222
+ }
223
+ return result;
224
+ }
225
+ /**
226
+ * Creates protected spans from regex matches
227
+ * Used to mask regex matches from NER to avoid double-detection
228
+ */
229
+ export function createProtectedSpans(regexMatches) {
230
+ return regexMatches.map(({ start, end }) => ({ start, end }));
231
+ }
232
+ /**
233
+ * Checks if a span overlaps with any protected span
234
+ */
235
+ export function isInProtectedSpan(span, protectedSpans) {
236
+ return protectedSpans.some((protected_) => spansOverlap(span, protected_));
237
+ }
238
+ //# sourceMappingURL=resolver.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"resolver.js","sourceRoot":"","sources":["../../src/pipeline/resolver.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,OAAO,EAEP,eAAe,EAEf,qBAAqB,GACtB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAEpF;;GAEG;AACH,MAAM,CAAN,IAAY,eASX;AATD,WAAY,eAAe;IACzB,wCAAwC;IACxC,oDAAiC,CAAA;IACjC,uBAAuB;IACvB,8CAA2B,CAAA;IAC3B,6BAA6B;IAC7B,0DAAuC,CAAA;IACvC,oCAAoC;IACpC,kDAA+B,CAAA;AACjC,CAAC,EATW,eAAe,KAAf,eAAe,QAS1B;AAcD;;GAEG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAmB;IACrD,eAAe,EAAE,eAAe,CAAC,cAAc;IAC/C,aAAa,EAAE,IAAI;IACnB,aAAa,EAAE,GAAG;CACnB,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,eAAe,CAC7B,YAAyB,EACzB,UAAuB,EACvB,MAA2B,EAC3B,YAAoB,EACpB,SAAkC,EAAE;IAEpC,MAAM,cAAc,GAAG,EAAE,GAAG,uBAAuB,EAAE,GAAG,MAAM,EAAE,CAAC;IAEjE,4DAA4D;IAC5D,MAAM,aAAa,GAAG,cAAc,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC;IAC3D,MAAM,WAAW,GAAG,cAAc,CAAC,UAAU,EAAE,MAAM,CAAC,CAAC;IAEvD,oCAAoC;IACpC,MAAM,sBAAsB,GAAG,cAAc,CAAC,aAAa,EAAE,MAAM,EAAE,YAAY,CAAC,CAAC;IACnF,MAAM,oBAAoB,GAAG,cAAc,CAAC,WAAW,EAAE,MAAM,EAAE,YAAY,CAAC,CAAC;IAE/E,8BAA8B;IAC9B,MAAM,UAAU,GAAG,CAAC,GAAG,sBAAsB,EAAE,GAAG,oBAAoB,CAAC,CAAC;IAExE,4CAA4C;IAC5C,MAAM,QAAQ,GAAG,cAAc,CAAC,UAAU,EAAE,MAAM,EAAE,cAAc,CAAC,CAAC;IAEpE,kDAAkD;IAClD,MAAM,YAAY,GAAG,aAAa,CAAC,QAAQ,EAAE,MAAM,EAAE,YAAY,CAAC,CAAC;IAEnE,8BAA8B;IAC9B,MAAM,YAAY,GAAG,gBAAgB,CAAC,YAAY,CAAC,CAAC;IAEpD,2BAA2B;IAC3B,OAAO,mBAAmB,CAAC,YAAY,CAAC,CAAC;AAC3C,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,OAAoB,EAAE,MAA2B;IACvE,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;QAC9B,2BAA2B;QAC3B,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;YACzC,OAAO,KAAK,CAAC;QACf,CAAC;QAED,6BAA6B;QAC7B,MAAM,SAAS,GAAG,MAAM,CAAC,oBAAoB,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC;QACrE,IAAI,KAAK,CAAC,UAAU,GAAG,SAAS,EAAE,CAAC;YACjC,OAAO,KAAK,CAAC;QACf,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CACrB,OAAoB,EACpB,MAA2B,EAC3B,aAAqB;IAErB,IAAI,MAAM,CAAC,cAAc,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QACrC,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,OAAO
,OAAO,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;QAC9B,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;QAClD,OAAO,CAAC,MAAM,CAAC,cAAc,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IAC/C,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CACpB,OAAoB,EACpB,MAA2B,EAC3B,YAAoB;IAEpB,IAAI,MAAM,CAAC,gBAAgB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzC,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,MAAM,eAAe,GAAgB,EAAE,CAAC;IAExC,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,gBAAgB,EAAE,CAAC;QAC9C,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM;YAClC,CAAC,CAAC,OAAO;YACT,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,OAAO,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC;QAEpD,KAAK,MAAM,KAAK,IAAI,YAAY,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;YACzD,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS;gBAAE,SAAS;YAExC,uDAAuD;YACvD,MAAM,cAAc,GAAG,OAAO,CAAC,IAAI,CACjC,CAAC,QAAQ,EAAE,EAAE,CACX,QAAQ,CAAC,KAAK,IAAI,KAAK,CAAC,KAAM;gBAC9B,QAAQ,CAAC,GAAG,IAAI,KAAK,CAAC,KAAM,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CACjD,CAAC;YAEF,IAAI,CAAC,cAAc,EAAE,CAAC;gBACpB,eAAe,CAAC,IAAI,CAAC;oBACnB,IAAI,EAAE,OAAO,CAAC,KAAK,EAAE,mDAAmD;oBACxE,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,GAAG,EAAE,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM;oBAClC,UAAU,EAAE,GAAG;oBACf,MAAM,EAAE,eAAe,CAAC,KAAK;oBAC7B,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC;iBACf,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,CAAC,GAAG,OAAO,EAAE,GAAG,eAAe,CAAC,CAAC;AAC1C,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CACrB,OAAoB,EACpB,MAA2B,EAC3B,MAAsB;IAEtB,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACxB,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,yBAAyB;IACzB,MAAM,MAAM,GAAG,mBAAmB,CAAC,OAAO,CAAC,CAAC;IAC5C,MAAM,MAAM,GAAgB,EAAE,CAAC;IAE/B,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,qCAAqC;QACrC,MAAM,cAAc,GAAG,MAAM,CAAC,SAAS,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,YAAY,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC;QAErF,IAAI,cAAc,KAAK,CAAC,CAAC,EAAE,CAAC;YAC1B,2BAA2B;YAC3B,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;aAAM,CAAC;YACN,uBAAuB;YACvB,MAAM,QAAQ,GAAG,MAAM,CAAC,cAAc,CAAE,CAAC;YACzC,MAAM,MAAM,GAAG,cAAc,CAAC,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;YAE/D,IAAI,MAAM,KAAK,KAAK,EAAE,CAAC;gBACrB,mCAAmC;gBACnC,MAAM,C
AAC,cAAc,CAAC,GAAG,KAAK,CAAC;YACjC,CAAC;YACD,uCAAuC;QACzC,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,SAAS,cAAc,CACrB,CAAY,EACZ,CAAY,EACZ,MAA2B,EAC3B,MAAsB;IAEtB,+CAA+C;IAC/C,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;QACzB,IAAI,CAAC,CAAC,MAAM,KAAK,eAAe,CAAC,KAAK,IAAI,CAAC,CAAC,MAAM,KAAK,eAAe,CAAC,KAAK,EAAE,CAAC;YAC7E,OAAO,CAAC,CAAC;QACX,CAAC;QACD,IAAI,CAAC,CAAC,MAAM,KAAK,eAAe,CAAC,KAAK,IAAI,CAAC,CAAC,MAAM,KAAK,eAAe,CAAC,KAAK,EAAE,CAAC;YAC7E,OAAO,CAAC,CAAC;QACX,CAAC;IACH,CAAC;IAED,iCAAiC;IACjC,QAAQ,MAAM,CAAC,eAAe,EAAE,CAAC;QAC/B,KAAK,eAAe,CAAC,WAAW,CAAC,CAAC,CAAC;YACjC,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;YAC3B,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;YAC3B,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;gBAClB,OAAO,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC7B,CAAC;YACD,MAAM;QACR,CAAC;QAED,KAAK,eAAe,CAAC,iBAAiB,CAAC,CAAC,CAAC;YACvC,IAAI,CAAC,CAAC,UAAU,KAAK,CAAC,CAAC,UAAU,EAAE,CAAC;gBAClC,OAAO,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC7C,CAAC;YACD,MAAM;QACR,CAAC;QAED,KAAK,eAAe,CAAC,aAAa,CAAC,CAAC,CAAC;YACnC,MAAM,SAAS,GAAG,eAAe,CAAC,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAClD,MAAM,SAAS,GAAG,eAAe,CAAC,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;YAClD,IAAI,SAAS,KAAK,SAAS,EAAE,CAAC;gBAC5B,OAAO,SAAS,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACvC,CAAC;YACD,MAAM;QACR,CAAC;QAED,KAAK,eAAe,CAAC,cAAc,CAAC;QACpC;YACE,wBAAwB;YACxB,MAAM;IACV,CAAC;IAED,+DAA+D;IAC/D,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;IAC3B,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;IAC3B,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClB,OAAO,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC7B,CAAC;IAED,IAAI,CAAC,CAAC,UAAU,KAAK,CAAC,CAAC,UAAU,EAAE,CAAC;QAClC,OAAO,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC7C,CAAC;IAED,MAAM,SAAS,GAAG,eAAe,CAAC,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAClD,MAAM,SAAS,GAAG,eAAe,CAAC,CAAC,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAClD,IAAI,SAAS,KAAK,SAAS,EAAE,CAAC;QAC5B,OAAO,SAAS,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,
CAAC,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC;IAED,mCAAmC;IACnC,OAAO,CAAC,CAAC;AACX,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,IAAa,EAAE,MAA2B;IACjE,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,GAAG,qBAAqB,CAAC,CAAC;IACvG,MAAM,KAAK,GAAG,YAAY,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IACzC,OAAO,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AACjC,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,OAAoB;IAC5C,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,MAAM,MAAM,GAAgB,EAAE,CAAC;IAE/B,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,GAAG,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;QACxD,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YACnB,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACd,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,oBAAoB,CAClC,YAAyB;IAEzB,OAAO,YAAY,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC;AAChE,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAC/B,IAAoC,EACpC,cAAqD;IAErD,OAAO,cAAc,CAAC,IAAI,CAAC,CAAC,UAAU,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,EAAE,UAAU,CAAC,CAAC,CAAC;AAC7E,CAAC"}
@@ -0,0 +1,74 @@
1
+ /**
2
+ * Replacement Tagger
3
+ * Replaces PII spans with placeholder tags and builds the PII map
4
+ */
5
+ import { PIIType, SpanMatch, DetectedEntity, AnonymizationPolicy } from '../types/index.js';
6
+ /**
7
+ * PII Map entry (before encryption): the type, entity ID and original text of one detected value
8
+ */
9
+ export interface PIIMapEntry {
10
+ /** PII type */
11
+ type: PIIType;
12
+ /** Entity ID */
13
+ id: number;
14
+ /** Original text */
15
+ original: string;
16
+ }
17
+ /**
18
+ * Raw PII Map (before encryption): each key is the string built by createPIIMapKey (type, underscore, id) and each value is the original text
19
+ */
20
+ export type RawPIIMap = Map<string, string>;
21
+ /**
22
+ * Tagging result, as returned by tagEntities
23
+ */
24
+ export interface TaggingResult {
25
+ /** Anonymized text with placeholder tags */
26
+ anonymizedText: string;
27
+ /** List of detected entities with assigned IDs, in position order */
28
+ entities: DetectedEntity[];
29
+ /** Raw PII map (type_id -> original) */
30
+ piiMap: RawPIIMap;
31
+ }
32
+ /**
33
+ * Generates a PII placeholder tag
34
+ * Format: <PII type="TYPE" id="N"/> (type and id are interpolated as given, with no escaping)
35
+ */
36
+ export declare function generateTag(type: PIIType, id: number): string;
37
+ /**
38
+ * Parses a PII tag to extract type and id (the id digits are parsed as a base-10 integer)
39
+ * Returns null if not a valid tag: the whole string must be a single self-closing PII tag whose type is a PIIType value
40
+ */
41
+ export declare function parseTag(tag: string): {
42
+ type: PIIType;
43
+ id: number;
44
+ } | null;
45
+ /**
46
+ * Creates a key for the PII map: the type, an underscore, then the id
47
+ */
48
+ export declare function createPIIMapKey(type: PIIType, id: number): string;
49
+ /**
50
+ * Tags PII spans in text and builds the PII map; IDs start at 1 and are assigned in position order, and spans with the same type and text share an ID when policy.reuseIdsForRepeatedPII is set
51
+ */
52
+ export declare function tagEntities(text: string, matches: SpanMatch[], policy: AnonymizationPolicy): TaggingResult;
53
+ /**
54
+ * Validates that a tag is well-formed (true exactly when parseTag returns a non-null result for it)
55
+ */
56
+ export declare function isValidTag(tag: string): boolean;
57
+ /**
58
+ * Extracts all PII tags from anonymized text; position is the index at which the tag starts, and tags whose type is not a PIIType value are left out
59
+ */
60
+ export declare function extractTags(anonymizedText: string): Array<{
61
+ type: PIIType;
62
+ id: number;
63
+ position: number;
64
+ }>;
65
+ /**
66
+ * Counts entities by type; every PIIType value is a key of the result, with 0 for types that do not occur
67
+ */
68
+ export declare function countEntitiesByType(entities: DetectedEntity[]): Record<PIIType, number>;
69
+ /**
70
+ * Rehydrates anonymized text using the PII map; tags with no matching entry in the map are left in place
71
+ * (For testing/debugging only - not part of the anonymization pipeline)
72
+ */
73
+ export declare function rehydrate(anonymizedText: string, piiMap: RawPIIMap): string;
74
+ //# sourceMappingURL=tagger.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tagger.d.ts","sourceRoot":"","sources":["../../src/pipeline/tagger.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,OAAO,EACP,SAAS,EACT,cAAc,EAEd,mBAAmB,EACpB,MAAM,mBAAmB,CAAC;AAG3B;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,eAAe;IACf,IAAI,EAAE,OAAO,CAAC;IACd,gBAAgB;IAChB,EAAE,EAAE,MAAM,CAAC;IACX,oBAAoB;IACpB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,MAAM,SAAS,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;AAE5C;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,4CAA4C;IAC5C,cAAc,EAAE,MAAM,CAAC;IACvB,kDAAkD;IAClD,QAAQ,EAAE,cAAc,EAAE,CAAC;IAC3B,wCAAwC;IACxC,MAAM,EAAE,SAAS,CAAC;CACnB;AAED;;;GAGG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,GAAG,MAAM,CAE7D;AAED;;;GAGG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,EAAE,EAAE,MAAM,CAAA;CAAE,GAAG,IAAI,CAoB1E;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,GAAG,MAAM,CAEjE;AAED;;GAEG;AACH,wBAAgB,WAAW,CACzB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,SAAS,EAAE,EACpB,MAAM,EAAE,mBAAmB,GAC1B,aAAa,CAyEf;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAE/C;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,cAAc,EAAE,MAAM,GAAG,KAAK,CAAC;IAAE,IAAI,EAAE,OAAO,CAAC;IAAC,EAAE,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,CAAA;CAAE,CAAC,CAoB1G;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,cAAc,EAAE,GAAG,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,CAcvF;AAED;;;GAGG;AACH,wBAAgB,SAAS,CAAC,cAAc,EAAE,MAAM,EAAE,MAAM,EAAE,SAAS,GAAG,MAAM,CAkB3E"}
@@ -0,0 +1,169 @@
1
+ /**
2
+ * Replacement Tagger
3
+ * Replaces PII spans with placeholder tags and builds the PII map
4
+ */
5
+ import { PIIType, } from '../types/index.js';
6
+ import { sortSpansByPosition } from '../utils/offsets.js';
7
+ /**
8
+ * Generates a PII placeholder tag
9
+ * Format: <PII type="TYPE" id="N"/>
10
+ */
11
+ export function generateTag(type, id) {
12
+ return `<PII type="${type}" id="${id}"/>`;
13
+ }
14
+ /**
15
+ * Parses a PII tag to extract type and id
16
+ * Returns null if not a valid tag
17
+ */
18
+ export function parseTag(tag) {
19
+ const match = tag.match(/^<PII\s+type="([A-Z_]+)"\s+id="(\d+)"\s*\/>$/);
20
+ if (match === null) {
21
+ return null;
22
+ }
23
+ const [, typeStr, idStr] = match;
24
+ if (typeStr === undefined || idStr === undefined) {
25
+ return null;
26
+ }
27
+ const type = typeStr;
28
+ const id = parseInt(idStr, 10);
29
+ // Validate type is a valid PIIType
30
+ if (!Object.values(PIIType).includes(type)) {
31
+ return null;
32
+ }
33
+ return { type, id };
34
+ }
35
+ /**
36
+ * Creates a key for the PII map
37
+ */
38
+ export function createPIIMapKey(type, id) {
39
+ return `${type}_${id}`;
40
+ }
41
+ /**
42
+ * Tags PII spans in text and builds the PII map
43
+ */
44
+ export function tagEntities(text, matches, policy) {
45
+ if (matches.length === 0) {
46
+ return {
47
+ anonymizedText: text,
48
+ entities: [],
49
+ piiMap: new Map(),
50
+ };
51
+ }
52
+ // Sort by start position ascending for ID assignment
53
+ const sortedAscending = sortSpansByPosition(matches);
54
+ // Assign IDs
55
+ const entitiesWithIds = [];
56
+ let nextId = 1;
57
+ // Track seen text for ID reuse (if enabled)
58
+ const seenText = new Map(); // text -> id
59
+ for (const match of sortedAscending) {
60
+ let id;
61
+ if (policy.reuseIdsForRepeatedPII) {
62
+ const key = `${match.type}:${match.text}`;
63
+ const existingId = seenText.get(key);
64
+ if (existingId !== undefined) {
65
+ id = existingId;
66
+ }
67
+ else {
68
+ id = nextId++;
69
+ seenText.set(key, id);
70
+ }
71
+ }
72
+ else {
73
+ id = nextId++;
74
+ }
75
+ entitiesWithIds.push({ ...match, id });
76
+ }
77
+ // Build PII map
78
+ const piiMap = new Map();
79
+ for (const entity of entitiesWithIds) {
80
+ const key = createPIIMapKey(entity.type, entity.id);
81
+ piiMap.set(key, entity.text);
82
+ }
83
+ // Sort by start position descending for replacement
84
+ // (replacing from end to start preserves earlier offsets)
85
+ const sortedDescending = [...entitiesWithIds].sort((a, b) => b.start - a.start);
86
+ // Perform replacements
87
+ let anonymizedText = text;
88
+ for (const entity of sortedDescending) {
89
+ const tag = generateTag(entity.type, entity.id);
90
+ anonymizedText =
91
+ anonymizedText.slice(0, entity.start) + tag + anonymizedText.slice(entity.end);
92
+ }
93
+ // Build final entities list (sorted by position)
94
+ const entities = entitiesWithIds.map((e) => ({
95
+ type: e.type,
96
+ id: e.id,
97
+ start: e.start,
98
+ end: e.end,
99
+ confidence: e.confidence,
100
+ source: e.source,
101
+ original: e.text,
102
+ }));
103
+ return {
104
+ anonymizedText,
105
+ entities: sortSpansByPosition(entities),
106
+ piiMap,
107
+ };
108
+ }
109
+ /**
110
+ * Validates that a tag is well-formed
111
+ */
112
+ export function isValidTag(tag) {
113
+ return parseTag(tag) !== null;
114
+ }
115
+ /**
116
+ * Extracts all PII tags from anonymized text
117
+ */
118
+ export function extractTags(anonymizedText) {
119
+ const tags = [];
120
+ const tagPattern = /<PII\s+type="([A-Z_]+)"\s+id="(\d+)"\s*\/>/g;
121
+ let match;
122
+ while ((match = tagPattern.exec(anonymizedText)) !== null) {
123
+ const typeStr = match[1];
124
+ const idStr = match[2];
125
+ if (typeStr !== undefined && idStr !== undefined) {
126
+ const type = typeStr;
127
+ const id = parseInt(idStr, 10);
128
+ if (Object.values(PIIType).includes(type)) {
129
+ tags.push({ type, id, position: match.index });
130
+ }
131
+ }
132
+ }
133
+ return tags;
134
+ }
135
+ /**
136
+ * Counts entities by type
137
+ */
138
+ export function countEntitiesByType(entities) {
139
+ const counts = {};
140
+ // Initialize all types to 0
141
+ for (const type of Object.values(PIIType)) {
142
+ counts[type] = 0;
143
+ }
144
+ // Count entities
145
+ for (const entity of entities) {
146
+ counts[entity.type] = (counts[entity.type] ?? 0) + 1;
147
+ }
148
+ return counts;
149
+ }
150
+ /**
151
+ * Rehydrates anonymized text using the PII map
152
+ * (For testing/debugging only - not part of the anonymization pipeline)
153
+ */
154
+ export function rehydrate(anonymizedText, piiMap) {
155
+ let result = anonymizedText;
156
+ const tags = extractTags(anonymizedText);
157
+ // Sort by position descending for replacement
158
+ tags.sort((a, b) => b.position - a.position);
159
+ for (const { type, id, position } of tags) {
160
+ const key = createPIIMapKey(type, id);
161
+ const original = piiMap.get(key);
162
+ if (original !== undefined) {
163
+ const tag = generateTag(type, id);
164
+ result = result.slice(0, position) + original + result.slice(position + tag.length);
165
+ }
166
+ }
167
+ return result;
168
+ }
169
+ //# sourceMappingURL=tagger.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tagger.js","sourceRoot":"","sources":["../../src/pipeline/tagger.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,OAAO,GAKR,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAE,mBAAmB,EAAiC,MAAM,qBAAqB,CAAC;AA+BzF;;;GAGG;AACH,MAAM,UAAU,WAAW,CAAC,IAAa,EAAE,EAAU;IACnD,OAAO,cAAc,IAAI,SAAS,EAAE,KAAK,CAAC;AAC5C,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,QAAQ,CAAC,GAAW;IAClC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,8CAA8C,CAAC,CAAC;IACxE,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;QACnB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,CAAC,EAAE,OAAO,EAAE,KAAK,CAAC,GAAG,KAAK,CAAC;IACjC,IAAI,OAAO,KAAK,SAAS,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;QACjD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,IAAI,GAAG,OAAkB,CAAC;IAChC,MAAM,EAAE,GAAG,QAAQ,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IAE/B,mCAAmC;IACnC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QAC3C,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,IAAa,EAAE,EAAU;IACvD,OAAO,GAAG,IAAI,IAAI,EAAE,EAAE,CAAC;AACzB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CACzB,IAAY,EACZ,OAAoB,EACpB,MAA2B;IAE3B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO;YACL,cAAc,EAAE,IAAI;YACpB,QAAQ,EAAE,EAAE;YACZ,MAAM,EAAE,IAAI,GAAG,EAAE;SAClB,CAAC;IACJ,CAAC;IAED,qDAAqD;IACrD,MAAM,eAAe,GAAG,mBAAmB,CAAC,OAAO,CAAC,CAAC;IAErD,aAAa;IACb,MAAM,eAAe,GAAsC,EAAE,CAAC;IAC9D,IAAI,MAAM,GAAG,CAAC,CAAC;IAEf,4CAA4C;IAC5C,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAC,CAAC,aAAa;IAEzD,KAAK,MAAM,KAAK,IAAI,eAAe,EAAE,CAAC;QACpC,IAAI,EAAU,CAAC;QAEf,IAAI,MAAM,CAAC,sBAAsB,EAAE,CAAC;YAClC,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC,IAAI,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;YAC1C,MAAM,UAAU,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACrC,IAAI,UAAU,KAAK,SAAS,EAAE,CAAC;gBAC7B,EAAE,GAAG,UAAU,CAAC;YAClB,CAAC;iBAAM,CAAC;gBACN,EAAE,GAAG,MAAM,EAAE,CAAC;gBACd,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;YACxB,CAAC;QACH,CAAC;aAAM,CAAC;YACN,EAAE,GAAG,MAAM,EAAE,CAAC;QAChB,CAAC;QAED,eAAe,CAAC,IAAI,CAAC,EAAE,GAAG,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;IACzC,CAAC;IAED,gBAAgB;IAChB,MAAM,MAAM,GAAc,IAAI,GAAG,EAAE,CAA
C;IACpC,KAAK,MAAM,MAAM,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,eAAe,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;QACpD,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC;IAC/B,CAAC;IAED,oDAAoD;IACpD,0DAA0D;IAC1D,MAAM,gBAAgB,GAAG,CAAC,GAAG,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAEhF,uBAAuB;IACvB,IAAI,cAAc,GAAG,IAAI,CAAC;IAC1B,KAAK,MAAM,MAAM,IAAI,gBAAgB,EAAE,CAAC;QACtC,MAAM,GAAG,GAAG,WAAW,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;QAChD,cAAc;YACZ,cAAc,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,GAAG,GAAG,GAAG,cAAc,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;IACnF,CAAC;IAED,iDAAiD;IACjD,MAAM,QAAQ,GAAqB,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC7D,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,EAAE,EAAE,CAAC,CAAC,EAAE;QACR,KAAK,EAAE,CAAC,CAAC,KAAK;QACd,GAAG,EAAE,CAAC,CAAC,GAAG;QACV,UAAU,EAAE,CAAC,CAAC,UAAU;QACxB,MAAM,EAAE,CAAC,CAAC,MAAM;QAChB,QAAQ,EAAE,CAAC,CAAC,IAAI;KACjB,CAAC,CAAC,CAAC;IAEJ,OAAO;QACL,cAAc;QACd,QAAQ,EAAE,mBAAmB,CAAC,QAAQ,CAAqB;QAC3D,MAAM;KACP,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,GAAW;IACpC,OAAO,QAAQ,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC;AAChC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,cAAsB;IAChD,MAAM,IAAI,GAA2D,EAAE,CAAC;IACxE,MAAM,UAAU,GAAG,6CAA6C,CAAC;IAEjE,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC1D,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACzB,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAEvB,IAAI,OAAO,KAAK,SAAS,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACjD,MAAM,IAAI,GAAG,OAAkB,CAAC;YAChC,MAAM,EAAE,GAAG,QAAQ,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;YAE/B,IAAI,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC1C,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,QAAQ,EAAE,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;YACjD,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,QAA0B;IAC5D,MAAM,MAAM,GAA4B,EAA6B,CAAC;IAEtE,4BAA4B;IAC5B,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC;QAC1C,MAAM
,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACnB,CAAC;IAED,iBAAiB;IACjB,KAAK,MAAM,MAAM,IAAI,QAAQ,EAAE,CAAC;QAC9B,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IACvD,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,SAAS,CAAC,cAAsB,EAAE,MAAiB;IACjE,IAAI,MAAM,GAAG,cAAc,CAAC;IAC5B,MAAM,IAAI,GAAG,WAAW,CAAC,cAAc,CAAC,CAAC;IAEzC,8CAA8C;IAC9C,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC;IAE7C,KAAK,MAAM,EAAE,IAAI,EAAE,EAAE,EAAE,QAAQ,EAAE,IAAI,IAAI,EAAE,CAAC;QAC1C,MAAM,GAAG,GAAG,eAAe,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACtC,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAEjC,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;YAC3B,MAAM,GAAG,GAAG,WAAW,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;YAClC,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,GAAG,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC;QACtF,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Output Validator
3
+ * Validates anonymized output and performs leak scan
4
+ */
5
+ import { PIIType, DetectedEntity, AnonymizationPolicy } from '../types/index.js';
6
+ /**
7
+ * Validation result: the value returned by validateOutput, carrying a pass/fail flag, the collected errors and optional leak-scan findings
8
+ */
9
+ export interface ValidationResult {
10
+ /** Whether validation passed */
11
+ valid: boolean;
12
+ /** List of validation errors (each entry has a ValidationErrorCode, a message and optional details) */
13
+ errors: ValidationError[];
14
+ /** Whether the leak scan passed; optional, only meaningful if a leak scan was performed */
15
+ leakScanPassed?: boolean;
16
+ /** Potential leaks found by the leak scan (optional list of LeakScanMatch entries) */
17
+ potentialLeaks?: LeakScanMatch[];
18
+ }
19
+ /**
20
+ * Validation error: one entry of ValidationResult.errors
21
+ */
22
+ export interface ValidationError {
23
+ /** Error code (a member of the ValidationErrorCode enum) */
24
+ code: ValidationErrorCode;
25
+ /** Human-readable message */
26
+ message: string;
27
+ /** Additional details: optional record with string keys and values of unknown type */
28
+ details?: Record<string, unknown>;
29
+ }
30
+ /**
31
+ * Validation error codes (string enum: each member's value is identical to its name), used as ValidationError.code
32
+ */
33
+ export declare enum ValidationErrorCode {
34
+ OVERLAPPING_ENTITIES = "OVERLAPPING_ENTITIES",
35
+ DUPLICATE_IDS = "DUPLICATE_IDS",
36
+ MALFORMED_TAG = "MALFORMED_TAG",
37
+ ID_MISMATCH = "ID_MISMATCH",
38
+ MISSING_IN_MAP = "MISSING_IN_MAP",
39
+ POTENTIAL_PII_LEAK = "POTENTIAL_PII_LEAK"
40
+ }
41
+ /**
42
+ * Leak scan match: one entry of ValidationResult.potentialLeaks
43
+ */
44
+ export interface LeakScanMatch {
45
+ /** PII type of the potential leak */
46
+ type: PIIType;
47
+ /** Matched text */
48
+ text: string;
49
+ /** Position in anonymized text (numeric; presumably a character offset, confirm against the implementation) */
50
+ position: number;
51
+ /** Pattern that matched, as a string */
52
+ pattern: string;
53
+ }
54
+ /**
55
+ * Validates anonymization output: takes the anonymized text, the detected entities, the PII map keys and the anonymization policy, and returns a ValidationResult
56
+ */
57
+ export declare function validateOutput(anonymizedText: string, entities: DetectedEntity[], piiMapKeys: string[], policy: AnonymizationPolicy): ValidationResult;
58
+ /**
59
+ * Validates that no overlaps exist among the given start/end spans (fast check); returns a boolean, presumably true when no spans overlap (confirm against the implementation)
60
+ */
61
+ export declare function hasNoOverlaps(entities: Array<{
62
+ start: number;
63
+ end: number;
64
+ }>): boolean;
65
+ //# sourceMappingURL=validator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"validator.d.ts","sourceRoot":"","sources":["../../src/pipeline/validator.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAE,cAAc,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAIjF;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,gCAAgC;IAChC,KAAK,EAAE,OAAO,CAAC;IACf,gCAAgC;IAChC,MAAM,EAAE,eAAe,EAAE,CAAC;IAC1B,8CAA8C;IAC9C,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,yCAAyC;IACzC,cAAc,CAAC,EAAE,aAAa,EAAE,CAAC;CAClC;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,iBAAiB;IACjB,IAAI,EAAE,mBAAmB,CAAC;IAC1B,6BAA6B;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,yBAAyB;IACzB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACnC;AAED;;GAEG;AACH,oBAAY,mBAAmB;IAC7B,oBAAoB,yBAAyB;IAC7C,aAAa,kBAAkB;IAC/B,aAAa,kBAAkB;IAC/B,WAAW,gBAAgB;IAC3B,cAAc,mBAAmB;IACjC,kBAAkB,uBAAuB;CAC1C;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,6BAA6B;IAC7B,IAAI,EAAE,OAAO,CAAC;IACd,mBAAmB;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,kCAAkC;IAClC,QAAQ,EAAE,MAAM,CAAC;IACjB,2BAA2B;IAC3B,OAAO,EAAE,MAAM,CAAC;CACjB;AAkCD;;GAEG;AACH,wBAAgB,cAAc,CAC5B,cAAc,EAAE,MAAM,EACtB,QAAQ,EAAE,cAAc,EAAE,EAC1B,UAAU,EAAE,MAAM,EAAE,EACpB,MAAM,EAAE,mBAAmB,GAC1B,gBAAgB,CA+ClB;AAkMD;;GAEG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,KAAK,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,MAAM,CAAA;CAAE,CAAC,GAAG,OAAO,CAYtF"}