@elanlanguages/bridge-anonymization 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +73 -1
  2. package/dist/crypto/pii-map-crypto.d.ts.map +1 -1
  3. package/dist/crypto/pii-map-crypto.js +8 -8
  4. package/dist/crypto/pii-map-crypto.js.map +1 -1
  5. package/dist/index.d.ts +25 -20
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js +103 -52
  8. package/dist/index.js.map +1 -1
  9. package/dist/ner/model-manager.d.ts.map +1 -1
  10. package/dist/ner/model-manager.js +10 -8
  11. package/dist/ner/model-manager.js.map +1 -1
  12. package/dist/ner/ner-model.d.ts.map +1 -1
  13. package/dist/ner/ner-model.js +9 -9
  14. package/dist/ner/ner-model.js.map +1 -1
  15. package/dist/ner/onnx-runtime.d.ts +3 -3
  16. package/dist/ner/onnx-runtime.d.ts.map +1 -1
  17. package/dist/ner/onnx-runtime.js +1 -1
  18. package/dist/ner/onnx-runtime.js.map +1 -1
  19. package/dist/ner/tokenizer.js +3 -3
  20. package/dist/ner/tokenizer.js.map +1 -1
  21. package/dist/pipeline/index.d.ts +7 -4
  22. package/dist/pipeline/index.d.ts.map +1 -1
  23. package/dist/pipeline/index.js +7 -4
  24. package/dist/pipeline/index.js.map +1 -1
  25. package/dist/pipeline/resolver.d.ts.map +1 -1
  26. package/dist/pipeline/resolver.js +3 -2
  27. package/dist/pipeline/resolver.js.map +1 -1
  28. package/dist/pipeline/semantic-data-loader.d.ts +157 -0
  29. package/dist/pipeline/semantic-data-loader.d.ts.map +1 -0
  30. package/dist/pipeline/semantic-data-loader.js +662 -0
  31. package/dist/pipeline/semantic-data-loader.js.map +1 -0
  32. package/dist/pipeline/semantic-enricher.d.ts +102 -0
  33. package/dist/pipeline/semantic-enricher.d.ts.map +1 -0
  34. package/dist/pipeline/semantic-enricher.js +268 -0
  35. package/dist/pipeline/semantic-enricher.js.map +1 -0
  36. package/dist/pipeline/tagger.d.ts +52 -12
  37. package/dist/pipeline/tagger.d.ts.map +1 -1
  38. package/dist/pipeline/tagger.js +226 -21
  39. package/dist/pipeline/tagger.js.map +1 -1
  40. package/dist/pipeline/title-extractor.d.ts +79 -0
  41. package/dist/pipeline/title-extractor.d.ts.map +1 -0
  42. package/dist/pipeline/title-extractor.js +801 -0
  43. package/dist/pipeline/title-extractor.js.map +1 -0
  44. package/dist/types/index.d.ts +66 -3
  45. package/dist/types/index.d.ts.map +1 -1
  46. package/dist/types/index.js +14 -3
  47. package/dist/types/index.js.map +1 -1
  48. package/dist/utils/index.d.ts +3 -3
  49. package/dist/utils/index.js +3 -3
  50. package/package.json +7 -5
@@ -2,25 +2,43 @@
2
2
  * Replacement Tagger
3
3
  * Replaces PII spans with placeholder tags and builds the PII map
4
4
  */
5
- import { PIIType, } from '../types/index.js';
6
- import { sortSpansByPosition } from '../utils/offsets.js';
5
+ import { PIIType, } from "../types/index.js";
6
+ import { sortSpansByPosition } from "../utils/offsets.js";
7
7
  /**
8
8
  * Generates a PII placeholder tag
9
- * Format: <PII type="TYPE" id="N"/>
9
+ * Format: <PII type="TYPE" id="N"/> or <PII type="TYPE" gender="X" id="N"/> etc.
10
+ *
11
+ * Semantic attributes (gender, scope) are included when provided and not 'unknown'
10
12
  */
11
- export function generateTag(type, id) {
12
- return `<PII type="${type}" id="${id}"/>`;
13
+ export function generateTag(type, id, semantic) {
14
+ let attrs = `type="${type}"`;
15
+ // Add semantic attributes if present and meaningful
16
+ if (semantic?.gender && semantic.gender !== "unknown") {
17
+ attrs += ` gender="${semantic.gender}"`;
18
+ }
19
+ if (semantic?.scope && semantic.scope !== "unknown") {
20
+ attrs += ` scope="${semantic.scope}"`;
21
+ }
22
+ attrs += ` id="${id}"`;
23
+ return `<PII ${attrs}/>`;
13
24
  }
14
25
  /**
15
- * Parses a PII tag to extract type and id
26
+ * Parses a PII tag to extract type, id, and semantic attributes
16
27
  * Returns null if not a valid tag
28
+ *
29
+ * Supports formats:
30
+ * - <PII type="TYPE" id="N"/>
31
+ * - <PII type="TYPE" gender="X" id="N"/>
32
+ * - <PII type="TYPE" scope="X" id="N"/>
33
+ * - <PII type="TYPE" gender="X" scope="Y" id="N"/>
17
34
  */
18
35
  export function parseTag(tag) {
19
- const match = tag.match(/^<PII\s+type="([A-Z_]+)"\s+id="(\d+)"\s*\/>$/);
36
+ // More flexible regex that handles optional gender/scope attributes
37
+ const match = tag.match(/^<PII\s+type="([A-Z_]+)"(?:\s+gender="(\w+)")?(?:\s+scope="(\w+)")?\s+id="(\d+)"\s*\/>$/);
20
38
  if (match === null) {
21
39
  return null;
22
40
  }
23
- const [, typeStr, idStr] = match;
41
+ const [, typeStr, genderStr, scopeStr, idStr] = match;
24
42
  if (typeStr === undefined || idStr === undefined) {
25
43
  return null;
26
44
  }
@@ -30,7 +48,23 @@ export function parseTag(tag) {
30
48
  if (!Object.values(PIIType).includes(type)) {
31
49
  return null;
32
50
  }
33
- return { type, id };
51
+ // Build semantic attributes if present
52
+ let semantic;
53
+ if ((genderStr !== undefined && genderStr !== "") ||
54
+ (scopeStr !== undefined && scopeStr !== "")) {
55
+ semantic = {};
56
+ if (genderStr !== undefined &&
57
+ genderStr !== "" &&
58
+ ["male", "female", "neutral", "unknown"].includes(genderStr)) {
59
+ semantic.gender = genderStr;
60
+ }
61
+ if (scopeStr !== undefined &&
62
+ scopeStr !== "" &&
63
+ ["city", "country", "region", "unknown"].includes(scopeStr)) {
64
+ semantic.scope = scopeStr;
65
+ }
66
+ }
67
+ return { type, id, semantic };
34
68
  }
35
69
  /**
36
70
  * Creates a key for the PII map
@@ -86,9 +120,11 @@ export function tagEntities(text, matches, policy) {
86
120
  // Perform replacements
87
121
  let anonymizedText = text;
88
122
  for (const entity of sortedDescending) {
89
- const tag = generateTag(entity.type, entity.id);
123
+ const tag = generateTag(entity.type, entity.id, entity.semantic);
90
124
  anonymizedText =
91
- anonymizedText.slice(0, entity.start) + tag + anonymizedText.slice(entity.end);
125
+ anonymizedText.slice(0, entity.start) +
126
+ tag +
127
+ anonymizedText.slice(entity.end);
92
128
  }
93
129
  // Build final entities list (sorted by position)
94
130
  const entities = entitiesWithIds.map((e) => ({
@@ -99,6 +135,7 @@ export function tagEntities(text, matches, policy) {
99
135
  confidence: e.confidence,
100
136
  source: e.source,
101
137
  original: e.text,
138
+ semantic: e.semantic,
102
139
  }));
103
140
  return {
104
141
  anonymizedText,
@@ -113,20 +150,176 @@ export function isValidTag(tag) {
113
150
  return parseTag(tag) !== null;
114
151
  }
115
152
  /**
116
- * Extracts all PII tags from anonymized text
153
+ * Quote characters that might appear after translation
154
+ * Includes: standard quotes, smart quotes, German quotes, French quotes, etc.
155
+ *
156
+ * Unicode references:
157
+ * - \u0022 (") Standard double quote
158
+ * - \u0027 (') Standard single quote
159
+ * - \u0060 (`) Backtick
160
+ * - \u00AB («) Left guillemet
161
+ * - \u00BB (») Right guillemet
162
+ * - \u2018 (‘) Left single curly quote
163
+ * - \u2019 (’) Right single curly quote
164
+ * - \u201A (‚) Single low-9 quote
165
+ * - \u201C (“) Left double curly quote
166
+ * - \u201D (”) Right double curly quote
167
+ * - \u201E („) Double low-9 quote (German)
168
+ */
169
+ const QUOTE_CHARS = "[\"'`\u00AB\u00BB\u2018\u2019\u201A\u201C\u201D\u201E]";
170
+ /**
171
+ * Whitespace pattern including various unicode spaces
172
+ */
173
+ const FLEXIBLE_WS = `[\\s\\u00A0\\u2000-\\u200B]*`;
174
+ const FLEXIBLE_WS_REQUIRED = `[\\s\\u00A0\\u2000-\\u200B]+`;
175
+ /**
176
+ * Builds patterns for fuzzy PII tag matching
177
+ * Handles various translation artifacts and optional semantic attributes
178
+ */
179
+ function buildFuzzyTagPatterns() {
180
+ // Pattern for type attribute: type = "VALUE" (flexible spacing and quotes)
181
+ const typeAttr = `type${FLEXIBLE_WS}=${FLEXIBLE_WS}${QUOTE_CHARS}([A-Z_]+)${QUOTE_CHARS}`;
182
+ // Pattern for id attribute: id = "VALUE" (flexible spacing and quotes)
183
+ const idAttr = `id${FLEXIBLE_WS}=${FLEXIBLE_WS}${QUOTE_CHARS}(\\d+)${QUOTE_CHARS}`;
184
+ // Optional gender attribute
185
+ const genderAttr = `(?:${FLEXIBLE_WS}gender${FLEXIBLE_WS}=${FLEXIBLE_WS}${QUOTE_CHARS}(\\w+)${QUOTE_CHARS})?`;
186
+ // Optional scope attribute
187
+ const scopeAttr = `(?:${FLEXIBLE_WS}scope${FLEXIBLE_WS}=${FLEXIBLE_WS}${QUOTE_CHARS}(\\w+)${QUOTE_CHARS})?`;
188
+ // Self-closing tag endings: />, / >, >, etc.
189
+ const selfClosing = `${FLEXIBLE_WS}\\/?${FLEXIBLE_WS}>`;
190
+ return [
191
+ // type first with optional gender/scope: <PII type="X" gender="Y" scope="Z" id="N"/>
192
+ // Groups: type=1, gender=2, scope=3, id=4
193
+ new RegExp(`<${FLEXIBLE_WS}PII${FLEXIBLE_WS_REQUIRED}${typeAttr}${genderAttr}${scopeAttr}${FLEXIBLE_WS_REQUIRED}${idAttr}${selfClosing}`, "gi"),
194
+ // id first: <PII id="N" type="X"/>
195
+ // Groups: id=1, type=2
196
+ new RegExp(`<${FLEXIBLE_WS}PII${FLEXIBLE_WS_REQUIRED}${idAttr}${FLEXIBLE_WS_REQUIRED}${typeAttr}${selfClosing}`, "gi"),
197
+ ];
198
+ }
199
+ /**
200
+ * Extracts all PII tags from anonymized text using fuzzy matching
201
+ * Handles mangled tags that may occur after translation
202
+ *
203
+ * Translation can mangle tags by:
204
+ * - Changing quote types (" → “ or „ or « etc.)
205
+ * - Adding/removing whitespace
206
+ * - Changing case (type → Type, PII → pii)
207
+ * - Reordering attributes (id before type)
208
+ * - Modifying self-closing syntax (/> → / > or >)
117
209
  */
118
210
  export function extractTags(anonymizedText) {
119
211
  const tags = [];
120
- const tagPattern = /<PII\s+type="([A-Z_]+)"\s+id="(\d+)"\s*\/>/g;
212
+ const patterns = buildFuzzyTagPatterns();
213
+ // Track positions we've already matched to avoid duplicates from overlapping patterns
214
+ const matchedPositions = new Set();
215
+ for (let patternIndex = 0; patternIndex < patterns.length; patternIndex++) {
216
+ const pattern = patterns[patternIndex];
217
+ if (pattern === undefined)
218
+ continue;
219
+ let match;
220
+ // Reset lastIndex for each pattern
221
+ pattern.lastIndex = 0;
222
+ while ((match = pattern.exec(anonymizedText)) !== null) {
223
+ if (matchedPositions.has(match.index)) {
224
+ continue; // Skip duplicates from overlapping patterns
225
+ }
226
+ // Extract type, id, and semantic attributes based on which pattern matched
227
+ // Pattern 0: type first with optional gender/scope (groups: type=1, gender=2, scope=3, id=4)
228
+ // Pattern 1: id first (groups: id=1, type=2)
229
+ let typeStr;
230
+ let idStr;
231
+ let genderStr;
232
+ let scopeStr;
233
+ if (patternIndex === 0) {
234
+ typeStr = match[1];
235
+ genderStr = match[2];
236
+ scopeStr = match[3];
237
+ idStr = match[4];
238
+ }
239
+ else {
240
+ idStr = match[1];
241
+ typeStr = match[2];
242
+ }
243
+ if (typeStr !== undefined && idStr !== undefined) {
244
+ const type = typeStr.toUpperCase();
245
+ const id = parseInt(idStr, 10);
246
+ if (Object.values(PIIType).includes(type)) {
247
+ // Build semantic attributes if present
248
+ let semantic;
249
+ if ((genderStr !== undefined && genderStr !== "") ||
250
+ (scopeStr !== undefined && scopeStr !== "")) {
251
+ semantic = {};
252
+ if (genderStr !== undefined &&
253
+ genderStr !== "" &&
254
+ ["male", "female", "neutral", "unknown"].includes(genderStr.toLowerCase())) {
255
+ semantic.gender =
256
+ genderStr.toLowerCase();
257
+ }
258
+ if (scopeStr !== undefined &&
259
+ scopeStr !== "" &&
260
+ ["city", "country", "region", "unknown"].includes(scopeStr.toLowerCase())) {
261
+ semantic.scope =
262
+ scopeStr.toLowerCase();
263
+ }
264
+ }
265
+ tags.push({
266
+ type,
267
+ id,
268
+ position: match.index,
269
+ matchedText: match[0],
270
+ semantic,
271
+ });
272
+ matchedPositions.add(match.index);
273
+ }
274
+ }
275
+ }
276
+ }
277
+ // Sort by position ascending
278
+ tags.sort((a, b) => a.position - b.position);
279
+ return tags;
280
+ }
281
+ /**
282
+ * Extracts tags using strict matching (original behavior)
283
+ * Useful when you know tags haven't been mangled
284
+ * Supports optional gender and scope attributes
285
+ */
286
+ export function extractTagsStrict(anonymizedText) {
287
+ const tags = [];
288
+ // Pattern matches: <PII type="X" [gender="Y"] [scope="Z"] id="N"/>
289
+ const tagPattern = /<PII\s+type="([A-Z_]+)"(?:\s+gender="(\w+)")?(?:\s+scope="(\w+)")?\s+id="(\d+)"\s*\/>/g;
121
290
  let match;
122
291
  while ((match = tagPattern.exec(anonymizedText)) !== null) {
123
292
  const typeStr = match[1];
124
- const idStr = match[2];
293
+ const genderStr = match[2];
294
+ const scopeStr = match[3];
295
+ const idStr = match[4];
125
296
  if (typeStr !== undefined && idStr !== undefined) {
126
297
  const type = typeStr;
127
298
  const id = parseInt(idStr, 10);
128
299
  if (Object.values(PIIType).includes(type)) {
129
- tags.push({ type, id, position: match.index });
300
+ // Build semantic attributes if present
301
+ let semantic;
302
+ if ((genderStr !== undefined && genderStr !== "") ||
303
+ (scopeStr !== undefined && scopeStr !== "")) {
304
+ semantic = {};
305
+ if (genderStr !== undefined &&
306
+ genderStr !== "" &&
307
+ ["male", "female", "neutral", "unknown"].includes(genderStr)) {
308
+ semantic.gender = genderStr;
309
+ }
310
+ if (scopeStr !== undefined &&
311
+ scopeStr !== "" &&
312
+ ["city", "country", "region", "unknown"].includes(scopeStr)) {
313
+ semantic.scope = scopeStr;
314
+ }
315
+ }
316
+ tags.push({
317
+ type,
318
+ id,
319
+ position: match.index,
320
+ matchedText: match[0],
321
+ semantic,
322
+ });
130
323
  }
131
324
  }
132
325
  }
@@ -149,19 +342,31 @@ export function countEntitiesByType(entities) {
149
342
  }
150
343
  /**
151
344
  * Rehydrates anonymized text using the PII map
152
- * (For testing/debugging only - not part of the anonymization pipeline)
345
+ * Uses fuzzy matching to handle tags that may have been mangled by translation
346
+ *
347
+ * @param anonymizedText - Text containing PII tags (possibly mangled)
348
+ * @param piiMap - Map of PII keys to original values
349
+ * @param strict - If true, use strict matching (original behavior). Default: false
350
+ * @returns Text with PII tags replaced by original values
153
351
  */
154
- export function rehydrate(anonymizedText, piiMap) {
352
+ export function rehydrate(anonymizedText, piiMap, strict = false) {
155
353
  let result = anonymizedText;
156
- const tags = extractTags(anonymizedText);
354
+ const tags = strict
355
+ ? extractTagsStrict(anonymizedText)
356
+ : extractTags(anonymizedText);
157
357
  // Sort by position descending for replacement
358
+ // (replacing from end to start preserves earlier offsets)
158
359
  tags.sort((a, b) => b.position - a.position);
159
- for (const { type, id, position } of tags) {
360
+ for (const { type, id, position, matchedText } of tags) {
160
361
  const key = createPIIMapKey(type, id);
161
362
  const original = piiMap.get(key);
162
363
  if (original !== undefined) {
163
- const tag = generateTag(type, id);
164
- result = result.slice(0, position) + original + result.slice(position + tag.length);
364
+ // Use the actual matched text length for replacement
365
+ // This handles mangled tags where the length differs from the canonical form
366
+ result =
367
+ result.slice(0, position) +
368
+ original +
369
+ result.slice(position + matchedText.length);
165
370
  }
166
371
  }
167
372
  return result;
@@ -1 +1 @@
1
- {"version":3,"file":"tagger.js","sourceRoot":"","sources":["../../src/pipeline/tagger.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,OAAO,GAKR,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAE,mBAAmB,EAAiC,MAAM,qBAAqB,CAAC;AA+BzF;;;GAGG;AACH,MAAM,UAAU,WAAW,CAAC,IAAa,EAAE,EAAU;IACnD,OAAO,cAAc,IAAI,SAAS,EAAE,KAAK,CAAC;AAC5C,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,QAAQ,CAAC,GAAW;IAClC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,8CAA8C,CAAC,CAAC;IACxE,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;QACnB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,CAAC,EAAE,OAAO,EAAE,KAAK,CAAC,GAAG,KAAK,CAAC;IACjC,IAAI,OAAO,KAAK,SAAS,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;QACjD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,IAAI,GAAG,OAAkB,CAAC;IAChC,MAAM,EAAE,GAAG,QAAQ,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IAE/B,mCAAmC;IACnC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QAC3C,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,IAAa,EAAE,EAAU;IACvD,OAAO,GAAG,IAAI,IAAI,EAAE,EAAE,CAAC;AACzB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CACzB,IAAY,EACZ,OAAoB,EACpB,MAA2B;IAE3B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO;YACL,cAAc,EAAE,IAAI;YACpB,QAAQ,EAAE,EAAE;YACZ,MAAM,EAAE,IAAI,GAAG,EAAE;SAClB,CAAC;IACJ,CAAC;IAED,qDAAqD;IACrD,MAAM,eAAe,GAAG,mBAAmB,CAAC,OAAO,CAAC,CAAC;IAErD,aAAa;IACb,MAAM,eAAe,GAAsC,EAAE,CAAC;IAC9D,IAAI,MAAM,GAAG,CAAC,CAAC;IAEf,4CAA4C;IAC5C,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAC,CAAC,aAAa;IAEzD,KAAK,MAAM,KAAK,IAAI,eAAe,EAAE,CAAC;QACpC,IAAI,EAAU,CAAC;QAEf,IAAI,MAAM,CAAC,sBAAsB,EAAE,CAAC;YAClC,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC,IAAI,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;YAC1C,MAAM,UAAU,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACrC,IAAI,UAAU,KAAK,SAAS,EAAE,CAAC;gBAC7B,EAAE,GAAG,UAAU,CAAC;YAClB,CAAC;iBAAM,CAAC;gBACN,EAAE,GAAG,MAAM,EAAE,CAAC;gBACd,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;YACxB,CAAC;QACH,CAAC;aAAM,CAAC;YACN,EAAE,GAAG,MAAM,EAAE,CAAC;QAChB,CAAC;QAED,eAAe,CAAC,IAAI,CAAC,EAAE,GAAG,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;IACzC,CAAC;IAED,gBAAgB;IAChB,MAAM,MAAM,GAAc,IAAI,GAAG,EAAE,CAA
C;IACpC,KAAK,MAAM,MAAM,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,eAAe,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;QACpD,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC;IAC/B,CAAC;IAED,oDAAoD;IACpD,0DAA0D;IAC1D,MAAM,gBAAgB,GAAG,CAAC,GAAG,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAEhF,uBAAuB;IACvB,IAAI,cAAc,GAAG,IAAI,CAAC;IAC1B,KAAK,MAAM,MAAM,IAAI,gBAAgB,EAAE,CAAC;QACtC,MAAM,GAAG,GAAG,WAAW,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;QAChD,cAAc;YACZ,cAAc,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,GAAG,GAAG,GAAG,cAAc,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;IACnF,CAAC;IAED,iDAAiD;IACjD,MAAM,QAAQ,GAAqB,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC7D,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,EAAE,EAAE,CAAC,CAAC,EAAE;QACR,KAAK,EAAE,CAAC,CAAC,KAAK;QACd,GAAG,EAAE,CAAC,CAAC,GAAG;QACV,UAAU,EAAE,CAAC,CAAC,UAAU;QACxB,MAAM,EAAE,CAAC,CAAC,MAAM;QAChB,QAAQ,EAAE,CAAC,CAAC,IAAI;KACjB,CAAC,CAAC,CAAC;IAEJ,OAAO;QACL,cAAc;QACd,QAAQ,EAAE,mBAAmB,CAAC,QAAQ,CAAqB;QAC3D,MAAM;KACP,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,GAAW;IACpC,OAAO,QAAQ,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC;AAChC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,cAAsB;IAChD,MAAM,IAAI,GAA2D,EAAE,CAAC;IACxE,MAAM,UAAU,GAAG,6CAA6C,CAAC;IAEjE,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC1D,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACzB,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAEvB,IAAI,OAAO,KAAK,SAAS,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACjD,MAAM,IAAI,GAAG,OAAkB,CAAC;YAChC,MAAM,EAAE,GAAG,QAAQ,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;YAE/B,IAAI,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC1C,IAAI,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,QAAQ,EAAE,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;YACjD,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,CAAC,QAA0B;IAC5D,MAAM,MAAM,GAA4B,EAA6B,CAAC;IAEtE,4BAA4B;IAC5B,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC;QAC1C,MAAM
,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACnB,CAAC;IAED,iBAAiB;IACjB,KAAK,MAAM,MAAM,IAAI,QAAQ,EAAE,CAAC;QAC9B,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IACvD,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,SAAS,CAAC,cAAsB,EAAE,MAAiB;IACjE,IAAI,MAAM,GAAG,cAAc,CAAC;IAC5B,MAAM,IAAI,GAAG,WAAW,CAAC,cAAc,CAAC,CAAC;IAEzC,8CAA8C;IAC9C,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC;IAE7C,KAAK,MAAM,EAAE,IAAI,EAAE,EAAE,EAAE,QAAQ,EAAE,IAAI,IAAI,EAAE,CAAC;QAC1C,MAAM,GAAG,GAAG,eAAe,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACtC,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAEjC,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;YAC3B,MAAM,GAAG,GAAG,WAAW,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;YAClC,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,GAAG,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC;QACtF,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
1
+ {"version":3,"file":"tagger.js","sourceRoot":"","sources":["../../src/pipeline/tagger.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,OAAO,GAKR,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AA+B1D;;;;;GAKG;AACH,MAAM,UAAU,WAAW,CACzB,IAAa,EACb,EAAU,EACV,QAA6B;IAE7B,IAAI,KAAK,GAAG,SAAS,IAAI,GAAG,CAAC;IAE7B,oDAAoD;IACpD,IAAI,QAAQ,EAAE,MAAM,IAAI,QAAQ,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;QACtD,KAAK,IAAI,YAAY,QAAQ,CAAC,MAAM,GAAG,CAAC;IAC1C,CAAC;IACD,IAAI,QAAQ,EAAE,KAAK,IAAI,QAAQ,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;QACpD,KAAK,IAAI,WAAW,QAAQ,CAAC,KAAK,GAAG,CAAC;IACxC,CAAC;IAED,KAAK,IAAI,QAAQ,EAAE,GAAG,CAAC;IAEvB,OAAO,QAAQ,KAAK,IAAI,CAAC;AAC3B,CAAC;AAWD;;;;;;;;;GASG;AACH,MAAM,UAAU,QAAQ,CAAC,GAAW;IAClC,oEAAoE;IACpE,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CACrB,yFAAyF,CAC1F,CAAC;IAEF,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;QACnB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,CAAC,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,KAAK,CAAC,GAAG,KAAK,CAAC;IACtD,IAAI,OAAO,KAAK,SAAS,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;QACjD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,MAAM,IAAI,GAAG,OAAkB,CAAC;IAChC,MAAM,EAAE,GAAG,QAAQ,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IAE/B,mCAAmC;IACnC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QAC3C,OAAO,IAAI,CAAC;IACd,CAAC;IAED,uCAAuC;IACvC,IAAI,QAAwC,CAAC;IAC7C,IACE,CAAC,SAAS,KAAK,SAAS,IAAI,SAAS,KAAK,EAAE,CAAC;QAC7C,CAAC,QAAQ,KAAK,SAAS,IAAI,QAAQ,KAAK,EAAE,CAAC,EAC3C,CAAC;QACD,QAAQ,GAAG,EAAE,CAAC;QACd,IACE,SAAS,KAAK,SAAS;YACvB,SAAS,KAAK,EAAE;YAChB,CAAC,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC,EAC5D,CAAC;YACD,QAAQ,CAAC,MAAM,GAAG,SAAyC,CAAC;QAC9D,CAAC;QACD,IACE,QAAQ,KAAK,SAAS;YACtB,QAAQ,KAAK,EAAE;YACf,CAAC,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAC3D,CAAC;YACD,QAAQ,CAAC,KAAK,GAAG,QAAuC,CAAC;QAC3D,CAAC;IACH,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,EAAE,EAAE,QAAQ,EAAE,CAAC;AAChC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe,CAAC,IAAa,EAAE,EAAU;IACvD,OAAO,GAAG,IAAI,IAAI,EAAE,EAAE,CAAC;AACzB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CACzB,IAAY,EACZ,OAAoB,EACpB,MAA2B;IAE3B
,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO;YACL,cAAc,EAAE,IAAI;YACpB,QAAQ,EAAE,EAAE;YACZ,MAAM,EAAE,IAAI,GAAG,EAAE;SAClB,CAAC;IACJ,CAAC;IAED,qDAAqD;IACrD,MAAM,eAAe,GAAG,mBAAmB,CAAC,OAAO,CAAC,CAAC;IAErD,aAAa;IACb,MAAM,eAAe,GAAsC,EAAE,CAAC;IAC9D,IAAI,MAAM,GAAG,CAAC,CAAC;IAEf,4CAA4C;IAC5C,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAkB,CAAC,CAAC,aAAa;IAEzD,KAAK,MAAM,KAAK,IAAI,eAAe,EAAE,CAAC;QACpC,IAAI,EAAU,CAAC;QAEf,IAAI,MAAM,CAAC,sBAAsB,EAAE,CAAC;YAClC,MAAM,GAAG,GAAG,GAAG,KAAK,CAAC,IAAI,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC;YAC1C,MAAM,UAAU,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACrC,IAAI,UAAU,KAAK,SAAS,EAAE,CAAC;gBAC7B,EAAE,GAAG,UAAU,CAAC;YAClB,CAAC;iBAAM,CAAC;gBACN,EAAE,GAAG,MAAM,EAAE,CAAC;gBACd,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;YACxB,CAAC;QACH,CAAC;aAAM,CAAC;YACN,EAAE,GAAG,MAAM,EAAE,CAAC;QAChB,CAAC;QAED,eAAe,CAAC,IAAI,CAAC,EAAE,GAAG,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;IACzC,CAAC;IAED,gBAAgB;IAChB,MAAM,MAAM,GAAc,IAAI,GAAG,EAAE,CAAC;IACpC,KAAK,MAAM,MAAM,IAAI,eAAe,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,eAAe,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,EAAE,CAAC,CAAC;QACpD,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,CAAC;IAC/B,CAAC;IAED,oDAAoD;IACpD,0DAA0D;IAC1D,MAAM,gBAAgB,GAAG,CAAC,GAAG,eAAe,CAAC,CAAC,IAAI,CAChD,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAC5B,CAAC;IAEF,uBAAuB;IACvB,IAAI,cAAc,GAAG,IAAI,CAAC;IAC1B,KAAK,MAAM,MAAM,IAAI,gBAAgB,EAAE,CAAC;QACtC,MAAM,GAAG,GAAG,WAAW,CAAC,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,EAAE,EAAE,MAAM,CAAC,QAAQ,CAAC,CAAC;QACjE,cAAc;YACZ,cAAc,CAAC,KAAK,CAAC,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC;gBACrC,GAAG;gBACH,cAAc,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;IACrC,CAAC;IAED,iDAAiD;IACjD,MAAM,QAAQ,GAAqB,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC7D,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,EAAE,EAAE,CAAC,CAAC,EAAE;QACR,KAAK,EAAE,CAAC,CAAC,KAAK;QACd,GAAG,EAAE,CAAC,CAAC,GAAG;QACV,UAAU,EAAE,CAAC,CAAC,UAAU;QACxB,MAAM,EAAE,CAAC,CAAC,MAAM;QAChB,QAAQ,EAAE,CAAC,CAAC,IAAI;QAChB,QAAQ,EAAE,CAAC,CAAC,QAAQ;KACrB,CAAC,CAAC,CAAC;IAEJ,OAAO;QACL,cAAc;QACd,QAAQ,EAAE,mBAAm
B,CAAC,QAAQ,CAAC;QACvC,MAAM;KACP,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,UAAU,CAAC,GAAW;IACpC,OAAO,QAAQ,CAAC,GAAG,CAAC,KAAK,IAAI,CAAC;AAChC,CAAC;AAeD;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,WAAW,GAAG,wDAAwD,CAAC;AAE7E;;GAEG;AACH,MAAM,WAAW,GAAG,8BAA8B,CAAC;AACnD,MAAM,oBAAoB,GAAG,8BAA8B,CAAC;AAE5D;;;GAGG;AACH,SAAS,qBAAqB;IAC5B,2EAA2E;IAC3E,MAAM,QAAQ,GAAG,OAAO,WAAW,IAAI,WAAW,GAAG,WAAW,YAAY,WAAW,EAAE,CAAC;IAC1F,uEAAuE;IACvE,MAAM,MAAM,GAAG,KAAK,WAAW,IAAI,WAAW,GAAG,WAAW,SAAS,WAAW,EAAE,CAAC;IACnF,4BAA4B;IAC5B,MAAM,UAAU,GAAG,MAAM,WAAW,SAAS,WAAW,IAAI,WAAW,GAAG,WAAW,SAAS,WAAW,IAAI,CAAC;IAC9G,2BAA2B;IAC3B,MAAM,SAAS,GAAG,MAAM,WAAW,QAAQ,WAAW,IAAI,WAAW,GAAG,WAAW,SAAS,WAAW,IAAI,CAAC;IAE5G,6CAA6C;IAC7C,MAAM,WAAW,GAAG,GAAG,WAAW,OAAO,WAAW,GAAG,CAAC;IAExD,OAAO;QACL,qFAAqF;QACrF,0CAA0C;QAC1C,IAAI,MAAM,CACR,IAAI,WAAW,MAAM,oBAAoB,GAAG,QAAQ,GAAG,UAAU,GAAG,SAAS,GAAG,oBAAoB,GAAG,MAAM,GAAG,WAAW,EAAE,EAC7H,IAAI,CACL;QACD,mCAAmC;QACnC,uBAAuB;QACvB,IAAI,MAAM,CACR,IAAI,WAAW,MAAM,oBAAoB,GAAG,MAAM,GAAG,oBAAoB,GAAG,QAAQ,GAAG,WAAW,EAAE,EACpG,IAAI,CACL;KACF,CAAC;AACJ,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,WAAW,CAAC,cAAsB;IAChD,MAAM,IAAI,GAAmB,EAAE,CAAC;IAChC,MAAM,QAAQ,GAAG,qBAAqB,EAAE,CAAC;IAEzC,sFAAsF;IACtF,MAAM,gBAAgB,GAAG,IAAI,GAAG,EAAU,CAAC;IAE3C,KAAK,IAAI,YAAY,GAAG,CAAC,EAAE,YAAY,GAAG,QAAQ,CAAC,MAAM,EAAE,YAAY,EAAE,EAAE,CAAC;QAC1E,MAAM,OAAO,GAAG,QAAQ,CAAC,YAAY,CAAC,CAAC;QACvC,IAAI,OAAO,KAAK,SAAS;YAAE,SAAS;QAEpC,IAAI,KAA6B,CAAC;QAClC,mCAAmC;QACnC,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;QAEtB,OAAO,CAAC,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACvD,IAAI,gBAAgB,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;gBACtC,SAAS,CAAC,4CAA4C;YACxD,CAAC;YAED,2EAA2E;YAC3E,6FAA6F;YAC7F,6CAA6C;YAC7C,IAAI,OAA2B,CAAC;YAChC,IAAI,KAAyB,CAAC;YAC9B,IAAI,SAA6B,CAAC;YAClC,IAAI,QAA4B,CAAC;YAEjC,IAAI,YAAY,KAAK,CAAC,EAAE,CAAC;gBACvB,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACnB,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACrB,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACpB,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACnB,CAAC;iBAAM,CAAC;gBACN,KA
AK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACjB,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACrB,CAAC;YAED,IAAI,OAAO,KAAK,SAAS,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;gBACjD,MAAM,IAAI,GAAG,OAAO,CAAC,WAAW,EAAa,CAAC;gBAC9C,MAAM,EAAE,GAAG,QAAQ,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;gBAE/B,IAAI,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC1C,uCAAuC;oBACvC,IAAI,QAAwC,CAAC;oBAC7C,IACE,CAAC,SAAS,KAAK,SAAS,IAAI,SAAS,KAAK,EAAE,CAAC;wBAC7C,CAAC,QAAQ,KAAK,SAAS,IAAI,QAAQ,KAAK,EAAE,CAAC,EAC3C,CAAC;wBACD,QAAQ,GAAG,EAAE,CAAC;wBACd,IACE,SAAS,KAAK,SAAS;4BACvB,SAAS,KAAK,EAAE;4BAChB,CAAC,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC,QAAQ,CAC/C,SAAS,CAAC,WAAW,EAAE,CACxB,EACD,CAAC;4BACD,QAAQ,CAAC,MAAM;gCACb,SAAS,CAAC,WAAW,EAAkC,CAAC;wBAC5D,CAAC;wBACD,IACE,QAAQ,KAAK,SAAS;4BACtB,QAAQ,KAAK,EAAE;4BACf,CAAC,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,CAAC,CAAC,QAAQ,CAC/C,QAAQ,CAAC,WAAW,EAAE,CACvB,EACD,CAAC;4BACD,QAAQ,CAAC,KAAK;gCACZ,QAAQ,CAAC,WAAW,EAAiC,CAAC;wBAC1D,CAAC;oBACH,CAAC;oBAED,IAAI,CAAC,IAAI,CAAC;wBACR,IAAI;wBACJ,EAAE;wBACF,QAAQ,EAAE,KAAK,CAAC,KAAK;wBACrB,WAAW,EAAE,KAAK,CAAC,CAAC,CAAC;wBACrB,QAAQ;qBACT,CAAC,CAAC;oBACH,gBAAgB,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;gBACpC,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,6BAA6B;IAC7B,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC;IAE7C,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,iBAAiB,CAAC,cAAsB;IACtD,MAAM,IAAI,GAAmB,EAAE,CAAC;IAChC,mEAAmE;IACnE,MAAM,UAAU,GACd,wFAAwF,CAAC;IAE3F,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC1D,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACzB,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAC3B,MAAM,QAAQ,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAC1B,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAEvB,IAAI,OAAO,KAAK,SAAS,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YACjD,MAAM,IAAI,GAAG,OAAkB,CAAC;YAChC,MAAM,EAAE,GAAG,QAAQ,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;YAE/B,IAAI,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC1C,uCAAuC;gBACvC,IAAI,QAAwC,
CAAC;gBAC7C,IACE,CAAC,SAAS,KAAK,SAAS,IAAI,SAAS,KAAK,EAAE,CAAC;oBAC7C,CAAC,QAAQ,KAAK,SAAS,IAAI,QAAQ,KAAK,EAAE,CAAC,EAC3C,CAAC;oBACD,QAAQ,GAAG,EAAE,CAAC;oBACd,IACE,SAAS,KAAK,SAAS;wBACvB,SAAS,KAAK,EAAE;wBAChB,CAAC,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC,EAC5D,CAAC;wBACD,QAAQ,CAAC,MAAM,GAAG,SAAyC,CAAC;oBAC9D,CAAC;oBACD,IACE,QAAQ,KAAK,SAAS;wBACtB,QAAQ,KAAK,EAAE;wBACf,CAAC,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAC3D,CAAC;wBACD,QAAQ,CAAC,KAAK,GAAG,QAAuC,CAAC;oBAC3D,CAAC;gBACH,CAAC;gBAED,IAAI,CAAC,IAAI,CAAC;oBACR,IAAI;oBACJ,EAAE;oBACF,QAAQ,EAAE,KAAK,CAAC,KAAK;oBACrB,WAAW,EAAE,KAAK,CAAC,CAAC,CAAC;oBACrB,QAAQ;iBACT,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,mBAAmB,CACjC,QAA0B;IAE1B,MAAM,MAAM,GAA4B,EAA6B,CAAC;IAEtE,4BAA4B;IAC5B,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC;QAC1C,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACnB,CAAC;IAED,iBAAiB;IACjB,KAAK,MAAM,MAAM,IAAI,QAAQ,EAAE,CAAC;QAC9B,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IACvD,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,SAAS,CACvB,cAAsB,EACtB,MAAiB,EACjB,SAAkB,KAAK;IAEvB,IAAI,MAAM,GAAG,cAAc,CAAC;IAC5B,MAAM,IAAI,GAAG,MAAM;QACjB,CAAC,CAAC,iBAAiB,CAAC,cAAc,CAAC;QACnC,CAAC,CAAC,WAAW,CAAC,cAAc,CAAC,CAAC;IAEhC,8CAA8C;IAC9C,0DAA0D;IAC1D,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC;IAE7C,KAAK,MAAM,EAAE,IAAI,EAAE,EAAE,EAAE,QAAQ,EAAE,WAAW,EAAE,IAAI,IAAI,EAAE,CAAC;QACvD,MAAM,GAAG,GAAG,eAAe,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACtC,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAEjC,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;YAC3B,qDAAqD;YACrD,6EAA6E;YAC7E,MAAM;gBACJ,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC;oBACzB,QAAQ;oBACR,MAAM,CAAC,KAAK,CAAC,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC;QAChD,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,79 @@
1
+ /**
2
+ * Title Extractor
3
+ * Extracts and strips honorific titles/prefixes from PERSON entities
4
+ * so that titles remain visible in anonymized text for translation.
5
+ *
6
+ * Supported languages: ar, de, en, es, fr, it, lv, nl, pt, zh
7
+ */
8
+ import { SpanMatch, SemanticAttributes } from "../types/index.js";
9
+ /**
10
+ * Title extraction result
11
+ */
12
+ export interface TitleExtractionResult {
13
+ /** The extracted title (e.g., "Dr.", "Mr.") or undefined if no title */
14
+ title?: string;
15
+ /** The name without the title */
16
+ nameWithoutTitle: string;
17
+ /** Character offset where the name starts (after title + space) */
18
+ titleLength: number;
19
+ }
20
+ /**
21
+ * Extracts a title from the beginning of a name
22
+ *
23
+ * @param name - Full name potentially starting with a title
24
+ * @returns Extraction result with title, remaining name, and offset
25
+ *
26
+ * @example
27
+ * extractTitle("Dr. John Smith") // { title: "Dr.", nameWithoutTitle: "John Smith", titleLength: 4 }
28
+ * extractTitle("John Smith") // { title: undefined, nameWithoutTitle: "John Smith", titleLength: 0 }
29
+ */
30
+ export declare function extractTitle(name: string): TitleExtractionResult;
31
+ /**
32
+ * Extended semantic attributes including title
33
+ */
34
+ export interface SemanticAttributesWithTitle extends SemanticAttributes {
35
+ /** Extracted title prefix (e.g., "Dr.", "Mrs.") */
36
+ title?: string;
37
+ }
38
+ /**
39
+ * Processes PERSON spans to extract titles
40
+ * Titles are removed from the span and stored in semantic attributes
41
+ * The span boundaries are adjusted so the title remains visible
42
+ *
43
+ * @param spans - Array of detected PII spans
44
+ * @param originalText - The original text (needed to verify span boundaries)
45
+ * @returns Array of spans with titles extracted from PERSON entities
46
+ */
47
+ export declare function extractTitlesFromSpans(spans: SpanMatch[], originalText: string): SpanMatch[];
48
+ /**
49
+ * Gets all supported titles for a specific language
50
+ */
51
+ export declare function getTitlesForLanguage(langCode: "ar" | "de" | "en" | "es" | "fr" | "it" | "lv" | "nl" | "pt" | "zh"): string[];
52
+ /**
53
+ * Gets all supported titles across all languages
54
+ */
55
+ export declare function getAllTitles(): string[];
56
+ /**
57
+ * Checks if a string starts with a known title
58
+ */
59
+ export declare function startsWithTitle(text: string): boolean;
60
+ /**
61
+ * Checks if a text consists entirely of a title (with optional punctuation)
62
+ */
63
+ export declare function isOnlyTitle(text: string): boolean;
64
+ /**
65
+ * Merges adjacent PERSON spans when one is a title
66
+ *
67
+ * This fixes issues where NER models split "Mrs. Smith" into two entities:
68
+ * - PERSON: "Mrs" (or "Mrs.")
69
+ * - PERSON: "Smith"
70
+ *
71
+ * After merging: PERSON: "Mrs. Smith"
72
+ *
73
+ * @param spans - Array of detected PII spans
74
+ * @param originalText - The original text
75
+ * @param maxGap - Maximum characters between spans to consider them adjacent (default: 3)
76
+ * @returns Array of spans with adjacent title+name PERSON entities merged
77
+ */
78
+ export declare function mergeAdjacentTitleSpans(spans: SpanMatch[], originalText: string, maxGap?: number): SpanMatch[];
79
+ //# sourceMappingURL=title-extractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"title-extractor.d.ts","sourceRoot":"","sources":["../../src/pipeline/title-extractor.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EACL,SAAS,EAET,kBAAkB,EAEnB,MAAM,mBAAmB,CAAC;AAE3B;;GAEG;AACH,MAAM,WAAW,qBAAqB;IACpC,wEAAwE;IACxE,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,iCAAiC;IACjC,gBAAgB,EAAE,MAAM,CAAC;IACzB,mEAAmE;IACnE,WAAW,EAAE,MAAM,CAAC;CACrB;AAqjBD;;;;;;;;;GASG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,qBAAqB,CAyBhE;AAED;;GAEG;AACH,MAAM,WAAW,2BAA4B,SAAQ,kBAAkB;IACrE,mDAAmD;IACnD,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;;;;;;;GAQG;AACH,wBAAgB,sBAAsB,CACpC,KAAK,EAAE,SAAS,EAAE,EAClB,YAAY,EAAE,MAAM,GACnB,SAAS,EAAE,CAiEb;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAClC,QAAQ,EAAE,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAAG,IAAI,GAC5E,MAAM,EAAE,CAcV;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,MAAM,EAAE,CAEvC;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAGrD;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CA0BjD;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,uBAAuB,CACrC,KAAK,EAAE,SAAS,EAAE,EAClB,YAAY,EAAE,MAAM,EACpB,MAAM,GAAE,MAAU,GACjB,SAAS,EAAE,CAuFb"}