rehydra 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/LICENSE +22 -0
  2. package/README.md +615 -0
  3. package/dist/crypto/index.d.ts +6 -0
  4. package/dist/crypto/index.d.ts.map +1 -0
  5. package/dist/crypto/index.js +6 -0
  6. package/dist/crypto/index.js.map +1 -0
  7. package/dist/crypto/pii-map-crypto.d.ts +114 -0
  8. package/dist/crypto/pii-map-crypto.d.ts.map +1 -0
  9. package/dist/crypto/pii-map-crypto.js +228 -0
  10. package/dist/crypto/pii-map-crypto.js.map +1 -0
  11. package/dist/index.d.ts +180 -0
  12. package/dist/index.d.ts.map +1 -0
  13. package/dist/index.js +384 -0
  14. package/dist/index.js.map +1 -0
  15. package/dist/ner/bio-decoder.d.ts +64 -0
  16. package/dist/ner/bio-decoder.d.ts.map +1 -0
  17. package/dist/ner/bio-decoder.js +216 -0
  18. package/dist/ner/bio-decoder.js.map +1 -0
  19. package/dist/ner/index.d.ts +10 -0
  20. package/dist/ner/index.d.ts.map +1 -0
  21. package/dist/ner/index.js +10 -0
  22. package/dist/ner/index.js.map +1 -0
  23. package/dist/ner/model-manager.d.ts +111 -0
  24. package/dist/ner/model-manager.d.ts.map +1 -0
  25. package/dist/ner/model-manager.js +325 -0
  26. package/dist/ner/model-manager.js.map +1 -0
  27. package/dist/ner/ner-model.d.ts +114 -0
  28. package/dist/ner/ner-model.d.ts.map +1 -0
  29. package/dist/ner/ner-model.js +253 -0
  30. package/dist/ner/ner-model.js.map +1 -0
  31. package/dist/ner/onnx-runtime.d.ts +46 -0
  32. package/dist/ner/onnx-runtime.d.ts.map +1 -0
  33. package/dist/ner/onnx-runtime.js +130 -0
  34. package/dist/ner/onnx-runtime.js.map +1 -0
  35. package/dist/ner/tokenizer.d.ts +118 -0
  36. package/dist/ner/tokenizer.d.ts.map +1 -0
  37. package/dist/ner/tokenizer.js +332 -0
  38. package/dist/ner/tokenizer.js.map +1 -0
  39. package/dist/pipeline/index.d.ts +12 -0
  40. package/dist/pipeline/index.d.ts.map +1 -0
  41. package/dist/pipeline/index.js +12 -0
  42. package/dist/pipeline/index.js.map +1 -0
  43. package/dist/pipeline/prenormalize.d.ts +48 -0
  44. package/dist/pipeline/prenormalize.d.ts.map +1 -0
  45. package/dist/pipeline/prenormalize.js +94 -0
  46. package/dist/pipeline/prenormalize.js.map +1 -0
  47. package/dist/pipeline/resolver.d.ts +56 -0
  48. package/dist/pipeline/resolver.d.ts.map +1 -0
  49. package/dist/pipeline/resolver.js +239 -0
  50. package/dist/pipeline/resolver.js.map +1 -0
  51. package/dist/pipeline/semantic-data-loader.d.ts +165 -0
  52. package/dist/pipeline/semantic-data-loader.d.ts.map +1 -0
  53. package/dist/pipeline/semantic-data-loader.js +655 -0
  54. package/dist/pipeline/semantic-data-loader.js.map +1 -0
  55. package/dist/pipeline/semantic-enricher.d.ts +112 -0
  56. package/dist/pipeline/semantic-enricher.d.ts.map +1 -0
  57. package/dist/pipeline/semantic-enricher.js +318 -0
  58. package/dist/pipeline/semantic-enricher.js.map +1 -0
  59. package/dist/pipeline/tagger.d.ts +114 -0
  60. package/dist/pipeline/tagger.d.ts.map +1 -0
  61. package/dist/pipeline/tagger.js +374 -0
  62. package/dist/pipeline/tagger.js.map +1 -0
  63. package/dist/pipeline/title-extractor.d.ts +79 -0
  64. package/dist/pipeline/title-extractor.d.ts.map +1 -0
  65. package/dist/pipeline/title-extractor.js +801 -0
  66. package/dist/pipeline/title-extractor.js.map +1 -0
  67. package/dist/pipeline/validator.d.ts +65 -0
  68. package/dist/pipeline/validator.d.ts.map +1 -0
  69. package/dist/pipeline/validator.js +264 -0
  70. package/dist/pipeline/validator.js.map +1 -0
  71. package/dist/recognizers/base.d.ts +78 -0
  72. package/dist/recognizers/base.d.ts.map +1 -0
  73. package/dist/recognizers/base.js +100 -0
  74. package/dist/recognizers/base.js.map +1 -0
  75. package/dist/recognizers/bic-swift.d.ts +10 -0
  76. package/dist/recognizers/bic-swift.d.ts.map +1 -0
  77. package/dist/recognizers/bic-swift.js +107 -0
  78. package/dist/recognizers/bic-swift.js.map +1 -0
  79. package/dist/recognizers/credit-card.d.ts +32 -0
  80. package/dist/recognizers/credit-card.d.ts.map +1 -0
  81. package/dist/recognizers/credit-card.js +160 -0
  82. package/dist/recognizers/credit-card.js.map +1 -0
  83. package/dist/recognizers/custom-id.d.ts +28 -0
  84. package/dist/recognizers/custom-id.d.ts.map +1 -0
  85. package/dist/recognizers/custom-id.js +116 -0
  86. package/dist/recognizers/custom-id.js.map +1 -0
  87. package/dist/recognizers/email.d.ts +10 -0
  88. package/dist/recognizers/email.d.ts.map +1 -0
  89. package/dist/recognizers/email.js +75 -0
  90. package/dist/recognizers/email.js.map +1 -0
  91. package/dist/recognizers/iban.d.ts +14 -0
  92. package/dist/recognizers/iban.d.ts.map +1 -0
  93. package/dist/recognizers/iban.js +67 -0
  94. package/dist/recognizers/iban.js.map +1 -0
  95. package/dist/recognizers/index.d.ts +20 -0
  96. package/dist/recognizers/index.d.ts.map +1 -0
  97. package/dist/recognizers/index.js +42 -0
  98. package/dist/recognizers/index.js.map +1 -0
  99. package/dist/recognizers/ip-address.d.ts +14 -0
  100. package/dist/recognizers/ip-address.d.ts.map +1 -0
  101. package/dist/recognizers/ip-address.js +183 -0
  102. package/dist/recognizers/ip-address.js.map +1 -0
  103. package/dist/recognizers/phone.d.ts +10 -0
  104. package/dist/recognizers/phone.d.ts.map +1 -0
  105. package/dist/recognizers/phone.js +145 -0
  106. package/dist/recognizers/phone.js.map +1 -0
  107. package/dist/recognizers/registry.d.ts +59 -0
  108. package/dist/recognizers/registry.d.ts.map +1 -0
  109. package/dist/recognizers/registry.js +113 -0
  110. package/dist/recognizers/registry.js.map +1 -0
  111. package/dist/recognizers/url.d.ts +14 -0
  112. package/dist/recognizers/url.d.ts.map +1 -0
  113. package/dist/recognizers/url.js +121 -0
  114. package/dist/recognizers/url.js.map +1 -0
  115. package/dist/types/index.d.ts +197 -0
  116. package/dist/types/index.d.ts.map +1 -0
  117. package/dist/types/index.js +80 -0
  118. package/dist/types/index.js.map +1 -0
  119. package/dist/types/pii-types.d.ts +50 -0
  120. package/dist/types/pii-types.d.ts.map +1 -0
  121. package/dist/types/pii-types.js +114 -0
  122. package/dist/types/pii-types.js.map +1 -0
  123. package/dist/utils/iban-checksum.d.ts +23 -0
  124. package/dist/utils/iban-checksum.d.ts.map +1 -0
  125. package/dist/utils/iban-checksum.js +106 -0
  126. package/dist/utils/iban-checksum.js.map +1 -0
  127. package/dist/utils/index.d.ts +10 -0
  128. package/dist/utils/index.d.ts.map +1 -0
  129. package/dist/utils/index.js +10 -0
  130. package/dist/utils/index.js.map +1 -0
  131. package/dist/utils/luhn.d.ts +17 -0
  132. package/dist/utils/luhn.d.ts.map +1 -0
  133. package/dist/utils/luhn.js +55 -0
  134. package/dist/utils/luhn.js.map +1 -0
  135. package/dist/utils/offsets.d.ts +86 -0
  136. package/dist/utils/offsets.d.ts.map +1 -0
  137. package/dist/utils/offsets.js +124 -0
  138. package/dist/utils/offsets.js.map +1 -0
  139. package/dist/utils/path.d.ts +34 -0
  140. package/dist/utils/path.d.ts.map +1 -0
  141. package/dist/utils/path.js +96 -0
  142. package/dist/utils/path.js.map +1 -0
  143. package/dist/utils/storage-browser.d.ts +51 -0
  144. package/dist/utils/storage-browser.d.ts.map +1 -0
  145. package/dist/utils/storage-browser.js +381 -0
  146. package/dist/utils/storage-browser.js.map +1 -0
  147. package/dist/utils/storage-node.d.ts +43 -0
  148. package/dist/utils/storage-node.d.ts.map +1 -0
  149. package/dist/utils/storage-node.js +93 -0
  150. package/dist/utils/storage-node.js.map +1 -0
  151. package/dist/utils/storage.d.ts +70 -0
  152. package/dist/utils/storage.d.ts.map +1 -0
  153. package/dist/utils/storage.js +69 -0
  154. package/dist/utils/storage.js.map +1 -0
  155. package/package.json +66 -0
@@ -0,0 +1,112 @@
1
+ /**
2
+ * Semantic Enricher
3
+ * Enriches PII spans with semantic attributes (gender, location scope)
4
+ * for MT-friendly tags that preserve grammatical context.
5
+ *
6
+ * This module uses data from the GeoNames and gender-guesser projects.
7
+ * Data is automatically downloaded when using:
8
+ * createAnonymizer({ semantic: { enabled: true, autoDownload: true } })
9
+ */
10
+ import { SpanMatch, PersonGender, LocationScope } from "../types/index.js";
/**
 * Options accepted by enrichSemantics().
 *
 * NOTE(review): the compiled enrichSemantics() shipped in
 * dist/pipeline/semantic-enricher.js reads only `locale`; confirm whether
 * `minConfidence` and `strictMode` are honoured anywhere else.
 */
export interface EnricherConfig {
    /** Locale hint used to disambiguate the gender of a name (e.g., 'de', 'it', 'fr') */
    locale?: string;
    /** Lowest confidence at which semantic attributes are applied (default: 0.0) */
    minConfidence?: number;
    /** When set, low-confidence results are marked as 'unknown' */
    strictMode?: boolean;
}
/**
 * Outcome of a gender lookup, together with how certain it is and where
 * the answer came from.
 */
export interface GenderResult {
    /** Inferred gender value */
    gender: PersonGender;
    /** Confidence attached to the inferred gender */
    confidence: number;
    /** Origin of the answer */
    source: "database" | "inference" | "unknown";
}
/**
 * Outcome of a location classification, together with its confidence.
 */
export interface LocationResult {
    /** Geographic scope assigned to the location */
    scope: LocationScope;
    /** Confidence attached to the classification */
    confidence: number;
    /** Country code associated with the matched entry, when one was found */
    countryCode?: string;
}
/**
 * Loads the semantic data asynchronously. Call this before relying on the
 * synchronous functions of this module.
 *
 * @returns Promise that settles once initialization has finished
 * @throws Error if the data files are not available
 */
export declare function initializeEnricher(): Promise<void>;
/**
 * Reports whether the enricher can serve synchronous operations.
 *
 * @returns `true` when the enricher is ready, `false` otherwise
 */
export declare function isEnricherReady(): boolean;
/**
 * Adds semantic attributes, derived from lookup tables, to detected PII spans.
 *
 * NOTE: the semantic data must already be loaded, either through
 * initializeEnricher() or through
 * createAnonymizer({ semantic: { enabled: true } }).
 *
 * @param spans - Detected PII spans
 * @param config - Optional enrichment settings
 * @returns The spans with semantic attributes added
 *
 * @example
 * ```typescript
 * const enrichedSpans = enrichSemantics(spans, { locale: 'de' });
 * // "Mary" -> { gender: 'female' }
 * // "Berlin" -> { scope: 'city' }
 * ```
 */
export declare function enrichSemantics(spans: SpanMatch[], config?: EnricherConfig): SpanMatch[];
/**
 * Looks up the gender associated with a person's name in the name database.
 *
 * @param name - Full name or first name
 * @param locale - Optional locale used for disambiguation (e.g., 'de', 'it')
 * @returns Gender together with its confidence and source
 *
 * @example
 * ```typescript
 * inferGender('Mary Smith');   // { gender: 'female', confidence: 1.0, source: 'database' }
 * inferGender('Andrea', 'it'); // { gender: 'male', confidence: 1.0, source: 'database' }
 * inferGender('Andrea', 'en'); // { gender: 'female', confidence: 1.0, source: 'database' }
 * ```
 */
export declare function inferGender(name: string, locale?: string): GenderResult;
/**
 * Determines the geographic scope of a location name.
 *
 * @param location - Location name
 * @returns Scope together with its confidence (and a country code when known)
 *
 * @example
 * ```typescript
 * classifyLocation('Berlin');  // { scope: 'city', confidence: 1.0, ... }
 * classifyLocation('Germany'); // { scope: 'country', confidence: 1.0, ... }
 * classifyLocation('Bavaria'); // { scope: 'region', confidence: 1.0, ... }
 * ```
 */
export declare function classifyLocation(location: string): LocationResult;
/**
 * Returns summary figures for the lookup databases.
 *
 * @returns Entry counts for names, cities, countries and regions, plus a
 *          flag telling whether the data is loaded
 */
export declare function getDatabaseStats(): {
    names: number;
    cities: number;
    countries: number;
    regions: number;
    loaded: boolean;
};
/**
 * Tells whether a name is present in the database.
 *
 * @param name - Name to look for
 * @returns `true` if the name is known
 */
export declare function hasName(name: string): boolean;
/**
 * Tells whether a location is present in the database.
 *
 * @param location - Location name to look for
 * @returns `true` if the location is known
 */
export declare function hasLocation(location: string): boolean;
112
+ //# sourceMappingURL=semantic-enricher.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"semantic-enricher.d.ts","sourceRoot":"","sources":["../../src/pipeline/semantic-enricher.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EACL,SAAS,EAET,YAAY,EACZ,aAAa,EACd,MAAM,mBAAmB,CAAC;AAS3B;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,0EAA0E;IAC1E,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,qEAAqE;IACrE,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,0DAA0D;IAC1D,UAAU,CAAC,EAAE,OAAO,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,MAAM,EAAE,YAAY,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,UAAU,GAAG,WAAW,GAAG,SAAS,CAAC;CAC9C;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,aAAa,CAAC;IACrB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAKD;;;GAGG;AACH,wBAAsB,kBAAkB,IAAI,OAAO,CAAC,IAAI,CAAC,CAaxD;AAED;;GAEG;AACH,wBAAgB,eAAe,IAAI,OAAO,CAEzC;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,eAAe,CAC7B,KAAK,EAAE,SAAS,EAAE,EAClB,MAAM,CAAC,EAAE,cAAc,GACtB,SAAS,EAAE,CAkBb;AAoGD;;;;;;;;;;;;;GAaG;AACH,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,GAAG,YAAY,CAwBvE;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,cAAc,CAgCjE;AA+ED;;GAEG;AACH,wBAAgB,gBAAgB,IAAI;IAClC,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,OAAO,CAAC;CACjB,CAEA;AAED;;GAEG;AACH,wBAAgB,OAAO,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAU7C;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAQrD"}
@@ -0,0 +1,318 @@
1
+ /**
2
+ * Semantic Enricher
3
+ * Enriches PII spans with semantic attributes (gender, location scope)
4
+ * for MT-friendly tags that preserve grammatical context.
5
+ *
6
+ * This module uses data from the GeoNames and gender-guesser projects.
7
+ * Data is automatically downloaded when using:
8
+ * createAnonymizer({ semantic: { enabled: true, autoDownload: true } })
9
+ */
10
+ import { PIIType, } from "../types/index.js";
11
+ import { isSemanticDataDownloaded, loadSemanticData, getSemanticDataSync, getDataStats, } from "./semantic-data-loader.js";
// Module-level flag; flipped to true once loadSemanticData() has completed.
let dataInitialized = false;
/**
 * Loads the semantic lookup data so the synchronous helpers in this module
 * have something to query. Once a load has succeeded, later calls return
 * immediately without touching the loader again.
 *
 * @returns Promise that resolves when the data has been loaded
 * @throws Error if isSemanticDataDownloaded() reports that the data is missing
 */
export async function initializeEnricher() {
    if (!dataInitialized) {
        const downloaded = await isSemanticDataDownloaded();
        if (!downloaded) {
            const message = `Semantic enrichment data not available. ` +
                `Use ensureSemanticData() or createAnonymizer({ semantic: { enabled: true } }) to download.`;
            throw new Error(message);
        }
        await loadSemanticData();
        // Only mark as initialized after the load itself succeeded.
        dataInitialized = true;
    }
}
/**
 * Reports whether synchronous enrichment can run: initializeEnricher() must
 * have completed and getSemanticDataSync() must return non-null data.
 *
 * @returns `true` when both conditions hold
 */
export function isEnricherReady() {
    if (!dataInitialized) {
        return false;
    }
    return getSemanticDataSync() !== null;
}
/**
 * Adds semantic attributes to PII spans using the loaded lookup tables.
 *
 * PERSON spans receive a `semantic.gender`, LOCATION spans a
 * `semantic.scope`; spans of any other type are passed through unchanged.
 * When no semantic data is loaded the input array itself is returned.
 *
 * Only `config.locale` is consulted here; `minConfidence` and `strictMode`
 * are not read by this function.
 *
 * @param spans - Array of detected PII spans
 * @param config - Optional configuration for enrichment
 * @returns Array of spans with semantic attributes added
 *
 * @example
 * ```typescript
 * const enrichedSpans = enrichSemantics(spans, { locale: 'de' });
 * // "Mary" -> { gender: 'female' }
 * // "Berlin" -> { scope: 'city' }
 * ```
 */
export function enrichSemantics(spans, config) {
    // Without data there is nothing to enrich with; skip silently.
    if (getSemanticDataSync() === null) {
        return spans;
    }
    return spans.map((span) => {
        if (span.type === PIIType.PERSON) {
            return enrichPerson(span, config?.locale);
        }
        if (span.type === PIIType.LOCATION) {
            return enrichLocation(span);
        }
        return span;
    });
}
/**
 * Returns a copy of a PERSON span whose `semantic.gender` is set to the
 * gender inferred from the span text. Existing semantic attributes are kept.
 */
function enrichPerson(span, locale) {
    const { gender } = inferGender(span.text, locale);
    const semantic = { ...span.semantic, gender };
    return { ...span, semantic };
}
/**
 * Returns a copy of a LOCATION span whose `semantic.scope` is set to the
 * scope obtained by classifying the span text. Existing semantic attributes
 * are kept.
 */
function enrichLocation(span) {
    const { scope } = classifyLocation(span.text);
    const semantic = { ...span.semantic, scope };
    return { ...span, semantic };
}
/**
 * Synchronously fetches the gender stored for a name.
 *
 * The name is lower-cased before the lookup. If a non-empty locale is given
 * and the entry carries an override for it, the override wins over the
 * entry's default gender.
 *
 * @returns The stored gender, or `undefined` when no data is loaded or the
 *          name is not in the table
 */
function lookupGenderSync(name, locale) {
    const data = getSemanticDataSync();
    const entry = data === null ? undefined : data.names.get(name.toLowerCase());
    if (entry === undefined) {
        return undefined;
    }
    const localeGiven = locale !== undefined && locale !== "";
    if (localeGiven && entry.localeOverrides !== undefined) {
        const override = entry.localeOverrides[locale];
        if (override !== undefined) {
            return override;
        }
    }
    return entry.gender;
}
/**
 * Population at or above which a city counts as "major" and is preferred
 * over a region with the same name.
 */
const MAJOR_CITY_POPULATION = 500000;
/**
 * Synchronously resolves a location name to its type.
 *
 * The name is lower-cased and trimmed, then matched in this order:
 * country, city with population >= MAJOR_CITY_POPULATION, region, and
 * finally any remaining (smaller) city.
 *
 * @returns `{ type, countryCode }`, or `undefined` when no data is loaded or
 *          nothing matches
 */
function lookupLocationTypeSync(location) {
    const data = getSemanticDataSync();
    if (data === null) {
        return undefined;
    }
    const key = location.toLowerCase().trim();
    // Countries go first so that e.g. "USA" is never treated as a city.
    const countryCode = data.countries.get(key);
    if (countryCode !== undefined) {
        return { type: "country", countryCode };
    }
    const city = data.cities.get(key);
    const isMajorCity = Boolean(city) && city.population >= MAJOR_CITY_POPULATION;
    if (!isMajorCity) {
        // Regions outrank small cities only.
        const region = data.regions.get(key);
        if (region) {
            return { type: "region", countryCode: region.country };
        }
    }
    return city ? { type: "city", countryCode: city.country } : undefined;
}
/**
 * Infers gender from a person's name using the lookup database.
 *
 * Only the first name (after honorific removal) is looked up. A hit is
 * reported with confidence 1.0 and source "database"; every other outcome
 * (empty name, data not loaded, name not found) yields gender "unknown"
 * with confidence 0 and source "unknown".
 *
 * @param name - Full name or first name
 * @param locale - Optional locale for disambiguation (e.g., 'de', 'it')
 * @returns Gender result with confidence
 *
 * @example
 * ```typescript
 * inferGender('Mary Smith');   // { gender: 'female', confidence: 1.0, source: 'database' }
 * inferGender('Andrea', 'it'); // { gender: 'male', confidence: 1.0, source: 'database' }
 * inferGender('Andrea', 'en'); // { gender: 'female', confidence: 1.0, source: 'database' }
 * ```
 */
export function inferGender(name, locale) {
    const unknownResult = () => ({ gender: "unknown", confidence: 0, source: "unknown" });
    const firstName = extractFirstName(name);
    if (firstName === null || firstName === "") {
        return unknownResult();
    }
    if (getSemanticDataSync() === null) {
        return unknownResult();
    }
    const gender = lookupGenderSync(firstName, locale);
    if (gender === undefined || gender === "") {
        return unknownResult();
    }
    return { gender, confidence: 1.0, source: "database" };
}
/**
 * Classifies a location by its geographic scope.
 *
 * Lookup order:
 *  1. the exact name (lower-cased, trimmed, whitespace collapsed) — so that
 *     names whose last word is part of the name itself ("Cape Town",
 *     "Mexico City", "Kansas City") are matched as written;
 *  2. the suffix-stripped name from normalizeLocationName()
 *     ("New York State" -> "new york");
 *  3. the fuzzy variations from generateLocationVariations().
 * Steps 1 and 2 report confidence 1.0, step 3 reports 0.9. When nothing
 * matches, or no semantic data is loaded, the scope is "unknown" with
 * confidence 0.
 *
 * @param location - Location name
 * @returns Classification result with confidence
 *
 * @example
 * ```typescript
 * classifyLocation('Berlin');  // { scope: 'city', confidence: 1.0, ... }
 * classifyLocation('Germany'); // { scope: 'country', confidence: 1.0, ... }
 * classifyLocation('Bavaria'); // { scope: 'region', confidence: 1.0, ... }
 * ```
 */
export function classifyLocation(location) {
    if (getSemanticDataSync() === null) {
        return { scope: "unknown", confidence: 0 };
    }
    // Try the name as written before stripping "city"/"town"/... suffixes;
    // stripping first would turn "Cape Town" into "cape" and "Mexico City"
    // into the country "mexico".
    const exactName = location.toLowerCase().trim().replace(/\s+/g, " ");
    const normalized = normalizeLocationName(location);
    const direct = lookupLocationTypeSync(exactName) ??
        (normalized !== exactName ? lookupLocationTypeSync(normalized) : undefined);
    if (direct) {
        return {
            scope: direct.type,
            confidence: 1.0,
            countryCode: direct.countryCode,
        };
    }
    // Fall back to article-stripped / transliterated spellings.
    for (const variant of generateLocationVariations(location)) {
        const variantResult = lookupLocationTypeSync(variant);
        if (variantResult) {
            return {
                scope: variantResult.type,
                confidence: 0.9,
                countryCode: variantResult.countryCode,
            };
        }
    }
    return { scope: "unknown", confidence: 0 };
}
/**
 * Extracts the first name from a full name.
 *
 * Leading honorifics (Dr., Mr., Mrs., Ms., Prof., Rev., Sir, Dame, Lord,
 * Lady) are removed, including stacked ones such as "Prof. Dr.", and the
 * first whitespace-separated word of what remains is returned. An honorific
 * is only removed when something follows it, so a lone "Dr." is returned
 * as-is.
 *
 * @param {string} fullName - Full name, optionally with honorifics
 * @returns {string | null} The first name, or `null` for blank input
 */
function extractFirstName(fullName) {
    const trimmed = fullName.trim();
    if (!trimmed) {
        return null;
    }
    // The outer (...)+ strips every leading honorific, not just the first.
    const honorifics = /^(?:(?:dr\.?|mr\.?|mrs\.?|ms\.?|prof\.?|rev\.?|sir|dame|lord|lady)\s+)+/i;
    const withoutPrefix = trimmed.replace(honorifics, "");
    const parts = withoutPrefix.split(/\s+/);
    return parts[0] ?? null;
}
/**
 * Normalizes a location name for lookup: lower-cases and trims it, removes
 * one trailing generic suffix (city, town, village, state, province, region,
 * county) and collapses runs of whitespace to a single space.
 *
 * @param {string} location - Raw location name
 * @returns {string} Normalized name
 */
function normalizeLocationName(location) {
    return (location
        .toLowerCase()
        .trim()
        // Remove common suffixes
        .replace(/\s+(city|town|village|state|province|region|county)$/i, "")
        // Normalize whitespace
        .replace(/\s+/g, " "));
}
/**
 * Folds accented Latin letters to their base letters.
 *
 * Canonical decomposition (NFD) separates base letters from combining marks,
 * the marks in U+0300-U+036F are dropped, and the text is recomposed (NFC)
 * so that unrelated scripts are left as they were. Letters without a
 * decomposition are mapped explicitly.
 *
 * @param {string} text - Lower-cased text
 * @returns {string} Text with diacritics removed
 */
function foldDiacritics(text) {
    return text
        .normalize("NFD")
        .replace(/[\u0300-\u036f]/g, "")
        .normalize("NFC")
        .replace(/ß/g, "ss")
        .replace(/æ/g, "ae")
        .replace(/ø/g, "o")
        .replace(/œ/g, "oe");
}
/**
 * Generates variations of a location name for fuzzy matching.
 *
 * Variations are produced from the normalized name, in this order:
 *  1. without a leading "the";
 *  2. without a leading article from other languages (la, le, der, ...);
 *  3. with diacritics folded to ASCII;
 *  4. the article-stripped forms with diacritics folded.
 * The normalized name itself and duplicates are never included.
 *
 * @param {string} location - Raw location name
 * @returns {string[]} Alternative spellings to try, most specific first
 */
function generateLocationVariations(location) {
    const normalized = normalizeLocationName(location);
    const variations = [];
    const addVariation = (candidate) => {
        if (candidate !== normalized && !variations.includes(candidate)) {
            variations.push(candidate);
        }
    };
    // Try without "the"
    if (normalized.startsWith("the ")) {
        addVariation(normalized.slice(4));
    }
    // Try without common articles in other languages
    addVariation(normalized.replace(/^(la|le|les|el|los|las|il|lo|gli|i|die|der|das|de|het)\s+/i, ""));
    // Try ASCII transliteration of the name and of every article-stripped
    // form. The spread takes a snapshot, so entries added inside the loop
    // are not revisited.
    for (const candidate of [normalized, ...variations]) {
        addVariation(foldDiacritics(candidate));
    }
    return variations;
}
/**
 * Returns statistics about the lookup databases, exactly as reported by
 * getDataStats() from the semantic data loader.
 */
export function getDatabaseStats() {
    const stats = getDataStats();
    return stats;
}
/**
 * Tells whether a name is present in the name table.
 *
 * Only the extracted first name is checked, and no locale is applied.
 * Returns `false` when no semantic data is loaded or the name is blank.
 *
 * @param name - Full name or first name
 * @returns `true` if the first name has an entry
 */
export function hasName(name) {
    if (getSemanticDataSync() === null) {
        return false;
    }
    const firstName = extractFirstName(name);
    const usable = firstName !== null && firstName !== "";
    return usable && lookupGenderSync(firstName) !== undefined;
}
/**
 * Checks if a location exists in the database.
 *
 * The name is first tried as written (lower-cased, trimmed, whitespace
 * collapsed) so that names ending in a generic word — "Cape Town",
 * "Mexico City" — are found under their own entry. Only if that fails is
 * the suffix-stripped form from normalizeLocationName() tried.
 *
 * @param location - Location name
 * @returns `true` if either form is known; `false` otherwise or when no
 *          semantic data is loaded
 */
export function hasLocation(location) {
    if (getSemanticDataSync() === null) {
        return false;
    }
    const exactName = location.toLowerCase().trim().replace(/\s+/g, " ");
    if (lookupLocationTypeSync(exactName) !== undefined) {
        return true;
    }
    const normalized = normalizeLocationName(location);
    // Skip the second lookup when stripping changed nothing.
    return normalized !== exactName && lookupLocationTypeSync(normalized) !== undefined;
}
318
+ //# sourceMappingURL=semantic-enricher.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"semantic-enricher.js","sourceRoot":"","sources":["../../src/pipeline/semantic-enricher.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAEL,OAAO,GAGR,MAAM,mBAAmB,CAAC;AAE3B,OAAO,EACL,wBAAwB,EACxB,gBAAgB,EAChB,mBAAmB,EACnB,YAAY,GACb,MAAM,2BAA2B,CAAC;AAgCnC,qCAAqC;AACrC,IAAI,eAAe,GAAG,KAAK,CAAC;AAE5B;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB;IACtC,IAAI,eAAe;QAAE,OAAO;IAE5B,MAAM,SAAS,GAAG,MAAM,wBAAwB,EAAE,CAAC;IACnD,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CACb,0CAA0C;YACxC,4FAA4F,CAC/F,CAAC;IACJ,CAAC;IAED,MAAM,gBAAgB,EAAE,CAAC;IACzB,eAAe,GAAG,IAAI,CAAC;AACzB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,eAAe;IAC7B,OAAO,eAAe,IAAI,mBAAmB,EAAE,KAAK,IAAI,CAAC;AAC3D,CAAC;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,UAAU,eAAe,CAC7B,KAAkB,EAClB,MAAuB;IAEvB,0BAA0B;IAC1B,MAAM,IAAI,GAAG,mBAAmB,EAAE,CAAC;IACnC,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClB,iDAAiD;QACjD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACxB,QAAQ,IAAI,CAAC,IAAI,EAAE,CAAC;YAClB,KAAK,OAAO,CAAC,MAAM;gBACjB,OAAO,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;YAC5C,KAAK,OAAO,CAAC,QAAQ;gBACnB,OAAO,cAAc,CAAC,IAAI,CAAC,CAAC;YAC9B;gBACE,OAAO,IAAI,CAAC;QAChB,CAAC;IACH,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CAAC,IAAe,EAAE,MAAe;IACpD,MAAM,MAAM,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAE9C,OAAO;QACL,GAAG,IAAI;QACP,QAAQ,EAAE;YACR,GAAG,IAAI,CAAC,QAAQ;YAChB,MAAM,EAAE,MAAM,CAAC,MAAM;SACtB;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,IAAe;IACrC,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAE3C,OAAO;QACL,GAAG,IAAI;QACP,QAAQ,EAAE;YACR,GAAG,IAAI,CAAC,QAAQ;YAChB,KAAK,EAAE,MAAM,CAAC,KAAK;SACpB;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CACvB,IAAY,EACZ,MAAe;IAEf,MAAM,IAAI,GAAG,mBAAmB,EAAE,CAAC;IACnC,IAAI,IAAI,KAAK,IAAI;QAAE,OAAO,SAAS,CAAC;IAEpC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;IACjD,IAAI,KAAK,KAAK,SAAS;QAAE,OAAO,SAAS,CAAC;IAE1C,qCAAqC;IACrC,IACE,MAAM,KAAK,SAAS;QACpB,MAAM,KAAK,EAAE;QACb,KAAK,CAAC,eAAe,KAAK,SAAS;QACnC,KAA
K,CAAC,eAAe,CAAC,MAAM,CAAC,KAAK,SAAS,EAC3C,CAAC;QACD,OAAO,KAAK,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC;IACvC,CAAC;IAED,OAAO,KAAK,CAAC,MAAM,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,MAAM,qBAAqB,GAAG,MAAM,CAAC;AAErC;;GAEG;AACH,SAAS,sBAAsB,CAC7B,QAAgB;IAEhB,MAAM,IAAI,GAAG,mBAAmB,EAAE,CAAC;IACnC,IAAI,IAAI,KAAK,IAAI;QAAE,OAAO,SAAS,CAAC;IAEpC,MAAM,UAAU,GAAG,QAAQ,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC;IAEjD,iEAAiE;IACjE,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IACnD,IAAI,WAAW,KAAK,SAAS,EAAE,CAAC;QAC9B,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,WAAW,EAAE,CAAC;IAC1C,CAAC;IAED,+EAA+E;IAC/E,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IACzC,IAAI,IAAI,IAAI,IAAI,CAAC,UAAU,IAAI,qBAAqB,EAAE,CAAC;QACrD,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,IAAI,CAAC,OAAO,EAAE,CAAC;IACrD,CAAC;IAED,gBAAgB;IAChB,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IAC5C,IAAI,MAAM,EAAE,CAAC;QACX,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,CAAC,OAAO,EAAE,CAAC;IACzD,CAAC;IAED,0CAA0C;IAC1C,IAAI,IAAI,EAAE,CAAC;QACT,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,IAAI,CAAC,OAAO,EAAE,CAAC;IACrD,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,UAAU,WAAW,CAAC,IAAY,EAAE,MAAe;IACvD,sDAAsD;IACtD,MAAM,SAAS,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;IACzC,IAAI,SAAS,KAAK,IAAI,IAAI,SAAS,KAAK,EAAE,EAAE,CAAC;QAC3C,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;IACjE,CAAC;IAED,6BAA6B;IAC7B,MAAM,IAAI,GAAG,mBAAmB,EAAE,CAAC;IACnC,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClB,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;IACjE,CAAC;IAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAEnD,IAAI,MAAM,KAAK,SAAS,IAAI,MAAM,KAAK,EAAE,EAAE,CAAC;QAC1C,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;IACjE,CAAC;IAED,OAAO;QACL,MAAM,EAAE,MAAsB;QAC9B,UAAU,EAAE,GAAG;QACf,MAAM,EAAE,UAAU;KACnB,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,gBAAgB,CAAC,QAAgB;IAC/C,6BAA6B;IAC7B,MAAM,IAAI,GAAG,mBAAmB,EAAE,CAAC;IACnC,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClB,OA
AO,EAAE,KAAK,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC;IAC7C,CAAC;IAED,MAAM,UAAU,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IACnD,MAAM,MAAM,GAAG,sBAAsB,CAAC,UAAU,CAAC,CAAC;IAElD,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,iBAAiB;QACjB,MAAM,UAAU,GAAG,0BAA0B,CAAC,QAAQ,CAAC,CAAC;QACxD,KAAK,MAAM,OAAO,IAAI,UAAU,EAAE,CAAC;YACjC,MAAM,aAAa,GAAG,sBAAsB,CAAC,OAAO,CAAC,CAAC;YACtD,IAAI,aAAa,EAAE,CAAC;gBAClB,OAAO;oBACL,KAAK,EAAE,aAAa,CAAC,IAAqB;oBAC1C,UAAU,EAAE,GAAG;oBACf,WAAW,EAAE,aAAa,CAAC,WAAW;iBACvC,CAAC;YACJ,CAAC;QACH,CAAC;QAED,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC;IAC7C,CAAC;IAED,OAAO;QACL,KAAK,EAAE,MAAM,CAAC,IAAqB;QACnC,UAAU,EAAE,GAAG;QACf,WAAW,EAAE,MAAM,CAAC,WAAW;KAChC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,QAAgB;IACxC,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAC;IAChC,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAE1B,gDAAgD;IAChD,MAAM,aAAa,GAAG,OAAO,CAAC,OAAO,CACnC,mEAAmE,EACnE,EAAE,CACH,CAAC;IAEF,2BAA2B;IAC3B,MAAM,KAAK,GAAG,aAAa,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACzC,OAAO,KAAK,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,SAAS,qBAAqB,CAAC,QAAgB;IAC7C,OAAO,CACL,QAAQ;SACL,WAAW,EAAE;SACb,IAAI,EAAE;QACP,yBAAyB;SACxB,OAAO,CAAC,uDAAuD,EAAE,EAAE,CAAC;QACrE,uBAAuB;SACtB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CACxB,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,0BAA0B,CAAC,QAAgB;IAClD,MAAM,UAAU,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IACnD,MAAM,UAAU,GAAa,EAAE,CAAC;IAEhC,oBAAoB;IACpB,IAAI,UAAU,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;QAClC,UAAU,CAAC,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACvC,CAAC;IAED,iDAAiD;IACjD,MAAM,eAAe,GAAG;QACtB,4DAA4D;KAC7D,CAAC;IACF,KAAK,MAAM,OAAO,IAAI,eAAe,EAAE,CAAC;QACtC,MAAM,cAAc,GAAG,UAAU,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QACvD,IAAI,cAAc,KAAK,UAAU,EAAE,CAAC;YAClC,UAAU,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IAED,kDAAkD;IAClD,MAAM,YAAY,GAAG,UAAU;SAC5B,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC;SACzB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;SACxB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACp
B,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC;SACrB,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC;SACrB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;IAEzB,IAAI,YAAY,KAAK,UAAU,EAAE,CAAC;QAChC,UAAU,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IAChC,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB;IAO9B,OAAO,YAAY,EAAE,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,OAAO,CAAC,IAAY;IAClC,MAAM,IAAI,GAAG,mBAAmB,EAAE,CAAC;IACnC,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClB,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,SAAS,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC;IACzC,IAAI,SAAS,KAAK,IAAI,IAAI,SAAS,KAAK,EAAE;QAAE,OAAO,KAAK,CAAC;IAEzD,OAAO,gBAAgB,CAAC,SAAS,CAAC,KAAK,SAAS,CAAC;AACnD,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,QAAgB;IAC1C,MAAM,IAAI,GAAG,mBAAmB,EAAE,CAAC;IACnC,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClB,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,UAAU,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IACnD,OAAO,sBAAsB,CAAC,UAAU,CAAC,KAAK,SAAS,CAAC;AAC1D,CAAC"}
@@ -0,0 +1,114 @@
1
+ /**
2
+ * Replacement Tagger
3
+ * Replaces PII spans with placeholder tags and builds the PII map
4
+ */
5
+ import { PIIType, SpanMatch, DetectedEntity, AnonymizationPolicy, SemanticAttributes } from "../types/index.js";
6
+ /**
7
+ * PII Map entry (before encryption)
8
+ */
9
+ export interface PIIMapEntry {
10
+ /** PII type */
11
+ type: PIIType;
12
+ /** Entity ID */
13
+ id: number;
14
+ /** Original text */
15
+ original: string;
16
+ }
17
+ /**
18
+ * Raw PII Map (before encryption)
19
+ */
20
+ export type RawPIIMap = Map<string, string>;
21
+ /**
22
+ * Tagging result
23
+ */
24
+ export interface TaggingResult {
25
+ /** Anonymized text with placeholder tags */
26
+ anonymizedText: string;
27
+ /** List of detected entities with assigned IDs */
28
+ entities: DetectedEntity[];
29
+ /** Raw PII map (type_id -> original) */
30
+ piiMap: RawPIIMap;
31
+ }
32
+ /**
33
+ * Generates a PII placeholder tag
34
+ * Format: <PII type="TYPE" id="N"/> or <PII type="TYPE" gender="X" id="N"/> etc.
35
+ *
36
+ * Semantic attributes (gender, scope) are included when provided and not 'unknown'
37
+ */
38
+ export declare function generateTag(type: PIIType, id: number, semantic?: SemanticAttributes): string;
39
+ /**
40
+ * Result of parsing a PII tag
41
+ */
42
+ export interface ParsedTag {
43
+ type: PIIType;
44
+ id: number;
45
+ semantic?: SemanticAttributes;
46
+ }
47
+ /**
48
+ * Parses a PII tag to extract type, id, and semantic attributes
49
+ * Returns null if not a valid tag
50
+ *
51
+ * Supports formats:
52
+ * - <PII type="TYPE" id="N"/>
53
+ * - <PII type="TYPE" gender="X" id="N"/>
54
+ * - <PII type="TYPE" scope="X" id="N"/>
55
+ * - <PII type="TYPE" gender="X" scope="Y" id="N"/>
56
+ */
57
+ export declare function parseTag(tag: string): ParsedTag | null;
58
+ /**
59
+ * Creates a key for the PII map
60
+ */
61
+ export declare function createPIIMapKey(type: PIIType, id: number): string;
62
+ /**
63
+ * Tags PII spans in text and builds the PII map
64
+ */
65
+ export declare function tagEntities(text: string, matches: SpanMatch[], policy: AnonymizationPolicy): TaggingResult;
66
+ /**
67
+ * Validates that a tag is well-formed
68
+ */
69
+ export declare function isValidTag(tag: string): boolean;
70
+ /**
71
+ * Tag extraction result with the matched text for accurate replacement
72
+ */
73
+ export interface ExtractedTag {
74
+ type: PIIType;
75
+ id: number;
76
+ position: number;
77
+ /** The actual matched text (needed for replacement when tag is mangled) */
78
+ matchedText: string;
79
+ /** Semantic attributes extracted from the tag */
80
+ semantic?: SemanticAttributes;
81
+ }
82
+ /**
83
+ * Extracts all PII tags from anonymized text using fuzzy matching
84
+ * Handles mangled tags that may occur after translation
85
+ *
86
+ * Translation can mangle tags by:
87
+ * - Changing quote types (straight " → curly “ ” or „ or « etc.)
88
+ * - Adding/removing whitespace
89
+ * - Changing case (type → Type, PII → pii)
90
+ * - Reordering attributes (id before type)
91
+ * - Modifying self-closing syntax (/> → / > or >)
92
+ */
93
+ export declare function extractTags(anonymizedText: string): ExtractedTag[];
94
+ /**
95
+ * Extracts tags using strict matching (original behavior)
96
+ * Useful when you know tags haven't been mangled
97
+ * Supports optional gender and scope attributes
98
+ */
99
+ export declare function extractTagsStrict(anonymizedText: string): ExtractedTag[];
100
+ /**
101
+ * Counts entities by type
102
+ */
103
+ export declare function countEntitiesByType(entities: DetectedEntity[]): Record<PIIType, number>;
104
+ /**
105
+ * Rehydrates anonymized text using the PII map
106
+ * Uses fuzzy matching to handle tags that may have been mangled by translation
107
+ *
108
+ * @param anonymizedText - Text containing PII tags (possibly mangled)
109
+ * @param piiMap - Map of PII keys to original values
110
+ * @param strict - If true, use strict matching (original behavior). Default: false
111
+ * @returns Text with PII tags replaced by original values
112
+ */
113
+ export declare function rehydrate(anonymizedText: string, piiMap: RawPIIMap, strict?: boolean): string;
114
+ //# sourceMappingURL=tagger.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tagger.d.ts","sourceRoot":"","sources":["../../src/pipeline/tagger.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,OAAO,EACP,SAAS,EACT,cAAc,EACd,mBAAmB,EACnB,kBAAkB,EACnB,MAAM,mBAAmB,CAAC;AAG3B;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,eAAe;IACf,IAAI,EAAE,OAAO,CAAC;IACd,gBAAgB;IAChB,EAAE,EAAE,MAAM,CAAC;IACX,oBAAoB;IACpB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,MAAM,SAAS,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;AAE5C;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,4CAA4C;IAC5C,cAAc,EAAE,MAAM,CAAC;IACvB,kDAAkD;IAClD,QAAQ,EAAE,cAAc,EAAE,CAAC;IAC3B,wCAAwC;IACxC,MAAM,EAAE,SAAS,CAAC;CACnB;AAED;;;;;GAKG;AACH,wBAAgB,WAAW,CACzB,IAAI,EAAE,OAAO,EACb,EAAE,EAAE,MAAM,EACV,QAAQ,CAAC,EAAE,kBAAkB,GAC5B,MAAM,CAcR;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,OAAO,CAAC;IACd,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,CAAC,EAAE,kBAAkB,CAAC;CAC/B;AAED;;;;;;;;;GASG;AACH,wBAAgB,QAAQ,CAAC,GAAG,EAAE,MAAM,GAAG,SAAS,GAAG,IAAI,CA+CtD;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,GAAG,MAAM,CAEjE;AAED;;GAEG;AACH,wBAAgB,WAAW,CACzB,IAAI,EAAE,MAAM,EACZ,OAAO,EAAE,SAAS,EAAE,EACpB,MAAM,EAAE,mBAAmB,GAC1B,aAAa,CA8Ef;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAE/C;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,OAAO,CAAC;IACd,EAAE,EAAE,MAAM,CAAC;IACX,QAAQ,EAAE,MAAM,CAAC;IACjB,2EAA2E;IAC3E,WAAW,EAAE,MAAM,CAAC;IACpB,iDAAiD;IACjD,QAAQ,CAAC,EAAE,kBAAkB,CAAC;CAC/B;AA4DD;;;;;;;;;;GAUG;AACH,wBAAgB,WAAW,CAAC,cAAc,EAAE,MAAM,GAAG,YAAY,EAAE,CAyFlE;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,cAAc,EAAE,MAAM,GAAG,YAAY,EAAE,CAqDxE;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CACjC,QAAQ,EAAE,cAAc,EAAE,GACzB,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,CAczB;AAED;;;;;;;;GAQG;AACH,wBAAgB,SAAS,CACvB,cAAc,EAAE,MAAM,EACtB,MAAM,EAAE,SAAS,EACjB,MAAM,GAAE,OAAe,GACtB,MAAM,CAyBR"}