rehydra 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/LICENSE +22 -0
  2. package/README.md +615 -0
  3. package/dist/crypto/index.d.ts +6 -0
  4. package/dist/crypto/index.d.ts.map +1 -0
  5. package/dist/crypto/index.js +6 -0
  6. package/dist/crypto/index.js.map +1 -0
  7. package/dist/crypto/pii-map-crypto.d.ts +114 -0
  8. package/dist/crypto/pii-map-crypto.d.ts.map +1 -0
  9. package/dist/crypto/pii-map-crypto.js +228 -0
  10. package/dist/crypto/pii-map-crypto.js.map +1 -0
  11. package/dist/index.d.ts +180 -0
  12. package/dist/index.d.ts.map +1 -0
  13. package/dist/index.js +384 -0
  14. package/dist/index.js.map +1 -0
  15. package/dist/ner/bio-decoder.d.ts +64 -0
  16. package/dist/ner/bio-decoder.d.ts.map +1 -0
  17. package/dist/ner/bio-decoder.js +216 -0
  18. package/dist/ner/bio-decoder.js.map +1 -0
  19. package/dist/ner/index.d.ts +10 -0
  20. package/dist/ner/index.d.ts.map +1 -0
  21. package/dist/ner/index.js +10 -0
  22. package/dist/ner/index.js.map +1 -0
  23. package/dist/ner/model-manager.d.ts +111 -0
  24. package/dist/ner/model-manager.d.ts.map +1 -0
  25. package/dist/ner/model-manager.js +325 -0
  26. package/dist/ner/model-manager.js.map +1 -0
  27. package/dist/ner/ner-model.d.ts +114 -0
  28. package/dist/ner/ner-model.d.ts.map +1 -0
  29. package/dist/ner/ner-model.js +253 -0
  30. package/dist/ner/ner-model.js.map +1 -0
  31. package/dist/ner/onnx-runtime.d.ts +46 -0
  32. package/dist/ner/onnx-runtime.d.ts.map +1 -0
  33. package/dist/ner/onnx-runtime.js +130 -0
  34. package/dist/ner/onnx-runtime.js.map +1 -0
  35. package/dist/ner/tokenizer.d.ts +118 -0
  36. package/dist/ner/tokenizer.d.ts.map +1 -0
  37. package/dist/ner/tokenizer.js +332 -0
  38. package/dist/ner/tokenizer.js.map +1 -0
  39. package/dist/pipeline/index.d.ts +12 -0
  40. package/dist/pipeline/index.d.ts.map +1 -0
  41. package/dist/pipeline/index.js +12 -0
  42. package/dist/pipeline/index.js.map +1 -0
  43. package/dist/pipeline/prenormalize.d.ts +48 -0
  44. package/dist/pipeline/prenormalize.d.ts.map +1 -0
  45. package/dist/pipeline/prenormalize.js +94 -0
  46. package/dist/pipeline/prenormalize.js.map +1 -0
  47. package/dist/pipeline/resolver.d.ts +56 -0
  48. package/dist/pipeline/resolver.d.ts.map +1 -0
  49. package/dist/pipeline/resolver.js +239 -0
  50. package/dist/pipeline/resolver.js.map +1 -0
  51. package/dist/pipeline/semantic-data-loader.d.ts +165 -0
  52. package/dist/pipeline/semantic-data-loader.d.ts.map +1 -0
  53. package/dist/pipeline/semantic-data-loader.js +655 -0
  54. package/dist/pipeline/semantic-data-loader.js.map +1 -0
  55. package/dist/pipeline/semantic-enricher.d.ts +112 -0
  56. package/dist/pipeline/semantic-enricher.d.ts.map +1 -0
  57. package/dist/pipeline/semantic-enricher.js +318 -0
  58. package/dist/pipeline/semantic-enricher.js.map +1 -0
  59. package/dist/pipeline/tagger.d.ts +114 -0
  60. package/dist/pipeline/tagger.d.ts.map +1 -0
  61. package/dist/pipeline/tagger.js +374 -0
  62. package/dist/pipeline/tagger.js.map +1 -0
  63. package/dist/pipeline/title-extractor.d.ts +79 -0
  64. package/dist/pipeline/title-extractor.d.ts.map +1 -0
  65. package/dist/pipeline/title-extractor.js +801 -0
  66. package/dist/pipeline/title-extractor.js.map +1 -0
  67. package/dist/pipeline/validator.d.ts +65 -0
  68. package/dist/pipeline/validator.d.ts.map +1 -0
  69. package/dist/pipeline/validator.js +264 -0
  70. package/dist/pipeline/validator.js.map +1 -0
  71. package/dist/recognizers/base.d.ts +78 -0
  72. package/dist/recognizers/base.d.ts.map +1 -0
  73. package/dist/recognizers/base.js +100 -0
  74. package/dist/recognizers/base.js.map +1 -0
  75. package/dist/recognizers/bic-swift.d.ts +10 -0
  76. package/dist/recognizers/bic-swift.d.ts.map +1 -0
  77. package/dist/recognizers/bic-swift.js +107 -0
  78. package/dist/recognizers/bic-swift.js.map +1 -0
  79. package/dist/recognizers/credit-card.d.ts +32 -0
  80. package/dist/recognizers/credit-card.d.ts.map +1 -0
  81. package/dist/recognizers/credit-card.js +160 -0
  82. package/dist/recognizers/credit-card.js.map +1 -0
  83. package/dist/recognizers/custom-id.d.ts +28 -0
  84. package/dist/recognizers/custom-id.d.ts.map +1 -0
  85. package/dist/recognizers/custom-id.js +116 -0
  86. package/dist/recognizers/custom-id.js.map +1 -0
  87. package/dist/recognizers/email.d.ts +10 -0
  88. package/dist/recognizers/email.d.ts.map +1 -0
  89. package/dist/recognizers/email.js +75 -0
  90. package/dist/recognizers/email.js.map +1 -0
  91. package/dist/recognizers/iban.d.ts +14 -0
  92. package/dist/recognizers/iban.d.ts.map +1 -0
  93. package/dist/recognizers/iban.js +67 -0
  94. package/dist/recognizers/iban.js.map +1 -0
  95. package/dist/recognizers/index.d.ts +20 -0
  96. package/dist/recognizers/index.d.ts.map +1 -0
  97. package/dist/recognizers/index.js +42 -0
  98. package/dist/recognizers/index.js.map +1 -0
  99. package/dist/recognizers/ip-address.d.ts +14 -0
  100. package/dist/recognizers/ip-address.d.ts.map +1 -0
  101. package/dist/recognizers/ip-address.js +183 -0
  102. package/dist/recognizers/ip-address.js.map +1 -0
  103. package/dist/recognizers/phone.d.ts +10 -0
  104. package/dist/recognizers/phone.d.ts.map +1 -0
  105. package/dist/recognizers/phone.js +145 -0
  106. package/dist/recognizers/phone.js.map +1 -0
  107. package/dist/recognizers/registry.d.ts +59 -0
  108. package/dist/recognizers/registry.d.ts.map +1 -0
  109. package/dist/recognizers/registry.js +113 -0
  110. package/dist/recognizers/registry.js.map +1 -0
  111. package/dist/recognizers/url.d.ts +14 -0
  112. package/dist/recognizers/url.d.ts.map +1 -0
  113. package/dist/recognizers/url.js +121 -0
  114. package/dist/recognizers/url.js.map +1 -0
  115. package/dist/types/index.d.ts +197 -0
  116. package/dist/types/index.d.ts.map +1 -0
  117. package/dist/types/index.js +80 -0
  118. package/dist/types/index.js.map +1 -0
  119. package/dist/types/pii-types.d.ts +50 -0
  120. package/dist/types/pii-types.d.ts.map +1 -0
  121. package/dist/types/pii-types.js +114 -0
  122. package/dist/types/pii-types.js.map +1 -0
  123. package/dist/utils/iban-checksum.d.ts +23 -0
  124. package/dist/utils/iban-checksum.d.ts.map +1 -0
  125. package/dist/utils/iban-checksum.js +106 -0
  126. package/dist/utils/iban-checksum.js.map +1 -0
  127. package/dist/utils/index.d.ts +10 -0
  128. package/dist/utils/index.d.ts.map +1 -0
  129. package/dist/utils/index.js +10 -0
  130. package/dist/utils/index.js.map +1 -0
  131. package/dist/utils/luhn.d.ts +17 -0
  132. package/dist/utils/luhn.d.ts.map +1 -0
  133. package/dist/utils/luhn.js +55 -0
  134. package/dist/utils/luhn.js.map +1 -0
  135. package/dist/utils/offsets.d.ts +86 -0
  136. package/dist/utils/offsets.d.ts.map +1 -0
  137. package/dist/utils/offsets.js +124 -0
  138. package/dist/utils/offsets.js.map +1 -0
  139. package/dist/utils/path.d.ts +34 -0
  140. package/dist/utils/path.d.ts.map +1 -0
  141. package/dist/utils/path.js +96 -0
  142. package/dist/utils/path.js.map +1 -0
  143. package/dist/utils/storage-browser.d.ts +51 -0
  144. package/dist/utils/storage-browser.d.ts.map +1 -0
  145. package/dist/utils/storage-browser.js +381 -0
  146. package/dist/utils/storage-browser.js.map +1 -0
  147. package/dist/utils/storage-node.d.ts +43 -0
  148. package/dist/utils/storage-node.d.ts.map +1 -0
  149. package/dist/utils/storage-node.js +93 -0
  150. package/dist/utils/storage-node.js.map +1 -0
  151. package/dist/utils/storage.d.ts +70 -0
  152. package/dist/utils/storage.d.ts.map +1 -0
  153. package/dist/utils/storage.js +69 -0
  154. package/dist/utils/storage.js.map +1 -0
  155. package/package.json +66 -0
package/dist/index.js ADDED
@@ -0,0 +1,384 @@
1
+ /**
2
+ * Rehydra Module
3
+ * Main entry point for on-device PII anonymization
4
+ */
5
+ // Re-export types
6
+ export * from "./types/index.js";
7
+ // Re-export recognizers
8
+ export { RegexRecognizer, RecognizerRegistry, createDefaultRegistry, createRegistry, getGlobalRegistry, emailRecognizer, phoneRecognizer, ibanRecognizer, bicSwiftRecognizer, creditCardRecognizer, ipAddressRecognizer, urlRecognizer, createCustomIdRecognizer, createCaseIdRecognizer, createCustomerIdRecognizer, } from "./recognizers/index.js";
9
+ // Re-export NER components
10
+ export { NERModel, NERModelStub, createNERModel, createNERModelStub, WordPieceTokenizer, loadVocabFromFile, parseVocab, loadRuntime, detectRuntime, getRuntimeType, MODEL_REGISTRY, getModelCacheDir, isModelDownloaded, downloadModel, ensureModel, clearModelCache, listDownloadedModels, } from "./ner/index.js";
11
+ // Re-export pipeline components
12
+ export { prenormalize, resolveEntities, tagEntities, validateOutput, generateTag, parseTag, rehydrate, enrichSemantics, inferGender, classifyLocation, getDatabaseStats, hasName, hasLocation,
13
+ // Semantic data loader exports
14
+ isSemanticDataAvailable, isSemanticDataDownloaded, getSemanticDataCacheDir, getDataDirectory, downloadSemanticData, ensureSemanticData, initializeSemanticData, loadSemanticData, clearSemanticData, clearSemanticDataCache, getSemanticDataInfo, SEMANTIC_DATA_FILES,
15
+ // Title extractor exports
16
+ extractTitle, extractTitlesFromSpans, mergeAdjacentTitleSpans, getTitlesForLanguage, getAllTitles, startsWithTitle, isOnlyTitle, } from "./pipeline/index.js";
17
+ // Re-export crypto
18
+ export { encryptPIIMap, decryptPIIMap, generateKey, deriveKey, generateSalt, InMemoryKeyProvider, ConfigKeyProvider, validateKey, secureCompare, uint8ArrayToBase64, base64ToUint8Array, } from "./crypto/index.js";
19
+ // Re-export storage utilities
20
+ export { getStorageProvider, isNode, isBrowser, resetStorageProvider, setStorageProvider, } from "./utils/storage.js";
21
+ // Re-export path utilities
22
+ export { join as pathJoin, dirname as pathDirname, basename as pathBasename, normalize as pathNormalize, extname as pathExtname, isAbsolute as pathIsAbsolute, } from "./utils/path.js";
23
+ // Main anonymization imports
24
+ import { createDefaultPolicy, } from "./types/index.js";
25
/**
 * Layers a partial policy over a base policy and returns a new policy object.
 *
 * Unlike the exported mergePolicy, the fallback values come from `base`
 * (typically an Anonymizer's instance policy) rather than the global defaults.
 *
 * Merge rules:
 * - `confidenceThresholds` is merged entry by entry into a copy of the base
 *   Map, so overriding one type keeps the thresholds of all other types.
 *   Neither input Map is mutated. When the override is absent, the base Map
 *   is reused as-is (same reference).
 * - Every other listed field is replaced wholesale: the override wins when it
 *   is neither `null` nor `undefined`, otherwise the base value is kept.
 *   Sets and arrays are NOT merged element-wise.
 * - Any additional field present on `base` is carried over unchanged.
 *
 * @param base - Complete policy that supplies the fallback values
 * @param partial - Overrides; missing, `undefined` or `null` fields fall back to `base`
 * @returns A new policy object (shallow copy of `base` with overrides applied)
 */
function mergePolicyWithBase(base, partial) {
    let confidenceThresholds = base.confidenceThresholds;
    // `!= null` (not `!== undefined`) so that a null override is treated as
    // "not provided", matching the `??` fallback used for every other field.
    if (partial.confidenceThresholds != null) {
        confidenceThresholds = new Map(base.confidenceThresholds);
        for (const [type, threshold] of partial.confidenceThresholds) {
            confidenceThresholds.set(type, threshold);
        }
    }
    return {
        // Start from the base so fields not enumerated below are not lost.
        ...base,
        enabledTypes: partial.enabledTypes ?? base.enabledTypes,
        regexEnabledTypes: partial.regexEnabledTypes ?? base.regexEnabledTypes,
        nerEnabledTypes: partial.nerEnabledTypes ?? base.nerEnabledTypes,
        typePriority: partial.typePriority ?? base.typePriority,
        confidenceThresholds,
        customIdPatterns: partial.customIdPatterns ?? base.customIdPatterns,
        allowlistTerms: partial.allowlistTerms ?? base.allowlistTerms,
        denylistPatterns: partial.denylistPatterns ?? base.denylistPatterns,
        reuseIdsForRepeatedPII: partial.reuseIdsForRepeatedPII ?? base.reuseIdsForRepeatedPII,
        enableLeakScan: partial.enableLeakScan ?? base.enableLeakScan,
        enableSemanticMasking: partial.enableSemanticMasking ?? base.enableSemanticMasking,
    };
}
52
+ import { createDefaultRegistry, } from "./recognizers/index.js";
53
+ import { NERModelStub, createNERModel, DEFAULT_LABEL_MAP, } from "./ner/index.js";
54
+ import { ensureModel, } from "./ner/model-manager.js";
55
+ import { prenormalize } from "./pipeline/prenormalize.js";
56
+ import { resolveEntities } from "./pipeline/resolver.js";
57
+ import { tagEntities, countEntitiesByType } from "./pipeline/tagger.js";
58
+ import { validateOutput } from "./pipeline/validator.js";
59
+ import { enrichSemantics } from "./pipeline/semantic-enricher.js";
60
+ import { ensureSemanticData, isSemanticDataAvailable, loadSemanticData, } from "./pipeline/semantic-data-loader.js";
61
+ import { extractTitlesFromSpans, mergeAdjacentTitleSpans, } from "./pipeline/title-extractor.js";
62
+ import { encryptPIIMap, generateKey, } from "./crypto/index.js";
63
+ import { getStorageProvider } from "./utils/storage.js";
64
/**
 * Anonymizer
 *
 * Runs the PII anonymization pipeline: pre-normalization, regex recognizers,
 * NER prediction, entity resolution, optional semantic steps (title handling
 * and enrichment), tagging, output validation and encryption of the PII map.
 *
 * The NER model and (when enabled) the semantic data are set up by
 * `initialize()`, which `anonymize()` invokes on demand.
 */
export class Anonymizer {
    registry;
    nerModel = null;
    nerConfig;
    semanticConfig;
    keyProvider;
    defaultPolicy;
    modelVersion;
    policyVersion;
    initialized = false;
    semanticDataReady = false;
    /** In-flight initialization shared by overlapping initialize() calls; null when idle. */
    initPromise = null;
    /**
     * @param config - Optional settings: `registry`, `keyProvider`,
     *   `defaultPolicy`, `policyVersion`, `modelVersion`, `ner`, `semantic`.
     *   Omitted values fall back to a default registry, no key provider, the
     *   default policy, version "1.0.0", NER mode "disabled" and semantic
     *   masking disabled.
     */
    constructor(config = {}) {
        this.registry = config.registry ?? createDefaultRegistry();
        this.keyProvider = config.keyProvider ?? null;
        this.defaultPolicy = config.defaultPolicy ?? createDefaultPolicy();
        this.policyVersion = config.policyVersion ?? "1.0.0";
        // Handle NER configuration
        this.nerConfig = config.ner ?? { mode: "disabled" };
        this.modelVersion = config.modelVersion ?? "1.0.0";
        // Fold NER thresholds into a copy of the policy's thresholds so the
        // policy object passed in by the caller is never mutated.
        if (this.nerConfig.thresholds !== undefined) {
            const thresholdsMap = new Map(this.defaultPolicy.confidenceThresholds);
            for (const [type, threshold] of Object.entries(this.nerConfig.thresholds)) {
                if (threshold !== undefined) {
                    thresholdsMap.set(type, threshold);
                }
            }
            this.defaultPolicy = {
                ...this.defaultPolicy,
                confidenceThresholds: thresholdsMap,
            };
        }
        // Handle semantic configuration
        this.semanticConfig = config.semantic ?? { enabled: false };
        // Enabling semantic support on the instance also turns on semantic
        // masking in the instance's default policy.
        if (this.semanticConfig.enabled) {
            this.defaultPolicy = {
                ...this.defaultPolicy,
                enableSemanticMasking: true,
            };
        }
    }
    /**
     * Initializes the anonymizer: sets up and loads the NER model and, when
     * semantic masking is enabled in the configuration, makes the semantic
     * data available and loads it.
     *
     * Safe to call repeatedly and from overlapping async callers: while an
     * initialization is in flight every caller awaits that same run instead of
     * starting another one (which would create and load a second model and
     * drop the first without disposing it). A failed run clears the in-flight
     * state so a later call can retry.
     *
     * @throws Error when NER mode is 'custom' without modelPath/vocabPath, or
     *   when semantic data is required but missing and autoDownload is false.
     *   Errors from the model manager, model load or data loader propagate.
     */
    async initialize() {
        if (this.initialized)
            return;
        if (this.initPromise === null) {
            this.initPromise = this.performInitialization().finally(() => {
                this.initPromise = null;
            });
        }
        await this.initPromise;
    }
    /**
     * Internal: one full initialization pass. Use `initialize()` instead.
     */
    async performInitialization() {
        await this.prepareNERModel();
        await this.prepareSemanticData();
        // Report the version exposed by the model that was actually set up.
        this.modelVersion = this.nerModel.version;
        this.initialized = true;
    }
    /**
     * Internal: creates the NER model for the configured mode and loads it.
     */
    async prepareNERModel() {
        const ner = this.nerConfig;
        if (ner.mode === "disabled") {
            this.nerModel = new NERModelStub();
        }
        else if (ner.mode === "custom") {
            const missingModel = ner.modelPath === undefined || ner.modelPath === "";
            const missingVocab = ner.vocabPath === undefined || ner.vocabPath === "";
            if (missingModel || missingVocab) {
                throw new Error("NER mode 'custom' requires modelPath and vocabPath");
            }
            this.nerModel = createNERModel({
                modelPath: ner.modelPath,
                vocabPath: ner.vocabPath,
                modelVersion: this.modelVersion,
            });
        }
        else {
            // 'standard' or 'quantized' - resolve files through the model manager
            const { modelPath, vocabPath, labelMapPath } = await ensureModel(ner.mode, {
                autoDownload: ner.autoDownload ?? true,
                onProgress: ner.onDownloadProgress,
                onStatus: ner.onStatus,
            });
            // Best effort: prefer the label map stored next to the model; any
            // read or parse failure deliberately falls back to the default map.
            let labelMap = DEFAULT_LABEL_MAP;
            try {
                const storage = await getStorageProvider();
                const labelMapContent = await storage.readTextFile(labelMapPath);
                labelMap = JSON.parse(labelMapContent);
            }
            catch {
                // Use default label map
            }
            this.nerModel = createNERModel({
                modelPath,
                vocabPath,
                labelMap,
                modelVersion: this.modelVersion,
            });
        }
        if (!this.nerModel.loaded) {
            ner.onStatus?.("Loading NER model...");
            await this.nerModel.load();
            ner.onStatus?.("NER model loaded!");
        }
    }
    /**
     * Internal: when semantic masking is enabled in the configuration, makes
     * sure the semantic data files are available (downloading them if allowed)
     * and loads them. Does nothing otherwise.
     */
    async prepareSemanticData() {
        const semantic = this.semanticConfig;
        if (!semantic.enabled)
            return;
        const autoDownload = semantic.autoDownload ?? true;
        const dataAvailable = await isSemanticDataAvailable();
        if (!dataAvailable) {
            if (!autoDownload) {
                throw new Error("Semantic masking is enabled but data files are not available.\n\n" +
                    "To download automatically, use:\n" +
                    " createAnonymizer({ semantic: { enabled: true, autoDownload: true } })\n\n" +
                    "Or disable semantic masking:\n" +
                    " createAnonymizer({ semantic: { enabled: false } })");
            }
            // Download semantic data
            await ensureSemanticData({
                autoDownload: true,
                onProgress: semantic.onDownloadProgress,
                onStatus: semantic.onStatus,
            });
        }
        else {
            semantic.onStatus?.("Semantic data already cached");
        }
        // Load data into memory for synchronous access during enrichment
        await loadSemanticData();
        this.semanticDataReady = true;
    }
    /**
     * Anonymizes text, replacing PII with placeholder tags.
     *
     * Initializes the instance first if that has not happened yet.
     *
     * @param text - Input text to anonymize
     * @param locale - Optional locale hint (e.g., 'de-DE', 'en-US'); only the
     *   part before the first '-' is passed on to semantic enrichment
     * @param policy - Optional per-call overrides, layered over this
     *   instance's default policy (not the global defaults)
     * @returns The anonymized text, the entities without their original
     *   text, the encrypted PII map and processing stats
     */
    async anonymize(text, locale, policy) {
        if (!this.initialized) {
            await this.initialize();
        }
        const startTime = performance.now();
        // Layer the per-call policy over the instance policy so that settings
        // derived from the constructor config (thresholds, semantic masking)
        // are preserved unless explicitly overridden.
        const effectivePolicy = policy !== undefined
            ? mergePolicyWithBase(this.defaultPolicy, policy)
            : this.defaultPolicy;
        // Step 1: Pre-normalize text
        const normalizedText = prenormalize(text);
        // Step 2: Run regex recognizers
        const regexMatches = this.registry.findAll(normalizedText, effectivePolicy);
        // Step 3: Run NER model
        const nerResult = await this.nerModel.predict(normalizedText, effectivePolicy);
        const nerMatches = nerResult.spans;
        // Step 4: Resolve and merge entities
        let matches = resolveEntities(regexMatches, nerMatches, effectivePolicy, normalizedText);
        // The semantic steps are gated by one flag, evaluated once so all three
        // steps always agree on whether they run.
        // NOTE(review): the policy can enable semantic masking even when the
        // instance was not configured with `semantic.enabled`, in which case
        // initialize() did not load the semantic data (semanticDataReady stays
        // false). How the enricher behaves without loaded data is not visible
        // here - confirm.
        const semanticMasking = effectivePolicy.enableSemanticMasking === true;
        if (semanticMasking) {
            // Step 4.5: Merge adjacent title+name PERSON spans.
            // This fixes NER models that split "Mrs. Smith" into two entities.
            matches = mergeAdjacentTitleSpans(matches, normalizedText);
            // Step 4.6: Extract titles from PERSON entities.
            // This strips honorific titles (Dr., Mrs., etc.) so they remain
            // visible for translation.
            matches = extractTitlesFromSpans(matches, normalizedText);
            // Step 4.7: Enrich with semantic attributes.
            // This adds gender for PERSON and scope for LOCATION entities.
            matches = enrichSemantics(matches, {
                locale: locale !== undefined ? locale.split("-")[0] : undefined, // Extract language code
            });
        }
        // Step 5: Tag entities and build PII map
        const { anonymizedText, entities, piiMap } = tagEntities(normalizedText, matches, effectivePolicy);
        // Step 6: Validate output. Failures are only logged; the result is
        // still returned and `stats.leakScanPassed` carries the scan outcome.
        const validation = validateOutput(anonymizedText, entities, Array.from(piiMap.keys()), effectivePolicy);
        if (!validation.valid) {
            // Log validation errors (but don't expose raw PII)
            const safeErrors = validation.errors.map((e) => ({
                code: e.code,
                message: e.message,
            }));
            // eslint-disable-next-line no-console
            console.warn("Validation warnings:", safeErrors);
        }
        // Step 7: Encrypt PII map.
        // Without a key provider a fresh key is generated for this call; that
        // key is not part of the returned result.
        // NOTE(review): this appears to make the returned map undecryptable
        // by the caller unless a key provider is configured - confirm intent.
        const encryptionKey = this.keyProvider !== null
            ? await this.keyProvider.getKey()
            : generateKey();
        const encryptedPiiMap = await encryptPIIMap(piiMap, encryptionKey);
        // Step 8: Build stats
        const endTime = performance.now();
        const stats = {
            countsByType: countEntitiesByType(entities),
            totalEntities: entities.length,
            modelVersion: this.modelVersion,
            policyVersion: this.policyVersion,
            processingTimeMs: endTime - startTime,
            leakScanPassed: validation.leakScanPassed,
        };
        // Step 9: Build result (without original text in entities)
        const safeEntities = entities.map(({ original: _original, ...rest }) => rest);
        return {
            anonymizedText,
            entities: safeEntities,
            piiMap: encryptedPiiMap,
            stats,
        };
    }
    /**
     * Disposes of the NER model (if one was created) and marks the instance
     * as uninitialized, so a later initialize()/anonymize() sets it up again.
     */
    async dispose() {
        if (this.nerModel) {
            await this.nerModel.dispose();
        }
        this.initialized = false;
    }
    /**
     * Gets the recognizer registry
     */
    getRegistry() {
        return this.registry;
    }
    /**
     * Gets the NER model (null until initialize() has created one)
     */
    getNERModel() {
        return this.nerModel;
    }
    /**
     * Whether the anonymizer is initialized
     */
    get isInitialized() {
        return this.initialized;
    }
}
303
/**
 * Factory for Anonymizer instances.
 *
 * Equivalent to `new Anonymizer(config)`; when `config` is omitted the
 * constructor's defaults apply.
 *
 * @example
 * ```typescript
 * // Regex-only (no NER)
 * const anonymizer = createAnonymizer();
 *
 * // With NER (auto-downloads model on first use)
 * const anonymizer = createAnonymizer({
 *   ner: { mode: 'quantized' }
 * });
 *
 * // With NER and progress callback
 * const anonymizer = createAnonymizer({
 *   ner: {
 *     mode: 'standard',
 *     onStatus: (status) => console.log(status),
 *     onDownloadProgress: (p) => console.log(`${p.file}: ${p.percent}%`)
 *   }
 * });
 * ```
 *
 * @param config - Optional anonymizer configuration
 * @returns A new, not yet initialized Anonymizer
 */
export function createAnonymizer(config) {
    const instance = new Anonymizer(config);
    return instance;
}
329
/**
 * One-shot anonymization helper.
 *
 * Builds a throwaway anonymizer with the default configuration (regex-only),
 * runs a single anonymization and always disposes the anonymizer afterwards,
 * whether the run succeeded or threw.
 *
 * @param text - Input text to anonymize
 * @param locale - Optional locale hint, forwarded unchanged
 * @param policy - Optional policy override, forwarded unchanged
 * @returns The anonymization result
 */
export async function anonymize(text, locale, policy) {
    const oneShot = createAnonymizer();
    await oneShot.initialize();
    try {
        const result = await oneShot.anonymize(text, locale, policy);
        return result;
    }
    finally {
        await oneShot.dispose();
    }
}
343
/**
 * Regex-only anonymization shortcut (no NER, faster).
 *
 * Copies the given policy, forces `nerEnabledTypes` to an empty Set and
 * delegates to `anonymize` without a locale. The caller's policy object is
 * not modified.
 *
 * @param text - Input text to anonymize
 * @param policy - Optional policy override; its `nerEnabledTypes` is ignored
 * @returns The anonymization result
 */
export async function anonymizeRegexOnly(text, policy) {
    // An empty set means no entity type is enabled for NER.
    const nerOff = { nerEnabledTypes: new Set() };
    const regexOnlyPolicy = { ...policy, ...nerOff };
    return anonymize(text, undefined, regexOnlyPolicy);
}
354
/**
 * Full anonymization with NER.
 *
 * Creates a temporary anonymizer with the given NER configuration, runs a
 * single anonymization (without a locale hint) and always disposes the
 * anonymizer afterwards.
 *
 * The NER mode defaults to "quantized" whenever the configuration does not
 * supply one - including when `nerConfig` is omitted entirely or carries an
 * explicit `mode: undefined`.
 *
 * @example
 * ```typescript
 * const result = await anonymizeWithNER(
 *   'Contact John Smith at john@example.com',
 *   {
 *     mode: 'quantized',
 *     onStatus: console.log
 *   }
 * );
 * ```
 *
 * @param text - Input text to anonymize
 * @param nerConfig - Optional NER configuration (mode, callbacks, ...)
 * @param policy - Optional policy override
 * @returns The anonymization result
 */
export async function anonymizeWithNER(text, nerConfig, policy) {
    // Tolerate a missing config instead of failing on `nerConfig.mode`.
    const options = nerConfig ?? {};
    const anonymizer = createAnonymizer({
        ner: {
            ...options,
            // Must come after the spread: otherwise an explicit
            // `mode: undefined` in the config would overwrite the default.
            mode: options.mode ?? "quantized",
        },
    });
    await anonymizer.initialize();
    try {
        return await anonymizer.anonymize(text, undefined, policy);
    }
    finally {
        await anonymizer.dispose();
    }
}
384
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,kBAAkB;AAClB,cAAc,kBAAkB,CAAC;AAEjC,wBAAwB;AACxB,OAAO,EAEL,eAAe,EACf,kBAAkB,EAClB,qBAAqB,EACrB,cAAc,EACd,iBAAiB,EACjB,eAAe,EACf,eAAe,EACf,cAAc,EACd,kBAAkB,EAClB,oBAAoB,EACpB,mBAAmB,EACnB,aAAa,EACb,wBAAwB,EACxB,sBAAsB,EACtB,0BAA0B,GAC3B,MAAM,wBAAwB,CAAC;AAEhC,2BAA2B;AAC3B,OAAO,EACL,QAAQ,EACR,YAAY,EACZ,cAAc,EACd,kBAAkB,EAClB,kBAAkB,EAClB,iBAAiB,EACjB,UAAU,EACV,WAAW,EACX,aAAa,EACb,cAAc,EAMd,cAAc,EACd,gBAAgB,EAChB,iBAAiB,EACjB,aAAa,EACb,WAAW,EACX,eAAe,EACf,oBAAoB,GACrB,MAAM,gBAAgB,CAAC;AAExB,gCAAgC;AAChC,OAAO,EACL,YAAY,EACZ,eAAe,EACf,WAAW,EACX,cAAc,EACd,WAAW,EACX,QAAQ,EACR,SAAS,EACT,eAAe,EACf,WAAW,EACX,gBAAgB,EAChB,gBAAgB,EAChB,OAAO,EACP,WAAW;AACX,+BAA+B;AAC/B,uBAAuB,EACvB,wBAAwB,EACxB,uBAAuB,EACvB,gBAAgB,EAChB,oBAAoB,EACpB,kBAAkB,EAClB,sBAAsB,EACtB,gBAAgB,EAChB,iBAAiB,EACjB,sBAAsB,EACtB,mBAAmB,EACnB,mBAAmB;AACnB,0BAA0B;AAC1B,YAAY,EACZ,sBAAsB,EACtB,uBAAuB,EACvB,oBAAoB,EACpB,YAAY,EACZ,eAAe,EACf,WAAW,GAMZ,MAAM,qBAAqB,CAAC;AAE7B,mBAAmB;AACnB,OAAO,EACL,aAAa,EACb,aAAa,EACb,WAAW,EACX,SAAS,EACT,YAAY,EAEZ,mBAAmB,EACnB,iBAAiB,EACjB,WAAW,EACX,aAAa,EACb,kBAAkB,EAClB,kBAAkB,GACnB,MAAM,mBAAmB,CAAC;AAE3B,8BAA8B;AAC9B,OAAO,EACL,kBAAkB,EAClB,MAAM,EACN,SAAS,EACT,oBAAoB,EACpB,kBAAkB,GAEnB,MAAM,oBAAoB,CAAC;AAE5B,2BAA2B;AAC3B,OAAO,EACL,IAAI,IAAI,QAAQ,EAChB,OAAO,IAAI,WAAW,EACtB,QAAQ,IAAI,YAAY,EACxB,SAAS,IAAI,aAAa,EAC1B,OAAO,IAAI,WAAW,EACtB,UAAU,IAAI,cAAc,GAC7B,MAAM,iBAAiB,CAAC;AAEzB,6BAA6B;AAC7B,OAAO,EAQL,mBAAmB,GACpB,MAAM,kBAAkB,CAAC;AAE1B;;;GAGG;AACH,SAAS,mBAAmB,CAC1B,IAAyB,EACzB,OAAqC;IAErC,sCAAsC;IACtC,IAAI,oBAAoB,GAAG,IAAI,CAAC,oBAAoB,CAAC;IACrD,IAAI,OAAO,CAAC,oBAAoB,KAAK,SAAS,EAAE,CAAC;QAC/C,oBAAoB,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;QAC1D,KAAK,MAAM,CAAC,IAAI,EAAE,SAAS,CAAC,IAAI,OAAO,CAAC,oBAAoB,EAAE,CAAC;YAC7D,oBAAoB,CAAC,GAAG,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IAED,OAAO;QACL,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,IAAI,CAAC,YAAY;QACvD,iBAAiB,EAAE,OA
AO,CAAC,iBAAiB,IAAI,IAAI,CAAC,iBAAiB;QACtE,eAAe,EAAE,OAAO,CAAC,eAAe,IAAI,IAAI,CAAC,eAAe;QAChE,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,IAAI,CAAC,YAAY;QACvD,oBAAoB;QACpB,gBAAgB,EAAE,OAAO,CAAC,gBAAgB,IAAI,IAAI,CAAC,gBAAgB;QACnE,cAAc,EAAE,OAAO,CAAC,cAAc,IAAI,IAAI,CAAC,cAAc;QAC7D,gBAAgB,EAAE,OAAO,CAAC,gBAAgB,IAAI,IAAI,CAAC,gBAAgB;QACnE,sBAAsB,EACpB,OAAO,CAAC,sBAAsB,IAAI,IAAI,CAAC,sBAAsB;QAC/D,cAAc,EAAE,OAAO,CAAC,cAAc,IAAI,IAAI,CAAC,cAAc;QAC7D,qBAAqB,EACnB,OAAO,CAAC,qBAAqB,IAAI,IAAI,CAAC,qBAAqB;KAC9D,CAAC;AACJ,CAAC;AACD,OAAO,EACL,qBAAqB,GAEtB,MAAM,wBAAwB,CAAC;AAChC,OAAO,EAEL,YAAY,EACZ,cAAc,EACd,iBAAiB,GAClB,MAAM,gBAAgB,CAAC;AACxB,OAAO,EAEL,WAAW,GAEZ,MAAM,wBAAwB,CAAC;AAChC,OAAO,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AAC1D,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AACzD,OAAO,EAAE,WAAW,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AACzD,OAAO,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAClE,OAAO,EACL,kBAAkB,EAClB,uBAAuB,EACvB,gBAAgB,GACjB,MAAM,oCAAoC,CAAC;AAC5C,OAAO,EACL,sBAAsB,EACtB,uBAAuB,GACxB,MAAM,+BAA+B,CAAC;AACvC,OAAO,EACL,aAAa,EACb,WAAW,GAEZ,MAAM,mBAAmB,CAAC;AAC3B,OAAO,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAC;AAkFxD;;;GAGG;AACH,MAAM,OAAO,UAAU;IACb,QAAQ,CAAqB;IAC7B,QAAQ,GAAqB,IAAI,CAAC;IAClC,SAAS,CAAY;IACrB,cAAc,CAAiB;IAC/B,WAAW,CAAqB;IAChC,aAAa,CAAsB;IACnC,YAAY,CAAS;IACrB,aAAa,CAAS;IACtB,WAAW,GAAG,KAAK,CAAC;IACpB,iBAAiB,GAAG,KAAK,CAAC;IAElC,YAAY,SAA2B,EAAE;QACvC,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,IAAI,qBAAqB,EAAE,CAAC;QAC3D,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC;QAC9C,IAAI,CAAC,aAAa,GAAG,MAAM,CAAC,aAAa,IAAI,mBAAmB,EAAE,CAAC;QACnE,IAAI,CAAC,aAAa,GAAG,MAAM,CAAC,aAAa,IAAI,OAAO,CAAC;QAErD,2BAA2B;QAC3B,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,GAAG,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC;QACpD,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,OAAO,CAAC;QAEnD,uDAAuD;QACvD,IAAI,IAAI,CAAC,SAAS,CAAC,UAAU,KAAK,SAAS,EAAE,CAAC;YAC5C,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,aAAa,CAAC,oBAAoB,CAAC,CAAC;YACvE,KAAK,MAAM,CAAC,IAAI,EAAE,SAAS,CAAC,IAAI,MAAM,CAAC,OAAO,CAC5C,IAAI,CAAC,SAAS,CAAC,UAAU,CAC1B,EAAE,CAAC;
gBACF,IAAI,SAAS,KAAK,SAAS,EAAE,CAAC;oBAC5B,aAAa,CAAC,GAAG,CAAC,IAAe,EAAE,SAAS,CAAC,CAAC;gBAChD,CAAC;YACH,CAAC;YACD,IAAI,CAAC,aAAa,GAAG;gBACnB,GAAG,IAAI,CAAC,aAAa;gBACrB,oBAAoB,EAAE,aAAa;aACpC,CAAC;QACJ,CAAC;QAED,gCAAgC;QAChC,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,QAAQ,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;QAE5D,+DAA+D;QAC/D,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO,EAAE,CAAC;YAChC,IAAI,CAAC,aAAa,GAAG;gBACnB,GAAG,IAAI,CAAC,aAAa;gBACrB,qBAAqB,EAAE,IAAI;aAC5B,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,WAAW;YAAE,OAAO;QAE7B,uCAAuC;QACvC,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;YACvC,IAAI,CAAC,QAAQ,GAAG,IAAI,YAAY,EAAE,CAAC;QACrC,CAAC;aAAM,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC5C,IACE,IAAI,CAAC,SAAS,CAAC,SAAS,KAAK,SAAS;gBACtC,IAAI,CAAC,SAAS,CAAC,SAAS,KAAK,EAAE;gBAC/B,IAAI,CAAC,SAAS,CAAC,SAAS,KAAK,SAAS;gBACtC,IAAI,CAAC,SAAS,CAAC,SAAS,KAAK,EAAE,EAC/B,CAAC;gBACD,MAAM,IAAI,KAAK,CAAC,oDAAoD,CAAC,CAAC;YACxE,CAAC;YAED,IAAI,CAAC,QAAQ,GAAG,cAAc,CAAC;gBAC7B,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS;gBACnC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS;gBACnC,YAAY,EAAE,IAAI,CAAC,YAAY;aAChC,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,gDAAgD;YAChD,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,GAAG,MAAM,WAAW,CAC9D,IAAI,CAAC,SAAS,CAAC,IAAI,EACnB;gBACE,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,YAAY,IAAI,IAAI;gBACjD,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,kBAAkB;gBAC7C,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ;aAClC,CACF,CAAC;YAEF,iBAAiB;YACjB,IAAI,QAAQ,GAAG,iBAAiB,CAAC;YACjC,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,MAAM,kBAAkB,EAAE,CAAC;gBAC3C,MAAM,eAAe,GAAG,MAAM,OAAO,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;gBACjE,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAa,CAAC;YACrD,CAAC;YAAC,MAAM,CAAC;gBACP,wBAAwB;YAC1B,CAAC;YAED,IAAI,CAAC,QAAQ,GAAG,cAAc,CAAC;gBAC7B,SAAS;gBACT,SAAS;gBACT,QAAQ;gBACR,YAAY,EAAE,IAAI,CAAC,YAAY;aAChC,CAAC,CAAC;QACL,CAAC;QAED,qBAAqB;QACrB,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;YAC1B,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,CAAC,sBAAsB,CAAC,CAAC;YAClD,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YAC3B,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,CAAC,m
BAAmB,CAAC,CAAC;QACjD,CAAC;QAED,wCAAwC;QACxC,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO,EAAE,CAAC;YAChC,MAAM,YAAY,GAAG,IAAI,CAAC,cAAc,CAAC,YAAY,IAAI,IAAI,CAAC;YAE9D,qCAAqC;YACrC,MAAM,aAAa,GAAG,MAAM,uBAAuB,EAAE,CAAC;YACtD,IAAI,CAAC,aAAa,EAAE,CAAC;gBACnB,IAAI,CAAC,YAAY,EAAE,CAAC;oBAClB,MAAM,IAAI,KAAK,CACb,mEAAmE;wBACjE,mCAAmC;wBACnC,6EAA6E;wBAC7E,gCAAgC;wBAChC,sDAAsD,CACzD,CAAC;gBACJ,CAAC;gBAED,yBAAyB;gBACzB,MAAM,kBAAkB,CAAC;oBACvB,YAAY,EAAE,IAAI;oBAClB,UAAU,EAAE,IAAI,CAAC,cAAc,CAAC,kBAAkB;oBAClD,QAAQ,EAAE,IAAI,CAAC,cAAc,CAAC,QAAQ;iBACvC,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,IAAI,CAAC,cAAc,CAAC,QAAQ,EAAE,CAAC,8BAA8B,CAAC,CAAC;YACjE,CAAC;YAED,iEAAiE;YACjE,MAAM,gBAAgB,EAAE,CAAC;YACzB,IAAI,CAAC,iBAAiB,GAAG,IAAI,CAAC;QAChC,CAAC;QAED,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC;QAC1C,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;IAC1B,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,SAAS,CACb,IAAY,EACZ,MAAe,EACf,MAAqC;QAErC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACtB,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;QAC1B,CAAC;QAED,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAEpC,4DAA4D;QAC5D,6DAA6D;QAC7D,2DAA2D;QAC3D,MAAM,eAAe,GACnB,MAAM,KAAK,SAAS;YAClB,CAAC,CAAC,mBAAmB,CAAC,IAAI,CAAC,aAAa,EAAE,MAAM,CAAC;YACjD,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC;QAEzB,6BAA6B;QAC7B,MAAM,cAAc,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;QAE1C,gCAAgC;QAChC,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,cAAc,EAAE,eAAe,CAAC,CAAC;QAE5E,wBAAwB;QACxB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,QAAS,CAAC,OAAO,CAC5C,cAAc,EACd,eAAe,CAChB,CAAC;QACF,MAAM,UAAU,GAAG,SAAS,CAAC,KAAK,CAAC;QAEnC,qCAAqC;QACrC,MAAM,eAAe,GAAG,eAAe,CACrC,YAAY,EACZ,UAAU,EACV,eAAe,EACf,cAAc,CACf,CAAC;QAEF,iFAAiF;QACjF,kEAAkE;QAClE,MAAM,aAAa,GACjB,eAAe,CAAC,qBAAqB,KAAK,IAAI;YAC5C,CAAC,CAAC,uBAAuB,CAAC,eAAe,EAAE,cAAc,CAAC;YAC1D,CAAC,CAAC,eAAe,CAAC;QAEtB,8EAA8E;QAC9E,wFAAwF;QACxF,MAAM,qBAAqB,GAAG,eAAe,CAAC,qBAAqB;YACjE,CAAC,CAAC,sBAAsB,CAAC,aAAa,EAAE,cAAc,CAAC;YACvD,CAAC,CAAC,aAAa,CAAC;QAElB,yDAAyD;QACzD,8DAA8D;QAC9D,MAAM,eAAe,GAAG,eAAe,CAAC,qBAAqB;YAC3D,CAAC,CAAC,eAAe,CAAC,qBAAqB,EAAE;gBACrC,MAAM,EAAE,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,
MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,wBAAwB;aAC1F,CAAC;YACJ,CAAC,CAAC,qBAAqB,CAAC;QAE1B,yCAAyC;QACzC,MAAM,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,WAAW,CACtD,cAAc,EACd,eAAe,EACf,eAAe,CAChB,CAAC;QAEF,0BAA0B;QAC1B,MAAM,UAAU,GAAG,cAAc,CAC/B,cAAc,EACd,QAAQ,EACR,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,EACzB,eAAe,CAChB,CAAC;QAEF,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;YACtB,mDAAmD;YACnD,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC/C,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,OAAO,EAAE,CAAC,CAAC,OAAO;aACnB,CAAC,CAAC,CAAC;YACJ,sCAAsC;YACtC,OAAO,CAAC,IAAI,CAAC,sBAAsB,EAAE,UAAU,CAAC,CAAC;QACnD,CAAC;QAED,0BAA0B;QAC1B,MAAM,aAAa,GACjB,IAAI,CAAC,WAAW,KAAK,IAAI;YACvB,CAAC,CAAC,MAAM,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE;YACjC,CAAC,CAAC,WAAW,EAAE,CAAC;QAEpB,MAAM,eAAe,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;QAEnE,sBAAsB;QACtB,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAClC,MAAM,KAAK,GAAuB;YAChC,YAAY,EAAE,mBAAmB,CAAC,QAAQ,CAAC;YAC3C,aAAa,EAAE,QAAQ,CAAC,MAAM;YAC9B,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,aAAa,EAAE,IAAI,CAAC,aAAa;YACjC,gBAAgB,EAAE,OAAO,GAAG,SAAS;YACrC,cAAc,EAAE,UAAU,CAAC,cAAc;SAC1C,CAAC;QAEF,2DAA2D;QAC3D,MAAM,YAAY,GAAuC,QAAQ,CAAC,GAAG,CACnE,CAAC,EAAE,QAAQ,EAAE,SAAS,EAAE,GAAG,IAAI,EAAE,EAAE,EAAE,CAAC,IAAI,CAC3C,CAAC;QAEF,OAAO;YACL,cAAc;YACd,QAAQ,EAAE,YAAY;YACtB,MAAM,EAAE,eAAe;YACvB,KAAK;SACN,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC;QAChC,CAAC;QACD,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,WAAW;QACT,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,WAAW;QACT,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,IAAI,aAAa;QACf,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;CACF;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,UAAU,gBAAgB,CAAC,MAAyB;IACxD,OAAO,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC;AAChC,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,IAAY,EACZ,MAAe,EACf,MAAqC;IAErC,MAAM,UAAU,GAAG,gBAAgB,EAAE,CAAC;IACtC,MAAM,UAAU,CAAC,UAAU,EAAE,CAAC;IAE9B,I
AAI,CAAC;QACH,OAAO,MAAM,UAAU,CAAC,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;IAC1D,CAAC;YAAS,CAAC;QACT,MAAM,UAAU,CAAC,OAAO,EAAE,CAAC;IAC7B,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,IAAY,EACZ,MAAqC;IAErC,kCAAkC;IAClC,MAAM,eAAe,GAAiC;QACpD,GAAG,MAAM;QACT,eAAe,EAAE,IAAI,GAAG,EAAE,EAAE,wBAAwB;KACrD,CAAC;IAEF,OAAO,SAAS,CAAC,IAAI,EAAE,SAAS,EAAE,eAAe,CAAC,CAAC;AACrD,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,IAAY,EACZ,SAAwE,EACxE,MAAqC;IAErC,MAAM,UAAU,GAAG,gBAAgB,CAAC;QAClC,GAAG,EAAE;YACH,IAAI,EAAE,SAAS,CAAC,IAAI,IAAI,WAAW;YACnC,GAAG,SAAS;SACb;KACF,CAAC,CAAC;IAEH,MAAM,UAAU,CAAC,UAAU,EAAE,CAAC;IAE9B,IAAI,CAAC;QACH,OAAO,MAAM,UAAU,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;IAC7D,CAAC;YAAS,CAAC;QACT,MAAM,UAAU,CAAC,OAAO,EAAE,CAAC;IAC7B,CAAC;AACH,CAAC"}
@@ -0,0 +1,64 @@
1
+ /**
2
+ * BIO Tag Decoder
3
+ * Converts BIO-tagged token sequences to entity spans
4
+ */
5
+ import { SpanMatch } from '../types/index.js';
6
+ import type { Token } from './tokenizer.js';
7
+ /**
8
+ * BIO tag types
9
+ */
10
+ export declare enum BIOTag {
11
+ /** Beginning of an entity */
12
+ B = "B",
13
+ /** Inside an entity (continuation) */
14
+ I = "I",
15
+ /** Outside any entity */
16
+ O = "O"
17
+ }
18
+ /**
19
+ * Parsed BIO label
20
+ */
21
+ export interface ParsedBIOLabel {
22
+ /** BIO tag type */
23
+ tag: BIOTag;
24
+ /** Entity type (null for O tag) */
25
+ entityType: string | null;
26
+ }
27
+ /**
28
+ * Raw entity span from NER (before conversion to SpanMatch)
29
+ */
30
+ export interface RawNEREntity {
31
+ /** Entity type string from model */
32
+ type: string;
33
+ /** Start character offset */
34
+ start: number;
35
+ /** End character offset */
36
+ end: number;
37
+ /** Combined confidence score */
38
+ confidence: number;
39
+ /** Raw text */
40
+ text: string;
41
+ /** Token indices that make up this entity */
42
+ tokenIndices: number[];
43
+ }
44
+ /**
45
+ * Parses a BIO label string (e.g., "B-PER", "I-ORG", "O")
46
+ */
47
+ export declare function parseBIOLabel(label: string): ParsedBIOLabel;
48
+ /**
49
+ * Decodes BIO-tagged tokens into entity spans
50
+ */
51
+ export declare function decodeBIOTags(tokens: Token[], labels: string[], confidences: number[], originalText: string): RawNEREntity[];
52
+ /**
53
+ * Converts raw NER entities to SpanMatch format
54
+ */
55
+ export declare function convertToSpanMatches(rawEntities: RawNEREntity[], confidenceThreshold?: number): SpanMatch[];
56
+ /**
57
+ * Post-processes NER spans to clean up boundaries
58
+ */
59
+ export declare function cleanupSpanBoundaries(spans: SpanMatch[], originalText: string): SpanMatch[];
60
+ /**
61
+ * Merges adjacent spans of the same type
62
+ */
63
+ export declare function mergeAdjacentSpans(spans: SpanMatch[], originalText: string, maxGap?: number): SpanMatch[];
64
+ //# sourceMappingURL=bio-decoder.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bio-decoder.d.ts","sourceRoot":"","sources":["../../src/ner/bio-decoder.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAW,SAAS,EAAmB,MAAM,mBAAmB,CAAC;AAExE,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAC;AAE5C;;GAEG;AACH,oBAAY,MAAM;IAChB,6BAA6B;IAC7B,CAAC,MAAM;IACP,sCAAsC;IACtC,CAAC,MAAM;IACP,yBAAyB;IACzB,CAAC,MAAM;CACR;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,mBAAmB;IACnB,GAAG,EAAE,MAAM,CAAC;IACZ,mCAAmC;IACnC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,oCAAoC;IACpC,IAAI,EAAE,MAAM,CAAC;IACb,6BAA6B;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,2BAA2B;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,gCAAgC;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe;IACf,IAAI,EAAE,MAAM,CAAC;IACb,6CAA6C;IAC7C,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,cAAc,CAyB3D;AAED;;GAEG;AACH,wBAAgB,aAAa,CAC3B,MAAM,EAAE,KAAK,EAAE,EACf,MAAM,EAAE,MAAM,EAAE,EAChB,WAAW,EAAE,MAAM,EAAE,EACrB,YAAY,EAAE,MAAM,GACnB,YAAY,EAAE,CAkFhB;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAClC,WAAW,EAAE,YAAY,EAAE,EAC3B,mBAAmB,GAAE,MAAY,GAChC,SAAS,EAAE,CA0Bb;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,SAAS,EAAE,EAClB,YAAY,EAAE,MAAM,GACnB,SAAS,EAAE,CAoCb;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAChC,KAAK,EAAE,SAAS,EAAE,EAClB,YAAY,EAAE,MAAM,EACpB,MAAM,GAAE,MAAU,GACjB,SAAS,EAAE,CAiCb"}