@elanlanguages/bridge-anonymization 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/README.md +382 -0
  2. package/dist/crypto/index.d.ts +6 -0
  3. package/dist/crypto/index.d.ts.map +1 -0
  4. package/dist/crypto/index.js +6 -0
  5. package/dist/crypto/index.js.map +1 -0
  6. package/dist/crypto/pii-map-crypto.d.ts +100 -0
  7. package/dist/crypto/pii-map-crypto.d.ts.map +1 -0
  8. package/dist/crypto/pii-map-crypto.js +163 -0
  9. package/dist/crypto/pii-map-crypto.js.map +1 -0
  10. package/dist/index.d.ts +173 -0
  11. package/dist/index.d.ts.map +1 -0
  12. package/dist/index.js +294 -0
  13. package/dist/index.js.map +1 -0
  14. package/dist/ner/bio-decoder.d.ts +64 -0
  15. package/dist/ner/bio-decoder.d.ts.map +1 -0
  16. package/dist/ner/bio-decoder.js +216 -0
  17. package/dist/ner/bio-decoder.js.map +1 -0
  18. package/dist/ner/index.d.ts +10 -0
  19. package/dist/ner/index.d.ts.map +1 -0
  20. package/dist/ner/index.js +10 -0
  21. package/dist/ner/index.js.map +1 -0
  22. package/dist/ner/model-manager.d.ts +102 -0
  23. package/dist/ner/model-manager.d.ts.map +1 -0
  24. package/dist/ner/model-manager.js +253 -0
  25. package/dist/ner/model-manager.js.map +1 -0
  26. package/dist/ner/ner-model.d.ts +114 -0
  27. package/dist/ner/ner-model.d.ts.map +1 -0
  28. package/dist/ner/ner-model.js +240 -0
  29. package/dist/ner/ner-model.js.map +1 -0
  30. package/dist/ner/onnx-runtime.d.ts +45 -0
  31. package/dist/ner/onnx-runtime.d.ts.map +1 -0
  32. package/dist/ner/onnx-runtime.js +99 -0
  33. package/dist/ner/onnx-runtime.js.map +1 -0
  34. package/dist/ner/tokenizer.d.ts +140 -0
  35. package/dist/ner/tokenizer.d.ts.map +1 -0
  36. package/dist/ner/tokenizer.js +341 -0
  37. package/dist/ner/tokenizer.js.map +1 -0
  38. package/dist/pipeline/index.d.ts +9 -0
  39. package/dist/pipeline/index.d.ts.map +1 -0
  40. package/dist/pipeline/index.js +9 -0
  41. package/dist/pipeline/index.js.map +1 -0
  42. package/dist/pipeline/prenormalize.d.ts +48 -0
  43. package/dist/pipeline/prenormalize.d.ts.map +1 -0
  44. package/dist/pipeline/prenormalize.js +94 -0
  45. package/dist/pipeline/prenormalize.js.map +1 -0
  46. package/dist/pipeline/resolver.d.ts +56 -0
  47. package/dist/pipeline/resolver.d.ts.map +1 -0
  48. package/dist/pipeline/resolver.js +238 -0
  49. package/dist/pipeline/resolver.js.map +1 -0
  50. package/dist/pipeline/tagger.d.ts +74 -0
  51. package/dist/pipeline/tagger.d.ts.map +1 -0
  52. package/dist/pipeline/tagger.js +169 -0
  53. package/dist/pipeline/tagger.js.map +1 -0
  54. package/dist/pipeline/validator.d.ts +65 -0
  55. package/dist/pipeline/validator.d.ts.map +1 -0
  56. package/dist/pipeline/validator.js +264 -0
  57. package/dist/pipeline/validator.js.map +1 -0
  58. package/dist/recognizers/base.d.ts +78 -0
  59. package/dist/recognizers/base.d.ts.map +1 -0
  60. package/dist/recognizers/base.js +100 -0
  61. package/dist/recognizers/base.js.map +1 -0
  62. package/dist/recognizers/bic-swift.d.ts +10 -0
  63. package/dist/recognizers/bic-swift.d.ts.map +1 -0
  64. package/dist/recognizers/bic-swift.js +107 -0
  65. package/dist/recognizers/bic-swift.js.map +1 -0
  66. package/dist/recognizers/credit-card.d.ts +32 -0
  67. package/dist/recognizers/credit-card.d.ts.map +1 -0
  68. package/dist/recognizers/credit-card.js +160 -0
  69. package/dist/recognizers/credit-card.js.map +1 -0
  70. package/dist/recognizers/custom-id.d.ts +28 -0
  71. package/dist/recognizers/custom-id.d.ts.map +1 -0
  72. package/dist/recognizers/custom-id.js +116 -0
  73. package/dist/recognizers/custom-id.js.map +1 -0
  74. package/dist/recognizers/email.d.ts +10 -0
  75. package/dist/recognizers/email.d.ts.map +1 -0
  76. package/dist/recognizers/email.js +75 -0
  77. package/dist/recognizers/email.js.map +1 -0
  78. package/dist/recognizers/iban.d.ts +14 -0
  79. package/dist/recognizers/iban.d.ts.map +1 -0
  80. package/dist/recognizers/iban.js +67 -0
  81. package/dist/recognizers/iban.js.map +1 -0
  82. package/dist/recognizers/index.d.ts +20 -0
  83. package/dist/recognizers/index.d.ts.map +1 -0
  84. package/dist/recognizers/index.js +42 -0
  85. package/dist/recognizers/index.js.map +1 -0
  86. package/dist/recognizers/ip-address.d.ts +14 -0
  87. package/dist/recognizers/ip-address.d.ts.map +1 -0
  88. package/dist/recognizers/ip-address.js +183 -0
  89. package/dist/recognizers/ip-address.js.map +1 -0
  90. package/dist/recognizers/phone.d.ts +10 -0
  91. package/dist/recognizers/phone.d.ts.map +1 -0
  92. package/dist/recognizers/phone.js +145 -0
  93. package/dist/recognizers/phone.js.map +1 -0
  94. package/dist/recognizers/registry.d.ts +59 -0
  95. package/dist/recognizers/registry.d.ts.map +1 -0
  96. package/dist/recognizers/registry.js +113 -0
  97. package/dist/recognizers/registry.js.map +1 -0
  98. package/dist/recognizers/url.d.ts +14 -0
  99. package/dist/recognizers/url.d.ts.map +1 -0
  100. package/dist/recognizers/url.js +121 -0
  101. package/dist/recognizers/url.js.map +1 -0
  102. package/dist/types/index.d.ts +134 -0
  103. package/dist/types/index.d.ts.map +1 -0
  104. package/dist/types/index.js +69 -0
  105. package/dist/types/index.js.map +1 -0
  106. package/dist/types/pii-types.d.ts +50 -0
  107. package/dist/types/pii-types.d.ts.map +1 -0
  108. package/dist/types/pii-types.js +114 -0
  109. package/dist/types/pii-types.js.map +1 -0
  110. package/dist/utils/iban-checksum.d.ts +23 -0
  111. package/dist/utils/iban-checksum.d.ts.map +1 -0
  112. package/dist/utils/iban-checksum.js +106 -0
  113. package/dist/utils/iban-checksum.js.map +1 -0
  114. package/dist/utils/index.d.ts +8 -0
  115. package/dist/utils/index.d.ts.map +1 -0
  116. package/dist/utils/index.js +8 -0
  117. package/dist/utils/index.js.map +1 -0
  118. package/dist/utils/luhn.d.ts +17 -0
  119. package/dist/utils/luhn.d.ts.map +1 -0
  120. package/dist/utils/luhn.js +55 -0
  121. package/dist/utils/luhn.js.map +1 -0
  122. package/dist/utils/offsets.d.ts +86 -0
  123. package/dist/utils/offsets.d.ts.map +1 -0
  124. package/dist/utils/offsets.js +124 -0
  125. package/dist/utils/offsets.js.map +1 -0
  126. package/package.json +62 -0
@@ -0,0 +1,173 @@
1
+ /**
2
+ * Bridge Anonymization Module
3
+ * Main entry point for on-device PII anonymization
4
+ */
5
+ export * from './types/index.js';
6
+ export { Recognizer, RegexRecognizer, RecognizerRegistry, createDefaultRegistry, createRegistry, getGlobalRegistry, emailRecognizer, phoneRecognizer, ibanRecognizer, bicSwiftRecognizer, creditCardRecognizer, ipAddressRecognizer, urlRecognizer, createCustomIdRecognizer, createCaseIdRecognizer, createCustomerIdRecognizer, } from './recognizers/index.js';
7
+ export { NERModel, NERModelStub, createNERModel, createNERModelStub, WordPieceTokenizer, loadVocabFromFile, parseVocab, loadRuntime, detectRuntime, getRuntimeType, type INERModel, type NERModelConfig, type NERPrediction, type NERModelMode, type DownloadProgressCallback, MODEL_REGISTRY, getModelCacheDir, isModelDownloaded, downloadModel, ensureModel, clearModelCache, listDownloadedModels, } from './ner/index.js';
8
+ export { prenormalize, resolveEntities, tagEntities, validateOutput, generateTag, parseTag, rehydrate, } from './pipeline/index.js';
9
+ export { encryptPIIMap, decryptPIIMap, generateKey, deriveKey, generateSalt, KeyProvider, InMemoryKeyProvider, EnvKeyProvider, } from './crypto/index.js';
10
+ import { AnonymizationResult, AnonymizationPolicy } from './types/index.js';
11
+ import { RecognizerRegistry } from './recognizers/index.js';
12
+ import { type INERModel } from './ner/index.js';
13
+ import { type NERModelMode, type DownloadProgressCallback } from './ner/model-manager.js';
14
+ import { type KeyProvider } from './crypto/index.js';
15
+ /**
16
+ * Options controlling which NER model (if any) `Anonymizer.initialize()` creates and loads
17
+ */
18
+ export interface NERConfig {
19
+ /**
20
+ * NER model mode:
21
+ * - 'standard': Full-size multilingual model (~1.1 GB)
22
+ * - 'quantized': Smaller quantized model (~280 MB)
23
+ * - 'disabled': No NER (a stub model is used), regex-only detection
24
+ * - 'custom': Load the model from `modelPath` and `vocabPath`
25
+ */
26
+ mode: NERModelMode;
27
+ /**
28
+ * Custom model path. Required when mode is 'custom' (initialization throws an Error if it is missing); not read by the other modes
29
+ */
30
+ modelPath?: string;
31
+ /**
32
+ * Custom vocab path. Required when mode is 'custom' (initialization throws an Error if it is missing); not read by the other modes
33
+ */
34
+ vocabPath?: string;
35
+ /**
36
+ * Whether to auto-download model if not present (only consulted for the 'standard' and 'quantized' modes)
37
+ * @default true
38
+ */
39
+ autoDownload?: boolean;
40
+ /**
41
+ * Callback for download progress; forwarded to the model manager for the 'standard' and 'quantized' modes
42
+ */
43
+ onDownloadProgress?: DownloadProgressCallback;
44
+ /**
45
+ * Callback for status messages, e.g. 'Loading NER model...' and 'NER model loaded!' around model loading
46
+ */
47
+ onStatus?: (status: string) => void;
48
+ }
49
+ /**
50
+ * Anonymizer configuration; every field is optional and `new Anonymizer()` works with no arguments
51
+ */
52
+ export interface AnonymizerConfig {
53
+ /** Recognizer registry (created with createDefaultRegistry() if not provided) */
54
+ registry?: RecognizerRegistry;
55
+ /**
56
+ * NER configuration. Ignored when `nerModel` is also supplied.
57
+ * @default { mode: 'disabled' }
58
+ */
59
+ ner?: NERConfig;
60
+ /**
61
+ * @deprecated Use `ner` instead. Direct NER model injection for advanced use cases; takes precedence over `ner` when both are set.
62
+ */
63
+ nerModel?: INERModel;
64
+ /** Key provider for encryption. If not provided, a fresh random key is generated on every anonymize() call and that key is not returned to the caller. */
65
+ keyProvider?: KeyProvider;
66
+ /** Default policy (created with createDefaultPolicy() if not provided); used when anonymize() is called without a policy */
67
+ defaultPolicy?: AnonymizationPolicy;
68
+ /** Model version string (defaults to the injected model's version, else '1.0.0'); replaced by the loaded model's `version` once initialize() completes */
69
+ modelVersion?: string;
70
+ /** Policy version string reported in the result stats (defaults to '1.0.0') */
71
+ policyVersion?: string;
72
+ }
73
+ /**
74
+ * Anonymizer instance
75
+ * Main class for performing PII anonymization (regex recognizers plus an optional NER model)
76
+ */
77
+ export declare class Anonymizer {
78
+ private registry;
79
+ private nerModel;
80
+ private nerConfig;
81
+ private keyProvider;
82
+ private defaultPolicy;
83
+ private modelVersion;
84
+ private policyVersion;
85
+ private initialized;
86
+ constructor(config?: AnonymizerConfig);
87
+ /**
88
+ * Initializes the anonymizer; a no-op when already initialized
89
+ * Downloads NER model if needed and loads it. Rejects when `ner.mode` is 'custom' but `modelPath` or `vocabPath` is missing
90
+ */
91
+ initialize(): Promise<void>;
92
+ /**
93
+ * Anonymizes text, replacing PII with placeholder tags (calls initialize() first if needed)
94
+ * @param text - Input text to anonymize
95
+ * @param locale - Optional locale hint (e.g., 'de-DE', 'en-US'); accepted but not read by the current implementation
96
+ * @param policy - Optional policy override; passed through mergePolicy() when given, otherwise the configured default policy is used
97
+ * @returns Anonymization result with anonymized text, entities without their original text, encrypted PII map and stats
98
+ */
99
+ anonymize(text: string, locale?: string, policy?: Partial<AnonymizationPolicy>): Promise<AnonymizationResult>;
100
+ /**
101
+ * Disposes the NER model (if one exists) and marks the instance as uninitialized
102
+ */
103
+ dispose(): Promise<void>;
104
+ /**
105
+ * Gets the recognizer registry used for regex detection
106
+ */
107
+ getRegistry(): RecognizerRegistry;
108
+ /**
109
+ * Gets the NER model; null until initialize() has run unless a model was injected through the config
110
+ */
111
+ getNERModel(): INERModel | null;
112
+ /**
113
+ * Whether the anonymizer is initialized (reset to false by dispose())
114
+ */
115
+ get isInitialized(): boolean;
116
+ }
117
+ /**
118
+ * Creates an anonymizer with the specified configuration. The instance is returned uninitialized; the first anonymize() call, or an explicit initialize(), sets up the NER model.
119
+ *
120
+ * @example
121
+ * ```typescript
122
+ * // Regex-only (no NER)
123
+ * const anonymizer = createAnonymizer();
124
+ *
125
+ * // With NER (auto-downloads model on first use)
126
+ * const anonymizer = createAnonymizer({
127
+ * ner: { mode: 'quantized' }
128
+ * });
129
+ *
130
+ * // With NER and progress callback
131
+ * const anonymizer = createAnonymizer({
132
+ * ner: {
133
+ * mode: 'standard',
134
+ * onStatus: (status) => console.log(status),
135
+ * onDownloadProgress: (p) => console.log(`${p.file}: ${p.percent}%`)
136
+ * }
137
+ * });
138
+ * ```
139
+ */
140
+ export declare function createAnonymizer(config?: AnonymizerConfig): Anonymizer;
141
+ /**
142
+ * Creates and initializes an anonymizer that loads a custom NER model from `modelPath` and `vocabPath`
143
+ * @deprecated Use createAnonymizer with ner: { mode: 'custom', modelPath, vocabPath } instead
144
+ */
145
+ export declare function createAnonymizerWithNER(modelPath: string, vocabPath: string, config?: Omit<AnonymizerConfig, 'nerModel' | 'ner'>): Promise<Anonymizer>;
146
+ /**
147
+ * Convenience function for one-off anonymization
148
+ * Creates a temporary anonymizer with default settings (regex-only) and disposes it once the call has finished
149
+ */
150
+ export declare function anonymize(text: string, locale?: string, policy?: Partial<AnonymizationPolicy>): Promise<AnonymizationResult>;
151
+ /**
152
+ * Quick regex-only anonymization (no NER, faster); delegates to anonymize() with no locale and `nerEnabledTypes` forced to an empty set
153
+ */
154
+ export declare function anonymizeRegexOnly(text: string, policy?: Partial<AnonymizationPolicy>): Promise<AnonymizationResult>;
155
+ /**
156
+ * Full anonymization with NER, using a temporary anonymizer that is disposed after the call
157
+ * `mode` defaults to 'quantized'; the model is auto-downloaded on first use unless `autoDownload` is false
158
+ *
159
+ * @example
160
+ * ```typescript
161
+ * const result = await anonymizeWithNER(
162
+ * 'Contact John Smith at john@example.com',
163
+ * {
164
+ * mode: 'quantized',
165
+ * onStatus: console.log
166
+ * }
167
+ * );
168
+ * ```
169
+ */
170
+ export declare function anonymizeWithNER(text: string, nerConfig: Omit<NERConfig, 'mode'> & {
171
+ mode?: 'standard' | 'quantized';
172
+ }, policy?: Partial<AnonymizationPolicy>): Promise<AnonymizationResult>;
173
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,cAAc,kBAAkB,CAAC;AAGjC,OAAO,EACL,UAAU,EACV,eAAe,EACf,kBAAkB,EAClB,qBAAqB,EACrB,cAAc,EACd,iBAAiB,EACjB,eAAe,EACf,eAAe,EACf,cAAc,EACd,kBAAkB,EAClB,oBAAoB,EACpB,mBAAmB,EACnB,aAAa,EACb,wBAAwB,EACxB,sBAAsB,EACtB,0BAA0B,GAC3B,MAAM,wBAAwB,CAAC;AAGhC,OAAO,EACL,QAAQ,EACR,YAAY,EACZ,cAAc,EACd,kBAAkB,EAClB,kBAAkB,EAClB,iBAAiB,EACjB,UAAU,EACV,WAAW,EACX,aAAa,EACb,cAAc,EACd,KAAK,SAAS,EACd,KAAK,cAAc,EACnB,KAAK,aAAa,EAClB,KAAK,YAAY,EACjB,KAAK,wBAAwB,EAC7B,cAAc,EACd,gBAAgB,EAChB,iBAAiB,EACjB,aAAa,EACb,WAAW,EACX,eAAe,EACf,oBAAoB,GACrB,MAAM,gBAAgB,CAAC;AAGxB,OAAO,EACL,YAAY,EACZ,eAAe,EACf,WAAW,EACX,cAAc,EACd,WAAW,EACX,QAAQ,EACR,SAAS,GACV,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,aAAa,EACb,aAAa,EACb,WAAW,EACX,SAAS,EACT,YAAY,EACZ,WAAW,EACX,mBAAmB,EACnB,cAAc,GACf,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAEL,mBAAmB,EACnB,mBAAmB,EAKpB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAyB,kBAAkB,EAAE,MAAM,wBAAwB,CAAC;AACnF,OAAO,EAAE,KAAK,SAAS,EAAmD,MAAM,gBAAgB,CAAC;AACjG,OAAO,EAAE,KAAK,YAAY,EAAe,KAAK,wBAAwB,EAAE,MAAM,wBAAwB,CAAC;AAMvG,OAAO,EAA8B,KAAK,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAIjF;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB;;;;;;OAMG;IACH,IAAI,EAAE,YAAY,CAAC;IAEnB;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;;OAGG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IAEvB;;OAEG;IACH,kBAAkB,CAAC,EAAE,wBAAwB,CAAC;IAE9C;;OAEG;IACH,QAAQ,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,IAAI,CAAC;CACrC;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,yDAAyD;IACzD,QAAQ,CAAC,EAAE,kBAAkB,CAAC;IAE9B;;;OAGG;IACH,GAAG,CAAC,EAAE,SAAS,CAAC;IAEhB;;OAEG;IACH,QAAQ,CAAC,EAAE,SAAS,CAAC;IAErB,yEAAyE;IACzE,WAAW,CAAC,EAAE,WAAW,CAAC;IAE1B,oDAAoD;IACpD,aAAa,CAAC,EAAE,mBAAmB,CAAC;IAEpC,2BAA2B;IAC3B,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB,4BAA4B;IAC5B,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED;;;GAGG;AACH,qBAAa,UAAU;IACrB,OAAO,CAAC,QAAQ,CAAqB;IACrC,OAAO,CAAC,QAAQ,CAA0B;IAC1C,OAAO,CAAC,SAAS,CAAY;IAC7B,OAAO,CAAC,WAAW,CAAqB;IACxC,OAAO,CAAC,aAAa,CAAsB;IAC3C,OAAO,CAAC,YAAY,CAA
S;IAC7B,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,WAAW,CAAS;gBAEhB,MAAM,GAAE,gBAAqB;IAkBzC;;;OAGG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAuDjC;;;;;;OAMG;IACG,SAAS,CACb,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,MAAM,EACf,MAAM,CAAC,EAAE,OAAO,CAAC,mBAAmB,CAAC,GACpC,OAAO,CAAC,mBAAmB,CAAC;IAoF/B;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAO9B;;OAEG;IACH,WAAW,IAAI,kBAAkB;IAIjC;;OAEG;IACH,WAAW,IAAI,SAAS,GAAG,IAAI;IAI/B;;OAEG;IACH,IAAI,aAAa,IAAI,OAAO,CAE3B;CACF;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAgB,gBAAgB,CAAC,MAAM,CAAC,EAAE,gBAAgB,GAAG,UAAU,CAEtE;AAED;;;GAGG;AACH,wBAAsB,uBAAuB,CAC3C,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,EACjB,MAAM,CAAC,EAAE,IAAI,CAAC,gBAAgB,EAAE,UAAU,GAAG,KAAK,CAAC,GAClD,OAAO,CAAC,UAAU,CAAC,CAYrB;AAED;;;GAGG;AACH,wBAAsB,SAAS,CAC7B,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,MAAM,EACf,MAAM,CAAC,EAAE,OAAO,CAAC,mBAAmB,CAAC,GACpC,OAAO,CAAC,mBAAmB,CAAC,CAS9B;AAED;;GAEG;AACH,wBAAsB,kBAAkB,CACtC,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,OAAO,CAAC,mBAAmB,CAAC,GACpC,OAAO,CAAC,mBAAmB,CAAC,CAQ9B;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,gBAAgB,CACpC,IAAI,EAAE,MAAM,EACZ,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,MAAM,CAAC,GAAG;IAAE,IAAI,CAAC,EAAE,UAAU,GAAG,WAAW,CAAA;CAAE,EACxE,MAAM,CAAC,EAAE,OAAO,CAAC,mBAAmB,CAAC,GACpC,OAAO,CAAC,mBAAmB,CAAC,CAe9B"}
package/dist/index.js ADDED
@@ -0,0 +1,294 @@
1
+ /**
2
+ * Bridge Anonymization Module
3
+ * Main entry point for on-device PII anonymization
4
+ */
5
+ // Re-export types
6
+ export * from './types/index.js';
7
+ // Re-export recognizers
8
+ export { RegexRecognizer, RecognizerRegistry, createDefaultRegistry, createRegistry, getGlobalRegistry, emailRecognizer, phoneRecognizer, ibanRecognizer, bicSwiftRecognizer, creditCardRecognizer, ipAddressRecognizer, urlRecognizer, createCustomIdRecognizer, createCaseIdRecognizer, createCustomerIdRecognizer, } from './recognizers/index.js';
9
+ // Re-export NER components
10
+ export { NERModel, NERModelStub, createNERModel, createNERModelStub, WordPieceTokenizer, loadVocabFromFile, parseVocab, loadRuntime, detectRuntime, getRuntimeType, MODEL_REGISTRY, getModelCacheDir, isModelDownloaded, downloadModel, ensureModel, clearModelCache, listDownloadedModels, } from './ner/index.js';
11
+ // Re-export pipeline components
12
+ export { prenormalize, resolveEntities, tagEntities, validateOutput, generateTag, parseTag, rehydrate, } from './pipeline/index.js';
13
+ // Re-export crypto
14
+ export { encryptPIIMap, decryptPIIMap, generateKey, deriveKey, generateSalt, InMemoryKeyProvider, EnvKeyProvider, } from './crypto/index.js';
15
+ // Main anonymization imports
16
+ import { createDefaultPolicy, mergePolicy, } from './types/index.js';
17
+ import { createDefaultRegistry } from './recognizers/index.js';
18
+ import { NERModelStub, createNERModel, DEFAULT_LABEL_MAP } from './ner/index.js';
19
+ import { ensureModel } from './ner/model-manager.js';
20
+ import { prenormalize } from './pipeline/prenormalize.js';
21
+ import { resolveEntities } from './pipeline/resolver.js';
22
+ import { tagEntities, countEntitiesByType } from './pipeline/tagger.js';
23
+ import { validateOutput } from './pipeline/validator.js';
24
+ import { encryptPIIMap, generateKey } from './crypto/index.js';
25
+ import * as fs from 'fs/promises';
26
/**
 * Anonymizer instance
 * Main class for performing PII anonymization.
 *
 * `anonymize()` runs these steps in order: pre-normalization, regex
 * recognizers, NER prediction, entity resolution, tagging, output validation
 * and PII-map encryption.
 */
export class Anonymizer {
    registry;
    nerModel = null;
    nerConfig;
    keyProvider;
    defaultPolicy;
    modelVersion;
    policyVersion;
    initialized = false;
    /**
     * @param config - Optional configuration; every field has a default.
     *   When `config.nerModel` is given it takes precedence over `config.ner`.
     */
    constructor(config = {}) {
        this.registry = config.registry ?? createDefaultRegistry();
        this.keyProvider = config.keyProvider ?? null;
        this.defaultPolicy = config.defaultPolicy ?? createDefaultPolicy();
        this.policyVersion = config.policyVersion ?? '1.0.0';
        // Handle NER configuration
        if (config.nerModel) {
            // Legacy: direct model injection. The mode is recorded as 'custom'
            // without paths; initialize() recognises this and keeps the model.
            this.nerModel = config.nerModel;
            this.nerConfig = { mode: 'custom' };
            this.modelVersion = config.modelVersion ?? config.nerModel.version;
        }
        else {
            this.nerConfig = config.ner ?? { mode: 'disabled' };
            this.modelVersion = config.modelVersion ?? '1.0.0';
        }
    }
    /**
     * Initializes the anonymizer; a no-op when already initialized.
     * Downloads NER model if needed and loads it.
     *
     * @throws Error when the mode is 'custom', `modelPath` or `vocabPath` is
     *   missing, and no model was injected through `config.nerModel`.
     */
    async initialize() {
        if (this.initialized)
            return;
        const { mode, modelPath, vocabPath } = this.nerConfig;
        if (mode === 'disabled') {
            this.nerModel = new NERModelStub();
        }
        else if (mode === 'custom') {
            if (modelPath && vocabPath) {
                this.nerModel = createNERModel({
                    modelPath,
                    vocabPath,
                    modelVersion: this.modelVersion,
                });
            }
            else if (this.nerModel === null) {
                throw new Error("NER mode 'custom' requires modelPath and vocabPath");
            }
            // Otherwise a model was injected via `config.nerModel`: use it as is.
            // Previously this case always hit the throw above, so an injected
            // model could never be initialized.
        }
        else {
            // 'standard' or 'quantized' - use model manager
            const resolved = await ensureModel(mode, {
                autoDownload: this.nerConfig.autoDownload ?? true,
                onProgress: this.nerConfig.onDownloadProgress,
                onStatus: this.nerConfig.onStatus,
            });
            // Load label map; deliberately best-effort, so an unreadable or
            // malformed file falls back to DEFAULT_LABEL_MAP.
            let labelMap = DEFAULT_LABEL_MAP;
            try {
                const labelMapContent = await fs.readFile(resolved.labelMapPath, 'utf-8');
                labelMap = JSON.parse(labelMapContent);
            }
            catch {
                // Use default label map
            }
            this.nerModel = createNERModel({
                modelPath: resolved.modelPath,
                vocabPath: resolved.vocabPath,
                labelMap,
                modelVersion: this.modelVersion,
            });
        }
        // Load the model
        if (!this.nerModel.loaded) {
            this.nerConfig.onStatus?.('Loading NER model...');
            await this.nerModel.load();
            this.nerConfig.onStatus?.('NER model loaded!');
        }
        // The loaded model's version replaces whatever was configured.
        this.modelVersion = this.nerModel.version;
        this.initialized = true;
    }
    /**
     * Anonymizes text, replacing PII with placeholder tags
     * @param text - Input text to anonymize
     * @param locale - Optional locale hint (e.g., 'de-DE', 'en-US'); accepted
     *   for API compatibility but not read by this implementation
     * @param policy - Optional policy override; passed through mergePolicy()
     *   when given, otherwise the configured default policy is used
     * @returns Anonymization result with anonymized text and encrypted PII map
     */
    async anonymize(text, locale, policy) {
        if (!this.initialized) {
            await this.initialize();
        }
        const startTime = performance.now();
        // Merge policy with defaults
        const effectivePolicy = policy !== undefined ? mergePolicy(policy) : this.defaultPolicy;
        // Step 1: Pre-normalize text
        const normalizedText = prenormalize(text);
        // Step 2: Run regex recognizers
        const regexMatches = this.registry.findAll(normalizedText, effectivePolicy);
        // Step 3: Run NER model
        const nerResult = await this.nerModel.predict(normalizedText, effectivePolicy);
        const nerMatches = nerResult.spans;
        // Step 4: Resolve and merge entities
        const resolvedMatches = resolveEntities(regexMatches, nerMatches, effectivePolicy, normalizedText);
        // Step 5: Tag entities and build PII map
        const { anonymizedText, entities, piiMap } = tagEntities(normalizedText, resolvedMatches, effectivePolicy);
        // Step 6: Validate output
        const validation = validateOutput(anonymizedText, entities, Array.from(piiMap.keys()), effectivePolicy);
        if (!validation.valid) {
            // Log validation errors (but don't expose raw PII)
            const safeErrors = validation.errors.map((e) => ({
                code: e.code,
                message: e.message,
            }));
            // eslint-disable-next-line no-console
            console.warn('Validation warnings:', safeErrors);
        }
        // Step 7: Encrypt PII map. Without a key provider a one-off key is
        // generated here and is not part of the returned result.
        const encryptionKey = this.keyProvider !== null
            ? await this.keyProvider.getKey()
            : generateKey();
        const encryptedPiiMap = encryptPIIMap(piiMap, encryptionKey);
        // Step 8: Build stats
        const endTime = performance.now();
        const stats = {
            countsByType: countEntitiesByType(entities),
            totalEntities: entities.length,
            modelVersion: this.modelVersion,
            policyVersion: this.policyVersion,
            processingTimeMs: endTime - startTime,
            leakScanPassed: validation.leakScanPassed,
        };
        // Step 9: Build result (without original text in entities)
        const safeEntities = entities.map(({ original, ...rest }) => rest);
        return {
            anonymizedText,
            entities: safeEntities,
            piiMap: encryptedPiiMap,
            stats,
        };
    }
    /**
     * Disposes the NER model (if any) and marks the instance as uninitialized.
     */
    async dispose() {
        if (this.nerModel) {
            await this.nerModel.dispose();
        }
        this.initialized = false;
    }
    /**
     * Gets the recognizer registry
     */
    getRegistry() {
        return this.registry;
    }
    /**
     * Gets the NER model (null until initialize() has run, unless injected)
     */
    getNERModel() {
        return this.nerModel;
    }
    /**
     * Whether the anonymizer is initialized
     */
    get isInitialized() {
        return this.initialized;
    }
}
197
/**
 * Factory for `Anonymizer`.
 *
 * The returned instance is not yet initialized; its first `anonymize()` call
 * (or an explicit `initialize()`) sets up the NER model.
 *
 * @example
 * ```typescript
 * // Regex-only (no NER)
 * const anonymizer = createAnonymizer();
 *
 * // With NER (auto-downloads model on first use)
 * const anonymizer = createAnonymizer({
 *   ner: { mode: 'quantized' }
 * });
 *
 * // With NER and progress callback
 * const anonymizer = createAnonymizer({
 *   ner: {
 *     mode: 'standard',
 *     onStatus: (status) => console.log(status),
 *     onDownloadProgress: (p) => console.log(`${p.file}: ${p.percent}%`)
 *   }
 * });
 * ```
 */
export function createAnonymizer(config) {
    const instance = new Anonymizer(config);
    return instance;
}
223
/**
 * Builds an anonymizer backed by a custom NER model and initializes it
 * before returning. Any `ner` entry in `config` is replaced by the custom one.
 * @deprecated Use createAnonymizer with ner: { mode: 'custom', modelPath, vocabPath } instead
 */
export async function createAnonymizerWithNER(modelPath, vocabPath, config) {
    const customNer = { mode: 'custom', modelPath, vocabPath };
    const options = { ...config, ner: customNer };
    const anonymizer = new Anonymizer(options);
    await anonymizer.initialize();
    return anonymizer;
}
239
/**
 * One-shot anonymization helper.
 * Spins up a throw-away anonymizer with default settings (regex-only),
 * runs it once and always disposes it afterwards.
 */
export async function anonymize(text, locale, policy) {
    const oneShot = createAnonymizer();
    await oneShot.initialize();
    let outcome;
    try {
        outcome = await oneShot.anonymize(text, locale, policy);
    }
    finally {
        await oneShot.dispose();
    }
    return outcome;
}
253
/**
 * Regex-only anonymization shortcut (no NER, faster).
 * Forwards to `anonymize()` without a locale, overriding the caller's policy
 * so that `nerEnabledTypes` is an empty set.
 */
export async function anonymizeRegexOnly(text, policy) {
    // An empty set disables every NER type.
    const noNerTypes = new Set();
    const regexOnlyPolicy = { ...policy, nerEnabledTypes: noNerTypes };
    return anonymize(text, undefined, regexOnlyPolicy);
}
264
/**
 * Full anonymization with NER
 * Uses a temporary anonymizer that is always disposed afterwards, even when
 * model initialization fails. `mode` defaults to 'quantized'; the model is
 * auto-downloaded on first use unless `autoDownload` is false.
 *
 * @param text - Input text to anonymize
 * @param nerConfig - NER options; `mode` may be omitted or left undefined
 * @param policy - Optional policy override passed to `Anonymizer.anonymize()`
 * @returns Anonymization result from the temporary anonymizer
 *
 * @example
 * ```typescript
 * const result = await anonymizeWithNER(
 *   'Contact John Smith at john@example.com',
 *   {
 *     mode: 'quantized',
 *     onStatus: console.log
 *   }
 * );
 * ```
 */
export async function anonymizeWithNER(text, nerConfig, policy) {
    const anonymizer = createAnonymizer({
        ner: {
            ...nerConfig,
            // Set after the spread so an explicit `mode: undefined` in
            // nerConfig cannot overwrite the 'quantized' default.
            mode: nerConfig.mode ?? 'quantized',
        },
    });
    try {
        // Inside the try so a failed download/load still reaches dispose().
        await anonymizer.initialize();
        return await anonymizer.anonymize(text, undefined, policy);
    }
    finally {
        await anonymizer.dispose();
    }
}
294
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,kBAAkB;AAClB,cAAc,kBAAkB,CAAC;AAEjC,wBAAwB;AACxB,OAAO,EAEL,eAAe,EACf,kBAAkB,EAClB,qBAAqB,EACrB,cAAc,EACd,iBAAiB,EACjB,eAAe,EACf,eAAe,EACf,cAAc,EACd,kBAAkB,EAClB,oBAAoB,EACpB,mBAAmB,EACnB,aAAa,EACb,wBAAwB,EACxB,sBAAsB,EACtB,0BAA0B,GAC3B,MAAM,wBAAwB,CAAC;AAEhC,2BAA2B;AAC3B,OAAO,EACL,QAAQ,EACR,YAAY,EACZ,cAAc,EACd,kBAAkB,EAClB,kBAAkB,EAClB,iBAAiB,EACjB,UAAU,EACV,WAAW,EACX,aAAa,EACb,cAAc,EAMd,cAAc,EACd,gBAAgB,EAChB,iBAAiB,EACjB,aAAa,EACb,WAAW,EACX,eAAe,EACf,oBAAoB,GACrB,MAAM,gBAAgB,CAAC;AAExB,gCAAgC;AAChC,OAAO,EACL,YAAY,EACZ,eAAe,EACf,WAAW,EACX,cAAc,EACd,WAAW,EACX,QAAQ,EACR,SAAS,GACV,MAAM,qBAAqB,CAAC;AAE7B,mBAAmB;AACnB,OAAO,EACL,aAAa,EACb,aAAa,EACb,WAAW,EACX,SAAS,EACT,YAAY,EAEZ,mBAAmB,EACnB,cAAc,GACf,MAAM,mBAAmB,CAAC;AAE3B,6BAA6B;AAC7B,OAAO,EAML,mBAAmB,EACnB,WAAW,GACZ,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,qBAAqB,EAAsB,MAAM,wBAAwB,CAAC;AACnF,OAAO,EAAkB,YAAY,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACjG,OAAO,EAAqB,WAAW,EAAiC,MAAM,wBAAwB,CAAC;AAEvG,OAAO,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AAC1D,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AACzD,OAAO,EAAE,WAAW,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AACzD,OAAO,EAAE,aAAa,EAAE,WAAW,EAAoB,MAAM,mBAAmB,CAAC;AACjF,OAAO,KAAK,EAAE,MAAM,aAAa,CAAC;AA0ElC;;;GAGG;AACH,MAAM,OAAO,UAAU;IACb,QAAQ,CAAqB;IAC7B,QAAQ,GAAqB,IAAI,CAAC;IAClC,SAAS,CAAY;IACrB,WAAW,CAAqB;IAChC,aAAa,CAAsB;IACnC,YAAY,CAAS;IACrB,aAAa,CAAS;IACtB,WAAW,GAAG,KAAK,CAAC;IAE5B,YAAY,SAA2B,EAAE;QACvC,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,IAAI,qBAAqB,EAAE,CAAC;QAC3D,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC;QAC9C,IAAI,CAAC,aAAa,GAAG,MAAM,CAAC,aAAa,IAAI,mBAAmB,EAAE,CAAC;QACnE,IAAI,CAAC,aAAa,GAAG,MAAM,CAAC,aAAa,IAAI,OAAO,CAAC;QAErD,2BAA2B;QAC3B,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YACpB,iCAAiC;YACjC,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;YAChC,IAAI,CAAC,SAAS,GAAG,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;YACpC,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,MAAM
,CAAC,QAAQ,CAAC,OAAO,CAAC;QACrE,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,GAAG,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC;YACpD,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,OAAO,CAAC;QACrD,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,WAAW;YAAE,OAAO;QAE7B,uCAAuC;QACvC,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;YACvC,IAAI,CAAC,QAAQ,GAAG,IAAI,YAAY,EAAE,CAAC;QACrC,CAAC;aAAM,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC5C,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,SAAS,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,CAAC;gBAC3D,MAAM,IAAI,KAAK,CAAC,oDAAoD,CAAC,CAAC;YACxE,CAAC;YAED,IAAI,CAAC,QAAQ,GAAG,cAAc,CAAC;gBAC7B,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS;gBACnC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS;gBACnC,YAAY,EAAE,IAAI,CAAC,YAAY;aAChC,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,gDAAgD;YAChD,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,GAAG,MAAM,WAAW,CAC9D,IAAI,CAAC,SAAS,CAAC,IAAI,EACnB;gBACE,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,YAAY,IAAI,IAAI;gBACjD,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,kBAAkB;gBAC7C,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ;aAClC,CACF,CAAC;YAEF,iBAAiB;YACjB,IAAI,QAAQ,GAAG,iBAAiB,CAAC;YACjC,IAAI,CAAC;gBACH,MAAM,eAAe,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;gBACjE,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAa,CAAC;YACrD,CAAC;YAAC,MAAM,CAAC;gBACP,wBAAwB;YAC1B,CAAC;YAED,IAAI,CAAC,QAAQ,GAAG,cAAc,CAAC;gBAC7B,SAAS;gBACT,SAAS;gBACT,QAAQ;gBACR,YAAY,EAAE,IAAI,CAAC,YAAY;aAChC,CAAC,CAAC;QACL,CAAC;QAED,iBAAiB;QACjB,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;YAC1B,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,CAAC,sBAAsB,CAAC,CAAC;YAClD,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YAC3B,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,CAAC,mBAAmB,CAAC,CAAC;QACjD,CAAC;QAED,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC;QAC1C,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;IAC1B,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,SAAS,CACb,IAAY,EACZ,MAAe,EACf,MAAqC;QAErC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACtB,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;QAC1B,CAAC;QAED,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAEpC,6BAA6B;QAC7B,MAAM,eAAe,GAAG,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,W
AAW,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC;QAExF,6BAA6B;QAC7B,MAAM,cAAc,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;QAE1C,gCAAgC;QAChC,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,cAAc,EAAE,eAAe,CAAC,CAAC;QAE5E,wBAAwB;QACxB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,QAAS,CAAC,OAAO,CAAC,cAAc,EAAE,eAAe,CAAC,CAAC;QAChF,MAAM,UAAU,GAAG,SAAS,CAAC,KAAK,CAAC;QAEnC,qCAAqC;QACrC,MAAM,eAAe,GAAG,eAAe,CACrC,YAAY,EACZ,UAAU,EACV,eAAe,EACf,cAAc,CACf,CAAC;QAEF,yCAAyC;QACzC,MAAM,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,WAAW,CACtD,cAAc,EACd,eAAe,EACf,eAAe,CAChB,CAAC;QAEF,0BAA0B;QAC1B,MAAM,UAAU,GAAG,cAAc,CAC/B,cAAc,EACd,QAAQ,EACR,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,EACzB,eAAe,CAChB,CAAC;QAEF,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;YACtB,mDAAmD;YACnD,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC/C,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,OAAO,EAAE,CAAC,CAAC,OAAO;aACnB,CAAC,CAAC,CAAC;YACJ,sCAAsC;YACtC,OAAO,CAAC,IAAI,CAAC,sBAAsB,EAAE,UAAU,CAAC,CAAC;QACnD,CAAC;QAED,0BAA0B;QAC1B,MAAM,aAAa,GAAG,IAAI,CAAC,WAAW,KAAK,IAAI;YAC7C,CAAC,CAAC,MAAM,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE;YACjC,CAAC,CAAC,WAAW,EAAE,CAAC;QAElB,MAAM,eAAe,GAAG,aAAa,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;QAE7D,sBAAsB;QACtB,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAClC,MAAM,KAAK,GAAuB;YAChC,YAAY,EAAE,mBAAmB,CAAC,QAAQ,CAAC;YAC3C,aAAa,EAAE,QAAQ,CAAC,MAAM;YAC9B,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,aAAa,EAAE,IAAI,CAAC,aAAa;YACjC,gBAAgB,EAAE,OAAO,GAAG,SAAS;YACrC,cAAc,EAAE,UAAU,CAAC,cAAc;SAC1C,CAAC;QAEF,2DAA2D;QAC3D,MAAM,YAAY,GAAuC,QAAQ,CAAC,GAAG,CACnE,CAAC,EAAE,QAAQ,EAAE,GAAG,IAAI,EAAE,EAAE,EAAE,CAAC,IAAI,CAChC,CAAC;QAEF,OAAO;YACL,cAAc;YACd,QAAQ,EAAE,YAAY;YACtB,MAAM,EAAE,eAAe;YACvB,KAAK;SACN,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC;QAChC,CAAC;QACD,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,WAAW;QACT,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,WAAW;QACT,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,IAAI,aAAa;QACf,OAAO,IAAI,CAAC,WAA
W,CAAC;IAC1B,CAAC;CACF;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,UAAU,gBAAgB,CAAC,MAAyB;IACxD,OAAO,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC;AAChC,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAC3C,SAAiB,EACjB,SAAiB,EACjB,MAAmD;IAEnD,MAAM,UAAU,GAAG,IAAI,UAAU,CAAC;QAChC,GAAG,MAAM;QACT,GAAG,EAAE;YACH,IAAI,EAAE,QAAQ;YACd,SAAS;YACT,SAAS;SACV;KACF,CAAC,CAAC;IAEH,MAAM,UAAU,CAAC,UAAU,EAAE,CAAC;IAC9B,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,IAAY,EACZ,MAAe,EACf,MAAqC;IAErC,MAAM,UAAU,GAAG,gBAAgB,EAAE,CAAC;IACtC,MAAM,UAAU,CAAC,UAAU,EAAE,CAAC;IAE9B,IAAI,CAAC;QACH,OAAO,MAAM,UAAU,CAAC,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;IAC1D,CAAC;YAAS,CAAC;QACT,MAAM,UAAU,CAAC,OAAO,EAAE,CAAC;IAC7B,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,IAAY,EACZ,MAAqC;IAErC,kCAAkC;IAClC,MAAM,eAAe,GAAiC;QACpD,GAAG,MAAM;QACT,eAAe,EAAE,IAAI,GAAG,EAAE,EAAE,wBAAwB;KACrD,CAAC;IAEF,OAAO,SAAS,CAAC,IAAI,EAAE,SAAS,EAAE,eAAe,CAAC,CAAC;AACrD,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,IAAY,EACZ,SAAwE,EACxE,MAAqC;IAErC,MAAM,UAAU,GAAG,gBAAgB,CAAC;QAClC,GAAG,EAAE;YACH,IAAI,EAAE,SAAS,CAAC,IAAI,IAAI,WAAW;YACnC,GAAG,SAAS;SACb;KACF,CAAC,CAAC;IAEH,MAAM,UAAU,CAAC,UAAU,EAAE,CAAC;IAE9B,IAAI,CAAC;QACH,OAAO,MAAM,UAAU,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;IAC7D,CAAC;YAAS,CAAC;QACT,MAAM,UAAU,CAAC,OAAO,EAAE,CAAC;IAC7B,CAAC;AACH,CAAC"}
@@ -0,0 +1,64 @@
1
+ /**
2
+ * BIO Tag Decoder
3
+ * Converts BIO-tagged token sequences to entity spans
4
+ */
5
+ import { SpanMatch } from '../types/index.js';
6
+ import type { Token } from './tokenizer.js';
7
+ /**
8
+ * BIO tag types: the position markers of the BIO (begin / inside / outside) labelling scheme; each member's runtime value is its own letter
9
+ */
10
+ export declare enum BIOTag {
11
+ /** Beginning of an entity */
12
+ B = "B",
13
+ /** Inside an entity (continuation of an entity that has already begun) */
14
+ I = "I",
15
+ /** Outside any entity */
16
+ O = "O"
17
+ }
18
+ /**
19
+ * Parsed BIO label: the structured form returned by parseBIOLabel (position tag plus entity type, if any)
20
+ */
21
+ export interface ParsedBIOLabel {
22
+ /** BIO tag type (B, I or O) */
23
+ tag: BIOTag;
24
+ /** Entity type (null for O tag); presumably the part after the dash, e.g. "PER" for "B-PER" - confirm against the implementation */
25
+ entityType: string | null;
26
+ }
27
+ /**
28
+ * Raw entity span from NER (before conversion to SpanMatch); the element type returned by decodeBIOTags and accepted by convertToSpanMatches
29
+ */
30
+ export interface RawNEREntity {
31
+ /** Entity type string as emitted by the model */
32
+ type: string;
33
+ /** Start character offset (NOTE(review): presumably an index into the original text - confirm) */
34
+ start: number;
35
+ /** End character offset (NOTE(review): inclusive vs. exclusive is not visible from this declaration - confirm) */
36
+ end: number;
37
+ /** Combined confidence score (NOTE(review): how per-token scores are combined is defined in the implementation) */
38
+ confidence: number;
39
+ /** Raw text (presumably the slice of the original text covered by this entity - confirm) */
40
+ text: string;
41
+ /** Token indices that make up this entity (presumably positions in the token array given to decodeBIOTags - confirm) */
42
+ tokenIndices: number[];
43
+ }
44
+ /**
45
+ * Parses a BIO label string (e.g., "B-PER", "I-ORG", "O") into a ParsedBIOLabel, i.e. its BIO tag and its entity type (null for the O tag)
46
+ */
47
+ export declare function parseBIOLabel(label: string): ParsedBIOLabel;
48
+ /**
49
+ * Decodes BIO-tagged tokens into entity spans, returned as RawNEREntity values. NOTE(review): labels and confidences are presumably parallel to tokens (one entry per token) - confirm against the implementation
50
+ */
51
+ export declare function decodeBIOTags(tokens: Token[], labels: string[], confidences: number[], originalText: string): RawNEREntity[];
52
+ /**
53
+ * Converts raw NER entities to SpanMatch format. confidenceThreshold is optional; its default and the exact filtering rule are not visible in this declaration (presumably lower-confidence entities are dropped - confirm)
54
+ */
55
+ export declare function convertToSpanMatches(rawEntities: RawNEREntity[], confidenceThreshold?: number): SpanMatch[];
56
+ /**
57
+ * Post-processes NER spans to clean up boundaries, given the spans and the original text, and returns a SpanMatch array. NOTE(review): whether the input spans are mutated is not visible from this declaration
58
+ */
59
+ export declare function cleanupSpanBoundaries(spans: SpanMatch[], originalText: string): SpanMatch[];
60
+ /**
61
+ * Merges adjacent spans of the same type and returns a SpanMatch array. maxGap is optional (presumably the largest character gap still treated as adjacent; its default is set in the implementation - confirm)
62
+ */
63
+ export declare function mergeAdjacentSpans(spans: SpanMatch[], originalText: string, maxGap?: number): SpanMatch[];
64
+ //# sourceMappingURL=bio-decoder.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bio-decoder.d.ts","sourceRoot":"","sources":["../../src/ner/bio-decoder.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAW,SAAS,EAAmB,MAAM,mBAAmB,CAAC;AAExE,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAC;AAE5C;;GAEG;AACH,oBAAY,MAAM;IAChB,6BAA6B;IAC7B,CAAC,MAAM;IACP,sCAAsC;IACtC,CAAC,MAAM;IACP,yBAAyB;IACzB,CAAC,MAAM;CACR;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,mBAAmB;IACnB,GAAG,EAAE,MAAM,CAAC;IACZ,mCAAmC;IACnC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,oCAAoC;IACpC,IAAI,EAAE,MAAM,CAAC;IACb,6BAA6B;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,2BAA2B;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,gCAAgC;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe;IACf,IAAI,EAAE,MAAM,CAAC;IACb,6CAA6C;IAC7C,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,cAAc,CAyB3D;AAED;;GAEG;AACH,wBAAgB,aAAa,CAC3B,MAAM,EAAE,KAAK,EAAE,EACf,MAAM,EAAE,MAAM,EAAE,EAChB,WAAW,EAAE,MAAM,EAAE,EACrB,YAAY,EAAE,MAAM,GACnB,YAAY,EAAE,CAkFhB;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAClC,WAAW,EAAE,YAAY,EAAE,EAC3B,mBAAmB,GAAE,MAAY,GAChC,SAAS,EAAE,CA0Bb;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,SAAS,EAAE,EAClB,YAAY,EAAE,MAAM,GACnB,SAAS,EAAE,CAoCb;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAChC,KAAK,EAAE,SAAS,EAAE,EAClB,YAAY,EAAE,MAAM,EACpB,MAAM,GAAE,MAAU,GACjB,SAAS,EAAE,CAiCb"}