@elanlanguages/bridge-anonymization 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +382 -0
- package/dist/crypto/index.d.ts +6 -0
- package/dist/crypto/index.d.ts.map +1 -0
- package/dist/crypto/index.js +6 -0
- package/dist/crypto/index.js.map +1 -0
- package/dist/crypto/pii-map-crypto.d.ts +100 -0
- package/dist/crypto/pii-map-crypto.d.ts.map +1 -0
- package/dist/crypto/pii-map-crypto.js +163 -0
- package/dist/crypto/pii-map-crypto.js.map +1 -0
- package/dist/index.d.ts +173 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +294 -0
- package/dist/index.js.map +1 -0
- package/dist/ner/bio-decoder.d.ts +64 -0
- package/dist/ner/bio-decoder.d.ts.map +1 -0
- package/dist/ner/bio-decoder.js +216 -0
- package/dist/ner/bio-decoder.js.map +1 -0
- package/dist/ner/index.d.ts +10 -0
- package/dist/ner/index.d.ts.map +1 -0
- package/dist/ner/index.js +10 -0
- package/dist/ner/index.js.map +1 -0
- package/dist/ner/model-manager.d.ts +102 -0
- package/dist/ner/model-manager.d.ts.map +1 -0
- package/dist/ner/model-manager.js +253 -0
- package/dist/ner/model-manager.js.map +1 -0
- package/dist/ner/ner-model.d.ts +114 -0
- package/dist/ner/ner-model.d.ts.map +1 -0
- package/dist/ner/ner-model.js +240 -0
- package/dist/ner/ner-model.js.map +1 -0
- package/dist/ner/onnx-runtime.d.ts +45 -0
- package/dist/ner/onnx-runtime.d.ts.map +1 -0
- package/dist/ner/onnx-runtime.js +99 -0
- package/dist/ner/onnx-runtime.js.map +1 -0
- package/dist/ner/tokenizer.d.ts +140 -0
- package/dist/ner/tokenizer.d.ts.map +1 -0
- package/dist/ner/tokenizer.js +341 -0
- package/dist/ner/tokenizer.js.map +1 -0
- package/dist/pipeline/index.d.ts +9 -0
- package/dist/pipeline/index.d.ts.map +1 -0
- package/dist/pipeline/index.js +9 -0
- package/dist/pipeline/index.js.map +1 -0
- package/dist/pipeline/prenormalize.d.ts +48 -0
- package/dist/pipeline/prenormalize.d.ts.map +1 -0
- package/dist/pipeline/prenormalize.js +94 -0
- package/dist/pipeline/prenormalize.js.map +1 -0
- package/dist/pipeline/resolver.d.ts +56 -0
- package/dist/pipeline/resolver.d.ts.map +1 -0
- package/dist/pipeline/resolver.js +238 -0
- package/dist/pipeline/resolver.js.map +1 -0
- package/dist/pipeline/tagger.d.ts +74 -0
- package/dist/pipeline/tagger.d.ts.map +1 -0
- package/dist/pipeline/tagger.js +169 -0
- package/dist/pipeline/tagger.js.map +1 -0
- package/dist/pipeline/validator.d.ts +65 -0
- package/dist/pipeline/validator.d.ts.map +1 -0
- package/dist/pipeline/validator.js +264 -0
- package/dist/pipeline/validator.js.map +1 -0
- package/dist/recognizers/base.d.ts +78 -0
- package/dist/recognizers/base.d.ts.map +1 -0
- package/dist/recognizers/base.js +100 -0
- package/dist/recognizers/base.js.map +1 -0
- package/dist/recognizers/bic-swift.d.ts +10 -0
- package/dist/recognizers/bic-swift.d.ts.map +1 -0
- package/dist/recognizers/bic-swift.js +107 -0
- package/dist/recognizers/bic-swift.js.map +1 -0
- package/dist/recognizers/credit-card.d.ts +32 -0
- package/dist/recognizers/credit-card.d.ts.map +1 -0
- package/dist/recognizers/credit-card.js +160 -0
- package/dist/recognizers/credit-card.js.map +1 -0
- package/dist/recognizers/custom-id.d.ts +28 -0
- package/dist/recognizers/custom-id.d.ts.map +1 -0
- package/dist/recognizers/custom-id.js +116 -0
- package/dist/recognizers/custom-id.js.map +1 -0
- package/dist/recognizers/email.d.ts +10 -0
- package/dist/recognizers/email.d.ts.map +1 -0
- package/dist/recognizers/email.js +75 -0
- package/dist/recognizers/email.js.map +1 -0
- package/dist/recognizers/iban.d.ts +14 -0
- package/dist/recognizers/iban.d.ts.map +1 -0
- package/dist/recognizers/iban.js +67 -0
- package/dist/recognizers/iban.js.map +1 -0
- package/dist/recognizers/index.d.ts +20 -0
- package/dist/recognizers/index.d.ts.map +1 -0
- package/dist/recognizers/index.js +42 -0
- package/dist/recognizers/index.js.map +1 -0
- package/dist/recognizers/ip-address.d.ts +14 -0
- package/dist/recognizers/ip-address.d.ts.map +1 -0
- package/dist/recognizers/ip-address.js +183 -0
- package/dist/recognizers/ip-address.js.map +1 -0
- package/dist/recognizers/phone.d.ts +10 -0
- package/dist/recognizers/phone.d.ts.map +1 -0
- package/dist/recognizers/phone.js +145 -0
- package/dist/recognizers/phone.js.map +1 -0
- package/dist/recognizers/registry.d.ts +59 -0
- package/dist/recognizers/registry.d.ts.map +1 -0
- package/dist/recognizers/registry.js +113 -0
- package/dist/recognizers/registry.js.map +1 -0
- package/dist/recognizers/url.d.ts +14 -0
- package/dist/recognizers/url.d.ts.map +1 -0
- package/dist/recognizers/url.js +121 -0
- package/dist/recognizers/url.js.map +1 -0
- package/dist/types/index.d.ts +134 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +69 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/pii-types.d.ts +50 -0
- package/dist/types/pii-types.d.ts.map +1 -0
- package/dist/types/pii-types.js +114 -0
- package/dist/types/pii-types.js.map +1 -0
- package/dist/utils/iban-checksum.d.ts +23 -0
- package/dist/utils/iban-checksum.d.ts.map +1 -0
- package/dist/utils/iban-checksum.js +106 -0
- package/dist/utils/iban-checksum.js.map +1 -0
- package/dist/utils/index.d.ts +8 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +8 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/luhn.d.ts +17 -0
- package/dist/utils/luhn.d.ts.map +1 -0
- package/dist/utils/luhn.js +55 -0
- package/dist/utils/luhn.js.map +1 -0
- package/dist/utils/offsets.d.ts +86 -0
- package/dist/utils/offsets.d.ts.map +1 -0
- package/dist/utils/offsets.js +124 -0
- package/dist/utils/offsets.js.map +1 -0
- package/package.json +62 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bridge Anonymization Module
|
|
3
|
+
* Main entry point for on-device PII anonymization
|
|
4
|
+
*/
|
|
5
|
+
export * from './types/index.js';
|
|
6
|
+
export { Recognizer, RegexRecognizer, RecognizerRegistry, createDefaultRegistry, createRegistry, getGlobalRegistry, emailRecognizer, phoneRecognizer, ibanRecognizer, bicSwiftRecognizer, creditCardRecognizer, ipAddressRecognizer, urlRecognizer, createCustomIdRecognizer, createCaseIdRecognizer, createCustomerIdRecognizer, } from './recognizers/index.js';
|
|
7
|
+
export { NERModel, NERModelStub, createNERModel, createNERModelStub, WordPieceTokenizer, loadVocabFromFile, parseVocab, loadRuntime, detectRuntime, getRuntimeType, type INERModel, type NERModelConfig, type NERPrediction, type NERModelMode, type DownloadProgressCallback, MODEL_REGISTRY, getModelCacheDir, isModelDownloaded, downloadModel, ensureModel, clearModelCache, listDownloadedModels, } from './ner/index.js';
|
|
8
|
+
export { prenormalize, resolveEntities, tagEntities, validateOutput, generateTag, parseTag, rehydrate, } from './pipeline/index.js';
|
|
9
|
+
export { encryptPIIMap, decryptPIIMap, generateKey, deriveKey, generateSalt, KeyProvider, InMemoryKeyProvider, EnvKeyProvider, } from './crypto/index.js';
|
|
10
|
+
import { AnonymizationResult, AnonymizationPolicy } from './types/index.js';
|
|
11
|
+
import { RecognizerRegistry } from './recognizers/index.js';
|
|
12
|
+
import { type INERModel } from './ner/index.js';
|
|
13
|
+
import { type NERModelMode, type DownloadProgressCallback } from './ner/model-manager.js';
|
|
14
|
+
import { type KeyProvider } from './crypto/index.js';
|
|
15
|
+
/**
 * Options that control the NER (named-entity recognition) stage.
 */
export interface NERConfig {
    /**
     * Which NER model to use:
     * - 'standard': full-size multilingual model (~1.1 GB)
     * - 'quantized': smaller quantized model (~280 MB)
     * - 'disabled': no NER; detection is regex-only
     * - 'custom': load the model from `modelPath` / `vocabPath`
     */
    mode: NERModelMode;
    /** Path to a custom model; required when `mode` is 'custom'. */
    modelPath?: string;
    /** Path to a custom vocabulary; required when `mode` is 'custom'. */
    vocabPath?: string;
    /**
     * Download the model automatically when it is not present.
     * @default true
     */
    autoDownload?: boolean;
    /** Invoked with progress updates for model downloads. */
    onDownloadProgress?: DownloadProgressCallback;
    /** Invoked with status messages. */
    onStatus?: (status: string) => void;
}
|
|
49
|
+
/**
 * Construction options for an {@link Anonymizer}. Every field is optional.
 */
export interface AnonymizerConfig {
    /** Recognizer registry; the default registry is used when omitted. */
    registry?: RecognizerRegistry;
    /**
     * NER stage configuration.
     * @default { mode: 'disabled' }
     */
    ner?: NERConfig;
    /**
     * Direct injection of an NER model instance, for advanced use cases.
     * @deprecated Use `ner` instead.
     */
    nerModel?: INERModel;
    /** Key provider used for encryption; a random key is generated when omitted. */
    keyProvider?: KeyProvider;
    /** Policy applied when a call does not pass its own; the default policy is used when omitted. */
    defaultPolicy?: AnonymizationPolicy;
    /** Model version string. */
    modelVersion?: string;
    /** Policy version string. */
    policyVersion?: string;
}
|
|
73
|
+
/**
 * Anonymizer
 *
 * The main class of the module: performs PII anonymization on text.
 */
export declare class Anonymizer {
    private registry;
    private nerModel;
    private nerConfig;
    private keyProvider;
    private defaultPolicy;
    private modelVersion;
    private policyVersion;
    private initialized;
    constructor(config?: AnonymizerConfig);
    /**
     * Prepares the anonymizer for use: downloads the NER model if needed
     * and loads it.
     */
    initialize(): Promise<void>;
    /**
     * Replaces PII found in `text` with placeholder tags.
     *
     * @param text - The text to anonymize.
     * @param locale - Optional locale hint, e.g. 'de-DE' or 'en-US'.
     * @param policy - Optional policy override.
     * @returns The anonymized text together with the encrypted PII map.
     */
    anonymize(
        text: string,
        locale?: string,
        policy?: Partial<AnonymizationPolicy>,
    ): Promise<AnonymizationResult>;
    /** Releases the resources held by the anonymizer. */
    dispose(): Promise<void>;
    /** Returns the recognizer registry in use. */
    getRegistry(): RecognizerRegistry;
    /** Returns the NER model, or `null` when there is none. */
    getNERModel(): INERModel | null;
    /** `true` once the anonymizer has been initialized. */
    get isInitialized(): boolean;
}
|
|
117
|
+
/**
 * Builds an {@link Anonymizer} from the given configuration.
 *
 * @param config - Optional anonymizer configuration.
 * @returns The new anonymizer.
 *
 * @example
 * ```typescript
 * // Regex-only (no NER)
 * const anonymizer = createAnonymizer();
 *
 * // With NER (auto-downloads model on first use)
 * const anonymizer = createAnonymizer({
 *   ner: { mode: 'quantized' }
 * });
 *
 * // With NER and progress callback
 * const anonymizer = createAnonymizer({
 *   ner: {
 *     mode: 'standard',
 *     onStatus: (status) => console.log(status),
 *     onDownloadProgress: (p) => console.log(`${p.file}: ${p.percent}%`)
 *   }
 * });
 * ```
 */
export declare function createAnonymizer(
    config?: AnonymizerConfig,
): Anonymizer;
|
|
141
|
+
/**
 * Builds an anonymizer backed by a custom NER model.
 *
 * @param modelPath - Path to the custom model.
 * @param vocabPath - Path to the custom vocabulary.
 * @param config - Remaining anonymizer options (without `nerModel` / `ner`).
 * @returns A promise for the anonymizer.
 * @deprecated Use createAnonymizer with ner: { mode: 'custom', modelPath, vocabPath } instead
 */
export declare function createAnonymizerWithNER(
    modelPath: string,
    vocabPath: string,
    config?: Omit<AnonymizerConfig, 'nerModel' | 'ner'>,
): Promise<Anonymizer>;
|
|
146
|
+
/**
 * One-off anonymization helper.
 *
 * Uses a temporary anonymizer with default settings (regex-only).
 *
 * @param text - The text to anonymize.
 * @param locale - Optional locale hint.
 * @param policy - Optional policy override.
 * @returns The anonymization result.
 */
export declare function anonymize(
    text: string,
    locale?: string,
    policy?: Partial<AnonymizationPolicy>,
): Promise<AnonymizationResult>;
|
|
151
|
+
/**
 * Fast path: anonymization using regex recognizers only, without NER.
 *
 * @param text - The text to anonymize.
 * @param policy - Optional policy override.
 * @returns The anonymization result.
 */
export declare function anonymizeRegexOnly(
    text: string,
    policy?: Partial<AnonymizationPolicy>,
): Promise<AnonymizationResult>;
|
|
155
|
+
/**
 * Anonymization with the NER stage enabled.
 *
 * Auto-downloads the quantized model on first use.
 *
 * @param text - The text to anonymize.
 * @param nerConfig - NER options; `mode` may only be 'standard' or 'quantized'.
 * @param policy - Optional policy override.
 * @returns The anonymization result.
 *
 * @example
 * ```typescript
 * const result = await anonymizeWithNER(
 *   'Contact John Smith at john@example.com',
 *   {
 *     mode: 'quantized',
 *     onStatus: console.log
 *   }
 * );
 * ```
 */
export declare function anonymizeWithNER(
    text: string,
    nerConfig: Omit<NERConfig, 'mode'> & {
        mode?: 'standard' | 'quantized';
    },
    policy?: Partial<AnonymizationPolicy>,
): Promise<AnonymizationResult>;
|
|
173
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,cAAc,kBAAkB,CAAC;AAGjC,OAAO,EACL,UAAU,EACV,eAAe,EACf,kBAAkB,EAClB,qBAAqB,EACrB,cAAc,EACd,iBAAiB,EACjB,eAAe,EACf,eAAe,EACf,cAAc,EACd,kBAAkB,EAClB,oBAAoB,EACpB,mBAAmB,EACnB,aAAa,EACb,wBAAwB,EACxB,sBAAsB,EACtB,0BAA0B,GAC3B,MAAM,wBAAwB,CAAC;AAGhC,OAAO,EACL,QAAQ,EACR,YAAY,EACZ,cAAc,EACd,kBAAkB,EAClB,kBAAkB,EAClB,iBAAiB,EACjB,UAAU,EACV,WAAW,EACX,aAAa,EACb,cAAc,EACd,KAAK,SAAS,EACd,KAAK,cAAc,EACnB,KAAK,aAAa,EAClB,KAAK,YAAY,EACjB,KAAK,wBAAwB,EAC7B,cAAc,EACd,gBAAgB,EAChB,iBAAiB,EACjB,aAAa,EACb,WAAW,EACX,eAAe,EACf,oBAAoB,GACrB,MAAM,gBAAgB,CAAC;AAGxB,OAAO,EACL,YAAY,EACZ,eAAe,EACf,WAAW,EACX,cAAc,EACd,WAAW,EACX,QAAQ,EACR,SAAS,GACV,MAAM,qBAAqB,CAAC;AAG7B,OAAO,EACL,aAAa,EACb,aAAa,EACb,WAAW,EACX,SAAS,EACT,YAAY,EACZ,WAAW,EACX,mBAAmB,EACnB,cAAc,GACf,MAAM,mBAAmB,CAAC;AAG3B,OAAO,EAEL,mBAAmB,EACnB,mBAAmB,EAKpB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAyB,kBAAkB,EAAE,MAAM,wBAAwB,CAAC;AACnF,OAAO,EAAE,KAAK,SAAS,EAAmD,MAAM,gBAAgB,CAAC;AACjG,OAAO,EAAE,KAAK,YAAY,EAAe,KAAK,wBAAwB,EAAE,MAAM,wBAAwB,CAAC;AAMvG,OAAO,EAA8B,KAAK,WAAW,EAAE,MAAM,mBAAmB,CAAC;AAIjF;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB;;;;;;OAMG;IACH,IAAI,EAAE,YAAY,CAAC;IAEnB;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;OAEG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IAEnB;;;OAGG;IACH,YAAY,CAAC,EAAE,OAAO,CAAC;IAEvB;;OAEG;IACH,kBAAkB,CAAC,EAAE,wBAAwB,CAAC;IAE9C;;OAEG;IACH,QAAQ,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,IAAI,CAAC;CACrC;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,yDAAyD;IACzD,QAAQ,CAAC,EAAE,kBAAkB,CAAC;IAE9B;;;OAGG;IACH,GAAG,CAAC,EAAE,SAAS,CAAC;IAEhB;;OAEG;IACH,QAAQ,CAAC,EAAE,SAAS,CAAC;IAErB,yEAAyE;IACzE,WAAW,CAAC,EAAE,WAAW,CAAC;IAE1B,oDAAoD;IACpD,aAAa,CAAC,EAAE,mBAAmB,CAAC;IAEpC,2BAA2B;IAC3B,YAAY,CAAC,EAAE,MAAM,CAAC;IAEtB,4BAA4B;IAC5B,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED;;;GAGG;AACH,qBAAa,UAAU;IACrB,OAAO,CAAC,QAAQ,CAAqB;IACrC,OAAO,CAAC,QAAQ,CAA0B;IAC1C,OAAO,CAAC,SAAS,CAAY;IAC7B,OAAO,CAAC,WAAW,CAAqB;IACxC,OAAO,CAAC,aAAa,CAAsB;IAC3C,OAAO,CAAC,YAAY,CAAS;
IAC7B,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,WAAW,CAAS;gBAEhB,MAAM,GAAE,gBAAqB;IAkBzC;;;OAGG;IACG,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IAuDjC;;;;;;OAMG;IACG,SAAS,CACb,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,MAAM,EACf,MAAM,CAAC,EAAE,OAAO,CAAC,mBAAmB,CAAC,GACpC,OAAO,CAAC,mBAAmB,CAAC;IAoF/B;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAO9B;;OAEG;IACH,WAAW,IAAI,kBAAkB;IAIjC;;OAEG;IACH,WAAW,IAAI,SAAS,GAAG,IAAI;IAI/B;;OAEG;IACH,IAAI,aAAa,IAAI,OAAO,CAE3B;CACF;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAgB,gBAAgB,CAAC,MAAM,CAAC,EAAE,gBAAgB,GAAG,UAAU,CAEtE;AAED;;;GAGG;AACH,wBAAsB,uBAAuB,CAC3C,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,EACjB,MAAM,CAAC,EAAE,IAAI,CAAC,gBAAgB,EAAE,UAAU,GAAG,KAAK,CAAC,GAClD,OAAO,CAAC,UAAU,CAAC,CAYrB;AAED;;;GAGG;AACH,wBAAsB,SAAS,CAC7B,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,MAAM,EACf,MAAM,CAAC,EAAE,OAAO,CAAC,mBAAmB,CAAC,GACpC,OAAO,CAAC,mBAAmB,CAAC,CAS9B;AAED;;GAEG;AACH,wBAAsB,kBAAkB,CACtC,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,OAAO,CAAC,mBAAmB,CAAC,GACpC,OAAO,CAAC,mBAAmB,CAAC,CAQ9B;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,gBAAgB,CACpC,IAAI,EAAE,MAAM,EACZ,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,MAAM,CAAC,GAAG;IAAE,IAAI,CAAC,EAAE,UAAU,GAAG,WAAW,CAAA;CAAE,EACxE,MAAM,CAAC,EAAE,OAAO,CAAC,mBAAmB,CAAC,GACpC,OAAO,CAAC,mBAAmB,CAAC,CAe9B"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bridge Anonymization Module
|
|
3
|
+
* Main entry point for on-device PII anonymization
|
|
4
|
+
*/
|
|
5
|
+
// Re-export types
|
|
6
|
+
export * from './types/index.js';
|
|
7
|
+
// Re-export recognizers
|
|
8
|
+
export { RegexRecognizer, RecognizerRegistry, createDefaultRegistry, createRegistry, getGlobalRegistry, emailRecognizer, phoneRecognizer, ibanRecognizer, bicSwiftRecognizer, creditCardRecognizer, ipAddressRecognizer, urlRecognizer, createCustomIdRecognizer, createCaseIdRecognizer, createCustomerIdRecognizer, } from './recognizers/index.js';
|
|
9
|
+
// Re-export NER components
|
|
10
|
+
export { NERModel, NERModelStub, createNERModel, createNERModelStub, WordPieceTokenizer, loadVocabFromFile, parseVocab, loadRuntime, detectRuntime, getRuntimeType, MODEL_REGISTRY, getModelCacheDir, isModelDownloaded, downloadModel, ensureModel, clearModelCache, listDownloadedModels, } from './ner/index.js';
|
|
11
|
+
// Re-export pipeline components
|
|
12
|
+
export { prenormalize, resolveEntities, tagEntities, validateOutput, generateTag, parseTag, rehydrate, } from './pipeline/index.js';
|
|
13
|
+
// Re-export crypto
|
|
14
|
+
export { encryptPIIMap, decryptPIIMap, generateKey, deriveKey, generateSalt, InMemoryKeyProvider, EnvKeyProvider, } from './crypto/index.js';
|
|
15
|
+
// Main anonymization imports
|
|
16
|
+
import { createDefaultPolicy, mergePolicy, } from './types/index.js';
|
|
17
|
+
import { createDefaultRegistry } from './recognizers/index.js';
|
|
18
|
+
import { NERModelStub, createNERModel, DEFAULT_LABEL_MAP } from './ner/index.js';
|
|
19
|
+
import { ensureModel } from './ner/model-manager.js';
|
|
20
|
+
import { prenormalize } from './pipeline/prenormalize.js';
|
|
21
|
+
import { resolveEntities } from './pipeline/resolver.js';
|
|
22
|
+
import { tagEntities, countEntitiesByType } from './pipeline/tagger.js';
|
|
23
|
+
import { validateOutput } from './pipeline/validator.js';
|
|
24
|
+
import { encryptPIIMap, generateKey } from './crypto/index.js';
|
|
25
|
+
import * as fs from 'fs/promises';
|
|
26
|
+
/**
 * Anonymizer instance
 * Main class for performing PII anonymization.
 *
 * Holds a recognizer registry, an optional NER model, an optional key
 * provider and a default policy, and runs them as a pipeline in
 * {@link Anonymizer.anonymize}.
 */
export class Anonymizer {
    registry;
    nerModel = null;
    nerConfig;
    keyProvider;
    defaultPolicy;
    modelVersion;
    policyVersion;
    initialized = false;
    /**
     * @param config - Optional configuration. Missing fields fall back to the
     *   default registry, the default policy, version '1.0.0' and NER mode
     *   'disabled'. An injected `nerModel` takes precedence over `ner`.
     */
    constructor(config = {}) {
        this.registry = config.registry ?? createDefaultRegistry();
        this.keyProvider = config.keyProvider ?? null;
        this.defaultPolicy = config.defaultPolicy ?? createDefaultPolicy();
        this.policyVersion = config.policyVersion ?? '1.0.0';
        // Handle NER configuration
        if (config.nerModel) {
            // Legacy: direct model injection. The model is recorded as a
            // 'custom' configuration that carries no paths.
            this.nerModel = config.nerModel;
            this.nerConfig = { mode: 'custom' };
            this.modelVersion = config.modelVersion ?? config.nerModel.version;
        }
        else {
            this.nerConfig = config.ner ?? { mode: 'disabled' };
            this.modelVersion = config.modelVersion ?? '1.0.0';
        }
    }
    /**
     * Initializes the anonymizer.
     * Downloads the NER model if needed and loads it. Does nothing when the
     * anonymizer is already initialized.
     *
     * @throws Error when the mode is 'custom', `modelPath`/`vocabPath` are not
     *   both set, and no model instance was injected through the constructor.
     */
    async initialize() {
        if (this.initialized)
            return;
        // Handle NER model setup based on mode
        if (this.nerConfig.mode === 'disabled') {
            this.nerModel = new NERModelStub();
        }
        else if (this.nerConfig.mode === 'custom') {
            const { modelPath, vocabPath } = this.nerConfig;
            if (modelPath && vocabPath) {
                this.nerModel = createNERModel({
                    modelPath,
                    vocabPath,
                    modelVersion: this.modelVersion,
                });
            }
            else if (this.nerModel === null) {
                throw new Error("NER mode 'custom' requires modelPath and vocabPath");
            }
            // Otherwise a model was injected via `config.nerModel`; keep it.
            // Previously this path always threw, which made injection unusable.
        }
        else {
            // 'standard' or 'quantized' - use model manager
            const { modelPath, vocabPath, labelMapPath } = await ensureModel(this.nerConfig.mode, {
                autoDownload: this.nerConfig.autoDownload ?? true,
                onProgress: this.nerConfig.onDownloadProgress,
                onStatus: this.nerConfig.onStatus,
            });
            // Load label map; best effort, any read or parse failure keeps the default
            let labelMap = DEFAULT_LABEL_MAP;
            try {
                const labelMapContent = await fs.readFile(labelMapPath, 'utf-8');
                labelMap = JSON.parse(labelMapContent);
            }
            catch {
                // Use default label map
            }
            this.nerModel = createNERModel({
                modelPath,
                vocabPath,
                labelMap,
                modelVersion: this.modelVersion,
            });
        }
        // Load the model
        if (!this.nerModel.loaded) {
            this.nerConfig.onStatus?.('Loading NER model...');
            await this.nerModel.load();
            this.nerConfig.onStatus?.('NER model loaded!');
        }
        // The version reported by the model replaces any configured value.
        this.modelVersion = this.nerModel.version;
        this.initialized = true;
    }
    /**
     * Anonymizes text, replacing PII with placeholder tags.
     *
     * Initializes the anonymizer first when that has not happened yet.
     *
     * @param text - Input text to anonymize
     * @param locale - Optional locale hint (e.g., 'de-DE', 'en-US'); accepted
     *   but not read by this method
     * @param policy - Optional policy override, passed through `mergePolicy`;
     *   the instance default policy is used when omitted
     * @returns Anonymization result with anonymized text, entities (without
     *   their `original` text), the encrypted PII map and stats
     */
    async anonymize(text, locale, policy) {
        if (!this.initialized) {
            await this.initialize();
        }
        const startTime = performance.now();
        // Merge policy with defaults
        const effectivePolicy = policy !== undefined ? mergePolicy(policy) : this.defaultPolicy;
        // Step 1: Pre-normalize text
        const normalizedText = prenormalize(text);
        // Step 2: Run regex recognizers
        const regexMatches = this.registry.findAll(normalizedText, effectivePolicy);
        // Step 3: Run NER model
        const nerResult = await this.nerModel.predict(normalizedText, effectivePolicy);
        const nerMatches = nerResult.spans;
        // Step 4: Resolve and merge entities
        const resolvedMatches = resolveEntities(regexMatches, nerMatches, effectivePolicy, normalizedText);
        // Step 5: Tag entities and build PII map
        const { anonymizedText, entities, piiMap } = tagEntities(normalizedText, resolvedMatches, effectivePolicy);
        // Step 6: Validate output
        const validation = validateOutput(anonymizedText, entities, Array.from(piiMap.keys()), effectivePolicy);
        if (!validation.valid) {
            // Log validation errors (but don't expose raw PII): only the
            // code and message of each error are forwarded to the console.
            const safeErrors = validation.errors.map((e) => ({
                code: e.code,
                message: e.message,
            }));
            // eslint-disable-next-line no-console
            console.warn('Validation warnings:', safeErrors);
        }
        // Step 7: Encrypt PII map. Without a key provider a fresh key is
        // generated for this call; that key is not part of the returned result.
        const encryptionKey = this.keyProvider !== null
            ? await this.keyProvider.getKey()
            : generateKey();
        const encryptedPiiMap = encryptPIIMap(piiMap, encryptionKey);
        // Step 8: Build stats
        const endTime = performance.now();
        const stats = {
            countsByType: countEntitiesByType(entities),
            totalEntities: entities.length,
            modelVersion: this.modelVersion,
            policyVersion: this.policyVersion,
            processingTimeMs: endTime - startTime,
            leakScanPassed: validation.leakScanPassed,
        };
        // Step 9: Build result (without original text in entities)
        const safeEntities = entities.map(({ original, ...rest }) => rest);
        return {
            anonymizedText,
            entities: safeEntities,
            piiMap: encryptedPiiMap,
            stats,
        };
    }
    /**
     * Disposes of resources: disposes the NER model when one is present and
     * marks the anonymizer as uninitialized.
     */
    async dispose() {
        if (this.nerModel) {
            await this.nerModel.dispose();
        }
        this.initialized = false;
    }
    /**
     * Gets the recognizer registry
     */
    getRegistry() {
        return this.registry;
    }
    /**
     * Gets the NER model (`null` until one has been injected or created)
     */
    getNERModel() {
        return this.nerModel;
    }
    /**
     * Whether the anonymizer is initialized
     */
    get isInitialized() {
        return this.initialized;
    }
}
|
|
197
|
+
/**
 * Builds an Anonymizer from the given configuration.
 *
 * @param config - Optional anonymizer configuration, passed to the constructor as-is.
 * @returns The new Anonymizer (not yet initialized).
 *
 * @example
 * ```typescript
 * // Regex-only (no NER)
 * const anonymizer = createAnonymizer();
 *
 * // With NER (auto-downloads model on first use)
 * const anonymizer = createAnonymizer({
 *   ner: { mode: 'quantized' }
 * });
 *
 * // With NER and progress callback
 * const anonymizer = createAnonymizer({
 *   ner: {
 *     mode: 'standard',
 *     onStatus: (status) => console.log(status),
 *     onDownloadProgress: (p) => console.log(`${p.file}: ${p.percent}%`)
 *   }
 * });
 * ```
 */
export function createAnonymizer(config) {
    const instance = new Anonymizer(config);
    return instance;
}
|
|
223
|
+
/**
 * Builds and initializes an anonymizer backed by a custom NER model.
 *
 * The `ner` entry is always set to the custom paths given here, replacing
 * any `ner` entry present in `config`.
 *
 * @param modelPath - Path to the custom model.
 * @param vocabPath - Path to the custom vocabulary.
 * @param config - Remaining anonymizer options.
 * @returns The initialized anonymizer.
 * @deprecated Use createAnonymizer with ner: { mode: 'custom', modelPath, vocabPath } instead
 */
export async function createAnonymizerWithNER(modelPath, vocabPath, config) {
    const customNer = { mode: 'custom', modelPath, vocabPath };
    const ready = new Anonymizer({ ...config, ner: customNer });
    await ready.initialize();
    return ready;
}
|
|
239
|
+
/**
 * One-off anonymization helper.
 *
 * Creates a temporary anonymizer with default settings (regex-only),
 * initializes it, runs a single anonymization and disposes the anonymizer
 * afterwards, whether or not the anonymization succeeded.
 *
 * @param text - The text to anonymize.
 * @param locale - Optional locale hint, forwarded to the anonymizer.
 * @param policy - Optional policy override, forwarded to the anonymizer.
 * @returns The anonymization result.
 */
export async function anonymize(text, locale, policy) {
    const oneShot = createAnonymizer();
    await oneShot.initialize();
    let outcome;
    try {
        outcome = await oneShot.anonymize(text, locale, policy);
    }
    finally {
        await oneShot.dispose();
    }
    return outcome;
}
|
|
253
|
+
/**
 * Quick regex-only anonymization (no NER, faster).
 *
 * Copies the given policy, replaces its `nerEnabledTypes` with an empty Set
 * and delegates to `anonymize` without a locale.
 *
 * @param text - The text to anonymize.
 * @param policy - Optional policy override; its `nerEnabledTypes` is ignored.
 * @returns The anonymization result.
 */
export async function anonymizeRegexOnly(text, policy) {
    const withoutNer = { ...policy };
    // An empty set disables all NER types
    withoutNer.nerEnabledTypes = new Set();
    return anonymize(text, undefined, withoutNer);
}
|
|
264
|
+
/**
 * Full anonymization with NER.
 * Auto-downloads the quantized model on first use.
 *
 * Creates a temporary anonymizer with the given NER options, initializes
 * it, runs a single anonymization without a locale and disposes the
 * anonymizer afterwards, whether or not the anonymization succeeded.
 *
 * @param text - The text to anonymize.
 * @param nerConfig - NER options. When `mode` is missing or `undefined`,
 *   'quantized' is used.
 * @param policy - Optional policy override, forwarded to the anonymizer.
 * @returns The anonymization result.
 *
 * @example
 * ```typescript
 * const result = await anonymizeWithNER(
 *   'Contact John Smith at john@example.com',
 *   {
 *     mode: 'quantized',
 *     onStatus: console.log
 *   }
 * );
 * ```
 */
export async function anonymizeWithNER(text, nerConfig, policy) {
    const anonymizer = createAnonymizer({
        ner: {
            ...nerConfig,
            // Set after the spread: an explicit `mode: undefined` in
            // nerConfig must not overwrite the 'quantized' default.
            mode: nerConfig.mode ?? 'quantized',
        },
    });
    await anonymizer.initialize();
    try {
        return await anonymizer.anonymize(text, undefined, policy);
    }
    finally {
        await anonymizer.dispose();
    }
}
|
|
294
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,kBAAkB;AAClB,cAAc,kBAAkB,CAAC;AAEjC,wBAAwB;AACxB,OAAO,EAEL,eAAe,EACf,kBAAkB,EAClB,qBAAqB,EACrB,cAAc,EACd,iBAAiB,EACjB,eAAe,EACf,eAAe,EACf,cAAc,EACd,kBAAkB,EAClB,oBAAoB,EACpB,mBAAmB,EACnB,aAAa,EACb,wBAAwB,EACxB,sBAAsB,EACtB,0BAA0B,GAC3B,MAAM,wBAAwB,CAAC;AAEhC,2BAA2B;AAC3B,OAAO,EACL,QAAQ,EACR,YAAY,EACZ,cAAc,EACd,kBAAkB,EAClB,kBAAkB,EAClB,iBAAiB,EACjB,UAAU,EACV,WAAW,EACX,aAAa,EACb,cAAc,EAMd,cAAc,EACd,gBAAgB,EAChB,iBAAiB,EACjB,aAAa,EACb,WAAW,EACX,eAAe,EACf,oBAAoB,GACrB,MAAM,gBAAgB,CAAC;AAExB,gCAAgC;AAChC,OAAO,EACL,YAAY,EACZ,eAAe,EACf,WAAW,EACX,cAAc,EACd,WAAW,EACX,QAAQ,EACR,SAAS,GACV,MAAM,qBAAqB,CAAC;AAE7B,mBAAmB;AACnB,OAAO,EACL,aAAa,EACb,aAAa,EACb,WAAW,EACX,SAAS,EACT,YAAY,EAEZ,mBAAmB,EACnB,cAAc,GACf,MAAM,mBAAmB,CAAC;AAE3B,6BAA6B;AAC7B,OAAO,EAML,mBAAmB,EACnB,WAAW,GACZ,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,qBAAqB,EAAsB,MAAM,wBAAwB,CAAC;AACnF,OAAO,EAAkB,YAAY,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACjG,OAAO,EAAqB,WAAW,EAAiC,MAAM,wBAAwB,CAAC;AAEvG,OAAO,EAAE,YAAY,EAAE,MAAM,4BAA4B,CAAC;AAC1D,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AACzD,OAAO,EAAE,WAAW,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AACxE,OAAO,EAAE,cAAc,EAAE,MAAM,yBAAyB,CAAC;AACzD,OAAO,EAAE,aAAa,EAAE,WAAW,EAAoB,MAAM,mBAAmB,CAAC;AACjF,OAAO,KAAK,EAAE,MAAM,aAAa,CAAC;AA0ElC;;;GAGG;AACH,MAAM,OAAO,UAAU;IACb,QAAQ,CAAqB;IAC7B,QAAQ,GAAqB,IAAI,CAAC;IAClC,SAAS,CAAY;IACrB,WAAW,CAAqB;IAChC,aAAa,CAAsB;IACnC,YAAY,CAAS;IACrB,aAAa,CAAS;IACtB,WAAW,GAAG,KAAK,CAAC;IAE5B,YAAY,SAA2B,EAAE;QACvC,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,IAAI,qBAAqB,EAAE,CAAC;QAC3D,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC;QAC9C,IAAI,CAAC,aAAa,GAAG,MAAM,CAAC,aAAa,IAAI,mBAAmB,EAAE,CAAC;QACnE,IAAI,CAAC,aAAa,GAAG,MAAM,CAAC,aAAa,IAAI,OAAO,CAAC;QAErD,2BAA2B;QAC3B,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YACpB,iCAAiC;YACjC,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;YAChC,IAAI,CAAC,SAAS,GAAG,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;YACpC,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,MAAM,C
AAC,QAAQ,CAAC,OAAO,CAAC;QACrE,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,GAAG,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC;YACpD,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,OAAO,CAAC;QACrD,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,WAAW;YAAE,OAAO;QAE7B,uCAAuC;QACvC,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;YACvC,IAAI,CAAC,QAAQ,GAAG,IAAI,YAAY,EAAE,CAAC;QACrC,CAAC;aAAM,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC5C,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,SAAS,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,SAAS,EAAE,CAAC;gBAC3D,MAAM,IAAI,KAAK,CAAC,oDAAoD,CAAC,CAAC;YACxE,CAAC;YAED,IAAI,CAAC,QAAQ,GAAG,cAAc,CAAC;gBAC7B,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS;gBACnC,SAAS,EAAE,IAAI,CAAC,SAAS,CAAC,SAAS;gBACnC,YAAY,EAAE,IAAI,CAAC,YAAY;aAChC,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,gDAAgD;YAChD,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,YAAY,EAAE,GAAG,MAAM,WAAW,CAC9D,IAAI,CAAC,SAAS,CAAC,IAAI,EACnB;gBACE,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,YAAY,IAAI,IAAI;gBACjD,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,kBAAkB;gBAC7C,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ;aAClC,CACF,CAAC;YAEF,iBAAiB;YACjB,IAAI,QAAQ,GAAG,iBAAiB,CAAC;YACjC,IAAI,CAAC;gBACH,MAAM,eAAe,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC;gBACjE,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAa,CAAC;YACrD,CAAC;YAAC,MAAM,CAAC;gBACP,wBAAwB;YAC1B,CAAC;YAED,IAAI,CAAC,QAAQ,GAAG,cAAc,CAAC;gBAC7B,SAAS;gBACT,SAAS;gBACT,QAAQ;gBACR,YAAY,EAAE,IAAI,CAAC,YAAY;aAChC,CAAC,CAAC;QACL,CAAC;QAED,iBAAiB;QACjB,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;YAC1B,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,CAAC,sBAAsB,CAAC,CAAC;YAClD,MAAM,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YAC3B,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,CAAC,mBAAmB,CAAC,CAAC;QACjD,CAAC;QAED,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC;QAC1C,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;IAC1B,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,SAAS,CACb,IAAY,EACZ,MAAe,EACf,MAAqC;QAErC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACtB,MAAM,IAAI,CAAC,UAAU,EAAE,CAAC;QAC1B,CAAC;QAED,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAEpC,6BAA6B;QAC7B,MAAM,eAAe,GAAG,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,WAA
W,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC;QAExF,6BAA6B;QAC7B,MAAM,cAAc,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;QAE1C,gCAAgC;QAChC,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,cAAc,EAAE,eAAe,CAAC,CAAC;QAE5E,wBAAwB;QACxB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,QAAS,CAAC,OAAO,CAAC,cAAc,EAAE,eAAe,CAAC,CAAC;QAChF,MAAM,UAAU,GAAG,SAAS,CAAC,KAAK,CAAC;QAEnC,qCAAqC;QACrC,MAAM,eAAe,GAAG,eAAe,CACrC,YAAY,EACZ,UAAU,EACV,eAAe,EACf,cAAc,CACf,CAAC;QAEF,yCAAyC;QACzC,MAAM,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,WAAW,CACtD,cAAc,EACd,eAAe,EACf,eAAe,CAChB,CAAC;QAEF,0BAA0B;QAC1B,MAAM,UAAU,GAAG,cAAc,CAC/B,cAAc,EACd,QAAQ,EACR,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,EACzB,eAAe,CAChB,CAAC;QAEF,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;YACtB,mDAAmD;YACnD,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;gBAC/C,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,OAAO,EAAE,CAAC,CAAC,OAAO;aACnB,CAAC,CAAC,CAAC;YACJ,sCAAsC;YACtC,OAAO,CAAC,IAAI,CAAC,sBAAsB,EAAE,UAAU,CAAC,CAAC;QACnD,CAAC;QAED,0BAA0B;QAC1B,MAAM,aAAa,GAAG,IAAI,CAAC,WAAW,KAAK,IAAI;YAC7C,CAAC,CAAC,MAAM,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE;YACjC,CAAC,CAAC,WAAW,EAAE,CAAC;QAElB,MAAM,eAAe,GAAG,aAAa,CAAC,MAAM,EAAE,aAAa,CAAC,CAAC;QAE7D,sBAAsB;QACtB,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAClC,MAAM,KAAK,GAAuB;YAChC,YAAY,EAAE,mBAAmB,CAAC,QAAQ,CAAC;YAC3C,aAAa,EAAE,QAAQ,CAAC,MAAM;YAC9B,YAAY,EAAE,IAAI,CAAC,YAAY;YAC/B,aAAa,EAAE,IAAI,CAAC,aAAa;YACjC,gBAAgB,EAAE,OAAO,GAAG,SAAS;YACrC,cAAc,EAAE,UAAU,CAAC,cAAc;SAC1C,CAAC;QAEF,2DAA2D;QAC3D,MAAM,YAAY,GAAuC,QAAQ,CAAC,GAAG,CACnE,CAAC,EAAE,QAAQ,EAAE,GAAG,IAAI,EAAE,EAAE,EAAE,CAAC,IAAI,CAChC,CAAC;QAEF,OAAO;YACL,cAAc;YACd,QAAQ,EAAE,YAAY;YACtB,MAAM,EAAE,eAAe;YACvB,KAAK;SACN,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,MAAM,IAAI,CAAC,QAAQ,CAAC,OAAO,EAAE,CAAC;QAChC,CAAC;QACD,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,WAAW;QACT,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,WAAW;QACT,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,IAAI,aAAa;QACf,OAAO,IAAI,CAAC,WAAW,
CAAC;IAC1B,CAAC;CACF;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,UAAU,gBAAgB,CAAC,MAAyB;IACxD,OAAO,IAAI,UAAU,CAAC,MAAM,CAAC,CAAC;AAChC,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAC3C,SAAiB,EACjB,SAAiB,EACjB,MAAmD;IAEnD,MAAM,UAAU,GAAG,IAAI,UAAU,CAAC;QAChC,GAAG,MAAM;QACT,GAAG,EAAE;YACH,IAAI,EAAE,QAAQ;YACd,SAAS;YACT,SAAS;SACV;KACF,CAAC,CAAC;IAEH,MAAM,UAAU,CAAC,UAAU,EAAE,CAAC;IAC9B,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,IAAY,EACZ,MAAe,EACf,MAAqC;IAErC,MAAM,UAAU,GAAG,gBAAgB,EAAE,CAAC;IACtC,MAAM,UAAU,CAAC,UAAU,EAAE,CAAC;IAE9B,IAAI,CAAC;QACH,OAAO,MAAM,UAAU,CAAC,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;IAC1D,CAAC;YAAS,CAAC;QACT,MAAM,UAAU,CAAC,OAAO,EAAE,CAAC;IAC7B,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,IAAY,EACZ,MAAqC;IAErC,kCAAkC;IAClC,MAAM,eAAe,GAAiC;QACpD,GAAG,MAAM;QACT,eAAe,EAAE,IAAI,GAAG,EAAE,EAAE,wBAAwB;KACrD,CAAC;IAEF,OAAO,SAAS,CAAC,IAAI,EAAE,SAAS,EAAE,eAAe,CAAC,CAAC;AACrD,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,IAAY,EACZ,SAAwE,EACxE,MAAqC;IAErC,MAAM,UAAU,GAAG,gBAAgB,CAAC;QAClC,GAAG,EAAE;YACH,IAAI,EAAE,SAAS,CAAC,IAAI,IAAI,WAAW;YACnC,GAAG,SAAS;SACb;KACF,CAAC,CAAC;IAEH,MAAM,UAAU,CAAC,UAAU,EAAE,CAAC;IAE9B,IAAI,CAAC;QACH,OAAO,MAAM,UAAU,CAAC,SAAS,CAAC,IAAI,EAAE,SAAS,EAAE,MAAM,CAAC,CAAC;IAC7D,CAAC;YAAS,CAAC;QACT,MAAM,UAAU,CAAC,OAAO,EAAE,CAAC;IAC7B,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BIO Tag Decoder
|
|
3
|
+
* Converts BIO-tagged token sequences to entity spans
|
|
4
|
+
*/
|
|
5
|
+
import { SpanMatch } from '../types/index.js';
|
|
6
|
+
import type { Token } from './tokenizer.js';
|
|
7
|
+
/**
|
|
8
|
+
* BIO tag types
|
|
9
|
+
*/
|
|
10
|
+
export declare enum BIOTag {
|
|
11
|
+
/** Beginning of an entity */
|
|
12
|
+
B = "B",
|
|
13
|
+
/** Inside an entity (continuation) */
|
|
14
|
+
I = "I",
|
|
15
|
+
/** Outside any entity */
|
|
16
|
+
O = "O"
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Parsed BIO label
|
|
20
|
+
*/
|
|
21
|
+
export interface ParsedBIOLabel {
|
|
22
|
+
/** BIO tag type */
|
|
23
|
+
tag: BIOTag;
|
|
24
|
+
/** Entity type (null for O tag) */
|
|
25
|
+
entityType: string | null;
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Raw entity span from NER (before conversion to SpanMatch)
|
|
29
|
+
*/
|
|
30
|
+
export interface RawNEREntity {
|
|
31
|
+
/** Entity type string from model */
|
|
32
|
+
type: string;
|
|
33
|
+
/** Start character offset */
|
|
34
|
+
start: number;
|
|
35
|
+
/** End character offset */
|
|
36
|
+
end: number;
|
|
37
|
+
/** Combined confidence score */
|
|
38
|
+
confidence: number;
|
|
39
|
+
/** Raw text */
|
|
40
|
+
text: string;
|
|
41
|
+
/** Token indices that make up this entity */
|
|
42
|
+
tokenIndices: number[];
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* Parses a BIO label string (e.g., "B-PER", "I-ORG", "O")
|
|
46
|
+
*/
|
|
47
|
+
export declare function parseBIOLabel(label: string): ParsedBIOLabel;
|
|
48
|
+
/**
|
|
49
|
+
* Decodes BIO-tagged tokens into entity spans
|
|
50
|
+
*/
|
|
51
|
+
export declare function decodeBIOTags(tokens: Token[], labels: string[], confidences: number[], originalText: string): RawNEREntity[];
|
|
52
|
+
/**
|
|
53
|
+
* Converts raw NER entities to SpanMatch format
|
|
54
|
+
*/
|
|
55
|
+
export declare function convertToSpanMatches(rawEntities: RawNEREntity[], confidenceThreshold?: number): SpanMatch[];
|
|
56
|
+
/**
|
|
57
|
+
* Post-processes NER spans to clean up boundaries
|
|
58
|
+
*/
|
|
59
|
+
export declare function cleanupSpanBoundaries(spans: SpanMatch[], originalText: string): SpanMatch[];
|
|
60
|
+
/**
|
|
61
|
+
* Merges adjacent spans of the same type
|
|
62
|
+
*/
|
|
63
|
+
export declare function mergeAdjacentSpans(spans: SpanMatch[], originalText: string, maxGap?: number): SpanMatch[];
|
|
64
|
+
//# sourceMappingURL=bio-decoder.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"bio-decoder.d.ts","sourceRoot":"","sources":["../../src/ner/bio-decoder.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAW,SAAS,EAAmB,MAAM,mBAAmB,CAAC;AAExE,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAC;AAE5C;;GAEG;AACH,oBAAY,MAAM;IAChB,6BAA6B;IAC7B,CAAC,MAAM;IACP,sCAAsC;IACtC,CAAC,MAAM;IACP,yBAAyB;IACzB,CAAC,MAAM;CACR;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,mBAAmB;IACnB,GAAG,EAAE,MAAM,CAAC;IACZ,mCAAmC;IACnC,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,oCAAoC;IACpC,IAAI,EAAE,MAAM,CAAC;IACb,6BAA6B;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,2BAA2B;IAC3B,GAAG,EAAE,MAAM,CAAC;IACZ,gCAAgC;IAChC,UAAU,EAAE,MAAM,CAAC;IACnB,eAAe;IACf,IAAI,EAAE,MAAM,CAAC;IACb,6CAA6C;IAC7C,YAAY,EAAE,MAAM,EAAE,CAAC;CACxB;AAED;;GAEG;AACH,wBAAgB,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,cAAc,CAyB3D;AAED;;GAEG;AACH,wBAAgB,aAAa,CAC3B,MAAM,EAAE,KAAK,EAAE,EACf,MAAM,EAAE,MAAM,EAAE,EAChB,WAAW,EAAE,MAAM,EAAE,EACrB,YAAY,EAAE,MAAM,GACnB,YAAY,EAAE,CAkFhB;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAClC,WAAW,EAAE,YAAY,EAAE,EAC3B,mBAAmB,GAAE,MAAY,GAChC,SAAS,EAAE,CA0Bb;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,SAAS,EAAE,EAClB,YAAY,EAAE,MAAM,GACnB,SAAS,EAAE,CAoCb;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAChC,KAAK,EAAE,SAAS,EAAE,EAClB,YAAY,EAAE,MAAM,EACpB,MAAM,GAAE,MAAU,GACjB,SAAS,EAAE,CAiCb"}
|