@elanlanguages/bridge-anonymization 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/README.md +382 -0
  2. package/dist/crypto/index.d.ts +6 -0
  3. package/dist/crypto/index.d.ts.map +1 -0
  4. package/dist/crypto/index.js +6 -0
  5. package/dist/crypto/index.js.map +1 -0
  6. package/dist/crypto/pii-map-crypto.d.ts +100 -0
  7. package/dist/crypto/pii-map-crypto.d.ts.map +1 -0
  8. package/dist/crypto/pii-map-crypto.js +163 -0
  9. package/dist/crypto/pii-map-crypto.js.map +1 -0
  10. package/dist/index.d.ts +173 -0
  11. package/dist/index.d.ts.map +1 -0
  12. package/dist/index.js +294 -0
  13. package/dist/index.js.map +1 -0
  14. package/dist/ner/bio-decoder.d.ts +64 -0
  15. package/dist/ner/bio-decoder.d.ts.map +1 -0
  16. package/dist/ner/bio-decoder.js +216 -0
  17. package/dist/ner/bio-decoder.js.map +1 -0
  18. package/dist/ner/index.d.ts +10 -0
  19. package/dist/ner/index.d.ts.map +1 -0
  20. package/dist/ner/index.js +10 -0
  21. package/dist/ner/index.js.map +1 -0
  22. package/dist/ner/model-manager.d.ts +102 -0
  23. package/dist/ner/model-manager.d.ts.map +1 -0
  24. package/dist/ner/model-manager.js +253 -0
  25. package/dist/ner/model-manager.js.map +1 -0
  26. package/dist/ner/ner-model.d.ts +114 -0
  27. package/dist/ner/ner-model.d.ts.map +1 -0
  28. package/dist/ner/ner-model.js +240 -0
  29. package/dist/ner/ner-model.js.map +1 -0
  30. package/dist/ner/onnx-runtime.d.ts +45 -0
  31. package/dist/ner/onnx-runtime.d.ts.map +1 -0
  32. package/dist/ner/onnx-runtime.js +99 -0
  33. package/dist/ner/onnx-runtime.js.map +1 -0
  34. package/dist/ner/tokenizer.d.ts +140 -0
  35. package/dist/ner/tokenizer.d.ts.map +1 -0
  36. package/dist/ner/tokenizer.js +341 -0
  37. package/dist/ner/tokenizer.js.map +1 -0
  38. package/dist/pipeline/index.d.ts +9 -0
  39. package/dist/pipeline/index.d.ts.map +1 -0
  40. package/dist/pipeline/index.js +9 -0
  41. package/dist/pipeline/index.js.map +1 -0
  42. package/dist/pipeline/prenormalize.d.ts +48 -0
  43. package/dist/pipeline/prenormalize.d.ts.map +1 -0
  44. package/dist/pipeline/prenormalize.js +94 -0
  45. package/dist/pipeline/prenormalize.js.map +1 -0
  46. package/dist/pipeline/resolver.d.ts +56 -0
  47. package/dist/pipeline/resolver.d.ts.map +1 -0
  48. package/dist/pipeline/resolver.js +238 -0
  49. package/dist/pipeline/resolver.js.map +1 -0
  50. package/dist/pipeline/tagger.d.ts +74 -0
  51. package/dist/pipeline/tagger.d.ts.map +1 -0
  52. package/dist/pipeline/tagger.js +169 -0
  53. package/dist/pipeline/tagger.js.map +1 -0
  54. package/dist/pipeline/validator.d.ts +65 -0
  55. package/dist/pipeline/validator.d.ts.map +1 -0
  56. package/dist/pipeline/validator.js +264 -0
  57. package/dist/pipeline/validator.js.map +1 -0
  58. package/dist/recognizers/base.d.ts +78 -0
  59. package/dist/recognizers/base.d.ts.map +1 -0
  60. package/dist/recognizers/base.js +100 -0
  61. package/dist/recognizers/base.js.map +1 -0
  62. package/dist/recognizers/bic-swift.d.ts +10 -0
  63. package/dist/recognizers/bic-swift.d.ts.map +1 -0
  64. package/dist/recognizers/bic-swift.js +107 -0
  65. package/dist/recognizers/bic-swift.js.map +1 -0
  66. package/dist/recognizers/credit-card.d.ts +32 -0
  67. package/dist/recognizers/credit-card.d.ts.map +1 -0
  68. package/dist/recognizers/credit-card.js +160 -0
  69. package/dist/recognizers/credit-card.js.map +1 -0
  70. package/dist/recognizers/custom-id.d.ts +28 -0
  71. package/dist/recognizers/custom-id.d.ts.map +1 -0
  72. package/dist/recognizers/custom-id.js +116 -0
  73. package/dist/recognizers/custom-id.js.map +1 -0
  74. package/dist/recognizers/email.d.ts +10 -0
  75. package/dist/recognizers/email.d.ts.map +1 -0
  76. package/dist/recognizers/email.js +75 -0
  77. package/dist/recognizers/email.js.map +1 -0
  78. package/dist/recognizers/iban.d.ts +14 -0
  79. package/dist/recognizers/iban.d.ts.map +1 -0
  80. package/dist/recognizers/iban.js +67 -0
  81. package/dist/recognizers/iban.js.map +1 -0
  82. package/dist/recognizers/index.d.ts +20 -0
  83. package/dist/recognizers/index.d.ts.map +1 -0
  84. package/dist/recognizers/index.js +42 -0
  85. package/dist/recognizers/index.js.map +1 -0
  86. package/dist/recognizers/ip-address.d.ts +14 -0
  87. package/dist/recognizers/ip-address.d.ts.map +1 -0
  88. package/dist/recognizers/ip-address.js +183 -0
  89. package/dist/recognizers/ip-address.js.map +1 -0
  90. package/dist/recognizers/phone.d.ts +10 -0
  91. package/dist/recognizers/phone.d.ts.map +1 -0
  92. package/dist/recognizers/phone.js +145 -0
  93. package/dist/recognizers/phone.js.map +1 -0
  94. package/dist/recognizers/registry.d.ts +59 -0
  95. package/dist/recognizers/registry.d.ts.map +1 -0
  96. package/dist/recognizers/registry.js +113 -0
  97. package/dist/recognizers/registry.js.map +1 -0
  98. package/dist/recognizers/url.d.ts +14 -0
  99. package/dist/recognizers/url.d.ts.map +1 -0
  100. package/dist/recognizers/url.js +121 -0
  101. package/dist/recognizers/url.js.map +1 -0
  102. package/dist/types/index.d.ts +134 -0
  103. package/dist/types/index.d.ts.map +1 -0
  104. package/dist/types/index.js +69 -0
  105. package/dist/types/index.js.map +1 -0
  106. package/dist/types/pii-types.d.ts +50 -0
  107. package/dist/types/pii-types.d.ts.map +1 -0
  108. package/dist/types/pii-types.js +114 -0
  109. package/dist/types/pii-types.js.map +1 -0
  110. package/dist/utils/iban-checksum.d.ts +23 -0
  111. package/dist/utils/iban-checksum.d.ts.map +1 -0
  112. package/dist/utils/iban-checksum.js +106 -0
  113. package/dist/utils/iban-checksum.js.map +1 -0
  114. package/dist/utils/index.d.ts +8 -0
  115. package/dist/utils/index.d.ts.map +1 -0
  116. package/dist/utils/index.js +8 -0
  117. package/dist/utils/index.js.map +1 -0
  118. package/dist/utils/luhn.d.ts +17 -0
  119. package/dist/utils/luhn.d.ts.map +1 -0
  120. package/dist/utils/luhn.js +55 -0
  121. package/dist/utils/luhn.js.map +1 -0
  122. package/dist/utils/offsets.d.ts +86 -0
  123. package/dist/utils/offsets.d.ts.map +1 -0
  124. package/dist/utils/offsets.js +124 -0
  125. package/dist/utils/offsets.js.map +1 -0
  126. package/package.json +62 -0
@@ -0,0 +1,114 @@
1
+ /**
2
+ * NER Model Wrapper
3
+ * ONNX Runtime integration for Named Entity Recognition
4
+ * Supports both onnxruntime-node and onnxruntime-web
5
+ */
6
+ import { SpanMatch, AnonymizationPolicy } from '../types/index.js';
7
+ /**
8
+ * Complete configuration for an NERModel; createNERModel() fills in defaults for every field except modelPath and vocabPath
9
+ */
10
+ export interface NERModelConfig {
11
+ /** Path to the ONNX model file; NERModel.load() passes it to InferenceSession.create() */
12
+ modelPath: string;
13
+ /** Path to the vocabulary file; NERModel.load() reads it with loadVocabFromFile() to build the WordPiece tokenizer */
14
+ vocabPath: string;
15
+ /** Label mapping (model output class index -> label string such as 'B-PER') */
16
+ labelMap: string[];
17
+ /** Maximum sequence length; forwarded to the tokenizer */
18
+ maxLength: number;
19
+ /** Whether the model expects lowercase input; forwarded to the tokenizer */
20
+ doLowerCase: boolean;
21
+ /** Model version for tracking; echoed back in NERPrediction.modelVersion */
22
+ modelVersion: string;
23
+ }
24
+ /**
25
+ * Result of one predict() call on a single text
26
+ */
27
+ export interface NERPrediction {
28
+ /** Detected entity spans that survived confidence filtering and, when a policy was given, the policy's type filter */
29
+ spans: SpanMatch[];
30
+ /** Elapsed time of the predict() call in ms (NERModel measures it with performance.now(); NERModelStub reports 0) */
31
+ processingTimeMs: number;
32
+ /** Version string of the model that produced the result */
33
+ modelVersion: string;
34
+ }
35
+ /**
36
+ * Default label map for common NER models (CoNLL-style): 'O' followed by B-/I- tags for PER, ORG, LOC and MISC
37
+ */
38
+ export declare const DEFAULT_LABEL_MAP: string[];
39
+ /**
40
+ * NER model wrapper for ONNX inference: await load() once, then call predict(); call dispose() when finished
41
+ */
42
+ export declare class NERModel {
43
+ private ort;
44
+ private session;
45
+ private tokenizer;
46
+ private config;
47
+ private isLoaded;
48
+ constructor(config: NERModelConfig);
49
+ /**
50
+ * Loads the ONNX runtime, the model session and the tokenizer vocabulary; returns immediately if already loaded
51
+ */
52
+ load(): Promise<void>;
53
+ /**
54
+ * Predicts entities in text; rejects with 'Model not loaded. Call load() first.' before load(). With a policy, only types present in both enabledTypes and nerEnabledTypes are returned
55
+ */
56
+ predict(text: string, policy?: AnonymizationPolicy): Promise<NERPrediction>;
57
+ /**
58
+ * Runs ONNX inference on a tokenization (feeds input_ids and attention_mask, plus token_type_ids when the session lists that input)
59
+ */
60
+ private runInference;
61
+ /**
62
+ * Processes model logits to extract one label and one softmax confidence per token
63
+ */
64
+ private processLogits;
65
+ /**
66
+ * Gets the lowest confidence threshold across policy.nerEnabledTypes (0.5 when no policy is given or a type has no entry)
67
+ */
68
+ private getMinConfidence;
69
+ /**
70
+ * Gets model version (config.modelVersion)
71
+ */
72
+ get version(): string;
73
+ /**
74
+ * Checks if model is loaded (true after load() completes, false again after dispose())
75
+ */
76
+ get loaded(): boolean;
77
+ /**
78
+ * Disposes of model resources; load() must be awaited again before the next predict()
79
+ */
80
+ dispose(): Promise<void>;
81
+ }
82
+ /**
83
+ * Creates an (unloaded) NERModel; omitted fields default to labelMap = DEFAULT_LABEL_MAP, maxLength = 512, doLowerCase = true, modelVersion = '1.0.0'
84
+ */
85
+ export declare function createNERModel(config: Partial<NERModelConfig> & {
86
+ modelPath: string;
87
+ vocabPath: string;
88
+ }): NERModel;
89
+ /**
90
+ * NER Model stub for when no model is available
91
+ * Returns empty results - useful for regex-only mode; exposes the same members as INERModel
92
+ */
93
+ export declare class NERModelStub {
94
+ readonly version = "stub-1.0.0";
95
+ readonly loaded = true;
96
+ load(): Promise<void>; // no-op
97
+ predict(_text: string, _policy?: AnonymizationPolicy): Promise<NERPrediction>; // ignores both arguments; resolves to { spans: [], processingTimeMs: 0, modelVersion: version }
98
+ dispose(): Promise<void>; // no-op
99
+ }
100
+ /**
101
+ * Creates a new NERModelStub (for testing or regex-only mode); every call returns a fresh instance
102
+ */
103
+ export declare function createNERModelStub(): NERModelStub;
104
+ /**
105
+ * NER model interface for dependency injection; the public members that NERModel and NERModelStub both provide
106
+ */
107
+ export interface INERModel {
108
+ readonly version: string;
109
+ readonly loaded: boolean;
110
+ load(): Promise<void>;
111
+ predict(text: string, policy?: AnonymizationPolicy): Promise<NERPrediction>;
112
+ dispose(): Promise<void>;
113
+ }
114
+ //# sourceMappingURL=ner-model.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ner-model.d.ts","sourceRoot":"","sources":["../../src/ner/ner-model.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,EAAW,SAAS,EAAmB,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAc7F;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,8BAA8B;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,8BAA8B;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,4CAA4C;IAC5C,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,8BAA8B;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,4CAA4C;IAC5C,WAAW,EAAE,OAAO,CAAC;IACrB,iCAAiC;IACjC,YAAY,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,4BAA4B;IAC5B,KAAK,EAAE,SAAS,EAAE,CAAC;IACnB,4BAA4B;IAC5B,gBAAgB,EAAE,MAAM,CAAC;IACzB,yBAAyB;IACzB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,eAAO,MAAM,iBAAiB,UAU7B,CAAC;AAEF;;GAEG;AACH,qBAAa,QAAQ;IACnB,OAAO,CAAC,GAAG,CAA2B;IACtC,OAAO,CAAC,OAAO,CAAwB;IACvC,OAAO,CAAC,SAAS,CAAmC;IACpD,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,QAAQ,CAAS;gBAEb,MAAM,EAAE,cAAc;IAIlC;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAmB3B;;OAEG;IACG,OAAO,CACX,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,mBAAmB,GAC3B,OAAO,CAAC,aAAa,CAAC;IA8CzB;;OAEG;YACW,YAAY;IA+D1B;;OAEG;IACH,OAAO,CAAC,aAAa;IAqCrB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAexB;;OAEG;IACH,IAAI,OAAO,IAAI,MAAM,CAEpB;IAED;;OAEG;IACH,IAAI,MAAM,IAAI,OAAO,CAEpB;IAED;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAQ/B;AAYD;;GAEG;AACH,wBAAgB,cAAc,CAAC,MAAM,EAAE,OAAO,CAAC,cAAc,CAAC,GAAG;IAAE,SAAS,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAA;CAAE,GAAG,QAAQ,CAWnH;AAED;;;GAGG;AACH,qBAAa,YAAY;IACvB,QAAQ,CAAC,OAAO,gBAAgB;IAChC,QAAQ,CAAC,MAAM,QAAQ;IAEjB,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAIrB,OAAO,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,aAAa,CAAC;IAQ7E,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B;AAED;;GAEG;AACH,wBAAgB,kBAAkB,IAAI,YAAY,CAEjD;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAC;IACzB,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACtB,OAAO,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,aAAa,CAAC,CAAC;IAC5E,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1B"}
@@ -0,0 +1,240 @@
1
+ /**
2
+ * NER Model Wrapper
3
+ * ONNX Runtime integration for Named Entity Recognition
4
+ * Supports both onnxruntime-node and onnxruntime-web
5
+ */
6
+ import { loadRuntime } from './onnx-runtime.js';
7
+ import { WordPieceTokenizer, loadVocabFromFile, } from './tokenizer.js';
8
+ import { decodeBIOTags, convertToSpanMatches, cleanupSpanBoundaries, mergeAdjacentSpans, } from './bio-decoder.js';
9
+ /**
10
+ * Default label map for common NER models (CoNLL-style); array index = model output class index, used by createNERModel() when no labelMap is supplied
11
+ */
12
+ export const DEFAULT_LABEL_MAP = [
13
+ 'O',
14
+ 'B-PER',
15
+ 'I-PER',
16
+ 'B-ORG',
17
+ 'I-ORG',
18
+ 'B-LOC',
19
+ 'I-LOC',
20
+ 'B-MISC',
21
+ 'I-MISC',
22
+ ];
/**
 * NER model wrapper for ONNX inference.
 *
 * Typical lifecycle: construct with a complete configuration, await load(),
 * call predict() as often as needed, then await dispose().
 */
export class NERModel {
    /** Runtime module returned by loadRuntime(); null until load() runs. */
    ort = null;
    /** Inference session for config.modelPath; null until load() and after dispose(). */
    session = null;
    /** WordPiece tokenizer built from config.vocabPath; null until load() and after dispose(). */
    tokenizer = null;
    /** Configuration supplied to the constructor. */
    config;
    /** True between a successful load() and the next dispose(). */
    isLoaded = false;
    /**
     * @param config Complete model configuration (paths, label map, tokenizer options, version).
     */
    constructor(config) {
        this.config = config;
    }
    /**
     * Loads the ONNX runtime, the model session and the tokenizer.
     * Returns immediately when the model is already loaded.
     *
     * @returns Promise that resolves once the model is ready for predict().
     */
    async load() {
        if (this.isLoaded)
            return;
        // Load ONNX runtime (auto-detects best runtime for environment)
        this.ort = await loadRuntime();
        // Load ONNX model
        this.session = await this.ort.InferenceSession.create(this.config.modelPath);
        // Load tokenizer vocabulary
        const vocab = await loadVocabFromFile(this.config.vocabPath);
        this.tokenizer = new WordPieceTokenizer(vocab, {
            maxLength: this.config.maxLength,
            doLowerCase: this.config.doLowerCase,
        });
        this.isLoaded = true;
    }
    /**
     * Predicts entities in text.
     *
     * @param text Input text to analyse.
     * @param policy Optional anonymization policy. When given, it supplies the
     *   confidence threshold and restricts the result to types contained in
     *   both policy.enabledTypes and policy.nerEnabledTypes.
     * @returns Spans, elapsed time in ms and the configured model version.
     * @throws Error when load() has not completed.
     */
    async predict(text, policy) {
        const startTime = performance.now();
        if (!this.isLoaded || this.session === null || this.tokenizer === null) {
            throw new Error('Model not loaded. Call load() first.');
        }
        // Tokenize input
        const tokenization = this.tokenizer.tokenize(text);
        // Run inference
        const { labels, confidences } = await this.runInference(tokenization);
        // Decode BIO tags to entities
        const rawEntities = decodeBIOTags(tokenization.tokens, labels, confidences, text);
        // Convert to SpanMatch format with confidence filtering
        const minConfidence = this.getMinConfidence(policy);
        let spans = convertToSpanMatches(rawEntities, minConfidence);
        // Post-process spans
        spans = cleanupSpanBoundaries(spans, text);
        spans = mergeAdjacentSpans(spans, text);
        // Filter by enabled types in policy
        if (policy !== undefined) {
            spans = spans.filter((span) => policy.enabledTypes.has(span.type) && policy.nerEnabledTypes.has(span.type));
        }
        const endTime = performance.now();
        return {
            spans,
            processingTimeMs: endTime - startTime,
            modelVersion: this.config.modelVersion,
        };
    }
    /**
     * Runs ONNX inference for one tokenized input.
     *
     * @param tokenization Tokenizer output; inputIds, attentionMask and
     *   tokenTypeIds are read from it.
     * @returns Per-token labels and confidences (see processLogits()).
     * @throws Error when the session or runtime is missing, or when the model
     *   exposes no output.
     */
    async runInference(tokenization) {
        if (this.session === null || this.ort === null) {
            throw new Error('Session not initialized');
        }
        const session = this.session;
        const seqLength = tokenization.inputIds.length;
        // Create int64 tensors with a batch dimension of 1
        const inputIdsTensor = new this.ort.Tensor('int64', BigInt64Array.from(tokenization.inputIds.map(BigInt)), [1, seqLength]);
        const attentionMaskTensor = new this.ort.Tensor('int64', BigInt64Array.from(tokenization.attentionMask.map(BigInt)), [1, seqLength]);
        const tokenTypeIdsTensor = new this.ort.Tensor('int64', BigInt64Array.from(tokenization.tokenTypeIds.map(BigInt)), [1, seqLength]);
        // Run inference
        const feeds = {
            input_ids: inputIdsTensor,
            attention_mask: attentionMaskTensor,
        };
        // Some models also need token_type_ids
        const inputNames = session.inputNames;
        if (inputNames.includes('token_type_ids')) {
            feeds['token_type_ids'] = tokenTypeIdsTensor;
        }
        const results = await session.run(feeds);
        // The first declared output is treated as the logits tensor
        const outputName = session.outputNames[0];
        if (outputName === undefined) {
            throw new Error('No output from model');
        }
        const logits = results[outputName];
        if (logits === undefined) {
            throw new Error('Logits output not found');
        }
        // Process logits to get labels and confidences
        return this.processLogits(logits, seqLength);
    }
    /**
     * Processes model logits to extract one label and one confidence per token.
     *
     * For each token the class logits are passed through softmax; the arg-max
     * class is mapped through config.labelMap (falling back to 'O' for an index
     * the map does not cover) and its probability becomes the confidence.
     *
     * @param logits Output tensor; assumed to be laid out token-major with the
     *   class scores as the last dimension (e.g. [1, seqLength, numClasses]).
     * @param seqLength Number of tokens to read.
     * @returns Parallel arrays `labels` and `confidences`, each of length seqLength.
     */
    processLogits(logits, seqLength) {
        const data = logits.data;
        const labelMap = this.config.labelMap;
        // Per-token stride in the flat logits buffer. Prefer the tensor's own
        // last dimension: if the model emits a different number of classes than
        // labelMap has entries, striding by labelMap.length would misalign every
        // token after the first. Fall back to labelMap.length when the tensor
        // carries no usable dims.
        const dims = logits.dims;
        const lastDim = dims !== undefined && dims !== null ? dims[dims.length - 1] : undefined;
        const numLabels = typeof lastDim === 'number' && Number.isInteger(lastDim) && lastDim > 0
            ? lastDim
            : labelMap.length;
        const labels = [];
        const confidences = [];
        for (let i = 0; i < seqLength; i++) {
            // Get logits for this token (missing entries count as 0)
            const tokenLogits = [];
            for (let j = 0; j < numLabels; j++) {
                tokenLogits.push(data[i * numLabels + j] ?? 0);
            }
            // Apply softmax
            const probs = softmax(tokenLogits);
            // Get argmax (first maximum wins on ties)
            let maxIdx = 0;
            let maxProb = probs[0] ?? 0;
            for (let j = 1; j < probs.length; j++) {
                if ((probs[j] ?? 0) > maxProb) {
                    maxProb = probs[j] ?? 0;
                    maxIdx = j;
                }
            }
            // Classes the label map does not describe are treated as "outside"
            labels.push(labelMap[maxIdx] ?? 'O');
            confidences.push(maxProb);
        }
        return { labels, confidences };
    }
    /**
     * Gets the minimum confidence threshold from the policy.
     *
     * @param policy Optional policy; nerEnabledTypes and confidenceThresholds are read.
     * @returns 0.5 without a policy; otherwise the smallest threshold across
     *   policy.nerEnabledTypes, where a type without an entry counts as 0.5.
     *   When nerEnabledTypes is empty the result is 1.0.
     */
    getMinConfidence(policy) {
        if (policy === undefined)
            return 0.5;
        // Get minimum from all NER-enabled types
        let minThreshold = 1.0;
        for (const type of policy.nerEnabledTypes) {
            const threshold = policy.confidenceThresholds.get(type) ?? 0.5;
            if (threshold < minThreshold) {
                minThreshold = threshold;
            }
        }
        return minThreshold;
    }
    /**
     * Gets model version (config.modelVersion).
     */
    get version() {
        return this.config.modelVersion;
    }
    /**
     * Checks if model is loaded.
     */
    get loaded() {
        return this.isLoaded;
    }
    /**
     * Disposes of model resources.
     *
     * The instance is marked as unloaded first, so it is in a consistent state
     * even if releasing the session fails. The session is then released when
     * the runtime's session object provides a release() method; sessions
     * without one are simply dropped for garbage collection.
     *
     * @returns Promise that resolves once the session has been released.
     */
    async dispose() {
        const session = this.session;
        this.session = null;
        this.tokenizer = null;
        this.isLoaded = false;
        if (session !== null && typeof session.release === 'function') {
            await session.release();
        }
    }
}
/**
 * Softmax: turns raw logits into normalised probabilities.
 * The largest logit is subtracted before exponentiation so the exponentials
 * stay in range.
 *
 * @param logits Array of raw scores.
 * @returns Array of the same length holding exp(x - max) / sum.
 */
function softmax(logits) {
    const peak = Math.max(...logits);
    const exps = [];
    let total = 0;
    for (const value of logits) {
        const e = Math.exp(value - peak);
        exps.push(e);
        total += e;
    }
    return exps.map((e) => e / total);
}
/**
 * Builds an (unloaded) NERModel from a partial configuration.
 *
 * @param config Must contain modelPath and vocabPath. Any other field that is
 *   null or undefined falls back to its default: labelMap = DEFAULT_LABEL_MAP,
 *   maxLength = 512, doLowerCase = true, modelVersion = '1.0.0'.
 * @returns A new NERModel; load() still has to be called on it.
 */
export function createNERModel(config) {
    const modelPath = config.modelPath;
    const vocabPath = config.vocabPath;
    const labelMap = config.labelMap ?? DEFAULT_LABEL_MAP;
    const maxLength = config.maxLength ?? 512;
    const doLowerCase = config.doLowerCase ?? true;
    const modelVersion = config.modelVersion ?? '1.0.0';
    return new NERModel({
        modelPath,
        vocabPath,
        labelMap,
        maxLength,
        doLowerCase,
        modelVersion,
    });
}
/**
 * Stand-in NER model for situations where no real model is available
 * (for example regex-only mode). It never detects anything.
 */
export class NERModelStub {
    /** Fixed version string reported in every prediction. */
    version = 'stub-1.0.0';
    /** The stub is always considered loaded. */
    loaded = true;
    /** Nothing to load. */
    async load() { }
    /**
     * Ignores both arguments and resolves to an empty prediction.
     */
    async predict(_text, _policy) {
        const emptyPrediction = {
            spans: [],
            processingTimeMs: 0,
            modelVersion: this.version,
        };
        return emptyPrediction;
    }
    /** Nothing to release. */
    async dispose() { }
}
/**
 * Factory for the stub NER model (testing or regex-only mode).
 *
 * @returns A fresh NERModelStub instance.
 */
export function createNERModelStub() {
    const stub = new NERModelStub();
    return stub;
}
240
+ //# sourceMappingURL=ner-model.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ner-model.js","sourceRoot":"","sources":["../../src/ner/ner-model.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,WAAW,EAAmB,MAAM,mBAAmB,CAAC;AAEjE,OAAO,EACL,kBAAkB,EAClB,iBAAiB,GAGlB,MAAM,gBAAgB,CAAC;AACxB,OAAO,EACL,aAAa,EACb,oBAAoB,EACpB,qBAAqB,EACrB,kBAAkB,GACnB,MAAM,kBAAkB,CAAC;AAgC1B;;GAEG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAG;IAC/B,GAAG;IACH,OAAO;IACP,OAAO;IACP,OAAO;IACP,OAAO;IACP,OAAO;IACP,OAAO;IACP,QAAQ;IACR,QAAQ;CACT,CAAC;AAEF;;GAEG;AACH,MAAM,OAAO,QAAQ;IACX,GAAG,GAAsB,IAAI,CAAC;IAC9B,OAAO,GAAmB,IAAI,CAAC;IAC/B,SAAS,GAA8B,IAAI,CAAC;IAC5C,MAAM,CAAiB;IACvB,QAAQ,GAAG,KAAK,CAAC;IAEzB,YAAY,MAAsB;QAChC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI;QACR,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO;QAE1B,gEAAgE;QAChE,IAAI,CAAC,GAAG,GAAG,MAAM,WAAW,EAAE,CAAC;QAE/B,kBAAkB;QAClB,IAAI,CAAC,OAAO,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAE7E,4BAA4B;QAC5B,MAAM,KAAK,GAAG,MAAM,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAC7D,IAAI,CAAC,SAAS,GAAG,IAAI,kBAAkB,CAAC,KAAK,EAAE;YAC7C,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;YAChC,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;SACrC,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO,CACX,IAAY,EACZ,MAA4B;QAE5B,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAEpC,IAAI,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,IAAI,IAAI,CAAC,SAAS,KAAK,IAAI,EAAE,CAAC;YACvE,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;QAC1D,CAAC;QAED,iBAAiB;QACjB,MAAM,YAAY,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QAEnD,gBAAgB;QAChB,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC;QAEtE,8BAA8B;QAC9B,MAAM,WAAW,GAAG,aAAa,CAC/B,YAAY,CAAC,MAAM,EACnB,MAAM,EACN,WAAW,EACX,IAAI,CACL,CAAC;QAEF,wDAAwD;QACxD,MAAM,aAAa,GAAG,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC;QACpD,IAAI,KAAK,GAAG,oBAAoB,CAAC,WAAW,EAAE,aAAa,CAAC,CAAC;QAE7D,qBAAqB;QACrB,KAAK,GAAG,qBAAqB,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;QAC3C,KAAK,GAAG,kBAAkB,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;QAExC,oCAAoC;QACpC,I
AAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACzB,KAAK,GAAG,KAAK,CAAC,MAAM,CAClB,CAAC,IAAI,EAAE,EAAE,CACP,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,MAAM,CAAC,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAC9E,CAAC;QACJ,CAAC;QAED,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAElC,OAAO;YACL,KAAK;YACL,gBAAgB,EAAE,OAAO,GAAG,SAAS;YACrC,YAAY,EAAE,IAAI,CAAC,MAAM,CAAC,YAAY;SACvC,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,YAAY,CACxB,YAAgC;QAEhC,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,IAAI,IAAI,CAAC,GAAG,KAAK,IAAI,EAAE,CAAC;YAC/C,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QAC7C,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,CAAC,OAIpB,CAAC;QAEF,MAAM,SAAS,GAAG,YAAY,CAAC,QAAQ,CAAC,MAAM,CAAC;QAE/C,iBAAiB;QACjB,MAAM,cAAc,GAAG,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,CACxC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EACrD,CAAC,CAAC,EAAE,SAAS,CAAC,CACf,CAAC;QAEF,MAAM,mBAAmB,GAAG,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,CAC7C,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,YAAY,CAAC,aAAa,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EAC1D,CAAC,CAAC,EAAE,SAAS,CAAC,CACf,CAAC;QAEF,MAAM,kBAAkB,GAAG,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,CAC5C,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,YAAY,CAAC,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EACzD,CAAC,CAAC,EAAE,SAAS,CAAC,CACf,CAAC;QAEF,gBAAgB;QAChB,MAAM,KAAK,GAA4B;YACrC,SAAS,EAAE,cAAc;YACzB,cAAc,EAAE,mBAAmB;SACpC,CAAC;QAEF,uCAAuC;QACvC,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;QACtC,IAAI,UAAU,CAAC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,CAAC;YAC1C,KAAK,CAAC,gBAAgB,CAAC,GAAG,kBAAkB,CAAC;QAC/C,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAEzC,oBAAoB;QACpB,MAAM,UAAU,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QAC1C,IAAI,UAAU,KAAK,SAAS,EAAE,CAAC;YAC7B,MAAM,IAAI,KAAK,CAAC,sBAAsB,CAAC,CAAC;QAC1C,CAAC;QAED,MAAM,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;QACnC,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACzB,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QAC7C,CAAC;QAED,+CAA+C;QAC/C,OAAO,IAAI,CAAC,aAAa,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IAC/C,CAAC;IAED;;OAEG;IACK,aAAa,CACnB,MAA8B,EAC9B,SAAiB;QAEjB,MAAM,IAAI,GAAG,MAAM,CAAC,IAAoB,CAAC;QACzC,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,MAA
M,CAAC;QAE9C,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,MAAM,WAAW,GAAa,EAAE,CAAC;QAEjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,4BAA4B;YAC5B,MAAM,WAAW,GAAa,EAAE,CAAC;YACjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;gBACnC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,SAAS,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;YACjD,CAAC;YAED,gBAAgB;YAChB,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;YAEnC,aAAa;YACb,IAAI,MAAM,GAAG,CAAC,CAAC;YACf,IAAI,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACtC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,OAAO,EAAE,CAAC;oBAC9B,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;oBACxB,MAAM,GAAG,CAAC,CAAC;gBACb,CAAC;YACH,CAAC;YAED,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC,CAAC;YACjD,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC5B,CAAC;QAED,OAAO,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;IACjC,CAAC;IAED;;OAEG;IACK,gBAAgB,CAAC,MAA4B;QACnD,IAAI,MAAM,KAAK,SAAS;YAAE,OAAO,GAAG,CAAC;QAErC,yCAAyC;QACzC,IAAI,YAAY,GAAG,GAAG,CAAC;QACvB,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,eAAe,EAAE,CAAC;YAC1C,MAAM,SAAS,GAAG,MAAM,CAAC,oBAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC;YAC/D,IAAI,SAAS,GAAG,YAAY,EAAE,CAAC;gBAC7B,YAAY,GAAG,SAAS,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,OAAO,YAAY,CAAC;IACtB,CAAC;IAED;;OAEG;IACH,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;IAClC,CAAC;IAED;;OAEG;IACH,IAAI,MAAM;QACR,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;YAC1B,+EAA+E;YAC/E,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;QACD,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACtB,IAAI,CAAC,QAAQ,GAAG,KAAK,CAAC;IACxB,CAAC;CACF;AAED;;GAEG;AACH,SAAS,OAAO,CAAC,MAAgB;IAC/B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC;IACrC,MAAM,SAAS,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC;IAC5D,MAAM,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CA
AC,EAAE,CAAC,CAAC,CAAC;IACpD,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC;AAC1C,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,MAA0E;IACvG,MAAM,UAAU,GAAmB;QACjC,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,iBAAiB;QAC9C,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;QAClC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,IAAI;QACvC,YAAY,EAAE,MAAM,CAAC,YAAY,IAAI,OAAO;KAC7C,CAAC;IAEF,OAAO,IAAI,QAAQ,CAAC,UAAU,CAAC,CAAC;AAClC,CAAC;AAED;;;GAGG;AACH,MAAM,OAAO,YAAY;IACd,OAAO,GAAG,YAAY,CAAC;IACvB,MAAM,GAAG,IAAI,CAAC;IAEvB,KAAK,CAAC,IAAI;QACR,QAAQ;IACV,CAAC;IAED,KAAK,CAAC,OAAO,CAAC,KAAa,EAAE,OAA6B;QACxD,OAAO;YACL,KAAK,EAAE,EAAE;YACT,gBAAgB,EAAE,CAAC;YACnB,YAAY,EAAE,IAAI,CAAC,OAAO;SAC3B,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,OAAO;QACX,QAAQ;IACV,CAAC;CACF;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB;IAChC,OAAO,IAAI,YAAY,EAAE,CAAC;AAC5B,CAAC"}
@@ -0,0 +1,45 @@
1
+ /**
2
+ * ONNX Runtime Abstraction
3
+ * Allows switching between onnxruntime-node and onnxruntime-web
4
+ */
5
+ export interface OrtTensor { // minimal view of a runtime tensor, limited to the members this package reads
6
+ data: Float32Array | BigInt64Array | Int32Array; // flat typed-array storage; NERModel indexes it linearly
7
+ dims: readonly number[]; // tensor shape, e.g. [1, seqLength] for the input tensors NERModel builds
8
+ }
9
+ export interface OrtSession { // minimal view of a loaded inference session
10
+ inputNames: readonly string[]; // NERModel checks this for 'token_type_ids' before feeding that tensor
11
+ outputNames: readonly string[]; // NERModel treats the first entry as the logits output
12
+ run(feeds: Record<string, OrtTensor>): Promise<Record<string, OrtTensor>>; // feeds keyed by input name; result keyed by output name
13
+ }
14
+ export interface OrtInferenceSession { // static side of the runtime's InferenceSession
15
+ create(path: string, options?: unknown): Promise<OrtSession>; // NERModel.load() calls this with config.modelPath and no options
16
+ }
17
+ export interface OrtTensorConstructor { // constructor signature of the runtime's Tensor class
18
+ new (type: string, data: Float32Array | BigInt64Array | Int32Array | number[] | bigint[], dims: number[]): OrtTensor; // NERModel uses type 'int64' with BigInt64Array data
19
+ }
20
+ export interface OrtRuntime { // the subset of the imported runtime module that loadRuntime() exposes
21
+ InferenceSession: OrtInferenceSession;
22
+ Tensor: OrtTensorConstructor;
23
+ }
24
+ /**
25
+ * Detects the best ONNX runtime for the current environment: 'web' when globalThis.window or globalThis.Deno exists; under Bun, 'node' only if 'onnxruntime-node' resolves; otherwise 'node'
26
+ */
27
+ export declare function detectRuntime(): 'node' | 'web';
28
+ /**
29
+ * Loads and caches the ONNX runtime. preferredRuntime (default: detectRuntime()) is tried first, then the other one; rejects if neither imports. Once a runtime is cached it is returned as-is and preferredRuntime is ignored
30
+ */
31
+ export declare function loadRuntime(preferredRuntime?: 'node' | 'web'): Promise<OrtRuntime>;
32
+ /**
33
+ * Gets the type of the runtime cached by loadRuntime(), or null when none has been loaded (or after resetRuntime())
34
+ */
35
+ export declare function getRuntimeType(): 'node' | 'web' | null;
36
+ /**
37
+ * Clears the cached runtime and its type so the next loadRuntime() imports again (useful for testing)
38
+ */
39
+ export declare function resetRuntime(): void;
40
+ declare global { // ambient globals that detectRuntime() probes via globalThis
41
+ var Bun: unknown | undefined; // probed to detect Bun
42
+ var Deno: unknown | undefined; // probed to detect Deno
43
+ var window: unknown | undefined; // probed to detect a browser-like environment
44
+ }
45
+ //# sourceMappingURL=onnx-runtime.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"onnx-runtime.d.ts","sourceRoot":"","sources":["../../src/ner/onnx-runtime.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,YAAY,GAAG,aAAa,GAAG,UAAU,CAAC;IAChD,IAAI,EAAE,SAAS,MAAM,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,EAAE,SAAS,MAAM,EAAE,CAAC;IAC9B,WAAW,EAAE,SAAS,MAAM,EAAE,CAAC;IAC/B,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC;CAC3E;AAED,MAAM,WAAW,mBAAmB;IAClC,MAAM,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,OAAO,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;CAC9D;AAED,MAAM,WAAW,oBAAoB;IACnC,KACE,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,YAAY,GAAG,aAAa,GAAG,UAAU,GAAG,MAAM,EAAE,GAAG,MAAM,EAAE,EACrE,IAAI,EAAE,MAAM,EAAE,GACb,SAAS,CAAC;CACd;AAED,MAAM,WAAW,UAAU;IACzB,gBAAgB,EAAE,mBAAmB,CAAC;IACtC,MAAM,EAAE,oBAAoB,CAAC;CAC9B;AAQD;;GAEG;AACH,wBAAgB,aAAa,IAAI,MAAM,GAAG,KAAK,CA4B9C;AAED;;GAEG;AACH,wBAAsB,WAAW,CAAC,gBAAgB,CAAC,EAAE,MAAM,GAAG,KAAK,GAAG,OAAO,CAAC,UAAU,CAAC,CA8CxF;AAED;;GAEG;AACH,wBAAgB,cAAc,IAAI,MAAM,GAAG,KAAK,GAAG,IAAI,CAEtD;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,IAAI,CAGnC;AAGD,OAAO,CAAC,MAAM,CAAC;IAEb,IAAI,GAAG,EAAE,OAAO,GAAG,SAAS,CAAC;IAE7B,IAAI,IAAI,EAAE,OAAO,GAAG,SAAS,CAAC;IAE9B,IAAI,MAAM,EAAE,OAAO,GAAG,SAAS,CAAC;CACjC"}
@@ -0,0 +1,99 @@
1
+ /**
2
+ * ONNX Runtime Abstraction
3
+ * Allows switching between onnxruntime-node and onnxruntime-web
4
+ */
5
+ /**
6
+ * Module-level cache for the loaded runtime; written by loadRuntime() and cleared by resetRuntime()
7
+ */
8
+ let _runtime = null; // imported runtime module, or null when nothing is loaded
9
+ let _runtimeType = null; // 'node' | 'web' matching _runtime, or null
+ /**
11
+ * Detects the best ONNX runtime for the current environment
12
+ */
13
+ export function detectRuntime() {
14
+ // Check if we're in Bun
15
+ const isBun = typeof globalThis.Bun !== 'undefined';
16
+ // Check if we're in a browser-like environment
17
+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
18
+ const isBrowser = typeof globalThis.window !== 'undefined';
19
+ // Check if we're in Deno
20
+ const isDeno = typeof globalThis.Deno !== 'undefined';
21
+ if (isBrowser || isDeno) {
22
+ return 'web';
23
+ }
24
+ // For Bun, try node first, fall back to web
25
+ if (isBun) {
26
+ try {
27
+ // Quick check if onnxruntime-node is loadable
28
+ require.resolve('onnxruntime-node');
29
+ return 'node';
30
+ }
31
+ catch {
32
+ return 'web';
33
+ }
34
+ }
35
+ // Default to node for Node.js
36
+ return 'node';
37
+ }
/**
 * Imports one ONNX runtime package and stores it in the module-level cache.
 * Anything other than 'node' imports 'onnxruntime-web'.
 *
 * @param runtimeType Requested runtime flavour.
 * @returns The imported runtime module.
 */
async function importRuntime(runtimeType) {
    const kind = runtimeType === 'node' ? 'node' : 'web';
    // Either package may be missing; the import then rejects and the caller
    // decides whether to fall back.
    const ort = kind === 'node'
        ? await import('onnxruntime-node')
        : await import('onnxruntime-web');
    _runtime = ort;
    _runtimeType = kind;
    return ort;
}
/**
 * Loads the appropriate ONNX runtime and caches it for later calls.
 *
 * @param preferredRuntime Runtime to try first; defaults to detectRuntime().
 *   Ignored when a runtime is already cached.
 * @returns The cached or newly imported runtime module.
 * @throws Error when neither 'onnxruntime-node' nor 'onnxruntime-web' can be
 *   imported. The message quotes the first failure, which is also attached
 *   as the error's `cause`.
 */
export async function loadRuntime(preferredRuntime) {
    if (_runtime !== null) {
        return _runtime;
    }
    const runtimeType = preferredRuntime ?? detectRuntime();
    try {
        return await importRuntime(runtimeType);
    }
    catch (e) {
        // If preferred runtime fails, try the other
        const fallbackType = runtimeType === 'node' ? 'web' : 'node';
        try {
            return await importRuntime(fallbackType);
        }
        catch {
            throw new Error(`Failed to load ONNX runtime. Install either 'onnxruntime-node' or 'onnxruntime-web'.\n` +
                `Original error: ${e}`, { cause: e });
        }
    }
}
86
+ /**
87
+ * Gets the type ('node' or 'web') of the runtime cached by loadRuntime(), or null when nothing is cached
88
+ */
89
+ export function getRuntimeType() {
90
+ return _runtimeType;
91
+ }
92
+ /**
93
+ * Clears the cached runtime and its type so the next loadRuntime() imports again (useful for testing)
94
+ */
95
+ export function resetRuntime() {
96
+ _runtime = null;
97
+ _runtimeType = null;
98
+ }
99
+ //# sourceMappingURL=onnx-runtime.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"onnx-runtime.js","sourceRoot":"","sources":["../../src/ner/onnx-runtime.ts"],"names":[],"mappings":"AAAA;;;GAGG;AA+BH;;GAEG;AACH,IAAI,QAAQ,GAAsB,IAAI,CAAC;AACvC,IAAI,YAAY,GAA0B,IAAI,CAAC;AAE/C;;GAEG;AACH,MAAM,UAAU,aAAa;IAC3B,wBAAwB;IACxB,MAAM,KAAK,GAAG,OAAO,UAAU,CAAC,GAAG,KAAK,WAAW,CAAC;IAEpD,+CAA+C;IAC/C,uEAAuE;IACvE,MAAM,SAAS,GAAG,OAAO,UAAU,CAAC,MAAM,KAAK,WAAW,CAAC;IAE3D,yBAAyB;IACzB,MAAM,MAAM,GAAG,OAAO,UAAU,CAAC,IAAI,KAAK,WAAW,CAAC;IAEtD,IAAI,SAAS,IAAI,MAAM,EAAE,CAAC;QACxB,OAAO,KAAK,CAAC;IACf,CAAC;IAED,4CAA4C;IAC5C,IAAI,KAAK,EAAE,CAAC;QACV,IAAI,CAAC;YACH,8CAA8C;YAC9C,OAAO,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC;YACpC,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,8BAA8B;IAC9B,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,gBAAiC;IACjE,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;QACtB,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,WAAW,GAAG,gBAAgB,IAAI,aAAa,EAAE,CAAC;IAExD,IAAI,CAAC;QACH,IAAI,WAAW,KAAK,MAAM,EAAE,CAAC;YAC3B,sCAAsC;YACtC,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;YAC7C,QAAQ,GAAG,GAA4B,CAAC;YACxC,YAAY,GAAG,MAAM,CAAC;QACxB,CAAC;aAAM,CAAC;YACN,qCAAqC;YACrC,6DAA6D;YAC7D,oDAAoD;YACpD,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;YAC5C,QAAQ,GAAG,GAA4B,CAAC;YACxC,YAAY,GAAG,KAAK,CAAC;QACvB,CAAC;IACH,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,4CAA4C;QAC5C,MAAM,YAAY,GAAG,WAAW,KAAK,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC;QAE7D,IAAI,CAAC;YACH,IAAI,YAAY,KAAK,MAAM,EAAE,CAAC;gBAC5B,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;gBAC7C,QAAQ,GAAG,GAA4B,CAAC;gBACxC,YAAY,GAAG,MAAM,CAAC;YACxB,CAAC;iBAAM,CAAC;gBACN,6DAA6D;gBAC7D,oDAAoD;gBACpD,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;gBAC5C,QAAQ,GAAG,GAA4B,CAAC;gBACxC,YAAY,GAAG,KAAK,CAAC;YACvB,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,KAAK,CACb,wFAAwF;gBACxF,mBAAmB,CAAC,EAAE,CACvB,CAAC;QACJ,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc;IAC5B,OAAO,YAAY,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY;IAC1B,QAAQ,GAAG,IAAI,CAA
C;IAChB,YAAY,GAAG,IAAI,CAAC;AACtB,CAAC"}
@@ -0,0 +1,140 @@
1
+ /**
2
+ * WordPiece Tokenizer
3
+ * Tokenizes text into subword tokens while maintaining character offset mapping
4
+ * Compatible with BERT-style models
5
+ */
6
/**
 * One token together with its position in the original text.
 */
export interface Token {
    /**
     * ID of the token in the vocabulary.
     */
    id: number;
    /**
     * The token string.
     */
    token: string;
    /**
     * Character offset in the original text where the token starts.
     */
    start: number;
    /**
     * Character offset in the original text where the token ends.
     * NOTE(review): whether this offset is inclusive or exclusive is not
     * visible from this declaration; confirm against the implementation.
     */
    end: number;
    /**
     * True when this is a continuation token (one that starts with `##`).
     */
    isContinuation: boolean;
    /**
     * True when this is a special token (`[CLS]`, `[SEP]`, etc.).
     */
    isSpecial: boolean;
}
23
/**
 * Output of a tokenization run: the tokens plus the parallel arrays and
 * offset metadata that accompany them.
 */
export interface TokenizationResult {
    /**
     * The tokens, in order.
     */
    tokens: Token[];
    /**
     * Input IDs to feed to the model.
     */
    inputIds: number[];
    /**
     * Attention mask.
     */
    attentionMask: number[];
    /**
     * Token type IDs (used by BERT-style models).
     */
    tokenTypeIds: number[];
    /**
     * For each token index, the character span `[start, end]` it maps to.
     * Entries may be `null`; presumably for tokens with no span in the
     * original text (e.g. special tokens). TODO confirm.
     */
    tokenToCharSpan: ([number, number] | null)[];
}
38
/**
 * Settings accepted by the tokenizer.
 */
export interface TokenizerConfig {
    /**
     * Optional path to a vocabulary file.
     */
    vocabPath?: string;
    /**
     * Optional vocabulary supplied directly as a Map.
     */
    vocab?: Map<string, number>;
    /**
     * Maximum sequence length.
     */
    maxLength: number;
    /**
     * Token used for unknown input.
     */
    unkToken: string;
    /**
     * Classification token.
     */
    clsToken: string;
    /**
     * Separator token.
     */
    sepToken: string;
    /**
     * Padding token.
     */
    padToken: string;
    /**
     * Mask token.
     */
    maskToken: string;
    /**
     * Whether the input is lowercased.
     */
    doLowerCase: boolean;
    /**
     * Whether accents are stripped.
     */
    stripAccents: boolean;
}
63
/**
 * Default `TokenizerConfig` intended for BERT-style models.
 *
 * Only the type is declared here; the concrete default values are defined in
 * the accompanying implementation file.
 */
export declare const DEFAULT_TOKENIZER_CONFIG: TokenizerConfig;
67
/**
 * Tokenizer implementing the WordPiece algorithm.
 *
 * Built from a vocabulary (token string -> ID) and an optional partial
 * configuration. The public surface is `tokenize`, `decode`, `vocabSize`,
 * `getTokenId` and `getToken`; everything else is private.
 */
export declare class WordPieceTokenizer {
    private vocab;
    private inverseVocab;
    private config;
    private unkId;
    private clsId;
    private sepId;
    private padId;
    /**
     * @param vocab Vocabulary as a Map from token string to numeric ID.
     * @param config Optional subset of `TokenizerConfig` settings.
     */
    constructor(vocab: Map<string, number>, config?: Partial<TokenizerConfig>);
    /**
     * Converts text into tokens while tracking character offsets.
     *
     * @param text The text to tokenize.
     * @returns The tokens together with model inputs and offset mapping.
     */
    tokenize(text: string): TokenizationResult;
    /**
     * Text preprocessing step (lowercasing, accent stripping).
     */
    private preprocess;
    /**
     * Removes accents from text.
     */
    private stripAccents;
    /**
     * Breaks text into words, keeping track of character offsets.
     */
    private splitIntoWords;
    /**
     * Applies the WordPiece algorithm to a single word.
     */
    private tokenizeWord;
    /**
     * Breaks a word into pieces, with handling for punctuation.
     */
    private splitWordIntoPieces;
    /**
     * Tests whether a character is punctuation.
     */
    private isPunctuation;
    /**
     * Looks up the longest matching token in the vocabulary.
     */
    private findLongestMatch;
    /**
     * Converts token IDs back into text.
     *
     * @param tokenIds The token IDs to decode.
     * @returns The decoded text.
     */
    decode(tokenIds: number[]): string;
    /**
     * Size of the vocabulary.
     */
    get vocabSize(): number;
    /**
     * Looks up the ID for a token string.
     *
     * @param token The token string.
     * @returns The ID, or `undefined` (presumably when the token is not in
     *   the vocabulary; confirm against the implementation).
     */
    getTokenId(token: string): number | undefined;
    /**
     * Looks up the token string for an ID.
     *
     * @param id The token ID.
     * @returns The token string, or `undefined` (presumably when the ID is
     *   not in the vocabulary; confirm against the implementation).
     */
    getToken(id: number): string | undefined;
}
128
/**
 * Reads a vocabulary from a text file containing one token per line.
 *
 * @param path Path of the vocabulary file.
 * @returns A promise for the vocabulary as a Map from string to number
 *   (presumably token -> ID; confirm against the implementation).
 */
export declare function loadVocabFromFile(path: string): Promise<Map<string, number>>;
132
/**
 * Builds a vocabulary from string content.
 *
 * @param content The vocabulary text to parse.
 * @returns The vocabulary as a Map from string to number
 *   (presumably token -> ID; confirm against the implementation).
 */
export declare function parseVocab(content: string): Map<string, number>;
136
/**
 * Builds a minimal vocabulary intended for tests.
 *
 * @returns The vocabulary as a Map from string to number.
 */
export declare function createTestVocab(): Map<string, number>;
140
+ //# sourceMappingURL=tokenizer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../src/ner/tokenizer.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;GAEG;AACH,MAAM,WAAW,KAAK;IACpB,6BAA6B;IAC7B,EAAE,EAAE,MAAM,CAAC;IACX,mBAAmB;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,8CAA8C;IAC9C,KAAK,EAAE,MAAM,CAAC;IACd,4CAA4C;IAC5C,GAAG,EAAE,MAAM,CAAC;IACZ,4DAA4D;IAC5D,cAAc,EAAE,OAAO,CAAC;IACxB,2DAA2D;IAC3D,SAAS,EAAE,OAAO,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,sBAAsB;IACtB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,0BAA0B;IAC1B,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,qBAAqB;IACrB,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,6CAA6C;IAC7C,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,8DAA8D;IAC9D,eAAe,EAAE,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC;CACjD;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,8BAA8B;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0BAA0B;IAC1B,KAAK,CAAC,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC5B,8BAA8B;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,oBAAoB;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,2BAA2B;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,sBAAsB;IACtB,QAAQ,EAAE,MAAM,CAAC;IACjB,oBAAoB;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,iBAAiB;IACjB,SAAS,EAAE,MAAM,CAAC;IAClB,iCAAiC;IACjC,WAAW,EAAE,OAAO,CAAC;IACrB,oBAAoB;IACpB,YAAY,EAAE,OAAO,CAAC;CACvB;AAED;;GAEG;AACH,eAAO,MAAM,wBAAwB,EAAE,eAStC,CAAC;AAEF;;GAEG;AACH,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,KAAK,CAAsB;IACnC,OAAO,CAAC,YAAY,CAAsB;IAC1C,OAAO,CAAC,MAAM,CAAkB;IAGhC,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,KAAK,CAAS;IACtB,OAAO,CAAC,KAAK,CAAS;gBAEV,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,GAAE,OAAO,CAAC,eAAe,CAAM;IAiB7E;;OAEG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,kBAAkB;IAwE1C;;OAEG;IACH,OAAO,CAAC,UAAU;IAclB;;OAEG;IACH,OAAO,CAAC,YAAY;IAIpB;;OAEG;IACH,OAAO,CAAC,cAAc;IA0BtB;;OAEG;IACH,OAAO,CAAC,YAAY;IA2CpB;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAuB3B;;OAEG;IACH,OAAO,CAAC,aAAa;IAarB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAqBxB;;OAEG;IACH,MAAM,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,MAAM;IA8BlC;;OAEG;IACH,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED;;OAEG;IACH,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS;IAI7C;;OAEG;IACH,QAAQ,CAAC,EAAE,EAAE,MAAM,GAAG
,MAAM,GAAG,SAAS;CAGzC;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAIlF;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,OAAO,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAY/D;AAED;;GAEG;AACH,wBAAgB,eAAe,IAAI,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CA8BrD"}