rehydra 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +615 -0
- package/dist/crypto/index.d.ts +6 -0
- package/dist/crypto/index.d.ts.map +1 -0
- package/dist/crypto/index.js +6 -0
- package/dist/crypto/index.js.map +1 -0
- package/dist/crypto/pii-map-crypto.d.ts +114 -0
- package/dist/crypto/pii-map-crypto.d.ts.map +1 -0
- package/dist/crypto/pii-map-crypto.js +228 -0
- package/dist/crypto/pii-map-crypto.js.map +1 -0
- package/dist/index.d.ts +180 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +384 -0
- package/dist/index.js.map +1 -0
- package/dist/ner/bio-decoder.d.ts +64 -0
- package/dist/ner/bio-decoder.d.ts.map +1 -0
- package/dist/ner/bio-decoder.js +216 -0
- package/dist/ner/bio-decoder.js.map +1 -0
- package/dist/ner/index.d.ts +10 -0
- package/dist/ner/index.d.ts.map +1 -0
- package/dist/ner/index.js +10 -0
- package/dist/ner/index.js.map +1 -0
- package/dist/ner/model-manager.d.ts +111 -0
- package/dist/ner/model-manager.d.ts.map +1 -0
- package/dist/ner/model-manager.js +325 -0
- package/dist/ner/model-manager.js.map +1 -0
- package/dist/ner/ner-model.d.ts +114 -0
- package/dist/ner/ner-model.d.ts.map +1 -0
- package/dist/ner/ner-model.js +253 -0
- package/dist/ner/ner-model.js.map +1 -0
- package/dist/ner/onnx-runtime.d.ts +46 -0
- package/dist/ner/onnx-runtime.d.ts.map +1 -0
- package/dist/ner/onnx-runtime.js +130 -0
- package/dist/ner/onnx-runtime.js.map +1 -0
- package/dist/ner/tokenizer.d.ts +118 -0
- package/dist/ner/tokenizer.d.ts.map +1 -0
- package/dist/ner/tokenizer.js +332 -0
- package/dist/ner/tokenizer.js.map +1 -0
- package/dist/pipeline/index.d.ts +12 -0
- package/dist/pipeline/index.d.ts.map +1 -0
- package/dist/pipeline/index.js +12 -0
- package/dist/pipeline/index.js.map +1 -0
- package/dist/pipeline/prenormalize.d.ts +48 -0
- package/dist/pipeline/prenormalize.d.ts.map +1 -0
- package/dist/pipeline/prenormalize.js +94 -0
- package/dist/pipeline/prenormalize.js.map +1 -0
- package/dist/pipeline/resolver.d.ts +56 -0
- package/dist/pipeline/resolver.d.ts.map +1 -0
- package/dist/pipeline/resolver.js +239 -0
- package/dist/pipeline/resolver.js.map +1 -0
- package/dist/pipeline/semantic-data-loader.d.ts +165 -0
- package/dist/pipeline/semantic-data-loader.d.ts.map +1 -0
- package/dist/pipeline/semantic-data-loader.js +655 -0
- package/dist/pipeline/semantic-data-loader.js.map +1 -0
- package/dist/pipeline/semantic-enricher.d.ts +112 -0
- package/dist/pipeline/semantic-enricher.d.ts.map +1 -0
- package/dist/pipeline/semantic-enricher.js +318 -0
- package/dist/pipeline/semantic-enricher.js.map +1 -0
- package/dist/pipeline/tagger.d.ts +114 -0
- package/dist/pipeline/tagger.d.ts.map +1 -0
- package/dist/pipeline/tagger.js +374 -0
- package/dist/pipeline/tagger.js.map +1 -0
- package/dist/pipeline/title-extractor.d.ts +79 -0
- package/dist/pipeline/title-extractor.d.ts.map +1 -0
- package/dist/pipeline/title-extractor.js +801 -0
- package/dist/pipeline/title-extractor.js.map +1 -0
- package/dist/pipeline/validator.d.ts +65 -0
- package/dist/pipeline/validator.d.ts.map +1 -0
- package/dist/pipeline/validator.js +264 -0
- package/dist/pipeline/validator.js.map +1 -0
- package/dist/recognizers/base.d.ts +78 -0
- package/dist/recognizers/base.d.ts.map +1 -0
- package/dist/recognizers/base.js +100 -0
- package/dist/recognizers/base.js.map +1 -0
- package/dist/recognizers/bic-swift.d.ts +10 -0
- package/dist/recognizers/bic-swift.d.ts.map +1 -0
- package/dist/recognizers/bic-swift.js +107 -0
- package/dist/recognizers/bic-swift.js.map +1 -0
- package/dist/recognizers/credit-card.d.ts +32 -0
- package/dist/recognizers/credit-card.d.ts.map +1 -0
- package/dist/recognizers/credit-card.js +160 -0
- package/dist/recognizers/credit-card.js.map +1 -0
- package/dist/recognizers/custom-id.d.ts +28 -0
- package/dist/recognizers/custom-id.d.ts.map +1 -0
- package/dist/recognizers/custom-id.js +116 -0
- package/dist/recognizers/custom-id.js.map +1 -0
- package/dist/recognizers/email.d.ts +10 -0
- package/dist/recognizers/email.d.ts.map +1 -0
- package/dist/recognizers/email.js +75 -0
- package/dist/recognizers/email.js.map +1 -0
- package/dist/recognizers/iban.d.ts +14 -0
- package/dist/recognizers/iban.d.ts.map +1 -0
- package/dist/recognizers/iban.js +67 -0
- package/dist/recognizers/iban.js.map +1 -0
- package/dist/recognizers/index.d.ts +20 -0
- package/dist/recognizers/index.d.ts.map +1 -0
- package/dist/recognizers/index.js +42 -0
- package/dist/recognizers/index.js.map +1 -0
- package/dist/recognizers/ip-address.d.ts +14 -0
- package/dist/recognizers/ip-address.d.ts.map +1 -0
- package/dist/recognizers/ip-address.js +183 -0
- package/dist/recognizers/ip-address.js.map +1 -0
- package/dist/recognizers/phone.d.ts +10 -0
- package/dist/recognizers/phone.d.ts.map +1 -0
- package/dist/recognizers/phone.js +145 -0
- package/dist/recognizers/phone.js.map +1 -0
- package/dist/recognizers/registry.d.ts +59 -0
- package/dist/recognizers/registry.d.ts.map +1 -0
- package/dist/recognizers/registry.js +113 -0
- package/dist/recognizers/registry.js.map +1 -0
- package/dist/recognizers/url.d.ts +14 -0
- package/dist/recognizers/url.d.ts.map +1 -0
- package/dist/recognizers/url.js +121 -0
- package/dist/recognizers/url.js.map +1 -0
- package/dist/types/index.d.ts +197 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +80 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/pii-types.d.ts +50 -0
- package/dist/types/pii-types.d.ts.map +1 -0
- package/dist/types/pii-types.js +114 -0
- package/dist/types/pii-types.js.map +1 -0
- package/dist/utils/iban-checksum.d.ts +23 -0
- package/dist/utils/iban-checksum.d.ts.map +1 -0
- package/dist/utils/iban-checksum.js +106 -0
- package/dist/utils/iban-checksum.js.map +1 -0
- package/dist/utils/index.d.ts +10 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +10 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/luhn.d.ts +17 -0
- package/dist/utils/luhn.d.ts.map +1 -0
- package/dist/utils/luhn.js +55 -0
- package/dist/utils/luhn.js.map +1 -0
- package/dist/utils/offsets.d.ts +86 -0
- package/dist/utils/offsets.d.ts.map +1 -0
- package/dist/utils/offsets.js +124 -0
- package/dist/utils/offsets.js.map +1 -0
- package/dist/utils/path.d.ts +34 -0
- package/dist/utils/path.d.ts.map +1 -0
- package/dist/utils/path.js +96 -0
- package/dist/utils/path.js.map +1 -0
- package/dist/utils/storage-browser.d.ts +51 -0
- package/dist/utils/storage-browser.d.ts.map +1 -0
- package/dist/utils/storage-browser.js +381 -0
- package/dist/utils/storage-browser.js.map +1 -0
- package/dist/utils/storage-node.d.ts +43 -0
- package/dist/utils/storage-node.d.ts.map +1 -0
- package/dist/utils/storage-node.js +93 -0
- package/dist/utils/storage-node.js.map +1 -0
- package/dist/utils/storage.d.ts +70 -0
- package/dist/utils/storage.d.ts.map +1 -0
- package/dist/utils/storage.js +69 -0
- package/dist/utils/storage.js.map +1 -0
- package/package.json +66 -0
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* NER Model Wrapper
|
|
3
|
+
* ONNX Runtime integration for Named Entity Recognition
|
|
4
|
+
* Supports both onnxruntime-node and onnxruntime-web
|
|
5
|
+
*/
|
|
6
|
+
import { loadRuntime } from "./onnx-runtime.js";
|
|
7
|
+
import { WordPieceTokenizer, loadVocabFromFile, } from "./tokenizer.js";
|
|
8
|
+
import { decodeBIOTags, convertToSpanMatches, cleanupSpanBoundaries, mergeAdjacentSpans, } from "./bio-decoder.js";
|
|
9
|
+
import { getStorageProvider, isBrowser } from "../utils/storage.js";
|
|
10
|
+
/**
 * Fallback label vocabulary used when a model config supplies no `labelMap`.
 *
 * CoNLL-style BIO scheme: index 0 is the "outside" tag, followed by a
 * begin/inside pair for each of PER, ORG, LOC and MISC. The array index of a
 * label is the class index it is looked up by.
 */
export const DEFAULT_LABEL_MAP = [
  "O",
  ...["PER", "ORG", "LOC", "MISC"].flatMap((entity) => [
    `B-${entity}`,
    `I-${entity}`,
  ]),
];
|
|
24
|
+
/**
 * NER model wrapper for ONNX inference.
 *
 * Owns an ONNX Runtime inference session and a WordPiece tokenizer, and turns
 * text into entity spans: tokenize -> run the model -> softmax/argmax per
 * token -> BIO decoding -> span clean-up -> policy filtering.
 *
 * The config object is stored as given; this class reads `modelPath`,
 * `vocabPath`, `labelMap`, `maxLength`, `doLowerCase` and `modelVersion`.
 */
export class NERModel {
  /** ONNX Runtime module returned by loadRuntime(); null until load() succeeds. */
  ort = null;
  /** Inference session created from `config.modelPath`; null when not loaded. */
  session = null;
  /** WordPieceTokenizer built from `config.vocabPath`; null when not loaded. */
  tokenizer = null;
  /** Model configuration passed to the constructor. */
  config;
  /** True once load() has completed and until dispose() is called. */
  isLoaded = false;
  /** In-flight load() promise, shared so concurrent callers load only once. */
  #pendingLoad = null;

  /**
   * @param config Model configuration (see class description for the fields read).
   */
  constructor(config) {
    this.config = config;
  }

  /**
   * Loads the ONNX runtime, the model session and the tokenizer.
   *
   * Idempotent: returns immediately when already loaded, and concurrent calls
   * share a single in-flight load so only one session is ever created.
   *
   * @returns {Promise<void>} Resolves when the model is ready for predict().
   */
  async load() {
    if (this.isLoaded) {
      return;
    }
    this.#pendingLoad ??= this.#loadResources().finally(() => {
      this.#pendingLoad = null;
    });
    return this.#pendingLoad;
  }

  /**
   * Performs the actual load. Instance state is only committed once every
   * step has succeeded, so a failed load leaves the instance unloaded.
   */
  async #loadResources() {
    // Load ONNX runtime (auto-detects best runtime for environment)
    const ort = await loadRuntime();

    let session;
    if (isBrowser()) {
      // In browsers file paths don't work, so the model bytes are read through
      // the storage provider and handed to onnxruntime-web directly.
      const storage = await getStorageProvider();
      const modelData = await storage.readFile(this.config.modelPath);
      session = await ort.InferenceSession.create(modelData);
    } else {
      // In Node.js the runtime accepts the file path itself.
      session = await ort.InferenceSession.create(this.config.modelPath);
    }

    let tokenizer;
    try {
      // loadVocabFromFile uses the storage abstraction internally.
      const vocab = await loadVocabFromFile(this.config.vocabPath);
      tokenizer = new WordPieceTokenizer(vocab, {
        maxLength: this.config.maxLength,
        doLowerCase: this.config.doLowerCase,
      });
    } catch (err) {
      // Do not leak the freshly created session when the vocabulary fails.
      await NERModel.#releaseQuietly(session);
      throw err;
    }

    this.ort = ort;
    this.session = session;
    this.tokenizer = tokenizer;
    this.isLoaded = true;
  }

  /**
   * Predicts entities in text.
   *
   * @param text Input text to analyse.
   * @param policy Optional policy. When given, its `nerEnabledTypes` and
   *   `confidenceThresholds` determine the confidence floor, and only spans
   *   whose type is in both `enabledTypes` and `nerEnabledTypes` are returned.
   * @returns {Promise<{spans: Array, processingTimeMs: number, modelVersion: string}>}
   * @throws {Error} When the model has not been loaded.
   */
  async predict(text, policy) {
    const startTime = performance.now();
    if (!this.isLoaded || this.session === null || this.tokenizer === null) {
      throw new Error("Model not loaded. Call load() first.");
    }

    const tokenization = this.tokenizer.tokenize(text);
    const { labels, confidences } = await this.runInference(tokenization);

    // Decode BIO tags to entities
    const rawEntities = decodeBIOTags(
      tokenization.tokens,
      labels,
      confidences,
      text,
    );

    // The floor passed here is the LOWEST threshold across all NER-enabled
    // types (see getMinConfidence), not a per-type threshold.
    // NOTE(review): per-type thresholds are presumably enforced elsewhere in
    // the pipeline — confirm.
    const minConfidence = this.getMinConfidence(policy);
    let spans = convertToSpanMatches(rawEntities, minConfidence);

    // Post-process spans
    spans = cleanupSpanBoundaries(spans, text);
    spans = mergeAdjacentSpans(spans, text);

    // Keep only types the policy enables both globally and for NER
    if (policy !== undefined) {
      spans = spans.filter(
        (span) =>
          policy.enabledTypes.has(span.type) &&
          policy.nerEnabledTypes.has(span.type),
      );
    }

    return {
      spans,
      processingTimeMs: performance.now() - startTime,
      modelVersion: this.config.modelVersion,
    };
  }

  /**
   * Runs ONNX inference for one tokenized input (batch size 1).
   *
   * @param tokenization Object with `inputIds`, `attentionMask` and
   *   `tokenTypeIds` sequences (plain or typed arrays of integers).
   * @returns {Promise<{labels: string[], confidences: number[]}>}
   * @throws {Error} When the session is missing or the model yields no output.
   */
  async runInference(tokenization) {
    if (this.session === null || this.ort === null) {
      throw new Error("Session not initialized");
    }
    const { session, ort } = this;
    const seqLength = tokenization.inputIds.length;

    // TypedArray.from with a mapper works for plain AND typed-array inputs;
    // `values.map(BigInt)` would throw on a typed array.
    const toInt64Tensor = (values) =>
      new ort.Tensor(
        "int64",
        BigInt64Array.from(values, (value) => BigInt(value)),
        [1, seqLength],
      );

    const feeds = {
      input_ids: toInt64Tensor(tokenization.inputIds),
      attention_mask: toInt64Tensor(tokenization.attentionMask),
    };
    // Some models also need token_type_ids; only build the tensor when asked for.
    if (session.inputNames.includes("token_type_ids")) {
      feeds["token_type_ids"] = toInt64Tensor(tokenization.tokenTypeIds);
    }

    const results = await session.run(feeds);

    // The first declared output is treated as the logits tensor.
    const outputName = session.outputNames[0];
    if (outputName === undefined) {
      throw new Error("No output from model");
    }
    const logits = results[outputName];
    if (logits === undefined) {
      throw new Error("Logits output not found");
    }
    return this.processLogits(logits, seqLength);
  }

  /**
   * Converts a logits tensor into one label and one confidence per token.
   *
   * The flat buffer is read row by row. The row width is taken from the
   * tensor's last dimension when available, falling back to the label-map
   * length, so a label map that is shorter or longer than the model's class
   * count no longer shifts every row after the first. Class indices with no
   * entry in the label map decode to "O". Missing buffer entries count as 0.
   *
   * @param logits Tensor-like object with `data` and (optionally) `dims`.
   * @param seqLength Number of tokens to decode.
   * @returns {{labels: string[], confidences: number[]}}
   */
  processLogits(logits, seqLength) {
    const { data, dims } = logits;
    const labelMap = this.config.labelMap;
    const lastDim = dims?.[dims.length - 1];
    const numClasses =
      Number.isInteger(lastDim) && lastDim > 0 ? lastDim : labelMap.length;

    const labels = [];
    const confidences = [];
    for (let token = 0; token < seqLength; token++) {
      const rowStart = token * numClasses;
      const tokenLogits = Array.from(
        { length: numClasses },
        (_, cls) => data[rowStart + cls] ?? 0,
      );
      const probs = softmax(tokenLogits);

      // Argmax; ties keep the lowest index.
      let maxIdx = 0;
      let maxProb = probs[0] ?? 0;
      for (let cls = 1; cls < probs.length; cls++) {
        const prob = probs[cls] ?? 0;
        if (prob > maxProb) {
          maxProb = prob;
          maxIdx = cls;
        }
      }
      labels.push(labelMap[maxIdx] ?? "O");
      confidences.push(maxProb);
    }
    return { labels, confidences };
  }

  /**
   * Gets the minimum confidence threshold from a policy.
   *
   * @param policy Optional policy with `nerEnabledTypes` (iterable) and
   *   `confidenceThresholds` (has `get`).
   * @returns {number} 0.5 without a policy; otherwise the smallest threshold
   *   among NER-enabled types (0.5 for types without an entry), capped at 1.0.
   */
  getMinConfidence(policy) {
    if (policy === undefined) {
      return 0.5;
    }
    let minThreshold = 1.0;
    for (const type of policy.nerEnabledTypes) {
      const threshold = policy.confidenceThresholds.get(type) ?? 0.5;
      if (threshold < minThreshold) {
        minThreshold = threshold;
      }
    }
    return minThreshold;
  }

  /**
   * Gets model version (`config.modelVersion`).
   */
  get version() {
    return this.config.modelVersion;
  }

  /**
   * Checks if model is loaded.
   */
  get loaded() {
    return this.isLoaded;
  }

  /**
   * Disposes of model resources.
   *
   * References are cleared synchronously; the session's `release()` is then
   * called when the runtime provides one. Release failures are ignored on
   * purpose so this method keeps its never-rejecting contract.
   *
   * @returns {Promise<void>}
   */
  async dispose() {
    const session = this.session;
    this.session = null;
    this.tokenizer = null;
    this.isLoaded = false;
    await NERModel.#releaseQuietly(session);
  }

  /** Best-effort release of a session; tolerates runtimes without release(). */
  static async #releaseQuietly(session) {
    if (typeof session?.release !== "function") {
      return;
    }
    try {
      await session.release();
    } catch {
      // Deliberately ignored: cleanup must not mask the caller's outcome.
    }
  }
}
|
|
202
|
+
/**
 * Numerically stable softmax.
 *
 * Each logit is shifted by the largest one before exponentiation so large
 * inputs cannot overflow, then the exponentials are normalised by their sum.
 *
 * @param {number[]} logits Raw scores for one token.
 * @returns {number[]} Probabilities in the same order (empty for empty input).
 */
function softmax(logits) {
  const peak = logits.reduce((highest, value) => Math.max(highest, value), -Infinity);
  const weights = [];
  let total = 0;
  for (const value of logits) {
    const weight = Math.exp(value - peak);
    weights.push(weight);
    total += weight;
  }
  return weights.map((weight) => weight / total);
}
|
|
211
|
+
/**
 * Builds a NERModel from a partial configuration.
 *
 * `modelPath` and `vocabPath` are passed through unchanged. Every other field
 * falls back to a default only when it is null/undefined:
 * `labelMap` -> DEFAULT_LABEL_MAP, `maxLength` -> 512,
 * `doLowerCase` -> false, `modelVersion` -> "1.0.0".
 *
 * @param config Partial model configuration.
 * @returns {NERModel} A model instance that has not been loaded yet.
 */
export function createNERModel(config) {
  const {
    modelPath,
    vocabPath,
    labelMap,
    maxLength,
    doLowerCase,
    modelVersion,
  } = config;
  return new NERModel({
    modelPath,
    vocabPath,
    labelMap: labelMap ?? DEFAULT_LABEL_MAP,
    maxLength: maxLength ?? 512,
    // Case is preserved by default (the original note: XLM-RoBERTa is cased).
    doLowerCase: doLowerCase ?? false,
    modelVersion: modelVersion ?? "1.0.0",
  });
}
|
|
225
|
+
/**
 * Inert stand-in for NERModel, used when no model is available.
 *
 * Exposes the same `version` / `loaded` / `load` / `predict` / `dispose`
 * surface but never detects anything, which makes it suitable for regex-only
 * mode: every prediction resolves to an empty span list.
 */
export class NERModelStub {
  version = "stub-1.0.0";
  loaded = true;

  /** Nothing to load; resolves immediately. */
  load() {
    return Promise.resolve();
  }

  /**
   * Ignores its arguments and resolves to an empty result stamped with the
   * stub's version.
   */
  async predict(_text, _policy) {
    return {
      spans: [],
      processingTimeMs: 0,
      modelVersion: this.version,
    };
  }

  /** Nothing to release; resolves immediately. */
  async dispose() {}
}
|
|
247
|
+
/**
 * Factory for the no-op NER model (for testing or regex-only mode).
 *
 * @returns {NERModelStub} A fresh stub instance on every call.
 */
export function createNERModelStub() {
  const stub = new NERModelStub();
  return stub;
}
|
|
253
|
+
//# sourceMappingURL=ner-model.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ner-model.js","sourceRoot":"","sources":["../../src/ner/ner-model.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,WAAW,EAAmB,MAAM,mBAAmB,CAAC;AAEjE,OAAO,EACL,kBAAkB,EAClB,iBAAiB,GAElB,MAAM,gBAAgB,CAAC;AACxB,OAAO,EACL,aAAa,EACb,oBAAoB,EACpB,qBAAqB,EACrB,kBAAkB,GACnB,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,kBAAkB,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAgCpE;;GAEG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAG;IAC/B,GAAG;IACH,OAAO;IACP,OAAO;IACP,OAAO;IACP,OAAO;IACP,OAAO;IACP,OAAO;IACP,QAAQ;IACR,QAAQ;CACT,CAAC;AAEF;;GAEG;AACH,MAAM,OAAO,QAAQ;IACX,GAAG,GAAsB,IAAI,CAAC;IAC9B,OAAO,GAAY,IAAI,CAAC;IACxB,SAAS,GAA8B,IAAI,CAAC;IAC5C,MAAM,CAAiB;IACvB,QAAQ,GAAG,KAAK,CAAC;IAEzB,YAAY,MAAsB;QAChC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI;QACR,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO;QAE1B,gEAAgE;QAChE,IAAI,CAAC,GAAG,GAAG,MAAM,WAAW,EAAE,CAAC;QAE/B,kBAAkB;QAClB,oFAAoF;QACpF,4FAA4F;QAC5F,IAAI,SAAS,EAAE,EAAE,CAAC;YAChB,MAAM,OAAO,GAAG,MAAM,kBAAkB,EAAE,CAAC;YAC3C,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAChE,8CAA8C;YAC9C,IAAI,CAAC,OAAO,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QACnE,CAAC;aAAM,CAAC;YACN,gDAAgD;YAChD,IAAI,CAAC,OAAO,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,MAAM,CACnD,IAAI,CAAC,MAAM,CAAC,SAAS,CACtB,CAAC;QACJ,CAAC;QAED,0EAA0E;QAC1E,MAAM,KAAK,GAAG,MAAM,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QAC7D,IAAI,CAAC,SAAS,GAAG,IAAI,kBAAkB,CAAC,KAAK,EAAE;YAC7C,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;YAChC,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;SACrC,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO,CACX,IAAY,EACZ,MAA4B;QAE5B,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAEpC,IAAI,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,IAAI,IAAI,CAAC,SAAS,KAAK,IAAI,EAAE,CAAC;YACvE,MAAM,IAAI,KAAK,CAAC,sCAAsC,CAAC,CAAC;QAC1D,CAAC;QAED,iBAAiB;QACjB,MAAM,YAAY,GAAG,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QAEnD,gBAAgB;QAChB,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,YAAY,
CAAC,CAAC;QAEtE,8BAA8B;QAC9B,MAAM,WAAW,GAAG,aAAa,CAC/B,YAAY,CAAC,MAAM,EACnB,MAAM,EACN,WAAW,EACX,IAAI,CACL,CAAC;QAEF,wDAAwD;QACxD,MAAM,aAAa,GAAG,IAAI,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC;QACpD,IAAI,KAAK,GAAG,oBAAoB,CAAC,WAAW,EAAE,aAAa,CAAC,CAAC;QAE7D,qBAAqB;QACrB,KAAK,GAAG,qBAAqB,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;QAC3C,KAAK,GAAG,kBAAkB,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;QAExC,oCAAoC;QACpC,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACzB,KAAK,GAAG,KAAK,CAAC,MAAM,CAClB,CAAC,IAAI,EAAE,EAAE,CACP,MAAM,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC;gBAClC,MAAM,CAAC,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CACxC,CAAC;QACJ,CAAC;QAED,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;QAElC,OAAO;YACL,KAAK;YACL,gBAAgB,EAAE,OAAO,GAAG,SAAS;YACrC,YAAY,EAAE,IAAI,CAAC,MAAM,CAAC,YAAY;SACvC,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,YAAY,CACxB,YAAgC;QAEhC,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,IAAI,IAAI,CAAC,GAAG,KAAK,IAAI,EAAE,CAAC;YAC/C,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QAC7C,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,CAAC,OAMpB,CAAC;QAEF,MAAM,SAAS,GAAG,YAAY,CAAC,QAAQ,CAAC,MAAM,CAAC;QAE/C,iBAAiB;QACjB,MAAM,cAAc,GAAG,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,CACxC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EACrD,CAAC,CAAC,EAAE,SAAS,CAAC,CACf,CAAC;QAEF,MAAM,mBAAmB,GAAG,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,CAC7C,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,YAAY,CAAC,aAAa,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EAC1D,CAAC,CAAC,EAAE,SAAS,CAAC,CACf,CAAC;QAEF,MAAM,kBAAkB,GAAG,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,CAC5C,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,YAAY,CAAC,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EACzD,CAAC,CAAC,EAAE,SAAS,CAAC,CACf,CAAC;QAEF,gBAAgB;QAChB,MAAM,KAAK,GAA4B;YACrC,SAAS,EAAE,cAAc;YACzB,cAAc,EAAE,mBAAmB;SACpC,CAAC;QAEF,uCAAuC;QACvC,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC;QACtC,IAAI,UAAU,CAAC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,CAAC;YAC1C,KAAK,CAAC,gBAAgB,CAAC,GAAG,kBAAkB,CAAC;QAC/C,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAEzC,oBAAoB;QACpB,MAAM,UAAU,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;QAC1C,IAAI,UAAU,KAAK,SAAS,EAAE,CAAC;YAC7B,MAAM,IAAI,KAAK,CAAC,sBA
AsB,CAAC,CAAC;QAC1C,CAAC;QAED,MAAM,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;QACnC,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACzB,MAAM,IAAI,KAAK,CAAC,yBAAyB,CAAC,CAAC;QAC7C,CAAC;QAED,+CAA+C;QAC/C,OAAO,IAAI,CAAC,aAAa,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;IAC/C,CAAC;IAED;;OAEG;IACK,aAAa,CACnB,MAA8B,EAC9B,SAAiB;QAEjB,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC;QACzB,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;QAE9C,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,MAAM,WAAW,GAAa,EAAE,CAAC;QAEjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,4BAA4B;YAC5B,MAAM,WAAW,GAAa,EAAE,CAAC;YACjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;gBACnC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,SAAS,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;YACjD,CAAC;YAED,gBAAgB;YAChB,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;YAEnC,aAAa;YACb,IAAI,MAAM,GAAG,CAAC,CAAC;YACf,IAAI,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACtC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,OAAO,EAAE,CAAC;oBAC9B,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;oBACxB,MAAM,GAAG,CAAC,CAAC;gBACb,CAAC;YACH,CAAC;YAED,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC,CAAC;YACjD,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC5B,CAAC;QAED,OAAO,EAAE,MAAM,EAAE,WAAW,EAAE,CAAC;IACjC,CAAC;IAED;;OAEG;IACK,gBAAgB,CAAC,MAA4B;QACnD,IAAI,MAAM,KAAK,SAAS;YAAE,OAAO,GAAG,CAAC;QAErC,yCAAyC;QACzC,IAAI,YAAY,GAAG,GAAG,CAAC;QACvB,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,eAAe,EAAE,CAAC;YAC1C,MAAM,SAAS,GAAG,MAAM,CAAC,oBAAoB,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC;YAC/D,IAAI,SAAS,GAAG,YAAY,EAAE,CAAC;gBAC7B,YAAY,GAAG,SAAS,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,OAAO,YAAY,CAAC;IACtB,CAAC;IAED;;OAEG;IACH,IAAI,OAAO;QACT,OAAO,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC;IAClC,CAAC;IAED;;OAEG;IACH,IAAI,MAAM;QACR,OAAO,IAAI,CAAC,QAAQ,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,OAAO;QACL,+EAA+E;QAC/E,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACpB,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QACtB,IAAI,CAAC,QAAQ,GAAG,KAAK,CAAC;QACtB
,OAAO,OAAO,CAAC,OAAO,EAAE,CAAC;IAC3B,CAAC;CACF;AAED;;GAEG;AACH,SAAS,OAAO,CAAC,MAAgB;IAC/B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC;IACrC,MAAM,SAAS,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC;IAC5D,MAAM,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC;IACpD,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC;AAC1C,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAC5B,MAA0E;IAE1E,MAAM,UAAU,GAAmB;QACjC,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,QAAQ,EAAE,MAAM,CAAC,QAAQ,IAAI,iBAAiB;QAC9C,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;QAClC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,KAAK,EAAE,uBAAuB;QACjE,YAAY,EAAE,MAAM,CAAC,YAAY,IAAI,OAAO;KAC7C,CAAC;IAEF,OAAO,IAAI,QAAQ,CAAC,UAAU,CAAC,CAAC;AAClC,CAAC;AAED;;;GAGG;AACH,MAAM,OAAO,YAAY;IACd,OAAO,GAAG,YAAY,CAAC;IACvB,MAAM,GAAG,IAAI,CAAC;IAEvB,KAAK,CAAC,IAAI;QACR,QAAQ;IACV,CAAC;IAED,OAAO,CACL,KAAa,EACb,OAA6B;QAE7B,OAAO,OAAO,CAAC,OAAO,CAAC;YACrB,KAAK,EAAE,EAAE;YACT,gBAAgB,EAAE,CAAC;YACnB,YAAY,EAAE,IAAI,CAAC,OAAO;SAC3B,CAAC,CAAC;IACL,CAAC;IAED,OAAO;QACL,QAAQ;QACR,OAAO,OAAO,CAAC,OAAO,EAAE,CAAC;IAC3B,CAAC;CACF;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB;IAChC,OAAO,IAAI,YAAY,EAAE,CAAC;AAC5B,CAAC"}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ONNX Runtime Abstraction
|
|
3
|
+
* Allows switching between onnxruntime-node and onnxruntime-web
|
|
4
|
+
*
|
|
5
|
+
* In browsers without a bundler, automatically loads onnxruntime-web from CDN
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Minimal structural view of an ONNX Runtime tensor: only the members this
 * package reads.
 */
export interface OrtTensor {
    /** Flat element buffer of the tensor. */
    data: Float32Array | BigInt64Array | Int32Array;
    /** Tensor shape, one entry per dimension. */
    dims: ReadonlyArray<number>;
}
|
|
11
|
+
/**
 * Minimal structural view of a loaded ONNX Runtime inference session.
 */
export interface OrtSession {
    /** Names of the model inputs. */
    inputNames: ReadonlyArray<string>;
    /** Names of the model outputs. */
    outputNames: ReadonlyArray<string>;
    /**
     * Runs the model.
     * @param feeds Input tensors keyed by input name.
     * @returns Output tensors keyed by output name.
     */
    run(feeds: Record<string, OrtTensor>): Promise<Record<string, OrtTensor>>;
}
|
|
16
|
+
/**
 * Static side of the runtime's `InferenceSession`: the session factory.
 */
export interface OrtInferenceSession {
    /**
     * Creates a session from a model file path or from in-memory model bytes.
     * @param pathOrBuffer Model location or serialized model.
     * @param options Runtime-specific session options (left untyped here).
     */
    create(
        pathOrBuffer: string | ArrayBuffer | Uint8Array,
        options?: unknown,
    ): Promise<OrtSession>;
}
|
|
19
|
+
/**
 * Constructor signature of the runtime's `Tensor` class.
 */
export interface OrtTensorConstructor {
    /**
     * @param type Element type name (this package passes "int64").
     * @param data Element values, as a typed array or a plain array.
     * @param dims Tensor shape.
     */
    new (
        type: string,
        data: Float32Array | BigInt64Array | Int32Array | number[] | bigint[],
        dims: number[],
    ): OrtTensor;
}
|
|
22
|
+
/**
 * The subset of an ONNX Runtime module (onnxruntime-node or onnxruntime-web)
 * that this package depends on.
 */
export interface OrtRuntime {
    /** Factory for inference sessions. */
    InferenceSession: OrtInferenceSession;
    /** Tensor constructor. */
    Tensor: OrtTensorConstructor;
}
|
|
26
|
+
/**
|
|
27
|
+
* Detects the best ONNX runtime for the current environment.
*
* Returns "web" when a browser-like global (`window`) or `Deno` is present.
* Under Bun it returns "node" if `onnxruntime-node` can be resolved and "web"
* otherwise. In every other case (plain Node.js) it returns "node".
|
|
28
|
+
*/
|
|
29
|
+
export declare function detectRuntime(): "node" | "web";
|
|
30
|
+
/**
|
|
31
|
+
* Loads the appropriate ONNX runtime.
*
* The first runtime that loads successfully is cached at module level and
* returned by every later call, even if a different `preferredRuntime` is
* passed then. When the chosen runtime fails to load, the other one is tried.
*
* @param preferredRuntime Runtime to try first; defaults to detectRuntime().
* @returns The loaded runtime module.
* @throws Error when neither runtime can be loaded.
|
|
32
|
+
*/
|
|
33
|
+
export declare function loadRuntime(preferredRuntime?: "node" | "web"): Promise<OrtRuntime>;
|
|
34
|
+
/**
|
|
35
|
+
* Gets the currently loaded runtime type.
*
* @returns "node" or "web" for the cached runtime, or null when no runtime
*   has been loaded yet (or after resetRuntime()).
|
|
36
|
+
*/
|
|
37
|
+
export declare function getRuntimeType(): "node" | "web" | null;
|
|
38
|
+
/**
|
|
39
|
+
* Resets the runtime (useful for testing).
*
* Clears the cached runtime module and its type, so the next loadRuntime()
* call detects and loads a runtime again.
|
|
40
|
+
*/
|
|
41
|
+
export declare function resetRuntime(): void;
|
|
42
|
+
/**
 * Ambient declarations for the `Bun` and `Deno` globals. They are typed as
 * `unknown` because runtime detection only checks whether they are defined.
 */
declare global {
|
|
43
|
+
var Bun: unknown;
|
|
44
|
+
var Deno: unknown;
|
|
45
|
+
}
|
|
46
|
+
//# sourceMappingURL=onnx-runtime.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"onnx-runtime.d.ts","sourceRoot":"","sources":["../../src/ner/onnx-runtime.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAQH,MAAM,WAAW,SAAS;IACxB,IAAI,EAAE,YAAY,GAAG,aAAa,GAAG,UAAU,CAAC;IAChD,IAAI,EAAE,SAAS,MAAM,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,EAAE,SAAS,MAAM,EAAE,CAAC;IAC9B,WAAW,EAAE,SAAS,MAAM,EAAE,CAAC;IAC/B,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC;CAC3E;AAED,MAAM,WAAW,mBAAmB;IAClC,MAAM,CACJ,YAAY,EAAE,MAAM,GAAG,WAAW,GAAG,UAAU,EAC/C,OAAO,CAAC,EAAE,OAAO,GAChB,OAAO,CAAC,UAAU,CAAC,CAAC;CACxB;AAED,MAAM,WAAW,oBAAoB;IACnC,KACE,IAAI,EAAE,MAAM,EACZ,IAAI,EAAE,YAAY,GAAG,aAAa,GAAG,UAAU,GAAG,MAAM,EAAE,GAAG,MAAM,EAAE,EACrE,IAAI,EAAE,MAAM,EAAE,GACb,SAAS,CAAC;CACd;AAED,MAAM,WAAW,UAAU;IACzB,gBAAgB,EAAE,mBAAmB,CAAC;IACtC,MAAM,EAAE,oBAAoB,CAAC;CAC9B;AAQD;;GAEG;AACH,wBAAgB,aAAa,IAAI,MAAM,GAAG,KAAK,CA4B9C;AAiCD;;GAEG;AACH,wBAAsB,WAAW,CAC/B,gBAAgB,CAAC,EAAE,MAAM,GAAG,KAAK,GAChC,OAAO,CAAC,UAAU,CAAC,CA2CrB;AAED;;GAEG;AACH,wBAAgB,cAAc,IAAI,MAAM,GAAG,KAAK,GAAG,IAAI,CAEtD;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,IAAI,CAGnC;AAGD,OAAO,CAAC,MAAM,CAAC;IAEb,IAAI,GAAG,EAAE,OAAO,CAAC;IAEjB,IAAI,IAAI,EAAE,OAAO,CAAC;CACnB"}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ONNX Runtime Abstraction
|
|
3
|
+
* Allows switching between onnxruntime-node and onnxruntime-web
|
|
4
|
+
*
|
|
5
|
+
* In browsers without a bundler, automatically loads onnxruntime-web from CDN
|
|
6
|
+
*/
|
|
7
|
+
// Pinned ESM bundle of onnxruntime-web (the bundled build that includes the
// WebAssembly backend). It is imported from the CDN only when the bare
// "onnxruntime-web" specifier cannot be resolved in a browser.
const ONNX_WEB_CDN_URL = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.19.2/dist/ort.bundle.min.mjs";

/**
 * Module-level cache for the loaded runtime.
 * Both values stay null until loadRuntime() succeeds and are cleared again by
 * resetRuntime().
 */
let _runtime = null; // the loaded runtime module
let _runtimeType = null; // "node" | "web" for the module above
|
|
15
|
+
/**
 * Detects the best ONNX runtime for the current environment.
 *
 * - Browser-like environments -> "web". Besides `window`, a defined
 *   `importScripts` function is treated as browser-like so that Web Workers
 *   (which have no `window`) are no longer classified as "node".
 * - Deno -> "web".
 * - Bun -> "node" when `onnxruntime-node` can be resolved, otherwise "web".
 * - Anything else (plain Node.js) -> "node".
 *
 * @returns {"node" | "web"} The runtime flavour to try first.
 */
export function detectRuntime() {
  const hasGlobal = (name) => typeof globalThis[name] !== "undefined";

  const isBrowserLike =
    hasGlobal("window") || typeof globalThis.importScripts === "function";
  if (isBrowserLike || hasGlobal("Deno")) {
    return "web";
  }

  // For Bun, try node first, fall back to web
  if (hasGlobal("Bun")) {
    try {
      // Quick check if onnxruntime-node is loadable. Any failure (including
      // `require` itself being unavailable) means the web runtime is used.
      require.resolve("onnxruntime-node");
      return "node";
    } catch {
      return "web";
    }
  }

  // Default to node for Node.js
  return "node";
}
|
|
43
|
+
/**
 * Attempts to load onnxruntime-web, first via bare import, then via CDN.
 *
 * The bare specifier works with bundlers or import maps. When it cannot be
 * resolved and the code runs in a browser context (main thread or Web
 * Worker), the pinned CDN bundle is imported instead. Outside a browser there
 * is no CDN fallback.
 *
 * Every thrown error carries the underlying failure as `cause`, so the root
 * problem (missing package, network error, CSP block, ...) stays visible.
 *
 * @returns {Promise<object>} The onnxruntime-web module namespace.
 * @throws {Error} "onnxruntime-web is not available" outside a browser when
 *   the bare import fails; "Failed to load onnxruntime-web from CDN: ..."
 *   when the CDN import fails.
 */
async function loadOnnxWeb() {
  let bareImportError;
  try {
    // First try bare import (works with bundlers or import maps)
    return await import("onnxruntime-web");
  } catch (err) {
    bareImportError = err;
  }

  const inBrowserContext =
    typeof globalThis.window !== "undefined" ||
    typeof globalThis.importScripts === "function";
  if (!inBrowserContext) {
    throw new Error("onnxruntime-web is not available", {
      cause: bareImportError,
    });
  }

  try {
    // Dynamic import from CDN URL; webpackIgnore keeps bundlers from trying
    // to resolve the URL at build time.
    return await import(/* webpackIgnore: true */ ONNX_WEB_CDN_URL);
  } catch (cdnError) {
    throw new Error(
      `Failed to load onnxruntime-web from CDN: ${String(cdnError)}`,
      { cause: cdnError },
    );
  }
}
|
|
72
|
+
/**
 * Imports one runtime flavour. "node" selects onnxruntime-node; any other
 * value selects onnxruntime-web (with its CDN fallback).
 */
async function importOrtModule(runtimeType) {
  if (runtimeType === "node") {
    return { ort: await import("onnxruntime-node"), type: "node" };
  }
  return { ort: await loadOnnxWeb(), type: "web" };
}

/**
 * Loads the appropriate ONNX runtime.
 *
 * The first runtime that loads successfully is cached at module level and
 * returned by every later call (a different `preferredRuntime` passed later
 * does not replace it; call resetRuntime() first). When the preferred or
 * detected runtime fails to load, the other one is tried.
 *
 * @param {"node" | "web"} [preferredRuntime] Runtime to try first; defaults
 *   to detectRuntime().
 * @returns {Promise<object>} The loaded runtime module.
 * @throws {Error} When neither runtime can be loaded. The message quotes the
 *   first failure; `cause` is an AggregateError holding both failures.
 */
export async function loadRuntime(preferredRuntime) {
  if (_runtime !== null) {
    return _runtime;
  }

  const primaryType = preferredRuntime ?? detectRuntime();
  const fallbackType = primaryType === "node" ? "web" : "node";

  let loaded;
  try {
    loaded = await importOrtModule(primaryType);
  } catch (primaryError) {
    // If preferred runtime fails, try the other
    try {
      loaded = await importOrtModule(fallbackType);
    } catch (fallbackError) {
      throw new Error(
        `Failed to load ONNX runtime. Install either 'onnxruntime-node' or 'onnxruntime-web'.\n` +
          `Original error: ${String(primaryError)}`,
        {
          cause: new AggregateError(
            [primaryError, fallbackError],
            "Both ONNX runtimes failed to load",
          ),
        },
      );
    }
  }

  _runtime = loaded.ort;
  _runtimeType = loaded.type;
  return _runtime;
}
|
|
117
|
+
/**
 * Reports which ONNX runtime flavour is currently cached.
 *
 * @returns The module-level runtime type: `"node"` or `"web"` once
 *   `loadRuntime()` has succeeded, `null` after `resetRuntime()`.
 */
export function getRuntimeType() {
    const activeType = _runtimeType;
    return activeType;
}
|
|
123
|
+
/**
 * Drops the cached runtime and its type so that the next `loadRuntime()`
 * call performs a fresh load (useful for testing).
 */
export function resetRuntime() {
    _runtimeType = null;
    _runtime = null;
}
|
|
130
|
+
//# sourceMappingURL=onnx-runtime.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"onnx-runtime.js","sourceRoot":"","sources":["../../src/ner/onnx-runtime.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,uEAAuE;AACvE,kEAAkE;AAClE,MAAM,gBAAgB,GACpB,6EAA6E,CAAC;AAkChF;;GAEG;AACH,IAAI,QAAQ,GAAsB,IAAI,CAAC;AACvC,IAAI,YAAY,GAA0B,IAAI,CAAC;AAE/C;;GAEG;AACH,MAAM,UAAU,aAAa;IAC3B,wBAAwB;IACxB,MAAM,KAAK,GAAG,OAAO,UAAU,CAAC,GAAG,KAAK,WAAW,CAAC;IAEpD,+CAA+C;IAC/C,uEAAuE;IACvE,MAAM,SAAS,GAAG,OAAO,UAAU,CAAC,MAAM,KAAK,WAAW,CAAC;IAE3D,yBAAyB;IACzB,MAAM,MAAM,GAAG,OAAO,UAAU,CAAC,IAAI,KAAK,WAAW,CAAC;IAEtD,IAAI,SAAS,IAAI,MAAM,EAAE,CAAC;QACxB,OAAO,KAAK,CAAC;IACf,CAAC;IAED,4CAA4C;IAC5C,IAAI,KAAK,EAAE,CAAC;QACV,IAAI,CAAC;YACH,8CAA8C;YAC9C,OAAO,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC;YACpC,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,8BAA8B;IAC9B,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,WAAW;IACxB,6DAA6D;IAC7D,IAAI,CAAC;QACH,6DAA6D;QAC7D,oDAAoD;QACpD,MAAM,GAAG,GAAG,CAAC,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAe,CAAC;QAC5D,OAAO,GAAG,CAAC;IACb,CAAC;IAAC,MAAM,CAAC;QACP,8DAA8D;QAC9D,uEAAuE;QACvE,MAAM,SAAS,GAAG,OAAO,UAAU,CAAC,MAAM,KAAK,WAAW,CAAC;QAC3D,IAAI,SAAS,EAAE,CAAC;YACd,IAAI,CAAC;gBACH,8BAA8B;gBAC9B,MAAM,GAAG,GAAG,CAAC,MAAM,MAAM;gBACvB,yBAAyB,CAAC,gBAAgB,CAC3C,CAAe,CAAC;gBACjB,OAAO,GAAG,CAAC;YACb,CAAC;YAAC,OAAO,QAAQ,EAAE,CAAC;gBAClB,MAAM,IAAI,KAAK,CACb,4CAA4C,MAAM,CAAC,QAAQ,CAAC,EAAE,CAC/D,CAAC;YACJ,CAAC;QACH,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,kCAAkC,CAAC,CAAC;IACtD,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,gBAAiC;IAEjC,IAAI,QAAQ,KAAK,IAAI,EAAE,CAAC;QACtB,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,WAAW,GAAG,gBAAgB,IAAI,aAAa,EAAE,CAAC;IAExD,IAAI,CAAC;QACH,IAAI,WAAW,KAAK,MAAM,EAAE,CAAC;YAC3B,sCAAsC;YACtC,MAAM,GAAG,GAAG,CAAC,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAe,CAAC;YAC7D,QAAQ,GAAG,GAAG,CAAC;YACf,YAAY,GAAG,MAAM,CAAC;QACxB,CAAC;aAAM,CAAC;YACN,wDAAwD;YACxD,MAAM,GAAG,GAAG,MAAM,WAAW,EAAE,CAAC;YAChC,QAAQ,GAAG,GAAG,CAAC;YACf,YAAY,GAAG,KAAK,CAAC;QACvB,CAAC;IACH,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,4CAA4C;QAC5C,MAAM,YAAY,GAAG,W
AAW,KAAK,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC;QAE7D,IAAI,CAAC;YACH,IAAI,YAAY,KAAK,MAAM,EAAE,CAAC;gBAC5B,MAAM,GAAG,GAAG,CAAC,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAe,CAAC;gBAC7D,QAAQ,GAAG,GAAG,CAAC;gBACf,YAAY,GAAG,MAAM,CAAC;YACxB,CAAC;iBAAM,CAAC;gBACN,wDAAwD;gBACxD,MAAM,GAAG,GAAG,MAAM,WAAW,EAAE,CAAC;gBAChC,QAAQ,GAAG,GAAG,CAAC;gBACf,YAAY,GAAG,KAAK,CAAC;YACvB,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,KAAK,CACb,wFAAwF;gBACtF,mBAAmB,MAAM,CAAC,CAAC,CAAC,EAAE,CACjC,CAAC;QACJ,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc;IAC5B,OAAO,YAAY,CAAC;AACtB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY;IAC1B,QAAQ,GAAG,IAAI,CAAC;IAChB,YAAY,GAAG,IAAI,CAAC;AACtB,CAAC"}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* HuggingFace Tokenizer
|
|
3
|
+
* Loads and uses tokenizers from HuggingFace's tokenizer.json format
|
|
4
|
+
* Supports Unigram (SentencePiece) and BPE tokenizers
|
|
5
|
+
*/
|
|
6
|
+
/**
 * A single token together with its position in the original text.
 */
export interface Token {
    /** ID of the token in the vocabulary. */
    id: number;
    /** The token's string form. */
    token: string;
    /** Character offset in the original text where the token starts. */
    start: number;
    /** Character offset in the original text where the token ends. */
    end: number;
    /** True when this token is a continuation token. */
    isContinuation: boolean;
    /** True when this token is a special token. */
    isSpecial: boolean;
}
|
|
23
|
+
/**
 * Everything produced by tokenizing one piece of text.
 */
export interface TokenizationResult {
    /** The tokens, in order. */
    tokens: Token[];
    /** Input IDs to feed to the model. */
    inputIds: number[];
    /** Attention mask. */
    attentionMask: number[];
    /** Token type IDs (for BERT-style models). */
    tokenTypeIds: number[];
    /** For each token index, its character span `[start, end]`, or `null`. */
    tokenToCharSpan: ([number, number] | null)[];
}
|
|
38
|
+
/**
 * Options that control tokenization.
 */
export interface TokenizerConfig {
    /** Maximum sequence length. */
    maxLength: number;
    /** Whether the input is lowercased. */
    doLowerCase: boolean;
}
|
|
47
|
+
/**
 * The default tokenizer configuration (a complete `TokenizerConfig`).
 */
export declare const DEFAULT_TOKENIZER_CONFIG: TokenizerConfig;
|
|
51
|
+
/**
 * WordPiece tokenizer. Works with vocabularies from both the HuggingFace
 * JSON format and the vocab.txt format.
 */
export declare class WordPieceTokenizer {
    // Vocabulary state and configuration.
    private vocab;
    private inverseVocab;
    private config;
    private sortedVocab;
    // IDs of the special tokens.
    private clsId;
    private sepId;
    private padId;
    private unkId;
    // String forms of the special tokens.
    private clsToken;
    private sepToken;
    private padToken;
    private unkToken;
    /**
     * @param vocab Map from token string to token ID.
     * @param config Optional partial configuration.
     */
    constructor(vocab: Map<string, number>, config?: Partial<TokenizerConfig>);
    /** Detects the special tokens from the vocabulary. */
    private detectSpecialTokens;
    /**
     * Splits text into tokens while tracking their offsets.
     *
     * @param text Text to tokenize.
     * @returns The tokens plus the model inputs derived from them.
     */
    tokenize(text: string): TokenizationResult;
    /** Finds the best matching token using greedy longest-match. */
    private findBestToken;
    /**
     * Turns token IDs back into text.
     *
     * @param tokenIds IDs to decode.
     * @returns The decoded text.
     */
    decode(tokenIds: number[]): string;
    /** Size of the vocabulary. */
    get vocabSize(): number;
    /**
     * Looks up a token ID by its string.
     *
     * @param token Token string to look up.
     * @returns The ID, or `undefined`.
     */
    getTokenId(token: string): number | undefined;
    /**
     * Looks up a token string by its ID.
     *
     * @param id Token ID to look up.
     * @returns The token string, or `undefined`.
     */
    getToken(id: number): string | undefined;
}
|
|
97
|
+
/**
 * Loads a vocabulary from a file; both tokenizer.json and vocab.txt are
 * supported. Goes through the storage abstraction for browser compatibility.
 *
 * @param filePath Path of the vocabulary file.
 * @returns Promise of a map from token string to token ID.
 */
export declare function loadVocabFromFile(filePath: string): Promise<Map<string, number>>;
|
|
102
|
+
/**
 * Loads a vocabulary from a content string, for callers that already have
 * the content available.
 *
 * @param content Vocabulary content.
 * @param format Optional format of `content`: `'json'` or `'txt'`.
 * @returns Map from token string to token ID.
 */
export declare function loadVocabFromContent(content: string, format?: 'json' | 'txt'): Map<string, number>;
|
|
106
|
+
/**
 * Parses content in HuggingFace's tokenizer.json format.
 *
 * @param content The tokenizer.json content as a string.
 * @returns Map from token string to token ID.
 */
export declare function parseHFTokenizerJson(content: string): Map<string, number>;
|
|
110
|
+
/**
 * Parses a vocabulary from string content in the vocab.txt format.
 *
 * @param content The vocab.txt content as a string.
 * @returns Map from token string to token ID.
 */
export declare function parseVocab(content: string): Map<string, number>;
|
|
114
|
+
/**
 * Builds a minimal vocabulary intended for tests.
 *
 * @returns Map from token string to token ID.
 */
export declare function createTestVocab(): Map<string, number>;
|
|
118
|
+
//# sourceMappingURL=tokenizer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../../src/ner/tokenizer.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH;;GAEG;AACH,MAAM,WAAW,KAAK;IACpB,6BAA6B;IAC7B,EAAE,EAAE,MAAM,CAAC;IACX,mBAAmB;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,8CAA8C;IAC9C,KAAK,EAAE,MAAM,CAAC;IACd,4CAA4C;IAC5C,GAAG,EAAE,MAAM,CAAC;IACZ,2CAA2C;IAC3C,cAAc,EAAE,OAAO,CAAC;IACxB,sCAAsC;IACtC,SAAS,EAAE,OAAO,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,sBAAsB;IACtB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,0BAA0B;IAC1B,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,qBAAqB;IACrB,aAAa,EAAE,MAAM,EAAE,CAAC;IACxB,6CAA6C;IAC7C,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,8DAA8D;IAC9D,eAAe,EAAE,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAAC,CAAC;CACjD;AAsBD;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,8BAA8B;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,iCAAiC;IACjC,WAAW,EAAE,OAAO,CAAC;CACtB;AAED;;GAEG;AACH,eAAO,MAAM,wBAAwB,EAAE,eAGtC,CAAC;AAEF;;GAEG;AACH,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,KAAK,CAAsB;IACnC,OAAO,CAAC,YAAY,CAAsB;IAC1C,OAAO,CAAC,MAAM,CAAkB;IAChC,OAAO,CAAC,WAAW,CAA0B;IAG7C,OAAO,CAAC,KAAK,CAAa;IAC1B,OAAO,CAAC,KAAK,CAAa;IAC1B,OAAO,CAAC,KAAK,CAAa;IAC1B,OAAO,CAAC,KAAK,CAAa;IAG1B,OAAO,CAAC,QAAQ,CAAiB;IACjC,OAAO,CAAC,QAAQ,CAAkB;IAClC,OAAO,CAAC,QAAQ,CAAmB;IACnC,OAAO,CAAC,QAAQ,CAAmB;gBAEvB,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,GAAE,OAAO,CAAC,eAAe,CAAM;IAiB7E;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAyB3B;;OAEG;IACH,QAAQ,CAAC,IAAI,EAAE,MAAM,GAAG,kBAAkB;IAsF1C;;OAEG;IACH,OAAO,CAAC,aAAa;IA0CrB;;OAEG;IACH,MAAM,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,MAAM;IAmBlC;;OAEG;IACH,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED;;OAEG;IACH,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS;IAI7C;;OAEG;IACH,QAAQ,CAAC,EAAE,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS;CAGzC;AAED;;;GAGG;AACH,wBAAsB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAWtF;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,GAAE,MAAM,GAAG,KAAc,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAM1G;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAmCzE;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,O
AAO,EAAE,MAAM,GAAG,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAY/D;AAED;;GAEG;AACH,wBAAgB,eAAe,IAAI,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAuBrD"}
|