rehydra 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +615 -0
- package/dist/crypto/index.d.ts +6 -0
- package/dist/crypto/index.d.ts.map +1 -0
- package/dist/crypto/index.js +6 -0
- package/dist/crypto/index.js.map +1 -0
- package/dist/crypto/pii-map-crypto.d.ts +114 -0
- package/dist/crypto/pii-map-crypto.d.ts.map +1 -0
- package/dist/crypto/pii-map-crypto.js +228 -0
- package/dist/crypto/pii-map-crypto.js.map +1 -0
- package/dist/index.d.ts +180 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +384 -0
- package/dist/index.js.map +1 -0
- package/dist/ner/bio-decoder.d.ts +64 -0
- package/dist/ner/bio-decoder.d.ts.map +1 -0
- package/dist/ner/bio-decoder.js +216 -0
- package/dist/ner/bio-decoder.js.map +1 -0
- package/dist/ner/index.d.ts +10 -0
- package/dist/ner/index.d.ts.map +1 -0
- package/dist/ner/index.js +10 -0
- package/dist/ner/index.js.map +1 -0
- package/dist/ner/model-manager.d.ts +111 -0
- package/dist/ner/model-manager.d.ts.map +1 -0
- package/dist/ner/model-manager.js +325 -0
- package/dist/ner/model-manager.js.map +1 -0
- package/dist/ner/ner-model.d.ts +114 -0
- package/dist/ner/ner-model.d.ts.map +1 -0
- package/dist/ner/ner-model.js +253 -0
- package/dist/ner/ner-model.js.map +1 -0
- package/dist/ner/onnx-runtime.d.ts +46 -0
- package/dist/ner/onnx-runtime.d.ts.map +1 -0
- package/dist/ner/onnx-runtime.js +130 -0
- package/dist/ner/onnx-runtime.js.map +1 -0
- package/dist/ner/tokenizer.d.ts +118 -0
- package/dist/ner/tokenizer.d.ts.map +1 -0
- package/dist/ner/tokenizer.js +332 -0
- package/dist/ner/tokenizer.js.map +1 -0
- package/dist/pipeline/index.d.ts +12 -0
- package/dist/pipeline/index.d.ts.map +1 -0
- package/dist/pipeline/index.js +12 -0
- package/dist/pipeline/index.js.map +1 -0
- package/dist/pipeline/prenormalize.d.ts +48 -0
- package/dist/pipeline/prenormalize.d.ts.map +1 -0
- package/dist/pipeline/prenormalize.js +94 -0
- package/dist/pipeline/prenormalize.js.map +1 -0
- package/dist/pipeline/resolver.d.ts +56 -0
- package/dist/pipeline/resolver.d.ts.map +1 -0
- package/dist/pipeline/resolver.js +239 -0
- package/dist/pipeline/resolver.js.map +1 -0
- package/dist/pipeline/semantic-data-loader.d.ts +165 -0
- package/dist/pipeline/semantic-data-loader.d.ts.map +1 -0
- package/dist/pipeline/semantic-data-loader.js +655 -0
- package/dist/pipeline/semantic-data-loader.js.map +1 -0
- package/dist/pipeline/semantic-enricher.d.ts +112 -0
- package/dist/pipeline/semantic-enricher.d.ts.map +1 -0
- package/dist/pipeline/semantic-enricher.js +318 -0
- package/dist/pipeline/semantic-enricher.js.map +1 -0
- package/dist/pipeline/tagger.d.ts +114 -0
- package/dist/pipeline/tagger.d.ts.map +1 -0
- package/dist/pipeline/tagger.js +374 -0
- package/dist/pipeline/tagger.js.map +1 -0
- package/dist/pipeline/title-extractor.d.ts +79 -0
- package/dist/pipeline/title-extractor.d.ts.map +1 -0
- package/dist/pipeline/title-extractor.js +801 -0
- package/dist/pipeline/title-extractor.js.map +1 -0
- package/dist/pipeline/validator.d.ts +65 -0
- package/dist/pipeline/validator.d.ts.map +1 -0
- package/dist/pipeline/validator.js +264 -0
- package/dist/pipeline/validator.js.map +1 -0
- package/dist/recognizers/base.d.ts +78 -0
- package/dist/recognizers/base.d.ts.map +1 -0
- package/dist/recognizers/base.js +100 -0
- package/dist/recognizers/base.js.map +1 -0
- package/dist/recognizers/bic-swift.d.ts +10 -0
- package/dist/recognizers/bic-swift.d.ts.map +1 -0
- package/dist/recognizers/bic-swift.js +107 -0
- package/dist/recognizers/bic-swift.js.map +1 -0
- package/dist/recognizers/credit-card.d.ts +32 -0
- package/dist/recognizers/credit-card.d.ts.map +1 -0
- package/dist/recognizers/credit-card.js +160 -0
- package/dist/recognizers/credit-card.js.map +1 -0
- package/dist/recognizers/custom-id.d.ts +28 -0
- package/dist/recognizers/custom-id.d.ts.map +1 -0
- package/dist/recognizers/custom-id.js +116 -0
- package/dist/recognizers/custom-id.js.map +1 -0
- package/dist/recognizers/email.d.ts +10 -0
- package/dist/recognizers/email.d.ts.map +1 -0
- package/dist/recognizers/email.js +75 -0
- package/dist/recognizers/email.js.map +1 -0
- package/dist/recognizers/iban.d.ts +14 -0
- package/dist/recognizers/iban.d.ts.map +1 -0
- package/dist/recognizers/iban.js +67 -0
- package/dist/recognizers/iban.js.map +1 -0
- package/dist/recognizers/index.d.ts +20 -0
- package/dist/recognizers/index.d.ts.map +1 -0
- package/dist/recognizers/index.js +42 -0
- package/dist/recognizers/index.js.map +1 -0
- package/dist/recognizers/ip-address.d.ts +14 -0
- package/dist/recognizers/ip-address.d.ts.map +1 -0
- package/dist/recognizers/ip-address.js +183 -0
- package/dist/recognizers/ip-address.js.map +1 -0
- package/dist/recognizers/phone.d.ts +10 -0
- package/dist/recognizers/phone.d.ts.map +1 -0
- package/dist/recognizers/phone.js +145 -0
- package/dist/recognizers/phone.js.map +1 -0
- package/dist/recognizers/registry.d.ts +59 -0
- package/dist/recognizers/registry.d.ts.map +1 -0
- package/dist/recognizers/registry.js +113 -0
- package/dist/recognizers/registry.js.map +1 -0
- package/dist/recognizers/url.d.ts +14 -0
- package/dist/recognizers/url.d.ts.map +1 -0
- package/dist/recognizers/url.js +121 -0
- package/dist/recognizers/url.js.map +1 -0
- package/dist/types/index.d.ts +197 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +80 -0
- package/dist/types/index.js.map +1 -0
- package/dist/types/pii-types.d.ts +50 -0
- package/dist/types/pii-types.d.ts.map +1 -0
- package/dist/types/pii-types.js +114 -0
- package/dist/types/pii-types.js.map +1 -0
- package/dist/utils/iban-checksum.d.ts +23 -0
- package/dist/utils/iban-checksum.d.ts.map +1 -0
- package/dist/utils/iban-checksum.js +106 -0
- package/dist/utils/iban-checksum.js.map +1 -0
- package/dist/utils/index.d.ts +10 -0
- package/dist/utils/index.d.ts.map +1 -0
- package/dist/utils/index.js +10 -0
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/luhn.d.ts +17 -0
- package/dist/utils/luhn.d.ts.map +1 -0
- package/dist/utils/luhn.js +55 -0
- package/dist/utils/luhn.js.map +1 -0
- package/dist/utils/offsets.d.ts +86 -0
- package/dist/utils/offsets.d.ts.map +1 -0
- package/dist/utils/offsets.js +124 -0
- package/dist/utils/offsets.js.map +1 -0
- package/dist/utils/path.d.ts +34 -0
- package/dist/utils/path.d.ts.map +1 -0
- package/dist/utils/path.js +96 -0
- package/dist/utils/path.js.map +1 -0
- package/dist/utils/storage-browser.d.ts +51 -0
- package/dist/utils/storage-browser.d.ts.map +1 -0
- package/dist/utils/storage-browser.js +381 -0
- package/dist/utils/storage-browser.js.map +1 -0
- package/dist/utils/storage-node.d.ts +43 -0
- package/dist/utils/storage-node.d.ts.map +1 -0
- package/dist/utils/storage-node.js +93 -0
- package/dist/utils/storage-node.js.map +1 -0
- package/dist/utils/storage.d.ts +70 -0
- package/dist/utils/storage.d.ts.map +1 -0
- package/dist/utils/storage.js +69 -0
- package/dist/utils/storage.js.map +1 -0
- package/package.json +66 -0
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* NER Model Manager
|
|
3
|
+
* Handles automatic downloading and caching of NER models from Hugging Face Hub
|
|
4
|
+
* Browser-compatible using storage abstraction layer
|
|
5
|
+
*/
|
|
6
|
+
import { getStorageProvider } from "../utils/storage.js";
|
|
7
|
+
import { join, basename } from "../utils/path.js";
|
|
8
|
+
/**
 * Registry of the NER model variants that can be downloaded from Hugging Face Hub.
 *
 * Using ELAN's ONNX exports, which are optimized for JS/ONNX runtime:
 * https://huggingface.co/tjruesch/xlm-roberta-base-ner-hrl-onnx
 *
 * Both variants live in the same repository and subfolder and are stored
 * locally as "model.onnx"; they differ only in the repository file fetched.
 * Every entry gets its own `files` and `labelMap` arrays (nothing is shared
 * between the variants).
 */
export const MODEL_REGISTRY = (() => {
    const repo = "tjruesch/xlm-roberta-base-ner-hrl-onnx";
    const subfolder = "onnx";
    // Fresh label array per variant so the two entries never alias each other.
    const freshLabels = () => [
        "O",
        "B-DATE",
        "I-DATE",
        "B-PER",
        "I-PER",
        "B-ORG",
        "I-ORG",
        "B-LOC",
        "I-LOC",
    ];
    // Required weights file, always saved locally under the same name.
    const weights = (repoFile) => ({
        repoFile,
        localFile: "model.onnx",
        required: true,
    });
    return {
        standard: {
            id: "xlm-roberta-ner-standard",
            name: "XLM-RoBERTa NER (Standard)",
            description: "Multilingual NER model supporting EN, DE, FR, ES, and more",
            size: "~1.1 GB",
            hfRepo: repo,
            hfSubfolder: subfolder,
            files: [weights("model.onnx")],
            labelMap: freshLabels(),
        },
        quantized: {
            id: "xlm-roberta-ner-quantized",
            name: "XLM-RoBERTa NER (Quantized)",
            description: "Quantized version, ~4x smaller with minimal accuracy loss",
            size: "~265 MB",
            hfRepo: repo,
            hfSubfolder: subfolder,
            files: [weights("model_quantized.onnx")],
            labelMap: freshLabels(),
        },
    };
})();
|
|
64
|
+
/**
 * Tokenizer/config files fetched for every model variant (identical for both).
 * Each file keeps its repository name locally; only tokenizer.json is required.
 */
const TOKENIZER_FILES = [
    ["tokenizer.json", true],
    ["tokenizer_config.json", false],
    ["special_tokens_map.json", false],
    ["config.json", false],
].map(([fileName, required]) => ({
    repoFile: fileName,
    localFile: fileName,
    required,
}));
|
|
81
|
+
// Storage provider instance; stays null until the first getStorage() call
let storageProvider = null;
/**
 * Returns the storage provider, resolving it through getStorageProvider()
 * on first use and reusing the same instance afterwards.
 */
async function getStorage() {
    if (storageProvider !== null) {
        return storageProvider;
    }
    storageProvider = await getStorageProvider();
    return storageProvider;
}
|
|
92
|
+
/**
 * Resolves the directory under which all models are cached.
 * The location is whatever the storage provider reports for the "models"
 * cache (platform-specific, or a virtual path in the browser).
 */
export async function getModelCacheDir() {
    return (await getStorage()).getCacheDir("models");
}
|
|
100
|
+
/**
 * Resolves the cache directory of one model variant: the model cache
 * directory joined with the variant's mode name.
 */
export async function getModelPath(mode) {
    return join(await getModelCacheDir(), mode);
}
|
|
107
|
+
/**
 * Checks whether a model variant is completely present in the cache.
 *
 * A variant counts as downloaded only when all of these exist in its
 * cache directory:
 *   - every file marked `required` in the variant's registry entry,
 *   - tokenizer.json,
 *   - label_map.json (written last by downloadModel, so its presence also
 *     shows that a previous download ran to completion).
 *
 * @param mode - key of the variant in MODEL_REGISTRY
 * @returns true when all of the files above exist; false otherwise,
 *          including for unknown modes and when an existence check throws
 */
export async function isModelDownloaded(mode) {
    const storage = await getStorage();
    const modelDir = await getModelPath(mode);
    const info = MODEL_REGISTRY[mode];
    try {
        // For an unknown mode `info` is undefined; the resulting TypeError is
        // caught below and reported as "not downloaded".
        const requiredNames = [
            ...info.files.filter((f) => f.required).map((f) => f.localFile),
            "tokenizer.json",
            "label_map.json",
        ];
        // Sequential on purpose: stop at the first missing file.
        for (const name of requiredNames) {
            const present = await storage.exists(join(modelDir, name));
            if (!present) {
                return false;
            }
        }
        return true;
    }
    catch {
        // Best effort: any failure while probing means "treat as not downloaded".
        return false;
    }
}
|
|
130
|
+
/**
 * Builds the Hugging Face Hub download URL for a file on a repository's
 * `main` revision.
 *
 * @param repo - repository id, e.g. "owner/name"
 * @param filename - file name inside the repository (or inside `subfolder`)
 * @param subfolder - optional folder inside the repository; undefined, null
 *                    and "" all mean "repository root"
 * @returns URL of the form https://huggingface.co/<repo>/resolve/main/<path>
 */
function getHuggingFaceUrl(repo, filename, subfolder) {
    // `!= null` covers both undefined and null, so a null subfolder is not
    // rendered as the literal path segment "null".
    const hasSubfolder = subfolder != null && subfolder !== "";
    const filePath = hasSubfolder ? `${subfolder}/${filename}` : filename;
    return `https://huggingface.co/${repo}/resolve/main/${filePath}`;
}
|
|
139
|
+
/**
 * Downloads a URL with fetch() and returns the whole body as one Uint8Array.
 *
 * @param url - address to fetch
 * @param fileName - name reported as `file` in progress events
 * @param onProgress - optional callback invoked after every received chunk
 *        with `{ file, bytesDownloaded, totalBytes, percent }`.
 *        `totalBytes` is the parsed content-length header, or null when the
 *        header is missing or not a non-negative integer. `percent` is null
 *        when `totalBytes` is null or 0, otherwise a rounded value capped at 100.
 * @returns the concatenated body bytes
 * @throws Error "File not found: <url>" on HTTP 404, "Failed to download ..."
 *         on any other non-OK status, and "Response body is not readable"
 *         when the response exposes no body stream
 */
async function downloadFileData(url, fileName, onProgress) {
    const response = await fetch(url, {
        headers: {
            "User-Agent": "rehydra/1.0.0",
        },
    });
    if (!response.ok) {
        if (response.status === 404) {
            throw new Error(`File not found: ${url}`);
        }
        throw new Error(`Failed to download ${url}: ${response.status} ${response.statusText}`);
    }
    // A missing or malformed header parses to NaN; report that as "unknown"
    // (null) instead of leaking NaN into progress events.
    const declaredLength = Number.parseInt(response.headers.get("content-length") ?? "", 10);
    const total = Number.isFinite(declaredLength) && declaredLength >= 0 ? declaredLength : null;
    const reader = response.body?.getReader();
    if (reader == null) {
        throw new Error("Response body is not readable");
    }
    const chunks = [];
    let bytesDownloaded = 0;
    for (;;) {
        const { done, value } = await reader.read();
        if (done) {
            break;
        }
        chunks.push(value);
        bytesDownloaded += value.length;
        if (onProgress) {
            onProgress({
                file: fileName,
                bytesDownloaded,
                totalBytes: total,
                // The received byte count can exceed the declared length (e.g. when
                // the header describes an encoded body), so cap at 100.
                percent: total !== null && total > 0
                    ? Math.min(100, Math.round((bytesDownloaded / total) * 100))
                    : null,
            });
        }
    }
    // Concatenate all chunks into a single contiguous buffer
    const data = new Uint8Array(bytesDownloaded);
    let offset = 0;
    for (const chunk of chunks) {
        data.set(chunk, offset);
        offset += chunk.length;
    }
    return data;
}
|
|
191
|
+
/**
 * Fetches a URL and stores the received bytes at `destPath` through the
 * storage provider. Progress events carry the base name of `destPath`.
 */
async function downloadFile(url, destPath, onProgress) {
    const store = await getStorage();
    const payload = await downloadFileData(url, basename(destPath), onProgress);
    await store.writeFile(destPath, payload);
}
|
|
200
|
+
/**
 * Downloads one model variant from Hugging Face Hub into its cache directory.
 *
 * Steps: create the variant directory, download the variant's model files
 * (from the registry entry's subfolder), download the shared tokenizer files
 * (from the repository root), then write label_map.json from the registry
 * entry's `labelMap`.
 *
 * @param mode - key of the variant in MODEL_REGISTRY
 * @param onProgress - optional per-chunk progress callback, passed through to
 *        every file download
 * @param onStatus - optional callback receiving human-readable status lines;
 *        it is told about every file being downloaded and every optional
 *        file that had to be skipped
 * @returns the variant's cache directory
 * @throws Error when `mode` is not in the registry (nothing is created in
 *         that case), or when a required file cannot be downloaded; the
 *         latter carries the underlying error as `cause`
 */
export async function downloadModel(mode, onProgress, onStatus) {
    const storage = await getStorage();
    const info = MODEL_REGISTRY[mode];
    // Fail before touching storage instead of creating an empty directory and
    // then crashing on `info.name`.
    if (info === undefined) {
        throw new Error(`Unknown NER model mode '${mode}'`);
    }
    const modelDir = await getModelPath(mode);
    // Create directory
    await storage.mkdir(modelDir);
    onStatus?.(`Downloading ${info.name} from Hugging Face Hub...`);
    onStatus?.(`Repository: ${info.hfRepo}`);
    // Model files come from the variant's subfolder, tokenizer files from the
    // repository root; apart from that both groups are handled identically.
    const downloads = [
        ...info.files.map((file) => ({ file, subfolder: info.hfSubfolder })),
        ...TOKENIZER_FILES.map((file) => ({ file, subfolder: undefined })),
    ];
    for (const { file, subfolder } of downloads) {
        const url = getHuggingFaceUrl(info.hfRepo, file.repoFile, subfolder);
        const destPath = join(modelDir, file.localFile);
        onStatus?.(`Downloading ${file.repoFile}...`);
        try {
            await downloadFile(url, destPath, onProgress);
        }
        catch (e) {
            if (file.required) {
                throw new Error(`Failed to download required file ${file.repoFile}: ${String(e)}`, { cause: e });
            }
            // Optional files are best-effort: report and carry on.
            onStatus?.(`Skipping optional file ${file.repoFile}`);
        }
    }
    // Write label map last
    const labelMapPath = join(modelDir, "label_map.json");
    await storage.writeFile(labelMapPath, JSON.stringify(info.labelMap, null, 2));
    onStatus?.("Download complete!");
    return modelDir;
}
|
|
246
|
+
/**
 * Makes sure a model variant is available locally and returns its file paths.
 *
 * If the variant is already cached it is used as-is (reported via onStatus).
 * Otherwise it is downloaded, unless `autoDownload` is false, in which case
 * an Error explaining the available options is thrown.
 *
 * @param mode - key of the variant in MODEL_REGISTRY
 * @param options - `{ autoDownload = true, onProgress, onStatus }`
 * @returns `{ modelPath, vocabPath, labelMapPath }` inside the variant's
 *          cache directory (vocabPath points at tokenizer.json)
 */
export async function ensureModel(mode, options = {}) {
    const { autoDownload = true, onProgress, onStatus } = options;
    const modelDir = await getModelPath(mode);
    const info = MODEL_REGISTRY[mode];
    const cached = await isModelDownloaded(mode);
    if (cached) {
        onStatus?.(`Using cached model: ${info.name}`);
    }
    else if (autoDownload) {
        await downloadModel(mode, onProgress, onStatus);
    }
    else {
        const messageLines = [
            `NER model '${mode}' not found at ${modelDir}.`,
            ``,
            `To download automatically, use:`,
            ` createAnonymizer({ ner: { mode: '${mode}', autoDownload: true } })`,
            ``,
            `Or use regex-only mode:`,
            ` createAnonymizer({ ner: { mode: 'disabled' } })`,
        ];
        throw new Error(messageLines.join("\n"));
    }
    // Locate the weights entry; fall back to the conventional file name.
    const weightsEntry = info.files.find((f) => f.localFile === "model.onnx");
    const weightsName = weightsEntry?.localFile ?? "model.onnx";
    return {
        modelPath: join(modelDir, weightsName),
        vocabPath: join(modelDir, "tokenizer.json"),
        labelMapPath: join(modelDir, "label_map.json"),
    };
}
|
|
276
|
+
/**
 * Removes cached models from storage.
 * With a `mode`, only that variant's directory is removed; without one, the
 * whole model cache directory is removed. Removal is recursive and forced.
 */
export async function clearModelCache(mode) {
    const storage = await getStorage();
    const target = mode ? await getModelPath(mode) : await getModelCacheDir();
    await storage.rm(target, { recursive: true, force: true });
}
|
|
290
|
+
/**
 * Lists the model variants that are currently present in the cache.
 *
 * Every variant registered in MODEL_REGISTRY is probed in registry order, so
 * variants added to the registry are picked up without touching this function.
 *
 * @returns array of `{ mode, path, size }`, where `path` is the variant's
 *          cache directory and `size` is the descriptive size string from
 *          the registry entry (not a measured on-disk size)
 */
export async function listDownloadedModels() {
    const models = [];
    // Sequential on purpose: keeps the result in registry order.
    for (const mode of Object.keys(MODEL_REGISTRY)) {
        if (await isModelDownloaded(mode)) {
            const modelPath = await getModelPath(mode);
            const info = MODEL_REGISTRY[mode];
            models.push({ mode, path: modelPath, size: info.size });
        }
    }
    return models;
}
|
|
304
|
+
/**
 * Looks up the registry entry of a model variant.
 * Returns undefined when `mode` is not a key of MODEL_REGISTRY.
 */
export function getModelInfo(mode) {
    const entry = MODEL_REGISTRY[mode];
    return entry;
}
|
|
310
|
+
/**
 * Reads a file from storage and returns its bytes as a standalone
 * ArrayBuffer (for onnxruntime).
 * The bytes are copied out of the view returned by storage, so the result
 * covers exactly the file contents even if that view sits inside a larger
 * backing buffer.
 */
export async function readModelFile(path) {
    const storage = await getStorage();
    const { buffer, byteOffset, byteLength } = await storage.readFile(path);
    return buffer.slice(byteOffset, byteOffset + byteLength);
}
|
|
318
|
+
/**
 * Reads a file from storage as text by delegating to the storage
 * provider's readTextFile().
 */
export async function readTextFile(path) {
    return (await getStorage()).readTextFile(path);
}
|
|
325
|
+
//# sourceMappingURL=model-manager.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"model-manager.js","sourceRoot":"","sources":["../../src/ner/model-manager.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,kBAAkB,EAAwB,MAAM,qBAAqB,CAAC;AAC/E,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAyClD;;;;;GAKG;AACH,MAAM,CAAC,MAAM,cAAc,GAAgD;IACzE,QAAQ,EAAE;QACR,EAAE,EAAE,0BAA0B;QAC9B,IAAI,EAAE,4BAA4B;QAClC,WAAW,EAAE,4DAA4D;QACzE,IAAI,EAAE,SAAS;QACf,MAAM,EAAE,wCAAwC;QAChD,WAAW,EAAE,MAAM;QACnB,KAAK,EAAE;YACL,EAAE,QAAQ,EAAE,YAAY,EAAE,SAAS,EAAE,YAAY,EAAE,QAAQ,EAAE,IAAI,EAAE;SACpE;QACD,QAAQ,EAAE;YACR,GAAG;YACH,QAAQ;YACR,QAAQ;YACR,OAAO;YACP,OAAO;YACP,OAAO;YACP,OAAO;YACP,OAAO;YACP,OAAO;SACR;KACF;IACD,SAAS,EAAE;QACT,EAAE,EAAE,2BAA2B;QAC/B,IAAI,EAAE,6BAA6B;QACnC,WAAW,EAAE,2DAA2D;QACxE,IAAI,EAAE,SAAS;QACf,MAAM,EAAE,wCAAwC;QAChD,WAAW,EAAE,MAAM;QACnB,KAAK,EAAE;YACL;gBACE,QAAQ,EAAE,sBAAsB;gBAChC,SAAS,EAAE,YAAY;gBACvB,QAAQ,EAAE,IAAI;aACf;SACF;QACD,QAAQ,EAAE;YACR,GAAG;YACH,QAAQ;YACR,QAAQ;YACR,OAAO;YACP,OAAO;YACP,OAAO;YACP,OAAO;YACP,OAAO;YACP,OAAO;SACR;KACF;CACF,CAAC;AAEF;;GAEG;AACH,MAAM,eAAe,GAAoB;IACvC,EAAE,QAAQ,EAAE,gBAAgB,EAAE,SAAS,EAAE,gBAAgB,EAAE,QAAQ,EAAE,IAAI,EAAE;IAC3E;QACE,QAAQ,EAAE,uBAAuB;QACjC,SAAS,EAAE,uBAAuB;QAClC,QAAQ,EAAE,KAAK;KAChB;IACD;QACE,QAAQ,EAAE,yBAAyB;QACnC,SAAS,EAAE,yBAAyB;QACpC,QAAQ,EAAE,KAAK;KAChB;IACD,EAAE,QAAQ,EAAE,aAAa,EAAE,SAAS,EAAE,aAAa,EAAE,QAAQ,EAAE,KAAK,EAAE;CACvE,CAAC;AAEF,0BAA0B;AAC1B,IAAI,eAAe,GAA2B,IAAI,CAAC;AAEnD;;GAEG;AACH,KAAK,UAAU,UAAU;IACvB,IAAI,eAAe,KAAK,IAAI,EAAE,CAAC;QAC7B,eAAe,GAAG,MAAM,kBAAkB,EAAE,CAAC;IAC/C,CAAC;IACD,OAAO,eAAe,CAAC;AACzB,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB;IACpC,MAAM,OAAO,GAAG,MAAM,UAAU,EAAE,CAAC;IACnC,OAAO,OAAO,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;AACvC,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,IAA8B;IAE9B,MAAM,QAAQ,GAAG,MAAM,gBAAgB,EAAE,CAAC;IAC1C,OAAO,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;AAC9B,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,IAA8B;IAE9B,MAAM,OAAO,GAAG,MAAM,UAAU,EAAE,CAAC;IACnC,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,IAAI,G
AAG,cAAc,CAAC,IAAI,CAAC,CAAC;IAElC,IAAI,CAAC;QACH,6BAA6B;QAC7B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAC/B,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,OAAO,CAAC,CACnD,CAAC;QACF,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,WAAW,GAAG,MAAM,OAAO,CAAC,MAAM,CACtC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC,SAAS,CAAC,CACpC,CAAC;YACF,IAAI,CAAC,WAAW;gBAAE,OAAO,KAAK,CAAC;QACjC,CAAC;QAED,4BAA4B;QAC5B,MAAM,eAAe,GAAG,MAAM,OAAO,CAAC,MAAM,CAC1C,IAAI,CAAC,QAAQ,EAAE,gBAAgB,CAAC,CACjC,CAAC;QACF,OAAO,eAAe,CAAC;IACzB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAYD;;GAEG;AACH,SAAS,iBAAiB,CACxB,IAAY,EACZ,QAAgB,EAChB,SAAkB;IAElB,MAAM,QAAQ,GACZ,SAAS,KAAK,SAAS,IAAI,SAAS,KAAK,EAAE;QACzC,CAAC,CAAC,GAAG,SAAS,IAAI,QAAQ,EAAE;QAC5B,CAAC,CAAC,QAAQ,CAAC;IACf,OAAO,0BAA0B,IAAI,iBAAiB,QAAQ,EAAE,CAAC;AACnE,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,gBAAgB,CAC7B,GAAW,EACX,QAAgB,EAChB,UAAqC;IAErC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;QAChC,OAAO,EAAE;YACP,YAAY,EAAE,eAAe;SAC9B;KACF,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;YAC5B,MAAM,IAAI,KAAK,CAAC,mBAAmB,GAAG,EAAE,CAAC,CAAC;QAC5C,CAAC;QACD,MAAM,IAAI,KAAK,CACb,sBAAsB,GAAG,KAAK,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CACvE,CAAC;IACJ,CAAC;IAED,MAAM,UAAU,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;IAC1D,MAAM,KAAK,GACT,UAAU,KAAK,IAAI,IAAI,UAAU,KAAK,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAE7E,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,EAAE,SAAS,EAAE,CAAC;IAC1C,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CAAC,+BAA+B,CAAC,CAAC;IACnD,CAAC;IAED,MAAM,MAAM,GAAiB,EAAE,CAAC;IAChC,IAAI,eAAe,GAAG,CAAC,CAAC;IAExB,iDAAiD;IACjD,OAAO,IAAI,EAAE,CAAC;QACZ,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;QAEnC,IAAI,MAAM,CAAC,IAAI;YAAE,MAAM;QAEvB,MAAM,KAAK,GAAG,MAAM,CAAC,KAAmB,CAAC;QACzC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACnB,eAAe,IAAI,KAAK,CAAC,MAAM,CAAC;QAEhC,IAAI,UAAU,EAAE,CAAC;YACf,UAAU,CAAC;gBACT,IAAI,EAAE,QAAQ;gBACd,eAAe;gBACf,UAAU,EAAE,KAAK;gBACjB,OAAO,EACL,KAAK,KAAK,IAAI
,IAAI,KAAK,GAAG,CAAC;oBACzB,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,eAAe,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC;oBAC7C,CAAC,CAAC,IAAI;aACX,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,yBAAyB;IACzB,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;IACzE,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,WAAW,CAAC,CAAC;IAC3C,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;QAC1B,MAAM,IAAI,KAAK,CAAC,MAAM,CAAC;IACzB,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,YAAY,CACzB,GAAW,EACX,QAAgB,EAChB,UAAqC;IAErC,MAAM,OAAO,GAAG,MAAM,UAAU,EAAE,CAAC;IACnC,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAEpC,MAAM,IAAI,GAAG,MAAM,gBAAgB,CAAC,GAAG,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC;IAC/D,MAAM,OAAO,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;AAC1C,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,IAA8B,EAC9B,UAAqC,EACrC,QAAmC;IAEnC,MAAM,OAAO,GAAG,MAAM,UAAU,EAAE,CAAC;IACnC,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,IAAI,CAAC,CAAC;IAE1C,mBAAmB;IACnB,MAAM,OAAO,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IAE9B,QAAQ,EAAE,CAAC,eAAe,IAAI,CAAC,IAAI,2BAA2B,CAAC,CAAC;IAChE,QAAQ,EAAE,CAAC,eAAe,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;IAEzC,uBAAuB;IACvB,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;QAC9B,MAAM,GAAG,GAAG,iBAAiB,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;QAC5E,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;QAEhD,QAAQ,EAAE,CAAC,eAAe,IAAI,CAAC,QAAQ,KAAK,CAAC,CAAC;QAE9C,IAAI,CAAC;YACH,MAAM,YAAY,CAAC,GAAG,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC;QAChD,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAClB,MAAM,IAAI,KAAK,CACb,oCAAoC,IAAI,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC,CAAC,EAAE,CAClE,CAAC;YACJ,CAAC;YACD,mCAAmC;YACnC,QAAQ,EAAE,CAAC,0BAA0B,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED,2DAA2D;IAC3D,KAAK,MAAM,IAAI,IAAI,eAAe,EAAE,CAAC;QACnC,MAAM,GAAG,GAAG,iBAAiB,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC1D,MAAM,QAAQ,GAAG,IAAI,CAAC,QA
AQ,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;QAEhD,IAAI,CAAC;YACH,MAAM,YAAY,CAAC,GAAG,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC;QAChD,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAClB,MAAM,IAAI,KAAK,CACb,oCAAoC,IAAI,CAAC,QAAQ,KAAK,MAAM,CAAC,CAAC,CAAC,EAAE,CAClE,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC;IAED,kBAAkB;IAClB,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,EAAE,gBAAgB,CAAC,CAAC;IACtD,MAAM,OAAO,CAAC,SAAS,CAAC,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAE9E,QAAQ,EAAE,CAAC,oBAAoB,CAAC,CAAC;IAEjC,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,IAA8B,EAC9B,UAII,EAAE;IAEN,MAAM,EAAE,YAAY,GAAG,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC;IAE9D,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;IAElC,8BAA8B;IAC9B,MAAM,YAAY,GAAG,MAAM,iBAAiB,CAAC,IAAI,CAAC,CAAC;IAEnD,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,IAAI,CAAC,YAAY,EAAE,CAAC;YAClB,MAAM,IAAI,KAAK,CACb,cAAc,IAAI,kBAAkB,QAAQ,OAAO;gBACjD,mCAAmC;gBACnC,sCAAsC,IAAI,gCAAgC;gBAC1E,2BAA2B;gBAC3B,mDAAmD,CACtD,CAAC;QACJ,CAAC;QAED,MAAM,aAAa,CAAC,IAAI,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC;IAClD,CAAC;SAAM,CAAC;QACN,QAAQ,EAAE,CAAC,uBAAuB,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;IACjD,CAAC;IAED,kBAAkB;IAClB,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,YAAY,CAAC,CAAC;IAEvE,OAAO;QACL,SAAS,EAAE,IAAI,CAAC,QAAQ,EAAE,SAAS,EAAE,SAAS,IAAI,YAAY,CAAC;QAC/D,SAAS,EAAE,IAAI,CAAC,QAAQ,EAAE,gBAAgB,CAAC;QAC3C,YAAY,EAAE,IAAI,CAAC,QAAQ,EAAE,gBAAgB,CAAC;KAC/C,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,IAA+B;IAE/B,MAAM,OAAO,GAAG,MAAM,UAAU,EAAE,CAAC;IAEnC,IAAI,IAAI,EAAE,CAAC;QACT,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,IAAI,CAAC,CAAC;QAC1C,MAAM,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IAC/D,CAAC;SAAM,CAAC;QACN,MAAM,QAAQ,GAAG,MAAM,gBAAgB,EAAE,CAAC;QAC1C,MAAM,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IAC/D,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB
;IAGxC,MAAM,MAAM,GAIP,EAAE,CAAC;IAER,KAAK,MAAM,IAAI,IAAI,CAAC,UAAU,EAAE,WAAW,CAAU,EAAE,CAAC;QACtD,IAAI,MAAM,iBAAiB,CAAC,IAAI,CAAC,EAAE,CAAC;YAClC,MAAM,SAAS,GAAG,MAAM,YAAY,CAAC,IAAI,CAAC,CAAC;YAC3C,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;YAClC,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,IAA8B;IACzD,OAAO,cAAc,CAAC,IAAI,CAAC,CAAC;AAC9B,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CAAC,IAAY;IAC9C,MAAM,OAAO,GAAG,MAAM,UAAU,EAAE,CAAC;IACnC,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC1C,OAAO,IAAI,CAAC,MAAM,CAAC,KAAK,CACtB,IAAI,CAAC,UAAU,EACf,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,CACnB,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,IAAY;IAC7C,MAAM,OAAO,GAAG,MAAM,UAAU,EAAE,CAAC;IACnC,OAAO,OAAO,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;AACpC,CAAC"}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* NER Model Wrapper
|
|
3
|
+
* ONNX Runtime integration for Named Entity Recognition
|
|
4
|
+
* Supports both onnxruntime-node and onnxruntime-web
|
|
5
|
+
*/
|
|
6
|
+
import { SpanMatch, AnonymizationPolicy } from "../types/index.js";
|
|
7
|
+
/**
|
|
8
|
+
* Configuration for an NER model: where its files live and how input is tokenized and labeled
|
|
9
|
+
*/
|
|
10
|
+
export interface NERModelConfig {
|
|
11
|
+
/** Path to the ONNX model file */
|
|
12
|
+
modelPath: string;
|
|
13
|
+
/** Path to the vocabulary file. NOTE(review): the model manager's ensureModel() returns the tokenizer.json path under this name - confirm that is the expected format */
|
|
14
|
+
vocabPath: string;
|
|
15
|
+
/** Label mapping: the array position is the label index (index -> label string) */
|
|
16
|
+
labelMap: string[];
|
|
17
|
+
/** Maximum sequence length. NOTE(review): presumably counted in tokens - confirm in the implementation */
|
|
18
|
+
maxLength: number;
|
|
19
|
+
/** Whether the model expects lowercase input */
|
|
20
|
+
doLowerCase: boolean;
|
|
21
|
+
/** Model version string, used for tracking */
|
|
22
|
+
modelVersion: string;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Result of running NER prediction on a single text
|
|
26
|
+
*/
|
|
27
|
+
export interface NERPrediction {
|
|
28
|
+
/** Entity spans detected in the text */
|
|
29
|
+
spans: SpanMatch[];
|
|
30
|
+
/** Processing time in milliseconds */
|
|
31
|
+
processingTimeMs: number;
|
|
32
|
+
/** Version of the model that produced this prediction */
|
|
33
|
+
modelVersion: string;
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Default label map for common NER models (CoNLL-style), as an array of label strings. NOTE(review): the actual values are not visible in this declaration file
|
|
37
|
+
*/
|
|
38
|
+
export declare const DEFAULT_LABEL_MAP: string[];
|
|
39
|
+
/**
|
|
40
|
+
* NER Model wrapper for ONNX inference. Constructed from a NERModelConfig; exposes load(), predict() and dispose() plus the `version` and `loaded` getters
|
|
41
|
+
*/
|
|
42
|
+
export declare class NERModel {
|
|
43
|
+
private ort;
|
|
44
|
+
private session;
|
|
45
|
+
private tokenizer;
|
|
46
|
+
private config;
|
|
47
|
+
private isLoaded;
|
|
48
|
+
constructor(config: NERModelConfig);
|
|
49
|
+
/**
|
|
50
|
+
* Loads the model and tokenizer; the returned promise resolves with no value
|
|
51
|
+
*/
|
|
52
|
+
load(): Promise<void>;
|
|
53
|
+
/**
|
|
54
|
+
* Predicts entities in `text`, optionally under an anonymization policy; resolves to a NERPrediction (spans, processing time, model version)
|
|
55
|
+
*/
|
|
56
|
+
predict(text: string, policy?: AnonymizationPolicy): Promise<NERPrediction>;
|
|
57
|
+
/**
|
|
58
|
+
* Runs ONNX inference (private)
|
|
59
|
+
*/
|
|
60
|
+
private runInference;
|
|
61
|
+
/**
|
|
62
|
+
* Processes model logits to extract labels and confidences (private)
|
|
63
|
+
*/
|
|
64
|
+
private processLogits;
|
|
65
|
+
/**
|
|
66
|
+
* Gets the minimum confidence threshold from the policy (private)
|
|
67
|
+
*/
|
|
68
|
+
private getMinConfidence;
|
|
69
|
+
/**
|
|
70
|
+
* Gets the model version string
|
|
71
|
+
*/
|
|
72
|
+
get version(): string;
|
|
73
|
+
/**
|
|
74
|
+
* Reports whether the model is loaded
|
|
75
|
+
*/
|
|
76
|
+
get loaded(): boolean;
|
|
77
|
+
/**
|
|
78
|
+
* Disposes of model resources; the returned promise resolves with no value
|
|
79
|
+
*/
|
|
80
|
+
dispose(): Promise<void>;
|
|
81
|
+
}
|
|
82
|
+
/**
|
|
83
|
+
* Creates a NERModel instance from a partial configuration: modelPath and vocabPath are required, every other NERModelConfig field is optional. NOTE(review): omitted fields are presumably filled with defaults - confirm in the implementation
|
|
84
|
+
*/
|
|
85
|
+
export declare function createNERModel(config: Partial<NERModelConfig> & {
|
|
86
|
+
modelPath: string;
|
|
87
|
+
vocabPath: string;
|
|
88
|
+
}): NERModel;
|
|
89
|
+
/**
|
|
90
|
+
* NER Model stub for when no model is available. Declares the same members as INERModel, with version fixed to "stub-1.0.0" and loaded fixed to true
|
|
91
|
+
* Returns empty results - useful for regex-only mode
|
|
92
|
+
*/
|
|
93
|
+
export declare class NERModelStub {
|
|
94
|
+
readonly version = "stub-1.0.0";
|
|
95
|
+
readonly loaded = true;
|
|
96
|
+
load(): Promise<void>;
|
|
97
|
+
predict(_text: string, _policy?: AnonymizationPolicy): Promise<NERPrediction>;
|
|
98
|
+
dispose(): Promise<void>;
|
|
99
|
+
}
|
|
100
|
+
/**
|
|
101
|
+
* Creates a stub NER model (for testing or regex-only mode); takes no arguments and returns a NERModelStub
|
|
102
|
+
*/
|
|
103
|
+
export declare function createNERModelStub(): NERModelStub;
|
|
104
|
+
/**
|
|
105
|
+
* NER model interface for dependency injection: read-only version/loaded state plus async load(), predict() and dispose(), the same public surface NERModel and NERModelStub declare
|
|
106
|
+
*/
|
|
107
|
+
export interface INERModel {
|
|
108
|
+
readonly version: string;
|
|
109
|
+
readonly loaded: boolean;
|
|
110
|
+
load(): Promise<void>;
|
|
111
|
+
predict(text: string, policy?: AnonymizationPolicy): Promise<NERPrediction>;
|
|
112
|
+
dispose(): Promise<void>;
|
|
113
|
+
}
|
|
114
|
+
//# sourceMappingURL=ner-model.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ner-model.d.ts","sourceRoot":"","sources":["../../src/ner/ner-model.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,EAAE,SAAS,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAcnE;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,8BAA8B;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,8BAA8B;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,4CAA4C;IAC5C,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,8BAA8B;IAC9B,SAAS,EAAE,MAAM,CAAC;IAClB,4CAA4C;IAC5C,WAAW,EAAE,OAAO,CAAC;IACrB,iCAAiC;IACjC,YAAY,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,4BAA4B;IAC5B,KAAK,EAAE,SAAS,EAAE,CAAC;IACnB,4BAA4B;IAC5B,gBAAgB,EAAE,MAAM,CAAC;IACzB,yBAAyB;IACzB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED;;GAEG;AACH,eAAO,MAAM,iBAAiB,UAU7B,CAAC;AAEF;;GAEG;AACH,qBAAa,QAAQ;IACnB,OAAO,CAAC,GAAG,CAA2B;IACtC,OAAO,CAAC,OAAO,CAAiB;IAChC,OAAO,CAAC,SAAS,CAAmC;IACpD,OAAO,CAAC,MAAM,CAAiB;IAC/B,OAAO,CAAC,QAAQ,CAAS;gBAEb,MAAM,EAAE,cAAc;IAIlC;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IA+B3B;;OAEG;IACG,OAAO,CACX,IAAI,EAAE,MAAM,EACZ,MAAM,CAAC,EAAE,mBAAmB,GAC3B,OAAO,CAAC,aAAa,CAAC;IA+CzB;;OAEG;YACW,YAAY;IAiE1B;;OAEG;IACH,OAAO,CAAC,aAAa;IAqCrB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAexB;;OAEG;IACH,IAAI,OAAO,IAAI,MAAM,CAEpB;IAED;;OAEG;IACH,IAAI,MAAM,IAAI,OAAO,CAEpB;IAED;;OAEG;IACH,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAOzB;AAYD;;GAEG;AACH,wBAAgB,cAAc,CAC5B,MAAM,EAAE,OAAO,CAAC,cAAc,CAAC,GAAG;IAAE,SAAS,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAA;CAAE,GACzE,QAAQ,CAWV;AAED;;;GAGG;AACH,qBAAa,YAAY;IACvB,QAAQ,CAAC,OAAO,gBAAgB;IAChC,QAAQ,CAAC,MAAM,QAAQ;IAEjB,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAI3B,OAAO,CACL,KAAK,EAAE,MAAM,EACb,OAAO,CAAC,EAAE,mBAAmB,GAC5B,OAAO,CAAC,aAAa,CAAC;IAQzB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAIzB;AAED;;GAEG;AACH,wBAAgB,kBAAkB,IAAI,YAAY,CAEjD;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAC;IACzB,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IACtB,OAAO,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,aAAa,CAAC,CAAC;IAC5E,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1B"}
|