@elanlanguages/bridge-anonymization 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/README.md +382 -0
  2. package/dist/crypto/index.d.ts +6 -0
  3. package/dist/crypto/index.d.ts.map +1 -0
  4. package/dist/crypto/index.js +6 -0
  5. package/dist/crypto/index.js.map +1 -0
  6. package/dist/crypto/pii-map-crypto.d.ts +100 -0
  7. package/dist/crypto/pii-map-crypto.d.ts.map +1 -0
  8. package/dist/crypto/pii-map-crypto.js +163 -0
  9. package/dist/crypto/pii-map-crypto.js.map +1 -0
  10. package/dist/index.d.ts +173 -0
  11. package/dist/index.d.ts.map +1 -0
  12. package/dist/index.js +294 -0
  13. package/dist/index.js.map +1 -0
  14. package/dist/ner/bio-decoder.d.ts +64 -0
  15. package/dist/ner/bio-decoder.d.ts.map +1 -0
  16. package/dist/ner/bio-decoder.js +216 -0
  17. package/dist/ner/bio-decoder.js.map +1 -0
  18. package/dist/ner/index.d.ts +10 -0
  19. package/dist/ner/index.d.ts.map +1 -0
  20. package/dist/ner/index.js +10 -0
  21. package/dist/ner/index.js.map +1 -0
  22. package/dist/ner/model-manager.d.ts +102 -0
  23. package/dist/ner/model-manager.d.ts.map +1 -0
  24. package/dist/ner/model-manager.js +253 -0
  25. package/dist/ner/model-manager.js.map +1 -0
  26. package/dist/ner/ner-model.d.ts +114 -0
  27. package/dist/ner/ner-model.d.ts.map +1 -0
  28. package/dist/ner/ner-model.js +240 -0
  29. package/dist/ner/ner-model.js.map +1 -0
  30. package/dist/ner/onnx-runtime.d.ts +45 -0
  31. package/dist/ner/onnx-runtime.d.ts.map +1 -0
  32. package/dist/ner/onnx-runtime.js +99 -0
  33. package/dist/ner/onnx-runtime.js.map +1 -0
  34. package/dist/ner/tokenizer.d.ts +140 -0
  35. package/dist/ner/tokenizer.d.ts.map +1 -0
  36. package/dist/ner/tokenizer.js +341 -0
  37. package/dist/ner/tokenizer.js.map +1 -0
  38. package/dist/pipeline/index.d.ts +9 -0
  39. package/dist/pipeline/index.d.ts.map +1 -0
  40. package/dist/pipeline/index.js +9 -0
  41. package/dist/pipeline/index.js.map +1 -0
  42. package/dist/pipeline/prenormalize.d.ts +48 -0
  43. package/dist/pipeline/prenormalize.d.ts.map +1 -0
  44. package/dist/pipeline/prenormalize.js +94 -0
  45. package/dist/pipeline/prenormalize.js.map +1 -0
  46. package/dist/pipeline/resolver.d.ts +56 -0
  47. package/dist/pipeline/resolver.d.ts.map +1 -0
  48. package/dist/pipeline/resolver.js +238 -0
  49. package/dist/pipeline/resolver.js.map +1 -0
  50. package/dist/pipeline/tagger.d.ts +74 -0
  51. package/dist/pipeline/tagger.d.ts.map +1 -0
  52. package/dist/pipeline/tagger.js +169 -0
  53. package/dist/pipeline/tagger.js.map +1 -0
  54. package/dist/pipeline/validator.d.ts +65 -0
  55. package/dist/pipeline/validator.d.ts.map +1 -0
  56. package/dist/pipeline/validator.js +264 -0
  57. package/dist/pipeline/validator.js.map +1 -0
  58. package/dist/recognizers/base.d.ts +78 -0
  59. package/dist/recognizers/base.d.ts.map +1 -0
  60. package/dist/recognizers/base.js +100 -0
  61. package/dist/recognizers/base.js.map +1 -0
  62. package/dist/recognizers/bic-swift.d.ts +10 -0
  63. package/dist/recognizers/bic-swift.d.ts.map +1 -0
  64. package/dist/recognizers/bic-swift.js +107 -0
  65. package/dist/recognizers/bic-swift.js.map +1 -0
  66. package/dist/recognizers/credit-card.d.ts +32 -0
  67. package/dist/recognizers/credit-card.d.ts.map +1 -0
  68. package/dist/recognizers/credit-card.js +160 -0
  69. package/dist/recognizers/credit-card.js.map +1 -0
  70. package/dist/recognizers/custom-id.d.ts +28 -0
  71. package/dist/recognizers/custom-id.d.ts.map +1 -0
  72. package/dist/recognizers/custom-id.js +116 -0
  73. package/dist/recognizers/custom-id.js.map +1 -0
  74. package/dist/recognizers/email.d.ts +10 -0
  75. package/dist/recognizers/email.d.ts.map +1 -0
  76. package/dist/recognizers/email.js +75 -0
  77. package/dist/recognizers/email.js.map +1 -0
  78. package/dist/recognizers/iban.d.ts +14 -0
  79. package/dist/recognizers/iban.d.ts.map +1 -0
  80. package/dist/recognizers/iban.js +67 -0
  81. package/dist/recognizers/iban.js.map +1 -0
  82. package/dist/recognizers/index.d.ts +20 -0
  83. package/dist/recognizers/index.d.ts.map +1 -0
  84. package/dist/recognizers/index.js +42 -0
  85. package/dist/recognizers/index.js.map +1 -0
  86. package/dist/recognizers/ip-address.d.ts +14 -0
  87. package/dist/recognizers/ip-address.d.ts.map +1 -0
  88. package/dist/recognizers/ip-address.js +183 -0
  89. package/dist/recognizers/ip-address.js.map +1 -0
  90. package/dist/recognizers/phone.d.ts +10 -0
  91. package/dist/recognizers/phone.d.ts.map +1 -0
  92. package/dist/recognizers/phone.js +145 -0
  93. package/dist/recognizers/phone.js.map +1 -0
  94. package/dist/recognizers/registry.d.ts +59 -0
  95. package/dist/recognizers/registry.d.ts.map +1 -0
  96. package/dist/recognizers/registry.js +113 -0
  97. package/dist/recognizers/registry.js.map +1 -0
  98. package/dist/recognizers/url.d.ts +14 -0
  99. package/dist/recognizers/url.d.ts.map +1 -0
  100. package/dist/recognizers/url.js +121 -0
  101. package/dist/recognizers/url.js.map +1 -0
  102. package/dist/types/index.d.ts +134 -0
  103. package/dist/types/index.d.ts.map +1 -0
  104. package/dist/types/index.js +69 -0
  105. package/dist/types/index.js.map +1 -0
  106. package/dist/types/pii-types.d.ts +50 -0
  107. package/dist/types/pii-types.d.ts.map +1 -0
  108. package/dist/types/pii-types.js +114 -0
  109. package/dist/types/pii-types.js.map +1 -0
  110. package/dist/utils/iban-checksum.d.ts +23 -0
  111. package/dist/utils/iban-checksum.d.ts.map +1 -0
  112. package/dist/utils/iban-checksum.js +106 -0
  113. package/dist/utils/iban-checksum.js.map +1 -0
  114. package/dist/utils/index.d.ts +8 -0
  115. package/dist/utils/index.d.ts.map +1 -0
  116. package/dist/utils/index.js +8 -0
  117. package/dist/utils/index.js.map +1 -0
  118. package/dist/utils/luhn.d.ts +17 -0
  119. package/dist/utils/luhn.d.ts.map +1 -0
  120. package/dist/utils/luhn.js +55 -0
  121. package/dist/utils/luhn.js.map +1 -0
  122. package/dist/utils/offsets.d.ts +86 -0
  123. package/dist/utils/offsets.d.ts.map +1 -0
  124. package/dist/utils/offsets.js +124 -0
  125. package/dist/utils/offsets.js.map +1 -0
  126. package/package.json +62 -0
@@ -0,0 +1,216 @@
1
+ /**
2
+ * BIO Tag Decoder
3
+ * Converts BIO-tagged token sequences to entity spans
4
+ */
5
+ import { PIIType, DetectionSource } from '../types/index.js';
6
+ import { getPIITypeFromNERLabel } from '../types/pii-types.js';
7
+ /**
8
+ * BIO tag types
9
+ */
10
+ export var BIOTag;
11
+ (function (BIOTag) {
12
+ /** Beginning of an entity */
13
+ BIOTag["B"] = "B";
14
+ /** Inside an entity (continuation) */
15
+ BIOTag["I"] = "I";
16
+ /** Outside any entity */
17
+ BIOTag["O"] = "O";
18
+ })(BIOTag || (BIOTag = {}));
19
+ /**
20
+ * Parses a BIO label string (e.g., "B-PER", "I-ORG", "O")
21
+ */
22
+ export function parseBIOLabel(label) {
23
+ if (label === 'O' || label === '[PAD]' || label === '[CLS]' || label === '[SEP]') {
24
+ return { tag: BIOTag.O, entityType: null };
25
+ }
26
+ const parts = label.split('-');
27
+ if (parts.length !== 2) {
28
+ return { tag: BIOTag.O, entityType: null };
29
+ }
30
+ const [tagStr, entityType] = parts;
31
+ let tag;
32
+ switch (tagStr?.toUpperCase()) {
33
+ case 'B':
34
+ tag = BIOTag.B;
35
+ break;
36
+ case 'I':
37
+ tag = BIOTag.I;
38
+ break;
39
+ default:
40
+ return { tag: BIOTag.O, entityType: null };
41
+ }
42
+ return { tag, entityType: entityType ?? null };
43
+ }
44
+ /**
45
+ * Decodes BIO-tagged tokens into entity spans
46
+ */
47
+ export function decodeBIOTags(tokens, labels, confidences, originalText) {
48
+ const entities = [];
49
+ let currentEntity = null;
50
+ for (let i = 0; i < tokens.length; i++) {
51
+ const token = tokens[i];
52
+ const label = labels[i] ?? 'O';
53
+ const confidence = confidences[i] ?? 0;
54
+ // Skip special tokens
55
+ if (token.isSpecial) {
56
+ // If we have a current entity, close it
57
+ if (currentEntity !== null) {
58
+ entities.push(currentEntity);
59
+ currentEntity = null;
60
+ }
61
+ continue;
62
+ }
63
+ const { tag, entityType } = parseBIOLabel(label);
64
+ switch (tag) {
65
+ case BIOTag.B:
66
+ // Start of new entity
67
+ // Close previous entity if exists
68
+ if (currentEntity !== null) {
69
+ entities.push(currentEntity);
70
+ }
71
+ currentEntity = {
72
+ type: entityType ?? 'UNKNOWN',
73
+ start: token.start,
74
+ end: token.end,
75
+ confidence,
76
+ text: originalText.slice(token.start, token.end),
77
+ tokenIndices: [i],
78
+ };
79
+ break;
80
+ case BIOTag.I:
81
+ // Continuation of entity
82
+ if (currentEntity !== null && entityType === currentEntity.type) {
83
+ // Extend current entity
84
+ currentEntity.end = token.end;
85
+ currentEntity.text = originalText.slice(currentEntity.start, currentEntity.end);
86
+ currentEntity.tokenIndices.push(i);
87
+ // Average confidence
88
+ currentEntity.confidence =
89
+ (currentEntity.confidence * (currentEntity.tokenIndices.length - 1) + confidence) /
90
+ currentEntity.tokenIndices.length;
91
+ }
92
+ else {
93
+ // I tag without matching B tag - treat as new entity (common in some models)
94
+ if (currentEntity !== null) {
95
+ entities.push(currentEntity);
96
+ }
97
+ currentEntity = {
98
+ type: entityType ?? 'UNKNOWN',
99
+ start: token.start,
100
+ end: token.end,
101
+ confidence,
102
+ text: originalText.slice(token.start, token.end),
103
+ tokenIndices: [i],
104
+ };
105
+ }
106
+ break;
107
+ case BIOTag.O:
108
+ // Outside entity - close current if exists
109
+ if (currentEntity !== null) {
110
+ entities.push(currentEntity);
111
+ currentEntity = null;
112
+ }
113
+ break;
114
+ }
115
+ }
116
+ // Don't forget to close the last entity
117
+ if (currentEntity !== null) {
118
+ entities.push(currentEntity);
119
+ }
120
+ return entities;
121
+ }
122
+ /**
123
+ * Converts raw NER entities to SpanMatch format
124
+ */
125
+ export function convertToSpanMatches(rawEntities, confidenceThreshold = 0.5) {
126
+ const spans = [];
127
+ for (const entity of rawEntities) {
128
+ // Filter by confidence
129
+ if (entity.confidence < confidenceThreshold) {
130
+ continue;
131
+ }
132
+ // Map entity type to PIIType
133
+ const piiType = getPIITypeFromNERLabel(entity.type);
134
+ if (piiType === null) {
135
+ continue; // Skip unknown types
136
+ }
137
+ spans.push({
138
+ type: piiType,
139
+ start: entity.start,
140
+ end: entity.end,
141
+ confidence: entity.confidence,
142
+ source: DetectionSource.NER,
143
+ text: entity.text,
144
+ });
145
+ }
146
+ return spans;
147
+ }
148
+ /**
149
+ * Post-processes NER spans to clean up boundaries
150
+ */
151
+ export function cleanupSpanBoundaries(spans, originalText) {
152
+ return spans.map((span) => {
153
+ let { start, end } = span;
154
+ // Trim leading whitespace
155
+ while (start < end && /\s/.test(originalText[start] ?? '')) {
156
+ start++;
157
+ }
158
+ // Trim trailing whitespace
159
+ while (end > start && /\s/.test(originalText[end - 1] ?? '')) {
160
+ end--;
161
+ }
162
+ // Trim leading/trailing punctuation for PERSON/ORG types
163
+ if (span.type === PIIType.PERSON || span.type === PIIType.ORG) {
164
+ while (start < end && /[.,;:!?'"()]/.test(originalText[start] ?? '')) {
165
+ start++;
166
+ }
167
+ while (end > start && /[.,;:!?'"()]/.test(originalText[end - 1] ?? '')) {
168
+ end--;
169
+ }
170
+ }
171
+ // If span became empty, return original
172
+ if (start >= end) {
173
+ return span;
174
+ }
175
+ return {
176
+ ...span,
177
+ start,
178
+ end,
179
+ text: originalText.slice(start, end),
180
+ };
181
+ });
182
+ }
183
+ /**
184
+ * Merges adjacent spans of the same type
185
+ */
186
+ export function mergeAdjacentSpans(spans, originalText, maxGap = 1) {
187
+ if (spans.length <= 1)
188
+ return spans;
189
+ // Sort by start position
190
+ const sorted = [...spans].sort((a, b) => a.start - b.start);
191
+ const merged = [];
192
+ let current = sorted[0];
193
+ for (let i = 1; i < sorted.length; i++) {
194
+ const next = sorted[i];
195
+ // Check if same type and close enough
196
+ const gap = next.start - current.end;
197
+ const gapText = originalText.slice(current.end, next.start);
198
+ const isOnlyWhitespace = /^\s*$/.test(gapText);
199
+ if (next.type === current.type && gap <= maxGap && isOnlyWhitespace) {
200
+ // Merge spans
201
+ current = {
202
+ ...current,
203
+ end: next.end,
204
+ text: originalText.slice(current.start, next.end),
205
+ confidence: (current.confidence + next.confidence) / 2,
206
+ };
207
+ }
208
+ else {
209
+ merged.push(current);
210
+ current = next;
211
+ }
212
+ }
213
+ merged.push(current);
214
+ return merged;
215
+ }
216
+ //# sourceMappingURL=bio-decoder.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"bio-decoder.js","sourceRoot":"","sources":["../../src/ner/bio-decoder.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,OAAO,EAAa,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACxE,OAAO,EAAE,sBAAsB,EAAE,MAAM,uBAAuB,CAAC;AAG/D;;GAEG;AACH,MAAM,CAAN,IAAY,MAOX;AAPD,WAAY,MAAM;IAChB,6BAA6B;IAC7B,iBAAO,CAAA;IACP,sCAAsC;IACtC,iBAAO,CAAA;IACP,yBAAyB;IACzB,iBAAO,CAAA;AACT,CAAC,EAPW,MAAM,KAAN,MAAM,QAOjB;AA8BD;;GAEG;AACH,MAAM,UAAU,aAAa,CAAC,KAAa;IACzC,IAAI,KAAK,KAAK,GAAG,IAAI,KAAK,KAAK,OAAO,IAAI,KAAK,KAAK,OAAO,IAAI,KAAK,KAAK,OAAO,EAAE,CAAC;QACjF,OAAO,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;IAC7C,CAAC;IAED,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC/B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;IAC7C,CAAC;IAED,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,GAAG,KAAK,CAAC;IAEnC,IAAI,GAAW,CAAC;IAChB,QAAQ,MAAM,EAAE,WAAW,EAAE,EAAE,CAAC;QAC9B,KAAK,GAAG;YACN,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC;YACf,MAAM;QACR,KAAK,GAAG;YACN,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC;YACf,MAAM;QACR;YACE,OAAO,EAAE,GAAG,EAAE,MAAM,CAAC,CAAC,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;IAC/C,CAAC;IAED,OAAO,EAAE,GAAG,EAAE,UAAU,EAAE,UAAU,IAAI,IAAI,EAAE,CAAC;AACjD,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,aAAa,CAC3B,MAAe,EACf,MAAgB,EAChB,WAAqB,EACrB,YAAoB;IAEpB,MAAM,QAAQ,GAAmB,EAAE,CAAC;IACpC,IAAI,aAAa,GAAwB,IAAI,CAAC;IAE9C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAE,CAAC;QACzB,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC;QAC/B,MAAM,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAEvC,sBAAsB;QACtB,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;YACpB,wCAAwC;YACxC,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;gBAC3B,QAAQ,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;gBAC7B,aAAa,GAAG,IAAI,CAAC;YACvB,CAAC;YACD,SAAS;QACX,CAAC;QAED,MAAM,EAAE,GAAG,EAAE,UAAU,EAAE,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;QAEjD,QAAQ,GAAG,EAAE,CAAC;YACZ,KAAK,MAAM,CAAC,CAAC;gBACX,sBAAsB;gBACtB,kCAAkC;gBAClC,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;oBAC3B,QAAQ,CAAC,IAAI,CAAC,aAAa,CAAC,C
AAC;gBAC/B,CAAC;gBAED,aAAa,GAAG;oBACd,IAAI,EAAE,UAAU,IAAI,SAAS;oBAC7B,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,GAAG,EAAE,KAAK,CAAC,GAAG;oBACd,UAAU;oBACV,IAAI,EAAE,YAAY,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC;oBAChD,YAAY,EAAE,CAAC,CAAC,CAAC;iBAClB,CAAC;gBACF,MAAM;YAER,KAAK,MAAM,CAAC,CAAC;gBACX,yBAAyB;gBACzB,IAAI,aAAa,KAAK,IAAI,IAAI,UAAU,KAAK,aAAa,CAAC,IAAI,EAAE,CAAC;oBAChE,wBAAwB;oBACxB,aAAa,CAAC,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC;oBAC9B,aAAa,CAAC,IAAI,GAAG,YAAY,CAAC,KAAK,CAAC,aAAa,CAAC,KAAK,EAAE,aAAa,CAAC,GAAG,CAAC,CAAC;oBAChF,aAAa,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;oBACnC,qBAAqB;oBACrB,aAAa,CAAC,UAAU;wBACtB,CAAC,aAAa,CAAC,UAAU,GAAG,CAAC,aAAa,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,UAAU,CAAC;4BACjF,aAAa,CAAC,YAAY,CAAC,MAAM,CAAC;gBACtC,CAAC;qBAAM,CAAC;oBACN,6EAA6E;oBAC7E,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;wBAC3B,QAAQ,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;oBAC/B,CAAC;oBACD,aAAa,GAAG;wBACd,IAAI,EAAE,UAAU,IAAI,SAAS;wBAC7B,KAAK,EAAE,KAAK,CAAC,KAAK;wBAClB,GAAG,EAAE,KAAK,CAAC,GAAG;wBACd,UAAU;wBACV,IAAI,EAAE,YAAY,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC;wBAChD,YAAY,EAAE,CAAC,CAAC,CAAC;qBAClB,CAAC;gBACJ,CAAC;gBACD,MAAM;YAER,KAAK,MAAM,CAAC,CAAC;gBACX,2CAA2C;gBAC3C,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;oBAC3B,QAAQ,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;oBAC7B,aAAa,GAAG,IAAI,CAAC;gBACvB,CAAC;gBACD,MAAM;QACV,CAAC;IACH,CAAC;IAED,wCAAwC;IACxC,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;QAC3B,QAAQ,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IAC/B,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,oBAAoB,CAClC,WAA2B,EAC3B,sBAA8B,GAAG;IAEjC,MAAM,KAAK,GAAgB,EAAE,CAAC;IAE9B,KAAK,MAAM,MAAM,IAAI,WAAW,EAAE,CAAC;QACjC,uBAAuB;QACvB,IAAI,MAAM,CAAC,UAAU,GAAG,mBAAmB,EAAE,CAAC;YAC5C,SAAS;QACX,CAAC;QAED,6BAA6B;QAC7B,MAAM,OAAO,GAAG,sBAAsB,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QACpD,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;YACrB,SAAS,CAAC,qBAAqB;QACjC,CAAC;QAED,KAAK,CAAC,IAAI,CAAC;YACT,IAAI,EAAE,OAAO;YACb,KAAK,EAAE,MAAM,CAAC,KAAK;YACnB,GAAG,EAAE,MAAM,CAAC,GAAG;YACf,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,MAAM,EAAE,eAAe,CAAC,GAAG;YAC3B,IAAI,EAAE,MAAM,C
AAC,IAAI;SAClB,CAAC,CAAC;IACL,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,qBAAqB,CACnC,KAAkB,EAClB,YAAoB;IAEpB,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACxB,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC;QAE1B,0BAA0B;QAC1B,OAAO,KAAK,GAAG,GAAG,IAAI,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC;YAC3D,KAAK,EAAE,CAAC;QACV,CAAC;QAED,2BAA2B;QAC3B,OAAO,GAAG,GAAG,KAAK,IAAI,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,GAAG,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC;YAC7D,GAAG,EAAE,CAAC;QACR,CAAC;QAED,yDAAyD;QACzD,IAAI,IAAI,CAAC,IAAI,KAAK,OAAO,CAAC,MAAM,IAAI,IAAI,CAAC,IAAI,KAAK,OAAO,CAAC,GAAG,EAAE,CAAC;YAC9D,OAAO,KAAK,GAAG,GAAG,IAAI,cAAc,CAAC,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC;gBACrE,KAAK,EAAE,CAAC;YACV,CAAC;YACD,OAAO,GAAG,GAAG,KAAK,IAAI,cAAc,CAAC,IAAI,CAAC,YAAY,CAAC,GAAG,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC;gBACvE,GAAG,EAAE,CAAC;YACR,CAAC;QACH,CAAC;QAED,wCAAwC;QACxC,IAAI,KAAK,IAAI,GAAG,EAAE,CAAC;YACjB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,OAAO;YACL,GAAG,IAAI;YACP,KAAK;YACL,GAAG;YACH,IAAI,EAAE,YAAY,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC;SACrC,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAChC,KAAkB,EAClB,YAAoB,EACpB,SAAiB,CAAC;IAElB,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,KAAK,CAAC;IAEpC,yBAAyB;IACzB,MAAM,MAAM,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAC5D,MAAM,MAAM,GAAgB,EAAE,CAAC;IAE/B,IAAI,OAAO,GAAG,MAAM,CAAC,CAAC,CAAE,CAAC;IAEzB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAE,CAAC;QAExB,sCAAsC;QACtC,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC;QACrC,MAAM,OAAO,GAAG,YAAY,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;QAC5D,MAAM,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAE/C,IAAI,IAAI,CAAC,IAAI,KAAK,OAAO,CAAC,IAAI,IAAI,GAAG,IAAI,MAAM,IAAI,gBAAgB,EAAE,CAAC;YACpE,cAAc;YACd,OAAO,GAAG;gBACR,GAAG,OAAO;gBACV,GAAG,EAAE,IAAI,CAAC,GAAG;gBACb,IAAI,EAAE,
YAAY,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC;gBACjD,UAAU,EAAE,CAAC,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;aACvD,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,OAAO,GAAG,IAAI,CAAC;QACjB,CAAC;IACH,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACrB,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * NER Module
3
+ * Exports NER model and tokenizer components
4
+ */
5
+ export * from './tokenizer.js';
6
+ export * from './bio-decoder.js';
7
+ export * from './ner-model.js';
8
+ export { loadRuntime, detectRuntime, getRuntimeType } from './onnx-runtime.js';
9
+ export { type NERModelMode, type ModelInfo, type ModelFileInfo, type DownloadProgressCallback, MODEL_REGISTRY, getModelCacheDir, getModelPath, isModelDownloaded, downloadModel, ensureModel, clearModelCache, listDownloadedModels, getModelInfo, } from './model-manager.js';
10
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/ner/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,cAAc,gBAAgB,CAAC;AAC/B,cAAc,kBAAkB,CAAC;AACjC,cAAc,gBAAgB,CAAC;AAC/B,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAC/E,OAAO,EACL,KAAK,YAAY,EACjB,KAAK,SAAS,EACd,KAAK,aAAa,EAClB,KAAK,wBAAwB,EAC7B,cAAc,EACd,gBAAgB,EAChB,YAAY,EACZ,iBAAiB,EACjB,aAAa,EACb,WAAW,EACX,eAAe,EACf,oBAAoB,EACpB,YAAY,GACb,MAAM,oBAAoB,CAAC"}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * NER Module
3
+ * Exports NER model and tokenizer components
4
+ */
5
+ export * from './tokenizer.js';
6
+ export * from './bio-decoder.js';
7
+ export * from './ner-model.js';
8
+ export { loadRuntime, detectRuntime, getRuntimeType } from './onnx-runtime.js';
9
+ export { MODEL_REGISTRY, getModelCacheDir, getModelPath, isModelDownloaded, downloadModel, ensureModel, clearModelCache, listDownloadedModels, getModelInfo, } from './model-manager.js';
10
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/ner/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,cAAc,gBAAgB,CAAC;AAC/B,cAAc,kBAAkB,CAAC;AACjC,cAAc,gBAAgB,CAAC;AAC/B,OAAO,EAAE,WAAW,EAAE,aAAa,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAC/E,OAAO,EAKL,cAAc,EACd,gBAAgB,EAChB,YAAY,EACZ,iBAAiB,EACjB,aAAa,EACb,WAAW,EACX,eAAe,EACf,oBAAoB,EACpB,YAAY,GACb,MAAM,oBAAoB,CAAC"}
@@ -0,0 +1,102 @@
1
+ /**
2
+ * NER Model Manager
3
+ * Handles automatic downloading and caching of NER models from Hugging Face Hub
4
+ */
5
+ /**
6
+ * Available NER model variants
7
+ */
8
+ export type NERModelMode = 'standard' | 'quantized' | 'disabled' | 'custom';
9
+ /**
10
+ * Model file info
11
+ */
12
+ export interface ModelFileInfo {
13
+ /** Filename in the repo */
14
+ repoFile: string;
15
+ /** Local filename */
16
+ localFile: string;
17
+ /** Whether file is required */
18
+ required: boolean;
19
+ }
20
+ /**
21
+ * Model registry entry
22
+ */
23
+ export interface ModelInfo {
24
+ /** Model identifier */
25
+ id: string;
26
+ /** Human-readable name */
27
+ name: string;
28
+ /** Description */
29
+ description: string;
30
+ /** Approximate size */
31
+ size: string;
32
+ /** Hugging Face repo ID */
33
+ hfRepo: string;
34
+ /** Subfolder in repo (for models with multiple variants) */
35
+ hfSubfolder?: string;
36
+ /** Files to download */
37
+ files: ModelFileInfo[];
38
+ /** Label map for this model */
39
+ labelMap: string[];
40
+ }
41
+ /**
42
+ * Registry of available models hosted on Hugging Face Hub
43
+ *
44
+ * Using Xenova's ONNX exports which are optimized for JS/ONNX runtime
45
+ * https://huggingface.co/Xenova
46
+ */
47
+ export declare const MODEL_REGISTRY: Record<'standard' | 'quantized', ModelInfo>;
48
+ /**
49
+ * Gets the cache directory for models
50
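+ * Uses the platform cache location: ~/Library/Caches on macOS, %LOCALAPPDATA% on Windows, and $XDG_CACHE_HOME (or ~/.cache) elsewhere, each followed by bridge-anonymization/models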
51
+ */
52
+ export declare function getModelCacheDir(): string;
53
+ /**
54
+ * Gets the path to a specific model variant
55
+ */
56
+ export declare function getModelPath(mode: 'standard' | 'quantized'): string;
57
+ /**
58
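+ * Checks whether a model variant is already downloaded, i.e. its required model file and tokenizer.json are both present in the variant's cache directory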
59
+ */
60
+ export declare function isModelDownloaded(mode: 'standard' | 'quantized'): Promise<boolean>;
61
+ /**
62
+ * Progress callback for downloads
63
+ */
64
+ export type DownloadProgressCallback = (progress: {
65
+ file: string;
66
+ bytesDownloaded: number;
67
+ totalBytes: number | null;
68
+ percent: number | null;
69
+ }) => void;
70
+ /**
71
+ * Downloads a model variant (model, tokenizer and label-map files) from Hugging Face Hub and resolves to the local directory it was stored in
72
+ */
73
+ export declare function downloadModel(mode: 'standard' | 'quantized', onProgress?: DownloadProgressCallback, onStatus?: (status: string) => void): Promise<string>;
74
+ /**
75
+ * Gets model paths if available, or downloads if needed
76
+ */
77
+ export declare function ensureModel(mode: 'standard' | 'quantized', options?: {
78
+ autoDownload?: boolean;
79
+ onProgress?: DownloadProgressCallback;
80
+ onStatus?: (status: string) => void;
81
+ }): Promise<{
82
+ modelPath: string;
83
+ vocabPath: string;
84
+ labelMapPath: string;
85
+ }>;
86
+ /**
87
+ * Clears cached models
88
+ */
89
+ export declare function clearModelCache(mode?: 'standard' | 'quantized'): Promise<void>;
90
+ /**
91
+ * Lists downloaded models
92
+ */
93
+ export declare function listDownloadedModels(): Promise<Array<{
94
+ mode: 'standard' | 'quantized';
95
+ path: string;
96
+ size: string;
97
+ }>>;
98
+ /**
99
+ * Gets info about available models
100
+ */
101
+ export declare function getModelInfo(mode: 'standard' | 'quantized'): ModelInfo;
102
+ //# sourceMappingURL=model-manager.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"model-manager.d.ts","sourceRoot":"","sources":["../../src/ner/model-manager.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAOH;;GAEG;AACH,MAAM,MAAM,YAAY,GAAG,UAAU,GAAG,WAAW,GAAG,UAAU,GAAG,QAAQ,CAAC;AAE5E;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,2BAA2B;IAC3B,QAAQ,EAAE,MAAM,CAAC;IACjB,qBAAqB;IACrB,SAAS,EAAE,MAAM,CAAC;IAClB,+BAA+B;IAC/B,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,uBAAuB;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,0BAA0B;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,kBAAkB;IAClB,WAAW,EAAE,MAAM,CAAC;IACpB,uBAAuB;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,2BAA2B;IAC3B,MAAM,EAAE,MAAM,CAAC;IACf,4DAA4D;IAC5D,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,wBAAwB;IACxB,KAAK,EAAE,aAAa,EAAE,CAAC;IACvB,+BAA+B;IAC/B,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED;;;;;GAKG;AACH,eAAO,MAAM,cAAc,EAAE,MAAM,CAAC,UAAU,GAAG,WAAW,EAAE,SAAS,CA0BtE,CAAC;AAYF;;;GAGG;AACH,wBAAgB,gBAAgB,IAAI,MAAM,CAYzC;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,UAAU,GAAG,WAAW,GAAG,MAAM,CAEnE;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CAAC,IAAI,EAAE,UAAU,GAAG,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC,CAkBxF;AAED;;GAEG;AACH,MAAM,MAAM,wBAAwB,GAAG,CAAC,QAAQ,EAAE;IAChD,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,MAAM,CAAC;IACxB,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;CACxB,KAAK,IAAI,CAAC;AAuEX;;GAEG;AACH,wBAAsB,aAAa,CACjC,IAAI,EAAE,UAAU,GAAG,WAAW,EAC9B,UAAU,CAAC,EAAE,wBAAwB,EACrC,QAAQ,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,IAAI,GAClC,OAAO,CAAC,MAAM,CAAC,CAiDjB;AAED;;GAEG;AACH,wBAAsB,WAAW,CAC/B,IAAI,EAAE,UAAU,GAAG,WAAW,EAC9B,OAAO,GAAE;IACP,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,UAAU,CAAC,EAAE,wBAAwB,CAAC;IACtC,QAAQ,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,IAAI,CAAC;CAChC,GACL,OAAO,CAAC;IAAE,SAAS,EAAE,MAAM,CAAC;IAAC,SAAS,EAAE,MAAM,CAAC;IAAC,YAAY,EAAE,MAAM,CAAA;CAAE,CAAC,CAiCzE;AAED;;GAEG;AACH,wBAAsB,eAAe,CAAC,IAAI,CAAC,EAAE,UAAU,GAAG,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC,CAQpF;AAED;;GAEG;AACH,wBAAsB,oBAAoB,IAAI,OAAO,CAAC,KAAK,CAAC;IAAE,IAAI,EAAE,UAAU,GAAG,WAAW,CAAC;IAAC,IAAI,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,CAAC,CAAC,CAY3H;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,IAAI,EA
AE,UAAU,GAAG,WAAW,GAAG,SAAS,CAEtE"}
@@ -0,0 +1,253 @@
1
+ /**
2
+ * NER Model Manager
3
+ * Handles automatic downloading and caching of NER models from Hugging Face Hub
4
+ */
5
+ import * as fs from 'fs/promises';
6
+ import * as path from 'path';
7
+ import * as os from 'os';
8
+ /**
9
+ * Registry of available models hosted on Hugging Face Hub
10
+ *
11
+ * Using Xenova's ONNX exports which are optimized for JS/ONNX runtime
12
+ * https://huggingface.co/Xenova
13
+ */
14
+ export const MODEL_REGISTRY = {
15
+ standard: {
16
+ id: 'xlm-roberta-ner-standard',
17
+ name: 'XLM-RoBERTa NER (Standard)',
18
+ description: 'Multilingual NER model supporting EN, DE, FR, ES, and more',
19
+ size: '~1.1 GB',
20
+ hfRepo: 'Xenova/xlm-roberta-base-ner-hrl',
21
+ hfSubfolder: 'onnx',
22
+ files: [
23
+ { repoFile: 'model.onnx', localFile: 'model.onnx', required: true },
24
+ { repoFile: 'model.onnx_data', localFile: 'model.onnx_data', required: false },
25
+ ],
26
+ labelMap: ['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC', 'B-DATE', 'I-DATE'],
27
+ },
28
+ quantized: {
29
+ id: 'xlm-roberta-ner-quantized',
30
+ name: 'XLM-RoBERTa NER (Quantized)',
31
+ description: 'Quantized version, ~4x smaller with minimal accuracy loss',
32
+ size: '~280 MB',
33
+ hfRepo: 'Xenova/xlm-roberta-base-ner-hrl',
34
+ hfSubfolder: 'onnx',
35
+ files: [
36
+ { repoFile: 'model_quantized.onnx', localFile: 'model.onnx', required: true },
37
+ ],
38
+ labelMap: ['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC', 'B-DATE', 'I-DATE'],
39
+ },
40
+ };
41
+ /**
42
+ * Shared tokenizer files (same for both variants)
43
+ */
44
+ const TOKENIZER_FILES = [
45
+ { repoFile: 'tokenizer.json', localFile: 'tokenizer.json', required: true },
46
+ { repoFile: 'tokenizer_config.json', localFile: 'tokenizer_config.json', required: false },
47
+ { repoFile: 'special_tokens_map.json', localFile: 'special_tokens_map.json', required: false },
48
+ { repoFile: 'config.json', localFile: 'config.json', required: false },
49
+ ];
50
+ /**
51
+ * Gets the cache directory for models
52
+ * Uses platform-specific cache location
53
+ */
54
+ export function getModelCacheDir() {
55
+ const homeDir = os.homedir();
56
+ switch (process.platform) {
57
+ case 'darwin':
58
+ return path.join(homeDir, 'Library', 'Caches', 'bridge-anonymization', 'models');
59
+ case 'win32':
60
+ return path.join(process.env['LOCALAPPDATA'] ?? path.join(homeDir, 'AppData', 'Local'), 'bridge-anonymization', 'models');
61
+ default:
62
+ // Linux and others - use XDG_CACHE_HOME or ~/.cache
63
+ return path.join(process.env['XDG_CACHE_HOME'] ?? path.join(homeDir, '.cache'), 'bridge-anonymization', 'models');
64
+ }
65
+ }
66
+ /**
67
+ * Gets the path to a specific model variant
68
+ */
69
+ export function getModelPath(mode) {
70
+ return path.join(getModelCacheDir(), mode);
71
+ }
72
+ /**
73
+ * Checks if a model is already downloaded
74
+ */
75
+ export async function isModelDownloaded(mode) {
76
+ const modelDir = getModelPath(mode);
77
+ const info = MODEL_REGISTRY[mode];
78
+ try {
79
+ // Check if model file exists
80
+ const modelFile = info.files.find(f => f.required && f.localFile.includes('model'));
81
+ if (modelFile) {
82
+ await fs.access(path.join(modelDir, modelFile.localFile));
83
+ }
84
+ // Check if tokenizer exists
85
+ await fs.access(path.join(modelDir, 'tokenizer.json'));
86
+ return true;
87
+ }
88
+ catch {
89
+ return false;
90
+ }
91
+ }
92
+ /**
93
+ * Builds a Hugging Face Hub download URL
94
+ */
95
+ function getHuggingFaceUrl(repo, filename, subfolder) {
96
+ const filePath = subfolder ? `${subfolder}/${filename}` : filename;
97
+ return `https://huggingface.co/${repo}/resolve/main/${filePath}`;
98
+ }
99
+ /**
100
+ * Downloads a file from URL to local path with progress
101
+ */
102
+ async function downloadFile(url, destPath, onProgress) {
103
+ const response = await fetch(url, {
104
+ headers: {
105
+ 'User-Agent': 'bridge-anonymization/1.0.0',
106
+ },
107
+ });
108
+ if (!response.ok) {
109
+ if (response.status === 404) {
110
+ throw new Error(`File not found: ${url}`);
111
+ }
112
+ throw new Error(`Failed to download ${url}: ${response.status} ${response.statusText}`);
113
+ }
114
+ const totalBytes = response.headers.get('content-length');
115
+ const total = totalBytes ? parseInt(totalBytes, 10) : null;
116
+ // Ensure directory exists
117
+ await fs.mkdir(path.dirname(destPath), { recursive: true });
118
+ const fileName = path.basename(destPath);
119
+ // For Node.js, we need to handle the stream differently
120
+ const reader = response.body?.getReader();
121
+ if (!reader) {
122
+ throw new Error('Response body is not readable');
123
+ }
124
+ const chunks = [];
125
+ let bytesDownloaded = 0;
126
+ while (true) {
127
+ const { done, value } = await reader.read();
128
+ if (done)
129
+ break;
130
+ chunks.push(value);
131
+ bytesDownloaded += value.length;
132
+ if (onProgress) {
133
+ onProgress({
134
+ file: fileName,
135
+ bytesDownloaded,
136
+ totalBytes: total,
137
+ percent: total ? Math.round((bytesDownloaded / total) * 100) : null,
138
+ });
139
+ }
140
+ }
141
+ // Write all chunks to file
142
+ const buffer = Buffer.concat(chunks);
143
+ await fs.writeFile(destPath, buffer);
144
+ }
145
+ /**
146
+ * Downloads a model variant from Hugging Face Hub
147
+ */
148
+ export async function downloadModel(mode, onProgress, onStatus) {
149
+ const info = MODEL_REGISTRY[mode];
150
+ const modelDir = getModelPath(mode);
151
+ // Create directory
152
+ await fs.mkdir(modelDir, { recursive: true });
153
+ onStatus?.(`Downloading ${info.name} from Hugging Face Hub...`);
154
+ onStatus?.(`Repository: ${info.hfRepo}`);
155
+ // Download model files
156
+ for (const file of info.files) {
157
+ const url = getHuggingFaceUrl(info.hfRepo, file.repoFile, info.hfSubfolder);
158
+ const destPath = path.join(modelDir, file.localFile);
159
+ onStatus?.(`Downloading ${file.repoFile}...`);
160
+ try {
161
+ await downloadFile(url, destPath, onProgress);
162
+ }
163
+ catch (e) {
164
+ if (file.required) {
165
+ throw new Error(`Failed to download required file ${file.repoFile}: ${e}`);
166
+ }
167
+ // Optional files can fail silently
168
+ onStatus?.(`Skipping optional file ${file.repoFile}`);
169
+ }
170
+ }
171
+ // Download tokenizer files (from repo root, not subfolder)
172
+ for (const file of TOKENIZER_FILES) {
173
+ const url = getHuggingFaceUrl(info.hfRepo, file.repoFile);
174
+ const destPath = path.join(modelDir, file.localFile);
175
+ try {
176
+ await downloadFile(url, destPath, onProgress);
177
+ }
178
+ catch (e) {
179
+ if (file.required) {
180
+ throw new Error(`Failed to download required file ${file.repoFile}: ${e}`);
181
+ }
182
+ }
183
+ }
184
+ // Write label map
185
+ const labelMapPath = path.join(modelDir, 'label_map.json');
186
+ await fs.writeFile(labelMapPath, JSON.stringify(info.labelMap, null, 2));
187
+ onStatus?.('Download complete!');
188
+ return modelDir;
189
+ }
190
/**
 * Resolves the on-disk paths for a model variant, downloading it first when
 * it is not present and automatic download is allowed.
 *
 * @param {string} mode - Key of the variant in `MODEL_REGISTRY`.
 * @param {object} [options]
 * @param {boolean} [options.autoDownload=true] - When false, a missing model
 *   raises an error instead of being downloaded.
 * @param {Function} [options.onProgress] - Forwarded to `downloadModel`.
 * @param {(message: string) => void} [options.onStatus] - Receives status lines.
 * @returns {Promise<{ modelPath: string, vocabPath: string, labelMapPath: string }>}
 *   Paths inside the model directory.
 * @throws {Error} If the model is missing and `autoDownload` is false.
 */
export async function ensureModel(mode, options = {}) {
    const { autoDownload = true, onProgress, onStatus } = options;
    const modelDir = getModelPath(mode);
    const info = MODEL_REGISTRY[mode];
    const alreadyCached = await isModelDownloaded(mode);
    if (alreadyCached) {
        onStatus?.(`Using cached model: ${info.name}`);
    }
    else if (autoDownload) {
        await downloadModel(mode, onProgress, onStatus);
    }
    else {
        // Missing model and the caller opted out of downloading: explain the options.
        const message = [
            `NER model '${mode}' not found at ${modelDir}.\n\n`,
            `To download automatically, use:\n`,
            ` createAnonymizer({ ner: { mode: '${mode}', autoDownload: true } })\n\n`,
            `Or use regex-only mode:\n`,
            ` createAnonymizer({ ner: { mode: 'disabled' } })`,
        ].join('');
        throw new Error(message);
    }
    // Locate the registry entry for the ONNX file; fall back to the default name.
    const onnxEntry = info.files.find((entry) => entry.localFile === 'model.onnx');
    const onnxName = onnxEntry?.localFile ?? 'model.onnx';
    const inModelDir = (name) => path.join(modelDir, name);
    return {
        modelPath: inModelDir(onnxName),
        vocabPath: inModelDir('tokenizer.json'),
        labelMapPath: inModelDir('label_map.json'),
    };
}
220
/**
 * Removes cached model data from disk.
 *
 * @param {string} [mode] - When given (truthy), only that variant's directory
 *   is removed; otherwise the whole model cache directory is removed.
 * @returns {Promise<void>}
 */
export async function clearModelCache(mode) {
    // Pick the single-variant directory or the cache root, then delete it.
    const target = mode ? getModelPath(mode) : getModelCacheDir();
    // `force: true` makes a missing directory a no-op rather than an error.
    await fs.rm(target, { recursive: true, force: true });
}
233
/**
 * Reports which of the known model variants are present locally.
 *
 * Checks the `standard` and `quantized` variants, in that order.
 *
 * @returns {Promise<Array<{ mode: string, path: string, size: * }>>}
 *   One entry per downloaded variant, carrying its directory and the `size`
 *   value recorded in `MODEL_REGISTRY`.
 */
export async function listDownloadedModels() {
    const candidates = ['standard', 'quantized'];
    const found = [];
    for (let i = 0; i < candidates.length; i += 1) {
        const mode = candidates[i];
        const present = await isModelDownloaded(mode);
        if (!present) {
            continue;
        }
        found.push({
            mode,
            path: getModelPath(mode),
            size: MODEL_REGISTRY[mode].size,
        });
    }
    return found;
}
247
/**
 * Looks up the registry entry for a model variant.
 *
 * @param {string} mode - Key into `MODEL_REGISTRY`.
 * @returns {*} The entry stored under `mode`, or `undefined` when the
 *   registry has no such key.
 */
export function getModelInfo(mode) {
    const { [mode]: entry } = MODEL_REGISTRY;
    return entry;
}
253
+ //# sourceMappingURL=model-manager.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"model-manager.js","sourceRoot":"","sources":["../../src/ner/model-manager.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,MAAM,aAAa,CAAC;AAClC,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AA0CzB;;;;;GAKG;AACH,MAAM,CAAC,MAAM,cAAc,GAAgD;IACzE,QAAQ,EAAE;QACR,EAAE,EAAE,0BAA0B;QAC9B,IAAI,EAAE,4BAA4B;QAClC,WAAW,EAAE,4DAA4D;QACzE,IAAI,EAAE,SAAS;QACf,MAAM,EAAE,iCAAiC;QACzC,WAAW,EAAE,MAAM;QACnB,KAAK,EAAE;YACL,EAAE,QAAQ,EAAE,YAAY,EAAE,SAAS,EAAE,YAAY,EAAE,QAAQ,EAAE,IAAI,EAAE;YACnE,EAAE,QAAQ,EAAE,iBAAiB,EAAE,SAAS,EAAE,iBAAiB,EAAE,QAAQ,EAAE,KAAK,EAAE;SAC/E;QACD,QAAQ,EAAE,CAAC,GAAG,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ,CAAC;KAC1F;IACD,SAAS,EAAE;QACT,EAAE,EAAE,2BAA2B;QAC/B,IAAI,EAAE,6BAA6B;QACnC,WAAW,EAAE,2DAA2D;QACxE,IAAI,EAAE,SAAS;QACf,MAAM,EAAE,iCAAiC;QACzC,WAAW,EAAE,MAAM;QACnB,KAAK,EAAE;YACL,EAAE,QAAQ,EAAE,sBAAsB,EAAE,SAAS,EAAE,YAAY,EAAE,QAAQ,EAAE,IAAI,EAAE;SAC9E;QACD,QAAQ,EAAE,CAAC,GAAG,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,QAAQ,CAAC;KAC1F;CACF,CAAC;AAEF;;GAEG;AACH,MAAM,eAAe,GAAoB;IACvC,EAAE,QAAQ,EAAE,gBAAgB,EAAE,SAAS,EAAE,gBAAgB,EAAE,QAAQ,EAAE,IAAI,EAAE;IAC3E,EAAE,QAAQ,EAAE,uBAAuB,EAAE,SAAS,EAAE,uBAAuB,EAAE,QAAQ,EAAE,KAAK,EAAE;IAC1F,EAAE,QAAQ,EAAE,yBAAyB,EAAE,SAAS,EAAE,yBAAyB,EAAE,QAAQ,EAAE,KAAK,EAAE;IAC9F,EAAE,QAAQ,EAAE,aAAa,EAAE,SAAS,EAAE,aAAa,EAAE,QAAQ,EAAE,KAAK,EAAE;CACvE,CAAC;AAEF;;;GAGG;AACH,MAAM,UAAU,gBAAgB;IAC9B,MAAM,OAAO,GAAG,EAAE,CAAC,OAAO,EAAE,CAAC;IAE7B,QAAQ,OAAO,CAAC,QAAQ,EAAE,CAAC;QACzB,KAAK,QAAQ;YACX,OAAO,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,sBAAsB,EAAE,QAAQ,CAAC,CAAC;QACnF,KAAK,OAAO;YACV,OAAO,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,EAAE,OAAO,CAAC,EAAE,sBAAsB,EAAE,QAAQ,CAAC,CAAC;QAC5H;YACE,oDAAoD;YACpD,OAAO,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,CAAC,EAAE,sBAAsB,EAAE,QAAQ,CAAC,CAAC;IACtH,CAAC;AACH,CAAC;AAED;;GAEG;
AACH,MAAM,UAAU,YAAY,CAAC,IAA8B;IACzD,OAAO,IAAI,CAAC,IAAI,CAAC,gBAAgB,EAAE,EAAE,IAAI,CAAC,CAAC;AAC7C,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,IAA8B;IACpE,MAAM,QAAQ,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IACpC,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;IAElC,IAAI,CAAC;QACH,6BAA6B;QAC7B,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;QACpF,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC,SAAS,CAAC,CAAC,CAAC;QAC5D,CAAC;QAED,4BAA4B;QAC5B,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,gBAAgB,CAAC,CAAC,CAAC;QAEvD,OAAO,IAAI,CAAC;IACd,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAYD;;GAEG;AACH,SAAS,iBAAiB,CAAC,IAAY,EAAE,QAAgB,EAAE,SAAkB;IAC3E,MAAM,QAAQ,GAAG,SAAS,CAAC,CAAC,CAAC,GAAG,SAAS,IAAI,QAAQ,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;IACnE,OAAO,0BAA0B,IAAI,iBAAiB,QAAQ,EAAE,CAAC;AACnE,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,YAAY,CACzB,GAAW,EACX,QAAgB,EAChB,UAAqC;IAErC,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;QAChC,OAAO,EAAE;YACP,YAAY,EAAE,4BAA4B;SAC3C;KACF,CAAC,CAAC;IAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACjB,IAAI,QAAQ,CAAC,MAAM,KAAK,GAAG,EAAE,CAAC;YAC5B,MAAM,IAAI,KAAK,CAAC,mBAAmB,GAAG,EAAE,CAAC,CAAC;QAC5C,CAAC;QACD,MAAM,IAAI,KAAK,CAAC,sBAAsB,GAAG,KAAK,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAC;IAC1F,CAAC;IAED,MAAM,UAAU,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;IAC1D,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAE3D,0BAA0B;IAC1B,MAAM,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAE5D,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAEzC,wDAAwD;IACxD,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,EAAE,SAAS,EAAE,CAAC;IAC1C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,IAAI,KAAK,CAAC,+BAA+B,CAAC,CAAC;IACnD,CAAC;IAED,MAAM,MAAM,GAAiB,EAAE,CAAC;IAChC,IAAI,eAAe,GAAG,CAAC,CAAC;IAExB,OAAO,IAAI,EAAE,CAAC;QACZ,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAA
C;QAE5C,IAAI,IAAI;YAAE,MAAM;QAEhB,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACnB,eAAe,IAAI,KAAK,CAAC,MAAM,CAAC;QAEhC,IAAI,UAAU,EAAE,CAAC;YACf,UAAU,CAAC;gBACT,IAAI,EAAE,QAAQ;gBACd,eAAe;gBACf,UAAU,EAAE,KAAK;gBACjB,OAAO,EAAE,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,eAAe,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI;aACpE,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,2BAA2B;IAC3B,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;IACrC,MAAM,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;AACvC,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,IAA8B,EAC9B,UAAqC,EACrC,QAAmC;IAEnC,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,QAAQ,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IAEpC,mBAAmB;IACnB,MAAM,EAAE,CAAC,KAAK,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAE9C,QAAQ,EAAE,CAAC,eAAe,IAAI,CAAC,IAAI,2BAA2B,CAAC,CAAC;IAChE,QAAQ,EAAE,CAAC,eAAe,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;IAEzC,uBAAuB;IACvB,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;QAC9B,MAAM,GAAG,GAAG,iBAAiB,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;QAC5E,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;QAErD,QAAQ,EAAE,CAAC,eAAe,IAAI,CAAC,QAAQ,KAAK,CAAC,CAAC;QAE9C,IAAI,CAAC;YACH,MAAM,YAAY,CAAC,GAAG,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC;QAChD,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAClB,MAAM,IAAI,KAAK,CAAC,oCAAoC,IAAI,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC,CAAC;YAC7E,CAAC;YACD,mCAAmC;YACnC,QAAQ,EAAE,CAAC,0BAA0B,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED,2DAA2D;IAC3D,KAAK,MAAM,IAAI,IAAI,eAAe,EAAE,CAAC;QACnC,MAAM,GAAG,GAAG,iBAAiB,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC1D,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;QAErD,IAAI,CAAC;YACH,MAAM,YAAY,CAAC,GAAG,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC;QAChD,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBAClB,MAAM,IAAI,KAAK,CAAC,oCAAoC,IAAI,CAAC,QAAQ,KAAK,CAAC,EAAE,CAAC,CAAC;YAC7E,CAAC;QACH,CAAC;IACH,CAAC;IAED,kBAAkB;IAClB,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,gBAAgB,CAAC,
CAAC;IAC3D,MAAM,EAAE,CAAC,SAAS,CAAC,YAAY,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAEzE,QAAQ,EAAE,CAAC,oBAAoB,CAAC,CAAC;IAEjC,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,IAA8B,EAC9B,UAII,EAAE;IAEN,MAAM,EAAE,YAAY,GAAG,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC;IAE9D,MAAM,QAAQ,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;IACpC,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;IAElC,8BAA8B;IAC9B,MAAM,YAAY,GAAG,MAAM,iBAAiB,CAAC,IAAI,CAAC,CAAC;IAEnD,IAAI,CAAC,YAAY,EAAE,CAAC;QAClB,IAAI,CAAC,YAAY,EAAE,CAAC;YAClB,MAAM,IAAI,KAAK,CACb,cAAc,IAAI,kBAAkB,QAAQ,OAAO;gBACnD,mCAAmC;gBACnC,sCAAsC,IAAI,gCAAgC;gBAC1E,2BAA2B;gBAC3B,mDAAmD,CACpD,CAAC;QACJ,CAAC;QAED,MAAM,aAAa,CAAC,IAAI,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC;IAClD,CAAC;SAAM,CAAC;QACN,QAAQ,EAAE,CAAC,uBAAuB,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;IACjD,CAAC;IAED,kBAAkB;IAClB,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,YAAY,CAAC,CAAC;IAErE,OAAO;QACL,SAAS,EAAE,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,EAAE,SAAS,IAAI,YAAY,CAAC;QACpE,SAAS,EAAE,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,gBAAgB,CAAC;QAChD,YAAY,EAAE,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,gBAAgB,CAAC;KACpD,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,IAA+B;IACnE,IAAI,IAAI,EAAE,CAAC;QACT,MAAM,QAAQ,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;QACpC,MAAM,EAAE,CAAC,EAAE,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IAC1D,CAAC;SAAM,CAAC;QACN,MAAM,QAAQ,GAAG,gBAAgB,EAAE,CAAC;QACpC,MAAM,EAAE,CAAC,EAAE,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;IAC1D,CAAC;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB;IACxC,MAAM,MAAM,GAA0E,EAAE,CAAC;IAEzF,KAAK,MAAM,IAAI,IAAI,CAAC,UAAU,EAAE,WAAW,CAAU,EAAE,CAAC;QACtD,IAAI,MAAM,iBAAiB,CAAC,IAAI,CAAC,EAAE,CAAC;YAClC,MAAM,SAAS,GAAG,YAAY,CAAC,IAAI,CAAC,CAAC;YACrC,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;YAClC,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC;QAC1D,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;
;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,IAA8B;IACzD,OAAO,cAAc,CAAC,IAAI,CAAC,CAAC;AAC9B,CAAC"}