@mailwoman/neural 4.2.0 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/out/address-system.d.ts +35 -0
- package/out/address-system.d.ts.map +1 -0
- package/out/address-system.js +55 -0
- package/out/address-system.js.map +1 -0
- package/out/classifier.d.ts +39 -1
- package/out/classifier.d.ts.map +1 -1
- package/out/classifier.js +64 -61
- package/out/classifier.js.map +1 -1
- package/out/gazetteer-inference.d.ts.map +1 -1
- package/out/gazetteer-inference.js +17 -0
- package/out/gazetteer-inference.js.map +1 -1
- package/out/onnx-runner.d.ts +6 -0
- package/out/onnx-runner.d.ts.map +1 -1
- package/out/onnx-runner.js +4 -1
- package/out/onnx-runner.js.map +1 -1
- package/out/span-bridge.d.ts +33 -0
- package/out/span-bridge.d.ts.map +1 -0
- package/out/span-bridge.js +82 -0
- package/out/span-bridge.js.map +1 -0
- package/package.json +3 -3
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Address-system detection from the model's locale head (#511 Tier A — the consumer the head
|
|
7
|
+
* never had). The PR3 self-conditioning head predicts which country an address belongs to from
|
|
8
|
+
* the pooled sequence; v1.1.0+ exports surface it as the `locale_logits` ONNX output. This module
|
|
9
|
+
* turns that posterior into a `SystemCode` the conventions layer can act on.
|
|
10
|
+
*
|
|
11
|
+
* Conservative by contract: below the confidence threshold, or for locales without a codex
|
|
12
|
+
* system slice, detection returns null and the parse proceeds exactly as before. The mask must
|
|
13
|
+
* never fire on a guess.
|
|
14
|
+
*/
|
|
15
|
+
import type { SystemCode } from "@mailwoman/codex";
|
|
16
|
+
/**
|
|
17
|
+
* Locale-head class order — MUST mirror `corpus-python/src/mailwoman_train/labels.py`
|
|
18
|
+
* `LOCALE_COUNTRIES` exactly (same never-reorder/append-only discipline; a drift here silently
|
|
19
|
+
* mislabels every detection).
|
|
20
|
+
*/
|
|
21
|
+
export declare const LOCALE_COUNTRIES: readonly ["US", "FR", "DE", "CA", "GB", "JP", "ES", "IT", "NL"];
|
|
22
|
+
/**
 * A confident locale-head detection, as produced by `detectAddressSystem`.
 */
export interface DetectedSystem {
    /** Codex address-system slice that `country` maps to. */
    system: SystemCode;
    /** The winning locale-head class — one of the `LOCALE_COUNTRIES` entries. */
    country: (typeof LOCALE_COUNTRIES)[number];
    /** Softmax probability the locale head assigned to `country`. */
    confidence: number;
}
|
|
27
|
+
/**
|
|
28
|
+
* Read the locale head's posterior into a confident `SystemCode`, or null.
|
|
29
|
+
*
|
|
30
|
+
* @param localeLogits The raw `locale_logits` output (LOCALE_COUNTRIES order).
|
|
31
|
+
* @param threshold Minimum softmax probability to act on (default 0.8 — the head's held-out
|
|
32
|
+
* accuracy is ~0.98, so 0.8 trades a little recall for never masking on a coin flip).
|
|
33
|
+
*/
|
|
34
|
+
export declare function detectAddressSystem(localeLogits: readonly number[] | undefined, threshold?: number): DetectedSystem | null;
|
|
35
|
+
//# sourceMappingURL=address-system.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"address-system.d.ts","sourceRoot":"","sources":["../address-system.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAIlD;;;;GAIG;AACH,eAAO,MAAM,gBAAgB,iEAAkE,CAAA;AAY/F,MAAM,WAAW,cAAc;IAC9B,MAAM,EAAE,UAAU,CAAA;IAClB,OAAO,EAAE,CAAC,OAAO,gBAAgB,CAAC,CAAC,MAAM,CAAC,CAAA;IAC1C,UAAU,EAAE,MAAM,CAAA;CAClB;AAED;;;;;;GAMG;AACH,wBAAgB,mBAAmB,CAClC,YAAY,EAAE,SAAS,MAAM,EAAE,GAAG,SAAS,EAC3C,SAAS,SAAM,GACb,cAAc,GAAG,IAAI,CAWvB"}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Address-system detection from the model's locale head (#511 Tier A — the consumer the head
|
|
7
|
+
* never had). The PR3 self-conditioning head predicts which country an address belongs to from
|
|
8
|
+
* the pooled sequence; v1.1.0+ exports surface it as the `locale_logits` ONNX output. This module
|
|
9
|
+
* turns that posterior into a `SystemCode` the conventions layer can act on.
|
|
10
|
+
*
|
|
11
|
+
* Conservative by contract: below the confidence threshold, or for locales without a codex
|
|
12
|
+
* system slice, detection returns null and the parse proceeds exactly as before. The mask must
|
|
13
|
+
* never fire on a guess.
|
|
14
|
+
*/
|
|
15
|
+
import { softmax } from "./viterbi.js";
|
|
16
|
+
/**
|
|
17
|
+
* Locale-head class order — MUST mirror `corpus-python/src/mailwoman_train/labels.py`
|
|
18
|
+
* `LOCALE_COUNTRIES` exactly (same never-reorder/append-only discipline; a drift here silently
|
|
19
|
+
* mislabels every detection).
|
|
20
|
+
*/
|
|
21
|
+
export const LOCALE_COUNTRIES = ["US", "FR", "DE", "CA", "GB", "JP", "ES", "IT", "NL"];
|
|
22
|
+
/** ISO-2 country → codex address-system slice. Unmapped locales have no conventions yet. */
|
|
23
|
+
const COUNTRY_TO_SYSTEM = {
|
|
24
|
+
US: "us",
|
|
25
|
+
FR: "fr",
|
|
26
|
+
DE: "de",
|
|
27
|
+
CA: "ca",
|
|
28
|
+
GB: "gb",
|
|
29
|
+
JP: "jp",
|
|
30
|
+
};
|
|
31
|
+
/**
|
|
32
|
+
* Read the locale head's posterior into a confident `SystemCode`, or null.
|
|
33
|
+
*
|
|
34
|
+
* @param localeLogits The raw `locale_logits` output (LOCALE_COUNTRIES order).
|
|
35
|
+
* @param threshold Minimum softmax probability to act on (default 0.8 — the head's held-out
|
|
36
|
+
* accuracy is ~0.98, so 0.8 trades a little recall for never masking on a coin flip).
|
|
37
|
+
*/
|
|
38
|
+
export function detectAddressSystem(localeLogits, threshold = 0.8) {
|
|
39
|
+
if (!localeLogits || localeLogits.length !== LOCALE_COUNTRIES.length)
|
|
40
|
+
return null;
|
|
41
|
+
const probs = softmax(localeLogits);
|
|
42
|
+
let best = 0;
|
|
43
|
+
for (let i = 1; i < probs.length; i++)
|
|
44
|
+
if (probs[i] > probs[best])
|
|
45
|
+
best = i;
|
|
46
|
+
const confidence = probs[best];
|
|
47
|
+
if (confidence < threshold)
|
|
48
|
+
return null;
|
|
49
|
+
const country = LOCALE_COUNTRIES[best];
|
|
50
|
+
const system = COUNTRY_TO_SYSTEM[country];
|
|
51
|
+
if (!system)
|
|
52
|
+
return null;
|
|
53
|
+
return { system, country, confidence };
|
|
54
|
+
}
|
|
55
|
+
//# sourceMappingURL=address-system.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"address-system.js","sourceRoot":"","sources":["../address-system.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAIH,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAA;AAEtC;;;;GAIG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAU,CAAA;AAE/F,4FAA4F;AAC5F,MAAM,iBAAiB,GAAmE;IACzF,EAAE,EAAE,IAAI;IACR,EAAE,EAAE,IAAI;IACR,EAAE,EAAE,IAAI;IACR,EAAE,EAAE,IAAI;IACR,EAAE,EAAE,IAAI;IACR,EAAE,EAAE,IAAI;CACR,CAAA;AAQD;;;;;;GAMG;AACH,MAAM,UAAU,mBAAmB,CAClC,YAA2C,EAC3C,SAAS,GAAG,GAAG;IAEf,IAAI,CAAC,YAAY,IAAI,YAAY,CAAC,MAAM,KAAK,gBAAgB,CAAC,MAAM;QAAE,OAAO,IAAI,CAAA;IACjF,MAAM,KAAK,GAAG,OAAO,CAAC,YAAwB,CAAC,CAAA;IAC/C,IAAI,IAAI,GAAG,CAAC,CAAA;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,IAAI,KAAK,CAAC,CAAC,CAAE,GAAG,KAAK,CAAC,IAAI,CAAE;YAAE,IAAI,GAAG,CAAC,CAAA;IAC7E,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAE,CAAA;IAC/B,IAAI,UAAU,GAAG,SAAS;QAAE,OAAO,IAAI,CAAA;IACvC,MAAM,OAAO,GAAG,gBAAgB,CAAC,IAAI,CAAE,CAAA;IACvC,MAAM,MAAM,GAAG,iBAAiB,CAAC,OAAO,CAAC,CAAA;IACzC,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAA;IACxB,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,CAAA;AACvC,CAAC"}
|
package/out/classifier.d.ts
CHANGED
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
* Convenience wrappers `parseJson` / `parseTuples` / `parseXml` project the tree on the way out.
|
|
11
11
|
*/
|
|
12
12
|
import { decodeAsXml, type AddressTree, type Calibrator, type ComponentTag } from "@mailwoman/core/decoder";
|
|
13
|
+
import { type SystemCode } from "@mailwoman/codex";
|
|
13
14
|
import { type AnchorLookup } from "./anchor-inference.js";
|
|
14
15
|
import { type GazetteerLexicon } from "./gazetteer-inference.js";
|
|
15
16
|
import { type FstMatcherLike } from "./fst-prior.js";
|
|
@@ -87,8 +88,23 @@ export interface NeuralAddressClassifierConfig {
|
|
|
87
88
|
* trained with the train-time half.
|
|
88
89
|
*/
|
|
89
90
|
suppressGazetteerNearPostcode?: boolean;
|
|
91
|
+
/**
|
|
92
|
+
* Default address-system conventions mode for every parse (see `ParseOpts.addressSystemConventions`
|
|
93
|
+
* for semantics — `"auto"` reads the model's locale head; a `SystemCode` pins it). Per-parse opts
|
|
94
|
+
* override this. Omit for the byte-stable pre-#511 default (no detection, no mask).
|
|
95
|
+
*/
|
|
96
|
+
addressSystemConventions?: "auto" | SystemCode;
|
|
97
|
+
/**
|
|
98
|
+
* Punctuation-gap span bridging (the v4.4.0 corrective; see `span-bridge.ts`). The corpus
|
|
99
|
+
* label format cannot express punctuation inside a span, so dotted surfaces ("P.O. Box",
|
|
100
|
+
* "C.P.") decode as fragments. When true, adjacent same-tag spans separated only by short
|
|
101
|
+
* punctuation gaps are merged after decode. Per-parse opts override. Omit for the byte-stable
|
|
102
|
+
* pre-v4.4.0 behavior.
|
|
103
|
+
*/
|
|
104
|
+
bridgePunctuationGaps?: boolean;
|
|
90
105
|
}
|
|
91
106
|
export declare class NeuralAddressClassifier {
|
|
107
|
+
#private;
|
|
92
108
|
private readonly cfg;
|
|
93
109
|
private readonly labels;
|
|
94
110
|
private readonly decodeMode;
|
|
@@ -115,7 +131,11 @@ export declare class NeuralAddressClassifier {
|
|
|
115
131
|
parse(text: string, opts?: ParseOpts): Promise<AddressTree>;
|
|
116
132
|
/**
|
|
117
133
|
* Like `parse`, but also returns the raw per-token logits and piece offsets needed for per-span
|
|
118
|
-
* logit aggregation (Option C joint-reconcile integration).
|
|
134
|
+
* logit aggregation (Option C joint-reconcile integration). Shares the ENTIRE decode path with
|
|
135
|
+
* `parse` (one `#decode`, #481) — including the repair passes, which previously ran only in
|
|
136
|
+
* `parse`: reconcile must consume the same tokens the argmax path serves users, and the repair
|
|
137
|
+
* opts were silently ignored here before. `logits` stay RAW (pre-prior, pre-repair) — they are
|
|
138
|
+
* the model's emissions, not the decode's opinions.
|
|
119
139
|
*/
|
|
120
140
|
parseWithLogits(text: string, opts?: ParseOpts): Promise<ParseWithLogitsResult>;
|
|
121
141
|
parseJson(text: string, opts?: ParseOpts): Promise<Partial<Record<ComponentTag, string>>>;
|
|
@@ -201,5 +221,23 @@ export interface ParseOpts {
|
|
|
201
221
|
* (`@mailwoman/core/decoder`) from `data/eval/calibration/isotonic-<locale>-<version>.json`.
|
|
202
222
|
*/
|
|
203
223
|
calibrate?: Calibrator;
|
|
224
|
+
/** Per-parse override of the config-level `bridgePunctuationGaps` (see that doc). */
|
|
225
|
+
bridgePunctuationGaps?: boolean;
|
|
226
|
+
/**
|
|
227
|
+
* Address-system conventions enforcement (#511 Tier A / #478's rules-as-constraints slice).
|
|
228
|
+
*
|
|
229
|
+
* - `"auto"` — detect the system from the model's locale head (`locale_logits` output, v1.1.0+
|
|
230
|
+
* exports; silently no-ops on models without it) and apply that system's codex conventions:
|
|
231
|
+
* forbidden tags become a hard emission mask before Viterbi, and a conventions postcode shape
|
|
232
|
+
* enables the snap-only postcode repair pass.
|
|
233
|
+
* - A `SystemCode` (`"fr"`, `"us"`, …) — apply that system's conventions unconditionally
|
|
234
|
+
* (callers that already know the locale, e.g. the pipeline's BCP-47 region).
|
|
235
|
+
* - Omit — byte-stable default: no detection, no mask (pre-#511 behavior).
|
|
236
|
+
*
|
|
237
|
+
* The detection threshold is deliberately high (0.8): the mask must never fire on a guess.
|
|
238
|
+
* Measured motivation: the 2026-06-10 v1.1.0 gate, where US suffix logic fired inside French
|
|
239
|
+
* parses (`street_suffix: "Rue"`) and digit-splits corrupted leading FR postcodes.
|
|
240
|
+
*/
|
|
241
|
+
addressSystemConventions?: "auto" | SystemCode;
|
|
204
242
|
}
|
|
205
243
|
//# sourceMappingURL=classifier.d.ts.map
|
package/out/classifier.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"classifier.d.ts","sourceRoot":"","sources":["../classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAIN,WAAW,EACX,KAAK,WAAW,EAChB,KAAK,UAAU,EACf,KAAK,YAAY,EAEjB,MAAM,yBAAyB,CAAA;AAChC,OAAO,EAAuB,KAAK,YAAY,EAAE,MAAM,uBAAuB,CAAA;AAC9E,OAAO,EAAyD,KAAK,gBAAgB,EAAE,MAAM,0BAA0B,CAAA;AACvH,OAAO,EAA0B,KAAK,cAAc,EAAE,MAAM,gBAAgB,CAAA;AAE5E,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAA;
|
|
1
|
+
{"version":3,"file":"classifier.d.ts","sourceRoot":"","sources":["../classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAIN,WAAW,EACX,KAAK,WAAW,EAChB,KAAK,UAAU,EACf,KAAK,YAAY,EAEjB,MAAM,yBAAyB,CAAA;AAChC,OAAO,EAAwB,KAAK,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAGxE,OAAO,EAAuB,KAAK,YAAY,EAAE,MAAM,uBAAuB,CAAA;AAC9E,OAAO,EAAyD,KAAK,gBAAgB,EAAE,MAAM,0BAA0B,CAAA;AACvH,OAAO,EAA0B,KAAK,cAAc,EAAE,MAAM,gBAAgB,CAAA;AAE5E,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAA;AAGnD,OAAO,EAA0C,KAAK,cAAc,EAAE,MAAM,wBAAwB,CAAA;AACpG,OAAO,EAAuC,KAAK,yBAAyB,EAAE,MAAM,8BAA8B,CAAA;AAClH,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAA;AAGnD,OAAO,KAAK,EAAE,kBAAkB,EAAmB,MAAM,cAAc,CAAA;AAEvE;;;;GAIG;AACH,MAAM,WAAW,YAAY;IAC5B,KAAK,CACJ,QAAQ,EAAE,MAAM,EAAE,EAClB,MAAM,CAAC,EAAE;QAAE,QAAQ,EAAE,aAAa,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC;QAAC,UAAU,EAAE,aAAa,CAAC,MAAM,CAAC,CAAA;KAAE,EAC9F,SAAS,CAAC,EAAE;QAAE,QAAQ,EAAE,aAAa,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC;QAAC,UAAU,EAAE,aAAa,CAAC,MAAM,CAAC,CAAA;KAAE,GAC/F,OAAO,CAAC,WAAW,CAAC,CAAA;CACvB;AAED,MAAM,WAAW,6BAA6B;IAC7C,SAAS,EAAE,kBAAkB,CAAA;IAC7B,MAAM,EAAE,YAAY,CAAA;IACpB;;;;OAIG;IACH,MAAM,CAAC,EAAE,SAAS,MAAM,EAAE,CAAA;IAC1B;;;;;;;OAOG;IACH,MAAM,CAAC,EAAE,SAAS,GAAG,QAAQ,CAAA;IAC7B;;;;OAIG;IACH,WAAW,CAAC,EAAE,MAAM,EAAE,EAAE,CAAA;IACxB,sEAAsE;IACtE,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAA;IAC3B,oEAAoE;IACpE,cAAc,CAAC,EAAE,MAAM,EAAE,CAAA;IACzB;;;;;OAKG;IACH,oBAAoB,CAAC,EAAE,YAAY,CAAA;IACnC;;;;;;OAMG;IACH,gBAAgB,CAAC,EAAE,gBAAgB,CAAA;IACnC;;;;;;;;;;OAUG;IACH,6BAA6B,CAAC,EAAE,OAAO,CAAA;IACvC;;;;OAIG;IACH,wBAAwB,CAAC,EAAE,MAAM,GAAG,UAAU,CAAA;IAC9C;;;;;;OAMG;IACH,qBAAqB,CAAC,EAAE,OAAO,CAAA;CAC/B;AAED,qBAAa,uBAAuB;;IAOvB,OAAO,CAAC,QAAQ,CAAC,GAAG;IANhC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAmB;IAC1C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAsB;IACjD,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAY;IACxC,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAU;IAC3C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAU;gBAEZ,GAAG,EAAE,6BAA6B;IAa/D;;;;;;;;;;;OAWG;WACU,eAAe,CAC3B,IAAI,GAAE,kBAAkB,GAAG;QAAE,oBAAoB,CAAC,EAAE,YAAY,CAAA;KAAO,GACrE,OAAO,CAAC,uBAAuB,CAAC;IA4BnC,6DAA6D;
IACvD,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,WAAW,CAAC;IAMjE;;;;;;;OAOG;IACG,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,qBAAqB,CAAC;IAyI/E,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC;IAIzF,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC;IAInF,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG;QAAE,GAAG,CAAC,EAAE,UAAU,CAAC,OAAO,WAAW,CAAC,CAAC,CAAC,CAAC,CAAA;KAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAI7G;;;;;;;;;;OAUG;IACH,OAAO,CAAC,mBAAmB;CAW3B;AAED,wFAAwF;AACxF,MAAM,WAAW,qBAAqB;IACrC,IAAI,EAAE,WAAW,CAAA;IACjB,MAAM,EAAE,MAAM,EAAE,EAAE,CAAA;IAClB,MAAM,EAAE,KAAK,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,GAAG,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;CAC7C;AAED;;;GAGG;AACH,MAAM,WAAW,SAAS;IACzB;;;;OAIG;IACH,UAAU,CAAC,EAAE,cAAc,CAAA;IAC3B;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAA;IAC5B;;;OAGG;IACH,GAAG,CAAC,EAAE,cAAc,CAAA;IACpB,6DAA6D;IAC7D,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB;;;;;;OAMG;IACH,mBAAmB,CAAC,EAAE,cAAc,CAAA;IACpC,yDAAyD;IACzD,uBAAuB,CAAC,EAAE,yBAAyB,CAAA;IACnD;;;;;;OAMG;IACH,cAAc,CAAC,EAAE,OAAO,CAAA;IACxB;;;;;;OAMG;IACH,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB;;;;;OAKG;IACH,SAAS,CAAC,EAAE,UAAU,CAAA;IACtB,qFAAqF;IACrF,qBAAqB,CAAC,EAAE,OAAO,CAAA;IAC/B;;;;;;;;;;;;;;OAcG;IACH,wBAAwB,CAAC,EAAE,MAAM,GAAG,UAAU,CAAA;CAC9C"}
|
package/out/classifier.js
CHANGED
|
@@ -10,11 +10,14 @@
|
|
|
10
10
|
* Convenience wrappers `parseJson` / `parseTuples` / `parseXml` project the tree on the way out.
|
|
11
11
|
*/
|
|
12
12
|
import { buildAddressTree, decodeAsJson, decodeAsTuples, decodeAsXml, } from "@mailwoman/core/decoder";
|
|
13
|
+
import { conventionsForSystem } from "@mailwoman/codex";
|
|
14
|
+
import { detectAddressSystem } from "./address-system.js";
|
|
13
15
|
import { buildAnchorFeatures } from "./anchor-inference.js";
|
|
14
16
|
import { buildGazetteerFeatures, suppressGazetteerNearPostcode } from "./gazetteer-inference.js";
|
|
15
17
|
import { buildFstEmissionPriors } from "./fst-prior.js";
|
|
16
18
|
import { STAGE2_BIO_LABELS } from "./labels.js";
|
|
17
19
|
import { repairPostcodeLabels } from "./postcode-repair.js";
|
|
20
|
+
import { bridgePunctuationGaps } from "./span-bridge.js";
|
|
18
21
|
import { addEmissionMatrix, buildEmissionPriors } from "./query-shape-prior.js";
|
|
19
22
|
import { buildStreetMorphologyEmissionPriors } from "./street-morphology-prior.js";
|
|
20
23
|
import { MailwomanTokenizer } from "./tokenizer.js";
|
|
@@ -84,69 +87,34 @@ export class NeuralAddressClassifier {
|
|
|
84
87
|
async parse(text, opts) {
|
|
85
88
|
if (text.length === 0)
|
|
86
89
|
return { raw: text, roots: [] };
|
|
87
|
-
const {
|
|
88
|
-
// Postcode-anchor channel (#239/#240): build per-piece anchor features from the same lookup the
|
|
89
|
-
// model trained on, fed alongside the ids. No-op when no lookup is configured.
|
|
90
|
-
const anchor = this.cfg.postcodeAnchorLookup
|
|
91
|
-
? buildAnchorFeatures(text, pieces, this.cfg.postcodeAnchorLookup)
|
|
92
|
-
: undefined;
|
|
93
|
-
const gazetteer = this.cfg.gazetteerLexicon
|
|
94
|
-
? buildGazetteerFeatures(text, pieces, this.cfg.gazetteerLexicon)
|
|
95
|
-
: undefined;
|
|
96
|
-
const gazFed = gazetteer && anchor && this.cfg.suppressGazetteerNearPostcode
|
|
97
|
-
? suppressGazetteerNearPostcode(gazetteer, anchor.confidence)
|
|
98
|
-
: gazetteer;
|
|
99
|
-
const { logits } = await this.cfg.runner.infer(ids, anchor, gazFed);
|
|
100
|
-
this.assertEmissionWidth(logits);
|
|
101
|
-
let emissions = opts?.queryShape
|
|
102
|
-
? addEmissionMatrix(logits, buildEmissionPriors(opts.queryShape, pieces, this.labels, {
|
|
103
|
-
biasScale: opts.queryShapeBiasScale ?? 1.0,
|
|
104
|
-
inputText: text,
|
|
105
|
-
}))
|
|
106
|
-
: logits;
|
|
107
|
-
if (opts?.fst) {
|
|
108
|
-
emissions = addEmissionMatrix(emissions, buildFstEmissionPriors(opts.fst, pieces, this.labels, {
|
|
109
|
-
biasScale: opts.fstBiasScale ?? 1.0,
|
|
110
|
-
}));
|
|
111
|
-
}
|
|
112
|
-
if (opts?.fstStreetMorphology) {
|
|
113
|
-
emissions = addEmissionMatrix(emissions, buildStreetMorphologyEmissionPriors(opts.fstStreetMorphology, pieces, this.labels, opts.fstStreetMorphologyOpts ?? {}));
|
|
114
|
-
}
|
|
115
|
-
const labelIndices = this.decodeMode === "viterbi"
|
|
116
|
-
? viterbi({
|
|
117
|
-
emissions,
|
|
118
|
-
transitions: this.transitions,
|
|
119
|
-
startTransitions: this.startTransitions,
|
|
120
|
-
endTransitions: this.endTransitions,
|
|
121
|
-
}).path
|
|
122
|
-
: emissions.map((row) => argmaxSoftmax(row).idx);
|
|
123
|
-
let tokens = pieces.map((p, i) => {
|
|
124
|
-
const idx = labelIndices[i];
|
|
125
|
-
const probs = softmax(logits[i]);
|
|
126
|
-
return {
|
|
127
|
-
piece: p.piece,
|
|
128
|
-
start: p.start,
|
|
129
|
-
end: p.end,
|
|
130
|
-
label: (this.labels[idx] ?? "O"),
|
|
131
|
-
confidence: probs[idx],
|
|
132
|
-
};
|
|
133
|
-
});
|
|
134
|
-
if (opts?.postcodeRepair) {
|
|
135
|
-
tokens = repairPostcodeLabels(text, tokens).tokens;
|
|
136
|
-
}
|
|
137
|
-
if (opts?.unitRepair) {
|
|
138
|
-
tokens = repairUnitLabels(text, tokens).tokens;
|
|
139
|
-
}
|
|
90
|
+
const { tokens } = await this.#decode(text, opts);
|
|
140
91
|
return buildAddressTree(text, tokens, opts?.calibrate ? { calibrate: opts.calibrate } : undefined);
|
|
141
92
|
}
|
|
142
93
|
/**
|
|
143
94
|
* Like `parse`, but also returns the raw per-token logits and piece offsets needed for per-span
|
|
144
|
-
* logit aggregation (Option C joint-reconcile integration).
|
|
95
|
+
* logit aggregation (Option C joint-reconcile integration). Shares the ENTIRE decode path with
|
|
96
|
+
* `parse` (one `#decode`, #481) — including the repair passes, which previously ran only in
|
|
97
|
+
* `parse`: reconcile must consume the same tokens the argmax path serves users, and the repair
|
|
98
|
+
* opts were silently ignored here before. `logits` stay RAW (pre-prior, pre-repair) — they are
|
|
99
|
+
* the model's emissions, not the decode's opinions.
|
|
145
100
|
*/
|
|
146
101
|
async parseWithLogits(text, opts) {
|
|
147
102
|
if (text.length === 0) {
|
|
148
103
|
return { tree: { raw: text, roots: [] }, logits: [], pieces: [] };
|
|
149
104
|
}
|
|
105
|
+
const { tokens, logits, pieces } = await this.#decode(text, opts);
|
|
106
|
+
return {
|
|
107
|
+
tree: buildAddressTree(text, tokens, opts?.calibrate ? { calibrate: opts.calibrate } : undefined),
|
|
108
|
+
logits,
|
|
109
|
+
pieces: pieces.map((p) => ({ start: p.start, end: p.end })),
|
|
110
|
+
};
|
|
111
|
+
}
|
|
112
|
+
/**
|
|
113
|
+
* THE decode path (#481): tokenize → anchor/gazetteer features → infer → priors → CRF/argmax →
|
|
114
|
+
* tokens → repairs. Both `parse` and `parseWithLogits` consume this — never fork it; the 2026-06
|
|
115
|
+
* audit found three drift surfaces in the previous duplicated copies.
|
|
116
|
+
*/
|
|
117
|
+
async #decode(text, opts) {
|
|
150
118
|
const { pieces, ids } = this.cfg.tokenizer.encode(text);
|
|
151
119
|
// Postcode-anchor channel (#239/#240): build per-piece anchor features from the same lookup the
|
|
152
120
|
// model trained on, fed alongside the ids. No-op when no lookup is configured.
|
|
@@ -159,8 +127,15 @@ export class NeuralAddressClassifier {
|
|
|
159
127
|
const gazFed = gazetteer && anchor && this.cfg.suppressGazetteerNearPostcode
|
|
160
128
|
? suppressGazetteerNearPostcode(gazetteer, anchor.confidence)
|
|
161
129
|
: gazetteer;
|
|
162
|
-
const { logits } = await this.cfg.runner.infer(ids, anchor, gazFed);
|
|
130
|
+
const { logits, localeLogits } = await this.cfg.runner.infer(ids, anchor, gazFed);
|
|
163
131
|
this.assertEmissionWidth(logits);
|
|
132
|
+
// Address-system conventions (#511 Tier A): resolve which system's rules apply — caller-pinned
|
|
133
|
+
// system, or the model's own locale-head detection under a high confidence bar. Null = no
|
|
134
|
+
// constraints; the parse below is byte-identical to the pre-conventions path.
|
|
135
|
+
const conventionsOpt = opts?.addressSystemConventions ?? this.cfg.addressSystemConventions;
|
|
136
|
+
const conventions = conventionsOpt === undefined
|
|
137
|
+
? null
|
|
138
|
+
: conventionsForSystem(conventionsOpt === "auto" ? (detectAddressSystem(localeLogits)?.system ?? null) : conventionsOpt);
|
|
164
139
|
let emissions = opts?.queryShape
|
|
165
140
|
? addEmissionMatrix(logits, buildEmissionPriors(opts.queryShape, pieces, this.labels, {
|
|
166
141
|
biasScale: opts.queryShapeBiasScale ?? 1.0,
|
|
@@ -175,6 +150,23 @@ export class NeuralAddressClassifier {
|
|
|
175
150
|
if (opts?.fstStreetMorphology) {
|
|
176
151
|
emissions = addEmissionMatrix(emissions, buildStreetMorphologyEmissionPriors(opts.fstStreetMorphology, pieces, this.labels, opts.fstStreetMorphologyOpts ?? {}));
|
|
177
152
|
}
|
|
153
|
+
// Conventions emission mask: tags that are ungrammatical in the detected system are removed
|
|
154
|
+
// from the decoder's vocabulary outright (-1e9 ≈ log 0). Copy-on-mask — `emissions` may alias
|
|
155
|
+
// `logits`, which the per-token confidence below reads unmasked.
|
|
156
|
+
if (conventions?.forbiddenTags?.length) {
|
|
157
|
+
const forbidden = new Set();
|
|
158
|
+
for (const tag of conventions.forbiddenTags) {
|
|
159
|
+
const b = this.labels.indexOf(`B-${tag}`);
|
|
160
|
+
const i = this.labels.indexOf(`I-${tag}`);
|
|
161
|
+
if (b >= 0)
|
|
162
|
+
forbidden.add(b);
|
|
163
|
+
if (i >= 0)
|
|
164
|
+
forbidden.add(i);
|
|
165
|
+
}
|
|
166
|
+
if (forbidden.size > 0) {
|
|
167
|
+
emissions = emissions.map((row) => row.map((v, idx) => (forbidden.has(idx) ? -1e9 : v)));
|
|
168
|
+
}
|
|
169
|
+
}
|
|
178
170
|
const labelIndices = this.decodeMode === "viterbi"
|
|
179
171
|
? viterbi({
|
|
180
172
|
emissions,
|
|
@@ -183,7 +175,7 @@ export class NeuralAddressClassifier {
|
|
|
183
175
|
endTransitions: this.endTransitions,
|
|
184
176
|
}).path
|
|
185
177
|
: emissions.map((row) => argmaxSoftmax(row).idx);
|
|
186
|
-
|
|
178
|
+
let tokens = pieces.map((p, i) => {
|
|
187
179
|
const idx = labelIndices[i];
|
|
188
180
|
const probs = softmax(logits[i]);
|
|
189
181
|
return {
|
|
@@ -194,11 +186,22 @@ export class NeuralAddressClassifier {
|
|
|
194
186
|
confidence: probs[idx],
|
|
195
187
|
};
|
|
196
188
|
});
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
189
|
+
// Postcode repair runs when the caller asks for it OR the detected system declares a postcode
|
|
190
|
+
// shape (#511 Tier A): a span that is a sub-match of a shape-valid string is exactly the
|
|
191
|
+
// snap-only truncation class the pass exists for ("47110" decoded as "4711" + a digit-split).
|
|
192
|
+
if (opts?.postcodeRepair || conventions?.postcodePattern) {
|
|
193
|
+
tokens = repairPostcodeLabels(text, tokens).tokens;
|
|
194
|
+
}
|
|
195
|
+
if (opts?.unitRepair) {
|
|
196
|
+
tokens = repairUnitLabels(text, tokens).tokens;
|
|
197
|
+
}
|
|
198
|
+
// Punctuation-gap span bridging (v4.4.0 corrective — see span-bridge.ts): merge same-tag
|
|
199
|
+
// fragments split at unlabeled punctuation ("P.O. Box" decoding as P + O + Box). Opt-in,
|
|
200
|
+
// declared in the ship config like the conventions mask.
|
|
201
|
+
if (opts?.bridgePunctuationGaps ?? this.cfg.bridgePunctuationGaps) {
|
|
202
|
+
tokens = bridgePunctuationGaps(text, tokens);
|
|
203
|
+
}
|
|
204
|
+
return { tokens, logits, pieces };
|
|
202
205
|
}
|
|
203
206
|
async parseJson(text, opts) {
|
|
204
207
|
return decodeAsJson(await this.parse(text, opts));
|
package/out/classifier.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"classifier.js","sourceRoot":"","sources":["../classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EACN,gBAAgB,EAChB,YAAY,EACZ,cAAc,EACd,WAAW,GAKX,MAAM,yBAAyB,CAAA;AAChC,OAAO,EAAE,mBAAmB,EAAqB,MAAM,uBAAuB,CAAA;AAC9E,OAAO,EAAE,sBAAsB,EAAE,6BAA6B,EAAyB,MAAM,0BAA0B,CAAA;AACvH,OAAO,EAAE,sBAAsB,EAAuB,MAAM,gBAAgB,CAAA;AAC5E,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAA;AAE/C,OAAO,EAAE,oBAAoB,EAAE,MAAM,sBAAsB,CAAA;AAC3D,OAAO,EAAE,iBAAiB,EAAE,mBAAmB,EAAuB,MAAM,wBAAwB,CAAA;AACpG,OAAO,EAAE,mCAAmC,EAAkC,MAAM,8BAA8B,CAAA;AAClH,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAA;AACnD,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAA;AACnD,OAAO,EAAE,eAAe,EAAE,iBAAiB,EAAE,sBAAsB,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAA;
|
|
1
|
+
{"version":3,"file":"classifier.js","sourceRoot":"","sources":["../classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EACN,gBAAgB,EAChB,YAAY,EACZ,cAAc,EACd,WAAW,GAKX,MAAM,yBAAyB,CAAA;AAChC,OAAO,EAAE,oBAAoB,EAAmB,MAAM,kBAAkB,CAAA;AAExE,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAA;AACzD,OAAO,EAAE,mBAAmB,EAAqB,MAAM,uBAAuB,CAAA;AAC9E,OAAO,EAAE,sBAAsB,EAAE,6BAA6B,EAAyB,MAAM,0BAA0B,CAAA;AACvH,OAAO,EAAE,sBAAsB,EAAuB,MAAM,gBAAgB,CAAA;AAC5E,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAA;AAE/C,OAAO,EAAE,oBAAoB,EAAE,MAAM,sBAAsB,CAAA;AAC3D,OAAO,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAA;AACxD,OAAO,EAAE,iBAAiB,EAAE,mBAAmB,EAAuB,MAAM,wBAAwB,CAAA;AACpG,OAAO,EAAE,mCAAmC,EAAkC,MAAM,8BAA8B,CAAA;AAClH,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAA;AACnD,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAA;AACnD,OAAO,EAAE,eAAe,EAAE,iBAAiB,EAAE,sBAAsB,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAA;AAuF3G,MAAM,OAAO,uBAAuB;IAON;IANZ,MAAM,CAAmB;IACzB,UAAU,CAAsB;IAChC,WAAW,CAAY;IACvB,gBAAgB,CAAU;IAC1B,cAAc,CAAU;IAEzC,YAA6B,GAAkC;QAAlC,QAAG,GAAH,GAAG,CAA+B;QAC9D,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,iBAAiB,CAAA;QAC7C,IAAI,CAAC,UAAU,GAAG,GAAG,CAAC,MAAM,IAAI,SAAS,CAAA;QACzC,MAAM,UAAU,GAAG,sBAAsB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACtD,IAAI,GAAG,CAAC,WAAW,EAAE,CAAC;YACrB,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC,UAAU,EAAE,GAAG,CAAC,WAAW,CAAC,CAAA;QAC5D,CAAC;aAAM,CAAC;YACP,IAAI,CAAC,WAAW,GAAG,UAAU,CAAA;QAC9B,CAAC;QACD,IAAI,CAAC,gBAAgB,GAAG,GAAG,CAAC,gBAAgB,IAAI,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QAC9E,IAAI,CAAC,cAAc,GAAG,GAAG,CAAC,cAAc,IAAI,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IACzE,CAAC;IAED;;;;;;;;;;;OAWG;IACH,MAAM,CAAC,KAAK,CAAC,eAAe,CAC3B,OAAqE,EAAE;QAEvE,yFAAyF;QACzF,2FAA2F;QAC3F,uFAAuF;QACvF,0FAA0F;QAC1F,2BAA2B;QAC3B,MAAM,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,cAAc,EAAE,uBAAuB,EAAE,kBAAkB,EAAE,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC3G,MAAM,CAAC,yBAAyB,CAAC,kBAAkB,CAAC;YACpD,MAAM,CAAC,yBAAyB,CAAC,cAAc,CAAC;SAChD,CAAC,CAAA;QACF,MAAM,QAAQ,GAAoB,cAAc,CAAC,IAAI,CAAC,CAAA;QACtD,MAAM,MAAM,GAAG,uBAAuB,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAA;QAC9D,MAAM,GAAG,GAAG,kBAAk
B,CAAC,QAAQ,CAAC,kBAAkB,CAAC,CAAA;QAC3D,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC7C,kBAAkB,CAAC,YAAY,CAAC,QAAQ,CAAC,aAAa,CAAC;YACvD,UAAU,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC;SACrC,CAAC,CAAA;QACF,OAAO,IAAI,uBAAuB,CAAC;YAClC,SAAS;YACT,MAAM;YACN,MAAM;YACN,WAAW,EAAE,GAAG,EAAE,WAAW;YAC7B,gBAAgB,EAAE,GAAG,EAAE,gBAAgB;YACvC,cAAc,EAAE,GAAG,EAAE,cAAc;YACnC,GAAG,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC,CAAC,EAAE,oBAAoB,EAAE,IAAI,CAAC,oBAAoB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SACzF,CAAC,CAAA;IACH,CAAC;IAED,6DAA6D;IAC7D,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,IAAgB;QACzC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,CAAA;QACtD,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;QACjD,OAAO,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAA;IACnG,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,eAAe,CAAC,IAAY,EAAE,IAAgB;QACnD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,EAAE,IAAI,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAA;QAClE,CAAC;QACD,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;QACjE,OAAO;YACN,IAAI,EAAE,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;YACjG,MAAM;YACN,MAAM,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;SAC3D,CAAA;IACF,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,OAAO,CACZ,IAAY,EACZ,IAAgB;QAEhB,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QACvD,gGAAgG;QAChG,+EAA+E;QAC/E,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,oBAAoB;YAC3C,CAAC,CAAC,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,oBAAoB,CAAC;YAClE,CAAC,CAAC,SAAS,CAAA;QACZ,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,gBAAgB;YAC1C,CAAC,CAAC,sBAAsB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC;YACjE,
CAAC,CAAC,SAAS,CAAA;QACZ,MAAM,MAAM,GACX,SAAS,IAAI,MAAM,IAAI,IAAI,CAAC,GAAG,CAAC,6BAA6B;YAC5D,CAAC,CAAC,6BAA6B,CAAC,SAAS,EAAE,MAAM,CAAC,UAAU,CAAC;YAC7D,CAAC,CAAC,SAAS,CAAA;QACb,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,CAAC,CAAA;QAEjF,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,CAAA;QAEhC,+FAA+F;QAC/F,0FAA0F;QAC1F,8EAA8E;QAC9E,MAAM,cAAc,GAAG,IAAI,EAAE,wBAAwB,IAAI,IAAI,CAAC,GAAG,CAAC,wBAAwB,CAAA;QAC1F,MAAM,WAAW,GAChB,cAAc,KAAK,SAAS;YAC3B,CAAC,CAAC,IAAI;YACN,CAAC,CAAC,oBAAoB,CACpB,cAAc,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,mBAAmB,CAAC,YAAY,CAAC,EAAE,MAAM,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,cAAc,CAChG,CAAA;QAEJ,IAAI,SAAS,GAAG,IAAI,EAAE,UAAU;YAC/B,CAAC,CAAC,iBAAiB,CACjB,MAAM,EACN,mBAAmB,CAAC,IAAI,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE;gBACzD,SAAS,EAAE,IAAI,CAAC,mBAAmB,IAAI,GAAG;gBAC1C,SAAS,EAAE,IAAI;aACf,CAAC,CACF;YACF,CAAC,CAAC,MAAM,CAAA;QAET,IAAI,IAAI,EAAE,GAAG,EAAE,CAAC;YACf,SAAS,GAAG,iBAAiB,CAC5B,SAAS,EACT,sBAAsB,CAAC,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE;gBACrD,SAAS,EAAE,IAAI,CAAC,YAAY,IAAI,GAAG;aACnC,CAAC,CACF,CAAA;QACF,CAAC;QAED,IAAI,IAAI,EAAE,mBAAmB,EAAE,CAAC;YAC/B,SAAS,GAAG,iBAAiB,CAC5B,SAAS,EACT,mCAAmC,CAClC,IAAI,CAAC,mBAAmB,EACxB,MAAM,EACN,IAAI,CAAC,MAAM,EACX,IAAI,CAAC,uBAAuB,IAAI,EAAE,CAClC,CACD,CAAA;QACF,CAAC;QAED,4FAA4F;QAC5F,8FAA8F;QAC9F,iEAAiE;QACjE,IAAI,WAAW,EAAE,aAAa,EAAE,MAAM,EAAE,CAAC;YACxC,MAAM,SAAS,GAAG,IAAI,GAAG,EAAU,CAAA;YACnC,KAAK,MAAM,GAAG,IAAI,WAAW,CAAC,aAAa,EAAE,CAAC;gBAC7C,MAAM,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,GAAG,EAAE,CAAC,CAAA;gBACzC,MAAM,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,GAAG,EAAE,CAAC,CAAA;gBACzC,IAAI,CAAC,IAAI,CAAC;oBAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;gBAC5B,IAAI,CAAC,IAAI,CAAC;oBAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;YAC7B,CAAC;YACD,IAAI,SAAS,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;gBACxB,SAAS,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC
,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;YACzF,CAAC;QACF,CAAC;QAED,MAAM,YAAY,GACjB,IAAI,CAAC,UAAU,KAAK,SAAS;YAC5B,CAAC,CAAC,OAAO,CAAC;gBACR,SAAS;gBACT,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;gBACvC,cAAc,EAAE,IAAI,CAAC,cAAc;aACnC,CAAC,CAAC,IAAI;YACR,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAA;QAElD,IAAI,MAAM,GAAmB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAChD,MAAM,GAAG,GAAG,YAAY,CAAC,CAAC,CAAE,CAAA;YAC5B,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAA;YACjC,OAAO;gBACN,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,GAAG,EAAE,CAAC,CAAC,GAAG;gBACV,KAAK,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,GAAG,CAA0B;gBACzD,UAAU,EAAE,KAAK,CAAC,GAAG,CAAE;aACvB,CAAA;QACF,CAAC,CAAC,CAAA;QAEF,8FAA8F;QAC9F,yFAAyF;QACzF,8FAA8F;QAC9F,IAAI,IAAI,EAAE,cAAc,IAAI,WAAW,EAAE,eAAe,EAAE,CAAC;YAC1D,MAAM,GAAG,oBAAoB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,MAAM,CAAA;QACnD,CAAC;QACD,IAAI,IAAI,EAAE,UAAU,EAAE,CAAC;YACtB,MAAM,GAAG,gBAAgB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,MAAM,CAAA;QAC/C,CAAC;QACD,yFAAyF;QACzF,yFAAyF;QACzF,yDAAyD;QACzD,IAAI,IAAI,EAAE,qBAAqB,IAAI,IAAI,CAAC,GAAG,CAAC,qBAAqB,EAAE,CAAC;YACnE,MAAM,GAAG,qBAAqB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;QAC7C,CAAC;QAED,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,CAAA;IAClC,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,IAAY,EAAE,IAAgB;QAC7C,OAAO,YAAY,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAA;IAClD,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,IAAY,EAAE,IAAgB;QAC/C,OAAO,cAAc,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAA;IACpD,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,IAAY,EAAE,IAA8D;QAC1F,OAAO,WAAW,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,CAAC,CAAA;IAC5D,CAAC;IAED;;;;;;;;;;OAUG;IACK,mBAAmB,CAAC,MAA2B;QACtD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,OAAM;QAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAE,CAAC,MAAM,CAAA;QAC/B,IAAI,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YAChC,MAAM,IAAI,KAAK,CACd,wCAAwC,KAAK,2CAA2C;gBACvF,wBAAwB,IAAI,CAAC,MAAM,CAAC,MAAM,iDAAiD;gBAC3F,oFAAoF,CACrF,CA
AA;QACF,CAAC;IACF,CAAC;CACD;AAqFD,SAAS,aAAa,CAAC,GAAa;IACnC,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,IAAI,MAAM,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;IACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,IAAI,GAAG,CAAC,CAAC,CAAE,GAAG,MAAM,EAAE,CAAC;YACtB,MAAM,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;YAChB,MAAM,GAAG,CAAC,CAAA;QACX,CAAC;IACF,CAAC;IACD,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,KAAK,MAAM,CAAC,IAAI,GAAG;QAAE,MAAM,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,CAAA;IACnD,MAAM,IAAI,GAAG,CAAC,GAAG,MAAM,CAAA;IACvB,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;AAC7B,CAAC;AAED,uGAAuG;AACvG,SAAS,WAAW,CAAC,CAAa,EAAE,CAAa;IAChD,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAA;IAClB,MAAM,GAAG,GAAe,EAAE,CAAA;IAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,MAAM,GAAG,GAAG,IAAI,KAAK,CAAS,CAAC,CAAC,CAAA;QAChC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;YAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,CAAE,CAAA;QAC1D,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACd,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"gazetteer-inference.d.ts","sourceRoot":"","sources":["../gazetteer-inference.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAA;AAEpD;;;;GAIG;AACH,eAAO,MAAM,qBAAqB,IAAI,CAAA;AAEtC,mFAAmF;AACnF,MAAM,WAAW,gBAAgB;IAChC,UAAU,EAAE,MAAM,CAAA;IAClB,KAAK,EAAE,SAAS,MAAM,EAAE,CAAA;IACxB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC5B,QAAQ,EAAE,MAAM,CAAA;IAChB,8DAA8D;IAC9D,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC5B,gGAAgG;IAChG,WAAW,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAChC;AAED,qGAAqG;AACrG,wBAAgB,qBAAqB,CAAC,GAAG,EAAE;IAC1C,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC5B,SAAS,EAAE,MAAM,CAAA;IACjB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC/B,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CACpC,GAAG,gBAAgB,
|
|
1
|
+
{"version":3,"file":"gazetteer-inference.d.ts","sourceRoot":"","sources":["../gazetteer-inference.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAA;AAEpD;;;;GAIG;AACH,eAAO,MAAM,qBAAqB,IAAI,CAAA;AAEtC,mFAAmF;AACnF,MAAM,WAAW,gBAAgB;IAChC,UAAU,EAAE,MAAM,CAAA;IAClB,KAAK,EAAE,SAAS,MAAM,EAAE,CAAA;IACxB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC5B,QAAQ,EAAE,MAAM,CAAA;IAChB,8DAA8D;IAC9D,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC5B,gGAAgG;IAChG,WAAW,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAChC;AAED,qGAAqG;AACrG,wBAAgB,qBAAqB,CAAC,GAAG,EAAE;IAC1C,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC5B,SAAS,EAAE,MAAM,CAAA;IACjB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC/B,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CACpC,GAAG,gBAAgB,CA0BnB;AAsBD,gGAAgG;AAChG,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,gBAAgB,GAAG,MAAM,EAAE,CA0DpF;AAED;;;;;;;;;GASG;AACH,wBAAgB,6BAA6B,CAC5C,SAAS,EAAE;IAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,CAAC;IAAC,UAAU,EAAE,MAAM,EAAE,CAAA;CAAE,EACzD,gBAAgB,EAAE,aAAa,CAAC,MAAM,CAAC,EACvC,MAAM,SAAI,GACR;IAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,CAAC;IAAC,UAAU,EAAE,MAAM,EAAE,CAAA;CAAE,CAgBhD;AAED;;;;GAIG;AACH,wBAAgB,sBAAsB,CACrC,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,aAAa,CAAC,cAAc,CAAC,EACrC,OAAO,EAAE,gBAAgB,GACvB;IAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,CAAC;IAAC,UAAU,EAAE,MAAM,EAAE,CAAA;CAAE,CAiBhD"}
|
|
@@ -22,6 +22,23 @@
|
|
|
22
22
|
export const GAZETTEER_FEATURE_DIM = 5;
|
|
23
23
|
/** Parse the lexicon JSON (already `JSON.parse`d — keeps this module browser-safe; caller reads). */
|
|
24
24
|
export function parseGazetteerLexicon(raw) {
|
|
25
|
+
// Loud validation (#481): a malformed lexicon previously surfaced as a crash deep inside
|
|
26
|
+
// buildGazetteerFeatures (or worse, silently zero-filled clues — the fake-affix-crash class).
|
|
27
|
+
// Unknown refs fail loud, never silent.
|
|
28
|
+
if (typeof raw?.feature_dim !== "number" || raw.feature_dim <= 0) {
|
|
29
|
+
throw new Error(`gazetteer lexicon: feature_dim must be a positive number, got ${raw?.feature_dim}`);
|
|
30
|
+
}
|
|
31
|
+
if (!Array.isArray(raw.slots) || raw.slots.length === 0) {
|
|
32
|
+
throw new Error("gazetteer lexicon: slots must be a non-empty array");
|
|
33
|
+
}
|
|
34
|
+
if (typeof raw.max_ngram !== "number" || raw.max_ngram < 1) {
|
|
35
|
+
throw new Error(`gazetteer lexicon: max_ngram must be >= 1, got ${raw.max_ngram}`);
|
|
36
|
+
}
|
|
37
|
+
for (const field of ["bits", "entries", "code_entries"]) {
|
|
38
|
+
if (typeof raw[field] !== "object" || raw[field] === null) {
|
|
39
|
+
throw new Error(`gazetteer lexicon: ${field} must be an object`);
|
|
40
|
+
}
|
|
41
|
+
}
|
|
25
42
|
return {
|
|
26
43
|
featureDim: raw.feature_dim,
|
|
27
44
|
slots: raw.slots,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"gazetteer-inference.js","sourceRoot":"","sources":["../gazetteer-inference.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAIH;;;;GAIG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAAG,CAAC,CAAA;AActC,qGAAqG;AACrG,MAAM,UAAU,qBAAqB,CAAC,GAOrC;IACA,OAAO;QACN,UAAU,EAAE,GAAG,CAAC,WAAW;QAC3B,KAAK,EAAE,GAAG,CAAC,KAAK;QAChB,IAAI,EAAE,GAAG,CAAC,IAAI;QACd,QAAQ,EAAE,GAAG,CAAC,SAAS;QACvB,OAAO,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAC7C,WAAW,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;KACtD,CAAA;AACF,CAAC;AAED,6FAA6F;AAC7F,SAAS,SAAS,CAAC,IAAY;IAC9B,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,CAAA;IACrB,MAAM,KAAK,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACpD,OAAO,KAAK,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAE,CAAC;QAAE,KAAK,EAAE,CAAA;IACnD,OAAO,GAAG,GAAG,KAAK,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,CAAE,CAAC;QAAE,GAAG,EAAE,CAAA;IACnD,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAA;AAC9B,CAAC;AAED,SAAS,SAAS,CAAC,IAAY,EAAE,OAAyB;IACzD,OAAO,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AACzE,CAAC;AAQD,gGAAgG;AAChG,MAAM,UAAU,kBAAkB,CAAC,IAAY,EAAE,OAAyB;IACzE,MAAM,QAAQ,GAAG,IAAI,KAAK,CAAS,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACvD,MAAM,MAAM,GAAG,MAAM,CAAA;IACrB,MAAM,KAAK,GAAe,EAAE,CAAA;IAC5B,IAAI,CAAyB,CAAA;IAC7B,OAAO,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACzC,MAAM,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAA;QACpB,MAAM,QAAQ,GAAG,SAAS,CAAC,OAAO,CAAC,CAAA;QACnC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACf,KAAK,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,CAAA;YACtD,SAAQ;QACT,CAAC;QACD,IAAI,IAAI,GAAG,CAAC,CAAA;QACZ,MAAM,KAAK,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACpD,OAAO,IAAI,GAAG,OAAO,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAE,CAAC;YAAE,IAAI,EAAE,CAAA;Q
AC9D,KAAK,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI,EAAE,GAAG,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI,GAAG,QAAQ,CAAC,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;IAC7F,CAAC;IAED,IAAI,CAAC,GAAG,CAAC,CAAA;IACT,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;QACzB,IAAI,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAC;YACrB,CAAC,EAAE,CAAA;YACH,SAAQ;QACT,CAAC;QACD,IAAI,QAAQ,GAAG,CAAC,CAAA;QAChB,IAAI,WAAW,GAAG,CAAC,CAAA;QACnB,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;QACzD,KAAK,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAChC,MAAM,KAAK,GAAa,EAAE,CAAA;YAC1B,IAAI,EAAE,GAAG,IAAI,CAAA;YACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAChC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAC;oBACrB,EAAE,GAAG,KAAK,CAAA;oBACV,MAAK;gBACN,CAAC;gBACD,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,IAAI,CAAC,CAAA;YAC3B,CAAC;YACD,IAAI,CAAC,EAAE;gBAAE,SAAQ;YACjB,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,CAAA;YACzC,IAAI,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;YACxC,uFAAuF;YACvF,IAAI,CAAC,KAAK,CAAC;gBAAE,IAAI,IAAI,OAAO,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,IAAI,CAAC,CAAA;YAC5D,IAAI,IAAI,EAAE,CAAC;gBACV,QAAQ,GAAG,CAAC,CAAA;gBACZ,WAAW,GAAG,IAAI,CAAA;gBAClB,MAAK;YACN,CAAC;QACF,CAAC;QACD,IAAI,QAAQ,EAAE,CAAC;YACd,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAE,CAAC,KAAK,CAAA;YAC7B,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,GAAG,QAAQ,GAAG,CAAC,CAAE,CAAC,GAAG,CAAA;YACxC,KAAK,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE;gBAAE,QAAQ,CAAC,CAAC,CAAC,GAAG,WAAW,CAAA;YAClF,CAAC,IAAI,QAAQ,CAAA;QACd,CAAC;aAAM,CAAC;YACP,CAAC,EAAE,CAAA;QACJ,CAAC;IACF,CAAC;IACD,OAAO,QAAQ,CAAA;AAChB,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,6BAA6B,CAC5C,SAAyD,EACzD,gBAAuC,EACvC,MAAM,GAAG,CAAC;IAEV,MAAM,CAAC,GAAG,SAAS,CAAC,UAAU,CAAC,MAAM,CAAA;IACrC,MAAM,QAAQ,GAAG,IAAI,KAAK,CAAU,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAA
G,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC;YACpC,KAAK,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC,IAAI,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACxC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;gBACf,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC;oBAAE,QAAQ,CAAC,CAAC,CAAC,GAAG,IAAI,CAAA;YACnD,CAAC;QACF,CAAC;IACF,CAAC;IACD,MAAM,GAAG,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAA;IAC9C,OAAO;QACN,QAAQ,EAAE,SAAS,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,KAAK,CAAS,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAClG,UAAU,EAAE,SAAS,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;KACrE,CAAA;AACF,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,sBAAsB,CACrC,IAAY,EACZ,MAAqC,EACrC,OAAyB;IAEzB,MAAM,QAAQ,GAAG,kBAAkB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAA;IAClD,MAAM,IAAI,GAAG,GAAG,EAAE,CAAC,IAAI,KAAK,CAAS,OAAO,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IAChE,MAAM,QAAQ,GAAe,EAAE,CAAA;IAC/B,MAAM,UAAU,GAAa,EAAE,CAAA;IAC/B,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;QACxB,IAAI,IAAI,GAAG,CAAC,CAAA;QACZ,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,IAAI,CAAC,GAAG,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAE,CAAC,EAAE,CAAC;gBAC7C,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAE,CAAA;gBACnB,MAAK;YACN,CAAC;QACF,CAAC;QACD,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAA;QACvD,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;IAChC,CAAC;IACD,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,CAAA;AAChC,CAAC"}
|
|
1
|
+
{"version":3,"file":"gazetteer-inference.js","sourceRoot":"","sources":["../gazetteer-inference.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAIH;;;;GAIG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAAG,CAAC,CAAA;AActC,qGAAqG;AACrG,MAAM,UAAU,qBAAqB,CAAC,GAOrC;IACA,yFAAyF;IACzF,8FAA8F;IAC9F,wCAAwC;IACxC,IAAI,OAAO,GAAG,EAAE,WAAW,KAAK,QAAQ,IAAI,GAAG,CAAC,WAAW,IAAI,CAAC,EAAE,CAAC;QAClE,MAAM,IAAI,KAAK,CAAC,iEAAiE,GAAG,EAAE,WAAW,EAAE,CAAC,CAAA;IACrG,CAAC;IACD,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzD,MAAM,IAAI,KAAK,CAAC,oDAAoD,CAAC,CAAA;IACtE,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,SAAS,KAAK,QAAQ,IAAI,GAAG,CAAC,SAAS,GAAG,CAAC,EAAE,CAAC;QAC5D,MAAM,IAAI,KAAK,CAAC,kDAAkD,GAAG,CAAC,SAAS,EAAE,CAAC,CAAA;IACnF,CAAC;IACD,KAAK,MAAM,KAAK,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,cAAc,CAAU,EAAE,CAAC;QAClE,IAAI,OAAO,GAAG,CAAC,KAAK,CAAC,KAAK,QAAQ,IAAI,GAAG,CAAC,KAAK,CAAC,KAAK,IAAI,EAAE,CAAC;YAC3D,MAAM,IAAI,KAAK,CAAC,sBAAsB,KAAK,oBAAoB,CAAC,CAAA;QACjE,CAAC;IACF,CAAC;IACD,OAAO;QACN,UAAU,EAAE,GAAG,CAAC,WAAW;QAC3B,KAAK,EAAE,GAAG,CAAC,KAAK;QAChB,IAAI,EAAE,GAAG,CAAC,IAAI;QACd,QAAQ,EAAE,GAAG,CAAC,SAAS;QACvB,OAAO,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAC7C,WAAW,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;KACtD,CAAA;AACF,CAAC;AAED,6FAA6F;AAC7F,SAAS,SAAS,CAAC,IAAY;IAC9B,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,CAAA;IACrB,MAAM,KAAK,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACpD,OAAO,KAAK,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAE,CAAC;QAAE,KAAK,EAAE,CAAA;IACnD,OAAO,GAAG,GAAG,KAAK,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,CAAE,CAAC;QAAE,GAAG,EAAE,CAAA;IACnD,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAA;AAC9B,CAAC;AAED,SAAS,SAAS,CAAC,IAAY,EAAE,OAAyB;IACzD,OAAO,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AACzE,CAAC;AAQD,gGAAgG;AAChG,MAAM,UAAU,kBAAkB,CAAC,
IAAY,EAAE,OAAyB;IACzE,MAAM,QAAQ,GAAG,IAAI,KAAK,CAAS,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACvD,MAAM,MAAM,GAAG,MAAM,CAAA;IACrB,MAAM,KAAK,GAAe,EAAE,CAAA;IAC5B,IAAI,CAAyB,CAAA;IAC7B,OAAO,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACzC,MAAM,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAA;QACpB,MAAM,QAAQ,GAAG,SAAS,CAAC,OAAO,CAAC,CAAA;QACnC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACf,KAAK,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,CAAA;YACtD,SAAQ;QACT,CAAC;QACD,IAAI,IAAI,GAAG,CAAC,CAAA;QACZ,MAAM,KAAK,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACpD,OAAO,IAAI,GAAG,OAAO,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAE,CAAC;YAAE,IAAI,EAAE,CAAA;QAC9D,KAAK,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI,EAAE,GAAG,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI,GAAG,QAAQ,CAAC,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;IAC7F,CAAC;IAED,IAAI,CAAC,GAAG,CAAC,CAAA;IACT,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;QACzB,IAAI,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAC;YACrB,CAAC,EAAE,CAAA;YACH,SAAQ;QACT,CAAC;QACD,IAAI,QAAQ,GAAG,CAAC,CAAA;QAChB,IAAI,WAAW,GAAG,CAAC,CAAA;QACnB,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;QACzD,KAAK,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAChC,MAAM,KAAK,GAAa,EAAE,CAAA;YAC1B,IAAI,EAAE,GAAG,IAAI,CAAA;YACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAChC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAC;oBACrB,EAAE,GAAG,KAAK,CAAA;oBACV,MAAK;gBACN,CAAC;gBACD,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,IAAI,CAAC,CAAA;YAC3B,CAAC;YACD,IAAI,CAAC,EAAE;gBAAE,SAAQ;YACjB,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,CAAA;YACzC,IAAI,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;YACxC,uFAAuF;YACvF,IAAI,CAAC,KAAK,CAAC;gBAAE,IAAI,IAAI,OAAO,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,IAAI,CAAC,CAAA;YAC5D,IAAI,IAAI,EAAE,CAAC;gBACV,QAAQ,GAAG,CAAC,C
AAA;gBACZ,WAAW,GAAG,IAAI,CAAA;gBAClB,MAAK;YACN,CAAC;QACF,CAAC;QACD,IAAI,QAAQ,EAAE,CAAC;YACd,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAE,CAAC,KAAK,CAAA;YAC7B,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,GAAG,QAAQ,GAAG,CAAC,CAAE,CAAC,GAAG,CAAA;YACxC,KAAK,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE;gBAAE,QAAQ,CAAC,CAAC,CAAC,GAAG,WAAW,CAAA;YAClF,CAAC,IAAI,QAAQ,CAAA;QACd,CAAC;aAAM,CAAC;YACP,CAAC,EAAE,CAAA;QACJ,CAAC;IACF,CAAC;IACD,OAAO,QAAQ,CAAA;AAChB,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,6BAA6B,CAC5C,SAAyD,EACzD,gBAAuC,EACvC,MAAM,GAAG,CAAC;IAEV,MAAM,CAAC,GAAG,SAAS,CAAC,UAAU,CAAC,MAAM,CAAA;IACrC,MAAM,QAAQ,GAAG,IAAI,KAAK,CAAU,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC;YACpC,KAAK,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC,IAAI,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACxC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;gBACf,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC;oBAAE,QAAQ,CAAC,CAAC,CAAC,GAAG,IAAI,CAAA;YACnD,CAAC;QACF,CAAC;IACF,CAAC;IACD,MAAM,GAAG,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAA;IAC9C,OAAO;QACN,QAAQ,EAAE,SAAS,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,KAAK,CAAS,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAClG,UAAU,EAAE,SAAS,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;KACrE,CAAA;AACF,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,sBAAsB,CACrC,IAAY,EACZ,MAAqC,EACrC,OAAyB;IAEzB,MAAM,QAAQ,GAAG,kBAAkB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAA;IAClD,MAAM,IAAI,GAAG,GAAG,EAAE,CAAC,IAAI,KAAK,CAAS,OAAO,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IAChE,MAAM,QAAQ,GAAe,EAAE,CAAA;IAC/B,MAAM,UAAU,GAAa,EAAE,CAAA;IAC/B,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;QACxB,IAAI,IAAI,GAAG,CAAC,CAAA;QACZ,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,GAAG,CAAC,CAAC,G
AAG,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,IAAI,CAAC,GAAG,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAE,CAAC,EAAE,CAAC;gBAC7C,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAE,CAAA;gBACnB,MAAK;YACN,CAAC;QACF,CAAC;QACD,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAA;QACvD,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;IAChC,CAAC;IACD,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,CAAA;AAChC,CAAC"}
|
package/out/onnx-runner.d.ts
CHANGED
|
@@ -30,6 +30,12 @@ export interface InferResult {
|
|
|
30
30
|
logits: number[][];
|
|
31
31
|
/** Number of label classes (the inner-dim of the logits tensor). */
|
|
32
32
|
numLabels: number;
|
|
33
|
+
/**
|
|
34
|
+
* Pooled locale-head posterior (`locale_logits` output, LOCALE_COUNTRIES order), when the model
|
|
35
|
+
* exports it (v1.1.0+, #511 Tier A). Absent on older bundles — consumers must treat undefined
|
|
36
|
+
* as "no address-system detection available".
|
|
37
|
+
*/
|
|
38
|
+
localeLogits?: number[];
|
|
33
39
|
}
|
|
34
40
|
export declare class OnnxRunner {
|
|
35
41
|
private readonly modelPath;
|
package/out/onnx-runner.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-runner.d.ts","sourceRoot":"","sources":["../onnx-runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAQH,MAAM,WAAW,cAAc;IAC9B,wEAAwE;IACxE,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,8FAA8F;AAC9F,eAAO,MAAM,qBAAqB,MAAM,CAAA;AAExC,MAAM,WAAW,WAAW;IAC3B,2EAA2E;IAC3E,MAAM,EAAE,MAAM,EAAE,EAAE,CAAA;IAClB,oEAAoE;IACpE,SAAS,EAAE,MAAM,CAAA;
|
|
1
|
+
{"version":3,"file":"onnx-runner.d.ts","sourceRoot":"","sources":["../onnx-runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAQH,MAAM,WAAW,cAAc;IAC9B,wEAAwE;IACxE,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,8FAA8F;AAC9F,eAAO,MAAM,qBAAqB,MAAM,CAAA;AAExC,MAAM,WAAW,WAAW;IAC3B,2EAA2E;IAC3E,MAAM,EAAE,MAAM,EAAE,EAAE,CAAA;IAClB,oEAAoE;IACpE,SAAS,EAAE,MAAM,CAAA;IACjB;;;;OAIG;IACH,YAAY,CAAC,EAAE,MAAM,EAAE,CAAA;CACvB;AAED,qBAAa,UAAU;IAMrB,OAAO,CAAC,QAAQ,CAAC,SAAS;IAC1B,OAAO,CAAC,QAAQ,CAAC,UAAU;IAN5B,OAAO,CAAC,OAAO,CAAoC;IACnD,OAAO,CAAC,WAAW,CAA6C;IAChE,SAAgB,WAAW,EAAE,MAAM,CAAA;IAEnC,OAAO;IAQP,oEAAoE;WACvD,MAAM,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,GAAE,cAAmB,GAAG,OAAO,CAAC,UAAU,CAAC;IAMtF,6CAA6C;WAChC,SAAS,CAAC,UAAU,EAAE,UAAU,EAAE,IAAI,GAAE,cAAmB,GAAG,OAAO,CAAC,UAAU,CAAC;YAMhF,aAAa;IAgB3B;;;;;;;;;;;OAWG;IACG,KAAK,CACV,QAAQ,EAAE,MAAM,EAAE,EAClB,MAAM,CAAC,EAAE;QAAE,QAAQ,EAAE,aAAa,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC;QAAC,UAAU,EAAE,aAAa,CAAC,MAAM,CAAC,CAAA;KAAE,EAC9F,SAAS,CAAC,EAAE;QAAE,QAAQ,EAAE,aAAa,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC;QAAC,UAAU,EAAE,aAAa,CAAC,MAAM,CAAC,CAAA;KAAE,GAC/F,OAAO,CAAC,WAAW,CAAC;CAoFvB"}
|
package/out/onnx-runner.js
CHANGED
|
@@ -147,7 +147,10 @@ export class OnnxRunner {
|
|
|
147
147
|
row[l] = data[base + l];
|
|
148
148
|
logits.push(row);
|
|
149
149
|
}
|
|
150
|
-
|
|
150
|
+
// Locale head (#511 Tier A): present on v1.1.0+ exports, absent (and optional) before.
|
|
151
|
+
const localeTensor = output.locale_logits;
|
|
152
|
+
const localeLogits = localeTensor ? Array.from(localeTensor.data) : undefined;
|
|
153
|
+
return { logits, numLabels, ...(localeLogits ? { localeLogits } : {}) };
|
|
151
154
|
}
|
|
152
155
|
}
|
|
153
156
|
//# sourceMappingURL=onnx-runner.js.map
|
package/out/onnx-runner.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-runner.js","sourceRoot":"","sources":["../onnx-runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAA;AACxC,OAAO,GAAG,MAAM,kBAAkB,CAAA;AAElC,OAAO,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAA;AAC1D,OAAO,EAAE,qBAAqB,EAAE,MAAM,0BAA0B,CAAA;AAchE,8FAA8F;AAC9F,MAAM,CAAC,MAAM,qBAAqB,GAAG,GAAG,CAAA;
|
|
1
|
+
{"version":3,"file":"onnx-runner.js","sourceRoot":"","sources":["../onnx-runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAA;AACxC,OAAO,GAAG,MAAM,kBAAkB,CAAA;AAElC,OAAO,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAA;AAC1D,OAAO,EAAE,qBAAqB,EAAE,MAAM,0BAA0B,CAAA;AAchE,8FAA8F;AAC9F,MAAM,CAAC,MAAM,qBAAqB,GAAG,GAAG,CAAA;AAexC,MAAM,OAAO,UAAU;IAMJ;IACA;IANV,OAAO,GAAgC,IAAI,CAAA;IAC3C,WAAW,GAAyC,IAAI,CAAA;IAChD,WAAW,CAAQ;IAEnC,YACkB,SAAiB,EACjB,UAA6B,EAC9C,IAAoB;QAFH,cAAS,GAAT,SAAS,CAAQ;QACjB,eAAU,GAAV,UAAU,CAAmB;QAG9C,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,qBAAqB,CAAA;IAC7D,CAAC;IAED,oEAAoE;IACpE,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,SAAiB,EAAE,OAAuB,EAAE;QAC/D,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,CAAC,CAAA;QACpD,IAAI,IAAI,CAAC,MAAM;YAAE,MAAM,MAAM,CAAC,aAAa,EAAE,CAAA;QAC7C,OAAO,MAAM,CAAA;IACd,CAAC;IAED,6CAA6C;IAC7C,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,UAAsB,EAAE,OAAuB,EAAE;QACvE,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,SAAS,EAAE,UAAU,EAAE,IAAI,CAAC,CAAA;QAC1D,IAAI,IAAI,CAAC,MAAM;YAAE,MAAM,MAAM,CAAC,aAAa,EAAE,CAAA;QAC7C,OAAO,MAAM,CAAA;IACd,CAAC;IAEO,KAAK,CAAC,aAAa;QAC1B,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC,OAAO,CAAA;QACrC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACvB,IAAI,CAAC,WAAW,GAAG,CAAC,KAAK,IAAI,EAAE;gBAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,IAAI,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAA;gBAClF,MAAM,OAAO,GAAG,MAAM,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAAC,KAAK,EAAE;oBACxD,kBAAkB,EAAE,CAAC,KAAK,CAAC;oBAC3B,sBAAsB,EAAE,KAAK;iBAC7B,CAAC,CAAA;gBACF,IAAI,CAAC,OAAO,GAAG,OAAO,CAAA;gBACtB,OAAO,OAAO,CAAA;YACf,CAAC,CAAC,EAAE,CAAA;QACL,CAAC;QACD,OAAO,IAAI,CAAC,WAAW,CAAA;IACxB,CAAC;IAED;;;;;;;;;;;OAWG;IACH,KAAK,CAAC,KAAK,CACV,QAAkB,EAClB,MAA8F,EAC9F,SAAiG;QAEjG,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,aAAa,EAAE,CAAA;QAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,IAAI,CAAC,WAAW,CAAC,CAAA;QAC1D,MAAM,MAAM,GAAG,IAAI,aAAa,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;QAClD,MAAM,IAAI,GAAG,IAAI,aAAa,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;QAChD,KAAK,IAAI,CAAC,GAAG
,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACjC,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAE,CAAC,CAAA;YAChC,IAAI,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;QACb,CAAC;QAED,MAAM,KAAK,GAA+B;YACzC,SAAS,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;YACjE,cAAc,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;SACpE,CAAA;QAED,IAAI,MAAM,EAAE,CAAC;YACZ,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAA;YAC3C,MAAM,EAAE,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,GAAG,GAAG,CAAC,CAAA;YACnD,MAAM,EAAE,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;YAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACjC,EAAE,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;gBACjC,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAA;gBAC9B,IAAI,GAAG;oBAAE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE;wBAAE,EAAE,CAAC,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;YACrE,CAAC;YACD,KAAK,CAAC,eAAe,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAA;YACjF,KAAK,CAAC,iBAAiB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAA;QAC/E,CAAC;aAAM,IAAI,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EAAE,CAAC;YAC3D,6FAA6F;YAC7F,0FAA0F;YAC1F,0EAA0E;YAC1E,KAAK,CAAC,eAAe,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,GAAG,kBAAkB,CAAC,EAAE;gBAC1G,CAAC;gBACD,IAAI,CAAC,WAAW;gBAChB,kBAAkB;aAClB,CAAC,CAAA;YACF,KAAK,CAAC,iBAAiB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAA;QAC/G,CAAC;QAED,+FAA+F;QAC/F,iGAAiG;QACjG,qFAAqF;QACrF,IAAI,SAAS,IAAI,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC,oBAAoB,CAAC,EAAE,CAAC;YACpE,MAAM,GAAG,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAA;YAC9C,MAAM,EAAE,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,GAAG,GAAG,CAAC,CAAA;YACnD,MAAM,EAAE,GAAG,IAA
I,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;YAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACjC,EAAE,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;gBACpC,MAAM,GAAG,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAA;gBACjC,IAAI,GAAG;oBAAE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE;wBAAE,EAAE,CAAC,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;YACrE,CAAC;YACD,KAAK,CAAC,kBAAkB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAA;YACpF,KAAK,CAAC,oBAAoB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAA;QAClF,CAAC;aAAM,IAAI,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC,oBAAoB,CAAC,EAAE,CAAC;YAC9D,KAAK,CAAC,kBAAkB,GAAG,IAAI,GAAG,CAAC,MAAM,CACxC,SAAS,EACT,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,GAAG,qBAAqB,CAAC,EAC1D,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,EAAE,qBAAqB,CAAC,CAC5C,CAAA;YACD,KAAK,CAAC,oBAAoB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,EAAE;gBAC1F,CAAC;gBACD,IAAI,CAAC,WAAW;aAChB,CAAC,CAAA;QACH,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;QACvC,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,CAAA;QAClC,IAAI,CAAC,YAAY;YAAE,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAA;QACjF,MAAM,IAAI,GAAG,YAAY,CAAC,IAAoB,CAAA;QAC9C,MAAM,CAAC,EAAE,AAAD,EAAG,SAAS,CAAC,GAAG,YAAY,CAAC,IAAyC,CAAA;QAE9E,MAAM,MAAM,GAAe,EAAE,CAAA;QAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACjC,MAAM,GAAG,GAAa,IAAI,KAAK,CAAC,SAAS,CAAC,CAAA;YAC1C,MAAM,IAAI,GAAG,CAAC,GAAG,SAAS,CAAA;YAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE;gBAAE,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,GAAG,CAAC,CAAE,CAAA;YAC5D,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QACjB,CAAC;QAED,uFAAuF;QACvF,MAAM,YAAY,GAAG,MAAM,CAAC,aAAa,CAAA;QACzC,MAAM,YAAY,GAAG,YAAY,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,IAAoB,CAAC,CAAC,CAAC,CAAC,SAAS,CAAA;QAE7F,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CA
AC,EAAE,CAAA;IACxE,CAAC;CACD"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Punctuation-gap span bridging — the v4.4.0 corrective (and the long-deferred Saint-Albans
|
|
7
|
+
* span-merge, scoped to where it is provably safe).
|
|
8
|
+
*
|
|
9
|
+
* The corpus alignment tokenizer drops standalone punctuation (corpus/src/tokenize.ts), so NO
|
|
10
|
+
* training row can label the periods inside "P.O. Box" — the model learns the tag perfectly
|
|
11
|
+
* (every letter piece at 0.93+ confidence) but emits it as fragments split at each dot, and
|
|
12
|
+
* span assembly surfaces only the first fragment ("p"). Measured on the v1.3.0 gate: dotted
|
|
13
|
+
* po_box leaders failed 98%, ALL truncations, while plain leaders passed — a structural
|
|
14
|
+
* expressivity limit of the label format, not a learning failure.
|
|
15
|
+
*
|
|
16
|
+
* The fix is deterministic: AFTER decode, merge adjacent same-label spans whose gap consists
|
|
17
|
+
* only of punctuation/whitespace, contains at least one non-space character, and is short
|
|
18
|
+
* (≤ 3 chars). The non-space requirement is load-bearing — space-only gaps ("Saint Paul" as two
|
|
19
|
+
* locality spans) are NOT bridged, because a space between two same-tag spans is often a real
|
|
20
|
+
* boundary (the Saint-Albans fragmentation wants this fix too, but it must come with its own
|
|
21
|
+
* evidence; this pass stays conservative by construction).
|
|
22
|
+
*
|
|
23
|
+
* Runs beside the postcode/unit repair passes in the classifier, before tree-building.
|
|
24
|
+
*/
|
|
25
|
+
import type { DecoderToken } from "@mailwoman/core/decoder";
|
|
26
|
+
/**
|
|
27
|
+
* Merge same-label fragments separated only by punctuation gaps. Returns a new token array where
|
|
28
|
+
* the first fragment of each bridged group is widened to the group's full char range (so span
|
|
29
|
+
* extraction reads the raw text straight through the punctuation), and later fragments are
|
|
30
|
+
* dropped. Labels, ordering, and all non-bridged tokens are untouched.
|
|
31
|
+
*/
|
|
32
|
+
export declare function bridgePunctuationGaps(text: string, input: readonly DecoderToken[]): DecoderToken[];
|
|
33
|
+
//# sourceMappingURL=span-bridge.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"span-bridge.d.ts","sourceRoot":"","sources":["../span-bridge.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAA;AAgB3D;;;;;GAKG;AACH,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,SAAS,YAAY,EAAE,GAAG,YAAY,EAAE,CAqClG"}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Punctuation-gap span bridging — the v4.4.0 corrective (and the long-deferred Saint-Albans
|
|
7
|
+
* span-merge, scoped to where it is provably safe).
|
|
8
|
+
*
|
|
9
|
+
* The corpus alignment tokenizer drops standalone punctuation (corpus/src/tokenize.ts), so NO
|
|
10
|
+
* training row can label the periods inside "P.O. Box" — the model learns the tag perfectly
|
|
11
|
+
* (every letter piece at 0.93+ confidence) but emits it as fragments split at each dot, and
|
|
12
|
+
* span assembly surfaces only the first fragment ("p"). Measured on the v1.3.0 gate: dotted
|
|
13
|
+
* po_box leaders failed 98%, ALL truncations, while plain leaders passed — a structural
|
|
14
|
+
* expressivity limit of the label format, not a learning failure.
|
|
15
|
+
*
|
|
16
|
+
* The fix is deterministic: AFTER decode, merge adjacent same-label spans whose gap consists
|
|
17
|
+
* only of punctuation/whitespace, contains at least one non-space character, and is short
|
|
18
|
+
* (≤ 3 chars). The non-space requirement is load-bearing — space-only gaps ("Saint Paul" as two
|
|
19
|
+
* locality spans) are NOT bridged, because a space between two same-tag spans is often a real
|
|
20
|
+
* boundary (the Saint-Albans fragmentation wants this fix too, but it must come with its own
|
|
21
|
+
* evidence; this pass stays conservative by construction).
|
|
22
|
+
*
|
|
23
|
+
* Runs beside the postcode/unit repair passes in the classifier, before tree-building.
|
|
24
|
+
*/
|
|
25
|
+
/**
 * Decide whether the raw text sitting between two same-label fragments may be bridged.
 *
 * A gap qualifies only when all three hold:
 *   - it is 1 to 3 characters long;
 *   - every character is INTRA-TOKEN punctuation (period, hyphen, slash, straight or curly
 *     apostrophe) or whitespace;
 *   - at least one character is not whitespace, so a space-only gap is never bridged.
 *
 * Separator punctuation (comma, semicolon) is deliberately absent from the allowed set —
 * measured 2026-06-11: allowing the comma merged "47110, 9016"-style postcode + house-number
 * fragments on six FR golden rows. A comma between same-tag spans is a list separator, never
 * the inside of a surface form.
 *
 * @param gap The slice of the input text between the end of one fragment and the start of the next.
 * @returns `true` when the gap may be absorbed into a merged span, `false` otherwise.
 */
function bridgeable(gap) {
    return (gap.length !== 0 &&
        !(gap.length > 3) &&
        /^[.\-/'\u2019\s]*$/.test(gap) &&
        /[^\s]/.test(gap));
}
|
|
40
|
+
/**
 * Merge same-label fragments that are separated only by a short punctuation gap.
 *
 * Tokens are processed in order. For each non-"O" token the function looks back over the
 * already-emitted tokens, skipping "O" tokens (the punctuation pieces themselves decode as "O"),
 * until it reaches a candidate anchor. When the anchor carries the same tag (ignoring a leading
 * "B-" / "I-" prefix), ends at or before the current token starts, every skipped token lies
 * inside the gap, and the gap text passes `bridgeable`, the anchor is widened to cover the
 * current token and the skipped tokens plus the current token are dropped.
 *
 * The widened token keeps the anchor's label and other fields; only `end`, `piece` (re-read
 * from `text`) and `confidence` (the minimum of the two) change. All other tokens are passed
 * through unchanged and in their original order.
 *
 * @param text  The string that the tokens' `start` / `end` offsets index into.
 * @param input Decoder tokens in sequence order; the array is not modified.
 * @returns A new token array with bridged groups collapsed into their first fragment.
 */
export function bridgePunctuationGaps(text, input) {
    const withoutPrefix = (label) => label.replace(/^[BI]-/, "");
    const merged = [];
    for (const current of input) {
        // "O" tokens never start a bridge; they are only ever absorbed by a later merge.
        if (current.label === "O") {
            merged.push(current);
            continue;
        }
        const tag = withoutPrefix(current.label);
        // Walk back past "O" tokens that do not overlap the token before them; whatever we
        // stop on is the candidate fragment to widen.
        let anchorIdx = merged.length - 1;
        for (; anchorIdx >= 0; anchorIdx--) {
            const candidate = merged[anchorIdx];
            if (candidate.label !== "O")
                break;
            const floor = merged[anchorIdx - 1]?.end ?? 0;
            if (!(candidate.start >= floor))
                break;
        }
        const anchor = anchorIdx >= 0 ? merged[anchorIdx] : undefined;
        if (anchor === undefined || anchor.label === "O") {
            merged.push(current);
            continue;
        }
        const sameTag = withoutPrefix(anchor.label) === tag;
        // Everything skipped during the look-back must sit strictly within the gap.
        const between = merged.slice(anchorIdx + 1);
        const contained = between.every((t) => t.start >= anchor.end && t.end <= current.start);
        const canBridge = sameTag &&
            current.start >= anchor.end &&
            contained &&
            bridgeable(text.slice(anchor.end, current.start));
        if (!canBridge) {
            merged.push(current);
            continue;
        }
        // Replace the anchor and everything after it with one widened token. The lower
        // confidence wins so the merged span never overstates its weakest piece.
        merged.splice(anchorIdx, merged.length - anchorIdx, {
            ...anchor,
            end: current.end,
            piece: text.slice(anchor.start, current.end),
            confidence: Math.min(anchor.confidence, current.confidence),
        });
    }
    return merged;
}
|
|
82
|
+
//# sourceMappingURL=span-bridge.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"span-bridge.js","sourceRoot":"","sources":["../span-bridge.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAIH;;;;;;;GAOG;AACH,SAAS,UAAU,CAAC,GAAW;IAC9B,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,KAAK,CAAA;IACpD,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,KAAK,CAAA;IACjD,OAAO,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AACzB,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,qBAAqB,CAAC,IAAY,EAAE,KAA8B;IACjF,MAAM,GAAG,GAAmB,EAAE,CAAA;IAC9B,KAAK,MAAM,KAAK,IAAI,KAAK,EAAE,CAAC;QAC3B,IAAI,KAAK,CAAC,KAAK,KAAK,GAAG,EAAE,CAAC;YACzB,wFAAwF;YACxF,oEAAoE;YACpE,IAAI,IAAI,GAAG,GAAG,CAAC,MAAM,GAAG,CAAC,CAAA;YACzB,OAAO,IAAI,IAAI,CAAC,IAAI,GAAG,CAAC,IAAI,CAAE,CAAC,KAAK,KAAK,GAAG,IAAI,GAAG,CAAC,IAAI,CAAE,CAAC,KAAK,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,CAAC;gBAAE,IAAI,EAAE,CAAA;YACrG,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC,CAAC,CAAC,SAAS,CAAA;YAC/C,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;YAC7C,MAAM,OAAO,GAAG,IAAI,EAAE,KAAK,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;YACjD,MAAM,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,CAAA;YACnC,MAAM,gBAAgB,GACrB,IAAI,KAAK,SAAS,IAAI,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC,GAAG,IAAI,KAAK,CAAC,KAAK,CAAC,CAAA;YACxF,IACC,IAAI;gBACJ,IAAI,CAAC,KAAK,KAAK,GAAG;gBAClB,OAAO,KAAK,GAAG;gBACf,KAAK,CAAC,KAAK,IAAI,IAAI,CAAC,GAAG;gBACvB,gBAAgB;gBAChB,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAC5C,CAAC;gBACF,oFAAoF;gBACpF,mFAAmF;gBACnF,GAAG,CAAC,MAAM,GAAG,IAAI,GAAG,CAAC,CAAA;gBACrB,GAAG,CAAC,IAAI,CAAC,GAAG;oBACX,GAAG,IAAI;oBACP,GAAG,EAAE,KAAK,CAAC,GAAG;oBACd,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC;oBACxC,UAAU,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,EAAE,KAAK,CAAC,UAAU,CAAC;iBACvD,CAAA;gBACD,SAAQ;YACT,CAAC;QACF,CAAC;QACD,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IAChB,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mailwoman/neural",
|
|
3
|
-
"version": "4.2.0",
|
|
3
|
+
"version": "4.4.0",
|
|
4
4
|
"description": "Mailwoman neural classifier runtime: SentencePiece tokenizer + ONNX inference + decoder wiring.",
|
|
5
5
|
"license": "AGPL-3.0-only",
|
|
6
6
|
"repository": {
|
|
@@ -20,8 +20,8 @@
|
|
|
20
20
|
"./browser": "./out/browser.js"
|
|
21
21
|
},
|
|
22
22
|
"dependencies": {
|
|
23
|
-
"@mailwoman/codex": "4.
|
|
24
|
-
"@mailwoman/core": "4.
|
|
23
|
+
"@mailwoman/codex": "4.4.0",
|
|
24
|
+
"@mailwoman/core": "4.4.0",
|
|
25
25
|
"@sctg/sentencepiece-js": "^1.3.3",
|
|
26
26
|
"onnxruntime-node": "^1.26.0"
|
|
27
27
|
},
|