@mailwoman/neural 2.2.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. package/out/anchor-inference.d.ts +58 -0
  2. package/out/anchor-inference.d.ts.map +1 -0
  3. package/out/anchor-inference.js +95 -0
  4. package/out/anchor-inference.js.map +1 -0
  5. package/out/browser.d.ts +2 -0
  6. package/out/browser.d.ts.map +1 -1
  7. package/out/browser.js +4 -0
  8. package/out/browser.js.map +1 -1
  9. package/out/classifier.d.ts +70 -3
  10. package/out/classifier.d.ts.map +1 -1
  11. package/out/classifier.js +80 -19
  12. package/out/classifier.js.map +1 -1
  13. package/out/fst-prior.d.ts +71 -0
  14. package/out/fst-prior.d.ts.map +1 -0
  15. package/out/fst-prior.js +173 -0
  16. package/out/fst-prior.js.map +1 -0
  17. package/out/index.d.ts +3 -0
  18. package/out/index.d.ts.map +1 -1
  19. package/out/index.js +3 -0
  20. package/out/index.js.map +1 -1
  21. package/out/labels.d.ts +3 -0
  22. package/out/labels.d.ts.map +1 -1
  23. package/out/labels.js +13 -0
  24. package/out/labels.js.map +1 -1
  25. package/out/onnx-runner.d.ts +8 -1
  26. package/out/onnx-runner.d.ts.map +1 -1
  27. package/out/onnx-runner.js +31 -1
  28. package/out/onnx-runner.js.map +1 -1
  29. package/out/postcode-anchor.d.ts +117 -0
  30. package/out/postcode-anchor.d.ts.map +1 -0
  31. package/out/postcode-anchor.js +269 -0
  32. package/out/postcode-anchor.js.map +1 -0
  33. package/out/postcode-binary-resolver.d.ts +60 -0
  34. package/out/postcode-binary-resolver.d.ts.map +1 -0
  35. package/out/postcode-binary-resolver.js +208 -0
  36. package/out/postcode-binary-resolver.js.map +1 -0
  37. package/out/postcode-repair.d.ts +65 -0
  38. package/out/postcode-repair.d.ts.map +1 -0
  39. package/out/postcode-repair.js +171 -0
  40. package/out/postcode-repair.js.map +1 -0
  41. package/out/proposal-classifier.d.ts.map +1 -1
  42. package/out/proposal-classifier.js +3 -1
  43. package/out/proposal-classifier.js.map +1 -1
  44. package/out/query-shape-prior.d.ts +12 -0
  45. package/out/query-shape-prior.d.ts.map +1 -1
  46. package/out/query-shape-prior.js +132 -2
  47. package/out/query-shape-prior.js.map +1 -1
  48. package/out/street-morphology-prior.d.ts +56 -0
  49. package/out/street-morphology-prior.d.ts.map +1 -0
  50. package/out/street-morphology-prior.js +159 -0
  51. package/out/street-morphology-prior.js.map +1 -0
  52. package/out/unit-repair.d.ts +42 -0
  53. package/out/unit-repair.d.ts.map +1 -0
  54. package/out/unit-repair.js +142 -0
  55. package/out/unit-repair.js.map +1 -0
  56. package/out/vitest.config.d.ts.map +1 -1
  57. package/out/vitest.config.js +3 -0
  58. package/out/vitest.config.js.map +1 -1
  59. package/out/weights.d.ts +27 -3
  60. package/out/weights.d.ts.map +1 -1
  61. package/out/weights.js +46 -2
  62. package/out/weights.js.map +1 -1
  63. package/package.json +6 -2
@@ -0,0 +1,58 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Inference-side postcode-anchor features (#239/#240) — the mirror of the Python training pipeline
7
+ * (`mailwoman_train/tokenizer.py::anchor_feature_vector` + `realign_anchor_to_pieces`). At
8
+ * inference the model conditions on per-piece anchor features fed alongside `input_ids`; this
9
+ * builds them from a raw address + its SentencePiece pieces, using the SAME postcode→anchor
10
+ * lookup the model trained against (`scripts/build-pilot-anchor-lookup.py`), so the feature
11
+ * layout matches byte-for-byte.
12
+ *
13
+ * The layout is LOAD-BEARING and cross-language: a wrong locale order or centroid scale feeds the
14
+ * model garbage. `anchor-inference.test.ts` pins both `LOCALE_ORDER` and the vector to values
15
+ * emitted by the Python `anchor_feature_vector` — any drift fails the test.
16
+ */
17
+ import type { TokenizedPiece } from "./tokenizer.js";
18
+ /**
19
+ * The locale class order — MUST match Python `mailwoman_train/labels.py::LOCALE_COUNTRIES`. The
20
+ * posterior occupies indices `[0, LOCALE_ORDER.length)`; the normalized centroid the last two.
21
+ * (Pinned by the test; do not reorder.)
22
+ */
23
+ export declare const LOCALE_ORDER: readonly ["US", "FR", "DE", "CA", "GB", "JP", "ES", "IT", "NL"];
24
+ /** Anchor feature width = posterior over the locale set + a 2-d centroid. */
25
+ export declare const ANCHOR_FEATURE_DIM: number;
26
+ /** One postcode's anchor record (from the pilot lookup): country posterior + a single centroid. */
27
+ export interface AnchorEntry {
28
+ posterior: Record<string, number>;
29
+ lat: number;
30
+ lon: number;
31
+ }
32
+ export type AnchorLookup = Map<string, AnchorEntry>;
33
+ /**
34
+ * Build the fixed-width anchor feature vector — the exact mirror of Python `anchor_feature_vector`:
35
+ * the supplied country posterior projected onto {@linkcode LOCALE_ORDER} (renormalized over the in-set mass) + a
36
+ * normalized centroid (`lat/90`, `lon/180` ∈ [-1, 1]).
37
+ */
38
+ export declare function anchorFeatureVector(posterior: Record<string, number>, lat: number, lon: number): number[];
39
+ /**
40
+ * Parse the pilot postcode→anchor lookup JSON (`{postcode: [posterior, lat, lon]}`) into a Map.
41
+ * Pure (takes the parsed object, not a path) so this module stays browser-safe — the file read
42
+ * lives in the Node-side caller (the eval).
43
+ */
44
+ export declare function parseAnchorLookup(raw: Record<string, [Record<string, number>, number, number]>): AnchorLookup;
45
+ /**
46
+ * Per-piece anchor features + confidence for `text`, projected onto its SP `pieces` by the SAME
47
+ * char→piece rule the labels use (a piece takes the anchor of the postcode span its first
48
+ * non-whitespace char falls inside) — so the anchor lands on exactly the postcode's sub-tokens.
49
+ *
50
+ * Postcode spans are the alphanumeric runs in `text` that the lookup recognizes (gold-equivalent on
51
+ * clean rendered addresses); a recognized span yields a confidence-1.0 anchor, like training's
52
+ * gold-span. Returns `(pieces × ANCHOR_FEATURE_DIM)` features + `(pieces,)` confidence.
53
+ */
54
+ export declare function buildAnchorFeatures(text: string, pieces: ReadonlyArray<TokenizedPiece>, lookup: AnchorLookup): {
55
+ features: number[][];
56
+ confidence: number[];
57
+ };
58
+ //# sourceMappingURL=anchor-inference.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"anchor-inference.d.ts","sourceRoot":"","sources":["../anchor-inference.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAA;AAEpD;;;;GAIG;AACH,eAAO,MAAM,YAAY,iEAAkE,CAAA;AAE3F,6EAA6E;AAC7E,eAAO,MAAM,kBAAkB,QAA0B,CAAA;AAEzD,mGAAmG;AACnG,MAAM,WAAW,WAAW;IAC3B,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IACjC,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,EAAE,MAAM,CAAA;CACX;AAED,MAAM,MAAM,YAAY,GAAG,GAAG,CAAC,MAAM,EAAE,WAAW,CAAC,CAAA;AAEnD;;;;GAIG;AACH,wBAAgB,mBAAmB,CAAC,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,EAAE,CAgBzG;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,GAAG,YAAY,CAI7G;AAED;;;;;;;;GAQG;AACH,wBAAgB,mBAAmB,CAClC,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,aAAa,CAAC,cAAc,CAAC,EACrC,MAAM,EAAE,YAAY,GAClB;IAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,CAAC;IAAC,UAAU,EAAE,MAAM,EAAE,CAAA;CAAE,CA0BhD"}
@@ -0,0 +1,95 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Inference-side postcode-anchor features (#239/#240) — the mirror of the Python training pipeline
7
+ * (`mailwoman_train/tokenizer.py::anchor_feature_vector` + `realign_anchor_to_pieces`). At
8
+ * inference the model conditions on per-piece anchor features fed alongside `input_ids`; this
9
+ * builds them from a raw address + its SentencePiece pieces, using the SAME postcode→anchor
10
+ * lookup the model trained against (`scripts/build-pilot-anchor-lookup.py`), so the feature
11
+ * layout matches byte-for-byte.
12
+ *
13
+ * The layout is LOAD-BEARING and cross-language: a wrong locale order or centroid scale feeds the
14
+ * model garbage. `anchor-inference.test.ts` pins both `LOCALE_ORDER` and the vector to values
15
+ * emitted by the Python `anchor_feature_vector` — any drift fails the test.
16
+ */
17
/**
 * The locale class order — MUST match Python `mailwoman_train/labels.py::LOCALE_COUNTRIES`. The
 * posterior occupies indices `[0, LOCALE_ORDER.length)`; the normalized centroid the last two.
 * (Pinned by the test; do not reorder.)
 */
export const LOCALE_ORDER = ["US", "FR", "DE", "CA", "GB", "JP", "ES", "IT", "NL"];
/** Anchor feature width = posterior over the locale set + a 2-d centroid. */
export const ANCHOR_FEATURE_DIM = LOCALE_ORDER.length + 2;
/**
 * Build the fixed-width anchor feature vector: the supplied country posterior projected onto
 * {@linkcode LOCALE_ORDER} and renormalized over the in-set mass, followed by a normalized
 * centroid (`lat/90`, `lon/180`, each clamped to [-1, 1]).
 *
 * Country keys are matched case-insensitively. Keys that collapse to the same locale (e.g. `"us"`
 * and `"US"`) are summed into one slot, so the in-set posterior always sums to 1 whenever any
 * in-set mass is present. Countries outside {@linkcode LOCALE_ORDER} are ignored. When no in-set
 * mass is present the posterior slots stay all-zero.
 *
 * @param posterior Country code → weight.
 * @param lat Centroid latitude in degrees.
 * @param lon Centroid longitude in degrees.
 * @returns A fresh array of length {@linkcode ANCHOR_FEATURE_DIM}.
 */
export function anchorFeatureVector(posterior, lat, lon) {
    const localeCount = LOCALE_ORDER.length;
    const vec = new Array(ANCHOR_FEATURE_DIM).fill(0);
    let total = 0;
    for (const [country, weight] of Object.entries(posterior)) {
        const idx = LOCALE_ORDER.indexOf(country.toUpperCase());
        if (idx < 0)
            continue; // out-of-set country: contributes no mass
        // Accumulate rather than overwrite: case-variant duplicates map to the same slot, and the
        // slot must hold everything that was counted into `total`.
        vec[idx] += weight;
        total += weight;
    }
    if (total > 0) {
        for (let i = 0; i < localeCount; i++)
            vec[i] /= total;
    }
    // Centroid occupies the last two slots, scaled to the unit range and clamped.
    vec[localeCount] = Math.max(-1, Math.min(1, lat / 90));
    vec[localeCount + 1] = Math.max(-1, Math.min(1, lon / 180));
    return vec;
}
48
/**
 * Convert the parsed pilot postcode→anchor lookup JSON into a `Map`.
 *
 * The input maps each postcode to a `[posterior, lat, lon]` triple; each triple is unpacked into
 * a `{ posterior, lat, lon }` record keyed by the same postcode string. The function takes the
 * already-parsed object (not a path) and performs no I/O.
 *
 * @param raw Parsed lookup object, `{ postcode: [posterior, lat, lon] }`.
 * @returns A `Map` from postcode to its anchor record.
 */
export function parseAnchorLookup(raw) {
    const lookup = new Map();
    Object.keys(raw).forEach((postcode) => {
        const [posterior, lat, lon] = raw[postcode];
        lookup.set(postcode, { posterior, lat, lon });
    });
    return lookup;
}
59
/**
 * Per-piece anchor features + confidence for `text`, projected onto its SP `pieces`.
 *
 * Candidate postcode spans are the ASCII alphanumeric runs of `text` whose upper-cased form is a
 * key of `lookup`. A piece receives a span's anchor when the piece's first non-whitespace
 * character (scanning `[start, end)`, bounded by `text.length`) falls inside that span; the
 * piece's confidence is then set to 1.0. All other pieces keep an all-zero feature row and
 * confidence 0. If several recognized spans cover the same deciding character, the later one wins.
 *
 * Each anchored piece gets its OWN copy of the feature vector, so callers may mutate one row of
 * the result without affecting the other pieces of the same postcode.
 *
 * @param text The raw address string the pieces were tokenized from.
 * @param pieces Tokenizer pieces; only their `start`/`end` character offsets are read.
 * @param lookup Postcode → anchor record map (see `parseAnchorLookup`).
 * @returns `features` (`pieces.length` rows × `ANCHOR_FEATURE_DIM`) and `confidence`
 *   (`pieces.length` entries).
 */
export function buildAnchorFeatures(text, pieces, lookup) {
    const features = pieces.map(() => new Array(ANCHOR_FEATURE_DIM).fill(0));
    const confidence = pieces.map(() => 0);
    // The deciding character of a piece is independent of which postcode matched, so resolve it
    // once per piece instead of once per (match, piece) pair. -1 means "no deciding character".
    const whitespace = /\s/;
    const decidingChar = pieces.map((p) => {
        for (let c = p.start; c < p.end; c++) {
            if (c < text.length && !whitespace.test(text[c]))
                return c;
        }
        return -1;
    });
    const tokenRe = /[A-Za-z0-9]+/g;
    let m;
    while ((m = tokenRe.exec(text)) !== null) {
        const entry = lookup.get(m[0].toUpperCase());
        if (!entry)
            continue;
        const spanBegin = m.index;
        const spanEnd = spanBegin + m[0].length;
        const vec = anchorFeatureVector(entry.posterior, entry.lat, entry.lon);
        for (let i = 0; i < pieces.length; i++) {
            const c = decidingChar[i];
            if (c >= spanBegin && c < spanEnd) {
                // Copy per piece: sharing one array across rows would let a mutation of one row
                // silently rewrite every other piece of the same postcode.
                features[i] = vec.slice();
                confidence[i] = 1.0;
            }
        }
    }
    return { features, confidence };
}
95
+ //# sourceMappingURL=anchor-inference.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"anchor-inference.js","sourceRoot":"","sources":["../anchor-inference.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAIH;;;;GAIG;AACH,MAAM,CAAC,MAAM,YAAY,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAU,CAAA;AAE3F,6EAA6E;AAC7E,MAAM,CAAC,MAAM,kBAAkB,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAA;AAWzD;;;;GAIG;AACH,MAAM,UAAU,mBAAmB,CAAC,SAAiC,EAAE,GAAW,EAAE,GAAW;IAC9F,MAAM,GAAG,GAAG,IAAI,KAAK,CAAS,kBAAkB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACzD,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,KAAK,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,CAAC;QAC3D,MAAM,GAAG,GAAG,YAAY,CAAC,OAAO,CAAC,OAAO,CAAC,WAAW,EAAmC,CAAC,CAAA;QACxF,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC;YACd,GAAG,CAAC,GAAG,CAAC,GAAG,MAAM,CAAA;YACjB,KAAK,IAAI,MAAM,CAAA;QAChB,CAAC;IACF,CAAC;IACD,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;QACf,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE;YAAE,GAAG,CAAC,CAAC,CAAE,IAAI,KAAK,CAAA;IAC/D,CAAC;IACD,GAAG,CAAC,YAAY,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,GAAG,EAAE,CAAC,CAAC,CAAA;IAC9D,GAAG,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,GAAG,GAAG,CAAC,CAAC,CAAA;IACnE,OAAO,GAAG,CAAA;AACX,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,iBAAiB,CAAC,GAA6D;IAC9F,MAAM,GAAG,GAAiB,IAAI,GAAG,EAAE,CAAA;IACnC,KAAK,MAAM,CAAC,EAAE,EAAE,CAAC,SAAS,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC;QAAE,GAAG,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,SAAS,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,CAAA;IACnG,OAAO,GAAG,CAAA;AACX,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,mBAAmB,CAClC,IAAY,EACZ,MAAqC,EACrC,MAAoB;IAEpB,MAAM,QAAQ,GAAe,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,KAAK,CAAS,kBAAkB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAA;IAC5F,MAAM,UAAU,GAAa,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAA;IAEhD,MAAM,OAAO,GAAG,eAAe,CAAA;IAC/B,IAAI,CAAyB,CAAA;IAC7B,OAAO,CAAC,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC1C,M
AAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAA;QAC5C,IAAI,CAAC,KAAK;YAAE,SAAQ;QACpB,MAAM,SAAS,GAAG,CAAC,CAAC,KAAK,CAAA;QACzB,MAAM,OAAO,GAAG,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAA;QACrC,MAAM,GAAG,GAAG,mBAAmB,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,GAAG,EAAE,KAAK,CAAC,GAAG,CAAC,CAAA;QACtE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;YACpB,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;gBACtC,IAAI,CAAC,GAAG,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAE,CAAC,EAAE,CAAC;oBAC7C,IAAI,CAAC,IAAI,SAAS,IAAI,CAAC,GAAG,OAAO,EAAE,CAAC;wBACnC,QAAQ,CAAC,CAAC,CAAC,GAAG,GAAG,CAAA;wBACjB,UAAU,CAAC,CAAC,CAAC,GAAG,GAAG,CAAA;oBACpB,CAAC;oBACD,MAAK,CAAC,oFAAoF;gBAC3F,CAAC;YACF,CAAC;QACF,CAAC;IACF,CAAC;IACD,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,CAAA;AAChC,CAAC"}
package/out/browser.d.ts CHANGED
@@ -12,5 +12,7 @@
12
12
  export * from "./classifier.js";
13
13
  export * from "./labels.js";
14
14
  export * from "./tokenizer.js";
15
+ export * from "./anchor-inference.js";
16
+ export * from "./postcode-binary-resolver.js";
15
17
  export type { InferResult } from "./onnx-runner.js";
16
18
  //# sourceMappingURL=browser.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"browser.d.ts","sourceRoot":"","sources":["../browser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,gBAAgB,CAAA;AAG9B,YAAY,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAA"}
1
+ {"version":3,"file":"browser.d.ts","sourceRoot":"","sources":["../browser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,gBAAgB,CAAA;AAG9B,cAAc,uBAAuB,CAAA;AACrC,cAAc,+BAA+B,CAAA;AAG7C,YAAY,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAA"}
package/out/browser.js CHANGED
@@ -12,4 +12,8 @@
12
12
  export * from "./classifier.js";
13
13
  export * from "./labels.js";
14
14
  export * from "./tokenizer.js";
15
+ // Browser-safe anchor channel (#239/#240): the pure-JS feature builder + the postcode binary resolver
16
+ // (zero-dep) the demo wires together to feed the anchor at inference.
17
+ export * from "./anchor-inference.js";
18
+ export * from "./postcode-binary-resolver.js";
15
19
  //# sourceMappingURL=browser.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"browser.js","sourceRoot":"","sources":["../browser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,gBAAgB,CAAA"}
1
+ {"version":3,"file":"browser.js","sourceRoot":"","sources":["../browser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,gBAAgB,CAAA;AAC9B,sGAAsG;AACtG,sEAAsE;AACtE,cAAc,uBAAuB,CAAA;AACrC,cAAc,+BAA+B,CAAA"}
@@ -9,9 +9,12 @@
9
9
  *
10
10
  * Convenience wrappers `parseJson` / `parseTuples` / `parseXml` project the tree on the way out.
11
11
  */
12
- import { decodeAsXml, type AddressTree, type ComponentTag } from "@mailwoman/core/decoder";
12
+ import { decodeAsXml, type AddressTree, type Calibrator, type ComponentTag } from "@mailwoman/core/decoder";
13
+ import { type AnchorLookup } from "./anchor-inference.js";
14
+ import { type FstMatcherLike } from "./fst-prior.js";
13
15
  import type { InferResult } from "./onnx-runner.js";
14
16
  import { type QueryShapeLike } from "./query-shape-prior.js";
17
+ import { type StreetMorphologyPriorOpts } from "./street-morphology-prior.js";
15
18
  import { MailwomanTokenizer } from "./tokenizer.js";
16
19
  import type { ResolveWeightsOpts } from "./weights.js";
17
20
  /**
@@ -20,7 +23,10 @@ import type { ResolveWeightsOpts } from "./weights.js";
20
23
  * the classifier only ever calls `infer(ids)`.
21
24
  */
22
25
  export interface NeuralRunner {
23
- infer(tokenIds: number[]): Promise<InferResult>;
26
+ infer(tokenIds: number[], anchor?: {
27
+ features: ReadonlyArray<ReadonlyArray<number>>;
28
+ confidence: ReadonlyArray<number>;
29
+ }): Promise<InferResult>;
24
30
  }
25
31
  export interface NeuralAddressClassifierConfig {
26
32
  tokenizer: MailwomanTokenizer;
@@ -50,6 +56,13 @@ export interface NeuralAddressClassifierConfig {
50
56
  startTransitions?: number[];
51
57
  /** Optional learned end-of-sequence transition scores per label. */
52
58
  endTransitions?: number[];
59
+ /**
60
+ * Optional postcode-anchor lookup (#239/#240). When set, `parse` builds per-piece anchor features
61
+ * from the text + this lookup and feeds them to the runner — for models trained with the anchor
62
+ * channel (exported with the `anchor_features`/`anchor_confidence` ONNX inputs). Omit for plain
63
+ * models. Build via `parseAnchorLookup` from `./anchor-inference.js`.
64
+ */
65
+ postcodeAnchorLookup?: AnchorLookup;
53
66
  }
54
67
  export declare class NeuralAddressClassifier {
55
68
  private readonly cfg;
@@ -71,7 +84,9 @@ export declare class NeuralAddressClassifier {
71
84
  * by `@mailwoman/neural-web`. Calling this method in a browser will throw at runtime — use
72
85
  * `loadNeuralClassifierFromUrls` from `@mailwoman/neural-web` instead.
73
86
  */
74
- static loadFromWeights(opts?: ResolveWeightsOpts): Promise<NeuralAddressClassifier>;
87
+ static loadFromWeights(opts?: ResolveWeightsOpts & {
88
+ postcodeAnchorLookup?: AnchorLookup;
89
+ }): Promise<NeuralAddressClassifier>;
75
90
  /** Tokenize → infer → Viterbi (or argmax) → decoder tree. */
76
91
  parse(text: string, opts?: ParseOpts): Promise<AddressTree>;
77
92
  /**
@@ -84,6 +99,18 @@ export declare class NeuralAddressClassifier {
84
99
  parseXml(text: string, opts?: ParseOpts & {
85
100
  xml?: Parameters<typeof decodeAsXml>[1];
86
101
  }): Promise<string>;
102
+ /**
103
+ * Guard against a silent label/emission shape overrun. When the model emits MORE logits per token
104
+ * than the configured label vocabulary (e.g. a Stage 3 bundle loaded with the default Stage 2
105
+ * labels), viterbi indexes past the transition matrix and dies with an opaque `Cannot read
106
+ * properties of undefined (reading '0')`. Fail fast here with a message that names the contract
107
+ * the caller violated.
108
+ *
109
+ * The opposite shape (model narrower than labels) is intentionally permitted — STAGE2_BIO_LABELS
110
+ * prefix-extends STAGE1_BIO_LABELS so a Stage 1 model loaded with Stage 2 labels decodes
111
+ * correctly via the first 15 logits. See labels.ts for the contract.
112
+ */
113
+ private assertEmissionWidth;
87
114
  }
88
115
  /** Result of `parseWithLogits` — tree + raw material for per-span logit aggregation. */
89
116
  export interface ParseWithLogitsResult {
@@ -110,5 +137,45 @@ export interface ParseOpts {
110
137
  * favored label. Confidence-scaled, so a 0.6-confidence format hit gets +0.6 max bias.
111
138
  */
112
139
  queryShapeBiasScale?: number;
140
+ /**
141
+ * Pre-built FST gazetteer matcher. When provided, gazetteer matches produce additive emission
142
+ * biases.
143
+ */
144
+ fst?: FstMatcherLike;
145
+ /** Bias magnitude for FST gazetteer matches. Default 1.0. */
146
+ fstBiasScale?: number;
147
+ /**
148
+ * Pre-built street-morphology FST matcher. When provided, street-type affixes (Avenue, rue,
149
+ * Calle, Straße, …) produce additive emission biases toward `street_prefix`/`street_suffix` on
150
+ * the matched tokens AND toward `street` / away from `dependent_locality` on the adjacent name
151
+ * tokens. Closes the v0.6.1 dependent_locality vacuum; see
152
+ * `docs/articles/concepts/street-supplement-architecture.md` for the layered design.
153
+ */
154
+ fstStreetMorphology?: FstMatcherLike;
155
+ /** Override bias magnitudes for the morphology prior. */
156
+ fstStreetMorphologyOpts?: StreetMorphologyPriorOpts;
157
+ /**
158
+ * When true, run the deterministic postcode regex repair pass (v0.7 #35) on the decoded label
159
+ * sequence before tree-building. Detects postcode-shaped substrings (GB/CA/NL/US/FR/… patterns)
160
+ * and snaps/adds the postcode span to the matched shape, fixing the SentencePiece-fragmentation
161
+ * failures catalogued in the 2026-05-29 postcode diagnostic. Off by default — opt-in until the
162
+ * v0.7 gate confirms it. See `./postcode-repair.ts`.
163
+ */
164
+ postcodeRepair?: boolean;
165
+ /**
166
+ * When true, run the deterministic secondary-unit regex repair pass on the decoded label sequence
167
+ * before tree-building. Detects designator-shaped substrings ("Apt 4B", "Ste 12", "Unit 9400",
168
+ * bare "#104", …) and snaps/adds the unit span, fixing the unit-drop weakness the three-arena
169
+ * capability eval surfaced (postal secondary-unit 0% neural). Off by default — opt-in until the
170
+ * v0.7.2 arena re-run quantifies its delta. See `./unit-repair.ts`.
171
+ */
172
+ unitRepair?: boolean;
173
+ /**
174
+ * Optional span-confidence calibrator (task #59). When provided, each decoded span's `conf=` is
175
+ * mapped through it (isotonic lookup table → calibrated probability of correctness). OPT-IN —
176
+ * omit for the byte-stable default softmax confidence. Build one via `createCalibrator`
177
+ * (`@mailwoman/core/decoder`) from `data/eval/calibration/isotonic-<locale>-<version>.json`.
178
+ */
179
+ calibrate?: Calibrator;
113
180
  }
114
181
  //# sourceMappingURL=classifier.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"classifier.d.ts","sourceRoot":"","sources":["../classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAIN,WAAW,EACX,KAAK,WAAW,EAChB,KAAK,YAAY,EAEjB,MAAM,yBAAyB,CAAA;AAEhC,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAA;AACnD,OAAO,EAA0C,KAAK,cAAc,EAAE,MAAM,wBAAwB,CAAA;AACpG,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAA;AAEnD,OAAO,KAAK,EAAE,kBAAkB,EAAmB,MAAM,cAAc,CAAA;AAEvE;;;;GAIG;AACH,MAAM,WAAW,YAAY;IAC5B,KAAK,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,WAAW,CAAC,CAAA;CAC/C;AAED,MAAM,WAAW,6BAA6B;IAC7C,SAAS,EAAE,kBAAkB,CAAA;IAC7B,MAAM,EAAE,YAAY,CAAA;IACpB;;;;OAIG;IACH,MAAM,CAAC,EAAE,SAAS,MAAM,EAAE,CAAA;IAC1B;;;;;;;OAOG;IACH,MAAM,CAAC,EAAE,SAAS,GAAG,QAAQ,CAAA;IAC7B;;;;OAIG;IACH,WAAW,CAAC,EAAE,MAAM,EAAE,EAAE,CAAA;IACxB,sEAAsE;IACtE,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAA;IAC3B,oEAAoE;IACpE,cAAc,CAAC,EAAE,MAAM,EAAE,CAAA;CACzB;AAED,qBAAa,uBAAuB;IAOvB,OAAO,CAAC,QAAQ,CAAC,GAAG;IANhC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAmB;IAC1C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAsB;IACjD,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAY;IACxC,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAU;IAC3C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAU;gBAEZ,GAAG,EAAE,6BAA6B;IAa/D;;;;;;;;;;;OAWG;WACU,eAAe,CAAC,IAAI,GAAE,kBAAuB,GAAG,OAAO,CAAC,uBAAuB,CAAC;IAuB7F,6DAA6D;IACvD,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,WAAW,CAAC;IA8CjE;;;OAGG;IACG,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,qBAAqB,CAAC;IA6C/E,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC;IAIzF,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC;IAInF,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG;QAAE,GAAG,CAAC,EAAE,UAAU,CAAC,OAAO,WAAW,CAAC,CAAC,CAAC,CAAC,CAAA;KAAE,GAAG,OAAO,CAAC,MAAM,CAAC;CAG7G;AAED,wFAAwF;AACxF,MAAM,WAAW,qBAAqB;IACrC,IAAI,EAAE,WAAW,CAAA;IACjB,MAAM,EAAE,MAAM,EAAE,EAAE,CAAA;IAClB,MAAM,EAAE,KAAK,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,GAAG,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;CAC7C;AAED;;;GAGG;AACH,MAAM,WAAW,SAAS;
IACzB;;;;OAIG;IACH,UAAU,CAAC,EAAE,cAAc,CAAA;IAC3B;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAA;CAC5B"}
1
+ {"version":3,"file":"classifier.d.ts","sourceRoot":"","sources":["../classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAIN,WAAW,EACX,KAAK,WAAW,EAChB,KAAK,UAAU,EACf,KAAK,YAAY,EAEjB,MAAM,yBAAyB,CAAA;AAChC,OAAO,EAAuB,KAAK,YAAY,EAAE,MAAM,uBAAuB,CAAA;AAC9E,OAAO,EAA0B,KAAK,cAAc,EAAE,MAAM,gBAAgB,CAAA;AAE5E,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAA;AAEnD,OAAO,EAA0C,KAAK,cAAc,EAAE,MAAM,wBAAwB,CAAA;AACpG,OAAO,EAAuC,KAAK,yBAAyB,EAAE,MAAM,8BAA8B,CAAA;AAClH,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAA;AAGnD,OAAO,KAAK,EAAE,kBAAkB,EAAmB,MAAM,cAAc,CAAA;AAEvE;;;;GAIG;AACH,MAAM,WAAW,YAAY;IAC5B,KAAK,CACJ,QAAQ,EAAE,MAAM,EAAE,EAClB,MAAM,CAAC,EAAE;QAAE,QAAQ,EAAE,aAAa,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC;QAAC,UAAU,EAAE,aAAa,CAAC,MAAM,CAAC,CAAA;KAAE,GAC5F,OAAO,CAAC,WAAW,CAAC,CAAA;CACvB;AAED,MAAM,WAAW,6BAA6B;IAC7C,SAAS,EAAE,kBAAkB,CAAA;IAC7B,MAAM,EAAE,YAAY,CAAA;IACpB;;;;OAIG;IACH,MAAM,CAAC,EAAE,SAAS,MAAM,EAAE,CAAA;IAC1B;;;;;;;OAOG;IACH,MAAM,CAAC,EAAE,SAAS,GAAG,QAAQ,CAAA;IAC7B;;;;OAIG;IACH,WAAW,CAAC,EAAE,MAAM,EAAE,EAAE,CAAA;IACxB,sEAAsE;IACtE,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAA;IAC3B,oEAAoE;IACpE,cAAc,CAAC,EAAE,MAAM,EAAE,CAAA;IACzB;;;;;OAKG;IACH,oBAAoB,CAAC,EAAE,YAAY,CAAA;CACnC;AAED,qBAAa,uBAAuB;IAOvB,OAAO,CAAC,QAAQ,CAAC,GAAG;IANhC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAmB;IAC1C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAsB;IACjD,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAY;IACxC,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAU;IAC3C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAU;gBAEZ,GAAG,EAAE,6BAA6B;IAa/D;;;;;;;;;;;OAWG;WACU,eAAe,CAC3B,IAAI,GAAE,kBAAkB,GAAG;QAAE,oBAAoB,CAAC,EAAE,YAAY,CAAA;KAAO,GACrE,OAAO,CAAC,uBAAuB,CAAC;IA4BnC,6DAA6D;IACvD,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,WAAW,CAAC;IA4EjE;;;OAGG;IACG,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,qBAAqB,CAAC;IA0E/E,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC;IAIzF,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC;IAInF,QAAQ,
CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG;QAAE,GAAG,CAAC,EAAE,UAAU,CAAC,OAAO,WAAW,CAAC,CAAC,CAAC,CAAC,CAAA;KAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAI7G;;;;;;;;;;OAUG;IACH,OAAO,CAAC,mBAAmB;CAW3B;AAED,wFAAwF;AACxF,MAAM,WAAW,qBAAqB;IACrC,IAAI,EAAE,WAAW,CAAA;IACjB,MAAM,EAAE,MAAM,EAAE,EAAE,CAAA;IAClB,MAAM,EAAE,KAAK,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,GAAG,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;CAC7C;AAED;;;GAGG;AACH,MAAM,WAAW,SAAS;IACzB;;;;OAIG;IACH,UAAU,CAAC,EAAE,cAAc,CAAA;IAC3B;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAA;IAC5B;;;OAGG;IACH,GAAG,CAAC,EAAE,cAAc,CAAA;IACpB,6DAA6D;IAC7D,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB;;;;;;OAMG;IACH,mBAAmB,CAAC,EAAE,cAAc,CAAA;IACpC,yDAAyD;IACzD,uBAAuB,CAAC,EAAE,yBAAyB,CAAA;IACnD;;;;;;OAMG;IACH,cAAc,CAAC,EAAE,OAAO,CAAA;IACxB;;;;;;OAMG;IACH,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB;;;;;OAKG;IACH,SAAS,CAAC,EAAE,UAAU,CAAA;CACtB"}
package/out/classifier.js CHANGED
@@ -10,9 +10,14 @@
10
10
  * Convenience wrappers `parseJson` / `parseTuples` / `parseXml` project the tree on the way out.
11
11
  */
12
12
  import { buildAddressTree, decodeAsJson, decodeAsTuples, decodeAsXml, } from "@mailwoman/core/decoder";
13
+ import { buildAnchorFeatures } from "./anchor-inference.js";
14
+ import { buildFstEmissionPriors } from "./fst-prior.js";
13
15
  import { STAGE2_BIO_LABELS } from "./labels.js";
16
+ import { repairPostcodeLabels } from "./postcode-repair.js";
14
17
  import { addEmissionMatrix, buildEmissionPriors } from "./query-shape-prior.js";
18
+ import { buildStreetMorphologyEmissionPriors } from "./street-morphology-prior.js";
15
19
  import { MailwomanTokenizer } from "./tokenizer.js";
20
+ import { repairUnitLabels } from "./unit-repair.js";
16
21
  import { buildBioEndMask, buildBioStartMask, buildBioTransitionMask, softmax, viterbi } from "./viterbi.js";
17
22
  export class NeuralAddressClassifier {
18
23
  cfg;
@@ -53,37 +58,53 @@ export class NeuralAddressClassifier {
53
58
  // + node:fs) and throws cleanly in a browser if called. Without the directive, webpack
54
59
  // pulls onnx-runner / weights into the browser chunk graph + then chokes on the Node-only
55
60
  // builtins they reference.
56
- const [{ OnnxRunner }, { resolveWeights, readLabelsFromModelCard }] = await Promise.all([
61
+ const [{ OnnxRunner }, { resolveWeights, readLabelsFromModelCard, readCrfTransitions }] = await Promise.all([
57
62
  import(/* webpackIgnore: true */ "./onnx-runner.js"),
58
63
  import(/* webpackIgnore: true */ "./weights.js"),
59
64
  ]);
60
65
  const resolved = resolveWeights(opts);
61
- // Read the trained label vocabulary from the bundled model-card.json when present. Falls
62
- // through to the constructor default (STAGE2_BIO_LABELS) for legacy bundles that predate
63
- // the `labels` field — those are always Stage 2 cards by construction, so the default is
64
- // the correct fallback. A future Stage 3 ship will require the card to carry the field.
65
66
  const labels = readLabelsFromModelCard(resolved.modelCardPath);
67
+ const crf = readCrfTransitions(resolved.crfTransitionsPath);
66
68
  const [tokenizer, runner] = await Promise.all([
67
69
  MailwomanTokenizer.loadFromFile(resolved.tokenizerPath),
68
70
  OnnxRunner.create(resolved.modelPath),
69
71
  ]);
70
- return new NeuralAddressClassifier({ tokenizer, runner, labels });
72
+ return new NeuralAddressClassifier({
73
+ tokenizer,
74
+ runner,
75
+ labels,
76
+ transitions: crf?.transitions,
77
+ startTransitions: crf?.startTransitions,
78
+ endTransitions: crf?.endTransitions,
79
+ ...(opts.postcodeAnchorLookup ? { postcodeAnchorLookup: opts.postcodeAnchorLookup } : {}),
80
+ });
71
81
  }
72
82
  /** Tokenize → infer → Viterbi (or argmax) → decoder tree. */
73
83
  async parse(text, opts) {
74
84
  if (text.length === 0)
75
85
  return { raw: text, roots: [] };
76
86
  const { pieces, ids } = this.cfg.tokenizer.encode(text);
77
- const { logits } = await this.cfg.runner.infer(ids);
78
- // QueryShape soft prior: when the caller supplies a QueryShape (typically from
79
- // `@mailwoman/query-shape`'s `computeQueryShape`), nudge per-token emissions toward the
80
- // labels implied by known-format hits. Bounded magnitude — confident encoder predictions
81
- // still win.
82
- const emissions = opts?.queryShape
87
+ // Postcode-anchor channel (#239/#240): build per-piece anchor features from the same lookup the
88
+ // model trained on, fed alongside the ids. No-op when no lookup is configured.
89
+ const anchor = this.cfg.postcodeAnchorLookup
90
+ ? buildAnchorFeatures(text, pieces, this.cfg.postcodeAnchorLookup)
91
+ : undefined;
92
+ const { logits } = await this.cfg.runner.infer(ids, anchor);
93
+ this.assertEmissionWidth(logits);
94
+ let emissions = opts?.queryShape
83
95
  ? addEmissionMatrix(logits, buildEmissionPriors(opts.queryShape, pieces, this.labels, {
84
96
  biasScale: opts.queryShapeBiasScale ?? 1.0,
97
+ inputText: text,
85
98
  }))
86
99
  : logits;
100
+ if (opts?.fst) {
101
+ emissions = addEmissionMatrix(emissions, buildFstEmissionPriors(opts.fst, pieces, this.labels, {
102
+ biasScale: opts.fstBiasScale ?? 1.0,
103
+ }));
104
+ }
105
+ if (opts?.fstStreetMorphology) {
106
+ emissions = addEmissionMatrix(emissions, buildStreetMorphologyEmissionPriors(opts.fstStreetMorphology, pieces, this.labels, opts.fstStreetMorphologyOpts ?? {}));
107
+ }
87
108
  const labelIndices = this.decodeMode === "viterbi"
88
109
  ? viterbi({
89
110
  emissions,
@@ -92,10 +113,8 @@ export class NeuralAddressClassifier {
92
113
  endTransitions: this.endTransitions,
93
114
  }).path
94
115
  : emissions.map((row) => argmaxSoftmax(row).idx);
95
- const tokens = pieces.map((p, i) => {
116
+ let tokens = pieces.map((p, i) => {
96
117
  const idx = labelIndices[i];
97
- // Confidence reports the encoder's *raw* probability (no prior baked in) so callers see
98
- // the model's own conviction, not the prior-augmented score.
99
118
  const probs = softmax(logits[i]);
100
119
  return {
101
120
  piece: p.piece,
@@ -105,7 +124,13 @@ export class NeuralAddressClassifier {
105
124
  confidence: probs[idx],
106
125
  };
107
126
  });
108
- return buildAddressTree(text, tokens);
127
+ if (opts?.postcodeRepair) {
128
+ tokens = repairPostcodeLabels(text, tokens).tokens;
129
+ }
130
+ if (opts?.unitRepair) {
131
+ tokens = repairUnitLabels(text, tokens).tokens;
132
+ }
133
+ return buildAddressTree(text, tokens, opts?.calibrate ? { calibrate: opts.calibrate } : undefined);
109
134
  }
110
135
  /**
111
136
  * Like `parse`, but also returns the raw per-token logits and piece offsets needed for per-span
@@ -116,12 +141,27 @@ export class NeuralAddressClassifier {
116
141
  return { tree: { raw: text, roots: [] }, logits: [], pieces: [] };
117
142
  }
118
143
  const { pieces, ids } = this.cfg.tokenizer.encode(text);
119
- const { logits } = await this.cfg.runner.infer(ids);
120
- const emissions = opts?.queryShape
144
+ // Postcode-anchor channel (#239/#240): build per-piece anchor features from the same lookup the
145
+ // model trained on, fed alongside the ids. No-op when no lookup is configured.
146
+ const anchor = this.cfg.postcodeAnchorLookup
147
+ ? buildAnchorFeatures(text, pieces, this.cfg.postcodeAnchorLookup)
148
+ : undefined;
149
+ const { logits } = await this.cfg.runner.infer(ids, anchor);
150
+ this.assertEmissionWidth(logits);
151
+ let emissions = opts?.queryShape
121
152
  ? addEmissionMatrix(logits, buildEmissionPriors(opts.queryShape, pieces, this.labels, {
122
153
  biasScale: opts.queryShapeBiasScale ?? 1.0,
154
+ inputText: text,
123
155
  }))
124
156
  : logits;
157
+ if (opts?.fst) {
158
+ emissions = addEmissionMatrix(emissions, buildFstEmissionPriors(opts.fst, pieces, this.labels, {
159
+ biasScale: opts.fstBiasScale ?? 1.0,
160
+ }));
161
+ }
162
+ if (opts?.fstStreetMorphology) {
163
+ emissions = addEmissionMatrix(emissions, buildStreetMorphologyEmissionPriors(opts.fstStreetMorphology, pieces, this.labels, opts.fstStreetMorphologyOpts ?? {}));
164
+ }
125
165
  const labelIndices = this.decodeMode === "viterbi"
126
166
  ? viterbi({
127
167
  emissions,
@@ -142,7 +182,7 @@ export class NeuralAddressClassifier {
142
182
  };
143
183
  });
144
184
  return {
145
- tree: buildAddressTree(text, tokens),
185
+ tree: buildAddressTree(text, tokens, opts?.calibrate ? { calibrate: opts.calibrate } : undefined),
146
186
  logits,
147
187
  pieces: pieces.map((p) => ({ start: p.start, end: p.end })),
148
188
  };
@@ -156,6 +196,27 @@ export class NeuralAddressClassifier {
156
196
  async parseXml(text, opts) {
157
197
  return decodeAsXml(await this.parse(text, opts), opts?.xml);
158
198
  }
199
+ /**
200
+ * Guard against a silent label/emission shape overrun. When the model emits MORE logits per token
201
+ * than the configured label vocabulary (e.g. a Stage 3 bundle loaded with the default Stage 2
202
+ * labels), viterbi indexes past the transition matrix and dies with an opaque `Cannot read
203
+ * properties of undefined (reading '0')`. Fail fast here with a message that names the contract
204
+ * the caller violated.
205
+ *
206
+ * The opposite shape (model narrower than labels) is intentionally permitted — STAGE2_BIO_LABELS
207
+ * prefix-extends STAGE1_BIO_LABELS so a Stage 1 model loaded with Stage 2 labels decodes
208
+ * correctly via the first 15 logits. See labels.ts for the contract.
209
+ */
210
+ assertEmissionWidth(logits) {
211
+ if (logits.length === 0)
212
+ return;
213
+ const width = logits[0].length;
214
+ if (width > this.labels.length) {
215
+ throw new Error(`Label/emission mismatch: model emits ${width} logits per token but the classifier was ` +
216
+ `configured with only ${this.labels.length} labels. Did you load a Stage 3 bundle without ` +
217
+ `passing its model-card labels? See loadFromWeights / loadNeuralClassifierFromUrls.`);
218
+ }
219
+ }
159
220
  }
160
221
  function argmaxSoftmax(row) {
161
222
  let maxIdx = 0;
@@ -1 +1 @@
1
- {"version":3,"file":"classifier.js","sourceRoot":"","sources":["../classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EACN,gBAAgB,EAChB,YAAY,EACZ,cAAc,EACd,WAAW,GAIX,MAAM,yBAAyB,CAAA;AAChC,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAA;AAE/C,OAAO,EAAE,iBAAiB,EAAE,mBAAmB,EAAuB,MAAM,wBAAwB,CAAA;AACpG,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAA;AACnD,OAAO,EAAE,eAAe,EAAE,iBAAiB,EAAE,sBAAsB,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAA;AA0C3G,MAAM,OAAO,uBAAuB;IAON;IANZ,MAAM,CAAmB;IACzB,UAAU,CAAsB;IAChC,WAAW,CAAY;IACvB,gBAAgB,CAAU;IAC1B,cAAc,CAAU;IAEzC,YAA6B,GAAkC;QAAlC,QAAG,GAAH,GAAG,CAA+B;QAC9D,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,iBAAiB,CAAA;QAC7C,IAAI,CAAC,UAAU,GAAG,GAAG,CAAC,MAAM,IAAI,SAAS,CAAA;QACzC,MAAM,UAAU,GAAG,sBAAsB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACtD,IAAI,GAAG,CAAC,WAAW,EAAE,CAAC;YACrB,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC,UAAU,EAAE,GAAG,CAAC,WAAW,CAAC,CAAA;QAC5D,CAAC;aAAM,CAAC;YACP,IAAI,CAAC,WAAW,GAAG,UAAU,CAAA;QAC9B,CAAC;QACD,IAAI,CAAC,gBAAgB,GAAG,GAAG,CAAC,gBAAgB,IAAI,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QAC9E,IAAI,CAAC,cAAc,GAAG,GAAG,CAAC,cAAc,IAAI,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IACzE,CAAC;IAED;;;;;;;;;;;OAWG;IACH,MAAM,CAAC,KAAK,CAAC,eAAe,CAAC,OAA2B,EAAE;QACzD,yFAAyF;QACzF,2FAA2F;QAC3F,uFAAuF;QACvF,0FAA0F;QAC1F,2BAA2B;QAC3B,MAAM,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,cAAc,EAAE,uBAAuB,EAAE,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YACvF,MAAM,CAAC,yBAAyB,CAAC,kBAAkB,CAAC;YACpD,MAAM,CAAC,yBAAyB,CAAC,cAAc,CAAC;SAChD,CAAC,CAAA;QACF,MAAM,QAAQ,GAAoB,cAAc,CAAC,IAAI,CAAC,CAAA;QACtD,yFAAyF;QACzF,yFAAyF;QACzF,yFAAyF;QACzF,wFAAwF;QACxF,MAAM,MAAM,GAAG,uBAAuB,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAA;QAC9D,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC7C,kBAAkB,CAAC,YAAY,CAAC,QAAQ,CAAC,aAAa,CAAC;YACvD,UAAU,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC;SACrC,CAAC,CAAA;QACF,OAAO,IAAI,uBAAuB,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAA;IAClE,CAAC;IAED,6DAA6D;IAC7D,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,IAAgB;QACzC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,CAA
A;QAEtD,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QACvD,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;QAEnD,+EAA+E;QAC/E,wFAAwF;QACxF,yFAAyF;QACzF,aAAa;QACb,MAAM,SAAS,GAAG,IAAI,EAAE,UAAU;YACjC,CAAC,CAAC,iBAAiB,CACjB,MAAM,EACN,mBAAmB,CAAC,IAAI,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE;gBACzD,SAAS,EAAE,IAAI,CAAC,mBAAmB,IAAI,GAAG;aAC1C,CAAC,CACF;YACF,CAAC,CAAC,MAAM,CAAA;QAET,MAAM,YAAY,GACjB,IAAI,CAAC,UAAU,KAAK,SAAS;YAC5B,CAAC,CAAC,OAAO,CAAC;gBACR,SAAS;gBACT,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;gBACvC,cAAc,EAAE,IAAI,CAAC,cAAc;aACnC,CAAC,CAAC,IAAI;YACR,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAA;QAElD,MAAM,MAAM,GAAmB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAClD,MAAM,GAAG,GAAG,YAAY,CAAC,CAAC,CAAE,CAAA;YAC5B,wFAAwF;YACxF,6DAA6D;YAC7D,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAA;YACjC,OAAO;gBACN,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,GAAG,EAAE,CAAC,CAAC,GAAG;gBACV,KAAK,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,GAAG,CAA0B;gBACzD,UAAU,EAAE,KAAK,CAAC,GAAG,CAAE;aACvB,CAAA;QACF,CAAC,CAAC,CAAA;QAEF,OAAO,gBAAgB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;IACtC,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,eAAe,CAAC,IAAY,EAAE,IAAgB;QACnD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,EAAE,IAAI,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAA;QAClE,CAAC;QACD,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QACvD,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;QAEnD,MAAM,SAAS,GAAG,IAAI,EAAE,UAAU;YACjC,CAAC,CAAC,iBAAiB,CACjB,MAAM,EACN,mBAAmB,CAAC,IAAI,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE;gBACzD,SAAS,EAAE,IAAI,CAAC,mBAAmB,IAAI,GAAG;aAC1C,CAAC,CACF;YACF,CAAC,CAAC,MAAM,CAAA;QAET,MAAM,YAAY,GACjB,IAAI,CAAC,UAAU,KAAK,SAAS;YAC5B,CAAC,CAAC,OAAO,CAAC;gBACR,SAAS;gBACT,WAAW,EA
AE,IAAI,CAAC,WAAW;gBAC7B,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;gBACvC,cAAc,EAAE,IAAI,CAAC,cAAc;aACnC,CAAC,CAAC,IAAI;YACR,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAA;QAElD,MAAM,MAAM,GAAmB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAClD,MAAM,GAAG,GAAG,YAAY,CAAC,CAAC,CAAE,CAAA;YAC5B,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAA;YACjC,OAAO;gBACN,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,GAAG,EAAE,CAAC,CAAC,GAAG;gBACV,KAAK,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,GAAG,CAA0B;gBACzD,UAAU,EAAE,KAAK,CAAC,GAAG,CAAE;aACvB,CAAA;QACF,CAAC,CAAC,CAAA;QAEF,OAAO;YACN,IAAI,EAAE,gBAAgB,CAAC,IAAI,EAAE,MAAM,CAAC;YACpC,MAAM;YACN,MAAM,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;SAC3D,CAAA;IACF,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,IAAY,EAAE,IAAgB;QAC7C,OAAO,YAAY,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAA;IAClD,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,IAAY,EAAE,IAAgB;QAC/C,OAAO,cAAc,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAA;IACpD,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,IAAY,EAAE,IAA8D;QAC1F,OAAO,WAAW,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,CAAC,CAAA;IAC5D,CAAC;CACD;AA2BD,SAAS,aAAa,CAAC,GAAa;IACnC,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,IAAI,MAAM,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;IACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,IAAI,GAAG,CAAC,CAAC,CAAE,GAAG,MAAM,EAAE,CAAC;YACtB,MAAM,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;YAChB,MAAM,GAAG,CAAC,CAAA;QACX,CAAC;IACF,CAAC;IACD,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,KAAK,MAAM,CAAC,IAAI,GAAG;QAAE,MAAM,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,CAAA;IACnD,MAAM,IAAI,GAAG,CAAC,GAAG,MAAM,CAAA;IACvB,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;AAC7B,CAAC;AAED,uGAAuG;AACvG,SAAS,WAAW,CAAC,CAAa,EAAE,CAAa;IAChD,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAA;IAClB,MAAM,GAAG,GAAe,EAAE,CAAA;IAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EA
AE,CAAC;QAC5B,MAAM,GAAG,GAAG,IAAI,KAAK,CAAS,CAAC,CAAC,CAAA;QAChC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;YAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,CAAE,CAAA;QAC1D,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACd,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC"}
1
+ {"version":3,"file":"classifier.js","sourceRoot":"","sources":["../classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EACN,gBAAgB,EAChB,YAAY,EACZ,cAAc,EACd,WAAW,GAKX,MAAM,yBAAyB,CAAA;AAChC,OAAO,EAAE,mBAAmB,EAAqB,MAAM,uBAAuB,CAAA;AAC9E,OAAO,EAAE,sBAAsB,EAAuB,MAAM,gBAAgB,CAAA;AAC5E,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAA;AAE/C,OAAO,EAAE,oBAAoB,EAAE,MAAM,sBAAsB,CAAA;AAC3D,OAAO,EAAE,iBAAiB,EAAE,mBAAmB,EAAuB,MAAM,wBAAwB,CAAA;AACpG,OAAO,EAAE,mCAAmC,EAAkC,MAAM,8BAA8B,CAAA;AAClH,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAA;AACnD,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAA;AACnD,OAAO,EAAE,eAAe,EAAE,iBAAiB,EAAE,sBAAsB,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAA;AAoD3G,MAAM,OAAO,uBAAuB;IAON;IANZ,MAAM,CAAmB;IACzB,UAAU,CAAsB;IAChC,WAAW,CAAY;IACvB,gBAAgB,CAAU;IAC1B,cAAc,CAAU;IAEzC,YAA6B,GAAkC;QAAlC,QAAG,GAAH,GAAG,CAA+B;QAC9D,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,iBAAiB,CAAA;QAC7C,IAAI,CAAC,UAAU,GAAG,GAAG,CAAC,MAAM,IAAI,SAAS,CAAA;QACzC,MAAM,UAAU,GAAG,sBAAsB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACtD,IAAI,GAAG,CAAC,WAAW,EAAE,CAAC;YACrB,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC,UAAU,EAAE,GAAG,CAAC,WAAW,CAAC,CAAA;QAC5D,CAAC;aAAM,CAAC;YACP,IAAI,CAAC,WAAW,GAAG,UAAU,CAAA;QAC9B,CAAC;QACD,IAAI,CAAC,gBAAgB,GAAG,GAAG,CAAC,gBAAgB,IAAI,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QAC9E,IAAI,CAAC,cAAc,GAAG,GAAG,CAAC,cAAc,IAAI,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IACzE,CAAC;IAED;;;;;;;;;;;OAWG;IACH,MAAM,CAAC,KAAK,CAAC,eAAe,CAC3B,OAAqE,EAAE;QAEvE,yFAAyF;QACzF,2FAA2F;QAC3F,uFAAuF;QACvF,0FAA0F;QAC1F,2BAA2B;QAC3B,MAAM,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,cAAc,EAAE,uBAAuB,EAAE,kBAAkB,EAAE,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC3G,MAAM,CAAC,yBAAyB,CAAC,kBAAkB,CAAC;YACpD,MAAM,CAAC,yBAAyB,CAAC,cAAc,CAAC;SAChD,CAAC,CAAA;QACF,MAAM,QAAQ,GAAoB,cAAc,CAAC,IAAI,CAAC,CAAA;QACtD,MAAM,MAAM,GAAG,uBAAuB,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAA;QAC9D,MAAM,GAAG,GAAG,kBAAkB,CAAC,QAAQ,CAAC,kBAAkB,CAAC,CAAA;QAC3D,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC7C,kBAAkB,CAAC,YAAY,CAAC,QAAQ,CAAC,aAAa,CAAC;YACvD,UAAU,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAA
C;SACrC,CAAC,CAAA;QACF,OAAO,IAAI,uBAAuB,CAAC;YAClC,SAAS;YACT,MAAM;YACN,MAAM;YACN,WAAW,EAAE,GAAG,EAAE,WAAW;YAC7B,gBAAgB,EAAE,GAAG,EAAE,gBAAgB;YACvC,cAAc,EAAE,GAAG,EAAE,cAAc;YACnC,GAAG,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC,CAAC,EAAE,oBAAoB,EAAE,IAAI,CAAC,oBAAoB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SACzF,CAAC,CAAA;IACH,CAAC;IAED,6DAA6D;IAC7D,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,IAAgB;QACzC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,CAAA;QAEtD,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QACvD,gGAAgG;QAChG,+EAA+E;QAC/E,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,oBAAoB;YAC3C,CAAC,CAAC,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,oBAAoB,CAAC;YAClE,CAAC,CAAC,SAAS,CAAA;QACZ,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,EAAE,MAAM,CAAC,CAAA;QAE3D,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,CAAA;QAEhC,IAAI,SAAS,GAAG,IAAI,EAAE,UAAU;YAC/B,CAAC,CAAC,iBAAiB,CACjB,MAAM,EACN,mBAAmB,CAAC,IAAI,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE;gBACzD,SAAS,EAAE,IAAI,CAAC,mBAAmB,IAAI,GAAG;gBAC1C,SAAS,EAAE,IAAI;aACf,CAAC,CACF;YACF,CAAC,CAAC,MAAM,CAAA;QAET,IAAI,IAAI,EAAE,GAAG,EAAE,CAAC;YACf,SAAS,GAAG,iBAAiB,CAC5B,SAAS,EACT,sBAAsB,CAAC,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE;gBACrD,SAAS,EAAE,IAAI,CAAC,YAAY,IAAI,GAAG;aACnC,CAAC,CACF,CAAA;QACF,CAAC;QAED,IAAI,IAAI,EAAE,mBAAmB,EAAE,CAAC;YAC/B,SAAS,GAAG,iBAAiB,CAC5B,SAAS,EACT,mCAAmC,CAClC,IAAI,CAAC,mBAAmB,EACxB,MAAM,EACN,IAAI,CAAC,MAAM,EACX,IAAI,CAAC,uBAAuB,IAAI,EAAE,CAClC,CACD,CAAA;QACF,CAAC;QAED,MAAM,YAAY,GACjB,IAAI,CAAC,UAAU,KAAK,SAAS;YAC5B,CAAC,CAAC,OAAO,CAAC;gBACR,SAAS;gBACT,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;gBACvC,cAAc,EAAE,IAAI,CAAC,cAAc;aACnC,CAAC,CAAC,IAAI;YACR,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAA;QAElD,IAAI,MAAM,GAAmB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAChD,MAAM,GAAG,GAAG,YAAY,CAAC,CAAC,CAAE,CAAA;YAC5B,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAE,CAA
C,CAAA;YACjC,OAAO;gBACN,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,GAAG,EAAE,CAAC,CAAC,GAAG;gBACV,KAAK,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,GAAG,CAA0B;gBACzD,UAAU,EAAE,KAAK,CAAC,GAAG,CAAE;aACvB,CAAA;QACF,CAAC,CAAC,CAAA;QAEF,IAAI,IAAI,EAAE,cAAc,EAAE,CAAC;YAC1B,MAAM,GAAG,oBAAoB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,MAAM,CAAA;QACnD,CAAC;QACD,IAAI,IAAI,EAAE,UAAU,EAAE,CAAC;YACtB,MAAM,GAAG,gBAAgB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,MAAM,CAAA;QAC/C,CAAC;QAED,OAAO,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAA;IACnG,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,eAAe,CAAC,IAAY,EAAE,IAAgB;QACnD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,EAAE,IAAI,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAA;QAClE,CAAC;QACD,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QACvD,gGAAgG;QAChG,+EAA+E;QAC/E,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,oBAAoB;YAC3C,CAAC,CAAC,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,oBAAoB,CAAC;YAClE,CAAC,CAAC,SAAS,CAAA;QACZ,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,EAAE,MAAM,CAAC,CAAA;QAE3D,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,CAAA;QAEhC,IAAI,SAAS,GAAG,IAAI,EAAE,UAAU;YAC/B,CAAC,CAAC,iBAAiB,CACjB,MAAM,EACN,mBAAmB,CAAC,IAAI,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE;gBACzD,SAAS,EAAE,IAAI,CAAC,mBAAmB,IAAI,GAAG;gBAC1C,SAAS,EAAE,IAAI;aACf,CAAC,CACF;YACF,CAAC,CAAC,MAAM,CAAA;QAET,IAAI,IAAI,EAAE,GAAG,EAAE,CAAC;YACf,SAAS,GAAG,iBAAiB,CAC5B,SAAS,EACT,sBAAsB,CAAC,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE;gBACrD,SAAS,EAAE,IAAI,CAAC,YAAY,IAAI,GAAG;aACnC,CAAC,CACF,CAAA;QACF,CAAC;QAED,IAAI,IAAI,EAAE,mBAAmB,EAAE,CAAC;YAC/B,SAAS,GAAG,iBAAiB,CAC5B,SAAS,EACT,mCAAmC,CAClC,IAAI,CAAC,mBAAmB,EACxB,MAAM,EACN,IAAI,CAAC,MAAM,EACX,IAAI,CAAC,uBAAuB,IAAI,EAAE,CAClC,CACD,CAAA;QACF,CAAC;QAED,MAAM,YAAY,GACjB,IAAI,CAAC,UAAU,KAAK,SAAS;YAC5B,CAAC,CAAC,OAAO,CAAC;gBACR,SAAS;gBACT,WAAW
,EAAE,IAAI,CAAC,WAAW;gBAC7B,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;gBACvC,cAAc,EAAE,IAAI,CAAC,cAAc;aACnC,CAAC,CAAC,IAAI;YACR,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAA;QAElD,MAAM,MAAM,GAAmB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAClD,MAAM,GAAG,GAAG,YAAY,CAAC,CAAC,CAAE,CAAA;YAC5B,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAA;YACjC,OAAO;gBACN,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,GAAG,EAAE,CAAC,CAAC,GAAG;gBACV,KAAK,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,GAAG,CAA0B;gBACzD,UAAU,EAAE,KAAK,CAAC,GAAG,CAAE;aACvB,CAAA;QACF,CAAC,CAAC,CAAA;QAEF,OAAO;YACN,IAAI,EAAE,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;YACjG,MAAM;YACN,MAAM,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;SAC3D,CAAA;IACF,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,IAAY,EAAE,IAAgB;QAC7C,OAAO,YAAY,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAA;IAClD,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,IAAY,EAAE,IAAgB;QAC/C,OAAO,cAAc,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAA;IACpD,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,IAAY,EAAE,IAA8D;QAC1F,OAAO,WAAW,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,CAAC,CAAA;IAC5D,CAAC;IAED;;;;;;;;;;OAUG;IACK,mBAAmB,CAAC,MAA2B;QACtD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,OAAM;QAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAE,CAAC,MAAM,CAAA;QAC/B,IAAI,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YAChC,MAAM,IAAI,KAAK,CACd,wCAAwC,KAAK,2CAA2C;gBACvF,wBAAwB,IAAI,CAAC,MAAM,CAAC,MAAM,iDAAiD;gBAC3F,oFAAoF,CACrF,CAAA;QACF,CAAC;IACF,CAAC;CACD;AAmED,SAAS,aAAa,CAAC,GAAa;IACnC,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,IAAI,MAAM,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;IACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,IAAI,GAAG,CAAC,CAAC,CAAE,GAAG,MAAM,EAAE,CAAC;YACtB,MAAM,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;YAChB,MAAM,GAAG,CAAC,CAAA;QA
CX,CAAC;IACF,CAAC;IACD,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,KAAK,MAAM,CAAC,IAAI,GAAG;QAAE,MAAM,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,CAAA;IACnD,MAAM,IAAI,GAAG,CAAC,GAAG,MAAM,CAAA;IACvB,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;AAC7B,CAAC;AAED,uGAAuG;AACvG,SAAS,WAAW,CAAC,CAAa,EAAE,CAAa;IAChD,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAA;IAClB,MAAM,GAAG,GAAe,EAAE,CAAA;IAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,MAAM,GAAG,GAAG,IAAI,KAAK,CAAS,CAAC,CAAC,CAAA;QAChC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;YAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,CAAE,CAAA;QAC1D,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACd,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC"}
@@ -0,0 +1,71 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Soft-prior emission biases derived from the FST gazetteer. When the FST finds that a token
7
+ * sequence matches a known place name (e.g., "New York" → locality + region), this module
8
+ * produces additive biases that nudge the Viterbi decoder toward the matching BIO labels.
9
+ *
10
+ * Composes with the QueryShape prior via addEmissionMatrix — same integration point, same additive
11
+ * semantics.
12
+ *
13
+ * SentencePiece ↔ FST bridge: SentencePiece pieces are grouped into whitespace words (by the ▁
14
+ * sentinel), normalized through the same pipeline as FST edges (NFKC, lowercase, strip
15
+ * non-alnum), and walked through the FST as contiguous subpaths.
16
+ *
17
+ * Uses structural typing for the FST input so this module has zero dependencies on
18
+ * `@mailwoman/resolver-wof-sqlite` — consumers pass an FstMatcher instance, but this file only
19
+ * consumes the shape.
20
+ */
21
+ import type { TokenLike } from "./query-shape-prior.js";
22
+ export interface FstMatchLike {
23
+ stateId: number;
24
+ accepted: boolean;
25
+ depth: number;
26
+ }
27
+ export interface FstPlaceEntryLike {
28
+ wofID: number;
29
+ placetype: string;
30
+ importance: number;
31
+ }
32
+ export interface FstMatcherLike {
33
+ walk(tokens: string[]): FstMatchLike | null;
34
+ walkFrom(prev: FstMatchLike, token: string): FstMatchLike | null;
35
+ accepting(stateId: number): FstPlaceEntryLike[];
36
+ }
37
+ export interface WordGroup {
38
+ fstToken: string;
39
+ pieceIndices: number[];
40
+ }
41
+ export interface FstPriorOpts {
42
+ biasScale?: number;
43
+ /**
44
+ * Maximum bias magnitude (logits). Prevents large-population places from overriding the model.
45
+ * Default 3.0.
46
+ */
47
+ maxBias?: number;
48
+ suppressionScale?: number;
49
+ }
50
+ /**
51
+ * Build a `[seqLen][numLabels]` bias matrix from FST gazetteer matches.
52
+ *
53
+ * Walks all contiguous subpaths of the reconstructed whitespace-token sequence through the FST. For
54
+ * each accepting state, biases the corresponding BIO labels on the matched pieces.
55
+ */
56
+ export declare function buildFstEmissionPriors(fst: FstMatcherLike, pieces: ReadonlyArray<TokenLike & {
57
+ piece: string;
58
+ }>, labels: ReadonlyArray<string>, opts?: FstPriorOpts): number[][];
59
+ /**
60
+ * Group SentencePiece pieces into whitespace-delimited words. Each word's literal text is
61
+ * reconstructed by concatenating pieces (minus leading ▁), then normalized through the same
62
+ * pipeline the FST builder uses.
63
+ *
64
+ * Exported (alongside {@linkcode normalizeFstToken} and the {@linkcode WordGroup} type) so the
65
+ * street-morphology prior can reuse the same piece-grouping/normalization pipeline without
66
+ * duplication. Internal helper signature; not part of the public neural API.
67
+ */
68
+ export declare function groupPiecesIntoWords(pieces: ReadonlyArray<{
69
+ piece: string;
70
+ }>): WordGroup[];
71
+ //# sourceMappingURL=fst-prior.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fst-prior.d.ts","sourceRoot":"","sources":["../fst-prior.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAA;AAQvD,MAAM,WAAW,YAAY;IAC5B,OAAO,EAAE,MAAM,CAAA;IACf,QAAQ,EAAE,OAAO,CAAA;IACjB,KAAK,EAAE,MAAM,CAAA;CACb;AAED,MAAM,WAAW,iBAAiB;IACjC,KAAK,EAAE,MAAM,CAAA;IACb,SAAS,EAAE,MAAM,CAAA;IACjB,UAAU,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,cAAc;IAC9B,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,YAAY,GAAG,IAAI,CAAA;IAC3C,QAAQ,CAAC,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE,MAAM,GAAG,YAAY,GAAG,IAAI,CAAA;IAChE,SAAS,CAAC,OAAO,EAAE,MAAM,GAAG,iBAAiB,EAAE,CAAA;CAC/C;AAiBD,MAAM,WAAW,SAAS;IACzB,QAAQ,EAAE,MAAM,CAAA;IAChB,YAAY,EAAE,MAAM,EAAE,CAAA;CACtB;AAID,MAAM,WAAW,YAAY;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,gBAAgB,CAAC,EAAE,MAAM,CAAA;CACzB;AAED;;;;;GAKG;AACH,wBAAgB,sBAAsB,CACrC,GAAG,EAAE,cAAc,EACnB,MAAM,EAAE,aAAa,CAAC,SAAS,GAAG;IAAE,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,EACpD,MAAM,EAAE,aAAa,CAAC,MAAM,CAAC,EAC7B,IAAI,GAAE,YAAiB,GACrB,MAAM,EAAE,EAAE,CA+DZ;AAED;;;;;;;;GAQG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,EAAE,aAAa,CAAC;IAAE,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,GAAG,SAAS,EAAE,CAiC1F"}