@mailwoman/neural 2.2.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/out/anchor-inference.d.ts +57 -0
  2. package/out/anchor-inference.d.ts.map +1 -0
  3. package/out/anchor-inference.js +94 -0
  4. package/out/anchor-inference.js.map +1 -0
  5. package/out/browser.d.ts +2 -0
  6. package/out/browser.d.ts.map +1 -1
  7. package/out/browser.js +4 -0
  8. package/out/browser.js.map +1 -1
  9. package/out/classifier.d.ts +62 -2
  10. package/out/classifier.d.ts.map +1 -1
  11. package/out/classifier.js +78 -17
  12. package/out/classifier.js.map +1 -1
  13. package/out/fst-prior.d.ts +71 -0
  14. package/out/fst-prior.d.ts.map +1 -0
  15. package/out/fst-prior.js +173 -0
  16. package/out/fst-prior.js.map +1 -0
  17. package/out/index.d.ts +3 -0
  18. package/out/index.d.ts.map +1 -1
  19. package/out/index.js +3 -0
  20. package/out/index.js.map +1 -1
  21. package/out/labels.d.ts +3 -0
  22. package/out/labels.d.ts.map +1 -1
  23. package/out/labels.js +13 -0
  24. package/out/labels.js.map +1 -1
  25. package/out/onnx-runner.d.ts +8 -1
  26. package/out/onnx-runner.d.ts.map +1 -1
  27. package/out/onnx-runner.js +31 -1
  28. package/out/onnx-runner.js.map +1 -1
  29. package/out/postcode-anchor.d.ts +117 -0
  30. package/out/postcode-anchor.d.ts.map +1 -0
  31. package/out/postcode-anchor.js +269 -0
  32. package/out/postcode-anchor.js.map +1 -0
  33. package/out/postcode-binary-resolver.d.ts +60 -0
  34. package/out/postcode-binary-resolver.d.ts.map +1 -0
  35. package/out/postcode-binary-resolver.js +208 -0
  36. package/out/postcode-binary-resolver.js.map +1 -0
  37. package/out/postcode-repair.d.ts +65 -0
  38. package/out/postcode-repair.d.ts.map +1 -0
  39. package/out/postcode-repair.js +171 -0
  40. package/out/postcode-repair.js.map +1 -0
  41. package/out/proposal-classifier.d.ts.map +1 -1
  42. package/out/proposal-classifier.js +3 -1
  43. package/out/proposal-classifier.js.map +1 -1
  44. package/out/query-shape-prior.d.ts +12 -0
  45. package/out/query-shape-prior.d.ts.map +1 -1
  46. package/out/query-shape-prior.js +132 -2
  47. package/out/query-shape-prior.js.map +1 -1
  48. package/out/street-morphology-prior.d.ts +56 -0
  49. package/out/street-morphology-prior.d.ts.map +1 -0
  50. package/out/street-morphology-prior.js +159 -0
  51. package/out/street-morphology-prior.js.map +1 -0
  52. package/out/unit-repair.d.ts +46 -0
  53. package/out/unit-repair.d.ts.map +1 -0
  54. package/out/unit-repair.js +147 -0
  55. package/out/unit-repair.js.map +1 -0
  56. package/out/vitest.config.d.ts.map +1 -1
  57. package/out/vitest.config.js +3 -0
  58. package/out/vitest.config.js.map +1 -1
  59. package/out/weights.d.ts +27 -3
  60. package/out/weights.d.ts.map +1 -1
  61. package/out/weights.js +46 -2
  62. package/out/weights.js.map +1 -1
  63. package/package.json +6 -2
@@ -0,0 +1 @@
1
+ {"version":3,"file":"street-morphology-prior.js","sourceRoot":"","sources":["../street-morphology-prior.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,OAAO,EAAE,oBAAoB,EAAuC,MAAM,gBAAgB,CAAA;AAyB1F;;;;;GAKG;AACH,MAAM,UAAU,mCAAmC,CAClD,GAAmB,EACnB,MAAoD,EACpD,MAA6B,EAC7B,OAAkC,EAAE;IAEpC,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAA;IACvB,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAA;IACvB,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,GAAG,CAAA;IACvC,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,IAAI,GAAG,CAAA;IAC7C,MAAM,sBAAsB,GAAG,IAAI,CAAC,sBAAsB,IAAI,GAAG,CAAA;IACjE,MAAM,wBAAwB,GAAG,IAAI,CAAC,wBAAwB,IAAI,GAAG,CAAA;IAErE,MAAM,MAAM,GAAe,EAAE,CAAA;IAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;QAAE,MAAM,CAAC,IAAI,CAAC,IAAI,KAAK,CAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAA;IAErE,MAAM,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAA;IAC5C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAE,EAAE,CAAC,CAAC,CAAA;IAErE,MAAM,aAAa,GAAG,UAAU,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAA;IACvD,MAAM,aAAa,GAAG,UAAU,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAA;IACvD,MAAM,aAAa,GAAG,UAAU,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAA;IACvD,MAAM,aAAa,GAAG,UAAU,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAA;IACvD,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,UAAU,CAAC,CAAA;IAC1C,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,UAAU,CAAC,CAAA;IAC1C,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAA;IACtD,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAA;IAEtD,6FAA6F;IAC7F,kFAAkF;IAClF,IAAI,OAAO,KAAK,SAAS,IAAI,aAAa,KAAK,SAAS,IAAI,aAAa,KAAK,SAAS,EAAE,CAAC;QACzF,OAAO,MAAM,CAAA;IACd,CAAC;IAED,MAAM,UAAU,GAAG,oBAAoB,CAAC,MAAM,CAAC,CAAA;IAC/C,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,MAAM,CAAA;IAQ1C,MAAM,YAAY,GAAiB,EAAE,CAAA;IAErC,0FAA0F;IAC1F,oCAAoC;IACpC,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,UAAU,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE,CAAC;QACxD,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAE,CAAA;QAChC,IAAI,KAAK,CAAC,QAAQ,KAAK,EAAE;YAAE,SAAQ;QAEnC,MAAM,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAA;QAC1C,IAAI,CAAC,OAAO;YAAE,SAAQ;
QAEtB,IAAI,OAAO,GAAG,CAAC,CAAC,CAAA;QAChB,IAAI,WAAW,GAAG,CAAC,CAAC,CAAA;QACpB,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;YACtB,OAAO,GAAG,KAAK,CAAA;YACf,WAAW,GAAG,OAAO,CAAC,OAAO,CAAA;QAC9B,CAAC;QAED,IAAI,OAAO,GAAG,OAAO,CAAA;QACrB,KAAK,IAAI,GAAG,GAAG,KAAK,GAAG,CAAC,EAAE,GAAG,GAAG,UAAU,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;YAC1D,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,CAAE,CAAA;YAClC,IAAI,SAAS,CAAC,QAAQ,KAAK,EAAE;gBAAE,SAAQ;YAEvC,MAAM,IAAI,GAAG,GAAG,CAAC,QAAQ,CAAC,OAAO,EAAE,SAAS,CAAC,QAAQ,CAAC,CAAA;YACtD,IAAI,CAAC,IAAI;gBAAE,MAAK;YAChB,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACnB,OAAO,GAAG,GAAG,CAAA;gBACb,WAAW,GAAG,IAAI,CAAC,OAAO,CAAA;YAC3B,CAAC;YACD,OAAO,GAAG,IAAI,CAAA;QACf,CAAC;QAED,IAAI,OAAO,KAAK,CAAC,CAAC;YAAE,SAAQ;QAE5B,2FAA2F;QAC3F,uEAAuE;QACvE,MAAM,OAAO,GAAG,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,CAAA;QAC1C,MAAM,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,cAAc,CAAC,CAAA;QACpE,IAAI,CAAC,QAAQ;YAAE,SAAQ;QAEvB,YAAY,CAAC,IAAI,CAAC,EAAE,aAAa,EAAE,KAAK,EAAE,WAAW,EAAE,OAAO,EAAE,CAAC,CAAA;QAEjE,8CAA8C;QAC9C,MAAM,iBAAiB,GAAa,EAAE,CAAA;QACtC,KAAK,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC,IAAI,OAAO,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,EAAE,GAAG,UAAU,CAAC,CAAC,CAAE,CAAA;YACzB,IAAI,EAAE,CAAC,QAAQ,KAAK,EAAE;gBAAE,SAAQ;YAChC,KAAK,MAAM,EAAE,IAAI,EAAE,CAAC,YAAY;gBAAE,iBAAiB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;QAC7D,CAAC;QAED,0FAA0F;QAC1F,yFAAyF;QACzF,yFAAyF;QACzF,MAAM,SAAS,GAAG,SAAS,GAAG,YAAY,CAAA;QAC1C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,iBAAiB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACnD,MAAM,EAAE,GAAG,iBAAiB,CAAC,CAAC,CAAE,CAAA;YAChC,MAAM,SAAS,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,aAAa,IAAI,aAAa,CAAC,CAAA;YAC5E,MAAM,SAAS,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,aAAa,IAAI,aAAa,CAAC,CAAA;YAC5E,MAAM,CAAC,EAAE,CAAE,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAE,CAAC,SAAS,CAAE,EAAE,SAAS,CAAC,CAAA;YACrE,MAAM,CAAC,EAAE,CAAE,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAE,CAAC,SAAS,CAAE,EAAE,SAAS,CAAC,CAAA;QACtE,CAAC;IACF,CAAC;IAED,IAAI,YAAY,CAAC,MAA
M,KAAK,CAAC;QAAE,OAAO,MAAM,CAAA;IAE5C,yFAAyF;IACzF,kFAAkF;IAClF,sBAAsB;IACtB,MAAM,mBAAmB,GAAG,SAAS,GAAG,sBAAsB,CAAA;IAC9D,KAAK,MAAM,KAAK,IAAI,YAAY,EAAE,CAAC;QAClC,MAAM,MAAM,GAAG,aAAa,CAAC,UAAU,EAAE,KAAK,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC,CAAA;QACjE,MAAM,KAAK,GAAG,aAAa,CAAC,UAAU,EAAE,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,CAAA;QAE9D,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC;YACzC,IAAI,CAAC,SAAS;gBAAE,SAAQ;YACxB,MAAM,OAAO,GAAG,SAAS,CAAC,YAAY,CAAA;YACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACzC,MAAM,EAAE,GAAG,OAAO,CAAC,CAAC,CAAE,CAAA;gBACtB,MAAM,SAAS,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,IAAI,OAAO,CAAC,CAAA;gBAC1D,MAAM,CAAC,EAAE,CAAE,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAE,CAAC,SAAS,CAAE,EAAE,mBAAmB,CAAC,CAAA;gBAE/E,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;oBAC3B,MAAM,SAAS,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,IAAI,OAAO,CAAC,CAAA;oBAC1D,MAAM,CAAC,EAAE,CAAE,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAE,CAAC,SAAS,CAAE,EAAE,CAAC,wBAAwB,CAAC,CAAA;gBACtF,CAAC;YACF,CAAC;QACF,CAAC;IACF,CAAC;IAED,OAAO,MAAM,CAAA;AACd,CAAC;AAED;;;;GAIG;AACH,SAAS,aAAa,CAAC,MAAmB,EAAE,YAAoB,EAAE,SAAiB;IAClF,KAAK,IAAI,CAAC,GAAG,YAAY,GAAG,SAAS,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;QACpF,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;QACpB,IAAI,CAAC,CAAC,QAAQ,KAAK,EAAE;YAAE,OAAO,CAAC,CAAA;IAChC,CAAC;IACD,OAAO,IAAI,CAAA;AACZ,CAAC"}
@@ -0,0 +1,46 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Secondary-unit regex repair pass — parser-improvement backlog (2026-05-30).
7
+ *
8
+ * The three-arena capability eval surfaced a persistent neural weakness: the
9
+ * model DROPS secondary units. "123 Main St Apt 456" → no unit label; the
10
+ * postal-standards secondary-unit edge class scored 0% neural. Units have a
11
+ * rigid surface shape (a designator keyword + an identifier), so — exactly
12
+ * like the postcode-repair pass (#35) — we can detect them deterministically
13
+ * and repair the BIO labels AFTER decode but BEFORE `buildAddressTree`. The
14
+ * model is untouched; this is a decoder-side correction, the same "lowest
15
+ * risk" lever family as postcode-repair.
16
+ *
17
+ * PRECISION GUARDS (mirror postcode-repair — never regress a confident parse):
18
+ * - We only fire on EXPLICIT designators (Apt, Ste, Suite, Unit, Rm, Floor,
19
+ * Bldg, Flat, … + bare "#<n>"). Ambiguous tokens are deliberately excluded:
20
+ * "Box" (that's po_box), bare "F"/"No" (too greedy), "Space"/"Stop" (common
21
+ * words).
22
+ * - ADD path (model emitted no unit over the matched run): allowed ONLY over
23
+ * `O` or `locality` / `dependent_locality` tokens — never over house_number /
24
+ * street* / postcode / po_box / region / country / venue. So a confidently-labeled street or number is safe.
25
+ * - SNAP path: when the model already started a unit span inside the match,
26
+ * we expand/clip it to the full detected shape.
27
+ * - Local smear-clip: unit tokens immediately flanking a snapped run are
28
+ * cleared (mirrors postcode-repair) so "Apt 4 Springfield" can't leave a
29
+ * stray I-unit on "Springfield".
30
+ *
31
+ * Opt-in via `ParseOpts.unitRepair` (postcode-repair earned default-on only
32
+ * after a measured +135/0; unit-repair stays opt-in until the v0.7.2 arena
33
+ * re-run quantifies its delta).
34
+ */
35
+ import type { DecoderToken } from "@mailwoman/core/decoder";
36
+ export interface RepairResult {
37
+ tokens: DecoderToken[];
38
+ /** Number of token labels changed — for telemetry / logging. */
39
+ changed: number;
40
+ }
41
+ /**
42
+ * Repair secondary-unit label spans in a decoded token sequence using designator
43
+ * regexes. Returns a NEW token array (inputs are not mutated) plus a change count.
44
+ */
45
+ export declare function repairUnitLabels(text: string, input: readonly DecoderToken[]): RepairResult;
46
+ //# sourceMappingURL=unit-repair.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"unit-repair.d.ts","sourceRoot":"","sources":["../unit-repair.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAA;AA+E3D,MAAM,WAAW,YAAY;IAC5B,MAAM,EAAE,YAAY,EAAE,CAAA;IACtB,gEAAgE;IAChE,OAAO,EAAE,MAAM,CAAA;CACf;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,SAAS,YAAY,EAAE,GAAG,YAAY,CA8C3F"}
@@ -0,0 +1,147 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Secondary-unit regex repair pass — parser-improvement backlog (2026-05-30).
7
+ *
8
+ * The three-arena capability eval surfaced a persistent neural weakness: the
9
+ * model DROPS secondary units. "123 Main St Apt 456" → no unit label; the
10
+ * postal-standards secondary-unit edge class scored 0% neural. Units have a
11
+ * rigid surface shape (a designator keyword + an identifier), so — exactly
12
+ * like the postcode-repair pass (#35) — we can detect them deterministically
13
+ * and repair the BIO labels AFTER decode but BEFORE `buildAddressTree`. The
14
+ * model is untouched; this is a decoder-side correction, the same "lowest
15
+ * risk" lever family as postcode-repair.
16
+ *
17
+ * PRECISION GUARDS (mirror postcode-repair — never regress a confident parse):
18
+ * - We only fire on EXPLICIT designators (Apt, Ste, Suite, Unit, Rm, Floor,
19
+ * Bldg, Flat, … + bare "#<n>"). Ambiguous tokens are deliberately excluded:
20
+ * "Box" (that's po_box), bare "F"/"No" (too greedy), "Space"/"Stop" (common
21
+ * words).
22
+ * - ADD path (model emitted no unit over the matched run): allowed ONLY over
23
+ * `O` or `locality` / `dependent_locality` tokens — never over house_number /
24
+ * street* / postcode / po_box / region / country / venue. So a confidently-labeled street or number is safe.
25
+ * - SNAP path: when the model already started a unit span inside the match,
26
+ * we expand/clip it to the full detected shape.
27
+ * - Local smear-clip: unit tokens immediately flanking a snapped run are
28
+ * cleared (mirrors postcode-repair) so "Apt 4 Springfield" can't leave a
29
+ * stray I-unit on "Springfield".
30
+ *
31
+ * Opt-in via `ParseOpts.unitRepair` (postcode-repair earned default-on only
32
+ * after a measured +135/0; unit-repair stays opt-in until the v0.7.2 arena
33
+ * re-run quantifies its delta).
34
+ */
35
/**
 * Secondary-unit shape patterns, ordered most-specific → least. Case-insensitive
 * (unit designators appear in every casing in real data). The identifier is a
 * 1-5 digit number with an optional trailing letter ("4B"), a single letter
 * ("STE D"), or a letter+digits — kept tight so we don't swallow following words.
 */
const UNIT_DESIGNATORS = "APARTMENT|APT|SUITE|STE|UNIT|ROOM|RM|FLOOR|FLR|FL|BUILDING|BLDG|DEPARTMENT|DEPT|LOT|TRAILER|TRLR|SLIP|HANGAR|PIER|FLAT|PH|PENTHOUSE";
const UNIT_PATTERNS = [
    // Designator + optional "#"/"No." + identifier, e.g. "Apt 4B", "Ste 12", "STE D",
    // "Unit 9400", "Suite 100", "Rm 5", "Flat 2", "Apartment #3", "Bldg C".
    // The `\b` after the designator is load-bearing: it stops "Unit" matching inside
    // "United", "Fl" inside "Florida", etc. The trailing `\b` on the identifier stops
    // "Apt Main" capturing the "M" of "Main" (single-letter ident only fires on a
    // standalone token like "STE D").
    {
        label: "designator",
        re: new RegExp(`\\b(?:${UNIT_DESIGNATORS})\\b\\.?\\s*#?\\s*(?:No\\.?\\s*)?(?:\\d{1,5}[A-Za-z]?|[A-Za-z]\\d{0,4})\\b`, "gi"),
    },
    // Bare hash + identifier, e.g. "#104", "# 4B". Common US secondary-unit form.
    { label: "hash", re: /#\s*\d{1,5}[A-Za-z]?\b/g },
];
const UNIT_B = "B-unit";
const UNIT_I = "I-unit";
const OUTSIDE = "O";
/**
 * Tags a unit span is allowed to overwrite on the ADD path. The v0.7.2 arena
 * showed the dominant failure for bare designator-led units ("Flat 2 14 Smith
 * St", "APT 2 …") is the model labeling the WHOLE designator+identifier run as
 * `locality` — not leaving it `O`. An explicit designator + identifier is a
 * high-confidence "this is a unit" shape (a real locality/suburb name never has
 * that form), so — exactly like postcode-repair's ADD_OVER_TAGS — we let it
 * reclaim a `locality`/`dependent_locality` span. Structural tags
 * (house_number, street*, postcode, po_box, region, country, venue) stay off the
 * list so a confident parse is never clobbered. (`O` is always eligible.)
 */
const ADD_OVER_TAGS = new Set(["locality", "dependent_locality"]);
/** True when `label` is one of the two BIO unit labels. */
function isUnitLabel(label) {
    return label === UNIT_B || label === UNIT_I;
}
/** Extract the bare tag from a BIO label ("B-locality" → "locality", "O" → null). */
function tagOf(label) {
    return label === OUTSIDE ? null : label.slice(2);
}
/**
 * Collect non-overlapping unit matches, preferring the longest match and then the
 * more-specific (earlier) pattern.
 *
 * @param {string} text Raw address text the token offsets refer to.
 * @returns {{ start: number, end: number, priority: number }[]} Accepted matches as
 *   half-open character ranges, in acceptance (longest-first) order.
 */
function collectMatches(text) {
    const candidates = [];
    UNIT_PATTERNS.forEach((pat, priority) => {
        // `matchAll` iterates over a private clone of the regex, so the shared `/g`
        // pattern's `lastIndex` is never advanced and needs no manual reset.
        for (const m of text.matchAll(pat.re)) {
            candidates.push({ start: m.index, end: m.index + m[0].length, priority });
        }
    });
    // Longest-match-wins, then most-specific; reject anything overlapping an accepted match.
    candidates.sort((a, b) => b.end - b.start - (a.end - a.start) || a.priority - b.priority);
    const accepted = [];
    for (const c of candidates) {
        if (accepted.some((a) => c.start < a.end && a.start < c.end))
            continue;
        accepted.push(c);
    }
    return accepted;
}
/**
 * Repair secondary-unit label spans in a decoded token sequence using designator
 * regexes. Returns a NEW token array (inputs are not mutated) plus a change count.
 *
 * @param {string} text Raw address text; token `start`/`end` are offsets into it.
 * @param {ReadonlyArray<{ start: number, end: number, label: string }>} input Decoded tokens.
 * @returns {{ tokens: object[], changed: number }} Shallow copies of the tokens with
 *   repaired labels, and the number of tokens whose final label differs from the input.
 */
export function repairUnitLabels(text, input) {
    const tokens = input.map((t) => ({ ...t }));
    const matches = collectMatches(text);
    if (matches.length === 0)
        return { tokens, changed: 0 };
    // Token indices already written as part of an accepted unit run during this pass.
    // The smear clip must never erase these: with adjacent units ("Bldg C Apt 4") the
    // second match's back-clip would otherwise wipe the first match's span.
    const claimed = new Set();
    const isStrayUnit = (j) => !claimed.has(j) && isUnitLabel(tokens[j].label);
    for (const m of matches) {
        // Tokens whose char span intersects the match.
        const overlap = [];
        for (let i = 0; i < tokens.length; i++) {
            const t = tokens[i];
            if (t.start < m.end && m.start < t.end)
                overlap.push(i);
        }
        if (overlap.length === 0)
            continue;
        const hasUnit = overlap.some((i) => isUnitLabel(tokens[i].label));
        if (!hasUnit) {
            // ADD path — explicit designators are high-confidence, but only ever over O or a
            // geographic-container tag (locality/dependent_locality — the tags the model
            // mislabels bare units as). Never clobber a confident house_number/street/postcode/
            // po_box/region/country/venue.
            const safe = overlap.every((i) => {
                const tag = tagOf(tokens[i].label);
                return tag === null || ADD_OVER_TAGS.has(tag);
            });
            if (!safe)
                continue;
        }
        // SNAP/ADD: relabel the matched run as a single unit span.
        overlap.forEach((i, k) => {
            tokens[i].label = k === 0 ? UNIT_B : UNIT_I;
            claimed.add(i);
        });
        // Local smear clip: clear stray unit tokens immediately flanking the snapped run,
        // stopping at anything that is not a unit label or that belongs to a claimed run.
        for (let j = overlap[0] - 1; j >= 0 && isStrayUnit(j); j--)
            tokens[j].label = OUTSIDE;
        for (let j = overlap[overlap.length - 1] + 1; j < tokens.length && isStrayUnit(j); j++)
            tokens[j].label = OUTSIDE;
    }
    // Count NET changes against the input so a token that is cleared and then relabeled
    // (or relabeled back to its original value) is not counted more than it deserves.
    let changed = 0;
    for (let i = 0; i < tokens.length; i++) {
        if (tokens[i].label !== input[i].label)
            changed++;
    }
    return { tokens, changed };
}
147
+ //# sourceMappingURL=unit-repair.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"unit-repair.js","sourceRoot":"","sources":["../unit-repair.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AAYH;;;;;GAKG;AACH,MAAM,gBAAgB,GACrB,qIAAqI,CAAA;AAEtI,MAAM,aAAa,GAAyC;IAC3D,kFAAkF;IAClF,wEAAwE;IACxE,iFAAiF;IACjF,kFAAkF;IAClF,8EAA8E;IAC9E,kCAAkC;IAClC;QACC,KAAK,EAAE,YAAY;QACnB,EAAE,EAAE,IAAI,MAAM,CAAC,SAAS,gBAAgB,4EAA4E,EAAE,IAAI,CAAC;KAC3H;IACD,8EAA8E;IAC9E,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE,EAAE,yBAAyB,EAAE;CAChD,CAAA;AAED,MAAM,MAAM,GAAG,QAAiC,CAAA;AAChD,MAAM,MAAM,GAAG,QAAiC,CAAA;AAChD,MAAM,OAAO,GAAG,GAA4B,CAAA;AAE5C;;;;;;;;;;GAUG;AACH,MAAM,aAAa,GAAG,IAAI,GAAG,CAAS,CAAC,UAAU,EAAE,oBAAoB,CAAC,CAAC,CAAA;AAEzE,SAAS,WAAW,CAAC,KAAa;IACjC,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,QAAQ,CAAA;AAChD,CAAC;AAED,qFAAqF;AACrF,SAAS,KAAK,CAAC,KAAa;IAC3B,OAAO,KAAK,KAAK,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA;AAC7C,CAAC;AAED,mGAAmG;AACnG,SAAS,cAAc,CAAC,IAAY;IACnC,MAAM,UAAU,GAAgB,EAAE,CAAA;IAClC,aAAa,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,EAAE;QACvC,GAAG,CAAC,EAAE,CAAC,SAAS,GAAG,CAAC,CAAA;QACpB,KAAK,IAAI,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC1D,UAAU,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,CAAC,CAAA;QAC1E,CAAC;IACF,CAAC,CAAC,CAAA;IACF,yFAAyF;IACzF,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAA;IACzF,MAAM,QAAQ,GAAgB,EAAE,CAAA;IAChC,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;QAC5B,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,CAAC;YAAE,SAAQ;QACtE,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IACD,OAAO,QAAQ,CAAA;AAChB,CAAC;AAQD;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAAY,EAAE,KAA8B;IAC5E,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,CAAC,CAAA;IACp
C,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,CAAA;IAC3C,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,EAAE,CAAA;IAEvD,IAAI,OAAO,GAAG,CAAC,CAAA;IACf,MAAM,QAAQ,GAAG,CAAC,CAAS,EAAE,KAA4B,EAAQ,EAAE;QAClE,IAAI,MAAM,CAAC,CAAC,CAAE,CAAC,KAAK,KAAK,KAAK,EAAE,CAAC;YAChC,MAAM,CAAC,CAAC,CAAE,CAAC,KAAK,GAAG,KAAK,CAAA;YACxB,OAAO,EAAE,CAAA;QACV,CAAC;IACF,CAAC,CAAA;IAED,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACzB,+CAA+C;QAC/C,MAAM,OAAO,GAAa,EAAE,CAAA;QAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;YACpB,IAAI,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG;gBAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACxD,CAAC;QACD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,SAAQ;QAElC,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,KAAK,CAAC,CAAC,CAAA;QAClE,IAAI,CAAC,OAAO,EAAE,CAAC;YACd,iFAAiF;YACjF,6EAA6E;YAC7E,oFAAoF;YACpF,+BAA+B;YAC/B,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;gBAChC,MAAM,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,KAAK,CAAC,CAAA;gBACnC,OAAO,GAAG,KAAK,IAAI,IAAI,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;YAC9C,CAAC,CAAC,CAAA;YACF,IAAI,CAAC,IAAI;gBAAE,SAAQ;QACpB,CAAC;QAED,2DAA2D;QAC3D,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAA;QAEjE,4EAA4E;QAC5E,KAAK,IAAI,CAAC,GAAG,OAAO,CAAC,CAAC,CAAE,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,KAAK,CAAC,EAAE,CAAC,EAAE;YAAE,QAAQ,CAAC,CAAC,EAAE,OAAO,CAAC,CAAA;QAChG,KAAK,IAAI,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAE,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,IAAI,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,KAAK,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACxG,QAAQ,CAAC,CAAC,EAAE,OAAO,CAAC,CAAA;QACrB,CAAC;IACF,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;AAC3B,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"vitest.config.d.ts","sourceRoot":"","sources":["../vitest.config.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;;AAUH,wBAiBE"}
1
+ {"version":3,"file":"vitest.config.d.ts","sourceRoot":"","sources":["../vitest.config.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;;AAUH,wBAoBE"}
@@ -22,6 +22,9 @@ export default defineConfig({
22
22
  },
23
23
  { find: /^@mailwoman\/core\/(.+)$/, replacement: resolve(here, "../core/$1/index.ts") },
24
24
  { find: /^@mailwoman\/core$/, replacement: resolve(here, "../core/index.ts") },
25
+ // @mailwoman/codex resolves to source too (per-address-system postal reference data).
26
+ { find: /^@mailwoman\/codex\/(.+)$/, replacement: resolve(here, "../codex/$1/index.ts") },
27
+ { find: /^@mailwoman\/codex$/, replacement: resolve(here, "../codex/index.ts") },
25
28
  ],
26
29
  },
27
30
  test: {
@@ -1 +1 @@
1
- {"version":3,"file":"vitest.config.js","sourceRoot":"","sources":["../vitest.config.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,uCAAuC;AAEvC,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AACnC,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAA;AACxC,OAAO,EAAE,YAAY,EAAE,MAAM,MAAM,CAAA;AAEnC,MAAM,IAAI,GAAG,aAAa,CAAC,IAAI,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;AAEzD,eAAe,YAAY,CAAC;IAC3B,OAAO,EAAE;QACR,KAAK,EAAE;YACN,+EAA+E;YAC/E,0EAA0E;YAC1E;gBACC,IAAI,EAAE,0CAA0C;gBAChD,WAAW,EAAE,OAAO,CAAC,IAAI,EAAE,qCAAqC,CAAC;aACjE;YACD,EAAE,IAAI,EAAE,0BAA0B,EAAE,WAAW,EAAE,OAAO,CAAC,IAAI,EAAE,qBAAqB,CAAC,EAAE;YACvF,EAAE,IAAI,EAAE,oBAAoB,EAAE,WAAW,EAAE,OAAO,CAAC,IAAI,EAAE,kBAAkB,CAAC,EAAE;SAC9E;KACD;IACD,IAAI,EAAE;QACL,OAAO,EAAE,KAAK;QACd,OAAO,EAAE,CAAC,oBAAoB,EAAE,WAAW,EAAE,YAAY,CAAC;KAC1D;CACD,CAAC,CAAA"}
1
+ {"version":3,"file":"vitest.config.js","sourceRoot":"","sources":["../vitest.config.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,uCAAuC;AAEvC,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AACnC,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAA;AACxC,OAAO,EAAE,YAAY,EAAE,MAAM,MAAM,CAAA;AAEnC,MAAM,IAAI,GAAG,aAAa,CAAC,IAAI,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;AAEzD,eAAe,YAAY,CAAC;IAC3B,OAAO,EAAE;QACR,KAAK,EAAE;YACN,+EAA+E;YAC/E,0EAA0E;YAC1E;gBACC,IAAI,EAAE,0CAA0C;gBAChD,WAAW,EAAE,OAAO,CAAC,IAAI,EAAE,qCAAqC,CAAC;aACjE;YACD,EAAE,IAAI,EAAE,0BAA0B,EAAE,WAAW,EAAE,OAAO,CAAC,IAAI,EAAE,qBAAqB,CAAC,EAAE;YACvF,EAAE,IAAI,EAAE,oBAAoB,EAAE,WAAW,EAAE,OAAO,CAAC,IAAI,EAAE,kBAAkB,CAAC,EAAE;YAC9E,sFAAsF;YACtF,EAAE,IAAI,EAAE,2BAA2B,EAAE,WAAW,EAAE,OAAO,CAAC,IAAI,EAAE,sBAAsB,CAAC,EAAE;YACzF,EAAE,IAAI,EAAE,qBAAqB,EAAE,WAAW,EAAE,OAAO,CAAC,IAAI,EAAE,mBAAmB,CAAC,EAAE;SAChF;KACD;IACD,IAAI,EAAE;QACL,OAAO,EAAE,KAAK;QACd,OAAO,EAAE,CAAC,oBAAoB,EAAE,WAAW,EAAE,YAAY,CAAC;KAC1D;CACD,CAAC,CAAA"}
package/out/weights.d.ts CHANGED
@@ -28,16 +28,30 @@ export interface ResolveWeightsOpts {
28
28
  modelPath?: string;
29
29
  /** Explicit tokenizer.model path; takes precedence over package auto-resolve. */
30
30
  tokenizerPath?: string;
31
+ /**
32
+ * Explicit `model-card.json` path (for the label vocab) on the explicit model+tokenizer path.
33
+ * When omitted, falls back to a `model-card.json` co-located with `modelPath`. Without a card,
34
+ * labels default to `STAGE2_BIO_LABELS` — which silently mis-decodes a STAGE3 (33-label) model
35
+ * into empty/garbage parses. Pass this (or co-locate the card) when evaluating a custom STAGE3
36
+ * checkpoint via explicit paths.
37
+ */
38
+ modelCardPath?: string;
31
39
  }
32
40
  export interface ResolvedWeights {
33
41
  modelPath: string;
34
42
  tokenizerPath: string;
35
43
  /**
36
- * Path to `model-card.json` alongside the resolved model. `undefined` when the caller passed
37
- * explicit paths or when the package directory has no card on disk. Read by `loadFromWeights` to
38
- * thread the trained label vocabulary into the classifier see {@link readLabelsFromModelCard}.
44
+ * Path to `model-card.json` for the resolved model. On the package path, the card co-located in
45
+ * the package dir. On the explicit path, `opts.modelCardPath` or a card co-located with
46
+ * `modelPath`. `undefined` only when no card is found. Read by `loadFromWeights` to thread the
47
+ * trained label vocabulary into the classifier — see {@link readLabelsFromModelCard}.
39
48
  */
40
49
  modelCardPath?: string;
50
+ /**
51
+ * Path to `crf-transitions.json` alongside the resolved model. `undefined` when the file doesn't
52
+ * exist (pre-v0.6.0 bundles or CE-only training).
53
+ */
54
+ crfTransitionsPath?: string;
41
55
  /** "explicit" if both paths came from opts; "package:<name>" if resolved via require.resolve. */
42
56
  source: string;
43
57
  }
@@ -54,4 +68,14 @@ export declare function resolveWeights(opts: ResolveWeightsOpts): ResolvedWeight
54
68
  * and should be loud, not silently re-defaulted.
55
69
  */
56
70
  export declare function readLabelsFromModelCard(modelCardPath: string | undefined): readonly string[] | undefined;
71
+ export interface CrfTransitions {
72
+ transitions: number[][];
73
+ startTransitions: number[];
74
+ endTransitions: number[];
75
+ }
76
+ /**
77
+ * Read learned CRF transition parameters from `crf-transitions.json`. Returns `undefined` when the
78
+ * file is missing or malformed — callers fall back to the structural BIO mask only.
79
+ */
80
+ export declare function readCrfTransitions(crfPath: string | undefined): CrfTransitions | undefined;
57
81
  //# sourceMappingURL=weights.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"weights.d.ts","sourceRoot":"","sources":["../weights.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAQH,MAAM,WAAW,kBAAkB;IAClC,wFAAwF;IACxF,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,4EAA4E;IAC5E,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iFAAiF;IACjF,aAAa,CAAC,EAAE,MAAM,CAAA;CACtB;AAED,MAAM,WAAW,eAAe;IAC/B,SAAS,EAAE,MAAM,CAAA;IACjB,aAAa,EAAE,MAAM,CAAA;IACrB;;;;OAIG;IACH,aAAa,CAAC,EAAE,MAAM,CAAA;IACtB,iGAAiG;IACjG,MAAM,EAAE,MAAM,CAAA;CACd;AAED,wBAAgB,cAAc,CAAC,IAAI,EAAE,kBAAkB,GAAG,eAAe,CA0CxE;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,uBAAuB,CAAC,aAAa,EAAE,MAAM,GAAG,SAAS,GAAG,SAAS,MAAM,EAAE,GAAG,SAAS,CAwBxG"}
1
+ {"version":3,"file":"weights.d.ts","sourceRoot":"","sources":["../weights.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAQH,MAAM,WAAW,kBAAkB;IAClC,wFAAwF;IACxF,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,4EAA4E;IAC5E,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iFAAiF;IACjF,aAAa,CAAC,EAAE,MAAM,CAAA;IACtB;;;;;;OAMG;IACH,aAAa,CAAC,EAAE,MAAM,CAAA;CACtB;AAED,MAAM,WAAW,eAAe;IAC/B,SAAS,EAAE,MAAM,CAAA;IACjB,aAAa,EAAE,MAAM,CAAA;IACrB;;;;;OAKG;IACH,aAAa,CAAC,EAAE,MAAM,CAAA;IACtB;;;OAGG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAA;IAC3B,iGAAiG;IACjG,MAAM,EAAE,MAAM,CAAA;CACd;AAED,wBAAgB,cAAc,CAAC,IAAI,EAAE,kBAAkB,GAAG,eAAe,CAkDxE;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,uBAAuB,CAAC,aAAa,EAAE,MAAM,GAAG,SAAS,GAAG,SAAS,MAAM,EAAE,GAAG,SAAS,CAwBxG;AAED,MAAM,WAAW,cAAc;IAC9B,WAAW,EAAE,MAAM,EAAE,EAAE,CAAA;IACvB,gBAAgB,EAAE,MAAM,EAAE,CAAA;IAC1B,cAAc,EAAE,MAAM,EAAE,CAAA;CACxB;AAED;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,SAAS,GAAG,cAAc,GAAG,SAAS,CA0B1F"}
package/out/weights.js CHANGED
@@ -32,7 +32,12 @@ export function resolveWeights(opts) {
32
32
  throw new Error(`Explicit modelPath does not exist: ${opts.modelPath}`);
33
33
  if (!existsSync(opts.tokenizerPath))
34
34
  throw new Error(`Explicit tokenizerPath does not exist: ${opts.tokenizerPath}`);
35
- return { modelPath: opts.modelPath, tokenizerPath: opts.tokenizerPath, source: "explicit" };
35
+ // Resolve a model-card for the label vocab: explicit opt first, else one co-located with the
36
+ // model. Omitting it makes the classifier fall back to STAGE2_BIO_LABELS, which mis-decodes a
37
+ // STAGE3 (33-label) checkpoint into empty parses — the trap that broke eval-matrix --model-path.
38
+ const coLocatedCard = resolve(dirname(opts.modelPath), "model-card.json");
39
+ const modelCardPath = opts.modelCardPath ?? (existsSync(coLocatedCard) ? coLocatedCard : undefined);
40
+ return { modelPath: opts.modelPath, tokenizerPath: opts.tokenizerPath, modelCardPath, source: "explicit" };
36
41
  }
37
42
  // Package names follow the all-lowercase BCP-47 convention (`neural-weights-en-us`,
38
43
  // `neural-weights-fr-fr`). The CLI's locale validation accepts canonical `en-US` / `fr-FR`
@@ -59,7 +64,9 @@ export function resolveWeights(opts) {
59
64
  }
60
65
  const modelCardCandidate = resolve(packageDir, "model-card.json");
61
66
  const modelCardPath = existsSync(modelCardCandidate) ? modelCardCandidate : undefined;
62
- return { modelPath, tokenizerPath, modelCardPath, source: `package:${packageName}` };
67
+ const crfCandidate = resolve(packageDir, "crf-transitions.json");
68
+ const crfTransitionsPath = existsSync(crfCandidate) ? crfCandidate : undefined;
69
+ return { modelPath, tokenizerPath, modelCardPath, crfTransitionsPath, source: `package:${packageName}` };
63
70
  }
64
71
  /**
65
72
  * Read the `labels` array from a `model-card.json` file. Returns `undefined` when the file is
@@ -100,4 +107,41 @@ export function readLabelsFromModelCard(modelCardPath) {
100
107
  }
101
108
  return Object.freeze(labels.slice());
102
109
  }
110
/**
 * Read learned CRF transition parameters from `crf-transitions.json`.
 *
 * The file is expected to be a JSON object with three keys:
 *   - `transitions`       — array of rows, each row an array of numbers
 *   - `start_transitions` — array of numbers
 *   - `end_transitions`   — array of numbers
 *
 * Returns `undefined` when the path is empty, the file is missing or unreadable, the JSON does not
 * parse, or the payload is malformed — callers fall back to the structural BIO mask only.
 *
 * "Malformed" covers more than missing keys: every entry must be a finite number and the three
 * tables must agree on the label count (`transitions` is N rows of N columns, both vectors have
 * N entries). Without that check a truncated or hand-edited file would be handed to the decoder
 * and silently produce NaN / undefined scores instead of triggering the documented fallback.
 * NOTE(review): the square N×N requirement assumes a standard linear-chain CRF export — confirm
 * against the training-side exporter.
 *
 * @param {string | undefined} crfPath Path to `crf-transitions.json`, or `undefined`.
 * @returns {{ transitions: number[][], startTransitions: number[], endTransitions: number[] } | undefined}
 */
export function readCrfTransitions(crfPath) {
    if (!crfPath || !existsSync(crfPath))
        return undefined;

    let raw;
    try {
        raw = readFileSync(crfPath, "utf8");
    }
    catch {
        // Best-effort by contract: an unreadable file behaves like a missing one.
        return undefined;
    }

    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch {
        return undefined;
    }
    // `typeof null === "object"`, so null has to be rejected explicitly.
    if (typeof parsed !== "object" || parsed === null)
        return undefined;

    const { transitions, start_transitions: start, end_transitions: end } = parsed;
    if (!Array.isArray(transitions) || !Array.isArray(start) || !Array.isArray(end))
        return undefined;

    const labelCount = transitions.length;
    if (labelCount === 0 || start.length === 0 || end.length === 0)
        return undefined;

    // A vector is usable only if it has one finite numeric score per label.
    const isScoreVector = (value) => Array.isArray(value) &&
        value.length === labelCount &&
        value.every((score) => typeof score === "number" && Number.isFinite(score));

    if (!isScoreVector(start) || !isScoreVector(end) || !transitions.every(isScoreVector))
        return undefined;

    return {
        transitions,
        startTransitions: start,
        endTransitions: end,
    };
}
103
147
  //# sourceMappingURL=weights.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"weights.js","sourceRoot":"","sources":["../weights.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AAClD,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAA;AAC3C,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAE5C,MAAM,GAAG,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AAwB1C,MAAM,UAAU,cAAc,CAAC,IAAwB;IACtD,MAAM,KAAK,GAAa,EAAE,CAAA;IAE1B,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;QAC1C,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,sCAAsC,IAAI,CAAC,SAAS,EAAE,CAAC,CAAA;QACxG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,aAAa,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,0CAA0C,IAAI,CAAC,aAAa,EAAE,CAAC,CAAA;QACpH,OAAO,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,aAAa,EAAE,IAAI,CAAC,aAAa,EAAE,MAAM,EAAE,UAAU,EAAE,CAAA;IAC5F,CAAC;IAED,oFAAoF;IACpF,2FAA2F;IAC3F,4DAA4D;IAC5D,MAAM,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,IAAI,OAAO,CAAC,CAAC,WAAW,EAAE,CAAA;IACrD,MAAM,WAAW,GAAG,6BAA6B,MAAM,EAAE,CAAA;IACzD,IAAI,UAAkB,CAAA;IACtB,IAAI,CAAC;QACJ,MAAM,WAAW,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,WAAW,eAAe,CAAC,CAAA;QAC9D,UAAU,GAAG,OAAO,CAAC,WAAW,CAAC,CAAA;IAClC,CAAC;IAAC,MAAM,CAAC;QACR,MAAM,IAAI,KAAK,CACd,qBAAqB,WAAW,iCAAiC,WAAW,IAAI;YAC/E,oDAAoD,CACrD,CAAA;IACF,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,OAAO,CAAC,UAAU,EAAE,YAAY,CAAC,CAAA;IACrE,MAAM,aAAa,GAAG,IAAI,CAAC,aAAa,IAAI,OAAO,CAAC,UAAU,EAAE,iBAAiB,CAAC,CAAA;IAClF,KAAK,CAAC,IAAI,CAAC,SAAS,EAAE,aAAa,CAAC,CAAA;IAEpC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QAC1D,MAAM,IAAI,KAAK,CACd,mBAAmB,WAAW,gBAAgB,UAAU,gCAAgC;YACvF,aAAa,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI;YACnC,iFAAiF;YACjF,oDAAoD,CACrD,CAAA;IACF,CAAC;IAED,MAAM,kBAAkB,GAAG,OAAO,CAAC,UAAU,EAAE,iBAAiB,CAAC,CAAA;IACjE,MAAM,aAAa,GAAG,UAAU,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,SAAS,CAAA;IAErF,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,EAAE,WAAW,WAAW,EAAE,EAAE,CAAA;AACrF,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,uBAAuB,CAAC,aAAiC;IACxE,IAAI,CAAC,aAAa,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;QAAE,OAAO,SAAS,CAAA;I
AClE,IAAI,GAAW,CAAA;IACf,IAAI,CAAC;QACJ,GAAG,GAAG,YAAY,CAAC,aAAa,EAAE,MAAM,CAAC,CAAA;IAC1C,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,SAAS,CAAA;IACjB,CAAC;IACD,IAAI,MAAe,CAAA;IACnB,IAAI,CAAC;QACJ,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;IACzB,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,SAAS,CAAA;IACjB,CAAC;IACD,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,IAAI;QAAE,OAAO,SAAS,CAAA;IACnE,MAAM,MAAM,GAAI,MAA+B,CAAC,MAAM,CAAA;IACtD,IAAI,MAAM,KAAK,SAAS;QAAE,OAAO,SAAS,CAAA;IAC1C,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,EAAE,CAAC;QAClG,MAAM,IAAI,KAAK,CACd,sBAAsB,aAAa,sCAAsC;YACxE,8CAA8C,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,GAAG,CACxE,CAAA;IACF,CAAC;IACD,OAAO,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,EAAE,CAAsB,CAAA;AAC1D,CAAC"}
1
+ {"version":3,"file":"weights.js","sourceRoot":"","sources":["../weights.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AAClD,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAA;AAC3C,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAE5C,MAAM,GAAG,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AAsC1C,MAAM,UAAU,cAAc,CAAC,IAAwB;IACtD,MAAM,KAAK,GAAa,EAAE,CAAA;IAE1B,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;QAC1C,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,sCAAsC,IAAI,CAAC,SAAS,EAAE,CAAC,CAAA;QACxG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,aAAa,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,0CAA0C,IAAI,CAAC,aAAa,EAAE,CAAC,CAAA;QACpH,6FAA6F;QAC7F,8FAA8F;QAC9F,iGAAiG;QACjG,MAAM,aAAa,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,iBAAiB,CAAC,CAAA;QACzE,MAAM,aAAa,GAAG,IAAI,CAAC,aAAa,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,SAAS,CAAC,CAAA;QACnG,OAAO,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,aAAa,EAAE,IAAI,CAAC,aAAa,EAAE,aAAa,EAAE,MAAM,EAAE,UAAU,EAAE,CAAA;IAC3G,CAAC;IAED,oFAAoF;IACpF,2FAA2F;IAC3F,4DAA4D;IAC5D,MAAM,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,IAAI,OAAO,CAAC,CAAC,WAAW,EAAE,CAAA;IACrD,MAAM,WAAW,GAAG,6BAA6B,MAAM,EAAE,CAAA;IACzD,IAAI,UAAkB,CAAA;IACtB,IAAI,CAAC;QACJ,MAAM,WAAW,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,WAAW,eAAe,CAAC,CAAA;QAC9D,UAAU,GAAG,OAAO,CAAC,WAAW,CAAC,CAAA;IAClC,CAAC;IAAC,MAAM,CAAC;QACR,MAAM,IAAI,KAAK,CACd,qBAAqB,WAAW,iCAAiC,WAAW,IAAI;YAC/E,oDAAoD,CACrD,CAAA;IACF,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,OAAO,CAAC,UAAU,EAAE,YAAY,CAAC,CAAA;IACrE,MAAM,aAAa,GAAG,IAAI,CAAC,aAAa,IAAI,OAAO,CAAC,UAAU,EAAE,iBAAiB,CAAC,CAAA;IAClF,KAAK,CAAC,IAAI,CAAC,SAAS,EAAE,aAAa,CAAC,CAAA;IAEpC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QAC1D,MAAM,IAAI,KAAK,CACd,mBAAmB,WAAW,gBAAgB,UAAU,gCAAgC;YACvF,aAAa,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI;YACnC,iFAAiF;YACjF,oDAAoD,CACrD,CAAA;IACF,CAAC;IAED,MAAM,kBAAkB,GAAG,OAAO,CAAC,UAAU,EAAE,iBAAiB,CAAC,CAAA;IACjE,MAAM,aAAa,GAAG,UAAU,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,
kBAAkB,CAAC,CAAC,CAAC,SAAS,CAAA;IAErF,MAAM,YAAY,GAAG,OAAO,CAAC,UAAU,EAAE,sBAAsB,CAAC,CAAA;IAChE,MAAM,kBAAkB,GAAG,UAAU,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,SAAS,CAAA;IAE9E,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,EAAE,WAAW,WAAW,EAAE,EAAE,CAAA;AACzG,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,uBAAuB,CAAC,aAAiC;IACxE,IAAI,CAAC,aAAa,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;QAAE,OAAO,SAAS,CAAA;IAClE,IAAI,GAAW,CAAA;IACf,IAAI,CAAC;QACJ,GAAG,GAAG,YAAY,CAAC,aAAa,EAAE,MAAM,CAAC,CAAA;IAC1C,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,SAAS,CAAA;IACjB,CAAC;IACD,IAAI,MAAe,CAAA;IACnB,IAAI,CAAC;QACJ,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;IACzB,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,SAAS,CAAA;IACjB,CAAC;IACD,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,IAAI;QAAE,OAAO,SAAS,CAAA;IACnE,MAAM,MAAM,GAAI,MAA+B,CAAC,MAAM,CAAA;IACtD,IAAI,MAAM,KAAK,SAAS;QAAE,OAAO,SAAS,CAAA;IAC1C,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,EAAE,CAAC;QAClG,MAAM,IAAI,KAAK,CACd,sBAAsB,aAAa,sCAAsC;YACxE,8CAA8C,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,GAAG,CACxE,CAAA;IACF,CAAC;IACD,OAAO,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,EAAE,CAAsB,CAAA;AAC1D,CAAC;AAQD;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAAC,OAA2B;IAC7D,IAAI,CAAC,OAAO,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC;QAAE,OAAO,SAAS,CAAA;IACtD,IAAI,GAAW,CAAA;IACf,IAAI,CAAC;QACJ,GAAG,GAAG,YAAY,CAAC,OAAO,EAAE,MAAM,CAAC,CAAA;IACpC,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,SAAS,CAAA;IACjB,CAAC;IACD,IAAI,MAAe,CAAA;IACnB,IAAI,CAAC;QACJ,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;IACzB,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,SAAS,CAAA;IACjB,CAAC;IACD,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,IAAI;QAAE,OAAO,SAAS,CAAA;IACnE,MAAM,GAAG,GAAG,MAAiC,CAAA;IAC7C,MAAM,WAAW,GAAG,GAAG,CAAC,WAAW,CAAA;IACnC,MAAM,KAAK,GAAG,GAAG,CAAC,iBAAiB,CAAA;IACnC,MAAM,GAAG,GAAG,GAAG,CAAC,eAAe,CAAA;IAC/B,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC;QAAE,OAAO,SAAS,CAAA;IACjG,IAAI,WAAW,CAAC,MAA
M,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,SAAS,CAAA;IACxF,OAAO;QACN,WAAW,EAAE,WAAyB;QACtC,gBAAgB,EAAE,KAAiB;QACnC,cAAc,EAAE,GAAe;KAC/B,CAAA;AACF,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mailwoman/neural",
3
- "version": "2.2.0",
3
+ "version": "4.0.0",
4
4
  "description": "Mailwoman neural classifier runtime: SentencePiece tokenizer + ONNX inference + decoder wiring.",
5
5
  "license": "AGPL-3.0-only",
6
6
  "repository": {
@@ -12,12 +12,16 @@
12
12
  "exports": {
13
13
  "./package.json": "./package.json",
14
14
  ".": "./out/index.js",
15
+ "./postcode-anchor": "./out/postcode-anchor.js",
16
+ "./postcode-binary-resolver": "./out/postcode-binary-resolver.js",
15
17
  "./tokenizer": "./out/tokenizer.js",
18
+ "./onnx-runner": "./out/onnx-runner.js",
16
19
  "./weights": "./out/weights.js",
17
20
  "./browser": "./out/browser.js"
18
21
  },
19
22
  "dependencies": {
20
- "@mailwoman/core": "2.2.0",
23
+ "@mailwoman/codex": "4.0.0",
24
+ "@mailwoman/core": "4.0.0",
21
25
  "@sctg/sentencepiece-js": "^1.3.3",
22
26
  "onnxruntime-node": "^1.26.0"
23
27
  },