@mailwoman/neural 2.2.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/out/anchor-inference.d.ts +57 -0
- package/out/anchor-inference.d.ts.map +1 -0
- package/out/anchor-inference.js +94 -0
- package/out/anchor-inference.js.map +1 -0
- package/out/browser.d.ts +2 -0
- package/out/browser.d.ts.map +1 -1
- package/out/browser.js +4 -0
- package/out/browser.js.map +1 -1
- package/out/classifier.d.ts +62 -2
- package/out/classifier.d.ts.map +1 -1
- package/out/classifier.js +78 -17
- package/out/classifier.js.map +1 -1
- package/out/fst-prior.d.ts +71 -0
- package/out/fst-prior.d.ts.map +1 -0
- package/out/fst-prior.js +173 -0
- package/out/fst-prior.js.map +1 -0
- package/out/index.d.ts +3 -0
- package/out/index.d.ts.map +1 -1
- package/out/index.js +3 -0
- package/out/index.js.map +1 -1
- package/out/labels.d.ts +3 -0
- package/out/labels.d.ts.map +1 -1
- package/out/labels.js +13 -0
- package/out/labels.js.map +1 -1
- package/out/onnx-runner.d.ts +8 -1
- package/out/onnx-runner.d.ts.map +1 -1
- package/out/onnx-runner.js +31 -1
- package/out/onnx-runner.js.map +1 -1
- package/out/postcode-anchor.d.ts +117 -0
- package/out/postcode-anchor.d.ts.map +1 -0
- package/out/postcode-anchor.js +269 -0
- package/out/postcode-anchor.js.map +1 -0
- package/out/postcode-binary-resolver.d.ts +60 -0
- package/out/postcode-binary-resolver.d.ts.map +1 -0
- package/out/postcode-binary-resolver.js +208 -0
- package/out/postcode-binary-resolver.js.map +1 -0
- package/out/postcode-repair.d.ts +65 -0
- package/out/postcode-repair.d.ts.map +1 -0
- package/out/postcode-repair.js +171 -0
- package/out/postcode-repair.js.map +1 -0
- package/out/proposal-classifier.d.ts.map +1 -1
- package/out/proposal-classifier.js +3 -1
- package/out/proposal-classifier.js.map +1 -1
- package/out/query-shape-prior.d.ts +12 -0
- package/out/query-shape-prior.d.ts.map +1 -1
- package/out/query-shape-prior.js +132 -2
- package/out/query-shape-prior.js.map +1 -1
- package/out/street-morphology-prior.d.ts +56 -0
- package/out/street-morphology-prior.d.ts.map +1 -0
- package/out/street-morphology-prior.js +159 -0
- package/out/street-morphology-prior.js.map +1 -0
- package/out/unit-repair.d.ts +46 -0
- package/out/unit-repair.d.ts.map +1 -0
- package/out/unit-repair.js +147 -0
- package/out/unit-repair.js.map +1 -0
- package/out/vitest.config.d.ts.map +1 -1
- package/out/vitest.config.js +3 -0
- package/out/vitest.config.js.map +1 -1
- package/out/weights.d.ts +27 -3
- package/out/weights.d.ts.map +1 -1
- package/out/weights.js +46 -2
- package/out/weights.js.map +1 -1
- package/package.json +6 -2
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"street-morphology-prior.js","sourceRoot":"","sources":["../street-morphology-prior.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,OAAO,EAAE,oBAAoB,EAAuC,MAAM,gBAAgB,CAAA;AAyB1F;;;;;GAKG;AACH,MAAM,UAAU,mCAAmC,CAClD,GAAmB,EACnB,MAAoD,EACpD,MAA6B,EAC7B,OAAkC,EAAE;IAEpC,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAA;IACvB,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAA;IACvB,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,GAAG,CAAA;IACvC,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,IAAI,GAAG,CAAA;IAC7C,MAAM,sBAAsB,GAAG,IAAI,CAAC,sBAAsB,IAAI,GAAG,CAAA;IACjE,MAAM,wBAAwB,GAAG,IAAI,CAAC,wBAAwB,IAAI,GAAG,CAAA;IAErE,MAAM,MAAM,GAAe,EAAE,CAAA;IAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;QAAE,MAAM,CAAC,IAAI,CAAC,IAAI,KAAK,CAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAA;IAErE,MAAM,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAA;IAC5C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAE,EAAE,CAAC,CAAC,CAAA;IAErE,MAAM,aAAa,GAAG,UAAU,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAA;IACvD,MAAM,aAAa,GAAG,UAAU,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAA;IACvD,MAAM,aAAa,GAAG,UAAU,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAA;IACvD,MAAM,aAAa,GAAG,UAAU,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAA;IACvD,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,UAAU,CAAC,CAAA;IAC1C,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,UAAU,CAAC,CAAA;IAC1C,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAA;IACtD,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAA;IAEtD,6FAA6F;IAC7F,kFAAkF;IAClF,IAAI,OAAO,KAAK,SAAS,IAAI,aAAa,KAAK,SAAS,IAAI,aAAa,KAAK,SAAS,EAAE,CAAC;QACzF,OAAO,MAAM,CAAA;IACd,CAAC;IAED,MAAM,UAAU,GAAG,oBAAoB,CAAC,MAAM,CAAC,CAAA;IAC/C,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,MAAM,CAAA;IAQ1C,MAAM,YAAY,GAAiB,EAAE,CAAA;IAErC,0FAA0F;IAC1F,oCAAoC;IACpC,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,UAAU,CAAC,MAAM,EAAE,KAAK,EAAE,EAAE,CAAC;QACxD,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAE,CAAA;QAChC,IAAI,KAAK,CAAC,QAAQ,KAAK,EAAE;YAAE,SAAQ;QAEnC,MAAM,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAA;QAC1C,IAAI,CAAC,OAAO;YAAE,SAAQ;QA
EtB,IAAI,OAAO,GAAG,CAAC,CAAC,CAAA;QAChB,IAAI,WAAW,GAAG,CAAC,CAAC,CAAA;QACpB,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;YACtB,OAAO,GAAG,KAAK,CAAA;YACf,WAAW,GAAG,OAAO,CAAC,OAAO,CAAA;QAC9B,CAAC;QAED,IAAI,OAAO,GAAG,OAAO,CAAA;QACrB,KAAK,IAAI,GAAG,GAAG,KAAK,GAAG,CAAC,EAAE,GAAG,GAAG,UAAU,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;YAC1D,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,CAAE,CAAA;YAClC,IAAI,SAAS,CAAC,QAAQ,KAAK,EAAE;gBAAE,SAAQ;YAEvC,MAAM,IAAI,GAAG,GAAG,CAAC,QAAQ,CAAC,OAAO,EAAE,SAAS,CAAC,QAAQ,CAAC,CAAA;YACtD,IAAI,CAAC,IAAI;gBAAE,MAAK;YAChB,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACnB,OAAO,GAAG,GAAG,CAAA;gBACb,WAAW,GAAG,IAAI,CAAC,OAAO,CAAA;YAC3B,CAAC;YACD,OAAO,GAAG,IAAI,CAAA;QACf,CAAC;QAED,IAAI,OAAO,KAAK,CAAC,CAAC;YAAE,SAAQ;QAE5B,2FAA2F;QAC3F,uEAAuE;QACvE,MAAM,OAAO,GAAG,GAAG,CAAC,SAAS,CAAC,WAAW,CAAC,CAAA;QAC1C,MAAM,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,cAAc,CAAC,CAAA;QACpE,IAAI,CAAC,QAAQ;YAAE,SAAQ;QAEvB,YAAY,CAAC,IAAI,CAAC,EAAE,aAAa,EAAE,KAAK,EAAE,WAAW,EAAE,OAAO,EAAE,CAAC,CAAA;QAEjE,8CAA8C;QAC9C,MAAM,iBAAiB,GAAa,EAAE,CAAA;QACtC,KAAK,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC,IAAI,OAAO,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,EAAE,GAAG,UAAU,CAAC,CAAC,CAAE,CAAA;YACzB,IAAI,EAAE,CAAC,QAAQ,KAAK,EAAE;gBAAE,SAAQ;YAChC,KAAK,MAAM,EAAE,IAAI,EAAE,CAAC,YAAY;gBAAE,iBAAiB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;QAC7D,CAAC;QAED,0FAA0F;QAC1F,yFAAyF;QACzF,yFAAyF;QACzF,MAAM,SAAS,GAAG,SAAS,GAAG,YAAY,CAAA;QAC1C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,iBAAiB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACnD,MAAM,EAAE,GAAG,iBAAiB,CAAC,CAAC,CAAE,CAAA;YAChC,MAAM,SAAS,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,aAAa,IAAI,aAAa,CAAC,CAAA;YAC5E,MAAM,SAAS,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,aAAa,IAAI,aAAa,CAAC,CAAA;YAC5E,MAAM,CAAC,EAAE,CAAE,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAE,CAAC,SAAS,CAAE,EAAE,SAAS,CAAC,CAAA;YACrE,MAAM,CAAC,EAAE,CAAE,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAE,CAAC,SAAS,CAAE,EAAE,SAAS,CAAC,CAAA;QACtE,CAAC;IACF,CAAC;IAED,IAAI,YAAY,CAAC,MAAM,
KAAK,CAAC;QAAE,OAAO,MAAM,CAAA;IAE5C,yFAAyF;IACzF,kFAAkF;IAClF,sBAAsB;IACtB,MAAM,mBAAmB,GAAG,SAAS,GAAG,sBAAsB,CAAA;IAC9D,KAAK,MAAM,KAAK,IAAI,YAAY,EAAE,CAAC;QAClC,MAAM,MAAM,GAAG,aAAa,CAAC,UAAU,EAAE,KAAK,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC,CAAA;QACjE,MAAM,KAAK,GAAG,aAAa,CAAC,UAAU,EAAE,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,CAAA;QAE9D,KAAK,MAAM,SAAS,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,EAAE,CAAC;YACzC,IAAI,CAAC,SAAS;gBAAE,SAAQ;YACxB,MAAM,OAAO,GAAG,SAAS,CAAC,YAAY,CAAA;YACtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACzC,MAAM,EAAE,GAAG,OAAO,CAAC,CAAC,CAAE,CAAA;gBACtB,MAAM,SAAS,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,IAAI,OAAO,CAAC,CAAA;gBAC1D,MAAM,CAAC,EAAE,CAAE,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAE,CAAC,SAAS,CAAE,EAAE,mBAAmB,CAAC,CAAA;gBAE/E,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;oBAC3B,MAAM,SAAS,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,IAAI,OAAO,CAAC,CAAA;oBAC1D,MAAM,CAAC,EAAE,CAAE,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAE,CAAC,SAAS,CAAE,EAAE,CAAC,wBAAwB,CAAC,CAAA;gBACtF,CAAC;YACF,CAAC;QACF,CAAC;IACF,CAAC;IAED,OAAO,MAAM,CAAA;AACd,CAAC;AAED;;;;GAIG;AACH,SAAS,aAAa,CAAC,MAAmB,EAAE,YAAoB,EAAE,SAAiB;IAClF,KAAK,IAAI,CAAC,GAAG,YAAY,GAAG,SAAS,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;QACpF,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;QACpB,IAAI,CAAC,CAAC,QAAQ,KAAK,EAAE;YAAE,OAAO,CAAC,CAAA;IAChC,CAAC;IACD,OAAO,IAAI,CAAA;AACZ,CAAC"}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Secondary-unit regex repair pass — parser-improvement backlog (2026-05-30).
|
|
7
|
+
*
|
|
8
|
+
* The three-arena capability eval surfaced a persistent neural weakness: the
|
|
9
|
+
* model DROPS secondary units. "123 Main St Apt 456" → no unit label; the
|
|
10
|
+
* postal-standards secondary-unit edge class scored 0% neural. Units have a
|
|
11
|
+
* rigid surface shape (a designator keyword + an identifier), so — exactly
|
|
12
|
+
* like the postcode-repair pass (#35) — we can detect them deterministically
|
|
13
|
+
* and repair the BIO labels AFTER decode but BEFORE `buildAddressTree`. The
|
|
14
|
+
* model is untouched; this is a decoder-side correction, the same "lowest
|
|
15
|
+
* risk" lever family as postcode-repair.
|
|
16
|
+
*
|
|
17
|
+
* PRECISION GUARDS (mirror postcode-repair — never regress a confident parse):
|
|
18
|
+
* - We only fire on EXPLICIT designators (Apt, Ste, Suite, Unit, Rm, Floor,
|
|
19
|
+
* Bldg, Flat, … + bare "#<n>"). Ambiguous tokens are deliberately excluded:
|
|
20
|
+
* "Box" (that's po_box), bare "F"/"No" (too greedy), "Space"/"Stop" (common
|
|
21
|
+
* words).
|
|
22
|
+
* - ADD path (model emitted no unit over the matched run): allowed ONLY over
|
|
23
|
+
* `O` or `locality` / `dependent_locality` tokens — never over house_number /
|
|
24
|
+
* street* / postcode / po_box / region / country / venue. So a confidently-labeled street or number is safe.
|
|
25
|
+
* - SNAP path: when the model already started a unit span inside the match,
|
|
26
|
+
* we expand/clip it to the full detected shape.
|
|
27
|
+
* - Local smear-clip: unit tokens immediately flanking a snapped run are
|
|
28
|
+
* cleared (mirrors postcode-repair) so "Apt 4 Springfield" can't leave a
|
|
29
|
+
* stray I-unit on "Springfield".
|
|
30
|
+
*
|
|
31
|
+
* Opt-in via `ParseOpts.unitRepair` (postcode-repair earned default-on only
|
|
32
|
+
* after a measured +135/0; unit-repair stays opt-in until the v0.7.2 arena
|
|
33
|
+
* re-run quantifies its delta).
|
|
34
|
+
*/
|
|
35
|
+
import type { DecoderToken } from "@mailwoman/core/decoder";
|
|
36
|
+
export interface RepairResult {
|
|
37
|
+
tokens: DecoderToken[];
|
|
38
|
+
/** Number of token labels changed — for telemetry / logging. */
|
|
39
|
+
changed: number;
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Repair secondary-unit label spans in a decoded token sequence using designator
|
|
43
|
+
* regexes. Returns a NEW token array (inputs are not mutated) plus a change count.
|
|
44
|
+
*/
|
|
45
|
+
export declare function repairUnitLabels(text: string, input: readonly DecoderToken[]): RepairResult;
|
|
46
|
+
//# sourceMappingURL=unit-repair.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"unit-repair.d.ts","sourceRoot":"","sources":["../unit-repair.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAA;AA+E3D,MAAM,WAAW,YAAY;IAC5B,MAAM,EAAE,YAAY,EAAE,CAAA;IACtB,gEAAgE;IAChE,OAAO,EAAE,MAAM,CAAA;CACf;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,SAAS,YAAY,EAAE,GAAG,YAAY,CA8C3F"}
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Secondary-unit regex repair pass — parser-improvement backlog (2026-05-30).
|
|
7
|
+
*
|
|
8
|
+
* The three-arena capability eval surfaced a persistent neural weakness: the
|
|
9
|
+
* model DROPS secondary units. "123 Main St Apt 456" → no unit label; the
|
|
10
|
+
* postal-standards secondary-unit edge class scored 0% neural. Units have a
|
|
11
|
+
* rigid surface shape (a designator keyword + an identifier), so — exactly
|
|
12
|
+
* like the postcode-repair pass (#35) — we can detect them deterministically
|
|
13
|
+
* and repair the BIO labels AFTER decode but BEFORE `buildAddressTree`. The
|
|
14
|
+
* model is untouched; this is a decoder-side correction, the same "lowest
|
|
15
|
+
* risk" lever family as postcode-repair.
|
|
16
|
+
*
|
|
17
|
+
* PRECISION GUARDS (mirror postcode-repair — never regress a confident parse):
|
|
18
|
+
* - We only fire on EXPLICIT designators (Apt, Ste, Suite, Unit, Rm, Floor,
|
|
19
|
+
* Bldg, Flat, … + bare "#<n>"). Ambiguous tokens are deliberately excluded:
|
|
20
|
+
* "Box" (that's po_box), bare "F"/"No" (too greedy), "Space"/"Stop" (common
|
|
21
|
+
* words).
|
|
22
|
+
* - ADD path (model emitted no unit over the matched run): allowed ONLY over
|
|
23
|
+
* `O` or `locality` / `dependent_locality` tokens — never over house_number /
|
|
24
|
+
* street* / postcode / po_box / region / country / venue. So a confidently-labeled street or number is safe.
|
|
25
|
+
* - SNAP path: when the model already started a unit span inside the match,
|
|
26
|
+
* we expand/clip it to the full detected shape.
|
|
27
|
+
* - Local smear-clip: unit tokens immediately flanking a snapped run are
|
|
28
|
+
* cleared (mirrors postcode-repair) so "Apt 4 Springfield" can't leave a
|
|
29
|
+
* stray I-unit on "Springfield".
|
|
30
|
+
*
|
|
31
|
+
* Opt-in via `ParseOpts.unitRepair` (postcode-repair earned default-on only
|
|
32
|
+
* after a measured +135/0; unit-repair stays opt-in until the v0.7.2 arena
|
|
33
|
+
* re-run quantifies its delta).
|
|
34
|
+
*/
|
|
35
|
+
/**
 * Secondary-unit designator keywords, joined as a regex alternation.
 *
 * Matching is case-insensitive (the pattern below is compiled with the `i`
 * flag). Every keyword is wrapped in `\b…\b` by the pattern, so a short keyword
 * such as "FL" or "PH" cannot match inside a longer word even though it appears
 * before its longer sibling in the alternation.
 */
const UNIT_DESIGNATORS = "APARTMENT|APT|SUITE|STE|UNIT|ROOM|RM|FLOOR|FLR|FL|BUILDING|BLDG|DEPARTMENT|DEPT|LOT|TRAILER|TRLR|SLIP|HANGAR|PIER|FLAT|PH|PENTHOUSE";

/**
 * Secondary-unit shape patterns, ordered most-specific → least. The array index
 * is used as the tie-break priority in `collectMatches`.
 *
 * The identifier is a 1-5 digit number with an optional trailing letter ("4B"),
 * a single letter ("STE D"), or a letter followed by up to four digits — kept
 * tight so following words are not swallowed.
 */
const UNIT_PATTERNS = [
    // Designator + optional "#"/"No." + identifier, e.g. "Apt 4B", "Ste 12", "STE D",
    // "Unit 9400", "Suite 100", "Rm 5", "Flat 2", "Apartment #3", "Bldg C".
    // The `\b` after the designator is load-bearing: it stops "Unit" matching inside
    // "United", "Fl" inside "Florida", etc. The trailing `\b` on the identifier stops
    // "Apt Main" capturing the "M" of "Main" (single-letter ident only fires on a
    // standalone token like "STE D").
    {
        label: "designator",
        re: new RegExp(`\\b(?:${UNIT_DESIGNATORS})\\b\\.?\\s*#?\\s*(?:No\\.?\\s*)?(?:\\d{1,5}[A-Za-z]?|[A-Za-z]\\d{0,4})\\b`, "gi"),
    },
    // Bare hash + identifier, e.g. "#104", "# 4B". Common US secondary-unit form.
    { label: "hash", re: /#\s*\d{1,5}[A-Za-z]?\b/g },
];

const UNIT_B = "B-unit";
const UNIT_I = "I-unit";
const OUTSIDE = "O";

/**
 * Tags a unit span is allowed to overwrite on the ADD path (in addition to `O`,
 * which is always eligible). Only `locality` / `dependent_locality` are listed;
 * any other tag on an overlapped token blocks the ADD path for that match, so a
 * house_number / street / postcode / po_box / region / country / venue label is
 * never clobbered by an ADD.
 */
const ADD_OVER_TAGS = new Set(["locality", "dependent_locality"]);

/** True when `label` is one of the two unit BIO labels. */
function isUnitLabel(label) {
    return label === UNIT_B || label === UNIT_I;
}

/** Extract the bare tag from a BIO label ("B-locality" → "locality", "O" → null). */
function tagOf(label) {
    return label === OUTSIDE ? null : label.slice(2);
}

/**
 * Collect non-overlapping unit matches in `text`.
 *
 * Selection is longest-match-wins, then most-specific pattern (lower index in
 * `UNIT_PATTERNS`); any candidate overlapping an already-accepted match is
 * rejected. The accepted matches are returned sorted by start offset.
 *
 * @param {string} text - Raw address text the token offsets refer to.
 * @returns {{ start: number, end: number, priority: number }[]} Half-open
 *   `[start, end)` character spans.
 */
function collectMatches(text) {
    const candidates = [];
    UNIT_PATTERNS.forEach((pattern, priority) => {
        // `matchAll` iterates over a clone of the regex, so the shared module-level
        // `/g` patterns never carry `lastIndex` state between calls.
        for (const hit of text.matchAll(pattern.re)) {
            candidates.push({ start: hit.index, end: hit.index + hit[0].length, priority });
        }
    });
    candidates.sort((a, b) => (b.end - b.start) - (a.end - a.start) || a.priority - b.priority);

    const accepted = [];
    for (const candidate of candidates) {
        const collides = accepted.some((a) => candidate.start < a.end && a.start < candidate.end);
        if (!collides) {
            accepted.push(candidate);
        }
    }
    // Text order keeps downstream processing independent of match length.
    return accepted.sort((a, b) => a.start - b.start);
}

/**
 * Repair secondary-unit label spans in a decoded token sequence using designator
 * regexes. Returns a NEW token array (inputs are not mutated) plus a change count.
 *
 * Each token is read through its `start` / `end` character offsets into `text`
 * and its BIO `label`; all other token properties are shallow-copied untouched.
 *
 * The pass runs in three phases so that neighbouring unit shapes cannot undo
 * each other (e.g. "Bldg C Apt 4"):
 *  1. Decide, against the model's ORIGINAL labels, which matches fire. A match
 *     fires on the SNAP path when any overlapped token already carries a unit
 *     label, or on the ADD path when every overlapped token is `O` or one of
 *     `ADD_OVER_TAGS`.
 *  2. Relabel every firing run as `B-unit` followed by `I-unit`.
 *  3. Smear-clip: unit-labelled tokens directly flanking a run are reset to `O`,
 *     stopping at any token that belongs to another firing run.
 *
 * @param {string} text - Raw address text the token offsets refer to.
 * @param {ReadonlyArray<{ start: number, end: number, label: string }>} input -
 *   Decoded tokens.
 * @returns {{ tokens: object[], changed: number }} The repaired copies and the
 *   number of tokens whose final label differs from the input label.
 */
export function repairUnitLabels(text, input) {
    const tokens = input.map((t) => ({ ...t }));
    const matches = collectMatches(text);
    if (matches.length === 0) {
        return { tokens, changed: 0 };
    }

    // Phase 1 — pick the token runs that will be snapped, judged on original labels.
    const runs = [];
    const claimed = new Set();
    for (const match of matches) {
        const run = [];
        for (let i = 0; i < input.length; i++) {
            const t = input[i];
            if (t.start < match.end && match.start < t.end) {
                run.push(i);
            }
        }
        if (run.length === 0) {
            continue;
        }
        const hasUnit = run.some((i) => isUnitLabel(input[i].label));
        if (!hasUnit) {
            // ADD path — only over O or a tag listed in ADD_OVER_TAGS.
            const safe = run.every((i) => {
                const tag = tagOf(input[i].label);
                return tag === null || ADD_OVER_TAGS.has(tag);
            });
            if (!safe) {
                continue;
            }
        }
        runs.push(run);
        for (const i of run) {
            claimed.add(i);
        }
    }

    // Phase 2 — SNAP/ADD: relabel each run as a single unit span.
    for (const run of runs) {
        run.forEach((i, k) => {
            tokens[i].label = k === 0 ? UNIT_B : UNIT_I;
        });
    }

    // Phase 3 — local smear clip. Claimed tokens belong to another snapped run and
    // must survive; clearing them was what erased the first of two adjacent units.
    for (const run of runs) {
        for (let j = run[0] - 1; j >= 0 && !claimed.has(j) && isUnitLabel(tokens[j].label); j--) {
            tokens[j].label = OUTSIDE;
        }
        for (let j = run[run.length - 1] + 1; j < tokens.length && !claimed.has(j) && isUnitLabel(tokens[j].label); j++) {
            tokens[j].label = OUTSIDE;
        }
    }

    // Net change count: a token relabelled back to its original value is not a change.
    let changed = 0;
    for (let i = 0; i < tokens.length; i++) {
        if (tokens[i].label !== input[i].label) {
            changed++;
        }
    }
    return { tokens, changed };
}
|
|
147
|
+
//# sourceMappingURL=unit-repair.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"unit-repair.js","sourceRoot":"","sources":["../unit-repair.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AAYH;;;;;GAKG;AACH,MAAM,gBAAgB,GACrB,qIAAqI,CAAA;AAEtI,MAAM,aAAa,GAAyC;IAC3D,kFAAkF;IAClF,wEAAwE;IACxE,iFAAiF;IACjF,kFAAkF;IAClF,8EAA8E;IAC9E,kCAAkC;IAClC;QACC,KAAK,EAAE,YAAY;QACnB,EAAE,EAAE,IAAI,MAAM,CAAC,SAAS,gBAAgB,4EAA4E,EAAE,IAAI,CAAC;KAC3H;IACD,8EAA8E;IAC9E,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE,EAAE,yBAAyB,EAAE;CAChD,CAAA;AAED,MAAM,MAAM,GAAG,QAAiC,CAAA;AAChD,MAAM,MAAM,GAAG,QAAiC,CAAA;AAChD,MAAM,OAAO,GAAG,GAA4B,CAAA;AAE5C;;;;;;;;;;GAUG;AACH,MAAM,aAAa,GAAG,IAAI,GAAG,CAAS,CAAC,UAAU,EAAE,oBAAoB,CAAC,CAAC,CAAA;AAEzE,SAAS,WAAW,CAAC,KAAa;IACjC,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,QAAQ,CAAA;AAChD,CAAC;AAED,qFAAqF;AACrF,SAAS,KAAK,CAAC,KAAa;IAC3B,OAAO,KAAK,KAAK,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA;AAC7C,CAAC;AAED,mGAAmG;AACnG,SAAS,cAAc,CAAC,IAAY;IACnC,MAAM,UAAU,GAAgB,EAAE,CAAA;IAClC,aAAa,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,EAAE;QACvC,GAAG,CAAC,EAAE,CAAC,SAAS,GAAG,CAAC,CAAA;QACpB,KAAK,IAAI,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;YAC1D,UAAU,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,QAAQ,EAAE,CAAC,CAAA;QAC1E,CAAC;IACF,CAAC,CAAC,CAAA;IACF,yFAAyF;IACzF,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAA;IACzF,MAAM,QAAQ,GAAgB,EAAE,CAAA;IAChC,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;QAC5B,IAAI,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,CAAC;YAAE,SAAQ;QACtE,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;IACD,OAAO,QAAQ,CAAA;AAChB,CAAC;AAQD;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAAY,EAAE,KAA8B;IAC5E,MAAM,OAAO,GAAG,cAAc,CAAC,IAAI,CAAC,CAAA;IACpC,
MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,CAAA;IAC3C,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,EAAE,CAAA;IAEvD,IAAI,OAAO,GAAG,CAAC,CAAA;IACf,MAAM,QAAQ,GAAG,CAAC,CAAS,EAAE,KAA4B,EAAQ,EAAE;QAClE,IAAI,MAAM,CAAC,CAAC,CAAE,CAAC,KAAK,KAAK,KAAK,EAAE,CAAC;YAChC,MAAM,CAAC,CAAC,CAAE,CAAC,KAAK,GAAG,KAAK,CAAA;YACxB,OAAO,EAAE,CAAA;QACV,CAAC;IACF,CAAC,CAAA;IAED,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACzB,+CAA+C;QAC/C,MAAM,OAAO,GAAa,EAAE,CAAA;QAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;YACpB,IAAI,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG;gBAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACxD,CAAC;QACD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,SAAQ;QAElC,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,KAAK,CAAC,CAAC,CAAA;QAClE,IAAI,CAAC,OAAO,EAAE,CAAC;YACd,iFAAiF;YACjF,6EAA6E;YAC7E,oFAAoF;YACpF,+BAA+B;YAC/B,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE;gBAChC,MAAM,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,KAAK,CAAC,CAAA;gBACnC,OAAO,GAAG,KAAK,IAAI,IAAI,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;YAC9C,CAAC,CAAC,CAAA;YACF,IAAI,CAAC,IAAI;gBAAE,SAAQ;QACpB,CAAC;QAED,2DAA2D;QAC3D,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAA;QAEjE,4EAA4E;QAC5E,KAAK,IAAI,CAAC,GAAG,OAAO,CAAC,CAAC,CAAE,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,KAAK,CAAC,EAAE,CAAC,EAAE;YAAE,QAAQ,CAAC,CAAC,EAAE,OAAO,CAAC,CAAA;QAChG,KAAK,IAAI,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAE,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,IAAI,WAAW,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,KAAK,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACxG,QAAQ,CAAC,CAAC,EAAE,OAAO,CAAC,CAAA;QACrB,CAAC;IACF,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;AAC3B,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vitest.config.d.ts","sourceRoot":"","sources":["../vitest.config.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;;AAUH,
|
|
1
|
+
{"version":3,"file":"vitest.config.d.ts","sourceRoot":"","sources":["../vitest.config.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;;AAUH,wBAoBE"}
|
package/out/vitest.config.js
CHANGED
|
@@ -22,6 +22,9 @@ export default defineConfig({
|
|
|
22
22
|
},
|
|
23
23
|
{ find: /^@mailwoman\/core\/(.+)$/, replacement: resolve(here, "../core/$1/index.ts") },
|
|
24
24
|
{ find: /^@mailwoman\/core$/, replacement: resolve(here, "../core/index.ts") },
|
|
25
|
+
// @mailwoman/codex resolves to source too (per-address-system postal reference data).
|
|
26
|
+
{ find: /^@mailwoman\/codex\/(.+)$/, replacement: resolve(here, "../codex/$1/index.ts") },
|
|
27
|
+
{ find: /^@mailwoman\/codex$/, replacement: resolve(here, "../codex/index.ts") },
|
|
25
28
|
],
|
|
26
29
|
},
|
|
27
30
|
test: {
|
package/out/vitest.config.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vitest.config.js","sourceRoot":"","sources":["../vitest.config.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,uCAAuC;AAEvC,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AACnC,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAA;AACxC,OAAO,EAAE,YAAY,EAAE,MAAM,MAAM,CAAA;AAEnC,MAAM,IAAI,GAAG,aAAa,CAAC,IAAI,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;AAEzD,eAAe,YAAY,CAAC;IAC3B,OAAO,EAAE;QACR,KAAK,EAAE;YACN,+EAA+E;YAC/E,0EAA0E;YAC1E;gBACC,IAAI,EAAE,0CAA0C;gBAChD,WAAW,EAAE,OAAO,CAAC,IAAI,EAAE,qCAAqC,CAAC;aACjE;YACD,EAAE,IAAI,EAAE,0BAA0B,EAAE,WAAW,EAAE,OAAO,CAAC,IAAI,EAAE,qBAAqB,CAAC,EAAE;YACvF,EAAE,IAAI,EAAE,oBAAoB,EAAE,WAAW,EAAE,OAAO,CAAC,IAAI,EAAE,kBAAkB,CAAC,EAAE;
|
|
1
|
+
{"version":3,"file":"vitest.config.js","sourceRoot":"","sources":["../vitest.config.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,uCAAuC;AAEvC,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AACnC,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAA;AACxC,OAAO,EAAE,YAAY,EAAE,MAAM,MAAM,CAAA;AAEnC,MAAM,IAAI,GAAG,aAAa,CAAC,IAAI,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;AAEzD,eAAe,YAAY,CAAC;IAC3B,OAAO,EAAE;QACR,KAAK,EAAE;YACN,+EAA+E;YAC/E,0EAA0E;YAC1E;gBACC,IAAI,EAAE,0CAA0C;gBAChD,WAAW,EAAE,OAAO,CAAC,IAAI,EAAE,qCAAqC,CAAC;aACjE;YACD,EAAE,IAAI,EAAE,0BAA0B,EAAE,WAAW,EAAE,OAAO,CAAC,IAAI,EAAE,qBAAqB,CAAC,EAAE;YACvF,EAAE,IAAI,EAAE,oBAAoB,EAAE,WAAW,EAAE,OAAO,CAAC,IAAI,EAAE,kBAAkB,CAAC,EAAE;YAC9E,sFAAsF;YACtF,EAAE,IAAI,EAAE,2BAA2B,EAAE,WAAW,EAAE,OAAO,CAAC,IAAI,EAAE,sBAAsB,CAAC,EAAE;YACzF,EAAE,IAAI,EAAE,qBAAqB,EAAE,WAAW,EAAE,OAAO,CAAC,IAAI,EAAE,mBAAmB,CAAC,EAAE;SAChF;KACD;IACD,IAAI,EAAE;QACL,OAAO,EAAE,KAAK;QACd,OAAO,EAAE,CAAC,oBAAoB,EAAE,WAAW,EAAE,YAAY,CAAC;KAC1D;CACD,CAAC,CAAA"}
|
package/out/weights.d.ts
CHANGED
|
@@ -28,16 +28,30 @@ export interface ResolveWeightsOpts {
|
|
|
28
28
|
modelPath?: string;
|
|
29
29
|
/** Explicit tokenizer.model path; takes precedence over package auto-resolve. */
|
|
30
30
|
tokenizerPath?: string;
|
|
31
|
+
/**
|
|
32
|
+
* Explicit `model-card.json` path (for the label vocab) on the explicit model+tokenizer path.
|
|
33
|
+
* When omitted, falls back to a `model-card.json` co-located with `modelPath`. Without a card,
|
|
34
|
+
* labels default to `STAGE2_BIO_LABELS` — which silently mis-decodes a STAGE3 (33-label) model
|
|
35
|
+
* into empty/garbage parses. Pass this (or co-locate the card) when evaluating a custom STAGE3
|
|
36
|
+
* checkpoint via explicit paths.
|
|
37
|
+
*/
|
|
38
|
+
modelCardPath?: string;
|
|
31
39
|
}
|
|
32
40
|
export interface ResolvedWeights {
|
|
33
41
|
modelPath: string;
|
|
34
42
|
tokenizerPath: string;
|
|
35
43
|
/**
|
|
36
|
-
* Path to `model-card.json`
|
|
37
|
-
*
|
|
38
|
-
*
|
|
44
|
+
* Path to `model-card.json` for the resolved model. On the package path, the card co-located in
|
|
45
|
+
* the package dir. On the explicit path, `opts.modelCardPath` or a card co-located with
|
|
46
|
+
* `modelPath`. `undefined` only when no card is found. Read by `loadFromWeights` to thread the
|
|
47
|
+
* trained label vocabulary into the classifier — see {@link readLabelsFromModelCard}.
|
|
39
48
|
*/
|
|
40
49
|
modelCardPath?: string;
|
|
50
|
+
/**
|
|
51
|
+
* Path to `crf-transitions.json` alongside the resolved model. `undefined` when the file doesn't
|
|
52
|
+
* exist (pre-v0.6.0 bundles or CE-only training).
|
|
53
|
+
*/
|
|
54
|
+
crfTransitionsPath?: string;
|
|
41
55
|
/** "explicit" if both paths came from opts; "package:<name>" if resolved via require.resolve. */
|
|
42
56
|
source: string;
|
|
43
57
|
}
|
|
@@ -54,4 +68,14 @@ export declare function resolveWeights(opts: ResolveWeightsOpts): ResolvedWeight
|
|
|
54
68
|
* and should be loud, not silently re-defaulted.
|
|
55
69
|
*/
|
|
56
70
|
export declare function readLabelsFromModelCard(modelCardPath: string | undefined): readonly string[] | undefined;
|
|
71
|
+
export interface CrfTransitions {
|
|
72
|
+
transitions: number[][];
|
|
73
|
+
startTransitions: number[];
|
|
74
|
+
endTransitions: number[];
|
|
75
|
+
}
|
|
76
|
+
/**
|
|
77
|
+
* Read learned CRF transition parameters from `crf-transitions.json`. Returns `undefined` when the
|
|
78
|
+
* file is missing or malformed — callers fall back to the structural BIO mask only.
|
|
79
|
+
*/
|
|
80
|
+
export declare function readCrfTransitions(crfPath: string | undefined): CrfTransitions | undefined;
|
|
57
81
|
//# sourceMappingURL=weights.d.ts.map
|
package/out/weights.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"weights.d.ts","sourceRoot":"","sources":["../weights.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAQH,MAAM,WAAW,kBAAkB;IAClC,wFAAwF;IACxF,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,4EAA4E;IAC5E,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iFAAiF;IACjF,aAAa,CAAC,EAAE,MAAM,CAAA;CACtB;AAED,MAAM,WAAW,eAAe;IAC/B,SAAS,EAAE,MAAM,CAAA;IACjB,aAAa,EAAE,MAAM,CAAA;IACrB
|
|
1
|
+
{"version":3,"file":"weights.d.ts","sourceRoot":"","sources":["../weights.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAQH,MAAM,WAAW,kBAAkB;IAClC,wFAAwF;IACxF,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,4EAA4E;IAC5E,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iFAAiF;IACjF,aAAa,CAAC,EAAE,MAAM,CAAA;IACtB;;;;;;OAMG;IACH,aAAa,CAAC,EAAE,MAAM,CAAA;CACtB;AAED,MAAM,WAAW,eAAe;IAC/B,SAAS,EAAE,MAAM,CAAA;IACjB,aAAa,EAAE,MAAM,CAAA;IACrB;;;;;OAKG;IACH,aAAa,CAAC,EAAE,MAAM,CAAA;IACtB;;;OAGG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAA;IAC3B,iGAAiG;IACjG,MAAM,EAAE,MAAM,CAAA;CACd;AAED,wBAAgB,cAAc,CAAC,IAAI,EAAE,kBAAkB,GAAG,eAAe,CAkDxE;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,uBAAuB,CAAC,aAAa,EAAE,MAAM,GAAG,SAAS,GAAG,SAAS,MAAM,EAAE,GAAG,SAAS,CAwBxG;AAED,MAAM,WAAW,cAAc;IAC9B,WAAW,EAAE,MAAM,EAAE,EAAE,CAAA;IACvB,gBAAgB,EAAE,MAAM,EAAE,CAAA;IAC1B,cAAc,EAAE,MAAM,EAAE,CAAA;CACxB;AAED;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,SAAS,GAAG,cAAc,GAAG,SAAS,CA0B1F"}
|
package/out/weights.js
CHANGED
|
@@ -32,7 +32,12 @@ export function resolveWeights(opts) {
|
|
|
32
32
|
throw new Error(`Explicit modelPath does not exist: ${opts.modelPath}`);
|
|
33
33
|
if (!existsSync(opts.tokenizerPath))
|
|
34
34
|
throw new Error(`Explicit tokenizerPath does not exist: ${opts.tokenizerPath}`);
|
|
35
|
-
|
|
35
|
+
// Resolve a model-card for the label vocab: explicit opt first, else one co-located with the
|
|
36
|
+
// model. Omitting it makes the classifier fall back to STAGE2_BIO_LABELS, which mis-decodes a
|
|
37
|
+
// STAGE3 (33-label) checkpoint into empty parses — the trap that broke eval-matrix --model-path.
|
|
38
|
+
const coLocatedCard = resolve(dirname(opts.modelPath), "model-card.json");
|
|
39
|
+
const modelCardPath = opts.modelCardPath ?? (existsSync(coLocatedCard) ? coLocatedCard : undefined);
|
|
40
|
+
return { modelPath: opts.modelPath, tokenizerPath: opts.tokenizerPath, modelCardPath, source: "explicit" };
|
|
36
41
|
}
|
|
37
42
|
// Package names follow the all-lowercase BCP-47 convention (`neural-weights-en-us`,
|
|
38
43
|
// `neural-weights-fr-fr`). The CLI's locale validation accepts canonical `en-US` / `fr-FR`
|
|
@@ -59,7 +64,9 @@ export function resolveWeights(opts) {
|
|
|
59
64
|
}
|
|
60
65
|
const modelCardCandidate = resolve(packageDir, "model-card.json");
|
|
61
66
|
const modelCardPath = existsSync(modelCardCandidate) ? modelCardCandidate : undefined;
|
|
62
|
-
|
|
67
|
+
const crfCandidate = resolve(packageDir, "crf-transitions.json");
|
|
68
|
+
const crfTransitionsPath = existsSync(crfCandidate) ? crfCandidate : undefined;
|
|
69
|
+
return { modelPath, tokenizerPath, modelCardPath, crfTransitionsPath, source: `package:${packageName}` };
|
|
63
70
|
}
|
|
64
71
|
/**
|
|
65
72
|
* Read the `labels` array from a `model-card.json` file. Returns `undefined` when the file is
|
|
@@ -100,4 +107,41 @@ export function readLabelsFromModelCard(modelCardPath) {
|
|
|
100
107
|
}
|
|
101
108
|
return Object.freeze(labels.slice());
|
|
102
109
|
}
|
|
110
|
+
/**
|
|
111
|
+
* Read learned CRF transition parameters from `crf-transitions.json`. Returns `undefined` when the
|
|
112
|
+
* file is missing or malformed — callers fall back to the structural BIO mask only.
|
|
113
|
+
*/
|
|
114
|
+
export function readCrfTransitions(crfPath) {
|
|
115
|
+
if (!crfPath || !existsSync(crfPath))
|
|
116
|
+
return undefined;
|
|
117
|
+
let raw;
|
|
118
|
+
try {
|
|
119
|
+
raw = readFileSync(crfPath, "utf8");
|
|
120
|
+
}
|
|
121
|
+
catch {
|
|
122
|
+
return undefined;
|
|
123
|
+
}
|
|
124
|
+
let parsed;
|
|
125
|
+
try {
|
|
126
|
+
parsed = JSON.parse(raw);
|
|
127
|
+
}
|
|
128
|
+
catch {
|
|
129
|
+
return undefined;
|
|
130
|
+
}
|
|
131
|
+
if (typeof parsed !== "object" || parsed === null)
|
|
132
|
+
return undefined;
|
|
133
|
+
const obj = parsed;
|
|
134
|
+
const transitions = obj.transitions;
|
|
135
|
+
const start = obj.start_transitions;
|
|
136
|
+
const end = obj.end_transitions;
|
|
137
|
+
if (!Array.isArray(transitions) || !Array.isArray(start) || !Array.isArray(end))
|
|
138
|
+
return undefined;
|
|
139
|
+
if (transitions.length === 0 || start.length === 0 || end.length === 0)
|
|
140
|
+
return undefined;
|
|
141
|
+
return {
|
|
142
|
+
transitions: transitions,
|
|
143
|
+
startTransitions: start,
|
|
144
|
+
endTransitions: end,
|
|
145
|
+
};
|
|
146
|
+
}
|
|
103
147
|
//# sourceMappingURL=weights.js.map
|
package/out/weights.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"weights.js","sourceRoot":"","sources":["../weights.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AAClD,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAA;AAC3C,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAE5C,MAAM,GAAG,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;
|
|
1
|
+
{"version":3,"file":"weights.js","sourceRoot":"","sources":["../weights.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AAClD,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAA;AAC3C,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAE5C,MAAM,GAAG,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AAsC1C,MAAM,UAAU,cAAc,CAAC,IAAwB;IACtD,MAAM,KAAK,GAAa,EAAE,CAAA;IAE1B,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;QAC1C,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,sCAAsC,IAAI,CAAC,SAAS,EAAE,CAAC,CAAA;QACxG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,aAAa,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,0CAA0C,IAAI,CAAC,aAAa,EAAE,CAAC,CAAA;QACpH,6FAA6F;QAC7F,8FAA8F;QAC9F,iGAAiG;QACjG,MAAM,aAAa,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,iBAAiB,CAAC,CAAA;QACzE,MAAM,aAAa,GAAG,IAAI,CAAC,aAAa,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,SAAS,CAAC,CAAA;QACnG,OAAO,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,aAAa,EAAE,IAAI,CAAC,aAAa,EAAE,aAAa,EAAE,MAAM,EAAE,UAAU,EAAE,CAAA;IAC3G,CAAC;IAED,oFAAoF;IACpF,2FAA2F;IAC3F,4DAA4D;IAC5D,MAAM,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,IAAI,OAAO,CAAC,CAAC,WAAW,EAAE,CAAA;IACrD,MAAM,WAAW,GAAG,6BAA6B,MAAM,EAAE,CAAA;IACzD,IAAI,UAAkB,CAAA;IACtB,IAAI,CAAC;QACJ,MAAM,WAAW,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,WAAW,eAAe,CAAC,CAAA;QAC9D,UAAU,GAAG,OAAO,CAAC,WAAW,CAAC,CAAA;IAClC,CAAC;IAAC,MAAM,CAAC;QACR,MAAM,IAAI,KAAK,CACd,qBAAqB,WAAW,iCAAiC,WAAW,IAAI;YAC/E,oDAAoD,CACrD,CAAA;IACF,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,OAAO,CAAC,UAAU,EAAE,YAAY,CAAC,CAAA;IACrE,MAAM,aAAa,GAAG,IAAI,CAAC,aAAa,IAAI,OAAO,CAAC,UAAU,EAAE,iBAAiB,CAAC,CAAA;IAClF,KAAK,CAAC,IAAI,CAAC,SAAS,EAAE,aAAa,CAAC,CAAA;IAEpC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QAC1D,MAAM,IAAI,KAAK,CACd,mBAAmB,WAAW,gBAAgB,UAAU,gCAAgC;YACvF,aAAa,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI;YACnC,iFAAiF;YACjF,oDAAoD,CACrD,CAAA;IACF,CAAC;IAED,MAAM,kBAAkB,GAAG,OAAO,CAAC,UAAU,EAAE,iBAAiB,CAAC,CAAA;IACjE,MAAM,aAAa,GAAG,UAAU,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,kB
AAkB,CAAC,CAAC,CAAC,SAAS,CAAA;IAErF,MAAM,YAAY,GAAG,OAAO,CAAC,UAAU,EAAE,sBAAsB,CAAC,CAAA;IAChE,MAAM,kBAAkB,GAAG,UAAU,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,SAAS,CAAA;IAE9E,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,EAAE,WAAW,WAAW,EAAE,EAAE,CAAA;AACzG,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,uBAAuB,CAAC,aAAiC;IACxE,IAAI,CAAC,aAAa,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;QAAE,OAAO,SAAS,CAAA;IAClE,IAAI,GAAW,CAAA;IACf,IAAI,CAAC;QACJ,GAAG,GAAG,YAAY,CAAC,aAAa,EAAE,MAAM,CAAC,CAAA;IAC1C,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,SAAS,CAAA;IACjB,CAAC;IACD,IAAI,MAAe,CAAA;IACnB,IAAI,CAAC;QACJ,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;IACzB,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,SAAS,CAAA;IACjB,CAAC;IACD,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,IAAI;QAAE,OAAO,SAAS,CAAA;IACnE,MAAM,MAAM,GAAI,MAA+B,CAAC,MAAM,CAAA;IACtD,IAAI,MAAM,KAAK,SAAS;QAAE,OAAO,SAAS,CAAA;IAC1C,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,EAAE,CAAC;QAClG,MAAM,IAAI,KAAK,CACd,sBAAsB,aAAa,sCAAsC;YACxE,8CAA8C,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,GAAG,CACxE,CAAA;IACF,CAAC;IACD,OAAO,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,EAAE,CAAsB,CAAA;AAC1D,CAAC;AAQD;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAAC,OAA2B;IAC7D,IAAI,CAAC,OAAO,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC;QAAE,OAAO,SAAS,CAAA;IACtD,IAAI,GAAW,CAAA;IACf,IAAI,CAAC;QACJ,GAAG,GAAG,YAAY,CAAC,OAAO,EAAE,MAAM,CAAC,CAAA;IACpC,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,SAAS,CAAA;IACjB,CAAC;IACD,IAAI,MAAe,CAAA;IACnB,IAAI,CAAC;QACJ,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;IACzB,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,SAAS,CAAA;IACjB,CAAC;IACD,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,IAAI;QAAE,OAAO,SAAS,CAAA;IACnE,MAAM,GAAG,GAAG,MAAiC,CAAA;IAC7C,MAAM,WAAW,GAAG,GAAG,CAAC,WAAW,CAAA;IACnC,MAAM,KAAK,GAAG,GAAG,CAAC,iBAAiB,CAAA;IACnC,MAAM,GAAG,GAAG,GAAG,CAAC,eAAe,CAAA;IAC/B,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC;QAAE,OAAO,SAAS,CAAA;IACjG,IAAI,WAAW,CAAC,MAAM,
KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,SAAS,CAAA;IACxF,OAAO;QACN,WAAW,EAAE,WAAyB;QACtC,gBAAgB,EAAE,KAAiB;QACnC,cAAc,EAAE,GAAe;KAC/B,CAAA;AACF,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mailwoman/neural",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "4.0.0",
|
|
4
4
|
"description": "Mailwoman neural classifier runtime: SentencePiece tokenizer + ONNX inference + decoder wiring.",
|
|
5
5
|
"license": "AGPL-3.0-only",
|
|
6
6
|
"repository": {
|
|
@@ -12,12 +12,16 @@
|
|
|
12
12
|
"exports": {
|
|
13
13
|
"./package.json": "./package.json",
|
|
14
14
|
".": "./out/index.js",
|
|
15
|
+
"./postcode-anchor": "./out/postcode-anchor.js",
|
|
16
|
+
"./postcode-binary-resolver": "./out/postcode-binary-resolver.js",
|
|
15
17
|
"./tokenizer": "./out/tokenizer.js",
|
|
18
|
+
"./onnx-runner": "./out/onnx-runner.js",
|
|
16
19
|
"./weights": "./out/weights.js",
|
|
17
20
|
"./browser": "./out/browser.js"
|
|
18
21
|
},
|
|
19
22
|
"dependencies": {
|
|
20
|
-
"@mailwoman/
|
|
23
|
+
"@mailwoman/codex": "4.0.0",
|
|
24
|
+
"@mailwoman/core": "4.0.0",
|
|
21
25
|
"@sctg/sentencepiece-js": "^1.3.3",
|
|
22
26
|
"onnxruntime-node": "^1.26.0"
|
|
23
27
|
},
|