@mailwoman/neural-weights-en-us 4.2.0 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/model-card.json CHANGED
@@ -1,17 +1,17 @@
1
1
  {
2
2
  "name": "neural-weights-en-us",
3
- "version": "4.2.0",
4
- "model_lineage": "v1.0.2-consolidation-runB / step 20000 — consolidation of the parity campaign (unit + affix + country gazetteer-anchor + multi-locale balance) init_from consolidation v1.0.0 step-040000 (fresh optimizer — NOT resume; recorded honestly, see docs/articles/evals/2026-06-10-consolidation-session.md) @ affix 17x on corpus v0.4.12-consolidation — shipped as the unified 4.2.0 release version; tokenizer 0.6.0-a0",
5
- "phase": "Stage 3 — v1.0 consolidation: parity flag-plant (spine + country anchor + affix existence)",
3
+ "version": "4.4.0",
4
+ "model_lineage": "v1.3.0-boundary-consolidation / step 40000 — from scratch on corpus v0.4.15-boundary: the v4.3.0 recipe + three probe-passed levers (glue augmentation #513, real-TIGER intersection shard #487, codex po_box/cedex coverage shard), old synth-po-box retired. Ships with TWO declared inference behaviors: addressSystemConventions:'auto' (v4.3.0) + bridgePunctuationGaps:true (the span bridge — the corpus label format cannot express punctuation inside spans; the bridge merges same-tag fragments across intra-token punctuation). Gate: docs/articles/evals/2026-06-11-v4.4.0-ship-gate.md; tokenizer 0.6.0-a0",
5
+ "phase": "Stage 3 — v1.3 boundary: the last starved tags alive (po_box/cedex/intersections) + the arena debt closed",
6
6
  "license": "AGPL-3.0-only",
7
7
  "locale": "en-us",
8
8
  "training": {
9
- "corpus_version": "0.4.12-consolidation",
9
+ "corpus_version": "0.4.15-boundary (v0.4.12 + intersection + po_box/cedex shards; loader relabel + glue augmentation)",
10
10
  "tokenizer_version": "0.6.0-a0",
11
- "steps": 20000,
12
- "best_step": 20000,
11
+ "steps": 40000,
12
+ "best_step": 40000,
13
13
  "hardware": "NVIDIA A100-SXM4-40GB (Modal cloud)",
14
- "recipe": "Run B of the consolidation campaign: init_from the clean v1.0.0 consolidation step-040000 (every proven lever: unit shard, affix-ml shard, country balanced shard + gazetteer soft anchor + channel choreography, both-order German), synth-affix 17x, 20k steps, CE-only, lr=1.5e-4, seed 42. Selected over v1.0.0/A/C at the fork: strongest stable variant (US postcode 97.3, country 89.8, FR hn 94.6). STATED RE-BASELINES vs canonical bars: affix 64.9/48.8 (vs 78/67), US street 76.2 (vs 80.4), unit 90.6 (vs 92) — measured 29M stability ceiling, see issue #492. GATE NUMBERS ARE REPAIRS-OFF (#486). Eval procedure REQUIRES --gazetteer-lexicon + --suppress-gaz-near-postcode (zero-filled clues degrade country recall and fake an affix crash)."
14
+ "recipe": "v1.3.0-boundary-consolidation: from-scratch 40k, augment_glue_prob 0.25, synth-intersection 1.0, synth-po-box-cedex 1.5 (old synth-po-box retired, stated), affix relabel pass, gazetteer anchor + choreography, CE-only, lr=1.5e-4, seed 42."
15
15
  },
16
16
  "architecture": {
17
17
  "hidden_size": 384,
@@ -79,14 +79,14 @@
79
79
  "intersection_a",
80
80
  "intersection_b"
81
81
  ],
82
- "notes": "v4.1.0 — secondary-unit coverage. Same Stage-3 33-BIO-label schema as 4.0.0 (no schema change). Adds a format-diverse synth-unit shard (USPS Pub-28 C2 designators: APT/STE/FL/… across unit-after, unit-first, bare, and venue-prefixed layouts) on top of the v0.9.3 multi-locale base. `unit` recognition 0%→92.3% on a held-out real-designator eval; by 'negative space' it also raised US `street` +3.3pp and lifted `country` (US +6pp, FR +15pp) — covering the missing tag sharpened its neighbors. No regression vs 4.0.0 on any US/FR golden tag; DE native-order locality held (90.6%).",
82
+ "notes": "v4.4.0 — the boundary consolidation: po_box 0→89.1, cedex 0→96.1, intersections 0→100 (real-OOD, P≥95) — the parity scorecard's last empty rows filled; the v4.3.0 perturb-arena debt closed (64→72, gated). The ship config REQUIRES bridgePunctuationGaps:true + addressSystemConventions:'auto' — without the bridge, dotted surfaces (P.O. Box, C.P.) decode as period-truncated fragments (the corpus alignment tokenizer cannot label punctuation; measured: po_box 60.4 without, 89.1 with). Bridge merges intra-token punctuation gaps ONLY (commas excluded — they separate genuine spans). Known follow-ups: char-offset corpus labels (the structural cure), conventions loss-mask rides the next run, FR region tail 25.6 (floored, recovering).",
83
83
  "format": {
84
84
  "model": "ONNX int8 dynamic (quantized from fp32)",
85
85
  "tokenizer": "SentencePiece unigram, byte_fallback=true, vocab_size=48000",
86
86
  "max_sequence_length": 128,
87
87
  "opset": 17,
88
- "fp32_size_mb": 112.9,
89
- "int8_size_mb": 28.6
88
+ "fp32_size_mb": 118.4,
89
+ "int8_size_mb": 29.8
90
90
  },
91
91
  "files": {
92
92
  "model": "model.onnx",
@@ -103,51 +103,49 @@
103
103
  },
104
104
  "base_relpath": "/data/output-v097-unit-v3-s42/checkpoints/step-020000",
105
105
  "eval": {
106
- "ship_gate_2026_06_10": {
106
+ "ship_gate_2026_06_11_night": {
107
+ "promotion_gate": "PASS 17/17 (gates/v4.4.0-boundary.json, int8-graded, max delta 0.3pp; three-battery record in the gate doc)",
107
108
  "honest_eval_vt": {
108
109
  "n": 1428,
109
- "region_match_pct": 99.9,
110
+ "region_match_pct": 99.6,
110
111
  "coord_p50_km": 3.4,
111
- "coord_p90_km": 7.4,
112
- "pip_coverage_adj_pct": 47.1,
113
- "baseline_v410_region_pct": 100.0,
112
+ "coord_p90_km": 7.5,
114
113
  "verdict": "PASS"
115
114
  },
116
- "demo_presets": "PASS — 5/6 identical to v4.1.0; 6th is the intended affix split",
117
- "int8_vs_fp32": "PASS — all gate tags within 0.1pp; quant deterministic",
118
- "de_native_order_int8_pct": 90.9
115
+ "arena_perturb_pct": 72.0
119
116
  },
120
- "per_component_int8_gazfed": {
117
+ "per_component_int8_shipconfig": {
121
118
  "us": {
122
- "postcode": 97.3,
119
+ "postcode": 98.3,
123
120
  "country_homograph": 89.8,
124
- "micro": 84.8,
125
- "locality": 72.9,
126
- "region": 89.1,
127
- "street": 76.2,
128
- "street_prefix": 64.9,
129
- "street_suffix": 48.8,
130
- "unit": 90.6,
131
- "house_number": 96.9
121
+ "micro": 86.1,
122
+ "locality": 75.7,
123
+ "region": 90.3,
124
+ "street": 77.9,
125
+ "street_prefix": 93.6,
126
+ "street_suffix": 96.6,
127
+ "unit": 92.1,
128
+ "po_box_real": 89.1,
129
+ "intersection_real": 100.0
132
130
  },
133
131
  "fr": {
134
132
  "postcode": 99.6,
135
- "house_number": 94.6,
136
- "region": 27.6
133
+ "house_number": 97.2,
134
+ "region": 25.6,
135
+ "cedex_real": 96.1
137
136
  },
138
137
  "de": {
139
- "native_locality_anchor_on": 90.9
138
+ "native_locality_anchor_on": 91.0
140
139
  }
141
140
  },
142
- "known_regressions_vs_4_1_0": {
143
- "us_street": -2.3,
144
- "unit": -1.7,
145
- "us_postcode": -1.0,
146
- "mitigations": "arbitration layer #478; architecture escalation #492"
141
+ "known_regressions_vs_4_3_0": {
142
+ "fr_postcode": -0.1,
143
+ "fr_house_number": -0.5,
144
+ "note": "both within single-row noise on their evals; every floor met"
147
145
  }
148
146
  },
149
147
  "files_md5": {
150
- "model.onnx": "9eb4a99f6db06cccff57939f657c09f9",
148
+ "model.onnx": "f086951a807b35e1ef700c0c2662a088",
151
149
  "tokenizer.model": "b6137e8c52914c9715374268ecaa4bc6"
152
150
  }
153
- }
151
+ }
package/model.onnx CHANGED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mailwoman/neural-weights-en-us",
3
- "version": "4.2.0",
3
+ "version": "4.4.0",
4
4
  "description": "Mailwoman neural-classifier weights for locale 'en-us'. Data-only package — loaded by @mailwoman/neural at runtime.",
5
5
  "license": "AGPL-3.0-only",
6
6
  "repository": {