@mailwoman/neural-weights-en-us 4.1.0 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/model-card.json +57 -9
- package/model.onnx +0 -0
- package/package.json +1 -1
package/model-card.json
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "neural-weights-en-us",
|
|
3
|
-
"version": "4.1.0",
|
|
4
|
-
"model_lineage": "
|
|
5
|
-
"phase": "Stage 3
|
|
3
|
+
"version": "4.2.0",
|
|
4
|
+
"model_lineage": "v1.0.2-consolidation-runB / step 20000 — consolidation of the parity campaign (unit + affix + country gazetteer-anchor + multi-locale balance) init_from consolidation v1.0.0 step-040000 (fresh optimizer — NOT resume; recorded honestly, see docs/articles/evals/2026-06-10-consolidation-session.md) @ affix 17x on corpus v0.4.12-consolidation — shipped as the unified 4.2.0 release version; tokenizer 0.6.0-a0",
|
|
5
|
+
"phase": "Stage 3 — v1.0 consolidation: parity flag-plant (spine + country anchor + affix existence)",
|
|
6
6
|
"license": "AGPL-3.0-only",
|
|
7
7
|
"locale": "en-us",
|
|
8
8
|
"training": {
|
|
9
|
-
"corpus_version": "0.4.
|
|
9
|
+
"corpus_version": "0.4.12-consolidation",
|
|
10
10
|
"tokenizer_version": "0.6.0-a0",
|
|
11
11
|
"steps": 20000,
|
|
12
12
|
"best_step": 20000,
|
|
13
13
|
"hardware": "NVIDIA A100-SXM4-40GB (Modal cloud)",
|
|
14
|
-
"recipe": "
|
|
14
|
+
"recipe": "Run B of the consolidation campaign: init_from the clean v1.0.0 consolidation step-040000 (every proven lever: unit shard, affix-ml shard, country balanced shard + gazetteer soft anchor + channel choreography, both-order German), synth-affix 17x, 20k steps, CE-only, lr=1.5e-4, seed 42. Selected over v1.0.0/A/C at the fork: strongest stable variant (US postcode 97.3, country 89.8, FR hn 94.6). STATED RE-BASELINES vs canonical bars: affix 64.9/48.8 (vs 78/67), US street 76.2 (vs 80.4), unit 90.6 (vs 92) — measured 29M stability ceiling, see issue #492. GATE NUMBERS ARE REPAIRS-OFF (#486). Eval procedure REQUIRES --gazetteer-lexicon + --suppress-gaz-near-postcode (zero-filled clues degrade country recall and fake an affix crash)."
|
|
15
15
|
},
|
|
16
16
|
"architecture": {
|
|
17
17
|
"hidden_size": 384,
|
|
@@ -79,14 +79,14 @@
|
|
|
79
79
|
"intersection_a",
|
|
80
80
|
"intersection_b"
|
|
81
81
|
],
|
|
82
|
-
"notes": "v4.1.0
|
|
82
|
+
"notes": "v4.1.0 — secondary-unit coverage. Same Stage-3 33-BIO-label schema as 4.0.0 (no schema change). Adds a format-diverse synth-unit shard (USPS Pub-28 C2 designators: APT/STE/FL/… across unit-after, unit-first, bare, and venue-prefixed layouts) on top of the v0.9.3 multi-locale base. `unit` recognition 0%→92.3% on a held-out real-designator eval; by 'negative space' it also raised US `street` +3.3pp and lifted `country` (US +6pp, FR +15pp) — covering the missing tag sharpened its neighbors. No regression vs 4.0.0 on any US/FR golden tag; DE native-order locality held (90.6%).",
|
|
83
83
|
"format": {
|
|
84
84
|
"model": "ONNX int8 dynamic (quantized from fp32)",
|
|
85
85
|
"tokenizer": "SentencePiece unigram, byte_fallback=true, vocab_size=48000",
|
|
86
86
|
"max_sequence_length": 128,
|
|
87
87
|
"opset": 17,
|
|
88
88
|
"fp32_size_mb": 112.9,
|
|
89
|
-
"int8_size_mb": 28.
|
|
89
|
+
"int8_size_mb": 28.6
|
|
90
90
|
},
|
|
91
91
|
"files": {
|
|
92
92
|
"model": "model.onnx",
|
|
@@ -101,5 +101,53 @@
|
|
|
101
101
|
"held_out_ece_calibrated": 0.0035,
|
|
102
102
|
"note": "calibration.json is the global table; calibration-per-locale.json carries per-locale tables (the global table under-serves DE/NL). Apply via @mailwoman/core/decoder's createCalibrator; default parse output is byte-stable when omitted."
|
|
103
103
|
},
|
|
104
|
-
"base_relpath": "/data/output-v097-unit-v3-s42/checkpoints/step-020000"
|
|
105
|
-
|
|
104
|
+
"base_relpath": "/data/output-v097-unit-v3-s42/checkpoints/step-020000",
|
|
105
|
+
"eval": {
|
|
106
|
+
"ship_gate_2026_06_10": {
|
|
107
|
+
"honest_eval_vt": {
|
|
108
|
+
"n": 1428,
|
|
109
|
+
"region_match_pct": 99.9,
|
|
110
|
+
"coord_p50_km": 3.4,
|
|
111
|
+
"coord_p90_km": 7.4,
|
|
112
|
+
"pip_coverage_adj_pct": 47.1,
|
|
113
|
+
"baseline_v410_region_pct": 100.0,
|
|
114
|
+
"verdict": "PASS"
|
|
115
|
+
},
|
|
116
|
+
"demo_presets": "PASS — 5/6 identical to v4.1.0; 6th is the intended affix split",
|
|
117
|
+
"int8_vs_fp32": "PASS — all gate tags within 0.1pp; quant deterministic",
|
|
118
|
+
"de_native_order_int8_pct": 90.9
|
|
119
|
+
},
|
|
120
|
+
"per_component_int8_gazfed": {
|
|
121
|
+
"us": {
|
|
122
|
+
"postcode": 97.3,
|
|
123
|
+
"country_homograph": 89.8,
|
|
124
|
+
"micro": 84.8,
|
|
125
|
+
"locality": 72.9,
|
|
126
|
+
"region": 89.1,
|
|
127
|
+
"street": 76.2,
|
|
128
|
+
"street_prefix": 64.9,
|
|
129
|
+
"street_suffix": 48.8,
|
|
130
|
+
"unit": 90.6,
|
|
131
|
+
"house_number": 96.9
|
|
132
|
+
},
|
|
133
|
+
"fr": {
|
|
134
|
+
"postcode": 99.6,
|
|
135
|
+
"house_number": 94.6,
|
|
136
|
+
"region": 27.6
|
|
137
|
+
},
|
|
138
|
+
"de": {
|
|
139
|
+
"native_locality_anchor_on": 90.9
|
|
140
|
+
}
|
|
141
|
+
},
|
|
142
|
+
"known_regressions_vs_4_1_0": {
|
|
143
|
+
"us_street": -2.3,
|
|
144
|
+
"unit": -1.7,
|
|
145
|
+
"us_postcode": -1.0,
|
|
146
|
+
"mitigations": "arbitration layer #478; architecture escalation #492"
|
|
147
|
+
}
|
|
148
|
+
},
|
|
149
|
+
"files_md5": {
|
|
150
|
+
"model.onnx": "9eb4a99f6db06cccff57939f657c09f9",
|
|
151
|
+
"tokenizer.model": "b6137e8c52914c9715374268ecaa4bc6"
|
|
152
|
+
}
|
|
153
|
+
}
|
package/model.onnx
CHANGED
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mailwoman/neural-weights-en-us",
|
|
3
|
-
"version": "4.1.0",
|
|
3
|
+
"version": "4.2.0",
|
|
4
4
|
"description": "Mailwoman neural-classifier weights for locale 'en-us'. Data-only package — loaded by @mailwoman/neural at runtime.",
|
|
5
5
|
"license": "AGPL-3.0-only",
|
|
6
6
|
"repository": {
|