@mailwoman/neural-weights-en-us 4.3.0 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/model-card.json +30 -41
- package/model.onnx +0 -0
- package/package.json +1 -1
package/model-card.json
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "neural-weights-en-us",
|
|
3
|
-
"version": "4.3.0",
|
|
4
|
-
"model_lineage": "v1.
|
|
5
|
-
"phase": "Stage 3 — v1.
|
|
3
|
+
"version": "4.4.0",
|
|
4
|
+
"model_lineage": "v1.3.0-boundary-consolidation / step 40000 — from scratch on corpus v0.4.15-boundary: the v4.3.0 recipe + three probe-passed levers (glue augmentation #513, real-TIGER intersection shard #487, codex po_box/cedex coverage shard), old synth-po-box retired. Ships with TWO declared inference behaviors: addressSystemConventions:'auto' (v4.3.0) + bridgePunctuationGaps:true (the span bridge — the corpus label format cannot express punctuation inside spans; the bridge merges same-tag fragments across intra-token punctuation). Gate: docs/articles/evals/2026-06-11-v4.4.0-ship-gate.md; tokenizer 0.6.0-a0",
|
|
5
|
+
"phase": "Stage 3 — v1.3 boundary: the last starved tags alive (po_box/cedex/intersections) + the arena debt closed",
|
|
6
6
|
"license": "AGPL-3.0-only",
|
|
7
7
|
"locale": "en-us",
|
|
8
8
|
"training": {
|
|
9
|
-
"corpus_version": "0.4.
|
|
9
|
+
"corpus_version": "0.4.15-boundary (v0.4.12 + intersection + po_box/cedex shards; loader relabel + glue augmentation)",
|
|
10
10
|
"tokenizer_version": "0.6.0-a0",
|
|
11
11
|
"steps": 40000,
|
|
12
12
|
"best_step": 40000,
|
|
13
13
|
"hardware": "NVIDIA A100-SXM4-40GB (Modal cloud)",
|
|
14
|
-
"recipe": "v1.
|
|
14
|
+
"recipe": "v1.3.0-boundary-consolidation: from-scratch 40k, augment_glue_prob 0.25, synth-intersection 1.0, synth-po-box-cedex 1.5 (old synth-po-box retired, stated), affix relabel pass, gazetteer anchor + choreography, CE-only, lr=1.5e-4, seed 42."
|
|
15
15
|
},
|
|
16
16
|
"architecture": {
|
|
17
17
|
"hidden_size": 384,
|
|
@@ -79,7 +79,7 @@
|
|
|
79
79
|
"intersection_a",
|
|
80
80
|
"intersection_b"
|
|
81
81
|
],
|
|
82
|
-
"notes": "v4.
|
|
82
|
+
"notes": "v4.4.0 — the boundary consolidation: po_box 0→89.1, cedex 0→96.1, intersections 0→100 (real-OOD, P≥95) — the parity scorecard's last empty rows filled; the v4.3.0 perturb-arena debt closed (64→72, gated). The ship config REQUIRES bridgePunctuationGaps:true + addressSystemConventions:'auto' — without the bridge, dotted surfaces (P.O. Box, C.P.) decode as period-truncated fragments (the corpus alignment tokenizer cannot label punctuation; measured: po_box 60.4 without, 89.1 with). Bridge merges intra-token punctuation gaps ONLY (commas excluded — they separate genuine spans). Known follow-ups: char-offset corpus labels (the structural cure), conventions loss-mask rides the next run, FR region tail 25.6 (floored, recovering).",
|
|
83
83
|
"format": {
|
|
84
84
|
"model": "ONNX int8 dynamic (quantized from fp32)",
|
|
85
85
|
"tokenizer": "SentencePiece unigram, byte_fallback=true, vocab_size=48000",
|
|
@@ -103,60 +103,49 @@
|
|
|
103
103
|
},
|
|
104
104
|
"base_relpath": "/data/output-v097-unit-v3-s42/checkpoints/step-020000",
|
|
105
105
|
"eval": {
|
|
106
|
-
"
|
|
107
|
-
"promotion_gate": "PASS
|
|
106
|
+
"ship_gate_2026_06_11_night": {
|
|
107
|
+
"promotion_gate": "PASS 17/17 (gates/v4.4.0-boundary.json, int8-graded, max delta 0.3pp; three-battery record in the gate doc)",
|
|
108
108
|
"honest_eval_vt": {
|
|
109
109
|
"n": 1428,
|
|
110
110
|
"region_match_pct": 99.6,
|
|
111
111
|
"coord_p50_km": 3.4,
|
|
112
|
-
"coord_p90_km": 7.
|
|
113
|
-
"pip_coverage_adj_pct": 46.9,
|
|
114
|
-
"baseline_v420_region_pct": 99.9,
|
|
112
|
+
"coord_p90_km": 7.5,
|
|
115
113
|
"verdict": "PASS"
|
|
116
114
|
},
|
|
117
|
-
"
|
|
118
|
-
"int8_vs_fp32": "PASS — all gate tags within 0.2pp",
|
|
119
|
-
"affix_stability_20k_to_40k": "v2 prefix 88.3→91.6, suffix 89.4→90.3 (rose, no decay)"
|
|
115
|
+
"arena_perturb_pct": 72.0
|
|
120
116
|
},
|
|
121
|
-
"
|
|
117
|
+
"per_component_int8_shipconfig": {
|
|
122
118
|
"us": {
|
|
123
|
-
"postcode":
|
|
124
|
-
"country_homograph":
|
|
125
|
-
"micro":
|
|
126
|
-
"locality":
|
|
127
|
-
"region":
|
|
128
|
-
"street":
|
|
119
|
+
"postcode": 98.3,
|
|
120
|
+
"country_homograph": 89.8,
|
|
121
|
+
"micro": 86.1,
|
|
122
|
+
"locality": 75.7,
|
|
123
|
+
"region": 90.3,
|
|
124
|
+
"street": 77.9,
|
|
129
125
|
"street_prefix": 93.6,
|
|
130
126
|
"street_suffix": 96.6,
|
|
131
|
-
"unit": 92.1
|
|
127
|
+
"unit": 92.1,
|
|
128
|
+
"po_box_real": 89.1,
|
|
129
|
+
"intersection_real": 100.0
|
|
132
130
|
},
|
|
133
131
|
"fr": {
|
|
134
|
-
"postcode": 99.7,
|
|
135
|
-
"house_number": 97.7,
|
|
136
|
-
"region":
|
|
132
|
+
"postcode": 99.6,
|
|
133
|
+
"house_number": 97.2,
|
|
134
|
+
"region": 25.6,
|
|
135
|
+
"cedex_real": 96.1
|
|
137
136
|
},
|
|
138
137
|
"de": {
|
|
139
|
-
"native_locality_anchor_on":
|
|
140
|
-
},
|
|
141
|
-
"affix_nad_native_v2": {
|
|
142
|
-
"street_prefix_f1": 92.2,
|
|
143
|
-
"street_suffix_f1": 90.3,
|
|
144
|
-
"precision": 100.0,
|
|
145
|
-
"baseline_v420": {
|
|
146
|
-
"street_prefix_f1": 18.2,
|
|
147
|
-
"street_suffix_f1": 8.9
|
|
148
|
-
}
|
|
138
|
+
"native_locality_anchor_on": 91.0
|
|
149
139
|
}
|
|
150
140
|
},
|
|
151
|
-
"
|
|
152
|
-
"
|
|
153
|
-
"
|
|
154
|
-
"
|
|
155
|
-
"mitigations": "FR region tail rides the next corpus pass; arbitration layer #478 continues"
|
|
141
|
+
"known_regressions_vs_4_3_0": {
|
|
142
|
+
"fr_postcode": -0.1,
|
|
143
|
+
"fr_house_number": -0.5,
|
|
144
|
+
"note": "both within single-row noise on their evals; every floor met"
|
|
156
145
|
}
|
|
157
146
|
},
|
|
158
147
|
"files_md5": {
|
|
159
|
-
"model.onnx": "
|
|
148
|
+
"model.onnx": "f086951a807b35e1ef700c0c2662a088",
|
|
160
149
|
"tokenizer.model": "b6137e8c52914c9715374268ecaa4bc6"
|
|
161
150
|
}
|
|
162
151
|
}
|
package/model.onnx
CHANGED
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mailwoman/neural-weights-en-us",
|
|
3
|
-
"version": "4.3.0",
|
|
3
|
+
"version": "4.4.0",
|
|
4
4
|
"description": "Mailwoman neural-classifier weights for locale 'en-us'. Data-only package — loaded by @mailwoman/neural at runtime.",
|
|
5
5
|
"license": "AGPL-3.0-only",
|
|
6
6
|
"repository": {
|