@mailwoman/neural-weights-fr-fr 3.0.0 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -10
- package/model-card.json +77 -55
- package/model.onnx +0 -0
- package/package.json +18 -19
- package/tokenizer.model +0 -0
package/README.md
CHANGED
|
@@ -4,26 +4,26 @@ Stage 2 (coarse + venue/street/house_number) Mailwoman neural-classifier weights
|
|
|
4
4
|
|
|
5
5
|
- locale: **fr-fr**
|
|
6
6
|
- corpus: **0.3.0**
|
|
7
|
-
- training steps: **
|
|
7
|
+
- training steps: **2200**
|
|
8
8
|
- hardware: **AMD Radeon 780M (gfx1103) bf16 ~14.6 GiB GTT**
|
|
9
9
|
|
|
10
10
|
## Per-component F1 targets
|
|
11
11
|
|
|
12
12
|
**⚠ Below per-component F1 targets:**
|
|
13
13
|
|
|
14
|
-
- `country` F1 = **0.
|
|
15
|
-
- `region` F1 = **0.
|
|
16
|
-
- `locality` F1 = **0.
|
|
17
|
-
- `postcode` F1 = **0.
|
|
18
|
-
- `venue` F1 = **0.
|
|
19
|
-
- `street` F1 = **0.
|
|
20
|
-
- `house_number` F1 = **0.
|
|
14
|
+
- `country` F1 = **0.2112** (target ≥0.95)
|
|
15
|
+
- `region` F1 = **0.1883** (target ≥0.95)
|
|
16
|
+
- `locality` F1 = **0.2736** (target ≥0.95)
|
|
17
|
+
- `postcode` F1 = **0.6916** (target ≥0.95)
|
|
18
|
+
- `venue` F1 = **0.3886** (target ≥0.60)
|
|
19
|
+
- `street` F1 = **0.3016** (target ≥0.70)
|
|
20
|
+
- `house_number` F1 = **0.7866** (target ≥0.80)
|
|
21
21
|
|
|
22
22
|
## Eval (golden set)
|
|
23
23
|
|
|
24
24
|
- entries: **4535**
|
|
25
|
-
- full-parse exact match: **0.
|
|
26
|
-
- mean token confidence: **0.
|
|
25
|
+
- full-parse exact match: **0.0818**
|
|
26
|
+
- mean token confidence: **0.8063**
|
|
27
27
|
|
|
28
28
|
## Components supported
|
|
29
29
|
|
package/model-card.json
CHANGED
|
@@ -1,17 +1,16 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "neural-weights-fr-fr",
|
|
3
|
-
"version": "
|
|
4
|
-
"
|
|
3
|
+
"version": "4.1.0",
|
|
4
|
+
"model_lineage": "shares the en-us v0.9.7-unit-v3 multi-locale model (step 20000) — shipped as the unified 4.1.0 release version; tokenizer 0.6.0-a0. fr-fr and en-us bundle the SAME byte-identical model; this package carries the FR-specific calibration table.",
|
|
5
|
+
"phase": "Stage 3 — multi-locale (FR via the shared model)",
|
|
5
6
|
"license": "AGPL-3.0-only",
|
|
6
7
|
"locale": "fr-fr",
|
|
7
8
|
"training": {
|
|
8
|
-
"corpus_version": "0.
|
|
9
|
-
"tokenizer_version": "0.
|
|
10
|
-
"steps":
|
|
11
|
-
"hardware": "
|
|
12
|
-
"
|
|
13
|
-
"started_at": null,
|
|
14
|
-
"completed_at": "2026-05-22T10:22:18.739046Z"
|
|
9
|
+
"corpus_version": "0.4.5-unit-v2",
|
|
10
|
+
"tokenizer_version": "0.6.0-a0",
|
|
11
|
+
"steps": 20000,
|
|
12
|
+
"hardware": "NVIDIA A100-SXM4-40GB (Modal cloud)",
|
|
13
|
+
"note": "Identical artifact to neural-weights-en-us 4.1.0 (one multi-locale model serves both). See that card's training/recipe for full provenance."
|
|
15
14
|
},
|
|
16
15
|
"components_supported": [
|
|
17
16
|
"country",
|
|
@@ -25,39 +24,62 @@
|
|
|
25
24
|
"street",
|
|
26
25
|
"house_number"
|
|
27
26
|
],
|
|
27
|
+
"labels": [
|
|
28
|
+
"O",
|
|
29
|
+
"B-country",
|
|
30
|
+
"I-country",
|
|
31
|
+
"B-region",
|
|
32
|
+
"I-region",
|
|
33
|
+
"B-locality",
|
|
34
|
+
"I-locality",
|
|
35
|
+
"B-dependent_locality",
|
|
36
|
+
"I-dependent_locality",
|
|
37
|
+
"B-postcode",
|
|
38
|
+
"I-postcode",
|
|
39
|
+
"B-subregion",
|
|
40
|
+
"I-subregion",
|
|
41
|
+
"B-cedex",
|
|
42
|
+
"I-cedex",
|
|
43
|
+
"B-venue",
|
|
44
|
+
"I-venue",
|
|
45
|
+
"B-street",
|
|
46
|
+
"I-street",
|
|
47
|
+
"B-house_number",
|
|
48
|
+
"I-house_number"
|
|
49
|
+
],
|
|
28
50
|
"eval": {
|
|
29
51
|
"n_entries": 4535,
|
|
30
|
-
"full_parse_exact_match": 0.
|
|
31
|
-
"mean_token_confidence": 0.
|
|
52
|
+
"full_parse_exact_match": 0.08180815876515987,
|
|
53
|
+
"mean_token_confidence": 0.8062812768727202,
|
|
32
54
|
"per_component": {
|
|
33
55
|
"country": {
|
|
34
|
-
"precision": 0.
|
|
35
|
-
"recall": 0.
|
|
36
|
-
"f1": 0.
|
|
56
|
+
"precision": 0.21428571428481394,
|
|
57
|
+
"recall": 0.20816326530527282,
|
|
58
|
+
"f1": 0.21118012372283307,
|
|
37
59
|
"support": 245
|
|
38
60
|
},
|
|
39
61
|
"region": {
|
|
40
|
-
"precision": 0.
|
|
41
|
-
"recall": 0.
|
|
42
|
-
"f1": 0.
|
|
62
|
+
"precision": 0.342951360263526,
|
|
63
|
+
"recall": 0.129797191887635,
|
|
64
|
+
"f1": 0.18832050661831204,
|
|
43
65
|
"support": 3205
|
|
44
66
|
},
|
|
45
67
|
"locality": {
|
|
46
|
-
"precision": 0.
|
|
47
|
-
"recall": 0.
|
|
48
|
-
"f1": 0.
|
|
68
|
+
"precision": 0.24782398452605223,
|
|
69
|
+
"recall": 0.30533214179317686,
|
|
70
|
+
"f1": 0.2735886822759171,
|
|
49
71
|
"support": 3357
|
|
50
72
|
},
|
|
51
73
|
"dependent_locality": {
|
|
52
|
-
"precision": 0.
|
|
53
|
-
"recall": 0.
|
|
54
|
-
"f1": 0.
|
|
74
|
+
"precision": 0.005044136191670815,
|
|
75
|
+
"recall": 0.0999999999975,
|
|
76
|
+
"f1": 0.009603841445164863,
|
|
55
77
|
"support": 40
|
|
56
78
|
},
|
|
57
79
|
"postcode": {
|
|
58
|
-
"precision": 0.
|
|
59
|
-
"recall": 0.
|
|
60
|
-
"f1": 0.
|
|
80
|
+
"precision": 0.8323890462696731,
|
|
81
|
+
"recall": 0.591610738254835,
|
|
82
|
+
"f1": 0.6916437813892687,
|
|
61
83
|
"support": 2980
|
|
62
84
|
},
|
|
63
85
|
"subregion": {
|
|
@@ -73,21 +95,21 @@
|
|
|
73
95
|
"support": 1
|
|
74
96
|
},
|
|
75
97
|
"venue": {
|
|
76
|
-
"precision": 0.
|
|
77
|
-
"recall": 0.
|
|
78
|
-
"f1": 0.
|
|
98
|
+
"precision": 0.37649063032335905,
|
|
99
|
+
"recall": 0.4014532243411431,
|
|
100
|
+
"f1": 0.38857142807160183,
|
|
79
101
|
"support": 1101
|
|
80
102
|
},
|
|
81
103
|
"street": {
|
|
82
|
-
"precision": 0.
|
|
83
|
-
"recall": 0.
|
|
84
|
-
"f1": 0.
|
|
104
|
+
"precision": 0.35594795539016916,
|
|
105
|
+
"recall": 0.26161202185783416,
|
|
106
|
+
"f1": 0.3015748026611547,
|
|
85
107
|
"support": 2928
|
|
86
108
|
},
|
|
87
109
|
"house_number": {
|
|
88
|
-
"precision": 0.
|
|
89
|
-
"recall": 0.
|
|
90
|
-
"f1": 0.
|
|
110
|
+
"precision": 0.7446153846150028,
|
|
111
|
+
"recall": 0.8335246842704744,
|
|
112
|
+
"f1": 0.7865655466300883,
|
|
91
113
|
"support": 1742
|
|
92
114
|
}
|
|
93
115
|
},
|
|
@@ -101,56 +123,56 @@
|
|
|
101
123
|
{
|
|
102
124
|
"low": 0.1,
|
|
103
125
|
"high": 0.2,
|
|
104
|
-
"n":
|
|
105
|
-
"acc": 0.
|
|
126
|
+
"n": 21,
|
|
127
|
+
"acc": 0.23809523809523808
|
|
106
128
|
},
|
|
107
129
|
{
|
|
108
130
|
"low": 0.2,
|
|
109
131
|
"high": 0.3,
|
|
110
|
-
"n":
|
|
111
|
-
"acc": 0.
|
|
132
|
+
"n": 666,
|
|
133
|
+
"acc": 0.2912912912912913
|
|
112
134
|
},
|
|
113
135
|
{
|
|
114
136
|
"low": 0.3,
|
|
115
137
|
"high": 0.4,
|
|
116
|
-
"n":
|
|
117
|
-
"acc": 0.
|
|
138
|
+
"n": 2416,
|
|
139
|
+
"acc": 0.3265728476821192
|
|
118
140
|
},
|
|
119
141
|
{
|
|
120
142
|
"low": 0.4,
|
|
121
143
|
"high": 0.5,
|
|
122
|
-
"n":
|
|
123
|
-
"acc": 0.
|
|
144
|
+
"n": 4308,
|
|
145
|
+
"acc": 0.3152274837511606
|
|
124
146
|
},
|
|
125
147
|
{
|
|
126
148
|
"low": 0.5,
|
|
127
149
|
"high": 0.6,
|
|
128
|
-
"n":
|
|
129
|
-
"acc": 0.
|
|
150
|
+
"n": 4777,
|
|
151
|
+
"acc": 0.3382876282185472
|
|
130
152
|
},
|
|
131
153
|
{
|
|
132
154
|
"low": 0.6,
|
|
133
155
|
"high": 0.7,
|
|
134
|
-
"n":
|
|
135
|
-
"acc": 0.
|
|
156
|
+
"n": 4943,
|
|
157
|
+
"acc": 0.3568682986040866
|
|
136
158
|
},
|
|
137
159
|
{
|
|
138
160
|
"low": 0.7,
|
|
139
161
|
"high": 0.8,
|
|
140
|
-
"n":
|
|
141
|
-
"acc": 0.
|
|
162
|
+
"n": 5534,
|
|
163
|
+
"acc": 0.382544271774485
|
|
142
164
|
},
|
|
143
165
|
{
|
|
144
166
|
"low": 0.8,
|
|
145
167
|
"high": 0.9,
|
|
146
|
-
"n":
|
|
147
|
-
"acc": 0.
|
|
168
|
+
"n": 8066,
|
|
169
|
+
"acc": 0.43627572526655095
|
|
148
170
|
},
|
|
149
171
|
{
|
|
150
172
|
"low": 0.9,
|
|
151
173
|
"high": 1.0,
|
|
152
|
-
"n":
|
|
153
|
-
"acc": 0.
|
|
174
|
+
"n": 30517,
|
|
175
|
+
"acc": 0.6440344725890488
|
|
154
176
|
}
|
|
155
177
|
]
|
|
156
178
|
},
|
|
@@ -159,7 +181,7 @@
|
|
|
159
181
|
"particle-honorific kryptonite (e.g. FR 'Saint-Just-Saint-Rambert') if not in synth set",
|
|
160
182
|
"non-Latin scripts (CJK, Cyrillic) fall through to byte-fallback tokens; F1 unknown"
|
|
161
183
|
],
|
|
162
|
-
"notes": "
|
|
184
|
+
"notes": "v0.4.0 \u2014 issue #116. Same encoder geometry as v0.3.0 (8.87M params, 6L/256H/4-heads, 21 BIO labels, linear-chain CRF). Issue proposed (1) per-token CRF NLL normalization + (3) class-weighted CE biased toward coarse labels + (4) source-weight rebalance. Empirical iteration found that \u00a71 and \u00a73 destabilize sustained training at every LR tested (5e-4, 3e-4, 1.5e-4 \u2014 even the v0.3.0-safe LR), and on the golden v0.1.2 eval (4535 entries) they slightly REGRESS country/postcode F1 vs v0.3.0. SHIPPED recipe is the \u00a74-only ablation (v0.3.0 dual-loss + v0.4.0 source-weight rebalance) at lr=1.5e-4, step 2200. Modest fine-label gains (street +0.03, house_number +0.01), modest coarse-F1 regression (country -0.07, postcode -0.07). \u00a71/\u00a73 deferred to v0.4.1 corpus-side investigation per issue's '2K divergence' clause. Full ablation matrix retrospective in LOG.md.",
|
|
163
185
|
"format": {
|
|
164
186
|
"model": "ONNX int8 dynamic",
|
|
165
187
|
"tokenizer": "SentencePiece unigram, byte_fallback=true, vocab_size=16000",
|
|
@@ -171,5 +193,5 @@
|
|
|
171
193
|
"tokenizer": "tokenizer.model",
|
|
172
194
|
"model_card": "model-card.json"
|
|
173
195
|
},
|
|
174
|
-
"base_relpath": "/data/models/checkpoints/
|
|
196
|
+
"base_relpath": "/data/models/checkpoints/v0_4_0-stableLR-source-only/step-002200"
|
|
175
197
|
}
|
package/model.onnx
CHANGED
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,20 +1,19 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
}
|
|
2
|
+
"name": "@mailwoman/neural-weights-fr-fr",
|
|
3
|
+
"version": "4.1.0",
|
|
4
|
+
"description": "Mailwoman neural-classifier weights for locale 'fr-fr'. Data-only package — loaded by @mailwoman/neural at runtime.",
|
|
5
|
+
"license": "AGPL-3.0-only",
|
|
6
|
+
"repository": {
|
|
7
|
+
"type": "git",
|
|
8
|
+
"url": "https://github.com/sister-software/mailwoman"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"model.onnx",
|
|
12
|
+
"tokenizer.model",
|
|
13
|
+
"model-card.json",
|
|
14
|
+
"README.md"
|
|
15
|
+
],
|
|
16
|
+
"publishConfig": {
|
|
17
|
+
"access": "public"
|
|
18
|
+
}
|
|
19
|
+
}
|
package/tokenizer.model
CHANGED
|
Binary file
|