@mailwoman/neural-weights-fr-fr 2.0.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -13
- package/model-card.json +76 -47
- package/package.json +18 -17
package/README.md
CHANGED
|
@@ -1,30 +1,33 @@
|
|
|
1
1
|
# @mailwoman/neural-weights-fr-fr
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Stage 2 (coarse + venue/street/house_number) Mailwoman neural-classifier weights.
|
|
4
4
|
|
|
5
5
|
- locale: **fr-fr**
|
|
6
|
-
- corpus: **0.
|
|
7
|
-
- training steps: **
|
|
6
|
+
- corpus: **0.3.0**
|
|
7
|
+
- training steps: **1800**
|
|
8
8
|
- hardware: **AMD Radeon 780M (gfx1103) bf16 ~14.6 GiB GTT**
|
|
9
9
|
|
|
10
|
-
##
|
|
10
|
+
## Per-component F1 targets
|
|
11
11
|
|
|
12
|
-
**⚠ Below
|
|
12
|
+
**⚠ Below per-component F1 targets:**
|
|
13
13
|
|
|
14
|
-
- `country` F1 = **0.
|
|
15
|
-
- `region` F1 = **0.
|
|
16
|
-
- `locality` F1 = **0.
|
|
17
|
-
- `postcode` F1 = **0.
|
|
14
|
+
- `country` F1 = **0.2796** (target ≥0.95)
|
|
15
|
+
- `region` F1 = **0.1759** (target ≥0.95)
|
|
16
|
+
- `locality` F1 = **0.2657** (target ≥0.95)
|
|
17
|
+
- `postcode` F1 = **0.7554** (target ≥0.95)
|
|
18
|
+
- `venue` F1 = **0.3941** (target ≥0.60)
|
|
19
|
+
- `street` F1 = **0.2660** (target ≥0.70)
|
|
20
|
+
- `house_number` F1 = **0.7835** (target ≥0.80)
|
|
18
21
|
|
|
19
22
|
## Eval (golden set)
|
|
20
23
|
|
|
21
|
-
- entries: **
|
|
22
|
-
- full-parse exact match: **0.
|
|
23
|
-
- mean token confidence: **0.
|
|
24
|
+
- entries: **4535**
|
|
25
|
+
- full-parse exact match: **0.1074**
|
|
26
|
+
- mean token confidence: **0.8566**
|
|
24
27
|
|
|
25
28
|
## Components supported
|
|
26
29
|
|
|
27
|
-
Stage
|
|
30
|
+
Stage 2 ships coarse (country / region / locality / dependent_locality / postcode / subregion / cedex) plus fine-grained venue / street / house_number. Token classifier emits 21 BIO labels.
|
|
28
31
|
|
|
29
32
|
## Files
|
|
30
33
|
|
package/model-card.json
CHANGED
|
@@ -1,53 +1,64 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "neural-weights-fr-fr",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"phase": "Stage
|
|
3
|
+
"version": "3.0.0",
|
|
4
|
+
"phase": "Stage 2 (coarse + venue/street/house_number)",
|
|
5
5
|
"license": "AGPL-3.0-only",
|
|
6
6
|
"locale": "fr-fr",
|
|
7
7
|
"training": {
|
|
8
|
-
"corpus_version": "0.
|
|
8
|
+
"corpus_version": "0.3.0",
|
|
9
9
|
"tokenizer_version": "0.1.0",
|
|
10
|
-
"steps":
|
|
10
|
+
"steps": 1800,
|
|
11
11
|
"hardware": "AMD Radeon 780M (gfx1103) bf16 ~14.6 GiB GTT",
|
|
12
|
-
"duration_seconds":
|
|
12
|
+
"duration_seconds": 1067.0,
|
|
13
13
|
"started_at": null,
|
|
14
|
-
"completed_at": "2026-05-
|
|
14
|
+
"completed_at": "2026-05-22T10:22:18.739046Z"
|
|
15
15
|
},
|
|
16
|
-
"components_supported": [
|
|
16
|
+
"components_supported": [
|
|
17
|
+
"country",
|
|
18
|
+
"region",
|
|
19
|
+
"locality",
|
|
20
|
+
"dependent_locality",
|
|
21
|
+
"postcode",
|
|
22
|
+
"subregion",
|
|
23
|
+
"cedex",
|
|
24
|
+
"venue",
|
|
25
|
+
"street",
|
|
26
|
+
"house_number"
|
|
27
|
+
],
|
|
17
28
|
"eval": {
|
|
18
|
-
"n_entries":
|
|
19
|
-
"full_parse_exact_match": 0.
|
|
20
|
-
"mean_token_confidence": 0.
|
|
29
|
+
"n_entries": 4535,
|
|
30
|
+
"full_parse_exact_match": 0.1073869900771775,
|
|
31
|
+
"mean_token_confidence": 0.856554333787935,
|
|
21
32
|
"per_component": {
|
|
22
33
|
"country": {
|
|
23
|
-
"precision": 0.
|
|
24
|
-
"recall": 0.
|
|
25
|
-
"f1": 0.
|
|
26
|
-
"support":
|
|
34
|
+
"precision": 0.2954545454532025,
|
|
35
|
+
"recall": 0.2653061224478967,
|
|
36
|
+
"f1": 0.2795698919733611,
|
|
37
|
+
"support": 245
|
|
27
38
|
},
|
|
28
39
|
"region": {
|
|
29
|
-
"precision": 0.
|
|
30
|
-
"recall": 0.
|
|
31
|
-
"f1": 0.
|
|
32
|
-
"support":
|
|
40
|
+
"precision": 0.4411027568916778,
|
|
41
|
+
"recall": 0.10982839313569116,
|
|
42
|
+
"f1": 0.17586809860649805,
|
|
43
|
+
"support": 3205
|
|
33
44
|
},
|
|
34
45
|
"locality": {
|
|
35
|
-
"precision": 0.
|
|
36
|
-
"recall": 0.
|
|
37
|
-
"f1": 0.
|
|
38
|
-
"support":
|
|
46
|
+
"precision": 0.23303370786511615,
|
|
47
|
+
"recall": 0.3089067619897799,
|
|
48
|
+
"f1": 0.26565902346259507,
|
|
49
|
+
"support": 3357
|
|
39
50
|
},
|
|
40
51
|
"dependent_locality": {
|
|
41
52
|
"precision": 0.0,
|
|
42
53
|
"recall": 0.0,
|
|
43
54
|
"f1": 0.0,
|
|
44
|
-
"support":
|
|
55
|
+
"support": 40
|
|
45
56
|
},
|
|
46
57
|
"postcode": {
|
|
47
|
-
"precision": 0.
|
|
48
|
-
"recall": 0.
|
|
49
|
-
"f1": 0.
|
|
50
|
-
"support":
|
|
58
|
+
"precision": 0.8426270136303831,
|
|
59
|
+
"recall": 0.684563758389032,
|
|
60
|
+
"f1": 0.7554156632710455,
|
|
61
|
+
"support": 2980
|
|
51
62
|
},
|
|
52
63
|
"subregion": {
|
|
53
64
|
"precision": 0.0,
|
|
@@ -60,6 +71,24 @@
|
|
|
60
71
|
"recall": 0.0,
|
|
61
72
|
"f1": 0.0,
|
|
62
73
|
"support": 1
|
|
74
|
+
},
|
|
75
|
+
"venue": {
|
|
76
|
+
"precision": 0.38622493461169466,
|
|
77
|
+
"recall": 0.4023614895545846,
|
|
78
|
+
"f1": 0.39412811337886233,
|
|
79
|
+
"support": 1101
|
|
80
|
+
},
|
|
81
|
+
"street": {
|
|
82
|
+
"precision": 0.3326499231161801,
|
|
83
|
+
"recall": 0.22165300546440517,
|
|
84
|
+
"f1": 0.2660381220860398,
|
|
85
|
+
"support": 2928
|
|
86
|
+
},
|
|
87
|
+
"house_number": {
|
|
88
|
+
"precision": 0.7315737051789185,
|
|
89
|
+
"recall": 0.8432835820890682,
|
|
90
|
+
"f1": 0.7834666661687646,
|
|
91
|
+
"support": 1742
|
|
63
92
|
}
|
|
64
93
|
},
|
|
65
94
|
"calibration": [
|
|
@@ -72,56 +101,56 @@
|
|
|
72
101
|
{
|
|
73
102
|
"low": 0.1,
|
|
74
103
|
"high": 0.2,
|
|
75
|
-
"n":
|
|
76
|
-
"acc": 0.
|
|
104
|
+
"n": 19,
|
|
105
|
+
"acc": 0.5789473684210527
|
|
77
106
|
},
|
|
78
107
|
{
|
|
79
108
|
"low": 0.2,
|
|
80
109
|
"high": 0.3,
|
|
81
|
-
"n":
|
|
82
|
-
"acc": 0.
|
|
110
|
+
"n": 316,
|
|
111
|
+
"acc": 0.3322784810126582
|
|
83
112
|
},
|
|
84
113
|
{
|
|
85
114
|
"low": 0.3,
|
|
86
115
|
"high": 0.4,
|
|
87
|
-
"n":
|
|
88
|
-
"acc": 0.
|
|
116
|
+
"n": 1213,
|
|
117
|
+
"acc": 0.37757625721352017
|
|
89
118
|
},
|
|
90
119
|
{
|
|
91
120
|
"low": 0.4,
|
|
92
121
|
"high": 0.5,
|
|
93
|
-
"n":
|
|
94
|
-
"acc": 0.
|
|
122
|
+
"n": 2754,
|
|
123
|
+
"acc": 0.34132171387073346
|
|
95
124
|
},
|
|
96
125
|
{
|
|
97
126
|
"low": 0.5,
|
|
98
127
|
"high": 0.6,
|
|
99
|
-
"n":
|
|
100
|
-
"acc": 0.
|
|
128
|
+
"n": 3647,
|
|
129
|
+
"acc": 0.35481217438990953
|
|
101
130
|
},
|
|
102
131
|
{
|
|
103
132
|
"low": 0.6,
|
|
104
133
|
"high": 0.7,
|
|
105
|
-
"n":
|
|
106
|
-
"acc": 0.
|
|
134
|
+
"n": 3659,
|
|
135
|
+
"acc": 0.3883574747198688
|
|
107
136
|
},
|
|
108
137
|
{
|
|
109
138
|
"low": 0.7,
|
|
110
139
|
"high": 0.8,
|
|
111
|
-
"n":
|
|
112
|
-
"acc": 0.
|
|
140
|
+
"n": 4610,
|
|
141
|
+
"acc": 0.40629067245119305
|
|
113
142
|
},
|
|
114
143
|
{
|
|
115
144
|
"low": 0.8,
|
|
116
145
|
"high": 0.9,
|
|
117
|
-
"n":
|
|
118
|
-
"acc": 0.
|
|
146
|
+
"n": 7716,
|
|
147
|
+
"acc": 0.43960601347848627
|
|
119
148
|
},
|
|
120
149
|
{
|
|
121
150
|
"low": 0.9,
|
|
122
151
|
"high": 1.0,
|
|
123
|
-
"n":
|
|
124
|
-
"acc": 0.
|
|
152
|
+
"n": 37314,
|
|
153
|
+
"acc": 0.5987564989012167
|
|
125
154
|
}
|
|
126
155
|
]
|
|
127
156
|
},
|
|
@@ -130,7 +159,7 @@
|
|
|
130
159
|
"particle-honorific kryptonite (e.g. FR 'Saint-Just-Saint-Rambert') if not in synth set",
|
|
131
160
|
"non-Latin scripts (CJK, Cyrillic) fall through to byte-fallback tokens; F1 unknown"
|
|
132
161
|
],
|
|
133
|
-
"notes": "Stage
|
|
162
|
+
"notes": "Stage 2 v3.0.0 \u2014 same encoder geometry as v2.0.x (8.87M params, 6L/256H/4-heads) plus a linear-chain CRF decoder (+~500 params with a frozen BIO transition mask), label smoothing on the per-token CE leg (disabled in the shipped hparams after iteration; see ship notes), and a 21-label classifier head (was 15) that adds venue / street / house_number BIO classes. Trained on corpus-v0.3.0 which adds the US DOT NAD source (~97M structured 911-grade address points). The CRF transition mask makes orphan-I sequences (e.g. \"Saint Petersburg \u2192 Petersburg\" clipping visible on the v0.2.0 demo) structurally impossible. See evals/scores-by-version.json for the v2.0.x \u2192 v3.0.0 deltas + the per-component F1 on the new fine labels.",
|
|
134
163
|
"format": {
|
|
135
164
|
"model": "ONNX int8 dynamic",
|
|
136
165
|
"tokenizer": "SentencePiece unigram, byte_fallback=true, vocab_size=16000",
|
|
@@ -142,5 +171,5 @@
|
|
|
142
171
|
"tokenizer": "tokenizer.model",
|
|
143
172
|
"model_card": "model-card.json"
|
|
144
173
|
},
|
|
145
|
-
"base_relpath": "/data/models/checkpoints/
|
|
174
|
+
"base_relpath": "/data/models/checkpoints/stage2/step-001800"
|
|
146
175
|
}
|
package/package.json
CHANGED
|
@@ -1,19 +1,20 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
2
|
+
"name": "@mailwoman/neural-weights-fr-fr",
|
|
3
|
+
"version": "3.0.0",
|
|
4
|
+
"description": "Mailwoman neural-classifier weights for locale 'fr-fr'. Data-only package — loaded by @mailwoman/neural at runtime.",
|
|
5
|
+
"license": "AGPL-3.0-only",
|
|
6
|
+
"private": false,
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "https://github.com/sister-software/mailwoman"
|
|
10
|
+
},
|
|
11
|
+
"files": [
|
|
12
|
+
"model.onnx",
|
|
13
|
+
"tokenizer.model",
|
|
14
|
+
"model-card.json",
|
|
15
|
+
"README.md"
|
|
16
|
+
],
|
|
17
|
+
"publishConfig": {
|
|
18
|
+
"access": "public"
|
|
19
|
+
}
|
|
19
20
|
}
|