@mailwoman/neural-weights-fr-fr 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,37 @@
1
+ # @mailwoman/neural-weights-fr-fr
2
+
3
+ Phase 2 / Stage 1 (coarse) Mailwoman neural-classifier weights.
4
+
5
+ - locale: **fr-fr**
6
+ - corpus: **0.2.0**
7
+ - training steps: **50000**
8
+ - hardware: **AMD Radeon 780M (gfx1103) bf16 ~14.6 GiB GTT**
9
+
10
+ ## Phase 2 §6 status
11
+
12
+ **⚠ Below Phase 2 §6 targets (≥95% F1):**
13
+
14
+ - `country` F1 = **0.0000** (target ≥0.95)
15
+ - `region` F1 = **0.8293** (target ≥0.95)
16
+ - `locality` F1 = **0.6471** (target ≥0.95)
17
+ - `postcode` F1 = **0.8594** (target ≥0.95)
18
+
19
+ ## Eval (golden set)
20
+
21
+ - entries: **74**
22
+ - full-parse exact match: **0.5270**
23
+ - mean token confidence: **0.9745**
24
+
25
+ ## Components supported
26
+
27
+ Stage 1 ships coarse-only: country / region / locality / dependent_locality / postcode / subregion / cedex. Street- and venue-level components are explicit future phases.
28
+
29
+ ## Files
30
+
31
+ - `model.onnx` — int8-quantized ONNX model.
32
+ - `tokenizer.model` — SentencePiece unigram tokenizer (matches the corpus version).
33
+ - `model-card.json` — ModelCard with training + eval metadata.
34
+
35
+ ## Loader
36
+
37
+ Loaded at runtime by `@mailwoman/neural`. This package contains no JS code.
package/model-card.json ADDED
@@ -0,0 +1,146 @@
1
+ {
2
+ "name": "neural-weights-fr-fr",
3
+ "version": "0.2.0",
4
+ "phase": "Stage 1 (coarse)",
5
+ "license": "AGPL-3.0-only",
6
+ "locale": "fr-fr",
7
+ "training": {
8
+ "corpus_version": "0.2.0",
9
+ "tokenizer_version": "0.1.0",
10
+ "steps": 50000,
11
+ "hardware": "AMD Radeon 780M (gfx1103) bf16 ~14.6 GiB GTT",
12
+ "duration_seconds": 23520.0,
13
+ "started_at": null,
14
+ "completed_at": "2026-05-18T21:33:27.380730Z"
15
+ },
16
+ "components_supported": ["country", "region", "locality", "dependent_locality", "postcode", "subregion", "cedex"],
17
+ "eval": {
18
+ "n_entries": 74,
19
+ "full_parse_exact_match": 0.527027027027027,
20
+ "mean_token_confidence": 0.974534777700901,
21
+ "per_component": {
22
+ "country": {
23
+ "precision": 0.0,
24
+ "recall": 0.0,
25
+ "f1": 0.0,
26
+ "support": 6
27
+ },
28
+ "region": {
29
+ "precision": 0.8499999999858334,
30
+ "recall": 0.80952380951096,
31
+ "f1": 0.8292682921697403,
32
+ "support": 63
33
+ },
34
+ "locality": {
35
+ "precision": 0.6874999999892578,
36
+ "recall": 0.6111111111026234,
37
+ "f1": 0.6470588230216262,
38
+ "support": 72
39
+ },
40
+ "dependent_locality": {
41
+ "precision": 0.0,
42
+ "recall": 0.0,
43
+ "f1": 0.0,
44
+ "support": 1
45
+ },
46
+ "postcode": {
47
+ "precision": 0.8730158730020157,
48
+ "recall": 0.8461538461408283,
49
+ "f1": 0.8593749994866943,
50
+ "support": 65
51
+ },
52
+ "subregion": {
53
+ "precision": 0.0,
54
+ "recall": 0.0,
55
+ "f1": 0.0,
56
+ "support": 0
57
+ },
58
+ "cedex": {
59
+ "precision": 0.0,
60
+ "recall": 0.0,
61
+ "f1": 0.0,
62
+ "support": 1
63
+ }
64
+ },
65
+ "calibration": [
66
+ {
67
+ "low": 0.0,
68
+ "high": 0.1,
69
+ "n": 0,
70
+ "acc": 0.0
71
+ },
72
+ {
73
+ "low": 0.1,
74
+ "high": 0.2,
75
+ "n": 0,
76
+ "acc": 0.0
77
+ },
78
+ {
79
+ "low": 0.2,
80
+ "high": 0.3,
81
+ "n": 0,
82
+ "acc": 0.0
83
+ },
84
+ {
85
+ "low": 0.3,
86
+ "high": 0.4,
87
+ "n": 5,
88
+ "acc": 0.2
89
+ },
90
+ {
91
+ "low": 0.4,
92
+ "high": 0.5,
93
+ "n": 9,
94
+ "acc": 0.4444444444444444
95
+ },
96
+ {
97
+ "low": 0.5,
98
+ "high": 0.6,
99
+ "n": 20,
100
+ "acc": 0.4
101
+ },
102
+ {
103
+ "low": 0.6,
104
+ "high": 0.7,
105
+ "n": 8,
106
+ "acc": 0.5
107
+ },
108
+ {
109
+ "low": 0.7,
110
+ "high": 0.8,
111
+ "n": 19,
112
+ "acc": 0.3684210526315789
113
+ },
114
+ {
115
+ "low": 0.8,
116
+ "high": 0.9,
117
+ "n": 25,
118
+ "acc": 0.4
119
+ },
120
+ {
121
+ "low": 0.9,
122
+ "high": 1.0,
123
+ "n": 1114,
124
+ "acc": 0.8824057450628366
125
+ }
126
+ ]
127
+ },
128
+ "known_failure_modes": [
129
+ "underperforms on Hawaiian addresses (sparse in training corpus)",
130
+ "particle-honorific kryptonite (e.g. FR 'Saint-Just-Saint-Rambert') if not in synth set",
131
+ "non-Latin scripts (CJK, Cyrillic) fall through to byte-fallback tokens; F1 unknown"
132
+ ],
133
+ "notes": "Stage 1 coarse v0.2.0 \u2014 same architecture as v0.1.0 (8.87M params, 6L/256H/4-heads), trained on the expanded corpus-v0.2.0 (262.7M aligned rows, 6 train sources) with the loader rewrite from issue #43 (source-weighted multinomial sampler + relaxed coarse filter). The v0.1.0 positional-heuristic overfit was driven by a strict country-tag gate that dropped ~94% of v0.2.0 before any source weighting; with the gate relaxed and the loader interleaving sources at the row level, the model now sees a fixed mix of ban/tiger/nppes/state-tx/wof-admin/wof-postalcode per batch instead of mono-source blocks. See evals/scores-by-version.json for the v0.1.0 \u2192 v0.2.0 deltas.",
134
+ "format": {
135
+ "model": "ONNX int8 dynamic",
136
+ "tokenizer": "SentencePiece unigram, byte_fallback=true, vocab_size=16000",
137
+ "max_sequence_length": 128,
138
+ "opset": 17
139
+ },
140
+ "files": {
141
+ "model": "model.onnx",
142
+ "tokenizer": "tokenizer.model",
143
+ "model_card": "model-card.json"
144
+ },
145
+ "base_relpath": "/data/models/checkpoints/stage1-coarse/step-050000"
146
+ }
package/model.onnx ADDED
Binary file
package/package.json ADDED
@@ -0,0 +1,19 @@
1
+ {
2
+ "name": "@mailwoman/neural-weights-fr-fr",
3
+ "version": "2.0.0",
4
+ "description": "Mailwoman neural-classifier weights for locale 'fr-fr'. Data-only package — loaded by @mailwoman/neural at runtime.",
5
+ "license": "AGPL-3.0-only",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "https://github.com/sister-software/mailwoman"
9
+ },
10
+ "files": [
11
+ "model.onnx",
12
+ "tokenizer.model",
13
+ "model-card.json",
14
+ "README.md"
15
+ ],
16
+ "publishConfig": {
17
+ "access": "public"
18
+ }
19
+ }
package/tokenizer.model ADDED
Binary file