@mailwoman/neural-weights-en-us 2.2.0 → 4.0.0
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/model-card.json +47 -44
- package/model.onnx +0 -0
- package/package.json +1 -1
- package/tokenizer.model +0 -0
package/model-card.json
CHANGED
|
@@ -1,18 +1,17 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "neural-weights-en-us",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"
|
|
3
|
+
"version": "4.0.0",
|
|
4
|
+
"model_lineage": "Stage 3 / step 100000 (formerly v0.6.0) \u2014 relabeled to the unified 4.0.0 release version; tokenizer 0.6.0-a0",
|
|
5
|
+
"phase": "Stage 3 \u2014 street decomposition + PO box + intersection",
|
|
5
6
|
"license": "AGPL-3.0-only",
|
|
6
7
|
"locale": "en-us",
|
|
7
8
|
"training": {
|
|
8
|
-
"corpus_version": "0.4.0",
|
|
9
|
-
"tokenizer_version": "0.
|
|
10
|
-
"steps":
|
|
9
|
+
"corpus_version": "0.4.0+stage3",
|
|
10
|
+
"tokenizer_version": "0.6.0-a0",
|
|
11
|
+
"steps": 100000,
|
|
12
|
+
"best_step": 100000,
|
|
11
13
|
"hardware": "NVIDIA A100-SXM4-40GB (Modal cloud)",
|
|
12
|
-
"
|
|
13
|
-
"started_at": "2026-05-25T06:04:00Z",
|
|
14
|
-
"completed_at": "2026-05-25T06:39:00Z",
|
|
15
|
-
"recipe": "CE-only (crf_loss_weight=0.0), h384, batch=128 direct, constant LR=1.5e-4, phrase_priors=ON, class_weights tuned"
|
|
14
|
+
"recipe": "v0.5.1 base + STAGE3 (33 BIO labels) + synth-po-box source @ 1.5x. CE-only (crf_loss_weight=0.0 after two NaN attempts with crf>0; the 33x33 transition table + bf16 was numerically unstable). lr=1.5e-4 constant, warmup=1000."
|
|
16
15
|
},
|
|
17
16
|
"architecture": {
|
|
18
17
|
"hidden_size": 384,
|
|
@@ -20,23 +19,13 @@
|
|
|
20
19
|
"num_attention_heads": 6,
|
|
21
20
|
"intermediate_size": 1536,
|
|
22
21
|
"max_position_embeddings": 128,
|
|
23
|
-
"
|
|
22
|
+
"vocab_size": 48000,
|
|
23
|
+
"num_labels": 33,
|
|
24
|
+
"params": "29.3M (29M encoder + 9M embedding from 48K vocab)",
|
|
24
25
|
"crf_at_training": false,
|
|
25
26
|
"crf_at_inference": true,
|
|
26
27
|
"phrase_priors": true
|
|
27
28
|
},
|
|
28
|
-
"components_supported": [
|
|
29
|
-
"country",
|
|
30
|
-
"region",
|
|
31
|
-
"locality",
|
|
32
|
-
"dependent_locality",
|
|
33
|
-
"postcode",
|
|
34
|
-
"subregion",
|
|
35
|
-
"cedex",
|
|
36
|
-
"venue",
|
|
37
|
-
"street",
|
|
38
|
-
"house_number"
|
|
39
|
-
],
|
|
40
29
|
"labels": [
|
|
41
30
|
"O",
|
|
42
31
|
"B-country",
|
|
@@ -58,37 +47,51 @@
|
|
|
58
47
|
"B-street",
|
|
59
48
|
"I-street",
|
|
60
49
|
"B-house_number",
|
|
61
|
-
"I-house_number"
|
|
50
|
+
"I-house_number",
|
|
51
|
+
"B-street_prefix",
|
|
52
|
+
"I-street_prefix",
|
|
53
|
+
"B-street_suffix",
|
|
54
|
+
"I-street_suffix",
|
|
55
|
+
"B-unit",
|
|
56
|
+
"I-unit",
|
|
57
|
+
"B-po_box",
|
|
58
|
+
"I-po_box",
|
|
59
|
+
"B-intersection_a",
|
|
60
|
+
"I-intersection_a",
|
|
61
|
+
"B-intersection_b",
|
|
62
|
+
"I-intersection_b"
|
|
62
63
|
],
|
|
63
|
-
"
|
|
64
|
-
"
|
|
65
|
-
"
|
|
66
|
-
"
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
"
|
|
77
|
-
"
|
|
78
|
-
"
|
|
79
|
-
"
|
|
64
|
+
"components_supported": [
|
|
65
|
+
"country",
|
|
66
|
+
"region",
|
|
67
|
+
"locality",
|
|
68
|
+
"dependent_locality",
|
|
69
|
+
"postcode",
|
|
70
|
+
"subregion",
|
|
71
|
+
"cedex",
|
|
72
|
+
"venue",
|
|
73
|
+
"street",
|
|
74
|
+
"house_number",
|
|
75
|
+
"street_prefix",
|
|
76
|
+
"street_suffix",
|
|
77
|
+
"unit",
|
|
78
|
+
"po_box",
|
|
79
|
+
"intersection_a",
|
|
80
|
+
"intersection_b"
|
|
80
81
|
],
|
|
81
|
-
"notes": "v0.
|
|
82
|
+
"notes": "v0.6.0 \u2014 Stage 3 ships. Schema expanded from 10 to 16 tags / 21 to 33 BIO labels. STAGE2 label IDs preserved exactly (new tags at IDs 21-32). TIGER, NAD, and BAN adapters now emit street_prefix/street/street_suffix/unit from existing structured input. New synth-po-box corpus source provides 50K PO box training examples across en-US/CA/GB/AU, fr-FR/CA, es-ES/MX/AR.",
|
|
82
83
|
"format": {
|
|
83
|
-
"model": "ONNX
|
|
84
|
+
"model": "ONNX int8 dynamic (quantized from fp32)",
|
|
84
85
|
"tokenizer": "SentencePiece unigram, byte_fallback=true, vocab_size=48000",
|
|
85
86
|
"max_sequence_length": 128,
|
|
86
|
-
"opset": 17
|
|
87
|
+
"opset": 17,
|
|
88
|
+
"fp32_size_mb": 111.8,
|
|
89
|
+
"int8_size_mb": 28.1
|
|
87
90
|
},
|
|
88
91
|
"files": {
|
|
89
92
|
"model": "model.onnx",
|
|
90
93
|
"tokenizer": "tokenizer.model",
|
|
91
94
|
"model_card": "model-card.json"
|
|
92
95
|
},
|
|
93
|
-
"base_relpath": "/data/output/checkpoints/step-
|
|
96
|
+
"base_relpath": "/data/output-v060/checkpoints/step-100000"
|
|
94
97
|
}
|
package/model.onnx
CHANGED
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mailwoman/neural-weights-en-us",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "4.0.0",
|
|
4
4
|
"description": "Mailwoman neural-classifier weights for locale 'en-us'. Data-only package — loaded by @mailwoman/neural at runtime.",
|
|
5
5
|
"license": "AGPL-3.0-only",
|
|
6
6
|
"repository": {
|
package/tokenizer.model
CHANGED
|
Binary file
|