@mailwoman/corpus 4.2.0 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -30,6 +30,18 @@ export interface PoBoxBaseTuple {
|
|
|
30
30
|
postcode: string;
|
|
31
31
|
country: string;
|
|
32
32
|
}
|
|
33
|
+
export interface LocaleTemplate {
|
|
34
|
+
locale: string;
|
|
35
|
+
leaders: ReadonlyArray<string>;
|
|
36
|
+
pmb?: ReadonlyArray<string>;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* The per-locale PO-box designator vocabulary (DeepSeek-signed list, see the header). Exported so
|
|
40
|
+
* shard builders (scripts/build-po-box-cedex-shard.mjs) can reuse THIS list as the single source of
|
|
41
|
+
* truth for non-US leaders instead of re-deriving it — the US slice additionally has
|
|
42
|
+
* `@mailwoman/codex/us` `US_PO_BOX_DESIGNATORS`/`isPOBox` as its matcher-side truth.
|
|
43
|
+
*/
|
|
44
|
+
export declare const PO_BOX_LOCALE_TEMPLATES: ReadonlyArray<LocaleTemplate>;
|
|
33
45
|
/**
|
|
34
46
|
* Inject number-format noise into a box number string. Returns the noisy variant or the original
|
|
35
47
|
* (10% probability of noise per the design).
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"synthesize-po-box.d.ts","sourceRoot":"","sources":["../../src/synthesize-po-box.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAE9C,MAAM,WAAW,cAAc;IAC9B,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,OAAO,EAAE,MAAM,CAAA;CACf;
|
|
1
|
+
{"version":3,"file":"synthesize-po-box.d.ts","sourceRoot":"","sources":["../../src/synthesize-po-box.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAE9C,MAAM,WAAW,cAAc;IAC9B,QAAQ,EAAE,MAAM,CAAA;IAChB,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,OAAO,EAAE,MAAM,CAAA;CACf;AAED,MAAM,WAAW,cAAc;IAC9B,MAAM,EAAE,MAAM,CAAA;IACd,OAAO,EAAE,aAAa,CAAC,MAAM,CAAC,CAAA;IAE9B,GAAG,CAAC,EAAE,aAAa,CAAC,MAAM,CAAC,CAAA;CAC3B;AAED;;;;;GAKG;AACH,eAAO,MAAM,uBAAuB,EAAE,aAAa,CAAC,cAAc,CAuCjE,CAAA;AAID;;;GAGG;AACH,wBAAgB,qBAAqB,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,MAAM,GAAG,MAAM,CAY/E;AAED;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,MAAM,CAEzE;AAED,MAAM,WAAW,mBAAmB;IACnC,GAAG,EAAE,MAAM,CAAA;IACX,UAAU,EAAE,YAAY,CAAC,YAAY,CAAC,CAAA;IACtC,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,QAAQ,GAAG,iBAAiB,CAAA;CACtC;AAED,MAAM,WAAW,kBAAkB;IAClC,gFAAgF;IAChF,MAAM,CAAC,EAAE,MAAM,MAAM,CAAA;IACrB,uDAAuD;IACvD,UAAU,CAAC,EAAE,CAAC,MAAM,EAAE,MAAM,MAAM,KAAK,MAAM,CAAA;IAC7C,4FAA4F;IAC5F,QAAQ,CAAC,EAAE,MAAM,CAAA;CACjB;AAWD;;;;GAIG;AACH,wBAAgB,kBAAkB,CACjC,IAAI,EAAE,cAAc,GAAG;IAAE,MAAM,CAAC,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAA;CAAE,EAChE,IAAI,GAAE,kBAAuB,GAC3B,mBAAmB,GAAG,IAAI,CAkD5B;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAWvD;AAED,qFAAqF;AACrF,wBAAgB,gBAAgB,IAAI,aAAa,CAAC,MAAM,CAAC,CAExD"}
|
|
@@ -23,7 +23,13 @@
|
|
|
23
23
|
* - USPS Pub 28 §28C2.040 — Private Mailbox formatting
|
|
24
24
|
* - USPS DMM 508 §4.1.4 / §4.5.4 — PO Box and street-addressed PO Box
|
|
25
25
|
*/
|
|
26
|
-
const LOCALE_TEMPLATES = [
|
|
26
|
+
/**
|
|
27
|
+
* The per-locale PO-box designator vocabulary (DeepSeek-signed list, see the header). Exported so
|
|
28
|
+
* shard builders (scripts/build-po-box-cedex-shard.mjs) can reuse THIS list as the single source of
|
|
29
|
+
* truth for non-US leaders instead of re-deriving it — the US slice additionally has
|
|
30
|
+
* `@mailwoman/codex/us` `US_PO_BOX_DESIGNATORS`/`isPOBox` as its matcher-side truth.
|
|
31
|
+
*/
|
|
32
|
+
export const PO_BOX_LOCALE_TEMPLATES = [
|
|
27
33
|
{
|
|
28
34
|
locale: "en-US",
|
|
29
35
|
leaders: ["PO Box", "P.O. Box", "P.O.Box", "PO BOX", "POB", "Post Office Box", "Box"],
|
|
@@ -63,7 +69,7 @@ const LOCALE_TEMPLATES = [
|
|
|
63
69
|
leaders: ["Casilla", "Casilla de Correo", "CC"],
|
|
64
70
|
},
|
|
65
71
|
];
|
|
66
|
-
const LEADERS_BY_LOCALE = new Map(LOCALE_TEMPLATES.map((t) => [t.locale, t]));
|
|
72
|
+
const LEADERS_BY_LOCALE = new Map(PO_BOX_LOCALE_TEMPLATES.map((t) => [t.locale, t]));
|
|
67
73
|
/**
|
|
68
74
|
* Inject number-format noise into a box number string. Returns the noisy variant or the original
|
|
69
75
|
* (10% probability of noise per the design).
|
|
@@ -181,6 +187,6 @@ export function countryToLocale(country) {
|
|
|
181
187
|
}
|
|
182
188
|
/** All locales we synthesize for. Exposed for tests and for source-weight tuning. */
|
|
183
189
|
export function supportedLocales() {
|
|
184
|
-
return LOCALE_TEMPLATES.map((t) => t.locale);
|
|
190
|
+
return PO_BOX_LOCALE_TEMPLATES.map((t) => t.locale);
|
|
185
191
|
}
|
|
186
192
|
//# sourceMappingURL=synthesize-po-box.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"synthesize-po-box.js","sourceRoot":"","sources":["../../src/synthesize-po-box.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAkBH,MAAM,
|
|
1
|
+
{"version":3,"file":"synthesize-po-box.js","sourceRoot":"","sources":["../../src/synthesize-po-box.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAkBH;;;;;GAKG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAkC;IACrE;QACC,MAAM,EAAE,OAAO;QACf,OAAO,EAAE,CAAC,QAAQ,EAAE,UAAU,EAAE,SAAS,EAAE,QAAQ,EAAE,KAAK,EAAE,iBAAiB,EAAE,KAAK,CAAC;QACrF,GAAG,EAAE,CAAC,KAAK,EAAE,GAAG,CAAC;KACjB;IACD;QACC,MAAM,EAAE,OAAO;QACf,OAAO,EAAE,CAAC,QAAQ,EAAE,UAAU,EAAE,KAAK,EAAE,iBAAiB,CAAC;QACzD,GAAG,EAAE,CAAC,KAAK,EAAE,GAAG,CAAC;KACjB;IACD;QACC,MAAM,EAAE,OAAO;QACf,OAAO,EAAE,CAAC,QAAQ,EAAE,UAAU,EAAE,iBAAiB,CAAC;KAClD;IACD;QACC,MAAM,EAAE,OAAO;QACf,OAAO,EAAE,CAAC,QAAQ,EAAE,UAAU,EAAE,iBAAiB,EAAE,SAAS,EAAE,YAAY,CAAC;KAC3E;IACD;QACC,MAAM,EAAE,OAAO;QACf,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,eAAe,EAAE,KAAK,CAAC;KAC/C;IACD;QACC,MAAM,EAAE,OAAO;QACf,OAAO,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,cAAc,EAAE,IAAI,EAAE,MAAM,CAAC;KACrD;IACD;QACC,MAAM,EAAE,OAAO;QACf,OAAO,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,qBAAqB,CAAC;KAC7D;IACD;QACC,MAAM,EAAE,OAAO;QACf,OAAO,EAAE,CAAC,OAAO,EAAE,UAAU,EAAE,iBAAiB,EAAE,IAAI,CAAC;KACvD;IACD;QACC,MAAM,EAAE,OAAO;QACf,OAAO,EAAE,CAAC,SAAS,EAAE,mBAAmB,EAAE,IAAI,CAAC;KAC/C;CACD,CAAA;AAED,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAyB,uBAAuB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,CAAA;AAE5G;;;GAGG;AACH,MAAM,UAAU,qBAAqB,CAAC,GAAW,EAAE,MAAoB;IACtE,IAAI,MAAM,EAAE,GAAG,GAAG;QAAE,OAAO,GAAG,CAAA;IAC9B,MAAM,QAAQ,GAAiC;QAC9C,qDAAqD;QACrD,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/D,wCAAwC;QACxC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/D,mDAAmD;QACnD,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;KAC5B,CAAA;IACD,MAAM,CAAC,GAAG,QAAQ,CAAC,IAAI,CAAC,KAA
K,CAAC,MAAM,EAAE,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAE,CAAA;IAC3D,OAAO,CAAC,CAAC,GAAG,CAAC,CAAA;AACd,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,kBAAkB,CAAC,MAAc,EAAE,MAAc;IAChE,OAAO,GAAG,MAAM,IAAI,MAAM,EAAE,CAAA;AAC7B,CAAC;AAkBD,SAAS,iBAAiB,CAAC,MAAoB;IAC9C,sFAAsF;IACtF,MAAM,CAAC,GAAG,MAAM,EAAE,CAAA;IAClB,IAAI,CAAC,GAAG,GAAG;QAAE,OAAO,MAAM,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,EAAE,CAAC,CAAC,CAAA,CAAC,OAAO;IACjE,IAAI,CAAC,GAAG,GAAG;QAAE,OAAO,MAAM,CAAC,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,GAAG,CAAC,CAAC,CAAA,CAAC,UAAU;IACvE,IAAI,CAAC,GAAG,IAAI;QAAE,OAAO,MAAM,CAAC,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAA,CAAC,YAAY;IAC5E,OAAO,MAAM,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,KAAK,CAAC,CAAC,CAAA,CAAC,cAAc;AACnE,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,kBAAkB,CACjC,IAAgE,EAChE,OAA2B,EAAE;IAE7B,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM,CAAA;IACzC,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,IAAI,iBAAiB,CAAA;IACvD,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,GAAG,CAAA;IAErC,MAAM,MAAM,GAAG,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IAC5C,MAAM,GAAG,GAAG,iBAAiB,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;IACzC,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAA;IAErB,MAAM,MAAM,GAAG,qBAAqB,CAAC,UAAU,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC,CAAA;IAChE,MAAM,MAAM,GAAG,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,CAAE,CAAA;IACtE,MAAM,WAAW,GAAG,kBAAkB,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAEtD,mEAAmE;IACnE,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,IAAI,GAAG,CAAC,GAAG,IAAI,MAAM,EAAE,GAAG,QAAQ,CAAA;IAC7D,IAAI,OAAO,EAAE,CAAC;QACb,MAAM,SAAS,GAAG,GAAG,CAAC,GAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,GAAG,GAAG,CAAC,GAAI,CAAC,MAAM,CAAC,CAAE,CAAA;QACnE,MAAM,SAAS,GAAG,kBAAkB,CAAC,SAAS,EAAE,MAAM,CAAC,CAAA;QACvD,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,MAAO,CAAA;QACzF,MAAM,GAAG,GAAG,GAAG,UAAU,KAAK,SAAS,KAAK,IAAI,CAAC,QAAQ,KAAK,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAA;QAC5F,OAAO;YACN,GAAG;YACH,UAAU,EAAE;gBACX,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,CAA
C,CAAC,EAAE,YAAY,EAAE,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC/D,MAAM,EAAE,IAAI,CAAC,MAAO;gBACpB,MAAM,EAAE,SAAS;gBACjB,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,MAAM,EAAE,IAAI,CAAC,MAAM;gBACnB,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,OAAO,EAAE,IAAI,CAAC,OAAO;aACrB;YACD,MAAM;YACN,QAAQ,EAAE,iBAAiB;SAC3B,CAAA;IACF,CAAC;IAED,sDAAsD;IACtD,MAAM,GAAG,GAAG,GAAG,WAAW,KAAK,IAAI,CAAC,QAAQ,KAAK,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAA;IAC/E,OAAO;QACN,GAAG;QACH,UAAU,EAAE;YACX,MAAM,EAAE,WAAW;YACnB,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,OAAO,EAAE,IAAI,CAAC,OAAO;SACrB;QACD,MAAM;QACN,QAAQ,EAAE,QAAQ;KAClB,CAAA;AACF,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,OAAe;IAC9C,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;IACtC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,KAAK,eAAe;QAAE,OAAO,OAAO,CAAA;IACtE,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,KAAK,QAAQ;QAAE,OAAO,OAAO,CAAA;IAC/D,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,KAAK,gBAAgB;QAAE,OAAO,OAAO,CAAA;IACrF,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,KAAK,WAAW;QAAE,OAAO,OAAO,CAAA;IAClE,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,KAAK,QAAQ;QAAE,OAAO,OAAO,CAAA;IAC/D,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,KAAK,OAAO;QAAE,OAAO,OAAO,CAAA;IAC9D,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,KAAK,QAAQ;QAAE,OAAO,OAAO,CAAA;IAC/D,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,KAAK,WAAW;QAAE,OAAO,OAAO,CAAA;IAClE,OAAO,OAAO,CAAA;AACf,CAAC;AAED,qFAAqF;AACrF,MAAM,UAAU,gBAAgB;IAC/B,OAAO,uBAAuB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAA;AACpD,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mailwoman/corpus",
|
|
3
|
-
"version": "4.
|
|
3
|
+
"version": "4.4.0",
|
|
4
4
|
"description": "Mailwoman corpus pipeline: BIO-labeled dataset builder for the neural classifier.",
|
|
5
5
|
"license": "AGPL-3.0-only",
|
|
6
6
|
"repository": {
|
|
@@ -18,8 +18,8 @@
|
|
|
18
18
|
"dependencies": {
|
|
19
19
|
"@dsnp/parquetjs": "1.8.7",
|
|
20
20
|
"@fragaria/address-formatter": "^6.7.1",
|
|
21
|
-
"@mailwoman/codex": "4.
|
|
22
|
-
"@mailwoman/core": "4.
|
|
21
|
+
"@mailwoman/codex": "4.4.0",
|
|
22
|
+
"@mailwoman/core": "4.4.0",
|
|
23
23
|
"csv-parse": "^5.6.0",
|
|
24
24
|
"fastest-levenshtein": "^1.0.16",
|
|
25
25
|
"lru-cache": "^10.4.3"
|