@mailwoman/corpus 3.0.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/out/src/adapters/ban/adapter.d.ts.map +1 -1
- package/out/src/adapters/ban/adapter.js +6 -2
- package/out/src/adapters/ban/adapter.js.map +1 -1
- package/out/src/adapters/ban/street-decompose.d.ts +28 -0
- package/out/src/adapters/ban/street-decompose.d.ts.map +1 -0
- package/out/src/adapters/ban/street-decompose.js +78 -0
- package/out/src/adapters/ban/street-decompose.js.map +1 -0
- package/out/src/adapters/synth-po-box/adapter.d.ts +48 -0
- package/out/src/adapters/synth-po-box/adapter.d.ts.map +1 -0
- package/out/src/adapters/synth-po-box/adapter.js +101 -0
- package/out/src/adapters/synth-po-box/adapter.js.map +1 -0
- package/out/src/adapters/tiger/adapter.d.ts.map +1 -1
- package/out/src/adapters/tiger/adapter.js +9 -3
- package/out/src/adapters/tiger/adapter.js.map +1 -1
- package/out/src/adapters/tiger/street-decompose.d.ts +30 -0
- package/out/src/adapters/tiger/street-decompose.d.ts.map +1 -0
- package/out/src/adapters/tiger/street-decompose.js +99 -0
- package/out/src/adapters/tiger/street-decompose.js.map +1 -0
- package/out/src/adapters/usgov-nad/adapter.d.ts.map +1 -1
- package/out/src/adapters/usgov-nad/adapter.js +31 -10
- package/out/src/adapters/usgov-nad/adapter.js.map +1 -1
- package/out/src/adapters/wof-admin-jp/adapter.d.ts +58 -0
- package/out/src/adapters/wof-admin-jp/adapter.d.ts.map +1 -0
- package/out/src/adapters/wof-admin-jp/adapter.js +129 -0
- package/out/src/adapters/wof-admin-jp/adapter.js.map +1 -0
- package/out/src/index.d.ts +6 -0
- package/out/src/index.d.ts.map +1 -1
- package/out/src/index.js +6 -0
- package/out/src/index.js.map +1 -1
- package/out/src/synthesize-german.d.ts +75 -0
- package/out/src/synthesize-german.d.ts.map +1 -0
- package/out/src/synthesize-german.js +116 -0
- package/out/src/synthesize-german.js.map +1 -0
- package/out/src/synthesize-house-venue.d.ts +57 -0
- package/out/src/synthesize-house-venue.d.ts.map +1 -0
- package/out/src/synthesize-house-venue.js +147 -0
- package/out/src/synthesize-house-venue.js.map +1 -0
- package/out/src/synthesize-intersection.d.ts +48 -0
- package/out/src/synthesize-intersection.d.ts.map +1 -0
- package/out/src/synthesize-intersection.js +138 -0
- package/out/src/synthesize-intersection.js.map +1 -0
- package/out/src/synthesize-no-street.d.ts +70 -0
- package/out/src/synthesize-no-street.d.ts.map +1 -0
- package/out/src/synthesize-no-street.js +279 -0
- package/out/src/synthesize-no-street.js.map +1 -0
- package/out/src/synthesize-po-box.d.ts +75 -0
- package/out/src/synthesize-po-box.d.ts.map +1 -0
- package/out/src/synthesize-po-box.js +186 -0
- package/out/src/synthesize-po-box.js.map +1 -0
- package/out/src/synthesize-street.d.ts +53 -0
- package/out/src/synthesize-street.d.ts.map +1 -0
- package/out/src/synthesize-street.js +212 -0
- package/out/src/synthesize-street.js.map +1 -0
- package/out/src/synthesize.js +1 -1
- package/out/src/synthesize.js.map +1 -1
- package/package.json +3 -2
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/ban/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAMH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;
|
|
1
|
+
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/ban/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAMH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAGjF,eAAO,MAAM,cAAc,QAAQ,CAAA;AA4CnC,wBAAgB,gBAAgB,IAAI,aAAa,CAwEhD;AAED,eAAO,MAAM,UAAU,eAAqB,CAAA"}
|
|
@@ -29,6 +29,7 @@ import { parse as csvParse } from "csv-parse";
|
|
|
29
29
|
import { createReadStream } from "node:fs";
|
|
30
30
|
import { stableSourceId } from "../../adapter.js";
|
|
31
31
|
import { reconcileComponents } from "../../format.js";
|
|
32
|
+
import { decomposeFrStreet } from "./street-decompose.js";
|
|
32
33
|
export const BAN_ADAPTER_ID = "ban";
|
|
33
34
|
/**
|
|
34
35
|
* Compose `house_number` from `numero` + `rep`. BAN uses `rep` for repetition indices ("bis",
|
|
@@ -92,11 +93,14 @@ export function createBanAdapter() {
|
|
|
92
93
|
continue;
|
|
93
94
|
if (!house && !postcode)
|
|
94
95
|
continue;
|
|
96
|
+
const decomposed = decomposeFrStreet(street);
|
|
95
97
|
const components = {};
|
|
96
98
|
if (house)
|
|
97
99
|
components.house_number = house;
|
|
98
|
-
if (
|
|
99
|
-
components.
|
|
100
|
+
if (decomposed.prefix)
|
|
101
|
+
components.street_prefix = decomposed.prefix;
|
|
102
|
+
if (decomposed.street)
|
|
103
|
+
components.street = decomposed.street;
|
|
100
104
|
if (postcode)
|
|
101
105
|
components.postcode = postcode;
|
|
102
106
|
if (locality)
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/ban/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,EAAE,KAAK,IAAI,QAAQ,EAAE,MAAM,WAAW,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;
|
|
1
|
+
{"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/ban/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,EAAE,KAAK,IAAI,QAAQ,EAAE,MAAM,WAAW,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAErD,OAAO,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAA;AAEzD,MAAM,CAAC,MAAM,cAAc,GAAG,KAAK,CAAA;AAenC;;;GAGG;AACH,SAAS,kBAAkB,CAAC,MAAc,EAAE,GAAW;IACtD,MAAM,CAAC,GAAG,MAAM,CAAC,IAAI,EAAE,CAAA;IACvB,MAAM,CAAC,GAAG,GAAG,CAAC,IAAI,EAAE,CAAA;IACpB,IAAI,CAAC,CAAC;QAAE,OAAO,EAAE,CAAA;IACjB,OAAO,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAA;AAC3B,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,UAAU,CAAC,KAAa,EAAE,MAAc,EAAE,QAAgB,EAAE,QAAgB;IACpF,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,MAAM,UAAU,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IACnE,IAAI,UAAU;QAAE,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAA;IACtC,MAAM,QAAQ,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IACtE,IAAI,QAAQ;QAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;IAClC,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;AACpD,CAAC;AAED,MAAM,UAAU,gBAAgB;IAC/B,OAAO;QACN,EAAE,EAAE,cAAc;QAClB,cAAc,EAAE,UAAU;QAC1B,WAAW,EAAE,+EAA+E;QAE5F,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,+CAA+C,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YAC/E,CAAC;YAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;YACrE,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CACzB,QAAQ,CAAC;gBACR,SAAS,EAAE,GAAG;gBACd,OAAO,EAAE,IAAI;gBACb,gBAAgB,EAAE,IAAI;gBACtB,YAAY,EAAE,IAAI;gBAClB,kBAAkB,EAAE,IAAI;aACxB,CAAC,CACF,CAAA;YAED,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI,MAA+B,EAAE,CAAC;oBAC5D,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,MAAK;oBAC/B,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IA
AI,IAAI,CAAC,KAAK;wBAAE,MAAK;oBAE5D,MAAM,KAAK,GAAG,kBAAkB,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,EAAE,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC,CAAA;oBACvE,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC7C,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAClD,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAElD,IAAI,CAAC,MAAM,IAAI,CAAC,QAAQ;wBAAE,SAAQ;oBAClC,IAAI,CAAC,KAAK,IAAI,CAAC,QAAQ;wBAAE,SAAQ;oBAEjC,MAAM,UAAU,GAAG,iBAAiB,CAAC,MAAM,CAAC,CAAA;oBAE5C,MAAM,UAAU,GAA+B,EAAE,CAAA;oBACjD,IAAI,KAAK;wBAAE,UAAU,CAAC,YAAY,GAAG,KAAK,CAAA;oBAC1C,IAAI,UAAU,CAAC,MAAM;wBAAE,UAAU,CAAC,aAAa,GAAG,UAAU,CAAC,MAAM,CAAA;oBACnE,IAAI,UAAU,CAAC,MAAM;wBAAE,UAAU,CAAC,MAAM,GAAG,UAAU,CAAC,MAAM,CAAA;oBAC5D,IAAI,QAAQ;wBAAE,UAAU,CAAC,QAAQ,GAAG,QAAQ,CAAA;oBAC5C,IAAI,QAAQ;wBAAE,UAAU,CAAC,QAAQ,GAAG,QAAQ,CAAA;oBAE5C,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAA;oBACzD,IAAI,CAAC,GAAG;wBAAE,SAAQ;oBAElB,MAAM,OAAO,GAAG,mBAAmB,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;oBACpD,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;wBAAE,SAAQ;oBAE/C,MAAM,QAAQ,GAAG,MAAM,CAAC,EAAE,EAAE,IAAI,EAAE;wBACjC,CAAC,CAAC,GAAG,cAAc,IAAI,MAAM,CAAC,EAAE,CAAC,IAAI,EAAE,EAAE;wBACzC,CAAC,CAAC,cAAc,CAAC,cAAc,EAAE,OAAO,CAAC,CAAA;oBAE1C,MAAM;wBACL,GAAG;wBACH,UAAU,EAAE,OAAO;wBACnB,OAAO,EAAE,IAAI;wBACb,MAAM,EAAE,OAAO;wBACf,MAAM,EAAE,cAAc;wBACtB,SAAS,EAAE,QAAQ;wBACnB,cAAc,EAAE,EAAE;wBAClB,OAAO,EAAE,UAAU;qBACnB,CAAA;oBACD,OAAO,EAAE,CAAA;gBACV,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,MAAM,CAAC,OAAO,EAAE,CAAA;YACjB,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAAG,gBAAgB,EAAE,CAAA"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Decompose a French street name into Stage 3 components. French convention puts the street type as
|
|
7
|
+
* a leading word: "Rue de Rivoli", "Avenue des Champs-Élysées", "Bd Voltaire".
|
|
8
|
+
*
|
|
9
|
+
* The street type becomes street_prefix in our schema. The remaining tokens form the street name.
|
|
10
|
+
*
|
|
11
|
+
* Examples: "Rue de Rivoli" → { prefix: "Rue", street: "de Rivoli" } "Avenue des Champs-Élysées" →
|
|
12
|
+
* { prefix: "Avenue", street: "des Champs-Élysées" } "Boulevard Voltaire" → { prefix:
|
|
13
|
+
* "Boulevard", street: "Voltaire" }
|
|
14
|
+
*
|
|
15
|
+
* Sources street types from `core/data/libpostal/dictionaries/fr/street_types.txt`.
|
|
16
|
+
*/
|
|
17
|
+
export interface DecomposedFrStreet {
|
|
18
|
+
prefix: string | null;
|
|
19
|
+
street: string;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Decompose a French street name into prefix (leading type word) and street name.
|
|
23
|
+
*
|
|
24
|
+
* If the first 1-2 tokens match a known street type (allowing for multi-word like "ancien chemin"),
|
|
25
|
+
* they become the prefix. Returns `{ prefix: null, street: original }` if no match.
|
|
26
|
+
*/
|
|
27
|
+
export declare function decomposeFrStreet(fullname: string): DecomposedFrStreet;
|
|
28
|
+
//# sourceMappingURL=street-decompose.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"street-decompose.d.ts","sourceRoot":"","sources":["../../../../src/adapters/ban/street-decompose.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAoCH,MAAM,WAAW,kBAAkB;IAClC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;CACd;AAED;;;;;GAKG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,kBAAkB,CAwBtE"}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Decompose a French street name into Stage 3 components. French convention puts the street type as
|
|
7
|
+
* a leading word: "Rue de Rivoli", "Avenue des Champs-Élysées", "Bd Voltaire".
|
|
8
|
+
*
|
|
9
|
+
* The street type becomes street_prefix in our schema. The remaining tokens form the street name.
|
|
10
|
+
*
|
|
11
|
+
* Examples: "Rue de Rivoli" → { prefix: "Rue", street: "de Rivoli" } "Avenue des Champs-Élysées" →
|
|
12
|
+
* { prefix: "Avenue", street: "des Champs-Élysées" } "Boulevard Voltaire" → { prefix:
|
|
13
|
+
* "Boulevard", street: "Voltaire" }
|
|
14
|
+
*
|
|
15
|
+
* Sources street types from `core/data/libpostal/dictionaries/fr/street_types.txt`.
|
|
16
|
+
*/
|
|
17
|
+
import { readFileSync } from "node:fs";
|
|
18
|
+
import { dirname, resolve } from "node:path";
|
|
19
|
+
import { fileURLToPath } from "node:url";
|
|
20
|
+
const moduleDir = dirname(fileURLToPath(import.meta.url));
|
|
21
|
+
/**
 * Load a libpostal French dictionary file into a set of lowercase surface forms.
 *
 * Every non-empty line that does not start with `#` is split on `|`; each trimmed, lowercased,
 * non-empty form is added to the set. The file is probed at several candidate locations
 * (relative to this module and to the current working directory) and the first one that can be
 * read wins.
 *
 * @param {string} filename - Dictionary file name, e.g. `street_types.txt`.
 * @returns {Set<string>} Lowercased forms from the first candidate file that could be read.
 * @throws {Error} When no candidate can be read. The message lists every path that was tried
 *   and the last read failure is attached as `cause`.
 */
function loadDictionary(filename) {
    const candidates = [
        resolve(moduleDir, "../../../../core/data/libpostal/dictionaries/fr", filename),
        resolve(moduleDir, "../../../../../core/data/libpostal/dictionaries/fr", filename),
        resolve(process.cwd(), "core/data/libpostal/dictionaries/fr", filename),
    ];
    let lastError;
    for (const path of candidates) {
        let text;
        try {
            text = readFileSync(path, "utf8");
        }
        catch (err) {
            // Best-effort probing: remember why this candidate failed, then try the next one.
            lastError = err;
            continue;
        }
        const forms = new Set();
        for (const line of text.split("\n")) {
            const trimmed = line.trim();
            if (!trimmed || trimmed.startsWith("#"))
                continue;
            for (const form of trimmed.split("|")) {
                const f = form.trim().toLowerCase();
                if (f)
                    forms.add(f);
            }
        }
        return forms;
    }
    // Surface the probed paths and the underlying failure so a broken install is diagnosable.
    throw new Error(`Could not load FR libpostal dictionary: ${filename} (tried: ${candidates.join(", ")})`, { cause: lastError });
}
|
|
49
|
+
const STREET_TYPES_FR = loadDictionary("street_types.txt");
|
|
50
|
+
/**
 * Split a French street name into its leading street-type word(s) and the remaining name.
 *
 * The input is trimmed and tokenised on whitespace. When there are at least three tokens and
 * the first two (lowercased, with one trailing `.`, `,` or `;` stripped from each) form a known
 * street type, those two tokens become the prefix. Otherwise, when the first token alone is a
 * known street type, it becomes the prefix. Prefix tokens keep their original casing.
 *
 * @param {string} fullname - Raw street name, e.g. "Rue de Rivoli".
 * @returns {{ prefix: string | null, street: string }} `prefix` is `null` and `street` is the
 *   trimmed input when nothing matches (including empty and single-token input).
 */
export function decomposeFrStreet(fullname) {
    const cleaned = fullname.trim();
    const unmatched = { prefix: null, street: cleaned };
    if (cleaned === "")
        return unmatched;
    const words = cleaned.split(/\s+/);
    // A lone word is never split: there would be no street name left over.
    if (words.length === 1)
        return unmatched;
    const normalize = (word) => word.toLowerCase().replace(/[.,;]$/, "");
    const [head, second, ...tail] = words;
    const lead = normalize(head);
    // Longest match first: a two-word type such as "ancien chemin" needs a name after it.
    if (tail.length > 0 && STREET_TYPES_FR.has(`${lead} ${normalize(second)}`)) {
        return { prefix: `${head} ${second}`, street: tail.join(" ") };
    }
    if (STREET_TYPES_FR.has(lead)) {
        return { prefix: head, street: [second, ...tail].join(" ") };
    }
    return unmatched;
}
|
|
78
|
+
//# sourceMappingURL=street-decompose.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"street-decompose.js","sourceRoot":"","sources":["../../../../src/adapters/ban/street-decompose.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AACtC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAA;AAExC,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;AAEzD,SAAS,cAAc,CAAC,QAAgB;IACvC,MAAM,UAAU,GAAG;QAClB,OAAO,CAAC,SAAS,EAAE,iDAAiD,EAAE,QAAQ,CAAC;QAC/E,OAAO,CAAC,SAAS,EAAE,oDAAoD,EAAE,QAAQ,CAAC;QAClF,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,qCAAqC,EAAE,QAAQ,CAAC;KACvE,CAAA;IACD,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC/B,IAAI,CAAC;YACJ,MAAM,IAAI,GAAG,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;YACvC,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAA;YAC7B,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;gBACrC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;gBAC3B,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;oBAAE,SAAQ;gBACjD,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;oBACvC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;oBACnC,IAAI,CAAC;wBAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;gBAClB,CAAC;YACF,CAAC;YACD,OAAO,GAAG,CAAA;QACX,CAAC;QAAC,MAAM,CAAC;YACR,WAAW;QACZ,CAAC;IACF,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,2CAA2C,QAAQ,EAAE,CAAC,CAAA;AACvE,CAAC;AAED,MAAM,eAAe,GAAG,cAAc,CAAC,kBAAkB,CAAC,CAAA;AAO1D;;;;;GAKG;AACH,MAAM,UAAU,iBAAiB,CAAC,QAAgB;IACjD,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAA;IAC/B,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,CAAA;IAEjD,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;IACnC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;IAE/D,MAAM,IAAI,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;IAEjE,iDAAiD;IACjD,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACxB,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,GAAG,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAA;QACzD,IAAI,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YAClC,OAAO,EAAE,MAAM
,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAA;QACnF,CAAC;IACF,CAAC;IAED,yBAAyB;IACzB,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAA;IAC9B,IAAI,eAAe,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;QAChC,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,CAAE,EAAE,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAA;IACjE,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;AACzC,CAAC"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* `synth-po-box`: PO box / PMB / Apartado / BP synthesizer adapter.
|
|
7
|
+
*
|
|
8
|
+
* Consumes a JSONL stream of (locality, region, postcode, country) tuples — typically extracted
|
|
9
|
+
* from existing corpus output (TIGER/NAD/BAN/WOF) — and emits synthetic PO box training rows. See
|
|
10
|
+
* `../../synthesize-po-box.ts` for the per-locale templates and number-noise logic.
|
|
11
|
+
*
|
|
12
|
+
* Why an adapter and not an augmenter:
|
|
13
|
+
*
|
|
14
|
+
* - Per USPS Pub 28 / DMM 508, a PO box delivery line is mutually exclusive with a street line.
|
|
15
|
+
* Synthesizing PO boxes by mutating a street row would teach the model an invalid pattern.
|
|
16
|
+
* The clean shape is: read just (locality, region, postcode, country) and produce a fresh
|
|
17
|
+
* PO-box-shaped row.
|
|
18
|
+
* - Per-DeepSeek (3-turn consult, 2026-05-28): PMB rows that COMBINE a street line with a PMB number
|
|
19
|
+
* ARE valid (CMRA addresses). Those are produced when `pmbRatio > 0` AND the input tuple
|
|
20
|
+
* carries a `street` field.
|
|
21
|
+
*/
|
|
22
|
+
import { type PoBoxBaseTuple } from "../../synthesize-po-box.js";
|
|
23
|
+
import type { CorpusAdapter } from "../../types.js";
|
|
24
|
+
export declare const SYNTH_PO_BOX_ADAPTER_ID = "synth-po-box";
|
|
25
|
+
export declare const SYNTH_PO_BOX_LICENSE = "Synthetic \u2014 derived from CC-BY / public-domain input tuples";
|
|
26
|
+
export interface PoBoxInputRow extends PoBoxBaseTuple {
|
|
27
|
+
street?: string;
|
|
28
|
+
houseNumber?: string;
|
|
29
|
+
}
|
|
30
|
+
export interface SynthPoBoxAdapterOptions {
|
|
31
|
+
/**
|
|
32
|
+
* How many PO box variants to emit per input tuple. Each variant picks a different leader (and
|
|
33
|
+
* possibly a different number / noise level). Default 1.
|
|
34
|
+
*/
|
|
35
|
+
variantsPerInput?: number;
|
|
36
|
+
/**
|
|
37
|
+
* Probability (0..1) of emitting a PMB-with-street variant when both the input has a street and
|
|
38
|
+
* the locale supports PMB. Default 0.15.
|
|
39
|
+
*/
|
|
40
|
+
pmbRatio?: number;
|
|
41
|
+
/**
|
|
42
|
+
* Deterministic seed for reproducible synthesis. Default Date.now().
|
|
43
|
+
*/
|
|
44
|
+
seed?: number;
|
|
45
|
+
}
|
|
46
|
+
export declare function createSynthPoBoxAdapter(opts?: SynthPoBoxAdapterOptions): CorpusAdapter;
|
|
47
|
+
export declare const synthPoBoxAdapter: CorpusAdapter;
|
|
48
|
+
//# sourceMappingURL=adapter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/synth-po-box/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAKH,OAAO,EAAsB,KAAK,cAAc,EAAE,MAAM,4BAA4B,CAAA;AACpF,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,uBAAuB,iBAAiB,CAAA;AACrD,eAAO,MAAM,oBAAoB,qEAAgE,CAAA;AAEjG,MAAM,WAAW,aAAc,SAAQ,cAAc;IACpD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,WAAW,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,MAAM,WAAW,wBAAwB;IACxC;;;OAGG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAA;IACzB;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB;;OAEG;IACH,IAAI,CAAC,EAAE,MAAM,CAAA;CACb;AAUD,wBAAgB,uBAAuB,CAAC,IAAI,GAAE,wBAA6B,GAAG,aAAa,CAuE1F;AAED,eAAO,MAAM,iBAAiB,eAA4B,CAAA"}
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* `synth-po-box`: PO box / PMB / Apartado / BP synthesizer adapter.
|
|
7
|
+
*
|
|
8
|
+
* Consumes a JSONL stream of (locality, region, postcode, country) tuples — typically extracted
|
|
9
|
+
* from existing corpus output (TIGER/NAD/BAN/WOF) — and emits synthetic PO box training rows. See
|
|
10
|
+
* `../../synthesize-po-box.ts` for the per-locale templates and number-noise logic.
|
|
11
|
+
*
|
|
12
|
+
* Why an adapter and not an augmenter:
|
|
13
|
+
*
|
|
14
|
+
* - Per USPS Pub 28 / DMM 508, a PO box delivery line is mutually exclusive with a street line.
|
|
15
|
+
* Synthesizing PO boxes by mutating a street row would teach the model an invalid pattern.
|
|
16
|
+
* The clean shape is: read just (locality, region, postcode, country) and produce a fresh
|
|
17
|
+
* PO-box-shaped row.
|
|
18
|
+
* - Per-DeepSeek (3-turn consult, 2026-05-28): PMB rows that COMBINE a street line with a PMB number
|
|
19
|
+
* ARE valid (CMRA addresses). Those are produced when `pmbRatio > 0` AND the input tuple
|
|
20
|
+
* carries a `street` field.
|
|
21
|
+
*/
|
|
22
|
+
import { createReadStream } from "node:fs";
|
|
23
|
+
import { createInterface } from "node:readline";
|
|
24
|
+
import { stableSourceId } from "../../adapter.js";
|
|
25
|
+
import { synthesizePoBoxRow } from "../../synthesize-po-box.js";
|
|
26
|
+
export const SYNTH_PO_BOX_ADAPTER_ID = "synth-po-box";
|
|
27
|
+
export const SYNTH_PO_BOX_LICENSE = "Synthetic — derived from CC-BY / public-domain input tuples";
|
|
28
|
+
/**
 * Create a deterministic pseudo-random number generator from a numeric seed.
 *
 * The generator is a linear congruential generator modulo 2^32
 * (`state = state * 1664525 + 1013904223`). The seed is first coerced to an unsigned 32-bit
 * integer so that negative, fractional, NaN, or very large seeds (such as `Date.now()`) cannot
 * produce out-of-range values or lose precision in the first multiplication.
 *
 * @param {number} seed - Any number; only its value modulo 2^32 (as a uint32) is used.
 * @returns {() => number} A function returning the next value in `[0, 1)`.
 */
function makeRandom(seed) {
    // `>>> 0` maps any number (including negatives and NaN) onto [0, 2^32).
    let state = seed >>> 0;
    return () => {
        // With state < 2^32 the product stays below 2^53, so the arithmetic is exact.
        state = (state * 1664525 + 1013904223) % 4294967296;
        return state / 4294967296;
    };
}
|
|
35
|
+
/**
 * Build the `synth-po-box` corpus adapter.
 *
 * The adapter's `rows()` generator reads `options.inputPath` as JSONL, one tuple per line, and
 * yields up to `variantsPerInput` synthetic rows per usable tuple. Lines that are blank, are not
 * valid JSON, are not objects, or lack any of `locality` / `region` / `postcode` / `country`
 * are skipped. When `options.country` is set, tuples for other countries are skipped too.
 *
 * @param {object} [opts] - Adapter options.
 * @param {number} [opts.variantsPerInput=1] - Synthesis attempts per input tuple.
 * @param {number} [opts.pmbRatio=0.15] - Passed through to `synthesizePoBoxRow`.
 * @param {number} [opts.seed] - Seed for the random generator; defaults to `Date.now()` at the
 *   start of each `rows()` call.
 * @returns The adapter object (`id`, `defaultLicense`, `description`, `rows`).
 */
export function createSynthPoBoxAdapter(opts = {}) {
    const variantsPerInput = opts.variantsPerInput ?? 1;
    const pmbRatio = opts.pmbRatio ?? 0.15;
    return {
        id: SYNTH_PO_BOX_ADAPTER_ID,
        defaultLicense: SYNTH_PO_BOX_LICENSE,
        description: "Synthetic PO box / PMB / Apartado / Boîte Postale rows. Consumes JSONL of (locality, region, postcode, country) tuples and emits locale-appropriate PO box variants.",
        async *rows(options) {
            const random = makeRandom(opts.seed ?? Date.now());
            const stream = createReadStream(options.inputPath, { encoding: "utf8" });
            const rl = createInterface({ input: stream, crlfDelay: Infinity });
            let emitted = 0;
            try {
                for await (const line of rl) {
                    if (options.signal?.aborted)
                        break;
                    if (options.limit !== undefined && emitted >= options.limit)
                        break;
                    const trimmed = line.trim();
                    if (!trimmed)
                        continue;
                    let input;
                    try {
                        input = JSON.parse(trimmed);
                    }
                    catch {
                        // Malformed JSONL line: skip it rather than abort the whole run.
                        continue;
                    }
                    // `JSON.parse` can return `null` (or a primitive); treat that like any other unusable row.
                    if (!input || !input.locality || !input.region || !input.postcode || !input.country)
                        continue;
                    if (options.country && options.country !== input.country)
                        continue;
                    for (let v = 0; v < variantsPerInput; v++) {
                        const synth = synthesizePoBoxRow(input, { random, pmbRatio });
                        if (!synth)
                            continue;
                        // Fold the variant index `v` into the `locality` slot to vary the digest across
                        // variants; stableSourceId only accepts ComponentTag keys.
                        const sourceId = stableSourceId(SYNTH_PO_BOX_ADAPTER_ID, {
                            locality: `${input.locality}#${v}`,
                            region: input.region,
                            postcode: input.postcode,
                            country: input.country,
                        });
                        yield {
                            raw: synth.raw,
                            components: synth.components,
                            country: input.country,
                            locale: synth.locale,
                            source: SYNTH_PO_BOX_ADAPTER_ID,
                            source_id: sourceId,
                            corpus_version: "",
                            license: SYNTH_PO_BOX_LICENSE,
                        };
                        emitted++;
                        if (options.limit !== undefined && emitted >= options.limit)
                            break;
                    }
                }
            }
            finally {
                // Runs on normal completion, on limit/abort, and when the consumer stops iterating
                // early. Closing readline does not destroy its input, so release the file explicitly.
                rl.close();
                stream.destroy();
            }
        },
    };
}
|
|
100
|
+
export const synthPoBoxAdapter = createSynthPoBoxAdapter();
|
|
101
|
+
//# sourceMappingURL=adapter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/synth-po-box/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAA;AAC/C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,kBAAkB,EAAuB,MAAM,4BAA4B,CAAA;AAGpF,MAAM,CAAC,MAAM,uBAAuB,GAAG,cAAc,CAAA;AACrD,MAAM,CAAC,MAAM,oBAAoB,GAAG,6DAA6D,CAAA;AAwBjG,SAAS,UAAU,CAAC,IAAY;IAC/B,IAAI,CAAC,GAAG,IAAI,CAAA;IACZ,OAAO,GAAG,EAAE;QACX,CAAC,GAAG,CAAC,CAAC,GAAG,OAAO,GAAG,UAAU,CAAC,GAAG,UAAU,CAAA;QAC3C,OAAO,CAAC,GAAG,UAAU,CAAA;IACtB,CAAC,CAAA;AACF,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,OAAiC,EAAE;IAC1E,MAAM,gBAAgB,GAAG,IAAI,CAAC,gBAAgB,IAAI,CAAC,CAAA;IACnD,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAA;IAEtC,OAAO;QACN,EAAE,EAAE,uBAAuB;QAC3B,cAAc,EAAE,oBAAoB;QACpC,WAAW,EACV,sKAAsK;QAEvK,KAAK,CAAC,CAAC,IAAI,CAAC,OAAuB;YAClC,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,GAAG,EAAE,CAAC,CAAA;YAElD,MAAM,MAAM,GAAG,gBAAgB,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;YACxE,MAAM,EAAE,GAAG,eAAe,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC,CAAA;YAElE,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,OAAO,GAAG,CAAC,CAAA;YAEf,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,EAAE,EAAE,CAAC;gBAC7B,IAAI,OAAO,CAAC,MAAM,EAAE,OAAO;oBAAE,MAAK;gBAClC,IAAI,OAAO,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,OAAO,CAAC,KAAK;oBAAE,MAAK;gBAElE,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;gBAC3B,IAAI,CAAC,OAAO;oBAAE,SAAQ;gBAEtB,IAAI,KAAoB,CAAA;gBACxB,IAAI,CAAC;oBACJ,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAkB,CAAA;gBAC7C,CAAC;gBAAC,MAAM,CAAC;oBACR,OAAO,EAAE,CAAA;oBACT,SAAQ;gBACT,CAAC;gBAED,IAAI,CAAC,KAAK,CAAC,QAAQ,IAAI,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;oBAC3E,OAAO,EAAE,CAAA;oBACT,SAAQ;gBACT,CAAC;gBAED,IAAI,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,OAAO,KAAK,KAAK,CAAC,OAAO;oBAAE,SAAQ;gBAElE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,gBAAgB,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC3C,MAAM,KAAK,GAAG,kBAAkB,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,QAAQ,
EAAE,CAAC,CAAA;oBAC7D,IAAI,CAAC,KAAK;wBAAE,SAAQ;oBAEpB,6EAA6E;oBAC7E,iDAAiD;oBACjD,MAAM,QAAQ,GAAG,cAAc,CAAC,uBAAuB,EAAE;wBACxD,QAAQ,EAAE,GAAG,KAAK,CAAC,QAAQ,IAAI,CAAC,EAAE;wBAClC,MAAM,EAAE,KAAK,CAAC,MAAM;wBACpB,QAAQ,EAAE,KAAK,CAAC,QAAQ;wBACxB,OAAO,EAAE,KAAK,CAAC,OAAO;qBACtB,CAAC,CAAA;oBAEF,MAAM;wBACL,GAAG,EAAE,KAAK,CAAC,GAAG;wBACd,UAAU,EAAE,KAAK,CAAC,UAAU;wBAC5B,OAAO,EAAE,KAAK,CAAC,OAAO;wBACtB,MAAM,EAAE,KAAK,CAAC,MAAM;wBACpB,MAAM,EAAE,uBAAuB;wBAC/B,SAAS,EAAE,QAAQ;wBACnB,cAAc,EAAE,EAAE;wBAClB,OAAO,EAAE,oBAAoB;qBAC7B,CAAA;oBACD,OAAO,EAAE,CAAA;oBAET,IAAI,OAAO,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,OAAO,CAAC,KAAK;wBAAE,MAAK;gBACnE,CAAC;YACF,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,iBAAiB,GAAG,uBAAuB,EAAE,CAAA"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/tiger/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAKH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;
|
|
1
|
+
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/tiger/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAKH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAGjF,eAAO,MAAM,gBAAgB,UAAU,CAAA;AACvC,eAAO,MAAM,qBAAqB,kBAAkB,CAAA;AA0FpD,yFAAyF;AACzF,wBAAgB,kBAAkB,IAAI,aAAa,CAoElD;AAED,eAAO,MAAM,YAAY,eAAuB,CAAA"}
|
|
@@ -39,6 +39,7 @@
|
|
|
39
39
|
import { DatabaseSync } from "node:sqlite";
|
|
40
40
|
import { lookupFipsState } from "../../codex/us-fips-state.js";
|
|
41
41
|
import { formatAddress, reconcileComponents } from "../../format.js";
|
|
42
|
+
import { decomposeStreet } from "./street-decompose.js";
|
|
42
43
|
export const TIGER_ADAPTER_ID = "tiger";
|
|
43
44
|
export const TIGER_DEFAULT_LICENSE = "Public Domain";
|
|
44
45
|
/**
|
|
@@ -54,18 +55,23 @@ const US_COUNTRY_DISPLAY = "United States of America";
|
|
|
54
55
|
* - `zipl !== zipr` → two rows (one per side's ZIP).
|
|
55
56
|
*/
|
|
56
57
|
function* streetVariants(row) {
|
|
57
|
-
const
|
|
58
|
-
if (!
|
|
58
|
+
const fullname = row.fullname.trim();
|
|
59
|
+
if (!fullname)
|
|
59
60
|
return;
|
|
60
61
|
const state = lookupFipsState(row.statefp);
|
|
61
62
|
if (!state)
|
|
62
63
|
return;
|
|
63
64
|
const zipl = row.zipl?.trim() ?? "";
|
|
64
65
|
const zipr = row.zipr?.trim() ?? "";
|
|
66
|
+
const decomposed = decomposeStreet(fullname);
|
|
65
67
|
const baseComponents = {
|
|
66
|
-
street,
|
|
67
68
|
region: state.abbreviation,
|
|
69
|
+
street: decomposed.street,
|
|
68
70
|
};
|
|
71
|
+
if (decomposed.prefix)
|
|
72
|
+
baseComponents.street_prefix = decomposed.prefix;
|
|
73
|
+
if (decomposed.suffix)
|
|
74
|
+
baseComponents.street_suffix = decomposed.suffix;
|
|
69
75
|
if (!zipl && !zipr) {
|
|
70
76
|
yield { components: baseComponents, variantKey: "no-zip" };
|
|
71
77
|
return;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/tiger/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAA;AAC1C,OAAO,EAAE,eAAe,EAAE,MAAM,8BAA8B,CAAA;AAC9D,OAAO,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;
|
|
1
|
+
{"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/tiger/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAA;AAC1C,OAAO,EAAE,eAAe,EAAE,MAAM,8BAA8B,CAAA;AAC9D,OAAO,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAEpE,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAA;AAEvD,MAAM,CAAC,MAAM,gBAAgB,GAAG,OAAO,CAAA;AACvC,MAAM,CAAC,MAAM,qBAAqB,GAAG,eAAe,CAAA;AAEpD;;;GAGG;AACH,MAAM,kBAAkB,GAAG,0BAA0B,CAAA;AAiBrD;;;;;;GAMG;AACH,QAAQ,CAAC,CAAC,cAAc,CAAC,GAAmB;IAI3C,MAAM,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAA;IACpC,IAAI,CAAC,QAAQ;QAAE,OAAM;IACrB,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,CAAA;IAC1C,IAAI,CAAC,KAAK;QAAE,OAAM;IAElB,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IACnC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IAEnC,MAAM,UAAU,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAA;IAE5C,MAAM,cAAc,GAA+B;QAClD,MAAM,EAAE,KAAK,CAAC,YAAY;QAC1B,MAAM,EAAE,UAAU,CAAC,MAAM;KACzB,CAAA;IACD,IAAI,UAAU,CAAC,MAAM;QAAE,cAAc,CAAC,aAAa,GAAG,UAAU,CAAC,MAAM,CAAA;IACvE,IAAI,UAAU,CAAC,MAAM;QAAE,cAAc,CAAC,aAAa,GAAG,UAAU,CAAC,MAAM,CAAA;IAEvE,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QACpB,MAAM,EAAE,UAAU,EAAE,cAAc,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAA;QAC1D,OAAM;IACP,CAAC;IACD,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QACnC,MAAM;YACL,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE;YACjD,UAAU,EAAE,OAAO,IAAI,EAAE;SACzB,CAAA;QACD,OAAM;IACP,CAAC;IACD,IAAI,IAAI;QAAE,MAAM,EAAE,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE,EAAE,UAAU,EAAE,QAAQ,IAAI,EAAE,EAAE,CAAA;IACjG,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI;QAAE,MAAM,EAAE,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE,EAAE,UAAU,EAAE,QAAQ,IAAI,EAAE,EAAE,CAAA;AACnH,CAAC;AAED,sEAAsE;AACtE,QAAQ,CAAC,CAAC,aAAa,CAAC,GAAkB;IAIzC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAA;IAC5B,IAAI,CAAC,IAAI;QAAE,OAAM;IACjB,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,CAAA;IAC1C,IAAI,CAAC,KAAK;QAAE,OAAM;IAElB,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE;QAC9B,UAAU,EA
AE,eAAe;KAC3B,CAAA;IACD,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,CAAC,YAAY,EAAE;QAC1D,UAAU,EAAE,aAAa;KACzB,CAAA;IACD,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,CAAC,YAAY,EAAE,OAAO,EAAE,kBAAkB,EAAE;QACvF,UAAU,EAAE,qBAAqB;KACjC,CAAA;AACF,CAAC;AAED,yFAAyF;AACzF,MAAM,UAAU,kBAAkB;IACjC,OAAO;QACN,EAAE,EAAE,gBAAgB;QACpB,cAAc,EAAE,qBAAqB;QACrC,WAAW,EACV,4GAA4G;QAE7G,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,iDAAiD,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YACjF,CAAC;YAED,MAAM,EAAE,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAA;YAC/D,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,MAAM,UAAU,GAAG,EAAE,CAAC,OAAO,CAAC,mEAAmE,CAAC,CAAA;gBAClG,MAAM,SAAS,GAAG,EAAE,CAAC,OAAO,CAAC,qDAAqD,CAAC,CAAA;gBAEnF,KAAK,MAAM,GAAG,IAAI,UAAU,CAAC,OAAO,EAAsC,EAAE,CAAC;oBAC5E,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,OAAM;oBAChC,KAAK,MAAM,OAAO,IAAI,cAAc,CAAC,GAAG,CAAC,EAAE,CAAC;wBAC3C,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;4BAAE,OAAM;wBAC7D,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,UAAU,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;wBACxE,IAAI,CAAC,GAAG;4BAAE,SAAQ;wBAClB,MAAM,OAAO,GAAG,mBAAmB,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;wBAC5D,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;4BAAE,SAAQ;wBAE/C,MAAM;4BACL,GAAG;4BACH,UAAU,EAAE,OAAO;4BACnB,OAAO,EAAE,IAAI;4BACb,MAAM,EAAE,OAAO;4BACf,MAAM,EAAE,gBAAgB;4BACxB,SAAS,EAAE,GAAG,gBAAgB,OAAO,GAAG,CAAC,QAAQ,IAAI,OAAO,CAAC,UAAU,EAAE;4BACzE,cAAc,EAAE,EAAE;4BAClB,OAAO,EAAE,qBAAqB;yBAC9B,CAAA;wBACD,OAAO,EAAE,CAAA;oBACV,CAAC;gBACF,CAAC;gBAED,KAAK,MAAM,GAAG,IAAI,SAAS,CAAC,OAAO,EAAqC,EAAE,CAAC;oBAC1E,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,OAAM;oBAChC,KAAK,MAAM,OAAO,IAAI,aAAa,CAAC,GAAG,CAAC,EAAE,CAAC;wBAC1C,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;4BAAE,OAAM;wBAC7D,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,UAAU,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;wBACxE,IAAI,CAAC,GAAG;4BAAE,SAAQ;wBAClB,MAA
M,OAAO,GAAG,mBAAmB,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;wBAC5D,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;4BAAE,SAAQ;wBAE/C,MAAM;4BACL,GAAG;4BACH,UAAU,EAAE,OAAO;4BACnB,OAAO,EAAE,IAAI;4BACb,MAAM,EAAE,OAAO;4BACf,MAAM,EAAE,gBAAgB;4BACxB,SAAS,EAAE,GAAG,gBAAgB,OAAO,GAAG,CAAC,KAAK,IAAI,OAAO,CAAC,UAAU,EAAE;4BACtE,cAAc,EAAE,EAAE;4BAClB,OAAO,EAAE,qBAAqB;yBAC9B,CAAA;wBACD,OAAO,EAAE,CAAA;oBACV,CAAC;gBACF,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,EAAE,CAAC,KAAK,EAAE,CAAA;YACX,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,YAAY,GAAG,kBAAkB,EAAE,CAAA"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Decompose a US street name into Stage 3 components: street_prefix, street, street_suffix.
|
|
7
|
+
*
|
|
8
|
+
* Sources directionals and street types from the curated libpostal/en dictionaries
|
|
9
|
+
* (`core/data/libpostal/dictionaries/en/{directionals,street_types}.txt`). These are the same
|
|
10
|
+
* dictionaries the runtime classifiers (StreetPrefixClassifier, StreetSuffixClassifier) use, so
|
|
11
|
+
* corpus labels and runtime classifications agree on the vocabulary.
|
|
12
|
+
*
|
|
13
|
+
* Examples: "N Main St" → { prefix: "N", street: "Main", suffix: "St" } "Pennsylvania Avenue NW" →
|
|
14
|
+
* { prefix: null, street: "Pennsylvania", suffix: "Avenue NW" } "Salmon St" → { prefix: null,
|
|
15
|
+
* street: "Salmon", suffix: "St" } "SE Hawthorne Blvd" → { prefix: "SE", street: "Hawthorne",
|
|
16
|
+
* suffix: "Blvd" }
|
|
17
|
+
*/
|
|
18
|
+
/** Result of splitting a US street name into its prefix / name / suffix parts. */
export interface DecomposedStreet {
    /** Leading directional token exactly as written in the input, or `null` when none was split off. */
    prefix: string | null;
    /**
     * Street name left after removing prefix and suffix (tokens re-joined with single spaces).
     * Falls back to the entire trimmed input when that remainder would be empty.
     */
    street: string;
    /**
     * Trailing street type, post-directional, or "type directional" pair exactly as written in the
     * input, or `null` when none was split off.
     */
    suffix: string | null;
}
|
|
23
|
+
/**
|
|
24
|
+
* Decompose a US street name into prefix/name/suffix components.
|
|
25
|
+
*
|
|
26
|
+
* Conservative — only emits prefix/suffix when there's a clear directional or street-type keyword.
|
|
27
|
+
* Returns the original as `street` if nothing matches.
|
|
28
|
+
*/
|
|
29
|
+
// `fullname` is trimmed and split on whitespace before matching; `prefix` and `suffix` are `null` when nothing was split off.
export declare function decomposeStreet(fullname: string): DecomposedStreet;
|
|
30
|
+
//# sourceMappingURL=street-decompose.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"street-decompose.d.ts","sourceRoot":"","sources":["../../../../src/adapters/tiger/street-decompose.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAuCH,MAAM,WAAW,gBAAgB;IAChC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,MAAM,GAAG,IAAI,CAAA;CACrB;AAED;;;;;GAKG;AACH,wBAAgB,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG,gBAAgB,CA0ClE"}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Decompose a US street name into Stage 3 components: street_prefix, street, street_suffix.
|
|
7
|
+
*
|
|
8
|
+
* Sources directionals and street types from the curated libpostal/en dictionaries
|
|
9
|
+
* (`core/data/libpostal/dictionaries/en/{directionals,street_types}.txt`). These are the same
|
|
10
|
+
* dictionaries the runtime classifiers (StreetPrefixClassifier, StreetSuffixClassifier) use, so
|
|
11
|
+
* corpus labels and runtime classifications agree on the vocabulary.
|
|
12
|
+
*
|
|
13
|
+
* Examples: "N Main St" → { prefix: "N", street: "Main", suffix: "St" } "Pennsylvania Avenue NW" →
|
|
14
|
+
* { prefix: null, street: "Pennsylvania", suffix: "Avenue NW" } "Salmon St" → { prefix: null,
|
|
15
|
+
* street: "Salmon", suffix: "St" } "SE Hawthorne Blvd" → { prefix: "SE", street: "Hawthorne",
|
|
16
|
+
* suffix: "Blvd" }
|
|
17
|
+
*/
|
|
18
|
+
import { readFileSync } from "node:fs";
|
|
19
|
+
import { dirname, resolve } from "node:path";
|
|
20
|
+
import { fileURLToPath } from "node:url";
|
|
21
|
+
// Directory containing this module file, derived from `import.meta.url`; used as the base for the relative dictionary lookups below.
const moduleDir = dirname(fileURLToPath(import.meta.url));
|
|
22
|
+
/**
 * Load one libpostal dictionary file into a lookup set.
 *
 * Every non-empty line that does not start with `#` is split on `|`, and each form is trimmed,
 * lowercased and added to the set. Candidate locations are tried in order; the first file that can
 * be read wins.
 *
 * @param {string} filename - Dictionary file name, e.g. "directionals.txt".
 * @returns {Set<string>} Lowercased forms from the first candidate file that could be read.
 * @throws {Error} When no candidate can be read. The error's `cause` is the last read failure, so
 *   the underlying reason (and path) is not lost.
 */
function loadDictionary(filename) {
    // Resolve via the @mailwoman/core data directory.
    // NOTE(review): the three candidates presumably cover different install/build layouts plus
    // running from the repository root — confirm which layouts are actually expected.
    const candidates = [
        resolve(moduleDir, "../../../../core/data/libpostal/dictionaries/en", filename),
        resolve(moduleDir, "../../../../../core/data/libpostal/dictionaries/en", filename),
        resolve(process.cwd(), "core/data/libpostal/dictionaries/en", filename),
    ];
    let lastError;
    for (const path of candidates) {
        let text;
        try {
            text = readFileSync(path, "utf8");
        }
        catch (err) {
            // Best effort: remember why this candidate failed, then try the next one.
            lastError = err;
            continue;
        }
        const forms = new Set();
        for (const line of text.split("\n")) {
            const trimmed = line.trim();
            if (!trimmed || trimmed.startsWith("#"))
                continue;
            // libpostal format: canonical|abbr|abbr|... — index all forms
            for (const form of trimmed.split("|")) {
                const f = form.trim().toLowerCase();
                if (f)
                    forms.add(f);
            }
        }
        return forms;
    }
    throw new Error(`Could not load libpostal dictionary: ${filename}`, { cause: lastError });
}
|
|
52
|
+
// Lowercased directional forms from directionals.txt, loaded once when this module is evaluated (throws if the file cannot be found).
const DIRECTIONALS = loadDictionary("directionals.txt");
|
|
53
|
+
// Lowercased street-type forms from street_types.txt, loaded once when this module is evaluated (throws if the file cannot be found).
const STREET_TYPES = loadDictionary("street_types.txt");
|
|
54
|
+
/**
 * Split a US street name into an optional directional prefix, the base name, and an optional suffix.
 *
 * Deliberately cautious: a prefix or suffix is only peeled off when the token (lowercased, with one
 * trailing "." removed) appears in the directional or street-type dictionary. If peeling would leave
 * no street name at all, the whole trimmed input is returned as `street`.
 *
 * @param {string} fullname - Street name as written, e.g. "SE Hawthorne Blvd".
 * @returns {{ prefix: string | null, street: string, suffix: string | null }} The split parts;
 *   prefix and suffix keep their original casing and punctuation.
 */
export function decomposeStreet(fullname) {
    const whole = fullname.trim();
    // Fallback shape: nothing split off, the trimmed input is the street.
    const asIs = () => ({ prefix: null, street: whole, suffix: null });
    if (whole === "")
        return asIs();
    const words = whole.split(/\s+/);
    const count = words.length;
    if (count === 1)
        return asIs();
    // Dictionary key for a token: case-insensitive, ignoring one trailing period ("St." → "st").
    const key = (word) => word.toLowerCase().replace(/\.$/, "");
    // Leading directional prefix (there are always at least two tokens at this point).
    const hasPrefix = DIRECTIONALS.has(key(words[0]));
    const prefix = hasPrefix ? words[0] : null;
    const from = hasPrefix ? 1 : 0;
    const lastKey = key(words[count - 1]);
    const penultimateKey = key(words[count - 2]);
    const lastIsDirectional = DIRECTIONALS.has(lastKey);
    // Tokens still available once the prefix has been taken.
    const remaining = count - from;
    let suffixLength = 0;
    if (lastIsDirectional && STREET_TYPES.has(penultimateKey)) {
        // Street type followed by a post-directional (e.g. "Pennsylvania Ave NW").
        suffixLength = 2;
    }
    else if (remaining >= 2 && (STREET_TYPES.has(lastKey) || lastIsDirectional)) {
        // A lone trailing street type, or a post-directional without a type.
        suffixLength = 1;
    }
    const to = count - suffixLength;
    const suffix = suffixLength === 0 ? null : words.slice(to).join(" ");
    const street = words.slice(from, to).join(" ").trim();
    // Prefix and suffix swallowed everything: give up and return the input untouched.
    return street ? { prefix, street, suffix } : asIs();
}
|
|
99
|
+
//# sourceMappingURL=street-decompose.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"street-decompose.js","sourceRoot":"","sources":["../../../../src/adapters/tiger/street-decompose.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AACtC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAA;AAExC,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;AAEzD,SAAS,cAAc,CAAC,QAAgB;IACvC,kDAAkD;IAClD,MAAM,UAAU,GAAG;QAClB,OAAO,CAAC,SAAS,EAAE,iDAAiD,EAAE,QAAQ,CAAC;QAC/E,OAAO,CAAC,SAAS,EAAE,oDAAoD,EAAE,QAAQ,CAAC;QAClF,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,qCAAqC,EAAE,QAAQ,CAAC;KACvE,CAAA;IACD,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC/B,IAAI,CAAC;YACJ,MAAM,IAAI,GAAG,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;YACvC,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAA;YAC7B,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;gBACrC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;gBAC3B,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;oBAAE,SAAQ;gBACjD,8DAA8D;gBAC9D,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;oBACvC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;oBACnC,IAAI,CAAC;wBAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;gBAClB,CAAC;YACF,CAAC;YACD,OAAO,GAAG,CAAA;QACX,CAAC;QAAC,MAAM,CAAC;YACR,qBAAqB;QACtB,CAAC;IACF,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,wCAAwC,QAAQ,EAAE,CAAC,CAAA;AACpE,CAAC;AAED,MAAM,YAAY,GAAG,cAAc,CAAC,kBAAkB,CAAC,CAAA;AACvD,MAAM,YAAY,GAAG,cAAc,CAAC,kBAAkB,CAAC,CAAA;AAQvD;;;;;GAKG;AACH,MAAM,UAAU,eAAe,CAAC,QAAgB;IAC/C,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAA;IAC/B,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;IAE/D,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;IACnC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;IAE/E,MAAM,IAAI,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAA;IAE9D,IAAI,MAAM,GAAkB,IAAI,CAAA;IAChC,IAAI,MAAM,GAAkB,IAAI,CAAA;IAChC,IAAI,QAAQ,GAAG,CAAC,CAAA;IAChB,IAAI,MAAM,GAAG,MAAM,
CAAC,MAAM,CAAA;IAE1B,6BAA6B;IAC7B,IAAI,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAC,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QAC9D,MAAM,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;QACnB,QAAQ,GAAG,CAAC,CAAA;IACb,CAAC;IAED,mFAAmF;IACnF,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC,CAAA;IACtC,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IAE/D,IAAI,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,YAAY,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;QAC5D,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QACnD,MAAM,IAAI,CAAC,CAAA;IACZ,CAAC;SAAM,IAAI,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,MAAM,GAAG,QAAQ,IAAI,CAAC,EAAE,CAAC;QAC7D,MAAM,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,CAAA;QAC5B,MAAM,IAAI,CAAC,CAAA;IACZ,CAAC;SAAM,IAAI,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,MAAM,GAAG,QAAQ,IAAI,CAAC,EAAE,CAAC;QAC7D,gCAAgC;QAChC,MAAM,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,CAAA;QAC5B,MAAM,IAAI,CAAC,CAAA;IACZ,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IAC9D,IAAI,CAAC,MAAM,EAAE,CAAC;QACb,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;IACvD,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,CAAA;AAClC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/usgov-nad/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAQH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,oBAAoB,cAAc,CAAA;AAC/C,eAAO,MAAM,yBAAyB,kBAAkB,CAAA;
|
|
1
|
+
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/usgov-nad/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAQH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,oBAAoB,cAAc,CAAA;AAC/C,eAAO,MAAM,yBAAyB,kBAAkB,CAAA;AAgLxD,wBAAgB,qBAAqB,IAAI,aAAa,CAmGrD;AAED,eAAO,MAAM,eAAe,eAA0B,CAAA"}
|
|
@@ -114,14 +114,23 @@ function composeHouseNumber(r) {
|
|
|
114
114
|
const suf = (r.AddNum_Suf ?? "").toString().trim();
|
|
115
115
|
return [pre, num, suf].filter(Boolean).join(" ").trim() || undefined;
|
|
116
116
|
}
|
|
117
|
-
function
|
|
117
|
+
function decomposeNadStreet(r) {
|
|
118
|
+
const name = (r.St_Name ?? "").toString().trim();
|
|
119
|
+
if (name) {
|
|
120
|
+
const preDir = (r.St_PreDir ?? "").toString().trim();
|
|
121
|
+
const preTyp = (r.St_PreTyp ?? "").toString().trim();
|
|
122
|
+
const preSep = (r.St_PreSep ?? "").toString().trim();
|
|
123
|
+
const posTyp = (r.St_PosTyp ?? "").toString().trim();
|
|
124
|
+
const posDir = (r.St_PosDir ?? "").toString().trim();
|
|
125
|
+
const prefix = [preDir, preTyp, preSep].filter(Boolean).join(" ") || undefined;
|
|
126
|
+
const suffix = [posTyp, posDir].filter(Boolean).join(" ") || undefined;
|
|
127
|
+
const full = [prefix, name, suffix].filter(Boolean).join(" ");
|
|
128
|
+
return { prefix, street: name, suffix, full };
|
|
129
|
+
}
|
|
118
130
|
const full = (r.StNam_Full ?? "").toString().trim();
|
|
119
131
|
if (full)
|
|
120
|
-
return full;
|
|
121
|
-
|
|
122
|
-
.map((p) => (p ?? "").toString().trim())
|
|
123
|
-
.filter(Boolean);
|
|
124
|
-
return parts.length ? parts.join(" ") : undefined;
|
|
132
|
+
return { full, street: full };
|
|
133
|
+
return undefined;
|
|
125
134
|
}
|
|
126
135
|
function composeLocality(r) {
|
|
127
136
|
return nonEmpty(r.Post_City, r.Inc_Muni, r.Census_Plc, r.Uninc_Comm);
|
|
@@ -134,7 +143,7 @@ function composePostcode(r) {
|
|
|
134
143
|
return plus4 ? `${zip}-${plus4}` : zip;
|
|
135
144
|
}
|
|
136
145
|
function composeRaw(parts) {
|
|
137
|
-
const streetLine = [parts.houseNumber, parts.street].filter(Boolean).join(" ").trim();
|
|
146
|
+
const streetLine = [parts.houseNumber, parts.street, parts.unit].filter(Boolean).join(" ").trim();
|
|
138
147
|
const tail = `${parts.locality}, ${parts.region} ${parts.postcode}`;
|
|
139
148
|
return [parts.venue, streetLine || undefined, tail].filter(Boolean).join(", ");
|
|
140
149
|
}
|
|
@@ -182,18 +191,30 @@ export function createUsgovNadAdapter() {
|
|
|
182
191
|
const postcode = composePostcode(record);
|
|
183
192
|
if (!postcode)
|
|
184
193
|
continue;
|
|
185
|
-
const
|
|
194
|
+
const decomposed = decomposeNadStreet(record);
|
|
186
195
|
const houseNumber = composeHouseNumber(record);
|
|
187
196
|
const venue = nonEmpty(record.LandmkName);
|
|
197
|
+
const unit = nonEmpty(record.Unit, record.Building, record.Floor, record.Room);
|
|
188
198
|
const components = {
|
|
189
199
|
...(venue ? { venue } : {}),
|
|
190
200
|
...(houseNumber ? { house_number: houseNumber } : {}),
|
|
191
|
-
...(
|
|
201
|
+
...(decomposed?.prefix ? { street_prefix: decomposed.prefix } : {}),
|
|
202
|
+
...(decomposed?.street ? { street: decomposed.street } : {}),
|
|
203
|
+
...(decomposed?.suffix ? { street_suffix: decomposed.suffix } : {}),
|
|
204
|
+
...(unit ? { unit } : {}),
|
|
192
205
|
locality,
|
|
193
206
|
region: state,
|
|
194
207
|
postcode,
|
|
195
208
|
};
|
|
196
|
-
const raw = composeRaw({
|
|
209
|
+
const raw = composeRaw({
|
|
210
|
+
venue,
|
|
211
|
+
houseNumber,
|
|
212
|
+
street: decomposed?.full,
|
|
213
|
+
unit,
|
|
214
|
+
locality,
|
|
215
|
+
region: state,
|
|
216
|
+
postcode,
|
|
217
|
+
});
|
|
197
218
|
if (!raw)
|
|
198
219
|
continue;
|
|
199
220
|
const aligned = reconcileComponents(components, raw);
|