@mailwoman/corpus 3.0.0 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/out/src/adapters/ban/adapter.d.ts.map +1 -1
- package/out/src/adapters/ban/adapter.js +6 -2
- package/out/src/adapters/ban/adapter.js.map +1 -1
- package/out/src/adapters/ban/street-decompose.d.ts +28 -0
- package/out/src/adapters/ban/street-decompose.d.ts.map +1 -0
- package/out/src/adapters/ban/street-decompose.js +78 -0
- package/out/src/adapters/ban/street-decompose.js.map +1 -0
- package/out/src/adapters/geonames/adapter.d.ts +35 -0
- package/out/src/adapters/geonames/adapter.d.ts.map +1 -0
- package/out/src/adapters/geonames/adapter.js +161 -0
- package/out/src/adapters/geonames/adapter.js.map +1 -0
- package/out/src/adapters/geonames-postal/adapter.d.ts +30 -0
- package/out/src/adapters/geonames-postal/adapter.d.ts.map +1 -0
- package/out/src/adapters/geonames-postal/adapter.js +96 -0
- package/out/src/adapters/geonames-postal/adapter.js.map +1 -0
- package/out/src/adapters/index.d.ts +3 -0
- package/out/src/adapters/index.d.ts.map +1 -1
- package/out/src/adapters/index.js +9 -0
- package/out/src/adapters/index.js.map +1 -1
- package/out/src/adapters/synth-po-box/adapter.d.ts +48 -0
- package/out/src/adapters/synth-po-box/adapter.d.ts.map +1 -0
- package/out/src/adapters/synth-po-box/adapter.js +101 -0
- package/out/src/adapters/synth-po-box/adapter.js.map +1 -0
- package/out/src/adapters/tiger/adapter.d.ts.map +1 -1
- package/out/src/adapters/tiger/adapter.js +9 -3
- package/out/src/adapters/tiger/adapter.js.map +1 -1
- package/out/src/adapters/tiger/street-decompose.d.ts +30 -0
- package/out/src/adapters/tiger/street-decompose.d.ts.map +1 -0
- package/out/src/adapters/tiger/street-decompose.js +99 -0
- package/out/src/adapters/tiger/street-decompose.js.map +1 -0
- package/out/src/adapters/usgov-irs-bmf/adapter.d.ts +26 -0
- package/out/src/adapters/usgov-irs-bmf/adapter.d.ts.map +1 -0
- package/out/src/adapters/usgov-irs-bmf/adapter.js +115 -0
- package/out/src/adapters/usgov-irs-bmf/adapter.js.map +1 -0
- package/out/src/adapters/usgov-nad/adapter.d.ts.map +1 -1
- package/out/src/adapters/usgov-nad/adapter.js +31 -10
- package/out/src/adapters/usgov-nad/adapter.js.map +1 -1
- package/out/src/adapters/wof-admin-jp/adapter.d.ts +58 -0
- package/out/src/adapters/wof-admin-jp/adapter.d.ts.map +1 -0
- package/out/src/adapters/wof-admin-jp/adapter.js +129 -0
- package/out/src/adapters/wof-admin-jp/adapter.js.map +1 -0
- package/out/src/index.d.ts +6 -0
- package/out/src/index.d.ts.map +1 -1
- package/out/src/index.js +6 -0
- package/out/src/index.js.map +1 -1
- package/out/src/synthesize-german.d.ts +77 -0
- package/out/src/synthesize-german.d.ts.map +1 -0
- package/out/src/synthesize-german.js +117 -0
- package/out/src/synthesize-german.js.map +1 -0
- package/out/src/synthesize-house-venue.d.ts +57 -0
- package/out/src/synthesize-house-venue.d.ts.map +1 -0
- package/out/src/synthesize-house-venue.js +147 -0
- package/out/src/synthesize-house-venue.js.map +1 -0
- package/out/src/synthesize-intersection.d.ts +46 -0
- package/out/src/synthesize-intersection.d.ts.map +1 -0
- package/out/src/synthesize-intersection.js +152 -0
- package/out/src/synthesize-intersection.js.map +1 -0
- package/out/src/synthesize-no-street.d.ts +70 -0
- package/out/src/synthesize-no-street.d.ts.map +1 -0
- package/out/src/synthesize-no-street.js +279 -0
- package/out/src/synthesize-no-street.js.map +1 -0
- package/out/src/synthesize-po-box.d.ts +75 -0
- package/out/src/synthesize-po-box.d.ts.map +1 -0
- package/out/src/synthesize-po-box.js +186 -0
- package/out/src/synthesize-po-box.js.map +1 -0
- package/out/src/synthesize-street.d.ts +53 -0
- package/out/src/synthesize-street.d.ts.map +1 -0
- package/out/src/synthesize-street.js +212 -0
- package/out/src/synthesize-street.js.map +1 -0
- package/out/src/synthesize.d.ts +19 -0
- package/out/src/synthesize.d.ts.map +1 -1
- package/out/src/synthesize.js +65 -1
- package/out/src/synthesize.js.map +1 -1
- package/package.json +8 -7
- package/out/src/codex/us-street-suffix.d.ts +0 -260
- package/out/src/codex/us-street-suffix.d.ts.map +0 -1
- package/out/src/codex/us-street-suffix.js +0 -286
- package/out/src/codex/us-street-suffix.js.map +0 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/ban/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAMH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;
|
|
1
|
+
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/ban/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAMH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAGjF,eAAO,MAAM,cAAc,QAAQ,CAAA;AA4CnC,wBAAgB,gBAAgB,IAAI,aAAa,CAwEhD;AAED,eAAO,MAAM,UAAU,eAAqB,CAAA"}
|
|
@@ -29,6 +29,7 @@ import { parse as csvParse } from "csv-parse";
|
|
|
29
29
|
import { createReadStream } from "node:fs";
|
|
30
30
|
import { stableSourceId } from "../../adapter.js";
|
|
31
31
|
import { reconcileComponents } from "../../format.js";
|
|
32
|
+
import { decomposeFrStreet } from "./street-decompose.js";
|
|
32
33
|
export const BAN_ADAPTER_ID = "ban";
|
|
33
34
|
/**
|
|
34
35
|
* Compose `house_number` from `numero` + `rep`. BAN uses `rep` for repetition indices ("bis",
|
|
@@ -92,11 +93,14 @@ export function createBanAdapter() {
|
|
|
92
93
|
continue;
|
|
93
94
|
if (!house && !postcode)
|
|
94
95
|
continue;
|
|
96
|
+
const decomposed = decomposeFrStreet(street);
|
|
95
97
|
const components = {};
|
|
96
98
|
if (house)
|
|
97
99
|
components.house_number = house;
|
|
98
|
-
if (
|
|
99
|
-
components.
|
|
100
|
+
if (decomposed.prefix)
|
|
101
|
+
components.street_prefix = decomposed.prefix;
|
|
102
|
+
if (decomposed.street)
|
|
103
|
+
components.street = decomposed.street;
|
|
100
104
|
if (postcode)
|
|
101
105
|
components.postcode = postcode;
|
|
102
106
|
if (locality)
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/ban/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,EAAE,KAAK,IAAI,QAAQ,EAAE,MAAM,WAAW,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;
|
|
1
|
+
{"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/ban/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,EAAE,KAAK,IAAI,QAAQ,EAAE,MAAM,WAAW,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAErD,OAAO,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAA;AAEzD,MAAM,CAAC,MAAM,cAAc,GAAG,KAAK,CAAA;AAenC;;;GAGG;AACH,SAAS,kBAAkB,CAAC,MAAc,EAAE,GAAW;IACtD,MAAM,CAAC,GAAG,MAAM,CAAC,IAAI,EAAE,CAAA;IACvB,MAAM,CAAC,GAAG,GAAG,CAAC,IAAI,EAAE,CAAA;IACpB,IAAI,CAAC,CAAC;QAAE,OAAO,EAAE,CAAA;IACjB,OAAO,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAA;AAC3B,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,UAAU,CAAC,KAAa,EAAE,MAAc,EAAE,QAAgB,EAAE,QAAgB;IACpF,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,MAAM,UAAU,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IACnE,IAAI,UAAU;QAAE,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAA;IACtC,MAAM,QAAQ,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IACtE,IAAI,QAAQ;QAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAA;IAClC,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;AACpD,CAAC;AAED,MAAM,UAAU,gBAAgB;IAC/B,OAAO;QACN,EAAE,EAAE,cAAc;QAClB,cAAc,EAAE,UAAU;QAC1B,WAAW,EAAE,+EAA+E;QAE5F,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,+CAA+C,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YAC/E,CAAC;YAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;YACrE,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CACzB,QAAQ,CAAC;gBACR,SAAS,EAAE,GAAG;gBACd,OAAO,EAAE,IAAI;gBACb,gBAAgB,EAAE,IAAI;gBACtB,YAAY,EAAE,IAAI;gBAClB,kBAAkB,EAAE,IAAI;aACxB,CAAC,CACF,CAAA;YAED,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI,MAA+B,EAAE,CAAC;oBAC5D,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,MAAK;oBAC/B,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IA
AI,IAAI,CAAC,KAAK;wBAAE,MAAK;oBAE5D,MAAM,KAAK,GAAG,kBAAkB,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,EAAE,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC,CAAA;oBACvE,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC7C,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAClD,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAElD,IAAI,CAAC,MAAM,IAAI,CAAC,QAAQ;wBAAE,SAAQ;oBAClC,IAAI,CAAC,KAAK,IAAI,CAAC,QAAQ;wBAAE,SAAQ;oBAEjC,MAAM,UAAU,GAAG,iBAAiB,CAAC,MAAM,CAAC,CAAA;oBAE5C,MAAM,UAAU,GAA+B,EAAE,CAAA;oBACjD,IAAI,KAAK;wBAAE,UAAU,CAAC,YAAY,GAAG,KAAK,CAAA;oBAC1C,IAAI,UAAU,CAAC,MAAM;wBAAE,UAAU,CAAC,aAAa,GAAG,UAAU,CAAC,MAAM,CAAA;oBACnE,IAAI,UAAU,CAAC,MAAM;wBAAE,UAAU,CAAC,MAAM,GAAG,UAAU,CAAC,MAAM,CAAA;oBAC5D,IAAI,QAAQ;wBAAE,UAAU,CAAC,QAAQ,GAAG,QAAQ,CAAA;oBAC5C,IAAI,QAAQ;wBAAE,UAAU,CAAC,QAAQ,GAAG,QAAQ,CAAA;oBAE5C,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAA;oBACzD,IAAI,CAAC,GAAG;wBAAE,SAAQ;oBAElB,MAAM,OAAO,GAAG,mBAAmB,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;oBACpD,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;wBAAE,SAAQ;oBAE/C,MAAM,QAAQ,GAAG,MAAM,CAAC,EAAE,EAAE,IAAI,EAAE;wBACjC,CAAC,CAAC,GAAG,cAAc,IAAI,MAAM,CAAC,EAAE,CAAC,IAAI,EAAE,EAAE;wBACzC,CAAC,CAAC,cAAc,CAAC,cAAc,EAAE,OAAO,CAAC,CAAA;oBAE1C,MAAM;wBACL,GAAG;wBACH,UAAU,EAAE,OAAO;wBACnB,OAAO,EAAE,IAAI;wBACb,MAAM,EAAE,OAAO;wBACf,MAAM,EAAE,cAAc;wBACtB,SAAS,EAAE,QAAQ;wBACnB,cAAc,EAAE,EAAE;wBAClB,OAAO,EAAE,UAAU;qBACnB,CAAA;oBACD,OAAO,EAAE,CAAA;gBACV,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,MAAM,CAAC,OAAO,EAAE,CAAA;YACjB,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,UAAU,GAAG,gBAAgB,EAAE,CAAA"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Decompose a French street name into Stage 3 components. French convention puts the street type as
|
|
7
|
+
* a leading word: "Rue de Rivoli", "Avenue des Champs-Élysées", "Bd Voltaire".
|
|
8
|
+
*
|
|
9
|
+
* The street type becomes street_prefix in our schema. The remaining tokens form the street name.
|
|
10
|
+
*
|
|
11
|
+
* Examples: "Rue de Rivoli" → { prefix: "Rue", street: "de Rivoli" } "Avenue des Champs-Élysées" →
|
|
12
|
+
* { prefix: "Avenue", street: "des Champs-Élysées" } "Boulevard Voltaire" → { prefix:
|
|
13
|
+
* "Boulevard", street: "Voltaire" }
|
|
14
|
+
*
|
|
15
|
+
* Sources street types from `core/data/libpostal/dictionaries/fr/street_types.txt`.
|
|
16
|
+
*/
|
|
17
|
+
export interface DecomposedFrStreet {
|
|
18
|
+
prefix: string | null;
|
|
19
|
+
street: string;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Decompose a French street name into prefix (leading type word) and street name.
|
|
23
|
+
*
|
|
24
|
+
* If the first 1-2 tokens match a known street type (allowing for multi-word like "ancien chemin"),
|
|
25
|
+
* they become the prefix. Returns `{ prefix: null, street: original }` if no match.
|
|
26
|
+
*/
|
|
27
|
+
export declare function decomposeFrStreet(fullname: string): DecomposedFrStreet;
|
|
28
|
+
//# sourceMappingURL=street-decompose.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"street-decompose.d.ts","sourceRoot":"","sources":["../../../../src/adapters/ban/street-decompose.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAoCH,MAAM,WAAW,kBAAkB;IAClC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;CACd;AAED;;;;;GAKG;AACH,wBAAgB,iBAAiB,CAAC,QAAQ,EAAE,MAAM,GAAG,kBAAkB,CAwBtE"}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Decompose a French street name into Stage 3 components. French convention puts the street type as
|
|
7
|
+
* a leading word: "Rue de Rivoli", "Avenue des Champs-Élysées", "Bd Voltaire".
|
|
8
|
+
*
|
|
9
|
+
* The street type becomes street_prefix in our schema. The remaining tokens form the street name.
|
|
10
|
+
*
|
|
11
|
+
* Examples: "Rue de Rivoli" → { prefix: "Rue", street: "de Rivoli" } "Avenue des Champs-Élysées" →
|
|
12
|
+
* { prefix: "Avenue", street: "des Champs-Élysées" } "Boulevard Voltaire" → { prefix:
|
|
13
|
+
* "Boulevard", street: "Voltaire" }
|
|
14
|
+
*
|
|
15
|
+
* Sources street types from `core/data/libpostal/dictionaries/fr/street_types.txt`.
|
|
16
|
+
*/
|
|
17
|
+
import { readFileSync } from "node:fs";
|
|
18
|
+
import { dirname, resolve } from "node:path";
|
|
19
|
+
import { fileURLToPath } from "node:url";
|
|
20
|
+
const moduleDir = dirname(fileURLToPath(import.meta.url));
|
|
21
|
+
/**
 * Load a libpostal French dictionary file into a set of lower-cased forms.
 *
 * Each non-blank, non-`#` line holds one or more `|`-separated spellings of the same entry;
 * every spelling is trimmed, lower-cased and added to the set.
 *
 * The file is looked up in three candidate locations (two relative to this module, one
 * relative to the current working directory); the first one that can be read wins.
 *
 * @param {string} filename Dictionary file name, e.g. `"street_types.txt"`.
 * @returns {Set<string>} All lower-cased forms found in the dictionary.
 * @throws {Error} When none of the candidates can be read. The failure of the last candidate
 *   is attached as `cause`, so a missing file can be told apart from e.g. a permissions error.
 */
function loadDictionary(filename) {
    const candidates = [
        resolve(moduleDir, "../../../../core/data/libpostal/dictionaries/fr", filename),
        resolve(moduleDir, "../../../../../core/data/libpostal/dictionaries/fr", filename),
        resolve(process.cwd(), "core/data/libpostal/dictionaries/fr", filename),
    ];
    let lastError;
    for (const path of candidates) {
        let text;
        try {
            text = readFileSync(path, "utf8");
        }
        catch (err) {
            // Best effort: remember why this candidate failed, then try the next one.
            lastError = err;
            continue;
        }
        const set = new Set();
        for (const line of text.split("\n")) {
            const trimmed = line.trim();
            if (!trimmed || trimmed.startsWith("#"))
                continue;
            for (const form of trimmed.split("|")) {
                const f = form.trim().toLowerCase();
                if (f)
                    set.add(f);
            }
        }
        return set;
    }
    throw new Error(`Could not load FR libpostal dictionary: ${filename}`, { cause: lastError });
}
|
|
49
|
+
const STREET_TYPES_FR = loadDictionary("street_types.txt");
|
|
50
|
+
/**
 * Split a French street name into its leading street-type word(s) and the remaining name.
 *
 * The candidate prefix is looked up in `STREET_TYPES_FR` after lower-casing each token and
 * dropping one trailing `.`, `,` or `;`. A two-token prefix (e.g. "ancien chemin") is tried
 * before a one-token prefix, and a prefix is only accepted when at least one token is left
 * over for the street name. The returned prefix keeps the caller's original spelling.
 *
 * @param {string} fullname Full street name, e.g. "Rue de Rivoli".
 * @returns {{ prefix: string | null, street: string }} `{ prefix: null, street: <trimmed input> }`
 *   when no known street type leads the name.
 */
export function decomposeFrStreet(fullname) {
    const cleaned = fullname.trim();
    const words = cleaned.split(/\s+/);
    // A blank or single-word name cannot carry both a type and a name.
    if (words.length < 2) {
        return { prefix: null, street: cleaned };
    }
    const lookupKey = (word) => word.toLowerCase().replace(/[.,;]$/, "");
    // Longest prefix first, so "Ancien Chemin ..." wins over a bare first-word match.
    for (const width of [2, 1]) {
        if (words.length <= width) {
            continue;
        }
        const head = words.slice(0, width);
        if (STREET_TYPES_FR.has(head.map(lookupKey).join(" "))) {
            return { prefix: head.join(" "), street: words.slice(width).join(" ") };
        }
    }
    return { prefix: null, street: cleaned };
}
|
|
78
|
+
//# sourceMappingURL=street-decompose.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"street-decompose.js","sourceRoot":"","sources":["../../../../src/adapters/ban/street-decompose.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AACtC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAA;AAExC,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;AAEzD,SAAS,cAAc,CAAC,QAAgB;IACvC,MAAM,UAAU,GAAG;QAClB,OAAO,CAAC,SAAS,EAAE,iDAAiD,EAAE,QAAQ,CAAC;QAC/E,OAAO,CAAC,SAAS,EAAE,oDAAoD,EAAE,QAAQ,CAAC;QAClF,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,qCAAqC,EAAE,QAAQ,CAAC;KACvE,CAAA;IACD,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC/B,IAAI,CAAC;YACJ,MAAM,IAAI,GAAG,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;YACvC,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAA;YAC7B,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;gBACrC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;gBAC3B,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;oBAAE,SAAQ;gBACjD,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;oBACvC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;oBACnC,IAAI,CAAC;wBAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;gBAClB,CAAC;YACF,CAAC;YACD,OAAO,GAAG,CAAA;QACX,CAAC;QAAC,MAAM,CAAC;YACR,WAAW;QACZ,CAAC;IACF,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,2CAA2C,QAAQ,EAAE,CAAC,CAAA;AACvE,CAAC;AAED,MAAM,eAAe,GAAG,cAAc,CAAC,kBAAkB,CAAC,CAAA;AAO1D;;;;;GAKG;AACH,MAAM,UAAU,iBAAiB,CAAC,QAAgB;IACjD,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAA;IAC/B,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,CAAA;IAEjD,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;IACnC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;IAE/D,MAAM,IAAI,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;IAEjE,iDAAiD;IACjD,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACxB,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,GAAG,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAA;QACzD,IAAI,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YAClC,OAAO,EAAE,MAAM
,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAA;QACnF,CAAC;IACF,CAAC;IAED,yBAAyB;IACzB,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAA;IAC9B,IAAI,eAAe,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;QAChC,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,CAAE,EAAE,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAA;IACjE,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;AACzC,CAAC"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* `geonames`: GeoNames populated-places consumer (https://www.geonames.org/, CC-BY-4.0).
|
|
7
|
+
*
|
|
8
|
+
* GeoNames is a global gazetteer of ~12M features. This adapter ingests the POPULATED PLACES
|
|
9
|
+
* (`feature_class = "P"`, excluding historical/abandoned/destroyed variants) from a per-country
|
|
10
|
+
* dump file — global locality coverage, including the small towns and villages a coarser admin
|
|
11
|
+
* gazetteer (WOF) lacks. It's the cheapest path to broadening the corpus's LOCALE coverage.
|
|
12
|
+
*
|
|
13
|
+
* Input: a per-country tab-separated dump (e.g. `US.txt` from
|
|
14
|
+
* `https://download.geonames.org/export/dump/`, 19 columns, no header). Two sibling files in the
|
|
15
|
+
* same directory supply human-readable names (downloaded once from the same place):
|
|
16
|
+
*
|
|
17
|
+
* - `admin1CodesASCII.txt` — `<CC>.<admin1_code>` → region name (e.g. `US.VT` → "Vermont").
|
|
18
|
+
* - `countryInfo.txt` — ISO alpha-2 → country name (e.g. `US` → "United States"); `#`-commented. If a
|
|
19
|
+
* sibling is missing, the corresponding component is simply omitted (graceful degradation).
|
|
20
|
+
*
|
|
21
|
+
* Output: per place, up to two hierarchy variants (mirroring `wof-admin`'s with/without-country
|
|
22
|
+
* balance so the model sees both domestic and international order) —
|
|
23
|
+
*
|
|
24
|
+
* 1. `{ locality, region }` → "City, Region"
|
|
25
|
+
* 2. `{ locality, region, country }` → "City, Region, Country" `reconcileComponents` drops any
|
|
26
|
+
* component that didn't survive into the rendered `raw`.
|
|
27
|
+
*
|
|
28
|
+
 * License: stamped `"CC-BY-4.0"` per row (GeoNames' terms); provenance is the `geonames-<id>-<slot>` `source_id` (slot = hierarchy variant: `lr`, `lrc`, `lc` or `l`).
|
|
29
|
+
*/
|
|
30
|
+
import type { CorpusAdapter } from "../../types.js";
|
|
31
|
+
export declare const GEONAMES_ADAPTER_ID = "geonames";
|
|
32
|
+
export declare const GEONAMES_DEFAULT_LICENSE = "CC-BY-4.0";
|
|
33
|
+
export declare function createGeonamesAdapter(): CorpusAdapter;
|
|
34
|
+
export declare const geonamesAdapter: CorpusAdapter;
|
|
35
|
+
//# sourceMappingURL=adapter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/geonames/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAOH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,mBAAmB,aAAa,CAAA;AAC7C,eAAO,MAAM,wBAAwB,cAAc,CAAA;AA2CnD,wBAAgB,qBAAqB,IAAI,aAAa,CA8ErD;AAED,eAAO,MAAM,eAAe,eAA0B,CAAA"}
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* `geonames`: GeoNames populated-places consumer (https://www.geonames.org/, CC-BY-4.0).
|
|
7
|
+
*
|
|
8
|
+
* GeoNames is a global gazetteer of ~12M features. This adapter ingests the POPULATED PLACES
|
|
9
|
+
* (`feature_class = "P"`, excluding historical/abandoned/destroyed variants) from a per-country
|
|
10
|
+
* dump file — global locality coverage, including the small towns and villages a coarser admin
|
|
11
|
+
* gazetteer (WOF) lacks. It's the cheapest path to broadening the corpus's LOCALE coverage.
|
|
12
|
+
*
|
|
13
|
+
* Input: a per-country tab-separated dump (e.g. `US.txt` from
|
|
14
|
+
* `https://download.geonames.org/export/dump/`, 19 columns, no header). Two sibling files in the
|
|
15
|
+
* same directory supply human-readable names (downloaded once from the same place):
|
|
16
|
+
*
|
|
17
|
+
* - `admin1CodesASCII.txt` — `<CC>.<admin1_code>` → region name (e.g. `US.VT` → "Vermont").
|
|
18
|
+
* - `countryInfo.txt` — ISO alpha-2 → country name (e.g. `US` → "United States"); `#`-commented. If a
|
|
19
|
+
* sibling is missing, the corresponding component is simply omitted (graceful degradation).
|
|
20
|
+
*
|
|
21
|
+
* Output: per place, up to two hierarchy variants (mirroring `wof-admin`'s with/without-country
|
|
22
|
+
* balance so the model sees both domestic and international order) —
|
|
23
|
+
*
|
|
24
|
+
* 1. `{ locality, region }` → "City, Region"
|
|
25
|
+
* 2. `{ locality, region, country }` → "City, Region, Country" `reconcileComponents` drops any
|
|
26
|
+
* component that didn't survive into the rendered `raw`.
|
|
27
|
+
*
|
|
28
|
+
 * License: stamped `"CC-BY-4.0"` per row (GeoNames' terms); provenance is the `geonames-<id>-<slot>` `source_id` (slot = hierarchy variant: `lr`, `lrc`, `lc` or `l`).
|
|
29
|
+
*/
|
|
30
|
+
import { parse as csvParse } from "csv-parse";
|
|
31
|
+
import { createReadStream, existsSync, readFileSync } from "node:fs";
|
|
32
|
+
import { dirname, join } from "node:path";
|
|
33
|
+
import { stableSourceId } from "../../adapter.js";
|
|
34
|
+
import { reconcileComponents } from "../../format.js";
|
|
35
|
+
export const GEONAMES_ADAPTER_ID = "geonames";
|
|
36
|
+
export const GEONAMES_DEFAULT_LICENSE = "CC-BY-4.0";
|
|
37
|
+
// GeoNames main-table column indices (0-based; see the export README).
|
|
38
|
+
const COL = {
|
|
39
|
+
geonameid: 0,
|
|
40
|
+
name: 1,
|
|
41
|
+
alternatenames: 3,
|
|
42
|
+
featureClass: 6,
|
|
43
|
+
featureCode: 7,
|
|
44
|
+
country: 8,
|
|
45
|
+
admin1: 10,
|
|
46
|
+
};
|
|
47
|
+
// Populated-place feature codes that are NOT current real places — skip them.
|
|
48
|
+
const NON_CURRENT_PPL = new Set(["PPLH", "PPLQ", "PPLW", "PPLCH"]);
|
|
49
|
+
/**
 * Load `admin1CodesASCII.txt` from `dir` → Map("<CC>.<admin1>" → region name).
 *
 * Returns an empty map when the file is absent. The file is read in one step instead of being
 * probed with `existsSync` first, so a file that disappears between probe and read cannot
 * turn the "absent" case into a thrown error. Lines may end in LF or CRLF.
 *
 * @param {string} dir Directory expected to hold `admin1CodesASCII.txt`.
 * @returns {Map<string, string>} First column → second column, for lines where both are non-empty.
 * @throws Any read failure other than the file (or a directory on its path) not existing.
 */
function loadAdmin1(dir) {
    const map = new Map();
    let text;
    try {
        text = readFileSync(join(dir, "admin1CodesASCII.txt"), "utf8");
    }
    catch (err) {
        // Absent sibling file: the region component is simply left out.
        if (err?.code === "ENOENT" || err?.code === "ENOTDIR")
            return map;
        throw err;
    }
    for (const line of text.split(/\r?\n/)) {
        if (!line)
            continue;
        const [code, name] = line.split("\t");
        if (code && name)
            map.set(code, name);
    }
    return map;
}
|
|
64
|
+
/**
 * Load `countryInfo.txt` from `dir` → Map(ISO → country name). The file is `#`-commented.
 *
 * Returns an empty map when the file is absent. The file is read in one step instead of being
 * probed with `existsSync` first, so a file that disappears between probe and read cannot
 * turn the "absent" case into a thrown error. Lines may end in LF or CRLF.
 *
 * @param {string} dir Directory expected to hold `countryInfo.txt`.
 * @returns {Map<string, string>} Column 0 → column 4, for lines where both are non-empty.
 * @throws Any read failure other than the file (or a directory on its path) not existing.
 */
function loadCountries(dir) {
    const map = new Map();
    let text;
    try {
        text = readFileSync(join(dir, "countryInfo.txt"), "utf8");
    }
    catch (err) {
        // Absent sibling file: the country component is simply left out.
        if (err?.code === "ENOENT" || err?.code === "ENOTDIR")
            return map;
        throw err;
    }
    for (const line of text.split(/\r?\n/)) {
        if (!line || line.startsWith("#"))
            continue;
        const cols = line.split("\t");
        // ISO(0), ISO3(1), iso-numeric(2), fips(3), Country(4), ...
        if (cols[0] && cols[4])
            map.set(cols[0], cols[4]);
    }
    return map;
}
|
|
80
|
+
/**
 * Build the `geonames` corpus adapter.
 *
 * `rows(opts)` streams the tab-separated dump at `opts.inputPath`, keeps feature-class `P`
 * records whose feature code is not in `NON_CURRENT_PPL`, optionally filters on
 * `opts.country`, and yields one row per hierarchy variant. Region and country names come
 * from the sibling files loaded by `loadAdmin1` / `loadCountries` in the dump's directory.
 * Iteration stops when `opts.signal` is aborted or `opts.limit` rows have been yielded; the
 * read stream is destroyed on every exit path.
 */
export function createGeonamesAdapter() {
    // Trimmed column value; a missing column reads as "".
    const column = (rec, index) => (rec[index] ?? "").trim();
    // True once the caller-supplied row budget (if any) is used up.
    const budgetSpent = (opts, produced) => opts.limit !== undefined && produced >= opts.limit;
    // Two hierarchy variants (domestic + international order), but only the distinct ones
    // the available names support.
    const hierarchyVariants = (locality, region, country) => {
        if (region) {
            const withRegion = [{ slot: "lr", comp: { locality, region }, raw: `${locality}, ${region}` }];
            if (country) {
                withRegion.push({
                    slot: "lrc",
                    comp: { locality, region, country },
                    raw: `${locality}, ${region}, ${country}`,
                });
            }
            return withRegion;
        }
        if (country) {
            return [{ slot: "lc", comp: { locality, country }, raw: `${locality}, ${country}` }];
        }
        return [{ slot: "l", comp: { locality }, raw: locality }];
    };
    return {
        id: GEONAMES_ADAPTER_ID,
        defaultLicense: GEONAMES_DEFAULT_LICENSE,
        description: "GeoNames populated places (CC-BY-4.0) — global locality coverage incl. small towns, with region/country names from the sibling admin1/countryInfo files.",
        async *rows(opts) {
            const siblingDir = dirname(opts.inputPath);
            const regionNames = loadAdmin1(siblingDir);
            const countryNames = loadCountries(siblingDir);
            const input = createReadStream(opts.inputPath, { encoding: "utf8" });
            const records = input.pipe(csvParse({ delimiter: "\t", quote: false, relax_column_count: true, skip_empty_lines: true }));
            let produced = 0;
            try {
                for await (const rec of records) {
                    if (opts.signal?.aborted || budgetSpent(opts, produced)) {
                        break;
                    }
                    const isCurrentPlace = rec[COL.featureClass] === "P" && !NON_CURRENT_PPL.has(rec[COL.featureCode] ?? "");
                    if (!isCurrentPlace) {
                        continue;
                    }
                    const cc = column(rec, COL.country);
                    if (!cc || (opts.country && cc !== opts.country)) {
                        continue;
                    }
                    const locality = column(rec, COL.name);
                    if (!locality) {
                        continue;
                    }
                    const geonameid = column(rec, COL.geonameid);
                    const region = regionNames.get(`${cc}.${column(rec, COL.admin1)}`);
                    const country = countryNames.get(cc);
                    for (const variant of hierarchyVariants(locality, region, country)) {
                        if (budgetSpent(opts, produced)) {
                            break;
                        }
                        const aligned = reconcileComponents(variant.comp, variant.raw);
                        if (Object.keys(aligned).length === 0) {
                            continue;
                        }
                        let sourceId;
                        if (geonameid) {
                            sourceId = `${GEONAMES_ADAPTER_ID}-${geonameid}-${variant.slot}`;
                        }
                        else {
                            sourceId = stableSourceId(GEONAMES_ADAPTER_ID, aligned);
                        }
                        yield {
                            raw: variant.raw,
                            components: aligned,
                            country: cc,
                            source: GEONAMES_ADAPTER_ID,
                            source_id: sourceId,
                            corpus_version: "",
                            license: GEONAMES_DEFAULT_LICENSE,
                        };
                        produced += 1;
                    }
                }
            }
            finally {
                input.destroy();
            }
        },
    };
}
|
|
160
|
+
export const geonamesAdapter = createGeonamesAdapter();
|
|
161
|
+
//# sourceMappingURL=adapter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/geonames/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAEH,OAAO,EAAE,KAAK,IAAI,QAAQ,EAAE,MAAM,WAAW,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AACpE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AACzC,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAGrD,MAAM,CAAC,MAAM,mBAAmB,GAAG,UAAU,CAAA;AAC7C,MAAM,CAAC,MAAM,wBAAwB,GAAG,WAAW,CAAA;AAEnD,uEAAuE;AACvE,MAAM,GAAG,GAAG;IACX,SAAS,EAAE,CAAC;IACZ,IAAI,EAAE,CAAC;IACP,cAAc,EAAE,CAAC;IACjB,YAAY,EAAE,CAAC;IACf,WAAW,EAAE,CAAC;IACd,OAAO,EAAE,CAAC;IACV,MAAM,EAAE,EAAE;CACD,CAAA;AAEV,8EAA8E;AAC9E,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,CAAA;AAElE,6FAA6F;AAC7F,SAAS,UAAU,CAAC,GAAW;IAC9B,MAAM,GAAG,GAAG,IAAI,GAAG,EAAkB,CAAA;IACrC,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,EAAE,sBAAsB,CAAC,CAAA;IAC5C,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;QAAE,OAAO,GAAG,CAAA;IAC/B,KAAK,MAAM,IAAI,IAAI,YAAY,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QACzD,IAAI,CAAC,IAAI;YAAE,SAAQ;QACnB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QAC7B,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC;YAAE,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,CAAA;IAClD,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC;AAED,wGAAwG;AACxG,SAAS,aAAa,CAAC,GAAW;IACjC,MAAM,GAAG,GAAG,IAAI,GAAG,EAAkB,CAAA;IACrC,MAAM,EAAE,GAAG,IAAI,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAA;IACvC,IAAI,CAAC,UAAU,CAAC,EAAE,CAAC;QAAE,OAAO,GAAG,CAAA;IAC/B,KAAK,MAAM,IAAI,IAAI,YAAY,CAAC,EAAE,EAAE,MAAM,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QACzD,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC;YAAE,SAAQ;QAC3C,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QAC7B,4DAA4D;QAC5D,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC;YAAE,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,CAAA;IAClD,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC;AAED,MAAM,UAAU,qBAAqB;IACpC,OAAO;QACN,EAAE,EAAE,mBAAmB;QAC
vB,cAAc,EAAE,wBAAwB;QACxC,WAAW,EACV,0JAA0J;QAE3J,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;YACnC,MAAM,MAAM,GAAG,UAAU,CAAC,GAAG,CAAC,CAAA;YAC9B,MAAM,SAAS,GAAG,aAAa,CAAC,GAAG,CAAC,CAAA;YAEpC,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;YACrE,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CACzB,QAAQ,CAAC,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,kBAAkB,EAAE,IAAI,EAAE,gBAAgB,EAAE,IAAI,EAAE,CAAC,CAC7F,CAAA;YAED,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,IAAI,KAAK,EAAE,MAAM,GAAG,IAAI,MAAiC,EAAE,CAAC;oBAC3D,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,MAAK;oBAC/B,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;wBAAE,MAAK;oBAE5D,IAAI,GAAG,CAAC,GAAG,CAAC,YAAY,CAAC,KAAK,GAAG;wBAAE,SAAQ;oBAC3C,IAAI,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC;wBAAE,SAAQ;oBAE7D,MAAM,EAAE,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC1C,IAAI,CAAC,EAAE;wBAAE,SAAQ;oBACjB,IAAI,IAAI,CAAC,OAAO,IAAI,EAAE,KAAK,IAAI,CAAC,OAAO;wBAAE,SAAQ;oBAEjD,MAAM,QAAQ,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC7C,IAAI,CAAC,QAAQ;wBAAE,SAAQ;oBACvB,MAAM,SAAS,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACnD,MAAM,MAAM,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAA;oBACpE,MAAM,OAAO,GAAG,SAAS,CAAC,GAAG,CAAC,EAAE,CAAC,CAAA;oBAEjC,8EAA8E;oBAC9E,6CAA6C;oBAC7C,MAAM,QAAQ,GAA2E,EAAE,CAAA;oBAC3F,IAAI,MAAM,EAAE,CAAC;wBACZ,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,GAAG,EAAE,GAAG,QAAQ,KAAK,MAAM,EAAE,EAAE,CAAC,CAAA;wBACxF,IAAI,OAAO,EAAE,CAAC;4BACb,QAAQ,CAAC,IAAI,CAAC;gCACb,IAAI,EAAE,KAAK;gCACX,IAAI,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE;gCACnC,GAAG,EAAE,GAAG,QAAQ,KAAK,MAAM,KAAK,OAAO,EAAE;6BACzC,CAAC,CAAA;wBACH,CAAC;oBACF,CAAC;yBAAM,IAAI,OAAO,EAAE,CAAC;wBACpB,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE,QAAQ,EAAE,O
AAO,EAAE,EAAE,GAAG,EAAE,GAAG,QAAQ,KAAK,OAAO,EAAE,EAAE,CAAC,CAAA;oBAC3F,CAAC;yBAAM,CAAC;wBACP,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE,QAAQ,EAAE,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,CAAA;oBAChE,CAAC;oBAED,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;wBAC1B,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;4BAAE,MAAK;wBAC5D,MAAM,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAA;wBAClD,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;4BAAE,SAAQ;wBAC/C,MAAM,QAAQ,GAAG,SAAS;4BACzB,CAAC,CAAC,GAAG,mBAAmB,IAAI,SAAS,IAAI,CAAC,CAAC,IAAI,EAAE;4BACjD,CAAC,CAAC,cAAc,CAAC,mBAAmB,EAAE,OAAO,CAAC,CAAA;wBAC/C,MAAM;4BACL,GAAG,EAAE,CAAC,CAAC,GAAG;4BACV,UAAU,EAAE,OAAO;4BACnB,OAAO,EAAE,EAAE;4BACX,MAAM,EAAE,mBAAmB;4BAC3B,SAAS,EAAE,QAAQ;4BACnB,cAAc,EAAE,EAAE;4BAClB,OAAO,EAAE,wBAAwB;yBACjC,CAAA;wBACD,OAAO,EAAE,CAAA;oBACV,CAAC;gBACF,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,MAAM,CAAC,OAAO,EAAE,CAAA;YACjB,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,eAAe,GAAG,qBAAqB,EAAE,CAAA"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* `geonames-postal`: GeoNames postal-code dump consumer (https://www.geonames.org/, CC-BY-4.0).
|
|
7
|
+
*
|
|
8
|
+
* The GeoNames postal export (`https://download.geonames.org/export/zip/<CC>.zip`) is a clean,
|
|
9
|
+
* per-country `postcode → place → admin1` table with the place + region NAMES inline (no aux-file
|
|
10
|
+
* join needed). It broadens the corpus's postcode→locality→region coverage to ~80 countries, well
|
|
11
|
+
* beyond `wof-postalcode`/the coordinate-first table — forward coverage for the multi-locale
|
|
12
|
+
* goal.
|
|
13
|
+
*
|
|
14
|
+
* Input: a per-country postal dump (`<CC>.txt`, 12 tab-separated columns, no header): country,
|
|
15
|
+
* postcode, place, admin1_name, admin1_code, admin2_name, admin2_code, admin3_name, admin3_code, lat, lon, accuracy.
|
|
16
|
+
*
|
|
17
|
+
* Output: per row, postcode-FIRST (international) variants — the common order for the non-US
|
|
18
|
+
* locales this fills (US postcodes are already covered by TIGER/WOF, which use postcode-LAST):
|
|
19
|
+
*
|
|
20
|
+
* 1. `{ postcode, locality }` → "AD100 Canillo"
|
|
21
|
+
* 2. `{ postcode, locality, region }` → "<postcode> <locality>, <region>" (omitted when the region merely repeats the locality, e.g. "AD100 Canillo, Canillo"). Prefer configuring this adapter for
|
|
22
|
+
* non-US countries; for US, the postcode-last sources are the right order. License:
|
|
23
|
+
* `"CC-BY-4.0"` per row (attribute "GeoNames").
|
|
24
|
+
*/
|
|
25
|
+
import type { CorpusAdapter } from "../../types.js";
|
|
26
|
+
export declare const GEONAMES_POSTAL_ADAPTER_ID = "geonames-postal";
|
|
27
|
+
export declare const GEONAMES_POSTAL_DEFAULT_LICENSE = "CC-BY-4.0";
|
|
28
|
+
export declare function createGeonamesPostalAdapter(): CorpusAdapter;
|
|
29
|
+
export declare const geonamesPostalAdapter: CorpusAdapter;
|
|
30
|
+
//# sourceMappingURL=adapter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/geonames-postal/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAMH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,0BAA0B,oBAAoB,CAAA;AAC3D,eAAO,MAAM,+BAA+B,cAAc,CAAA;AAK1D,wBAAgB,2BAA2B,IAAI,aAAa,CA8D3D;AAED,eAAO,MAAM,qBAAqB,eAAgC,CAAA"}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* `geonames-postal`: GeoNames postal-code dump consumer (https://www.geonames.org/, CC-BY-4.0).
|
|
7
|
+
*
|
|
8
|
+
* The GeoNames postal export (`https://download.geonames.org/export/zip/<CC>.zip`) is a clean,
|
|
9
|
+
* per-country `postcode → place → admin1` table with the place + region NAMES inline (no aux-file
|
|
10
|
+
* join needed). It broadens the corpus's postcode→locality→region coverage to ~80 countries, well
|
|
11
|
+
* beyond `wof-postalcode`/the coordinate-first table — forward coverage for the multi-locale
|
|
12
|
+
* goal.
|
|
13
|
+
*
|
|
14
|
+
* Input: a per-country postal dump (`<CC>.txt`, 12 tab-separated columns, no header): country,
|
|
15
|
+
* postcode, place, admin1_name, admin1_code, admin2_name, admin2_code, admin3_name, admin3_code, lat, lon, accuracy.
|
|
16
|
+
*
|
|
17
|
+
* Output: per row, postcode-FIRST (international) variants — the common order for the non-US
|
|
18
|
+
* locales this fills (US postcodes are already covered by TIGER/WOF, which use postcode-LAST):
|
|
19
|
+
*
|
|
20
|
+
* 1. `{ postcode, locality }` → "AD100 Canillo"
|
|
21
|
+
* 2. `{ postcode, locality, region }` → "<postcode> <locality>, <region>" (omitted when the region merely repeats the locality, e.g. "AD100 Canillo, Canillo"). Prefer configuring this adapter for
|
|
22
|
+
* non-US countries; for US, the postcode-last sources are the right order. License:
|
|
23
|
+
* `"CC-BY-4.0"` per row (attribute "GeoNames").
|
|
24
|
+
*/
|
|
25
|
+
import { parse as csvParse } from "csv-parse";
|
|
26
|
+
import { createReadStream } from "node:fs";
|
|
27
|
+
import { stableSourceId } from "../../adapter.js";
|
|
28
|
+
import { reconcileComponents } from "../../format.js";
|
|
29
|
+
export const GEONAMES_POSTAL_ADAPTER_ID = "geonames-postal";
|
|
30
|
+
export const GEONAMES_POSTAL_DEFAULT_LICENSE = "CC-BY-4.0";
|
|
31
|
+
// GeoNames postal-dump columns (0-based).
|
|
32
|
+
const COL = { country: 0, postcode: 1, place: 2, admin1Name: 3 };
|
|
33
|
+
/**
 * Builds the `geonames-postal` corpus adapter.
 *
 * The adapter's `rows(opts)` async generator streams the tab-separated file at
 * `opts.inputPath` and yields up to two postcode-first rows per input record:
 *
 *   - slot `pl`:  `{ postcode, locality }`         → "<postcode> <locality>"
 *   - slot `plr`: `{ postcode, locality, region }` → "<postcode> <locality>, <region>"
 *     (only when the region is non-empty and differs, case-insensitively, from the locality)
 *
 * Options read from `opts`:
 *   - `inputPath`: path of the dump file to read.
 *   - `signal`:    optional abort signal; checked once per input record.
 *   - `limit`:     optional cap on the number of rows yielded.
 *   - `country`:   optional filter; compared for exact equality with column 0.
 *
 * Failures while opening or reading `opts.inputPath` reject the iterator (they are
 * forwarded into the parser) instead of surfacing as an unhandled stream `'error'` event.
 *
 * @returns {import("../../types.js").CorpusAdapter} The adapter object.
 */
export function createGeonamesPostalAdapter() {
    return {
        id: GEONAMES_POSTAL_ADAPTER_ID,
        defaultLicense: GEONAMES_POSTAL_DEFAULT_LICENSE,
        description: "GeoNames postal codes (CC-BY-4.0) — multi-locale postcode→locality→region, names inline; international postcode-first order.",
        async *rows(opts) {
            const stream = createReadStream(opts.inputPath, { encoding: "utf8" });
            const parser = csvParse({ delimiter: "\t", quote: false, relax_column_count: true, skip_empty_lines: true });
            // `pipe()` does not propagate source errors to its destination. Without this
            // listener a missing/unreadable input file would raise an unhandled 'error'
            // event on the read stream rather than failing the `for await` loop below.
            stream.on("error", (err) => parser.destroy(err));
            stream.pipe(parser);
            let emitted = 0;
            try {
                for await (const rec of parser) {
                    if (opts.signal?.aborted)
                        break;
                    if (opts.limit !== undefined && emitted >= opts.limit)
                        break;
                    const cc = (rec[COL.country] ?? "").trim();
                    if (!cc)
                        continue;
                    if (opts.country && cc !== opts.country)
                        continue;
                    const postcode = (rec[COL.postcode] ?? "").trim();
                    const locality = (rec[COL.place] ?? "").trim();
                    // Both variants need a postcode and a locality; skip incomplete records.
                    if (!postcode || !locality)
                        continue;
                    const region = (rec[COL.admin1Name] ?? "").trim();
                    // Postcode-first (international) variants. Skip the region variant when admin1 just
                    // repeats the place (common for city-states / micro-admin) to avoid "X X" noise.
                    const variants = [
                        { slot: "pl", comp: { postcode, locality }, raw: `${postcode} ${locality}` },
                    ];
                    if (region && region.toLowerCase() !== locality.toLowerCase()) {
                        variants.push({
                            slot: "plr",
                            comp: { postcode, locality, region },
                            raw: `${postcode} ${locality}, ${region}`,
                        });
                    }
                    for (const v of variants) {
                        // Re-check the cap here: one record can emit two rows.
                        if (opts.limit !== undefined && emitted >= opts.limit)
                            break;
                        const aligned = reconcileComponents(v.comp, v.raw);
                        // Drop the variant when fewer than two components survive reconciliation.
                        if (Object.keys(aligned).length < 2)
                            continue;
                        yield {
                            raw: v.raw,
                            components: aligned,
                            country: cc,
                            source: GEONAMES_POSTAL_ADAPTER_ID,
                            // The slot suffix keeps the `pl` and `plr` rows of one record distinct.
                            source_id: `${stableSourceId(GEONAMES_POSTAL_ADAPTER_ID, aligned)}-${v.slot}`,
                            // Left empty here; presumably stamped by the caller — TODO confirm.
                            corpus_version: "",
                            license: GEONAMES_POSTAL_DEFAULT_LICENSE,
                        };
                        emitted++;
                    }
                }
            }
            finally {
                // Release the file handle on every exit path (completion, break, throw, early return).
                stream.destroy();
            }
        },
    };
}
|
|
95
|
+
export const geonamesPostalAdapter = createGeonamesPostalAdapter();
|
|
96
|
+
//# sourceMappingURL=adapter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/geonames-postal/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,EAAE,KAAK,IAAI,QAAQ,EAAE,MAAM,WAAW,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAGrD,MAAM,CAAC,MAAM,0BAA0B,GAAG,iBAAiB,CAAA;AAC3D,MAAM,CAAC,MAAM,+BAA+B,GAAG,WAAW,CAAA;AAE1D,0CAA0C;AAC1C,MAAM,GAAG,GAAG,EAAE,OAAO,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAW,CAAA;AAEzE,MAAM,UAAU,2BAA2B;IAC1C,OAAO;QACN,EAAE,EAAE,0BAA0B;QAC9B,cAAc,EAAE,+BAA+B;QAC/C,WAAW,EACV,8HAA8H;QAE/H,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;YACrE,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CACzB,QAAQ,CAAC,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,KAAK,EAAE,kBAAkB,EAAE,IAAI,EAAE,gBAAgB,EAAE,IAAI,EAAE,CAAC,CAC7F,CAAA;YAED,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,IAAI,KAAK,EAAE,MAAM,GAAG,IAAI,MAAiC,EAAE,CAAC;oBAC3D,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,MAAK;oBAC/B,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;wBAAE,MAAK;oBAE5D,MAAM,EAAE,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC1C,IAAI,CAAC,EAAE;wBAAE,SAAQ;oBACjB,IAAI,IAAI,CAAC,OAAO,IAAI,EAAE,KAAK,IAAI,CAAC,OAAO;wBAAE,SAAQ;oBAEjD,MAAM,QAAQ,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACjD,MAAM,QAAQ,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC9C,IAAI,CAAC,QAAQ,IAAI,CAAC,QAAQ;wBAAE,SAAQ;oBACpC,MAAM,MAAM,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAEjD,oFAAoF;oBACpF,iFAAiF;oBACjF,MAAM,QAAQ,GAA2E;wBACxF,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,EAAE,QAAQ,EAAE,QAAQ,EAAE,EAAE,GAAG,EAAE,GAAG,QAAQ,IAAI,QAAQ,EAAE,EAAE;qBAC5E,CAAA;oBACD,IAAI,MAAM,IAAI,MAAM,CAAC,WAAW,EAAE,KAAK,QAAQ,CAAC,WAAW,EAAE,EAAE,CAAC;wBAC/D,QAAQ,CAAC,IAAI,CAAC;4BACb,IAAI,EAAE,KAAK;4BACX,IAAI,EA
AE,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE;4BACpC,GAAG,EAAE,GAAG,QAAQ,IAAI,QAAQ,KAAK,MAAM,EAAE;yBACzC,CAAC,CAAA;oBACH,CAAC;oBAED,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;wBAC1B,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;4BAAE,MAAK;wBAC5D,MAAM,OAAO,GAAG,mBAAmB,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAA;wBAClD,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC;4BAAE,SAAQ;wBAC7C,MAAM;4BACL,GAAG,EAAE,CAAC,CAAC,GAAG;4BACV,UAAU,EAAE,OAAO;4BACnB,OAAO,EAAE,EAAE;4BACX,MAAM,EAAE,0BAA0B;4BAClC,SAAS,EAAE,GAAG,cAAc,CAAC,0BAA0B,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE;4BAC7E,cAAc,EAAE,EAAE;4BAClB,OAAO,EAAE,+BAA+B;yBACxC,CAAA;wBACD,OAAO,EAAE,CAAA;oBACV,CAAC;gBACF,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,MAAM,CAAC,OAAO,EAAE,CAAA;YACjB,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,qBAAqB,GAAG,2BAA2B,EAAE,CAAA"}
|
|
@@ -33,6 +33,8 @@ import type { CorpusAdapter } from "../types.js";
|
|
|
33
33
|
export declare const BUILTIN_ADAPTERS: readonly CorpusAdapter[];
|
|
34
34
|
export { BAN_ADAPTER_ID, banAdapter } from "./ban/adapter.js";
|
|
35
35
|
export { FCC_BDC_ADAPTER_ID, FCC_BDC_DEFAULT_LICENSE, fccBdcAdapter } from "./fcc-bdc/adapter.js";
|
|
36
|
+
export { GEONAMES_POSTAL_ADAPTER_ID, GEONAMES_POSTAL_DEFAULT_LICENSE, geonamesPostalAdapter, } from "./geonames-postal/adapter.js";
|
|
37
|
+
export { GEONAMES_ADAPTER_ID, GEONAMES_DEFAULT_LICENSE, geonamesAdapter } from "./geonames/adapter.js";
|
|
36
38
|
export { OPENADDRESSES_ADAPTER_ID, OPENADDRESSES_DEFAULT_LICENSE, openaddressesAdapter, } from "./openaddresses/adapter.js";
|
|
37
39
|
export { STATE_HI_SCHOOLS_ADAPTER_ID, STATE_HI_SCHOOLS_DEFAULT_LICENSE, stateHiSchoolsAdapter, } from "./state-hi-schools/adapter.js";
|
|
38
40
|
export { STATE_IA_CONTRACTORS_ADAPTER_ID, STATE_IA_CONTRACTORS_DEFAULT_LICENSE, stateIaContractorsAdapter, } from "./state-ia-contractors/adapter.js";
|
|
@@ -41,6 +43,7 @@ export { STATE_TX_NOTARIES_ADAPTER_ID, STATE_TX_NOTARIES_DEFAULT_LICENSE, stateT
|
|
|
41
43
|
export { TIGER_ADAPTER_ID, TIGER_DEFAULT_LICENSE, tigerAdapter } from "./tiger/adapter.js";
|
|
42
44
|
export { USGOV_HRSA_FQHC_ADAPTER_ID, USGOV_HRSA_FQHC_DEFAULT_LICENSE, usgovHrsaFqhcAdapter, } from "./usgov-hrsa-fqhc/adapter.js";
|
|
43
45
|
export { USGOV_IMLS_PLS_ADAPTER_ID, USGOV_IMLS_PLS_DEFAULT_LICENSE, usgovImlsPlsAdapter, } from "./usgov-imls-pls/adapter.js";
|
|
46
|
+
export { USGOV_IRS_BMF_ADAPTER_ID, USGOV_IRS_BMF_DEFAULT_LICENSE, usgovIrsBmfAdapter } from "./usgov-irs-bmf/adapter.js";
|
|
44
47
|
export { USGOV_NAD_ADAPTER_ID, USGOV_NAD_DEFAULT_LICENSE, usgovNadAdapter } from "./usgov-nad/adapter.js";
|
|
45
48
|
export { USGOV_NPPES_ADAPTER_ID, USGOV_NPPES_DEFAULT_LICENSE, usgovNppesAdapter } from "./usgov-nppes/adapter.js";
|
|
46
49
|
export { USGOV_SAMHSA_ADAPTER_ID, USGOV_SAMHSA_DEFAULT_LICENSE, usgovSamhsaTreatmentLocatorAdapter, } from "./usgov-samhsa-treatment-locator/adapter.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/adapters/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAGH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAA;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/adapters/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAGH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAA;AAmBhD;;;;;;;;;;GAUG;AACH,eAAO,MAAM,gBAAgB,EAAE,SAAS,aAAa,EAkBpD,CAAA;AAQD,OAAO,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAC7D,OAAO,EAAE,kBAAkB,EAAE,uBAAuB,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAA;AACjG,OAAO,EACN,0BAA0B,EAC1B,+BAA+B,EAC/B,qBAAqB,GACrB,MAAM,8BAA8B,CAAA;AACrC,OAAO,EAAE,mBAAmB,EAAE,wBAAwB,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAA;AACtG,OAAO,EACN,wBAAwB,EACxB,6BAA6B,EAC7B,oBAAoB,GACpB,MAAM,4BAA4B,CAAA;AACnC,OAAO,EACN,2BAA2B,EAC3B,gCAAgC,EAChC,qBAAqB,GACrB,MAAM,+BAA+B,CAAA;AACtC,OAAO,EACN,+BAA+B,EAC/B,oCAAoC,EACpC,yBAAyB,GACzB,MAAM,mCAAmC,CAAA;AAC1C,OAAO,EACN,4BAA4B,EAC5B,iCAAiC,EACjC,sBAAsB,GACtB,MAAM,gCAAgC,CAAA;AACvC,OAAO,EACN,4BAA4B,EAC5B,iCAAiC,EACjC,sBAAsB,GACtB,MAAM,gCAAgC,CAAA;AACvC,OAAO,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AAC1F,OAAO,EACN,0BAA0B,EAC1B,+BAA+B,EAC/B,oBAAoB,GACpB,MAAM,8BAA8B,CAAA;AACrC,OAAO,EACN,yBAAyB,EACzB,8BAA8B,EAC9B,mBAAmB,GACnB,MAAM,6BAA6B,CAAA;AACpC,OAAO,EAAE,wBAAwB,EAAE,6BAA6B,EAAE,kBAAkB,EAAE,MAAM,4BAA4B,CAAA;AACxH,OAAO,EAAE,oBAAoB,EAAE,yBAAyB,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAA;AACzG,OAAO,EAAE,sBAAsB,EAAE,2BAA2B,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAA;AACjH,OAAO,EACN,uBAAuB,EACvB,4BAA4B,EAC5B,kCAAkC,GAClC,MAAM,6CAA6C,CAAA;AACpD,OAAO,EAAE,oBAAoB,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAA;AACnF,OAAO,EAAE,yBAAyB,EAAE,oBAAoB,EAAE,MAAM,kCAAkC,CAAA"}
|
|
@@ -21,6 +21,8 @@
|
|
|
21
21
|
import { defaultAdapterRegistry } from "../adapter.js";
|
|
22
22
|
import { banAdapter } from "./ban/adapter.js";
|
|
23
23
|
import { fccBdcAdapter } from "./fcc-bdc/adapter.js";
|
|
24
|
+
import { geonamesPostalAdapter } from "./geonames-postal/adapter.js";
|
|
25
|
+
import { geonamesAdapter } from "./geonames/adapter.js";
|
|
24
26
|
import { openaddressesAdapter } from "./openaddresses/adapter.js";
|
|
25
27
|
import { stateHiSchoolsAdapter } from "./state-hi-schools/adapter.js";
|
|
26
28
|
import { stateIaContractorsAdapter } from "./state-ia-contractors/adapter.js";
|
|
@@ -29,6 +31,7 @@ import { stateTxNotariesAdapter } from "./state-tx-notaries/adapter.js";
|
|
|
29
31
|
import { tigerAdapter } from "./tiger/adapter.js";
|
|
30
32
|
import { usgovHrsaFqhcAdapter } from "./usgov-hrsa-fqhc/adapter.js";
|
|
31
33
|
import { usgovImlsPlsAdapter } from "./usgov-imls-pls/adapter.js";
|
|
34
|
+
import { usgovIrsBmfAdapter } from "./usgov-irs-bmf/adapter.js";
|
|
32
35
|
import { usgovNadAdapter } from "./usgov-nad/adapter.js";
|
|
33
36
|
import { usgovNppesAdapter } from "./usgov-nppes/adapter.js";
|
|
34
37
|
import { wofAdminAdapter } from "./wof-admin-json/adapter.js";
|
|
@@ -47,6 +50,8 @@ import { wofPostalcodeAdapter } from "./wof-postalcode-json/adapter.js";
|
|
|
47
50
|
export const BUILTIN_ADAPTERS = [
|
|
48
51
|
wofAdminAdapter,
|
|
49
52
|
wofPostalcodeAdapter,
|
|
53
|
+
geonamesAdapter,
|
|
54
|
+
geonamesPostalAdapter,
|
|
50
55
|
banAdapter,
|
|
51
56
|
tigerAdapter,
|
|
52
57
|
openaddressesAdapter,
|
|
@@ -55,6 +60,7 @@ export const BUILTIN_ADAPTERS = [
|
|
|
55
60
|
usgovNppesAdapter,
|
|
56
61
|
usgovNadAdapter,
|
|
57
62
|
usgovImlsPlsAdapter,
|
|
63
|
+
usgovIrsBmfAdapter,
|
|
58
64
|
stateIaContractorsAdapter,
|
|
59
65
|
stateTxNotariesAdapter,
|
|
60
66
|
stateNyNotariesAdapter,
|
|
@@ -67,6 +73,8 @@ for (const adapter of BUILTIN_ADAPTERS) {
|
|
|
67
73
|
}
|
|
68
74
|
export { BAN_ADAPTER_ID, banAdapter } from "./ban/adapter.js";
|
|
69
75
|
export { FCC_BDC_ADAPTER_ID, FCC_BDC_DEFAULT_LICENSE, fccBdcAdapter } from "./fcc-bdc/adapter.js";
|
|
76
|
+
export { GEONAMES_POSTAL_ADAPTER_ID, GEONAMES_POSTAL_DEFAULT_LICENSE, geonamesPostalAdapter, } from "./geonames-postal/adapter.js";
|
|
77
|
+
export { GEONAMES_ADAPTER_ID, GEONAMES_DEFAULT_LICENSE, geonamesAdapter } from "./geonames/adapter.js";
|
|
70
78
|
export { OPENADDRESSES_ADAPTER_ID, OPENADDRESSES_DEFAULT_LICENSE, openaddressesAdapter, } from "./openaddresses/adapter.js";
|
|
71
79
|
export { STATE_HI_SCHOOLS_ADAPTER_ID, STATE_HI_SCHOOLS_DEFAULT_LICENSE, stateHiSchoolsAdapter, } from "./state-hi-schools/adapter.js";
|
|
72
80
|
export { STATE_IA_CONTRACTORS_ADAPTER_ID, STATE_IA_CONTRACTORS_DEFAULT_LICENSE, stateIaContractorsAdapter, } from "./state-ia-contractors/adapter.js";
|
|
@@ -75,6 +83,7 @@ export { STATE_TX_NOTARIES_ADAPTER_ID, STATE_TX_NOTARIES_DEFAULT_LICENSE, stateT
|
|
|
75
83
|
export { TIGER_ADAPTER_ID, TIGER_DEFAULT_LICENSE, tigerAdapter } from "./tiger/adapter.js";
|
|
76
84
|
export { USGOV_HRSA_FQHC_ADAPTER_ID, USGOV_HRSA_FQHC_DEFAULT_LICENSE, usgovHrsaFqhcAdapter, } from "./usgov-hrsa-fqhc/adapter.js";
|
|
77
85
|
export { USGOV_IMLS_PLS_ADAPTER_ID, USGOV_IMLS_PLS_DEFAULT_LICENSE, usgovImlsPlsAdapter, } from "./usgov-imls-pls/adapter.js";
|
|
86
|
+
export { USGOV_IRS_BMF_ADAPTER_ID, USGOV_IRS_BMF_DEFAULT_LICENSE, usgovIrsBmfAdapter } from "./usgov-irs-bmf/adapter.js";
|
|
78
87
|
export { USGOV_NAD_ADAPTER_ID, USGOV_NAD_DEFAULT_LICENSE, usgovNadAdapter } from "./usgov-nad/adapter.js";
|
|
79
88
|
export { USGOV_NPPES_ADAPTER_ID, USGOV_NPPES_DEFAULT_LICENSE, usgovNppesAdapter } from "./usgov-nppes/adapter.js";
|
|
80
89
|
export { USGOV_SAMHSA_ADAPTER_ID, USGOV_SAMHSA_DEFAULT_LICENSE, usgovSamhsaTreatmentLocatorAdapter, } from "./usgov-samhsa-treatment-locator/adapter.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/adapters/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,EAAE,sBAAsB,EAAE,MAAM,eAAe,CAAA;AAEtD,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAC7C,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAA;AACpD,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAA;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAA;AACrE,OAAO,EAAE,yBAAyB,EAAE,MAAM,mCAAmC,CAAA;AAC7E,OAAO,EAAE,sBAAsB,EAAE,MAAM,gCAAgC,CAAA;AACvE,OAAO,EAAE,sBAAsB,EAAE,MAAM,gCAAgC,CAAA;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,OAAO,EAAE,oBAAoB,EAAE,MAAM,8BAA8B,CAAA;AACnE,OAAO,EAAE,mBAAmB,EAAE,MAAM,6BAA6B,CAAA;AACjE,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAA;AACxD,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAA;AAC5D,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAA;AAC7D,OAAO,EAAE,oBAAoB,EAAE,MAAM,kCAAkC,CAAA;AAEvE;;;;;;;;;;GAUG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAA6B;IACzD,eAAe;IACf,oBAAoB;IACpB,UAAU;IACV,YAAY;IACZ,oBAAoB;IACpB,aAAa;IACb,oBAAoB;IACpB,iBAAiB;IACjB,eAAe;IACf,mBAAmB;IACnB,yBAAyB;IACzB,sBAAsB;IACtB,sBAAsB;IACtB,qBAAqB;CACrB,CAAA;AAED,KAAK,MAAM,OAAO,IAAI,gBAAgB,EAAE,CAAC;IACxC,IAAI,CAAC,sBAAsB,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE,CAAC;QAC7C,sBAAsB,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAA;IACzC,CAAC;AACF,CAAC;AAED,OAAO,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAC7D,OAAO,EAAE,kBAAkB,EAAE,uBAAuB,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAA;AACjG,OAAO,EACN,wBAAwB,EACxB,6BAA6B,EAC7B,oBAAoB,GACpB,MAAM,4BAA4B,CAAA;AACnC,OAAO,EACN,2BAA2B,EAC3B,gCAAgC,EAChC,qBAAqB,GACrB,MAAM,+BAA+B,CAAA;AACtC,OAAO,EACN,+BAA+B,EAC/B,oCAAoC,EACpC,yBAAyB,GACzB,MAAM,mCAAmC,CAAA;AAC1C,OAAO,EACN,4BAA4B,EAC5B,iCAAiC,EACjC,sBAAsB,GACtB,MAAM,gCAAgC,CAAA;AACvC,OAAO,EACN,4BAA4B,EAC5B,iCAAiC,EACjC,sBAAsB,GACtB,MAAM,gCAAgC,CAAA;AACvC,OAAO,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AAC1F,OAAO,EACN,0BAA0B,EAC1B,+BAA+B,EAC/B,oBAAoB,GACpB,MAAM,8BAA8B,CAAA;AACrC,OAAO,EACN,yBAAyB,EACzB,8BAA8B,EAC9B,mBAAmB,GACnB,MAAM,6BAA6B,CAAA;AACpC,OAAO,EAAE,oBAAoB,EAAE,yBAAyB,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAA;AACzG,OAAO,EAAE,sBAAsB,EAAE,2BAA
2B,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAA;AACjH,OAAO,EACN,uBAAuB,EACvB,4BAA4B,EAC5B,kCAAkC,GAClC,MAAM,6CAA6C,CAAA;AACpD,OAAO,EAAE,oBAAoB,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAA;AACnF,OAAO,EAAE,yBAAyB,EAAE,oBAAoB,EAAE,MAAM,kCAAkC,CAAA"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/adapters/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,EAAE,sBAAsB,EAAE,MAAM,eAAe,CAAA;AAEtD,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAC7C,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAA;AACpD,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAA;AACpE,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAA;AACvD,OAAO,EAAE,oBAAoB,EAAE,MAAM,4BAA4B,CAAA;AACjE,OAAO,EAAE,qBAAqB,EAAE,MAAM,+BAA+B,CAAA;AACrE,OAAO,EAAE,yBAAyB,EAAE,MAAM,mCAAmC,CAAA;AAC7E,OAAO,EAAE,sBAAsB,EAAE,MAAM,gCAAgC,CAAA;AACvE,OAAO,EAAE,sBAAsB,EAAE,MAAM,gCAAgC,CAAA;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AACjD,OAAO,EAAE,oBAAoB,EAAE,MAAM,8BAA8B,CAAA;AACnE,OAAO,EAAE,mBAAmB,EAAE,MAAM,6BAA6B,CAAA;AACjE,OAAO,EAAE,kBAAkB,EAAE,MAAM,4BAA4B,CAAA;AAC/D,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAA;AACxD,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAA;AAC5D,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAA;AAC7D,OAAO,EAAE,oBAAoB,EAAE,MAAM,kCAAkC,CAAA;AAEvE;;;;;;;;;;GAUG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAA6B;IACzD,eAAe;IACf,oBAAoB;IACpB,eAAe;IACf,qBAAqB;IACrB,UAAU;IACV,YAAY;IACZ,oBAAoB;IACpB,aAAa;IACb,oBAAoB;IACpB,iBAAiB;IACjB,eAAe;IACf,mBAAmB;IACnB,kBAAkB;IAClB,yBAAyB;IACzB,sBAAsB;IACtB,sBAAsB;IACtB,qBAAqB;CACrB,CAAA;AAED,KAAK,MAAM,OAAO,IAAI,gBAAgB,EAAE,CAAC;IACxC,IAAI,CAAC,sBAAsB,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE,CAAC;QAC7C,sBAAsB,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAA;IACzC,CAAC;AACF,CAAC;AAED,OAAO,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAC7D,OAAO,EAAE,kBAAkB,EAAE,uBAAuB,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAA;AACjG,OAAO,EACN,0BAA0B,EAC1B,+BAA+B,EAC/B,qBAAqB,GACrB,MAAM,8BAA8B,CAAA;AACrC,OAAO,EAAE,mBAAmB,EAAE,wBAAwB,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAA;AACtG,OAAO,EACN,wBAAwB,EACxB,6BAA6B,EAC7B,oBAAoB,GACpB,MAAM,4BAA4B,CAAA;AACnC,OAAO,EACN,2BAA2B,EAC3B,gCAAgC,EAChC,qBAAqB,GACrB,MAAM,+BAA+B,CAAA;AACtC,OAAO,EACN,+BAA+B,EAC/B,oCAAoC,EACpC,yBAAyB,GACzB,MAAM,mCAAmC,CAAA;AAC1C,OAAO,EACN,4BAA4B,EAC5B,iCAAiC,EACjC,sBAAsB,GACtB,MAAM,gCAAgC,CAAA;AACvC,OAAO,EACN,4BAA4B,EAC5B,iCAAiC,EACjC,sBAAsB,GACtB,MAAM,gCAAgC,CAAA;AA
CvC,OAAO,EAAE,gBAAgB,EAAE,qBAAqB,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AAC1F,OAAO,EACN,0BAA0B,EAC1B,+BAA+B,EAC/B,oBAAoB,GACpB,MAAM,8BAA8B,CAAA;AACrC,OAAO,EACN,yBAAyB,EACzB,8BAA8B,EAC9B,mBAAmB,GACnB,MAAM,6BAA6B,CAAA;AACpC,OAAO,EAAE,wBAAwB,EAAE,6BAA6B,EAAE,kBAAkB,EAAE,MAAM,4BAA4B,CAAA;AACxH,OAAO,EAAE,oBAAoB,EAAE,yBAAyB,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAA;AACzG,OAAO,EAAE,sBAAsB,EAAE,2BAA2B,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAA;AACjH,OAAO,EACN,uBAAuB,EACvB,4BAA4B,EAC5B,kCAAkC,GAClC,MAAM,6CAA6C,CAAA;AACpD,OAAO,EAAE,oBAAoB,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAA;AACnF,OAAO,EAAE,yBAAyB,EAAE,oBAAoB,EAAE,MAAM,kCAAkC,CAAA"}
|