@mailwoman/corpus 3.0.0 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/out/src/adapters/ban/adapter.d.ts.map +1 -1
- package/out/src/adapters/ban/adapter.js +6 -2
- package/out/src/adapters/ban/adapter.js.map +1 -1
- package/out/src/adapters/ban/street-decompose.d.ts +28 -0
- package/out/src/adapters/ban/street-decompose.d.ts.map +1 -0
- package/out/src/adapters/ban/street-decompose.js +78 -0
- package/out/src/adapters/ban/street-decompose.js.map +1 -0
- package/out/src/adapters/geonames/adapter.d.ts +35 -0
- package/out/src/adapters/geonames/adapter.d.ts.map +1 -0
- package/out/src/adapters/geonames/adapter.js +161 -0
- package/out/src/adapters/geonames/adapter.js.map +1 -0
- package/out/src/adapters/geonames-postal/adapter.d.ts +30 -0
- package/out/src/adapters/geonames-postal/adapter.d.ts.map +1 -0
- package/out/src/adapters/geonames-postal/adapter.js +96 -0
- package/out/src/adapters/geonames-postal/adapter.js.map +1 -0
- package/out/src/adapters/index.d.ts +3 -0
- package/out/src/adapters/index.d.ts.map +1 -1
- package/out/src/adapters/index.js +9 -0
- package/out/src/adapters/index.js.map +1 -1
- package/out/src/adapters/synth-po-box/adapter.d.ts +48 -0
- package/out/src/adapters/synth-po-box/adapter.d.ts.map +1 -0
- package/out/src/adapters/synth-po-box/adapter.js +101 -0
- package/out/src/adapters/synth-po-box/adapter.js.map +1 -0
- package/out/src/adapters/tiger/adapter.d.ts.map +1 -1
- package/out/src/adapters/tiger/adapter.js +9 -3
- package/out/src/adapters/tiger/adapter.js.map +1 -1
- package/out/src/adapters/tiger/street-decompose.d.ts +30 -0
- package/out/src/adapters/tiger/street-decompose.d.ts.map +1 -0
- package/out/src/adapters/tiger/street-decompose.js +99 -0
- package/out/src/adapters/tiger/street-decompose.js.map +1 -0
- package/out/src/adapters/usgov-irs-bmf/adapter.d.ts +26 -0
- package/out/src/adapters/usgov-irs-bmf/adapter.d.ts.map +1 -0
- package/out/src/adapters/usgov-irs-bmf/adapter.js +115 -0
- package/out/src/adapters/usgov-irs-bmf/adapter.js.map +1 -0
- package/out/src/adapters/usgov-nad/adapter.d.ts.map +1 -1
- package/out/src/adapters/usgov-nad/adapter.js +31 -10
- package/out/src/adapters/usgov-nad/adapter.js.map +1 -1
- package/out/src/adapters/wof-admin-jp/adapter.d.ts +58 -0
- package/out/src/adapters/wof-admin-jp/adapter.d.ts.map +1 -0
- package/out/src/adapters/wof-admin-jp/adapter.js +129 -0
- package/out/src/adapters/wof-admin-jp/adapter.js.map +1 -0
- package/out/src/index.d.ts +6 -0
- package/out/src/index.d.ts.map +1 -1
- package/out/src/index.js +6 -0
- package/out/src/index.js.map +1 -1
- package/out/src/synthesize-german.d.ts +77 -0
- package/out/src/synthesize-german.d.ts.map +1 -0
- package/out/src/synthesize-german.js +117 -0
- package/out/src/synthesize-german.js.map +1 -0
- package/out/src/synthesize-house-venue.d.ts +57 -0
- package/out/src/synthesize-house-venue.d.ts.map +1 -0
- package/out/src/synthesize-house-venue.js +147 -0
- package/out/src/synthesize-house-venue.js.map +1 -0
- package/out/src/synthesize-intersection.d.ts +46 -0
- package/out/src/synthesize-intersection.d.ts.map +1 -0
- package/out/src/synthesize-intersection.js +152 -0
- package/out/src/synthesize-intersection.js.map +1 -0
- package/out/src/synthesize-no-street.d.ts +70 -0
- package/out/src/synthesize-no-street.d.ts.map +1 -0
- package/out/src/synthesize-no-street.js +279 -0
- package/out/src/synthesize-no-street.js.map +1 -0
- package/out/src/synthesize-po-box.d.ts +75 -0
- package/out/src/synthesize-po-box.d.ts.map +1 -0
- package/out/src/synthesize-po-box.js +186 -0
- package/out/src/synthesize-po-box.js.map +1 -0
- package/out/src/synthesize-street.d.ts +53 -0
- package/out/src/synthesize-street.d.ts.map +1 -0
- package/out/src/synthesize-street.js +212 -0
- package/out/src/synthesize-street.js.map +1 -0
- package/out/src/synthesize.d.ts +19 -0
- package/out/src/synthesize.d.ts.map +1 -1
- package/out/src/synthesize.js +65 -1
- package/out/src/synthesize.js.map +1 -1
- package/package.json +8 -7
- package/out/src/codex/us-street-suffix.d.ts +0 -260
- package/out/src/codex/us-street-suffix.d.ts.map +0 -1
- package/out/src/codex/us-street-suffix.js +0 -286
- package/out/src/codex/us-street-suffix.js.map +0 -1
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* `synth-po-box`: PO box / PMB / Apartado / BP synthesizer adapter.
|
|
7
|
+
*
|
|
8
|
+
* Consumes a JSONL stream of (locality, region, postcode, country) tuples — typically extracted
|
|
9
|
+
* from existing corpus output (TIGER/NAD/BAN/WOF) — and emits synthetic PO box training rows. See
|
|
10
|
+
* `../../synthesize-po-box.ts` for the per-locale templates and number-noise logic.
|
|
11
|
+
*
|
|
12
|
+
* Why an adapter and not an augmenter:
|
|
13
|
+
*
|
|
14
|
+
* - Per USPS Pub 28 / DMM 508, a PO box delivery line is mutually exclusive with a street line.
|
|
15
|
+
* Synthesizing PO boxes by mutating a street row would teach the model an invalid pattern.
|
|
16
|
+
* The clean shape is: read just (locality, region, postcode, country) and produce a fresh
|
|
17
|
+
* PO-box-shaped row.
|
|
18
|
+
* - Per-DeepSeek (3-turn consult, 2026-05-28): PMB rows that COMBINE a street line with a PMB number
|
|
19
|
+
* ARE valid (CMRA addresses). Those are produced when `pmbRatio > 0` AND the input tuple
|
|
20
|
+
* carries a `street` field.
|
|
21
|
+
*/
|
|
22
|
+
import { type PoBoxBaseTuple } from "../../synthesize-po-box.js";
|
|
23
|
+
import type { CorpusAdapter } from "../../types.js";
|
|
24
|
+
export declare const SYNTH_PO_BOX_ADAPTER_ID = "synth-po-box";
|
|
25
|
+
export declare const SYNTH_PO_BOX_LICENSE = "Synthetic \u2014 derived from CC-BY / public-domain input tuples";
|
|
26
|
+
export interface PoBoxInputRow extends PoBoxBaseTuple {
|
|
27
|
+
street?: string;
|
|
28
|
+
houseNumber?: string;
|
|
29
|
+
}
|
|
30
|
+
export interface SynthPoBoxAdapterOptions {
|
|
31
|
+
/**
|
|
32
|
+
* How many PO box variants to emit per input tuple. Each variant picks a different leader (and
|
|
33
|
+
* possibly a different number / noise level). Default 1.
|
|
34
|
+
*/
|
|
35
|
+
variantsPerInput?: number;
|
|
36
|
+
/**
|
|
37
|
+
* Probability (0..1) of emitting a PMB-with-street variant when both the input has a street and
|
|
38
|
+
* the locale supports PMB. Default 0.15.
|
|
39
|
+
*/
|
|
40
|
+
pmbRatio?: number;
|
|
41
|
+
/**
|
|
42
|
+
* Deterministic seed for reproducible synthesis. Default Date.now().
|
|
43
|
+
*/
|
|
44
|
+
seed?: number;
|
|
45
|
+
}
|
|
46
|
+
export declare function createSynthPoBoxAdapter(opts?: SynthPoBoxAdapterOptions): CorpusAdapter;
|
|
47
|
+
export declare const synthPoBoxAdapter: CorpusAdapter;
|
|
48
|
+
//# sourceMappingURL=adapter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/synth-po-box/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAKH,OAAO,EAAsB,KAAK,cAAc,EAAE,MAAM,4BAA4B,CAAA;AACpF,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,uBAAuB,iBAAiB,CAAA;AACrD,eAAO,MAAM,oBAAoB,qEAAgE,CAAA;AAEjG,MAAM,WAAW,aAAc,SAAQ,cAAc;IACpD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,WAAW,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,MAAM,WAAW,wBAAwB;IACxC;;;OAGG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAA;IACzB;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB;;OAEG;IACH,IAAI,CAAC,EAAE,MAAM,CAAA;CACb;AAUD,wBAAgB,uBAAuB,CAAC,IAAI,GAAE,wBAA6B,GAAG,aAAa,CAuE1F;AAED,eAAO,MAAM,iBAAiB,eAA4B,CAAA"}
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* `synth-po-box`: PO box / PMB / Apartado / BP synthesizer adapter.
|
|
7
|
+
*
|
|
8
|
+
* Consumes a JSONL stream of (locality, region, postcode, country) tuples — typically extracted
|
|
9
|
+
* from existing corpus output (TIGER/NAD/BAN/WOF) — and emits synthetic PO box training rows. See
|
|
10
|
+
* `../../synthesize-po-box.ts` for the per-locale templates and number-noise logic.
|
|
11
|
+
*
|
|
12
|
+
* Why an adapter and not an augmenter:
|
|
13
|
+
*
|
|
14
|
+
* - Per USPS Pub 28 / DMM 508, a PO box delivery line is mutually exclusive with a street line.
|
|
15
|
+
* Synthesizing PO boxes by mutating a street row would teach the model an invalid pattern.
|
|
16
|
+
* The clean shape is: read just (locality, region, postcode, country) and produce a fresh
|
|
17
|
+
* PO-box-shaped row.
|
|
18
|
+
* - Per-DeepSeek (3-turn consult, 2026-05-28): PMB rows that COMBINE a street line with a PMB number
|
|
19
|
+
* ARE valid (CMRA addresses). Those are produced when `pmbRatio > 0` AND the input tuple
|
|
20
|
+
* carries a `street` field.
|
|
21
|
+
*/
|
|
22
|
+
import { createReadStream } from "node:fs";
|
|
23
|
+
import { createInterface } from "node:readline";
|
|
24
|
+
import { stableSourceId } from "../../adapter.js";
|
|
25
|
+
import { synthesizePoBoxRow } from "../../synthesize-po-box.js";
|
|
26
|
+
export const SYNTH_PO_BOX_ADAPTER_ID = "synth-po-box";
|
|
27
|
+
export const SYNTH_PO_BOX_LICENSE = "Synthetic — derived from CC-BY / public-domain input tuples";
|
|
28
|
+
/**
 * Create a seeded pseudo-random number generator.
 *
 * The generator is a linear congruential generator with multiplier 1664525, increment
 * 1013904223 and modulus 2^32. The same seed always yields the same sequence.
 *
 * @param {number} seed - Initial generator state.
 * @returns {() => number} A function returning the next value in the range [0, 1).
 */
function makeRandom(seed) {
	const MODULUS = 4294967296; // 2^32
	let s = seed;
	return () => {
		s = (s * 1664525 + 1013904223) % MODULUS;
		// `%` keeps the sign of the dividend, so a negative seed would otherwise leak
		// negative values out of the generator. Non-negative states are left untouched.
		if (s < 0)
			s += MODULUS;
		return s / MODULUS;
	};
}
|
|
35
|
+
/**
 * Build the `synth-po-box` corpus adapter.
 *
 * The adapter reads `options.inputPath` as JSONL, one tuple per line, and yields synthetic
 * PO box rows produced by `synthesizePoBoxRow`.
 *
 * @param {object} [opts] - Adapter options.
 * @param {number} [opts.variantsPerInput] - Rows to attempt per input tuple. Default 1.
 * @param {number} [opts.pmbRatio] - Forwarded to `synthesizePoBoxRow`. Default 0.15.
 * @param {number} [opts.seed] - Seed for the random source. Default `Date.now()`, evaluated
 *   on each `rows()` call.
 * @returns {object} Adapter with `id`, `defaultLicense`, `description` and an async
 *   generator `rows(options)`.
 */
export function createSynthPoBoxAdapter(opts = {}) {
	const variantsPerInput = opts.variantsPerInput ?? 1;
	const pmbRatio = opts.pmbRatio ?? 0.15;
	return {
		id: SYNTH_PO_BOX_ADAPTER_ID,
		defaultLicense: SYNTH_PO_BOX_LICENSE,
		description: "Synthetic PO box / PMB / Apartado / Boîte Postale rows. Consumes JSONL of (locality, region, postcode, country) tuples and emits locale-appropriate PO box variants.",
		/**
		 * Yield synthetic rows for every usable line of the input file.
		 *
		 * Reads `options.inputPath`, and honours `options.signal`, `options.limit` (maximum
		 * number of rows yielded) and `options.country` (exact-match filter).
		 */
		async *rows(options) {
			const random = makeRandom(opts.seed ?? Date.now());
			const stream = createReadStream(options.inputPath, { encoding: "utf8" });
			const rl = createInterface({ input: stream, crlfDelay: Infinity });
			let emitted = 0;
			try {
				for await (const line of rl) {
					if (options.signal?.aborted)
						return;
					if (options.limit !== undefined && emitted >= options.limit)
						return;
					const trimmed = line.trim();
					if (!trimmed)
						continue;
					let input;
					try {
						input = JSON.parse(trimmed);
					}
					catch {
						// Best-effort ingestion: a malformed line is skipped, not fatal.
						continue;
					}
					// `JSON.parse` also accepts `null` and bare scalars; reading a property
					// off `null` would throw and abort the whole run.
					if (input === null || typeof input !== "object")
						continue;
					if (!input.locality || !input.region || !input.postcode || !input.country)
						continue;
					if (options.country && options.country !== input.country)
						continue;
					for (let v = 0; v < variantsPerInput; v++) {
						const synth = synthesizePoBoxRow(input, { random, pmbRatio });
						if (!synth)
							continue;
						// Fold the variant index `v` into the `locality` slot so each variant
						// gets its own digest; stableSourceId only accepts ComponentTag keys.
						const sourceId = stableSourceId(SYNTH_PO_BOX_ADAPTER_ID, {
							locality: `${input.locality}#${v}`,
							region: input.region,
							postcode: input.postcode,
							country: input.country,
						});
						yield {
							raw: synth.raw,
							components: synth.components,
							country: input.country,
							locale: synth.locale,
							source: SYNTH_PO_BOX_ADAPTER_ID,
							source_id: sourceId,
							corpus_version: "",
							license: SYNTH_PO_BOX_LICENSE,
						};
						emitted++;
						if (options.limit !== undefined && emitted >= options.limit)
							return;
					}
				}
			}
			finally {
				// Runs on normal completion, early return, errors, and when the consumer
				// stops iterating. Closing the readline interface alone does not release
				// the underlying file stream, so destroy it explicitly.
				rl.close();
				stream.destroy();
			}
		},
	};
}
|
|
100
|
+
export const synthPoBoxAdapter = createSynthPoBoxAdapter();
|
|
101
|
+
//# sourceMappingURL=adapter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/synth-po-box/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAA;AAC/C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,kBAAkB,EAAuB,MAAM,4BAA4B,CAAA;AAGpF,MAAM,CAAC,MAAM,uBAAuB,GAAG,cAAc,CAAA;AACrD,MAAM,CAAC,MAAM,oBAAoB,GAAG,6DAA6D,CAAA;AAwBjG,SAAS,UAAU,CAAC,IAAY;IAC/B,IAAI,CAAC,GAAG,IAAI,CAAA;IACZ,OAAO,GAAG,EAAE;QACX,CAAC,GAAG,CAAC,CAAC,GAAG,OAAO,GAAG,UAAU,CAAC,GAAG,UAAU,CAAA;QAC3C,OAAO,CAAC,GAAG,UAAU,CAAA;IACtB,CAAC,CAAA;AACF,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,OAAiC,EAAE;IAC1E,MAAM,gBAAgB,GAAG,IAAI,CAAC,gBAAgB,IAAI,CAAC,CAAA;IACnD,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAA;IAEtC,OAAO;QACN,EAAE,EAAE,uBAAuB;QAC3B,cAAc,EAAE,oBAAoB;QACpC,WAAW,EACV,sKAAsK;QAEvK,KAAK,CAAC,CAAC,IAAI,CAAC,OAAuB;YAClC,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,GAAG,EAAE,CAAC,CAAA;YAElD,MAAM,MAAM,GAAG,gBAAgB,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;YACxE,MAAM,EAAE,GAAG,eAAe,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC,CAAA;YAElE,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,OAAO,GAAG,CAAC,CAAA;YAEf,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,EAAE,EAAE,CAAC;gBAC7B,IAAI,OAAO,CAAC,MAAM,EAAE,OAAO;oBAAE,MAAK;gBAClC,IAAI,OAAO,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,OAAO,CAAC,KAAK;oBAAE,MAAK;gBAElE,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;gBAC3B,IAAI,CAAC,OAAO;oBAAE,SAAQ;gBAEtB,IAAI,KAAoB,CAAA;gBACxB,IAAI,CAAC;oBACJ,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAkB,CAAA;gBAC7C,CAAC;gBAAC,MAAM,CAAC;oBACR,OAAO,EAAE,CAAA;oBACT,SAAQ;gBACT,CAAC;gBAED,IAAI,CAAC,KAAK,CAAC,QAAQ,IAAI,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;oBAC3E,OAAO,EAAE,CAAA;oBACT,SAAQ;gBACT,CAAC;gBAED,IAAI,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,OAAO,KAAK,KAAK,CAAC,OAAO;oBAAE,SAAQ;gBAElE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,gBAAgB,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC3C,MAAM,KAAK,GAAG,kBAAkB,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,QAAQ,
EAAE,CAAC,CAAA;oBAC7D,IAAI,CAAC,KAAK;wBAAE,SAAQ;oBAEpB,6EAA6E;oBAC7E,iDAAiD;oBACjD,MAAM,QAAQ,GAAG,cAAc,CAAC,uBAAuB,EAAE;wBACxD,QAAQ,EAAE,GAAG,KAAK,CAAC,QAAQ,IAAI,CAAC,EAAE;wBAClC,MAAM,EAAE,KAAK,CAAC,MAAM;wBACpB,QAAQ,EAAE,KAAK,CAAC,QAAQ;wBACxB,OAAO,EAAE,KAAK,CAAC,OAAO;qBACtB,CAAC,CAAA;oBAEF,MAAM;wBACL,GAAG,EAAE,KAAK,CAAC,GAAG;wBACd,UAAU,EAAE,KAAK,CAAC,UAAU;wBAC5B,OAAO,EAAE,KAAK,CAAC,OAAO;wBACtB,MAAM,EAAE,KAAK,CAAC,MAAM;wBACpB,MAAM,EAAE,uBAAuB;wBAC/B,SAAS,EAAE,QAAQ;wBACnB,cAAc,EAAE,EAAE;wBAClB,OAAO,EAAE,oBAAoB;qBAC7B,CAAA;oBACD,OAAO,EAAE,CAAA;oBAET,IAAI,OAAO,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,OAAO,CAAC,KAAK;wBAAE,MAAK;gBACnE,CAAC;YACF,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,iBAAiB,GAAG,uBAAuB,EAAE,CAAA"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/tiger/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAKH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;
|
|
1
|
+
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/tiger/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAKH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAGjF,eAAO,MAAM,gBAAgB,UAAU,CAAA;AACvC,eAAO,MAAM,qBAAqB,kBAAkB,CAAA;AA0FpD,yFAAyF;AACzF,wBAAgB,kBAAkB,IAAI,aAAa,CAoElD;AAED,eAAO,MAAM,YAAY,eAAuB,CAAA"}
|
|
@@ -39,6 +39,7 @@
|
|
|
39
39
|
import { DatabaseSync } from "node:sqlite";
|
|
40
40
|
import { lookupFipsState } from "../../codex/us-fips-state.js";
|
|
41
41
|
import { formatAddress, reconcileComponents } from "../../format.js";
|
|
42
|
+
import { decomposeStreet } from "./street-decompose.js";
|
|
42
43
|
export const TIGER_ADAPTER_ID = "tiger";
|
|
43
44
|
export const TIGER_DEFAULT_LICENSE = "Public Domain";
|
|
44
45
|
/**
|
|
@@ -54,18 +55,23 @@ const US_COUNTRY_DISPLAY = "United States of America";
|
|
|
54
55
|
* - `zipl !== zipr` → two rows (one per side's ZIP).
|
|
55
56
|
*/
|
|
56
57
|
function* streetVariants(row) {
|
|
57
|
-
const
|
|
58
|
-
if (!
|
|
58
|
+
const fullname = row.fullname.trim();
|
|
59
|
+
if (!fullname)
|
|
59
60
|
return;
|
|
60
61
|
const state = lookupFipsState(row.statefp);
|
|
61
62
|
if (!state)
|
|
62
63
|
return;
|
|
63
64
|
const zipl = row.zipl?.trim() ?? "";
|
|
64
65
|
const zipr = row.zipr?.trim() ?? "";
|
|
66
|
+
const decomposed = decomposeStreet(fullname);
|
|
65
67
|
const baseComponents = {
|
|
66
|
-
street,
|
|
67
68
|
region: state.abbreviation,
|
|
69
|
+
street: decomposed.street,
|
|
68
70
|
};
|
|
71
|
+
if (decomposed.prefix)
|
|
72
|
+
baseComponents.street_prefix = decomposed.prefix;
|
|
73
|
+
if (decomposed.suffix)
|
|
74
|
+
baseComponents.street_suffix = decomposed.suffix;
|
|
69
75
|
if (!zipl && !zipr) {
|
|
70
76
|
yield { components: baseComponents, variantKey: "no-zip" };
|
|
71
77
|
return;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/tiger/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAA;AAC1C,OAAO,EAAE,eAAe,EAAE,MAAM,8BAA8B,CAAA;AAC9D,OAAO,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;
|
|
1
|
+
{"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/tiger/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAA;AAC1C,OAAO,EAAE,eAAe,EAAE,MAAM,8BAA8B,CAAA;AAC9D,OAAO,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAEpE,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAA;AAEvD,MAAM,CAAC,MAAM,gBAAgB,GAAG,OAAO,CAAA;AACvC,MAAM,CAAC,MAAM,qBAAqB,GAAG,eAAe,CAAA;AAEpD;;;GAGG;AACH,MAAM,kBAAkB,GAAG,0BAA0B,CAAA;AAiBrD;;;;;;GAMG;AACH,QAAQ,CAAC,CAAC,cAAc,CAAC,GAAmB;IAI3C,MAAM,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAA;IACpC,IAAI,CAAC,QAAQ;QAAE,OAAM;IACrB,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,CAAA;IAC1C,IAAI,CAAC,KAAK;QAAE,OAAM;IAElB,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IACnC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IAEnC,MAAM,UAAU,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAA;IAE5C,MAAM,cAAc,GAA+B;QAClD,MAAM,EAAE,KAAK,CAAC,YAAY;QAC1B,MAAM,EAAE,UAAU,CAAC,MAAM;KACzB,CAAA;IACD,IAAI,UAAU,CAAC,MAAM;QAAE,cAAc,CAAC,aAAa,GAAG,UAAU,CAAC,MAAM,CAAA;IACvE,IAAI,UAAU,CAAC,MAAM;QAAE,cAAc,CAAC,aAAa,GAAG,UAAU,CAAC,MAAM,CAAA;IAEvE,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QACpB,MAAM,EAAE,UAAU,EAAE,cAAc,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAA;QAC1D,OAAM;IACP,CAAC;IACD,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QACnC,MAAM;YACL,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE;YACjD,UAAU,EAAE,OAAO,IAAI,EAAE;SACzB,CAAA;QACD,OAAM;IACP,CAAC;IACD,IAAI,IAAI;QAAE,MAAM,EAAE,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE,EAAE,UAAU,EAAE,QAAQ,IAAI,EAAE,EAAE,CAAA;IACjG,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI;QAAE,MAAM,EAAE,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE,EAAE,UAAU,EAAE,QAAQ,IAAI,EAAE,EAAE,CAAA;AACnH,CAAC;AAED,sEAAsE;AACtE,QAAQ,CAAC,CAAC,aAAa,CAAC,GAAkB;IAIzC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAA;IAC5B,IAAI,CAAC,IAAI;QAAE,OAAM;IACjB,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,CAAA;IAC1C,IAAI,CAAC,KAAK;QAAE,OAAM;IAElB,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE;QAC9B,UAAU,EA
AE,eAAe;KAC3B,CAAA;IACD,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,CAAC,YAAY,EAAE;QAC1D,UAAU,EAAE,aAAa;KACzB,CAAA;IACD,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,CAAC,YAAY,EAAE,OAAO,EAAE,kBAAkB,EAAE;QACvF,UAAU,EAAE,qBAAqB;KACjC,CAAA;AACF,CAAC;AAED,yFAAyF;AACzF,MAAM,UAAU,kBAAkB;IACjC,OAAO;QACN,EAAE,EAAE,gBAAgB;QACpB,cAAc,EAAE,qBAAqB;QACrC,WAAW,EACV,4GAA4G;QAE7G,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,iDAAiD,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YACjF,CAAC;YAED,MAAM,EAAE,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAA;YAC/D,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,MAAM,UAAU,GAAG,EAAE,CAAC,OAAO,CAAC,mEAAmE,CAAC,CAAA;gBAClG,MAAM,SAAS,GAAG,EAAE,CAAC,OAAO,CAAC,qDAAqD,CAAC,CAAA;gBAEnF,KAAK,MAAM,GAAG,IAAI,UAAU,CAAC,OAAO,EAAsC,EAAE,CAAC;oBAC5E,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,OAAM;oBAChC,KAAK,MAAM,OAAO,IAAI,cAAc,CAAC,GAAG,CAAC,EAAE,CAAC;wBAC3C,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;4BAAE,OAAM;wBAC7D,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,UAAU,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;wBACxE,IAAI,CAAC,GAAG;4BAAE,SAAQ;wBAClB,MAAM,OAAO,GAAG,mBAAmB,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;wBAC5D,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;4BAAE,SAAQ;wBAE/C,MAAM;4BACL,GAAG;4BACH,UAAU,EAAE,OAAO;4BACnB,OAAO,EAAE,IAAI;4BACb,MAAM,EAAE,OAAO;4BACf,MAAM,EAAE,gBAAgB;4BACxB,SAAS,EAAE,GAAG,gBAAgB,OAAO,GAAG,CAAC,QAAQ,IAAI,OAAO,CAAC,UAAU,EAAE;4BACzE,cAAc,EAAE,EAAE;4BAClB,OAAO,EAAE,qBAAqB;yBAC9B,CAAA;wBACD,OAAO,EAAE,CAAA;oBACV,CAAC;gBACF,CAAC;gBAED,KAAK,MAAM,GAAG,IAAI,SAAS,CAAC,OAAO,EAAqC,EAAE,CAAC;oBAC1E,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,OAAM;oBAChC,KAAK,MAAM,OAAO,IAAI,aAAa,CAAC,GAAG,CAAC,EAAE,CAAC;wBAC1C,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;4BAAE,OAAM;wBAC7D,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,UAAU,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;wBACxE,IAAI,CAAC,GAAG;4BAAE,SAAQ;wBAClB,MAA
M,OAAO,GAAG,mBAAmB,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;wBAC5D,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;4BAAE,SAAQ;wBAE/C,MAAM;4BACL,GAAG;4BACH,UAAU,EAAE,OAAO;4BACnB,OAAO,EAAE,IAAI;4BACb,MAAM,EAAE,OAAO;4BACf,MAAM,EAAE,gBAAgB;4BACxB,SAAS,EAAE,GAAG,gBAAgB,OAAO,GAAG,CAAC,KAAK,IAAI,OAAO,CAAC,UAAU,EAAE;4BACtE,cAAc,EAAE,EAAE;4BAClB,OAAO,EAAE,qBAAqB;yBAC9B,CAAA;wBACD,OAAO,EAAE,CAAA;oBACV,CAAC;gBACF,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,EAAE,CAAC,KAAK,EAAE,CAAA;YACX,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,YAAY,GAAG,kBAAkB,EAAE,CAAA"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Decompose a US street name into Stage 3 components: street_prefix, street, street_suffix.
|
|
7
|
+
*
|
|
8
|
+
* Sources directionals and street types from the curated libpostal/en dictionaries
|
|
9
|
+
* (`core/data/libpostal/dictionaries/en/{directionals,street_types}.txt`). These are the same
|
|
10
|
+
* dictionaries the runtime classifiers (StreetPrefixClassifier, StreetSuffixClassifier) use, so
|
|
11
|
+
* corpus labels and runtime classifications agree on the vocabulary.
|
|
12
|
+
*
|
|
13
|
+
* Examples: "N Main St" → { prefix: "N", street: "Main", suffix: "St" };
|
|
14
|
+
* "Pennsylvania Avenue NW" → { prefix: null, street: "Pennsylvania", suffix: "Avenue NW" };
|
|
15
|
+
* "Salmon St" → { prefix: null, street: "Salmon", suffix: "St" };
|
|
16
|
+
* "SE Hawthorne Blvd" → { prefix: "SE", street: "Hawthorne", suffix: "Blvd" }.
|
|
17
|
+
*/
|
|
18
|
+
export interface DecomposedStreet {
|
|
19
|
+
prefix: string | null;
|
|
20
|
+
street: string;
|
|
21
|
+
suffix: string | null;
|
|
22
|
+
}
|
|
23
|
+
/**
|
|
24
|
+
* Decompose a US street name into prefix/name/suffix components.
|
|
25
|
+
*
|
|
26
|
+
* Conservative — only emits prefix/suffix when there's a clear directional or street-type keyword.
|
|
27
|
+
* Returns the original as `street` if nothing matches.
|
|
28
|
+
*/
|
|
29
|
+
export declare function decomposeStreet(fullname: string): DecomposedStreet;
|
|
30
|
+
//# sourceMappingURL=street-decompose.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"street-decompose.d.ts","sourceRoot":"","sources":["../../../../src/adapters/tiger/street-decompose.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAuCH,MAAM,WAAW,gBAAgB;IAChC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,MAAM,GAAG,IAAI,CAAA;CACrB;AAED;;;;;GAKG;AACH,wBAAgB,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG,gBAAgB,CA0ClE"}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Decompose a US street name into Stage 3 components: street_prefix, street, street_suffix.
|
|
7
|
+
*
|
|
8
|
+
* Sources directionals and street types from the curated libpostal/en dictionaries
|
|
9
|
+
* (`core/data/libpostal/dictionaries/en/{directionals,street_types}.txt`). These are the same
|
|
10
|
+
* dictionaries the runtime classifiers (StreetPrefixClassifier, StreetSuffixClassifier) use, so
|
|
11
|
+
* corpus labels and runtime classifications agree on the vocabulary.
|
|
12
|
+
*
|
|
13
|
+
* Examples: "N Main St" → { prefix: "N", street: "Main", suffix: "St" };
|
|
14
|
+
* "Pennsylvania Avenue NW" → { prefix: null, street: "Pennsylvania", suffix: "Avenue NW" };
|
|
15
|
+
* "Salmon St" → { prefix: null, street: "Salmon", suffix: "St" };
|
|
16
|
+
* "SE Hawthorne Blvd" → { prefix: "SE", street: "Hawthorne", suffix: "Blvd" }.
|
|
17
|
+
*/
|
|
18
|
+
import { readFileSync } from "node:fs";
|
|
19
|
+
import { dirname, resolve } from "node:path";
|
|
20
|
+
import { fileURLToPath } from "node:url";
|
|
21
|
+
const moduleDir = dirname(fileURLToPath(import.meta.url));
|
|
22
|
+
/**
 * Load a libpostal dictionary file into a set of lookup forms.
 *
 * Each non-blank, non-`#` line is split on `|` (libpostal format:
 * `canonical|abbr|abbr|...`) and every form is stored trimmed and lowercased.
 *
 * @param {string} filename - Dictionary file name, e.g. `directionals.txt`.
 * @returns {Set<string>} All lowercased forms found in the first readable candidate file.
 * @throws {Error} When no candidate path can be read. The last read failure is attached
 *   as `cause`.
 */
function loadDictionary(filename) {
	// Candidate locations for the @mailwoman/core data directory: two depths relative
	// to this module, then the current working directory.
	const candidates = [
		resolve(moduleDir, "../../../../core/data/libpostal/dictionaries/en", filename),
		resolve(moduleDir, "../../../../../core/data/libpostal/dictionaries/en", filename),
		resolve(process.cwd(), "core/data/libpostal/dictionaries/en", filename),
	];
	let lastError;
	for (const path of candidates) {
		let text;
		try {
			text = readFileSync(path, "utf8");
		}
		catch (error) {
			// Keep the reason so the final error is diagnosable, then try the next candidate.
			lastError = error;
			continue;
		}
		const forms = new Set();
		for (const line of text.split("\n")) {
			const trimmed = line.trim();
			if (!trimmed || trimmed.startsWith("#"))
				continue;
			// libpostal format: canonical|abbr|abbr|... — index all forms
			for (const form of trimmed.split("|")) {
				const f = form.trim().toLowerCase();
				if (f)
					forms.add(f);
			}
		}
		return forms;
	}
	throw new Error(`Could not load libpostal dictionary: ${filename}`, { cause: lastError });
}
|
|
52
|
+
const DIRECTIONALS = loadDictionary("directionals.txt");
|
|
53
|
+
const STREET_TYPES = loadDictionary("street_types.txt");
|
|
54
|
+
/**
 * Decompose a US street name into prefix/name/suffix components.
 *
 * Conservative — only emits prefix/suffix when there's a clear directional or street-type keyword.
 * Returns the original as `street` if nothing matches.
 *
 * A leading directional is only treated as a prefix when at least one name token remains
 * after it, so "North St" yields street "North" + suffix "St" rather than street "St".
 *
 * @param {string} fullname - Street name; surrounding whitespace is ignored.
 * @returns {{ prefix: string | null, street: string, suffix: string | null }} The
 *   components, using the original token spelling. `suffix` is either one token (street
 *   type or post-directional) or "type directional" (e.g. "Avenue NW").
 */
export function decomposeStreet(fullname) {
	const trimmed = fullname.trim();
	if (!trimmed)
		return { prefix: null, street: "", suffix: null };
	const tokens = trimmed.split(/\s+/);
	if (tokens.length === 1)
		return { prefix: null, street: trimmed, suffix: null };
	// Dictionary lookups are lowercase and ignore one trailing period ("St." → "st").
	const norm = (s) => s.toLowerCase().replace(/\.$/, "");
	const last = norm(tokens[tokens.length - 1]);
	const secondLast = norm(tokens[tokens.length - 2]);
	// How many trailing tokens form the suffix: "Ave NW" (2), "St" or "NW" (1), none (0).
	let suffixWidth = 0;
	if (DIRECTIONALS.has(last) && STREET_TYPES.has(secondLast)) {
		suffixWidth = 2;
	}
	else if (STREET_TYPES.has(last) || DIRECTIONALS.has(last)) {
		suffixWidth = 1;
	}
	const endIdx = tokens.length - suffixWidth;
	// Only consume a leading directional when a name token is left between it and the
	// suffix; otherwise the directional IS the street name ("North St", "E St NW").
	const hasPrefix = DIRECTIONALS.has(norm(tokens[0])) && endIdx >= 2;
	const startIdx = hasPrefix ? 1 : 0;
	const street = tokens.slice(startIdx, endIdx).join(" ");
	if (!street) {
		// Nothing but suffix keywords (e.g. "Ave NW"): keep the name undivided.
		return { prefix: null, street: trimmed, suffix: null };
	}
	return {
		prefix: hasPrefix ? tokens[0] : null,
		street,
		suffix: suffixWidth > 0 ? tokens.slice(endIdx).join(" ") : null,
	};
}
|
|
99
|
+
//# sourceMappingURL=street-decompose.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"street-decompose.js","sourceRoot":"","sources":["../../../../src/adapters/tiger/street-decompose.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AACtC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAA;AAExC,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;AAEzD,SAAS,cAAc,CAAC,QAAgB;IACvC,kDAAkD;IAClD,MAAM,UAAU,GAAG;QAClB,OAAO,CAAC,SAAS,EAAE,iDAAiD,EAAE,QAAQ,CAAC;QAC/E,OAAO,CAAC,SAAS,EAAE,oDAAoD,EAAE,QAAQ,CAAC;QAClF,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,qCAAqC,EAAE,QAAQ,CAAC;KACvE,CAAA;IACD,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC/B,IAAI,CAAC;YACJ,MAAM,IAAI,GAAG,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;YACvC,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAA;YAC7B,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;gBACrC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;gBAC3B,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;oBAAE,SAAQ;gBACjD,8DAA8D;gBAC9D,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;oBACvC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;oBACnC,IAAI,CAAC;wBAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;gBAClB,CAAC;YACF,CAAC;YACD,OAAO,GAAG,CAAA;QACX,CAAC;QAAC,MAAM,CAAC;YACR,qBAAqB;QACtB,CAAC;IACF,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,wCAAwC,QAAQ,EAAE,CAAC,CAAA;AACpE,CAAC;AAED,MAAM,YAAY,GAAG,cAAc,CAAC,kBAAkB,CAAC,CAAA;AACvD,MAAM,YAAY,GAAG,cAAc,CAAC,kBAAkB,CAAC,CAAA;AAQvD;;;;;GAKG;AACH,MAAM,UAAU,eAAe,CAAC,QAAgB;IAC/C,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAA;IAC/B,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;IAE/D,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;IACnC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;IAE/E,MAAM,IAAI,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAA;IAE9D,IAAI,MAAM,GAAkB,IAAI,CAAA;IAChC,IAAI,MAAM,GAAkB,IAAI,CAAA;IAChC,IAAI,QAAQ,GAAG,CAAC,CAAA;IAChB,IAAI,MAAM,GAAG,MAAM,
CAAC,MAAM,CAAA;IAE1B,6BAA6B;IAC7B,IAAI,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAC,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QAC9D,MAAM,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;QACnB,QAAQ,GAAG,CAAC,CAAA;IACb,CAAC;IAED,mFAAmF;IACnF,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC,CAAA;IACtC,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IAE/D,IAAI,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,YAAY,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;QAC5D,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QACnD,MAAM,IAAI,CAAC,CAAA;IACZ,CAAC;SAAM,IAAI,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,MAAM,GAAG,QAAQ,IAAI,CAAC,EAAE,CAAC;QAC7D,MAAM,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,CAAA;QAC5B,MAAM,IAAI,CAAC,CAAA;IACZ,CAAC;SAAM,IAAI,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,MAAM,GAAG,QAAQ,IAAI,CAAC,EAAE,CAAC;QAC7D,gCAAgC;QAChC,MAAM,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,CAAA;QAC5B,MAAM,IAAI,CAAC,CAAA;IACZ,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IAC9D,IAAI,CAAC,MAAM,EAAE,CAAC;QACb,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;IACvD,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,CAAA;AAClC,CAAC"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* `usgov-irs-bmf`: IRS Exempt Organizations Business Master File (EO BMF) CSV consumer.
|
|
7
|
+
*
|
|
8
|
+
* The EO BMF is the IRS's authoritative registry of US tax-exempt organizations (charities,
|
|
9
|
+
* churches, foundations, ...), published as per-region CSVs at
|
|
10
|
+
* `https://www.irs.gov/charities-non-profits/exempt-organizations-business-master-file-extract-eo-bmf`
|
|
11
|
+
* (`eo1.csv`..`eo4.csv`, `eo_pr.csv`, `eo_xx.csv`). Each row carries an organization NAME plus
|
|
12
|
+
* its mailing address. It complements `usgov-nppes` with a DIFFERENT venue population
|
|
13
|
+
* (non-profits vs healthcare providers) and, notably, a high share of PO-box addresses — a useful
|
|
14
|
+
* `po_box`-tag signal (a tag with historically low recall).
|
|
15
|
+
*
|
|
16
|
+
* Output: one row per record with a usable city + postcode. NAME → `venue`; the street line becomes
|
|
17
|
+
* `po_box` when it's a PO-box, else `house_number` + `street`; CITY/STATE/ZIP fill the locality
|
|
18
|
+
* line. STATE is already a USPS abbreviation in the source. License: `"Public Domain"` (US
|
|
19
|
+
* federal).
|
|
20
|
+
*/
|
|
21
|
+
import type { CorpusAdapter } from "../../types.js";
|
|
22
|
+
export declare const USGOV_IRS_BMF_ADAPTER_ID = "usgov-irs-bmf";
|
|
23
|
+
export declare const USGOV_IRS_BMF_DEFAULT_LICENSE = "Public Domain";
|
|
24
|
+
export declare function createUsgovIrsBmfAdapter(): CorpusAdapter;
|
|
25
|
+
export declare const usgovIrsBmfAdapter: CorpusAdapter;
|
|
26
|
+
//# sourceMappingURL=adapter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/usgov-irs-bmf/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAMH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,wBAAwB,kBAAkB,CAAA;AACvD,eAAO,MAAM,6BAA6B,kBAAkB,CAAA;AAoC5D,wBAAgB,wBAAwB,IAAI,aAAa,CA2ExD;AAED,eAAO,MAAM,kBAAkB,eAA6B,CAAA"}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* `usgov-irs-bmf`: IRS Exempt Organizations Business Master File (EO BMF) CSV consumer.
|
|
7
|
+
*
|
|
8
|
+
* The EO BMF is the IRS's authoritative registry of US tax-exempt organizations (charities,
|
|
9
|
+
* churches, foundations, ...), published as per-region CSVs at
|
|
10
|
+
* `https://www.irs.gov/charities-non-profits/exempt-organizations-business-master-file-extract-eo-bmf`
|
|
11
|
+
* (`eo1.csv`..`eo4.csv`, `eo_pr.csv`, `eo_xx.csv`). Each row carries an organization NAME plus
|
|
12
|
+
* its mailing address. It complements `usgov-nppes` with a DIFFERENT venue population
|
|
13
|
+
* (non-profits vs healthcare providers) and, notably, a high share of PO-box addresses — a useful
|
|
14
|
+
* `po_box`-tag signal (a tag with historically low recall).
|
|
15
|
+
*
|
|
16
|
+
* Output: one row per record with a usable city + postcode. NAME → `venue`; the street line becomes
|
|
17
|
+
* `po_box` when it's a PO-box, else `house_number` + `street`; CITY/STATE/ZIP fill the locality
|
|
18
|
+
* line. STATE is already a USPS abbreviation in the source. License: `"Public Domain"` (US
|
|
19
|
+
* federal).
|
|
20
|
+
*/
|
|
21
|
+
import { parse as csvParse } from "csv-parse";
|
|
22
|
+
import { createReadStream } from "node:fs";
|
|
23
|
+
import { stableSourceId } from "../../adapter.js";
|
|
24
|
+
import { reconcileComponents } from "../../format.js";
|
|
25
|
+
export const USGOV_IRS_BMF_ADAPTER_ID = "usgov-irs-bmf";
|
|
26
|
+
export const USGOV_IRS_BMF_DEFAULT_LICENSE = "Public Domain";
|
|
27
|
+
// Leading house number ("12", "12-14", "12B") followed by whitespace and the rest of the line.
const HOUSE_NUMBER_PREFIX = /^(\d+(?:-\d+)?[A-Za-z]?)\s+(.+)$/;
// PO box in its many written forms: "PO BOX 12", "P.O. BOX 12", "P O BOX 12",
// "POST OFFICE BOX 12", "POB 12", "BOX 12".
const PO_BOX = /^\s*(?:P\.?\s?O\.?\s*BOX|POST\s+OFFICE\s+BOX|POB|BOX)\s+\w/i;
/**
 * Classify the street line into a `po_box` or a `{house_number?, street}` split.
 *
 * @param {string} street Raw street line; surrounding whitespace is ignored.
 * @returns {{ po_box: string } | { house_number?: string, street: string } | null}
 *   `null` when the line is empty, whitespace-only, or not a string;
 *   `{ po_box }` (the whole trimmed line) when it starts with a PO-box form;
 *   `{ house_number, street }` when it starts with a house number;
 *   otherwise `{ street }` carrying the whole trimmed line.
 */
function splitStreetLine(street) {
    // A missing / non-string value is treated like an empty line rather than throwing.
    const trimmed = typeof street === "string" ? street.trim() : "";
    if (!trimmed) {
        return null;
    }
    // PO-box check comes first: a PO-box line must never be split into number + street.
    if (PO_BOX.test(trimmed)) {
        return { po_box: trimmed };
    }
    const match = HOUSE_NUMBER_PREFIX.exec(trimmed);
    if (match) {
        return { house_number: match[1], street: match[2].trim() };
    }
    return { street: trimmed };
}
|
|
42
|
+
/**
 * Assemble the single-line raw address string: `venue, street, city, STATE ZIP`.
 * Empty or missing parts are dropped, so no dangling separators are produced.
 *
 * @param {string | undefined} venue Organization name, if any.
 * @param {string} streetPart PO-box line or "house_number street" line.
 * @param {string} city Locality name (trimmed here).
 * @param {string} state Region abbreviation; may be empty.
 * @param {string} postcode Postcode; may be empty.
 * @returns {string} Comma-joined address line, or "" when every part is empty.
 */
function composeRaw(venue, streetPart, city, state, postcode) {
    // "STATE ZIP" tail; either half may be absent.
    const regionLine = [state, postcode].filter(Boolean).join(" ").trim();
    const localityLine = [city.trim(), regionLine].filter(Boolean).join(", ");
    const segments = [];
    for (const segment of [venue, streetPart, localityLine]) {
        if (segment) {
            segments.push(segment);
        }
    }
    return segments.join(", ");
}
|
|
46
|
+
/**
 * Build the `usgov-irs-bmf` corpus adapter.
 *
 * The returned object exposes `id`, `defaultLicense`, `description`, and an async generator
 * `rows(opts)` that streams the CSV at `opts.inputPath` and yields one corpus row per record
 * that has a city, a postcode and a non-empty street line.
 *
 * `rows(opts)` reads these options:
 * - `inputPath`: path of the CSV file to read.
 * - `country`: optional; anything other than `"US"` throws.
 * - `limit`: optional maximum number of rows to yield.
 * - `signal`: optional abort signal; iteration stops once it is aborted.
 *
 * @returns The adapter object.
 * @throws {Error} from `rows()` when `opts.country` is set and is not `"US"`, or when the
 *   input file cannot be read.
 */
export function createUsgovIrsBmfAdapter() {
    return {
        id: USGOV_IRS_BMF_ADAPTER_ID,
        defaultLicense: USGOV_IRS_BMF_DEFAULT_LICENSE,
        description: "IRS Exempt Organizations Business Master File — US non-profit venue+address (public-domain), with strong PO-box coverage.",
        async *rows(opts) {
            if (opts.country && opts.country !== "US") {
                throw new Error(`usgov-irs-bmf adapter: only US supported, got country=${opts.country}`);
            }
            const stream = createReadStream(opts.inputPath, { encoding: "utf8" });
            const parser = stream.pipe(csvParse({ columns: true, skip_empty_lines: true, relax_quotes: true, relax_column_count: true, trim: true }));
            // `pipe()` does not forward source errors. Without this, a missing/unreadable file
            // raises an unhandled 'error' event instead of rejecting the iteration below.
            stream.once("error", (err) => parser.destroy(err));
            let emitted = 0;
            try {
                for await (const record of parser) {
                    if (opts.signal?.aborted)
                        break;
                    if (opts.limit !== undefined && emitted >= opts.limit)
                        break;
                    const ein = (record.EIN ?? "").trim();
                    const venue = (record.NAME ?? "").trim() || undefined;
                    const street = (record.STREET ?? "").trim();
                    const city = (record.CITY ?? "").trim();
                    const state = (record.STATE ?? "").trim();
                    const zipRaw = (record.ZIP ?? "").trim();
                    if (!city || !zipRaw)
                        continue;
                    const postcode = zipRaw.split("-")[0].trim(); // 5-digit; drop the optional +4
                    // A ZIP such as "-1234" has no base part: not a usable postcode.
                    if (!postcode)
                        continue;
                    const split = splitStreetLine(street);
                    if (!split)
                        continue;
                    // Text of the street line exactly as it will appear inside `raw`.
                    const streetPart = "po_box" in split ? split.po_box : [split.house_number, split.street].filter(Boolean).join(" ");
                    const components = {
                        ...(venue ? { venue } : {}),
                        ...("po_box" in split
                            ? { po_box: split.po_box }
                            : { ...(split.house_number ? { house_number: split.house_number } : {}), street: split.street }),
                        locality: city,
                        ...(state ? { region: state } : {}),
                        postcode,
                    };
                    const raw = composeRaw(venue, streetPart, city, state, postcode);
                    if (!raw)
                        continue;
                    const aligned = reconcileComponents(components, raw);
                    // Rows left with two or fewer components after reconciliation are dropped.
                    if (Object.keys(aligned).length <= 2)
                        continue;
                    // Prefer the EIN as the identifier; otherwise derive one from the components.
                    const sourceId = ein
                        ? `${USGOV_IRS_BMF_ADAPTER_ID}-${ein}`
                        : stableSourceId(USGOV_IRS_BMF_ADAPTER_ID, aligned);
                    yield {
                        raw,
                        components: aligned,
                        country: "US",
                        locale: "en-US",
                        source: USGOV_IRS_BMF_ADAPTER_ID,
                        source_id: sourceId,
                        corpus_version: "",
                        license: USGOV_IRS_BMF_DEFAULT_LICENSE,
                    };
                    emitted++;
                }
            }
            finally {
                // Release the file handle on normal completion, early break, or error.
                stream.destroy();
            }
        },
    };
}
|
|
114
|
+
export const usgovIrsBmfAdapter = createUsgovIrsBmfAdapter();
|
|
115
|
+
//# sourceMappingURL=adapter.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/usgov-irs-bmf/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,EAAE,KAAK,IAAI,QAAQ,EAAE,MAAM,WAAW,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAGrD,MAAM,CAAC,MAAM,wBAAwB,GAAG,eAAe,CAAA;AACvD,MAAM,CAAC,MAAM,6BAA6B,GAAG,eAAe,CAAA;AAE5D,MAAM,mBAAmB,GAAG,kCAAkC,CAAA;AAC9D,kGAAkG;AAClG,MAAM,MAAM,GAAG,yCAAyC,CAAA;AAWxD,qFAAqF;AACrF,SAAS,eAAe,CAAC,MAAc;IACtC,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,EAAE,CAAA;IAC7B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IACzB,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;IACpD,MAAM,CAAC,GAAG,mBAAmB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IAC3C,IAAI,CAAC;QAAE,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,EAAE,CAAA;IAC1D,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;AAC3B,CAAC;AAED,SAAS,UAAU,CAClB,KAAyB,EACzB,UAAkB,EAClB,IAAY,EACZ,KAAa,EACb,QAAgB;IAEhB,MAAM,QAAQ,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAC7G,OAAO,CAAC,KAAK,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AAChE,CAAC;AAED,MAAM,UAAU,wBAAwB;IACvC,OAAO;QACN,EAAE,EAAE,wBAAwB;QAC5B,cAAc,EAAE,6BAA6B;QAC7C,WAAW,EACV,2HAA2H;QAE5H,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,yDAAyD,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YACzF,CAAC;YAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;YACrE,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CACzB,QAAQ,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,gBAAgB,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,kBAAkB,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAC7G,CAAA;YAED,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI,MAAkC,EAAE,CAAC;oBAC/D,IAAI,IAAI,CAAC,MAA
M,EAAE,OAAO;wBAAE,MAAK;oBAC/B,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;wBAAE,MAAK;oBAE5D,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACrC,MAAM,KAAK,GAAG,CAAC,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,IAAI,SAAS,CAAA;oBACrD,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC3C,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACvC,MAAM,KAAK,GAAG,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACzC,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACxC,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM;wBAAE,SAAQ;oBAC9B,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAA,CAAC,gCAAgC;oBAE9E,MAAM,KAAK,GAAG,eAAe,CAAC,MAAM,CAAC,CAAA;oBACrC,IAAI,CAAC,KAAK;wBAAE,SAAQ;oBAEpB,MAAM,UAAU,GACf,QAAQ,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,YAAY,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;oBAEhG,MAAM,UAAU,GAA+B;wBAC9C,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;wBAC3B,GAAG,CAAC,QAAQ,IAAI,KAAK;4BACpB,CAAC,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE;4BAC1B,CAAC,CAAC,EAAE,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,KAAK,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC;wBACjG,QAAQ,EAAE,IAAI;wBACd,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;wBACnC,QAAQ;qBACR,CAAA;oBAED,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,EAAE,UAAU,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAA;oBAChE,IAAI,CAAC,GAAG;wBAAE,SAAQ;oBAElB,MAAM,OAAO,GAAG,mBAAmB,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;oBACpD,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,IAAI,CAAC;wBAAE,SAAQ;oBAE9C,MAAM,QAAQ,GAAG,GAAG;wBACnB,CAAC,CAAC,GAAG,wBAAwB,IAAI,GAAG,EAAE;wBACtC,CAAC,CAAC,cAAc,CAAC,wBAAwB,EAAE,OAAO,CAAC,CAAA;oBAEpD,MAAM;wBACL,GAAG;wBACH,UAAU,EAAE,OAAO;wBACnB,OAAO,EAAE,IAAI;wBACb,MAAM,EAAE,OAAO;wBACf,MAAM,EAAE,wBAAwB;wBAChC,SAAS,EAAE,QAAQ;wBACnB,cAAc,EAAE,EAAE;wBAClB,OAAO
,EAAE,6BAA6B;qBACtC,CAAA;oBACD,OAAO,EAAE,CAAA;gBACV,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,MAAM,CAAC,OAAO,EAAE,CAAA;YACjB,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,kBAAkB,GAAG,wBAAwB,EAAE,CAAA"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/usgov-nad/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAQH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,oBAAoB,cAAc,CAAA;AAC/C,eAAO,MAAM,yBAAyB,kBAAkB,CAAA;
|
|
1
|
+
{"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/usgov-nad/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAQH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,oBAAoB,cAAc,CAAA;AAC/C,eAAO,MAAM,yBAAyB,kBAAkB,CAAA;AAgLxD,wBAAgB,qBAAqB,IAAI,aAAa,CAmGrD;AAED,eAAO,MAAM,eAAe,eAA0B,CAAA"}
|
|
@@ -114,14 +114,23 @@ function composeHouseNumber(r) {
|
|
|
114
114
|
const suf = (r.AddNum_Suf ?? "").toString().trim();
|
|
115
115
|
return [pre, num, suf].filter(Boolean).join(" ").trim() || undefined;
|
|
116
116
|
}
|
|
117
|
-
function
|
|
117
|
+
function decomposeNadStreet(r) {
|
|
118
|
+
const name = (r.St_Name ?? "").toString().trim();
|
|
119
|
+
if (name) {
|
|
120
|
+
const preDir = (r.St_PreDir ?? "").toString().trim();
|
|
121
|
+
const preTyp = (r.St_PreTyp ?? "").toString().trim();
|
|
122
|
+
const preSep = (r.St_PreSep ?? "").toString().trim();
|
|
123
|
+
const posTyp = (r.St_PosTyp ?? "").toString().trim();
|
|
124
|
+
const posDir = (r.St_PosDir ?? "").toString().trim();
|
|
125
|
+
const prefix = [preDir, preTyp, preSep].filter(Boolean).join(" ") || undefined;
|
|
126
|
+
const suffix = [posTyp, posDir].filter(Boolean).join(" ") || undefined;
|
|
127
|
+
const full = [prefix, name, suffix].filter(Boolean).join(" ");
|
|
128
|
+
return { prefix, street: name, suffix, full };
|
|
129
|
+
}
|
|
118
130
|
const full = (r.StNam_Full ?? "").toString().trim();
|
|
119
131
|
if (full)
|
|
120
|
-
return full;
|
|
121
|
-
|
|
122
|
-
.map((p) => (p ?? "").toString().trim())
|
|
123
|
-
.filter(Boolean);
|
|
124
|
-
return parts.length ? parts.join(" ") : undefined;
|
|
132
|
+
return { full, street: full };
|
|
133
|
+
return undefined;
|
|
125
134
|
}
|
|
126
135
|
function composeLocality(r) {
|
|
127
136
|
return nonEmpty(r.Post_City, r.Inc_Muni, r.Census_Plc, r.Uninc_Comm);
|
|
@@ -134,7 +143,7 @@ function composePostcode(r) {
|
|
|
134
143
|
return plus4 ? `${zip}-${plus4}` : zip;
|
|
135
144
|
}
|
|
136
145
|
function composeRaw(parts) {
|
|
137
|
-
const streetLine = [parts.houseNumber, parts.street].filter(Boolean).join(" ").trim();
|
|
146
|
+
const streetLine = [parts.houseNumber, parts.street, parts.unit].filter(Boolean).join(" ").trim();
|
|
138
147
|
const tail = `${parts.locality}, ${parts.region} ${parts.postcode}`;
|
|
139
148
|
return [parts.venue, streetLine || undefined, tail].filter(Boolean).join(", ");
|
|
140
149
|
}
|
|
@@ -182,18 +191,30 @@ export function createUsgovNadAdapter() {
|
|
|
182
191
|
const postcode = composePostcode(record);
|
|
183
192
|
if (!postcode)
|
|
184
193
|
continue;
|
|
185
|
-
const
|
|
194
|
+
const decomposed = decomposeNadStreet(record);
|
|
186
195
|
const houseNumber = composeHouseNumber(record);
|
|
187
196
|
const venue = nonEmpty(record.LandmkName);
|
|
197
|
+
const unit = nonEmpty(record.Unit, record.Building, record.Floor, record.Room);
|
|
188
198
|
const components = {
|
|
189
199
|
...(venue ? { venue } : {}),
|
|
190
200
|
...(houseNumber ? { house_number: houseNumber } : {}),
|
|
191
|
-
...(
|
|
201
|
+
...(decomposed?.prefix ? { street_prefix: decomposed.prefix } : {}),
|
|
202
|
+
...(decomposed?.street ? { street: decomposed.street } : {}),
|
|
203
|
+
...(decomposed?.suffix ? { street_suffix: decomposed.suffix } : {}),
|
|
204
|
+
...(unit ? { unit } : {}),
|
|
192
205
|
locality,
|
|
193
206
|
region: state,
|
|
194
207
|
postcode,
|
|
195
208
|
};
|
|
196
|
-
const raw = composeRaw({
|
|
209
|
+
const raw = composeRaw({
|
|
210
|
+
venue,
|
|
211
|
+
houseNumber,
|
|
212
|
+
street: decomposed?.full,
|
|
213
|
+
unit,
|
|
214
|
+
locality,
|
|
215
|
+
region: state,
|
|
216
|
+
postcode,
|
|
217
|
+
});
|
|
197
218
|
if (!raw)
|
|
198
219
|
continue;
|
|
199
220
|
const aligned = reconcileComponents(components, raw);
|