@mailwoman/corpus 3.0.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/out/src/adapters/ban/adapter.d.ts.map +1 -1
  2. package/out/src/adapters/ban/adapter.js +6 -2
  3. package/out/src/adapters/ban/adapter.js.map +1 -1
  4. package/out/src/adapters/ban/street-decompose.d.ts +28 -0
  5. package/out/src/adapters/ban/street-decompose.d.ts.map +1 -0
  6. package/out/src/adapters/ban/street-decompose.js +78 -0
  7. package/out/src/adapters/ban/street-decompose.js.map +1 -0
  8. package/out/src/adapters/geonames/adapter.d.ts +35 -0
  9. package/out/src/adapters/geonames/adapter.d.ts.map +1 -0
  10. package/out/src/adapters/geonames/adapter.js +161 -0
  11. package/out/src/adapters/geonames/adapter.js.map +1 -0
  12. package/out/src/adapters/geonames-postal/adapter.d.ts +30 -0
  13. package/out/src/adapters/geonames-postal/adapter.d.ts.map +1 -0
  14. package/out/src/adapters/geonames-postal/adapter.js +96 -0
  15. package/out/src/adapters/geonames-postal/adapter.js.map +1 -0
  16. package/out/src/adapters/index.d.ts +3 -0
  17. package/out/src/adapters/index.d.ts.map +1 -1
  18. package/out/src/adapters/index.js +9 -0
  19. package/out/src/adapters/index.js.map +1 -1
  20. package/out/src/adapters/synth-po-box/adapter.d.ts +48 -0
  21. package/out/src/adapters/synth-po-box/adapter.d.ts.map +1 -0
  22. package/out/src/adapters/synth-po-box/adapter.js +101 -0
  23. package/out/src/adapters/synth-po-box/adapter.js.map +1 -0
  24. package/out/src/adapters/tiger/adapter.d.ts.map +1 -1
  25. package/out/src/adapters/tiger/adapter.js +9 -3
  26. package/out/src/adapters/tiger/adapter.js.map +1 -1
  27. package/out/src/adapters/tiger/street-decompose.d.ts +30 -0
  28. package/out/src/adapters/tiger/street-decompose.d.ts.map +1 -0
  29. package/out/src/adapters/tiger/street-decompose.js +99 -0
  30. package/out/src/adapters/tiger/street-decompose.js.map +1 -0
  31. package/out/src/adapters/usgov-irs-bmf/adapter.d.ts +26 -0
  32. package/out/src/adapters/usgov-irs-bmf/adapter.d.ts.map +1 -0
  33. package/out/src/adapters/usgov-irs-bmf/adapter.js +115 -0
  34. package/out/src/adapters/usgov-irs-bmf/adapter.js.map +1 -0
  35. package/out/src/adapters/usgov-nad/adapter.d.ts.map +1 -1
  36. package/out/src/adapters/usgov-nad/adapter.js +31 -10
  37. package/out/src/adapters/usgov-nad/adapter.js.map +1 -1
  38. package/out/src/adapters/wof-admin-jp/adapter.d.ts +58 -0
  39. package/out/src/adapters/wof-admin-jp/adapter.d.ts.map +1 -0
  40. package/out/src/adapters/wof-admin-jp/adapter.js +129 -0
  41. package/out/src/adapters/wof-admin-jp/adapter.js.map +1 -0
  42. package/out/src/index.d.ts +6 -0
  43. package/out/src/index.d.ts.map +1 -1
  44. package/out/src/index.js +6 -0
  45. package/out/src/index.js.map +1 -1
  46. package/out/src/synthesize-german.d.ts +77 -0
  47. package/out/src/synthesize-german.d.ts.map +1 -0
  48. package/out/src/synthesize-german.js +117 -0
  49. package/out/src/synthesize-german.js.map +1 -0
  50. package/out/src/synthesize-house-venue.d.ts +57 -0
  51. package/out/src/synthesize-house-venue.d.ts.map +1 -0
  52. package/out/src/synthesize-house-venue.js +147 -0
  53. package/out/src/synthesize-house-venue.js.map +1 -0
  54. package/out/src/synthesize-intersection.d.ts +46 -0
  55. package/out/src/synthesize-intersection.d.ts.map +1 -0
  56. package/out/src/synthesize-intersection.js +152 -0
  57. package/out/src/synthesize-intersection.js.map +1 -0
  58. package/out/src/synthesize-no-street.d.ts +70 -0
  59. package/out/src/synthesize-no-street.d.ts.map +1 -0
  60. package/out/src/synthesize-no-street.js +279 -0
  61. package/out/src/synthesize-no-street.js.map +1 -0
  62. package/out/src/synthesize-po-box.d.ts +75 -0
  63. package/out/src/synthesize-po-box.d.ts.map +1 -0
  64. package/out/src/synthesize-po-box.js +186 -0
  65. package/out/src/synthesize-po-box.js.map +1 -0
  66. package/out/src/synthesize-street.d.ts +53 -0
  67. package/out/src/synthesize-street.d.ts.map +1 -0
  68. package/out/src/synthesize-street.js +212 -0
  69. package/out/src/synthesize-street.js.map +1 -0
  70. package/out/src/synthesize.d.ts +19 -0
  71. package/out/src/synthesize.d.ts.map +1 -1
  72. package/out/src/synthesize.js +65 -1
  73. package/out/src/synthesize.js.map +1 -1
  74. package/package.json +8 -7
  75. package/out/src/codex/us-street-suffix.d.ts +0 -260
  76. package/out/src/codex/us-street-suffix.d.ts.map +0 -1
  77. package/out/src/codex/us-street-suffix.js +0 -286
  78. package/out/src/codex/us-street-suffix.js.map +0 -1
@@ -0,0 +1,48 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `synth-po-box`: PO box / PMB / Apartado / BP synthesizer adapter.
7
+ *
8
+ * Consumes a JSONL stream of (locality, region, postcode, country) tuples — typically extracted
9
+ * from existing corpus output (TIGER/NAD/BAN/WOF) — and emits synthetic PO box training rows. See
10
+ * `../../synthesize-po-box.ts` for the per-locale templates and number-noise logic.
11
+ *
12
+ * Why an adapter and not an augmenter:
13
+ *
14
+ * - Per USPS Pub 28 / DMM 508, a PO box delivery line is mutually exclusive with a street line.
15
+ * Synthesizing PO boxes by mutating a street row would teach the model an invalid pattern.
16
+ * The clean shape is: read just (locality, region, postcode, country) and produce a fresh
17
+ * PO-box-shaped row.
18
+ * - Per-DeepSeek (3-turn consult, 2026-05-28): PMB rows that COMBINE a street line with a PMB number
19
+ * ARE valid (CMRA addresses). Those are produced when `pmbRatio > 0` AND the input tuple
20
+ * carries a `street` field.
21
+ */
22
+ import { type PoBoxBaseTuple } from "../../synthesize-po-box.js";
23
+ import type { CorpusAdapter } from "../../types.js";
24
+ export declare const SYNTH_PO_BOX_ADAPTER_ID = "synth-po-box";
25
+ export declare const SYNTH_PO_BOX_LICENSE = "Synthetic \u2014 derived from CC-BY / public-domain input tuples";
26
+ export interface PoBoxInputRow extends PoBoxBaseTuple {
27
+ street?: string;
28
+ houseNumber?: string;
29
+ }
30
+ export interface SynthPoBoxAdapterOptions {
31
+ /**
32
+ * How many PO box variants to emit per input tuple. Each variant picks a different leader (and
33
+ * possibly a different number / noise level). Default 1.
34
+ */
35
+ variantsPerInput?: number;
36
+ /**
37
+ * Probability (0..1) of emitting a PMB-with-street variant when both the input has a street and
38
+ * the locale supports PMB. Default 0.15.
39
+ */
40
+ pmbRatio?: number;
41
+ /**
42
+ * Seed for the synthesis PRNG. Pass a fixed value for reproducible output; the default, Date.now(), differs on every run.
43
+ */
44
+ seed?: number;
45
+ }
46
+ export declare function createSynthPoBoxAdapter(opts?: SynthPoBoxAdapterOptions): CorpusAdapter;
47
+ export declare const synthPoBoxAdapter: CorpusAdapter;
48
+ //# sourceMappingURL=adapter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/synth-po-box/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAKH,OAAO,EAAsB,KAAK,cAAc,EAAE,MAAM,4BAA4B,CAAA;AACpF,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,uBAAuB,iBAAiB,CAAA;AACrD,eAAO,MAAM,oBAAoB,qEAAgE,CAAA;AAEjG,MAAM,WAAW,aAAc,SAAQ,cAAc;IACpD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,WAAW,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,MAAM,WAAW,wBAAwB;IACxC;;;OAGG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAA;IACzB;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB;;OAEG;IACH,IAAI,CAAC,EAAE,MAAM,CAAA;CACb;AAUD,wBAAgB,uBAAuB,CAAC,IAAI,GAAE,wBAA6B,GAAG,aAAa,CAuE1F;AAED,eAAO,MAAM,iBAAiB,eAA4B,CAAA"}
@@ -0,0 +1,101 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `synth-po-box`: PO box / PMB / Apartado / BP synthesizer adapter.
7
+ *
8
+ * Consumes a JSONL stream of (locality, region, postcode, country) tuples — typically extracted
9
+ * from existing corpus output (TIGER/NAD/BAN/WOF) — and emits synthetic PO box training rows. See
10
+ * `../../synthesize-po-box.ts` for the per-locale templates and number-noise logic.
11
+ *
12
+ * Why an adapter and not an augmenter:
13
+ *
14
+ * - Per USPS Pub 28 / DMM 508, a PO box delivery line is mutually exclusive with a street line.
15
+ * Synthesizing PO boxes by mutating a street row would teach the model an invalid pattern.
16
+ * The clean shape is: read just (locality, region, postcode, country) and produce a fresh
17
+ * PO-box-shaped row.
18
+ * - Per-DeepSeek (3-turn consult, 2026-05-28): PMB rows that COMBINE a street line with a PMB number
19
+ * ARE valid (CMRA addresses). Those are produced when `pmbRatio > 0` AND the input tuple
20
+ * carries a `street` field.
21
+ */
22
+ import { createReadStream } from "node:fs";
23
+ import { createInterface } from "node:readline";
24
+ import { stableSourceId } from "../../adapter.js";
25
+ import { synthesizePoBoxRow } from "../../synthesize-po-box.js";
26
+ export const SYNTH_PO_BOX_ADAPTER_ID = "synth-po-box";
27
+ export const SYNTH_PO_BOX_LICENSE = "Synthetic — derived from CC-BY / public-domain input tuples";
28
/**
 * Build a deterministic pseudo-random number generator from a numeric seed.
 *
 * The generator is a linear congruential recurrence (multiplier 1664525, increment 1013904223,
 * modulus 2^32). Every call advances the internal state and returns it scaled into [0, 1).
 *
 * @param {number} seed - Initial state. Negative seeds are accepted; the state is wrapped back
 *   into [0, 2^32) after each step.
 * @returns {() => number} Function producing the next value in [0, 1).
 */
function makeRandom(seed) {
	const MODULUS = 4294967296;
	let state = seed;
	return () => {
		const next = (state * 1664525 + 1013904223) % MODULUS;
		// `%` keeps the sign of the dividend, so a negative seed would otherwise leak
		// negative values out of a generator whose callers expect [0, 1).
		state = next < 0 ? next + MODULUS : next;
		return state / MODULUS;
	};
}
35
/**
 * Create a `synth-po-box` corpus adapter.
 *
 * The returned adapter's `rows(options)` async generator reads `options.inputPath` as UTF-8 JSONL
 * (one tuple per line) and yields up to `variantsPerInput` synthetic rows for every usable tuple.
 * Blank lines, lines that are not valid JSON, lines that do not parse to an object, and tuples
 * missing any of `locality` / `region` / `postcode` / `country` are skipped. When
 * `options.country` is set, tuples for other countries are skipped as well. Iteration stops once
 * `options.limit` rows were emitted or `options.signal` is aborted.
 *
 * @param {object} [opts]
 * @param {number} [opts.variantsPerInput=1] - Synthesis attempts per input tuple.
 * @param {number} [opts.pmbRatio=0.15] - Forwarded unchanged to `synthesizePoBoxRow`.
 * @param {number} [opts.seed] - PRNG seed; defaults to `Date.now()`, so output is only
 *   reproducible when a seed is supplied.
 * @returns Adapter object exposing `id`, `defaultLicense`, `description` and `rows(options)`.
 */
export function createSynthPoBoxAdapter(opts = {}) {
	const variantsPerInput = opts.variantsPerInput ?? 1;
	const pmbRatio = opts.pmbRatio ?? 0.15;
	return {
		id: SYNTH_PO_BOX_ADAPTER_ID,
		defaultLicense: SYNTH_PO_BOX_LICENSE,
		description: "Synthetic PO box / PMB / Apartado / Boîte Postale rows. Consumes JSONL of (locality, region, postcode, country) tuples and emits locale-appropriate PO box variants.",
		async *rows(options) {
			// One generator per rows() call, so a fixed seed replays the same sequence each run.
			const random = makeRandom(opts.seed ?? Date.now());
			const stream = createReadStream(options.inputPath, { encoding: "utf8" });
			const rl = createInterface({ input: stream, crlfDelay: Infinity });
			let emitted = 0;
			try {
				for await (const line of rl) {
					if (options.signal?.aborted) break;
					if (options.limit !== undefined && emitted >= options.limit) break;

					const trimmed = line.trim();
					if (!trimmed) continue;

					let input;
					try {
						input = JSON.parse(trimmed);
					} catch {
						// Best-effort ingestion: a malformed line is dropped, not fatal.
						continue;
					}
					// `null`, numbers, strings etc. are valid JSON but not tuples; reading
					// `.locality` off `null` would throw and abort the whole stream.
					if (input === null || typeof input !== "object") continue;
					if (!input.locality || !input.region || !input.postcode || !input.country) continue;
					if (options.country && options.country !== input.country) continue;

					for (let v = 0; v < variantsPerInput; v++) {
						const synth = synthesizePoBoxRow(input, { random, pmbRatio });
						if (!synth) continue;

						// Fold the variant index into the `locality` slot so each variant gets its
						// own digest; the original note here says stableSourceId only accepts
						// ComponentTag keys, hence no separate field for `v`.
						// NOTE(review): two input lines carrying the same tuple feed identical
						// values into stableSourceId — confirm inputs are de-duplicated upstream.
						const sourceId = stableSourceId(SYNTH_PO_BOX_ADAPTER_ID, {
							locality: `${input.locality}#${v}`,
							region: input.region,
							postcode: input.postcode,
							country: input.country,
						});
						yield {
							raw: synth.raw,
							components: synth.components,
							country: input.country,
							locale: synth.locale,
							source: SYNTH_PO_BOX_ADAPTER_ID,
							source_id: sourceId,
							corpus_version: "",
							license: SYNTH_PO_BOX_LICENSE,
						};
						emitted++;
						if (options.limit !== undefined && emitted >= options.limit) break;
					}
				}
			} finally {
				// Runs on normal completion, on limit/abort, on errors, and when the consumer
				// stops iterating early — release the line reader and the file handle either way.
				rl.close();
				stream.destroy();
			}
		},
	};
}
100
+ export const synthPoBoxAdapter = createSynthPoBoxAdapter();
101
+ //# sourceMappingURL=adapter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/synth-po-box/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAA;AAC/C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,kBAAkB,EAAuB,MAAM,4BAA4B,CAAA;AAGpF,MAAM,CAAC,MAAM,uBAAuB,GAAG,cAAc,CAAA;AACrD,MAAM,CAAC,MAAM,oBAAoB,GAAG,6DAA6D,CAAA;AAwBjG,SAAS,UAAU,CAAC,IAAY;IAC/B,IAAI,CAAC,GAAG,IAAI,CAAA;IACZ,OAAO,GAAG,EAAE;QACX,CAAC,GAAG,CAAC,CAAC,GAAG,OAAO,GAAG,UAAU,CAAC,GAAG,UAAU,CAAA;QAC3C,OAAO,CAAC,GAAG,UAAU,CAAA;IACtB,CAAC,CAAA;AACF,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,OAAiC,EAAE;IAC1E,MAAM,gBAAgB,GAAG,IAAI,CAAC,gBAAgB,IAAI,CAAC,CAAA;IACnD,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAA;IAEtC,OAAO;QACN,EAAE,EAAE,uBAAuB;QAC3B,cAAc,EAAE,oBAAoB;QACpC,WAAW,EACV,sKAAsK;QAEvK,KAAK,CAAC,CAAC,IAAI,CAAC,OAAuB;YAClC,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,GAAG,EAAE,CAAC,CAAA;YAElD,MAAM,MAAM,GAAG,gBAAgB,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;YACxE,MAAM,EAAE,GAAG,eAAe,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC,CAAA;YAElE,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,OAAO,GAAG,CAAC,CAAA;YAEf,IAAI,KAAK,EAAE,MAAM,IAAI,IAAI,EAAE,EAAE,CAAC;gBAC7B,IAAI,OAAO,CAAC,MAAM,EAAE,OAAO;oBAAE,MAAK;gBAClC,IAAI,OAAO,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,OAAO,CAAC,KAAK;oBAAE,MAAK;gBAElE,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;gBAC3B,IAAI,CAAC,OAAO;oBAAE,SAAQ;gBAEtB,IAAI,KAAoB,CAAA;gBACxB,IAAI,CAAC;oBACJ,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAkB,CAAA;gBAC7C,CAAC;gBAAC,MAAM,CAAC;oBACR,OAAO,EAAE,CAAA;oBACT,SAAQ;gBACT,CAAC;gBAED,IAAI,CAAC,KAAK,CAAC,QAAQ,IAAI,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC;oBAC3E,OAAO,EAAE,CAAA;oBACT,SAAQ;gBACT,CAAC;gBAED,IAAI,OAAO,CAAC,OAAO,IAAI,OAAO,CAAC,OAAO,KAAK,KAAK,CAAC,OAAO;oBAAE,SAAQ;gBAElE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,gBAAgB,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC3C,MAAM,KAAK,GAAG,kBAAkB,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,QAA
Q,EAAE,CAAC,CAAA;oBAC7D,IAAI,CAAC,KAAK;wBAAE,SAAQ;oBAEpB,6EAA6E;oBAC7E,iDAAiD;oBACjD,MAAM,QAAQ,GAAG,cAAc,CAAC,uBAAuB,EAAE;wBACxD,QAAQ,EAAE,GAAG,KAAK,CAAC,QAAQ,IAAI,CAAC,EAAE;wBAClC,MAAM,EAAE,KAAK,CAAC,MAAM;wBACpB,QAAQ,EAAE,KAAK,CAAC,QAAQ;wBACxB,OAAO,EAAE,KAAK,CAAC,OAAO;qBACtB,CAAC,CAAA;oBAEF,MAAM;wBACL,GAAG,EAAE,KAAK,CAAC,GAAG;wBACd,UAAU,EAAE,KAAK,CAAC,UAAU;wBAC5B,OAAO,EAAE,KAAK,CAAC,OAAO;wBACtB,MAAM,EAAE,KAAK,CAAC,MAAM;wBACpB,MAAM,EAAE,uBAAuB;wBAC/B,SAAS,EAAE,QAAQ;wBACnB,cAAc,EAAE,EAAE;wBAClB,OAAO,EAAE,oBAAoB;qBAC7B,CAAA;oBACD,OAAO,EAAE,CAAA;oBAET,IAAI,OAAO,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,OAAO,CAAC,KAAK;wBAAE,MAAK;gBACnE,CAAC;YACF,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,iBAAiB,GAAG,uBAAuB,EAAE,CAAA"}
@@ -1 +1 @@
1
- {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/tiger/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAKH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,gBAAgB,UAAU,CAAA;AACvC,eAAO,MAAM,qBAAqB,kBAAkB,CAAA;AAsFpD,yFAAyF;AACzF,wBAAgB,kBAAkB,IAAI,aAAa,CAoElD;AAED,eAAO,MAAM,YAAY,eAAuB,CAAA"}
1
+ {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/tiger/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAKH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAGjF,eAAO,MAAM,gBAAgB,UAAU,CAAA;AACvC,eAAO,MAAM,qBAAqB,kBAAkB,CAAA;AA0FpD,yFAAyF;AACzF,wBAAgB,kBAAkB,IAAI,aAAa,CAoElD;AAED,eAAO,MAAM,YAAY,eAAuB,CAAA"}
@@ -39,6 +39,7 @@
39
39
  import { DatabaseSync } from "node:sqlite";
40
40
  import { lookupFipsState } from "../../codex/us-fips-state.js";
41
41
  import { formatAddress, reconcileComponents } from "../../format.js";
42
+ import { decomposeStreet } from "./street-decompose.js";
42
43
  export const TIGER_ADAPTER_ID = "tiger";
43
44
  export const TIGER_DEFAULT_LICENSE = "Public Domain";
44
45
  /**
@@ -54,18 +55,23 @@ const US_COUNTRY_DISPLAY = "United States of America";
54
55
  * - `zipl !== zipr` → two rows (one per side's ZIP).
55
56
  */
56
57
  function* streetVariants(row) {
57
- const street = row.fullname.trim();
58
- if (!street)
58
+ const fullname = row.fullname.trim();
59
+ if (!fullname)
59
60
  return;
60
61
  const state = lookupFipsState(row.statefp);
61
62
  if (!state)
62
63
  return;
63
64
  const zipl = row.zipl?.trim() ?? "";
64
65
  const zipr = row.zipr?.trim() ?? "";
66
+ const decomposed = decomposeStreet(fullname);
65
67
  const baseComponents = {
66
- street,
67
68
  region: state.abbreviation,
69
+ street: decomposed.street,
68
70
  };
71
+ if (decomposed.prefix)
72
+ baseComponents.street_prefix = decomposed.prefix;
73
+ if (decomposed.suffix)
74
+ baseComponents.street_suffix = decomposed.suffix;
69
75
  if (!zipl && !zipr) {
70
76
  yield { components: baseComponents, variantKey: "no-zip" };
71
77
  return;
@@ -1 +1 @@
1
- {"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/tiger/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAA;AAC1C,OAAO,EAAE,eAAe,EAAE,MAAM,8BAA8B,CAAA;AAC9D,OAAO,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAGpE,MAAM,CAAC,MAAM,gBAAgB,GAAG,OAAO,CAAA;AACvC,MAAM,CAAC,MAAM,qBAAqB,GAAG,eAAe,CAAA;AAEpD;;;GAGG;AACH,MAAM,kBAAkB,GAAG,0BAA0B,CAAA;AAiBrD;;;;;;GAMG;AACH,QAAQ,CAAC,CAAC,cAAc,CAAC,GAAmB;IAI3C,MAAM,MAAM,GAAG,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAA;IAClC,IAAI,CAAC,MAAM;QAAE,OAAM;IACnB,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,CAAA;IAC1C,IAAI,CAAC,KAAK;QAAE,OAAM;IAElB,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IACnC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IAEnC,MAAM,cAAc,GAA+B;QAClD,MAAM;QACN,MAAM,EAAE,KAAK,CAAC,YAAY;KAC1B,CAAA;IAED,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QACpB,MAAM,EAAE,UAAU,EAAE,cAAc,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAA;QAC1D,OAAM;IACP,CAAC;IACD,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QACnC,MAAM;YACL,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE;YACjD,UAAU,EAAE,OAAO,IAAI,EAAE;SACzB,CAAA;QACD,OAAM;IACP,CAAC;IACD,IAAI,IAAI;QAAE,MAAM,EAAE,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE,EAAE,UAAU,EAAE,QAAQ,IAAI,EAAE,EAAE,CAAA;IACjG,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI;QAAE,MAAM,EAAE,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE,EAAE,UAAU,EAAE,QAAQ,IAAI,EAAE,EAAE,CAAA;AACnH,CAAC;AAED,sEAAsE;AACtE,QAAQ,CAAC,CAAC,aAAa,CAAC,GAAkB;IAIzC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAA;IAC5B,IAAI,CAAC,IAAI;QAAE,OAAM;IACjB,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,CAAA;IAC1C,IAAI,CAAC,KAAK;QAAE,OAAM;IAElB,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE;QAC9B,UAAU,EAAE,eAAe;KAC3B,CAAA;IACD,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,CAAC,YAAY,EAAE;QAC1D,UAAU,EAAE,aAAa;KACzB,CAAA;IACD,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,CAAC,YAAY,EAAE,OAAO,EAAE,kBAAkB,EAAE;QACvF,UAAU,EAAE,q
BAAqB;KACjC,CAAA;AACF,CAAC;AAED,yFAAyF;AACzF,MAAM,UAAU,kBAAkB;IACjC,OAAO;QACN,EAAE,EAAE,gBAAgB;QACpB,cAAc,EAAE,qBAAqB;QACrC,WAAW,EACV,4GAA4G;QAE7G,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,iDAAiD,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YACjF,CAAC;YAED,MAAM,EAAE,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAA;YAC/D,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,MAAM,UAAU,GAAG,EAAE,CAAC,OAAO,CAAC,mEAAmE,CAAC,CAAA;gBAClG,MAAM,SAAS,GAAG,EAAE,CAAC,OAAO,CAAC,qDAAqD,CAAC,CAAA;gBAEnF,KAAK,MAAM,GAAG,IAAI,UAAU,CAAC,OAAO,EAAsC,EAAE,CAAC;oBAC5E,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,OAAM;oBAChC,KAAK,MAAM,OAAO,IAAI,cAAc,CAAC,GAAG,CAAC,EAAE,CAAC;wBAC3C,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;4BAAE,OAAM;wBAC7D,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,UAAU,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;wBACxE,IAAI,CAAC,GAAG;4BAAE,SAAQ;wBAClB,MAAM,OAAO,GAAG,mBAAmB,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;wBAC5D,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;4BAAE,SAAQ;wBAE/C,MAAM;4BACL,GAAG;4BACH,UAAU,EAAE,OAAO;4BACnB,OAAO,EAAE,IAAI;4BACb,MAAM,EAAE,OAAO;4BACf,MAAM,EAAE,gBAAgB;4BACxB,SAAS,EAAE,GAAG,gBAAgB,OAAO,GAAG,CAAC,QAAQ,IAAI,OAAO,CAAC,UAAU,EAAE;4BACzE,cAAc,EAAE,EAAE;4BAClB,OAAO,EAAE,qBAAqB;yBAC9B,CAAA;wBACD,OAAO,EAAE,CAAA;oBACV,CAAC;gBACF,CAAC;gBAED,KAAK,MAAM,GAAG,IAAI,SAAS,CAAC,OAAO,EAAqC,EAAE,CAAC;oBAC1E,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,OAAM;oBAChC,KAAK,MAAM,OAAO,IAAI,aAAa,CAAC,GAAG,CAAC,EAAE,CAAC;wBAC1C,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;4BAAE,OAAM;wBAC7D,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,UAAU,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;wBACxE,IAAI,CAAC,GAAG;4BAAE,SAAQ;wBAClB,MAAM,OAAO,GAAG,mBAAmB,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;wBAC5D,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;4BAAE,SAAQ;wBAE/C,MAAM;4BACL,GAAG;4BACH,UAAU,EAAE,OAAO;4BACnB,OAAO,EAAE,IAAI;4BACb,MAAM,EAAE,OAAO;4BACf,MAAM,EAAE,gBAAgB;4BACxB,S
AAS,EAAE,GAAG,gBAAgB,OAAO,GAAG,CAAC,KAAK,IAAI,OAAO,CAAC,UAAU,EAAE;4BACtE,cAAc,EAAE,EAAE;4BAClB,OAAO,EAAE,qBAAqB;yBAC9B,CAAA;wBACD,OAAO,EAAE,CAAA;oBACV,CAAC;gBACF,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,EAAE,CAAC,KAAK,EAAE,CAAA;YACX,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,YAAY,GAAG,kBAAkB,EAAE,CAAA"}
1
+ {"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/tiger/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAA;AAC1C,OAAO,EAAE,eAAe,EAAE,MAAM,8BAA8B,CAAA;AAC9D,OAAO,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAEpE,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAA;AAEvD,MAAM,CAAC,MAAM,gBAAgB,GAAG,OAAO,CAAA;AACvC,MAAM,CAAC,MAAM,qBAAqB,GAAG,eAAe,CAAA;AAEpD;;;GAGG;AACH,MAAM,kBAAkB,GAAG,0BAA0B,CAAA;AAiBrD;;;;;;GAMG;AACH,QAAQ,CAAC,CAAC,cAAc,CAAC,GAAmB;IAI3C,MAAM,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAA;IACpC,IAAI,CAAC,QAAQ;QAAE,OAAM;IACrB,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,CAAA;IAC1C,IAAI,CAAC,KAAK;QAAE,OAAM;IAElB,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IACnC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IAEnC,MAAM,UAAU,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAA;IAE5C,MAAM,cAAc,GAA+B;QAClD,MAAM,EAAE,KAAK,CAAC,YAAY;QAC1B,MAAM,EAAE,UAAU,CAAC,MAAM;KACzB,CAAA;IACD,IAAI,UAAU,CAAC,MAAM;QAAE,cAAc,CAAC,aAAa,GAAG,UAAU,CAAC,MAAM,CAAA;IACvE,IAAI,UAAU,CAAC,MAAM;QAAE,cAAc,CAAC,aAAa,GAAG,UAAU,CAAC,MAAM,CAAA;IAEvE,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QACpB,MAAM,EAAE,UAAU,EAAE,cAAc,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAA;QAC1D,OAAM;IACP,CAAC;IACD,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QACnC,MAAM;YACL,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE;YACjD,UAAU,EAAE,OAAO,IAAI,EAAE;SACzB,CAAA;QACD,OAAM;IACP,CAAC;IACD,IAAI,IAAI;QAAE,MAAM,EAAE,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE,EAAE,UAAU,EAAE,QAAQ,IAAI,EAAE,EAAE,CAAA;IACjG,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI;QAAE,MAAM,EAAE,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE,EAAE,UAAU,EAAE,QAAQ,IAAI,EAAE,EAAE,CAAA;AACnH,CAAC;AAED,sEAAsE;AACtE,QAAQ,CAAC,CAAC,aAAa,CAAC,GAAkB;IAIzC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAA;IAC5B,IAAI,CAAC,IAAI;QAAE,OAAM;IACjB,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,CAAA;IAC1C,IAAI,CAAC,KAAK;QAAE,OAAM;IAElB,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE;QAC9B,UAAU,
EAAE,eAAe;KAC3B,CAAA;IACD,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,CAAC,YAAY,EAAE;QAC1D,UAAU,EAAE,aAAa;KACzB,CAAA;IACD,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,CAAC,YAAY,EAAE,OAAO,EAAE,kBAAkB,EAAE;QACvF,UAAU,EAAE,qBAAqB;KACjC,CAAA;AACF,CAAC;AAED,yFAAyF;AACzF,MAAM,UAAU,kBAAkB;IACjC,OAAO;QACN,EAAE,EAAE,gBAAgB;QACpB,cAAc,EAAE,qBAAqB;QACrC,WAAW,EACV,4GAA4G;QAE7G,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,iDAAiD,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YACjF,CAAC;YAED,MAAM,EAAE,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAA;YAC/D,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,MAAM,UAAU,GAAG,EAAE,CAAC,OAAO,CAAC,mEAAmE,CAAC,CAAA;gBAClG,MAAM,SAAS,GAAG,EAAE,CAAC,OAAO,CAAC,qDAAqD,CAAC,CAAA;gBAEnF,KAAK,MAAM,GAAG,IAAI,UAAU,CAAC,OAAO,EAAsC,EAAE,CAAC;oBAC5E,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,OAAM;oBAChC,KAAK,MAAM,OAAO,IAAI,cAAc,CAAC,GAAG,CAAC,EAAE,CAAC;wBAC3C,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;4BAAE,OAAM;wBAC7D,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,UAAU,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;wBACxE,IAAI,CAAC,GAAG;4BAAE,SAAQ;wBAClB,MAAM,OAAO,GAAG,mBAAmB,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;wBAC5D,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;4BAAE,SAAQ;wBAE/C,MAAM;4BACL,GAAG;4BACH,UAAU,EAAE,OAAO;4BACnB,OAAO,EAAE,IAAI;4BACb,MAAM,EAAE,OAAO;4BACf,MAAM,EAAE,gBAAgB;4BACxB,SAAS,EAAE,GAAG,gBAAgB,OAAO,GAAG,CAAC,QAAQ,IAAI,OAAO,CAAC,UAAU,EAAE;4BACzE,cAAc,EAAE,EAAE;4BAClB,OAAO,EAAE,qBAAqB;yBAC9B,CAAA;wBACD,OAAO,EAAE,CAAA;oBACV,CAAC;gBACF,CAAC;gBAED,KAAK,MAAM,GAAG,IAAI,SAAS,CAAC,OAAO,EAAqC,EAAE,CAAC;oBAC1E,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,OAAM;oBAChC,KAAK,MAAM,OAAO,IAAI,aAAa,CAAC,GAAG,CAAC,EAAE,CAAC;wBAC1C,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;4BAAE,OAAM;wBAC7D,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,UAAU,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;wBACxE,IAAI,CAAC,GAAG;4BAAE,SAAQ;wBAClB,M
AAM,OAAO,GAAG,mBAAmB,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;wBAC5D,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;4BAAE,SAAQ;wBAE/C,MAAM;4BACL,GAAG;4BACH,UAAU,EAAE,OAAO;4BACnB,OAAO,EAAE,IAAI;4BACb,MAAM,EAAE,OAAO;4BACf,MAAM,EAAE,gBAAgB;4BACxB,SAAS,EAAE,GAAG,gBAAgB,OAAO,GAAG,CAAC,KAAK,IAAI,OAAO,CAAC,UAAU,EAAE;4BACtE,cAAc,EAAE,EAAE;4BAClB,OAAO,EAAE,qBAAqB;yBAC9B,CAAA;wBACD,OAAO,EAAE,CAAA;oBACV,CAAC;gBACF,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,EAAE,CAAC,KAAK,EAAE,CAAA;YACX,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,YAAY,GAAG,kBAAkB,EAAE,CAAA"}
@@ -0,0 +1,30 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Decompose a US street name into Stage 3 components: street_prefix, street, street_suffix.
7
+ *
8
+ * Sources directionals and street types from the curated libpostal/en dictionaries
9
+ * (`core/data/libpostal/dictionaries/en/{directionals,street_types}.txt`). These are the same
10
+ * dictionaries the runtime classifiers (StreetPrefixClassifier, StreetSuffixClassifier) use, so
11
+ * corpus labels and runtime classifications agree on the vocabulary.
12
+ *
13
+ * Examples: "N Main St" → { prefix: "N", street: "Main", suffix: "St" };
14
+ * "Pennsylvania Avenue NW" → { prefix: null, street: "Pennsylvania", suffix: "Avenue NW" };
15
+ * "Salmon St" → { prefix: null, street: "Salmon", suffix: "St" };
16
+ * "SE Hawthorne Blvd" → { prefix: "SE", street: "Hawthorne", suffix: "Blvd" }.
17
+ */
18
+ export interface DecomposedStreet {
19
+ prefix: string | null;
20
+ street: string;
21
+ suffix: string | null;
22
+ }
23
+ /**
24
+ * Decompose a US street name into prefix/name/suffix components.
25
+ *
26
+ * Conservative — only emits prefix/suffix when there's a clear directional or street-type keyword.
27
+ * Returns the original as `street` if nothing matches.
28
+ */
29
+ export declare function decomposeStreet(fullname: string): DecomposedStreet;
30
+ //# sourceMappingURL=street-decompose.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"street-decompose.d.ts","sourceRoot":"","sources":["../../../../src/adapters/tiger/street-decompose.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAuCH,MAAM,WAAW,gBAAgB;IAChC,MAAM,EAAE,MAAM,GAAG,IAAI,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,MAAM,GAAG,IAAI,CAAA;CACrB;AAED;;;;;GAKG;AACH,wBAAgB,eAAe,CAAC,QAAQ,EAAE,MAAM,GAAG,gBAAgB,CA0ClE"}
@@ -0,0 +1,99 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Decompose a US street name into Stage 3 components: street_prefix, street, street_suffix.
7
+ *
8
+ * Sources directionals and street types from the curated libpostal/en dictionaries
9
+ * (`core/data/libpostal/dictionaries/en/{directionals,street_types}.txt`). These are the same
10
+ * dictionaries the runtime classifiers (StreetPrefixClassifier, StreetSuffixClassifier) use, so
11
+ * corpus labels and runtime classifications agree on the vocabulary.
12
+ *
13
+ * Examples: "N Main St" → { prefix: "N", street: "Main", suffix: "St" };
14
+ * "Pennsylvania Avenue NW" → { prefix: null, street: "Pennsylvania", suffix: "Avenue NW" };
15
+ * "Salmon St" → { prefix: null, street: "Salmon", suffix: "St" };
16
+ * "SE Hawthorne Blvd" → { prefix: "SE", street: "Hawthorne", suffix: "Blvd" }.
17
+ */
18
+ import { readFileSync } from "node:fs";
19
+ import { dirname, resolve } from "node:path";
20
+ import { fileURLToPath } from "node:url";
21
+ const moduleDir = dirname(fileURLToPath(import.meta.url));
22
/**
 * Load one libpostal English dictionary file into a lookup set.
 *
 * Each non-empty, non-`#` line is split on `|` (libpostal format: canonical|abbr|abbr|...) and
 * every form is added trimmed and lower-cased, so callers can test any spelling with `Set.has`.
 *
 * The file is searched for in three places, first readable one wins: two locations reached by
 * walking up from this module's directory, then `core/data/...` under the current working
 * directory.
 *
 * @param {string} filename - Dictionary file name, e.g. `directionals.txt`.
 * @returns {Set<string>} All forms found in the file.
 * @throws {Error} When no candidate path can be read; `cause` holds the last read error.
 */
function loadDictionary(filename) {
	// Resolve via the @mailwoman/core data directory.
	const candidates = [
		resolve(moduleDir, "../../../../core/data/libpostal/dictionaries/en", filename),
		resolve(moduleDir, "../../../../../core/data/libpostal/dictionaries/en", filename),
		resolve(process.cwd(), "core/data/libpostal/dictionaries/en", filename),
	];
	let lastError;
	for (const path of candidates) {
		let text;
		try {
			text = readFileSync(path, "utf8");
		} catch (err) {
			// Keep trying the remaining locations, but remember why this one failed so the
			// final error is diagnosable.
			lastError = err;
			continue;
		}
		const set = new Set();
		for (const line of text.split("\n")) {
			const trimmed = line.trim();
			if (!trimmed || trimmed.startsWith("#")) continue;
			// libpostal format: canonical|abbr|abbr|... — index all forms
			for (const form of trimmed.split("|")) {
				const f = form.trim().toLowerCase();
				if (f) set.add(f);
			}
		}
		return set;
	}
	throw new Error(`Could not load libpostal dictionary: ${filename}`, { cause: lastError });
}
52
+ const DIRECTIONALS = loadDictionary("directionals.txt");
53
+ const STREET_TYPES = loadDictionary("street_types.txt");
54
/**
 * Split a US street name into an optional leading directional, the street name proper, and an
 * optional trailing suffix (street type, post-directional, or "type + post-directional").
 *
 * Matching is case-insensitive and ignores one trailing period on a token; the returned pieces
 * keep the spelling of the input tokens. Whenever stripping would leave no name at all, the
 * trimmed input is returned whole as `street` with no prefix or suffix.
 *
 * @param {string} fullname - Street name to decompose.
 * @returns {{ prefix: string | null, street: string, suffix: string | null }}
 */
export function decomposeStreet(fullname) {
	const whole = fullname.trim();
	const untouched = (street) => ({ prefix: null, street, suffix: null });
	if (whole === "") return untouched("");

	const words = whole.split(/\s+/);
	const count = words.length;
	if (count === 1) return untouched(whole);

	const keyOf = (word) => word.toLowerCase().replace(/\.$/, "");
	const isDirectional = (word) => DIRECTIONALS.has(keyOf(word));
	const isStreetType = (word) => STREET_TYPES.has(keyOf(word));

	// A directional first token is always claimed as the prefix.
	const hasPrefix = isDirectional(words[0]);
	const nameStart = hasPrefix ? 1 : 0;

	const tail = words[count - 1];
	const beforeTail = words[count - 2];
	// A one-token suffix is only taken when a name token would remain after the prefix.
	const roomForSuffix = count - nameStart >= 2;

	let suffixLength = 0;
	if (isDirectional(tail) && isStreetType(beforeTail)) {
		// "type + post-directional", e.g. "Avenue NW"; taken without the room check, the
		// empty-name fallback below covers the cases where nothing is left.
		suffixLength = 2;
	} else if (roomForSuffix && (isStreetType(tail) || isDirectional(tail))) {
		suffixLength = 1;
	}

	const nameEnd = count - suffixLength;
	const street = words.slice(nameStart, nameEnd).join(" ");
	if (street === "") return untouched(whole);

	return {
		prefix: hasPrefix ? words[0] : null,
		street,
		suffix: suffixLength > 0 ? words.slice(nameEnd).join(" ") : null,
	};
}
99
+ //# sourceMappingURL=street-decompose.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"street-decompose.js","sourceRoot":"","sources":["../../../../src/adapters/tiger/street-decompose.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AACtC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAA;AAExC,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;AAEzD,SAAS,cAAc,CAAC,QAAgB;IACvC,kDAAkD;IAClD,MAAM,UAAU,GAAG;QAClB,OAAO,CAAC,SAAS,EAAE,iDAAiD,EAAE,QAAQ,CAAC;QAC/E,OAAO,CAAC,SAAS,EAAE,oDAAoD,EAAE,QAAQ,CAAC;QAClF,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,qCAAqC,EAAE,QAAQ,CAAC;KACvE,CAAA;IACD,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC/B,IAAI,CAAC;YACJ,MAAM,IAAI,GAAG,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;YACvC,MAAM,GAAG,GAAG,IAAI,GAAG,EAAU,CAAA;YAC7B,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;gBACrC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;gBAC3B,IAAI,CAAC,OAAO,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;oBAAE,SAAQ;gBACjD,8DAA8D;gBAC9D,KAAK,MAAM,IAAI,IAAI,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;oBACvC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAA;oBACnC,IAAI,CAAC;wBAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;gBAClB,CAAC;YACF,CAAC;YACD,OAAO,GAAG,CAAA;QACX,CAAC;QAAC,MAAM,CAAC;YACR,qBAAqB;QACtB,CAAC;IACF,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,wCAAwC,QAAQ,EAAE,CAAC,CAAA;AACpE,CAAC;AAED,MAAM,YAAY,GAAG,cAAc,CAAC,kBAAkB,CAAC,CAAA;AACvD,MAAM,YAAY,GAAG,cAAc,CAAC,kBAAkB,CAAC,CAAA;AAQvD;;;;;GAKG;AACH,MAAM,UAAU,eAAe,CAAC,QAAgB;IAC/C,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAA;IAC/B,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;IAE/D,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;IACnC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;IAE/E,MAAM,IAAI,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAA;IAE9D,IAAI,MAAM,GAAkB,IAAI,CAAA;IAChC,IAAI,MAAM,GAAkB,IAAI,CAAA;IAChC,IAAI,QAAQ,GAAG,CAAC,CAAA;IAChB,IAAI,MAAM,GAAG,MAA
M,CAAC,MAAM,CAAA;IAE1B,6BAA6B;IAC7B,IAAI,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAC,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QAC9D,MAAM,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;QACnB,QAAQ,GAAG,CAAC,CAAA;IACb,CAAC;IAED,mFAAmF;IACnF,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC,CAAA;IACtC,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IAE/D,IAAI,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,YAAY,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;QAC5D,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QACnD,MAAM,IAAI,CAAC,CAAA;IACZ,CAAC;SAAM,IAAI,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,MAAM,GAAG,QAAQ,IAAI,CAAC,EAAE,CAAC;QAC7D,MAAM,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,CAAA;QAC5B,MAAM,IAAI,CAAC,CAAA;IACZ,CAAC;SAAM,IAAI,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,MAAM,GAAG,QAAQ,IAAI,CAAC,EAAE,CAAC;QAC7D,gCAAgC;QAChC,MAAM,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,CAAA;QAC5B,MAAM,IAAI,CAAC,CAAA;IACZ,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IAC9D,IAAI,CAAC,MAAM,EAAE,CAAC;QACb,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;IACvD,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,CAAA;AAClC,CAAC"}
@@ -0,0 +1,26 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `usgov-irs-bmf`: IRS Exempt Organizations Business Master File (EO BMF) CSV consumer.
7
+ *
8
+ * The EO BMF is the IRS's authoritative registry of US tax-exempt organizations (charities,
9
+ * churches, foundations, ...), published as per-region CSVs at
10
+ * `https://www.irs.gov/charities-non-profits/exempt-organizations-business-master-file-extract-eo-bmf`
11
+ * (`eo1.csv`..`eo4.csv`, `eo_pr.csv`, `eo_xx.csv`). Each row carries an organization NAME plus
12
+ * its mailing address. It complements `usgov-nppes` with a DIFFERENT venue population
13
+ * (non-profits vs healthcare providers) and, notably, a high share of PO-box addresses — useful
14
+ * `po_box`-tag signal (a tag with historically low recall).
15
+ *
16
+ * Output: one row per record with a usable city + postcode. NAME → `venue`; the street line becomes
17
+ * `po_box` when it's a PO-box, else `house_number` + `street`; CITY/STATE/ZIP fill the locality
18
+ * line. STATE is already a USPS abbreviation in the source. License: `"Public Domain"` (US
19
+ * federal).
20
+ */
21
+ import type { CorpusAdapter } from "../../types.js";
22
+ export declare const USGOV_IRS_BMF_ADAPTER_ID = "usgov-irs-bmf";
23
+ export declare const USGOV_IRS_BMF_DEFAULT_LICENSE = "Public Domain";
24
+ export declare function createUsgovIrsBmfAdapter(): CorpusAdapter;
25
+ export declare const usgovIrsBmfAdapter: CorpusAdapter;
26
+ //# sourceMappingURL=adapter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/usgov-irs-bmf/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAMH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,wBAAwB,kBAAkB,CAAA;AACvD,eAAO,MAAM,6BAA6B,kBAAkB,CAAA;AAoC5D,wBAAgB,wBAAwB,IAAI,aAAa,CA2ExD;AAED,eAAO,MAAM,kBAAkB,eAA6B,CAAA"}
@@ -0,0 +1,115 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `usgov-irs-bmf`: IRS Exempt Organizations Business Master File (EO BMF) CSV consumer.
7
+ *
8
+ * The EO BMF is the IRS's authoritative registry of US tax-exempt organizations (charities,
9
+ * churches, foundations, ...), published as per-region CSVs at
10
+ * `https://www.irs.gov/charities-non-profits/exempt-organizations-business-master-file-extract-eo-bmf`
11
+ * (`eo1.csv`..`eo4.csv`, `eo_pr.csv`, `eo_xx.csv`). Each row carries an organization NAME plus
12
+ * its mailing address. It complements `usgov-nppes` with a DIFFERENT venue population
13
+ * (non-profits vs healthcare providers) and, notably, a high share of PO-box addresses — useful
14
+ * `po_box`-tag signal (a tag with historically low recall).
15
+ *
16
+ * Output: one row per record with a usable city + postcode. NAME → `venue`; the street line becomes
17
+ * `po_box` when it's a PO-box, else `house_number` + `street`; CITY/STATE/ZIP fill the locality
18
+ * line. STATE is already a USPS abbreviation in the source. License: `"Public Domain"` (US
19
+ * federal).
20
+ */
21
+ import { parse as csvParse } from "csv-parse";
22
+ import { createReadStream } from "node:fs";
23
+ import { stableSourceId } from "../../adapter.js";
24
+ import { reconcileComponents } from "../../format.js";
25
+ export const USGOV_IRS_BMF_ADAPTER_ID = "usgov-irs-bmf";
26
+ export const USGOV_IRS_BMF_DEFAULT_LICENSE = "Public Domain";
27
// Leading house number ("123", "12-14", "221B") followed by the rest of the street line.
const HOUSE_NUMBER_PREFIX = /^(\d+(?:-\d+)?[A-Za-z]?)\s+(.+)$/;
// PO box in its many written forms: "PO BOX 12", "P.O. BOX 12", "P O BOX 12", "POST OFFICE BOX 12",
// "POB 12", "BOX 12". The trailing `\s+\w` requires a box identifier after the keyword.
const PO_BOX = /^\s*(?:(?:P\.?\s?O\.?|POST\s+OFFICE)\s*BOX|POB|BOX)\s+\w/i;
/**
 * Classify the street line into a `po_box` or a `{house_number?, street}` split.
 *
 * @param {string} street - Street line of a record; surrounding whitespace is ignored.
 * @returns {{ po_box: string } | { house_number?: string, street: string } | null}
 *   `null` for a blank line; `{ po_box }` carrying the whole trimmed line when it starts with a
 *   PO-box form; otherwise the leading house number (when present) split from the street text.
 */
function splitStreetLine(street) {
    const trimmed = street.trim();
    if (!trimmed) {
        return null;
    }
    // PO-box detection runs first so that "BOX 12"-style lines never reach the house-number split.
    if (PO_BOX.test(trimmed)) {
        return { po_box: trimmed };
    }
    const m = HOUSE_NUMBER_PREFIX.exec(trimmed);
    if (m) {
        return { house_number: m[1], street: m[2].trim() };
    }
    return { street: trimmed };
}
42
/**
 * Assemble the single-line raw address: `venue, street part, city, STATE ZIP`.
 * Empty or missing segments are left out, so no dangling separators are produced.
 *
 * @param {string | undefined} venue - Organization name, if any.
 * @param {string} streetPart - PO-box text or "house_number street" text.
 * @param {string} city - Locality name (trimmed here).
 * @param {string} state - Region abbreviation; may be empty.
 * @param {string} postcode - Postal code; may be empty.
 * @returns {string} Comma-separated address line.
 */
function composeRaw(venue, streetPart, city, state, postcode) {
    // "STATE ZIP" — whichever of the two are present, space-separated.
    const regionLine = [state, postcode].filter(Boolean).join(" ").trim();
    const localityLine = [city.trim(), regionLine].filter(Boolean).join(", ");
    const segments = [];
    for (const piece of [venue, streetPart, localityLine]) {
        if (piece) {
            segments.push(piece);
        }
    }
    return segments.join(", ");
}
46
/**
 * Create the `usgov-irs-bmf` corpus adapter.
 *
 * The returned adapter exposes `id`, `defaultLicense`, `description` and an async generator
 * `rows(opts)` that streams a CSV file from `opts.inputPath` and yields one corpus row per usable
 * record.
 *
 * `rows(opts)` reads:
 * - `opts.inputPath` — path of the CSV file to read;
 * - `opts.country`   — when set, must be `"US"`, otherwise the generator throws;
 * - `opts.limit`     — optional maximum number of rows to yield;
 * - `opts.signal`    — optional abort signal; iteration stops once it is aborted.
 *
 * Records are skipped when CITY or ZIP is blank, when the 5-digit ZIP part is empty, when the
 * street line is blank, or when too few components survive reconciliation against the raw text.
 *
 * @returns {object} The adapter object described above.
 * @throws {Error} From `rows()` when a non-US country is requested, or when reading/parsing the
 *   input file fails.
 */
export function createUsgovIrsBmfAdapter() {
    return {
        id: USGOV_IRS_BMF_ADAPTER_ID,
        defaultLicense: USGOV_IRS_BMF_DEFAULT_LICENSE,
        description: "IRS Exempt Organizations Business Master File — US non-profit venue+address (public-domain), with strong PO-box coverage.",
        async *rows(opts) {
            if (opts.country && opts.country !== "US") {
                throw new Error(`usgov-irs-bmf adapter: only US supported, got country=${opts.country}`);
            }
            const stream = createReadStream(opts.inputPath, { encoding: "utf8" });
            const parser = csvParse({ columns: true, skip_empty_lines: true, relax_quotes: true, relax_column_count: true, trim: true });
            // pipe() does not forward source errors to its destination. Without this listener a
            // missing/unreadable input file raises an unhandled 'error' event instead of rejecting
            // the `for await` loop below.
            stream.on("error", (err) => parser.destroy(err));
            stream.pipe(parser);
            let emitted = 0;
            try {
                for await (const record of parser) {
                    if (opts.signal?.aborted) {
                        break;
                    }
                    if (opts.limit !== undefined && emitted >= opts.limit) {
                        break;
                    }
                    const ein = (record.EIN ?? "").trim();
                    const venue = (record.NAME ?? "").trim() || undefined;
                    const street = (record.STREET ?? "").trim();
                    const city = (record.CITY ?? "").trim();
                    const state = (record.STATE ?? "").trim();
                    const zipRaw = (record.ZIP ?? "").trim();
                    if (!city || !zipRaw) {
                        continue;
                    }
                    const postcode = zipRaw.split("-")[0].trim(); // 5-digit; drop the optional +4
                    // A ZIP such as "-1234" leaves nothing before the dash: no usable postcode.
                    if (!postcode) {
                        continue;
                    }
                    const split = splitStreetLine(street);
                    if (!split) {
                        continue;
                    }
                    const streetPart = "po_box" in split ? split.po_box : [split.house_number, split.street].filter(Boolean).join(" ");
                    const components = {
                        ...(venue ? { venue } : {}),
                        ...("po_box" in split
                            ? { po_box: split.po_box }
                            : { ...(split.house_number ? { house_number: split.house_number } : {}), street: split.street }),
                        locality: city,
                        ...(state ? { region: state } : {}),
                        postcode,
                    };
                    const raw = composeRaw(venue, streetPart, city, state, postcode);
                    if (!raw) {
                        continue;
                    }
                    const aligned = reconcileComponents(components, raw);
                    // Skip rows where two or fewer components remain after reconciliation.
                    if (Object.keys(aligned).length <= 2) {
                        continue;
                    }
                    // Prefer the EIN as the source id; fall back to an id derived from the components.
                    const sourceId = ein
                        ? `${USGOV_IRS_BMF_ADAPTER_ID}-${ein}`
                        : stableSourceId(USGOV_IRS_BMF_ADAPTER_ID, aligned);
                    yield {
                        raw,
                        components: aligned,
                        country: "US",
                        locale: "en-US",
                        source: USGOV_IRS_BMF_ADAPTER_ID,
                        source_id: sourceId,
                        corpus_version: "",
                        license: USGOV_IRS_BMF_DEFAULT_LICENSE,
                    };
                    emitted++;
                }
            }
            finally {
                // Release the file handle on every exit path (completion, break, throw).
                stream.destroy();
            }
        },
    };
}
114
+ export const usgovIrsBmfAdapter = createUsgovIrsBmfAdapter();
115
+ //# sourceMappingURL=adapter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/usgov-irs-bmf/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,EAAE,KAAK,IAAI,QAAQ,EAAE,MAAM,WAAW,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAGrD,MAAM,CAAC,MAAM,wBAAwB,GAAG,eAAe,CAAA;AACvD,MAAM,CAAC,MAAM,6BAA6B,GAAG,eAAe,CAAA;AAE5D,MAAM,mBAAmB,GAAG,kCAAkC,CAAA;AAC9D,kGAAkG;AAClG,MAAM,MAAM,GAAG,yCAAyC,CAAA;AAWxD,qFAAqF;AACrF,SAAS,eAAe,CAAC,MAAc;IACtC,MAAM,OAAO,GAAG,MAAM,CAAC,IAAI,EAAE,CAAA;IAC7B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IACzB,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC;QAAE,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;IACpD,MAAM,CAAC,GAAG,mBAAmB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IAC3C,IAAI,CAAC;QAAE,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,EAAE,CAAA;IAC1D,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;AAC3B,CAAC;AAED,SAAS,UAAU,CAClB,KAAyB,EACzB,UAAkB,EAClB,IAAY,EACZ,KAAa,EACb,QAAgB;IAEhB,MAAM,QAAQ,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAC7G,OAAO,CAAC,KAAK,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AAChE,CAAC;AAED,MAAM,UAAU,wBAAwB;IACvC,OAAO;QACN,EAAE,EAAE,wBAAwB;QAC5B,cAAc,EAAE,6BAA6B;QAC7C,WAAW,EACV,2HAA2H;QAE5H,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,yDAAyD,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YACzF,CAAC;YAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;YACrE,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CACzB,QAAQ,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,gBAAgB,EAAE,IAAI,EAAE,YAAY,EAAE,IAAI,EAAE,kBAAkB,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAC7G,CAAA;YAED,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI,MAAkC,EAAE,CAAC;oBAC/D,IAAI,IAAI,CAAC,M
AAM,EAAE,OAAO;wBAAE,MAAK;oBAC/B,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;wBAAE,MAAK;oBAE5D,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACrC,MAAM,KAAK,GAAG,CAAC,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,IAAI,SAAS,CAAA;oBACrD,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC3C,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACvC,MAAM,KAAK,GAAG,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACzC,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACxC,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM;wBAAE,SAAQ;oBAC9B,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAA,CAAC,gCAAgC;oBAE9E,MAAM,KAAK,GAAG,eAAe,CAAC,MAAM,CAAC,CAAA;oBACrC,IAAI,CAAC,KAAK;wBAAE,SAAQ;oBAEpB,MAAM,UAAU,GACf,QAAQ,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,YAAY,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;oBAEhG,MAAM,UAAU,GAA+B;wBAC9C,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;wBAC3B,GAAG,CAAC,QAAQ,IAAI,KAAK;4BACpB,CAAC,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE;4BAC1B,CAAC,CAAC,EAAE,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,KAAK,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC;wBACjG,QAAQ,EAAE,IAAI;wBACd,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;wBACnC,QAAQ;qBACR,CAAA;oBAED,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,EAAE,UAAU,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAA;oBAChE,IAAI,CAAC,GAAG;wBAAE,SAAQ;oBAElB,MAAM,OAAO,GAAG,mBAAmB,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;oBACpD,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,IAAI,CAAC;wBAAE,SAAQ;oBAE9C,MAAM,QAAQ,GAAG,GAAG;wBACnB,CAAC,CAAC,GAAG,wBAAwB,IAAI,GAAG,EAAE;wBACtC,CAAC,CAAC,cAAc,CAAC,wBAAwB,EAAE,OAAO,CAAC,CAAA;oBAEpD,MAAM;wBACL,GAAG;wBACH,UAAU,EAAE,OAAO;wBACnB,OAAO,EAAE,IAAI;wBACb,MAAM,EAAE,OAAO;wBACf,MAAM,EAAE,wBAAwB;wBAChC,SAAS,EAAE,QAAQ;wBACnB,cAAc,EAAE,EAAE;wBAClB,OA
AO,EAAE,6BAA6B;qBACtC,CAAA;oBACD,OAAO,EAAE,CAAA;gBACV,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,MAAM,CAAC,OAAO,EAAE,CAAA;YACjB,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,kBAAkB,GAAG,wBAAwB,EAAE,CAAA"}
@@ -1 +1 @@
1
- {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/usgov-nad/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAQH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,oBAAoB,cAAc,CAAA;AAC/C,eAAO,MAAM,yBAAyB,kBAAkB,CAAA;AA+JxD,wBAAgB,qBAAqB,IAAI,aAAa,CAuFrD;AAED,eAAO,MAAM,eAAe,eAA0B,CAAA"}
1
+ {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/usgov-nad/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAQH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,oBAAoB,cAAc,CAAA;AAC/C,eAAO,MAAM,yBAAyB,kBAAkB,CAAA;AAgLxD,wBAAgB,qBAAqB,IAAI,aAAa,CAmGrD;AAED,eAAO,MAAM,eAAe,eAA0B,CAAA"}
@@ -114,14 +114,23 @@ function composeHouseNumber(r) {
114
114
  const suf = (r.AddNum_Suf ?? "").toString().trim();
115
115
  return [pre, num, suf].filter(Boolean).join(" ").trim() || undefined;
116
116
  }
117
- function composeStreet(r) {
117
/**
 * Split a NAD record's street fields into `prefix` / `street` / `suffix` plus the joined `full`
 * string used for the raw address line.
 *
 * When `St_Name` is present the parts come from the individual street columns, in the order
 * PreMod, PreDir, PreTyp, PreSep | Name | PosTyp, PosDir, PosMod. The pre/post modifiers are
 * folded into the prefix/suffix so they are not lost from the output. When `St_Name` is blank,
 * `StNam_Full` is used verbatim as both `street` and `full`.
 *
 * @param {object} r - NAD record; fields may be missing, null or non-string.
 * @returns {{ prefix?: string, street: string, suffix?: string, full: string } | undefined}
 *   `undefined` when neither `St_Name` nor `StNam_Full` carries text.
 */
function decomposeNadStreet(r) {
    // Normalize a possibly missing / non-string column to a trimmed string.
    const field = (value) => (value ?? "").toString().trim();
    const name = field(r.St_Name);
    if (name) {
        const prefix = [r.St_PreMod, r.St_PreDir, r.St_PreTyp, r.St_PreSep]
            .map((value) => field(value))
            .filter(Boolean)
            .join(" ") || undefined;
        const suffix = [r.St_PosTyp, r.St_PosDir, r.St_PosMod]
            .map((value) => field(value))
            .filter(Boolean)
            .join(" ") || undefined;
        const full = [prefix, name, suffix].filter(Boolean).join(" ");
        return { prefix, street: name, suffix, full };
    }
    const full = field(r.StNam_Full);
    if (full) {
        return { full, street: full };
    }
    return undefined;
}
126
135
  function composeLocality(r) {
127
136
  return nonEmpty(r.Post_City, r.Inc_Muni, r.Census_Plc, r.Uninc_Comm);
@@ -134,7 +143,7 @@ function composePostcode(r) {
134
143
  return plus4 ? `${zip}-${plus4}` : zip;
135
144
  }
136
145
  function composeRaw(parts) {
137
- const streetLine = [parts.houseNumber, parts.street].filter(Boolean).join(" ").trim();
146
+ const streetLine = [parts.houseNumber, parts.street, parts.unit].filter(Boolean).join(" ").trim();
138
147
  const tail = `${parts.locality}, ${parts.region} ${parts.postcode}`;
139
148
  return [parts.venue, streetLine || undefined, tail].filter(Boolean).join(", ");
140
149
  }
@@ -182,18 +191,30 @@ export function createUsgovNadAdapter() {
182
191
  const postcode = composePostcode(record);
183
192
  if (!postcode)
184
193
  continue;
185
- const street = composeStreet(record);
194
+ const decomposed = decomposeNadStreet(record);
186
195
  const houseNumber = composeHouseNumber(record);
187
196
  const venue = nonEmpty(record.LandmkName);
197
+ const unit = nonEmpty(record.Unit, record.Building, record.Floor, record.Room);
188
198
  const components = {
189
199
  ...(venue ? { venue } : {}),
190
200
  ...(houseNumber ? { house_number: houseNumber } : {}),
191
- ...(street ? { street } : {}),
201
+ ...(decomposed?.prefix ? { street_prefix: decomposed.prefix } : {}),
202
+ ...(decomposed?.street ? { street: decomposed.street } : {}),
203
+ ...(decomposed?.suffix ? { street_suffix: decomposed.suffix } : {}),
204
+ ...(unit ? { unit } : {}),
192
205
  locality,
193
206
  region: state,
194
207
  postcode,
195
208
  };
196
- const raw = composeRaw({ venue, houseNumber, street, locality, region: state, postcode });
209
+ const raw = composeRaw({
210
+ venue,
211
+ houseNumber,
212
+ street: decomposed?.full,
213
+ unit,
214
+ locality,
215
+ region: state,
216
+ postcode,
217
+ });
197
218
  if (!raw)
198
219
  continue;
199
220
  const aligned = reconcileComponents(components, raw);