@mailwoman/corpus 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/out/src/adapter.d.ts +96 -0
  2. package/out/src/adapter.d.ts.map +1 -0
  3. package/out/src/adapter.js +107 -0
  4. package/out/src/adapter.js.map +1 -0
  5. package/out/src/adapters/ban/adapter.d.ts +32 -0
  6. package/out/src/adapters/ban/adapter.d.ts.map +1 -0
  7. package/out/src/adapters/ban/adapter.js +133 -0
  8. package/out/src/adapters/ban/adapter.js.map +1 -0
  9. package/out/src/adapters/fcc-bdc/adapter.d.ts +61 -0
  10. package/out/src/adapters/fcc-bdc/adapter.d.ts.map +1 -0
  11. package/out/src/adapters/fcc-bdc/adapter.js +153 -0
  12. package/out/src/adapters/fcc-bdc/adapter.js.map +1 -0
  13. package/out/src/adapters/index.d.ts +42 -0
  14. package/out/src/adapters/index.d.ts.map +1 -0
  15. package/out/src/adapters/index.js +76 -0
  16. package/out/src/adapters/index.js.map +1 -0
  17. package/out/src/adapters/openaddresses/adapter.d.ts +60 -0
  18. package/out/src/adapters/openaddresses/adapter.d.ts.map +1 -0
  19. package/out/src/adapters/openaddresses/adapter.js +174 -0
  20. package/out/src/adapters/openaddresses/adapter.js.map +1 -0
  21. package/out/src/adapters/state-ia-contractors/adapter.d.ts +23 -0
  22. package/out/src/adapters/state-ia-contractors/adapter.d.ts.map +1 -0
  23. package/out/src/adapters/state-ia-contractors/adapter.js +113 -0
  24. package/out/src/adapters/state-ia-contractors/adapter.js.map +1 -0
  25. package/out/src/adapters/state-ny-notaries/adapter.d.ts +21 -0
  26. package/out/src/adapters/state-ny-notaries/adapter.d.ts.map +1 -0
  27. package/out/src/adapters/state-ny-notaries/adapter.js +132 -0
  28. package/out/src/adapters/state-ny-notaries/adapter.js.map +1 -0
  29. package/out/src/adapters/state-tx-notaries/adapter.d.ts +22 -0
  30. package/out/src/adapters/state-tx-notaries/adapter.d.ts.map +1 -0
  31. package/out/src/adapters/state-tx-notaries/adapter.js +125 -0
  32. package/out/src/adapters/state-tx-notaries/adapter.js.map +1 -0
  33. package/out/src/adapters/tiger/adapter.d.ts +45 -0
  34. package/out/src/adapters/tiger/adapter.d.ts.map +1 -0
  35. package/out/src/adapters/tiger/adapter.js +179 -0
  36. package/out/src/adapters/tiger/adapter.js.map +1 -0
  37. package/out/src/adapters/usgov-hrsa-fqhc/adapter.d.ts +36 -0
  38. package/out/src/adapters/usgov-hrsa-fqhc/adapter.d.ts.map +1 -0
  39. package/out/src/adapters/usgov-hrsa-fqhc/adapter.js +147 -0
  40. package/out/src/adapters/usgov-hrsa-fqhc/adapter.js.map +1 -0
  41. package/out/src/adapters/usgov-imls-pls/adapter.d.ts +25 -0
  42. package/out/src/adapters/usgov-imls-pls/adapter.d.ts.map +1 -0
  43. package/out/src/adapters/usgov-imls-pls/adapter.js +118 -0
  44. package/out/src/adapters/usgov-imls-pls/adapter.js.map +1 -0
  45. package/out/src/adapters/usgov-nad/adapter.d.ts +37 -0
  46. package/out/src/adapters/usgov-nad/adapter.d.ts.map +1 -0
  47. package/out/src/adapters/usgov-nad/adapter.js +227 -0
  48. package/out/src/adapters/usgov-nad/adapter.js.map +1 -0
  49. package/out/src/adapters/usgov-nppes/adapter.d.ts +28 -0
  50. package/out/src/adapters/usgov-nppes/adapter.d.ts.map +1 -0
  51. package/out/src/adapters/usgov-nppes/adapter.js +123 -0
  52. package/out/src/adapters/usgov-nppes/adapter.js.map +1 -0
  53. package/out/src/adapters/usgov-samhsa-treatment-locator/adapter.d.ts +35 -0
  54. package/out/src/adapters/usgov-samhsa-treatment-locator/adapter.d.ts.map +1 -0
  55. package/out/src/adapters/usgov-samhsa-treatment-locator/adapter.js +162 -0
  56. package/out/src/adapters/usgov-samhsa-treatment-locator/adapter.js.map +1 -0
  57. package/out/src/adapters/wof-admin-json/adapter.d.ts +85 -0
  58. package/out/src/adapters/wof-admin-json/adapter.d.ts.map +1 -0
  59. package/out/src/adapters/wof-admin-json/adapter.js +241 -0
  60. package/out/src/adapters/wof-admin-json/adapter.js.map +1 -0
  61. package/out/src/adapters/wof-postalcode-json/adapter.d.ts +63 -0
  62. package/out/src/adapters/wof-postalcode-json/adapter.d.ts.map +1 -0
  63. package/out/src/adapters/wof-postalcode-json/adapter.js +178 -0
  64. package/out/src/adapters/wof-postalcode-json/adapter.js.map +1 -0
  65. package/out/src/align.d.ts +58 -0
  66. package/out/src/align.d.ts.map +1 -0
  67. package/out/src/align.js +139 -0
  68. package/out/src/align.js.map +1 -0
  69. package/out/src/build.d.ts +104 -0
  70. package/out/src/build.d.ts.map +1 -0
  71. package/out/src/build.js +201 -0
  72. package/out/src/build.js.map +1 -0
  73. package/out/src/codex/us-fips-state.d.ts +44 -0
  74. package/out/src/codex/us-fips-state.d.ts.map +1 -0
  75. package/out/src/codex/us-fips-state.js +105 -0
  76. package/out/src/codex/us-fips-state.js.map +1 -0
  77. package/out/src/codex/us-street-suffix.d.ts +259 -0
  78. package/out/src/codex/us-street-suffix.d.ts.map +1 -0
  79. package/out/src/codex/us-street-suffix.js +285 -0
  80. package/out/src/codex/us-street-suffix.js.map +1 -0
  81. package/out/src/format.d.ts +79 -0
  82. package/out/src/format.d.ts.map +1 -0
  83. package/out/src/format.js +151 -0
  84. package/out/src/format.js.map +1 -0
  85. package/out/src/golden.d.ts +50 -0
  86. package/out/src/golden.d.ts.map +1 -0
  87. package/out/src/golden.js +104 -0
  88. package/out/src/golden.js.map +1 -0
  89. package/out/src/index.d.ts +18 -0
  90. package/out/src/index.d.ts.map +1 -0
  91. package/out/src/index.js +18 -0
  92. package/out/src/index.js.map +1 -0
  93. package/out/src/parquet-wrapper/index.d.ts +12 -0
  94. package/out/src/parquet-wrapper/index.d.ts.map +1 -0
  95. package/out/src/parquet-wrapper/index.js +12 -0
  96. package/out/src/parquet-wrapper/index.js.map +1 -0
  97. package/out/src/parquet-wrapper/reader.d.ts +31 -0
  98. package/out/src/parquet-wrapper/reader.d.ts.map +1 -0
  99. package/out/src/parquet-wrapper/reader.js +54 -0
  100. package/out/src/parquet-wrapper/reader.js.map +1 -0
  101. package/out/src/parquet-wrapper/schema.d.ts +45 -0
  102. package/out/src/parquet-wrapper/schema.d.ts.map +1 -0
  103. package/out/src/parquet-wrapper/schema.js +55 -0
  104. package/out/src/parquet-wrapper/schema.js.map +1 -0
  105. package/out/src/parquet-wrapper/writer.d.ts +41 -0
  106. package/out/src/parquet-wrapper/writer.d.ts.map +1 -0
  107. package/out/src/parquet-wrapper/writer.js +71 -0
  108. package/out/src/parquet-wrapper/writer.js.map +1 -0
  109. package/out/src/parquet.d.ts +122 -0
  110. package/out/src/parquet.d.ts.map +1 -0
  111. package/out/src/parquet.js +220 -0
  112. package/out/src/parquet.js.map +1 -0
  113. package/out/src/runner.d.ts +100 -0
  114. package/out/src/runner.d.ts.map +1 -0
  115. package/out/src/runner.js +183 -0
  116. package/out/src/runner.js.map +1 -0
  117. package/out/src/split.d.ts +108 -0
  118. package/out/src/split.d.ts.map +1 -0
  119. package/out/src/split.js +191 -0
  120. package/out/src/split.js.map +1 -0
  121. package/out/src/synthesize.d.ts +146 -0
  122. package/out/src/synthesize.d.ts.map +1 -0
  123. package/out/src/synthesize.js +472 -0
  124. package/out/src/synthesize.js.map +1 -0
  125. package/out/src/tokenize.d.ts +47 -0
  126. package/out/src/tokenize.d.ts.map +1 -0
  127. package/out/src/tokenize.js +49 -0
  128. package/out/src/tokenize.js.map +1 -0
  129. package/out/src/types.d.ts +168 -0
  130. package/out/src/types.d.ts.map +1 -0
  131. package/out/src/types.js +19 -0
  132. package/out/src/types.js.map +1 -0
  133. package/out/src/wof-json.d.ts +105 -0
  134. package/out/src/wof-json.d.ts.map +1 -0
  135. package/out/src/wof-json.js +174 -0
  136. package/out/src/wof-json.js.map +1 -0
  137. package/package.json +36 -0
@@ -0,0 +1,132 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `state-ny-notaries`: New York Commissioned Notaries CSV consumer.
7
+ *
8
+ * The New York Department of State publishes a registry of commissioned notaries public. Each row
9
+ * optionally carries a business name and business address (~1-5% fill rate).
10
+ *
11
+ * The adapter consumes the CSV the operator pre-downloads via `fetch-state-sources.sh`. Column
12
+ * names match the data.ny.gov export header (note: some columns have leading spaces).
13
+ *
14
+ * License: stamped `"Public Domain"` per New York state government open-data terms.
15
+ */
16
+ import { parse as csvParse } from "csv-parse";
17
+ import { createReadStream } from "node:fs";
18
+ import { stableSourceId } from "../../adapter.js";
19
+ import { lookupStateAbbreviation } from "../../codex/us-fips-state.js";
20
+ import { reconcileComponents } from "../../format.js";
21
/**
 * Identifier of the New York notaries adapter. Within this module it is used as the adapter's
 * `id`, as the `source` stamped on every emitted row, and as the prefix of commission-number-based
 * `source_id` values.
 */
export const STATE_NY_NOTARIES_ADAPTER_ID = "state-ny-notaries";
/**
 * License label used by this module both as the adapter's `defaultLicense` and as the `license`
 * field of every emitted row.
 */
export const STATE_NY_NOTARIES_DEFAULT_LICENSE = "Public Domain";
23
/**
 * Leading house number followed by the rest of the line. The number is a run of digits,
 * optionally followed by `-digits` and optionally one trailing letter; at least one whitespace
 * character must separate it from the remainder.
 */
const HOUSE_NUMBER_PREFIX = /^(\d+(?:-\d+)?[A-Za-z]?)\s+(.+)$/;
/**
 * Separate a leading house number from the street text of a single address line.
 *
 * @param address Address line; surrounding whitespace is ignored.
 * @returns `null` when the line is blank, `{ house_number, street }` when the line begins with a
 *   house number, and `{ street }` (the whole trimmed line) otherwise.
 */
function splitAddress(address) {
    const line = address.trim();
    if (line.length === 0) {
        return null;
    }
    const parts = line.match(HOUSE_NUMBER_PREFIX);
    if (parts === null) {
        // No recognizable house number: the entire line is treated as the street.
        return { street: line };
    }
    const [, houseNumber, remainder] = parts;
    return { house_number: houseNumber, street: remainder.trim() };
}
33
/**
 * Header names of the NY notaries CSV as listed by this module. Note that the city entry carries
 * a leading space (`" Business City (if available)"`); `rows()` trims record keys before looking
 * columns up, so lookups use the un-padded name.
 *
 * NOTE(review): this constant is not referenced anywhere else in this emitted module and is not
 * exported — presumably it only informs a type in the TypeScript source; confirm before removing.
 */
const RAW_NY_COLUMNS = [
    "Commission Holder Name",
    "Commission Number (UID)",
    "Business Name (if available)",
    "Business Address 1 (if available)",
    "Business Address 2 (if available)",
    " Business City (if available)",
    "Business State (if available)",
    "Business Zip (if available)",
    "Commissioned County",
];
44
/**
 * Build the `state-ny-notaries` adapter.
 *
 * The returned object exposes `id`, `defaultLicense`, `description` and an async generator
 * `rows(opts)` that streams the CSV at `opts.inputPath` and yields one corpus row per usable
 * record.
 *
 * `rows(opts)` reads:
 * - `opts.country`: when set to anything other than `"US"` the generator throws.
 * - `opts.inputPath`: path of the CSV file to read (UTF-8).
 * - `opts.signal`: iteration stops once `signal.aborted` is true.
 * - `opts.limit`: when defined, at most this many rows are yielded.
 *
 * A record is skipped when city, state or zip is empty, when both address lines are empty, when
 * `lookupStateAbbreviation` returns a falsy value for the state, or when `reconcileComponents`
 * returns an object with two or fewer keys.
 *
 * @returns The adapter object described above.
 * @throws Error from `rows()` when `opts.country` is set and is not `"US"`, and any read or parse
 *   error raised while streaming the CSV.
 */
export function createStateNyNotariesAdapter() {
    return {
        id: STATE_NY_NOTARIES_ADAPTER_ID,
        defaultLicense: STATE_NY_NOTARIES_DEFAULT_LICENSE,
        description: "New York Commissioned Notaries — name + optional business address (public-domain).",
        async *rows(opts) {
            if (opts.country && opts.country !== "US") {
                throw new Error(`state-ny-notaries adapter: only US supported, got country=${opts.country}`);
            }
            const stream = createReadStream(opts.inputPath, { encoding: "utf8" });
            const parser = stream.pipe(csvParse({
                columns: true,
                skip_empty_lines: true,
                relax_quotes: true,
                relax_column_count: true,
            }));
            // `.pipe()` does not forward errors from the source stream. Without this listener a
            // missing or unreadable input file raises an unhandled 'error' event instead of
            // rejecting the iteration below; destroying the parser with the error surfaces it to
            // the caller of `rows()`.
            stream.on("error", (err) => parser.destroy(err));
            let emitted = 0;
            try {
                for await (const rawRecord of parser) {
                    if (opts.signal?.aborted)
                        break;
                    if (opts.limit !== undefined && emitted >= opts.limit)
                        break;
                    // NY CSV has columns with leading spaces, so we normalize by trimming keys.
                    const record = {};
                    for (const key of Object.keys(rawRecord)) {
                        record[key.trim()] = rawRecord[key] ?? "";
                    }
                    const holderName = (record["Commission Holder Name"] ?? "").trim();
                    const businessName = (record["Business Name (if available)"] ?? "").trim();
                    const address1 = (record["Business Address 1 (if available)"] ?? "").trim();
                    const address2 = (record["Business Address 2 (if available)"] ?? "").trim();
                    const city = (record["Business City (if available)"] ?? "").trim();
                    const stateAbbr = (record["Business State (if available)"] ?? "").trim();
                    const zip = (record["Business Zip (if available)"] ?? "").trim();
                    const county = (record["Commissioned County"] ?? "").trim();
                    // City, state and zip are all required; so is at least one address line.
                    if (!city || !stateAbbr || !zip)
                        continue;
                    if (!address1 && !address2)
                        continue;
                    const state = lookupStateAbbreviation(stateAbbr);
                    if (!state)
                        continue;
                    // Both address lines are joined before the house number is split off, so the
                    // second line (when present) ends up inside `street`.
                    const fullAddress = [address1, address2].filter(Boolean).join(" ");
                    const split = splitAddress(fullAddress);
                    if (!split)
                        continue;
                    // Prefer the business name; fall back to the commission holder's name.
                    const venue = businessName || holderName || undefined;
                    const components = {
                        ...(venue ? { venue } : {}),
                        ...(split.house_number ? { house_number: split.house_number } : {}),
                        street: split.street,
                        locality: city,
                        region: state.abbreviation,
                        postcode: zip,
                        ...(county ? { subregion: county } : {}),
                    };
                    // NOTE(review): `raw` uses the CSV's state text while `region` uses the looked-up
                    // abbreviation — confirm these always agree (e.g. in letter case).
                    const streetPart = [split.house_number, split.street].filter(Boolean).join(" ").trim();
                    const raw = [venue, streetPart, [city, [stateAbbr, zip].filter(Boolean).join(" ")].filter(Boolean).join(", ")]
                        .filter(Boolean)
                        .join(", ");
                    const aligned = reconcileComponents(components, raw);
                    if (Object.keys(aligned).length <= 2)
                        continue;
                    // Use the commission number as the id when present; otherwise derive one from
                    // the reconciled components.
                    const commNum = (record["Commission Number (UID)"] ?? "").trim();
                    const sourceId = commNum
                        ? `${STATE_NY_NOTARIES_ADAPTER_ID}-${commNum}`
                        : stableSourceId(STATE_NY_NOTARIES_ADAPTER_ID, aligned);
                    yield {
                        raw,
                        components: aligned,
                        country: "US",
                        locale: "en-US",
                        source: STATE_NY_NOTARIES_ADAPTER_ID,
                        source_id: sourceId,
                        // Emitted empty here — presumably stamped by a later stage; TODO confirm.
                        corpus_version: "",
                        license: STATE_NY_NOTARIES_DEFAULT_LICENSE,
                    };
                    emitted++;
                }
            }
            finally {
                // Release the file handle on every exit path (completion, break, or error).
                stream.destroy();
            }
        },
    };
}
/** Shared default instance created once at module load. */
export const stateNyNotariesAdapter = createStateNyNotariesAdapter();
132
+ //# sourceMappingURL=adapter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/state-ny-notaries/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAEH,OAAO,EAAE,KAAK,IAAI,QAAQ,EAAE,MAAM,WAAW,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAA;AACtE,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAGrD,MAAM,CAAC,MAAM,4BAA4B,GAAG,mBAAmB,CAAA;AAC/D,MAAM,CAAC,MAAM,iCAAiC,GAAG,eAAe,CAAA;AAEhE,MAAM,mBAAmB,GAAG,kCAAkC,CAAA;AAc9D,SAAS,YAAY,CAAC,OAAe;IACpC,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAA;IAC9B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IACzB,MAAM,CAAC,GAAG,mBAAmB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IAC3C,IAAI,CAAC;QAAE,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,EAAE,CAAA;IAC1D,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;AAC3B,CAAC;AAED,MAAM,cAAc,GAAG;IACtB,wBAAwB;IACxB,yBAAyB;IACzB,8BAA8B;IAC9B,mCAAmC;IACnC,mCAAmC;IACnC,+BAA+B;IAC/B,+BAA+B;IAC/B,6BAA6B;IAC7B,qBAAqB;CACZ,CAAA;AAEV,MAAM,UAAU,4BAA4B;IAC3C,OAAO;QACN,EAAE,EAAE,4BAA4B;QAChC,cAAc,EAAE,iCAAiC;QACjD,WAAW,EAAE,oFAAoF;QAEjG,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,6DAA6D,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YAC7F,CAAC;YAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;YACrE,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CACzB,QAAQ,CAAC;gBACR,OAAO,EAAE,IAAI;gBACb,gBAAgB,EAAE,IAAI;gBACtB,YAAY,EAAE,IAAI;gBAClB,kBAAkB,EAAE,IAAI;aACxB,CAAC,CACF,CAAA;YAED,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,IAAI,KAAK,EAAE,MAAM,SAAS,IAAI,MAA+C,EAAE,CAAC;oBAC/E,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,MAAK;oBAC/B,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;wBAAE,MAAK;oBAE5D,4EAA4E;oBAC5E,MAAM,MAAM,GAA2B,EAAE,CAAA;oBACzC,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;wBAC1C,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,GAAG,SAAS,CAAC,GAAG,CAAC,IAAI,EAAE,CAAA;oBAC1C,CAAC;oBAED,MAAM,UAAU,GAAG,CAAC,MAAM,CAAC
,wBAAwB,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAClE,MAAM,YAAY,GAAG,CAAC,MAAM,CAAC,8BAA8B,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC1E,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,mCAAmC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC3E,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,mCAAmC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC3E,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC,8BAA8B,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAClE,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC,+BAA+B,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACxE,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,6BAA6B,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAChE,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,qBAAqB,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAE3D,IAAI,CAAC,IAAI,IAAI,CAAC,SAAS,IAAI,CAAC,GAAG;wBAAE,SAAQ;oBACzC,IAAI,CAAC,QAAQ,IAAI,CAAC,QAAQ;wBAAE,SAAQ;oBAEpC,MAAM,KAAK,GAAG,uBAAuB,CAAC,SAAS,CAAC,CAAA;oBAChD,IAAI,CAAC,KAAK;wBAAE,SAAQ;oBAEpB,MAAM,WAAW,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;oBAClE,MAAM,KAAK,GAAG,YAAY,CAAC,WAAW,CAAC,CAAA;oBACvC,IAAI,CAAC,KAAK;wBAAE,SAAQ;oBAEpB,MAAM,KAAK,GAAG,YAAY,IAAI,UAAU,IAAI,SAAS,CAAA;oBAErD,MAAM,UAAU,GAA+B;wBAC9C,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;wBAC3B,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,KAAK,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;wBACnE,MAAM,EAAE,KAAK,CAAC,MAAM;wBACpB,QAAQ,EAAE,IAAI;wBACd,MAAM,EAAE,KAAK,CAAC,YAAY;wBAC1B,QAAQ,EAAE,GAAG;wBACb,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;qBACxC,CAAA;oBAED,MAAM,UAAU,GAAG,CAAC,KAAK,CAAC,YAAY,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;oBACtF,MAAM,GAAG,GAAG,CAAC,KAAK,EAAE,UAAU,EAAE,CAAC,IAAI,EAAE,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;yBAC5G,MAAM,CAAC,OAAO,CAAC;yBACf,IAAI,CAAC,IAAI,CAAC,CAAA;oBAEZ,MAAM,OAAO,GAAG,mBAAmB,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;oBACpD,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,IAAI,CAAC;wBAAE,SAAQ;oBA
E9C,MAAM,OAAO,GAAG,CAAC,MAAM,CAAC,yBAAyB,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAChE,MAAM,QAAQ,GAAG,OAAO;wBACvB,CAAC,CAAC,GAAG,4BAA4B,IAAI,OAAO,EAAE;wBAC9C,CAAC,CAAC,cAAc,CAAC,4BAA4B,EAAE,OAAO,CAAC,CAAA;oBAExD,MAAM;wBACL,GAAG;wBACH,UAAU,EAAE,OAAO;wBACnB,OAAO,EAAE,IAAI;wBACb,MAAM,EAAE,OAAO;wBACf,MAAM,EAAE,4BAA4B;wBACpC,SAAS,EAAE,QAAQ;wBACnB,cAAc,EAAE,EAAE;wBAClB,OAAO,EAAE,iCAAiC;qBAC1C,CAAA;oBACD,OAAO,EAAE,CAAA;gBACV,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,MAAM,CAAC,OAAO,EAAE,CAAA;YACjB,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,sBAAsB,GAAG,4BAA4B,EAAE,CAAA"}
@@ -0,0 +1,22 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `state-tx-notaries`: Texas Notary Public Commissions CSV consumer.
7
+ *
8
+ * The Texas Secretary of State publishes a registry of commissioned notaries public. Each row
9
+ * optionally carries a mailing address in free-form text (often multi-line with embedded
10
+ * city/state/zip). Address fill rate is ~5-10%.
11
+ *
12
+ * The adapter parses the embedded `Address` field for city/state/zip using a trailing `"CITY, ST
13
+ * ZIP"` pattern.
14
+ *
15
+ * License: stamped `"Public Domain"` per Texas state government open-data terms.
16
+ */
17
+ import type { CorpusAdapter } from "../../types.js";
18
/** Identifier of the Texas notaries adapter. */
export declare const STATE_TX_NOTARIES_ADAPTER_ID = "state-tx-notaries";
/** License label exposed as the adapter's default license. */
export declare const STATE_TX_NOTARIES_DEFAULT_LICENSE = "Public Domain";
/** Build a new Texas notaries adapter instance. */
export declare function createStateTxNotariesAdapter(): CorpusAdapter;
/** Shared default adapter instance. */
export declare const stateTxNotariesAdapter: CorpusAdapter;
22
+ //# sourceMappingURL=adapter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/state-tx-notaries/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAOH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,4BAA4B,sBAAsB,CAAA;AAC/D,eAAO,MAAM,iCAAiC,kBAAkB,CAAA;AAsBhE,wBAAgB,4BAA4B,IAAI,aAAa,CAiG5D;AAED,eAAO,MAAM,sBAAsB,eAAiC,CAAA"}
@@ -0,0 +1,125 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `state-tx-notaries`: Texas Notary Public Commissions CSV consumer.
7
+ *
8
+ * The Texas Secretary of State publishes a registry of commissioned notaries public. Each row
9
+ * optionally carries a mailing address in free-form text (often multi-line with embedded
10
+ * city/state/zip). Address fill rate is ~5-10%.
11
+ *
12
+ * The adapter parses the embedded `Address` field for city/state/zip using a trailing `"CITY, ST
13
+ * ZIP"` pattern.
14
+ *
15
+ * License: stamped `"Public Domain"` per Texas state government open-data terms.
16
+ */
17
+ import { parse as csvParse } from "csv-parse";
18
+ import { createReadStream } from "node:fs";
19
+ import { stableSourceId } from "../../adapter.js";
20
+ import { lookupStateAbbreviation } from "../../codex/us-fips-state.js";
21
+ import { reconcileComponents } from "../../format.js";
22
/**
 * Identifier of the Texas notaries adapter. Within this module it is used as the adapter's `id`,
 * as the `source` stamped on every emitted row, and as the prefix of notary-id-based `source_id`
 * values.
 */
export const STATE_TX_NOTARIES_ADAPTER_ID = "state-tx-notaries";
/**
 * License label used by this module both as the adapter's `defaultLicense` and as the `license`
 * field of every emitted row.
 */
export const STATE_TX_NOTARIES_DEFAULT_LICENSE = "Public Domain";
24
/**
 * Leading house number followed by the rest of the line. The number is a run of digits,
 * optionally followed by `-digits` and optionally one trailing letter; at least one whitespace
 * character must separate it from the remainder.
 */
const HOUSE_NUMBER_PREFIX = /^(\d+(?:-\d+)?[A-Za-z]?)\s+(.+)$/;
/**
 * Trailing "CITY, ST ZIP" or "CITY, ST" at the end of an address line (case-insensitive).
 * Capture groups: 1 = city, 2 = two-letter state, 3 = optional ZIP or ZIP+4.
 */
const CITY_STATE_ZIP_SUFFIX = /[,]?\s*([^,]+),\s*([A-Z]{2})\s*(\d{5}(?:-\d{4})?)?\s*$/i;
/**
 * Separate a leading house number from the street text of a single address line.
 *
 * @param address Address line; surrounding whitespace is ignored.
 * @returns `null` when the line is blank, `{ house_number, street }` when the line begins with a
 *   house number, and `{ street }` (the whole trimmed line) otherwise.
 */
function splitAddress(address) {
    const line = address.trim();
    if (line.length === 0) {
        return null;
    }
    const parts = line.match(HOUSE_NUMBER_PREFIX);
    if (parts === null) {
        // No recognizable house number: the entire line is treated as the street.
        return { street: line };
    }
    const [, houseNumber, remainder] = parts;
    return { house_number: houseNumber, street: remainder.trim() };
}
36
/**
 * Build the `state-tx-notaries` adapter.
 *
 * The returned object exposes `id`, `defaultLicense`, `description` and an async generator
 * `rows(opts)` that streams the CSV at `opts.inputPath` and yields one corpus row per usable
 * record.
 *
 * `rows(opts)` reads:
 * - `opts.country`: when set to anything other than `"US"` the generator throws.
 * - `opts.inputPath`: path of the CSV file to read (UTF-8).
 * - `opts.signal`: iteration stops once `signal.aborted` is true.
 * - `opts.limit`: when defined, at most this many rows are yielded.
 *
 * A record is skipped when its `Address` is empty, when no trailing "CITY, ST [ZIP]" can be
 * matched, when `lookupStateAbbreviation` returns a falsy value for the state, when nothing
 * precedes the city/state/zip suffix, or when `reconcileComponents` returns an object with two or
 * fewer keys.
 *
 * @returns The adapter object described above.
 * @throws Error from `rows()` when `opts.country` is set and is not `"US"`, and any read or parse
 *   error raised while streaming the CSV.
 */
export function createStateTxNotariesAdapter() {
    return {
        id: STATE_TX_NOTARIES_ADAPTER_ID,
        defaultLicense: STATE_TX_NOTARIES_DEFAULT_LICENSE,
        description: "Texas Notary Public Commissions — name + mailing address with embedded city/state/zip (public-domain).",
        async *rows(opts) {
            if (opts.country && opts.country !== "US") {
                throw new Error(`state-tx-notaries adapter: only US supported, got country=${opts.country}`);
            }
            const stream = createReadStream(opts.inputPath, { encoding: "utf8" });
            const parser = stream.pipe(csvParse({
                columns: true,
                skip_empty_lines: true,
                relax_quotes: true,
                relax_column_count: true,
            }));
            // `.pipe()` does not forward errors from the source stream. Without this listener a
            // missing or unreadable input file raises an unhandled 'error' event instead of
            // rejecting the iteration below; destroying the parser with the error surfaces it to
            // the caller of `rows()`.
            stream.on("error", (err) => parser.destroy(err));
            let emitted = 0;
            try {
                for await (const record of parser) {
                    if (opts.signal?.aborted)
                        break;
                    if (opts.limit !== undefined && emitted >= opts.limit)
                        break;
                    const rawAddress = (record.Address ?? "").trim();
                    if (!rawAddress)
                        continue;
                    const firstName = (record["First Name"] ?? "").trim();
                    const lastName = (record["Last Name"] ?? "").trim();
                    const notaryId = (record["Notary ID"] ?? "").trim();
                    // Parse embedded city/state/zip from the trailing portion of the address.
                    // Addresses look like: "1215 MCMILLAN DR\nCEDAR HILL, TX 75104"
                    // Line breaks may be LF or CRLF; both are flattened so no stray "\r" is left
                    // inside the street text.
                    const addrSingleLine = rawAddress.replace(/\r?\n/g, ", ");
                    const cszMatch = CITY_STATE_ZIP_SUFFIX.exec(addrSingleLine);
                    if (!cszMatch)
                        continue;
                    const city = (cszMatch[1] ?? "").trim();
                    const stateAbbr = (cszMatch[2] ?? "").trim();
                    const zip = (cszMatch[3] ?? "").trim();
                    if (!city || !stateAbbr)
                        continue;
                    const state = lookupStateAbbreviation(stateAbbr);
                    if (!state)
                        continue;
                    // Extract the street portion (everything before the city/state/zip)
                    const streetPortion = addrSingleLine.slice(0, cszMatch.index).replace(/,\s*$/, "").trim();
                    if (!streetPortion)
                        continue;
                    const split = splitAddress(streetPortion);
                    if (!split)
                        continue;
                    // The notary's own name is used as the venue; omitted when both parts are blank.
                    const venue = [firstName, lastName].filter(Boolean).join(" ") || undefined;
                    const components = {
                        ...(venue ? { venue } : {}),
                        ...(split.house_number ? { house_number: split.house_number } : {}),
                        street: split.street,
                        locality: city,
                        region: state.abbreviation,
                        ...(zip ? { postcode: zip } : {}),
                    };
                    // NOTE(review): `raw` uses the state text matched from the address while `region`
                    // uses the looked-up abbreviation — confirm these always agree (e.g. in letter case).
                    const streetPart = [split.house_number, split.street].filter(Boolean).join(" ").trim();
                    const raw = [venue, streetPart, [city, [stateAbbr, zip].filter(Boolean).join(" ")].filter(Boolean).join(", ")]
                        .filter(Boolean)
                        .join(", ");
                    const aligned = reconcileComponents(components, raw);
                    if (Object.keys(aligned).length <= 2)
                        continue;
                    // Use the notary id as the id when present; otherwise derive one from the
                    // reconciled components.
                    const sourceId = notaryId
                        ? `${STATE_TX_NOTARIES_ADAPTER_ID}-${notaryId}`
                        : stableSourceId(STATE_TX_NOTARIES_ADAPTER_ID, aligned);
                    yield {
                        raw,
                        components: aligned,
                        country: "US",
                        locale: "en-US",
                        source: STATE_TX_NOTARIES_ADAPTER_ID,
                        source_id: sourceId,
                        // Emitted empty here — presumably stamped by a later stage; TODO confirm.
                        corpus_version: "",
                        license: STATE_TX_NOTARIES_DEFAULT_LICENSE,
                    };
                    emitted++;
                }
            }
            finally {
                // Release the file handle on every exit path (completion, break, or error).
                stream.destroy();
            }
        },
    };
}
/** Shared default instance created once at module load. */
export const stateTxNotariesAdapter = createStateTxNotariesAdapter();
125
+ //# sourceMappingURL=adapter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/state-tx-notaries/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,EAAE,KAAK,IAAI,QAAQ,EAAE,MAAM,WAAW,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAA;AACtE,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAGrD,MAAM,CAAC,MAAM,4BAA4B,GAAG,mBAAmB,CAAA;AAC/D,MAAM,CAAC,MAAM,iCAAiC,GAAG,eAAe,CAAA;AAEhE,MAAM,mBAAmB,GAAG,kCAAkC,CAAA;AAE9D,iFAAiF;AACjF,MAAM,qBAAqB,GAAG,yDAAyD,CAAA;AASvF,SAAS,YAAY,CAAC,OAAe;IACpC,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAA;IAC9B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IACzB,MAAM,CAAC,GAAG,mBAAmB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IAC3C,IAAI,CAAC;QAAE,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,EAAE,CAAA;IAC1D,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;AAC3B,CAAC;AAED,MAAM,UAAU,4BAA4B;IAC3C,OAAO;QACN,EAAE,EAAE,4BAA4B;QAChC,cAAc,EAAE,iCAAiC;QACjD,WAAW,EACV,wGAAwG;QAEzG,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,6DAA6D,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YAC7F,CAAC;YAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;YACrE,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CACzB,QAAQ,CAAC;gBACR,OAAO,EAAE,IAAI;gBACb,gBAAgB,EAAE,IAAI;gBACtB,YAAY,EAAE,IAAI;gBAClB,kBAAkB,EAAE,IAAI;aACxB,CAAC,CACF,CAAA;YAED,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI,MAAoC,EAAE,CAAC;oBACjE,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,MAAK;oBAC/B,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;wBAAE,MAAK;oBAE5D,MAAM,UAAU,GAAG,CAAC,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAChD,IAAI,CAAC,UAAU;wBAAE,SAAQ;oBAEzB,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACrD,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACnD,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,EAAE,CAAC,CAAC,
IAAI,EAAE,CAAA;oBAEnD,0EAA0E;oBAC1E,gEAAgE;oBAChE,MAAM,cAAc,GAAG,UAAU,CAAC,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,CAAA;oBACtD,MAAM,QAAQ,GAAG,qBAAqB,CAAC,IAAI,CAAC,cAAc,CAAC,CAAA;oBAC3D,IAAI,CAAC,QAAQ;wBAAE,SAAQ;oBAEvB,MAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACvC,MAAM,SAAS,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC5C,MAAM,GAAG,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAEtC,IAAI,CAAC,IAAI,IAAI,CAAC,SAAS;wBAAE,SAAQ;oBAEjC,MAAM,KAAK,GAAG,uBAAuB,CAAC,SAAS,CAAC,CAAA;oBAChD,IAAI,CAAC,KAAK;wBAAE,SAAQ;oBAEpB,oEAAoE;oBACpE,MAAM,aAAa,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACzF,IAAI,CAAC,aAAa;wBAAE,SAAQ;oBAE5B,MAAM,KAAK,GAAG,YAAY,CAAC,aAAa,CAAC,CAAA;oBACzC,IAAI,CAAC,KAAK;wBAAE,SAAQ;oBAEpB,MAAM,KAAK,GAAG,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,SAAS,CAAA;oBAE1E,MAAM,UAAU,GAA+B;wBAC9C,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;wBAC3B,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,KAAK,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;wBACnE,MAAM,EAAE,KAAK,CAAC,MAAM;wBACpB,QAAQ,EAAE,IAAI;wBACd,MAAM,EAAE,KAAK,CAAC,YAAY;wBAC1B,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;qBACjC,CAAA;oBAED,MAAM,UAAU,GAAG,CAAC,KAAK,CAAC,YAAY,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;oBACtF,MAAM,GAAG,GAAG,CAAC,KAAK,EAAE,UAAU,EAAE,CAAC,IAAI,EAAE,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;yBAC5G,MAAM,CAAC,OAAO,CAAC;yBACf,IAAI,CAAC,IAAI,CAAC,CAAA;oBAEZ,MAAM,OAAO,GAAG,mBAAmB,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;oBACpD,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,IAAI,CAAC;wBAAE,SAAQ;oBAE9C,MAAM,QAAQ,GAAG,QAAQ;wBACxB,CAAC,CAAC,GAAG,4BAA4B,IAAI,QAAQ,EAAE;wBAC/C,CAAC,CAAC,cAAc,CAAC,4BAA4B,EAAE,OAAO,CAAC,CAAA;oBAExD,MAAM;wBACL,
GAAG;wBACH,UAAU,EAAE,OAAO;wBACnB,OAAO,EAAE,IAAI;wBACb,MAAM,EAAE,OAAO;wBACf,MAAM,EAAE,4BAA4B;wBACpC,SAAS,EAAE,QAAQ;wBACnB,cAAc,EAAE,EAAE;wBAClB,OAAO,EAAE,iCAAiC;qBAC1C,CAAA;oBACD,OAAO,EAAE,CAAA;gBACV,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,MAAM,CAAC,OAAO,EAAE,CAAA;YACjB,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,sBAAsB,GAAG,4BAA4B,EAAE,CAAA"}
@@ -0,0 +1,45 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `tiger`: US Census TIGER/Line consumer adapter.
7
+ *
8
+ * TIGER/Line is the canonical US street + locality dataset published by the Census Bureau as a
9
+ * **public-domain** product (no ODbL share-alike concerns for US-only corpora). Coverage extends
10
+ * to every named street segment + every incorporated place + CDP across the 50 states + DC + the
11
+ * five primary territories — substantially better US street-name coverage than OSM, especially in
12
+ * rural areas.
13
+ *
14
+ * Following the `wof-admin` / `wof-postalcode` pattern, this adapter consumes a SQLite database the
15
+ * operator pre-builds from the raw TIGER shapefiles (see the README for the schema and a
16
+ * suggested `ogr2ogr` pipeline). The mailwoman side does not parse Shapefile binary directly —
17
+ * keeping the adapter narrow lets the operator pick their own ingestion tool (ogr2ogr / shp2pgsql
18
+ * / a custom Python script / etc.) without forcing a heavy native dep into `@mailwoman/corpus`.
19
+ *
20
+ * Two row classes are emitted:
21
+ *
22
+ * - **Street-level** (`tiger_streets`): one row per segment, optionally with up to two postcode
23
+ * variants if `zipl` / `zipr` differ. Components: `{ street, region, postcode? }`. Streets
24
+ * without a recognized state FIPS are dropped — there's no useful row without `region`.
25
+ * - **Locality-level** (`tiger_places`): up to three variants per place: locality-only,
26
+ * locality-with-region, locality-with-region-country (mirrors `wof-admin`'s fan-out for
27
+ * consistency).
28
+ *
29
+ * Salvaged components from `isp-nexus/universe@6eeb7bd9`:
30
+ *
31
+ * - `packages/corpus/src/codex/us-fips-state.ts` — the FIPS → `{abbreviation, name}` lookup table
32
+ * (originally `tiger/state.ts`, AGPL-3.0 → AGPL-3.0). The full isp-nexus TIGER module ships a
33
+ * TypeORM-backed service layer; mailwoman only needs the lookup data so we don't carry the
34
+ * service layer over.
35
+ *
36
+ * License: stamped `"Public Domain"` per Census Bureau guidance on TIGER/Line. No per-row override
37
+ * needed — every row in TIGER is the same license.
38
+ */
39
+ import type { CorpusAdapter } from "../../types.js";
40
/** Identifier of the TIGER/Line adapter. */
export declare const TIGER_ADAPTER_ID = "tiger";
/** License label for the TIGER adapter; the file header states rows are stamped `"Public Domain"`. */
export declare const TIGER_DEFAULT_LICENSE = "Public Domain";
/** Build a TIGER adapter. Pure factory so multiple instances can be created in tests. */
export declare function createTigerAdapter(): CorpusAdapter;
/** Shared default adapter instance. */
export declare const tigerAdapter: CorpusAdapter;
45
+ //# sourceMappingURL=adapter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/tiger/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAKH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,gBAAgB,UAAU,CAAA;AACvC,eAAO,MAAM,qBAAqB,kBAAkB,CAAA;AAsFpD,yFAAyF;AACzF,wBAAgB,kBAAkB,IAAI,aAAa,CAsElD;AAED,eAAO,MAAM,YAAY,eAAuB,CAAA"}
@@ -0,0 +1,179 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `tiger`: US Census TIGER/Line consumer adapter.
7
+ *
8
+ * TIGER/Line is the canonical US street + locality dataset published by the Census Bureau as a
9
+ * **public-domain** product (no ODbL share-alike concerns for US-only corpora). Coverage extends
10
+ * to every named street segment + every incorporated place + CDP across the 50 states + DC + the
11
+ * five primary territories — substantially better US street-name coverage than OSM, especially in
12
+ * rural areas.
13
+ *
14
+ * Following the `wof-admin` / `wof-postalcode` pattern, this adapter consumes a SQLite database the
15
+ * operator pre-builds from the raw TIGER shapefiles (see the README for the schema and a
16
+ * suggested `ogr2ogr` pipeline). The mailwoman side does not parse Shapefile binary directly —
17
+ * keeping the adapter narrow lets the operator pick their own ingestion tool (ogr2ogr / shp2pgsql
18
+ * / a custom Python script / etc.) without forcing a heavy native dep into `@mailwoman/corpus`.
19
+ *
20
+ * Two row classes are emitted:
21
+ *
22
+ * - **Street-level** (`tiger_streets`): one row per segment, optionally with up to two postcode
23
+ * variants if `zipl` / `zipr` differ. Components: `{ street, region, postcode? }`. Streets
24
+ * without a recognized state FIPS are dropped — there's no useful row without `region`.
25
+ * - **Locality-level** (`tiger_places`): up to three variants per place: locality-only,
26
+ * locality-with-region, locality-with-region-country (mirrors `wof-admin`'s fan-out for
27
+ * consistency).
28
+ *
29
+ * Salvaged components from `isp-nexus/universe@6eeb7bd9`:
30
+ *
31
+ * - `packages/corpus/src/codex/us-fips-state.ts` — the FIPS → `{abbreviation, name}` lookup table
32
+ * (originally `tiger/state.ts`, AGPL-3.0 → AGPL-3.0). The full isp-nexus TIGER module ships a
33
+ * TypeORM-backed service layer; mailwoman only needs the lookup data so we don't carry the
34
+ * service layer over.
35
+ *
36
+ * License: stamped `"Public Domain"` per Census Bureau guidance on TIGER/Line. No per-row override
37
+ * needed — every row in TIGER is the same license.
38
+ */
39
+ import Database from "better-sqlite3";
40
+ import { lookupFipsState } from "../../codex/us-fips-state.js";
41
+ import { formatAddress, reconcileComponents } from "../../format.js";
42
+ export const TIGER_ADAPTER_ID = "tiger";
43
+ export const TIGER_DEFAULT_LICENSE = "Public Domain";
44
+ /**
45
+ * The country surface form used in `formatAddress` for US. Matches the canonical OpenCage US
46
+ * template output so reconciliation doesn't strip it when the row carries `country` explicitly.
47
+ */
48
+ const US_COUNTRY_DISPLAY = "United States of America";
/**
 * Expand one `tiger_streets` row into its street-level variants.
 *
 * Nothing is yielded when `fullname` is missing or blank, or when `lookupFipsState` does not
 * recognise `statefp`. Otherwise the postcode variants are:
 *
 * - No ZIP set → one variant, street + region (`variantKey: "no-zip"`).
 * - `zipl === zipr` → one variant, street + region + postcode (`variantKey: "zip-<zip>"`).
 * - ZIPs differ, or only one side is set → one variant per populated side
 *   (`variantKey: "zipl-<zip>"` / `"zipr-<zip>"`).
 *
 * @param row A street row exposing `fullname`, `zipl`, `zipr` and `statefp`.
 * @yields `{ components, variantKey }` objects; `components` always carries `street` and
 *   `region`, plus `postcode` when a ZIP is present.
 */
function* streetVariants(row) {
    // Read `fullname` null-safely, exactly like `zipl` / `zipr` below: a NULL name means
    // "drop the row", not "throw a TypeError in the middle of the stream".
    const street = row.fullname?.trim() ?? "";
    if (!street)
        return;
    const state = lookupFipsState(row.statefp);
    if (!state)
        return;
    const zipl = row.zipl?.trim() ?? "";
    const zipr = row.zipr?.trim() ?? "";
    const baseComponents = {
        street,
        region: state.abbreviation,
    };
    if (!zipl && !zipr) {
        yield { components: baseComponents, variantKey: "no-zip" };
        return;
    }
    // At least one side is non-empty here, so equality implies both sides carry the same ZIP.
    if (zipl === zipr) {
        yield {
            components: { ...baseComponents, postcode: zipl },
            variantKey: `zip-${zipl}`,
        };
        return;
    }
    // The sides differ (or only one is populated): emit one variant per populated side.
    if (zipl)
        yield { components: { ...baseComponents, postcode: zipl }, variantKey: `zipl-${zipl}` };
    if (zipr)
        yield { components: { ...baseComponents, postcode: zipr }, variantKey: `zipr-${zipr}` };
}
/**
 * Expand one `tiger_places` row into three locality-level variants, mirroring `wof-admin`'s
 * fan-out: locality only, locality + region, and locality + region + country.
 *
 * Nothing is yielded when `name` is missing or blank, or when `lookupFipsState` does not
 * recognise `statefp`.
 *
 * @param row A place row exposing `name` and `statefp`.
 * @yields `{ components, variantKey }` objects with `variantKey` set to `"locality-only"`,
 *   `"with-region"` and `"with-region-country"`, in that order.
 */
function* placeVariants(row) {
    // Null-safe read: a NULL `name` drops the row instead of throwing a TypeError.
    const name = row.name?.trim() ?? "";
    if (!name)
        return;
    const state = lookupFipsState(row.statefp);
    if (!state)
        return;
    yield {
        components: { locality: name },
        variantKey: "locality-only",
    };
    yield {
        components: { locality: name, region: state.abbreviation },
        variantKey: "with-region",
    };
    yield {
        components: { locality: name, region: state.abbreviation, country: US_COUNTRY_DISPLAY },
        variantKey: "with-region-country",
    };
}
/**
 * Format one variant and package it as a corpus row.
 *
 * @param variant A `{ components, variantKey }` object from `streetVariants` / `placeVariants`.
 * @param sourceId The fully-built `source_id` for the row.
 * @returns The row object, or `null` when `formatAddress` yields an empty string or
 *   `reconcileComponents` leaves no components (the caller skips such variants).
 */
function buildTigerRow(variant, sourceId) {
    const raw = formatAddress(variant.components, "US", { separator: ", " });
    if (!raw)
        return null;
    const aligned = reconcileComponents(variant.components, raw);
    if (Object.keys(aligned).length === 0)
        return null;
    return {
        raw,
        components: aligned,
        country: "US",
        locale: "en-US",
        source: TIGER_ADAPTER_ID,
        source_id: sourceId,
        // Left empty by this adapter. NOTE(review): presumably stamped by the caller — confirm.
        corpus_version: "",
        license: TIGER_DEFAULT_LICENSE,
    };
}
/**
 * Build a TIGER adapter. Pure factory so multiple instances can be created in tests.
 *
 * The adapter's `rows(opts)` async generator:
 *
 * - throws an `Error` when `opts.country` is set to anything other than `"US"`;
 * - opens `opts.inputPath` read-only (the file must already exist) and always closes it again;
 * - emits every street variant first, then every place variant;
 * - stops early when `opts.signal` is aborted (checked once per database row) or once
 *   `opts.limit` rows have been emitted (checked before each variant).
 */
export function createTigerAdapter() {
    return {
        id: TIGER_ADAPTER_ID,
        defaultLicense: TIGER_DEFAULT_LICENSE,
        description: "US Census TIGER/Line streets + places consumer (public-domain); SQLite DB the operator builds via ogr2ogr.",
        async *rows(opts) {
            if (opts.country && opts.country !== "US") {
                throw new Error(`tiger adapter: only US supported, got country=${opts.country}`);
            }
            const db = new Database(opts.inputPath, { readonly: true, fileMustExist: true });
            let emitted = 0;
            try {
                // Both statements are prepared up front so a missing table fails before any row
                // is emitted.
                const streetStmt = db.prepare(`SELECT linearid, fullname, zipl, zipr, statefp FROM tiger_streets`);
                const placeStmt = db.prepare(`SELECT geoid, name, statefp, lsad FROM tiger_places`);
                // One entry per row class: where rows come from, how they fan out into variants,
                // and the class-specific middle part of `source_id`.
                const passes = [
                    { stmt: streetStmt, variantsOf: streetVariants, idOf: (row) => `st-${row.linearid}` },
                    { stmt: placeStmt, variantsOf: placeVariants, idOf: (row) => `pl-${row.geoid}` },
                ];
                for (const { stmt, variantsOf, idOf } of passes) {
                    for (const row of stmt.iterate()) {
                        if (opts.signal?.aborted)
                            return;
                        for (const variant of variantsOf(row)) {
                            if (opts.limit !== undefined && emitted >= opts.limit)
                                return;
                            const canonical = buildTigerRow(variant, `${TIGER_ADAPTER_ID}-${idOf(row)}-${variant.variantKey}`);
                            if (!canonical)
                                continue;
                            yield canonical;
                            // Only rows that were actually yielded count towards `opts.limit`.
                            emitted++;
                        }
                    }
                }
            }
            finally {
                // Runs on normal completion, early return, thrown errors, and when the consumer
                // stops iterating early.
                db.close();
            }
        },
    };
}
178
+ export const tigerAdapter = createTigerAdapter();
179
+ //# sourceMappingURL=adapter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/tiger/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAEH,OAAO,QAAQ,MAAM,gBAAgB,CAAA;AACrC,OAAO,EAAE,eAAe,EAAE,MAAM,8BAA8B,CAAA;AAC9D,OAAO,EAAE,aAAa,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAGpE,MAAM,CAAC,MAAM,gBAAgB,GAAG,OAAO,CAAA;AACvC,MAAM,CAAC,MAAM,qBAAqB,GAAG,eAAe,CAAA;AAEpD;;;GAGG;AACH,MAAM,kBAAkB,GAAG,0BAA0B,CAAA;AAiBrD;;;;;;GAMG;AACH,QAAQ,CAAC,CAAC,cAAc,CAAC,GAAmB;IAI3C,MAAM,MAAM,GAAG,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAA;IAClC,IAAI,CAAC,MAAM;QAAE,OAAM;IACnB,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,CAAA;IAC1C,IAAI,CAAC,KAAK;QAAE,OAAM;IAElB,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IACnC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IAEnC,MAAM,cAAc,GAA+B;QAClD,MAAM;QACN,MAAM,EAAE,KAAK,CAAC,YAAY;KAC1B,CAAA;IAED,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QACpB,MAAM,EAAE,UAAU,EAAE,cAAc,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAA;QAC1D,OAAM;IACP,CAAC;IACD,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QACnC,MAAM;YACL,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE;YACjD,UAAU,EAAE,OAAO,IAAI,EAAE;SACzB,CAAA;QACD,OAAM;IACP,CAAC;IACD,IAAI,IAAI;QAAE,MAAM,EAAE,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE,EAAE,UAAU,EAAE,QAAQ,IAAI,EAAE,EAAE,CAAA;IACjG,IAAI,IAAI,IAAI,IAAI,KAAK,IAAI;QAAE,MAAM,EAAE,UAAU,EAAE,EAAE,GAAG,cAAc,EAAE,QAAQ,EAAE,IAAI,EAAE,EAAE,UAAU,EAAE,QAAQ,IAAI,EAAE,EAAE,CAAA;AACnH,CAAC;AAED,sEAAsE;AACtE,QAAQ,CAAC,CAAC,aAAa,CAAC,GAAkB;IAIzC,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAA;IAC5B,IAAI,CAAC,IAAI;QAAE,OAAM;IACjB,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,OAAO,CAAC,CAAA;IAC1C,IAAI,CAAC,KAAK;QAAE,OAAM;IAElB,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE;QAC9B,UAAU,EAAE,eAAe;KAC3B,CAAA;IACD,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,CAAC,YAAY,EAAE;QAC1D,UAAU,EAAE,aAAa;KACzB,CAAA;IACD,MAAM;QACL,UAAU,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,CAAC,YAAY,EAAE,OAAO,EAAE,kBAAkB,EAAE;QACvF,UAAU,EAAE,qBAAqB;KA
CjC,CAAA;AACF,CAAC;AAED,yFAAyF;AACzF,MAAM,UAAU,kBAAkB;IACjC,OAAO;QACN,EAAE,EAAE,gBAAgB;QACpB,cAAc,EAAE,qBAAqB;QACrC,WAAW,EACV,4GAA4G;QAE7G,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,iDAAiD,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YACjF,CAAC;YAED,MAAM,EAAE,GAAG,IAAI,QAAQ,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAA;YAChF,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,MAAM,UAAU,GAAG,EAAE,CAAC,OAAO,CAC5B,mEAAmE,CACnE,CAAA;gBACD,MAAM,SAAS,GAAG,EAAE,CAAC,OAAO,CAAoB,qDAAqD,CAAC,CAAA;gBAEtG,KAAK,MAAM,GAAG,IAAI,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC;oBACxC,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,OAAM;oBAChC,KAAK,MAAM,OAAO,IAAI,cAAc,CAAC,GAAG,CAAC,EAAE,CAAC;wBAC3C,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;4BAAE,OAAM;wBAC7D,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,UAAU,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;wBACxE,IAAI,CAAC,GAAG;4BAAE,SAAQ;wBAClB,MAAM,OAAO,GAAG,mBAAmB,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;wBAC5D,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;4BAAE,SAAQ;wBAE/C,MAAM;4BACL,GAAG;4BACH,UAAU,EAAE,OAAO;4BACnB,OAAO,EAAE,IAAI;4BACb,MAAM,EAAE,OAAO;4BACf,MAAM,EAAE,gBAAgB;4BACxB,SAAS,EAAE,GAAG,gBAAgB,OAAO,GAAG,CAAC,QAAQ,IAAI,OAAO,CAAC,UAAU,EAAE;4BACzE,cAAc,EAAE,EAAE;4BAClB,OAAO,EAAE,qBAAqB;yBAC9B,CAAA;wBACD,OAAO,EAAE,CAAA;oBACV,CAAC;gBACF,CAAC;gBAED,KAAK,MAAM,GAAG,IAAI,SAAS,CAAC,OAAO,EAAE,EAAE,CAAC;oBACvC,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,OAAM;oBAChC,KAAK,MAAM,OAAO,IAAI,aAAa,CAAC,GAAG,CAAC,EAAE,CAAC;wBAC1C,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;4BAAE,OAAM;wBAC7D,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,UAAU,EAAE,IAAI,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAA;wBACxE,IAAI,CAAC,GAAG;4BAAE,SAAQ;wBAClB,MAAM,OAAO,GAAG,mBAAmB,CAAC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;wBAC5D,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;4BAAE,SAAQ;wBAE/C,MAAM;4BACL,GAAG;4BACH,UAAU,EAAE,OAAO;4BACnB,OAAO,EAAE,IAAI;4BACb,MAAM,EAAE,OAAO;4BACf,MAAM,EAAE,gBA
AgB;4BACxB,SAAS,EAAE,GAAG,gBAAgB,OAAO,GAAG,CAAC,KAAK,IAAI,OAAO,CAAC,UAAU,EAAE;4BACtE,cAAc,EAAE,EAAE;4BAClB,OAAO,EAAE,qBAAqB;yBAC9B,CAAA;wBACD,OAAO,EAAE,CAAA;oBACV,CAAC;gBACF,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,EAAE,CAAC,KAAK,EAAE,CAAA;YACX,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,YAAY,GAAG,kBAAkB,EAAE,CAAA"}
@@ -0,0 +1,36 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `usgov-hrsa-fqhc`: HRSA "Health Center Service Delivery Site Locations" CSV consumer.
7
+ *
8
+ * Federally Qualified Health Centers (FQHCs) are HRSA-funded community health programs that
9
+ * self-report site addresses to the HRSA Data Warehouse. The published CSV (`data.hrsa.gov`)
10
+ * carries the site name, the postal-formatted street address, and the locality/region/postcode
11
+ * triple. Phase 1.6 §1.2 (#22) selects this source for its adversarial-value-per-row: every
12
+ * facility name is a human-typed venue string and the addresses pass through enough hands to
13
+ * accumulate the abbreviation drift + suite designator chaos that pure gazetteer data does not.
14
+ *
15
+ * The adapter consumes a CSV file the operator pre-downloads. The HRSA data is published as a
16
+ * single national CSV (~10K rows), small enough that the operator can re-fetch on every corpus
17
+ * rebuild without an intermediate SQLite step. Column names below match the HRSA Data Warehouse's
18
+ * "Health Center Service Delivery Site" public dataset. Operators substituting the
19
+ * closely-related "Site Address" or "Health Center" public extracts may need to remap columns;
20
+ * the README documents the expected set.
21
+ *
22
+ * Output: one row per CSV record, with the `venue` component carrying the site name and the
23
+ * address fields on `(house_number, street, locality, region, postcode)`. Component order is load-bearing:
24
+ * `venue` is inserted FIRST so alignment claims its surface span before `locality` searches for
25
+ * its own (the kryptonite case "Buffalo Health Clinic, …, Buffalo, NY" relies on `venue`
26
+ * consuming the first "Buffalo" so locality lands on the second).
27
+ *
28
+ * License: stamped `"Public Domain"` per the HRSA Data Warehouse's federal government distribution
29
+ * terms.
30
+ */
31
+ import type { CorpusAdapter } from "../../types.js";
32
+ export declare const USGOV_HRSA_FQHC_ADAPTER_ID = "usgov-hrsa-fqhc";
33
+ export declare const USGOV_HRSA_FQHC_DEFAULT_LICENSE = "Public Domain";
34
+ export declare function createUsgovHrsaFqhcAdapter(): CorpusAdapter;
35
+ export declare const usgovHrsaFqhcAdapter: CorpusAdapter;
36
+ //# sourceMappingURL=adapter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/usgov-hrsa-fqhc/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AAOH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,0BAA0B,oBAAoB,CAAA;AAC3D,eAAO,MAAM,+BAA+B,kBAAkB,CAAA;AA4D9D,wBAAgB,0BAA0B,IAAI,aAAa,CA+E1D;AAED,eAAO,MAAM,oBAAoB,eAA+B,CAAA"}