@mailwoman/corpus 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/out/src/adapter.d.ts +96 -0
  2. package/out/src/adapter.d.ts.map +1 -0
  3. package/out/src/adapter.js +107 -0
  4. package/out/src/adapter.js.map +1 -0
  5. package/out/src/adapters/ban/adapter.d.ts +32 -0
  6. package/out/src/adapters/ban/adapter.d.ts.map +1 -0
  7. package/out/src/adapters/ban/adapter.js +133 -0
  8. package/out/src/adapters/ban/adapter.js.map +1 -0
  9. package/out/src/adapters/fcc-bdc/adapter.d.ts +61 -0
  10. package/out/src/adapters/fcc-bdc/adapter.d.ts.map +1 -0
  11. package/out/src/adapters/fcc-bdc/adapter.js +153 -0
  12. package/out/src/adapters/fcc-bdc/adapter.js.map +1 -0
  13. package/out/src/adapters/index.d.ts +42 -0
  14. package/out/src/adapters/index.d.ts.map +1 -0
  15. package/out/src/adapters/index.js +76 -0
  16. package/out/src/adapters/index.js.map +1 -0
  17. package/out/src/adapters/openaddresses/adapter.d.ts +60 -0
  18. package/out/src/adapters/openaddresses/adapter.d.ts.map +1 -0
  19. package/out/src/adapters/openaddresses/adapter.js +174 -0
  20. package/out/src/adapters/openaddresses/adapter.js.map +1 -0
  21. package/out/src/adapters/state-ia-contractors/adapter.d.ts +23 -0
  22. package/out/src/adapters/state-ia-contractors/adapter.d.ts.map +1 -0
  23. package/out/src/adapters/state-ia-contractors/adapter.js +113 -0
  24. package/out/src/adapters/state-ia-contractors/adapter.js.map +1 -0
  25. package/out/src/adapters/state-ny-notaries/adapter.d.ts +21 -0
  26. package/out/src/adapters/state-ny-notaries/adapter.d.ts.map +1 -0
  27. package/out/src/adapters/state-ny-notaries/adapter.js +132 -0
  28. package/out/src/adapters/state-ny-notaries/adapter.js.map +1 -0
  29. package/out/src/adapters/state-tx-notaries/adapter.d.ts +22 -0
  30. package/out/src/adapters/state-tx-notaries/adapter.d.ts.map +1 -0
  31. package/out/src/adapters/state-tx-notaries/adapter.js +125 -0
  32. package/out/src/adapters/state-tx-notaries/adapter.js.map +1 -0
  33. package/out/src/adapters/tiger/adapter.d.ts +45 -0
  34. package/out/src/adapters/tiger/adapter.d.ts.map +1 -0
  35. package/out/src/adapters/tiger/adapter.js +179 -0
  36. package/out/src/adapters/tiger/adapter.js.map +1 -0
  37. package/out/src/adapters/usgov-hrsa-fqhc/adapter.d.ts +36 -0
  38. package/out/src/adapters/usgov-hrsa-fqhc/adapter.d.ts.map +1 -0
  39. package/out/src/adapters/usgov-hrsa-fqhc/adapter.js +147 -0
  40. package/out/src/adapters/usgov-hrsa-fqhc/adapter.js.map +1 -0
  41. package/out/src/adapters/usgov-imls-pls/adapter.d.ts +25 -0
  42. package/out/src/adapters/usgov-imls-pls/adapter.d.ts.map +1 -0
  43. package/out/src/adapters/usgov-imls-pls/adapter.js +118 -0
  44. package/out/src/adapters/usgov-imls-pls/adapter.js.map +1 -0
  45. package/out/src/adapters/usgov-nad/adapter.d.ts +37 -0
  46. package/out/src/adapters/usgov-nad/adapter.d.ts.map +1 -0
  47. package/out/src/adapters/usgov-nad/adapter.js +227 -0
  48. package/out/src/adapters/usgov-nad/adapter.js.map +1 -0
  49. package/out/src/adapters/usgov-nppes/adapter.d.ts +28 -0
  50. package/out/src/adapters/usgov-nppes/adapter.d.ts.map +1 -0
  51. package/out/src/adapters/usgov-nppes/adapter.js +123 -0
  52. package/out/src/adapters/usgov-nppes/adapter.js.map +1 -0
  53. package/out/src/adapters/usgov-samhsa-treatment-locator/adapter.d.ts +35 -0
  54. package/out/src/adapters/usgov-samhsa-treatment-locator/adapter.d.ts.map +1 -0
  55. package/out/src/adapters/usgov-samhsa-treatment-locator/adapter.js +162 -0
  56. package/out/src/adapters/usgov-samhsa-treatment-locator/adapter.js.map +1 -0
  57. package/out/src/adapters/wof-admin-json/adapter.d.ts +85 -0
  58. package/out/src/adapters/wof-admin-json/adapter.d.ts.map +1 -0
  59. package/out/src/adapters/wof-admin-json/adapter.js +241 -0
  60. package/out/src/adapters/wof-admin-json/adapter.js.map +1 -0
  61. package/out/src/adapters/wof-postalcode-json/adapter.d.ts +63 -0
  62. package/out/src/adapters/wof-postalcode-json/adapter.d.ts.map +1 -0
  63. package/out/src/adapters/wof-postalcode-json/adapter.js +178 -0
  64. package/out/src/adapters/wof-postalcode-json/adapter.js.map +1 -0
  65. package/out/src/align.d.ts +58 -0
  66. package/out/src/align.d.ts.map +1 -0
  67. package/out/src/align.js +139 -0
  68. package/out/src/align.js.map +1 -0
  69. package/out/src/build.d.ts +104 -0
  70. package/out/src/build.d.ts.map +1 -0
  71. package/out/src/build.js +201 -0
  72. package/out/src/build.js.map +1 -0
  73. package/out/src/codex/us-fips-state.d.ts +44 -0
  74. package/out/src/codex/us-fips-state.d.ts.map +1 -0
  75. package/out/src/codex/us-fips-state.js +105 -0
  76. package/out/src/codex/us-fips-state.js.map +1 -0
  77. package/out/src/codex/us-street-suffix.d.ts +259 -0
  78. package/out/src/codex/us-street-suffix.d.ts.map +1 -0
  79. package/out/src/codex/us-street-suffix.js +285 -0
  80. package/out/src/codex/us-street-suffix.js.map +1 -0
  81. package/out/src/format.d.ts +79 -0
  82. package/out/src/format.d.ts.map +1 -0
  83. package/out/src/format.js +151 -0
  84. package/out/src/format.js.map +1 -0
  85. package/out/src/golden.d.ts +50 -0
  86. package/out/src/golden.d.ts.map +1 -0
  87. package/out/src/golden.js +104 -0
  88. package/out/src/golden.js.map +1 -0
  89. package/out/src/index.d.ts +18 -0
  90. package/out/src/index.d.ts.map +1 -0
  91. package/out/src/index.js +18 -0
  92. package/out/src/index.js.map +1 -0
  93. package/out/src/parquet-wrapper/index.d.ts +12 -0
  94. package/out/src/parquet-wrapper/index.d.ts.map +1 -0
  95. package/out/src/parquet-wrapper/index.js +12 -0
  96. package/out/src/parquet-wrapper/index.js.map +1 -0
  97. package/out/src/parquet-wrapper/reader.d.ts +31 -0
  98. package/out/src/parquet-wrapper/reader.d.ts.map +1 -0
  99. package/out/src/parquet-wrapper/reader.js +54 -0
  100. package/out/src/parquet-wrapper/reader.js.map +1 -0
  101. package/out/src/parquet-wrapper/schema.d.ts +45 -0
  102. package/out/src/parquet-wrapper/schema.d.ts.map +1 -0
  103. package/out/src/parquet-wrapper/schema.js +55 -0
  104. package/out/src/parquet-wrapper/schema.js.map +1 -0
  105. package/out/src/parquet-wrapper/writer.d.ts +41 -0
  106. package/out/src/parquet-wrapper/writer.d.ts.map +1 -0
  107. package/out/src/parquet-wrapper/writer.js +71 -0
  108. package/out/src/parquet-wrapper/writer.js.map +1 -0
  109. package/out/src/parquet.d.ts +122 -0
  110. package/out/src/parquet.d.ts.map +1 -0
  111. package/out/src/parquet.js +220 -0
  112. package/out/src/parquet.js.map +1 -0
  113. package/out/src/runner.d.ts +100 -0
  114. package/out/src/runner.d.ts.map +1 -0
  115. package/out/src/runner.js +183 -0
  116. package/out/src/runner.js.map +1 -0
  117. package/out/src/split.d.ts +108 -0
  118. package/out/src/split.d.ts.map +1 -0
  119. package/out/src/split.js +191 -0
  120. package/out/src/split.js.map +1 -0
  121. package/out/src/synthesize.d.ts +146 -0
  122. package/out/src/synthesize.d.ts.map +1 -0
  123. package/out/src/synthesize.js +472 -0
  124. package/out/src/synthesize.js.map +1 -0
  125. package/out/src/tokenize.d.ts +47 -0
  126. package/out/src/tokenize.d.ts.map +1 -0
  127. package/out/src/tokenize.js +49 -0
  128. package/out/src/tokenize.js.map +1 -0
  129. package/out/src/types.d.ts +168 -0
  130. package/out/src/types.d.ts.map +1 -0
  131. package/out/src/types.js +19 -0
  132. package/out/src/types.js.map +1 -0
  133. package/out/src/wof-json.d.ts +105 -0
  134. package/out/src/wof-json.d.ts.map +1 -0
  135. package/out/src/wof-json.js +174 -0
  136. package/out/src/wof-json.js.map +1 -0
  137. package/package.json +36 -0
@@ -0,0 +1,123 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `usgov-nppes`: CMS National Plan and Provider Enumeration System (NPI registry) CSV consumer.
7
+ *
8
+ * NPPES is the authoritative US healthcare provider registry, published monthly by CMS. Each row
9
+ * carries a provider's business practice location address together with their legal business name
10
+ * or individual name. At ~7M rows it is the single largest venue+address signal source
11
+ * available.
12
+ *
13
+ * The adapter consumes the monthly full-replacement CSV (operator pre-downloads via
14
+ * `fetch-nppes.sh`). Column names match the canonical NPPES "Full Replacement Monthly NPI File"
15
+ * header published at `https://download.cms.gov/nppes/NPI_Files.html`.
16
+ *
17
+ * Output: one row per CSV record where the practice location address is populated. Organization
18
+ * rows carry `venue` from the legal business name; individual rows compose the name from
19
+ * first+last name (currently emitted under `venue`, not a separate `attention` slot). Address fields go on `(house_number, street, locality, region, postcode)`.
20
+ *
21
+ * License: stamped `"Public Domain"` per CMS's federal government distribution terms.
22
+ */
23
+ import { parse as csvParse } from "csv-parse";
24
+ import { createReadStream } from "node:fs";
25
+ import { stableSourceId } from "../../adapter.js";
26
+ import { lookupStateAbbreviation } from "../../codex/us-fips-state.js";
27
+ import { reconcileComponents } from "../../format.js";
28
+ export const USGOV_NPPES_ADAPTER_ID = "usgov-nppes";
29
+ export const USGOV_NPPES_DEFAULT_LICENSE = "Public Domain";
30
/**
 * Matches a leading house number followed by whitespace and the rest of the line.
 * Group 1: digits, an optional `-digits` range part, and an optional single trailing letter.
 * Group 2: everything after the separating whitespace.
 */
const HOUSE_NUMBER_PREFIX = /^(\d+(?:-\d+)?[A-Za-z]?)\s+(.+)$/;

/**
 * Split a single-line street address into an optional house number and the street remainder.
 *
 * @param address Raw address line; surrounding whitespace is ignored.
 * @returns `null` when the line is blank, `{ house_number, street }` when the line starts with a
 *   house number, otherwise `{ street }` carrying the whole trimmed line.
 */
function splitAddress(address) {
    const candidate = address.trim();
    if (candidate.length === 0) {
        return null;
    }
    const match = candidate.match(HOUSE_NUMBER_PREFIX);
    if (match === null) {
        // No recognizable house-number prefix: the whole line is the street.
        return { street: candidate };
    }
    const [, houseNumber, remainder] = match;
    return { house_number: houseNumber, street: remainder.trim() };
}
40
/**
 * Render the single-line surface form of a record: `venue, house street, city, state postcode`.
 * Falsy pieces are dropped so no empty segment or dangling separator is produced.
 *
 * @param venue Optional venue / provider name placed first.
 * @param house Optional house number.
 * @param street Street remainder.
 * @param city Locality name; trimmed before use.
 * @param state Region abbreviation.
 * @param postcode Postal code.
 * @returns The comma-joined address line (empty string when every piece is empty).
 */
function composeRaw(venue, house, street, city, state, postcode) {
    const joinTruthy = (parts, separator) => parts.filter((part) => Boolean(part)).join(separator);
    const streetLine = joinTruthy([house, street], " ").trim();
    const regionLine = joinTruthy([state, postcode], " ").trim();
    const localityLine = joinTruthy([city.trim(), regionLine], ", ");
    return joinTruthy([venue, streetLine, localityLine], ", ");
}
45
/**
 * Build the `usgov-nppes` corpus adapter.
 *
 * The returned object exposes `id`, `defaultLicense`, `description`, and an async generator
 * `rows(opts)` that streams the CSV at `opts.inputPath` and yields one corpus row per usable record.
 *
 * `rows(opts)` reads:
 * - `opts.country`: when set, must be `"US"`; anything else throws.
 * - `opts.inputPath`: path of the CSV file to read.
 * - `opts.signal`: optional abort signal, polled before each record.
 * - `opts.limit`: optional cap on the number of rows yielded.
 *
 * Records are skipped when city or postcode is blank, when the state cannot be resolved by
 * `lookupStateAbbreviation`, when both address lines are blank, or when too few components
 * survive `reconcileComponents`.
 *
 * @returns The adapter object.
 * @throws Error from `rows()` when `opts.country` is set to something other than `"US"`. Read
 *   errors on the input file (e.g. a missing path) reject the iteration.
 */
export function createUsgovNppesAdapter() {
    /** Read a CSV column as a trimmed string, treating a missing column as empty. */
    const readField = (record, column) => (record[column] ?? "").trim();
    return {
        id: USGOV_NPPES_ADAPTER_ID,
        defaultLicense: USGOV_NPPES_DEFAULT_LICENSE,
        description: "CMS National Plan and Provider Enumeration System — 7M provider practice locations (public-domain). Venue+address co-occurrence at scale.",
        async *rows(opts) {
            if (opts.country && opts.country !== "US") {
                throw new Error(`usgov-nppes adapter: only US supported, got country=${opts.country}`);
            }
            const stream = createReadStream(opts.inputPath, { encoding: "utf8" });
            const parser = stream.pipe(csvParse({
                columns: true,
                skip_empty_lines: true,
                relax_quotes: true,
                relax_column_count: true,
            }));
            // `pipe()` does not forward source errors to its destination. Without this listener a
            // missing or unreadable input file raises an unhandled 'error' event instead of
            // rejecting the `for await` below.
            stream.on("error", (error) => parser.destroy(error));
            let emitted = 0;
            try {
                for await (const record of parser) {
                    if (opts.signal?.aborted)
                        break;
                    if (opts.limit !== undefined && emitted >= opts.limit)
                        break;
                    const npi = readField(record, "NPI");
                    const orgName = readField(record, "Provider Organization Name (Legal Business Name)");
                    const lastName = readField(record, "Provider Last Name (Legal Name)");
                    const firstName = readField(record, "Provider First Name");
                    const address1 = readField(record, "Provider First Line Business Practice Location Address");
                    const address2 = readField(record, "Provider Second Line Business Practice Location Address");
                    const city = readField(record, "Provider Business Practice Location Address City Name");
                    const stateRaw = readField(record, "Provider Business Practice Location Address State Name");
                    const postcode = readField(record, "Provider Business Practice Location Address Postal Code");
                    if (!city || !postcode)
                        continue;
                    const state = lookupStateAbbreviation(stateRaw);
                    if (!state)
                        continue;
                    // Both address lines are folded into one string before the house-number split.
                    const fullStreet = [address1, address2].filter(Boolean).join(" ");
                    const split = splitAddress(fullStreet);
                    if (!split)
                        continue;
                    // Organization name wins; otherwise fall back to "First Last".
                    // NOTE(review): individuals' names are emitted under `venue` and the
                    // "Entity Type Code" column is not consulted — confirm this is intended.
                    const venue = orgName || [firstName, lastName].filter(Boolean).join(" ") || undefined;
                    const components = {
                        ...(venue ? { venue } : {}),
                        ...(split.house_number ? { house_number: split.house_number } : {}),
                        street: split.street,
                        locality: city,
                        region: state.abbreviation,
                        postcode,
                    };
                    const raw = composeRaw(venue, split.house_number, split.street, city, state.abbreviation, postcode);
                    if (!raw)
                        continue;
                    const aligned = reconcileComponents(components, raw);
                    // NOTE(review): rows with two or fewer aligned components are dropped here, while
                    // the SAMHSA adapter only drops rows with zero — confirm the threshold is deliberate.
                    if (Object.keys(aligned).length <= 2)
                        continue;
                    // Prefer the NPI as a stable identifier; otherwise derive one from the components.
                    const sourceId = npi ? `${USGOV_NPPES_ADAPTER_ID}-${npi}` : stableSourceId(USGOV_NPPES_ADAPTER_ID, aligned);
                    yield {
                        raw,
                        components: aligned,
                        country: "US",
                        locale: "en-US",
                        source: USGOV_NPPES_ADAPTER_ID,
                        source_id: sourceId,
                        corpus_version: "",
                        license: USGOV_NPPES_DEFAULT_LICENSE,
                    };
                    emitted++;
                }
            }
            finally {
                // Tear down both ends so an early exit never leaves a stream open.
                parser.destroy();
                stream.destroy();
            }
        },
    };
}
122
+ export const usgovNppesAdapter = createUsgovNppesAdapter();
123
+ //# sourceMappingURL=adapter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/usgov-nppes/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AAEH,OAAO,EAAE,KAAK,IAAI,QAAQ,EAAE,MAAM,WAAW,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAA;AACtE,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAGrD,MAAM,CAAC,MAAM,sBAAsB,GAAG,aAAa,CAAA;AACnD,MAAM,CAAC,MAAM,2BAA2B,GAAG,eAAe,CAAA;AAE1D,MAAM,mBAAmB,GAAG,kCAAkC,CAAA;AAe9D,SAAS,YAAY,CAAC,OAAe;IACpC,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAA;IAC9B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IACzB,MAAM,CAAC,GAAG,mBAAmB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IAC3C,IAAI,CAAC;QAAE,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,EAAE,CAAA;IAC1D,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;AAC3B,CAAC;AAED,SAAS,UAAU,CAClB,KAAyB,EACzB,KAAyB,EACzB,MAAc,EACd,IAAY,EACZ,KAAa,EACb,QAAgB;IAEhB,MAAM,UAAU,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IACnE,MAAM,QAAQ,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAC7G,OAAO,CAAC,KAAK,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AAChE,CAAC;AAED,MAAM,UAAU,uBAAuB;IACtC,OAAO;QACN,EAAE,EAAE,sBAAsB;QAC1B,cAAc,EAAE,2BAA2B;QAC3C,WAAW,EACV,2IAA2I;QAE5I,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,uDAAuD,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YACvF,CAAC;YAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;YACrE,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CACzB,QAAQ,CAAC;gBACR,OAAO,EAAE,IAAI;gBACb,gBAAgB,EAAE,IAAI;gBACtB,YAAY,EAAE,IAAI;gBAClB,kBAAkB,EAAE,IAAI;aACxB,CAAC,CACF,CAAA;YAED,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI,MAAiC,EAAE,CAAC;oBAC9D
,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,MAAK;oBAC/B,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;wBAAE,MAAK;oBAE5D,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACrC,MAAM,UAAU,GAAG,CAAC,MAAM,CAAC,kBAAkB,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC5D,MAAM,OAAO,GAAG,CAAC,MAAM,CAAC,kDAAkD,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACzF,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,iCAAiC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACzE,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC,qBAAqB,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAE9D,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,wDAAwD,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAChG,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,yDAAyD,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACjG,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC,uDAAuD,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC3F,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,wDAAwD,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAChG,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,yDAAyD,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAEjG,IAAI,CAAC,IAAI,IAAI,CAAC,QAAQ;wBAAE,SAAQ;oBAEhC,MAAM,KAAK,GAAG,uBAAuB,CAAC,QAAQ,CAAC,CAAA;oBAC/C,IAAI,CAAC,KAAK;wBAAE,SAAQ;oBAEpB,MAAM,UAAU,GAAG,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;oBACjE,MAAM,KAAK,GAAG,YAAY,CAAC,UAAU,CAAC,CAAA;oBACtC,IAAI,CAAC,KAAK;wBAAE,SAAQ;oBAEpB,MAAM,KAAK,GAAG,OAAO,IAAI,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,SAAS,CAAA;oBAErF,MAAM,UAAU,GAA+B;wBAC9C,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;wBAC3B,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,KAAK,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;wBACnE,MAAM,EAAE,KAAK,CAAC,MAAM;wBACpB,QAAQ,EAAE,IAAI;wBACd,MAAM,EAAE,KAAK,CAAC,YAAY;wBAC1B,QAAQ;qBACR,CAAA;oBAED,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,EAAE,KAAK,CAAC,YAAY,EAAE,KAAK,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAA;oBACnG,IAAI,CAAC,GAAG;wBAAE,SAAQ;oBAElB,MAAM,OAAO,GAAG,mBAAmB,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;oBACpD,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,IAAI,CAAC;wBAAE,SAAQ;oBAE9C,
MAAM,QAAQ,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,sBAAsB,IAAI,GAAG,EAAE,CAAC,CAAC,CAAC,cAAc,CAAC,sBAAsB,EAAE,OAAO,CAAC,CAAA;oBAE3G,MAAM;wBACL,GAAG;wBACH,UAAU,EAAE,OAAO;wBACnB,OAAO,EAAE,IAAI;wBACb,MAAM,EAAE,OAAO;wBACf,MAAM,EAAE,sBAAsB;wBAC9B,SAAS,EAAE,QAAQ;wBACnB,cAAc,EAAE,EAAE;wBAClB,OAAO,EAAE,2BAA2B;qBACpC,CAAA;oBACD,OAAO,EAAE,CAAA;gBACV,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,MAAM,CAAC,OAAO,EAAE,CAAA;YACjB,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,iBAAiB,GAAG,uBAAuB,EAAE,CAAA"}
@@ -0,0 +1,35 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `usgov-samhsa-treatment-locator`: SAMHSA Behavioral Health Treatment Services Locator CSV
7
+ * consumer.
8
+ *
9
+ * SAMHSA's Treatment Locator (`findtreatment.gov`) is the federal directory of substance-use and
10
+ * mental-health treatment facilities. The published CSV carries the facility name, an optional
11
+ * secondary name (typically the organizational parent), and the postal address quad split into
12
+ * primary + secondary street lines. Phase 1.6 §1.2 (#22) selects this source for the same reason
13
+ * it selects HRSA: facility names are hand-typed venue strings and the addresses pass through
14
+ * enough human + system hands to accumulate the suite-designator + sub-tenant chaos ("Suite C,
15
+ * behind main building") that pure gazetteer data does not.
16
+ *
17
+ * SAMHSA's two-line address shape is the key adapter-specific concern. `street1` typically carries
18
+ * the canonical postal address (`"123 Main St"`); `street2` carries the suite / unit / "second
19
+ * floor" surface form. The adapter joins them with `", "` into a single `street` component (Phase
20
+ * 1 keeps `unit` as a deferred slot since the OpenCage template doesn't have a clean rendering
21
+ * for it). Operators wanting a different join policy can subclass the factory.
22
+ *
23
+ * Column names below match the canonical SAMHSA Behavioral Health Treatment Services Locator CSV
24
+ * export header. Operators substituting a closely-related extract should rename columns to match;
25
+ * the README has the mapping cheatsheet.
26
+ *
27
+ * License: stamped `"Public Domain"` per the SAMHSA Open Data Foundry's federal-government
28
+ * distribution terms.
29
+ */
30
+ import type { CorpusAdapter } from "../../types.js";
31
+ export declare const USGOV_SAMHSA_ADAPTER_ID = "usgov-samhsa-treatment-locator";
32
+ export declare const USGOV_SAMHSA_DEFAULT_LICENSE = "Public Domain";
33
+ export declare function createUsgovSamhsaTreatmentLocatorAdapter(): CorpusAdapter;
34
+ export declare const usgovSamhsaTreatmentLocatorAdapter: CorpusAdapter;
35
+ //# sourceMappingURL=adapter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/usgov-samhsa-treatment-locator/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAOH,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AAEjF,eAAO,MAAM,uBAAuB,mCAAmC,CAAA;AACvE,eAAO,MAAM,4BAA4B,kBAAkB,CAAA;AA2E3D,wBAAgB,wCAAwC,IAAI,aAAa,CA6ExE;AAED,eAAO,MAAM,kCAAkC,eAA6C,CAAA"}
@@ -0,0 +1,162 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `usgov-samhsa-treatment-locator`: SAMHSA Behavioral Health Treatment Services Locator CSV
7
+ * consumer.
8
+ *
9
+ * SAMHSA's Treatment Locator (`findtreatment.gov`) is the federal directory of substance-use and
10
+ * mental-health treatment facilities. The published CSV carries the facility name, an optional
11
+ * secondary name (typically the organizational parent), and the postal address quad split into
12
+ * primary + secondary street lines. Phase 1.6 §1.2 (#22) selects this source for the same reason
13
+ * it selects HRSA: facility names are hand-typed venue strings and the addresses pass through
14
+ * enough human + system hands to accumulate the suite-designator + sub-tenant chaos ("Suite C,
15
+ * behind main building") that pure gazetteer data does not.
16
+ *
17
+ * SAMHSA's two-line address shape is the key adapter-specific concern. `street1` typically carries
18
+ * the canonical postal address (`"123 Main St"`); `street2` carries the suite / unit / "second
19
+ * floor" surface form. The adapter joins them with `", "` into a single `street` component (Phase
20
+ * 1 keeps `unit` as a deferred slot since the OpenCage template doesn't have a clean rendering
21
+ * for it). Operators wanting a different join policy can subclass the factory.
22
+ *
23
+ * Column names below match the canonical SAMHSA Behavioral Health Treatment Services Locator CSV
24
+ * export header. Operators substituting a closely-related extract should rename columns to match;
25
+ * the README has the mapping cheatsheet.
26
+ *
27
+ * License: stamped `"Public Domain"` per the SAMHSA Open Data Foundry's federal-government
28
+ * distribution terms.
29
+ */
30
+ import { parse as csvParse } from "csv-parse";
31
+ import { createReadStream } from "node:fs";
32
+ import { stableSourceId } from "../../adapter.js";
33
+ import { lookupStateAbbreviation } from "../../codex/us-fips-state.js";
34
+ import { reconcileComponents } from "../../format.js";
35
+ export const USGOV_SAMHSA_ADAPTER_ID = "usgov-samhsa-treatment-locator";
36
+ export const USGOV_SAMHSA_DEFAULT_LICENSE = "Public Domain";
37
/**
 * Leading house number (digits, optional `-digits`, optional single letter), then whitespace,
 * then the remainder of the line.
 */
const HOUSE_NUMBER_PREFIX = /^(\d+(?:-\d+)?[A-Za-z]?)\s+(.+)$/;

/**
 * Separate an address line into its house number (when one leads the line) and street text.
 *
 * @param address Address line to split; leading/trailing whitespace is discarded.
 * @returns `null` for a blank line; `{ house_number, street }` when the pattern matches;
 *   `{ street }` with the full trimmed line otherwise.
 */
function splitAddress(address) {
    const line = address.trim();
    if (line === "") {
        return null;
    }
    const parts = HOUSE_NUMBER_PREFIX.exec(line);
    return parts
        ? { house_number: parts[1], street: parts[2].trim() }
        : { street: line };
}
47
/**
 * Merge the two SAMHSA street lines into the single `street` surface form.
 *
 * Each line is trimmed; blank lines are dropped; the survivors are joined with `", "`. The
 * secondary line (suite / unit / floor text) is therefore kept inside `street` rather than being
 * split out into a separate component.
 *
 * @param street1 Primary street line.
 * @param street2 Optional secondary line; `null`/`undefined` is treated as empty.
 * @returns The combined street string, or `""` when both lines are blank.
 */
function joinTwoLineStreet(street1, street2) {
    const lines = [street1.trim(), (street2 ?? "").trim()];
    return lines.filter((line) => line !== "").join(", ");
}
63
/**
 * Build the venue surface form from SAMHSA's two name columns.
 *
 * - `name1` is the program / clinic name ("Mountain Plains Counseling Services").
 * - `name2` is the parent organization ("Catholic Charities of Wyoming"), if any.
 *
 * Each name is trimmed; when both are non-empty the result is `"<name1> - <name2>"`, otherwise
 * whichever one is present is returned on its own.
 *
 * @param name1 Primary name.
 * @param name2 Optional secondary name; `null`/`undefined` is treated as empty.
 * @returns The venue string, or `""` when both names are blank.
 */
function composeVenue(name1, name2) {
    const names = [name1.trim(), (name2 ?? "").trim()];
    return names.filter((name) => name !== "").join(" - ");
}
83
/**
 * Same envelope-style format as HRSA: venue prefix, street body, city/state/zip suffix.
 *
 * @param venue Venue name; trimmed before use (must be a string).
 * @param house Optional house number.
 * @param street Street text.
 * @param city Locality name; trimmed before use.
 * @param state Region abbreviation.
 * @param postcode Postal code.
 * @returns `venue, house street, city, state postcode` with empty segments omitted.
 */
function composeRaw(venue, house, street, city, state, postcode) {
    const keepTruthy = (values) => values.filter((value) => Boolean(value));
    const streetLine = keepTruthy([house, street]).join(" ").trim();
    const regionLine = keepTruthy([state, postcode]).join(" ").trim();
    const localityLine = keepTruthy([city.trim(), regionLine]).join(", ");
    const segments = [venue.trim(), streetLine, localityLine];
    return keepTruthy(segments).join(", ");
}
89
/**
 * Build the `usgov-samhsa-treatment-locator` corpus adapter.
 *
 * The returned object exposes `id`, `defaultLicense`, `description`, and an async generator
 * `rows(opts)` that streams the CSV at `opts.inputPath` and yields one corpus row per usable record.
 *
 * `rows(opts)` reads:
 * - `opts.country`: when set, must be `"US"`; anything else throws.
 * - `opts.inputPath`: path of the CSV file to read.
 * - `opts.signal`: optional abort signal, polled before each record.
 * - `opts.limit`: optional cap on the number of rows yielded.
 *
 * CSV columns consumed: `name1`, `name2`, `street1`, `street2`, `city`, `state`, `zip`, `frid`.
 * Records are skipped when the venue, street, city, or postcode is blank, when the state cannot
 * be resolved by `lookupStateAbbreviation`, or when `reconcileComponents` returns no components.
 *
 * @returns The adapter object.
 * @throws Error from `rows()` when `opts.country` is set to something other than `"US"`. Read
 *   errors on the input file (e.g. a missing path) reject the iteration.
 */
export function createUsgovSamhsaTreatmentLocatorAdapter() {
    return {
        id: USGOV_SAMHSA_ADAPTER_ID,
        defaultLicense: USGOV_SAMHSA_DEFAULT_LICENSE,
        description: "SAMHSA Behavioral Health Treatment Services Locator (public-domain). Adversarial source: venue + two-line address co-occurrence, hand-entered.",
        async *rows(opts) {
            if (opts.country && opts.country !== "US") {
                throw new Error(`usgov-samhsa adapter: only US supported, got country=${opts.country}`);
            }
            const stream = createReadStream(opts.inputPath, { encoding: "utf8" });
            const parser = stream.pipe(csvParse({
                columns: true,
                skip_empty_lines: true,
                relax_quotes: true,
                relax_column_count: true,
            }));
            // `pipe()` does not forward source errors to its destination. Without this listener a
            // missing or unreadable input file raises an unhandled 'error' event instead of
            // rejecting the `for await` below.
            stream.on("error", (error) => parser.destroy(error));
            let emitted = 0;
            try {
                for await (const record of parser) {
                    if (opts.signal?.aborted)
                        break;
                    if (opts.limit !== undefined && emitted >= opts.limit)
                        break;
                    const venue = composeVenue(record.name1 ?? "", record.name2);
                    const street = joinTwoLineStreet(record.street1 ?? "", record.street2);
                    const split = splitAddress(street);
                    const city = (record.city ?? "").trim();
                    const stateAbbr = (record.state ?? "").trim();
                    const postcode = (record.zip ?? "").trim();
                    if (!venue || !split || !city || !postcode)
                        continue;
                    const state = lookupStateAbbreviation(stateAbbr);
                    if (!state)
                        continue;
                    // venue first — same kryptonite-defending insertion order as HRSA.
                    const components = {
                        venue,
                        ...(split.house_number ? { house_number: split.house_number } : {}),
                        street: split.street,
                        locality: city,
                        region: state.abbreviation,
                        postcode,
                    };
                    const raw = composeRaw(venue, split.house_number, split.street, city, state.abbreviation, postcode);
                    if (!raw)
                        continue;
                    const aligned = reconcileComponents(components, raw);
                    if (Object.keys(aligned).length === 0)
                        continue;
                    // Prefer the `frid` column as a stable identifier; otherwise derive one from the components.
                    const frId = (record.frid ?? "").trim();
                    const sourceId = frId
                        ? `${USGOV_SAMHSA_ADAPTER_ID}-${frId}`
                        : stableSourceId(USGOV_SAMHSA_ADAPTER_ID, aligned);
                    yield {
                        raw,
                        components: aligned,
                        country: "US",
                        locale: "en-US",
                        source: USGOV_SAMHSA_ADAPTER_ID,
                        source_id: sourceId,
                        corpus_version: "",
                        license: USGOV_SAMHSA_DEFAULT_LICENSE,
                    };
                    emitted++;
                }
            }
            finally {
                // Tear down both ends so an early exit never leaves a stream open.
                parser.destroy();
                stream.destroy();
            }
        },
    };
}
161
+ export const usgovSamhsaTreatmentLocatorAdapter = createUsgovSamhsaTreatmentLocatorAdapter();
162
+ //# sourceMappingURL=adapter.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"adapter.js","sourceRoot":"","sources":["../../../../src/adapters/usgov-samhsa-treatment-locator/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAEH,OAAO,EAAE,KAAK,IAAI,QAAQ,EAAE,MAAM,WAAW,CAAA;AAC7C,OAAO,EAAE,gBAAgB,EAAE,MAAM,SAAS,CAAA;AAC1C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AACjD,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAA;AACtE,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAA;AAGrD,MAAM,CAAC,MAAM,uBAAuB,GAAG,gCAAgC,CAAA;AACvE,MAAM,CAAC,MAAM,4BAA4B,GAAG,eAAe,CAAA;AAmB3D,MAAM,mBAAmB,GAAG,kCAAkC,CAAA;AAE9D,SAAS,YAAY,CAAC,OAAe;IACpC,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAA;IAC9B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IACzB,MAAM,CAAC,GAAG,mBAAmB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IAC3C,IAAI,CAAC;QAAE,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,EAAE,CAAA;IAC1D,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,CAAA;AAC3B,CAAC;AAED;;;;GAIG;AACH,SAAS,iBAAiB,CAAC,OAAe,EAAE,OAA2B;IACtE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,EAAE,CAAA;IACzB,MAAM,EAAE,GAAG,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;IACjC,IAAI,CAAC,EAAE,IAAI,CAAC,EAAE;QAAE,OAAO,EAAE,CAAA;IACzB,IAAI,CAAC,EAAE;QAAE,OAAO,EAAE,CAAA;IAClB,IAAI,CAAC,EAAE;QAAE,OAAO,EAAE,CAAA;IAClB,OAAO,GAAG,EAAE,KAAK,EAAE,EAAE,CAAA;AACtB,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,YAAY,CAAC,KAAa,EAAE,KAAyB;IAC7D,MAAM,EAAE,GAAG,KAAK,CAAC,IAAI,EAAE,CAAA;IACvB,MAAM,EAAE,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;IAC/B,IAAI,CAAC,EAAE,IAAI,CAAC,EAAE;QAAE,OAAO,EAAE,CAAA;IACzB,IAAI,CAAC,EAAE;QAAE,OAAO,EAAE,CAAA;IAClB,IAAI,CAAC,EAAE;QAAE,OAAO,EAAE,CAAA;IAClB,OAAO,GAAG,EAAE,MAAM,EAAE,EAAE,CAAA;AACvB,CAAC;AAED,4FAA4F;AAC5F,SAAS,UAAU,CAClB,KAAa,EACb,KAAyB,EACzB,MAAc,EACd,IAAY,EACZ,KAAa,EACb,QAAgB;IAEhB,MAAM,UAAU,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IACnE,MAAM,QAAQ,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,C
AAA;IAC7G,OAAO,CAAC,KAAK,CAAC,IAAI,EAAE,EAAE,UAAU,EAAE,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AACvE,CAAC;AAED,MAAM,UAAU,wCAAwC;IACvD,OAAO;QACN,EAAE,EAAE,uBAAuB;QAC3B,cAAc,EAAE,4BAA4B;QAC5C,WAAW,EACV,gJAAgJ;QAEjJ,KAAK,CAAC,CAAC,IAAI,CAAC,IAAoB;YAC/B,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,EAAE,CAAC;gBAC3C,MAAM,IAAI,KAAK,CAAC,wDAAwD,IAAI,CAAC,OAAO,EAAE,CAAC,CAAA;YACxF,CAAC;YAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAA;YACrE,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CACzB,QAAQ,CAAC;gBACR,OAAO,EAAE,IAAI;gBACb,gBAAgB,EAAE,IAAI;gBACtB,YAAY,EAAE,IAAI;gBAClB,kBAAkB,EAAE,IAAI;aACxB,CAAC,CACF,CAAA;YAED,IAAI,OAAO,GAAG,CAAC,CAAA;YACf,IAAI,CAAC;gBACJ,IAAI,KAAK,EAAE,MAAM,MAAM,IAAI,MAAsC,EAAE,CAAC;oBACnE,IAAI,IAAI,CAAC,MAAM,EAAE,OAAO;wBAAE,MAAK;oBAC/B,IAAI,IAAI,CAAC,KAAK,KAAK,SAAS,IAAI,OAAO,IAAI,IAAI,CAAC,KAAK;wBAAE,MAAK;oBAE5D,MAAM,KAAK,GAAG,YAAY,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,EAAE,MAAM,CAAC,KAAK,CAAC,CAAA;oBAC5D,MAAM,MAAM,GAAG,iBAAiB,CAAC,MAAM,CAAC,OAAO,IAAI,EAAE,EAAE,MAAM,CAAC,OAAO,CAAC,CAAA;oBACtE,MAAM,KAAK,GAAG,YAAY,CAAC,MAAM,CAAC,CAAA;oBAClC,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACvC,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAC7C,MAAM,QAAQ,GAAG,CAAC,MAAM,CAAC,GAAG,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBAE1C,IAAI,CAAC,KAAK,IAAI,CAAC,KAAK,IAAI,CAAC,IAAI,IAAI,CAAC,QAAQ;wBAAE,SAAQ;oBACpD,MAAM,KAAK,GAAG,uBAAuB,CAAC,SAAS,CAAC,CAAA;oBAChD,IAAI,CAAC,KAAK;wBAAE,SAAQ;oBAEpB,mEAAmE;oBACnE,MAAM,UAAU,GAA+B;wBAC9C,KAAK;wBACL,GAAG,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,KAAK,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;wBACnE,MAAM,EAAE,KAAK,CAAC,MAAM;wBACpB,QAAQ,EAAE,IAAI;wBACd,MAAM,EAAE,KAAK,CAAC,YAAY;wBAC1B,QAAQ;qBACR,CAAA;oBAED,MAAM,GAAG,GAAG,UAAU,CAAC,KAAK,EAAE,KAAK,CAAC,YAAY,EAAE,KAAK,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAA;oBACnG,IAAI,CAAC,GAAG;wBAAE,SAAQ;oBAElB,MAAM,OAAO,GAAG,mBAAmB,CAAC,UAAU,EAAE,GAAG,CAAC,CAAA;oBACpD,IAAI,MAAM,CAAC,
IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,KAAK,CAAC;wBAAE,SAAQ;oBAE/C,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;oBACvC,MAAM,QAAQ,GAAG,IAAI;wBACpB,CAAC,CAAC,GAAG,uBAAuB,IAAI,IAAI,EAAE;wBACtC,CAAC,CAAC,cAAc,CAAC,uBAAuB,EAAE,OAAO,CAAC,CAAA;oBAEnD,MAAM;wBACL,GAAG;wBACH,UAAU,EAAE,OAAO;wBACnB,OAAO,EAAE,IAAI;wBACb,MAAM,EAAE,OAAO;wBACf,MAAM,EAAE,uBAAuB;wBAC/B,SAAS,EAAE,QAAQ;wBACnB,cAAc,EAAE,EAAE;wBAClB,OAAO,EAAE,4BAA4B;qBACrC,CAAA;oBACD,OAAO,EAAE,CAAA;gBACV,CAAC;YACF,CAAC;oBAAS,CAAC;gBACV,MAAM,CAAC,OAAO,EAAE,CAAA;YACjB,CAAC;QACF,CAAC;KACD,CAAA;AACF,CAAC;AAED,MAAM,CAAC,MAAM,kCAAkC,GAAG,wCAAwC,EAAE,CAAA"}
@@ -0,0 +1,85 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `wof-admin`: Who's On First admin GeoJSON-bundle adapter.
7
+ *
8
+ * **Phase 1.5.1 pivot.** The original Phase 1.5 SQLite adapter (formerly at
9
+ * `packages/corpus/src/adapters/wof-admin/`, removed in this same change) was replaced by this
10
+ * one because the SQLite distribution path was unworkable for the real corpus build:
11
+ *
12
+ * 1. `dist.whosonfirst.org/sqlite/` is dead (NXDOMAIN); the Geocode-Earth mirror is the only one left.
13
+ * 2. The Geocode-Earth-hosted postalcode DB tags every row `mz:is_current = -1` ("unknown but treated
14
+ * as active"); the SQLite adapter's `is_current = 1` predicate emitted zero rows.
15
+ * 3. The `names` table in the SQLite distribution is empty — localized `name:*` variants live in a
16
+ * separate distribution. The St. Petersburg / Mt. Vernon / Ft. Lauderdale alternation cases
17
+ * (the original Phase 1.5.1 motivator) cannot be solved on the SQLite path even with a
18
+ * patched `is_current` predicate.
19
+ *
20
+ * Input: a directory containing one or more cloned `whosonfirst-data-admin-<cc>` GitHub repos. Each
21
+ * repo has `data/XXX/YYY/ZZZ/<wof-id>.geojson` files; `**\/*.geojson` walks the tree recursively.
22
+ * Alternate-geometry siblings (`-alt-*`) are skipped — they're separate exports of the same
23
+ * record, not new records.
24
+ *
25
+ * Per record, the adapter emits one row per `(name-variant, hierarchy-variant)` pair:
26
+ *
27
+ * - **Name variants**: the canonical `wof:name` (slot key `default`) plus every `name:*` localized
28
+ * variant present on the feature (`name:eng_x_preferred`, `name:eng_x_colloquial`,
29
+ * `name:rus_x_preferred`, ...). This is the Phase 1.5.1 fix for the St. Petersburg case:
30
+ * `"Saint Petersburg"` (canonical) and `"St. Petersburg"` (eng_x_colloquial) both become
31
+ * training rows for the same WOF id.
32
+ * - **Hierarchy variants** (unchanged from the SQLite adapter): locality → 3 variants, region → 2,
33
+ * country → 1, county → 1.
34
+ *
35
+ * `source_id` is `wof-admin-<wof_id>-<name-slot>-<hierarchy-variant>`. The previous SQLite adapter
36
+ * used `wof-admin-<wof_id>-<hierarchy-variant>` (no name slot); the new format adds a name-slot
37
+ * segment so the colloquial / preferred / per-locale variants survive dedup independently.
38
+ *
39
+ * License: CC0. The adapter stamps every row with `CC0-1.0`.
40
+ */
41
+ import type { ComponentTag } from "@mailwoman/core/types";
42
+ import type { CorpusAdapter } from "../../types.js";
43
+ import { type WofRecord } from "../../wof-json.js";
44
+ interface VariantSpec {
45
+ /** Hierarchy-variant id appended to `source_id`. */
46
+ suffix: string;
47
+ /** Component tag → display string the adapter will hand to the runner. */
48
+ components: Partial<Record<ComponentTag, string>>;
49
+ }
50
+ /**
51
+ * Compute the hierarchy variants for a record given its ancestry chain and the chosen `selfName`.
52
+ *
53
+ * `selfName` is the surface form to use for the record's own component (locality / region / country
54
+ * / subregion). Callers pass the canonical `wof:name` for the `"default"` slot and a `name:*`
55
+ * localized value for variant slots; ancestor names always come from the ancestor's canonical
56
+ * `wof:name`.
57
+ *
58
+ * Country variants substitute `COUNTRY_DISPLAY_NAME` for the default slot so the OpenCage template
59
+ * produces the canonicalized form (`"United States of America"`), matching the legacy SQLite
60
+ * adapter's behavior.
61
+ */
62
+ export declare function variantsFor(row: WofRecord, ancestry: WofRecord[], selfName: string): VariantSpec[];
63
+ /**
64
+ * Build the per-record name-slot list. The canonical `"default"` slot uses the OpenCage-canonical
65
+ * country form when the record is itself a country (matches SQLite-adapter behavior); every other
66
+ * placetype's default slot uses `wof:name` verbatim.
67
+ *
68
+ * Subsequent slots come from `name:*` variants, deduplicated against the default name so we don't
69
+ * emit a redundant `"default"`-equivalent row under a localized key.
70
+ */
71
+ export declare function nameSlotsFor(rec: WofRecord): Array<{
72
+ key: string;
73
+ value: string;
74
+ }>;
75
+ export declare const WOF_ADMIN_ADAPTER_ID = "wof-admin";
76
+ /**
77
+ * Construct the wof-admin JSON-bundle adapter. The adapter is stateless across runs; running it
78
+ * twice over the same input directory produces byte-identical `canonical.jsonl` (records are
79
+ * emitted in sorted `wof:id` order to be insensitive to filesystem walk ordering).
80
+ */
81
+ export declare function createWofAdminAdapter(): CorpusAdapter;
82
+ /** Single shared instance, suitable for `defaultAdapterRegistry`. */
83
+ export declare const wofAdminAdapter: CorpusAdapter;
84
+ export {};
85
+ //# sourceMappingURL=adapter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"adapter.d.ts","sourceRoot":"","sources":["../../../../src/adapters/wof-admin-json/adapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuCG;AAGH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAA;AAEzD,OAAO,KAAK,EAAgC,aAAa,EAAE,MAAM,gBAAgB,CAAA;AACjF,OAAO,EAAsD,KAAK,SAAS,EAAE,MAAM,mBAAmB,CAAA;AAiDtG,UAAU,WAAW;IACpB,oDAAoD;IACpD,MAAM,EAAE,MAAM,CAAA;IAEd,0EAA0E;IAC1E,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAA;CACjD;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,WAAW,CAAC,GAAG,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,EAAE,QAAQ,EAAE,MAAM,GAAG,WAAW,EAAE,CA0DlG;AAED;;;;;;;GAOG;AACH,wBAAgB,YAAY,CAAC,GAAG,EAAE,SAAS,GAAG,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,CAclF;AAED,eAAO,MAAM,oBAAoB,cAAc,CAAA;AAE/C;;;;GAIG;AACH,wBAAgB,qBAAqB,IAAI,aAAa,CA2DrD;AAED,qEAAqE;AACrE,eAAO,MAAM,eAAe,eAA0B,CAAA"}