@mailwoman/resolver-wof-sqlite 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. package/README.md +250 -0
  2. package/out/address-point-interpolation.d.ts +48 -0
  3. package/out/address-point-interpolation.d.ts.map +1 -0
  4. package/out/address-point-interpolation.js +164 -0
  5. package/out/address-point-interpolation.js.map +1 -0
  6. package/out/address-point-schema.d.ts +58 -0
  7. package/out/address-point-schema.d.ts.map +1 -0
  8. package/out/address-point-schema.js +67 -0
  9. package/out/address-point-schema.js.map +1 -0
  10. package/out/address-point.d.ts +29 -0
  11. package/out/address-point.d.ts.map +1 -0
  12. package/out/address-point.js +62 -0
  13. package/out/address-point.js.map +1 -0
  14. package/out/ancestry.d.ts +40 -0
  15. package/out/ancestry.d.ts.map +1 -0
  16. package/out/ancestry.js +53 -0
  17. package/out/ancestry.js.map +1 -0
  18. package/out/build-candidate-cli.d.ts +16 -0
  19. package/out/build-candidate-cli.d.ts.map +1 -0
  20. package/out/build-candidate-cli.js +80 -0
  21. package/out/build-candidate-cli.js.map +1 -0
  22. package/out/build-candidate.d.ts +54 -0
  23. package/out/build-candidate.d.ts.map +1 -0
  24. package/out/build-candidate.js +230 -0
  25. package/out/build-candidate.js.map +1 -0
  26. package/out/build-coincident-roles-cli.d.ts +16 -0
  27. package/out/build-coincident-roles-cli.d.ts.map +1 -0
  28. package/out/build-coincident-roles-cli.js +94 -0
  29. package/out/build-coincident-roles-cli.js.map +1 -0
  30. package/out/build-fts-cli.d.ts +23 -0
  31. package/out/build-fts-cli.d.ts.map +1 -0
  32. package/out/build-fts-cli.js +117 -0
  33. package/out/build-fts-cli.js.map +1 -0
  34. package/out/build-slim-cli.d.ts +14 -0
  35. package/out/build-slim-cli.d.ts.map +1 -0
  36. package/out/build-slim-cli.js +130 -0
  37. package/out/build-slim-cli.js.map +1 -0
  38. package/out/build-slim.d.ts +71 -0
  39. package/out/build-slim.d.ts.map +1 -0
  40. package/out/build-slim.js +267 -0
  41. package/out/build-slim.js.map +1 -0
  42. package/out/candidate-lookup.d.ts +43 -0
  43. package/out/candidate-lookup.d.ts.map +1 -0
  44. package/out/candidate-lookup.js +191 -0
  45. package/out/candidate-lookup.js.map +1 -0
  46. package/out/candidate-schema.d.ts +86 -0
  47. package/out/candidate-schema.d.ts.map +1 -0
  48. package/out/candidate-schema.js +109 -0
  49. package/out/candidate-schema.js.map +1 -0
  50. package/out/coincident-roles.d.ts +86 -0
  51. package/out/coincident-roles.d.ts.map +1 -0
  52. package/out/coincident-roles.js +160 -0
  53. package/out/coincident-roles.js.map +1 -0
  54. package/out/convention.d.ts +109 -0
  55. package/out/convention.d.ts.map +1 -0
  56. package/out/convention.js +94 -0
  57. package/out/convention.js.map +1 -0
  58. package/out/fst-autocomplete.d.ts +49 -0
  59. package/out/fst-autocomplete.d.ts.map +1 -0
  60. package/out/fst-autocomplete.js +124 -0
  61. package/out/fst-autocomplete.js.map +1 -0
  62. package/out/fst-builder.d.ts +20 -0
  63. package/out/fst-builder.d.ts.map +1 -0
  64. package/out/fst-builder.js +219 -0
  65. package/out/fst-builder.js.map +1 -0
  66. package/out/fst-deserialize-web.d.ts +16 -0
  67. package/out/fst-deserialize-web.d.ts.map +1 -0
  68. package/out/fst-deserialize-web.js +133 -0
  69. package/out/fst-deserialize-web.js.map +1 -0
  70. package/out/fst-matcher.d.ts +33 -0
  71. package/out/fst-matcher.d.ts.map +1 -0
  72. package/out/fst-matcher.js +117 -0
  73. package/out/fst-matcher.js.map +1 -0
  74. package/out/fst-serialize.d.ts +30 -0
  75. package/out/fst-serialize.d.ts.map +1 -0
  76. package/out/fst-serialize.js +261 -0
  77. package/out/fst-serialize.js.map +1 -0
  78. package/out/fst-types.d.ts +60 -0
  79. package/out/fst-types.d.ts.map +1 -0
  80. package/out/fst-types.js +11 -0
  81. package/out/fst-types.js.map +1 -0
  82. package/out/fts.d.ts +158 -0
  83. package/out/fts.d.ts.map +1 -0
  84. package/out/fts.js +261 -0
  85. package/out/fts.js.map +1 -0
  86. package/out/geo.d.ts +74 -0
  87. package/out/geo.d.ts.map +1 -0
  88. package/out/geo.js +88 -0
  89. package/out/geo.js.map +1 -0
  90. package/out/index.d.ts +27 -0
  91. package/out/index.d.ts.map +1 -0
  92. package/out/index.js +22 -0
  93. package/out/index.js.map +1 -0
  94. package/out/interpolation.d.ts +84 -0
  95. package/out/interpolation.d.ts.map +1 -0
  96. package/out/interpolation.js +150 -0
  97. package/out/interpolation.js.map +1 -0
  98. package/out/lookup.d.ts +156 -0
  99. package/out/lookup.d.ts.map +1 -0
  100. package/out/lookup.js +876 -0
  101. package/out/lookup.js.map +1 -0
  102. package/out/postal-city-alias-lookup.d.ts +50 -0
  103. package/out/postal-city-alias-lookup.d.ts.map +1 -0
  104. package/out/postal-city-alias-lookup.js +66 -0
  105. package/out/postal-city-alias-lookup.js.map +1 -0
  106. package/out/postal-city-alias-schema.d.ts +51 -0
  107. package/out/postal-city-alias-schema.d.ts.map +1 -0
  108. package/out/postal-city-alias-schema.js +47 -0
  109. package/out/postal-city-alias-schema.js.map +1 -0
  110. package/out/postal-city-candidate-schema.d.ts +58 -0
  111. package/out/postal-city-candidate-schema.d.ts.map +1 -0
  112. package/out/postal-city-candidate-schema.js +56 -0
  113. package/out/postal-city-candidate-schema.js.map +1 -0
  114. package/out/postcode-point-lookup.d.ts +38 -0
  115. package/out/postcode-point-lookup.d.ts.map +1 -0
  116. package/out/postcode-point-lookup.js +46 -0
  117. package/out/postcode-point-lookup.js.map +1 -0
  118. package/out/reverse.d.ts +99 -0
  119. package/out/reverse.d.ts.map +1 -0
  120. package/out/reverse.js +290 -0
  121. package/out/reverse.js.map +1 -0
  122. package/out/schema.d.ts +163 -0
  123. package/out/schema.d.ts.map +1 -0
  124. package/out/schema.js +18 -0
  125. package/out/schema.js.map +1 -0
  126. package/out/sharding.d.ts +96 -0
  127. package/out/sharding.d.ts.map +1 -0
  128. package/out/sharding.js +129 -0
  129. package/out/sharding.js.map +1 -0
  130. package/out/sqlite-convention-source.d.ts +29 -0
  131. package/out/sqlite-convention-source.d.ts.map +1 -0
  132. package/out/sqlite-convention-source.js +53 -0
  133. package/out/sqlite-convention-source.js.map +1 -0
  134. package/out/sqlite-utils.d.ts +17 -0
  135. package/out/sqlite-utils.d.ts.map +1 -0
  136. package/out/sqlite-utils.js +24 -0
  137. package/out/sqlite-utils.js.map +1 -0
  138. package/out/street-morphology-fst-builder.d.ts +59 -0
  139. package/out/street-morphology-fst-builder.d.ts.map +1 -0
  140. package/out/street-morphology-fst-builder.js +174 -0
  141. package/out/street-morphology-fst-builder.js.map +1 -0
  142. package/out/street-normalize.d.ts +66 -0
  143. package/out/street-normalize.d.ts.map +1 -0
  144. package/out/street-normalize.js +176 -0
  145. package/out/street-normalize.js.map +1 -0
  146. package/out/street-segment-schema.d.ts +61 -0
  147. package/out/street-segment-schema.d.ts.map +1 -0
  148. package/out/street-segment-schema.js +64 -0
  149. package/out/street-segment-schema.js.map +1 -0
  150. package/out/types.d.ts +137 -0
  151. package/out/types.d.ts.map +1 -0
  152. package/out/types.js +13 -0
  153. package/out/types.js.map +1 -0
  154. package/out/unified-schema.d.ts +25 -0
  155. package/out/unified-schema.d.ts.map +1 -0
  156. package/out/unified-schema.js +142 -0
  157. package/out/unified-schema.js.map +1 -0
  158. package/package.json +54 -0
@@ -0,0 +1,129 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Multi-shard support for `WofSqlitePlaceLookup` — opens multiple WOF SQLite distributions on one
7
+ * connection via `ATTACH DATABASE`, and routes queries to the right shard based on placetype.
8
+ *
9
+ * ## The FTS5 syntax rule that drove this design
10
+ *
11
+ * The naive `SELECT … FROM pc.place_search WHERE pc.place_search MATCH ?` fails — SQLite parses the
12
+ * schema-qualified table on the left of MATCH as "column place_search of table pc". Discovered in
13
+ * the spike at PR review time; documented as `_SHARD_RULE.md` should it ever bite again.
14
+ *
15
+ * The working form: schema-qualified in FROM, bare table name in MATCH:
16
+ *
17
+ * ```sql
18
+ * SELECT … FROM pc.place_search WHERE place_search MATCH ?
19
+ * ```
20
+ *
21
+ * Identical table names across attached shards (which is what we have — every shard ships its own
22
+ * `place_search` + `place_bbox`) are fine because the bare-name MATCH resolves against FROM
23
+ * scope.
24
+ */
25
+ import { basename } from "node:path";
26
+ /**
27
+ * Derive a SQL-safe schema name from a WOF distribution filename. Used by `ATTACH DATABASE … AS
28
+ * <name>` so each shard gets a stable, predictable handle.
29
+ *
30
+ * Convention strips the `whosonfirst-data-` prefix and the `-latest.db` (or just `.db`) suffix,
31
+ * then replaces every character outside `[a-zA-Z0-9_]` (typically `-`) with `_` for SQL identifier safety.
32
+ *
33
+ * Examples:
34
+ *
35
+ * - `whosonfirst-data-admin-us-latest.db` → `admin_us`
36
+ * - `whosonfirst-data-postalcode-us-latest.db` → `postalcode_us`
37
+ * - `whosonfirst-data-admin-latest.db` → `admin`
38
+ * - `my-custom.db` → `my_custom`
39
+ *
40
+ * Callers can override the derived name explicitly via `ShardConfig.schemaName` when the filename
41
+ * doesn't follow WOF convention. Throws when nothing is left after stripping (e.g. a file named `.db`); the result may still start with a digit, which `resolveShards` rejects.
42
+ */
43
+ export function deriveSchemaName(path) {
44
+ const stem = basename(path)
45
+ .replace(/^whosonfirst-data-/u, "")
46
+ .replace(/-latest\.db$/u, "")
47
+ .replace(/\.db$/u, "")
48
+ .replace(/[^a-zA-Z0-9_]/g, "_");
49
+ if (!stem) {
50
+ throw new Error(`deriveSchemaName: could not derive a SQL schema name from path ${JSON.stringify(path)}`);
51
+ }
52
+ return stem;
53
+ }
54
+ /** Pattern every shard schema name must satisfy in `resolveShards` — `[A-Za-z_][A-Za-z0-9_]*` (ASCII letters, digits, `_`; no leading digit). */
55
+ const SQLITE_IDENT_RE = /^[A-Za-z_][A-Za-z0-9_]*$/u;
56
+ /**
57
+ * Normalize the user-provided `databasePath` opt (which may be a single string, an array of
58
+ * strings, or an array of `ShardConfig` objects) into a uniform `ResolvedShard[]`.
59
+ *
60
+ * The first shard becomes `main` regardless of its derived schema name — that's the SQLite
61
+ * convention. Subsequent shards keep their derived (or override) schema name. Missing `placetypes` default to `[]`. Throws on an empty list, on a name that fails `SQLITE_IDENT_RE`, or on a later shard whose name is `main` or repeats an earlier one.
62
+ */
63
+ export function resolveShards(input) {
64
+ const list = typeof input === "string" ? [input] : input;
65
+ if (list.length === 0)
66
+ throw new Error("resolveShards: at least one shard is required");
67
+ const seen = new Set();
68
+ const out = [];
69
+ for (let i = 0; i < list.length; i++) {
70
+ const entry = list[i];
71
+ const cfg = typeof entry === "string" ? { path: entry } : entry;
72
+ const derived = cfg.schemaName ?? deriveSchemaName(cfg.path);
73
+ if (!SQLITE_IDENT_RE.test(derived)) {
74
+ throw new Error(`resolveShards: schema name ${JSON.stringify(derived)} is not a valid SQLite identifier ` +
75
+ `(derived from path ${JSON.stringify(cfg.path)}). Pass an explicit ` +
76
+ `{ path, schemaName } to override.`);
77
+ }
78
+ // The first shard is always main per SQLite semantics — its derived name is informational
79
+ // only (NOTE(review): it has nevertheless been validated above and can throw). Subsequent shards must have unique non-main names.
80
+ const schemaName = i === 0 ? "main" : derived;
81
+ if (i > 0 && (schemaName === "main" || seen.has(schemaName))) { // `seen` already holds "main" from the first iteration
82
+ throw new Error(`resolveShards: schema name ${JSON.stringify(schemaName)} collides ` +
83
+ `(either with "main" or another shard). Pass an explicit { path, schemaName }.`);
84
+ }
85
+ seen.add(schemaName);
86
+ out.push({
87
+ path: cfg.path,
88
+ schemaName,
89
+ placetypes: cfg.placetypes ?? [],
90
+ });
91
+ }
92
+ return out;
93
+ }
94
+ /**
95
+ * Pick the shard to route a query to given the requested placetype (a single value; a missing or empty one routes to the first shard).
96
+ *
97
+ * Routing rules, in order:
98
+ *
99
+ * 1. If any shard has explicit `placetypes` that includes the requested placetype, use it (first match in list order).
100
+ * 2. Otherwise, if a non-main shard's `schemaName` equals the placetype, starts with `<placetype>_` or ends with
101
+ * `_<placetype>` (e.g. `postalcode_us` matches `postalcode`), use it.
102
+ * 3. Otherwise, fall back to the first shard (`main` when `shards` comes from `resolveShards`).
103
+ *
104
+ * This deliberately doesn't UNION across shards — BM25 scores aren't comparable across separately-
105
+ * indexed corpora, and the typical mailwoman query has a single placetype anyway. If a caller needs
106
+ * cross-shard results they can issue two `findPlace` calls.
107
+ */
108
+ export function pickShardForPlacetype(shards, placetype) {
109
+ if (!placetype)
110
+ return shards[0];
111
+ for (const s of shards) {
112
+ if (s.placetypes.includes(placetype))
113
+ return s;
114
+ }
115
+ for (const s of shards) {
116
+ if (s.schemaName === "main")
117
+ continue;
118
+ // Boundary match, not a raw substring test: `postalcode_us` matches `postalcode`. Conservative — the
119
+ // placetype must be the whole schema name or a leading/trailing `_`-delimited part of it, which avoids false hits like
120
+ // `region` matching `arboregion` (a placetype in the middle, e.g. `a_region_b`, does not match either).
121
+ if (s.schemaName === placetype ||
122
+ s.schemaName.startsWith(`${placetype}_`) ||
123
+ s.schemaName.endsWith(`_${placetype}`)) {
124
+ return s;
125
+ }
126
+ }
127
+ return shards[0];
128
+ }
129
+ //# sourceMappingURL=sharding.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sharding.js","sourceRoot":"","sources":["../sharding.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAA;AAEpC;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAAY;IAC5C,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;SACzB,OAAO,CAAC,qBAAqB,EAAE,EAAE,CAAC;SAClC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC;SAC5B,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;SACrB,OAAO,CAAC,gBAAgB,EAAE,GAAG,CAAC,CAAA;IAChC,IAAI,CAAC,IAAI,EAAE,CAAC;QACX,MAAM,IAAI,KAAK,CAAC,kEAAkE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAC1G,CAAC;IACD,OAAO,IAAI,CAAA;AACZ,CAAC;AAkCD,0DAA0D;AAC1D,MAAM,eAAe,GAAG,2BAA2B,CAAA;AAEnD;;;;;;GAMG;AACH,MAAM,UAAU,aAAa,CAAC,KAAmD;IAChF,MAAM,IAAI,GAAG,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAA;IACxD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,MAAM,IAAI,KAAK,CAAC,+CAA+C,CAAC,CAAA;IAEvF,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAA;IAC9B,MAAM,GAAG,GAAoB,EAAE,CAAA;IAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,KAAK,GAAG,IAAI,CAAC,CAAC,CAAE,CAAA;QACtB,MAAM,GAAG,GAAgB,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,KAAK,CAAA;QAC5E,MAAM,OAAO,GAAG,GAAG,CAAC,UAAU,IAAI,gBAAgB,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QAC5D,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YACpC,MAAM,IAAI,KAAK,CACd,8BAA8B,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,oCAAoC;gBACxF,sBAAsB,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,sBAAsB;gBACpE,mCAAmC,CACpC,CAAA;QACF,CAAC;QACD,0FAA0F;QAC1F,2DAA2D;QAC3D,MAAM,UAAU,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAA;QAC7C,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,KAAK,MAAM,IAAI,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,EAAE,CAAC;YAC9D,MAAM,IAAI,KAAK,CACd,8BAA8B,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,YAAY;gBACnE,+EAA+E,CAChF,CAAA;QACF,CAAC;QACD,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAA;QACpB,GAAG,CAAC,IAAI,CAAC;YACR,IAAI,EAAE,GAAG,CAAC,IAAI;YACd,UAAU;YACV,UAAU,EAAE,GAAG,CAAC,UAAU,IAAI,EAAE;SAChC,CAAC,CAAA;IACH,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAA
M,UAAU,qBAAqB,CAAC,MAAuB,EAAE,SAA6B;IAC3F,IAAI,CAAC,SAAS;QAAE,OAAO,MAAM,CAAC,CAAC,CAAE,CAAA;IACjC,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;QACxB,IAAI,CAAC,CAAC,UAAU,CAAC,QAAQ,CAAC,SAAS,CAAC;YAAE,OAAO,CAAC,CAAA;IAC/C,CAAC;IACD,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;QACxB,IAAI,CAAC,CAAC,UAAU,KAAK,MAAM;YAAE,SAAQ;QACrC,qFAAqF;QACrF,qFAAqF;QACrF,kCAAkC;QAClC,IACC,CAAC,CAAC,UAAU,KAAK,SAAS;YAC1B,CAAC,CAAC,UAAU,CAAC,UAAU,CAAC,GAAG,SAAS,GAAG,CAAC;YACxC,CAAC,CAAC,UAAU,CAAC,QAAQ,CAAC,IAAI,SAAS,EAAE,CAAC,EACrC,CAAC;YACF,OAAO,CAAC,CAAA;QACT,CAAC;IACF,CAAC;IACD,OAAO,MAAM,CAAC,CAAC,CAAE,CAAA;AAClB,CAAC"}
@@ -0,0 +1,29 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `SqliteConventionSource` — a `ConventionSource` backed by the build-from-source convention asset
7
+ * (#290, Direction E). Conventions live in a read-only, provenance-stamped `address_convention`
8
+ * table keyed by WOF polygon id; this source queries them ON DEMAND by id (one indexed lookup,
9
+ * memoized) rather than paging the whole table into memory as a code constant — the deliberate
10
+ * counter to the Pelias "giant dictionary in RAM, no provenance" pattern (see the operator design
11
+ * value in memory `feedback-no-load-bearing-trivia`).
12
+ *
13
+ * The asset is the queryable, distributable artifact; the strategy IMPLEMENTATIONS stay in code. An
14
+ * unknown strategy NAME is surfaced loudly at dispatch (see `lookup.ts`), not silently
15
+ * swallowed.
16
+ */
17
+ import type { DatabaseSync } from "node:sqlite";
18
+ import { type Convention, type ConventionSource } from "./convention.js";
19
+ export declare class SqliteConventionSource implements ConventionSource {
20
+ #private;
21
+ /**
22
+ * @param db An open handle to a DB that has the convention asset attached (or that is itself the asset).
23
+ * @param schema The schema name the `address_convention` table lives under (`main` or an ATTACHed
24
+ * shard name — `WofSqlitePlaceLookup` auto-detects which shard carries the table).
25
+ */
26
+ constructor(db: DatabaseSync, schema: string);
27
+ get(wofId: number): Convention | undefined; // convention stored for `wofId`, or undefined when there is none
28
+ }
29
+ //# sourceMappingURL=sqlite-convention-source.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sqlite-convention-source.d.ts","sourceRoot":"","sources":["../sqlite-convention-source.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAA;AAE/C,OAAO,EAA4B,KAAK,UAAU,EAAE,KAAK,gBAAgB,EAAE,MAAM,iBAAiB,CAAA;AAElG,qBAAa,sBAAuB,YAAW,gBAAgB;;IAM9D;;;;OAIG;gBACS,EAAE,EAAE,YAAY,EAAE,MAAM,EAAE,MAAM;IAK5C,GAAG,CAAC,KAAK,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS;CAiB1C"}
@@ -0,0 +1,53 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `SqliteConventionSource` — a `ConventionSource` backed by the build-from-source convention asset
7
+ * (#290, Direction E). Conventions live in a read-only, provenance-stamped `address_convention`
8
+ * table keyed by WOF polygon id; this source queries them ON DEMAND by id (one indexed lookup,
9
+ * memoized) rather than paging the whole table into memory as a code constant — the deliberate
10
+ * counter to the Pelias "giant dictionary in RAM, no provenance" pattern (see the operator design
11
+ * value in memory `feedback-no-load-bearing-trivia`).
12
+ *
13
+ * The asset is the queryable, distributable artifact; the strategy IMPLEMENTATIONS stay in code. An
14
+ * unknown strategy NAME is surfaced loudly at dispatch (see `lookup.ts`), not silently
15
+ * swallowed.
16
+ */
17
+ import { ADDRESS_CONVENTION_TABLE } from "./convention.js";
18
+ export class SqliteConventionSource {
19
+ #db;
20
+ #schema;
21
+ /** Memoize per-id lookups (including misses, as `null`) so a hot ancestor chain is queried once. Entries are never evicted or invalidated. */
22
+ #cache = new Map();
23
+ /**
24
+ * @param db An open handle to a DB that has the convention asset attached (or that is itself the asset).
25
+ * @param schema The schema name the `address_convention` table lives under (`main` or an ATTACHed
26
+ * shard name — `WofSqlitePlaceLookup` auto-detects which shard carries the table). It is interpolated into the SQL text unescaped, so it must be a trusted identifier.
27
+ */
28
+ constructor(db, schema) {
29
+ this.#db = db;
30
+ this.#schema = schema;
31
+ }
32
+ get(wofId) { // returns the parsed convention for `wofId`, or undefined when none is stored; the outcome is memoized per id
33
+ const cached = this.#cache.get(wofId);
34
+ if (cached !== undefined) // cache hit: either a convention or a memoized miss (null)
35
+ return cached ?? undefined;
36
+ let value = null;
37
+ try {
38
+ const row = this.#db // a fresh statement is prepared on every cache miss
39
+ .prepare(`SELECT convention FROM ${this.#schema}.${ADDRESS_CONVENTION_TABLE} WHERE wof_id = ?`)
40
+ .get(wofId);
41
+ if (row?.convention)
42
+ value = JSON.parse(row.convention);
43
+ }
44
+ catch {
45
+ // Malformed JSON or a missing table (or any other error from prepare/get) → treat as no override (the chain falls back to
46
+ // WORLD_DEFAULT). The build script validates structure, so this is purely defensive. The miss is memoized below, so a failure is not retried for this id.
47
+ value = null;
48
+ }
49
+ this.#cache.set(wofId, value);
50
+ return value ?? undefined;
51
+ }
52
+ }
53
+ //# sourceMappingURL=sqlite-convention-source.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sqlite-convention-source.js","sourceRoot":"","sources":["../sqlite-convention-source.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAIH,OAAO,EAAE,wBAAwB,EAA0C,MAAM,iBAAiB,CAAA;AAElG,MAAM,OAAO,sBAAsB;IACzB,GAAG,CAAc;IACjB,OAAO,CAAQ;IACxB,oGAAoG;IAC3F,MAAM,GAAG,IAAI,GAAG,EAA6B,CAAA;IAEtD;;;;OAIG;IACH,YAAY,EAAgB,EAAE,MAAc;QAC3C,IAAI,CAAC,GAAG,GAAG,EAAE,CAAA;QACb,IAAI,CAAC,OAAO,GAAG,MAAM,CAAA;IACtB,CAAC;IAED,GAAG,CAAC,KAAa;QAChB,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;QACrC,IAAI,MAAM,KAAK,SAAS;YAAE,OAAO,MAAM,IAAI,SAAS,CAAA;QACpD,IAAI,KAAK,GAAsB,IAAI,CAAA;QACnC,IAAI,CAAC;YACJ,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG;iBAClB,OAAO,CAAC,0BAA0B,IAAI,CAAC,OAAO,IAAI,wBAAwB,mBAAmB,CAAC;iBAC9F,GAAG,CAAC,KAAK,CAAuC,CAAA;YAClD,IAAI,GAAG,EAAE,UAAU;gBAAE,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,CAAe,CAAA;QACtE,CAAC;QAAC,MAAM,CAAC;YACR,oFAAoF;YACpF,qFAAqF;YACrF,KAAK,GAAG,IAAI,CAAA;QACb,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,CAAC,CAAA;QAC7B,OAAO,KAAK,IAAI,SAAS,CAAA;IAC1B,CAAC;CACD"}
@@ -0,0 +1,17 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Small shared helpers for the SQLite-backed lookups.
7
+ */
8
+ import type { DatabaseSync } from "node:sqlite";
9
+ /**
10
+ * True when `name` is a table in the open database. The street-level lookups use this to degrade
11
+ * gracefully on an empty/tableless shard — an interrupted `build-*-shard.ts`, or a stray 0-byte
12
+ * file (e.g. `sqlite3 <missing>.db "…"` CREATES one) — rather than throwing `no such table` at
13
+ * construction and taking down a whole state's geocode (#568). A missing table makes the lookup a
14
+ * no-op miss. Any error raised by the probe query is also reported as `false`.
15
+ */
16
+ export declare function hasTable(db: DatabaseSync, name: string): boolean;
17
+ //# sourceMappingURL=sqlite-utils.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sqlite-utils.d.ts","sourceRoot":"","sources":["../sqlite-utils.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAA;AAE/C;;;;;;GAMG;AACH,wBAAgB,QAAQ,CAAC,EAAE,EAAE,YAAY,EAAE,IAAI,EAAE,MAAM,GAAG,OAAO,CAOhE"}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Small shared helpers for the SQLite-backed lookups.
7
+ */
8
+ /**
9
+ * True when `name` is a table in the open database. The street-level lookups use this to degrade
10
+ * gracefully on an empty/tableless shard — an interrupted `build-*-shard.ts`, or a stray 0-byte
11
+ * file (e.g. `sqlite3 <missing>.db "…"` CREATES one) — rather than throwing `no such table` at
12
+ * construction and taking down a whole state's geocode (#568). A missing table makes the lookup a
13
+ * no-op miss. Only `sqlite_master` rows with `type = 'table'` count. NOTE(review): `sqlite_master` is queried without a schema prefix — presumably this inspects `main` only, not ATTACHed shards; confirm.
14
+ */
15
+ export function hasTable(db, name) {
16
+ try {
17
+ const row = db.prepare("SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ? LIMIT 1").get(name);
18
+ return row !== undefined;
19
+ }
20
+ catch {
21
+ return false; // any error from prepare/get is reported as "table absent"
22
+ }
23
+ }
24
+ //# sourceMappingURL=sqlite-utils.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"sqlite-utils.js","sourceRoot":"","sources":["../sqlite-utils.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAIH;;;;;;GAMG;AACH,MAAM,UAAU,QAAQ,CAAC,EAAgB,EAAE,IAAY;IACtD,IAAI,CAAC;QACJ,MAAM,GAAG,GAAG,EAAE,CAAC,OAAO,CAAC,uEAAuE,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QACzG,OAAO,GAAG,KAAK,SAAS,CAAA;IACzB,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,KAAK,CAAA;IACb,CAAC;AACF,CAAC"}
@@ -0,0 +1,59 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Build a street-morphology FST from libpostal's street_types dictionaries. The morphology FST maps
7
+ * street-typing affixes (Street/Avenue/rue/Calle/Straße/...) to a single synthetic placetype
8
+ * `"street_affix"` — distinct from the admin FST in source data, intent, and binary artifact.
9
+ *
10
+ * The morphology FST closes the inference-time vacuum identified by the v0.6.1 postmortem: street
11
+ * tokens have no admin-FST anchor, so synth-street training pushed the model toward over-emitting
12
+ * `dependent_locality` on subcomponents. With the morphology FST, the neural decoder gets
13
+ * positive evidence for street-typing affixes and the adjacent name tokens, plus negative
14
+ * evidence away from `dependent_locality` on the same neighbours.
15
+ *
16
+ * Design rationale + the four-layer street-supplement architecture lives in
17
+ * `docs/articles/concepts/street-supplement-architecture.md`.
18
+ *
19
+ * Source: `core/data/libpostal/dictionaries/{locale}/street_types.txt`. Each line is pipe-delimited
20
+ * surface forms with the canonical form first: avenue|av|ave|aven|avenu|avn|avnu|avnue
21
+ *
22
+ * Output: an `FstMatcher` ready to serialize via `serializeFst` to e.g.
23
+ * `fst-street-morphology.bin`.
24
+ */
25
+ import { FstMatcher } from "./fst-matcher.js";
26
+ import type { FstProvenance } from "./fst-types.js";
27
+ export interface BuildStreetMorphologyFstOpts {
28
+ /** Path to the `core/data/libpostal/dictionaries` directory containing per-locale subfolders. */
29
+ dictionariesDir: string;
30
+ /**
31
+ * Optional locale filter — only ingest these locale subfolders. Defaults to all that have a
32
+ * `street_types.txt`. An empty array is treated the same as omitting the option.
33
+ */
34
+ locales?: string[];
35
+ /**
36
+ * Minimum length (in characters, post-normalization) of variant surface forms to insert into the
37
+ * trie. Defaults to 3. The length is measured on the normalized tokens joined without spaces, and the filter applies to the canonical form as well.
38
+ *
39
+ * Rationale: libpostal's street_types dictionaries contain 1-2 character abbreviations (`a`, `b`,
40
+ * `av`, `bd`, `br`, ...) that collide with non-affix tokens at parse time — notably US state
41
+ * abbreviations (`OR`, `CA`, `ND`, `NY`), single-letter unit designators, and arbitrary short
42
+ * tokens. Empirically these collisions push the morphology prior to mis-tag state abbreviations
43
+ * as `street_suffix`. A minimum length of 3 retains useful forms (`ave`, `blvd`, `rue`, `str`)
44
+ * while filtering out the noise.
45
+ */
46
+ minVariantLength?: number;
47
+ /** Optional progress callback, invoked with the phases `discover`, `collect` and `trie`. */
48
+ onProgress?: (phase: string, detail?: string) => void;
49
+ }
50
+ export interface BuildStreetMorphologyFstResult { // everything `buildStreetMorphologyFst` returns
51
+ matcher: FstMatcher; // matcher created from the built trie nodes
52
+ provenance: FstProvenance; // build metadata (timestamp, counts, source directory)
53
+ canonicalCount: number; // number of distinct canonical forms collected, before length filtering
54
+ variantCount: number; // surface forms that passed the length filter; always equal to `insertCount`
55
+ insertCount: number; // number of trie insertions attempted for surface forms that passed the filter
56
+ locales: string[]; // locales that were ingested (explicit list or discovered)
57
+ }
58
+ export declare function buildStreetMorphologyFst(opts: BuildStreetMorphologyFstOpts): BuildStreetMorphologyFstResult; // synchronous: reads each selected locale's street_types.txt and builds the trie in memory
59
+ //# sourceMappingURL=street-morphology-fst-builder.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"street-morphology-fst-builder.d.ts","sourceRoot":"","sources":["../street-morphology-fst-builder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAKH,OAAO,EAAE,UAAU,EAAmB,MAAM,kBAAkB,CAAA;AAC9D,OAAO,KAAK,EAAE,aAAa,EAAc,MAAM,gBAAgB,CAAA;AAW/D,MAAM,WAAW,4BAA4B;IAC5C,iGAAiG;IACjG,eAAe,EAAE,MAAM,CAAA;IACvB;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,EAAE,CAAA;IAClB;;;;;;;;;;OAUG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAA;IACzB,kCAAkC;IAClC,UAAU,CAAC,EAAE,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,KAAK,IAAI,CAAA;CACrD;AAED,MAAM,WAAW,8BAA8B;IAC9C,OAAO,EAAE,UAAU,CAAA;IACnB,UAAU,EAAE,aAAa,CAAA;IACzB,cAAc,EAAE,MAAM,CAAA;IACtB,YAAY,EAAE,MAAM,CAAA;IACpB,WAAW,EAAE,MAAM,CAAA;IACnB,OAAO,EAAE,MAAM,EAAE,CAAA;CACjB;AAmBD,wBAAgB,wBAAwB,CAAC,IAAI,EAAE,4BAA4B,GAAG,8BAA8B,CAwH3G"}
@@ -0,0 +1,174 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Build a street-morphology FST from libpostal's street_types dictionaries. The morphology FST maps
7
+ * street-typing affixes (Street/Avenue/rue/Calle/Straße/...) to a single synthetic placetype
8
+ * `"street_affix"` — distinct from the admin FST in source data, intent, and binary artifact.
9
+ *
10
+ * The morphology FST closes the inference-time vacuum identified by the v0.6.1 postmortem: street
11
+ * tokens have no admin-FST anchor, so synth-street training pushed the model toward over-emitting
12
+ * `dependent_locality` on subcomponents. With the morphology FST, the neural decoder gets
13
+ * positive evidence for street-typing affixes and the adjacent name tokens, plus negative
14
+ * evidence away from `dependent_locality` on the same neighbours.
15
+ *
16
+ * Design rationale + the four-layer street-supplement architecture lives in
17
+ * `docs/articles/concepts/street-supplement-architecture.md`.
18
+ *
19
+ * Source: `core/data/libpostal/dictionaries/{locale}/street_types.txt`. Each line is pipe-delimited
20
+ * surface forms with the canonical form first: avenue|av|ave|aven|avenu|avn|avnu|avnue
21
+ *
22
+ * Output: an `FstMatcher` ready to serialize via `serializeFst` to e.g.
23
+ * `fst-street-morphology.bin`.
24
+ */
25
+ import { readdirSync, readFileSync, statSync } from "node:fs";
26
+ import { join } from "node:path";
27
+ import { FstMatcher, normalizeTokens } from "./fst-matcher.js";
28
+ /**
29
+ * Reserved synthetic wofID base for street-morphology entries. 32-bit unsigned, well above any
30
+ * realistic WOF allocation. Reusing the same base across rebuilds keeps IDs stable for any consumer
31
+ * that caches them — but only while the sorted set of canonical forms is unchanged, because each id is `base + sorted index`. See [[project-schema-storage-decision]] for the reserved range policy.
32
+ */
33
+ const STREET_AFFIX_WOFID_BASE = 1_900_000_000;
34
+ const STREET_TYPES_FILENAME = "street_types.txt"; // file looked up inside each locale directory
35
+ /**
36
+ * Parse one `street_types.txt` line into `{ canonical, variants }`. Canonical is the first non-empty token
37
+ * (pre-`|`); variants are all whitespace-trimmed non-empty tokens including the canonical.
38
+ *
39
+ * Lines with no `|` are treated as a single-form entry where canonical == variant. Blank lines, lines starting with `#`, and lines with no non-empty token yield `null`.
40
+ */
41
+ function parseLine(line) {
42
+ const trimmed = line.trim();
43
+ if (trimmed.length === 0 || trimmed.startsWith("#"))
44
+ return null;
45
+ const parts = trimmed
46
+ .split("|")
47
+ .map((s) => s.trim())
48
+ .filter((s) => s.length > 0);
49
+ if (parts.length === 0) // e.g. a line made only of `|` separators
50
+ return null;
51
+ return { canonical: parts[0], variants: parts };
52
+ }
53
+ export function buildStreetMorphologyFst(opts) { // builds the street-affix trie from the per-locale street_types.txt files and returns the matcher plus build statistics
54
+ const progress = opts.onProgress ?? (() => { });
55
+ const minVariantLength = opts.minVariantLength ?? 3;
56
+ // Discover locales — either provided explicitly (a non-empty `opts.locales`, used as-is without checking that the files exist), or all directories containing street_types.txt.
57
+ let locales;
58
+ if (opts.locales && opts.locales.length > 0) {
59
+ locales = opts.locales;
60
+ }
61
+ else {
62
+ locales = readdirSync(opts.dictionariesDir).filter((entry) => {
63
+ const localePath = join(opts.dictionariesDir, entry);
64
+ if (!statSync(localePath).isDirectory())
65
+ return false;
66
+ try {
67
+ statSync(join(localePath, STREET_TYPES_FILENAME));
68
+ return true;
69
+ }
70
+ catch {
71
+ return false; // statSync threw, so this locale has no usable street_types.txt
72
+ }
73
+ });
74
+ }
75
+ progress("discover", `Found ${locales.length} locales with ${STREET_TYPES_FILENAME}`);
76
+ // Collect canonical → set-of-variants across all locales. Same canonical form may appear in
77
+ // multiple locales (e.g. "avenue" in en/fr); we union the variant sets. Canonicals are keyed by their trimmed text as-is, so differently-cased spellings stay separate entries.
78
+ const canonicalToVariants = new Map();
79
+ for (const locale of locales) {
80
+ const filePath = join(opts.dictionariesDir, locale, STREET_TYPES_FILENAME);
81
+ const content = readFileSync(filePath, "utf8"); // throws if an explicitly requested locale has no street_types.txt
82
+ for (const line of content.split("\n")) {
83
+ const parsed = parseLine(line);
84
+ if (!parsed)
85
+ continue;
86
+ const existing = canonicalToVariants.get(parsed.canonical) ?? new Set();
87
+ for (const variant of parsed.variants)
88
+ existing.add(variant);
89
+ canonicalToVariants.set(parsed.canonical, existing);
90
+ }
91
+ }
92
+ progress("collect", `Collected ${canonicalToVariants.size} canonical affixes`);
93
+ // Assign synthetic wofIDs as base + index in sorted order. Sorting makes the ids deterministic for a given set of canonicals; adding or removing a canonical shifts the ids of those sorted after it.
94
+ const sortedCanonicals = [...canonicalToVariants.keys()].sort();
95
+ const canonicalToWofID = new Map();
96
+ for (let i = 0; i < sortedCanonicals.length; i++) {
97
+ canonicalToWofID.set(sortedCanonicals[i], STREET_AFFIX_WOFID_BASE + i);
98
+ }
99
+ // Build the trie. Each variant is inserted as a token sequence pointing to its canonical's
100
+ // PlaceEntry — so all variants of "avenue" (av/ave/aven/...) lead to the same terminal entry.
101
+ const nodes = [{ edges: new Map(), places: [] }];
102
+ function insertName(tokens, entry) { // walks or extends the trie along `tokens`, then attaches `entry` at the final state
103
+ if (tokens.length === 0)
104
+ return;
105
+ let stateId = 0;
106
+ for (const t of tokens) {
107
+ const node = nodes[stateId];
108
+ let next = node.edges.get(t);
109
+ if (next === undefined) {
110
+ next = nodes.length;
111
+ nodes.push({ edges: new Map(), places: [] });
112
+ node.edges.set(t, next);
113
+ }
114
+ stateId = next;
115
+ }
116
+ const existing = nodes[stateId].places;
117
+ if (!existing.some((p) => p.wofID === entry.wofID && p.placetype === entry.placetype)) { // skip if this canonical is already attached to the state
118
+ existing.push(entry);
119
+ }
120
+ }
121
+ let insertCount = 0;
122
+ let variantCount = 0;
123
+ for (const canonical of sortedCanonicals) {
124
+ const variants = canonicalToVariants.get(canonical);
125
+ const wofID = canonicalToWofID.get(canonical);
126
+ const entry = {
127
+ wofID,
128
+ placetype: "street_affix",
129
+ name: canonical,
130
+ parentChain: [],
131
+ // Fixed importance: street affixes are structurally unambiguous (Avenue is almost never
132
+ // anything but street-typing). The morphology prior caps bias separately; this value
133
+ // just feeds the cap formula `importance * cap`.
134
+ importance: 1.0,
135
+ lat: 0,
136
+ lon: 0,
137
+ };
138
+ for (const variant of variants) {
139
+ const tokens = normalizeTokens(variant);
140
+ if (tokens.length === 0)
141
+ continue;
142
+ // Filter out collision-prone short surface forms — see `minVariantLength` docstring.
143
+ // We measure against the joined token form (no spaces) since FST keys are token sequences. The canonical is one of its own variants, so it is filtered by the same rule.
144
+ const joined = tokens.join("");
145
+ if (joined.length < minVariantLength)
146
+ continue;
147
+ insertName(tokens, entry);
148
+ insertCount++; // counts calls, including ones insertName de-duplicates
149
+ variantCount++; // incremented in lockstep with insertCount, so the two are always equal
150
+ }
151
+ }
152
+ progress("trie", `Built trie: ${nodes.length} states, ${insertCount} variant insertions`);
153
+ const edgeCount = nodes.reduce((sum, n) => sum + n.edges.size, 0);
154
+ const matcher = FstMatcher.fromNodes(nodes);
155
+ const provenance = {
156
+ builtAt: new Date().toISOString(),
157
+ countries: locales, // Reuse `countries` slot for locale provenance — semantics differ from admin FST.
158
+ stateCount: nodes.length,
159
+ placeCount: sortedCanonicals.length, // counts every canonical, including any whose variants were all filtered out
160
+ edgeCount,
161
+ nameInsertions: insertCount,
162
+ importanceMatches: 0, // No importance scoring for morphology — fixed at 1.0.
163
+ sourceDb: opts.dictionariesDir,
164
+ };
165
+ return {
166
+ matcher,
167
+ provenance,
168
+ canonicalCount: sortedCanonicals.length,
169
+ variantCount,
170
+ insertCount,
171
+ locales,
172
+ };
173
+ }
174
+ //# sourceMappingURL=street-morphology-fst-builder.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"street-morphology-fst-builder.js","sourceRoot":"","sources":["../street-morphology-fst-builder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAA;AAC7D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAEhC,OAAO,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAA;AAG9D;;;;GAIG;AACH,MAAM,uBAAuB,GAAG,aAAa,CAAA;AAE7C,MAAM,qBAAqB,GAAG,kBAAkB,CAAA;AAmChD;;;;;GAKG;AACH,SAAS,SAAS,CAAC,IAAY;IAC9B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAA;IAC3B,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,IAAI,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAA;IAChE,MAAM,KAAK,GAAG,OAAO;SACnB,KAAK,CAAC,GAAG,CAAC;SACV,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SACpB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;IAC7B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAA;IACnC,OAAO,EAAE,SAAS,EAAE,KAAK,CAAC,CAAC,CAAE,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAA;AACjD,CAAC;AAED,MAAM,UAAU,wBAAwB,CAAC,IAAkC;IAC1E,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,IAAI,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA;IAC9C,MAAM,gBAAgB,GAAG,IAAI,CAAC,gBAAgB,IAAI,CAAC,CAAA;IAEnD,iGAAiG;IACjG,IAAI,OAAiB,CAAA;IACrB,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7C,OAAO,GAAG,IAAI,CAAC,OAAO,CAAA;IACvB,CAAC;SAAM,CAAC;QACP,OAAO,GAAG,WAAW,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;YAC5D,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,KAAK,CAAC,CAAA;YACpD,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,WAAW,EAAE;gBAAE,OAAO,KAAK,CAAA;YACrD,IAAI,CAAC;gBACJ,QAAQ,CAAC,IAAI,CAAC,UAAU,EAAE,qBAAqB,CAAC,CAAC,CAAA;gBACjD,OAAO,IAAI,CAAA;YACZ,CAAC;YAAC,MAAM,CAAC;gBACR,OAAO,KAAK,CAAA;YACb,CAAC;QACF,CAAC,CAAC,CAAA;IACH,CAAC;IACD,QAAQ,CAAC,UAAU,EAAE,SAAS,OAAO,CAAC,MAAM,iBAAiB,qBAAqB,EAAE,CAAC,CAAA;IAErF,4FAA4F;IAC5F,wEAAwE;IACxE,MAAM,mBAAmB,GAAG,IAAI,GAAG,EAAuB,CAAA;IAC1D,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC9B,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,MAAM,EAAE,qBAAqB,CAAC,CAAA;QAC1E,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAA;QAC9C,KAAK,MAAM,IAAI,
IAAI,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;YACxC,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,CAAC,CAAA;YAC9B,IAAI,CAAC,MAAM;gBAAE,SAAQ;YACrB,MAAM,QAAQ,GAAG,mBAAmB,CAAC,GAAG,CAAC,MAAM,CAAC,SAAS,CAAC,IAAI,IAAI,GAAG,EAAU,CAAA;YAC/E,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,QAAQ;gBAAE,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,CAAA;YAC5D,mBAAmB,CAAC,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAA;QACpD,CAAC;IACF,CAAC;IACD,QAAQ,CAAC,SAAS,EAAE,aAAa,mBAAmB,CAAC,IAAI,oBAAoB,CAAC,CAAA;IAE9E,mEAAmE;IACnE,MAAM,gBAAgB,GAAG,CAAC,GAAG,mBAAmB,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;IAC/D,MAAM,gBAAgB,GAAG,IAAI,GAAG,EAAkB,CAAA;IAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,gBAAgB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClD,gBAAgB,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC,CAAE,EAAE,uBAAuB,GAAG,CAAC,CAAC,CAAA;IACxE,CAAC;IAED,2FAA2F;IAC3F,8FAA8F;IAC9F,MAAM,KAAK,GAAc,CAAC,EAAE,KAAK,EAAE,IAAI,GAAG,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,CAAA;IAE3D,SAAS,UAAU,CAAC,MAAgB,EAAE,KAAiB;QACtD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,OAAM;QAC/B,IAAI,OAAO,GAAG,CAAC,CAAA;QACf,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;YACxB,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAE,CAAA;YAC5B,IAAI,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;YAC5B,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;gBACxB,IAAI,GAAG,KAAK,CAAC,MAAM,CAAA;gBACnB,KAAK,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,GAAG,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,CAAA;gBAC5C,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,CAAA;YACxB,CAAC;YACD,OAAO,GAAG,IAAI,CAAA;QACf,CAAC;QACD,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAE,CAAC,MAAM,CAAA;QACvC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,KAAK,KAAK,CAAC,KAAK,IAAI,CAAC,CAAC,SAAS,KAAK,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC;YACvF,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QACrB,CAAC;IACF,CAAC;IAED,IAAI,WAAW,GAAG,CAAC,CAAA;IACnB,IAAI,YAAY,GAAG,CAAC,CAAA;IACpB,KAAK,MAAM,SAAS,IAAI,gBAAgB,EAAE,CAAC;QAC1C,MAAM,QAAQ,GAAG,mBAAmB,CAAC,GAAG,CAAC,SAAS,CAAE,CAAA;QACpD,MAAM,KAAK,GAAG,gBAAgB,CAAC,GAAG,CAAC,SAAS,CAAE,CAAA;QAC9C,MAAM,KAAK,GAAe;YACzB,KAAK;YACL,SAAS,EAAE,cAAc;YACzB,IAAI,EAAE,SAAS;YACf,WAAW,EAAE,EAAE;YACf
,wFAAwF;YACxF,qFAAqF;YACrF,iDAAiD;YACjD,UAAU,EAAE,GAAG;YACf,GAAG,EAAE,CAAC;YACN,GAAG,EAAE,CAAC;SACN,CAAA;QACD,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAChC,MAAM,MAAM,GAAG,eAAe,CAAC,OAAO,CAAC,CAAA;YACvC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAQ;YACjC,qFAAqF;YACrF,2FAA2F;YAC3F,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;YAC9B,IAAI,MAAM,CAAC,MAAM,GAAG,gBAAgB;gBAAE,SAAQ;YAC9C,UAAU,CAAC,MAAM,EAAE,KAAK,CAAC,CAAA;YACzB,WAAW,EAAE,CAAA;YACb,YAAY,EAAE,CAAA;QACf,CAAC;IACF,CAAC;IACD,QAAQ,CAAC,MAAM,EAAE,eAAe,KAAK,CAAC,MAAM,YAAY,WAAW,qBAAqB,CAAC,CAAA;IAEzF,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAA;IACjE,MAAM,OAAO,GAAG,UAAU,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;IAC3C,MAAM,UAAU,GAAkB;QACjC,OAAO,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACjC,SAAS,EAAE,OAAO,EAAE,kFAAkF;QACtG,UAAU,EAAE,KAAK,CAAC,MAAM;QACxB,UAAU,EAAE,gBAAgB,CAAC,MAAM;QACnC,SAAS;QACT,cAAc,EAAE,WAAW;QAC3B,iBAAiB,EAAE,CAAC,EAAE,uDAAuD;QAC7E,QAAQ,EAAE,IAAI,CAAC,eAAe;KAC9B,CAAA;IAED,OAAO;QACN,OAAO;QACP,UAAU;QACV,cAAc,EAAE,gBAAgB,CAAC,MAAM;QACvC,YAAY;QACZ,WAAW;QACX,OAAO;KACP,CAAA;AACF,CAAC"}
@@ -0,0 +1,66 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * THE street normalizer for the address-point tier (#476). One function, used by BOTH the shard
7
+ * builder (`scripts/build-address-point-shard.ts`) and the lookup tier (`address-point.ts`) —
8
+ * never two implementations (the PLACETYPE_ORDER lesson: parallel copies silently corrupt).
9
+ *
10
+ * Normalization contract (deliberately aggressive — both sides apply the same function, so
11
+ * collisions only need to be _consistent_, not linguistically perfect):
12
+ *
13
+ * 1. Lowercase, NFKD-fold diacritics, collapse whitespace, strip punctuation (periods, commas,
14
+ * apostrophes).
15
+ * 2. Expand USPS directional abbreviations at the FIRST and LAST token positions (`n` → `north`, `se` →
16
+ * `southeast`) — Overture sources abbreviate inconsistently.
17
+ * 3. Canonicalize a trailing USPS street-type token via the codex suffix table to its canonical full
18
+ * form (`st`/`str`/`street` → `street`).
19
+ *
20
+ * Numbered streets are left as digits (`5th` stays `5th`); a SPELLED ordinal before a street suffix
21
+ * folds to its digit form (`tenth street` → `10th street`, #723) so that grid-city ordinal
22
+ * cross-streets, which the source data spells with digits, become reachable.
23
+ */
24
+ /**
25
+ * Normalize a street name for address-point keying. Same function at build time and lookup time —
26
+ * see module docstring for the contract (fold, directional expansion at the first/last token, trailing street-type canonicalization, spelled-ordinal folding). `street` is the raw street name; the return value is its normalized key form.
27
+ */
28
+ export declare function normalizeStreetForKey(street: string): string;
29
+ /** Normalize a locality name for address-point keying (fold only — no street semantics, i.e. no directional expansion or street-type canonicalization). Takes the raw locality name and returns its key form. NOTE(review): "fold" presumably means the case/diacritic/whitespace/punctuation step of the street contract — confirm against the implementation. */
30
+ export declare function normalizeLocalityForKey(locality: string): string;
31
+ /**
32
+ * Strip a locality QUALIFIER for a query-side fallback — when an OA locality's exact normalized
33
+ * name misses the gazetteer's canonical name, retry with the qualifier removed. OA address data
34
+ * carries disambiguating qualifiers the gazetteer's canonical name omits: Austrian `Kraubath/Mur`,
35
+ * `Hart b.Graz` (→ `Hart`); Swiss `Lenk im Simmental` (→ `Lenk`), `Roche VD` (→ `Roche`); Danish
36
+ * `Odense S`, `Hurup Thy`. A FALLBACK ONLY — the exact name is tried first, and the region-bbox
37
+ * disambiguation resolves any base-name ambiguity downstream. The candidate table is unchanged
38
+ * (this is purely query-side); feed the result back through {@link normalizeLocalityForKey}. Returns
39
+ * the empty string `""` when nothing was stripped (there is no point re-probing the identical key).
40
+ *
41
+ * Measured (`scripts/eval/candidate-recall.ts --strip-fallback`, EU OA holdouts): recovers AT
42
+ * 74.1→88.2% (+14.1pp), DK 91.5→96.2%, CH 90.4→92.6%; +1.3pp overall (diluted by the already-100%
43
+ * locales). Conservative by design — only the qualifier forms above; FI/PT/SI misses are
44
+ * untouched.
45
+ */
46
+ export declare function stripLocalityQualifier(locality: string): string;
47
+ /**
48
+ * Fold numbered-route designators to a canonical key, applied AFTER {@link normalizeStreetForKey}.
49
+ * Sources disagree systematically on how they spell a route: TIGER says `State Rte 100` / `US Hwy
50
+ * 5` where E911/Overture say `VT ROUTE 100` / `US ROUTE 5` — the dominant street-name miss class in
51
+ * the #483 interpolation eval (rural addresses live on routes). `us <designator> N…` folds to `us
52
+ * route N…`; `state <designator> N…` and `<2-letter-prefix> <designator> N…` (the state
53
+ * abbreviation form) fold to `state route N…`. Only digit-leading route numbers fold — `State
54
+ * Street` and friends never match. (`<designator>` is the route word, e.g. the `Rte` / `Hwy` / `ROUTE` of the examples above; the exact accepted set is not visible from this declaration.)
55
+ *
56
+ * Used by BOTH the segment-shard builder (`scripts/build-interpolation-shard.ts`) and the
57
+ * interpolation lookup — same one-function discipline as {@link normalizeStreetForKey}. The
58
+ * address-point tier (#476) does NOT apply it yet: adopting it there requires a shard rebuild
59
+ * (noted on #483).
60
+ *
61
+ * Same-numbered US and state routes stay DISTINCT keys (`us route 5` vs `state route 5`); only the
62
+ * BARE `route N` form is ambiguous (designator unknown) and it stays unfolded — a bare-route query
63
+ * therefore misses rather than guessing a designator.
64
+ */
65
+ export declare function canonicalizeRouteKey(streetNorm: string): string;
66
+ //# sourceMappingURL=street-normalize.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"street-normalize.d.ts","sourceRoot":"","sources":["../street-normalize.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAuDH;;;GAGG;AACH,wBAAgB,qBAAqB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CA6C5D;AAED,4FAA4F;AAC5F,wBAAgB,uBAAuB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAEhE;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,sBAAsB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAQ/D;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAgB,oBAAoB,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,CAI/D"}