@mailwoman/resolver-wof-sqlite 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. package/README.md +250 -0
  2. package/out/address-point-interpolation.d.ts +48 -0
  3. package/out/address-point-interpolation.d.ts.map +1 -0
  4. package/out/address-point-interpolation.js +164 -0
  5. package/out/address-point-interpolation.js.map +1 -0
  6. package/out/address-point-schema.d.ts +58 -0
  7. package/out/address-point-schema.d.ts.map +1 -0
  8. package/out/address-point-schema.js +67 -0
  9. package/out/address-point-schema.js.map +1 -0
  10. package/out/address-point.d.ts +29 -0
  11. package/out/address-point.d.ts.map +1 -0
  12. package/out/address-point.js +62 -0
  13. package/out/address-point.js.map +1 -0
  14. package/out/ancestry.d.ts +40 -0
  15. package/out/ancestry.d.ts.map +1 -0
  16. package/out/ancestry.js +53 -0
  17. package/out/ancestry.js.map +1 -0
  18. package/out/build-candidate-cli.d.ts +16 -0
  19. package/out/build-candidate-cli.d.ts.map +1 -0
  20. package/out/build-candidate-cli.js +80 -0
  21. package/out/build-candidate-cli.js.map +1 -0
  22. package/out/build-candidate.d.ts +54 -0
  23. package/out/build-candidate.d.ts.map +1 -0
  24. package/out/build-candidate.js +230 -0
  25. package/out/build-candidate.js.map +1 -0
  26. package/out/build-coincident-roles-cli.d.ts +16 -0
  27. package/out/build-coincident-roles-cli.d.ts.map +1 -0
  28. package/out/build-coincident-roles-cli.js +94 -0
  29. package/out/build-coincident-roles-cli.js.map +1 -0
  30. package/out/build-fts-cli.d.ts +23 -0
  31. package/out/build-fts-cli.d.ts.map +1 -0
  32. package/out/build-fts-cli.js +117 -0
  33. package/out/build-fts-cli.js.map +1 -0
  34. package/out/build-slim-cli.d.ts +14 -0
  35. package/out/build-slim-cli.d.ts.map +1 -0
  36. package/out/build-slim-cli.js +130 -0
  37. package/out/build-slim-cli.js.map +1 -0
  38. package/out/build-slim.d.ts +71 -0
  39. package/out/build-slim.d.ts.map +1 -0
  40. package/out/build-slim.js +267 -0
  41. package/out/build-slim.js.map +1 -0
  42. package/out/candidate-lookup.d.ts +43 -0
  43. package/out/candidate-lookup.d.ts.map +1 -0
  44. package/out/candidate-lookup.js +191 -0
  45. package/out/candidate-lookup.js.map +1 -0
  46. package/out/candidate-schema.d.ts +86 -0
  47. package/out/candidate-schema.d.ts.map +1 -0
  48. package/out/candidate-schema.js +109 -0
  49. package/out/candidate-schema.js.map +1 -0
  50. package/out/coincident-roles.d.ts +86 -0
  51. package/out/coincident-roles.d.ts.map +1 -0
  52. package/out/coincident-roles.js +160 -0
  53. package/out/coincident-roles.js.map +1 -0
  54. package/out/convention.d.ts +109 -0
  55. package/out/convention.d.ts.map +1 -0
  56. package/out/convention.js +94 -0
  57. package/out/convention.js.map +1 -0
  58. package/out/fst-autocomplete.d.ts +49 -0
  59. package/out/fst-autocomplete.d.ts.map +1 -0
  60. package/out/fst-autocomplete.js +124 -0
  61. package/out/fst-autocomplete.js.map +1 -0
  62. package/out/fst-builder.d.ts +20 -0
  63. package/out/fst-builder.d.ts.map +1 -0
  64. package/out/fst-builder.js +219 -0
  65. package/out/fst-builder.js.map +1 -0
  66. package/out/fst-deserialize-web.d.ts +16 -0
  67. package/out/fst-deserialize-web.d.ts.map +1 -0
  68. package/out/fst-deserialize-web.js +133 -0
  69. package/out/fst-deserialize-web.js.map +1 -0
  70. package/out/fst-matcher.d.ts +33 -0
  71. package/out/fst-matcher.d.ts.map +1 -0
  72. package/out/fst-matcher.js +117 -0
  73. package/out/fst-matcher.js.map +1 -0
  74. package/out/fst-serialize.d.ts +30 -0
  75. package/out/fst-serialize.d.ts.map +1 -0
  76. package/out/fst-serialize.js +261 -0
  77. package/out/fst-serialize.js.map +1 -0
  78. package/out/fst-types.d.ts +60 -0
  79. package/out/fst-types.d.ts.map +1 -0
  80. package/out/fst-types.js +11 -0
  81. package/out/fst-types.js.map +1 -0
  82. package/out/fts.d.ts +158 -0
  83. package/out/fts.d.ts.map +1 -0
  84. package/out/fts.js +261 -0
  85. package/out/fts.js.map +1 -0
  86. package/out/geo.d.ts +74 -0
  87. package/out/geo.d.ts.map +1 -0
  88. package/out/geo.js +88 -0
  89. package/out/geo.js.map +1 -0
  90. package/out/index.d.ts +27 -0
  91. package/out/index.d.ts.map +1 -0
  92. package/out/index.js +22 -0
  93. package/out/index.js.map +1 -0
  94. package/out/interpolation.d.ts +84 -0
  95. package/out/interpolation.d.ts.map +1 -0
  96. package/out/interpolation.js +150 -0
  97. package/out/interpolation.js.map +1 -0
  98. package/out/lookup.d.ts +156 -0
  99. package/out/lookup.d.ts.map +1 -0
  100. package/out/lookup.js +876 -0
  101. package/out/lookup.js.map +1 -0
  102. package/out/postal-city-alias-lookup.d.ts +50 -0
  103. package/out/postal-city-alias-lookup.d.ts.map +1 -0
  104. package/out/postal-city-alias-lookup.js +66 -0
  105. package/out/postal-city-alias-lookup.js.map +1 -0
  106. package/out/postal-city-alias-schema.d.ts +51 -0
  107. package/out/postal-city-alias-schema.d.ts.map +1 -0
  108. package/out/postal-city-alias-schema.js +47 -0
  109. package/out/postal-city-alias-schema.js.map +1 -0
  110. package/out/postal-city-candidate-schema.d.ts +58 -0
  111. package/out/postal-city-candidate-schema.d.ts.map +1 -0
  112. package/out/postal-city-candidate-schema.js +56 -0
  113. package/out/postal-city-candidate-schema.js.map +1 -0
  114. package/out/postcode-point-lookup.d.ts +38 -0
  115. package/out/postcode-point-lookup.d.ts.map +1 -0
  116. package/out/postcode-point-lookup.js +46 -0
  117. package/out/postcode-point-lookup.js.map +1 -0
  118. package/out/reverse.d.ts +99 -0
  119. package/out/reverse.d.ts.map +1 -0
  120. package/out/reverse.js +290 -0
  121. package/out/reverse.js.map +1 -0
  122. package/out/schema.d.ts +163 -0
  123. package/out/schema.d.ts.map +1 -0
  124. package/out/schema.js +18 -0
  125. package/out/schema.js.map +1 -0
  126. package/out/sharding.d.ts +96 -0
  127. package/out/sharding.d.ts.map +1 -0
  128. package/out/sharding.js +129 -0
  129. package/out/sharding.js.map +1 -0
  130. package/out/sqlite-convention-source.d.ts +29 -0
  131. package/out/sqlite-convention-source.d.ts.map +1 -0
  132. package/out/sqlite-convention-source.js +53 -0
  133. package/out/sqlite-convention-source.js.map +1 -0
  134. package/out/sqlite-utils.d.ts +17 -0
  135. package/out/sqlite-utils.d.ts.map +1 -0
  136. package/out/sqlite-utils.js +24 -0
  137. package/out/sqlite-utils.js.map +1 -0
  138. package/out/street-morphology-fst-builder.d.ts +59 -0
  139. package/out/street-morphology-fst-builder.d.ts.map +1 -0
  140. package/out/street-morphology-fst-builder.js +174 -0
  141. package/out/street-morphology-fst-builder.js.map +1 -0
  142. package/out/street-normalize.d.ts +66 -0
  143. package/out/street-normalize.d.ts.map +1 -0
  144. package/out/street-normalize.js +176 -0
  145. package/out/street-normalize.js.map +1 -0
  146. package/out/street-segment-schema.d.ts +61 -0
  147. package/out/street-segment-schema.d.ts.map +1 -0
  148. package/out/street-segment-schema.js +64 -0
  149. package/out/street-segment-schema.js.map +1 -0
  150. package/out/types.d.ts +137 -0
  151. package/out/types.d.ts.map +1 -0
  152. package/out/types.js +13 -0
  153. package/out/types.js.map +1 -0
  154. package/out/unified-schema.d.ts +25 -0
  155. package/out/unified-schema.d.ts.map +1 -0
  156. package/out/unified-schema.js +142 -0
  157. package/out/unified-schema.js.map +1 -0
  158. package/package.json +54 -0
package/out/lookup.js ADDED
@@ -0,0 +1,876 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `WofSqlitePlaceLookup` — the resolver implementation backed by `node:sqlite` + a Kysely-typed
7
+ * query layer where the queries are non-trivial, and raw SQL where they aren't (FTS5 MATCH, the
8
+ * FTS index build).
9
+ *
10
+ * See `docs/plan/phases/PHASE_4_2_wof_sqlite.md` for the design rationale.
11
+ */
12
+ import { Kysely, sql } from "kysely";
13
+ import { DatabaseSync } from "node:sqlite";
14
+ import { SqliteDialect } from "@mailwoman/core/kysley/dialect";
15
+ import { expandPlacetypeFilter } from "@mailwoman/resolver";
16
+ import { ADDRESS_CONVENTION_TABLE, resolveConvention, SeedConventionSource, } from "./convention.js";
17
+ import { ancestorLineage } from "./ancestry.js";
18
+ import { COINCIDENT_ROLES_TABLE, coincidentRolesExists } from "./coincident-roles.js";
19
+ import { aliasBagExactMatch, buildPlaceSearchFts, PLACE_BBOX_TABLE, PLACE_POPULATION_TABLE, placeBboxExists, placePopulationExists, placeSearchFtsExists, } from "./fts.js";
20
+ import { bboxAround, haversineKm } from "./geo.js";
21
+ import { pickShardForPlacetype, resolveShards } from "./sharding.js";
22
+ import { SqliteConventionSource } from "./sqlite-convention-source.js";
/**
 * Default ranking weights. The constructor merges a caller-supplied `weights` object over these,
 * so any key can be overridden individually.
 */
const DEFAULT_WEIGHTS = {
    // Additive boosts applied on top of the (negated) BM25 score.
    placetypeMatchBoost: 0.5,
    localityImplicitBoost: 0.2,
    countryMatchBoost: 0.3,
    directChildBoost: 0.5,
    descendantBoost: 0.2,
    // Penalty for candidate names much longer than the query text.
    lengthPenaltyWeight: 0.1,
    // Distance-decayed boost used when the query carries a `near` point.
    proximityBoost: 0.8,
    proximityScaleKm: 100,
    // Deliberately large. Tuning against real WOF data showed BM25 gaps of 1.5-3.0 between famous
    // places and tiny same-name peers: the famous ones carry hundreds of alt-name entries, which
    // hurts their FTS document score. For unambiguous queries such as "New York" or "Chicago" to
    // surface "the famous one" consistently, the population signal has to dominate. Callers who
    // want a more conservative balance can lower this through the RankingWeights override.
    //
    // This resolver reads `place_population` directly. The separate, Wikipedia-derived
    // `place_importance` table is consumed by the FST layer, not here — see
    // docs/articles/concepts/importance-vs-population.md for the two-signal contract.
    populationBoost: 4.0,
    populationScaleLog10: 6,
    // An exact name/alias match outranks a partial match BEFORE the weighted sum (population
    // included) is consulted, which keeps population an intra-tier prominence tiebreaker rather
    // than a cross-tier promoter. Fixes the 2-letter region-abbreviation bug ("ME" → Maine, not
    // the more populous Missouri).
    exactMatchTiering: true,
};
/**
 * Minimum FTS over-fetch window for SHORT queries (3 characters or fewer) — typically region
 * abbreviations such as "NY" or "VT".
 *
 * The place that holds the exact abbreviation tends to have a poor BM25 score (its document is a
 * long multilingual alt-name list), so the ordinary `limit * 4` window can drop it before
 * `exactMatchTiering` gets a chance to promote it. 200 comfortably covers every same-abbreviation
 * region across the 12-country gazetteer (a 2-letter token matches a few dozen regions at most)
 * while remaining a cheap region-placetype fetch. See the over-fetch comment in `#fuzzyNameMatch`.
 */
const SHORT_QUERY_OVERFETCH = 200;
/**
 * Name of the coordinate-first candidate table (built by scripts/build-postcode-locality.py).
 * It maps a postcode to its containing and nearby localities, together with their WOF alt-name
 * aliases.
 */
const POSTCODE_LOCALITY_TABLE = "postcode_locality";
/**
 * Distance scale (km) for the postcode term of the coordinate-first locality soft-score
 * `Score = pc·S_pc + name·S_name + pop·S_pop` (each S in [0,1]): it sets how quickly S_pc falls
 * off with distance.
 *
 * The pc/name/pop WEIGHTS are not defined here — they come from the resolved convention's
 * `scoringWeights` (`WORLD_DEFAULT` = 0.6/0.3/0.1, the EU values), so a locale can retune them as
 * data.
 */
const CF_PC_DECAY_KM = 8;
/**
 * Maximum distance (km) between the chosen locality and the postcode's containing locality.
 * Beyond it, the postcode and the parsed city name are judged to disagree (a transposed or
 * wrong-for-the-city postcode) and the `mismatch` flag fires.
 *
 * Generous enough that a city-state Ortsteil (~15km from the city centroid) or an abutting town
 * (a few km) is NOT flagged; tight enough to catch a wrong city (hundreds of km).
 */
const CF_MISMATCH_KM = 50;
// NOTE(review): undocumented in the original and not referenced in the visible part of the file.
// Presumably a score-gap threshold that feeds the same `mismatch` decision — confirm at the use site.
const CF_MISMATCH_DELTA = 0.5;
/**
 * Normalizes a string for the coordinate-first soft name match.
 *
 * Steps, in order: lower-case, decompose to NFD and drop combining diacritical marks
 * (U+0300–U+036F), replace every run of characters outside `a-z0-9` with a single space, then trim.
 *
 * @param s Raw text to normalize.
 * @returns The folded string; empty when nothing in `a-z0-9` survives.
 */
function cfNormalize(s) {
    const lowered = s.toLowerCase();
    // NFD splits an accented letter into base letter + combining mark so the mark can be removed.
    const unaccented = lowered.normalize("NFD").replace(/[\u0300-\u036f]/g, "");
    const spaced = unaccented.replace(/[^a-z0-9]+/g, " ");
    return spaced.trim();
}
/**
 * Builds the set of character trigrams of `s` after padding it with one leading and one trailing
 * space, so that short tokens still yield at least one trigram.
 *
 * @param s Text to split (callers in this file pass already-normalized text).
 * @returns A `Set` of 3-character substrings; empty only when `s` is the empty string.
 */
function trigrams(s) {
    const padded = ` ${s} `;
    const grams = new Set();
    let start = 0;
    while (start + 3 <= padded.length) {
        grams.add(padded.slice(start, start + 3));
        start += 1;
    }
    return grams;
}
/**
 * Jaccard similarity of the padded character-trigram sets of `a` and `b`, in [0,1].
 *
 * Tolerates swallowed-leading-character fragments ("auen" vs "plauen") and minor misspellings
 * without needing a heavyweight edit-distance pass.
 *
 * @param a First string.
 * @param b Second string.
 * @returns |A ∩ B| / |A ∪ B|, or 0 when either trigram set is empty.
 */
function trigramJaccard(a, b) {
    const left = trigrams(a);
    const right = trigrams(b);
    if (left.size === 0 || right.size === 0) {
        return 0;
    }
    const shared = [...left].filter((gram) => right.has(gram)).length;
    // Inclusion–exclusion gives the union size without building the union set.
    const unionSize = left.size + right.size - shared;
    return shared / unionSize;
}
/**
 * Soft name-match score in [0,1] between query text and a place's name plus aliases.
 *
 * Every string is normalized with `cfNormalize` first. An exact normalized match against the name
 * or any alias short-circuits to 1; otherwise the result is the best trigram-Jaccard similarity
 * over all labels.
 *
 * @param text Query text.
 * @param name Canonical place name.
 * @param aliases Iterable of alternative names.
 * @returns 1 on an exact normalized match, else the best similarity; 0 when the query normalizes
 *   to the empty string.
 */
function softNameScore(text, name, aliases) {
    const wanted = cfNormalize(text);
    if (!wanted) {
        return 0;
    }
    let top = 0;
    const labels = [name, ...aliases];
    for (const label of labels) {
        const candidate = cfNormalize(label);
        if (!candidate) {
            // Labels that normalize to nothing cannot contribute a score.
            continue;
        }
        if (candidate === wanted) {
            return 1;
        }
        top = Math.max(top, trigramJaccard(wanted, candidate));
    }
    return top;
}
128
+ export class WofSqlitePlaceLookup {
129
+ #db;
130
+ #ownsDb;
131
+ #kysely;
132
+ #weights;
133
+ /**
134
+ * Cached at construction so we don't `sqlite_master` query on every findPlace call. Bbox + near-
135
+ * with-radius queries fall back to no-filter when this is false, preserving compatibility with
136
+ * DBs that were FTS-built before the R*Tree shipped.
137
+ *
138
+ * Per-shard: a shard is only considered to have the bbox index if its own R*Tree table exists.
139
+ */
140
+ #hasBboxIndex;
141
+ /**
142
+ * Per-shard probe for the `place_population` aux table. When false, the LEFT JOIN is omitted from
143
+ * the SELECT and population boost is 0 for every row — preserves compatibility with DBs built
144
+ * before this feature shipped.
145
+ */
146
+ #hasPopulationIndex;
147
+ /**
148
+ * Per-shard probe for the `postcode_locality` table (the coordinate-first candidate table, built
149
+ * by scripts/build-postcode-locality.py). Cached at construction; null'd out when absent so the
150
+ * coord-first path silently no-ops on a deployment that didn't ship the table.
151
+ */
152
+ #postcodeLocalityShard;
153
+ /**
154
+ * Resolved shard list. Always at least one entry; first is `main`. Multi-shard adds extras with
155
+ * their own derived (or override) schema names.
156
+ */
157
+ #shards;
158
+ /**
159
+ * The Geographic Rule Engine (Direction E, #289). `#conventionSource` supplies per-WOF-polygon
160
+ * resolution profiles; `#strategies` is the named-primitive registry the merged convention
161
+ * dispatches. Empty source → every query resolves to `WORLD_DEFAULT` → byte-identical to the
162
+ * pre-engine coordinate-first path. `#countryWofIdCache` memoizes the country-code →
163
+ * country-WOF-id lookup that seeds the convention ancestor chain (one query per country, then
164
+ * cached).
165
+ */
166
+ #conventionSource;
167
+ #strategies;
168
+ #countryWofIdCache = new Map();
169
+ /** Strategy names already warned about — so an unknown name surfaces once, not once per query. */
170
+ #warnedUnknownStrategies = new Set();
171
+ /**
172
+ * Lazily-built `admin_id → coincident localities` map from the #403 relation (null until first
173
+ * use).
174
+ */
175
+ #coincidentRolesCache = null;
176
+ /** Per-id memoized ancestor lineages (#404) — a hot chain is queried once. */
177
+ #ancestorsCache = new Map();
178
+ /**
179
+ * Opt-in postal-city alias reader (#475). `null` unless `opts.postalCityAliases` was supplied —
180
+ * every alias code path is gated on this, so the default resolver is byte-identical.
181
+ */
182
+ #postalCityAliases;
183
+ constructor(opts, weights) {
184
+ if (opts.database && opts.databasePath) {
185
+ throw new Error("WofSqlitePlaceLookup: pass either `database` or `databasePath`, not both");
186
+ }
187
+ if (!opts.database && !opts.databasePath) {
188
+ throw new Error("WofSqlitePlaceLookup: one of `database` or `databasePath` is required");
189
+ }
190
+ if (opts.database) {
191
+ this.#db = opts.database;
192
+ this.#ownsDb = false;
193
+ this.#shards = [{ path: ":memory:", schemaName: "main", placetypes: [] }];
194
+ }
195
+ else {
196
+ const shards = resolveShards(opts.databasePath);
197
+ this.#shards = shards;
198
+ this.#db = new DatabaseSync(shards[0].path, { readOnly: false });
199
+ this.#ownsDb = true;
200
+ // ATTACH each non-main shard. Schema names were validated by resolveShards, so safe to
201
+ // interpolate directly (SQLite ATTACH doesn't accept parameters for the schema name).
202
+ for (const s of shards.slice(1)) {
203
+ this.#db.exec(`ATTACH DATABASE '${s.path.replace(/'/g, "''")}' AS ${s.schemaName}`);
204
+ }
205
+ }
206
+ // node:sqlite has no .pragma() helper; pragmas are executed as plain SQL.
207
+ this.#db.exec("PRAGMA busy_timeout = 5000");
208
+ if (opts.buildFts) {
209
+ this.#ensureFts();
210
+ }
211
+ else {
212
+ this.#assertFtsExists();
213
+ }
214
+ this.#kysely = new Kysely({
215
+ dialect: new SqliteDialect({ database: this.#db }),
216
+ });
217
+ this.#weights = { ...DEFAULT_WEIGHTS, ...(weights ?? {}) };
218
+ // Probe each shard's aux-table presence — driven by per-shard table existence in
219
+ // sqlite_master. Cached at construction so findPlace doesn't query sqlite_master per call.
220
+ this.#hasBboxIndex = new Map();
221
+ this.#hasPopulationIndex = new Map();
222
+ for (const s of this.#shards) {
223
+ this.#hasBboxIndex.set(s.schemaName, this.#shardHasTable(s.schemaName, PLACE_BBOX_TABLE));
224
+ this.#hasPopulationIndex.set(s.schemaName, this.#shardHasTable(s.schemaName, PLACE_POPULATION_TABLE));
225
+ }
226
+ // The postcode_locality table can live on any attached shard (typically its own
227
+ // `postcode-locality-<cc>.db`). Find the first shard that has it; null = coord-first disabled.
228
+ this.#postcodeLocalityShard =
229
+ this.#shards.find((s) => this.#shardHasTable(s.schemaName, POSTCODE_LOCALITY_TABLE))?.schemaName ?? null;
230
+ // Opt-in postal-city alias reader (#475). Construction-time present-or-not is the gate: null
231
+ // keeps the coordinate-first scorer byte-identical to pre-#475.
232
+ this.#postalCityAliases = opts.postalCityAliases ?? null;
233
+ // The Geographic Rule Engine convention source. Precedence: an explicit `opts.conventions`
234
+ // (a ready source or a seed map) wins; else the build-from-source convention asset if one is
235
+ // attached (auto-detected, like the postcode_locality shard — adding conventions.db to
236
+ // databasePath enables it; queried on demand, not paged into memory); else empty, so EU rides
237
+ // WORLD_DEFAULT. The registry binds strategy NAMES to the SQL-bound primitives — adding a
238
+ // strategy is registering it here.
239
+ const conventionShard = this.#shards.find((s) => this.#shardHasTable(s.schemaName, ADDRESS_CONVENTION_TABLE))?.schemaName ?? null;
240
+ this.#conventionSource = opts.conventions
241
+ ? "get" in opts.conventions && typeof opts.conventions.get === "function"
242
+ ? opts.conventions
243
+ : new SeedConventionSource(opts.conventions)
244
+ : conventionShard
245
+ ? new SqliteConventionSource(this.#db, conventionShard)
246
+ : new SeedConventionSource();
247
+ this.#strategies = new Map([
248
+ ["postcode_area_resolution", (q, c) => this.#postcodeAreaResolution(q, c)],
249
+ ["fallback_fuzzy_name_match", (q) => this.#fuzzyNameMatch(q)],
250
+ ]);
251
+ }
252
+ #shardHasTable(schemaName, tableName) {
253
+ // For main, the existing helpers work directly. For attached shards we have to ask via the
254
+ // schema-qualified `sqlite_master` view.
255
+ if (schemaName === "main") {
256
+ if (tableName === PLACE_BBOX_TABLE)
257
+ return placeBboxExists(this.#db);
258
+ if (tableName === PLACE_POPULATION_TABLE)
259
+ return placePopulationExists(this.#db);
260
+ }
261
+ const row = this.#db
262
+ .prepare(`SELECT name FROM ${schemaName}.sqlite_master WHERE type = 'table' AND name = ?`)
263
+ .get(tableName);
264
+ return Boolean(row);
265
+ }
266
+ async findPlace(query) {
267
+ // Geographic Rule Engine dispatch (#289). Resolve the effective convention for this query
268
+ // (WORLD_DEFAULT for the EU locales — the seed source is empty) and run its candidate strategies
269
+ // in order; the first to return a non-null result wins. The default list,
270
+ // [postcode_area_resolution, fallback_fuzzy_name_match], reproduces the pre-engine coordinate-
271
+ // first → FTS fall-through exactly. Unknown strategy names are skipped, so a convention may name
272
+ // a primitive a future phase will register.
273
+ const convention = this.#conventionFor(query);
274
+ for (const name of convention.candidateStrategies) {
275
+ const strategy = this.#strategies.get(name);
276
+ if (!strategy) {
277
+ this.#warnUnknownStrategy(name);
278
+ continue;
279
+ }
280
+ const result = await strategy(query, convention);
281
+ if (result !== null)
282
+ return result;
283
+ }
284
+ return [];
285
+ }
286
+ /**
287
+ * Dual-role localities coincident with an admin id, from the precomputed `coincident_roles`
288
+ * relation (#403). Backs {@link ResolveOpts.hierarchyCompletion} (#405): O(1) once the relation is
289
+ * loaded. Returns `[]` when the relation table is absent (older DB) or the admin isn't a
290
+ * dual-role place, so completion degrades gracefully. The relation + `spr` join is loaded once
291
+ * and memoized.
292
+ */
293
+ coincidentLocalitiesFor(adminId) {
294
+ const id = typeof adminId === "number" ? adminId : Number(adminId);
295
+ if (!Number.isFinite(id))
296
+ return [];
297
+ if (!this.#coincidentRolesCache) {
298
+ const map = new Map();
299
+ if (coincidentRolesExists(this.#db)) {
300
+ const rows = this.#db
301
+ .prepare(`SELECT cr.admin_id AS adminId, s.id AS id, s.name AS name, s.country AS country,
302
+ s.latitude AS lat, s.longitude AS lon,
303
+ cr.relationship_type AS relationshipType, cr.locality_population AS population,
304
+ cr.distance_km AS distanceKm
305
+ FROM ${COINCIDENT_ROLES_TABLE} cr JOIN spr s ON s.id = cr.locality_id`)
306
+ .all();
307
+ for (const r of rows) {
308
+ const candidate = {
309
+ id: r.id,
310
+ name: r.name,
311
+ placetype: "locality",
312
+ country: r.country,
313
+ lat: r.lat,
314
+ lon: r.lon,
315
+ score: 0,
316
+ relationshipType: r.relationshipType,
317
+ population: r.population,
318
+ distanceKm: r.distanceKm,
319
+ };
320
+ const list = map.get(r.adminId);
321
+ if (list)
322
+ list.push(candidate);
323
+ else
324
+ map.set(r.adminId, [candidate]);
325
+ }
326
+ }
327
+ this.#coincidentRolesCache = map;
328
+ }
329
+ return this.#coincidentRolesCache.get(id) ?? [];
330
+ }
331
+ /**
332
+ * The ancestor lineage of a place — its containment chain joined with `spr` for canonical names,
333
+ * ordered NEAREST-FIRST (localadmin → county → region → … → country). Backs
334
+ * {@link ResolveOpts.includeAncestors} (#404). Self is excluded; memoized per id. Returns `[]`
335
+ * when the place has no recorded ancestry.
336
+ *
337
+ * The walk itself lives in `ancestry.ts` (shared with the reverse geocoder, #484); the ordering
338
+ * is its `PLACETYPE_DEPTH` table — same ranking as the previous inline SQL CASE, extended below
339
+ * `localadmin` so locality/neighbourhood ancestors order correctly instead of sorting last.
340
+ */
341
+ ancestors(id) {
342
+ const pid = typeof id === "number" ? id : Number(id);
343
+ if (!Number.isFinite(pid))
344
+ return [];
345
+ const cached = this.#ancestorsCache.get(pid);
346
+ if (cached)
347
+ return cached;
348
+ const lineage = ancestorLineage(this.#db, pid).map((r) => ({
349
+ id: r.id,
350
+ placetype: r.placetype,
351
+ name: r.name,
352
+ }));
353
+ this.#ancestorsCache.set(pid, lineage);
354
+ return lineage;
355
+ }
356
+ /**
357
+ * Surface an unknown strategy name LOUDLY (once per name) rather than swallowing it silently — an
358
+ * invisible no-op is exactly the hidden-dependency failure mode we avoid (see the
359
+ * provenance-first design value). We warn rather than throw so a convention asset built against a
360
+ * newer code revision (one that adds a strategy) degrades gracefully on an older build instead of
361
+ * taking down resolution.
362
+ */
363
+ #warnUnknownStrategy(name) {
364
+ if (this.#warnedUnknownStrategies.has(name))
365
+ return;
366
+ this.#warnedUnknownStrategies.add(name);
367
+ console.warn(`WofSqlitePlaceLookup: a convention names strategy "${name}", which this build does not register ` +
368
+ `(known: ${[...this.#strategies.keys()].join(", ")}). Skipping it. If the convention asset was built ` +
369
+ `against a newer code revision, rebuild the asset for this one.`);
370
+ }
371
+ /**
372
+ * Strategy `postcode_area_resolution` — the coordinate-first locality path, strictly gated (a
373
+ * sibling postcode AND a postcode_locality table AND a locality query). Returns `null` — so the
374
+ * dispatcher falls through to the next strategy — when the gate is unmet or the postcode isn't in
375
+ * the table; otherwise the soft-scored postcode∪name candidate set.
376
+ */
377
+ #postcodeAreaResolution(query, convention) {
378
+ if (!(query.postcode && this.#postcodeLocalityShard && this.#isLocalityQuery(query))) {
379
+ return Promise.resolve(null);
380
+ }
381
+ return this.#findLocalityCoordFirst(query, this.#postcodeLocalityShard, convention);
382
+ }
383
+ /**
384
+ * Strategy `fallback_fuzzy_name_match` — the BM25 FTS name-match over the gazetteer, the
385
+ * universal fallback. Always returns an array (never null), so it terminates the dispatch chain.
386
+ */
387
+ async #fuzzyNameMatch(query) {
388
+ const limit = query.limit ?? 10;
389
+ // Over-fetch so post-scoring + exact-match tiering have room to re-rank. SHORT queries (a 2–3-char
390
+ // region abbreviation like "NY"/"VT") are the danger case the `exactMatchTiering` docstring flags:
391
+ // the exact-abbrev holder's BM25 is poor (its long multilingual alt-name document tanks the score),
392
+ // so under the normal `limit * 4` window it drops OUT of the candidate pool BEFORE tiering can
393
+ // promote it — "NY" then resolves to a token-matching foreign region (Highland, GB) instead of New
394
+ // York. Widen the window for short queries so the exact match is always present to be tiered.
395
+ // (Cross-country abbrev collisions — "VT" is BOTH Vermont and Viterbo — still need a country/
396
+ // postcode signal to disambiguate; this only rescues the window-drop class, not genuine ambiguity.
397
+ // With a `country` hint every abbrev resolves; bare + no-context lifts 7→10/15 US states.)
398
+ const ftsLimit = query.text.trim().length <= 3 ? Math.max(limit * 4, SHORT_QUERY_OVERFETCH) : limit * 4;
399
+ // Expand the placetype filter through the shared equivalence table (core/resolver): a
400
+ // `locality` query must also reach `borough` / `localadmin` rows — Brooklyn-the-borough
401
+ // (pop 2.5M) is a borough, not a locality, and a strict filter made it unreachable so the
402
+ // fuzzy "Brooklyn Park, MN" won instead. Order-preserving: the FIRST entry stays the
403
+ // requested placetype, which is what shard routing keys off below.
404
+ const placetypes = expandPlacetypeFilter(normalizePlacetypes(query.placetype));
405
+ const ftsQuery = sanitizeFtsQuery(query.text);
406
+ if (!ftsQuery)
407
+ return [];
408
+ // Pick the shard for this query. Multi-shard routing is placetype-driven; a query without
409
+ // `placetype` always goes to main. (Mixed-placetype queries with multiple shards aren't
410
+ // supported in v1 — caller can issue two findPlace calls and merge in TS if needed.)
411
+ const firstPlacetype = placetypes?.[0];
412
+ const shard = pickShardForPlacetype(this.#shards, firstPlacetype);
413
+ const sch = shard.schemaName; // bare schema name; safe to interpolate (validated at construction)
414
+ // Filter out historical / superseded / deprecated places by default — they live in the same
415
+ // spr table but should never win a contemporary lookup. `is_current = 0` is the only WOF
416
+ // value that means "not current"; both `-1` (modern) and `1` (legacy) mean current. See #91.
417
+ // Note: with schema-qualified FROM the bare `place_search` reference in MATCH resolves to
418
+ // the FROM table — required by FTS5 parser, see sharding.ts header comment.
419
+ const where = ["place_search MATCH ?", "spr.is_current != 0", "spr.is_deprecated = 0"];
420
+ const params = [ftsQuery];
421
+ if (placetypes && placetypes.length > 0) {
422
+ where.push(`spr.placetype IN (${placetypes.map(() => "?").join(", ")})`);
423
+ params.push(...placetypes);
424
+ }
425
+ if (query.country) {
426
+ where.push("spr.country = ?");
427
+ params.push(query.country);
428
+ }
429
+ if (query.parentId !== undefined) {
430
+ where.push(`(spr.parent_id = ? OR spr.id IN (SELECT id FROM ${sch}.ancestors WHERE ancestor_id = ?))`);
431
+ params.push(query.parentId, query.parentId);
432
+ }
433
+ // Bbox + near-with-radius are SQL-level filters via the R*Tree. We only emit the JOIN when
434
+ // the active shard has the R*Tree; missing-but-requested is silently treated as no-bbox-
435
+ // filter so legacy DBs / shards-without-bbox don't crash.
436
+ const shardHasBbox = this.#hasBboxIndex.get(sch) === true;
437
+ const useBboxJoin = (query.bbox || query.near?.maxDistanceKm !== undefined) && shardHasBbox;
438
+ let joinClause = `JOIN ${sch}.spr ON spr.id = place_search.wof_id`;
439
+ if (useBboxJoin) {
440
+ joinClause += ` JOIN ${sch}.${PLACE_BBOX_TABLE} bbox ON bbox.id = spr.id`;
441
+ // AABB intersection — both bbox sides must overlap. R*Tree handles this in O(log n).
442
+ const filterBox = query.bbox
443
+ ? query.bbox
444
+ : bboxAround(query.near.lat, query.near.lon, query.near.maxDistanceKm);
445
+ where.push("bbox.min_lat <= ? AND bbox.max_lat >= ?", "bbox.min_lon <= ? AND bbox.max_lon >= ?");
446
+ params.push(filterBox.maxLat, filterBox.minLat, filterBox.maxLon, filterBox.minLon);
447
+ }
448
+ // LEFT JOIN the population aux table when present. Missing-on-this-shard means the SELECT
449
+ // just doesn't include the population column; the post-scoring loop treats it as 0.
450
+ const shardHasPopulation = this.#hasPopulationIndex.get(sch) === true;
451
+ const populationSelect = shardHasPopulation
452
+ ? `${PLACE_POPULATION_TABLE}.population AS population`
453
+ : `NULL AS population`;
454
+ const populationJoin = shardHasPopulation
455
+ ? `LEFT JOIN ${sch}.${PLACE_POPULATION_TABLE} ON ${PLACE_POPULATION_TABLE}.id = spr.id`
456
+ : "";
457
+ // Push the population boost into the ORDER BY when the index is available, so famous places
458
+ // (whose long alt-name lists hurt BM25) actually make it into the over-fetch window. The TS
459
+ // post-scoring will still compute the same boost for the final score; this just ensures the
460
+ // candidate set is right.
461
+ //
462
+ // Formula: rank_adjusted = bm25 - populationBoost * min(1.0, log10(1 + pop) / scaleLog10)
463
+ // Lower rank_adjusted = better (matches SQLite's bm25 convention of "more negative = better").
464
+ const orderByExpr = shardHasPopulation
465
+ ? `(bm25(place_search) - ? * MIN(1.0, COALESCE(log10(1.0 + ${PLACE_POPULATION_TABLE}.population), 0) / ?))`
466
+ : "bm25(place_search)";
467
+ // Schema-qualified FROM with bare-name MATCH — required syntax for FTS5 on attached schemas.
468
+ // See sharding.ts header for the gotcha that drove this design.
469
+ const stmt = this.#db.prepare(`
470
+ SELECT
471
+ spr.id AS id,
472
+ spr.name,
473
+ spr.placetype,
474
+ spr.country,
475
+ spr.parent_id,
476
+ bm25(place_search) AS rank,
477
+ spr.latitude AS lat,
478
+ spr.longitude AS lon,
479
+ spr.min_latitude, spr.max_latitude, spr.min_longitude, spr.max_longitude,
480
+ ${populationSelect}
481
+ FROM ${sch}.place_search
482
+ ${joinClause}
483
+ ${populationJoin}
484
+ WHERE ${where.join(" AND ")}
485
+ ORDER BY ${orderByExpr} ASC
486
+ LIMIT ?
487
+ `);
488
+ if (shardHasPopulation) {
489
+ params.push(this.#weights.populationBoost, this.#weights.populationScaleLog10);
490
+ }
491
+ params.push(ftsLimit);
492
+ const rawRows = stmt.all(...params);
493
+ const queryLen = query.text.length;
494
+ const candidates = rawRows.map((row) => {
495
+ // SQLite's bm25() returns a lower-is-better score (negative for matches). Negate so we
496
+ // start from a higher-is-better baseline.
497
+ let score = -row.rank;
498
+ if (placetypes && placetypes.length > 0 && placetypes.includes(row.placetype)) {
499
+ score += this.#weights.placetypeMatchBoost;
500
+ }
501
+ if (!placetypes && row.placetype === "locality") {
502
+ score += this.#weights.localityImplicitBoost;
503
+ }
504
+ if (query.country && row.country === query.country) {
505
+ score += this.#weights.countryMatchBoost;
506
+ }
507
+ if (query.parentId !== undefined) {
508
+ if (row.parent_id === query.parentId) {
509
+ score += this.#weights.directChildBoost;
510
+ }
511
+ else {
512
+ score += this.#weights.descendantBoost;
513
+ }
514
+ }
515
+ const extraLen = Math.max(0, row.name.length - queryLen - 3);
516
+ score -= (this.#weights.lengthPenaltyWeight * extraLen) / 10;
517
+ // Proximity boost: only applied when the query carries `near` AND the candidate has real
518
+ // coordinates. The formula decays smoothly with distance so close-but-not-exact hits
519
+ // still benefit; tunable via proximityBoost + proximityScaleKm.
520
+ let distanceKm;
521
+ if (query.near && row.lat !== null && row.lon !== null && !(row.lat === 0 && row.lon === 0)) {
522
+ distanceKm = haversineKm(query.near.lat, query.near.lon, row.lat, row.lon);
523
+ score += this.#weights.proximityBoost / (1 + distanceKm / this.#weights.proximityScaleKm);
524
+ }
525
+ // Population boost: capped at `populationBoost` magnitude at `10^populationScaleLog10`
526
+ // people. Missing population → no contribution. Never penalizes.
527
+ if (row.population !== null && row.population > 0 && this.#weights.populationScaleLog10 > 0) {
528
+ const popLog = Math.log10(1 + row.population);
529
+ const popFraction = Math.min(1, popLog / this.#weights.populationScaleLog10);
530
+ score += this.#weights.populationBoost * popFraction;
531
+ }
532
+ const candidate = {
533
+ id: row.id,
534
+ name: row.name,
535
+ placetype: row.placetype,
536
+ country: row.country ?? "",
537
+ lat: row.lat ?? 0,
538
+ lon: row.lon ?? 0,
539
+ parent_id: row.parent_id ?? undefined,
540
+ score,
541
+ };
542
+ if (distanceKm !== undefined)
543
+ candidate.distanceKm = distanceKm;
544
+ if (row.population !== null && row.population > 0)
545
+ candidate.population = row.population;
546
+ // Candidate bbox — parity with the WASM lookup (resolver-wof-wasm/lookup.ts), whose
547
+ // consumers (the demo cascade's region constraint) read it. Without this the Node
548
+ // backend's region→bbox constraint is dead and disambiguation falls to population
549
+ // ranking (the Springfield-IL→MO failure the #524 smoke eval caught).
550
+ if (row.min_latitude != null &&
551
+ row.max_latitude != null &&
552
+ row.min_longitude != null &&
553
+ row.max_longitude != null) {
554
+ candidate.bbox = {
555
+ minLat: row.min_latitude,
556
+ maxLat: row.max_latitude,
557
+ minLon: row.min_longitude,
558
+ maxLon: row.max_longitude,
559
+ };
560
+ }
561
+ return candidate;
562
+ });
563
+ // Exact-match tiering: a candidate whose name OR any alias equals the query text (case-folded)
564
+ // ranks above any partial match, with the weighted-sum score (incl. population) breaking ties
565
+ // WITHIN a tier. See the RankingWeights.exactMatchTiering docstring for why this aligns the
566
+ // population prior rather than overriding it. One cheap indexed lookup over the candidate ids.
567
+ // Runs even for a SINGLE candidate so `exactMatch` is stamped consistently (parity with the
568
+ // WASM lookup) — a sole alias hit ("New York City" → New York) must still carry the flag the
569
+ // demo cascade / #369 re-rank read.
570
+ if (this.#weights.exactMatchTiering && candidates.length > 0) {
571
+ const exactIds = this.#exactMatchIds(sch, candidates.map((c) => c.id), query.text);
572
+ // Stamp the tier onto every candidate (not just when the tiering sort fires) so a downstream
573
+ // re-rank — #369's postcode-anchor country pin in `resolveTree` — can keep the country pin from
574
+ // crossing the exact/partial boundary ("ME" → Maine, not the more-populous Missouri).
575
+ for (const c of candidates)
576
+ c.exactMatch = exactIds.has(c.id);
577
+ if (exactIds.size > 0 && exactIds.size < candidates.length) {
578
+ candidates.sort((a, b) => {
579
+ const ax = exactIds.has(a.id) ? 1 : 0;
580
+ const bx = exactIds.has(b.id) ? 1 : 0;
581
+ return bx - ax || b.score - a.score;
582
+ });
583
+ return Promise.resolve(candidates.slice(0, limit));
584
+ }
585
+ }
586
+ candidates.sort((a, b) => b.score - a.score);
587
+ return Promise.resolve(candidates.slice(0, limit));
588
+ }
589
+ #isLocalityQuery(query) {
590
+ const pts = normalizePlacetypes(query.placetype);
591
+ return !pts || pts.includes("locality");
592
+ }
593
+ /**
594
+ * Resolve the effective convention for a query (the Geographic Rule Engine entry point). The
595
+ * ancestor chain is keyed by WOF polygon id; for #289 it carries just the country level —
596
+ * resolved from `query.country` via the cached code→WOF-id lookup — so the EU locales, which have
597
+ * no override rows, resolve to `WORLD_DEFAULT` and dispatch is byte-identical to the pre-engine
598
+ * path. E4 (JP) extends the chain with the resolved locality's `ancestors` row, so a
599
+ * region/locality-level convention (e.g. Sapporo's grid) deep-merges over the country one.
600
+ */
601
+ #conventionFor(query) {
602
+ const chain = [];
603
+ if (query.country) {
604
+ const cid = this.#countryWofId(query.country);
605
+ if (cid !== null)
606
+ chain.push(cid);
607
+ }
608
+ return resolveConvention(this.#conventionSource, chain);
609
+ }
610
+ /**
611
+ * Country ISO code → its WOF polygon id (the coarsest convention key). Cached — one indexed `spr`
612
+ * query per distinct country, then memoized (including a not-found `null`) so findPlace never
613
+ * pays for it twice.
614
+ */
615
+ #countryWofId(code) {
616
+ const cached = this.#countryWofIdCache.get(code);
617
+ if (cached !== undefined)
618
+ return cached;
619
+ let id = null;
620
+ try {
621
+ const row = this.#db
622
+ .prepare(`SELECT id FROM main.spr WHERE placetype = 'country' AND country = ? AND is_current != 0 LIMIT 1`)
623
+ .get(code);
624
+ id = row?.id ?? null;
625
+ }
626
+ catch {
627
+ id = null;
628
+ }
629
+ this.#countryWofIdCache.set(code, id);
630
+ return id;
631
+ }
632
+ /**
633
+ * Coordinate-first locality resolution. The postcode_locality table maps the sibling postcode to
634
+ * the locality whose polygon contains the postcode centroid (+ a few nearby ones for the
635
+ * abutting- postcode case). We union those COORDINATE candidates with the FTS NAME candidates and
636
+ * soft-score the union `0.6·S_pc + 0.3·S_name + 0.1·S_pop` — so a small town the name-match never
637
+ * finds is recovered by the postcode, while an unambiguous name (Berlin) still wins on name +
638
+ * population. Returns null when the postcode isn't in the table (→ caller falls back to the FTS
639
+ * path).
640
+ */
641
+ async #findLocalityCoordFirst(query, sch, convention) {
642
+ const w = convention.scoringWeights;
643
+ const pc = query.postcode.trim();
644
+ const pcWhere = query.country ? "postcode = ? AND country = ?" : "postcode = ?";
645
+ const pcParams = query.country ? [pc, query.country] : [pc];
646
+ const pcRows = this.#db
647
+ .prepare(`SELECT locality_id AS id, aliases, distance_km AS dist, is_containing AS containing
648
+ FROM ${sch}.${POSTCODE_LOCALITY_TABLE} WHERE ${pcWhere}`)
649
+ .all(...pcParams);
650
+ if (pcRows.length === 0)
651
+ return null;
652
+ const limit = query.limit ?? 10;
653
+ // Name-match candidates via the normal FTS path (postcode cleared → no recursion).
654
+ const ftsCands = await this.findPlace({ ...query, postcode: undefined, limit: Math.max(limit, 10) });
655
+ const pcInfo = new Map();
656
+ for (const r of pcRows) {
657
+ pcInfo.set(r.id, { dist: r.dist, containing: r.containing === 1, aliases: r.aliases ? r.aliases.split("|") : [] });
658
+ }
659
+ // #475 (opt-in): observed postal-city aliases for this postcode, keyed by the geographic
660
+ // locality name they map to. A user-typed postal city ("Antioch", 37013) becomes a name-match
661
+ // alias for the geographic locality the postcode sits in ("Nashville"). Empty when the reader
662
+ // isn't supplied → the scoring loop below is byte-identical to pre-#475.
663
+ const postalAliasByGeo = new Map();
664
+ if (this.#postalCityAliases) {
665
+ for (const a of await this.#postalCityAliases.getDivergentAliases(pc)) {
666
+ const key = cfNormalize(a.geoLocality);
667
+ if (!key)
668
+ continue;
669
+ const bag = postalAliasByGeo.get(key);
670
+ if (bag)
671
+ bag.push(a.postalCity);
672
+ else
673
+ postalAliasByGeo.set(key, [a.postalCity]);
674
+ }
675
+ }
676
+ const merged = new Map();
677
+ for (const c of ftsCands)
678
+ merged.set(c.id, c);
679
+ const missing = [...pcInfo.keys()].filter((id) => !merged.has(id));
680
+ for (const row of this.#fetchLocalitiesById(missing))
681
+ merged.set(row.id, row);
682
+ const scored = [];
683
+ for (const cand of merged.values()) {
684
+ const info = pcInfo.get(cand.id);
685
+ const sPc = info ? (info.containing ? 1 : Math.exp(-info.dist / CF_PC_DECAY_KM)) : 0;
686
+ // Fold any postal-city aliases for this candidate's geographic name into the soft name match
687
+ // (#475). `postalAliasByGeo` is empty unless the opt-in reader was supplied, so when off this
688
+ // reduces to the original `info?.aliases ?? []` and the score is unchanged.
689
+ const wofAliases = info?.aliases ?? [];
690
+ const aliases = postalAliasByGeo.size > 0
691
+ ? [...wofAliases, ...(postalAliasByGeo.get(cfNormalize(cand.name)) ?? [])]
692
+ : wofAliases;
693
+ const sName = softNameScore(query.text, cand.name, aliases);
694
+ const sPop = cand.population && cand.population > 0 ? Math.min(1, Math.log10(1 + cand.population) / 6) : 0;
695
+ scored.push({ ...cand, score: w.pc * sPc + w.name * sName + w.pop * sPop, exact: sName >= 1 });
696
+ }
697
+ // Exact-name tiering (same philosophy as the FTS path): an EXACT name/alias match tiers above
698
+ // coordinate-only candidates, with the soft-score breaking ties WITHIN a tier. This keeps an
699
+ // unambiguous city ("Berlin", exact + huge population) ahead of the fine-grained Ortsteil its
700
+ // postcode centroid lands in, while a small town the name-match never finds (no exact tier) is
701
+ // still recovered by its postcode's containing locality.
702
+ scored.sort((a, b) => Number(b.exact) - Number(a.exact) || b.score - a.score);
703
+ // Conflict flag: if the chosen locality is NOT the postcode's containing locality and sits far
704
+ // from it, the postcode and the city name disagree (a transposed / wrong-for-the-city postcode).
705
+ // We keep the name-chosen locality but flag it — the falsehood signal a BM25 geocoder can't give.
706
+ const top = scored[0];
707
+ if (top) {
708
+ // The postcode's geographic anchor: among the postcode's candidate localities that actually
709
+ // resolved (some — e.g. unnamed Ortsteile — are in the postcode table but not the admin DB),
710
+ // prefer the containing one, else the nearest. Postcodes whose centroid falls just outside
711
+ // every locality polygon still anchor to the closest town.
712
+ const anchorRow = pcRows
713
+ .filter((r) => merged.has(r.id))
714
+ .sort((a, b) => b.containing - a.containing || a.dist - b.dist)[0];
715
+ const anchor = anchorRow ? merged.get(anchorRow.id) : undefined;
716
+ if (anchor && top.id !== anchorRow.id) {
717
+ if (haversineKm(top.lat, top.lon, anchor.lat, anchor.lon) > CF_MISMATCH_KM)
718
+ top.mismatch = true;
719
+ }
720
+ }
721
+ return scored.slice(0, limit).map(({ exact, ...c }) => {
722
+ void exact;
723
+ return c;
724
+ });
725
+ }
726
+ /** Fetch locality spr rows (from main) for the postcode-injected candidate ids the FTS set missed. */
727
+ #fetchLocalitiesById(ids) {
728
+ if (ids.length === 0)
729
+ return [];
730
+ const hasPop = this.#hasPopulationIndex.get("main") === true;
731
+ const popSelect = hasPop ? `pp.population AS population` : `NULL AS population`;
732
+ const popJoin = hasPop ? `LEFT JOIN main.${PLACE_POPULATION_TABLE} pp ON pp.id = s.id` : "";
733
+ const ph = ids.map(() => "?").join(", ");
734
+ const rows = this.#db
735
+ .prepare(`SELECT s.id AS id, s.name AS name, s.country AS country, s.parent_id AS parent_id,
736
+ s.latitude AS lat, s.longitude AS lon, s.placetype AS placetype, ${popSelect}
737
+ FROM main.spr s ${popJoin}
738
+ WHERE s.id IN (${ph}) AND s.is_current != 0`)
739
+ .all(...ids);
740
+ return rows.map((row) => {
741
+ const c = {
742
+ id: row.id,
743
+ name: row.name,
744
+ placetype: row.placetype,
745
+ country: row.country ?? "",
746
+ lat: row.lat ?? 0,
747
+ lon: row.lon ?? 0,
748
+ parent_id: row.parent_id ?? undefined,
749
+ score: 0,
750
+ };
751
+ if (row.population !== null && row.population > 0)
752
+ c.population = row.population;
753
+ return c;
754
+ });
755
+ }
756
+ /**
757
+ * Among `ids`, return the subset whose name OR any alias equals `text` case-insensitively — the
758
+ * exact-match tier for ranking. One indexed query over `<schema>.names`. When the shard has no
759
+ * `names` table (a slim DB built with `dropNames`, or a postcode-only shard), fall back to the
760
+ * self-contained `place_search` FTS content: its `alt_names` column is the same alias set joined
761
+ * on the boundary-preserving `ALIAS_SEPARATOR` (#523), so `aliasBagExactMatch` recovers the exact
762
+ * alias tier ("New York City" → New York) that the dropped `names` table used to provide.
763
+ */
764
+ #exactMatchIds(schemaName, ids, text) {
765
+ const out = new Set();
766
+ const trimmed = text.trim();
767
+ if (ids.length === 0 || !trimmed)
768
+ return out;
769
+ const placeholders = ids.map(() => "?").join(", ");
770
+ try {
771
+ const rows = this.#db
772
+ .prepare(`SELECT DISTINCT id FROM ${schemaName}.names WHERE id IN (${placeholders}) AND name = ? COLLATE NOCASE`)
773
+ .all(...ids, trimmed);
774
+ for (const r of rows)
775
+ out.add(r.id);
776
+ return out;
777
+ }
778
+ catch {
779
+ // No `names` table on this shard — fall through to the place_search alias bag.
780
+ }
781
+ try {
782
+ const rows = this.#db
783
+ .prepare(`SELECT wof_id AS id, name, alt_names FROM ${schemaName}.place_search WHERE wof_id IN (${placeholders})`)
784
+ .all(...ids);
785
+ const norm = (s) => s.toLowerCase().trim().replace(/\s+/g, " ");
786
+ const needle = norm(trimmed);
787
+ for (const r of rows) {
788
+ if (r.name !== null && norm(r.name) === needle)
789
+ out.add(r.id);
790
+ }
791
+ // Alias pass via the shared bag parser (#523). Separated bags (built since #523) get a true
792
+ // per-alias equality check, ungated — matching the `names`-table branch above, where an
793
+ // alias match counts as exact regardless of other candidates. Legacy bags (no separator)
794
+ // fall back to padded containment, gated on "no canonical exact in the pool" because their
795
+ // lost boundaries would otherwise false-promote interior fragments ("York" inside the alias
796
+ // "New York City") or cross-alias fragments ("York New" across "…York" + "New City…").
797
+ const anyCanonicalExact = out.size > 0;
798
+ for (const r of rows) {
799
+ if (aliasBagExactMatch(r.alt_names, needle, anyCanonicalExact))
800
+ out.add(r.id);
801
+ }
802
+ }
803
+ catch {
804
+ // Shard without place_search either → no exact-match tier. Falls back to weighted-sum order.
805
+ }
806
+ return out;
807
+ }
808
+ close() {
809
+ // Destroying the Kysely instance closes the underlying connection IF we own it. If the caller
810
+ // passed in a pre-opened DatabaseSync (test fixture), respect their ownership.
811
+ void this.#kysely.destroy();
812
+ if (this.#ownsDb) {
813
+ this.#db.close();
814
+ }
815
+ }
816
+ [Symbol.dispose]() {
817
+ this.close();
818
+ }
819
+ /** Build the FTS5 virtual table from the `names` + `places` tables. */
820
+ #ensureFts() {
821
+ buildPlaceSearchFts(this.#db);
822
+ }
823
+ #assertFtsExists() {
824
+ if (!placeSearchFtsExists(this.#db)) {
825
+ throw new Error("WofSqlitePlaceLookup: `place_search` FTS5 table is missing. Pass `buildFts: true` to build it on open, or run `mailwoman-wof-build-fts <path-to-wof.db>` ahead of time (see resolver-wof-sqlite/README.md).");
826
+ }
827
+ }
828
+ }
829
/**
 * Normalize a placetype argument to a list.
 *
 * @param p A single placetype, an array of placetypes, or a falsy value.
 * @returns The array itself when `p` is an array, a one-element array when `p` is a truthy
 *   scalar, and `null` when `p` is falsy (no placetype given).
 */
function normalizePlacetypes(p) {
    if (Array.isArray(p)) {
        return p;
    }
    return p ? [p] : null;
}
834
/**
 * Make an arbitrary user-typed string safe for FTS5 MATCH.
 *
 * FTS5 has its own query syntax (`"phrase"`, `term1 OR term2`, `prefix*`, NEAR/N, etc.). Letting
 * raw user input through means a user typing `Paris's` or `St. (Petersburg)` causes a syntax
 * error.
 *
 * Per-token rules (tokens are whitespace-separated, after NFKC normalization):
 *
 * - Everything except letters and numbers is stripped from the token body.
 * - **Trailing `*`** is preserved as FTS5 **prefix syntax** — `627*` becomes the literal `627*`
 *   (unquoted). The caller signaled they want a prefix; respect that.
 * - Exception: a prefix body that is exactly `AND`, `OR` or `NOT` (case-sensitive) is emitted as
 *   `"OR"*`. As a bareword FTS5 would lex it as a boolean operator and reject the query; a
 *   quoted string followed by `*` is the equivalent prefix form.
 * - All other tokens are wrapped in `"..."` as a single-word phrase.
 * - Tokens with no letters/numbers are dropped. Multiple tokens join with implicit AND.
 *
 * Examples:
 *
 * - `"Paris"` → `"Paris"` (phrase)
 * - `"627*"` → `627*` (prefix)
 * - `"St. (Petersburg)"` → `"St" "Petersburg"` (two phrases, AND-joined)
 * - `"Pari* TX"` → `Pari* "TX"` (mixed prefix + phrase)
 * - `"OR*"` → `"OR"*` (prefix of a reserved word, quoted)
 * - `"*"` alone → `""` (no body → drop)
 *
 * @param text Raw user input.
 * @returns An FTS5 MATCH expression, or the empty string when no usable token remains.
 */
function sanitizeFtsQuery(text) {
    // FTS5 treats exactly these barewords (case-sensitive) as boolean operators.
    const reservedBareword = /^(?:AND|OR|NOT)$/;
    const out = [];
    // Splitting on /\s+/ leaves no whitespace inside a token; only empty edge tokens need skipping.
    for (const rawToken of text.normalize("NFKC").split(/\s+/u)) {
        if (!rawToken)
            continue;
        const hasPrefixStar = rawToken.endsWith("*");
        // Strip everything except letters + numbers from the token body. Apostrophes / hyphens /
        // quotes / any embedded `*` all go, so the body never needs quote-escaping. The trailing
        // `*` (if any) is reapplied separately below.
        const body = rawToken.replace(/[^\p{L}\p{N}]/gu, "");
        if (!body)
            continue;
        if (!hasPrefixStar) {
            out.push(`"${body}"`);
        }
        else if (reservedBareword.test(body)) {
            out.push(`"${body}"*`);
        }
        else {
            out.push(`${body}*`);
        }
    }
    return out.join(" ");
}
874
+ // `sql` is imported only because future Kysely-typed queries will use it; silence "unused" linting.
875
+ void sql;
876
+ //# sourceMappingURL=lookup.js.map