@mailwoman/resolver-wof-sqlite 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. package/README.md +250 -0
  2. package/out/address-point-interpolation.d.ts +48 -0
  3. package/out/address-point-interpolation.d.ts.map +1 -0
  4. package/out/address-point-interpolation.js +164 -0
  5. package/out/address-point-interpolation.js.map +1 -0
  6. package/out/address-point-schema.d.ts +58 -0
  7. package/out/address-point-schema.d.ts.map +1 -0
  8. package/out/address-point-schema.js +67 -0
  9. package/out/address-point-schema.js.map +1 -0
  10. package/out/address-point.d.ts +29 -0
  11. package/out/address-point.d.ts.map +1 -0
  12. package/out/address-point.js +62 -0
  13. package/out/address-point.js.map +1 -0
  14. package/out/ancestry.d.ts +40 -0
  15. package/out/ancestry.d.ts.map +1 -0
  16. package/out/ancestry.js +53 -0
  17. package/out/ancestry.js.map +1 -0
  18. package/out/build-candidate-cli.d.ts +16 -0
  19. package/out/build-candidate-cli.d.ts.map +1 -0
  20. package/out/build-candidate-cli.js +80 -0
  21. package/out/build-candidate-cli.js.map +1 -0
  22. package/out/build-candidate.d.ts +54 -0
  23. package/out/build-candidate.d.ts.map +1 -0
  24. package/out/build-candidate.js +230 -0
  25. package/out/build-candidate.js.map +1 -0
  26. package/out/build-coincident-roles-cli.d.ts +16 -0
  27. package/out/build-coincident-roles-cli.d.ts.map +1 -0
  28. package/out/build-coincident-roles-cli.js +94 -0
  29. package/out/build-coincident-roles-cli.js.map +1 -0
  30. package/out/build-fts-cli.d.ts +23 -0
  31. package/out/build-fts-cli.d.ts.map +1 -0
  32. package/out/build-fts-cli.js +117 -0
  33. package/out/build-fts-cli.js.map +1 -0
  34. package/out/build-slim-cli.d.ts +14 -0
  35. package/out/build-slim-cli.d.ts.map +1 -0
  36. package/out/build-slim-cli.js +130 -0
  37. package/out/build-slim-cli.js.map +1 -0
  38. package/out/build-slim.d.ts +71 -0
  39. package/out/build-slim.d.ts.map +1 -0
  40. package/out/build-slim.js +267 -0
  41. package/out/build-slim.js.map +1 -0
  42. package/out/candidate-lookup.d.ts +43 -0
  43. package/out/candidate-lookup.d.ts.map +1 -0
  44. package/out/candidate-lookup.js +191 -0
  45. package/out/candidate-lookup.js.map +1 -0
  46. package/out/candidate-schema.d.ts +86 -0
  47. package/out/candidate-schema.d.ts.map +1 -0
  48. package/out/candidate-schema.js +109 -0
  49. package/out/candidate-schema.js.map +1 -0
  50. package/out/coincident-roles.d.ts +86 -0
  51. package/out/coincident-roles.d.ts.map +1 -0
  52. package/out/coincident-roles.js +160 -0
  53. package/out/coincident-roles.js.map +1 -0
  54. package/out/convention.d.ts +109 -0
  55. package/out/convention.d.ts.map +1 -0
  56. package/out/convention.js +94 -0
  57. package/out/convention.js.map +1 -0
  58. package/out/fst-autocomplete.d.ts +49 -0
  59. package/out/fst-autocomplete.d.ts.map +1 -0
  60. package/out/fst-autocomplete.js +124 -0
  61. package/out/fst-autocomplete.js.map +1 -0
  62. package/out/fst-builder.d.ts +20 -0
  63. package/out/fst-builder.d.ts.map +1 -0
  64. package/out/fst-builder.js +219 -0
  65. package/out/fst-builder.js.map +1 -0
  66. package/out/fst-deserialize-web.d.ts +16 -0
  67. package/out/fst-deserialize-web.d.ts.map +1 -0
  68. package/out/fst-deserialize-web.js +133 -0
  69. package/out/fst-deserialize-web.js.map +1 -0
  70. package/out/fst-matcher.d.ts +33 -0
  71. package/out/fst-matcher.d.ts.map +1 -0
  72. package/out/fst-matcher.js +117 -0
  73. package/out/fst-matcher.js.map +1 -0
  74. package/out/fst-serialize.d.ts +30 -0
  75. package/out/fst-serialize.d.ts.map +1 -0
  76. package/out/fst-serialize.js +261 -0
  77. package/out/fst-serialize.js.map +1 -0
  78. package/out/fst-types.d.ts +60 -0
  79. package/out/fst-types.d.ts.map +1 -0
  80. package/out/fst-types.js +11 -0
  81. package/out/fst-types.js.map +1 -0
  82. package/out/fts.d.ts +158 -0
  83. package/out/fts.d.ts.map +1 -0
  84. package/out/fts.js +261 -0
  85. package/out/fts.js.map +1 -0
  86. package/out/geo.d.ts +74 -0
  87. package/out/geo.d.ts.map +1 -0
  88. package/out/geo.js +88 -0
  89. package/out/geo.js.map +1 -0
  90. package/out/index.d.ts +27 -0
  91. package/out/index.d.ts.map +1 -0
  92. package/out/index.js +22 -0
  93. package/out/index.js.map +1 -0
  94. package/out/interpolation.d.ts +84 -0
  95. package/out/interpolation.d.ts.map +1 -0
  96. package/out/interpolation.js +150 -0
  97. package/out/interpolation.js.map +1 -0
  98. package/out/lookup.d.ts +156 -0
  99. package/out/lookup.d.ts.map +1 -0
  100. package/out/lookup.js +876 -0
  101. package/out/lookup.js.map +1 -0
  102. package/out/postal-city-alias-lookup.d.ts +50 -0
  103. package/out/postal-city-alias-lookup.d.ts.map +1 -0
  104. package/out/postal-city-alias-lookup.js +66 -0
  105. package/out/postal-city-alias-lookup.js.map +1 -0
  106. package/out/postal-city-alias-schema.d.ts +51 -0
  107. package/out/postal-city-alias-schema.d.ts.map +1 -0
  108. package/out/postal-city-alias-schema.js +47 -0
  109. package/out/postal-city-alias-schema.js.map +1 -0
  110. package/out/postal-city-candidate-schema.d.ts +58 -0
  111. package/out/postal-city-candidate-schema.d.ts.map +1 -0
  112. package/out/postal-city-candidate-schema.js +56 -0
  113. package/out/postal-city-candidate-schema.js.map +1 -0
  114. package/out/postcode-point-lookup.d.ts +38 -0
  115. package/out/postcode-point-lookup.d.ts.map +1 -0
  116. package/out/postcode-point-lookup.js +46 -0
  117. package/out/postcode-point-lookup.js.map +1 -0
  118. package/out/reverse.d.ts +99 -0
  119. package/out/reverse.d.ts.map +1 -0
  120. package/out/reverse.js +290 -0
  121. package/out/reverse.js.map +1 -0
  122. package/out/schema.d.ts +163 -0
  123. package/out/schema.d.ts.map +1 -0
  124. package/out/schema.js +18 -0
  125. package/out/schema.js.map +1 -0
  126. package/out/sharding.d.ts +96 -0
  127. package/out/sharding.d.ts.map +1 -0
  128. package/out/sharding.js +129 -0
  129. package/out/sharding.js.map +1 -0
  130. package/out/sqlite-convention-source.d.ts +29 -0
  131. package/out/sqlite-convention-source.d.ts.map +1 -0
  132. package/out/sqlite-convention-source.js +53 -0
  133. package/out/sqlite-convention-source.js.map +1 -0
  134. package/out/sqlite-utils.d.ts +17 -0
  135. package/out/sqlite-utils.d.ts.map +1 -0
  136. package/out/sqlite-utils.js +24 -0
  137. package/out/sqlite-utils.js.map +1 -0
  138. package/out/street-morphology-fst-builder.d.ts +59 -0
  139. package/out/street-morphology-fst-builder.d.ts.map +1 -0
  140. package/out/street-morphology-fst-builder.js +174 -0
  141. package/out/street-morphology-fst-builder.js.map +1 -0
  142. package/out/street-normalize.d.ts +66 -0
  143. package/out/street-normalize.d.ts.map +1 -0
  144. package/out/street-normalize.js +176 -0
  145. package/out/street-normalize.js.map +1 -0
  146. package/out/street-segment-schema.d.ts +61 -0
  147. package/out/street-segment-schema.d.ts.map +1 -0
  148. package/out/street-segment-schema.js +64 -0
  149. package/out/street-segment-schema.js.map +1 -0
  150. package/out/types.d.ts +137 -0
  151. package/out/types.d.ts.map +1 -0
  152. package/out/types.js +13 -0
  153. package/out/types.js.map +1 -0
  154. package/out/unified-schema.d.ts +25 -0
  155. package/out/unified-schema.d.ts.map +1 -0
  156. package/out/unified-schema.js +142 -0
  157. package/out/unified-schema.js.map +1 -0
  158. package/package.json +54 -0
package/out/fts.d.ts ADDED
@@ -0,0 +1,158 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * FTS5 index lifecycle for the WOF SQLite distribution.
7
+ *
8
+ * Shared by `WofSqlitePlaceLookup` (lazy build via `buildFts: true`) and the operator-side
9
+ * `mailwoman-wof-build-fts` CLI (ahead-of-time build to avoid first-open latency in production).
10
+ *
11
+ * Upstream WOF SQLite distributions do NOT ship FTS5. The index lives in a `place_search` virtual
12
+ * table whose rows mirror `(spr.id, spr.name, GROUP_CONCAT(names.name))` — one current,
13
+ * non-deprecated place per row, with all alternate names concatenated into a single search-token
14
+ * bag.
15
+ */
16
+ import type { DatabaseSync } from "node:sqlite";
17
+ /**
18
+ * Name of the FTS5 virtual table this module owns. Centralized so `WofSqlitePlaceLookup` and the
19
+ * CLI can't drift apart.
20
+ */
21
+ export declare const PLACE_SEARCH_TABLE = "place_search";
22
+ /**
23
+ * Boundary-preserving separator between aliases in the `alt_names` bag (#523): U+E000, the first
24
+ * Private Use Area codepoint, written as an escape so the source stays plain ASCII.
25
+ *
26
+ * Why a PUA codepoint and not punctuation: the goal is to stop a phrase query from matching ACROSS
27
+ * two adjacent aliases' concatenation boundary ("York" + "New City" must not phrase-match `"york
28
+ * new"`). FTS5 assigns token positions to TOKENS only — separator characters never consume a
29
+ * position — so any character the tokenizer treats as a boundary leaves the two aliases' tokens
30
+ * adjacent and the false phrase match intact. The separator must therefore be an INDEXED TOKEN that
31
+ * sits between the aliases and breaks positional adjacency.
32
+ *
33
+ * Empirical probe (node:sqlite, `tokenize = 'unicode61 remove_diacritics 2'` — the exact config
34
+ * below). Bag = the aliases "York" and "New City" joined by each candidate separator; query = the
35
+ * cross-boundary phrase `MATCH '"york new"'`:
36
+ *
37
+ * - `' '` (the pre-#523 join, no separator) — false HIT
38
+ * - `' ; '` (punctuation) — false HIT
39
+ * - `' \u2016 '` (double vertical line) — false HIT
40
+ * - `' \x1F '` (ASCII unit separator) — false HIT
41
+ * - `' \uE000 '` (PUA, this constant) — NO match, while `'"york"'` and `'"new city"'` still match the
42
+ * bag individually under every variant above.
43
+ *
44
+ * U+E000 works because unicode61 classifies it (category Co — neither space nor punctuation) as a
45
+ * token character, so the standalone `\uE000` between aliases is indexed as its own token and the
46
+ * aliases' tokens are no longer positionally adjacent. The remaining requirements also hold:
47
+ *
48
+ * - **Unreachable from queries**: `sanitizeFtsQuery` (Node + WASM resolvers) strips everything
49
+ * outside `\p{L}\p{N}` from token bodies, and U+E000 is neither — no user query can ever address
50
+ * the separator token. The demo's `sanitizeFts` strips it explicitly.
51
+ * - **Never in place names**: PUA codepoints carry no standard-assigned characters by definition;
52
+ * real-world WOF names don't use them. Defensively, the INSERT below also replaces any embedded
53
+ * U+E000 in source names with a space so a poisoned row can't forge an alias boundary.
54
+ * - **Survives GROUP_CONCAT**: verified — `GROUP_CONCAT(name, ' ' || char(57344) || ' ')` emits the
55
+ * codepoint intact (`57344` = 0xE000).
56
+ * - **Cost**: one extra token per alias boundary in the FTS document. Marginal BM25 length-norm
57
+ * impact, on a column whose length stats are already the known #189 problem.
58
+ *
59
+ * Interaction with #189 (split `alt_names` into its own FTS table for independent BM25 length
60
+ * stats): the separator SURVIVES that split as proposed — #189 still GROUP_CONCATs all aliases into
61
+ * one `place_search_alt` row per place, so both the separator and the bag-parsing exact check
62
+ * (`aliasBagExactMatch`) carry over unchanged, just pointed at the new table. Only if #189 were
63
+ * instead built as one-row-per-alias would both become moot (per-alias rows give exact equality and
64
+ * phrase isolation for free). Sequencing: if #189 lands before the next slim-DB artifact rebuild,
65
+ * fold both into ONE rebuild rather than shipping two FTS schema bumps.
66
+ */
67
+ export declare const ALIAS_SEPARATOR = "\uE000";
68
+ /**
69
+ * Does any alias in an `alt_names` bag exactly equal the (already-normalized) query? The single
70
+ * shared implementation of the exact-tier alias check for every consumer of the bag — the Node
71
+ * resolver's `#exactMatchIds` fallback, the WASM resolver, and the demo's httpvfs resolver — so the
72
+ * bag format and its parsers can't drift.
73
+ *
74
+ * Two formats exist in the wild:
75
+ *
76
+ * - **Separated bags** (built since #523): aliases joined with {@link ALIAS_SEPARATOR}, plus a
77
+ * trailing separator so even a single-alias bag self-identifies as separator-formatted. Split +
78
+ * per-alias equality — a true exact-alias check, matching the semantics of the full `names` table
79
+ * (`names.name = ? COLLATE NOCASE`), so it runs UNGATED: an alias match is an exact match whether
80
+ * or not another candidate matched on its canonical name.
81
+ * - **Legacy bags** (pre-#523 artifacts, e.g. an already-deployed slim DB): aliases space-joined,
82
+ * boundaries lost. Falls back to the historical padded-containment check, which only runs when
83
+ * `anyStrictExact` is false — ungated containment would false-promote interior fragments ("York" inside
84
+ * the alias "New York City") and cross-boundary fragments ("York New" across "…York" + "New…").
85
+ * Delete this branch once every shipped artifact carries the separator.
86
+ *
87
+ * @param altNames The `alt_names` bag from `place_search` (null when the row has no aliases).
88
+ * @param normalizedQuery The query, pre-normalized: lowercased, trimmed, internal whitespace
89
+ * collapsed (every consumer already normalizes this way).
90
+ * @param anyStrictExact Whether ANY candidate in the pool already matched strictly (canonical name
91
+ * or region abbreviation). Only consulted for legacy bags.
92
+ */
93
+ export declare function aliasBagExactMatch(altNames: string | null, normalizedQuery: string, anyStrictExact: boolean): boolean;
94
+ /**
95
+ * Name of the R*Tree virtual table that indexes WOF places' bounding boxes for proximity / bbox
96
+ * lookups. Built alongside `place_search` by the CLI and `buildFts: true`. Pure SQLite — no
97
+ * extensions needed, the `rtree` virtual-table module ships with the core library.
98
+ */
99
+ export declare const PLACE_BBOX_TABLE = "place_bbox";
100
+ /**
101
+ * Name of the auxiliary table holding `wof:population` per place. Powers the population-weighted
102
+ * ranking boost. Sparse — WOF only populates this field for ~15% of localities (and mostly larger
103
+ * ones); missing means no boost, never a penalty. Built upstream by `scripts/build-unified-wof.ts`
104
+ * at ingest (and copied through by `build-slim`) — this module consumes it, never builds it.
105
+ *
106
+ * Schema: `(id INTEGER PRIMARY KEY, population INTEGER NOT NULL)`. Plain table, not virtual.
107
+ */
108
+ export declare const PLACE_POPULATION_TABLE = "place_population";
109
+ /**
110
+ * Counters for a single `buildPlaceSearchFts` run. Exposed so callers (CLI, lazy-build) can render
111
+ * progress to the user.
112
+ */
113
+ export interface BuildPlaceSearchFtsResult {
114
+ /** Whether the FTS5 index was created (true) or already existed and was left alone (false). */
115
+ created: boolean;
116
+ /** Number of rows in the `place_search` table after the call. */
117
+ indexedRows: number;
118
+ /** Whether the R*Tree bbox index was created (true) or already existed (false). */
119
+ bboxCreated: boolean;
120
+ /** Number of rows in the `place_bbox` R*Tree after the call. */
121
+ bboxIndexedRows: number;
122
+ /** Wall-clock duration of the build step, in milliseconds. */
123
+ durationMs: number;
124
+ }
125
+ export interface BuildPlaceSearchFtsOpts {
126
+ /**
127
+ * Drop the existing `place_search` AND `place_bbox` tables before building. Default false — if
128
+ * either already exists the corresponding build step is skipped. Set true when you want to
129
+ * rebuild against an updated `spr` / `names` snapshot.
130
+ */
131
+ drop?: boolean;
132
+ /**
133
+ * Optional progress callback invoked after each phase. Useful for CLI output on the planet-scale
134
+ * builds where the INSERT step can take minutes.
135
+ */
136
+ onProgress?: (phase: "checking" | "dropping" | "creating" | "populating" | "creating-bbox" | "populating-bbox" | "done", detail?: string) => void;
137
+ }
138
+ /**
139
+ * Build (or rebuild, with `drop: true`) the `place_search` FTS5 virtual table AND the `place_bbox`
140
+ * R*Tree virtual table from the existing `spr` + `names` tables in a WOF SQLite distribution.
141
+ *
142
+ * The FTS5 index is used for name-based MATCH queries; the R*Tree is used for bbox + proximity
143
+ * filtering. Both are pure SQLite — no extensions required.
144
+ *
145
+ * Returns a `BuildPlaceSearchFtsResult` summary. Idempotent when `drop: false` — re-running against
146
+ * an already-indexed DB skips whichever indexes already exist.
147
+ */
148
+ export declare function buildPlaceSearchFts(db: DatabaseSync, opts?: BuildPlaceSearchFtsOpts): BuildPlaceSearchFtsResult;
149
+ /**
150
+ * Returns true iff the `place_search` table exists in the connected DB. Used by
151
+ * `WofSqlitePlaceLookup` for its "FTS missing — pass buildFts:true or run the CLI" guard.
152
+ */
153
+ export declare function placeSearchFtsExists(db: DatabaseSync): boolean;
154
+ /** Returns true iff the `place_bbox` R*Tree table exists. Used for opt-in proximity lookup checks. */
155
+ export declare function placeBboxExists(db: DatabaseSync): boolean;
156
+ /** Returns true iff the `place_population` table exists. Used for opt-in population-ranking checks. */
157
+ export declare function placePopulationExists(db: DatabaseSync): boolean;
158
+ //# sourceMappingURL=fts.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fts.d.ts","sourceRoot":"","sources":["../fts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAA;AAE/C;;;GAGG;AACH,eAAO,MAAM,kBAAkB,iBAAiB,CAAA;AAEhD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4CG;AACH,eAAO,MAAM,eAAe,WAAW,CAAA;AAKvC;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,MAAM,GAAG,IAAI,EAAE,eAAe,EAAE,MAAM,EAAE,cAAc,EAAE,OAAO,GAAG,OAAO,CAQrH;AAED;;;;GAIG;AACH,eAAO,MAAM,gBAAgB,eAAe,CAAA;AAE5C;;;;;;;GAOG;AACH,eAAO,MAAM,sBAAsB,qBAAqB,CAAA;AAExD;;;GAGG;AACH,MAAM,WAAW,yBAAyB;IACzC,+FAA+F;IAC/F,OAAO,EAAE,OAAO,CAAA;IAChB,iEAAiE;IACjE,WAAW,EAAE,MAAM,CAAA;IACnB,mFAAmF;IACnF,WAAW,EAAE,OAAO,CAAA;IACpB,gEAAgE;IAChE,eAAe,EAAE,MAAM,CAAA;IACvB,8DAA8D;IAC9D,UAAU,EAAE,MAAM,CAAA;CAClB;AAED,MAAM,WAAW,uBAAuB;IACvC;;;;OAIG;IACH,IAAI,CAAC,EAAE,OAAO,CAAA;IACd;;;OAGG;IACH,UAAU,CAAC,EAAE,CACZ,KAAK,EAAE,UAAU,GAAG,UAAU,GAAG,UAAU,GAAG,YAAY,GAAG,eAAe,GAAG,iBAAiB,GAAG,MAAM,EACzG,MAAM,CAAC,EAAE,MAAM,KACX,IAAI,CAAA;CACT;AAED;;;;;;;;;GASG;AACH,wBAAgB,mBAAmB,CAAC,EAAE,EAAE,YAAY,EAAE,IAAI,GAAE,uBAA4B,GAAG,yBAAyB,CAuHnH;AAED;;;GAGG;AACH,wBAAgB,oBAAoB,CAAC,EAAE,EAAE,YAAY,GAAG,OAAO,CAE9D;AAED,sGAAsG;AACtG,wBAAgB,eAAe,CAAC,EAAE,EAAE,YAAY,GAAG,OAAO,CAEzD;AAED,uGAAuG;AACvG,wBAAgB,qBAAqB,CAAC,EAAE,EAAE,YAAY,GAAG,OAAO,CAE/D"}
package/out/fts.js ADDED
@@ -0,0 +1,261 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * FTS5 index lifecycle for the WOF SQLite distribution.
7
+ *
8
+ * Shared by `WofSqlitePlaceLookup` (lazy build via `buildFts: true`) and the operator-side
9
+ * `mailwoman-wof-build-fts` CLI (ahead-of-time build to avoid first-open latency in production).
10
+ *
11
+ * Upstream WOF SQLite distributions do NOT ship FTS5. The index lives in a `place_search` virtual
12
+ * table whose rows mirror `(spr.id, spr.name, GROUP_CONCAT(names.name))` — one current,
13
+ * non-deprecated place per row, with all alternate names concatenated into a single search-token
14
+ * bag.
15
+ */
16
+ /**
17
+ * Name of the FTS5 virtual table this module owns. Centralized so `WofSqlitePlaceLookup` and the
18
+ * CLI can't drift apart.
19
+ */
20
+ export const PLACE_SEARCH_TABLE = "place_search";
21
+ /**
22
+ * Boundary-preserving separator between aliases in the `alt_names` bag (#523): U+E000, the first
23
+ * Private Use Area codepoint, written as an escape so the source stays plain ASCII.
24
+ *
25
+ * Why a PUA codepoint and not punctuation: the goal is to stop a phrase query from matching ACROSS
26
+ * two adjacent aliases' concatenation boundary ("York" + "New City" must not phrase-match `"york
27
+ * new"`). FTS5 assigns token positions to TOKENS only — separator characters never consume a
28
+ * position — so any character the tokenizer treats as a boundary leaves the two aliases' tokens
29
+ * adjacent and the false phrase match intact. The separator must therefore be an INDEXED TOKEN that
30
+ * sits between the aliases and breaks positional adjacency.
31
+ *
32
+ * Empirical probe (node:sqlite, `tokenize = 'unicode61 remove_diacritics 2'` — the exact config
33
+ * below). Bag = the aliases "York" and "New City" joined by each candidate separator; query = the
34
+ * cross-boundary phrase `MATCH '"york new"'`:
35
+ *
36
+ * - `' '` (the pre-#523 join, no separator) — false HIT
37
+ * - `' ; '` (punctuation) — false HIT
38
+ * - `' \u2016 '` (double vertical line) — false HIT
39
+ * - `' \x1F '` (ASCII unit separator) — false HIT
40
+ * - `' \uE000 '` (PUA, this constant) — NO match, while `'"york"'` and `'"new city"'` still match the
41
+ * bag individually under every variant above.
42
+ *
43
+ * U+E000 works because unicode61 classifies it (category Co — neither space nor punctuation) as a
44
+ * token character, so the standalone `\uE000` between aliases is indexed as its own token and the
45
+ * aliases' tokens are no longer positionally adjacent. The remaining requirements also hold:
46
+ *
47
+ * - **Unreachable from queries**: `sanitizeFtsQuery` (Node + WASM resolvers) strips everything
48
+ * outside `\p{L}\p{N}` from token bodies, and U+E000 is neither — no user query can ever address
49
+ * the separator token. The demo's `sanitizeFts` strips it explicitly.
50
+ * - **Never in place names**: PUA codepoints carry no standard-assigned characters by definition;
51
+ * real-world WOF names don't use them. Defensively, the INSERT below also replaces any embedded
52
+ * U+E000 in source names with a space so a poisoned row can't forge an alias boundary.
53
+ * - **Survives GROUP_CONCAT**: verified — `GROUP_CONCAT(name, ' ' || char(57344) || ' ')` emits the
54
+ * codepoint intact (`57344` = 0xE000).
55
+ * - **Cost**: one extra token per alias boundary in the FTS document. Marginal BM25 length-norm
56
+ * impact, on a column whose length stats are already the known #189 problem.
57
+ *
58
+ * Interaction with #189 (split `alt_names` into its own FTS table for independent BM25 length
59
+ * stats): the separator SURVIVES that split as proposed — #189 still GROUP_CONCATs all aliases into
60
+ * one `place_search_alt` row per place, so both the separator and the bag-parsing exact check
61
+ * (`aliasBagExactMatch`) carry over unchanged, just pointed at the new table. Only if #189 were
62
+ * instead built as one-row-per-alias would both become moot (per-alias rows give exact equality and
63
+ * phrase isolation for free). Sequencing: if #189 lands before the next slim-DB artifact rebuild,
64
+ * fold both into ONE rebuild rather than shipping two FTS schema bumps.
65
+ */
66
+ export const ALIAS_SEPARATOR = "\uE000";
67
+ /** `char()` argument for {@link ALIAS_SEPARATOR} in SQL — keeps the SQL text plain ASCII. */
68
+ const ALIAS_SEPARATOR_CODEPOINT = ALIAS_SEPARATOR.codePointAt(0);
69
/**
 * Does any alias in an `alt_names` bag exactly equal the (already-normalized) query? The single
 * shared implementation of the exact-tier alias check for every consumer of the bag — the Node
 * resolver's `#exactMatchIds` fallback, the WASM resolver, and the demo's httpvfs resolver — so the
 * bag format and its parsers can't drift.
 *
 * Two formats exist in the wild:
 *
 * - **Separated bags** (built since #523): aliases joined with {@link ALIAS_SEPARATOR}, plus a
 *   trailing separator so even a single-alias bag self-identifies as separator-formatted. Split +
 *   per-alias equality — a true exact-alias check, matching the semantics of the full `names` table
 *   (`names.name = ? COLLATE NOCASE`), so it runs UNGATED: an alias match is an exact match whether
 *   or not another candidate matched on its canonical name.
 * - **Legacy bags** (pre-#523 artifacts, e.g. an already-deployed slim DB): aliases space-joined,
 *   boundaries lost. Falls back to the historical padded-containment check, which is SKIPPED
 *   whenever `anyStrictExact` is true — ungated containment would false-promote interior fragments
 *   ("York" inside the alias "New York City") and cross-boundary fragments ("York New" across
 *   "…York" + "New…"). Delete this branch once every shipped artifact carries the separator.
 *
 * @param altNames The `alt_names` bag from `place_search` (null when the row has no aliases). Any
 *   non-string value — `null`, or `undefined` when the column was never selected — is treated as
 *   "no aliases" instead of throwing.
 * @param normalizedQuery The query, pre-normalized: lowercased, trimmed, internal whitespace
 *   collapsed (every consumer already normalizes this way).
 * @param anyStrictExact Whether ANY candidate in the pool already matched strictly (canonical name
 *   or region abbreviation). Only consulted for legacy bags.
 * @returns `true` when the bag holds an alias equal to the query (separated bags), or when the
 *   query appears as a whole-word run inside a legacy bag and no candidate matched strictly.
 */
export function aliasBagExactMatch(altNames, normalizedQuery, anyStrictExact) {
    // A missing bag or an empty query can never be an exact alias match.
    if (typeof altNames !== "string" || altNames === "" || !normalizedQuery) {
        return false;
    }

    // Same normalization the callers apply to the query: lowercase, trim, collapse whitespace.
    const normalize = (text) => text.toLowerCase().trim().replace(/\s+/g, " ");

    if (altNames.includes(ALIAS_SEPARATOR)) {
        // Separated bag: boundaries are intact, so compare alias by alias. The empty segment left
        // by the trailing separator normalizes to "" and can never equal the non-empty query.
        return altNames.split(ALIAS_SEPARATOR).some((alias) => normalize(alias) === normalizedQuery);
    }

    // Legacy bag: containment is only a heuristic, so it stays off once a strict match exists.
    if (anyStrictExact) {
        return false;
    }

    // Space padding makes the containment test whole-word on both ends.
    return ` ${normalize(altNames)} `.includes(` ${normalizedQuery} `);
}
105
+ /**
106
+ * Name of the R*Tree virtual table that indexes WOF places' bounding boxes for proximity / bbox
107
+ * lookups. Built alongside `place_search` by the CLI and `buildFts: true`. Pure SQLite — no
108
+ * extensions needed, the `rtree` virtual-table module ships with the core library.
109
+ */
110
+ export const PLACE_BBOX_TABLE = "place_bbox";
111
+ /**
112
+ * Name of the auxiliary table holding `wof:population` per place. Powers the population-weighted
113
+ * ranking boost. Sparse — WOF only populates this field for ~15% of localities (and mostly larger
114
+ * ones); missing means no boost, never a penalty. Built upstream by `scripts/build-unified-wof.ts`
115
+ * at ingest (and copied through by `build-slim`) — this module consumes it, never builds it.
116
+ *
117
+ * Schema: `(id INTEGER PRIMARY KEY, population INTEGER NOT NULL)`. Plain table, not virtual.
118
+ */
119
+ export const PLACE_POPULATION_TABLE = "place_population";
120
/**
 * Build (or rebuild, with `drop: true`) the `place_search` FTS5 virtual table AND the `place_bbox`
 * R*Tree virtual table from the existing `spr` + `names` tables in a WOF SQLite distribution.
 *
 * The FTS5 index is used for name-based MATCH queries; the R*Tree is used for bbox + proximity
 * filtering. Both are pure SQLite — no extensions required.
 *
 * Each index is built inside its own SAVEPOINT, so a failed build step (for example a missing
 * `names` table) is rolled back instead of leaving an empty table behind that later runs would
 * mistake for a finished index. With `drop: true`, a failed rebuild likewise leaves the previous
 * index in place.
 *
 * Idempotent when `drop: false` — re-running against an already-indexed DB skips whichever indexes
 * already exist.
 *
 * @param db Open database connection holding the `spr` and `names` tables.
 * @param opts Optional `drop` flag and `onProgress(phase, detail)` callback.
 * @returns A `BuildPlaceSearchFtsResult` summary: which indexes were built, their row counts, and
 *   the wall-clock duration in milliseconds.
 * @throws Re-throws whatever the failing statement (or `onProgress`) threw, after rolling back the
 *   phase that was in flight.
 */
export function buildPlaceSearchFts(db, opts = {}) {
    const start = Date.now();
    const onProgress = opts.onProgress ?? (() => { });
    onProgress("checking");
    const ftsExisting = tableExists(db, PLACE_SEARCH_TABLE);
    const bboxExisting = tableExists(db, PLACE_BBOX_TABLE);
    // ─── FTS5 phase ──────────────────────────────────────────────────
    let ftsCreated = false;
    if (!ftsExisting || opts.drop) {
        runInSavepoint(db, "mailwoman_build_fts", () => {
            // Reaching this branch with an existing table implies `opts.drop` is set.
            if (ftsExisting) {
                onProgress("dropping", PLACE_SEARCH_TABLE);
                db.exec(`DROP TABLE ${PLACE_SEARCH_TABLE}`);
            }
            onProgress("creating");
            db.exec(`
                CREATE VIRTUAL TABLE ${PLACE_SEARCH_TABLE} USING fts5(
                    wof_id UNINDEXED,
                    name,
                    alt_names,
                    tokenize = 'unicode61 remove_diacritics 2'
                );
            `);
            onProgress("populating");
            // Excludes only definitively-not-current places. WOF's `is_current` carries TWO conventions:
            // `-1` (modern Who's On First) and `1` (legacy Mapzen-era), both meaning "currently valid".
            // Only `0` means "no longer current". Filtering on `= -1` strict (as Phase 4.2 did) excluded
            // ~42% of admin-US and ~68% of postcode-US — see #91 for the diagnostic + magnitude.
            //
            // Aliases join on the boundary-preserving ALIAS_SEPARATOR token (#523) — space-padded so each
            // alias still tokenizes normally — and any U+E000 embedded in a source name is defensively
            // flattened to a space so it can't forge a boundary. A TRAILING separator marks the bag as
            // separator-formatted even when it holds a single alias, so `aliasBagExactMatch` never
            // mistakes a new bag for a legacy (pre-#523) one. See the ALIAS_SEPARATOR docs for the
            // probe + rationale.
            db.exec(`
                INSERT INTO ${PLACE_SEARCH_TABLE} (wof_id, name, alt_names)
                SELECT
                    spr.id,
                    spr.name,
                    COALESCE((
                        SELECT GROUP_CONCAT(
                            REPLACE(name, char(${ALIAS_SEPARATOR_CODEPOINT}), ' '),
                            ' ' || char(${ALIAS_SEPARATOR_CODEPOINT}) || ' '
                        ) || ' ' || char(${ALIAS_SEPARATOR_CODEPOINT})
                        FROM names WHERE names.id = spr.id
                    ), '')
                FROM spr
                WHERE spr.is_current != 0
                    AND spr.is_deprecated = 0
                    AND spr.name IS NOT NULL;
            `);
        });
        ftsCreated = true;
    }
    const ftsCountRow = db.prepare(`SELECT COUNT(*) AS n FROM ${PLACE_SEARCH_TABLE}`).get();
    // ─── R*Tree phase ────────────────────────────────────────────────
    let bboxCreated = false;
    if (!bboxExisting || opts.drop) {
        runInSavepoint(db, "mailwoman_build_bbox", () => {
            // Reaching this branch with an existing table implies `opts.drop` is set.
            if (bboxExisting) {
                onProgress("dropping", PLACE_BBOX_TABLE);
                db.exec(`DROP TABLE ${PLACE_BBOX_TABLE}`);
            }
            onProgress("creating-bbox");
            // R*Tree requires INTEGER PRIMARY KEY (id) + paired min/max for each indexed dimension.
            // `rtree` (not `rtree_i32`) keeps coordinates as REAL — what we want for WGS-84.
            db.exec(`
                CREATE VIRTUAL TABLE ${PLACE_BBOX_TABLE} USING rtree(
                    id,
                    min_lat, max_lat,
                    min_lon, max_lon
                );
            `);
            onProgress("populating-bbox");
            // Only index places that have non-zero coordinates AND a real bbox. WOF stores both the
            // centroid (latitude/longitude) and the bounding box (min_*/max_*). A subset of rows have
            // all-zero coordinates — likely placeholders for deprecated / unmapped entries; the
            // is_current / is_deprecated filter mostly catches them, but we double-check at insert.
            db.exec(`
                INSERT INTO ${PLACE_BBOX_TABLE} (id, min_lat, max_lat, min_lon, max_lon)
                SELECT
                    spr.id,
                    spr.min_latitude,
                    spr.max_latitude,
                    spr.min_longitude,
                    spr.max_longitude
                FROM spr
                WHERE spr.is_current != 0
                    AND spr.is_deprecated = 0
                    AND spr.min_latitude IS NOT NULL
                    AND spr.max_latitude IS NOT NULL
                    AND spr.min_longitude IS NOT NULL
                    AND spr.max_longitude IS NOT NULL
                    AND NOT (spr.min_latitude = 0 AND spr.max_latitude = 0
                        AND spr.min_longitude = 0 AND spr.max_longitude = 0);
            `);
        });
        bboxCreated = true;
    }
    const bboxCountRow = db.prepare(`SELECT COUNT(*) AS n FROM ${PLACE_BBOX_TABLE}`).get();
    // NOTE: `place_population` is NOT built here. `scripts/build-unified-wof.ts` extracts
    // `wof:population` straight into that table at ingest (the canonical source carries no `geojson`
    // table), and `build-slim` copies it through. This function only owns the two FTS-derived virtual
    // tables, both of which build from `spr` + `names` alone. `placePopulationExists` lets callers
    // check for the pre-built table.
    onProgress("done", `${ftsCountRow.n} FTS rows + ${bboxCountRow.n} bbox rows ` +
        `(${ftsCreated ? "built" : "preexisting"} / ${bboxCreated ? "built" : "preexisting"})`);
    return {
        created: ftsCreated,
        indexedRows: ftsCountRow.n,
        bboxCreated,
        bboxIndexedRows: bboxCountRow.n,
        durationMs: Date.now() - start,
    };
}

/**
 * Runs `work` inside a named SAVEPOINT: released on success, rolled back and released on failure.
 * A savepoint (rather than BEGIN) is used so this also works when the caller already has a
 * transaction open on `db`.
 */
function runInSavepoint(db, savepoint, work) {
    db.exec(`SAVEPOINT ${savepoint}`);
    try {
        work();
        db.exec(`RELEASE ${savepoint}`);
    }
    catch (error) {
        try {
            db.exec(`ROLLBACK TO ${savepoint}`);
            db.exec(`RELEASE ${savepoint}`);
        }
        catch {
            // Cleanup is best-effort; the failure that triggered it is what the caller must see.
        }
        throw error;
    }
}
242
/**
 * Reports whether the `place_search` table is present in the connected DB. Used by
 * `WofSqlitePlaceLookup` for its "FTS missing — pass buildFts:true or run the CLI" guard.
 *
 * @param db Open database connection to inspect.
 * @returns `true` when `tableExists` finds {@link PLACE_SEARCH_TABLE}, otherwise `false`.
 */
export function placeSearchFtsExists(db) {
    const ftsPresent = tableExists(db, PLACE_SEARCH_TABLE);
    return ftsPresent;
}
249
/**
 * Reports whether the `place_bbox` R*Tree table is present. Used for opt-in proximity lookup
 * checks.
 *
 * @param db Open database connection to inspect.
 * @returns `true` when `tableExists` finds {@link PLACE_BBOX_TABLE}, otherwise `false`.
 */
export function placeBboxExists(db) {
    const bboxPresent = tableExists(db, PLACE_BBOX_TABLE);
    return bboxPresent;
}
253
/**
 * Reports whether the `place_population` table is present. Used for opt-in population-ranking
 * checks.
 *
 * @param db Open database connection to inspect.
 * @returns `true` when `tableExists` finds {@link PLACE_POPULATION_TABLE}, otherwise `false`.
 */
export function placePopulationExists(db) {
    const populationPresent = tableExists(db, PLACE_POPULATION_TABLE);
    return populationPresent;
}
257
/**
 * Checks `sqlite_master` for an entry of `type = 'table'` with the given name.
 *
 * @param db Open database connection; only `prepare(sql).get(name)` is used.
 * @param name Table name, bound as a parameter rather than interpolated into the SQL.
 * @returns `true` when the lookup yields a row, `false` otherwise.
 */
function tableExists(db, name) {
    const lookup = db.prepare(`SELECT name FROM sqlite_master WHERE type = 'table' AND name = ?`);
    const match = lookup.get(name);
    return match ? true : false;
}
261
+ //# sourceMappingURL=fts.js.map
package/out/fts.js.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"fts.js","sourceRoot":"","sources":["../fts.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAIH;;;GAGG;AACH,MAAM,CAAC,MAAM,kBAAkB,GAAG,cAAc,CAAA;AAEhD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4CG;AACH,MAAM,CAAC,MAAM,eAAe,GAAG,QAAQ,CAAA;AAEvC,6FAA6F;AAC7F,MAAM,yBAAyB,GAAG,eAAe,CAAC,WAAW,CAAC,CAAC,CAAW,CAAA;AAE1E;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,MAAM,UAAU,kBAAkB,CAAC,QAAuB,EAAE,eAAuB,EAAE,cAAuB;IAC3G,IAAI,QAAQ,KAAK,IAAI,IAAI,QAAQ,KAAK,EAAE,IAAI,CAAC,eAAe;QAAE,OAAO,KAAK,CAAA;IAC1E,MAAM,IAAI,GAAG,CAAC,CAAS,EAAU,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;IAC/E,IAAI,QAAQ,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;QACxC,OAAO,QAAQ,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,eAAe,CAAC,CAAA;IACxF,CAAC;IACD,IAAI,cAAc;QAAE,OAAO,KAAK,CAAA;IAChC,OAAO,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,eAAe,GAAG,CAAC,CAAA;AAC9D,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAAG,YAAY,CAAA;AAE5C;;;;;;;GAOG;AACH,MAAM,CAAC,MAAM,sBAAsB,GAAG,kBAAkB,CAAA;AAoCxD;;;;;;;;;GASG;AACH,MAAM,UAAU,mBAAmB,CAAC,EAAgB,EAAE,OAAgC,EAAE;IACvF,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAA;IACxB,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,IAAI,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA;IAEhD,UAAU,CAAC,UAAU,CAAC,CAAA;IACtB,MAAM,WAAW,GAAG,WAAW,CAAC,EAAE,EAAE,kBAAkB,CAAC,CAAA;IACvD,MAAM,YAAY,GAAG,WAAW,CAAC,EAAE,EAAE,gBAAgB,CAAC,CAAA;IAEtD,oEAAoE;IACpE,IAAI,UAAU,GAAG,KAAK,CAAA;IACtB,IAAI,WAAW,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QAC9B,UAAU,CAAC,UAAU,EAAE,kBAAkB,CAAC,CAAA;QAC1C,EAAE,CAAC,IAAI,CAAC,cAAc,kBAAkB,EAAE,CAAC,CAAA;IAC5C,CAAC;IACD,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QAC/B,UAAU,CAAC,UAAU,CAAC,CAAA;QACtB,EAAE,CAAC,IAAI,CAAC;0BACgB,kBAAkB;;;;;;GAMzC,CAAC,CAAA;QACF,UAAU,CAAC,YAAY,CAAC,CAAA;QACxB,6FAA6F;QAC7F,4FAA4F;QAC5F,6FAA6F;QAC7F,qFAAqF;QACrF,EAAE;QACF,8FAA8F;QAC9F,2FAA2F;QAC3F,2FAA2F;QAC3F,uFAAuF;QACvF,uFAAuF;QACvF,qBAAqB;QACrB,EAAE,CAAC,IAAI,CAAC;iBACO,kBAAkB;;;;;;2BAMR,yBAAyB;oBAChC,yBAAyB;wBACrB,yBAAyB;;;;;;;GAO9C,C
AAC,CAAA;QACF,UAAU,GAAG,IAAI,CAAA;IAClB,CAAC;IACD,MAAM,WAAW,GAAG,EAAE,CAAC,OAAO,CAAC,6BAA6B,kBAAkB,EAAE,CAAC,CAAC,GAAG,EAAmB,CAAA;IAExG,oEAAoE;IACpE,IAAI,WAAW,GAAG,KAAK,CAAA;IACvB,IAAI,YAAY,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QAC/B,UAAU,CAAC,UAAU,EAAE,gBAAgB,CAAC,CAAA;QACxC,EAAE,CAAC,IAAI,CAAC,cAAc,gBAAgB,EAAE,CAAC,CAAA;IAC1C,CAAC;IACD,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;QAChC,UAAU,CAAC,eAAe,CAAC,CAAA;QAC3B,wFAAwF;QACxF,iFAAiF;QACjF,EAAE,CAAC,IAAI,CAAC;0BACgB,gBAAgB;;;;;GAKvC,CAAC,CAAA;QACF,UAAU,CAAC,iBAAiB,CAAC,CAAA;QAC7B,wFAAwF;QACxF,0FAA0F;QAC1F,oFAAoF;QACpF,wFAAwF;QACxF,EAAE,CAAC,IAAI,CAAC;iBACO,gBAAgB;;;;;;;;;;;;;;;;GAgB9B,CAAC,CAAA;QACF,WAAW,GAAG,IAAI,CAAA;IACnB,CAAC;IACD,MAAM,YAAY,GAAG,EAAE,CAAC,OAAO,CAAC,6BAA6B,gBAAgB,EAAE,CAAC,CAAC,GAAG,EAAmB,CAAA;IAEvG,sFAAsF;IACtF,iGAAiG;IACjG,kGAAkG;IAClG,+FAA+F;IAC/F,iCAAiC;IAEjC,UAAU,CACT,MAAM,EACN,GAAG,WAAW,CAAC,CAAC,eAAe,YAAY,CAAC,CAAC,aAAa;QACzD,IAAI,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,aAAa,MAAM,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,aAAa,GAAG,CACvF,CAAA;IACD,OAAO;QACN,OAAO,EAAE,UAAU;QACnB,WAAW,EAAE,WAAW,CAAC,CAAC;QAC1B,WAAW;QACX,eAAe,EAAE,YAAY,CAAC,CAAC;QAC/B,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK;KAC9B,CAAA;AACF,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,oBAAoB,CAAC,EAAgB;IACpD,OAAO,WAAW,CAAC,EAAE,EAAE,kBAAkB,CAAC,CAAA;AAC3C,CAAC;AAED,sGAAsG;AACtG,MAAM,UAAU,eAAe,CAAC,EAAgB;IAC/C,OAAO,WAAW,CAAC,EAAE,EAAE,gBAAgB,CAAC,CAAA;AACzC,CAAC;AAED,uGAAuG;AACvG,MAAM,UAAU,qBAAqB,CAAC,EAAgB;IACrD,OAAO,WAAW,CAAC,EAAE,EAAE,sBAAsB,CAAC,CAAA;AAC/C,CAAC;AAED,SAAS,WAAW,CAAC,EAAgB,EAAE,IAAY;IAClD,MAAM,GAAG,GAAG,EAAE,CAAC,OAAO,CAAC,kEAAkE,CAAC,CAAC,GAAG,CAAC,IAAI,CAEvF,CAAA;IACZ,OAAO,OAAO,CAAC,GAAG,CAAC,CAAA;AACpB,CAAC"}
package/out/geo.d.ts ADDED
@@ -0,0 +1,74 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Geographic helpers for the resolver — haversine distance, bbox math, and point-in-polygon.
7
+ *
8
+ * We deliberately don't pull SpatiaLite (or turf) for this. SQLite's built-in `rtree` virtual table
9
+ * gives us bbox filtering at the SQL level; haversine distance is a 12-line TS function and
10
+ * ray-cast PIP a ~30-line one — both plenty fast for the post-fetch passes (we operate on ≤ a few
11
+ * hundred candidates per query, not the whole 142k-row corpus).
12
+ *
13
+ * The PIP implementation here is the CANONICAL port of the even-odd ray cast that previously lived
14
+ * only in Python (`scripts/eval/pip-containment.py`, with a second copy in
15
+ * `scripts/build-postcode-locality.py`). Keep the three in sync if the algorithm ever changes —
16
+ * the eval-side Python copies grade the same containment truth this one resolves with.
17
+ *
18
+ * The R*Tree index name + schema are centralized in `fts.ts` (alongside the FTS5 build).
19
+ */
20
+ export { haversineKm } from "@mailwoman/spatial";
21
+ /**
22
+ * Approximate bbox around a point — `radiusKm` in each direction. Used to translate a `near: {lat,
23
+ * lon}` + `maxDistanceKm` filter into an R*Tree bbox query.
24
+ *
25
+ * The math is the spherical-Earth equirectangular approximation: 1° latitude ≈ 111 km globally, 1°
26
+ * longitude ≈ 111 km × cos(latitude). Accurate enough for filtering (we re-check with exact
27
+ * haversine post-fetch), and it stays cheap.
28
+ */
29
+ export interface Bbox {
30
+ minLat: number;
31
+ maxLat: number;
32
+ minLon: number;
33
+ maxLon: number;
34
+ }
35
+ export declare function bboxAround(lat: number, lon: number, radiusKm: number): Bbox;
36
+ /**
37
+ * A GeoJSON position — `[lon, lat]`, possibly with extra dimensions we ignore. WOF geometries are
38
+ * 2-D throughout, but the type stays open so a 3-D source doesn't break parsing.
39
+ */
40
+ export type GeojsonPosition = [number, number, ...number[]];
41
+ /** The two areal GeoJSON geometry types PIP can test, plus an open fallback (Point etc.). */
42
+ export interface GeojsonPolygon {
43
+ type: "Polygon";
44
+ /** `[outerRing, hole1, hole2, …]` — each ring a closed list of positions. */
45
+ coordinates: GeojsonPosition[][];
46
+ }
47
+ export interface GeojsonMultiPolygon {
48
+ type: "MultiPolygon";
49
+ coordinates: GeojsonPosition[][][];
50
+ }
51
+ export type GeojsonGeometry = GeojsonPolygon | GeojsonMultiPolygon | {
52
+ type: string;
53
+ coordinates?: unknown;
54
+ };
55
+ /**
56
+ * Ray-cast a point against ONE linear ring. Standard even-odd crossing count: shoot a ray along
57
+ * +lon and toggle on every edge crossing. Points exactly on an edge are implementation-defined
58
+ * (either side is acceptable for geocoding — admin boundaries are DP-simplified anyway).
59
+ */
60
+ export declare function pointInRing(lon: number, lat: number, ring: readonly GeojsonPosition[]): boolean;
61
+ /**
62
+ * Even-odd containment over a polygon's ring list (`[outer, hole1, …]`) — being inside an odd
63
+ * number of rings means inside the polygon, which handles holes without ring-orientation rules.
64
+ */
65
+ export declare function pointInPolygonRings(lon: number, lat: number, rings: readonly GeojsonPosition[][]): boolean;
66
+ /**
67
+ * Does an areal GeoJSON geometry contain the point?
68
+ *
69
+ * - `true` / `false` — a Polygon or MultiPolygon was tested.
70
+ * - `null` — the geometry isn't areal (Point, LineString, …) and CANNOT contain; callers treat this
71
+ * the same as "no polygon on record" (the approximate-fallback path), never as a rejection.
72
+ */
73
+ export declare function geometryContains(geometry: GeojsonGeometry | null | undefined, lon: number, lat: number): boolean | null;
74
+ //# sourceMappingURL=geo.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"geo.d.ts","sourceRoot":"","sources":["../geo.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAIH,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAA;AAOhD;;;;;;;GAOG;AACH,MAAM,WAAW,IAAI;IACpB,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,MAAM,CAAA;CACd;AAED,wBAAgB,UAAU,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAW3E;AAED;;;GAGG;AACH,MAAM,MAAM,eAAe,GAAG,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAAC,CAAA;AAE3D,6FAA6F;AAC7F,MAAM,WAAW,cAAc;IAC9B,IAAI,EAAE,SAAS,CAAA;IACf,6EAA6E;IAC7E,WAAW,EAAE,eAAe,EAAE,EAAE,CAAA;CAChC;AAED,MAAM,WAAW,mBAAmB;IACnC,IAAI,EAAE,cAAc,CAAA;IACpB,WAAW,EAAE,eAAe,EAAE,EAAE,EAAE,CAAA;CAClC;AAED,MAAM,MAAM,eAAe,GAAG,cAAc,GAAG,mBAAmB,GAAG;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,OAAO,CAAA;CAAE,CAAA;AAE5G;;;;GAIG;AACH,wBAAgB,WAAW,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,eAAe,EAAE,GAAG,OAAO,CAa/F;AAED;;;GAGG;AACH,wBAAgB,mBAAmB,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,KAAK,EAAE,SAAS,eAAe,EAAE,EAAE,GAAG,OAAO,CAM1G;AAED;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAC/B,QAAQ,EAAE,eAAe,GAAG,IAAI,GAAG,SAAS,EAC5C,GAAG,EAAE,MAAM,EACX,GAAG,EAAE,MAAM,GACT,OAAO,GAAG,IAAI,CAShB"}
package/out/geo.js ADDED
@@ -0,0 +1,88 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Geographic helpers for the resolver — haversine distance, bbox math, and point-in-polygon.
7
+ *
8
+ * We deliberately don't pull SpatiaLite (or turf) for this. SQLite's built-in `rtree` virtual table
9
+ * gives us bbox filtering at the SQL level; haversine distance is a 12-line TS function and
10
+ * ray-cast PIP a ~30-line one — both plenty fast for the post-fetch passes (we operate on ≤ a few
11
+ * hundred candidates per query, not the whole 142k-row corpus).
12
+ *
13
+ * The PIP implementation here is the CANONICAL port of the even-odd ray cast that previously lived
14
+ * only in Python (`scripts/eval/pip-containment.py`, with a second copy in
15
+ * `scripts/build-postcode-locality.py`). Keep the three in sync if the algorithm ever changes —
16
+ * the eval-side Python copies grade the same containment truth this one resolves with.
17
+ *
18
+ * The R*Tree index name + schema are centralized in `fts.ts` (alongside the FTS5 build).
19
+ */
20
+ // haversineKm is the canonical implementation in @mailwoman/spatial; re-exported so this package's
21
+ // readers keep importing it from "./geo.js" (the spatial dep is transitive via @mailwoman/resolver).
22
+ export { haversineKm } from "@mailwoman/spatial";
23
/**
 * Converts an angle in degrees to radians.
 *
 * @param deg Angle in degrees.
 * @returns The same angle in radians.
 */
function toRad(deg) {
    const scaled = deg * Math.PI;
    return scaled / 180;
}
27
/**
 * Builds an approximate bounding box that extends `radiusKm` from a point in each direction.
 *
 * Uses 111 km per degree of latitude and 111 km × cos(latitude) per degree of longitude.
 *
 * The result is returned as computed: longitudes are not wrapped at ±180° and latitudes are not
 * clamped to ±90°.
 *
 * @param lat Centre latitude in degrees.
 * @param lon Centre longitude in degrees.
 * @param radiusKm Half-extent of the box in kilometres.
 * @returns `{ minLat, maxLat, minLon, maxLon }` in degrees.
 */
export function bboxAround(lat, lon, radiusKm) {
    const halfHeight = radiusKm / 111;
    // cos(latitude) shrinks towards 0 at the poles; flooring it at 1e-6 keeps the longitude
    // half-width finite instead of dividing by (nearly) zero.
    const latitudeCosine = Math.cos(toRad(lat));
    const safeCosine = Math.max(latitudeCosine, 1e-6);
    const halfWidth = radiusKm / (111 * safeCosine);
    const south = lat - halfHeight;
    const north = lat + halfHeight;
    const west = lon - halfWidth;
    const east = lon + halfWidth;
    return { minLat: south, maxLat: north, minLon: west, maxLon: east };
}
39
/**
 * Even-odd ray cast of a point against a single linear ring.
 *
 * A ray is shot along +lon from the point; every ring edge it crosses flips the result. Points
 * lying exactly on an edge are implementation-defined (either side is acceptable for geocoding —
 * admin boundaries are DP-simplified anyway).
 *
 * @param lon Point longitude.
 * @param lat Point latitude.
 * @param ring Ring positions, each `[lon, lat, ...]`.
 * @returns `true` when the ray crosses an odd number of edges; `false` otherwise, including for
 *   an empty ring.
 */
export function pointInRing(lon, lat, ring) {
    const vertexCount = ring.length;
    let inside = false;
    let prev = vertexCount - 1;
    for (let cur = 0; cur < vertexCount; cur++) {
        const curLon = ring[cur][0];
        const curLat = ring[cur][1];
        const prevLon = ring[prev][0];
        const prevLat = ring[prev][1];
        // Only an edge whose endpoints sit on opposite sides of the ray's latitude can cross it.
        const straddles = (curLat > lat) !== (prevLat > lat);
        if (straddles) {
            // Longitude at which the edge meets the ray's latitude.
            const crossingLon = ((prevLon - curLon) * (lat - curLat)) / (prevLat - curLat) + curLon;
            if (lon < crossingLon) {
                inside = !inside;
            }
        }
        prev = cur;
    }
    return inside;
}
58
/**
 * Even-odd containment across a polygon's ring list (`[outer, hole1, …]`).
 *
 * The point counts as inside the polygon when it falls inside an odd number of rings, which
 * handles holes without relying on ring-orientation rules.
 *
 * @param lon Point longitude.
 * @param lat Point latitude.
 * @param rings The polygon's rings, each a list of positions.
 * @returns `true` when the point is inside an odd number of rings, otherwise `false`.
 */
export function pointInPolygonRings(lon, lat, rings) {
    let containingRings = 0;
    for (const ring of rings) {
        if (pointInRing(lon, lat, ring)) {
            containingRings += 1;
        }
    }
    return containingRings % 2 === 1;
}
70
/**
 * Does an areal GeoJSON geometry contain the point?
 *
 * - `true` / `false` — a Polygon or MultiPolygon was tested.
 * - `null` — the geometry is absent, isn't areal (Point, LineString, …), or is tagged areal but
 *   carries no usable `coordinates` array, so it CANNOT be tested; callers treat this the same
 *   as "no polygon on record" (the approximate-fallback path), never as a rejection.
 *
 * @param geometry GeoJSON geometry object, or `null` / `undefined`.
 * @param lon Point longitude.
 * @param lat Point latitude.
 * @returns `true`, `false`, or `null` as described above.
 */
export function geometryContains(geometry, lon, lat) {
    if (!geometry) {
        return null;
    }
    const { type, coordinates } = geometry;
    if (type !== "Polygon" && type !== "MultiPolygon") {
        return null;
    }
    // A record tagged as areal but missing its coordinate array is unusable rather than a
    // "not contained" verdict — answer null instead of throwing on the malformed row.
    if (!Array.isArray(coordinates)) {
        return null;
    }
    if (type === "Polygon") {
        return pointInPolygonRings(lon, lat, coordinates);
    }
    // MultiPolygon: contained when any member polygon contains the point. Non-array members are
    // skipped so one malformed member cannot abort the whole test.
    return coordinates.some((rings) => Array.isArray(rings) && pointInPolygonRings(lon, lat, rings));
}
88
+ //# sourceMappingURL=geo.js.map
package/out/geo.js.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"geo.js","sourceRoot":"","sources":["../geo.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,mGAAmG;AACnG,qGAAqG;AACrG,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAA;AAEhD,gCAAgC;AAChC,SAAS,KAAK,CAAC,GAAW;IACzB,OAAO,CAAC,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,GAAG,GAAG,CAAA;AAC7B,CAAC;AAiBD,MAAM,UAAU,UAAU,CAAC,GAAW,EAAE,GAAW,EAAE,QAAgB;IACpE,MAAM,QAAQ,GAAG,QAAQ,GAAG,GAAG,CAAA;IAC/B,yFAAyF;IACzF,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,CAAA;IACnD,MAAM,QAAQ,GAAG,QAAQ,GAAG,CAAC,GAAG,GAAG,MAAM,CAAC,CAAA;IAC1C,OAAO;QACN,MAAM,EAAE,GAAG,GAAG,QAAQ;QACtB,MAAM,EAAE,GAAG,GAAG,QAAQ;QACtB,MAAM,EAAE,GAAG,GAAG,QAAQ;QACtB,MAAM,EAAE,GAAG,GAAG,QAAQ;KACtB,CAAA;AACF,CAAC;AAsBD;;;;GAIG;AACH,MAAM,UAAU,WAAW,CAAC,GAAW,EAAE,GAAW,EAAE,IAAgC;IACrF,IAAI,MAAM,GAAG,KAAK,CAAA;IAClB,MAAM,CAAC,GAAG,IAAI,CAAC,MAAM,CAAA;IACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;QAC3C,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,CAAA;QACtB,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,CAAA;QACtB,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,CAAA;QACtB,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,CAAA;QACtB,IAAI,EAAE,GAAG,GAAG,KAAK,EAAE,GAAG,GAAG,IAAI,GAAG,GAAG,CAAC,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,CAAC,GAAG,GAAG,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC;YAC9E,MAAM,GAAG,CAAC,MAAM,CAAA;QACjB,CAAC;IACF,CAAC;IACD,OAAO,MAAM,CAAA;AACd,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,mBAAmB,CAAC,GAAW,EAAE,GAAW,EAAE,KAAmC;IAChG,IAAI,MAAM,GAAG,KAAK,CAAA;IAClB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,IAAI,WAAW,CAAC,GAAG,EAAE,GAAG,EAAE,IAAI,CAAC;YAAE,MAAM,GAAG,CAAC,MAAM,CAAA;IAClD,CAAC;IACD,OAAO,MAAM,CAAA;AACd,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,gBAAgB,CAC/B,QAA4C,EAC5C,GAAW,EACX,GAAW;IAEX,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAA;IAC1B,IAAI,QAAQ,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;QACjC,OAAO,mBAAmB,CAAC,GAAG,EAAE,GAAG,EAAG,QAA2B,CAAC,WAAW,CAAC,CAAA;IAC/E,CAAC;IACD,IAAI,QAAQ,CAAC,I
AAI,KAAK,cAAc,EAAE,CAAC;QACtC,OAAQ,QAAgC,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,mBAAmB,CAAC,GAAG,EAAE,GAAG,EAAE,KAAK,CAAC,CAAC,CAAA;IAC3G,CAAC;IACD,OAAO,IAAI,CAAA;AACZ,CAAC"}
package/out/index.d.ts ADDED
@@ -0,0 +1,27 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ */
6
+ export type { FindPlaceQuery, GeoBbox, GeoPoint, PlaceCandidate, PlaceLookup, WofPlacetype } from "./types.js";
7
+ export type { AncestorsTable, CoincidentRolesTable, ConcordancesTable, GeojsonTable, NamesTable, PlaceAbbrTable, PlacePopulationTable, PlaceSearchTable, SprTable, WofDatabase, } from "./schema.js";
8
+ export { WofSqlitePlaceLookup, type RankingWeights, type WofSqlitePlaceLookupOpts } from "./lookup.js";
9
+ export { WofCandidateTableLookup, type WofCandidateTableLookupOpts } from "./candidate-lookup.js";
10
+ export { ADDRESS_POINT_COLUMNS, createAddressPointIndexes, createAddressPointTable } from "./address-point-schema.js";
11
+ export type { AddressPointDatabase, AddressPointTable } from "./address-point-schema.js";
12
+ export { WofPostalCityAliasLookup, type PostalCityAlias, type WofPostalCityAliasLookupOpts, } from "./postal-city-alias-lookup.js";
13
+ export type { PostalCityAliasDatabase, PostalCityAliasTable } from "./postal-city-alias-schema.js";
14
+ export { POSTAL_CITY_CANDIDATE_COLUMNS, POSTAL_CITY_CANDIDATE_TABLE, createPostalCityCandidateTable, } from "./postal-city-candidate-schema.js";
15
+ export type { PostalCityCandidateDatabase, PostalCityCandidateTable } from "./postal-city-candidate-schema.js";
16
+ export { ADDRESS_CONVENTION_TABLE, BUILTIN_STRATEGY_NAMES, SeedConventionSource, WORLD_DEFAULT, mergeConventions, resolveConvention, type Convention, type ConventionSource, type ResolvedConvention, type ScoringWeights, type Strategy, } from "./convention.js";
17
+ export { SqliteConventionSource } from "./sqlite-convention-source.js";
18
+ export { WofPostcodeLookup, type PostcodePlace } from "./postcode-point-lookup.js";
19
+ export { PLACE_BBOX_TABLE, PLACE_SEARCH_TABLE, buildPlaceSearchFts, placeBboxExists, placeSearchFtsExists, type BuildPlaceSearchFtsOpts, type BuildPlaceSearchFtsResult, } from "./fts.js";
20
+ export { bboxAround, geometryContains, haversineKm, pointInPolygonRings, pointInRing, type Bbox, type GeojsonGeometry, type GeojsonMultiPolygon, type GeojsonPolygon, type GeojsonPosition, } from "./geo.js";
21
+ export { PLACETYPE_DEPTH, ancestorLineage, placetypeDepth, type AncestorPlaceRow } from "./ancestry.js";
22
+ export { WofReverseGeocoder, type ContainmentKind, type ReverseGeocodeOpts, type ReverseGeocodeResult, type WofReverseGeocoderOpts, } from "./reverse.js";
23
+ export { AddressPointInterpolator } from "./address-point-interpolation.js";
24
+ export { AddressPointSqliteLookup } from "./address-point.js";
25
+ export { StreetInterpolator, type InterpolatedHit, type InterpolationMethod, type InterpolationQuery, } from "./interpolation.js";
26
+ export { deriveSchemaName, pickShardForPlacetype, resolveShards, type ResolvedShard, type ShardConfig, } from "./sharding.js";
27
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,YAAY,EAAE,cAAc,EAAE,OAAO,EAAE,QAAQ,EAAE,cAAc,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAE9G,YAAY,EACX,cAAc,EACd,oBAAoB,EACpB,iBAAiB,EACjB,YAAY,EACZ,UAAU,EACV,cAAc,EACd,oBAAoB,EACpB,gBAAgB,EAChB,QAAQ,EACR,WAAW,GACX,MAAM,aAAa,CAAA;AAEpB,OAAO,EAAE,oBAAoB,EAAE,KAAK,cAAc,EAAE,KAAK,wBAAwB,EAAE,MAAM,aAAa,CAAA;AAEtG,OAAO,EAAE,uBAAuB,EAAE,KAAK,2BAA2B,EAAE,MAAM,uBAAuB,CAAA;AAEjG,OAAO,EAAE,qBAAqB,EAAE,yBAAyB,EAAE,uBAAuB,EAAE,MAAM,2BAA2B,CAAA;AACrH,YAAY,EAAE,oBAAoB,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAA;AACxF,OAAO,EACN,wBAAwB,EACxB,KAAK,eAAe,EACpB,KAAK,4BAA4B,GACjC,MAAM,+BAA+B,CAAA;AACtC,YAAY,EAAE,uBAAuB,EAAE,oBAAoB,EAAE,MAAM,+BAA+B,CAAA;AAClG,OAAO,EACN,6BAA6B,EAC7B,2BAA2B,EAC3B,8BAA8B,GAC9B,MAAM,mCAAmC,CAAA;AAC1C,YAAY,EAAE,2BAA2B,EAAE,wBAAwB,EAAE,MAAM,mCAAmC,CAAA;AAE9G,OAAO,EACN,wBAAwB,EACxB,sBAAsB,EACtB,oBAAoB,EACpB,aAAa,EACb,gBAAgB,EAChB,iBAAiB,EACjB,KAAK,UAAU,EACf,KAAK,gBAAgB,EACrB,KAAK,kBAAkB,EACvB,KAAK,cAAc,EACnB,KAAK,QAAQ,GACb,MAAM,iBAAiB,CAAA;AAExB,OAAO,EAAE,sBAAsB,EAAE,MAAM,+BAA+B,CAAA;AAEtE,OAAO,EAAE,iBAAiB,EAAE,KAAK,aAAa,EAAE,MAAM,4BAA4B,CAAA;AAElF,OAAO,EACN,gBAAgB,EAChB,kBAAkB,EAClB,mBAAmB,EACnB,eAAe,EACf,oBAAoB,EACpB,KAAK,uBAAuB,EAC5B,KAAK,yBAAyB,GAC9B,MAAM,UAAU,CAAA;AAEjB,OAAO,EACN,UAAU,EACV,gBAAgB,EAChB,WAAW,EACX,mBAAmB,EACnB,WAAW,EACX,KAAK,IAAI,EACT,KAAK,eAAe,EACpB,KAAK,mBAAmB,EACxB,KAAK,cAAc,EACnB,KAAK,eAAe,GACpB,MAAM,UAAU,CAAA;AAEjB,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,cAAc,EAAE,KAAK,gBAAgB,EAAE,MAAM,eAAe,CAAA;AAEvG,OAAO,EACN,kBAAkB,EAClB,KAAK,eAAe,EACpB,KAAK,kBAAkB,EACvB,KAAK,oBAAoB,EACzB,KAAK,sBAAsB,GAC3B,MAAM,cAAc,CAAA;AAErB,OAAO,EAAE,wBAAwB,EAAE,MAAM,kCAAkC,CAAA;AAC3E,OAAO,EAAE,wBAAwB,EAAE,MAAM,oBAAoB,CAAA;AAC7D,OAAO,EACN,kBAAkB,EAClB,KAAK,eAAe,EACpB,KAAK,mBAAmB,EACxB,KAAK,kBAAkB,GACvB,MAAM,oBAAoB,CAAA;AAC3B,OAAO,EACN,gBAAgB,EAChB,qBAAqB,EACrB,aAAa,EACb,KAAK,aAAa,EAClB,KAAK,WAAW,GAChB,MAAM,eAAe,CAAA"}