@mailwoman/resolver-wof-sqlite 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +250 -0
- package/out/address-point-interpolation.d.ts +48 -0
- package/out/address-point-interpolation.d.ts.map +1 -0
- package/out/address-point-interpolation.js +164 -0
- package/out/address-point-interpolation.js.map +1 -0
- package/out/address-point-schema.d.ts +58 -0
- package/out/address-point-schema.d.ts.map +1 -0
- package/out/address-point-schema.js +67 -0
- package/out/address-point-schema.js.map +1 -0
- package/out/address-point.d.ts +29 -0
- package/out/address-point.d.ts.map +1 -0
- package/out/address-point.js +62 -0
- package/out/address-point.js.map +1 -0
- package/out/ancestry.d.ts +40 -0
- package/out/ancestry.d.ts.map +1 -0
- package/out/ancestry.js +53 -0
- package/out/ancestry.js.map +1 -0
- package/out/build-candidate-cli.d.ts +16 -0
- package/out/build-candidate-cli.d.ts.map +1 -0
- package/out/build-candidate-cli.js +80 -0
- package/out/build-candidate-cli.js.map +1 -0
- package/out/build-candidate.d.ts +54 -0
- package/out/build-candidate.d.ts.map +1 -0
- package/out/build-candidate.js +230 -0
- package/out/build-candidate.js.map +1 -0
- package/out/build-coincident-roles-cli.d.ts +16 -0
- package/out/build-coincident-roles-cli.d.ts.map +1 -0
- package/out/build-coincident-roles-cli.js +94 -0
- package/out/build-coincident-roles-cli.js.map +1 -0
- package/out/build-fts-cli.d.ts +23 -0
- package/out/build-fts-cli.d.ts.map +1 -0
- package/out/build-fts-cli.js +117 -0
- package/out/build-fts-cli.js.map +1 -0
- package/out/build-slim-cli.d.ts +14 -0
- package/out/build-slim-cli.d.ts.map +1 -0
- package/out/build-slim-cli.js +130 -0
- package/out/build-slim-cli.js.map +1 -0
- package/out/build-slim.d.ts +71 -0
- package/out/build-slim.d.ts.map +1 -0
- package/out/build-slim.js +267 -0
- package/out/build-slim.js.map +1 -0
- package/out/candidate-lookup.d.ts +43 -0
- package/out/candidate-lookup.d.ts.map +1 -0
- package/out/candidate-lookup.js +191 -0
- package/out/candidate-lookup.js.map +1 -0
- package/out/candidate-schema.d.ts +86 -0
- package/out/candidate-schema.d.ts.map +1 -0
- package/out/candidate-schema.js +109 -0
- package/out/candidate-schema.js.map +1 -0
- package/out/coincident-roles.d.ts +86 -0
- package/out/coincident-roles.d.ts.map +1 -0
- package/out/coincident-roles.js +160 -0
- package/out/coincident-roles.js.map +1 -0
- package/out/convention.d.ts +109 -0
- package/out/convention.d.ts.map +1 -0
- package/out/convention.js +94 -0
- package/out/convention.js.map +1 -0
- package/out/fst-autocomplete.d.ts +49 -0
- package/out/fst-autocomplete.d.ts.map +1 -0
- package/out/fst-autocomplete.js +124 -0
- package/out/fst-autocomplete.js.map +1 -0
- package/out/fst-builder.d.ts +20 -0
- package/out/fst-builder.d.ts.map +1 -0
- package/out/fst-builder.js +219 -0
- package/out/fst-builder.js.map +1 -0
- package/out/fst-deserialize-web.d.ts +16 -0
- package/out/fst-deserialize-web.d.ts.map +1 -0
- package/out/fst-deserialize-web.js +133 -0
- package/out/fst-deserialize-web.js.map +1 -0
- package/out/fst-matcher.d.ts +33 -0
- package/out/fst-matcher.d.ts.map +1 -0
- package/out/fst-matcher.js +117 -0
- package/out/fst-matcher.js.map +1 -0
- package/out/fst-serialize.d.ts +30 -0
- package/out/fst-serialize.d.ts.map +1 -0
- package/out/fst-serialize.js +261 -0
- package/out/fst-serialize.js.map +1 -0
- package/out/fst-types.d.ts +60 -0
- package/out/fst-types.d.ts.map +1 -0
- package/out/fst-types.js +11 -0
- package/out/fst-types.js.map +1 -0
- package/out/fts.d.ts +158 -0
- package/out/fts.d.ts.map +1 -0
- package/out/fts.js +261 -0
- package/out/fts.js.map +1 -0
- package/out/geo.d.ts +74 -0
- package/out/geo.d.ts.map +1 -0
- package/out/geo.js +88 -0
- package/out/geo.js.map +1 -0
- package/out/index.d.ts +27 -0
- package/out/index.d.ts.map +1 -0
- package/out/index.js +22 -0
- package/out/index.js.map +1 -0
- package/out/interpolation.d.ts +84 -0
- package/out/interpolation.d.ts.map +1 -0
- package/out/interpolation.js +150 -0
- package/out/interpolation.js.map +1 -0
- package/out/lookup.d.ts +156 -0
- package/out/lookup.d.ts.map +1 -0
- package/out/lookup.js +876 -0
- package/out/lookup.js.map +1 -0
- package/out/postal-city-alias-lookup.d.ts +50 -0
- package/out/postal-city-alias-lookup.d.ts.map +1 -0
- package/out/postal-city-alias-lookup.js +66 -0
- package/out/postal-city-alias-lookup.js.map +1 -0
- package/out/postal-city-alias-schema.d.ts +51 -0
- package/out/postal-city-alias-schema.d.ts.map +1 -0
- package/out/postal-city-alias-schema.js +47 -0
- package/out/postal-city-alias-schema.js.map +1 -0
- package/out/postal-city-candidate-schema.d.ts +58 -0
- package/out/postal-city-candidate-schema.d.ts.map +1 -0
- package/out/postal-city-candidate-schema.js +56 -0
- package/out/postal-city-candidate-schema.js.map +1 -0
- package/out/postcode-point-lookup.d.ts +38 -0
- package/out/postcode-point-lookup.d.ts.map +1 -0
- package/out/postcode-point-lookup.js +46 -0
- package/out/postcode-point-lookup.js.map +1 -0
- package/out/reverse.d.ts +99 -0
- package/out/reverse.d.ts.map +1 -0
- package/out/reverse.js +290 -0
- package/out/reverse.js.map +1 -0
- package/out/schema.d.ts +163 -0
- package/out/schema.d.ts.map +1 -0
- package/out/schema.js +18 -0
- package/out/schema.js.map +1 -0
- package/out/sharding.d.ts +96 -0
- package/out/sharding.d.ts.map +1 -0
- package/out/sharding.js +129 -0
- package/out/sharding.js.map +1 -0
- package/out/sqlite-convention-source.d.ts +29 -0
- package/out/sqlite-convention-source.d.ts.map +1 -0
- package/out/sqlite-convention-source.js +53 -0
- package/out/sqlite-convention-source.js.map +1 -0
- package/out/sqlite-utils.d.ts +17 -0
- package/out/sqlite-utils.d.ts.map +1 -0
- package/out/sqlite-utils.js +24 -0
- package/out/sqlite-utils.js.map +1 -0
- package/out/street-morphology-fst-builder.d.ts +59 -0
- package/out/street-morphology-fst-builder.d.ts.map +1 -0
- package/out/street-morphology-fst-builder.js +174 -0
- package/out/street-morphology-fst-builder.js.map +1 -0
- package/out/street-normalize.d.ts +66 -0
- package/out/street-normalize.d.ts.map +1 -0
- package/out/street-normalize.js +176 -0
- package/out/street-normalize.js.map +1 -0
- package/out/street-segment-schema.d.ts +61 -0
- package/out/street-segment-schema.d.ts.map +1 -0
- package/out/street-segment-schema.js +64 -0
- package/out/street-segment-schema.js.map +1 -0
- package/out/types.d.ts +137 -0
- package/out/types.d.ts.map +1 -0
- package/out/types.js +13 -0
- package/out/types.js.map +1 -0
- package/out/unified-schema.d.ts +25 -0
- package/out/unified-schema.d.ts.map +1 -0
- package/out/unified-schema.js +142 -0
- package/out/unified-schema.js.map +1 -0
- package/package.json +54 -0
package/README.md
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
# @mailwoman/resolver-wof-sqlite
|
|
2
|
+
|
|
3
|
+
FTS5-backed [Who's On First](https://whosonfirst.org/) SQLite resolver for [mailwoman](https://www.npmjs.com/package/mailwoman). Takes free-text place queries (`"Paris, FR"`, `"Springfield, IL"`) and returns ranked candidate place IDs + coordinates from a WOF SQLite distribution on disk.
|
|
4
|
+
|
|
5
|
+
Phase 4.2 of the mailwoman neural-resolver plan — see [`docs/plan/phases/PHASE_4_2_wof_sqlite.md`](https://github.com/sister-software/mailwoman/blob/main/docs/plan/phases/PHASE_4_2_wof_sqlite.md) in the source repo.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install @mailwoman/resolver-wof-sqlite
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Requires Node 22+ for built-in `node:sqlite`.
|
|
14
|
+
|
|
15
|
+
## Quick start
|
|
16
|
+
|
|
17
|
+
```ts
|
|
18
|
+
import { WofSqlitePlaceLookup } from "@mailwoman/resolver-wof-sqlite"
|
|
19
|
+
|
|
20
|
+
const lookup = new WofSqlitePlaceLookup({
|
|
21
|
+
databasePath: "/path/to/whosonfirst-data-admin-us-latest.db",
|
|
22
|
+
buildFts: true, // build the FTS5 index on first open (one-time cost)
|
|
23
|
+
})
|
|
24
|
+
|
|
25
|
+
const candidates = await lookup.findPlace({
|
|
26
|
+
text: "Springfield",
|
|
27
|
+
placetype: "locality",
|
|
28
|
+
country: "US",
|
|
29
|
+
})
|
|
30
|
+
|
|
31
|
+
for (const c of candidates) {
|
|
32
|
+
console.log(c.id, c.name, c.country, c.lat, c.lon, "score:", c.score)
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
lookup.close()
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Multi-shard (admin + postcode in one connection)
|
|
39
|
+
|
|
40
|
+
Pass an array of paths to open multiple WOF shards on a single connection — each is opened as a
|
|
41
|
+
separate SQLite schema via `ATTACH DATABASE`. Schema names auto-derive from filenames
|
|
42
|
+
(`whosonfirst-data-admin-us-latest.db` → `admin_us`, `whosonfirst-data-postalcode-us-latest.db` →
|
|
43
|
+
`postalcode_us`). Queries route by `placetype` — a `postalcode` query goes to the
|
|
44
|
+
`postalcode_us` shard automatically, everything else hits main.
|
|
45
|
+
|
|
46
|
+
```ts
|
|
47
|
+
const lookup = new WofSqlitePlaceLookup({
|
|
48
|
+
databasePath: ["/data/wof/whosonfirst-data-admin-us-latest.db", "/data/wof/whosonfirst-data-postalcode-us-latest.db"],
|
|
49
|
+
})
|
|
50
|
+
|
|
51
|
+
await lookup.findPlace({ text: "Springfield", placetype: "locality" }) // → admin shard
|
|
52
|
+
await lookup.findPlace({ text: "62701", placetype: "postalcode" }) // → postcode shard
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Override schema names or routing explicitly when needed:
|
|
56
|
+
|
|
57
|
+
```ts
|
|
58
|
+
new WofSqlitePlaceLookup({
|
|
59
|
+
databasePath: ["/data/wof/admin.db", { path: "/data/oddly-named.db", schemaName: "pc", placetypes: ["postalcode"] }],
|
|
60
|
+
})
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Cross-shard `UNION` queries are not supported in one `findPlace` call — BM25 scores aren't
|
|
64
|
+
comparable across separately-indexed corpora. Issue two `findPlace` calls and merge in your
|
|
65
|
+
caller if you need that.
|
|
66
|
+
|
|
67
|
+
## Getting the WOF SQLite distribution
|
|
68
|
+
|
|
69
|
+
The Geocode Earth team mirrors WOF SQLite distributions at <https://data.geocode.earth/wof/dist/sqlite/>. The two relevant shards for v1:
|
|
70
|
+
|
|
71
|
+
| Distribution | Size (bz2) | Use |
|
|
72
|
+
| ---------------------------------------------- | ---------- | ----------------------------------------------------------------- |
|
|
73
|
+
| `whosonfirst-data-admin-us-latest.db.bz2` | ~845 MB | US administrative places (country / region / locality / borough). |
|
|
74
|
+
| `whosonfirst-data-postalcode-us-latest.db.bz2` | ~320 MB | US postcodes. |
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
curl -L -o whosonfirst-data-admin-us-latest.db.bz2 \
|
|
78
|
+
https://data.geocode.earth/wof/dist/sqlite/whosonfirst-data-admin-us-latest.db.bz2
|
|
79
|
+
bunzip2 whosonfirst-data-admin-us-latest.db.bz2
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## FTS5 index
|
|
83
|
+
|
|
84
|
+
Upstream WOF SQLite distributions ship a `places` table but **not** an FTS5 index. The resolver needs FTS5 to do fast prefix + token-bag matching. Two options:
|
|
85
|
+
|
|
86
|
+
1. **`buildFts: true` on construction** — builds the index lazily on first open. Cost is one-time but expensive (~minutes on the full US admin shard). Use for prototyping.
|
|
87
|
+
2. **Pre-build the index with `mailwoman-wof-build-fts`** — ship the DB with the index included so first-open is fast. Recommended for production.
|
|
88
|
+
|
|
89
|
+
### `mailwoman-wof-build-fts` CLI
|
|
90
|
+
|
|
91
|
+
A one-shot operator script ships with this package as a `bin`:
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
npx mailwoman-wof-build-fts /path/to/whosonfirst-data-admin-us-latest.db
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
The CLI:
|
|
98
|
+
|
|
99
|
+
- Opens the DB read-write.
|
|
100
|
+
- Creates the `place_search` FTS5 virtual table (with the same schema the lazy build uses).
|
|
101
|
+
- Populates it from `spr` + `names` (alternate-name concatenation included).
|
|
102
|
+
- Builds the `place_bbox` R\*Tree virtual table from `spr.min_*`/`spr.max_*` columns for the
|
|
103
|
+
proximity + bbox query support.
|
|
104
|
+
- Reports progress to stderr per phase (`checking` → `creating` → `populating` → `creating-bbox`
|
|
105
|
+
→ `populating-bbox` → `done`).
|
|
106
|
+
- Exits 0 with a no-op message if both indexes already exist.
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
# Refresh after pulling a newer WOF dump
|
|
110
|
+
npx mailwoman-wof-build-fts /path/to/wof.db --drop
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
`--drop` rebuilds from scratch — useful after refreshing the `places` / `names` tables from a newer dump. Without `--drop` the CLI is a no-op when the index is already present.
|
|
114
|
+
|
|
115
|
+
### `mailwoman-wof-build-slim` CLI
|
|
116
|
+
|
|
117
|
+
Builds a trimmed WOF SQLite distribution sized for browser-side deployments (Path B of the demo plan). The full admin-US distribution is ~4 GB; a slim US bundle with the top-1k localities by population plus all postcodes lands at **~35 MB** — small enough to ship as a static asset.
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
# Defaults: --top 1000 localities, --countries US, drops geojson after building aux tables
|
|
121
|
+
npx mailwoman-wof-build-slim \
|
|
122
|
+
--in /path/to/whosonfirst-data-admin-us-latest.db \
|
|
123
|
+
--in /path/to/whosonfirst-data-postalcode-us-latest.db \
|
|
124
|
+
--out /path/to/wof-hot.db
|
|
125
|
+
|
|
126
|
+
# Tinier — top 100 localities only
|
|
127
|
+
npx mailwoman-wof-build-slim --in admin-us.db --out wof-tiny.db --top 100
|
|
128
|
+
|
|
129
|
+
# Multi-country
|
|
130
|
+
npx mailwoman-wof-build-slim --in admin-na.db --out wof-na.db --countries US,CA,MX
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
What survives in the slim DB:
|
|
134
|
+
|
|
135
|
+
- All ancestor placetypes (`country`, `region`, `county`, `borough`, `macroregion`) in scope
|
|
136
|
+
- Top-K localities by `wof:population`
|
|
137
|
+
- All postcodes in scope
|
|
138
|
+
- All `names` rows for the selected place IDs
|
|
139
|
+
- Fresh `place_search` (FTS5), `place_bbox` (R\*Tree), `place_population` aux tables
|
|
140
|
+
|
|
141
|
+
What gets dropped: the `geojson` table, which is build-time only — `lookup.ts` never reads it at query time, and it accounts for ~95% of the on-disk size. The `place_population` aux table consumes `wof:population` from geojson before we drop it.
|
|
142
|
+
|
|
143
|
+
`WofSqlitePlaceLookup` opens the slim DB without any code change. Out-of-set queries (a locality not in the top-K) correctly return zero hits.
|
|
144
|
+
|
|
145
|
+
You can also build the index programmatically via the package's `./fts` subpath:
|
|
146
|
+
|
|
147
|
+
```ts
|
|
148
|
+
import { DatabaseSync } from "node:sqlite"
|
|
149
|
+
import { buildPlaceSearchFts } from "@mailwoman/resolver-wof-sqlite/fts"
|
|
150
|
+
|
|
151
|
+
const db = new DatabaseSync("/path/to/wof.db")
|
|
152
|
+
const { created, indexedRows, durationMs } = buildPlaceSearchFts(db, {
|
|
153
|
+
drop: false,
|
|
154
|
+
onProgress: (phase, detail) => console.log(phase, detail),
|
|
155
|
+
})
|
|
156
|
+
db.close()
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Ranking
|
|
160
|
+
|
|
161
|
+
The resolver scores candidates by:
|
|
162
|
+
|
|
163
|
+
1. SQLite FTS5 BM25 (negated so higher = better).
|
|
164
|
+
2. \+ `placetypeMatchBoost` when the candidate's placetype matches the query filter.
|
|
165
|
+
3. \+ `localityImplicitBoost` when no placetype filter is set and the candidate is a locality.
|
|
166
|
+
4. \+ `countryMatchBoost` when the country filter matches.
|
|
167
|
+
5. \+ `directChildBoost` / `descendantBoost` when `parentId` is set.
|
|
168
|
+
6. \+ `proximityBoost / (1 + distanceKm / proximityScaleKm)` when `near: {lat, lon}` is set — decays
|
|
169
|
+
smoothly with distance from the user's position. At distance 0 the boost is full magnitude; at
|
|
170
|
+
`proximityScaleKm` (default 100 km) it's half.
|
|
171
|
+
7. − `lengthPenaltyWeight` × excess-length penalty (favors short matches over long matches on short
|
|
172
|
+
queries).
|
|
173
|
+
|
|
174
|
+
## Geographic filters (Phase 4.3.x)
|
|
175
|
+
|
|
176
|
+
Two query options use the package-built R\*Tree index over WOF's bounding boxes:
|
|
177
|
+
|
|
178
|
+
```ts
|
|
179
|
+
// Proximity boost (no hard filter — distant candidates aren't dropped, just ranked lower)
|
|
180
|
+
lookup.findPlace({
|
|
181
|
+
text: "Springfield",
|
|
182
|
+
placetype: "locality",
|
|
183
|
+
near: { lat: 39.78, lon: -89.65 },
|
|
184
|
+
})
|
|
185
|
+
|
|
186
|
+
// Proximity boost + hard filter — drop anything beyond 200 km
|
|
187
|
+
lookup.findPlace({
|
|
188
|
+
text: "Springfield",
|
|
189
|
+
placetype: "locality",
|
|
190
|
+
near: { lat: 39.78, lon: -89.65, maxDistanceKm: 200 },
|
|
191
|
+
})
|
|
192
|
+
|
|
193
|
+
// Bbox hard filter — only return candidates whose bbox intersects the box
|
|
194
|
+
lookup.findPlace({
|
|
195
|
+
text: "Springfield",
|
|
196
|
+
placetype: "locality",
|
|
197
|
+
bbox: { minLat: 37, maxLat: 42.5, minLon: -91.5, maxLon: -87.5 },
|
|
198
|
+
})
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
When the R\*Tree index isn't present (DBs built before this feature), the bbox-hard-filter is
|
|
202
|
+
silently dropped to preserve backwards compatibility. The proximity boost still works without the
|
|
203
|
+
R\*Tree because it computes haversine distance against the centroid columns directly. Rebuild with
|
|
204
|
+
`mailwoman-wof-build-fts --drop <path>` to gain the bbox index.
|
|
205
|
+
|
|
206
|
+
All ranking weights are configurable via the second constructor argument:
|
|
207
|
+
|
|
208
|
+
```ts
|
|
209
|
+
new WofSqlitePlaceLookup({ databasePath }, { countryMatchBoost: 0.5 })
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
Defaults are in `lookup.ts::DEFAULT_WEIGHTS`.
|
|
213
|
+
|
|
214
|
+
## Query syntax
|
|
215
|
+
|
|
216
|
+
`FindPlaceQuery.text` accepts free-text input — apostrophes / parens / accented characters / etc.
|
|
217
|
+
are all stripped safely before going to FTS5. Per-token rules:
|
|
218
|
+
|
|
219
|
+
- **Bare tokens** (`"Paris"`, `"62701"`) become FTS5 **phrase matches**: `"Paris"` matches places
|
|
220
|
+
named exactly "Paris", `"62701"` matches the postcode 62701 exactly.
|
|
221
|
+
- **Trailing `*`** (`"627*"`, `"Pari*"`) becomes FTS5 **prefix syntax**: `627*` matches every
|
|
222
|
+
postcode starting with 627, `Pari*` matches Paris / Parishville / etc. The caller explicitly
|
|
223
|
+
signals "prefix"; bare tokens stay phrase-matched for safety.
|
|
224
|
+
- **Multiple tokens** join with implicit AND: `"Pari* TX"` matches places whose name contains
|
|
225
|
+
both a `Pari*`-prefixed word AND the word `TX`.
|
|
226
|
+
|
|
227
|
+
Example: `findPlace({ text: "902*", placetype: "postalcode" })` returns 90201, 90210, 90211, …
|
|
228
|
+
matching the Los Angeles ZIP corridor.
|
|
229
|
+
|
|
230
|
+
## Attribution (CC-BY 4.0)
|
|
231
|
+
|
|
232
|
+
Who's On First data is licensed [CC-BY 4.0](https://creativecommons.org/licenses/by/4.0/). Downstream applications shipping resolved results from this package **must** carry an attribution notice — for example:
|
|
233
|
+
|
|
234
|
+
> Place data via [Who's On First](https://whosonfirst.org/) © Mapzen + contributors, CC-BY 4.0.
|
|
235
|
+
|
|
236
|
+
This package itself is AGPL-3.0; the WOF data it indexes is CC-BY 4.0. The two licenses are separate — your application must comply with both.
|
|
237
|
+
|
|
238
|
+
## Integration tests
|
|
239
|
+
|
|
240
|
+
`resolver-wof-sqlite/integration.test.ts` exercises the resolver against a real WOF SQLite distribution. The suite is **skipped** when no DB is present — set `MAILWOMAN_WOF_DB` to override the lookup path, otherwise it defaults to `/mnt/playpen/mailwoman-data/wof/whosonfirst-data-admin-us-latest.db` (the canonical lab location). CI runs against the fixture-only suites; operators with real WOF data locally get an extra layer of validation.
|
|
241
|
+
|
|
242
|
+
Coverage includes: placetype filtering, country filtering, the empty-result case, FTS5 special-character sanitization, Japanese alt-name resolution, parent-constrained lookup, and a performance budget (`findPlace` < 250 ms against the 142 k-row US admin shard).
|
|
243
|
+
|
|
244
|
+
## Concurrency model
|
|
245
|
+
|
|
246
|
+
This package opens a single `node:sqlite` connection per `WofSqlitePlaceLookup` instance. SQLite is single-writer / many-reader; the Kysely wrapper around the connection serializes all queries through a mutex. For high-concurrency HTTP servers, instantiate one resolver per request handler or per pool slot — sharing a single instance across concurrent requests is fine (queries queue) but won't parallelize across cores.
|
|
247
|
+
|
|
248
|
+
## License
|
|
249
|
+
|
|
250
|
+
AGPL-3.0. WOF data: CC-BY 4.0 (see Attribution above).
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Address-point interpolation — "Method 2" of the resolution ladder (#483, Phase 1 of
|
|
7
|
+
* `docs/articles/plan/2026-06-11-resolution-ladder.md`): when the exact address-point tier (#476)
|
|
8
|
+
* misses a house number, bracket the number with REAL neighbor points on the same street from the
|
|
9
|
+
* same #476 shard and interpolate linearly in house-number space between them. Real occupancy
|
|
10
|
+
* replaces TIGER's uniform-spacing assumption — the dominant error term of the TIGER pilot's gate
|
|
11
|
+
* miss; TIGER range interpolation (`StreetInterpolator`) demotes to the fallback for streets too
|
|
12
|
+
* sparse to bracket.
|
|
13
|
+
*
|
|
14
|
+
* Matching key is `street_key` — THE shared normalizer plus the route fold
|
|
15
|
+
* (`canonicalizeRouteKey`), identical at build time (`scripts/build-address-point-shard.ts`) and
|
|
16
|
+
* query time, by construction. Scope is postcode-first like the segment tier; a query without a
|
|
17
|
+
* postcode goes straight to the fallback (which carries its own statewide-ambiguity abstention).
|
|
18
|
+
*
|
|
19
|
+
* Bracketing contract:
|
|
20
|
+
*
|
|
21
|
+
* - Neighbor candidates NEVER include the queried number itself (any unit/duplicate row of it) — in
|
|
22
|
+
* production the exact tier would already have answered an on-file number, and in the eval
|
|
23
|
+
* this is what makes grading against the same shard non-circular by construction.
|
|
24
|
+
* - Both-sided bracket (`bracket: "both"`): linear interpolation between the nearest known number
|
|
25
|
+
* below and above; `uncertaintyM` = half the distance between them.
|
|
26
|
+
* - Single-sided (`bracket: "single"`): linear extrapolation along the two nearest known numbers on
|
|
27
|
+
* that side, capped at one pair-span beyond the nearest point (`t ≤ 2` — beyond that the line
|
|
28
|
+
* carries no evidence and the query falls through); `uncertaintyM` = the pair distance plus
|
|
29
|
+
* the extrapolated overshoot, explicitly larger than the both-sided radius.
|
|
30
|
+
* - No bracket (no neighbors, a single known number, or past the extrapolation cap): fall through to
|
|
31
|
+
* the TIGER fallback when configured, else null.
|
|
32
|
+
*
|
|
33
|
+
* Standalone like the segment tier — core wiring rides the Phase 2 ordered `spatialTiers` list.
|
|
34
|
+
*/
|
|
35
|
+
import { DatabaseSync } from "node:sqlite";
|
|
36
|
+
import type { InterpolationLookup } from "@mailwoman/resolver";
|
|
37
|
+
import type { InterpolatedHit, InterpolationQuery, StreetInterpolator } from "./interpolation.js";
|
|
38
|
+
/**
 * Interpolation tier that estimates a coordinate for a house number by bracketing it with
 * neighboring address points on the same street key and postcode, deferring to the optional
 * `fallback` interpolator when no estimate can be produced from those neighbors.
 */
export declare class AddressPointInterpolator implements InterpolationLookup {
|
|
39
|
+
#private;
|
|
40
|
+
/**
 * @param opts.dbPath Path to the address-point SQLite shard; opened read-only and closed by `close()`.
 * @param opts.database An already-open connection; takes precedence over `dbPath` and is never closed by this class.
 * @param opts.fallback Interpolator consulted when this tier cannot answer (no table, no postcode, or no usable bracket).
 * @throws Error when neither `dbPath` nor `database` is provided.
 */
constructor(opts: {
|
|
41
|
+
dbPath?: string;
|
|
42
|
+
database?: DatabaseSync;
|
|
43
|
+
fallback?: StreetInterpolator;
|
|
44
|
+
});
|
|
45
|
+
/**
 * Estimate a position for `query`. Returns `null` without consulting the fallback when the street
 * normalizes to an empty key or the trimmed house number is not all digits; otherwise returns the
 * neighbor-based estimate, else the fallback's answer, else `null`.
 */
find(query: InterpolationQuery): InterpolatedHit | null;
|
|
46
|
+
/** Close the underlying connection, but only when this instance opened it from `dbPath`. */
close(): void;
|
|
47
|
+
}
|
|
48
|
+
//# sourceMappingURL=address-point-interpolation.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"address-point-interpolation.d.ts","sourceRoot":"","sources":["../address-point-interpolation.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAA;AAE1C,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAA;AAG9D,OAAO,KAAK,EAAE,eAAe,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,MAAM,oBAAoB,CAAA;AA2BjG,qBAAa,wBAAyB,YAAW,mBAAmB;;gBAMvD,IAAI,EAAE;QAAE,MAAM,CAAC,EAAE,MAAM,CAAC;QAAC,QAAQ,CAAC,EAAE,YAAY,CAAC;QAAC,QAAQ,CAAC,EAAE,kBAAkB,CAAA;KAAE;IA0B7F,IAAI,CAAC,KAAK,EAAE,kBAAkB,GAAG,eAAe,GAAG,IAAI;IAcvD,KAAK,IAAI,IAAI;CAGb"}
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Address-point interpolation — "Method 2" of the resolution ladder (#483, Phase 1 of
|
|
7
|
+
* `docs/articles/plan/2026-06-11-resolution-ladder.md`): when the exact address-point tier (#476)
|
|
8
|
+
* misses a house number, bracket the number with REAL neighbor points on the same street from the
|
|
9
|
+
* same #476 shard and interpolate linearly in house-number space between them. Real occupancy
|
|
10
|
+
* replaces TIGER's uniform-spacing assumption — the dominant error term of the TIGER pilot's gate
|
|
11
|
+
* miss; TIGER range interpolation (`StreetInterpolator`) demotes to the fallback for streets too
|
|
12
|
+
* sparse to bracket.
|
|
13
|
+
*
|
|
14
|
+
* Matching key is `street_key` — THE shared normalizer plus the route fold
|
|
15
|
+
* (`canonicalizeRouteKey`), identical at build time (`scripts/build-address-point-shard.ts`) and
|
|
16
|
+
* query time, by construction. Scope is postcode-first like the segment tier; a query without a
|
|
17
|
+
* postcode goes straight to the fallback (which carries its own statewide-ambiguity abstention).
|
|
18
|
+
*
|
|
19
|
+
* Bracketing contract:
|
|
20
|
+
*
|
|
21
|
+
* - Neighbor candidates NEVER include the queried number itself (any unit/duplicate row of it) — in
|
|
22
|
+
* production the exact tier would already have answered an on-file number, and in the eval
|
|
23
|
+
* this is what makes grading against the same shard non-circular by construction.
|
|
24
|
+
* - Both-sided bracket (`bracket: "both"`): linear interpolation between the nearest known number
|
|
25
|
+
* below and above; `uncertaintyM` = half the distance between them.
|
|
26
|
+
* - Single-sided (`bracket: "single"`): linear extrapolation along the two nearest known numbers on
|
|
27
|
+
* that side, capped at one pair-span beyond the nearest point (`t ≤ 2` — beyond that the line
|
|
28
|
+
* carries no evidence and the query falls through); `uncertaintyM` = the pair distance plus
|
|
29
|
+
* the extrapolated overshoot, explicitly larger than the both-sided radius.
|
|
30
|
+
* - No bracket (no neighbors, a single known number, or past the extrapolation cap): fall through to
|
|
31
|
+
* the TIGER fallback when configured, else null.
|
|
32
|
+
*
|
|
33
|
+
* Standalone like the segment tier — core wiring rides the Phase 2 ordered `spatialTiers` list.
|
|
34
|
+
*/
|
|
35
|
+
import { DatabaseSync } from "node:sqlite";
|
|
36
|
+
import { haversineKm } from "./geo.js";
|
|
37
|
+
import { hasTable } from "./sqlite-utils.js";
|
|
38
|
+
import { canonicalizeRouteKey, normalizeStreetForKey } from "./street-normalize.js";
|
|
39
|
+
/**
|
|
40
|
+
* Extrapolation cap for a single-sided bracket: at most one pair-span beyond the nearest known
|
|
41
|
+
* point (`t = 2`). Past it, the two-point line carries no evidence about the query number.
|
|
42
|
+
*/
|
|
43
|
+
const MAX_EXTRAPOLATION_T = 2;
|
|
44
|
+
/**
 * Interpolation tier that estimates a coordinate for a house number by bracketing it with
 * neighboring address points on the same street key and postcode. When this tier cannot produce an
 * estimate (no `address_point` table, no postcode on the query, or no usable bracket) it defers to
 * the optional fallback interpolator.
 */
export class AddressPointInterpolator {
    /** Connection to the address-point shard. */
    #db;
    /** True while this instance holds a connection it opened itself and still has to close. */
    #ownsDb;
    /** Optional interpolator consulted when this tier has no answer. */
    #fallback;
    /** Prepared neighbor query; stays undefined when the shard has no `address_point` table. */
    #byPostcode;
    /**
     * @param opts.database An already-open connection. Takes precedence over `dbPath`; never closed
     *   by this class.
     * @param opts.dbPath Path to the shard; opened read-only and owned (closed) by this instance.
     * @param opts.fallback Interpolator consulted when this tier cannot answer.
     * @throws Error when neither `dbPath` nor `database` is provided.
     */
    constructor(opts) {
        if (opts.database) {
            this.#db = opts.database;
            this.#ownsDb = false;
        }
        else if (opts.dbPath) {
            this.#db = new DatabaseSync(opts.dbPath, { readOnly: true });
            this.#ownsDb = true;
        }
        else {
            throw new Error("AddressPointInterpolator: one of dbPath or database is required");
        }
        this.#fallback = opts.fallback;
        try {
            // Degrade gracefully on an empty/tableless shard (#568): with no `address_point` table this
            // tier is skipped, deferring to the segment fallback rather than crashing at construction.
            if (hasTable(this.#db, "address_point")) {
                // Strictly-numeric neighbor numbers on the route-folded street key within the ZIP. The
                // queried number itself is excluded HERE (see module doc: non-circular by construction).
                this.#byPostcode = this.#db.prepare(`SELECT CAST(number AS INTEGER) AS n, lat, lon, source, release
                    FROM address_point
                    WHERE postcode = ? AND street_key = ?
                    AND number GLOB '[0-9]*' AND number NOT GLOB '*[^0-9]*'
                    AND CAST(number AS INTEGER) != ?`);
            }
        }
        catch (error) {
            // Probing or preparing failed (e.g. a schema mismatch): release a connection we opened
            // ourselves so a failed construction does not leave the handle open, then surface the error.
            this.close();
            throw error;
        }
    }
    /**
     * Estimate a position for the queried house number.
     *
     * @returns `null` (without consulting the fallback) when the street normalizes to an empty key or
     *   the trimmed house number is not all digits; otherwise the neighbor-based estimate, else the
     *   fallback's answer, else `null`.
     */
    find(query) {
        const streetKey = canonicalizeRouteKey(normalizeStreetForKey(query.street));
        const numberRaw = query.number.trim();
        if (!streetKey || !/^\d+$/.test(numberRaw))
            return null;
        const n = Number(numberRaw);
        // No own table (empty shard) or no postcode → defer to the segment fallback rather than query.
        if (!this.#byPostcode || !query.postcode)
            return this.#fallback?.find(query) ?? null;
        const rows = this.#byPostcode.all(query.postcode.trim(), streetKey, n);
        // Fewer than two neighbor rows can never form a bracket or an extrapolation pair.
        const hit = rows.length >= 2 ? interpolateFromNeighbors(rows, n) : null;
        return hit ?? this.#fallback?.find(query) ?? null;
    }
    /**
     * Close the underlying connection when this instance opened it. Safe to call more than once:
     * ownership is dropped before closing, so repeat calls are no-ops instead of hitting the
     * "database is not open" error from `DatabaseSync.close()`.
     */
    close() {
        if (!this.#ownsDb)
            return;
        this.#ownsDb = false;
        this.#db.close();
    }
}
|
|
92
|
+
/**
 * Reduce address-point rows to a single anchor per distinct house number.
 *
 * Rows sharing a number (e.g. unit or duplicate rows) are merged into the arithmetic mean of their
 * coordinates; `source` and `release` are taken from the first row seen for that number. The
 * result is ordered by ascending house number.
 */
function anchorsByNumber(rows) {
    const totals = new Map();
    for (const row of rows) {
        let bucket = totals.get(row.n);
        if (bucket === undefined) {
            // First row for this number supplies the provenance fields.
            bucket = { latSum: 0, lonSum: 0, count: 0, source: row.source, release: row.release };
            totals.set(row.n, bucket);
        }
        bucket.latSum += row.lat;
        bucket.lonSum += row.lon;
        bucket.count += 1;
    }
    const anchors = [];
    for (const [n, bucket] of totals) {
        anchors.push({
            n,
            lat: bucket.latSum / bucket.count,
            lon: bucket.lonSum / bucket.count,
            source: bucket.source,
            release: bucket.release,
        });
    }
    anchors.sort((left, right) => left.n - right.n);
    return anchors;
}
|
|
112
|
+
/**
 * Estimate a position for house number `n` from neighbor rows on the same street.
 *
 * With a known number on each side of `n` the position is linearly interpolated between the two
 * nearest ones (`bracket: "both"`). With neighbors on one side only, it is linearly extrapolated
 * along the two nearest known numbers (`bracket: "single"`), up to `MAX_EXTRAPOLATION_T`.
 *
 * @returns The estimated hit, or `null` when there are fewer than two distinct known numbers on a
 *   one-sided street or the extrapolation would exceed the cap.
 */
function interpolateFromNeighbors(rows, n) {
    const anchors = anchorsByNumber(rows);
    // Anchors come back sorted ascending, so the first one that is not below n splits the list:
    // everything before it lies below the query. (Callers are expected to pass rows without n itself.)
    const splitAt = anchors.findIndex((candidate) => !(candidate.n < n));
    const lower = splitAt === -1 ? anchors[anchors.length - 1] : anchors[splitAt - 1];
    const upper = splitAt === -1 ? undefined : anchors[splitAt];
    if (lower && upper) {
        const fraction = (n - lower.n) / (upper.n - lower.n);
        const bracketSpanM = haversineKm(lower.lat, lower.lon, upper.lat, upper.lon) * 1000;
        return {
            lat: lower.lat + (upper.lat - lower.lat) * fraction,
            lon: lower.lon + (upper.lon - lower.lon) * fraction,
            interpolated: true,
            method: "address_point",
            bracket: "both",
            // Half the distance between the two bracketing anchors.
            uncertaintyM: Math.round(bracketSpanM / 2),
            source: lower.source,
            release: lower.release,
        };
    }
    // One-sided from here on: a line needs two distinct known numbers.
    if (anchors.length < 2)
        return null;
    // `near` is the known number closest to n, `far` the next one further out.
    let near;
    let far;
    if (lower) {
        // All anchors lie below n: the closest pair is the top two.
        far = anchors[anchors.length - 2];
        near = anchors[anchors.length - 1];
    }
    else {
        // All anchors lie above n: the closest pair is the bottom two.
        near = anchors[0];
        far = anchors[1];
    }
    // Position along the far→near line; values above 1 lie beyond `near`.
    const fraction = (n - far.n) / (near.n - far.n);
    if (fraction > MAX_EXTRAPOLATION_T)
        return null;
    const lat = far.lat + (near.lat - far.lat) * fraction;
    const lon = far.lon + (near.lon - far.lon) * fraction;
    const pairM = haversineKm(near.lat, near.lon, far.lat, far.lon) * 1000;
    const overshootM = haversineKm(lat, lon, near.lat, near.lon) * 1000;
    return {
        lat,
        lon,
        interpolated: true,
        method: "address_point",
        bracket: "single",
        // Whole pair span plus the overshoot, so it exceeds the both-sided radius.
        uncertaintyM: Math.round(pairM + overshootM),
        source: near.source,
        release: near.release,
    };
}
|
|
164
|
+
//# sourceMappingURL=address-point-interpolation.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"address-point-interpolation.js","sourceRoot":"","sources":["../address-point-interpolation.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAA;AAI1C,OAAO,EAAE,WAAW,EAAE,MAAM,UAAU,CAAA;AAEtC,OAAO,EAAE,QAAQ,EAAE,MAAM,mBAAmB,CAAA;AAC5C,OAAO,EAAE,oBAAoB,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAA;AAEnF;;;GAGG;AACH,MAAM,mBAAmB,GAAG,CAAC,CAAA;AAmB7B,MAAM,OAAO,wBAAwB;IAC3B,GAAG,CAAc;IACjB,OAAO,CAAS;IAChB,SAAS,CAAgC;IACzC,WAAW,CAAiD;IAErE,YAAY,IAAiF;QAC5F,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,IAAI,CAAC,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAA;YACxB,IAAI,CAAC,OAAO,GAAG,KAAK,CAAA;QACrB,CAAC;aAAM,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YACxB,IAAI,CAAC,GAAG,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAA;YAC5D,IAAI,CAAC,OAAO,GAAG,IAAI,CAAA;QACpB,CAAC;aAAM,CAAC;YACP,MAAM,IAAI,KAAK,CAAC,iEAAiE,CAAC,CAAA;QACnF,CAAC;QACD,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAA;QAC9B,iGAAiG;QACjG,sFAAsF;QACtF,IAAI,QAAQ,CAAC,IAAI,CAAC,GAAG,EAAE,eAAe,CAAC,EAAE,CAAC;YACzC,uFAAuF;YACvF,yFAAyF;YACzF,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAClC;;;;sCAIkC,CAClC,CAAA;QACF,CAAC;IACF,CAAC;IAED,IAAI,CAAC,KAAyB;QAC7B,MAAM,SAAS,GAAG,oBAAoB,CAAC,qBAAqB,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAA;QAC3E,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC,IAAI,EAAE,CAAA;QACrC,IAAI,CAAC,SAAS,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC;YAAE,OAAO,IAAI,CAAA;QACvD,MAAM,CAAC,GAAG,MAAM,CAAC,SAAS,CAAC,CAAA;QAE3B,+FAA+F;QAC/F,IAAI,CAAC,IAAI,CAAC,WAAW,IAAI,CAAC,KAAK,CAAC,QAAQ;YAAE,OAAO,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,IAAI,CAAA;QAEpF,MAAM,IAAI,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,SAAS,EAAE,CAAC,CAA0B,CAAA;QAC/F,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;QACvE,OAAO,GAAG,IAAI,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,IAAI,CAAA;IAClD,CAAC;IAED,KAAK;QACJ,IAAI,IAAI,CAAC,OAAO;YAAE,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,CAAA;IACnC,CAAC;CACD;AAED,wFAAwF;AACxF,SAAS,eAAe,CAAC,IAAy
B;IACjD,MAAM,GAAG,GAAG,IAAI,GAAG,EAAsB,CAAA;IACzC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACxB,MAAM,KAAK,GAAG,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;QAC5B,IAAI,KAAK;YAAE,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;;YACrB,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAA;IAC3B,CAAC;IACD,OAAO,CAAC,GAAG,GAAG,CAAC,OAAO,EAAE,CAAC;SACvB,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;QACrB,CAAC;QACD,GAAG,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM;QAC5D,GAAG,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC,MAAM;QAC5D,MAAM,EAAE,KAAK,CAAC,CAAC,CAAE,CAAC,MAAM;QACxB,OAAO,EAAE,KAAK,CAAC,CAAC,CAAE,CAAC,OAAO;KAC1B,CAAC,CAAC;SACF,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAA;AAC5B,CAAC;AAED,SAAS,wBAAwB,CAAC,IAAyB,EAAE,CAAS;IACrE,MAAM,OAAO,GAAG,eAAe,CAAC,IAAI,CAAC,CAAA;IAErC,oFAAoF;IACpF,IAAI,KAA+B,CAAA;IACnC,IAAI,KAA+B,CAAA;IACnC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC9B,IAAI,MAAM,CAAC,CAAC,GAAG,CAAC;YAAE,KAAK,GAAG,MAAM,CAAA;aAC3B,CAAC;YACL,KAAK,GAAG,MAAM,CAAA;YACd,MAAK;QACN,CAAC;IACF,CAAC;IAED,IAAI,KAAK,IAAI,KAAK,EAAE,CAAC;QACpB,MAAM,CAAC,GAAG,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAA;QAC7C,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC,GAAG,EAAE,KAAK,CAAC,GAAG,EAAE,KAAK,CAAC,GAAG,EAAE,KAAK,CAAC,GAAG,CAAC,GAAG,IAAI,CAAA;QAC5E,OAAO;YACN,GAAG,EAAE,KAAK,CAAC,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC;YAC5C,GAAG,EAAE,KAAK,CAAC,GAAG,GAAG,CAAC,KAAK,CAAC,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC;YAC5C,YAAY,EAAE,IAAI;YAClB,MAAM,EAAE,eAAe;YACvB,OAAO,EAAE,MAAM;YACf,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,CAAC,CAAC;YACnC,MAAM,EAAE,KAAK,CAAC,MAAM;YACpB,OAAO,EAAE,KAAK,CAAC,OAAO;SACtB,CAAA;IACF,CAAC;IAED,uFAAuF;IACvF,oFAAoF;IACpF,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAA
C,EAAE,CAAC,CAAC,CAAA;IAC5D,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,IAAI,CAAA;IAChC,MAAM,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAE,EAAE,IAAI,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAE,EAAE,IAAI,CAAC,CAAC,CAAE,CAAC,CAAA;IACvE,MAAM,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAA;IACxC,IAAI,CAAC,GAAG,mBAAmB;QAAE,OAAO,IAAI,CAAA;IAExC,MAAM,GAAG,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC,IAAI,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IAC9C,MAAM,GAAG,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC,IAAI,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IAC9C,MAAM,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,GAAG,CAAC,GAAG,IAAI,CAAA;IACtE,MAAM,UAAU,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAA;IACnE,OAAO;QACN,GAAG;QACH,GAAG;QACH,YAAY,EAAE,IAAI;QAClB,MAAM,EAAE,eAAe;QACvB,OAAO,EAAE,QAAQ;QACjB,wFAAwF;QACxF,YAAY,EAAE,IAAI,CAAC,KAAK,CAAC,KAAK,GAAG,UAAU,CAAC;QAC5C,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,OAAO,EAAE,IAAI,CAAC,OAAO;KACrB,CAAA;AACF,CAAC"}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Typed schema for the SITUS / rooftop ADDRESS-POINT shards (`address-points-<cc>-<slug>.db`, built
|
|
7
|
+
* by `scripts/build-address-point-shard.ts` — the #476/#567 national rooftop tier behind the
|
|
8
|
+
* demo's "type any US address, get the building"). Single source of truth for the columns shared
|
|
9
|
+
* by the BUILDER and the READER ({@link AddressPointSqliteLookup}), so a column rename in one is a
|
|
10
|
+
* compile error in the other.
|
|
11
|
+
*
|
|
12
|
+
* The builder's hot INSERT (tens of millions of rows per state) stays a POSITIONAL prepared
|
|
13
|
+
* statement for throughput — but its column list is derived from {@link ADDRESS_POINT_COLUMNS}
|
|
14
|
+
* here, and its table comes from {@link createAddressPointTable}, so the positional order can't
|
|
15
|
+
* silently drift from what the reader expects. (Same convention as the candidate build: typed
|
|
16
|
+
* schema guards the contract; positional inserts keep the speed.)
|
|
17
|
+
*/
|
|
18
|
+
import type { Kysely } from "kysely";
|
|
19
|
+
/**
 * Row shape of the `address_point` table: one rooftop address point.
 *
 * Lookups probe `(street_norm, number)` inside a `postcode` scope (preferred) or a
 * `locality_norm` scope; `street_key` is the #483 route-fold key used for interpolation.
 * `lat`/`lon` are never null (the builder drops non-finite coordinates), whereas `unit`,
 * `postcode` and `locality_norm` may be null because not every source carries all three.
 */
export interface AddressPointTable {
    /** Street after the shared {@link normalizeStreetForKey} — the probe key used at build and query time. */
    street_norm: string;
    /** Route-fold key, `canonicalizeRouteKey(street_norm)` (#483 Method 2). */
    street_key: string;
    /** Lower-cased, normalized house number. Stored as TEXT so values like "123-A" or "12 1/2" survive. */
    number: string;
    /** Unit value; null when the source does not carry one. */
    unit: string | null;
    /** Postcode — the preferred lookup scope; null when the source does not carry one. */
    postcode: string | null;
    /** Locality after the shared {@link normalizeLocalityForKey} — the fallback lookup scope. */
    locality_norm: string | null;
    /** Street exactly as the source spelled it (retained for display / debugging). */
    street_raw: string;
    /** Latitude of the point; always present (non-finite coordinates are dropped by the builder). */
    lat: number;
    /** Longitude of the point; always present (non-finite coordinates are dropped by the builder). */
    lon: number;
    /** Provenance: which dataset supplied the point (e.g. `overture:us`, `openaddresses`). */
    source: string;
    /** Pinned data release the point was ingested from. */
    release: string;
}
/** Database schema type passed as `new DatabaseClient<AddressPointDatabase>(...)`: a single `address_point` table. */
export interface AddressPointDatabase {
    address_point: AddressPointTable;
}
|
|
49
|
+
/**
|
|
50
|
+
* The `address_point` columns in INSERT order. The builder's positional prepared statement derives
|
|
51
|
+
* its placeholder list from this, so the positional order can't drift from the DDL / the reader.
|
|
52
|
+
*/
|
|
53
|
+
export declare const ADDRESS_POINT_COLUMNS: readonly ["street_norm", "street_key", "number", "unit", "postcode", "locality_norm", "street_raw", "lat", "lon", "source", "release"];
|
|
54
|
+
/** Create the `address_point` table — called before the streaming bulk load. */
|
|
55
|
+
export declare function createAddressPointTable(db: Kysely<AddressPointDatabase>): Promise<void>;
|
|
56
|
+
/** Create the three probe indexes the reader relies on (postcode-scope, locality-scope, route-key). */
|
|
57
|
+
export declare function createAddressPointIndexes(db: Kysely<AddressPointDatabase>): Promise<void>;
|
|
58
|
+
//# sourceMappingURL=address-point-schema.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"address-point-schema.d.ts","sourceRoot":"","sources":["../address-point-schema.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAA;AAEpC;;;;;GAKG;AACH,MAAM,WAAW,iBAAiB;IACjC,iGAAiG;IACjG,WAAW,EAAE,MAAM,CAAA;IACnB,gFAAgF;IAChF,UAAU,EAAE,MAAM,CAAA;IAClB,wFAAwF;IACxF,MAAM,EAAE,MAAM,CAAA;IACd,IAAI,EAAE,MAAM,GAAG,IAAI,CAAA;IACnB,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAA;IACvB,mFAAmF;IACnF,aAAa,EAAE,MAAM,GAAG,IAAI,CAAA;IAC5B,8EAA8E;IAC9E,UAAU,EAAE,MAAM,CAAA;IAClB,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,EAAE,MAAM,CAAA;IACX,0FAA0F;IAC1F,MAAM,EAAE,MAAM,CAAA;IACd,2DAA2D;IAC3D,OAAO,EAAE,MAAM,CAAA;CACf;AAED,6FAA6F;AAC7F,MAAM,WAAW,oBAAoB;IACpC,aAAa,EAAE,iBAAiB,CAAA;CAChC;AAED;;;GAGG;AACH,eAAO,MAAM,qBAAqB,wIAYxB,CAAA;AAEV,gFAAgF;AAChF,wBAAsB,uBAAuB,CAAC,EAAE,EAAE,MAAM,CAAC,oBAAoB,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAgB7F;AAED,uGAAuG;AACvG,wBAAsB,yBAAyB,CAAC,EAAE,EAAE,MAAM,CAAC,oBAAoB,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAY/F"}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Typed schema for the SITUS / rooftop ADDRESS-POINT shards (`address-points-<cc>-<slug>.db`, built
|
|
7
|
+
* by `scripts/build-address-point-shard.ts` — the #476/#567 national rooftop tier behind the
|
|
8
|
+
* demo's "type any US address, get the building"). Single source of truth for the columns shared
|
|
9
|
+
* by the BUILDER and the READER ({@link AddressPointSqliteLookup}), so a column rename in one is a
|
|
10
|
+
* compile error in the other.
|
|
11
|
+
*
|
|
12
|
+
* The builder's hot INSERT (tens of millions of rows per state) stays a POSITIONAL prepared
|
|
13
|
+
* statement for throughput — but its column list is derived from {@link ADDRESS_POINT_COLUMNS}
|
|
14
|
+
* here, and its table comes from {@link createAddressPointTable}, so the positional order can't
|
|
15
|
+
* silently drift from what the reader expects. (Same convention as the candidate build: typed
|
|
16
|
+
* schema guards the contract; positional inserts keep the speed.)
|
|
17
|
+
*/
|
|
18
|
+
/**
 * Column names of `address_point`, listed in INSERT order.
 *
 * The builder's positional prepared statement derives its placeholder list from this array, so
 * the positional order cannot drift from the DDL or from what the reader expects.
 */
export const ADDRESS_POINT_COLUMNS = [
    // Street identity and house number.
    "street_norm", "street_key", "number",
    // Optional unit and lookup scopes.
    "unit", "postcode", "locality_norm",
    // Source spelling of the street.
    "street_raw",
    // Coordinates.
    "lat", "lon",
    // Provenance.
    "source", "release",
];
|
|
35
|
+
/** Create the `address_point` table — called before the streaming bulk load. */
|
|
36
|
+
export async function createAddressPointTable(db) {
|
|
37
|
+
await db.schema
|
|
38
|
+
.createTable("address_point")
|
|
39
|
+
.addColumn("street_norm", "text", (c) => c.notNull())
|
|
40
|
+
// `street_key` = canonicalizeRouteKey(street_norm): the route-fold key (#483 Method 2).
|
|
41
|
+
.addColumn("street_key", "text", (c) => c.notNull())
|
|
42
|
+
.addColumn("number", "text", (c) => c.notNull())
|
|
43
|
+
.addColumn("unit", "text")
|
|
44
|
+
.addColumn("postcode", "text")
|
|
45
|
+
.addColumn("locality_norm", "text")
|
|
46
|
+
.addColumn("street_raw", "text", (c) => c.notNull())
|
|
47
|
+
.addColumn("lat", "real", (c) => c.notNull())
|
|
48
|
+
.addColumn("lon", "real", (c) => c.notNull())
|
|
49
|
+
.addColumn("source", "text", (c) => c.notNull())
|
|
50
|
+
.addColumn("release", "text", (c) => c.notNull())
|
|
51
|
+
.execute();
|
|
52
|
+
}
|
|
53
|
+
/** Create the three probe indexes the reader relies on (postcode-scope, locality-scope, route-key). */
|
|
54
|
+
export async function createAddressPointIndexes(db) {
|
|
55
|
+
await db.schema
|
|
56
|
+
.createIndex("idx_ap_postcode")
|
|
57
|
+
.on("address_point")
|
|
58
|
+
.columns(["postcode", "street_norm", "number"])
|
|
59
|
+
.execute();
|
|
60
|
+
await db.schema
|
|
61
|
+
.createIndex("idx_ap_locality")
|
|
62
|
+
.on("address_point")
|
|
63
|
+
.columns(["locality_norm", "street_norm", "number"])
|
|
64
|
+
.execute();
|
|
65
|
+
await db.schema.createIndex("idx_ap_streetkey").on("address_point").columns(["postcode", "street_key"]).execute();
|
|
66
|
+
}
|
|
67
|
+
//# sourceMappingURL=address-point-schema.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"address-point-schema.js","sourceRoot":"","sources":["../address-point-schema.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAoCH;;;GAGG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAAG;IACpC,aAAa;IACb,YAAY;IACZ,QAAQ;IACR,MAAM;IACN,UAAU;IACV,eAAe;IACf,YAAY;IACZ,KAAK;IACL,KAAK;IACL,QAAQ;IACR,SAAS;CACA,CAAA;AAEV,gFAAgF;AAChF,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAAC,EAAgC;IAC7E,MAAM,EAAE,CAAC,MAAM;SACb,WAAW,CAAC,eAAe,CAAC;SAC5B,SAAS,CAAC,aAAa,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;QACrD,wFAAwF;SACvF,SAAS,CAAC,YAAY,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SACnD,SAAS,CAAC,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SAC/C,SAAS,CAAC,MAAM,EAAE,MAAM,CAAC;SACzB,SAAS,CAAC,UAAU,EAAE,MAAM,CAAC;SAC7B,SAAS,CAAC,eAAe,EAAE,MAAM,CAAC;SAClC,SAAS,CAAC,YAAY,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SACnD,SAAS,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SAC5C,SAAS,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SAC5C,SAAS,CAAC,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SAC/C,SAAS,CAAC,SAAS,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SAChD,OAAO,EAAE,CAAA;AACZ,CAAC;AAED,uGAAuG;AACvG,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAAC,EAAgC;IAC/E,MAAM,EAAE,CAAC,MAAM;SACb,WAAW,CAAC,iBAAiB,CAAC;SAC9B,EAAE,CAAC,eAAe,CAAC;SACnB,OAAO,CAAC,CAAC,UAAU,EAAE,aAAa,EAAE,QAAQ,CAAC,CAAC;SAC9C,OAAO,EAAE,CAAA;IACX,MAAM,EAAE,CAAC,MAAM;SACb,WAAW,CAAC,iBAAiB,CAAC;SAC9B,EAAE,CAAC,eAAe,CAAC;SACnB,OAAO,CAAC,CAAC,eAAe,EAAE,aAAa,EAAE,QAAQ,CAAC,CAAC;SACnD,OAAO,EAAE,CAAA;IACX,MAAM,EAAE,CAAC,MAAM,CAAC,WAAW,CAAC,kBAAkB,CAAC,CAAC,EAAE,CAAC,eAAe,CAAC,CAAC,OAAO,CAAC,CAAC,UAAU,EAAE,YAAY,CAAC,CAAC,CAAC,OAAO,EAAE,CAAA;AAClH,CAAC"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* SQLite implementation of core's `AddressPointLookup` (#476): exact `(street, number)` within a
|
|
7
|
+
* postcode (preferred) or locality scope, against a per-state shard built by
|
|
8
|
+
* `scripts/build-address-point-shard.ts`. Query-side normalization is THE shared normalizer
|
|
9
|
+
* (`street-normalize.ts`) — identical to build-side, by construction.
|
|
10
|
+
*
|
|
11
|
+
* Matching is exact-after-normalization only — no fuzzy street matching in this tier (measure how
|
|
12
|
+
* far exact gets first; fuzz is a later, separate decision). Postcode scope is attempted first
|
|
13
|
+
* (cheapest, most selective); locality scope is the fallback. Multiple hits (same number,
|
|
14
|
+
* units/duplicates) return the first by rowid — coordinates of unit siblings are the same
|
|
15
|
+
* building for tier purposes.
|
|
16
|
+
*/
|
|
17
|
+
import type { AddressPointHit, AddressPointLookup } from "@mailwoman/resolver";
|
|
18
|
+
/**
 * SQLite-backed implementation of the `AddressPointLookup` contract.
 *
 * Matching is exact after normalization only (no fuzzy street matching in this tier). The
 * postcode scope is attempted first and the locality scope is the fallback.
 */
export declare class AddressPointSqliteLookup implements AddressPointLookup {
    #private;
    /**
     * @param dbPath Filesystem path of the address-point shard database to read.
     */
    constructor(dbPath: string);
    /**
     * Look up an exact `(street, number)` address point.
     *
     * @param query `street` and `number` are required; `postcode` is the preferred scope and
     *   `locality` the fallback scope.
     * @returns The matching hit, or `null` when nothing matches. When several rows match (same
     *   number, units/duplicates) the first by rowid is returned.
     */
    find(query: {
        street: string;
        number: string;
        postcode?: string;
        locality?: string;
    }): AddressPointHit | null;
    /** Close the lookup. */
    close(): void;
}
|
|
29
|
+
//# sourceMappingURL=address-point.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"address-point.d.ts","sourceRoot":"","sources":["../address-point.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAIH,OAAO,KAAK,EAAE,eAAe,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAA;AAmB9E,qBAAa,wBAAyB,YAAW,kBAAkB;;gBAKtD,MAAM,EAAE,MAAM;IAgB1B,IAAI,CAAC,KAAK,EAAE;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;KAAE,GAAG,eAAe,GAAG,IAAI;IAmB7G,KAAK,IAAI,IAAI;CAGb"}
|