@mailwoman/resolver-wof-sqlite 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +250 -0
- package/out/address-point-interpolation.d.ts +48 -0
- package/out/address-point-interpolation.d.ts.map +1 -0
- package/out/address-point-interpolation.js +164 -0
- package/out/address-point-interpolation.js.map +1 -0
- package/out/address-point-schema.d.ts +58 -0
- package/out/address-point-schema.d.ts.map +1 -0
- package/out/address-point-schema.js +67 -0
- package/out/address-point-schema.js.map +1 -0
- package/out/address-point.d.ts +29 -0
- package/out/address-point.d.ts.map +1 -0
- package/out/address-point.js +62 -0
- package/out/address-point.js.map +1 -0
- package/out/ancestry.d.ts +40 -0
- package/out/ancestry.d.ts.map +1 -0
- package/out/ancestry.js +53 -0
- package/out/ancestry.js.map +1 -0
- package/out/build-candidate-cli.d.ts +16 -0
- package/out/build-candidate-cli.d.ts.map +1 -0
- package/out/build-candidate-cli.js +80 -0
- package/out/build-candidate-cli.js.map +1 -0
- package/out/build-candidate.d.ts +54 -0
- package/out/build-candidate.d.ts.map +1 -0
- package/out/build-candidate.js +230 -0
- package/out/build-candidate.js.map +1 -0
- package/out/build-coincident-roles-cli.d.ts +16 -0
- package/out/build-coincident-roles-cli.d.ts.map +1 -0
- package/out/build-coincident-roles-cli.js +94 -0
- package/out/build-coincident-roles-cli.js.map +1 -0
- package/out/build-fts-cli.d.ts +23 -0
- package/out/build-fts-cli.d.ts.map +1 -0
- package/out/build-fts-cli.js +117 -0
- package/out/build-fts-cli.js.map +1 -0
- package/out/build-slim-cli.d.ts +14 -0
- package/out/build-slim-cli.d.ts.map +1 -0
- package/out/build-slim-cli.js +130 -0
- package/out/build-slim-cli.js.map +1 -0
- package/out/build-slim.d.ts +71 -0
- package/out/build-slim.d.ts.map +1 -0
- package/out/build-slim.js +267 -0
- package/out/build-slim.js.map +1 -0
- package/out/candidate-lookup.d.ts +43 -0
- package/out/candidate-lookup.d.ts.map +1 -0
- package/out/candidate-lookup.js +191 -0
- package/out/candidate-lookup.js.map +1 -0
- package/out/candidate-schema.d.ts +86 -0
- package/out/candidate-schema.d.ts.map +1 -0
- package/out/candidate-schema.js +109 -0
- package/out/candidate-schema.js.map +1 -0
- package/out/coincident-roles.d.ts +86 -0
- package/out/coincident-roles.d.ts.map +1 -0
- package/out/coincident-roles.js +160 -0
- package/out/coincident-roles.js.map +1 -0
- package/out/convention.d.ts +109 -0
- package/out/convention.d.ts.map +1 -0
- package/out/convention.js +94 -0
- package/out/convention.js.map +1 -0
- package/out/fst-autocomplete.d.ts +49 -0
- package/out/fst-autocomplete.d.ts.map +1 -0
- package/out/fst-autocomplete.js +124 -0
- package/out/fst-autocomplete.js.map +1 -0
- package/out/fst-builder.d.ts +20 -0
- package/out/fst-builder.d.ts.map +1 -0
- package/out/fst-builder.js +219 -0
- package/out/fst-builder.js.map +1 -0
- package/out/fst-deserialize-web.d.ts +16 -0
- package/out/fst-deserialize-web.d.ts.map +1 -0
- package/out/fst-deserialize-web.js +133 -0
- package/out/fst-deserialize-web.js.map +1 -0
- package/out/fst-matcher.d.ts +33 -0
- package/out/fst-matcher.d.ts.map +1 -0
- package/out/fst-matcher.js +117 -0
- package/out/fst-matcher.js.map +1 -0
- package/out/fst-serialize.d.ts +30 -0
- package/out/fst-serialize.d.ts.map +1 -0
- package/out/fst-serialize.js +261 -0
- package/out/fst-serialize.js.map +1 -0
- package/out/fst-types.d.ts +60 -0
- package/out/fst-types.d.ts.map +1 -0
- package/out/fst-types.js +11 -0
- package/out/fst-types.js.map +1 -0
- package/out/fts.d.ts +158 -0
- package/out/fts.d.ts.map +1 -0
- package/out/fts.js +261 -0
- package/out/fts.js.map +1 -0
- package/out/geo.d.ts +74 -0
- package/out/geo.d.ts.map +1 -0
- package/out/geo.js +88 -0
- package/out/geo.js.map +1 -0
- package/out/index.d.ts +27 -0
- package/out/index.d.ts.map +1 -0
- package/out/index.js +22 -0
- package/out/index.js.map +1 -0
- package/out/interpolation.d.ts +84 -0
- package/out/interpolation.d.ts.map +1 -0
- package/out/interpolation.js +150 -0
- package/out/interpolation.js.map +1 -0
- package/out/lookup.d.ts +156 -0
- package/out/lookup.d.ts.map +1 -0
- package/out/lookup.js +876 -0
- package/out/lookup.js.map +1 -0
- package/out/postal-city-alias-lookup.d.ts +50 -0
- package/out/postal-city-alias-lookup.d.ts.map +1 -0
- package/out/postal-city-alias-lookup.js +66 -0
- package/out/postal-city-alias-lookup.js.map +1 -0
- package/out/postal-city-alias-schema.d.ts +51 -0
- package/out/postal-city-alias-schema.d.ts.map +1 -0
- package/out/postal-city-alias-schema.js +47 -0
- package/out/postal-city-alias-schema.js.map +1 -0
- package/out/postal-city-candidate-schema.d.ts +58 -0
- package/out/postal-city-candidate-schema.d.ts.map +1 -0
- package/out/postal-city-candidate-schema.js +56 -0
- package/out/postal-city-candidate-schema.js.map +1 -0
- package/out/postcode-point-lookup.d.ts +38 -0
- package/out/postcode-point-lookup.d.ts.map +1 -0
- package/out/postcode-point-lookup.js +46 -0
- package/out/postcode-point-lookup.js.map +1 -0
- package/out/reverse.d.ts +99 -0
- package/out/reverse.d.ts.map +1 -0
- package/out/reverse.js +290 -0
- package/out/reverse.js.map +1 -0
- package/out/schema.d.ts +163 -0
- package/out/schema.d.ts.map +1 -0
- package/out/schema.js +18 -0
- package/out/schema.js.map +1 -0
- package/out/sharding.d.ts +96 -0
- package/out/sharding.d.ts.map +1 -0
- package/out/sharding.js +129 -0
- package/out/sharding.js.map +1 -0
- package/out/sqlite-convention-source.d.ts +29 -0
- package/out/sqlite-convention-source.d.ts.map +1 -0
- package/out/sqlite-convention-source.js +53 -0
- package/out/sqlite-convention-source.js.map +1 -0
- package/out/sqlite-utils.d.ts +17 -0
- package/out/sqlite-utils.d.ts.map +1 -0
- package/out/sqlite-utils.js +24 -0
- package/out/sqlite-utils.js.map +1 -0
- package/out/street-morphology-fst-builder.d.ts +59 -0
- package/out/street-morphology-fst-builder.d.ts.map +1 -0
- package/out/street-morphology-fst-builder.js +174 -0
- package/out/street-morphology-fst-builder.js.map +1 -0
- package/out/street-normalize.d.ts +66 -0
- package/out/street-normalize.d.ts.map +1 -0
- package/out/street-normalize.js +176 -0
- package/out/street-normalize.js.map +1 -0
- package/out/street-segment-schema.d.ts +61 -0
- package/out/street-segment-schema.d.ts.map +1 -0
- package/out/street-segment-schema.js +64 -0
- package/out/street-segment-schema.js.map +1 -0
- package/out/types.d.ts +137 -0
- package/out/types.d.ts.map +1 -0
- package/out/types.js +13 -0
- package/out/types.js.map +1 -0
- package/out/unified-schema.d.ts +25 -0
- package/out/unified-schema.d.ts.map +1 -0
- package/out/unified-schema.js +142 -0
- package/out/unified-schema.js.map +1 -0
- package/package.json +54 -0
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* THE street normalizer for the address-point tier (#476). One function, used by BOTH the shard
|
|
7
|
+
* builder (`scripts/build-address-point-shard.ts`) and the lookup tier (`address-point.ts`) —
|
|
8
|
+
* never two implementations (the PLACETYPE_ORDER lesson: parallel copies silently corrupt).
|
|
9
|
+
*
|
|
10
|
+
* Normalization contract (deliberately aggressive — both sides apply the same function, so
|
|
11
|
+
* collisions only need to be _consistent_, not linguistically perfect):
|
|
12
|
+
*
|
|
13
|
+
* 1. Lowercase, NFKD-fold diacritics, collapse whitespace, strip punctuation (periods, commas,
|
|
14
|
+
* apostrophes).
|
|
15
|
+
* 2. Expand USPS directional abbreviations at the FIRST and LAST token position (`n` → `north`, `se` →
|
|
16
|
+
* `southeast`) — Overture sources abbreviate inconsistently.
|
|
17
|
+
* 3. Canonicalize a trailing USPS street-type token via the codex suffix table to its canonical full
|
|
18
|
+
* form (`st`/`str`/`street` → `street`).
|
|
19
|
+
*
|
|
20
|
+
* Numbered streets are left as digits (`5th` stays `5th`); a SPELLED ordinal before a street suffix
|
|
21
|
+
* folds to its digit form (`tenth street` → `10th street`, #723) so the grid-city ordinal
|
|
22
|
+
* cross-streets the source data spells with digits become reachable.
|
|
23
|
+
*/
|
|
24
|
+
import { AbbreviationToDirectional, US_STREET_SUFFIX_LOOKUP } from "@mailwoman/codex/us";
|
|
25
|
+
/**
 * Lookup from a spelled-out ordinal street name to its digit-ordinal spelling
 * ("tenth" → "10th").
 *
 * `normalizeStreetForKey` consults this table only when the FOLLOWING token is a street-type
 * suffix (#723 admin-tail). That lets grid-city cross-streets ("Tenth Street", "Fifth Avenue")
 * reach the shards' digit keys while ordinal-WORD names whose next token is not a suffix
 * ("First National Bank Rd") are left alone.
 *
 * A digit token is never a key of this map, so digit-source shard keys are unaffected and need
 * no rebuild; a future rebuild folds any spelled-source key the same way (the one-function
 * discipline).
 */
const SPELLED_ORDINAL_TO_DIGIT = new Map(Object.entries({
    first: "1st",
    second: "2nd",
    third: "3rd",
    fourth: "4th",
    fifth: "5th",
    sixth: "6th",
    seventh: "7th",
    eighth: "8th",
    ninth: "9th",
    tenth: "10th",
    eleventh: "11th",
    twelfth: "12th",
    thirteenth: "13th",
    fourteenth: "14th",
    fifteenth: "15th",
    sixteenth: "16th",
    seventeenth: "17th",
    eighteenth: "18th",
    nineteenth: "19th",
    twentieth: "20th",
    thirtieth: "30th",
    fortieth: "40th",
    fiftieth: "50th",
    sixtieth: "60th",
    seventieth: "70th",
    eightieth: "80th",
    ninetieth: "90th",
    hundredth: "100th",
}));
|
|
64
|
+
/**
 * Shared text fold used by every key normalizer in this module: diacritic-fold, lowercase,
 * punctuation strip, whitespace collapse.
 *
 * @param {string} input Raw text.
 * @returns {string} The folded text; `""` when nothing but whitespace / stripped punctuation
 *   remains.
 */
function fold(input) {
    return input
        // NFKD splits an accented letter into base letter + combining mark ("é" → "e" + U+0301).
        .normalize("NFKD")
        // Drop the Combining Diacritical Marks block. Written as escapes so the range stays
        // legible and cannot be altered by an editor or normalizer touching raw combining marks.
        .replace(/[\u0300-\u036f]/g, "")
        .toLowerCase()
        // Periods, commas, and both apostrophe forms (ASCII ' and typographic U+2019).
        .replace(/[.,'\u2019]/g, "")
        .replace(/\s+/g, " ")
        .trim();
}
|
|
74
|
+
/**
 * Normalize a street name into the key used for address-point keying. The same function runs at
 * build time and at lookup time, so its output only has to be consistent.
 *
 * Steps, in order (each sees the previous step's output):
 *
 * 1. `fold` the input and split it on single spaces.
 * 2. Rewrite a spelled ordinal to its digit form when the NEXT token is a street suffix
 *    ("tenth street" → "10th street", #723).
 * 3. At the edges only: merge a written-out compound directional ("south east" → "southeast"),
 *    then expand an abbreviated directional ("n" → "north", "se" → "southeast").
 * 4. Canonicalize the street-type token found in the last or second-to-last position via the
 *    codex suffix table.
 *
 * @param {string} street Raw street name.
 * @returns {string} The normalized key; `""` when the input folds to nothing.
 */
export function normalizeStreetForKey(street) {
    const folded = fold(street);
    // `"".split(" ")` is `[""]`, never `[]`, so emptiness has to be tested on the folded string
    // itself rather than on the token count.
    if (folded === "")
        return "";
    const tokens = folded.split(" ");
    // Spelled-ordinal street names → digit form when a street suffix follows ("Tenth Street" →
    // "10th street", #723). Gated on the next token being a suffix so ordinal-WORD names are untouched.
    for (let i = 0; i < tokens.length - 1; i++) {
        const digit = SPELLED_ORDINAL_TO_DIGIT.get(tokens[i]);
        if (digit && US_STREET_SUFFIX_LOOKUP.has(tokens[i + 1]))
            tokens[i] = digit;
    }
    // Directional expansion at the edges only ("N Main St" / "Main St N" — never interior
    // tokens, where "W" may be an initial in a person-named street). The codex expands
    // compounds to two words ("SE" → "SOUTH EAST"); we key on the spaceless form
    // ("southeast"), and also merge an already-written two-token pair ("South East …").
    const edgeDirectional = (raw) => AbbreviationToDirectional.get(raw.toUpperCase())?.toLowerCase().replace(" ", "");
    const mergePair = (a, b) => a && b && /^(north|south)$/.test(a) && /^(east|west)$/.test(b) ? a + b : undefined;
    // Leading edge: the pair merge needs more than two tokens, the abbreviation expansion more
    // than one, so at least one token always remains after the directional.
    const leadPair = mergePair(tokens[0], tokens[1]);
    if (leadPair && tokens.length > 2)
        tokens.splice(0, 2, leadPair);
    const first = edgeDirectional(tokens[0]);
    if (first && tokens.length > 1)
        tokens[0] = first;
    // Trailing edge: evaluated AFTER the leading edge, on the possibly shortened token list.
    // The pair merge needs more than three tokens, the abbreviation expansion more than two.
    const tailPair = mergePair(tokens[tokens.length - 2], tokens[tokens.length - 1]);
    if (tailPair && tokens.length > 3)
        tokens.splice(tokens.length - 2, 2, tailPair);
    if (tokens.length > 2) {
        const last = edgeDirectional(tokens[tokens.length - 1]);
        if (last)
            tokens[tokens.length - 1] = last;
    }
    // Street-type canonicalization via the codex table (lowercase keys, UPPER canonical
    // values). The suffix is usually the last token, but sits second-to-last when a trailing
    // directional follows ("Main St N") — check both positions, canonicalize the first hit.
    for (const at of [tokens.length - 1, tokens.length - 2]) {
        if (at < 1)
            continue; // never canonicalize the only/first token ("Street Road" exists)
        const canonical = US_STREET_SUFFIX_LOOKUP.get(tokens[at]);
        if (canonical) {
            tokens[at] = canonical.toLowerCase();
            break;
        }
    }
    return tokens.join(" ");
}
|
|
123
|
+
/**
 * Normalize a locality name for address-point keying.
 *
 * Only the shared `fold` step is applied; none of the street-specific rewrites run here.
 *
 * @param {string} locality Raw locality name.
 * @returns {string} The folded locality key.
 */
export function normalizeLocalityForKey(locality) {
    const localityKey = fold(locality);
    return localityKey;
}
|
|
127
|
+
/**
 * Query-side fallback: remove a disambiguating QUALIFIER from a locality name so a retry can hit
 * the gazetteer's canonical name after the exact normalized name missed.
 *
 * OA address data carries qualifiers the canonical name omits: Austrian `Kraubath/Mur` and
 * `Hart b.Graz` → `Hart`; Swiss `Lenk im Simmental` → `Lenk`, `Roche VD` → `Roche`; Danish
 * `Odense S`, `Hurup Thy`. This is a FALLBACK ONLY — the exact name is tried first, and the
 * region-bbox disambiguation resolves any base-name ambiguity downstream. The candidate table is
 * unchanged (purely query-side); feed the result back through {@link normalizeLocalityForKey}.
 *
 * Measured (`scripts/eval/candidate-recall.ts --strip-fallback`, EU OA holdouts): recovers AT
 * 74.1→88.2% (+14.1pp), DK 91.5→96.2%, CH 90.4→92.6%; +1.3pp overall (diluted by the already-100%
 * locales). Conservative by design — only the qualifier forms above; FI/PT/SI misses are
 * untouched.
 *
 * @param {string} locality Raw locality name.
 * @returns {string} The name with its qualifier removed, or `""` when nothing was stripped (no
 *   point re-probing the identical key).
 */
export function stripLocalityQualifier(locality) {
    const original = locality.trim();
    // Applied in this order, each to the output of the previous one.
    const qualifierPatterns = [
        /\s+[a-zà-ÿ]\.\s*\S.*$/iu, // abbreviated " b.Graz" / " o.Bleiburg" / " a.d. …"
        /\s+(im|an der|ob|bei|in der|unter|vor)\s+\S.*$/iu, // " im Simmental", " bei Graz"
        /\s+(S|N|E|W|V|Ø|Sø|Fyn|Thy|Sjælland|Jylland|[A-ZÅÄÖ]{2})$/u, // " S", " VD", " Thy"
    ];
    // Slash form first: keep what precedes the first "/" ("Kraubath/Mur", "St.Kanzian/Klopeiner See").
    // Without a slash this is the trimmed name itself.
    const beforeSlash = original.split("/")[0].trim();
    const stripped = qualifierPatterns
        .reduce((name, pattern) => name.replace(pattern, ""), beforeSlash)
        .trim();
    if (stripped === original) {
        return "";
    }
    return stripped;
}
|
|
152
|
+
/**
 * Fold a numbered-route designator to one canonical key. Apply AFTER
 * {@link normalizeStreetForKey}.
 *
 * Sources spell routes differently: TIGER says `State Rte 100` / `US Hwy 5` where E911/Overture
 * say `VT ROUTE 100` / `US ROUTE 5` — the dominant street-name miss class in the #483
 * interpolation eval (rural addresses live on routes). The folding rules:
 *
 * - `us <designator> N…` → `us route N…`
 * - `state <designator> N…` and `<2-letter-prefix> <designator> N…` (the state-abbreviation
 *   form) → `state route N…`
 *
 * where `<designator>` is one of `route`, `rte`, `rt`, `highway`, `hwy`, and the route number
 * must start with a digit — so `State Street` and friends never match.
 *
 * Used by BOTH the segment-shard builder (`scripts/build-interpolation-shard.ts`) and the
 * interpolation lookup — the same one-function discipline as {@link normalizeStreetForKey}. The
 * address-point tier (#476) does NOT apply it yet: adopting it there requires a shard rebuild
 * (noted on #483).
 *
 * A same-numbered US and state route stay DISTINCT keys (`us route 5` vs `state route 5`). The
 * BARE `route N` form is ambiguous (designator unknown) and stays unfolded, so a bare-route query
 * misses rather than guessing a designator.
 *
 * @param {string} streetNorm A street key already passed through `normalizeStreetForKey`.
 * @returns {string} The canonical route key, or `streetNorm` unchanged when it is not a
 *   recognised route form.
 */
export function canonicalizeRouteKey(streetNorm) {
    const parsed = /^(us|state|[a-z]{2}) (?:route|rte|rt|highway|hwy) (\d.*)$/.exec(streetNorm);
    if (parsed === null) {
        return streetNorm;
    }
    const [, prefix, routeNumber] = parsed;
    // Anything other than the literal "us" prefix is a state route.
    const system = prefix === "us" ? "us" : "state";
    return `${system} route ${routeNumber}`;
}
|
|
176
|
+
//# sourceMappingURL=street-normalize.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"street-normalize.js","sourceRoot":"","sources":["../street-normalize.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,OAAO,EAAE,yBAAyB,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAA;AAExF;;;;;;;;GAQG;AACH,MAAM,wBAAwB,GAAG,IAAI,GAAG,CAAiB;IACxD,CAAC,OAAO,EAAE,KAAK,CAAC;IAChB,CAAC,QAAQ,EAAE,KAAK,CAAC;IACjB,CAAC,OAAO,EAAE,KAAK,CAAC;IAChB,CAAC,QAAQ,EAAE,KAAK,CAAC;IACjB,CAAC,OAAO,EAAE,KAAK,CAAC;IAChB,CAAC,OAAO,EAAE,KAAK,CAAC;IAChB,CAAC,SAAS,EAAE,KAAK,CAAC;IAClB,CAAC,QAAQ,EAAE,KAAK,CAAC;IACjB,CAAC,OAAO,EAAE,KAAK,CAAC;IAChB,CAAC,OAAO,EAAE,MAAM,CAAC;IACjB,CAAC,UAAU,EAAE,MAAM,CAAC;IACpB,CAAC,SAAS,EAAE,MAAM,CAAC;IACnB,CAAC,YAAY,EAAE,MAAM,CAAC;IACtB,CAAC,YAAY,EAAE,MAAM,CAAC;IACtB,CAAC,WAAW,EAAE,MAAM,CAAC;IACrB,CAAC,WAAW,EAAE,MAAM,CAAC;IACrB,CAAC,aAAa,EAAE,MAAM,CAAC;IACvB,CAAC,YAAY,EAAE,MAAM,CAAC;IACtB,CAAC,YAAY,EAAE,MAAM,CAAC;IACtB,CAAC,WAAW,EAAE,MAAM,CAAC;IACrB,CAAC,WAAW,EAAE,MAAM,CAAC;IACrB,CAAC,UAAU,EAAE,MAAM,CAAC;IACpB,CAAC,UAAU,EAAE,MAAM,CAAC;IACpB,CAAC,UAAU,EAAE,MAAM,CAAC;IACpB,CAAC,YAAY,EAAE,MAAM,CAAC;IACtB,CAAC,WAAW,EAAE,MAAM,CAAC;IACrB,CAAC,WAAW,EAAE,MAAM,CAAC;IACrB,CAAC,WAAW,EAAE,OAAO,CAAC;CACtB,CAAC,CAAA;AAEF,4EAA4E;AAC5E,SAAS,IAAI,CAAC,KAAa;IAC1B,OAAO,KAAK;SACV,SAAS,CAAC,MAAM,CAAC;SACjB,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;SACrB,WAAW,EAAE;SACb,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC;SACtB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAA;AACT,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,qBAAqB,CAAC,MAAc;IACnD,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;IACtC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAA;IAElC,2FAA2F;IAC3F,oGAAoG;IACpG,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,MAAM,KAAK,GAAG,wBAAwB,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAA;QACtD,IAAI,KAAK,IAAI,uBAAuB,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC;YAAE,MAAM,CAAC,CAAC,CAAC,GAAG,KAAK,CAAA;IAC5E,CAAC;IAED,sFAAsF;IACtF,mFAAmF;IACnF,6EAA6E;IAC7E,oFAAoF;IACpF,MAAM,eAAe,GAAG,CAAC,GAAW,EAAE,EAAE,CACvC,yBAAyB,CAAC,GAAG,CAAC,GAAG,C
AAC,WAAW,EAAE,CAAC,EAAE,WAAW,EAAE,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,CAAC,CAAA;IACjF,MAAM,SAAS,GAAG,CAAC,CAAU,EAAE,CAAU,EAAE,EAAE,CAC5C,CAAC,IAAI,CAAC,IAAI,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAA;IAEnF,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAA;IAChD,IAAI,QAAQ,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;QAAE,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,QAAQ,CAAC,CAAA;IAChE,MAAM,KAAK,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAA;IACzC,IAAI,KAAK,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;QAAE,MAAM,CAAC,CAAC,CAAC,GAAG,KAAK,CAAA;IAEjD,MAAM,QAAQ,GAAG,SAAS,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAA;IAChF,IAAI,QAAQ,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;QAAE,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,QAAQ,CAAC,CAAA;IAChF,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,eAAe,CAAC,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAE,CAAC,CAAA;QACxD,IAAI,IAAI;YAAE,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,IAAI,CAAA;IAC3C,CAAC;IAED,oFAAoF;IACpF,yFAAyF;IACzF,wFAAwF;IACxF,KAAK,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC;QACzD,IAAI,EAAE,GAAG,CAAC;YAAE,SAAQ,CAAC,iEAAiE;QACtF,MAAM,SAAS,GAAG,uBAAuB,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAE,CAAC,CAAA;QAC1D,IAAI,SAAS,EAAE,CAAC;YACf,MAAM,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,WAAW,EAAE,CAAA;YACpC,MAAK;QACN,CAAC;IACF,CAAC;IAED,OAAO,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AACxB,CAAC;AAED,4FAA4F;AAC5F,MAAM,UAAU,uBAAuB,CAAC,QAAgB;IACvD,OAAO,IAAI,CAAC,QAAQ,CAAC,CAAA;AACtB,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,sBAAsB,CAAC,QAAgB;IACtD,IAAI,CAAC,GAAG,QAAQ,CAAC,IAAI,EAAE,CAAA;IACvB,IAAI,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAA,CAAC,6CAA6C;IAC9F,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,yBAAyB,EAAE,EAAE,CAAC,CAAA,CAAC,oDAAoD;IACjG,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,kDAAkD,EAAE,EAAE,CAAC,CAAA,CAAC,+BAA+B;IACrG,CAAC,GAAG,
CAAC,CAAC,OAAO,CAAC,4DAA4D,EAAE,EAAE,CAAC,CAAA,CAAC,sBAAsB;IACtG,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IACZ,OAAO,CAAC,KAAK,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAA;AACtC,CAAC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,UAAU,oBAAoB,CAAC,UAAkB;IACtD,MAAM,KAAK,GAAG,2DAA2D,CAAC,IAAI,CAAC,UAAU,CAAC,CAAA;IAC1F,IAAI,CAAC,KAAK;QAAE,OAAO,UAAU,CAAA;IAC7B,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,UAAU,KAAK,CAAC,CAAC,CAAC,EAAE,CAAA;AACjE,CAAC"}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Typed schema for the TIGER STREET-SEGMENT interpolation shards (`street-segments-<cc>-<st>.db`,
|
|
7
|
+
* built by `scripts/build-interpolation-shard.ts` from TIGER EDGES) — the #483 Method-3 fallback
|
|
8
|
+
* the resolver drops to when the address-point tier (Method 2) can't bracket. Single source of
|
|
9
|
+
* truth for the columns the BUILDER writes and the READER ({@link StreetInterpolator}) probes, so
|
|
10
|
+
* a column rename in one is a compile error in the other.
|
|
11
|
+
*
|
|
12
|
+
* The builder reads geometry from shapefiles via DuckDB's spatial extension (raw `ST_Read` — see
|
|
13
|
+
* AGENTS.md "Database / inline SQL") and writes here through `node:sqlite`. The hot positional
|
|
14
|
+
* INSERT (a county's worth of edges) stays raw; its column list is derived from
|
|
15
|
+
* {@link STREET_SEGMENT_COLUMNS} so it can't drift from the DDL.
|
|
16
|
+
*/
|
|
17
|
+
import type { Kysely } from "kysely";
|
|
18
|
+
/**
|
|
19
|
+
* One TIGER street-segment edge: a `(from_hn, to_hn)` house-number range on one `side` of a named
|
|
20
|
+
* street, with the geometry the interpolator walks. `min_hn`/`max_hn` are the sorted bounds (the
|
|
21
|
+
* probe filters on them); `parity` is `odd`/`even`/`mixed`.
|
|
22
|
+
*/
|
|
23
|
+
export interface StreetSegmentTable {
|
|
24
|
+
/** Shared {@link normalizeStreetForKey} of the street — the build/query-consistent probe key. */
|
|
25
|
+
street_norm: string;
|
|
26
|
+
/** `L` or `R` — the TIGER side the address range sits on. */
|
|
27
|
+
side: string;
|
|
28
|
+
/**
 * One end of the `(from_hn, to_hn)` house-number range, stored as given. Use `min_hn` /
 * `max_hn` for the sorted bounds. NOTE(review): presumably the range may run in either
 * direction — confirm against the builder.
 */
from_hn: number;
|
|
29
|
+
/** The other end of the `(from_hn, to_hn)` house-number range; see `from_hn`. */
to_hn: number;
|
|
30
|
+
/** Sorted lower bound of `(from_hn, to_hn)` — the probe filters `min_hn <= n <= max_hn`. */
|
|
31
|
+
min_hn: number;
|
|
32
|
+
/** Sorted upper bound of `(from_hn, to_hn)`. */
|
|
33
|
+
max_hn: number;
|
|
34
|
+
/** `odd` | `even` | `mixed` — the house-number parity along the range. */
|
|
35
|
+
parity: string;
|
|
36
|
+
/**
 * Postcode of the edge, or `null` — the only nullable column. It is the leading column of the
 * postcode-scope probe index.
 */
postcode: string | null;
|
|
37
|
+
/** 5-digit state+county FIPS the edge came from. */
|
|
38
|
+
county_fips: string;
|
|
39
|
+
/** The street as it appeared in TIGER (kept for display / debugging). */
|
|
40
|
+
street_raw: string;
|
|
41
|
+
/** GeoJSON LineString text (no SpatiaLite — read back with `JSON.parse`). */
|
|
42
|
+
geometry: string;
|
|
43
|
+
/** Provenance: the dataset this edge came from (e.g. `tiger:edges`). */
|
|
44
|
+
source: string;
|
|
45
|
+
/** The pinned TIGER release the edge was ingested from. */
|
|
46
|
+
release: string;
|
|
47
|
+
}
|
|
48
|
+
/** The street-segment database schema for `new DatabaseClient<StreetSegmentDatabase>(...)`. */
|
|
49
|
+
export interface StreetSegmentDatabase {
|
|
50
|
+
/** The `street_segment` table; each row has the {@link StreetSegmentTable} shape. */
street_segment: StreetSegmentTable;
|
|
51
|
+
}
|
|
52
|
+
/**
 * The `street_segment` columns in INSERT order, typed as a readonly tuple of the thirteen
 * column names. The builder's positional prepared statement derives its placeholder list from
 * this, so the positional order can't drift from the DDL / the reader.
 */
|
|
56
|
+
export declare const STREET_SEGMENT_COLUMNS: readonly ["street_norm", "side", "from_hn", "to_hn", "min_hn", "max_hn", "parity", "postcode", "county_fips", "street_raw", "geometry", "source", "release"];
|
|
57
|
+
/**
 * Create the `street_segment` table — called before the streaming bulk load. Every column is
 * declared NOT NULL except `postcode`.
 *
 * @param db Kysely handle typed against {@link StreetSegmentDatabase}.
 * @returns A promise that settles once the table-creation statement has executed.
 */
|
|
58
|
+
export declare function createStreetSegmentTable(db: Kysely<StreetSegmentDatabase>): Promise<void>;
|
|
59
|
+
/**
 * Create the two probe indexes the reader relies on: `idx_seg_postcode` on
 * `(postcode, street_norm, min_hn)` for postcode-scope probes and `idx_seg_street` on
 * `(street_norm, min_hn)` for street-scope probes. The indexes are created one after the other.
 *
 * @param db Kysely handle typed against {@link StreetSegmentDatabase}.
 * @returns A promise that settles once both indexes have been created.
 */
|
|
60
|
+
export declare function createStreetSegmentIndexes(db: Kysely<StreetSegmentDatabase>): Promise<void>;
|
|
61
|
+
//# sourceMappingURL=street-segment-schema.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"street-segment-schema.d.ts","sourceRoot":"","sources":["../street-segment-schema.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAA;AAEpC;;;;GAIG;AACH,MAAM,WAAW,kBAAkB;IAClC,iGAAiG;IACjG,WAAW,EAAE,MAAM,CAAA;IACnB,6DAA6D;IAC7D,IAAI,EAAE,MAAM,CAAA;IACZ,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,MAAM,CAAA;IACb,4FAA4F;IAC5F,MAAM,EAAE,MAAM,CAAA;IACd,gDAAgD;IAChD,MAAM,EAAE,MAAM,CAAA;IACd,0EAA0E;IAC1E,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAA;IACvB,oDAAoD;IACpD,WAAW,EAAE,MAAM,CAAA;IACnB,yEAAyE;IACzE,UAAU,EAAE,MAAM,CAAA;IAClB,6EAA6E;IAC7E,QAAQ,EAAE,MAAM,CAAA;IAChB,wEAAwE;IACxE,MAAM,EAAE,MAAM,CAAA;IACd,2DAA2D;IAC3D,OAAO,EAAE,MAAM,CAAA;CACf;AAED,+FAA+F;AAC/F,MAAM,WAAW,qBAAqB;IACrC,cAAc,EAAE,kBAAkB,CAAA;CAClC;AAED;;;GAGG;AACH,eAAO,MAAM,sBAAsB,8JAczB,CAAA;AAEV,iFAAiF;AACjF,wBAAsB,wBAAwB,CAAC,EAAE,EAAE,MAAM,CAAC,qBAAqB,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAiB/F;AAED,wFAAwF;AACxF,wBAAsB,0BAA0B,CAAC,EAAE,EAAE,MAAM,CAAC,qBAAqB,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAOjG"}
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Typed schema for the TIGER STREET-SEGMENT interpolation shards (`street-segments-<cc>-<st>.db`,
|
|
7
|
+
* built by `scripts/build-interpolation-shard.ts` from TIGER EDGES) — the #483 Method-3 fallback
|
|
8
|
+
* the resolver drops to when the address-point tier (Method 2) can't bracket. Single source of
|
|
9
|
+
* truth for the columns the BUILDER writes and the READER ({@link StreetInterpolator}) probes, so
|
|
10
|
+
* a column rename in one is a compile error in the other.
|
|
11
|
+
*
|
|
12
|
+
* The builder reads geometry from shapefiles via DuckDB's spatial extension (raw `ST_Read` — see
|
|
13
|
+
* AGENTS.md "Database / inline SQL") and writes here through `node:sqlite`. The hot positional
|
|
14
|
+
* INSERT (a county's worth of edges) stays raw; its column list is derived from
|
|
15
|
+
* {@link STREET_SEGMENT_COLUMNS} so it can't drift from the DDL.
|
|
16
|
+
*/
|
|
17
|
+
/**
 * The `street_segment` columns in INSERT order. The builder's positional prepared statement derives
 * its placeholder list from this, so the positional order can't drift from the DDL / the reader.
 *
 * Frozen so the runtime value honours the `readonly` tuple the type declaration advertises: an
 * accidental in-place `sort()` / `push()` by one consumer would otherwise silently reorder the
 * columns for every other consumer.
 */
export const STREET_SEGMENT_COLUMNS = Object.freeze([
    "street_norm",
    "side",
    "from_hn",
    "to_hn",
    "min_hn",
    "max_hn",
    "parity",
    "postcode",
    "county_fips",
    "street_raw",
    "geometry",
    "source",
    "release",
]);
|
|
36
|
+
/** Create the `street_segment` table — called before the streaming bulk load. */
|
|
37
|
+
export async function createStreetSegmentTable(db) {
|
|
38
|
+
await db.schema
|
|
39
|
+
.createTable("street_segment")
|
|
40
|
+
.addColumn("street_norm", "text", (c) => c.notNull())
|
|
41
|
+
.addColumn("side", "text", (c) => c.notNull())
|
|
42
|
+
.addColumn("from_hn", "integer", (c) => c.notNull())
|
|
43
|
+
.addColumn("to_hn", "integer", (c) => c.notNull())
|
|
44
|
+
.addColumn("min_hn", "integer", (c) => c.notNull())
|
|
45
|
+
.addColumn("max_hn", "integer", (c) => c.notNull())
|
|
46
|
+
.addColumn("parity", "text", (c) => c.notNull())
|
|
47
|
+
.addColumn("postcode", "text")
|
|
48
|
+
.addColumn("county_fips", "text", (c) => c.notNull())
|
|
49
|
+
.addColumn("street_raw", "text", (c) => c.notNull())
|
|
50
|
+
.addColumn("geometry", "text", (c) => c.notNull())
|
|
51
|
+
.addColumn("source", "text", (c) => c.notNull())
|
|
52
|
+
.addColumn("release", "text", (c) => c.notNull())
|
|
53
|
+
.execute();
|
|
54
|
+
}
|
|
55
|
+
/** Create the two probe indexes the reader relies on (postcode-scope, street-scope). */
|
|
56
|
+
export async function createStreetSegmentIndexes(db) {
|
|
57
|
+
await db.schema
|
|
58
|
+
.createIndex("idx_seg_postcode")
|
|
59
|
+
.on("street_segment")
|
|
60
|
+
.columns(["postcode", "street_norm", "min_hn"])
|
|
61
|
+
.execute();
|
|
62
|
+
await db.schema.createIndex("idx_seg_street").on("street_segment").columns(["street_norm", "min_hn"]).execute();
|
|
63
|
+
}
|
|
64
|
+
//# sourceMappingURL=street-segment-schema.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"street-segment-schema.js","sourceRoot":"","sources":["../street-segment-schema.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAwCH;;;GAGG;AACH,MAAM,CAAC,MAAM,sBAAsB,GAAG;IACrC,aAAa;IACb,MAAM;IACN,SAAS;IACT,OAAO;IACP,QAAQ;IACR,QAAQ;IACR,QAAQ;IACR,UAAU;IACV,aAAa;IACb,YAAY;IACZ,UAAU;IACV,QAAQ;IACR,SAAS;CACA,CAAA;AAEV,iFAAiF;AACjF,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAAC,EAAiC;IAC/E,MAAM,EAAE,CAAC,MAAM;SACb,WAAW,CAAC,gBAAgB,CAAC;SAC7B,SAAS,CAAC,aAAa,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SACpD,SAAS,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SAC7C,SAAS,CAAC,SAAS,EAAE,SAAS,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SACnD,SAAS,CAAC,OAAO,EAAE,SAAS,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SACjD,SAAS,CAAC,QAAQ,EAAE,SAAS,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SAClD,SAAS,CAAC,QAAQ,EAAE,SAAS,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SAClD,SAAS,CAAC,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SAC/C,SAAS,CAAC,UAAU,EAAE,MAAM,CAAC;SAC7B,SAAS,CAAC,aAAa,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SACpD,SAAS,CAAC,YAAY,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SACnD,SAAS,CAAC,UAAU,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SACjD,SAAS,CAAC,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SAC/C,SAAS,CAAC,SAAS,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;SAChD,OAAO,EAAE,CAAA;AACZ,CAAC;AAED,wFAAwF;AACxF,MAAM,CAAC,KAAK,UAAU,0BAA0B,CAAC,EAAiC;IACjF,MAAM,EAAE,CAAC,MAAM;SACb,WAAW,CAAC,kBAAkB,CAAC;SAC/B,EAAE,CAAC,gBAAgB,CAAC;SACpB,OAAO,CAAC,CAAC,UAAU,EAAE,aAAa,EAAE,QAAQ,CAAC,CAAC;SAC9C,OAAO,EAAE,CAAA;IACX,MAAM,EAAE,CAAC,MAAM,CAAC,WAAW,CAAC,gBAAgB,CAAC,CAAC,EAAE,CAAC,gBAAgB,CAAC,CAAC,OAAO,CAAC,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC,CAAC,OAAO,EAAE,CAAA;AAChH,CAAC"}
|
package/out/types.d.ts
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Public surface for the WOF SQLite resolver — types only, no runtime.
|
|
7
|
+
*
|
|
8
|
+
* These mirror the conceptual model described in `docs/plan/phases/PHASE_4_2_wof_sqlite.md`. Phase
|
|
9
|
+
* 4.3 will extend `PlaceCandidate` with the resolver-decorated fields that flow into
|
|
10
|
+
* `AddressNode.source` / `sourceId` (e.g. an explicit `wofUri: "wof-admin:101751113"` form).
|
|
11
|
+
*/
|
|
12
|
+
/**
|
|
13
|
+
* The placetype taxonomy used by Who's On First. Ordered roughly from coarsest (country) to finest
|
|
14
|
+
* (address). See https://github.com/whosonfirst/whosonfirst-placetypes for the authoritative
|
|
15
|
+
* definitions of each.
|
|
16
|
+
*
|
|
17
|
+
* Phase 4.2 only emits the ones we actually look up; the union is open enough to extend later.
|
|
18
|
+
*/
|
|
19
|
+
export type WofPlacetype = "country" | "macroregion" | "region" | "macrocounty" | "county" | "localadmin" | "locality" | "borough" | "neighbourhood" | "microhood" | "postalcode" | "venue" | "campus" | "address";
|
|
20
|
+
/**
|
|
21
|
+
* One candidate match for a place lookup.
|
|
22
|
+
*
|
|
23
|
+
* `score` is the post-boost ranking number — higher is better, but the scale is implementation-
|
|
24
|
+
* defined. Callers should treat it as ordinal, not absolute.
|
|
25
|
+
*
|
|
26
|
+
* `id` is the WOF place id. It's named generically (not `wof_id`) so the shape stays structurally
|
|
27
|
+
* compatible with `@mailwoman/resolver`'s `ResolvedPlace` — `WofSqlitePlaceLookup` satisfies the
|
|
28
|
+
* generic `ResolverBackend` contract without an adapter shim.
|
|
29
|
+
*
|
|
30
|
+
* `distanceKm` is populated only when the query carried `near` (and the place has a centroid).
|
|
31
|
+
* Useful for downstream UIs that want to show "X km from you" alongside the result.
|
|
32
|
+
*/
|
|
33
|
+
export interface PlaceCandidate {
|
|
34
|
+
id: number;
|
|
35
|
+
name: string;
|
|
36
|
+
placetype: WofPlacetype;
|
|
37
|
+
/** ISO 3166-1 alpha-2 country code. */
|
|
38
|
+
country: string;
|
|
39
|
+
lat: number;
|
|
40
|
+
lon: number;
|
|
41
|
+
parent_id?: number;
|
|
42
|
+
score: number;
|
|
43
|
+
distanceKm?: number;
|
|
44
|
+
/**
|
|
45
|
+
* True when this candidate's name OR an alias EXACTLY equals the query (the exact-match tier from
|
|
46
|
+
* {@link RankingWeights.exactMatchTiering}). Surfaced so a downstream country re-rank (#369's
|
|
47
|
+
* postcode anchor in `resolveTree`) can pin the country without crossing the tier — see the
|
|
48
|
+
* `exactMatch` field on `@mailwoman/core`'s `ResolvedPlace`.
|
|
49
|
+
*/
|
|
50
|
+
exactMatch?: boolean;
|
|
51
|
+
/**
|
|
52
|
+
* Population from WOF's `wof:population` property. Only present when the candidate has it on
|
|
53
|
+
* record — WOF carries population for ~15% of localities (mostly larger ones). Absent does NOT
|
|
54
|
+
* mean zero, just unknown.
|
|
55
|
+
*/
|
|
56
|
+
population?: number;
|
|
57
|
+
/**
|
|
58
|
+
* Bounding box from WOF's `spr.{min,max}_{latitude,longitude}` columns. Coarse outline for the
|
|
59
|
+
* place — a city's bbox is the city's full extent, a postcode's is roughly the postcode polygon's
|
|
60
|
+
* envelope. Optional because not all callers ask for it; implementations are free to omit when
|
|
61
|
+
* the underlying schema lacks the columns.
|
|
62
|
+
*/
|
|
63
|
+
bbox?: GeoBbox;
|
|
64
|
+
/**
|
|
65
|
+
* Set by the coordinate-first path when the chosen locality and the sibling postcode's containing
|
|
66
|
+
* locality are geographically far apart — the postcode and the parsed city name disagree (a
|
|
67
|
+
* transposed / wrong-for-the-city postcode). The candidate is still returned (the name wins for
|
|
68
|
+
* the locality), but the flag lets callers lower confidence / surface the conflict rather than
|
|
69
|
+
* silently mislocate. A retrieval/BM25 geocoder can't raise this — it's the falsehood-detection
|
|
70
|
+
* differentiator.
|
|
71
|
+
*/
|
|
72
|
+
mismatch?: boolean;
|
|
73
|
+
}
|
|
74
|
+
/**
|
|
75
|
+
* A WGS-84 lat/lon point. Used as a proximity hint for `FindPlaceQuery.near`.
|
|
76
|
+
*/
|
|
77
|
+
export interface GeoPoint {
|
|
78
|
+
lat: number;
|
|
79
|
+
lon: number;
|
|
80
|
+
}
|
|
81
|
+
/**
|
|
82
|
+
* A WGS-84 bounding box. Used as a hard filter via `FindPlaceQuery.bbox`, and returned as a place's coarse extent in `PlaceCandidate.bbox`.
|
|
83
|
+
*/
|
|
84
|
+
export interface GeoBbox {
|
|
85
|
+
minLat: number;
|
|
86
|
+
maxLat: number;
|
|
87
|
+
minLon: number;
|
|
88
|
+
maxLon: number;
|
|
89
|
+
}
|
|
90
|
+
/**
|
|
91
|
+
* Query against the resolver.
|
|
92
|
+
*
|
|
93
|
+
* `text` is the only required field; everything else narrows the search. When `country` and
|
|
94
|
+
* `parentId` are both set, `parentId` wins (it's more specific).
|
|
95
|
+
*
|
|
96
|
+
* `near` and `bbox` are independent. `near` is a soft signal — candidates close to the point get a
|
|
97
|
+
* ranking boost but distant candidates aren't dropped. `bbox` is a hard filter — only candidates
|
|
98
|
+
* whose bbox intersects the query bbox are returned (uses the package-built R*Tree index when
|
|
99
|
+
* present; if the index is missing the option is silently ignored to preserve backwards
|
|
100
|
+
* compatibility).
|
|
101
|
+
*
|
|
102
|
+
* `near` may carry `maxDistanceKm` to escalate from a boost to a hard filter — candidates further
|
|
103
|
+
* than that distance from the point are dropped at the SQL level via an R*Tree pre-filter.
|
|
104
|
+
*/
|
|
105
|
+
export interface FindPlaceQuery {
|
|
106
|
+
text: string;
|
|
107
|
+
placetype?: WofPlacetype | WofPlacetype[];
|
|
108
|
+
/** ISO 3166-1 alpha-2 — narrows to one country. */
|
|
109
|
+
country?: string;
|
|
110
|
+
/** WOF place id — narrows to descendants of this place. */
|
|
111
|
+
parentId?: number;
|
|
112
|
+
/**
|
|
113
|
+
* Sibling postcode. When set on a `locality` query AND a `postcode_locality` table is present,
|
|
114
|
+
* triggers the coordinate-first soft-score path: postcode→candidate localities are injected and
|
|
115
|
+
* scored `0.6·S_pc + 0.3·S_name + 0.1·S_pop` against the FTS name-match set, recovering small
|
|
116
|
+
* localities the name-match alone misses. Ignored when no postcode_locality shard is present.
|
|
117
|
+
*/
|
|
118
|
+
postcode?: string;
|
|
119
|
+
/** Proximity hint — candidates close to this point get a ranking boost. Setting `maxDistanceKm` escalates it to a hard filter that drops candidates beyond that distance. */
|
|
120
|
+
near?: GeoPoint & {
|
|
121
|
+
maxDistanceKm?: number;
|
|
122
|
+
};
|
|
123
|
+
/** Bounding-box filter — only candidates whose bbox intersects this box are returned. Silently ignored when the R*Tree index is missing. */
|
|
124
|
+
bbox?: GeoBbox;
|
|
125
|
+
/** Default 10. */
|
|
126
|
+
limit?: number;
|
|
127
|
+
}
|
|
128
|
+
/**
|
|
129
|
+
* The pull-based lookup surface. Implementations resolve a `FindPlaceQuery` to a ranked list of
|
|
130
|
+
* `PlaceCandidate`s. The interface is async even though `node:sqlite` is sync — leaves room for
|
|
131
|
+
* `Worker`-backed implementations later without a public API break.
|
|
132
|
+
*/
|
|
133
|
+
export interface PlaceLookup {
|
|
134
|
+
findPlace(query: FindPlaceQuery): Promise<PlaceCandidate[]>;
|
|
135
|
+
close(): void;
|
|
136
|
+
}
|
|
137
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH;;;;;;GAMG;AACH,MAAM,MAAM,YAAY,GACrB,SAAS,GACT,aAAa,GACb,QAAQ,GACR,aAAa,GACb,QAAQ,GACR,YAAY,GACZ,UAAU,GACV,SAAS,GACT,eAAe,GACf,WAAW,GACX,YAAY,GACZ,OAAO,GACP,QAAQ,GACR,SAAS,CAAA;AAEZ;;;;;;;;;;;;GAYG;AACH,MAAM,WAAW,cAAc;IAC9B,EAAE,EAAE,MAAM,CAAA;IACV,IAAI,EAAE,MAAM,CAAA;IACZ,SAAS,EAAE,YAAY,CAAA;IACvB,uCAAuC;IACvC,OAAO,EAAE,MAAM,CAAA;IACf,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,EAAE,MAAM,CAAA;IACX,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,KAAK,EAAE,MAAM,CAAA;IACb,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB;;;;;OAKG;IACH,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB;;;;OAIG;IACH,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB;;;;;OAKG;IACH,IAAI,CAAC,EAAE,OAAO,CAAA;IACd;;;;;;;OAOG;IACH,QAAQ,CAAC,EAAE,OAAO,CAAA;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACxB,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,EAAE,MAAM,CAAA;CACX;AAED;;GAEG;AACH,MAAM,WAAW,OAAO;IACvB,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,MAAM,CAAA;IACd,MAAM,EAAE,MAAM,CAAA;CACd;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,WAAW,cAAc;IAC9B,IAAI,EAAE,MAAM,CAAA;IACZ,SAAS,CAAC,EAAE,YAAY,GAAG,YAAY,EAAE,CAAA;IACzC,mDAAmD;IACnD,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,2DAA2D;IAC3D,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,2EAA2E;IAC3E,IAAI,CAAC,EAAE,QAAQ,GAAG;QAAE,aAAa,CAAC,EAAE,MAAM,CAAA;KAAE,CAAA;IAC5C,yFAAyF;IACzF,IAAI,CAAC,EAAE,OAAO,CAAA;IACd,kBAAkB;IAClB,KAAK,CAAC,EAAE,MAAM,CAAA;CACd;AAED;;;;GAIG;AACH,MAAM,WAAW,WAAW;IAC3B,SAAS,CAAC,KAAK,EAAE,cAAc,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,CAAA;IAC3D,KAAK,IAAI,IAAI,CAAA;CACb"}
|
package/out/types.js
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Public surface for the WOF SQLite resolver — types only, no runtime.
|
|
7
|
+
*
|
|
8
|
+
* These mirror the conceptual model described in `docs/plan/phases/PHASE_4_2_wof_sqlite.md`. Phase
|
|
9
|
+
* 4.3 will extend `PlaceCandidate` with the resolver-decorated fields that flow into
|
|
10
|
+
* `AddressNode.source` / `sourceId` (e.g. an explicit `wofUri: "wof-admin:101751113"` form).
|
|
11
|
+
*/
|
|
12
|
+
export {};
|
|
13
|
+
//# sourceMappingURL=types.js.map
|
package/out/types.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Schema for the unified WOF SQLite database we build from cloned WOF GeoJSON repos
|
|
7
|
+
* (`scripts/build-unified-wof.ts`). This is the CANONICAL gazetteer — we never use the
|
|
8
|
+
* off-the-shelf geocode.earth prebuilt dumps (they assign different WOF ids to the same place;
|
|
9
|
+
* see the `feedback-custom-wof-db-only` memory). The table/column names match the resolver's
|
|
10
|
+
* expectations (`lookup.ts`) so `WofSqlitePlaceLookup` works unchanged, INCLUDING the `ancestors`
|
|
11
|
+
* table (which lookup.ts's parent-constraint subquery needs) — see `populateAncestors`. The
|
|
12
|
+
* `place_search` FTS5 + `place_bbox` R*Tree are built separately by `build-fts` (fts.ts).
|
|
13
|
+
*/
|
|
14
|
+
import { DatabaseSync } from "node:sqlite";
|
|
15
|
+
export declare function createUnifiedSchema(db: DatabaseSync): Promise<void>;
|
|
16
|
+
/**
|
|
17
|
+
* Populate the `ancestors` table by walking each place's `parent_id` chain in `spr` (transitive
|
|
18
|
+
* closure, including the place itself). Idempotent: drops + rebuilds the table contents. Returns
|
|
19
|
+
* the row count. Run after `spr` is fully ingested (build-unified-wof freeze phase) or standalone
|
|
20
|
+
* on an existing unified DB (`scripts/add-ancestors.ts`). Sentinel/negative parent_ids and cycles
|
|
21
|
+
* terminate the walk. ~4 rows/place average; a transaction keeps the ~5M inserts fast.
|
|
22
|
+
*/
|
|
23
|
+
export declare function populateAncestors(db: DatabaseSync): number;
|
|
24
|
+
export declare function createUnifiedIndexes(db: DatabaseSync): Promise<void>;
|
|
25
|
+
//# sourceMappingURL=unified-schema.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"unified-schema.d.ts","sourceRoot":"","sources":["../unified-schema.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAGH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAA;AAG1C,wBAAsB,mBAAmB,CAAC,EAAE,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAyEzE;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAAC,EAAE,EAAE,YAAY,GAAG,MAAM,CA6B1D;AAED,wBAAsB,oBAAoB,CAAC,EAAE,EAAE,YAAY,GAAG,OAAO,CAAC,IAAI,CAAC,CAuB1E"}
|