@mailwoman/resolver-wof-sqlite 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +250 -0
- package/out/address-point-interpolation.d.ts +48 -0
- package/out/address-point-interpolation.d.ts.map +1 -0
- package/out/address-point-interpolation.js +164 -0
- package/out/address-point-interpolation.js.map +1 -0
- package/out/address-point-schema.d.ts +58 -0
- package/out/address-point-schema.d.ts.map +1 -0
- package/out/address-point-schema.js +67 -0
- package/out/address-point-schema.js.map +1 -0
- package/out/address-point.d.ts +29 -0
- package/out/address-point.d.ts.map +1 -0
- package/out/address-point.js +62 -0
- package/out/address-point.js.map +1 -0
- package/out/ancestry.d.ts +40 -0
- package/out/ancestry.d.ts.map +1 -0
- package/out/ancestry.js +53 -0
- package/out/ancestry.js.map +1 -0
- package/out/build-candidate-cli.d.ts +16 -0
- package/out/build-candidate-cli.d.ts.map +1 -0
- package/out/build-candidate-cli.js +80 -0
- package/out/build-candidate-cli.js.map +1 -0
- package/out/build-candidate.d.ts +54 -0
- package/out/build-candidate.d.ts.map +1 -0
- package/out/build-candidate.js +230 -0
- package/out/build-candidate.js.map +1 -0
- package/out/build-coincident-roles-cli.d.ts +16 -0
- package/out/build-coincident-roles-cli.d.ts.map +1 -0
- package/out/build-coincident-roles-cli.js +94 -0
- package/out/build-coincident-roles-cli.js.map +1 -0
- package/out/build-fts-cli.d.ts +23 -0
- package/out/build-fts-cli.d.ts.map +1 -0
- package/out/build-fts-cli.js +117 -0
- package/out/build-fts-cli.js.map +1 -0
- package/out/build-slim-cli.d.ts +14 -0
- package/out/build-slim-cli.d.ts.map +1 -0
- package/out/build-slim-cli.js +130 -0
- package/out/build-slim-cli.js.map +1 -0
- package/out/build-slim.d.ts +71 -0
- package/out/build-slim.d.ts.map +1 -0
- package/out/build-slim.js +267 -0
- package/out/build-slim.js.map +1 -0
- package/out/candidate-lookup.d.ts +43 -0
- package/out/candidate-lookup.d.ts.map +1 -0
- package/out/candidate-lookup.js +191 -0
- package/out/candidate-lookup.js.map +1 -0
- package/out/candidate-schema.d.ts +86 -0
- package/out/candidate-schema.d.ts.map +1 -0
- package/out/candidate-schema.js +109 -0
- package/out/candidate-schema.js.map +1 -0
- package/out/coincident-roles.d.ts +86 -0
- package/out/coincident-roles.d.ts.map +1 -0
- package/out/coincident-roles.js +160 -0
- package/out/coincident-roles.js.map +1 -0
- package/out/convention.d.ts +109 -0
- package/out/convention.d.ts.map +1 -0
- package/out/convention.js +94 -0
- package/out/convention.js.map +1 -0
- package/out/fst-autocomplete.d.ts +49 -0
- package/out/fst-autocomplete.d.ts.map +1 -0
- package/out/fst-autocomplete.js +124 -0
- package/out/fst-autocomplete.js.map +1 -0
- package/out/fst-builder.d.ts +20 -0
- package/out/fst-builder.d.ts.map +1 -0
- package/out/fst-builder.js +219 -0
- package/out/fst-builder.js.map +1 -0
- package/out/fst-deserialize-web.d.ts +16 -0
- package/out/fst-deserialize-web.d.ts.map +1 -0
- package/out/fst-deserialize-web.js +133 -0
- package/out/fst-deserialize-web.js.map +1 -0
- package/out/fst-matcher.d.ts +33 -0
- package/out/fst-matcher.d.ts.map +1 -0
- package/out/fst-matcher.js +117 -0
- package/out/fst-matcher.js.map +1 -0
- package/out/fst-serialize.d.ts +30 -0
- package/out/fst-serialize.d.ts.map +1 -0
- package/out/fst-serialize.js +261 -0
- package/out/fst-serialize.js.map +1 -0
- package/out/fst-types.d.ts +60 -0
- package/out/fst-types.d.ts.map +1 -0
- package/out/fst-types.js +11 -0
- package/out/fst-types.js.map +1 -0
- package/out/fts.d.ts +158 -0
- package/out/fts.d.ts.map +1 -0
- package/out/fts.js +261 -0
- package/out/fts.js.map +1 -0
- package/out/geo.d.ts +74 -0
- package/out/geo.d.ts.map +1 -0
- package/out/geo.js +88 -0
- package/out/geo.js.map +1 -0
- package/out/index.d.ts +27 -0
- package/out/index.d.ts.map +1 -0
- package/out/index.js +22 -0
- package/out/index.js.map +1 -0
- package/out/interpolation.d.ts +84 -0
- package/out/interpolation.d.ts.map +1 -0
- package/out/interpolation.js +150 -0
- package/out/interpolation.js.map +1 -0
- package/out/lookup.d.ts +156 -0
- package/out/lookup.d.ts.map +1 -0
- package/out/lookup.js +876 -0
- package/out/lookup.js.map +1 -0
- package/out/postal-city-alias-lookup.d.ts +50 -0
- package/out/postal-city-alias-lookup.d.ts.map +1 -0
- package/out/postal-city-alias-lookup.js +66 -0
- package/out/postal-city-alias-lookup.js.map +1 -0
- package/out/postal-city-alias-schema.d.ts +51 -0
- package/out/postal-city-alias-schema.d.ts.map +1 -0
- package/out/postal-city-alias-schema.js +47 -0
- package/out/postal-city-alias-schema.js.map +1 -0
- package/out/postal-city-candidate-schema.d.ts +58 -0
- package/out/postal-city-candidate-schema.d.ts.map +1 -0
- package/out/postal-city-candidate-schema.js +56 -0
- package/out/postal-city-candidate-schema.js.map +1 -0
- package/out/postcode-point-lookup.d.ts +38 -0
- package/out/postcode-point-lookup.d.ts.map +1 -0
- package/out/postcode-point-lookup.js +46 -0
- package/out/postcode-point-lookup.js.map +1 -0
- package/out/reverse.d.ts +99 -0
- package/out/reverse.d.ts.map +1 -0
- package/out/reverse.js +290 -0
- package/out/reverse.js.map +1 -0
- package/out/schema.d.ts +163 -0
- package/out/schema.d.ts.map +1 -0
- package/out/schema.js +18 -0
- package/out/schema.js.map +1 -0
- package/out/sharding.d.ts +96 -0
- package/out/sharding.d.ts.map +1 -0
- package/out/sharding.js +129 -0
- package/out/sharding.js.map +1 -0
- package/out/sqlite-convention-source.d.ts +29 -0
- package/out/sqlite-convention-source.d.ts.map +1 -0
- package/out/sqlite-convention-source.js +53 -0
- package/out/sqlite-convention-source.js.map +1 -0
- package/out/sqlite-utils.d.ts +17 -0
- package/out/sqlite-utils.d.ts.map +1 -0
- package/out/sqlite-utils.js +24 -0
- package/out/sqlite-utils.js.map +1 -0
- package/out/street-morphology-fst-builder.d.ts +59 -0
- package/out/street-morphology-fst-builder.d.ts.map +1 -0
- package/out/street-morphology-fst-builder.js +174 -0
- package/out/street-morphology-fst-builder.js.map +1 -0
- package/out/street-normalize.d.ts +66 -0
- package/out/street-normalize.d.ts.map +1 -0
- package/out/street-normalize.js +176 -0
- package/out/street-normalize.js.map +1 -0
- package/out/street-segment-schema.d.ts +61 -0
- package/out/street-segment-schema.d.ts.map +1 -0
- package/out/street-segment-schema.js +64 -0
- package/out/street-segment-schema.js.map +1 -0
- package/out/types.d.ts +137 -0
- package/out/types.d.ts.map +1 -0
- package/out/types.js +13 -0
- package/out/types.js.map +1 -0
- package/out/unified-schema.d.ts +25 -0
- package/out/unified-schema.d.ts.map +1 -0
- package/out/unified-schema.js +142 -0
- package/out/unified-schema.js.map +1 -0
- package/package.json +54 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* FST-based autocomplete. Prefix walk + BFS expansion to collect ranked place suggestions. O(depth
|
|
7
|
+
* × branching) — the FST IS the autocomplete index.
|
|
8
|
+
*
|
|
9
|
+
* Two query shapes are handled (the FST is a trie over normalized WORD tokens):
|
|
10
|
+
*
|
|
11
|
+
* - COMPLETE tokens ("new york") — `walk` lands on a state; collect its accepting entries + BFS a
|
|
12
|
+
* couple tokens past it for nearby completions. This is the CLI's "complete a place word"
|
|
13
|
+
* path.
|
|
14
|
+
* - A PARTIAL last token ("new yor", "chic") — `walk` fails (there is no "yor" edge, only "york"). So
|
|
15
|
+
* walk the complete prefix, then complete the partial token by prefix-filtering the
|
|
16
|
+
* continuation edges (`token.startsWith(partial)`). This is what a char-level typeahead
|
|
17
|
+
* needs; without it "new yor" returns nothing useful. (#587)
|
|
18
|
+
*/
|
|
19
|
+
import { FstMatcher, normalizeTokens } from "./fst-matcher.js";
|
|
20
|
+
/** Max accepting entries collected per BFS branch — keeps one dense branch from starving the search. */
|
|
21
|
+
const PER_BRANCH = 4;
|
|
22
|
+
/**
 * Select the `k` most important entries, ordered by `importance` descending.
 *
 * A fresh array is always returned and `entries` itself is never reordered. When `entries`
 * already fits within `k`, the copy keeps the original order — the sort is skipped, not the copy.
 *
 * @param entries Entries carrying a numeric `importance`.
 * @param k Maximum number of entries to keep; zero or a negative value yields an empty array.
 * @returns A new array holding at most `k` entries.
 */
function topByImportance(entries, k) {
    // A non-positive limit keeps nothing. Without this guard a negative `k` would reach
    // `slice(0, k)` and trim entries off the tail instead of returning none.
    if (k <= 0)
        return [];
    if (entries.length <= k)
        return [...entries];
    return [...entries].sort((a, b) => b.importance - a.importance).slice(0, k);
}
|
|
31
|
+
/**
 * Autocomplete from the current prefix. Returns suggestions ranked importance-descending.
 *
 * The query is normalized into tokens and then resolved one of two ways:
 *
 * - If `fst.walk` accepts every token, the landing state's accepting entries are suggested and its
 *   continuations seed the expansion.
 * - Otherwise the last token is treated as partially typed: the tokens before it are walked (state
 *   `0` is used when there are none) and every continuation edge whose token starts with the
 *   partial text seeds the expansion.
 *
 * A breadth-first expansion then follows continuations up to `maxExpansionDepth` tokens past the
 * seed point, and stops early once `maxSuggestions * 4` distinct places have been collected.
 *
 * @param fst Matcher exposing `walk`, `accepting` and `continuations`.
 * @param query Raw user input.
 * @param opts Optional tuning: `maxSuggestions` (default 10), `maxExpansionDepth` (default 2) and
 *   `dedupeByName` (keep one suggestion per case-insensitive name).
 * @returns `{ query, normalizedTokens, depth, suggestions }`. `depth` is `match.depth` when the
 *   whole query walks, or the number of complete tokens when the last token is partial;
 *   `suggestions` holds at most `maxSuggestions` items.
 */
export function autocomplete(fst, query, opts = {}) {
    const maxSuggestions = opts.maxSuggestions ?? 10;
    const maxExpansionDepth = opts.maxExpansionDepth ?? 2;
    const normalizedTokens = normalizeTokens(query);
    if (normalizedTokens.length === 0) {
        return { query, normalizedTokens: [], depth: 0, suggestions: [] };
    }
    const seen = new Map();
    const queue = [];
    let depth = 0;
    const match = fst.walk(normalizedTokens);
    if (match) {
        // COMPLETE-token prefix landed on a state. Seed at the match state (accepting + continuations).
        depth = match.depth;
        for (const entry of fst.accepting(match.stateId))
            addSuggestion(seen, entry, match.depth, []);
        for (const cont of fst.continuations(match.stateId)) {
            queue.push({ stateId: cont.targetState, depth: 1, tokens: [cont.token] });
        }
    }
    else {
        // PARTIAL last token — walk the complete prefix, complete the partial by prefix-filtering edges.
        const complete = normalizedTokens.slice(0, -1);
        const partial = normalizedTokens[normalizedTokens.length - 1];
        const prefixState = complete.length === 0 ? 0 : (fst.walk(complete)?.stateId ?? undefined);
        if (prefixState === undefined) {
            // The complete part of the query is not in the FST, so nothing can complete it.
            return { query, normalizedTokens, depth: 0, suggestions: [] };
        }
        depth = complete.length;
        for (const cont of fst.continuations(prefixState)) {
            if (!cont.token.startsWith(partial))
                continue;
            // This edge completes the typed partial token — its target is a real match at depth+1.
            // Added here, outside the BFS, so these matches are kept regardless of the BFS budget.
            for (const entry of topByImportance(fst.accepting(cont.targetState), PER_BRANCH))
                addSuggestion(seen, entry, complete.length + 1, [cont.token]);
            // BFS a little past it too (multi-token completions: "new yor" → "New York Mills").
            queue.push({ stateId: cont.targetState, depth: 1, tokens: [cont.token] });
        }
    }
    // BFS expansion (shared by both paths) — find nearby completions up to maxExpansionDepth. Each
    // branch contributes only its top PER_BRANCH places: a state like "new london" has dozens of
    // accepting entries and would otherwise blow the budget before the BFS ever reaches "new york"
    // (the "new" state has 311 continuations). Per-branch capping keeps the search broad. (#587)
    //
    // The queue is consumed with a read cursor rather than `shift()`: `shift()` re-indexes the
    // remaining elements on every pop, which is wasted work on a queue seeded with hundreds of
    // items. Visit order — and therefore the result — is the same.
    const budget = maxSuggestions * 4;
    for (let head = 0; head < queue.length && seen.size < budget; head++) {
        const item = queue[head];
        if (item.depth > maxExpansionDepth)
            continue;
        for (const entry of topByImportance(fst.accepting(item.stateId), PER_BRANCH))
            addSuggestion(seen, entry, depth + item.depth, item.tokens);
        if (item.depth < maxExpansionDepth) {
            for (const cont of fst.continuations(item.stateId)) {
                queue.push({ stateId: cont.targetState, depth: item.depth + 1, tokens: [...item.tokens, cont.token] });
            }
        }
    }
    let suggestions = [...seen.values()].sort((a, b) => b.importance - a.importance);
    if (opts.dedupeByName)
        suggestions = dedupeByName(suggestions);
    return { query, normalizedTokens, depth, suggestions: suggestions.slice(0, maxSuggestions) };
}
|
|
94
|
+
/**
 * Record `entry` in `seen`, keyed by `wofID`, keeping the shallowest match for each place.
 *
 * If the place is already recorded at the same or a smaller `matchDepth`, the call is a no-op;
 * only a strictly shallower match replaces the stored suggestion.
 *
 * @param seen Map from `wofID` to the suggestion collected so far; mutated in place.
 * @param entry Accepting entry supplying `name`, `placetype`, `importance`, `wofID` and
 *   `parentChain`.
 * @param matchDepth Token depth at which `entry` was reached.
 * @param completionTokens Tokens appended past the typed prefix to reach `entry`.
 */
function addSuggestion(seen, entry, matchDepth, completionTokens) {
    const existing = seen.get(entry.wofID);
    if (existing && existing.matchDepth <= matchDepth)
        return;
    seen.set(entry.wofID, {
        name: entry.name,
        placetype: entry.placetype,
        importance: entry.importance,
        wofID: entry.wofID,
        parentChain: entry.parentChain,
        matchDepth,
        // Copied so later changes to the caller's token array cannot alter a stored suggestion.
        completionTokens: [...completionTokens],
    });
}
|
|
108
|
+
/**
 * Keep one suggestion per name, comparing names case-insensitively (via `toLowerCase()`).
 *
 * The first occurrence of each name is kept and relative order is preserved, so when the input
 * is already sorted importance-descending the survivor is the highest-importance one.
 *
 * @param suggestions Suggestions to filter; not mutated.
 * @returns A new array with later same-name suggestions removed.
 */
function dedupeByName(suggestions) {
    const seenNames = new Set();
    const out = [];
    for (const s of suggestions) {
        const key = s.name.toLowerCase();
        if (seenNames.has(key))
            continue;
        seenNames.add(key);
        out.push(s);
    }
    return out;
}
|
|
124
|
+
//# sourceMappingURL=fst-autocomplete.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fst-autocomplete.js","sourceRoot":"","sources":["../fst-autocomplete.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAA;AAqC9D,wGAAwG;AACxG,MAAM,UAAU,GAAG,CAAC,CAAA;AAEpB;;;GAGG;AACH,SAAS,eAAe,CAAC,OAA8B,EAAE,CAAS;IACjE,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC;QAAE,OAAO,CAAC,GAAG,OAAO,CAAC,CAAA;IAC5C,OAAO,CAAC,GAAG,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAA;AAC5E,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,GAAe,EAAE,KAAa,EAAE,OAAyB,EAAE;IACvF,MAAM,cAAc,GAAG,IAAI,CAAC,cAAc,IAAI,EAAE,CAAA;IAChD,MAAM,iBAAiB,GAAG,IAAI,CAAC,iBAAiB,IAAI,CAAC,CAAA;IACrD,MAAM,gBAAgB,GAAG,eAAe,CAAC,KAAK,CAAC,CAAA;IAE/C,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACnC,OAAO,EAAE,KAAK,EAAE,gBAAgB,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,WAAW,EAAE,EAAE,EAAE,CAAA;IAClE,CAAC;IAED,MAAM,IAAI,GAAG,IAAI,GAAG,EAAkC,CAAA;IACtD,MAAM,KAAK,GAAc,EAAE,CAAA;IAC3B,IAAI,KAAK,GAAG,CAAC,CAAA;IAEb,MAAM,KAAK,GAAG,GAAG,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAA;IACxC,IAAI,KAAK,EAAE,CAAC;QACX,gGAAgG;QAChG,KAAK,GAAG,KAAK,CAAC,KAAK,CAAA;QACnB,KAAK,MAAM,KAAK,IAAI,GAAG,CAAC,SAAS,CAAC,KAAK,CAAC,OAAO,CAAC;YAAE,aAAa,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,EAAE,CAAC,CAAA;QAC7F,KAAK,MAAM,IAAI,IAAI,GAAG,CAAC,aAAa,CAAC,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC;YACrD,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,WAAW,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAA;QAC1E,CAAC;IACF,CAAC;SAAM,CAAC;QACP,iGAAiG;QACjG,MAAM,QAAQ,GAAG,gBAAgB,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;QAC9C,MAAM,OAAO,GAAG,gBAAgB,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,CAAE,CAAA;QAC9D,MAAM,WAAW,GAAG,QAAQ,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,OAAO,IAAI,SAAS,CAAC,CAAA;QAC1F,IAAI,WAAW,KAAK,SAAS,EAAE,CAAC;YAC/B,OAAO,EAAE,KAAK,EAAE,gBAAgB,EAAE,KAAK,EAAE,CAAC,EAAE,WAAW,EAAE,EAAE,EAAE,CAAA;QAC9D,CAAC;QACD,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAA;QACvB,KAAK,MAAM,IAA
I,IAAI,GAAG,CAAC,aAAa,CAAC,WAAW,CAAC,EAAE,CAAC;YACnD,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,OAAO,CAAC;gBAAE,SAAQ;YAC7C,uFAAuF;YACvF,KAAK,MAAM,KAAK,IAAI,eAAe,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,EAAE,UAAU,CAAC;gBAC/E,aAAa,CAAC,IAAI,EAAE,KAAK,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAA;YAC9D,oFAAoF;YACpF,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,WAAW,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAA;QAC1E,CAAC;IACF,CAAC;IAED,+FAA+F;IAC/F,6FAA6F;IAC7F,+FAA+F;IAC/F,6FAA6F;IAC7F,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,IAAI,GAAG,cAAc,GAAG,CAAC,EAAE,CAAC;QAC3D,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,EAAG,CAAA;QAC3B,IAAI,IAAI,CAAC,KAAK,GAAG,iBAAiB;YAAE,SAAQ;QAC5C,KAAK,MAAM,KAAK,IAAI,eAAe,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,UAAU,CAAC;YAC3E,aAAa,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,GAAG,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,CAAA;QAC5D,IAAI,IAAI,CAAC,KAAK,GAAG,iBAAiB,EAAE,CAAC;YACpC,KAAK,MAAM,IAAI,IAAI,GAAG,CAAC,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;gBACpD,KAAK,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,IAAI,CAAC,WAAW,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,GAAG,CAAC,EAAE,MAAM,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAA;YACvG,CAAC;QACF,CAAC;IACF,CAAC;IAED,IAAI,WAAW,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC,CAAA;IAChF,IAAI,IAAI,CAAC,YAAY;QAAE,WAAW,GAAG,YAAY,CAAC,WAAW,CAAC,CAAA;IAC9D,OAAO,EAAE,KAAK,EAAE,gBAAgB,EAAE,KAAK,EAAE,WAAW,EAAE,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,CAAC,EAAE,CAAA;AAC7F,CAAC;AAED,SAAS,aAAa,CACrB,IAAyC,EACzC,KAAiB,EACjB,UAAkB,EAClB,gBAA0B;IAE1B,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,CAAA;IACtC,IAAI,QAAQ,IAAI,QAAQ,CAAC,UAAU,IAAI,UAAU;QAAE,OAAM;IACzD,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,EAAE;QACrB,IAAI,EAAE,KAAK,CAAC,IAAI;QAChB,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,UAAU,EAAE,KAAK,CAAC,UAAU;QAC5B,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,WAAW,EAAE,KAAK,CAAC,WAAW;QAC9B,UAAU;QACV,gBAAgB,EAAE,CAAC,GAAG,gBAAgB,CAA
C;KACvC,CAAC,CAAA;AACH,CAAC;AAED;;;GAGG;AACH,SAAS,YAAY,CAAC,WAAqC;IAC1D,MAAM,SAAS,GAAG,IAAI,GAAG,EAAU,CAAA;IACnC,MAAM,GAAG,GAA6B,EAAE,CAAA;IACxC,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;QAC7B,MAAM,GAAG,GAAG,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAA;QAChC,IAAI,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC;YAAE,SAAQ;QAChC,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;QAClB,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACZ,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Build an FST (finite-state transducer) from a WOF SQLite database. The FST maps normalized token
|
|
7
|
+
* sequences to PlaceEntry arrays, pre-computing the valid interpretations for every prefix of
|
|
8
|
+
* every place name in the gazetteer.
|
|
9
|
+
*
|
|
10
|
+
* Build pipeline: open WOF DB → query spr + names → normalize names → insert into trie → attach
|
|
11
|
+
* PlaceEntry at terminals → return FstMatcher.
|
|
12
|
+
*/
|
|
13
|
+
import { FstMatcher } from "./fst-matcher.js";
|
|
14
|
+
import type { BuildFstOpts, BuildFstResult, FstProvenance } from "./fst-types.js";
|
|
15
|
+
/**
 * Build an FST from the WOF SQLite database described by `opts`.
 *
 * @param opts Build options (see `BuildFstOpts`).
 * @returns `matcher` — the `FstMatcher` over the built trie; `provenance` — a record of the
 *   build (see `FstProvenance`); `result` — summary counts for the build (see `BuildFstResult`).
 */
export declare function buildFstFromWof(opts: BuildFstOpts): {
|
|
16
|
+
matcher: FstMatcher;
|
|
17
|
+
provenance: FstProvenance;
|
|
18
|
+
result: BuildFstResult;
|
|
19
|
+
};
|
|
20
|
+
//# sourceMappingURL=fst-builder.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fst-builder.d.ts","sourceRoot":"","sources":["../fst-builder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAIH,OAAO,EAAE,UAAU,EAAmB,MAAM,kBAAkB,CAAA;AAC9D,OAAO,KAAK,EAAE,YAAY,EAAE,cAAc,EAAE,aAAa,EAA2B,MAAM,gBAAgB,CAAA;AAmC1G,wBAAgB,eAAe,CAAC,IAAI,EAAE,YAAY,GAAG;IACpD,OAAO,EAAE,UAAU,CAAA;IACnB,UAAU,EAAE,aAAa,CAAA;IACzB,MAAM,EAAE,cAAc,CAAA;CACtB,CA6MA"}
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Build an FST (finite-state transducer) from a WOF SQLite database. The FST maps normalized token
|
|
7
|
+
* sequences to PlaceEntry arrays, pre-computing the valid interpretations for every prefix of
|
|
8
|
+
* every place name in the gazetteer.
|
|
9
|
+
*
|
|
10
|
+
* Build pipeline: open WOF DB → query spr + names → normalize names → insert into trie → attach
|
|
11
|
+
* PlaceEntry at terminals → return FstMatcher.
|
|
12
|
+
*/
|
|
13
|
+
import { DatabaseSync } from "node:sqlite";
|
|
14
|
+
import { FstMatcher, normalizeTokens } from "./fst-matcher.js";
|
|
15
|
+
const DEFAULT_PLACETYPES = [
|
|
16
|
+
"country",
|
|
17
|
+
"region",
|
|
18
|
+
"county",
|
|
19
|
+
"locality",
|
|
20
|
+
"localadmin",
|
|
21
|
+
"borough",
|
|
22
|
+
"neighbourhood",
|
|
23
|
+
];
|
|
24
|
+
const DEFAULT_COUNTRIES = ["US"];
|
|
25
|
+
const DEFAULT_LANGUAGES = ["eng", ""];
|
|
26
|
+
/**
 * Build an in-memory FST (token trie) from a WOF SQLite database.
 *
 * Pipeline: open the database → load current `spr` rows for the requested countries and
 * placetypes → resolve each place's parent chain → load importance scores → load alternate names
 * → insert every normalized name into the trie → wrap the trie in an `FstMatcher`.
 *
 * The `ancestors`, `place_importance` and `place_population` tables are optional: if a query
 * against one of them fails, the step is reported through `onProgress` and skipped. The database
 * handle is always closed before returning, including when a step throws.
 *
 * @param opts Build options. `dbPath` is required; `countries`, `placetypes` and `languages`
 *   fall back to the module defaults; a `languages` list containing `"*"` disables the language
 *   filter; `onProgress(phase, message)` receives status updates.
 * @returns `{ matcher, provenance, result }` — the matcher plus build statistics.
 * @throws Whatever `node:sqlite` throws when the database cannot be opened or a required query
 *   (`spr`, `names`) fails.
 */
export function buildFstFromWof(opts) {
    const countries = opts.countries ?? DEFAULT_COUNTRIES;
    const placetypes = opts.placetypes ?? DEFAULT_PLACETYPES;
    const languages = opts.languages ?? DEFAULT_LANGUAGES;
    const progress = opts.onProgress ?? (() => { });
    const placeholders = (arr) => arr.map(() => "?").join(",");
    // Trie storage; index 0 is the root state every inserted name starts from.
    const nodes = [{ edges: new Map(), places: [] }];
    const importanceMap = new Map();
    let placeCount = 0;
    let insertCount = 0;
    progress("open", opts.dbPath);
    const db = new DatabaseSync(opts.dbPath, { open: true });
    try {
        // Phase 1: Load all matching SPR rows.
        progress("spr", `Loading places for countries=[${countries}], placetypes=[${placetypes}]`);
        const sprStmt = db.prepare(`SELECT id, name, placetype, parent_id, latitude, longitude
            FROM spr
            WHERE is_current = 1
            AND country IN (${placeholders(countries)})
            AND placetype IN (${placeholders(placetypes)})`);
        const sprRows = sprStmt.all(...countries, ...placetypes);
        placeCount = sprRows.length;
        progress("spr", `Loaded ${sprRows.length} places`);
        // Phase 2: Build a lookup for parent chain resolution.
        const sprByID = new Map();
        for (const row of sprRows)
            sprByID.set(row.id, row);
        // Also load parent rows that might be outside our placetype filter (e.g., country for region).
        const parentStmt = db.prepare("SELECT id, name, placetype, parent_id, latitude, longitude FROM spr WHERE id = ?");
        // Fallback: use ancestors table when parent_id is a sentinel (-1, -4, etc.).
        let ancestorStmt = null;
        try {
            ancestorStmt = db.prepare(`SELECT DISTINCT ancestor_id FROM ancestors
                WHERE id = ? AND ancestor_placetype IN ('country', 'region', 'county')
                ORDER BY CASE ancestor_placetype
                WHEN 'county' THEN 1
                WHEN 'region' THEN 2
                WHEN 'country' THEN 3
                END`);
        }
        catch {
            progress("ancestors", "No ancestors table — sentinel parent_ids will produce empty chains");
        }
        // Ancestor IDs for `id`, nearest first. Returns [] when the place itself is unknown.
        const resolveParentChain = (id) => {
            const row = sprByID.get(id);
            if (!row)
                return [];
            // If parent_id is a sentinel (≤ 0), use ancestors table.
            if (row.parent_id <= 0 && ancestorStmt) {
                const ancestors = ancestorStmt.all(id);
                return ancestors.map((a) => a.ancestor_id).filter((aid) => aid !== id);
            }
            // Normal case: walk parent_id chain. `seen` stops the walk if the chain loops back.
            const chain = [];
            let current = row.parent_id;
            const seen = new Set([id]);
            while (current > 0 && !seen.has(current)) {
                seen.add(current);
                chain.push(current);
                let parentRow = sprByID.get(current);
                if (!parentRow) {
                    const fetched = parentStmt.get(current);
                    if (!fetched)
                        break;
                    parentRow = fetched;
                    // Cache the fetched row so sibling places do not query for it again.
                    sprByID.set(current, parentRow);
                }
                if (parentRow.parent_id > 0 && parentRow.parent_id !== current) {
                    current = parentRow.parent_id;
                }
                else {
                    break;
                }
            }
            return chain;
        };
        // Phase 3: Load importance data (Wikipedia-based, falls back to population-scaled).
        // See docs/articles/concepts/importance-vs-population.md for the two-signal contract.
        progress("importance", "Loading importance data");
        try {
            const impStmt = db.prepare("SELECT id, importance FROM place_importance");
            const impRows = impStmt.all();
            for (const row of impRows)
                importanceMap.set(row.id, row.importance);
            progress("importance", `Loaded ${importanceMap.size} importance scores`);
        }
        catch {
            progress("importance", "No place_importance table — falling back to population");
            try {
                const popStmt = db.prepare("SELECT id, population FROM place_population");
                const popRows = popStmt.all();
                for (const row of popRows) {
                    // Log-scale the population (in thousands) and cap the score at 1.0.
                    const normalized = row.population > 0 ? Math.min(1.0, Math.log2(1 + row.population / 1000) / 14) : 0;
                    importanceMap.set(row.id, normalized);
                }
            }
            catch {
                progress("importance", "No place_population either — using 0 for all");
            }
        }
        // Phase 4: Load names for matching places.
        progress("names", "Loading name variants");
        const placeIds = sprRows.map((r) => r.id);
        const namesByPlace = new Map();
        const allLanguages = languages.includes("*");
        // IDs are bound in chunks of 500 so each statement carries a bounded number of parameters.
        for (let i = 0; i < placeIds.length; i += 500) {
            const chunk = placeIds.slice(i, i + 500);
            const idPlaceholders = chunk.map(() => "?").join(",");
            const nameStmt = allLanguages
                ? db.prepare(`SELECT id, name, language, privateuse FROM names WHERE id IN (${idPlaceholders})`)
                : db.prepare(`SELECT id, name, language, privateuse FROM names WHERE id IN (${idPlaceholders}) AND language IN (${languages.map(() => "?").join(",")})`);
            const nameRows = (allLanguages
                ? nameStmt.all(...chunk)
                : nameStmt.all(...chunk, ...languages));
            for (const row of nameRows) {
                const existing = namesByPlace.get(row.id) ?? [];
                if (!existing.includes(row.name))
                    existing.push(row.name);
                namesByPlace.set(row.id, existing);
            }
        }
        progress("names", `Loaded names for ${namesByPlace.size} places`);
        // Phase 5: Build the trie.
        progress("trie", "Building trie");
        // Walk/create one edge per token, then attach `entry` at the final state.
        const insertName = (tokens, entry) => {
            if (tokens.length === 0)
                return;
            let stateId = 0;
            for (const t of tokens) {
                const node = nodes[stateId];
                let next = node.edges.get(t);
                if (next === undefined) {
                    next = nodes.length;
                    nodes.push({ edges: new Map(), places: [] });
                    node.edges.set(t, next);
                }
                stateId = next;
            }
            // Deduplicate: don't add the same wofID twice at the same state.
            const existing = nodes[stateId].places;
            if (!existing.some((p) => p.wofID === entry.wofID && p.placetype === entry.placetype)) {
                existing.push(entry);
            }
        };
        for (const row of sprRows) {
            const parentChain = resolveParentChain(row.id);
            const entry = {
                wofID: row.id,
                placetype: row.placetype,
                name: row.name,
                parentChain,
                importance: importanceMap.get(row.id) ?? 0,
                lat: row.latitude,
                lon: row.longitude,
            };
            // Insert the primary name from spr.
            const primaryTokens = normalizeTokens(row.name);
            insertName(primaryTokens, entry);
            insertCount++;
            // Insert alt names from the names table.
            const altNames = namesByPlace.get(row.id) ?? [];
            for (const altName of altNames) {
                if (altName === row.name)
                    continue;
                const altTokens = normalizeTokens(altName);
                // Skip variants that normalize to nothing or to the primary name's token sequence.
                if (altTokens.length > 0 && altTokens.join(" ") !== primaryTokens.join(" ")) {
                    insertName(altTokens, entry);
                    insertCount++;
                }
            }
        }
    }
    finally {
        // Release the SQLite handle on every exit path; previously a failed prepare, query or
        // trie step left the database open.
        db.close();
    }
    progress("done", `Built trie: ${nodes.length} states, ${insertCount} name insertions`);
    const edgeCount = nodes.reduce((sum, n) => sum + n.edges.size, 0);
    const matcher = FstMatcher.fromNodes(nodes);
    const provenance = {
        builtAt: new Date().toISOString(),
        countries,
        stateCount: nodes.length,
        placeCount,
        edgeCount,
        nameInsertions: insertCount,
        importanceMatches: importanceMap.size,
        sourceDb: opts.dbPath,
    };
    return {
        matcher,
        provenance,
        result: {
            stateCount: nodes.length,
            placeCount,
            edgeCount,
            tokenCount: insertCount,
        },
    };
}
|
|
219
|
+
//# sourceMappingURL=fst-builder.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fst-builder.js","sourceRoot":"","sources":["../fst-builder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAA;AAE1C,OAAO,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,kBAAkB,CAAA;AAG9D,MAAM,kBAAkB,GAAkB;IACzC,SAAS;IACT,QAAQ;IACR,QAAQ;IACR,UAAU;IACV,YAAY;IACZ,SAAS;IACT,eAAe;CACf,CAAA;AACD,MAAM,iBAAiB,GAAG,CAAC,IAAI,CAAC,CAAA;AAChC,MAAM,iBAAiB,GAAG,CAAC,KAAK,EAAE,EAAE,CAAC,CAAA;AAuBrC,MAAM,UAAU,eAAe,CAAC,IAAkB;IAKjD,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,iBAAiB,CAAA;IACrD,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,IAAI,kBAAkB,CAAA;IACxD,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,iBAAiB,CAAA;IACrD,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,IAAI,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAA;IAE9C,QAAQ,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,CAAA;IAC7B,MAAM,EAAE,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAA;IAExD,uCAAuC;IACvC,QAAQ,CAAC,KAAK,EAAE,iCAAiC,SAAS,kBAAkB,UAAU,GAAG,CAAC,CAAA;IAC1F,MAAM,YAAY,GAAG,CAAC,GAAa,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACpE,MAAM,OAAO,GAAG,EAAE,CAAC,OAAO,CACzB;;;uBAGqB,YAAY,CAAC,SAAS,CAAC;yBACrB,YAAY,CAAC,UAAU,CAAC,GAAG,CAClD,CAAA;IACD,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,GAAG,SAAS,EAAE,GAAG,UAAU,CAAwB,CAAA;IAC/E,QAAQ,CAAC,KAAK,EAAE,UAAU,OAAO,CAAC,MAAM,SAAS,CAAC,CAAA;IAElD,uDAAuD;IACvD,MAAM,OAAO,GAAG,IAAI,GAAG,EAAkB,CAAA;IACzC,KAAK,MAAM,GAAG,IAAI,OAAO;QAAE,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,CAAA;IAEnD,+FAA+F;IAC/F,MAAM,UAAU,GAAG,EAAE,CAAC,OAAO,CAAC,kFAAkF,CAAC,CAAA;IAEjH,6EAA6E;IAC7E,IAAI,YAAY,GAAyC,IAAI,CAAA;IAC7D,IAAI,CAAC;QACJ,YAAY,GAAG,EAAE,CAAC,OAAO,CACxB;;;;;;QAMK,CACL,CAAA;IACF,CAAC;IAAC,MAAM,CAAC;QACR,QAAQ,CAAC,WAAW,EAAE,oEAAoE,CAAC,CAAA;IAC5F,CAAC;IAED,SAAS,kBAAkB,CAAC,EAAU;QACrC,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAA;QAC3B,IAAI,CAAC,GAAG;YAAE,OAAO,EAAE,CAAA;QAEnB,yDAAyD;QACzD,IAAI,GAAG,CAAC,SAAS,IAAI,CAAC,IAAI,YAAY,EAAE,CAAC;YACxC,MAAM,SAAS,GAAG,YAAY,CAAC,GAAG,CAAC,EAAE,CAA8C,CAAA;YACnF,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,C
AAC,CAAC,WAAW,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,CAAA;QACvE,CAAC;QAED,qCAAqC;QACrC,MAAM,KAAK,GAAa,EAAE,CAAA;QAC1B,IAAI,OAAO,GAAG,GAAG,CAAC,SAAS,CAAA;QAC3B,MAAM,IAAI,GAAG,IAAI,GAAG,CAAS,CAAC,EAAE,CAAC,CAAC,CAAA;QAClC,OAAO,OAAO,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC;YAC1C,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,CAAA;YACjB,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YACnB,IAAI,SAAS,GAAG,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAA;YACpC,IAAI,CAAC,SAAS,EAAE,CAAC;gBAChB,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,CAAC,OAAO,CAAkC,CAAA;gBACxE,IAAI,CAAC,OAAO;oBAAE,MAAK;gBACnB,SAAS,GAAG,OAAO,CAAA;gBACnB,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE,SAAS,CAAC,CAAA;YAChC,CAAC;YACD,IAAI,SAAS,CAAC,SAAS,GAAG,CAAC,IAAI,SAAS,CAAC,SAAS,KAAK,OAAO,EAAE,CAAC;gBAChE,OAAO,GAAG,SAAS,CAAC,SAAS,CAAA;YAC9B,CAAC;iBAAM,CAAC;gBACP,MAAK;YACN,CAAC;QACF,CAAC;QACD,OAAO,KAAK,CAAA;IACb,CAAC;IAED,oFAAoF;IACpF,sFAAsF;IACtF,QAAQ,CAAC,YAAY,EAAE,yBAAyB,CAAC,CAAA;IACjD,MAAM,aAAa,GAAG,IAAI,GAAG,EAAkB,CAAA;IAC/C,IAAI,CAAC;QACJ,MAAM,OAAO,GAAG,EAAE,CAAC,OAAO,CAAC,6CAA6C,CAAC,CAAA;QACzE,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,EAA0D,CAAA;QACrF,KAAK,MAAM,GAAG,IAAI,OAAO;YAAE,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,UAAU,CAAC,CAAA;QACpE,QAAQ,CAAC,YAAY,EAAE,UAAU,aAAa,CAAC,IAAI,oBAAoB,CAAC,CAAA;IACzE,CAAC;IAAC,MAAM,CAAC;QACR,QAAQ,CAAC,YAAY,EAAE,wDAAwD,CAAC,CAAA;QAChF,IAAI,CAAC;YACJ,MAAM,OAAO,GAAG,EAAE,CAAC,OAAO,CAAC,6CAA6C,CAAC,CAAA;YACzE,MAAM,OAAO,GAAG,OAAO,CAAC,GAAG,EAAgC,CAAA;YAC3D,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;gBAC3B,MAAM,UAAU,GAAG,GAAG,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,GAAG,CAAC,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;gBACpG,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,EAAE,UAAU,CAAC,CAAA;YACtC,CAAC;QACF,CAAC;QAAC,MAAM,CAAC;YACR,QAAQ,CAAC,YAAY,EAAE,8CAA8C,CAAC,CAAA;QACvE,CAAC;IACF,CAAC;IAED,2CAA2C;IAC3C,QAAQ,CAAC,OAAO,EAAE,uBAAuB,CAAC,CAAA;IAC1C,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAA;IACzC,MAAM,YAAY,G
AAG,IAAI,GAAG,EAAoB,CAAA;IAEhD,MAAM,YAAY,GAAG,SAAS,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAA;IAC5C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,IAAI,GAAG,EAAE,CAAC;QAC/C,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,CAAA;QACxC,MAAM,cAAc,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QACrD,MAAM,QAAQ,GAAG,YAAY;YAC5B,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,iEAAiE,cAAc,GAAG,CAAC;YAChG,CAAC,CAAC,EAAE,CAAC,OAAO,CACV,iEAAiE,cAAc,sBAAsB,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAC1I,CAAA;QACH,MAAM,QAAQ,GAAG,CAAC,YAAY;YAC7B,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;YACxB,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,GAAG,KAAK,EAAE,GAAG,SAAS,CAAC,CAAyB,CAAA;QAChE,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC5B,MAAM,QAAQ,GAAG,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,EAAE,CAAA;YAC/C,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC;gBAAE,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;YACzD,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,EAAE,QAAQ,CAAC,CAAA;QACnC,CAAC;IACF,CAAC;IACD,QAAQ,CAAC,OAAO,EAAE,oBAAoB,YAAY,CAAC,IAAI,SAAS,CAAC,CAAA;IAEjE,2BAA2B;IAC3B,QAAQ,CAAC,MAAM,EAAE,eAAe,CAAC,CAAA;IACjC,MAAM,KAAK,GAAc,CAAC,EAAE,KAAK,EAAE,IAAI,GAAG,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,CAAA;IAE3D,SAAS,UAAU,CAAC,MAAgB,EAAE,KAAiB;QACtD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,OAAM;QAC/B,IAAI,OAAO,GAAG,CAAC,CAAA;QACf,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;YACxB,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAE,CAAA;YAC5B,IAAI,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;YAC5B,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;gBACxB,IAAI,GAAG,KAAK,CAAC,MAAM,CAAA;gBACnB,KAAK,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,GAAG,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC,CAAA;gBAC5C,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,CAAA;YACxB,CAAC;YACD,OAAO,GAAG,IAAI,CAAA;QACf,CAAC;QACD,iEAAiE;QACjE,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAE,CAAC,MAAM,CAAA;QACvC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,KAAK,KAAK,CAAC,KAAK,IAAI,CAAC,CAAC,SAAS,KAAK,KAAK,CAAC,SAAS,CAAC,EAAE,CAAC;Y
ACvF,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QACrB,CAAC;IACF,CAAC;IAED,IAAI,WAAW,GAAG,CAAC,CAAA;IACnB,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;QAC3B,MAAM,WAAW,GAAG,kBAAkB,CAAC,GAAG,CAAC,EAAE,CAAC,CAAA;QAC9C,MAAM,KAAK,GAAe;YACzB,KAAK,EAAE,GAAG,CAAC,EAAE;YACb,SAAS,EAAE,GAAG,CAAC,SAAwB;YACvC,IAAI,EAAE,GAAG,CAAC,IAAI;YACd,WAAW;YACX,UAAU,EAAE,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC;YAC1C,GAAG,EAAE,GAAG,CAAC,QAAQ;YACjB,GAAG,EAAE,GAAG,CAAC,SAAS;SAClB,CAAA;QAED,oCAAoC;QACpC,MAAM,aAAa,GAAG,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QAC/C,UAAU,CAAC,aAAa,EAAE,KAAK,CAAC,CAAA;QAChC,WAAW,EAAE,CAAA;QAEb,yCAAyC;QACzC,MAAM,QAAQ,GAAG,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,EAAE,CAAA;QAC/C,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAChC,IAAI,OAAO,KAAK,GAAG,CAAC,IAAI;gBAAE,SAAQ;YAClC,MAAM,SAAS,GAAG,eAAe,CAAC,OAAO,CAAC,CAAA;YAC1C,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,IAAI,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,aAAa,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC7E,UAAU,CAAC,SAAS,EAAE,KAAK,CAAC,CAAA;gBAC5B,WAAW,EAAE,CAAA;YACd,CAAC;QACF,CAAC;IACF,CAAC;IAED,EAAE,CAAC,KAAK,EAAE,CAAA;IACV,QAAQ,CAAC,MAAM,EAAE,eAAe,KAAK,CAAC,MAAM,YAAY,WAAW,kBAAkB,CAAC,CAAA;IAEtF,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,CAAA;IACjE,MAAM,OAAO,GAAG,UAAU,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;IAC3C,MAAM,UAAU,GAAkB;QACjC,OAAO,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACjC,SAAS;QACT,UAAU,EAAE,KAAK,CAAC,MAAM;QACxB,UAAU,EAAE,OAAO,CAAC,MAAM;QAC1B,SAAS;QACT,cAAc,EAAE,WAAW;QAC3B,iBAAiB,EAAE,aAAa,CAAC,IAAI;QACrC,QAAQ,EAAE,IAAI,CAAC,MAAM;KACrB,CAAA;IACD,OAAO;QACN,OAAO;QACP,UAAU;QACV,MAAM,EAAE;YACP,UAAU,EAAE,KAAK,CAAC,MAAM;YACxB,UAAU,EAAE,OAAO,CAAC,MAAM;YAC1B,SAAS;YACT,UAAU,EAAE,WAAW;SACvB;KACD,CAAA;AACF,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Browser-compatible FST deserializer. Uses DataView + TextDecoder instead of Node's Buffer so the
|
|
7
|
+
* same binary format can be loaded in the browser via fetch(url).then(r => r.arrayBuffer()).
|
|
8
|
+
*
|
|
9
|
+
* This is a read-only counterpart to fst-serialize.ts — serialization stays Node-only (it's a
|
|
10
|
+
* build-time operation).
|
|
11
|
+
*/
|
|
12
|
+
import { FstMatcher } from "./fst-matcher.js";
|
|
13
|
+
import type { FstProvenance } from "./fst-types.js";
|
|
14
|
+
/**
 * Parse a serialized FST buffer into an {@link FstMatcher}.
 *
 * @param input Serialized FST bytes, either a whole ArrayBuffer or a Uint8Array view onto one.
 * @returns A matcher built from the decoded state, edge and place tables.
 * @throws Error when the buffer is shorter than the header, the magic bytes do not match, or the
 *   format version is not supported.
 */
export declare function deserializeFstWeb(input: ArrayBuffer | Uint8Array): FstMatcher;
|
|
15
|
+
/**
 * Read the provenance JSON embedded in a serialized FST buffer without decoding the tables.
 *
 * @param input Serialized FST bytes, either a whole ArrayBuffer or a Uint8Array view onto one.
 * @returns The parsed provenance, or `undefined` when none can be read — for example when the
 *   buffer is shorter than the header, the format version is below 3, the provenance offset is
 *   zero or out of range, or the JSON fails to parse. This function does not throw on such input.
 */
export declare function readFstProvenanceWeb(input: ArrayBuffer | Uint8Array): FstProvenance | undefined;
|
|
16
|
+
//# sourceMappingURL=fst-deserialize-web.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fst-deserialize-web.d.ts","sourceRoot":"","sources":["../fst-deserialize-web.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAGH,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAC7C,OAAO,KAAK,EAAE,aAAa,EAA2B,MAAM,gBAAgB,CAAA;AAsB5E,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,WAAW,GAAG,UAAU,GAAG,UAAU,CA+F7E;AAED,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,WAAW,GAAG,UAAU,GAAG,aAAa,GAAG,SAAS,CAiB/F"}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Browser-compatible FST deserializer. Uses DataView + TextDecoder instead of Node's Buffer so the
|
|
7
|
+
* same binary format can be loaded in the browser via fetch(url).then(r => r.arrayBuffer()).
|
|
8
|
+
*
|
|
9
|
+
* This is a read-only counterpart to fst-serialize.ts — serialization stays Node-only (it's a
|
|
10
|
+
* build-time operation).
|
|
11
|
+
*/
|
|
12
|
+
import { FstMatcher } from "./fst-matcher.js";
|
|
13
|
+
/** Size in bytes of the fixed FST header; the string-offset table starts immediately after it. */
const HEADER_SIZE = 32;
|
|
14
|
+
/** Stride in bytes of one edge-table entry: a uint32 string index followed by a uint32 target state. */
const EDGE_ENTRY_SIZE = 8;
|
|
15
|
+
/**
 * Stride in bytes of one place-table entry. Fields as read by deserializeFstWeb: wofID u32 @0,
 * placetype u8 @4, parent-chain length u8 @5, name string index u32 @8, importance @12,
 * lat f32 @16, lon f32 @20, parent-chain u32 values from @24.
 */
const PLACE_ENTRY_SIZE = 56;
|
|
16
|
+
/** The four bytes a serialized FST must start with; deserializeFstWeb rejects anything else. */
const MAGIC_BYTES = [0x46, 0x53, 0x54, 0x00]; // "FST\0"
|
|
17
|
+
/**
 * Highest format version deserializeFstWeb accepts (it rejects anything outside 1..MAX_VERSION).
 * NOTE(review): this file also contains `version >= 4` layout branches, and readFstProvenanceWeb
 * only returns data for version >= 3 — confirm whether this limit is meant to be higher.
 */
const MAX_VERSION = 2;
|
|
18
|
+
/**
 * Placetype names indexed by the uint8 placetype code stored in each place entry. A code with no
 * entry here is decoded as "locality" by deserializeFstWeb.
 */
const PLACETYPE_ORDER = [
|
|
19
|
+
"country",
|
|
20
|
+
"region",
|
|
21
|
+
"county",
|
|
22
|
+
"locality",
|
|
23
|
+
"localadmin",
|
|
24
|
+
"borough",
|
|
25
|
+
"neighbourhood",
|
|
26
|
+
"postalcode",
|
|
27
|
+
"campus",
|
|
28
|
+
"dependency",
|
|
29
|
+
"street_affix",
|
|
30
|
+
];
|
|
31
|
+
export function deserializeFstWeb(input) {
|
|
32
|
+
const bytes = input instanceof ArrayBuffer ? new Uint8Array(input) : input;
|
|
33
|
+
const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
|
|
34
|
+
const decoder = new TextDecoder("utf-8");
|
|
35
|
+
if (bytes.byteLength < HEADER_SIZE)
|
|
36
|
+
throw new Error("FST buffer too small for header");
|
|
37
|
+
if (bytes[0] !== MAGIC_BYTES[0] ||
|
|
38
|
+
bytes[1] !== MAGIC_BYTES[1] ||
|
|
39
|
+
bytes[2] !== MAGIC_BYTES[2] ||
|
|
40
|
+
bytes[3] !== MAGIC_BYTES[3]) {
|
|
41
|
+
throw new Error("FST magic mismatch");
|
|
42
|
+
}
|
|
43
|
+
const version = view.getUint16(4, true);
|
|
44
|
+
if (version < 1 || version > MAX_VERSION) {
|
|
45
|
+
throw new Error(`FST version ${version} unsupported (expected 1..${MAX_VERSION})`);
|
|
46
|
+
}
|
|
47
|
+
const isV2 = version >= 2;
|
|
48
|
+
const stateCount = view.getUint32(8, true);
|
|
49
|
+
const edgeCount = view.getUint32(12, true);
|
|
50
|
+
const _placeCount = view.getUint32(16, true);
|
|
51
|
+
const stringCount = view.getUint32(20, true);
|
|
52
|
+
const stringBytes = view.getUint32(24, true);
|
|
53
|
+
let pos = HEADER_SIZE;
|
|
54
|
+
// --- String table ---
|
|
55
|
+
const strOffsets = new Uint32Array(stringCount + 1);
|
|
56
|
+
for (let i = 0; i <= stringCount; i++) {
|
|
57
|
+
strOffsets[i] = view.getUint32(pos, true);
|
|
58
|
+
pos += 4;
|
|
59
|
+
}
|
|
60
|
+
const strDataStart = pos;
|
|
61
|
+
const strings = new Array(stringCount);
|
|
62
|
+
for (let i = 0; i < stringCount; i++) {
|
|
63
|
+
const start = strDataStart + strOffsets[i];
|
|
64
|
+
const end = strDataStart + strOffsets[i + 1];
|
|
65
|
+
strings[i] = decoder.decode(bytes.subarray(start, end));
|
|
66
|
+
}
|
|
67
|
+
pos += stringBytes;
|
|
68
|
+
// --- State table ---
|
|
69
|
+
const stateEntrySize = version >= 4 ? 16 : 12;
|
|
70
|
+
const stateTableStart = pos;
|
|
71
|
+
const edgeTableStart = stateTableStart + stateCount * stateEntrySize;
|
|
72
|
+
const placeTableStart = edgeTableStart + edgeCount * EDGE_ENTRY_SIZE;
|
|
73
|
+
const nodes = new Array(stateCount);
|
|
74
|
+
for (let si = 0; si < stateCount; si++) {
|
|
75
|
+
const sp = stateTableStart + si * stateEntrySize;
|
|
76
|
+
const edgeStart = view.getUint32(sp, true);
|
|
77
|
+
const placeStart = view.getUint32(sp + 4, true);
|
|
78
|
+
const edgeCountForState = version >= 4 ? view.getUint32(sp + 8, true) : view.getUint16(sp + 8, true);
|
|
79
|
+
const placeCountForState = version >= 4 ? view.getUint32(sp + 12, true) : view.getUint16(sp + 10, true);
|
|
80
|
+
const edges = new Map();
|
|
81
|
+
for (let ei = 0; ei < edgeCountForState; ei++) {
|
|
82
|
+
const ep = edgeTableStart + (edgeStart + ei) * EDGE_ENTRY_SIZE;
|
|
83
|
+
const stringIdx = view.getUint32(ep, true);
|
|
84
|
+
const target = view.getUint32(ep + 4, true);
|
|
85
|
+
edges.set(strings[stringIdx], target);
|
|
86
|
+
}
|
|
87
|
+
const places = new Array(placeCountForState);
|
|
88
|
+
for (let pi = 0; pi < placeCountForState; pi++) {
|
|
89
|
+
const pp = placeTableStart + (placeStart + pi) * PLACE_ENTRY_SIZE;
|
|
90
|
+
const chainLen = view.getUint8(pp + 5);
|
|
91
|
+
const parentChain = [];
|
|
92
|
+
for (let ci = 0; ci < chainLen; ci++) {
|
|
93
|
+
parentChain.push(view.getUint32(pp + 24 + ci * 4, true));
|
|
94
|
+
}
|
|
95
|
+
const rawImportance = isV2
|
|
96
|
+
? view.getFloat32(pp + 12, true)
|
|
97
|
+
: Math.min(1.0, Math.log2(1 + view.getUint32(pp + 12, true) / 1000) / 14);
|
|
98
|
+
places[pi] = {
|
|
99
|
+
wofID: view.getUint32(pp, true),
|
|
100
|
+
placetype: PLACETYPE_ORDER[view.getUint8(pp + 4)] ?? "locality",
|
|
101
|
+
name: strings[view.getUint32(pp + 8, true)],
|
|
102
|
+
importance: rawImportance,
|
|
103
|
+
lat: view.getFloat32(pp + 16, true),
|
|
104
|
+
lon: view.getFloat32(pp + 20, true),
|
|
105
|
+
parentChain,
|
|
106
|
+
};
|
|
107
|
+
}
|
|
108
|
+
nodes[si] = { edges, places };
|
|
109
|
+
}
|
|
110
|
+
return FstMatcher.fromNodes(nodes);
|
|
111
|
+
}
|
|
112
|
+
export function readFstProvenanceWeb(input) {
|
|
113
|
+
const bytes = input instanceof ArrayBuffer ? new Uint8Array(input) : input;
|
|
114
|
+
const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
|
|
115
|
+
const decoder = new TextDecoder("utf-8");
|
|
116
|
+
if (bytes.byteLength < HEADER_SIZE)
|
|
117
|
+
return undefined;
|
|
118
|
+
const version = view.getUint16(4, true);
|
|
119
|
+
if (version < 3)
|
|
120
|
+
return undefined;
|
|
121
|
+
const provenanceOffset = view.getUint32(28, true);
|
|
122
|
+
if (provenanceOffset === 0 || provenanceOffset >= bytes.byteLength)
|
|
123
|
+
return undefined;
|
|
124
|
+
try {
|
|
125
|
+
const jsonLen = view.getUint32(provenanceOffset, true);
|
|
126
|
+
const jsonStr = decoder.decode(bytes.subarray(provenanceOffset + 4, provenanceOffset + 4 + jsonLen));
|
|
127
|
+
return JSON.parse(jsonStr);
|
|
128
|
+
}
|
|
129
|
+
catch {
|
|
130
|
+
return undefined;
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
//# sourceMappingURL=fst-deserialize-web.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fst-deserialize-web.js","sourceRoot":"","sources":["../fst-deserialize-web.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAGH,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAG7C,MAAM,WAAW,GAAG,EAAE,CAAA;AACtB,MAAM,eAAe,GAAG,CAAC,CAAA;AACzB,MAAM,gBAAgB,GAAG,EAAE,CAAA;AAC3B,MAAM,WAAW,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAA,CAAC,UAAU;AACvD,MAAM,WAAW,GAAG,CAAC,CAAA;AAErB,MAAM,eAAe,GAA2B;IAC/C,SAAS;IACT,QAAQ;IACR,QAAQ;IACR,UAAU;IACV,YAAY;IACZ,SAAS;IACT,eAAe;IACf,YAAY;IACZ,QAAQ;IACR,YAAY;IACZ,cAAc;CACd,CAAA;AAED,MAAM,UAAU,iBAAiB,CAAC,KAA+B;IAChE,MAAM,KAAK,GAAG,KAAK,YAAY,WAAW,CAAC,CAAC,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAA;IAC1E,MAAM,IAAI,GAAG,IAAI,QAAQ,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,UAAU,EAAE,KAAK,CAAC,UAAU,CAAC,CAAA;IAC3E,MAAM,OAAO,GAAG,IAAI,WAAW,CAAC,OAAO,CAAC,CAAA;IAExC,IAAI,KAAK,CAAC,UAAU,GAAG,WAAW;QAAE,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAA;IAEtF,IACC,KAAK,CAAC,CAAC,CAAC,KAAK,WAAW,CAAC,CAAC,CAAC;QAC3B,KAAK,CAAC,CAAC,CAAC,KAAK,WAAW,CAAC,CAAC,CAAC;QAC3B,KAAK,CAAC,CAAC,CAAC,KAAK,WAAW,CAAC,CAAC,CAAC;QAC3B,KAAK,CAAC,CAAC,CAAC,KAAK,WAAW,CAAC,CAAC,CAAC,EAC1B,CAAC;QACF,MAAM,IAAI,KAAK,CAAC,oBAAoB,CAAC,CAAA;IACtC,CAAC;IAED,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,CAAA;IACvC,IAAI,OAAO,GAAG,CAAC,IAAI,OAAO,GAAG,WAAW,EAAE,CAAC;QAC1C,MAAM,IAAI,KAAK,CAAC,eAAe,OAAO,6BAA6B,WAAW,GAAG,CAAC,CAAA;IACnF,CAAC;IACD,MAAM,IAAI,GAAG,OAAO,IAAI,CAAC,CAAA;IAEzB,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,CAAA;IAC1C,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,IAAI,CAAC,CAAA;IAC1C,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,IAAI,CAAC,CAAA;IAC5C,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,IAAI,CAAC,CAAA;IAC5C,MAAM,WAAW,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,IAAI,CAAC,CAAA;IAE5C,IAAI,GAAG,GAAG,WAAW,CAAA;IAErB,uBAAuB;IACvB,MAAM,UAAU,GAAG,IAAI,WAAW,CAAC,WAAW,GAAG,CAAC,CAAC,CAAA;IACnD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,UAAU,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,CAAC,CAAA;QACzC,
GAAG,IAAI,CAAC,CAAA;IACT,CAAC;IACD,MAAM,YAAY,GAAG,GAAG,CAAA;IACxB,MAAM,OAAO,GAAa,IAAI,KAAK,CAAC,WAAW,CAAC,CAAA;IAChD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,KAAK,GAAG,YAAY,GAAG,UAAU,CAAC,CAAC,CAAE,CAAA;QAC3C,MAAM,GAAG,GAAG,YAAY,GAAG,UAAU,CAAC,CAAC,GAAG,CAAC,CAAE,CAAA;QAC7C,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAA;IACxD,CAAC;IACD,GAAG,IAAI,WAAW,CAAA;IAElB,sBAAsB;IACtB,MAAM,cAAc,GAAG,OAAO,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,CAAA;IAC7C,MAAM,eAAe,GAAG,GAAG,CAAA;IAC3B,MAAM,cAAc,GAAG,eAAe,GAAG,UAAU,GAAG,cAAc,CAAA;IACpE,MAAM,eAAe,GAAG,cAAc,GAAG,SAAS,GAAG,eAAe,CAAA;IAEpE,MAAM,KAAK,GAAc,IAAI,KAAK,CAAC,UAAU,CAAC,CAAA;IAE9C,KAAK,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,UAAU,EAAE,EAAE,EAAE,EAAE,CAAC;QACxC,MAAM,EAAE,GAAG,eAAe,GAAG,EAAE,GAAG,cAAc,CAAA;QAChD,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,IAAI,CAAC,CAAA;QAC1C,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,GAAG,CAAC,EAAE,IAAI,CAAC,CAAA;QAC/C,MAAM,iBAAiB,GAAG,OAAO,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,GAAG,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,GAAG,CAAC,EAAE,IAAI,CAAC,CAAA;QACpG,MAAM,kBAAkB,GAAG,OAAO,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,GAAG,EAAE,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,GAAG,EAAE,EAAE,IAAI,CAAC,CAAA;QAEvG,MAAM,KAAK,GAAG,IAAI,GAAG,EAAkB,CAAA;QACvC,KAAK,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,iBAAiB,EAAE,EAAE,EAAE,EAAE,CAAC;YAC/C,MAAM,EAAE,GAAG,cAAc,GAAG,CAAC,SAAS,GAAG,EAAE,CAAC,GAAG,eAAe,CAAA;YAC9D,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,IAAI,CAAC,CAAA;YAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,GAAG,CAAC,EAAE,IAAI,CAAC,CAAA;YAC3C,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,SAAS,CAAE,EAAE,MAAM,CAAC,CAAA;QACvC,CAAC;QAED,MAAM,MAAM,GAAiB,IAAI,KAAK,CAAC,kBAAkB,CAAC,CAAA;QAC1D,KAAK,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,kBAAkB,EAAE,EAAE,EAAE,EAAE,CAAC;YAChD,MAAM,EAAE,GAAG,eAAe,GAAG,CAAC,UAAU,GAAG,EAAE,CAAC,GAAG,gBAAgB,CAAA;YACjE,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,EAAE,G
AAG,CAAC,CAAC,CAAA;YACtC,MAAM,WAAW,GAAa,EAAE,CAAA;YAChC,KAAK,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,QAAQ,EAAE,EAAE,EAAE,EAAE,CAAC;gBACtC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,IAAI,CAAC,CAAC,CAAA;YACzD,CAAC;YACD,MAAM,aAAa,GAAG,IAAI;gBACzB,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,EAAE,GAAG,EAAE,EAAE,IAAI,CAAC;gBAChC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,GAAG,EAAE,EAAE,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC,CAAA;YAE1E,MAAM,CAAC,EAAE,CAAC,GAAG;gBACZ,KAAK,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,IAAI,CAAC;gBAC/B,SAAS,EAAE,eAAe,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,UAAU;gBAC/D,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,GAAG,CAAC,EAAE,IAAI,CAAC,CAAE;gBAC5C,UAAU,EAAE,aAAa;gBACzB,GAAG,EAAE,IAAI,CAAC,UAAU,CAAC,EAAE,GAAG,EAAE,EAAE,IAAI,CAAC;gBACnC,GAAG,EAAE,IAAI,CAAC,UAAU,CAAC,EAAE,GAAG,EAAE,EAAE,IAAI,CAAC;gBACnC,WAAW;aACX,CAAA;QACF,CAAC;QAED,KAAK,CAAC,EAAE,CAAC,GAAG,EAAE,KAAK,EAAE,MAAM,EAAE,CAAA;IAC9B,CAAC;IAED,OAAO,UAAU,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;AACnC,CAAC;AAED,MAAM,UAAU,oBAAoB,CAAC,KAA+B;IACnE,MAAM,KAAK,GAAG,KAAK,YAAY,WAAW,CAAC,CAAC,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAA;IAC1E,MAAM,IAAI,GAAG,IAAI,QAAQ,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,UAAU,EAAE,KAAK,CAAC,UAAU,CAAC,CAAA;IAC3E,MAAM,OAAO,GAAG,IAAI,WAAW,CAAC,OAAO,CAAC,CAAA;IAExC,IAAI,KAAK,CAAC,UAAU,GAAG,WAAW;QAAE,OAAO,SAAS,CAAA;IACpD,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,CAAA;IACvC,IAAI,OAAO,GAAG,CAAC;QAAE,OAAO,SAAS,CAAA;IACjC,MAAM,gBAAgB,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,EAAE,IAAI,CAAC,CAAA;IACjD,IAAI,gBAAgB,KAAK,CAAC,IAAI,gBAAgB,IAAI,KAAK,CAAC,UAAU;QAAE,OAAO,SAAS,CAAA;IACpF,IAAI,CAAC;QACJ,MAAM,OAAO,GAAG,IAAI,CAAC,SAAS,CAAC,gBAAgB,EAAE,IAAI,CAAC,CAAA;QACtD,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,gBAAgB,GAAG,CAAC,EAAE,gBAAgB,GAAG,CAAC,GAAG,OAAO,CAAC,CAAC,CAAA;QACpG,OAAO,IAAI,CAAC,KAAK,CAAC,OAAO,CAAkB,CAAA;IAC5C,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,SAAS,CAAA;IACjB,CAAC;AACF,CAAC"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* In-memory FST matcher. Built by `fst-builder.ts`, queried at runtime for emission priors and CLI
|
|
7
|
+
* introspection. The structure is a deterministic trie over normalized tokens with PlaceEntry
|
|
8
|
+
* arrays at accepting states.
|
|
9
|
+
*/
|
|
10
|
+
import type { FstContinuation, FstMatchResult, FstQueryResult, PlaceEntry } from "./fst-types.js";
|
|
11
|
+
/**
 * One state of the FST: `edges` maps an edge label to a numeric target, and `places` holds the
 * PlaceEntry values stored at this state.
 * NOTE(review): the target is presumably an index into the matcher's node array — confirm against
 * fst-matcher.ts.
 */
interface FstNode {
|
|
12
|
+
edges: Map<string, number>;
|
|
13
|
+
places: PlaceEntry[];
|
|
14
|
+
}
|
|
15
|
+
/**
 * In-memory matcher wrapping an array of FstNode states. Instances are created from a node array
 * via the constructor or `fromNodes`, and `toNodes` exposes that array again for serialization.
 */
export declare class FstMatcher {
|
|
16
|
+
private nodes;
|
|
17
|
+
constructor(nodes: FstNode[]);
|
|
18
|
+
get stateCount(): number;
|
|
19
|
+
get placeCount(): number;
|
|
20
|
+
walk(tokens: string[]): FstMatchResult | null;
|
|
21
|
+
walkFrom(prev: FstMatchResult, token: string): FstMatchResult | null;
|
|
22
|
+
accepting(stateId: number): PlaceEntry[];
|
|
23
|
+
continuations(stateId: number): FstContinuation[];
|
|
24
|
+
query(text: string): FstQueryResult;
|
|
25
|
+
get nodeCount(): number;
|
|
26
|
+
/** Expose the internal node array for serialization. */
|
|
27
|
+
toNodes(): readonly FstNode[];
|
|
28
|
+
static fromNodes(nodes: FstNode[]): FstMatcher;
|
|
29
|
+
}
|
|
30
|
+
/** Normalize text into FST tokens: lowercase, NFKC, strip punctuation, split on whitespace. */
|
|
31
|
+
export declare function normalizeTokens(text: string): string[];
|
|
32
|
+
export type { FstNode };
|
|
33
|
+
//# sourceMappingURL=fst-matcher.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fst-matcher.d.ts","sourceRoot":"","sources":["../fst-matcher.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EAAE,eAAe,EAAE,cAAc,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAA;AAEjG,UAAU,OAAO;IAChB,KAAK,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC1B,MAAM,EAAE,UAAU,EAAE,CAAA;CACpB;AAED,qBAAa,UAAU;IACtB,OAAO,CAAC,KAAK,CAAW;gBAEZ,KAAK,EAAE,OAAO,EAAE;IAI5B,IAAI,UAAU,IAAI,MAAM,CAEvB;IAED,IAAI,UAAU,IAAI,MAAM,CAIvB;IAED,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,cAAc,GAAG,IAAI;IAa7C,QAAQ,CAAC,IAAI,EAAE,cAAc,EAAE,KAAK,EAAE,MAAM,GAAG,cAAc,GAAG,IAAI;IASpE,SAAS,CAAC,OAAO,EAAE,MAAM,GAAG,UAAU,EAAE;IAIxC,aAAa,CAAC,OAAO,EAAE,MAAM,GAAG,eAAe,EAAE;IAejD,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,cAAc;IA8BnC,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,wDAAwD;IACxD,OAAO,IAAI,SAAS,OAAO,EAAE;IAI7B,MAAM,CAAC,SAAS,CAAC,KAAK,EAAE,OAAO,EAAE,GAAG,UAAU;CAG9C;AAED,+FAA+F;AAC/F,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAOtD;AAED,YAAY,EAAE,OAAO,EAAE,CAAA"}
|