@mailwoman/neural 2.2.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/out/anchor-inference.d.ts +57 -0
  2. package/out/anchor-inference.d.ts.map +1 -0
  3. package/out/anchor-inference.js +94 -0
  4. package/out/anchor-inference.js.map +1 -0
  5. package/out/browser.d.ts +2 -0
  6. package/out/browser.d.ts.map +1 -1
  7. package/out/browser.js +4 -0
  8. package/out/browser.js.map +1 -1
  9. package/out/classifier.d.ts +62 -2
  10. package/out/classifier.d.ts.map +1 -1
  11. package/out/classifier.js +78 -17
  12. package/out/classifier.js.map +1 -1
  13. package/out/fst-prior.d.ts +71 -0
  14. package/out/fst-prior.d.ts.map +1 -0
  15. package/out/fst-prior.js +173 -0
  16. package/out/fst-prior.js.map +1 -0
  17. package/out/index.d.ts +3 -0
  18. package/out/index.d.ts.map +1 -1
  19. package/out/index.js +3 -0
  20. package/out/index.js.map +1 -1
  21. package/out/labels.d.ts +3 -0
  22. package/out/labels.d.ts.map +1 -1
  23. package/out/labels.js +13 -0
  24. package/out/labels.js.map +1 -1
  25. package/out/onnx-runner.d.ts +8 -1
  26. package/out/onnx-runner.d.ts.map +1 -1
  27. package/out/onnx-runner.js +31 -1
  28. package/out/onnx-runner.js.map +1 -1
  29. package/out/postcode-anchor.d.ts +117 -0
  30. package/out/postcode-anchor.d.ts.map +1 -0
  31. package/out/postcode-anchor.js +269 -0
  32. package/out/postcode-anchor.js.map +1 -0
  33. package/out/postcode-binary-resolver.d.ts +60 -0
  34. package/out/postcode-binary-resolver.d.ts.map +1 -0
  35. package/out/postcode-binary-resolver.js +208 -0
  36. package/out/postcode-binary-resolver.js.map +1 -0
  37. package/out/postcode-repair.d.ts +65 -0
  38. package/out/postcode-repair.d.ts.map +1 -0
  39. package/out/postcode-repair.js +171 -0
  40. package/out/postcode-repair.js.map +1 -0
  41. package/out/proposal-classifier.d.ts.map +1 -1
  42. package/out/proposal-classifier.js +3 -1
  43. package/out/proposal-classifier.js.map +1 -1
  44. package/out/query-shape-prior.d.ts +12 -0
  45. package/out/query-shape-prior.d.ts.map +1 -1
  46. package/out/query-shape-prior.js +132 -2
  47. package/out/query-shape-prior.js.map +1 -1
  48. package/out/street-morphology-prior.d.ts +56 -0
  49. package/out/street-morphology-prior.d.ts.map +1 -0
  50. package/out/street-morphology-prior.js +159 -0
  51. package/out/street-morphology-prior.js.map +1 -0
  52. package/out/unit-repair.d.ts +46 -0
  53. package/out/unit-repair.d.ts.map +1 -0
  54. package/out/unit-repair.js +147 -0
  55. package/out/unit-repair.js.map +1 -0
  56. package/out/vitest.config.d.ts.map +1 -1
  57. package/out/vitest.config.js +3 -0
  58. package/out/vitest.config.js.map +1 -1
  59. package/out/weights.d.ts +27 -3
  60. package/out/weights.d.ts.map +1 -1
  61. package/out/weights.js +46 -2
  62. package/out/weights.js.map +1 -1
  63. package/package.json +6 -2
@@ -0,0 +1,269 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Postcode anchor — the first member of the "anchor-based parsing" family (Direction D, #240). See
7
+ * `docs/articles/plan/2026-06-03-anchor-based-parsing.md`.
8
+ *
9
+ * A postcode is the most information-dense token in an address: a hierarchical geo-encoding that
10
+ * places a query on Earth far more cheaply than the rest of the parse. This module lifts the
11
+ * postcode out of the BIO sequence-labelling problem and treats it as a structured anchor. It
12
+ * runs the same per-country shape regexes the decoder repair pass uses ({@link collectMatches}),
13
+ * resolves each shaped span against a postcode gazetteer, and returns a SOFT signal: a country
14
+ * posterior plus a calibrated confidence. It never decides a postcode's identity on its own — it
15
+ * reports "this string is (or is not) a real postcode, in these countries, near here", and leaves
16
+ * the parser to weigh that against the surrounding tokens.
17
+ *
18
+ * Two design rules carried from the DeepSeek consult
19
+ * (`.agents/skills/deepseek-consult/ds-pc-turn{1,2}-postcode-anchor.txt`):
20
+ *
21
+ * - The country posterior is UNIFORM over the countries a string actually exists in. We never weight
22
+ * by per-country postcode volume, because that skews "75001" toward whichever country owns
23
+ * more 5-digit codes — the exact bias the anchor exists to avoid. Disambiguation is the
24
+ * parser's job, using script, city tokens, and user locale.
25
+ * - Confidence combines gazetteer MEMBERSHIP with country AMBIGUITY. A string that matches a postcode
26
+ * regex but exists in no gazetteer (a bare `27`, or a 5-digit house number that is not a real
27
+ * code) gets confidence 0, so the parser treats it as a house number. A real-but-ambiguous
28
+ * code (`75001` in FR and US) gets moderate confidence. A real, single-country code gets
29
+ * 1.0.
30
+ */
31
+ import { candidateSystemsForPostcode } from "@mailwoman/codex";
32
+ import { isGermanStreetToken } from "@mailwoman/codex/de";
33
+ import { isFrenchStreetWord } from "@mailwoman/codex/fr";
34
+ import { isStreetSuffixToken, isUsStateAbbreviation } from "@mailwoman/codex/us";
35
+ import { collectMatches } from "./postcode-repair.js";
36
/**
 * Country count at which the ambiguity term of the confidence formula reaches zero: a code shared
 * by `MAX_COUNTRIES` (or more) countries scores `1 - log2(k) / log2(MAX_COUNTRIES) <= 0`.
 */
const MAX_COUNTRIES = 10;
/**
 * Multiplier applied to the confidence of a fuzzy (edit-distance-1) match, which is weaker
 * evidence than an exact gazetteer hit.
 */
const FUZZY_PENALTY = 0.6;
43
/**
 * Enumerate the strings one class-aware edit away from `s`.
 *
 * Four passes run in a fixed order — deletions, substitutions, insertions, adjacent
 * transpositions — and the result preserves first-seen order with duplicates removed. A
 * substituted or inserted character always comes from the same class (digit or uppercase letter)
 * as the character at that position; insertion at the very end borrows the class of the last
 * character. Characters outside `0-9` / `A-Z` have no class, so nothing is substituted for them or
 * inserted next to them.
 *
 * @param s Postcode string; only digits and uppercase ASCII letters have a character class here.
 * @returns Distinct variants, never including `s` itself.
 */
export function editDistance1Variants(s) {
    const DIGITS = "0123456789";
    const LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const alphabetOf = (ch) => {
        if (/[0-9]/.test(ch))
            return DIGITS;
        if (/[A-Z]/.test(ch))
            return LETTERS;
        return "";
    };
    const found = new Set();
    const length = s.length;
    // Pass 1: drop one character.
    for (let at = 0; at < length; at++) {
        found.add(s.slice(0, at) + s.slice(at + 1));
    }
    // Pass 2: replace one character with a different one of the same class.
    for (let at = 0; at < length; at++) {
        const current = s[at];
        const head = s.slice(0, at);
        const tail = s.slice(at + 1);
        for (const replacement of alphabetOf(current)) {
            if (replacement === current)
                continue;
            found.add(head + replacement + tail);
        }
    }
    // Pass 3: insert one character, typed like the character it lands in front of (or, at the end,
    // like the last character).
    for (let at = 0; at <= length; at++) {
        const neighbour = s[at] ?? s[at - 1] ?? "";
        const head = s.slice(0, at);
        const tail = s.slice(at);
        for (const inserted of alphabetOf(neighbour)) {
            found.add(head + inserted + tail);
        }
    }
    // Pass 4: swap two adjacent characters.
    for (let at = 0; at < length - 1; at++) {
        found.add(s.slice(0, at) + s[at + 1] + s[at] + s.slice(at + 2));
    }
    // Swapping two equal neighbours reproduces the input; it is not a variant.
    found.delete(s);
    return Array.from(found);
}
68
/**
 * Reduce a matched span to the key form the gazetteer is queried with.
 *
 * The span is trimmed, uppercased, and runs of whitespace are collapsed to one space. Two
 * country-specific rewrites then apply:
 *
 * - German courtesy prefix: `D-68161` becomes `68161`.
 * - Dutch codes: `1012 LM` becomes `1012LM` (the gazetteer stores them without the space).
 *
 * @param raw The span text as it appeared in the input.
 * @returns The canonical lookup key.
 */
export function normalizePostcode(raw) {
    const canonical = raw.trim().toUpperCase().replace(/\s+/g, " ");
    if (/^D-\d{5}$/.test(canonical)) {
        // Drop the two-character "D-" prefix; what remains is five digits.
        return canonical.slice(2);
    }
    if (/^\d{4} [A-Z]{2}$/.test(canonical)) {
        // The pattern guarantees exactly one space, so a single replace removes it.
        return canonical.replace(" ", "");
    }
    return canonical;
}
81
/**
 * Extract the outward half of a normalized GB unit postcode (`SO4 3RX` → `SO4`).
 *
 * The string is split at its first space; the part after it must be a GB inward code (one digit
 * followed by two uppercase letters). Per the module notes the GB gazetteer is keyed by outward
 * code, so the extractor uses this as a fallback when the full unit postcode is not found.
 *
 * @param normalized A postcode already passed through `normalizePostcode`.
 * @returns The outward code, or `null` when the string is not shaped like a GB unit postcode.
 */
export function gbOutwardCode(normalized) {
    const spaceAt = normalized.indexOf(" ");
    // No space at all, or nothing in front of it: there is no outward half.
    if (spaceAt < 1) {
        return null;
    }
    const inward = normalized.slice(spaceAt + 1);
    if (!/^\d[A-Z]{2}$/.test(inward)) {
        return null;
    }
    return normalized.slice(0, spaceAt);
}
94
/**
 * Map the number of countries a code exists in to a confidence in [0, 1].
 *
 * Zero (or fewer) countries means the code is in no gazetteer → 0. Exactly one country → 1.
 * Otherwise the score is `1 - log2(k) / log2(MAX_COUNTRIES)` clamped to [0, 1]: roughly 0.70 for
 * two countries, falling to 0 once `k` reaches `MAX_COUNTRIES`.
 *
 * @param k Number of distinct countries the code was found in.
 * @returns Confidence between 0 and 1.
 */
function confidenceFromCountryCount(k) {
    if (k <= 0) {
        return 0;
    }
    if (k === 1) {
        return 1;
    }
    const ambiguity = Math.log2(k) / Math.log2(MAX_COUNTRIES);
    const unclamped = 1 - ambiguity;
    return Math.max(0, Math.min(1, unclamped));
}
106
/**
 * Confidence multiplier for a digit-only code that shares its comma-delimited segment with a
 * street word.
 *
 * A house number and a numeric postcode have the same shape, so gazetteer membership alone cannot
 * tell `12345 Main St` from `San Francisco 94105`. Position can: house numbers sit next to the
 * street, postcodes next to the city. The factor scales confidence down instead of zeroing it, so
 * a lone code on a street-only line remains usable while a genuine trailing postcode out-ranks it.
 */
const HOUSE_NUMBER_PENALTY = 0.2;
/**
 * Standalone street-type words for the locales handled inline (Spanish and Italian). US, German
 * and French vocabularies are imported from `@mailwoman/codex`; Dutch is covered by
 * `NL_STREET_SUFFIXES`.
 */
const NON_US_STREET_WORDS = new Set([
    // Spanish
    "calle", "avenida", "avda", "plaza", "paseo", "camino", "carrera", "ronda",
    // Italian
    "via", "viale", "piazza", "corso", "largo", "vicolo", "strada", "contrada",
]);
/**
 * Dutch street suffixes. Dutch street names are compounds, so these are compared against the END
 * of a token rather than the whole token.
 */
const NL_STREET_SUFFIXES = [
    "straat",
    "laan",
    "plein",
    "gracht",
    "kade",
    "dijk",
    "steeg",
    "dreef",
];
143
+ /**
144
+ * True when a token denotes a street. US suffixes come from the USPS Pub-28 table in
145
+ * `@mailwoman/codex/us` (complete, so `Trl`/`Holw`/`Xing` all match), EXCEPT the abbreviations that
146
+ * collide with a state code — `KY` (Key vs Kentucky), `PR` (Prairie vs Puerto Rico) — which sit in
147
+ * the postcode's own `City, ST ZIP` segment. German compounds come from `@mailwoman/codex/de`
148
+ * ({@link isGermanStreetToken}), whose suffix set already excludes the place-name endings (`-berg`,
149
+ * `-burg`, `-dorf`) that would otherwise flag a city token. French voie words come from
150
+ * `@mailwoman/codex/fr` ({@link isFrenchStreetWord}). ES/IT and Dutch fall back to the inline
151
+ * lists.
152
+ *
153
+ * `systems` GATES which vocabularies are consulted — only the systems the postcode plausibly
154
+ * belongs to (its gazetteer membership, e.g. a US-only ZIP gates to `{us}` and never checks the
155
+ * German or French vocab). This is what lets the check scale to 15-20 systems without a
156
+ * cross-locale collision (German `-ring` vs English `spring`): an unrelated system's vocabulary is
157
+ * simply never asked. The gate carries lowercase system/locale tags (`us`, `de`, `fr`, `es`, `it`,
158
+ * `nl`).
159
+ */
160
+ function looksLikeStreetWord(token, systems) {
161
+ const t = token.toLowerCase().replace(/[^\p{L}]/gu, "");
162
+ if (t.length < 2)
163
+ return false;
164
+ if (systems.has("us") && isStreetSuffixToken(t) && !isUsStateAbbreviation(t))
165
+ return true;
166
+ if (systems.has("de") && isGermanStreetToken(t))
167
+ return true;
168
+ if (systems.has("fr") && isFrenchStreetWord(t))
169
+ return true;
170
+ if ((systems.has("es") || systems.has("it")) && NON_US_STREET_WORDS.has(t))
171
+ return true;
172
+ if (systems.has("nl"))
173
+ return NL_STREET_SUFFIXES.some((s) => t.length > s.length && t.endsWith(s));
174
+ return false;
175
+ }
176
+ /**
177
+ * Position-aware confidence factor for a postcode span: `1` for anything that cannot be confused
178
+ * with a house number, and {@link HOUSE_NUMBER_PENALTY} for a digit-only code sharing its
179
+ * comma-delimited segment with a street word. This is the structural prior that lets the anchor
180
+ * tell a leading `12345 Main St` house number from a trailing `San Francisco 94105` postcode with
181
+ * no model in the loop — and lets a consumer pick the right span by confidence instead of by raw
182
+ * position.
183
+ *
184
+ * `systems` narrows the street vocabularies to the ones this code plausibly belongs to (its
185
+ * gazetteer membership, or — for a code in no gazetteer — the format-shape candidates from codex).
186
+ */
187
+ function positionFactor(text, start, normalized, systems) {
188
+ if (!/^\d+$/.test(normalized))
189
+ return 1; // only digit-only codes collide with house numbers
190
+ const segStart = text.lastIndexOf(",", start - 1) + 1;
191
+ let segEnd = text.indexOf(",", start);
192
+ if (segEnd < 0)
193
+ segEnd = text.length;
194
+ for (const token of text.slice(segStart, segEnd).split(/\s+/)) {
195
+ if (looksLikeStreetWord(token, systems))
196
+ return HOUSE_NUMBER_PENALTY;
197
+ }
198
+ return 1;
199
+ }
200
+ /**
201
+ * Extract postcode anchors from raw text. For each postcode-shaped span, resolve it against the
202
+ * gazetteer and emit a soft anchor (country posterior + confidence). Spans that match a shape but
203
+ * exist in no gazetteer are still returned, with an empty posterior and confidence 0 — an explicit
204
+ * "looks like a postcode, but isn't one" so the caller can see the extractor fired and chose not to
205
+ * anchor.
206
+ */
207
+ export function extractPostcodeAnchors(text, resolver, opts = {}) {
208
+ const anchors = [];
209
+ for (const match of collectMatches(text)) {
210
+ const spanText = text.slice(match.start, match.end);
211
+ const normalized = normalizePostcode(spanText);
212
+ // Exact first; then the GB outward fallback (structural, not a guess); then edit-distance-1.
213
+ let hits = resolver.lookup(normalized);
214
+ let matchType = hits.length > 0 ? "exact" : "none";
215
+ if (matchType === "none") {
216
+ const outward = gbOutwardCode(normalized);
217
+ if (outward) {
218
+ const outwardHits = resolver.lookup(outward);
219
+ if (outwardHits.length > 0) {
220
+ hits = outwardHits;
221
+ matchType = "outward";
222
+ }
223
+ }
224
+ }
225
+ if (matchType === "none" && opts.fuzzy) {
226
+ const fuzzyHits = [];
227
+ for (const variant of editDistance1Variants(normalized)) {
228
+ for (const h of resolver.lookup(variant))
229
+ fuzzyHits.push(h);
230
+ }
231
+ if (fuzzyHits.length > 0) {
232
+ hits = fuzzyHits;
233
+ matchType = "fuzzy";
234
+ }
235
+ }
236
+ // Membership: distinct countries the postcode exists in (regardless of whether we have a centroid).
237
+ const countries = [...new Set(hits.map((h) => h.country))].sort();
238
+ const k = countries.length;
239
+ const posterior = {};
240
+ for (const c of countries)
241
+ posterior[c] = 1 / k;
242
+ // Placement: one representative coordinate-bearing hit per country (the first with real coords).
243
+ const candidates = [];
244
+ for (const c of countries) {
245
+ const placed = hits.find((h) => h.country === c && h.lat !== 0 && h.lon !== 0);
246
+ if (placed)
247
+ candidates.push(placed);
248
+ }
249
+ // Gate the street-word check to the systems this code plausibly belongs to: its gazetteer
250
+ // membership when known (precise — a US-only ZIP never checks the German vocab), else the
251
+ // format-shape candidates from codex (for a code in no gazetteer; its confidence is 0 anyway).
252
+ const systems = countries.length > 0
253
+ ? new Set(countries.map((c) => c.toLowerCase()))
254
+ : new Set(candidateSystemsForPostcode(normalized));
255
+ const position = positionFactor(text, match.start, normalized, systems);
256
+ const confidence = confidenceFromCountryCount(k) * (matchType === "fuzzy" ? FUZZY_PENALTY : 1) * position;
257
+ anchors.push({
258
+ span: { text: spanText, start: match.start, end: match.end },
259
+ normalized,
260
+ candidates,
261
+ posterior,
262
+ confidence,
263
+ matchType,
264
+ positionFactor: position,
265
+ });
266
+ }
267
+ return anchors;
268
+ }
269
+ //# sourceMappingURL=postcode-anchor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"postcode-anchor.js","sourceRoot":"","sources":["../postcode-anchor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AAEH,OAAO,EAAE,2BAA2B,EAAE,MAAM,kBAAkB,CAAA;AAC9D,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAA;AACzD,OAAO,EAAE,kBAAkB,EAAE,MAAM,qBAAqB,CAAA;AACxD,OAAO,EAAE,mBAAmB,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAA;AAChF,OAAO,EAAE,cAAc,EAAE,MAAM,sBAAsB,CAAA;AA2DrD;;;GAGG;AACH,MAAM,aAAa,GAAG,EAAE,CAAA;AAExB,oGAAoG;AACpG,MAAM,aAAa,GAAG,GAAG,CAAA;AAEzB;;;;;GAKG;AACH,MAAM,UAAU,qBAAqB,CAAC,CAAS;IAC9C,MAAM,OAAO,GAAG,CAAC,EAAU,EAAU,EAAE,CACtC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,4BAA4B,CAAC,CAAC,CAAC,EAAE,CAAA;IACvF,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAA;IAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA,CAAC,YAAY;IAC5F,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACnC,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC;YAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA,CAAC,gBAAgB;IAClH,CAAC;IACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;YAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAA,CAAC,aAAa;IAC5G,CAAC;IACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA,CAAC,iBAAiB;IACvH,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAA;IAClB,OAAO,CAAC,GAAG,QAAQ,CAAC,CAAA;AACrB,CAAC;
AAED;;;;GAIG;AACH,MAAM,UAAU,iBAAiB,CAAC,GAAW;IAC5C,IAAI,CAAC,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;IACrD,IAAI,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA,CAAC,0CAA0C;IAClF,IAAI,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC;QAAE,CAAC,GAAG,CAAC,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,CAAC,CAAA,CAAC,8CAA8C;IACrG,OAAO,CAAC,CAAA;AACT,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,aAAa,CAAC,UAAkB;IAC/C,MAAM,EAAE,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IAClC,IAAI,EAAE,GAAG,CAAC;QAAE,OAAO,IAAI,CAAA;IACvB,OAAO,cAAc,CAAC,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAA;AACtF,CAAC;AAED;;;GAGG;AACH,SAAS,0BAA0B,CAAC,CAAS;IAC5C,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,CAAC,CAAA;IACpB,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,CAAC,CAAA;IACrB,MAAM,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,CAAA;IACrD,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAA;AACnC,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,oBAAoB,GAAG,GAAG,CAAA;AAEhC;;;;GAIG;AACH,MAAM,mBAAmB,GAAG,IAAI,GAAG,CAAC;IACnC,UAAU;IACV,OAAO;IACP,SAAS;IACT,MAAM;IACN,OAAO;IACP,OAAO;IACP,QAAQ;IACR,SAAS;IACT,OAAO;IACP,UAAU;IACV,KAAK;IACL,OAAO;IACP,QAAQ;IACR,OAAO;IACP,OAAO;IACP,QAAQ;IACR,QAAQ;IACR,UAAU;CACV,CAAC,CAAA;AAEF,oGAAoG;AACpG,MAAM,kBAAkB,GAAG,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,OAAO,CAAC,CAAA;AAElG;;;;;;;;;;;;;;;;GAgBG;AACH,SAAS,mBAAmB,CAAC,KAAa,EAAE,OAA4B;IACvE,MAAM,CAAC,GAAG,KAAK,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAA;IACvD,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,KAAK,CAAA;IAC9B,IAAI,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,mBAAmB,CAAC,CAAC,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC,CAAC;QAAE,OAAO,IAAI,CAAA;IACzF,IAAI,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,mBAAmB,CAAC,CAAC,CAAC;QAAE,OAAO,IAAI,CAAA;IAC5D,IAAI,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,kBAAkB,CAAC,CAAC,CAAC;QAAE,OAAO,IAAI,CAAA;IAC3D,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,C
AAC,IAAI,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,IAAI,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC;QAAE,OAAO,IAAI,CAAA;IACvF,IAAI,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC;QAAE,OAAO,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAA;IAClG,OAAO,KAAK,CAAA;AACb,CAAC;AAED;;;;;;;;;;GAUG;AACH,SAAS,cAAc,CAAC,IAAY,EAAE,KAAa,EAAE,UAAkB,EAAE,OAA4B;IACpG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,UAAU,CAAC;QAAE,OAAO,CAAC,CAAA,CAAC,mDAAmD;IAC3F,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,EAAE,KAAK,GAAG,CAAC,CAAC,GAAG,CAAC,CAAA;IACrD,IAAI,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;IACrC,IAAI,MAAM,GAAG,CAAC;QAAE,MAAM,GAAG,IAAI,CAAC,MAAM,CAAA;IACpC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC;QAC/D,IAAI,mBAAmB,CAAC,KAAK,EAAE,OAAO,CAAC;YAAE,OAAO,oBAAoB,CAAA;IACrE,CAAC;IACD,OAAO,CAAC,CAAA;AACT,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,sBAAsB,CACrC,IAAY,EACZ,QAA0B,EAC1B,OAAmC,EAAE;IAErC,MAAM,OAAO,GAAqB,EAAE,CAAA;IAEpC,KAAK,MAAM,KAAK,IAAI,cAAc,CAAC,IAAI,CAAC,EAAE,CAAC;QAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC,CAAA;QACnD,MAAM,UAAU,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAA;QAE9C,6FAA6F;QAC7F,IAAI,IAAI,GAAG,QAAQ,CAAC,MAAM,CAAC,UAAU,CAAC,CAAA;QACtC,IAAI,SAAS,GAAgC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAA;QAC/E,IAAI,SAAS,KAAK,MAAM,EAAE,CAAC;YAC1B,MAAM,OAAO,GAAG,aAAa,CAAC,UAAU,CAAC,CAAA;YACzC,IAAI,OAAO,EAAE,CAAC;gBACb,MAAM,WAAW,GAAG,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;gBAC5C,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC5B,IAAI,GAAG,WAAW,CAAA;oBAClB,SAAS,GAAG,SAAS,CAAA;gBACtB,CAAC;YACF,CAAC;QACF,CAAC;QACD,IAAI,SAAS,KAAK,MAAM,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACxC,MAAM,SAAS,GAAoB,EAAE,CAAA;YACrC,KAAK,MAAM,OAAO,IAAI,qBAAqB,CAAC,UAAU,CAAC,EAAE,CAAC;gBACzD,KAAK,MAAM,CAAC,IAAI,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC;oBAAE,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;YAC5D,CAAC;YACD,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1B,IAAI,GAAG,SAAS,CAAA;gBAChB,SAAS,GAAG,OAAO,CAAA;YACpB,C
AAC;QACF,CAAC;QAED,oGAAoG;QACpG,MAAM,SAAS,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;QACjE,MAAM,CAAC,GAAG,SAAS,CAAC,MAAM,CAAA;QAE1B,MAAM,SAAS,GAA2B,EAAE,CAAA;QAC5C,KAAK,MAAM,CAAC,IAAI,SAAS;YAAE,SAAS,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;QAE/C,iGAAiG;QACjG,MAAM,UAAU,GAAoB,EAAE,CAAA;QACtC,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;YAC3B,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,CAAA;YAC9E,IAAI,MAAM;gBAAE,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACpC,CAAC;QAED,0FAA0F;QAC1F,0FAA0F;QAC1F,+FAA+F;QAC/F,MAAM,OAAO,GACZ,SAAS,CAAC,MAAM,GAAG,CAAC;YACnB,CAAC,CAAC,IAAI,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;YAChD,CAAC,CAAC,IAAI,GAAG,CAAS,2BAA2B,CAAC,UAAU,CAAC,CAAC,CAAA;QAC5D,MAAM,QAAQ,GAAG,cAAc,CAAC,IAAI,EAAE,KAAK,CAAC,KAAK,EAAE,UAAU,EAAE,OAAO,CAAC,CAAA;QACvE,MAAM,UAAU,GAAG,0BAA0B,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,KAAK,OAAO,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAA;QAEzG,OAAO,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,EAAE;YAC5D,UAAU;YACV,UAAU;YACV,SAAS;YACT,UAAU;YACV,SAAS;YACT,cAAc,EAAE,QAAQ;SACxB,CAAC,CAAA;IACH,CAAC;IAED,OAAO,OAAO,CAAA;AACf,CAAC"}
@@ -0,0 +1,60 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Browser-side postcode resolver for the anchor (#240). A pure-JS, zero-dependency
7
+ * `PostcodeResolver` backed by a compact flat binary instead of SQLite, so the postcode anchor
8
+ * runs in the WASM/browser parser behind the same `lookup()` seam as the server-side
9
+ * `WofPostcodeLookup`.
10
+ *
11
+ * This file owns BOTH ends of the format — `serializePostcodeBinary` (run in Node by
12
+ * `scripts/build-postcode-binary.ts`) and `PostcodeBinaryResolver` (run in the browser) — so the
13
+ * layout can never drift between writer and reader.
14
+ *
15
+ * Binary layout (little-endian): magic "PCB1" (4 bytes); u32 recordCount; u8 countryCount; the
16
+ * country table (countryCount × 2 ASCII bytes); u8 keyWidth (max postcode length in bytes); then
17
+ * recordCount records of { key[keyWidth] ASCII right-padded with 0x00, u8 countryIdx, i16 latQ,
18
+ * i16 lonQ }, sorted by key bytes ascending. A postcode present in two countries appears as two
19
+ * adjacent records (same key, different countryIdx).
20
+ *
21
+ * Coordinates are quantized to i16: latQ = round(lat/90 × 32767), lonQ = round(lon/180 × 32767),
22
+ * giving ~300 m resolution — ample for a "which city/region" anchor. A record with latQ = lonQ =
23
+ * 0 means "known postcode, no centroid" (membership only), matching the SQLite resolver's
24
+ * convention.
25
+ */
26
+ import type { AnchorLookup } from "./anchor-inference.js";
27
+ import type { PostcodePlace } from "./postcode-anchor.js";
28
/** One gazetteer row handed to {@link serializePostcodeBinary}. */
export interface PostcodeBinaryEntry {
    /** Postcode key; written as ASCII, right-padded with 0x00 to the file's key width. */
    postcode: string;
    /** Country code; stored as 2 ASCII bytes in the binary's country table. */
    country: string;
    /** Centroid latitude in degrees; quantized to i16 on write. */
    lat: number;
    /** Centroid longitude in degrees; quantized to i16 on write. */
    lon: number;
}
34
/**
 * Write postcode entries to the flat "PCB1" binary consumed by {@link PostcodeBinaryResolver}.
 * Records are emitted sorted by (postcode, country), so every country of a shared postcode sits in
 * one contiguous run. Intended to run in Node at build time.
 *
 * @param entries Gazetteer rows to serialize; the input array is not modified.
 * @returns The serialized bytes.
 */
export declare function serializePostcodeBinary(entries: readonly PostcodeBinaryEntry[]): Uint8Array;
40
/**
 * Browser-safe, pure-JS postcode resolver reading the flat binary. It exposes the same `lookup()`
 * seam as the SQLite-backed `WofPostcodeLookup`, so `extractPostcodeAnchors` works unchanged with
 * either backend.
 */
export declare class PostcodeBinaryResolver {
    #private;
    /**
     * @param bytes The serialized postcode binary, as produced by `serializePostcodeBinary`.
     */
    constructor(bytes: Uint8Array);
    /**
     * Find every record stored under `postcode`.
     *
     * @param postcode The lookup key, expected in normalized form.
     * @returns One place per country the postcode exists in; empty when it is unknown.
     */
    lookup(postcode: string): PostcodePlace[];
    /**
     * Decode the whole binary into an {@link AnchorLookup} (`Map<postcode, AnchorEntry>`) for the
     * neural anchor channel (#239/#240): each postcode maps to a uniform posterior over its member
     * countries plus the mean of its non-zero centroids.
     *
     * This is the browser-side equivalent of the pilot postcode→anchor lookup the model trained
     * against, built live from the shipped binary instead of a precomputed JSON. Records are
     * stored sorted by (postcode, country), so equal keys are contiguous.
     */
    toAnchorLookup(): AnchorLookup;
}
60
+ //# sourceMappingURL=postcode-binary-resolver.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"postcode-binary-resolver.d.ts","sourceRoot":"","sources":["../postcode-binary-resolver.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAA;AACzD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAA;AAOzD,MAAM,WAAW,mBAAmB;IACnC,QAAQ,EAAE,MAAM,CAAA;IAChB,OAAO,EAAE,MAAM,CAAA;IACf,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,EAAE,MAAM,CAAA;CACX;AAUD;;;;GAIG;AACH,wBAAgB,uBAAuB,CAAC,OAAO,EAAE,SAAS,mBAAmB,EAAE,GAAG,UAAU,CA2C3F;AAED;;;GAGG;AACH,qBAAa,sBAAsB;;gBAStB,KAAK,EAAE,UAAU;IA2B7B,MAAM,CAAC,QAAQ,EAAE,MAAM,GAAG,aAAa,EAAE;IA2BzC;;;;;;;;;OASG;IACH,cAAc,IAAI,YAAY;CA+C9B"}
@@ -0,0 +1,208 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Browser-side postcode resolver for the anchor (#240). A pure-JS, zero-dependency
7
+ * `PostcodeResolver` backed by a compact flat binary instead of SQLite, so the postcode anchor
8
+ * runs in the WASM/browser parser behind the same `lookup()` seam as the server-side
9
+ * `WofPostcodeLookup`.
10
+ *
11
+ * This file owns BOTH ends of the format — `serializePostcodeBinary` (run in Node by
12
+ * `scripts/build-postcode-binary.ts`) and `PostcodeBinaryResolver` (run in the browser) — so the
13
+ * layout can never drift between writer and reader.
14
+ *
15
+ * Binary layout (little-endian): magic "PCB1" (4 bytes); u32 recordCount; u8 countryCount; the
16
+ * country table (countryCount × 2 ASCII bytes); u8 keyWidth (max postcode length in bytes); then
17
+ * recordCount records of { key[keyWidth] ASCII right-padded with 0x00, u8 countryIdx, i16 latQ,
18
+ * i16 lonQ }, sorted by key bytes ascending. A postcode present in two countries appears as two
19
+ * adjacent records (same key, different countryIdx).
20
+ *
21
+ * Coordinates are quantized to i16: latQ = round(lat/90 × 32767), lonQ = round(lon/180 × 32767),
22
+ * giving ~300 m resolution — ample for a "which city/region" anchor. A record with latQ = lonQ =
23
+ * 0 means "known postcode, no centroid" (membership only), matching the SQLite resolver's
24
+ * convention.
25
+ */
26
/** File signature: the ASCII bytes "PCB1" (P=0x50 C=0x43 B=0x42 1=0x31) read as a little-endian u32. */
const MAGIC = 0x31_42_43_50;
/** Bytes that follow the key in every record: countryIdx (1) + latQ (2) + lonQ (2). */
const REC_TAIL = 5;
/** Latitude quantization scale: ±90° maps onto ±32767. */
const LAT_Q = 32767 / 90;
/** Longitude quantization scale: ±180° maps onto ±32767. */
const LON_Q = 32767 / 180;
30
/**
 * Write `s` into `out` as a fixed-width key starting at `offset`.
 *
 * The first `width` characters are stored as their low 7 bits; any remaining slots are filled with
 * NUL. NUL sorts below every real character, so a shorter key orders before a longer key sharing
 * its prefix.
 *
 * NOTE(review): characters above 0x7f are masked, not rejected, so a non-ASCII character is
 * stored as an unrelated ASCII byte.
 *
 * @param s Postcode to encode.
 * @param width Number of bytes to write.
 * @param out Destination byte array.
 * @param offset Index in `out` of the first key byte.
 */
function encodeKey(s, width, out, offset) {
    const used = Math.min(s.length, width);
    let i = 0;
    for (; i < used; i++) {
        out[offset + i] = s.charCodeAt(i) & 0x7f;
    }
    for (; i < width; i++) {
        out[offset + i] = 0;
    }
}
38
+ /**
39
+ * Serialize postcode entries into the flat binary. Entries are sorted by (postcode, country) so
40
+ * equal postcodes land in adjacent records. Run in Node; consumed by
41
+ * {@link PostcodeBinaryResolver}.
42
+ */
43
+ export function serializePostcodeBinary(entries) {
44
+ const sorted = [...entries].sort((a, b) => a.postcode < b.postcode
45
+ ? -1
46
+ : a.postcode > b.postcode
47
+ ? 1
48
+ : a.country < b.country
49
+ ? -1
50
+ : a.country > b.country
51
+ ? 1
52
+ : 0);
53
+ const countries = [...new Set(sorted.map((e) => e.country))].sort();
54
+ const countryIdx = new Map(countries.map((c, i) => [c, i]));
55
+ const keyWidth = sorted.reduce((m, e) => Math.max(m, e.postcode.length), 1);
56
+ const recSize = keyWidth + REC_TAIL;
57
+ const headerSize = 4 + 4 + 1 + countries.length * 2 + 1;
58
+ const buf = new Uint8Array(headerSize + sorted.length * recSize);
59
+ const view = new DataView(buf.buffer);
60
+ let o = 0;
61
+ view.setUint32(o, MAGIC, true);
62
+ o += 4;
63
+ view.setUint32(o, sorted.length, true);
64
+ o += 4;
65
+ buf[o++] = countries.length;
66
+ for (const c of countries) {
67
+ buf[o++] = c.charCodeAt(0) & 0x7f;
68
+ buf[o++] = c.charCodeAt(1) & 0x7f;
69
+ }
70
+ buf[o++] = keyWidth;
71
+ for (const e of sorted) {
72
+ encodeKey(e.postcode, keyWidth, buf, o);
73
+ o += keyWidth;
74
+ buf[o++] = countryIdx.get(e.country);
75
+ view.setInt16(o, Math.max(-32767, Math.min(32767, Math.round(e.lat * LAT_Q))), true);
76
+ o += 2;
77
+ view.setInt16(o, Math.max(-32767, Math.min(32767, Math.round(e.lon * LON_Q))), true);
78
+ o += 2;
79
+ }
80
+ return buf;
81
+ }
82
/**
 * Pure-JS, browser-safe postcode resolver over the flat binary. Implements the same `lookup()` seam
 * as the SQLite `WofPostcodeLookup`, so `extractPostcodeAnchors` is agnostic to which backs it.
 *
 * Layout as read by this class (all multi-byte fields little-endian):
 *
 * - uint32 magic, uint32 record count
 * - uint8 country count, then 2 bytes per country code
 * - uint8 key width
 * - `count` fixed-size records: `keyWidth` key bytes (0x00-right-padded), then a `REC_TAIL`-byte
 *   tail holding a uint8 country index, an int16 quantized latitude and an int16 quantized
 *   longitude.
 */
export class PostcodeBinaryResolver {
    #buf;
    #view;
    #count;
    #countries;
    #keyWidth;
    #recSize;
    #recBase;

    /**
     * Parse and validate the header of a postcode binary.
     *
     * @param {Uint8Array} bytes The complete binary. It is retained by reference, not copied.
     * @throws {Error} When the magic number is wrong, or when the buffer is too short to hold the
     *   header or the number of records the header announces. Failing here keeps a truncated
     *   download from surfacing later as NaN comparisons or an out-of-bounds `DataView` read.
     */
    constructor(bytes) {
        this.#buf = bytes;
        this.#view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
        const size = bytes.byteLength;
        // A buffer shorter than the magic itself cannot carry it.
        if (size < 4 || this.#view.getUint32(0, true) !== MAGIC)
            throw new Error("postcode binary: bad magic");
        // magic(4) + count(4) + countryCount(1) must be present before reading any further.
        if (size < 9)
            throw new Error("postcode binary: truncated header");
        this.#count = this.#view.getUint32(4, true);
        let o = 8;
        const countryCount = bytes[o++];
        // The country table plus the trailing keyWidth byte must fit too.
        if (o + countryCount * 2 + 1 > size)
            throw new Error("postcode binary: truncated header");
        this.#countries = [];
        for (let i = 0; i < countryCount; i++) {
            this.#countries.push(String.fromCharCode(bytes[o], bytes[o + 1]));
            o += 2;
        }
        this.#keyWidth = bytes[o++];
        this.#recSize = this.#keyWidth + REC_TAIL;
        this.#recBase = o;
        // Every record the header promises must be addressable.
        if (this.#recBase + this.#count * this.#recSize > size)
            throw new Error("postcode binary: truncated records");
    }

    /**
     * Compare the keyWidth bytes of record `i` against a padded query key.
     * Returns a negative, zero or positive number, byte-wise, like a sort comparator.
     */
    #cmpKey(i, key) {
        const base = this.#recBase + i * this.#recSize;
        for (let j = 0; j < this.#keyWidth; j++) {
            const d = this.#buf[base + j] - key[j];
            if (d !== 0)
                return d;
        }
        return 0;
    }

    /** True when records `a` and `b` carry byte-identical keys. */
    #sameKey(a, b) {
        const baseA = this.#recBase + a * this.#recSize;
        const baseB = this.#recBase + b * this.#recSize;
        for (let j = 0; j < this.#keyWidth; j++) {
            if (this.#buf[baseA + j] !== this.#buf[baseB + j])
                return false;
        }
        return true;
    }

    /**
     * Find every record stored under `postcode`.
     *
     * @param {string} postcode The postcode to look up. It is encoded with `encodeKey` to the
     *   stored key width before comparison.
     * @returns {Array<{ country: string, lat: number, lon: number }>} One entry per matching
     *   record, in stored order, with coordinates de-quantized via `LAT_Q` / `LON_Q`. Empty when
     *   nothing matches.
     */
    lookup(postcode) {
        if (postcode.length > this.#keyWidth)
            return []; // longer than any stored key → impossible
        const key = new Uint8Array(this.#keyWidth);
        encodeKey(postcode, this.#keyWidth, key, 0);
        // Binary search for the first record whose key >= the query.
        let lo = 0;
        let hi = this.#count;
        while (lo < hi) {
            const mid = (lo + hi) >>> 1;
            if (this.#cmpKey(mid, key) < 0)
                lo = mid + 1;
            else
                hi = mid;
        }
        // Collect the contiguous run of equal keys (one per country).
        const out = [];
        for (let i = lo; i < this.#count && this.#cmpKey(i, key) === 0; i++) {
            const tail = this.#recBase + i * this.#recSize + this.#keyWidth;
            out.push({
                country: this.#countries[this.#buf[tail]],
                lat: this.#view.getInt16(tail + 1, true) / LAT_Q,
                lon: this.#view.getInt16(tail + 3, true) / LON_Q,
            });
        }
        return out;
    }

    /**
     * Decode the whole binary into an {@link AnchorLookup} (`Map<postcode, AnchorEntry>`) for the
     * neural anchor channel (#239/#240): each postcode maps to a uniform posterior over its member
     * countries plus the mean of its non-zero centroids.
     *
     * This is the browser-side equivalent of the pilot postcode→anchor lookup the model trained
     * against, built live from the shipped binary instead of a precomputed JSON. Records are stored
     * sorted by (postcode, country), so equal keys are contiguous.
     *
     * @returns {Map<string, { posterior: Record<string, number>, lat: number, lon: number }>}
     *   `lat`/`lon` are 0 when every record in the run has a (0, 0) centroid.
     */
    toAnchorLookup() {
        const out = new Map();
        let i = 0;
        while (i < this.#count) {
            // Decode this record's postcode key (ASCII, 0x00-right-padded).
            const keyBase = this.#recBase + i * this.#recSize;
            let postcode = "";
            for (let j = 0; j < this.#keyWidth; j++) {
                const c = this.#buf[keyBase + j];
                if (c === 0)
                    break;
                postcode += String.fromCharCode(c);
            }
            // Walk the contiguous run of records sharing this key (one per member country).
            const posterior = {};
            let latSum = 0;
            let lonSum = 0;
            let centroidCount = 0;
            let k = i;
            for (; k < this.#count && this.#sameKey(i, k); k++) {
                const tail = this.#recBase + k * this.#recSize + this.#keyWidth;
                posterior[this.#countries[this.#buf[tail]]] = 1; // uniform — anchorFeatureVector renormalizes
                const lat = this.#view.getInt16(tail + 1, true) / LAT_Q;
                const lon = this.#view.getInt16(tail + 3, true) / LON_Q;
                // A (0, 0) centroid is excluded from the mean.
                if (lat !== 0 || lon !== 0) {
                    latSum += lat;
                    lonSum += lon;
                    centroidCount++;
                }
            }
            out.set(postcode, {
                posterior,
                lat: centroidCount ? latSum / centroidCount : 0,
                lon: centroidCount ? lonSum / centroidCount : 0,
            });
            i = k; // k > i always: record i matches its own key
        }
        return out;
    }
}
208
+ //# sourceMappingURL=postcode-binary-resolver.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"postcode-binary-resolver.js","sourceRoot":"","sources":["../postcode-binary-resolver.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AAKH,MAAM,KAAK,GAAG,aAAa,CAAA,CAAC,qDAAqD;AACjF,MAAM,QAAQ,GAAG,CAAC,CAAA,CAAC,oCAAoC;AACvD,MAAM,KAAK,GAAG,KAAK,GAAG,EAAE,CAAA;AACxB,MAAM,KAAK,GAAG,KAAK,GAAG,GAAG,CAAA;AASzB;;;GAGG;AACH,SAAS,SAAS,CAAC,CAAS,EAAE,KAAa,EAAE,GAAe,EAAE,MAAc;IAC3E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE;QAAE,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAA;AAC5F,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,uBAAuB,CAAC,OAAuC;IAC9E,MAAM,MAAM,GAAG,CAAC,GAAG,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACzC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ;QACtB,CAAC,CAAC,CAAC,CAAC;QACJ,CAAC,CAAC,CAAC,CAAC,QAAQ,GAAG,CAAC,CAAC,QAAQ;YACxB,CAAC,CAAC,CAAC;YACH,CAAC,CAAC,CAAC,CAAC,OAAO,GAAG,CAAC,CAAC,OAAO;gBACtB,CAAC,CAAC,CAAC,CAAC;gBACJ,CAAC,CAAC,CAAC,CAAC,OAAO,GAAG,CAAC,CAAC,OAAO;oBACtB,CAAC,CAAC,CAAC;oBACH,CAAC,CAAC,CAAC,CACP,CAAA;IACD,MAAM,SAAS,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAA;IACnE,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAA;IAC3D,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAA;IAC3E,MAAM,OAAO,GAAG,QAAQ,GAAG,QAAQ,CAAA;IAEnC,MAAM,UAAU,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,GAAG,CAAC,CAAA;IACvD,MAAM,GAAG,GAAG,IAAI,UAAU,CAAC,UAAU,GAAG,MAAM,CAAC,MAAM,GAAG,OAAO,CAAC,CAAA;IAChE,MAAM,IAAI,GAAG,IAAI,QAAQ,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;IAErC,IAAI,CAAC,GAAG,CAAC,CAAA;IACT,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,CAAA;IAC9B,CAAC,IAAI,CAAC,CAAA;IACN,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,CAAA;IACtC,CAAC,IAAI,CAAC,CAAA;IACN,GAA
G,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,CAAA;IAC3B,KAAK,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;QAC3B,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,GAAG,IAAI,CAAA;QACjC,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,GAAG,IAAI,CAAA;IAClC,CAAC;IACD,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAA;IAEnB,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;QACxB,SAAS,CAAC,CAAC,CAAC,QAAQ,EAAE,QAAQ,EAAE,GAAG,EAAE,CAAC,CAAC,CAAA;QACvC,CAAC,IAAI,QAAQ,CAAA;QACb,GAAG,CAAC,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAE,CAAA;QACrC,IAAI,CAAC,QAAQ,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAA;QACpF,CAAC,IAAI,CAAC,CAAA;QACN,IAAI,CAAC,QAAQ,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAA;QACpF,CAAC,IAAI,CAAC,CAAA;IACP,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC;AAED;;;GAGG;AACH,MAAM,OAAO,sBAAsB;IACzB,IAAI,CAAY;IAChB,KAAK,CAAU;IACf,MAAM,CAAQ;IACd,UAAU,CAAU;IACpB,SAAS,CAAQ;IACjB,QAAQ,CAAQ;IAChB,QAAQ,CAAQ;IAEzB,YAAY,KAAiB;QAC5B,IAAI,CAAC,IAAI,GAAG,KAAK,CAAA;QACjB,IAAI,CAAC,KAAK,GAAG,IAAI,QAAQ,CAAC,KAAK,CAAC,MAAM,EAAE,KAAK,CAAC,UAAU,EAAE,KAAK,CAAC,UAAU,CAAC,CAAA;QAC3E,IAAI,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,KAAK,KAAK;YAAE,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAA;QAC1F,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,CAAA;QAC3C,IAAI,CAAC,GAAG,CAAC,CAAA;QACT,MAAM,YAAY,GAAG,KAAK,CAAC,CAAC,EAAE,CAAE,CAAA;QAChC,IAAI,CAAC,UAAU,GAAG,EAAE,CAAA;QACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,CAAE,EAAE,KAAK,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,CAAC,CAAA;YACnE,CAAC,IAAI,CAAC,CAAA;QACP,CAAC;QACD,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC,CAAC,EAAE,CAAE,CAAA;QAC5B,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,SAAS,GAAG,QAAQ,CAAA;QACzC,IAAI,CAAC,QAAQ,GAAG,CAAC,CAAA;IAClB,CAAC;IAED,2EAA2E;I
AC3E,OAAO,CAAC,CAAS,EAAE,GAAe;QACjC,MAAM,IAAI,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAA;QAC9C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACzC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,CAAE,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;YACxC,IAAI,CAAC,KAAK,CAAC;gBAAE,OAAO,CAAC,CAAA;QACtB,CAAC;QACD,OAAO,CAAC,CAAA;IACT,CAAC;IAED,MAAM,CAAC,QAAgB;QACtB,IAAI,QAAQ,CAAC,MAAM,GAAG,IAAI,CAAC,SAAS;YAAE,OAAO,EAAE,CAAA,CAAC,0CAA0C;QAC1F,MAAM,GAAG,GAAG,IAAI,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;QAC1C,SAAS,CAAC,QAAQ,EAAE,IAAI,CAAC,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,CAAA;QAE3C,6DAA6D;QAC7D,IAAI,EAAE,GAAG,CAAC,CAAA;QACV,IAAI,EAAE,GAAG,IAAI,CAAC,MAAM,CAAA;QACpB,OAAO,EAAE,GAAG,EAAE,EAAE,CAAC;YAChB,MAAM,GAAG,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,KAAK,CAAC,CAAA;YAC3B,IAAI,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,GAAG,CAAC,GAAG,CAAC;gBAAE,EAAE,GAAG,GAAG,GAAG,CAAC,CAAA;;gBACvC,EAAE,GAAG,GAAG,CAAA;QACd,CAAC;QAED,8DAA8D;QAC9D,MAAM,GAAG,GAAoB,EAAE,CAAA;QAC/B,KAAK,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,OAAO,CAAC,CAAC,EAAE,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACrE,MAAM,IAAI,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,GAAG,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAA;YAC/D,GAAG,CAAC,IAAI,CAAC;gBACR,OAAO,EAAE,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAE,CAAE;gBAC3C,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,GAAG,CAAC,EAAE,IAAI,CAAC,GAAG,KAAK;gBAChD,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,GAAG,CAAC,EAAE,IAAI,CAAC,GAAG,KAAK;aAChD,CAAC,CAAA;QACH,CAAC;QACD,OAAO,GAAG,CAAA;IACX,CAAC;IAED;;;;;;;;;OASG;IACH,cAAc;QACb,MAAM,GAAG,GAAiB,IAAI,GAAG,EAAE,CAAA;QACnC,IAAI,CAAC,GAAG,CAAC,CAAA;QACT,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;YACxB,gEAAgE;YAChE,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAA;YACjD,IAAI,QAAQ,GAAG,EAAE,CAAA;YACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;gBACzC,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,GAAG,CAAC,CAAE,CAAA;gBACjC,IAAI,CAAC,KAAK,CAAC;oBAAE,MAAK;gBAClB,QAAQ,IAAI,MAAM,CAAC,YAAY,C
AAC,CAAC,CAAC,CAAA;YACnC,CAAC;YACD,gFAAgF;YAChF,MAAM,SAAS,GAA2B,EAAE,CAAA;YAC5C,IAAI,MAAM,GAAG,CAAC,CAAA;YACd,IAAI,MAAM,GAAG,CAAC,CAAA;YACd,IAAI,aAAa,GAAG,CAAC,CAAA;YACrB,IAAI,CAAC,GAAG,CAAC,CAAA;YACT,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC7B,MAAM,IAAI,GAAG,IAAI,CAAC,QAAQ,GAAG,CAAC,GAAG,IAAI,CAAC,QAAQ,CAAA;gBAC9C,IAAI,IAAI,GAAG,IAAI,CAAA;gBACf,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;oBACzC,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,IAAI,CAAC,OAAO,GAAG,CAAC,CAAC,EAAE,CAAC;wBACpD,IAAI,GAAG,KAAK,CAAA;wBACZ,MAAK;oBACN,CAAC;gBACF,CAAC;gBACD,IAAI,CAAC,IAAI;oBAAE,MAAK;gBAChB,MAAM,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC,SAAS,CAAA;gBAClC,SAAS,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAE,CAAE,CAAC,GAAG,CAAC,CAAA,CAAC,6CAA6C;gBAC/F,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,GAAG,CAAC,EAAE,IAAI,CAAC,GAAG,KAAK,CAAA;gBACvD,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,GAAG,CAAC,EAAE,IAAI,CAAC,GAAG,KAAK,CAAA;gBACvD,IAAI,GAAG,KAAK,CAAC,IAAI,GAAG,KAAK,CAAC,EAAE,CAAC;oBAC5B,MAAM,IAAI,GAAG,CAAA;oBACb,MAAM,IAAI,GAAG,CAAA;oBACb,aAAa,EAAE,CAAA;gBAChB,CAAC;YACF,CAAC;YACD,GAAG,CAAC,GAAG,CAAC,QAAQ,EAAE;gBACjB,SAAS;gBACT,GAAG,EAAE,aAAa,CAAC,CAAC,CAAC,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;gBAC/C,GAAG,EAAE,aAAa,CAAC,CAAC,CAAC,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;aAC/C,CAAC,CAAA;YACF,CAAC,GAAG,CAAC,CAAA;QACN,CAAC;QACD,OAAO,GAAG,CAAA;IACX,CAAC;CACD"}
@@ -0,0 +1,65 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Postcode regex repair pass — v0.7 task #35 ("postcode regex pre-pass").
7
+ *
8
+ * The 2026-05-29 postcode diagnostic showed the neural model fragments alphanumeric postcodes at
9
+ * the SentencePiece layer (GB/CA/NL at 0%, US 80.5%, FR 70.1%). Three failure modes were visible
10
+ * in the data:
11
+ *
12
+ * 1. Total miss — "London SW1A 1AA" → (no postcode label)
13
+ * 2. Truncation — "M5V 2T6" → "2T6"; "B12 8QX" → "B12"
14
+ * 3. Char-drift — "75008" → "5008"; "62701" → "2701" (and smear: "1200-030 Lisboa" → "200-030 Lis")
15
+ *
16
+ * This pass runs AFTER the model's per-token BIO labels are decoded but BEFORE `buildAddressTree`.
17
+ * It detects postcode-shaped substrings with per-country regexes and repairs the label sequence
18
+ * so the postcode span matches the detected shape. The model is untouched — this is a
19
+ * deterministic decoder-side correction, the "lowest risk" lever in the v0.7 plan (vs. #36's soft
20
+ * FST shallow-fusion or #41's char-level encoder).
21
+ *
22
+ * PRECISION GUARDS (so we never regress the countries already passing):
23
+ *
24
+ * - Alphanumeric shapes (GB/CA/NL/DE-prefixed) are high-confidence "this IS a postcode" patterns →
25
+ * eligible to ADD a span where the model emitted none, but only over non-structural labels
26
+ * (never over house_number/street/etc.).
27
+ * - Numeric shapes (\d{5}, ZIP+4, JP, PT, PL) are ambiguous (a bare 5-digit could be a house number)
28
+ * → SNAP-only: they expand/clip an EXISTING postcode span, never create one from scratch.
29
+ * - Smear cleanup is LOCAL: only postcode tokens immediately flanking a snapped span are cleared. We
30
+ * never globally clear unmatched postcode tokens — that would regress shapes we don't
31
+ * pattern-match (AU 4-digit, IN 6-digit, …).
32
+ */
33
+ import type { DecoderToken } from "@mailwoman/core/decoder";
34
/**
 * A detected postcode-shaped substring with its char range and confidence class.
 *
 * NOTE(review): this declaration does not show whether `end` is inclusive or exclusive — confirm
 * against the implementation before slicing with it.
 */
export interface PostcodeMatch {
    /** Character offset at which the match begins. */
    start: number;
    /** Character offset at which the match ends (inclusive vs. exclusive: see the note above). */
    end: number;
    /** "alnum" shapes may ADD; "numeric" shapes may only SNAP an existing span. */
    kind: "alnum" | "numeric";
    /** Pattern priority (lower = more specific, wins overlap resolution). */
    priority: number;
}
43
/**
 * Per-country postcode shape patterns, ordered most-specific → least. Alphanumeric patterns require
 * uppercase letters (postcodes are conventionally uppercase, and the eval data has them uppercase)
 * — this keeps them from matching ordinary lowercase prose.
 *
 * Each entry carries:
 *
 * - `label`: a name for the pattern (presumably the country or shape it targets — verify against
 *   the implementation).
 * - `kind`: the confidence class, "alnum" or "numeric", as on {@link PostcodeMatch}.
 * - `re`: the regular expression that detects the shape.
 */
export declare const POSTCODE_PATTERNS: Array<{
    label: string;
    kind: "alnum" | "numeric";
    re: RegExp;
}>;
53
/**
 * Collect non-overlapping postcode matches, preferring more-specific (earlier) patterns.
 *
 * @param text The string to scan for postcode-shaped substrings.
 * @returns The matches that remain once overlaps are resolved.
 */
export declare function collectMatches(text: string): PostcodeMatch[];
55
/** Outcome of a postcode label repair pass, as returned by `repairPostcodeLabels`. */
export interface RepairResult {
    /** The token sequence after repair. */
    tokens: DecoderToken[];
    /** Number of token labels changed — for telemetry / logging. */
    changed: number;
}
60
/**
 * Repair postcode label spans in a decoded token sequence using per-country regexes. Returns a NEW
 * token array (inputs are not mutated) plus a change count.
 *
 * @param text The text in which postcode shapes are detected.
 * @param input The decoded tokens to repair; taken as `readonly` and left untouched.
 * @returns The repaired tokens together with the number of labels that changed.
 */
export declare function repairPostcodeLabels(text: string, input: readonly DecoderToken[]): RepairResult;
65
+ //# sourceMappingURL=postcode-repair.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"postcode-repair.d.ts","sourceRoot":"","sources":["../postcode-repair.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAA;AAE3D,qFAAqF;AACrF,MAAM,WAAW,aAAa;IAC7B,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;IACX,+EAA+E;IAC/E,IAAI,EAAE,OAAO,GAAG,SAAS,CAAA;IACzB,yEAAyE;IACzE,QAAQ,EAAE,MAAM,CAAA;CAChB;AAED;;;;GAIG;AACH,eAAO,MAAM,iBAAiB,EAAE,KAAK,CAAC;IAAE,KAAK,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,OAAO,GAAG,SAAS,CAAC;IAAC,EAAE,EAAE,MAAM,CAAA;CAAE,CAiB7F,CAAA;AA0BD,6FAA6F;AAC7F,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,aAAa,EAAE,CAkB5D;AAED,MAAM,WAAW,YAAY;IAC5B,MAAM,EAAE,YAAY,EAAE,CAAA;IACtB,gEAAgE;IAChE,OAAO,EAAE,MAAM,CAAA;CACf;AAED;;;GAGG;AACH,wBAAgB,oBAAoB,CAAC,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,SAAS,YAAY,EAAE,GAAG,YAAY,CAmE/F"}