@mailwoman/core 3.0.0 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/out/decoder/build-tree.d.ts +21 -4
- package/out/decoder/build-tree.d.ts.map +1 -1
- package/out/decoder/build-tree.js +38 -9
- package/out/decoder/build-tree.js.map +1 -1
- package/out/decoder/calibration.d.ts +50 -0
- package/out/decoder/calibration.d.ts.map +1 -0
- package/out/decoder/calibration.js +74 -0
- package/out/decoder/calibration.js.map +1 -0
- package/out/decoder/containment.d.ts +31 -2
- package/out/decoder/containment.d.ts.map +1 -1
- package/out/decoder/containment.js +36 -3
- package/out/decoder/containment.js.map +1 -1
- package/out/decoder/index.d.ts +2 -0
- package/out/decoder/index.d.ts.map +1 -1
- package/out/decoder/index.js +2 -0
- package/out/decoder/index.js.map +1 -1
- package/out/decoder/serialize-json.d.ts +4 -0
- package/out/decoder/serialize-json.d.ts.map +1 -1
- package/out/decoder/serialize-json.js +10 -0
- package/out/decoder/serialize-json.js.map +1 -1
- package/out/decoder/serialize-xml.d.ts.map +1 -1
- package/out/decoder/serialize-xml.js +6 -0
- package/out/decoder/serialize-xml.js.map +1 -1
- package/out/decoder/types.d.ts +46 -0
- package/out/decoder/types.d.ts.map +1 -1
- package/out/decoder/validate-tree.d.ts +39 -0
- package/out/decoder/validate-tree.d.ts.map +1 -0
- package/out/decoder/validate-tree.js +79 -0
- package/out/decoder/validate-tree.js.map +1 -0
- package/out/pipeline/runtime-pipeline.d.ts +17 -1
- package/out/pipeline/runtime-pipeline.d.ts.map +1 -1
- package/out/pipeline/runtime-pipeline.js +76 -16
- package/out/pipeline/runtime-pipeline.js.map +1 -1
- package/out/pipeline/span-logit-aggregation.d.ts +4 -2
- package/out/pipeline/span-logit-aggregation.d.ts.map +1 -1
- package/out/pipeline/span-logit-aggregation.js +11 -2
- package/out/pipeline/span-logit-aggregation.js.map +1 -1
- package/out/pipeline/types.d.ts +12 -3
- package/out/pipeline/types.d.ts.map +1 -1
- package/out/resolver/index.d.ts +1 -1
- package/out/resolver/index.d.ts.map +1 -1
- package/out/resolver/resolve.d.ts.map +1 -1
- package/out/resolver/resolve.js +162 -10
- package/out/resolver/resolve.js.map +1 -1
- package/out/resolver/types.d.ts +125 -0
- package/out/resolver/types.d.ts.map +1 -1
- package/out/resolver/types.js.map +1 -1
- package/out/resources/whosonfirst/PlacetypeDataSource.d.ts.map +1 -1
- package/out/resources/whosonfirst/PlacetypeDataSource.js +3 -1
- package/out/resources/whosonfirst/PlacetypeDataSource.js.map +1 -1
- package/out/resources/whosonfirst/placetypes/graph.d.ts +47 -0
- package/out/resources/whosonfirst/placetypes/graph.d.ts.map +1 -0
- package/out/resources/whosonfirst/placetypes/graph.js +0 -0
- package/out/resources/whosonfirst/placetypes/graph.js.map +1 -0
- package/out/resources/whosonfirst/placetypes/index.d.ts +2 -0
- package/out/resources/whosonfirst/placetypes/index.d.ts.map +1 -1
- package/out/resources/whosonfirst/placetypes/index.js +2 -0
- package/out/resources/whosonfirst/placetypes/index.js.map +1 -1
- package/out/resources/whosonfirst/placetypes/mermaid.d.ts +35 -3
- package/out/resources/whosonfirst/placetypes/mermaid.d.ts.map +1 -1
- package/out/resources/whosonfirst/placetypes/mermaid.js +87 -15
- package/out/resources/whosonfirst/placetypes/mermaid.js.map +1 -1
- package/out/resources/whosonfirst/placetypes/tree.d.ts +30 -0
- package/out/resources/whosonfirst/placetypes/tree.d.ts.map +1 -0
- package/out/resources/whosonfirst/placetypes/tree.js +28 -0
- package/out/resources/whosonfirst/placetypes/tree.js.map +1 -0
- package/out/tokenization/Graph.d.ts +1 -1
- package/out/tokenization/Graph.d.ts.map +1 -1
- package/out/tokenization/Graph.js +5 -1
- package/out/tokenization/Graph.js.map +1 -1
- package/package.json +8 -2
|
@@ -8,9 +8,11 @@
|
|
|
8
8
|
* Two passes:
|
|
9
9
|
*
|
|
10
10
|
* 1. Span emission — walk the token stream, group `B-X` followed by `I-X*` into one span. Lenient on
|
|
11
|
-
* hanging `I-X` (treat as new span).
|
|
12
|
-
*
|
|
13
|
-
*
|
|
11
|
+
* hanging `I-X` (treat as new span). A `B-X` that is whitespace-adjacent to an already-open
|
|
12
|
+
* `X` span is also folded in (spurious-boundary repair for multi-word values the model
|
|
13
|
+
* fragments, e.g. "Saint Paul" → B-locality B-locality); a comma/separator between them
|
|
14
|
+
* keeps them distinct. Span `value` is sliced from `raw` by [start, end), NOT concatenated
|
|
15
|
+
* from `piece` — this avoids SentencePiece's synthetic leading-space markers in the output.
|
|
14
16
|
* 2. Parent attachment — for each span, find the nearest labeled span whose tag is the
|
|
15
17
|
* highest-priority entry in this span's `PARENT_OF` list. Distance is the tiebreaker only.
|
|
16
18
|
* Spans with no found parent become roots.
|
|
@@ -20,7 +22,8 @@
|
|
|
20
22
|
* Source order is still preserved in the `start`/`end` fields, which the XML serializer exposes
|
|
21
23
|
* as attributes.
|
|
22
24
|
*/
|
|
23
|
-
import type { AddressTree, DecoderToken } from "./types.js";
|
|
25
|
+
import type { Calibrator } from "./calibration.js";
|
|
26
|
+
import type { AddressSystem, AddressTree, DecoderToken } from "./types.js";
|
|
24
27
|
/**
|
|
25
28
|
* Optional caller-supplied attribution stamped on every emitted node. The BIO stream comes from a
|
|
26
29
|
* single model, so there's no per-span variation — one source for the whole tree.
|
|
@@ -30,6 +33,20 @@ import type { AddressTree, DecoderToken } from "./types.js";
|
|
|
30
33
|
export interface BuildTreeOpts {
|
|
31
34
|
source?: string;
|
|
32
35
|
sourceId?: string;
|
|
36
|
+
/**
|
|
37
|
+
* Addressing system to decode under — selects the containment hierarchy via `containmentFor`.
|
|
38
|
+
* Stamped onto the returned `AddressTree.system`. Omit for the default Western hierarchy. Today
|
|
39
|
+
* all systems share one map, so this only records intent + threads the discriminator; it becomes
|
|
40
|
+
* behavioral when a system-specific map lands (Phase 6 JP). See `containment.ts`.
|
|
41
|
+
*/
|
|
42
|
+
system?: AddressSystem;
|
|
43
|
+
/**
|
|
44
|
+
* Optional confidence calibrator (task #59). When provided, each span's mean-of-token-softmax
|
|
45
|
+
* confidence is mapped through it before being stamped on the node, so `conf=` reports a
|
|
46
|
+
* calibrated probability of correctness rather than the raw softmax. OPT-IN — omit for the
|
|
47
|
+
* byte-stable default. Build one via `createCalibrator` (`./calibration.ts`).
|
|
48
|
+
*/
|
|
49
|
+
calibrate?: Calibrator;
|
|
33
50
|
}
|
|
34
51
|
/**
|
|
35
52
|
* Build an `AddressTree` from a raw input string and the token stream produced by the model.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"build-tree.d.ts","sourceRoot":"","sources":["../../decoder/build-tree.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"build-tree.d.ts","sourceRoot":"","sources":["../../decoder/build-tree.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAElD,OAAO,KAAK,EAAe,aAAa,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAEvF;;;;;GAKG;AACH,MAAM,WAAW,aAAa;IAC7B,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB;;;;;OAKG;IACH,MAAM,CAAC,EAAE,aAAa,CAAA;IACtB;;;;;OAKG;IACH,SAAS,CAAC,EAAE,UAAU,CAAA;CACtB;AAsHD;;;;;;;;GAQG;AACH,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,EAAE,IAAI,GAAE,aAAkB,GAAG,WAAW,CAe3G"}
|
|
@@ -8,9 +8,11 @@
|
|
|
8
8
|
* Two passes:
|
|
9
9
|
*
|
|
10
10
|
* 1. Span emission — walk the token stream, group `B-X` followed by `I-X*` into one span. Lenient on
|
|
11
|
-
* hanging `I-X` (treat as new span).
|
|
12
|
-
*
|
|
13
|
-
*
|
|
11
|
+
* hanging `I-X` (treat as new span). A `B-X` that is whitespace-adjacent to an already-open
|
|
12
|
+
* `X` span is also folded in (spurious-boundary repair for multi-word values the model
|
|
13
|
+
* fragments, e.g. "Saint Paul" → B-locality B-locality); a comma/separator between them
|
|
14
|
+
* keeps them distinct. Span `value` is sliced from `raw` by [start, end), NOT concatenated
|
|
15
|
+
* from `piece` — this avoids SentencePiece's synthetic leading-space markers in the output.
|
|
14
16
|
* 2. Parent attachment — for each span, find the nearest labeled span whose tag is the
|
|
15
17
|
* highest-priority entry in this span's `PARENT_OF` list. Distance is the tiebreaker only.
|
|
16
18
|
* Spans with no found parent become roots.
|
|
@@ -20,7 +22,7 @@
|
|
|
20
22
|
* Source order is still preserved in the `start`/`end` fields, which the XML serializer exposes
|
|
21
23
|
* as attributes.
|
|
22
24
|
*/
|
|
23
|
-
import {
|
|
25
|
+
import { containmentFor } from "./containment.js";
|
|
24
26
|
function bioParts(label) {
|
|
25
27
|
if (label === "O")
|
|
26
28
|
return { prefix: "O", tag: null };
|
|
@@ -51,7 +53,8 @@ function flush(open, raw, out, attribution) {
|
|
|
51
53
|
if (start >= end)
|
|
52
54
|
return null;
|
|
53
55
|
const value = raw.slice(start, end);
|
|
54
|
-
const
|
|
56
|
+
const rawConfidence = open.confidences.reduce((a, b) => a + b, 0) / open.confidences.length;
|
|
57
|
+
const confidence = attribution.calibrate ? attribution.calibrate(rawConfidence) : rawConfidence;
|
|
55
58
|
const node = { tag: open.tag, start, end, value, confidence, children: [] };
|
|
56
59
|
if (attribution.source !== undefined)
|
|
57
60
|
node.source = attribution.source;
|
|
@@ -66,10 +69,32 @@ function emitSpans(raw, tokens, attribution) {
|
|
|
66
69
|
for (const tok of tokens) {
|
|
67
70
|
const { prefix, tag } = bioParts(tok.label);
|
|
68
71
|
if (prefix === "O") {
|
|
72
|
+
// A zero-width or whitespace-only `O` piece is a tokenizer artifact — SentencePiece emits a
|
|
73
|
+
// standalone `▁` word-boundary marker between words and the model labels it `O` (e.g.
|
|
74
|
+
// "Saint Paul" → "▁Saint"[B-loc], "▁"[O, zero-width], "Paul"[B-loc]). It is NOT a real
|
|
75
|
+
// component boundary, so it must not flush the open span; keeping the span alive lets the
|
|
76
|
+
// following same-tag `B-` token merge in (see the spurious-boundary repair below). A
|
|
77
|
+
// non-whitespace `O` (comma, slash, …) is a genuine separator and still flushes.
|
|
78
|
+
if (open !== null && /^\s*$/.test(raw.slice(tok.start, tok.end)))
|
|
79
|
+
continue;
|
|
69
80
|
open = flush(open, raw, out, attribution);
|
|
70
81
|
continue;
|
|
71
82
|
}
|
|
72
83
|
if (prefix === "B" || open === null || open.tag !== tag) {
|
|
84
|
+
// Spurious-boundary repair: a `B-X` token that is whitespace-adjacent to an already-open
|
|
85
|
+
// `X` span is the model fragmenting a multi-word value — e.g. "Saint Paul" emitted as
|
|
86
|
+
// B-locality B-locality instead of B-locality I-locality (a real, decode-agnostic
|
|
87
|
+
// emission bug; see scripts/diag-saintalbans.ts). Fold it into the open span.
|
|
88
|
+
//
|
|
89
|
+
// Guard: only merge when the text in `raw` between the two spans is whitespace-only. A
|
|
90
|
+
// comma or any other separator keeps them distinct, and an intervening O/different-tag
|
|
91
|
+
// token already nulls/replaces `open` above — so two genuinely separate same-tag spans
|
|
92
|
+
// (e.g. "Springfield, Chicago") are never merged.
|
|
93
|
+
if (prefix === "B" && open !== null && open.tag === tag && /^\s*$/.test(raw.slice(open.end, tok.start))) {
|
|
94
|
+
open.end = tok.end;
|
|
95
|
+
open.confidences.push(tok.confidence);
|
|
96
|
+
continue;
|
|
97
|
+
}
|
|
73
98
|
open = flush(open, raw, out, attribution);
|
|
74
99
|
open = { tag: tag, start: tok.start, end: tok.end, confidences: [tok.confidence] };
|
|
75
100
|
continue;
|
|
@@ -88,8 +113,8 @@ function distance(a, b) {
|
|
|
88
113
|
return a.start - b.end;
|
|
89
114
|
return 0;
|
|
90
115
|
}
|
|
91
|
-
function findParent(span, all) {
|
|
92
|
-
const candidates =
|
|
116
|
+
function findParent(span, all, parentOf) {
|
|
117
|
+
const candidates = parentOf[span.tag] ?? [];
|
|
93
118
|
for (const parentTag of candidates) {
|
|
94
119
|
const matches = all.filter((s) => s !== span && s.tag === parentTag);
|
|
95
120
|
if (matches.length === 0)
|
|
@@ -115,14 +140,18 @@ function sortByStart(nodes) {
|
|
|
115
140
|
export function buildAddressTree(raw, tokens, opts = {}) {
|
|
116
141
|
const spans = emitSpans(raw, tokens, opts);
|
|
117
142
|
const roots = [];
|
|
143
|
+
const parentOf = containmentFor(opts.system);
|
|
118
144
|
for (const span of spans) {
|
|
119
|
-
const parent = findParent(span, spans);
|
|
145
|
+
const parent = findParent(span, spans, parentOf);
|
|
120
146
|
if (parent)
|
|
121
147
|
parent.children.push(span);
|
|
122
148
|
else
|
|
123
149
|
roots.push(span);
|
|
124
150
|
}
|
|
125
151
|
sortByStart(roots);
|
|
126
|
-
|
|
152
|
+
const tree = { raw, roots };
|
|
153
|
+
if (opts.system !== undefined)
|
|
154
|
+
tree.system = opts.system;
|
|
155
|
+
return tree;
|
|
127
156
|
}
|
|
128
157
|
//# sourceMappingURL=build-tree.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"build-tree.js","sourceRoot":"","sources":["../../decoder/build-tree.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"build-tree.js","sourceRoot":"","sources":["../../decoder/build-tree.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAIH,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AAmCjD,SAAS,QAAQ,CAAC,KAAe;IAChC,IAAI,KAAK,KAAK,GAAG;QAAE,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,CAAA;IACpD,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IAC/B,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAc,EAAE,GAAG,EAAE,KAAK,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAiB,EAAE,CAAA;AACjG,CAAC;AAED,mGAAmG;AACnG,iGAAiG;AACjG,+FAA+F;AAC/F,8FAA8F;AAC9F,+FAA+F;AAC/F,oDAAoD;AACpD,SAAS,YAAY,CAAC,GAAW,EAAE,KAAa,EAAE,GAAW;IAC5D,IAAI,CAAC,GAAG,KAAK,CAAA;IACb,IAAI,CAAC,GAAG,GAAG,CAAA;IACX,MAAM,UAAU,GAAG,CAAC,CAAS,EAAW,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAA;IAC7E,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;QAAE,CAAC,EAAE,CAAA;IACnC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC;QAAE,CAAC,EAAE,CAAA;IACvC,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAA;AAC5B,CAAC;AAED,SAAS,KAAK,CAAC,IAAqB,EAAE,GAAW,EAAE,GAAkB,EAAE,WAA0B;IAChG,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAA;IACtB,MAAM,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,YAAY,CAAC,GAAG,EAAE,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAA;IAC9D,6FAA6F;IAC7F,IAAI,KAAK,IAAI,GAAG;QAAE,OAAO,IAAI,CAAA;IAC7B,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAA;IACnC,MAAM,aAAa,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAA;IAC3F,MAAM,UAAU,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,aAAa,CAAA;IAC/F,MAAM,IAAI,GAAgB,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,KAAK,EAAE,UAAU,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAA;IACxF,IAAI,WAAW,CAAC,MAAM,KAAK,SAAS;QAAE,IAAI,CAAC,MAAM,GAAG,WAAW,CAAC,MAAM,CAAA;IACtE,IAAI,WAAW,CAAC,QAAQ,KAAK,SAAS;QAAE,IAAI,CAAC,QAAQ,GAAG,WAAW,CAAC,QAAQ,CAAA;IAC5E,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACd,OAAO,IAAI,CAAA;AACZ,CAAC;AAED,SAAS,SAAS,CAAC,GAAW,EAAE,M
AAsB,EAAE,WAA0B;IACjF,MAAM,GAAG,GAAkB,EAAE,CAAA;IAC7B,IAAI,IAAI,GAAoB,IAAI,CAAA;IAEhC,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QAC1B,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;QAE3C,IAAI,MAAM,KAAK,GAAG,EAAE,CAAC;YACpB,4FAA4F;YAC5F,sFAAsF;YACtF,uFAAuF;YACvF,0FAA0F;YAC1F,qFAAqF;YACrF,iFAAiF;YACjF,IAAI,IAAI,KAAK,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;gBAAE,SAAQ;YAC1E,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,WAAW,CAAC,CAAA;YACzC,SAAQ;QACT,CAAC;QAED,IAAI,MAAM,KAAK,GAAG,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,CAAC,GAAG,KAAK,GAAG,EAAE,CAAC;YACzD,yFAAyF;YACzF,sFAAsF;YACtF,kFAAkF;YAClF,8EAA8E;YAC9E,EAAE;YACF,uFAAuF;YACvF,uFAAuF;YACvF,uFAAuF;YACvF,kDAAkD;YAClD,IAAI,MAAM,KAAK,GAAG,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,CAAC,GAAG,KAAK,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;gBACzG,IAAI,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG,CAAA;gBAClB,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAA;gBACrC,SAAQ;YACT,CAAC;YACD,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,WAAW,CAAC,CAAA;YACzC,IAAI,GAAG,EAAE,GAAG,EAAE,GAAI,EAAE,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,WAAW,EAAE,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAA;YACnF,SAAQ;QACT,CAAC;QAED,+BAA+B;QAC/B,IAAI,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG,CAAA;QAClB,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAA;IACtC,CAAC;IAED,KAAK,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,WAAW,CAAC,CAAA;IAClC,OAAO,GAAG,CAAA;AACX,CAAC;AAED,SAAS,QAAQ,CAAC,CAAc,EAAE,CAAc;IAC/C,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,KAAK;QAAE,OAAO,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,CAAA;IAC5C,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,KAAK;QAAE,OAAO,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,CAAA;IAC5C,OAAO,CAAC,CAAA;AACT,CAAC;AAED,SAAS,UAAU,CAClB,IAAiB,EACjB,GAAkB,EAClB,QAAuD;IAEvD,MAAM,UAAU,GAAG,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAA;IAC3C,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACpC,MAAM,OAAO,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,IAAI,IAAI,CAAC
,CAAC,GAAG,KAAK,SAAS,CAAC,CAAA;QACpE,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,SAAQ;QAClC,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAA;IAChG,CAAC;IACD,OAAO,IAAI,CAAA;AACZ,CAAC;AAED,SAAS,WAAW,CAAC,KAAoB;IACxC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAA;IACvC,KAAK,MAAM,CAAC,IAAI,KAAK;QAAE,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAA;AAC/C,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,gBAAgB,CAAC,GAAW,EAAE,MAAsB,EAAE,OAAsB,EAAE;IAC7F,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,CAAA;IAC1C,MAAM,KAAK,GAAkB,EAAE,CAAA;IAC/B,MAAM,QAAQ,GAAG,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IAE5C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAA;QAChD,IAAI,MAAM;YAAE,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;;YACjC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACtB,CAAC;IAED,WAAW,CAAC,KAAK,CAAC,CAAA;IAClB,MAAM,IAAI,GAAgB,EAAE,GAAG,EAAE,KAAK,EAAE,CAAA;IACxC,IAAI,IAAI,CAAC,MAAM,KAAK,SAAS;QAAE,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAA;IACxD,OAAO,IAAI,CAAA;AACZ,CAAC"}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Opt-in confidence calibration for decoded spans.
|
|
7
|
+
*
|
|
8
|
+
* The decoder emits a per-span `confidence` that is the mean of the span's per-token softmax
|
|
9
|
+
* probabilities (`build-tree.ts`). Softmax probabilities are NOT calibrated — a CE-trained model
|
|
10
|
+
* is systematically over/under-confident in bands. Task #59 fits an isotonic-regression
|
|
11
|
+
* calibrator on a held-out OpenAddresses + corpus set
|
|
12
|
+
* (`scripts/eval/fit-isotonic-calibration.py`) and ships the result as a 20-bin lookup table
|
|
13
|
+
* (`data/eval/calibration/isotonic-<locale>-<version>.json`).
|
|
14
|
+
*
|
|
15
|
+
* This module turns that table into a pure `(rawConfidence) => calibratedConfidence` function. It
|
|
16
|
+
* is deliberately decoupled from the table source: pass the PARSED JSON object so this stays
|
|
17
|
+
* browser-safe (no `node:fs`) — the demo imports the JSON directly, Node scripts `JSON.parse`
|
|
18
|
+
* it.
|
|
19
|
+
*
|
|
20
|
+
* Wiring is OPT-IN. The default decode path is unchanged (byte-stable `conf=` output). A caller
|
|
21
|
+
* that wants calibrated confidences builds a `Calibrator` here and passes it via
|
|
22
|
+
* `ParseOpts.calibrate` (neural) / `BuildTreeOpts.calibrate` (decoder), which `build-tree.ts`
|
|
23
|
+
* applies in `flush()`.
|
|
24
|
+
*/
|
|
25
|
+
/** One row of the lookup table: a confidence bin and the calibrated value at its center. */
|
|
26
|
+
export interface CalibrationBin {
|
|
27
|
+
lo: number;
|
|
28
|
+
hi: number;
|
|
29
|
+
center: number;
|
|
30
|
+
calibrated: number;
|
|
31
|
+
}
|
|
32
|
+
/** The full calibration artifact emitted by `fit-isotonic-calibration.py`. */
|
|
33
|
+
export interface CalibrationTable {
|
|
34
|
+
model: string;
|
|
35
|
+
model_version: string;
|
|
36
|
+
method: string;
|
|
37
|
+
bins: number;
|
|
38
|
+
table: CalibrationBin[];
|
|
39
|
+
[key: string]: unknown;
|
|
40
|
+
}
|
|
41
|
+
/** Maps a raw span confidence in [0, 1] to its calibrated probability of correctness. */
|
|
42
|
+
export type Calibrator = (rawConfidence: number) => number;
|
|
43
|
+
/**
|
|
44
|
+
* Build a calibrator from an isotonic lookup table. The mapping is piecewise-linear between bin
|
|
45
|
+
* centers and clamped to the table's range outside it (the table is monotone non-decreasing by
|
|
46
|
+
* construction, so the interpolation is monotone too). Accepts either the full `CalibrationTable`
|
|
47
|
+
* or a bare `CalibrationBin[]`.
|
|
48
|
+
*/
|
|
49
|
+
export declare function createCalibrator(table: CalibrationTable | CalibrationBin[]): Calibrator;
|
|
50
|
+
//# sourceMappingURL=calibration.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"calibration.d.ts","sourceRoot":"","sources":["../../decoder/calibration.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,4FAA4F;AAC5F,MAAM,WAAW,cAAc;IAC9B,EAAE,EAAE,MAAM,CAAA;IACV,EAAE,EAAE,MAAM,CAAA;IACV,MAAM,EAAE,MAAM,CAAA;IACd,UAAU,EAAE,MAAM,CAAA;CAClB;AAED,8EAA8E;AAC9E,MAAM,WAAW,gBAAgB;IAChC,KAAK,EAAE,MAAM,CAAA;IACb,aAAa,EAAE,MAAM,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;IACd,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,cAAc,EAAE,CAAA;IACvB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAA;CACtB;AAED,yFAAyF;AACzF,MAAM,MAAM,UAAU,GAAG,CAAC,aAAa,EAAE,MAAM,KAAK,MAAM,CAAA;AAE1D;;;;;GAKG;AACH,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,gBAAgB,GAAG,cAAc,EAAE,GAAG,UAAU,CA8BvF"}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Opt-in confidence calibration for decoded spans.
|
|
7
|
+
*
|
|
8
|
+
* The decoder emits a per-span `confidence` that is the mean of the span's per-token softmax
|
|
9
|
+
* probabilities (`build-tree.ts`). Softmax probabilities are NOT calibrated — a CE-trained model
|
|
10
|
+
* is systematically over/under-confident in bands. Task #59 fits an isotonic-regression
|
|
11
|
+
* calibrator on a held-out OpenAddresses + corpus set
|
|
12
|
+
* (`scripts/eval/fit-isotonic-calibration.py`) and ships the result as a 20-bin lookup table
|
|
13
|
+
* (`data/eval/calibration/isotonic-<locale>-<version>.json`).
|
|
14
|
+
*
|
|
15
|
+
* This module turns that table into a pure `(rawConfidence) => calibratedConfidence` function. It
|
|
16
|
+
* is deliberately decoupled from the table source: pass the PARSED JSON object so this stays
|
|
17
|
+
* browser-safe (no `node:fs`) — the demo imports the JSON directly, Node scripts `JSON.parse`
|
|
18
|
+
* it.
|
|
19
|
+
*
|
|
20
|
+
* Wiring is OPT-IN. The default decode path is unchanged (byte-stable `conf=` output). A caller
|
|
21
|
+
* that wants calibrated confidences builds a `Calibrator` here and passes it via
|
|
22
|
+
* `ParseOpts.calibrate` (neural) / `BuildTreeOpts.calibrate` (decoder), which `build-tree.ts`
|
|
23
|
+
* applies in `flush()`.
|
|
24
|
+
*/
|
|
25
|
+
/**
|
|
26
|
+
* Build a calibrator from an isotonic lookup table. The mapping is piecewise-linear between bin
|
|
27
|
+
* centers and clamped to the table's range outside it (the table is monotone non-decreasing by
|
|
28
|
+
* construction, so the interpolation is monotone too). Accepts either the full `CalibrationTable`
|
|
29
|
+
* or a bare `CalibrationBin[]`.
|
|
30
|
+
*/
|
|
31
|
+
export function createCalibrator(table) {
|
|
32
|
+
const bins = Array.isArray(table) ? table : table.table;
|
|
33
|
+
if (!bins || bins.length === 0) {
|
|
34
|
+
throw new Error("createCalibrator: empty calibration table");
|
|
35
|
+
}
|
|
36
|
+
// Sort by center and extract parallel arrays for interpolation.
|
|
37
|
+
const sorted = [...bins].sort((a, b) => a.center - b.center);
|
|
38
|
+
const centers = sorted.map((b) => b.center);
|
|
39
|
+
const cals = sorted.map((b) => clamp01(b.calibrated));
|
|
40
|
+
const n = centers.length;
|
|
41
|
+
return (raw) => {
|
|
42
|
+
const x = clamp01(raw);
|
|
43
|
+
if (x <= centers[0])
|
|
44
|
+
return cals[0];
|
|
45
|
+
if (x >= centers[n - 1])
|
|
46
|
+
return cals[n - 1];
|
|
47
|
+
// Binary search for the interval [centers[i], centers[i+1]] containing x.
|
|
48
|
+
let lo = 0;
|
|
49
|
+
let hi = n - 1;
|
|
50
|
+
while (hi - lo > 1) {
|
|
51
|
+
const mid = (lo + hi) >> 1;
|
|
52
|
+
if (centers[mid] <= x)
|
|
53
|
+
lo = mid;
|
|
54
|
+
else
|
|
55
|
+
hi = mid;
|
|
56
|
+
}
|
|
57
|
+
const x0 = centers[lo];
|
|
58
|
+
const x1 = centers[hi];
|
|
59
|
+
const y0 = cals[lo];
|
|
60
|
+
const y1 = cals[hi];
|
|
61
|
+
const t = x1 === x0 ? 0 : (x - x0) / (x1 - x0);
|
|
62
|
+
return y0 + t * (y1 - y0);
|
|
63
|
+
};
|
|
64
|
+
}
|
|
65
|
+
function clamp01(v) {
|
|
66
|
+
if (Number.isNaN(v))
|
|
67
|
+
return 0;
|
|
68
|
+
if (v < 0)
|
|
69
|
+
return 0;
|
|
70
|
+
if (v > 1)
|
|
71
|
+
return 1;
|
|
72
|
+
return v;
|
|
73
|
+
}
|
|
74
|
+
//# sourceMappingURL=calibration.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"calibration.js","sourceRoot":"","sources":["../../decoder/calibration.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAuBH;;;;;GAKG;AACH,MAAM,UAAU,gBAAgB,CAAC,KAA0C;IAC1E,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAA;IACvD,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChC,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAA;IAC7D,CAAC;IACD,gEAAgE;IAChE,MAAM,MAAM,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,CAAA;IAC5D,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAA;IAC3C,MAAM,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAA;IACrD,MAAM,CAAC,GAAG,OAAO,CAAC,MAAM,CAAA;IAExB,OAAO,CAAC,GAAW,EAAU,EAAE;QAC9B,MAAM,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,CAAA;QACtB,IAAI,CAAC,IAAI,OAAO,CAAC,CAAC,CAAE;YAAE,OAAO,IAAI,CAAC,CAAC,CAAE,CAAA;QACrC,IAAI,CAAC,IAAI,OAAO,CAAC,CAAC,GAAG,CAAC,CAAE;YAAE,OAAO,IAAI,CAAC,CAAC,GAAG,CAAC,CAAE,CAAA;QAC7C,0EAA0E;QAC1E,IAAI,EAAE,GAAG,CAAC,CAAA;QACV,IAAI,EAAE,GAAG,CAAC,GAAG,CAAC,CAAA;QACd,OAAO,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;YACpB,MAAM,GAAG,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,CAAA;YAC1B,IAAI,OAAO,CAAC,GAAG,CAAE,IAAI,CAAC;gBAAE,EAAE,GAAG,GAAG,CAAA;;gBAC3B,EAAE,GAAG,GAAG,CAAA;QACd,CAAC;QACD,MAAM,EAAE,GAAG,OAAO,CAAC,EAAE,CAAE,CAAA;QACvB,MAAM,EAAE,GAAG,OAAO,CAAC,EAAE,CAAE,CAAA;QACvB,MAAM,EAAE,GAAG,IAAI,CAAC,EAAE,CAAE,CAAA;QACpB,MAAM,EAAE,GAAG,IAAI,CAAC,EAAE,CAAE,CAAA;QACpB,MAAM,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,CAAA;QAC9C,OAAO,EAAE,GAAG,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,CAAA;IAC1B,CAAC,CAAA;AACF,CAAC;AAED,SAAS,OAAO,CAAC,CAAS;IACzB,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QAAE,OAAO,CAAC,CAAA;IAC7B,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,CAAC,CAAA;IACnB,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,CAAC,CAAA;IACnB,OAAO,CAAC,CAAA;AACT,CAAC"}
|
|
@@ -11,9 +11,38 @@
|
|
|
11
11
|
* distance wins. Spans whose tag is absent from this map (or has no labeled parent) become
|
|
12
12
|
* roots.
|
|
13
13
|
*
|
|
14
|
-
* Tags absent from
|
|
14
|
+
* Tags absent from a map are treated as root-only (no parent ever accepted).
|
|
15
|
+
*
|
|
16
|
+
* ## Per-system containment (anti-lock-in)
|
|
17
|
+
*
|
|
18
|
+
* Addressing _systems_ disagree on hierarchy: a US street address nests `house_number → street →
|
|
19
|
+
* locality`, while a Japanese block address nests `building_number → sub_block → block →
|
|
20
|
+
* district` — there is no `street` parent at all. Today a single global map suffices only because
|
|
21
|
+
* the tag sets don't collide, but the moment the resolver or tree builder hardcodes the Western
|
|
22
|
+
* shape, retrofitting a second system gets expensive (DeepSeek resolver consult, 2026-05-30).
|
|
23
|
+
*
|
|
24
|
+
* The cheap insurance is this indirection: callers ask `containmentFor(system)` rather than
|
|
25
|
+
* importing one global constant. Today every system resolves to `WESTERN_PARENT_OF` (the
|
|
26
|
+
* historical map, behavior-identical), and `PARENT_OF` is kept as an alias so existing imports
|
|
27
|
+
* don't break. When a genuinely distinct system map lands (Phase 6 JP), it slots in here with
|
|
28
|
+
* zero call-site churn. See `AddressSystem` in `./types.ts`.
|
|
15
29
|
*/
|
|
16
30
|
import type { ComponentTag } from "../types/component.js";
|
|
31
|
+
import type { AddressSystem } from "./types.js";
|
|
17
32
|
/** Preferred-parent ordering for each tag. Empty / missing = always root. */
|
|
18
|
-
export declare const
|
|
33
|
+
export declare const WESTERN_PARENT_OF: Partial<Record<ComponentTag, ComponentTag[]>>;
|
|
34
|
+
/**
|
|
35
|
+
* The containment map for a given addressing system.
|
|
36
|
+
*
|
|
37
|
+
* Currently every system maps to {@link WESTERN_PARENT_OF} — the indirection exists so a future
|
|
38
|
+
* system-specific map (e.g. Japanese block addressing) can be introduced without touching the tree
|
|
39
|
+
* builder or validator. `undefined` (the common case — system not yet detected) uses the default.
|
|
40
|
+
*/
|
|
41
|
+
export declare function containmentFor(_system?: AddressSystem): Partial<Record<ComponentTag, ComponentTag[]>>;
|
|
42
|
+
/**
|
|
43
|
+
* Backwards-compatible alias for the default (Western) containment map. Prefer `containmentFor()`
|
|
44
|
+
* in new code so the system parameter threads through; this export remains for existing call
|
|
45
|
+
* sites.
|
|
46
|
+
*/
|
|
47
|
+
export declare const PARENT_OF: Partial<Record<"country" | "house_number" | "locality" | "postcode" | "region" | "street_prefix" | "street_suffix" | "street" | "unit" | "venue" | "attention" | "municipality" | "district" | "dependent_locality" | "subregion" | "street_prefix_particle" | "intersection_a" | "intersection_b" | "po_box" | "cedex" | "prefecture" | "block" | "sub_block" | "building_number" | "building_name", ("country" | "house_number" | "locality" | "postcode" | "region" | "street_prefix" | "street_suffix" | "street" | "unit" | "venue" | "attention" | "municipality" | "district" | "dependent_locality" | "subregion" | "street_prefix_particle" | "intersection_a" | "intersection_b" | "po_box" | "cedex" | "prefecture" | "block" | "sub_block" | "building_number" | "building_name")[]>>;
|
|
19
48
|
//# sourceMappingURL=containment.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"containment.d.ts","sourceRoot":"","sources":["../../decoder/containment.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"containment.d.ts","sourceRoot":"","sources":["../../decoder/containment.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAA;AACzD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAA;AAE/C,6EAA6E;AAC7E,eAAO,MAAM,iBAAiB,EAAE,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,YAAY,EAAE,CAAC,CAiC3E,CAAA;AAED;;;;;;GAMG;AACH,wBAAgB,cAAc,CAAC,OAAO,CAAC,EAAE,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,YAAY,EAAE,CAAC,CAAC,CAIrG;AAED;;;;GAIG;AACH,eAAO,MAAM,SAAS,mwBAAoB,CAAA"}
|
|
@@ -11,10 +11,24 @@
|
|
|
11
11
|
* distance wins. Spans whose tag is absent from this map (or has no labeled parent) become
|
|
12
12
|
* roots.
|
|
13
13
|
*
|
|
14
|
-
* Tags absent from
|
|
14
|
+
* Tags absent from a map are treated as root-only (no parent ever accepted).
|
|
15
|
+
*
|
|
16
|
+
* ## Per-system containment (anti-lock-in)
|
|
17
|
+
*
|
|
18
|
+
* Addressing _systems_ disagree on hierarchy: a US street address nests `house_number → street →
|
|
19
|
+
* locality`, while a Japanese block address nests `building_number → sub_block → block →
|
|
20
|
+
* district` — there is no `street` parent at all. Today a single global map suffices only because
|
|
21
|
+
* the tag sets don't collide, but the moment the resolver or tree builder hardcodes the Western
|
|
22
|
+
* shape, retrofitting a second system gets expensive (DeepSeek resolver consult, 2026-05-30).
|
|
23
|
+
*
|
|
24
|
+
* The cheap insurance is this indirection: callers ask `containmentFor(system)` rather than
|
|
25
|
+
* importing one global constant. Today every system resolves to `WESTERN_PARENT_OF` (the
|
|
26
|
+
* historical map, behavior-identical), and `PARENT_OF` is kept as an alias so existing imports
|
|
27
|
+
* don't break. When a genuinely distinct system map lands (Phase 6 JP), it slots in here with
|
|
28
|
+
* zero call-site churn. See `AddressSystem` in `./types.ts`.
|
|
15
29
|
*/
|
|
16
30
|
/** Preferred-parent ordering for each tag. Empty / missing = always root. */
|
|
17
|
-
export const
|
|
31
|
+
export const WESTERN_PARENT_OF = {
|
|
18
32
|
// Universal coarse — containment follows geographic granularity.
|
|
19
33
|
region: ["country"],
|
|
20
34
|
subregion: ["region", "country"],
|
|
@@ -35,7 +49,8 @@ export const PARENT_OF = {
|
|
|
35
49
|
venue: ["street", "locality"],
|
|
36
50
|
attention: ["venue"],
|
|
37
51
|
po_box: ["locality", "subregion", "region"],
|
|
38
|
-
// JP — declared for forward-compat; mapping is provisional and will be revisited in Phase 6
|
|
52
|
+
// JP — declared for forward-compat; mapping is provisional and will be revisited in Phase 6, when
|
|
53
|
+
// a dedicated `japanese` system map likely supersedes these entries with a no-street hierarchy.
|
|
39
54
|
prefecture: ["country"],
|
|
40
55
|
municipality: ["prefecture"],
|
|
41
56
|
district: ["municipality"],
|
|
@@ -44,4 +59,22 @@ export const PARENT_OF = {
|
|
|
44
59
|
building_number: ["sub_block", "block"],
|
|
45
60
|
building_name: ["building_number", "sub_block", "block"],
|
|
46
61
|
};
|
|
62
|
+
/**
|
|
63
|
+
* The containment map for a given addressing system.
|
|
64
|
+
*
|
|
65
|
+
* Currently every system maps to {@link WESTERN_PARENT_OF} — the indirection exists so a future
|
|
66
|
+
* system-specific map (e.g. Japanese block addressing) can be introduced without touching the tree
|
|
67
|
+
* builder or validator. `undefined` (the common case — system not yet detected) uses the default.
|
|
68
|
+
*/
|
|
69
|
+
export function containmentFor(_system) {
|
|
70
|
+
// Single system today. The parameter is intentionally consumed lazily — adding `case "japanese":`
|
|
71
|
+
// here is the entire surface area for a new system's hierarchy.
|
|
72
|
+
return WESTERN_PARENT_OF;
|
|
73
|
+
}
|
|
74
|
+
/**
|
|
75
|
+
* Backwards-compatible alias for the default (Western) containment map. Prefer `containmentFor()`
|
|
76
|
+
* in new code so the system parameter threads through; this export remains for existing call
|
|
77
|
+
* sites.
|
|
78
|
+
*/
|
|
79
|
+
export const PARENT_OF = WESTERN_PARENT_OF;
|
|
47
80
|
//# sourceMappingURL=containment.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"containment.js","sourceRoot":"","sources":["../../decoder/containment.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"containment.js","sourceRoot":"","sources":["../../decoder/containment.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAKH,6EAA6E;AAC7E,MAAM,CAAC,MAAM,iBAAiB,GAAkD;IAC/E,iEAAiE;IACjE,MAAM,EAAE,CAAC,SAAS,CAAC;IACnB,SAAS,EAAE,CAAC,QAAQ,EAAE,SAAS,CAAC;IAChC,QAAQ,EAAE,CAAC,WAAW,EAAE,QAAQ,EAAE,SAAS,CAAC;IAC5C,kBAAkB,EAAE,CAAC,UAAU,CAAC;IAChC,QAAQ,EAAE,CAAC,UAAU,EAAE,WAAW,EAAE,QAAQ,EAAE,SAAS,CAAC;IACxD,KAAK,EAAE,CAAC,UAAU,EAAE,UAAU,CAAC;IAE/B,mGAAmG;IACnG,MAAM,EAAE,CAAC,oBAAoB,EAAE,UAAU,EAAE,WAAW,EAAE,QAAQ,CAAC;IACjE,aAAa,EAAE,CAAC,QAAQ,CAAC;IACzB,sBAAsB,EAAE,CAAC,eAAe,EAAE,QAAQ,CAAC;IACnD,aAAa,EAAE,CAAC,QAAQ,CAAC;IACzB,YAAY,EAAE,CAAC,QAAQ,CAAC;IACxB,IAAI,EAAE,CAAC,QAAQ,EAAE,cAAc,CAAC;IAChC,cAAc,EAAE,CAAC,QAAQ,EAAE,UAAU,CAAC;IACtC,cAAc,EAAE,CAAC,QAAQ,EAAE,UAAU,CAAC;IAEtC,8EAA8E;IAC9E,KAAK,EAAE,CAAC,QAAQ,EAAE,UAAU,CAAC;IAC7B,SAAS,EAAE,CAAC,OAAO,CAAC;IACpB,MAAM,EAAE,CAAC,UAAU,EAAE,WAAW,EAAE,QAAQ,CAAC;IAE3C,kGAAkG;IAClG,gGAAgG;IAChG,UAAU,EAAE,CAAC,SAAS,CAAC;IACvB,YAAY,EAAE,CAAC,YAAY,CAAC;IAC5B,QAAQ,EAAE,CAAC,cAAc,CAAC;IAC1B,KAAK,EAAE,CAAC,UAAU,CAAC;IACnB,SAAS,EAAE,CAAC,OAAO,CAAC;IACpB,eAAe,EAAE,CAAC,WAAW,EAAE,OAAO,CAAC;IACvC,aAAa,EAAE,CAAC,iBAAiB,EAAE,WAAW,EAAE,OAAO,CAAC;CACxD,CAAA;AAED;;;;;;GAMG;AACH,MAAM,UAAU,cAAc,CAAC,OAAuB;IACrD,kGAAkG;IAClG,gEAAgE;IAChE,OAAO,iBAAiB,CAAA;AACzB,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,MAAM,SAAS,GAAG,iBAAiB,CAAA"}
|
package/out/decoder/index.d.ts
CHANGED
|
@@ -4,10 +4,12 @@
|
|
|
4
4
|
* @author Teffen Ellis, et al.
|
|
5
5
|
*/
|
|
6
6
|
export * from "./build-tree.js";
|
|
7
|
+
export * from "./calibration.js";
|
|
7
8
|
export * from "./containment.js";
|
|
8
9
|
export * from "./proposals-to-tree.js";
|
|
9
10
|
export * from "./serialize-json.js";
|
|
10
11
|
export * from "./serialize-tuples.js";
|
|
11
12
|
export * from "./serialize-xml.js";
|
|
12
13
|
export * from "./types.js";
|
|
14
|
+
export * from "./validate-tree.js";
|
|
13
15
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../decoder/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,kBAAkB,CAAA;AAChC,cAAc,wBAAwB,CAAA;AACtC,cAAc,qBAAqB,CAAA;AACnC,cAAc,uBAAuB,CAAA;AACrC,cAAc,oBAAoB,CAAA;AAClC,cAAc,YAAY,CAAA"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../decoder/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,kBAAkB,CAAA;AAChC,cAAc,kBAAkB,CAAA;AAChC,cAAc,wBAAwB,CAAA;AACtC,cAAc,qBAAqB,CAAA;AACnC,cAAc,uBAAuB,CAAA;AACrC,cAAc,oBAAoB,CAAA;AAClC,cAAc,YAAY,CAAA;AAC1B,cAAc,oBAAoB,CAAA"}
|
package/out/decoder/index.js
CHANGED
|
@@ -4,10 +4,12 @@
|
|
|
4
4
|
* @author Teffen Ellis, et al.
|
|
5
5
|
*/
|
|
6
6
|
export * from "./build-tree.js";
|
|
7
|
+
export * from "./calibration.js";
|
|
7
8
|
export * from "./containment.js";
|
|
8
9
|
export * from "./proposals-to-tree.js";
|
|
9
10
|
export * from "./serialize-json.js";
|
|
10
11
|
export * from "./serialize-tuples.js";
|
|
11
12
|
export * from "./serialize-xml.js";
|
|
12
13
|
export * from "./types.js";
|
|
14
|
+
export * from "./validate-tree.js";
|
|
13
15
|
//# sourceMappingURL=index.js.map
|
package/out/decoder/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../decoder/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,kBAAkB,CAAA;AAChC,cAAc,wBAAwB,CAAA;AACtC,cAAc,qBAAqB,CAAA;AACnC,cAAc,uBAAuB,CAAA;AACrC,cAAc,oBAAoB,CAAA;AAClC,cAAc,YAAY,CAAA"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../decoder/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,kBAAkB,CAAA;AAChC,cAAc,kBAAkB,CAAA;AAChC,cAAc,wBAAwB,CAAA;AACtC,cAAc,qBAAqB,CAAA;AACnC,cAAc,uBAAuB,CAAA;AACrC,cAAc,oBAAoB,CAAA;AAClC,cAAc,YAAY,CAAA;AAC1B,cAAc,oBAAoB,CAAA"}
|
|
@@ -7,6 +7,10 @@
|
|
|
7
7
|
*
|
|
8
8
|
* Flattens the tree to `{ tag: value }`. First-occurrence wins for repeated tags — matches
|
|
9
9
|
* libpostal's behavior. Use `decodeAsTuples` if order or repetition matters.
|
|
10
|
+
*
|
|
11
|
+
* A multi-role node (#413 — a city-state span tagged `region` that also plays `locality`) emits one
|
|
12
|
+
* entry per role from its `interpretations`, so `out.locality` still surfaces for a completed
|
|
13
|
+
* city-state. The shared span means every role gets the same `value`.
|
|
10
14
|
*/
|
|
11
15
|
import type { ComponentTag } from "../types/component.js";
|
|
12
16
|
import type { AddressTree } from "./types.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"serialize-json.d.ts","sourceRoot":"","sources":["../../decoder/serialize-json.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"serialize-json.d.ts","sourceRoot":"","sources":["../../decoder/serialize-json.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAA;AACzD,OAAO,KAAK,EAAe,WAAW,EAAE,MAAM,YAAY,CAAA;AAY1D,wEAAwE;AACxE,wBAAgB,YAAY,CAAC,IAAI,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAIrF"}
|
|
@@ -7,10 +7,20 @@
|
|
|
7
7
|
*
|
|
8
8
|
* Flattens the tree to `{ tag: value }`. First-occurrence wins for repeated tags — matches
|
|
9
9
|
* libpostal's behavior. Use `decodeAsTuples` if order or repetition matters.
|
|
10
|
+
*
|
|
11
|
+
* A multi-role node (#413 — a city-state span tagged `region` that also plays `locality`) emits one
|
|
12
|
+
* entry per role from its `interpretations`, so `out.locality` still surfaces for a completed
|
|
13
|
+
* city-state. The shared span means every role gets the same `value`.
|
|
10
14
|
*/
|
|
11
15
|
function visit(node, out) {
|
|
12
16
|
if (!(node.tag in out))
|
|
13
17
|
out[node.tag] = node.value;
|
|
18
|
+
if (node.interpretations) {
|
|
19
|
+
for (const interp of node.interpretations) {
|
|
20
|
+
if (!(interp.tag in out))
|
|
21
|
+
out[interp.tag] = node.value;
|
|
22
|
+
}
|
|
23
|
+
}
|
|
14
24
|
for (const child of node.children)
|
|
15
25
|
visit(child, out);
|
|
16
26
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"serialize-json.js","sourceRoot":"","sources":["../../decoder/serialize-json.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"serialize-json.js","sourceRoot":"","sources":["../../decoder/serialize-json.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAKH,SAAS,KAAK,CAAC,IAAiB,EAAE,GAA0C;IAC3E,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC;QAAE,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,KAAK,CAAA;IAClD,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;QAC1B,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YAC3C,IAAI,CAAC,CAAC,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC;gBAAE,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,KAAK,CAAA;QACvD,CAAC;IACF,CAAC;IACD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ;QAAE,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAA;AACrD,CAAC;AAED,wEAAwE;AACxE,MAAM,UAAU,YAAY,CAAC,IAAiB;IAC7C,MAAM,GAAG,GAA0C,EAAE,CAAA;IACrD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK;QAAE,KAAK,CAAC,IAAI,EAAE,GAAG,CAAC,CAAA;IAC/C,OAAO,GAAG,CAAA;AACX,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"serialize-xml.d.ts","sourceRoot":"","sources":["../../decoder/serialize-xml.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAEH,OAAO,KAAK,EAAe,WAAW,EAAE,MAAM,YAAY,CAAA;AAE1D,MAAM,WAAW,gBAAgB;IAChC,mEAAmE;IACnE,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB,iEAAiE;IACjE,WAAW,CAAC,EAAE,OAAO,CAAA;IACrB,oEAAoE;IACpE,cAAc,CAAC,EAAE,OAAO,CAAA;IACxB,0FAA0F;IAC1F,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB,iGAAiG;IACjG,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB,qFAAqF;IACrF,YAAY,CAAC,EAAE,OAAO,CAAA;IACtB;;;;;;OAMG;IACH,mBAAmB,CAAC,EAAE,OAAO,CAAA;CAC7B;
|
|
1
|
+
{"version":3,"file":"serialize-xml.d.ts","sourceRoot":"","sources":["../../decoder/serialize-xml.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAEH,OAAO,KAAK,EAAe,WAAW,EAAE,MAAM,YAAY,CAAA;AAE1D,MAAM,WAAW,gBAAgB;IAChC,mEAAmE;IACnE,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB,iEAAiE;IACjE,WAAW,CAAC,EAAE,OAAO,CAAA;IACrB,oEAAoE;IACpE,cAAc,CAAC,EAAE,OAAO,CAAA;IACxB,0FAA0F;IAC1F,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB,iGAAiG;IACjG,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB,qFAAqF;IACrF,YAAY,CAAC,EAAE,OAAO,CAAA;IACtB;;;;;;OAMG;IACH,mBAAmB,CAAC,EAAE,OAAO,CAAA;CAC7B;AA0FD,yFAAyF;AACzF,wBAAgB,WAAW,CAAC,IAAI,EAAE,WAAW,EAAE,IAAI,GAAE,gBAAqB,GAAG,MAAM,CAelF"}
|
|
@@ -65,6 +65,12 @@ function attrs(node, opts) {
|
|
|
65
65
|
if (opts.includePlace && node.placeId !== undefined) {
|
|
66
66
|
parts.push(`place="${escapeXml(node.placeId)}"`);
|
|
67
67
|
}
|
|
68
|
+
// Multi-role node (#413): a city-state span tagged `region` that also plays `locality` lists every
|
|
69
|
+
// role it holds, primary first — `roles="region locality"`. Emitted only when extra roles exist.
|
|
70
|
+
if (node.interpretations && node.interpretations.length > 0) {
|
|
71
|
+
const roles = [node.tag, ...node.interpretations.map((i) => i.tag)];
|
|
72
|
+
parts.push(`roles="${escapeXml(roles.join(" "))}"`);
|
|
73
|
+
}
|
|
68
74
|
return parts.length === 0 ? "" : " " + parts.join(" ");
|
|
69
75
|
}
|
|
70
76
|
function serializeAlternatives(node, indent) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"serialize-xml.js","sourceRoot":"","sources":["../../decoder/serialize-xml.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AA2BH,SAAS,SAAS,CAAC,CAAS;IAC3B,OAAO,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAA;AACpG,CAAC;AAED,SAAS,YAAY,CAAC,IAAiB;IACtC,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,QAAQ;QAAE,OAAO,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAA;IAC1E,IAAI,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC,MAAM,CAAA;IACnC,IAAI,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC,QAAQ,CAAA;IACvC,OAAO,IAAI,CAAA;AACZ,CAAC;AAED;;;GAGG;AACH,MAAM,aAAa,GAAG,CAAC,CAAA;AAEvB,SAAS,KAAK,CAAC,IAAiB,EAAE,IAAgC;IACjE,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,IAAI,IAAI,CAAC,cAAc;QAAE,KAAK,CAAC,IAAI,CAAC,UAAU,IAAI,CAAC,KAAK,GAAG,EAAE,QAAQ,IAAI,CAAC,GAAG,GAAG,CAAC,CAAA;IACjF,IAAI,IAAI,CAAC,WAAW;QAAE,KAAK,CAAC,IAAI,CAAC,SAAS,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;IACxE,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;QACrB,MAAM,GAAG,GAAG,YAAY,CAAC,IAAI,CAAC,CAAA;QAC9B,IAAI,GAAG,KAAK,IAAI;YAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IACxD,CAAC;IACD,+FAA+F;IAC/F,wEAAwE;IACxE,IAAI,IAAI,CAAC,UAAU,IAAI,IAAI,CAAC,GAAG,KAAK,SAAS,IAAI,IAAI,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;QACzE,KAAK,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,aAAa,CAAC,GAAG,EAAE,QAAQ,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,aAAa,CAAC,GAAG,CAAC,CAAA;IACnG,CAAC;IACD,IAAI,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;QACrD,KAAK,CAAC,IAAI,CAAC,UAAU,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IACjD,CAAC;IACD,OAAO,KAAK,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AACvD,CAAC;AAWD,SAAS,qBAAqB,CAAC,IAAiB,EAAE,MAAc;IAC/D,IAAI,CAAC,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,YAAY,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAA;IACnE,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QAC3C,MAAM,GAAG,GAAG,GAAsB,CAAA;QAClC,MAAM,KAAK,GAAG,OAAO,GAAG,CAAC,EAAE,EAAE,CAAA;QAC7B,MA
AM,KAAK,GAAG;YACb,UAAU,SAAS,CAAC,KAAK,CAAC,GAAG;YAC7B,SAAS,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG;YAC/B,cAAc,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG;YACzC,QAAQ,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,aAAa,CAAC,GAAG;YACzC,QAAQ,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,aAAa,CAAC,GAAG;YACzC,UAAU,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;SACjC,CAAA;QACD,OAAO,GAAG,MAAM,gBAAgB,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAA;IACrD,CAAC,CAAC,CAAA;IACF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AACxB,CAAC;AAED,SAAS,aAAa,CAAC,IAAiB,EAAE,MAAc,EAAE,IAAgC;IACzF,MAAM,CAAC,GAAG,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;IAC3B,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IAClC,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;IAClC,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;IAEpD,MAAM,SAAS,GAAG,IAAI,CAAC,mBAAmB,CAAC,CAAC,CAAC,qBAAqB,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IAC1F,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAA;IAC5C,MAAM,OAAO,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,CAAA;IAEpC,IAAI,CAAC,WAAW,IAAI,CAAC,OAAO,EAAE,CAAC;QAC9B,OAAO,GAAG,MAAM,IAAI,IAAI,CAAC,GAAG,GAAG,CAAC,IAAI,IAAI,KAAK,IAAI,CAAC,GAAG,GAAG,CAAA;IACzD,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,aAAa,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAC1F,MAAM,KAAK,GAAG,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAC/D,OAAO,GAAG,MAAM,IAAI,IAAI,CAAC,GAAG,GAAG,CAAC,IAAI,IAAI,GAAG,EAAE,GAAG,KAAK,GAAG,EAAE,GAAG,MAAM,KAAK,IAAI,CAAC,GAAG,GAAG,CAAA;AACpF,CAAC;AAED,yFAAyF;AACzF,MAAM,UAAU,WAAW,CAAC,IAAiB,EAAE,OAAyB,EAAE;IACzE,MAAM,IAAI,GAA+B;QACxC,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,IAAI;QAC3B,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,IAAI;QACrC,cAAc,EAAE,IAAI,CAAC,cAAc,IAAI,IAAI;QAC3C,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,IAAI;QACnC,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,IAAI;QACnC,YAAY,EAAE,IAAI,CAAC,YAAY,IAAI,IAAI;QACvC,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,KAAK;KACtD,CAAA;IACD,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACn
C,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;IAClC,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;IACtC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,aAAa,CAAC,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAC/E,OAAO,iBAAiB,OAAO,KAAK,EAAE,GAAG,QAAQ,GAAG,EAAE,YAAY,CAAA;AACnE,CAAC"}
|
|
1
|
+
{"version":3,"file":"serialize-xml.js","sourceRoot":"","sources":["../../decoder/serialize-xml.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AA2BH,SAAS,SAAS,CAAC,CAAS;IAC3B,OAAO,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAA;AACpG,CAAC;AAED,SAAS,YAAY,CAAC,IAAiB;IACtC,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,QAAQ;QAAE,OAAO,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAA;IAC1E,IAAI,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC,MAAM,CAAA;IACnC,IAAI,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC,QAAQ,CAAA;IACvC,OAAO,IAAI,CAAA;AACZ,CAAC;AAED;;;GAGG;AACH,MAAM,aAAa,GAAG,CAAC,CAAA;AAEvB,SAAS,KAAK,CAAC,IAAiB,EAAE,IAAgC;IACjE,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,IAAI,IAAI,CAAC,cAAc;QAAE,KAAK,CAAC,IAAI,CAAC,UAAU,IAAI,CAAC,KAAK,GAAG,EAAE,QAAQ,IAAI,CAAC,GAAG,GAAG,CAAC,CAAA;IACjF,IAAI,IAAI,CAAC,WAAW;QAAE,KAAK,CAAC,IAAI,CAAC,SAAS,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;IACxE,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;QACrB,MAAM,GAAG,GAAG,YAAY,CAAC,IAAI,CAAC,CAAA;QAC9B,IAAI,GAAG,KAAK,IAAI;YAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IACxD,CAAC;IACD,+FAA+F;IAC/F,wEAAwE;IACxE,IAAI,IAAI,CAAC,UAAU,IAAI,IAAI,CAAC,GAAG,KAAK,SAAS,IAAI,IAAI,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;QACzE,KAAK,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,aAAa,CAAC,GAAG,EAAE,QAAQ,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,aAAa,CAAC,GAAG,CAAC,CAAA;IACnG,CAAC;IACD,IAAI,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;QACrD,KAAK,CAAC,IAAI,CAAC,UAAU,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IACjD,CAAC;IACD,mGAAmG;IACnG,iGAAiG;IACjG,IAAI,IAAI,CAAC,eAAe,IAAI,IAAI,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7D,MAAM,KAAK,GAAG,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;QACnE,KAAK,CAAC,IAAI,CAAC,UAAU,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAA;IACpD,CAAC;IACD,OAAO,KAAK,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,GAA
G,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AACvD,CAAC;AAWD,SAAS,qBAAqB,CAAC,IAAiB,EAAE,MAAc;IAC/D,IAAI,CAAC,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,YAAY,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAA;IACnE,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QAC3C,MAAM,GAAG,GAAG,GAAsB,CAAA;QAClC,MAAM,KAAK,GAAG,OAAO,GAAG,CAAC,EAAE,EAAE,CAAA;QAC7B,MAAM,KAAK,GAAG;YACb,UAAU,SAAS,CAAC,KAAK,CAAC,GAAG;YAC7B,SAAS,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG;YAC/B,cAAc,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG;YACzC,QAAQ,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,aAAa,CAAC,GAAG;YACzC,QAAQ,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,aAAa,CAAC,GAAG;YACzC,UAAU,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;SACjC,CAAA;QACD,OAAO,GAAG,MAAM,gBAAgB,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAA;IACrD,CAAC,CAAC,CAAA;IACF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AACxB,CAAC;AAED,SAAS,aAAa,CAAC,IAAiB,EAAE,MAAc,EAAE,IAAgC;IACzF,MAAM,CAAC,GAAG,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;IAC3B,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IAClC,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;IAClC,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;IAEpD,MAAM,SAAS,GAAG,IAAI,CAAC,mBAAmB,CAAC,CAAC,CAAC,qBAAqB,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IAC1F,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAA;IAC5C,MAAM,OAAO,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,CAAA;IAEpC,IAAI,CAAC,WAAW,IAAI,CAAC,OAAO,EAAE,CAAC;QAC9B,OAAO,GAAG,MAAM,IAAI,IAAI,CAAC,GAAG,GAAG,CAAC,IAAI,IAAI,KAAK,IAAI,CAAC,GAAG,GAAG,CAAA;IACzD,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,aAAa,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAC1F,MAAM,KAAK,GAAG,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAC/D,OAAO,GAAG,MAAM,IAAI,IAAI,CAAC,GAAG,GAAG,CAAC,IAAI,IAAI,GAAG,EAAE,GAAG,KAAK,GAAG,EAAE,GAAG,MAAM,KAAK,IAAI,CAAC,GAAG,GAAG,CAAA;AACpF,CAAC;AAED,yFAAyF;AACzF,MAAM,UAAU,WAAW,CAAC,IAAiB,EAAE,OAAyB,EAAE;IACzE,MAAM,IAAI,GAA+B;QACxC,MAAM,EAAE,IAAI,C
AAC,MAAM,IAAI,IAAI;QAC3B,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,IAAI;QACrC,cAAc,EAAE,IAAI,CAAC,cAAc,IAAI,IAAI;QAC3C,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,IAAI;QACnC,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,IAAI;QACnC,YAAY,EAAE,IAAI,CAAC,YAAY,IAAI,IAAI;QACvC,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,KAAK;KACtD,CAAA;IACD,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACnC,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;IAClC,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;IACtC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,aAAa,CAAC,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAC/E,OAAO,iBAAiB,OAAO,KAAK,EAAE,GAAG,QAAQ,GAAG,EAAE,YAAY,CAAA;AACnE,CAAC"}
|
package/out/decoder/types.d.ts
CHANGED
|
@@ -100,6 +100,31 @@ export interface AddressNode {
|
|
|
100
100
|
* `@mailwoman/core/resolver`.
|
|
101
101
|
*/
|
|
102
102
|
alternatives?: ReadonlyArray<unknown>;
|
|
103
|
+
/**
|
|
104
|
+
* ADDITIONAL roles this single span plays, beyond `tag` (#413). A place can hold multiple admin
|
|
105
|
+
* tiers under one name — a city-state (Berlin is region AND locality) or a capital-seat province
|
|
106
|
+
* (Milano province ~ Milano comune). Rather than synthesize a second node with a borrowed span,
|
|
107
|
+
* the resolver records the extra role(s) here, so one node = one span = many roles (the model
|
|
108
|
+
* Google's `address_components[].types` uses). `tag`/`placeId`/`lat`/`lon` remain the PRIMARY
|
|
109
|
+
* role; each interpretation is a distinct secondary role with its own resolved place. Serializers
|
|
110
|
+
* surface every role (a city-state emits both `region` and `locality`). Distinct from
|
|
111
|
+
* `alternatives` — those are same-role runner-up places (Springfield IL vs MA); interpretations
|
|
112
|
+
* are DIFFERENT tags, same span. Empty / absent for the common single-role node. Both completion
|
|
113
|
+
* (#415) and a future concordance decode write into this one slot.
|
|
114
|
+
*/
|
|
115
|
+
interpretations?: ReadonlyArray<Interpretation>;
|
|
116
|
+
}
|
|
117
|
+
/** One additional role a span plays (#413) — see {@link AddressNode.interpretations}. */
|
|
118
|
+
export interface Interpretation {
|
|
119
|
+
tag: ComponentTag;
|
|
120
|
+
/** Resolver-supplied normalized place URI for this role (e.g. `wof:101909779`). */
|
|
121
|
+
placeId?: string;
|
|
122
|
+
sourceId?: string;
|
|
123
|
+
/** Centroid for this role's place (a capital-seat comune differs from its province). */
|
|
124
|
+
lat?: number;
|
|
125
|
+
lon?: number;
|
|
126
|
+
confidence?: number;
|
|
127
|
+
metadata?: Record<string, unknown>;
|
|
103
128
|
}
|
|
104
129
|
/**
|
|
105
130
|
* The full decoded tree for one parsed address.
|
|
@@ -112,5 +137,26 @@ export interface AddressTree {
|
|
|
112
137
|
/** The original raw input text — preserved for round-trip and XML root @raw attribute. */
|
|
113
138
|
raw: string;
|
|
114
139
|
roots: AddressNode[];
|
|
140
|
+
/**
|
|
141
|
+
* The addressing SYSTEM this tree was decoded under, which selects the containment hierarchy
|
|
142
|
+
* (`containmentFor(system)` in `./containment.ts`). Absent means the default Western hierarchy
|
|
143
|
+
* (`house_number → street → locality → …`).
|
|
144
|
+
*
|
|
145
|
+
* This is forward-compat insurance, not yet a behavioral switch: every system currently resolves
|
|
146
|
+
* to the same map, so an absent or present `system` produces identical trees today. It exists so
|
|
147
|
+
* that when a genuinely distinct system lands (e.g. Japanese block addressing, where
|
|
148
|
+
* `building_number` nests under `sub_block`/`block` with no `street` parent), consumers and the
|
|
149
|
+
* tree builder already carry the discriminator — no `AddressTree` shape change later. A locale
|
|
150
|
+
* pre-classifier (Phase 6+) is the intended source of this value.
|
|
151
|
+
*/
|
|
152
|
+
system?: AddressSystem;
|
|
115
153
|
}
|
|
154
|
+
/**
|
|
155
|
+
* The addressing system a tree was decoded under — selects the containment hierarchy. Western
|
|
156
|
+
* covers US/EU/most-Latin-script street addressing (`house_number → street → locality`). `japanese`
|
|
157
|
+
* is declared for forward-compat (block addressing: `building_number → sub_block → block →
|
|
158
|
+
* district`, no street); it currently shares the Western map until Phase 6 gives it a distinct one.
|
|
159
|
+
* Open string union so a new system can be added without a breaking enum change.
|
|
160
|
+
*/
|
|
161
|
+
export type AddressSystem = "western" | "japanese" | (string & {});
|
|
116
162
|
//# sourceMappingURL=types.d.ts.map
|