@mailwoman/core 4.0.0 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/out/classification/Classification.d.ts +2 -2
- package/out/decoder/build-tree.d.ts +11 -3
- package/out/decoder/build-tree.d.ts.map +1 -1
- package/out/decoder/build-tree.js +5 -4
- package/out/decoder/build-tree.js.map +1 -1
- package/out/decoder/calibration.d.ts +50 -0
- package/out/decoder/calibration.d.ts.map +1 -0
- package/out/decoder/calibration.js +74 -0
- package/out/decoder/calibration.js.map +1 -0
- package/out/decoder/containment.d.ts +1 -1
- package/out/decoder/index.d.ts +1 -0
- package/out/decoder/index.d.ts.map +1 -1
- package/out/decoder/index.js +1 -0
- package/out/decoder/index.js.map +1 -1
- package/out/decoder/serialize-json.d.ts +4 -0
- package/out/decoder/serialize-json.d.ts.map +1 -1
- package/out/decoder/serialize-json.js +10 -0
- package/out/decoder/serialize-json.js.map +1 -1
- package/out/decoder/serialize-xml.d.ts.map +1 -1
- package/out/decoder/serialize-xml.js +6 -0
- package/out/decoder/serialize-xml.js.map +1 -1
- package/out/decoder/types.d.ts +25 -0
- package/out/decoder/types.d.ts.map +1 -1
- package/out/pipeline/runtime-pipeline.d.ts +17 -1
- package/out/pipeline/runtime-pipeline.d.ts.map +1 -1
- package/out/pipeline/runtime-pipeline.js +63 -12
- package/out/pipeline/runtime-pipeline.js.map +1 -1
- package/out/pipeline/span-logit-aggregation.d.ts +1 -0
- package/out/pipeline/span-logit-aggregation.d.ts.map +1 -1
- package/out/pipeline/span-logit-aggregation.js +8 -0
- package/out/pipeline/span-logit-aggregation.js.map +1 -1
- package/out/pipeline/types.d.ts +10 -3
- package/out/pipeline/types.d.ts.map +1 -1
- package/out/resolver/index.d.ts +1 -1
- package/out/resolver/index.d.ts.map +1 -1
- package/out/resolver/resolve.d.ts.map +1 -1
- package/out/resolver/resolve.js +114 -4
- package/out/resolver/resolve.js.map +1 -1
- package/out/resolver/types.d.ts +99 -4
- package/out/resolver/types.d.ts.map +1 -1
- package/out/resolver/types.js.map +1 -1
- package/out/solver/SolutionMatch.d.ts +1 -1
- package/out/solver/mask.d.ts +1 -1
- package/out/tokenization/Graph.d.ts +1 -1
- package/out/tokenization/Graph.d.ts.map +1 -1
- package/out/tokenization/Graph.js +5 -1
- package/out/tokenization/Graph.js.map +1 -1
- package/package.json +2 -2
|
@@ -9,12 +9,12 @@ import type { LibPostalLanguageCode } from "../resources/libpostal.js";
|
|
|
9
9
|
/**
|
|
10
10
|
* Classification recognized by Mailwoman.
|
|
11
11
|
*/
|
|
12
|
-
export declare const Classifications: Set<"
|
|
12
|
+
export declare const Classifications: Set<"adjacent" | "alpha" | "alphanumeric" | "area" | "chain" | "country" | "dependency" | "directional" | "end_token_single_character" | "end_token" | "given_name" | "house_number" | "intersection" | "level_designator" | "level" | "locality" | "middle_initial" | "multistreet" | "numeric" | "ordinal" | "person" | "personal_suffix" | "personal_title" | "place" | "postcode" | "punctuation" | "region" | "road_type" | "start_token" | "stop_word" | "street_name" | "street_prefix" | "street_proper_name" | "street_suffix" | "street" | "surname" | "toponym" | "unit_designator" | "unit" | "unknown" | "venue">;
|
|
13
13
|
export type Classification = typeof Classifications extends Set<infer T> ? T : never;
|
|
14
14
|
/**
|
|
15
15
|
* Public classification labels.
|
|
16
16
|
*/
|
|
17
|
-
declare const VisibleClassification: Set<"country" | "
|
|
17
|
+
declare const VisibleClassification: Set<"country" | "dependency" | "house_number" | "level_designator" | "level" | "locality" | "postcode" | "region" | "street" | "unit_designator" | "unit" | "venue">;
|
|
18
18
|
/**
|
|
19
19
|
* Classification labels that are exposed to API consumers.
|
|
20
20
|
*/
|
|
@@ -10,9 +10,9 @@
|
|
|
10
10
|
* 1. Span emission — walk the token stream, group `B-X` followed by `I-X*` into one span. Lenient on
|
|
11
11
|
* hanging `I-X` (treat as new span). A `B-X` that is whitespace-adjacent to an already-open
|
|
12
12
|
* `X` span is also folded in (spurious-boundary repair for multi-word values the model
|
|
13
|
-
* fragments, e.g. "Saint Paul" → B-locality B-locality); a comma/separator between them
|
|
14
|
-
* them distinct. Span `value` is sliced from `raw` by [start, end), NOT concatenated
|
|
15
|
-
* `piece` — this avoids SentencePiece's synthetic leading-space markers in the output.
|
|
13
|
+
* fragments, e.g. "Saint Paul" → B-locality B-locality); a comma/separator between them
|
|
14
|
+
* keeps them distinct. Span `value` is sliced from `raw` by [start, end), NOT concatenated
|
|
15
|
+
* from `piece` — this avoids SentencePiece's synthetic leading-space markers in the output.
|
|
16
16
|
* 2. Parent attachment — for each span, find the nearest labeled span whose tag is the
|
|
17
17
|
* highest-priority entry in this span's `PARENT_OF` list. Distance is the tiebreaker only.
|
|
18
18
|
* Spans with no found parent become roots.
|
|
@@ -22,6 +22,7 @@
|
|
|
22
22
|
* Source order is still preserved in the `start`/`end` fields, which the XML serializer exposes
|
|
23
23
|
* as attributes.
|
|
24
24
|
*/
|
|
25
|
+
import type { Calibrator } from "./calibration.js";
|
|
25
26
|
import type { AddressSystem, AddressTree, DecoderToken } from "./types.js";
|
|
26
27
|
/**
|
|
27
28
|
* Optional caller-supplied attribution stamped on every emitted node. The BIO stream comes from a
|
|
@@ -39,6 +40,13 @@ export interface BuildTreeOpts {
|
|
|
39
40
|
* behavioral when a system-specific map lands (Phase 6 JP). See `containment.ts`.
|
|
40
41
|
*/
|
|
41
42
|
system?: AddressSystem;
|
|
43
|
+
/**
|
|
44
|
+
* Optional confidence calibrator (task #59). When provided, each span's mean-of-token-softmax
|
|
45
|
+
* confidence is mapped through it before being stamped on the node, so `conf=` reports a
|
|
46
|
+
* calibrated probability of correctness rather than the raw softmax. OPT-IN — omit for the
|
|
47
|
+
* byte-stable default. Build one via `createCalibrator` (`./calibration.ts`).
|
|
48
|
+
*/
|
|
49
|
+
calibrate?: Calibrator;
|
|
42
50
|
}
|
|
43
51
|
/**
|
|
44
52
|
* Build an `AddressTree` from a raw input string and the token stream produced by the model.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"build-tree.d.ts","sourceRoot":"","sources":["../../decoder/build-tree.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;
|
|
1
|
+
{"version":3,"file":"build-tree.d.ts","sourceRoot":"","sources":["../../decoder/build-tree.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAElD,OAAO,KAAK,EAAe,aAAa,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,YAAY,CAAA;AAEvF;;;;;GAKG;AACH,MAAM,WAAW,aAAa;IAC7B,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB;;;;;OAKG;IACH,MAAM,CAAC,EAAE,aAAa,CAAA;IACtB;;;;;OAKG;IACH,SAAS,CAAC,EAAE,UAAU,CAAA;CACtB;AAsHD;;;;;;;;GAQG;AACH,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,EAAE,IAAI,GAAE,aAAkB,GAAG,WAAW,CAe3G"}
|
|
@@ -10,9 +10,9 @@
|
|
|
10
10
|
* 1. Span emission — walk the token stream, group `B-X` followed by `I-X*` into one span. Lenient on
|
|
11
11
|
* hanging `I-X` (treat as new span). A `B-X` that is whitespace-adjacent to an already-open
|
|
12
12
|
* `X` span is also folded in (spurious-boundary repair for multi-word values the model
|
|
13
|
-
* fragments, e.g. "Saint Paul" → B-locality B-locality); a comma/separator between them
|
|
14
|
-
* them distinct. Span `value` is sliced from `raw` by [start, end), NOT concatenated
|
|
15
|
-
* `piece` — this avoids SentencePiece's synthetic leading-space markers in the output.
|
|
13
|
+
* fragments, e.g. "Saint Paul" → B-locality B-locality); a comma/separator between them
|
|
14
|
+
* keeps them distinct. Span `value` is sliced from `raw` by [start, end), NOT concatenated
|
|
15
|
+
* from `piece` — this avoids SentencePiece's synthetic leading-space markers in the output.
|
|
16
16
|
* 2. Parent attachment — for each span, find the nearest labeled span whose tag is the
|
|
17
17
|
* highest-priority entry in this span's `PARENT_OF` list. Distance is the tiebreaker only.
|
|
18
18
|
* Spans with no found parent become roots.
|
|
@@ -53,7 +53,8 @@ function flush(open, raw, out, attribution) {
|
|
|
53
53
|
if (start >= end)
|
|
54
54
|
return null;
|
|
55
55
|
const value = raw.slice(start, end);
|
|
56
|
-
const
|
|
56
|
+
const rawConfidence = open.confidences.reduce((a, b) => a + b, 0) / open.confidences.length;
|
|
57
|
+
const confidence = attribution.calibrate ? attribution.calibrate(rawConfidence) : rawConfidence;
|
|
57
58
|
const node = { tag: open.tag, start, end, value, confidence, children: [] };
|
|
58
59
|
if (attribution.source !== undefined)
|
|
59
60
|
node.source = attribution.source;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"build-tree.js","sourceRoot":"","sources":["../../decoder/build-tree.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;
|
|
1
|
+
{"version":3,"file":"build-tree.js","sourceRoot":"","sources":["../../decoder/build-tree.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAIH,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AAmCjD,SAAS,QAAQ,CAAC,KAAe;IAChC,IAAI,KAAK,KAAK,GAAG;QAAE,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,CAAA;IACpD,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IAC/B,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAc,EAAE,GAAG,EAAE,KAAK,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAiB,EAAE,CAAA;AACjG,CAAC;AAED,mGAAmG;AACnG,iGAAiG;AACjG,+FAA+F;AAC/F,8FAA8F;AAC9F,+FAA+F;AAC/F,oDAAoD;AACpD,SAAS,YAAY,CAAC,GAAW,EAAE,KAAa,EAAE,GAAW;IAC5D,IAAI,CAAC,GAAG,KAAK,CAAA;IACb,IAAI,CAAC,GAAG,GAAG,CAAA;IACX,MAAM,UAAU,GAAG,CAAC,CAAS,EAAW,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAA;IAC7E,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;QAAE,CAAC,EAAE,CAAA;IACnC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC;QAAE,CAAC,EAAE,CAAA;IACvC,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAA;AAC5B,CAAC;AAED,SAAS,KAAK,CAAC,IAAqB,EAAE,GAAW,EAAE,GAAkB,EAAE,WAA0B;IAChG,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAA;IACtB,MAAM,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,YAAY,CAAC,GAAG,EAAE,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAA;IAC9D,6FAA6F;IAC7F,IAAI,KAAK,IAAI,GAAG;QAAE,OAAO,IAAI,CAAA;IAC7B,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAA;IACnC,MAAM,aAAa,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAA;IAC3F,MAAM,UAAU,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,aAAa,CAAA;IAC/F,MAAM,IAAI,GAAgB,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,KAAK,EAAE,UAAU,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAA;IACxF,IAAI,WAAW,CAAC,MAAM,KAAK,SAAS;QAAE,IAAI,CAAC,MAAM,GAAG,WAAW,CAAC,MAAM,CAAA;IACtE,IAAI,WAAW,CAAC,QAAQ,KAAK,SAAS;QAAE,IAAI,CAAC,QAAQ,GAAG,WAAW,CAAC,QAAQ,CAAA;IAC5E,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACd,OAAO,IAAI,CAAA;AACZ,CAAC;AAED,SAAS,SAAS,CAAC,GAAW,EAAE,MAAsB,EAAE,WAA0B;IACjF,MAAM,GAAG,GAAkB,EAAE,CAAA;IAC7B,IAAI,IAAI,GAAoB,IAAI,CAAA;IAEhC,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QAC1B,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;QAE3C,IAAI,MAAM,KAAK,GAAG,EAAE,CAAC;YACpB,4FAA4F;YAC5F,sFAAsF;YACtF,uFAAuF;YACvF,0FAA0F;YAC1F,qFAAqF;YACrF,iFAAiF;YACjF,IAAI,IAAI,KAAK,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;gBAAE,SAAQ;YAC1E,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,WAAW,CAAC,CAAA;YACzC,SAAQ;QACT,CAAC;QAED,IAAI,MAAM,KAAK,GAAG,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,CAAC,GAAG,KAAK,GAAG,EAAE,CAAC;YACzD,yFAAyF;YACzF,sFAAsF;YACtF,kFAAkF;YAClF,8EAA8E;YAC9E,EAAE;YACF,uFAAuF;YACvF,uFAAuF;YACvF,uFAAuF;YACvF,kDAAkD;YAClD,IAAI,MAAM,KAAK,GAAG,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,CAAC,GAAG,KAAK,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;gBACzG,IAAI,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG,CAAA;gBAClB,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAA;gBACrC,SAAQ;YACT,CAAC;YACD,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,WAAW,CAAC,CAAA;YACzC,IAAI,GAAG,EAAE,GAAG,EAAE,GAAI,EAAE,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,WAAW,EAAE,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAA;YACnF,SAAQ;QACT,CAAC;QAED,+BAA+B;QAC/B,IAAI,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG,CAAA;QAClB,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAA;IACtC,CAAC;IAED,KAAK,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,WAAW,CAAC,CAAA;IAClC,OAAO,GAAG,CAAA;AACX,CAAC;AAED,SAAS,QAAQ,CAAC,CAAc,EAAE,CAAc;IAC/C,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,KAAK;QAAE,OAAO,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,CAAA;IAC5C,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,KAAK;QAAE,OAAO,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,CAAA;IAC5C,OAAO,CAAC,CAAA;AACT,CAAC;AAED,SAAS,UAAU,CAClB,IAAiB,EACjB,GAAkB,EAClB,QAAuD;IAEvD,MAAM,UAAU,GAAG,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAA;IAC3C,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACpC,MAAM,OAAO,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC,GAAG,KAAK,SAAS,CAAC,CAAA;QACpE,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,SAAQ;QAClC,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAA;IAChG,CAAC;IACD,OAAO,IAAI,CAAA;AACZ,CAAC;AAED,SAAS,WAAW,CAAC,KAAoB;IACxC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAA;IACvC,KAAK,MAAM,CAAC,IAAI,KAAK;QAAE,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAA;AAC/C,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,gBAAgB,CAAC,GAAW,EAAE,MAAsB,EAAE,OAAsB,EAAE;IAC7F,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,CAAA;IAC1C,MAAM,KAAK,GAAkB,EAAE,CAAA;IAC/B,MAAM,QAAQ,GAAG,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IAE5C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAA;QAChD,IAAI,MAAM;YAAE,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;;YACjC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACtB,CAAC;IAED,WAAW,CAAC,KAAK,CAAC,CAAA;IAClB,MAAM,IAAI,GAAgB,EAAE,GAAG,EAAE,KAAK,EAAE,CAAA;IACxC,IAAI,IAAI,CAAC,MAAM,KAAK,SAAS;QAAE,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAA;IACxD,OAAO,IAAI,CAAA;AACZ,CAAC"}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Opt-in confidence calibration for decoded spans.
|
|
7
|
+
*
|
|
8
|
+
* The decoder emits a per-span `confidence` that is the mean of the span's per-token softmax
|
|
9
|
+
* probabilities (`build-tree.ts`). Softmax probabilities are NOT calibrated — a CE-trained model
|
|
10
|
+
* is systematically over/under-confident in bands. Task #59 fits an isotonic-regression
|
|
11
|
+
* calibrator on a held-out OpenAddresses + corpus set
|
|
12
|
+
* (`scripts/eval/fit-isotonic-calibration.py`) and ships the result as a 20-bin lookup table
|
|
13
|
+
* (`data/eval/calibration/isotonic-<locale>-<version>.json`).
|
|
14
|
+
*
|
|
15
|
+
* This module turns that table into a pure `(rawConfidence) => calibratedConfidence` function. It
|
|
16
|
+
* is deliberately decoupled from the table source: pass the PARSED JSON object so this stays
|
|
17
|
+
* browser-safe (no `node:fs`) — the demo imports the JSON directly, Node scripts `JSON.parse`
|
|
18
|
+
* it.
|
|
19
|
+
*
|
|
20
|
+
* Wiring is OPT-IN. The default decode path is unchanged (byte-stable `conf=` output). A caller
|
|
21
|
+
* that wants calibrated confidences builds a `Calibrator` here and passes it via
|
|
22
|
+
* `ParseOpts.calibrate` (neural) / `BuildTreeOpts.calibrate` (decoder), which `build-tree.ts`
|
|
23
|
+
* applies in `flush()`.
|
|
24
|
+
*/
|
|
25
|
+
/** One row of the lookup table: a confidence bin and the calibrated value at its center. */
|
|
26
|
+
export interface CalibrationBin {
|
|
27
|
+
lo: number;
|
|
28
|
+
hi: number;
|
|
29
|
+
center: number;
|
|
30
|
+
calibrated: number;
|
|
31
|
+
}
|
|
32
|
+
/** The full calibration artifact emitted by `fit-isotonic-calibration.py`. */
|
|
33
|
+
export interface CalibrationTable {
|
|
34
|
+
model: string;
|
|
35
|
+
model_version: string;
|
|
36
|
+
method: string;
|
|
37
|
+
bins: number;
|
|
38
|
+
table: CalibrationBin[];
|
|
39
|
+
[key: string]: unknown;
|
|
40
|
+
}
|
|
41
|
+
/** Maps a raw span confidence in [0, 1] to its calibrated probability of correctness. */
|
|
42
|
+
export type Calibrator = (rawConfidence: number) => number;
|
|
43
|
+
/**
|
|
44
|
+
* Build a calibrator from an isotonic lookup table. The mapping is piecewise-linear between bin
|
|
45
|
+
* centers and clamped to the table's range outside it (the table is monotone non-decreasing by
|
|
46
|
+
* construction, so the interpolation is monotone too). Accepts either the full `CalibrationTable`
|
|
47
|
+
* or a bare `CalibrationBin[]`.
|
|
48
|
+
*/
|
|
49
|
+
export declare function createCalibrator(table: CalibrationTable | CalibrationBin[]): Calibrator;
|
|
50
|
+
//# sourceMappingURL=calibration.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"calibration.d.ts","sourceRoot":"","sources":["../../decoder/calibration.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,4FAA4F;AAC5F,MAAM,WAAW,cAAc;IAC9B,EAAE,EAAE,MAAM,CAAA;IACV,EAAE,EAAE,MAAM,CAAA;IACV,MAAM,EAAE,MAAM,CAAA;IACd,UAAU,EAAE,MAAM,CAAA;CAClB;AAED,8EAA8E;AAC9E,MAAM,WAAW,gBAAgB;IAChC,KAAK,EAAE,MAAM,CAAA;IACb,aAAa,EAAE,MAAM,CAAA;IACrB,MAAM,EAAE,MAAM,CAAA;IACd,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,cAAc,EAAE,CAAA;IACvB,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAA;CACtB;AAED,yFAAyF;AACzF,MAAM,MAAM,UAAU,GAAG,CAAC,aAAa,EAAE,MAAM,KAAK,MAAM,CAAA;AAE1D;;;;;GAKG;AACH,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,gBAAgB,GAAG,cAAc,EAAE,GAAG,UAAU,CA8BvF"}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Opt-in confidence calibration for decoded spans.
|
|
7
|
+
*
|
|
8
|
+
* The decoder emits a per-span `confidence` that is the mean of the span's per-token softmax
|
|
9
|
+
* probabilities (`build-tree.ts`). Softmax probabilities are NOT calibrated — a CE-trained model
|
|
10
|
+
* is systematically over/under-confident in bands. Task #59 fits an isotonic-regression
|
|
11
|
+
* calibrator on a held-out OpenAddresses + corpus set
|
|
12
|
+
* (`scripts/eval/fit-isotonic-calibration.py`) and ships the result as a 20-bin lookup table
|
|
13
|
+
* (`data/eval/calibration/isotonic-<locale>-<version>.json`).
|
|
14
|
+
*
|
|
15
|
+
* This module turns that table into a pure `(rawConfidence) => calibratedConfidence` function. It
|
|
16
|
+
* is deliberately decoupled from the table source: pass the PARSED JSON object so this stays
|
|
17
|
+
* browser-safe (no `node:fs`) — the demo imports the JSON directly, Node scripts `JSON.parse`
|
|
18
|
+
* it.
|
|
19
|
+
*
|
|
20
|
+
* Wiring is OPT-IN. The default decode path is unchanged (byte-stable `conf=` output). A caller
|
|
21
|
+
* that wants calibrated confidences builds a `Calibrator` here and passes it via
|
|
22
|
+
* `ParseOpts.calibrate` (neural) / `BuildTreeOpts.calibrate` (decoder), which `build-tree.ts`
|
|
23
|
+
* applies in `flush()`.
|
|
24
|
+
*/
|
|
25
|
+
/**
|
|
26
|
+
* Build a calibrator from an isotonic lookup table. The mapping is piecewise-linear between bin
|
|
27
|
+
* centers and clamped to the table's range outside it (the table is monotone non-decreasing by
|
|
28
|
+
* construction, so the interpolation is monotone too). Accepts either the full `CalibrationTable`
|
|
29
|
+
* or a bare `CalibrationBin[]`.
|
|
30
|
+
*/
|
|
31
|
+
export function createCalibrator(table) {
|
|
32
|
+
const bins = Array.isArray(table) ? table : table.table;
|
|
33
|
+
if (!bins || bins.length === 0) {
|
|
34
|
+
throw new Error("createCalibrator: empty calibration table");
|
|
35
|
+
}
|
|
36
|
+
// Sort by center and extract parallel arrays for interpolation.
|
|
37
|
+
const sorted = [...bins].sort((a, b) => a.center - b.center);
|
|
38
|
+
const centers = sorted.map((b) => b.center);
|
|
39
|
+
const cals = sorted.map((b) => clamp01(b.calibrated));
|
|
40
|
+
const n = centers.length;
|
|
41
|
+
return (raw) => {
|
|
42
|
+
const x = clamp01(raw);
|
|
43
|
+
if (x <= centers[0])
|
|
44
|
+
return cals[0];
|
|
45
|
+
if (x >= centers[n - 1])
|
|
46
|
+
return cals[n - 1];
|
|
47
|
+
// Binary search for the interval [centers[i], centers[i+1]] containing x.
|
|
48
|
+
let lo = 0;
|
|
49
|
+
let hi = n - 1;
|
|
50
|
+
while (hi - lo > 1) {
|
|
51
|
+
const mid = (lo + hi) >> 1;
|
|
52
|
+
if (centers[mid] <= x)
|
|
53
|
+
lo = mid;
|
|
54
|
+
else
|
|
55
|
+
hi = mid;
|
|
56
|
+
}
|
|
57
|
+
const x0 = centers[lo];
|
|
58
|
+
const x1 = centers[hi];
|
|
59
|
+
const y0 = cals[lo];
|
|
60
|
+
const y1 = cals[hi];
|
|
61
|
+
const t = x1 === x0 ? 0 : (x - x0) / (x1 - x0);
|
|
62
|
+
return y0 + t * (y1 - y0);
|
|
63
|
+
};
|
|
64
|
+
}
|
|
65
|
+
function clamp01(v) {
|
|
66
|
+
if (Number.isNaN(v))
|
|
67
|
+
return 0;
|
|
68
|
+
if (v < 0)
|
|
69
|
+
return 0;
|
|
70
|
+
if (v > 1)
|
|
71
|
+
return 1;
|
|
72
|
+
return v;
|
|
73
|
+
}
|
|
74
|
+
//# sourceMappingURL=calibration.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"calibration.js","sourceRoot":"","sources":["../../decoder/calibration.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAuBH;;;;;GAKG;AACH,MAAM,UAAU,gBAAgB,CAAC,KAA0C;IAC1E,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAA;IACvD,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChC,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAA;IAC7D,CAAC;IACD,gEAAgE;IAChE,MAAM,MAAM,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,CAAA;IAC5D,MAAM,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAA;IAC3C,MAAM,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAA;IACrD,MAAM,CAAC,GAAG,OAAO,CAAC,MAAM,CAAA;IAExB,OAAO,CAAC,GAAW,EAAU,EAAE;QAC9B,MAAM,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,CAAA;QACtB,IAAI,CAAC,IAAI,OAAO,CAAC,CAAC,CAAE;YAAE,OAAO,IAAI,CAAC,CAAC,CAAE,CAAA;QACrC,IAAI,CAAC,IAAI,OAAO,CAAC,CAAC,GAAG,CAAC,CAAE;YAAE,OAAO,IAAI,CAAC,CAAC,GAAG,CAAC,CAAE,CAAA;QAC7C,0EAA0E;QAC1E,IAAI,EAAE,GAAG,CAAC,CAAA;QACV,IAAI,EAAE,GAAG,CAAC,GAAG,CAAC,CAAA;QACd,OAAO,EAAE,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;YACpB,MAAM,GAAG,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,CAAA;YAC1B,IAAI,OAAO,CAAC,GAAG,CAAE,IAAI,CAAC;gBAAE,EAAE,GAAG,GAAG,CAAA;;gBAC3B,EAAE,GAAG,GAAG,CAAA;QACd,CAAC;QACD,MAAM,EAAE,GAAG,OAAO,CAAC,EAAE,CAAE,CAAA;QACvB,MAAM,EAAE,GAAG,OAAO,CAAC,EAAE,CAAE,CAAA;QACvB,MAAM,EAAE,GAAG,IAAI,CAAC,EAAE,CAAE,CAAA;QACpB,MAAM,EAAE,GAAG,IAAI,CAAC,EAAE,CAAE,CAAA;QACpB,MAAM,CAAC,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,CAAA;QAC9C,OAAO,EAAE,GAAG,CAAC,GAAG,CAAC,EAAE,GAAG,EAAE,CAAC,CAAA;IAC1B,CAAC,CAAA;AACF,CAAC;AAED,SAAS,OAAO,CAAC,CAAS;IACzB,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QAAE,OAAO,CAAC,CAAA;IAC7B,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,CAAC,CAAA;IACnB,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,CAAC,CAAA;IACnB,OAAO,CAAC,CAAA;AACT,CAAC"}
|
|
@@ -44,5 +44,5 @@ export declare function containmentFor(_system?: AddressSystem): Partial<Record<
|
|
|
44
44
|
* in new code so the system parameter threads through; this export remains for existing call
|
|
45
45
|
* sites.
|
|
46
46
|
*/
|
|
47
|
-
export declare const PARENT_OF: Partial<Record<"country" | "
|
|
47
|
+
export declare const PARENT_OF: Partial<Record<"country" | "house_number" | "locality" | "postcode" | "region" | "street_prefix" | "street_suffix" | "street" | "unit" | "venue" | "attention" | "municipality" | "district" | "dependent_locality" | "subregion" | "street_prefix_particle" | "intersection_a" | "intersection_b" | "po_box" | "cedex" | "prefecture" | "block" | "sub_block" | "building_number" | "building_name", ("country" | "house_number" | "locality" | "postcode" | "region" | "street_prefix" | "street_suffix" | "street" | "unit" | "venue" | "attention" | "municipality" | "district" | "dependent_locality" | "subregion" | "street_prefix_particle" | "intersection_a" | "intersection_b" | "po_box" | "cedex" | "prefecture" | "block" | "sub_block" | "building_number" | "building_name")[]>>;
|
|
48
48
|
//# sourceMappingURL=containment.d.ts.map
|
package/out/decoder/index.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../decoder/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,kBAAkB,CAAA;AAChC,cAAc,wBAAwB,CAAA;AACtC,cAAc,qBAAqB,CAAA;AACnC,cAAc,uBAAuB,CAAA;AACrC,cAAc,oBAAoB,CAAA;AAClC,cAAc,YAAY,CAAA;AAC1B,cAAc,oBAAoB,CAAA"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../decoder/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,kBAAkB,CAAA;AAChC,cAAc,kBAAkB,CAAA;AAChC,cAAc,wBAAwB,CAAA;AACtC,cAAc,qBAAqB,CAAA;AACnC,cAAc,uBAAuB,CAAA;AACrC,cAAc,oBAAoB,CAAA;AAClC,cAAc,YAAY,CAAA;AAC1B,cAAc,oBAAoB,CAAA"}
|
package/out/decoder/index.js
CHANGED
package/out/decoder/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../decoder/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,kBAAkB,CAAA;AAChC,cAAc,wBAAwB,CAAA;AACtC,cAAc,qBAAqB,CAAA;AACnC,cAAc,uBAAuB,CAAA;AACrC,cAAc,oBAAoB,CAAA;AAClC,cAAc,YAAY,CAAA;AAC1B,cAAc,oBAAoB,CAAA"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../decoder/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,kBAAkB,CAAA;AAChC,cAAc,kBAAkB,CAAA;AAChC,cAAc,wBAAwB,CAAA;AACtC,cAAc,qBAAqB,CAAA;AACnC,cAAc,uBAAuB,CAAA;AACrC,cAAc,oBAAoB,CAAA;AAClC,cAAc,YAAY,CAAA;AAC1B,cAAc,oBAAoB,CAAA"}
|
|
@@ -7,6 +7,10 @@
|
|
|
7
7
|
*
|
|
8
8
|
* Flattens the tree to `{ tag: value }`. First-occurrence wins for repeated tags — matches
|
|
9
9
|
* libpostal's behavior. Use `decodeAsTuples` if order or repetition matters.
|
|
10
|
+
*
|
|
11
|
+
* A multi-role node (#413 — a city-state span tagged `region` that also plays `locality`) emits one
|
|
12
|
+
* entry per role from its `interpretations`, so `out.locality` still surfaces for a completed
|
|
13
|
+
* city-state. The shared span means every role gets the same `value`.
|
|
10
14
|
*/
|
|
11
15
|
import type { ComponentTag } from "../types/component.js";
|
|
12
16
|
import type { AddressTree } from "./types.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"serialize-json.d.ts","sourceRoot":"","sources":["../../decoder/serialize-json.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"serialize-json.d.ts","sourceRoot":"","sources":["../../decoder/serialize-json.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAA;AACzD,OAAO,KAAK,EAAe,WAAW,EAAE,MAAM,YAAY,CAAA;AAY1D,wEAAwE;AACxE,wBAAgB,YAAY,CAAC,IAAI,EAAE,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAIrF"}
|
|
@@ -7,10 +7,20 @@
|
|
|
7
7
|
*
|
|
8
8
|
* Flattens the tree to `{ tag: value }`. First-occurrence wins for repeated tags — matches
|
|
9
9
|
* libpostal's behavior. Use `decodeAsTuples` if order or repetition matters.
|
|
10
|
+
*
|
|
11
|
+
* A multi-role node (#413 — a city-state span tagged `region` that also plays `locality`) emits one
|
|
12
|
+
* entry per role from its `interpretations`, so `out.locality` still surfaces for a completed
|
|
13
|
+
* city-state. The shared span means every role gets the same `value`.
|
|
10
14
|
*/
|
|
11
15
|
function visit(node, out) {
|
|
12
16
|
if (!(node.tag in out))
|
|
13
17
|
out[node.tag] = node.value;
|
|
18
|
+
if (node.interpretations) {
|
|
19
|
+
for (const interp of node.interpretations) {
|
|
20
|
+
if (!(interp.tag in out))
|
|
21
|
+
out[interp.tag] = node.value;
|
|
22
|
+
}
|
|
23
|
+
}
|
|
14
24
|
for (const child of node.children)
|
|
15
25
|
visit(child, out);
|
|
16
26
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"serialize-json.js","sourceRoot":"","sources":["../../decoder/serialize-json.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"serialize-json.js","sourceRoot":"","sources":["../../decoder/serialize-json.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAKH,SAAS,KAAK,CAAC,IAAiB,EAAE,GAA0C;IAC3E,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC;QAAE,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,KAAK,CAAA;IAClD,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;QAC1B,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YAC3C,IAAI,CAAC,CAAC,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC;gBAAE,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,KAAK,CAAA;QACvD,CAAC;IACF,CAAC;IACD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ;QAAE,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAA;AACrD,CAAC;AAED,wEAAwE;AACxE,MAAM,UAAU,YAAY,CAAC,IAAiB;IAC7C,MAAM,GAAG,GAA0C,EAAE,CAAA;IACrD,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK;QAAE,KAAK,CAAC,IAAI,EAAE,GAAG,CAAC,CAAA;IAC/C,OAAO,GAAG,CAAA;AACX,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"serialize-xml.d.ts","sourceRoot":"","sources":["../../decoder/serialize-xml.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAEH,OAAO,KAAK,EAAe,WAAW,EAAE,MAAM,YAAY,CAAA;AAE1D,MAAM,WAAW,gBAAgB;IAChC,mEAAmE;IACnE,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB,iEAAiE;IACjE,WAAW,CAAC,EAAE,OAAO,CAAA;IACrB,oEAAoE;IACpE,cAAc,CAAC,EAAE,OAAO,CAAA;IACxB,0FAA0F;IAC1F,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB,iGAAiG;IACjG,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB,qFAAqF;IACrF,YAAY,CAAC,EAAE,OAAO,CAAA;IACtB;;;;;;OAMG;IACH,mBAAmB,CAAC,EAAE,OAAO,CAAA;CAC7B;
|
|
1
|
+
{"version":3,"file":"serialize-xml.d.ts","sourceRoot":"","sources":["../../decoder/serialize-xml.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAEH,OAAO,KAAK,EAAe,WAAW,EAAE,MAAM,YAAY,CAAA;AAE1D,MAAM,WAAW,gBAAgB;IAChC,mEAAmE;IACnE,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB,iEAAiE;IACjE,WAAW,CAAC,EAAE,OAAO,CAAA;IACrB,oEAAoE;IACpE,cAAc,CAAC,EAAE,OAAO,CAAA;IACxB,0FAA0F;IAC1F,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB,iGAAiG;IACjG,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB,qFAAqF;IACrF,YAAY,CAAC,EAAE,OAAO,CAAA;IACtB;;;;;;OAMG;IACH,mBAAmB,CAAC,EAAE,OAAO,CAAA;CAC7B;AA0FD,yFAAyF;AACzF,wBAAgB,WAAW,CAAC,IAAI,EAAE,WAAW,EAAE,IAAI,GAAE,gBAAqB,GAAG,MAAM,CAelF"}
|
|
@@ -65,6 +65,12 @@ function attrs(node, opts) {
|
|
|
65
65
|
if (opts.includePlace && node.placeId !== undefined) {
|
|
66
66
|
parts.push(`place="${escapeXml(node.placeId)}"`);
|
|
67
67
|
}
|
|
68
|
+
// Multi-role node (#413): a city-state span tagged `region` that also plays `locality` lists every
|
|
69
|
+
// role it holds, primary first — `roles="region locality"`. Emitted only when extra roles exist.
|
|
70
|
+
if (node.interpretations && node.interpretations.length > 0) {
|
|
71
|
+
const roles = [node.tag, ...node.interpretations.map((i) => i.tag)];
|
|
72
|
+
parts.push(`roles="${escapeXml(roles.join(" "))}"`);
|
|
73
|
+
}
|
|
68
74
|
return parts.length === 0 ? "" : " " + parts.join(" ");
|
|
69
75
|
}
|
|
70
76
|
function serializeAlternatives(node, indent) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"serialize-xml.js","sourceRoot":"","sources":["../../decoder/serialize-xml.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AA2BH,SAAS,SAAS,CAAC,CAAS;IAC3B,OAAO,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAA;AACpG,CAAC;AAED,SAAS,YAAY,CAAC,IAAiB;IACtC,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,QAAQ;QAAE,OAAO,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAA;IAC1E,IAAI,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC,MAAM,CAAA;IACnC,IAAI,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC,QAAQ,CAAA;IACvC,OAAO,IAAI,CAAA;AACZ,CAAC;AAED;;;GAGG;AACH,MAAM,aAAa,GAAG,CAAC,CAAA;AAEvB,SAAS,KAAK,CAAC,IAAiB,EAAE,IAAgC;IACjE,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,IAAI,IAAI,CAAC,cAAc;QAAE,KAAK,CAAC,IAAI,CAAC,UAAU,IAAI,CAAC,KAAK,GAAG,EAAE,QAAQ,IAAI,CAAC,GAAG,GAAG,CAAC,CAAA;IACjF,IAAI,IAAI,CAAC,WAAW;QAAE,KAAK,CAAC,IAAI,CAAC,SAAS,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;IACxE,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;QACrB,MAAM,GAAG,GAAG,YAAY,CAAC,IAAI,CAAC,CAAA;QAC9B,IAAI,GAAG,KAAK,IAAI;YAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IACxD,CAAC;IACD,+FAA+F;IAC/F,wEAAwE;IACxE,IAAI,IAAI,CAAC,UAAU,IAAI,IAAI,CAAC,GAAG,KAAK,SAAS,IAAI,IAAI,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;QACzE,KAAK,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,aAAa,CAAC,GAAG,EAAE,QAAQ,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,aAAa,CAAC,GAAG,CAAC,CAAA;IACnG,CAAC;IACD,IAAI,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;QACrD,KAAK,CAAC,IAAI,CAAC,UAAU,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IACjD,CAAC;IACD,OAAO,KAAK,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AACvD,CAAC;AAWD,SAAS,qBAAqB,CAAC,IAAiB,EAAE,MAAc;IAC/D,IAAI,CAAC,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,YAAY,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAA;IACnE,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QAC3C,MAAM,GAAG,GAAG,GAAsB,CAAA;QAClC,MAAM,KAAK,GAAG,OAAO,GAAG,CAAC,EAAE,EAAE,CAAA;QAC7B,MAAM,KAAK,GAAG;YACb,UAAU,SAAS,CAAC,KAAK,CAAC,GAAG;YAC7B,SAAS,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG;YAC/B,cAAc,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG;YACzC,QAAQ,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,aAAa,CAAC,GAAG;YACzC,QAAQ,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,aAAa,CAAC,GAAG;YACzC,UAAU,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;SACjC,CAAA;QACD,OAAO,GAAG,MAAM,gBAAgB,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAA;IACrD,CAAC,CAAC,CAAA;IACF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AACxB,CAAC;AAED,SAAS,aAAa,CAAC,IAAiB,EAAE,MAAc,EAAE,IAAgC;IACzF,MAAM,CAAC,GAAG,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;IAC3B,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IAClC,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;IAClC,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;IAEpD,MAAM,SAAS,GAAG,IAAI,CAAC,mBAAmB,CAAC,CAAC,CAAC,qBAAqB,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IAC1F,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAA;IAC5C,MAAM,OAAO,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,CAAA;IAEpC,IAAI,CAAC,WAAW,IAAI,CAAC,OAAO,EAAE,CAAC;QAC9B,OAAO,GAAG,MAAM,IAAI,IAAI,CAAC,GAAG,GAAG,CAAC,IAAI,IAAI,KAAK,IAAI,CAAC,GAAG,GAAG,CAAA;IACzD,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,aAAa,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAC1F,MAAM,KAAK,GAAG,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAC/D,OAAO,GAAG,MAAM,IAAI,IAAI,CAAC,GAAG,GAAG,CAAC,IAAI,IAAI,GAAG,EAAE,GAAG,KAAK,GAAG,EAAE,GAAG,MAAM,KAAK,IAAI,CAAC,GAAG,GAAG,CAAA;AACpF,CAAC;AAED,yFAAyF;AACzF,MAAM,UAAU,WAAW,CAAC,IAAiB,EAAE,OAAyB,EAAE;IACzE,MAAM,IAAI,GAA+B;QACxC,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,IAAI;QAC3B,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,IAAI;QACrC,cAAc,EAAE,IAAI,CAAC,cAAc,IAAI,IAAI;QAC3C,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,IAAI;QACnC,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,IAAI;QACnC,YAAY,EAAE,IAAI,CAAC,YAAY,IAAI,IAAI;QACvC,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,KAAK;KACtD,CAAA;IACD,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACnC,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;IAClC,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;IACtC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,aAAa,CAAC,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAC/E,OAAO,iBAAiB,OAAO,KAAK,EAAE,GAAG,QAAQ,GAAG,EAAE,YAAY,CAAA;AACnE,CAAC"}
|
|
1
|
+
{"version":3,"file":"serialize-xml.js","sourceRoot":"","sources":["../../decoder/serialize-xml.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AA2BH,SAAS,SAAS,CAAC,CAAS;IAC3B,OAAO,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAA;AACpG,CAAC;AAED,SAAS,YAAY,CAAC,IAAiB;IACtC,IAAI,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,QAAQ;QAAE,OAAO,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAA;IAC1E,IAAI,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC,MAAM,CAAA;IACnC,IAAI,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC,QAAQ,CAAA;IACvC,OAAO,IAAI,CAAA;AACZ,CAAC;AAED;;;GAGG;AACH,MAAM,aAAa,GAAG,CAAC,CAAA;AAEvB,SAAS,KAAK,CAAC,IAAiB,EAAE,IAAgC;IACjE,MAAM,KAAK,GAAa,EAAE,CAAA;IAC1B,IAAI,IAAI,CAAC,cAAc;QAAE,KAAK,CAAC,IAAI,CAAC,UAAU,IAAI,CAAC,KAAK,GAAG,EAAE,QAAQ,IAAI,CAAC,GAAG,GAAG,CAAC,CAAA;IACjF,IAAI,IAAI,CAAC,WAAW;QAAE,KAAK,CAAC,IAAI,CAAC,SAAS,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAA;IACxE,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;QACrB,MAAM,GAAG,GAAG,YAAY,CAAC,IAAI,CAAC,CAAA;QAC9B,IAAI,GAAG,KAAK,IAAI;YAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IACxD,CAAC;IACD,+FAA+F;IAC/F,wEAAwE;IACxE,IAAI,IAAI,CAAC,UAAU,IAAI,IAAI,CAAC,GAAG,KAAK,SAAS,IAAI,IAAI,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;QACzE,KAAK,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,aAAa,CAAC,GAAG,EAAE,QAAQ,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,aAAa,CAAC,GAAG,CAAC,CAAA;IACnG,CAAC;IACD,IAAI,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,OAAO,KAAK,SAAS,EAAE,CAAC;QACrD,KAAK,CAAC,IAAI,CAAC,UAAU,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IACjD,CAAC;IACD,mGAAmG;IACnG,iGAAiG;IACjG,IAAI,IAAI,CAAC,eAAe,IAAI,IAAI,CAAC,eAAe,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC7D,MAAM,KAAK,GAAG,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;QACnE,KAAK,CAAC,IAAI,CAAC,UAAU,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAA;IACpD,CAAC;IACD,OAAO,KAAK,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AACvD,CAAC;AAWD,SAAS,qBAAqB,CAAC,IAAiB,EAAE,MAAc;IAC/D,IAAI,CAAC,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,YAAY,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAA;IACnE,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QAC3C,MAAM,GAAG,GAAG,GAAsB,CAAA;QAClC,MAAM,KAAK,GAAG,OAAO,GAAG,CAAC,EAAE,EAAE,CAAA;QAC7B,MAAM,KAAK,GAAG;YACb,UAAU,SAAS,CAAC,KAAK,CAAC,GAAG;YAC7B,SAAS,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG;YAC/B,cAAc,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG;YACzC,QAAQ,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,aAAa,CAAC,GAAG;YACzC,QAAQ,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,aAAa,CAAC,GAAG;YACzC,UAAU,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG;SACjC,CAAA;QACD,OAAO,GAAG,MAAM,gBAAgB,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAA;IACrD,CAAC,CAAC,CAAA;IACF,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AACxB,CAAC;AAED,SAAS,aAAa,CAAC,IAAiB,EAAE,MAAc,EAAE,IAAgC;IACzF,MAAM,CAAC,GAAG,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;IAC3B,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IAClC,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;IAClC,MAAM,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;IAEpD,MAAM,SAAS,GAAG,IAAI,CAAC,mBAAmB,CAAC,CAAC,CAAC,qBAAqB,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IAC1F,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAA;IAC5C,MAAM,OAAO,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,CAAA;IAEpC,IAAI,CAAC,WAAW,IAAI,CAAC,OAAO,EAAE,CAAC;QAC9B,OAAO,GAAG,MAAM,IAAI,IAAI,CAAC,GAAG,GAAG,CAAC,IAAI,IAAI,KAAK,IAAI,CAAC,GAAG,GAAG,CAAA;IACzD,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,aAAa,CAAC,CAAC,EAAE,WAAW,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAC1F,MAAM,KAAK,GAAG,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAC/D,OAAO,GAAG,MAAM,IAAI,IAAI,CAAC,GAAG,GAAG,CAAC,IAAI,IAAI,GAAG,EAAE,GAAG,KAAK,GAAG,EAAE,GAAG,MAAM,KAAK,IAAI,CAAC,GAAG,GAAG,CAAA;AACpF,CAAC;AAED,yFAAyF;AACzF,MAAM,UAAU,WAAW,CAAC,IAAiB,EAAE,OAAyB,EAAE;IACzE,MAAM,IAAI,GAA+B;QACxC,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,IAAI;QAC3B,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,IAAI;QACrC,cAAc,EAAE,IAAI,CAAC,cAAc,IAAI,IAAI;QAC3C,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,IAAI;QACnC,UAAU,EAAE,IAAI,CAAC,UAAU,IAAI,IAAI;QACnC,YAAY,EAAE,IAAI,CAAC,YAAY,IAAI,IAAI;QACvC,mBAAmB,EAAE,IAAI,CAAC,mBAAmB,IAAI,KAAK;KACtD,CAAA;IACD,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACnC,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;IAClC,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAA;IACtC,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,aAAa,CAAC,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAC/E,OAAO,iBAAiB,OAAO,KAAK,EAAE,GAAG,QAAQ,GAAG,EAAE,YAAY,CAAA;AACnE,CAAC"}
|
package/out/decoder/types.d.ts
CHANGED
|
@@ -100,6 +100,31 @@ export interface AddressNode {
|
|
|
100
100
|
* `@mailwoman/core/resolver`.
|
|
101
101
|
*/
|
|
102
102
|
alternatives?: ReadonlyArray<unknown>;
|
|
103
|
+
/**
|
|
104
|
+
* ADDITIONAL roles this single span plays, beyond `tag` (#413). A place can hold multiple admin
|
|
105
|
+
* tiers under one name — a city-state (Berlin is region AND locality) or a capital-seat province
|
|
106
|
+
* (Milano province ~ Milano comune). Rather than synthesize a second node with a borrowed span,
|
|
107
|
+
* the resolver records the extra role(s) here, so one node = one span = many roles (the model
|
|
108
|
+
* Google's `address_components[].types` uses). `tag`/`placeId`/`lat`/`lon` remain the PRIMARY
|
|
109
|
+
* role; each interpretation is a distinct secondary role with its own resolved place. Serializers
|
|
110
|
+
* surface every role (a city-state emits both `region` and `locality`). Distinct from
|
|
111
|
+
* `alternatives` — those are same-role runner-up places (Springfield IL vs MA); interpretations
|
|
112
|
+
* are DIFFERENT tags, same span. Empty / absent for the common single-role node. Both completion
|
|
113
|
+
* (#415) and a future concordance decode write into this one slot.
|
|
114
|
+
*/
|
|
115
|
+
interpretations?: ReadonlyArray<Interpretation>;
|
|
116
|
+
}
|
|
117
|
+
/** One additional role a span plays (#413) — see {@link AddressNode.interpretations}. */
|
|
118
|
+
export interface Interpretation {
|
|
119
|
+
tag: ComponentTag;
|
|
120
|
+
/** Resolver-supplied normalized place URI for this role (e.g. `wof:101909779`). */
|
|
121
|
+
placeId?: string;
|
|
122
|
+
sourceId?: string;
|
|
123
|
+
/** Centroid for this role's place (a capital-seat comune differs from its province). */
|
|
124
|
+
lat?: number;
|
|
125
|
+
lon?: number;
|
|
126
|
+
confidence?: number;
|
|
127
|
+
metadata?: Record<string, unknown>;
|
|
103
128
|
}
|
|
104
129
|
/**
|
|
105
130
|
* The full decoded tree for one parsed address.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../decoder/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAA;AAEnE,YAAY,EAAE,QAAQ,EAAE,YAAY,EAAE,CAAA;AAEtC;;;;;;GAMG;AACH,MAAM,WAAW,YAAY;IAC5B,+FAA+F;IAC/F,KAAK,EAAE,MAAM,CAAA;IACb,4DAA4D;IAC5D,KAAK,EAAE,MAAM,CAAA;IACb,0DAA0D;IAC1D,GAAG,EAAE,MAAM,CAAA;IACX,2CAA2C;IAC3C,KAAK,EAAE,QAAQ,CAAA;IACf,0DAA0D;IAC1D,UAAU,EAAE,MAAM,CAAA;CAClB;AAED;;;;;;;GAOG;AACH,MAAM,WAAW,WAAW;IAC3B,GAAG,EAAE,YAAY,CAAA;IACjB,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;IACX,UAAU,EAAE,MAAM,CAAA;IAClB,QAAQ,EAAE,WAAW,EAAE,CAAA;IACvB;;;;OAIG;IACH,MAAM,CAAC,EAAE,MAAM,CAAA;IACf;;;;OAIG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iGAAiG;IACjG,GAAG,CAAC,EAAE,MAAM,CAAA;IACZ,kGAAkG;IAClG,GAAG,CAAC,EAAE,MAAM,CAAA;IACZ;;;;OAIG;IACH,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB;;;;OAIG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;IAClC;;;;;;;;;OASG;IACH,YAAY,CAAC,EAAE,aAAa,CAAC,OAAO,CAAC,CAAA;
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../decoder/types.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAA;AAEnE,YAAY,EAAE,QAAQ,EAAE,YAAY,EAAE,CAAA;AAEtC;;;;;;GAMG;AACH,MAAM,WAAW,YAAY;IAC5B,+FAA+F;IAC/F,KAAK,EAAE,MAAM,CAAA;IACb,4DAA4D;IAC5D,KAAK,EAAE,MAAM,CAAA;IACb,0DAA0D;IAC1D,GAAG,EAAE,MAAM,CAAA;IACX,2CAA2C;IAC3C,KAAK,EAAE,QAAQ,CAAA;IACf,0DAA0D;IAC1D,UAAU,EAAE,MAAM,CAAA;CAClB;AAED;;;;;;;GAOG;AACH,MAAM,WAAW,WAAW;IAC3B,GAAG,EAAE,YAAY,CAAA;IACjB,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;IACX,UAAU,EAAE,MAAM,CAAA;IAClB,QAAQ,EAAE,WAAW,EAAE,CAAA;IACvB;;;;OAIG;IACH,MAAM,CAAC,EAAE,MAAM,CAAA;IACf;;;;OAIG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,iGAAiG;IACjG,GAAG,CAAC,EAAE,MAAM,CAAA;IACZ,kGAAkG;IAClG,GAAG,CAAC,EAAE,MAAM,CAAA;IACZ;;;;OAIG;IACH,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB;;;;OAIG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;IAClC;;;;;;;;;OASG;IACH,YAAY,CAAC,EAAE,aAAa,CAAC,OAAO,CAAC,CAAA;IACrC;;;;;;;;;;;OAWG;IACH,eAAe,CAAC,EAAE,aAAa,CAAC,cAAc,CAAC,CAAA;CAC/C;AAED,yFAAyF;AACzF,MAAM,WAAW,cAAc;IAC9B,GAAG,EAAE,YAAY,CAAA;IACjB,mFAAmF;IACnF,OAAO,CAAC,EAAE,MAAM,CAAA;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAA;IACjB,wFAAwF;IACxF,GAAG,CAAC,EAAE,MAAM,CAAA;IACZ,GAAG,CAAC,EAAE,MAAM,CAAA;IACZ,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;CAClC;AAED;;;;;;GAMG;AACH,MAAM,WAAW,WAAW;IAC3B,0FAA0F;IAC1F,GAAG,EAAE,MAAM,CAAA;IACX,KAAK,EAAE,WAAW,EAAE,CAAA;IACpB;;;;;;;;;;;OAWG;IACH,MAAM,CAAC,EAAE,aAAa,CAAA;CACtB;AAED;;;;;;GAMG;AACH,MAAM,MAAM,aAAa,GAAG,SAAS,GAAG,UAAU,GAAG,CAAC,MAAM,GAAG,EAAE,CAAC,CAAA"}
|
|
@@ -11,7 +11,9 @@
|
|
|
11
11
|
*
|
|
12
12
|
* Implementation contract per `docs/articles/plan/reference/STAGES.md`.
|
|
13
13
|
*/
|
|
14
|
-
import type {
|
|
14
|
+
import type { AddressTree } from "../decoder/types.js";
|
|
15
|
+
import type { ClassifierCandidate } from "./reconcile.js";
|
|
16
|
+
import type { PhraseProposal, PipelineOpts, PipelineResult, RuntimePipelineStages } from "./types.js";
|
|
15
17
|
/**
|
|
16
18
|
* Run the runtime pipeline.
|
|
17
19
|
*
|
|
@@ -26,4 +28,18 @@ import type { PipelineOpts, PipelineResult, RuntimePipelineStages } from "./type
|
|
|
26
28
|
* Per-stage timing recorded on `result.timing`. Fast-path stages are absent from the timing map.
|
|
27
29
|
*/
|
|
28
30
|
export declare function runPipeline(raw: string, stages: RuntimePipelineStages, opts?: PipelineOpts): Promise<PipelineResult>;
|
|
31
|
+
/**
|
|
32
|
+
* Post-classification audit: for each phrase-grouper proposal whose span is entirely unlabeled
|
|
33
|
+
* (all-O) in the classifier output, inject a provisional node using the grouper's structural
|
|
34
|
+
* hypothesis. This rescues spans the neural model couldn't type — primarily venue text.
|
|
35
|
+
*
|
|
36
|
+
* When `classifierTopK` is supplied (the joint-reconcile path), the audit defers to the
|
|
37
|
+
* classifier's own verdict for the orphaned span: if the classifier confidently typed it as a
|
|
38
|
+
* DIFFERENT component than the phrase kind, we inject the classifier's tag rather than the
|
|
39
|
+
* structural guess. Without this, a reconciler that leaves a street-prefix word like `Via` orphaned
|
|
40
|
+
* (because it picked the single `Trento` street span) would see the audit promote `Via`'s
|
|
41
|
+
* LOCALITY_PHRASE to a spurious `locality` node — burying the real trailing city. The classifier
|
|
42
|
+
* said `street:0.73` for `Via`; trust it (#425).
|
|
43
|
+
*/
|
|
44
|
+
export declare function grouperAudit(tree: AddressTree, proposals: PhraseProposal[], text: string, classifierTopK?: ClassifierCandidate[]): AddressTree;
|
|
29
45
|
//# sourceMappingURL=runtime-pipeline.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"runtime-pipeline.d.ts","sourceRoot":"","sources":["../../pipeline/runtime-pipeline.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;
|
|
1
|
+
{"version":3,"file":"runtime-pipeline.d.ts","sourceRoot":"","sources":["../../pipeline/runtime-pipeline.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,KAAK,EAAe,WAAW,EAAE,MAAM,qBAAqB,CAAA;AAEnE,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAA;AAGzD,OAAO,KAAK,EAOX,cAAc,EACd,YAAY,EACZ,cAAc,EAGd,qBAAqB,EACrB,MAAM,YAAY,CAAA;AA4HnB;;;;;;;;;;;;GAYG;AACH,wBAAsB,WAAW,CAChC,GAAG,EAAE,MAAM,EACX,MAAM,EAAE,qBAAqB,EAC7B,IAAI,CAAC,EAAE,YAAY,GACjB,OAAO,CAAC,cAAc,CAAC,CA8JzB;AA2DD;;;;;;;;;;;;GAYG;AACH,wBAAgB,YAAY,CAC3B,IAAI,EAAE,WAAW,EACjB,SAAS,EAAE,cAAc,EAAE,EAC3B,IAAI,EAAE,MAAM,EACZ,cAAc,CAAC,EAAE,mBAAmB,EAAE,GACpC,WAAW,CA2Eb"}
|
|
@@ -190,13 +190,16 @@ export async function runPipeline(raw, stages, opts) {
|
|
|
190
190
|
timing["phrase-grouper"] = performance.now() - tGroup;
|
|
191
191
|
}
|
|
192
192
|
let tree = { raw: normalized.normalized, roots: [] };
|
|
193
|
-
//
|
|
194
|
-
//
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
193
|
+
// Captured from the joint-reconcile path so the grouper-audit can defer to the classifier's
|
|
194
|
+
// per-span verdict on orphaned spans (see the assignment + grouperAudit below).
|
|
195
|
+
let auditClassifierTopK;
|
|
196
|
+
// Joint-reconcile path: DEFAULT as of Route A Phase II (#427). It beats argmax on every measured
|
|
197
|
+
// locale with per-field regression under 0.5% (report: docs/articles/evals/2026-06-07-route-a-
|
|
198
|
+
// phase-ii-regate.md). Set `jointReconcile: false` (or the deprecated `forceJointReconcile: false`)
|
|
199
|
+
// to force the legacy argmax sort. Still requires a phrase grouper + a `parseWithLogits` classifier;
|
|
200
|
+
// without either, the pipeline falls back to argmax regardless of the flag.
|
|
201
|
+
const jointEnabled = opts?.jointReconcile ?? opts?.forceJointReconcile ?? true;
|
|
202
|
+
const useJointReconcile = jointEnabled && phraseProposals.length > 0 && stages.classifier && "parseWithLogits" in stages.classifier;
|
|
200
203
|
if (useJointReconcile) {
|
|
201
204
|
const classifierWithLogits = stages.classifier;
|
|
202
205
|
throwIfAborted(opts);
|
|
@@ -208,7 +211,7 @@ export async function runPipeline(raw, stages, opts) {
|
|
|
208
211
|
// The classifier must expose its label vocabulary so the aggregation can strip BIO prefixes.
|
|
209
212
|
// NeuralAddressClassifier surfaces this as `cfg.labels` — extracted via structural typing here.
|
|
210
213
|
const labels = "labels" in classifierWithLogits ? classifierWithLogits.labels : [];
|
|
211
|
-
const classifierTopK = aggregateSpanLogits(logits, pieces, phraseProposals.map((p) => ({ start: p.span.start, end: p.span.end })), { labels });
|
|
214
|
+
const classifierTopK = aggregateSpanLogits(logits, pieces, phraseProposals.map((p) => ({ start: p.span.start, end: p.span.end })), { labels, text: normalized.normalized });
|
|
212
215
|
if (classifierTopK.length > 0) {
|
|
213
216
|
const result = reconcileSpans({
|
|
214
217
|
raw: normalized.normalized,
|
|
@@ -216,6 +219,12 @@ export async function runPipeline(raw, stages, opts) {
|
|
|
216
219
|
classifierTopK,
|
|
217
220
|
});
|
|
218
221
|
tree = result.tree;
|
|
222
|
+
// The reconciler can leave a span uncovered (e.g. it picked the single-token street
|
|
223
|
+
// `Trento` over `Via Trento`, orphaning `Via`). The grouper-audit below would then promote
|
|
224
|
+
// that orphan's LOCALITY_PHRASE proposal to a `locality` node — even though the classifier
|
|
225
|
+
// confidently typed it `street`. Hand the audit the classifier's per-span verdict so it
|
|
226
|
+
// respects that opinion instead of trusting the structural phrase kind (#425 re-gate).
|
|
227
|
+
auditClassifierTopK = classifierTopK;
|
|
219
228
|
}
|
|
220
229
|
else {
|
|
221
230
|
tree = argmaxTree;
|
|
@@ -230,7 +239,7 @@ export async function runPipeline(raw, stages, opts) {
|
|
|
230
239
|
}
|
|
231
240
|
if (phraseProposals.length > 0 && tree.roots.length >= 0) {
|
|
232
241
|
const tAudit = performance.now();
|
|
233
|
-
tree = grouperAudit(tree, phraseProposals, normalized.normalized);
|
|
242
|
+
tree = grouperAudit(tree, phraseProposals, normalized.normalized, auditClassifierTopK);
|
|
234
243
|
timing["grouper-audit"] = performance.now() - tAudit;
|
|
235
244
|
}
|
|
236
245
|
if (stages.resolver) {
|
|
@@ -298,8 +307,16 @@ const PHRASE_KIND_TO_TAG = new Map([
|
|
|
298
307
|
* Post-classification audit: for each phrase-grouper proposal whose span is entirely unlabeled
|
|
299
308
|
* (all-O) in the classifier output, inject a provisional node using the grouper's structural
|
|
300
309
|
* hypothesis. This rescues spans the neural model couldn't type — primarily venue text.
|
|
310
|
+
*
|
|
311
|
+
* When `classifierTopK` is supplied (the joint-reconcile path), the audit defers to the
|
|
312
|
+
* classifier's own verdict for the orphaned span: if the classifier confidently typed it as a
|
|
313
|
+
* DIFFERENT component than the phrase kind, we inject the classifier's tag rather than the
|
|
314
|
+
* structural guess. Without this, a reconciler that leaves a street-prefix word like `Via` orphaned
|
|
315
|
+
* (because it picked the single `Trento` street span) would see the audit promote `Via`'s
|
|
316
|
+
* LOCALITY_PHRASE to a spurious `locality` node — burying the real trailing city. The classifier
|
|
317
|
+
* said `street:0.73` for `Via`; trust it (#425).
|
|
301
318
|
*/
|
|
302
|
-
function grouperAudit(tree, proposals, text) {
|
|
319
|
+
export function grouperAudit(tree, proposals, text, classifierTopK) {
|
|
303
320
|
if (proposals.length === 0)
|
|
304
321
|
return tree;
|
|
305
322
|
const roots = [...tree.roots];
|
|
@@ -312,15 +329,47 @@ function grouperAudit(tree, proposals, text) {
|
|
|
312
329
|
}
|
|
313
330
|
};
|
|
314
331
|
collectNodes(roots);
|
|
332
|
+
// Index the classifier's single best tag per exact span (start:end) so the audit can defer to it.
|
|
333
|
+
const CLASSIFIER_OVERRIDE_MIN = 0.4;
|
|
334
|
+
const bestTagBySpan = new Map();
|
|
335
|
+
for (const c of classifierTopK ?? []) {
|
|
336
|
+
const k = `${c.span.start}:${c.span.end}`;
|
|
337
|
+
const cur = bestTagBySpan.get(k);
|
|
338
|
+
if (!cur || c.score > cur.score)
|
|
339
|
+
bestTagBySpan.set(k, { tag: c.tag, score: c.score });
|
|
340
|
+
}
|
|
341
|
+
// Tags that may appear AT MOST ONCE per address. On the joint path, the reconciler has already
|
|
342
|
+
// placed the confident locality/region/postcode; a SECOND one injected here is almost always a
|
|
343
|
+
// street-name word the OOD model mistyped ("Via Francesca Nord" → `Francesca`) or an area-line
|
|
344
|
+
// prefix ("LUGAR …" / "URBANIZACION …"). Suppressing the duplicate keeps the real trailing city
|
|
345
|
+
// from being shadowed by an earlier-positioned spurious node in `decodeAsJson` (#425 residual tail).
|
|
346
|
+
const SINGLETON_TAGS = new Set(["locality", "region", "postcode", "country"]);
|
|
347
|
+
const presentSingletons = new Set();
|
|
348
|
+
const collectSingletons = (nodes) => {
|
|
349
|
+
for (const n of nodes) {
|
|
350
|
+
if (SINGLETON_TAGS.has(n.tag))
|
|
351
|
+
presentSingletons.add(n.tag);
|
|
352
|
+
if (n.children)
|
|
353
|
+
collectSingletons(n.children);
|
|
354
|
+
}
|
|
355
|
+
};
|
|
356
|
+
collectSingletons(roots);
|
|
357
|
+
const dedupeSingletons = classifierTopK !== undefined; // joint path only — argmax stays byte-stable
|
|
315
358
|
for (const proposal of proposals) {
|
|
316
|
-
const
|
|
317
|
-
if (!
|
|
359
|
+
const phraseTag = PHRASE_KIND_TO_TAG.get(proposal.kindHypothesis);
|
|
360
|
+
if (!phraseTag)
|
|
318
361
|
continue;
|
|
319
362
|
const pStart = proposal.span.start;
|
|
320
363
|
const pEnd = pStart + proposal.span.body.length;
|
|
321
364
|
const covered = allNodes.some((node) => node.start < pEnd && pStart < node.end);
|
|
322
365
|
if (covered)
|
|
323
366
|
continue;
|
|
367
|
+
// Defer to the classifier when it confidently typed this exact span as something else.
|
|
368
|
+
const classifierVerdict = bestTagBySpan.get(`${proposal.span.start}:${proposal.span.end}`);
|
|
369
|
+
const tag = classifierVerdict && classifierVerdict.score >= CLASSIFIER_OVERRIDE_MIN ? classifierVerdict.tag : phraseTag;
|
|
370
|
+
// Don't inject a second singleton-tag node when the reconciler already produced one.
|
|
371
|
+
if (dedupeSingletons && SINGLETON_TAGS.has(tag) && presentSingletons.has(tag))
|
|
372
|
+
continue;
|
|
324
373
|
const provisionalNode = {
|
|
325
374
|
tag,
|
|
326
375
|
value: text.slice(pStart, pEnd),
|
|
@@ -332,6 +381,8 @@ function grouperAudit(tree, proposals, text) {
|
|
|
332
381
|
sourceId: `grouper:${proposal.kindHypothesis}`,
|
|
333
382
|
};
|
|
334
383
|
roots.push(provisionalNode);
|
|
384
|
+
if (SINGLETON_TAGS.has(tag))
|
|
385
|
+
presentSingletons.add(tag);
|
|
335
386
|
}
|
|
336
387
|
roots.sort((a, b) => a.start - b.start);
|
|
337
388
|
return { raw: tree.raw, roots };
|