@mailwoman/core 4.10.0 → 4.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +90 -0
- package/data/coarse-placer/meta.json +17 -48
- package/out/decoder/arbitrate-tree.d.ts +45 -0
- package/out/decoder/arbitrate-tree.d.ts.map +1 -0
- package/out/decoder/arbitrate-tree.js +97 -0
- package/out/decoder/arbitrate-tree.js.map +1 -0
- package/out/decoder/build-tree.d.ts +4 -4
- package/out/decoder/build-tree.js +5 -5
- package/out/decoder/build-tree.js.map +1 -1
- package/out/decoder/containment.d.ts +2 -2
- package/out/decoder/containment.js +2 -2
- package/out/decoder/index.d.ts +2 -0
- package/out/decoder/index.d.ts.map +1 -1
- package/out/decoder/index.js +2 -0
- package/out/decoder/index.js.map +1 -1
- package/out/decoder/proposals-to-tree.d.ts +20 -1
- package/out/decoder/proposals-to-tree.d.ts.map +1 -1
- package/out/decoder/proposals-to-tree.js +37 -0
- package/out/decoder/proposals-to-tree.js.map +1 -1
- package/out/decoder/resolve-proposal-overlaps.d.ts +52 -0
- package/out/decoder/resolve-proposal-overlaps.d.ts.map +1 -0
- package/out/decoder/resolve-proposal-overlaps.js +74 -0
- package/out/decoder/resolve-proposal-overlaps.js.map +1 -0
- package/out/decoder/types.d.ts +5 -5
- package/out/decoder/types.js +1 -1
- package/out/kysley/client.d.ts +5 -1
- package/out/kysley/client.d.ts.map +1 -1
- package/out/kysley/client.js +4 -0
- package/out/kysley/client.js.map +1 -1
- package/out/parser/index.d.ts +1 -0
- package/out/parser/index.d.ts.map +1 -1
- package/out/parser/index.js +1 -0
- package/out/parser/index.js.map +1 -1
- package/out/parser/proposal-pipeline.d.ts +16 -3
- package/out/parser/proposal-pipeline.d.ts.map +1 -1
- package/out/parser/proposal-pipeline.js +18 -6
- package/out/parser/proposal-pipeline.js.map +1 -1
- package/out/parser/solution-to-proposals.d.ts +28 -0
- package/out/parser/solution-to-proposals.d.ts.map +1 -0
- package/out/parser/solution-to-proposals.js +44 -0
- package/out/parser/solution-to-proposals.js.map +1 -0
- package/out/pipeline/reconcile.js +1 -1
- package/out/pipeline/runtime-pipeline.d.ts.map +1 -1
- package/out/pipeline/runtime-pipeline.js +32 -4
- package/out/pipeline/runtime-pipeline.js.map +1 -1
- package/out/pipeline/types.d.ts +30 -1
- package/out/pipeline/types.d.ts.map +1 -1
- package/out/policy/defaults.d.ts +11 -6
- package/out/policy/defaults.d.ts.map +1 -1
- package/out/policy/defaults.js +12 -7
- package/out/policy/defaults.js.map +1 -1
- package/out/policy/from-config.d.ts +14 -4
- package/out/policy/from-config.d.ts.map +1 -1
- package/out/policy/from-config.js +16 -5
- package/out/policy/from-config.js.map +1 -1
- package/out/policy/index.d.ts +1 -0
- package/out/policy/index.d.ts.map +1 -1
- package/out/policy/index.js +1 -0
- package/out/policy/index.js.map +1 -1
- package/out/policy/input-shape-router.d.ts +104 -0
- package/out/policy/input-shape-router.d.ts.map +1 -0
- package/out/policy/input-shape-router.js +88 -0
- package/out/policy/input-shape-router.js.map +1 -0
- package/out/policy/registry.d.ts +7 -3
- package/out/policy/registry.d.ts.map +1 -1
- package/out/policy/registry.js +7 -3
- package/out/policy/registry.js.map +1 -1
- package/out/resolver/remote-resolver.d.ts +4 -2
- package/out/resolver/remote-resolver.d.ts.map +1 -1
- package/out/resolver/remote-resolver.js.map +1 -1
- package/out/resolver/resolve.d.ts.map +1 -1
- package/out/resolver/resolve.js +75 -5
- package/out/resolver/resolve.js.map +1 -1
- package/out/resolver/types.d.ts +48 -9
- package/out/resolver/types.d.ts.map +1 -1
- package/out/resolver/types.js +56 -9
- package/out/resolver/types.js.map +1 -1
- package/out/resources/whosonfirst/PlacetypeDataSource.d.ts.map +1 -1
- package/out/resources/whosonfirst/PlacetypeDataSource.js +2 -0
- package/out/resources/whosonfirst/PlacetypeDataSource.js.map +1 -1
- package/out/utils/repo.d.ts.map +1 -1
- package/out/utils/repo.js +5 -4
- package/out/utils/repo.js.map +1 -1
- package/package.json +1 -1
package/README.md
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# @mailwoman/core
|
|
2
|
+
|
|
3
|
+
**The foundation of the Mailwoman address parser** — types, tokenization,
|
|
4
|
+
classification primitives, solver, decoder, and the staged pipeline coordinator.
|
|
5
|
+
Ships ~9 MB of provenance-tracked reference dictionaries (libpostal, Who's On
|
|
6
|
+
First, chromium-i18n) consumed by the resolver and classifiers.
|
|
7
|
+
|
|
8
|
+
```ts
|
|
9
|
+
import { createRuntimePipeline, AddressParser, ComponentTag, Classification, Span } from "@mailwoman/core"
|
|
10
|
+
|
|
11
|
+
const pipeline = createRuntimePipeline({ locale: "en-US" })
|
|
12
|
+
const result = pipeline.parse("1600 Amphitheatre Parkway, Mountain View, CA 94043")
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## What's inside
|
|
16
|
+
|
|
17
|
+
| Module | Purpose |
|
|
18
|
+
| --------------------- | --------------------------------------------------------------------------------------------------------------------------------------- |
|
|
19
|
+
| **`types/`** | Core type system: `ComponentTag`, `Span`, `Classification`, `ClassificationMap`, `LocaleTag` |
|
|
20
|
+
| **`tokenization/`** | Tokenizer primitives, whitespace/punctuation rules, token classification |
|
|
21
|
+
| **`classification/`** | `Classification` data structure, `ClassificationMap`, span overlap resolution |
|
|
22
|
+
| **`decoder/`** | Span proposal → tree projection, BIO decoding, reconcile/merge strategies, confidence calibration |
|
|
23
|
+
| **`pipeline/`** | `createRuntimePipeline` — the staged pipeline coordinator that wires normalize → query-shape → locale-gate → ... → classifier → decoder |
|
|
24
|
+
| **`solver/`** | Rule-based solver (the v0 rules engine), `Solution`, `Solver` |
|
|
25
|
+
| **`parser/`** | `AddressParser` — high-level parse entry point (consumed by `mailwoman` CLI) |
|
|
26
|
+
| **`resources/`** | ~9 MB of shipped reference data: libpostal dictionaries, WOF place data, chromium-i18n address formats |
|
|
27
|
+
|
|
28
|
+
## Key exports
|
|
29
|
+
|
|
30
|
+
```ts
|
|
31
|
+
// Types
|
|
32
|
+
export type { ComponentTag, Span, Classification, ClassificationMap, LocaleTag }
|
|
33
|
+
|
|
34
|
+
// Pipeline
|
|
35
|
+
export { createRuntimePipeline, type RuntimePipeline, type PipelineOpts }
|
|
36
|
+
|
|
37
|
+
// Classification
|
|
38
|
+
export { Classification, ClassificationMap }
|
|
39
|
+
export { treeToClassification, classificationToTree }
|
|
40
|
+
|
|
41
|
+
// Decoder
|
|
42
|
+
export { decodeBioSpans, viterbiDecode, reconcileSpans }
|
|
43
|
+
export { createCalibrator, type Calibrator } // isotonic confidence calibration
|
|
44
|
+
|
|
45
|
+
// Solver (v0 rules)
|
|
46
|
+
export { Solver, Solution }
|
|
47
|
+
|
|
48
|
+
// Tokenization
|
|
49
|
+
export { tokenize, Token, TokenClass }
|
|
50
|
+
|
|
51
|
+
// Resources
|
|
52
|
+
export { loadDictionary, getAvailableLanguages }
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Pipeline architecture
|
|
56
|
+
|
|
57
|
+
Mailwoman's runtime pipeline is a staged coordinator that chains pure-function
|
|
58
|
+
stages with typed handoffs:
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
normalize → query-shape → locale-gate → kind-classifier → phrase-grouper → classifier → decoder
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Each stage is published as its own `@mailwoman/*` package and wired together by
|
|
65
|
+
the pipeline coordinator in this package. The design ensures every stage is
|
|
66
|
+
independently testable, benchmarkable, and replaceable.
|
|
67
|
+
|
|
68
|
+
## Reference data
|
|
69
|
+
|
|
70
|
+
This package ships immutable, provenance-tracked dictionaries consumed by the
|
|
71
|
+
resolver and rule-based classifiers:
|
|
72
|
+
|
|
73
|
+
- **libpostal** — multilingual street types, place names, directional/ordinal tokens
|
|
74
|
+
- **Who's On First** — place hierarchy and geography
|
|
75
|
+
- **chromium-i18n** — per-country address format templates
|
|
76
|
+
|
|
77
|
+
The dictionaries are ~9 MB total and are loaded lazily.
|
|
78
|
+
|
|
79
|
+
## Related
|
|
80
|
+
|
|
81
|
+
- [`mailwoman`](../mailwoman) — the user-facing CLI + `AddressParser`
|
|
82
|
+
- [`@mailwoman/normalize`](../normalize) — Stage 1 of the pipeline
|
|
83
|
+
- [`@mailwoman/neural`](../neural) — neural classifier (ONNX runtime)
|
|
84
|
+
- [`@mailwoman/classifiers`](../classifiers) — rule-based classifiers
|
|
85
|
+
- [What Mailwoman Is](https://mailwoman.sister.software/articles/concepts/what-mailwoman-is/)
|
|
86
|
+
- [Staged Pipeline Contract](https://mailwoman.sister.software/articles/plan/reference/STAGES/)
|
|
87
|
+
|
|
88
|
+
## License
|
|
89
|
+
|
|
90
|
+
[AGPL-3.0-only](https://www.gnu.org/licenses/agpl-3.0.html)
|
|
@@ -1,49 +1,18 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
-2.534797191619873,
|
|
20
|
-
-2.1968510150909424,
|
|
21
|
-
-1.1691420078277588,
|
|
22
|
-
-0.1516338437795639,
|
|
23
|
-
-1.0911738872528076,
|
|
24
|
-
0.4597792327404022,
|
|
25
|
-
1.1227235794067383,
|
|
26
|
-
0.8241579532623291,
|
|
27
|
-
1.009660005569458,
|
|
28
|
-
0.47969329357147217,
|
|
29
|
-
0.34428685903549194,
|
|
30
|
-
3.046959161758423
|
|
31
|
-
],
|
|
32
|
-
"trainedAt": null,
|
|
33
|
-
"trainRows": 612888,
|
|
34
|
-
"quantization": "int8-per-row",
|
|
35
|
-
"scales": [
|
|
36
|
-
0.01638676988796925,
|
|
37
|
-
0.018822564853457954,
|
|
38
|
-
0.024230125382190614,
|
|
39
|
-
0.0221641082463302,
|
|
40
|
-
0.04191271714338168,
|
|
41
|
-
0.019635750552800698,
|
|
42
|
-
0.022043046050184353,
|
|
43
|
-
0.02781985688397265,
|
|
44
|
-
0.02106240227466493,
|
|
45
|
-
0.022867627031221166,
|
|
46
|
-
0.029368993804210753,
|
|
47
|
-
0.04316849220456101
|
|
48
|
-
]
|
|
49
|
-
}
|
|
2
|
+
"classes": ["US", "FR", "GB", "CN", "NL", "IT", "DE", "JP", "ES", "KR", "TW", "OTHER"],
|
|
3
|
+
"featureDim": 65536,
|
|
4
|
+
"temperature": 1.2,
|
|
5
|
+
"bias": [
|
|
6
|
+
-2.534797191619873, -2.1968510150909424, -1.1691420078277588, -0.1516338437795639, -1.0911738872528076,
|
|
7
|
+
0.4597792327404022, 1.1227235794067383, 0.8241579532623291, 1.009660005569458, 0.47969329357147217,
|
|
8
|
+
0.34428685903549194, 3.046959161758423
|
|
9
|
+
],
|
|
10
|
+
"trainedAt": null,
|
|
11
|
+
"trainRows": 612888,
|
|
12
|
+
"quantization": "int8-per-row",
|
|
13
|
+
"scales": [
|
|
14
|
+
0.01638676988796925, 0.018822564853457954, 0.024230125382190614, 0.0221641082463302, 0.04191271714338168,
|
|
15
|
+
0.019635750552800698, 0.022043046050184353, 0.02781985688397265, 0.02106240227466493, 0.022867627031221166,
|
|
16
|
+
0.029368993804210753, 0.04316849220456101
|
|
17
|
+
]
|
|
18
|
+
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Containment-preserving arbitration (#478 inc 3, fix-v1).
|
|
7
|
+
*
|
|
8
|
+
* The first arbitration implementation flattened the neural parse to proposals, unioned the solved
|
|
9
|
+
* v0 proposals, filtered per-component, resolved span overlaps, and rebuilt a FLAT tree. That
|
|
10
|
+
* lost containment two ways (diagnosed in `2026-06-17-478-arbitration-arena-gate.md`): the
|
|
11
|
+
* overlap pass evicted a `street` for the `street_suffix` sitting inside it (street dropped on
|
|
12
|
+
* 42% of rows), and the flat tree lost the region→locality structure the resolver needs
|
|
13
|
+
* (wrong-state namesakes, coord p50 3.3 km → 1069 km).
|
|
14
|
+
*
|
|
15
|
+
* This applies arbitration as **edits on the nested neural argmax tree** — never flattening, never
|
|
16
|
+
* restructuring — so the neural tree's containment is preserved by construction. Used only on the
|
|
17
|
+
* `rule_preferred` route; `neural_preferred` / `abstain` pass the neural tree through untouched.
|
|
18
|
+
*
|
|
19
|
+
* The edits (DeepSeek-coordinated, 2026-06-17):
|
|
20
|
+
*
|
|
21
|
+
* 1. **Relabel** — when a rule proposal covers the EXACT span of a neural node but assigns a different
|
|
22
|
+
* tag, take the rule's tag (the genuine same-span disagreement; rule wins under
|
|
23
|
+
* `rule_preferred`). Structure unchanged — only the node's tag/provenance.
|
|
24
|
+
* 2. **Add missing tags** — a rule proposal whose tag is absent from the neural tree AND whose span
|
|
25
|
+
* doesn't overlap any neural node is added as a new root (a component neural missed
|
|
26
|
+
* entirely).
|
|
27
|
+
*
|
|
28
|
+
* What it deliberately does NOT do: replace a neural node with a differently-spanned rule node,
|
|
29
|
+
* drop neural's sub-component decomposition (`street_suffix`/`street_prefix`), or add an
|
|
30
|
+
* overlapping rule node. So a clean address — where neural and v0 agree on tags+spans and differ
|
|
31
|
+
* only in street decomposition — is a **no-op**. The cost is losing pure-decomposition wins (low
|
|
32
|
+
* value); the gate re-run is the arbiter.
|
|
33
|
+
*/
|
|
34
|
+
import type { ClassificationProposal } from "../types/index.js";
|
|
35
|
+
import type { AddressTree } from "./types.js";
|
|
36
|
+
/**
|
|
37
|
+
* Edit the nested neural argmax tree with the solved v0 (rule) parse under the `rule_preferred`
|
|
38
|
+
* route — relabel same-span tag disagreements toward rule, add rule-only non-overlapping missing
|
|
39
|
+
* tags. Containment-preserving (no flatten, no restructure). Input is not mutated.
|
|
40
|
+
*
|
|
41
|
+
* @param tree The neural argmax `AddressTree`.
|
|
42
|
+
* @param ruleProposals Proposals from the solved v0 parse (`solutionToProposals`).
|
|
43
|
+
*/
|
|
44
|
+
export declare function applyRuleArbitration(tree: AddressTree, ruleProposals: readonly ClassificationProposal[]): AddressTree;
|
|
45
|
+
//# sourceMappingURL=arbitrate-tree.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"arbitrate-tree.d.ts","sourceRoot":"","sources":["../../decoder/arbitrate-tree.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AAEH,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,mBAAmB,CAAA;AAC/D,OAAO,KAAK,EAAe,WAAW,EAAE,MAAM,YAAY,CAAA;AAU1D;;;;;;;GAOG;AACH,wBAAgB,oBAAoB,CAAC,IAAI,EAAE,WAAW,EAAE,aAAa,EAAE,SAAS,sBAAsB,EAAE,GAAG,WAAW,CAgDrH"}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Containment-preserving arbitration (#478 inc 3, fix-v1).
|
|
7
|
+
*
|
|
8
|
+
* The first arbitration implementation flattened the neural parse to proposals, unioned the solved
|
|
9
|
+
* v0 proposals, filtered per-component, resolved span overlaps, and rebuilt a FLAT tree. That
|
|
10
|
+
* lost containment two ways (diagnosed in `2026-06-17-478-arbitration-arena-gate.md`): the
|
|
11
|
+
* overlap pass evicted a `street` for the `street_suffix` sitting inside it (street dropped on
|
|
12
|
+
* 42% of rows), and the flat tree lost the region→locality structure the resolver needs
|
|
13
|
+
* (wrong-state namesakes, coord p50 3.3 km → 1069 km).
|
|
14
|
+
*
|
|
15
|
+
* This applies arbitration as **edits on the nested neural argmax tree** — never flattening, never
|
|
16
|
+
* restructuring — so the neural tree's containment is preserved by construction. Used only on the
|
|
17
|
+
* `rule_preferred` route; `neural_preferred` / `abstain` pass the neural tree through untouched.
|
|
18
|
+
*
|
|
19
|
+
* The edits (DeepSeek-coordinated, 2026-06-17):
|
|
20
|
+
*
|
|
21
|
+
* 1. **Relabel** — when a rule proposal covers the EXACT span of a neural node but assigns a different
|
|
22
|
+
* tag, take the rule's tag (the genuine same-span disagreement; rule wins under
|
|
23
|
+
* `rule_preferred`). Structure unchanged — only the node's tag/provenance.
|
|
24
|
+
* 2. **Add missing tags** — a rule proposal whose tag is absent from the neural tree AND whose span
|
|
25
|
+
* doesn't overlap any neural node is added as a new root (a component neural missed
|
|
26
|
+
* entirely).
|
|
27
|
+
*
|
|
28
|
+
* What it deliberately does NOT do: replace a neural node with a differently-spanned rule node,
|
|
29
|
+
* drop neural's sub-component decomposition (`street_suffix`/`street_prefix`), or add an
|
|
30
|
+
* overlapping rule node. So a clean address — where neural and v0 agree on tags+spans and differ
|
|
31
|
+
* only in street decomposition — is a **no-op**. The cost is losing pure-decomposition wins (low
|
|
32
|
+
* value); the gate re-run is the arbiter.
|
|
33
|
+
*/
|
|
34
|
+
function cloneNode(node) {
|
|
35
|
+
return { ...node, children: node.children.map(cloneNode) };
|
|
36
|
+
}
|
|
37
|
+
function spansOverlap(aStart, aEnd, bStart, bEnd) {
|
|
38
|
+
return aStart < bEnd && bStart < aEnd;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Edit the nested neural argmax tree with the solved v0 (rule) parse under the `rule_preferred`
|
|
42
|
+
* route — relabel same-span tag disagreements toward rule, add rule-only non-overlapping missing
|
|
43
|
+
* tags. Containment-preserving (no flatten, no restructure). Input is not mutated.
|
|
44
|
+
*
|
|
45
|
+
* @param tree The neural argmax `AddressTree`.
|
|
46
|
+
* @param ruleProposals Proposals from the solved v0 parse (`solutionToProposals`).
|
|
47
|
+
*/
|
|
48
|
+
export function applyRuleArbitration(tree, ruleProposals) {
|
|
49
|
+
const roots = tree.roots.map(cloneNode);
|
|
50
|
+
// 1. Relabel: a rule proposal on the EXACT span of a neural node, but a different tag → rule's tag.
|
|
51
|
+
const relabel = (node) => {
|
|
52
|
+
const hit = ruleProposals.find((p) => p.span.start === node.start && p.span.end === node.end && p.component !== node.tag);
|
|
53
|
+
if (hit) {
|
|
54
|
+
node.tag = hit.component;
|
|
55
|
+
node.source = "rule";
|
|
56
|
+
node.confidence = hit.confidence;
|
|
57
|
+
node.sourceId = hit.source_id;
|
|
58
|
+
}
|
|
59
|
+
for (const child of node.children)
|
|
60
|
+
relabel(child);
|
|
61
|
+
};
|
|
62
|
+
for (const root of roots)
|
|
63
|
+
relabel(root);
|
|
64
|
+
// Post-relabel inventory: which tags exist, and every node span (for the overlap guard).
|
|
65
|
+
const neuralTags = new Set();
|
|
66
|
+
const neuralSpans = [];
|
|
67
|
+
const collect = (node) => {
|
|
68
|
+
neuralTags.add(node.tag);
|
|
69
|
+
neuralSpans.push({ start: node.start, end: node.end });
|
|
70
|
+
for (const child of node.children)
|
|
71
|
+
collect(child);
|
|
72
|
+
};
|
|
73
|
+
for (const root of roots)
|
|
74
|
+
collect(root);
|
|
75
|
+
// 2. Add: a rule tag the neural tree lacks entirely, on a span that overlaps no neural node.
|
|
76
|
+
for (const p of ruleProposals) {
|
|
77
|
+
if (neuralTags.has(p.component))
|
|
78
|
+
continue;
|
|
79
|
+
if (neuralSpans.some((s) => spansOverlap(s.start, s.end, p.span.start, p.span.end)))
|
|
80
|
+
continue;
|
|
81
|
+
roots.push({
|
|
82
|
+
tag: p.component,
|
|
83
|
+
value: p.span.body,
|
|
84
|
+
start: p.span.start,
|
|
85
|
+
end: p.span.end,
|
|
86
|
+
confidence: p.confidence,
|
|
87
|
+
children: [],
|
|
88
|
+
source: p.source,
|
|
89
|
+
sourceId: p.source_id,
|
|
90
|
+
});
|
|
91
|
+
neuralTags.add(p.component); // a tag is added at most once
|
|
92
|
+
neuralSpans.push({ start: p.span.start, end: p.span.end });
|
|
93
|
+
}
|
|
94
|
+
roots.sort((a, b) => a.start - b.start);
|
|
95
|
+
return { ...tree, roots };
|
|
96
|
+
}
|
|
97
|
+
//# sourceMappingURL=arbitrate-tree.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"arbitrate-tree.js","sourceRoot":"","sources":["../../decoder/arbitrate-tree.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AAKH,SAAS,SAAS,CAAC,IAAiB;IACnC,OAAO,EAAE,GAAG,IAAI,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAA;AAC3D,CAAC;AAED,SAAS,YAAY,CAAC,MAAc,EAAE,IAAY,EAAE,MAAc,EAAE,IAAY;IAC/E,OAAO,MAAM,GAAG,IAAI,IAAI,MAAM,GAAG,IAAI,CAAA;AACtC,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,oBAAoB,CAAC,IAAiB,EAAE,aAAgD;IACvG,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC,CAAA;IAEvC,oGAAoG;IACpG,MAAM,OAAO,GAAG,CAAC,IAAiB,EAAQ,EAAE;QAC3C,MAAM,GAAG,GAAG,aAAa,CAAC,IAAI,CAC7B,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,KAAK,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC,SAAS,KAAK,IAAI,CAAC,GAAG,CACzF,CAAA;QACD,IAAI,GAAG,EAAE,CAAC;YACT,IAAI,CAAC,GAAG,GAAG,GAAG,CAAC,SAAS,CAAA;YACxB,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;YACpB,IAAI,CAAC,UAAU,GAAG,GAAG,CAAC,UAAU,CAAA;YAChC,IAAI,CAAC,QAAQ,GAAG,GAAG,CAAC,SAAS,CAAA;QAC9B,CAAC;QACD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO,CAAC,KAAK,CAAC,CAAA;IAClD,CAAC,CAAA;IACD,KAAK,MAAM,IAAI,IAAI,KAAK;QAAE,OAAO,CAAC,IAAI,CAAC,CAAA;IAEvC,yFAAyF;IACzF,MAAM,UAAU,GAAG,IAAI,GAAG,EAAU,CAAA;IACpC,MAAM,WAAW,GAA0C,EAAE,CAAA;IAC7D,MAAM,OAAO,GAAG,CAAC,IAAiB,EAAQ,EAAE;QAC3C,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QACxB,WAAW,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,CAAC,CAAA;QACtD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO,CAAC,KAAK,CAAC,CAAA;IAClD,CAAC,CAAA;IACD,KAAK,MAAM,IAAI,IAAI,KAAK;QAAE,OAAO,CAAC,IAAI,CAAC,CAAA;IAEvC,6FAA6F;IAC7F,KAAK,MAAM,CAAC,IAAI,aAAa,EAAE,CAAC;QAC/B,IAAI,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC;YAAE,SAAQ;QACzC,IAAI,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,YAAY,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAAE,SAAQ;QAC7F,KAAK,CAAC,IAAI,CAAC;YACV,GAAG,EAAE,CAAC,CAAC,SAAS;YAChB,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI;YAClB,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK;YACnB,GAAG,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG;YACf,UAAU,EAAE,CAAC,CAAC,UAAU;YACxB,QAAQ,EAAE,EAAE;YACZ,MAAM,EAAE,CAAC,CAAC,MAAM;YAChB,QAAQ,EAAE,CAAC,CAAC,SAAS;SACrB,CAAC,CAAA;QACF,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC,CAAA,CAAC,8BAA8B;QAC1D,WAAW,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,CAAA;IAC3D,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAA;IACvC,OAAO,EAAE,GAAG,IAAI,EAAE,KAAK,EAAE,CAAA;AAC1B,CAAC"}
|
|
@@ -17,10 +17,10 @@
|
|
|
17
17
|
* highest-priority entry in this span's `PARENT_OF` list. Distance is the tiebreaker only.
|
|
18
18
|
* Spans with no found parent become roots.
|
|
19
19
|
*
|
|
20
|
-
* The "nearest" rule (vs "most-recent-prior") is intentional: it makes the decoder
|
|
21
|
-
* ordering — e.g. "75004 Paris" attaches postcode to locality even though postcode came
|
|
22
|
-
* Source order is still preserved in the `start`/`end` fields, which the XML serializer
|
|
23
|
-
* as attributes.
|
|
20
|
+
* The "nearest" rule (vs "most-recent-prior") is intentional: it makes the decoder independent of
|
|
21
|
+
* source ordering — e.g. "75004 Paris" attaches postcode to locality even though postcode came
|
|
22
|
+
* first. Source order is still preserved in the `start`/`end` fields, which the XML serializer
|
|
23
|
+
* exposes as attributes.
|
|
24
24
|
*/
|
|
25
25
|
import type { Calibrator } from "./calibration.js";
|
|
26
26
|
import type { AddressSystem, AddressTree, DecoderToken } from "./types.js";
|
|
@@ -17,10 +17,10 @@
|
|
|
17
17
|
* highest-priority entry in this span's `PARENT_OF` list. Distance is the tiebreaker only.
|
|
18
18
|
* Spans with no found parent become roots.
|
|
19
19
|
*
|
|
20
|
-
* The "nearest" rule (vs "most-recent-prior") is intentional: it makes the decoder
|
|
21
|
-
* ordering — e.g. "75004 Paris" attaches postcode to locality even though postcode came
|
|
22
|
-
* Source order is still preserved in the `start`/`end` fields, which the XML serializer
|
|
23
|
-
* as attributes.
|
|
20
|
+
* The "nearest" rule (vs "most-recent-prior") is intentional: it makes the decoder independent of
|
|
21
|
+
* source ordering — e.g. "75004 Paris" attaches postcode to locality even though postcode came
|
|
22
|
+
* first. Source order is still preserved in the `start`/`end` fields, which the XML serializer
|
|
23
|
+
* exposes as attributes.
|
|
24
24
|
*/
|
|
25
25
|
import { containmentFor } from "./containment.js";
|
|
26
26
|
function bioParts(label) {
|
|
@@ -88,7 +88,7 @@ function emitSpans(raw, tokens, attribution) {
|
|
|
88
88
|
//
|
|
89
89
|
// Guard: only merge when the text in `raw` between the two spans is whitespace-only. A
|
|
90
90
|
// comma or any other separator keeps them distinct, and an intervening O/different-tag
|
|
91
|
-
// token already nulls/replaces `open` above — so two
|
|
91
|
+
// token already nulls/replaces `open` above — so two separate same-tag spans
|
|
92
92
|
// (e.g. "Springfield, Chicago") are never merged.
|
|
93
93
|
if (prefix === "B" && open !== null && open.tag === tag && /^\s*$/.test(raw.slice(open.end, tok.start))) {
|
|
94
94
|
open.end = tok.end;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"build-tree.js","sourceRoot":"","sources":["../../decoder/build-tree.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAIH,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AAmCjD,SAAS,QAAQ,CAAC,KAAe;IAChC,IAAI,KAAK,KAAK,GAAG;QAAE,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,CAAA;IACpD,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IAC/B,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAc,EAAE,GAAG,EAAE,KAAK,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAiB,EAAE,CAAA;AACjG,CAAC;AAED,mGAAmG;AACnG,iGAAiG;AACjG,+FAA+F;AAC/F,8FAA8F;AAC9F,+FAA+F;AAC/F,oDAAoD;AACpD,SAAS,YAAY,CAAC,GAAW,EAAE,KAAa,EAAE,GAAW;IAC5D,IAAI,CAAC,GAAG,KAAK,CAAA;IACb,IAAI,CAAC,GAAG,GAAG,CAAA;IACX,MAAM,UAAU,GAAG,CAAC,CAAS,EAAW,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAA;IAC7E,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;QAAE,CAAC,EAAE,CAAA;IACnC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC;QAAE,CAAC,EAAE,CAAA;IACvC,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAA;AAC5B,CAAC;AAED,SAAS,KAAK,CAAC,IAAqB,EAAE,GAAW,EAAE,GAAkB,EAAE,WAA0B;IAChG,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAA;IACtB,MAAM,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,YAAY,CAAC,GAAG,EAAE,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAA;IAC9D,6FAA6F;IAC7F,IAAI,KAAK,IAAI,GAAG;QAAE,OAAO,IAAI,CAAA;IAC7B,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAA;IACnC,MAAM,aAAa,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAA;IAC3F,MAAM,UAAU,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,aAAa,CAAA;IAC/F,MAAM,IAAI,GAAgB,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,KAAK,EAAE,UAAU,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAA;IACxF,IAAI,WAAW,CAAC,MAAM,KAAK,SAAS;QAAE,IAAI,CAAC,MAAM,GAAG,WAAW,CAAC,MAAM,CAAA;IACtE,IAAI,WAAW,CAAC,QAAQ,KAAK,SAAS;QAAE,IAAI,CAAC,QAAQ,GAAG,WAAW,CAAC,QAAQ,CAAA;IAC5E,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACd,OAAO,IAAI,CAAA;AACZ,CAAC;AAED,SAAS,SAAS,CAAC,GAAW,EAAE,MAAsB,EAAE,WAA0B;IACjF,MAAM,GAAG,GAAkB,EAAE,CAAA;IAC7B,IAAI,IAAI,GAAoB,IAAI,CAAA;IAEhC,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QAC1B,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;QAE3C,IAAI,MAAM,KAAK,GAAG,EAAE,CAAC;YACpB,4FAA4F;YAC5F,sFAAsF;YACtF,uFAAuF;YACvF,0FAA0F;YAC1F,qFAAqF;YACrF,iFAAiF;YACjF,IAAI,IAAI,KAAK,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;gBAAE,SAAQ;YAC1E,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,WAAW,CAAC,CAAA;YACzC,SAAQ;QACT,CAAC;QAED,IAAI,MAAM,KAAK,GAAG,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,CAAC,GAAG,KAAK,GAAG,EAAE,CAAC;YACzD,yFAAyF;YACzF,sFAAsF;YACtF,kFAAkF;YAClF,8EAA8E;YAC9E,EAAE;YACF,uFAAuF;YACvF,uFAAuF;YACvF,
|
|
1
|
+
{"version":3,"file":"build-tree.js","sourceRoot":"","sources":["../../decoder/build-tree.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAIH,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAA;AAmCjD,SAAS,QAAQ,CAAC,KAAe;IAChC,IAAI,KAAK,KAAK,GAAG;QAAE,OAAO,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,CAAA;IACpD,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IAC/B,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAc,EAAE,GAAG,EAAE,KAAK,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAiB,EAAE,CAAA;AACjG,CAAC;AAED,mGAAmG;AACnG,iGAAiG;AACjG,+FAA+F;AAC/F,8FAA8F;AAC9F,+FAA+F;AAC/F,oDAAoD;AACpD,SAAS,YAAY,CAAC,GAAW,EAAE,KAAa,EAAE,GAAW;IAC5D,IAAI,CAAC,GAAG,KAAK,CAAA;IACb,IAAI,CAAC,GAAG,GAAG,CAAA;IACX,MAAM,UAAU,GAAG,CAAC,CAAS,EAAW,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAA;IAC7E,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC;QAAE,CAAC,EAAE,CAAA;IACnC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC;QAAE,CAAC,EAAE,CAAA;IACvC,OAAO,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAA;AAC5B,CAAC;AAED,SAAS,KAAK,CAAC,IAAqB,EAAE,GAAW,EAAE,GAAkB,EAAE,WAA0B;IAChG,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAA;IACtB,MAAM,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,YAAY,CAAC,GAAG,EAAE,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,CAAA;IAC9D,6FAA6F;IAC7F,IAAI,KAAK,IAAI,GAAG;QAAE,OAAO,IAAI,CAAA;IAC7B,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAA;IACnC,MAAM,aAAa,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,CAAA;IAC3F,MAAM,UAAU,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,aAAa,CAAA;IAC/F,MAAM,IAAI,GAAgB,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,GAAG,EAAE,KAAK,EAAE,UAAU,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAA;IACxF,IAAI,WAAW,CAAC,MAAM,KAAK,SAAS;QAAE,IAAI,CAAC,MAAM,GAAG,WAAW,CAAC,MAAM,CAAA;IACtE,IAAI,WAAW,CAAC,QAAQ,KAAK,SAAS;QAAE,IAAI,CAAC,QAAQ,GAAG,WAAW,CAAC,QAAQ,CAAA;IAC5E,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACd,OAAO,IAAI,CAAA;AACZ,CAAC;AAED,SAAS,SAAS,CAAC,GAAW,EAAE,MAAsB,EAAE,WAA0B;IACjF,MAAM,GAAG,GAAkB,EAAE,CAAA;IAC7B,IAAI,IAAI,GAAoB,IAAI,CAAA;IAEhC,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;QAC1B,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,QAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;QAE3C,IAAI,MAAM,KAAK,GAAG,EAAE,CAAC;YACpB,4FAA4F;YAC5F,sFAAsF;YACtF,uFAAuF;YACvF,0FAA0F;YAC1F,qFAAqF;YACrF,iFAAiF;YACjF,IAAI,IAAI,KAAK,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;gBAAE,SAAQ;YAC1E,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,WAAW,CAAC,CAAA;YACzC,SAAQ;QACT,CAAC;QAED,IAAI,MAAM,KAAK,GAAG,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,CAAC,GAAG,KAAK,GAAG,EAAE,CAAC;YACzD,yFAAyF;YACzF,sFAAsF;YACtF,kFAAkF;YAClF,8EAA8E;YAC9E,EAAE;YACF,uFAAuF;YACvF,uFAAuF;YACvF,6EAA6E;YAC7E,kDAAkD;YAClD,IAAI,MAAM,KAAK,GAAG,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,CAAC,GAAG,KAAK,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;gBACzG,IAAI,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG,CAAA;gBAClB,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAA;gBACrC,SAAQ;YACT,CAAC;YACD,IAAI,GAAG,KAAK,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,WAAW,CAAC,CAAA;YACzC,IAAI,GAAG,EAAE,GAAG,EAAE,GAAI,EAAE,KAAK,EAAE,GAAG,CAAC,KAAK,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,WAAW,EAAE,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAA;YACnF,SAAQ;QACT,CAAC;QAED,+BAA+B;QAC/B,IAAI,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG,CAAA;QAClB,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,CAAA;IACtC,CAAC;IAED,KAAK,CAAC,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,WAAW,CAAC,CAAA;IAClC,OAAO,GAAG,CAAA;AACX,CAAC;AAED,SAAS,QAAQ,CAAC,CAAc,EAAE,CAAc;IAC/C,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,KAAK;QAAE,OAAO,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,CAAA;IAC5C,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,KAAK;QAAE,OAAO,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,CAAA;IAC5C,OAAO,CAAC,CAAA;AACT,CAAC;AAED,SAAS,UAAU,CAClB,IAAiB,EACjB,GAAkB,EAClB,QAAuD;IAEvD,MAAM,UAAU,GAAG,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,EAAE,CAAA;IAC3C,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACpC,MAAM,OAAO,GAAG,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC,GAAG,KAAK,SAAS,CAAC,CAAA;QACpE,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;YAAE,SAAQ;QAClC,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,GAAG,EAAE,IAAI,CAAC,GAAG,QAAQ,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAA;IAChG,CAAC;IACD,OAAO,IAAI,CAAA;AACZ,CAAC;AAED,SAAS,WAAW,CAAC,KAAoB;IACxC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAA;IACvC,KAAK,MAAM,CAAC,IAAI,KAAK;QAAE,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAA;AAC/C,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,gBAAgB,CAAC,GAAW,EAAE,MAAsB,EAAE,OAAsB,EAAE;IAC7F,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,CAAA;IAC1C,MAAM,KAAK,GAAkB,EAAE,CAAA;IAC/B,MAAM,QAAQ,GAAG,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IAE5C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAA;QAChD,IAAI,MAAM;YAAE,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;;YACjC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACtB,CAAC;IAED,WAAW,CAAC,KAAK,CAAC,CAAA;IAClB,MAAM,IAAI,GAAgB,EAAE,GAAG,EAAE,KAAK,EAAE,CAAA;IACxC,IAAI,IAAI,CAAC,MAAM,KAAK,SAAS;QAAE,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,CAAA;IACxD,OAAO,IAAI,CAAA;AACZ,CAAC"}
|
|
@@ -24,8 +24,8 @@
|
|
|
24
24
|
* The cheap insurance is this indirection: callers ask `containmentFor(system)` rather than
|
|
25
25
|
* importing one global constant. Today every system resolves to `WESTERN_PARENT_OF` (the
|
|
26
26
|
* historical map, behavior-identical), and `PARENT_OF` is kept as an alias so existing imports
|
|
27
|
-
* don't break. When a
|
|
28
|
-
*
|
|
27
|
+
* don't break. When a distinct system map lands (Phase 6 JP), it slots in here with zero
|
|
28
|
+
* call-site churn. See `AddressSystem` in `./types.ts`.
|
|
29
29
|
*/
|
|
30
30
|
import type { ComponentTag } from "../types/component.js";
|
|
31
31
|
import type { AddressSystem } from "./types.js";
|
|
@@ -24,8 +24,8 @@
|
|
|
24
24
|
* The cheap insurance is this indirection: callers ask `containmentFor(system)` rather than
|
|
25
25
|
* importing one global constant. Today every system resolves to `WESTERN_PARENT_OF` (the
|
|
26
26
|
* historical map, behavior-identical), and `PARENT_OF` is kept as an alias so existing imports
|
|
27
|
-
* don't break. When a
|
|
28
|
-
*
|
|
27
|
+
* don't break. When a distinct system map lands (Phase 6 JP), it slots in here with zero
|
|
28
|
+
* call-site churn. See `AddressSystem` in `./types.ts`.
|
|
29
29
|
*/
|
|
30
30
|
/** Preferred-parent ordering for each tag. Empty / missing = always root. */
|
|
31
31
|
export const WESTERN_PARENT_OF = {
|
package/out/decoder/index.d.ts
CHANGED
|
@@ -3,10 +3,12 @@
|
|
|
3
3
|
* @license AGPL-3.0
|
|
4
4
|
* @author Teffen Ellis, et al.
|
|
5
5
|
*/
|
|
6
|
+
export * from "./arbitrate-tree.js";
|
|
6
7
|
export * from "./build-tree.js";
|
|
7
8
|
export * from "./calibration.js";
|
|
8
9
|
export * from "./containment.js";
|
|
9
10
|
export * from "./proposals-to-tree.js";
|
|
11
|
+
export * from "./resolve-proposal-overlaps.js";
|
|
10
12
|
export * from "./serialize-json.js";
|
|
11
13
|
export * from "./serialize-tuples.js";
|
|
12
14
|
export * from "./serialize-xml.js";
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../decoder/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,kBAAkB,CAAA;AAChC,cAAc,kBAAkB,CAAA;AAChC,cAAc,wBAAwB,CAAA;AACtC,cAAc,qBAAqB,CAAA;AACnC,cAAc,uBAAuB,CAAA;AACrC,cAAc,oBAAoB,CAAA;AAClC,cAAc,YAAY,CAAA;AAC1B,cAAc,oBAAoB,CAAA"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../decoder/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,qBAAqB,CAAA;AACnC,cAAc,iBAAiB,CAAA;AAC/B,cAAc,kBAAkB,CAAA;AAChC,cAAc,kBAAkB,CAAA;AAChC,cAAc,wBAAwB,CAAA;AACtC,cAAc,gCAAgC,CAAA;AAC9C,cAAc,qBAAqB,CAAA;AACnC,cAAc,uBAAuB,CAAA;AACrC,cAAc,oBAAoB,CAAA;AAClC,cAAc,YAAY,CAAA;AAC1B,cAAc,oBAAoB,CAAA"}
|
package/out/decoder/index.js
CHANGED
|
@@ -3,10 +3,12 @@
|
|
|
3
3
|
* @license AGPL-3.0
|
|
4
4
|
* @author Teffen Ellis, et al.
|
|
5
5
|
*/
|
|
6
|
+
export * from "./arbitrate-tree.js";
|
|
6
7
|
export * from "./build-tree.js";
|
|
7
8
|
export * from "./calibration.js";
|
|
8
9
|
export * from "./containment.js";
|
|
9
10
|
export * from "./proposals-to-tree.js";
|
|
11
|
+
export * from "./resolve-proposal-overlaps.js";
|
|
10
12
|
export * from "./serialize-json.js";
|
|
11
13
|
export * from "./serialize-tuples.js";
|
|
12
14
|
export * from "./serialize-xml.js";
|
package/out/decoder/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../decoder/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,kBAAkB,CAAA;AAChC,cAAc,kBAAkB,CAAA;AAChC,cAAc,wBAAwB,CAAA;AACtC,cAAc,qBAAqB,CAAA;AACnC,cAAc,uBAAuB,CAAA;AACrC,cAAc,oBAAoB,CAAA;AAClC,cAAc,YAAY,CAAA;AAC1B,cAAc,oBAAoB,CAAA"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../decoder/index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,qBAAqB,CAAA;AACnC,cAAc,iBAAiB,CAAA;AAC/B,cAAc,kBAAkB,CAAA;AAChC,cAAc,kBAAkB,CAAA;AAChC,cAAc,wBAAwB,CAAA;AACtC,cAAc,gCAAgC,CAAA;AAC9C,cAAc,qBAAqB,CAAA;AACnC,cAAc,uBAAuB,CAAA;AACrC,cAAc,oBAAoB,CAAA;AAClC,cAAc,YAAY,CAAA;AAC1B,cAAc,oBAAoB,CAAA"}
|
|
@@ -13,7 +13,26 @@
|
|
|
13
13
|
* For consumers that need containment back, re-tokenize the input and run the full decoder
|
|
14
14
|
* pipeline.
|
|
15
15
|
*/
|
|
16
|
-
import type { ClassificationProposal } from "../types/index.js";
|
|
16
|
+
import type { ClassificationProposal, ClassificationProposalSource, ComponentTag } from "../types/index.js";
|
|
17
17
|
import type { AddressTree } from "./types.js";
|
|
18
18
|
export declare function proposalsToTree(raw: string, proposals: readonly ClassificationProposal[]): AddressTree;
|
|
19
|
+
/**
|
|
20
|
+
* The inverse of {@link proposalsToTree}: walk an `AddressTree` into a flat list of
|
|
21
|
+
* `ClassificationProposal`s (one per node, depth-first), tagged with the given `source` (#478
|
|
22
|
+
* increment 3). Used to bring the whole-text neural parse into the arbitration layer's proposal
|
|
23
|
+
* currency so it can be unioned with rule proposals and filtered by the policy registry.
|
|
24
|
+
*
|
|
25
|
+
* The spans are structural (`{ start, end, body }`) — we intentionally avoid `Span.from(...)`
|
|
26
|
+
* (which forces the tokenization module's filesystem-bound init); downstream proposal consumers
|
|
27
|
+
* read only `start` / `end` / `body`. Same convention as the neural proposal-classifier adapter.
|
|
28
|
+
*
|
|
29
|
+
* @param tree The parsed tree (e.g. the neural argmax tree).
|
|
30
|
+
* @param source Provenance stamped on every emitted proposal (`"neural"` here).
|
|
31
|
+
* @param opts.sourceId Optional stable id surfaced as `source_id`.
|
|
32
|
+
* @param opts.emits Optional tag allow-list; when set, only nodes with these tags are emitted.
|
|
33
|
+
*/
|
|
34
|
+
export declare function treeToProposals(tree: AddressTree, source: ClassificationProposalSource, opts?: {
|
|
35
|
+
sourceId?: string;
|
|
36
|
+
emits?: ReadonlySet<ComponentTag>;
|
|
37
|
+
}): ClassificationProposal[];
|
|
19
38
|
//# sourceMappingURL=proposals-to-tree.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"proposals-to-tree.d.ts","sourceRoot":"","sources":["../../decoder/proposals-to-tree.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;
|
|
1
|
+
{"version":3,"file":"proposals-to-tree.d.ts","sourceRoot":"","sources":["../../decoder/proposals-to-tree.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAGH,OAAO,KAAK,EAAE,sBAAsB,EAAE,4BAA4B,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAA;AAC3G,OAAO,KAAK,EAAe,WAAW,EAAE,MAAM,YAAY,CAAA;AAE1D,wBAAgB,eAAe,CAAC,GAAG,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,sBAAsB,EAAE,GAAG,WAAW,CAatG;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,eAAe,CAC9B,IAAI,EAAE,WAAW,EACjB,MAAM,EAAE,4BAA4B,EACpC,IAAI,GAAE;IAAE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,WAAW,CAAC,YAAY,CAAC,CAAA;CAAO,GACjE,sBAAsB,EAAE,CAqB1B"}
|
|
@@ -27,4 +27,41 @@ export function proposalsToTree(raw, proposals) {
|
|
|
27
27
|
roots.sort((a, b) => a.start - b.start);
|
|
28
28
|
return { raw, roots };
|
|
29
29
|
}
|
|
30
|
+
/**
|
|
31
|
+
* The inverse of {@link proposalsToTree}: walk an `AddressTree` into a flat list of
|
|
32
|
+
* `ClassificationProposal`s (one per node, depth-first), tagged with the given `source` (#478
|
|
33
|
+
* increment 3). Used to bring the whole-text neural parse into the arbitration layer's proposal
|
|
34
|
+
* currency so it can be unioned with rule proposals and filtered by the policy registry.
|
|
35
|
+
*
|
|
36
|
+
* The spans are structural (`{ start, end, body }`) — we intentionally avoid `Span.from(...)`
|
|
37
|
+
* (which forces the tokenization module's filesystem-bound init); downstream proposal consumers
|
|
38
|
+
* read only `start` / `end` / `body`. Same convention as the neural proposal-classifier adapter.
|
|
39
|
+
*
|
|
40
|
+
* @param tree The parsed tree (e.g. the neural argmax tree).
|
|
41
|
+
* @param source Provenance stamped on every emitted proposal (`"neural"` here).
|
|
42
|
+
* @param opts.sourceId Optional stable id surfaced as `source_id`.
|
|
43
|
+
* @param opts.emits Optional tag allow-list; when set, only nodes with these tags are emitted.
|
|
44
|
+
*/
|
|
45
|
+
export function treeToProposals(tree, source, opts = {}) {
|
|
46
|
+
const proposals = [];
|
|
47
|
+
const { sourceId, emits } = opts;
|
|
48
|
+
const visit = (node) => {
|
|
49
|
+
if (!emits || emits.has(node.tag)) {
|
|
50
|
+
const span = { start: node.start, end: node.end, body: node.value };
|
|
51
|
+
proposals.push({
|
|
52
|
+
span,
|
|
53
|
+
component: node.tag,
|
|
54
|
+
confidence: node.confidence,
|
|
55
|
+
source,
|
|
56
|
+
source_id: sourceId ?? node.sourceId ?? source,
|
|
57
|
+
penalty: 0,
|
|
58
|
+
});
|
|
59
|
+
}
|
|
60
|
+
for (const child of node.children)
|
|
61
|
+
visit(child);
|
|
62
|
+
};
|
|
63
|
+
for (const root of tree.roots)
|
|
64
|
+
visit(root);
|
|
65
|
+
return proposals;
|
|
66
|
+
}
|
|
30
67
|
//# sourceMappingURL=proposals-to-tree.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"proposals-to-tree.js","sourceRoot":"","sources":["../../decoder/proposals-to-tree.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;
|
|
1
|
+
{"version":3,"file":"proposals-to-tree.js","sourceRoot":"","sources":["../../decoder/proposals-to-tree.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAMH,MAAM,UAAU,eAAe,CAAC,GAAW,EAAE,SAA4C;IACxF,MAAM,KAAK,GAAkB,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAClD,GAAG,EAAE,CAAC,CAAC,SAAyB;QAChC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,IAAI;QAClB,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK;QACnB,GAAG,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG;QACf,UAAU,EAAE,CAAC,CAAC,UAAU;QACxB,QAAQ,EAAE,EAAE;QACZ,MAAM,EAAE,CAAC,CAAC,MAAM;QAChB,QAAQ,EAAE,CAAC,CAAC,SAAS;KACrB,CAAC,CAAC,CAAA;IACH,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAA;IACvC,OAAO,EAAE,GAAG,EAAE,KAAK,EAAE,CAAA;AACtB,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,eAAe,CAC9B,IAAiB,EACjB,MAAoC,EACpC,OAAiE,EAAE;IAEnE,MAAM,SAAS,GAA6B,EAAE,CAAA;IAC9C,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,GAAG,IAAI,CAAA;IAEhC,MAAM,KAAK,GAAG,CAAC,IAAiB,EAAQ,EAAE;QACzC,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YACnC,MAAM,IAAI,GAAG,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,IAAI,EAAE,IAAI,CAAC,KAAK,EAAqB,CAAA;YACtF,SAAS,CAAC,IAAI,CAAC;gBACd,IAAI;gBACJ,SAAS,EAAE,IAAI,CAAC,GAAG;gBACnB,UAAU,EAAE,IAAI,CAAC,UAAU;gBAC3B,MAAM;gBACN,SAAS,EAAE,QAAQ,IAAI,IAAI,CAAC,QAAQ,IAAI,MAAM;gBAC9C,OAAO,EAAE,CAAC;aACV,CAAC,CAAA;QACH,CAAC;QACD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ;YAAE,KAAK,CAAC,KAAK,CAAC,CAAA;IAChD,CAAC,CAAA;IAED,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK;QAAE,KAAK,CAAC,IAAI,CAAC,CAAA;IAC1C,OAAO,SAAS,CAAA;AACjB,CAAC"}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Coherence pass for arbitrated proposals (#478 increment 3).
|
|
7
|
+
*
|
|
8
|
+
* The arbitration layer unions proposals from multiple sources (whole-text `neural`, per-section
|
|
9
|
+
* `rule`) and filters them per-component via the policy registry. That per-_tag_ filter is blind
|
|
10
|
+
* to cross-_tag_ span overlap: a `neural` street span `[0,11]` ("350 5th Ave") and a `rule`
|
|
11
|
+
* house_number `[0,3]` ("350") can both survive — different tags, overlapping spans. Fed straight
|
|
12
|
+
* into {@link proposalsToTree} (which emits one flat root node per proposal, no overlap handling)
|
|
13
|
+
* that yields an incoherent tree with overlapping nodes, which degrades or breaks the resolver.
|
|
14
|
+
*
|
|
15
|
+
* This pass guarantees the invariant {@link proposalsToTree} needs: **no two surviving proposals
|
|
16
|
+
* have overlapping spans.** It is a greedy interval selection — accept proposals in priority
|
|
17
|
+
* order, skip any that overlap an already-accepted span.
|
|
18
|
+
*
|
|
19
|
+
* ## The selection policy (the gate-tunable lever)
|
|
20
|
+
*
|
|
21
|
+
* Priority is **confidence desc, then shorter span first, then earlier start**:
|
|
22
|
+
*
|
|
23
|
+
* - _Confidence primary_ respects the arbitration that already happened — a source the registry kept
|
|
24
|
+
* at high confidence wins its span.
|
|
25
|
+
* - _Shorter-span-first on ties_ preserves finer decompositions: given equal-confidence
|
|
26
|
+
* `street[0,11]` vs `{house_number[0,3], street[4,11]}`, the two finer spans are accepted and
|
|
27
|
+
* the coarse subsuming span is dropped — keeping the street+house_number precondition intact
|
|
28
|
+
* (the thing #566 broke). The neural argmax path labels per-token, so it normally emits the
|
|
29
|
+
* finer decomposition itself; this tiebreak is the safety net when a coarse rule span
|
|
30
|
+
* competes.
|
|
31
|
+
*
|
|
32
|
+
* This policy is deliberately simple and deterministic. It is the lever the inc-3 assembled gate
|
|
33
|
+
* validates: if it drops too many house numbers (precondition regression) the comparator is where
|
|
34
|
+
* to look. (An alternative — earliest-end-first maximal-tiling, ignoring confidence — maximizes
|
|
35
|
+
* the _count_ of non-overlapping spans but can let a spurious tiny span evict a correct large
|
|
36
|
+
* one; confidence-primary guards against that.)
|
|
37
|
+
*
|
|
38
|
+
* Pure module: reads only `span.{start,end}` + `confidence`. Safe to import anywhere.
|
|
39
|
+
*/
|
|
40
|
+
import type { ClassificationProposal } from "../types/index.js";
|
|
41
|
+
/**
|
|
42
|
+
* Reduce a set of (possibly overlapping) arbitrated proposals to a coherent, non-overlapping set
|
|
43
|
+
* via greedy interval selection. The output is sorted by span start (the order
|
|
44
|
+
* {@link proposalsToTree} expects). Input is not mutated.
|
|
45
|
+
*
|
|
46
|
+
* @param proposals Arbitrated proposals (post policy-registry filter), any source, possibly
|
|
47
|
+
* overlapping.
|
|
48
|
+
*
|
|
49
|
+
* @returns A subset with no two spans overlapping, in span-start order.
|
|
50
|
+
*/
|
|
51
|
+
export declare function resolveProposalOverlaps(proposals: readonly ClassificationProposal[]): ClassificationProposal[];
|
|
52
|
+
//# sourceMappingURL=resolve-proposal-overlaps.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"resolve-proposal-overlaps.d.ts","sourceRoot":"","sources":["../../decoder/resolve-proposal-overlaps.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AAEH,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,mBAAmB,CAAA;AAO/D;;;;;;;;;GASG;AACH,wBAAgB,uBAAuB,CAAC,SAAS,EAAE,SAAS,sBAAsB,EAAE,GAAG,sBAAsB,EAAE,CAmB9G"}
|