@mailwoman/core 2.1.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/out/api/APIClient.d.ts +57 -0
- package/out/api/APIClient.d.ts.map +1 -0
- package/out/api/APIClient.js +108 -0
- package/out/api/APIClient.js.map +1 -0
- package/out/api/headless.d.ts +17 -0
- package/out/api/headless.d.ts.map +1 -0
- package/out/api/headless.js +18 -0
- package/out/api/headless.js.map +1 -0
- package/out/api/index.d.ts +11 -0
- package/out/api/index.d.ts.map +1 -0
- package/out/api/index.js +11 -0
- package/out/api/index.js.map +1 -0
- package/out/api/responses.d.ts +48 -0
- package/out/api/responses.d.ts.map +1 -0
- package/out/api/responses.js +68 -0
- package/out/api/responses.js.map +1 -0
- package/out/collections.d.ts +66 -0
- package/out/collections.d.ts.map +1 -0
- package/out/collections.js +97 -0
- package/out/collections.js.map +1 -0
- package/out/db/schema.d.ts +21 -0
- package/out/db/schema.d.ts.map +1 -0
- package/out/db/schema.js +16 -0
- package/out/db/schema.js.map +1 -0
- package/out/decoder/build-tree.d.ts +14 -1
- package/out/decoder/build-tree.d.ts.map +1 -1
- package/out/decoder/build-tree.js +37 -9
- package/out/decoder/build-tree.js.map +1 -1
- package/out/decoder/proposals-to-tree.d.ts.map +1 -1
- package/out/decoder/proposals-to-tree.js +2 -0
- package/out/decoder/proposals-to-tree.js.map +1 -1
- package/out/decoder/serialize-xml.d.ts +22 -1
- package/out/decoder/serialize-xml.d.ts.map +1 -1
- package/out/decoder/serialize-xml.js +64 -4
- package/out/decoder/serialize-xml.js.map +1 -1
- package/out/decoder/types.d.ts +45 -0
- package/out/decoder/types.d.ts.map +1 -1
- package/out/decoder/types.js +6 -0
- package/out/decoder/types.js.map +1 -1
- package/out/errors/index.d.ts +9 -0
- package/out/errors/index.d.ts.map +1 -0
- package/out/errors/index.js +9 -0
- package/out/errors/index.js.map +1 -0
- package/out/errors/schema.d.ts +69 -0
- package/out/errors/schema.d.ts.map +1 -0
- package/out/errors/schema.js +102 -0
- package/out/errors/schema.js.map +1 -0
- package/out/identifiers.d.ts +18 -0
- package/out/identifiers.d.ts.map +1 -0
- package/out/identifiers.js +49 -0
- package/out/identifiers.js.map +1 -0
- package/out/index.d.ts +3 -0
- package/out/index.d.ts.map +1 -1
- package/out/index.js +3 -4
- package/out/index.js.map +1 -1
- package/out/kysley/adapter.d.ts +13 -0
- package/out/kysley/adapter.d.ts.map +1 -0
- package/out/kysley/adapter.js +25 -0
- package/out/kysley/adapter.js.map +1 -0
- package/out/kysley/client.d.ts +16 -0
- package/out/kysley/client.d.ts.map +1 -0
- package/out/kysley/client.js +22 -0
- package/out/kysley/client.js.map +1 -0
- package/out/kysley/dialect-config.d.ts +27 -0
- package/out/kysley/dialect-config.d.ts.map +1 -0
- package/out/kysley/dialect-config.js +7 -0
- package/out/kysley/dialect-config.js.map +1 -0
- package/out/kysley/dialect.d.ts +39 -0
- package/out/kysley/dialect.d.ts.map +1 -0
- package/out/kysley/dialect.js +49 -0
- package/out/kysley/dialect.js.map +1 -0
- package/out/kysley/driver.d.ts +22 -0
- package/out/kysley/driver.d.ts.map +1 -0
- package/out/kysley/driver.js +114 -0
- package/out/kysley/driver.js.map +1 -0
- package/out/lifecycle/ServiceSymbol.d.ts +59 -0
- package/out/lifecycle/ServiceSymbol.d.ts.map +1 -0
- package/out/lifecycle/ServiceSymbol.js +62 -0
- package/out/lifecycle/ServiceSymbol.js.map +1 -0
- package/out/lifecycle/index.d.ts +11 -0
- package/out/lifecycle/index.d.ts.map +1 -0
- package/out/lifecycle/index.js +11 -0
- package/out/lifecycle/index.js.map +1 -0
- package/out/lifecycle/lru-cache.d.ts +22 -0
- package/out/lifecycle/lru-cache.d.ts.map +1 -0
- package/out/lifecycle/lru-cache.js +31 -0
- package/out/lifecycle/lru-cache.js.map +1 -0
- package/out/lifecycle/services.d.ts +145 -0
- package/out/lifecycle/services.d.ts.map +1 -0
- package/out/lifecycle/services.js +190 -0
- package/out/lifecycle/services.js.map +1 -0
- package/out/logging/index.d.ts +7 -0
- package/out/logging/index.d.ts.map +1 -0
- package/out/logging/index.js +7 -0
- package/out/logging/index.js.map +1 -0
- package/out/logging/shared.d.ts +60 -0
- package/out/logging/shared.d.ts.map +1 -0
- package/out/logging/shared.js +100 -0
- package/out/logging/shared.js.map +1 -0
- package/out/logging/tables.d.ts +7 -0
- package/out/logging/tables.d.ts.map +1 -0
- package/out/logging/tables.js +75 -0
- package/out/logging/tables.js.map +1 -0
- package/out/objects.d.ts +96 -0
- package/out/objects.d.ts.map +1 -0
- package/out/objects.js +96 -0
- package/out/objects.js.map +1 -0
- package/out/parser/proposal-pipeline.d.ts.map +1 -1
- package/out/parser/proposal-pipeline.js +0 -1
- package/out/parser/proposal-pipeline.js.map +1 -1
- package/out/pipeline/index.d.ts +14 -0
- package/out/pipeline/index.d.ts.map +1 -0
- package/out/pipeline/index.js +11 -0
- package/out/pipeline/index.js.map +1 -0
- package/out/pipeline/reconcile.d.ts +135 -0
- package/out/pipeline/reconcile.d.ts.map +1 -0
- package/out/pipeline/reconcile.js +355 -0
- package/out/pipeline/reconcile.js.map +1 -0
- package/out/pipeline/runtime-pipeline.d.ts +29 -0
- package/out/pipeline/runtime-pipeline.d.ts.map +1 -0
- package/out/pipeline/runtime-pipeline.js +339 -0
- package/out/pipeline/runtime-pipeline.js.map +1 -0
- package/out/pipeline/span-logit-aggregation.d.ts +57 -0
- package/out/pipeline/span-logit-aggregation.d.ts.map +1 -0
- package/out/pipeline/span-logit-aggregation.js +105 -0
- package/out/pipeline/span-logit-aggregation.js.map +1 -0
- package/out/pipeline/types.d.ts +215 -0
- package/out/pipeline/types.d.ts.map +1 -0
- package/out/pipeline/types.js +16 -0
- package/out/pipeline/types.js.map +1 -0
- package/out/resolver/index.d.ts +9 -0
- package/out/resolver/index.d.ts.map +1 -0
- package/out/resolver/index.js +8 -0
- package/out/resolver/index.js.map +1 -0
- package/out/resolver/resolve.d.ts +21 -0
- package/out/resolver/resolve.d.ts.map +1 -0
- package/out/resolver/resolve.js +118 -0
- package/out/resolver/resolve.js.map +1 -0
- package/out/resolver/types.d.ts +118 -0
- package/out/resolver/types.d.ts.map +1 -0
- package/out/resolver/types.js +26 -0
- package/out/resolver/types.js.map +1 -0
- package/out/resources/git.d.ts +1 -1
- package/out/resources/index.d.ts +0 -1
- package/out/resources/index.d.ts.map +1 -1
- package/out/resources/index.js +0 -1
- package/out/resources/index.js.map +1 -1
- package/out/resources/whosonfirst/DataSourceCache.d.ts +0 -1
- package/out/resources/whosonfirst/DataSourceCache.d.ts.map +1 -1
- package/out/resources/whosonfirst/DataSourceCache.js +0 -1
- package/out/resources/whosonfirst/DataSourceCache.js.map +1 -1
- package/out/resources/whosonfirst/PlacetypeDataSource.d.ts +2 -2
- package/out/resources/whosonfirst/PlacetypeDataSource.d.ts.map +1 -1
- package/out/resources/whosonfirst/PlacetypeDataSource.js +9 -6
- package/out/resources/whosonfirst/PlacetypeDataSource.js.map +1 -1
- package/out/resources/whosonfirst/placetypes/admin.d.ts +23 -1
- package/out/resources/whosonfirst/placetypes/admin.d.ts.map +1 -1
- package/out/resources/whosonfirst/placetypes/admin.js +14 -1
- package/out/resources/whosonfirst/placetypes/admin.js.map +1 -1
- package/out/routing/index.d.ts +67 -0
- package/out/routing/index.d.ts.map +1 -0
- package/out/routing/index.js +114 -0
- package/out/routing/index.js.map +1 -0
- package/out/sets.d.ts +2 -0
- package/out/sets.d.ts.map +1 -0
- package/out/sets.js +2 -0
- package/out/sets.js.map +1 -0
- package/package.json +28 -2
- package/out/resources/db/index.d.ts +0 -57
- package/out/resources/db/index.d.ts.map +0 -1
- package/out/resources/db/index.js +0 -57
- package/out/resources/db/index.js.map +0 -1
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* `runPipeline` — the runtime coordinator that composes all six stages.
|
|
7
|
+
*
|
|
8
|
+
* Generic over stage implementations (see `types.ts::RuntimePipelineStages`). Each stage is
|
|
9
|
+
* injected; the coordinator handles composition, timing, fast-path routing, and graceful
|
|
10
|
+
* degradation when stages are absent.
|
|
11
|
+
*
|
|
12
|
+
* Implementation contract per `docs/articles/plan/reference/STAGES.md`.
|
|
13
|
+
*/
|
|
14
|
+
import { reconcileSpans } from "./reconcile.js";
|
|
15
|
+
import { aggregateSpanLogits } from "./span-logit-aggregation.js";
|
|
16
|
+
/**
 * QueryShape format identifiers that mean "this token is a postcode".
 *
 * Mirrors the set in `@mailwoman/kind-classifier`; it is duplicated here on purpose so that
 * core/pipeline carries no dependency on kind-classifier.
 */
const POSTCODE_FORMATS = new Set(
    ["us_zip", "us_zip4", "uk_postcode", "fr_postcode", "de_postcode", "ca_postcode", "jp_postcode"]
);
|
|
30
|
+
/**
 * Report whether a QueryShape format identifier is one of the known postcode formats.
 *
 * @param format Format identifier to look up.
 * @returns `true` when `format` is a member of `POSTCODE_FORMATS`, otherwise `false`.
 */
function isPostcodeFormat(format) {
    const isKnown = POSTCODE_FORMATS.has(format);
    return isKnown;
}
|
|
33
|
+
/**
 * Predicate over a QueryShape known-format hit: does its `format` name a postcode format?
 *
 * @param hit Known-format hit; only its `format` property is read.
 * @returns The result of `isPostcodeFormat` for the hit's format.
 */
function isPostcodeFormatHit(hit) {
    const { format } = hit;
    return isPostcodeFormat(format);
}
|
|
36
|
+
/**
 * Fallback for a missing `normalize` stage: the input passes through untouched.
 *
 * @param raw Input text.
 * @param opts Optional options; only `locale` is read.
 * @returns `{ raw, normalized, appliedLocale }` where `normalized` equals `raw` and
 *   `appliedLocale` is `opts.locale` (or `undefined` when no options/locale were given).
 */
function identityNormalize(raw, opts) {
    const appliedLocale = opts?.locale;
    return { raw, normalized: raw, appliedLocale };
}
|
|
40
|
+
/**
 * Fallback for a missing `computeQueryShape` stage.
 *
 * @returns A fresh query shape whose `knownFormats` list is empty; no other fields are set.
 */
function emptyQueryShape() {
    const knownFormats = [];
    return { knownFormats };
}
|
|
44
|
+
/**
 * Default locale detector used when no `detectLocale` stage is wired: trusts the caller's hint,
 * or falls back to `und`.
 *
 * A single "was a usable hint supplied?" check drives all three derived fields, so an empty
 * hint can no longer produce an empty locale that is reported as `"detected"`.
 *
 * @param _input Normalized input (unused).
 * @param _shape Query shape (unused).
 * @param opts Optional options; only `hint` is read.
 * @returns `{ locale, confidence, alternatives, source }`: the hint with confidence `1.0` and
 *   source `"caller"` when a non-empty hint was given, otherwise `"und"` with confidence `0.0`
 *   and source `"detected"`. `alternatives` is always an empty list.
 */
async function defaultDetectLocale(_input, _shape, opts) {
    const hint = opts?.hint;
    const hasHint = Boolean(hint);
    return {
        locale: hasHint ? hint : "und",
        confidence: hasHint ? 1.0 : 0.0,
        alternatives: [],
        source: hasHint ? "caller" : "detected",
    };
}
|
|
54
|
+
/**
 * Fallback for a missing `classifyKind` stage.
 *
 * Always answers `structured_address` with zero confidence, which keeps the fast-path gate in
 * `canShortCircuit` closed.
 *
 * @param _input Normalized input (unused).
 * @param _shape Query shape (unused).
 * @param _locale Locale detection result (unused).
 * @returns `{ kind: "structured_address", confidence: 0, alternatives: [] }`.
 */
async function defaultClassifyKind(_input, _shape, _locale) {
    const fallback = {
        kind: "structured_address",
        confidence: 0.0,
        alternatives: [],
    };
    return fallback;
}
|
|
62
|
+
/**
|
|
63
|
+
* Decide whether to short-circuit stages 3-5 and go straight to resolve. Conservative: requires
|
|
64
|
+
* high kind-classifier confidence AND a matching QueryShape known-format hit. See
|
|
65
|
+
* `STAGES.md#fast-path-routing` for the rationale.
|
|
66
|
+
*/
|
|
67
|
+
function canShortCircuit(kind, shape, opts) {
|
|
68
|
+
if (opts?.forceFullPipeline)
|
|
69
|
+
return false;
|
|
70
|
+
if (kind.confidence < 0.95)
|
|
71
|
+
return false;
|
|
72
|
+
if (kind.kind === "postcode_only") {
|
|
73
|
+
return shape.knownFormats.some(isPostcodeFormatHit);
|
|
74
|
+
}
|
|
75
|
+
if (kind.kind === "locality_only") {
|
|
76
|
+
return (shape.totalLength ?? Infinity) <= 30 && shape.characterClass === "alpha";
|
|
77
|
+
}
|
|
78
|
+
return false;
|
|
79
|
+
}
|
|
80
|
+
/**
 * Build a stub `AddressTree` for the fast-path case (no classifier ran). The tree has at most
 * one root node.
 *
 * - `postcode_only`: the node is taken from the first known-format hit whose format is a
 *   postcode format; value and offsets come from the hit's span, confidence from the hit.
 * - `locality_only`: the node covers the whitespace-trimmed text. Its `start`/`end` bracket the
 *   trimmed value, so `text.slice(start, end) === value` holds just as it does for postcodes.
 * - Anything else (including `postcode_only` without a postcode hit): an empty tree.
 *
 * @param text Normalized input text; becomes the tree's `raw`.
 * @param kind Kind-classifier result; `kind` and `confidence` are read.
 * @param shape Query shape; `knownFormats` is read for the postcode case.
 * @returns `{ raw, roots }` with zero or one root node.
 */
function buildFastPathTree(text, kind, shape) {
    if (kind.kind === "postcode_only") {
        const hit = shape.knownFormats.find((f) => isPostcodeFormat(f.format));
        if (hit) {
            const { start, end } = hit.span;
            return {
                raw: text,
                roots: [
                    {
                        tag: "postcode",
                        value: text.slice(start, end),
                        start,
                        end,
                        confidence: hit.confidence,
                        children: [],
                        source: "query-shape",
                        sourceId: hit.format,
                    },
                ],
            };
        }
    }
    if (kind.kind === "locality_only") {
        const value = text.trim();
        // Offset of the first non-whitespace character, so the span matches the trimmed value.
        const start = text.length - text.trimStart().length;
        const end = start + value.length;
        return {
            raw: text,
            roots: [
                {
                    tag: "locality",
                    value,
                    start,
                    end,
                    confidence: kind.confidence,
                    children: [],
                    source: "query-shape",
                    sourceId: "kind:locality_only",
                },
            ],
        };
    }
    return { raw: text, roots: [] };
}
|
|
124
|
+
/**
 * Run the runtime pipeline.
 *
 * Composition order (per STAGES.md):
 *
 * 1. Normalize (or identity)
 * 2. Compute QueryShape (or empty)
 * 3. Locale gate (or caller-trust)
 * 4. Kind classifier (or default structured_address)
 * 5. Branch: fast-path → resolver; full → phrase grouper → classifier → grouper audit → resolver
 *
 * Per-stage wall-clock durations (from `performance.now()`) are recorded on `result.timing`;
 * a stage that did not run has no entry in the map.
 *
 * Error behavior: the phrase grouper, the plain classifier call and the resolver go through
 * the `safe*` wrappers and degrade instead of throwing. Errors thrown by `normalize`,
 * `computeQueryShape`, `detectLocale`, `classifyKind`, or by the opt-in joint-reconcile path
 * (`parseWithLogits`, aggregation, reconciliation) propagate to the caller.
 *
 * @param raw Raw input text.
 * @param stages Injected stage implementations. Every stage is optional: `normalize`,
 *   `computeQueryShape`, `detectLocale`, `classifyKind`, `groupPhrases`, `classifier`, `fst`,
 *   `resolver`.
 * @param opts Optional run options. Read here: `locale` (passed to normalize/query-shape and
 *   used as the locale hint), `signal` (checked between stages), `forceJointReconcile`; read by
 *   helpers: `forceFullPipeline` (fast-path gate) and `resolveOpts` (forwarded to the resolver).
 * @returns `{ input, normalized, queryShape, locale, kind, phraseProposals, tree, timing, path }`
 *   where `path` is `"fast-path"` or `"full"`.
 * @throws The abort signal's `reason` (or an `AbortError` `DOMException`) when `opts.signal` is
 *   found aborted at one of the between-stage checks.
 */
export async function runPipeline(raw, stages, opts) {
    const timing = {};
    const t0 = performance.now();
    // Absent stages degrade to the module-level defaults.
    const normalize = stages.normalize ?? identityNormalize;
    const computeQueryShape = stages.computeQueryShape ?? emptyQueryShape;
    const detectLocale = stages.detectLocale ?? defaultDetectLocale;
    const classifyKind = stages.classifyKind ?? defaultClassifyKind;

    throwIfAborted(opts);
    const normalized = normalize(raw, { locale: opts?.locale });
    timing["normalize"] = performance.now() - t0;

    throwIfAborted(opts);
    const tQs = performance.now();
    const queryShape = computeQueryShape(normalized, { locale: opts?.locale });
    timing["query-shape"] = performance.now() - tQs;

    throwIfAborted(opts);
    const tLocale = performance.now();
    const locale = await detectLocale(normalized, queryShape, { hint: opts?.locale });
    timing["locale-gate"] = performance.now() - tLocale;

    throwIfAborted(opts);
    const tKind = performance.now();
    const kind = await classifyKind(normalized, queryShape, locale);
    timing["kind-classifier"] = performance.now() - tKind;

    // Fast-path: trivial inputs short-circuit stages 3-5. The fast-path tree is built from
    // QueryShape's format hits + kind alone — useful even without a wired resolver (a consumer
    // who just wants the parsed structure for a bare postcode shouldn't be forced to pay for the
    // classifier).
    if (canShortCircuit(kind, queryShape, opts)) {
        let tree = buildFastPathTree(normalized.normalized, kind, queryShape);
        if (stages.resolver) {
            throwIfAborted(opts);
            const tResolve = performance.now();
            tree = await safeResolve(stages.resolver, tree, opts);
            timing["resolve"] = performance.now() - tResolve;
        }
        return {
            input: raw,
            normalized,
            queryShape,
            locale,
            kind,
            phraseProposals: [],
            tree,
            timing,
            path: "fast-path",
        };
    }

    // Full pipeline.
    // Stage 2.7 — phrase grouper. Optional injection; runs when wired. Proposals flow forward to
    // stages 3 + 5 (today: surfaced on the result; tomorrow: passed in as classifier conditioning).
    let phraseProposals = [];
    if (stages.groupPhrases) {
        throwIfAborted(opts);
        const tGroup = performance.now();
        phraseProposals = await safeGroupPhrases(stages.groupPhrases, normalized, queryShape, locale);
        timing["phrase-grouper"] = performance.now() - tGroup;
    }

    // Starting point when no classifier is wired: an empty tree over the normalized text.
    let tree = { raw: normalized.normalized, roots: [] };

    // Joint-reconcile path: opt-in until phrase-grouper proposal quality supports it as default.
    // The reconciler produces single-token spans when phrase proposals don't cover multi-word
    // streets/localities — needs grouper improvements before becoming the default path.
    const useJointReconcile = opts?.forceJointReconcile &&
        phraseProposals.length > 0 &&
        stages.classifier &&
        "parseWithLogits" in stages.classifier;

    if (useJointReconcile) {
        const classifierWithLogits = stages.classifier;
        throwIfAborted(opts);
        const tClassify = performance.now();
        const { tree: argmaxTree, logits, pieces } = await classifierWithLogits.parseWithLogits(
            normalized.normalized,
            { queryShape, fst: stages.fst }
        );
        timing["token-classify"] = performance.now() - tClassify;

        throwIfAborted(opts);
        const tReconcile = performance.now();
        // The classifier must expose its label vocabulary so the aggregation can strip BIO prefixes.
        // NeuralAddressClassifier surfaces this as `cfg.labels` — extracted via structural typing here.
        const labels = "labels" in classifierWithLogits ? classifierWithLogits.labels : [];
        const classifierTopK = aggregateSpanLogits(
            logits,
            pieces,
            phraseProposals.map((p) => ({ start: p.span.start, end: p.span.end })),
            { labels }
        );
        if (classifierTopK.length > 0) {
            const result = reconcileSpans({
                raw: normalized.normalized,
                phraseProposals,
                classifierTopK,
            });
            tree = result.tree;
        }
        else {
            // Nothing to reconcile: keep the classifier's own argmax tree.
            tree = argmaxTree;
        }
        timing["reconcile"] = performance.now() - tReconcile;
    }
    else if (stages.classifier) {
        throwIfAborted(opts);
        const tClassify = performance.now();
        tree = await safeClassify(stages.classifier, normalized.normalized, queryShape, stages.fst);
        timing["token-classify"] = performance.now() - tClassify;
    }

    // The audit runs whenever the grouper produced proposals — including when the tree has no
    // roots yet, which is exactly when injected provisional nodes matter most.
    if (phraseProposals.length > 0) {
        const tAudit = performance.now();
        tree = grouperAudit(tree, phraseProposals, normalized.normalized);
        timing["grouper-audit"] = performance.now() - tAudit;
    }

    if (stages.resolver) {
        throwIfAborted(opts);
        const tResolve = performance.now();
        tree = await safeResolve(stages.resolver, tree, opts);
        timing["resolve"] = performance.now() - tResolve;
    }

    return {
        input: raw,
        normalized,
        queryShape,
        locale,
        kind,
        phraseProposals,
        tree,
        timing,
        path: "full",
    };
}
|
|
255
|
+
/**
 * Cancellation checkpoint: throws when `opts.signal` reports it has been aborted.
 *
 * Cancellation is coarse-grained — the pipeline calls this between stages, so the worst-case
 * latency is the runtime of one stage; a stage already in flight runs to completion first.
 * Finer-grained cancellation would need `signal` plumbed into each stage's contract
 * (`detectLocale`, `classifyKind`, `classifier.parse`, `resolver.resolveTree`), which is left
 * as a future enhancement.
 *
 * @param opts Optional pipeline options; only `signal` is read.
 * @throws The signal's `reason` when it is set (not null/undefined); otherwise a
 *   `DOMException` named `AbortError` with message "Pipeline aborted".
 */
function throwIfAborted(opts) {
    const signal = opts?.signal;
    if (!signal?.aborted) {
        return;
    }
    throw signal.reason ?? new DOMException("Pipeline aborted", "AbortError");
}
|
|
267
|
+
/**
 * Defensive wrapper around `classifier.parse`: any failure (synchronous throw or rejected
 * promise) is swallowed and replaced by an empty tree, so the pipeline keeps going.
 *
 * @param classifier Object exposing `parse(text, { queryShape, fst })`.
 * @param text Text to classify; also used as `raw` of the fallback tree.
 * @param queryShape Query shape forwarded to the classifier.
 * @param fst Value forwarded to the classifier as `fst` (may be `undefined`).
 * @returns The classifier's result, or `{ raw: text, roots: [] }` on failure.
 */
async function safeClassify(classifier, text, queryShape, fst) {
    const parseOptions = { queryShape, fst };
    try {
        const parsed = await classifier.parse(text, parseOptions);
        return parsed;
    }
    catch {
        const emptyTree = { raw: text, roots: [] };
        return emptyTree;
    }
}
|
|
276
|
+
/**
 * Defensive wrapper around the phrase grouper: any failure (synchronous throw or rejected
 * promise) is swallowed and replaced by an empty proposal list, so the pipeline keeps going.
 *
 * @param groupPhrases Grouper function, called as `groupPhrases(normalized, shape, locale)`.
 * @param normalized Normalized input forwarded to the grouper.
 * @param shape Query shape forwarded to the grouper.
 * @param locale Locale result forwarded to the grouper.
 * @returns The grouper's proposals, or `[]` on failure.
 */
async function safeGroupPhrases(groupPhrases, normalized, shape, locale) {
    try {
        const proposals = await groupPhrases(normalized, shape, locale);
        return proposals;
    }
    catch {
        const noProposals = [];
        return noProposals;
    }
}
|
|
285
|
+
// ---------------------------------------------------------------------------
|
|
286
|
+
// Grouper-audit pass
|
|
287
|
+
// ---------------------------------------------------------------------------
|
|
288
|
+
/**
 * Multiplier applied to a grouper proposal's confidence when the audit pass injects it as a
 * provisional node.
 */
const GROUPER_TYPING_PENALTY = 0.55;

/**
 * Phrase-grouper kind hypothesis → address-tree tag. Hypotheses without an entry here are
 * skipped by the audit pass.
 */
const PHRASE_KIND_TO_TAG = new Map(
    Object.entries({
        VENUE_PHRASE: "venue",
        LOCALITY_PHRASE: "locality",
        REGION_ABBREVIATION: "region",
        POSTCODE: "postcode",
        STREET_PHRASE: "street",
        NUMERIC: "house_number",
    })
);
|
|
297
|
+
/**
 * Post-classification audit: inject a provisional root node for every phrase-grouper proposal
 * that the tree does not cover yet. This rescues spans the neural model couldn't type —
 * primarily venue text.
 *
 * For each proposal, in order:
 * - it is skipped when its `kindHypothesis` has no entry in `PHRASE_KIND_TO_TAG`;
 * - its range is `[span.start, span.start + span.body.length)`;
 * - it is skipped when that range overlaps any current root — including provisional nodes
 *   injected earlier in this same pass;
 * - otherwise a node is appended with the mapped tag, the sliced text as value, the proposal's
 *   confidence scaled by `GROUPER_TYPING_PENALTY`, `source: "grouper-audit"` and
 *   `sourceId: "grouper:<kindHypothesis>"`.
 *
 * The input tree is not mutated: roots are copied, extended, and sorted by `start`.
 *
 * @param tree Tree produced so far; `raw` and `roots` are read.
 * @param proposals Phrase-grouper proposals.
 * @param text Text the proposal offsets index into.
 * @returns `tree` itself when there are no proposals; otherwise a new `{ raw, roots }`.
 */
function grouperAudit(tree, proposals, text) {
    if (proposals.length === 0) {
        return tree;
    }
    const merged = [...tree.roots];
    for (const proposal of proposals) {
        const hypothesis = proposal.kindHypothesis;
        const tag = PHRASE_KIND_TO_TAG.get(hypothesis);
        if (tag) {
            const from = proposal.span.start;
            const to = from + proposal.span.body.length;
            // Half-open interval overlap test against everything collected so far.
            const overlapsExisting = merged.some((existing) => existing.start < to && from < existing.end);
            if (!overlapsExisting) {
                merged.push({
                    tag,
                    value: text.slice(from, to),
                    start: from,
                    end: to,
                    confidence: proposal.confidence * GROUPER_TYPING_PENALTY,
                    children: [],
                    source: "grouper-audit",
                    sourceId: `grouper:${hypothesis}`,
                });
            }
        }
    }
    merged.sort((left, right) => left.start - right.start);
    return { raw: tree.raw, roots: merged };
}
|
|
330
|
+
/**
 * Defensive wrapper around `resolver.resolveTree`: any failure (synchronous throw or rejected
 * promise) is swallowed and the incoming tree is handed back unchanged.
 *
 * @param resolver Object exposing `resolveTree(tree, resolveOpts)`.
 * @param tree Tree to resolve; returned as-is when the resolver fails.
 * @param opts Optional pipeline options; only `resolveOpts` is read and forwarded.
 * @returns The resolver's result, or the original `tree` on failure.
 */
async function safeResolve(resolver, tree, opts) {
    try {
        const resolveOpts = opts?.resolveOpts;
        const resolved = await resolver.resolveTree(tree, resolveOpts);
        return resolved;
    }
    catch {
        return tree;
    }
}
|
|
339
|
+
//# sourceMappingURL=runtime-pipeline.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"runtime-pipeline.js","sourceRoot":"","sources":["../../pipeline/runtime-pipeline.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAIH,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAA;AAC/C,OAAO,EAAE,mBAAmB,EAAE,MAAM,6BAA6B,CAAA;AAgBjE;;;;GAIG;AACH,MAAM,gBAAgB,GAAwB,IAAI,GAAG,CAAC;IACrD,QAAQ;IACR,SAAS;IACT,aAAa;IACb,aAAa;IACb,aAAa;IACb,aAAa;IACb,aAAa;CACb,CAAC,CAAA;AAEF,SAAS,gBAAgB,CAAC,MAAc;IACvC,OAAO,gBAAgB,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;AACpC,CAAC;AAED,SAAS,mBAAmB,CAAC,GAAuB;IACnD,OAAO,gBAAgB,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;AACpC,CAAC;AAED,sEAAsE;AACtE,SAAS,iBAAiB,CAAC,GAAW,EAAE,IAA0B;IACjE,OAAO,EAAE,GAAG,EAAE,UAAU,EAAE,GAAG,EAAE,aAAa,EAAE,IAAI,EAAE,MAAM,EAAE,CAAA;AAC7D,CAAC;AAED,yEAAyE;AACzE,SAAS,eAAe;IACvB,OAAO,EAAE,YAAY,EAAE,EAAE,EAAE,CAAA;AAC5B,CAAC;AAED,iFAAiF;AACjF,KAAK,UAAU,mBAAmB,CACjC,MAA2B,EAC3B,MAAsB,EACtB,IAA2B;IAE3B,MAAM,MAAM,GAAG,IAAI,EAAE,IAAI,IAAI,KAAK,CAAA;IAClC,OAAO;QACN,MAAM;QACN,UAAU,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG;QAClC,YAAY,EAAE,EAAE;QAChB,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,UAAU;KAC1C,CAAA;AACF,CAAC;AAED,uGAAuG;AACvG,KAAK,UAAU,mBAAmB,CACjC,MAA2B,EAC3B,MAAsB,EACtB,OAAmB;IAEnB,OAAO;QACN,IAAI,EAAE,oBAAoB;QAC1B,UAAU,EAAE,GAAG;QACf,YAAY,EAAE,EAAE;KAChB,CAAA;AACF,CAAC;AAED;;;;GAIG;AACH,SAAS,eAAe,CAAC,IAAqB,EAAE,KAAqB,EAAE,IAAmB;IACzF,IAAI,IAAI,EAAE,iBAAiB;QAAE,OAAO,KAAK,CAAA;IACzC,IAAI,IAAI,CAAC,UAAU,GAAG,IAAI;QAAE,OAAO,KAAK,CAAA;IACxC,IAAI,IAAI,CAAC,IAAI,KAAK,eAAe,EAAE,CAAC;QACnC,OAAO,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAA;IACpD,CAAC;IACD,IAAI,IAAI,CAAC,IAAI,KAAK,eAAe,EAAE,CAAC;QACnC,OAAO,CAAC,KAAK,CAAC,WAAW,IAAI,QAAQ,CAAC,IAAI,EAAE,IAAI,KAAK,CAAC,cAAc,KAAK,OAAO,CAAA;IACjF,CAAC;IACD,OAAO,KAAK,CAAA;AACb,CAAC;AAED;;;GAGG;AACH,SAAS,iBAAiB,CAAC,IAAY,EAAE,IAAqB,EAAE,KAAqB;IACpF,IAAI,IAAI,CAAC,IAAI,KAAK,eAAe,EAAE,CAAC;QACnC,MAAM,GAAG,GAAG,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,gBAAgB,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAA;QACtE,IAAI,GAAG,EAAE,CAAC;YACT,OAAO;gBACN,GAAG,EAAE,I
AAI;gBACT,KAAK,EAAE;oBACN;wBACC,GAAG,EAAE,UAAU;wBACf,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC;wBAC/C,KAAK,EAAE,GAAG,CAAC,IAAI,CAAC,KAAK;wBACrB,GAAG,EAAE,GAAG,CAAC,IAAI,CAAC,GAAG;wBACjB,UAAU,EAAE,GAAG,CAAC,UAAU;wBAC1B,QAAQ,EAAE,EAAE;wBACZ,MAAM,EAAE,aAAa;wBACrB,QAAQ,EAAE,GAAG,CAAC,MAAM;qBACpB;iBACD;aACD,CAAA;QACF,CAAC;IACF,CAAC;IACD,IAAI,IAAI,CAAC,IAAI,KAAK,eAAe,EAAE,CAAC;QACnC,OAAO;YACN,GAAG,EAAE,IAAI;YACT,KAAK,EAAE;gBACN;oBACC,GAAG,EAAE,UAAU;oBACf,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE;oBAClB,KAAK,EAAE,CAAC;oBACR,GAAG,EAAE,IAAI,CAAC,MAAM;oBAChB,UAAU,EAAE,IAAI,CAAC,UAAU;oBAC3B,QAAQ,EAAE,EAAE;oBACZ,MAAM,EAAE,aAAa;oBACrB,QAAQ,EAAE,oBAAoB;iBAC9B;aACD;SACD,CAAA;IACF,CAAC;IACD,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,CAAA;AAChC,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAChC,GAAW,EACX,MAA6B,EAC7B,IAAmB;IAEnB,MAAM,MAAM,GAA2B,EAAE,CAAA;IACzC,MAAM,EAAE,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;IAE5B,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,iBAAiB,CAAA;IACvD,MAAM,iBAAiB,GAAG,MAAM,CAAC,iBAAiB,IAAI,eAAe,CAAA;IACrE,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,mBAAmB,CAAA;IAC/D,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,mBAAmB,CAAA;IAE/D,cAAc,CAAC,IAAI,CAAC,CAAA;IACpB,MAAM,UAAU,GAAG,SAAS,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAA;IAC3D,MAAM,CAAC,WAAW,CAAC,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,EAAE,CAAA;IAE5C,cAAc,CAAC,IAAI,CAAC,CAAA;IACpB,MAAM,GAAG,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;IAC7B,MAAM,UAAU,GAAG,iBAAiB,CAAC,UAAU,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAA;IAC1E,MAAM,CAAC,aAAa,CAAC,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,GAAG,CAAA;IAE/C,cAAc,CAAC,IAAI,CAAC,CAAA;IACpB,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;IACjC,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,UAAU,EAAE,UAAU,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAA;IACjF,MAAM,CAAC,aAAa,CAAC,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,OAAO,CAAA;IAEnD,cAAc,CAAC,IAAI,CAAC,CAAA;IACpB,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;IAC/B,MAAM,IAAI,GAAG,MAAM,YAAY,CAAC,UAAU,EAAE,UAAU,EAAE,MAAM,CAAC,CAAA;IAC/D,MAAM,CAAC,iBAAiB,CAAC,GAA
G,WAAW,CAAC,GAAG,EAAE,GAAG,KAAK,CAAA;IAErD,uFAAuF;IACvF,2FAA2F;IAC3F,6FAA6F;IAC7F,eAAe;IACf,IAAI,eAAe,CAAC,IAAI,EAAE,UAAU,EAAE,IAAI,CAAC,EAAE,CAAC;QAC7C,IAAI,IAAI,GAAG,iBAAiB,CAAC,UAAU,CAAC,UAAU,EAAE,IAAI,EAAE,UAAU,CAAC,CAAA;QACrE,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YACrB,cAAc,CAAC,IAAI,CAAC,CAAA;YACpB,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;YAClC,IAAI,GAAG,MAAM,WAAW,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI,EAAE,IAAI,CAAC,CAAA;YACrD,MAAM,CAAC,SAAS,CAAC,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,QAAQ,CAAA;QACjD,CAAC;QACD,OAAO;YACN,KAAK,EAAE,GAAG;YACV,UAAU;YACV,UAAU;YACV,MAAM;YACN,IAAI;YACJ,eAAe,EAAE,EAAE;YACnB,IAAI;YACJ,MAAM;YACN,IAAI,EAAE,WAAW;SACjB,CAAA;IACF,CAAC;IAED,iBAAiB;IACjB,6FAA6F;IAC7F,gGAAgG;IAChG,IAAI,eAAe,GAAqB,EAAE,CAAA;IAC1C,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;QACzB,cAAc,CAAC,IAAI,CAAC,CAAA;QACpB,MAAM,MAAM,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;QAChC,eAAe,GAAG,MAAM,gBAAgB,CAAC,MAAM,CAAC,YAAY,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,CAAC,CAAA;QAC7F,MAAM,CAAC,gBAAgB,CAAC,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,MAAM,CAAA;IACtD,CAAC;IAED,IAAI,IAAI,GAAgB,EAAE,GAAG,EAAE,UAAU,CAAC,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,CAAA;IAEjE,6FAA6F;IAC7F,0FAA0F;IAC1F,oFAAoF;IACpF,MAAM,iBAAiB,GACtB,IAAI,EAAE,mBAAmB;QACzB,eAAe,CAAC,MAAM,GAAG,CAAC;QAC1B,MAAM,CAAC,UAAU;QACjB,iBAAiB,IAAI,MAAM,CAAC,UAAU,CAAA;IAEvC,IAAI,iBAAiB,EAAE,CAAC;QACvB,MAAM,oBAAoB,GAAG,MAAM,CAAC,UAKnC,CAAA;QAED,cAAc,CAAC,IAAI,CAAC,CAAA;QACpB,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;QACnC,MAAM,EACL,IAAI,EAAE,UAAU,EAChB,MAAM,EACN,MAAM,GACN,GAAG,MAAM,oBAAoB,CAAC,eAAe,CAAC,UAAU,CAAC,UAAU,EAAE,EAAE,UAAU,EAAE,GAAG,EAAE,MAAM,CAAC,GAAG,EAAE,CAAC,CAAA;QACtG,MAAM,CAAC,gBAAgB,CAAC,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAA;QAExD,cAAc,CAAC,IAAI,CAAC,CAAA;QACpB,MAAM,UAAU,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;QAEpC,6FAA6F;QAC7F,gGAAgG;QAChG,MAAM,MAAM,GACX,QAAQ,IAAI,oBAAoB,CAAC,CAAC,CAAE,oBAAiE,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAA;QAElH,MAAM,cAAc,GAAG,mBAAmB,CACzC,MAAM,EACN,MAAM,EACN,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,IAAI,CAAC
,GAAG,EAAE,CAAC,CAAC,EACtE,EAAE,MAAM,EAAE,CACV,CAAA;QAED,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC/B,MAAM,MAAM,GAAG,cAAc,CAAC;gBAC7B,GAAG,EAAE,UAAU,CAAC,UAAU;gBAC1B,eAAe;gBACf,cAAc;aACd,CAAC,CAAA;YACF,IAAI,GAAG,MAAM,CAAC,IAAI,CAAA;QACnB,CAAC;aAAM,CAAC;YACP,IAAI,GAAG,UAAU,CAAA;QAClB,CAAC;QACD,MAAM,CAAC,WAAW,CAAC,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,UAAU,CAAA;IACrD,CAAC;SAAM,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QAC9B,cAAc,CAAC,IAAI,CAAC,CAAA;QACpB,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;QACnC,IAAI,GAAG,MAAM,YAAY,CAAC,MAAM,CAAC,UAAU,EAAE,UAAU,CAAC,UAAU,EAAE,UAAU,EAAE,MAAM,CAAC,GAAG,CAAC,CAAA;QAC3F,MAAM,CAAC,gBAAgB,CAAC,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAA;IACzD,CAAC;IAED,IAAI,eAAe,CAAC,MAAM,GAAG,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QAC1D,MAAM,MAAM,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;QAChC,IAAI,GAAG,YAAY,CAAC,IAAI,EAAE,eAAe,EAAE,UAAU,CAAC,UAAU,CAAC,CAAA;QACjE,MAAM,CAAC,eAAe,CAAC,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,MAAM,CAAA;IACrD,CAAC;IAED,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;QACrB,cAAc,CAAC,IAAI,CAAC,CAAA;QACpB,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;QAClC,IAAI,GAAG,MAAM,WAAW,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI,EAAE,IAAI,CAAC,CAAA;QACrD,MAAM,CAAC,SAAS,CAAC,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,QAAQ,CAAA;IACjD,CAAC;IAED,OAAO;QACN,KAAK,EAAE,GAAG;QACV,UAAU;QACV,UAAU;QACV,MAAM;QACN,IAAI;QACJ,eAAe;QACf,IAAI;QACJ,MAAM;QACN,IAAI,EAAE,MAAM;KACZ,CAAA;AACF,CAAC;AAED;;;;;;GAMG;AACH,SAAS,cAAc,CAAC,IAAmB;IAC1C,IAAI,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC;QAC3B,MAAM,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,IAAI,YAAY,CAAC,kBAAkB,EAAE,YAAY,CAAC,CAAA;IAC/E,CAAC;AACF,CAAC;AAED,wGAAwG;AACxG,KAAK,UAAU,YAAY,CAC1B,UAA6B,EAC7B,IAAY,EACZ,UAA0B,EAC1B,GAAoB;IAEpB,IAAI,CAAC;QACJ,OAAO,MAAM,UAAU,CAAC,KAAK,CAAC,IAAI,EAAE,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC,CAAA;IACzD,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,CAAA;IAChC,CAAC;AACF,CAAC;AAED,6FAA6F;AAC7F,KAAK,UAAU,gBAAgB,CAC9B,YAAgE,EAChE,UAA+B,EAC/B,KAAqB,EACrB,MAAkB;IAElB,IAAI,CAAC;QACJ,OAAO,MAAM,YAAY,CAAC,UAAU,EAAE,KAAK,EAAE,MAAM,CAAC,CAAA;IACrD,CAAC;IAAC,MAAM,
CAAC;QACR,OAAO,EAAE,CAAA;IACV,CAAC;AACF,CAAC;AAED,8EAA8E;AAC9E,qBAAqB;AACrB,8EAA8E;AAE9E,MAAM,sBAAsB,GAAG,IAAI,CAAA;AAEnC,MAAM,kBAAkB,GAAsC,IAAI,GAAG,CAAC;IACrE,CAAC,cAAc,EAAE,OAAO,CAAC;IACzB,CAAC,iBAAiB,EAAE,UAAU,CAAC;IAC/B,CAAC,qBAAqB,EAAE,QAAQ,CAAC;IACjC,CAAC,UAAU,EAAE,UAAU,CAAC;IACxB,CAAC,eAAe,EAAE,QAAQ,CAAC;IAC3B,CAAC,SAAS,EAAE,cAAc,CAAC;CAC3B,CAAC,CAAA;AAEF;;;;GAIG;AACH,SAAS,YAAY,CAAC,IAAiB,EAAE,SAA2B,EAAE,IAAY;IACjF,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAA;IAEvC,MAAM,KAAK,GAAG,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAA;IAE7B,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QAClC,MAAM,GAAG,GAAG,kBAAkB,CAAC,GAAG,CAAC,QAAQ,CAAC,cAAc,CAAC,CAAA;QAC3D,IAAI,CAAC,GAAG;YAAE,SAAQ;QAElB,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAA;QAClC,MAAM,IAAI,GAAG,MAAM,GAAG,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAA;QAE/C,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,GAAG,IAAI,IAAI,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAA;QAC5E,IAAI,OAAO;YAAE,SAAQ;QAErB,MAAM,eAAe,GAAgB;YACpC,GAAG;YACH,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,IAAI,CAAC;YAC/B,KAAK,EAAE,MAAM;YACb,GAAG,EAAE,IAAI;YACT,UAAU,EAAE,QAAQ,CAAC,UAAU,GAAG,sBAAsB;YACxD,QAAQ,EAAE,EAAE;YACZ,MAAM,EAAE,eAAe;YACvB,QAAQ,EAAE,WAAW,QAAQ,CAAC,cAAc,EAAE;SAC9C,CAAA;QAED,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,CAAA;IAC5B,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAA;IAEvC,OAAO,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,CAAA;AAChC,CAAC;AAED,+EAA+E;AAC/E,KAAK,UAAU,WAAW,CACzB,QAAwD,EACxD,IAAiB,EACjB,IAAmB;IAEnB,IAAI,CAAC;QACJ,OAAO,MAAM,QAAQ,CAAC,WAAW,CAAC,IAAI,EAAE,IAAI,EAAE,WAAW,CAAC,CAAA;IAC3D,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,IAAI,CAAA;IACZ,CAAC;AACF,CAAC"}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Per-span logit aggregation — Option C from the DeepSeek synthesis review.
|
|
7
|
+
*
|
|
8
|
+
* Takes the per-token logits already emitted by the ONNX model (currently discarded after argmax in
|
|
9
|
+
* `classifier.ts`) and aggregates them over phrase-grouper spans to produce per-span top-K tag
|
|
10
|
+
* candidates. These feed directly into `reconcileSpans` as the `classifierTopK` input.
|
|
11
|
+
*
|
|
12
|
+
* Why this instead of a sequence-level beam decoder: the reconciler takes `(span, tag, score)`
|
|
13
|
+
* triples — per-span confidence, not BIO-sequence-level confidence. The phrase grouper has
|
|
14
|
+
* already done boundary discovery. This module answers "given these boundaries, what tags does
|
|
15
|
+
* the classifier think each span is?" — which is the right abstraction for joint decoding.
|
|
16
|
+
*
|
|
17
|
+
* Code path matches the eventual production runtime: when a top-k-trained classifier exists, the TS
|
|
18
|
+
* runtime just swaps "per-token softmax aggregation" for "classifier's native top-k API." Same
|
|
19
|
+
* downstream contract.
|
|
20
|
+
*/
|
|
21
|
+
import type { ClassifierCandidate } from "./reconcile.js";
|
|
22
|
+
/**
|
|
23
|
+
* A token piece with character-level offsets into the original text.
|
|
24
|
+
*/
|
|
25
|
+
export interface TokenPiece {
|
|
26
|
+
/** Character offset where the piece begins. `aggregateSpanLogits` compares it against a span's `end` with `>=`, so the range is treated as half-open `[start, end)`. */
start: number;
|
|
27
|
+
/** Character offset just past the piece. A piece whose `end` is `<=` a span's `start` is not counted as overlapping that span. */
end: number;
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* A span proposal from the phrase grouper, in character offsets.
|
|
31
|
+
*/
|
|
32
|
+
export interface SpanBounds {
|
|
33
|
+
/** Character offset where the span begins. Token pieces ending at or before this offset are skipped by `aggregateSpanLogits`. */
start: number;
|
|
34
|
+
/** Character offset just past the span. `aggregateSpanLogits` stops scanning at the first token piece starting at or after this offset. */
end: number;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* Given per-token logits and phrase-grouper spans, produce per-span top-K tag candidates.
|
|
38
|
+
*
|
|
39
|
+
* For each span, finds the tokens whose character ranges overlap the span, sums their softmax
|
|
40
|
+
* probabilities per-tag, normalizes, and emits the top K tags with their aggregated scores.
|
|
41
|
+
*
|
|
42
|
+
* BIO prefix stripping: the model emits BIO labels (`B-locality`, `I-locality`, etc.) but the
|
|
43
|
+
* reconciler works with component tags (`locality`). This function strips the `B-`/`I-` prefix and
|
|
44
|
+
* merges probabilities: `score(locality) = sum(score(B-locality) + score(I-locality))` across the
|
|
45
|
+
* span's tokens.
|
|
46
|
+
*
|
|
47
|
+
* @param logits Per-token logits from ONNX inference, shape `[seqLen][numLabels]`.
|
|
48
|
+
* @param pieces Token pieces with character-level offsets (from the tokenizer's `encode`).
|
|
49
|
+
* @param spans Phrase-grouper span proposals in character offsets.
|
|
50
|
+
* @param opts Options — `topK` (default 3), `labels` (required — the BIO label vocabulary the model
|
|
51
|
+
* emits, e.g. `["O", "B-locality", "I-locality", ...]`).
|
|
52
|
+
*/
|
|
53
|
+
export declare function aggregateSpanLogits(logits: number[][], pieces: readonly TokenPiece[], spans: readonly SpanBounds[], opts: {
|
|
54
|
+
/** Maximum number of tag candidates emitted per span; the implementation falls back to 3 when this is omitted. */
topK?: number;
|
|
55
|
+
/** BIO label vocabulary. Entry `l` of this array is paired with column `l` of every logits row. */
labels: readonly string[];
|
|
56
|
+
}): ClassifierCandidate[];
|
|
57
|
+
//# sourceMappingURL=span-logit-aggregation.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"span-logit-aggregation.d.ts","sourceRoot":"","sources":["../../pipeline/span-logit-aggregation.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAGH,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAA;AAEzD;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;CACX;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;CACX;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,mBAAmB,CAClC,MAAM,EAAE,MAAM,EAAE,EAAE,EAClB,MAAM,EAAE,SAAS,UAAU,EAAE,EAC7B,KAAK,EAAE,SAAS,UAAU,EAAE,EAC5B,IAAI,EAAE;IAAE,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,SAAS,MAAM,EAAE,CAAA;CAAE,GAChD,mBAAmB,EAAE,CAiDvB"}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Per-span logit aggregation — Option C from the DeepSeek synthesis review.
|
|
7
|
+
*
|
|
8
|
+
* Takes the per-token logits already emitted by the ONNX model (currently discarded after argmax in
|
|
9
|
+
* `classifier.ts`) and aggregates them over phrase-grouper spans to produce per-span top-K tag
|
|
10
|
+
* candidates. These feed directly into `reconcileSpans` as the `classifierTopK` input.
|
|
11
|
+
*
|
|
12
|
+
* Why this instead of a sequence-level beam decoder: the reconciler takes `(span, tag, score)`
|
|
13
|
+
* triples — per-span confidence, not BIO-sequence-level confidence. The phrase grouper has
|
|
14
|
+
* already done boundary discovery. This module answers "given these boundaries, what tags does
|
|
15
|
+
* the classifier think each span is?" — which is the right abstraction for joint decoding.
|
|
16
|
+
*
|
|
17
|
+
* Code path matches the eventual production runtime: when a top-k-trained classifier exists, the TS
|
|
18
|
+
* runtime just swaps "per-token softmax aggregation" for "classifier's native top-k API." Same
|
|
19
|
+
* downstream contract.
|
|
20
|
+
*/
|
|
21
|
+
/**
|
|
22
|
+
* Given per-token logits and phrase-grouper spans, produce per-span top-K tag candidates.
|
|
23
|
+
*
|
|
24
|
+
* For each span, finds the tokens whose character ranges overlap the span, sums their softmax
|
|
25
|
+
* probabilities per-tag, normalizes, and emits the top K tags with their aggregated scores.
|
|
26
|
+
*
|
|
27
|
+
* BIO prefix stripping: the model emits BIO labels (`B-locality`, `I-locality`, etc.) but the
|
|
28
|
+
* reconciler works with component tags (`locality`). This function strips the `B-`/`I-` prefix and
|
|
29
|
+
* merges probabilities: `score(locality) = sum(score(B-locality) + score(I-locality))` across the
|
|
30
|
+
* span's tokens.
|
|
31
|
+
*
|
|
32
|
+
* @param logits Per-token logits from ONNX inference, shape `[seqLen][numLabels]`.
|
|
33
|
+
* @param pieces Token pieces with character-level offsets (from the tokenizer's `encode`).
|
|
34
|
+
* @param spans Phrase-grouper span proposals in character offsets.
|
|
35
|
+
* @param opts Options — `topK` (default 3), `labels` (required — the BIO label vocabulary the model
|
|
36
|
+
* emits, e.g. `["O", "B-locality", "I-locality", ...]`).
|
|
37
|
+
*/
|
|
38
|
+
export function aggregateSpanLogits(logits, pieces, spans, opts) {
    // A negative `topK` would make `slice(0, topK)` trim entries off the tail rather than cap the
    // head, so anything below zero is treated as "emit nothing".
    const topK = Math.max(0, opts.topK ?? 3);
    // Resolve every BIO label to its component tag once, instead of once per token per span.
    const tags = opts.labels.map((label) => stripBioPrefix(label));
    const candidates = [];
    for (const span of spans) {
        // Running per-tag probability mass for this span; insertion order follows label order.
        const tagScores = new Map();
        // Number of tokens that actually contributed probabilities to `tagScores`.
        let scoredTokens = 0;
        for (let t = 0; t < pieces.length; t++) {
            const piece = pieces[t];
            // Offsets are compared as half-open ranges: touching edges do not overlap.
            if (piece.end <= span.start)
                continue;
            // This early exit relies on `pieces` being ordered by ascending `start`.
            if (piece.start >= span.end)
                break;
            const row = logits[t];
            // If there is no logits row for this token index, skip the token instead of letting
            // `softmax` throw on `undefined`.
            if (row === undefined || row.length === 0)
                continue;
            const probs = softmax(row);
            // Pair labels and probabilities only over the columns both sides have; reading past the
            // end of `probs` would add `undefined` and poison the tag's score with NaN.
            const width = Math.min(tags.length, probs.length);
            for (let l = 0; l < width; l++) {
                const tag = tags[l];
                // "Outside" mass says nothing about which component the span is.
                if (tag === "O")
                    continue;
                tagScores.set(tag, (tagScores.get(tag) ?? 0) + probs[l]);
            }
            scoredTokens++;
        }
        if (scoredTokens === 0)
            continue;
        // Average over the contributing tokens so longer spans don't auto-win, then keep the
        // best `topK` tags.
        const ranked = [...tagScores]
            .map(([tag, total]) => ({ tag, score: total / scoredTokens }))
            .sort((a, b) => b.score - a.score)
            .slice(0, topK);
        for (const { tag, score } of ranked) {
            candidates.push({
                span: { start: span.start, end: span.end },
                tag,
                score,
            });
        }
    }
    return candidates;
}
|
|
84
|
+
/**
|
|
85
|
+
* Strip `B-` or `I-` prefix from a BIO label, returning the component tag. `O` returns `"O"`.
|
|
86
|
+
*/
|
|
87
|
+
function stripBioPrefix(label) {
    // Only a leading single-letter position marker followed by a dash is a prefix: `B`/`I` from
    // BIO, plus `E`/`S`/`L`/`U` from the BIOES and BILOU variants. Cutting at the first dash of
    // any label would mangle an unprefixed hyphenated tag (`po-box` would become `box`).
    const hasMarker = label.length >= 2 && label[1] === "-" && "BIESLU".includes(label[0]);
    // `O` and any other unprefixed label pass through unchanged.
    return hasMarker ? label.slice(2) : label;
}
|
|
95
|
+
/** Numerically stable softmax over a row of logits. */
|
|
96
|
+
function softmax(row) {
    // Nothing to normalise.
    if (row.length === 0)
        return [];
    let max = row[0];
    for (const value of row) {
        if (value > max)
            max = value;
    }
    // Every entry is -Infinity (a fully masked row): `v - max` would be NaN for all of them, so
    // fall back to the uniform distribution.
    if (max === -Infinity)
        return row.map(() => 1 / row.length);
    // At least one +Infinity: `Infinity - Infinity` is NaN, so split the mass evenly across the
    // infinite entries and give everything else zero.
    if (max === Infinity) {
        let infinite = 0;
        for (const value of row) {
            if (value === Infinity)
                infinite++;
        }
        return row.map((value) => (value === Infinity ? 1 / infinite : 0));
    }
    // Subtracting the maximum keeps every exponent <= 0, so `Math.exp` cannot overflow.
    // NaN inputs are not special-cased and still propagate to the output.
    const exps = row.map((value) => Math.exp(value - max));
    const total = exps.reduce((acc, e) => acc + e, 0);
    return exps.map((e) => e / total);
}
|
|
105
|
+
//# sourceMappingURL=span-logit-aggregation.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"span-logit-aggregation.js","sourceRoot":"","sources":["../../pipeline/span-logit-aggregation.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAqBH;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,UAAU,mBAAmB,CAClC,MAAkB,EAClB,MAA6B,EAC7B,KAA4B,EAC5B,IAAkD;IAElD,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,CAAC,CAAA;IAC3B,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAA;IAE1B,MAAM,UAAU,GAA0B,EAAE,CAAA;IAE5C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,uDAAuD;QACvD,MAAM,WAAW,GAAa,EAAE,CAAA;QAChC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;YACpB,IAAI,CAAC,CAAC,GAAG,IAAI,IAAI,CAAC,KAAK;gBAAE,SAAQ;YACjC,IAAI,CAAC,CAAC,KAAK,IAAI,IAAI,CAAC,GAAG;gBAAE,MAAK;YAC9B,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACpB,CAAC;QAED,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;YAAE,SAAQ;QAEtC,wEAAwE;QACxE,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAA;QAE3C,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;YAC7B,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAA;YACjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACxC,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;gBAC3B,MAAM,GAAG,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAA;gBACpC,IAAI,GAAG,KAAK,GAAG;oBAAE,SAAQ;gBACzB,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;gBACpC,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,GAAG,KAAK,CAAC,CAAC,CAAE,CAAC,CAAA;YACrC,CAAC;QACF,CAAC;QAED,4EAA4E;QAC5E,MAAM,IAAI,GAAG,WAAW,CAAC,MAAM,CAAA;QAC/B,MAAM,MAAM,GAAG,CAAC,GAAG,SAAS,CAAC,OAAO,EAAE,CAAC;aACrC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,KAAK,GAAG,IAAI,EAAE,CAAC,CAAC;aACrD,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;aACjC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAA;QAEhB,KAAK,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,IAAI,MAAM,EAAE,CAAC;YACrC,UAAU,CAAC,IAAI,CAAC;gBACf,IAAI,EAAE,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE;gBAC1C,GAAG,EAAE,GAAmB;gBACxB,KAAK;aACL,CAAC,CAAA;QACH,CAAC;IACF,CAAC;IAED,OAAO,UAAU,CAAA;AAClB,CAAC;AAED;;GAE
G;AACH,SAAS,cAAc,CAAC,KAAa;IACpC,IAAI,KAAK,KAAK,GAAG;QAAE,OAAO,GAAG,CAAA;IAC7B,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IAC/B,IAAI,IAAI,KAAK,CAAC,CAAC;QAAE,OAAO,KAAK,CAAA;IAC7B,OAAO,KAAK,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,CAAA;AAC7B,CAAC;AAED,uDAAuD;AACvD,SAAS,OAAO,CAAC,GAAsB;IACtC,IAAI,GAAG,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;IACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,IAAI,GAAG,CAAC,CAAC,CAAE,GAAG,GAAG;YAAE,GAAG,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;IACrE,MAAM,IAAI,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAA;IAC9C,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAA;IAC3C,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,GAAG,CAAC,CAAA;AAChC,CAAC"}
|