@mailwoman/core 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. package/out/api/APIClient.d.ts +57 -0
  2. package/out/api/APIClient.d.ts.map +1 -0
  3. package/out/api/APIClient.js +108 -0
  4. package/out/api/APIClient.js.map +1 -0
  5. package/out/api/headless.d.ts +17 -0
  6. package/out/api/headless.d.ts.map +1 -0
  7. package/out/api/headless.js +18 -0
  8. package/out/api/headless.js.map +1 -0
  9. package/out/api/index.d.ts +11 -0
  10. package/out/api/index.d.ts.map +1 -0
  11. package/out/api/index.js +11 -0
  12. package/out/api/index.js.map +1 -0
  13. package/out/api/responses.d.ts +48 -0
  14. package/out/api/responses.d.ts.map +1 -0
  15. package/out/api/responses.js +68 -0
  16. package/out/api/responses.js.map +1 -0
  17. package/out/collections.d.ts +66 -0
  18. package/out/collections.d.ts.map +1 -0
  19. package/out/collections.js +97 -0
  20. package/out/collections.js.map +1 -0
  21. package/out/db/schema.d.ts +21 -0
  22. package/out/db/schema.d.ts.map +1 -0
  23. package/out/db/schema.js +16 -0
  24. package/out/db/schema.js.map +1 -0
  25. package/out/decoder/build-tree.d.ts +14 -1
  26. package/out/decoder/build-tree.d.ts.map +1 -1
  27. package/out/decoder/build-tree.js +37 -9
  28. package/out/decoder/build-tree.js.map +1 -1
  29. package/out/decoder/proposals-to-tree.d.ts.map +1 -1
  30. package/out/decoder/proposals-to-tree.js +2 -0
  31. package/out/decoder/proposals-to-tree.js.map +1 -1
  32. package/out/decoder/serialize-xml.d.ts +22 -1
  33. package/out/decoder/serialize-xml.d.ts.map +1 -1
  34. package/out/decoder/serialize-xml.js +64 -4
  35. package/out/decoder/serialize-xml.js.map +1 -1
  36. package/out/decoder/types.d.ts +45 -0
  37. package/out/decoder/types.d.ts.map +1 -1
  38. package/out/decoder/types.js +6 -0
  39. package/out/decoder/types.js.map +1 -1
  40. package/out/errors/index.d.ts +9 -0
  41. package/out/errors/index.d.ts.map +1 -0
  42. package/out/errors/index.js +9 -0
  43. package/out/errors/index.js.map +1 -0
  44. package/out/errors/schema.d.ts +69 -0
  45. package/out/errors/schema.d.ts.map +1 -0
  46. package/out/errors/schema.js +102 -0
  47. package/out/errors/schema.js.map +1 -0
  48. package/out/identifiers.d.ts +18 -0
  49. package/out/identifiers.d.ts.map +1 -0
  50. package/out/identifiers.js +49 -0
  51. package/out/identifiers.js.map +1 -0
  52. package/out/index.d.ts +3 -0
  53. package/out/index.d.ts.map +1 -1
  54. package/out/index.js +3 -4
  55. package/out/index.js.map +1 -1
  56. package/out/kysley/adapter.d.ts +13 -0
  57. package/out/kysley/adapter.d.ts.map +1 -0
  58. package/out/kysley/adapter.js +25 -0
  59. package/out/kysley/adapter.js.map +1 -0
  60. package/out/kysley/client.d.ts +16 -0
  61. package/out/kysley/client.d.ts.map +1 -0
  62. package/out/kysley/client.js +22 -0
  63. package/out/kysley/client.js.map +1 -0
  64. package/out/kysley/dialect-config.d.ts +27 -0
  65. package/out/kysley/dialect-config.d.ts.map +1 -0
  66. package/out/kysley/dialect-config.js +7 -0
  67. package/out/kysley/dialect-config.js.map +1 -0
  68. package/out/kysley/dialect.d.ts +39 -0
  69. package/out/kysley/dialect.d.ts.map +1 -0
  70. package/out/kysley/dialect.js +49 -0
  71. package/out/kysley/dialect.js.map +1 -0
  72. package/out/kysley/driver.d.ts +22 -0
  73. package/out/kysley/driver.d.ts.map +1 -0
  74. package/out/kysley/driver.js +114 -0
  75. package/out/kysley/driver.js.map +1 -0
  76. package/out/lifecycle/ServiceSymbol.d.ts +59 -0
  77. package/out/lifecycle/ServiceSymbol.d.ts.map +1 -0
  78. package/out/lifecycle/ServiceSymbol.js +62 -0
  79. package/out/lifecycle/ServiceSymbol.js.map +1 -0
  80. package/out/lifecycle/index.d.ts +11 -0
  81. package/out/lifecycle/index.d.ts.map +1 -0
  82. package/out/lifecycle/index.js +11 -0
  83. package/out/lifecycle/index.js.map +1 -0
  84. package/out/lifecycle/lru-cache.d.ts +22 -0
  85. package/out/lifecycle/lru-cache.d.ts.map +1 -0
  86. package/out/lifecycle/lru-cache.js +31 -0
  87. package/out/lifecycle/lru-cache.js.map +1 -0
  88. package/out/lifecycle/services.d.ts +145 -0
  89. package/out/lifecycle/services.d.ts.map +1 -0
  90. package/out/lifecycle/services.js +190 -0
  91. package/out/lifecycle/services.js.map +1 -0
  92. package/out/logging/index.d.ts +7 -0
  93. package/out/logging/index.d.ts.map +1 -0
  94. package/out/logging/index.js +7 -0
  95. package/out/logging/index.js.map +1 -0
  96. package/out/logging/shared.d.ts +60 -0
  97. package/out/logging/shared.d.ts.map +1 -0
  98. package/out/logging/shared.js +100 -0
  99. package/out/logging/shared.js.map +1 -0
  100. package/out/logging/tables.d.ts +7 -0
  101. package/out/logging/tables.d.ts.map +1 -0
  102. package/out/logging/tables.js +75 -0
  103. package/out/logging/tables.js.map +1 -0
  104. package/out/objects.d.ts +96 -0
  105. package/out/objects.d.ts.map +1 -0
  106. package/out/objects.js +96 -0
  107. package/out/objects.js.map +1 -0
  108. package/out/parser/proposal-pipeline.d.ts.map +1 -1
  109. package/out/parser/proposal-pipeline.js +0 -1
  110. package/out/parser/proposal-pipeline.js.map +1 -1
  111. package/out/pipeline/index.d.ts +14 -0
  112. package/out/pipeline/index.d.ts.map +1 -0
  113. package/out/pipeline/index.js +11 -0
  114. package/out/pipeline/index.js.map +1 -0
  115. package/out/pipeline/reconcile.d.ts +135 -0
  116. package/out/pipeline/reconcile.d.ts.map +1 -0
  117. package/out/pipeline/reconcile.js +355 -0
  118. package/out/pipeline/reconcile.js.map +1 -0
  119. package/out/pipeline/runtime-pipeline.d.ts +29 -0
  120. package/out/pipeline/runtime-pipeline.d.ts.map +1 -0
  121. package/out/pipeline/runtime-pipeline.js +288 -0
  122. package/out/pipeline/runtime-pipeline.js.map +1 -0
  123. package/out/pipeline/span-logit-aggregation.d.ts +57 -0
  124. package/out/pipeline/span-logit-aggregation.d.ts.map +1 -0
  125. package/out/pipeline/span-logit-aggregation.js +105 -0
  126. package/out/pipeline/span-logit-aggregation.js.map +1 -0
  127. package/out/pipeline/types.d.ts +189 -0
  128. package/out/pipeline/types.d.ts.map +1 -0
  129. package/out/pipeline/types.js +16 -0
  130. package/out/pipeline/types.js.map +1 -0
  131. package/out/resolver/index.d.ts +9 -0
  132. package/out/resolver/index.d.ts.map +1 -0
  133. package/out/resolver/index.js +8 -0
  134. package/out/resolver/index.js.map +1 -0
  135. package/out/resolver/resolve.d.ts +21 -0
  136. package/out/resolver/resolve.d.ts.map +1 -0
  137. package/out/resolver/resolve.js +118 -0
  138. package/out/resolver/resolve.js.map +1 -0
  139. package/out/resolver/types.d.ts +118 -0
  140. package/out/resolver/types.d.ts.map +1 -0
  141. package/out/resolver/types.js +24 -0
  142. package/out/resolver/types.js.map +1 -0
  143. package/out/resources/git.d.ts +1 -1
  144. package/out/resources/index.d.ts +0 -1
  145. package/out/resources/index.d.ts.map +1 -1
  146. package/out/resources/index.js +0 -1
  147. package/out/resources/index.js.map +1 -1
  148. package/out/resources/whosonfirst/DataSourceCache.d.ts +0 -1
  149. package/out/resources/whosonfirst/DataSourceCache.d.ts.map +1 -1
  150. package/out/resources/whosonfirst/DataSourceCache.js +0 -1
  151. package/out/resources/whosonfirst/DataSourceCache.js.map +1 -1
  152. package/out/resources/whosonfirst/PlacetypeDataSource.d.ts +2 -2
  153. package/out/resources/whosonfirst/PlacetypeDataSource.d.ts.map +1 -1
  154. package/out/resources/whosonfirst/PlacetypeDataSource.js +9 -6
  155. package/out/resources/whosonfirst/PlacetypeDataSource.js.map +1 -1
  156. package/out/routing/index.d.ts +67 -0
  157. package/out/routing/index.d.ts.map +1 -0
  158. package/out/routing/index.js +114 -0
  159. package/out/routing/index.js.map +1 -0
  160. package/out/sets.d.ts +2 -0
  161. package/out/sets.d.ts.map +1 -0
  162. package/out/sets.js +2 -0
  163. package/out/sets.js.map +1 -0
  164. package/package.json +28 -2
  165. package/out/resources/db/index.d.ts +0 -57
  166. package/out/resources/db/index.d.ts.map +0 -1
  167. package/out/resources/db/index.js +0 -57
  168. package/out/resources/db/index.js.map +0 -1
@@ -0,0 +1,288 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `runPipeline` — the runtime coordinator that composes all six stages.
7
+ *
8
+ * Generic over stage implementations (see `types.ts::RuntimePipelineStages`). Each stage is
9
+ * injected; the coordinator handles composition, timing, fast-path routing, and graceful
10
+ * degradation when stages are absent.
11
+ *
12
+ * Implementation contract per `docs/articles/plan/reference/STAGES.md`.
13
+ */
14
+ import { reconcileSpans } from "./reconcile.js";
15
+ import { aggregateSpanLogits } from "./span-logit-aggregation.js";
16
/**
 * QueryShape format identifiers that mean "this token is a postcode". Mirrors the set in
 * `@mailwoman/kind-classifier`; the list is duplicated here so core/pipeline has no dependency
 * on kind-classifier.
 */
const POSTCODE_FORMATS = new Set(
    ["us_zip", "us_zip4", "uk_postcode", "fr_postcode", "de_postcode", "ca_postcode", "jp_postcode"]
);
/** Tells whether `format` is one of the known postcode format identifiers. */
function isPostcodeFormat(format) {
    const known = POSTCODE_FORMATS.has(format);
    return known;
}
/** Tells whether a QueryShape known-format hit carries a postcode format. */
function isPostcodeFormatHit(hit) {
    const { format } = hit;
    return isPostcodeFormat(format);
}
36
/**
 * Stand-in for the `normalize` stage when none is wired: the raw text is reported unchanged as
 * the normalized text, and the caller's locale (if any) is echoed back as `appliedLocale`.
 */
function identityNormalize(raw, opts) {
    const appliedLocale = opts?.locale;
    return { raw, normalized: raw, appliedLocale };
}
40
/**
 * Stand-in for the `computeQueryShape` stage when none is wired: a shape with no known-format
 * hits. A fresh object (and array) is produced on every call.
 */
function emptyQueryShape() {
    const knownFormats = [];
    return { knownFormats };
}
44
/**
 * Default locale detector, used when no `detectLocale` stage is wired: trusts the caller's hint,
 * or falls back to `und`.
 *
 * An empty (falsy) hint is treated as "no hint", so the reported locale, confidence, and source
 * always agree with one another.
 *
 * @param _input Unused.
 * @param _shape Unused.
 * @param opts Optional `{ hint }` carrying the caller-supplied locale.
 * @returns `{ locale, confidence, alternatives, source }`: confidence `1.0` and source `caller`
 *   when a hint was supplied, otherwise locale `und`, confidence `0.0`, and source `detected`.
 */
async function defaultDetectLocale(_input, _shape, opts) {
    // Truthiness rather than `??`: an empty-string hint must take the same branch for all three
    // fields, otherwise the result is an empty locale labelled as "detected".
    const hint = opts?.hint || undefined;
    const hasHint = hint !== undefined;
    return {
        locale: hasHint ? hint : "und",
        confidence: hasHint ? 1.0 : 0.0,
        alternatives: [],
        source: hasHint ? "caller" : "detected",
    };
}
54
/**
 * Default kind classifier, used when no `classifyKind` stage is wired. Every input is reported as
 * `structured_address` with zero confidence and no alternatives, so the fast-path confidence gate
 * is never satisfied by this default. All three arguments are ignored.
 */
async function defaultClassifyKind(_input, _shape, _locale) {
    const fallback = {
        kind: "structured_address",
        confidence: 0.0,
        alternatives: [],
    };
    return fallback;
}
62
/**
 * Decide whether to short-circuit stages 3-5 and go straight to resolve. Conservative: requires
 * high kind-classifier confidence AND a matching QueryShape signal. See
 * `STAGES.md#fast-path-routing` for the rationale.
 *
 * @param kind Kind-classifier result; `kind.kind` and `kind.confidence` are read.
 * @param shape QueryShape; `knownFormats`, `totalLength`, and `characterClass` are read.
 * @param opts Optional pipeline options; `forceFullPipeline` disables the fast path outright.
 * @returns `true` only when the confidence is at least 0.95 and either (a) the kind is
 *   `postcode_only` and some known-format hit is a postcode format, or (b) the kind is
 *   `locality_only`, the total length is at most 30, and the character class is `alpha`.
 */
function canShortCircuit(kind, shape, opts) {
    const minConfidence = 0.95;
    const maxLocalityLength = 30;
    if (opts?.forceFullPipeline)
        return false;
    // Negated `>=` instead of `<`: a missing or NaN confidence compares false either way, and
    // must be treated as "not confident" rather than slipping through the gate.
    if (!(kind.confidence >= minConfidence))
        return false;
    if (kind.kind === "postcode_only") {
        return shape.knownFormats.some(isPostcodeFormatHit);
    }
    if (kind.kind === "locality_only") {
        // An unknown length counts as too long.
        return (shape.totalLength ?? Infinity) <= maxLocalityLength && shape.characterClass === "alpha";
    }
    return false;
}
80
/**
 * Build a stub `AddressTree` for the fast-path case (no classifier ran). The tree has a single
 * root node, or no roots when nothing can be tagged.
 *
 * - `postcode_only`: the first known-format hit with a postcode format becomes a `postcode` node
 *   covering the hit's span, with the hit's confidence and format as `sourceId`.
 * - `locality_only`: the whitespace-trimmed text becomes a `locality` node whose span covers
 *   exactly the trimmed characters, so `raw.slice(start, end) === value` holds just as it does
 *   for the postcode node.
 * - Anything else (including `postcode_only` without a postcode hit): `{ raw, roots: [] }`.
 *
 * @param text Normalized input text; returned as `raw`.
 * @param kind Kind-classifier result; `kind.kind` and `kind.confidence` are read.
 * @param shape QueryShape; `knownFormats` is read.
 * @returns The stub tree `{ raw, roots }`.
 */
function buildFastPathTree(text, kind, shape) {
    if (kind.kind === "postcode_only") {
        const hit = shape.knownFormats.find((f) => isPostcodeFormat(f.format));
        if (hit) {
            const { start, end } = hit.span;
            return {
                raw: text,
                roots: [
                    {
                        tag: "postcode",
                        value: text.slice(start, end),
                        start,
                        end,
                        confidence: hit.confidence,
                        children: [],
                        source: "query-shape",
                        sourceId: hit.format,
                    },
                ],
            };
        }
    }
    if (kind.kind === "locality_only") {
        const value = text.trim();
        // Offsets must describe the trimmed value, not the padded input.
        const start = text.length - text.trimStart().length;
        return {
            raw: text,
            roots: [
                {
                    tag: "locality",
                    value,
                    start,
                    end: start + value.length,
                    confidence: kind.confidence,
                    children: [],
                    source: "query-shape",
                    sourceId: "kind:locality_only",
                },
            ],
        };
    }
    return { raw: text, roots: [] };
}
124
/**
 * Run the runtime pipeline.
 *
 * Order of work (per STAGES.md):
 *
 * 1. Normalize (identity when not wired)
 * 2. Compute QueryShape (empty when not wired)
 * 3. Locale gate (caller-trust when not wired)
 * 4. Kind classifier (default `structured_address` when not wired)
 * 5. Branch: fast-path → resolver; full → phrase grouper → classifier → resolver
 *
 * The abort signal on `opts` is checked before each stage starts. Each stage that runs records
 * its elapsed milliseconds on `result.timing`; stages that did not run (for example the
 * classifier on the fast path) have no entry.
 *
 * @param raw Raw input text.
 * @param stages Injected stage implementations; every stage is optional.
 * @param opts Optional run options. Read here: `locale`, `signal`, `forceFullPipeline` (via
 *   `canShortCircuit`), `forceJointReconcile`, and `resolveOpts` (via `safeResolve`).
 * @returns `{ input, normalized, queryShape, locale, kind, phraseProposals, tree, timing, path }`
 *   where `path` is `"fast-path"` or `"full"`.
 */
export async function runPipeline(raw, stages, opts) {
    const timing = {};
    const pipelineStart = performance.now();
    // Resolve each early stage to its injected implementation or the built-in default.
    const normalizeStage = stages.normalize ?? identityNormalize;
    const queryShapeStage = stages.computeQueryShape ?? emptyQueryShape;
    const localeStage = stages.detectLocale ?? defaultDetectLocale;
    const kindStage = stages.classifyKind ?? defaultClassifyKind;

    throwIfAborted(opts);
    const normalized = normalizeStage(raw, { locale: opts?.locale });
    timing["normalize"] = performance.now() - pipelineStart;

    throwIfAborted(opts);
    const queryShapeStart = performance.now();
    const queryShape = queryShapeStage(normalized, { locale: opts?.locale });
    timing["query-shape"] = performance.now() - queryShapeStart;

    throwIfAborted(opts);
    const localeStart = performance.now();
    const locale = await localeStage(normalized, queryShape, { hint: opts?.locale });
    timing["locale-gate"] = performance.now() - localeStart;

    throwIfAborted(opts);
    const kindStart = performance.now();
    const kind = await kindStage(normalized, queryShape, locale);
    timing["kind-classifier"] = performance.now() - kindStart;

    // Fast path: trivial inputs skip the grouper and classifier. The stub tree comes from the
    // QueryShape hits plus the kind alone, so it is useful even when no resolver is wired.
    if (canShortCircuit(kind, queryShape, opts)) {
        let fastTree = buildFastPathTree(normalized.normalized, kind, queryShape);
        if (stages.resolver) {
            throwIfAborted(opts);
            const resolveStart = performance.now();
            fastTree = await safeResolve(stages.resolver, fastTree, opts);
            timing["resolve"] = performance.now() - resolveStart;
        }
        return {
            input: raw,
            normalized,
            queryShape,
            locale,
            kind,
            phraseProposals: [],
            tree: fastTree,
            timing,
            path: "fast-path",
        };
    }

    // Full pipeline.
    // Stage 2.7, the phrase grouper, is optional and runs only when wired. Its proposals are
    // surfaced on the result and feed the joint-reconcile branch below.
    let phraseProposals = [];
    if (stages.groupPhrases) {
        throwIfAborted(opts);
        const groupStart = performance.now();
        phraseProposals = await safeGroupPhrases(stages.groupPhrases, normalized, queryShape, locale);
        timing["phrase-grouper"] = performance.now() - groupStart;
    }

    let tree = { raw: normalized.normalized, roots: [] };

    // Joint reconcile needs all of: the opt-in flag, at least one phrase proposal, and a
    // classifier that exposes `parseWithLogits`. Otherwise the plain argmax parse is used.
    const jointReconcile = opts?.forceJointReconcile &&
        phraseProposals.length > 0 &&
        stages.classifier &&
        "parseWithLogits" in stages.classifier;
    if (jointReconcile) {
        const logitClassifier = stages.classifier;
        throwIfAborted(opts);
        const classifyStart = performance.now();
        const { tree: argmaxTree, logits, pieces, } = await logitClassifier.parseWithLogits(normalized.normalized, { queryShape });
        timing["token-classify"] = performance.now() - classifyStart;

        throwIfAborted(opts);
        const reconcileStart = performance.now();
        // The aggregation strips BIO prefixes, so it needs the classifier's label vocabulary;
        // it is read structurally from `labels` when the classifier exposes it.
        const labels = "labels" in logitClassifier ? logitClassifier.labels : [];
        const spanBounds = phraseProposals.map((proposal) => ({
            start: proposal.span.start,
            end: proposal.span.end,
        }));
        const classifierTopK = aggregateSpanLogits(logits, pieces, spanBounds, { labels });
        // No per-span candidates means nothing to reconcile: keep the argmax tree.
        tree = classifierTopK.length > 0
            ? reconcileSpans({
                raw: normalized.normalized,
                phraseProposals,
                classifierTopK,
            }).tree
            : argmaxTree;
        timing["reconcile"] = performance.now() - reconcileStart;
    }
    else if (stages.classifier) {
        throwIfAborted(opts);
        const classifyStart = performance.now();
        tree = await safeClassify(stages.classifier, normalized.normalized, queryShape);
        timing["token-classify"] = performance.now() - classifyStart;
    }

    if (stages.resolver) {
        throwIfAborted(opts);
        const resolveStart = performance.now();
        tree = await safeResolve(stages.resolver, tree, opts);
        timing["resolve"] = performance.now() - resolveStart;
    }

    return {
        input: raw,
        normalized,
        queryShape,
        locale,
        kind,
        phraseProposals,
        tree,
        timing,
        path: "full",
    };
}
249
/**
 * Throw when the run's abort signal has fired; otherwise do nothing.
 *
 * The thrown value is the signal's `reason`, or a `DOMException` named `AbortError` when the
 * signal carries no reason. Cancellation is coarse-grained: the check happens between stages, so
 * a stage already in flight runs to completion before the abort takes effect. Mid-stage
 * cancellation would require passing `signal` into each stage's contract (`detectLocale`,
 * `classifyKind`, `classifier.parse`, `resolver.resolveTree`), which is left as a future
 * enhancement.
 */
function throwIfAborted(opts) {
    const signal = opts?.signal;
    if (!signal?.aborted) {
        return;
    }
    throw signal.reason ?? new DOMException("Pipeline aborted", "AbortError");
}
261
/**
 * Defensive wrapper around `classifier.parse`: when the parse throws or rejects, an empty tree
 * for `text` is returned rather than aborting the pipeline. The failure itself is discarded.
 */
async function safeClassify(classifier, text, queryShape) {
    let parsed;
    try {
        parsed = await classifier.parse(text, { queryShape });
    }
    catch {
        return { raw: text, roots: [] };
    }
    return parsed;
}
270
/**
 * Defensive wrapper around the phrase grouper: when `groupPhrases` throws or rejects, an empty
 * proposal list is returned rather than aborting the pipeline. The failure itself is discarded.
 */
async function safeGroupPhrases(groupPhrases, normalized, shape, locale) {
    let proposals;
    try {
        proposals = await groupPhrases(normalized, shape, locale);
    }
    catch {
        proposals = [];
    }
    return proposals;
}
279
/**
 * Defensive wrapper around `resolver.resolveTree`: when the resolver throws or rejects, the tree
 * that was passed in is returned unchanged. `opts.resolveOpts` (when present) is forwarded as the
 * resolver's second argument. The failure itself is discarded.
 */
async function safeResolve(resolver, tree, opts) {
    let resolved = tree;
    try {
        resolved = await resolver.resolveTree(tree, opts?.resolveOpts);
    }
    catch {
        // Keep the incoming tree.
    }
    return resolved;
}
288
+ //# sourceMappingURL=runtime-pipeline.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"runtime-pipeline.js","sourceRoot":"","sources":["../../pipeline/runtime-pipeline.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAGH,OAAO,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAA;AAC/C,OAAO,EAAE,mBAAmB,EAAE,MAAM,6BAA6B,CAAA;AAcjE;;;;GAIG;AACH,MAAM,gBAAgB,GAAwB,IAAI,GAAG,CAAC;IACrD,QAAQ;IACR,SAAS;IACT,aAAa;IACb,aAAa;IACb,aAAa;IACb,aAAa;IACb,aAAa;CACb,CAAC,CAAA;AAEF,SAAS,gBAAgB,CAAC,MAAc;IACvC,OAAO,gBAAgB,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;AACpC,CAAC;AAED,SAAS,mBAAmB,CAAC,GAAuB;IACnD,OAAO,gBAAgB,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;AACpC,CAAC;AAED,sEAAsE;AACtE,SAAS,iBAAiB,CAAC,GAAW,EAAE,IAA0B;IACjE,OAAO,EAAE,GAAG,EAAE,UAAU,EAAE,GAAG,EAAE,aAAa,EAAE,IAAI,EAAE,MAAM,EAAE,CAAA;AAC7D,CAAC;AAED,yEAAyE;AACzE,SAAS,eAAe;IACvB,OAAO,EAAE,YAAY,EAAE,EAAE,EAAE,CAAA;AAC5B,CAAC;AAED,iFAAiF;AACjF,KAAK,UAAU,mBAAmB,CACjC,MAA2B,EAC3B,MAAsB,EACtB,IAA2B;IAE3B,MAAM,MAAM,GAAG,IAAI,EAAE,IAAI,IAAI,KAAK,CAAA;IAClC,OAAO;QACN,MAAM;QACN,UAAU,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG;QAClC,YAAY,EAAE,EAAE;QAChB,MAAM,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,UAAU;KAC1C,CAAA;AACF,CAAC;AAED,uGAAuG;AACvG,KAAK,UAAU,mBAAmB,CACjC,MAA2B,EAC3B,MAAsB,EACtB,OAAmB;IAEnB,OAAO;QACN,IAAI,EAAE,oBAAoB;QAC1B,UAAU,EAAE,GAAG;QACf,YAAY,EAAE,EAAE;KAChB,CAAA;AACF,CAAC;AAED;;;;GAIG;AACH,SAAS,eAAe,CAAC,IAAqB,EAAE,KAAqB,EAAE,IAAmB;IACzF,IAAI,IAAI,EAAE,iBAAiB;QAAE,OAAO,KAAK,CAAA;IACzC,IAAI,IAAI,CAAC,UAAU,GAAG,IAAI;QAAE,OAAO,KAAK,CAAA;IACxC,IAAI,IAAI,CAAC,IAAI,KAAK,eAAe,EAAE,CAAC;QACnC,OAAO,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAA;IACpD,CAAC;IACD,IAAI,IAAI,CAAC,IAAI,KAAK,eAAe,EAAE,CAAC;QACnC,OAAO,CAAC,KAAK,CAAC,WAAW,IAAI,QAAQ,CAAC,IAAI,EAAE,IAAI,KAAK,CAAC,cAAc,KAAK,OAAO,CAAA;IACjF,CAAC;IACD,OAAO,KAAK,CAAA;AACb,CAAC;AAED;;;GAGG;AACH,SAAS,iBAAiB,CAAC,IAAY,EAAE,IAAqB,EAAE,KAAqB;IACpF,IAAI,IAAI,CAAC,IAAI,KAAK,eAAe,EAAE,CAAC;QACnC,MAAM,GAAG,GAAG,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,gBAAgB,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAA;QACtE,IAAI,GAAG,EAAE,CAAC;YACT,OAAO;gBACN,GAAG,EAAE,
IAAI;gBACT,KAAK,EAAE;oBACN;wBACC,GAAG,EAAE,UAAU;wBACf,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,EAAE,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC;wBAC/C,KAAK,EAAE,GAAG,CAAC,IAAI,CAAC,KAAK;wBACrB,GAAG,EAAE,GAAG,CAAC,IAAI,CAAC,GAAG;wBACjB,UAAU,EAAE,GAAG,CAAC,UAAU;wBAC1B,QAAQ,EAAE,EAAE;wBACZ,MAAM,EAAE,aAAa;wBACrB,QAAQ,EAAE,GAAG,CAAC,MAAM;qBACpB;iBACD;aACD,CAAA;QACF,CAAC;IACF,CAAC;IACD,IAAI,IAAI,CAAC,IAAI,KAAK,eAAe,EAAE,CAAC;QACnC,OAAO;YACN,GAAG,EAAE,IAAI;YACT,KAAK,EAAE;gBACN;oBACC,GAAG,EAAE,UAAU;oBACf,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE;oBAClB,KAAK,EAAE,CAAC;oBACR,GAAG,EAAE,IAAI,CAAC,MAAM;oBAChB,UAAU,EAAE,IAAI,CAAC,UAAU;oBAC3B,QAAQ,EAAE,EAAE;oBACZ,MAAM,EAAE,aAAa;oBACrB,QAAQ,EAAE,oBAAoB;iBAC9B;aACD;SACD,CAAA;IACF,CAAC;IACD,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,CAAA;AAChC,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAChC,GAAW,EACX,MAA6B,EAC7B,IAAmB;IAEnB,MAAM,MAAM,GAA2B,EAAE,CAAA;IACzC,MAAM,EAAE,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;IAE5B,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,iBAAiB,CAAA;IACvD,MAAM,iBAAiB,GAAG,MAAM,CAAC,iBAAiB,IAAI,eAAe,CAAA;IACrE,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,mBAAmB,CAAA;IAC/D,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,mBAAmB,CAAA;IAE/D,cAAc,CAAC,IAAI,CAAC,CAAA;IACpB,MAAM,UAAU,GAAG,SAAS,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAA;IAC3D,MAAM,CAAC,WAAW,CAAC,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,EAAE,CAAA;IAE5C,cAAc,CAAC,IAAI,CAAC,CAAA;IACpB,MAAM,GAAG,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;IAC7B,MAAM,UAAU,GAAG,iBAAiB,CAAC,UAAU,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAA;IAC1E,MAAM,CAAC,aAAa,CAAC,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,GAAG,CAAA;IAE/C,cAAc,CAAC,IAAI,CAAC,CAAA;IACpB,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;IACjC,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,UAAU,EAAE,UAAU,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAA;IACjF,MAAM,CAAC,aAAa,CAAC,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,OAAO,CAAA;IAEnD,cAAc,CAAC,IAAI,CAAC,CAAA;IACpB,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;IAC/B,MAAM,IAAI,GAAG,MAAM,YAAY,CAAC,UAAU,EAAE,UAAU,EAAE,MAAM,CAAC,CAAA;IAC/D,MAAM,CAAC,iBAAiB,CAAC,GA
AG,WAAW,CAAC,GAAG,EAAE,GAAG,KAAK,CAAA;IAErD,uFAAuF;IACvF,2FAA2F;IAC3F,6FAA6F;IAC7F,eAAe;IACf,IAAI,eAAe,CAAC,IAAI,EAAE,UAAU,EAAE,IAAI,CAAC,EAAE,CAAC;QAC7C,IAAI,IAAI,GAAG,iBAAiB,CAAC,UAAU,CAAC,UAAU,EAAE,IAAI,EAAE,UAAU,CAAC,CAAA;QACrE,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YACrB,cAAc,CAAC,IAAI,CAAC,CAAA;YACpB,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;YAClC,IAAI,GAAG,MAAM,WAAW,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI,EAAE,IAAI,CAAC,CAAA;YACrD,MAAM,CAAC,SAAS,CAAC,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,QAAQ,CAAA;QACjD,CAAC;QACD,OAAO;YACN,KAAK,EAAE,GAAG;YACV,UAAU;YACV,UAAU;YACV,MAAM;YACN,IAAI;YACJ,eAAe,EAAE,EAAE;YACnB,IAAI;YACJ,MAAM;YACN,IAAI,EAAE,WAAW;SACjB,CAAA;IACF,CAAC;IAED,iBAAiB;IACjB,6FAA6F;IAC7F,gGAAgG;IAChG,IAAI,eAAe,GAAqB,EAAE,CAAA;IAC1C,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;QACzB,cAAc,CAAC,IAAI,CAAC,CAAA;QACpB,MAAM,MAAM,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;QAChC,eAAe,GAAG,MAAM,gBAAgB,CAAC,MAAM,CAAC,YAAY,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,CAAC,CAAA;QAC7F,MAAM,CAAC,gBAAgB,CAAC,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,MAAM,CAAA;IACtD,CAAC;IAED,IAAI,IAAI,GAAgB,EAAE,GAAG,EAAE,UAAU,CAAC,UAAU,EAAE,KAAK,EAAE,EAAE,EAAE,CAAA;IAEjE,6FAA6F;IAC7F,8FAA8F;IAC9F,MAAM,iBAAiB,GACtB,IAAI,EAAE,mBAAmB;QACzB,eAAe,CAAC,MAAM,GAAG,CAAC;QAC1B,MAAM,CAAC,UAAU;QACjB,iBAAiB,IAAI,MAAM,CAAC,UAAU,CAAA;IAEvC,IAAI,iBAAiB,EAAE,CAAC;QACvB,MAAM,oBAAoB,GAAG,MAAM,CAAC,UAKnC,CAAA;QAED,cAAc,CAAC,IAAI,CAAC,CAAA;QACpB,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;QACnC,MAAM,EACL,IAAI,EAAE,UAAU,EAChB,MAAM,EACN,MAAM,GACN,GAAG,MAAM,oBAAoB,CAAC,eAAe,CAAC,UAAU,CAAC,UAAU,EAAE,EAAE,UAAU,EAAE,CAAC,CAAA;QACrF,MAAM,CAAC,gBAAgB,CAAC,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAA;QAExD,cAAc,CAAC,IAAI,CAAC,CAAA;QACpB,MAAM,UAAU,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;QAEpC,6FAA6F;QAC7F,gGAAgG;QAChG,MAAM,MAAM,GACX,QAAQ,IAAI,oBAAoB,CAAC,CAAC,CAAE,oBAAiE,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAA;QAElH,MAAM,cAAc,GAAG,mBAAmB,CACzC,MAAM,EACN,MAAM,EACN,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC,EACtE,EAAE,MAAM,EAAE,
CACV,CAAA;QAED,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC/B,MAAM,MAAM,GAAG,cAAc,CAAC;gBAC7B,GAAG,EAAE,UAAU,CAAC,UAAU;gBAC1B,eAAe;gBACf,cAAc;aACd,CAAC,CAAA;YACF,IAAI,GAAG,MAAM,CAAC,IAAI,CAAA;QACnB,CAAC;aAAM,CAAC;YACP,IAAI,GAAG,UAAU,CAAA;QAClB,CAAC;QACD,MAAM,CAAC,WAAW,CAAC,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,UAAU,CAAA;IACrD,CAAC;SAAM,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;QAC9B,cAAc,CAAC,IAAI,CAAC,CAAA;QACpB,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;QACnC,IAAI,GAAG,MAAM,YAAY,CAAC,MAAM,CAAC,UAAU,EAAE,UAAU,CAAC,UAAU,EAAE,UAAU,CAAC,CAAA;QAC/E,MAAM,CAAC,gBAAgB,CAAC,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAA;IACzD,CAAC;IAED,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;QACrB,cAAc,CAAC,IAAI,CAAC,CAAA;QACpB,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,EAAE,CAAA;QAClC,IAAI,GAAG,MAAM,WAAW,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI,EAAE,IAAI,CAAC,CAAA;QACrD,MAAM,CAAC,SAAS,CAAC,GAAG,WAAW,CAAC,GAAG,EAAE,GAAG,QAAQ,CAAA;IACjD,CAAC;IAED,OAAO;QACN,KAAK,EAAE,GAAG;QACV,UAAU;QACV,UAAU;QACV,MAAM;QACN,IAAI;QACJ,eAAe;QACf,IAAI;QACJ,MAAM;QACN,IAAI,EAAE,MAAM;KACZ,CAAA;AACF,CAAC;AAED;;;;;;GAMG;AACH,SAAS,cAAc,CAAC,IAAmB;IAC1C,IAAI,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC;QAC3B,MAAM,IAAI,CAAC,MAAM,CAAC,MAAM,IAAI,IAAI,YAAY,CAAC,kBAAkB,EAAE,YAAY,CAAC,CAAA;IAC/E,CAAC;AACF,CAAC;AAED,wGAAwG;AACxG,KAAK,UAAU,YAAY,CAC1B,UAA6B,EAC7B,IAAY,EACZ,UAA0B;IAE1B,IAAI,CAAC;QACJ,OAAO,MAAM,UAAU,CAAC,KAAK,CAAC,IAAI,EAAE,EAAE,UAAU,EAAE,CAAC,CAAA;IACpD,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,CAAA;IAChC,CAAC;AACF,CAAC;AAED,6FAA6F;AAC7F,KAAK,UAAU,gBAAgB,CAC9B,YAAgE,EAChE,UAA+B,EAC/B,KAAqB,EACrB,MAAkB;IAElB,IAAI,CAAC;QACJ,OAAO,MAAM,YAAY,CAAC,UAAU,EAAE,KAAK,EAAE,MAAM,CAAC,CAAA;IACrD,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,EAAE,CAAA;IACV,CAAC;AACF,CAAC;AAED,+EAA+E;AAC/E,KAAK,UAAU,WAAW,CACzB,QAAwD,EACxD,IAAiB,EACjB,IAAmB;IAEnB,IAAI,CAAC;QACJ,OAAO,MAAM,QAAQ,CAAC,WAAW,CAAC,IAAI,EAAE,IAAI,EAAE,WAAW,CAAC,CAAA;IAC3D,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,IAAI,CAAA;IACZ,CAAC;AACF,CAAC"}
@@ -0,0 +1,57 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Per-span logit aggregation — Option C from the DeepSeek synthesis review.
7
+ *
8
+ * Takes the per-token logits already emitted by the ONNX model (currently discarded after argmax in
9
+ * `classifier.ts`) and aggregates them over phrase-grouper spans to produce per-span top-K tag
10
+ * candidates. These feed directly into `reconcileSpans` as the `classifierTopK` input.
11
+ *
12
+ * Why this instead of a sequence-level beam decoder: the reconciler takes `(span, tag, score)`
13
+ * triples — per-span confidence, not BIO-sequence-level confidence. The phrase grouper has
14
+ * already done boundary discovery. This module answers "given these boundaries, what tags does
15
+ * the classifier think each span is?" — which is the right abstraction for joint decoding.
16
+ *
17
+ * Code path matches the eventual production runtime: when a top-k-trained classifier exists, the TS
18
+ * runtime just swaps "per-token softmax aggregation" for "classifier's native top-k API." Same
19
+ * downstream contract.
20
+ */
21
+ import type { ClassifierCandidate } from "./reconcile.js";
22
/**
 * One token piece, located by character-level offsets into the original text.
 */
export interface TokenPiece {
    /** Character offset at which the piece starts. */
    start: number;
    /** Character offset at which the piece ends. */
    end: number;
}
29
/**
 * The bounds of one phrase-grouper span proposal, expressed in character offsets.
 */
export interface SpanBounds {
    /** Character offset at which the span starts. */
    start: number;
    /** Character offset at which the span ends. */
    end: number;
}
36
/**
 * Turn per-token logits plus phrase-grouper spans into per-span top-K tag candidates.
 *
 * For every span, the tokens whose character ranges overlap it are collected; their softmax
 * probabilities are summed per tag and normalized, and the K best tags are emitted together with
 * their aggregated scores.
 *
 * BIO handling: the model emits BIO labels (`B-locality`, `I-locality`, ...) while the reconciler
 * works with plain component tags (`locality`). The `B-`/`I-` prefix is stripped and the
 * probabilities merged, i.e. `score(locality) = sum(score(B-locality) + score(I-locality))` over
 * the span's tokens.
 *
 * @param logits Per-token logits from ONNX inference, shape `[seqLen][numLabels]`.
 * @param pieces Token pieces with character-level offsets (from the tokenizer's `encode`).
 * @param spans Phrase-grouper span proposals in character offsets.
 * @param opts Options: `topK` (default 3) and `labels` (required), the BIO label vocabulary the
 *   model emits, e.g. `["O", "B-locality", "I-locality", ...]`.
 */
export declare function aggregateSpanLogits(
    logits: number[][],
    pieces: readonly TokenPiece[],
    spans: readonly SpanBounds[],
    opts: {
        topK?: number;
        labels: readonly string[];
    }
): ClassifierCandidate[];
57
+ //# sourceMappingURL=span-logit-aggregation.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"span-logit-aggregation.d.ts","sourceRoot":"","sources":["../../pipeline/span-logit-aggregation.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAGH,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAA;AAEzD;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;CACX;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;CACX;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAgB,mBAAmB,CAClC,MAAM,EAAE,MAAM,EAAE,EAAE,EAClB,MAAM,EAAE,SAAS,UAAU,EAAE,EAC7B,KAAK,EAAE,SAAS,UAAU,EAAE,EAC5B,IAAI,EAAE;IAAE,IAAI,CAAC,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,SAAS,MAAM,EAAE,CAAA;CAAE,GAChD,mBAAmB,EAAE,CAiDvB"}
@@ -0,0 +1,105 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Per-span logit aggregation — Option C from the DeepSeek synthesis review.
7
+ *
8
+ * Takes the per-token logits already emitted by the ONNX model (currently discarded after argmax in
9
+ * `classifier.ts`) and aggregates them over phrase-grouper spans to produce per-span top-K tag
10
+ * candidates. These feed directly into `reconcileSpans` as the `classifierTopK` input.
11
+ *
12
+ * Why this instead of a sequence-level beam decoder: the reconciler takes `(span, tag, score)`
13
+ * triples — per-span confidence, not BIO-sequence-level confidence. The phrase grouper has
14
+ * already done boundary discovery. This module answers "given these boundaries, what tags does
15
+ * the classifier think each span is?" — which is the right abstraction for joint decoding.
16
+ *
17
+ * Code path matches the eventual production runtime: when a top-k-trained classifier exists, the TS
18
+ * runtime just swaps "per-token softmax aggregation" for "classifier's native top-k API." Same
19
+ * downstream contract.
20
+ */
21
/**
 * Given per-token logits and phrase-grouper spans, produce per-span top-K tag candidates.
 *
 * For each span, finds the tokens whose character ranges overlap the span, sums their softmax
 * probabilities per component tag, divides by the number of contributing tokens, and emits the
 * top K tags with their averaged scores.
 *
 * BIO prefix stripping: the model emits BIO labels (`B-locality`, `I-locality`, etc.) but the
 * reconciler works with component tags (`locality`). Probabilities of labels that map to the same
 * component tag are merged; the `O` label is never emitted as a candidate.
 *
 * Robustness notes:
 *
 * - `pieces` must be ordered by ascending `start`: the overlap scan stops at the first piece that
 *   starts at or after the span's end.
 * - A token with no logits row (`logits` shorter than `pieces`) is skipped and does not count
 *   towards the normalization, instead of crashing.
 * - Only label indices present in both `labels` and the logits row are read, so a length mismatch
 *   cannot inject `NaN` into the scores.
 * - A `topK` that is not greater than zero yields no candidates.
 *
 * @param logits Per-token logits from ONNX inference, shape `[seqLen][numLabels]`.
 * @param pieces Token pieces with character-level offsets (from the tokenizer's `encode`).
 * @param spans Phrase-grouper span proposals in character offsets.
 * @param opts Options — `topK` (default 3), `labels` (required — the BIO label vocabulary the model
 *   emits, e.g. `["O", "B-locality", "I-locality", ...]`).
 * @returns Flat list of `{ span, tag, score }` candidates, grouped by span in input order and
 *   sorted by descending score within each span. Spans with no usable token produce no entries.
 */
export function aggregateSpanLogits(logits, pieces, spans, opts) {
    const topK = opts.topK ?? 3;
    const labels = opts.labels;
    const candidates = [];
    // `slice(0, negative)` would mean "all but the last |K|"; treat non-positive K as "nothing".
    if (!(topK > 0))
        return candidates;
    // Resolve every BIO label to its component tag once instead of per token per span.
    const labelTags = labels.map((label) => stripBioPrefix(label));
    // Overlapping span proposals reuse the same tokens; compute each token's softmax only once.
    const probsByToken = new Map();
    for (const span of spans) {
        const tagScores = new Map();
        let contributing = 0;
        for (let t = 0; t < pieces.length; t++) {
            const p = pieces[t];
            // Character-level overlap test; relies on pieces being ordered by start offset.
            if (p.end <= span.start)
                continue;
            if (p.start >= span.end)
                break;
            const row = logits[t];
            // No logits for this token (e.g. sequence was truncated before inference).
            if (row == null)
                continue;
            let probs = probsByToken.get(t);
            if (probs === undefined) {
                probs = softmax(row);
                probsByToken.set(t, probs);
            }
            // Guard against a label vocabulary that is longer than the logits row.
            const width = Math.min(labelTags.length, probs.length);
            for (let l = 0; l < width; l++) {
                const tag = labelTags[l];
                if (tag === "O")
                    continue;
                tagScores.set(tag, (tagScores.get(tag) ?? 0) + probs[l]);
            }
            contributing++;
        }
        if (contributing === 0)
            continue;
        // Average over contributing tokens so longer spans don't auto-win.
        const ranked = [...tagScores.entries()]
            .map(([tag, score]) => ({ tag, score: score / contributing }))
            .sort((a, b) => b.score - a.score)
            .slice(0, topK);
        for (const { tag, score } of ranked) {
            candidates.push({
                span: { start: span.start, end: span.end },
                tag: tag,
                score,
            });
        }
    }
    return candidates;
}
84
/**
 * Strip a single-letter tagging prefix (`B-`, `I-`) from a BIO label, returning the component tag.
 * `O` returns `"O"`.
 *
 * Only a dash in the second position is treated as a prefix separator. A label that carries no
 * prefix but contains a dash of its own (e.g. `dependent-locality`) is returned unchanged rather
 * than being cut at its first dash, and dashes inside a prefixed tag (`I-dependent-locality`)
 * are preserved.
 *
 * @param label Label string from the model's vocabulary.
 * @returns The component tag without its tagging prefix.
 */
function stripBioPrefix(label) {
    if (label === "O")
        return "O";
    // "X-rest": exactly one character before the dash marks a tagging prefix.
    if (label.charAt(1) === "-")
        return label.slice(2);
    return label;
}
95
/**
 * Numerically stable softmax over a row of logits.
 *
 * The row maximum is subtracted before exponentiation so large logits do not overflow.
 *
 * Edge cases:
 *
 * - An empty row returns an empty array.
 * - If the maximum is infinite (every logit masked to `-Infinity`, or one or more `+Infinity`
 *   logits), subtracting it would produce `NaN` for the whole row. The limit distribution is
 *   returned instead: probability mass is split evenly across the entries equal to the maximum.
 *
 * @param row Logits for one token.
 * @returns Probabilities in the same order as `row`.
 */
function softmax(row) {
    if (row.length === 0)
        return [];
    let max = row[0];
    for (let i = 1; i < row.length; i++)
        if (row[i] > max)
            max = row[i];
    if (max === Infinity || max === -Infinity) {
        // `v - max` is NaN here; fall back to a uniform split over the maximal entries.
        const atMax = row.map((v) => v === max);
        const count = atMax.filter(Boolean).length;
        return atMax.map((hit) => (hit ? 1 / count : 0));
    }
    const exps = row.map((v) => Math.exp(v - max));
    const sum = exps.reduce((a, b) => a + b, 0);
    return exps.map((e) => e / sum);
}
105
+ //# sourceMappingURL=span-logit-aggregation.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"span-logit-aggregation.js","sourceRoot":"","sources":["../../pipeline/span-logit-aggregation.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAqBH;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,UAAU,mBAAmB,CAClC,MAAkB,EAClB,MAA6B,EAC7B,KAA4B,EAC5B,IAAkD;IAElD,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,IAAI,CAAC,CAAA;IAC3B,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAA;IAE1B,MAAM,UAAU,GAA0B,EAAE,CAAA;IAE5C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,uDAAuD;QACvD,MAAM,WAAW,GAAa,EAAE,CAAA;QAChC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;YACpB,IAAI,CAAC,CAAC,GAAG,IAAI,IAAI,CAAC,KAAK;gBAAE,SAAQ;YACjC,IAAI,CAAC,CAAC,KAAK,IAAI,IAAI,CAAC,GAAG;gBAAE,MAAK;YAC9B,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACpB,CAAC;QAED,IAAI,WAAW,CAAC,MAAM,KAAK,CAAC;YAAE,SAAQ;QAEtC,wEAAwE;QACxE,MAAM,SAAS,GAAG,IAAI,GAAG,EAAkB,CAAA;QAE3C,KAAK,MAAM,CAAC,IAAI,WAAW,EAAE,CAAC;YAC7B,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAA;YACjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACxC,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;gBAC3B,MAAM,GAAG,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAA;gBACpC,IAAI,GAAG,KAAK,GAAG;oBAAE,SAAQ;gBACzB,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;gBACpC,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,GAAG,KAAK,CAAC,CAAC,CAAE,CAAC,CAAA;YACrC,CAAC;QACF,CAAC;QAED,4EAA4E;QAC5E,MAAM,IAAI,GAAG,WAAW,CAAC,MAAM,CAAA;QAC/B,MAAM,MAAM,GAAG,CAAC,GAAG,SAAS,CAAC,OAAO,EAAE,CAAC;aACrC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,KAAK,GAAG,IAAI,EAAE,CAAC,CAAC;aACrD,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC;aACjC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,CAAA;QAEhB,KAAK,MAAM,EAAE,GAAG,EAAE,KAAK,EAAE,IAAI,MAAM,EAAE,CAAC;YACrC,UAAU,CAAC,IAAI,CAAC;gBACf,IAAI,EAAE,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,GAAG,EAAE,IAAI,CAAC,GAAG,EAAE;gBAC1C,GAAG,EAAE,GAAmB;gBACxB,KAAK;aACL,CAAC,CAAA;QACH,CAAC;IACF,CAAC;IAED,OAAO,UAAU,CAAA;AAClB,CAAC;AAED;;G
AEG;AACH,SAAS,cAAc,CAAC,KAAa;IACpC,IAAI,KAAK,KAAK,GAAG;QAAE,OAAO,GAAG,CAAA;IAC7B,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;IAC/B,IAAI,IAAI,KAAK,CAAC,CAAC;QAAE,OAAO,KAAK,CAAA;IAC7B,OAAO,KAAK,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,CAAA;AAC7B,CAAC;AAED,uDAAuD;AACvD,SAAS,OAAO,CAAC,GAAsB;IACtC,IAAI,GAAG,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;IACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,IAAI,GAAG,CAAC,CAAC,CAAE,GAAG,GAAG;YAAE,GAAG,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;IACrE,MAAM,IAAI,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAA;IAC9C,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAA;IAC3C,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,GAAG,CAAC,CAAA;AAChC,CAAC"}
@@ -0,0 +1,189 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Types for the runtime pipeline coordinator (`runPipeline`).
7
+ *
8
+ * Generic over its stage implementations — each stage is an injected function or class, defined
9
+ * structurally. Keeps `@mailwoman/core` free of dependencies on the concrete neural / normalize /
10
+ * query-shape / resolver packages while still composing them at runtime when callers wire them
11
+ * up.
12
+ *
13
+ * See `docs/articles/plan/reference/STAGES.md` for the full contract this implements.
14
+ */
15
+ import type { AddressTree } from "../decoder/types.js";
16
+ import type { ResolveOpts, Resolver } from "../resolver/types.js";
17
+ import type { Section } from "../types/classifier.js";
18
+ export type LocaleTag = string; // Locale identifier carried as a plain string; this alias enforces no format.
19
+ /** Optional user-location signal for Stage 6 resolver scoring. One of three shapes: a `lat`/`lon` pair, a `country` alone, or a `region` together with its `country`. */
20
+ export type UserLocation = {
21
+ lat: number;
22
+ lon: number;
23
+ } | {
24
+ country: string;
25
+ } | {
26
+ region: string;
27
+ country: string;
28
+ };
29
+ /** Common opts threaded through every stage. Every field is optional. */
30
+ export interface PipelineOpts {
31
+ locale?: LocaleTag; // NOTE(review): presumably the caller-asserted locale forwarded to `detectLocale` as its `hint` — confirm in the coordinator.
32
+ userLocation?: UserLocation; // See `UserLocation` for the accepted shapes.
33
+ /** Disable fast-path shortcuts; always run the full pipeline. */
34
+ forceFullPipeline?: boolean;
35
+ /**
36
+ * Enable the joint-reconcile path (Stage 5 beam search over candidates). When set, the pipeline:
37
+ *
38
+ * 1. Aggregates per-token logits over phrase-grouper spans (per-span top-K tag candidates).
39
+ * 2. Feeds candidates into `reconcileSpans` for joint-coherence scoring.
40
+ * 3. Returns the reconciled tree instead of the argmax tree.
41
+ *
42
+ * Requires both a phrase grouper AND a classifier that exposes raw logits (the standard
43
+ * `NeuralAddressClassifier` does via its `parseWithLogits` method). Falls back to argmax if
44
+ * either is missing.
45
+ */
46
+ forceJointReconcile?: boolean;
47
+ /** Hard cap on lookups the resolver may issue; passed through. */
48
+ resolveOpts?: ResolveOpts;
49
+ signal?: AbortSignal; // NOTE(review): presumably lets the caller cancel an in-flight run — confirm which stages honor it.
50
+ }
51
+ /** Minimal structural shape `NormalizedInput` must satisfy. Compatible with @mailwoman/normalize. Used as the input type of the later stage hooks on `RuntimePipelineStages`. */
52
+ export interface NormalizedInputLite {
53
+ raw: string; // NOTE(review): presumably the text exactly as received, before normalization — confirm.
54
+ normalized: string; // Text after normalization.
55
+ appliedLocale?: string; // Optional; NOTE(review): presumably the locale the normalizer applied — confirm.
56
+ }
57
+ /** Minimal structural shape `QueryShape` must satisfy. Compatible with @mailwoman/query-shape. Only `knownFormats` is required; the remaining fields are optional. */
58
+ export interface QueryShapeLite {
59
+ knownFormats: ReadonlyArray<{ // Recognized format matches, each with the span it covers and a confidence.
60
+ format: string;
61
+ span: { // NOTE(review): offset unit (characters vs. tokens) is not stated here — confirm against @mailwoman/query-shape.
62
+ start: number;
63
+ end: number;
64
+ };
65
+ confidence: number;
66
+ }>;
67
+ segments?: ReadonlyArray<{ // Optional list of input segments: each segment's text (`body`) and its `index`.
68
+ body: string;
69
+ index: number;
70
+ }>;
71
+ characterClass?: string;
72
+ totalLength?: number;
73
+ }
74
+ /** Detected (or asserted) locale + alternatives. Each alternative carries its own confidence. */
75
+ export interface LocaleHint {
76
+ locale: LocaleTag; // The selected locale.
77
+ confidence: number; // NOTE(review): range not stated here (presumably 0..1, like `PhraseProposal.confidence`) — confirm.
78
+ alternatives: ReadonlyArray<{
79
+ locale: LocaleTag;
80
+ confidence: number;
81
+ }>;
82
+ source: "caller" | "detected" | "ensemble"; // Where the locale came from: supplied by the caller, detected, or produced by an ensemble.
83
+ }
84
+ /** Kind classifier output: the closed set of query kinds that `QueryKindResult.kind` can take. */
85
+ export type QueryKind = "postcode_only" | "locality_only" | "structured_address" | "intersection" | "po_box" | "landmark" | "vague";
86
+ export interface QueryKindResult { // Result type of the `classifyKind` stage: the chosen kind plus scored alternatives.
87
+ kind: QueryKind; // The selected kind.
88
+ confidence: number; // NOTE(review): range not stated here (presumably 0..1) — confirm.
89
+ alternatives: ReadonlyArray<{ // Other kinds considered, each with its own confidence.
90
+ kind: QueryKind;
91
+ confidence: number;
92
+ }>;
93
+ }
94
+ /**
95
+ * Structural phrase kinds for Stage 2.7 phrase grouper output. Each kind is a coarse phrase-shape hypothesis attached to a `Section` (sub-Span
96
+ * of the tokenized input). The classifier (Stage 3) conditions on these proposals so it can answer
97
+ * the simpler "what type is this proposed span?" instead of jointly discovering boundaries and
98
+ * types. The reconciler (Stage 5) consumes them as boundary candidates for joint decoding.
99
+ *
100
+ * Taxonomy is purely structural — no place-name knowledge. A `LOCALITY_PHRASE` proposal is "this
101
+ * looks shaped like a multi-word capitalized phrase that could be a city name" — not "this IS New
102
+ * York." Typing the span is the classifier's job.
103
+ *
104
+ * See `docs/articles/concepts/the-knowledge-ladder.md` § Phrase grouper for the design rationale.
105
+ */
106
+ export type PhraseKind = "NUMERIC" | "STREET_PHRASE" | "LOCALITY_PHRASE" | "REGION_ABBREVIATION" | "POSTCODE" | "VENUE_PHRASE" | "HYPHENATED_COMPOUND";
107
+ /**
108
+ * One phrase proposal emitted by Stage 2.7. The contract:
109
+ *
110
+ * - `span`: the input slice (sub-Span of the tokenized input) the proposal applies to.
111
+ * - `kindHypothesis`: structural shape this slice looks like (one of `PhraseKind`).
112
+ * - `confidence`: 0..1 score. Used by downstream stages to weight proposals.
113
+ *
114
+ * Per "possibilities not constraints", emit a proposal whenever a rule fires — overlapping
115
+ * proposals over the same tokens are expected (e.g. `Saint Petersburg` may surface as one
116
+ * two-word `LOCALITY_PHRASE` AND as two single-word `LOCALITY_PHRASE`s, with confidence ordering signalling which the
117
+ * grouper prefers).
118
+ */
119
+ export interface PhraseProposal {
120
+ span: Section;
121
+ kindHypothesis: PhraseKind;
122
+ confidence: number;
123
+ }
124
+ /**
125
+ * Stage 2.7 contract. Structural — any of the rule-based grouper (`@mailwoman/phrase-grouper`), a
126
+ * learned span proposer (future), or a fake for tests satisfies this. Async so the coordinator can
127
+ * stay uniform even when implementations call into models. `group` takes the normalized input, its
128
+ * query shape and the locale hint, and resolves to the list of phrase proposals.
129
+ */
129
+ export interface PhraseGrouper {
130
+ group(input: NormalizedInputLite, shape: QueryShapeLite, locale: LocaleHint): Promise<PhraseProposal[]>;
131
+ }
132
+ /**
133
+ * Stage 3 contract: classifier that turns a text into an `AddressTree`. Structural — any of
134
+ * `@mailwoman/neural`'s `NeuralAddressClassifier`, a rule-based classifier, or a fake for tests
135
+ * satisfies this. NOTE(review): the `parseWithLogits` method that `PipelineOpts.forceJointReconcile`
136
+ * refers to is not part of this contract — confirm how the coordinator detects it.
136
+ */
137
+ export interface AddressClassifier {
138
+ parse(text: string, opts?: { // Resolves to the parsed tree; `opts` and its `queryShape` are optional.
139
+ queryShape?: QueryShapeLite;
140
+ }): Promise<AddressTree>;
141
+ }
142
+ /**
143
+ * Injectable stage implementations. All optional — when a stage is absent, the coordinator either
144
+ * skips it (resolver) or substitutes a no-op stub (normalize / queryShape / locale gate / kind
145
+ * classifier). The classifier is required for the full pipeline path; without it, the coordinator
146
+ * can only fast-path on QueryShape known-formats.
147
+ */
148
+ export interface RuntimePipelineStages {
149
+ normalize?: (raw: string, opts?: { // Synchronous: returns the normalized input directly, not a Promise.
150
+ locale?: string;
151
+ }) => NormalizedInputLite;
152
+ computeQueryShape?: (input: NormalizedInputLite | string, opts?: { // Synchronous; accepts either a normalized input or a bare string.
153
+ locale?: string;
154
+ }) => QueryShapeLite;
155
+ detectLocale?: (input: NormalizedInputLite, shape: QueryShapeLite, opts?: { // Async; may be given an optional locale `hint`.
156
+ hint?: LocaleTag;
157
+ }) => Promise<LocaleHint>;
158
+ classifyKind?: (input: NormalizedInputLite, shape: QueryShapeLite, locale: LocaleHint) => Promise<QueryKindResult>; // Async kind classifier.
159
+ /**
160
+ * Stage 2.7 phrase grouper. Emits coherent input-unit proposals consumed by Stage 3 (as
161
+ * conditioning) and Stage 5 (as boundary candidates). Hard dep in v0.5.0; pre-v0.5.0 callers run
162
+ * with no grouper and the result `phraseProposals` field is empty.
163
+ */
164
+ groupPhrases?: (input: NormalizedInputLite, shape: QueryShapeLite, locale: LocaleHint) => Promise<PhraseProposal[]>;
165
+ classifier?: AddressClassifier; // Stage 3 classifier; see `AddressClassifier`.
166
+ resolver?: Resolver; // Resolver stage; skipped when absent (see the interface comment).
167
+ }
168
+ export interface PipelineTiming { // Per-stage timings reported on `PipelineResult.timing`.
169
+ [stage: string]: number; // Keyed by stage name; NOTE(review): unit is not stated here (presumably milliseconds) — confirm.
170
+ }
171
+ /** Result of one `runPipeline` call: the intermediate stage outputs, the final tree, per-stage timing and the path taken. */
172
+ export interface PipelineResult {
173
+ input: string; // NOTE(review): presumably the raw text passed to `runPipeline` — confirm.
174
+ normalized: NormalizedInputLite; // Normalization output.
175
+ queryShape: QueryShapeLite; // Query-shape output.
176
+ locale: LocaleHint; // Locale decision.
177
+ kind: QueryKindResult; // Kind classification.
178
+ /**
179
+ * Stage 2.7 phrase proposals when a grouper was wired. Empty array when the coordinator ran with
180
+ * no grouper (pre-v0.5.0 callers) or when the fast-path skipped Stage 2.7. Stage 3 consumes this
181
+ * as conditioning; Stage 5 consumes it as boundary candidates.
182
+ */
183
+ phraseProposals: PhraseProposal[];
184
+ tree: AddressTree; // The address tree produced by this run.
185
+ timing: PipelineTiming; // Per-stage timings; see `PipelineTiming`.
186
+ /** Which path the coordinator took. `"fast-path"` skipped stages 3-5. */
187
+ path: "fast-path" | "full";
188
+ }
189
+ //# sourceMappingURL=types.d.ts.map