@mailwoman/neural 4.4.0 → 4.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/out/address-system.d.ts +6 -6
- package/out/address-system.js +6 -6
- package/out/anchor-inference.d.ts +4 -2
- package/out/anchor-inference.d.ts.map +1 -1
- package/out/anchor-inference.js +3 -1
- package/out/anchor-inference.js.map +1 -1
- package/out/browser.d.ts +1 -0
- package/out/browser.d.ts.map +1 -1
- package/out/browser.js +5 -0
- package/out/browser.js.map +1 -1
- package/out/classifier.d.ts +49 -13
- package/out/classifier.d.ts.map +1 -1
- package/out/classifier.js +31 -5
- package/out/classifier.js.map +1 -1
- package/out/gazetteer-inference.d.ts +17 -15
- package/out/gazetteer-inference.d.ts.map +1 -1
- package/out/gazetteer-inference.js +16 -14
- package/out/gazetteer-inference.js.map +1 -1
- package/out/index.d.ts +2 -0
- package/out/index.d.ts.map +1 -1
- package/out/index.js +2 -0
- package/out/index.js.map +1 -1
- package/out/onnx-runner.d.ts +2 -2
- package/out/onnx-runner.d.ts.map +1 -1
- package/out/onnx-runner.js +3 -2
- package/out/onnx-runner.js.map +1 -1
- package/out/span-bridge.d.ts +29 -13
- package/out/span-bridge.d.ts.map +1 -1
- package/out/span-bridge.js +33 -18
- package/out/span-bridge.js.map +1 -1
- package/out/span-proposal-prior.d.ts +59 -0
- package/out/span-proposal-prior.d.ts.map +1 -0
- package/out/span-proposal-prior.js +93 -0
- package/out/span-proposal-prior.js.map +1 -0
- package/out/span-proposer-lexicon.d.ts +27 -0
- package/out/span-proposer-lexicon.d.ts.map +1 -0
- package/out/span-proposer-lexicon.js +120 -0
- package/out/span-proposer-lexicon.js.map +1 -0
- package/package.json +3 -3
package/out/address-system.d.ts
CHANGED
|
@@ -3,14 +3,14 @@
|
|
|
3
3
|
* @license AGPL-3.0
|
|
4
4
|
* @author Teffen Ellis, et al.
|
|
5
5
|
*
|
|
6
|
-
* Address-system detection from the model's locale head (#511 Tier A — the consumer the head
|
|
7
|
-
*
|
|
8
|
-
*
|
|
6
|
+
* Address-system detection from the model's locale head (#511 Tier A — the consumer the head never
|
|
7
|
+
* had). The PR3 self-conditioning head predicts which country an address belongs to from the
|
|
8
|
+
* pooled sequence; v1.1.0+ exports surface it as the `locale_logits` ONNX output. This module
|
|
9
9
|
* turns that posterior into a `SystemCode` the conventions layer can act on.
|
|
10
10
|
*
|
|
11
|
-
* Conservative by contract: below the confidence threshold, or for locales without a codex
|
|
12
|
-
*
|
|
13
|
-
*
|
|
11
|
+
* Conservative by contract: below the confidence threshold, or for locales without a codex system
|
|
12
|
+
* slice, detection returns null and the parse proceeds exactly as before. The mask must never
|
|
13
|
+
* fire on a guess.
|
|
14
14
|
*/
|
|
15
15
|
import type { SystemCode } from "@mailwoman/codex";
|
|
16
16
|
/**
|
package/out/address-system.js
CHANGED
|
@@ -3,14 +3,14 @@
|
|
|
3
3
|
* @license AGPL-3.0
|
|
4
4
|
* @author Teffen Ellis, et al.
|
|
5
5
|
*
|
|
6
|
-
* Address-system detection from the model's locale head (#511 Tier A — the consumer the head
|
|
7
|
-
*
|
|
8
|
-
*
|
|
6
|
+
* Address-system detection from the model's locale head (#511 Tier A — the consumer the head never
|
|
7
|
+
* had). The PR3 self-conditioning head predicts which country an address belongs to from the
|
|
8
|
+
* pooled sequence; v1.1.0+ exports surface it as the `locale_logits` ONNX output. This module
|
|
9
9
|
* turns that posterior into a `SystemCode` the conventions layer can act on.
|
|
10
10
|
*
|
|
11
|
-
* Conservative by contract: below the confidence threshold, or for locales without a codex
|
|
12
|
-
*
|
|
13
|
-
*
|
|
11
|
+
* Conservative by contract: below the confidence threshold, or for locales without a codex system
|
|
12
|
+
* slice, detection returns null and the parse proceeds exactly as before. The mask must never
|
|
13
|
+
* fire on a guess.
|
|
14
14
|
*/
|
|
15
15
|
import { softmax } from "./viterbi.js";
|
|
16
16
|
/**
|
|
@@ -37,11 +37,13 @@ export type AnchorLookup = Map<string, AnchorEntry>;
|
|
|
37
37
|
*/
|
|
38
38
|
export declare function anchorFeatureVector(posterior: Record<string, number>, lat: number, lon: number): number[];
|
|
39
39
|
/**
|
|
40
|
-
* Parse the pilot postcode→anchor lookup JSON (`{postcode: [posterior, lat, lon]}`) into a
|
|
40
|
+
* Parse the pilot postcode→anchor lookup JSON (`{postcode: [posterior, lat, lon, source?]}`) into a
|
|
41
|
+
* Map. The optional trailing `source` is the centroid's provenance label (#525 — `"wof"`,
|
|
42
|
+
* `"census-zcta-2024"`, or `null` for a placeholder); build-side bookkeeping, ignored at inference.
|
|
41
43
|
* Pure (takes the parsed object, not a path) so this module stays browser-safe — the file read
|
|
42
44
|
* lives in the Node-side caller (the eval).
|
|
43
45
|
*/
|
|
44
|
-
export declare function parseAnchorLookup(raw: Record<string, [Record<string, number>, number, number]>): AnchorLookup;
|
|
46
|
+
export declare function parseAnchorLookup(raw: Record<string, [Record<string, number>, number, number, (string | null)?]>): AnchorLookup;
|
|
45
47
|
/**
|
|
46
48
|
* Per-piece anchor features + confidence for `text`, projected onto its SP `pieces` by the SAME
|
|
47
49
|
* char→piece rule the labels use (a piece takes the anchor of the postcode span its first
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"anchor-inference.d.ts","sourceRoot":"","sources":["../anchor-inference.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAA;AAEpD;;;;GAIG;AACH,eAAO,MAAM,YAAY,iEAAkE,CAAA;AAE3F,6EAA6E;AAC7E,eAAO,MAAM,kBAAkB,QAA0B,CAAA;AAEzD,mGAAmG;AACnG,MAAM,WAAW,WAAW;IAC3B,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IACjC,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,EAAE,MAAM,CAAA;CACX;AAED,MAAM,MAAM,YAAY,GAAG,GAAG,CAAC,MAAM,EAAE,WAAW,CAAC,CAAA;AAEnD;;;;GAIG;AACH,wBAAgB,mBAAmB,CAAC,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,EAAE,CAgBzG;AAED
|
|
1
|
+
{"version":3,"file":"anchor-inference.d.ts","sourceRoot":"","sources":["../anchor-inference.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAA;AAEpD;;;;GAIG;AACH,eAAO,MAAM,YAAY,iEAAkE,CAAA;AAE3F,6EAA6E;AAC7E,eAAO,MAAM,kBAAkB,QAA0B,CAAA;AAEzD,mGAAmG;AACnG,MAAM,WAAW,WAAW;IAC3B,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IACjC,GAAG,EAAE,MAAM,CAAA;IACX,GAAG,EAAE,MAAM,CAAA;CACX;AAED,MAAM,MAAM,YAAY,GAAG,GAAG,CAAC,MAAM,EAAE,WAAW,CAAC,CAAA;AAEnD;;;;GAIG;AACH,wBAAgB,mBAAmB,CAAC,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,MAAM,EAAE,CAgBzG;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAChC,GAAG,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,GAC7E,YAAY,CAId;AAED;;;;;;;;GAQG;AACH,wBAAgB,mBAAmB,CAClC,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,aAAa,CAAC,cAAc,CAAC,EACrC,MAAM,EAAE,YAAY,GAClB;IAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,CAAC;IAAC,UAAU,EAAE,MAAM,EAAE,CAAA;CAAE,CA0BhD"}
|
package/out/anchor-inference.js
CHANGED
|
@@ -46,7 +46,9 @@ export function anchorFeatureVector(posterior, lat, lon) {
|
|
|
46
46
|
return vec;
|
|
47
47
|
}
|
|
48
48
|
/**
|
|
49
|
-
* Parse the pilot postcode→anchor lookup JSON (`{postcode: [posterior, lat, lon]}`) into a
|
|
49
|
+
* Parse the pilot postcode→anchor lookup JSON (`{postcode: [posterior, lat, lon, source?]}`) into a
|
|
50
|
+
* Map. The optional trailing `source` is the centroid's provenance label (#525 — `"wof"`,
|
|
51
|
+
* `"census-zcta-2024"`, or `null` for a placeholder); build-side bookkeeping, ignored at inference.
|
|
50
52
|
* Pure (takes the parsed object, not a path) so this module stays browser-safe — the file read
|
|
51
53
|
* lives in the Node-side caller (the eval).
|
|
52
54
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"anchor-inference.js","sourceRoot":"","sources":["../anchor-inference.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAIH;;;;GAIG;AACH,MAAM,CAAC,MAAM,YAAY,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAU,CAAA;AAE3F,6EAA6E;AAC7E,MAAM,CAAC,MAAM,kBAAkB,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAA;AAWzD;;;;GAIG;AACH,MAAM,UAAU,mBAAmB,CAAC,SAAiC,EAAE,GAAW,EAAE,GAAW;IAC9F,MAAM,GAAG,GAAG,IAAI,KAAK,CAAS,kBAAkB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACzD,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,KAAK,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,CAAC;QAC3D,MAAM,GAAG,GAAG,YAAY,CAAC,OAAO,CAAC,OAAO,CAAC,WAAW,EAAmC,CAAC,CAAA;QACxF,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC;YACd,GAAG,CAAC,GAAG,CAAC,GAAG,MAAM,CAAA;YACjB,KAAK,IAAI,MAAM,CAAA;QAChB,CAAC;IACF,CAAC;IACD,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;QACf,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE;YAAE,GAAG,CAAC,CAAC,CAAE,IAAI,KAAK,CAAA;IAC/D,CAAC;IACD,GAAG,CAAC,YAAY,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,GAAG,EAAE,CAAC,CAAC,CAAA;IAC9D,GAAG,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,GAAG,GAAG,CAAC,CAAC,CAAA;IACnE,OAAO,GAAG,CAAA;AACX,CAAC;AAED
|
|
1
|
+
{"version":3,"file":"anchor-inference.js","sourceRoot":"","sources":["../anchor-inference.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAIH;;;;GAIG;AACH,MAAM,CAAC,MAAM,YAAY,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAU,CAAA;AAE3F,6EAA6E;AAC7E,MAAM,CAAC,MAAM,kBAAkB,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAA;AAWzD;;;;GAIG;AACH,MAAM,UAAU,mBAAmB,CAAC,SAAiC,EAAE,GAAW,EAAE,GAAW;IAC9F,MAAM,GAAG,GAAG,IAAI,KAAK,CAAS,kBAAkB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACzD,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,KAAK,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,CAAC;QAC3D,MAAM,GAAG,GAAG,YAAY,CAAC,OAAO,CAAC,OAAO,CAAC,WAAW,EAAmC,CAAC,CAAA;QACxF,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC;YACd,GAAG,CAAC,GAAG,CAAC,GAAG,MAAM,CAAA;YACjB,KAAK,IAAI,MAAM,CAAA;QAChB,CAAC;IACF,CAAC;IACD,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;QACf,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE;YAAE,GAAG,CAAC,CAAC,CAAE,IAAI,KAAK,CAAA;IAC/D,CAAC;IACD,GAAG,CAAC,YAAY,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,GAAG,EAAE,CAAC,CAAC,CAAA;IAC9D,GAAG,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,GAAG,GAAG,CAAC,CAAC,CAAA;IACnE,OAAO,GAAG,CAAA;AACX,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,iBAAiB,CAChC,GAA+E;IAE/E,MAAM,GAAG,GAAiB,IAAI,GAAG,EAAE,CAAA;IACnC,KAAK,MAAM,CAAC,EAAE,EAAE,CAAC,SAAS,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC;QAAE,GAAG,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,SAAS,EAAE,GAAG,EAAE,GAAG,EAAE,CAAC,CAAA;IACnG,OAAO,GAAG,CAAA;AACX,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,mBAAmB,CAClC,IAAY,EACZ,MAAqC,EACrC,MAAoB;IAEpB,MAAM,QAAQ,GAAe,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,KAAK,CAAS,kBAAkB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAA;IAC5F,MAAM,UAAU,GAAa,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAA;IAEhD,MAAM,OAAO,GAAG,eAAe,CAAA;IAC/B,IAAI,CAAyB,CAAA;IAC7B,OAAO,CAAC,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC1C,
MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAA;QAC5C,IAAI,CAAC,KAAK;YAAE,SAAQ;QACpB,MAAM,SAAS,GAAG,CAAC,CAAC,KAAK,CAAA;QACzB,MAAM,OAAO,GAAG,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAA;QACrC,MAAM,GAAG,GAAG,mBAAmB,CAAC,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,GAAG,EAAE,KAAK,CAAC,GAAG,CAAC,CAAA;QACtE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;YACpB,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;gBACtC,IAAI,CAAC,GAAG,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAE,CAAC,EAAE,CAAC;oBAC7C,IAAI,CAAC,IAAI,SAAS,IAAI,CAAC,GAAG,OAAO,EAAE,CAAC;wBACnC,QAAQ,CAAC,CAAC,CAAC,GAAG,GAAG,CAAA;wBACjB,UAAU,CAAC,CAAC,CAAC,GAAG,GAAG,CAAA;oBACpB,CAAC;oBACD,MAAK,CAAC,oFAAoF;gBAC3F,CAAC;YACF,CAAC;QACF,CAAC;IACF,CAAC;IACD,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,CAAA;AAChC,CAAC"}
|
package/out/browser.d.ts
CHANGED
|
@@ -14,5 +14,6 @@ export * from "./labels.js";
|
|
|
14
14
|
export * from "./tokenizer.js";
|
|
15
15
|
export * from "./anchor-inference.js";
|
|
16
16
|
export * from "./postcode-binary-resolver.js";
|
|
17
|
+
export * from "./gazetteer-inference.js";
|
|
17
18
|
export type { InferResult } from "./onnx-runner.js";
|
|
18
19
|
//# sourceMappingURL=browser.d.ts.map
|
package/out/browser.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"browser.d.ts","sourceRoot":"","sources":["../browser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,gBAAgB,CAAA;AAG9B,cAAc,uBAAuB,CAAA;AACrC,cAAc,+BAA+B,CAAA;
|
|
1
|
+
{"version":3,"file":"browser.d.ts","sourceRoot":"","sources":["../browser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,gBAAgB,CAAA;AAG9B,cAAc,uBAAuB,CAAA;AACrC,cAAc,+BAA+B,CAAA;AAK7C,cAAc,0BAA0B,CAAA;AAGxC,YAAY,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAA"}
|
package/out/browser.js
CHANGED
|
@@ -16,4 +16,9 @@ export * from "./tokenizer.js";
|
|
|
16
16
|
// (zero-dep) the demo wires together to feed the anchor at inference.
|
|
17
17
|
export * from "./anchor-inference.js";
|
|
18
18
|
export * from "./postcode-binary-resolver.js";
|
|
19
|
+
// Browser-safe gazetteer-anchor channel (#464): the lexicon parser + feature builder + the postcode
|
|
20
|
+
// choreography suppressor. Pure JS over a JSON lexicon — the demo fetches the lexicon alongside the
|
|
21
|
+
// model and feeds the clue at inference (gazetteer-trained models REQUIRE it; zero-filled clues are
|
|
22
|
+
// the measured train/inference mismatch — see CONTRIBUTING_MODEL_WORK.mdx "zero-fill trap").
|
|
23
|
+
export * from "./gazetteer-inference.js";
|
|
19
24
|
//# sourceMappingURL=browser.js.map
|
package/out/browser.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"browser.js","sourceRoot":"","sources":["../browser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,gBAAgB,CAAA;AAC9B,sGAAsG;AACtG,sEAAsE;AACtE,cAAc,uBAAuB,CAAA;AACrC,cAAc,+BAA+B,CAAA"}
|
|
1
|
+
{"version":3,"file":"browser.js","sourceRoot":"","sources":["../browser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,gBAAgB,CAAA;AAC9B,sGAAsG;AACtG,sEAAsE;AACtE,cAAc,uBAAuB,CAAA;AACrC,cAAc,+BAA+B,CAAA;AAC7C,oGAAoG;AACpG,oGAAoG;AACpG,oGAAoG;AACpG,6FAA6F;AAC7F,cAAc,0BAA0B,CAAA"}
|
package/out/classifier.d.ts
CHANGED
|
@@ -9,13 +9,15 @@
|
|
|
9
9
|
*
|
|
10
10
|
* Convenience wrappers `parseJson` / `parseTuples` / `parseXml` project the tree on the way out.
|
|
11
11
|
*/
|
|
12
|
-
import { decodeAsXml, type AddressTree, type Calibrator, type ComponentTag } from "@mailwoman/core/decoder";
|
|
13
12
|
import { type SystemCode } from "@mailwoman/codex";
|
|
13
|
+
import { decodeAsXml, type AddressTree, type Calibrator, type ComponentTag } from "@mailwoman/core/decoder";
|
|
14
|
+
import { type SpanProposerLexicon } from "@mailwoman/core/pipeline";
|
|
14
15
|
import { type AnchorLookup } from "./anchor-inference.js";
|
|
15
|
-
import { type GazetteerLexicon } from "./gazetteer-inference.js";
|
|
16
16
|
import { type FstMatcherLike } from "./fst-prior.js";
|
|
17
|
+
import { type GazetteerLexicon } from "./gazetteer-inference.js";
|
|
17
18
|
import type { InferResult } from "./onnx-runner.js";
|
|
18
19
|
import { type QueryShapeLike } from "./query-shape-prior.js";
|
|
20
|
+
import { type SpanProposalPriorOpts } from "./span-proposal-prior.js";
|
|
19
21
|
import { type StreetMorphologyPriorOpts } from "./street-morphology-prior.js";
|
|
20
22
|
import { MailwomanTokenizer } from "./tokenizer.js";
|
|
21
23
|
import type { ResolveWeightsOpts } from "./weights.js";
|
|
@@ -83,25 +85,48 @@ export interface NeuralAddressClassifierConfig {
|
|
|
83
85
|
*
|
|
84
86
|
* PAIRING IS LOAD-BEARING: set this IFF the model was TRAINED with the matching train-time
|
|
85
87
|
* choreography (`data.gazetteer_choreography`). The 2026-06-10 diagnostic showed the harm is
|
|
86
|
-
* WEIGHT-BAKED — applying this at inference on a model trained
|
|
88
|
+
* WEIGHT-BAKED — applying this at inference on a model trained _without_ train-choreography does
|
|
87
89
|
* NOT recover postcode and adds train/inference skew. Only enable for a consolidation-era model
|
|
88
90
|
* trained with the train-time half.
|
|
89
91
|
*/
|
|
90
92
|
suppressGazetteerNearPostcode?: boolean;
|
|
91
93
|
/**
|
|
92
|
-
* Default address-system conventions mode for every parse (see
|
|
93
|
-
* for semantics — `"auto"` reads the model's locale head; a
|
|
94
|
-
* override this. Omit for the byte-stable pre-#511 default
|
|
94
|
+
* Default address-system conventions mode for every parse (see
|
|
95
|
+
* `ParseOpts.addressSystemConventions` for semantics — `"auto"` reads the model's locale head; a
|
|
96
|
+
* `SystemCode` pins it). Per-parse opts override this. Omit for the byte-stable pre-#511 default
|
|
97
|
+
* (no detection, no mask).
|
|
95
98
|
*/
|
|
96
99
|
addressSystemConventions?: "auto" | SystemCode;
|
|
97
100
|
/**
|
|
98
|
-
* Punctuation-gap span bridging (the v4.4.0 corrective; see `span-bridge.ts`). The corpus
|
|
99
|
-
*
|
|
100
|
-
*
|
|
101
|
-
*
|
|
102
|
-
* pre-v4.4.0 behavior.
|
|
101
|
+
* Punctuation-gap span bridging (the v4.4.0 corrective; see `span-bridge.ts`). The corpus label
|
|
102
|
+
* format cannot express punctuation inside a span, so dotted surfaces ("P.O. Box", "C.P.") decode
|
|
103
|
+
* as fragments. When true, adjacent same-tag spans separated only by short punctuation gaps are
|
|
104
|
+
* merged after decode. Per-parse opts override. Omit for the byte-stable pre-v4.4.0 behavior.
|
|
103
105
|
*/
|
|
104
106
|
bridgePunctuationGaps?: boolean;
|
|
107
|
+
/**
|
|
108
|
+
* Stage 2.7 span proposer (M2+M3 from the punctuation survey, #518). When set, every parse runs
|
|
109
|
+
* `proposeSpans` (`@mailwoman/core/pipeline`) over the raw text and consumes the typed proposals
|
|
110
|
+
* two ways: (a) as additive emission priors — the phrase-prior path; the classifier conditions on
|
|
111
|
+
* the boundary hypotheses and can still disagree — and (b) ANNOTATION/QUOTED span boundaries feed
|
|
112
|
+
* the span bridge as merge-crossing constraints (no same-tag merge may straddle a structural
|
|
113
|
+
* delimiter). Build the lexicon with `buildCodexSpanLexicon` (`./span-proposer-lexicon.js`).
|
|
114
|
+
* Per-parse opts override.
|
|
115
|
+
*
|
|
116
|
+
* DEFAULT ON (operator ruling 2026-06-12, after the #518 measurement closed both v0-win quadrants
|
|
117
|
+
* with no class down): omitting this builds the codex lexicon lazily with the frozen measured
|
|
118
|
+
* scales (biasScale 5.0 / annotationBiasScale 12.0). Pass `false` for the proposer-free baseline
|
|
119
|
+
* (the pre-2026-06-12 byte-stable default).
|
|
120
|
+
*/
|
|
121
|
+
spanProposer?: SpanProposerConfig | false;
|
|
122
|
+
}
|
|
123
|
+
/**
|
|
124
|
+
* Config for the Stage 2.7 span-proposer integration (see
|
|
125
|
+
* `NeuralAddressClassifierConfig.spanProposer`).
|
|
126
|
+
*/
|
|
127
|
+
export interface SpanProposerConfig extends SpanProposalPriorOpts {
|
|
128
|
+
/** Codex-backed designator vocabulary (`buildCodexSpanLexicon`). */
|
|
129
|
+
lexicon: SpanProposerLexicon;
|
|
105
130
|
}
|
|
106
131
|
export declare class NeuralAddressClassifier {
|
|
107
132
|
#private;
|
|
@@ -112,6 +137,11 @@ export declare class NeuralAddressClassifier {
|
|
|
112
137
|
private readonly startTransitions;
|
|
113
138
|
private readonly endTransitions;
|
|
114
139
|
constructor(cfg: NeuralAddressClassifierConfig);
|
|
140
|
+
/**
|
|
141
|
+
* The default-ON Stage 2.7 config: codex lexicon (us/au/nz), frozen measured scales (the prior
|
|
142
|
+
* builder's own defaults). Built once per instance, only when a parse actually needs it.
|
|
143
|
+
*/
|
|
144
|
+
private defaultProposer;
|
|
115
145
|
/**
|
|
116
146
|
* One-call factory that resolves the weights package (or explicit paths), loads the tokenizer and
|
|
117
147
|
* ONNX runner, and returns a ready-to-use classifier.
|
|
@@ -223,6 +253,12 @@ export interface ParseOpts {
|
|
|
223
253
|
calibrate?: Calibrator;
|
|
224
254
|
/** Per-parse override of the config-level `bridgePunctuationGaps` (see that doc). */
|
|
225
255
|
bridgePunctuationGaps?: boolean;
|
|
256
|
+
/**
|
|
257
|
+
* Per-parse switch for the config-level `spanProposer` (see that doc). `false` disables the
|
|
258
|
+
* proposer for this parse; `true`/omitted runs the configured (or default codex) proposer. Cannot
|
|
259
|
+
* re-enable the stage when the config-level `spanProposer` is `false`.
|
|
260
|
+
*/
|
|
261
|
+
spanProposer?: boolean;
|
|
226
262
|
/**
|
|
227
263
|
* Address-system conventions enforcement (#511 Tier A / #478's rules-as-constraints slice).
|
|
228
264
|
*
|
|
@@ -230,8 +266,8 @@ export interface ParseOpts {
|
|
|
230
266
|
* exports; silently no-ops on models without it) and apply that system's codex conventions:
|
|
231
267
|
* forbidden tags become a hard emission mask before Viterbi, and a conventions postcode shape
|
|
232
268
|
* enables the snap-only postcode repair pass.
|
|
233
|
-
* - A `SystemCode` (`"fr"`, `"us"`, …) — apply that system's conventions unconditionally
|
|
234
|
-
*
|
|
269
|
+
* - A `SystemCode` (`"fr"`, `"us"`, …) — apply that system's conventions unconditionally (callers
|
|
270
|
+
* that already know the locale, e.g. the pipeline's BCP-47 region).
|
|
235
271
|
* - Omit — byte-stable default: no detection, no mask (pre-#511 behavior).
|
|
236
272
|
*
|
|
237
273
|
* The detection threshold is deliberately high (0.8): the mask must never fire on a guess.
|
package/out/classifier.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"classifier.d.ts","sourceRoot":"","sources":["../classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAIN,WAAW,EACX,KAAK,WAAW,EAChB,KAAK,UAAU,EACf,KAAK,YAAY,EAEjB,MAAM,yBAAyB,CAAA;AAChC,OAAO,
|
|
1
|
+
{"version":3,"file":"classifier.d.ts","sourceRoot":"","sources":["../classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAwB,KAAK,UAAU,EAAE,MAAM,kBAAkB,CAAA;AACxE,OAAO,EAIN,WAAW,EACX,KAAK,WAAW,EAChB,KAAK,UAAU,EACf,KAAK,YAAY,EAEjB,MAAM,yBAAyB,CAAA;AAChC,OAAO,EAAmC,KAAK,mBAAmB,EAAE,MAAM,0BAA0B,CAAA;AAGpG,OAAO,EAAuB,KAAK,YAAY,EAAE,MAAM,uBAAuB,CAAA;AAC9E,OAAO,EAA0B,KAAK,cAAc,EAAE,MAAM,gBAAgB,CAAA;AAC5E,OAAO,EAAyD,KAAK,gBAAgB,EAAE,MAAM,0BAA0B,CAAA;AAEvH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAA;AAEnD,OAAO,EAA0C,KAAK,cAAc,EAAE,MAAM,wBAAwB,CAAA;AAEpG,OAAO,EAA2B,KAAK,qBAAqB,EAAE,MAAM,0BAA0B,CAAA;AAE9F,OAAO,EAAuC,KAAK,yBAAyB,EAAE,MAAM,8BAA8B,CAAA;AAClH,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAA;AAGnD,OAAO,KAAK,EAAE,kBAAkB,EAAmB,MAAM,cAAc,CAAA;AAEvE;;;;GAIG;AACH,MAAM,WAAW,YAAY;IAC5B,KAAK,CACJ,QAAQ,EAAE,MAAM,EAAE,EAClB,MAAM,CAAC,EAAE;QAAE,QAAQ,EAAE,aAAa,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC;QAAC,UAAU,EAAE,aAAa,CAAC,MAAM,CAAC,CAAA;KAAE,EAC9F,SAAS,CAAC,EAAE;QAAE,QAAQ,EAAE,aAAa,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC;QAAC,UAAU,EAAE,aAAa,CAAC,MAAM,CAAC,CAAA;KAAE,GAC/F,OAAO,CAAC,WAAW,CAAC,CAAA;CACvB;AAED,MAAM,WAAW,6BAA6B;IAC7C,SAAS,EAAE,kBAAkB,CAAA;IAC7B,MAAM,EAAE,YAAY,CAAA;IACpB;;;;OAIG;IACH,MAAM,CAAC,EAAE,SAAS,MAAM,EAAE,CAAA;IAC1B;;;;;;;OAOG;IACH,MAAM,CAAC,EAAE,SAAS,GAAG,QAAQ,CAAA;IAC7B;;;;OAIG;IACH,WAAW,CAAC,EAAE,MAAM,EAAE,EAAE,CAAA;IACxB,sEAAsE;IACtE,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAA;IAC3B,oEAAoE;IACpE,cAAc,CAAC,EAAE,MAAM,EAAE,CAAA;IACzB;;;;;OAKG;IACH,oBAAoB,CAAC,EAAE,YAAY,CAAA;IACnC;;;;;;OAMG;IACH,gBAAgB,CAAC,EAAE,gBAAgB,CAAA;IACnC;;;;;;;;;;OAUG;IACH,6BAA6B,CAAC,EAAE,OAAO,CAAA;IACvC;;;;;OAKG;IACH,wBAAwB,CAAC,EAAE,MAAM,GAAG,UAAU,CAAA;IAC9C;;;;;OAKG;IACH,qBAAqB,CAAC,EAAE,OAAO,CAAA;IAC/B;;;;;;;;;;;;;OAaG;IACH,YAAY,CAAC,EAAE,kBAAkB,GAAG,KAAK,CAAA;CACzC;AAED;;;GAGG;AACH,MAAM,WAAW,kBAAmB,SAAQ,qBAAqB;IAChE,oEAAoE;IACpE,OAAO,EAAE,mBAAmB,CAAA;CAC5B;AAED,qBAAa,uBAAuB;;IASvB,OAAO,CAAC,QAAQ,CAAC,GAAG;IARhC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAmB;IAC1C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAsB;IACjD,OAAO,
CAAC,QAAQ,CAAC,WAAW,CAAY;IAGxC,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAU;IAC3C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAU;gBAEZ,GAAG,EAAE,6BAA6B;IAa/D;;;OAGG;IACH,OAAO,CAAC,eAAe;IAKvB;;;;;;;;;;;OAWG;WACU,eAAe,CAC3B,IAAI,GAAE,kBAAkB,GAAG;QAAE,oBAAoB,CAAC,EAAE,YAAY,CAAA;KAAO,GACrE,OAAO,CAAC,uBAAuB,CAAC;IA4BnC,6DAA6D;IACvD,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,WAAW,CAAC;IAMjE;;;;;;;OAOG;IACG,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,qBAAqB,CAAC;IA4J/E,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC;IAIzF,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC;IAInF,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG;QAAE,GAAG,CAAC,EAAE,UAAU,CAAC,OAAO,WAAW,CAAC,CAAC,CAAC,CAAC,CAAA;KAAE,GAAG,OAAO,CAAC,MAAM,CAAC;IAI7G;;;;;;;;;;OAUG;IACH,OAAO,CAAC,mBAAmB;CAW3B;AAED,wFAAwF;AACxF,MAAM,WAAW,qBAAqB;IACrC,IAAI,EAAE,WAAW,CAAA;IACjB,MAAM,EAAE,MAAM,EAAE,EAAE,CAAA;IAClB,MAAM,EAAE,KAAK,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,GAAG,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;CAC7C;AAED;;;GAGG;AACH,MAAM,WAAW,SAAS;IACzB;;;;OAIG;IACH,UAAU,CAAC,EAAE,cAAc,CAAA;IAC3B;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAA;IAC5B;;;OAGG;IACH,GAAG,CAAC,EAAE,cAAc,CAAA;IACpB,6DAA6D;IAC7D,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB;;;;;;OAMG;IACH,mBAAmB,CAAC,EAAE,cAAc,CAAA;IACpC,yDAAyD;IACzD,uBAAuB,CAAC,EAAE,yBAAyB,CAAA;IACnD;;;;;;OAMG;IACH,cAAc,CAAC,EAAE,OAAO,CAAA;IACxB;;;;;;OAMG;IACH,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB;;;;;OAKG;IACH,SAAS,CAAC,EAAE,UAAU,CAAA;IACtB,qFAAqF;IACrF,qBAAqB,CAAC,EAAE,OAAO,CAAA;IAC/B;;;;OAIG;IACH,YAAY,CAAC,EAAE,OAAO,CAAA;IACtB;;;;;;;;;;;;;;OAcG;IACH,wBAAwB,CAAC,EAAE,MAAM,GAAG,UAAU,CAAA;CAC9C"}
|
package/out/classifier.js
CHANGED
|
@@ -9,16 +9,19 @@
|
|
|
9
9
|
*
|
|
10
10
|
* Convenience wrappers `parseJson` / `parseTuples` / `parseXml` project the tree on the way out.
|
|
11
11
|
*/
|
|
12
|
-
import { buildAddressTree, decodeAsJson, decodeAsTuples, decodeAsXml, } from "@mailwoman/core/decoder";
|
|
13
12
|
import { conventionsForSystem } from "@mailwoman/codex";
|
|
13
|
+
import { buildAddressTree, decodeAsJson, decodeAsTuples, decodeAsXml, } from "@mailwoman/core/decoder";
|
|
14
|
+
import { proposeSpans } from "@mailwoman/core/pipeline";
|
|
14
15
|
import { detectAddressSystem } from "./address-system.js";
|
|
15
16
|
import { buildAnchorFeatures } from "./anchor-inference.js";
|
|
16
|
-
import { buildGazetteerFeatures, suppressGazetteerNearPostcode } from "./gazetteer-inference.js";
|
|
17
17
|
import { buildFstEmissionPriors } from "./fst-prior.js";
|
|
18
|
+
import { buildGazetteerFeatures, suppressGazetteerNearPostcode } from "./gazetteer-inference.js";
|
|
18
19
|
import { STAGE2_BIO_LABELS } from "./labels.js";
|
|
19
20
|
import { repairPostcodeLabels } from "./postcode-repair.js";
|
|
20
|
-
import { bridgePunctuationGaps } from "./span-bridge.js";
|
|
21
21
|
import { addEmissionMatrix, buildEmissionPriors } from "./query-shape-prior.js";
|
|
22
|
+
import { bridgePunctuationGaps } from "./span-bridge.js";
|
|
23
|
+
import { buildSpanProposalPriors } from "./span-proposal-prior.js";
|
|
24
|
+
import { buildCodexSpanLexicon } from "./span-proposer-lexicon.js";
|
|
22
25
|
import { buildStreetMorphologyEmissionPriors } from "./street-morphology-prior.js";
|
|
23
26
|
import { MailwomanTokenizer } from "./tokenizer.js";
|
|
24
27
|
import { repairUnitLabels } from "./unit-repair.js";
|
|
@@ -28,6 +31,8 @@ export class NeuralAddressClassifier {
|
|
|
28
31
|
labels;
|
|
29
32
|
decodeMode;
|
|
30
33
|
transitions;
|
|
34
|
+
/** Lazily-built default Stage 2.7 config (codex lexicon, frozen scales) — see `cfg.spanProposer`. */
|
|
35
|
+
#defaultProposerCfg;
|
|
31
36
|
startTransitions;
|
|
32
37
|
endTransitions;
|
|
33
38
|
constructor(cfg) {
|
|
@@ -44,6 +49,14 @@ export class NeuralAddressClassifier {
|
|
|
44
49
|
this.startTransitions = cfg.startTransitions ?? buildBioStartMask(this.labels);
|
|
45
50
|
this.endTransitions = cfg.endTransitions ?? buildBioEndMask(this.labels);
|
|
46
51
|
}
|
|
52
|
+
/**
|
|
53
|
+
* The default-ON Stage 2.7 config: codex lexicon (us/au/nz), frozen measured scales (the prior
|
|
54
|
+
* builder's own defaults). Built once per instance, only when a parse actually needs it.
|
|
55
|
+
*/
|
|
56
|
+
defaultProposer() {
|
|
57
|
+
this.#defaultProposerCfg ??= { lexicon: buildCodexSpanLexicon() };
|
|
58
|
+
return this.#defaultProposerCfg;
|
|
59
|
+
}
|
|
47
60
|
/**
|
|
48
61
|
* One-call factory that resolves the weights package (or explicit paths), loads the tokenizer and
|
|
49
62
|
* ONNX runner, and returns a ready-to-use classifier.
|
|
@@ -150,6 +163,17 @@ export class NeuralAddressClassifier {
|
|
|
150
163
|
if (opts?.fstStreetMorphology) {
|
|
151
164
|
emissions = addEmissionMatrix(emissions, buildStreetMorphologyEmissionPriors(opts.fstStreetMorphology, pieces, this.labels, opts.fstStreetMorphologyOpts ?? {}));
|
|
152
165
|
}
|
|
166
|
+
// Stage 2.7 span proposer (#518, M2+M3): typed span proposals consumed as phrase priors.
|
|
167
|
+
// DEFAULT ON since 2026-06-12 (operator ruling): an omitted config builds the codex lexicon
|
|
168
|
+
// lazily with the frozen measured scales; `spanProposer: false` (config or per-parse) is the
|
|
169
|
+
// proposer-free baseline. Disabled = byte-stable (no proposals computed).
|
|
170
|
+
const configured = this.cfg.spanProposer === false ? undefined : (this.cfg.spanProposer ?? this.defaultProposer());
|
|
171
|
+
const proposerCfg = (opts?.spanProposer ?? true) ? configured : undefined;
|
|
172
|
+
const spanProposals = proposerCfg ? proposeSpans(text, proposerCfg.lexicon) : [];
|
|
173
|
+
if (spanProposals.length > 0) {
|
|
174
|
+
emissions = addEmissionMatrix(emissions, buildSpanProposalPriors(spanProposals, pieces, this.labels, proposerCfg));
|
|
175
|
+
}
|
|
176
|
+
// (defaultProposer is defined right after the constructor — one lazy build per classifier instance.)
|
|
153
177
|
// Conventions emission mask: tags that are ungrammatical in the detected system are removed
|
|
154
178
|
// from the decoder's vocabulary outright (-1e9 ≈ log 0). Copy-on-mask — `emissions` may alias
|
|
155
179
|
// `logits`, which the per-token confidence below reads unmasked.
|
|
@@ -197,9 +221,11 @@ export class NeuralAddressClassifier {
|
|
|
197
221
|
}
|
|
198
222
|
// Punctuation-gap span bridging (v4.4.0 corrective — see span-bridge.ts): merge same-tag
|
|
199
223
|
// fragments split at unlabeled punctuation ("P.O. Box" decoding as P + O + Box). Opt-in,
|
|
200
|
-
// declared in the ship config like the conventions mask.
|
|
224
|
+
// declared in the ship config like the conventions mask. When the span proposer ran, its
|
|
225
|
+
// ANNOTATION/QUOTED boundaries become merge-crossing constraints (M2's second half).
|
|
201
226
|
if (opts?.bridgePunctuationGaps ?? this.cfg.bridgePunctuationGaps) {
|
|
202
|
-
|
|
227
|
+
const blockedSpans = spanProposals.filter((p) => p.kind === "ANNOTATION_SPAN" || p.kind === "QUOTED_SPAN");
|
|
228
|
+
tokens = bridgePunctuationGaps(text, tokens, blockedSpans.length > 0 ? { blockedSpans } : undefined);
|
|
203
229
|
}
|
|
204
230
|
return { tokens, logits, pieces };
|
|
205
231
|
}
|
package/out/classifier.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"classifier.js","sourceRoot":"","sources":["../classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EACN,gBAAgB,EAChB,YAAY,EACZ,cAAc,EACd,WAAW,GAKX,MAAM,yBAAyB,CAAA;AAChC,OAAO,EAAE,oBAAoB,EAAmB,MAAM,kBAAkB,CAAA;AAExE,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAA;AACzD,OAAO,EAAE,mBAAmB,EAAqB,MAAM,uBAAuB,CAAA;AAC9E,OAAO,EAAE,sBAAsB,EAAE,6BAA6B,EAAyB,MAAM,0BAA0B,CAAA;AACvH,OAAO,EAAE,sBAAsB,EAAuB,MAAM,gBAAgB,CAAA;AAC5E,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAA;AAE/C,OAAO,EAAE,oBAAoB,EAAE,MAAM,sBAAsB,CAAA;AAC3D,OAAO,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAA;AACxD,OAAO,EAAE,iBAAiB,EAAE,mBAAmB,EAAuB,MAAM,wBAAwB,CAAA;AACpG,OAAO,EAAE,mCAAmC,EAAkC,MAAM,8BAA8B,CAAA;AAClH,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAA;AACnD,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAA;AACnD,OAAO,EAAE,eAAe,EAAE,iBAAiB,EAAE,sBAAsB,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAA;AAuF3G,MAAM,OAAO,uBAAuB;IAON;IANZ,MAAM,CAAmB;IACzB,UAAU,CAAsB;IAChC,WAAW,CAAY;IACvB,gBAAgB,CAAU;IAC1B,cAAc,CAAU;IAEzC,YAA6B,GAAkC;QAAlC,QAAG,GAAH,GAAG,CAA+B;QAC9D,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,iBAAiB,CAAA;QAC7C,IAAI,CAAC,UAAU,GAAG,GAAG,CAAC,MAAM,IAAI,SAAS,CAAA;QACzC,MAAM,UAAU,GAAG,sBAAsB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACtD,IAAI,GAAG,CAAC,WAAW,EAAE,CAAC;YACrB,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC,UAAU,EAAE,GAAG,CAAC,WAAW,CAAC,CAAA;QAC5D,CAAC;aAAM,CAAC;YACP,IAAI,CAAC,WAAW,GAAG,UAAU,CAAA;QAC9B,CAAC;QACD,IAAI,CAAC,gBAAgB,GAAG,GAAG,CAAC,gBAAgB,IAAI,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QAC9E,IAAI,CAAC,cAAc,GAAG,GAAG,CAAC,cAAc,IAAI,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IACzE,CAAC;IAED;;;;;;;;;;;OAWG;IACH,MAAM,CAAC,KAAK,CAAC,eAAe,CAC3B,OAAqE,EAAE;QAEvE,yFAAyF;QACzF,2FAA2F;QAC3F,uFAAuF;QACvF,0FAA0F;QAC1F,2BAA2B;QAC3B,MAAM,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,cAAc,EAAE,uBAAuB,EAAE,kBAAkB,EAAE,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC3G,MAAM,CAAC,yBAAyB,CAAC,kBAAkB,CAAC;YACpD,MAAM,CAAC,yBAAyB,CAAC,cAAc,CAAC;SAChD,CAAC,CAAA;QACF,MAAM,QAAQ,GAAoB,cAAc,CAAC,IAAI,CAAC,CAAA;QACtD,MAAM,MAAM,GAAG,uBAAuB,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAA;QAC9D,MAAM,GAAG,GAAG,kBAAk
B,CAAC,QAAQ,CAAC,kBAAkB,CAAC,CAAA;QAC3D,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC7C,kBAAkB,CAAC,YAAY,CAAC,QAAQ,CAAC,aAAa,CAAC;YACvD,UAAU,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC;SACrC,CAAC,CAAA;QACF,OAAO,IAAI,uBAAuB,CAAC;YAClC,SAAS;YACT,MAAM;YACN,MAAM;YACN,WAAW,EAAE,GAAG,EAAE,WAAW;YAC7B,gBAAgB,EAAE,GAAG,EAAE,gBAAgB;YACvC,cAAc,EAAE,GAAG,EAAE,cAAc;YACnC,GAAG,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC,CAAC,EAAE,oBAAoB,EAAE,IAAI,CAAC,oBAAoB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SACzF,CAAC,CAAA;IACH,CAAC;IAED,6DAA6D;IAC7D,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,IAAgB;QACzC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,CAAA;QACtD,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;QACjD,OAAO,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAA;IACnG,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,eAAe,CAAC,IAAY,EAAE,IAAgB;QACnD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,EAAE,IAAI,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAA;QAClE,CAAC;QACD,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;QACjE,OAAO;YACN,IAAI,EAAE,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;YACjG,MAAM;YACN,MAAM,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;SAC3D,CAAA;IACF,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,OAAO,CACZ,IAAY,EACZ,IAAgB;QAEhB,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QACvD,gGAAgG;QAChG,+EAA+E;QAC/E,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,oBAAoB;YAC3C,CAAC,CAAC,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,oBAAoB,CAAC;YAClE,CAAC,CAAC,SAAS,CAAA;QACZ,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,gBAAgB;YAC1C,CAAC,CAAC,sBAAsB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC;YACjE,
CAAC,CAAC,SAAS,CAAA;QACZ,MAAM,MAAM,GACX,SAAS,IAAI,MAAM,IAAI,IAAI,CAAC,GAAG,CAAC,6BAA6B;YAC5D,CAAC,CAAC,6BAA6B,CAAC,SAAS,EAAE,MAAM,CAAC,UAAU,CAAC;YAC7D,CAAC,CAAC,SAAS,CAAA;QACb,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,CAAC,CAAA;QAEjF,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,CAAA;QAEhC,+FAA+F;QAC/F,0FAA0F;QAC1F,8EAA8E;QAC9E,MAAM,cAAc,GAAG,IAAI,EAAE,wBAAwB,IAAI,IAAI,CAAC,GAAG,CAAC,wBAAwB,CAAA;QAC1F,MAAM,WAAW,GAChB,cAAc,KAAK,SAAS;YAC3B,CAAC,CAAC,IAAI;YACN,CAAC,CAAC,oBAAoB,CACpB,cAAc,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,mBAAmB,CAAC,YAAY,CAAC,EAAE,MAAM,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,cAAc,CAChG,CAAA;QAEJ,IAAI,SAAS,GAAG,IAAI,EAAE,UAAU;YAC/B,CAAC,CAAC,iBAAiB,CACjB,MAAM,EACN,mBAAmB,CAAC,IAAI,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE;gBACzD,SAAS,EAAE,IAAI,CAAC,mBAAmB,IAAI,GAAG;gBAC1C,SAAS,EAAE,IAAI;aACf,CAAC,CACF;YACF,CAAC,CAAC,MAAM,CAAA;QAET,IAAI,IAAI,EAAE,GAAG,EAAE,CAAC;YACf,SAAS,GAAG,iBAAiB,CAC5B,SAAS,EACT,sBAAsB,CAAC,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE;gBACrD,SAAS,EAAE,IAAI,CAAC,YAAY,IAAI,GAAG;aACnC,CAAC,CACF,CAAA;QACF,CAAC;QAED,IAAI,IAAI,EAAE,mBAAmB,EAAE,CAAC;YAC/B,SAAS,GAAG,iBAAiB,CAC5B,SAAS,EACT,mCAAmC,CAClC,IAAI,CAAC,mBAAmB,EACxB,MAAM,EACN,IAAI,CAAC,MAAM,EACX,IAAI,CAAC,uBAAuB,IAAI,EAAE,CAClC,CACD,CAAA;QACF,CAAC;QAED,4FAA4F;QAC5F,8FAA8F;QAC9F,iEAAiE;QACjE,IAAI,WAAW,EAAE,aAAa,EAAE,MAAM,EAAE,CAAC;YACxC,MAAM,SAAS,GAAG,IAAI,GAAG,EAAU,CAAA;YACnC,KAAK,MAAM,GAAG,IAAI,WAAW,CAAC,aAAa,EAAE,CAAC;gBAC7C,MAAM,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,GAAG,EAAE,CAAC,CAAA;gBACzC,MAAM,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,GAAG,EAAE,CAAC,CAAA;gBACzC,IAAI,CAAC,IAAI,CAAC;oBAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;gBAC5B,IAAI,CAAC,IAAI,CAAC;oBAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;YAC7B,CAAC;YACD,IAAI,SAAS,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;gBACxB,SAAS,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC
,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;YACzF,CAAC;QACF,CAAC;QAED,MAAM,YAAY,GACjB,IAAI,CAAC,UAAU,KAAK,SAAS;YAC5B,CAAC,CAAC,OAAO,CAAC;gBACR,SAAS;gBACT,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;gBACvC,cAAc,EAAE,IAAI,CAAC,cAAc;aACnC,CAAC,CAAC,IAAI;YACR,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAA;QAElD,IAAI,MAAM,GAAmB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAChD,MAAM,GAAG,GAAG,YAAY,CAAC,CAAC,CAAE,CAAA;YAC5B,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAA;YACjC,OAAO;gBACN,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,GAAG,EAAE,CAAC,CAAC,GAAG;gBACV,KAAK,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,GAAG,CAA0B;gBACzD,UAAU,EAAE,KAAK,CAAC,GAAG,CAAE;aACvB,CAAA;QACF,CAAC,CAAC,CAAA;QAEF,8FAA8F;QAC9F,yFAAyF;QACzF,8FAA8F;QAC9F,IAAI,IAAI,EAAE,cAAc,IAAI,WAAW,EAAE,eAAe,EAAE,CAAC;YAC1D,MAAM,GAAG,oBAAoB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,MAAM,CAAA;QACnD,CAAC;QACD,IAAI,IAAI,EAAE,UAAU,EAAE,CAAC;YACtB,MAAM,GAAG,gBAAgB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,MAAM,CAAA;QAC/C,CAAC;QACD,yFAAyF;QACzF,yFAAyF;QACzF,yDAAyD;QACzD,IAAI,IAAI,EAAE,qBAAqB,IAAI,IAAI,CAAC,GAAG,CAAC,qBAAqB,EAAE,CAAC;YACnE,MAAM,GAAG,qBAAqB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;QAC7C,CAAC;QAED,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,CAAA;IAClC,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,IAAY,EAAE,IAAgB;QAC7C,OAAO,YAAY,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAA;IAClD,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,IAAY,EAAE,IAAgB;QAC/C,OAAO,cAAc,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAA;IACpD,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,IAAY,EAAE,IAA8D;QAC1F,OAAO,WAAW,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,CAAC,CAAA;IAC5D,CAAC;IAED;;;;;;;;;;OAUG;IACK,mBAAmB,CAAC,MAA2B;QACtD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,OAAM;QAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAE,CAAC,MAAM,CAAA;QAC/B,IAAI,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YAChC,MAAM,IAAI,KAAK,CACd,wCAAwC,KAAK,2CAA2C;gBACvF,wBAAwB,IAAI,CAAC,MAAM,CAAC,MAAM,iDAAiD;gBAC3F,oFAAoF,CACrF,CA
AA;QACF,CAAC;IACF,CAAC;CACD;AAqFD,SAAS,aAAa,CAAC,GAAa;IACnC,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,IAAI,MAAM,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;IACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,IAAI,GAAG,CAAC,CAAC,CAAE,GAAG,MAAM,EAAE,CAAC;YACtB,MAAM,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;YAChB,MAAM,GAAG,CAAC,CAAA;QACX,CAAC;IACF,CAAC;IACD,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,KAAK,MAAM,CAAC,IAAI,GAAG;QAAE,MAAM,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,CAAA;IACnD,MAAM,IAAI,GAAG,CAAC,GAAG,MAAM,CAAA;IACvB,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;AAC7B,CAAC;AAED,uGAAuG;AACvG,SAAS,WAAW,CAAC,CAAa,EAAE,CAAa;IAChD,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAA;IAClB,MAAM,GAAG,GAAe,EAAE,CAAA;IAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,MAAM,GAAG,GAAG,IAAI,KAAK,CAAS,CAAC,CAAC,CAAA;QAChC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;YAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,CAAE,CAAA;QAC1D,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACd,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC"}
|
|
1
|
+
{"version":3,"file":"classifier.js","sourceRoot":"","sources":["../classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,oBAAoB,EAAmB,MAAM,kBAAkB,CAAA;AACxE,OAAO,EACN,gBAAgB,EAChB,YAAY,EACZ,cAAc,EACd,WAAW,GAKX,MAAM,yBAAyB,CAAA;AAChC,OAAO,EAAE,YAAY,EAA+C,MAAM,0BAA0B,CAAA;AAEpG,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAA;AACzD,OAAO,EAAE,mBAAmB,EAAqB,MAAM,uBAAuB,CAAA;AAC9E,OAAO,EAAE,sBAAsB,EAAuB,MAAM,gBAAgB,CAAA;AAC5E,OAAO,EAAE,sBAAsB,EAAE,6BAA6B,EAAyB,MAAM,0BAA0B,CAAA;AACvH,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAA;AAE/C,OAAO,EAAE,oBAAoB,EAAE,MAAM,sBAAsB,CAAA;AAC3D,OAAO,EAAE,iBAAiB,EAAE,mBAAmB,EAAuB,MAAM,wBAAwB,CAAA;AACpG,OAAO,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAA;AACxD,OAAO,EAAE,uBAAuB,EAA8B,MAAM,0BAA0B,CAAA;AAC9F,OAAO,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAA;AAClE,OAAO,EAAE,mCAAmC,EAAkC,MAAM,8BAA8B,CAAA;AAClH,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAA;AACnD,OAAO,EAAE,gBAAgB,EAAE,MAAM,kBAAkB,CAAA;AACnD,OAAO,EAAE,eAAe,EAAE,iBAAiB,EAAE,sBAAsB,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAA;AA+G3G,MAAM,OAAO,uBAAuB;IASN;IARZ,MAAM,CAAmB;IACzB,UAAU,CAAsB;IAChC,WAAW,CAAY;IACxC,qGAAqG;IACrG,mBAAmB,CAAgC;IAClC,gBAAgB,CAAU;IAC1B,cAAc,CAAU;IAEzC,YAA6B,GAAkC;QAAlC,QAAG,GAAH,GAAG,CAA+B;QAC9D,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,iBAAiB,CAAA;QAC7C,IAAI,CAAC,UAAU,GAAG,GAAG,CAAC,MAAM,IAAI,SAAS,CAAA;QACzC,MAAM,UAAU,GAAG,sBAAsB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACtD,IAAI,GAAG,CAAC,WAAW,EAAE,CAAC;YACrB,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC,UAAU,EAAE,GAAG,CAAC,WAAW,CAAC,CAAA;QAC5D,CAAC;aAAM,CAAC;YACP,IAAI,CAAC,WAAW,GAAG,UAAU,CAAA;QAC9B,CAAC;QACD,IAAI,CAAC,gBAAgB,GAAG,GAAG,CAAC,gBAAgB,IAAI,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QAC9E,IAAI,CAAC,cAAc,GAAG,GAAG,CAAC,cAAc,IAAI,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IACzE,CAAC;IAED;;;OAGG;IACK,eAAe;QACtB,IAAI,CAAC,mBAAmB,KAAK,EAAE,OAAO,EAAE,qBAAqB,EAAE,EAAE,CAAA;QACjE,OAAO,IAAI,CAAC,mBAAmB,CAAA;IAChC,CAAC;IAED;;;;;;;;;;;OAWG;IACH,MAAM,CAAC,KAAK,CAAC,eAAe,CAC3B,OAAqE,EAAE;QAEvE,yFAAyF;QACzF,2FAA2F;QAC3F,uFAAuF;QACvF,0FAA0F;QAC1F,2BAA2B;QAC3B,MAAM,CAAC,EAAE,UAAU,EAAE,EAAE,
EAAE,cAAc,EAAE,uBAAuB,EAAE,kBAAkB,EAAE,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC3G,MAAM,CAAC,yBAAyB,CAAC,kBAAkB,CAAC;YACpD,MAAM,CAAC,yBAAyB,CAAC,cAAc,CAAC;SAChD,CAAC,CAAA;QACF,MAAM,QAAQ,GAAoB,cAAc,CAAC,IAAI,CAAC,CAAA;QACtD,MAAM,MAAM,GAAG,uBAAuB,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAA;QAC9D,MAAM,GAAG,GAAG,kBAAkB,CAAC,QAAQ,CAAC,kBAAkB,CAAC,CAAA;QAC3D,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC7C,kBAAkB,CAAC,YAAY,CAAC,QAAQ,CAAC,aAAa,CAAC;YACvD,UAAU,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC;SACrC,CAAC,CAAA;QACF,OAAO,IAAI,uBAAuB,CAAC;YAClC,SAAS;YACT,MAAM;YACN,MAAM;YACN,WAAW,EAAE,GAAG,EAAE,WAAW;YAC7B,gBAAgB,EAAE,GAAG,EAAE,gBAAgB;YACvC,cAAc,EAAE,GAAG,EAAE,cAAc;YACnC,GAAG,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC,CAAC,EAAE,oBAAoB,EAAE,IAAI,CAAC,oBAAoB,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;SACzF,CAAC,CAAA;IACH,CAAC;IAED,6DAA6D;IAC7D,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,IAAgB;QACzC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,CAAA;QACtD,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;QACjD,OAAO,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAA;IACnG,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,eAAe,CAAC,IAAY,EAAE,IAAgB;QACnD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,EAAE,IAAI,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAA;QAClE,CAAC;QACD,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,CAAA;QACjE,OAAO;YACN,IAAI,EAAE,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;YACjG,MAAM;YACN,MAAM,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;SAC3D,CAAA;IACF,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,OAAO,CACZ,IAAY,EACZ,IAAgB;QAMhB,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QACvD,gGAAgG;QA
ChG,+EAA+E;QAC/E,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,oBAAoB;YAC3C,CAAC,CAAC,mBAAmB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,oBAAoB,CAAC;YAClE,CAAC,CAAC,SAAS,CAAA;QACZ,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,gBAAgB;YAC1C,CAAC,CAAC,sBAAsB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC;YACjE,CAAC,CAAC,SAAS,CAAA;QACZ,MAAM,MAAM,GACX,SAAS,IAAI,MAAM,IAAI,IAAI,CAAC,GAAG,CAAC,6BAA6B;YAC5D,CAAC,CAAC,6BAA6B,CAAC,SAAS,EAAE,MAAM,CAAC,UAAU,CAAC;YAC7D,CAAC,CAAC,SAAS,CAAA;QACb,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,EAAE,MAAM,EAAE,MAAM,CAAC,CAAA;QAEjF,IAAI,CAAC,mBAAmB,CAAC,MAAM,CAAC,CAAA;QAEhC,+FAA+F;QAC/F,0FAA0F;QAC1F,8EAA8E;QAC9E,MAAM,cAAc,GAAG,IAAI,EAAE,wBAAwB,IAAI,IAAI,CAAC,GAAG,CAAC,wBAAwB,CAAA;QAC1F,MAAM,WAAW,GAChB,cAAc,KAAK,SAAS;YAC3B,CAAC,CAAC,IAAI;YACN,CAAC,CAAC,oBAAoB,CACpB,cAAc,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,mBAAmB,CAAC,YAAY,CAAC,EAAE,MAAM,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,cAAc,CAChG,CAAA;QAEJ,IAAI,SAAS,GAAG,IAAI,EAAE,UAAU;YAC/B,CAAC,CAAC,iBAAiB,CACjB,MAAM,EACN,mBAAmB,CAAC,IAAI,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE;gBACzD,SAAS,EAAE,IAAI,CAAC,mBAAmB,IAAI,GAAG;gBAC1C,SAAS,EAAE,IAAI;aACf,CAAC,CACF;YACF,CAAC,CAAC,MAAM,CAAA;QAET,IAAI,IAAI,EAAE,GAAG,EAAE,CAAC;YACf,SAAS,GAAG,iBAAiB,CAC5B,SAAS,EACT,sBAAsB,CAAC,IAAI,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE;gBACrD,SAAS,EAAE,IAAI,CAAC,YAAY,IAAI,GAAG;aACnC,CAAC,CACF,CAAA;QACF,CAAC;QAED,IAAI,IAAI,EAAE,mBAAmB,EAAE,CAAC;YAC/B,SAAS,GAAG,iBAAiB,CAC5B,SAAS,EACT,mCAAmC,CAClC,IAAI,CAAC,mBAAmB,EACxB,MAAM,EACN,IAAI,CAAC,MAAM,EACX,IAAI,CAAC,uBAAuB,IAAI,EAAE,CAClC,CACD,CAAA;QACF,CAAC;QAED,yFAAyF;QACzF,4FAA4F;QAC5F,6FAA6F;QAC7F,0EAA0E;QAC1E,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,YAAY,KAAK,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,YAAY,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC,CAAA;QAClH,MAAM,WAAW,GAAG,CAAC,IAAI,EAAE,YAAY,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS,CAAA;QACzE,MAAM,aAAa,GAAmB,WAAW,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,EAAE,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,
CAAA;QAChG,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9B,SAAS,GAAG,iBAAiB,CAAC,SAAS,EAAE,uBAAuB,CAAC,aAAa,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC,CAAA;QACnH,CAAC;QAED,yFAAyF;QAEzF,4FAA4F;QAC5F,8FAA8F;QAC9F,iEAAiE;QACjE,IAAI,WAAW,EAAE,aAAa,EAAE,MAAM,EAAE,CAAC;YACxC,MAAM,SAAS,GAAG,IAAI,GAAG,EAAU,CAAA;YACnC,KAAK,MAAM,GAAG,IAAI,WAAW,CAAC,aAAa,EAAE,CAAC;gBAC7C,MAAM,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,GAAG,EAAE,CAAC,CAAA;gBACzC,MAAM,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,GAAG,EAAE,CAAC,CAAA;gBACzC,IAAI,CAAC,IAAI,CAAC;oBAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;gBAC5B,IAAI,CAAC,IAAI,CAAC;oBAAE,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAA;YAC7B,CAAC;YACD,IAAI,SAAS,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;gBACxB,SAAS,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;YACzF,CAAC;QACF,CAAC;QAED,MAAM,YAAY,GACjB,IAAI,CAAC,UAAU,KAAK,SAAS;YAC5B,CAAC,CAAC,OAAO,CAAC;gBACR,SAAS;gBACT,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;gBACvC,cAAc,EAAE,IAAI,CAAC,cAAc;aACnC,CAAC,CAAC,IAAI;YACR,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAA;QAElD,IAAI,MAAM,GAAmB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAChD,MAAM,GAAG,GAAG,YAAY,CAAC,CAAC,CAAE,CAAA;YAC5B,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAA;YACjC,OAAO;gBACN,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,GAAG,EAAE,CAAC,CAAC,GAAG;gBACV,KAAK,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,GAAG,CAA0B;gBACzD,UAAU,EAAE,KAAK,CAAC,GAAG,CAAE;aACvB,CAAA;QACF,CAAC,CAAC,CAAA;QAEF,8FAA8F;QAC9F,yFAAyF;QACzF,8FAA8F;QAC9F,IAAI,IAAI,EAAE,cAAc,IAAI,WAAW,EAAE,eAAe,EAAE,CAAC;YAC1D,MAAM,GAAG,oBAAoB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,MAAM,CAAA;QACnD,CAAC;QACD,IAAI,IAAI,EAAE,UAAU,EAAE,CAAC;YACtB,MAAM,GAAG,gBAAgB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,MAAM,CAAA;QAC/C,CAAC;QACD,yFAAyF;QACzF,yFAAyF;QACzF,yFAAyF;QACzF,qFAAqF;QACrF,
IAAI,IAAI,EAAE,qBAAqB,IAAI,IAAI,CAAC,GAAG,CAAC,qBAAqB,EAAE,CAAC;YACnE,MAAM,YAAY,GAAG,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,iBAAiB,IAAI,CAAC,CAAC,IAAI,KAAK,aAAa,CAAC,CAAA;YAC1G,MAAM,GAAG,qBAAqB,CAAC,IAAI,EAAE,MAAM,EAAE,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAA;QACrG,CAAC;QAED,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,CAAA;IAClC,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,IAAY,EAAE,IAAgB;QAC7C,OAAO,YAAY,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAA;IAClD,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,IAAY,EAAE,IAAgB;QAC/C,OAAO,cAAc,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAA;IACpD,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,IAAY,EAAE,IAA8D;QAC1F,OAAO,WAAW,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,CAAC,CAAA;IAC5D,CAAC;IAED;;;;;;;;;;OAUG;IACK,mBAAmB,CAAC,MAA2B;QACtD,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YAAE,OAAM;QAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAE,CAAC,MAAM,CAAA;QAC/B,IAAI,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC;YAChC,MAAM,IAAI,KAAK,CACd,wCAAwC,KAAK,2CAA2C;gBACvF,wBAAwB,IAAI,CAAC,MAAM,CAAC,MAAM,iDAAiD;gBAC3F,oFAAoF,CACrF,CAAA;QACF,CAAC;IACF,CAAC;CACD;AA2FD,SAAS,aAAa,CAAC,GAAa;IACnC,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,IAAI,MAAM,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;IACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,IAAI,GAAG,CAAC,CAAC,CAAE,GAAG,MAAM,EAAE,CAAC;YACtB,MAAM,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;YAChB,MAAM,GAAG,CAAC,CAAA;QACX,CAAC;IACF,CAAC;IACD,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,KAAK,MAAM,CAAC,IAAI,GAAG;QAAE,MAAM,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,CAAA;IACnD,MAAM,IAAI,GAAG,CAAC,GAAG,MAAM,CAAA;IACvB,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;AAC7B,CAAC;AAED,uGAAuG;AACvG,SAAS,WAAW,CAAC,CAAa,EAAE,CAAa;IAChD,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAA;IAClB,MAAM,GAAG,GAAe,EAAE,CAAA;IAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,MAAM,GAAG,GAAG,IAAI,KAAK,CAAS,CAAC,CAAC,CAAA;QAChC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,C
AAC,EAAE;YAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,CAAE,CAAA;QAC1D,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACd,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC"}
|
|
@@ -6,9 +6,9 @@
|
|
|
6
6
|
* Inference-side gazetteer-anchor features (#464, knowledge-ladder rung 3.2) — the TS mirror of the
|
|
7
7
|
* Python training pipeline (`mailwoman_train/gazetteer_anchor.py`). Both consumers load the SAME
|
|
8
8
|
* codex-generated lexicon (`scripts/build-gazetteer-anchor-lexicon.mjs` →
|
|
9
|
-
* `data/gazetteer/anchor-lexicon-v1.json`) whose `rules` encode the match semantics as DATA, so
|
|
10
|
-
* two implementations cannot drift. The model conditions on per-token candidate-tag-set clues
|
|
11
|
-
* alongside `input_ids`; this builds them from a raw address + its SentencePiece pieces.
|
|
9
|
+
* `data/gazetteer/anchor-lexicon-v1.json`) whose `rules` encode the match semantics as DATA, so
|
|
10
|
+
* the two implementations cannot drift. The model conditions on per-token candidate-tag-set clues
|
|
11
|
+
* fed alongside `input_ids`; this builds them from a raw address + its SentencePiece pieces.
|
|
12
12
|
*
|
|
13
13
|
* The clue INFORMS, the model decides (model-first). `gazetteer-inference.test.ts` pins the matcher
|
|
14
14
|
* against the Python fixture: the homograph clue is symmetric, "in" ≠ "IN", multi-word countries
|
|
@@ -27,9 +27,9 @@ export interface GazetteerLexicon {
|
|
|
27
27
|
slots: readonly string[];
|
|
28
28
|
bits: Record<string, number>;
|
|
29
29
|
maxNgram: number;
|
|
30
|
-
/**
|
|
30
|
+
/** Case-insensitive: key = word_norm lowercased → bitmask. */
|
|
31
31
|
entries: Map<string, number>;
|
|
32
|
-
/**
|
|
32
|
+
/** Case-SENSITIVE: key = word_norm uppercased → bitmask (surface must already be uppercase). */
|
|
33
33
|
codeEntries: Map<string, number>;
|
|
34
34
|
}
|
|
35
35
|
/** Parse the lexicon JSON (already `JSON.parse`d — keeps this module browser-safe; caller reads). */
|
|
@@ -44,14 +44,15 @@ export declare function parseGazetteerLexicon(raw: {
|
|
|
44
44
|
/** Scan the raw surface and paint each char with its candidate-tag bitmask (mirrors Python). */
|
|
45
45
|
export declare function gazetteerCharPaint(text: string, lexicon: GazetteerLexicon): number[];
|
|
46
46
|
/**
|
|
47
|
-
* Channel choreography (#464, v0.9.13 postcode fix; DeepSeek 2026-06-10): zero the gazetteer clue
|
|
48
|
-
* pieces within `window` of a postcode-anchor hit. The clue fires on the region token
|
|
49
|
-
* immediately before a US postcode; its additive vector strengthens `B-region`, which
|
|
50
|
-
* `B-region → B-postcode` CRF transition less competitive and drops the postcode (~3pp,
|
|
51
|
-
* postcode precedes the locality, no region neighbor). Suppressing the clue adjacent
|
|
52
|
-
* removes the interference while leaving every other clue intact. Returns a NEW
|
|
53
|
-
* pair (does not mutate). `anchorConfidence[i] > 0` marks postcode-span pieces.
|
|
54
|
-
* train-time half (`gazetteer_anchor.suppress_gazetteer_near_postcode`) — enable
|
|
47
|
+
* Channel choreography (#464, v0.9.13 postcode fix; DeepSeek 2026-06-10): zero the gazetteer clue
|
|
48
|
+
* on pieces within `window` of a postcode-anchor hit. The clue fires on the region token
|
|
49
|
+
* (`CA`/`GA`) immediately before a US postcode; its additive vector strengthens `B-region`, which
|
|
50
|
+
* makes the `B-region → B-postcode` CRF transition less competitive and drops the postcode (~3pp,
|
|
51
|
+
* US-only — FR postcode precedes the locality, no region neighbor). Suppressing the clue adjacent
|
|
52
|
+
* to the postcode removes the interference while leaving every other clue intact. Returns a NEW
|
|
53
|
+
* features/confidence pair (does not mutate). `anchorConfidence[i] > 0` marks postcode-span pieces.
|
|
54
|
+
* PAIRS WITH the train-time half (`gazetteer_anchor.suppress_gazetteer_near_postcode`) — enable
|
|
55
|
+
* both or neither.
|
|
55
56
|
*/
|
|
56
57
|
export declare function suppressGazetteerNearPostcode(gazetteer: {
|
|
57
58
|
features: number[][];
|
|
@@ -62,8 +63,9 @@ export declare function suppressGazetteerNearPostcode(gazetteer: {
|
|
|
62
63
|
};
|
|
63
64
|
/**
|
|
64
65
|
* Per-piece gazetteer features + confidence for `text`, projected onto its SP `pieces` by the SAME
|
|
65
|
-
* char→piece rule the labels use (a piece takes the bits of the first non-whitespace char it
|
|
66
|
-
* Returns `(pieces × featureDim)` features + `(pieces,)` confidence (1.0 wherever any bit
|
|
66
|
+
* char→piece rule the labels use (a piece takes the bits of the first non-whitespace char it
|
|
67
|
+
* covers). Returns `(pieces × featureDim)` features + `(pieces,)` confidence (1.0 wherever any bit
|
|
68
|
+
* fires).
|
|
67
69
|
*/
|
|
68
70
|
export declare function buildGazetteerFeatures(text: string, pieces: ReadonlyArray<TokenizedPiece>, lexicon: GazetteerLexicon): {
|
|
69
71
|
features: number[][];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"gazetteer-inference.d.ts","sourceRoot":"","sources":["../gazetteer-inference.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAA;AAEpD;;;;GAIG;AACH,eAAO,MAAM,qBAAqB,IAAI,CAAA;AAEtC,mFAAmF;AACnF,MAAM,WAAW,gBAAgB;IAChC,UAAU,EAAE,MAAM,CAAA;IAClB,KAAK,EAAE,SAAS,MAAM,EAAE,CAAA;IACxB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC5B,QAAQ,EAAE,MAAM,CAAA;IAChB,8DAA8D;IAC9D,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC5B,gGAAgG;IAChG,WAAW,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAChC;AAED,qGAAqG;AACrG,wBAAgB,qBAAqB,CAAC,GAAG,EAAE;IAC1C,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC5B,SAAS,EAAE,MAAM,CAAA;IACjB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC/B,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CACpC,GAAG,gBAAgB,CA0BnB;AAsBD,gGAAgG;AAChG,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,gBAAgB,GAAG,MAAM,EAAE,CA0DpF;AAED
|
|
1
|
+
{"version":3,"file":"gazetteer-inference.d.ts","sourceRoot":"","sources":["../gazetteer-inference.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAA;AAEpD;;;;GAIG;AACH,eAAO,MAAM,qBAAqB,IAAI,CAAA;AAEtC,mFAAmF;AACnF,MAAM,WAAW,gBAAgB;IAChC,UAAU,EAAE,MAAM,CAAA;IAClB,KAAK,EAAE,SAAS,MAAM,EAAE,CAAA;IACxB,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC5B,QAAQ,EAAE,MAAM,CAAA;IAChB,8DAA8D;IAC9D,OAAO,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC5B,gGAAgG;IAChG,WAAW,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CAChC;AAED,qGAAqG;AACrG,wBAAgB,qBAAqB,CAAC,GAAG,EAAE;IAC1C,WAAW,EAAE,MAAM,CAAA;IACnB,KAAK,EAAE,MAAM,EAAE,CAAA;IACf,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC5B,SAAS,EAAE,MAAM,CAAA;IACjB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;IAC/B,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CACpC,GAAG,gBAAgB,CA0BnB;AAsBD,gGAAgG;AAChG,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,gBAAgB,GAAG,MAAM,EAAE,CA0DpF;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,6BAA6B,CAC5C,SAAS,EAAE;IAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,CAAC;IAAC,UAAU,EAAE,MAAM,EAAE,CAAA;CAAE,EACzD,gBAAgB,EAAE,aAAa,CAAC,MAAM,CAAC,EACvC,MAAM,SAAI,GACR;IAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,CAAC;IAAC,UAAU,EAAE,MAAM,EAAE,CAAA;CAAE,CAgBhD;AAED;;;;;GAKG;AACH,wBAAgB,sBAAsB,CACrC,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,aAAa,CAAC,cAAc,CAAC,EACrC,OAAO,EAAE,gBAAgB,GACvB;IAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,CAAC;IAAC,UAAU,EAAE,MAAM,EAAE,CAAA;CAAE,CAiBhD"}
|
|
@@ -6,9 +6,9 @@
|
|
|
6
6
|
* Inference-side gazetteer-anchor features (#464, knowledge-ladder rung 3.2) — the TS mirror of the
|
|
7
7
|
* Python training pipeline (`mailwoman_train/gazetteer_anchor.py`). Both consumers load the SAME
|
|
8
8
|
* codex-generated lexicon (`scripts/build-gazetteer-anchor-lexicon.mjs` →
|
|
9
|
-
* `data/gazetteer/anchor-lexicon-v1.json`) whose `rules` encode the match semantics as DATA, so
|
|
10
|
-
* two implementations cannot drift. The model conditions on per-token candidate-tag-set clues
|
|
11
|
-
* alongside `input_ids`; this builds them from a raw address + its SentencePiece pieces.
|
|
9
|
+
* `data/gazetteer/anchor-lexicon-v1.json`) whose `rules` encode the match semantics as DATA, so
|
|
10
|
+
* the two implementations cannot drift. The model conditions on per-token candidate-tag-set clues
|
|
11
|
+
* fed alongside `input_ids`; this builds them from a raw address + its SentencePiece pieces.
|
|
12
12
|
*
|
|
13
13
|
* The clue INFORMS, the model decides (model-first). `gazetteer-inference.test.ts` pins the matcher
|
|
14
14
|
* against the Python fixture: the homograph clue is symmetric, "in" ≠ "IN", multi-word countries
|
|
@@ -48,7 +48,7 @@ export function parseGazetteerLexicon(raw) {
|
|
|
48
48
|
codeEntries: new Map(Object.entries(raw.code_entries)),
|
|
49
49
|
};
|
|
50
50
|
}
|
|
51
|
-
/**
|
|
51
|
+
/** `word_norm` for one word: strip leading/trailing non-letter/digit chars (keep internal ones). */
|
|
52
52
|
function stripWord(word) {
|
|
53
53
|
let start = 0;
|
|
54
54
|
let end = word.length;
|
|
@@ -127,14 +127,15 @@ export function gazetteerCharPaint(text, lexicon) {
|
|
|
127
127
|
return charBits;
|
|
128
128
|
}
|
|
129
129
|
/**
|
|
130
|
-
* Channel choreography (#464, v0.9.13 postcode fix; DeepSeek 2026-06-10): zero the gazetteer clue
|
|
131
|
-
* pieces within `window` of a postcode-anchor hit. The clue fires on the region token
|
|
132
|
-
* immediately before a US postcode; its additive vector strengthens `B-region`, which
|
|
133
|
-
* `B-region → B-postcode` CRF transition less competitive and drops the postcode (~3pp,
|
|
134
|
-
* postcode precedes the locality, no region neighbor). Suppressing the clue adjacent
|
|
135
|
-
* removes the interference while leaving every other clue intact. Returns a NEW
|
|
136
|
-
* pair (does not mutate). `anchorConfidence[i] > 0` marks postcode-span pieces.
|
|
137
|
-
* train-time half (`gazetteer_anchor.suppress_gazetteer_near_postcode`) — enable
|
|
130
|
+
* Channel choreography (#464, v0.9.13 postcode fix; DeepSeek 2026-06-10): zero the gazetteer clue
|
|
131
|
+
* on pieces within `window` of a postcode-anchor hit. The clue fires on the region token
|
|
132
|
+
* (`CA`/`GA`) immediately before a US postcode; its additive vector strengthens `B-region`, which
|
|
133
|
+
* makes the `B-region → B-postcode` CRF transition less competitive and drops the postcode (~3pp,
|
|
134
|
+
* US-only — FR postcode precedes the locality, no region neighbor). Suppressing the clue adjacent
|
|
135
|
+
* to the postcode removes the interference while leaving every other clue intact. Returns a NEW
|
|
136
|
+
* features/confidence pair (does not mutate). `anchorConfidence[i] > 0` marks postcode-span pieces.
|
|
137
|
+
* PAIRS WITH the train-time half (`gazetteer_anchor.suppress_gazetteer_near_postcode`) — enable
|
|
138
|
+
* both or neither.
|
|
138
139
|
*/
|
|
139
140
|
export function suppressGazetteerNearPostcode(gazetteer, anchorConfidence, window = 1) {
|
|
140
141
|
const n = gazetteer.confidence.length;
|
|
@@ -156,8 +157,9 @@ export function suppressGazetteerNearPostcode(gazetteer, anchorConfidence, windo
|
|
|
156
157
|
}
|
|
157
158
|
/**
|
|
158
159
|
* Per-piece gazetteer features + confidence for `text`, projected onto its SP `pieces` by the SAME
|
|
159
|
-
* char→piece rule the labels use (a piece takes the bits of the first non-whitespace char it
|
|
160
|
-
* Returns `(pieces × featureDim)` features + `(pieces,)` confidence (1.0 wherever any bit
|
|
160
|
+
* char→piece rule the labels use (a piece takes the bits of the first non-whitespace char it
|
|
161
|
+
* covers). Returns `(pieces × featureDim)` features + `(pieces,)` confidence (1.0 wherever any bit
|
|
162
|
+
* fires).
|
|
161
163
|
*/
|
|
162
164
|
export function buildGazetteerFeatures(text, pieces, lexicon) {
|
|
163
165
|
const charBits = gazetteerCharPaint(text, lexicon);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"gazetteer-inference.js","sourceRoot":"","sources":["../gazetteer-inference.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAIH;;;;GAIG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAAG,CAAC,CAAA;AActC,qGAAqG;AACrG,MAAM,UAAU,qBAAqB,CAAC,GAOrC;IACA,yFAAyF;IACzF,8FAA8F;IAC9F,wCAAwC;IACxC,IAAI,OAAO,GAAG,EAAE,WAAW,KAAK,QAAQ,IAAI,GAAG,CAAC,WAAW,IAAI,CAAC,EAAE,CAAC;QAClE,MAAM,IAAI,KAAK,CAAC,iEAAiE,GAAG,EAAE,WAAW,EAAE,CAAC,CAAA;IACrG,CAAC;IACD,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzD,MAAM,IAAI,KAAK,CAAC,oDAAoD,CAAC,CAAA;IACtE,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,SAAS,KAAK,QAAQ,IAAI,GAAG,CAAC,SAAS,GAAG,CAAC,EAAE,CAAC;QAC5D,MAAM,IAAI,KAAK,CAAC,kDAAkD,GAAG,CAAC,SAAS,EAAE,CAAC,CAAA;IACnF,CAAC;IACD,KAAK,MAAM,KAAK,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,cAAc,CAAU,EAAE,CAAC;QAClE,IAAI,OAAO,GAAG,CAAC,KAAK,CAAC,KAAK,QAAQ,IAAI,GAAG,CAAC,KAAK,CAAC,KAAK,IAAI,EAAE,CAAC;YAC3D,MAAM,IAAI,KAAK,CAAC,sBAAsB,KAAK,oBAAoB,CAAC,CAAA;QACjE,CAAC;IACF,CAAC;IACD,OAAO;QACN,UAAU,EAAE,GAAG,CAAC,WAAW;QAC3B,KAAK,EAAE,GAAG,CAAC,KAAK;QAChB,IAAI,EAAE,GAAG,CAAC,IAAI;QACd,QAAQ,EAAE,GAAG,CAAC,SAAS;QACvB,OAAO,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAC7C,WAAW,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;KACtD,CAAA;AACF,CAAC;AAED,6FAA6F;AAC7F,SAAS,SAAS,CAAC,IAAY;IAC9B,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,CAAA;IACrB,MAAM,KAAK,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACpD,OAAO,KAAK,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAE,CAAC;QAAE,KAAK,EAAE,CAAA;IACnD,OAAO,GAAG,GAAG,KAAK,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,CAAE,CAAC;QAAE,GAAG,EAAE,CAAA;IACnD,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAA;AAC9B,CAAC;AAED,SAAS,SAAS,CAAC,IAAY,EAAE,OAAyB;IACzD,OAAO,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AACzE,CAAC;AAQD,gGAAgG;AAChG,MAAM,UAAU,kBAAkB,CAAC,
IAAY,EAAE,OAAyB;IACzE,MAAM,QAAQ,GAAG,IAAI,KAAK,CAAS,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACvD,MAAM,MAAM,GAAG,MAAM,CAAA;IACrB,MAAM,KAAK,GAAe,EAAE,CAAA;IAC5B,IAAI,CAAyB,CAAA;IAC7B,OAAO,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACzC,MAAM,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAA;QACpB,MAAM,QAAQ,GAAG,SAAS,CAAC,OAAO,CAAC,CAAA;QACnC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACf,KAAK,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,CAAA;YACtD,SAAQ;QACT,CAAC;QACD,IAAI,IAAI,GAAG,CAAC,CAAA;QACZ,MAAM,KAAK,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACpD,OAAO,IAAI,GAAG,OAAO,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAE,CAAC;YAAE,IAAI,EAAE,CAAA;QAC9D,KAAK,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI,EAAE,GAAG,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI,GAAG,QAAQ,CAAC,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;IAC7F,CAAC;IAED,IAAI,CAAC,GAAG,CAAC,CAAA;IACT,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;QACzB,IAAI,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAC;YACrB,CAAC,EAAE,CAAA;YACH,SAAQ;QACT,CAAC;QACD,IAAI,QAAQ,GAAG,CAAC,CAAA;QAChB,IAAI,WAAW,GAAG,CAAC,CAAA;QACnB,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;QACzD,KAAK,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAChC,MAAM,KAAK,GAAa,EAAE,CAAA;YAC1B,IAAI,EAAE,GAAG,IAAI,CAAA;YACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAChC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAC;oBACrB,EAAE,GAAG,KAAK,CAAA;oBACV,MAAK;gBACN,CAAC;gBACD,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,IAAI,CAAC,CAAA;YAC3B,CAAC;YACD,IAAI,CAAC,EAAE;gBAAE,SAAQ;YACjB,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,CAAA;YACzC,IAAI,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;YACxC,uFAAuF;YACvF,IAAI,CAAC,KAAK,CAAC;gBAAE,IAAI,IAAI,OAAO,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,IAAI,CAAC,CAAA;YAC5D,IAAI,IAAI,EAAE,CAAC;gBACV,QAAQ,GAAG,CAAC,C
AAA;gBACZ,WAAW,GAAG,IAAI,CAAA;gBAClB,MAAK;YACN,CAAC;QACF,CAAC;QACD,IAAI,QAAQ,EAAE,CAAC;YACd,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAE,CAAC,KAAK,CAAA;YAC7B,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,GAAG,QAAQ,GAAG,CAAC,CAAE,CAAC,GAAG,CAAA;YACxC,KAAK,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE;gBAAE,QAAQ,CAAC,CAAC,CAAC,GAAG,WAAW,CAAA;YAClF,CAAC,IAAI,QAAQ,CAAA;QACd,CAAC;aAAM,CAAC;YACP,CAAC,EAAE,CAAA;QACJ,CAAC;IACF,CAAC;IACD,OAAO,QAAQ,CAAA;AAChB,CAAC;AAED
|
|
1
|
+
{"version":3,"file":"gazetteer-inference.js","sourceRoot":"","sources":["../gazetteer-inference.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAIH;;;;GAIG;AACH,MAAM,CAAC,MAAM,qBAAqB,GAAG,CAAC,CAAA;AActC,qGAAqG;AACrG,MAAM,UAAU,qBAAqB,CAAC,GAOrC;IACA,yFAAyF;IACzF,8FAA8F;IAC9F,wCAAwC;IACxC,IAAI,OAAO,GAAG,EAAE,WAAW,KAAK,QAAQ,IAAI,GAAG,CAAC,WAAW,IAAI,CAAC,EAAE,CAAC;QAClE,MAAM,IAAI,KAAK,CAAC,iEAAiE,GAAG,EAAE,WAAW,EAAE,CAAC,CAAA;IACrG,CAAC;IACD,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzD,MAAM,IAAI,KAAK,CAAC,oDAAoD,CAAC,CAAA;IACtE,CAAC;IACD,IAAI,OAAO,GAAG,CAAC,SAAS,KAAK,QAAQ,IAAI,GAAG,CAAC,SAAS,GAAG,CAAC,EAAE,CAAC;QAC5D,MAAM,IAAI,KAAK,CAAC,kDAAkD,GAAG,CAAC,SAAS,EAAE,CAAC,CAAA;IACnF,CAAC;IACD,KAAK,MAAM,KAAK,IAAI,CAAC,MAAM,EAAE,SAAS,EAAE,cAAc,CAAU,EAAE,CAAC;QAClE,IAAI,OAAO,GAAG,CAAC,KAAK,CAAC,KAAK,QAAQ,IAAI,GAAG,CAAC,KAAK,CAAC,KAAK,IAAI,EAAE,CAAC;YAC3D,MAAM,IAAI,KAAK,CAAC,sBAAsB,KAAK,oBAAoB,CAAC,CAAA;QACjE,CAAC;IACF,CAAC;IACD,OAAO;QACN,UAAU,EAAE,GAAG,CAAC,WAAW;QAC3B,KAAK,EAAE,GAAG,CAAC,KAAK;QAChB,IAAI,EAAE,GAAG,CAAC,IAAI;QACd,QAAQ,EAAE,GAAG,CAAC,SAAS;QACvB,OAAO,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;QAC7C,WAAW,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;KACtD,CAAA;AACF,CAAC;AAED,6FAA6F;AAC7F,SAAS,SAAS,CAAC,IAAY;IAC9B,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,CAAA;IACrB,MAAM,KAAK,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACpD,OAAO,KAAK,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAE,CAAC;QAAE,KAAK,EAAE,CAAA;IACnD,OAAO,GAAG,GAAG,KAAK,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,CAAE,CAAC;QAAE,GAAG,EAAE,CAAA;IACnD,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAA;AAC9B,CAAC;AAED,SAAS,SAAS,CAAC,IAAY,EAAE,OAAyB;IACzD,OAAO,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AACzE,CAAC;AAQD,gGAAgG;AAChG,MAAM,UAAU,kBAAkB,CAAC,
IAAY,EAAE,OAAyB;IACzE,MAAM,QAAQ,GAAG,IAAI,KAAK,CAAS,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACvD,MAAM,MAAM,GAAG,MAAM,CAAA;IACrB,MAAM,KAAK,GAAe,EAAE,CAAA;IAC5B,IAAI,CAAyB,CAAA;IAC7B,OAAO,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACzC,MAAM,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAA;QACpB,MAAM,QAAQ,GAAG,SAAS,CAAC,OAAO,CAAC,CAAA;QACnC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACf,KAAK,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC,CAAA;YACtD,SAAQ;QACT,CAAC;QACD,IAAI,IAAI,GAAG,CAAC,CAAA;QACZ,MAAM,KAAK,GAAG,CAAC,CAAS,EAAE,EAAE,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACpD,OAAO,IAAI,GAAG,OAAO,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAE,CAAC;YAAE,IAAI,EAAE,CAAA;QAC9D,KAAK,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI,EAAE,GAAG,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI,GAAG,QAAQ,CAAC,MAAM,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAA;IAC7F,CAAC;IAED,IAAI,CAAC,GAAG,CAAC,CAAA;IACT,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;QACzB,IAAI,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAC;YACrB,CAAC,EAAE,CAAA;YACH,SAAQ;QACT,CAAC;QACD,IAAI,QAAQ,GAAG,CAAC,CAAA;QAChB,IAAI,WAAW,GAAG,CAAC,CAAA;QACnB,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAA;QACzD,KAAK,IAAI,CAAC,GAAG,IAAI,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAChC,MAAM,KAAK,GAAa,EAAE,CAAA;YAC1B,IAAI,EAAE,GAAG,IAAI,CAAA;YACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAChC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE,CAAC;oBACrB,EAAE,GAAG,KAAK,CAAA;oBACV,MAAK;gBACN,CAAC;gBACD,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,IAAI,CAAC,CAAA;YAC3B,CAAC;YACD,IAAI,CAAC,EAAE;gBAAE,SAAQ;YACjB,MAAM,GAAG,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,CAAA;YACzC,IAAI,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;YACxC,uFAAuF;YACvF,IAAI,CAAC,KAAK,CAAC;gBAAE,IAAI,IAAI,OAAO,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAE,CAAC,IAAI,CAAC,CAAA;YAC5D,IAAI,IAAI,EAAE,CAAC;gBACV,QAAQ,GAAG,CAAC,C
AAA;gBACZ,WAAW,GAAG,IAAI,CAAA;gBAClB,MAAK;YACN,CAAC;QACF,CAAC;QACD,IAAI,QAAQ,EAAE,CAAC;YACd,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAE,CAAC,KAAK,CAAA;YAC7B,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,GAAG,QAAQ,GAAG,CAAC,CAAE,CAAC,GAAG,CAAA;YACxC,KAAK,IAAI,CAAC,GAAG,KAAK,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE;gBAAE,QAAQ,CAAC,CAAC,CAAC,GAAG,WAAW,CAAA;YAClF,CAAC,IAAI,QAAQ,CAAA;QACd,CAAC;aAAM,CAAC;YACP,CAAC,EAAE,CAAA;QACJ,CAAC;IACF,CAAC;IACD,OAAO,QAAQ,CAAA;AAChB,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,6BAA6B,CAC5C,SAAyD,EACzD,gBAAuC,EACvC,MAAM,GAAG,CAAC;IAEV,MAAM,CAAC,GAAG,SAAS,CAAC,UAAU,CAAC,MAAM,CAAA;IACrC,MAAM,QAAQ,GAAG,IAAI,KAAK,CAAU,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IAClD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC;YACpC,KAAK,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,CAAC,IAAI,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACxC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;gBACf,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC;oBAAE,QAAQ,CAAC,CAAC,CAAC,GAAG,IAAI,CAAA;YACnD,CAAC;QACF,CAAC;IACF,CAAC;IACD,MAAM,GAAG,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAA;IAC9C,OAAO;QACN,QAAQ,EAAE,SAAS,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,KAAK,CAAS,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAClG,UAAU,EAAE,SAAS,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;KACrE,CAAA;AACF,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,sBAAsB,CACrC,IAAY,EACZ,MAAqC,EACrC,OAAyB;IAEzB,MAAM,QAAQ,GAAG,kBAAkB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAA;IAClD,MAAM,IAAI,GAAG,GAAG,EAAE,CAAC,IAAI,KAAK,CAAS,OAAO,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IAChE,MAAM,QAAQ,GAAe,EAAE,CAAA;IAC/B,MAAM,UAAU,GAAa,EAAE,CAAA;IAC/B,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE,CAAC;QACxB,IAAI,IAAI,GAAG,CAAC,CAAA;QACZ,KAAK,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,CAAC,GAAG,CAAC,CAAC
,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,IAAI,CAAC,GAAG,IAAI,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAE,CAAC,EAAE,CAAC;gBAC7C,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAE,CAAA;gBACnB,MAAK;YACN,CAAC;QACF,CAAC;QACD,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAA;QACvD,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;IAChC,CAAC;IACD,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,CAAA;AAChC,CAAC"}
|
package/out/index.d.ts
CHANGED
|
@@ -13,6 +13,8 @@ export * from "./postcode-binary-resolver.js";
|
|
|
13
13
|
export * from "./proposal-classifier.js";
|
|
14
14
|
export { addEmissionMatrix, buildEmissionPriors } from "./query-shape-prior.js";
|
|
15
15
|
export type { BuildPriorsOpts, KnownFormatHitLike, QueryShapeLike, TokenLike } from "./query-shape-prior.js";
|
|
16
|
+
export * from "./span-proposal-prior.js";
|
|
17
|
+
export * from "./span-proposer-lexicon.js";
|
|
16
18
|
export * from "./tokenizer.js";
|
|
17
19
|
export { buildBioEndMask, buildBioStartMask, buildBioTransitionMask, perTokenArgmax, softmax, viterbi, } from "./viterbi.js";
|
|
18
20
|
export type { ViterbiInput, ViterbiResult } from "./viterbi.js";
|
package/out/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,uBAAuB,CAAA;AACrC,cAAc,iBAAiB,CAAA;AAC/B,cAAc,0BAA0B,CAAA;AACxC,cAAc,aAAa,CAAA;AAC3B,cAAc,kBAAkB,CAAA;AAChC,cAAc,sBAAsB,CAAA;AACpC,cAAc,+BAA+B,CAAA;AAC7C,cAAc,0BAA0B,CAAA;AACxC,OAAO,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAA;AAC/E,YAAY,EAAE,eAAe,EAAE,kBAAkB,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAA;AAC5G,cAAc,gBAAgB,CAAA;AAC9B,OAAO,EACN,eAAe,EACf,iBAAiB,EACjB,sBAAsB,EACtB,cAAc,EACd,OAAO,EACP,OAAO,GACP,MAAM,cAAc,CAAA;AACrB,YAAY,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,cAAc,CAAA;AAC/D,cAAc,cAAc,CAAA"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,uBAAuB,CAAA;AACrC,cAAc,iBAAiB,CAAA;AAC/B,cAAc,0BAA0B,CAAA;AACxC,cAAc,aAAa,CAAA;AAC3B,cAAc,kBAAkB,CAAA;AAChC,cAAc,sBAAsB,CAAA;AACpC,cAAc,+BAA+B,CAAA;AAC7C,cAAc,0BAA0B,CAAA;AACxC,OAAO,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAA;AAC/E,YAAY,EAAE,eAAe,EAAE,kBAAkB,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAA;AAC5G,cAAc,0BAA0B,CAAA;AACxC,cAAc,4BAA4B,CAAA;AAC1C,cAAc,gBAAgB,CAAA;AAC9B,OAAO,EACN,eAAe,EACf,iBAAiB,EACjB,sBAAsB,EACtB,cAAc,EACd,OAAO,EACP,OAAO,GACP,MAAM,cAAc,CAAA;AACrB,YAAY,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,cAAc,CAAA;AAC/D,cAAc,cAAc,CAAA"}
|
package/out/index.js
CHANGED
|
@@ -12,6 +12,8 @@ export * from "./postcode-anchor.js";
|
|
|
12
12
|
export * from "./postcode-binary-resolver.js";
|
|
13
13
|
export * from "./proposal-classifier.js";
|
|
14
14
|
export { addEmissionMatrix, buildEmissionPriors } from "./query-shape-prior.js";
|
|
15
|
+
export * from "./span-proposal-prior.js";
|
|
16
|
+
export * from "./span-proposer-lexicon.js";
|
|
15
17
|
export * from "./tokenizer.js";
|
|
16
18
|
export { buildBioEndMask, buildBioStartMask, buildBioTransitionMask, perTokenArgmax, softmax, viterbi, } from "./viterbi.js";
|
|
17
19
|
export * from "./weights.js";
|
package/out/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,uBAAuB,CAAA;AACrC,cAAc,iBAAiB,CAAA;AAC/B,cAAc,0BAA0B,CAAA;AACxC,cAAc,aAAa,CAAA;AAC3B,cAAc,kBAAkB,CAAA;AAChC,cAAc,sBAAsB,CAAA;AACpC,cAAc,+BAA+B,CAAA;AAC7C,cAAc,0BAA0B,CAAA;AACxC,OAAO,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAA;AAE/E,cAAc,gBAAgB,CAAA;AAC9B,OAAO,EACN,eAAe,EACf,iBAAiB,EACjB,sBAAsB,EACtB,cAAc,EACd,OAAO,EACP,OAAO,GACP,MAAM,cAAc,CAAA;AAErB,cAAc,cAAc,CAAA"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,uBAAuB,CAAA;AACrC,cAAc,iBAAiB,CAAA;AAC/B,cAAc,0BAA0B,CAAA;AACxC,cAAc,aAAa,CAAA;AAC3B,cAAc,kBAAkB,CAAA;AAChC,cAAc,sBAAsB,CAAA;AACpC,cAAc,+BAA+B,CAAA;AAC7C,cAAc,0BAA0B,CAAA;AACxC,OAAO,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAA;AAE/E,cAAc,0BAA0B,CAAA;AACxC,cAAc,4BAA4B,CAAA;AAC1C,cAAc,gBAAgB,CAAA;AAC9B,OAAO,EACN,eAAe,EACf,iBAAiB,EACjB,sBAAsB,EACtB,cAAc,EACd,OAAO,EACP,OAAO,GACP,MAAM,cAAc,CAAA;AAErB,cAAc,cAAc,CAAA"}
|
package/out/onnx-runner.d.ts
CHANGED
|
@@ -32,8 +32,8 @@ export interface InferResult {
|
|
|
32
32
|
numLabels: number;
|
|
33
33
|
/**
|
|
34
34
|
* Pooled locale-head posterior (`locale_logits` output, LOCALE_COUNTRIES order), when the model
|
|
35
|
-
* exports it (v1.1.0+, #511 Tier A). Absent on older bundles — consumers must treat undefined
|
|
36
|
-
*
|
|
35
|
+
* exports it (v1.1.0+, #511 Tier A). Absent on older bundles — consumers must treat undefined as
|
|
36
|
+
* "no address-system detection available".
|
|
37
37
|
*/
|
|
38
38
|
localeLogits?: number[];
|
|
39
39
|
}
|
package/out/onnx-runner.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-runner.d.ts","sourceRoot":"","sources":["../onnx-runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAQH,MAAM,WAAW,cAAc;IAC9B,wEAAwE;IACxE,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,8FAA8F;AAC9F,eAAO,MAAM,qBAAqB,MAAM,CAAA;AAExC,MAAM,WAAW,WAAW;IAC3B,2EAA2E;IAC3E,MAAM,EAAE,MAAM,EAAE,EAAE,CAAA;IAClB,oEAAoE;IACpE,SAAS,EAAE,MAAM,CAAA;IACjB;;;;OAIG;IACH,YAAY,CAAC,EAAE,MAAM,EAAE,CAAA;CACvB;AAED,qBAAa,UAAU;IAMrB,OAAO,CAAC,QAAQ,CAAC,SAAS;IAC1B,OAAO,CAAC,QAAQ,CAAC,UAAU;IAN5B,OAAO,CAAC,OAAO,CAAoC;IACnD,OAAO,CAAC,WAAW,CAA6C;IAChE,SAAgB,WAAW,EAAE,MAAM,CAAA;IAEnC,OAAO;IAQP,oEAAoE;WACvD,MAAM,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,GAAE,cAAmB,GAAG,OAAO,CAAC,UAAU,CAAC;IAMtF,6CAA6C;WAChC,SAAS,CAAC,UAAU,EAAE,UAAU,EAAE,IAAI,GAAE,cAAmB,GAAG,OAAO,CAAC,UAAU,CAAC;YAMhF,aAAa;IAgB3B;;;;;;;;;;;OAWG;IACG,KAAK,CACV,QAAQ,EAAE,MAAM,EAAE,EAClB,MAAM,CAAC,EAAE;QAAE,QAAQ,EAAE,aAAa,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC;QAAC,UAAU,EAAE,aAAa,CAAC,MAAM,CAAC,CAAA;KAAE,EAC9F,SAAS,CAAC,EAAE;QAAE,QAAQ,EAAE,aAAa,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC;QAAC,UAAU,EAAE,aAAa,CAAC,MAAM,CAAC,CAAA;KAAE,GAC/F,OAAO,CAAC,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"onnx-runner.d.ts","sourceRoot":"","sources":["../onnx-runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAQH,MAAM,WAAW,cAAc;IAC9B,wEAAwE;IACxE,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,8FAA8F;AAC9F,eAAO,MAAM,qBAAqB,MAAM,CAAA;AAExC,MAAM,WAAW,WAAW;IAC3B,2EAA2E;IAC3E,MAAM,EAAE,MAAM,EAAE,EAAE,CAAA;IAClB,oEAAoE;IACpE,SAAS,EAAE,MAAM,CAAA;IACjB;;;;OAIG;IACH,YAAY,CAAC,EAAE,MAAM,EAAE,CAAA;CACvB;AAED,qBAAa,UAAU;IAMrB,OAAO,CAAC,QAAQ,CAAC,SAAS;IAC1B,OAAO,CAAC,QAAQ,CAAC,UAAU;IAN5B,OAAO,CAAC,OAAO,CAAoC;IACnD,OAAO,CAAC,WAAW,CAA6C;IAChE,SAAgB,WAAW,EAAE,MAAM,CAAA;IAEnC,OAAO;IAQP,oEAAoE;WACvD,MAAM,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,GAAE,cAAmB,GAAG,OAAO,CAAC,UAAU,CAAC;IAMtF,6CAA6C;WAChC,SAAS,CAAC,UAAU,EAAE,UAAU,EAAE,IAAI,GAAE,cAAmB,GAAG,OAAO,CAAC,UAAU,CAAC;YAMhF,aAAa;IAgB3B;;;;;;;;;;;OAWG;IACG,KAAK,CACV,QAAQ,EAAE,MAAM,EAAE,EAClB,MAAM,CAAC,EAAE;QAAE,QAAQ,EAAE,aAAa,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC;QAAC,UAAU,EAAE,aAAa,CAAC,MAAM,CAAC,CAAA;KAAE,EAC9F,SAAS,CAAC,EAAE;QAAE,QAAQ,EAAE,aAAa,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC,CAAC;QAAC,UAAU,EAAE,aAAa,CAAC,MAAM,CAAC,CAAA;KAAE,GAC/F,OAAO,CAAC,WAAW,CAAC;CAiFvB"}
|
package/out/onnx-runner.js
CHANGED
|
@@ -127,11 +127,12 @@ export class OnnxRunner {
|
|
|
127
127
|
feeds.gazetteer_confidence = new ort.Tensor("float32", gc, [1, this.fixedSeqLen]);
|
|
128
128
|
}
|
|
129
129
|
else if (session.inputNames.includes("gazetteer_features")) {
|
|
130
|
-
feeds.gazetteer_features = new ort.Tensor("float32", new Float32Array(this.fixedSeqLen * GAZETTEER_FEATURE_DIM), [
|
|
131
|
-
feeds.gazetteer_confidence = new ort.Tensor("float32", new Float32Array(this.fixedSeqLen), [
|
|
130
|
+
feeds.gazetteer_features = new ort.Tensor("float32", new Float32Array(this.fixedSeqLen * GAZETTEER_FEATURE_DIM), [
|
|
132
131
|
1,
|
|
133
132
|
this.fixedSeqLen,
|
|
133
|
+
GAZETTEER_FEATURE_DIM,
|
|
134
134
|
]);
|
|
135
|
+
feeds.gazetteer_confidence = new ort.Tensor("float32", new Float32Array(this.fixedSeqLen), [1, this.fixedSeqLen]);
|
|
135
136
|
}
|
|
136
137
|
const output = await session.run(feeds);
|
|
137
138
|
const logitsTensor = output.logits;
|
package/out/onnx-runner.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-runner.js","sourceRoot":"","sources":["../onnx-runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAA;AACxC,OAAO,GAAG,MAAM,kBAAkB,CAAA;AAElC,OAAO,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAA;AAC1D,OAAO,EAAE,qBAAqB,EAAE,MAAM,0BAA0B,CAAA;AAchE,8FAA8F;AAC9F,MAAM,CAAC,MAAM,qBAAqB,GAAG,GAAG,CAAA;AAexC,MAAM,OAAO,UAAU;IAMJ;IACA;IANV,OAAO,GAAgC,IAAI,CAAA;IAC3C,WAAW,GAAyC,IAAI,CAAA;IAChD,WAAW,CAAQ;IAEnC,YACkB,SAAiB,EACjB,UAA6B,EAC9C,IAAoB;QAFH,cAAS,GAAT,SAAS,CAAQ;QACjB,eAAU,GAAV,UAAU,CAAmB;QAG9C,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,qBAAqB,CAAA;IAC7D,CAAC;IAED,oEAAoE;IACpE,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,SAAiB,EAAE,OAAuB,EAAE;QAC/D,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,CAAC,CAAA;QACpD,IAAI,IAAI,CAAC,MAAM;YAAE,MAAM,MAAM,CAAC,aAAa,EAAE,CAAA;QAC7C,OAAO,MAAM,CAAA;IACd,CAAC;IAED,6CAA6C;IAC7C,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,UAAsB,EAAE,OAAuB,EAAE;QACvE,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,SAAS,EAAE,UAAU,EAAE,IAAI,CAAC,CAAA;QAC1D,IAAI,IAAI,CAAC,MAAM;YAAE,MAAM,MAAM,CAAC,aAAa,EAAE,CAAA;QAC7C,OAAO,MAAM,CAAA;IACd,CAAC;IAEO,KAAK,CAAC,aAAa;QAC1B,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC,OAAO,CAAA;QACrC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACvB,IAAI,CAAC,WAAW,GAAG,CAAC,KAAK,IAAI,EAAE;gBAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,IAAI,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAA;gBAClF,MAAM,OAAO,GAAG,MAAM,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAAC,KAAK,EAAE;oBACxD,kBAAkB,EAAE,CAAC,KAAK,CAAC;oBAC3B,sBAAsB,EAAE,KAAK;iBAC7B,CAAC,CAAA;gBACF,IAAI,CAAC,OAAO,GAAG,OAAO,CAAA;gBACtB,OAAO,OAAO,CAAA;YACf,CAAC,CAAC,EAAE,CAAA;QACL,CAAC;QACD,OAAO,IAAI,CAAC,WAAW,CAAA;IACxB,CAAC;IAED;;;;;;;;;;;OAWG;IACH,KAAK,CAAC,KAAK,CACV,QAAkB,EAClB,MAA8F,EAC9F,SAAiG;QAEjG,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,aAAa,EAAE,CAAA;QAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,IAAI,CAAC,WAAW,CAAC,CAAA;QAC1D,MAAM,MAAM,GAAG,IAAI,aAAa,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;QAClD,MAAM,IAAI,GAAG,IAAI,aAAa,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;QAChD,KAAK,IAAI,CAAC,GAAG
,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACjC,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAE,CAAC,CAAA;YAChC,IAAI,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;QACb,CAAC;QAED,MAAM,KAAK,GAA+B;YACzC,SAAS,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;YACjE,cAAc,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;SACpE,CAAA;QAED,IAAI,MAAM,EAAE,CAAC;YACZ,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAA;YAC3C,MAAM,EAAE,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,GAAG,GAAG,CAAC,CAAA;YACnD,MAAM,EAAE,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;YAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACjC,EAAE,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;gBACjC,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAA;gBAC9B,IAAI,GAAG;oBAAE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE;wBAAE,EAAE,CAAC,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;YACrE,CAAC;YACD,KAAK,CAAC,eAAe,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAA;YACjF,KAAK,CAAC,iBAAiB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAA;QAC/E,CAAC;aAAM,IAAI,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EAAE,CAAC;YAC3D,6FAA6F;YAC7F,0FAA0F;YAC1F,0EAA0E;YAC1E,KAAK,CAAC,eAAe,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,GAAG,kBAAkB,CAAC,EAAE;gBAC1G,CAAC;gBACD,IAAI,CAAC,WAAW;gBAChB,kBAAkB;aAClB,CAAC,CAAA;YACF,KAAK,CAAC,iBAAiB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAA;QAC/G,CAAC;QAED,+FAA+F;QAC/F,iGAAiG;QACjG,qFAAqF;QACrF,IAAI,SAAS,IAAI,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC,oBAAoB,CAAC,EAAE,CAAC;YACpE,MAAM,GAAG,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAA;YAC9C,MAAM,EAAE,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,GAAG,GAAG,CAAC,CAAA;YACnD,MAAM,EAAE,GAAG,IAA
I,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;YAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACjC,EAAE,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;gBACpC,MAAM,GAAG,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAA;gBACjC,IAAI,GAAG;oBAAE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE;wBAAE,EAAE,CAAC,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;YACrE,CAAC;YACD,KAAK,CAAC,kBAAkB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAA;YACpF,KAAK,CAAC,oBAAoB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAA;QAClF,CAAC;aAAM,IAAI,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC,oBAAoB,CAAC,EAAE,CAAC;YAC9D,KAAK,CAAC,kBAAkB,GAAG,IAAI,GAAG,CAAC,MAAM,
|
|
1
|
+
{"version":3,"file":"onnx-runner.js","sourceRoot":"","sources":["../onnx-runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAA;AACxC,OAAO,GAAG,MAAM,kBAAkB,CAAA;AAElC,OAAO,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAA;AAC1D,OAAO,EAAE,qBAAqB,EAAE,MAAM,0BAA0B,CAAA;AAchE,8FAA8F;AAC9F,MAAM,CAAC,MAAM,qBAAqB,GAAG,GAAG,CAAA;AAexC,MAAM,OAAO,UAAU;IAMJ;IACA;IANV,OAAO,GAAgC,IAAI,CAAA;IAC3C,WAAW,GAAyC,IAAI,CAAA;IAChD,WAAW,CAAQ;IAEnC,YACkB,SAAiB,EACjB,UAA6B,EAC9C,IAAoB;QAFH,cAAS,GAAT,SAAS,CAAQ;QACjB,eAAU,GAAV,UAAU,CAAmB;QAG9C,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,qBAAqB,CAAA;IAC7D,CAAC;IAED,oEAAoE;IACpE,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,SAAiB,EAAE,OAAuB,EAAE;QAC/D,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,CAAC,CAAA;QACpD,IAAI,IAAI,CAAC,MAAM;YAAE,MAAM,MAAM,CAAC,aAAa,EAAE,CAAA;QAC7C,OAAO,MAAM,CAAA;IACd,CAAC;IAED,6CAA6C;IAC7C,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,UAAsB,EAAE,OAAuB,EAAE;QACvE,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,SAAS,EAAE,UAAU,EAAE,IAAI,CAAC,CAAA;QAC1D,IAAI,IAAI,CAAC,MAAM;YAAE,MAAM,MAAM,CAAC,aAAa,EAAE,CAAA;QAC7C,OAAO,MAAM,CAAA;IACd,CAAC;IAEO,KAAK,CAAC,aAAa;QAC1B,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC,OAAO,CAAA;QACrC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACvB,IAAI,CAAC,WAAW,GAAG,CAAC,KAAK,IAAI,EAAE;gBAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,IAAI,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAA;gBAClF,MAAM,OAAO,GAAG,MAAM,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAAC,KAAK,EAAE;oBACxD,kBAAkB,EAAE,CAAC,KAAK,CAAC;oBAC3B,sBAAsB,EAAE,KAAK;iBAC7B,CAAC,CAAA;gBACF,IAAI,CAAC,OAAO,GAAG,OAAO,CAAA;gBACtB,OAAO,OAAO,CAAA;YACf,CAAC,CAAC,EAAE,CAAA;QACL,CAAC;QACD,OAAO,IAAI,CAAC,WAAW,CAAA;IACxB,CAAC;IAED;;;;;;;;;;;OAWG;IACH,KAAK,CAAC,KAAK,CACV,QAAkB,EAClB,MAA8F,EAC9F,SAAiG;QAEjG,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,aAAa,EAAE,CAAA;QAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,IAAI,CAAC,WAAW,CAAC,CAAA;QAC1D,MAAM,MAAM,GAAG,IAAI,aAAa,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;QAClD,MAAM,IAAI,GAAG,IAAI,aAAa,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;QAChD,KAAK,IAAI,CAAC,GAAG
,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACjC,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAE,CAAC,CAAA;YAChC,IAAI,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;QACb,CAAC;QAED,MAAM,KAAK,GAA+B;YACzC,SAAS,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;YACjE,cAAc,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;SACpE,CAAA;QAED,IAAI,MAAM,EAAE,CAAC;YACZ,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAA;YAC3C,MAAM,EAAE,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,GAAG,GAAG,CAAC,CAAA;YACnD,MAAM,EAAE,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;YAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACjC,EAAE,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;gBACjC,MAAM,GAAG,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAA;gBAC9B,IAAI,GAAG;oBAAE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE;wBAAE,EAAE,CAAC,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;YACrE,CAAC;YACD,KAAK,CAAC,eAAe,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAA;YACjF,KAAK,CAAC,iBAAiB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAA;QAC/E,CAAC;aAAM,IAAI,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EAAE,CAAC;YAC3D,6FAA6F;YAC7F,0FAA0F;YAC1F,0EAA0E;YAC1E,KAAK,CAAC,eAAe,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,GAAG,kBAAkB,CAAC,EAAE;gBAC1G,CAAC;gBACD,IAAI,CAAC,WAAW;gBAChB,kBAAkB;aAClB,CAAC,CAAA;YACF,KAAK,CAAC,iBAAiB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAA;QAC/G,CAAC;QAED,+FAA+F;QAC/F,iGAAiG;QACjG,qFAAqF;QACrF,IAAI,SAAS,IAAI,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC,oBAAoB,CAAC,EAAE,CAAC;YACpE,MAAM,GAAG,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,CAAC,CAAA;YAC9C,MAAM,EAAE,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,GAAG,GAAG,CAAC,CAAA;YACnD,MAAM,EAAE,GAAG,IAA
I,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;YAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACjC,EAAE,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;gBACpC,MAAM,GAAG,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAA;gBACjC,IAAI,GAAG;oBAAE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,EAAE,CAAC,EAAE;wBAAE,EAAE,CAAC,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAA;YACrE,CAAC;YACD,KAAK,CAAC,kBAAkB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,CAAA;YACpF,KAAK,CAAC,oBAAoB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAA;QAClF,CAAC;aAAM,IAAI,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC,oBAAoB,CAAC,EAAE,CAAC;YAC9D,KAAK,CAAC,kBAAkB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,GAAG,qBAAqB,CAAC,EAAE;gBAChH,CAAC;gBACD,IAAI,CAAC,WAAW;gBAChB,qBAAqB;aACrB,CAAC,CAAA;YACF,KAAK,CAAC,oBAAoB,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,IAAI,YAAY,CAAC,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC,CAAA;QAClH,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;QACvC,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,CAAA;QAClC,IAAI,CAAC,YAAY;YAAE,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAA;QACjF,MAAM,IAAI,GAAG,YAAY,CAAC,IAAoB,CAAA;QAC9C,MAAM,CAAC,EAAE,AAAD,EAAG,SAAS,CAAC,GAAG,YAAY,CAAC,IAAyC,CAAA;QAE9E,MAAM,MAAM,GAAe,EAAE,CAAA;QAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACjC,MAAM,GAAG,GAAa,IAAI,KAAK,CAAC,SAAS,CAAC,CAAA;YAC1C,MAAM,IAAI,GAAG,CAAC,GAAG,SAAS,CAAA;YAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE;gBAAE,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,GAAG,CAAC,CAAE,CAAA;YAC5D,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QACjB,CAAC;QAED,uFAAuF;QACvF,MAAM,YAAY,GAAG,MAAM,CAAC,aAAa,CAAA;QACzC,MAAM,YAAY,GAAG,YAAY,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,IAAoB,CAAC,CAAC,CAAC,CAAC,SAAS,CAAA;QAE7F,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,CAAC,CAAC,EAAE,CA
AC,EAAE,CAAA;IACxE,CAAC;CACD"}
|
package/out/span-bridge.d.ts
CHANGED
|
@@ -8,26 +8,42 @@
|
|
|
8
8
|
*
|
|
9
9
|
* The corpus alignment tokenizer drops standalone punctuation (corpus/src/tokenize.ts), so NO
|
|
10
10
|
* training row can label the periods inside "P.O. Box" — the model learns the tag perfectly
|
|
11
|
-
* (every letter piece at 0.93+ confidence) but emits it as fragments split at each dot, and
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
11
|
+
* (every letter piece at 0.93+ confidence) but emits it as fragments split at each dot, and span
|
|
12
|
+
* assembly surfaces only the first fragment ("p"). Measured on the v1.3.0 gate: dotted po_box
|
|
13
|
+
* leaders failed 98%, ALL truncations, while plain leaders passed — a structural expressivity
|
|
14
|
+
* limit of the label format, not a learning failure.
|
|
15
15
|
*
|
|
16
|
-
* The fix is deterministic: AFTER decode, merge adjacent same-label spans whose gap consists
|
|
17
|
-
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
21
|
-
*
|
|
16
|
+
* The fix is deterministic: AFTER decode, merge adjacent same-label spans whose gap consists only
|
|
17
|
+
* of punctuation/whitespace, contains at least one non-space character, and is short (≤ 3 chars).
|
|
18
|
+
* The non-space requirement is load-bearing — space-only gaps ("Saint Paul" as two locality
|
|
19
|
+
* spans) are NOT bridged, because a space between two same-tag spans is often a real boundary
|
|
20
|
+
* (the Saint-Albans fragmentation wants this fix too, but it must come with its own evidence;
|
|
21
|
+
* this pass stays conservative by construction).
|
|
22
22
|
*
|
|
23
23
|
* Runs beside the postcode/unit repair passes in the classifier, before tree-building.
|
|
24
24
|
*/
|
|
25
25
|
import type { DecoderToken } from "@mailwoman/core/decoder";
|
|
26
|
+
/** Optional settings accepted by {@link bridgePunctuationGaps}. */
export interface BridgePunctuationOpts {
    /**
     * Structural spans produced by the Stage 2.7 span proposer (ANNOTATION/QUOTED groups, with
     * their delimiters included). No merge may straddle a boundary of one of these spans — this is
     * M2's crossing constraint and the mirror image of the bridge itself: the bridge merges across
     * WEAK punctuation, this option forbids merging across STRUCTURAL punctuation.
     *
     * A merge is refused when either boundary of a span falls inside the gap that would be
     * bridged, e.g. an apostrophe-quoted name whose closing quote sits in an otherwise-bridgeable
     * gap. Boundaries that already lie inside a labeled token are the model's call, not the
     * bridge's; only gaps are policed.
     */
    blockedSpans?: readonly {
        start: number;
        end: number;
    }[];
}
|
|
26
42
|
/**
|
|
27
43
|
* Merge same-label fragments separated only by punctuation gaps. Returns a new token array where
|
|
28
44
|
* the first fragment of each bridged group is widened to the group's full char range (so span
|
|
29
|
-
* extraction reads the raw text straight through the punctuation), and later fragments are
|
|
30
|
-
*
|
|
45
|
+
* extraction reads the raw text straight through the punctuation), and later fragments are dropped.
|
|
46
|
+
* Labels, ordering, and all non-bridged tokens are untouched.
|
|
31
47
|
*/
|
|
32
|
-
export declare function bridgePunctuationGaps(text: string, input: readonly DecoderToken[]): DecoderToken[];
|
|
48
|
+
export declare function bridgePunctuationGaps(text: string, input: readonly DecoderToken[], opts?: BridgePunctuationOpts): DecoderToken[];
|
|
33
49
|
//# sourceMappingURL=span-bridge.d.ts.map
|
package/out/span-bridge.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"span-bridge.d.ts","sourceRoot":"","sources":["../span-bridge.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAA;
|
|
1
|
+
{"version":3,"file":"span-bridge.d.ts","sourceRoot":"","sources":["../span-bridge.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAA;AAiB3D,iDAAiD;AACjD,MAAM,WAAW,qBAAqB;IACrC;;;;;;;;OAQG;IACH,YAAY,CAAC,EAAE,aAAa,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,GAAG,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;CAC5D;AAiBD;;;;;GAKG;AACH,wBAAgB,qBAAqB,CACpC,IAAI,EAAE,MAAM,EACZ,KAAK,EAAE,SAAS,YAAY,EAAE,EAC9B,IAAI,CAAC,EAAE,qBAAqB,GAC1B,YAAY,EAAE,CAqChB"}
|
package/out/span-bridge.js
CHANGED
|
@@ -8,27 +8,28 @@
|
|
|
8
8
|
*
|
|
9
9
|
* The corpus alignment tokenizer drops standalone punctuation (corpus/src/tokenize.ts), so NO
|
|
10
10
|
* training row can label the periods inside "P.O. Box" — the model learns the tag perfectly
|
|
11
|
-
* (every letter piece at 0.93+ confidence) but emits it as fragments split at each dot, and
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
11
|
+
* (every letter piece at 0.93+ confidence) but emits it as fragments split at each dot, and span
|
|
12
|
+
* assembly surfaces only the first fragment ("p"). Measured on the v1.3.0 gate: dotted po_box
|
|
13
|
+
* leaders failed 98%, ALL truncations, while plain leaders passed — a structural expressivity
|
|
14
|
+
* limit of the label format, not a learning failure.
|
|
15
15
|
*
|
|
16
|
-
* The fix is deterministic: AFTER decode, merge adjacent same-label spans whose gap consists
|
|
17
|
-
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
21
|
-
*
|
|
16
|
+
* The fix is deterministic: AFTER decode, merge adjacent same-label spans whose gap consists only
|
|
17
|
+
* of punctuation/whitespace, contains at least one non-space character, and is short (≤ 3 chars).
|
|
18
|
+
* The non-space requirement is load-bearing — space-only gaps ("Saint Paul" as two locality
|
|
19
|
+
* spans) are NOT bridged, because a space between two same-tag spans is often a real boundary
|
|
20
|
+
* (the Saint-Albans fragmentation wants this fix too, but it must come with its own evidence;
|
|
21
|
+
* this pass stays conservative by construction).
|
|
22
22
|
*
|
|
23
23
|
* Runs beside the postcode/unit repair passes in the classifier, before tree-building.
|
|
24
24
|
*/
|
|
25
25
|
/**
|
|
26
26
|
* Gap text qualifies when short, made only of INTRA-TOKEN punctuation (period/hyphen/slash/
|
|
27
27
|
* apostrophe) plus whitespace, with at least one non-space char. Separator punctuation (comma,
|
|
28
|
-
* semicolon) is EXCLUDED — measured 2026-06-11: the comma form merged "47110, 9016"-style
|
|
29
|
-
*
|
|
30
|
-
*
|
|
31
|
-
*
|
|
28
|
+
* semicolon) is EXCLUDED — measured 2026-06-11: the comma form merged "47110, 9016"-style
|
|
29
|
+
* postcode + house-number fragments on six FR golden rows (the model double-labels the
|
|
30
|
+
* number; the comma is the only thing keeping the spans honest). A comma between
|
|
31
|
+
* same-tag spans is a list/separator, never the inside of a
|
|
32
|
+
* surface form.
|
|
32
33
|
*/
|
|
33
34
|
function bridgeable(gap) {
|
|
34
35
|
if (gap.length === 0 || gap.length > 3)
|
|
@@ -37,13 +38,26 @@ function bridgeable(gap) {
|
|
|
37
38
|
return false;
|
|
38
39
|
return /[^\s]/.test(gap);
|
|
39
40
|
}
|
|
41
|
+
/**
 * Report whether any blocked span has a delimiter inside the closed interval
 * `[gapStart, gapEnd]`.
 *
 * For each span, `span.start` is the index of its opening delimiter and `span.end` is one past
 * its closing delimiter, so the closing delimiter itself sits at `span.end - 1`. Either
 * delimiter landing in the interval counts as a crossing.
 *
 * @param gapStart First index of the interval (inclusive).
 * @param gapEnd Last index of the interval (inclusive).
 * @param blockedSpans Structural spans to check; a missing list never blocks.
 * @returns `true` when at least one span delimiter lies in the interval, otherwise `false`.
 */
function crossesBlockedBoundary(gapStart, gapEnd, blockedSpans) {
    if (!blockedSpans) {
        return false;
    }
    const insideGap = (index) => index >= gapStart && index <= gapEnd;
    for (const span of blockedSpans) {
        const openingDelimiter = span.start;
        const closingDelimiter = span.end - 1;
        if (insideGap(openingDelimiter) || insideGap(closingDelimiter)) {
            return true;
        }
    }
    return false;
}
|
|
40
54
|
/**
|
|
41
55
|
* Merge same-label fragments separated only by punctuation gaps. Returns a new token array where
|
|
42
56
|
* the first fragment of each bridged group is widened to the group's full char range (so span
|
|
43
|
-
* extraction reads the raw text straight through the punctuation), and later fragments are
|
|
44
|
-
*
|
|
57
|
+
* extraction reads the raw text straight through the punctuation), and later fragments are dropped.
|
|
58
|
+
* Labels, ordering, and all non-bridged tokens are untouched.
|
|
45
59
|
*/
|
|
46
|
-
export function bridgePunctuationGaps(text, input) {
|
|
60
|
+
export function bridgePunctuationGaps(text, input, opts) {
|
|
47
61
|
const out = [];
|
|
48
62
|
for (const token of input) {
|
|
49
63
|
if (token.label !== "O") {
|
|
@@ -62,7 +76,8 @@ export function bridgePunctuationGaps(text, input) {
|
|
|
62
76
|
prevTag === tag &&
|
|
63
77
|
token.start >= prev.end &&
|
|
64
78
|
skippedInsideGap &&
|
|
65
|
-
bridgeable(text.slice(prev.end, token.start))
|
|
79
|
+
bridgeable(text.slice(prev.end, token.start)) &&
|
|
80
|
+
!crossesBlockedBoundary(prev.end, token.start, opts?.blockedSpans)) {
|
|
66
81
|
// Widen the previous fragment through the gap (absorbing the punctuation O tokens);
|
|
67
82
|
// keep the lower confidence so the merged span never overstates its weakest piece.
|
|
68
83
|
out.length = back + 1;
|
package/out/span-bridge.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"span-bridge.js","sourceRoot":"","sources":["../span-bridge.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAIH
|
|
1
|
+
{"version":3,"file":"span-bridge.js","sourceRoot":"","sources":["../span-bridge.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAIH;;;;;;;;GAQG;AACH,SAAS,UAAU,CAAC,GAAW;IAC9B,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,KAAK,CAAA;IACpD,IAAI,CAAC,oBAAoB,CAAC,IAAI,CAAC,GAAG,CAAC;QAAE,OAAO,KAAK,CAAA;IACjD,OAAO,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AACzB,CAAC;AAgBD,iGAAiG;AACjG,SAAS,sBAAsB,CAC9B,QAAgB,EAChB,MAAc,EACd,YAAuE;IAEvE,IAAI,CAAC,YAAY;QAAE,OAAO,KAAK,CAAA;IAC/B,KAAK,MAAM,IAAI,IAAI,YAAY,EAAE,CAAC;QACjC,mFAAmF;QACnF,IAAI,IAAI,CAAC,KAAK,IAAI,QAAQ,IAAI,IAAI,CAAC,KAAK,IAAI,MAAM;YAAE,OAAO,IAAI,CAAA;QAC/D,IAAI,IAAI,CAAC,GAAG,GAAG,CAAC,IAAI,QAAQ,IAAI,IAAI,CAAC,GAAG,GAAG,CAAC,IAAI,MAAM;YAAE,OAAO,IAAI,CAAA;IACpE,CAAC;IACD,OAAO,KAAK,CAAA;AACb,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,qBAAqB,CACpC,IAAY,EACZ,KAA8B,EAC9B,IAA4B;IAE5B,MAAM,GAAG,GAAmB,EAAE,CAAA;IAC9B,KAAK,MAAM,KAAK,IAAI,KAAK,EAAE,CAAC;QAC3B,IAAI,KAAK,CAAC,KAAK,KAAK,GAAG,EAAE,CAAC;YACzB,wFAAwF;YACxF,oEAAoE;YACpE,IAAI,IAAI,GAAG,GAAG,CAAC,MAAM,GAAG,CAAC,CAAA;YACzB,OAAO,IAAI,IAAI,CAAC,IAAI,GAAG,CAAC,IAAI,CAAE,CAAC,KAAK,KAAK,GAAG,IAAI,GAAG,CAAC,IAAI,CAAE,CAAC,KAAK,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,CAAC;gBAAE,IAAI,EAAE,CAAA;YACrG,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC,CAAC,CAAC,SAAS,CAAA;YAC/C,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;YAC7C,MAAM,OAAO,GAAG,IAAI,EAAE,KAAK,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAA;YACjD,MAAM,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,CAAA;YACnC,MAAM,gBAAgB,GAAG,IAAI,KAAK,SAAS,IAAI,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC,GAAG,IAAI,KAAK,CAAC,KAAK,CAAC,CAAA;YAChH,IACC,IAAI;gBACJ,IAAI,CAAC,KAAK,KAAK,GAAG;gBAClB,OAAO,KAAK,GAAG;gBACf,KAAK,CAAC,KAAK,IAAI,IAAI,CAAC,GAAG;gBACvB,gBAAgB;gBAChB,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC;gBAC7C,CAAC,sBAAsB,CAAC,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,KAAK,EAA
E,IAAI,EAAE,YAAY,CAAC,EACjE,CAAC;gBACF,oFAAoF;gBACpF,mFAAmF;gBACnF,GAAG,CAAC,MAAM,GAAG,IAAI,GAAG,CAAC,CAAA;gBACrB,GAAG,CAAC,IAAI,CAAC,GAAG;oBACX,GAAG,IAAI;oBACP,GAAG,EAAE,KAAK,CAAC,GAAG;oBACd,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC;oBACxC,UAAU,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,EAAE,KAAK,CAAC,UAAU,CAAC;iBACvD,CAAA;gBACD,SAAQ;YACT,CAAC;QACF,CAAC;QACD,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IAChB,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC"}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Emission priors from Stage 2.7 span proposals — the consumption half of M2+M3 (the phrase-prior
|
|
7
|
+
* path the sub-premise direction note names: "consumed as phrase priors today — the classifier
|
|
8
|
+
* conditions on the boundary hypothesis and can still disagree").
|
|
9
|
+
*
|
|
10
|
+
* Same contract as `query-shape-prior.ts`: an additive `[seqLen][numLabels]` log-bias matrix
|
|
11
|
+
* composed onto the encoder emissions before Viterbi. Soft by construction — a confident encoder
|
|
12
|
+
* wins; an uncertain one gets pulled toward the proposal's reading. Dual-path alternatives (M3)
|
|
13
|
+
* simply contribute competing biases at their respective confidences; the CRF resolves them
|
|
14
|
+
* against the model's own evidence, which IS the deferred decision the survey prescribes.
|
|
15
|
+
*
|
|
16
|
+
* Mapping:
|
|
17
|
+
*
|
|
18
|
+
* - `ANNOTATION_SPAN` → bias toward `O` (gold convention 2: bracketed asides are not components; the
|
|
19
|
+
* win is that neighbors stop being poisoned). Applied only above a confidence floor so
|
|
20
|
+
* trailing component-shaped groups ("(Australia)") are left to the model.
|
|
21
|
+
* - `QUOTED_SPAN` → no bias. The content is a real name; typing it is the classifier's job. The span
|
|
22
|
+
* still matters decode-side (the bridge's crossing constraint).
|
|
23
|
+
* - `PO_BOX_PHRASE` → `po_box`; `UNIT_PHRASE`/`LEVEL_PHRASE`/`SPLIT_UNIT` → `unit` (the schema has no
|
|
24
|
+
* level tag — levels ride `unit` until the codex level sourcing lands, #517);
|
|
25
|
+
* `SPLIT_HOUSE_NUMBER`/`FUSED_NUMBER` → `house_number`. B- on the first overlapping piece, I-
|
|
26
|
+
* on the rest.
|
|
27
|
+
*/
|
|
28
|
+
import type { ProposedSpan } from "@mailwoman/core/pipeline";
|
|
29
|
+
import type { TokenLike } from "./query-shape-prior.js";
|
|
30
|
+
/** Tuning knobs for the span-proposal emission priors. */
export interface SpanProposalPriorOpts {
    /**
     * Magnitude, in log-odds units, of the bias applied for tag-mapped proposals; it is multiplied
     * by the proposal's confidence. Defaults to 5.0. Measured on the punctuation-stress sweep
     * (2026-06-12, v4.4.0 int8): the proposer has to flip CONFIDENTLY-wrong emissions (fused
     * `2/14` → split), which 1-2-nat query-shape-style scales cannot reach. At 5.0 the slash class
     * moved +11.1 with every other class flat, and the model still vetoes where its own logit gap
     * is larger (the bare `3/45` row stays fused).
     */
    biasScale?: number;
    /**
     * Magnitude of the annotation O-prior, multiplied by the proposal's confidence. Defaults to
     * 12.0, which is deliberately near-mask strength (the measured saturation point on the same
     * sweep: bracketed +9.1, paren regressions zero). A BALANCED bracket pair with aside-shaped
     * content is the strongest structural cue the proposer has; protection of component-shaped
     * groups comes from the confidence floor, not from this scale.
     */
    annotationBiasScale?: number;
    /**
     * Confidence threshold for annotation proposals: anything below it contributes NO O-bias
     * (its span still feeds the decode-side crossing constraint). Defaults to 0.6, which sits
     * above the trailing-component shape (0.45) and below the capitalized mid-string aside (0.75).
     */
    annotationConfidenceFloor?: number;
}
|
|
54
|
+
/**
|
|
55
|
+
* Build the additive prior matrix for one parse. Returns all-zeros rows for pieces no proposal
|
|
56
|
+
* covers — composes harmlessly via `addEmissionMatrix`.
|
|
57
|
+
*/
|
|
58
|
+
export declare function buildSpanProposalPriors(proposals: ReadonlyArray<ProposedSpan>, tokens: ReadonlyArray<TokenLike>, labels: ReadonlyArray<string>, opts?: SpanProposalPriorOpts): number[][];
|
|
59
|
+
//# sourceMappingURL=span-proposal-prior.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"span-proposal-prior.d.ts","sourceRoot":"","sources":["../span-proposal-prior.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAA;AAE5D,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAA;AAEvD,MAAM,WAAW,qBAAqB;IACrC;;;;;;OAMG;IACH,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB;;;;;;OAMG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAA;IAC5B;;;;OAIG;IACH,yBAAyB,CAAC,EAAE,MAAM,CAAA;CAClC;AAWD;;;GAGG;AACH,wBAAgB,uBAAuB,CACtC,SAAS,EAAE,aAAa,CAAC,YAAY,CAAC,EACtC,MAAM,EAAE,aAAa,CAAC,SAAS,CAAC,EAChC,MAAM,EAAE,aAAa,CAAC,MAAM,CAAC,EAC7B,IAAI,GAAE,qBAA0B,GAC9B,MAAM,EAAE,EAAE,CA6CZ"}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Emission priors from Stage 2.7 span proposals — the consumption half of M2+M3 (the phrase-prior
|
|
7
|
+
* path the sub-premise direction note names: "consumed as phrase priors today — the classifier
|
|
8
|
+
* conditions on the boundary hypothesis and can still disagree").
|
|
9
|
+
*
|
|
10
|
+
* Same contract as `query-shape-prior.ts`: an additive `[seqLen][numLabels]` log-bias matrix
|
|
11
|
+
* composed onto the encoder emissions before Viterbi. Soft by construction — a confident encoder
|
|
12
|
+
* wins; an uncertain one gets pulled toward the proposal's reading. Dual-path alternatives (M3)
|
|
13
|
+
* simply contribute competing biases at their respective confidences; the CRF resolves them
|
|
14
|
+
* against the model's own evidence, which IS the deferred decision the survey prescribes.
|
|
15
|
+
*
|
|
16
|
+
* Mapping:
|
|
17
|
+
*
|
|
18
|
+
* - `ANNOTATION_SPAN` → bias toward `O` (gold convention 2: bracketed asides are not components; the
|
|
19
|
+
* win is that neighbors stop being poisoned). Applied only above a confidence floor so
|
|
20
|
+
* trailing component-shaped groups ("(Australia)") are left to the model.
|
|
21
|
+
* - `QUOTED_SPAN` → no bias. The content is a real name; typing it is the classifier's job. The span
|
|
22
|
+
* still matters decode-side (the bridge's crossing constraint).
|
|
23
|
+
* - `PO_BOX_PHRASE` → `po_box`; `UNIT_PHRASE`/`LEVEL_PHRASE`/`SPLIT_UNIT` → `unit` (the schema has no
|
|
24
|
+
* level tag — levels ride `unit` until the codex level sourcing lands, #517);
|
|
25
|
+
* `SPLIT_HOUSE_NUMBER`/`FUSED_NUMBER` → `house_number`. B- on the first overlapping piece, I-
|
|
26
|
+
* on the rest.
|
|
27
|
+
*/
|
|
28
|
+
/** Proposal kinds that map onto a schema tag; any kind absent from this table is ignored. */
const KIND_TO_TAG = new Map([
    ["PO_BOX_PHRASE", "po_box"],
    ["UNIT_PHRASE", "unit"],
    ["LEVEL_PHRASE", "unit"],
    ["SPLIT_UNIT", "unit"],
    ["SPLIT_HOUSE_NUMBER", "house_number"],
    ["FUSED_NUMBER", "house_number"],
]);
/**
 * Build the additive `[tokens.length][labels.length]` prior matrix for one parse.
 *
 * Rows for pieces that no proposal covers stay all-zero, so the result composes harmlessly via
 * `addEmissionMatrix`. Where several proposals touch the same cell, the largest bias is kept.
 *
 * - `QUOTED_SPAN` proposals add nothing.
 * - `ANNOTATION_SPAN` proposals at or above the confidence floor bias every overlapping piece
 *   toward `O` by `confidence * annotationBiasScale`; they are skipped when `labels` has no `O`.
 * - Kinds listed in `KIND_TO_TAG` bias the first overlapping piece toward `B-<tag>` and the
 *   remaining ones toward `I-<tag>` by `confidence * biasScale`. A missing `B-<tag>` label skips
 *   the proposal; a missing `I-<tag>` label leaves the continuation pieces untouched.
 *
 * @param proposals Span proposals, each carrying `kind`, `start`, `end` and `confidence`.
 * @param tokens Pieces with `start`/`end` offsets, one matrix row per piece.
 * @param labels Label names, one matrix column per label.
 * @param opts Scale and floor overrides (defaults: 5.0, 12.0 and 0.6).
 * @returns The prior matrix as an array of rows.
 */
export function buildSpanProposalPriors(proposals, tokens, labels, opts = {}) {
    const tagScale = opts.biasScale ?? 5.0;
    const annotationScale = opts.annotationBiasScale ?? 12.0;
    const minAnnotationConfidence = opts.annotationConfidenceFloor ?? 0.6;
    const priors = Array.from({ length: tokens.length }, () => new Array(labels.length).fill(0));
    if (proposals.length === 0) {
        return priors;
    }
    const columnOf = new Map();
    for (const [column, label] of labels.entries()) {
        columnOf.set(label, column);
    }
    const outsideColumn = columnOf.get("O");
    // Rows (in piece order) whose piece overlaps the given span.
    const rowsCoveredBy = (span) => priors.filter((_, t) => overlaps(tokens[t], span));
    // Keep the strongest bias seen so far for a cell.
    const raise = (row, column, amount) => {
        row[column] = Math.max(row[column], amount);
    };
    for (const proposal of proposals) {
        const { kind } = proposal;
        if (kind === "QUOTED_SPAN") {
            continue;
        }
        if (kind === "ANNOTATION_SPAN") {
            const belowFloor = proposal.confidence < minAnnotationConfidence;
            if (outsideColumn === undefined || belowFloor) {
                continue;
            }
            const amount = proposal.confidence * annotationScale;
            for (const row of rowsCoveredBy(proposal)) {
                raise(row, outsideColumn, amount);
            }
            continue;
        }
        const tag = KIND_TO_TAG.get(kind);
        if (!tag) {
            continue;
        }
        const beginColumn = columnOf.get(`B-${tag}`);
        const insideColumn = columnOf.get(`I-${tag}`);
        if (beginColumn === undefined) {
            continue;
        }
        const amount = proposal.confidence * tagScale;
        rowsCoveredBy(proposal).forEach((row, position) => {
            // B- on the first overlapping piece, I- on every later one.
            const column = position === 0 ? beginColumn : insideColumn;
            if (column !== undefined) {
                raise(row, column, amount);
            }
        });
    }
    return priors;
}
/** True when the half-open ranges `[a.start, a.end)` and `[b.start, b.end)` share a position. */
function overlaps(a, b) {
    const aStartsBeforeBEnds = a.start < b.end;
    return aStartsBeforeBEnds && b.start < a.end;
}
|
|
93
|
+
//# sourceMappingURL=span-proposal-prior.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"span-proposal-prior.js","sourceRoot":"","sources":["../span-proposal-prior.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AA+BH,MAAM,WAAW,GAAgC,IAAI,GAAG,CAAC;IACxD,CAAC,eAAe,EAAE,QAAQ,CAAC;IAC3B,CAAC,aAAa,EAAE,MAAM,CAAC;IACvB,CAAC,cAAc,EAAE,MAAM,CAAC;IACxB,CAAC,YAAY,EAAE,MAAM,CAAC;IACtB,CAAC,oBAAoB,EAAE,cAAc,CAAC;IACtC,CAAC,cAAc,EAAE,cAAc,CAAC;CAChC,CAAC,CAAA;AAEF;;;GAGG;AACH,MAAM,UAAU,uBAAuB,CACtC,SAAsC,EACtC,MAAgC,EAChC,MAA6B,EAC7B,OAA8B,EAAE;IAEhC,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAA;IACvB,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAA;IACvB,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,GAAG,CAAA;IACvC,MAAM,mBAAmB,GAAG,IAAI,CAAC,mBAAmB,IAAI,IAAI,CAAA;IAC5D,MAAM,eAAe,GAAG,IAAI,CAAC,yBAAyB,IAAI,GAAG,CAAA;IAE7D,MAAM,MAAM,GAAe,EAAE,CAAA;IAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;QAAE,MAAM,CAAC,IAAI,CAAC,IAAI,KAAK,CAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAA;IACrE,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,MAAM,CAAA;IAEzC,MAAM,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAA;IAC5C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAE,EAAE,CAAC,CAAC,CAAA;IACrE,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;IAEhC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QAClC,IAAI,QAAQ,CAAC,IAAI,KAAK,aAAa;YAAE,SAAQ;QAE7C,IAAI,QAAQ,CAAC,IAAI,KAAK,iBAAiB,EAAE,CAAC;YACzC,IAAI,IAAI,KAAK,SAAS,IAAI,QAAQ,CAAC,UAAU,GAAG,eAAe;gBAAE,SAAQ;YACzE,MAAM,IAAI,GAAG,QAAQ,CAAC,UAAU,GAAG,mBAAmB,CAAA;YACtD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC5B,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAE,EAAE,QAAQ,CAAC,EAAE,CAAC;oBACpC,MAAM,CAAC,CAAC,CAAE,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,IAAI,CAAE,EAAE,IAAI,CAAC,CAAA;gBACrD,CAAC;YACF,CAAC;YACD,SAAQ;QACT,CAAC;QAED,MAAM,GAAG,GAAG,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAA;QAC1C,IAAI,CAAC,GAAG;YAAE,SAAQ;QAClB,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,KAAK,GAAG,EAAE,CAAC,CAAA;QACvC,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,KAAK,GAAG,EAAE,CAAC,CAAA;QACv
C,IAAI,IAAI,KAAK,SAAS;YAAE,SAAQ;QAChC,MAAM,IAAI,GAAG,QAAQ,CAAC,UAAU,GAAG,SAAS,CAAA;QAC5C,IAAI,KAAK,GAAG,IAAI,CAAA;QAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5B,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAE,EAAE,QAAQ,CAAC;gBAAE,SAAQ;YAC7C,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAA;YAC/B,KAAK,GAAG,KAAK,CAAA;YACb,IAAI,GAAG,KAAK,SAAS;gBAAE,SAAQ;YAC/B,MAAM,CAAC,CAAC,CAAE,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,GAAG,CAAE,EAAE,IAAI,CAAC,CAAA;QACnD,CAAC;IACF,CAAC;IACD,OAAO,MAAM,CAAA;AACd,CAAC;AAED,SAAS,QAAQ,CAAC,CAAiC,EAAE,CAAiC;IACrF,OAAO,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,CAAA;AAC1C,CAAC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Codex-backed lexicon for the Stage 2.7 span proposer (`@mailwoman/core/pipeline`'s
|
|
7
|
+
* `proposeSpans`). Core stays codex-free; this module assembles the proposer's designator
|
|
8
|
+
* vocabulary from the provenance-tracked `@mailwoman/codex` tables — USPS Pub-28 C2 secondary
|
|
9
|
+
* unit designators, USPS PO-box designators, Australia Post AMAS delivery types, NZ Post ADV358
|
|
10
|
+
* delivery-service types. Which systems are loaded conditions the proposer's locale-dependent
|
|
11
|
+
* readings (the AU/NZ `Flat 2/14` split exists only when those tables are present).
|
|
12
|
+
*
|
|
13
|
+
* No entry here is hand-invented (the no-load-bearing-trivia rule): every token/pattern derives
|
|
14
|
+
* from a codex table row. AU `MS` (Mail Service) and the identifier-less counter types (CARE PO,
|
|
15
|
+
* CMA, CPA, Counter Delivery, Poste Restante) are excluded from the mid-text SCAN regex — a bare
|
|
16
|
+
* two-letter designator with no required number is exactly the false-positive shape ("Ms Smith")
|
|
17
|
+
* the AU matcher special-cases; the scan keeps only number-carrying forms.
|
|
18
|
+
*/
|
|
19
|
+
import { type SystemCode } from "@mailwoman/codex";
|
|
20
|
+
import type { SpanProposerLexicon } from "@mailwoman/core/pipeline";
|
|
21
|
+
/**
|
|
22
|
+
* Build the span-proposer lexicon from the codex tables of the requested systems. Defaults to every
|
|
23
|
+
* system with designator tables in the codex today. The result is pure data — safe to share across
|
|
24
|
+
* parses.
|
|
25
|
+
*/
|
|
26
|
+
export declare function buildCodexSpanLexicon(systems?: readonly SystemCode[]): SpanProposerLexicon;
|
|
27
|
+
//# sourceMappingURL=span-proposer-lexicon.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"span-proposer-lexicon.d.ts","sourceRoot":"","sources":["../span-proposer-lexicon.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,EAAc,KAAK,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAC9D,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAA;AAoDnE;;;;GAIG;AACH,wBAAgB,qBAAqB,CAAC,OAAO,GAAE,SAAS,UAAU,EAAuB,GAAG,mBAAmB,CAgD9G"}
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @copyright Sister Software
|
|
3
|
+
* @license AGPL-3.0
|
|
4
|
+
* @author Teffen Ellis, et al.
|
|
5
|
+
*
|
|
6
|
+
* Codex-backed lexicon for the Stage 2.7 span proposer (`@mailwoman/core/pipeline`'s
|
|
7
|
+
* `proposeSpans`). Core stays codex-free; this module assembles the proposer's designator
|
|
8
|
+
* vocabulary from the provenance-tracked `@mailwoman/codex` tables — USPS Pub-28 C2 secondary
|
|
9
|
+
* unit designators, USPS PO-box designators, Australia Post AMAS delivery types, NZ Post ADV358
|
|
10
|
+
* delivery-service types. Which systems are loaded conditions the proposer's locale-dependent
|
|
11
|
+
* readings (the AU/NZ `Flat 2/14` split exists only when those tables are present).
|
|
12
|
+
*
|
|
13
|
+
* No entry here is hand-invented (the no-load-bearing-trivia rule): every token/pattern derives
|
|
14
|
+
* from a codex table row. AU `MS` (Mail Service) and the identifier-less counter types (CARE PO,
|
|
15
|
+
* CMA, CPA, Counter Delivery, Poste Restante) are excluded from the mid-text SCAN regex — a bare
|
|
16
|
+
* two-letter designator with no required number is exactly the false-positive shape ("Ms Smith")
|
|
17
|
+
* the AU matcher special-cases; the scan keeps only number-carrying forms.
|
|
18
|
+
*/
|
|
19
|
+
import { au, nz, us } from "@mailwoman/codex";
|
|
20
|
+
/**
 * USPS Pub-28 C2 canonicals whose designator DESCRIBES rather than addresses: "Building A"
 * describes the building, whereas "Suite 9" addresses a unit. Inside a bracketed group these read
 * as annotation content (gold convention 2 of the punctuation-stress eval).
 */
const WEAK_CANONICALS = new Set(["BUILDING", "FRONT", "REAR", "SIDE", "UPPER", "LOWER", "KEY", "STOP"]);
/** USPS canonicals naming a LEVEL of the building instead of a numbered unit on it. */
const LEVEL_CANONICALS = new Set([
    "FLOOR",
    "BASEMENT",
    "PENTHOUSE",
    "LOBBY",
]);
/**
 * AU/NZ delivery types kept out of the scan regex, either because they carry no required number
 * or because they are ambiguous two-letter forms.
 */
const SCAN_EXCLUDED_DELIVERY = new Set(["MS", "CARE PO", "CMA", "CPA", "Counter Delivery", "Poste Restante"]);
|
|
46
|
+
/**
 * Turn one designator phrase from a codex table into a fragment of regular-expression source.
 *
 * The phrase is trimmed and split on whitespace. A purely alphabetic word of one to three letters
 * (other than "box" or "bag", compared case-insensitively) is treated as an initialism: each
 * letter may be followed by a period, and letters may be separated by whitespace, so "PO" also
 * matches "P.O." and "P. O". Every other word is matched literally, with regex metacharacters
 * escaped. Words are joined by a mandatory run of whitespace.
 *
 * @param phrase Designator phrase, e.g. "PO Box" or "Private Bag".
 * @returns Regex source for the phrase (no anchors, no flags).
 */
function phraseToPattern(phrase) {
    const fragments = [];
    for (const word of phrase.trim().split(/\s+/)) {
        const lowered = word.toLowerCase();
        const isInitialism = /^[A-Za-z]{1,3}$/.test(word) && lowered !== "box" && lowered !== "bag";
        if (isInitialism) {
            // Optional period after every letter, optional spacing between letters.
            const letters = [...word].map((letter) => `${letter}\\.?`);
            fragments.push(letters.join("\\s*"));
        }
        else {
            fragments.push(word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));
        }
    }
    return fragments.join("\\s+");
}
|
|
64
|
+
/**
 * Assemble the span-proposer lexicon from the codex tables of the requested systems. When no
 * list is given, "us", "au" and "nz" are all loaded. The result is pure data.
 *
 * - "us": every key of `US_UNIT_DESIGNATOR_VARIANTS` and each of its variants is lower-cased and
 *   filed under level designators (canonical in `LEVEL_CANONICALS`) or unit designators
 *   (otherwise); variants of a `WEAK_CANONICALS` canonical are additionally recorded as weak.
 *   `US_PO_BOX_DESIGNATORS` feed the delivery-phrase list.
 * - "au": rows of `AU_DELIVERY_SERVICE_DESIGNATORS` that require a number and whose abbreviation
 *   is not scan-excluded contribute both abbreviation and name.
 * - "nz": rows of `NZ_DELIVERY_SERVICE_TYPES` whose identifier is not "not-used" and whose type
 *   is not scan-excluded contribute their type.
 *
 * @param systems System codes whose tables should be loaded.
 * @returns The lexicon; `deliveryService` is present only when at least one delivery phrase was
 *   collected.
 */
export function buildCodexSpanLexicon(systems = ["us", "au", "nz"]) {
    const enabled = new Set(systems);
    const unitDesignators = new Set();
    const levelDesignators = new Set();
    const weakDesignators = new Set();
    const deliveryPhrases = new Set();
    if (enabled.has("us")) {
        for (const [canonical, aliases] of Object.entries(us.US_UNIT_DESIGNATOR_VARIANTS)) {
            const isWeak = WEAK_CANONICALS.has(canonical);
            const bucket = LEVEL_CANONICALS.has(canonical) ? levelDesignators : unitDesignators;
            for (const form of [canonical, ...aliases]) {
                const lowered = form.toLowerCase();
                bucket.add(lowered);
                if (isWeak) {
                    weakDesignators.add(lowered);
                }
            }
        }
        for (const phrase of us.US_PO_BOX_DESIGNATORS) {
            deliveryPhrases.add(phrase);
        }
    }
    if (enabled.has("au")) {
        for (const { abbreviation, name, requiresNumber } of au.AU_DELIVERY_SERVICE_DESIGNATORS) {
            const scannable = requiresNumber && !SCAN_EXCLUDED_DELIVERY.has(abbreviation);
            if (!scannable) {
                continue;
            }
            deliveryPhrases.add(abbreviation);
            deliveryPhrases.add(name);
        }
    }
    if (enabled.has("nz")) {
        for (const { type, identifier } of nz.NZ_DELIVERY_SERVICE_TYPES) {
            if (identifier === "not-used" || SCAN_EXCLUDED_DELIVERY.has(type)) {
                continue;
            }
            deliveryPhrases.add(type);
        }
    }
    const lexicon = {
        systems: enabled,
        unitDesignators,
        levelDesignators,
        weakDesignators,
    };
    // Longest-first so "GPO Box" beats "Box", "Private Bag" beats "Bag".
    const alternatives = [...deliveryPhrases]
        .sort((a, b) => b.length - a.length)
        .map((phrase) => phraseToPattern(phrase));
    if (alternatives.length === 0) {
        return lexicon;
    }
    const alternation = alternatives.join("|");
    return {
        ...lexicon,
        deliveryService: new RegExp(String.raw `\b(?:${alternation})\s*#?\s*([A-Za-z]?\d[\dA-Za-z-]*)\b`, "gi"),
    };
}
|
|
120
|
+
//# sourceMappingURL=span-proposer-lexicon.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"span-proposer-lexicon.js","sourceRoot":"","sources":["../span-proposer-lexicon.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAEH,OAAO,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE,EAAmB,MAAM,kBAAkB,CAAA;AAG9D;;;;GAIG;AACH,MAAM,eAAe,GAAwB,IAAI,GAAG,CAAC;IACpD,UAAU;IACV,OAAO;IACP,MAAM;IACN,MAAM;IACN,OAAO;IACP,OAAO;IACP,KAAK;IACL,MAAM;CACN,CAAC,CAAA;AAEF,2FAA2F;AAC3F,MAAM,gBAAgB,GAAwB,IAAI,GAAG,CAAC,CAAC,OAAO,EAAE,UAAU,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC,CAAA;AAElG,qGAAqG;AACrG,MAAM,sBAAsB,GAAwB,IAAI,GAAG,CAAC;IAC3D,IAAI;IACJ,SAAS;IACT,KAAK;IACL,KAAK;IACL,kBAAkB;IAClB,gBAAgB;CAChB,CAAC,CAAA;AAEF;;;;;GAKG;AACH,SAAS,eAAe,CAAC,MAAc;IACtC,OAAO,MAAM;SACX,IAAI,EAAE;SACN,KAAK,CAAC,KAAK,CAAC;SACZ,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CACb,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,WAAW,EAAE,KAAK,KAAK,IAAI,IAAI,CAAC,WAAW,EAAE,KAAK,KAAK;QAC3F,CAAC,CAAC,IAAI;aACH,KAAK,CAAC,EAAE,CAAC;aACT,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,GAAG,EAAE,MAAM,CAAC;aACxB,IAAI,CAAC,MAAM,CAAC;QACf,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAC9C;SACA,IAAI,CAAC,MAAM,CAAC,CAAA;AACf,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,qBAAqB,CAAC,UAAiC,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;IACxF,MAAM,GAAG,GAAG,IAAI,GAAG,CAAS,OAAO,CAAC,CAAA;IACpC,MAAM,eAAe,GAAG,IAAI,GAAG,EAAU,CAAA;IACzC,MAAM,gBAAgB,GAAG,IAAI,GAAG,EAAU,CAAA;IAC1C,MAAM,eAAe,GAAG,IAAI,GAAG,EAAU,CAAA;IACzC,MAAM,eAAe,GAAG,IAAI,GAAG,EAAU,CAAA;IAEzC,IAAI,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;QACnB,KAAK,MAAM,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,2BAA2B,CAAC,EAAE,CAAC;YACrE,MAAM,QAAQ,GAAG,CAAC,SAAS,EAAE,GAAG,EAAE,CAAC,2BAA2B,CAAC,SAAgC,CAAC,CAAC,CAAA;YACjG,MAAM,MAAM,GAAG,gBAAgB,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC,CAAC,eAAe,CAAA;YACnF,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;gBAC1B,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAA;gBAC3B,IAAI,eAAe,CAAC,GAAG,CAAC,SAAS,CAAC;oBAAE,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAA;YACzE,CAAC;QACF,CAAC;QACD,KAAK,MAAM,MAAM,IAAI,EAAE,CAAC,qBAAqB;YAAE,eAAe,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;IAC3E,CAAC;IAC
D,IAAI,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;QACnB,KAAK,MAAM,GAAG,IAAI,EAAE,CAAC,+BAA+B,EAAE,CAAC;YACtD,IAAI,sBAAsB,CAAC,GAAG,CAAC,GAAG,CAAC,YAAY,CAAC;gBAAE,SAAQ;YAC1D,IAAI,CAAC,GAAG,CAAC,cAAc;gBAAE,SAAQ;YACjC,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,YAAY,CAAC,CAAA;YACrC,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QAC9B,CAAC;IACF,CAAC;IACD,IAAI,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;QACnB,KAAK,MAAM,GAAG,IAAI,EAAE,CAAC,yBAAyB,EAAE,CAAC;YAChD,IAAI,sBAAsB,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC;gBAAE,SAAQ;YAClD,IAAI,GAAG,CAAC,UAAU,KAAK,UAAU;gBAAE,SAAQ;YAC3C,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAA;QAC9B,CAAC;IACF,CAAC;IAED,qEAAqE;IACrE,MAAM,YAAY,GAAG,CAAC,GAAG,eAAe,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,eAAe,CAAC,CAAA;IAClG,MAAM,eAAe,GACpB,YAAY,CAAC,MAAM,GAAG,CAAC;QACtB,CAAC,CAAC,IAAI,MAAM,CAAC,MAAM,CAAC,GAAG,CAAA,QAAQ,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,sCAAsC,EAAE,IAAI,CAAC;QAClG,CAAC,CAAC,SAAS,CAAA;IAEb,OAAO;QACN,OAAO,EAAE,GAAG;QACZ,eAAe;QACf,gBAAgB;QAChB,eAAe;QACf,GAAG,CAAC,eAAe,CAAC,CAAC,CAAC,EAAE,eAAe,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KAC/C,CAAA;AACF,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mailwoman/neural",
|
|
3
|
-
"version": "4.
|
|
3
|
+
"version": "4.5.1",
|
|
4
4
|
"description": "Mailwoman neural classifier runtime: SentencePiece tokenizer + ONNX inference + decoder wiring.",
|
|
5
5
|
"license": "AGPL-3.0-only",
|
|
6
6
|
"repository": {
|
|
@@ -20,8 +20,8 @@
|
|
|
20
20
|
"./browser": "./out/browser.js"
|
|
21
21
|
},
|
|
22
22
|
"dependencies": {
|
|
23
|
-
"@mailwoman/codex": "4.
|
|
24
|
-
"@mailwoman/core": "4.
|
|
23
|
+
"@mailwoman/codex": "4.5.1",
|
|
24
|
+
"@mailwoman/core": "4.5.1",
|
|
25
25
|
"@sctg/sentencepiece-js": "^1.3.3",
|
|
26
26
|
"onnxruntime-node": "^1.26.0"
|
|
27
27
|
},
|