@mailwoman/neural 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Browser-safe re-export surface. Excludes `./onnx-runner.js` + `./weights.js` (Node-only — they
7
+ * statically reference `onnxruntime-node` + `node:fs`). The dynamic `loadFromWeights` /
8
+ * `loadFromFile` paths from those modules guard the corresponding imports with `webpackIgnore` so
9
+ * Node callers still get them via the main `@mailwoman/neural` entry without bundling them into a
10
+ * browser graph.
11
+ */
12
+ export * from "./classifier.js";
13
+ export * from "./labels.js";
14
+ export * from "./tokenizer.js";
15
+ export type { InferResult } from "./onnx-runner.js";
16
+ //# sourceMappingURL=browser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"browser.d.ts","sourceRoot":"","sources":["../browser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,gBAAgB,CAAA;AAG9B,YAAY,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAA"}
package/out/browser.js ADDED
@@ -0,0 +1,15 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Browser-safe re-export surface. Excludes `./onnx-runner.js` + `./weights.js` (Node-only — they
7
+ * statically reference `onnxruntime-node` + `node:fs`). The dynamic `loadFromWeights` /
8
+ * `loadFromFile` paths from those modules guard the corresponding imports with `webpackIgnore` so
9
+ * Node callers still get them via the main `@mailwoman/neural` entry without bundling them into a
10
+ * browser graph.
11
+ */
12
+ export * from "./classifier.js";
13
+ export * from "./labels.js";
14
+ export * from "./tokenizer.js";
15
+ //# sourceMappingURL=browser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"browser.js","sourceRoot":"","sources":["../browser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,gBAAgB,CAAA"}
@@ -9,19 +9,55 @@
9
9
  *
10
10
  * Convenience wrappers `parseJson` / `parseTuples` / `parseXml` project the tree on the way out.
11
11
  */
12
- import { type AddressTree, type ComponentTag, decodeAsXml } from "@mailwoman/core/decoder";
13
- import { OnnxRunner } from "./onnx-runner.js";
12
+ import { decodeAsXml, type AddressTree, type ComponentTag } from "@mailwoman/core/decoder";
13
+ import type { InferResult } from "./onnx-runner.js";
14
+ import { type QueryShapeLike } from "./query-shape-prior.js";
14
15
  import { MailwomanTokenizer } from "./tokenizer.js";
15
- import { type ResolveWeightsOpts } from "./weights.js";
16
+ import type { ResolveWeightsOpts } from "./weights.js";
17
+ /**
18
+ * Structural type the classifier needs from a runner. Lets callers swap the Node-side `OnnxRunner`
19
+ * for a browser-side runner (e.g. `@mailwoman/neural-web`'s `WebOnnxRunner`) without inheritance —
20
+ * the classifier only ever calls `infer(ids)`.
21
+ */
22
+ export interface NeuralRunner {
23
+ infer(tokenIds: number[]): Promise<InferResult>;
24
+ }
16
25
  export interface NeuralAddressClassifierConfig {
17
26
  tokenizer: MailwomanTokenizer;
18
- runner: OnnxRunner;
19
- /** Label vocabulary in the order the model emits them. Defaults to Stage 1 (v0.1.0/v0.2.0). */
27
+ runner: NeuralRunner;
28
+ /**
29
+ * Label vocabulary in the order the model emits them. Defaults to Stage 2 (v0.3.0). Stage 2
30
+ * strictly extends Stage 1 at the same indices, so a v0.2.0 Stage 1 model loaded with this
31
+ * default still decodes correctly — its emissions only span the first 15 entries.
32
+ */
20
33
  labels?: readonly string[];
34
+ /**
35
+ * Decoding strategy:
36
+ *
37
+ * - `"viterbi"` (default) — linear-chain CRF Viterbi with the BIO structural mask. Prevents
38
+ * orphan-`I-*` sequences. If `transitions` is provided, uses learned scores on top.
39
+ * - `"argmax"` — per-token argmax. Faster but produces structurally invalid sequences. Use only for
40
+ * debugging / comparison.
41
+ */
42
+ decode?: "viterbi" | "argmax";
43
+ /**
44
+ * Optional learned CRF transition scores. Square matrix of size `labels.length × labels.length`.
45
+ * Added on top of the structural BIO mask. Future weights releases ship this; today's v3.0.0
46
+ * weights don't, so the structural mask alone is used.
47
+ */
48
+ transitions?: number[][];
49
+ /** Optional learned start-of-sequence transition scores per label. */
50
+ startTransitions?: number[];
51
+ /** Optional learned end-of-sequence transition scores per label. */
52
+ endTransitions?: number[];
21
53
  }
22
54
  export declare class NeuralAddressClassifier {
23
55
  private readonly cfg;
24
56
  private readonly labels;
57
+ private readonly decodeMode;
58
+ private readonly transitions;
59
+ private readonly startTransitions;
60
+ private readonly endTransitions;
25
61
  constructor(cfg: NeuralAddressClassifierConfig);
26
62
  /**
27
63
  * One-call factory that resolves the weights package (or explicit paths), loads the tokenizer and
@@ -29,12 +65,50 @@ export declare class NeuralAddressClassifier {
29
65
  *
30
66
  * Resolution order: explicit paths in `opts` → `@mailwoman/neural-weights-<locale>` package →
31
67
  * throws a single actionable error.
68
+ *
69
+ * **Node-only.** The dynamic imports keep `OnnxRunner` (onnxruntime-node) + `resolveWeights`
70
+ * (uses Node fs) out of the static dependency graph, so this file can be bundled for the browser
71
+ * by `@mailwoman/neural-web`. Calling this method in a browser will throw at runtime — use
72
+ * `loadNeuralClassifierFromUrls` from `@mailwoman/neural-web` instead.
32
73
  */
33
74
  static loadFromWeights(opts?: ResolveWeightsOpts): Promise<NeuralAddressClassifier>;
34
- /** Tokenize → infer → argmax/softmax → decoder tree. */
35
- parse(text: string): Promise<AddressTree>;
36
- parseJson(text: string): Promise<Partial<Record<ComponentTag, string>>>;
37
- parseTuples(text: string): Promise<Array<[ComponentTag, string]>>;
38
- parseXml(text: string, opts?: Parameters<typeof decodeAsXml>[1]): Promise<string>;
75
+ /** Tokenize → infer → Viterbi (or argmax) → decoder tree. */
76
+ parse(text: string, opts?: ParseOpts): Promise<AddressTree>;
77
+ /**
78
+ * Like `parse`, but also returns the raw per-token logits and piece offsets needed for per-span
79
+ * logit aggregation (Option C joint-reconcile integration).
80
+ */
81
+ parseWithLogits(text: string, opts?: ParseOpts): Promise<ParseWithLogitsResult>;
82
+ parseJson(text: string, opts?: ParseOpts): Promise<Partial<Record<ComponentTag, string>>>;
83
+ parseTuples(text: string, opts?: ParseOpts): Promise<Array<[ComponentTag, string]>>;
84
+ parseXml(text: string, opts?: ParseOpts & {
85
+ xml?: Parameters<typeof decodeAsXml>[1];
86
+ }): Promise<string>;
87
+ }
88
+ /** Result of `parseWithLogits` — tree + raw material for per-span logit aggregation. */
89
+ export interface ParseWithLogitsResult {
90
+ tree: AddressTree;
91
+ logits: number[][];
92
+ pieces: Array<{
93
+ start: number;
94
+ end: number;
95
+ }>;
96
+ }
97
+ /**
98
+ * Per-call opts for `parse()`. Threading a precomputed `QueryShape` here turns on the soft-prior
99
+ * bias path in the Viterbi decoder (Stage 2.4 boundary → Stage 3 encoder integration).
100
+ */
101
+ export interface ParseOpts {
102
+ /**
103
+ * Precomputed `QueryShape` for this input (from `@mailwoman/query-shape`'s `computeQueryShape`).
104
+ * Known-format hits in the shape produce additive emission biases toward the matching BIO label.
105
+ * Typed structurally — no runtime dependency on `@mailwoman/query-shape`.
106
+ */
107
+ queryShape?: QueryShapeLike;
108
+ /**
109
+ * Maximum bias magnitude in log-odds units. Default 1.0 — adds up to ~e^1 ≈ 2.7× odds to the
110
+ * favored label. Confidence-scaled, so a 0.6-confidence format hit gets +0.6 max bias.
111
+ */
112
+ queryShapeBiasScale?: number;
39
113
  }
40
114
  //# sourceMappingURL=classifier.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"classifier.d.ts","sourceRoot":"","sources":["../classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EACN,KAAK,WAAW,EAChB,KAAK,YAAY,EAKjB,WAAW,EACX,MAAM,yBAAyB,CAAA;AAEhC,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAC7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAA;AACnD,OAAO,EAAE,KAAK,kBAAkB,EAAkB,MAAM,cAAc,CAAA;AAEtE,MAAM,WAAW,6BAA6B;IAC7C,SAAS,EAAE,kBAAkB,CAAA;IAC7B,MAAM,EAAE,UAAU,CAAA;IAClB,+FAA+F;IAC/F,MAAM,CAAC,EAAE,SAAS,MAAM,EAAE,CAAA;CAC1B;AAED,qBAAa,uBAAuB;IAGvB,OAAO,CAAC,QAAQ,CAAC,GAAG;IAFhC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAmB;gBAEb,GAAG,EAAE,6BAA6B;IAI/D;;;;;;OAMG;WACU,eAAe,CAAC,IAAI,GAAE,kBAAuB,GAAG,OAAO,CAAC,uBAAuB,CAAC;IAS7F,wDAAwD;IAClD,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAqBzC,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC;IAIvE,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC;IAIjE,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,UAAU,CAAC,OAAO,WAAW,CAAC,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;CAGvF"}
1
+ {"version":3,"file":"classifier.d.ts","sourceRoot":"","sources":["../classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAIN,WAAW,EACX,KAAK,WAAW,EAChB,KAAK,YAAY,EAEjB,MAAM,yBAAyB,CAAA;AAEhC,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAA;AACnD,OAAO,EAA0C,KAAK,cAAc,EAAE,MAAM,wBAAwB,CAAA;AACpG,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAA;AAEnD,OAAO,KAAK,EAAE,kBAAkB,EAAmB,MAAM,cAAc,CAAA;AAEvE;;;;GAIG;AACH,MAAM,WAAW,YAAY;IAC5B,KAAK,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,WAAW,CAAC,CAAA;CAC/C;AAED,MAAM,WAAW,6BAA6B;IAC7C,SAAS,EAAE,kBAAkB,CAAA;IAC7B,MAAM,EAAE,YAAY,CAAA;IACpB;;;;OAIG;IACH,MAAM,CAAC,EAAE,SAAS,MAAM,EAAE,CAAA;IAC1B;;;;;;;OAOG;IACH,MAAM,CAAC,EAAE,SAAS,GAAG,QAAQ,CAAA;IAC7B;;;;OAIG;IACH,WAAW,CAAC,EAAE,MAAM,EAAE,EAAE,CAAA;IACxB,sEAAsE;IACtE,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAA;IAC3B,oEAAoE;IACpE,cAAc,CAAC,EAAE,MAAM,EAAE,CAAA;CACzB;AAED,qBAAa,uBAAuB;IAOvB,OAAO,CAAC,QAAQ,CAAC,GAAG;IANhC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAmB;IAC1C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAsB;IACjD,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAY;IACxC,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAU;IAC3C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAU;gBAEZ,GAAG,EAAE,6BAA6B;IAa/D;;;;;;;;;;;OAWG;WACU,eAAe,CAAC,IAAI,GAAE,kBAAuB,GAAG,OAAO,CAAC,uBAAuB,CAAC;IAuB7F,6DAA6D;IACvD,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,WAAW,CAAC;IA8CjE;;;OAGG;IACG,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,qBAAqB,CAAC;IA6C/E,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC;IAIzF,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC;IAInF,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,SAAS,GAAG;QAAE,GAAG,CAAC,EAAE,UAAU,CAAC,OAAO,WAAW,CAAC,CAAC,CAAC,CAAC,CAAA;KAAE,GAAG,OAAO,CAAC,MAAM,CAAC;CAG7G;AAED,wFAAwF;AACxF,MAAM,WAAW,qBAAqB;IACrC,IAAI,EAAE,WAAW,CAAA;IACjB,MAAM,EAAE,MAAM,EAAE,EAAE,CAAA;IAClB,MAAM,EAAE,KAAK,CAAC;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,GAAG,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;CAC7C;AAED;;;GAGG;AACH,MAAM,WAAW,SAAS;
IACzB;;;;OAIG;IACH,UAAU,CAAC,EAAE,cAAc,CAAA;IAC3B;;;OAGG;IACH,mBAAmB,CAAC,EAAE,MAAM,CAAA;CAC5B"}
package/out/classifier.js CHANGED
@@ -10,16 +10,30 @@
10
10
  * Convenience wrappers `parseJson` / `parseTuples` / `parseXml` project the tree on the way out.
11
11
  */
12
12
  import { buildAddressTree, decodeAsJson, decodeAsTuples, decodeAsXml, } from "@mailwoman/core/decoder";
13
- import { STAGE1_BIO_LABELS } from "./labels.js";
14
- import { OnnxRunner } from "./onnx-runner.js";
13
+ import { STAGE2_BIO_LABELS } from "./labels.js";
14
+ import { addEmissionMatrix, buildEmissionPriors } from "./query-shape-prior.js";
15
15
  import { MailwomanTokenizer } from "./tokenizer.js";
16
- import { resolveWeights } from "./weights.js";
16
+ import { buildBioEndMask, buildBioStartMask, buildBioTransitionMask, softmax, viterbi } from "./viterbi.js";
17
17
  export class NeuralAddressClassifier {
18
18
  cfg;
19
19
  labels;
20
+ decodeMode;
21
+ transitions;
22
+ startTransitions;
23
+ endTransitions;
20
24
  constructor(cfg) {
21
25
  this.cfg = cfg;
22
- this.labels = cfg.labels ?? STAGE1_BIO_LABELS;
26
+ this.labels = cfg.labels ?? STAGE2_BIO_LABELS;
27
+ this.decodeMode = cfg.decode ?? "viterbi";
28
+ const structural = buildBioTransitionMask(this.labels);
29
+ if (cfg.transitions) {
30
+ this.transitions = addMatrices(structural, cfg.transitions);
31
+ }
32
+ else {
33
+ this.transitions = structural;
34
+ }
35
+ this.startTransitions = cfg.startTransitions ?? buildBioStartMask(this.labels);
36
+ this.endTransitions = cfg.endTransitions ?? buildBioEndMask(this.labels);
23
37
  }
24
38
  /**
25
39
  * One-call factory that resolves the weights package (or explicit paths), loads the tokenizer and
@@ -27,42 +41,120 @@ export class NeuralAddressClassifier {
27
41
  *
28
42
  * Resolution order: explicit paths in `opts` → `@mailwoman/neural-weights-<locale>` package →
29
43
  * throws a single actionable error.
44
+ *
45
+ * **Node-only.** The dynamic imports keep `OnnxRunner` (onnxruntime-node) + `resolveWeights`
46
+ * (uses Node fs) out of the static dependency graph, so this file can be bundled for the browser
47
+ * by `@mailwoman/neural-web`. Calling this method in a browser will throw at runtime — use
48
+ * `loadNeuralClassifierFromUrls` from `@mailwoman/neural-web` instead.
30
49
  */
31
50
  static async loadFromWeights(opts = {}) {
32
- const { modelPath, tokenizerPath } = resolveWeights(opts);
51
+ // /* webpackIgnore: true */ tells webpack to leave the dynamic import statement intact —
52
+ // it becomes a runtime native ESM import that resolves in Node (which has onnxruntime-node
53
+ // + node:fs) and throws cleanly in a browser if called. Without the directive, webpack
54
+ // pulls onnx-runner / weights into the browser chunk graph + then chokes on the Node-only
55
+ // builtins they reference.
56
+ const [{ OnnxRunner }, { resolveWeights, readLabelsFromModelCard }] = await Promise.all([
57
+ import(/* webpackIgnore: true */ "./onnx-runner.js"),
58
+ import(/* webpackIgnore: true */ "./weights.js"),
59
+ ]);
60
+ const resolved = resolveWeights(opts);
61
+ // Read the trained label vocabulary from the bundled model-card.json when present. Falls
62
+ // through to the constructor default (STAGE2_BIO_LABELS) for legacy bundles that predate
63
+ // the `labels` field — those are always Stage 2 cards by construction, so the default is
64
+ // the correct fallback. A future Stage 3 ship will require the card to carry the field.
65
+ const labels = readLabelsFromModelCard(resolved.modelCardPath);
33
66
  const [tokenizer, runner] = await Promise.all([
34
- MailwomanTokenizer.loadFromFile(tokenizerPath),
35
- OnnxRunner.create(modelPath),
67
+ MailwomanTokenizer.loadFromFile(resolved.tokenizerPath),
68
+ OnnxRunner.create(resolved.modelPath),
36
69
  ]);
37
- return new NeuralAddressClassifier({ tokenizer, runner });
70
+ return new NeuralAddressClassifier({ tokenizer, runner, labels });
38
71
  }
39
- /** Tokenize → infer → argmax/softmax → decoder tree. */
40
- async parse(text) {
72
+ /** Tokenize → infer → Viterbi (or argmax) → decoder tree. */
73
+ async parse(text, opts) {
41
74
  if (text.length === 0)
42
75
  return { raw: text, roots: [] };
43
76
  const { pieces, ids } = this.cfg.tokenizer.encode(text);
44
77
  const { logits } = await this.cfg.runner.infer(ids);
78
+ // QueryShape soft prior: when the caller supplies a QueryShape (typically from
79
+ // `@mailwoman/query-shape`'s `computeQueryShape`), nudge per-token emissions toward the
80
+ // labels implied by known-format hits. Bounded magnitude — confident encoder predictions
81
+ // still win.
82
+ const emissions = opts?.queryShape
83
+ ? addEmissionMatrix(logits, buildEmissionPriors(opts.queryShape, pieces, this.labels, {
84
+ biasScale: opts.queryShapeBiasScale ?? 1.0,
85
+ }))
86
+ : logits;
87
+ const labelIndices = this.decodeMode === "viterbi"
88
+ ? viterbi({
89
+ emissions,
90
+ transitions: this.transitions,
91
+ startTransitions: this.startTransitions,
92
+ endTransitions: this.endTransitions,
93
+ }).path
94
+ : emissions.map((row) => argmaxSoftmax(row).idx);
45
95
  const tokens = pieces.map((p, i) => {
46
- const row = logits[i];
47
- const { idx, conf } = argmaxSoftmax(row);
96
+ const idx = labelIndices[i];
97
+ // Confidence reports the encoder's *raw* probability (no prior baked in) so callers see
98
+ // the model's own conviction, not the prior-augmented score.
99
+ const probs = softmax(logits[i]);
48
100
  return {
49
101
  piece: p.piece,
50
102
  start: p.start,
51
103
  end: p.end,
52
104
  label: (this.labels[idx] ?? "O"),
53
- confidence: conf,
105
+ confidence: probs[idx],
54
106
  };
55
107
  });
56
108
  return buildAddressTree(text, tokens);
57
109
  }
58
- async parseJson(text) {
59
- return decodeAsJson(await this.parse(text));
110
+ /**
111
+ * Like `parse`, but also returns the raw per-token logits and piece offsets needed for per-span
112
+ * logit aggregation (Option C joint-reconcile integration).
113
+ */
114
+ async parseWithLogits(text, opts) {
115
+ if (text.length === 0) {
116
+ return { tree: { raw: text, roots: [] }, logits: [], pieces: [] };
117
+ }
118
+ const { pieces, ids } = this.cfg.tokenizer.encode(text);
119
+ const { logits } = await this.cfg.runner.infer(ids);
120
+ const emissions = opts?.queryShape
121
+ ? addEmissionMatrix(logits, buildEmissionPriors(opts.queryShape, pieces, this.labels, {
122
+ biasScale: opts.queryShapeBiasScale ?? 1.0,
123
+ }))
124
+ : logits;
125
+ const labelIndices = this.decodeMode === "viterbi"
126
+ ? viterbi({
127
+ emissions,
128
+ transitions: this.transitions,
129
+ startTransitions: this.startTransitions,
130
+ endTransitions: this.endTransitions,
131
+ }).path
132
+ : emissions.map((row) => argmaxSoftmax(row).idx);
133
+ const tokens = pieces.map((p, i) => {
134
+ const idx = labelIndices[i];
135
+ const probs = softmax(logits[i]);
136
+ return {
137
+ piece: p.piece,
138
+ start: p.start,
139
+ end: p.end,
140
+ label: (this.labels[idx] ?? "O"),
141
+ confidence: probs[idx],
142
+ };
143
+ });
144
+ return {
145
+ tree: buildAddressTree(text, tokens),
146
+ logits,
147
+ pieces: pieces.map((p) => ({ start: p.start, end: p.end })),
148
+ };
149
+ }
150
+ async parseJson(text, opts) {
151
+ return decodeAsJson(await this.parse(text, opts));
60
152
  }
61
- async parseTuples(text) {
62
- return decodeAsTuples(await this.parse(text));
153
+ async parseTuples(text, opts) {
154
+ return decodeAsTuples(await this.parse(text, opts));
63
155
  }
64
156
  async parseXml(text, opts) {
65
- return decodeAsXml(await this.parse(text), opts);
157
+ return decodeAsXml(await this.parse(text, opts), opts?.xml);
66
158
  }
67
159
  }
68
160
  function argmaxSoftmax(row) {
@@ -80,4 +172,16 @@ function argmaxSoftmax(row) {
80
172
  const conf = 1 / sumExp;
81
173
  return { idx: maxIdx, conf };
82
174
  }
175
+ /** Element-wise add two square matrices. Used to compose the structural mask + learned transitions. */
176
+ function addMatrices(a, b) {
177
+ const n = a.length;
178
+ const out = [];
179
+ for (let i = 0; i < n; i++) {
180
+ const row = new Array(n);
181
+ for (let j = 0; j < n; j++)
182
+ row[j] = a[i][j] + b[i][j];
183
+ out.push(row);
184
+ }
185
+ return out;
186
+ }
83
187
  //# sourceMappingURL=classifier.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"classifier.js","sourceRoot":"","sources":["../classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAIN,gBAAgB,EAChB,YAAY,EACZ,cAAc,EACd,WAAW,GACX,MAAM,yBAAyB,CAAA;AAChC,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAA;AAC/C,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAC7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAA;AACnD,OAAO,EAA2B,cAAc,EAAE,MAAM,cAAc,CAAA;AAStE,MAAM,OAAO,uBAAuB;IAGN;IAFZ,MAAM,CAAmB;IAE1C,YAA6B,GAAkC;QAAlC,QAAG,GAAH,GAAG,CAA+B;QAC9D,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,iBAAiB,CAAA;IAC9C,CAAC;IAED;;;;;;OAMG;IACH,MAAM,CAAC,KAAK,CAAC,eAAe,CAAC,OAA2B,EAAE;QACzD,MAAM,EAAE,SAAS,EAAE,aAAa,EAAE,GAAG,cAAc,CAAC,IAAI,CAAC,CAAA;QACzD,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC7C,kBAAkB,CAAC,YAAY,CAAC,aAAa,CAAC;YAC9C,UAAU,CAAC,MAAM,CAAC,SAAS,CAAC;SAC5B,CAAC,CAAA;QACF,OAAO,IAAI,uBAAuB,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC,CAAA;IAC1D,CAAC;IAED,wDAAwD;IACxD,KAAK,CAAC,KAAK,CAAC,IAAY;QACvB,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,CAAA;QAEtD,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QACvD,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;QAEnD,MAAM,MAAM,GAAmB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAClD,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;YACtB,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,aAAa,CAAC,GAAG,CAAC,CAAA;YACxC,OAAO;gBACN,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,GAAG,EAAE,CAAC,CAAC,GAAG;gBACV,KAAK,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,GAAG,CAA0B;gBACzD,UAAU,EAAE,IAAI;aAChB,CAAA;QACF,CAAC,CAAC,CAAA;QAEF,OAAO,gBAAgB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;IACtC,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,IAAY;QAC3B,OAAO,YAAY,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAA;IAC5C,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,IAAY;QAC7B,OAAO,cAAc,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAA;IAC9C,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,IAAY,EAAE,IAAwC;QACpE,OAAO,WAAW,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,IAA
I,CAAC,CAAA;IACjD,CAAC;CACD;AAED,SAAS,aAAa,CAAC,GAAa;IACnC,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,IAAI,MAAM,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;IACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,IAAI,GAAG,CAAC,CAAC,CAAE,GAAG,MAAM,EAAE,CAAC;YACtB,MAAM,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;YAChB,MAAM,GAAG,CAAC,CAAA;QACX,CAAC;IACF,CAAC;IACD,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,KAAK,MAAM,CAAC,IAAI,GAAG;QAAE,MAAM,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,CAAA;IACnD,MAAM,IAAI,GAAG,CAAC,GAAG,MAAM,CAAA;IACvB,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;AAC7B,CAAC"}
1
+ {"version":3,"file":"classifier.js","sourceRoot":"","sources":["../classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EACN,gBAAgB,EAChB,YAAY,EACZ,cAAc,EACd,WAAW,GAIX,MAAM,yBAAyB,CAAA;AAChC,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAA;AAE/C,OAAO,EAAE,iBAAiB,EAAE,mBAAmB,EAAuB,MAAM,wBAAwB,CAAA;AACpG,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAA;AACnD,OAAO,EAAE,eAAe,EAAE,iBAAiB,EAAE,sBAAsB,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAA;AA0C3G,MAAM,OAAO,uBAAuB;IAON;IANZ,MAAM,CAAmB;IACzB,UAAU,CAAsB;IAChC,WAAW,CAAY;IACvB,gBAAgB,CAAU;IAC1B,cAAc,CAAU;IAEzC,YAA6B,GAAkC;QAAlC,QAAG,GAAH,GAAG,CAA+B;QAC9D,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,iBAAiB,CAAA;QAC7C,IAAI,CAAC,UAAU,GAAG,GAAG,CAAC,MAAM,IAAI,SAAS,CAAA;QACzC,MAAM,UAAU,GAAG,sBAAsB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACtD,IAAI,GAAG,CAAC,WAAW,EAAE,CAAC;YACrB,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC,UAAU,EAAE,GAAG,CAAC,WAAW,CAAC,CAAA;QAC5D,CAAC;aAAM,CAAC;YACP,IAAI,CAAC,WAAW,GAAG,UAAU,CAAA;QAC9B,CAAC;QACD,IAAI,CAAC,gBAAgB,GAAG,GAAG,CAAC,gBAAgB,IAAI,iBAAiB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QAC9E,IAAI,CAAC,cAAc,GAAG,GAAG,CAAC,cAAc,IAAI,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IACzE,CAAC;IAED;;;;;;;;;;;OAWG;IACH,MAAM,CAAC,KAAK,CAAC,eAAe,CAAC,OAA2B,EAAE;QACzD,yFAAyF;QACzF,2FAA2F;QAC3F,uFAAuF;QACvF,0FAA0F;QAC1F,2BAA2B;QAC3B,MAAM,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE,cAAc,EAAE,uBAAuB,EAAE,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YACvF,MAAM,CAAC,yBAAyB,CAAC,kBAAkB,CAAC;YACpD,MAAM,CAAC,yBAAyB,CAAC,cAAc,CAAC;SAChD,CAAC,CAAA;QACF,MAAM,QAAQ,GAAoB,cAAc,CAAC,IAAI,CAAC,CAAA;QACtD,yFAAyF;QACzF,yFAAyF;QACzF,yFAAyF;QACzF,wFAAwF;QACxF,MAAM,MAAM,GAAG,uBAAuB,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAA;QAC9D,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC7C,kBAAkB,CAAC,YAAY,CAAC,QAAQ,CAAC,aAAa,CAAC;YACvD,UAAU,CAAC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC;SACrC,CAAC,CAAA;QACF,OAAO,IAAI,uBAAuB,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAA;IAClE,CAAC;IAED,6DAA6D;IAC7D,KAAK,CAAC,KAAK,CAAC,IAAY,EAAE,IAAgB;QACzC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,CAA
A;QAEtD,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QACvD,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;QAEnD,+EAA+E;QAC/E,wFAAwF;QACxF,yFAAyF;QACzF,aAAa;QACb,MAAM,SAAS,GAAG,IAAI,EAAE,UAAU;YACjC,CAAC,CAAC,iBAAiB,CACjB,MAAM,EACN,mBAAmB,CAAC,IAAI,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE;gBACzD,SAAS,EAAE,IAAI,CAAC,mBAAmB,IAAI,GAAG;aAC1C,CAAC,CACF;YACF,CAAC,CAAC,MAAM,CAAA;QAET,MAAM,YAAY,GACjB,IAAI,CAAC,UAAU,KAAK,SAAS;YAC5B,CAAC,CAAC,OAAO,CAAC;gBACR,SAAS;gBACT,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;gBACvC,cAAc,EAAE,IAAI,CAAC,cAAc;aACnC,CAAC,CAAC,IAAI;YACR,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAA;QAElD,MAAM,MAAM,GAAmB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAClD,MAAM,GAAG,GAAG,YAAY,CAAC,CAAC,CAAE,CAAA;YAC5B,wFAAwF;YACxF,6DAA6D;YAC7D,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAA;YACjC,OAAO;gBACN,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,GAAG,EAAE,CAAC,CAAC,GAAG;gBACV,KAAK,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,GAAG,CAA0B;gBACzD,UAAU,EAAE,KAAK,CAAC,GAAG,CAAE;aACvB,CAAA;QACF,CAAC,CAAC,CAAA;QAEF,OAAO,gBAAgB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;IACtC,CAAC;IAED;;;OAGG;IACH,KAAK,CAAC,eAAe,CAAC,IAAY,EAAE,IAAgB;QACnD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,EAAE,IAAI,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,EAAE,EAAE,CAAA;QAClE,CAAC;QACD,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QACvD,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;QAEnD,MAAM,SAAS,GAAG,IAAI,EAAE,UAAU;YACjC,CAAC,CAAC,iBAAiB,CACjB,MAAM,EACN,mBAAmB,CAAC,IAAI,CAAC,UAAU,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE;gBACzD,SAAS,EAAE,IAAI,CAAC,mBAAmB,IAAI,GAAG;aAC1C,CAAC,CACF;YACF,CAAC,CAAC,MAAM,CAAA;QAET,MAAM,YAAY,GACjB,IAAI,CAAC,UAAU,KAAK,SAAS;YAC5B,CAAC,CAAC,OAAO,CAAC;gBACR,SAAS;gBACT,WAAW,EA
AE,IAAI,CAAC,WAAW;gBAC7B,gBAAgB,EAAE,IAAI,CAAC,gBAAgB;gBACvC,cAAc,EAAE,IAAI,CAAC,cAAc;aACnC,CAAC,CAAC,IAAI;YACR,CAAC,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAA;QAElD,MAAM,MAAM,GAAmB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAClD,MAAM,GAAG,GAAG,YAAY,CAAC,CAAC,CAAE,CAAA;YAC5B,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,CAAA;YACjC,OAAO;gBACN,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,GAAG,EAAE,CAAC,CAAC,GAAG;gBACV,KAAK,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,GAAG,CAA0B;gBACzD,UAAU,EAAE,KAAK,CAAC,GAAG,CAAE;aACvB,CAAA;QACF,CAAC,CAAC,CAAA;QAEF,OAAO;YACN,IAAI,EAAE,gBAAgB,CAAC,IAAI,EAAE,MAAM,CAAC;YACpC,MAAM;YACN,MAAM,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;SAC3D,CAAA;IACF,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,IAAY,EAAE,IAAgB;QAC7C,OAAO,YAAY,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAA;IAClD,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,IAAY,EAAE,IAAgB;QAC/C,OAAO,cAAc,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAA;IACpD,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,IAAY,EAAE,IAA8D;QAC1F,OAAO,WAAW,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,EAAE,IAAI,EAAE,GAAG,CAAC,CAAA;IAC5D,CAAC;CACD;AA2BD,SAAS,aAAa,CAAC,GAAa;IACnC,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,IAAI,MAAM,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;IACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,IAAI,GAAG,CAAC,CAAC,CAAE,GAAG,MAAM,EAAE,CAAC;YACtB,MAAM,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;YAChB,MAAM,GAAG,CAAC,CAAA;QACX,CAAC;IACF,CAAC;IACD,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,KAAK,MAAM,CAAC,IAAI,GAAG;QAAE,MAAM,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,CAAA;IACnD,MAAM,IAAI,GAAG,CAAC,GAAG,MAAM,CAAA;IACvB,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;AAC7B,CAAC;AAED,uGAAuG;AACvG,SAAS,WAAW,CAAC,CAAa,EAAE,CAAa;IAChD,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAA;IAClB,MAAM,GAAG,GAAe,EAAE,CAAA;IAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EA
AE,CAAC;QAC5B,MAAM,GAAG,GAAG,IAAI,KAAK,CAAS,CAAC,CAAC,CAAA;QAChC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;YAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC,CAAE,CAAA;QAC1D,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACd,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC"}
package/out/index.d.ts CHANGED
@@ -7,6 +7,10 @@ export * from "./classifier.js";
7
7
  export * from "./labels.js";
8
8
  export * from "./onnx-runner.js";
9
9
  export * from "./proposal-classifier.js";
10
+ export { addEmissionMatrix, buildEmissionPriors } from "./query-shape-prior.js";
11
+ export type { BuildPriorsOpts, KnownFormatHitLike, QueryShapeLike, TokenLike } from "./query-shape-prior.js";
10
12
  export * from "./tokenizer.js";
13
+ export { buildBioEndMask, buildBioStartMask, buildBioTransitionMask, perTokenArgmax, softmax, viterbi, } from "./viterbi.js";
14
+ export type { ViterbiInput, ViterbiResult } from "./viterbi.js";
11
15
  export * from "./weights.js";
12
16
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,kBAAkB,CAAA;AAChC,cAAc,0BAA0B,CAAA;AACxC,cAAc,gBAAgB,CAAA;AAC9B,cAAc,cAAc,CAAA"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,kBAAkB,CAAA;AAChC,cAAc,0BAA0B,CAAA;AACxC,OAAO,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAA;AAC/E,YAAY,EAAE,eAAe,EAAE,kBAAkB,EAAE,cAAc,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAA;AAC5G,cAAc,gBAAgB,CAAA;AAC9B,OAAO,EACN,eAAe,EACf,iBAAiB,EACjB,sBAAsB,EACtB,cAAc,EACd,OAAO,EACP,OAAO,GACP,MAAM,cAAc,CAAA;AACrB,YAAY,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,cAAc,CAAA;AAC/D,cAAc,cAAc,CAAA"}
package/out/index.js CHANGED
@@ -7,6 +7,8 @@ export * from "./classifier.js";
7
7
  export * from "./labels.js";
8
8
  export * from "./onnx-runner.js";
9
9
  export * from "./proposal-classifier.js";
10
+ export { addEmissionMatrix, buildEmissionPriors } from "./query-shape-prior.js";
10
11
  export * from "./tokenizer.js";
12
+ export { buildBioEndMask, buildBioStartMask, buildBioTransitionMask, perTokenArgmax, softmax, viterbi, } from "./viterbi.js";
11
13
  export * from "./weights.js";
12
14
  //# sourceMappingURL=index.js.map
package/out/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,kBAAkB,CAAA;AAChC,cAAc,0BAA0B,CAAA;AACxC,cAAc,gBAAgB,CAAA;AAC9B,cAAc,cAAc,CAAA"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,kBAAkB,CAAA;AAChC,cAAc,0BAA0B,CAAA;AACxC,OAAO,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAA;AAE/E,cAAc,gBAAgB,CAAA;AAC9B,OAAO,EACN,eAAe,EACf,iBAAiB,EACjB,sBAAsB,EACtB,cAAc,EACd,OAAO,EACP,OAAO,GACP,MAAM,cAAc,CAAA;AAErB,cAAc,cAAc,CAAA"}
package/out/labels.d.ts CHANGED
@@ -3,18 +3,39 @@
3
3
  * @license AGPL-3.0
4
4
  * @author Teffen Ellis, et al.
5
5
  *
6
- * Mirror of `packages/corpus-python/src/mailwoman_train/labels.py::STAGE1_BIO_LABELS`.
6
+ * Mirror of `packages/corpus-python/src/mailwoman_train/labels.py`.
7
7
  *
8
- * The v0.1.0 / v0.2.0 weight packages were trained with this exact label order. Any drift here
9
- * silently corrupts downstream BIO decoding — index 5 must mean `B-locality` on both sides.
8
+ * Index label parity is load-bearing: the model emits logits in one canonical order on both sides
9
+ * and any drift here silently corrupts BIO decoding. STAGE2 strictly extends STAGE1 — the first
10
+ * 15 indices are identical, so reading a v0.2.0 (Stage 1) model with the Stage 2 label vocabulary
11
+ * stays correct; the extra entries are unused.
10
12
  *
11
- * Stage 2+ models will support more labels (street, house_number, venue, …). The plan is to plumb
12
- * the label set through `model-card.json` at load time rather than hard-coding it here. Until
13
- * then this file is the source of truth on the TS side.
13
+ * Runtime loading: as of v0.4.0 the trained label vocabulary is carried in `model-card.json`'s
14
+ * `labels` field and read by `loadFromWeights` (see `weights.readLabelsFromModelCard`). These
15
+ * constants remain the compile-time fallback for legacy bundles whose cards predate the field —
16
+ * safe because such bundles are by construction Stage 1 or Stage 2, and Stage 2 prefix-extends
17
+ * Stage 1. A future Stage 3 ship will not be safe under the fallback; the loader treats a missing
18
+ * `labels` field as "you are loading a pre-v0.4.0 bundle" rather than "unknown stage".
14
19
  */
15
20
  import type { BioLabel } from "@mailwoman/core/decoder";
16
21
  /** Coarse component tags trained in Phase 2 Stage 1 (v0.1.0 / v0.2.0). */
17
22
  export declare const STAGE1_COARSE_TAGS: readonly ["country", "region", "locality", "dependent_locality", "postcode", "subregion", "cedex"];
18
23
  /** BIO label vocabulary for Stage 1 — O + (B-/I- per coarse tag). 1 + 14 = 15 labels. */
19
24
  export declare const STAGE1_BIO_LABELS: readonly BioLabel[];
25
+ /**
26
+ * Fine-grained tags added in Phase 2 Stage 2 (v0.3.0). venue covers organization/POI/landmark
27
+ * names; street + house_number break out the street-address components that Stage 1 collapsed to
28
+ * `O`.
29
+ */
30
+ export declare const STAGE2_FINE_TAGS: readonly ["venue", "street", "house_number"];
31
+ /** Stage 2 ships the full coarse + fine set in the order STAGE2_BIO_LABELS is interleaved. */
32
+ export declare const STAGE2_TAGS: readonly ["country", "region", "locality", "dependent_locality", "postcode", "subregion", "cedex", "venue", "street", "house_number"];
33
+ /**
34
+ * BIO label vocabulary for Stage 2 (v0.3.0) — O + (B-/I- per Stage 2 tag). 1 + 20 = 21 labels.
35
+ *
36
+ * Index parity vs Stage 1: STAGE2_BIO_LABELS[i] === STAGE1_BIO_LABELS[i] for i ∈ [0, 15). Anyone
37
+ * loading a Stage 1 model with this vocabulary still decodes correctly; the tail (15..20) just
38
+ * never gets argmax'd because Stage 1 only emits 15 logits.
39
+ */
40
+ export declare const STAGE2_BIO_LABELS: readonly BioLabel[];
20
41
  //# sourceMappingURL=labels.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"labels.d.ts","sourceRoot":"","sources":["../labels.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,yBAAyB,CAAA;AAEvD,0EAA0E;AAC1E,eAAO,MAAM,kBAAkB,oGAQrB,CAAA;AAEV,yFAAyF;AACzF,eAAO,MAAM,iBAAiB,EAAE,SAAS,QAAQ,EAG/C,CAAA"}
1
+ {"version":3,"file":"labels.d.ts","sourceRoot":"","sources":["../labels.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,yBAAyB,CAAA;AAEvD,0EAA0E;AAC1E,eAAO,MAAM,kBAAkB,oGAQrB,CAAA;AAEV,yFAAyF;AACzF,eAAO,MAAM,iBAAiB,EAAE,SAAS,QAAQ,EAG/C,CAAA;AAEF;;;;GAIG;AACH,eAAO,MAAM,gBAAgB,8CAA+C,CAAA;AAE5E,8FAA8F;AAC9F,eAAO,MAAM,WAAW,uIAAwD,CAAA;AAEhF;;;;;;GAMG;AACH,eAAO,MAAM,iBAAiB,EAAE,SAAS,QAAQ,EAG/C,CAAA"}
package/out/labels.js CHANGED
@@ -3,14 +3,19 @@
3
3
  * @license AGPL-3.0
4
4
  * @author Teffen Ellis, et al.
5
5
  *
6
- * Mirror of `packages/corpus-python/src/mailwoman_train/labels.py::STAGE1_BIO_LABELS`.
6
+ * Mirror of `packages/corpus-python/src/mailwoman_train/labels.py`.
7
7
  *
8
- * The v0.1.0 / v0.2.0 weight packages were trained with this exact label order. Any drift here
9
- * silently corrupts downstream BIO decoding — index 5 must mean `B-locality` on both sides.
8
+ * Index label parity is load-bearing: the model emits logits in one canonical order on both sides
9
+ * and any drift here silently corrupts BIO decoding. STAGE2 strictly extends STAGE1 — the first
10
+ * 15 indices are identical, so reading a v0.2.0 (Stage 1) model with the Stage 2 label vocabulary
11
+ * stays correct; the extra entries are unused.
10
12
  *
11
- * Stage 2+ models will support more labels (street, house_number, venue, …). The plan is to plumb
12
- * the label set through `model-card.json` at load time rather than hard-coding it here. Until
13
- * then this file is the source of truth on the TS side.
13
+ * Runtime loading: as of v0.4.0 the trained label vocabulary is carried in `model-card.json`'s
14
+ * `labels` field and read by `loadFromWeights` (see `weights.readLabelsFromModelCard`). These
15
+ * constants remain the compile-time fallback for legacy bundles whose cards predate the field —
16
+ * safe because such bundles are by construction Stage 1 or Stage 2, and Stage 2 prefix-extends
17
+ * Stage 1. A future Stage 3 ship will not be safe under the fallback; the loader treats a missing
18
+ * `labels` field as "you are loading a pre-v0.4.0 bundle" rather than "unknown stage".
14
19
  */
15
20
  /** Coarse component tags trained in Phase 2 Stage 1 (v0.1.0 / v0.2.0). */
16
21
  export const STAGE1_COARSE_TAGS = [
@@ -27,4 +32,23 @@ export const STAGE1_BIO_LABELS = Object.freeze([
27
32
  "O",
28
33
  ...STAGE1_COARSE_TAGS.flatMap((tag) => [`B-${tag}`, `I-${tag}`]),
29
34
  ]);
35
+ /**
36
+ * Fine-grained tags added in Phase 2 Stage 2 (v0.3.0). venue covers organization/POI/landmark
37
+ * names; street + house_number break out the street-address components that Stage 1 collapsed to
38
+ * `O`.
39
+ */
40
+ export const STAGE2_FINE_TAGS = ["venue", "street", "house_number"];
41
+ /** Stage 2 ships the full coarse + fine set in the order STAGE2_BIO_LABELS is interleaved. */
42
+ export const STAGE2_TAGS = [...STAGE1_COARSE_TAGS, ...STAGE2_FINE_TAGS];
43
+ /**
44
+ * BIO label vocabulary for Stage 2 (v0.3.0) — O + (B-/I- per Stage 2 tag). 1 + 20 = 21 labels.
45
+ *
46
+ * Index parity vs Stage 1: STAGE2_BIO_LABELS[i] === STAGE1_BIO_LABELS[i] for i ∈ [0, 15). Anyone
47
+ * loading a Stage 1 model with this vocabulary still decodes correctly; the tail (15..20) just
48
+ * never gets argmax'd because Stage 1 only emits 15 logits.
49
+ */
50
+ export const STAGE2_BIO_LABELS = Object.freeze([
51
+ "O",
52
+ ...STAGE2_TAGS.flatMap((tag) => [`B-${tag}`, `I-${tag}`]),
53
+ ]);
30
54
  //# sourceMappingURL=labels.js.map
package/out/labels.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"labels.js","sourceRoot":"","sources":["../labels.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAIH,0EAA0E;AAC1E,MAAM,CAAC,MAAM,kBAAkB,GAAG;IACjC,SAAS;IACT,QAAQ;IACR,UAAU;IACV,oBAAoB;IACpB,UAAU;IACV,WAAW;IACX,OAAO;CACE,CAAA;AAEV,yFAAyF;AACzF,MAAM,CAAC,MAAM,iBAAiB,GAAwB,MAAM,CAAC,MAAM,CAAC;IACnE,GAAe;IACf,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,KAAK,GAAG,EAAc,EAAE,KAAK,GAAG,EAAc,CAAC,CAAC;CACxF,CAAC,CAAA"}
1
+ {"version":3,"file":"labels.js","sourceRoot":"","sources":["../labels.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAIH,0EAA0E;AAC1E,MAAM,CAAC,MAAM,kBAAkB,GAAG;IACjC,SAAS;IACT,QAAQ;IACR,UAAU;IACV,oBAAoB;IACpB,UAAU;IACV,WAAW;IACX,OAAO;CACE,CAAA;AAEV,yFAAyF;AACzF,MAAM,CAAC,MAAM,iBAAiB,GAAwB,MAAM,CAAC,MAAM,CAAC;IACnE,GAAe;IACf,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,KAAK,GAAG,EAAc,EAAE,KAAK,GAAG,EAAc,CAAC,CAAC;CACxF,CAAC,CAAA;AAEF;;;;GAIG;AACH,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,OAAO,EAAE,QAAQ,EAAE,cAAc,CAAU,CAAA;AAE5E,8FAA8F;AAC9F,MAAM,CAAC,MAAM,WAAW,GAAG,CAAC,GAAG,kBAAkB,EAAE,GAAG,gBAAgB,CAAU,CAAA;AAEhF;;;;;;GAMG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAAwB,MAAM,CAAC,MAAM,CAAC;IACnE,GAAe;IACf,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,KAAK,GAAG,EAAc,EAAE,KAAK,GAAG,EAAc,CAAC,CAAC;CACjF,CAAC,CAAA"}
@@ -24,7 +24,11 @@ export interface NeuralProposalClassifierConfig {
24
24
  id: string;
25
25
  /** The underlying neural classifier instance. */
26
26
  classifier: NeuralAddressClassifier;
27
- /** Component tags this classifier may emit. Defaults to Stage 1 coarse tags. */
27
+ /**
28
+ * Component tags this classifier may emit. Defaults to the Stage 2 tag set (coarse +
29
+ * venue/street/house_number). v0.2.0 Stage 1 models never decode to the fine tags anyway, so the
30
+ * broader default is forwards-compat without back-compat risk.
31
+ */
28
32
  emits?: readonly ComponentTag[];
29
33
  /** Locales this classifier is active for. `["*"]` (locale-agnostic) by default. */
30
34
  locales?: readonly (string | "*")[];
@@ -1 +1 @@
1
- {"version":3,"file":"proposal-classifier.d.ts","sourceRoot":"","sources":["../proposal-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAIH,OAAO,KAAK,EAGX,YAAY,EACZ,kBAAkB,EAElB,MAAM,uBAAuB,CAAA;AAC9B,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAA;AAG9D,MAAM,WAAW,8BAA8B;IAC9C,wFAAwF;IACxF,EAAE,EAAE,MAAM,CAAA;IACV,iDAAiD;IACjD,UAAU,EAAE,uBAAuB,CAAA;IACnC,gFAAgF;IAChF,KAAK,CAAC,EAAE,SAAS,YAAY,EAAE,CAAA;IAC/B,mFAAmF;IACnF,OAAO,CAAC,EAAE,SAAS,CAAC,MAAM,GAAG,GAAG,CAAC,EAAE,CAAA;IACnC,+DAA+D;IAC/D,OAAO,CAAC,EAAE,MAAM,CAAA;CAChB;AAED,0EAA0E;AAC1E,wBAAgB,8BAA8B,CAAC,GAAG,EAAE,8BAA8B,GAAG,kBAAkB,CA6CtG"}
1
+ {"version":3,"file":"proposal-classifier.d.ts","sourceRoot":"","sources":["../proposal-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAIH,OAAO,KAAK,EAGX,YAAY,EACZ,kBAAkB,EAElB,MAAM,uBAAuB,CAAA;AAC9B,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAA;AAG9D,MAAM,WAAW,8BAA8B;IAC9C,wFAAwF;IACxF,EAAE,EAAE,MAAM,CAAA;IACV,iDAAiD;IACjD,UAAU,EAAE,uBAAuB,CAAA;IACnC;;;;OAIG;IACH,KAAK,CAAC,EAAE,SAAS,YAAY,EAAE,CAAA;IAC/B,mFAAmF;IACnF,OAAO,CAAC,EAAE,SAAS,CAAC,MAAM,GAAG,GAAG,CAAC,EAAE,CAAA;IACnC,+DAA+D;IAC/D,OAAO,CAAC,EAAE,MAAM,CAAA;CAChB;AAED,0EAA0E;AAC1E,wBAAgB,8BAA8B,CAAC,GAAG,EAAE,8BAA8B,GAAG,kBAAkB,CA6CtG"}
@@ -17,10 +17,10 @@
17
17
  * inference is a future optimization once the policy layer has a way to invoke a classifier "once
18
18
  * per parse" instead of per section.
19
19
  */
20
- import { STAGE1_COARSE_TAGS } from "./labels.js";
20
+ import { STAGE2_TAGS } from "./labels.js";
21
21
  /** Build a `ProposalClassifier` backed by a `NeuralAddressClassifier`. */
22
22
  export function createNeuralProposalClassifier(cfg) {
23
- const emits = cfg.emits ?? STAGE1_COARSE_TAGS;
23
+ const emits = cfg.emits ?? STAGE2_TAGS;
24
24
  const emitsSet = new Set(emits);
25
25
  const penalty = cfg.penalty ?? 0;
26
26
  async function classify(section, _ctx) {
@@ -1 +1 @@
1
- {"version":3,"file":"proposal-classifier.js","sourceRoot":"","sources":["../proposal-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAYH,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAA;AAehD,0EAA0E;AAC1E,MAAM,UAAU,8BAA8B,CAAC,GAAmC;IACjF,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,IAAI,kBAAkB,CAAA;IAC7C,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAe,KAAgC,CAAC,CAAA;IACxE,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,IAAI,CAAC,CAAA;IAEhC,KAAK,UAAU,QAAQ,CAAC,OAAgB,EAAE,IAAuB;QAChE,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;QACrD,MAAM,SAAS,GAA6B,EAAE,CAAA;QAC9C,MAAM,aAAa,GAAG,OAAO,CAAC,KAAK,CAAA;QAEnC,MAAM,KAAK,GAAG,CAAC,IAAiB,EAAQ,EAAE;YACzC,IAAI,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC5B,wFAAwF;gBACxF,oFAAoF;gBACpF,uFAAuF;gBACvF,iFAAiF;gBACjF,mFAAmF;gBACnF,2EAA2E;gBAC3E,MAAM,IAAI,GAAG;oBACZ,KAAK,EAAE,aAAa,GAAG,IAAI,CAAC,KAAK;oBACjC,GAAG,EAAE,aAAa,GAAG,IAAI,CAAC,GAAG;oBAC7B,IAAI,EAAE,IAAI,CAAC,KAAK;iBACG,CAAA;gBACpB,SAAS,CAAC,IAAI,CAAC;oBACd,IAAI;oBACJ,SAAS,EAAE,IAAI,CAAC,GAAG;oBACnB,UAAU,EAAE,IAAI,CAAC,UAAU;oBAC3B,MAAM,EAAE,QAAQ;oBAChB,SAAS,EAAE,GAAG,CAAC,EAAE;oBACjB,OAAO;iBACP,CAAC,CAAA;YACH,CAAC;YACD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ;gBAAE,KAAK,CAAC,KAAK,CAAC,CAAA;QAChD,CAAC,CAAA;QAED,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK;YAAE,KAAK,CAAC,IAAI,CAAC,CAAA;QAC1C,OAAO,SAAS,CAAA;IACjB,CAAC;IAED,OAAO;QACN,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,KAAK;QACL,OAAO,EAAE,GAAG,CAAC,OAAO,IAAI,CAAC,GAAG,CAAC;QAC7B,QAAQ;KACR,CAAA;AACF,CAAC"}
1
+ {"version":3,"file":"proposal-classifier.js","sourceRoot":"","sources":["../proposal-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAYH,OAAO,EAAE,WAAW,EAAE,MAAM,aAAa,CAAA;AAmBzC,0EAA0E;AAC1E,MAAM,UAAU,8BAA8B,CAAC,GAAmC;IACjF,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,IAAI,WAAW,CAAA;IACtC,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAe,KAAgC,CAAC,CAAA;IACxE,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,IAAI,CAAC,CAAA;IAEhC,KAAK,UAAU,QAAQ,CAAC,OAAgB,EAAE,IAAuB;QAChE,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;QACrD,MAAM,SAAS,GAA6B,EAAE,CAAA;QAC9C,MAAM,aAAa,GAAG,OAAO,CAAC,KAAK,CAAA;QAEnC,MAAM,KAAK,GAAG,CAAC,IAAiB,EAAQ,EAAE;YACzC,IAAI,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC5B,wFAAwF;gBACxF,oFAAoF;gBACpF,uFAAuF;gBACvF,iFAAiF;gBACjF,mFAAmF;gBACnF,2EAA2E;gBAC3E,MAAM,IAAI,GAAG;oBACZ,KAAK,EAAE,aAAa,GAAG,IAAI,CAAC,KAAK;oBACjC,GAAG,EAAE,aAAa,GAAG,IAAI,CAAC,GAAG;oBAC7B,IAAI,EAAE,IAAI,CAAC,KAAK;iBACG,CAAA;gBACpB,SAAS,CAAC,IAAI,CAAC;oBACd,IAAI;oBACJ,SAAS,EAAE,IAAI,CAAC,GAAG;oBACnB,UAAU,EAAE,IAAI,CAAC,UAAU;oBAC3B,MAAM,EAAE,QAAQ;oBAChB,SAAS,EAAE,GAAG,CAAC,EAAE;oBACjB,OAAO;iBACP,CAAC,CAAA;YACH,CAAC;YACD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ;gBAAE,KAAK,CAAC,KAAK,CAAC,CAAA;QAChD,CAAC,CAAA;QAED,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK;YAAE,KAAK,CAAC,IAAI,CAAC,CAAA;QAC1C,OAAO,SAAS,CAAA;IACjB,CAAC;IAED,OAAO;QACN,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,KAAK;QACL,OAAO,EAAE,GAAG,CAAC,OAAO,IAAI,CAAC,GAAG,CAAC;QAC7B,QAAQ;KACR,CAAA;AACF,CAAC"}
@@ -0,0 +1,62 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Soft-prior emission biases derived from `QueryShape`.
7
+ *
8
+ * When the QueryShape sub-system has identified a known-format span (US ZIP, UK postcode, PO box,
9
+ * etc.), this module produces an additive bias matrix that nudges the encoder's per-token
10
+ * emissions toward the matching BIO label. The biases compose with the structural BIO mask in the
11
+ * Viterbi decoder — confident encoder predictions still win, but uncertain ones get pulled toward
12
+ * the format-implied label.
13
+ *
14
+ * Bitter-lesson-safe boundary: we don't override the encoder, just bias it. The encoder remains the
15
+ * authority on context-dependent calls (the "Buffalo Wild Wings, Buffalo, NY" disambiguation);
16
+ * the QueryShape prior helps on the easy cases (a 5-digit token is _probably_ a postcode).
17
+ *
18
+ * Uses structural typing for the QueryShape input so this module has zero dependencies on
19
+ * `@mailwoman/query-shape` — consumers compute the shape with that package, pass it in here.
20
+ */
21
+ /**
22
+ * Minimal subset of `QueryShape` this module consumes. Compatible with `@mailwoman/query-shape`'s
23
+ * exported `QueryShape` type by shape — no import required.
24
+ */
25
+ export interface QueryShapeLike {
26
+ knownFormats: ReadonlyArray<KnownFormatHitLike>;
27
+ }
28
+ export interface KnownFormatHitLike {
29
+ format: string;
30
+ span: {
31
+ start: number;
32
+ end: number;
33
+ };
34
+ /** 0..1; ambiguous patterns (e.g. 5-digit US/FR/DE overlap) score lower. */
35
+ confidence: number;
36
+ }
37
+ /** Minimal subset of `TokenizedPiece` this module consumes. */
38
+ export interface TokenLike {
39
+ start: number;
40
+ end: number;
41
+ }
42
+ export interface BuildPriorsOpts {
43
+ /**
44
+ * Maximum bias magnitude (in log-odds units). Default 1.0 — adds up to ~e^1 ≈ 2.7× odds to the
45
+ * favored label. Confidence-scaled, so a 0.6-confidence format hit gets +0.6 max bias.
46
+ */
47
+ biasScale?: number;
48
+ }
49
+ /**
50
+ * Build a `[seqLen][numLabels]` matrix of additive log-bias to be added to encoder emissions before
51
+ * Viterbi decoding.
52
+ *
53
+ * For each (token, format-hit) pair where the token's character span overlaps the hit's span, the
54
+ * matrix entry for the format's mapped label receives `hit.confidence × biasScale`. Tokens that
55
+ * don't overlap any hit, or for which no label mapping exists, get 0.
56
+ *
57
+ * Returns the all-zeros matrix if `shape.knownFormats` is empty — composes harmlessly.
58
+ */
59
+ export declare function buildEmissionPriors(shape: QueryShapeLike, tokens: ReadonlyArray<TokenLike>, labels: ReadonlyArray<string>, opts?: BuildPriorsOpts): number[][];
60
+ /** Element-wise add two matrices of equal shape. Returns a new matrix. */
61
+ export declare function addEmissionMatrix(emissions: number[][], priors: number[][]): number[][];
62
+ //# sourceMappingURL=query-shape-prior.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"query-shape-prior.d.ts","sourceRoot":"","sources":["../query-shape-prior.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH;;;GAGG;AACH,MAAM,WAAW,cAAc;IAC9B,YAAY,EAAE,aAAa,CAAC,kBAAkB,CAAC,CAAA;CAC/C;AAED,MAAM,WAAW,kBAAkB;IAClC,MAAM,EAAE,MAAM,CAAA;IACd,IAAI,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,GAAG,EAAE,MAAM,CAAA;KAAE,CAAA;IACpC,4EAA4E;IAC5E,UAAU,EAAE,MAAM,CAAA;CAClB;AAED,+DAA+D;AAC/D,MAAM,WAAW,SAAS;IACzB,KAAK,EAAE,MAAM,CAAA;IACb,GAAG,EAAE,MAAM,CAAA;CACX;AAiBD,MAAM,WAAW,eAAe;IAC/B;;;OAGG;IACH,SAAS,CAAC,EAAE,MAAM,CAAA;CAClB;AAED;;;;;;;;;GASG;AACH,wBAAgB,mBAAmB,CAClC,KAAK,EAAE,cAAc,EACrB,MAAM,EAAE,aAAa,CAAC,SAAS,CAAC,EAChC,MAAM,EAAE,aAAa,CAAC,MAAM,CAAC,EAC7B,IAAI,GAAE,eAAoB,GACxB,MAAM,EAAE,EAAE,CA4BZ;AAMD,0EAA0E;AAC1E,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,GAAG,MAAM,EAAE,EAAE,CAWvF"}
@@ -0,0 +1,93 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Soft-prior emission biases derived from `QueryShape`.
7
+ *
8
+ * When the QueryShape sub-system has identified a known-format span (US ZIP, UK postcode, PO box,
9
+ * etc.), this module produces an additive bias matrix that nudges the encoder's per-token
10
+ * emissions toward the matching BIO label. The biases compose with the structural BIO mask in the
11
+ * Viterbi decoder — confident encoder predictions still win, but uncertain ones get pulled toward
12
+ * the format-implied label.
13
+ *
14
+ * Bitter-lesson-safe boundary: we don't override the encoder, just bias it. The encoder remains the
15
+ * authority on context-dependent calls (the "Buffalo Wild Wings, Buffalo, NY" disambiguation);
16
+ * the QueryShape prior helps on the easy cases (a 5-digit token is _probably_ a postcode).
17
+ *
18
+ * Uses structural typing for the QueryShape input so this module has zero dependencies on
19
+ * `@mailwoman/query-shape` — consumers compute the shape with that package, pass it in here.
20
+ */
21
+ /**
22
+ * Mapping from `KnownFormat` strings to the BIO label that should be boosted. Multiple formats may
23
+ * map to the same label (all postcode flavors → `B-postcode`).
24
+ */
25
+ const FORMAT_TO_LABEL = new Map([
26
+ ["us_zip", "B-postcode"],
27
+ ["us_zip4", "B-postcode"],
28
+ ["fr_postcode", "B-postcode"],
29
+ ["de_postcode", "B-postcode"],
30
+ ["uk_postcode", "B-postcode"],
31
+ ["ca_postcode", "B-postcode"],
32
+ ["jp_postcode", "B-postcode"],
33
+ ["po_box", "B-po_box"],
34
+ ]);
35
+ /**
36
+ * Build a `[seqLen][numLabels]` matrix of additive log-bias to be added to encoder emissions before
37
+ * Viterbi decoding.
38
+ *
39
+ * For each (token, format-hit) pair where the token's character span overlaps the hit's span, the
40
+ * matrix entry for the format's mapped label receives `hit.confidence × biasScale`. Tokens that
41
+ * don't overlap any hit, or for which no label mapping exists, get 0.
42
+ *
43
+ * Returns the all-zeros matrix if `shape.knownFormats` is empty — composes harmlessly.
44
+ */
45
+ export function buildEmissionPriors(shape, tokens, labels, opts = {}) {
46
+ const T = tokens.length;
47
+ const L = labels.length;
48
+ const biasScale = opts.biasScale ?? 1.0;
49
+ const matrix = [];
50
+ for (let t = 0; t < T; t++)
51
+ matrix.push(new Array(L).fill(0));
52
+ if (shape.knownFormats.length === 0)
53
+ return matrix;
54
+ // Index label → column for fast lookup.
55
+ const labelToCol = new Map();
56
+ for (let k = 0; k < labels.length; k++)
57
+ labelToCol.set(labels[k], k);
58
+ for (const hit of shape.knownFormats) {
59
+ const targetLabel = FORMAT_TO_LABEL.get(hit.format);
60
+ if (!targetLabel)
61
+ continue;
62
+ const col = labelToCol.get(targetLabel);
63
+ if (col === undefined)
64
+ continue;
65
+ const bias = hit.confidence * biasScale;
66
+ for (let t = 0; t < T; t++) {
67
+ const tok = tokens[t];
68
+ if (overlaps(tok, hit.span)) {
69
+ matrix[t][col] = Math.max(matrix[t][col], bias);
70
+ }
71
+ }
72
+ }
73
+ return matrix;
74
+ }
75
+ function overlaps(a, b) {
76
+ return a.start < b.end && b.start < a.end;
77
+ }
78
+ /** Element-wise add two matrices of equal shape. Returns a new matrix. */
79
+ export function addEmissionMatrix(emissions, priors) {
80
+ if (priors.length === 0)
81
+ return emissions.map((row) => row.slice());
82
+ const out = [];
83
+ for (let t = 0; t < emissions.length; t++) {
84
+ const e = emissions[t];
85
+ const p = priors[t] ?? new Array(e.length).fill(0);
86
+ const row = new Array(e.length);
87
+ for (let k = 0; k < e.length; k++)
88
+ row[k] = e[k] + (p[k] ?? 0);
89
+ out.push(row);
90
+ }
91
+ return out;
92
+ }
93
+ //# sourceMappingURL=query-shape-prior.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"query-shape-prior.js","sourceRoot":"","sources":["../query-shape-prior.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAuBH;;;GAGG;AACH,MAAM,eAAe,GAAgC,IAAI,GAAG,CAAC;IAC5D,CAAC,QAAQ,EAAE,YAAY,CAAC;IACxB,CAAC,SAAS,EAAE,YAAY,CAAC;IACzB,CAAC,aAAa,EAAE,YAAY,CAAC;IAC7B,CAAC,aAAa,EAAE,YAAY,CAAC;IAC7B,CAAC,aAAa,EAAE,YAAY,CAAC;IAC7B,CAAC,aAAa,EAAE,YAAY,CAAC;IAC7B,CAAC,aAAa,EAAE,YAAY,CAAC;IAC7B,CAAC,QAAQ,EAAE,UAAU,CAAC;CACtB,CAAC,CAAA;AAUF;;;;;;;;;GASG;AACH,MAAM,UAAU,mBAAmB,CAClC,KAAqB,EACrB,MAAgC,EAChC,MAA6B,EAC7B,OAAwB,EAAE;IAE1B,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAA;IACvB,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAA;IACvB,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,GAAG,CAAA;IACvC,MAAM,MAAM,GAAe,EAAE,CAAA;IAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE;QAAE,MAAM,CAAC,IAAI,CAAC,IAAI,KAAK,CAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAA;IAErE,IAAI,KAAK,CAAC,YAAY,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,MAAM,CAAA;IAElD,wCAAwC;IACxC,MAAM,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAA;IAC5C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAE,EAAE,CAAC,CAAC,CAAA;IAErE,KAAK,MAAM,GAAG,IAAI,KAAK,CAAC,YAAY,EAAE,CAAC;QACtC,MAAM,WAAW,GAAG,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,CAAA;QACnD,IAAI,CAAC,WAAW;YAAE,SAAQ;QAC1B,MAAM,GAAG,GAAG,UAAU,CAAC,GAAG,CAAC,WAAW,CAAC,CAAA;QACvC,IAAI,GAAG,KAAK,SAAS;YAAE,SAAQ;QAC/B,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,GAAG,SAAS,CAAA;QACvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5B,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;YACtB,IAAI,QAAQ,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC7B,MAAM,CAAC,CAAC,CAAE,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAE,CAAC,GAAG,CAAE,EAAE,IAAI,CAAC,CAAA;YACnD,CAAC;QACF,CAAC;IACF,CAAC;IAED,OAAO,MAAM,CAAA;AACd,CAAC;AAED,SAAS,QAAQ,CAAC,CAAiC,EAAE,CAAiC;IACrF,OAAO,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,GAAG,CAAA;AAC1C,CAAC;AAED,0EAA0E;AAC1E,MAAM,UAAU,iBAAiB,CAAC,SAAqB,EAAE,MAAkB;IAC1E,IAAI,MA
AM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,CAAA;IACnE,MAAM,GAAG,GAAe,EAAE,CAAA;IAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,MAAM,CAAC,GAAG,SAAS,CAAC,CAAC,CAAE,CAAA;QACvB,MAAM,CAAC,GAAG,MAAM,CAAC,CAAC,CAAC,IAAI,IAAI,KAAK,CAAS,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC1D,MAAM,GAAG,GAAG,IAAI,KAAK,CAAS,CAAC,CAAC,MAAM,CAAC,CAAA;QACvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE;YAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAA;QAC/D,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACd,CAAC;IACD,OAAO,GAAG,CAAA;AACX,CAAC"}
@@ -51,7 +51,12 @@ export declare class MailwomanTokenizer {
51
51
  private constructor();
52
52
  /** Load from a base64-encoded `tokenizer.model`. Use for in-memory / test setups. */
53
53
  static loadFromBase64(b64: string): Promise<MailwomanTokenizer>;
54
- /** Load from a path to a `tokenizer.model` file on disk. Node-only convenience. */
54
+ /**
55
+ * Load from a path to a `tokenizer.model` file on disk. **Node-only** — the dynamic `node:fs`
56
+ * import keeps this method out of the static dependency graph so the rest of the tokenizer
57
+ * bundles cleanly for the browser. Calling it in a browser throws at runtime; use
58
+ * `loadFromBase64` (or the URL-fetching loaders in `@mailwoman/neural-web`) instead.
59
+ */
55
60
  static loadFromFile(modelPath: string): Promise<MailwomanTokenizer>;
56
61
  /**
57
62
  * Tokenize `text` to pieces + ids + realigned char offsets.
@@ -1 +1 @@
1
- {"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../tokenizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAKH,4EAA4E;AAC5E,eAAO,MAAM,cAAc,WAAM,CAAA;AAEjC,0EAA0E;AAC1E,MAAM,WAAW,cAAc;IAC9B,wFAAwF;IACxF,KAAK,EAAE,MAAM,CAAA;IACb,mCAAmC;IACnC,EAAE,EAAE,MAAM,CAAA;IACV,yDAAyD;IACzD,KAAK,EAAE,MAAM,CAAA;IACb,uDAAuD;IACvD,GAAG,EAAE,MAAM,CAAA;CACX;AAED,MAAM,WAAW,YAAY;IAC5B,MAAM,EAAE,cAAc,EAAE,CAAA;IACxB,GAAG,EAAE,MAAM,EAAE,CAAA;CACb;AAED,qBAAa,kBAAkB;IACV,OAAO,CAAC,QAAQ,CAAC,SAAS;IAA9C,OAAO;IAEP,qFAAqF;WACxE,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAMrE,mFAAmF;WACtE,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAKzE;;;;;;OAMG;IACH,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY;IA2BlC,oFAAoF;IACpF,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,UAAU,GAAG,MAAM;CAI1C"}
1
+ {"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../tokenizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAIH,4EAA4E;AAC5E,eAAO,MAAM,cAAc,WAAM,CAAA;AAEjC,0EAA0E;AAC1E,MAAM,WAAW,cAAc;IAC9B,wFAAwF;IACxF,KAAK,EAAE,MAAM,CAAA;IACb,mCAAmC;IACnC,EAAE,EAAE,MAAM,CAAA;IACV,yDAAyD;IACzD,KAAK,EAAE,MAAM,CAAA;IACb,uDAAuD;IACvD,GAAG,EAAE,MAAM,CAAA;CACX;AAED,MAAM,WAAW,YAAY;IAC5B,MAAM,EAAE,cAAc,EAAE,CAAA;IACxB,GAAG,EAAE,MAAM,EAAE,CAAA;CACb;AAED,qBAAa,kBAAkB;IACV,OAAO,CAAC,QAAQ,CAAC,SAAS;IAA9C,OAAO;IAEP,qFAAqF;WACxE,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAMrE;;;;;OAKG;WACU,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAMzE;;;;;;OAMG;IACH,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY;IA2BlC,oFAAoF;IACpF,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,UAAU,GAAG,MAAM;CAI1C"}
package/out/tokenizer.js CHANGED
@@ -30,7 +30,6 @@
30
30
  * - `loadFromFile(path)` — convenience helper that does the read + b64 + load.
31
31
  */
32
32
  import { SentencePieceProcessor } from "@sctg/sentencepiece-js";
33
- import { promises as fs } from "node:fs";
34
33
  /** SentencePiece's word-boundary marker (U+2581 LOWER ONE EIGHTH BLOCK). */
35
34
  export const SPACE_SENTINEL = "▁";
36
35
  export class MailwomanTokenizer {
@@ -44,9 +43,15 @@ export class MailwomanTokenizer {
44
43
  await processor.loadFromB64StringModel(b64);
45
44
  return new MailwomanTokenizer(processor);
46
45
  }
47
- /** Load from a path to a `tokenizer.model` file on disk. Node-only convenience. */
46
+ /**
47
+ * Load from a path to a `tokenizer.model` file on disk. **Node-only** — the dynamic `node:fs`
48
+ * import keeps this method out of the static dependency graph so the rest of the tokenizer
49
+ * bundles cleanly for the browser. Calling it in a browser throws at runtime; use
50
+ * `loadFromBase64` (or the URL-fetching loaders in `@mailwoman/neural-web`) instead.
51
+ */
48
52
  static async loadFromFile(modelPath) {
49
- const buf = await fs.readFile(modelPath);
53
+ const { readFile } = await import(/* webpackIgnore: true */ "node:fs/promises");
54
+ const buf = await readFile(modelPath);
50
55
  return MailwomanTokenizer.loadFromBase64(buf.toString("base64"));
51
56
  }
52
57
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../tokenizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAEH,OAAO,EAAE,sBAAsB,EAAE,MAAM,wBAAwB,CAAA;AAC/D,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAA;AAExC,4EAA4E;AAC5E,MAAM,CAAC,MAAM,cAAc,GAAG,GAAG,CAAA;AAmBjC,MAAM,OAAO,kBAAkB;IACO;IAArC,YAAqC,SAAiC;QAAjC,cAAS,GAAT,SAAS,CAAwB;IAAG,CAAC;IAE1E,qFAAqF;IACrF,MAAM,CAAC,KAAK,CAAC,cAAc,CAAC,GAAW;QACtC,MAAM,SAAS,GAAG,IAAI,sBAAsB,EAAE,CAAA;QAC9C,MAAM,SAAS,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAA;QAC3C,OAAO,IAAI,kBAAkB,CAAC,SAAS,CAAC,CAAA;IACzC,CAAC;IAED,mFAAmF;IACnF,MAAM,CAAC,KAAK,CAAC,YAAY,CAAC,SAAiB;QAC1C,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAA;QACxC,OAAO,kBAAkB,CAAC,cAAc,CAAC,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAA;IACjE,CAAC;IAED;;;;;;OAMG;IACH,MAAM,CAAC,IAAY;QAClB,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,YAAY,CAAC,IAAI,CAAC,CAAA;QAChD,MAAM,GAAG,GAAG,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;QAE1C,MAAM,SAAS,GAAqB,EAAE,CAAA;QACtC,IAAI,MAAM,GAAG,CAAC,CAAA;QAEd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;YACxB,MAAM,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAA;YACvB,MAAM,WAAW,GAAG,KAAK,CAAC,UAAU,CAAC,cAAc,CAAC,CAAA;YACpD,MAAM,OAAO,GAAG,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,CAAA;YAExE,IAAI,WAAW,EAAE,CAAC;gBACjB,OAAO,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAE,CAAC;oBAAE,MAAM,EAAE,CAAA;YAClE,CAAC;YAED,MAAM,KAAK,GAAG,MAAM,CAAA;YACpB,MAAM,IAAI,OAAO,CAAC,MAAM,CAAA;YACxB,MAAM,GAAG,GAAG,MAAM,CAAA;YAElB,SAAS,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAA;QAC1C,CAAC;QAED,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,GAAG,EAAE,CAAA;IAClC,CAAC;IAED,oFAAoF;IACpF,MAAM,CAAC,GAA0B;QAChC,MAAM,GAAG,GAAG,GAAG,YAAY,UAAU,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QAClE,OAAO,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,GAAG,CAAW,CAAA;IAC/C,CAAC;CACD"}
1
+ {"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../tokenizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAEH,OAAO,EAAE,sBAAsB,EAAE,MAAM,wBAAwB,CAAA;AAE/D,4EAA4E;AAC5E,MAAM,CAAC,MAAM,cAAc,GAAG,GAAG,CAAA;AAmBjC,MAAM,OAAO,kBAAkB;IACO;IAArC,YAAqC,SAAiC;QAAjC,cAAS,GAAT,SAAS,CAAwB;IAAG,CAAC;IAE1E,qFAAqF;IACrF,MAAM,CAAC,KAAK,CAAC,cAAc,CAAC,GAAW;QACtC,MAAM,SAAS,GAAG,IAAI,sBAAsB,EAAE,CAAA;QAC9C,MAAM,SAAS,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAA;QAC3C,OAAO,IAAI,kBAAkB,CAAC,SAAS,CAAC,CAAA;IACzC,CAAC;IAED;;;;;OAKG;IACH,MAAM,CAAC,KAAK,CAAC,YAAY,CAAC,SAAiB;QAC1C,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,yBAAyB,CAAC,kBAAkB,CAAC,CAAA;QAC/E,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,SAAS,CAAC,CAAA;QACrC,OAAO,kBAAkB,CAAC,cAAc,CAAC,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAA;IACjE,CAAC;IAED;;;;;;OAMG;IACH,MAAM,CAAC,IAAY;QAClB,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,YAAY,CAAC,IAAI,CAAC,CAAA;QAChD,MAAM,GAAG,GAAG,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;QAE1C,MAAM,SAAS,GAAqB,EAAE,CAAA;QACtC,IAAI,MAAM,GAAG,CAAC,CAAA;QAEd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;YACxB,MAAM,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAA;YACvB,MAAM,WAAW,GAAG,KAAK,CAAC,UAAU,CAAC,cAAc,CAAC,CAAA;YACpD,MAAM,OAAO,GAAG,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,CAAA;YAExE,IAAI,WAAW,EAAE,CAAC;gBACjB,OAAO,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAE,CAAC;oBAAE,MAAM,EAAE,CAAA;YAClE,CAAC;YAED,MAAM,KAAK,GAAG,MAAM,CAAA;YACpB,MAAM,IAAI,OAAO,CAAC,MAAM,CAAA;YACxB,MAAM,GAAG,GAAG,MAAM,CAAA;YAElB,SAAS,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAA;QAC1C,CAAC;QAED,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,GAAG,EAAE,CAAA;IAClC,CAAC;IAED,oFAAoF;IACpF,MAAM,CAAC,GAA0B;QAChC,MAAM,GAAG,GAAG,GAAG,YAAY,UAAU,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QAClE,OAAO,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,GAAG,CAAW,CAAA;IAC/C,CAAC;CACD"}
@@ -0,0 +1,76 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Linear-chain CRF Viterbi decoder in TypeScript.
7
+ *
8
+ * Replaces per-token argmax in the classifier when transition scores are available. Mirrors the
9
+ * Python training-time / eval-time path so JS runtime decode agrees with the model card's
10
+ * metrics.
11
+ *
12
+ * Two transition matrix modes:
13
+ *
14
+ * 1. **Structural-only** (no weights changes required) — build from the BIO label vocabulary using
15
+ * `buildBioTransitionMask()`. Forbids `O → I-X`, `B-X → I-Y` (X ≠ Y), and sequence-start →
16
+ * `I-X`. Permits everything else. This alone prevents orphan-I decoding ("Saint Petersburg →
17
+ * Petersburg" bug) at runtime — a strict improvement over argmax.
18
+ * 2. **Learned** (requires a future weights release that ships `crf-transitions.json`) — load the
19
+ * trained transition matrix from the model card. Adds learned soft priors on top of the
20
+ * structural mask. Currently not exported from the training-side ONNX bundle.
21
+ */
22
+ /**
23
+ * Build the BIO structural transition mask given the label vocabulary in order.
24
+ *
25
+ * Rules:
26
+ *
27
+ * - `X → O` always permitted (0)
28
+ * - `X → B-Y` always permitted (0)
29
+ * - `X → I-Y` permitted only if `X` is `B-Y` or `I-Y` (0); otherwise -inf
30
+ *
31
+ * Returns a `numLabels × numLabels` matrix where `mask[from][to]` is the additive log-score (0 for
32
+ * permitted, NEG_INF for forbidden).
33
+ */
34
+ export declare function buildBioTransitionMask(labels: readonly string[]): number[][];
35
+ /** Returns the per-label start-of-sequence scores: 0 for labels that may open a sequence, a large negative sentinel for `I-` labels. */
36
+ export declare function buildBioStartMask(labels: readonly string[]): number[];
37
+ /**
38
+ * End-of-sequence transitions. By default all labels are valid endings (returns zeros). Override if
39
+ * the trained model has learned end transitions.
40
+ */
41
+ export declare function buildBioEndMask(labels: readonly string[]): number[];
42
+ export interface ViterbiInput {
43
+ /** `emissions[t][k]` — log-emission for label k at timestep t. Pass raw logits or log-softmaxes. */
44
+ emissions: number[][];
45
+ /** `transitions[from][to]` — additive log-score. Use `buildBioTransitionMask` if unsure. */
46
+ transitions: number[][];
47
+ /** Per-label log-score for being the FIRST label. */
48
+ startTransitions?: number[];
49
+ /** Per-label log-score for being the LAST label. */
50
+ endTransitions?: number[];
51
+ }
52
+ export interface ViterbiResult {
53
+ /** Best label index per timestep. */
54
+ path: number[];
55
+ /** Total score of the best path: the sum of its start, emission, transition, and end scores (unnormalized, not a log-probability). */
56
+ score: number;
57
+ }
58
+ /**
59
+ * Viterbi decode: find the highest-scoring label sequence under the CRF.
60
+ *
61
+ * Time: O(seq_len × num_labels²). Space: O(seq_len × num_labels) for the backpointer table.
62
+ */
63
+ export declare function viterbi(input: ViterbiInput): ViterbiResult;
64
+ /**
65
+ * Convenience: argmax over per-token softmax (existing behavior). Provided so callers can opt in to
66
+ * Viterbi only when transitions are available, falling back to this cleanly.
67
+ */
68
+ export declare function perTokenArgmax(emissions: readonly number[][]): number[];
69
+ /**
70
+ * Softmax of a logit row (returns probabilities summing to 1).
71
+ *
72
+ * Used to compute per-token confidence after Viterbi picks the label sequence — the confidence is
73
+ * the softmax probability of the Viterbi-chosen label at that timestep.
74
+ */
75
+ export declare function softmax(row: readonly number[]): number[];
76
+ //# sourceMappingURL=viterbi.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"viterbi.d.ts","sourceRoot":"","sources":["../viterbi.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAIH;;;;;;;;;;;GAWG;AACH,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,SAAS,MAAM,EAAE,GAAG,MAAM,EAAE,EAAE,CAa5E;AAED,uFAAuF;AACvF,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,SAAS,MAAM,EAAE,GAAG,MAAM,EAAE,CAErE;AAED;;;GAGG;AACH,wBAAgB,eAAe,CAAC,MAAM,EAAE,SAAS,MAAM,EAAE,GAAG,MAAM,EAAE,CAEnE;AAYD,MAAM,WAAW,YAAY;IAC5B,oGAAoG;IACpG,SAAS,EAAE,MAAM,EAAE,EAAE,CAAA;IACrB,4FAA4F;IAC5F,WAAW,EAAE,MAAM,EAAE,EAAE,CAAA;IACvB,qDAAqD;IACrD,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAA;IAC3B,oDAAoD;IACpD,cAAc,CAAC,EAAE,MAAM,EAAE,CAAA;CACzB;AAED,MAAM,WAAW,aAAa;IAC7B,qCAAqC;IACrC,IAAI,EAAE,MAAM,EAAE,CAAA;IACd,mCAAmC;IACnC,KAAK,EAAE,MAAM,CAAA;CACb;AAED;;;;GAIG;AACH,wBAAgB,OAAO,CAAC,KAAK,EAAE,YAAY,GAAG,aAAa,CA4D1D;AAED;;;GAGG;AACH,wBAAgB,cAAc,CAAC,SAAS,EAAE,SAAS,MAAM,EAAE,EAAE,GAAG,MAAM,EAAE,CAYvE;AAED;;;;;GAKG;AACH,wBAAgB,OAAO,CAAC,GAAG,EAAE,SAAS,MAAM,EAAE,GAAG,MAAM,EAAE,CAMxD"}
package/out/viterbi.js ADDED
@@ -0,0 +1,163 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Linear-chain CRF Viterbi decoder in TypeScript.
7
+ *
8
+ * Replaces per-token argmax in the classifier when transition scores are available. Mirrors the
9
+ * Python training-time / eval-time path so JS runtime decode agrees with the model card's
10
+ * metrics.
11
+ *
12
+ * Two transition matrix modes:
13
+ *
14
+ * 1. **Structural-only** (no weights changes required) — build from the BIO label vocabulary using
15
+ * `buildBioTransitionMask()`. Forbids `O → I-X`, `B-X → I-Y` (X ≠ Y), and sequence-start →
16
+ * `I-X`. Permits everything else. This alone prevents orphan-I decoding ("Saint Petersburg →
17
+ * Petersburg" bug) at runtime — a strict improvement over argmax.
18
+ * 2. **Learned** (requires a future weights release that ships `crf-transitions.json`) — load the
19
+ * trained transition matrix from the model card. Adds learned soft priors on top of the
20
+ * structural mask. Currently not exported from the training-side ONNX bundle.
21
+ */
22
+ const NEG_INF = -1e9; // Finite stand-in for -Infinity: the additive score the BIO masks assign to forbidden transitions.
/**
 * Construct the structural BIO transition matrix for an ordered label vocabulary.
 *
 * The result is a square `labels.length × labels.length` matrix indexed as `mask[from][to]`.
 * Each entry is an additive log-score: `0` when `isValidTransition(labels[from], labels[to])`
 * accepts the pair, `NEG_INF` when it rejects it. In practice that means:
 *
 * - Moving to `O` or to any `B-Y` is always allowed.
 * - Moving to `I-Y` is allowed only from `B-Y` or `I-Y`.
 *
 * @param labels Label vocabulary, in the same order as the model's output columns.
 * @returns A freshly allocated matrix of `0` / `NEG_INF` entries.
 */
export function buildBioTransitionMask(labels) {
	const size = labels.length;
	return Array.from({ length: size }, (_row, from) =>
		Array.from({ length: size }, (_col, to) => (isValidTransition(labels[from], labels[to]) ? 0 : NEG_INF)),
	);
}
/**
 * Per-label start-of-sequence scores.
 *
 * A sequence may not open in the middle of an entity, so every label beginning with `I-` gets
 * `NEG_INF`; all other labels get `0`.
 *
 * @param labels Label vocabulary, in model output order.
 * @returns One score per label, aligned with `labels`.
 */
export function buildBioStartMask(labels) {
	const startScore = (label) => {
		if (label.startsWith("I-")) {
			return NEG_INF;
		}
		return 0;
	};
	return labels.map((label) => startScore(label));
}
/**
 * Per-label end-of-sequence scores.
 *
 * The structural mask places no restriction on the final label, so this returns a zero for every
 * entry of `labels`. Callers with learned end transitions should pass those to `viterbi` instead.
 *
 * @param labels Label vocabulary; only its length is used.
 * @returns An array of `labels.length` zeros.
 */
export function buildBioEndMask(labels) {
	return new Array(labels.length).fill(0);
}
/**
 * Decide whether the label `from` may be immediately followed by the label `to` under BIO rules.
 *
 * Only continuation labels (`I-<tag>`) are constrained: they must follow `B-<tag>` or `I-<tag>`
 * with the same tag. Every other target (`O`, `B-*`, or a label outside the BIO scheme) is
 * accepted from any predecessor.
 */
function isValidTransition(from, to) {
	// Anything that is not a continuation label can follow any label.
	if (!to.startsWith("I-")) {
		return true;
	}
	const tag = to.slice(2);
	return [`B-${tag}`, `I-${tag}`].includes(from);
}
/**
 * Viterbi decode: find the highest-scoring label sequence under a linear-chain CRF.
 *
 * The score of a path is the sum of its start score, the emission at every timestep, the
 * transition between every pair of consecutive labels, and the end score of its last label.
 *
 * Every max-search is seeded with `-Infinity` rather than a finite sentinel, so candidates are
 * always ranked against each other, even when all of them lie at or below the `-1e9` value the
 * BIO masks use for forbidden transitions. Ties resolve to the lowest label index.
 *
 * Time: O(seq_len × num_labels²). Space: O(seq_len × num_labels) for the backpointer table.
 *
 * @param input `emissions[t][k]` and `transitions[from][to]` are required; `startTransitions` and
 *   `endTransitions` default to all zeros. The label count is taken from `emissions[0].length`.
 * @returns The best label index per timestep and that path's total score. An empty `emissions`
 *   array yields `{ path: [], score: 0 }`.
 */
export function viterbi(input) {
	const { emissions, transitions } = input;
	const T = emissions.length;
	if (T === 0) {
		return { path: [], score: 0 };
	}
	const numLabels = emissions[0].length;
	const startTrans = input.startTransitions ?? new Array(numLabels).fill(0);
	const endTrans = input.endTransitions ?? new Array(numLabels).fill(0);

	// prev[k] = best score of any path ending at (previous timestep, label k).
	// Only the previous row is needed for the recurrence; backpointers are kept for every step.
	let prev = new Array(numLabels);
	for (let k = 0; k < numLabels; k++) {
		prev[k] = startTrans[k] + emissions[0][k];
	}
	// back[t][k] = predecessor label on the best path ending at (t, k); row 0 has none.
	const back = [new Array(numLabels).fill(-1)];

	for (let t = 1; t < T; t++) {
		const cur = new Array(numLabels);
		const ptr = new Array(numLabels);
		for (let k = 0; k < numLabels; k++) {
			// Seed with -Infinity so a real predecessor is always selected, however low it scores.
			let bestScore = Number.NEGATIVE_INFINITY;
			let bestPrev = 0;
			for (let j = 0; j < numLabels; j++) {
				const s = prev[j] + transitions[j][k];
				if (s > bestScore) {
					bestScore = s;
					bestPrev = j;
				}
			}
			cur[k] = bestScore + emissions[t][k];
			ptr[k] = bestPrev;
		}
		prev = cur;
		back.push(ptr);
	}

	// Pick the best ending state, including the end-of-sequence score.
	let bestEndScore = Number.NEGATIVE_INFINITY;
	let bestEnd = 0;
	for (let k = 0; k < numLabels; k++) {
		const s = prev[k] + endTrans[k];
		if (s > bestEndScore) {
			bestEndScore = s;
			bestEnd = k;
		}
	}

	// Trace back from the chosen end label through the backpointers.
	const path = new Array(T);
	path[T - 1] = bestEnd;
	for (let t = T - 1; t > 0; t--) {
		path[t - 1] = back[t][path[t]];
	}
	return { path, score: bestEndScore };
}
/**
 * Independent per-timestep argmax over the emission rows (the pre-Viterbi decoding behavior).
 *
 * For each row, returns the index of its largest value; ties go to the lowest index, and an empty
 * row yields `0`. Intended as the fallback when no transition scores are available.
 *
 * @param emissions One row of per-label scores per timestep.
 * @returns The winning label index for each row.
 */
export function perTokenArgmax(emissions) {
	const argmax = (row) => row.reduce((winner, value, k) => (value > row[winner] ? k : winner), 0);
	return emissions.map((row) => argmax(row));
}
/**
 * Softmax of a single logit row.
 *
 * The row maximum is subtracted before exponentiating, so large logits do not overflow. For a
 * non-empty row of finite numbers the result sums to 1; an empty row yields an empty array.
 *
 * Used to compute per-token confidence after Viterbi picks the label sequence — the confidence is
 * the softmax probability of the Viterbi-chosen label at that timestep.
 *
 * @param row Logits for one timestep.
 * @returns Probabilities aligned with `row`.
 */
export function softmax(row) {
	const peak = row.reduce((hi, value) => (value > hi ? value : hi), row[0]);
	const weights = row.map((value) => Math.exp(value - peak));
	let total = 0;
	for (const w of weights) {
		total = total + w;
	}
	return weights.map((w) => w / total);
}
163
+ //# sourceMappingURL=viterbi.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"viterbi.js","sourceRoot":"","sources":["../viterbi.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,MAAM,OAAO,GAAG,CAAC,GAAG,CAAA;AAEpB;;;;;;;;;;;GAWG;AACH,MAAM,UAAU,sBAAsB,CAAC,MAAyB;IAC/D,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAA;IACvB,MAAM,IAAI,GAAe,EAAE,CAAA;IAC3B,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,IAAI,KAAK,CAAS,CAAC,CAAC,CAAA;QAChC,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,CAAE,CAAA;QAC/B,KAAK,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC;YAC/B,MAAM,OAAO,GAAG,MAAM,CAAC,EAAE,CAAE,CAAA;YAC3B,GAAG,CAAC,EAAE,CAAC,GAAG,iBAAiB,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAA;QAC9D,CAAC;QACD,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACf,CAAC;IACD,OAAO,IAAI,CAAA;AACZ,CAAC;AAED,uFAAuF;AACvF,MAAM,UAAU,iBAAiB,CAAC,MAAyB;IAC1D,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AAC7D,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,eAAe,CAAC,MAAyB;IACxD,OAAO,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAA;AAC3B,CAAC;AAED,SAAS,iBAAiB,CAAC,IAAY,EAAE,EAAU;IAClD,IAAI,EAAE,KAAK,GAAG;QAAE,OAAO,IAAI,CAAA;IAC3B,IAAI,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAA;IACpC,IAAI,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QACzB,MAAM,GAAG,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA;QACvB,OAAO,IAAI,KAAK,KAAK,GAAG,EAAE,IAAI,IAAI,KAAK,KAAK,GAAG,EAAE,CAAA;IAClD,CAAC;IACD,OAAO,IAAI,CAAA;AACZ,CAAC;AAoBD;;;;GAIG;AACH,MAAM,UAAU,OAAO,CAAC,KAAmB;IAC1C,MAAM,EAAE,SAAS,EAAE,WAAW,EAAE,GAAG,KAAK,CAAA;IACxC,MAAM,CAAC,GAAG,SAAS,CAAC,MAAM,CAAA;IAC1B,IAAI,CAAC,KAAK,CAAC;QAAE,OAAO,EAAE,IAAI,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,CAAA;IAE1C,MAAM,SAAS,GAAG,SAAS,CAAC,CAAC,CAAE,CAAC,MAAM,CAAA;IACtC,MAAM,UAAU,GAAG,KAAK,CAAC,gBAAgB,IAAI,IAAI,KAAK,CAAS,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjF,MAAM,QAAQ,GAAG,KAAK,CAAC,cAAc,IAAI,IAAI,KAAK,CAAS,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IAE7E,4DAA4D;IAC5D,MAAM,EAAE,GAAe,EAAE,CAAA;IACzB,MAAM,IAAI
,GAAe,EAAE,CAAA;IAE3B,QAAQ;IACR,MAAM,KAAK,GAAG,IAAI,KAAK,CAAS,SAAS,CAAC,CAAA;IAC1C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,KAAK,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC,CAAE,GAAG,SAAS,CAAC,CAAC,CAAE,CAAC,CAAC,CAAE,CAAA;IAC9C,CAAC;IACD,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;IACd,IAAI,CAAC,IAAI,CAAC,IAAI,KAAK,CAAS,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;IAEhD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,MAAM,GAAG,GAAG,IAAI,KAAK,CAAS,SAAS,CAAC,CAAA;QACxC,MAAM,GAAG,GAAG,IAAI,KAAK,CAAS,SAAS,CAAC,CAAA;QACxC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACpC,IAAI,SAAS,GAAG,OAAO,CAAA;YACvB,IAAI,QAAQ,GAAG,CAAC,CAAA;YAChB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;gBACpC,MAAM,CAAC,GAAG,EAAE,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,CAAC,CAAE,GAAG,WAAW,CAAC,CAAC,CAAE,CAAC,CAAC,CAAE,CAAA;gBAC9C,IAAI,CAAC,GAAG,SAAS,EAAE,CAAC;oBACnB,SAAS,GAAG,CAAC,CAAA;oBACb,QAAQ,GAAG,CAAC,CAAA;gBACb,CAAC;YACF,CAAC;YACD,GAAG,CAAC,CAAC,CAAC,GAAG,SAAS,GAAG,SAAS,CAAC,CAAC,CAAE,CAAC,CAAC,CAAE,CAAA;YACtC,GAAG,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAA;QAClB,CAAC;QACD,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QACZ,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;IACf,CAAC;IAED,8BAA8B;IAC9B,IAAI,YAAY,GAAG,OAAO,CAAA;IAC1B,IAAI,OAAO,GAAG,CAAC,CAAA;IACf,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,MAAM,CAAC,GAAG,EAAE,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,CAAC,CAAE,GAAG,QAAQ,CAAC,CAAC,CAAE,CAAA;QACvC,IAAI,CAAC,GAAG,YAAY,EAAE,CAAC;YACtB,YAAY,GAAG,CAAC,CAAA;YAChB,OAAO,GAAG,CAAC,CAAA;QACZ,CAAC;IACF,CAAC;IAED,cAAc;IACd,MAAM,IAAI,GAAG,IAAI,KAAK,CAAS,CAAC,CAAC,CAAA;IACjC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,OAAO,CAAA;IACrB,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAChC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAE,CAAC,IAAI,CAAC,CAAC,CAAE,CAAE,CAAA;IAClC,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,YAAY,EAAE,CAAA;AACrC,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,SAA8B;IAC5D,OAAO,S
AAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;QAC5B,IAAI,OAAO,GAAG,CAAC,CAAA;QACf,IAAI,OAAO,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;QACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACrC,IAAI,GAAG,CAAC,CAAC,CAAE,GAAG,OAAO,EAAE,CAAC;gBACvB,OAAO,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;gBACjB,OAAO,GAAG,CAAC,CAAA;YACZ,CAAC;QACF,CAAC;QACD,OAAO,OAAO,CAAA;IACf,CAAC,CAAC,CAAA;AACH,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,OAAO,CAAC,GAAsB;IAC7C,IAAI,GAAG,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;IACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE;QAAE,IAAI,GAAG,CAAC,CAAC,CAAE,GAAG,GAAG;YAAE,GAAG,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;IACrE,MAAM,IAAI,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAA;IAC9C,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,CAAA;IAC3C,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,GAAG,CAAC,CAAA;AAChC,CAAC"}
package/out/weights.d.ts CHANGED
@@ -32,8 +32,26 @@ export interface ResolveWeightsOpts {
32
32
  export interface ResolvedWeights {
33
33
  modelPath: string;
34
34
  tokenizerPath: string;
35
+ /**
36
+ * Path to `model-card.json` alongside the resolved model. `undefined` when the caller passed
37
+ * explicit paths or when the package directory has no card on disk. Read by `loadFromWeights` to
38
+ * thread the trained label vocabulary into the classifier — see {@link readLabelsFromModelCard}.
39
+ */
40
+ modelCardPath?: string;
35
41
  /** "explicit" if both paths came from opts; "package:<name>" if resolved via require.resolve. */
36
42
  source: string;
37
43
  }
38
44
  export declare function resolveWeights(opts: ResolveWeightsOpts): ResolvedWeights;
45
+ /**
46
+ * Read the `labels` array from a `model-card.json` file. Returns `undefined` when the file is
47
+ * missing, unreadable, not valid JSON, or has no `labels` field — callers should fall back to their
48
+ * compile-time default in that case (the loader contract: the JS-side default tracks the most
49
+ * recent shipped stage, so a card without `labels` is always a pre-v0.4.0 card whose label vocab
50
+ * matches that default by construction).
51
+ *
52
+ * Validates shape: must be a non-empty array of strings. Throws on a present-but-malformed `labels`
53
+ * field — a card that emits e.g. `labels: 21` rather than `labels: [...]` is a corrupted artifact
54
+ * and should be loud, not silently re-defaulted.
55
+ */
56
+ export declare function readLabelsFromModelCard(modelCardPath: string | undefined): readonly string[] | undefined;
39
57
  //# sourceMappingURL=weights.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"weights.d.ts","sourceRoot":"","sources":["../weights.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAQH,MAAM,WAAW,kBAAkB;IAClC,wFAAwF;IACxF,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,4EAA4E;IAC5E,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iFAAiF;IACjF,aAAa,CAAC,EAAE,MAAM,CAAA;CACtB;AAED,MAAM,WAAW,eAAe;IAC/B,SAAS,EAAE,MAAM,CAAA;IACjB,aAAa,EAAE,MAAM,CAAA;IACrB,iGAAiG;IACjG,MAAM,EAAE,MAAM,CAAA;CACd;AAED,wBAAgB,cAAc,CAAC,IAAI,EAAE,kBAAkB,GAAG,eAAe,CAoCxE"}
1
+ {"version":3,"file":"weights.d.ts","sourceRoot":"","sources":["../weights.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAQH,MAAM,WAAW,kBAAkB;IAClC,wFAAwF;IACxF,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,4EAA4E;IAC5E,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iFAAiF;IACjF,aAAa,CAAC,EAAE,MAAM,CAAA;CACtB;AAED,MAAM,WAAW,eAAe;IAC/B,SAAS,EAAE,MAAM,CAAA;IACjB,aAAa,EAAE,MAAM,CAAA;IACrB;;;;OAIG;IACH,aAAa,CAAC,EAAE,MAAM,CAAA;IACtB,iGAAiG;IACjG,MAAM,EAAE,MAAM,CAAA;CACd;AAED,wBAAgB,cAAc,CAAC,IAAI,EAAE,kBAAkB,GAAG,eAAe,CA0CxE;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,uBAAuB,CAAC,aAAa,EAAE,MAAM,GAAG,SAAS,GAAG,SAAS,MAAM,EAAE,GAAG,SAAS,CAwBxG"}
package/out/weights.js CHANGED
@@ -21,7 +21,7 @@
21
21
  * The resolver checks for both files and throws a single actionable error when neither is findable,
22
22
  * naming all the paths it tried.
23
23
  */
24
- import { existsSync } from "node:fs";
24
+ import { existsSync, readFileSync } from "node:fs";
25
25
  import { createRequire } from "node:module";
26
26
  import { dirname, resolve } from "node:path";
27
27
  const req = createRequire(import.meta.url);
@@ -34,7 +34,10 @@ export function resolveWeights(opts) {
34
34
  throw new Error(`Explicit tokenizerPath does not exist: ${opts.tokenizerPath}`);
35
35
  return { modelPath: opts.modelPath, tokenizerPath: opts.tokenizerPath, source: "explicit" };
36
36
  }
37
- const locale = opts.locale ?? "en-us";
37
+ // Package names follow the all-lowercase BCP-47 convention (`neural-weights-en-us`,
38
+ // `neural-weights-fr-fr`). The CLI's locale validation accepts canonical `en-US` / `fr-FR`
39
+ // casing, so we normalize here rather than at the callsite.
40
+ const locale = (opts.locale ?? "en-us").toLowerCase();
38
41
  const packageName = `@mailwoman/neural-weights-${locale}`;
39
42
  let packageDir;
40
43
  try {
@@ -54,6 +57,47 @@ export function resolveWeights(opts) {
54
57
  `Run \`scripts/link-dev-weights.sh\` inside the package to symlink dev weights, ` +
55
58
  `or pass --model + --tokenizer with explicit paths.`);
56
59
  }
57
- return { modelPath, tokenizerPath, source: `package:${packageName}` };
60
+ const modelCardCandidate = resolve(packageDir, "model-card.json");
61
+ const modelCardPath = existsSync(modelCardCandidate) ? modelCardCandidate : undefined;
62
+ return { modelPath, tokenizerPath, modelCardPath, source: `package:${packageName}` };
63
+ }
/**
 * Load the `labels` vocabulary from a `model-card.json` file.
 *
 * Returns `undefined` — meaning "use your compile-time default" — when no path is given, the file
 * does not exist, it cannot be read, its contents are not valid JSON, the parsed value is not an
 * object, or the object has no `labels` property.
 *
 * A `labels` property that is present but is not a non-empty array of strings is treated as a
 * corrupted artifact and raises instead of being silently defaulted.
 *
 * @param modelCardPath Path to the card, or `undefined`.
 * @returns A frozen copy of the label array, or `undefined`.
 * @throws {Error} When `labels` is present but malformed.
 */
export function readLabelsFromModelCard(modelCardPath) {
	if (!modelCardPath) {
		return undefined;
	}
	if (!existsSync(modelCardPath)) {
		return undefined;
	}

	// Read and parse failures are both treated as "no card".
	let card;
	try {
		card = JSON.parse(readFileSync(modelCardPath, "utf8"));
	}
	catch {
		return undefined;
	}
	if (card === null || typeof card !== "object") {
		return undefined;
	}

	const labels = card.labels;
	if (labels === undefined) {
		return undefined;
	}

	const wellFormed = Array.isArray(labels) && labels.length !== 0 && labels.every((l) => typeof l === "string");
	if (!wellFormed) {
		throw new Error(`model-card.json at ${modelCardPath} has a malformed \`labels\` field — ` +
			`expected a non-empty array of strings, got ${JSON.stringify(labels)}.`);
	}
	// Hand back an immutable copy so callers cannot mutate the parsed card.
	return Object.freeze(labels.slice());
}
59
103
  //# sourceMappingURL=weights.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"weights.js","sourceRoot":"","sources":["../weights.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAA;AACpC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAA;AAC3C,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAE5C,MAAM,GAAG,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AAkB1C,MAAM,UAAU,cAAc,CAAC,IAAwB;IACtD,MAAM,KAAK,GAAa,EAAE,CAAA;IAE1B,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;QAC1C,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,sCAAsC,IAAI,CAAC,SAAS,EAAE,CAAC,CAAA;QACxG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,aAAa,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,0CAA0C,IAAI,CAAC,aAAa,EAAE,CAAC,CAAA;QACpH,OAAO,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,aAAa,EAAE,IAAI,CAAC,aAAa,EAAE,MAAM,EAAE,UAAU,EAAE,CAAA;IAC5F,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,OAAO,CAAA;IACrC,MAAM,WAAW,GAAG,6BAA6B,MAAM,EAAE,CAAA;IACzD,IAAI,UAAkB,CAAA;IACtB,IAAI,CAAC;QACJ,MAAM,WAAW,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,WAAW,eAAe,CAAC,CAAA;QAC9D,UAAU,GAAG,OAAO,CAAC,WAAW,CAAC,CAAA;IAClC,CAAC;IAAC,MAAM,CAAC;QACR,MAAM,IAAI,KAAK,CACd,qBAAqB,WAAW,iCAAiC,WAAW,IAAI;YAC/E,oDAAoD,CACrD,CAAA;IACF,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,OAAO,CAAC,UAAU,EAAE,YAAY,CAAC,CAAA;IACrE,MAAM,aAAa,GAAG,IAAI,CAAC,aAAa,IAAI,OAAO,CAAC,UAAU,EAAE,iBAAiB,CAAC,CAAA;IAClF,KAAK,CAAC,IAAI,CAAC,SAAS,EAAE,aAAa,CAAC,CAAA;IAEpC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QAC1D,MAAM,IAAI,KAAK,CACd,mBAAmB,WAAW,gBAAgB,UAAU,gCAAgC;YACvF,aAAa,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI;YACnC,iFAAiF;YACjF,oDAAoD,CACrD,CAAA;IACF,CAAC;IAED,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,EAAE,WAAW,WAAW,EAAE,EAAE,CAAA;AACtE,CAAC"}
1
+ {"version":3,"file":"weights.js","sourceRoot":"","sources":["../weights.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AAClD,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAA;AAC3C,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAE5C,MAAM,GAAG,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AAwB1C,MAAM,UAAU,cAAc,CAAC,IAAwB;IACtD,MAAM,KAAK,GAAa,EAAE,CAAA;IAE1B,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;QAC1C,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,sCAAsC,IAAI,CAAC,SAAS,EAAE,CAAC,CAAA;QACxG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,aAAa,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,0CAA0C,IAAI,CAAC,aAAa,EAAE,CAAC,CAAA;QACpH,OAAO,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,aAAa,EAAE,IAAI,CAAC,aAAa,EAAE,MAAM,EAAE,UAAU,EAAE,CAAA;IAC5F,CAAC;IAED,oFAAoF;IACpF,2FAA2F;IAC3F,4DAA4D;IAC5D,MAAM,MAAM,GAAG,CAAC,IAAI,CAAC,MAAM,IAAI,OAAO,CAAC,CAAC,WAAW,EAAE,CAAA;IACrD,MAAM,WAAW,GAAG,6BAA6B,MAAM,EAAE,CAAA;IACzD,IAAI,UAAkB,CAAA;IACtB,IAAI,CAAC;QACJ,MAAM,WAAW,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,WAAW,eAAe,CAAC,CAAA;QAC9D,UAAU,GAAG,OAAO,CAAC,WAAW,CAAC,CAAA;IAClC,CAAC;IAAC,MAAM,CAAC;QACR,MAAM,IAAI,KAAK,CACd,qBAAqB,WAAW,iCAAiC,WAAW,IAAI;YAC/E,oDAAoD,CACrD,CAAA;IACF,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,OAAO,CAAC,UAAU,EAAE,YAAY,CAAC,CAAA;IACrE,MAAM,aAAa,GAAG,IAAI,CAAC,aAAa,IAAI,OAAO,CAAC,UAAU,EAAE,iBAAiB,CAAC,CAAA;IAClF,KAAK,CAAC,IAAI,CAAC,SAAS,EAAE,aAAa,CAAC,CAAA;IAEpC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QAC1D,MAAM,IAAI,KAAK,CACd,mBAAmB,WAAW,gBAAgB,UAAU,gCAAgC;YACvF,aAAa,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI;YACnC,iFAAiF;YACjF,oDAAoD,CACrD,CAAA;IACF,CAAC;IAED,MAAM,kBAAkB,GAAG,OAAO,CAAC,UAAU,EAAE,iBAAiB,CAAC,CAAA;IACjE,MAAM,aAAa,GAAG,UAAU,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,SAAS,CAAA;IAErF,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,aAAa,EAAE,MAAM,EAAE,WAAW,WAAW,EAAE,EAAE,CAAA;AACrF,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,uBAAuB,CAAC,aAAiC;IACxE,IAAI,CAAC,aAAa,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;QAAE,OAAO,SAAS,CAAA;I
AClE,IAAI,GAAW,CAAA;IACf,IAAI,CAAC;QACJ,GAAG,GAAG,YAAY,CAAC,aAAa,EAAE,MAAM,CAAC,CAAA;IAC1C,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,SAAS,CAAA;IACjB,CAAC;IACD,IAAI,MAAe,CAAA;IACnB,IAAI,CAAC;QACJ,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;IACzB,CAAC;IAAC,MAAM,CAAC;QACR,OAAO,SAAS,CAAA;IACjB,CAAC;IACD,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,IAAI;QAAE,OAAO,SAAS,CAAA;IACnE,MAAM,MAAM,GAAI,MAA+B,CAAC,MAAM,CAAA;IACtD,IAAI,MAAM,KAAK,SAAS;QAAE,OAAO,SAAS,CAAA;IAC1C,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,CAAC,EAAE,CAAC;QAClG,MAAM,IAAI,KAAK,CACd,sBAAsB,aAAa,sCAAsC;YACxE,8CAA8C,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,GAAG,CACxE,CAAA;IACF,CAAC;IACD,OAAO,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,EAAE,CAAsB,CAAA;AAC1D,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mailwoman/neural",
3
- "version": "2.1.0",
3
+ "version": "2.2.0",
4
4
  "description": "Mailwoman neural classifier runtime: SentencePiece tokenizer + ONNX inference + decoder wiring.",
5
5
  "license": "AGPL-3.0-only",
6
6
  "repository": {
@@ -12,10 +12,12 @@
12
12
  "exports": {
13
13
  "./package.json": "./package.json",
14
14
  ".": "./out/index.js",
15
- "./tokenizer": "./out/tokenizer.js"
15
+ "./tokenizer": "./out/tokenizer.js",
16
+ "./weights": "./out/weights.js",
17
+ "./browser": "./out/browser.js"
16
18
  },
17
19
  "dependencies": {
18
- "@mailwoman/core": "2.1.0",
20
+ "@mailwoman/core": "2.2.0",
19
21
  "@sctg/sentencepiece-js": "^1.3.3",
20
22
  "onnxruntime-node": "^1.26.0"
21
23
  },
@@ -25,6 +27,7 @@
25
27
  "out/**/*.d.ts",
26
28
  "out/**/*.d.ts.map"
27
29
  ],
30
+ "sideEffects": false,
28
31
  "publishConfig": {
29
32
  "access": "public"
30
33
  }