@mailwoman/neural 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,40 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `NeuralAddressClassifier` ties together the tokenizer, the ONNX inference runner, and the
7
+ * `@mailwoman/core` decoder. Single user-facing entrypoint: `parse(text)` returns an
8
+ * `AddressTree` ready for projection into JSON / tuple / XML.
9
+ *
10
+ * Convenience wrappers `parseJson` / `parseTuples` / `parseXml` project the tree on the way out.
11
+ */
12
+ import { type AddressTree, type ComponentTag, decodeAsXml } from "@mailwoman/core/decoder";
13
+ import { OnnxRunner } from "./onnx-runner.js";
14
+ import { MailwomanTokenizer } from "./tokenizer.js";
15
+ import { type ResolveWeightsOpts } from "./weights.js";
16
/** Collaborators and options accepted by the `NeuralAddressClassifier` constructor. */
export interface NeuralAddressClassifierConfig {
    /** Tokenizer whose `encode(text)` supplies the pieces and ids for a parse. */
    tokenizer: MailwomanTokenizer;
    /** Inference runner that receives the token ids and returns per-token logits. */
    runner: OnnxRunner;
    /**
     * Label vocabulary, ordered exactly as the model emits its classes. When omitted the Stage 1
     * vocabulary (v0.1.0 / v0.2.0) is used.
     */
    labels?: ReadonlyArray<string>;
}
22
/** Address parser that chains the tokenizer, the ONNX runner and the `@mailwoman/core` decoder. */
export declare class NeuralAddressClassifier {
    private readonly cfg;
    private readonly labels;
    constructor(cfg: NeuralAddressClassifierConfig);
    /**
     * Convenience factory: resolves the weights (explicit paths in `opts` first, then the
     * `@mailwoman/neural-weights-<locale>` package), loads the tokenizer and the ONNX runner, and
     * hands back a classifier that is ready to parse.
     *
     * If neither source resolves, a single actionable error is thrown.
     */
    static loadFromWeights(opts?: ResolveWeightsOpts): Promise<NeuralAddressClassifier>;
    /** Full pipeline: tokenize → infer → argmax/softmax → decoder tree. */
    parse(text: string): Promise<AddressTree>;
    /** `parse(text)` projected into a tag → value record. */
    parseJson(text: string): Promise<Partial<Record<ComponentTag, string>>>;
    /** `parse(text)` projected into `[tag, value]` tuples. */
    parseTuples(text: string): Promise<[ComponentTag, string][]>;
    /** `parse(text)` projected into XML; `opts` is forwarded to `decodeAsXml`. */
    parseXml(text: string, opts?: Parameters<typeof decodeAsXml>[1]): Promise<string>;
}
40
+ //# sourceMappingURL=classifier.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"classifier.d.ts","sourceRoot":"","sources":["../classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EACN,KAAK,WAAW,EAChB,KAAK,YAAY,EAKjB,WAAW,EACX,MAAM,yBAAyB,CAAA;AAEhC,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAC7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAA;AACnD,OAAO,EAAE,KAAK,kBAAkB,EAAkB,MAAM,cAAc,CAAA;AAEtE,MAAM,WAAW,6BAA6B;IAC7C,SAAS,EAAE,kBAAkB,CAAA;IAC7B,MAAM,EAAE,UAAU,CAAA;IAClB,+FAA+F;IAC/F,MAAM,CAAC,EAAE,SAAS,MAAM,EAAE,CAAA;CAC1B;AAED,qBAAa,uBAAuB;IAGvB,OAAO,CAAC,QAAQ,CAAC,GAAG;IAFhC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAmB;gBAEb,GAAG,EAAE,6BAA6B;IAI/D;;;;;;OAMG;WACU,eAAe,CAAC,IAAI,GAAE,kBAAuB,GAAG,OAAO,CAAC,uBAAuB,CAAC;IAS7F,wDAAwD;IAClD,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAqBzC,SAAS,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC;IAIvE,WAAW,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,YAAY,EAAE,MAAM,CAAC,CAAC,CAAC;IAIjE,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,CAAC,EAAE,UAAU,CAAC,OAAO,WAAW,CAAC,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC;CAGvF"}
@@ -0,0 +1,83 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `NeuralAddressClassifier` ties together the tokenizer, the ONNX inference runner, and the
7
+ * `@mailwoman/core` decoder. Single user-facing entrypoint: `parse(text)` returns an
8
+ * `AddressTree` ready for projection into JSON / tuple / XML.
9
+ *
10
+ * Convenience wrappers `parseJson` / `parseTuples` / `parseXml` project the tree on the way out.
11
+ */
12
+ import { buildAddressTree, decodeAsJson, decodeAsTuples, decodeAsXml, } from "@mailwoman/core/decoder";
13
+ import { STAGE1_BIO_LABELS } from "./labels.js";
14
+ import { OnnxRunner } from "./onnx-runner.js";
15
+ import { MailwomanTokenizer } from "./tokenizer.js";
16
+ import { resolveWeights } from "./weights.js";
17
+ export class NeuralAddressClassifier {
18
+ cfg;
19
+ labels;
20
+ constructor(cfg) {
21
+ this.cfg = cfg;
22
+ this.labels = cfg.labels ?? STAGE1_BIO_LABELS;
23
+ }
24
+ /**
25
+ * One-call factory that resolves the weights package (or explicit paths), loads the tokenizer and
26
+ * ONNX runner, and returns a ready-to-use classifier.
27
+ *
28
+ * Resolution order: explicit paths in `opts` → `@mailwoman/neural-weights-<locale>` package →
29
+ * throws a single actionable error.
30
+ */
31
+ static async loadFromWeights(opts = {}) {
32
+ const { modelPath, tokenizerPath } = resolveWeights(opts);
33
+ const [tokenizer, runner] = await Promise.all([
34
+ MailwomanTokenizer.loadFromFile(tokenizerPath),
35
+ OnnxRunner.create(modelPath),
36
+ ]);
37
+ return new NeuralAddressClassifier({ tokenizer, runner });
38
+ }
39
+ /** Tokenize → infer → argmax/softmax → decoder tree. */
40
+ async parse(text) {
41
+ if (text.length === 0)
42
+ return { raw: text, roots: [] };
43
+ const { pieces, ids } = this.cfg.tokenizer.encode(text);
44
+ const { logits } = await this.cfg.runner.infer(ids);
45
+ const tokens = pieces.map((p, i) => {
46
+ const row = logits[i];
47
+ const { idx, conf } = argmaxSoftmax(row);
48
+ return {
49
+ piece: p.piece,
50
+ start: p.start,
51
+ end: p.end,
52
+ label: (this.labels[idx] ?? "O"),
53
+ confidence: conf,
54
+ };
55
+ });
56
+ return buildAddressTree(text, tokens);
57
+ }
58
+ async parseJson(text) {
59
+ return decodeAsJson(await this.parse(text));
60
+ }
61
+ async parseTuples(text) {
62
+ return decodeAsTuples(await this.parse(text));
63
+ }
64
+ async parseXml(text, opts) {
65
+ return decodeAsXml(await this.parse(text), opts);
66
+ }
67
+ }
68
+ function argmaxSoftmax(row) {
69
+ let maxIdx = 0;
70
+ let maxVal = row[0];
71
+ for (let i = 1; i < row.length; i++) {
72
+ if (row[i] > maxVal) {
73
+ maxVal = row[i];
74
+ maxIdx = i;
75
+ }
76
+ }
77
+ let sumExp = 0;
78
+ for (const v of row)
79
+ sumExp += Math.exp(v - maxVal);
80
+ const conf = 1 / sumExp;
81
+ return { idx: maxIdx, conf };
82
+ }
83
+ //# sourceMappingURL=classifier.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"classifier.js","sourceRoot":"","sources":["../classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAIN,gBAAgB,EAChB,YAAY,EACZ,cAAc,EACd,WAAW,GACX,MAAM,yBAAyB,CAAA;AAChC,OAAO,EAAE,iBAAiB,EAAE,MAAM,aAAa,CAAA;AAC/C,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAA;AAC7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,gBAAgB,CAAA;AACnD,OAAO,EAA2B,cAAc,EAAE,MAAM,cAAc,CAAA;AAStE,MAAM,OAAO,uBAAuB;IAGN;IAFZ,MAAM,CAAmB;IAE1C,YAA6B,GAAkC;QAAlC,QAAG,GAAH,GAAG,CAA+B;QAC9D,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,MAAM,IAAI,iBAAiB,CAAA;IAC9C,CAAC;IAED;;;;;;OAMG;IACH,MAAM,CAAC,KAAK,CAAC,eAAe,CAAC,OAA2B,EAAE;QACzD,MAAM,EAAE,SAAS,EAAE,aAAa,EAAE,GAAG,cAAc,CAAC,IAAI,CAAC,CAAA;QACzD,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC;YAC7C,kBAAkB,CAAC,YAAY,CAAC,aAAa,CAAC;YAC9C,UAAU,CAAC,MAAM,CAAC,SAAS,CAAC;SAC5B,CAAC,CAAA;QACF,OAAO,IAAI,uBAAuB,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC,CAAA;IAC1D,CAAC;IAED,wDAAwD;IACxD,KAAK,CAAC,KAAK,CAAC,IAAY;QACvB,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,EAAE,CAAA;QAEtD,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;QACvD,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;QAEnD,MAAM,MAAM,GAAmB,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;YAClD,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;YACtB,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,aAAa,CAAC,GAAG,CAAC,CAAA;YACxC,OAAO;gBACN,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,KAAK,EAAE,CAAC,CAAC,KAAK;gBACd,GAAG,EAAE,CAAC,CAAC,GAAG;gBACV,KAAK,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,GAAG,CAA0B;gBACzD,UAAU,EAAE,IAAI;aAChB,CAAA;QACF,CAAC,CAAC,CAAA;QAEF,OAAO,gBAAgB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;IACtC,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,IAAY;QAC3B,OAAO,YAAY,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAA;IAC5C,CAAC;IAED,KAAK,CAAC,WAAW,CAAC,IAAY;QAC7B,OAAO,cAAc,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAA;IAC9C,CAAC;IAED,KAAK,CAAC,QAAQ,CAAC,IAAY,EAAE,IAAwC;QACpE,OAAO,WAAW,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,IAA
I,CAAC,CAAA;IACjD,CAAC;CACD;AAED,SAAS,aAAa,CAAC,GAAa;IACnC,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,IAAI,MAAM,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;IACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,IAAI,GAAG,CAAC,CAAC,CAAE,GAAG,MAAM,EAAE,CAAC;YACtB,MAAM,GAAG,GAAG,CAAC,CAAC,CAAE,CAAA;YAChB,MAAM,GAAG,CAAC,CAAA;QACX,CAAC;IACF,CAAC;IACD,IAAI,MAAM,GAAG,CAAC,CAAA;IACd,KAAK,MAAM,CAAC,IAAI,GAAG;QAAE,MAAM,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,CAAA;IACnD,MAAM,IAAI,GAAG,CAAC,GAAG,MAAM,CAAA;IACvB,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE,CAAA;AAC7B,CAAC"}
package/out/index.d.ts ADDED
@@ -0,0 +1,12 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ */
6
+ export * from "./classifier.js";
7
+ export * from "./labels.js";
8
+ export * from "./onnx-runner.js";
9
+ export * from "./proposal-classifier.js";
10
+ export * from "./tokenizer.js";
11
+ export * from "./weights.js";
12
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,kBAAkB,CAAA;AAChC,cAAc,0BAA0B,CAAA;AACxC,cAAc,gBAAgB,CAAA;AAC9B,cAAc,cAAc,CAAA"}
package/out/index.js ADDED
@@ -0,0 +1,12 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ */
6
+ export * from "./classifier.js";
7
+ export * from "./labels.js";
8
+ export * from "./onnx-runner.js";
9
+ export * from "./proposal-classifier.js";
10
+ export * from "./tokenizer.js";
11
+ export * from "./weights.js";
12
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../index.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,cAAc,iBAAiB,CAAA;AAC/B,cAAc,aAAa,CAAA;AAC3B,cAAc,kBAAkB,CAAA;AAChC,cAAc,0BAA0B,CAAA;AACxC,cAAc,gBAAgB,CAAA;AAC9B,cAAc,cAAc,CAAA"}
@@ -0,0 +1,20 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Mirror of `packages/corpus-python/src/mailwoman_train/labels.py::STAGE1_BIO_LABELS`.
7
+ *
8
+ * The v0.1.0 / v0.2.0 weight packages were trained with this exact label order. Any drift here
9
+ * silently corrupts downstream BIO decoding — index 5 must mean `B-locality` on both sides.
10
+ *
11
+ * Stage 2+ models will support more labels (street, house_number, venue, …). The plan is to plumb
12
+ * the label set through `model-card.json` at load time rather than hard-coding it here. Until
13
+ * then this file is the source of truth on the TS side.
14
+ */
15
+ import type { BioLabel } from "@mailwoman/core/decoder";
16
+ /** Coarse component tags trained in Phase 2 Stage 1 (v0.1.0 / v0.2.0). */
17
+ export declare const STAGE1_COARSE_TAGS: readonly ["country", "region", "locality", "dependent_locality", "postcode", "subregion", "cedex"];
18
+ /** BIO label vocabulary for Stage 1 — O + (B-/I- per coarse tag). 1 + 14 = 15 labels. */
19
+ export declare const STAGE1_BIO_LABELS: readonly BioLabel[];
20
+ //# sourceMappingURL=labels.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"labels.d.ts","sourceRoot":"","sources":["../labels.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,yBAAyB,CAAA;AAEvD,0EAA0E;AAC1E,eAAO,MAAM,kBAAkB,oGAQrB,CAAA;AAEV,yFAAyF;AACzF,eAAO,MAAM,iBAAiB,EAAE,SAAS,QAAQ,EAG/C,CAAA"}
package/out/labels.js ADDED
@@ -0,0 +1,30 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Mirror of `packages/corpus-python/src/mailwoman_train/labels.py::STAGE1_BIO_LABELS`.
7
+ *
8
+ * The v0.1.0 / v0.2.0 weight packages were trained with this exact label order. Any drift here
9
+ * silently corrupts downstream BIO decoding — index 5 must mean `B-locality` on both sides.
10
+ *
11
+ * Stage 2+ models will support more labels (street, house_number, venue, …). The plan is to plumb
12
+ * the label set through `model-card.json` at load time rather than hard-coding it here. Until
13
+ * then this file is the source of truth on the TS side.
14
+ */
15
+ /** Coarse component tags trained in Phase 2 Stage 1 (v0.1.0 / v0.2.0). */
16
+ export const STAGE1_COARSE_TAGS = [
17
+ "country",
18
+ "region",
19
+ "locality",
20
+ "dependent_locality",
21
+ "postcode",
22
+ "subregion",
23
+ "cedex",
24
+ ];
25
+ /** BIO label vocabulary for Stage 1 — O + (B-/I- per coarse tag). 1 + 14 = 15 labels. */
26
export const STAGE1_BIO_LABELS = (() => {
    // "O" occupies index 0; every coarse tag then contributes its B- label followed by its I- label.
    const vocabulary = ["O"];
    for (const tag of STAGE1_COARSE_TAGS) {
        vocabulary.push(`B-${tag}`, `I-${tag}`);
    }
    return Object.freeze(vocabulary);
})();
30
+ //# sourceMappingURL=labels.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"labels.js","sourceRoot":"","sources":["../labels.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAIH,0EAA0E;AAC1E,MAAM,CAAC,MAAM,kBAAkB,GAAG;IACjC,SAAS;IACT,QAAQ;IACR,UAAU;IACV,oBAAoB;IACpB,UAAU;IACV,WAAW;IACX,OAAO;CACE,CAAA;AAEV,yFAAyF;AACzF,MAAM,CAAC,MAAM,iBAAiB,GAAwB,MAAM,CAAC,MAAM,CAAC;IACnE,GAAe;IACf,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,KAAK,GAAG,EAAc,EAAE,KAAK,GAAG,EAAc,CAAC,CAAC;CACxF,CAAC,CAAA"}
@@ -0,0 +1,56 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * ONNX inference wrapper.
7
+ *
8
+ * Loads a token-classification model exported by `packages/corpus-python/src/mailwoman_train/
9
+ * export_onnx.py` (BertForTokenClassification w/ inputs `input_ids` + `attention_mask`, output
10
+ * `logits` shape `[batch, sequence, num_labels]`).
11
+ *
12
+ * Lazy-loads on first `infer()` call unless `warmup: true` is passed; the constructor itself is
13
+ * cheap and synchronous.
14
+ */
15
/** Options shared by `OnnxRunner.create` and `OnnxRunner.fromBytes`. */
export interface OnnxRunnerOpts {
    /** When true, the model is loaded right away in `create()` instead of lazily. Default false. */
    warmup?: boolean;
    /**
     * Fixed sequence length the model expects.
     *
     * The v0.1.0 / v0.2.0 quantization baked in 128 (the training-time max position) even though
     * the fp32 export specified dynamic axes; re-quantize with a different shape to override.
     * Shorter inputs are padded with id `0` and masked out via attention_mask=0; longer inputs are
     * truncated.
     */
    fixedSeqLen?: number;
}
26
+ /** Default sequence length for v0.1.0 / v0.2.0 (BertConfig max_position_embeddings = 128). */
27
+ export declare const DEFAULT_FIXED_SEQ_LEN = 128;
28
/** Result of a single `OnnxRunner.infer` call. */
export interface InferResult {
    /** One row of logits per token: `logits[tokenIdx][labelIdx]`. */
    logits: Array<Array<number>>;
    /** How many label classes there are, i.e. the inner dimension of the logits tensor. */
    numLabels: number;
}
34
/** Wrapper around an ONNX inference session for a token-classification model. */
export declare class OnnxRunner {
    private readonly modelPath;
    private readonly modelBytes;
    private session;
    private loadPromise;
    /** Sequence length every input is padded or truncated to. */
    readonly fixedSeqLen: number;
    private constructor();
    /** Builds a runner for a model file. The file is only read eagerly when `warmup` is true. */
    static create(modelPath: string, opts?: OnnxRunnerOpts): Promise<OnnxRunner>;
    /** Builds a runner from model bytes that are already in memory. */
    static fromBytes(modelBytes: Uint8Array, opts?: OnnxRunnerOpts): Promise<OnnxRunner>;
    private ensureSession;
    /**
     * Runs inference on one token id sequence.
     *
     * The input is padded to `fixedSeqLen` (default 128) with id 0 + mask 0, or truncated when it
     * is longer. The output is sliced back to the actual input length.
     *
     * @param tokenIds The id sequence produced by the tokenizer (no special tokens added).
     */
    infer(tokenIds: number[]): Promise<InferResult>;
}
56
+ //# sourceMappingURL=onnx-runner.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"onnx-runner.d.ts","sourceRoot":"","sources":["../onnx-runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAKH,MAAM,WAAW,cAAc;IAC9B,wEAAwE;IACxE,MAAM,CAAC,EAAE,OAAO,CAAA;IAChB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAA;CACpB;AAED,8FAA8F;AAC9F,eAAO,MAAM,qBAAqB,MAAM,CAAA;AAExC,MAAM,WAAW,WAAW;IAC3B,2EAA2E;IAC3E,MAAM,EAAE,MAAM,EAAE,EAAE,CAAA;IAClB,oEAAoE;IACpE,SAAS,EAAE,MAAM,CAAA;CACjB;AAED,qBAAa,UAAU;IAMrB,OAAO,CAAC,QAAQ,CAAC,SAAS;IAC1B,OAAO,CAAC,QAAQ,CAAC,UAAU;IAN5B,OAAO,CAAC,OAAO,CAAoC;IACnD,OAAO,CAAC,WAAW,CAA6C;IAChE,SAAgB,WAAW,EAAE,MAAM,CAAA;IAEnC,OAAO;IAQP,oEAAoE;WACvD,MAAM,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,GAAE,cAAmB,GAAG,OAAO,CAAC,UAAU,CAAC;IAMtF,6CAA6C;WAChC,SAAS,CAAC,UAAU,EAAE,UAAU,EAAE,IAAI,GAAE,cAAmB,GAAG,OAAO,CAAC,UAAU,CAAC;YAMhF,aAAa;IAgB3B;;;;;;;OAOG;IACG,KAAK,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,WAAW,CAAC;CA8BrD"}
@@ -0,0 +1,98 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * ONNX inference wrapper.
7
+ *
8
+ * Loads a token-classification model exported by `packages/corpus-python/src/mailwoman_train/
9
+ * export_onnx.py` (BertForTokenClassification w/ inputs `input_ids` + `attention_mask`, output
10
+ * `logits` shape `[batch, sequence, num_labels]`).
11
+ *
12
+ * Lazy-loads on first `infer()` call unless `warmup: true` is passed; the constructor itself is
13
+ * cheap and synchronous.
14
+ */
15
+ import { promises as fs } from "node:fs";
16
+ import ort from "onnxruntime-node";
17
+ /** Default sequence length for v0.1.0 / v0.2.0 (BertConfig max_position_embeddings = 128). */
18
+ export const DEFAULT_FIXED_SEQ_LEN = 128;
19
+ export class OnnxRunner {
20
+ modelPath;
21
+ modelBytes;
22
+ session = null;
23
+ loadPromise = null;
24
+ fixedSeqLen;
25
+ constructor(modelPath, modelBytes, opts) {
26
+ this.modelPath = modelPath;
27
+ this.modelBytes = modelBytes;
28
+ this.fixedSeqLen = opts.fixedSeqLen ?? DEFAULT_FIXED_SEQ_LEN;
29
+ }
30
+ /** Load by path. Reads the model lazily unless `warmup` is true. */
31
+ static async create(modelPath, opts = {}) {
32
+ const runner = new OnnxRunner(modelPath, null, opts);
33
+ if (opts.warmup)
34
+ await runner.ensureSession();
35
+ return runner;
36
+ }
37
+ /** Load from an already-read byte buffer. */
38
+ static async fromBytes(modelBytes, opts = {}) {
39
+ const runner = new OnnxRunner("(bytes)", modelBytes, opts);
40
+ if (opts.warmup)
41
+ await runner.ensureSession();
42
+ return runner;
43
+ }
44
+ async ensureSession() {
45
+ if (this.session)
46
+ return this.session;
47
+ if (!this.loadPromise) {
48
+ this.loadPromise = (async () => {
49
+ const bytes = this.modelBytes ?? new Uint8Array(await fs.readFile(this.modelPath));
50
+ const session = await ort.InferenceSession.create(bytes, {
51
+ executionProviders: ["cpu"],
52
+ graphOptimizationLevel: "all",
53
+ });
54
+ this.session = session;
55
+ return session;
56
+ })();
57
+ }
58
+ return this.loadPromise;
59
+ }
60
+ /**
61
+ * Run inference on a single token id sequence.
62
+ *
63
+ * Pads to `fixedSeqLen` (default 128) with id 0 + mask 0; truncates if longer. Output is sliced
64
+ * back to the actual input length.
65
+ *
66
+ * @param tokenIds The id sequence produced by the tokenizer (no special tokens added).
67
+ */
68
+ async infer(tokenIds) {
69
+ const session = await this.ensureSession();
70
+ const seqLen = Math.min(tokenIds.length, this.fixedSeqLen);
71
+ const padded = new BigInt64Array(this.fixedSeqLen);
72
+ const mask = new BigInt64Array(this.fixedSeqLen);
73
+ for (let i = 0; i < seqLen; i++) {
74
+ padded[i] = BigInt(tokenIds[i]);
75
+ mask[i] = 1n;
76
+ }
77
+ const feeds = {
78
+ input_ids: new ort.Tensor("int64", padded, [1, this.fixedSeqLen]),
79
+ attention_mask: new ort.Tensor("int64", mask, [1, this.fixedSeqLen]),
80
+ };
81
+ const output = await session.run(feeds);
82
+ const logitsTensor = output.logits;
83
+ if (!logitsTensor)
84
+ throw new Error("ONNX model did not return a `logits` output");
85
+ const data = logitsTensor.data;
86
+ const [, , numLabels] = logitsTensor.dims;
87
+ const logits = [];
88
+ for (let t = 0; t < seqLen; t++) {
89
+ const row = new Array(numLabels);
90
+ const base = t * numLabels;
91
+ for (let l = 0; l < numLabels; l++)
92
+ row[l] = data[base + l];
93
+ logits.push(row);
94
+ }
95
+ return { logits, numLabels };
96
+ }
97
+ }
98
+ //# sourceMappingURL=onnx-runner.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"onnx-runner.js","sourceRoot":"","sources":["../onnx-runner.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAEH,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAA;AACxC,OAAO,GAAG,MAAM,kBAAkB,CAAA;AAclC,8FAA8F;AAC9F,MAAM,CAAC,MAAM,qBAAqB,GAAG,GAAG,CAAA;AASxC,MAAM,OAAO,UAAU;IAMJ;IACA;IANV,OAAO,GAAgC,IAAI,CAAA;IAC3C,WAAW,GAAyC,IAAI,CAAA;IAChD,WAAW,CAAQ;IAEnC,YACkB,SAAiB,EACjB,UAA6B,EAC9C,IAAoB;QAFH,cAAS,GAAT,SAAS,CAAQ;QACjB,eAAU,GAAV,UAAU,CAAmB;QAG9C,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,qBAAqB,CAAA;IAC7D,CAAC;IAED,oEAAoE;IACpE,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,SAAiB,EAAE,OAAuB,EAAE;QAC/D,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,SAAS,EAAE,IAAI,EAAE,IAAI,CAAC,CAAA;QACpD,IAAI,IAAI,CAAC,MAAM;YAAE,MAAM,MAAM,CAAC,aAAa,EAAE,CAAA;QAC7C,OAAO,MAAM,CAAA;IACd,CAAC;IAED,6CAA6C;IAC7C,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,UAAsB,EAAE,OAAuB,EAAE;QACvE,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,SAAS,EAAE,UAAU,EAAE,IAAI,CAAC,CAAA;QAC1D,IAAI,IAAI,CAAC,MAAM;YAAE,MAAM,MAAM,CAAC,aAAa,EAAE,CAAA;QAC7C,OAAO,MAAM,CAAA;IACd,CAAC;IAEO,KAAK,CAAC,aAAa;QAC1B,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC,OAAO,CAAA;QACrC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACvB,IAAI,CAAC,WAAW,GAAG,CAAC,KAAK,IAAI,EAAE;gBAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,IAAI,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAA;gBAClF,MAAM,OAAO,GAAG,MAAM,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAAC,KAAK,EAAE;oBACxD,kBAAkB,EAAE,CAAC,KAAK,CAAC;oBAC3B,sBAAsB,EAAE,KAAK;iBAC7B,CAAC,CAAA;gBACF,IAAI,CAAC,OAAO,GAAG,OAAO,CAAA;gBACtB,OAAO,OAAO,CAAA;YACf,CAAC,CAAC,EAAE,CAAA;QACL,CAAC;QACD,OAAO,IAAI,CAAC,WAAW,CAAA;IACxB,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,KAAK,CAAC,QAAkB;QAC7B,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,aAAa,EAAE,CAAA;QAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,IAAI,CAAC,WAAW,CAAC,CAAA;QAC1D,MAAM,MAAM,GAAG,IAAI,aAAa,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;QAClD,MAAM,IAAI,GAAG,IAAI,aAAa,CAAC,IAAI,CAAC,WAAW,CAAC,CAAA;QAChD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACjC,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAE,CAAC
,CAAA;YAChC,IAAI,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;QACb,CAAC;QAED,MAAM,KAAK,GAA+B;YACzC,SAAS,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;YACjE,cAAc,EAAE,IAAI,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;SACpE,CAAA;QAED,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAA;QACvC,MAAM,YAAY,GAAG,MAAM,CAAC,MAAM,CAAA;QAClC,IAAI,CAAC,YAAY;YAAE,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAA;QACjF,MAAM,IAAI,GAAG,YAAY,CAAC,IAAoB,CAAA;QAC9C,MAAM,CAAC,EAAE,AAAD,EAAG,SAAS,CAAC,GAAG,YAAY,CAAC,IAAyC,CAAA;QAE9E,MAAM,MAAM,GAAe,EAAE,CAAA;QAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACjC,MAAM,GAAG,GAAa,IAAI,KAAK,CAAC,SAAS,CAAC,CAAA;YAC1C,MAAM,IAAI,GAAG,CAAC,GAAG,SAAS,CAAA;YAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE;gBAAE,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,GAAG,CAAC,CAAE,CAAA;YAC5D,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QACjB,CAAC;QACD,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,CAAA;IAC7B,CAAC;CACD"}
@@ -0,0 +1,36 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `NeuralProposalClassifier` — adapter that exposes a `NeuralAddressClassifier` as a
7
+ * `ProposalClassifier` (the `@mailwoman/core/types` contract that the policy registry consumes).
8
+ *
9
+ * Implementation: for each section, run the neural classifier on `section.body`, walk the resulting
10
+ * `AddressTree`, and emit one `ClassificationProposal` per node whose tag is in the `emits` list.
11
+ * Spans are rebased to the original input via `section.start + node.start` so downstream
12
+ * consumers see character offsets in the caller's coordinate space — same convention as
13
+ * `wrapLegacyClassifier`.
14
+ *
15
+ * Per-section calls trade a small amount of context for the uniform `ProposalClassifier` shape.
16
+ * Addresses inside a section are typically short and the model handles them well; whole-input
17
+ * inference is a future optimization once the policy layer has a way to invoke a classifier "once
18
+ * per parse" instead of per section.
19
+ */
20
+ import type { ComponentTag, ProposalClassifier } from "@mailwoman/core/types";
21
+ import type { NeuralAddressClassifier } from "./classifier.js";
22
/** Options accepted by `createNeuralProposalClassifier`. */
export interface NeuralProposalClassifierConfig {
    /** Stable identifier, surfaced as `source_id` on every proposal (e.g. `neural-v0.2.0-en-us`). */
    id: string;
    /** Neural classifier instance that performs the actual parsing. */
    classifier: NeuralAddressClassifier;
    /** Component tags this classifier is allowed to emit. Stage 1 coarse tags when omitted. */
    emits?: ReadonlyArray<ComponentTag>;
    /** Locales the classifier is active for. Defaults to `["*"]` (locale-agnostic). */
    locales?: ReadonlyArray<string | "*">;
    /** Penalty attached to every emitted proposal. Defaults to 0. */
    penalty?: number;
}
34
+ /** Build a `ProposalClassifier` backed by a `NeuralAddressClassifier`. */
35
+ export declare function createNeuralProposalClassifier(cfg: NeuralProposalClassifierConfig): ProposalClassifier;
36
+ //# sourceMappingURL=proposal-classifier.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"proposal-classifier.d.ts","sourceRoot":"","sources":["../proposal-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAIH,OAAO,KAAK,EAGX,YAAY,EACZ,kBAAkB,EAElB,MAAM,uBAAuB,CAAA;AAC9B,OAAO,KAAK,EAAE,uBAAuB,EAAE,MAAM,iBAAiB,CAAA;AAG9D,MAAM,WAAW,8BAA8B;IAC9C,wFAAwF;IACxF,EAAE,EAAE,MAAM,CAAA;IACV,iDAAiD;IACjD,UAAU,EAAE,uBAAuB,CAAA;IACnC,gFAAgF;IAChF,KAAK,CAAC,EAAE,SAAS,YAAY,EAAE,CAAA;IAC/B,mFAAmF;IACnF,OAAO,CAAC,EAAE,SAAS,CAAC,MAAM,GAAG,GAAG,CAAC,EAAE,CAAA;IACnC,+DAA+D;IAC/D,OAAO,CAAC,EAAE,MAAM,CAAA;CAChB;AAED,0EAA0E;AAC1E,wBAAgB,8BAA8B,CAAC,GAAG,EAAE,8BAA8B,GAAG,kBAAkB,CA6CtG"}
@@ -0,0 +1,66 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * `NeuralProposalClassifier` — adapter that exposes a `NeuralAddressClassifier` as a
7
+ * `ProposalClassifier` (the `@mailwoman/core/types` contract that the policy registry consumes).
8
+ *
9
+ * Implementation: for each section, run the neural classifier on `section.body`, walk the resulting
10
+ * `AddressTree`, and emit one `ClassificationProposal` per node whose tag is in the `emits` list.
11
+ * Spans are rebased to the original input via `section.start + node.start` so downstream
12
+ * consumers see character offsets in the caller's coordinate space — same convention as
13
+ * `wrapLegacyClassifier`.
14
+ *
15
+ * Per-section calls trade a small amount of context for the uniform `ProposalClassifier` shape.
16
+ * Addresses inside a section are typically short and the model handles them well; whole-input
17
+ * inference is a future optimization once the policy layer has a way to invoke a classifier "once
18
+ * per parse" instead of per section.
19
+ */
20
+ import { STAGE1_COARSE_TAGS } from "./labels.js";
21
+ /** Build a `ProposalClassifier` backed by a `NeuralAddressClassifier`. */
22
+ export function createNeuralProposalClassifier(cfg) {
23
+ const emits = cfg.emits ?? STAGE1_COARSE_TAGS;
24
+ const emitsSet = new Set(emits);
25
+ const penalty = cfg.penalty ?? 0;
26
+ async function classify(section, _ctx) {
27
+ const tree = await cfg.classifier.parse(section.body);
28
+ const proposals = [];
29
+ const sectionOffset = section.start;
30
+ const visit = (node) => {
31
+ if (emitsSet.has(node.tag)) {
32
+ // Emit a structurally-Span-shaped record. We intentionally avoid `Span.from(...)` here:
33
+ // the tokenization module performs filesystem-bound module-init (libpostal data dir
34
+ // scan) which we don't want to force on every consumer of the proposal-classifier. The
35
+ // solver and policy registry read `start` / `end` / `body` only; if a downstream
36
+ // consumer needs the full Span behavior (graph membership, classifications, …), it
37
+ // should re-construct via Span.from(p.span.body, { start: p.span.start }).
38
+ const span = {
39
+ start: sectionOffset + node.start,
40
+ end: sectionOffset + node.end,
41
+ body: node.value,
42
+ };
43
+ proposals.push({
44
+ span,
45
+ component: node.tag,
46
+ confidence: node.confidence,
47
+ source: "neural",
48
+ source_id: cfg.id,
49
+ penalty,
50
+ });
51
+ }
52
+ for (const child of node.children)
53
+ visit(child);
54
+ };
55
+ for (const root of tree.roots)
56
+ visit(root);
57
+ return proposals;
58
+ }
59
+ return {
60
+ id: cfg.id,
61
+ emits,
62
+ locales: cfg.locales ?? ["*"],
63
+ classify,
64
+ };
65
+ }
66
+ //# sourceMappingURL=proposal-classifier.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"proposal-classifier.js","sourceRoot":"","sources":["../proposal-classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAYH,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAA;AAehD,0EAA0E;AAC1E,MAAM,UAAU,8BAA8B,CAAC,GAAmC;IACjF,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,IAAI,kBAAkB,CAAA;IAC7C,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAe,KAAgC,CAAC,CAAA;IACxE,MAAM,OAAO,GAAG,GAAG,CAAC,OAAO,IAAI,CAAC,CAAA;IAEhC,KAAK,UAAU,QAAQ,CAAC,OAAgB,EAAE,IAAuB;QAChE,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,UAAU,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;QACrD,MAAM,SAAS,GAA6B,EAAE,CAAA;QAC9C,MAAM,aAAa,GAAG,OAAO,CAAC,KAAK,CAAA;QAEnC,MAAM,KAAK,GAAG,CAAC,IAAiB,EAAQ,EAAE;YACzC,IAAI,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC5B,wFAAwF;gBACxF,oFAAoF;gBACpF,uFAAuF;gBACvF,iFAAiF;gBACjF,mFAAmF;gBACnF,2EAA2E;gBAC3E,MAAM,IAAI,GAAG;oBACZ,KAAK,EAAE,aAAa,GAAG,IAAI,CAAC,KAAK;oBACjC,GAAG,EAAE,aAAa,GAAG,IAAI,CAAC,GAAG;oBAC7B,IAAI,EAAE,IAAI,CAAC,KAAK;iBACG,CAAA;gBACpB,SAAS,CAAC,IAAI,CAAC;oBACd,IAAI;oBACJ,SAAS,EAAE,IAAI,CAAC,GAAG;oBACnB,UAAU,EAAE,IAAI,CAAC,UAAU;oBAC3B,MAAM,EAAE,QAAQ;oBAChB,SAAS,EAAE,GAAG,CAAC,EAAE;oBACjB,OAAO;iBACP,CAAC,CAAA;YACH,CAAC;YACD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,QAAQ;gBAAE,KAAK,CAAC,KAAK,CAAC,CAAA;QAChD,CAAC,CAAA;QAED,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK;YAAE,KAAK,CAAC,IAAI,CAAC,CAAA;QAC1C,OAAO,SAAS,CAAA;IACjB,CAAC;IAED,OAAO;QACN,EAAE,EAAE,GAAG,CAAC,EAAE;QACV,KAAK;QACL,OAAO,EAAE,GAAG,CAAC,OAAO,IAAI,CAAC,GAAG,CAAC;QAC7B,QAAQ;KACR,CAAA;AACF,CAAC"}
@@ -0,0 +1,67 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * SentencePiece tokenizer wrapper with char-offset realignment.
7
+ *
8
+ * `@sctg/sentencepiece-js` is a pure-JS port of the unigram SentencePiece algorithm and produces
9
+ * pieces + ids but NOT offsets. The TS layer reconstructs offsets by walking the input string
10
+ * alongside the emitted pieces; this gives us the `[start, end)` char ranges the BIO decoder
11
+ * needs to map labels back to substrings.
12
+ *
13
+ * Offset reconstruction algorithm:
14
+ *
15
+ * - SentencePiece prepends `▁` (U+2581) to the first piece of each word (and to the first piece of
16
+ * the input). Pieces without `▁` are continuations of the current word.
17
+ * - When a piece starts with `▁`, consume any whitespace from the input before counting it.
18
+ * - The piece's actual chars (the piece minus a leading `▁`) advance the input cursor by that many
19
+ * code units. SentencePiece operates on Unicode codepoints, but since addresses are almost
20
+ * entirely BMP characters, JS code-unit indexing is correct in practice. Surrogate-pair
21
+ * codepoints would need `Array.from(s).length` accounting; deferred until the parity test
22
+ * surfaces a real case.
23
+ * - Byte-fallback pieces (`<0xHH>`) are not handled here. The v0.1.0 corpus and golden set are
24
+ * Latin-script; the parity test will surface any unhandled cases.
25
+ *
26
+ * The wrapper supports two load modes:
27
+ *
28
+ * - `loadFromBase64(b64)` — for tests and Node usage where the model is read off disk and
29
+ * base64-encoded before being handed to the JS port.
30
+ * - `loadFromFile(path)` — convenience helper that does the read + b64 + load.
31
+ */
32
+ /** SentencePiece's word-boundary marker (U+2581 LOWER ONE EIGHTH BLOCK). */
33
+ export declare const SPACE_SENTINEL = "\u2581";
34
+ /** A tokenized piece paired with its half-open `[start, end)` char range in the original input. */
35
+ export interface TokenizedPiece {
36
+ /** The piece exactly as the tokenizer emitted it (with `▁` preserved where present). */
37
+ piece: string;
38
+ /** The vocab id for this piece; `encode` substitutes `-1` when the processor returned fewer ids than pieces. */
39
+ id: number;
40
+ /** Inclusive start offset in the original input, counted in JS string (UTF-16) code units. */
41
+ start: number;
42
+ /** Exclusive end offset in the original input, counted in JS string (UTF-16) code units. */
43
+ end: number;
44
+ }
45
+ export interface EncodeResult {
46
+ pieces: TokenizedPiece[];
47
+ ids: number[];
48
+ }
49
+ export declare class MailwomanTokenizer {
50
+ private readonly processor;
51
+ private constructor();
52
+ /** Load from a base64-encoded `tokenizer.model`. Use for in-memory / test setups. */
53
+ static loadFromBase64(b64: string): Promise<MailwomanTokenizer>;
54
+ /** Load from a path to a `tokenizer.model` file on disk. Node-only convenience. */
55
+ static loadFromFile(modelPath: string): Promise<MailwomanTokenizer>;
56
+ /**
57
+ * Tokenize `text` to pieces + ids + realigned char offsets.
58
+ *
59
+ * The returned `pieces[i].piece` matches what the Python `sp.EncodeAsPieces(text)[i]` returns,
60
+ * and `pieces[i].id` matches `sp.EncodeAsIds(text)[i]`. The offsets are reconstructed in TS — see
61
+ * file header for the algorithm.
62
+ */
63
+ encode(text: string): EncodeResult;
64
+ /** Decode a list of ids back to a string. Delegates to the underlying processor. */
65
+ decode(ids: number[] | Int32Array): string;
66
+ }
67
+ //# sourceMappingURL=tokenizer.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tokenizer.d.ts","sourceRoot":"","sources":["../tokenizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAKH,4EAA4E;AAC5E,eAAO,MAAM,cAAc,WAAM,CAAA;AAEjC,0EAA0E;AAC1E,MAAM,WAAW,cAAc;IAC9B,wFAAwF;IACxF,KAAK,EAAE,MAAM,CAAA;IACb,mCAAmC;IACnC,EAAE,EAAE,MAAM,CAAA;IACV,yDAAyD;IACzD,KAAK,EAAE,MAAM,CAAA;IACb,uDAAuD;IACvD,GAAG,EAAE,MAAM,CAAA;CACX;AAED,MAAM,WAAW,YAAY;IAC5B,MAAM,EAAE,cAAc,EAAE,CAAA;IACxB,GAAG,EAAE,MAAM,EAAE,CAAA;CACb;AAED,qBAAa,kBAAkB;IACV,OAAO,CAAC,QAAQ,CAAC,SAAS;IAA9C,OAAO;IAEP,qFAAqF;WACxE,cAAc,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAMrE,mFAAmF;WACtE,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAKzE;;;;;;OAMG;IACH,MAAM,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY;IA2BlC,oFAAoF;IACpF,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,UAAU,GAAG,MAAM;CAI1C"}
@@ -0,0 +1,86 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * SentencePiece tokenizer wrapper with char-offset realignment.
7
+ *
8
+ * `@sctg/sentencepiece-js` is a pure-JS port of the unigram SentencePiece algorithm and produces
9
+ * pieces + ids but NOT offsets. The TS layer reconstructs offsets by walking the input string
10
+ * alongside the emitted pieces; this gives us the `[start, end)` char ranges the BIO decoder
11
+ * needs to map labels back to substrings.
12
+ *
13
+ * Offset reconstruction algorithm:
14
+ *
15
+ * - SentencePiece prepends `▁` (U+2581) to the first piece of each word (and to the first piece of
16
+ * the input). Pieces without `▁` are continuations of the current word.
17
+ * - When a piece starts with `▁`, consume any whitespace from the input before counting it.
18
+ * - The piece's actual chars (the piece minus a leading `▁`) advance the input cursor by that many
19
+ * code units. SentencePiece operates on Unicode codepoints, but since addresses are almost
20
+ * entirely BMP characters, JS code-unit indexing is correct in practice. Surrogate-pair
21
+ * codepoints would need `Array.from(s).length` accounting; deferred until the parity test
22
+ * surfaces a real case.
23
+ * - Byte-fallback pieces (`<0xHH>`) are not handled here. The v0.1.0 corpus and golden set are
24
+ * Latin-script; the parity test will surface any unhandled cases.
25
+ *
26
+ * The wrapper supports two load modes:
27
+ *
28
+ * - `loadFromBase64(b64)` — for tests and Node usage where the model is read off disk and
29
+ * base64-encoded before being handed to the JS port.
30
+ * - `loadFromFile(path)` — convenience helper that does the read + b64 + load.
31
+ */
32
+ import { SentencePieceProcessor } from "@sctg/sentencepiece-js";
33
+ import { promises as fs } from "node:fs";
34
+ /** SentencePiece's word-boundary marker (U+2581 LOWER ONE EIGHTH BLOCK). */
35
+ export const SPACE_SENTINEL = "▁";
36
+ export class MailwomanTokenizer {
37
+ processor;
38
+ constructor(processor) {
39
+ this.processor = processor;
40
+ }
41
+ /** Load from a base64-encoded `tokenizer.model`. Use for in-memory / test setups. */
42
+ static async loadFromBase64(b64) {
43
+ const processor = new SentencePieceProcessor();
44
+ await processor.loadFromB64StringModel(b64);
45
+ return new MailwomanTokenizer(processor);
46
+ }
47
+ /** Load from a path to a `tokenizer.model` file on disk. Node-only convenience. */
48
+ static async loadFromFile(modelPath) {
49
+ const buf = await fs.readFile(modelPath);
50
+ return MailwomanTokenizer.loadFromBase64(buf.toString("base64"));
51
+ }
52
+ /**
53
+ * Tokenize `text` to pieces + ids + realigned char offsets.
54
+ *
55
+ * The returned `pieces[i].piece` matches what the Python `sp.EncodeAsPieces(text)[i]` returns,
56
+ * and `pieces[i].id` matches `sp.EncodeAsIds(text)[i]`. The offsets are reconstructed in TS — see
57
+ * file header for the algorithm.
58
+ */
59
+ encode(text) {
60
+ const pieces = this.processor.encodePieces(text);
61
+ const ids = this.processor.encodeIds(text);
62
+ const tokenized = [];
63
+ let cursor = 0;
64
+ for (let i = 0; i < pieces.length; i++) {
65
+ const piece = pieces[i];
66
+ const id = ids[i] ?? -1;
67
+ const hasSentinel = piece.startsWith(SPACE_SENTINEL);
68
+ const literal = hasSentinel ? piece.slice(SPACE_SENTINEL.length) : piece;
69
+ if (hasSentinel) {
70
+ while (cursor < text.length && /\s/.test(text[cursor]))
71
+ cursor++;
72
+ }
73
+ const start = cursor;
74
+ cursor += literal.length;
75
+ const end = cursor;
76
+ tokenized.push({ piece, id, start, end });
77
+ }
78
+ return { pieces: tokenized, ids };
79
+ }
80
+ /** Decode a list of ids back to a string. Delegates to the underlying processor. */
81
+ decode(ids) {
82
+ const arr = ids instanceof Int32Array ? ids : Int32Array.from(ids);
83
+ return this.processor.decodeIds(arr);
84
+ }
85
+ }
86
+ //# sourceMappingURL=tokenizer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"tokenizer.js","sourceRoot":"","sources":["../tokenizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAEH,OAAO,EAAE,sBAAsB,EAAE,MAAM,wBAAwB,CAAA;AAC/D,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAA;AAExC,4EAA4E;AAC5E,MAAM,CAAC,MAAM,cAAc,GAAG,GAAG,CAAA;AAmBjC,MAAM,OAAO,kBAAkB;IACO;IAArC,YAAqC,SAAiC;QAAjC,cAAS,GAAT,SAAS,CAAwB;IAAG,CAAC;IAE1E,qFAAqF;IACrF,MAAM,CAAC,KAAK,CAAC,cAAc,CAAC,GAAW;QACtC,MAAM,SAAS,GAAG,IAAI,sBAAsB,EAAE,CAAA;QAC9C,MAAM,SAAS,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAA;QAC3C,OAAO,IAAI,kBAAkB,CAAC,SAAS,CAAC,CAAA;IACzC,CAAC;IAED,mFAAmF;IACnF,MAAM,CAAC,KAAK,CAAC,YAAY,CAAC,SAAiB;QAC1C,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAA;QACxC,OAAO,kBAAkB,CAAC,cAAc,CAAC,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAA;IACjE,CAAC;IAED;;;;;;OAMG;IACH,MAAM,CAAC,IAAY;QAClB,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,YAAY,CAAC,IAAI,CAAC,CAAA;QAChD,MAAM,GAAG,GAAG,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;QAE1C,MAAM,SAAS,GAAqB,EAAE,CAAA;QACtC,IAAI,MAAM,GAAG,CAAC,CAAA;QAEd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACxC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAE,CAAA;YACxB,MAAM,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAA;YACvB,MAAM,WAAW,GAAG,KAAK,CAAC,UAAU,CAAC,cAAc,CAAC,CAAA;YACpD,MAAM,OAAO,GAAG,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,CAAA;YAExE,IAAI,WAAW,EAAE,CAAC;gBACjB,OAAO,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAE,CAAC;oBAAE,MAAM,EAAE,CAAA;YAClE,CAAC;YAED,MAAM,KAAK,GAAG,MAAM,CAAA;YACpB,MAAM,IAAI,OAAO,CAAC,MAAM,CAAA;YACxB,MAAM,GAAG,GAAG,MAAM,CAAA;YAElB,SAAS,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAA;QAC1C,CAAC;QAED,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,GAAG,EAAE,CAAA;IAClC,CAAC;IAED,oFAAoF;IACpF,MAAM,CAAC,GAA0B;QAChC,MAAM,GAAG,GAAG,GAAG,YAAY,UAAU,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;QAClE,OAAO,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,GAAG,CAAW,CAAA;IAC/C,CAAC;CACD"}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Per-package vitest config that resolves @mailwoman/core subpath imports to source. Mirrors the
7
+ * layout used in core/vitest.config.ts.
8
+ */
9
+ declare const _default: import("vite").UserConfig;
10
+ export default _default;
11
+ //# sourceMappingURL=vitest.config.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vitest.config.d.ts","sourceRoot":"","sources":["../vitest.config.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;;AAUH,wBAiBE"}
@@ -0,0 +1,32 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Per-package vitest config that resolves @mailwoman/core subpath imports to source. Mirrors the
7
+ * layout used in core/vitest.config.ts.
8
+ */
9
+ /// <reference types="vitest/config" />
10
+ import { resolve } from "node:path";
11
+ import { fileURLToPath } from "node:url";
12
+ import { defineConfig } from "vite";
13
+ const here = fileURLToPath(new URL(".", import.meta.url));
14
+ export default defineConfig({
15
+ resolve: {
16
+ alias: [
17
+ // Sub-subpath alias for the pure proposal-pipeline module — avoids dragging in
18
+ // AddressParser → classification → tokenization → libpostal init cascade.
19
+ {
20
+ find: "@mailwoman/core/parser/proposal-pipeline",
21
+ replacement: resolve(here, "../core/parser/proposal-pipeline.ts"),
22
+ },
23
+ { find: /^@mailwoman\/core\/(.+)$/, replacement: resolve(here, "../core/$1/index.ts") },
24
+ { find: /^@mailwoman\/core$/, replacement: resolve(here, "../core/index.ts") },
25
+ ],
26
+ },
27
+ test: {
28
+ isolate: false,
29
+ exclude: ["**/node_modules/**", "**/out/**", "**/dist/**"],
30
+ },
31
+ });
32
+ //# sourceMappingURL=vitest.config.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"vitest.config.js","sourceRoot":"","sources":["../vitest.config.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,uCAAuC;AAEvC,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AACnC,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAA;AACxC,OAAO,EAAE,YAAY,EAAE,MAAM,MAAM,CAAA;AAEnC,MAAM,IAAI,GAAG,aAAa,CAAC,IAAI,GAAG,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAA;AAEzD,eAAe,YAAY,CAAC;IAC3B,OAAO,EAAE;QACR,KAAK,EAAE;YACN,+EAA+E;YAC/E,0EAA0E;YAC1E;gBACC,IAAI,EAAE,0CAA0C;gBAChD,WAAW,EAAE,OAAO,CAAC,IAAI,EAAE,qCAAqC,CAAC;aACjE;YACD,EAAE,IAAI,EAAE,0BAA0B,EAAE,WAAW,EAAE,OAAO,CAAC,IAAI,EAAE,qBAAqB,CAAC,EAAE;YACvF,EAAE,IAAI,EAAE,oBAAoB,EAAE,WAAW,EAAE,OAAO,CAAC,IAAI,EAAE,kBAAkB,CAAC,EAAE;SAC9E;KACD;IACD,IAAI,EAAE;QACL,OAAO,EAAE,KAAK;QACd,OAAO,EAAE,CAAC,oBAAoB,EAAE,WAAW,EAAE,YAAY,CAAC;KAC1D;CACD,CAAC,CAAA"}
@@ -0,0 +1,39 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Weight-package resolution.
7
+ *
8
+ * The `@mailwoman/neural-weights-<locale>` packages ship the `model.onnx` + `tokenizer.model` files
9
+ * declared in their `files` array. At install time npm bundles those files alongside the
10
+ * package.json; at runtime we locate them by resolving the package.json then walking sideways.
11
+ *
12
+ * Local development gotcha: the weights packages in the monorepo carry only metadata
13
+ * (package.json + README.md + model-card.json).
14
+ * The actual binary files are produced by Phase 2 training and copied
15
+ * in at publish time. To run the neural classifier locally without publishing, either:
16
+ *
17
+ * 1. Pass explicit `modelPath` + `tokenizerPath` to `loadFromWeights`, or
18
+ * 2. Symlink the dev model files into the weights package directory — see
19
+ * `scripts/link-dev-weights.sh` in each weights package.
20
+ *
21
+ * The resolver checks for both files and throws a single actionable error when either is missing,
22
+ * naming all the paths it tried.
23
+ */
24
+ export interface ResolveWeightsOpts {
25
+ /** BCP-47-ish locale tag, e.g. "en-us" or "fr-fr". Used to pick the weights package. */
26
+ locale?: string;
27
+ /** Explicit model.onnx path; takes precedence over package auto-resolve. */
28
+ modelPath?: string;
29
+ /** Explicit tokenizer.model path; takes precedence over package auto-resolve. */
30
+ tokenizerPath?: string;
31
+ }
32
+ export interface ResolvedWeights {
33
+ modelPath: string;
34
+ tokenizerPath: string;
35
+ /** "explicit" if both paths came from opts; "package:<name>" if resolved via require.resolve. */
36
+ source: string;
37
+ }
38
+ export declare function resolveWeights(opts: ResolveWeightsOpts): ResolvedWeights;
39
+ //# sourceMappingURL=weights.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"weights.d.ts","sourceRoot":"","sources":["../weights.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAQH,MAAM,WAAW,kBAAkB;IAClC,wFAAwF;IACxF,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,4EAA4E;IAC5E,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,iFAAiF;IACjF,aAAa,CAAC,EAAE,MAAM,CAAA;CACtB;AAED,MAAM,WAAW,eAAe;IAC/B,SAAS,EAAE,MAAM,CAAA;IACjB,aAAa,EAAE,MAAM,CAAA;IACrB,iGAAiG;IACjG,MAAM,EAAE,MAAM,CAAA;CACd;AAED,wBAAgB,cAAc,CAAC,IAAI,EAAE,kBAAkB,GAAG,eAAe,CAoCxE"}
package/out/weights.js ADDED
@@ -0,0 +1,59 @@
1
+ /**
2
+ * @copyright Sister Software
3
+ * @license AGPL-3.0
4
+ * @author Teffen Ellis, et al.
5
+ *
6
+ * Weight-package resolution.
7
+ *
8
+ * The `@mailwoman/neural-weights-<locale>` packages ship the `model.onnx` + `tokenizer.model` files
9
+ * declared in their `files` array. At install time npm bundles those files alongside the
10
+ * package.json; at runtime we locate them by resolving the package.json then walking sideways.
11
+ *
12
+ * Local development gotcha: the weights packages in the monorepo carry only metadata
13
+ * (package.json + README.md + model-card.json).
14
+ * The actual binary files are produced by Phase 2 training and copied
15
+ * in at publish time. To run the neural classifier locally without publishing, either:
16
+ *
17
+ * 1. Pass explicit `modelPath` + `tokenizerPath` to `loadFromWeights`, or
18
+ * 2. Symlink the dev model files into the weights package directory — see
19
+ * `scripts/link-dev-weights.sh` in each weights package.
20
+ *
21
+ * The resolver checks for both files and throws a single actionable error when either is missing,
22
+ * naming all the paths it tried.
23
+ */
24
+ import { existsSync } from "node:fs";
25
+ import { createRequire } from "node:module";
26
+ import { dirname, resolve } from "node:path";
27
+ const req = createRequire(import.meta.url);
28
+ export function resolveWeights(opts) {
29
+ const tried = [];
30
+ if (opts.modelPath && opts.tokenizerPath) {
31
+ if (!existsSync(opts.modelPath))
32
+ throw new Error(`Explicit modelPath does not exist: ${opts.modelPath}`);
33
+ if (!existsSync(opts.tokenizerPath))
34
+ throw new Error(`Explicit tokenizerPath does not exist: ${opts.tokenizerPath}`);
35
+ return { modelPath: opts.modelPath, tokenizerPath: opts.tokenizerPath, source: "explicit" };
36
+ }
37
+ const locale = opts.locale ?? "en-us";
38
+ const packageName = `@mailwoman/neural-weights-${locale}`;
39
+ let packageDir;
40
+ try {
41
+ const pkgJsonPath = req.resolve(`${packageName}/package.json`);
42
+ packageDir = dirname(pkgJsonPath);
43
+ }
44
+ catch {
45
+ throw new Error(`Could not resolve ${packageName}. Install it via: npm install ${packageName}\n` +
46
+ `Or pass --model + --tokenizer with explicit paths.`);
47
+ }
48
+ const modelPath = opts.modelPath ?? resolve(packageDir, "model.onnx");
49
+ const tokenizerPath = opts.tokenizerPath ?? resolve(packageDir, "tokenizer.model");
50
+ tried.push(modelPath, tokenizerPath);
51
+ if (!existsSync(modelPath) || !existsSync(tokenizerPath)) {
52
+ throw new Error(`Weights package ${packageName} resolved at ${packageDir} but is missing model files.\n` +
53
+ `Tried:\n ${tried.join("\n ")}\n` +
54
+ `Run \`scripts/link-dev-weights.sh\` inside the package to symlink dev weights, ` +
55
+ `or pass --model + --tokenizer with explicit paths.`);
56
+ }
57
+ return { modelPath, tokenizerPath, source: `package:${packageName}` };
58
+ }
59
+ //# sourceMappingURL=weights.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"weights.js","sourceRoot":"","sources":["../weights.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAA;AACpC,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAA;AAC3C,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAA;AAE5C,MAAM,GAAG,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;AAkB1C,MAAM,UAAU,cAAc,CAAC,IAAwB;IACtD,MAAM,KAAK,GAAa,EAAE,CAAA;IAE1B,IAAI,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;QAC1C,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,sCAAsC,IAAI,CAAC,SAAS,EAAE,CAAC,CAAA;QACxG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,aAAa,CAAC;YAAE,MAAM,IAAI,KAAK,CAAC,0CAA0C,IAAI,CAAC,aAAa,EAAE,CAAC,CAAA;QACpH,OAAO,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,aAAa,EAAE,IAAI,CAAC,aAAa,EAAE,MAAM,EAAE,UAAU,EAAE,CAAA;IAC5F,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,IAAI,OAAO,CAAA;IACrC,MAAM,WAAW,GAAG,6BAA6B,MAAM,EAAE,CAAA;IACzD,IAAI,UAAkB,CAAA;IACtB,IAAI,CAAC;QACJ,MAAM,WAAW,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,WAAW,eAAe,CAAC,CAAA;QAC9D,UAAU,GAAG,OAAO,CAAC,WAAW,CAAC,CAAA;IAClC,CAAC;IAAC,MAAM,CAAC;QACR,MAAM,IAAI,KAAK,CACd,qBAAqB,WAAW,iCAAiC,WAAW,IAAI;YAC/E,oDAAoD,CACrD,CAAA;IACF,CAAC;IAED,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,IAAI,OAAO,CAAC,UAAU,EAAE,YAAY,CAAC,CAAA;IACrE,MAAM,aAAa,GAAG,IAAI,CAAC,aAAa,IAAI,OAAO,CAAC,UAAU,EAAE,iBAAiB,CAAC,CAAA;IAClF,KAAK,CAAC,IAAI,CAAC,SAAS,EAAE,aAAa,CAAC,CAAA;IAEpC,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QAC1D,MAAM,IAAI,KAAK,CACd,mBAAmB,WAAW,gBAAgB,UAAU,gCAAgC;YACvF,aAAa,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI;YACnC,iFAAiF;YACjF,oDAAoD,CACrD,CAAA;IACF,CAAC;IAED,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,MAAM,EAAE,WAAW,WAAW,EAAE,EAAE,CAAA;AACtE,CAAC"}
package/package.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "name": "@mailwoman/neural",
3
+ "version": "2.0.0",
4
+ "description": "Mailwoman neural classifier runtime: SentencePiece tokenizer + ONNX inference + decoder wiring.",
5
+ "license": "AGPL-3.0-only",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "https://github.com/sister-software/mailwoman.git",
9
+ "directory": "neural"
10
+ },
11
+ "type": "module",
12
+ "exports": {
13
+ "./package.json": "./package.json",
14
+ ".": "./out/index.js",
15
+ "./tokenizer": "./out/tokenizer.js"
16
+ },
17
+ "dependencies": {
18
+ "@mailwoman/core": "workspace:*",
19
+ "@sctg/sentencepiece-js": "^1.3.3",
20
+ "onnxruntime-node": "^1.26.0"
21
+ },
22
+ "files": [
23
+ "out/**/*.js",
24
+ "out/**/*.js.map",
25
+ "out/**/*.d.ts",
26
+ "out/**/*.d.ts.map"
27
+ ],
28
+ "publishConfig": {
29
+ "access": "public"
30
+ }
31
+ }