@raeven-co/sether-ner 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,80 @@
1
+ # @raeven-co/sether-ner
2
+
3
+ > Free-text NER redaction — **names, organisations, locations** — for
4
+ > [Sether](https://www.npmjs.com/package/@raeven-co/sether). The part regex
5
+ > can't do, shipped as a separate, lazy-loaded package so the core stays ~35 KB.
6
+
7
+ The core Sether detectors catch *structured* PII (emails, cards, SSNs, keys) and
8
+ *label-anchored* identity (`Name:`, `DOB:`). This package adds the hard part:
9
+ unlabelled people, companies, and places in running prose — the thing a
10
+ customer's own weekend build and the regex-only competitors can't replicate.
11
+
12
+ ## Why a separate package
13
+
14
+ - The model plus the ONNX runtime weigh 30 MB or more. Keeping them out of the core means
15
+ `new Sether()` stays tiny and dependency-light.
16
+ - NER is **async** and runs on **full outbound text** (the prompt you're about to
17
+ send), not the streaming response — so it's a different integration point than
18
+ the sync, chunk-boundary-safe core detectors. This package is honest about that.
19
+ - `@huggingface/transformers` is an **optional peer dependency** — install it only
20
+ if you use the default model. Bring your own inferer (e.g. GLiNER) and you don't
21
+ need it at all.
22
+
23
+ ## Install
24
+
25
+ ```bash
26
+ npm install @raeven-co/sether-ner @huggingface/transformers
27
+ ```
28
+
29
+ ## Use
30
+
31
+ ```ts
32
+ import { Sether, redactSync, basicDetectors } from '@raeven-co/sether';
33
+ import { createNerRedactor } from '@raeven-co/sether-ner';
34
+
35
+ const sether = new Sether();
36
+ const ner = createNerRedactor(); // Xenova/bert-base-NER, lazy-loaded on first call
37
+
38
+ // Outbound path: NER first (names/orgs/locations), then structured PII — one vault.
39
+ const { redacted } = await ner.redact(userPrompt, { vault: sether.vault });
40
+ const safe = redactSync(redacted, { detectors: basicDetectors, vault: sether.vault });
41
+
42
+ // Send `safe` to the LLM. On the reply, sether.restore() swaps BOTH token sets
43
+ // back, because NER tokens use the same `<TYPE_uuid>` format the core restores.
44
+ ```
45
+
46
+ NER tokens look like `<NAME_…>`, `<ORG_…>`, `<LOCATION_…>` and restore through the
47
+ core's `restore()` / `createRestoreStream()` with no extra wiring.
48
+
49
+ ## Options
50
+
51
+ ```ts
52
+ createNerRedactor({
53
+ model: 'Xenova/bert-base-NER', // any transformers.js token-classification model
54
+ threshold: 0.6, // min confidence
55
+ labels: ['NAME', 'ORG'], // restrict which types to redact
56
+ infer: myGlinerInferer, // bring your own model / service / mock (when set, `model` is not used)
57
+ });
58
+ ```
59
+
60
+ ### Bring your own model (e.g. GLiNER)
61
+
62
+ `infer` is `(text) => Promise<RawEntity[]>` where each entity has
63
+ `{ entity_group, score, start, end }` (absolute char offsets). Map your model's
64
+ output to that shape and the rest of the pipeline is identical — which also makes
65
+ the whole redactor unit-testable without downloading a model.
66
+
67
+ ## Honest limitations
68
+
69
+ - **First call is slow** — the model downloads (~30 MB+) and warms up. Call
70
+ `ner.warmup()` at boot. ONNX inference runs ~50% slower than PyTorch.
71
+ - **Not on the streaming hot path.** NER is a batched forward pass; it runs on the
72
+ outbound prompt, not per-chunk on the response. Restoration of the response is
73
+ the core's job (sync, chunk-boundary-safe).
74
+ - **Accuracy is model-bound.** `bert-base-NER` is solid for Western names/orgs;
75
+ multilingual and domain-specific names need a better model (swap via `model`/`infer`).
76
+ We'd rather you know that than oversell it.
77
+
78
+ ## License
79
+
80
+ MIT © Godfrey Lebo / Raeven, Inc.
package/dist/index.cjs ADDED
@@ -0,0 +1,148 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+
20
+ // src/index.ts
21
+ var index_exports = {};
22
+ __export(index_exports, {
23
+ NER_LABELS: () => NER_LABELS,
24
+ buildRedactor: () => buildRedactor,
25
+ createNerRedactor: () => createNerRedactor,
26
+ createTransformersInfer: () => createTransformersInfer
27
+ });
28
+ module.exports = __toCommonJS(index_exports);
29
+
30
+ // src/redactor.ts
31
+ var import_node_crypto = require("crypto");
32
+
33
+ // src/types.ts
34
+ var NER_LABELS = ["NAME", "ORG", "LOCATION"];
35
+
36
+ // src/redactor.ts
37
// Canonical Sether type for each model label group. Keys are upper-case because
// mapLabel() upper-cases its input before the lookup.
const LABEL_MAP = {
  PER: "NAME",
  PERSON: "NAME",
  NAME: "NAME",
  ORG: "ORG",
  ORGANIZATION: "ORG",
  ORGANISATION: "ORG",
  LOC: "LOCATION",
  LOCATION: "LOCATION",
  GPE: "LOCATION"
};

/**
 * Map a raw model label (e.g. "B-PER", "person", "S-ORG") to a Sether NER type.
 *
 * The label is upper-cased and a single leading tagging-scheme prefix is removed
 * before the lookup: B-/I- (BIO), E-/S- (BIOES) and L-/U- (BILOU).
 *
 * @param {string} group - Label reported by the inferer.
 * @returns {"NAME"|"ORG"|"LOCATION"|null} The mapped type, or null when the label
 *   is unknown or is not a string.
 */
function mapLabel(group) {
  // A custom inferer may omit the label; treat that as "no type" instead of throwing.
  if (typeof group !== "string") return null;
  const key = group.toUpperCase().replace(/^[BIESLU]-/, "");
  return LABEL_MAP[key] ?? null;
}
51
/**
 * Build a redactor around an injected inferer.
 *
 * @param {(text: string) => Promise<object[]>} infer - Produces raw entities for a text.
 * @param {object} [options]
 * @param {number} [options.threshold] - Minimum score to keep a match (default 0.6).
 * @param {string[]} [options.labels] - Types to redact (default: NER_LABELS).
 * @param {() => string} [options.uuid] - Token id generator (default: crypto randomUUID).
 * @returns {{ detect: Function, redact: Function, warmup: Function }}
 */
function buildRedactor(infer, options = {}) {
  const minScore = options.threshold ?? 0.6;
  const allowedTypes = new Set(options.labels ?? NER_LABELS);
  const nextId = options.uuid ?? import_node_crypto.randomUUID;

  // Run the inferer and reduce its output to non-overlapping matches.
  const detect = async (text) => {
    if (!text.trim()) return [];
    const entities = await infer(text);
    return resolveOverlaps(toMatches(entities, text, minScore, allowedTypes));
  };

  // Swap every match for a `<TYPE_id>` token and record the original in the vault.
  const redact = async (text, opts) => {
    const matches = await detect(text);
    if (matches.length === 0) return { redacted: text, matches };

    const pieces = [];
    let cursor = 0;
    for (const { type, value, start, end } of matches) {
      pieces.push(text.slice(cursor, start));
      const token = `<${type}_${nextId()}>`;
      opts.vault.set(token, value);
      pieces.push(token);
      cursor = end;
    }
    pieces.push(text.slice(cursor));
    return { redacted: pieces.join(""), matches };
  };

  // One throwaway inference so the inferer can do its first-call work early.
  const warmup = async () => {
    await infer("Warm up the model.");
  };

  return { detect, redact, warmup };
}
80
/**
 * Turn raw inferer output into validated matches against `text`.
 *
 * An entity is kept only when:
 *  - its score is a number that is >= `threshold` (NaN and non-numbers are dropped),
 *  - `start`/`end` are integers with 0 <= start < end (an `end` past the end of
 *    the text is clamped to `text.length`),
 *  - its label maps to an enabled type,
 *  - the sliced text is not blank.
 *
 * The match value is always sliced from `text`; the entity's `word` is not used.
 *
 * @param {Iterable<object>|null|undefined} raw - Entities from the inferer.
 * @param {string} text - The text the offsets refer to.
 * @param {number} threshold - Minimum accepted score.
 * @param {Set<string>} enabled - Types that may be emitted.
 * @returns {{type: string, value: string, start: number, end: number, score: number}[]}
 */
function toMatches(raw, text, threshold, enabled) {
  const out = [];
  for (const e of raw ?? []) {
    if (e == null) continue;
    const { score, start } = e;
    // `!(score >= threshold)` also rejects NaN, which `score < threshold` lets through.
    if (typeof score !== "number" || !(score >= threshold)) continue;
    if (!Number.isInteger(start) || !Number.isInteger(e.end)) continue;
    // A negative start would make slice() count from the end of the string.
    if (start < 0) continue;
    const end = Math.min(e.end, text.length);
    if (end <= start) continue;
    const type = typeof e.entity_group === "string" ? mapLabel(e.entity_group) : null;
    if (!type || !enabled.has(type)) continue;
    const value = text.slice(start, end);
    if (!value.trim()) continue;
    out.push({ type, value, start, end, score });
  }
  return out;
}
93
/**
 * Drop overlapping matches, keeping the higher-score one (then the longer one,
 * then the earlier one) from each overlapping group.
 *
 * Candidates are visited in priority order and a candidate is kept only if it
 * does not intersect anything already kept. Ordering by start position first
 * would let an earlier low-score span beat a later high-score one.
 *
 * NOTE(review): a dropped span's non-overlapping remainder stays un-redacted;
 * confirm that is acceptable for the callers of this redactor.
 *
 * @param {{start: number, end: number, score: number}[]} matches - Not modified.
 * @returns {object[]} Non-overlapping matches sorted by ascending `start`.
 */
function resolveOverlaps(matches) {
  // Copy first: sort() works in place and the input belongs to the caller.
  const byPriority = [...matches].sort(
    (a, b) => b.score - a.score || (b.end - b.start) - (a.end - a.start) || a.start - b.start
  );
  const kept = [];
  for (const m of byPriority) {
    const clashes = kept.some((k) => m.start < k.end && k.start < m.end);
    if (!clashes) kept.push(m);
  }
  return kept.sort((a, b) => a.start - b.start);
}
101
+
102
+ // src/infer.ts
103
// Build the import() call at runtime so bundlers do not see the specifier and
// the optional peer dependency is only resolved if the default inferer runs.
var dynamicImport = (specifier) => Function("s", "return import(s)")(specifier);

/**
 * Create the default inferer: a lazily created, memoized transformers.js
 * "token-classification" pipeline.
 *
 * Nothing is imported or loaded until the returned function is first called.
 * If loading fails, the memo is cleared so a later call can retry instead of
 * replaying the same rejection forever.
 *
 * @param {object} [options]
 * @param {string} [options.model] - Model id (default "Xenova/bert-base-NER").
 * @returns {(text: string) => Promise<object[]>} Inferer yielding
 *   `{ entity_group, score, start, end, word }` objects.
 * @throws {Error} (from the returned function) when '@huggingface/transformers'
 *   cannot be imported; the underlying failure is attached as `cause`.
 */
function createTransformersInfer(options = {}) {
  const model = options.model ?? "Xenova/bert-base-NER";
  let pipePromise = null;

  // Import the optional peer and build the pipeline.
  async function loadPipe() {
    let mod;
    try {
      mod = await dynamicImport("@huggingface/transformers");
    } catch (err) {
      // Keep the original failure: a broken install is not the same as a missing one.
      throw new Error(
        "@raeven-co/sether-ner: install the optional peer '@huggingface/transformers' to use the default model, or pass your own { infer } to createNerRedactor().",
        { cause: err }
      );
    }
    return mod.pipeline("token-classification", model);
  }

  // Share one in-flight/settled load between callers; forget it on failure.
  function getPipe() {
    if (!pipePromise) {
      pipePromise = loadPipe().catch((err) => {
        pipePromise = null;
        throw err;
      });
    }
    return pipePromise;
  }

  return async (text) => {
    const pipe = await getPipe();
    const out = await pipe(text, { aggregation_strategy: "simple" });
    // NOTE(review): entities without start/end are discarded by toMatches —
    // confirm the installed transformers.js version populates these offsets.
    return out.map((e) => ({
      entity_group: e.entity_group ?? e.entity ?? "MISC",
      score: typeof e.score === "number" ? e.score : 0,
      start: e.start,
      end: e.end,
      word: e.word
    }));
  };
}
135
+
136
+ // src/index.ts
137
/**
 * Create an NER redactor. Uses `options.infer` when provided; otherwise falls
 * back to the default transformers.js inferer built from the same options.
 *
 * @param {object} [options] - Forwarded unchanged to buildRedactor().
 * @returns {object} The redactor returned by buildRedactor().
 */
function createNerRedactor(options = {}) {
  const { infer } = options;
  return buildRedactor(infer ?? createTransformersInfer(options), options);
}
141
+ // Annotate the CommonJS export names for ESM import in node:
142
+ 0 && (module.exports = {
143
+ NER_LABELS,
144
+ buildRedactor,
145
+ createNerRedactor,
146
+ createTransformersInfer
147
+ });
148
+ //# sourceMappingURL=index.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/index.ts","../src/redactor.ts","../src/types.ts","../src/infer.ts"],"sourcesContent":["import { buildRedactor } from './redactor.js';\nimport { createTransformersInfer } from './infer.js';\nimport type { NerOptions, NerRedactor } from './types.js';\n\n/**\n * Create a free-text NER redactor for names, organisations, and locations.\n *\n * ```ts\n * import { Sether, redactSync, basicDetectors } from '@raeven-co/sether';\n * import { createNerRedactor } from '@raeven-co/sether-ner';\n *\n * const sether = new Sether();\n * const ner = createNerRedactor(); // uses Xenova/bert-base-NER, lazy-loaded\n *\n * // Outbound: NER first (names/orgs/locations), then structured PII — same vault.\n * const { redacted } = await ner.redact(prompt, { vault: sether.vault });\n * const safe = redactSync(redacted, { detectors: basicDetectors, vault: sether.vault });\n * // ...send `safe` to the LLM. sether.restore() then swaps BOTH token sets back,\n * // because NER tokens use the same `<TYPE_uuid>` format.\n * ```\n *\n * Pass `{ infer }` to use GLiNER or any other model. NER runs on the full\n * outbound text — not the streaming response.\n */\nexport function createNerRedactor(options: NerOptions = {}): NerRedactor {\n const infer = options.infer ?? 
createTransformersInfer(options);\n return buildRedactor(infer, options);\n}\n\nexport { buildRedactor } from './redactor.js';\nexport { createTransformersInfer } from './infer.js';\nexport { NER_LABELS } from './types.js';\nexport type {\n NerLabel,\n NerMatch,\n NerOptions,\n NerRedactor,\n InferFn,\n RawEntity,\n VaultLike,\n RedactResult,\n} from './types.js';\n","import { randomUUID } from 'node:crypto';\nimport {\n NER_LABELS,\n type InferFn,\n type NerLabel,\n type NerMatch,\n type NerOptions,\n type NerRedactor,\n type RawEntity,\n type RedactResult,\n type VaultLike,\n} from './types.js';\n\n// Map model label groups (and B-/I- prefixes) to Sether's three NER types.\nconst LABEL_MAP: Record<string, NerLabel> = {\n PER: 'NAME',\n PERSON: 'NAME',\n NAME: 'NAME',\n ORG: 'ORG',\n ORGANIZATION: 'ORG',\n ORGANISATION: 'ORG',\n LOC: 'LOCATION',\n LOCATION: 'LOCATION',\n GPE: 'LOCATION',\n};\n\nfunction mapLabel(group: string): NerLabel | null {\n return LABEL_MAP[group.toUpperCase().replace(/^[BI]-/, '')] ?? null;\n}\n\n/**\n * Build a redactor from an injected inferer. This is the pure, model-free core —\n * fully unit-testable with a mock `infer`. The default model wiring lives in\n * index.ts so this module carries no heavy dependency.\n */\nexport function buildRedactor(infer: InferFn, options: NerOptions = {}): NerRedactor {\n const threshold = options.threshold ?? 0.6;\n const enabled = new Set<NerLabel>(options.labels ?? NER_LABELS);\n const uuid = options.uuid ?? 
randomUUID;\n\n async function detect(text: string): Promise<NerMatch[]> {\n if (!text.trim()) return [];\n const raw = await infer(text);\n return resolveOverlaps(toMatches(raw, text, threshold, enabled));\n }\n\n async function redact(text: string, opts: { vault: VaultLike }): Promise<RedactResult> {\n const matches = await detect(text);\n if (matches.length === 0) return { redacted: text, matches };\n\n let out = '';\n let pos = 0;\n for (const m of matches) {\n out += text.slice(pos, m.start);\n const token = `<${m.type}_${uuid()}>`;\n opts.vault.set(token, m.value);\n out += token;\n pos = m.end;\n }\n out += text.slice(pos);\n return { redacted: out, matches };\n }\n\n async function warmup(): Promise<void> {\n await infer('Warm up the model.');\n }\n\n return { detect, redact, warmup };\n}\n\nfunction toMatches(raw: RawEntity[], text: string, threshold: number, enabled: Set<NerLabel>): NerMatch[] {\n const out: NerMatch[] = [];\n for (const e of raw) {\n if (e.score < threshold) continue;\n if (e.start == null || e.end == null || e.end <= e.start) continue;\n const type = mapLabel(e.entity_group);\n if (!type || !enabled.has(type)) continue;\n const value = text.slice(e.start, e.end);\n if (!value.trim()) continue;\n out.push({ type, value, start: e.start, end: e.end, score: e.score });\n }\n return out;\n}\n\n// Keep the higher-score (then longer) match when spans overlap.\nfunction resolveOverlaps(matches: NerMatch[]): NerMatch[] {\n matches.sort((a, b) => a.start - b.start || b.score - a.score || b.end - b.start - (a.end - a.start));\n const out: NerMatch[] = [];\n for (const m of matches) {\n if (!out.some((o) => m.start < o.end && o.start < m.end)) out.push(m);\n }\n return out.sort((a, b) => a.start - b.start);\n}\n","export type NerLabel = 'NAME' | 'ORG' | 'LOCATION';\n\nexport const NER_LABELS: readonly NerLabel[] = ['NAME', 'ORG', 'LOCATION'];\n\n/**\n * A raw entity as produced by a token-classification pipeline\n * (transformers.js 
`aggregation_strategy: 'simple'`) or any compatible inferer.\n * Offsets are absolute char positions into the input; entities without offsets\n * are skipped (we never trust the reconstructed `word`, which can carry subword\n * artifacts — we always slice the original text by [start, end)).\n */\nexport interface RawEntity {\n entity_group: string; // e.g. 'PER' | 'ORG' | 'LOC' | 'person' | 'organization'\n score: number; // 0..1\n start?: number;\n end?: number;\n word?: string;\n}\n\nexport type InferFn = (text: string) => Promise<RawEntity[]>;\n\nexport interface NerMatch {\n type: NerLabel;\n value: string;\n start: number;\n end: number;\n score: number;\n}\n\n/** Minimal vault surface — structurally compatible with @raeven-co/sether's Vault. */\nexport interface VaultLike {\n set(token: string, value: string): void;\n}\n\nexport interface NerOptions {\n /** Model id for the default transformers.js inferer. Default: 'Xenova/bert-base-NER'. */\n model?: string;\n /** Minimum confidence to keep a match. Default: 0.6. */\n threshold?: number;\n /** Which entity types to redact. Default: NAME, ORG, LOCATION. */\n labels?: readonly NerLabel[];\n /** Inject a custom inferer (GLiNER, a remote service, or a test mock). */\n infer?: InferFn;\n /** Token id generator. Default: node:crypto randomUUID. */\n uuid?: () => string;\n}\n\nexport interface RedactResult {\n redacted: string;\n matches: NerMatch[];\n}\n\nexport interface NerRedactor {\n /** Detect names / organisations / locations in the text. */\n detect(text: string): Promise<NerMatch[]>;\n /**\n * Redact entities into `<TYPE_uuid>` tokens (the same format @raeven-co/sether\n * uses), storing originals in the vault so the core `restore()` swaps them back.\n * Runs on the full OUTBOUND text — not the response byte-stream.\n */\n redact(text: string, opts: { vault: VaultLike }): Promise<RedactResult>;\n /** Preload the model so the first real call isn't slow. 
*/\n warmup(): Promise<void>;\n}\n","import type { InferFn, NerOptions, RawEntity } from './types.js';\n\ntype Pipe = (text: string, opts: { aggregation_strategy: string }) => Promise<unknown>;\n\ninterface RawPipelineEntity {\n entity_group?: string;\n entity?: string;\n score?: number;\n start?: number;\n end?: number;\n word?: string;\n}\n\n// Hide the specifier from the bundler and the type-checker so\n// @huggingface/transformers stays a TRULY optional peer dependency — it's only\n// needed at runtime if the default inferer actually runs.\nconst dynamicImport = (specifier: string): Promise<unknown> =>\n (Function('s', 'return import(s)') as (s: string) => Promise<unknown>)(specifier);\n\n/**\n * The default inferer: a lazy, memoized transformers.js token-classification\n * pipeline. The ~30 MB+ model is only fetched on first inference, so importing\n * this module stays cheap. Swap in GLiNER or a remote service by passing your\n * own `infer` to createNerRedactor().\n */\nexport function createTransformersInfer(options: NerOptions = {}): InferFn {\n const model = options.model ?? 'Xenova/bert-base-NER';\n let pipePromise: Promise<Pipe> | null = null;\n\n async function getPipe(): Promise<Pipe> {\n if (!pipePromise) {\n pipePromise = (async () => {\n let mod: { pipeline: (task: string, model: string) => Promise<Pipe> };\n try {\n mod = (await dynamicImport('@huggingface/transformers')) as typeof mod;\n } catch {\n throw new Error(\n \"@raeven-co/sether-ner: install the optional peer '@huggingface/transformers' to use the default model, or pass your own { infer } to createNerRedactor().\",\n );\n }\n return mod.pipeline('token-classification', model);\n })();\n }\n return pipePromise;\n }\n\n return async (text: string): Promise<RawEntity[]> => {\n const pipe = await getPipe();\n const out = (await pipe(text, { aggregation_strategy: 'simple' })) as RawPipelineEntity[];\n return out.map((e) => ({\n entity_group: e.entity_group ?? e.entity ?? 
'MISC',\n score: typeof e.score === 'number' ? e.score : 0,\n start: e.start,\n end: e.end,\n word: e.word,\n }));\n };\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,yBAA2B;;;ACEpB,IAAM,aAAkC,CAAC,QAAQ,OAAO,UAAU;;;ADYzE,IAAM,YAAsC;AAAA,EAC1C,KAAK;AAAA,EACL,QAAQ;AAAA,EACR,MAAM;AAAA,EACN,KAAK;AAAA,EACL,cAAc;AAAA,EACd,cAAc;AAAA,EACd,KAAK;AAAA,EACL,UAAU;AAAA,EACV,KAAK;AACP;AAEA,SAAS,SAAS,OAAgC;AAChD,SAAO,UAAU,MAAM,YAAY,EAAE,QAAQ,UAAU,EAAE,CAAC,KAAK;AACjE;AAOO,SAAS,cAAc,OAAgB,UAAsB,CAAC,GAAgB;AACnF,QAAM,YAAY,QAAQ,aAAa;AACvC,QAAM,UAAU,IAAI,IAAc,QAAQ,UAAU,UAAU;AAC9D,QAAM,OAAO,QAAQ,QAAQ;AAE7B,iBAAe,OAAO,MAAmC;AACvD,QAAI,CAAC,KAAK,KAAK,EAAG,QAAO,CAAC;AAC1B,UAAM,MAAM,MAAM,MAAM,IAAI;AAC5B,WAAO,gBAAgB,UAAU,KAAK,MAAM,WAAW,OAAO,CAAC;AAAA,EACjE;AAEA,iBAAe,OAAO,MAAc,MAAmD;AACrF,UAAM,UAAU,MAAM,OAAO,IAAI;AACjC,QAAI,QAAQ,WAAW,EAAG,QAAO,EAAE,UAAU,MAAM,QAAQ;AAE3D,QAAI,MAAM;AACV,QAAI,MAAM;AACV,eAAW,KAAK,SAAS;AACvB,aAAO,KAAK,MAAM,KAAK,EAAE,KAAK;AAC9B,YAAM,QAAQ,IAAI,EAAE,IAAI,IAAI,KAAK,CAAC;AAClC,WAAK,MAAM,IAAI,OAAO,EAAE,KAAK;AAC7B,aAAO;AACP,YAAM,EAAE;AAAA,IACV;AACA,WAAO,KAAK,MAAM,GAAG;AACrB,WAAO,EAAE,UAAU,KAAK,QAAQ;AAAA,EAClC;AAEA,iBAAe,SAAwB;AACrC,UAAM,MAAM,oBAAoB;AAAA,EAClC;AAEA,SAAO,EAAE,QAAQ,QAAQ,OAAO;AAClC;AAEA,SAAS,UAAU,KAAkB,MAAc,WAAmB,SAAoC;AACxG,QAAM,MAAkB,CAAC;AACzB,aAAW,KAAK,KAAK;AACnB,QAAI,EAAE,QAAQ,UAAW;AACzB,QAAI,EAAE,SAAS,QAAQ,EAAE,OAAO,QAAQ,EAAE,OAAO,EAAE,MAAO;AAC1D,UAAM,OAAO,SAAS,EAAE,YAAY;AACpC,QAAI,CAAC,QAAQ,CAAC,QAAQ,IAAI,IAAI,EAAG;AACjC,UAAM,QAAQ,KAAK,MAAM,EAAE,OAAO,EAAE,GAAG;AACvC,QAAI,CAAC,MAAM,KAAK,EAAG;AACnB,QAAI,KAAK,EAAE,MAAM,OAAO,OAAO,EAAE,OAAO,KAAK,EAAE,KAAK,OAAO,EAAE,MAAM,CAAC;AAAA,EACtE;AACA,SAAO;AACT;AAGA,SAAS,gBAAgB,SAAiC;AACxD,UAAQ,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,MAAM;AACpG,QAAM,MAAkB,CAAC;AACzB,aAAW,KAAK,SAAS;AACvB,QAAI,CAAC,IAAI,KAAK,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,GAAG,EAAG,KAAI,KAAK,CAAC;AAAA,EACtE;AACA,SAAO,IAAI,KAAK,CAAC,GAAG,MAAM,EAAE
,QAAQ,EAAE,KAAK;AAC7C;;;AE5EA,IAAM,gBAAgB,CAAC,cACpB,SAAS,KAAK,kBAAkB,EAAsC,SAAS;AAQ3E,SAAS,wBAAwB,UAAsB,CAAC,GAAY;AACzE,QAAM,QAAQ,QAAQ,SAAS;AAC/B,MAAI,cAAoC;AAExC,iBAAe,UAAyB;AACtC,QAAI,CAAC,aAAa;AAChB,qBAAe,YAAY;AACzB,YAAI;AACJ,YAAI;AACF,gBAAO,MAAM,cAAc,2BAA2B;AAAA,QACxD,QAAQ;AACN,gBAAM,IAAI;AAAA,YACR;AAAA,UACF;AAAA,QACF;AACA,eAAO,IAAI,SAAS,wBAAwB,KAAK;AAAA,MACnD,GAAG;AAAA,IACL;AACA,WAAO;AAAA,EACT;AAEA,SAAO,OAAO,SAAuC;AACnD,UAAM,OAAO,MAAM,QAAQ;AAC3B,UAAM,MAAO,MAAM,KAAK,MAAM,EAAE,sBAAsB,SAAS,CAAC;AAChE,WAAO,IAAI,IAAI,CAAC,OAAO;AAAA,MACrB,cAAc,EAAE,gBAAgB,EAAE,UAAU;AAAA,MAC5C,OAAO,OAAO,EAAE,UAAU,WAAW,EAAE,QAAQ;AAAA,MAC/C,OAAO,EAAE;AAAA,MACT,KAAK,EAAE;AAAA,MACP,MAAM,EAAE;AAAA,IACV,EAAE;AAAA,EACJ;AACF;;;AHjCO,SAAS,kBAAkB,UAAsB,CAAC,GAAgB;AACvE,QAAM,QAAQ,QAAQ,SAAS,wBAAwB,OAAO;AAC9D,SAAO,cAAc,OAAO,OAAO;AACrC;","names":[]}
@@ -0,0 +1,97 @@
1
+ type NerLabel = 'NAME' | 'ORG' | 'LOCATION';
2
+ declare const NER_LABELS: readonly NerLabel[];
3
+ /**
4
+ * A raw entity as produced by a token-classification pipeline
5
+ * (transformers.js `aggregation_strategy: 'simple'`) or any compatible inferer.
6
+ * Offsets are absolute char positions into the input; entities without offsets
7
+ * are skipped (we never trust the reconstructed `word`, which can carry subword
8
+ * artifacts — we always slice the original text by [start, end)).
9
+ */
10
+ interface RawEntity {
11
+ entity_group: string;
12
+ score: number;
13
+ start?: number;
14
+ end?: number;
15
+ word?: string;
16
+ }
17
+ type InferFn = (text: string) => Promise<RawEntity[]>;
18
+ interface NerMatch {
19
+ type: NerLabel;
20
+ value: string;
21
+ start: number;
22
+ end: number;
23
+ score: number;
24
+ }
25
+ /** Minimal vault surface — structurally compatible with @raeven-co/sether's Vault. */
26
+ interface VaultLike {
27
+ set(token: string, value: string): void;
28
+ }
29
+ interface NerOptions {
30
+ /** Model id for the default transformers.js inferer. Default: 'Xenova/bert-base-NER'. */
31
+ model?: string;
32
+ /** Minimum confidence to keep a match. Default: 0.6. */
33
+ threshold?: number;
34
+ /** Which entity types to redact. Default: NAME, ORG, LOCATION. */
35
+ labels?: readonly NerLabel[];
36
+ /** Inject a custom inferer (GLiNER, a remote service, or a test mock). */
37
+ infer?: InferFn;
38
+ /** Token id generator. Default: node:crypto randomUUID. */
39
+ uuid?: () => string;
40
+ }
41
+ interface RedactResult {
42
+ redacted: string;
43
+ matches: NerMatch[];
44
+ }
45
+ interface NerRedactor {
46
+ /** Detect names / organisations / locations in the text. */
47
+ detect(text: string): Promise<NerMatch[]>;
48
+ /**
49
+ * Redact entities into `<TYPE_uuid>` tokens (the same format @raeven-co/sether
50
+ * uses), storing originals in the vault so the core `restore()` swaps them back.
51
+ * Runs on the full OUTBOUND text — not the response byte-stream.
52
+ */
53
+ redact(text: string, opts: {
54
+ vault: VaultLike;
55
+ }): Promise<RedactResult>;
56
+ /** Preload the model so the first real call isn't slow. */
57
+ warmup(): Promise<void>;
58
+ }
59
+
60
+ /**
61
+ * Build a redactor from an injected inferer. This is the pure, model-free core —
62
+ * fully unit-testable with a mock `infer`. The default model wiring lives in
63
+ * index.ts so this module carries no heavy dependency.
64
+ */
65
+ declare function buildRedactor(infer: InferFn, options?: NerOptions): NerRedactor;
66
+
67
+ /**
68
+ * The default inferer: a lazy, memoized transformers.js token-classification
69
+ * pipeline. The ~30 MB+ model is only fetched on first inference, so importing
70
+ * this module stays cheap. Swap in GLiNER or a remote service by passing your
71
+ * own `infer` to createNerRedactor().
72
+ */
73
+ declare function createTransformersInfer(options?: NerOptions): InferFn;
74
+
75
+ /**
76
+ * Create a free-text NER redactor for names, organisations, and locations.
77
+ *
78
+ * ```ts
79
+ * import { Sether, redactSync, basicDetectors } from '@raeven-co/sether';
80
+ * import { createNerRedactor } from '@raeven-co/sether-ner';
81
+ *
82
+ * const sether = new Sether();
83
+ * const ner = createNerRedactor(); // uses Xenova/bert-base-NER, lazy-loaded
84
+ *
85
+ * // Outbound: NER first (names/orgs/locations), then structured PII — same vault.
86
+ * const { redacted } = await ner.redact(prompt, { vault: sether.vault });
87
+ * const safe = redactSync(redacted, { detectors: basicDetectors, vault: sether.vault });
88
+ * // ...send `safe` to the LLM. sether.restore() then swaps BOTH token sets back,
89
+ * // because NER tokens use the same `<TYPE_uuid>` format.
90
+ * ```
91
+ *
92
+ * Pass `{ infer }` to use GLiNER or any other model. NER runs on the full
93
+ * outbound text — not the streaming response.
94
+ */
95
+ declare function createNerRedactor(options?: NerOptions): NerRedactor;
96
+
97
+ export { type InferFn, NER_LABELS, type NerLabel, type NerMatch, type NerOptions, type NerRedactor, type RawEntity, type RedactResult, type VaultLike, buildRedactor, createNerRedactor, createTransformersInfer };
@@ -0,0 +1,97 @@
1
+ type NerLabel = 'NAME' | 'ORG' | 'LOCATION';
2
+ declare const NER_LABELS: readonly NerLabel[];
3
+ /**
4
+ * A raw entity as produced by a token-classification pipeline
5
+ * (transformers.js `aggregation_strategy: 'simple'`) or any compatible inferer.
6
+ * Offsets are absolute char positions into the input; entities without offsets
7
+ * are skipped (we never trust the reconstructed `word`, which can carry subword
8
+ * artifacts — we always slice the original text by [start, end)).
9
+ */
10
+ interface RawEntity {
11
+ entity_group: string;
12
+ score: number;
13
+ start?: number;
14
+ end?: number;
15
+ word?: string;
16
+ }
17
+ type InferFn = (text: string) => Promise<RawEntity[]>;
18
+ interface NerMatch {
19
+ type: NerLabel;
20
+ value: string;
21
+ start: number;
22
+ end: number;
23
+ score: number;
24
+ }
25
+ /** Minimal vault surface — structurally compatible with @raeven-co/sether's Vault. */
26
+ interface VaultLike {
27
+ set(token: string, value: string): void;
28
+ }
29
+ interface NerOptions {
30
+ /** Model id for the default transformers.js inferer. Default: 'Xenova/bert-base-NER'. */
31
+ model?: string;
32
+ /** Minimum confidence to keep a match. Default: 0.6. */
33
+ threshold?: number;
34
+ /** Which entity types to redact. Default: NAME, ORG, LOCATION. */
35
+ labels?: readonly NerLabel[];
36
+ /** Inject a custom inferer (GLiNER, a remote service, or a test mock). */
37
+ infer?: InferFn;
38
+ /** Token id generator. Default: node:crypto randomUUID. */
39
+ uuid?: () => string;
40
+ }
41
+ interface RedactResult {
42
+ redacted: string;
43
+ matches: NerMatch[];
44
+ }
45
+ interface NerRedactor {
46
+ /** Detect names / organisations / locations in the text. */
47
+ detect(text: string): Promise<NerMatch[]>;
48
+ /**
49
+ * Redact entities into `<TYPE_uuid>` tokens (the same format @raeven-co/sether
50
+ * uses), storing originals in the vault so the core `restore()` swaps them back.
51
+ * Runs on the full OUTBOUND text — not the response byte-stream.
52
+ */
53
+ redact(text: string, opts: {
54
+ vault: VaultLike;
55
+ }): Promise<RedactResult>;
56
+ /** Preload the model so the first real call isn't slow. */
57
+ warmup(): Promise<void>;
58
+ }
59
+
60
+ /**
61
+ * Build a redactor from an injected inferer. This is the pure, model-free core —
62
+ * fully unit-testable with a mock `infer`. The default model wiring lives in
63
+ * index.ts so this module carries no heavy dependency.
64
+ */
65
+ declare function buildRedactor(infer: InferFn, options?: NerOptions): NerRedactor;
66
+
67
+ /**
68
+ * The default inferer: a lazy, memoized transformers.js token-classification
69
+ * pipeline. The ~30 MB+ model is only fetched on first inference, so importing
70
+ * this module stays cheap. Swap in GLiNER or a remote service by passing your
71
+ * own `infer` to createNerRedactor().
72
+ */
73
+ declare function createTransformersInfer(options?: NerOptions): InferFn;
74
+
75
+ /**
76
+ * Create a free-text NER redactor for names, organisations, and locations.
77
+ *
78
+ * ```ts
79
+ * import { Sether, redactSync, basicDetectors } from '@raeven-co/sether';
80
+ * import { createNerRedactor } from '@raeven-co/sether-ner';
81
+ *
82
+ * const sether = new Sether();
83
+ * const ner = createNerRedactor(); // uses Xenova/bert-base-NER, lazy-loaded
84
+ *
85
+ * // Outbound: NER first (names/orgs/locations), then structured PII — same vault.
86
+ * const { redacted } = await ner.redact(prompt, { vault: sether.vault });
87
+ * const safe = redactSync(redacted, { detectors: basicDetectors, vault: sether.vault });
88
+ * // ...send `safe` to the LLM. sether.restore() then swaps BOTH token sets back,
89
+ * // because NER tokens use the same `<TYPE_uuid>` format.
90
+ * ```
91
+ *
92
+ * Pass `{ infer }` to use GLiNER or any other model. NER runs on the full
93
+ * outbound text — not the streaming response.
94
+ */
95
+ declare function createNerRedactor(options?: NerOptions): NerRedactor;
96
+
97
+ export { type InferFn, NER_LABELS, type NerLabel, type NerMatch, type NerOptions, type NerRedactor, type RawEntity, type RedactResult, type VaultLike, buildRedactor, createNerRedactor, createTransformersInfer };
package/dist/index.js ADDED
@@ -0,0 +1,118 @@
1
+ // src/redactor.ts
2
+ import { randomUUID } from "crypto";
3
+
4
+ // src/types.ts
5
+ var NER_LABELS = ["NAME", "ORG", "LOCATION"];
6
+
7
+ // src/redactor.ts
8
// Canonical Sether type for each model label group. Keys are upper-case because
// mapLabel() upper-cases its input before the lookup.
const LABEL_MAP = {
  PER: "NAME",
  PERSON: "NAME",
  NAME: "NAME",
  ORG: "ORG",
  ORGANIZATION: "ORG",
  ORGANISATION: "ORG",
  LOC: "LOCATION",
  LOCATION: "LOCATION",
  GPE: "LOCATION"
};

/**
 * Map a raw model label (e.g. "B-PER", "person", "S-ORG") to a Sether NER type.
 *
 * The label is upper-cased and a single leading tagging-scheme prefix is removed
 * before the lookup: B-/I- (BIO), E-/S- (BIOES) and L-/U- (BILOU).
 *
 * @param {string} group - Label reported by the inferer.
 * @returns {"NAME"|"ORG"|"LOCATION"|null} The mapped type, or null when the label
 *   is unknown or is not a string.
 */
function mapLabel(group) {
  // A custom inferer may omit the label; treat that as "no type" instead of throwing.
  if (typeof group !== "string") return null;
  const key = group.toUpperCase().replace(/^[BIESLU]-/, "");
  return LABEL_MAP[key] ?? null;
}
22
/**
 * Build a redactor around an injected inferer.
 *
 * @param {(text: string) => Promise<object[]>} infer - Produces raw entities for a text.
 * @param {object} [options]
 * @param {number} [options.threshold] - Minimum score to keep a match (default 0.6).
 * @param {string[]} [options.labels] - Types to redact (default: NER_LABELS).
 * @param {() => string} [options.uuid] - Token id generator (default: crypto randomUUID).
 * @returns {{ detect: Function, redact: Function, warmup: Function }}
 */
function buildRedactor(infer, options = {}) {
  const minScore = options.threshold ?? 0.6;
  const allowedTypes = new Set(options.labels ?? NER_LABELS);
  const nextId = options.uuid ?? randomUUID;

  // Run the inferer and reduce its output to non-overlapping matches.
  const detect = async (text) => {
    if (!text.trim()) return [];
    const entities = await infer(text);
    return resolveOverlaps(toMatches(entities, text, minScore, allowedTypes));
  };

  // Swap every match for a `<TYPE_id>` token and record the original in the vault.
  const redact = async (text, opts) => {
    const matches = await detect(text);
    if (matches.length === 0) return { redacted: text, matches };

    const pieces = [];
    let cursor = 0;
    for (const { type, value, start, end } of matches) {
      pieces.push(text.slice(cursor, start));
      const token = `<${type}_${nextId()}>`;
      opts.vault.set(token, value);
      pieces.push(token);
      cursor = end;
    }
    pieces.push(text.slice(cursor));
    return { redacted: pieces.join(""), matches };
  };

  // One throwaway inference so the inferer can do its first-call work early.
  const warmup = async () => {
    await infer("Warm up the model.");
  };

  return { detect, redact, warmup };
}
51
/**
 * Turn raw inferer output into validated matches against `text`.
 *
 * An entity is kept only when:
 *  - its score is a number that is >= `threshold` (NaN and non-numbers are dropped),
 *  - `start`/`end` are integers with 0 <= start < end (an `end` past the end of
 *    the text is clamped to `text.length`),
 *  - its label maps to an enabled type,
 *  - the sliced text is not blank.
 *
 * The match value is always sliced from `text`; the entity's `word` is not used.
 *
 * @param {Iterable<object>|null|undefined} raw - Entities from the inferer.
 * @param {string} text - The text the offsets refer to.
 * @param {number} threshold - Minimum accepted score.
 * @param {Set<string>} enabled - Types that may be emitted.
 * @returns {{type: string, value: string, start: number, end: number, score: number}[]}
 */
function toMatches(raw, text, threshold, enabled) {
  const out = [];
  for (const e of raw ?? []) {
    if (e == null) continue;
    const { score, start } = e;
    // `!(score >= threshold)` also rejects NaN, which `score < threshold` lets through.
    if (typeof score !== "number" || !(score >= threshold)) continue;
    if (!Number.isInteger(start) || !Number.isInteger(e.end)) continue;
    // A negative start would make slice() count from the end of the string.
    if (start < 0) continue;
    const end = Math.min(e.end, text.length);
    if (end <= start) continue;
    const type = typeof e.entity_group === "string" ? mapLabel(e.entity_group) : null;
    if (!type || !enabled.has(type)) continue;
    const value = text.slice(start, end);
    if (!value.trim()) continue;
    out.push({ type, value, start, end, score });
  }
  return out;
}
64
/**
 * Drop overlapping matches, keeping the higher-score one (then the longer one,
 * then the earlier one) from each overlapping group.
 *
 * Candidates are visited in priority order and a candidate is kept only if it
 * does not intersect anything already kept. Ordering by start position first
 * would let an earlier low-score span beat a later high-score one.
 *
 * NOTE(review): a dropped span's non-overlapping remainder stays un-redacted;
 * confirm that is acceptable for the callers of this redactor.
 *
 * @param {{start: number, end: number, score: number}[]} matches - Not modified.
 * @returns {object[]} Non-overlapping matches sorted by ascending `start`.
 */
function resolveOverlaps(matches) {
  // Copy first: sort() works in place and the input belongs to the caller.
  const byPriority = [...matches].sort(
    (a, b) => b.score - a.score || (b.end - b.start) - (a.end - a.start) || a.start - b.start
  );
  const kept = [];
  for (const m of byPriority) {
    const clashes = kept.some((k) => m.start < k.end && k.start < m.end);
    if (!clashes) kept.push(m);
  }
  return kept.sort((a, b) => a.start - b.start);
}
72
+
73
+ // src/infer.ts
74
// Build the import() call at runtime so bundlers do not see the specifier and
// the optional peer dependency is only resolved if the default inferer runs.
var dynamicImport = (specifier) => Function("s", "return import(s)")(specifier);

/**
 * Create the default inferer: a lazily created, memoized transformers.js
 * "token-classification" pipeline.
 *
 * Nothing is imported or loaded until the returned function is first called.
 * If loading fails, the memo is cleared so a later call can retry instead of
 * replaying the same rejection forever.
 *
 * @param {object} [options]
 * @param {string} [options.model] - Model id (default "Xenova/bert-base-NER").
 * @returns {(text: string) => Promise<object[]>} Inferer yielding
 *   `{ entity_group, score, start, end, word }` objects.
 * @throws {Error} (from the returned function) when '@huggingface/transformers'
 *   cannot be imported; the underlying failure is attached as `cause`.
 */
function createTransformersInfer(options = {}) {
  const model = options.model ?? "Xenova/bert-base-NER";
  let pipePromise = null;

  // Import the optional peer and build the pipeline.
  async function loadPipe() {
    let mod;
    try {
      mod = await dynamicImport("@huggingface/transformers");
    } catch (err) {
      // Keep the original failure: a broken install is not the same as a missing one.
      throw new Error(
        "@raeven-co/sether-ner: install the optional peer '@huggingface/transformers' to use the default model, or pass your own { infer } to createNerRedactor().",
        { cause: err }
      );
    }
    return mod.pipeline("token-classification", model);
  }

  // Share one in-flight/settled load between callers; forget it on failure.
  function getPipe() {
    if (!pipePromise) {
      pipePromise = loadPipe().catch((err) => {
        pipePromise = null;
        throw err;
      });
    }
    return pipePromise;
  }

  return async (text) => {
    const pipe = await getPipe();
    const out = await pipe(text, { aggregation_strategy: "simple" });
    // NOTE(review): entities without start/end are discarded by toMatches —
    // confirm the installed transformers.js version populates these offsets.
    return out.map((e) => ({
      entity_group: e.entity_group ?? e.entity ?? "MISC",
      score: typeof e.score === "number" ? e.score : 0,
      start: e.start,
      end: e.end,
      word: e.word
    }));
  };
}
106
+
107
+ // src/index.ts
108
/**
 * Create an NER redactor. Uses `options.infer` when provided; otherwise falls
 * back to the default transformers.js inferer built from the same options.
 *
 * @param {object} [options] - Forwarded unchanged to buildRedactor().
 * @returns {object} The redactor returned by buildRedactor().
 */
function createNerRedactor(options = {}) {
  const { infer } = options;
  return buildRedactor(infer ?? createTransformersInfer(options), options);
}
112
+ export {
113
+ NER_LABELS,
114
+ buildRedactor,
115
+ createNerRedactor,
116
+ createTransformersInfer
117
+ };
118
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/redactor.ts","../src/types.ts","../src/infer.ts","../src/index.ts"],"sourcesContent":["import { randomUUID } from 'node:crypto';\nimport {\n NER_LABELS,\n type InferFn,\n type NerLabel,\n type NerMatch,\n type NerOptions,\n type NerRedactor,\n type RawEntity,\n type RedactResult,\n type VaultLike,\n} from './types.js';\n\n// Map model label groups (and B-/I- prefixes) to Sether's three NER types.\nconst LABEL_MAP: Record<string, NerLabel> = {\n PER: 'NAME',\n PERSON: 'NAME',\n NAME: 'NAME',\n ORG: 'ORG',\n ORGANIZATION: 'ORG',\n ORGANISATION: 'ORG',\n LOC: 'LOCATION',\n LOCATION: 'LOCATION',\n GPE: 'LOCATION',\n};\n\nfunction mapLabel(group: string): NerLabel | null {\n return LABEL_MAP[group.toUpperCase().replace(/^[BI]-/, '')] ?? null;\n}\n\n/**\n * Build a redactor from an injected inferer. This is the pure, model-free core —\n * fully unit-testable with a mock `infer`. The default model wiring lives in\n * index.ts so this module carries no heavy dependency.\n */\nexport function buildRedactor(infer: InferFn, options: NerOptions = {}): NerRedactor {\n const threshold = options.threshold ?? 0.6;\n const enabled = new Set<NerLabel>(options.labels ?? NER_LABELS);\n const uuid = options.uuid ?? 
randomUUID;\n\n async function detect(text: string): Promise<NerMatch[]> {\n if (!text.trim()) return [];\n const raw = await infer(text);\n return resolveOverlaps(toMatches(raw, text, threshold, enabled));\n }\n\n async function redact(text: string, opts: { vault: VaultLike }): Promise<RedactResult> {\n const matches = await detect(text);\n if (matches.length === 0) return { redacted: text, matches };\n\n let out = '';\n let pos = 0;\n for (const m of matches) {\n out += text.slice(pos, m.start);\n const token = `<${m.type}_${uuid()}>`;\n opts.vault.set(token, m.value);\n out += token;\n pos = m.end;\n }\n out += text.slice(pos);\n return { redacted: out, matches };\n }\n\n async function warmup(): Promise<void> {\n await infer('Warm up the model.');\n }\n\n return { detect, redact, warmup };\n}\n\nfunction toMatches(raw: RawEntity[], text: string, threshold: number, enabled: Set<NerLabel>): NerMatch[] {\n const out: NerMatch[] = [];\n for (const e of raw) {\n if (e.score < threshold) continue;\n if (e.start == null || e.end == null || e.end <= e.start) continue;\n const type = mapLabel(e.entity_group);\n if (!type || !enabled.has(type)) continue;\n const value = text.slice(e.start, e.end);\n if (!value.trim()) continue;\n out.push({ type, value, start: e.start, end: e.end, score: e.score });\n }\n return out;\n}\n\n// Keep the higher-score (then longer) match when spans overlap.\nfunction resolveOverlaps(matches: NerMatch[]): NerMatch[] {\n matches.sort((a, b) => a.start - b.start || b.score - a.score || b.end - b.start - (a.end - a.start));\n const out: NerMatch[] = [];\n for (const m of matches) {\n if (!out.some((o) => m.start < o.end && o.start < m.end)) out.push(m);\n }\n return out.sort((a, b) => a.start - b.start);\n}\n","export type NerLabel = 'NAME' | 'ORG' | 'LOCATION';\n\nexport const NER_LABELS: readonly NerLabel[] = ['NAME', 'ORG', 'LOCATION'];\n\n/**\n * A raw entity as produced by a token-classification pipeline\n * (transformers.js 
`aggregation_strategy: 'simple'`) or any compatible inferer.\n * Offsets are absolute char positions into the input; entities without offsets\n * are skipped (we never trust the reconstructed `word`, which can carry subword\n * artifacts — we always slice the original text by [start, end)).\n */\nexport interface RawEntity {\n entity_group: string; // e.g. 'PER' | 'ORG' | 'LOC' | 'person' | 'organization'\n score: number; // 0..1\n start?: number;\n end?: number;\n word?: string;\n}\n\nexport type InferFn = (text: string) => Promise<RawEntity[]>;\n\nexport interface NerMatch {\n type: NerLabel;\n value: string;\n start: number;\n end: number;\n score: number;\n}\n\n/** Minimal vault surface — structurally compatible with @raeven-co/sether's Vault. */\nexport interface VaultLike {\n set(token: string, value: string): void;\n}\n\nexport interface NerOptions {\n /** Model id for the default transformers.js inferer. Default: 'Xenova/bert-base-NER'. */\n model?: string;\n /** Minimum confidence to keep a match. Default: 0.6. */\n threshold?: number;\n /** Which entity types to redact. Default: NAME, ORG, LOCATION. */\n labels?: readonly NerLabel[];\n /** Inject a custom inferer (GLiNER, a remote service, or a test mock). */\n infer?: InferFn;\n /** Token id generator. Default: node:crypto randomUUID. */\n uuid?: () => string;\n}\n\nexport interface RedactResult {\n redacted: string;\n matches: NerMatch[];\n}\n\nexport interface NerRedactor {\n /** Detect names / organisations / locations in the text. */\n detect(text: string): Promise<NerMatch[]>;\n /**\n * Redact entities into `<TYPE_uuid>` tokens (the same format @raeven-co/sether\n * uses), storing originals in the vault so the core `restore()` swaps them back.\n * Runs on the full OUTBOUND text — not the response byte-stream.\n */\n redact(text: string, opts: { vault: VaultLike }): Promise<RedactResult>;\n /** Preload the model so the first real call isn't slow. 
*/\n warmup(): Promise<void>;\n}\n","import type { InferFn, NerOptions, RawEntity } from './types.js';\n\ntype Pipe = (text: string, opts: { aggregation_strategy: string }) => Promise<unknown>;\n\ninterface RawPipelineEntity {\n entity_group?: string;\n entity?: string;\n score?: number;\n start?: number;\n end?: number;\n word?: string;\n}\n\n// Hide the specifier from the bundler and the type-checker so\n// @huggingface/transformers stays a TRULY optional peer dependency — it's only\n// needed at runtime if the default inferer actually runs.\nconst dynamicImport = (specifier: string): Promise<unknown> =>\n (Function('s', 'return import(s)') as (s: string) => Promise<unknown>)(specifier);\n\n/**\n * The default inferer: a lazy, memoized transformers.js token-classification\n * pipeline. The ~30 MB+ model is only fetched on first inference, so importing\n * this module stays cheap. Swap in GLiNER or a remote service by passing your\n * own `infer` to createNerRedactor().\n */\nexport function createTransformersInfer(options: NerOptions = {}): InferFn {\n const model = options.model ?? 'Xenova/bert-base-NER';\n let pipePromise: Promise<Pipe> | null = null;\n\n async function getPipe(): Promise<Pipe> {\n if (!pipePromise) {\n pipePromise = (async () => {\n let mod: { pipeline: (task: string, model: string) => Promise<Pipe> };\n try {\n mod = (await dynamicImport('@huggingface/transformers')) as typeof mod;\n } catch {\n throw new Error(\n \"@raeven-co/sether-ner: install the optional peer '@huggingface/transformers' to use the default model, or pass your own { infer } to createNerRedactor().\",\n );\n }\n return mod.pipeline('token-classification', model);\n })();\n }\n return pipePromise;\n }\n\n return async (text: string): Promise<RawEntity[]> => {\n const pipe = await getPipe();\n const out = (await pipe(text, { aggregation_strategy: 'simple' })) as RawPipelineEntity[];\n return out.map((e) => ({\n entity_group: e.entity_group ?? e.entity ?? 
'MISC',\n score: typeof e.score === 'number' ? e.score : 0,\n start: e.start,\n end: e.end,\n word: e.word,\n }));\n };\n}\n","import { buildRedactor } from './redactor.js';\nimport { createTransformersInfer } from './infer.js';\nimport type { NerOptions, NerRedactor } from './types.js';\n\n/**\n * Create a free-text NER redactor for names, organisations, and locations.\n *\n * ```ts\n * import { Sether, redactSync, basicDetectors } from '@raeven-co/sether';\n * import { createNerRedactor } from '@raeven-co/sether-ner';\n *\n * const sether = new Sether();\n * const ner = createNerRedactor(); // uses Xenova/bert-base-NER, lazy-loaded\n *\n * // Outbound: NER first (names/orgs/locations), then structured PII — same vault.\n * const { redacted } = await ner.redact(prompt, { vault: sether.vault });\n * const safe = redactSync(redacted, { detectors: basicDetectors, vault: sether.vault });\n * // ...send `safe` to the LLM. sether.restore() then swaps BOTH token sets back,\n * // because NER tokens use the same `<TYPE_uuid>` format.\n * ```\n *\n * Pass `{ infer }` to use GLiNER or any other model. NER runs on the full\n * outbound text — not the streaming response.\n */\nexport function createNerRedactor(options: NerOptions = {}): NerRedactor {\n const infer = options.infer ?? 
createTransformersInfer(options);\n return buildRedactor(infer, options);\n}\n\nexport { buildRedactor } from './redactor.js';\nexport { createTransformersInfer } from './infer.js';\nexport { NER_LABELS } from './types.js';\nexport type {\n NerLabel,\n NerMatch,\n NerOptions,\n NerRedactor,\n InferFn,\n RawEntity,\n VaultLike,\n RedactResult,\n} from './types.js';\n"],"mappings":";AAAA,SAAS,kBAAkB;;;ACEpB,IAAM,aAAkC,CAAC,QAAQ,OAAO,UAAU;;;ADYzE,IAAM,YAAsC;AAAA,EAC1C,KAAK;AAAA,EACL,QAAQ;AAAA,EACR,MAAM;AAAA,EACN,KAAK;AAAA,EACL,cAAc;AAAA,EACd,cAAc;AAAA,EACd,KAAK;AAAA,EACL,UAAU;AAAA,EACV,KAAK;AACP;AAEA,SAAS,SAAS,OAAgC;AAChD,SAAO,UAAU,MAAM,YAAY,EAAE,QAAQ,UAAU,EAAE,CAAC,KAAK;AACjE;AAOO,SAAS,cAAc,OAAgB,UAAsB,CAAC,GAAgB;AACnF,QAAM,YAAY,QAAQ,aAAa;AACvC,QAAM,UAAU,IAAI,IAAc,QAAQ,UAAU,UAAU;AAC9D,QAAM,OAAO,QAAQ,QAAQ;AAE7B,iBAAe,OAAO,MAAmC;AACvD,QAAI,CAAC,KAAK,KAAK,EAAG,QAAO,CAAC;AAC1B,UAAM,MAAM,MAAM,MAAM,IAAI;AAC5B,WAAO,gBAAgB,UAAU,KAAK,MAAM,WAAW,OAAO,CAAC;AAAA,EACjE;AAEA,iBAAe,OAAO,MAAc,MAAmD;AACrF,UAAM,UAAU,MAAM,OAAO,IAAI;AACjC,QAAI,QAAQ,WAAW,EAAG,QAAO,EAAE,UAAU,MAAM,QAAQ;AAE3D,QAAI,MAAM;AACV,QAAI,MAAM;AACV,eAAW,KAAK,SAAS;AACvB,aAAO,KAAK,MAAM,KAAK,EAAE,KAAK;AAC9B,YAAM,QAAQ,IAAI,EAAE,IAAI,IAAI,KAAK,CAAC;AAClC,WAAK,MAAM,IAAI,OAAO,EAAE,KAAK;AAC7B,aAAO;AACP,YAAM,EAAE;AAAA,IACV;AACA,WAAO,KAAK,MAAM,GAAG;AACrB,WAAO,EAAE,UAAU,KAAK,QAAQ;AAAA,EAClC;AAEA,iBAAe,SAAwB;AACrC,UAAM,MAAM,oBAAoB;AAAA,EAClC;AAEA,SAAO,EAAE,QAAQ,QAAQ,OAAO;AAClC;AAEA,SAAS,UAAU,KAAkB,MAAc,WAAmB,SAAoC;AACxG,QAAM,MAAkB,CAAC;AACzB,aAAW,KAAK,KAAK;AACnB,QAAI,EAAE,QAAQ,UAAW;AACzB,QAAI,EAAE,SAAS,QAAQ,EAAE,OAAO,QAAQ,EAAE,OAAO,EAAE,MAAO;AAC1D,UAAM,OAAO,SAAS,EAAE,YAAY;AACpC,QAAI,CAAC,QAAQ,CAAC,QAAQ,IAAI,IAAI,EAAG;AACjC,UAAM,QAAQ,KAAK,MAAM,EAAE,OAAO,EAAE,GAAG;AACvC,QAAI,CAAC,MAAM,KAAK,EAAG;AACnB,QAAI,KAAK,EAAE,MAAM,OAAO,OAAO,EAAE,OAAO,KAAK,EAAE,KAAK,OAAO,EAAE,MAAM,CAAC;AAAA,EACtE;AACA,SAAO;AACT;AAGA,SAAS,gBAAgB,SAAiC;AACxD,UAAQ,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,MAAM;A
ACpG,QAAM,MAAkB,CAAC;AACzB,aAAW,KAAK,SAAS;AACvB,QAAI,CAAC,IAAI,KAAK,CAAC,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,EAAE,GAAG,EAAG,KAAI,KAAK,CAAC;AAAA,EACtE;AACA,SAAO,IAAI,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAC7C;;;AE5EA,IAAM,gBAAgB,CAAC,cACpB,SAAS,KAAK,kBAAkB,EAAsC,SAAS;AAQ3E,SAAS,wBAAwB,UAAsB,CAAC,GAAY;AACzE,QAAM,QAAQ,QAAQ,SAAS;AAC/B,MAAI,cAAoC;AAExC,iBAAe,UAAyB;AACtC,QAAI,CAAC,aAAa;AAChB,qBAAe,YAAY;AACzB,YAAI;AACJ,YAAI;AACF,gBAAO,MAAM,cAAc,2BAA2B;AAAA,QACxD,QAAQ;AACN,gBAAM,IAAI;AAAA,YACR;AAAA,UACF;AAAA,QACF;AACA,eAAO,IAAI,SAAS,wBAAwB,KAAK;AAAA,MACnD,GAAG;AAAA,IACL;AACA,WAAO;AAAA,EACT;AAEA,SAAO,OAAO,SAAuC;AACnD,UAAM,OAAO,MAAM,QAAQ;AAC3B,UAAM,MAAO,MAAM,KAAK,MAAM,EAAE,sBAAsB,SAAS,CAAC;AAChE,WAAO,IAAI,IAAI,CAAC,OAAO;AAAA,MACrB,cAAc,EAAE,gBAAgB,EAAE,UAAU;AAAA,MAC5C,OAAO,OAAO,EAAE,UAAU,WAAW,EAAE,QAAQ;AAAA,MAC/C,OAAO,EAAE;AAAA,MACT,KAAK,EAAE;AAAA,MACP,MAAM,EAAE;AAAA,IACV,EAAE;AAAA,EACJ;AACF;;;ACjCO,SAAS,kBAAkB,UAAsB,CAAC,GAAgB;AACvE,QAAM,QAAQ,QAAQ,SAAS,wBAAwB,OAAO;AAC9D,SAAO,cAAc,OAAO,OAAO;AACrC;","names":[]}
package/package.json ADDED
@@ -0,0 +1,46 @@
1
+ {
2
+ "name": "@raeven-co/sether-ner",
3
+ "version": "0.1.0",
4
+ "description": "Free-text NER redaction (names, organisations, locations) for Sether. Lazy-loaded ONNX model via transformers.js. Tokens restore through @raeven-co/sether's vault.",
5
+ "type": "module",
6
+ "main": "./dist/index.cjs",
7
+ "module": "./dist/index.js",
8
+ "types": "./dist/index.d.ts",
9
+ "exports": {
10
+ ".": {
11
+ "types": "./dist/index.d.ts",
12
+ "import": "./dist/index.js",
13
+ "require": "./dist/index.cjs"
14
+ }
15
+ },
16
+ "files": ["dist", "README.md"],
17
+ "scripts": {
18
+ "build": "tsup",
19
+ "typecheck": "tsc --noEmit",
20
+ "test": "node test/redactor.test.mjs"
21
+ },
22
+ "keywords": ["pii", "ner", "redaction", "sether", "llm", "privacy", "onnx", "transformers", "gdpr"],
23
+ "author": "Godfrey Lebo <emorylebo@gmail.com> (Raeven, Inc.)",
24
+ "license": "MIT",
25
+ "repository": {
26
+ "type": "git",
27
+ "url": "git+https://github.com/raeven-co/sether-ner.git"
28
+ },
29
+ "peerDependencies": {
30
+ "@huggingface/transformers": ">=3.0.0"
31
+ },
32
+ "peerDependenciesMeta": {
33
+ "@huggingface/transformers": {
34
+ "optional": true
35
+ }
36
+ },
37
+ "devDependencies": {
38
+ "@types/node": "^22.10.0",
39
+ "esbuild": "^0.24.0",
40
+ "tsup": "^8.3.5",
41
+ "typescript": "^5.7.0"
42
+ },
43
+ "engines": {
44
+ "node": ">=18"
45
+ }
46
+ }