@nationaldesignstudio/rampart 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +402 -0
- package/MODEL_CARD.md +422 -0
- package/README.md +279 -0
- package/RELEASE.md +97 -0
- package/WHITEPAPER.md +316 -0
- package/dist/index.d.ts +23 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +35639 -0
- package/dist/index.js.map +36 -0
- package/dist/src/guard.d.ts +94 -0
- package/dist/src/guard.d.ts.map +1 -0
- package/dist/src/heuristics.d.ts +14 -0
- package/dist/src/heuristics.d.ts.map +1 -0
- package/dist/src/ner/classifier.d.ts +92 -0
- package/dist/src/ner/classifier.d.ts.map +1 -0
- package/dist/src/ner/worker.d.ts +44 -0
- package/dist/src/ner/worker.d.ts.map +1 -0
- package/dist/src/ner/worker.js +35302 -0
- package/dist/src/ner/worker.js.map +30 -0
- package/dist/src/pipeline.d.ts +76 -0
- package/dist/src/pipeline.d.ts.map +1 -0
- package/dist/src/policy.d.ts +27 -0
- package/dist/src/policy.d.ts.map +1 -0
- package/dist/src/premask.d.ts +48 -0
- package/dist/src/premask.d.ts.map +1 -0
- package/dist/src/session.d.ts +60 -0
- package/dist/src/session.d.ts.map +1 -0
- package/dist/src/streaming.d.ts +32 -0
- package/dist/src/streaming.d.ts.map +1 -0
- package/dist/src/types.d.ts +43 -0
- package/dist/src/types.d.ts.map +1 -0
- package/dist/src/validators.d.ts +16 -0
- package/dist/src/validators.d.ts.map +1 -0
- package/eval/bench/README.md +91 -0
- package/eval/bench/fetch.ts +152 -0
- package/eval/bench/labels.ts +45 -0
- package/eval/bench/run.ts +146 -0
- package/eval/bench/runs/m06-v3-30k/by_language.json +303 -0
- package/eval/bench/runs/m06-v3-30k/summary.json +56 -0
- package/eval/bench/runs/sample-900/by_language.json +303 -0
- package/eval/bench/runs/sample-900/manifest.json +926 -0
- package/eval/bench/runs/sample-900/summary.json +56 -0
- package/eval/bench/score.ts +197 -0
- package/eval/bench/webgpu/entry.ts +70 -0
- package/eval/bench/webgpu/index.html +12 -0
- package/eval/bench/webgpu.ts +209 -0
- package/eval/public-cases.ts +412 -0
- package/eval/run-public-eval.ts +140 -0
- package/examples/basic-chat.ts +12 -0
- package/examples/pii-worker.ts +3 -0
- package/index.ts +47 -0
- package/package.json +103 -0
- package/src/guard.ts +170 -0
- package/src/heuristics.ts +141 -0
- package/src/ner/classifier.ts +580 -0
- package/src/ner/worker.ts +130 -0
- package/src/policy.ts +64 -0
- package/src/premask.ts +90 -0
- package/src/session.ts +99 -0
- package/src/streaming.ts +73 -0
- package/src/types.ts +74 -0
- package/src/validators.ts +40 -0
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ChatGuard: per-conversation PII filter for wiring into a chat app.
|
|
3
|
+
*
|
|
4
|
+
* const guard = await createGuard();
|
|
5
|
+
* const safe = await guard.protect(userInput);
|
|
6
|
+
* const reply = await llm(safe.text);
|
|
7
|
+
* guard.reveal(reply); // non-streaming
|
|
8
|
+
* stream.pipeThrough(guard.revealTransform()); // streaming
|
|
9
|
+
*
|
|
10
|
+
* The entity table lives only on the client; the real values never leave the
|
|
11
|
+
* device. Each guard keeps placeholder identity stable across every turn of a
|
|
12
|
+
* conversation.
|
|
13
|
+
*
|
|
14
|
+
* user message
|
|
15
|
+
* → heuristics (sync, structured PII)
|
|
16
|
+
* → NER (optional, async, contextual PII)
|
|
17
|
+
* → merge + default-deny policy (keep {city, state, zip})
|
|
18
|
+
* → session table: replace with stable placeholders
|
|
19
|
+
* assistant reply
|
|
20
|
+
* → rehydrate placeholders → render to user
|
|
21
|
+
*/
|
|
22
|
+
import { type NerOptions } from "./ner/classifier";
|
|
23
|
+
import { type PlaceholderAliases, type ScrubResult } from "./session";
|
|
24
|
+
import { type PiiLabel, type Span } from "./types";
|
|
25
|
+
/** An async contextual detector. Matches both the in-process and worker forms. */
|
|
26
|
+
export type NerDetector = (text: string) => Promise<Span[]>;
|
|
27
|
+
/**
|
|
28
|
+
* Default placeholder aliases. Empty: names are split into GIVEN_NAME/SURNAME
|
|
29
|
+
* and a household may share a surname, so each keeps its own typed token
|
|
30
|
+
* (`[GIVEN_NAME_1]`, `[SURNAME_1]`) rather than collapsing to a single `NAME`.
|
|
31
|
+
*/
|
|
32
|
+
export declare const DEFAULT_ALIASES: PlaceholderAliases;
|
|
33
|
+
export interface GuardOptions {
|
|
34
|
+
/** Placeholder aliases. Defaults to `{}` — typed tokens like `[GIVEN_NAME_1]`. */
|
|
35
|
+
readonly aliases?: PlaceholderAliases;
|
|
36
|
+
/** Labels to preserve; defaults to `{CITY, STATE, ZIP_CODE}`. */
|
|
37
|
+
readonly keepLabels?: readonly PiiLabel[];
|
|
38
|
+
/**
|
|
39
|
+
* When `true`, skip the structured-PII premask before the model. Required for
|
|
40
|
+
* a model trained without prefilter (no-prefilter ablation) whose classes
|
|
41
|
+
* include SSN / CREDIT_CARD / IP_ADDRESS. Heuristic spans for those types
|
|
42
|
+
* still run as a safety net.
|
|
43
|
+
*/
|
|
44
|
+
readonly noPrefilter?: boolean;
|
|
45
|
+
/** Pre-built NER detector. When set, `model` is ignored. */
|
|
46
|
+
readonly ner?: NerDetector;
|
|
47
|
+
/** When `true`, skip the classifier and run heuristics only. */
|
|
48
|
+
readonly heuristicsOnly?: boolean;
|
|
49
|
+
/**
|
|
50
|
+
* Hugging Face model id or local directory path (q4 ONNX). Defaults to
|
|
51
|
+
* {@link RAMPART_MODEL_ID}.
|
|
52
|
+
*/
|
|
53
|
+
readonly model?: string;
|
|
54
|
+
/** Worker script URL. When set, NER runs off the main thread. */
|
|
55
|
+
readonly worker?: string | URL;
|
|
56
|
+
/** Backend. `"wasm"`/`"webgpu"` in browsers; `"cpu"` for Node. */
|
|
57
|
+
readonly device?: NerOptions["device"];
|
|
58
|
+
/** Spans below this score are discarded. */
|
|
59
|
+
readonly minScore?: number;
|
|
60
|
+
}
|
|
61
|
+
type ChatGuardConfig = Pick<GuardOptions, "ner" | "aliases" | "keepLabels" | "noPrefilter">;
|
|
62
|
+
export declare class ChatGuard {
|
|
63
|
+
private readonly table;
|
|
64
|
+
private readonly ner?;
|
|
65
|
+
private readonly noPrefilter;
|
|
66
|
+
constructor(config?: ChatGuardConfig);
|
|
67
|
+
private detect;
|
|
68
|
+
/**
|
|
69
|
+
* Run this on the user's text *before* handing it to the AI SDK. Returns the
|
|
70
|
+
* placeholdered text to send plus the placeholders introduced this turn.
|
|
71
|
+
*/
|
|
72
|
+
protect(text: string): Promise<ScrubResult>;
|
|
73
|
+
/** Restore real values in a complete (non-streaming) assistant reply. */
|
|
74
|
+
reveal(reply: string): string;
|
|
75
|
+
/**
|
|
76
|
+
* A Web Streams transform that reveals placeholders in a streamed reply,
|
|
77
|
+
* correctly handling placeholders split across chunks. Pipe an AI SDK
|
|
78
|
+
* `textStream` through it before rendering.
|
|
79
|
+
*/
|
|
80
|
+
revealTransform(): TransformStream<string, string>;
|
|
81
|
+
/**
|
|
82
|
+
* Defense in depth: scrub the model's *output* before logging/persisting it,
|
|
83
|
+
* since a model can emit PII the user never typed. Returns placeholdered text.
|
|
84
|
+
*/
|
|
85
|
+
protectReply(reply: string): Promise<ScrubResult>;
|
|
86
|
+
}
|
|
87
|
+
/**
|
|
88
|
+
* Build a conversation guard. Loads the Rampart classifier (q4 ONNX) by default.
|
|
89
|
+
* Pass `model` for a different Hugging Face id or local path, `heuristicsOnly:
|
|
90
|
+
* true` to skip NER, or `ner` to inject a custom detector.
|
|
91
|
+
*/
|
|
92
|
+
export declare function createGuard(options?: GuardOptions): Promise<ChatGuard>;
|
|
93
|
+
export {};
|
|
94
|
+
//# sourceMappingURL=guard.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"guard.d.ts","sourceRoot":"","sources":["../../src/guard.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAGH,OAAO,EAAkD,KAAK,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAInG,OAAO,EAAsB,KAAK,kBAAkB,EAAE,KAAK,WAAW,EAAE,MAAM,WAAW,CAAC;AAC1F,OAAO,EAAqB,KAAK,QAAQ,EAAE,KAAK,IAAI,EAAE,MAAM,SAAS,CAAC;AAEtE,kFAAkF;AAClF,MAAM,MAAM,WAAW,GAAG,CAAC,IAAI,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;AAE5D;;;;GAIG;AACH,eAAO,MAAM,eAAe,EAAE,kBAAuB,CAAC;AAEtD,MAAM,WAAW,YAAY;IAC3B,kFAAkF;IAClF,QAAQ,CAAC,OAAO,CAAC,EAAE,kBAAkB,CAAC;IACtC,iEAAiE;IACjE,QAAQ,CAAC,UAAU,CAAC,EAAE,SAAS,QAAQ,EAAE,CAAC;IAC1C;;;;;OAKG;IACH,QAAQ,CAAC,WAAW,CAAC,EAAE,OAAO,CAAC;IAC/B,4DAA4D;IAC5D,QAAQ,CAAC,GAAG,CAAC,EAAE,WAAW,CAAC;IAC3B,gEAAgE;IAChE,QAAQ,CAAC,cAAc,CAAC,EAAE,OAAO,CAAC;IAClC;;;OAGG;IACH,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,iEAAiE;IACjE,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,GAAG,GAAG,CAAC;IAC/B,kEAAkE;IAClE,QAAQ,CAAC,MAAM,CAAC,EAAE,UAAU,CAAC,QAAQ,CAAC,CAAC;IACvC,4CAA4C;IAC5C,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,KAAK,eAAe,GAAG,IAAI,CAAC,YAAY,EAAE,KAAK,GAAG,SAAS,GAAG,YAAY,GAAG,aAAa,CAAC,CAAC;AAE5F,qBAAa,SAAS;IACpB,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAqB;IAC3C,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAc;IACnC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAU;gBAE1B,MAAM,GAAE,eAAoB;YAM1B,MAAM;IAiBpB;;;OAGG;IACG,OAAO,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC;IAKjD,yEAAyE;IACzE,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM;IAI7B;;;;OAIG;IACH,eAAe,IAAI,eAAe,CAAC,MAAM,EAAE,MAAM,CAAC;IAOlD;;;OAGG;IACG,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC;CAIxD;AAsBD;;;;GAIG;AACH,wBAAsB,WAAW,CAAC,OAAO,GAAE,YAAiB,GAAG,OAAO,CAAC,SAAS,CAAC,CAShF"}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Heuristic detectors: the cheap, synchronous, zero-model first pass.
|
|
3
|
+
*
|
|
4
|
+
* Digit-bearing PII (SSN, card, phone, routing) is found over the *digit
|
|
5
|
+
* projection* (see normalize.ts) so every separator variant collapses to one
|
|
6
|
+
* rule: `888-88-8888`, `888 88 8888`, `888.88.8888`, and `888888888` all match.
|
|
7
|
+
* Text-shaped PII (email, URL, IP) is matched on the raw string where the
|
|
8
|
+
* structure lives in the punctuation.
|
|
9
|
+
*/
|
|
10
|
+
import type { Span } from "./types";
|
|
11
|
+
/** Run all heuristic detectors over the raw input. Spans may overlap; the
|
|
12
|
+
* pipeline's merge step resolves conflicts before redaction. */
|
|
13
|
+
export declare function detectHeuristics(raw: string): Span[];
|
|
14
|
+
//# sourceMappingURL=heuristics.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"heuristics.d.ts","sourceRoot":"","sources":["../../src/heuristics.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,KAAK,EAAY,IAAI,EAAE,MAAM,SAAS,CAAC;AA2H9C;gEACgE;AAChE,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,EAAE,CAKpD"}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Contextual PII detection via a small token-classification model running in
|
|
3
|
+
* the browser (transformers.js → ONNX Runtime Web, wasm or WebGPU backend).
|
|
4
|
+
*
|
|
5
|
+
* This is the residual layer: it catches what the heuristics can't — people's
|
|
6
|
+
* names, organizations, and free-text identifiers — which is exactly the PII we
|
|
7
|
+
* never want in our logs. The model is intentionally tiny and q4-quantized so
|
|
8
|
+
* it loads once (cached in IndexedDB by the runtime) and runs on-device with no
|
|
9
|
+
* server round-trip and no shared queue to saturate.
|
|
10
|
+
*
|
|
11
|
+
* Label mapping: the fine-tuned model emits token-classification entity groups,
|
|
12
|
+
* which we map onto our {@link PiiLabel} set. CITY/STATE/ZIP_CODE are emitted
|
|
13
|
+
* too so the merge step can carry them through to the keep-set.
|
|
14
|
+
*/
|
|
15
|
+
import type { Span } from "../types";
|
|
16
|
+
/** Minimal shape of a transformers.js token-classification result row. */
|
|
17
|
+
interface RawEntity {
|
|
18
|
+
readonly entity_group?: string;
|
|
19
|
+
readonly entity?: string;
|
|
20
|
+
readonly score: number;
|
|
21
|
+
readonly start: number;
|
|
22
|
+
readonly end: number;
|
|
23
|
+
readonly word: string;
|
|
24
|
+
}
|
|
25
|
+
/** Counts the model tokens in a string, excluding the [CLS]/[SEP] specials. */
|
|
26
|
+
export type TokenCounter = (text: string) => number;
|
|
27
|
+
/**
|
|
28
|
+
* The callable returned by a token-classification pipeline. `countTokens` is
|
|
29
|
+
* attached when the classifier is backed by a real tokenizer (see
|
|
30
|
+
* {@link loadNerClassifier}); {@link detectNer} uses it to size windows by the
|
|
31
|
+
* model's token budget. Bare mocks may omit it, in which case detection runs the
|
|
32
|
+
* whole input as a single window.
|
|
33
|
+
*/
|
|
34
|
+
export interface TokenClassifier {
|
|
35
|
+
(text: string, options?: {
|
|
36
|
+
aggregation_strategy?: "simple" | "first" | "max";
|
|
37
|
+
}): Promise<RawEntity[]>;
|
|
38
|
+
countTokens?: TokenCounter;
|
|
39
|
+
}
|
|
40
|
+
/** The shipped Rampart token-classifier on Hugging Face (q4 ONNX only). */
|
|
41
|
+
export declare const RAMPART_MODEL_ID = "nationaldesignstudio/rampart";
|
|
42
|
+
export interface NerOptions {
|
|
43
|
+
/**
|
|
44
|
+
* Hugging Face model id or local directory path. Must be a token-classification
|
|
45
|
+
* ONNX export compatible with Rampart's label schema. Defaults to
|
|
46
|
+
* {@link RAMPART_MODEL_ID}.
|
|
47
|
+
*/
|
|
48
|
+
readonly model?: string;
|
|
49
|
+
/** Backend. `"wasm"`/`"webgpu"` in browsers; `"cpu"` for Node (ORT). */
|
|
50
|
+
readonly device?: "wasm" | "webgpu" | "cpu";
|
|
51
|
+
/** Spans below this score are discarded. Low default → recall-biased. */
|
|
52
|
+
readonly minScore?: number;
|
|
53
|
+
}
|
|
54
|
+
/** Per-window content-token budget: the model max less specials and a safety margin. */
|
|
55
|
+
export declare const NER_TOKEN_BUDGET: number;
|
|
56
|
+
/**
|
|
57
|
+
* Tokens shared by consecutive NER windows. Long input slides a window of
|
|
58
|
+
* {@link NER_TOKEN_BUDGET} tokens; this overlap guarantees an entity landing on a
|
|
59
|
+
* window seam is still *wholly* inside a neighbouring window.
|
|
60
|
+
*
|
|
61
|
+
* The invariant: as long as the overlap is at least the longest entity we detect
|
|
62
|
+
* (names, orgs, street lines — all a handful of tokens), no entity is ever split
|
|
63
|
+
* across a boundary, so a window-edge name is never silently dropped. The generous
|
|
64
|
+
* margin over the longest entity also means a seam entity reappears deep inside its
|
|
65
|
+
* neighbour with ample context, which the classifier needs to label it confidently.
|
|
66
|
+
*/
|
|
67
|
+
export declare const NER_TOKEN_OVERLAP = 64;
|
|
68
|
+
/**
|
|
69
|
+
* Lazily construct the token-classification pipeline. transformers.js is a peer
|
|
70
|
+
* dependency and a heavy import, so it is loaded on first use, not at module
|
|
71
|
+
* load — keeping the heuristic path dependency-free.
|
|
72
|
+
*/
|
|
73
|
+
export declare function loadNerClassifier(options?: NerOptions): Promise<TokenClassifier>;
|
|
74
|
+
/**
|
|
75
|
+
* Detect contextual PII across the whole input, regardless of length.
|
|
76
|
+
*
|
|
77
|
+
* The model has a fixed token budget, so input longer than one window is scanned
|
|
78
|
+
* as a sliding window sized to {@link NER_TOKEN_BUDGET} *tokens* (measured by the
|
|
79
|
+
* classifier's own tokenizer) that overlaps its neighbour by {@link NER_TOKEN_OVERLAP}
|
|
80
|
+
* tokens. Each window's spans are shifted back into whole-text coordinates; because
|
|
81
|
+
* windows overlap, an entity on a seam is re-detected in both, so {@link mergeSpans}
|
|
82
|
+
* collapses the duplicates into the canonical disjoint set. Input that fits one
|
|
83
|
+
* window — or any classifier without a tokenizer, e.g. a bare test mock — takes a
|
|
84
|
+
* single-window fast path identical to scanning the text directly.
|
|
85
|
+
*
|
|
86
|
+
* Sizing by tokens rather than a char cap means a window holds exactly as much
|
|
87
|
+
* text as the model can attend to, and nothing past a fixed char count is silently
|
|
88
|
+
* dropped: the overlap keeps any entity from being split across a seam.
|
|
89
|
+
*/
|
|
90
|
+
export declare function detectNer(raw: string, classifier: TokenClassifier, minScore?: number): Promise<Span[]>;
|
|
91
|
+
export {};
|
|
92
|
+
//# sourceMappingURL=classifier.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"classifier.d.ts","sourceRoot":"","sources":["../../../src/ner/classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;GAaG;AAGH,OAAO,KAAK,EAAY,IAAI,EAAE,MAAM,UAAU,CAAC;AAE/C,0EAA0E;AAC1E,UAAU,SAAS;IACjB,QAAQ,CAAC,YAAY,CAAC,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;CACvB;AAED,+EAA+E;AAC/E,MAAM,MAAM,YAAY,GAAG,CAAC,IAAI,EAAE,MAAM,KAAK,MAAM,CAAC;AAEpD;;;;;;GAMG;AACH,MAAM,WAAW,eAAe;IAC9B,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAAE,oBAAoB,CAAC,EAAE,QAAQ,GAAG,OAAO,GAAG,KAAK,CAAA;KAAE,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC,CAAC;IACtG,WAAW,CAAC,EAAE,YAAY,CAAC;CAC5B;AA6BD,2EAA2E;AAC3E,eAAO,MAAM,gBAAgB,iCAAiC,CAAC;AAE/D,MAAM,WAAW,UAAU;IACzB;;;;OAIG;IACH,QAAQ,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IACxB,wEAAwE;IACxE,QAAQ,CAAC,MAAM,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IAC5C,yEAAyE;IACzE,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;CAC5B;AAiBD,wFAAwF;AACxF,eAAO,MAAM,gBAAgB,QAAyC,CAAC;AAEvE;;;;;;;;;;GAUG;AACH,eAAO,MAAM,iBAAiB,KAAK,CAAC;AAWpC;;;;GAIG;AACH,wBAAsB,iBAAiB,CAAC,OAAO,GAAE,UAAe,GAAG,OAAO,CAAC,eAAe,CAAC,CA0B1F;AAED;;;;;;;;;;;;;;;GAeG;AACH,wBAAsB,SAAS,CAC7B,GAAG,EAAE,MAAM,EACX,UAAU,EAAE,eAAe,EAC3B,QAAQ,GAAE,MAAiC,GAC1C,OAAO,CAAC,IAAI,EAAE,CAAC,CAoBjB"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Web Worker host for the NER classifier.
|
|
3
|
+
*
|
|
4
|
+
* Inference must not jank the chat UI, so the model lives on a worker thread.
|
|
5
|
+
* The main thread talks to it through {@link createWorkerClassifier}, which
|
|
6
|
+
* adapts the postMessage round-trip back into the {@link TokenClassifier}
|
|
7
|
+
* signature the pipeline expects — so the rest of the system is agnostic to
|
|
8
|
+
* whether detection runs on the main thread or off it.
|
|
9
|
+
*
|
|
10
|
+
* Bundle this file as the worker entry (it self-registers `onmessage`). The
|
|
11
|
+
* client is created on the main thread with `new Worker(new URL(...))`.
|
|
12
|
+
*/
|
|
13
|
+
import { type NerOptions } from "./classifier";
|
|
14
|
+
interface InitMessage {
|
|
15
|
+
readonly kind: "init";
|
|
16
|
+
readonly options: NerOptions;
|
|
17
|
+
}
|
|
18
|
+
interface DetectMessage {
|
|
19
|
+
readonly kind: "detect";
|
|
20
|
+
readonly id: number;
|
|
21
|
+
readonly text: string;
|
|
22
|
+
readonly minScore?: number;
|
|
23
|
+
}
|
|
24
|
+
type InboundMessage = InitMessage | DetectMessage;
|
|
25
|
+
type WorkerInboundEvent = {
|
|
26
|
+
data: InboundMessage;
|
|
27
|
+
};
|
|
28
|
+
export type WorkerMessagePort = {
|
|
29
|
+
onmessage: ((event: WorkerInboundEvent) => void) | null;
|
|
30
|
+
postMessage: (message: unknown) => void;
|
|
31
|
+
};
|
|
32
|
+
/** Register the worker message handler. Call from the worker entry module. */
|
|
33
|
+
export declare function registerNerWorker(scope: WorkerMessagePort): void;
|
|
34
|
+
/**
|
|
35
|
+
* Wrap a worker as a {@link TokenClassifier}-compatible async function. The
|
|
36
|
+
* detection contract is span-in/span-out, so callers use it exactly like the
|
|
37
|
+
* in-process classifier. The returned `ready` promise resolves once the worker reports `ready`.
|
|
38
|
+
*/
|
|
39
|
+
export declare function createWorkerClassifier(worker: WorkerMessagePort, options: NerOptions): {
|
|
40
|
+
ready: Promise<void>;
|
|
41
|
+
detect: (text: string, minScore?: number) => Promise<unknown>;
|
|
42
|
+
};
|
|
43
|
+
export {};
|
|
44
|
+
//# sourceMappingURL=worker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"worker.d.ts","sourceRoot":"","sources":["../../../src/ner/worker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAgC,KAAK,UAAU,EAAwB,MAAM,cAAc,CAAC;AAEnG,UAAU,WAAW;IACnB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,OAAO,EAAE,UAAU,CAAC;CAC9B;AACD,UAAU,aAAa;IACrB,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC;IACxB,QAAQ,CAAC,EAAE,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;CAC5B;AACD,KAAK,cAAc,GAAG,WAAW,GAAG,aAAa,CAAC;AAElD,KAAK,kBAAkB,GAAG;IACxB,IAAI,EAAE,cAAc,CAAC;CACtB,CAAC;AAMF,MAAM,MAAM,iBAAiB,GAAG;IAC9B,SAAS,EAAE,CAAC,CAAC,KAAK,EAAE,kBAAkB,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC;IACxD,WAAW,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,IAAI,CAAC;CACzC,CAAC;AAIF,8EAA8E;AAC9E,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,iBAAiB,GAAG,IAAI,CA6BhE;AAID;;;;GAIG;AACH,wBAAgB,sBAAsB,CACpC,MAAM,EAAE,iBAAiB,EACzB,OAAO,EAAE,UAAU,GAClB;IAAE,KAAK,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;IAAC,MAAM,EAAE,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,MAAM,KAAK,OAAO,CAAC,OAAO,CAAC,CAAA;CAAE,CA6CzF"}
|