semantic-state-estimator 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,265 @@
1
+ # semantic-state-estimator
2
+
3
+ [![npm version](https://img.shields.io/npm/v/semantic-state-estimator?style=flat-square)](https://www.npmjs.com/package/semantic-state-estimator)
4
+ [![license](https://img.shields.io/npm/l/semantic-state-estimator?style=flat-square)](LICENSE)
5
+ [![node](https://img.shields.io/node/v/semantic-state-estimator?style=flat-square)](https://nodejs.org)
6
+
7
+ **Bridge the gap between boolean UI state and semantic AI intent — all inside a WebWorker, on-device, zero-latency.**
8
+ Instead of asking *"did the user click?"*, this library asks *"what does the user **mean**?"* — fusing local text embeddings with Exponential Moving Average (EMA) to build a living, drifting semantic context of your entire session.
9
+
10
+ ## Installation
11
+
12
+ ```bash
13
+ npm install semantic-state-estimator
14
+ ```
15
+
16
+ > **Peer dependencies:** `react >=18` and/or `zustand >=4` are optional — only install what you need.
17
+
18
+ ---
19
+
20
+ ## Quick Start
21
+
22
+ ### 1. Initialize the `SemanticStateEngine`
23
+
24
+ ```typescript
25
+ import { WorkerManager, SemanticStateEngine } from 'semantic-state-estimator';
26
+
27
+ // The worker uses import.meta.url so Webpack 5 and Vite resolve the path correctly.
28
+ const workerManager = new WorkerManager();
29
+
30
+ const engine = new SemanticStateEngine({
31
+ provider: workerManager,
32
+ alpha: 0.5, // Balanced EMA decay — see "Tuning the Math" below
33
+ driftThreshold: 0.75, // Fire onDriftDetected when similarity drops below this
34
+ onDriftDetected: (vector, driftScore) => {
35
+ console.log(`Semantic drift detected! Score: ${driftScore.toFixed(3)}`);
36
+ },
37
+ });
38
+
39
+ // Feed events into the engine — it runs inside the WebWorker, never blocks your UI
40
+ await engine.update('user opened the billing settings');
41
+ await engine.update('user clicked "cancel subscription"');
42
+
43
+ const snapshot = engine.getSnapshot();
44
+ console.log(snapshot.semanticSummary); // "stable" | "drifting" | "volatile"
45
+ ```
46
+
47
+ ### 2. Wrap a Zustand Store
48
+
49
+ ```typescript
50
+ import { create } from 'zustand';
51
+ import { semanticMiddleware } from 'semantic-state-estimator/zustand';
52
+ import { WorkerManager, SemanticStateEngine } from 'semantic-state-estimator';
53
+
54
+ type AppState = {
55
+ page: string;
56
+ cartItems: number;
57
+ setPage: (page: string) => void;
58
+ addToCart: () => void;
59
+ };
60
+
61
+ const workerManager = new WorkerManager();
62
+ const engine = new SemanticStateEngine({ provider: workerManager, alpha: 0.5, driftThreshold: 0.75 });
63
+
64
+ // Wrap your store creator with semanticMiddleware
65
+ export const useAppStore = create<AppState>(
66
+ semanticMiddleware(
67
+ engine,
68
+ // Map each state transition to a semantic string — return null to skip
69
+ (next, prev) => {
70
+ if (next.page !== prev.page) return `user navigated to ${next.page}`;
71
+ if (next.cartItems > prev.cartItems) return 'user added item to cart';
72
+ return null;
73
+ },
74
+ (set) => ({
75
+ page: 'home',
76
+ cartItems: 0,
77
+ setPage: (page) => set({ page }),
78
+ addToCart: () => set((s) => ({ cartItems: s.cartItems + 1 })),
79
+ }),
80
+ ),
81
+ );
82
+ ```
83
+
84
+ ---
85
+
86
+ ## Tuning the Math: The EMA α (Decay) Weight
87
+
88
+ The `alpha` parameter controls how quickly new events override the session history.
89
+
90
+ | α value | Behavior | Best for |
91
+ |---------|-----------|----------|
92
+ | `0.1` | **Slow drift, highly stable.** Requires many consistent events to shift the state. Past context dominates. | Long-running sessions, background intent tracking |
93
+ | `0.5` | **Balanced.** Responds well to recent events while still remembering session history. | General-purpose apps, e-commerce, dashboards |
94
+ | `0.9` | **Highly reactive.** Almost instantly forgets past context in favour of the latest event. | Real-time chat, game UIs, live coding tools |
95
+
96
+ The EMA formula applied on every `engine.update(text)` call:
97
+
98
+ ```
99
+ S_t = α · E_t + (1 − α) · S_{t−1}
100
+ ```
101
+
102
+ Where `E_t` is the embedding of the incoming event and `S_{t−1}` is the previous state vector.
103
+
104
+ ---
105
+
106
+ ## The Drift Callback
107
+
108
+ The `onDriftDetected` callback fires **before** EMA fusion is applied, giving you a chance to react to a sharp semantic shift — e.g. a user suddenly switching from "browsing products" to "requesting a refund".
109
+
110
+ ### With `SemanticStateEngine` directly
111
+
112
+ ```typescript
113
+ const engine = new SemanticStateEngine({
114
+ provider: workerManager,
115
+ alpha: 0.5,
116
+ driftThreshold: 0.75,
117
+ onDriftDetected: (vector, driftScore) => {
118
+ // driftScore = 1 − cosine_similarity ∈ [0, 2]
119
+ if (driftScore > 0.8) {
120
+ showModal('We noticed your focus shifted. Can we help?');
121
+ }
122
+ },
123
+ });
124
+ ```
125
+
126
+ ### With the React `useSemanticState` Hook
127
+
128
+ ```tsx
129
+ import { useSemanticState } from 'semantic-state-estimator/react';
130
+ import { useEffect, useState } from 'react';
131
+
132
+ function SemanticStatusBanner({ engine }) {
133
+ const [showDriftModal, setShowDriftModal] = useState(false);
134
+ const snapshot = useSemanticState(engine); // re-renders on every engine.update()
135
+
136
+ useEffect(() => {
137
+ if (snapshot.semanticSummary === 'volatile') {
138
+ setShowDriftModal(true);
139
+ }
140
+ }, [snapshot.semanticSummary]);
141
+
142
+ return (
143
+ <>
144
+ <div>Health: {(snapshot.healthScore * 100).toFixed(0)}%</div>
145
+ <div>State: {snapshot.semanticSummary}</div>
146
+ {showDriftModal && (
147
+ <Modal onClose={() => setShowDriftModal(false)}>
148
+ Your session intent has shifted significantly. Need help?
149
+ </Modal>
150
+ )}
151
+ </>
152
+ );
153
+ }
154
+ ```
155
+
156
+ ---
157
+
158
+ ## API Reference
159
+
160
+ ### `new WorkerManager(workerUrl?, modelName?)`
161
+
162
+ | Parameter | Type | Default | Description |
163
+ |-----------|------|---------|-------------|
164
+ | `workerUrl` | `string \| URL` | `new URL('./embedding.worker.js', import.meta.url)` | Location of the compiled worker file |
165
+ | `modelName` | `string` | `"Xenova/all-MiniLM-L6-v2"` | HuggingFace model for text embeddings |
166
+
167
+ ### `new SemanticStateEngine(config)`
168
+
169
+ | Option | Type | Default | Description |
170
+ |--------|------|---------|-------------|
171
+ | `provider` | `EmbeddingProvider` | *(required)* | Provides async embedding vectors. `WorkerManager` satisfies this interface out of the box; you can also pass a custom OpenAI, Ollama, or any other wrapper. |
172
+ | `alpha` | `number` | — | EMA decay factor α ∈ (0, 1] |
173
+ | `driftThreshold` | `number` | — | Cosine-similarity threshold; drift fires when the similarity between the incoming embedding and the current state falls below this value |
174
+ | `onDriftDetected` | `(vector, driftScore) => void` | `undefined` | Callback on semantic drift |
175
+ | `modelName` | `string` | `"Xenova/all-MiniLM-L6-v2"` | Model name (informational) |
176
+
177
+ ### `engine.getSnapshot()` → `Snapshot`
178
+
179
+ ```typescript
180
+ {
181
+ vector: number[]; // Current EMA state vector
182
+ healthScore: number; // Reliability [0, 1] — degrades with age and drift
183
+ timestamp: number; // Unix ms of last update
184
+ semanticSummary: string; // "stable" | "drifting" | "volatile"
185
+ }
186
+ ```
187
+
188
+ ---
189
+
190
+ ## Custom Embedding Providers
191
+
192
+ The `SemanticStateEngine` accepts any object that implements the `EmbeddingProvider` interface:
193
+
194
+ ```typescript
195
+ import type { EmbeddingProvider } from 'semantic-state-estimator';
196
+
197
+ interface EmbeddingProvider {
198
+ getEmbedding(text: string): Promise<Float32Array | number[]>;
199
+ }
200
+ ```
201
+
202
+ `WorkerManager` satisfies this interface automatically, so existing code continues to work. You can also write a thin wrapper to use any other embedding source:
203
+
204
+ ### OpenAI Provider
205
+
206
+ ```typescript
207
+ import type { EmbeddingProvider } from 'semantic-state-estimator';
208
+
209
+ class OpenAIProvider implements EmbeddingProvider {
210
+ constructor(private apiKey: string, private model = "text-embedding-3-small") {}
211
+
212
+ async getEmbedding(text: string): Promise<number[]> {
213
+ const res = await fetch("https://api.openai.com/v1/embeddings", {
214
+ method: "POST",
215
+ headers: {
216
+ "Content-Type": "application/json",
217
+ "Authorization": `Bearer ${this.apiKey}`
218
+ },
219
+ body: JSON.stringify({ input: text, model: this.model })
220
+ });
221
+ const data = await res.json();
222
+ return data.data[0].embedding; // 1536-dimension array
223
+ }
224
+ }
225
+
226
+ const engine = new SemanticStateEngine({
227
+ alpha: 0.5,
228
+ driftThreshold: 0.75,
229
+ provider: new OpenAIProvider("sk-..."),
230
+ });
231
+ ```
232
+
233
+ ### Ollama Provider
234
+
235
+ ```typescript
236
+ import type { EmbeddingProvider } from 'semantic-state-estimator';
237
+
238
+ class OllamaProvider implements EmbeddingProvider {
239
+ constructor(private model = "nomic-embed-text", private url = "http://localhost:11434") {}
240
+
241
+ async getEmbedding(text: string): Promise<number[]> {
242
+ const res = await fetch(`${this.url}/api/embeddings`, {
243
+ method: "POST",
244
+ headers: { "Content-Type": "application/json" },
245
+ body: JSON.stringify({ model: this.model, prompt: text })
246
+ });
247
+ const data = await res.json();
248
+ return data.embedding; // 768-dimension array
249
+ }
250
+ }
251
+
252
+ const engine = new SemanticStateEngine({
253
+ alpha: 0.5,
254
+ driftThreshold: 0.75,
255
+ provider: new OllamaProvider(),
256
+ });
257
+ ```
258
+
259
+ > ⚠️ **Frontend / high-frequency usage warning:** The built-in `WorkerManager` runs inference locally in the browser in ~20–50 ms. If you replace it with a remote provider such as `OpenAIProvider`, every `engine.update()` call incurs a 300 ms–800 ms network round-trip. When used with `semanticMiddleware` on rapid UI state changes, requests will queue up and you may hit API rate limits quickly. Remote providers are best suited for server-side or local-desktop applications where update frequency is low.
260
+
261
+ ---
262
+
263
+ ## License
264
+
265
+ MIT
@@ -0,0 +1,108 @@
1
+ /**
2
+ * A generic embedding provider contract.
3
+ * Any object that can return an embedding vector for a given text satisfies this interface.
4
+ * This includes `WorkerManager` (on-device WebWorker) as well as custom OpenAI, Ollama,
5
+ * or other remote-inference wrappers.
+ * NOTE(review): successive vectors are EMA-fused by the engine, so a provider should
+ * return a fixed dimensionality across calls — confirm against the implementation.
6
+ */
7
+ interface EmbeddingProvider {
8
+ getEmbedding(text: string): Promise<Float32Array | number[]>;
9
+ }
10
+ /**
11
+ * Configuration for the SemanticStateEngine.
12
+ */
13
+ interface SemanticStateEngineConfig {
14
+ /** EMA decay factor α ∈ (0, 1]. Higher values weight recent embeddings more. */
15
+ alpha: number;
16
+ /** Minimum cosine similarity below which drift is detected and the callback fires. */
17
+ driftThreshold: number;
18
+ /**
19
+ * Optional callback invoked when the incoming embedding drifts beyond the threshold.
20
+ * Fired *before* the EMA fusion is applied.
21
+ *
22
+ * @param vector The incoming embedding that triggered the drift.
23
+ * @param driftScore Drift magnitude: 1 − cosine_similarity ∈ [0, 2].
24
+ */
25
+ onDriftDetected?: (vector: number[], driftScore: number) => void;
26
+ /**
27
+ * The embedding provider used to obtain embedding vectors asynchronously.
28
+ * Any object implementing `getEmbedding(text: string): Promise<Float32Array | number[]>`
29
+ * satisfies this interface — including `WorkerManager`, or a custom OpenAI / Ollama wrapper.
30
+ */
31
+ provider: EmbeddingProvider;
32
+ /**
33
+ * The name of the embedding model to use.
34
+ * Must match the modelName passed to the WorkerManager so the worker
35
+ * loads the correct model.
+ * Informational for the engine itself — the provider performs the actual embedding.
36
+ * @default "Xenova/all-MiniLM-L6-v2"
37
+ */
38
+ modelName?: string;
39
+ }
40
+ /**
41
+ * A point-in-time snapshot of the current semantic state.
42
+ */
43
+ interface Snapshot {
44
+ /** The current EMA state vector. */
45
+ vector: number[];
46
+ /** Reliability indicator in [0, 1]. Degrades with age and high drift. */
47
+ healthScore: number;
48
+ /** Unix timestamp (ms) of the last state update. */
49
+ timestamp: number;
50
+ /** Human-readable description of the current state quality: "stable", "drifting", or "volatile". */
51
+ semanticSummary: string;
52
+ }
53
+ /**
54
+ * SemanticStateEngine tracks the implicit semantic intent of an event stream
55
+ * using Exponential Moving Average (EMA) vector fusion.
56
+ *
57
+ * It fires an optional drift callback when incoming embeddings diverge
58
+ * significantly from the current state, and exposes a healthScore that
59
+ * degrades with both age and volatility.
60
+ */
61
+ declare class SemanticStateEngine {
62
+ private readonly alpha;
63
+ private readonly driftThreshold;
64
+ private readonly onDriftDetected?;
65
+ private readonly provider;
66
+ readonly modelName: string;
67
+ /** Rolling EMA state vector; the first update establishes the baseline. */
+ private stateVector;
68
+ private lastUpdatedAt;
69
+ /** Most recent drift magnitude; feeds the drift penalty in calculateHealth. */
+ private lastDrift;
70
+ private updateCount;
71
+ private readonly listeners;
72
+ constructor(config: SemanticStateEngineConfig);
73
+ /**
74
+ * Obtains an embedding for `text` from the configured embedding provider and fuses it into
75
+ * the rolling semantic state using EMA.
76
+ *
77
+ * On the first call the embedding establishes the baseline.
78
+ * On subsequent calls, if the cosine similarity between the current state
79
+ * and the new embedding falls below {@link SemanticStateEngineConfig.driftThreshold},
80
+ * the {@link SemanticStateEngineConfig.onDriftDetected} callback is fired
81
+ * *before* the EMA fusion is applied.
82
+ *
83
+ * @param text Raw text whose embedding will be fused into the state.
84
+ */
85
+ update(text: string): Promise<void>;
86
+ /**
87
+ * Subscribes to state changes. Returns an unsubscribe function.
88
+ * The listener is called after every successful `update`.
89
+ */
90
+ subscribe(listener: () => void): () => void;
91
+ /**
92
+ * Returns a point-in-time snapshot of the current semantic state.
93
+ */
94
+ getSnapshot(): Snapshot;
95
+ /**
96
+ * Computes the current healthScore.
97
+ *
98
+ * Starts at 1.0 and subtracts:
99
+ * - An age penalty proportional to milliseconds elapsed since the last update.
100
+ * - A drift penalty proportional to the most recent drift magnitude.
101
+ *
102
+ * The result is clamped to [0, 1].
103
+ */
104
+ private calculateHealth;
105
+ /** Builds the human-readable semanticSummary string used by getSnapshot. */
+ private buildSummary;
106
+ }
107
+
108
+ export { type EmbeddingProvider as E, SemanticStateEngine as S, type SemanticStateEngineConfig as a, type Snapshot as b };
@@ -0,0 +1,108 @@
1
+ /**
2
+ * A generic embedding provider contract.
3
+ * Any object that can return an embedding vector for a given text satisfies this interface.
4
+ * This includes `WorkerManager` (on-device WebWorker) as well as custom OpenAI, Ollama,
5
+ * or other remote-inference wrappers.
+ * NOTE(review): successive vectors are EMA-fused by the engine, so a provider should
+ * return a fixed dimensionality across calls — confirm against the implementation.
6
+ */
7
+ interface EmbeddingProvider {
8
+ getEmbedding(text: string): Promise<Float32Array | number[]>;
9
+ }
10
+ /**
11
+ * Configuration for the SemanticStateEngine.
12
+ */
13
+ interface SemanticStateEngineConfig {
14
+ /** EMA decay factor α ∈ (0, 1]. Higher values weight recent embeddings more. */
15
+ alpha: number;
16
+ /** Minimum cosine similarity below which drift is detected and the callback fires. */
17
+ driftThreshold: number;
18
+ /**
19
+ * Optional callback invoked when the incoming embedding drifts beyond the threshold.
20
+ * Fired *before* the EMA fusion is applied.
21
+ *
22
+ * @param vector The incoming embedding that triggered the drift.
23
+ * @param driftScore Drift magnitude: 1 − cosine_similarity ∈ [0, 2].
24
+ */
25
+ onDriftDetected?: (vector: number[], driftScore: number) => void;
26
+ /**
27
+ * The embedding provider used to obtain embedding vectors asynchronously.
28
+ * Any object implementing `getEmbedding(text: string): Promise<Float32Array | number[]>`
29
+ * satisfies this interface — including `WorkerManager`, or a custom OpenAI / Ollama wrapper.
30
+ */
31
+ provider: EmbeddingProvider;
32
+ /**
33
+ * The name of the embedding model to use.
34
+ * Must match the modelName passed to the WorkerManager so the worker
35
+ * loads the correct model.
+ * Informational for the engine itself — the provider performs the actual embedding.
36
+ * @default "Xenova/all-MiniLM-L6-v2"
37
+ */
38
+ modelName?: string;
39
+ }
40
+ /**
41
+ * A point-in-time snapshot of the current semantic state.
42
+ */
43
+ interface Snapshot {
44
+ /** The current EMA state vector. */
45
+ vector: number[];
46
+ /** Reliability indicator in [0, 1]. Degrades with age and high drift. */
47
+ healthScore: number;
48
+ /** Unix timestamp (ms) of the last state update. */
49
+ timestamp: number;
50
+ /** Human-readable description of the current state quality: "stable", "drifting", or "volatile". */
51
+ semanticSummary: string;
52
+ }
53
+ /**
54
+ * SemanticStateEngine tracks the implicit semantic intent of an event stream
55
+ * using Exponential Moving Average (EMA) vector fusion.
56
+ *
57
+ * It fires an optional drift callback when incoming embeddings diverge
58
+ * significantly from the current state, and exposes a healthScore that
59
+ * degrades with both age and volatility.
60
+ */
61
+ declare class SemanticStateEngine {
62
+ private readonly alpha;
63
+ private readonly driftThreshold;
64
+ private readonly onDriftDetected?;
65
+ private readonly provider;
66
+ readonly modelName: string;
67
+ /** Rolling EMA state vector; the first update establishes the baseline. */
+ private stateVector;
68
+ private lastUpdatedAt;
69
+ /** Most recent drift magnitude; feeds the drift penalty in calculateHealth. */
+ private lastDrift;
70
+ private updateCount;
71
+ private readonly listeners;
72
+ constructor(config: SemanticStateEngineConfig);
73
+ /**
74
+ * Obtains an embedding for `text` from the configured embedding provider and fuses it into
75
+ * the rolling semantic state using EMA.
76
+ *
77
+ * On the first call the embedding establishes the baseline.
78
+ * On subsequent calls, if the cosine similarity between the current state
79
+ * and the new embedding falls below {@link SemanticStateEngineConfig.driftThreshold},
80
+ * the {@link SemanticStateEngineConfig.onDriftDetected} callback is fired
81
+ * *before* the EMA fusion is applied.
82
+ *
83
+ * @param text Raw text whose embedding will be fused into the state.
84
+ */
85
+ update(text: string): Promise<void>;
86
+ /**
87
+ * Subscribes to state changes. Returns an unsubscribe function.
88
+ * The listener is called after every successful `update`.
89
+ */
90
+ subscribe(listener: () => void): () => void;
91
+ /**
92
+ * Returns a point-in-time snapshot of the current semantic state.
93
+ */
94
+ getSnapshot(): Snapshot;
95
+ /**
96
+ * Computes the current healthScore.
97
+ *
98
+ * Starts at 1.0 and subtracts:
99
+ * - An age penalty proportional to milliseconds elapsed since the last update.
100
+ * - A drift penalty proportional to the most recent drift magnitude.
101
+ *
102
+ * The result is clamped to [0, 1].
103
+ */
104
+ private calculateHealth;
105
+ /** Builds the human-readable semanticSummary string used by getSnapshot. */
+ private buildSummary;
106
+ }
107
+
108
+ export { type EmbeddingProvider as E, SemanticStateEngine as S, type SemanticStateEngineConfig as a, type Snapshot as b };
@@ -0,0 +1,92 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
+ // Generated esbuild helper: defines each export as a live, enumerable getter.
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
+ // Generated esbuild helper: copies properties as getters, preserving enumerability.
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
+ // Marks the export namespace as an ES module for CJS consumers.
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+
20
+ // src/worker/embedding.worker.ts
21
+ var embedding_worker_exports = {};
+ // Getter-based exports allow module.exports to be wired up before the
+ // declarations below have executed.
22
+ __export(embedding_worker_exports, {
23
+ PipelineSingleton: () => PipelineSingleton,
24
+ getModelName: () => getModelName,
25
+ handleInitMessage: () => handleInitMessage,
26
+ handleMessage: () => handleMessage
27
+ });
28
+ module.exports = __toCommonJS(embedding_worker_exports);
29
+ var import_transformers = require("@huggingface/transformers");
+ // Always load models from the HuggingFace Hub; never resolve local model paths.
30
+ import_transformers.env.allowLocalModels = false;
31
+ var PipelineSingleton = class {
32
+ static async getInstance(modelName, progressCallback) {
33
+ if (this.instance === null || this.modelName !== modelName) {
34
+ this.modelName = modelName;
35
+ this.instance = (0, import_transformers.pipeline)("feature-extraction", modelName, {
36
+ dtype: "q8",
37
+ progress_callback: progressCallback
38
+ });
39
+ }
40
+ return this.instance;
41
+ }
42
+ };
43
+ PipelineSingleton.instance = null;
44
+ PipelineSingleton.modelName = null;
45
+ // Model currently configured for this worker; overwritten by the INIT message.
+ var currentModelName = "Xenova/all-MiniLM-L6-v2";
46
+ // Returns the model name currently configured in the worker.
+ function getModelName() {
47
+ return currentModelName;
48
+ }
49
+ // Handles an INIT message: stores the model name, preloads the pipeline, and
+ // reports the lifecycle to the main thread via STATUS ("loading" -> "ready" |
+ // "failed") and per-file PROGRESS messages.
+ async function handleInitMessage(event) {
50
+ currentModelName = event.data.modelName;
51
+ self.postMessage({ type: "STATUS", status: "loading" });
52
+ try {
53
+ await PipelineSingleton.getInstance(currentModelName, (data) => {
54
+ // Forward model-download progress events only (other statuses are ignored).
+ if (data.status === "progress" && data.file && data.progress !== void 0) {
55
+ self.postMessage({ type: "PROGRESS", file: data.file, progress: data.progress });
56
+ }
57
+ });
58
+ self.postMessage({ type: "STATUS", status: "ready" });
59
+ } catch (err) {
60
+ // Normalize non-Error throwables into a string for the status payload.
+ const error = err instanceof Error ? err.message : String(err);
61
+ self.postMessage({ type: "STATUS", status: "failed", error });
62
+ }
63
+ }
64
+ async function handleMessage(event) {
65
+ const { id, text } = event.data;
66
+ try {
67
+ const extractor = await PipelineSingleton.getInstance(currentModelName);
68
+ const output = await extractor(text, { pooling: "mean", normalize: true });
69
+ const response = { type: "EMBED_RES", id, vector: output.data };
70
+ self.postMessage(response);
71
+ } catch (err) {
72
+ const error = err instanceof Error ? err.message : String(err);
73
+ const response = { type: "EMBED_RES", id, vector: null, error };
74
+ self.postMessage(response);
75
+ }
76
+ }
77
+ // Single dispatch point for the worker: INIT configures and preloads the
+ // model; every other message is routed to the embedding handler.
+ self.addEventListener("message", (event) => {
78
+ const msg = event.data;
79
+ if (msg.type === "INIT") {
80
+ handleInitMessage(event);
81
+ } else {
+ // NOTE(review): any non-INIT message is assumed to be an EMBED request —
+ // there is no explicit type check here; confirm senders only post those two.
+ handleMessage(event);
82
+ }
83
+ });
85
+ // Annotate the CommonJS export names for ESM import in node:
86
+ 0 && (module.exports = {
87
+ PipelineSingleton,
88
+ getModelName,
89
+ handleInitMessage,
90
+ handleMessage
91
+ });
92
+ //# sourceMappingURL=embedding.worker.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/worker/embedding.worker.ts"],"sourcesContent":["import { pipeline, env } from \"@huggingface/transformers\";\nimport type { EmbeddingRequest, EmbeddingResponse, WorkerIncomingMessage, WorkerInitMessage } from \"./types.js\";\n\n// Disable local models; always load from the HuggingFace Hub.\nenv.allowLocalModels = false;\n\nexport class PipelineSingleton {\n static instance: Promise<any> | null = null;\n static modelName: string | null = null;\n\n static async getInstance(modelName: string, progressCallback?: (data: any) => void) {\n if (this.instance === null || this.modelName !== modelName) {\n this.modelName = modelName;\n this.instance = pipeline('feature-extraction', modelName, {\n dtype: 'q8',\n progress_callback: progressCallback,\n });\n }\n return this.instance;\n }\n}\n\nlet currentModelName: string = \"Xenova/all-MiniLM-L6-v2\";\n\n/** Returns the model name currently configured in the worker. */\nexport function getModelName(): string {\n return currentModelName;\n}\n\n/**\n * Handles an INIT message: saves the model name and starts loading the pipeline,\n * broadcasting STATUS events so the main thread can track the model lifecycle.\n */\nexport async function handleInitMessage(event: MessageEvent<WorkerInitMessage>): Promise<void> {\n currentModelName = event.data.modelName;\n self.postMessage({ type: 'STATUS', status: 'loading' });\n try {\n await PipelineSingleton.getInstance(currentModelName, (data: any) => {\n if (data.status === 'progress' && data.file && data.progress !== undefined) {\n self.postMessage({ type: 'PROGRESS', file: data.file, progress: data.progress });\n }\n });\n self.postMessage({ type: 'STATUS', status: 'ready' });\n } catch (err) {\n const error = err instanceof Error ? 
err.message : String(err);\n self.postMessage({ type: 'STATUS', status: 'failed', error });\n }\n}\n\n/**\n * Handles an EMBED message: runs the text through the pipeline and posts back\n * the resulting normalized 1D Float32Array.\n */\nexport async function handleMessage(event: MessageEvent<EmbeddingRequest>): Promise<void> {\n const { id, text } = event.data;\n try {\n const extractor = await PipelineSingleton.getInstance(currentModelName);\n const output = await extractor(text, { pooling: 'mean', normalize: true });\n const response: EmbeddingResponse = { type: \"EMBED_RES\", id, vector: output.data as Float32Array };\n self.postMessage(response);\n } catch (err) {\n const error = err instanceof Error ? err.message : String(err);\n const response: EmbeddingResponse = { type: \"EMBED_RES\", id, vector: null, error };\n self.postMessage(response);\n }\n}\n\nself.addEventListener(\"message\", (event: Event) => {\n const msg = (event as MessageEvent<WorkerIncomingMessage>).data;\n if (msg.type === \"INIT\") {\n handleInitMessage(event as MessageEvent<WorkerInitMessage>);\n } else {\n handleMessage(event as MessageEvent<EmbeddingRequest>);\n 
}\n});\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,0BAA8B;AAI9B,wBAAI,mBAAmB;AAEhB,IAAM,oBAAN,MAAwB;AAAA,EAI7B,aAAa,YAAY,WAAmB,kBAAwC;AAClF,QAAI,KAAK,aAAa,QAAQ,KAAK,cAAc,WAAW;AAC1D,WAAK,YAAY;AACjB,WAAK,eAAW,8BAAS,sBAAsB,WAAW;AAAA,QACxD,OAAO;AAAA,QACP,mBAAmB;AAAA,MACrB,CAAC;AAAA,IACH;AACA,WAAO,KAAK;AAAA,EACd;AACF;AAda,kBACJ,WAAgC;AAD5B,kBAEJ,YAA2B;AAcpC,IAAI,mBAA2B;AAGxB,SAAS,eAAuB;AACrC,SAAO;AACT;AAMA,eAAsB,kBAAkB,OAAuD;AAC7F,qBAAmB,MAAM,KAAK;AAC9B,OAAK,YAAY,EAAE,MAAM,UAAU,QAAQ,UAAU,CAAC;AACtD,MAAI;AACF,UAAM,kBAAkB,YAAY,kBAAkB,CAAC,SAAc;AACnE,UAAI,KAAK,WAAW,cAAc,KAAK,QAAQ,KAAK,aAAa,QAAW;AAC1E,aAAK,YAAY,EAAE,MAAM,YAAY,MAAM,KAAK,MAAM,UAAU,KAAK,SAAS,CAAC;AAAA,MACjF;AAAA,IACF,CAAC;AACD,SAAK,YAAY,EAAE,MAAM,UAAU,QAAQ,QAAQ,CAAC;AAAA,EACtD,SAAS,KAAK;AACZ,UAAM,QAAQ,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC7D,SAAK,YAAY,EAAE,MAAM,UAAU,QAAQ,UAAU,MAAM,CAAC;AAAA,EAC9D;AACF;AAMA,eAAsB,cAAc,OAAsD;AACxF,QAAM,EAAE,IAAI,KAAK,IAAI,MAAM;AAC3B,MAAI;AACF,UAAM,YAAY,MAAM,kBAAkB,YAAY,gBAAgB;AACtE,UAAM,SAAS,MAAM,UAAU,MAAM,EAAE,SAAS,QAAQ,WAAW,KAAK,CAAC;AACzE,UAAM,WAA8B,EAAE,MAAM,aAAa,IAAI,QAAQ,OAAO,KAAqB;AACjG,SAAK,YAAY,QAAQ;AAAA,EAC3B,SAAS,KAAK;AACZ,UAAM,QAAQ,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC7D,UAAM,WAA8B,EAAE,MAAM,aAAa,IAAI,QAAQ,MAAM,MAAM;AACjF,SAAK,YAAY,QAAQ;AAAA,EAC3B;AACF;AAEA,KAAK,iBAAiB,WAAW,CAAC,UAAiB;AACjD,QAAM,MAAO,MAA8C;AAC3D,MAAI,IAAI,SAAS,QAAQ;AACvB,sBAAkB,KAAwC;AAAA,EAC5D,OAAO;AACL,kBAAc,KAAuC;AAAA,EACvD;AACF,CAAC;","names":[]}
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Message contract between the Main Thread and the Worker Thread.
3
+ * Embedding requests carry an `id` so responses can be mapped back to their originating Promises; the one-shot INIT message carries no id.
4
+ */
5
+ /** An initialization message sent from the main thread to configure the worker. */
6
+ interface WorkerInitMessage {
7
+ type: "INIT";
8
+ /** The name of the embedding model the worker should load. */
9
+ modelName: string;
10
+ }
11
+ /** A request sent from the main thread to the embedding worker. */
12
+ interface EmbeddingRequest {
13
+ type: "EMBED";
14
+ /** UUID that uniquely identifies this request. */
15
+ id: string;
16
+ /** The raw text to embed. */
17
+ text: string;
18
+ }
19
+
20
+ /** Cached singleton wrapper around the worker's embedding pipeline. */
+ declare class PipelineSingleton {
21
+ /** The in-flight or resolved pipeline promise; null before the first load. */
+ static instance: Promise<any> | null;
22
+ /** Model name the cached pipeline was created for. */
+ static modelName: string | null;
23
+ /** Returns the cached pipeline, (re)creating it when `modelName` changes. */
+ static getInstance(modelName: string, progressCallback?: (data: any) => void): Promise<any>;
24
+ }
25
+ /** Returns the model name currently configured in the worker. */
26
+ declare function getModelName(): string;
27
+ /**
28
+ * Handles an INIT message: saves the model name and starts loading the pipeline,
29
+ * broadcasting STATUS events so the main thread can track the model lifecycle.
30
+ */
31
+ declare function handleInitMessage(event: MessageEvent<WorkerInitMessage>): Promise<void>;
32
+ /**
33
+ * Handles an EMBED message: runs the text through the pipeline and posts back
34
+ * the resulting normalized 1D Float32Array.
35
+ */
36
+ declare function handleMessage(event: MessageEvent<EmbeddingRequest>): Promise<void>;
37
+
38
+ export { PipelineSingleton, getModelName, handleInitMessage, handleMessage };
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Message contract between the Main Thread and the Worker Thread.
3
+ * Embedding requests carry an `id` so responses can be mapped back to their originating Promises; the one-shot INIT message carries no id.
4
+ */
5
+ /** An initialization message sent from the main thread to configure the worker. */
6
+ interface WorkerInitMessage {
7
+ type: "INIT";
8
+ /** The name of the embedding model the worker should load. */
9
+ modelName: string;
10
+ }
11
+ /** A request sent from the main thread to the embedding worker. */
12
+ interface EmbeddingRequest {
13
+ type: "EMBED";
14
+ /** UUID that uniquely identifies this request. */
15
+ id: string;
16
+ /** The raw text to embed. */
17
+ text: string;
18
+ }
19
+
20
+ /** Cached singleton wrapper around the worker's embedding pipeline. */
+ declare class PipelineSingleton {
21
+ /** The in-flight or resolved pipeline promise; null before the first load. */
+ static instance: Promise<any> | null;
22
+ /** Model name the cached pipeline was created for. */
+ static modelName: string | null;
23
+ /** Returns the cached pipeline, (re)creating it when `modelName` changes. */
+ static getInstance(modelName: string, progressCallback?: (data: any) => void): Promise<any>;
24
+ }
25
+ /** Returns the model name currently configured in the worker. */
26
+ declare function getModelName(): string;
27
+ /**
28
+ * Handles an INIT message: saves the model name and starts loading the pipeline,
29
+ * broadcasting STATUS events so the main thread can track the model lifecycle.
30
+ */
31
+ declare function handleInitMessage(event: MessageEvent<WorkerInitMessage>): Promise<void>;
32
+ /**
33
+ * Handles an EMBED message: runs the text through the pipeline and posts back
34
+ * the resulting normalized 1D Float32Array.
35
+ */
36
+ declare function handleMessage(event: MessageEvent<EmbeddingRequest>): Promise<void>;
37
+
38
+ export { PipelineSingleton, getModelName, handleInitMessage, handleMessage };