semantic-state-estimator 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +265 -0
- package/dist/SemanticStateEngine-BP5URJOJ.d.cts +108 -0
- package/dist/SemanticStateEngine-BP5URJOJ.d.ts +108 -0
- package/dist/embedding.worker.cjs +92 -0
- package/dist/embedding.worker.cjs.map +1 -0
- package/dist/embedding.worker.d.cts +38 -0
- package/dist/embedding.worker.d.ts +38 -0
- package/dist/embedding.worker.js +64 -0
- package/dist/embedding.worker.js.map +1 -0
- package/dist/index.cjs +236 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +78 -0
- package/dist/index.d.ts +78 -0
- package/dist/index.js +202 -0
- package/dist/index.js.map +1 -0
- package/dist/react.cjs +39 -0
- package/dist/react.cjs.map +1 -0
- package/dist/react.d.cts +13 -0
- package/dist/react.d.ts +13 -0
- package/dist/react.js +12 -0
- package/dist/react.js.map +1 -0
- package/dist/zustand.cjs +43 -0
- package/dist/zustand.cjs.map +1 -0
- package/dist/zustand.d.cts +21 -0
- package/dist/zustand.d.ts +21 -0
- package/dist/zustand.js +18 -0
- package/dist/zustand.js.map +1 -0
- package/package.json +93 -0
package/README.md
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
# semantic-state-estimator
|
|
2
|
+
|
|
3
|
+
[![npm version](https://img.shields.io/npm/v/semantic-state-estimator)](https://www.npmjs.com/package/semantic-state-estimator)
|
|
4
|
+
[![license](https://img.shields.io/badge/license-MIT-blue)](LICENSE)
|
|
5
|
+
[![node](https://img.shields.io/node/v/semantic-state-estimator)](https://nodejs.org)
|
|
6
|
+
|
|
7
|
+
**Bridge the gap between boolean UI state and semantic AI intent — all inside a WebWorker, on-device, zero-latency.**
|
|
8
|
+
Instead of asking *"did the user click?"*, this library asks *"what does the user **mean**?"* — fusing local text embeddings with Exponential Moving Average (EMA) to build a living, drifting semantic context of your entire session.
|
|
9
|
+
|
|
10
|
+
## Installation
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
npm install semantic-state-estimator
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
> **Peer dependencies:** `react >=18` and/or `zustand >=4` are optional — only install what you need.
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Quick Start
|
|
21
|
+
|
|
22
|
+
### 1. Initialize the `SemanticStateEngine`
|
|
23
|
+
|
|
24
|
+
```typescript
|
|
25
|
+
import { WorkerManager, SemanticStateEngine } from 'semantic-state-estimator';
|
|
26
|
+
|
|
27
|
+
// The worker uses import.meta.url so Webpack 5 and Vite resolve the path correctly.
|
|
28
|
+
const workerManager = new WorkerManager();
|
|
29
|
+
|
|
30
|
+
const engine = new SemanticStateEngine({
|
|
31
|
+
provider: workerManager,
|
|
32
|
+
alpha: 0.5, // Balanced EMA decay — see "Tuning the Math" below
|
|
33
|
+
driftThreshold: 0.75, // Fire onDriftDetected when similarity drops below this
|
|
34
|
+
onDriftDetected: (vector, driftScore) => {
|
|
35
|
+
console.log(`Semantic drift detected! Score: ${driftScore.toFixed(3)}`);
|
|
36
|
+
},
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
// Feed events into the engine — it runs inside the WebWorker, never blocks your UI
|
|
40
|
+
await engine.update('user opened the billing settings');
|
|
41
|
+
await engine.update('user clicked "cancel subscription"');
|
|
42
|
+
|
|
43
|
+
const snapshot = engine.getSnapshot();
|
|
44
|
+
console.log(snapshot.semanticSummary); // "stable" | "drifting" | "volatile"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### 2. Wrap a Zustand Store
|
|
48
|
+
|
|
49
|
+
```typescript
|
|
50
|
+
import { create } from 'zustand';
|
|
51
|
+
import { semanticMiddleware } from 'semantic-state-estimator/zustand';
|
|
52
|
+
import { WorkerManager, SemanticStateEngine } from 'semantic-state-estimator';
|
|
53
|
+
|
|
54
|
+
type AppState = {
|
|
55
|
+
page: string;
|
|
56
|
+
cartItems: number;
|
|
57
|
+
setPage: (page: string) => void;
|
|
58
|
+
addToCart: () => void;
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
const workerManager = new WorkerManager();
|
|
62
|
+
const engine = new SemanticStateEngine({ provider: workerManager, alpha: 0.5, driftThreshold: 0.75 });
|
|
63
|
+
|
|
64
|
+
// Wrap your store creator with semanticMiddleware
|
|
65
|
+
export const useAppStore = create<AppState>(
|
|
66
|
+
semanticMiddleware(
|
|
67
|
+
engine,
|
|
68
|
+
// Map each state transition to a semantic string — return null to skip
|
|
69
|
+
(next, prev) => {
|
|
70
|
+
if (next.page !== prev.page) return `user navigated to ${next.page}`;
|
|
71
|
+
if (next.cartItems > prev.cartItems) return 'user added item to cart';
|
|
72
|
+
return null;
|
|
73
|
+
},
|
|
74
|
+
(set) => ({
|
|
75
|
+
page: 'home',
|
|
76
|
+
cartItems: 0,
|
|
77
|
+
setPage: (page) => set({ page }),
|
|
78
|
+
addToCart: () => set((s) => ({ cartItems: s.cartItems + 1 })),
|
|
79
|
+
}),
|
|
80
|
+
),
|
|
81
|
+
);
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## Tuning the Math: The EMA α (Decay) Weight
|
|
87
|
+
|
|
88
|
+
The `alpha` parameter controls how quickly new events override the session history.
|
|
89
|
+
|
|
90
|
+
| α value | Behavior | Best for |
|
|
91
|
+
|---------|-----------|----------|
|
|
92
|
+
| `0.1` | **Slow drift, highly stable.** Requires many consistent events to shift the state. Past context dominates. | Long-running sessions, background intent tracking |
|
|
93
|
+
| `0.5` | **Balanced.** Responds well to recent events while still remembering session history. | General-purpose apps, e-commerce, dashboards |
|
|
94
|
+
| `0.9` | **Highly reactive.** Almost instantly forgets past context in favour of the latest event. | Real-time chat, game UIs, live coding tools |
|
|
95
|
+
|
|
96
|
+
The EMA formula applied on every `engine.update(text)` call:
|
|
97
|
+
|
|
98
|
+
```
|
|
99
|
+
S_t = α · E_t + (1 − α) · S_{t−1}
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Where `E_t` is the embedding of the incoming event and `S_{t−1}` is the previous state vector.
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## The Drift Callback
|
|
107
|
+
|
|
108
|
+
The `onDriftDetected` callback fires **before** EMA fusion is applied, giving you a chance to react to a sharp semantic shift — e.g. a user suddenly switching from "browsing products" to "requesting a refund".
|
|
109
|
+
|
|
110
|
+
### With `SemanticStateEngine` directly
|
|
111
|
+
|
|
112
|
+
```typescript
|
|
113
|
+
const engine = new SemanticStateEngine({
|
|
114
|
+
provider: workerManager,
|
|
115
|
+
alpha: 0.5,
|
|
116
|
+
driftThreshold: 0.75,
|
|
117
|
+
onDriftDetected: (vector, driftScore) => {
|
|
118
|
+
// driftScore = 1 − cosine_similarity ∈ [0, 2]
|
|
119
|
+
if (driftScore > 0.8) {
|
|
120
|
+
showModal('We noticed your focus shifted. Can we help?');
|
|
121
|
+
}
|
|
122
|
+
},
|
|
123
|
+
});
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### With the React `useSemanticState` Hook
|
|
127
|
+
|
|
128
|
+
```tsx
|
|
129
|
+
import { useSemanticState } from 'semantic-state-estimator/react';
|
|
130
|
+
import { useEffect, useState } from 'react';
|
|
131
|
+
|
|
132
|
+
function SemanticStatusBanner({ engine }) {
|
|
133
|
+
const [showDriftModal, setShowDriftModal] = useState(false);
|
|
134
|
+
const snapshot = useSemanticState(engine); // re-renders on every engine.update()
|
|
135
|
+
|
|
136
|
+
useEffect(() => {
|
|
137
|
+
if (snapshot.semanticSummary === 'volatile') {
|
|
138
|
+
setShowDriftModal(true);
|
|
139
|
+
}
|
|
140
|
+
}, [snapshot.semanticSummary]);
|
|
141
|
+
|
|
142
|
+
return (
|
|
143
|
+
<>
|
|
144
|
+
<div>Health: {(snapshot.healthScore * 100).toFixed(0)}%</div>
|
|
145
|
+
<div>State: {snapshot.semanticSummary}</div>
|
|
146
|
+
{showDriftModal && (
|
|
147
|
+
<Modal onClose={() => setShowDriftModal(false)}>
|
|
148
|
+
Your session intent has shifted significantly. Need help?
|
|
149
|
+
</Modal>
|
|
150
|
+
)}
|
|
151
|
+
</>
|
|
152
|
+
);
|
|
153
|
+
}
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
---
|
|
157
|
+
|
|
158
|
+
## API Reference
|
|
159
|
+
|
|
160
|
+
### `new WorkerManager(workerUrl?, modelName?)`
|
|
161
|
+
|
|
162
|
+
| Parameter | Type | Default | Description |
|
|
163
|
+
|-----------|------|---------|-------------|
|
|
164
|
+
| `workerUrl` | `string \| URL` | `new URL('./embedding.worker.js', import.meta.url)` | Location of the compiled worker file |
|
|
165
|
+
| `modelName` | `string` | `"Xenova/all-MiniLM-L6-v2"` | HuggingFace model for text embeddings |
|
|
166
|
+
|
|
167
|
+
### `new SemanticStateEngine(config)`
|
|
168
|
+
|
|
169
|
+
| Option | Type | Default | Description |
|
|
170
|
+
|--------|------|---------|-------------|
|
|
171
|
+
| `provider` | `EmbeddingProvider` | *(required)* | Provides async embedding vectors. `WorkerManager` satisfies this interface out of the box; you can also pass a custom OpenAI, Ollama, or any other wrapper. |
|
|
172
|
+
| `alpha` | `number` | — | EMA decay factor α ∈ (0, 1] |
|
|
173
|
+
| `driftThreshold` | `number` | — | Cosine similarity below which drift fires |
|
|
174
|
+
| `onDriftDetected` | `(vector, driftScore) => void` | `undefined` | Callback on semantic drift |
|
|
175
|
+
| `modelName` | `string` | `"Xenova/all-MiniLM-L6-v2"` | Model name; should match the `modelName` given to `WorkerManager` so the worker loads the intended model |
|
|
176
|
+
|
|
177
|
+
### `engine.getSnapshot()` → `Snapshot`
|
|
178
|
+
|
|
179
|
+
```typescript
|
|
180
|
+
{
|
|
181
|
+
vector: number[]; // Current EMA state vector
|
|
182
|
+
healthScore: number; // Reliability [0, 1] — degrades with age and drift
|
|
183
|
+
timestamp: number; // Unix ms of last update
|
|
184
|
+
semanticSummary: string; // "stable" | "drifting" | "volatile"
|
|
185
|
+
}
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
## Custom Embedding Providers
|
|
191
|
+
|
|
192
|
+
The `SemanticStateEngine` accepts any object that implements the `EmbeddingProvider` interface:
|
|
193
|
+
|
|
194
|
+
```typescript
|
|
195
|
+
import type { EmbeddingProvider } from 'semantic-state-estimator';
|
|
196
|
+
|
|
197
|
+
interface EmbeddingProvider {
|
|
198
|
+
getEmbedding(text: string): Promise<Float32Array | number[]>;
|
|
199
|
+
}
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
`WorkerManager` satisfies this interface automatically, so existing code continues to work. You can also write a thin wrapper to use any other embedding source:
|
|
203
|
+
|
|
204
|
+
### OpenAI Provider
|
|
205
|
+
|
|
206
|
+
```typescript
|
|
207
|
+
import type { EmbeddingProvider } from 'semantic-state-estimator';
|
|
208
|
+
|
|
209
|
+
class OpenAIProvider implements EmbeddingProvider {
|
|
210
|
+
constructor(private apiKey: string, private model = "text-embedding-3-small") {}
|
|
211
|
+
|
|
212
|
+
async getEmbedding(text: string): Promise<number[]> {
|
|
213
|
+
const res = await fetch("https://api.openai.com/v1/embeddings", {
|
|
214
|
+
method: "POST",
|
|
215
|
+
headers: {
|
|
216
|
+
"Content-Type": "application/json",
|
|
217
|
+
"Authorization": `Bearer ${this.apiKey}`
|
|
218
|
+
},
|
|
219
|
+
body: JSON.stringify({ input: text, model: this.model })
|
|
220
|
+
});
|
|
221
|
+
const data = await res.json();
|
|
222
|
+
return data.data[0].embedding; // 1536-dimension array
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
const engine = new SemanticStateEngine({
|
|
227
|
+
alpha: 0.5,
|
|
228
|
+
driftThreshold: 0.75,
|
|
229
|
+
provider: new OpenAIProvider("sk-..."),
|
|
230
|
+
});
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
### Ollama Provider
|
|
234
|
+
|
|
235
|
+
```typescript
|
|
236
|
+
import type { EmbeddingProvider } from 'semantic-state-estimator';
|
|
237
|
+
|
|
238
|
+
class OllamaProvider implements EmbeddingProvider {
|
|
239
|
+
constructor(private model = "nomic-embed-text", private url = "http://localhost:11434") {}
|
|
240
|
+
|
|
241
|
+
async getEmbedding(text: string): Promise<number[]> {
|
|
242
|
+
const res = await fetch(`${this.url}/api/embeddings`, {
|
|
243
|
+
method: "POST",
|
|
244
|
+
headers: { "Content-Type": "application/json" },
|
|
245
|
+
body: JSON.stringify({ model: this.model, prompt: text })
|
|
246
|
+
});
|
|
247
|
+
const data = await res.json();
|
|
248
|
+
return data.embedding; // 768-dimension array
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
const engine = new SemanticStateEngine({
|
|
253
|
+
alpha: 0.5,
|
|
254
|
+
driftThreshold: 0.75,
|
|
255
|
+
provider: new OllamaProvider(),
|
|
256
|
+
});
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
> ⚠️ **Frontend / high-frequency usage warning:** The built-in `WorkerManager` runs inference locally in the browser in ~20–50 ms. If you replace it with a remote provider such as `OpenAIProvider`, every `engine.update()` call incurs a 300 ms–800 ms network round-trip. When used with `semanticMiddleware` on rapid UI state changes, requests will queue up and you may hit API rate limits quickly. Remote providers are best suited for server-side or local-desktop applications where update frequency is low.
|
|
260
|
+
|
|
261
|
+
---
|
|
262
|
+
|
|
263
|
+
## License
|
|
264
|
+
|
|
265
|
+
MIT
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
 * A generic embedding provider contract.
 * Any object that can return an embedding vector for a given text satisfies this interface.
 * This includes `WorkerManager` (on-device WebWorker) as well as custom OpenAI, Ollama,
 * or other remote-inference wrappers.
 */
interface EmbeddingProvider {
    getEmbedding(text: string): Promise<Float32Array | number[]>;
}
/**
 * Configuration for the SemanticStateEngine.
 */
interface SemanticStateEngineConfig {
    /** EMA decay factor α ∈ (0, 1]. Higher values weight recent embeddings more. */
    alpha: number;
    /** Minimum cosine similarity below which drift is detected and the callback fires. */
    driftThreshold: number;
    /**
     * Optional callback invoked when the incoming embedding drifts beyond the threshold.
     * Fired *before* the EMA fusion is applied.
     *
     * @param vector The incoming embedding that triggered the drift.
     * @param driftScore Drift magnitude: 1 − cosine_similarity ∈ [0, 2].
     */
    onDriftDetected?: (vector: number[], driftScore: number) => void;
    /**
     * The embedding provider used to obtain embedding vectors asynchronously.
     * Any object implementing `getEmbedding(text: string): Promise<Float32Array | number[]>`
     * satisfies this interface — including `WorkerManager`, or a custom OpenAI / Ollama wrapper.
     */
    provider: EmbeddingProvider;
    /**
     * The name of the embedding model to use.
     * Must match the modelName passed to the WorkerManager so the worker
     * loads the correct model.
     * @default "Xenova/all-MiniLM-L6-v2"
     */
    modelName?: string;
}
/**
 * A point-in-time snapshot of the current semantic state.
 */
interface Snapshot {
    /** The current EMA state vector. */
    vector: number[];
    /** Reliability indicator in [0, 1]. Degrades with age and high drift. */
    healthScore: number;
    /** Unix timestamp (ms) of the last state update. */
    timestamp: number;
    /**
     * Human-readable description of the current state quality.
     * One of "stable" | "drifting" | "volatile" (see the package README).
     */
    semanticSummary: string;
}
/**
 * SemanticStateEngine tracks the implicit semantic intent of an event stream
 * using Exponential Moving Average (EMA) vector fusion.
 *
 * It fires an optional drift callback when incoming embeddings diverge
 * significantly from the current state, and exposes a healthScore that
 * degrades with both age and volatility.
 */
declare class SemanticStateEngine {
    private readonly alpha;
    private readonly driftThreshold;
    private readonly onDriftDetected?;
    private readonly provider;
    readonly modelName: string;
    private stateVector;
    private lastUpdatedAt;
    private lastDrift;
    private updateCount;
    private readonly listeners;
    constructor(config: SemanticStateEngineConfig);
    /**
     * Obtains an embedding for `text` from the WorkerManager and fuses it into
     * the rolling semantic state using EMA.
     *
     * On the first call the embedding establishes the baseline.
     * On subsequent calls, if the cosine similarity between the current state
     * and the new embedding falls below {@link SemanticStateEngineConfig.driftThreshold},
     * the {@link SemanticStateEngineConfig.onDriftDetected} callback is fired
     * *before* the EMA fusion is applied.
     *
     * @param text Raw text whose embedding will be fused into the state.
     */
    update(text: string): Promise<void>;
    /**
     * Subscribes to state changes. Returns an unsubscribe function.
     * The listener is called after every successful `update`.
     */
    subscribe(listener: () => void): () => void;
    /**
     * Returns a point-in-time snapshot of the current semantic state.
     */
    getSnapshot(): Snapshot;
    /**
     * Computes the current healthScore.
     *
     * Starts at 1.0 and subtracts:
     * - An age penalty proportional to milliseconds elapsed since the last update.
     * - A drift penalty proportional to the most recent drift magnitude.
     *
     * The result is clamped to [0, 1].
     */
    private calculateHealth;
    private buildSummary;
}

export { type EmbeddingProvider as E, SemanticStateEngine as S, type SemanticStateEngineConfig as a, type Snapshot as b };
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
/**
 * A generic embedding provider contract.
 * Any object that can return an embedding vector for a given text satisfies this interface.
 * This includes `WorkerManager` (on-device WebWorker) as well as custom OpenAI, Ollama,
 * or other remote-inference wrappers.
 */
interface EmbeddingProvider {
    getEmbedding(text: string): Promise<Float32Array | number[]>;
}
/**
 * Configuration for the SemanticStateEngine.
 */
interface SemanticStateEngineConfig {
    /** EMA decay factor α ∈ (0, 1]. Higher values weight recent embeddings more. */
    alpha: number;
    /** Minimum cosine similarity below which drift is detected and the callback fires. */
    driftThreshold: number;
    /**
     * Optional callback invoked when the incoming embedding drifts beyond the threshold.
     * Fired *before* the EMA fusion is applied.
     *
     * @param vector The incoming embedding that triggered the drift.
     * @param driftScore Drift magnitude: 1 − cosine_similarity ∈ [0, 2].
     */
    onDriftDetected?: (vector: number[], driftScore: number) => void;
    /**
     * The embedding provider used to obtain embedding vectors asynchronously.
     * Any object implementing `getEmbedding(text: string): Promise<Float32Array | number[]>`
     * satisfies this interface — including `WorkerManager`, or a custom OpenAI / Ollama wrapper.
     */
    provider: EmbeddingProvider;
    /**
     * The name of the embedding model to use.
     * Must match the modelName passed to the WorkerManager so the worker
     * loads the correct model.
     * @default "Xenova/all-MiniLM-L6-v2"
     */
    modelName?: string;
}
/**
 * A point-in-time snapshot of the current semantic state.
 */
interface Snapshot {
    /** The current EMA state vector. */
    vector: number[];
    /** Reliability indicator in [0, 1]. Degrades with age and high drift. */
    healthScore: number;
    /** Unix timestamp (ms) of the last state update. */
    timestamp: number;
    /**
     * Human-readable description of the current state quality.
     * One of "stable" | "drifting" | "volatile" (see the package README).
     */
    semanticSummary: string;
}
/**
 * SemanticStateEngine tracks the implicit semantic intent of an event stream
 * using Exponential Moving Average (EMA) vector fusion.
 *
 * It fires an optional drift callback when incoming embeddings diverge
 * significantly from the current state, and exposes a healthScore that
 * degrades with both age and volatility.
 */
declare class SemanticStateEngine {
    private readonly alpha;
    private readonly driftThreshold;
    private readonly onDriftDetected?;
    private readonly provider;
    readonly modelName: string;
    private stateVector;
    private lastUpdatedAt;
    private lastDrift;
    private updateCount;
    private readonly listeners;
    constructor(config: SemanticStateEngineConfig);
    /**
     * Obtains an embedding for `text` from the WorkerManager and fuses it into
     * the rolling semantic state using EMA.
     *
     * On the first call the embedding establishes the baseline.
     * On subsequent calls, if the cosine similarity between the current state
     * and the new embedding falls below {@link SemanticStateEngineConfig.driftThreshold},
     * the {@link SemanticStateEngineConfig.onDriftDetected} callback is fired
     * *before* the EMA fusion is applied.
     *
     * @param text Raw text whose embedding will be fused into the state.
     */
    update(text: string): Promise<void>;
    /**
     * Subscribes to state changes. Returns an unsubscribe function.
     * The listener is called after every successful `update`.
     */
    subscribe(listener: () => void): () => void;
    /**
     * Returns a point-in-time snapshot of the current semantic state.
     */
    getSnapshot(): Snapshot;
    /**
     * Computes the current healthScore.
     *
     * Starts at 1.0 and subtracts:
     * - An age penalty proportional to milliseconds elapsed since the last update.
     * - A drift penalty proportional to the most recent drift magnitude.
     *
     * The result is clamped to [0, 1].
     */
    private calculateHealth;
    private buildSummary;
}

export { type EmbeddingProvider as E, SemanticStateEngine as S, type SemanticStateEngineConfig as a, type Snapshot as b };
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
6
|
+
var __export = (target, all) => {
|
|
7
|
+
for (var name in all)
|
|
8
|
+
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
+
};
|
|
10
|
+
var __copyProps = (to, from, except, desc) => {
|
|
11
|
+
if (from && typeof from === "object" || typeof from === "function") {
|
|
12
|
+
for (let key of __getOwnPropNames(from))
|
|
13
|
+
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
14
|
+
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
15
|
+
}
|
|
16
|
+
return to;
|
|
17
|
+
};
|
|
18
|
+
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
19
|
+
|
|
20
|
+
// src/worker/embedding.worker.ts
|
|
21
|
+
var embedding_worker_exports = {};
|
|
22
|
+
__export(embedding_worker_exports, {
|
|
23
|
+
PipelineSingleton: () => PipelineSingleton,
|
|
24
|
+
getModelName: () => getModelName,
|
|
25
|
+
handleInitMessage: () => handleInitMessage,
|
|
26
|
+
handleMessage: () => handleMessage
|
|
27
|
+
});
|
|
28
|
+
module.exports = __toCommonJS(embedding_worker_exports);
|
|
29
|
+
var import_transformers = require("@huggingface/transformers");
|
|
30
|
+
import_transformers.env.allowLocalModels = false;
|
|
31
|
+
// Lazily creates and caches the feature-extraction pipeline. A new pipeline
// is instantiated only when the requested model name changes.
var PipelineSingleton = class {
  /**
   * Returns the (promised) pipeline for `modelName`, creating it on first
   * use or when the model name changes.
   *
   * @param modelName        HuggingFace model to load.
   * @param progressCallback Optional download-progress callback forwarded
   *                         to the transformers pipeline loader.
   */
  static async getInstance(modelName, progressCallback) {
    if (this.instance === null || this.modelName !== modelName) {
      this.modelName = modelName;
      this.instance = (0, import_transformers.pipeline)("feature-extraction", modelName, {
        dtype: "q8",
        progress_callback: progressCallback
      });
      // Bug fix: if loading fails, drop the cached rejected promise so a
      // later call can retry instead of returning the same rejection
      // forever. Callers awaiting `this.instance` still observe the
      // original rejection.
      this.instance.catch(() => {
        if (this.modelName === modelName) {
          this.instance = null;
          this.modelName = null;
        }
      });
    }
    return this.instance;
  }
};
PipelineSingleton.instance = null;
PipelineSingleton.modelName = null;
|
|
45
|
+
// Name of the embedding model the worker is currently configured to serve.
// Reassigned by the INIT message handler.
var currentModelName = "Xenova/all-MiniLM-L6-v2";
/** Returns the model name currently configured in the worker. */
function getModelName() {
  const name = currentModelName;
  return name;
}
|
|
49
|
+
/**
 * Handles an INIT message: stores the requested model name, then loads the
 * pipeline while broadcasting STATUS ("loading" / "ready" / "failed") and
 * per-file PROGRESS events so the main thread can track the model lifecycle.
 */
async function handleInitMessage(event) {
  currentModelName = event.data.modelName;
  self.postMessage({ type: "STATUS", status: "loading" });
  // Forward per-file download progress to the main thread.
  const reportProgress = (data) => {
    const isFileProgress = data.status === "progress" && data.file && data.progress !== void 0;
    if (isFileProgress) {
      self.postMessage({ type: "PROGRESS", file: data.file, progress: data.progress });
    }
  };
  try {
    await PipelineSingleton.getInstance(currentModelName, reportProgress);
    self.postMessage({ type: "STATUS", status: "ready" });
  } catch (err) {
    const error = err instanceof Error ? err.message : String(err);
    self.postMessage({ type: "STATUS", status: "failed", error });
  }
}
|
|
64
|
+
/**
 * Handles an EMBED message: runs `text` through the cached pipeline and
 * posts an EMBED_RES message (keyed by the request `id`) carrying either
 * the mean-pooled, normalized embedding or an error string.
 */
async function handleMessage(event) {
  const { id, text } = event.data;
  try {
    const extractor = await PipelineSingleton.getInstance(currentModelName);
    const output = await extractor(text, { pooling: "mean", normalize: true });
    self.postMessage({ type: "EMBED_RES", id, vector: output.data });
  } catch (err) {
    const error = err instanceof Error ? err.message : String(err);
    self.postMessage({ type: "EMBED_RES", id, vector: null, error });
  }
}
|
|
77
|
+
// Route incoming worker messages: INIT configures/loads the model; every
// other message is treated as an embedding request.
self.addEventListener("message", (event) => {
  const msg = event.data;
  if (msg.type === "INIT") {
    handleInitMessage(event);
  } else {
    handleMessage(event);
  }
});
|
|
85
|
+
// Annotate the CommonJS export names for ESM import in node:
// (dead code — the `0 &&` guard means this never executes; Node's ESM
// loader only statically scans it to discover the named exports)
0 && (module.exports = {
  PipelineSingleton,
  getModelName,
  handleInitMessage,
  handleMessage
});
//# sourceMappingURL=embedding.worker.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/worker/embedding.worker.ts"],"sourcesContent":["import { pipeline, env } from \"@huggingface/transformers\";\nimport type { EmbeddingRequest, EmbeddingResponse, WorkerIncomingMessage, WorkerInitMessage } from \"./types.js\";\n\n// Disable local models; always load from the HuggingFace Hub.\nenv.allowLocalModels = false;\n\nexport class PipelineSingleton {\n static instance: Promise<any> | null = null;\n static modelName: string | null = null;\n\n static async getInstance(modelName: string, progressCallback?: (data: any) => void) {\n if (this.instance === null || this.modelName !== modelName) {\n this.modelName = modelName;\n this.instance = pipeline('feature-extraction', modelName, {\n dtype: 'q8',\n progress_callback: progressCallback,\n });\n }\n return this.instance;\n }\n}\n\nlet currentModelName: string = \"Xenova/all-MiniLM-L6-v2\";\n\n/** Returns the model name currently configured in the worker. */\nexport function getModelName(): string {\n return currentModelName;\n}\n\n/**\n * Handles an INIT message: saves the model name and starts loading the pipeline,\n * broadcasting STATUS events so the main thread can track the model lifecycle.\n */\nexport async function handleInitMessage(event: MessageEvent<WorkerInitMessage>): Promise<void> {\n currentModelName = event.data.modelName;\n self.postMessage({ type: 'STATUS', status: 'loading' });\n try {\n await PipelineSingleton.getInstance(currentModelName, (data: any) => {\n if (data.status === 'progress' && data.file && data.progress !== undefined) {\n self.postMessage({ type: 'PROGRESS', file: data.file, progress: data.progress });\n }\n });\n self.postMessage({ type: 'STATUS', status: 'ready' });\n } catch (err) {\n const error = err instanceof Error ? 
err.message : String(err);\n self.postMessage({ type: 'STATUS', status: 'failed', error });\n }\n}\n\n/**\n * Handles an EMBED message: runs the text through the pipeline and posts back\n * the resulting normalized 1D Float32Array.\n */\nexport async function handleMessage(event: MessageEvent<EmbeddingRequest>): Promise<void> {\n const { id, text } = event.data;\n try {\n const extractor = await PipelineSingleton.getInstance(currentModelName);\n const output = await extractor(text, { pooling: 'mean', normalize: true });\n const response: EmbeddingResponse = { type: \"EMBED_RES\", id, vector: output.data as Float32Array };\n self.postMessage(response);\n } catch (err) {\n const error = err instanceof Error ? err.message : String(err);\n const response: EmbeddingResponse = { type: \"EMBED_RES\", id, vector: null, error };\n self.postMessage(response);\n }\n}\n\nself.addEventListener(\"message\", (event: Event) => {\n const msg = (event as MessageEvent<WorkerIncomingMessage>).data;\n if (msg.type === \"INIT\") {\n handleInitMessage(event as MessageEvent<WorkerInitMessage>);\n } else {\n handleMessage(event as MessageEvent<EmbeddingRequest>);\n 
}\n});\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,0BAA8B;AAI9B,wBAAI,mBAAmB;AAEhB,IAAM,oBAAN,MAAwB;AAAA,EAI7B,aAAa,YAAY,WAAmB,kBAAwC;AAClF,QAAI,KAAK,aAAa,QAAQ,KAAK,cAAc,WAAW;AAC1D,WAAK,YAAY;AACjB,WAAK,eAAW,8BAAS,sBAAsB,WAAW;AAAA,QACxD,OAAO;AAAA,QACP,mBAAmB;AAAA,MACrB,CAAC;AAAA,IACH;AACA,WAAO,KAAK;AAAA,EACd;AACF;AAda,kBACJ,WAAgC;AAD5B,kBAEJ,YAA2B;AAcpC,IAAI,mBAA2B;AAGxB,SAAS,eAAuB;AACrC,SAAO;AACT;AAMA,eAAsB,kBAAkB,OAAuD;AAC7F,qBAAmB,MAAM,KAAK;AAC9B,OAAK,YAAY,EAAE,MAAM,UAAU,QAAQ,UAAU,CAAC;AACtD,MAAI;AACF,UAAM,kBAAkB,YAAY,kBAAkB,CAAC,SAAc;AACnE,UAAI,KAAK,WAAW,cAAc,KAAK,QAAQ,KAAK,aAAa,QAAW;AAC1E,aAAK,YAAY,EAAE,MAAM,YAAY,MAAM,KAAK,MAAM,UAAU,KAAK,SAAS,CAAC;AAAA,MACjF;AAAA,IACF,CAAC;AACD,SAAK,YAAY,EAAE,MAAM,UAAU,QAAQ,QAAQ,CAAC;AAAA,EACtD,SAAS,KAAK;AACZ,UAAM,QAAQ,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC7D,SAAK,YAAY,EAAE,MAAM,UAAU,QAAQ,UAAU,MAAM,CAAC;AAAA,EAC9D;AACF;AAMA,eAAsB,cAAc,OAAsD;AACxF,QAAM,EAAE,IAAI,KAAK,IAAI,MAAM;AAC3B,MAAI;AACF,UAAM,YAAY,MAAM,kBAAkB,YAAY,gBAAgB;AACtE,UAAM,SAAS,MAAM,UAAU,MAAM,EAAE,SAAS,QAAQ,WAAW,KAAK,CAAC;AACzE,UAAM,WAA8B,EAAE,MAAM,aAAa,IAAI,QAAQ,OAAO,KAAqB;AACjG,SAAK,YAAY,QAAQ;AAAA,EAC3B,SAAS,KAAK;AACZ,UAAM,QAAQ,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC7D,UAAM,WAA8B,EAAE,MAAM,aAAa,IAAI,QAAQ,MAAM,MAAM;AACjF,SAAK,YAAY,QAAQ;AAAA,EAC3B;AACF;AAEA,KAAK,iBAAiB,WAAW,CAAC,UAAiB;AACjD,QAAM,MAAO,MAA8C;AAC3D,MAAI,IAAI,SAAS,QAAQ;AACvB,sBAAkB,KAAwC;AAAA,EAC5D,OAAO;AACL,kBAAc,KAAuC;AAAA,EACvD;AACF,CAAC;","names":[]}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
 * Message contract between the Main Thread and the Worker Thread.
 * Every message carries an `id` so responses can be mapped back to their originating Promises.
 */
/** An initialization message sent from the main thread to configure the worker. */
interface WorkerInitMessage {
    type: "INIT";
    /** The name of the embedding model the worker should load. */
    modelName: string;
}
/** A request sent from the main thread to the embedding worker. */
interface EmbeddingRequest {
    type: "EMBED";
    /** UUID that uniquely identifies this request. */
    id: string;
    /** The raw text to embed. */
    text: string;
}

/**
 * Lazily-created, cached feature-extraction pipeline. A new pipeline is
 * instantiated only when the requested model name changes.
 */
declare class PipelineSingleton {
    static instance: Promise<any> | null;
    static modelName: string | null;
    static getInstance(modelName: string, progressCallback?: (data: any) => void): Promise<any>;
}
/** Returns the model name currently configured in the worker. */
declare function getModelName(): string;
/**
 * Handles an INIT message: saves the model name and starts loading the pipeline,
 * broadcasting STATUS events so the main thread can track the model lifecycle.
 */
declare function handleInitMessage(event: MessageEvent<WorkerInitMessage>): Promise<void>;
/**
 * Handles an EMBED message: runs the text through the pipeline and posts back
 * the resulting normalized 1D Float32Array as an EMBED_RES message keyed by `id`.
 */
declare function handleMessage(event: MessageEvent<EmbeddingRequest>): Promise<void>;

export { PipelineSingleton, getModelName, handleInitMessage, handleMessage };
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/**
 * Message contract between the Main Thread and the Worker Thread.
 * Every message carries an `id` so responses can be mapped back to their originating Promises.
 */
/** An initialization message sent from the main thread to configure the worker. */
interface WorkerInitMessage {
    type: "INIT";
    /** The name of the embedding model the worker should load. */
    modelName: string;
}
/** A request sent from the main thread to the embedding worker. */
interface EmbeddingRequest {
    type: "EMBED";
    /** UUID that uniquely identifies this request. */
    id: string;
    /** The raw text to embed. */
    text: string;
}

/**
 * Lazily-created, cached feature-extraction pipeline. A new pipeline is
 * instantiated only when the requested model name changes.
 */
declare class PipelineSingleton {
    static instance: Promise<any> | null;
    static modelName: string | null;
    static getInstance(modelName: string, progressCallback?: (data: any) => void): Promise<any>;
}
/** Returns the model name currently configured in the worker. */
declare function getModelName(): string;
/**
 * Handles an INIT message: saves the model name and starts loading the pipeline,
 * broadcasting STATUS events so the main thread can track the model lifecycle.
 */
declare function handleInitMessage(event: MessageEvent<WorkerInitMessage>): Promise<void>;
/**
 * Handles an EMBED message: runs the text through the pipeline and posts back
 * the resulting normalized 1D Float32Array as an EMBED_RES message keyed by `id`.
 */
declare function handleMessage(event: MessageEvent<EmbeddingRequest>): Promise<void>;

export { PipelineSingleton, getModelName, handleInitMessage, handleMessage };
|