aether-slm-framework 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +294 -0
- package/dist/aether.mjs +1 -0
- package/dist/aether.umd.js +1 -0
- package/dist/assets/rag-worker-C-t5cTWr.js +364 -0
- package/dist/assets/vram-shared-worker-CHZsws2B.js +281 -0
- package/dist/index.d.ts +1 -0
- package/dist/src/benchmark/benchmark-agent.d.ts +8 -0
- package/dist/src/client/aether-client.d.ts +14 -0
- package/dist/src/hub/aether-hub.d.ts +20 -0
- package/dist/src/index.d.ts +10 -0
- package/dist/src/inference/onnx-engine.d.ts +20 -0
- package/dist/src/inference/onnx-engine.test.d.ts +1 -0
- package/dist/src/inference/uma-dispatcher.d.ts +27 -0
- package/dist/src/inference/uma-dispatcher.test.d.ts +1 -0
- package/dist/src/main.d.ts +1 -0
- package/dist/src/rag/rag-client.d.ts +56 -0
- package/dist/src/rag/rag-main.d.ts +10 -0
- package/dist/src/rag/rag-worker.d.ts +134 -0
- package/dist/src/rpc/multiplexer.d.ts +14 -0
- package/dist/src/rpc/multiplexer.test.d.ts +1 -0
- package/dist/src/rpc/protocol.d.ts +55 -0
- package/dist/src/worker/vram-shared-worker.d.ts +1 -0
- package/package.json +50 -0
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
(function(e) {
|
|
2
|
+
// Bundler-generated module interop for the IIFE argument `e` (looks like a CommonJS-to-ESM
// namespace wrapper -- NOTE(review): confirm against the bundler's runtime helpers).
// What the statements below do:
//   1. `t` keeps a reference to the original argument; `n` becomes a fresh object whose prototype
//      is the argument's prototype (or a plain `{}` when the argument is null/undefined).
//   2. `n.default` is defined as the original argument, unless `t.__esModule` is truthy
//      (`s` is never assigned, so `!s` is always true here).
//   3. Every own property name of `t` that is not already an own property of `n` is exposed on
//      `n` through a getter that reads `t[name]` lazily, keeping the source descriptor's
//      enumerability.
//   4. `e` is rebound to that wrapper, so the rest of the bundle reads the library through it.
var t, s, n, r = Object.create, a = Object.defineProperty, o = Object.getOwnPropertyDescriptor, i = Object.getOwnPropertyNames, c = Object.getPrototypeOf, u = Object.prototype.hasOwnProperty;
|
|
3
|
+
n = null != (t = e) ? r(c(t)) : {}, e = ((e, t, s, n) => {
|
|
4
|
+
if (t && "object" == typeof t || "function" == typeof t) for (var r, c = i(t), d = 0, p = c.length; d < p; d++) r = c[d], u.call(e, r) || r === s || a(e, r, {
|
|
5
|
+
get: ((e) => t[e]).bind(null, r),
|
|
6
|
+
|
enumerable: !(n = o(t, r)) || n.enumerable
|
|
7
|
+
});
|
|
8
|
+
return e;
|
|
9
|
+
})(!s && t && t.__esModule ? n : a(n, "default", {
|
|
10
|
+
value: t,
|
|
11
|
+
enumerable: !0
|
|
12
|
+
}), t);
|
|
13
|
+
/**
 * Execution-provider dispatcher: probes the runtime for WebNN, then WebGPU, and falls back
 * to WASM. All members are static; the class is never instantiated.
 */
const d = class {
  /**
   * Picks the first available execution provider in the order WebNN, WebGPU, WASM and logs
   * the decision.
   * @returns {Promise<"webnn" | "webgpu" | "wasm">}
   */
  static async getPriorityEngine() {
    if (await this.isWebNNSupported()) {
      console.log("[Aether] UMA Dispatcher: NPU Detected (WebNN)");
      return "webnn";
    }
    if (await this.isWebGPUSupported()) {
      console.log("[Aether] UMA Dispatcher: GPU Detected (WebGPU)");
      return "webgpu";
    }
    console.log("[Aether] UMA Dispatcher: Falling back to CPU (WASM)");
    return "wasm";
  }

  /**
   * Reports whether an NPU-backed WebNN context can be created and exposes a non-empty
   * `opSupportLimits()` result. Any failure while probing counts as "not supported".
   * @returns {Promise<boolean>}
   */
  static async isWebNNSupported() {
    try {
      const hasWebNN = typeof navigator !== "undefined" && "ml" in navigator;
      if (!hasWebNN) {
        return false;
      }
      const context = await navigator.ml.createContext({ deviceType: "npu" });
      if (!context || typeof context.opSupportLimits !== "function") {
        return false;
      }
      const limits = await context.opSupportLimits();
      return Boolean(limits) && Object.keys(limits).length > 0;
    } catch {
      return false;
    }
  }

  /**
   * Reports whether `navigator.gpu.requestAdapter()` yields an adapter. Any failure while
   * probing counts as "not supported".
   * @returns {Promise<boolean>}
   */
  static async isWebGPUSupported() {
    try {
      const hasWebGPU = typeof navigator !== "undefined" && navigator.gpu;
      if (!hasWebGPU) {
        return false;
      }
      const adapter = await navigator.gpu.requestAdapter();
      return Boolean(adapter);
    } catch {
      return false;
    }
  }

  /**
   * Thread count for the WASM backend: `navigator.hardwareConcurrency` capped at 8, or 4 when
   * that value is missing or zero.
   * @returns {number}
   */
  static getWasmThreads() {
    if (typeof navigator === "undefined" || !navigator.hardwareConcurrency) {
      return 4;
    }
    return Math.min(navigator.hardwareConcurrency, 8);
  }
};
|
|
43
|
+
/**
 * Singleton inference engine. Owns the ONNX Runtime session for the draft model and the
 * bookkeeping the rest of the worker reads (`engineState`, VRAM counters, execution provider).
 * Target-model loading and token generation are simulated with timers in this build.
 */
const p = new class {
  session = null;
  currentEp = "wasm";
  vramUsedMB = 0;
  maxVramMB = 4096;
  engineState = "UNINITIALIZED";

  /**
   * Creates the draft-model session on the requested (or auto-detected) execution provider.
   * @param {ArrayBuffer} modelBuffer - Serialized ONNX model bytes.
   * @param {string} [forceEp] - Execution provider to use instead of auto-detection.
   * @returns {Promise<{capability: "DRAFT", metadata: {inputs: readonly string[], outputs: readonly string[]}}>}
   * @throws {Error} When session creation fails; the original error is attached as `cause`.
   */
  async loadDraftModel(modelBuffer, forceEp) {
    this.currentEp = forceEp || await d.getPriorityEngine();
    try {
      if (this.currentEp === "wasm") {
        e.env.wasm.numThreads = d.getWasmThreads();
      }
      this.session = await e.InferenceSession.create(modelBuffer, {
        executionProviders: [this.currentEp],
      });
      // The counter is an estimate: model size in bytes rounded to whole MiB.
      this.vramUsedMB += Math.round(modelBuffer.byteLength / 1048576);
      this.engineState = "DRAFT";
      const metadata = {
        inputs: this.session.inputNames,
        outputs: this.session.outputNames,
      };
      console.log(`[Aether] Real Draft Model Initialized on EP: ${this.currentEp}. Metadata:`, metadata);
      return { capability: "DRAFT", metadata };
    } catch (err) {
      throw new Error(`Failed to load draft model: ${err}`, { cause: err });
    }
  }

  /**
   * Simulated target-model load: waits six seconds, adds 3000 MB to the VRAM counter and
   * switches the engine to speculative mode. The argument is currently unused.
   * @param {string} _modelUrl
   * @returns {Promise<"FULL">}
   * @throws {Error} When the load fails; the original error is attached as `cause`.
   */
  async loadTargetModel(_modelUrl) {
    try {
      await new Promise((resolve) => setTimeout(resolve, 6e3));
      this.vramUsedMB += 3e3;
      this.engineState = "SPECULATIVE";
      console.log("[Aether] Target Model Loaded - Speculative Decoding Active");
      return "FULL";
    } catch (err) {
      throw new Error(`Failed to load target model: ${err}`, { cause: err });
    }
  }

  /**
   * Decides whether the target model may be loaded next to the draft model.
   * With WebGPU: requires the adapter's `maxBufferSize` to be at least 2 GiB.
   * Otherwise (or when the adapter probe fails): requires `navigator.deviceMemory >= 8`,
   * treating a missing value as 4.
   * @returns {Promise<boolean>}
   */
  async checkVRAMAvailability() {
    if (await d.isWebGPUSupported()) {
      try {
        const adapter = await navigator.gpu.requestAdapter();
        if (adapter) {
          if (adapter.limits.maxBufferSize < 2147483648) {
            console.warn("[Aether] VRAM constrained. Will swap instead of paired speculative execution.");
            return false;
          }
          return true;
        }
      } catch (err) {
        console.warn("VRAM check failed", err);
      }
    }
    return (navigator.deviceMemory || 4) >= 8;
  }

  /**
   * Simulated generation: emits up to 15 placeholder tokens (` token_<i>`), one per timer tick
   * (10 ms in speculative mode, 80 ms otherwise). The prompt is currently unused.
   * @param {string} _prompt
   * @param {number} maxTokens
   * @param {(chunk: string, mode: "DRAFT" | "SPECULATIVE") => void} onChunk
   * @returns {Promise<string>} Concatenation of all emitted chunks.
   */
  async runInference(_prompt, maxTokens, onChunk) {
    let fullText = "";
    const delayMs = this.engineState === "SPECULATIVE" ? 10 : 80;
    const tokenCount = Math.min(maxTokens, 15);
    for (let index = 0; index < tokenCount; index++) {
      const chunk = ` token_${index}`;
      fullText += chunk;
      onChunk(chunk, this.engineState === "SPECULATIVE" ? "SPECULATIVE" : "DRAFT");
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
    return fullText;
  }

  /**
   * Snapshot of the VRAM bookkeeping and the active execution provider.
   * @returns {{usedMB: number, limitMB: number, ep: string}}
   */
  getVRAMStatus() {
    return {
      usedMB: this.vramUsedMB,
      limitMB: this.maxVramMB,
      ep: this.currentEp,
    };
  }
}();

/**
 * Singleton multiplexer: registers the MessagePort of every connected client, queues their
 * inference requests and runs them one at a time on the shared engine.
 */
const l = new class {
  connections = /* @__PURE__ */ new Map();
  requestQueue = [];
  isProcessing = false;
  engine;
  THERMAL_THROTTLE_THRESHOLD_MS = 1500;

  /** @param {object} engine - Engine exposing `runInference` and `getVRAMStatus`. */
  constructor(engine) {
    this.engine = engine;
  }

  /**
   * Registers a client port and routes its incoming messages to `handleMessage`.
   * @param {MessagePort} port
   * @param {string} id - Identifier used to address the port in replies.
   */
  addPort(port, id) {
    this.connections.set(id, { port, id });
    port.onmessage = (event) => this.handleMessage(id, event.data);
  }

  /**
   * Dispatches one client message: inference requests are queued, VRAM status requests are
   * answered immediately with a SYSTEM_STATE message carrying the engine's `vram` snapshot.
   * Other message types are ignored.
   * @param {string} portId
   * @param {{type: string, id: string}} message
   */
  handleMessage(portId, message) {
    if (message.type === "INFERENCE_REQUEST") {
      this.requestQueue.push({ req: message, portId });
      // processQueue is async; surface unexpected failures instead of leaving a floating rejection.
      this.processQueue().catch((err) => {
        console.error("[Aether] Request queue processing failed", err);
      });
      return;
    }
    if (message.type === "VRAM_STATUS_REQ") {
      const connection = this.connections.get(portId);
      if (connection) {
        connection.port.postMessage({
          type: "SYSTEM_STATE",
          state: "READY",
          id: message.id,
          vram: this.engine.getVRAMStatus(),
        });
      }
    }
  }

  /**
   * Drains the request queue in batches of at most five. Re-entrant calls return immediately
   * while a drain is running. A batch is rejected with RESOURCE_CONSTRAINED when the engine
   * reports its VRAM limit reached; otherwise each request is run sequentially.
   * @returns {Promise<void>}
   */
  async processQueue() {
    if (this.isProcessing || this.requestQueue.length === 0) {
      return;
    }
    this.isProcessing = true;
    try {
      while (this.requestQueue.length > 0) {
        const batch = this.requestQueue.splice(0, 5);
        const vram = this.engine.getVRAMStatus();
        if (vram.usedMB >= vram.limitMB) {
          for (const { req, portId } of batch) {
            this.sendError(portId, req.id, "RESOURCE_CONSTRAINED", "VRAM Hard limit exceeded");
          }
          continue;
        }
        for (const { req, portId } of batch) {
          await this.runRequest(req, portId);
        }
        // Short pause between batches so port messages can be delivered.
        await new Promise((resolve) => setTimeout(resolve, 10));
      }
    } finally {
      // Always release the guard; otherwise one unexpected throw would wedge the queue forever.
      this.isProcessing = false;
    }
  }

  /**
   * Runs a single inference request and posts INFERENCE_COMPLETE (or an ERROR) to its port.
   * Token counts are rough estimates of one token per four characters.
   * @param {{id: string, prompt: string, maxTokens: number}} req
   * @param {string} portId
   * @returns {Promise<void>}
   */
  async runRequest(req, portId) {
    const startedAt = performance.now();
    try {
      const fullResponse = await this.engine.runInference(req.prompt, req.maxTokens, (chunk, mode) => {
        this.sendChunk(portId, req.id, chunk, mode);
      });
      const elapsedMs = performance.now() - startedAt;
      if (elapsedMs > this.THERMAL_THROTTLE_THRESHOLD_MS) {
        console.warn(`[Thermal Guardrail] Inference taking ${elapsedMs}ms, throttling future batches.`);
      }
      // Prompt tokens are estimated from the prompt, completion tokens from the response.
      const promptTokens = Math.floor(String(req.prompt ?? "").length / 4);
      const completionTokens = Math.floor(fullResponse.length / 4);
      this.connections.get(portId)?.port.postMessage({
        type: "INFERENCE_COMPLETE",
        id: req.id,
        fullResponse,
        usageStatistics: {
          promptTokens,
          completionTokens,
          totalTokens: promptTokens + completionTokens,
        },
      });
    } catch (err) {
      this.sendError(portId, req.id, "DEVICE_LOST", err?.message ?? String(err));
    }
  }

  /**
   * Posts one streamed chunk to a client; a no-op when the port is unknown.
   * @param {string} portId
   * @param {string} requestId
   * @param {string} chunk
   * @param {"DRAFT" | "SPECULATIVE"} mode
   */
  sendChunk(portId, requestId, chunk, mode) {
    const connection = this.connections.get(portId);
    if (!connection) {
      return;
    }
    connection.port.postMessage({
      type: "INFERENCE_CHUNK",
      id: requestId,
      chunk,
      mode,
    });
  }

  /**
   * Posts an ERROR message to a client; a no-op when the port is unknown.
   * @param {string} portId
   * @param {string} requestId
   * @param {string} errorCode
   * @param {string} message
   */
  sendError(portId, requestId, errorCode, message) {
    const connection = this.connections.get(portId);
    if (!connection) {
      return;
    }
    connection.port.postMessage({
      type: "ERROR",
      id: requestId,
      errorCode,
      message,
    });
  }
}(p);
|
|
187
|
+
/** True once the draft model is loaded and the engine can serve requests. */
let g = false;
/** True while a bootstrap (cache lookup or download, then draft load) is in flight. */
let h = false;

/**
 * Posts a system message to every registered client port, so clients that connect while a
 * bootstrap or target download is running still receive the follow-up notifications.
 * @param {object} message
 */
function broadcastSystemMessage(message) {
  for (const { port } of l.connections.values()) {
    port.postMessage(message);
  }
}

/**
 * Downloads the draft model bytes from the Hugging Face URL below.
 * @returns {Promise<ArrayBuffer>}
 * @throws {Error} When the HTTP response is not OK.
 */
async function downloadDraftModel() {
  const response = await fetch("https://huggingface.co/Xenova/all-MiniLM-L6-v2/resolve/main/onnx/model_quantized.onnx", { mode: "cors" });
  if (!response.ok) {
    throw new Error("Failed to fetch HF dummy draft model");
  }
  return await response.arrayBuffer();
}

/**
 * Resolves with the draft model bytes, read from the "models" store of the "AetherSLM-Cache"
 * IndexedDB database when present, otherwise downloaded and then cached (best effort).
 * Every failure path rejects the returned promise so the caller can report it.
 * @returns {Promise<ArrayBuffer>}
 */
function loadCachedOrDownloadDraftModel() {
  return new Promise((resolve, reject) => {
    const openRequest = indexedDB.open("AetherSLM-Cache", 2);
    openRequest.onupgradeneeded = () => {
      const db = openRequest.result;
      if (!db.objectStoreNames.contains("models")) {
        db.createObjectStore("models");
      }
    };
    openRequest.onerror = () => reject(openRequest.error);
    openRequest.onsuccess = () => {
      const db = openRequest.result;
      let lookup;
      try {
        lookup = db.transaction("models", "readonly").objectStore("models").get("draft_onnx");
      } catch (err) {
        // transaction() throws synchronously, e.g. when the object store is missing.
        reject(err);
        return;
      }
      lookup.onerror = () => reject(lookup.error);
      lookup.onsuccess = () => {
        if (lookup.result) {
          broadcastSystemMessage({ type: "SYSTEM_STATE", id: "system", state: "READY" });
          resolve(lookup.result);
          return;
        }
        broadcastSystemMessage({ type: "SYSTEM_STATE", id: "system", state: "DOWNLOADING" });
        downloadDraftModel().then((modelBytes) => {
          try {
            db.transaction("models", "readwrite").objectStore("models").put(modelBytes, "draft_onnx");
          } catch (cacheError) {
            // Caching is an optimization; the downloaded bytes are still usable.
            console.warn("[Aether] Could not cache draft model", cacheError);
          }
          resolve(modelBytes);
        }, reject);
      };
    };
  });
}

/**
 * Background task: reports simulated download progress (0..100 in steps of 10, 600 ms apart),
 * then loads the target model and announces the result to all clients.
 * @returns {Promise<void>}
 */
async function streamTargetModel() {
  broadcastSystemMessage({ type: "SYSTEM_STATE", id: "system", state: "DOWNLOADING_TARGET" });
  for (let progress = 0; progress <= 100; progress += 10) {
    broadcastSystemMessage({ type: "DOWNLOAD_PROGRESS", id: "system", progress });
    await new Promise((resolve) => setTimeout(resolve, 600));
  }
  try {
    const capability = await p.loadTargetModel("target.onnx");
    broadcastSystemMessage({ type: "MODEL_READY", id: "system", capability });
  } catch (err) {
    broadcastSystemMessage({
      type: "ERROR",
      id: "system",
      errorCode: "TARGET_LOAD_FAILED",
      message: err?.message ?? String(err),
    });
  }
}

/**
 * One-time engine bootstrap: obtains the draft model, loads it, announces MODEL_READY and,
 * when memory allows, starts the target-model stream in the background. `h` is set
 * synchronously on entry and cleared on exit so a failed bootstrap can be retried by the
 * next connection.
 * @returns {Promise<void>}
 */
async function bootstrapEngine() {
  h = true;
  try {
    const modelBytes = await loadCachedOrDownloadDraftModel();
    const { capability, metadata } = await p.loadDraftModel(modelBytes);
    g = true;
    broadcastSystemMessage({ type: "MODEL_READY", id: "system", capability, metadata });
    if (!(await p.checkVRAMAvailability())) {
      console.warn("[Aether] Skipping target model background stream due to VRAM limits");
      return;
    }
    // Deliberately not awaited: the target model streams in while the draft model serves requests.
    streamTargetModel().catch((err) => {
      console.error("[Aether] Target model stream failed", err);
    });
  } catch (err) {
    broadcastSystemMessage({
      type: "ERROR",
      id: "system",
      errorCode: "DEVICE_LOST",
      message: err?.message ?? String(err),
    });
  } finally {
    h = false;
  }
}

/**
 * SharedWorker connect handler: registers the new port with the multiplexer, reports the
 * current system state, and either announces the capability already loaded or starts the
 * bootstrap when none is running.
 */
self.onconnect = (event) => {
  const port = event.ports[0];
  l.addPort(port, crypto.randomUUID());
  port.postMessage({
    type: "SYSTEM_STATE",
    id: "system",
    state: g ? "READY" : "INITIALIZING",
  });

  if (g) {
    // Report what is actually loaded: "FULL" only once the target model is active.
    port.postMessage({
      type: "MODEL_READY",
      id: "system",
      capability: p.engineState === "SPECULATIVE" ? "FULL" : "DRAFT",
    });
    return;
  }
  if (h) {
    // A bootstrap is already running; this port is notified through the broadcasts.
    return;
  }
  void bootstrapEngine();
};
|
|
281
|
+
})(onnxruntime_web_webgpu);
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { RPCMessage } from '../rpc/protocol';
|
|
2
|
+
/**
 * Client-side handle for the Aether inference worker.
 * NOTE(review): the implementation is not part of this declaration file; the member notes
 * below restate the signatures only.
 */
export declare class AetherClient {
    private worker;
    private port;
    private responseResolvers;
    /** Optional callback receiving an RPCMessage. NOTE(review): presumably fired for worker state notifications; confirm. */
    onStateChange?: (msg: RPCMessage) => void;
    constructor();
    private handleMessage;
    /**
     * Async generator over the response to `prompt`; each item is a text chunk tagged with
     * the mode that produced it.
     */
    generate(prompt: string, maxTokens?: number): AsyncGenerator<{
        chunk: string;
        mode: 'DRAFT' | 'SPECULATIVE';
    }, void, unknown>;
    /** Resolves without a value. NOTE(review): presumably triggers a status request; confirm. */
    getEngineStatus(): Promise<void>;
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
 * Aether Hub Core: manages access to unpartitioned model storage.
 */
export declare class AetherHub {
    private static port;
    /** Resolves to whether the current context has access to unpartitioned storage. */
    static checkAccess(): Promise<boolean>;
    /**
     * Requests unpartitioned storage access and resolves to the outcome.
     * MUST be called from a user gesture handler.
     */
    static requestAccess(): Promise<boolean>;
    /** Initializes the handshake listener for parent domains. */
    static initHandshake(): void;
    private static handlePortMessage;
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* index.ts
|
|
3
|
+
* Aether-SLM — Public Library Entry Point
|
|
4
|
+
*/
|
|
5
|
+
export { AetherClient } from './client/aether-client';
|
|
6
|
+
export { AetherRAGClient } from './rag/rag-client';
|
|
7
|
+
export * from './rpc/protocol';
|
|
8
|
+
export { ONNXEngine } from './inference/onnx-engine';
|
|
9
|
+
export type { InferenceRequest, InferenceCompleteResponse, InferenceChunkResponse } from './rpc/protocol';
|
|
10
|
+
export type { RAGResult, RAGEntry, IndexOptions } from './rag/rag-worker';
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
 * Inference engine wrapping an ONNX Runtime session plus VRAM bookkeeping.
 */
export declare class ONNXEngine {
    private session;
    private currentEp;
    private vramUsedMB;
    private maxVramMB;
    /** Lifecycle marker: set to 'DRAFT' by loadDraftModel and to 'SPECULATIVE' by loadTargetModel. */
    engineState: 'UNINITIALIZED' | 'DRAFT' | 'SPECULATIVE';
    constructor();
    /**
     * Creates the draft-model session from serialized model bytes.
     * @param modelBuffer Serialized ONNX model.
     * @param forceEp Execution provider to use; auto-detected when omitted.
     */
    loadDraftModel(modelBuffer: ArrayBuffer, forceEp?: string): Promise<{
        capability: 'DRAFT';
        metadata: any;
    }>;
    /** Loads the target model and resolves to 'FULL'. The URL is unused in the bundled worker build. */
    loadTargetModel(_modelUrl: string): Promise<'FULL'>;
    /** Resolves to whether there is enough memory to hold the target model next to the draft model. */
    checkVRAMAvailability(): Promise<boolean>;
    /**
     * Generates a response, invoking `onChunk` for every emitted chunk, and resolves to the
     * concatenated text.
     */
    runInference(_prompt: string, maxTokens: number, onChunk: (chunk: string, mode: 'DRAFT' | 'SPECULATIVE') => void): Promise<string>;
    /** Snapshot of the VRAM counters and the active execution provider. */
    getVRAMStatus(): {
        usedMB: number;
        limitMB: number;
        ep: string;
    };
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/** Execution providers the dispatcher can select, in descending priority. */
export type ExecutionProvider = 'webnn' | 'webgpu' | 'wasm';
/**
 * Hardware-Agnostic UMA Dispatcher
 * Prioritizes execution providers based on hardware availability:
 * 1. WebNN (NPU)
 * 2. WebGPU (GPU)
 * 3. WASM (CPU)
 */
export declare class UMADispatcher {
    /**
     * Returns the highest priority execution provider available on the current hardware.
     */
    static getPriorityEngine(): Promise<ExecutionProvider>;
    /**
     * Probes for WebNN support with NPU priority.
     * Creates a context via navigator.ml.createContext({ deviceType: 'npu' }) and requires
     * that context's opSupportLimits() to return a non-empty result. Probe failures resolve
     * to false.
     */
    static isWebNNSupported(): Promise<boolean>;
    /**
     * Probes for WebGPU support by requesting an adapter from navigator.gpu.
     * Probe failures resolve to false.
     */
    static isWebGPUSupported(): Promise<boolean>;
    /**
     * Recommended thread count for WASM fallback: navigator.hardwareConcurrency capped at 8,
     * or 4 when that value is unavailable.
     */
    static getWasmThreads(): number;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { RAGResult, RAGEntry, IndexOptions } from './rag-worker';
|
|
2
|
+
/**
 * Main-thread client for the RAG worker.
 * NOTE(review): the implementation is not part of this declaration file; the return-value
 * semantics of the Promise<number> methods are not visible here.
 */
export declare class AetherRAGClient {
    private worker;
    private onStatusChange?;
    private onIndexProgress?;
    /**
     * @param options.onStatus Callback receiving status text.
     * @param options.onProgress Callback receiving indexing progress (`indexed` of `total`, plus the current `filename`).
     */
    constructor(options?: {
        onStatus?: (msg: string) => void;
        onProgress?: (p: {
            indexed: number;
            total: number;
            filename: string;
        }) => void;
    });
    private handleMessage;
    /** Internal helper to wait for a specific worker response. */
    private waitForResponse;
    /** Index a list of files (e.g. from showDirectoryPicker). */
    indexFiles(files: File[], options?: IndexOptions): Promise<number>;
    /** Index a raw string programmatically. */
    indexText(source: string, text: string, options?: IndexOptions & {
        id?: string;
    }): Promise<number>;
    /** Index multiple structured entries. */
    indexEntries(entries: RAGEntry[], options?: IndexOptions): Promise<number>;
    /** Update or insert a single record. */
    upsert(id: string, text: string, meta?: Record<string, string>, options?: IndexOptions): Promise<void>;
    /** Delete a record by its stable ID. */
    delete(id: string, options?: {
        namespace?: string;
    }): Promise<void>;
    /** Clear a specific namespace or the entire index. */
    clear(options?: {
        namespace?: string;
    }): Promise<void>;
    /** Perform a semantic hybrid search. */
    query(text: string, options?: {
        topK?: number;
        namespace?: string;
    }): Promise<RAGResult[]>;
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* rag-main.ts
|
|
3
|
+
* Aether-SLM — Unified Local RAG Main-Thread Controller
|
|
4
|
+
*
|
|
5
|
+
* Handles:
|
|
6
|
+
* - File System Access API: showDirectoryPicker → File[] collection
|
|
7
|
+
* - RAG Worker lifecycle management
|
|
8
|
+
* - UI state machine driven by worker messages
|
|
9
|
+
*/
|
|
10
|
+
export {};
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* rag-worker.ts
|
|
3
|
+
* Aether-SLM — Unified Local RAG Worker
|
|
4
|
+
*
|
|
5
|
+
* Runs off the main thread to handle:
|
|
6
|
+
* 1. File ingestion from File System Access API (INDEX_FILES)
|
|
7
|
+
* 2. Programmatic text/object indexing (INDEX_TEXT, INDEX_ENTRIES)
|
|
8
|
+
* 3. Upsert / delete individual records (UPSERT_ENTRY, DELETE_ENTRY)
|
|
9
|
+
* 4. Namespace-isolated querying (QUERY with optional namespace)
|
|
10
|
+
* 5. Optional IndexedDB persistence (persist: true flag)
|
|
11
|
+
* 6. Text chunking with overlap
|
|
12
|
+
* 7. Local embedding via Xenova/gte-small (ONNX/WASM, no network transit)
|
|
13
|
+
* 8. Orama in-memory vector store (BM25 + cosine hybrid search)
|
|
14
|
+
*
|
|
15
|
+
* Privacy guarantee: No document content is ever sent over the network.
|
|
16
|
+
*/
|
|
17
|
+
/**
 * A record that can be indexed programmatically, without a File.
 */
export interface RAGEntry {
    /** Stable identifier used by upsert/delete; generated when omitted. */
    id?: string;
    /** Text that is embedded and indexed. */
    text: string;
    /** Optional flat key-value metadata (e.g. { url, timestamp, category }). */
    meta?: Record<string, string>;
}
|
|
26
|
+
/**
 * Options accepted by every index/upsert operation.
 */
export interface IndexOptions {
    /**
     * Namespace tag that isolates this data from other indexes.
     * Default: 'default'.
     * Use one namespace per data domain
     * (e.g. 'user-prefs', 'chat-history', 'product-catalog').
     */
    namespace?: string;
    /**
     * When true, the raw entry is stored in IndexedDB so it survives tab reloads;
     * embeddings are recomputed on rehydration.
     * Default: false.
     */
    persist?: boolean;
}
|
|
42
|
+
/**
 * Messages accepted by the RAG worker, discriminated by `type`.
 */
export type WorkerInMessage =
    /** Legacy: index File[] from showDirectoryPicker() */
    | {
        type: 'INDEX_FILES';
        files: File[];
        namespace?: string;
        persist?: boolean;
    }
    /** Index a single string directly (no File required) */
    | {
        type: 'INDEX_TEXT';
        id?: string;
        source: string;
        text: string;
        namespace?: string;
        persist?: boolean;
    }
    /** Index an array of structured entries in one batch */
    | {
        type: 'INDEX_ENTRIES';
        entries: RAGEntry[];
        namespace?: string;
        persist?: boolean;
    }
    /** Replace an existing record by id (delete + re-insert) */
    | {
        type: 'UPSERT_ENTRY';
        id: string;
        text: string;
        meta?: Record<string, string>;
        namespace?: string;
        persist?: boolean;
    }
    /** Remove a single record by id */
    | {
        type: 'DELETE_ENTRY';
        id: string;
        namespace?: string;
    }
    /** Clear an entire namespace (or all data if namespace omitted) */
    | {
        type: 'CLEAR_INDEX';
        namespace?: string;
    }
    /** Semantic query with optional namespace scope */
    | {
        type: 'QUERY';
        text: string;
        topK: number;
        namespace?: string;
    };
|
|
93
|
+
/**
 * Messages emitted by the RAG worker, discriminated by `type`.
 */
export type WorkerOutMessage =
    /** Progress while indexing: `indexed` of `total`, with the current `filename`. */
    | {
        type: 'INDEX_PROGRESS';
        indexed: number;
        total: number;
        filename: string;
    }
    /** An indexing operation finished. */
    | {
        type: 'INDEX_DONE';
        docCount: number;
        elapsed: number;
        namespace: string;
    }
    /** An upsert finished for the given id. */
    | {
        type: 'UPSERT_DONE';
        id: string;
        namespace: string;
    }
    /** A delete finished for the given id. */
    | {
        type: 'DELETE_DONE';
        id: string;
        namespace: string;
    }
    /** A clear operation finished. */
    | {
        type: 'CLEAR_DONE';
        namespace: string;
    }
    /** Results of a query. */
    | {
        type: 'QUERY_RESULT';
        results: RAGResult[];
        elapsed: number;
        namespace: string;
    }
    /** Free-form status text. */
    | {
        type: 'STATUS';
        message: string;
    }
    /** Failure report. */
    | {
        type: 'ERROR';
        message: string;
    };
|
|
126
|
+
/**
 * One hit returned by a RAG query.
 * NOTE(review): field meanings are inferred from their names; the producing code is not
 * part of this declaration file.
 */
export interface RAGResult {
    id: string;
    text: string;
    source: string;
    namespace: string;
    /** Presumably the index of the chunk within its source document; confirm. */
    chunkIdx: number;
    score: number;
    meta: Record<string, string>;
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import { ONNXEngine } from '../inference/onnx-engine';
|
|
2
|
+
/**
 * Routes messages between connected client ports and a shared ONNXEngine, queueing
 * inference requests so they run one at a time.
 */
export declare class Multiplexer {
    private connections;
    private requestQueue;
    private isProcessing;
    private engine;
    private THERMAL_THROTTLE_THRESHOLD_MS;
    /** @param engine Engine that executes the queued inference requests. */
    constructor(engine: ONNXEngine);
    /**
     * Registers a client port under `id` and starts handling its incoming messages.
     */
    addPort(port: MessagePort, id: string): void;
    private handleMessage;
    private processQueue;
    private sendChunk;
    private sendError;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/** Base envelope shared by every message exchanged with the shared worker. */
export interface RPCMessage {
    /** Correlation id; system notifications from the worker use the literal 'system'. */
    id: string;
    /** Discriminator naming the concrete message shape. */
    type: string;
}
/** Client request to run inference on a prompt. */
export interface InferenceRequest extends RPCMessage {
    type: 'INFERENCE_REQUEST';
    prompt: string;
    stream: boolean;
    modelId: string;
    maxTokens: number;
}
export interface ConnectRequest extends RPCMessage {
    type: 'CONNECT';
}
/** Client request for the engine's memory status. */
export interface VRAMStatusRequest extends RPCMessage {
    type: 'VRAM_STATUS_REQ';
}
/**
 * Memory status payload.
 * NOTE(review): the bundled shared worker answers VRAM_STATUS_REQ with a SYSTEM_STATE
 * message carrying `vram` rather than with this message type; confirm which one clients
 * should expect.
 */
export interface VRAMStatusResponse extends RPCMessage {
    type: 'VRAM_STATUS_RES';
    usedMB: number;
    limitMB: number;
    ep: string;
}
/** Worker notification describing the engine lifecycle state. */
export interface SystemStateResponse extends RPCMessage {
    type: 'SYSTEM_STATE';
    state: 'INITIALIZING' | 'DOWNLOADING' | 'READY' | 'DOWNLOADING_TARGET';
    /** Memory snapshot the shared worker attaches when replying to a VRAM_STATUS_REQ. */
    vram?: {
        usedMB: number;
        limitMB: number;
        ep: string;
    };
}
/** Worker notification that a model capability became available. */
export interface ModelReadyResponse extends RPCMessage {
    type: 'MODEL_READY';
    capability: 'DRAFT' | 'TARGET' | 'FULL';
    metadata?: any;
}
/** Failure report, either for a specific request id or for the system as a whole. */
export interface ErrorResponse extends RPCMessage {
    type: 'ERROR';
    errorCode: 'RESOURCE_CONSTRAINED' | 'DEVICE_LOST' | 'TARGET_LOAD_FAILED' | string;
    message: string;
}
/** One streamed piece of an inference response. */
export interface InferenceChunkResponse extends RPCMessage {
    type: 'INFERENCE_CHUNK';
    chunk: string;
    mode: 'DRAFT' | 'SPECULATIVE';
}
/** Download progress notification; the shared worker reports values from 0 to 100. */
export interface DownloadProgressResponse extends RPCMessage {
    type: 'DOWNLOAD_PROGRESS';
    progress: number;
}
/** Final message of an inference request, carrying the complete response text. */
export interface InferenceCompleteResponse extends RPCMessage {
    type: 'INFERENCE_COMPLETE';
    fullResponse: string;
    /** Token counts; the shared worker estimates them from character lengths. */
    usageStatistics: {
        promptTokens: number;
        completionTokens: number;
        totalTokens: number;
    };
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|