@runanywhere/web 0.1.0-beta.5 → 0.1.0-beta.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Foundation/SherpaONNXBridge.d.ts +147 -0
- package/dist/Foundation/SherpaONNXBridge.d.ts.map +1 -0
- package/dist/Foundation/SherpaONNXBridge.js +345 -0
- package/dist/Foundation/SherpaONNXBridge.js.map +1 -0
- package/dist/Foundation/StructOffsets.d.ts +5 -33
- package/dist/Foundation/StructOffsets.d.ts.map +1 -1
- package/dist/Foundation/StructOffsets.js +94 -128
- package/dist/Foundation/StructOffsets.js.map +1 -1
- package/dist/Infrastructure/AudioCapture.d.ts +99 -0
- package/dist/Infrastructure/AudioCapture.d.ts.map +1 -0
- package/dist/Infrastructure/AudioCapture.js +264 -0
- package/dist/Infrastructure/AudioCapture.js.map +1 -0
- package/dist/Infrastructure/AudioPlayback.d.ts +53 -0
- package/dist/Infrastructure/AudioPlayback.d.ts.map +1 -0
- package/dist/Infrastructure/AudioPlayback.js +117 -0
- package/dist/Infrastructure/AudioPlayback.js.map +1 -0
- package/dist/Infrastructure/ModelDownloader.d.ts +5 -16
- package/dist/Infrastructure/ModelDownloader.d.ts.map +1 -1
- package/dist/Infrastructure/ModelDownloader.js +7 -54
- package/dist/Infrastructure/ModelDownloader.js.map +1 -1
- package/dist/Infrastructure/ModelLoaderTypes.d.ts +11 -76
- package/dist/Infrastructure/ModelLoaderTypes.d.ts.map +1 -1
- package/dist/Infrastructure/ModelLoaderTypes.js +1 -7
- package/dist/Infrastructure/ModelLoaderTypes.js.map +1 -1
- package/dist/Infrastructure/ModelManager.d.ts +104 -30
- package/dist/Infrastructure/ModelManager.d.ts.map +1 -1
- package/dist/Infrastructure/ModelManager.js +546 -209
- package/dist/Infrastructure/ModelManager.js.map +1 -1
- package/dist/Infrastructure/ModelRegistry.d.ts +8 -6
- package/dist/Infrastructure/ModelRegistry.d.ts.map +1 -1
- package/dist/Infrastructure/ModelRegistry.js +4 -11
- package/dist/Infrastructure/ModelRegistry.js.map +1 -1
- package/dist/Infrastructure/VLMWorkerBridge.d.ts +211 -0
- package/dist/Infrastructure/VLMWorkerBridge.d.ts.map +1 -0
- package/dist/Infrastructure/VLMWorkerBridge.js +264 -0
- package/dist/Infrastructure/VLMWorkerBridge.js.map +1 -0
- package/dist/Infrastructure/VLMWorkerRuntime.d.ts +38 -0
- package/dist/Infrastructure/VLMWorkerRuntime.d.ts.map +1 -0
- package/dist/Infrastructure/VLMWorkerRuntime.js +503 -0
- package/dist/Infrastructure/VLMWorkerRuntime.js.map +1 -0
- package/dist/Infrastructure/VideoCapture.d.ts +130 -0
- package/dist/Infrastructure/VideoCapture.d.ts.map +1 -0
- package/dist/Infrastructure/VideoCapture.js +236 -0
- package/dist/Infrastructure/VideoCapture.js.map +1 -0
- package/dist/Public/Extensions/DiffusionTypes.d.ts +64 -0
- package/dist/Public/Extensions/DiffusionTypes.d.ts.map +1 -0
- package/dist/Public/Extensions/DiffusionTypes.js +28 -0
- package/dist/Public/Extensions/DiffusionTypes.js.map +1 -0
- package/dist/Public/Extensions/EmbeddingsTypes.d.ts +33 -0
- package/dist/Public/Extensions/EmbeddingsTypes.d.ts.map +1 -0
- package/dist/Public/Extensions/EmbeddingsTypes.js +13 -0
- package/dist/Public/Extensions/EmbeddingsTypes.js.map +1 -0
- package/dist/Public/Extensions/RunAnywhere+Diffusion.d.ts +44 -0
- package/dist/Public/Extensions/RunAnywhere+Diffusion.d.ts.map +1 -0
- package/dist/Public/Extensions/RunAnywhere+Diffusion.js +189 -0
- package/dist/Public/Extensions/RunAnywhere+Diffusion.js.map +1 -0
- package/dist/Public/Extensions/RunAnywhere+Embeddings.d.ts +56 -0
- package/dist/Public/Extensions/RunAnywhere+Embeddings.d.ts.map +1 -0
- package/dist/Public/Extensions/RunAnywhere+Embeddings.js +240 -0
- package/dist/Public/Extensions/RunAnywhere+Embeddings.js.map +1 -0
- package/dist/Public/Extensions/RunAnywhere+STT.d.ts +97 -0
- package/dist/Public/Extensions/RunAnywhere+STT.d.ts.map +1 -0
- package/dist/Public/Extensions/RunAnywhere+STT.js +417 -0
- package/dist/Public/Extensions/RunAnywhere+STT.js.map +1 -0
- package/dist/Public/Extensions/RunAnywhere+StructuredOutput.d.ts +69 -0
- package/dist/Public/Extensions/RunAnywhere+StructuredOutput.d.ts.map +1 -0
- package/dist/Public/Extensions/RunAnywhere+StructuredOutput.js +196 -0
- package/dist/Public/Extensions/RunAnywhere+StructuredOutput.js.map +1 -0
- package/dist/Public/Extensions/RunAnywhere+TTS.d.ts +55 -0
- package/dist/Public/Extensions/RunAnywhere+TTS.d.ts.map +1 -0
- package/dist/Public/Extensions/RunAnywhere+TTS.js +253 -0
- package/dist/Public/Extensions/RunAnywhere+TTS.js.map +1 -0
- package/dist/Public/Extensions/RunAnywhere+TextGeneration.d.ts +80 -0
- package/dist/Public/Extensions/RunAnywhere+TextGeneration.d.ts.map +1 -0
- package/dist/Public/Extensions/RunAnywhere+TextGeneration.js +470 -0
- package/dist/Public/Extensions/RunAnywhere+TextGeneration.js.map +1 -0
- package/dist/Public/Extensions/RunAnywhere+ToolCalling.d.ts +82 -0
- package/dist/Public/Extensions/RunAnywhere+ToolCalling.d.ts.map +1 -0
- package/dist/Public/Extensions/RunAnywhere+ToolCalling.js +576 -0
- package/dist/Public/Extensions/RunAnywhere+ToolCalling.js.map +1 -0
- package/dist/Public/Extensions/RunAnywhere+VAD.d.ts +70 -0
- package/dist/Public/Extensions/RunAnywhere+VAD.d.ts.map +1 -0
- package/dist/Public/Extensions/RunAnywhere+VAD.js +231 -0
- package/dist/Public/Extensions/RunAnywhere+VAD.js.map +1 -0
- package/dist/Public/Extensions/RunAnywhere+VLM.d.ts +58 -0
- package/dist/Public/Extensions/RunAnywhere+VLM.d.ts.map +1 -0
- package/dist/Public/Extensions/RunAnywhere+VLM.js +262 -0
- package/dist/Public/Extensions/RunAnywhere+VLM.js.map +1 -0
- package/dist/Public/Extensions/RunAnywhere+VoicePipeline.d.ts +29 -3
- package/dist/Public/Extensions/RunAnywhere+VoicePipeline.d.ts.map +1 -1
- package/dist/Public/Extensions/RunAnywhere+VoicePipeline.js +42 -46
- package/dist/Public/Extensions/RunAnywhere+VoicePipeline.js.map +1 -1
- package/dist/Public/Extensions/STTTypes.d.ts +61 -0
- package/dist/Public/Extensions/STTTypes.d.ts.map +1 -0
- package/dist/Public/Extensions/STTTypes.js +16 -0
- package/dist/Public/Extensions/STTTypes.js.map +1 -0
- package/dist/Public/Extensions/TTSTypes.d.ts +31 -0
- package/dist/Public/Extensions/TTSTypes.d.ts.map +1 -0
- package/dist/Public/Extensions/TTSTypes.js +3 -0
- package/dist/Public/Extensions/TTSTypes.js.map +1 -0
- package/dist/Public/Extensions/ToolCallingTypes.d.ts +78 -0
- package/dist/Public/Extensions/ToolCallingTypes.d.ts.map +1 -0
- package/dist/Public/Extensions/ToolCallingTypes.js +8 -0
- package/dist/Public/Extensions/ToolCallingTypes.js.map +1 -0
- package/dist/Public/Extensions/VADTypes.d.ts +30 -0
- package/dist/Public/Extensions/VADTypes.d.ts.map +1 -0
- package/dist/Public/Extensions/VADTypes.js +8 -0
- package/dist/Public/Extensions/VADTypes.js.map +1 -0
- package/dist/Public/Extensions/VLMTypes.d.ts +56 -0
- package/dist/Public/Extensions/VLMTypes.d.ts.map +1 -0
- package/dist/Public/Extensions/VLMTypes.js +24 -0
- package/dist/Public/Extensions/VLMTypes.js.map +1 -0
- package/dist/Public/Extensions/VoicePipelineTypes.d.ts +37 -28
- package/dist/Public/Extensions/VoicePipelineTypes.d.ts.map +1 -1
- package/dist/Public/Extensions/VoicePipelineTypes.js +1 -4
- package/dist/Public/Extensions/VoicePipelineTypes.js.map +1 -1
- package/dist/Public/RunAnywhere.d.ts +61 -65
- package/dist/Public/RunAnywhere.d.ts.map +1 -1
- package/dist/Public/RunAnywhere.js +141 -193
- package/dist/Public/RunAnywhere.js.map +1 -1
- package/dist/index.d.ts +39 -23
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +31 -19
- package/dist/index.js.map +1 -1
- package/dist/types/LLMTypes.d.ts +48 -0
- package/dist/types/LLMTypes.d.ts.map +1 -0
- package/dist/types/LLMTypes.js +8 -0
- package/dist/types/LLMTypes.js.map +1 -0
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +0 -1
- package/dist/types/index.js.map +1 -1
- package/dist/workers/vlm-worker.d.ts +9 -0
- package/dist/workers/vlm-worker.d.ts.map +1 -0
- package/dist/workers/vlm-worker.js +10 -0
- package/dist/workers/vlm-worker.js.map +1 -0
- package/package.json +5 -1
- package/wasm/racommons-webgpu.js +156 -0
- package/wasm/racommons-webgpu.wasm +0 -0
- package/wasm/racommons.js +91 -91
- package/wasm/racommons.wasm +0 -0
- package/wasm/sherpa/sherpa-onnx-asr.js +1538 -0
- package/wasm/sherpa/sherpa-onnx-glue-original.js +19 -0
- package/wasm/sherpa/sherpa-onnx-glue.js +17 -0
- package/wasm/sherpa/sherpa-onnx-tts.js +657 -0
- package/wasm/sherpa/sherpa-onnx-vad.js +337 -0
- package/wasm/sherpa/sherpa-onnx-wave.js +88 -0
- package/wasm/sherpa/sherpa-onnx.wasm +0 -0
- package/dist/Infrastructure/ExtensionPoint.d.ts +0 -78
- package/dist/Infrastructure/ExtensionPoint.d.ts.map +0 -1
- package/dist/Infrastructure/ExtensionPoint.js +0 -118
- package/dist/Infrastructure/ExtensionPoint.js.map +0 -1
- package/dist/Infrastructure/LocalFileStorage.d.ts +0 -116
- package/dist/Infrastructure/LocalFileStorage.d.ts.map +0 -1
- package/dist/Infrastructure/LocalFileStorage.js +0 -368
- package/dist/Infrastructure/LocalFileStorage.js.map +0 -1
- package/dist/Infrastructure/ModelFileInference.d.ts +0 -39
- package/dist/Infrastructure/ModelFileInference.d.ts.map +0 -1
- package/dist/Infrastructure/ModelFileInference.js +0 -110
- package/dist/Infrastructure/ModelFileInference.js.map +0 -1
|
@@ -4,18 +4,21 @@
|
|
|
4
4
|
* Composes ModelRegistry (catalog) + ModelDownloader (downloads) and adds
|
|
5
5
|
* model-loading orchestration (STT / TTS / LLM / VLM routing).
|
|
6
6
|
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
7
|
+
* The public API is unchanged — `ModelManager` is still a singleton that
|
|
8
|
+
* exposes `registerModels()`, `downloadModel()`, `loadModel()`, `onChange()`, etc.
|
|
9
|
+
* Internally it delegates catalog operations to the Registry and download
|
|
10
|
+
* operations to the Downloader.
|
|
10
11
|
*/
|
|
11
12
|
import { WASMBridge } from '../Foundation/WASMBridge';
|
|
13
|
+
import { SherpaONNXBridge } from '../Foundation/SherpaONNXBridge';
|
|
12
14
|
import { EventBus } from '../Foundation/EventBus';
|
|
13
15
|
import { SDKLogger } from '../Foundation/SDKLogger';
|
|
16
|
+
import { STTModelType } from '../Public/Extensions/STTTypes';
|
|
14
17
|
import { ModelCategory, LLMFramework, ModelStatus, DownloadStage, SDKEventType } from '../types/enums';
|
|
15
18
|
import { OPFSStorage } from './OPFSStorage';
|
|
16
19
|
import { ModelRegistry } from './ModelRegistry';
|
|
17
20
|
import { ModelDownloader } from './ModelDownloader';
|
|
18
|
-
import {
|
|
21
|
+
import { extractTarGz } from './ArchiveUtility';
|
|
19
22
|
// Re-export types so existing imports from './Infrastructure/ModelManager' still work
|
|
20
23
|
export { ModelCategory, LLMFramework, ModelStatus, DownloadStage };
|
|
21
24
|
// ---------------------------------------------------------------------------
|
|
@@ -35,38 +38,48 @@ class ModelManagerImpl {
|
|
|
35
38
|
metadata = {};
|
|
36
39
|
/** Pluggable VLM loader (set by the app via setVLMLoader) */
|
|
37
40
|
vlmLoader = null;
|
|
38
|
-
/** Pluggable model loaders — registered by
|
|
41
|
+
/** Pluggable model loaders — registered by the Public layer during init */
|
|
39
42
|
llmLoader = null;
|
|
40
43
|
sttLoader = null;
|
|
41
44
|
ttsLoader = null;
|
|
42
45
|
vadLoader = null;
|
|
43
46
|
constructor() {
|
|
44
47
|
this.downloader = new ModelDownloader(this.registry, this.storage);
|
|
48
|
+
// Initialize OPFS storage (non-blocking)
|
|
45
49
|
this.initStorage();
|
|
50
|
+
// Request persistent storage so browser won't evict our cached models
|
|
46
51
|
this.requestPersistentStorage();
|
|
47
52
|
}
|
|
48
53
|
async initStorage() {
|
|
49
54
|
await this.storage.initialize();
|
|
50
55
|
}
|
|
51
|
-
// --- Registration API ---
|
|
56
|
+
// --- Registration API (called by the app) ---
|
|
57
|
+
/**
|
|
58
|
+
* Register a catalog of models. Resolves compact definitions into full
|
|
59
|
+
* ManagedModel entries and checks OPFS for previously downloaded files.
|
|
60
|
+
*/
|
|
52
61
|
registerModels(models) {
|
|
53
62
|
this.registry.registerModels(models);
|
|
63
|
+
// Check OPFS for previously downloaded models (async, updates status when done)
|
|
54
64
|
this.refreshDownloadStatus();
|
|
55
65
|
}
|
|
66
|
+
/**
|
|
67
|
+
* Set the VLM loader implementation. Called by the app to plug in
|
|
68
|
+
* worker-based VLM loading (the SDK doesn't create Web Workers directly).
|
|
69
|
+
*/
|
|
56
70
|
setVLMLoader(loader) {
|
|
57
71
|
this.vlmLoader = loader;
|
|
58
72
|
}
|
|
73
|
+
/** Register the LLM model loader (text generation extension). */
|
|
59
74
|
setLLMLoader(loader) { this.llmLoader = loader; }
|
|
75
|
+
/** Register the STT model loader (speech-to-text extension). */
|
|
60
76
|
setSTTLoader(loader) { this.sttLoader = loader; }
|
|
77
|
+
/** Register the TTS model loader (text-to-speech extension). */
|
|
61
78
|
setTTSLoader(loader) { this.ttsLoader = loader; }
|
|
79
|
+
/** Register the VAD model loader (voice activity detection extension). */
|
|
62
80
|
setVADLoader(loader) { this.vadLoader = loader; }
|
|
63
|
-
/** Expose the downloader for backend packages that need file operations. */
|
|
64
|
-
getDownloader() { return this.downloader; }
|
|
65
|
-
/** Set the local file storage backend for persistent model storage. */
|
|
66
|
-
setLocalFileStorage(storage) {
|
|
67
|
-
this.downloader.setLocalFileStorage(storage);
|
|
68
|
-
}
|
|
69
81
|
// --- Internal init ---
|
|
82
|
+
/** Request persistent storage to prevent browser from evicting cached models */
|
|
70
83
|
async requestPersistentStorage() {
|
|
71
84
|
try {
|
|
72
85
|
if (navigator.storage?.persist) {
|
|
@@ -75,18 +88,24 @@ class ModelManagerImpl {
|
|
|
75
88
|
logger.info('Persistent storage: granted');
|
|
76
89
|
}
|
|
77
90
|
else {
|
|
91
|
+
// Expected on first visit — browsers require engagement signals
|
|
92
|
+
// (bookmark, PWA install, etc.) before granting persistence.
|
|
78
93
|
logger.debug('Persistent storage: denied (expected on first visit)');
|
|
79
94
|
}
|
|
80
95
|
}
|
|
81
96
|
}
|
|
82
97
|
catch {
|
|
83
|
-
// Not supported or denied
|
|
98
|
+
// Not supported or denied — non-critical
|
|
84
99
|
}
|
|
85
100
|
}
|
|
101
|
+
/**
|
|
102
|
+
* Check OPFS for models that were downloaded in a previous session.
|
|
103
|
+
* Updates their status from 'registered' to 'downloaded'.
|
|
104
|
+
* Also loads persisted LRU metadata for each model.
|
|
105
|
+
* Only checks file existence + size — does NOT read file contents into memory.
|
|
106
|
+
*/
|
|
86
107
|
async refreshDownloadStatus() {
|
|
87
|
-
//
|
|
88
|
-
// initStorage() is idempotent — returns immediately if already done.
|
|
89
|
-
await this.storage.initialize();
|
|
108
|
+
// Load persisted metadata (lastUsedAt timestamps)
|
|
90
109
|
this.metadata = await this.storage.loadMetadata();
|
|
91
110
|
for (const model of this.registry.getModels()) {
|
|
92
111
|
if (model.status !== ModelStatus.Registered)
|
|
@@ -95,6 +114,7 @@ class ModelManagerImpl {
|
|
|
95
114
|
const size = await this.downloader.getOPFSFileSize(model.id);
|
|
96
115
|
if (size !== null && size > 0) {
|
|
97
116
|
this.registry.updateModel(model.id, { status: ModelStatus.Downloaded, sizeBytes: size });
|
|
117
|
+
// Ensure metadata entry exists — use persisted value or fall back to OPFS lastModified
|
|
98
118
|
if (!this.metadata[model.id]) {
|
|
99
119
|
const stored = await this.storage.listModels();
|
|
100
120
|
const entry = stored.find((s) => s.id === model.id);
|
|
@@ -109,17 +129,34 @@ class ModelManagerImpl {
|
|
|
109
129
|
// Not in OPFS, keep as registered
|
|
110
130
|
}
|
|
111
131
|
}
|
|
132
|
+
// Persist any newly created metadata entries
|
|
112
133
|
await this.storage.saveMetadata(this.metadata);
|
|
113
134
|
}
|
|
114
|
-
// --- Queries ---
|
|
115
|
-
getModels() {
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
135
|
+
// --- Queries (delegated to registry) ---
|
|
136
|
+
getModels() {
|
|
137
|
+
return this.registry.getModels();
|
|
138
|
+
}
|
|
139
|
+
getModelsByCategory(category) {
|
|
140
|
+
return this.registry.getModelsByCategory(category);
|
|
141
|
+
}
|
|
142
|
+
getModelsByFramework(framework) {
|
|
143
|
+
return this.registry.getModelsByFramework(framework);
|
|
144
|
+
}
|
|
145
|
+
getLLMModels() {
|
|
146
|
+
return this.registry.getLLMModels();
|
|
147
|
+
}
|
|
148
|
+
getVLMModels() {
|
|
149
|
+
return this.registry.getVLMModels();
|
|
150
|
+
}
|
|
151
|
+
getSTTModels() {
|
|
152
|
+
return this.registry.getSTTModels();
|
|
153
|
+
}
|
|
154
|
+
getTTSModels() {
|
|
155
|
+
return this.registry.getTTSModels();
|
|
156
|
+
}
|
|
157
|
+
getVADModels() {
|
|
158
|
+
return this.registry.getVADModels();
|
|
159
|
+
}
|
|
123
160
|
getLoadedModel(category) {
|
|
124
161
|
if (category) {
|
|
125
162
|
const id = this.loadedByCategory.get(category);
|
|
@@ -131,84 +168,69 @@ class ModelManagerImpl {
|
|
|
131
168
|
if (category) {
|
|
132
169
|
return this.loadedByCategory.get(category) ?? null;
|
|
133
170
|
}
|
|
171
|
+
// Legacy: return first loaded model id
|
|
134
172
|
return this.registry.getModels().find((m) => m.status === ModelStatus.Loaded)?.id ?? null;
|
|
135
173
|
}
|
|
174
|
+
/** Check if models for all given categories are loaded */
|
|
136
175
|
areAllLoaded(categories) {
|
|
137
176
|
return categories.every((c) => this.loadedByCategory.has(c));
|
|
138
177
|
}
|
|
178
|
+
/**
|
|
179
|
+
* Ensure a model is loaded for the given category.
|
|
180
|
+
* If already loaded, returns the loaded model. If a downloaded model exists,
|
|
181
|
+
* loads it automatically. Returns null if no suitable model is available.
|
|
182
|
+
*
|
|
183
|
+
* @param options.coexist Forwarded to `loadModel()`. When true, only swaps
|
|
184
|
+
* models of the same category instead of unloading everything.
|
|
185
|
+
*/
|
|
139
186
|
async ensureLoaded(category, options) {
|
|
187
|
+
// Check if already loaded
|
|
140
188
|
const loaded = this.getLoadedModel(category);
|
|
141
189
|
if (loaded)
|
|
142
190
|
return loaded;
|
|
191
|
+
// Find a downloaded model for this category
|
|
143
192
|
const models = this.getModels();
|
|
144
193
|
const downloaded = models.find(m => m.modality === category && m.status === ModelStatus.Downloaded);
|
|
145
194
|
if (!downloaded)
|
|
146
195
|
return null;
|
|
196
|
+
// Load it
|
|
147
197
|
await this.loadModel(downloaded.id, options);
|
|
148
198
|
return this.getLoadedModel(category);
|
|
149
199
|
}
|
|
150
|
-
// --- Download ---
|
|
200
|
+
// --- Download (delegated to downloader) ---
|
|
201
|
+
/**
|
|
202
|
+
* Check whether downloading a model will fit in OPFS without eviction.
|
|
203
|
+
* Returns a result indicating whether it fits and which models could be
|
|
204
|
+
* evicted if not. Does NOT perform any mutations.
|
|
205
|
+
*/
|
|
151
206
|
async checkDownloadFit(modelId) {
|
|
152
207
|
const model = this.registry.getModel(modelId);
|
|
153
208
|
if (!model)
|
|
154
209
|
return { fits: true, availableBytes: 0, neededBytes: 0, evictionCandidates: [] };
|
|
210
|
+
// Find the currently loaded model for the same category (excluded from eviction)
|
|
155
211
|
const loadedId = this.loadedByCategory.get(model.modality ?? ModelCategory.Language);
|
|
156
212
|
return this.downloader.checkStorageQuota(model, this.metadata, loadedId ?? undefined);
|
|
157
213
|
}
|
|
158
214
|
async downloadModel(modelId) {
|
|
159
215
|
return this.downloader.downloadModel(modelId);
|
|
160
216
|
}
|
|
161
|
-
// --- Model
|
|
217
|
+
// --- Model loading orchestration ---
|
|
162
218
|
/**
|
|
163
|
-
*
|
|
164
|
-
* Stores the file in the active storage backend and registers it as downloaded.
|
|
165
|
-
* If the model isn't already in the catalog, auto-registers it based on filename.
|
|
219
|
+
* Load a model by ID.
|
|
166
220
|
*
|
|
167
|
-
* @param
|
|
168
|
-
*
|
|
169
|
-
*
|
|
221
|
+
* @param options.coexist When `true`, only unload the model of the **same
|
|
222
|
+
* category** (swap) rather than unloading ALL loaded models. Use this for
|
|
223
|
+
* multi-model pipelines like Voice (STT + LLM + TTS).
|
|
224
|
+
* Default is `false` — unloads everything to reclaim memory.
|
|
170
225
|
*/
|
|
171
|
-
async importModel(file, modelId) {
|
|
172
|
-
const id = modelId ?? sanitizeId(file.name.replace(/\.[^.]+$/, ''));
|
|
173
|
-
// Auto-register if not in the catalog
|
|
174
|
-
if (!this.registry.getModel(id)) {
|
|
175
|
-
const meta = inferModelFromFilename(file.name);
|
|
176
|
-
this.registry.addModel({
|
|
177
|
-
id: meta.id,
|
|
178
|
-
name: meta.name,
|
|
179
|
-
url: '',
|
|
180
|
-
modality: meta.category,
|
|
181
|
-
framework: meta.framework,
|
|
182
|
-
status: ModelStatus.Registered,
|
|
183
|
-
});
|
|
184
|
-
// Use the inferred ID if different
|
|
185
|
-
if (meta.id !== id) {
|
|
186
|
-
return this.importModel(file, meta.id);
|
|
187
|
-
}
|
|
188
|
-
}
|
|
189
|
-
logger.info(`Importing model from file: ${file.name} (${(file.size / 1024 / 1024).toFixed(1)} MB) -> ${id}`);
|
|
190
|
-
const data = new Uint8Array(await file.arrayBuffer());
|
|
191
|
-
await this.downloader.storeInOPFS(id, data);
|
|
192
|
-
this.registry.updateModel(id, {
|
|
193
|
-
status: ModelStatus.Downloaded,
|
|
194
|
-
sizeBytes: data.length,
|
|
195
|
-
});
|
|
196
|
-
this.touchLastUsed(id, data.length);
|
|
197
|
-
EventBus.shared.emit('model.imported', SDKEventType.Model, {
|
|
198
|
-
modelId: id,
|
|
199
|
-
filename: file.name,
|
|
200
|
-
sizeBytes: data.length,
|
|
201
|
-
});
|
|
202
|
-
logger.info(`Model imported: ${id} (${(data.length / 1024 / 1024).toFixed(1)} MB)`);
|
|
203
|
-
return id;
|
|
204
|
-
}
|
|
205
|
-
// --- Model loading orchestration ---
|
|
206
226
|
async loadModel(modelId, options) {
|
|
207
227
|
const model = this.registry.getModel(modelId);
|
|
208
228
|
if (!model || (model.status !== ModelStatus.Downloaded && model.status !== ModelStatus.Registered))
|
|
209
229
|
return false;
|
|
210
230
|
const category = model.modality ?? ModelCategory.Language;
|
|
211
231
|
if (options?.coexist) {
|
|
232
|
+
// Pipeline mode: only unload models of the SAME category (swap).
|
|
233
|
+
// Other categories remain loaded for multi-model workflows.
|
|
212
234
|
const currentId = this.loadedByCategory.get(category);
|
|
213
235
|
if (currentId && currentId !== modelId) {
|
|
214
236
|
logger.info(`Swapping ${category} model: ${currentId} → ${modelId}`);
|
|
@@ -216,41 +238,61 @@ class ModelManagerImpl {
|
|
|
216
238
|
}
|
|
217
239
|
}
|
|
218
240
|
else {
|
|
241
|
+
// Default: Unload ALL currently loaded models before loading the new one.
|
|
242
|
+
//
|
|
243
|
+
// In a browser environment, memory is limited (WASM linear memory +
|
|
244
|
+
// WebGPU buffers). The user interacts with one feature at a time
|
|
245
|
+
// (chat, vision, transcribe, etc.), so there's no need to keep models
|
|
246
|
+
// from other categories resident.
|
|
219
247
|
await this.unloadAll(modelId);
|
|
220
248
|
}
|
|
221
249
|
this.registry.updateModel(modelId, { status: ModelStatus.Loading });
|
|
222
250
|
EventBus.shared.emit('model.loadStarted', SDKEventType.Model, { modelId, category });
|
|
223
251
|
try {
|
|
224
252
|
if (model.modality === ModelCategory.Multimodal) {
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
const
|
|
229
|
-
if (!
|
|
253
|
+
// VLM: Worker reads from OPFS directly when possible.
|
|
254
|
+
// When OPFS quota is exceeded, models live only in the main-thread
|
|
255
|
+
// memory cache — we must read and transfer them to the Worker.
|
|
256
|
+
const exists = await this.downloader.existsInOPFS(modelId);
|
|
257
|
+
if (!exists) {
|
|
230
258
|
throw new Error('Model not downloaded — please download the model first.');
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
259
|
+
}
|
|
260
|
+
const inActualOPFS = await this.downloader.existsInActualOPFS(modelId);
|
|
261
|
+
if (inActualOPFS) {
|
|
262
|
+
// Worker can read from OPFS directly (optimal: avoids main-thread copy)
|
|
263
|
+
await this.loadLLMModel(model, modelId, new Uint8Array(0));
|
|
264
|
+
}
|
|
265
|
+
else {
|
|
266
|
+
// Model is in memory cache only (OPFS quota exceeded) — read and transfer to Worker
|
|
267
|
+
logger.debug(`VLM model ${modelId} not in OPFS, reading from memory cache to transfer to Worker`);
|
|
268
|
+
const data = await this.downloader.loadFromOPFS(modelId);
|
|
269
|
+
if (!data)
|
|
270
|
+
throw new Error('Model not downloaded — please download the model first.');
|
|
271
|
+
await this.loadLLMModel(model, modelId, data);
|
|
272
|
+
}
|
|
244
273
|
}
|
|
245
274
|
else {
|
|
246
275
|
const data = await this.downloader.loadFromOPFS(modelId);
|
|
247
|
-
if (!data)
|
|
276
|
+
if (!data) {
|
|
248
277
|
throw new Error('Model not downloaded — please download the model first.');
|
|
249
|
-
|
|
278
|
+
}
|
|
279
|
+
if (model.modality === ModelCategory.SpeechRecognition) {
|
|
280
|
+
await this.loadSTTModel(model, data);
|
|
281
|
+
}
|
|
282
|
+
else if (model.modality === ModelCategory.SpeechSynthesis) {
|
|
283
|
+
await this.loadTTSModel(model, data);
|
|
284
|
+
}
|
|
285
|
+
else if (model.modality === ModelCategory.Audio) {
|
|
286
|
+
await this.loadVADModel(model, data);
|
|
287
|
+
}
|
|
288
|
+
else {
|
|
289
|
+
await this.loadLLMModel(model, modelId, data);
|
|
290
|
+
}
|
|
250
291
|
}
|
|
251
292
|
this.loadedByCategory.set(category, modelId);
|
|
252
293
|
this.registry.updateModel(modelId, { status: ModelStatus.Loaded });
|
|
253
294
|
EventBus.shared.emit('model.loadCompleted', SDKEventType.Model, { modelId, category });
|
|
295
|
+
// Update LRU metadata
|
|
254
296
|
this.touchLastUsed(modelId, model.sizeBytes ?? 0);
|
|
255
297
|
return true;
|
|
256
298
|
}
|
|
@@ -271,7 +313,17 @@ class ModelManagerImpl {
|
|
|
271
313
|
const category = model.modality ?? ModelCategory.Language;
|
|
272
314
|
await this.unloadModelByCategory(category);
|
|
273
315
|
}
|
|
316
|
+
/**
|
|
317
|
+
* Unload ALL currently loaded models.
|
|
318
|
+
*
|
|
319
|
+
* Called automatically before loading a new model, and can also be called
|
|
320
|
+
* explicitly by app code (e.g. on tab switch) to release all resources.
|
|
321
|
+
*
|
|
322
|
+
* @param exceptModelId - Optional model ID to skip (the model about to be loaded).
|
|
323
|
+
* Avoids redundant unload+reload of the same model.
|
|
324
|
+
*/
|
|
274
325
|
async unloadAll(exceptModelId) {
|
|
326
|
+
// Snapshot categories to avoid mutation during iteration
|
|
275
327
|
const loaded = [...this.loadedByCategory.entries()];
|
|
276
328
|
if (loaded.length === 0)
|
|
277
329
|
return;
|
|
@@ -283,13 +335,16 @@ class ModelManagerImpl {
|
|
|
283
335
|
}
|
|
284
336
|
}
|
|
285
337
|
async deleteModel(modelId) {
|
|
338
|
+
// Remove from loaded tracking if this model is loaded
|
|
286
339
|
for (const [category, id] of this.loadedByCategory) {
|
|
287
340
|
if (id === modelId) {
|
|
288
341
|
this.loadedByCategory.delete(category);
|
|
289
342
|
break;
|
|
290
343
|
}
|
|
291
344
|
}
|
|
345
|
+
// Delete primary file
|
|
292
346
|
await this.downloader.deleteFromOPFS(modelId);
|
|
347
|
+
// Delete additional files
|
|
293
348
|
const model = this.registry.getModel(modelId);
|
|
294
349
|
if (model?.additionalFiles) {
|
|
295
350
|
for (const file of model.additionalFiles) {
|
|
@@ -299,6 +354,7 @@ class ModelManagerImpl {
|
|
|
299
354
|
this.registry.updateModel(modelId, { status: ModelStatus.Registered, downloadProgress: undefined, sizeBytes: undefined });
|
|
300
355
|
this.removeMetadata(modelId);
|
|
301
356
|
}
|
|
357
|
+
/** Clear all models from OPFS and reset registry statuses. */
|
|
302
358
|
async clearAll() {
|
|
303
359
|
await this.storage.clearAll();
|
|
304
360
|
this.metadata = {};
|
|
@@ -342,18 +398,22 @@ class ModelManagerImpl {
|
|
|
342
398
|
return { modelCount, totalSize, available };
|
|
343
399
|
}
|
|
344
400
|
// --- LRU Metadata ---
|
|
401
|
+
/** Get the last-used timestamp for a model (0 if never recorded). */
|
|
345
402
|
getModelLastUsedAt(modelId) {
|
|
346
403
|
return this.metadata[modelId]?.lastUsedAt ?? 0;
|
|
347
404
|
}
|
|
405
|
+
/** Update lastUsedAt for a model and persist to OPFS (fire-and-forget). */
|
|
348
406
|
touchLastUsed(modelId, sizeBytes) {
|
|
349
407
|
this.metadata[modelId] = { lastUsedAt: Date.now(), sizeBytes };
|
|
408
|
+
// Persist asynchronously — don't block the caller
|
|
350
409
|
this.storage.saveMetadata(this.metadata).catch(() => { });
|
|
351
410
|
}
|
|
411
|
+
/** Remove metadata entry when a model is deleted. */
|
|
352
412
|
removeMetadata(modelId) {
|
|
353
413
|
delete this.metadata[modelId];
|
|
354
414
|
this.storage.saveMetadata(this.metadata).catch(() => { });
|
|
355
415
|
}
|
|
356
|
-
// --- Subscriptions ---
|
|
416
|
+
// --- Subscriptions (delegated to registry) ---
|
|
357
417
|
onChange(callback) {
|
|
358
418
|
return this.registry.onChange(callback);
|
|
359
419
|
}
|
|
@@ -361,148 +421,423 @@ class ModelManagerImpl {
|
|
|
361
421
|
// Private — model loading by modality
|
|
362
422
|
// ---------------------------------------------------------------------------
|
|
363
423
|
/**
|
|
364
|
-
*
|
|
365
|
-
*/
|
|
366
|
-
buildLoadContext(model, data) {
|
|
367
|
-
return {
|
|
368
|
-
model,
|
|
369
|
-
data,
|
|
370
|
-
downloadFile: (url) => this.downloader.downloadFile(url),
|
|
371
|
-
loadFile: (fileKey) => this.downloader.loadFromOPFS(fileKey),
|
|
372
|
-
storeFile: (fileKey, fileData) => this.downloader.storeInOPFS(fileKey, fileData),
|
|
373
|
-
additionalFileKey: (modelId, filename) => this.downloader.additionalFileKey(modelId, filename),
|
|
374
|
-
};
|
|
375
|
-
}
|
|
376
|
-
/**
|
|
377
|
-
* Load an LLM model into the RACommons Emscripten FS.
|
|
378
|
-
* This logic stays in core because WASMBridge is in core.
|
|
424
|
+
* Load an LLM/VLM model into the RACommons Emscripten FS.
|
|
379
425
|
*/
|
|
380
426
|
async loadLLMModel(model, modelId, data) {
|
|
381
427
|
const fsDir = `/models`;
|
|
382
428
|
const fsPath = `${fsDir}/${modelId}.gguf`;
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
if (!bridge.isLoaded) {
|
|
386
|
-
throw new Error('WASM module not loaded — SDK not initialized.');
|
|
429
|
+
if (model.modality === ModelCategory.Multimodal) {
|
|
430
|
+
// VLM models are loaded in a dedicated Web Worker that reads from OPFS.
|
|
387
431
|
}
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
432
|
+
else {
|
|
433
|
+
// Text-only LLM: write to main-thread Emscripten FS as before
|
|
434
|
+
const bridge = WASMBridge.shared;
|
|
435
|
+
if (!bridge.isLoaded) {
|
|
436
|
+
throw new Error('WASM module not loaded — SDK not initialized.');
|
|
437
|
+
}
|
|
438
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
439
|
+
const m = bridge.module;
|
|
440
|
+
if (typeof m.FS_createPath !== 'function' || typeof m.FS_createDataFile !== 'function') {
|
|
441
|
+
throw new Error('Emscripten FS helper functions not available on WASM module.');
|
|
442
|
+
}
|
|
443
|
+
m.FS_createPath('/', 'models', true, true);
|
|
444
|
+
try {
|
|
445
|
+
m.FS_unlink(fsPath);
|
|
446
|
+
}
|
|
447
|
+
catch { /* File doesn't exist yet */ }
|
|
448
|
+
logger.debug(`Writing ${data.length} bytes to ${fsPath}`);
|
|
449
|
+
m.FS_createDataFile('/models', `${modelId}.gguf`, data, true, true, true);
|
|
450
|
+
logger.debug(`Model file written to ${fsPath}`);
|
|
392
451
|
}
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
452
|
+
if (model.modality === ModelCategory.Multimodal) {
|
|
453
|
+
const mmprojFile = model.additionalFiles?.find((f) => f.filename.includes('mmproj'));
|
|
454
|
+
if (!mmprojFile) {
|
|
455
|
+
logger.warning(`No mmproj found, loading as text-only LLM: ${modelId}`);
|
|
456
|
+
if (!this.llmLoader)
|
|
457
|
+
throw new Error('No LLM loader registered. Call ModelManager.setLLMLoader() first.');
|
|
458
|
+
await this.llmLoader.loadModel(fsPath, modelId, model.name);
|
|
459
|
+
}
|
|
460
|
+
else {
|
|
461
|
+
// Ensure mmproj is in OPFS or memory cache (fallback download if missing)
|
|
462
|
+
const mmprojKey = this.downloader.additionalFileKey(modelId, mmprojFile.filename);
|
|
463
|
+
const mmprojExists = await this.downloader.existsInOPFS(mmprojKey);
|
|
464
|
+
if (!mmprojExists && mmprojFile.url) {
|
|
465
|
+
logger.debug(`mmproj not in OPFS, downloading on-demand: ${mmprojFile.filename}`);
|
|
466
|
+
const mmprojDownload = await this.downloader.downloadFile(mmprojFile.url);
|
|
467
|
+
await this.downloader.storeInOPFS(mmprojKey, mmprojDownload);
|
|
468
|
+
}
|
|
469
|
+
if (!this.vlmLoader) {
|
|
470
|
+
throw new Error('No VLM loader registered. Call ModelManager.setVLMLoader() first.');
|
|
471
|
+
}
|
|
472
|
+
// Initialize the Worker (loads its own WASM instance)
|
|
473
|
+
if (!this.vlmLoader.isInitialized) {
|
|
474
|
+
logger.info('Initializing VLM loader...');
|
|
475
|
+
await this.vlmLoader.init();
|
|
476
|
+
}
|
|
477
|
+
// When model/mmproj are only in memory cache (OPFS quota exceeded),
|
|
478
|
+
// we need to read and transfer the data to the Worker.
|
|
479
|
+
let modelDataBuf;
|
|
480
|
+
let mmprojDataBuf;
|
|
481
|
+
const modelInOPFS = await this.downloader.existsInActualOPFS(modelId);
|
|
482
|
+
if (!modelInOPFS && data.length > 0) {
|
|
483
|
+
// data was already read from memory cache in the caller
|
|
484
|
+
modelDataBuf = new ArrayBuffer(data.byteLength);
|
|
485
|
+
new Uint8Array(modelDataBuf).set(data);
|
|
486
|
+
logger.debug(`Transferring model data to VLM Worker (${(data.length / 1024 / 1024).toFixed(1)} MB)`);
|
|
487
|
+
}
|
|
488
|
+
const mmprojInOPFS = await this.downloader.existsInActualOPFS(mmprojKey);
|
|
489
|
+
if (!mmprojInOPFS) {
|
|
490
|
+
const mmprojBytes = await this.downloader.loadFromOPFS(mmprojKey);
|
|
491
|
+
if (mmprojBytes) {
|
|
492
|
+
mmprojDataBuf = new ArrayBuffer(mmprojBytes.byteLength);
|
|
493
|
+
new Uint8Array(mmprojDataBuf).set(mmprojBytes);
|
|
494
|
+
logger.debug(`Transferring mmproj data to VLM Worker (${(mmprojBytes.length / 1024 / 1024).toFixed(1)} MB)`);
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
// Load model via the pluggable VLM loader
|
|
498
|
+
logger.info(`Loading VLM model: ${modelId}`);
|
|
499
|
+
await this.vlmLoader.loadModel({
|
|
500
|
+
modelOpfsKey: modelId,
|
|
501
|
+
modelFilename: `${modelId}.gguf`,
|
|
502
|
+
mmprojOpfsKey: mmprojKey,
|
|
503
|
+
mmprojFilename: mmprojFile.filename,
|
|
504
|
+
modelId,
|
|
505
|
+
modelName: model.name,
|
|
506
|
+
modelData: modelDataBuf,
|
|
507
|
+
mmprojData: mmprojDataBuf,
|
|
508
|
+
});
|
|
509
|
+
logger.info(`VLM model loaded: ${modelId}`);
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
else if (model.modality === ModelCategory.Language) {
|
|
513
|
+
if (!this.llmLoader)
|
|
514
|
+
throw new Error('No LLM loader registered. Call ModelManager.setLLMLoader() first.');
|
|
515
|
+
await this.llmLoader.loadModel(fsPath, modelId, model.name);
|
|
516
|
+
logger.info(`LLM model loaded via TextGeneration: ${modelId}`);
|
|
396
517
|
}
|
|
397
|
-
catch { /* File doesn't exist yet */ }
|
|
398
|
-
logger.debug(`Writing ${data.length} bytes to ${fsPath}`);
|
|
399
|
-
m.FS_createDataFile('/models', `${modelId}.gguf`, data, true, true, true);
|
|
400
|
-
logger.debug(`Model file written to ${fsPath}`);
|
|
401
|
-
if (!this.llmLoader)
|
|
402
|
-
throw new Error('No LLM loader registered. Register the @runanywhere/web-llamacpp package.');
|
|
403
|
-
await this.llmLoader.loadModel(fsPath, modelId, model.name);
|
|
404
|
-
logger.info(`LLM model loaded: ${modelId}`);
|
|
405
518
|
}
|
|
406
519
|
/**
|
|
407
|
-
* Load
|
|
520
|
+
* Load an STT model into sherpa-onnx.
|
|
521
|
+
*
|
|
522
|
+
* Supports two modes:
|
|
523
|
+
* 1. **Archive** (isArchive=true): Download is a .tar.gz that bundles encoder,
|
|
524
|
+
* decoder, tokens, etc. Matches the Swift SDK approach.
|
|
525
|
+
* 2. **Individual files**: Separate encoder/decoder/tokens downloads.
|
|
408
526
|
*/
|
|
409
|
-
async
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
const
|
|
415
|
-
if (
|
|
416
|
-
|
|
417
|
-
logger.warning(`No mmproj found, loading as text-only LLM: ${modelId}`);
|
|
418
|
-
const data = await this.downloader.loadFromOPFS(modelId);
|
|
419
|
-
if (!data)
|
|
420
|
-
throw new Error('Model not downloaded.');
|
|
421
|
-
await this.loadLLMModel(model, modelId, data);
|
|
422
|
-
return;
|
|
527
|
+
async loadSTTModel(model, primaryData) {
|
|
528
|
+
if (!this.sttLoader)
|
|
529
|
+
throw new Error('No STT loader registered. Call ModelManager.setSTTLoader() first.');
|
|
530
|
+
const sherpa = SherpaONNXBridge.shared;
|
|
531
|
+
await sherpa.ensureLoaded();
|
|
532
|
+
const modelDir = `/models/${model.id}`;
|
|
533
|
+
if (model.isArchive) {
|
|
534
|
+
await this.loadSTTFromArchive(model, primaryData, sherpa, modelDir);
|
|
423
535
|
}
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
logger.debug(`mmproj not in storage, downloading on-demand: ${mmprojFile.filename}`);
|
|
429
|
-
const mmprojDownload = await this.downloader.downloadFile(mmprojFile.url);
|
|
430
|
-
await this.downloader.storeInOPFS(mmprojKey, mmprojDownload);
|
|
431
|
-
}
|
|
432
|
-
if (!this.vlmLoader) {
|
|
433
|
-
throw new Error('No VLM loader registered. Call ModelManager.setVLMLoader() first.');
|
|
434
|
-
}
|
|
435
|
-
if (!this.vlmLoader.isInitialized) {
|
|
436
|
-
logger.info('Initializing VLM loader...');
|
|
437
|
-
await this.vlmLoader.init();
|
|
438
|
-
}
|
|
439
|
-
// Transfer data to Worker when model is only in memory cache
|
|
440
|
-
let modelDataBuf;
|
|
441
|
-
let mmprojDataBuf;
|
|
442
|
-
const modelInOPFS = await this.downloader.existsInActualOPFS(modelId);
|
|
443
|
-
if (!modelInOPFS) {
|
|
444
|
-
const data = await this.downloader.loadFromOPFS(modelId);
|
|
445
|
-
if (data && data.length > 0) {
|
|
446
|
-
modelDataBuf = new ArrayBuffer(data.byteLength);
|
|
447
|
-
new Uint8Array(modelDataBuf).set(data);
|
|
448
|
-
logger.debug(`Transferring model data to VLM Worker (${(data.length / 1024 / 1024).toFixed(1)} MB)`);
|
|
449
|
-
}
|
|
450
|
-
}
|
|
451
|
-
const mmprojInOPFS = await this.downloader.existsInActualOPFS(mmprojKey);
|
|
452
|
-
if (!mmprojInOPFS) {
|
|
453
|
-
const mmprojBytes = await this.downloader.loadFromOPFS(mmprojKey);
|
|
454
|
-
if (mmprojBytes) {
|
|
455
|
-
mmprojDataBuf = new ArrayBuffer(mmprojBytes.byteLength);
|
|
456
|
-
new Uint8Array(mmprojDataBuf).set(mmprojBytes);
|
|
457
|
-
logger.debug(`Transferring mmproj data to VLM Worker (${(mmprojBytes.length / 1024 / 1024).toFixed(1)} MB)`);
|
|
458
|
-
}
|
|
459
|
-
}
|
|
460
|
-
logger.info(`Loading VLM model: ${modelId}`);
|
|
461
|
-
await this.vlmLoader.loadModel({
|
|
462
|
-
modelOpfsKey: modelId,
|
|
463
|
-
modelFilename: `${modelId}.gguf`,
|
|
464
|
-
mmprojOpfsKey: mmprojKey,
|
|
465
|
-
mmprojFilename: mmprojFile.filename,
|
|
466
|
-
modelId,
|
|
467
|
-
modelName: model.name,
|
|
468
|
-
modelData: modelDataBuf,
|
|
469
|
-
mmprojData: mmprojDataBuf,
|
|
470
|
-
});
|
|
471
|
-
logger.info(`VLM model loaded: ${modelId}`);
|
|
536
|
+
else {
|
|
537
|
+
await this.loadSTTFromIndividualFiles(model, primaryData, sherpa, modelDir);
|
|
538
|
+
}
|
|
539
|
+
logger.info(`STT model loaded via sherpa-onnx: ${model.id}`);
|
|
472
540
|
}
|
|
473
541
|
/**
|
|
474
|
-
* Load an STT model
|
|
475
|
-
*
|
|
542
|
+
* Load an STT model from a .tar.gz archive (matching Swift SDK approach).
|
|
543
|
+
* Extracts encoder, decoder, and tokens from the archive automatically.
|
|
476
544
|
*/
|
|
477
|
-
async
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
545
|
+
async loadSTTFromArchive(model, archiveData, sherpa, modelDir) {
|
|
546
|
+
logger.debug(`Extracting STT archive for ${model.id} (${archiveData.length} bytes)...`);
|
|
547
|
+
const entries = await extractTarGz(archiveData);
|
|
548
|
+
logger.debug(`Extracted ${entries.length} files from STT archive`);
|
|
549
|
+
const prefix = this.findArchivePrefix(entries.map(e => e.path));
|
|
550
|
+
// Write all files and auto-discover key paths
|
|
551
|
+
let encoderPath = null;
|
|
552
|
+
let decoderPath = null;
|
|
553
|
+
let tokensPath = null;
|
|
554
|
+
let joinerPath = null;
|
|
555
|
+
let modelPath = null;
|
|
556
|
+
for (const entry of entries) {
|
|
557
|
+
const relativePath = prefix ? entry.path.slice(prefix.length) : entry.path;
|
|
558
|
+
const fsPath = `${modelDir}/${relativePath}`;
|
|
559
|
+
sherpa.writeFile(fsPath, entry.data);
|
|
560
|
+
// Auto-discover by filename pattern
|
|
561
|
+
if (relativePath.includes('encoder') && relativePath.endsWith('.onnx')) {
|
|
562
|
+
encoderPath = fsPath;
|
|
563
|
+
}
|
|
564
|
+
else if (relativePath.includes('decoder') && relativePath.endsWith('.onnx')) {
|
|
565
|
+
decoderPath = fsPath;
|
|
566
|
+
}
|
|
567
|
+
else if (relativePath.includes('joiner') && relativePath.endsWith('.onnx')) {
|
|
568
|
+
joinerPath = fsPath;
|
|
569
|
+
}
|
|
570
|
+
else if (relativePath.includes('tokens') && relativePath.endsWith('.txt')) {
|
|
571
|
+
tokensPath = fsPath;
|
|
572
|
+
}
|
|
573
|
+
else if (relativePath.endsWith('.onnx') && !relativePath.includes('encoder') && !relativePath.includes('decoder') && !relativePath.includes('joiner')) {
|
|
574
|
+
modelPath = fsPath;
|
|
575
|
+
}
|
|
576
|
+
}
|
|
577
|
+
// Route to the appropriate STT model type
|
|
578
|
+
if (model.id.includes('whisper')) {
|
|
579
|
+
if (!encoderPath || !decoderPath || !tokensPath) {
|
|
580
|
+
throw new Error(`Whisper archive for '${model.id}' missing encoder/decoder/tokens`);
|
|
581
|
+
}
|
|
582
|
+
await this.sttLoader.loadModel({
|
|
583
|
+
modelId: model.id,
|
|
584
|
+
type: STTModelType.Whisper,
|
|
585
|
+
modelFiles: { encoder: encoderPath, decoder: decoderPath, tokens: tokensPath },
|
|
586
|
+
sampleRate: 16000,
|
|
587
|
+
language: model.language ?? 'en',
|
|
588
|
+
task: model.sttTask ?? 'transcribe',
|
|
589
|
+
});
|
|
590
|
+
}
|
|
591
|
+
else if (model.id.includes('paraformer')) {
|
|
592
|
+
if (!modelPath || !tokensPath) {
|
|
593
|
+
throw new Error(`Paraformer archive for '${model.id}' missing model/tokens`);
|
|
594
|
+
}
|
|
595
|
+
await this.sttLoader.loadModel({
|
|
596
|
+
modelId: model.id,
|
|
597
|
+
type: STTModelType.Paraformer,
|
|
598
|
+
modelFiles: { model: modelPath, tokens: tokensPath },
|
|
599
|
+
sampleRate: 16000,
|
|
600
|
+
});
|
|
601
|
+
}
|
|
602
|
+
else if (model.id.includes('zipformer')) {
|
|
603
|
+
if (!encoderPath || !decoderPath || !joinerPath || !tokensPath) {
|
|
604
|
+
throw new Error(`Zipformer archive for '${model.id}' missing encoder/decoder/joiner/tokens`);
|
|
605
|
+
}
|
|
606
|
+
await this.sttLoader.loadModel({
|
|
607
|
+
modelId: model.id,
|
|
608
|
+
type: STTModelType.Zipformer,
|
|
609
|
+
modelFiles: { encoder: encoderPath, decoder: decoderPath, joiner: joinerPath, tokens: tokensPath },
|
|
610
|
+
sampleRate: 16000,
|
|
611
|
+
});
|
|
612
|
+
}
|
|
613
|
+
else {
|
|
614
|
+
throw new Error(`Unknown STT model type for model: ${model.id}`);
|
|
615
|
+
}
|
|
616
|
+
}
|
|
617
|
+
/**
|
|
618
|
+
* Load an STT model from individual downloaded files (legacy path).
|
|
619
|
+
*/
|
|
620
|
+
async loadSTTFromIndividualFiles(model, primaryData, sherpa, modelDir) {
|
|
621
|
+
const primaryFilename = model.url.split('/').pop();
|
|
622
|
+
const primaryPath = `${modelDir}/${primaryFilename}`;
|
|
623
|
+
logger.debug(`Writing STT primary file to ${primaryPath} (${primaryData.length} bytes)`);
|
|
624
|
+
sherpa.writeFile(primaryPath, primaryData);
|
|
625
|
+
// Write additional files to sherpa FS (download on-demand if missing from OPFS)
|
|
626
|
+
const additionalPaths = {};
|
|
627
|
+
if (model.additionalFiles) {
|
|
628
|
+
for (const file of model.additionalFiles) {
|
|
629
|
+
const fileKey = this.downloader.additionalFileKey(model.id, file.filename);
|
|
630
|
+
let fileData = await this.downloader.loadFromOPFS(fileKey);
|
|
631
|
+
if (!fileData) {
|
|
632
|
+
logger.debug(`Additional file ${file.filename} not in OPFS, downloading...`);
|
|
633
|
+
fileData = await this.downloader.downloadFile(file.url);
|
|
634
|
+
await this.downloader.storeInOPFS(fileKey, fileData);
|
|
635
|
+
}
|
|
636
|
+
const filePath = `${modelDir}/${file.filename}`;
|
|
637
|
+
logger.debug(`Writing STT file to ${filePath} (${fileData.length} bytes)`);
|
|
638
|
+
sherpa.writeFile(filePath, fileData);
|
|
639
|
+
additionalPaths[file.filename] = filePath;
|
|
640
|
+
}
|
|
641
|
+
}
|
|
642
|
+
// Determine model type and build config based on the model ID
|
|
643
|
+
if (model.id.includes('whisper')) {
|
|
644
|
+
const encoderPath = primaryPath;
|
|
645
|
+
const decoderFilename = model.additionalFiles?.find(f => f.filename.includes('decoder'))?.filename;
|
|
646
|
+
const tokensFilename = model.additionalFiles?.find(f => f.filename.includes('tokens'))?.filename;
|
|
647
|
+
if (!decoderFilename || !tokensFilename) {
|
|
648
|
+
throw new Error('Whisper model requires encoder, decoder, and tokens files');
|
|
649
|
+
}
|
|
650
|
+
await this.sttLoader.loadModel({
|
|
651
|
+
modelId: model.id,
|
|
652
|
+
type: STTModelType.Whisper,
|
|
653
|
+
modelFiles: {
|
|
654
|
+
encoder: encoderPath,
|
|
655
|
+
decoder: `${modelDir}/${decoderFilename}`,
|
|
656
|
+
tokens: `${modelDir}/${tokensFilename}`,
|
|
657
|
+
},
|
|
658
|
+
sampleRate: 16000,
|
|
659
|
+
language: model.language ?? 'en',
|
|
660
|
+
task: model.sttTask ?? 'transcribe',
|
|
661
|
+
});
|
|
662
|
+
}
|
|
663
|
+
else if (model.id.includes('paraformer')) {
|
|
664
|
+
const tokensFilename = model.additionalFiles?.find(f => f.filename.includes('tokens'))?.filename;
|
|
665
|
+
if (!tokensFilename) {
|
|
666
|
+
throw new Error('Paraformer model requires model and tokens files');
|
|
667
|
+
}
|
|
668
|
+
await this.sttLoader.loadModel({
|
|
669
|
+
modelId: model.id,
|
|
670
|
+
type: STTModelType.Paraformer,
|
|
671
|
+
modelFiles: { model: primaryPath, tokens: `${modelDir}/${tokensFilename}` },
|
|
672
|
+
sampleRate: 16000,
|
|
673
|
+
});
|
|
674
|
+
}
|
|
675
|
+
else if (model.id.includes('zipformer')) {
|
|
676
|
+
const decoderFilename = model.additionalFiles?.find(f => f.filename.includes('decoder'))?.filename;
|
|
677
|
+
const joinerFilename = model.additionalFiles?.find(f => f.filename.includes('joiner'))?.filename;
|
|
678
|
+
const tokensFilename = model.additionalFiles?.find(f => f.filename.includes('tokens'))?.filename;
|
|
679
|
+
if (!decoderFilename || !joinerFilename || !tokensFilename) {
|
|
680
|
+
throw new Error('Zipformer model requires encoder, decoder, joiner, and tokens files');
|
|
681
|
+
}
|
|
682
|
+
await this.sttLoader.loadModel({
|
|
683
|
+
modelId: model.id,
|
|
684
|
+
type: STTModelType.Zipformer,
|
|
685
|
+
modelFiles: {
|
|
686
|
+
encoder: primaryPath,
|
|
687
|
+
decoder: `${modelDir}/${decoderFilename}`,
|
|
688
|
+
joiner: `${modelDir}/${joinerFilename}`,
|
|
689
|
+
tokens: `${modelDir}/${tokensFilename}`,
|
|
690
|
+
},
|
|
691
|
+
sampleRate: 16000,
|
|
692
|
+
});
|
|
693
|
+
}
|
|
694
|
+
else {
|
|
695
|
+
throw new Error(`Unknown STT model type for model: ${model.id}`);
|
|
696
|
+
}
|
|
483
697
|
}
|
|
484
698
|
/**
|
|
485
|
-
* Load a TTS model
|
|
486
|
-
*
|
|
699
|
+
* Load a TTS model into the sherpa-onnx Emscripten FS and initialise the TTS engine.
|
|
700
|
+
*
|
|
701
|
+
* Supports two modes:
|
|
702
|
+
* 1. **Archive** (isArchive=true): Download is a .tar.gz that bundles model files +
|
|
703
|
+
* espeak-ng-data. Matches the Swift SDK approach — extract and write all files.
|
|
704
|
+
* 2. **Individual files** (legacy): Separate model + companion file downloads.
|
|
487
705
|
*/
|
|
488
|
-
async loadTTSModel(model,
|
|
706
|
+
async loadTTSModel(model, primaryData) {
|
|
489
707
|
if (!this.ttsLoader)
|
|
490
|
-
throw new Error('No TTS loader registered.
|
|
491
|
-
const
|
|
492
|
-
await
|
|
493
|
-
|
|
708
|
+
throw new Error('No TTS loader registered. Call ModelManager.setTTSLoader() first.');
|
|
709
|
+
const sherpa = SherpaONNXBridge.shared;
|
|
710
|
+
await sherpa.ensureLoaded();
|
|
711
|
+
const modelDir = `/models/${model.id}`;
|
|
712
|
+
if (model.isArchive) {
|
|
713
|
+
await this.loadTTSFromArchive(model, primaryData, sherpa, modelDir);
|
|
714
|
+
}
|
|
715
|
+
else {
|
|
716
|
+
await this.loadTTSFromIndividualFiles(model, primaryData, sherpa, modelDir);
|
|
717
|
+
}
|
|
718
|
+
logger.info(`TTS model loaded via sherpa-onnx: ${model.id}`);
|
|
494
719
|
}
|
|
495
720
|
/**
|
|
496
|
-
* Load a
|
|
497
|
-
*
|
|
721
|
+
* Load a TTS model from a .tar.gz archive (matching Swift SDK approach).
|
|
722
|
+
*
|
|
723
|
+
* The archive contains all necessary files in a nested directory:
|
|
724
|
+
* model.onnx, tokens.txt, espeak-ng-data/, etc.
|
|
725
|
+
* We extract everything and write it to the sherpa virtual FS.
|
|
726
|
+
*/
|
|
727
|
+
async loadTTSFromArchive(model, archiveData, sherpa, modelDir) {
|
|
728
|
+
logger.debug(`Extracting TTS archive for ${model.id} (${archiveData.length} bytes)...`);
|
|
729
|
+
const entries = await extractTarGz(archiveData);
|
|
730
|
+
logger.debug(`Extracted ${entries.length} files from archive`);
|
|
731
|
+
// Find the common prefix (nested directory) — archives typically contain
|
|
732
|
+
// one top-level directory with all files inside it.
|
|
733
|
+
const prefix = this.findArchivePrefix(entries.map(e => e.path));
|
|
734
|
+
// Write all extracted files to the sherpa virtual FS
|
|
735
|
+
let modelPath = null;
|
|
736
|
+
let tokensPath = null;
|
|
737
|
+
let dataDirPath = null;
|
|
738
|
+
for (const entry of entries) {
|
|
739
|
+
// Strip the nested directory prefix to get relative path
|
|
740
|
+
const relativePath = prefix ? entry.path.slice(prefix.length) : entry.path;
|
|
741
|
+
const fsPath = `${modelDir}/${relativePath}`;
|
|
742
|
+
sherpa.writeFile(fsPath, entry.data);
|
|
743
|
+
// Auto-discover key files
|
|
744
|
+
if (relativePath.endsWith('.onnx') && !relativePath.includes('/')) {
|
|
745
|
+
modelPath = fsPath;
|
|
746
|
+
}
|
|
747
|
+
if (relativePath === 'tokens.txt') {
|
|
748
|
+
tokensPath = fsPath;
|
|
749
|
+
}
|
|
750
|
+
if (relativePath.startsWith('espeak-ng-data/') && !dataDirPath) {
|
|
751
|
+
dataDirPath = `${modelDir}/espeak-ng-data`;
|
|
752
|
+
}
|
|
753
|
+
}
|
|
754
|
+
if (!modelPath) {
|
|
755
|
+
throw new Error(`TTS archive for '${model.id}' does not contain an .onnx model file`);
|
|
756
|
+
}
|
|
757
|
+
if (!tokensPath) {
|
|
758
|
+
throw new Error(`TTS archive for '${model.id}' does not contain tokens.txt`);
|
|
759
|
+
}
|
|
760
|
+
logger.debug(`TTS archive extracted — model: ${modelPath}, tokens: ${tokensPath}, dataDir: ${dataDirPath ?? 'none'}`);
|
|
761
|
+
await this.ttsLoader.loadVoice({
|
|
762
|
+
voiceId: model.id,
|
|
763
|
+
modelPath,
|
|
764
|
+
tokensPath,
|
|
765
|
+
dataDir: dataDirPath ?? '',
|
|
766
|
+
numThreads: 1,
|
|
767
|
+
});
|
|
768
|
+
}
|
|
769
|
+
/**
|
|
770
|
+
* Load a TTS model from individual downloaded files.
|
|
771
|
+
* Used when models are registered with individual file URLs (e.g. HuggingFace)
|
|
772
|
+
* rather than tar.gz archives. Downloads espeak-ng-data on-demand for Piper models.
|
|
773
|
+
*/
|
|
774
|
+
async loadTTSFromIndividualFiles(model, primaryData, sherpa, modelDir) {
|
|
775
|
+
const primaryFilename = model.url.split('/').pop();
|
|
776
|
+
const primaryPath = `${modelDir}/${primaryFilename}`;
|
|
777
|
+
logger.debug(`Writing TTS primary file to ${primaryPath} (${primaryData.length} bytes)`);
|
|
778
|
+
sherpa.writeFile(primaryPath, primaryData);
|
|
779
|
+
// Write additional files (tokens.txt, *.json, etc.)
|
|
780
|
+
const additionalPaths = {};
|
|
781
|
+
if (model.additionalFiles) {
|
|
782
|
+
for (const file of model.additionalFiles) {
|
|
783
|
+
const fileKey = this.downloader.additionalFileKey(model.id, file.filename);
|
|
784
|
+
let fileData = await this.downloader.loadFromOPFS(fileKey);
|
|
785
|
+
if (!fileData) {
|
|
786
|
+
logger.debug(`Additional file ${file.filename} not in OPFS, downloading...`);
|
|
787
|
+
fileData = await this.downloader.downloadFile(file.url);
|
|
788
|
+
await this.downloader.storeInOPFS(fileKey, fileData);
|
|
789
|
+
}
|
|
790
|
+
const filePath = `${modelDir}/${file.filename}`;
|
|
791
|
+
logger.debug(`Writing TTS file to ${filePath} (${fileData.length} bytes)`);
|
|
792
|
+
sherpa.writeFile(filePath, fileData);
|
|
793
|
+
additionalPaths[file.filename] = filePath;
|
|
794
|
+
}
|
|
795
|
+
}
|
|
796
|
+
const tokensPath = additionalPaths['tokens.txt'];
|
|
797
|
+
if (!tokensPath) {
|
|
798
|
+
throw new Error('TTS model requires tokens.txt file');
|
|
799
|
+
}
|
|
800
|
+
await this.ttsLoader.loadVoice({
|
|
801
|
+
voiceId: model.id,
|
|
802
|
+
modelPath: primaryPath,
|
|
803
|
+
tokensPath,
|
|
804
|
+
dataDir: '', // espeak-ng-data is bundled in archives; individual-file path doesn't include it
|
|
805
|
+
numThreads: 1,
|
|
806
|
+
});
|
|
807
|
+
}
|
|
808
|
+
/**
|
|
809
|
+
* Load a VAD model (Silero) into the sherpa-onnx Emscripten FS.
|
|
810
|
+
* The Silero VAD is a single ONNX file — write it to FS and initialise.
|
|
498
811
|
*/
|
|
499
812
|
async loadVADModel(model, data) {
|
|
813
|
+
const sherpa = SherpaONNXBridge.shared;
|
|
814
|
+
await sherpa.ensureLoaded();
|
|
815
|
+
const modelDir = `/models/${model.id}`;
|
|
816
|
+
const filename = model.url?.split('/').pop() ?? 'silero_vad.onnx';
|
|
817
|
+
const fsPath = `${modelDir}/${filename}`;
|
|
818
|
+
logger.debug(`Writing VAD model to ${fsPath} (${data.length} bytes)`);
|
|
819
|
+
sherpa.writeFile(fsPath, data);
|
|
500
820
|
if (!this.vadLoader)
|
|
501
|
-
throw new Error('No VAD loader registered.
|
|
502
|
-
|
|
503
|
-
await this.vadLoader.loadModelFromData(ctx);
|
|
821
|
+
throw new Error('No VAD loader registered. Call ModelManager.setVADLoader() first.');
|
|
822
|
+
await this.vadLoader.loadModel({ modelPath: fsPath });
|
|
504
823
|
logger.info(`VAD model loaded: ${model.id}`);
|
|
505
824
|
}
|
|
825
|
+
/**
|
|
826
|
+
* Find the common directory prefix in archive entry paths.
|
|
827
|
+
* Archives typically contain a single top-level directory (nested structure).
|
|
828
|
+
* Returns the prefix including trailing '/', or empty string if no common prefix.
|
|
829
|
+
*/
|
|
830
|
+
findArchivePrefix(paths) {
|
|
831
|
+
if (paths.length === 0)
|
|
832
|
+
return '';
|
|
833
|
+
// Check if all paths share a common first directory component
|
|
834
|
+
const firstSlash = paths[0].indexOf('/');
|
|
835
|
+
if (firstSlash === -1)
|
|
836
|
+
return '';
|
|
837
|
+
const candidate = paths[0].slice(0, firstSlash + 1);
|
|
838
|
+
const allMatch = paths.every(p => p.startsWith(candidate));
|
|
839
|
+
return allMatch ? candidate : '';
|
|
840
|
+
}
|
|
506
841
|
/** Unload the currently loaded model for a specific category */
|
|
507
842
|
async unloadModelByCategory(category) {
|
|
508
843
|
const modelId = this.loadedByCategory.get(category);
|
|
@@ -525,12 +860,14 @@ class ModelManagerImpl {
|
|
|
525
860
|
else {
|
|
526
861
|
await this.llmLoader?.unloadModel();
|
|
527
862
|
}
|
|
528
|
-
// Clean up Emscripten FS model files
|
|
863
|
+
// Clean up Emscripten FS model files to release WASM linear memory.
|
|
864
|
+
// LLM models (Language) write .gguf files into the main-thread
|
|
865
|
+
// Emscripten FS. VLM (Multimodal) models are handled by the Worker's
|
|
866
|
+
// own WASM FS and don't need cleanup here.
|
|
529
867
|
if (category === ModelCategory.Language) {
|
|
530
868
|
try {
|
|
531
869
|
const bridge = WASMBridge.shared;
|
|
532
870
|
if (bridge.isLoaded) {
|
|
533
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
534
871
|
const m = bridge.module;
|
|
535
872
|
const fsPath = `/models/${modelId}.gguf`;
|
|
536
873
|
try {
|
|
@@ -541,7 +878,7 @@ class ModelManagerImpl {
|
|
|
541
878
|
}
|
|
542
879
|
}
|
|
543
880
|
catch {
|
|
544
|
-
// Non-critical
|
|
881
|
+
// Non-critical — FS cleanup is best-effort
|
|
545
882
|
}
|
|
546
883
|
}
|
|
547
884
|
}
|