@runanywhere/web-llamacpp 0.1.0-beta.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. package/README.md +75 -0
  2. package/dist/Extensions/DiffusionTypes.d.ts +64 -0
  3. package/dist/Extensions/DiffusionTypes.d.ts.map +1 -0
  4. package/dist/Extensions/DiffusionTypes.js +28 -0
  5. package/dist/Extensions/DiffusionTypes.js.map +1 -0
  6. package/dist/Extensions/EmbeddingsTypes.d.ts +33 -0
  7. package/dist/Extensions/EmbeddingsTypes.d.ts.map +1 -0
  8. package/dist/Extensions/EmbeddingsTypes.js +13 -0
  9. package/dist/Extensions/EmbeddingsTypes.js.map +1 -0
  10. package/dist/Extensions/RunAnywhere+Diffusion.d.ts +44 -0
  11. package/dist/Extensions/RunAnywhere+Diffusion.d.ts.map +1 -0
  12. package/dist/Extensions/RunAnywhere+Diffusion.js +185 -0
  13. package/dist/Extensions/RunAnywhere+Diffusion.js.map +1 -0
  14. package/dist/Extensions/RunAnywhere+Embeddings.d.ts +56 -0
  15. package/dist/Extensions/RunAnywhere+Embeddings.d.ts.map +1 -0
  16. package/dist/Extensions/RunAnywhere+Embeddings.js +236 -0
  17. package/dist/Extensions/RunAnywhere+Embeddings.js.map +1 -0
  18. package/dist/Extensions/RunAnywhere+StructuredOutput.d.ts +69 -0
  19. package/dist/Extensions/RunAnywhere+StructuredOutput.d.ts.map +1 -0
  20. package/dist/Extensions/RunAnywhere+StructuredOutput.js +194 -0
  21. package/dist/Extensions/RunAnywhere+StructuredOutput.js.map +1 -0
  22. package/dist/Extensions/RunAnywhere+TextGeneration.d.ts +87 -0
  23. package/dist/Extensions/RunAnywhere+TextGeneration.d.ts.map +1 -0
  24. package/dist/Extensions/RunAnywhere+TextGeneration.js +525 -0
  25. package/dist/Extensions/RunAnywhere+TextGeneration.js.map +1 -0
  26. package/dist/Extensions/RunAnywhere+ToolCalling.d.ts +82 -0
  27. package/dist/Extensions/RunAnywhere+ToolCalling.d.ts.map +1 -0
  28. package/dist/Extensions/RunAnywhere+ToolCalling.js +574 -0
  29. package/dist/Extensions/RunAnywhere+ToolCalling.js.map +1 -0
  30. package/dist/Extensions/RunAnywhere+VLM.d.ts +57 -0
  31. package/dist/Extensions/RunAnywhere+VLM.d.ts.map +1 -0
  32. package/dist/Extensions/RunAnywhere+VLM.js +258 -0
  33. package/dist/Extensions/RunAnywhere+VLM.js.map +1 -0
  34. package/dist/Extensions/ToolCallingTypes.d.ts +78 -0
  35. package/dist/Extensions/ToolCallingTypes.d.ts.map +1 -0
  36. package/dist/Extensions/ToolCallingTypes.js +8 -0
  37. package/dist/Extensions/ToolCallingTypes.js.map +1 -0
  38. package/dist/Extensions/VLMTypes.d.ts +16 -0
  39. package/dist/Extensions/VLMTypes.d.ts.map +1 -0
  40. package/dist/Extensions/VLMTypes.js +18 -0
  41. package/dist/Extensions/VLMTypes.js.map +1 -0
  42. package/dist/Foundation/AnalyticsEventsBridge.d.ts +38 -0
  43. package/dist/Foundation/AnalyticsEventsBridge.d.ts.map +1 -0
  44. package/dist/Foundation/AnalyticsEventsBridge.js +394 -0
  45. package/dist/Foundation/AnalyticsEventsBridge.js.map +1 -0
  46. package/dist/Foundation/LlamaCppBridge.d.ts +182 -0
  47. package/dist/Foundation/LlamaCppBridge.d.ts.map +1 -0
  48. package/dist/Foundation/LlamaCppBridge.js +483 -0
  49. package/dist/Foundation/LlamaCppBridge.js.map +1 -0
  50. package/dist/Foundation/LlamaCppOffsets.d.ts +32 -0
  51. package/dist/Foundation/LlamaCppOffsets.d.ts.map +1 -0
  52. package/dist/Foundation/LlamaCppOffsets.js +191 -0
  53. package/dist/Foundation/LlamaCppOffsets.js.map +1 -0
  54. package/dist/Foundation/PlatformAdapter.d.ts +109 -0
  55. package/dist/Foundation/PlatformAdapter.d.ts.map +1 -0
  56. package/dist/Foundation/PlatformAdapter.js +417 -0
  57. package/dist/Foundation/PlatformAdapter.js.map +1 -0
  58. package/dist/Foundation/TelemetryService.d.ts +96 -0
  59. package/dist/Foundation/TelemetryService.d.ts.map +1 -0
  60. package/dist/Foundation/TelemetryService.js +345 -0
  61. package/dist/Foundation/TelemetryService.js.map +1 -0
  62. package/dist/Foundation/WASMAnalyticsEmitter.d.ts +29 -0
  63. package/dist/Foundation/WASMAnalyticsEmitter.d.ts.map +1 -0
  64. package/dist/Foundation/WASMAnalyticsEmitter.js +70 -0
  65. package/dist/Foundation/WASMAnalyticsEmitter.js.map +1 -0
  66. package/dist/Infrastructure/VLMWorkerBridge.d.ts +219 -0
  67. package/dist/Infrastructure/VLMWorkerBridge.d.ts.map +1 -0
  68. package/dist/Infrastructure/VLMWorkerBridge.js +288 -0
  69. package/dist/Infrastructure/VLMWorkerBridge.js.map +1 -0
  70. package/dist/Infrastructure/VLMWorkerRuntime.d.ts +38 -0
  71. package/dist/Infrastructure/VLMWorkerRuntime.d.ts.map +1 -0
  72. package/dist/Infrastructure/VLMWorkerRuntime.js +620 -0
  73. package/dist/Infrastructure/VLMWorkerRuntime.js.map +1 -0
  74. package/dist/LlamaCPP.d.ts +49 -0
  75. package/dist/LlamaCPP.d.ts.map +1 -0
  76. package/dist/LlamaCPP.js +63 -0
  77. package/dist/LlamaCPP.js.map +1 -0
  78. package/dist/LlamaCppProvider.d.ts +32 -0
  79. package/dist/LlamaCppProvider.d.ts.map +1 -0
  80. package/dist/LlamaCppProvider.js +106 -0
  81. package/dist/LlamaCppProvider.js.map +1 -0
  82. package/dist/index.d.ts +43 -0
  83. package/dist/index.d.ts.map +1 -0
  84. package/dist/index.js +41 -0
  85. package/dist/index.js.map +1 -0
  86. package/dist/workers/vlm-worker.d.ts +9 -0
  87. package/dist/workers/vlm-worker.d.ts.map +1 -0
  88. package/dist/workers/vlm-worker.js +10 -0
  89. package/dist/workers/vlm-worker.js.map +1 -0
  90. package/package.json +69 -0
  91. package/wasm/racommons-llamacpp-webgpu.js +159 -0
  92. package/wasm/racommons-llamacpp-webgpu.wasm +0 -0
  93. package/wasm/racommons-llamacpp.js +129 -0
  94. package/wasm/racommons-llamacpp.wasm +0 -0
@@ -0,0 +1,620 @@
1
+ /**
2
+ * RunAnywhere Web SDK - VLM Worker Runtime
3
+ *
4
+ * Encapsulates the Worker-side logic for VLM inference. This module runs
5
+ * inside a dedicated Web Worker and manages its own WASM instance
6
+ * (separate from the main-thread SDK).
7
+ *
8
+ * Architecture:
9
+ * - Loads its OWN WASM instance (separate from the main thread SDK)
10
+ * - Reads model files from OPFS directly (no large postMessage transfers)
11
+ * - Communicates via typed postMessage RPC
12
+ *
13
+ * Why a separate WASM instance?
14
+ * The C function `rac_vlm_component_process` is synchronous and blocks for
15
+ * ~100s (2B model in WASM). Running it on the main thread freezes the entire UI.
16
+ * A Worker with its own WASM instance allows inference to happen concurrently.
17
+ *
18
+ * IMPORTANT: This file must NOT import from WASMBridge.ts or other SDK modules
19
+ * that assume a main-thread context. The Worker has its own WASM instance and
20
+ * should be self-contained. Only `type`-only imports are safe.
21
+ */
22
// ---------------------------------------------------------------------------
// Worker state
// ---------------------------------------------------------------------------
// Emscripten module instance owned by this Worker; set by initWASM().
let wasmModule = null;
// Opaque rac_vlm_component handle; 0 until initWASM() creates the component.
let vlmHandle = 0;
// Whether the WebGPU WASM build was requested in the 'init' message.
let isWebGPU = false;
// Struct field-offset table read from the WASM module (see
// loadOffsetsFromModule); null until initWASM() completes Step 3.
let offsets = null;
29
// ---------------------------------------------------------------------------
// Inline offset loader for Worker context
//
// The Worker cannot import from the main SDK or LlamaCppBridge (they assume
// a main-thread context). Instead, we read offsets directly from the WASM
// module's _rac_wasm_offsetof_* / _rac_wasm_sizeof_* exports.
// ---------------------------------------------------------------------------
/**
 * Looks up the byte offset of a struct field via the WASM module's
 * `_rac_wasm_offsetof_<name>` helper export.
 *
 * @param {object} m - The Emscripten module instance.
 * @param {string} name - Field identifier, e.g. 'vlm_image_width'.
 * @returns {number} The offset in bytes, or 0 when the export is absent.
 */
function workerOffsetOf(m, name) {
    const helper = m[`_rac_wasm_offsetof_${name}`];
    if (typeof helper !== 'function') {
        return 0;
    }
    return helper();
}
40
/**
 * Looks up the size in bytes of a struct via the WASM module's
 * `_rac_wasm_sizeof_<name>` helper export.
 *
 * @param {object} m - The Emscripten module instance.
 * @param {string} name - Struct identifier, e.g. 'embedding_vector'.
 * @returns {number} The size in bytes, or 0 when the export is absent.
 */
function workerSizeOf(m, name) {
    const helper = m[`_rac_wasm_sizeof_${name}`];
    if (typeof helper !== 'function') {
        return 0;
    }
    return helper();
}
44
/**
 * Builds the complete struct field-offset table by querying the WASM
 * module's compiler-generated `_rac_wasm_offsetof_*` / `_rac_wasm_sizeof_*`
 * helper exports. Any export missing from the build resolves to 0.
 *
 * @param {object} m - The Emscripten module instance.
 * @returns {object} Offset table keyed by struct name, then field name.
 */
function loadOffsetsFromModule(m) {
    // Bind the module once so each entry below reads as a plain field name.
    const off = (field) => workerOffsetOf(m, field);
    const size = (struct) => workerSizeOf(m, struct);
    return {
        config: { logLevel: off('config_log_level') },
        llmOptions: {
            maxTokens: off('llm_options_max_tokens'),
            temperature: off('llm_options_temperature'),
            topP: off('llm_options_top_p'),
            systemPrompt: off('llm_options_system_prompt'),
        },
        llmResult: {
            text: off('llm_result_text'),
            promptTokens: off('llm_result_prompt_tokens'),
            completionTokens: off('llm_result_completion_tokens'),
        },
        vlmImage: {
            format: off('vlm_image_format'),
            filePath: off('vlm_image_file_path'),
            pixelData: off('vlm_image_pixel_data'),
            base64Data: off('vlm_image_base64_data'),
            width: off('vlm_image_width'),
            height: off('vlm_image_height'),
            dataSize: off('vlm_image_data_size'),
        },
        vlmOptions: {
            maxTokens: off('vlm_options_max_tokens'),
            temperature: off('vlm_options_temperature'),
            topP: off('vlm_options_top_p'),
            streamingEnabled: off('vlm_options_streaming_enabled'),
            systemPrompt: off('vlm_options_system_prompt'),
            modelFamily: off('vlm_options_model_family'),
        },
        vlmResult: {
            text: off('vlm_result_text'),
            promptTokens: off('vlm_result_prompt_tokens'),
            imageTokens: off('vlm_result_image_tokens'),
            completionTokens: off('vlm_result_completion_tokens'),
            totalTokens: off('vlm_result_total_tokens'),
            timeToFirstTokenMs: off('vlm_result_time_to_first_token_ms'),
            imageEncodeTimeMs: off('vlm_result_image_encode_time_ms'),
            totalTimeMs: off('vlm_result_total_time_ms'),
            tokensPerSecond: off('vlm_result_tokens_per_second'),
        },
        structuredOutputConfig: {
            jsonSchema: off('structured_output_config_json_schema'),
            includeSchemaInPrompt: off('structured_output_config_include_schema_in_prompt'),
        },
        structuredOutputValidation: {
            isValid: off('structured_output_validation_is_valid'),
            errorMessage: off('structured_output_validation_error_message'),
            extractedJson: off('structured_output_validation_extracted_json'),
        },
        embeddingsOptions: {
            normalize: off('embeddings_options_normalize'),
            pooling: off('embeddings_options_pooling'),
            nThreads: off('embeddings_options_n_threads'),
        },
        embeddingsResult: {
            embeddings: off('embeddings_result_embeddings'),
            numEmbeddings: off('embeddings_result_num_embeddings'),
            dimension: off('embeddings_result_dimension'),
            processingTimeMs: off('embeddings_result_processing_time_ms'),
            totalTokens: off('embeddings_result_total_tokens'),
        },
        embeddingVector: {
            data: off('embedding_vector_data'),
            dimension: off('embedding_vector_dimension'),
            structSize: size('embedding_vector'),
        },
        diffusionOptions: {
            prompt: off('diffusion_options_prompt'),
            negativePrompt: off('diffusion_options_negative_prompt'),
            width: off('diffusion_options_width'),
            height: off('diffusion_options_height'),
            steps: off('diffusion_options_steps'),
            guidanceScale: off('diffusion_options_guidance_scale'),
            seed: off('diffusion_options_seed'),
            scheduler: off('diffusion_options_scheduler'),
            mode: off('diffusion_options_mode'),
            denoiseStrength: off('diffusion_options_denoise_strength'),
            reportIntermediate: off('diffusion_options_report_intermediate'),
            progressStride: off('diffusion_options_progress_stride'),
        },
        diffusionResult: {
            imageData: off('diffusion_result_image_data'),
            imageSize: off('diffusion_result_image_size'),
            width: off('diffusion_result_width'),
            height: off('diffusion_result_height'),
            seedUsed: off('diffusion_result_seed_used'),
            generationTimeMs: off('diffusion_result_generation_time_ms'),
            safetyFlagged: off('diffusion_result_safety_flagged'),
        },
    };
}
137
// ---------------------------------------------------------------------------
// Logging (lightweight — no SDKLogger dependency in Worker context)
// ---------------------------------------------------------------------------
const LOG_PREFIX = '[RunAnywhere:VLMWorker]';
/** Logs an informational message with the Worker's log prefix. */
function logInfo(...args) {
    console.info(LOG_PREFIX, ...args);
}
/** Logs a warning with the Worker's log prefix. */
function logWarn(...args) {
    console.warn(LOG_PREFIX, ...args);
}
/** Logs an error with the Worker's log prefix. */
function logError(...args) {
    console.error(LOG_PREFIX, ...args);
}
144
// ---------------------------------------------------------------------------
// Helpers: string alloc / free on WASM heap
// ---------------------------------------------------------------------------
/**
 * Copies a JS string onto the WASM heap as a NUL-terminated UTF-8 C string.
 * The caller owns the returned pointer and must release it with `_free`.
 *
 * @param {string} str - String to copy.
 * @returns {number} Pointer to the allocated buffer on the WASM heap.
 */
function allocString(str) {
    const m = wasmModule;
    // Reserve one extra byte for the NUL terminator.
    const byteLength = m.lengthBytesUTF8(str) + 1;
    const heapPtr = m._malloc(byteLength);
    m.stringToUTF8(str, heapPtr, byteLength);
    return heapPtr;
}
154
/**
 * Reads a NUL-terminated UTF-8 C string from the WASM heap.
 *
 * @param {number} ptr - Pointer on the WASM heap; 0/null yields ''.
 * @returns {string} The decoded string, or '' for a null pointer.
 */
function readString(ptr) {
    return ptr ? wasmModule.UTF8ToString(ptr) : '';
}
159
// ---------------------------------------------------------------------------
// Helpers: binary data ↔ WASM heap
//
// HEAPU8 may not be exported from the WASM module (depends on build config).
// Try HEAPU8 first for speed, fall back to setValue byte-by-byte.
// ---------------------------------------------------------------------------
/**
 * Copies a byte buffer into the WASM heap at `destPtr`.
 *
 * @param {Uint8Array} src - Bytes to copy.
 * @param {number} destPtr - Destination address on the WASM heap.
 */
function writeToWasmHeap(src, destPtr) {
    const m = wasmModule;
    if (!m.HEAPU8) {
        // Slow fallback: byte-by-byte via setValue (always available).
        src.forEach((byte, index) => m.setValue(destPtr + index, byte, 'i8'));
        return;
    }
    // Fast path: bulk copy straight into the exported heap view.
    m.HEAPU8.set(src, destPtr);
}
177
// ---------------------------------------------------------------------------
// OPFS helpers (Workers have full OPFS access)
//
// Lightweight inline reader matching the same directory layout as OPFSStorage
// (root → models/ → nested paths). We don't import OPFSStorage because it
// uses SDKLogger and other SDK infrastructure that may not work in Workers.
// ---------------------------------------------------------------------------
const OPFS_MODELS_DIR = 'models';
/**
 * Reads a file from OPFS under the models/ directory. The key may contain
 * '/'-separated subdirectories. Any failure (missing entry, no OPFS support)
 * resolves to null — callers decide whether absence is fatal.
 *
 * @param {string} key - Path relative to the models/ directory.
 * @returns {Promise<Uint8Array|null>} File contents, or null on any failure.
 */
async function loadFromOPFS(key) {
    try {
        const root = await navigator.storage.getDirectory();
        let dir = await root.getDirectoryHandle(OPFS_MODELS_DIR);
        // Walk intermediate directories; the final segment is the file name.
        const segments = key.split('/');
        const fileName = segments.pop();
        for (const segment of segments) {
            dir = await dir.getDirectoryHandle(segment);
        }
        const handle = await dir.getFileHandle(fileName);
        const file = await handle.getFile();
        return new Uint8Array(await file.arrayBuffer());
    }
    catch {
        // Deliberate best-effort: treat any error as "not found".
        return null;
    }
}
210
// ---------------------------------------------------------------------------
// WASM initialization
// ---------------------------------------------------------------------------
/**
 * Loads the Worker's own WASM instance, registers a minimal platform
 * adapter, best-effort runs rac_init, loads struct offsets, registers the
 * VLM backend, and creates the VLM component (stored in `vlmHandle`).
 *
 * @param {string} wasmJsUrl - URL of the Emscripten ES6 glue JS to import.
 * @param {boolean} [useWebGPU=false] - Whether the WebGPU build was requested.
 * @throws {Error} If the WASM build lacks the VLM backend, or component
 *   creation fails. rac_init failure is deliberately non-fatal (see below).
 */
async function initWASM(wasmJsUrl, useWebGPU = false) {
    isWebGPU = useWebGPU;
    logInfo(`Loading WASM module (${useWebGPU ? 'WebGPU' : 'CPU'})...`);
    // Dynamically import the Emscripten ES6 glue JS
    const { default: createModule } = await import(/* @vite-ignore */ wasmJsUrl);
    const wasmBaseUrl = wasmJsUrl.substring(0, wasmJsUrl.lastIndexOf('/') + 1);
    wasmModule = await createModule({
        print: (text) => logInfo(text),
        printErr: (text) => logError(text),
        locateFile: (path) => wasmBaseUrl + path,
    });
    const m = wasmModule;
    // ---- rac_init: minimal initialization ----
    // We need a platform adapter for rac_init. Create a minimal one.
    // NOTE: adapterPtr is intentionally never freed — the C side stores the
    // pointer and may dereference it for the lifetime of the Worker.
    const adapterSize = m._rac_wasm_sizeof_platform_adapter();
    const adapterPtr = m._malloc(adapterSize);
    for (let i = 0; i < adapterSize; i++)
        m.setValue(adapterPtr + i, 0, 'i8');
    // Register essential callbacks via addFunction.
    // Signatures MUST match the main-thread PlatformAdapter.ts exactly —
    // Emscripten's indirect-call table traps on signature mismatch.
    const PTR_SIZE = 4; // wasm32: pointers are 4 bytes
    let offset = 0;
    // file_exists: rac_bool_t (*)(const char* path, void* user_data)
    const fileExistsCb = m.addFunction((_pathPtr, _ud) => {
        return 0; // nothing exists — VLM uses Emscripten's C fopen/fread
    }, 'iii');
    m.setValue(adapterPtr + offset, fileExistsCb, '*');
    offset += PTR_SIZE;
    // file_read: rac_result_t (*)(const char* path, void** out_data, size_t* out_size, void* user_data)
    const noopReadCb = m.addFunction((_pathPtr, _outData, _outSize, _ud) => -180, 'iiiii');
    m.setValue(adapterPtr + offset, noopReadCb, '*');
    offset += PTR_SIZE;
    // file_write: rac_result_t (*)(const char* path, const void* data, size_t size, void* user_data)
    const noopWriteCb = m.addFunction((_pathPtr, _data, _size, _ud) => -180, 'iiiii');
    m.setValue(adapterPtr + offset, noopWriteCb, '*');
    offset += PTR_SIZE;
    // file_delete: rac_result_t (*)(const char* path, void* user_data)
    const noopDelCb = m.addFunction((_pathPtr, _ud) => -180, 'iii');
    m.setValue(adapterPtr + offset, noopDelCb, '*');
    offset += PTR_SIZE;
    // secure_get: rac_result_t (*)(const char* key, char** out_value, void* user_data)
    const secureGetCb = m.addFunction((_kp, outPtr, _ud) => {
        m.setValue(outPtr, 0, '*');
        return -182;
    }, 'iiii');
    m.setValue(adapterPtr + offset, secureGetCb, '*');
    offset += PTR_SIZE;
    // secure_set: rac_result_t (*)(const char* key, const char* value, void* user_data)
    const secureSetCb = m.addFunction((_keyPtr, _valPtr, _ud) => 0, 'iiii');
    m.setValue(adapterPtr + offset, secureSetCb, '*');
    offset += PTR_SIZE;
    // secure_delete: rac_result_t (*)(const char* key, void* user_data)
    const secureDelCb = m.addFunction((_keyPtr, _ud) => 0, 'iii');
    m.setValue(adapterPtr + offset, secureDelCb, '*');
    offset += PTR_SIZE;
    // log: void (*)(rac_log_level_t level, const char* category, const char* message, void* user_data)
    const logCb = m.addFunction((level, catPtr, msgPtr, _ud) => {
        const cat = m.UTF8ToString(catPtr);
        const msg = m.UTF8ToString(msgPtr);
        const prefix = `[RunAnywhere:VLMWorker:${cat}]`;
        if (level <= 1)
            console.debug(prefix, msg);
        else if (level === 2)
            console.info(prefix, msg);
        else if (level === 3)
            console.warn(prefix, msg);
        else
            console.error(prefix, msg);
    }, 'viiii');
    m.setValue(adapterPtr + offset, logCb, '*');
    offset += PTR_SIZE;
    // track_error (null)
    m.setValue(adapterPtr + offset, 0, '*');
    offset += PTR_SIZE;
    // now_ms: int64_t (*)(void* user_data) — signature 'ii' (returns i32, takes i32 user_data)
    // NOTE(review): Date.now() exceeds i32 range; the 'ii' signature truncates
    // it. This matches the main-thread adapter — confirm the C side tolerates it.
    const nowMsCb = m.addFunction((_ud) => Date.now(), 'ii');
    m.setValue(adapterPtr + offset, nowMsCb, '*');
    offset += PTR_SIZE;
    // get_memory_info: rac_result_t (*)(rac_memory_info_t* out_info, void* user_data)
    const memInfoCb = m.addFunction((outPtr, _ud) => {
        // navigator.deviceMemory reports gibibytes (capped at 8 by browsers).
        const totalGB = navigator.deviceMemory ?? 4;
        const totalBytes = totalGB * 1024 * 1024 * 1024;
        // rac_memory_info_t: { uint64_t total, available, used }
        // Split the uint64 into 32-bit halves with modulo/division — JS bitwise
        // ops truncate to 32 bits (4 GiB & 0xFFFFFFFF === 0, which previously
        // reported zero total/available memory).
        const lo = totalBytes % 0x100000000;
        const hi = Math.floor(totalBytes / 0x100000000);
        m.setValue(outPtr, lo, 'i32'); // total low
        m.setValue(outPtr + 4, hi, 'i32'); // total high
        m.setValue(outPtr + 8, lo, 'i32'); // available low
        m.setValue(outPtr + 12, hi, 'i32'); // available high
        m.setValue(outPtr + 16, 0, 'i32'); // used low
        m.setValue(outPtr + 20, 0, 'i32'); // used high
        return 0;
    }, 'iii');
    m.setValue(adapterPtr + offset, memInfoCb, '*');
    offset += PTR_SIZE;
    // http_download (no-op)
    m.setValue(adapterPtr + offset, 0, '*');
    offset += PTR_SIZE;
    // http_download_cancel (no-op) — main-thread PlatformAdapter also sets this slot
    m.setValue(adapterPtr + offset, 0, '*');
    offset += PTR_SIZE;
    // extract_archive (no-op)
    m.setValue(adapterPtr + offset, 0, '*');
    offset += PTR_SIZE;
    // user_data (null)
    m.setValue(adapterPtr + offset, 0, '*');
    // ---- Register the adapter with RACommons (must happen before rac_init) ----
    // _rac_set_platform_adapter is a simple pointer-store: it makes NO indirect
    // calls into JS, so Emscripten does NOT wrap it with JSPI → returns a plain
    // number synchronously.
    logInfo('Step 1: Registering platform adapter...');
    if (typeof m._rac_set_platform_adapter === 'function') {
        const adapterResult = m._rac_set_platform_adapter(adapterPtr);
        if (adapterResult !== 0) {
            logWarn(`rac_set_platform_adapter returned ${adapterResult}`);
        }
    }
    logInfo('Step 1 done: Platform adapter registered');
    // ---- Call rac_init ----
    //
    // rac_init is logically synchronous C++ (stores adapter, inits diffusion
    // registry, logs). However, in the WebGPU WASM build **every** export that
    // transitively calls an addFunction-registered callback (e.g. the log
    // callback via adapter->log) is JSPI-wrapped and returns a Promise.
    //
    // The Worker's JSPI suspendable stack is smaller than the main thread's,
    // and the diffusion-model-registry init inside rac_init calls RAC_LOG_INFO
    // multiple times — each log allocates 2×2048-byte char[] buffers on the
    // stack — which overflows the JSPI stack with "memory access out of bounds".
    //
    // This is NON-FATAL for VLM: none of rac_backend_llamacpp_vlm_register,
    // rac_vlm_component_create, rac_vlm_component_load_model, or
    // rac_vlm_component_process check `s_initialized`. The platform adapter
    // was already stored in Step 1 via rac_set_platform_adapter, so logging
    // from subsequent calls still works.
    //
    // Strategy: try rac_init, and if it fails (JSPI stack overflow), continue.
    logInfo('Step 2: Calling rac_init...');
    const configSize = m._rac_wasm_sizeof_config();
    const configPtr = m._malloc(configSize);
    for (let i = 0; i < configSize; i++)
        m.setValue(configPtr + i, 0, 'i8');
    m.setValue(configPtr, adapterPtr, '*'); // platform_adapter (offset 0)
    const logLevelOffset = typeof m._rac_wasm_offsetof_config_log_level === 'function'
        ? m._rac_wasm_offsetof_config_log_level()
        : 4;
    m.setValue(configPtr + logLevelOffset, 2, 'i32'); // log_level = INFO
    try {
        const initResult = await m.ccall('rac_init', 'number', ['number'], [configPtr], { async: true });
        if (initResult !== 0) {
            logWarn(`rac_init returned non-zero (${initResult}), continuing without full core init`);
        }
        else {
            logInfo('Step 2 done: rac_init succeeded');
        }
    }
    catch (e) {
        // Expected on WebGPU Workers: diffusion registry logging overflows the
        // JSPI suspendable stack. Non-fatal — VLM functions don't depend on it.
        logWarn(`rac_init failed in Worker (${e}), continuing — VLM does not require full core init`);
    }
    m._free(configPtr);
    // ---- Load struct field offsets ----
    // These are simple sizeof / offsetof helper exports that return plain ints.
    // They do NOT call any callbacks → not JSPI-wrapped → synchronous.
    logInfo('Step 3: Loading struct offsets...');
    offsets = loadOffsetsFromModule(m);
    logInfo('Step 3 done: Offsets loaded');
    // ---- Register VLM backend ----
    // rac_backend_llamacpp_vlm_register is only available when the WASM binary
    // was built with --vlm (RAC_WASM_VLM=ON). It is in JSPI_EXPORTS so it
    // returns a Promise → use ccall({async: true}).
    logInfo('Step 4: Registering VLM backend...');
    if (typeof m['_rac_backend_llamacpp_vlm_register'] !== 'function') {
        throw new Error('VLM backend not available in WASM build. '
            + 'Rebuild with: ./scripts/build.sh --webgpu --vlm');
    }
    const regResult = await m.ccall('rac_backend_llamacpp_vlm_register', 'number', [], [], { async: true });
    logInfo(`Step 4 done: VLM backend registered (result: ${regResult})`);
    // ---- Create VLM component ----
    // rac_vlm_component_create is in JSPI_EXPORTS → returns Promise.
    logInfo('Step 5: Creating VLM component...');
    const handlePtr = m._malloc(4);
    const createResult = await m.ccall('rac_vlm_component_create', 'number', ['number'], [handlePtr], { async: true });
    if (createResult !== 0) {
        m._free(handlePtr);
        throw new Error(`rac_vlm_component_create failed: ${createResult}`);
    }
    vlmHandle = m.getValue(handlePtr, 'i32');
    m._free(handlePtr);
    logInfo(`WASM initialized, VLM component ready (${isWebGPU ? 'WebGPU' : 'CPU'})`);
}
405
// ---------------------------------------------------------------------------
// Model loading (reads from OPFS, writes to Worker's WASM FS)
// ---------------------------------------------------------------------------
/**
 * Resolves the raw bytes of one model artifact: prefers data transferred
 * from the main thread, otherwise falls back to reading from OPFS.
 *
 * @param {ArrayBuffer|undefined} provided - Bytes transferred via postMessage, if any.
 * @param {string} opfsKey - OPFS key to fall back to.
 * @param {string} label - Lower-case label used in log messages ('model' / 'mmproj').
 * @param {string} notFoundLabel - Label used in the not-found error ('Model' / 'mmproj').
 * @returns {Promise<Uint8Array>} The artifact bytes.
 * @throws {Error} If neither transferred data nor an OPFS entry is available.
 */
async function resolveArtifactData(provided, opfsKey, label, notFoundLabel) {
    if (provided && provided.byteLength > 0) {
        logInfo(`Using transferred ${label} data: ${(provided.byteLength / 1024 / 1024).toFixed(1)} MB`);
        return new Uint8Array(provided);
    }
    logInfo(`Reading ${label} from OPFS: key=${opfsKey}`);
    const opfsData = await loadFromOPFS(opfsKey);
    if (!opfsData)
        throw new Error(`${notFoundLabel} not found in OPFS: ${opfsKey}`);
    return opfsData;
}
/**
 * Writes artifact bytes into the Worker's in-memory Emscripten FS under
 * /models/, replacing any previous file with the same name.
 *
 * @param {object} m - The Emscripten module instance.
 * @param {string} filename - File name under /models/.
 * @param {Uint8Array} data - Bytes to write.
 * @param {string} label - Lower-case label for the "Writing …" log line.
 * @param {string} doneLabel - Label for the "… written" log line.
 * @returns {string} The absolute WASM FS path of the written file.
 */
function stageFileInWasmFS(m, filename, data, label, doneLabel) {
    const path = `/models/${filename}`;
    try {
        m.FS_unlink(path);
    }
    catch { /* doesn't exist */ }
    logInfo(`Writing ${label} to WASM FS: ${path}`);
    m.FS_createDataFile('/models', filename, data, true, true, true);
    logInfo(`${doneLabel} written to WASM FS`);
    return path;
}
/**
 * Loads a VLM model (GGUF weights + mmproj vision encoder) into the Worker's
 * WASM instance. Bytes come either from the main thread (transferred buffers)
 * or straight from OPFS; both files are staged into the Emscripten FS and
 * then handed to rac_vlm_component_load_model. Progress messages are posted
 * to the main thread with id -1.
 *
 * @throws {Error} If an artifact is missing or the C-side load fails.
 */
async function loadModel(modelOpfsKey, modelFilename, mmprojOpfsKey, mmprojFilename, modelId, modelName, providedModelData, providedMmprojData) {
    const m = wasmModule;
    // Ensure /models directory exists in Emscripten FS
    m.FS_createPath('/', 'models', true, true);
    // Read model: use provided data (transferred from main thread) or OPFS
    self.postMessage({ id: -1, type: 'progress', payload: { stage: 'Reading model from storage...' } });
    const modelData = await resolveArtifactData(providedModelData, modelOpfsKey, 'model', 'Model');
    logInfo(`Model data: ${(modelData.length / 1024 / 1024).toFixed(1)} MB`);
    // Write to WASM FS
    self.postMessage({ id: -1, type: 'progress', payload: { stage: 'Preparing model...' } });
    const modelPath = stageFileInWasmFS(m, modelFilename, modelData, 'model', 'Model');
    // Read mmproj: use provided data or OPFS
    self.postMessage({ id: -1, type: 'progress', payload: { stage: 'Reading vision encoder...' } });
    const mmprojData = await resolveArtifactData(providedMmprojData, mmprojOpfsKey, 'mmproj', 'mmproj');
    logInfo(`mmproj data: ${(mmprojData.length / 1024 / 1024).toFixed(1)} MB`);
    const mmprojPath = stageFileInWasmFS(m, mmprojFilename, mmprojData, 'mmproj', 'mmproj');
    // Load model via VLM component
    self.postMessage({ id: -1, type: 'progress', payload: { stage: 'Loading model...' } });
    const pathPtr = allocString(modelPath);
    const projPtr = allocString(mmprojPath);
    const idPtr = allocString(modelId);
    const namePtr = allocString(modelName);
    try {
        // {async: true} for JSPI — model loading creates WebGPU buffers and
        // allocates GPU memory, which suspends the WASM stack.
        const result = await m.ccall('rac_vlm_component_load_model', 'number', ['number', 'number', 'number', 'number', 'number'], [vlmHandle, pathPtr, projPtr, idPtr, namePtr], { async: true });
        if (result !== 0) {
            throw new Error(`rac_vlm_component_load_model failed: ${result}`);
        }
        logInfo(`Model loaded: ${modelId}`);
    }
    finally {
        m._free(pathPtr);
        m._free(projPtr);
        m._free(idPtr);
        m._free(namePtr);
    }
}
482
// ---------------------------------------------------------------------------
// Image processing
// ---------------------------------------------------------------------------
/**
 * Runs one VLM inference: packs the raw RGB pixels, prompt, and generation
 * options into C structs on the WASM heap, calls rac_vlm_component_process
 * (JSPI-async), and reads back the generated text and token counts.
 *
 * @param {ArrayBuffer|Uint8Array} rgbPixels - Raw pixel bytes; format=1 (RGB)
 *   is written to the image struct — assumes tightly packed RGB, TODO confirm.
 * @param {number} width - Image width in pixels.
 * @param {number} height - Image height in pixels.
 * @param {string} prompt - User prompt text.
 * @param {number} maxTokens - Generation cap written to options.maxTokens.
 * @param {number} temperature - Sampling temperature; non-finite → 0.7.
 * @param {number} topP - Nucleus sampling p; non-finite → 0.9.
 * @param {string|undefined} systemPrompt - Optional system prompt.
 * @param {number|undefined} modelFamily - Model family enum; nullish → 0.
 * @returns {Promise<{text: string, promptTokens: number, imageTokens: number,
 *   completionTokens: number, totalTokens: number}>} Generation result.
 *   NOTE(review): timing fields (timeToFirstTokenMs etc.) exist in
 *   offsets.vlmResult but are not surfaced here — confirm that is intended.
 * @throws {Error} If rac_vlm_component_process returns non-zero.
 */
async function processImage(rgbPixels, width, height, prompt, maxTokens, temperature, topP, systemPrompt, modelFamily) {
    const m = wasmModule;
    const pixelArray = new Uint8Array(rgbPixels);
    // Use C sizeof helpers for correct struct sizes (avoids 32/64-bit mismatch)
    const imageSize = m.ccall('rac_wasm_sizeof_vlm_image', 'number', [], []);
    const optSize = m.ccall('rac_wasm_sizeof_vlm_options', 'number', [], []);
    const resSize = m.ccall('rac_wasm_sizeof_vlm_result', 'number', [], []);
    // Build rac_vlm_image_t struct (format=1 for RGB pixels)
    const imagePtr = m._malloc(imageSize);
    // Zero the struct so unused fields (filePath, base64Data) read as NULL.
    for (let i = 0; i < imageSize; i++)
        m.setValue(imagePtr + i, 0, 'i8');
    const vi = offsets.vlmImage;
    m.setValue(imagePtr + vi.format, 1, 'i32'); // format = RGBPixels
    const pixelPtr = m._malloc(pixelArray.length);
    writeToWasmHeap(pixelArray, pixelPtr);
    m.setValue(imagePtr + vi.pixelData, pixelPtr, '*');
    m.setValue(imagePtr + vi.width, width, 'i32');
    m.setValue(imagePtr + vi.height, height, 'i32');
    m.setValue(imagePtr + vi.dataSize, pixelArray.length, 'i32');
    // Build rac_vlm_options_t (offsets from compiler)
    const optPtr = m._malloc(optSize);
    for (let i = 0; i < optSize; i++)
        m.setValue(optPtr + i, 0, 'i8');
    const vo = offsets.vlmOptions;
    m.setValue(optPtr + vo.maxTokens, maxTokens, 'i32');
    // Guard against NaN/Infinity from callers; fall back to sane defaults.
    m.setValue(optPtr + vo.temperature, Number.isFinite(temperature) ? temperature : 0.7, 'float');
    m.setValue(optPtr + vo.topP, Number.isFinite(topP) ? topP : 0.9, 'float');
    let systemPromptPtr = 0;
    if (systemPrompt) {
        systemPromptPtr = allocString(systemPrompt);
        m.setValue(optPtr + vo.systemPrompt, systemPromptPtr, '*');
    }
    m.setValue(optPtr + vo.modelFamily, modelFamily ?? 0, 'i32');
    const promptPtr = allocString(prompt);
    // Result struct
    const resPtr = m._malloc(resSize);
    for (let i = 0; i < resSize; i++)
        m.setValue(resPtr + i, 0, 'i8');
    try {
        // {async: true} for JSPI — VLM inference performs extensive GPU compute
        // (CLIP encoding + LLM generation) that suspends the WASM stack.
        const r = await m.ccall('rac_vlm_component_process', 'number', ['number', 'number', 'number', 'number', 'number'], [vlmHandle, imagePtr, promptPtr, optPtr, resPtr], { async: true });
        if (r !== 0) {
            throw new Error(`rac_vlm_component_process failed: ${r}`);
        }
        // Read rac_vlm_result_t (offsets from compiler via StructOffsets)
        const vr = offsets.vlmResult;
        const textPtr = m.getValue(resPtr + vr.text, '*');
        const result = {
            text: readString(textPtr),
            promptTokens: m.getValue(resPtr + vr.promptTokens, 'i32'),
            imageTokens: m.getValue(resPtr + vr.imageTokens, 'i32'),
            completionTokens: m.getValue(resPtr + vr.completionTokens, 'i32'),
            totalTokens: m.getValue(resPtr + vr.totalTokens, 'i32'),
        };
        // Free C-allocated internal strings, then free JS-allocated struct
        // NOTE(review): called without {async: true}; if rac_vlm_result_free is
        // JSPI-wrapped in some builds, the returned Promise is not awaited —
        // confirm this export stays synchronous.
        m.ccall('rac_vlm_result_free', null, ['number'], [resPtr]);
        return result;
    }
    finally {
        // Release every JS-side allocation even when the C call throws.
        if (systemPromptPtr)
            m._free(systemPromptPtr);
        m._free(promptPtr);
        m._free(imagePtr);
        m._free(optPtr);
        m._free(pixelPtr);
        m._free(resPtr);
    }
}
554
+ // ---------------------------------------------------------------------------
555
+ // RPC message handler
556
+ // ---------------------------------------------------------------------------
557
/**
 * Dispatch a single RPC message from the main-thread VLMWorkerBridge.
 *
 * Each known command runs asynchronously and replies with a `result`
 * message; any thrown error is logged and reported back as an `error`
 * message carrying the same request id. Unknown command types are ignored.
 *
 * @param {MessageEvent} e - Worker message event; `e.data` holds `{ type, id, payload }`.
 */
function handleMessage(e) {
    const { type, id } = e.data;
    // Reply helper: posts a `result` envelope tagged with the request id.
    const reply = (payload) => self.postMessage({ id, type: 'result', payload });
    // Command table — one async handler per supported RPC verb.
    const handlers = {
        init: async () => {
            await initWASM(e.data.payload.wasmJsUrl, e.data.payload.useWebGPU ?? false);
            reply({ success: true, useWebGPU: isWebGPU });
        },
        'load-model': async () => {
            const p = e.data.payload;
            await loadModel(p.modelOpfsKey, p.modelFilename, p.mmprojOpfsKey, p.mmprojFilename, p.modelId, p.modelName, p.modelData, p.mmprojData);
            reply({ success: true });
        },
        process: async () => {
            const p = e.data.payload;
            const result = await processImage(p.rgbPixels, p.width, p.height, p.prompt, p.maxTokens, p.temperature, p.topP, p.systemPrompt, p.modelFamily);
            reply(result);
        },
        cancel: async () => {
            if (wasmModule && vlmHandle) {
                wasmModule.ccall('rac_vlm_component_cancel', 'number', ['number'], [vlmHandle]);
            }
            reply({ success: true });
        },
        unload: async () => {
            if (wasmModule && vlmHandle) {
                wasmModule.ccall('rac_vlm_component_unload', 'number', ['number'], [vlmHandle]);
            }
            reply({ success: true });
        },
    };
    // Object.hasOwn guards against prototype-chain keys (e.g. 'constructor')
    // and preserves the original "unknown type → no reply" behavior.
    const run = async () => {
        if (Object.hasOwn(handlers, type)) {
            await handlers[type]();
        }
    };
    run().catch((err) => {
        const message = err instanceof Error ? err.message : String(err);
        logError(`Error in ${type}:`, message);
        self.postMessage({ id, type: 'error', payload: { message } });
    });
}
600
+ // ---------------------------------------------------------------------------
601
+ // Public API: start the runtime
602
+ // ---------------------------------------------------------------------------
603
/**
 * Start the VLM Worker runtime.
 *
 * Invoke exactly once from the Worker entry point. Installs the
 * `self.onmessage` listener that routes RPC commands from the main-thread
 * VLMWorkerBridge into the message handler.
 *
 * @example
 * ```typescript
 * // workers/vlm-worker.ts
 * import { startVLMWorkerRuntime } from '../Infrastructure/VLMWorkerRuntime';
 * startVLMWorkerRuntime();
 * ```
 */
export function startVLMWorkerRuntime() {
    logInfo('VLM Worker runtime starting...');
    self.onmessage = (event) => handleMessage(event);
}
620
+ //# sourceMappingURL=VLMWorkerRuntime.js.map