@cognipilot/rumoca-core 0.9.4 → 0.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/modelica_language.js +88 -0
- package/package.json +11 -1
- package/parse_worker.js +1 -2
- package/rumoca_bind_wasm.d.ts +96 -5
- package/rumoca_bind_wasm.js +480 -36
- package/rumoca_bind_wasm_bg.wasm +0 -0
- package/rumoca_diffsol.js +109 -0
- package/rumoca_gpu.js +139 -79
- package/rumoca_interactive.js +1098 -0
- package/rumoca_package_meta.json +1 -1
- package/rumoca_runtime.js +164 -0
- package/rumoca_worker.js +307 -87
package/rumoca_gpu.js
CHANGED
|
@@ -82,16 +82,24 @@ export async function probeGpu() {
|
|
|
82
82
|
return adapter;
|
|
83
83
|
}
|
|
84
84
|
|
|
85
|
-
//
|
|
85
|
+
// Build a reusable GPU program for a prepared model: a WebGPU device, the
|
|
86
|
+
// compiled WGSL modules, compute pipelines, device buffers, and bind groups,
|
|
87
|
+
// plus a per-run `simulate(prep, onPhase)` closure.
|
|
88
|
+
//
|
|
89
|
+
// Everything built here is fully determined by the rendered shader and layout
|
|
90
|
+
// (i.e. the model source) and never by parameter *values*, so a parameter-only
|
|
91
|
+
// re-run can reuse the whole program and just re-upload y0/p0. `runGpuSimulation`
|
|
92
|
+
// caches the program keyed on `prep.wgsl`; call this directly only if you want
|
|
93
|
+
// to manage the program lifetime yourself.
|
|
86
94
|
//
|
|
87
95
|
// adapter : GPUAdapter (from `probeGpu`)
|
|
88
96
|
// prep : the parsed JSON from WASM `prepare_gpu_simulation`
|
|
89
97
|
// ({ wgsl, layout, n_states, y0, p0, t_start, t_end, dt })
|
|
90
98
|
// onPhase : optional (message, fraction|null) progress callback
|
|
91
99
|
//
|
|
92
|
-
// Returns {
|
|
93
|
-
//
|
|
94
|
-
export async function
|
|
100
|
+
// Returns { device, simulate } where `simulate(prepNow, onPhaseNow)` runs the
|
|
101
|
+
// RK4 loop and resolves to a result shaped like `simulate_model`.
|
|
102
|
+
export async function buildGpuProgram(adapter, prep, onPhase = () => {}) {
|
|
95
103
|
const layout = prep.layout || {};
|
|
96
104
|
const nStates = prep.n_states | 0;
|
|
97
105
|
const yLen = Math.max(layout.y_len | 0, 1);
|
|
@@ -160,10 +168,6 @@ export async function runGpuSimulation(adapter, prep, onPhase = () => {}) {
|
|
|
160
168
|
const yStage = storage(yLen, 'y-stage');
|
|
161
169
|
const pBuf = storage(Math.max(layout.p_len | 0, 1), 'p');
|
|
162
170
|
const kBufs = [0, 1, 2, 3].map((i) => storage(rows, `k${i + 1}`));
|
|
163
|
-
const y0 = new Float32Array(prep.y0 || []);
|
|
164
|
-
device.queue.writeBuffer(yBuf, 0, y0);
|
|
165
|
-
device.queue.writeBuffer(yStage, 0, y0);
|
|
166
|
-
device.queue.writeBuffer(pBuf, 0, new Float32Array(prep.p0 || []));
|
|
167
171
|
|
|
168
172
|
const timeUniform = device.createBuffer({
|
|
169
173
|
size: 16, usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
|
|
@@ -254,82 +258,138 @@ export async function runGpuSimulation(adapter, prep, onPhase = () => {}) {
|
|
|
254
258
|
const writeTime = (t) => device.queue.writeBuffer(
|
|
255
259
|
timeUniform, 0, new Float32Array([t, 0, 0, 0]));
|
|
256
260
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
);
|
|
261
|
+
// Per-run execution. Only y0/p0 change when a parameter slider moves, so
// this re-uploads them and steps the RK4 loop; the device, modules,
// pipelines, buffers, and bind groups above are reused untouched.
//
//   prepNow    : prepared-model JSON for this run; only `y0` and `p0` are read
//   onPhaseNow : optional (message, fraction) progress callback
//
// The time grid (`tStart`, `tEnd`, `dt`, `steps`) is the one captured from the
// enclosing scope when the program was built, not re-read from `prepNow`.
//
// Resolves to { payload: { names, allData, nStates, simDetails }, metrics }.
async function simulate(prepNow, onPhaseNow = () => {}) {
  // The buffers outlive a single run, so a short or missing y0/p0 must not
  // leave the previous run's values behind: zero-pad up to the buffer length.
  // Longer inputs are passed through unchanged.
  const zeroPadded = (values, length) => {
    const given = new Float32Array(values || []);
    if (given.length >= length) {
      return given;
    }
    const full = new Float32Array(length);
    full.set(given);
    return full;
  };

  const y0 = zeroPadded(prepNow.y0, yLen);
  const p0 = zeroPadded(prepNow.p0, Math.max(layout.p_len | 0, 1));
  device.queue.writeBuffer(yBuf, 0, y0);
  device.queue.writeBuffer(yStage, 0, y0);
  device.queue.writeBuffer(pBuf, 0, p0);

  // Encode and submit one RK4 stage: derivative evaluation followed by the
  // stage update. `time === null` keeps the previously written time uniform
  // (stage 3 is evaluated at the same t + dt/2 as stage 2).
  const submitStage = (stageIndex, time, pipeline, bindGroup, afterStage) => {
    const encoder = device.createCommandEncoder();
    if (time !== null) {
      writeTime(time);
    }
    dispatchDer(encoder, stageIndex);
    dispatchStage(encoder, pipeline, bindGroup);
    if (afterStage) {
      afterStage(encoder);
    }
    device.queue.submit([encoder.finish()]);
  };

  const times = [tStart];
  const samples = [Array.from(y0)];
  onPhaseNow(`Simulating on WebGPU (0/${steps} steps)`, 0);
  const wallStart = performance.now();
  // One readback per step keeps the driver simple; the GPU work per
  // step is small enough that this is not the bottleneck yet.
  for (let step = 0; step < steps; step++) {
    const t = tStart + step * dt;
    submitStage(0, t, axpyPipeline, axpyBinds[0]);
    submitStage(1, t + dt / 2, axpyPipeline, axpyBinds[1]);
    submitStage(2, null, axpyPipeline, axpyBinds[2]);
    submitStage(3, t + dt, combinePipeline, combineBind, (encoder) => {
      encoder.copyBufferToBuffer(yBuf, 0, readback, 0, yLen * 4);
    });

    await readback.mapAsync(GPUMapMode.READ);
    // Copy out of the mapped range before unmapping invalidates it.
    samples.push(Array.from(new Float32Array(readback.getMappedRange())));
    readback.unmap();
    times.push(t + dt);

    if (step % 5 === 4 || step === steps - 1) {
      onPhaseNow(
        `Simulating on WebGPU (${step + 1}/${steps} steps)`,
        (step + 1) / steps
      );
    }
  }
  const gpuSeconds = (performance.now() - wallStart) / 1000;

  // Shape the result like simulate_model so plots and viz scripts work
  // unchanged. Names come from the layout bindings (y-kind slots).
  // Bindings include bare base-name aliases ("u" -> 0) alongside the
  // indexed names ("u[1,1]" -> 0); prefer indexed names so array
  // models keep their element naming.
  const names = new Array(yLen).fill(null);
  for (const [name, slot] of Object.entries(layout.bindings || {})) {
    if (!slot || slot.kind !== 'y') {
      continue;
    }
    const index = slot.index;
    // Ignore slots that do not address a real column of the state vector.
    if (!Number.isInteger(index) || index < 0 || index >= yLen) {
      continue;
    }
    const existing = names[index];
    if (!existing || (!existing.includes('[') && name.includes('['))) {
      names[index] = name;
    }
  }
  for (let i = 0; i < yLen; i++) {
    if (!names[i]) names[i] = `y[${i}]`;
  }

  // Column-major output: the time axis first, then one series per y slot.
  const allData = [times];
  for (let i = 0; i < yLen; i++) {
    allData.push(samples.map((row) => row[i]));
  }

  const eventNote = (layout.runtime_event_roots | 0) > 0
    ? ' · events frozen (GPU v1)' : '';
  return {
    payload: {
      names,
      allData,
      nStates,
      simDetails: {
        actual: { t_start: tStart, t_end: times[times.length - 1], points: times.length, variables: names.length },
        requested: { solver: `wgsl-solve RK4 (f32)${eventNote}`, t_start: tStart, t_end: tEnd, dt },
      },
    },
    metrics: { simulateSeconds: gpuSeconds },
  };
}
|
|
314
|
-
|
|
315
|
-
|
|
348
|
+
|
|
349
|
+
return { device, simulate };
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
// Module-level fallback cache for callers that do not supply their own. Pass an
// explicit per-instance `cache` object (e.g. one per widget) when running
// independent models concurrently so they do not evict each other.
const sharedGpuCache = {};

// Integrate a prepared model on the GPU with fixed-step RK4, reusing a compiled
// program across runs.
//
//   adapter : GPUAdapter (from `probeGpu`)
//   prep    : the parsed JSON from WASM `prepare_gpu_simulation`
//   onPhase : optional (message, fraction|null) progress callback
//   cache   : caller-owned `{ program?, wgsl?, gridKey? }` holder; defaults to
//             a shared module-level cache
//
// A cached program is reused only when both the shader (`prep.wgsl`) and the
// time grid (`prep.t_start`, `prep.t_end`, `prep.dt`) match the run it was
// built for: the program's `simulate` closure steps over the time grid that
// was captured at build time, so a changed grid needs a rebuild just like a
// changed shader. A parameter-only re-run (same shader and grid, new y0/p0)
// skips the shader recompile + pipeline rebuild entirely.
//
// If a run fails (e.g. the reused device was lost through context loss or tab
// backgrounding), the program's device is destroyed and the cache is dropped
// so the next run rebuilds from a fresh device. The original error is rethrown.
//
// Returns { payload: { names, allData, nStates, simDetails }, metrics } shaped
// like `simulate_model` so plots/viz scripts work unchanged.
export async function runGpuSimulation(adapter, prep, onPhase = () => {}, cache = sharedGpuCache) {
  // Best-effort teardown: destroying an already-lost device may throw, and
  // there is nothing further to clean up in that case.
  const destroyQuietly = (program) => {
    try {
      program.device.destroy();
    } catch (err) {
      /* device already lost */
    }
  };
  const forget = () => {
    cache.program = null;
    cache.wgsl = null;
    cache.gridKey = null;
  };

  const gridKey = JSON.stringify([prep.t_start, prep.t_end, prep.dt]);
  const reusable = Boolean(cache.program)
    && cache.wgsl === prep.wgsl
    && cache.gridKey === gridKey;

  if (!reusable) {
    const stale = cache.program;
    // Clear first so a failed build never leaves a destroyed program cached.
    forget();
    if (stale) {
      destroyQuietly(stale);
    }
    cache.program = await buildGpuProgram(adapter, prep, onPhase);
    cache.wgsl = prep.wgsl;
    cache.gridKey = gridKey;
  }

  const program = cache.program;
  try {
    return await program.simulate(prep, onPhase);
  } catch (err) {
    // Only evict the entry if it is still the program that failed; release
    // the device either way so a dropped program does not leak GPU resources.
    if (cache.program === program) {
      forget();
    }
    destroyQuietly(program);
    throw err;
  }
}
|