@cognipilot/rumoca-core 0.9.5 → 0.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/rumoca_bind_wasm.d.ts +6 -6
- package/rumoca_bind_wasm.js +25 -7
- package/rumoca_bind_wasm_bg.wasm +0 -0
- package/rumoca_gpu.js +619 -46
- package/rumoca_interactive.js +365 -30
- package/rumoca_package_meta.json +1 -1
- package/rumoca_worker.js +22 -8
package/rumoca_gpu.js
CHANGED
|
@@ -2,9 +2,10 @@
|
|
|
2
2
|
//
|
|
3
3
|
// Canonical, packaged runtime helper for the GPU simulation path. The
|
|
4
4
|
// compiler emits per-state derivative kernels via the `wgsl-solve` target
|
|
5
|
-
// (WASM `prepare_gpu_simulation`);
|
|
6
|
-
//
|
|
7
|
-
//
|
|
5
|
+
// (WASM `prepare_gpu_simulation`); the target also exposes implicit residual
|
|
6
|
+
// kernels in the layout for future implicit GPU solvers. This module wraps a
|
|
7
|
+
// fixed-step classic RK4 integrator around the derivative kernels. The RK4
|
|
8
|
+
// stage/combine algebra runs in the two small hand-written kernels below.
|
|
8
9
|
//
|
|
9
10
|
// v1 semantics: only the first `n_states` slots of y integrate; algebraic
|
|
10
11
|
// slots and all parameters (including relation memory) stay frozen at their
|
|
@@ -49,6 +50,9 @@ fn combine(@builtin(global_invocation_id) gid: vec3<u32>) {
|
|
|
49
50
|
}
|
|
50
51
|
`;
|
|
51
52
|
|
|
53
|
+
// Workgroup size the host assumes when sizing dispatches of the hand-written
// RK4 stage kernels.
const GPU_STAGE_WORKGROUP_SIZE = 64;
// Largest value representable as a WGSL u32 (2^32 - 1).
const UINT32_MAX = 2 ** 32 - 1;
|
|
55
|
+
|
|
52
56
|
async function compileGpuModule(device, code, label) {
|
|
53
57
|
const module = device.createShaderModule({ code, label });
|
|
54
58
|
const info = await module.getCompilationInfo();
|
|
@@ -59,6 +63,577 @@ async function compileGpuModule(device, code, label) {
|
|
|
59
63
|
return module;
|
|
60
64
|
}
|
|
61
65
|
|
|
66
|
+
// Read `value[field]` and require it to be a safe integer that is not below
// `minValue` (default 0). Returns the integer; throws an Error naming `label`
// and `field` otherwise. A null/undefined `value` is reported as invalid
// metadata rather than crashing.
function integerField(value, field, label, minValue = 0) {
  const candidate = value?.[field];
  const acceptable = Number.isSafeInteger(candidate) && !(candidate < minValue);
  if (acceptable) {
    return candidate;
  }
  throw new Error(`${label} has invalid ${field} metadata (${value?.[field]}).`);
}
|
|
75
|
+
|
|
76
|
+
// Return `value` when it is a safe integer >= 1; otherwise throw an Error
// that names `label` and echoes the offending value.
function safePositiveInteger(value, label) {
  if (Number.isSafeInteger(value) && value >= 1) {
    return value;
  }
  throw new Error(`${label} has invalid integer metadata (${value}).`);
}
|
|
82
|
+
|
|
83
|
+
// Read `value[field]` and require a finite number (no NaN, no +/-Infinity,
// no numeric strings). Returns the number; throws an Error naming `label`
// and `field` otherwise.
function finiteNumberField(value, field, label) {
  const candidate = value?.[field];
  // Number.isFinite never coerces, so it also rejects every non-number type.
  if (Number.isFinite(candidate)) {
    return candidate;
  }
  throw new Error(`${label} has invalid ${field} metadata (${value?.[field]}).`);
}
|
|
90
|
+
|
|
91
|
+
// Return `value` when it is an integer in [0, UINT32_MAX]; otherwise throw an
// Error explaining that it cannot be represented as a WGSL u32.
function u32Value(value, label) {
  const representable = Number.isSafeInteger(value)
    && value >= 0
    && value <= UINT32_MAX;
  if (representable) {
    return value;
  }
  throw new Error(`${label}=${value} cannot be represented as a WGSL u32.`);
}
|
|
97
|
+
|
|
98
|
+
// True only when `value` is a non-null object that owns `field` directly
// (inherited properties do not count). Primitives and null yield false.
function hasOwn(value, field) {
  if (value === null || typeof value !== 'object') {
    return false;
  }
  return Object.prototype.hasOwnProperty.call(value, field);
}
|
|
103
|
+
|
|
104
|
+
// Read `value[field]` and require a safe integer of either sign. Returns the
// integer; throws an Error naming `label` and `field` otherwise.
function signedIntegerField(value, field, label) {
  const candidate = value?.[field];
  if (Number.isSafeInteger(candidate)) {
    return candidate;
  }
  throw new Error(`${label} has invalid ${field} metadata (${value?.[field]}).`);
}
|
|
113
|
+
|
|
114
|
+
// Add two metadata integers, throwing when the sum is not a safe integer
// (overflow past +/-(2^53 - 1), or a NaN/fractional operand).
function checkedMetadataAdd(left, right, label) {
  const sum = left + right;
  if (Number.isSafeInteger(sum)) {
    return sum;
  }
  throw new Error(`${label} overflows JavaScript safe integer metadata range.`);
}
|
|
121
|
+
|
|
122
|
+
// Multiply two metadata integers, throwing when the product is not a safe
// integer (overflow past +/-(2^53 - 1), or a NaN/fractional result).
function checkedMetadataMul(left, right, label) {
  const product = left * right;
  if (Number.isSafeInteger(product)) {
    return product;
  }
  throw new Error(`${label} overflows JavaScript safe integer metadata range.`);
}
|
|
129
|
+
|
|
130
|
+
// Compute ceil(rows / workgroupSize) workgroups for one dispatch and verify
// it fits within `maxWorkgroups`.
//
// rows          - number of rows the kernel must cover (positive safe integer)
// workgroupSize - invocations per workgroup (positive safe integer)
// label         - prefix used in every error message
// maxWorkgroups - upper bound on the workgroup count (positive safe integer)
// usage         - word inserted in error messages ('dispatch' by default)
//
// Returns the workgroup count. Throws when the limit is invalid, when the
// inputs cannot produce a valid count, or when the count exceeds the limit.
function checkedWorkgroupCount(
  rows,
  workgroupSize,
  label,
  maxWorkgroups,
  usage = 'dispatch',
) {
  const limit = safePositiveInteger(maxWorkgroups, `${label} workgroup limit`);
  // Reject fractional, zero, negative or non-numeric inputs up front: the
  // floor-based formula below would otherwise turn some of them (for example
  // rows=1.5 or workgroupSize=0.5) into a plausible-looking but wrong count.
  const inputsValid = Number.isSafeInteger(rows) && rows >= 1
    && Number.isSafeInteger(workgroupSize) && workgroupSize >= 1;
  // Integer ceil-division written so it cannot overflow for large `rows`.
  const groups = inputsValid ? Math.floor((rows - 1) / workgroupSize) + 1 : 0;
  if (!Number.isSafeInteger(groups) || groups < 1) {
    throw new Error(`${label} ${usage} workgroup count is invalid.`);
  }
  if (groups > limit) {
    throw new Error(
      `${label} ${usage} needs ${groups} workgroups, exceeding `
      + `device limit ${limit}.`
    );
  }
  return groups;
}
|
|
150
|
+
|
|
151
|
+
// Byte size for a buffer holding `elementCount` 4-byte elements, never
// smaller than 16 bytes. Throws (via checkedMetadataMul) when the product is
// not a safe integer.
function storageByteSize(elementCount, label) {
  const minimumBytes = 16;
  const requestedBytes = checkedMetadataMul(elementCount, 4, `${label} byte size`);
  return requestedBytes < minimumBytes ? minimumBytes : requestedBytes;
}
|
|
155
|
+
|
|
156
|
+
// Read `device.limits.maxComputeWorkgroupsPerDimension` and require it to be
// a positive safe integer; a missing device or limits object is reported
// through the same validation error.
function deviceWorkgroupLimit(device) {
  const advertised = device?.limits?.maxComputeWorkgroupsPerDimension;
  const label = 'GPU device maxComputeWorkgroupsPerDimension';
  return safePositiveInteger(advertised, label);
}
|
|
162
|
+
|
|
163
|
+
// Number of fixed-size steps needed to cover [tStart, tEnd] with step `dt`.
//
// The quotient (tEnd - tStart) / dt is rounded to the nearest integer and
// clamped to at least one step, so a zero-length interval still takes one.
//
// Throws when any input is not a finite number, when tEnd precedes tStart,
// when dt is not strictly positive, or when the resulting count is not a
// finite safe integer.
function simulationStepCount(tStart, tEnd, dt) {
  // Guard non-finite inputs explicitly: dt=Infinity would otherwise divide to
  // 0 and silently become a single step, and NaN slips past both ordering
  // checks below because every comparison with NaN is false.
  const inputs = [['t_start', tStart], ['t_end', tEnd], ['dt', dt]];
  for (const [name, value] of inputs) {
    if (!Number.isFinite(value)) {
      throw new Error(`GPU simulation ${name}=${value} must be a finite number.`);
    }
  }
  if (tEnd < tStart) {
    throw new Error(`GPU simulation t_end=${tEnd} is before t_start=${tStart}.`);
  }
  if (dt <= 0) {
    throw new Error(`GPU simulation dt=${dt} must be greater than zero.`);
  }
  // The subtraction or division can still overflow to Infinity for extreme
  // finite inputs, so the quotient is checked again.
  const rawSteps = (tEnd - tStart) / dt;
  if (!Number.isFinite(rawSteps)) {
    throw new Error('GPU simulation step count is not finite.');
  }
  const steps = Math.max(1, Math.round(rawSteps));
  if (!Number.isSafeInteger(steps)) {
    throw new Error('GPU simulation step count exceeds JavaScript safe integer range.');
  }
  return steps;
}
|
|
180
|
+
|
|
181
|
+
// Sum the `workgroups` field of every kernel in `kernels`, using the checked
// add so the running total stays a safe integer. Returns 0 for an empty list.
function workgroupTotal(kernels, label) {
  let runningTotal = 0;
  kernels.forEach((kernel, index) => {
    runningTotal = checkedMetadataAdd(
      runningTotal,
      kernel.workgroups,
      `${label}[${index}] workgroup total`,
    );
  });
  return runningTotal;
}
|
|
191
|
+
|
|
192
|
+
// Record in `covered` (a Map of slot -> writer label) that `label` writes
// output `slot`. Throws when the slot is not a safe integer in [0, rows) or
// when another writer has already claimed it.
function markOutputSlot(covered, slot, label, rows, outputName) {
  const outsideLayout = !Number.isSafeInteger(slot) || slot < 0 || slot >= rows;
  if (outsideLayout) {
    throw new Error(
      `${label} writes ${outputName} output ${slot} outside layout.rows=${rows}.`
    );
  }
  const earlierWriter = covered.get(slot);
  if (earlierWriter === undefined) {
    covered.set(slot, label);
    return;
  }
  throw new Error(
    `${label} overlaps ${outputName} output ${slot} already written by `
    + `${earlierWriter}.`
  );
}
|
|
207
|
+
|
|
208
|
+
// Return the lowest slot in [0, rows) that is not a key of `covered`, or -1
// when every slot is present. When `covered.size` already equals `rows` the
// scan is skipped entirely.
function firstMissingOutputSlot(rows, covered) {
  if (covered.size !== rows) {
    let candidate = 0;
    while (candidate < rows) {
      if (!covered.has(candidate)) {
        return candidate;
      }
      candidate += 1;
    }
  }
  return -1;
}
|
|
219
|
+
|
|
220
|
+
// Parse `value.output_map` into `{ start, strides }` for a domain of rank
// `shape.length`.
//
// `start` must be a non-negative safe integer. `output_map.strides` must be
// an array of `{ dimension, stride }` terms; each dimension must be below the
// rank and appear at most once, and strides may be negative. Dimensions that
// no term mentions keep a stride of 0.
//
// Throws an Error prefixed with `label` on any malformed metadata.
function outputMap(value, shape, label) {
  const declared = value.output_map;
  if (declared === null || typeof declared !== 'object') {
    throw new Error(`${label} is missing native output_map metadata.`);
  }
  const start = integerField(declared, 'start', `${label} output_map`);
  const terms = declared.strides;
  if (!Array.isArray(terms)) {
    throw new Error(`${label} output_map is missing strides metadata.`);
  }
  const rank = shape.length;
  const strides = Array.from({ length: rank }, () => 0);
  const assigned = new Set();
  for (const [termIndex, term] of terms.entries()) {
    const termLabel = `${label} output_map.strides[${termIndex}]`;
    const dimension = integerField(term, 'dimension', termLabel);
    const stride = signedIntegerField(term, 'stride', termLabel);
    if (dimension >= rank) {
      throw new Error(
        `${termLabel} targets dimension ${dimension}, but domain rank is `
        + `${rank}.`
      );
    }
    if (assigned.has(dimension)) {
      throw new Error(`${termLabel} duplicates dimension ${dimension}.`);
    }
    assigned.add(dimension);
    strides[dimension] = stride;
  }
  return { start, strides };
}
|
|
249
|
+
|
|
250
|
+
// Cross-check a native kernel against its native family metadata, then call
// `visitSlot(slot)` once per kernel row with the output slot that row maps to.
//
// Checks, in order: kernel and family `rows` agree; `family.domain_shape` is
// a non-empty array of integers >= 1 whose product equals the row count; the
// kernel and family output maps have the same start and strides.
//
// Each row index is decomposed into per-dimension coordinates with the last
// dimension varying fastest; the slot is `start + sum(coordinate * stride)`.
// A slot outside [0, rows) raises an Error before `visitSlot` is called.
function visitNativeOutputSlots(kernel, family, label, rows, outputName, visitSlot) {
  const familyLabel = `${label} native family`;
  const kernelRows = integerField(kernel, 'rows', label);
  const familyRows = integerField(family, 'rows', familyLabel);
  if (kernelRows !== familyRows) {
    throw new Error(
      `${label} row count ${kernelRows} does not match native family rows `
      + `${familyRows}.`
    );
  }

  const declaredShape = family.domain_shape;
  if (!Array.isArray(declaredShape) || declaredShape.length === 0) {
    throw new Error(`${label} native family is missing domain_shape metadata.`);
  }
  const shape = declaredShape.map((_, dim) => (
    integerField(declaredShape, dim, `${label} domain_shape`, 1)
  ));
  let domainRows = 1;
  shape.forEach((extent, dimIndex) => {
    domainRows = checkedMetadataMul(
      domainRows, extent, `${label} domain_shape[${dimIndex}] product`);
  });
  if (domainRows !== kernelRows) {
    throw new Error(
      `${label} rows=${kernelRows} does not match domain_shape product `
      + `${domainRows}.`
    );
  }

  const kernelMap = outputMap(kernel, shape, label);
  const familyMap = outputMap(family, shape, familyLabel);
  if (kernelMap.start !== familyMap.start) {
    throw new Error(
      `${label} output_map.start ${kernelMap.start} does not match native family `
      + `start ${familyMap.start}.`
    );
  }
  const stridesAgree = kernelMap.strides.every(
    (stride, dim) => stride === familyMap.strides[dim]);
  if (!stridesAgree) {
    throw new Error(`${label} output_map.strides do not match native family metadata.`);
  }

  const lastDim = shape.length - 1;
  for (let row = 0; row < kernelRows; row += 1) {
    let slot = kernelMap.start;
    let remaining = row;
    // Peel coordinates off from the fastest-varying (last) dimension.
    for (let dim = lastDim; dim >= 0; dim -= 1) {
      const extent = shape[dim];
      const coordinate = remaining % extent;
      remaining = Math.floor(remaining / extent);
      const offset = checkedMetadataMul(
        coordinate,
        kernelMap.strides[dim],
        `${label} output_map dimension ${dim}`,
      );
      slot = checkedMetadataAdd(slot, offset, `${label} output_map slot`);
    }
    if (slot < 0 || slot >= rows) {
      throw new Error(
        `${label} writes ${outputName} output ${slot} outside layout.rows=${rows}.`
      );
    }
    visitSlot(slot);
  }
}
|
|
307
|
+
|
|
308
|
+
// Return the output slots listed in `kernel.output_indices` for a scalar
// chunk kernel. The list must be an array whose length equals `kernel.rows`
// and whose entries are all safe integers; otherwise an Error prefixed with
// `label` is thrown. Range checking of the slots is left to the caller.
function scalarOutputSlots(kernel, label) {
  const expectedCount = integerField(kernel, 'rows', label);
  const indices = kernel.output_indices;
  if (!Array.isArray(indices)) {
    throw new Error(`${label} is missing scalar output_indices metadata.`);
  }
  const actualCount = indices.length;
  if (actualCount !== expectedCount) {
    throw new Error(
      `${label} output_indices length ${actualCount} `
      + `does not match rows=${expectedCount}.`
    );
  }
  return indices.map((slot, slotIndex) => {
    if (Number.isSafeInteger(slot)) {
      return slot;
    }
    throw new Error(
      `${label} output_indices[${slotIndex}] has invalid slot metadata (${slot}).`
    );
  });
}
|
|
328
|
+
|
|
329
|
+
// Read `value[field]` and require a non-empty string. Returns the string;
// throws an Error naming `label` and `field` otherwise.
function stringField(value, field, label) {
  const candidate = value?.[field];
  const usable = typeof candidate === 'string' && candidate.length > 0;
  if (usable) {
    return candidate;
  }
  throw new Error(`${label} has invalid ${field} metadata.`);
}
|
|
336
|
+
|
|
337
|
+
// Read `value[field]` and require a non-empty array of non-empty strings.
// Returns a new array with the same entries; throws an Error naming `label`
// and `field` (plus the offending index for a bad entry) otherwise.
function stringArrayField(value, field, label) {
  const candidate = value?.[field];
  const isPopulatedArray = Array.isArray(candidate) && candidate.length > 0;
  if (!isPopulatedArray) {
    throw new Error(`${label} has invalid ${field} metadata.`);
  }
  return candidate.map((entry, index) => {
    if (typeof entry === 'string' && entry.length > 0) {
      return entry;
    }
    throw new Error(`${label}.${field}[${index}] has invalid prefix metadata.`);
  });
}
|
|
349
|
+
|
|
350
|
+
// True when both arrays have the same length and strictly equal entries at
// every index (order matters).
function sameStrings(left, right) {
  if (left.length !== right.length) {
    return false;
  }
  return !left.some((entry, index) => entry !== right[index]);
}
|
|
354
|
+
|
|
355
|
+
// Validate `block.entry_prefixes` against the prefixes the host expects.
//
// options.layoutLabel                 - prefix for error messages
// options.expectedNativeEntryPrefixes - exact ordered list `native` must equal
// options.expectedScalarEntryPrefix   - exact string `scalar` must equal
//
// Rejects a block that owns a `kernel_prefix` property as stale. Returns
// `{ nativeEntryPrefixes, scalarEntryPrefix }` as read from the block.
function validatedEntryPrefixes(block, options) {
  const {
    layoutLabel,
    expectedNativeEntryPrefixes,
    expectedScalarEntryPrefix,
  } = options;
  const carriesKernelPrefix = Object.prototype.hasOwnProperty.call(
    block ?? {}, 'kernel_prefix');
  if (carriesKernelPrefix) {
    throw new Error(`${layoutLabel} has stale kernel_prefix metadata.`);
  }
  const prefixes = block?.entry_prefixes;
  if (prefixes === null || typeof prefixes !== 'object') {
    throw new Error(`${layoutLabel} has invalid entry_prefixes metadata.`);
  }
  const prefixLabel = `${layoutLabel} entry_prefixes`;
  const nativeEntryPrefixes = stringArrayField(prefixes, 'native', prefixLabel);
  const scalarEntryPrefix = stringField(prefixes, 'scalar', prefixLabel);
  if (!sameStrings(nativeEntryPrefixes, expectedNativeEntryPrefixes)) {
    const wanted = expectedNativeEntryPrefixes.join(', ');
    const found = nativeEntryPrefixes.join(', ');
    throw new Error(
      `${layoutLabel} native entry_prefixes must be ${wanted}; got ${found}.`
    );
  }
  if (scalarEntryPrefix !== expectedScalarEntryPrefix) {
    throw new Error(
      `${layoutLabel} scalar entry_prefix must be ${expectedScalarEntryPrefix}; `
      + `got ${scalarEntryPrefix}.`
    );
  }
  return { nativeEntryPrefixes, scalarEntryPrefix };
}
|
|
387
|
+
|
|
388
|
+
// Validate one kernel inventory block of the layout manifest and return its
// dispatch schedule as `[{ entry, rows, workgroupSize }, ...]` in manifest
// order.
//
// options:
//   layoutLabel / kernelEntryLabel - prefixes for error messages
//   outputName                     - noun used in messages (e.g. 'derivative')
//   nativeEntryPrefixes            - expected native entry-point prefixes
//   scalarEntryPrefix              - expected scalar chunk entry-point prefix
//   denseOutputRequired            - when truthy, every slot in [0, rows) must
//                                    be written by some kernel
//   allowEmptySchedule             - permit an empty inventory when rows is 0
//   staleManifestHint              - text appended to missing-inventory errors
//
// A kernel that owns `output_map` is treated as native and is matched, in
// order, with `block.native_families`; every other kernel is treated as a
// scalar chunk described by `start_slot` and `output_indices`. No output slot
// may be written twice.
function validatedKernelSchedule(block, options) {
  const {
    layoutLabel,
    kernelEntryLabel,
    outputName,
    nativeEntryPrefixes: expectedNativeEntryPrefixes,
    scalarEntryPrefix: expectedScalarEntryPrefix,
    denseOutputRequired,
    allowEmptySchedule = false,
    staleManifestHint = '',
  } = options;
  const { nativeEntryPrefixes, scalarEntryPrefix } = validatedEntryPrefixes(block, {
    layoutLabel,
    expectedNativeEntryPrefixes,
    expectedScalarEntryPrefix,
  });

  // Layout-wide scalars.
  const rows = integerField(block, 'rows', layoutLabel);
  const layoutWorkgroupSize = integerField(block, 'workgroup_size', layoutLabel, 1);
  const chunkSize = integerField(block, 'chunk_size', layoutLabel, 1);
  if (chunkSize !== layoutWorkgroupSize) {
    throw new Error(
      `${layoutLabel} chunk_size=${chunkSize} does not match `
      + `workgroup_size=${layoutWorkgroupSize}.`
    );
  }

  // Inventory bookkeeping declared by the manifest.
  const kernels = block.kernels;
  if (!Array.isArray(kernels)) {
    throw new Error(
      `${layoutLabel} has invalid kernels metadata.${staleManifestHint}`
    );
  }
  const kernelCount = integerField(block, 'kernel_count', layoutLabel);
  if (kernelCount !== kernels.length) {
    throw new Error(
      `${layoutLabel} kernel_count=${kernelCount} does not match `
      + `${kernels.length} ${outputName} kernel entries.`
    );
  }
  const scalarChunkCount = integerField(block, 'chunks', layoutLabel);
  const declaredFamilies = block.native_families;
  const familiesAreArray = Array.isArray(declaredFamilies);
  if (hasOwn(block, 'native_families') && !familiesAreArray) {
    throw new Error(`${layoutLabel} has invalid native_families metadata.`);
  }
  const nativeFamilies = familiesAreArray ? declaredFamilies : [];

  if (kernels.length === 0) {
    const emptyIsLegal = allowEmptySchedule && rows === 0;
    if (!emptyIsLegal) {
      throw new Error(
        `${layoutLabel} manifest has no kernel inventory.${staleManifestHint}`
      );
    }
    if (scalarChunkCount !== 0 || nativeFamilies.length !== 0) {
      throw new Error(
        `${layoutLabel} empty ${outputName} inventory must not report `
        + 'scalar chunks or native families.'
      );
    }
    return [];
  }

  const covered = new Map();
  const seenEntries = new Set();
  let nativeKernelCount = 0;
  let scalarKernelCount = 0;
  const claimSlot = (slot, writer) => {
    markOutputSlot(covered, slot, writer, rows, outputName);
  };

  const schedule = kernels.map((kernel) => {
    const rawEntry = kernel?.entry;
    const entry = typeof rawEntry === 'string' ? rawEntry : '';
    // Built from the raw value so a malformed entry is still echoed in errors;
    // once `entry` is known to be a non-empty string both spell the same text.
    const label = `${kernelEntryLabel} ${rawEntry}`;
    const kernelRows = integerField(kernel, 'rows', label, 1);
    const workgroupSize = integerField(kernel, 'workgroup_size', label, 1);
    if (workgroupSize !== layoutWorkgroupSize) {
      throw new Error(
        `${label} workgroup_size=${workgroupSize} does not match `
        + `${layoutLabel} workgroup_size=${layoutWorkgroupSize}.`
      );
    }
    if (entry.length === 0) {
      throw new Error(`${label} has invalid entry metadata.`);
    }
    if (seenEntries.has(entry)) {
      throw new Error(`${label} duplicates ${outputName} kernel entry ${entry}.`);
    }
    seenEntries.add(entry);

    const isNative = hasOwn(kernel, 'output_map');
    const looksScalar = hasOwn(kernel, 'start_slot')
      || hasOwn(kernel, 'output_indices');
    if (isNative && looksScalar) {
      throw new Error(
        `${label} mixes native tensor output metadata with scalar chunk metadata.`
      );
    }

    if (!isNative) {
      if (!entry.startsWith(scalarEntryPrefix)) {
        throw new Error(
          `${label} scalar chunk entry must start with ${scalarEntryPrefix}; `
          + `got ${entry}.`
        );
      }
      integerField(kernel, 'start_slot', label);
      scalarKernelCount += 1;
      for (const slot of scalarOutputSlots(kernel, label)) {
        claimSlot(slot, label);
      }
      return { entry, rows: kernelRows, workgroupSize };
    }

    const prefixMatches = nativeEntryPrefixes.some(
      (prefix) => entry.startsWith(prefix));
    if (!prefixMatches) {
      throw new Error(
        `${label} native entry must start with one of `
        + `${nativeEntryPrefixes.join(', ')}; got ${entry}.`
      );
    }
    // Native kernels pair with families positionally, in manifest order.
    const family = nativeFamilies[nativeKernelCount];
    if (!family) {
      throw new Error(
        `${kernelEntryLabel} ${entry} has no matching native family metadata.`
      );
    }
    visitNativeOutputSlots(
      kernel,
      family,
      label,
      rows,
      outputName,
      (slot) => {
        claimSlot(slot, label);
      },
    );
    nativeKernelCount += 1;
    return { entry, rows: kernelRows, workgroupSize };
  });

  if (nativeKernelCount !== nativeFamilies.length) {
    throw new Error(
      `${layoutLabel} has ${nativeFamilies.length} native families but scheduled `
      + `${nativeKernelCount} native ${outputName} kernels.`
    );
  }
  if (scalarChunkCount !== scalarKernelCount) {
    throw new Error(
      `${layoutLabel} chunks=${scalarChunkCount} does not match `
      + `${scalarKernelCount} scalar ${outputName} kernel entries.`
    );
  }
  if (denseOutputRequired) {
    const gap = firstMissingOutputSlot(rows, covered);
    if (gap !== -1) {
      throw new Error(
        `${layoutLabel} schedule does not cover ${outputName} output ${gap}; `
        + 'GPU RK4 requires a dense derivative vector.'
      );
    }
  }
  return schedule;
}
|
|
538
|
+
|
|
539
|
+
// Validate the derivative kernel inventory at the top level of the wgsl-solve
// layout and return its normalized dispatch schedule. Native kernels write
// through generated WGSL output maps, so the host only dispatches them; the
// maps are still validated here because the RK4 path assumes a dense
// derivative vector matching state order (hence `denseOutputRequired`).
export function derivativeKernelSchedule(layout) {
  const staleManifestHint = [
    ' The WASM package predates stencil emission. ',
    'Rebuild it from the wgsl-backend sources ',
    '(wasm-pack build crates/rumoca-bind-wasm).',
  ].join('');
  const options = {
    layoutLabel: 'GPU layout',
    kernelEntryLabel: 'GPU kernel',
    outputName: 'derivative',
    nativeEntryPrefixes: ['derivative_rhs_map', 'derivative_rhs_stencil'],
    scalarEntryPrefix: 'derivative_rhs_chunk',
    denseOutputRequired: true,
    staleManifestHint,
  };
  return validatedKernelSchedule(layout, options);
}
|
|
556
|
+
|
|
557
|
+
// Validate the implicit RHS kernel inventory found under
// `layout.implicit_rhs`. The browser RK4 path does not dispatch these kernels
// yet; validating them keeps the manifest contract executable for future
// implicit GPU solvers. Unlike the derivative schedule, the output need not
// be dense and an empty inventory is allowed.
export function implicitKernelSchedule(layout) {
  const layoutIsObject = layout !== null && typeof layout === 'object';
  if (!layoutIsObject) {
    throw new Error('GPU layout has invalid implicit_rhs metadata.');
  }
  const options = {
    layoutLabel: 'GPU implicit_rhs layout',
    kernelEntryLabel: 'GPU implicit kernel',
    outputName: 'implicit RHS',
    nativeEntryPrefixes: ['implicit_rhs_map', 'implicit_rhs_stencil'],
    scalarEntryPrefix: 'implicit_rhs_chunk',
    denseOutputRequired: false,
    allowEmptySchedule: true,
  };
  return validatedKernelSchedule(layout.implicit_rhs, options);
}
|
|
574
|
+
|
|
575
|
+
// Validate both kernel inventories of a layout and return them together as
// `{ derivative, implicit }`. The derivative schedule is validated first, so
// its errors take precedence.
export function gpuKernelSchedules(layout) {
  const derivative = derivativeKernelSchedule(layout);
  const implicit = implicitKernelSchedule(layout);
  return { derivative, implicit };
}
|
|
581
|
+
|
|
582
|
+
// Turn a validated schedule into a dispatch plan: each kernel's `entry`,
// `rows` and `workgroupSize` are re-validated and extended with the number of
// `workgroups` to dispatch.
//
// schedule      - non-empty array of `{ entry, rows, workgroupSize }`
// label         - prefix for error messages
// maxWorkgroups - per-dispatch workgroup limit (defaults to no practical limit)
//
// Throws when the schedule is not a non-empty array, when a kernel is
// malformed, or when a kernel needs more workgroups than the limit allows.
export function gpuKernelDispatchPlan(
  schedule,
  label = 'GPU kernel schedule',
  maxWorkgroups = Number.MAX_SAFE_INTEGER,
) {
  const dispatchable = Array.isArray(schedule) && schedule.length > 0;
  if (!dispatchable) {
    throw new Error(`${label} has no kernels to dispatch.`);
  }
  return schedule.map((kernel, index) => {
    const where = `${label}[${index}]`;
    const entry = stringField(kernel, 'entry', where);
    const rows = integerField(kernel, 'rows', where, 1);
    const workgroupSize = integerField(kernel, 'workgroupSize', where, 1);
    const workgroups = checkedWorkgroupCount(
      rows,
      workgroupSize,
      `${where} ${entry}`,
      maxWorkgroups,
    );
    return { entry, rows, workgroupSize, workgroups };
  });
}
|
|
608
|
+
|
|
609
|
+
// Total number of workgroups a schedule would need if every kernel in it were
// dispatched once. Each kernel is validated the same way as for a dispatch
// plan, but an empty schedule is allowed and yields 0.
//
// schedule      - array of `{ entry, rows, workgroupSize }`
// label         - prefix for error messages
// maxWorkgroups - per-kernel workgroup limit (defaults to no practical limit)
//
// Throws when `schedule` is not an array, when a kernel is malformed or
// exceeds the limit, or when the total leaves the safe integer range.
export function gpuKernelWorkgroupBudget(
  schedule,
  label = 'GPU kernel schedule',
  maxWorkgroups = Number.MAX_SAFE_INTEGER,
) {
  if (!Array.isArray(schedule)) {
    throw new Error(`${label} metadata is invalid.`);
  }
  let budget = 0;
  schedule.forEach((kernel, index) => {
    const where = `${label}[${index}]`;
    const entry = stringField(kernel, 'entry', where);
    const rows = integerField(kernel, 'rows', where, 1);
    const workgroupSize = integerField(kernel, 'workgroupSize', where, 1);
    const workgroups = checkedWorkgroupCount(
      rows,
      workgroupSize,
      `${where} ${entry}`,
      maxWorkgroups,
      'budget',
    );
    budget = checkedMetadataAdd(budget, workgroups, `${where} workgroup budget`);
  });
  return budget;
}
|
|
636
|
+
|
|
62
637
|
// Acquire a WebGPU adapter, throwing actionable errors when WebGPU is
|
|
63
638
|
// unavailable. Returns a GPUAdapter suitable for `runGpuSimulation`.
|
|
64
639
|
export async function probeGpu() {
|
|
@@ -101,9 +676,12 @@ export async function probeGpu() {
|
|
|
101
676
|
// RK4 loop and resolves to a result shaped like `simulate_model`.
|
|
102
677
|
export async function buildGpuProgram(adapter, prep, onPhase = () => {}) {
|
|
103
678
|
const layout = prep.layout || {};
|
|
104
|
-
const nStates = prep
|
|
105
|
-
const yLen =
|
|
106
|
-
const rows =
|
|
679
|
+
const nStates = integerField(prep, 'n_states', 'GPU preparation');
|
|
680
|
+
const yLen = integerField(layout, 'y_len', 'GPU layout', 1);
|
|
681
|
+
const rows = integerField(layout, 'rows', 'GPU layout');
|
|
682
|
+
const pLen = integerField(layout, 'p_len', 'GPU layout');
|
|
683
|
+
const runtimeEventRoots = integerField(layout, 'runtime_event_roots', 'GPU layout');
|
|
684
|
+
u32Value(nStates, 'GPU preparation n_states');
|
|
107
685
|
if (rows === 0 || nStates === 0) {
|
|
108
686
|
throw new Error('Model has no continuous states to integrate on the GPU.');
|
|
109
687
|
}
|
|
@@ -113,28 +691,23 @@ export async function buildGpuProgram(adapter, prep, onPhase = () => {}) {
|
|
|
113
691
|
+ `states=${nStates}); this model is not supported yet.`
|
|
114
692
|
);
|
|
115
693
|
}
|
|
116
|
-
const tStart =
|
|
117
|
-
const tEnd =
|
|
118
|
-
const dt =
|
|
119
|
-
|
|
120
|
-
const
|
|
694
|
+
const tStart = finiteNumberField(prep, 't_start', 'GPU preparation');
|
|
695
|
+
const tEnd = finiteNumberField(prep, 't_end', 'GPU preparation');
|
|
696
|
+
const dt = finiteNumberField(prep, 'dt', 'GPU preparation');
|
|
697
|
+
const steps = simulationStepCount(tStart, tEnd, dt);
|
|
698
|
+
const schedules = gpuKernelSchedules(layout);
|
|
121
699
|
|
|
122
700
|
const device = await adapter.requestDevice();
|
|
701
|
+
const maxWorkgroups = deviceWorkgroupLimit(device);
|
|
702
|
+
const kernelList = gpuKernelDispatchPlan(
|
|
703
|
+
schedules.derivative, 'GPU derivative kernel schedule', maxWorkgroups);
|
|
704
|
+
const implicitWorkgroups = gpuKernelWorkgroupBudget(
|
|
705
|
+
schedules.implicit, 'GPU implicit kernel schedule', maxWorkgroups);
|
|
123
706
|
onPhase('Parsing GPU kernels (WGSL)', null);
|
|
124
707
|
const derModule = await compileGpuModule(device, prep.wgsl, 'wgsl-solve');
|
|
125
708
|
const stageModule = await compileGpuModule(device, GPU_STAGE_WGSL, 'rk4-stage');
|
|
126
709
|
const combineModule = await compileGpuModule(device, GPU_COMBINE_WGSL, 'rk4-combine');
|
|
127
710
|
|
|
128
|
-
// Kernel inventory: stencil-family kernels + residual chunks from
|
|
129
|
-
// the layout manifest.
|
|
130
|
-
if (!Array.isArray(layout.kernels) || layout.kernels.length === 0) {
|
|
131
|
-
throw new Error(
|
|
132
|
-
'GPU layout manifest has no kernel inventory; the WASM package '
|
|
133
|
-
+ 'predates stencil emission. Rebuild it from the wgsl-backend '
|
|
134
|
-
+ 'sources (wasm-pack build crates/rumoca-bind-wasm).'
|
|
135
|
-
);
|
|
136
|
-
}
|
|
137
|
-
const kernelList = layout.kernels;
|
|
138
711
|
let pipelinesBuilt = 0;
|
|
139
712
|
onPhase(`Building GPU pipelines (0/${kernelList.length})`, 0);
|
|
140
713
|
const derPipelines = await Promise.all(
|
|
@@ -150,8 +723,6 @@ export async function buildGpuProgram(adapter, prep, onPhase = () => {}) {
|
|
|
150
723
|
return pipeline;
|
|
151
724
|
}))
|
|
152
725
|
);
|
|
153
|
-
const kernelWorkgroups = kernelList.map(
|
|
154
|
-
(kernel) => Math.max(1, Math.ceil((kernel.rows | 0) / 64)));
|
|
155
726
|
const axpyPipeline = await device.createComputePipelineAsync({
|
|
156
727
|
layout: 'auto', compute: { module: stageModule, entryPoint: 'axpy' },
|
|
157
728
|
});
|
|
@@ -161,12 +732,12 @@ export async function buildGpuProgram(adapter, prep, onPhase = () => {}) {
|
|
|
161
732
|
|
|
162
733
|
const storage = (len, label) => device.createBuffer({
|
|
163
734
|
label,
|
|
164
|
-
size:
|
|
735
|
+
size: storageByteSize(len, label),
|
|
165
736
|
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
|
|
166
737
|
});
|
|
167
738
|
const yBuf = storage(yLen, 'y');
|
|
168
739
|
const yStage = storage(yLen, 'y-stage');
|
|
169
|
-
const pBuf = storage(Math.max(
|
|
740
|
+
const pBuf = storage(Math.max(pLen, 1), 'p');
|
|
170
741
|
const kBufs = [0, 1, 2, 3].map((i) => storage(rows, `k${i + 1}`));
|
|
171
742
|
|
|
172
743
|
const timeUniform = device.createBuffer({
|
|
@@ -233,13 +804,18 @@ export async function buildGpuProgram(adapter, prep, onPhase = () => {}) {
|
|
|
233
804
|
],
|
|
234
805
|
});
|
|
235
806
|
|
|
236
|
-
const stageGroups =
|
|
807
|
+
const stageGroups = checkedWorkgroupCount(
|
|
808
|
+
nStates,
|
|
809
|
+
GPU_STAGE_WORKGROUP_SIZE,
|
|
810
|
+
'GPU RK4 stage',
|
|
811
|
+
maxWorkgroups,
|
|
812
|
+
);
|
|
237
813
|
const dispatchDer = (enc, stage) => {
|
|
238
814
|
const pass = enc.beginComputePass();
|
|
239
815
|
derPipelines.forEach((pipe, c) => {
|
|
240
816
|
pass.setPipeline(pipe);
|
|
241
817
|
pass.setBindGroup(0, derBinds[stage][c]);
|
|
242
|
-
pass.dispatchWorkgroups(
|
|
818
|
+
pass.dispatchWorkgroups(kernelList[c].workgroups);
|
|
243
819
|
});
|
|
244
820
|
pass.end();
|
|
245
821
|
};
|
|
@@ -251,8 +827,9 @@ export async function buildGpuProgram(adapter, prep, onPhase = () => {}) {
|
|
|
251
827
|
pass.end();
|
|
252
828
|
};
|
|
253
829
|
|
|
830
|
+
const yReadBytes = checkedMetadataMul(yLen, 4, 'y readback byte size');
|
|
254
831
|
const readback = device.createBuffer({
|
|
255
|
-
size: Math.max(16,
|
|
832
|
+
size: Math.max(16, yReadBytes),
|
|
256
833
|
usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ,
|
|
257
834
|
});
|
|
258
835
|
const writeTime = (t) => device.queue.writeBuffer(
|
|
@@ -293,7 +870,7 @@ export async function buildGpuProgram(adapter, prep, onPhase = () => {}) {
|
|
|
293
870
|
writeTime(t + dt);
|
|
294
871
|
dispatchDer(enc4, 3);
|
|
295
872
|
dispatchStage(enc4, combinePipeline, combineBind);
|
|
296
|
-
enc4.copyBufferToBuffer(yBuf, 0, readback, 0,
|
|
873
|
+
enc4.copyBufferToBuffer(yBuf, 0, readback, 0, yReadBytes);
|
|
297
874
|
device.queue.submit([enc4.finish()]);
|
|
298
875
|
await readback.mapAsync(GPUMapMode.READ);
|
|
299
876
|
samples.push(Array.from(new Float32Array(readback.getMappedRange())));
|
|
@@ -308,21 +885,10 @@ export async function buildGpuProgram(adapter, prep, onPhase = () => {}) {
|
|
|
308
885
|
}
|
|
309
886
|
const gpuSeconds = (performance.now() - wallStart) / 1000;
|
|
310
887
|
|
|
311
|
-
// Shape the result like simulate_model so plots and viz scripts work
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
// models keep their element naming.
|
|
316
|
-
const names = new Array(yLen).fill(null);
|
|
317
|
-
for (const [name, slot] of Object.entries(layout.bindings || {})) {
|
|
318
|
-
if (!slot || slot.kind !== 'y' || slot.index >= yLen) {
|
|
319
|
-
continue;
|
|
320
|
-
}
|
|
321
|
-
const existing = names[slot.index];
|
|
322
|
-
if (!existing || (!existing.includes('[') && name.includes('['))) {
|
|
323
|
-
names[slot.index] = name;
|
|
324
|
-
}
|
|
325
|
-
}
|
|
888
|
+
// Shape the result like simulate_model so plots and viz scripts work unchanged.
|
|
889
|
+
const names = Array.isArray(prepNow.state_names)
|
|
890
|
+
? prepNow.state_names.slice(0, yLen)
|
|
891
|
+
: [];
|
|
326
892
|
for (let i = 0; i < yLen; i++) {
|
|
327
893
|
if (!names[i]) names[i] = `y[${i}]`;
|
|
328
894
|
}
|
|
@@ -330,7 +896,7 @@ export async function buildGpuProgram(adapter, prep, onPhase = () => {}) {
|
|
|
330
896
|
for (let i = 0; i < yLen; i++) {
|
|
331
897
|
allData.push(samples.map((row) => row[i]));
|
|
332
898
|
}
|
|
333
|
-
const eventNote =
|
|
899
|
+
const eventNote = runtimeEventRoots > 0
|
|
334
900
|
? ' · events frozen (GPU v1)' : '';
|
|
335
901
|
return {
|
|
336
902
|
payload: {
|
|
@@ -342,7 +908,14 @@ export async function buildGpuProgram(adapter, prep, onPhase = () => {}) {
|
|
|
342
908
|
requested: { solver: `wgsl-solve RK4 (f32)${eventNote}`, t_start: tStart, t_end: tEnd, dt },
|
|
343
909
|
},
|
|
344
910
|
},
|
|
345
|
-
metrics: {
|
|
911
|
+
metrics: {
|
|
912
|
+
simulateSeconds: gpuSeconds,
|
|
913
|
+
derivativeKernels: kernelList.length,
|
|
914
|
+
derivativeWorkgroups: workgroupTotal(
|
|
915
|
+
kernelList, 'GPU derivative kernel schedule'),
|
|
916
|
+
implicitKernels: schedules.implicit.length,
|
|
917
|
+
implicitWorkgroups,
|
|
918
|
+
},
|
|
346
919
|
};
|
|
347
920
|
}
|
|
348
921
|
|