@simulatte/webgpu 0.2.4 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/CHANGELOG.md +33 -0
  2. package/README.md +263 -71
  3. package/api-contract.md +70 -139
  4. package/assets/package-layers.svg +63 -0
  5. package/examples/direct-webgpu/compute-dispatch.js +66 -0
  6. package/examples/direct-webgpu/explicit-bind-group.js +85 -0
  7. package/examples/direct-webgpu/request-device.js +10 -0
  8. package/examples/doe-api/buffers-readback.js +9 -0
  9. package/examples/doe-api/compile-and-dispatch.js +30 -0
  10. package/examples/doe-api/compute-dispatch.js +25 -0
  11. package/examples/doe-routines/compute-once-like-input.js +36 -0
  12. package/examples/doe-routines/compute-once-matmul.js +53 -0
  13. package/examples/doe-routines/compute-once-multiple-inputs.js +27 -0
  14. package/examples/doe-routines/compute-once.js +23 -0
  15. package/headless-webgpu-comparison.md +2 -2
  16. package/layering-plan.md +1 -1
  17. package/native/doe_napi.c +102 -12
  18. package/package.json +2 -1
  19. package/prebuilds/darwin-arm64/doe_napi.node +0 -0
  20. package/prebuilds/darwin-arm64/libwebgpu_doe.dylib +0 -0
  21. package/prebuilds/darwin-arm64/metadata.json +6 -6
  22. package/prebuilds/linux-x64/doe_napi.node +0 -0
  23. package/prebuilds/linux-x64/libwebgpu_doe.so +0 -0
  24. package/prebuilds/linux-x64/metadata.json +5 -5
  25. package/scripts/generate-readme-assets.js +79 -6
  26. package/scripts/prebuild.js +23 -19
  27. package/src/auto_bind_group_layout.js +32 -0
  28. package/src/bun-ffi.js +93 -12
  29. package/src/bun.js +23 -2
  30. package/src/compute.d.ts +2 -1
  31. package/src/compute.js +671 -33
  32. package/src/doe.d.ts +127 -27
  33. package/src/doe.js +480 -114
  34. package/src/full.d.ts +8 -1
  35. package/src/full.js +28 -3
  36. package/src/index.js +1013 -38
@@ -0,0 +1,27 @@
1
// Example: add two Float32Array inputs element-wise with a one-shot compute routine
// and print the result as JSON.
import { doe } from "@simulatte/webgpu/compute";

// WGSL kernel: each invocation writes dst[i] = lhs[i] + rhs[i] (workgroup size 4).
const ADD_SHADER = `
@group(0) @binding(0) var<storage, read> lhs: array<f32>;
@group(0) @binding(1) var<storage, read> rhs: array<f32>;
@group(0) @binding(2) var<storage, read_write> dst: array<f32>;

@compute @workgroup_size(4)
fn main(@builtin(global_invocation_id) gid: vec3u) {
let i = gid.x;
dst[i] = lhs[i] + rhs[i];
}
`;

const gpu = await doe.requestDevice();

// NOTE(review): presumably the inputs bind to @binding(0) and @binding(1) in array
// order and the output to @binding(2) — confirm against the Doe routines docs.
const lhs = new Float32Array([1, 2, 3, 4]);
const rhs = new Float32Array([10, 20, 30, 40]);

// A single workgroup of 4 invocations covers the 4 elements in each input.
const result = await gpu.compute.once({
  code: ADD_SHADER,
  inputs: [lhs, rhs],
  output: { type: Float32Array },
  workgroups: 1,
});

const values = Array.from(result);
console.log(JSON.stringify(values));
@@ -0,0 +1,23 @@
1
// Example: triple every element of one Float32Array input with a one-shot compute
// routine and print the result as JSON.
import { doe } from "@simulatte/webgpu/compute";

// WGSL kernel: each invocation writes dst[i] = src[i] * 3.0 (workgroup size 4).
const TRIPLE_SHADER = `
@group(0) @binding(0) var<storage, read> src: array<f32>;
@group(0) @binding(1) var<storage, read_write> dst: array<f32>;

@compute @workgroup_size(4)
fn main(@builtin(global_invocation_id) gid: vec3u) {
let i = gid.x;
dst[i] = src[i] * 3.0;
}
`;

const gpu = await doe.requestDevice();

const src = new Float32Array([1, 2, 3, 4]);

// A single workgroup of 4 invocations covers the 4 input elements.
const result = await gpu.compute.once({
  code: TRIPLE_SHADER,
  inputs: [src],
  output: { type: Float32Array },
  workgroups: 1,
});

const values = Array.from(result);
console.log(JSON.stringify(values));
@@ -7,7 +7,7 @@ This document outlines qualitative differences and target use-cases for headless
7
7
  | **Underlying Engine** | `libwebgpu_doe` (Zig + Lean pipeline) | Google Dawn (C++) | Google Dawn (C++) |
8
8
  | **Primary Focus** | Deterministic Compute, ML/AI, Verifiability | Browser Parity, Graphics | Browser Parity, Graphics |
9
9
  | **Binary Footprint** | Smaller targeted runtime expected | Varies by build/distribution | Varies by build/distribution |
10
- | **JS Binding Layer** | Node-API (N-API); experimental Bun FFI implementation also exists | Node-API (N-API) | Bun FFI (Fast Foreign Function) |
10
+ | **JS Binding Layer** | Node addon-backed path; Bun uses FFI on Linux and full/addon-backed path on macOS today | Node-API (N-API) | Bun FFI (Fast Foreign Function) |
11
11
  | **Security Model** | Explicit schema/gate discipline in Fawn pipeline | Runtime heuristics + Dawn validation | Runtime heuristics + Dawn validation |
12
12
  | **Resource Allocation** | Arena-backed, predictable memory | General WebGPU async allocations | General WebGPU async allocations |
13
13
  | **WebGPU Spec Compliance**| Compute-prioritized subset target | Broad Chromium-aligned coverage | Broad Chromium-aligned coverage |
@@ -17,7 +17,7 @@ This document outlines qualitative differences and target use-cases for headless
17
17
  ## Architectural Takeaways for Fawn
18
18
 
19
19
  1. Determinism and fail-fast contracts are the intended Doe value proposition for benchmarking workflows.
20
- 2. The package currently defaults Bun to the addon-backed runtime for correctness parity. The separate Bun FFI path may reduce wrapper overhead later, but end-to-end results must be measured per workload.
20
+ 2. The package currently uses a platform-split Bun path: Linux uses the direct FFI route, while macOS uses the addon-backed full path for correctness parity. FFI may reduce wrapper overhead, but end-to-end results must be measured per workload.
21
21
  3. Distribution size and startup claims must be backed by measured artifacts before release claims.
22
22
 
23
23
  ## Ecosystem reference: official/community competitors and stats
package/layering-plan.md CHANGED
@@ -37,7 +37,7 @@ Current reality:
37
37
  4. Canonical texture command handling now lives in `zig/src/core/resource/wgpu_texture_commands.zig`; canonical sampler and surface command handling now lives in `zig/src/full/render/wgpu_sampler_commands.zig` and `zig/src/full/surface/wgpu_surface_commands.zig`.
38
38
  5. `zig/src/wgpu_commands.zig`, `zig/src/wgpu_resources.zig`, and `zig/src/wgpu_extended_commands.zig` are now compatibility façades over the canonical subtrees, while `zig/src/webgpu_ffi.zig` remains the public façade and owner of `WebGPUBackend`.
39
39
  6. Dedicated Zig test lanes now exist as `zig build test-core` and `zig build test-full`, but split coverage remains thin and capability tracking is still represented by one shared coverage ledger.
40
- 7. The JS package now exposes a default `full` surface plus an explicit `compute` subpath, while the underlying JS implementation is still shared.
40
+ 7. The JS package now exposes a default `full` surface plus an explicit `compute` subpath, while the underlying JS implementation is still shared and presented through `Direct WebGPU`, `Doe API`, and `Doe routines` styles.
41
41
 
42
42
  That means this plan is now materially physicalized in the tree, and the remaining semantic split is concentrated in the public façade files and backend roots.
43
43
 
package/native/doe_napi.c CHANGED
@@ -60,6 +60,8 @@ typedef uint32_t WGPUBool;
60
60
  #define WGPU_WHOLE_SIZE UINT64_MAX
61
61
  #define WGPU_STYPE_SHADER_SOURCE_WGSL 0x00000002
62
62
  #define WGPU_WAIT_STATUS_SUCCESS 1
63
+ #define WGPU_WAIT_STATUS_TIMED_OUT 2
64
+ #define WGPU_WAIT_STATUS_ERROR 3
63
65
  #define WGPU_MAP_ASYNC_STATUS_SUCCESS 1
64
66
  #define WGPU_REQUEST_STATUS_SUCCESS 1
65
67
  #define WGPU_CALLBACK_MODE_ALLOW_PROCESS_EVENTS 2
@@ -824,6 +826,11 @@ typedef struct {
824
826
  uint32_t done;
825
827
  } BufferMapResult;
826
828
 
829
/* Completion record for a queue work-done request. doe_queue_flush passes a
 * pointer to one of these as userdata1; queue_work_done_callback fills it in. */
+ typedef struct {
830
/* Status value the work-done callback was invoked with. */
+ uint32_t status;
831
/* Set to 1 by the callback once it has run; polled by the flush wait loop. */
+ uint32_t done;
832
+ } QueueWorkDoneResult;
833
+
827
834
  static void buffer_map_callback(uint32_t status, WGPUStringView message,
828
835
  void* userdata1, void* userdata2) {
829
836
  (void)message; (void)userdata2;
@@ -832,6 +839,14 @@ static void buffer_map_callback(uint32_t status, WGPUStringView message,
832
839
  r->done = 1;
833
840
  }
834
841
 
842
+ static void queue_work_done_callback(uint32_t status, WGPUStringView message,
843
+ void* userdata1, void* userdata2) {
844
+ (void)message; (void)userdata2;
845
+ QueueWorkDoneResult* r = (QueueWorkDoneResult*)userdata1;
846
+ r->status = status;
847
+ r->done = 1;
848
+ }
849
+
835
850
  /* bufferMapSync(instance, buffer, mode, offset, size) */
836
851
  static napi_value doe_buffer_map_sync(napi_env env, napi_callback_info info) {
837
852
  NAPI_ASSERT_ARGC(env, info, 5);
@@ -1387,16 +1402,58 @@ static napi_value doe_queue_write_buffer(napi_env env, napi_callback_info info)
1387
1402
  return NULL;
1388
1403
  }
1389
1404
 
1390
- /* queueFlush(queue) — wait for all pending GPU work to complete.
1391
- * Calls doeNativeQueueFlush directly (semaphore wait on pending command buffer)
1392
- * instead of routing through wgpuQueueOnSubmittedWorkDone (immediate no-op in Doe). */
1405
+ /* queueFlush(instance, queue) — wait for all pending GPU work to complete.
1406
+ * Use the Doe-native queue flush when available; otherwise fall back to the
1407
+ * portable queue work-done callback path and process events until completion. */
1393
1408
  static napi_value doe_queue_flush(napi_env env, napi_callback_info info) {
1394
- NAPI_ASSERT_ARGC(env, info, 1);
1409
+ NAPI_ASSERT_ARGC(env, info, 2);
1395
1410
  CHECK_LIB_LOADED(env);
1396
- WGPUQueue queue = unwrap_ptr(env, _args[0]);
1411
+ WGPUInstance inst = unwrap_ptr(env, _args[0]);
1412
+ WGPUQueue queue = unwrap_ptr(env, _args[1]);
1397
1413
  if (!queue) NAPI_THROW(env, "queueFlush requires queue");
1398
- if (!pfn_doeNativeQueueFlush) NAPI_THROW(env, "queueFlush: doeNativeQueueFlush not available");
1399
- pfn_doeNativeQueueFlush(queue);
1414
+ if (pfn_doeNativeQueueFlush) {
1415
+ pfn_doeNativeQueueFlush(queue);
1416
+ return NULL;
1417
+ }
1418
+ if (!inst) NAPI_THROW(env, "queueFlush requires instance when doeNativeQueueFlush is unavailable");
1419
+
1420
+ QueueWorkDoneResult result = {0, 0};
1421
+ WGPUQueueWorkDoneCallbackInfo cb_info = {
1422
+ .nextInChain = NULL,
1423
+ .mode = WGPU_CALLBACK_MODE_WAIT_ANY_ONLY,
1424
+ .callback = queue_work_done_callback,
1425
+ .userdata1 = &result,
1426
+ .userdata2 = NULL,
1427
+ };
1428
+
1429
+ WGPUFuture future = pfn_wgpuQueueOnSubmittedWorkDone(queue, cb_info);
1430
+ if (future.id == 0) NAPI_THROW(env, "queueFlush: queue work-done future unavailable");
1431
+ uint64_t start_ns = monotonic_now_ns();
1432
+ while (!result.done) {
1433
+ WGPUFutureWaitInfo wait_info = {
1434
+ .future = future,
1435
+ .completed = 0,
1436
+ };
1437
+ uint32_t wait_status = pfn_wgpuInstanceWaitAny(inst, 1, &wait_info, 0);
1438
+ if (wait_status == WGPU_WAIT_STATUS_SUCCESS) {
1439
+ if (!result.done) {
1440
+ pfn_wgpuInstanceProcessEvents(inst);
1441
+ }
1442
+ } else if (wait_status == WGPU_WAIT_STATUS_TIMED_OUT) {
1443
+ pfn_wgpuInstanceProcessEvents(inst);
1444
+ if (monotonic_now_ns() - start_ns >= DOE_DEFAULT_TIMEOUT_NS) {
1445
+ NAPI_THROW(env, "queueFlush: queue wait timed out");
1446
+ }
1447
+ wait_slice();
1448
+ } else if (wait_status == WGPU_WAIT_STATUS_ERROR) {
1449
+ NAPI_THROW(env, "queueFlush: wgpuInstanceWaitAny failed");
1450
+ } else {
1451
+ NAPI_THROW(env, "queueFlush: unsupported wait status");
1452
+ }
1453
+ }
1454
+ if (result.status != WGPU_QUEUE_WORK_DONE_STATUS_SUCCESS) {
1455
+ NAPI_THROW(env, "queueFlush: queue work did not complete");
1456
+ }
1400
1457
  return NULL;
1401
1458
  }
1402
1459
 
@@ -1547,7 +1604,6 @@ static napi_value doe_submit_compute_dispatch_copy(napi_env env, napi_callback_i
1547
1604
  uint64_t copy_dst_off = (uint64_t)copy_dst_off_i;
1548
1605
  uint64_t copy_size = (uint64_t)copy_size_i;
1549
1606
  if (!device || !queue || !pipeline) NAPI_THROW(env, "submitComputeDispatchCopy requires device, queue, and pipeline");
1550
- if (!pfn_doeNativeComputeDispatchFlush) NAPI_THROW(env, "submitComputeDispatchCopy: doeNativeComputeDispatchFlush not available");
1551
1607
 
1552
1608
  uint32_t bg_count = 0;
1553
1609
  napi_get_array_length(env, bgs, &bg_count);
@@ -1559,10 +1615,44 @@ static napi_value doe_submit_compute_dispatch_copy(napi_env env, napi_callback_i
1559
1615
  bg_ptrs[j] = unwrap_ptr(env, bg_val);
1560
1616
  }
1561
1617
 
1562
- pfn_doeNativeComputeDispatchFlush(
1563
- queue, pipeline, (void**)bg_ptrs, bg_count,
1564
- dx, dy, dz,
1565
- copy_src, copy_src_off, copy_dst, copy_dst_off, copy_size);
1618
+ if (pfn_doeNativeComputeDispatchFlush) {
1619
+ pfn_doeNativeComputeDispatchFlush(
1620
+ queue, pipeline, (void**)bg_ptrs, bg_count,
1621
+ dx, dy, dz,
1622
+ copy_src, copy_src_off, copy_dst, copy_dst_off, copy_size);
1623
+ return NULL;
1624
+ }
1625
+
1626
+ WGPUCommandEncoder encoder = pfn_wgpuDeviceCreateCommandEncoder(device, NULL);
1627
+ if (!encoder) NAPI_THROW(env, "submitComputeDispatchCopy: createCommandEncoder failed");
1628
+ WGPUComputePassEncoder pass = pfn_wgpuCommandEncoderBeginComputePass(encoder, NULL);
1629
+ if (!pass) {
1630
+ pfn_wgpuCommandEncoderRelease(encoder);
1631
+ NAPI_THROW(env, "submitComputeDispatchCopy: beginComputePass failed");
1632
+ }
1633
+ pfn_wgpuComputePassEncoderSetPipeline(pass, pipeline);
1634
+ for (uint32_t j = 0; j < bg_count; j++) {
1635
+ if (bg_ptrs[j]) pfn_wgpuComputePassEncoderSetBindGroup(pass, j, bg_ptrs[j], 0, NULL);
1636
+ }
1637
+ pfn_wgpuComputePassEncoderDispatchWorkgroups(pass, dx, dy, dz);
1638
+ pfn_wgpuComputePassEncoderEnd(pass);
1639
+ pfn_wgpuComputePassEncoderRelease(pass);
1640
+ pfn_wgpuCommandEncoderCopyBufferToBuffer(
1641
+ encoder,
1642
+ copy_src,
1643
+ copy_src_off,
1644
+ copy_dst,
1645
+ copy_dst_off,
1646
+ copy_size
1647
+ );
1648
+ WGPUCommandBuffer cmd_buf = pfn_wgpuCommandEncoderFinish(encoder, NULL);
1649
+ if (!cmd_buf) {
1650
+ pfn_wgpuCommandEncoderRelease(encoder);
1651
+ NAPI_THROW(env, "submitComputeDispatchCopy: finish failed");
1652
+ }
1653
+ pfn_wgpuQueueSubmit(queue, 1, &cmd_buf);
1654
+ pfn_wgpuCommandBufferRelease(cmd_buf);
1655
+ pfn_wgpuCommandEncoderRelease(encoder);
1566
1656
  return NULL;
1567
1657
  }
1568
1658
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@simulatte/webgpu",
3
- "version": "0.2.4",
3
+ "version": "0.3.1",
4
4
  "description": "Headless WebGPU runtime for Node.js and Bun, powered by Doe",
5
5
  "type": "module",
6
6
  "main": "./src/node-runtime.js",
@@ -35,6 +35,7 @@
35
35
  "files": [
36
36
  "assets/",
37
37
  "bin/",
38
+ "examples/",
38
39
  "src/",
39
40
  "scripts/",
40
41
  "native/",
@@ -1,11 +1,11 @@
1
1
  {
2
2
  "schemaVersion": 1,
3
3
  "package": "@simulatte/webgpu",
4
- "packageVersion": "0.2.3",
4
+ "packageVersion": "0.3.1",
5
5
  "platform": "darwin",
6
6
  "arch": "arm64",
7
7
  "nodeNapiVersion": 8,
8
- "doeVersion": "68a193c88",
8
+ "doeVersion": "dae96664a",
9
9
  "doeBuild": {
10
10
  "artifact": "libwebgpu_doe",
11
11
  "leanVerifiedBuild": false,
@@ -13,14 +13,14 @@
13
13
  },
14
14
  "files": {
15
15
  "doe_napi.node": {
16
- "sha256": "472c753c5c5bd82b60444bfcc1d3837bdbd40d1fce1b8281e6c706043bb64a84"
16
+ "sha256": "4baee8a48487e8725b7b6bbccd269da4b4868d28c4d4a26e8ee075f3a8d1e75a"
17
17
  },
18
18
  "libwebgpu_doe.dylib": {
19
- "sha256": "d9a66fa8fad7a8e50736778329ea5c011457c814238f96ecf230eb8eb97bcc64"
19
+ "sha256": "1598ea11e54c7087ebe9020c07b5bbc0f7d376927b20edac3ed9ca08a0fe6789"
20
20
  },
21
21
  "libwebgpu_dawn.dylib": {
22
- "sha256": "22751faeb459e7a2ec778c0410ca122e23c23366eb3da145c651d1d43e26707d"
22
+ "sha256": "828fb9ef95b08b4192cb9e84392e432e0d0a4ebca90df678e076e2e3c731b82e"
23
23
  }
24
24
  },
25
- "builtAt": "2026-03-10T17:19:18.720Z"
25
+ "builtAt": "2026-03-11T18:42:09.399Z"
26
26
  }
Binary file
@@ -1,11 +1,11 @@
1
1
  {
2
2
  "schemaVersion": 1,
3
3
  "package": "@simulatte/webgpu",
4
- "packageVersion": "0.2.3",
4
+ "packageVersion": "0.3.1",
5
5
  "platform": "linux",
6
6
  "arch": "x64",
7
7
  "nodeNapiVersion": 8,
8
- "doeVersion": "b09d34586",
8
+ "doeVersion": "1010cdc41",
9
9
  "doeBuild": {
10
10
  "artifact": "libwebgpu_doe",
11
11
  "leanVerifiedBuild": false,
@@ -13,14 +13,14 @@
13
13
  },
14
14
  "files": {
15
15
  "doe_napi.node": {
16
- "sha256": "21475bcd04b499e1a0ed6e75d8af2a7bead08d365ddfb09708d509d0de62bf28"
16
+ "sha256": "08e6019b07dee38ac7140c27d4c1e10237fec5778e68506fcf85ea85dee797ec"
17
17
  },
18
18
  "libwebgpu_doe.so": {
19
- "sha256": "28c5da84da65a5d6f4a3a74b9757279cddee738aaa93fe8ece6e358d4d94cdb1"
19
+ "sha256": "ee4f0fcd43751574bc5570492b27a911408e31c90b2c91bd4ee4af09a8f7f5cb"
20
20
  },
21
21
  "libwebgpu_dawn.so": {
22
22
  "sha256": "9d329301f59fbe85a190cee4faacde97f9c991b07264a18a6750b890899cc417"
23
23
  }
24
24
  },
25
- "builtAt": "2026-03-10T21:30:16.241Z"
25
+ "builtAt": "2026-03-11T18:05:22.324Z"
26
26
  }
@@ -8,7 +8,8 @@ const __dirname = dirname(fileURLToPath(import.meta.url));
8
8
  const PACKAGE_ROOT = resolve(__dirname, '..');
9
9
  const WORKSPACE_ROOT = resolve(PACKAGE_ROOT, '..', '..');
10
10
  const CUBE_SUMMARY_PATH = resolve(WORKSPACE_ROOT, 'bench', 'out', 'cube', 'latest', 'cube.summary.json');
11
- const OUTPUT_PATH = resolve(PACKAGE_ROOT, 'assets', 'package-surface-cube-snapshot.svg');
11
+ const CUBE_OUTPUT_PATH = resolve(PACKAGE_ROOT, 'assets', 'package-surface-cube-snapshot.svg');
12
+ const LAYERS_OUTPUT_PATH = resolve(PACKAGE_ROOT, 'assets', 'package-layers.svg');
12
13
 
13
14
  const UI_FONT = '"Segoe UI", "Helvetica Neue", Arial, sans-serif';
14
15
  const MONO_FONT = 'SFMono-Regular, Menlo, Consolas, "Liberation Mono", monospace';
@@ -256,15 +257,87 @@ ${renderSurfaceCard(SURFACE_SPECS[1], bunCells, 640)}
256
257
  `;
257
258
  }
258
259
 
260
/**
 * Build the static SVG markup for the "package layers" README graphic.
 *
 * The drawing is a fixed 1200x470 canvas: a gradient background with two glow
 * overlays, a title and subtitle, and four stacked rounded boxes labelled with
 * the package entry points, "Direct WebGPU", "Doe API" and "Doe routines".
 * The only interpolated values are the module-level UI_FONT and TEXT_STROKE
 * constants used in the embedded stylesheet.
 *
 * @returns {string} The complete SVG document text, ending with a newline.
 */
+ function renderLayersSvg() {
261
+ return `<svg xmlns="http://www.w3.org/2000/svg" width="1200" height="470" viewBox="0 0 1200 470" role="img" aria-labelledby="layers-title layers-desc">
262
+ <title id="layers-title">@simulatte/webgpu layered package graph</title>
263
+ <desc id="layers-desc">Layered package graph showing direct WebGPU, Doe API, and Doe routines over the same package surfaces.</desc>
264
+ <defs>
265
+ <linearGradient id="layers-bg" x1="0%" y1="0%" x2="100%" y2="100%">
266
+ <stop offset="0%" stop-color="#050816"/>
267
+ <stop offset="100%" stop-color="#140c1f"/>
268
+ </linearGradient>
269
+ <radialGradient id="layers-glow-top" cx="25%" cy="18%" r="55%">
270
+ <stop offset="0%" stop-color="#ef444430"/>
271
+ <stop offset="55%" stop-color="#7c3aed18"/>
272
+ <stop offset="100%" stop-color="#00000000"/>
273
+ </radialGradient>
274
+ <radialGradient id="layers-glow-bottom" cx="78%" cy="84%" r="52%">
275
+ <stop offset="0%" stop-color="#f59e0b26"/>
276
+ <stop offset="60%" stop-color="#f9731618"/>
277
+ <stop offset="100%" stop-color="#00000000"/>
278
+ </radialGradient>
279
+ <linearGradient id="layers-root" x1="0%" y1="0%" x2="100%" y2="100%">
280
+ <stop offset="0%" stop-color="#7c3aed"/>
281
+ <stop offset="100%" stop-color="#ef4444"/>
282
+ </linearGradient>
283
+ <linearGradient id="layers-direct" x1="0%" y1="0%" x2="100%" y2="100%">
284
+ <stop offset="0%" stop-color="#ef4444"/>
285
+ <stop offset="100%" stop-color="#f97316"/>
286
+ </linearGradient>
287
+ <linearGradient id="layers-api" x1="0%" y1="0%" x2="100%" y2="100%">
288
+ <stop offset="0%" stop-color="#f97316"/>
289
+ <stop offset="100%" stop-color="#f59e0b"/>
290
+ </linearGradient>
291
+ <linearGradient id="layers-routines" x1="0%" y1="0%" x2="100%" y2="100%">
292
+ <stop offset="0%" stop-color="#f59e0b"/>
293
+ <stop offset="100%" stop-color="#eab308"/>
294
+ </linearGradient>
295
+ <filter id="shadow" x="-20%" y="-20%" width="140%" height="140%">
296
+ <feDropShadow dx="0" dy="10" stdDeviation="14" flood-color="#000000" flood-opacity="0.32"/>
297
+ </filter>
298
+ <style>
299
+ .title { font: 700 34px ${UI_FONT}; fill: #ffffff; ${TEXT_STROKE} }
300
+ .subtitle { font: 500 18px ${UI_FONT}; fill: #cbd5e1; ${TEXT_STROKE} }
301
+ .nodeTitle { font: 700 22px ${UI_FONT}; fill: #ffffff; ${TEXT_STROKE} }
302
+ .box { stroke-width: 2.5; filter: url(#shadow); }
303
+ </style>
304
+ </defs>
305
+ <rect width="1200" height="470" fill="url(#layers-bg)"/>
306
+ <rect width="1200" height="470" fill="url(#layers-glow-top)"/>
307
+ <rect width="1200" height="470" fill="url(#layers-glow-bottom)"/>
308
+ <text x="64" y="62" class="title">Same package, four layers</text>
309
+ <text x="64" y="94" class="subtitle">The package surface stays the same while the API gets progressively higher-level.</text>
310
+
311
+ <rect x="170" y="122" width="860" height="64" rx="20" fill="url(#layers-root)" stroke="#c4b5fd" class="box"/>
312
+ <text x="600" y="162" text-anchor="middle" class="nodeTitle">@simulatte/webgpu / @simulatte/webgpu/compute</text>
313
+
314
+ <rect x="220" y="222" width="760" height="52" rx="18" fill="url(#layers-direct)" stroke="#fca5a5" class="box"/>
315
+ <text x="600" y="255" text-anchor="middle" class="nodeTitle">Direct WebGPU</text>
316
+
317
+ <rect x="280" y="310" width="640" height="52" rx="18" fill="url(#layers-api)" stroke="#fdba74" class="box"/>
318
+ <text x="600" y="343" text-anchor="middle" class="nodeTitle">Doe API</text>
319
+
320
+ <rect x="360" y="398" width="480" height="52" rx="18" fill="url(#layers-routines)" stroke="#fde68a" class="box"/>
321
+ <text x="600" y="431" text-anchor="middle" class="nodeTitle">Doe routines</text>
322
+ </svg>
323
+ `;
324
+ }
325
+
259
326
  function main() {
260
327
  const summary = readCubeSummary(CUBE_SUMMARY_PATH);
261
- const svg = renderSvg(summary);
262
- mkdirSync(dirname(OUTPUT_PATH), { recursive: true });
328
+ const cubeSvg = renderSvg(summary);
329
+ const layersSvg = renderLayersSvg();
330
+ mkdirSync(dirname(CUBE_OUTPUT_PATH), { recursive: true });
331
+ writeFileSync(
332
+ CUBE_OUTPUT_PATH,
333
+ `<!-- Generated by scripts/generate-readme-assets.js. Do not edit by hand. -->\n${cubeSvg}`,
334
+ );
335
+ console.log(`Wrote ${CUBE_OUTPUT_PATH}`);
263
336
  writeFileSync(
264
- OUTPUT_PATH,
265
- `<!-- Generated by scripts/generate-readme-assets.js. Do not edit by hand. -->\n${svg}`,
337
+ LAYERS_OUTPUT_PATH,
338
+ `<!-- Generated by scripts/generate-readme-assets.js. Do not edit by hand. -->\n${layersSvg}`,
266
339
  );
267
- console.log(`Wrote ${OUTPUT_PATH}`);
340
+ console.log(`Wrote ${LAYERS_OUTPUT_PATH}`);
268
341
  }
269
342
 
270
343
  main();
@@ -60,7 +60,7 @@ function copyArtifact(src, destName) {
60
60
  const dest = resolve(prebuildDir, destName);
61
61
  copyFileSync(src, dest);
62
62
  console.log(` ${destName} <- ${src}`);
63
- return { name: destName, sha256: sha256(dest) };
63
+ return { name: destName, path: dest };
64
64
  }
65
65
 
66
66
  // 1. Build addon if needed.
@@ -122,13 +122,31 @@ console.log(`\nAssembling prebuilds/${platform}-${arch}/`);
122
122
 
123
123
  const files = {};
124
124
  const addonEntry = copyArtifact(addonSrc, 'doe_napi.node');
125
- if (addonEntry) files[addonEntry.name] = { sha256: addonEntry.sha256 };
125
+ if (addonEntry) files[addonEntry.name] = { path: addonEntry.path };
126
126
 
127
127
  const doeEntry = copyArtifact(doeLib, `libwebgpu_doe.${ext}`);
128
- if (doeEntry) files[doeEntry.name] = { sha256: doeEntry.sha256 };
128
+ if (doeEntry) files[doeEntry.name] = { path: doeEntry.path };
129
129
 
130
130
  const sidecarEntry = copyArtifact(sidecarSrc, sidecarName);
131
- if (sidecarEntry) files[sidecarEntry.name] = { sha256: sidecarEntry.sha256 };
131
+ if (sidecarEntry) files[sidecarEntry.name] = { path: sidecarEntry.path };
132
+
133
+ // macOS: ad-hoc sign dylibs for distribution.
134
+ if (platform === 'darwin') {
135
+ console.log('\nSigning dylibs (ad-hoc)...');
136
+ for (const name of Object.keys(files)) {
137
+ if (name.endsWith('.dylib')) {
138
+ try {
139
+ execFileSync('codesign', ['-s', '-', resolve(prebuildDir, name)], { stdio: 'inherit' });
140
+ } catch {
141
+ console.warn(` Warning: codesign failed for ${name} (may already be signed)`);
142
+ }
143
+ }
144
+ }
145
+ }
146
+
147
+ const metadataFiles = Object.fromEntries(
148
+ Object.entries(files).map(([name, entry]) => [name, { sha256: sha256(entry.path) }]),
149
+ );
132
150
 
133
151
  // 5. Write metadata manifest.
134
152
  const pkg = JSON.parse(readFileSync(resolve(PACKAGE_ROOT, 'package.json'), 'utf8'));
@@ -153,7 +171,7 @@ const metadata = {
153
171
  leanVerifiedBuild: doeBuild.leanVerifiedBuild,
154
172
  proofArtifactSha256: doeBuild.proofArtifactSha256,
155
173
  },
156
- files,
174
+ files: metadataFiles,
157
175
  builtAt: new Date().toISOString(),
158
176
  };
159
177
 
@@ -161,19 +179,5 @@ const metadataPath = resolve(prebuildDir, 'metadata.json');
161
179
  writeFileSync(metadataPath, JSON.stringify(metadata, null, 2) + '\n');
162
180
  console.log(` metadata.json`);
163
181
 
164
- // macOS: ad-hoc sign dylibs for distribution.
165
- if (platform === 'darwin') {
166
- console.log('\nSigning dylibs (ad-hoc)...');
167
- for (const name of Object.keys(files)) {
168
- if (name.endsWith('.dylib')) {
169
- try {
170
- execFileSync('codesign', ['-s', '-', resolve(prebuildDir, name)], { stdio: 'inherit' });
171
- } catch {
172
- console.warn(` Warning: codesign failed for ${name} (may already be signed)`);
173
- }
174
- }
175
- }
176
- }
177
-
178
182
  console.log(`\nDone. Prebuild artifacts in prebuilds/${platform}-${arch}/`);
179
183
  console.log(`Total files: ${Object.keys(files).length}`);
@@ -0,0 +1,32 @@
1
/**
 * Remove WGSL comments so commented-out declarations are never parsed.
 *
 * Handles `//` line comments and `/* ... *\/` block comments, which nest in WGSL.
 * WGSL has no string literals, so every comment marker outside a comment is real.
 *
 * @param {string} code - WGSL source text.
 * @returns {string} The source with comments removed (each block comment becomes one space).
 */
function stripWgslComments(code) {
  const kept = [];
  let depth = 0;
  let index = 0;

  while (index < code.length) {
    if (code.startsWith('/*', index)) {
      depth += 1;
      index += 2;
      continue;
    }
    if (depth > 0) {
      if (code.startsWith('*/', index)) {
        depth -= 1;
        index += 2;
        // Keep tokens on either side of the comment separated.
        if (depth === 0) kept.push(' ');
      } else {
        index += 1;
      }
      continue;
    }
    if (code.startsWith('//', index)) {
      // Skip to the end of the line; the line break itself is kept.
      while (index < code.length && code[index] !== '\n' && code[index] !== '\r') {
        index += 1;
      }
      continue;
    }
    kept.push(code[index]);
    index += 1;
  }

  return kept.join('');
}

/**
 * Infer bind group layout entries from the resource declarations in WGSL source.
 *
 * Recognised declarations are module-scope `var`s carrying both `@group(N)` and
 * `@binding(M)` (in either order, with integer-literal arguments):
 * - `var<uniform>`                    -> `buffer: { type: 'uniform' }`
 * - `var<storage>` / `<storage, read>` -> `buffer: { type: 'read-only-storage' }`
 * - `var<storage, read_write>`         -> `buffer: { type: 'storage' }`
 * - `sampler`                          -> `sampler: {}`
 * - `sampler_comparison`               -> `sampler: { type: 'comparison' }`
 *
 * Any other resource type (for example textures) is skipped, as are
 * declarations that appear inside comments.
 *
 * @param {string} code - WGSL shader source.
 * @param {number} visibility - Shader-stage visibility copied onto every entry.
 * @returns {Map<number, Array<object>>} Map from group index to that group's
 *   entries, each list sorted by ascending binding number.
 */
export function inferAutoBindGroupLayouts(code, visibility) {
  const groups = new Map();
  // Two attributes (group/binding in any order), then `var`, an optional
  // `<address space, access>` template, the variable name and its type.
  const declarationPattern =
    /((?:@(?:group|binding)\s*\(\s*\d+[iu]?\s*,?\s*\)\s*){2})var(?:\s*<([^>]+)>\s*|\s+)\w+\s*:\s*([^;]+);/g;

  for (const match of stripWgslComments(code).matchAll(declarationPattern)) {
    const attributes = match[1];
    const groupAttr = /@group\s*\(\s*(\d+)/.exec(attributes);
    const bindingAttr = /@binding\s*\(\s*(\d+)/.exec(attributes);
    // The attribute pair must be one @group and one @binding, not two of the same.
    if (!groupAttr || !bindingAttr) continue;

    const group = Number(groupAttr[1]);
    const binding = Number(bindingAttr[1]);
    const addressSpace = (match[2] ?? '').trim();
    const typeExpr = (match[3] ?? '').trim();
    let entry = null;

    if (addressSpace.startsWith('uniform')) {
      entry = { binding, visibility, buffer: { type: 'uniform' } };
    } else if (addressSpace.startsWith('storage')) {
      // `var<storage>` without an access mode defaults to read-only.
      const readOnly = !addressSpace.includes('read_write');
      entry = { binding, visibility, buffer: { type: readOnly ? 'read-only-storage' : 'storage' } };
    } else if (typeExpr.startsWith('sampler_comparison')) {
      // An empty sampler layout defaults to a filtering sampler, which does not
      // match a comparison sampler binding.
      entry = { binding, visibility, sampler: { type: 'comparison' } };
    } else if (typeExpr.startsWith('sampler')) {
      entry = { binding, visibility, sampler: {} };
    }

    if (!entry) continue;
    const entries = groups.get(group) ?? [];
    entries.push(entry);
    groups.set(group, entries);
  }

  for (const entries of groups.values()) {
    entries.sort((left, right) => left.binding - right.binding);
  }

  return groups;
}