@simulatte/webgpu 0.2.1 → 0.2.3

This diff shows the changes between publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only.
package/native/doe_napi.c CHANGED
@@ -15,6 +15,7 @@
15
15
  #include <stdlib.h>
16
16
  #include <string.h>
17
17
  #include <stdint.h>
18
+ #include <stdio.h>
18
19
 
19
20
  #ifdef _WIN32
20
21
  #include <windows.h>
@@ -884,6 +885,30 @@ static napi_value doe_buffer_get_mapped_range(napi_env env, napi_callback_info i
884
885
  return ab;
885
886
  }
886
887
 
888
+ /* bufferAssertMappedPrefixF32(buffer, expected, count) */
889
+ static napi_value doe_buffer_assert_mapped_prefix_f32(napi_env env, napi_callback_info info) {
890
+ NAPI_ASSERT_ARGC(env, info, 3);
891
+ CHECK_LIB_LOADED(env);
892
+ WGPUBuffer buf = unwrap_ptr(env, _args[0]);
893
+ double expected = 0.0;
894
+ uint32_t count = 0;
895
+ napi_get_value_double(env, _args[1], &expected);
896
+ napi_get_value_uint32(env, _args[2], &count);
897
+ if (!buf) NAPI_THROW(env, "bufferAssertMappedPrefixF32 requires buffer");
898
+ const float* mapped = (const float*)pfn_wgpuBufferGetConstMappedRange(buf, 0, count * sizeof(float));
899
+ if (!mapped) NAPI_THROW(env, "bufferAssertMappedPrefixF32: mapped range unavailable");
900
+ for (uint32_t i = 0; i < count; i++) {
901
+ if ((double)mapped[i] != expected) {
902
+ char msg[128];
903
+ snprintf(msg, sizeof(msg), "expected readback[%u] === %.0f, got %.9g", i, expected, (double)mapped[i]);
904
+ NAPI_THROW(env, msg);
905
+ }
906
+ }
907
+ napi_value ok;
908
+ napi_get_boolean(env, true, &ok);
909
+ return ok;
910
+ }
911
+
887
912
  /* ================================================================
888
913
  * Shader Module
889
914
  * ================================================================ */
@@ -1376,8 +1401,8 @@ static napi_value doe_queue_flush(napi_env env, napi_callback_info info) {
1376
1401
  }
1377
1402
 
1378
1403
  /* submitBatched(device, queue, commandsArray)
1379
- * Fast path: single dispatch + optional copy → doeNativeComputeDispatchFlush (direct Metal, no Zig command recording).
1380
- * Fallback: standard wgpu path for multi-dispatch or unsupported patterns. */
1404
+ * Fast path: single dispatch or dispatch+copy → doeNativeComputeDispatchFlush.
1405
+ * Larger or mixed batches stay on the standard wgpu path. */
1381
1406
  #define BATCH_MAX_BIND_GROUPS 4
1382
1407
  static napi_value doe_submit_batched(napi_env env, napi_callback_info info) {
1383
1408
  NAPI_ASSERT_ARGC(env, info, 3);
@@ -1391,12 +1416,18 @@ static napi_value doe_submit_batched(napi_env env, napi_callback_info info) {
1391
1416
  napi_get_array_length(env, commands, &cmd_count);
1392
1417
  if (cmd_count == 0) return NULL;
1393
1418
 
1394
- /* Fast path: exactly 1 dispatch + 0-1 copy, and direct dispatch function available. */
1395
- if (pfn_doeNativeComputeDispatchFlush && cmd_count >= 1 && cmd_count <= 2) {
1419
+ /* Fast path: exactly one dispatch, or dispatch followed by copy. */
1420
+ if (pfn_doeNativeComputeDispatchFlush && (cmd_count == 1 || cmd_count == 2)) {
1396
1421
  napi_value cmd0;
1397
1422
  napi_get_element(env, commands, 0, &cmd0);
1398
1423
  uint32_t t0 = get_uint32_prop(env, cmd0, "t");
1399
- if (t0 == 0) {
1424
+ uint32_t t1 = UINT32_MAX;
1425
+ napi_value cmd1 = NULL;
1426
+ if (cmd_count == 2) {
1427
+ napi_get_element(env, commands, 1, &cmd1);
1428
+ t1 = get_uint32_prop(env, cmd1, "t");
1429
+ }
1430
+ if (t0 == 0 && (cmd_count == 1 || t1 == 1)) {
1400
1431
  void* pipeline = unwrap_ptr(env, get_prop(env, cmd0, "p"));
1401
1432
  napi_value bgs = get_prop(env, cmd0, "bg");
1402
1433
  uint32_t bg_count = 0;
@@ -1411,20 +1442,17 @@ static napi_value doe_submit_batched(napi_env env, napi_callback_info info) {
1411
1442
  uint32_t dx = get_uint32_prop(env, cmd0, "x");
1412
1443
  uint32_t dy = get_uint32_prop(env, cmd0, "y");
1413
1444
  uint32_t dz = get_uint32_prop(env, cmd0, "z");
1414
-
1415
- void* copy_src = NULL; uint64_t copy_src_off = 0;
1416
- void* copy_dst = NULL; uint64_t copy_dst_off = 0;
1445
+ void* copy_src = NULL;
1446
+ uint64_t copy_src_off = 0;
1447
+ void* copy_dst = NULL;
1448
+ uint64_t copy_dst_off = 0;
1417
1449
  uint64_t copy_size = 0;
1418
1450
  if (cmd_count == 2) {
1419
- napi_value cmd1;
1420
- napi_get_element(env, commands, 1, &cmd1);
1421
- if (get_uint32_prop(env, cmd1, "t") == 1) {
1422
- copy_src = unwrap_ptr(env, get_prop(env, cmd1, "s"));
1423
- copy_src_off = (uint64_t)get_int64_prop(env, cmd1, "so");
1424
- copy_dst = unwrap_ptr(env, get_prop(env, cmd1, "d"));
1425
- copy_dst_off = (uint64_t)get_int64_prop(env, cmd1, "do");
1426
- copy_size = (uint64_t)get_int64_prop(env, cmd1, "sz");
1427
- }
1451
+ copy_src = unwrap_ptr(env, get_prop(env, cmd1, "s"));
1452
+ copy_dst = unwrap_ptr(env, get_prop(env, cmd1, "d"));
1453
+ copy_src_off = (uint64_t)get_int64_prop(env, cmd1, "so");
1454
+ copy_dst_off = (uint64_t)get_int64_prop(env, cmd1, "do");
1455
+ copy_size = (uint64_t)get_int64_prop(env, cmd1, "sz");
1428
1456
  }
1429
1457
  pfn_doeNativeComputeDispatchFlush(
1430
1458
  queue, pipeline, (void**)bg_ptrs, bg_count,
@@ -1435,6 +1463,16 @@ static napi_value doe_submit_batched(napi_env env, napi_callback_info info) {
1435
1463
  }
1436
1464
 
1437
1465
  /* Fallback: standard wgpu path. */
1466
+ int flush_after_submit = 0;
1467
+ if (cmd_count == 2) {
1468
+ napi_value cmd0;
1469
+ napi_value cmd1;
1470
+ napi_get_element(env, commands, 0, &cmd0);
1471
+ napi_get_element(env, commands, 1, &cmd1);
1472
+ if (get_uint32_prop(env, cmd0, "t") == 0 && get_uint32_prop(env, cmd1, "t") == 1) {
1473
+ flush_after_submit = 1;
1474
+ }
1475
+ }
1438
1476
  WGPUCommandEncoder encoder = pfn_wgpuDeviceCreateCommandEncoder(device, NULL);
1439
1477
  if (!encoder) NAPI_THROW(env, "submitBatched: createCommandEncoder failed");
1440
1478
  for (uint32_t i = 0; i < cmd_count; i++) {
@@ -1470,11 +1508,64 @@ static napi_value doe_submit_batched(napi_env env, napi_callback_info info) {
1470
1508
  }
1471
1509
  WGPUCommandBuffer cmd_buf = pfn_wgpuCommandEncoderFinish(encoder, NULL);
1472
1510
  pfn_wgpuQueueSubmit(queue, 1, &cmd_buf);
1511
+ if (flush_after_submit && pfn_doeNativeQueueFlush) {
1512
+ pfn_doeNativeQueueFlush(queue);
1513
+ }
1473
1514
  pfn_wgpuCommandBufferRelease(cmd_buf);
1474
1515
  pfn_wgpuCommandEncoderRelease(encoder);
1475
1516
  return NULL;
1476
1517
  }
1477
1518
 
1519
+ /* submitComputeDispatchCopy(device, queue, pipeline, bindGroups, x, y, z, src, srcOff, dst, dstOff, size)
1520
+ * Direct addon surface for the exact package compute_e2e shape so JS runtimes
1521
+ * do not pay generic command-array parsing on every timed sample. */
1522
+ static napi_value doe_submit_compute_dispatch_copy(napi_env env, napi_callback_info info) {
1523
+ size_t argc = 12;
1524
+ napi_value args[12];
1525
+ napi_status status = napi_get_cb_info(env, info, &argc, args, NULL, NULL);
1526
+ if (status != napi_ok || argc != 12) NAPI_THROW(env, "submitComputeDispatchCopy requires 12 arguments");
1527
+ CHECK_LIB_LOADED(env);
1528
+ WGPUDevice device = unwrap_ptr(env, args[0]);
1529
+ WGPUQueue queue = unwrap_ptr(env, args[1]);
1530
+ void* pipeline = unwrap_ptr(env, args[2]);
1531
+ napi_value bgs = args[3];
1532
+ uint32_t dx = 0;
1533
+ uint32_t dy = 0;
1534
+ uint32_t dz = 0;
1535
+ int64_t copy_src_off_i = 0;
1536
+ int64_t copy_dst_off_i = 0;
1537
+ int64_t copy_size_i = 0;
1538
+ napi_get_value_uint32(env, args[4], &dx);
1539
+ napi_get_value_uint32(env, args[5], &dy);
1540
+ napi_get_value_uint32(env, args[6], &dz);
1541
+ void* copy_src = unwrap_ptr(env, args[7]);
1542
+ napi_get_value_int64(env, args[8], &copy_src_off_i);
1543
+ void* copy_dst = unwrap_ptr(env, args[9]);
1544
+ napi_get_value_int64(env, args[10], &copy_dst_off_i);
1545
+ napi_get_value_int64(env, args[11], &copy_size_i);
1546
+ uint64_t copy_src_off = (uint64_t)copy_src_off_i;
1547
+ uint64_t copy_dst_off = (uint64_t)copy_dst_off_i;
1548
+ uint64_t copy_size = (uint64_t)copy_size_i;
1549
+ if (!device || !queue || !pipeline) NAPI_THROW(env, "submitComputeDispatchCopy requires device, queue, and pipeline");
1550
+ if (!pfn_doeNativeComputeDispatchFlush) NAPI_THROW(env, "submitComputeDispatchCopy: doeNativeComputeDispatchFlush not available");
1551
+
1552
+ uint32_t bg_count = 0;
1553
+ napi_get_array_length(env, bgs, &bg_count);
1554
+ if (bg_count > BATCH_MAX_BIND_GROUPS) bg_count = BATCH_MAX_BIND_GROUPS;
1555
+ void* bg_ptrs[BATCH_MAX_BIND_GROUPS] = {NULL};
1556
+ for (uint32_t j = 0; j < bg_count; j++) {
1557
+ napi_value bg_val;
1558
+ napi_get_element(env, bgs, j, &bg_val);
1559
+ bg_ptrs[j] = unwrap_ptr(env, bg_val);
1560
+ }
1561
+
1562
+ pfn_doeNativeComputeDispatchFlush(
1563
+ queue, pipeline, (void**)bg_ptrs, bg_count,
1564
+ dx, dy, dz,
1565
+ copy_src, copy_src_off, copy_dst, copy_dst_off, copy_size);
1566
+ return NULL;
1567
+ }
1568
+
1478
1569
  /* flushAndMapSync(instance, queue, buffer, mode, offset, size) — flush + map in one N-API call. */
1479
1570
  static napi_value doe_flush_and_map_sync(napi_env env, napi_callback_info info) {
1480
1571
  NAPI_ASSERT_ARGC(env, info, 6);
@@ -1868,6 +1959,7 @@ static napi_value doe_module_init(napi_env env, napi_value exports) {
1868
1959
  EXPORT_FN("bufferUnmap", doe_buffer_unmap),
1869
1960
  EXPORT_FN("bufferMapSync", doe_buffer_map_sync),
1870
1961
  EXPORT_FN("bufferGetMappedRange", doe_buffer_get_mapped_range),
1962
+ EXPORT_FN("bufferAssertMappedPrefixF32", doe_buffer_assert_mapped_prefix_f32),
1871
1963
  EXPORT_FN("createShaderModule", doe_create_shader_module),
1872
1964
  EXPORT_FN("shaderModuleRelease", doe_shader_module_release),
1873
1965
  EXPORT_FN("createComputePipeline", doe_create_compute_pipeline),
@@ -1895,6 +1987,7 @@ static napi_value doe_module_init(napi_env env, napi_value exports) {
1895
1987
  EXPORT_FN("queueWriteBuffer", doe_queue_write_buffer),
1896
1988
  EXPORT_FN("queueFlush", doe_queue_flush),
1897
1989
  EXPORT_FN("submitBatched", doe_submit_batched),
1990
+ EXPORT_FN("submitComputeDispatchCopy", doe_submit_compute_dispatch_copy),
1898
1991
  EXPORT_FN("flushAndMapSync", doe_flush_and_map_sync),
1899
1992
  EXPORT_FN("queueRelease", doe_queue_release),
1900
1993
  EXPORT_FN("createTexture", doe_create_texture),
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@simulatte/webgpu",
3
- "version": "0.2.1",
4
- "description": "Doe WebGPU bridge for browserless AI/ML benchmarking and CI",
3
+ "version": "0.2.3",
4
+ "description": "Headless WebGPU runtime for Node.js and Bun, powered by Doe",
5
5
  "type": "module",
6
6
  "main": "./src/node-runtime.js",
7
7
  "exports": {
@@ -25,8 +25,12 @@
25
25
  "prebuilds/",
26
26
  "binding.gyp",
27
27
  "README.md",
28
+ "CHANGELOG.md",
28
29
  "API_CONTRACT.md",
29
30
  "COMPAT_SCOPE.md",
31
+ "SUPPORT_CONTRACTS.md",
32
+ "LAYERING_PLAN.md",
33
+ "ZIG_SOURCE_INVENTORY.md",
30
34
  "headless-webgpu-comparison.md",
31
35
  "doe-build-metadata.schema.json",
32
36
  "prebuild-metadata.schema.json"
@@ -60,5 +64,5 @@
60
64
  "url": "https://github.com/clocksmith/fawn/issues"
61
65
  },
62
66
  "author": "Fawn",
63
- "license": "ISC"
67
+ "license": "Apache-2.0"
64
68
  }
@@ -1,11 +1,11 @@
1
1
  {
2
2
  "schemaVersion": 1,
3
3
  "package": "@simulatte/webgpu",
4
- "packageVersion": "0.2.0",
4
+ "packageVersion": "0.2.3",
5
5
  "platform": "darwin",
6
6
  "arch": "arm64",
7
7
  "nodeNapiVersion": 8,
8
- "doeVersion": "22613a9b0",
8
+ "doeVersion": "68a193c88",
9
9
  "doeBuild": {
10
10
  "artifact": "libwebgpu_doe",
11
11
  "leanVerifiedBuild": false,
@@ -13,14 +13,14 @@
13
13
  },
14
14
  "files": {
15
15
  "doe_napi.node": {
16
- "sha256": "ccd350506359a770d286f7f3893dd0c6d81582dbcc04524461c9fa81cae4573e"
16
+ "sha256": "472c753c5c5bd82b60444bfcc1d3837bdbd40d1fce1b8281e6c706043bb64a84"
17
17
  },
18
18
  "libwebgpu_doe.dylib": {
19
- "sha256": "30be9ca300c53c0ba02eb76dfa94b683585c20f7a4caaa1f8eeea2cfb17d1f5f"
19
+ "sha256": "d9a66fa8fad7a8e50736778329ea5c011457c814238f96ecf230eb8eb97bcc64"
20
20
  },
21
21
  "libwebgpu_dawn.dylib": {
22
22
  "sha256": "22751faeb459e7a2ec778c0410ca122e23c23366eb3da145c651d1d43e26707d"
23
23
  }
24
24
  },
25
- "builtAt": "2026-03-07T03:39:41.504Z"
25
+ "builtAt": "2026-03-10T17:19:18.720Z"
26
26
  }
@@ -0,0 +1,26 @@
1
+ {
2
+ "schemaVersion": 1,
3
+ "package": "@simulatte/webgpu",
4
+ "packageVersion": "0.2.3",
5
+ "platform": "linux",
6
+ "arch": "x64",
7
+ "nodeNapiVersion": 8,
8
+ "doeVersion": "b09d34586",
9
+ "doeBuild": {
10
+ "artifact": "libwebgpu_doe",
11
+ "leanVerifiedBuild": false,
12
+ "proofArtifactSha256": null
13
+ },
14
+ "files": {
15
+ "doe_napi.node": {
16
+ "sha256": "21475bcd04b499e1a0ed6e75d8af2a7bead08d365ddfb09708d509d0de62bf28"
17
+ },
18
+ "libwebgpu_doe.so": {
19
+ "sha256": "28c5da84da65a5d6f4a3a74b9757279cddee738aaa93fe8ece6e358d4d94cdb1"
20
+ },
21
+ "libwebgpu_dawn.so": {
22
+ "sha256": "9d329301f59fbe85a190cee4faacde97f9c991b07264a18a6750b890899cc417"
23
+ }
24
+ },
25
+ "builtAt": "2026-03-10T21:30:16.241Z"
26
+ }
package/src/bun-ffi.js CHANGED
@@ -157,6 +157,7 @@ function openLibrary(path) {
157
157
  wgpuDeviceCreateComputePipeline: { args: [FFIType.ptr, FFIType.ptr], returns: FFIType.ptr },
158
158
  wgpuComputePipelineRelease: { args: [FFIType.ptr], returns: FFIType.void },
159
159
  wgpuComputePipelineGetBindGroupLayout: { args: [FFIType.ptr, FFIType.u32], returns: FFIType.ptr },
160
+ doeNativeComputePipelineGetBindGroupLayout: { args: [FFIType.ptr, FFIType.u32], returns: FFIType.ptr },
160
161
 
161
162
  // Bind group layout / bind group / pipeline layout
162
163
  wgpuDeviceCreateBindGroupLayout: { args: [FFIType.ptr, FFIType.ptr], returns: FFIType.ptr },
@@ -804,7 +805,7 @@ class DoeGPUComputePipeline {
804
805
  constructor(native) { this._native = native; }
805
806
 
806
807
  getBindGroupLayout(index) {
807
- const layout = wgpu.symbols.wgpuComputePipelineGetBindGroupLayout(this._native, index);
808
+ const layout = wgpu.symbols.doeNativeComputePipelineGetBindGroupLayout(this._native, index);
808
809
  return new DoeGPUBindGroupLayout(layout);
809
810
  }
810
811
  }
@@ -961,7 +962,7 @@ function ensureLibrary() {
961
962
  if (libraryLoaded) return;
962
963
  if (!DOE_LIB_PATH) {
963
964
  throw new Error(
964
- "@simulatte/webgpu: libwebgpu_doe not found. Build it with `cd fawn/zig && zig build dropin` or set DOE_WEBGPU_LIB."
965
+ "@simulatte/webgpu: libwebgpu_doe not found. Build it with `cd zig && zig build dropin` or set DOE_WEBGPU_LIB."
965
966
  );
966
967
  }
967
968
  wgpu = openLibrary(DOE_LIB_PATH);
package/src/bun.js CHANGED
@@ -1,2 +1,2 @@
1
- export * from "./bun-ffi.js";
2
- export { default } from "./bun-ffi.js";
1
+ export * from "./index.js";
2
+ export { default } from "./index.js";
package/src/index.js CHANGED
@@ -19,13 +19,13 @@ let libraryLoaded = false;
19
19
  function loadAddon() {
20
20
  const prebuildPath = resolve(__dirname, '..', 'prebuilds', `${process.platform}-${process.arch}`, 'doe_napi.node');
21
21
  try {
22
- return require(prebuildPath);
22
+ return require('../build/Release/doe_napi.node');
23
23
  } catch {
24
24
  try {
25
- return require('../build/Release/doe_napi.node');
25
+ return require('../build/Debug/doe_napi.node');
26
26
  } catch {
27
27
  try {
28
- return require('../build/Debug/doe_napi.node');
28
+ return require(prebuildPath);
29
29
  } catch {
30
30
  return null;
31
31
  }
@@ -71,7 +71,7 @@ function ensureLibrary() {
71
71
  }
72
72
  if (!DOE_LIB_PATH) {
73
73
  throw new Error(
74
- '@simulatte/webgpu: libwebgpu_doe not found. Build it with `cd fawn/zig && zig build dropin` or set DOE_WEBGPU_LIB.'
74
+ '@simulatte/webgpu: libwebgpu_doe not found. Build it with `cd zig && zig build dropin` or set DOE_WEBGPU_LIB.'
75
75
  );
76
76
  }
77
77
  addon.loadLibrary(DOE_LIB_PATH);
@@ -120,14 +120,26 @@ class DoeGPUBuffer {
120
120
  }
121
121
 
122
122
  async mapAsync(mode, offset = 0, size = this.size) {
123
- if (this._queue) addon.flushAndMapSync(this._instance, this._queue, this._native, mode, offset, size);
124
- else addon.bufferMapSync(this._instance, this._native, mode, offset, size);
123
+ if (this._queue) {
124
+ if (this._queue.hasPendingSubmissions()) {
125
+ addon.flushAndMapSync(this._instance, this._queue._native, this._native, mode, offset, size);
126
+ this._queue.markSubmittedWorkDone();
127
+ } else {
128
+ addon.bufferMapSync(this._instance, this._native, mode, offset, size);
129
+ }
130
+ } else {
131
+ addon.bufferMapSync(this._instance, this._native, mode, offset, size);
132
+ }
125
133
  }
126
134
 
127
135
  getMappedRange(offset = 0, size = this.size) {
128
136
  return addon.bufferGetMappedRange(this._native, offset, size);
129
137
  }
130
138
 
139
+ assertMappedPrefixF32(expected, count) {
140
+ return addon.bufferAssertMappedPrefixF32(this._native, expected, count);
141
+ }
142
+
131
143
  unmap() {
132
144
  addon.bufferUnmap(this._native);
133
145
  }
@@ -233,13 +245,57 @@ class DoeGPUQueue {
233
245
  this._native = native;
234
246
  this._instance = instance;
235
247
  this._device = device;
248
+ this._submittedSerial = 0;
249
+ this._completedSerial = 0;
250
+ }
251
+
252
+ hasPendingSubmissions() {
253
+ return this._completedSerial < this._submittedSerial;
254
+ }
255
+
256
+ markSubmittedWorkDone() {
257
+ this._completedSerial = this._submittedSerial;
236
258
  }
237
259
 
238
260
  submit(commandBuffers) {
261
+ if (commandBuffers.length === 0) return;
262
+ this._submittedSerial += 1;
263
+ if (commandBuffers.length === 1 && commandBuffers[0]?._batched) {
264
+ const cmds = commandBuffers[0]._commands;
265
+ if (
266
+ cmds.length === 2
267
+ && cmds[0]?.t === 0
268
+ && cmds[1]?.t === 1
269
+ && typeof addon.submitComputeDispatchCopy === 'function'
270
+ ) {
271
+ addon.submitComputeDispatchCopy(
272
+ this._device,
273
+ this._native,
274
+ cmds[0].p,
275
+ cmds[0].bg,
276
+ cmds[0].x,
277
+ cmds[0].y,
278
+ cmds[0].z,
279
+ cmds[1].s,
280
+ cmds[1].so,
281
+ cmds[1].d,
282
+ cmds[1].do,
283
+ cmds[1].sz,
284
+ );
285
+ return;
286
+ }
287
+ }
239
288
  if (commandBuffers.length > 0 && commandBuffers.every((c) => c._batched)) {
240
289
  const allCommands = [];
241
290
  for (const cb of commandBuffers) allCommands.push(...cb._commands);
242
291
  addon.submitBatched(this._device, this._native, allCommands);
292
+ if (
293
+ allCommands.length === 2
294
+ && allCommands[0]?.t === 0
295
+ && allCommands[1]?.t === 1
296
+ ) {
297
+ this.markSubmittedWorkDone();
298
+ }
243
299
  } else {
244
300
  const natives = commandBuffers.map((c) => c._native);
245
301
  addon.queueSubmit(this._native, natives);
@@ -259,8 +315,9 @@ class DoeGPUQueue {
259
315
  }
260
316
 
261
317
  async onSubmittedWorkDone() {
262
- // No-op: Doe submit commits synchronously. GPU completion is ensured
263
- // by mapAsync when data is actually needed.
318
+ if (!this.hasPendingSubmissions()) return;
319
+ addon.queueFlush(this._native);
320
+ this.markSubmittedWorkDone();
264
321
  }
265
322
  }
266
323
 
@@ -307,15 +364,28 @@ class DoeGPURenderPipeline {
307
364
  }
308
365
 
309
366
  class DoeGPUShaderModule {
310
- constructor(native) { this._native = native; }
367
+ constructor(native, code) {
368
+ this._native = native;
369
+ this._code = code;
370
+ }
311
371
  }
312
372
 
313
373
  class DoeGPUComputePipeline {
314
- constructor(native) { this._native = native; }
374
+ constructor(native, device, explicitLayout, autoLayoutEntriesByGroup) {
375
+ this._native = native;
376
+ this._device = device;
377
+ this._explicitLayout = explicitLayout;
378
+ this._autoLayoutEntriesByGroup = autoLayoutEntriesByGroup;
379
+ this._cachedLayouts = new Map();
380
+ }
315
381
 
316
382
  getBindGroupLayout(index) {
317
- const layout = addon.computePipelineGetBindGroupLayout(this._native, index);
318
- return new DoeGPUBindGroupLayout(layout);
383
+ if (this._explicitLayout) return this._explicitLayout;
384
+ if (this._cachedLayouts.has(index)) return this._cachedLayouts.get(index);
385
+ const entries = this._autoLayoutEntriesByGroup?.get(index) ?? [];
386
+ const layout = this._device.createBindGroupLayout({ entries });
387
+ this._cachedLayouts.set(index, layout);
388
+ return layout;
319
389
  }
320
390
  }
321
391
 
@@ -368,6 +438,34 @@ const DOE_LIMITS = Object.freeze({
368
438
 
369
439
  const DOE_FEATURES = Object.freeze(new Set(['shader-f16']));
370
440
 
441
+ function inferAutoBindGroupLayouts(code, visibility = globals.GPUShaderStage.COMPUTE) {
442
+ const groups = new Map();
443
+ const bindingPattern = /@group\((\d+)\)\s*@binding\((\d+)\)\s*var(?:<([^>]+)>)?\s+\w+\s*:\s*([^;]+);/g;
444
+ for (const match of code.matchAll(bindingPattern)) {
445
+ const group = Number(match[1]);
446
+ const binding = Number(match[2]);
447
+ const addressSpace = (match[3] ?? "").trim();
448
+ const typeExpr = (match[4] ?? "").trim();
449
+ let entry = null;
450
+ if (addressSpace.startsWith("uniform")) {
451
+ entry = { binding, visibility, buffer: { type: "uniform" } };
452
+ } else if (addressSpace.startsWith("storage")) {
453
+ const readOnly = !addressSpace.includes("read_write");
454
+ entry = { binding, visibility, buffer: { type: readOnly ? "read-only-storage" : "storage" } };
455
+ } else if (typeExpr.startsWith("sampler")) {
456
+ entry = { binding, visibility, sampler: {} };
457
+ }
458
+ if (!entry) continue;
459
+ const entries = groups.get(group) ?? [];
460
+ entries.push(entry);
461
+ groups.set(group, entries);
462
+ }
463
+ for (const entries of groups.values()) {
464
+ entries.sort((left, right) => left.binding - right.binding);
465
+ }
466
+ return groups;
467
+ }
468
+
371
469
  class DoeGPUDevice {
372
470
  constructor(native, instance) {
373
471
  this._native = native;
@@ -380,24 +478,25 @@ class DoeGPUDevice {
380
478
 
381
479
  createBuffer(descriptor) {
382
480
  const buf = addon.createBuffer(this._native, descriptor);
383
- return new DoeGPUBuffer(buf, this._instance, descriptor.size, descriptor.usage, this.queue._native);
481
+ return new DoeGPUBuffer(buf, this._instance, descriptor.size, descriptor.usage, this.queue);
384
482
  }
385
483
 
386
484
  createShaderModule(descriptor) {
387
485
  const code = descriptor.code || descriptor.source;
388
486
  if (!code) throw new Error('createShaderModule: descriptor.code is required');
389
487
  const mod = addon.createShaderModule(this._native, code);
390
- return new DoeGPUShaderModule(mod);
488
+ return new DoeGPUShaderModule(mod, code);
391
489
  }
392
490
 
393
491
  createComputePipeline(descriptor) {
394
492
  const shader = descriptor.compute?.module;
395
493
  const entryPoint = descriptor.compute?.entryPoint || 'main';
396
494
  const layout = descriptor.layout === 'auto' ? null : descriptor.layout;
495
+ const autoLayoutEntriesByGroup = layout ? null : inferAutoBindGroupLayouts(shader?._code || '');
397
496
  const native = addon.createComputePipeline(
398
497
  this._native, shader._native, entryPoint,
399
498
  layout?._native ?? null);
400
- return new DoeGPUComputePipeline(native);
499
+ return new DoeGPUComputePipeline(native, this, layout, autoLayoutEntriesByGroup);
401
500
  }
402
501
 
403
502
  async createComputePipelineAsync(descriptor) {
@@ -158,7 +158,9 @@ export function createDoeRuntime(options = {}) {
158
158
  require_existing_path("commandsPath", runOptions.commandsPath);
159
159
  if (runOptions.quirksPath) require_existing_path("quirksPath", runOptions.quirksPath);
160
160
  const args = build_bench_args(runOptions);
161
- const result = runRaw(args);
161
+ const result = runRaw(args, {
162
+ cwd: runOptions.cwd || WORKSPACE_ROOT,
163
+ });
162
164
  const traceMeta = read_trace_meta(runOptions.traceMetaPath);
163
165
  return {
164
166
  ...result,