npm - @simulatte/webgpu - Versions diffs - 0.2.4 → 0.3.1 - Mend

@simulatte/webgpu 0.2.4 → 0.3.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (36)

package/CHANGELOG.md +33 -0
package/README.md +263 -71
package/api-contract.md +70 -139
package/assets/package-layers.svg +63 -0
package/examples/direct-webgpu/compute-dispatch.js +66 -0
package/examples/direct-webgpu/explicit-bind-group.js +85 -0
package/examples/direct-webgpu/request-device.js +10 -0
package/examples/doe-api/buffers-readback.js +9 -0
package/examples/doe-api/compile-and-dispatch.js +30 -0
package/examples/doe-api/compute-dispatch.js +25 -0
package/examples/doe-routines/compute-once-like-input.js +36 -0
package/examples/doe-routines/compute-once-matmul.js +53 -0
package/examples/doe-routines/compute-once-multiple-inputs.js +27 -0
package/examples/doe-routines/compute-once.js +23 -0
package/headless-webgpu-comparison.md +2 -2
package/layering-plan.md +1 -1
package/native/doe_napi.c +102 -12
package/package.json +2 -1
package/prebuilds/darwin-arm64/doe_napi.node +0 -0
package/prebuilds/darwin-arm64/libwebgpu_doe.dylib +0 -0
package/prebuilds/darwin-arm64/metadata.json +6 -6
package/prebuilds/linux-x64/doe_napi.node +0 -0
package/prebuilds/linux-x64/libwebgpu_doe.so +0 -0
package/prebuilds/linux-x64/metadata.json +5 -5
package/scripts/generate-readme-assets.js +79 -6
package/scripts/prebuild.js +23 -19
package/src/auto_bind_group_layout.js +32 -0
package/src/bun-ffi.js +93 -12
package/src/bun.js +23 -2
package/src/compute.d.ts +2 -1
package/src/compute.js +671 -33
package/src/doe.d.ts +127 -27
package/src/doe.js +480 -114
package/src/full.d.ts +8 -1
package/src/full.js +28 -3
package/src/index.js +1013 -38

package/src/bun-ffi.js CHANGED Viewed

@@ -4,6 +4,7 @@ import { dirname, resolve } from "node:path";
 import { fileURLToPath } from "node:url";
 import { createDoeRuntime, runDawnVsDoeCompare } from "./runtime_cli.js";
 import { loadDoeBuildMetadata } from "./build_metadata.js";
+import { inferAutoBindGroupLayouts } from "./auto_bind_group_layout.js";
 const __dirname = dirname(fileURLToPath(import.meta.url));
 const PACKAGE_ROOT = resolve(__dirname, "..");
@@ -120,7 +121,7 @@ let wgpu = null;
 // ---------------------------------------------------------------------------
 function openLibrary(path) {
-    return dlopen(path, {
+    const symbols = {
         // Instance
         wgpuCreateInstance:       { args: [FFIType.ptr], returns: FFIType.ptr },
         wgpuInstanceRelease:      { args: [FFIType.ptr], returns: FFIType.void },
@@ -157,7 +158,6 @@ function openLibrary(path) {
         wgpuDeviceCreateComputePipeline: { args: [FFIType.ptr, FFIType.ptr], returns: FFIType.ptr },
         wgpuComputePipelineRelease: { args: [FFIType.ptr], returns: FFIType.void },
         wgpuComputePipelineGetBindGroupLayout: { args: [FFIType.ptr, FFIType.u32], returns: FFIType.ptr },
-        doeNativeComputePipelineGetBindGroupLayout: { args: [FFIType.ptr, FFIType.u32], returns: FFIType.ptr },
         // Bind group layout / bind group / pipeline layout
         wgpuDeviceCreateBindGroupLayout: { args: [FFIType.ptr, FFIType.ptr], returns: FFIType.ptr },
@@ -203,7 +203,14 @@ function openLibrary(path) {
         wgpuRenderPassEncoderDraw: { args: [FFIType.ptr, FFIType.u32, FFIType.u32, FFIType.u32, FFIType.u32], returns: FFIType.void },
         wgpuRenderPassEncoderEnd: { args: [FFIType.ptr], returns: FFIType.void },
         wgpuRenderPassEncoderRelease: { args: [FFIType.ptr], returns: FFIType.void },
-    });
+    };
+    if (process.platform === "darwin") {
+        symbols.doeNativeComputePipelineGetBindGroupLayout = {
+            args: [FFIType.ptr, FFIType.u32],
+            returns: FFIType.ptr,
+        };
+    }
+    return dlopen(path, symbols);
 }
 // ---------------------------------------------------------------------------
@@ -626,6 +633,36 @@ function bufferMapSync(instancePtr, bufferPtr, mode, offset, size) {
     }
 }
+function waitForSubmittedWorkDoneSync(instancePtr, queuePtr) {
+    let queueStatus = null;
+    let done = false;
+    const cb = new JSCallback(
+        (status, _msgData, _msgLen, _ud1, _ud2) => {
+            queueStatus = status;
+            done = true;
+        },
+        { args: [FFIType.u32, FFIType.ptr, FFIType.u64, FFIType.ptr, FFIType.ptr], returns: FFIType.void },
+    );
+    try {
+        const futureId = wgpu.symbols.doeQueueOnSubmittedWorkDoneFlat(
+            queuePtr,
+            CALLBACK_MODE_ALLOW_PROCESS_EVENTS,
+            cb.ptr,
+            null,
+            null,
+        );
+        if (futureId === 0 || futureId === 0n) {
+            throw new Error("[fawn-webgpu] queue work-done future unavailable");
+        }
+        processEventsUntilDone(instancePtr, () => done);
+        if (queueStatus !== REQUEST_DEVICE_STATUS_SUCCESS) {
+            throw new Error(`[fawn-webgpu] queue work-done failed (status=${queueStatus})`);
+        }
+    } finally {
+        cb.close();
+    }
+}
 // ---------------------------------------------------------------------------
 // WebGPU wrapper classes — matches index.js surface exactly
 // ---------------------------------------------------------------------------
@@ -640,6 +677,10 @@ class DoeGPUBuffer {
     }
     async mapAsync(mode, offset = 0, size = this.size) {
+        if (this._queue?.hasPendingSubmissions()) {
+            waitForSubmittedWorkDoneSync(this._instance, this._queue._native);
+            this._queue.markSubmittedWorkDone();
+        }
         bufferMapSync(this._instance, this._native, mode, offset, size);
         this._mapMode = mode;
     }
@@ -727,6 +768,15 @@ class DoeGPUQueue {
     constructor(native, instance) {
         this._native = native;
         this._instance = instance;
+        this._pendingSubmissions = 0;
+    }
+    hasPendingSubmissions() {
+        return this._pendingSubmissions > 0;
+    }
+    markSubmittedWorkDone() {
+        this._pendingSubmissions = 0;
     }
     submit(commandBuffers) {
@@ -735,6 +785,9 @@ class DoeGPUQueue {
             ptrs[i] = BigInt(commandBuffers[i]._native);
         }
         wgpu.symbols.wgpuQueueSubmit(this._native, BigInt(commandBuffers.length), ptrs);
+        if (commandBuffers.length > 0) {
+            this._pendingSubmissions += commandBuffers.length;
+        }
     }
     writeBuffer(buffer, bufferOffset, data, dataOffset = 0, size) {
@@ -750,8 +803,9 @@ class DoeGPUQueue {
     }
     async onSubmittedWorkDone() {
-        // Match the Node provider contract: Doe submit commits synchronously,
-        // and mapAsync flushes when readback synchronization is required.
+        if (!this.hasPendingSubmissions()) return;
+        waitForSubmittedWorkDoneSync(this._instance, this._native);
+        this.markSubmittedWorkDone();
     }
 }
@@ -798,15 +852,38 @@ class DoeGPURenderPipeline {
 }
 class DoeGPUShaderModule {
-    constructor(native) { this._native = native; }
+    constructor(native, code) {
+        this._native = native;
+        this._code = code;
+    }
 }
 class DoeGPUComputePipeline {
-    constructor(native) { this._native = native; }
+    constructor(native, device, explicitLayout, autoLayoutEntriesByGroup) {
+        this._native = native;
+        this._device = device;
+        this._explicitLayout = explicitLayout;
+        this._autoLayoutEntriesByGroup = autoLayoutEntriesByGroup;
+        this._cachedLayouts = new Map();
+    }
     getBindGroupLayout(index) {
-        const layout = wgpu.symbols.doeNativeComputePipelineGetBindGroupLayout(this._native, index);
-        return new DoeGPUBindGroupLayout(layout);
+        if (this._explicitLayout) return this._explicitLayout;
+        if (this._cachedLayouts.has(index)) return this._cachedLayouts.get(index);
+        let layout;
+        if (this._autoLayoutEntriesByGroup && process.platform === "darwin") {
+            const entries = this._autoLayoutEntriesByGroup.get(index) ?? [];
+            layout = this._device.createBindGroupLayout({ entries });
+        } else {
+            const native = process.platform === "darwin"
+                ? wgpu.symbols.doeNativeComputePipelineGetBindGroupLayout(this._native, index)
+                : wgpu.symbols.wgpuComputePipelineGetBindGroupLayout(this._native, index);
+            layout = new DoeGPUBindGroupLayout(native);
+        }
+        this._cachedLayouts.set(index, layout);
+        return layout;
     }
 }
@@ -835,7 +912,7 @@ class DoeGPUDevice {
     createBuffer(descriptor) {
         const descBytes = buildBufferDescriptor(descriptor);
         const buf = wgpu.symbols.wgpuDeviceCreateBuffer(this._native, descBytes);
-        return new DoeGPUBuffer(buf, this._instance, descriptor.size, descriptor.usage, this.queue._native);
+        return new DoeGPUBuffer(buf, this._instance, descriptor.size, descriptor.usage, this.queue);
     }
     createShaderModule(descriptor) {
@@ -844,18 +921,22 @@ class DoeGPUDevice {
         const { desc, _refs } = buildShaderModuleDescriptor(code);
         const mod = wgpu.symbols.wgpuDeviceCreateShaderModule(this._native, desc);
         void _refs;
-        return new DoeGPUShaderModule(mod);
+        return new DoeGPUShaderModule(mod, code);
     }
     createComputePipeline(descriptor) {
         const shader = descriptor.compute?.module;
         const entryPoint = descriptor.compute?.entryPoint || "main";
         const layout = descriptor.layout === "auto" ? null : descriptor.layout;
+        const autoLayoutEntriesByGroup = layout ? null : inferAutoBindGroupLayouts(
+            shader?._code || "",
+            globals.GPUShaderStage.COMPUTE,
+        );
         const { desc, _refs } = buildComputePipelineDescriptor(
             shader._native, entryPoint, layout?._native ?? null);
         const native = wgpu.symbols.wgpuDeviceCreateComputePipeline(this._native, desc);
         void _refs;
-        return new DoeGPUComputePipeline(native);
+        return new DoeGPUComputePipeline(native, this, layout, autoLayoutEntriesByGroup);
     }
     async createComputePipelineAsync(descriptor) {

package/src/bun.js CHANGED Viewed

@@ -1,2 +1,23 @@
-export * from "./full.js";
-export { default } from "./full.js";
+import * as ffi from "./bun-ffi.js";
+import * as full from "./full.js";
+import { createDoeNamespace } from "./doe.js";
+const runtime = process.platform === "linux" ? ffi : full;
+export const doe = createDoeNamespace({
+  requestDevice: runtime.requestDevice,
+});
+export const create = runtime.create;
+export const globals = runtime.globals;
+export const setupGlobals = runtime.setupGlobals;
+export const requestAdapter = runtime.requestAdapter;
+export const requestDevice = runtime.requestDevice;
+export const providerInfo = runtime.providerInfo;
+export const createDoeRuntime = runtime.createDoeRuntime;
+export const runDawnVsDoeCompare = runtime.runDawnVsDoeCompare;
+export default {
+  ...runtime,
+  doe,
+};

package/src/compute.d.ts CHANGED Viewed

@@ -126,7 +126,8 @@ export interface ComputeDoeNamespace
     ComputeGPUBuffer,
     ComputeDoeKernel,
     ComputeBoundDoeNamespace,
-    ComputeDoeRunComputeOptions
+    ComputeDoeRunComputeOptions,
+    RequestDeviceOptions
   > {}
 export const globals: Record<string, unknown>;