@simulatte/webgpu 0.2.4 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/CHANGELOG.md +33 -0
  2. package/README.md +263 -71
  3. package/api-contract.md +70 -139
  4. package/assets/package-layers.svg +63 -0
  5. package/examples/direct-webgpu/compute-dispatch.js +66 -0
  6. package/examples/direct-webgpu/explicit-bind-group.js +85 -0
  7. package/examples/direct-webgpu/request-device.js +10 -0
  8. package/examples/doe-api/buffers-readback.js +9 -0
  9. package/examples/doe-api/compile-and-dispatch.js +30 -0
  10. package/examples/doe-api/compute-dispatch.js +25 -0
  11. package/examples/doe-routines/compute-once-like-input.js +36 -0
  12. package/examples/doe-routines/compute-once-matmul.js +53 -0
  13. package/examples/doe-routines/compute-once-multiple-inputs.js +27 -0
  14. package/examples/doe-routines/compute-once.js +23 -0
  15. package/headless-webgpu-comparison.md +2 -2
  16. package/layering-plan.md +1 -1
  17. package/native/doe_napi.c +102 -12
  18. package/package.json +2 -1
  19. package/prebuilds/darwin-arm64/doe_napi.node +0 -0
  20. package/prebuilds/darwin-arm64/libwebgpu_doe.dylib +0 -0
  21. package/prebuilds/darwin-arm64/metadata.json +6 -6
  22. package/prebuilds/linux-x64/doe_napi.node +0 -0
  23. package/prebuilds/linux-x64/libwebgpu_doe.so +0 -0
  24. package/prebuilds/linux-x64/metadata.json +5 -5
  25. package/scripts/generate-readme-assets.js +79 -6
  26. package/scripts/prebuild.js +23 -19
  27. package/src/auto_bind_group_layout.js +32 -0
  28. package/src/bun-ffi.js +93 -12
  29. package/src/bun.js +23 -2
  30. package/src/compute.d.ts +2 -1
  31. package/src/compute.js +671 -33
  32. package/src/doe.d.ts +127 -27
  33. package/src/doe.js +480 -114
  34. package/src/full.d.ts +8 -1
  35. package/src/full.js +28 -3
  36. package/src/index.js +1013 -38
package/src/bun-ffi.js CHANGED
@@ -4,6 +4,7 @@ import { dirname, resolve } from "node:path";
4
4
  import { fileURLToPath } from "node:url";
5
5
  import { createDoeRuntime, runDawnVsDoeCompare } from "./runtime_cli.js";
6
6
  import { loadDoeBuildMetadata } from "./build_metadata.js";
7
+ import { inferAutoBindGroupLayouts } from "./auto_bind_group_layout.js";
7
8
 
8
9
  const __dirname = dirname(fileURLToPath(import.meta.url));
9
10
  const PACKAGE_ROOT = resolve(__dirname, "..");
@@ -120,7 +121,7 @@ let wgpu = null;
120
121
  // ---------------------------------------------------------------------------
121
122
 
122
123
  function openLibrary(path) {
123
- return dlopen(path, {
124
+ const symbols = {
124
125
  // Instance
125
126
  wgpuCreateInstance: { args: [FFIType.ptr], returns: FFIType.ptr },
126
127
  wgpuInstanceRelease: { args: [FFIType.ptr], returns: FFIType.void },
@@ -157,7 +158,6 @@ function openLibrary(path) {
157
158
  wgpuDeviceCreateComputePipeline: { args: [FFIType.ptr, FFIType.ptr], returns: FFIType.ptr },
158
159
  wgpuComputePipelineRelease: { args: [FFIType.ptr], returns: FFIType.void },
159
160
  wgpuComputePipelineGetBindGroupLayout: { args: [FFIType.ptr, FFIType.u32], returns: FFIType.ptr },
160
- doeNativeComputePipelineGetBindGroupLayout: { args: [FFIType.ptr, FFIType.u32], returns: FFIType.ptr },
161
161
 
162
162
  // Bind group layout / bind group / pipeline layout
163
163
  wgpuDeviceCreateBindGroupLayout: { args: [FFIType.ptr, FFIType.ptr], returns: FFIType.ptr },
@@ -203,7 +203,14 @@ function openLibrary(path) {
203
203
  wgpuRenderPassEncoderDraw: { args: [FFIType.ptr, FFIType.u32, FFIType.u32, FFIType.u32, FFIType.u32], returns: FFIType.void },
204
204
  wgpuRenderPassEncoderEnd: { args: [FFIType.ptr], returns: FFIType.void },
205
205
  wgpuRenderPassEncoderRelease: { args: [FFIType.ptr], returns: FFIType.void },
206
- });
206
+ };
207
+ if (process.platform === "darwin") {
208
+ symbols.doeNativeComputePipelineGetBindGroupLayout = {
209
+ args: [FFIType.ptr, FFIType.u32],
210
+ returns: FFIType.ptr,
211
+ };
212
+ }
213
+ return dlopen(path, symbols);
207
214
  }
208
215
 
209
216
  // ---------------------------------------------------------------------------
@@ -626,6 +633,36 @@ function bufferMapSync(instancePtr, bufferPtr, mode, offset, size) {
626
633
  }
627
634
  }
628
635
 
636
+ function waitForSubmittedWorkDoneSync(instancePtr, queuePtr) {
637
+ let queueStatus = null;
638
+ let done = false;
639
+ const cb = new JSCallback(
640
+ (status, _msgData, _msgLen, _ud1, _ud2) => {
641
+ queueStatus = status;
642
+ done = true;
643
+ },
644
+ { args: [FFIType.u32, FFIType.ptr, FFIType.u64, FFIType.ptr, FFIType.ptr], returns: FFIType.void },
645
+ );
646
+ try {
647
+ const futureId = wgpu.symbols.doeQueueOnSubmittedWorkDoneFlat(
648
+ queuePtr,
649
+ CALLBACK_MODE_ALLOW_PROCESS_EVENTS,
650
+ cb.ptr,
651
+ null,
652
+ null,
653
+ );
654
+ if (futureId === 0 || futureId === 0n) {
655
+ throw new Error("[fawn-webgpu] queue work-done future unavailable");
656
+ }
657
+ processEventsUntilDone(instancePtr, () => done);
658
+ if (queueStatus !== REQUEST_DEVICE_STATUS_SUCCESS) {
659
+ throw new Error(`[fawn-webgpu] queue work-done failed (status=${queueStatus})`);
660
+ }
661
+ } finally {
662
+ cb.close();
663
+ }
664
+ }
665
+
629
666
  // ---------------------------------------------------------------------------
630
667
  // WebGPU wrapper classes — matches index.js surface exactly
631
668
  // ---------------------------------------------------------------------------
@@ -640,6 +677,10 @@ class DoeGPUBuffer {
640
677
  }
641
678
 
642
679
  async mapAsync(mode, offset = 0, size = this.size) {
680
+ if (this._queue?.hasPendingSubmissions()) {
681
+ waitForSubmittedWorkDoneSync(this._instance, this._queue._native);
682
+ this._queue.markSubmittedWorkDone();
683
+ }
643
684
  bufferMapSync(this._instance, this._native, mode, offset, size);
644
685
  this._mapMode = mode;
645
686
  }
@@ -727,6 +768,15 @@ class DoeGPUQueue {
727
768
  constructor(native, instance) {
728
769
  this._native = native;
729
770
  this._instance = instance;
771
+ this._pendingSubmissions = 0;
772
+ }
773
+
774
+ hasPendingSubmissions() {
775
+ return this._pendingSubmissions > 0;
776
+ }
777
+
778
+ markSubmittedWorkDone() {
779
+ this._pendingSubmissions = 0;
730
780
  }
731
781
 
732
782
  submit(commandBuffers) {
@@ -735,6 +785,9 @@ class DoeGPUQueue {
735
785
  ptrs[i] = BigInt(commandBuffers[i]._native);
736
786
  }
737
787
  wgpu.symbols.wgpuQueueSubmit(this._native, BigInt(commandBuffers.length), ptrs);
788
+ if (commandBuffers.length > 0) {
789
+ this._pendingSubmissions += commandBuffers.length;
790
+ }
738
791
  }
739
792
 
740
793
  writeBuffer(buffer, bufferOffset, data, dataOffset = 0, size) {
@@ -750,8 +803,9 @@ class DoeGPUQueue {
750
803
  }
751
804
 
752
805
  async onSubmittedWorkDone() {
753
- // Match the Node provider contract: Doe submit commits synchronously,
754
- // and mapAsync flushes when readback synchronization is required.
806
+ if (!this.hasPendingSubmissions()) return;
807
+ waitForSubmittedWorkDoneSync(this._instance, this._native);
808
+ this.markSubmittedWorkDone();
755
809
  }
756
810
  }
757
811
 
@@ -798,15 +852,38 @@ class DoeGPURenderPipeline {
798
852
  }
799
853
 
800
854
  class DoeGPUShaderModule {
801
- constructor(native) { this._native = native; }
855
+ constructor(native, code) {
856
+ this._native = native;
857
+ this._code = code;
858
+ }
802
859
  }
803
860
 
804
861
  class DoeGPUComputePipeline {
805
- constructor(native) { this._native = native; }
862
+ constructor(native, device, explicitLayout, autoLayoutEntriesByGroup) {
863
+ this._native = native;
864
+ this._device = device;
865
+ this._explicitLayout = explicitLayout;
866
+ this._autoLayoutEntriesByGroup = autoLayoutEntriesByGroup;
867
+ this._cachedLayouts = new Map();
868
+ }
806
869
 
807
870
  getBindGroupLayout(index) {
808
- const layout = wgpu.symbols.doeNativeComputePipelineGetBindGroupLayout(this._native, index);
809
- return new DoeGPUBindGroupLayout(layout);
871
+ if (this._explicitLayout) return this._explicitLayout;
872
+ if (this._cachedLayouts.has(index)) return this._cachedLayouts.get(index);
873
+
874
+ let layout;
875
+ if (this._autoLayoutEntriesByGroup && process.platform === "darwin") {
876
+ const entries = this._autoLayoutEntriesByGroup.get(index) ?? [];
877
+ layout = this._device.createBindGroupLayout({ entries });
878
+ } else {
879
+ const native = process.platform === "darwin"
880
+ ? wgpu.symbols.doeNativeComputePipelineGetBindGroupLayout(this._native, index)
881
+ : wgpu.symbols.wgpuComputePipelineGetBindGroupLayout(this._native, index);
882
+ layout = new DoeGPUBindGroupLayout(native);
883
+ }
884
+
885
+ this._cachedLayouts.set(index, layout);
886
+ return layout;
810
887
  }
811
888
  }
812
889
 
@@ -835,7 +912,7 @@ class DoeGPUDevice {
835
912
  createBuffer(descriptor) {
836
913
  const descBytes = buildBufferDescriptor(descriptor);
837
914
  const buf = wgpu.symbols.wgpuDeviceCreateBuffer(this._native, descBytes);
838
- return new DoeGPUBuffer(buf, this._instance, descriptor.size, descriptor.usage, this.queue._native);
915
+ return new DoeGPUBuffer(buf, this._instance, descriptor.size, descriptor.usage, this.queue);
839
916
  }
840
917
 
841
918
  createShaderModule(descriptor) {
@@ -844,18 +921,22 @@ class DoeGPUDevice {
844
921
  const { desc, _refs } = buildShaderModuleDescriptor(code);
845
922
  const mod = wgpu.symbols.wgpuDeviceCreateShaderModule(this._native, desc);
846
923
  void _refs;
847
- return new DoeGPUShaderModule(mod);
924
+ return new DoeGPUShaderModule(mod, code);
848
925
  }
849
926
 
850
927
  createComputePipeline(descriptor) {
851
928
  const shader = descriptor.compute?.module;
852
929
  const entryPoint = descriptor.compute?.entryPoint || "main";
853
930
  const layout = descriptor.layout === "auto" ? null : descriptor.layout;
931
+ const autoLayoutEntriesByGroup = layout ? null : inferAutoBindGroupLayouts(
932
+ shader?._code || "",
933
+ globals.GPUShaderStage.COMPUTE,
934
+ );
854
935
  const { desc, _refs } = buildComputePipelineDescriptor(
855
936
  shader._native, entryPoint, layout?._native ?? null);
856
937
  const native = wgpu.symbols.wgpuDeviceCreateComputePipeline(this._native, desc);
857
938
  void _refs;
858
- return new DoeGPUComputePipeline(native);
939
+ return new DoeGPUComputePipeline(native, this, layout, autoLayoutEntriesByGroup);
859
940
  }
860
941
 
861
942
  async createComputePipelineAsync(descriptor) {
package/src/bun.js CHANGED
@@ -1,2 +1,23 @@
1
- export * from "./full.js";
2
- export { default } from "./full.js";
1
+ import * as ffi from "./bun-ffi.js";
2
+ import * as full from "./full.js";
3
+ import { createDoeNamespace } from "./doe.js";
4
+
5
+ const runtime = process.platform === "linux" ? ffi : full;
6
+
7
+ export const doe = createDoeNamespace({
8
+ requestDevice: runtime.requestDevice,
9
+ });
10
+
11
+ export const create = runtime.create;
12
+ export const globals = runtime.globals;
13
+ export const setupGlobals = runtime.setupGlobals;
14
+ export const requestAdapter = runtime.requestAdapter;
15
+ export const requestDevice = runtime.requestDevice;
16
+ export const providerInfo = runtime.providerInfo;
17
+ export const createDoeRuntime = runtime.createDoeRuntime;
18
+ export const runDawnVsDoeCompare = runtime.runDawnVsDoeCompare;
19
+
20
+ export default {
21
+ ...runtime,
22
+ doe,
23
+ };
package/src/compute.d.ts CHANGED
@@ -126,7 +126,8 @@ export interface ComputeDoeNamespace
126
126
  ComputeGPUBuffer,
127
127
  ComputeDoeKernel,
128
128
  ComputeBoundDoeNamespace,
129
- ComputeDoeRunComputeOptions
129
+ ComputeDoeRunComputeOptions,
130
+ RequestDeviceOptions
130
131
  > {}
131
132
 
132
133
  export const globals: Record<string, unknown>;