@simulatte/webgpu 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/CHANGELOG.md +47 -4
  2. package/README.md +273 -235
  3. package/api-contract.md +163 -0
  4. package/assets/fawn-icon-main-256.png +0 -0
  5. package/assets/package-layers.svg +63 -0
  6. package/assets/package-surface-cube-snapshot.svg +7 -7
  7. package/{COMPAT_SCOPE.md → compat-scope.md} +1 -1
  8. package/examples/direct-webgpu/compute-dispatch.js +66 -0
  9. package/examples/direct-webgpu/explicit-bind-group.js +85 -0
  10. package/examples/direct-webgpu/request-device.js +10 -0
  11. package/examples/doe-api/buffers-readback.js +9 -0
  12. package/examples/doe-api/compile-and-dispatch.js +30 -0
  13. package/examples/doe-api/compute-dispatch.js +25 -0
  14. package/examples/doe-routines/compute-once-like-input.js +36 -0
  15. package/examples/doe-routines/compute-once-matmul.js +53 -0
  16. package/examples/doe-routines/compute-once-multiple-inputs.js +27 -0
  17. package/examples/doe-routines/compute-once.js +23 -0
  18. package/headless-webgpu-comparison.md +2 -2
  19. package/{LAYERING_PLAN.md → layering-plan.md} +10 -8
  20. package/native/doe_napi.c +102 -12
  21. package/package.json +26 -9
  22. package/prebuilds/darwin-arm64/doe_napi.node +0 -0
  23. package/prebuilds/darwin-arm64/libwebgpu_doe.dylib +0 -0
  24. package/prebuilds/darwin-arm64/metadata.json +6 -6
  25. package/prebuilds/linux-x64/doe_napi.node +0 -0
  26. package/prebuilds/linux-x64/libwebgpu_doe.so +0 -0
  27. package/prebuilds/linux-x64/metadata.json +5 -5
  28. package/scripts/generate-readme-assets.js +81 -8
  29. package/scripts/prebuild.js +23 -19
  30. package/src/auto_bind_group_layout.js +32 -0
  31. package/src/bun-ffi.js +93 -12
  32. package/src/bun.js +23 -2
  33. package/src/compute.d.ts +162 -0
  34. package/src/compute.js +915 -0
  35. package/src/doe.d.ts +184 -0
  36. package/src/doe.js +641 -0
  37. package/src/full.d.ts +119 -0
  38. package/src/full.js +35 -0
  39. package/src/index.js +1013 -38
  40. package/src/node-runtime.js +2 -2
  41. package/src/node.js +2 -2
  42. package/{SUPPORT_CONTRACTS.md → support-contracts.md} +27 -41
  43. package/{ZIG_SOURCE_INVENTORY.md → zig-source-inventory.md} +2 -2
  44. package/API_CONTRACT.md +0 -182
package/src/bun-ffi.js CHANGED
@@ -4,6 +4,7 @@ import { dirname, resolve } from "node:path";
4
4
  import { fileURLToPath } from "node:url";
5
5
  import { createDoeRuntime, runDawnVsDoeCompare } from "./runtime_cli.js";
6
6
  import { loadDoeBuildMetadata } from "./build_metadata.js";
7
+ import { inferAutoBindGroupLayouts } from "./auto_bind_group_layout.js";
7
8
 
8
9
  const __dirname = dirname(fileURLToPath(import.meta.url));
9
10
  const PACKAGE_ROOT = resolve(__dirname, "..");
@@ -120,7 +121,7 @@ let wgpu = null;
120
121
  // ---------------------------------------------------------------------------
121
122
 
122
123
  function openLibrary(path) {
123
- return dlopen(path, {
124
+ const symbols = {
124
125
  // Instance
125
126
  wgpuCreateInstance: { args: [FFIType.ptr], returns: FFIType.ptr },
126
127
  wgpuInstanceRelease: { args: [FFIType.ptr], returns: FFIType.void },
@@ -157,7 +158,6 @@ function openLibrary(path) {
157
158
  wgpuDeviceCreateComputePipeline: { args: [FFIType.ptr, FFIType.ptr], returns: FFIType.ptr },
158
159
  wgpuComputePipelineRelease: { args: [FFIType.ptr], returns: FFIType.void },
159
160
  wgpuComputePipelineGetBindGroupLayout: { args: [FFIType.ptr, FFIType.u32], returns: FFIType.ptr },
160
- doeNativeComputePipelineGetBindGroupLayout: { args: [FFIType.ptr, FFIType.u32], returns: FFIType.ptr },
161
161
 
162
162
  // Bind group layout / bind group / pipeline layout
163
163
  wgpuDeviceCreateBindGroupLayout: { args: [FFIType.ptr, FFIType.ptr], returns: FFIType.ptr },
@@ -203,7 +203,14 @@ function openLibrary(path) {
203
203
  wgpuRenderPassEncoderDraw: { args: [FFIType.ptr, FFIType.u32, FFIType.u32, FFIType.u32, FFIType.u32], returns: FFIType.void },
204
204
  wgpuRenderPassEncoderEnd: { args: [FFIType.ptr], returns: FFIType.void },
205
205
  wgpuRenderPassEncoderRelease: { args: [FFIType.ptr], returns: FFIType.void },
206
- });
206
+ };
207
+ if (process.platform === "darwin") {
208
+ symbols.doeNativeComputePipelineGetBindGroupLayout = {
209
+ args: [FFIType.ptr, FFIType.u32],
210
+ returns: FFIType.ptr,
211
+ };
212
+ }
213
+ return dlopen(path, symbols);
207
214
  }
208
215
 
209
216
  // ---------------------------------------------------------------------------
@@ -626,6 +633,36 @@ function bufferMapSync(instancePtr, bufferPtr, mode, offset, size) {
626
633
  }
627
634
  }
628
635
 
636
+ function waitForSubmittedWorkDoneSync(instancePtr, queuePtr) {
637
+ let queueStatus = null;
638
+ let done = false;
639
+ const cb = new JSCallback(
640
+ (status, _msgData, _msgLen, _ud1, _ud2) => {
641
+ queueStatus = status;
642
+ done = true;
643
+ },
644
+ { args: [FFIType.u32, FFIType.ptr, FFIType.u64, FFIType.ptr, FFIType.ptr], returns: FFIType.void },
645
+ );
646
+ try {
647
+ const futureId = wgpu.symbols.doeQueueOnSubmittedWorkDoneFlat(
648
+ queuePtr,
649
+ CALLBACK_MODE_ALLOW_PROCESS_EVENTS,
650
+ cb.ptr,
651
+ null,
652
+ null,
653
+ );
654
+ if (futureId === 0 || futureId === 0n) {
655
+ throw new Error("[fawn-webgpu] queue work-done future unavailable");
656
+ }
657
+ processEventsUntilDone(instancePtr, () => done);
658
+ if (queueStatus !== REQUEST_DEVICE_STATUS_SUCCESS) {
659
+ throw new Error(`[fawn-webgpu] queue work-done failed (status=${queueStatus})`);
660
+ }
661
+ } finally {
662
+ cb.close();
663
+ }
664
+ }
665
+
629
666
  // ---------------------------------------------------------------------------
630
667
  // WebGPU wrapper classes — matches index.js surface exactly
631
668
  // ---------------------------------------------------------------------------
@@ -640,6 +677,10 @@ class DoeGPUBuffer {
640
677
  }
641
678
 
642
679
  async mapAsync(mode, offset = 0, size = this.size) {
680
+ if (this._queue?.hasPendingSubmissions()) {
681
+ waitForSubmittedWorkDoneSync(this._instance, this._queue._native);
682
+ this._queue.markSubmittedWorkDone();
683
+ }
643
684
  bufferMapSync(this._instance, this._native, mode, offset, size);
644
685
  this._mapMode = mode;
645
686
  }
@@ -727,6 +768,15 @@ class DoeGPUQueue {
727
768
  constructor(native, instance) {
728
769
  this._native = native;
729
770
  this._instance = instance;
771
+ this._pendingSubmissions = 0;
772
+ }
773
+
774
+ hasPendingSubmissions() {
775
+ return this._pendingSubmissions > 0;
776
+ }
777
+
778
+ markSubmittedWorkDone() {
779
+ this._pendingSubmissions = 0;
730
780
  }
731
781
 
732
782
  submit(commandBuffers) {
@@ -735,6 +785,9 @@ class DoeGPUQueue {
735
785
  ptrs[i] = BigInt(commandBuffers[i]._native);
736
786
  }
737
787
  wgpu.symbols.wgpuQueueSubmit(this._native, BigInt(commandBuffers.length), ptrs);
788
+ if (commandBuffers.length > 0) {
789
+ this._pendingSubmissions += commandBuffers.length;
790
+ }
738
791
  }
739
792
 
740
793
  writeBuffer(buffer, bufferOffset, data, dataOffset = 0, size) {
@@ -750,8 +803,9 @@ class DoeGPUQueue {
750
803
  }
751
804
 
752
805
  async onSubmittedWorkDone() {
753
- // Match the Node provider contract: Doe submit commits synchronously,
754
- // and mapAsync flushes when readback synchronization is required.
806
+ if (!this.hasPendingSubmissions()) return;
807
+ waitForSubmittedWorkDoneSync(this._instance, this._native);
808
+ this.markSubmittedWorkDone();
755
809
  }
756
810
  }
757
811
 
@@ -798,15 +852,38 @@ class DoeGPURenderPipeline {
798
852
  }
799
853
 
800
854
  class DoeGPUShaderModule {
801
- constructor(native) { this._native = native; }
855
+ constructor(native, code) {
856
+ this._native = native;
857
+ this._code = code;
858
+ }
802
859
  }
803
860
 
804
861
  class DoeGPUComputePipeline {
805
- constructor(native) { this._native = native; }
862
+ constructor(native, device, explicitLayout, autoLayoutEntriesByGroup) {
863
+ this._native = native;
864
+ this._device = device;
865
+ this._explicitLayout = explicitLayout;
866
+ this._autoLayoutEntriesByGroup = autoLayoutEntriesByGroup;
867
+ this._cachedLayouts = new Map();
868
+ }
806
869
 
807
870
  getBindGroupLayout(index) {
808
- const layout = wgpu.symbols.doeNativeComputePipelineGetBindGroupLayout(this._native, index);
809
- return new DoeGPUBindGroupLayout(layout);
871
+ if (this._explicitLayout) return this._explicitLayout;
872
+ if (this._cachedLayouts.has(index)) return this._cachedLayouts.get(index);
873
+
874
+ let layout;
875
+ if (this._autoLayoutEntriesByGroup && process.platform === "darwin") {
876
+ const entries = this._autoLayoutEntriesByGroup.get(index) ?? [];
877
+ layout = this._device.createBindGroupLayout({ entries });
878
+ } else {
879
+ const native = process.platform === "darwin"
880
+ ? wgpu.symbols.doeNativeComputePipelineGetBindGroupLayout(this._native, index)
881
+ : wgpu.symbols.wgpuComputePipelineGetBindGroupLayout(this._native, index);
882
+ layout = new DoeGPUBindGroupLayout(native);
883
+ }
884
+
885
+ this._cachedLayouts.set(index, layout);
886
+ return layout;
810
887
  }
811
888
  }
812
889
 
@@ -835,7 +912,7 @@ class DoeGPUDevice {
835
912
  createBuffer(descriptor) {
836
913
  const descBytes = buildBufferDescriptor(descriptor);
837
914
  const buf = wgpu.symbols.wgpuDeviceCreateBuffer(this._native, descBytes);
838
- return new DoeGPUBuffer(buf, this._instance, descriptor.size, descriptor.usage, this.queue._native);
915
+ return new DoeGPUBuffer(buf, this._instance, descriptor.size, descriptor.usage, this.queue);
839
916
  }
840
917
 
841
918
  createShaderModule(descriptor) {
@@ -844,18 +921,22 @@ class DoeGPUDevice {
844
921
  const { desc, _refs } = buildShaderModuleDescriptor(code);
845
922
  const mod = wgpu.symbols.wgpuDeviceCreateShaderModule(this._native, desc);
846
923
  void _refs;
847
- return new DoeGPUShaderModule(mod);
924
+ return new DoeGPUShaderModule(mod, code);
848
925
  }
849
926
 
850
927
  createComputePipeline(descriptor) {
851
928
  const shader = descriptor.compute?.module;
852
929
  const entryPoint = descriptor.compute?.entryPoint || "main";
853
930
  const layout = descriptor.layout === "auto" ? null : descriptor.layout;
931
+ const autoLayoutEntriesByGroup = layout ? null : inferAutoBindGroupLayouts(
932
+ shader?._code || "",
933
+ globals.GPUShaderStage.COMPUTE,
934
+ );
854
935
  const { desc, _refs } = buildComputePipelineDescriptor(
855
936
  shader._native, entryPoint, layout?._native ?? null);
856
937
  const native = wgpu.symbols.wgpuDeviceCreateComputePipeline(this._native, desc);
857
938
  void _refs;
858
- return new DoeGPUComputePipeline(native);
939
+ return new DoeGPUComputePipeline(native, this, layout, autoLayoutEntriesByGroup);
859
940
  }
860
941
 
861
942
  async createComputePipelineAsync(descriptor) {
package/src/bun.js CHANGED
@@ -1,2 +1,23 @@
1
- export * from "./index.js";
2
- export { default } from "./index.js";
1
+ import * as ffi from "./bun-ffi.js";
2
+ import * as full from "./full.js";
3
+ import { createDoeNamespace } from "./doe.js";
4
+
5
+ const runtime = process.platform === "linux" ? ffi : full;
6
+
7
+ export const doe = createDoeNamespace({
8
+ requestDevice: runtime.requestDevice,
9
+ });
10
+
11
+ export const create = runtime.create;
12
+ export const globals = runtime.globals;
13
+ export const setupGlobals = runtime.setupGlobals;
14
+ export const requestAdapter = runtime.requestAdapter;
15
+ export const requestDevice = runtime.requestDevice;
16
+ export const providerInfo = runtime.providerInfo;
17
+ export const createDoeRuntime = runtime.createDoeRuntime;
18
+ export const runDawnVsDoeCompare = runtime.runDawnVsDoeCompare;
19
+
20
+ export default {
21
+ ...runtime,
22
+ doe,
23
+ };
@@ -0,0 +1,162 @@
1
+ import type {
2
+ BoundDoeNamespace,
3
+ DoeKernelDispatchOptions,
4
+ DoeNamespace,
5
+ DoeRunComputeOptions,
6
+ } from "./doe.js";
7
+ import type {
8
+ DoeRuntime,
9
+ DoeRuntimeRunResult,
10
+ ProviderInfo,
11
+ } from "./full.js";
12
+
13
+ export interface ComputeGPUBuffer {
14
+ readonly size: number;
15
+ readonly usage: number;
16
+ mapAsync(mode: number, offset?: number, size?: number): Promise<void>;
17
+ getMappedRange(offset?: number, size?: number): ArrayBuffer;
18
+ assertMappedPrefixF32?(expected: number[], count: number): boolean;
19
+ unmap(): void;
20
+ destroy(): void;
21
+ }
22
+
23
+ export interface ComputeBindGroupLayout {}
24
+
25
+ export interface ComputeBindGroup {}
26
+
27
+ export interface ComputePipelineLayout {}
28
+
29
+ export interface ComputeQuerySet {
30
+ destroy(): void;
31
+ }
32
+
33
+ export interface ComputeComputePipeline {
34
+ getBindGroupLayout(index: number): ComputeBindGroupLayout;
35
+ }
36
+
37
+ export interface ComputePassEncoder {
38
+ setPipeline(pipeline: ComputeComputePipeline): void;
39
+ setBindGroup(index: number, bindGroup: ComputeBindGroup): void;
40
+ dispatchWorkgroups(x: number, y?: number, z?: number): void;
41
+ dispatchWorkgroupsIndirect(indirectBuffer: ComputeGPUBuffer, indirectOffset?: number): void;
42
+ writeTimestamp?(querySet: ComputeQuerySet, queryIndex: number): void;
43
+ end(): void;
44
+ }
45
+
46
+ export interface ComputeCommandEncoder {
47
+ beginComputePass(descriptor?: GPUComputePassDescriptor): ComputePassEncoder;
48
+ copyBufferToBuffer(
49
+ source: ComputeGPUBuffer,
50
+ sourceOffset: number,
51
+ target: ComputeGPUBuffer,
52
+ targetOffset: number,
53
+ size: number
54
+ ): void;
55
+ resolveQuerySet?(
56
+ querySet: ComputeQuerySet,
57
+ firstQuery: number,
58
+ queryCount: number,
59
+ destination: ComputeGPUBuffer,
60
+ destinationOffset: number
61
+ ): void;
62
+ finish(): GPUCommandBuffer;
63
+ }
64
+
65
+ export interface ComputeQueue {
66
+ submit(commandBuffers: GPUCommandBuffer[]): void;
67
+ writeBuffer(
68
+ buffer: ComputeGPUBuffer,
69
+ bufferOffset: number,
70
+ data: BufferSource,
71
+ dataOffset?: number,
72
+ size?: number
73
+ ): void;
74
+ onSubmittedWorkDone(): Promise<void>;
75
+ }
76
+
77
+ export interface ComputeGPUDevice {
78
+ readonly queue: ComputeQueue;
79
+ readonly limits: GPUSupportedLimits;
80
+ readonly features: GPUSupportedFeatures;
81
+ createBuffer(descriptor: GPUBufferDescriptor): ComputeGPUBuffer;
82
+ createShaderModule(descriptor: GPUShaderModuleDescriptor): GPUShaderModule;
83
+ createComputePipeline(descriptor: GPUComputePipelineDescriptor): ComputeComputePipeline;
84
+ createComputePipelineAsync(descriptor: GPUComputePipelineDescriptor): Promise<ComputeComputePipeline>;
85
+ createBindGroupLayout(descriptor: GPUBindGroupLayoutDescriptor): ComputeBindGroupLayout;
86
+ createBindGroup(descriptor: GPUBindGroupDescriptor): ComputeBindGroup;
87
+ createPipelineLayout(descriptor: GPUPipelineLayoutDescriptor): ComputePipelineLayout;
88
+ createCommandEncoder(descriptor?: GPUCommandEncoderDescriptor): ComputeCommandEncoder;
89
+ createQuerySet?(descriptor: GPUQuerySetDescriptor): ComputeQuerySet;
90
+ destroy(): void;
91
+ }
92
+
93
+ export interface ComputeGPUAdapter {
94
+ readonly limits: GPUSupportedLimits;
95
+ readonly features: GPUSupportedFeatures;
96
+ requestDevice(descriptor?: GPUDeviceDescriptor): Promise<ComputeGPUDevice>;
97
+ destroy(): void;
98
+ }
99
+
100
+ export interface ComputeGPU {
101
+ requestAdapter(options?: GPURequestAdapterOptions): Promise<ComputeGPUAdapter | null>;
102
+ }
103
+
104
+ export interface RequestDeviceOptions {
105
+ adapterOptions?: GPURequestAdapterOptions;
106
+ deviceDescriptor?: GPUDeviceDescriptor;
107
+ createArgs?: string[] | null;
108
+ }
109
+
110
+ export interface ComputeDoeRunComputeOptions extends DoeRunComputeOptions<ComputeGPUBuffer> {}
111
+
112
+ export interface ComputeDoeKernelDispatchOptions extends DoeKernelDispatchOptions<ComputeGPUBuffer> {}
113
+
114
+ export interface ComputeDoeKernel {
115
+ readonly device: ComputeGPUDevice;
116
+ readonly entryPoint: string;
117
+ dispatch(options: ComputeDoeKernelDispatchOptions): Promise<void>;
118
+ }
119
+
120
+ export interface ComputeBoundDoeNamespace
121
+ extends BoundDoeNamespace<ComputeGPUDevice, ComputeGPUBuffer, ComputeDoeKernel, ComputeDoeRunComputeOptions> {}
122
+
123
+ export interface ComputeDoeNamespace
124
+ extends DoeNamespace<
125
+ ComputeGPUDevice,
126
+ ComputeGPUBuffer,
127
+ ComputeDoeKernel,
128
+ ComputeBoundDoeNamespace,
129
+ ComputeDoeRunComputeOptions,
130
+ RequestDeviceOptions
131
+ > {}
132
+
133
+ export const globals: Record<string, unknown>;
134
+ export function create(createArgs?: string[] | null): ComputeGPU;
135
+ export function setupGlobals(target?: object, createArgs?: string[] | null): ComputeGPU;
136
+ export function requestAdapter(
137
+ adapterOptions?: GPURequestAdapterOptions,
138
+ createArgs?: string[] | null
139
+ ): Promise<ComputeGPUAdapter | null>;
140
+ export function requestDevice(options?: RequestDeviceOptions): Promise<ComputeGPUDevice>;
141
+ export function providerInfo(): ProviderInfo;
142
+ export function createDoeRuntime(options?: {
143
+ binPath?: string;
144
+ libPath?: string;
145
+ }): DoeRuntime;
146
+ export function runDawnVsDoeCompare(options: Record<string, unknown>): DoeRuntimeRunResult;
147
+
148
+ export const doe: ComputeDoeNamespace;
149
+
150
+ declare const _default: {
151
+ create: typeof create;
152
+ globals: typeof globals;
153
+ setupGlobals: typeof setupGlobals;
154
+ requestAdapter: typeof requestAdapter;
155
+ requestDevice: typeof requestDevice;
156
+ providerInfo: typeof providerInfo;
157
+ createDoeRuntime: typeof createDoeRuntime;
158
+ runDawnVsDoeCompare: typeof runDawnVsDoeCompare;
159
+ doe: ComputeDoeNamespace;
160
+ };
161
+
162
+ export default _default;