@simulatte/webgpu 0.2.4 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/CHANGELOG.md +21 -0
  2. package/README.md +263 -71
  3. package/api-contract.md +70 -139
  4. package/assets/package-layers.svg +63 -0
  5. package/examples/direct-webgpu/compute-dispatch.js +66 -0
  6. package/examples/direct-webgpu/explicit-bind-group.js +85 -0
  7. package/examples/direct-webgpu/request-device.js +10 -0
  8. package/examples/doe-api/buffers-readback.js +9 -0
  9. package/examples/doe-api/compile-and-dispatch.js +30 -0
  10. package/examples/doe-api/compute-dispatch.js +25 -0
  11. package/examples/doe-routines/compute-once-like-input.js +36 -0
  12. package/examples/doe-routines/compute-once-matmul.js +53 -0
  13. package/examples/doe-routines/compute-once-multiple-inputs.js +27 -0
  14. package/examples/doe-routines/compute-once.js +23 -0
  15. package/headless-webgpu-comparison.md +2 -2
  16. package/layering-plan.md +1 -1
  17. package/native/doe_napi.c +102 -12
  18. package/package.json +2 -1
  19. package/prebuilds/darwin-arm64/doe_napi.node +0 -0
  20. package/prebuilds/darwin-arm64/libwebgpu_doe.dylib +0 -0
  21. package/prebuilds/darwin-arm64/metadata.json +6 -6
  22. package/prebuilds/linux-x64/doe_napi.node +0 -0
  23. package/prebuilds/linux-x64/libwebgpu_doe.so +0 -0
  24. package/prebuilds/linux-x64/metadata.json +5 -5
  25. package/scripts/generate-readme-assets.js +79 -6
  26. package/scripts/prebuild.js +23 -19
  27. package/src/auto_bind_group_layout.js +32 -0
  28. package/src/bun-ffi.js +93 -12
  29. package/src/bun.js +23 -2
  30. package/src/compute.d.ts +2 -1
  31. package/src/compute.js +671 -33
  32. package/src/doe.d.ts +127 -27
  33. package/src/doe.js +480 -114
  34. package/src/full.d.ts +8 -1
  35. package/src/full.js +28 -3
  36. package/src/index.js +1013 -38
package/api-contract.md CHANGED
@@ -3,16 +3,47 @@
3
3
  Contract version: `v1`
4
4
 
5
5
  Scope: current headless WebGPU package contract for Node.js and Bun, with a
6
- default `full` surface, an explicit `compute` subpath, and Doe runtime helpers
7
- used by benchmarking, CI, and artifact-backed comparison workflows.
6
+ default `full` surface, an explicit `compute` subpath, and the Doe API / Doe
7
+ routines surface used by benchmarking, CI, and artifact-backed comparison
8
+ workflows.
9
+
10
+ Terminology in this contract is explicit:
11
+
12
+ - `Doe runtime`
13
+ the Zig/native WebGPU runtime underneath the package
14
+ - `Doe API`
15
+ the explicit JS convenience surface under `doe.bind(...)`, `gpu.buffers.*`,
16
+ `gpu.compute.run(...)`, and `gpu.compute.compile(...)`
17
+ - `Doe routines`
18
+ the narrower, more opinionated JS flows layered on that same runtime;
19
+ currently `gpu.compute.once(...)`
8
20
 
9
21
  For the current `compute` vs `full` support split, see
10
22
  [`./support-contracts.md`](./support-contracts.md).
11
23
 
24
+ Exact type and method shapes live in:
25
+
26
+ - [`./src/full.d.ts`](./src/full.d.ts)
27
+ - [`./src/compute.d.ts`](./src/compute.d.ts)
28
+ - [`./src/doe.d.ts`](./src/doe.d.ts)
29
+
12
30
  This contract covers package-surface GPU access, provider metadata, and helper
13
31
  entrypoints. It does not promise DOM/canvas ownership or browser-process
14
32
  parity.
15
33
 
34
+ ## API styles
35
+
36
+ The current package surface is organized around three API styles:
37
+
38
+ - `Direct WebGPU`
39
+ raw `requestAdapter(...)`, `requestDevice(...)`, and direct `device.*` usage
40
+ - `Doe API`
41
+ the package's explicit JS convenience surface under `doe.bind(...)`,
42
+ `gpu.buffers.*`, `gpu.compute.run(...)`, and `gpu.compute.compile(...)`
43
+ - `Doe routines`
44
+ the package's more opinionated precomposed flows; currently
45
+ `gpu.compute.once(...)`
46
+
16
47
  ## Export surfaces
17
48
 
18
49
  ### `@simulatte/webgpu`
@@ -22,8 +53,8 @@ Default package surface.
22
53
  Contract:
23
54
 
24
55
  - headless `full` surface
25
- - includes compute plus render/sampler/surface APIs already exposed by the package runtime
26
- - also exports the `doe` ergonomic namespace
56
+ - includes compute plus render/sampler/surface APIs already exposed by the Doe runtime package surface
57
+ - also exports the shared `doe` namespace for the Doe API and Doe routines surface
27
58
 
28
59
  ### `@simulatte/webgpu/compute`
29
60
 
@@ -33,7 +64,7 @@ Contract:
33
64
 
34
65
  - sized for AI workloads and other buffer/dispatch-heavy headless execution
35
66
  - excludes render/sampler/surface methods from the public JS facade
36
- - also exports the same `doe` ergonomic namespace
67
+ - also exports the same `doe` namespace for the Doe API and Doe routines surface
37
68
 
38
69
  ## Shared runtime API
39
70
 
@@ -42,175 +73,75 @@ Modules:
42
73
  - `@simulatte/webgpu`
43
74
  - `@simulatte/webgpu/compute`
44
75
 
45
- ### `create(createArgs?)`
46
-
47
- Input:
48
-
49
- - `createArgs?: string[]` (currently ignored by the default Doe-native provider)
50
-
51
- Behavior:
52
-
53
- - loads the Doe-native N-API addon and `libwebgpu_doe`
54
- - returns a GPU object backed by the in-tree Doe provider
55
-
56
- Output:
57
-
58
- - `GPU` object with `requestAdapter(...)`
59
-
60
- ### `globals`
61
-
62
- Output:
63
-
64
- - provider globals object suitable for `Object.assign(globalThis, globals)`
65
-
66
- ### `setupGlobals(target?, createArgs?)`
67
-
68
- Input:
69
-
70
- - `target?: object` (default: `globalThis`)
71
- - `createArgs?: string[]`
72
-
73
- Behavior:
74
-
75
- - installs provider globals if missing
76
- - installs `navigator.gpu` if missing
77
-
78
- Output:
76
+ ### Top-level package API
79
77
 
80
- - `GPU` object
78
+ The exact signatures are defined in the `.d.ts` files above. At the contract
79
+ level:
81
80
 
82
- ### `requestAdapter(adapterOptions?, createArgs?)`
81
+ - `create(...)` loads the Doe-native addon/runtime and returns a package-local
82
+ `GPU` object.
83
+ - `globals` exposes provider globals suitable for `Object.assign(...)` or
84
+ bootstrap wiring.
85
+ - `setupGlobals(...)` installs globals and `navigator.gpu` when missing.
86
+ - `requestAdapter(...)` and `requestDevice(...)` are the `Direct WebGPU` entry
87
+ points.
83
88
 
84
- Output:
85
-
86
- - `Promise<GPUAdapter | null>`
87
-
88
- ### `requestDevice(options?)`
89
-
90
- Input:
91
-
92
- - `options.adapterOptions?: object`
93
- - `options.deviceDescriptor?: object`
94
- - `options.createArgs?: string[]`
95
-
96
- Output:
97
-
98
- - `Promise<GPUDevice>`
99
-
100
- On `@simulatte/webgpu/compute`, the returned device is a compute-only facade:
89
+ On `@simulatte/webgpu/compute`, the returned device is intentionally
90
+ compute-only:
101
91
 
102
92
  - buffer / bind group / compute pipeline / command encoder / queue methods are available
103
93
  - render / sampler / surface methods are intentionally absent from the facade
104
94
 
105
95
  ### `providerInfo()`
106
96
 
107
- Output object:
108
-
109
- - `module: string`
110
- - `loaded: boolean`
111
- - `loadError: string`
112
- - `defaultCreateArgs: string[]`
113
- - `doeNative: boolean`
114
- - `libraryFlavor: string`
115
- - `doeLibraryPath: string`
116
- - `buildMetadataSource: string`
117
- - `buildMetadataPath: string`
118
- - `leanVerifiedBuild: boolean | null`
119
- - `proofArtifactSha256: string | null`
120
-
121
97
  Behavior:
122
98
 
123
99
  - reports package-surface library provenance when prebuild metadata or Zig build
124
100
  metadata is available
125
101
  - does not guess: if metadata is unavailable, `leanVerifiedBuild` is `null`
102
+ - reports whether the Doe-native path is loaded and where build metadata came from
126
103
 
127
104
  ### `doe`
128
105
 
129
- Output object:
130
-
131
- - `bind(device)`
132
- - `createBuffer(device, options)`
133
- - `createBufferFromData(device, data, options?)`
134
- - `readBuffer(device, buffer, TypedArray, options?)`
135
- - `runCompute(device, options)`
136
- - `compileCompute(device, options)`
137
-
138
106
  Behavior:
139
107
 
140
- - provides an ergonomic JS surface for common headless compute tasks
108
+ - provides the `Doe API` and `Doe routines` surface for common headless
109
+ compute tasks
110
+ - the exported `doe` namespace is the JS convenience surface, distinct from
111
+ the underlying Doe runtime
112
+ - `doe.requestDevice(options?)` resolves the package-local `requestDevice(...)` and returns
113
+ the bound helper object directly
141
114
  - supports both static helper calls and `doe.bind(device)` for device-bound workflows
142
- - infers `runCompute(...).bindings` access from Doe helper-created buffer usage when that
143
- usage maps to one bindable access mode (`uniform`, `storage-read`, `storage-readwrite`)
115
+ - helper methods are grouped under `buffers.*` and `compute.*`
116
+ - `buffers.*`, `compute.run(...)`, and `compute.compile(...)` are the main
117
+ `Doe API` surface
118
+ - `compute.once(...)` is the first `Doe routines` path and stays intentionally
119
+ narrow: typed-array/headless one-call execution, not a replacement for
120
+ explicit reusable resource ownership
121
+ - infers `compute.run(...).bindings` access from Doe helper-created buffer usage when that
122
+ usage maps to one bindable access mode (`uniform`, `storageRead`, `storageReadWrite`)
123
+ - `compute.once(...)` accepts Doe usage tokens only; raw numeric WebGPU usage flags stay on
124
+ the more explicit `Doe API` surface
144
125
  - fails fast for bare bindings that do not carry Doe helper usage metadata or whose
145
126
  usage is non-bindable/ambiguous; callers must pass `{ buffer, access }` explicitly
146
127
  - additive only; it does not replace the raw WebGPU-facing package API
147
128
 
148
129
  ### `createDoeRuntime(options?)`
149
130
 
150
- Input:
151
-
152
- - `options.binPath?: string`
153
- - `options.libPath?: string`
154
-
155
- Output object:
156
-
157
- - `binPath: string`
158
- - `libPath: string | null`
159
- - `runRaw(args: string[], spawnOptions?): RunResult`
160
- - `runBench(options: BenchOptions): BenchResult`
161
-
162
- `BenchOptions`:
163
-
164
- - `commandsPath: string` (required)
165
- - `quirksPath?: string`
166
- - `vendor?: string`
167
- - `api?: string`
168
- - `family?: string`
169
- - `driver?: string`
170
- - `traceJsonlPath?: string`
171
- - `traceMetaPath?: string`
172
- - `uploadBufferUsage?: string`
173
- - `uploadSubmitEvery?: number`
174
- - `queueWaitMode?: string`
175
- - `queueSyncMode?: string`
176
- - `extraArgs?: string[]`
177
-
178
- `RunResult`:
179
-
180
- - `ok: boolean`
181
- - `exitCode: number`
182
- - `stdout: string`
183
- - `stderr: string`
184
- - `signal: string | null`
185
- - `command: string[]`
186
-
187
- `BenchResult` extends `RunResult` with:
131
+ Behavior:
188
132
 
189
- - `traceJsonlPath: string | null`
190
- - `traceMetaPath: string | null`
191
- - `traceMeta: object | null`
133
+ - returns the local Doe runtime/CLI wrapper used for command-stream execution
134
+ and benchmark orchestration from Node/Bun environments
135
+ - preserves explicit file-path ownership for the binary/library locations rather
136
+ than hiding them behind package-only assumptions
192
137
 
193
138
  ### `runDawnVsDoeCompare(options)`
194
139
 
195
- Input:
196
-
197
- - `repoRoot?: string`
198
- - `compareScriptPath?: string`
199
- - `pythonBin?: string`
200
- - `configPath?: string`
201
- - `outPath?: string`
202
- - `extraArgs?: string[]`
203
- - `env?: Record<string, string>`
204
-
205
140
  Behavior:
206
141
 
207
142
  - wraps `bench/compare_dawn_vs_doe.py`
208
143
  - requires either `configPath` or `--config` in `extraArgs`
209
144
 
210
- Output:
211
-
212
- - `RunResult`
213
-
214
145
  ## CLI contract
215
146
 
216
147
  ### `fawn-webgpu-bench`
@@ -0,0 +1,63 @@
1
+ <!-- Generated by scripts/generate-readme-assets.js. Do not edit by hand. -->
2
+ <svg xmlns="http://www.w3.org/2000/svg" width="1200" height="470" viewBox="0 0 1200 470" role="img" aria-labelledby="layers-title layers-desc">
3
+ <title id="layers-title">@simulatte/webgpu layered package graph</title>
4
+ <desc id="layers-desc">Layered package graph showing direct WebGPU, Doe API, and Doe routines over the same package surfaces.</desc>
5
+ <defs>
6
+ <linearGradient id="layers-bg" x1="0%" y1="0%" x2="100%" y2="100%">
7
+ <stop offset="0%" stop-color="#050816"/>
8
+ <stop offset="100%" stop-color="#140c1f"/>
9
+ </linearGradient>
10
+ <radialGradient id="layers-glow-top" cx="25%" cy="18%" r="55%">
11
+ <stop offset="0%" stop-color="#ef444430"/>
12
+ <stop offset="55%" stop-color="#7c3aed18"/>
13
+ <stop offset="100%" stop-color="#00000000"/>
14
+ </radialGradient>
15
+ <radialGradient id="layers-glow-bottom" cx="78%" cy="84%" r="52%">
16
+ <stop offset="0%" stop-color="#f59e0b26"/>
17
+ <stop offset="60%" stop-color="#f9731618"/>
18
+ <stop offset="100%" stop-color="#00000000"/>
19
+ </radialGradient>
20
+ <linearGradient id="layers-root" x1="0%" y1="0%" x2="100%" y2="100%">
21
+ <stop offset="0%" stop-color="#7c3aed"/>
22
+ <stop offset="100%" stop-color="#ef4444"/>
23
+ </linearGradient>
24
+ <linearGradient id="layers-direct" x1="0%" y1="0%" x2="100%" y2="100%">
25
+ <stop offset="0%" stop-color="#ef4444"/>
26
+ <stop offset="100%" stop-color="#f97316"/>
27
+ </linearGradient>
28
+ <linearGradient id="layers-api" x1="0%" y1="0%" x2="100%" y2="100%">
29
+ <stop offset="0%" stop-color="#f97316"/>
30
+ <stop offset="100%" stop-color="#f59e0b"/>
31
+ </linearGradient>
32
+ <linearGradient id="layers-routines" x1="0%" y1="0%" x2="100%" y2="100%">
33
+ <stop offset="0%" stop-color="#f59e0b"/>
34
+ <stop offset="100%" stop-color="#eab308"/>
35
+ </linearGradient>
36
+ <filter id="shadow" x="-20%" y="-20%" width="140%" height="140%">
37
+ <feDropShadow dx="0" dy="10" stdDeviation="14" flood-color="#000000" flood-opacity="0.32"/>
38
+ </filter>
39
+ <style>
40
+ .title { font: 700 34px "Segoe UI", "Helvetica Neue", Arial, sans-serif; fill: #ffffff; paint-order: stroke fill; stroke: #000000; stroke-width: 2px; stroke-linejoin: round; }
41
+ .subtitle { font: 500 18px "Segoe UI", "Helvetica Neue", Arial, sans-serif; fill: #cbd5e1; paint-order: stroke fill; stroke: #000000; stroke-width: 2px; stroke-linejoin: round; }
42
+ .nodeTitle { font: 700 22px "Segoe UI", "Helvetica Neue", Arial, sans-serif; fill: #ffffff; paint-order: stroke fill; stroke: #000000; stroke-width: 2px; stroke-linejoin: round; }
43
+ .box { stroke-width: 2.5; filter: url(#shadow); }
44
+ </style>
45
+ </defs>
46
+ <rect width="1200" height="470" fill="url(#layers-bg)"/>
47
+ <rect width="1200" height="470" fill="url(#layers-glow-top)"/>
48
+ <rect width="1200" height="470" fill="url(#layers-glow-bottom)"/>
49
+ <text x="64" y="62" class="title">Same package, four layers</text>
50
+ <text x="64" y="94" class="subtitle">The package surface stays the same while the API gets progressively higher-level.</text>
51
+
52
+ <rect x="170" y="122" width="860" height="64" rx="20" fill="url(#layers-root)" stroke="#c4b5fd" class="box"/>
53
+ <text x="600" y="162" text-anchor="middle" class="nodeTitle">@simulatte/webgpu / @simulatte/webgpu/compute</text>
54
+
55
+ <rect x="220" y="222" width="760" height="52" rx="18" fill="url(#layers-direct)" stroke="#fca5a5" class="box"/>
56
+ <text x="600" y="255" text-anchor="middle" class="nodeTitle">Direct WebGPU</text>
57
+
58
+ <rect x="280" y="310" width="640" height="52" rx="18" fill="url(#layers-api)" stroke="#fdba74" class="box"/>
59
+ <text x="600" y="343" text-anchor="middle" class="nodeTitle">Doe API</text>
60
+
61
+ <rect x="360" y="398" width="480" height="52" rx="18" fill="url(#layers-routines)" stroke="#fde68a" class="box"/>
62
+ <text x="600" y="431" text-anchor="middle" class="nodeTitle">Doe routines</text>
63
+ </svg>
@@ -0,0 +1,66 @@
1
+ import { globals, requestDevice } from "@simulatte/webgpu";
2
+
3
+ const device = await requestDevice();
4
+
5
+ const input = new Float32Array([1, 2, 3, 4]);
6
+ const inputBuffer = device.createBuffer({
7
+ size: input.byteLength,
8
+ usage: globals.GPUBufferUsage.STORAGE | globals.GPUBufferUsage.COPY_DST,
9
+ });
10
+ device.queue.writeBuffer(inputBuffer, 0, input);
11
+
12
+ const outputBuffer = device.createBuffer({
13
+ size: input.byteLength,
14
+ usage: globals.GPUBufferUsage.STORAGE | globals.GPUBufferUsage.COPY_SRC,
15
+ });
16
+
17
+ const readbackBuffer = device.createBuffer({
18
+ size: input.byteLength,
19
+ usage: globals.GPUBufferUsage.COPY_DST | globals.GPUBufferUsage.MAP_READ,
20
+ });
21
+
22
+ const shader = device.createShaderModule({
23
+ code: `
24
+ @group(0) @binding(0) var<storage, read> src: array<f32>;
25
+ @group(0) @binding(1) var<storage, read_write> dst: array<f32>;
26
+
27
+ @compute @workgroup_size(4)
28
+ fn main(@builtin(global_invocation_id) gid: vec3u) {
29
+ let i = gid.x;
30
+ dst[i] = src[i] * 2.0;
31
+ }
32
+ `,
33
+ });
34
+
35
+ const pipeline = device.createComputePipeline({
36
+ layout: "auto",
37
+ compute: {
38
+ module: shader,
39
+ entryPoint: "main",
40
+ },
41
+ });
42
+
43
+ const bindGroup = device.createBindGroup({
44
+ layout: pipeline.getBindGroupLayout(0),
45
+ entries: [
46
+ { binding: 0, resource: { buffer: inputBuffer } },
47
+ { binding: 1, resource: { buffer: outputBuffer } },
48
+ ],
49
+ });
50
+
51
+ const encoder = device.createCommandEncoder();
52
+ const pass = encoder.beginComputePass();
53
+ pass.setPipeline(pipeline);
54
+ pass.setBindGroup(0, bindGroup);
55
+ pass.dispatchWorkgroups(1);
56
+ pass.end();
57
+ encoder.copyBufferToBuffer(outputBuffer, 0, readbackBuffer, 0, input.byteLength);
58
+
59
+ device.queue.submit([encoder.finish()]);
60
+ await device.queue.onSubmittedWorkDone();
61
+
62
+ await readbackBuffer.mapAsync(globals.GPUMapMode.READ);
63
+ const result = new Float32Array(readbackBuffer.getMappedRange().slice(0));
64
+ readbackBuffer.unmap();
65
+
66
+ console.log(JSON.stringify(Array.from(result)));
@@ -0,0 +1,85 @@
1
+ import { globals, requestDevice } from "@simulatte/webgpu";
2
+
3
+ const device = await requestDevice();
4
+
5
+ const input = new Float32Array([1, 2, 3, 4]);
6
+ const inputBuffer = device.createBuffer({
7
+ size: input.byteLength,
8
+ usage: globals.GPUBufferUsage.STORAGE | globals.GPUBufferUsage.COPY_DST,
9
+ });
10
+ device.queue.writeBuffer(inputBuffer, 0, input);
11
+
12
+ const outputBuffer = device.createBuffer({
13
+ size: input.byteLength,
14
+ usage: globals.GPUBufferUsage.STORAGE | globals.GPUBufferUsage.COPY_SRC,
15
+ });
16
+
17
+ const readbackBuffer = device.createBuffer({
18
+ size: input.byteLength,
19
+ usage: globals.GPUBufferUsage.COPY_DST | globals.GPUBufferUsage.MAP_READ,
20
+ });
21
+
22
+ const shader = device.createShaderModule({
23
+ code: `
24
+ @group(0) @binding(0) var<storage, read> src: array<f32>;
25
+ @group(0) @binding(1) var<storage, read_write> dst: array<f32>;
26
+
27
+ @compute @workgroup_size(4)
28
+ fn main(@builtin(global_invocation_id) gid: vec3u) {
29
+ let i = gid.x;
30
+ dst[i] = src[i] * 4.0;
31
+ }
32
+ `,
33
+ });
34
+
35
+ const bindGroupLayout = device.createBindGroupLayout({
36
+ entries: [
37
+ {
38
+ binding: 0,
39
+ visibility: globals.GPUShaderStage.COMPUTE,
40
+ buffer: { type: "read-only-storage" },
41
+ },
42
+ {
43
+ binding: 1,
44
+ visibility: globals.GPUShaderStage.COMPUTE,
45
+ buffer: { type: "storage" },
46
+ },
47
+ ],
48
+ });
49
+
50
+ const pipelineLayout = device.createPipelineLayout({
51
+ bindGroupLayouts: [bindGroupLayout],
52
+ });
53
+
54
+ const pipeline = device.createComputePipeline({
55
+ layout: pipelineLayout,
56
+ compute: {
57
+ module: shader,
58
+ entryPoint: "main",
59
+ },
60
+ });
61
+
62
+ const bindGroup = device.createBindGroup({
63
+ layout: bindGroupLayout,
64
+ entries: [
65
+ { binding: 0, resource: { buffer: inputBuffer } },
66
+ { binding: 1, resource: { buffer: outputBuffer } },
67
+ ],
68
+ });
69
+
70
+ const encoder = device.createCommandEncoder();
71
+ const pass = encoder.beginComputePass();
72
+ pass.setPipeline(pipeline);
73
+ pass.setBindGroup(0, bindGroup);
74
+ pass.dispatchWorkgroups(1);
75
+ pass.end();
76
+ encoder.copyBufferToBuffer(outputBuffer, 0, readbackBuffer, 0, input.byteLength);
77
+
78
+ device.queue.submit([encoder.finish()]);
79
+ await device.queue.onSubmittedWorkDone();
80
+
81
+ await readbackBuffer.mapAsync(globals.GPUMapMode.READ);
82
+ const result = new Float32Array(readbackBuffer.getMappedRange().slice(0));
83
+ readbackBuffer.unmap();
84
+
85
+ console.log(JSON.stringify(Array.from(result)));
@@ -0,0 +1,10 @@
1
+ import { requestDevice } from "@simulatte/webgpu";
2
+
3
+ const device = await requestDevice();
4
+
5
+ console.log(JSON.stringify({
6
+ createBuffer: typeof device.createBuffer === "function",
7
+ createComputePipeline: typeof device.createComputePipeline === "function",
8
+ createRenderPipeline: typeof device.createRenderPipeline === "function",
9
+ writeBuffer: typeof device.queue?.writeBuffer === "function",
10
+ }));
@@ -0,0 +1,9 @@
1
+ import { doe } from "@simulatte/webgpu/compute";
2
+
3
+ const gpu = await doe.requestDevice();
4
+ const src = gpu.buffers.fromData(new Float32Array([1, 2, 3, 4]), {
5
+ usage: ["storageRead", "readback"],
6
+ });
7
+
8
+ const result = await gpu.buffers.read(src, Float32Array);
9
+ console.log(JSON.stringify(Array.from(result)));
@@ -0,0 +1,30 @@
1
+ import { doe } from "@simulatte/webgpu/compute";
2
+
3
+ const gpu = await doe.requestDevice();
4
+ const src = gpu.buffers.fromData(new Float32Array([1, 2, 3, 4]));
5
+ const dst = gpu.buffers.like(src, {
6
+ usage: "storageReadWrite",
7
+ });
8
+
9
+ const kernel = gpu.compute.compile({
10
+ code: `
11
+ @group(0) @binding(0) var<storage, read> src: array<f32>;
12
+ @group(0) @binding(1) var<storage, read_write> dst: array<f32>;
13
+
14
+ @compute @workgroup_size(4)
15
+ fn main(@builtin(global_invocation_id) gid: vec3u) {
16
+ let i = gid.x;
17
+ dst[i] = src[i] * 5.0;
18
+ }
19
+ `,
20
+ bindings: [src, dst],
21
+ workgroups: 1,
22
+ });
23
+
24
+ await kernel.dispatch({
25
+ bindings: [src, dst],
26
+ workgroups: 1,
27
+ });
28
+
29
+ const result = await gpu.buffers.read(dst, Float32Array);
30
+ console.log(JSON.stringify(Array.from(result)));
@@ -0,0 +1,25 @@
1
+ import { doe } from "@simulatte/webgpu/compute";
2
+
3
+ const gpu = await doe.requestDevice();
4
+ const src = gpu.buffers.fromData(new Float32Array([1, 2, 3, 4]));
5
+ const dst = gpu.buffers.like(src, {
6
+ usage: "storageReadWrite",
7
+ });
8
+
9
+ await gpu.compute.run({
10
+ code: `
11
+ @group(0) @binding(0) var<storage, read> src: array<f32>;
12
+ @group(0) @binding(1) var<storage, read_write> dst: array<f32>;
13
+
14
+ @compute @workgroup_size(4)
15
+ fn main(@builtin(global_invocation_id) gid: vec3u) {
16
+ let i = gid.x;
17
+ dst[i] = src[i] * 2.0;
18
+ }
19
+ `,
20
+ bindings: [src, dst],
21
+ workgroups: 1,
22
+ });
23
+
24
+ const result = await gpu.buffers.read(dst, Float32Array);
25
+ console.log(JSON.stringify(Array.from(result)));
@@ -0,0 +1,36 @@
1
+ import { doe } from "@simulatte/webgpu/compute";
2
+
3
+ const gpu = await doe.requestDevice();
4
+
5
+ const result = await gpu.compute.once({
6
+ code: `
7
+ struct Scale {
8
+ value: f32,
9
+ };
10
+
11
+ @group(0) @binding(0) var<uniform> scale: Scale;
12
+ @group(0) @binding(1) var<storage, read> src: array<f32>;
13
+ @group(0) @binding(2) var<storage, read_write> dst: array<f32>;
14
+
15
+ @compute @workgroup_size(4)
16
+ fn main(@builtin(global_invocation_id) gid: vec3u) {
17
+ let i = gid.x;
18
+ dst[i] = src[i] * scale.value;
19
+ }
20
+ `,
21
+ inputs: [
22
+ {
23
+ data: new Float32Array([2]),
24
+ usage: "uniform",
25
+ access: "uniform",
26
+ },
27
+ new Float32Array([1, 2, 3, 4]),
28
+ ],
29
+ output: {
30
+ type: Float32Array,
31
+ likeInput: 1,
32
+ },
33
+ workgroups: [1, 1],
34
+ });
35
+
36
+ console.log(JSON.stringify(Array.from(result)));
@@ -0,0 +1,53 @@
1
+ import { doe } from "@simulatte/webgpu/compute";
2
+
3
+ const gpu = await doe.requestDevice();
4
+ const M = 256;
5
+ const K = 512;
6
+ const N = 256;
7
+
8
+ const lhs = Float32Array.from({ length: M * K }, (_, i) => (i % 17) / 17);
9
+ const rhs = Float32Array.from({ length: K * N }, (_, i) => (i % 13) / 13);
10
+ const dims = new Uint32Array([M, K, N, 0]);
11
+
12
+ const result = await gpu.compute.once({
13
+ code: `
14
+ struct Dims {
15
+ m: u32,
16
+ k: u32,
17
+ n: u32,
18
+ _pad: u32,
19
+ };
20
+
21
+ @group(0) @binding(0) var<uniform> dims: Dims;
22
+ @group(0) @binding(1) var<storage, read> lhs: array<f32>;
23
+ @group(0) @binding(2) var<storage, read> rhs: array<f32>;
24
+ @group(0) @binding(3) var<storage, read_write> out: array<f32>;
25
+
26
+ @compute @workgroup_size(8, 8)
27
+ fn main(@builtin(global_invocation_id) gid: vec3u) {
28
+ let row = gid.y;
29
+ let col = gid.x;
30
+ if (row >= dims.m || col >= dims.n) {
31
+ return;
32
+ }
33
+
34
+ var acc = 0.0;
35
+ for (var i = 0u; i < dims.k; i = i + 1u) {
36
+ acc += lhs[row * dims.k + i] * rhs[i * dims.n + col];
37
+ }
38
+ out[row * dims.n + col] = acc;
39
+ }
40
+ `,
41
+ inputs: [
42
+ { data: dims, usage: "uniform", access: "uniform" },
43
+ lhs,
44
+ rhs,
45
+ ],
46
+ output: {
47
+ type: Float32Array,
48
+ size: M * N * Float32Array.BYTES_PER_ELEMENT,
49
+ },
50
+ workgroups: [Math.ceil(N / 8), Math.ceil(M / 8)],
51
+ });
52
+
53
+ console.log(result.subarray(0, 8));