@simulatte/webgpu 0.2.4 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/README.md +263 -71
- package/api-contract.md +70 -139
- package/assets/package-layers.svg +63 -0
- package/examples/direct-webgpu/compute-dispatch.js +66 -0
- package/examples/direct-webgpu/explicit-bind-group.js +85 -0
- package/examples/direct-webgpu/request-device.js +10 -0
- package/examples/doe-api/buffers-readback.js +9 -0
- package/examples/doe-api/compile-and-dispatch.js +30 -0
- package/examples/doe-api/compute-dispatch.js +25 -0
- package/examples/doe-routines/compute-once-like-input.js +36 -0
- package/examples/doe-routines/compute-once-matmul.js +53 -0
- package/examples/doe-routines/compute-once-multiple-inputs.js +27 -0
- package/examples/doe-routines/compute-once.js +23 -0
- package/headless-webgpu-comparison.md +2 -2
- package/layering-plan.md +1 -1
- package/native/doe_napi.c +102 -12
- package/package.json +2 -1
- package/prebuilds/darwin-arm64/doe_napi.node +0 -0
- package/prebuilds/darwin-arm64/libwebgpu_doe.dylib +0 -0
- package/prebuilds/darwin-arm64/metadata.json +6 -6
- package/prebuilds/linux-x64/doe_napi.node +0 -0
- package/prebuilds/linux-x64/libwebgpu_doe.so +0 -0
- package/prebuilds/linux-x64/metadata.json +5 -5
- package/scripts/generate-readme-assets.js +79 -6
- package/scripts/prebuild.js +23 -19
- package/src/auto_bind_group_layout.js +32 -0
- package/src/bun-ffi.js +93 -12
- package/src/bun.js +23 -2
- package/src/compute.d.ts +2 -1
- package/src/compute.js +671 -33
- package/src/doe.d.ts +127 -27
- package/src/doe.js +480 -114
- package/src/full.d.ts +8 -1
- package/src/full.js +28 -3
- package/src/index.js +1013 -38
package/api-contract.md
CHANGED
|
@@ -3,16 +3,47 @@
|
|
|
3
3
|
Contract version: `v1`
|
|
4
4
|
|
|
5
5
|
Scope: current headless WebGPU package contract for Node.js and Bun, with a
|
|
6
|
-
default `full` surface, an explicit `compute` subpath, and Doe
|
|
7
|
-
used by benchmarking, CI, and artifact-backed comparison
|
|
6
|
+
default `full` surface, an explicit `compute` subpath, and the Doe API / Doe
|
|
7
|
+
routines surface used by benchmarking, CI, and artifact-backed comparison
|
|
8
|
+
workflows.
|
|
9
|
+
|
|
10
|
+
Terminology in this contract is explicit:
|
|
11
|
+
|
|
12
|
+
- `Doe runtime`
|
|
13
|
+
the Zig/native WebGPU runtime underneath the package
|
|
14
|
+
- `Doe API`
|
|
15
|
+
the explicit JS convenience surface under `doe.bind(...)`, `gpu.buffers.*`,
|
|
16
|
+
`gpu.compute.run(...)`, and `gpu.compute.compile(...)`
|
|
17
|
+
- `Doe routines`
|
|
18
|
+
the narrower, more opinionated JS flows layered on that same runtime;
|
|
19
|
+
currently `gpu.compute.once(...)`
|
|
8
20
|
|
|
9
21
|
For the current `compute` vs `full` support split, see
|
|
10
22
|
[`./support-contracts.md`](./support-contracts.md).
|
|
11
23
|
|
|
24
|
+
Exact type and method shapes live in:
|
|
25
|
+
|
|
26
|
+
- [`./src/full.d.ts`](./src/full.d.ts)
|
|
27
|
+
- [`./src/compute.d.ts`](./src/compute.d.ts)
|
|
28
|
+
- [`./src/doe.d.ts`](./src/doe.d.ts)
|
|
29
|
+
|
|
12
30
|
This contract covers package-surface GPU access, provider metadata, and helper
|
|
13
31
|
entrypoints. It does not promise DOM/canvas ownership or browser-process
|
|
14
32
|
parity.
|
|
15
33
|
|
|
34
|
+
## API styles
|
|
35
|
+
|
|
36
|
+
The current package surface is organized around three API styles:
|
|
37
|
+
|
|
38
|
+
- `Direct WebGPU`
|
|
39
|
+
raw `requestAdapter(...)`, `requestDevice(...)`, and direct `device.*` usage
|
|
40
|
+
- `Doe API`
|
|
41
|
+
the package's explicit JS convenience surface under `doe.bind(...)`,
|
|
42
|
+
`gpu.buffers.*`, `gpu.compute.run(...)`, and `gpu.compute.compile(...)`
|
|
43
|
+
- `Doe routines`
|
|
44
|
+
the package's more opinionated precomposed flows; currently
|
|
45
|
+
`gpu.compute.once(...)`
|
|
46
|
+
|
|
16
47
|
## Export surfaces
|
|
17
48
|
|
|
18
49
|
### `@simulatte/webgpu`
|
|
@@ -22,8 +53,8 @@ Default package surface.
|
|
|
22
53
|
Contract:
|
|
23
54
|
|
|
24
55
|
- headless `full` surface
|
|
25
|
-
- includes compute plus render/sampler/surface APIs already exposed by the package
|
|
26
|
-
- also exports the `doe`
|
|
56
|
+
- includes compute plus render/sampler/surface APIs already exposed by the Doe runtime package surface
|
|
57
|
+
- also exports the shared `doe` namespace for the Doe API and Doe routines surface
|
|
27
58
|
|
|
28
59
|
### `@simulatte/webgpu/compute`
|
|
29
60
|
|
|
@@ -33,7 +64,7 @@ Contract:
|
|
|
33
64
|
|
|
34
65
|
- sized for AI workloads and other buffer/dispatch-heavy headless execution
|
|
35
66
|
- excludes render/sampler/surface methods from the public JS facade
|
|
36
|
-
- also exports the same `doe`
|
|
67
|
+
- also exports the same `doe` namespace for the Doe API and Doe routines surface
|
|
37
68
|
|
|
38
69
|
## Shared runtime API
|
|
39
70
|
|
|
@@ -42,175 +73,75 @@ Modules:
|
|
|
42
73
|
- `@simulatte/webgpu`
|
|
43
74
|
- `@simulatte/webgpu/compute`
|
|
44
75
|
|
|
45
|
-
###
|
|
46
|
-
|
|
47
|
-
Input:
|
|
48
|
-
|
|
49
|
-
- `createArgs?: string[]` (currently ignored by the default Doe-native provider)
|
|
50
|
-
|
|
51
|
-
Behavior:
|
|
52
|
-
|
|
53
|
-
- loads the Doe-native N-API addon and `libwebgpu_doe`
|
|
54
|
-
- returns a GPU object backed by the in-tree Doe provider
|
|
55
|
-
|
|
56
|
-
Output:
|
|
57
|
-
|
|
58
|
-
- `GPU` object with `requestAdapter(...)`
|
|
59
|
-
|
|
60
|
-
### `globals`
|
|
61
|
-
|
|
62
|
-
Output:
|
|
63
|
-
|
|
64
|
-
- provider globals object suitable for `Object.assign(globalThis, globals)`
|
|
65
|
-
|
|
66
|
-
### `setupGlobals(target?, createArgs?)`
|
|
67
|
-
|
|
68
|
-
Input:
|
|
69
|
-
|
|
70
|
-
- `target?: object` (default: `globalThis`)
|
|
71
|
-
- `createArgs?: string[]`
|
|
72
|
-
|
|
73
|
-
Behavior:
|
|
74
|
-
|
|
75
|
-
- installs provider globals if missing
|
|
76
|
-
- installs `navigator.gpu` if missing
|
|
77
|
-
|
|
78
|
-
Output:
|
|
76
|
+
### Top-level package API
|
|
79
77
|
|
|
80
|
-
|
|
78
|
+
The exact signatures are defined in the `.d.ts` files above. At the contract
|
|
79
|
+
level:
|
|
81
80
|
|
|
82
|
-
|
|
81
|
+
- `create(...)` loads the Doe-native addon/runtime and returns a package-local
|
|
82
|
+
`GPU` object.
|
|
83
|
+
- `globals` exposes provider globals suitable for `Object.assign(...)` or
|
|
84
|
+
bootstrap wiring.
|
|
85
|
+
- `setupGlobals(...)` installs globals and `navigator.gpu` when missing.
|
|
86
|
+
- `requestAdapter(...)` and `requestDevice(...)` are the `Direct WebGPU` entry
|
|
87
|
+
points.
|
|
83
88
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
- `Promise<GPUAdapter | null>`
|
|
87
|
-
|
|
88
|
-
### `requestDevice(options?)`
|
|
89
|
-
|
|
90
|
-
Input:
|
|
91
|
-
|
|
92
|
-
- `options.adapterOptions?: object`
|
|
93
|
-
- `options.deviceDescriptor?: object`
|
|
94
|
-
- `options.createArgs?: string[]`
|
|
95
|
-
|
|
96
|
-
Output:
|
|
97
|
-
|
|
98
|
-
- `Promise<GPUDevice>`
|
|
99
|
-
|
|
100
|
-
On `@simulatte/webgpu/compute`, the returned device is a compute-only facade:
|
|
89
|
+
On `@simulatte/webgpu/compute`, the returned device is intentionally
|
|
90
|
+
compute-only:
|
|
101
91
|
|
|
102
92
|
- buffer / bind group / compute pipeline / command encoder / queue methods are available
|
|
103
93
|
- render / sampler / surface methods are intentionally absent from the facade
|
|
104
94
|
|
|
105
95
|
### `providerInfo()`
|
|
106
96
|
|
|
107
|
-
Output object:
|
|
108
|
-
|
|
109
|
-
- `module: string`
|
|
110
|
-
- `loaded: boolean`
|
|
111
|
-
- `loadError: string`
|
|
112
|
-
- `defaultCreateArgs: string[]`
|
|
113
|
-
- `doeNative: boolean`
|
|
114
|
-
- `libraryFlavor: string`
|
|
115
|
-
- `doeLibraryPath: string`
|
|
116
|
-
- `buildMetadataSource: string`
|
|
117
|
-
- `buildMetadataPath: string`
|
|
118
|
-
- `leanVerifiedBuild: boolean | null`
|
|
119
|
-
- `proofArtifactSha256: string | null`
|
|
120
|
-
|
|
121
97
|
Behavior:
|
|
122
98
|
|
|
123
99
|
- reports package-surface library provenance when prebuild metadata or Zig build
|
|
124
100
|
metadata is available
|
|
125
101
|
- does not guess: if metadata is unavailable, `leanVerifiedBuild` is `null`
|
|
102
|
+
- reports whether the Doe-native path is loaded and where build metadata came from
|
|
126
103
|
|
|
127
104
|
### `doe`
|
|
128
105
|
|
|
129
|
-
Output object:
|
|
130
|
-
|
|
131
|
-
- `bind(device)`
|
|
132
|
-
- `createBuffer(device, options)`
|
|
133
|
-
- `createBufferFromData(device, data, options?)`
|
|
134
|
-
- `readBuffer(device, buffer, TypedArray, options?)`
|
|
135
|
-
- `runCompute(device, options)`
|
|
136
|
-
- `compileCompute(device, options)`
|
|
137
|
-
|
|
138
106
|
Behavior:
|
|
139
107
|
|
|
140
|
-
- provides
|
|
108
|
+
- provides the `Doe API` and `Doe routines` surface for common headless
|
|
109
|
+
compute tasks
|
|
110
|
+
- the exported `doe` namespace is the JS convenience surface, distinct from
|
|
111
|
+
the underlying Doe runtime
|
|
112
|
+
- `doe.requestDevice(options?)` resolves the package-local `requestDevice(...)` and returns
|
|
113
|
+
the bound helper object directly
|
|
141
114
|
- supports both static helper calls and `doe.bind(device)` for device-bound workflows
|
|
142
|
-
-
|
|
143
|
-
|
|
115
|
+
- helper methods are grouped under `buffers.*` and `compute.*`
|
|
116
|
+
- `buffers.*`, `compute.run(...)`, and `compute.compile(...)` are the main
|
|
117
|
+
`Doe API` surface
|
|
118
|
+
- `compute.once(...)` is the first `Doe routines` path and stays intentionally
|
|
119
|
+
narrow: typed-array/headless one-call execution, not a replacement for
|
|
120
|
+
explicit reusable resource ownership
|
|
121
|
+
- infers `compute.run(...).bindings` access from Doe helper-created buffer usage when that
|
|
122
|
+
usage maps to one bindable access mode (`uniform`, `storageRead`, `storageReadWrite`)
|
|
123
|
+
- `compute.once(...)` accepts Doe usage tokens only; raw numeric WebGPU usage flags stay on
|
|
124
|
+
the more explicit `Doe API` surface
|
|
144
125
|
- fails fast for bare bindings that do not carry Doe helper usage metadata or whose
|
|
145
126
|
usage is non-bindable/ambiguous; callers must pass `{ buffer, access }` explicitly
|
|
146
127
|
- additive only; it does not replace the raw WebGPU-facing package API
|
|
147
128
|
|
|
148
129
|
### `createDoeRuntime(options?)`
|
|
149
130
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
- `options.binPath?: string`
|
|
153
|
-
- `options.libPath?: string`
|
|
154
|
-
|
|
155
|
-
Output object:
|
|
156
|
-
|
|
157
|
-
- `binPath: string`
|
|
158
|
-
- `libPath: string | null`
|
|
159
|
-
- `runRaw(args: string[], spawnOptions?): RunResult`
|
|
160
|
-
- `runBench(options: BenchOptions): BenchResult`
|
|
161
|
-
|
|
162
|
-
`BenchOptions`:
|
|
163
|
-
|
|
164
|
-
- `commandsPath: string` (required)
|
|
165
|
-
- `quirksPath?: string`
|
|
166
|
-
- `vendor?: string`
|
|
167
|
-
- `api?: string`
|
|
168
|
-
- `family?: string`
|
|
169
|
-
- `driver?: string`
|
|
170
|
-
- `traceJsonlPath?: string`
|
|
171
|
-
- `traceMetaPath?: string`
|
|
172
|
-
- `uploadBufferUsage?: string`
|
|
173
|
-
- `uploadSubmitEvery?: number`
|
|
174
|
-
- `queueWaitMode?: string`
|
|
175
|
-
- `queueSyncMode?: string`
|
|
176
|
-
- `extraArgs?: string[]`
|
|
177
|
-
|
|
178
|
-
`RunResult`:
|
|
179
|
-
|
|
180
|
-
- `ok: boolean`
|
|
181
|
-
- `exitCode: number`
|
|
182
|
-
- `stdout: string`
|
|
183
|
-
- `stderr: string`
|
|
184
|
-
- `signal: string | null`
|
|
185
|
-
- `command: string[]`
|
|
186
|
-
|
|
187
|
-
`BenchResult` extends `RunResult` with:
|
|
131
|
+
Behavior:
|
|
188
132
|
|
|
189
|
-
-
|
|
190
|
-
|
|
191
|
-
-
|
|
133
|
+
- returns the local Doe runtime/CLI wrapper used for command-stream execution
|
|
134
|
+
and benchmark orchestration from Node/Bun environments
|
|
135
|
+
- preserves explicit file-path ownership for the binary/library locations rather
|
|
136
|
+
than hiding them behind package-only assumptions
|
|
192
137
|
|
|
193
138
|
### `runDawnVsDoeCompare(options)`
|
|
194
139
|
|
|
195
|
-
Input:
|
|
196
|
-
|
|
197
|
-
- `repoRoot?: string`
|
|
198
|
-
- `compareScriptPath?: string`
|
|
199
|
-
- `pythonBin?: string`
|
|
200
|
-
- `configPath?: string`
|
|
201
|
-
- `outPath?: string`
|
|
202
|
-
- `extraArgs?: string[]`
|
|
203
|
-
- `env?: Record<string, string>`
|
|
204
|
-
|
|
205
140
|
Behavior:
|
|
206
141
|
|
|
207
142
|
- wraps `bench/compare_dawn_vs_doe.py`
|
|
208
143
|
- requires either `configPath` or `--config` in `extraArgs`
|
|
209
144
|
|
|
210
|
-
Output:
|
|
211
|
-
|
|
212
|
-
- `RunResult`
|
|
213
|
-
|
|
214
145
|
## CLI contract
|
|
215
146
|
|
|
216
147
|
### `fawn-webgpu-bench`
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
<!-- Generated by scripts/generate-readme-assets.js. Do not edit by hand. -->
|
|
2
|
+
<svg xmlns="http://www.w3.org/2000/svg" width="1200" height="470" viewBox="0 0 1200 470" role="img" aria-labelledby="layers-title layers-desc">
|
|
3
|
+
<title id="layers-title">@simulatte/webgpu layered package graph</title>
|
|
4
|
+
<desc id="layers-desc">Layered package graph showing direct WebGPU, Doe API, and Doe routines over the same package surfaces.</desc>
|
|
5
|
+
<defs>
|
|
6
|
+
<linearGradient id="layers-bg" x1="0%" y1="0%" x2="100%" y2="100%">
|
|
7
|
+
<stop offset="0%" stop-color="#050816"/>
|
|
8
|
+
<stop offset="100%" stop-color="#140c1f"/>
|
|
9
|
+
</linearGradient>
|
|
10
|
+
<radialGradient id="layers-glow-top" cx="25%" cy="18%" r="55%">
|
|
11
|
+
<stop offset="0%" stop-color="#ef444430"/>
|
|
12
|
+
<stop offset="55%" stop-color="#7c3aed18"/>
|
|
13
|
+
<stop offset="100%" stop-color="#00000000"/>
|
|
14
|
+
</radialGradient>
|
|
15
|
+
<radialGradient id="layers-glow-bottom" cx="78%" cy="84%" r="52%">
|
|
16
|
+
<stop offset="0%" stop-color="#f59e0b26"/>
|
|
17
|
+
<stop offset="60%" stop-color="#f9731618"/>
|
|
18
|
+
<stop offset="100%" stop-color="#00000000"/>
|
|
19
|
+
</radialGradient>
|
|
20
|
+
<linearGradient id="layers-root" x1="0%" y1="0%" x2="100%" y2="100%">
|
|
21
|
+
<stop offset="0%" stop-color="#7c3aed"/>
|
|
22
|
+
<stop offset="100%" stop-color="#ef4444"/>
|
|
23
|
+
</linearGradient>
|
|
24
|
+
<linearGradient id="layers-direct" x1="0%" y1="0%" x2="100%" y2="100%">
|
|
25
|
+
<stop offset="0%" stop-color="#ef4444"/>
|
|
26
|
+
<stop offset="100%" stop-color="#f97316"/>
|
|
27
|
+
</linearGradient>
|
|
28
|
+
<linearGradient id="layers-api" x1="0%" y1="0%" x2="100%" y2="100%">
|
|
29
|
+
<stop offset="0%" stop-color="#f97316"/>
|
|
30
|
+
<stop offset="100%" stop-color="#f59e0b"/>
|
|
31
|
+
</linearGradient>
|
|
32
|
+
<linearGradient id="layers-routines" x1="0%" y1="0%" x2="100%" y2="100%">
|
|
33
|
+
<stop offset="0%" stop-color="#f59e0b"/>
|
|
34
|
+
<stop offset="100%" stop-color="#eab308"/>
|
|
35
|
+
</linearGradient>
|
|
36
|
+
<filter id="shadow" x="-20%" y="-20%" width="140%" height="140%">
|
|
37
|
+
<feDropShadow dx="0" dy="10" stdDeviation="14" flood-color="#000000" flood-opacity="0.32"/>
|
|
38
|
+
</filter>
|
|
39
|
+
<style>
|
|
40
|
+
.title { font: 700 34px "Segoe UI", "Helvetica Neue", Arial, sans-serif; fill: #ffffff; paint-order: stroke fill; stroke: #000000; stroke-width: 2px; stroke-linejoin: round; }
|
|
41
|
+
.subtitle { font: 500 18px "Segoe UI", "Helvetica Neue", Arial, sans-serif; fill: #cbd5e1; paint-order: stroke fill; stroke: #000000; stroke-width: 2px; stroke-linejoin: round; }
|
|
42
|
+
.nodeTitle { font: 700 22px "Segoe UI", "Helvetica Neue", Arial, sans-serif; fill: #ffffff; paint-order: stroke fill; stroke: #000000; stroke-width: 2px; stroke-linejoin: round; }
|
|
43
|
+
.box { stroke-width: 2.5; filter: url(#shadow); }
|
|
44
|
+
</style>
|
|
45
|
+
</defs>
|
|
46
|
+
<rect width="1200" height="470" fill="url(#layers-bg)"/>
|
|
47
|
+
<rect width="1200" height="470" fill="url(#layers-glow-top)"/>
|
|
48
|
+
<rect width="1200" height="470" fill="url(#layers-glow-bottom)"/>
|
|
49
|
+
<text x="64" y="62" class="title">Same package, four layers</text>
|
|
50
|
+
<text x="64" y="94" class="subtitle">The package surface stays the same while the API gets progressively higher-level.</text>
|
|
51
|
+
|
|
52
|
+
<rect x="170" y="122" width="860" height="64" rx="20" fill="url(#layers-root)" stroke="#c4b5fd" class="box"/>
|
|
53
|
+
<text x="600" y="162" text-anchor="middle" class="nodeTitle">@simulatte/webgpu / @simulatte/webgpu/compute</text>
|
|
54
|
+
|
|
55
|
+
<rect x="220" y="222" width="760" height="52" rx="18" fill="url(#layers-direct)" stroke="#fca5a5" class="box"/>
|
|
56
|
+
<text x="600" y="255" text-anchor="middle" class="nodeTitle">Direct WebGPU</text>
|
|
57
|
+
|
|
58
|
+
<rect x="280" y="310" width="640" height="52" rx="18" fill="url(#layers-api)" stroke="#fdba74" class="box"/>
|
|
59
|
+
<text x="600" y="343" text-anchor="middle" class="nodeTitle">Doe API</text>
|
|
60
|
+
|
|
61
|
+
<rect x="360" y="398" width="480" height="52" rx="18" fill="url(#layers-routines)" stroke="#fde68a" class="box"/>
|
|
62
|
+
<text x="600" y="431" text-anchor="middle" class="nodeTitle">Doe routines</text>
|
|
63
|
+
</svg>
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
import { globals, requestDevice } from "@simulatte/webgpu";
|
|
2
|
+
|
|
3
|
+
const device = await requestDevice();
|
|
4
|
+
|
|
5
|
+
const input = new Float32Array([1, 2, 3, 4]);
|
|
6
|
+
const inputBuffer = device.createBuffer({
|
|
7
|
+
size: input.byteLength,
|
|
8
|
+
usage: globals.GPUBufferUsage.STORAGE | globals.GPUBufferUsage.COPY_DST,
|
|
9
|
+
});
|
|
10
|
+
device.queue.writeBuffer(inputBuffer, 0, input);
|
|
11
|
+
|
|
12
|
+
const outputBuffer = device.createBuffer({
|
|
13
|
+
size: input.byteLength,
|
|
14
|
+
usage: globals.GPUBufferUsage.STORAGE | globals.GPUBufferUsage.COPY_SRC,
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
const readbackBuffer = device.createBuffer({
|
|
18
|
+
size: input.byteLength,
|
|
19
|
+
usage: globals.GPUBufferUsage.COPY_DST | globals.GPUBufferUsage.MAP_READ,
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
const shader = device.createShaderModule({
|
|
23
|
+
code: `
|
|
24
|
+
@group(0) @binding(0) var<storage, read> src: array<f32>;
|
|
25
|
+
@group(0) @binding(1) var<storage, read_write> dst: array<f32>;
|
|
26
|
+
|
|
27
|
+
@compute @workgroup_size(4)
|
|
28
|
+
fn main(@builtin(global_invocation_id) gid: vec3u) {
|
|
29
|
+
let i = gid.x;
|
|
30
|
+
dst[i] = src[i] * 2.0;
|
|
31
|
+
}
|
|
32
|
+
`,
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
const pipeline = device.createComputePipeline({
|
|
36
|
+
layout: "auto",
|
|
37
|
+
compute: {
|
|
38
|
+
module: shader,
|
|
39
|
+
entryPoint: "main",
|
|
40
|
+
},
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
const bindGroup = device.createBindGroup({
|
|
44
|
+
layout: pipeline.getBindGroupLayout(0),
|
|
45
|
+
entries: [
|
|
46
|
+
{ binding: 0, resource: { buffer: inputBuffer } },
|
|
47
|
+
{ binding: 1, resource: { buffer: outputBuffer } },
|
|
48
|
+
],
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
const encoder = device.createCommandEncoder();
|
|
52
|
+
const pass = encoder.beginComputePass();
|
|
53
|
+
pass.setPipeline(pipeline);
|
|
54
|
+
pass.setBindGroup(0, bindGroup);
|
|
55
|
+
pass.dispatchWorkgroups(1);
|
|
56
|
+
pass.end();
|
|
57
|
+
encoder.copyBufferToBuffer(outputBuffer, 0, readbackBuffer, 0, input.byteLength);
|
|
58
|
+
|
|
59
|
+
device.queue.submit([encoder.finish()]);
|
|
60
|
+
await device.queue.onSubmittedWorkDone();
|
|
61
|
+
|
|
62
|
+
await readbackBuffer.mapAsync(globals.GPUMapMode.READ);
|
|
63
|
+
const result = new Float32Array(readbackBuffer.getMappedRange().slice(0));
|
|
64
|
+
readbackBuffer.unmap();
|
|
65
|
+
|
|
66
|
+
console.log(JSON.stringify(Array.from(result)));
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import { globals, requestDevice } from "@simulatte/webgpu";
|
|
2
|
+
|
|
3
|
+
const device = await requestDevice();
|
|
4
|
+
|
|
5
|
+
const input = new Float32Array([1, 2, 3, 4]);
|
|
6
|
+
const inputBuffer = device.createBuffer({
|
|
7
|
+
size: input.byteLength,
|
|
8
|
+
usage: globals.GPUBufferUsage.STORAGE | globals.GPUBufferUsage.COPY_DST,
|
|
9
|
+
});
|
|
10
|
+
device.queue.writeBuffer(inputBuffer, 0, input);
|
|
11
|
+
|
|
12
|
+
const outputBuffer = device.createBuffer({
|
|
13
|
+
size: input.byteLength,
|
|
14
|
+
usage: globals.GPUBufferUsage.STORAGE | globals.GPUBufferUsage.COPY_SRC,
|
|
15
|
+
});
|
|
16
|
+
|
|
17
|
+
const readbackBuffer = device.createBuffer({
|
|
18
|
+
size: input.byteLength,
|
|
19
|
+
usage: globals.GPUBufferUsage.COPY_DST | globals.GPUBufferUsage.MAP_READ,
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
const shader = device.createShaderModule({
|
|
23
|
+
code: `
|
|
24
|
+
@group(0) @binding(0) var<storage, read> src: array<f32>;
|
|
25
|
+
@group(0) @binding(1) var<storage, read_write> dst: array<f32>;
|
|
26
|
+
|
|
27
|
+
@compute @workgroup_size(4)
|
|
28
|
+
fn main(@builtin(global_invocation_id) gid: vec3u) {
|
|
29
|
+
let i = gid.x;
|
|
30
|
+
dst[i] = src[i] * 4.0;
|
|
31
|
+
}
|
|
32
|
+
`,
|
|
33
|
+
});
|
|
34
|
+
|
|
35
|
+
const bindGroupLayout = device.createBindGroupLayout({
|
|
36
|
+
entries: [
|
|
37
|
+
{
|
|
38
|
+
binding: 0,
|
|
39
|
+
visibility: globals.GPUShaderStage.COMPUTE,
|
|
40
|
+
buffer: { type: "read-only-storage" },
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
binding: 1,
|
|
44
|
+
visibility: globals.GPUShaderStage.COMPUTE,
|
|
45
|
+
buffer: { type: "storage" },
|
|
46
|
+
},
|
|
47
|
+
],
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
const pipelineLayout = device.createPipelineLayout({
|
|
51
|
+
bindGroupLayouts: [bindGroupLayout],
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
const pipeline = device.createComputePipeline({
|
|
55
|
+
layout: pipelineLayout,
|
|
56
|
+
compute: {
|
|
57
|
+
module: shader,
|
|
58
|
+
entryPoint: "main",
|
|
59
|
+
},
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
const bindGroup = device.createBindGroup({
|
|
63
|
+
layout: bindGroupLayout,
|
|
64
|
+
entries: [
|
|
65
|
+
{ binding: 0, resource: { buffer: inputBuffer } },
|
|
66
|
+
{ binding: 1, resource: { buffer: outputBuffer } },
|
|
67
|
+
],
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
const encoder = device.createCommandEncoder();
|
|
71
|
+
const pass = encoder.beginComputePass();
|
|
72
|
+
pass.setPipeline(pipeline);
|
|
73
|
+
pass.setBindGroup(0, bindGroup);
|
|
74
|
+
pass.dispatchWorkgroups(1);
|
|
75
|
+
pass.end();
|
|
76
|
+
encoder.copyBufferToBuffer(outputBuffer, 0, readbackBuffer, 0, input.byteLength);
|
|
77
|
+
|
|
78
|
+
device.queue.submit([encoder.finish()]);
|
|
79
|
+
await device.queue.onSubmittedWorkDone();
|
|
80
|
+
|
|
81
|
+
await readbackBuffer.mapAsync(globals.GPUMapMode.READ);
|
|
82
|
+
const result = new Float32Array(readbackBuffer.getMappedRange().slice(0));
|
|
83
|
+
readbackBuffer.unmap();
|
|
84
|
+
|
|
85
|
+
console.log(JSON.stringify(Array.from(result)));
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import { requestDevice } from "@simulatte/webgpu";
|
|
2
|
+
|
|
3
|
+
const device = await requestDevice();
|
|
4
|
+
|
|
5
|
+
console.log(JSON.stringify({
|
|
6
|
+
createBuffer: typeof device.createBuffer === "function",
|
|
7
|
+
createComputePipeline: typeof device.createComputePipeline === "function",
|
|
8
|
+
createRenderPipeline: typeof device.createRenderPipeline === "function",
|
|
9
|
+
writeBuffer: typeof device.queue?.writeBuffer === "function",
|
|
10
|
+
}));
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { doe } from "@simulatte/webgpu/compute";
|
|
2
|
+
|
|
3
|
+
const gpu = await doe.requestDevice();
|
|
4
|
+
const src = gpu.buffers.fromData(new Float32Array([1, 2, 3, 4]), {
|
|
5
|
+
usage: ["storageRead", "readback"],
|
|
6
|
+
});
|
|
7
|
+
|
|
8
|
+
const result = await gpu.buffers.read(src, Float32Array);
|
|
9
|
+
console.log(JSON.stringify(Array.from(result)));
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { doe } from "@simulatte/webgpu/compute";
|
|
2
|
+
|
|
3
|
+
const gpu = await doe.requestDevice();
|
|
4
|
+
const src = gpu.buffers.fromData(new Float32Array([1, 2, 3, 4]));
|
|
5
|
+
const dst = gpu.buffers.like(src, {
|
|
6
|
+
usage: "storageReadWrite",
|
|
7
|
+
});
|
|
8
|
+
|
|
9
|
+
const kernel = gpu.compute.compile({
|
|
10
|
+
code: `
|
|
11
|
+
@group(0) @binding(0) var<storage, read> src: array<f32>;
|
|
12
|
+
@group(0) @binding(1) var<storage, read_write> dst: array<f32>;
|
|
13
|
+
|
|
14
|
+
@compute @workgroup_size(4)
|
|
15
|
+
fn main(@builtin(global_invocation_id) gid: vec3u) {
|
|
16
|
+
let i = gid.x;
|
|
17
|
+
dst[i] = src[i] * 5.0;
|
|
18
|
+
}
|
|
19
|
+
`,
|
|
20
|
+
bindings: [src, dst],
|
|
21
|
+
workgroups: 1,
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
await kernel.dispatch({
|
|
25
|
+
bindings: [src, dst],
|
|
26
|
+
workgroups: 1,
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
const result = await gpu.buffers.read(dst, Float32Array);
|
|
30
|
+
console.log(JSON.stringify(Array.from(result)));
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { doe } from "@simulatte/webgpu/compute";
|
|
2
|
+
|
|
3
|
+
const gpu = await doe.requestDevice();
|
|
4
|
+
const src = gpu.buffers.fromData(new Float32Array([1, 2, 3, 4]));
|
|
5
|
+
const dst = gpu.buffers.like(src, {
|
|
6
|
+
usage: "storageReadWrite",
|
|
7
|
+
});
|
|
8
|
+
|
|
9
|
+
await gpu.compute.run({
|
|
10
|
+
code: `
|
|
11
|
+
@group(0) @binding(0) var<storage, read> src: array<f32>;
|
|
12
|
+
@group(0) @binding(1) var<storage, read_write> dst: array<f32>;
|
|
13
|
+
|
|
14
|
+
@compute @workgroup_size(4)
|
|
15
|
+
fn main(@builtin(global_invocation_id) gid: vec3u) {
|
|
16
|
+
let i = gid.x;
|
|
17
|
+
dst[i] = src[i] * 2.0;
|
|
18
|
+
}
|
|
19
|
+
`,
|
|
20
|
+
bindings: [src, dst],
|
|
21
|
+
workgroups: 1,
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
const result = await gpu.buffers.read(dst, Float32Array);
|
|
25
|
+
console.log(JSON.stringify(Array.from(result)));
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { doe } from "@simulatte/webgpu/compute";
|
|
2
|
+
|
|
3
|
+
const gpu = await doe.requestDevice();
|
|
4
|
+
|
|
5
|
+
const result = await gpu.compute.once({
|
|
6
|
+
code: `
|
|
7
|
+
struct Scale {
|
|
8
|
+
value: f32,
|
|
9
|
+
};
|
|
10
|
+
|
|
11
|
+
@group(0) @binding(0) var<uniform> scale: Scale;
|
|
12
|
+
@group(0) @binding(1) var<storage, read> src: array<f32>;
|
|
13
|
+
@group(0) @binding(2) var<storage, read_write> dst: array<f32>;
|
|
14
|
+
|
|
15
|
+
@compute @workgroup_size(4)
|
|
16
|
+
fn main(@builtin(global_invocation_id) gid: vec3u) {
|
|
17
|
+
let i = gid.x;
|
|
18
|
+
dst[i] = src[i] * scale.value;
|
|
19
|
+
}
|
|
20
|
+
`,
|
|
21
|
+
inputs: [
|
|
22
|
+
{
|
|
23
|
+
data: new Float32Array([2]),
|
|
24
|
+
usage: "uniform",
|
|
25
|
+
access: "uniform",
|
|
26
|
+
},
|
|
27
|
+
new Float32Array([1, 2, 3, 4]),
|
|
28
|
+
],
|
|
29
|
+
output: {
|
|
30
|
+
type: Float32Array,
|
|
31
|
+
likeInput: 1,
|
|
32
|
+
},
|
|
33
|
+
workgroups: [1, 1],
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
console.log(JSON.stringify(Array.from(result)));
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { doe } from "@simulatte/webgpu/compute";
|
|
2
|
+
|
|
3
|
+
const gpu = await doe.requestDevice();
|
|
4
|
+
const M = 256;
|
|
5
|
+
const K = 512;
|
|
6
|
+
const N = 256;
|
|
7
|
+
|
|
8
|
+
const lhs = Float32Array.from({ length: M * K }, (_, i) => (i % 17) / 17);
|
|
9
|
+
const rhs = Float32Array.from({ length: K * N }, (_, i) => (i % 13) / 13);
|
|
10
|
+
const dims = new Uint32Array([M, K, N, 0]);
|
|
11
|
+
|
|
12
|
+
const result = await gpu.compute.once({
|
|
13
|
+
code: `
|
|
14
|
+
struct Dims {
|
|
15
|
+
m: u32,
|
|
16
|
+
k: u32,
|
|
17
|
+
n: u32,
|
|
18
|
+
_pad: u32,
|
|
19
|
+
};
|
|
20
|
+
|
|
21
|
+
@group(0) @binding(0) var<uniform> dims: Dims;
|
|
22
|
+
@group(0) @binding(1) var<storage, read> lhs: array<f32>;
|
|
23
|
+
@group(0) @binding(2) var<storage, read> rhs: array<f32>;
|
|
24
|
+
@group(0) @binding(3) var<storage, read_write> out: array<f32>;
|
|
25
|
+
|
|
26
|
+
@compute @workgroup_size(8, 8)
|
|
27
|
+
fn main(@builtin(global_invocation_id) gid: vec3u) {
|
|
28
|
+
let row = gid.y;
|
|
29
|
+
let col = gid.x;
|
|
30
|
+
if (row >= dims.m || col >= dims.n) {
|
|
31
|
+
return;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
var acc = 0.0;
|
|
35
|
+
for (var i = 0u; i < dims.k; i = i + 1u) {
|
|
36
|
+
acc += lhs[row * dims.k + i] * rhs[i * dims.n + col];
|
|
37
|
+
}
|
|
38
|
+
out[row * dims.n + col] = acc;
|
|
39
|
+
}
|
|
40
|
+
`,
|
|
41
|
+
inputs: [
|
|
42
|
+
{ data: dims, usage: "uniform", access: "uniform" },
|
|
43
|
+
lhs,
|
|
44
|
+
rhs,
|
|
45
|
+
],
|
|
46
|
+
output: {
|
|
47
|
+
type: Float32Array,
|
|
48
|
+
size: M * N * Float32Array.BYTES_PER_ELEMENT,
|
|
49
|
+
},
|
|
50
|
+
workgroups: [Math.ceil(N / 8), Math.ceil(M / 8)],
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
console.log(result.subarray(0, 8));
|