@simulatte/webgpu 0.2.0 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/API_CONTRACT.md +11 -1
- package/CHANGELOG.md +82 -0
- package/COMPAT_SCOPE.md +20 -6
- package/LAYERING_PLAN.md +257 -0
- package/README.md +242 -61
- package/SUPPORT_CONTRACTS.md +353 -0
- package/ZIG_SOURCE_INVENTORY.md +468 -0
- package/assets/package-surface-cube-snapshot.svg +7 -7
- package/headless-webgpu-comparison.md +3 -3
- package/native/doe_napi.c +110 -17
- package/package.json +16 -3
- package/prebuilds/darwin-arm64/doe_napi.node +0 -0
- package/prebuilds/darwin-arm64/libwebgpu_doe.dylib +0 -0
- package/prebuilds/darwin-arm64/metadata.json +5 -5
- package/prebuilds/linux-x64/doe_napi.node +0 -0
- package/prebuilds/linux-x64/libwebgpu_dawn.so +0 -0
- package/prebuilds/linux-x64/libwebgpu_doe.so +0 -0
- package/prebuilds/linux-x64/metadata.json +26 -0
- package/src/bun-ffi.js +3 -2
- package/src/bun.js +2 -2
- package/src/index.js +114 -15
- package/src/runtime_cli.js +3 -1
package/README.md
CHANGED
|
@@ -1,58 +1,180 @@
|
|
|
1
1
|
# @simulatte/webgpu
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
3
|
+
Headless WebGPU for Node.js and Bun, powered by Doe, Fawn's Zig WebGPU
|
|
4
|
+
runtime.
|
|
5
5
|
|
|
6
6
|
<p align="center">
|
|
7
|
-
<img src="
|
|
7
|
+
<img src="assets/fawn-icon-main-256.png" alt="Fawn logo" width="196" />
|
|
8
8
|
</p>
|
|
9
9
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
10
|
+
Use this package for headless compute, CI, benchmarking, and offscreen GPU
|
|
11
|
+
execution. It is built for explicit runtime behavior, deterministic
|
|
12
|
+
traceability, and artifact-backed performance work. It is not a DOM/canvas
|
|
13
|
+
package and it should not be read as a promise of full browser-surface parity.
|
|
14
|
+
|
|
15
|
+
## Quick examples
|
|
16
|
+
|
|
17
|
+
### Inspect the provider
|
|
18
|
+
|
|
19
|
+
```js
|
|
20
|
+
import { providerInfo } from "@simulatte/webgpu";
|
|
21
|
+
|
|
22
|
+
console.log(providerInfo());
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
### Request a device
|
|
26
|
+
|
|
27
|
+
```js
|
|
28
|
+
import { requestDevice } from "@simulatte/webgpu";
|
|
29
|
+
|
|
30
|
+
const device = await requestDevice();
|
|
31
|
+
console.log(device.limits.maxBufferSize);
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### Estimate pi on the GPU
|
|
35
|
+
|
|
36
|
+
65,536 threads each test 1,024 points inside the unit square. Each thread
|
|
37
|
+
hashes its index to produce sample coordinates, counts how many land inside
|
|
38
|
+
the unit circle, and writes its count to a results array. The CPU sums the
|
|
39
|
+
counts and computes pi ≈ 4 × hits / total.
|
|
40
|
+
|
|
41
|
+
```js
|
|
42
|
+
import { globals, requestDevice } from "@simulatte/webgpu";
|
|
43
|
+
|
|
44
|
+
const { GPUBufferUsage, GPUMapMode, GPUShaderStage } = globals;
|
|
45
|
+
const device = await requestDevice();
|
|
46
|
+
|
|
47
|
+
const THREADS = 65536;
|
|
48
|
+
const WORKGROUP_SIZE = 256;
|
|
49
|
+
const SAMPLES_PER_THREAD = 1024;
|
|
50
|
+
|
|
51
|
+
if (THREADS % WORKGROUP_SIZE !== 0) {
|
|
52
|
+
throw new Error("THREADS must be a multiple of WORKGROUP_SIZE");
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
const shader = device.createShaderModule({
|
|
56
|
+
code: `
|
|
57
|
+
@group(0) @binding(0) var<storage, read_write> counts: array<u32>;
|
|
58
|
+
|
|
59
|
+
fn hash(n: u32) -> u32 {
|
|
60
|
+
var x = n;
|
|
61
|
+
x ^= x >> 16u;
|
|
62
|
+
x *= 0x45d9f3bu;
|
|
63
|
+
x ^= x >> 16u;
|
|
64
|
+
x *= 0x45d9f3bu;
|
|
65
|
+
x ^= x >> 16u;
|
|
66
|
+
return x;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
@compute @workgroup_size(${WORKGROUP_SIZE})
|
|
70
|
+
fn main(@builtin(global_invocation_id) gid: vec3u) {
|
|
71
|
+
var count = 0u;
|
|
72
|
+
for (var i = 0u; i < ${SAMPLES_PER_THREAD}u; i += 1u) {
|
|
73
|
+
let idx = gid.x * ${SAMPLES_PER_THREAD}u + i;
|
|
74
|
+
let x = f32(hash(idx * 2u)) / 4294967295.0;
|
|
75
|
+
let y = f32(hash(idx * 2u + 1u)) / 4294967295.0;
|
|
76
|
+
if x * x + y * y <= 1.0 {
|
|
77
|
+
count += 1u;
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
counts[gid.x] = count;
|
|
81
|
+
}
|
|
82
|
+
`,
|
|
83
|
+
});
|
|
84
|
+
|
|
85
|
+
const bindGroupLayout = device.createBindGroupLayout({
|
|
86
|
+
entries: [{
|
|
87
|
+
binding: 0,
|
|
88
|
+
visibility: GPUShaderStage.COMPUTE,
|
|
89
|
+
buffer: { type: "storage" },
|
|
90
|
+
}],
|
|
91
|
+
});
|
|
92
|
+
|
|
93
|
+
const pipeline = device.createComputePipeline({
|
|
94
|
+
layout: device.createPipelineLayout({ bindGroupLayouts: [bindGroupLayout] }),
|
|
95
|
+
compute: { module: shader, entryPoint: "main" },
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
const countsBuffer = device.createBuffer({
|
|
99
|
+
size: THREADS * 4,
|
|
100
|
+
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
|
|
101
|
+
});
|
|
102
|
+
const readback = device.createBuffer({
|
|
103
|
+
size: THREADS * 4,
|
|
104
|
+
usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ,
|
|
105
|
+
});
|
|
106
|
+
|
|
107
|
+
const bindGroup = device.createBindGroup({
|
|
108
|
+
layout: bindGroupLayout,
|
|
109
|
+
entries: [{ binding: 0, resource: { buffer: countsBuffer } }],
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
const encoder = device.createCommandEncoder();
|
|
113
|
+
const pass = encoder.beginComputePass();
|
|
114
|
+
pass.setPipeline(pipeline);
|
|
115
|
+
pass.setBindGroup(0, bindGroup);
|
|
116
|
+
pass.dispatchWorkgroups(THREADS / WORKGROUP_SIZE);
|
|
117
|
+
pass.end();
|
|
118
|
+
encoder.copyBufferToBuffer(countsBuffer, 0, readback, 0, THREADS * 4);
|
|
119
|
+
device.queue.submit([encoder.finish()]);
|
|
120
|
+
|
|
121
|
+
await readback.mapAsync(GPUMapMode.READ);
|
|
122
|
+
const counts = new Uint32Array(readback.getMappedRange());
|
|
123
|
+
const hits = counts.reduce((a, b) => a + b, 0);
|
|
124
|
+
readback.unmap();
|
|
125
|
+
|
|
126
|
+
const total = THREADS * SAMPLES_PER_THREAD;
|
|
127
|
+
const pi = 4 * hits / total;
|
|
128
|
+
console.log(`${total.toLocaleString()} samples → pi ≈ ${pi.toFixed(6)}`);
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
Expected output will vary slightly, but it should look like:
|
|
132
|
+
|
|
133
|
+
```
|
|
134
|
+
67,108,864 samples → pi ≈ 3.14...
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
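Increase `SAMPLES_PER_THREAD` for more precision. With 65,536 threads, keep it at or below 32,768: beyond that the shader's `u32` sample index (`idx * 2u + 1u`) wraps around and samples start repeating.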
|
|
138
|
+
|
|
139
|
+
## What this package is
|
|
140
|
+
|
|
141
|
+
`@simulatte/webgpu` is the canonical package surface for Doe. Node uses an
|
|
142
|
+
N-API addon and Bun currently routes through the same addon-backed runtime
|
|
143
|
+
entry to load `libwebgpu_doe`. Current package builds still ship a Dawn sidecar
|
|
144
|
+
where proc resolution requires it. The experimental raw Bun FFI path remains in
|
|
145
|
+
`src/bun-ffi.js`, but it is not the default package entry.
|
|
146
|
+
|
|
147
|
+
Doe is a Zig-first WebGPU runtime with explicit allocator control, startup-time
|
|
148
|
+
profile and quirk binding, a native WGSL pipeline (`lexer -> parser ->
|
|
149
|
+
semantic analysis -> IR -> backend emitters`), and explicit
|
|
150
|
+
Vulkan/Metal/D3D12 execution paths in one system. Optional
|
|
151
|
+
`-Dlean-verified=true` builds use Lean 4 where proved invariants can be
|
|
152
|
+
hoisted out of runtime branches instead of being re-checked on every command;
|
|
153
|
+
package consumers should not assume that path by default.
|
|
17
154
|
|
|
18
155
|
Doe also keeps adapter and driver quirks explicit. Profile selection happens at
|
|
19
156
|
startup, quirk data is schema-backed, and the runtime binds the selected
|
|
20
157
|
profile instead of relying on hidden per-command fallback logic.
|
|
21
158
|
|
|
22
|
-
|
|
23
|
-
`libwebgpu_doe`. Current package builds still ship a Dawn sidecar where proc
|
|
24
|
-
resolution requires it.
|
|
25
|
-
|
|
26
|
-
This directory is the package root for `@simulatte/webgpu`. It contains the
|
|
27
|
-
Node provider source, the addon build contract, the Bun FFI entrypoint, and
|
|
28
|
-
the CLI helpers used by benchmark and CI workflows.
|
|
29
|
-
|
|
30
|
-
## Surface maturity
|
|
159
|
+
## Current scope
|
|
31
160
|
|
|
32
161
|
- Node is the primary supported package surface (N-API bridge).
|
|
33
|
-
- Bun has API parity with Node
|
|
34
|
-
contract tests passing). Bun benchmark cube maturity remains
|
|
35
|
-
until
|
|
162
|
+
- Bun has API parity with Node through the package's addon-backed runtime entry
|
|
163
|
+
(61/61 contract tests passing). Bun benchmark cube maturity remains
|
|
164
|
+
prototype until the comparable macOS cells stabilize across repeated
|
|
165
|
+
governed runs.
|
|
36
166
|
- Package-surface comparisons should be read through the benchmark cube outputs
|
|
37
167
|
under `bench/out/cube/`, not as a replacement for strict backend reports.
|
|
38
168
|
|
|
39
|
-
The **benchmark cube** is a cross-product matrix of surface (backend_native,
|
|
40
|
-
node_package, bun_package) × provider pair (e.g. doe_vs_dawn) × workload set
|
|
41
|
-
(e.g. compute_e2e, render, upload). Each intersection is a **cell** with its
|
|
42
|
-
own comparability and claimability status. Cube outputs live in
|
|
43
|
-
`bench/out/cube/` and include a dashboard, matrix summary, and per-row data.
|
|
44
|
-
|
|
45
169
|
<p align="center">
|
|
46
|
-
<img src="
|
|
170
|
+
<img src="assets/package-surface-cube-snapshot.svg" alt="Static package-surface benchmark cube snapshot" width="920" />
|
|
47
171
|
</p>
|
|
48
172
|
|
|
49
|
-
|
|
173
|
+
Package-surface benchmark evidence lives under `bench/out/cube/latest/`. Read
|
|
174
|
+
those rows as package-surface positioning data, not as substitutes for strict
|
|
175
|
+
backend-native claim lanes.
|
|
50
176
|
|
|
51
|
-
|
|
52
|
-
- renderer: `npm run build:readme-assets`
|
|
53
|
-
- scope: package surfaces only; backend-native strict claim lanes remain separate
|
|
54
|
-
|
|
55
|
-
## Quickstart
|
|
177
|
+
## Quick start
|
|
56
178
|
|
|
57
179
|
```bash
|
|
58
180
|
npm install @simulatte/webgpu
|
|
@@ -67,10 +189,36 @@ const device = await requestDevice();
|
|
|
67
189
|
console.log(device.limits.maxBufferSize);
|
|
68
190
|
```
|
|
69
191
|
|
|
70
|
-
|
|
71
|
-
|
|
192
|
+
The install ships platform-specific prebuilds for macOS arm64 (Metal) and
|
|
193
|
+
Linux x64 (Vulkan). The commands are the same on both platforms; the correct
|
|
194
|
+
backend is selected automatically. The only external prerequisite is GPU
|
|
195
|
+
drivers on the host. If no prebuild matches your platform, the install falls back
|
|
196
|
+
to building from source via node-gyp.
|
|
197
|
+
|
|
198
|
+
## Verify your install
|
|
199
|
+
|
|
200
|
+
After installing, run the smoke test to confirm native library loading and a
|
|
201
|
+
GPU round-trip:
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
npm run smoke
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
To run the full contract test suite (adapter, device, buffers, compute
|
|
208
|
+
dispatch with readback, textures, samplers):
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
npm test # Node
|
|
212
|
+
npm run test:bun # Bun
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
If `npm run smoke` fails, check that GPU drivers are installed and that your
|
|
216
|
+
platform is supported (macOS arm64 or Linux x64).
|
|
217
|
+
|
|
218
|
+
## Building from source
|
|
72
219
|
|
|
73
|
-
|
|
220
|
+
Use this when working from the Fawn repo checkout or rebuilding the addon
|
|
221
|
+
against a local Doe runtime build.
|
|
74
222
|
|
|
75
223
|
```bash
|
|
76
224
|
# From the Fawn workspace root:
|
|
@@ -79,10 +227,29 @@ cd zig && zig build dropin # build libwebgpu_doe + Dawn sidecar
|
|
|
79
227
|
cd nursery/webgpu
|
|
80
228
|
npm run build:addon # compile doe_napi.node from source
|
|
81
229
|
npm run smoke # verify native loading + GPU round-trip
|
|
230
|
+
npm test # Node contract tests
|
|
231
|
+
npm run test:bun # Bun contract tests
|
|
82
232
|
```
|
|
83
233
|
|
|
84
|
-
|
|
85
|
-
|
|
234
|
+
Current macOS arm64 validation for `0.2.3` was rerun on March 10, 2026 with:
|
|
235
|
+
|
|
236
|
+
```bash
|
|
237
|
+
cd zig && zig build dropin
|
|
238
|
+
|
|
239
|
+
cd nursery/webgpu
|
|
240
|
+
npm run build:addon
|
|
241
|
+
npm run smoke
|
|
242
|
+
npm test
|
|
243
|
+
npm run test:bun
|
|
244
|
+
npm run prebuild -- --skip-addon-build
|
|
245
|
+
npm pack --dry-run
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
That path is green on the Apple Metal host. `npm run test:bun` also passed on
|
|
249
|
+
this host (`61 passed, 0 failed`) once Bun was added to `PATH`.
|
|
250
|
+
|
|
251
|
+
For Fawn development setup, build toolchain requirements, and benchmark
|
|
252
|
+
harness usage, see the [Fawn project README](../../README.md).
|
|
86
253
|
|
|
87
254
|
## Packaging prebuilds (CI / release)
|
|
88
255
|
|
|
@@ -93,21 +260,18 @@ npm run prebuild # assembles prebuilds/<platform>-<arch>/
|
|
|
93
260
|
Supported prebuild targets: macOS arm64 (Metal), Linux x64 (Vulkan),
|
|
94
261
|
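Windows x64 (D3D12). Of these, the published install currently ships prebuilds for macOS arm64 and Linux x64 only (see Quick start). Host GPU drivers are the only external prerequisite.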
|
|
95
262
|
Install uses prebuilds when available and otherwise falls back to building from source via node-gyp.
|
|
263
|
+
Tracked `prebuilds/<platform>-<arch>/` directories are the source of truth for
|
|
264
|
+
reproducible package publishes. If a prebuild exists only on one local machine
|
|
265
|
+
and is not committed, `npm pack` output will differ by environment.
|
|
266
|
+
Generated `.tgz` package archives are release outputs and should not be
|
|
267
|
+
committed to the repo.
|
|
96
268
|
Prebuild `metadata.json` now records `doeBuild.leanVerifiedBuild` and
|
|
97
269
|
`proofArtifactSha256`, and `providerInfo()` surfaces the same values when
|
|
98
270
|
metadata is present.
|
|
99
271
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
- `src/node-runtime.js`: compatibility alias for the Node entrypoint
|
|
104
|
-
- `src/bun-ffi.js`: Bun FFI provider (full API parity with Node)
|
|
105
|
-
- `src/bun.js`: Bun re-export entrypoint
|
|
106
|
-
- `src/runtime_cli.js`: Doe CLI/runtime helpers
|
|
107
|
-
- `native/doe_napi.c`: N-API bridge for the in-process Node provider
|
|
108
|
-
- `binding.gyp`: addon build contract
|
|
109
|
-
- `bin/fawn-webgpu-bench.js`: command-stream bench wrapper
|
|
110
|
-
- `bin/fawn-webgpu-compare.js`: Dawn-vs-Doe compare wrapper
|
|
272
|
+
Package publication still depends on the governed Linux Vulkan release lane in
|
|
273
|
+
[`process.md`](../../process.md). A green macOS package rerun is necessary, but
|
|
274
|
+
not sufficient, for a release publish.
|
|
111
275
|
|
|
112
276
|
## Current caveats
|
|
113
277
|
|
|
@@ -123,16 +287,33 @@ metadata is present.
|
|
|
123
287
|
- Linux Node Doe-native path is now wired end-to-end (Linux guard removed).
|
|
124
288
|
No `DOE_WEBGPU_LIB` env var needed when prebuilds or workspace artifacts
|
|
125
289
|
are present.
|
|
126
|
-
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
290
|
+
- Fresh macOS package evidence from March 10, 2026 is reflected in
|
|
291
|
+
`bench/out/cube/latest/` (generated `2026-03-10T20:31:02.431911Z`):
|
|
292
|
+
Bun `uploads`, `compute_e2e`, and `full_comparable` are `claimable`;
|
|
293
|
+
Node `uploads`, `compute_e2e`, and `full_comparable` are also `claimable`.
|
|
294
|
+
- Separate Apple Metal extended-comparable backend evidence from March 10, 2026
|
|
295
|
+
(`bench/out/apple-metal/extended-comparable/20260310T121546Z/`) is
|
|
296
|
+
`31/31` comparable and `31/31` claimable. Read that lane as stricter
|
|
297
|
+
backend evidence, not as a replacement for the package-surface cube rows.
|
|
298
|
+
- Bun has API parity with Node (61/61 contract tests). The package-default Bun
|
|
299
|
+
entry currently routes through the addon-backed runtime, while
|
|
300
|
+
`src/bun-ffi.js` remains experimental. The Bun benchmark lane is at
|
|
301
|
+
`bench/bun/compare.js`; benchmark interpretations should note which runtime
|
|
302
|
+
entry was exercised. Latest fresh macOS run
|
|
303
|
+
(`bench/out/bun-doe-vs-webgpu/doe-vs-bun-webgpu-2026-03-10T195022524Z.json`)
|
|
304
|
+
executes all `12` current workloads and has `9` comparable rows, all `9`
|
|
305
|
+
claimable. `compute_e2e_{256,4096,65536}` and
|
|
306
|
+
`copy_buffer_to_buffer_4kb` are claimable in the full macOS package lane.
|
|
307
|
+
The remaining three rows are intentional directional-only workloads
|
|
308
|
+
(`submit_empty`, `pipeline_create`, `compute_dispatch_simple`).
|
|
309
|
+
- Latest fresh macOS Node package run
|
|
310
|
+
(`bench/out/node-doe-vs-dawn-claim-full/doe-vs-dawn-node-2026-03-10T202406545Z.json`)
|
|
311
|
+
has `12` total rows, `9` comparable rows, and all `9` comparable rows are
|
|
312
|
+
claimable. `compute_e2e_{256,4096,65536}`, `copy_buffer_to_buffer_4kb`,
|
|
313
|
+
and the current upload set are claimable in the full package lane. The
|
|
314
|
+
remaining three rows are intentional directional-only workloads
|
|
315
|
+
(`submit_empty`, `pipeline_create`, `compute_dispatch_simple`).
|
|
135
316
|
- Self-contained install ships prebuilt `doe_napi.node` + `libwebgpu_doe` +
|
|
136
|
-
Dawn sidecar per platform.
|
|
317
|
+
Dawn sidecar per platform. See **Verify your install** above.
|
|
137
318
|
- API details live in `API_CONTRACT.md`.
|
|
138
319
|
- Compatibility scope is documented in `COMPAT_SCOPE.md`.
|