@simulatte/webgpu 0.2.1 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +104 -0
- package/README.md +135 -108
- package/{API_CONTRACT.md → api-contract.md} +63 -3
- package/assets/fawn-icon-main-256.png +0 -0
- package/assets/package-surface-cube-snapshot.svg +14 -14
- package/compat-scope.md +46 -0
- package/headless-webgpu-comparison.md +3 -3
- package/layering-plan.md +259 -0
- package/native/doe_napi.c +110 -17
- package/package.json +28 -8
- package/prebuilds/darwin-arm64/doe_napi.node +0 -0
- package/prebuilds/darwin-arm64/libwebgpu_doe.dylib +0 -0
- package/prebuilds/darwin-arm64/metadata.json +5 -5
- package/prebuilds/linux-x64/doe_napi.node +0 -0
- package/prebuilds/linux-x64/libwebgpu_dawn.so +0 -0
- package/prebuilds/linux-x64/libwebgpu_doe.so +0 -0
- package/prebuilds/linux-x64/metadata.json +26 -0
- package/scripts/generate-readme-assets.js +2 -2
- package/src/bun-ffi.js +3 -2
- package/src/bun.js +2 -2
- package/src/compute.d.ts +161 -0
- package/src/compute.js +277 -0
- package/src/doe.d.ts +84 -0
- package/src/doe.js +275 -0
- package/src/full.d.ts +112 -0
- package/src/full.js +10 -0
- package/src/index.js +114 -15
- package/src/node-runtime.js +2 -2
- package/src/node.js +2 -2
- package/src/runtime_cli.js +3 -1
- package/support-contracts.md +339 -0
- package/zig-source-inventory.md +468 -0
- package/COMPAT_SCOPE.md +0 -32
package/layering-plan.md
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
# Proposed layering plan for core and full
|
|
2
|
+
|
|
3
|
+
Plan status: `draft`
|
|
4
|
+
|
|
5
|
+
Scope:
|
|
6
|
+
|
|
7
|
+
- future Doe runtime/package sharding for headless WebGPU
|
|
8
|
+
- architecture and refactor sequencing only
|
|
9
|
+
- no current runtime behavior changes
|
|
10
|
+
|
|
11
|
+
This plan exists to keep the future `core` and `full` split honest before any
|
|
12
|
+
Zig source moves begin.
|
|
13
|
+
|
|
14
|
+
It answers four questions:
|
|
15
|
+
|
|
16
|
+
1. what boundary is being enforced
|
|
17
|
+
2. how that boundary is enforced in code review and CI
|
|
18
|
+
3. how capability coverage and gates split once the boundary exists
|
|
19
|
+
4. what order the refactor should happen in
|
|
20
|
+
|
|
21
|
+
Use this together with:
|
|
22
|
+
|
|
23
|
+
- `support-contracts.md` for product/support scope
|
|
24
|
+
- `api-contract.md` for the current package contract (`full` default, `compute` subpath)
|
|
25
|
+
- `compat-scope.md` for current package non-goals
|
|
26
|
+
- `zig-source-inventory.md` for the current `zig/src` file map
|
|
27
|
+
|
|
28
|
+
## Current state
|
|
29
|
+
|
|
30
|
+
The repo is now partially physically split into `core` and `full`, but the public façade boundary is still being reduced.
|
|
31
|
+
|
|
32
|
+
Current reality:
|
|
33
|
+
|
|
34
|
+
1. `zig/src/core/` and `zig/src/full/` are now real source subtrees with canonical implementations for the first runtime split slices.
|
|
35
|
+
2. The old root ABI shims (`zig/src/wgpu_types.zig`, `zig/src/wgpu_loader.zig`) have now been retired; the remaining root compatibility façades are narrower command/resource surfaces kept only while callers finish retargeting.
|
|
36
|
+
3. Canonical command partition and command dispatch now live in `zig/src/core/{command_partition.zig,command_dispatch.zig}` and `zig/src/full/{command_partition.zig,command_dispatch.zig}`.
|
|
37
|
+
4. Canonical texture command handling now lives in `zig/src/core/resource/wgpu_texture_commands.zig`; canonical sampler and surface command handling now lives in `zig/src/full/render/wgpu_sampler_commands.zig` and `zig/src/full/surface/wgpu_surface_commands.zig`.
|
|
38
|
+
5. `zig/src/wgpu_commands.zig`, `zig/src/wgpu_resources.zig`, and `zig/src/wgpu_extended_commands.zig` are now compatibility façades over the canonical subtrees, while `zig/src/webgpu_ffi.zig` remains the public façade and owner of `WebGPUBackend`.
|
|
39
|
+
6. Dedicated Zig test lanes now exist as `zig build test-core` and `zig build test-full`, but split coverage remains thin and capability tracking is still represented by one shared coverage ledger.
|
|
40
|
+
7. The JS package now exposes a default `full` surface plus an explicit `compute` subpath, while the underlying JS implementation is still shared.
|
|
41
|
+
|
|
42
|
+
That means this plan is now largely realized in the physical source tree, and the remaining semantic split is concentrated in the public façade files and backend roots.
|
|
43
|
+
|
|
44
|
+
## Boundary definition
|
|
45
|
+
|
|
46
|
+
The target architecture is:
|
|
47
|
+
|
|
48
|
+
```text
|
|
49
|
+
Doe core
|
|
50
|
+
^
|
|
51
|
+
|
|
|
52
|
+
Doe full
|
|
53
|
+
^
|
|
54
|
+
|
|
|
55
|
+
Chromium Track A runtime artifact lane
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Rules:
|
|
59
|
+
|
|
60
|
+
1. `full` composes `core`; it does not toggle `core`.
|
|
61
|
+
2. `core` must never import `full`.
|
|
62
|
+
3. `full` may depend on `core` Zig modules, Lean modules, build outputs, and JS
|
|
63
|
+
helpers.
|
|
64
|
+
4. Chromium Track A depends on the full runtime artifact and browser-specific
|
|
65
|
+
gates, not on npm package layout.
|
|
66
|
+
|
|
67
|
+
The anti-bleed rule is the core of the design:
|
|
68
|
+
|
|
69
|
+
- no `if full_enabled` branches inside `core`
|
|
70
|
+
- no `full` fields added to `core` structs
|
|
71
|
+
- no browser-policy logic added to `full`
|
|
72
|
+
|
|
73
|
+
## Import fence rule
|
|
74
|
+
|
|
75
|
+
This is the primary long-term enforcement rule.
|
|
76
|
+
|
|
77
|
+
### Contract
|
|
78
|
+
|
|
79
|
+
1. `zig/src/core/**` may not import any file under `zig/src/full/**`
|
|
80
|
+
2. `lean/Fawn/Core/**` may not import any file under `lean/Fawn/Full/**`
|
|
81
|
+
3. package-level `core` entrypoints may not import `full` entrypoints
|
|
82
|
+
4. any exception requires redesign, not a one-off waiver
|
|
83
|
+
|
|
84
|
+
### CI enforcement
|
|
85
|
+
|
|
86
|
+
The dedicated import-fence check should fail if:
|
|
87
|
+
|
|
88
|
+
1. a Zig file under `core` references `full/`
|
|
89
|
+
2. a Lean file under `Core` references `Full`
|
|
90
|
+
3. a package `core` entrypoint reaches into a `full`-only module
|
|
91
|
+
|
|
92
|
+
The check should be a simple, explicit path-dependency audit. This is not a
|
|
93
|
+
lint preference; it is a release-blocking architectural boundary.
|
|
94
|
+
|
|
95
|
+
## Struct wrapping rule
|
|
96
|
+
|
|
97
|
+
`full` must extend `core` by composition, never by mutating `core` types in
|
|
98
|
+
place.
|
|
99
|
+
|
|
100
|
+
### Contract
|
|
101
|
+
|
|
102
|
+
1. if `full` needs shared state, it holds a `core` value or handle
|
|
103
|
+
2. if `full` needs extra state, that state lives in a `full` wrapper type
|
|
104
|
+
3. `core` structs may not gain render/surface/full-only fields just because
|
|
105
|
+
`full` needs them
|
|
106
|
+
4. `core` APIs may expose stable extension points, but not latent `full`
|
|
107
|
+
payload slots
|
|
108
|
+
|
|
109
|
+
### Example direction
|
|
110
|
+
|
|
111
|
+
Good shape:
|
|
112
|
+
|
|
113
|
+
```text
|
|
114
|
+
full.RenderPipeline
|
|
115
|
+
- core_pipeline_layout: core.PipelineLayout
|
|
116
|
+
- full_render_state: ...
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
Bad shape:
|
|
120
|
+
|
|
121
|
+
```text
|
|
122
|
+
core.PipelineLayout
|
|
123
|
+
- maybe_render_state_if_full_enabled: ...
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
The intent is to keep `core` independently understandable, buildable, and
|
|
127
|
+
benchmarkable.
|
|
128
|
+
|
|
129
|
+
## Coverage split rule
|
|
130
|
+
|
|
131
|
+
The current shared capability ledger is not enough once `core` and `full`
|
|
132
|
+
become separate release surfaces.
|
|
133
|
+
|
|
134
|
+
### Target split
|
|
135
|
+
|
|
136
|
+
1. `config/webgpu-core-coverage.json`
|
|
137
|
+
- only `core` contractual capabilities
|
|
138
|
+
2. `config/webgpu-full-coverage.json`
|
|
139
|
+
- `core` plus `full` contractual capabilities
|
|
140
|
+
3. Chromium Track A keeps its own browser/drop-in evidence and must not be
|
|
141
|
+
represented as mere package coverage
|
|
142
|
+
|
|
143
|
+
### Gate split
|
|
144
|
+
|
|
145
|
+
`core` gates should validate:
|
|
146
|
+
|
|
147
|
+
1. core package contract
|
|
148
|
+
2. core CTS subset
|
|
149
|
+
3. core package-surface benchmark cells
|
|
150
|
+
4. explicit unsupported taxonomy outside core scope
|
|
151
|
+
|
|
152
|
+
`full` gates should validate:
|
|
153
|
+
|
|
154
|
+
1. all core gates
|
|
155
|
+
2. full package contract
|
|
156
|
+
3. expanded CTS subset for render/lifecycle/query coverage
|
|
157
|
+
4. full package-surface benchmark cells
|
|
158
|
+
5. explicit unsupported taxonomy outside full scope
|
|
159
|
+
|
|
160
|
+
Track A gates remain separate:
|
|
161
|
+
|
|
162
|
+
1. drop-in symbol completeness
|
|
163
|
+
2. drop-in behavior suite
|
|
164
|
+
3. browser replay and trace parity
|
|
165
|
+
4. browser performance claimability
|
|
166
|
+
|
|
167
|
+
## Proposed source layout
|
|
168
|
+
|
|
169
|
+
Target layout:
|
|
170
|
+
|
|
171
|
+
```text
|
|
172
|
+
zig/src/core/
|
|
173
|
+
mod.zig
|
|
174
|
+
trace/
|
|
175
|
+
replay/
|
|
176
|
+
abi/
|
|
177
|
+
resource/
|
|
178
|
+
queue/
|
|
179
|
+
compute/
|
|
180
|
+
backend/common/
|
|
181
|
+
backend/{metal,vulkan,d3d12}/core/
|
|
182
|
+
|
|
183
|
+
zig/src/full/
|
|
184
|
+
mod.zig
|
|
185
|
+
render/
|
|
186
|
+
surface/
|
|
187
|
+
lifecycle/
|
|
188
|
+
backend/{metal,vulkan,d3d12}/full/
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
Matching Lean layout:
|
|
192
|
+
|
|
193
|
+
```text
|
|
194
|
+
lean/Fawn/Core/
|
|
195
|
+
lean/Fawn/Full/
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
Matching package layout is currently:
|
|
199
|
+
|
|
200
|
+
1. one package with scoped exports
|
|
201
|
+
- `@simulatte/webgpu` => `full`
|
|
202
|
+
- `@simulatte/webgpu/compute` => compute-first subset
|
|
203
|
+
|
|
204
|
+
Separate packages remain optional later, but they are not the current shape.
|
|
205
|
+
The source boundary still comes first.
|
|
206
|
+
|
|
207
|
+
## Refactor order
|
|
208
|
+
|
|
209
|
+
Do not start by renaming packages.
|
|
210
|
+
|
|
211
|
+
Recommended order:
|
|
212
|
+
|
|
213
|
+
1. freeze support contracts
|
|
214
|
+
- define what `core` and `full` promise
|
|
215
|
+
2. add import-fence CI checks
|
|
216
|
+
- enforce the one-way dependency before extraction starts
|
|
217
|
+
3. add split coverage ledgers and split gate entrypoints
|
|
218
|
+
- even if both initially point at the current shared runtime
|
|
219
|
+
4. identify shared runtime modules that are genuinely `core`
|
|
220
|
+
- trace, replay, buffers, queue, compute, shared resource model
|
|
221
|
+
5. identify `full`-only modules
|
|
222
|
+
- render, surface, broader lifecycle/parity layers
|
|
223
|
+
6. extract `full` wrappers around `core` types
|
|
224
|
+
- composition only
|
|
225
|
+
7. move render/surface code out of the shared tree
|
|
226
|
+
- no behavior change intended during extraction
|
|
227
|
+
8. split package/API contracts
|
|
228
|
+
- only after the runtime boundary is real
|
|
229
|
+
9. retarget Chromium Track A to the full runtime artifact contract
|
|
230
|
+
- no npm package dependency in architecture docs
|
|
231
|
+
|
|
232
|
+
## Review checklist for future changes
|
|
233
|
+
|
|
234
|
+
Any future patch touching this split should answer:
|
|
235
|
+
|
|
236
|
+
1. does `core` now depend on `full` anywhere
|
|
237
|
+
2. did a `core` struct gain a `full`-only field
|
|
238
|
+
3. did a coverage or gate responsibility move without contract updates
|
|
239
|
+
4. did a browser-owned behavior get assigned to `core` or `full`
|
|
240
|
+
5. did packaging get ahead of the runtime boundary
|
|
241
|
+
|
|
242
|
+
If any answer is yes, the patch should be treated as architecture drift until
|
|
243
|
+
the contract is updated or the design is corrected.
|
|
244
|
+
|
|
245
|
+
## Immediate next artifacts
|
|
246
|
+
|
|
247
|
+
The earliest structural groundwork now exists: the inventory and import-fence check are in place, the first canonical `core/` and `full/` subtrees exist, and the legacy root command/resource files are compatibility façades.
|
|
248
|
+
|
|
249
|
+
The next technical artifacts should focus on the remaining semantic boundary:
|
|
250
|
+
|
|
251
|
+
1. split coverage contracts for `core` and `full`
|
|
252
|
+
2. classify tests and Lean proofs against the new `core`/`full` boundary
|
|
253
|
+
3. shrink the remaining public façade files:
|
|
254
|
+
- `model.zig`
|
|
255
|
+
- `webgpu_ffi.zig`
|
|
256
|
+
- backend root modules
|
|
257
|
+
4. retire root compatibility façades once callers stop importing them
|
|
258
|
+
|
|
259
|
+
The extraction work should now spend its effort on the remaining public boundary, not on moving files again when they already have canonical homes.
|
package/native/doe_napi.c
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
#include <stdlib.h>
|
|
16
16
|
#include <string.h>
|
|
17
17
|
#include <stdint.h>
|
|
18
|
+
#include <stdio.h>
|
|
18
19
|
|
|
19
20
|
#ifdef _WIN32
|
|
20
21
|
#include <windows.h>
|
|
@@ -884,6 +885,30 @@ static napi_value doe_buffer_get_mapped_range(napi_env env, napi_callback_info i
|
|
|
884
885
|
return ab;
|
|
885
886
|
}
|
|
886
887
|
|
|
888
|
+
/* bufferAssertMappedPrefixF32(buffer, expected, count) */
|
|
889
|
+
static napi_value doe_buffer_assert_mapped_prefix_f32(napi_env env, napi_callback_info info) {
|
|
890
|
+
NAPI_ASSERT_ARGC(env, info, 3);
|
|
891
|
+
CHECK_LIB_LOADED(env);
|
|
892
|
+
WGPUBuffer buf = unwrap_ptr(env, _args[0]);
|
|
893
|
+
double expected = 0.0;
|
|
894
|
+
uint32_t count = 0;
|
|
895
|
+
napi_get_value_double(env, _args[1], &expected);
|
|
896
|
+
napi_get_value_uint32(env, _args[2], &count);
|
|
897
|
+
if (!buf) NAPI_THROW(env, "bufferAssertMappedPrefixF32 requires buffer");
|
|
898
|
+
const float* mapped = (const float*)pfn_wgpuBufferGetConstMappedRange(buf, 0, count * sizeof(float));
|
|
899
|
+
if (!mapped) NAPI_THROW(env, "bufferAssertMappedPrefixF32: mapped range unavailable");
|
|
900
|
+
for (uint32_t i = 0; i < count; i++) {
|
|
901
|
+
if ((double)mapped[i] != expected) {
|
|
902
|
+
char msg[128];
|
|
903
|
+
snprintf(msg, sizeof(msg), "expected readback[%u] === %.0f, got %.9g", i, expected, (double)mapped[i]);
|
|
904
|
+
NAPI_THROW(env, msg);
|
|
905
|
+
}
|
|
906
|
+
}
|
|
907
|
+
napi_value ok;
|
|
908
|
+
napi_get_boolean(env, true, &ok);
|
|
909
|
+
return ok;
|
|
910
|
+
}
|
|
911
|
+
|
|
887
912
|
/* ================================================================
|
|
888
913
|
* Shader Module
|
|
889
914
|
* ================================================================ */
|
|
@@ -1376,8 +1401,8 @@ static napi_value doe_queue_flush(napi_env env, napi_callback_info info) {
|
|
|
1376
1401
|
}
|
|
1377
1402
|
|
|
1378
1403
|
/* submitBatched(device, queue, commandsArray)
|
|
1379
|
-
* Fast path: single dispatch +
|
|
1380
|
-
*
|
|
1404
|
+
* Fast path: single dispatch or dispatch+copy → doeNativeComputeDispatchFlush.
|
|
1405
|
+
* Larger or mixed batches stay on the standard wgpu path. */
|
|
1381
1406
|
#define BATCH_MAX_BIND_GROUPS 4
|
|
1382
1407
|
static napi_value doe_submit_batched(napi_env env, napi_callback_info info) {
|
|
1383
1408
|
NAPI_ASSERT_ARGC(env, info, 3);
|
|
@@ -1391,12 +1416,18 @@ static napi_value doe_submit_batched(napi_env env, napi_callback_info info) {
|
|
|
1391
1416
|
napi_get_array_length(env, commands, &cmd_count);
|
|
1392
1417
|
if (cmd_count == 0) return NULL;
|
|
1393
1418
|
|
|
1394
|
-
/* Fast path: exactly
|
|
1395
|
-
if (pfn_doeNativeComputeDispatchFlush && cmd_count
|
|
1419
|
+
/* Fast path: exactly one dispatch, or dispatch followed by copy. */
|
|
1420
|
+
if (pfn_doeNativeComputeDispatchFlush && (cmd_count == 1 || cmd_count == 2)) {
|
|
1396
1421
|
napi_value cmd0;
|
|
1397
1422
|
napi_get_element(env, commands, 0, &cmd0);
|
|
1398
1423
|
uint32_t t0 = get_uint32_prop(env, cmd0, "t");
|
|
1399
|
-
|
|
1424
|
+
uint32_t t1 = UINT32_MAX;
|
|
1425
|
+
napi_value cmd1 = NULL;
|
|
1426
|
+
if (cmd_count == 2) {
|
|
1427
|
+
napi_get_element(env, commands, 1, &cmd1);
|
|
1428
|
+
t1 = get_uint32_prop(env, cmd1, "t");
|
|
1429
|
+
}
|
|
1430
|
+
if (t0 == 0 && (cmd_count == 1 || t1 == 1)) {
|
|
1400
1431
|
void* pipeline = unwrap_ptr(env, get_prop(env, cmd0, "p"));
|
|
1401
1432
|
napi_value bgs = get_prop(env, cmd0, "bg");
|
|
1402
1433
|
uint32_t bg_count = 0;
|
|
@@ -1411,20 +1442,17 @@ static napi_value doe_submit_batched(napi_env env, napi_callback_info info) {
|
|
|
1411
1442
|
uint32_t dx = get_uint32_prop(env, cmd0, "x");
|
|
1412
1443
|
uint32_t dy = get_uint32_prop(env, cmd0, "y");
|
|
1413
1444
|
uint32_t dz = get_uint32_prop(env, cmd0, "z");
|
|
1414
|
-
|
|
1415
|
-
|
|
1416
|
-
void* copy_dst = NULL;
|
|
1445
|
+
void* copy_src = NULL;
|
|
1446
|
+
uint64_t copy_src_off = 0;
|
|
1447
|
+
void* copy_dst = NULL;
|
|
1448
|
+
uint64_t copy_dst_off = 0;
|
|
1417
1449
|
uint64_t copy_size = 0;
|
|
1418
1450
|
if (cmd_count == 2) {
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1423
|
-
|
|
1424
|
-
copy_dst = unwrap_ptr(env, get_prop(env, cmd1, "d"));
|
|
1425
|
-
copy_dst_off = (uint64_t)get_int64_prop(env, cmd1, "do");
|
|
1426
|
-
copy_size = (uint64_t)get_int64_prop(env, cmd1, "sz");
|
|
1427
|
-
}
|
|
1451
|
+
copy_src = unwrap_ptr(env, get_prop(env, cmd1, "s"));
|
|
1452
|
+
copy_dst = unwrap_ptr(env, get_prop(env, cmd1, "d"));
|
|
1453
|
+
copy_src_off = (uint64_t)get_int64_prop(env, cmd1, "so");
|
|
1454
|
+
copy_dst_off = (uint64_t)get_int64_prop(env, cmd1, "do");
|
|
1455
|
+
copy_size = (uint64_t)get_int64_prop(env, cmd1, "sz");
|
|
1428
1456
|
}
|
|
1429
1457
|
pfn_doeNativeComputeDispatchFlush(
|
|
1430
1458
|
queue, pipeline, (void**)bg_ptrs, bg_count,
|
|
@@ -1435,6 +1463,16 @@ static napi_value doe_submit_batched(napi_env env, napi_callback_info info) {
|
|
|
1435
1463
|
}
|
|
1436
1464
|
|
|
1437
1465
|
/* Fallback: standard wgpu path. */
|
|
1466
|
+
int flush_after_submit = 0;
|
|
1467
|
+
if (cmd_count == 2) {
|
|
1468
|
+
napi_value cmd0;
|
|
1469
|
+
napi_value cmd1;
|
|
1470
|
+
napi_get_element(env, commands, 0, &cmd0);
|
|
1471
|
+
napi_get_element(env, commands, 1, &cmd1);
|
|
1472
|
+
if (get_uint32_prop(env, cmd0, "t") == 0 && get_uint32_prop(env, cmd1, "t") == 1) {
|
|
1473
|
+
flush_after_submit = 1;
|
|
1474
|
+
}
|
|
1475
|
+
}
|
|
1438
1476
|
WGPUCommandEncoder encoder = pfn_wgpuDeviceCreateCommandEncoder(device, NULL);
|
|
1439
1477
|
if (!encoder) NAPI_THROW(env, "submitBatched: createCommandEncoder failed");
|
|
1440
1478
|
for (uint32_t i = 0; i < cmd_count; i++) {
|
|
@@ -1470,11 +1508,64 @@ static napi_value doe_submit_batched(napi_env env, napi_callback_info info) {
|
|
|
1470
1508
|
}
|
|
1471
1509
|
WGPUCommandBuffer cmd_buf = pfn_wgpuCommandEncoderFinish(encoder, NULL);
|
|
1472
1510
|
pfn_wgpuQueueSubmit(queue, 1, &cmd_buf);
|
|
1511
|
+
if (flush_after_submit && pfn_doeNativeQueueFlush) {
|
|
1512
|
+
pfn_doeNativeQueueFlush(queue);
|
|
1513
|
+
}
|
|
1473
1514
|
pfn_wgpuCommandBufferRelease(cmd_buf);
|
|
1474
1515
|
pfn_wgpuCommandEncoderRelease(encoder);
|
|
1475
1516
|
return NULL;
|
|
1476
1517
|
}
|
|
1477
1518
|
|
|
1519
|
+
/* submitComputeDispatchCopy(device, queue, pipeline, bindGroups, x, y, z, src, srcOff, dst, dstOff, size)
|
|
1520
|
+
* Direct addon surface for the exact package compute_e2e shape so JS runtimes
|
|
1521
|
+
* do not pay generic command-array parsing on every timed sample. */
|
|
1522
|
+
static napi_value doe_submit_compute_dispatch_copy(napi_env env, napi_callback_info info) {
|
|
1523
|
+
size_t argc = 12;
|
|
1524
|
+
napi_value args[12];
|
|
1525
|
+
napi_status status = napi_get_cb_info(env, info, &argc, args, NULL, NULL);
|
|
1526
|
+
if (status != napi_ok || argc != 12) NAPI_THROW(env, "submitComputeDispatchCopy requires 12 arguments");
|
|
1527
|
+
CHECK_LIB_LOADED(env);
|
|
1528
|
+
WGPUDevice device = unwrap_ptr(env, args[0]);
|
|
1529
|
+
WGPUQueue queue = unwrap_ptr(env, args[1]);
|
|
1530
|
+
void* pipeline = unwrap_ptr(env, args[2]);
|
|
1531
|
+
napi_value bgs = args[3];
|
|
1532
|
+
uint32_t dx = 0;
|
|
1533
|
+
uint32_t dy = 0;
|
|
1534
|
+
uint32_t dz = 0;
|
|
1535
|
+
int64_t copy_src_off_i = 0;
|
|
1536
|
+
int64_t copy_dst_off_i = 0;
|
|
1537
|
+
int64_t copy_size_i = 0;
|
|
1538
|
+
napi_get_value_uint32(env, args[4], &dx);
|
|
1539
|
+
napi_get_value_uint32(env, args[5], &dy);
|
|
1540
|
+
napi_get_value_uint32(env, args[6], &dz);
|
|
1541
|
+
void* copy_src = unwrap_ptr(env, args[7]);
|
|
1542
|
+
napi_get_value_int64(env, args[8], &copy_src_off_i);
|
|
1543
|
+
void* copy_dst = unwrap_ptr(env, args[9]);
|
|
1544
|
+
napi_get_value_int64(env, args[10], &copy_dst_off_i);
|
|
1545
|
+
napi_get_value_int64(env, args[11], &copy_size_i);
|
|
1546
|
+
uint64_t copy_src_off = (uint64_t)copy_src_off_i;
|
|
1547
|
+
uint64_t copy_dst_off = (uint64_t)copy_dst_off_i;
|
|
1548
|
+
uint64_t copy_size = (uint64_t)copy_size_i;
|
|
1549
|
+
if (!device || !queue || !pipeline) NAPI_THROW(env, "submitComputeDispatchCopy requires device, queue, and pipeline");
|
|
1550
|
+
if (!pfn_doeNativeComputeDispatchFlush) NAPI_THROW(env, "submitComputeDispatchCopy: doeNativeComputeDispatchFlush not available");
|
|
1551
|
+
|
|
1552
|
+
uint32_t bg_count = 0;
|
|
1553
|
+
napi_get_array_length(env, bgs, &bg_count);
|
|
1554
|
+
if (bg_count > BATCH_MAX_BIND_GROUPS) bg_count = BATCH_MAX_BIND_GROUPS;
|
|
1555
|
+
void* bg_ptrs[BATCH_MAX_BIND_GROUPS] = {NULL};
|
|
1556
|
+
for (uint32_t j = 0; j < bg_count; j++) {
|
|
1557
|
+
napi_value bg_val;
|
|
1558
|
+
napi_get_element(env, bgs, j, &bg_val);
|
|
1559
|
+
bg_ptrs[j] = unwrap_ptr(env, bg_val);
|
|
1560
|
+
}
|
|
1561
|
+
|
|
1562
|
+
pfn_doeNativeComputeDispatchFlush(
|
|
1563
|
+
queue, pipeline, (void**)bg_ptrs, bg_count,
|
|
1564
|
+
dx, dy, dz,
|
|
1565
|
+
copy_src, copy_src_off, copy_dst, copy_dst_off, copy_size);
|
|
1566
|
+
return NULL;
|
|
1567
|
+
}
|
|
1568
|
+
|
|
1478
1569
|
/* flushAndMapSync(instance, queue, buffer, mode, offset, size) — flush + map in one N-API call. */
|
|
1479
1570
|
static napi_value doe_flush_and_map_sync(napi_env env, napi_callback_info info) {
|
|
1480
1571
|
NAPI_ASSERT_ARGC(env, info, 6);
|
|
@@ -1868,6 +1959,7 @@ static napi_value doe_module_init(napi_env env, napi_value exports) {
|
|
|
1868
1959
|
EXPORT_FN("bufferUnmap", doe_buffer_unmap),
|
|
1869
1960
|
EXPORT_FN("bufferMapSync", doe_buffer_map_sync),
|
|
1870
1961
|
EXPORT_FN("bufferGetMappedRange", doe_buffer_get_mapped_range),
|
|
1962
|
+
EXPORT_FN("bufferAssertMappedPrefixF32", doe_buffer_assert_mapped_prefix_f32),
|
|
1871
1963
|
EXPORT_FN("createShaderModule", doe_create_shader_module),
|
|
1872
1964
|
EXPORT_FN("shaderModuleRelease", doe_shader_module_release),
|
|
1873
1965
|
EXPORT_FN("createComputePipeline", doe_create_compute_pipeline),
|
|
@@ -1895,6 +1987,7 @@ static napi_value doe_module_init(napi_env env, napi_value exports) {
|
|
|
1895
1987
|
EXPORT_FN("queueWriteBuffer", doe_queue_write_buffer),
|
|
1896
1988
|
EXPORT_FN("queueFlush", doe_queue_flush),
|
|
1897
1989
|
EXPORT_FN("submitBatched", doe_submit_batched),
|
|
1990
|
+
EXPORT_FN("submitComputeDispatchCopy", doe_submit_compute_dispatch_copy),
|
|
1898
1991
|
EXPORT_FN("flushAndMapSync", doe_flush_and_map_sync),
|
|
1899
1992
|
EXPORT_FN("queueRelease", doe_queue_release),
|
|
1900
1993
|
EXPORT_FN("createTexture", doe_create_texture),
|
package/package.json
CHANGED
|
@@ -1,16 +1,32 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@simulatte/webgpu",
|
|
3
|
-
"version": "0.2.
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "0.2.4",
|
|
4
|
+
"description": "Headless WebGPU runtime for Node.js and Bun, powered by Doe",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/node-runtime.js",
|
|
7
|
+
"types": "./src/full.d.ts",
|
|
7
8
|
"exports": {
|
|
8
9
|
".": {
|
|
10
|
+
"types": "./src/full.d.ts",
|
|
9
11
|
"bun": "./src/bun.js",
|
|
10
12
|
"default": "./src/node-runtime.js"
|
|
11
13
|
},
|
|
12
|
-
"./bun":
|
|
13
|
-
|
|
14
|
+
"./bun": {
|
|
15
|
+
"types": "./src/full.d.ts",
|
|
16
|
+
"default": "./src/bun.js"
|
|
17
|
+
},
|
|
18
|
+
"./node": {
|
|
19
|
+
"types": "./src/full.d.ts",
|
|
20
|
+
"default": "./src/node-runtime.js"
|
|
21
|
+
},
|
|
22
|
+
"./compute": {
|
|
23
|
+
"types": "./src/compute.d.ts",
|
|
24
|
+
"default": "./src/compute.js"
|
|
25
|
+
},
|
|
26
|
+
"./full": {
|
|
27
|
+
"types": "./src/full.d.ts",
|
|
28
|
+
"default": "./src/full.js"
|
|
29
|
+
}
|
|
14
30
|
},
|
|
15
31
|
"bin": {
|
|
16
32
|
"fawn-webgpu-bench": "./bin/fawn-webgpu-bench.js",
|
|
@@ -25,8 +41,12 @@
|
|
|
25
41
|
"prebuilds/",
|
|
26
42
|
"binding.gyp",
|
|
27
43
|
"README.md",
|
|
28
|
-
"
|
|
29
|
-
"
|
|
44
|
+
"CHANGELOG.md",
|
|
45
|
+
"api-contract.md",
|
|
46
|
+
"compat-scope.md",
|
|
47
|
+
"support-contracts.md",
|
|
48
|
+
"layering-plan.md",
|
|
49
|
+
"zig-source-inventory.md",
|
|
30
50
|
"headless-webgpu-comparison.md",
|
|
31
51
|
"doe-build-metadata.schema.json",
|
|
32
52
|
"prebuild-metadata.schema.json"
|
|
@@ -36,7 +56,7 @@
|
|
|
36
56
|
"build:addon": "node-gyp rebuild",
|
|
37
57
|
"build:readme-assets": "node scripts/generate-readme-assets.js",
|
|
38
58
|
"prebuild": "node scripts/prebuild.js",
|
|
39
|
-
"test": "node ./test-node.js",
|
|
59
|
+
"test": "node ./test-node.js && node ./test-package-exports.js",
|
|
40
60
|
"test:bun": "bun ./test-bun.js",
|
|
41
61
|
"smoke": "node scripts/smoke-test.js"
|
|
42
62
|
},
|
|
@@ -60,5 +80,5 @@
|
|
|
60
80
|
"url": "https://github.com/clocksmith/fawn/issues"
|
|
61
81
|
},
|
|
62
82
|
"author": "Fawn",
|
|
63
|
-
"license": "
|
|
83
|
+
"license": "Apache-2.0"
|
|
64
84
|
}
|
|
Binary file
|
|
Binary file
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schemaVersion": 1,
|
|
3
3
|
"package": "@simulatte/webgpu",
|
|
4
|
-
"packageVersion": "0.2.
|
|
4
|
+
"packageVersion": "0.2.3",
|
|
5
5
|
"platform": "darwin",
|
|
6
6
|
"arch": "arm64",
|
|
7
7
|
"nodeNapiVersion": 8,
|
|
8
|
-
"doeVersion": "
|
|
8
|
+
"doeVersion": "68a193c88",
|
|
9
9
|
"doeBuild": {
|
|
10
10
|
"artifact": "libwebgpu_doe",
|
|
11
11
|
"leanVerifiedBuild": false,
|
|
@@ -13,14 +13,14 @@
|
|
|
13
13
|
},
|
|
14
14
|
"files": {
|
|
15
15
|
"doe_napi.node": {
|
|
16
|
-
"sha256": "
|
|
16
|
+
"sha256": "472c753c5c5bd82b60444bfcc1d3837bdbd40d1fce1b8281e6c706043bb64a84"
|
|
17
17
|
},
|
|
18
18
|
"libwebgpu_doe.dylib": {
|
|
19
|
-
"sha256": "
|
|
19
|
+
"sha256": "d9a66fa8fad7a8e50736778329ea5c011457c814238f96ecf230eb8eb97bcc64"
|
|
20
20
|
},
|
|
21
21
|
"libwebgpu_dawn.dylib": {
|
|
22
22
|
"sha256": "22751faeb459e7a2ec778c0410ca122e23c23366eb3da145c651d1d43e26707d"
|
|
23
23
|
}
|
|
24
24
|
},
|
|
25
|
-
"builtAt": "2026-03-
|
|
25
|
+
"builtAt": "2026-03-10T17:19:18.720Z"
|
|
26
26
|
}
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schemaVersion": 1,
|
|
3
|
+
"package": "@simulatte/webgpu",
|
|
4
|
+
"packageVersion": "0.2.3",
|
|
5
|
+
"platform": "linux",
|
|
6
|
+
"arch": "x64",
|
|
7
|
+
"nodeNapiVersion": 8,
|
|
8
|
+
"doeVersion": "b09d34586",
|
|
9
|
+
"doeBuild": {
|
|
10
|
+
"artifact": "libwebgpu_doe",
|
|
11
|
+
"leanVerifiedBuild": false,
|
|
12
|
+
"proofArtifactSha256": null
|
|
13
|
+
},
|
|
14
|
+
"files": {
|
|
15
|
+
"doe_napi.node": {
|
|
16
|
+
"sha256": "21475bcd04b499e1a0ed6e75d8af2a7bead08d365ddfb09708d509d0de62bf28"
|
|
17
|
+
},
|
|
18
|
+
"libwebgpu_doe.so": {
|
|
19
|
+
"sha256": "28c5da84da65a5d6f4a3a74b9757279cddee738aaa93fe8ece6e358d4d94cdb1"
|
|
20
|
+
},
|
|
21
|
+
"libwebgpu_dawn.so": {
|
|
22
|
+
"sha256": "9d329301f59fbe85a190cee4faacde97f9c991b07264a18a6750b890899cc417"
|
|
23
|
+
}
|
|
24
|
+
},
|
|
25
|
+
"builtAt": "2026-03-10T21:30:16.241Z"
|
|
26
|
+
}
|
|
@@ -26,8 +26,8 @@ const SURFACE_SPECS = [
|
|
|
26
26
|
{
|
|
27
27
|
surface: 'bun_package',
|
|
28
28
|
title: 'Bun package lane',
|
|
29
|
-
supportLabel: '
|
|
30
|
-
preferredHostProfile: '
|
|
29
|
+
supportLabel: 'Validated support',
|
|
30
|
+
preferredHostProfile: 'mac_apple_silicon',
|
|
31
31
|
focusSets: ['compute_e2e', 'uploads'],
|
|
32
32
|
tone: 'right',
|
|
33
33
|
},
|
package/src/bun-ffi.js
CHANGED
|
@@ -157,6 +157,7 @@ function openLibrary(path) {
|
|
|
157
157
|
wgpuDeviceCreateComputePipeline: { args: [FFIType.ptr, FFIType.ptr], returns: FFIType.ptr },
|
|
158
158
|
wgpuComputePipelineRelease: { args: [FFIType.ptr], returns: FFIType.void },
|
|
159
159
|
wgpuComputePipelineGetBindGroupLayout: { args: [FFIType.ptr, FFIType.u32], returns: FFIType.ptr },
|
|
160
|
+
doeNativeComputePipelineGetBindGroupLayout: { args: [FFIType.ptr, FFIType.u32], returns: FFIType.ptr },
|
|
160
161
|
|
|
161
162
|
// Bind group layout / bind group / pipeline layout
|
|
162
163
|
wgpuDeviceCreateBindGroupLayout: { args: [FFIType.ptr, FFIType.ptr], returns: FFIType.ptr },
|
|
@@ -804,7 +805,7 @@ class DoeGPUComputePipeline {
|
|
|
804
805
|
constructor(native) { this._native = native; }
|
|
805
806
|
|
|
806
807
|
getBindGroupLayout(index) {
|
|
807
|
-
const layout = wgpu.symbols.
|
|
808
|
+
const layout = wgpu.symbols.doeNativeComputePipelineGetBindGroupLayout(this._native, index);
|
|
808
809
|
return new DoeGPUBindGroupLayout(layout);
|
|
809
810
|
}
|
|
810
811
|
}
|
|
@@ -961,7 +962,7 @@ function ensureLibrary() {
|
|
|
961
962
|
if (libraryLoaded) return;
|
|
962
963
|
if (!DOE_LIB_PATH) {
|
|
963
964
|
throw new Error(
|
|
964
|
-
"@simulatte/webgpu: libwebgpu_doe not found. Build it with `cd
|
|
965
|
+
"@simulatte/webgpu: libwebgpu_doe not found. Build it with `cd zig && zig build dropin` or set DOE_WEBGPU_LIB."
|
|
965
966
|
);
|
|
966
967
|
}
|
|
967
968
|
wgpu = openLibrary(DOE_LIB_PATH);
|
package/src/bun.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export * from "./
|
|
2
|
-
export { default } from "./
|
|
1
|
+
export * from "./full.js";
|
|
2
|
+
export { default } from "./full.js";
|