@simulatte/webgpu 0.2.4 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +33 -0
- package/README.md +263 -71
- package/api-contract.md +70 -139
- package/assets/package-layers.svg +63 -0
- package/examples/direct-webgpu/compute-dispatch.js +66 -0
- package/examples/direct-webgpu/explicit-bind-group.js +85 -0
- package/examples/direct-webgpu/request-device.js +10 -0
- package/examples/doe-api/buffers-readback.js +9 -0
- package/examples/doe-api/compile-and-dispatch.js +30 -0
- package/examples/doe-api/compute-dispatch.js +25 -0
- package/examples/doe-routines/compute-once-like-input.js +36 -0
- package/examples/doe-routines/compute-once-matmul.js +53 -0
- package/examples/doe-routines/compute-once-multiple-inputs.js +27 -0
- package/examples/doe-routines/compute-once.js +23 -0
- package/headless-webgpu-comparison.md +2 -2
- package/layering-plan.md +1 -1
- package/native/doe_napi.c +102 -12
- package/package.json +2 -1
- package/prebuilds/darwin-arm64/doe_napi.node +0 -0
- package/prebuilds/darwin-arm64/libwebgpu_doe.dylib +0 -0
- package/prebuilds/darwin-arm64/metadata.json +6 -6
- package/prebuilds/linux-x64/doe_napi.node +0 -0
- package/prebuilds/linux-x64/libwebgpu_doe.so +0 -0
- package/prebuilds/linux-x64/metadata.json +5 -5
- package/scripts/generate-readme-assets.js +79 -6
- package/scripts/prebuild.js +23 -19
- package/src/auto_bind_group_layout.js +32 -0
- package/src/bun-ffi.js +93 -12
- package/src/bun.js +23 -2
- package/src/compute.d.ts +2 -1
- package/src/compute.js +671 -33
- package/src/doe.d.ts +127 -27
- package/src/doe.js +480 -114
- package/src/full.d.ts +8 -1
- package/src/full.js +28 -3
- package/src/index.js +1013 -38
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
// Example: element-wise addition of two Float32Array inputs using the
// one-shot `gpu.compute.once` helper exposed by the compute subpath.
import { doe } from "@simulatte/webgpu/compute";
|
|
2
|
+
|
|
3
|
+
// Top-level await: this file is an ES module.
const gpu = await doe.requestDevice();
|
|
4
|
+
|
|
5
|
+
// The shader declares three storage bindings (lhs, rhs, dst). Presumably the
// two `inputs` map to bindings 0 and 1 in order and the output to binding 2;
// confirm against the compute.once documentation.
const result = await gpu.compute.once({
|
|
6
|
+
code: `
|
|
7
|
+
@group(0) @binding(0) var<storage, read> lhs: array<f32>;
|
|
8
|
+
@group(0) @binding(1) var<storage, read> rhs: array<f32>;
|
|
9
|
+
@group(0) @binding(2) var<storage, read_write> dst: array<f32>;
|
|
10
|
+
|
|
11
|
+
@compute @workgroup_size(4)
|
|
12
|
+
fn main(@builtin(global_invocation_id) gid: vec3u) {
|
|
13
|
+
let i = gid.x;
|
|
14
|
+
dst[i] = lhs[i] + rhs[i];
|
|
15
|
+
}
|
|
16
|
+
`,
|
|
17
|
+
inputs: [
|
|
18
|
+
new Float32Array([1, 2, 3, 4]),
|
|
19
|
+
new Float32Array([10, 20, 30, 40]),
|
|
20
|
+
],
|
|
21
|
+
output: {
|
|
22
|
+
type: Float32Array,
|
|
23
|
+
},
|
|
24
|
+
// One workgroup of 4 invocations (see @workgroup_size(4)) covers the 4 elements.
workgroups: 1,
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
// Prints the result as a JSON array. With these inputs the per-element sums
// are 11, 22, 33, 44 (assuming the output holds four f32 values; TODO confirm).
console.log(JSON.stringify(Array.from(result)));
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
// Example: multiply every element of a single Float32Array input by 3 using
// the one-shot `gpu.compute.once` helper exposed by the compute subpath.
import { doe } from "@simulatte/webgpu/compute";
|
|
2
|
+
|
|
3
|
+
// Top-level await: this file is an ES module.
const gpu = await doe.requestDevice();
|
|
4
|
+
|
|
5
|
+
// The shader declares two storage bindings (src, dst). Presumably the single
// input maps to binding 0 and the output to binding 1; confirm against the
// compute.once documentation.
const result = await gpu.compute.once({
|
|
6
|
+
code: `
|
|
7
|
+
@group(0) @binding(0) var<storage, read> src: array<f32>;
|
|
8
|
+
@group(0) @binding(1) var<storage, read_write> dst: array<f32>;
|
|
9
|
+
|
|
10
|
+
@compute @workgroup_size(4)
|
|
11
|
+
fn main(@builtin(global_invocation_id) gid: vec3u) {
|
|
12
|
+
let i = gid.x;
|
|
13
|
+
dst[i] = src[i] * 3.0;
|
|
14
|
+
}
|
|
15
|
+
`,
|
|
16
|
+
inputs: [new Float32Array([1, 2, 3, 4])],
|
|
17
|
+
output: {
|
|
18
|
+
type: Float32Array,
|
|
19
|
+
},
|
|
20
|
+
// One workgroup of 4 invocations (see @workgroup_size(4)) covers the 4 elements.
workgroups: 1,
|
|
21
|
+
});
|
|
22
|
+
|
|
23
|
+
// Prints the result as a JSON array. With this input the products are
// 3, 6, 9, 12 (assuming the output holds four f32 values; TODO confirm).
console.log(JSON.stringify(Array.from(result)));
|
|
@@ -7,7 +7,7 @@ This document outlines qualitative differences and target use-cases for headless
|
|
|
7
7
|
| **Underlying Engine** | `libwebgpu_doe` (Zig + Lean pipeline) | Google Dawn (C++) | Google Dawn (C++) |
|
|
8
8
|
| **Primary Focus** | Deterministic Compute, ML/AI, Verifiability | Browser Parity, Graphics | Browser Parity, Graphics |
|
|
9
9
|
| **Binary Footprint** | Smaller targeted runtime expected | Varies by build/distribution | Varies by build/distribution |
|
|
10
|
-
| **JS Binding Layer** | Node-
|
|
10
|
+
| **JS Binding Layer** | Node addon-backed path; Bun uses FFI on Linux and the full/addon-backed path on macOS today | Node-API (N-API) | Bun FFI (Fast Foreign Function) |
|
|
11
11
|
| **Security Model** | Explicit schema/gate discipline in Fawn pipeline | Runtime heuristics + Dawn validation | Runtime heuristics + Dawn validation |
|
|
12
12
|
| **Resource Allocation** | Arena-backed, predictable memory | General WebGPU async allocations | General WebGPU async allocations |
|
|
13
13
|
| **WebGPU Spec Compliance**| Compute-prioritized subset target | Broad Chromium-aligned coverage | Broad Chromium-aligned coverage |
|
|
@@ -17,7 +17,7 @@ This document outlines qualitative differences and target use-cases for headless
|
|
|
17
17
|
## Architectural Takeaways for Fawn
|
|
18
18
|
|
|
19
19
|
1. Determinism and fail-fast contracts are the intended Doe value proposition for benchmarking workflows.
|
|
20
|
-
2. The package currently
|
|
20
|
+
2. The package currently uses a platform-split Bun path: Linux uses the direct FFI route, while macOS uses the addon-backed full path for correctness parity. FFI may reduce wrapper overhead, but end-to-end results must be measured per workload.
|
|
21
21
|
3. Distribution size and startup claims must be backed by measured artifacts before release claims.
|
|
22
22
|
|
|
23
23
|
## Ecosystem reference: official/community competitors and stats
|
package/layering-plan.md
CHANGED
|
@@ -37,7 +37,7 @@ Current reality:
|
|
|
37
37
|
4. Canonical texture command handling now lives in `zig/src/core/resource/wgpu_texture_commands.zig`; canonical sampler and surface command handling now lives in `zig/src/full/render/wgpu_sampler_commands.zig` and `zig/src/full/surface/wgpu_surface_commands.zig`.
|
|
38
38
|
5. `zig/src/wgpu_commands.zig`, `zig/src/wgpu_resources.zig`, and `zig/src/wgpu_extended_commands.zig` are now compatibility façades over the canonical subtrees, while `zig/src/webgpu_ffi.zig` remains the public façade and owner of `WebGPUBackend`.
|
|
39
39
|
6. Dedicated Zig test lanes now exist as `zig build test-core` and `zig build test-full`, but split coverage remains thin and capability tracking is still represented by one shared coverage ledger.
|
|
40
|
-
7. The JS package now exposes a default `full` surface plus an explicit `compute` subpath, while the underlying JS implementation is still shared.
|
|
40
|
+
7. The JS package now exposes a default `full` surface plus an explicit `compute` subpath, while the underlying JS implementation is still shared and presented through `Direct WebGPU`, `Doe API`, and `Doe routines` styles.
|
|
41
41
|
|
|
42
42
|
That means this plan is now materially physicalized in the tree, and the remaining semantic split is concentrated in the public façade files and backend roots.
|
|
43
43
|
|
package/native/doe_napi.c
CHANGED
|
@@ -60,6 +60,8 @@ typedef uint32_t WGPUBool;
|
|
|
60
60
|
#define WGPU_WHOLE_SIZE UINT64_MAX
|
|
61
61
|
#define WGPU_STYPE_SHADER_SOURCE_WGSL 0x00000002
|
|
62
62
|
#define WGPU_WAIT_STATUS_SUCCESS 1
|
|
63
|
+
#define WGPU_WAIT_STATUS_TIMED_OUT 2
|
|
64
|
+
#define WGPU_WAIT_STATUS_ERROR 3
|
|
63
65
|
#define WGPU_MAP_ASYNC_STATUS_SUCCESS 1
|
|
64
66
|
#define WGPU_REQUEST_STATUS_SUCCESS 1
|
|
65
67
|
#define WGPU_CALLBACK_MODE_ALLOW_PROCESS_EVENTS 2
|
|
@@ -824,6 +826,11 @@ typedef struct {
|
|
|
824
826
|
uint32_t done;
|
|
825
827
|
} BufferMapResult;
|
|
826
828
|
|
|
829
|
+
/* Completion record filled in by queue_work_done_callback and polled by
 * doe_queue_flush while it waits for submitted queue work to finish. */
typedef struct {
|
|
830
|
+
uint32_t status; /* status value handed to the callback; doe_queue_flush compares it to WGPU_QUEUE_WORK_DONE_STATUS_SUCCESS */
|
|
831
|
+
uint32_t done; /* 0 until the callback has run, then 1 */
|
|
832
|
+
} QueueWorkDoneResult;
|
|
833
|
+
|
|
827
834
|
static void buffer_map_callback(uint32_t status, WGPUStringView message,
|
|
828
835
|
void* userdata1, void* userdata2) {
|
|
829
836
|
(void)message; (void)userdata2;
|
|
@@ -832,6 +839,14 @@ static void buffer_map_callback(uint32_t status, WGPUStringView message,
|
|
|
832
839
|
r->done = 1;
|
|
833
840
|
}
|
|
834
841
|
|
|
842
|
+
/* Queue work-done callback used by doe_queue_flush.
 * userdata1 must point at a QueueWorkDoneResult; the callback stores the
 * reported status there and sets done to 1 so the caller's polling loop can
 * stop. message and userdata2 are ignored. userdata1 is dereferenced without
 * a NULL check. */
static void queue_work_done_callback(uint32_t status, WGPUStringView message,
|
|
843
|
+
void* userdata1, void* userdata2) {
|
|
844
|
+
(void)message; (void)userdata2;
|
|
845
|
+
QueueWorkDoneResult* r = (QueueWorkDoneResult*)userdata1;
|
|
846
|
+
r->status = status;
|
|
847
|
+
r->done = 1;
|
|
848
|
+
}
|
|
849
|
+
|
|
835
850
|
/* bufferMapSync(instance, buffer, mode, offset, size) */
|
|
836
851
|
static napi_value doe_buffer_map_sync(napi_env env, napi_callback_info info) {
|
|
837
852
|
NAPI_ASSERT_ARGC(env, info, 5);
|
|
@@ -1387,16 +1402,58 @@ static napi_value doe_queue_write_buffer(napi_env env, napi_callback_info info)
|
|
|
1387
1402
|
return NULL;
|
|
1388
1403
|
}
|
|
1389
1404
|
|
|
1390
|
-
/* queueFlush(queue) — wait for all pending GPU work to complete.
|
|
1391
|
-
*
|
|
1392
|
-
*
|
|
1405
|
+
/* queueFlush(instance, queue) — wait for all pending GPU work to complete.
|
|
1406
|
+
* Use the Doe-native queue flush when available; otherwise fall back to the
|
|
1407
|
+
* portable queue work-done callback path and process events until completion. */
|
|
1393
1408
|
static napi_value doe_queue_flush(napi_env env, napi_callback_info info) {
|
|
1394
|
-
NAPI_ASSERT_ARGC(env, info,
|
|
1409
|
+
NAPI_ASSERT_ARGC(env, info, 2);
|
|
1395
1410
|
CHECK_LIB_LOADED(env);
|
|
1396
|
-
|
|
1411
|
+
WGPUInstance inst = unwrap_ptr(env, _args[0]);
|
|
1412
|
+
WGPUQueue queue = unwrap_ptr(env, _args[1]);
|
|
1397
1413
|
if (!queue) NAPI_THROW(env, "queueFlush requires queue");
|
|
1398
|
-
if (
|
|
1399
|
-
|
|
1414
|
+
if (pfn_doeNativeQueueFlush) {
|
|
1415
|
+
pfn_doeNativeQueueFlush(queue);
|
|
1416
|
+
return NULL;
|
|
1417
|
+
}
|
|
1418
|
+
if (!inst) NAPI_THROW(env, "queueFlush requires instance when doeNativeQueueFlush is unavailable");
|
|
1419
|
+
|
|
1420
|
+
QueueWorkDoneResult result = {0, 0};
|
|
1421
|
+
WGPUQueueWorkDoneCallbackInfo cb_info = {
|
|
1422
|
+
.nextInChain = NULL,
|
|
1423
|
+
.mode = WGPU_CALLBACK_MODE_WAIT_ANY_ONLY,
|
|
1424
|
+
.callback = queue_work_done_callback,
|
|
1425
|
+
.userdata1 = &result,
|
|
1426
|
+
.userdata2 = NULL,
|
|
1427
|
+
};
|
|
1428
|
+
|
|
1429
|
+
WGPUFuture future = pfn_wgpuQueueOnSubmittedWorkDone(queue, cb_info);
|
|
1430
|
+
if (future.id == 0) NAPI_THROW(env, "queueFlush: queue work-done future unavailable");
|
|
1431
|
+
uint64_t start_ns = monotonic_now_ns();
|
|
1432
|
+
while (!result.done) {
|
|
1433
|
+
WGPUFutureWaitInfo wait_info = {
|
|
1434
|
+
.future = future,
|
|
1435
|
+
.completed = 0,
|
|
1436
|
+
};
|
|
1437
|
+
uint32_t wait_status = pfn_wgpuInstanceWaitAny(inst, 1, &wait_info, 0);
|
|
1438
|
+
if (wait_status == WGPU_WAIT_STATUS_SUCCESS) {
|
|
1439
|
+
if (!result.done) {
|
|
1440
|
+
pfn_wgpuInstanceProcessEvents(inst);
|
|
1441
|
+
}
|
|
1442
|
+
} else if (wait_status == WGPU_WAIT_STATUS_TIMED_OUT) {
|
|
1443
|
+
pfn_wgpuInstanceProcessEvents(inst);
|
|
1444
|
+
if (monotonic_now_ns() - start_ns >= DOE_DEFAULT_TIMEOUT_NS) {
|
|
1445
|
+
NAPI_THROW(env, "queueFlush: queue wait timed out");
|
|
1446
|
+
}
|
|
1447
|
+
wait_slice();
|
|
1448
|
+
} else if (wait_status == WGPU_WAIT_STATUS_ERROR) {
|
|
1449
|
+
NAPI_THROW(env, "queueFlush: wgpuInstanceWaitAny failed");
|
|
1450
|
+
} else {
|
|
1451
|
+
NAPI_THROW(env, "queueFlush: unsupported wait status");
|
|
1452
|
+
}
|
|
1453
|
+
}
|
|
1454
|
+
if (result.status != WGPU_QUEUE_WORK_DONE_STATUS_SUCCESS) {
|
|
1455
|
+
NAPI_THROW(env, "queueFlush: queue work did not complete");
|
|
1456
|
+
}
|
|
1400
1457
|
return NULL;
|
|
1401
1458
|
}
|
|
1402
1459
|
|
|
@@ -1547,7 +1604,6 @@ static napi_value doe_submit_compute_dispatch_copy(napi_env env, napi_callback_i
|
|
|
1547
1604
|
uint64_t copy_dst_off = (uint64_t)copy_dst_off_i;
|
|
1548
1605
|
uint64_t copy_size = (uint64_t)copy_size_i;
|
|
1549
1606
|
if (!device || !queue || !pipeline) NAPI_THROW(env, "submitComputeDispatchCopy requires device, queue, and pipeline");
|
|
1550
|
-
if (!pfn_doeNativeComputeDispatchFlush) NAPI_THROW(env, "submitComputeDispatchCopy: doeNativeComputeDispatchFlush not available");
|
|
1551
1607
|
|
|
1552
1608
|
uint32_t bg_count = 0;
|
|
1553
1609
|
napi_get_array_length(env, bgs, &bg_count);
|
|
@@ -1559,10 +1615,44 @@ static napi_value doe_submit_compute_dispatch_copy(napi_env env, napi_callback_i
|
|
|
1559
1615
|
bg_ptrs[j] = unwrap_ptr(env, bg_val);
|
|
1560
1616
|
}
|
|
1561
1617
|
|
|
1562
|
-
pfn_doeNativeComputeDispatchFlush
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1618
|
+
if (pfn_doeNativeComputeDispatchFlush) {
|
|
1619
|
+
pfn_doeNativeComputeDispatchFlush(
|
|
1620
|
+
queue, pipeline, (void**)bg_ptrs, bg_count,
|
|
1621
|
+
dx, dy, dz,
|
|
1622
|
+
copy_src, copy_src_off, copy_dst, copy_dst_off, copy_size);
|
|
1623
|
+
return NULL;
|
|
1624
|
+
}
|
|
1625
|
+
|
|
1626
|
+
WGPUCommandEncoder encoder = pfn_wgpuDeviceCreateCommandEncoder(device, NULL);
|
|
1627
|
+
if (!encoder) NAPI_THROW(env, "submitComputeDispatchCopy: createCommandEncoder failed");
|
|
1628
|
+
WGPUComputePassEncoder pass = pfn_wgpuCommandEncoderBeginComputePass(encoder, NULL);
|
|
1629
|
+
if (!pass) {
|
|
1630
|
+
pfn_wgpuCommandEncoderRelease(encoder);
|
|
1631
|
+
NAPI_THROW(env, "submitComputeDispatchCopy: beginComputePass failed");
|
|
1632
|
+
}
|
|
1633
|
+
pfn_wgpuComputePassEncoderSetPipeline(pass, pipeline);
|
|
1634
|
+
for (uint32_t j = 0; j < bg_count; j++) {
|
|
1635
|
+
if (bg_ptrs[j]) pfn_wgpuComputePassEncoderSetBindGroup(pass, j, bg_ptrs[j], 0, NULL);
|
|
1636
|
+
}
|
|
1637
|
+
pfn_wgpuComputePassEncoderDispatchWorkgroups(pass, dx, dy, dz);
|
|
1638
|
+
pfn_wgpuComputePassEncoderEnd(pass);
|
|
1639
|
+
pfn_wgpuComputePassEncoderRelease(pass);
|
|
1640
|
+
pfn_wgpuCommandEncoderCopyBufferToBuffer(
|
|
1641
|
+
encoder,
|
|
1642
|
+
copy_src,
|
|
1643
|
+
copy_src_off,
|
|
1644
|
+
copy_dst,
|
|
1645
|
+
copy_dst_off,
|
|
1646
|
+
copy_size
|
|
1647
|
+
);
|
|
1648
|
+
WGPUCommandBuffer cmd_buf = pfn_wgpuCommandEncoderFinish(encoder, NULL);
|
|
1649
|
+
if (!cmd_buf) {
|
|
1650
|
+
pfn_wgpuCommandEncoderRelease(encoder);
|
|
1651
|
+
NAPI_THROW(env, "submitComputeDispatchCopy: finish failed");
|
|
1652
|
+
}
|
|
1653
|
+
pfn_wgpuQueueSubmit(queue, 1, &cmd_buf);
|
|
1654
|
+
pfn_wgpuCommandBufferRelease(cmd_buf);
|
|
1655
|
+
pfn_wgpuCommandEncoderRelease(encoder);
|
|
1566
1656
|
return NULL;
|
|
1567
1657
|
}
|
|
1568
1658
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@simulatte/webgpu",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.3.1",
|
|
4
4
|
"description": "Headless WebGPU runtime for Node.js and Bun, powered by Doe",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/node-runtime.js",
|
|
@@ -35,6 +35,7 @@
|
|
|
35
35
|
"files": [
|
|
36
36
|
"assets/",
|
|
37
37
|
"bin/",
|
|
38
|
+
"examples/",
|
|
38
39
|
"src/",
|
|
39
40
|
"scripts/",
|
|
40
41
|
"native/",
|
|
Binary file
|
|
Binary file
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schemaVersion": 1,
|
|
3
3
|
"package": "@simulatte/webgpu",
|
|
4
|
-
"packageVersion": "0.
|
|
4
|
+
"packageVersion": "0.3.1",
|
|
5
5
|
"platform": "darwin",
|
|
6
6
|
"arch": "arm64",
|
|
7
7
|
"nodeNapiVersion": 8,
|
|
8
|
-
"doeVersion": "
|
|
8
|
+
"doeVersion": "dae96664a",
|
|
9
9
|
"doeBuild": {
|
|
10
10
|
"artifact": "libwebgpu_doe",
|
|
11
11
|
"leanVerifiedBuild": false,
|
|
@@ -13,14 +13,14 @@
|
|
|
13
13
|
},
|
|
14
14
|
"files": {
|
|
15
15
|
"doe_napi.node": {
|
|
16
|
-
"sha256": "
|
|
16
|
+
"sha256": "4baee8a48487e8725b7b6bbccd269da4b4868d28c4d4a26e8ee075f3a8d1e75a"
|
|
17
17
|
},
|
|
18
18
|
"libwebgpu_doe.dylib": {
|
|
19
|
-
"sha256": "
|
|
19
|
+
"sha256": "1598ea11e54c7087ebe9020c07b5bbc0f7d376927b20edac3ed9ca08a0fe6789"
|
|
20
20
|
},
|
|
21
21
|
"libwebgpu_dawn.dylib": {
|
|
22
|
-
"sha256": "
|
|
22
|
+
"sha256": "828fb9ef95b08b4192cb9e84392e432e0d0a4ebca90df678e076e2e3c731b82e"
|
|
23
23
|
}
|
|
24
24
|
},
|
|
25
|
-
"builtAt": "2026-03-
|
|
25
|
+
"builtAt": "2026-03-11T18:42:09.399Z"
|
|
26
26
|
}
|
|
Binary file
|
|
Binary file
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schemaVersion": 1,
|
|
3
3
|
"package": "@simulatte/webgpu",
|
|
4
|
-
"packageVersion": "0.
|
|
4
|
+
"packageVersion": "0.3.1",
|
|
5
5
|
"platform": "linux",
|
|
6
6
|
"arch": "x64",
|
|
7
7
|
"nodeNapiVersion": 8,
|
|
8
|
-
"doeVersion": "
|
|
8
|
+
"doeVersion": "1010cdc41",
|
|
9
9
|
"doeBuild": {
|
|
10
10
|
"artifact": "libwebgpu_doe",
|
|
11
11
|
"leanVerifiedBuild": false,
|
|
@@ -13,14 +13,14 @@
|
|
|
13
13
|
},
|
|
14
14
|
"files": {
|
|
15
15
|
"doe_napi.node": {
|
|
16
|
-
"sha256": "
|
|
16
|
+
"sha256": "08e6019b07dee38ac7140c27d4c1e10237fec5778e68506fcf85ea85dee797ec"
|
|
17
17
|
},
|
|
18
18
|
"libwebgpu_doe.so": {
|
|
19
|
-
"sha256": "
|
|
19
|
+
"sha256": "ee4f0fcd43751574bc5570492b27a911408e31c90b2c91bd4ee4af09a8f7f5cb"
|
|
20
20
|
},
|
|
21
21
|
"libwebgpu_dawn.so": {
|
|
22
22
|
"sha256": "9d329301f59fbe85a190cee4faacde97f9c991b07264a18a6750b890899cc417"
|
|
23
23
|
}
|
|
24
24
|
},
|
|
25
|
-
"builtAt": "2026-03-
|
|
25
|
+
"builtAt": "2026-03-11T18:05:22.324Z"
|
|
26
26
|
}
|
|
@@ -8,7 +8,8 @@ const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
|
8
8
|
const PACKAGE_ROOT = resolve(__dirname, '..');
|
|
9
9
|
const WORKSPACE_ROOT = resolve(PACKAGE_ROOT, '..', '..');
|
|
10
10
|
const CUBE_SUMMARY_PATH = resolve(WORKSPACE_ROOT, 'bench', 'out', 'cube', 'latest', 'cube.summary.json');
|
|
11
|
-
const
|
|
11
|
+
const CUBE_OUTPUT_PATH = resolve(PACKAGE_ROOT, 'assets', 'package-surface-cube-snapshot.svg');
|
|
12
|
+
const LAYERS_OUTPUT_PATH = resolve(PACKAGE_ROOT, 'assets', 'package-layers.svg');
|
|
12
13
|
|
|
13
14
|
const UI_FONT = '"Segoe UI", "Helvetica Neue", Arial, sans-serif';
|
|
14
15
|
const MONO_FONT = 'SFMono-Regular, Menlo, Consolas, "Liberation Mono", monospace';
|
|
@@ -256,15 +257,87 @@ ${renderSurfaceCard(SURFACE_SPECS[1], bunCells, 640)}
|
|
|
256
257
|
`;
|
|
257
258
|
}
|
|
258
259
|
|
|
260
|
+
/**
 * Builds the SVG markup for the "layered package" README diagram.
 *
 * The drawing is a fixed 1200x470 canvas with a gradient background, a title
 * and subtitle, and four stacked rounded boxes labelled with the package
 * entry points, "Direct WebGPU", "Doe API" and "Doe routines". The embedded
 * stylesheet interpolates the module-level UI_FONT and TEXT_STROKE values
 * (TEXT_STROKE is defined outside this view).
 *
 * @returns {string} The complete SVG document as a string.
 */
function renderLayersSvg() {
|
|
261
|
+
return `<svg xmlns="http://www.w3.org/2000/svg" width="1200" height="470" viewBox="0 0 1200 470" role="img" aria-labelledby="layers-title layers-desc">
|
|
262
|
+
<title id="layers-title">@simulatte/webgpu layered package graph</title>
|
|
263
|
+
<desc id="layers-desc">Layered package graph showing direct WebGPU, Doe API, and Doe routines over the same package surfaces.</desc>
|
|
264
|
+
<defs>
|
|
265
|
+
<linearGradient id="layers-bg" x1="0%" y1="0%" x2="100%" y2="100%">
|
|
266
|
+
<stop offset="0%" stop-color="#050816"/>
|
|
267
|
+
<stop offset="100%" stop-color="#140c1f"/>
|
|
268
|
+
</linearGradient>
|
|
269
|
+
<radialGradient id="layers-glow-top" cx="25%" cy="18%" r="55%">
|
|
270
|
+
<stop offset="0%" stop-color="#ef444430"/>
|
|
271
|
+
<stop offset="55%" stop-color="#7c3aed18"/>
|
|
272
|
+
<stop offset="100%" stop-color="#00000000"/>
|
|
273
|
+
</radialGradient>
|
|
274
|
+
<radialGradient id="layers-glow-bottom" cx="78%" cy="84%" r="52%">
|
|
275
|
+
<stop offset="0%" stop-color="#f59e0b26"/>
|
|
276
|
+
<stop offset="60%" stop-color="#f9731618"/>
|
|
277
|
+
<stop offset="100%" stop-color="#00000000"/>
|
|
278
|
+
</radialGradient>
|
|
279
|
+
<linearGradient id="layers-root" x1="0%" y1="0%" x2="100%" y2="100%">
|
|
280
|
+
<stop offset="0%" stop-color="#7c3aed"/>
|
|
281
|
+
<stop offset="100%" stop-color="#ef4444"/>
|
|
282
|
+
</linearGradient>
|
|
283
|
+
<linearGradient id="layers-direct" x1="0%" y1="0%" x2="100%" y2="100%">
|
|
284
|
+
<stop offset="0%" stop-color="#ef4444"/>
|
|
285
|
+
<stop offset="100%" stop-color="#f97316"/>
|
|
286
|
+
</linearGradient>
|
|
287
|
+
<linearGradient id="layers-api" x1="0%" y1="0%" x2="100%" y2="100%">
|
|
288
|
+
<stop offset="0%" stop-color="#f97316"/>
|
|
289
|
+
<stop offset="100%" stop-color="#f59e0b"/>
|
|
290
|
+
</linearGradient>
|
|
291
|
+
<linearGradient id="layers-routines" x1="0%" y1="0%" x2="100%" y2="100%">
|
|
292
|
+
<stop offset="0%" stop-color="#f59e0b"/>
|
|
293
|
+
<stop offset="100%" stop-color="#eab308"/>
|
|
294
|
+
</linearGradient>
|
|
295
|
+
<filter id="shadow" x="-20%" y="-20%" width="140%" height="140%">
|
|
296
|
+
<feDropShadow dx="0" dy="10" stdDeviation="14" flood-color="#000000" flood-opacity="0.32"/>
|
|
297
|
+
</filter>
|
|
298
|
+
<style>
|
|
299
|
+
.title { font: 700 34px ${UI_FONT}; fill: #ffffff; ${TEXT_STROKE} }
|
|
300
|
+
.subtitle { font: 500 18px ${UI_FONT}; fill: #cbd5e1; ${TEXT_STROKE} }
|
|
301
|
+
.nodeTitle { font: 700 22px ${UI_FONT}; fill: #ffffff; ${TEXT_STROKE} }
|
|
302
|
+
.box { stroke-width: 2.5; filter: url(#shadow); }
|
|
303
|
+
</style>
|
|
304
|
+
</defs>
|
|
305
|
+
<rect width="1200" height="470" fill="url(#layers-bg)"/>
|
|
306
|
+
<rect width="1200" height="470" fill="url(#layers-glow-top)"/>
|
|
307
|
+
<rect width="1200" height="470" fill="url(#layers-glow-bottom)"/>
|
|
308
|
+
<text x="64" y="62" class="title">Same package, four layers</text>
|
|
309
|
+
<text x="64" y="94" class="subtitle">The package surface stays the same while the API gets progressively higher-level.</text>
|
|
310
|
+
|
|
311
|
+
<rect x="170" y="122" width="860" height="64" rx="20" fill="url(#layers-root)" stroke="#c4b5fd" class="box"/>
|
|
312
|
+
<text x="600" y="162" text-anchor="middle" class="nodeTitle">@simulatte/webgpu / @simulatte/webgpu/compute</text>
|
|
313
|
+
|
|
314
|
+
<rect x="220" y="222" width="760" height="52" rx="18" fill="url(#layers-direct)" stroke="#fca5a5" class="box"/>
|
|
315
|
+
<text x="600" y="255" text-anchor="middle" class="nodeTitle">Direct WebGPU</text>
|
|
316
|
+
|
|
317
|
+
<rect x="280" y="310" width="640" height="52" rx="18" fill="url(#layers-api)" stroke="#fdba74" class="box"/>
|
|
318
|
+
<text x="600" y="343" text-anchor="middle" class="nodeTitle">Doe API</text>
|
|
319
|
+
|
|
320
|
+
<rect x="360" y="398" width="480" height="52" rx="18" fill="url(#layers-routines)" stroke="#fde68a" class="box"/>
|
|
321
|
+
<text x="600" y="431" text-anchor="middle" class="nodeTitle">Doe routines</text>
|
|
322
|
+
</svg>
|
|
323
|
+
`;
|
|
324
|
+
}
|
|
325
|
+
|
|
259
326
|
function main() {
|
|
260
327
|
const summary = readCubeSummary(CUBE_SUMMARY_PATH);
|
|
261
|
-
const
|
|
262
|
-
|
|
328
|
+
const cubeSvg = renderSvg(summary);
|
|
329
|
+
const layersSvg = renderLayersSvg();
|
|
330
|
+
mkdirSync(dirname(CUBE_OUTPUT_PATH), { recursive: true });
|
|
331
|
+
writeFileSync(
|
|
332
|
+
CUBE_OUTPUT_PATH,
|
|
333
|
+
`<!-- Generated by scripts/generate-readme-assets.js. Do not edit by hand. -->\n${cubeSvg}`,
|
|
334
|
+
);
|
|
335
|
+
console.log(`Wrote ${CUBE_OUTPUT_PATH}`);
|
|
263
336
|
writeFileSync(
|
|
264
|
-
|
|
265
|
-
`<!-- Generated by scripts/generate-readme-assets.js. Do not edit by hand. -->\n${
|
|
337
|
+
LAYERS_OUTPUT_PATH,
|
|
338
|
+
`<!-- Generated by scripts/generate-readme-assets.js. Do not edit by hand. -->\n${layersSvg}`,
|
|
266
339
|
);
|
|
267
|
-
console.log(`Wrote ${
|
|
340
|
+
console.log(`Wrote ${LAYERS_OUTPUT_PATH}`);
|
|
268
341
|
}
|
|
269
342
|
|
|
270
343
|
main();
|
package/scripts/prebuild.js
CHANGED
|
@@ -60,7 +60,7 @@ function copyArtifact(src, destName) {
|
|
|
60
60
|
const dest = resolve(prebuildDir, destName);
|
|
61
61
|
copyFileSync(src, dest);
|
|
62
62
|
console.log(` ${destName} <- ${src}`);
|
|
63
|
-
return { name: destName,
|
|
63
|
+
return { name: destName, path: dest };
|
|
64
64
|
}
|
|
65
65
|
|
|
66
66
|
// 1. Build addon if needed.
|
|
@@ -122,13 +122,31 @@ console.log(`\nAssembling prebuilds/${platform}-${arch}/`);
|
|
|
122
122
|
|
|
123
123
|
const files = {};
|
|
124
124
|
const addonEntry = copyArtifact(addonSrc, 'doe_napi.node');
|
|
125
|
-
if (addonEntry) files[addonEntry.name] = {
|
|
125
|
+
if (addonEntry) files[addonEntry.name] = { path: addonEntry.path };
|
|
126
126
|
|
|
127
127
|
const doeEntry = copyArtifact(doeLib, `libwebgpu_doe.${ext}`);
|
|
128
|
-
if (doeEntry) files[doeEntry.name] = {
|
|
128
|
+
if (doeEntry) files[doeEntry.name] = { path: doeEntry.path };
|
|
129
129
|
|
|
130
130
|
const sidecarEntry = copyArtifact(sidecarSrc, sidecarName);
|
|
131
|
-
if (sidecarEntry) files[sidecarEntry.name] = {
|
|
131
|
+
if (sidecarEntry) files[sidecarEntry.name] = { path: sidecarEntry.path };
|
|
132
|
+
|
|
133
|
+
// macOS: ad-hoc sign dylibs for distribution.
|
|
134
|
+
if (platform === 'darwin') {
|
|
135
|
+
console.log('\nSigning dylibs (ad-hoc)...');
|
|
136
|
+
for (const name of Object.keys(files)) {
|
|
137
|
+
if (name.endsWith('.dylib')) {
|
|
138
|
+
try {
|
|
139
|
+
execFileSync('codesign', ['-s', '-', resolve(prebuildDir, name)], { stdio: 'inherit' });
|
|
140
|
+
} catch {
|
|
141
|
+
console.warn(` Warning: codesign failed for ${name} (may already be signed)`);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
const metadataFiles = Object.fromEntries(
|
|
148
|
+
Object.entries(files).map(([name, entry]) => [name, { sha256: sha256(entry.path) }]),
|
|
149
|
+
);
|
|
132
150
|
|
|
133
151
|
// 5. Write metadata manifest.
|
|
134
152
|
const pkg = JSON.parse(readFileSync(resolve(PACKAGE_ROOT, 'package.json'), 'utf8'));
|
|
@@ -153,7 +171,7 @@ const metadata = {
|
|
|
153
171
|
leanVerifiedBuild: doeBuild.leanVerifiedBuild,
|
|
154
172
|
proofArtifactSha256: doeBuild.proofArtifactSha256,
|
|
155
173
|
},
|
|
156
|
-
files,
|
|
174
|
+
files: metadataFiles,
|
|
157
175
|
builtAt: new Date().toISOString(),
|
|
158
176
|
};
|
|
159
177
|
|
|
@@ -161,19 +179,5 @@ const metadataPath = resolve(prebuildDir, 'metadata.json');
|
|
|
161
179
|
writeFileSync(metadataPath, JSON.stringify(metadata, null, 2) + '\n');
|
|
162
180
|
console.log(` metadata.json`);
|
|
163
181
|
|
|
164
|
-
// macOS: ad-hoc sign dylibs for distribution.
|
|
165
|
-
if (platform === 'darwin') {
|
|
166
|
-
console.log('\nSigning dylibs (ad-hoc)...');
|
|
167
|
-
for (const name of Object.keys(files)) {
|
|
168
|
-
if (name.endsWith('.dylib')) {
|
|
169
|
-
try {
|
|
170
|
-
execFileSync('codesign', ['-s', '-', resolve(prebuildDir, name)], { stdio: 'inherit' });
|
|
171
|
-
} catch {
|
|
172
|
-
console.warn(` Warning: codesign failed for ${name} (may already be signed)`);
|
|
173
|
-
}
|
|
174
|
-
}
|
|
175
|
-
}
|
|
176
|
-
}
|
|
177
|
-
|
|
178
182
|
console.log(`\nDone. Prebuild artifacts in prebuilds/${platform}-${arch}/`);
|
|
179
183
|
console.log(`Total files: ${Object.keys(files).length}`);
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
 * Infers bind group layout entries from the resource declarations in WGSL source.
 *
 * This is a text scan, not a WGSL parser: every `var` declaration that carries
 * both a `@group(n)` and a `@binding(n)` attribute (in either order) is mapped
 * to a layout entry as follows:
 *   - `var<uniform>`              -> `buffer: { type: 'uniform' }`
 *   - `var<storage, read_write>`  -> `buffer: { type: 'storage' }`
 *   - any other `var<storage...>` -> `buffer: { type: 'read-only-storage' }`
 *   - type `sampler_comparison`   -> `sampler: { type: 'comparison' }`
 *   - any other `sampler...` type -> `sampler: {}`
 * Declarations of any other kind (for example textures) are skipped.
 *
 * @param {string} code - WGSL shader source.
 * @param {number} visibility - Value copied verbatim into each entry's `visibility`.
 * @returns {Map<number, object[]>} Group index -> entries sorted by ascending
 *   binding. Groups appear in the order they are first seen in `code`.
 */
export function inferAutoBindGroupLayouts(code, visibility) {
  // WGSL attributes may be written in any order, so accept both
  // `@group @binding` and `@binding @group`, and tolerate whitespace inside
  // the parentheses.
  const declarationPattern =
    /((?:@(?:group|binding)\s*\(\s*\d+\s*\)\s*){2})var(?:<([^>]+)>)?\s+\w+\s*:\s*([^;]+);/g;
  const groupPattern = /@group\s*\(\s*(\d+)/;
  const bindingPattern = /@binding\s*\(\s*(\d+)/;
  const groups = new Map();

  for (const [, attributes, template = '', declaredType = ''] of code.matchAll(declarationPattern)) {
    const groupMatch = groupPattern.exec(attributes);
    const bindingMatch = bindingPattern.exec(attributes);
    // Two `@group` or two `@binding` attributes do not form a usable declaration.
    if (!groupMatch || !bindingMatch) continue;

    const group = Number(groupMatch[1]);
    const binding = Number(bindingMatch[1]);
    const addressSpace = template.trim();
    const typeExpr = declaredType.trim();
    let entry = null;

    if (addressSpace.startsWith('uniform')) {
      entry = { binding, visibility, buffer: { type: 'uniform' } };
    } else if (addressSpace.startsWith('storage')) {
      // Storage buffers are read-only unless the access mode is `read_write`.
      const readOnly = !addressSpace.includes('read_write');
      entry = { binding, visibility, buffer: { type: readOnly ? 'read-only-storage' : 'storage' } };
    } else if (typeExpr.startsWith('sampler')) {
      // An empty sampler layout defaults to a filtering sampler, which does not
      // match a `sampler_comparison` binding; mark those explicitly.
      entry = typeExpr.startsWith('sampler_comparison')
        ? { binding, visibility, sampler: { type: 'comparison' } }
        : { binding, visibility, sampler: {} };
    }

    if (!entry) continue;
    const entries = groups.get(group) ?? [];
    entries.push(entry);
    groups.set(group, entries);
  }

  for (const entries of groups.values()) {
    entries.sort((left, right) => left.binding - right.binding);
  }

  return groups;
}
|