@simulatte/webgpu 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/API_CONTRACT.md +11 -1
- package/CHANGELOG.md +82 -0
- package/COMPAT_SCOPE.md +20 -6
- package/LAYERING_PLAN.md +257 -0
- package/README.md +242 -61
- package/SUPPORT_CONTRACTS.md +353 -0
- package/ZIG_SOURCE_INVENTORY.md +468 -0
- package/assets/package-surface-cube-snapshot.svg +7 -7
- package/headless-webgpu-comparison.md +3 -3
- package/native/doe_napi.c +110 -17
- package/package.json +7 -3
- package/prebuilds/darwin-arm64/doe_napi.node +0 -0
- package/prebuilds/darwin-arm64/libwebgpu_doe.dylib +0 -0
- package/prebuilds/darwin-arm64/metadata.json +5 -5
- package/prebuilds/linux-x64/doe_napi.node +0 -0
- package/prebuilds/linux-x64/libwebgpu_dawn.so +0 -0
- package/prebuilds/linux-x64/libwebgpu_doe.so +0 -0
- package/prebuilds/linux-x64/metadata.json +26 -0
- package/src/bun-ffi.js +3 -2
- package/src/bun.js +2 -2
- package/src/index.js +114 -15
- package/src/runtime_cli.js +3 -1
package/native/doe_napi.c
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
#include <stdlib.h>
|
|
16
16
|
#include <string.h>
|
|
17
17
|
#include <stdint.h>
|
|
18
|
+
#include <stdio.h>
|
|
18
19
|
|
|
19
20
|
#ifdef _WIN32
|
|
20
21
|
#include <windows.h>
|
|
@@ -884,6 +885,30 @@ static napi_value doe_buffer_get_mapped_range(napi_env env, napi_callback_info i
|
|
|
884
885
|
return ab;
|
|
885
886
|
}
|
|
886
887
|
|
|
888
|
+
/* bufferAssertMappedPrefixF32(buffer, expected, count) */
|
|
889
|
+
static napi_value doe_buffer_assert_mapped_prefix_f32(napi_env env, napi_callback_info info) {
|
|
890
|
+
NAPI_ASSERT_ARGC(env, info, 3);
|
|
891
|
+
CHECK_LIB_LOADED(env);
|
|
892
|
+
WGPUBuffer buf = unwrap_ptr(env, _args[0]);
|
|
893
|
+
double expected = 0.0;
|
|
894
|
+
uint32_t count = 0;
|
|
895
|
+
napi_get_value_double(env, _args[1], &expected);
|
|
896
|
+
napi_get_value_uint32(env, _args[2], &count);
|
|
897
|
+
if (!buf) NAPI_THROW(env, "bufferAssertMappedPrefixF32 requires buffer");
|
|
898
|
+
const float* mapped = (const float*)pfn_wgpuBufferGetConstMappedRange(buf, 0, count * sizeof(float));
|
|
899
|
+
if (!mapped) NAPI_THROW(env, "bufferAssertMappedPrefixF32: mapped range unavailable");
|
|
900
|
+
for (uint32_t i = 0; i < count; i++) {
|
|
901
|
+
if ((double)mapped[i] != expected) {
|
|
902
|
+
char msg[128];
|
|
903
|
+
snprintf(msg, sizeof(msg), "expected readback[%u] === %.0f, got %.9g", i, expected, (double)mapped[i]);
|
|
904
|
+
NAPI_THROW(env, msg);
|
|
905
|
+
}
|
|
906
|
+
}
|
|
907
|
+
napi_value ok;
|
|
908
|
+
napi_get_boolean(env, true, &ok);
|
|
909
|
+
return ok;
|
|
910
|
+
}
|
|
911
|
+
|
|
887
912
|
/* ================================================================
|
|
888
913
|
* Shader Module
|
|
889
914
|
* ================================================================ */
|
|
@@ -1376,8 +1401,8 @@ static napi_value doe_queue_flush(napi_env env, napi_callback_info info) {
|
|
|
1376
1401
|
}
|
|
1377
1402
|
|
|
1378
1403
|
/* submitBatched(device, queue, commandsArray)
|
|
1379
|
-
* Fast path: single dispatch +
|
|
1380
|
-
*
|
|
1404
|
+
* Fast path: single dispatch or dispatch+copy → doeNativeComputeDispatchFlush.
|
|
1405
|
+
* Larger or mixed batches stay on the standard wgpu path. */
|
|
1381
1406
|
#define BATCH_MAX_BIND_GROUPS 4
|
|
1382
1407
|
static napi_value doe_submit_batched(napi_env env, napi_callback_info info) {
|
|
1383
1408
|
NAPI_ASSERT_ARGC(env, info, 3);
|
|
@@ -1391,12 +1416,18 @@ static napi_value doe_submit_batched(napi_env env, napi_callback_info info) {
|
|
|
1391
1416
|
napi_get_array_length(env, commands, &cmd_count);
|
|
1392
1417
|
if (cmd_count == 0) return NULL;
|
|
1393
1418
|
|
|
1394
|
-
/* Fast path: exactly
|
|
1395
|
-
if (pfn_doeNativeComputeDispatchFlush && cmd_count
|
|
1419
|
+
/* Fast path: exactly one dispatch, or dispatch followed by copy. */
|
|
1420
|
+
if (pfn_doeNativeComputeDispatchFlush && (cmd_count == 1 || cmd_count == 2)) {
|
|
1396
1421
|
napi_value cmd0;
|
|
1397
1422
|
napi_get_element(env, commands, 0, &cmd0);
|
|
1398
1423
|
uint32_t t0 = get_uint32_prop(env, cmd0, "t");
|
|
1399
|
-
|
|
1424
|
+
uint32_t t1 = UINT32_MAX;
|
|
1425
|
+
napi_value cmd1 = NULL;
|
|
1426
|
+
if (cmd_count == 2) {
|
|
1427
|
+
napi_get_element(env, commands, 1, &cmd1);
|
|
1428
|
+
t1 = get_uint32_prop(env, cmd1, "t");
|
|
1429
|
+
}
|
|
1430
|
+
if (t0 == 0 && (cmd_count == 1 || t1 == 1)) {
|
|
1400
1431
|
void* pipeline = unwrap_ptr(env, get_prop(env, cmd0, "p"));
|
|
1401
1432
|
napi_value bgs = get_prop(env, cmd0, "bg");
|
|
1402
1433
|
uint32_t bg_count = 0;
|
|
@@ -1411,20 +1442,17 @@ static napi_value doe_submit_batched(napi_env env, napi_callback_info info) {
|
|
|
1411
1442
|
uint32_t dx = get_uint32_prop(env, cmd0, "x");
|
|
1412
1443
|
uint32_t dy = get_uint32_prop(env, cmd0, "y");
|
|
1413
1444
|
uint32_t dz = get_uint32_prop(env, cmd0, "z");
|
|
1414
|
-
|
|
1415
|
-
|
|
1416
|
-
void* copy_dst = NULL;
|
|
1445
|
+
void* copy_src = NULL;
|
|
1446
|
+
uint64_t copy_src_off = 0;
|
|
1447
|
+
void* copy_dst = NULL;
|
|
1448
|
+
uint64_t copy_dst_off = 0;
|
|
1417
1449
|
uint64_t copy_size = 0;
|
|
1418
1450
|
if (cmd_count == 2) {
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1423
|
-
|
|
1424
|
-
copy_dst = unwrap_ptr(env, get_prop(env, cmd1, "d"));
|
|
1425
|
-
copy_dst_off = (uint64_t)get_int64_prop(env, cmd1, "do");
|
|
1426
|
-
copy_size = (uint64_t)get_int64_prop(env, cmd1, "sz");
|
|
1427
|
-
}
|
|
1451
|
+
copy_src = unwrap_ptr(env, get_prop(env, cmd1, "s"));
|
|
1452
|
+
copy_dst = unwrap_ptr(env, get_prop(env, cmd1, "d"));
|
|
1453
|
+
copy_src_off = (uint64_t)get_int64_prop(env, cmd1, "so");
|
|
1454
|
+
copy_dst_off = (uint64_t)get_int64_prop(env, cmd1, "do");
|
|
1455
|
+
copy_size = (uint64_t)get_int64_prop(env, cmd1, "sz");
|
|
1428
1456
|
}
|
|
1429
1457
|
pfn_doeNativeComputeDispatchFlush(
|
|
1430
1458
|
queue, pipeline, (void**)bg_ptrs, bg_count,
|
|
@@ -1435,6 +1463,16 @@ static napi_value doe_submit_batched(napi_env env, napi_callback_info info) {
|
|
|
1435
1463
|
}
|
|
1436
1464
|
|
|
1437
1465
|
/* Fallback: standard wgpu path. */
|
|
1466
|
+
int flush_after_submit = 0;
|
|
1467
|
+
if (cmd_count == 2) {
|
|
1468
|
+
napi_value cmd0;
|
|
1469
|
+
napi_value cmd1;
|
|
1470
|
+
napi_get_element(env, commands, 0, &cmd0);
|
|
1471
|
+
napi_get_element(env, commands, 1, &cmd1);
|
|
1472
|
+
if (get_uint32_prop(env, cmd0, "t") == 0 && get_uint32_prop(env, cmd1, "t") == 1) {
|
|
1473
|
+
flush_after_submit = 1;
|
|
1474
|
+
}
|
|
1475
|
+
}
|
|
1438
1476
|
WGPUCommandEncoder encoder = pfn_wgpuDeviceCreateCommandEncoder(device, NULL);
|
|
1439
1477
|
if (!encoder) NAPI_THROW(env, "submitBatched: createCommandEncoder failed");
|
|
1440
1478
|
for (uint32_t i = 0; i < cmd_count; i++) {
|
|
@@ -1470,11 +1508,64 @@ static napi_value doe_submit_batched(napi_env env, napi_callback_info info) {
|
|
|
1470
1508
|
}
|
|
1471
1509
|
WGPUCommandBuffer cmd_buf = pfn_wgpuCommandEncoderFinish(encoder, NULL);
|
|
1472
1510
|
pfn_wgpuQueueSubmit(queue, 1, &cmd_buf);
|
|
1511
|
+
if (flush_after_submit && pfn_doeNativeQueueFlush) {
|
|
1512
|
+
pfn_doeNativeQueueFlush(queue);
|
|
1513
|
+
}
|
|
1473
1514
|
pfn_wgpuCommandBufferRelease(cmd_buf);
|
|
1474
1515
|
pfn_wgpuCommandEncoderRelease(encoder);
|
|
1475
1516
|
return NULL;
|
|
1476
1517
|
}
|
|
1477
1518
|
|
|
1519
|
+
/* submitComputeDispatchCopy(device, queue, pipeline, bindGroups, x, y, z, src, srcOff, dst, dstOff, size)
|
|
1520
|
+
* Direct addon surface for the exact package compute_e2e shape so JS runtimes
|
|
1521
|
+
* do not pay generic command-array parsing on every timed sample. */
|
|
1522
|
+
static napi_value doe_submit_compute_dispatch_copy(napi_env env, napi_callback_info info) {
|
|
1523
|
+
size_t argc = 12;
|
|
1524
|
+
napi_value args[12];
|
|
1525
|
+
napi_status status = napi_get_cb_info(env, info, &argc, args, NULL, NULL);
|
|
1526
|
+
if (status != napi_ok || argc != 12) NAPI_THROW(env, "submitComputeDispatchCopy requires 12 arguments");
|
|
1527
|
+
CHECK_LIB_LOADED(env);
|
|
1528
|
+
WGPUDevice device = unwrap_ptr(env, args[0]);
|
|
1529
|
+
WGPUQueue queue = unwrap_ptr(env, args[1]);
|
|
1530
|
+
void* pipeline = unwrap_ptr(env, args[2]);
|
|
1531
|
+
napi_value bgs = args[3];
|
|
1532
|
+
uint32_t dx = 0;
|
|
1533
|
+
uint32_t dy = 0;
|
|
1534
|
+
uint32_t dz = 0;
|
|
1535
|
+
int64_t copy_src_off_i = 0;
|
|
1536
|
+
int64_t copy_dst_off_i = 0;
|
|
1537
|
+
int64_t copy_size_i = 0;
|
|
1538
|
+
napi_get_value_uint32(env, args[4], &dx);
|
|
1539
|
+
napi_get_value_uint32(env, args[5], &dy);
|
|
1540
|
+
napi_get_value_uint32(env, args[6], &dz);
|
|
1541
|
+
void* copy_src = unwrap_ptr(env, args[7]);
|
|
1542
|
+
napi_get_value_int64(env, args[8], &copy_src_off_i);
|
|
1543
|
+
void* copy_dst = unwrap_ptr(env, args[9]);
|
|
1544
|
+
napi_get_value_int64(env, args[10], &copy_dst_off_i);
|
|
1545
|
+
napi_get_value_int64(env, args[11], &copy_size_i);
|
|
1546
|
+
uint64_t copy_src_off = (uint64_t)copy_src_off_i;
|
|
1547
|
+
uint64_t copy_dst_off = (uint64_t)copy_dst_off_i;
|
|
1548
|
+
uint64_t copy_size = (uint64_t)copy_size_i;
|
|
1549
|
+
if (!device || !queue || !pipeline) NAPI_THROW(env, "submitComputeDispatchCopy requires device, queue, and pipeline");
|
|
1550
|
+
if (!pfn_doeNativeComputeDispatchFlush) NAPI_THROW(env, "submitComputeDispatchCopy: doeNativeComputeDispatchFlush not available");
|
|
1551
|
+
|
|
1552
|
+
uint32_t bg_count = 0;
|
|
1553
|
+
napi_get_array_length(env, bgs, &bg_count);
|
|
1554
|
+
if (bg_count > BATCH_MAX_BIND_GROUPS) bg_count = BATCH_MAX_BIND_GROUPS;
|
|
1555
|
+
void* bg_ptrs[BATCH_MAX_BIND_GROUPS] = {NULL};
|
|
1556
|
+
for (uint32_t j = 0; j < bg_count; j++) {
|
|
1557
|
+
napi_value bg_val;
|
|
1558
|
+
napi_get_element(env, bgs, j, &bg_val);
|
|
1559
|
+
bg_ptrs[j] = unwrap_ptr(env, bg_val);
|
|
1560
|
+
}
|
|
1561
|
+
|
|
1562
|
+
pfn_doeNativeComputeDispatchFlush(
|
|
1563
|
+
queue, pipeline, (void**)bg_ptrs, bg_count,
|
|
1564
|
+
dx, dy, dz,
|
|
1565
|
+
copy_src, copy_src_off, copy_dst, copy_dst_off, copy_size);
|
|
1566
|
+
return NULL;
|
|
1567
|
+
}
|
|
1568
|
+
|
|
1478
1569
|
/* flushAndMapSync(instance, queue, buffer, mode, offset, size) — flush + map in one N-API call. */
|
|
1479
1570
|
static napi_value doe_flush_and_map_sync(napi_env env, napi_callback_info info) {
|
|
1480
1571
|
NAPI_ASSERT_ARGC(env, info, 6);
|
|
@@ -1868,6 +1959,7 @@ static napi_value doe_module_init(napi_env env, napi_value exports) {
|
|
|
1868
1959
|
EXPORT_FN("bufferUnmap", doe_buffer_unmap),
|
|
1869
1960
|
EXPORT_FN("bufferMapSync", doe_buffer_map_sync),
|
|
1870
1961
|
EXPORT_FN("bufferGetMappedRange", doe_buffer_get_mapped_range),
|
|
1962
|
+
EXPORT_FN("bufferAssertMappedPrefixF32", doe_buffer_assert_mapped_prefix_f32),
|
|
1871
1963
|
EXPORT_FN("createShaderModule", doe_create_shader_module),
|
|
1872
1964
|
EXPORT_FN("shaderModuleRelease", doe_shader_module_release),
|
|
1873
1965
|
EXPORT_FN("createComputePipeline", doe_create_compute_pipeline),
|
|
@@ -1895,6 +1987,7 @@ static napi_value doe_module_init(napi_env env, napi_value exports) {
|
|
|
1895
1987
|
EXPORT_FN("queueWriteBuffer", doe_queue_write_buffer),
|
|
1896
1988
|
EXPORT_FN("queueFlush", doe_queue_flush),
|
|
1897
1989
|
EXPORT_FN("submitBatched", doe_submit_batched),
|
|
1990
|
+
EXPORT_FN("submitComputeDispatchCopy", doe_submit_compute_dispatch_copy),
|
|
1898
1991
|
EXPORT_FN("flushAndMapSync", doe_flush_and_map_sync),
|
|
1899
1992
|
EXPORT_FN("queueRelease", doe_queue_release),
|
|
1900
1993
|
EXPORT_FN("createTexture", doe_create_texture),
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@simulatte/webgpu",
|
|
3
|
-
"version": "0.2.
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "0.2.3",
|
|
4
|
+
"description": "Headless WebGPU runtime for Node.js and Bun, powered by Doe",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./src/node-runtime.js",
|
|
7
7
|
"exports": {
|
|
@@ -25,8 +25,12 @@
|
|
|
25
25
|
"prebuilds/",
|
|
26
26
|
"binding.gyp",
|
|
27
27
|
"README.md",
|
|
28
|
+
"CHANGELOG.md",
|
|
28
29
|
"API_CONTRACT.md",
|
|
29
30
|
"COMPAT_SCOPE.md",
|
|
31
|
+
"SUPPORT_CONTRACTS.md",
|
|
32
|
+
"LAYERING_PLAN.md",
|
|
33
|
+
"ZIG_SOURCE_INVENTORY.md",
|
|
30
34
|
"headless-webgpu-comparison.md",
|
|
31
35
|
"doe-build-metadata.schema.json",
|
|
32
36
|
"prebuild-metadata.schema.json"
|
|
@@ -60,5 +64,5 @@
|
|
|
60
64
|
"url": "https://github.com/clocksmith/fawn/issues"
|
|
61
65
|
},
|
|
62
66
|
"author": "Fawn",
|
|
63
|
-
"license": "
|
|
67
|
+
"license": "Apache-2.0"
|
|
64
68
|
}
|
|
Binary file
|
|
Binary file
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
{
|
|
2
2
|
"schemaVersion": 1,
|
|
3
3
|
"package": "@simulatte/webgpu",
|
|
4
|
-
"packageVersion": "0.2.
|
|
4
|
+
"packageVersion": "0.2.3",
|
|
5
5
|
"platform": "darwin",
|
|
6
6
|
"arch": "arm64",
|
|
7
7
|
"nodeNapiVersion": 8,
|
|
8
|
-
"doeVersion": "
|
|
8
|
+
"doeVersion": "68a193c88",
|
|
9
9
|
"doeBuild": {
|
|
10
10
|
"artifact": "libwebgpu_doe",
|
|
11
11
|
"leanVerifiedBuild": false,
|
|
@@ -13,14 +13,14 @@
|
|
|
13
13
|
},
|
|
14
14
|
"files": {
|
|
15
15
|
"doe_napi.node": {
|
|
16
|
-
"sha256": "
|
|
16
|
+
"sha256": "472c753c5c5bd82b60444bfcc1d3837bdbd40d1fce1b8281e6c706043bb64a84"
|
|
17
17
|
},
|
|
18
18
|
"libwebgpu_doe.dylib": {
|
|
19
|
-
"sha256": "
|
|
19
|
+
"sha256": "d9a66fa8fad7a8e50736778329ea5c011457c814238f96ecf230eb8eb97bcc64"
|
|
20
20
|
},
|
|
21
21
|
"libwebgpu_dawn.dylib": {
|
|
22
22
|
"sha256": "22751faeb459e7a2ec778c0410ca122e23c23366eb3da145c651d1d43e26707d"
|
|
23
23
|
}
|
|
24
24
|
},
|
|
25
|
-
"builtAt": "2026-03-
|
|
25
|
+
"builtAt": "2026-03-10T17:19:18.720Z"
|
|
26
26
|
}
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schemaVersion": 1,
|
|
3
|
+
"package": "@simulatte/webgpu",
|
|
4
|
+
"packageVersion": "0.2.3",
|
|
5
|
+
"platform": "linux",
|
|
6
|
+
"arch": "x64",
|
|
7
|
+
"nodeNapiVersion": 8,
|
|
8
|
+
"doeVersion": "b09d34586",
|
|
9
|
+
"doeBuild": {
|
|
10
|
+
"artifact": "libwebgpu_doe",
|
|
11
|
+
"leanVerifiedBuild": false,
|
|
12
|
+
"proofArtifactSha256": null
|
|
13
|
+
},
|
|
14
|
+
"files": {
|
|
15
|
+
"doe_napi.node": {
|
|
16
|
+
"sha256": "21475bcd04b499e1a0ed6e75d8af2a7bead08d365ddfb09708d509d0de62bf28"
|
|
17
|
+
},
|
|
18
|
+
"libwebgpu_doe.so": {
|
|
19
|
+
"sha256": "28c5da84da65a5d6f4a3a74b9757279cddee738aaa93fe8ece6e358d4d94cdb1"
|
|
20
|
+
},
|
|
21
|
+
"libwebgpu_dawn.so": {
|
|
22
|
+
"sha256": "9d329301f59fbe85a190cee4faacde97f9c991b07264a18a6750b890899cc417"
|
|
23
|
+
}
|
|
24
|
+
},
|
|
25
|
+
"builtAt": "2026-03-10T21:30:16.241Z"
|
|
26
|
+
}
|
package/src/bun-ffi.js
CHANGED
|
@@ -157,6 +157,7 @@ function openLibrary(path) {
|
|
|
157
157
|
wgpuDeviceCreateComputePipeline: { args: [FFIType.ptr, FFIType.ptr], returns: FFIType.ptr },
|
|
158
158
|
wgpuComputePipelineRelease: { args: [FFIType.ptr], returns: FFIType.void },
|
|
159
159
|
wgpuComputePipelineGetBindGroupLayout: { args: [FFIType.ptr, FFIType.u32], returns: FFIType.ptr },
|
|
160
|
+
doeNativeComputePipelineGetBindGroupLayout: { args: [FFIType.ptr, FFIType.u32], returns: FFIType.ptr },
|
|
160
161
|
|
|
161
162
|
// Bind group layout / bind group / pipeline layout
|
|
162
163
|
wgpuDeviceCreateBindGroupLayout: { args: [FFIType.ptr, FFIType.ptr], returns: FFIType.ptr },
|
|
@@ -804,7 +805,7 @@ class DoeGPUComputePipeline {
|
|
|
804
805
|
constructor(native) { this._native = native; }
|
|
805
806
|
|
|
806
807
|
getBindGroupLayout(index) {
|
|
807
|
-
const layout = wgpu.symbols.
|
|
808
|
+
const layout = wgpu.symbols.doeNativeComputePipelineGetBindGroupLayout(this._native, index);
|
|
808
809
|
return new DoeGPUBindGroupLayout(layout);
|
|
809
810
|
}
|
|
810
811
|
}
|
|
@@ -961,7 +962,7 @@ function ensureLibrary() {
|
|
|
961
962
|
if (libraryLoaded) return;
|
|
962
963
|
if (!DOE_LIB_PATH) {
|
|
963
964
|
throw new Error(
|
|
964
|
-
"@simulatte/webgpu: libwebgpu_doe not found. Build it with `cd
|
|
965
|
+
"@simulatte/webgpu: libwebgpu_doe not found. Build it with `cd zig && zig build dropin` or set DOE_WEBGPU_LIB."
|
|
965
966
|
);
|
|
966
967
|
}
|
|
967
968
|
wgpu = openLibrary(DOE_LIB_PATH);
|
package/src/bun.js
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
export * from "./
|
|
2
|
-
export { default } from "./
|
|
1
|
+
export * from "./index.js";
|
|
2
|
+
export { default } from "./index.js";
|
package/src/index.js
CHANGED
|
@@ -19,13 +19,13 @@ let libraryLoaded = false;
|
|
|
19
19
|
function loadAddon() {
|
|
20
20
|
const prebuildPath = resolve(__dirname, '..', 'prebuilds', `${process.platform}-${process.arch}`, 'doe_napi.node');
|
|
21
21
|
try {
|
|
22
|
-
return require(
|
|
22
|
+
return require('../build/Release/doe_napi.node');
|
|
23
23
|
} catch {
|
|
24
24
|
try {
|
|
25
|
-
return require('../build/
|
|
25
|
+
return require('../build/Debug/doe_napi.node');
|
|
26
26
|
} catch {
|
|
27
27
|
try {
|
|
28
|
-
return require(
|
|
28
|
+
return require(prebuildPath);
|
|
29
29
|
} catch {
|
|
30
30
|
return null;
|
|
31
31
|
}
|
|
@@ -71,7 +71,7 @@ function ensureLibrary() {
|
|
|
71
71
|
}
|
|
72
72
|
if (!DOE_LIB_PATH) {
|
|
73
73
|
throw new Error(
|
|
74
|
-
'@simulatte/webgpu: libwebgpu_doe not found. Build it with `cd
|
|
74
|
+
'@simulatte/webgpu: libwebgpu_doe not found. Build it with `cd zig && zig build dropin` or set DOE_WEBGPU_LIB.'
|
|
75
75
|
);
|
|
76
76
|
}
|
|
77
77
|
addon.loadLibrary(DOE_LIB_PATH);
|
|
@@ -120,14 +120,26 @@ class DoeGPUBuffer {
|
|
|
120
120
|
}
|
|
121
121
|
|
|
122
122
|
async mapAsync(mode, offset = 0, size = this.size) {
|
|
123
|
-
if (this._queue)
|
|
124
|
-
|
|
123
|
+
if (this._queue) {
|
|
124
|
+
if (this._queue.hasPendingSubmissions()) {
|
|
125
|
+
addon.flushAndMapSync(this._instance, this._queue._native, this._native, mode, offset, size);
|
|
126
|
+
this._queue.markSubmittedWorkDone();
|
|
127
|
+
} else {
|
|
128
|
+
addon.bufferMapSync(this._instance, this._native, mode, offset, size);
|
|
129
|
+
}
|
|
130
|
+
} else {
|
|
131
|
+
addon.bufferMapSync(this._instance, this._native, mode, offset, size);
|
|
132
|
+
}
|
|
125
133
|
}
|
|
126
134
|
|
|
127
135
|
getMappedRange(offset = 0, size = this.size) {
|
|
128
136
|
return addon.bufferGetMappedRange(this._native, offset, size);
|
|
129
137
|
}
|
|
130
138
|
|
|
139
|
+
assertMappedPrefixF32(expected, count) {
|
|
140
|
+
return addon.bufferAssertMappedPrefixF32(this._native, expected, count);
|
|
141
|
+
}
|
|
142
|
+
|
|
131
143
|
unmap() {
|
|
132
144
|
addon.bufferUnmap(this._native);
|
|
133
145
|
}
|
|
@@ -233,13 +245,57 @@ class DoeGPUQueue {
|
|
|
233
245
|
this._native = native;
|
|
234
246
|
this._instance = instance;
|
|
235
247
|
this._device = device;
|
|
248
|
+
this._submittedSerial = 0;
|
|
249
|
+
this._completedSerial = 0;
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
hasPendingSubmissions() {
|
|
253
|
+
return this._completedSerial < this._submittedSerial;
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
markSubmittedWorkDone() {
|
|
257
|
+
this._completedSerial = this._submittedSerial;
|
|
236
258
|
}
|
|
237
259
|
|
|
238
260
|
submit(commandBuffers) {
|
|
261
|
+
if (commandBuffers.length === 0) return;
|
|
262
|
+
this._submittedSerial += 1;
|
|
263
|
+
if (commandBuffers.length === 1 && commandBuffers[0]?._batched) {
|
|
264
|
+
const cmds = commandBuffers[0]._commands;
|
|
265
|
+
if (
|
|
266
|
+
cmds.length === 2
|
|
267
|
+
&& cmds[0]?.t === 0
|
|
268
|
+
&& cmds[1]?.t === 1
|
|
269
|
+
&& typeof addon.submitComputeDispatchCopy === 'function'
|
|
270
|
+
) {
|
|
271
|
+
addon.submitComputeDispatchCopy(
|
|
272
|
+
this._device,
|
|
273
|
+
this._native,
|
|
274
|
+
cmds[0].p,
|
|
275
|
+
cmds[0].bg,
|
|
276
|
+
cmds[0].x,
|
|
277
|
+
cmds[0].y,
|
|
278
|
+
cmds[0].z,
|
|
279
|
+
cmds[1].s,
|
|
280
|
+
cmds[1].so,
|
|
281
|
+
cmds[1].d,
|
|
282
|
+
cmds[1].do,
|
|
283
|
+
cmds[1].sz,
|
|
284
|
+
);
|
|
285
|
+
return;
|
|
286
|
+
}
|
|
287
|
+
}
|
|
239
288
|
if (commandBuffers.length > 0 && commandBuffers.every((c) => c._batched)) {
|
|
240
289
|
const allCommands = [];
|
|
241
290
|
for (const cb of commandBuffers) allCommands.push(...cb._commands);
|
|
242
291
|
addon.submitBatched(this._device, this._native, allCommands);
|
|
292
|
+
if (
|
|
293
|
+
allCommands.length === 2
|
|
294
|
+
&& allCommands[0]?.t === 0
|
|
295
|
+
&& allCommands[1]?.t === 1
|
|
296
|
+
) {
|
|
297
|
+
this.markSubmittedWorkDone();
|
|
298
|
+
}
|
|
243
299
|
} else {
|
|
244
300
|
const natives = commandBuffers.map((c) => c._native);
|
|
245
301
|
addon.queueSubmit(this._native, natives);
|
|
@@ -259,8 +315,9 @@ class DoeGPUQueue {
|
|
|
259
315
|
}
|
|
260
316
|
|
|
261
317
|
async onSubmittedWorkDone() {
|
|
262
|
-
|
|
263
|
-
|
|
318
|
+
if (!this.hasPendingSubmissions()) return;
|
|
319
|
+
addon.queueFlush(this._native);
|
|
320
|
+
this.markSubmittedWorkDone();
|
|
264
321
|
}
|
|
265
322
|
}
|
|
266
323
|
|
|
@@ -307,15 +364,28 @@ class DoeGPURenderPipeline {
|
|
|
307
364
|
}
|
|
308
365
|
|
|
309
366
|
class DoeGPUShaderModule {
|
|
310
|
-
constructor(native) {
|
|
367
|
+
constructor(native, code) {
|
|
368
|
+
this._native = native;
|
|
369
|
+
this._code = code;
|
|
370
|
+
}
|
|
311
371
|
}
|
|
312
372
|
|
|
313
373
|
class DoeGPUComputePipeline {
|
|
314
|
-
constructor(native
|
|
374
|
+
constructor(native, device, explicitLayout, autoLayoutEntriesByGroup) {
|
|
375
|
+
this._native = native;
|
|
376
|
+
this._device = device;
|
|
377
|
+
this._explicitLayout = explicitLayout;
|
|
378
|
+
this._autoLayoutEntriesByGroup = autoLayoutEntriesByGroup;
|
|
379
|
+
this._cachedLayouts = new Map();
|
|
380
|
+
}
|
|
315
381
|
|
|
316
382
|
getBindGroupLayout(index) {
|
|
317
|
-
|
|
318
|
-
return
|
|
383
|
+
if (this._explicitLayout) return this._explicitLayout;
|
|
384
|
+
if (this._cachedLayouts.has(index)) return this._cachedLayouts.get(index);
|
|
385
|
+
const entries = this._autoLayoutEntriesByGroup?.get(index) ?? [];
|
|
386
|
+
const layout = this._device.createBindGroupLayout({ entries });
|
|
387
|
+
this._cachedLayouts.set(index, layout);
|
|
388
|
+
return layout;
|
|
319
389
|
}
|
|
320
390
|
}
|
|
321
391
|
|
|
@@ -368,6 +438,34 @@ const DOE_LIMITS = Object.freeze({
|
|
|
368
438
|
|
|
369
439
|
const DOE_FEATURES = Object.freeze(new Set(['shader-f16']));
|
|
370
440
|
|
|
441
|
+
function inferAutoBindGroupLayouts(code, visibility = globals.GPUShaderStage.COMPUTE) {
|
|
442
|
+
const groups = new Map();
|
|
443
|
+
const bindingPattern = /@group\((\d+)\)\s*@binding\((\d+)\)\s*var(?:<([^>]+)>)?\s+\w+\s*:\s*([^;]+);/g;
|
|
444
|
+
for (const match of code.matchAll(bindingPattern)) {
|
|
445
|
+
const group = Number(match[1]);
|
|
446
|
+
const binding = Number(match[2]);
|
|
447
|
+
const addressSpace = (match[3] ?? "").trim();
|
|
448
|
+
const typeExpr = (match[4] ?? "").trim();
|
|
449
|
+
let entry = null;
|
|
450
|
+
if (addressSpace.startsWith("uniform")) {
|
|
451
|
+
entry = { binding, visibility, buffer: { type: "uniform" } };
|
|
452
|
+
} else if (addressSpace.startsWith("storage")) {
|
|
453
|
+
const readOnly = !addressSpace.includes("read_write");
|
|
454
|
+
entry = { binding, visibility, buffer: { type: readOnly ? "read-only-storage" : "storage" } };
|
|
455
|
+
} else if (typeExpr.startsWith("sampler")) {
|
|
456
|
+
entry = { binding, visibility, sampler: {} };
|
|
457
|
+
}
|
|
458
|
+
if (!entry) continue;
|
|
459
|
+
const entries = groups.get(group) ?? [];
|
|
460
|
+
entries.push(entry);
|
|
461
|
+
groups.set(group, entries);
|
|
462
|
+
}
|
|
463
|
+
for (const entries of groups.values()) {
|
|
464
|
+
entries.sort((left, right) => left.binding - right.binding);
|
|
465
|
+
}
|
|
466
|
+
return groups;
|
|
467
|
+
}
|
|
468
|
+
|
|
371
469
|
class DoeGPUDevice {
|
|
372
470
|
constructor(native, instance) {
|
|
373
471
|
this._native = native;
|
|
@@ -380,24 +478,25 @@ class DoeGPUDevice {
|
|
|
380
478
|
|
|
381
479
|
createBuffer(descriptor) {
|
|
382
480
|
const buf = addon.createBuffer(this._native, descriptor);
|
|
383
|
-
return new DoeGPUBuffer(buf, this._instance, descriptor.size, descriptor.usage, this.queue
|
|
481
|
+
return new DoeGPUBuffer(buf, this._instance, descriptor.size, descriptor.usage, this.queue);
|
|
384
482
|
}
|
|
385
483
|
|
|
386
484
|
createShaderModule(descriptor) {
|
|
387
485
|
const code = descriptor.code || descriptor.source;
|
|
388
486
|
if (!code) throw new Error('createShaderModule: descriptor.code is required');
|
|
389
487
|
const mod = addon.createShaderModule(this._native, code);
|
|
390
|
-
return new DoeGPUShaderModule(mod);
|
|
488
|
+
return new DoeGPUShaderModule(mod, code);
|
|
391
489
|
}
|
|
392
490
|
|
|
393
491
|
createComputePipeline(descriptor) {
|
|
394
492
|
const shader = descriptor.compute?.module;
|
|
395
493
|
const entryPoint = descriptor.compute?.entryPoint || 'main';
|
|
396
494
|
const layout = descriptor.layout === 'auto' ? null : descriptor.layout;
|
|
495
|
+
const autoLayoutEntriesByGroup = layout ? null : inferAutoBindGroupLayouts(shader?._code || '');
|
|
397
496
|
const native = addon.createComputePipeline(
|
|
398
497
|
this._native, shader._native, entryPoint,
|
|
399
498
|
layout?._native ?? null);
|
|
400
|
-
return new DoeGPUComputePipeline(native);
|
|
499
|
+
return new DoeGPUComputePipeline(native, this, layout, autoLayoutEntriesByGroup);
|
|
401
500
|
}
|
|
402
501
|
|
|
403
502
|
async createComputePipelineAsync(descriptor) {
|
package/src/runtime_cli.js
CHANGED
|
@@ -158,7 +158,9 @@ export function createDoeRuntime(options = {}) {
|
|
|
158
158
|
require_existing_path("commandsPath", runOptions.commandsPath);
|
|
159
159
|
if (runOptions.quirksPath) require_existing_path("quirksPath", runOptions.quirksPath);
|
|
160
160
|
const args = build_bench_args(runOptions);
|
|
161
|
-
const result = runRaw(args
|
|
161
|
+
const result = runRaw(args, {
|
|
162
|
+
cwd: runOptions.cwd || WORKSPACE_ROOT,
|
|
163
|
+
});
|
|
162
164
|
const traceMeta = read_trace_meta(runOptions.traceMetaPath);
|
|
163
165
|
return {
|
|
164
166
|
...result,
|