@plasius/gpu-lock-free-queue 0.1.2 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +96 -0
- package/README.md +25 -5
- package/dist/index.cjs +8 -44
- package/dist/index.cjs.map +1 -1
- package/dist/queue.wgsl +57 -14
- package/package.json +8 -3
- package/src/index.cjs +27 -0
- package/src/queue.wgsl +57 -14
package/CHANGELOG.md
CHANGED
|
@@ -20,6 +20,85 @@ The format is based on **[Keep a Changelog](https://keepachangelog.com/en/1.1.0/
|
|
|
20
20
|
- **Security**
|
|
21
21
|
- (placeholder)
|
|
22
22
|
|
|
23
|
+
## [0.2.2] - 2026-02-28
|
|
24
|
+
|
|
25
|
+
- **Added**
|
|
26
|
+
- (placeholder)
|
|
27
|
+
|
|
28
|
+
- **Changed**
|
|
29
|
+
- (placeholder)
|
|
30
|
+
|
|
31
|
+
- **Fixed**
|
|
32
|
+
- (placeholder)
|
|
33
|
+
|
|
34
|
+
- **Security**
|
|
35
|
+
- (placeholder)
|
|
36
|
+
|
|
37
|
+
## [0.2.2] - 2026-02-28
|
|
38
|
+
|
|
39
|
+
- **Added**
|
|
40
|
+
- (placeholder)
|
|
41
|
+
|
|
42
|
+
- **Changed**
|
|
43
|
+
- (placeholder)
|
|
44
|
+
|
|
45
|
+
- **Fixed**
|
|
46
|
+
- (placeholder)
|
|
47
|
+
|
|
48
|
+
- **Security**
|
|
49
|
+
- (placeholder)
|
|
50
|
+
|
|
51
|
+
## [0.2.2] - 2026-02-28
|
|
52
|
+
|
|
53
|
+
- **Added**
|
|
54
|
+
- (placeholder)
|
|
55
|
+
|
|
56
|
+
- **Changed**
|
|
57
|
+
- (placeholder)
|
|
58
|
+
|
|
59
|
+
- **Fixed**
|
|
60
|
+
- (placeholder)
|
|
61
|
+
|
|
62
|
+
- **Security**
|
|
63
|
+
- (placeholder)
|
|
64
|
+
|
|
65
|
+
## [0.2.2] - 2026-02-28
|
|
66
|
+
|
|
67
|
+
- **Added**
|
|
68
|
+
- (placeholder)
|
|
69
|
+
|
|
70
|
+
- **Changed**
|
|
71
|
+
- (placeholder)
|
|
72
|
+
|
|
73
|
+
- **Fixed**
|
|
74
|
+
- (placeholder)
|
|
75
|
+
|
|
76
|
+
- **Security**
|
|
77
|
+
- (placeholder)
|
|
78
|
+
|
|
79
|
+
## [0.2.1] - 2026-01-23
|
|
80
|
+
|
|
81
|
+
- **Changed**
|
|
82
|
+
- **Breaking:** Queue payloads are now referenced by fixed metadata offsets into caller-managed payload buffers (no internal payload arena).
|
|
83
|
+
- **Breaking:** Queue header and bindings updated to remove payload arena fields and buffer.
|
|
84
|
+
- Demo and tests updated to reflect the new payload-handle layout.
|
|
85
|
+
- **Breaking:** Queue header now includes payload arena head/tail and capacity/mask fields.
|
|
86
|
+
- Queue helpers now expose a `queue_len` backlog snapshot for schedulers.
|
|
87
|
+
- Demo and tests updated to use job metadata and variable payload copies.
|
|
88
|
+
|
|
89
|
+
- **Fixed**
|
|
90
|
+
- Payload allocations now validate arena capacity before enqueue.
|
|
91
|
+
|
|
92
|
+
## [0.2.0] - 2026-01-23
|
|
93
|
+
|
|
94
|
+
- **Changed**
|
|
95
|
+
- **Breaking:** WGSL bindings now include a dedicated payload ring buffer plus input/output payload buffers.
|
|
96
|
+
- Queue headers now carry `payload_stride` (u32 words) and job payloads are copied into the ring on enqueue.
|
|
97
|
+
- Demo and tests updated to use payload buffers instead of `input_jobs`/`output_jobs`.
|
|
98
|
+
|
|
99
|
+
- **Fixed**
|
|
100
|
+
- Payload job counts now clamp to payload buffer lengths to prevent overruns.
|
|
101
|
+
|
|
23
102
|
## [0.1.2] - 2026-01-22
|
|
24
103
|
|
|
25
104
|
- **Added**
|
|
@@ -51,3 +130,20 @@ The format is based on **[Keep a Changelog](https://keepachangelog.com/en/1.1.0/
|
|
|
51
130
|
|
|
52
131
|
[0.1.0]: https://github.com/Plasius-LTD/gpu-lock-free-queue/releases/tag/v0.1.0
|
|
53
132
|
[0.1.2]: https://github.com/Plasius-LTD/gpu-lock-free-queue/releases/tag/v0.1.2
|
|
133
|
+
[0.2.0]: https://github.com/Plasius-LTD/gpu-lock-free-queue/releases/tag/v0.2.0
|
|
134
|
+
[0.2.1]: https://github.com/Plasius-LTD/gpu-lock-free-queue/releases/tag/v0.2.1
|
|
135
|
+
|
|
136
|
+
## [0.2.1] - 2026-02-11
|
|
137
|
+
|
|
138
|
+
- **Added**
|
|
139
|
+
- Initial release.
|
|
140
|
+
|
|
141
|
+
- **Changed**
|
|
142
|
+
- (placeholder)
|
|
143
|
+
|
|
144
|
+
- **Fixed**
|
|
145
|
+
- (placeholder)
|
|
146
|
+
|
|
147
|
+
- **Security**
|
|
148
|
+
- (placeholder)
|
|
149
|
+
[0.2.2]: https://github.com/Plasius-LTD/gpu-lock-free-queue/releases/tag/v0.2.2
|
package/README.md
CHANGED
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
# @plasius/gpu-lock-free-queue
|
|
2
2
|
|
|
3
|
-
[npm version](https://www.npmjs.com/package/@plasius/gpu-lock-free-queue)
|
|
3
|
+
[npm version](https://www.npmjs.com/package/@plasius/gpu-lock-free-queue)
|
|
4
|
+
[CI](https://github.com/Plasius-LTD/gpu-lock-free-queue/actions/workflows/ci.yml)
|
|
5
|
+
[Coverage](https://codecov.io/gh/Plasius-LTD/gpu-lock-free-queue)
|
|
6
|
+
[License](./LICENSE)
|
|
7
|
+
[Code of Conduct](./CODE_OF_CONDUCT.md)
|
|
8
|
+
[Security Policy](./SECURITY.md)
|
|
9
|
+
[Changelog](./CHANGELOG.md)
|
|
10
|
+
|
|
4
11
|
[CI](https://github.com/Plasius-LTD/gpu-lock-free-queue/actions/workflows/ci.yml)
|
|
5
12
|
[License](./LICENSE)
|
|
6
13
|
|
|
@@ -25,11 +32,24 @@ const shaderCode = await loadQueueWgsl();
|
|
|
25
32
|
## What this is
|
|
26
33
|
- Lock-free multi-producer, multi-consumer ring queue on the GPU.
|
|
27
34
|
- Uses per-slot sequence numbers to avoid ABA for slots within a 32-bit epoch.
|
|
28
|
-
- Fixed-size
|
|
35
|
+
- Fixed-size job metadata with payload offsets into a caller-managed data arena or buffer.
|
|
36
|
+
|
|
37
|
+
## Buffer layout (breaking change in v0.2.1)
|
|
38
|
+
Bindings are:
|
|
39
|
+
1. `@binding(0)` queue header: `{ head, tail, capacity, mask }`
|
|
40
|
+
2. `@binding(1)` slot array (`Slot` with `seq`, `job_type`, `payload_offset`, `payload_words`)
|
|
41
|
+
3. `@binding(2)` input jobs (`array<JobMeta>` with `job_type`, `payload_offset`, `payload_words`)
|
|
42
|
+
4. `@binding(3)` output jobs (`array<JobMeta>` with `job_type`, `payload_offset`, `payload_words`)
|
|
43
|
+
5. `@binding(4)` input payloads (`array<u32>`, payload data referenced by `input_jobs.payload_offset`)
|
|
44
|
+
6. `@binding(5)` output payloads (`array<u32>`, length `job_count * output_stride`)
|
|
45
|
+
7. `@binding(6)` status flags (`array<u32>`, length `job_count`)
|
|
46
|
+
8. `@binding(7)` params (`Params` with `job_count`, `output_stride`)
|
|
47
|
+
|
|
48
|
+
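`output_stride` is the number of u32 words reserved per job in `output_payloads`: the job dequeued at index `idx` writes to words `idx * output_stride` through `(idx + 1) * output_stride - 1`. When `output_stride` is 0, `dequeue_job_count()` returns 0.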
|
|
29
49
|
|
|
30
50
|
## Limitations
|
|
31
51
|
- Sequence counters are 32-bit. At extreme throughput over a long time, counters wrap and ABA can reappear. If you need true long-running safety, consider a reset protocol, sharding, or a future 64-bit atomic extension.
|
|
32
|
-
-
|
|
52
|
+
- Payload lifetimes are managed by the caller. Ensure payload buffers remain valid until consumers finish, or use frame-bounded arenas/generation handles.
|
|
33
53
|
- This demo is intentionally minimal; it is not yet integrated with a scheduler or backpressure policy.
|
|
34
54
|
|
|
35
55
|
## Run the demo
|
|
@@ -58,5 +78,5 @@ npm run test:e2e
|
|
|
58
78
|
- `src/queue.wgsl`: Lock-free queue implementation.
|
|
59
79
|
- `src/index.js`: Package entry point for loading the WGSL file.
|
|
60
80
|
|
|
61
|
-
##
|
|
62
|
-
|
|
81
|
+
## Payload shape
|
|
82
|
+
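Payloads are variable-length chunks stored in a caller-managed buffer. Each job specifies `job_type`, `payload_offset`, and `payload_words` in `input_jobs`. On dequeue, each job's payload is copied from `input_payloads` into its `output_stride`-word region of `output_payloads`: at most `output_stride` words are copied (longer payloads are truncated), and any remaining words in the region are zero-filled. The job's metadata is mirrored unchanged into `output_jobs`, so `payload_words` there still holds the original length. If you need `f32`, store `bitcast<u32>(value)` and reinterpret it with `bitcast<f32>` on the consumer side.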
|
package/dist/index.cjs
CHANGED
|
@@ -1,46 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
var
|
|
3
|
-
var
|
|
4
|
-
var
|
|
5
|
-
var __getProtoOf = Object.getPrototypeOf;
|
|
6
|
-
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
-
var __export = (target, all) => {
|
|
8
|
-
for (var name in all)
|
|
9
|
-
__defProp(target, name, { get: all[name], enumerable: true });
|
|
10
|
-
};
|
|
11
|
-
var __copyProps = (to, from, except, desc) => {
|
|
12
|
-
if (from && typeof from === "object" || typeof from === "function") {
|
|
13
|
-
for (let key of __getOwnPropNames(from))
|
|
14
|
-
if (!__hasOwnProp.call(to, key) && key !== except)
|
|
15
|
-
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
|
|
16
|
-
}
|
|
17
|
-
return to;
|
|
18
|
-
};
|
|
19
|
-
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
|
|
20
|
-
// If the importer is in node compatibility mode or this is not an ESM
|
|
21
|
-
// file that has been converted to a CommonJS file using a Babel-
|
|
22
|
-
// compatible transform (i.e. "__esModule" has not been set), then set
|
|
23
|
-
// "default" to the CommonJS "module.exports" for node compatibility.
|
|
24
|
-
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
|
|
25
|
-
mod
|
|
26
|
-
));
|
|
27
|
-
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
28
|
-
|
|
29
|
-
// src/index.js
|
|
30
|
-
var index_exports = {};
|
|
31
|
-
__export(index_exports, {
|
|
32
|
-
loadQueueWgsl: () => loadQueueWgsl,
|
|
33
|
-
queueWgslUrl: () => queueWgslUrl
|
|
34
|
-
});
|
|
35
|
-
module.exports = __toCommonJS(index_exports);
|
|
36
|
-
var import_meta = {};
|
|
37
|
-
var queueWgslUrl = new URL("./queue.wgsl", import_meta.url);
|
|
1
|
+
// src/index.cjs
|
|
2
|
+
var { pathToFileURL, fileURLToPath } = require("url");
|
|
3
|
+
var { readFile } = require("fs/promises");
|
|
4
|
+
var queueWgslUrl = new URL("./queue.wgsl", pathToFileURL(__filename));
|
|
38
5
|
async function loadQueueWgsl(options = {}) {
|
|
39
6
|
const { url = queueWgslUrl, fetcher = globalThis.fetch } = options ?? {};
|
|
40
7
|
const wgslUrl = url instanceof URL ? url : new URL(url, queueWgslUrl);
|
|
41
8
|
if (!fetcher || wgslUrl.protocol === "file:") {
|
|
42
|
-
const { readFile } = await import("fs/promises");
|
|
43
|
-
const { fileURLToPath } = await import("url");
|
|
44
9
|
return readFile(fileURLToPath(wgslUrl), "utf8");
|
|
45
10
|
}
|
|
46
11
|
const response = await fetcher(wgslUrl);
|
|
@@ -52,9 +17,8 @@ async function loadQueueWgsl(options = {}) {
|
|
|
52
17
|
}
|
|
53
18
|
return response.text();
|
|
54
19
|
}
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
loadQueueWgsl
|
|
58
|
-
|
|
59
|
-
});
|
|
20
|
+
module.exports = {
|
|
21
|
+
queueWgslUrl,
|
|
22
|
+
loadQueueWgsl
|
|
23
|
+
};
|
|
60
24
|
//# sourceMappingURL=index.cjs.map
|
package/dist/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.
|
|
1
|
+
{"version":3,"sources":["../src/index.cjs"],"sourcesContent":["const { pathToFileURL, fileURLToPath } = require(\"node:url\");\nconst { readFile } = require(\"node:fs/promises\");\n\nconst queueWgslUrl = new URL(\"./queue.wgsl\", pathToFileURL(__filename));\n\nasync function loadQueueWgsl(options = {}) {\n const { url = queueWgslUrl, fetcher = globalThis.fetch } = options ?? {};\n const wgslUrl = url instanceof URL ? url : new URL(url, queueWgslUrl);\n\n if (!fetcher || wgslUrl.protocol === \"file:\") {\n return readFile(fileURLToPath(wgslUrl), \"utf8\");\n }\n\n const response = await fetcher(wgslUrl);\n if (!response.ok) {\n const status = \"status\" in response ? response.status : \"unknown\";\n const statusText = \"statusText\" in response ? response.statusText : \"\";\n const detail = statusText ? `${status} ${statusText}` : `${status}`;\n throw new Error(`Failed to load WGSL (${detail})`);\n }\n return response.text();\n}\n\nmodule.exports = {\n queueWgslUrl,\n loadQueueWgsl,\n};\n"],"mappings":";AAAA,IAAM,EAAE,eAAe,cAAc,IAAI,QAAQ,KAAU;AAC3D,IAAM,EAAE,SAAS,IAAI,QAAQ,aAAkB;AAE/C,IAAM,eAAe,IAAI,IAAI,gBAAgB,cAAc,UAAU,CAAC;AAEtE,eAAe,cAAc,UAAU,CAAC,GAAG;AACzC,QAAM,EAAE,MAAM,cAAc,UAAU,WAAW,MAAM,IAAI,WAAW,CAAC;AACvE,QAAM,UAAU,eAAe,MAAM,MAAM,IAAI,IAAI,KAAK,YAAY;AAEpE,MAAI,CAAC,WAAW,QAAQ,aAAa,SAAS;AAC5C,WAAO,SAAS,cAAc,OAAO,GAAG,MAAM;AAAA,EAChD;AAEA,QAAM,WAAW,MAAM,QAAQ,OAAO;AACtC,MAAI,CAAC,SAAS,IAAI;AAChB,UAAM,SAAS,YAAY,WAAW,SAAS,SAAS;AACxD,UAAM,aAAa,gBAAgB,WAAW,SAAS,aAAa;AACpE,UAAM,SAAS,aAAa,GAAG,MAAM,IAAI,UAAU,KAAK,GAAG,MAAM;AACjE,UAAM,IAAI,MAAM,wBAAwB,MAAM,GAAG;AAAA,EACnD;AACA,SAAO,SAAS,KAAK;AACvB;AAEA,OAAO,UAAU;AAAA,EACf;AAAA,EACA;AACF;","names":[]}
|
package/dist/queue.wgsl
CHANGED
|
@@ -3,26 +3,36 @@ struct Queue {
|
|
|
3
3
|
tail: atomic<u32>,
|
|
4
4
|
capacity: u32,
|
|
5
5
|
mask: u32,
|
|
6
|
-
_pad: vec2<u32>,
|
|
7
6
|
};
|
|
8
7
|
|
|
9
8
|
struct Slot {
|
|
10
9
|
seq: atomic<u32>,
|
|
11
|
-
|
|
12
|
-
|
|
10
|
+
job_type: u32,
|
|
11
|
+
payload_offset: u32,
|
|
12
|
+
payload_words: u32,
|
|
13
|
+
};
|
|
14
|
+
|
|
15
|
+
struct JobMeta {
|
|
16
|
+
job_type: u32,
|
|
17
|
+
payload_offset: u32,
|
|
18
|
+
payload_words: u32,
|
|
19
|
+
_pad: u32,
|
|
13
20
|
};
|
|
14
21
|
|
|
15
22
|
struct Params {
|
|
16
23
|
job_count: u32,
|
|
17
|
-
|
|
24
|
+
output_stride: u32,
|
|
25
|
+
_pad: vec2<u32>,
|
|
18
26
|
};
|
|
19
27
|
|
|
20
28
|
@group(0) @binding(0) var<storage, read_write> queue: Queue;
|
|
21
29
|
@group(0) @binding(1) var<storage, read_write> slots: array<Slot>;
|
|
22
|
-
@group(0) @binding(2) var<storage, read> input_jobs: array<
|
|
23
|
-
@group(0) @binding(3) var<storage, read_write> output_jobs: array<
|
|
24
|
-
@group(0) @binding(4) var<storage,
|
|
25
|
-
@group(0) @binding(5) var<
|
|
30
|
+
@group(0) @binding(2) var<storage, read> input_jobs: array<JobMeta>;
|
|
31
|
+
@group(0) @binding(3) var<storage, read_write> output_jobs: array<JobMeta>;
|
|
32
|
+
@group(0) @binding(4) var<storage, read> input_payloads: array<u32>;
|
|
33
|
+
@group(0) @binding(5) var<storage, read_write> output_payloads: array<u32>;
|
|
34
|
+
@group(0) @binding(6) var<storage, read_write> status: array<u32>;
|
|
35
|
+
@group(0) @binding(7) var<uniform> params: Params;
|
|
26
36
|
|
|
27
37
|
const MAX_RETRIES: u32 = 512u;
|
|
28
38
|
|
|
@@ -48,11 +58,28 @@ fn enqueue_job_count() -> u32 {
|
|
|
48
58
|
}
|
|
49
59
|
|
|
50
60
|
fn dequeue_job_count() -> u32 {
|
|
51
|
-
|
|
61
|
+
if (params.output_stride == 0u) {
|
|
62
|
+
return 0u;
|
|
63
|
+
}
|
|
64
|
+
let payload_jobs = arrayLength(&output_payloads) / params.output_stride;
|
|
65
|
+
var count = min(params.job_count, arrayLength(&output_jobs));
|
|
66
|
+
count = min(count, payload_jobs);
|
|
52
67
|
return min(count, arrayLength(&status));
|
|
53
68
|
}
|
|
54
69
|
|
|
55
|
-
fn
|
|
70
|
+
fn queue_len() -> u32 {
|
|
71
|
+
let h = atomicLoad(&queue.head);
|
|
72
|
+
let t = atomicLoad(&queue.tail);
|
|
73
|
+
return t - h;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
fn enqueue(idx: u32) -> u32 {
|
|
77
|
+
let job = input_jobs[idx];
|
|
78
|
+
let payload_words = job.payload_words;
|
|
79
|
+
let input_offset = job.payload_offset;
|
|
80
|
+
if (input_offset + payload_words > arrayLength(&input_payloads)) {
|
|
81
|
+
return 0u;
|
|
82
|
+
}
|
|
56
83
|
for (var attempt: u32 = 0u; attempt < MAX_RETRIES; attempt++) {
|
|
57
84
|
let t = atomicLoad(&queue.tail);
|
|
58
85
|
let slot_index = t & queue.mask;
|
|
@@ -62,7 +89,9 @@ fn enqueue(val: u32) -> u32 {
|
|
|
62
89
|
if (diff == 0) {
|
|
63
90
|
let res = atomicCompareExchangeWeak(&queue.tail, t, t + 1u);
|
|
64
91
|
if (res.exchanged) {
|
|
65
|
-
slots[slot_index].
|
|
92
|
+
slots[slot_index].job_type = job.job_type;
|
|
93
|
+
slots[slot_index].payload_offset = input_offset;
|
|
94
|
+
slots[slot_index].payload_words = payload_words;
|
|
66
95
|
atomicStore(&slots[slot_index].seq, t + 1u);
|
|
67
96
|
return 1u;
|
|
68
97
|
}
|
|
@@ -84,8 +113,22 @@ fn dequeue(idx: u32) -> u32 {
|
|
|
84
113
|
if (diff == 0) {
|
|
85
114
|
let res = atomicCompareExchangeWeak(&queue.head, h, h + 1u);
|
|
86
115
|
if (res.exchanged) {
|
|
87
|
-
let
|
|
88
|
-
|
|
116
|
+
let payload_offset = slots[slot_index].payload_offset;
|
|
117
|
+
let payload_words = slots[slot_index].payload_words;
|
|
118
|
+
let job_type = slots[slot_index].job_type;
|
|
119
|
+
let output_stride = params.output_stride;
|
|
120
|
+
let dst_base = idx * output_stride;
|
|
121
|
+
let copy_words = min(payload_words, output_stride);
|
|
122
|
+
for (var i: u32 = 0u; i < copy_words; i = i + 1u) {
|
|
123
|
+
output_payloads[dst_base + i] = input_payloads[payload_offset + i];
|
|
124
|
+
}
|
|
125
|
+
for (var i: u32 = copy_words; i < output_stride; i = i + 1u) {
|
|
126
|
+
output_payloads[dst_base + i] = 0u;
|
|
127
|
+
}
|
|
128
|
+
output_jobs[idx].job_type = job_type;
|
|
129
|
+
output_jobs[idx].payload_offset = payload_offset;
|
|
130
|
+
output_jobs[idx].payload_words = payload_words;
|
|
131
|
+
output_jobs[idx]._pad = 0u;
|
|
89
132
|
atomicStore(&slots[slot_index].seq, h + queue.capacity);
|
|
90
133
|
return 1u;
|
|
91
134
|
}
|
|
@@ -111,7 +154,7 @@ fn enqueue_main(@builtin(global_invocation_id) gid: vec3<u32>) {
|
|
|
111
154
|
return;
|
|
112
155
|
}
|
|
113
156
|
|
|
114
|
-
let ok = enqueue(
|
|
157
|
+
let ok = enqueue(idx);
|
|
115
158
|
if (ok == 1u) {
|
|
116
159
|
status[idx] = 1u;
|
|
117
160
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@plasius/gpu-lock-free-queue",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.2",
|
|
4
4
|
"description": "WebGPU lock-free MPMC ring queue with sequence counters.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"sideEffects": false,
|
|
@@ -29,7 +29,9 @@
|
|
|
29
29
|
"test": "npm run test:unit",
|
|
30
30
|
"test:unit": "node --test",
|
|
31
31
|
"test:e2e": "npx playwright install chromium && playwright test",
|
|
32
|
-
"test:coverage": "c8 --reporter=lcov --reporter=text node --test"
|
|
32
|
+
"test:coverage": "c8 --reporter=lcov --reporter=text node --test",
|
|
33
|
+
"pack:check": "node scripts/verify-public-package.cjs",
|
|
34
|
+
"prepublishOnly": "npm run build && npm run pack:check"
|
|
33
35
|
},
|
|
34
36
|
"keywords": [
|
|
35
37
|
"webgpu",
|
|
@@ -71,5 +73,8 @@
|
|
|
71
73
|
"type": "github",
|
|
72
74
|
"url": "https://github.com/sponsors/Plasius-LTD"
|
|
73
75
|
}
|
|
74
|
-
]
|
|
76
|
+
],
|
|
77
|
+
"overrides": {
|
|
78
|
+
"minimatch": "^10.2.1"
|
|
79
|
+
}
|
|
75
80
|
}
|
package/src/index.cjs
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
const { pathToFileURL, fileURLToPath } = require("node:url");
|
|
2
|
+
const { readFile } = require("node:fs/promises");
|
|
3
|
+
|
|
4
|
+
const queueWgslUrl = new URL("./queue.wgsl", pathToFileURL(__filename));
|
|
5
|
+
|
|
6
|
+
async function loadQueueWgsl(options = {}) {
|
|
7
|
+
const { url = queueWgslUrl, fetcher = globalThis.fetch } = options ?? {};
|
|
8
|
+
const wgslUrl = url instanceof URL ? url : new URL(url, queueWgslUrl);
|
|
9
|
+
|
|
10
|
+
if (!fetcher || wgslUrl.protocol === "file:") {
|
|
11
|
+
return readFile(fileURLToPath(wgslUrl), "utf8");
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
const response = await fetcher(wgslUrl);
|
|
15
|
+
if (!response.ok) {
|
|
16
|
+
const status = "status" in response ? response.status : "unknown";
|
|
17
|
+
const statusText = "statusText" in response ? response.statusText : "";
|
|
18
|
+
const detail = statusText ? `${status} ${statusText}` : `${status}`;
|
|
19
|
+
throw new Error(`Failed to load WGSL (${detail})`);
|
|
20
|
+
}
|
|
21
|
+
return response.text();
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
module.exports = {
|
|
25
|
+
queueWgslUrl,
|
|
26
|
+
loadQueueWgsl,
|
|
27
|
+
};
|
package/src/queue.wgsl
CHANGED
|
@@ -3,26 +3,36 @@ struct Queue {
|
|
|
3
3
|
tail: atomic<u32>,
|
|
4
4
|
capacity: u32,
|
|
5
5
|
mask: u32,
|
|
6
|
-
_pad: vec2<u32>,
|
|
7
6
|
};
|
|
8
7
|
|
|
9
8
|
struct Slot {
|
|
10
9
|
seq: atomic<u32>,
|
|
11
|
-
|
|
12
|
-
|
|
10
|
+
job_type: u32,
|
|
11
|
+
payload_offset: u32,
|
|
12
|
+
payload_words: u32,
|
|
13
|
+
};
|
|
14
|
+
|
|
15
|
+
struct JobMeta {
|
|
16
|
+
job_type: u32,
|
|
17
|
+
payload_offset: u32,
|
|
18
|
+
payload_words: u32,
|
|
19
|
+
_pad: u32,
|
|
13
20
|
};
|
|
14
21
|
|
|
15
22
|
struct Params {
|
|
16
23
|
job_count: u32,
|
|
17
|
-
|
|
24
|
+
output_stride: u32,
|
|
25
|
+
_pad: vec2<u32>,
|
|
18
26
|
};
|
|
19
27
|
|
|
20
28
|
@group(0) @binding(0) var<storage, read_write> queue: Queue;
|
|
21
29
|
@group(0) @binding(1) var<storage, read_write> slots: array<Slot>;
|
|
22
|
-
@group(0) @binding(2) var<storage, read> input_jobs: array<
|
|
23
|
-
@group(0) @binding(3) var<storage, read_write> output_jobs: array<
|
|
24
|
-
@group(0) @binding(4) var<storage,
|
|
25
|
-
@group(0) @binding(5) var<
|
|
30
|
+
@group(0) @binding(2) var<storage, read> input_jobs: array<JobMeta>;
|
|
31
|
+
@group(0) @binding(3) var<storage, read_write> output_jobs: array<JobMeta>;
|
|
32
|
+
@group(0) @binding(4) var<storage, read> input_payloads: array<u32>;
|
|
33
|
+
@group(0) @binding(5) var<storage, read_write> output_payloads: array<u32>;
|
|
34
|
+
@group(0) @binding(6) var<storage, read_write> status: array<u32>;
|
|
35
|
+
@group(0) @binding(7) var<uniform> params: Params;
|
|
26
36
|
|
|
27
37
|
const MAX_RETRIES: u32 = 512u;
|
|
28
38
|
|
|
@@ -48,11 +58,28 @@ fn enqueue_job_count() -> u32 {
|
|
|
48
58
|
}
|
|
49
59
|
|
|
50
60
|
fn dequeue_job_count() -> u32 {
|
|
51
|
-
|
|
61
|
+
if (params.output_stride == 0u) {
|
|
62
|
+
return 0u;
|
|
63
|
+
}
|
|
64
|
+
let payload_jobs = arrayLength(&output_payloads) / params.output_stride;
|
|
65
|
+
var count = min(params.job_count, arrayLength(&output_jobs));
|
|
66
|
+
count = min(count, payload_jobs);
|
|
52
67
|
return min(count, arrayLength(&status));
|
|
53
68
|
}
|
|
54
69
|
|
|
55
|
-
fn
|
|
70
|
+
fn queue_len() -> u32 {
|
|
71
|
+
let h = atomicLoad(&queue.head);
|
|
72
|
+
let t = atomicLoad(&queue.tail);
|
|
73
|
+
return t - h;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
fn enqueue(idx: u32) -> u32 {
|
|
77
|
+
let job = input_jobs[idx];
|
|
78
|
+
let payload_words = job.payload_words;
|
|
79
|
+
let input_offset = job.payload_offset;
|
|
80
|
+
if (input_offset + payload_words > arrayLength(&input_payloads)) {
|
|
81
|
+
return 0u;
|
|
82
|
+
}
|
|
56
83
|
for (var attempt: u32 = 0u; attempt < MAX_RETRIES; attempt++) {
|
|
57
84
|
let t = atomicLoad(&queue.tail);
|
|
58
85
|
let slot_index = t & queue.mask;
|
|
@@ -62,7 +89,9 @@ fn enqueue(val: u32) -> u32 {
|
|
|
62
89
|
if (diff == 0) {
|
|
63
90
|
let res = atomicCompareExchangeWeak(&queue.tail, t, t + 1u);
|
|
64
91
|
if (res.exchanged) {
|
|
65
|
-
slots[slot_index].
|
|
92
|
+
slots[slot_index].job_type = job.job_type;
|
|
93
|
+
slots[slot_index].payload_offset = input_offset;
|
|
94
|
+
slots[slot_index].payload_words = payload_words;
|
|
66
95
|
atomicStore(&slots[slot_index].seq, t + 1u);
|
|
67
96
|
return 1u;
|
|
68
97
|
}
|
|
@@ -84,8 +113,22 @@ fn dequeue(idx: u32) -> u32 {
|
|
|
84
113
|
if (diff == 0) {
|
|
85
114
|
let res = atomicCompareExchangeWeak(&queue.head, h, h + 1u);
|
|
86
115
|
if (res.exchanged) {
|
|
87
|
-
let
|
|
88
|
-
|
|
116
|
+
let payload_offset = slots[slot_index].payload_offset;
|
|
117
|
+
let payload_words = slots[slot_index].payload_words;
|
|
118
|
+
let job_type = slots[slot_index].job_type;
|
|
119
|
+
let output_stride = params.output_stride;
|
|
120
|
+
let dst_base = idx * output_stride;
|
|
121
|
+
let copy_words = min(payload_words, output_stride);
|
|
122
|
+
for (var i: u32 = 0u; i < copy_words; i = i + 1u) {
|
|
123
|
+
output_payloads[dst_base + i] = input_payloads[payload_offset + i];
|
|
124
|
+
}
|
|
125
|
+
for (var i: u32 = copy_words; i < output_stride; i = i + 1u) {
|
|
126
|
+
output_payloads[dst_base + i] = 0u;
|
|
127
|
+
}
|
|
128
|
+
output_jobs[idx].job_type = job_type;
|
|
129
|
+
output_jobs[idx].payload_offset = payload_offset;
|
|
130
|
+
output_jobs[idx].payload_words = payload_words;
|
|
131
|
+
output_jobs[idx]._pad = 0u;
|
|
89
132
|
atomicStore(&slots[slot_index].seq, h + queue.capacity);
|
|
90
133
|
return 1u;
|
|
91
134
|
}
|
|
@@ -111,7 +154,7 @@ fn enqueue_main(@builtin(global_invocation_id) gid: vec3<u32>) {
|
|
|
111
154
|
return;
|
|
112
155
|
}
|
|
113
156
|
|
|
114
|
-
let ok = enqueue(
|
|
157
|
+
let ok = enqueue(idx);
|
|
115
158
|
if (ok == 1u) {
|
|
116
159
|
status[idx] = 1u;
|
|
117
160
|
}
|