@plasius/gpu-lock-free-queue 0.1.2-beta.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -26
- package/README.md +23 -4
- package/dist/index.cjs +24 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.js +24 -0
- package/dist/index.js.map +1 -0
- package/dist/queue.wgsl +181 -0
- package/package.json +13 -4
- package/src/index.cjs +27 -0
- package/src/index.js +17 -2
- package/src/queue.wgsl +92 -15
package/CHANGELOG.md
CHANGED
|
@@ -20,53 +20,59 @@ The format is based on **[Keep a Changelog](https://keepachangelog.com/en/1.1.0/
|
|
|
20
20
|
- **Security**
|
|
21
21
|
- (placeholder)
|
|
22
22
|
|
|
23
|
-
## [0.
|
|
24
|
-
|
|
25
|
-
- **Added**
|
|
26
|
-
- (placeholder)
|
|
23
|
+
## [0.2.1] - 2026-01-23
|
|
27
24
|
|
|
28
25
|
- **Changed**
|
|
29
|
-
- (
|
|
26
|
+
- **Breaking:** Queue payloads are now referenced by fixed metadata offsets into caller-managed payload buffers (no internal payload arena).
|
|
27
|
+
- **Breaking:** Queue header and bindings updated to remove payload arena fields and buffer.
|
|
28
|
+
- Demo and tests updated to reflect the new payload-handle layout.
|
|
29
|
+
- **Breaking (interim; superseded by the payload arena removal listed above):** Queue header included payload arena head/tail and capacity/mask fields.
|
|
30
|
+
- Queue helpers now expose a `queue_len` backlog snapshot for schedulers.
|
|
31
|
+
- Demo and tests updated to use job metadata and variable payload copies.
|
|
30
32
|
|
|
31
33
|
- **Fixed**
|
|
32
|
-
-
|
|
34
|
+
- Payload allocations now validate arena capacity before enqueue.
|
|
33
35
|
|
|
34
|
-
-
|
|
35
|
-
- (placeholder)
|
|
36
|
-
|
|
37
|
-
## [0.1.1-beta.1] - 2026-01-08
|
|
38
|
-
|
|
39
|
-
- **Added**
|
|
40
|
-
- (placeholder)
|
|
36
|
+
## [0.2.0] - 2026-01-23
|
|
41
37
|
|
|
42
38
|
- **Changed**
|
|
43
|
-
-
|
|
39
|
+
- **Breaking:** WGSL bindings now include a dedicated payload ring buffer plus input/output payload buffers.
|
|
40
|
+
- Queue headers now carry `payload_stride` (u32 words) and job payloads are copied into the ring on enqueue.
|
|
41
|
+
- Demo and tests updated to use payload buffers instead of `input_jobs`/`output_jobs`.
|
|
44
42
|
|
|
45
43
|
- **Fixed**
|
|
46
|
-
-
|
|
47
|
-
|
|
48
|
-
- **Security**
|
|
49
|
-
- (placeholder)
|
|
44
|
+
- Payload job counts now clamp to payload buffer lengths to prevent overruns.
|
|
50
45
|
|
|
51
|
-
## [0.1.
|
|
46
|
+
## [0.1.2] - 2026-01-22
|
|
52
47
|
|
|
53
48
|
- **Added**
|
|
54
|
-
-
|
|
49
|
+
- Deterministic demo test pattern mode for stable image hashing in e2e tests.
|
|
50
|
+
- 4x4 demo grid for multi-canvas output.
|
|
51
|
+
- Timestamped demo logging.
|
|
52
|
+
- Demo FPS counter and per-image progress indicators.
|
|
53
|
+
- Loader and WGSL guard tests, plus an e2e WGSL compilation check.
|
|
55
54
|
|
|
56
55
|
- **Changed**
|
|
57
|
-
-
|
|
56
|
+
- `loadQueueWgsl` accepts `url`/`fetcher` overrides and falls back to filesystem reads for `file:` URLs.
|
|
57
|
+
- Demo renders 500 interleaved static frames using per-image queues per frame.
|
|
58
|
+
- Demo updates canvases line-by-line for progressive static output.
|
|
59
|
+
- Build outputs now ship as ESM and CJS bundles with the WGSL asset in `dist/`.
|
|
58
60
|
|
|
59
61
|
- **Fixed**
|
|
60
|
-
-
|
|
62
|
+
- WGSL entry points now validate queue configuration and clamp job counts to buffer lengths.
|
|
63
|
+
- WGSL load errors now surface with explicit HTTP status details.
|
|
64
|
+
- CD build now installs TypeScript for the tsup build step.
|
|
61
65
|
|
|
62
66
|
- **Security**
|
|
63
|
-
-
|
|
67
|
+
- None.
|
|
64
68
|
|
|
65
69
|
## [0.1.0] - 2025-01-08
|
|
66
70
|
|
|
67
71
|
- **Added**
|
|
68
72
|
- WebGPU lock-free MPMC queue with sequence counters.
|
|
69
73
|
- Demo for enqueue/dequeue, FFT spectrogram, and randomness heuristics.
|
|
70
|
-
|
|
71
|
-
[0.1.
|
|
72
|
-
[0.1.2
|
|
74
|
+
|
|
75
|
+
[0.1.0]: https://github.com/Plasius-LTD/gpu-lock-free-queue/releases/tag/v0.1.0
|
|
76
|
+
[0.1.2]: https://github.com/Plasius-LTD/gpu-lock-free-queue/releases/tag/v0.1.2
|
|
77
|
+
[0.2.0]: https://github.com/Plasius-LTD/gpu-lock-free-queue/releases/tag/v0.2.0
|
|
78
|
+
[0.2.1]: https://github.com/Plasius-LTD/gpu-lock-free-queue/releases/tag/v0.2.1
|
package/README.md
CHANGED
|
@@ -6,6 +6,8 @@
|
|
|
6
6
|
|
|
7
7
|
A minimal WebGPU lock-free MPMC ring queue using a per-slot sequence counter (Vyukov-style). This is a starter implementation focused on correctness, robustness, and low overhead.
|
|
8
8
|
|
|
9
|
+
Apache-2.0. ESM + CJS builds. WGSL assets are published in `dist/`.
|
|
10
|
+
|
|
9
11
|
## Install
|
|
10
12
|
```
|
|
11
13
|
npm install @plasius/gpu-lock-free-queue
|
|
@@ -23,11 +25,24 @@ const shaderCode = await loadQueueWgsl();
|
|
|
23
25
|
## What this is
|
|
24
26
|
- Lock-free multi-producer, multi-consumer ring queue on the GPU.
|
|
25
27
|
- Uses per-slot sequence numbers to avoid ABA for slots within a 32-bit epoch.
|
|
26
|
-
- Fixed-size
|
|
28
|
+
- Fixed-size job metadata with payload offsets into a caller-managed data arena or buffer.
|
|
29
|
+
|
|
30
|
+
## Buffer layout (breaking change in v0.2.1)
|
|
31
|
+
Bindings are:
|
|
32
|
+
1. `@binding(0)` queue header: `{ head, tail, capacity, mask }`
|
|
33
|
+
2. `@binding(1)` slot array (`Slot` with `seq`, `job_type`, `payload_offset`, `payload_words`)
|
|
34
|
+
3. `@binding(2)` input jobs (`array<JobMeta>` with `job_type`, `payload_offset`, `payload_words`)
|
|
35
|
+
4. `@binding(3)` output jobs (`array<JobMeta>` with `job_type`, `payload_offset`, `payload_words`)
|
|
36
|
+
5. `@binding(4)` input payloads (`array<u32>`, payload data referenced by `input_jobs.payload_offset`)
|
|
37
|
+
6. `@binding(5)` output payloads (`array<u32>`, length `job_count * output_stride`)
|
|
38
|
+
7. `@binding(6)` status flags (`array<u32>`, length `job_count`)
|
|
39
|
+
8. `@binding(7)` params (`Params` with `job_count`, `output_stride`)
|
|
40
|
+
|
|
41
|
+
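`output_stride` is the per-job output stride (u32 words) used when copying payloads into `output_payloads`. Payloads longer than `output_stride` words are truncated to `output_stride` words; shorter payloads are zero-padded to fill the stride.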
|
|
27
42
|
|
|
28
43
|
## Limitations
|
|
29
44
|
- Sequence counters are 32-bit. At extreme throughput over a long time, counters wrap and ABA can reappear. If you need true long-running safety, consider a reset protocol, sharding, or a future 64-bit atomic extension.
|
|
30
|
-
-
|
|
45
|
+
- Payload lifetimes are managed by the caller. Ensure payload buffers remain valid until consumers finish, or use frame-bounded arenas/generation handles.
|
|
31
46
|
- This demo is intentionally minimal; it is not yet integrated with a scheduler or backpressure policy.
|
|
32
47
|
|
|
33
48
|
## Run the demo
|
|
@@ -39,6 +54,10 @@ python3 -m http.server
|
|
|
39
54
|
|
|
40
55
|
Then open `http://localhost:8000` and check the console/output.
|
|
41
56
|
|
|
57
|
+
## Build Outputs
|
|
58
|
+
|
|
59
|
+
`npm run build` emits `dist/index.js`, `dist/index.cjs`, and `dist/queue.wgsl`.
|
|
60
|
+
|
|
42
61
|
## Tests
|
|
43
62
|
```
|
|
44
63
|
npm run test:unit
|
|
@@ -52,5 +71,5 @@ npm run test:e2e
|
|
|
52
71
|
- `src/queue.wgsl`: Lock-free queue implementation.
|
|
53
72
|
- `src/index.js`: Package entry point for loading the WGSL file.
|
|
54
73
|
|
|
55
|
-
##
|
|
56
|
-
|
|
74
|
+
## Payload shape
|
|
75
|
+
Payloads are variable-length chunks stored in a caller-managed buffer. Each job specifies `job_type`, `payload_offset`, and `payload_words` in `input_jobs`; dequeue copies payloads from `input_payloads` into `output_payloads` using `output_stride` and mirrors the metadata into `output_jobs`. If you need `f32`, store `bitcast<u32>(value)` and reinterpret on the consumer side.
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
// src/index.cjs
|
|
2
|
+
var { pathToFileURL, fileURLToPath } = require("url");
|
|
3
|
+
var { readFile } = require("fs/promises");
|
|
4
|
+
var queueWgslUrl = new URL("./queue.wgsl", pathToFileURL(__filename));
|
|
5
|
+
async function loadQueueWgsl(options = {}) {
|
|
6
|
+
const { url = queueWgslUrl, fetcher = globalThis.fetch } = options ?? {};
|
|
7
|
+
const wgslUrl = url instanceof URL ? url : new URL(url, queueWgslUrl);
|
|
8
|
+
if (!fetcher || wgslUrl.protocol === "file:") {
|
|
9
|
+
return readFile(fileURLToPath(wgslUrl), "utf8");
|
|
10
|
+
}
|
|
11
|
+
const response = await fetcher(wgslUrl);
|
|
12
|
+
if (!response.ok) {
|
|
13
|
+
const status = "status" in response ? response.status : "unknown";
|
|
14
|
+
const statusText = "statusText" in response ? response.statusText : "";
|
|
15
|
+
const detail = statusText ? `${status} ${statusText}` : `${status}`;
|
|
16
|
+
throw new Error(`Failed to load WGSL (${detail})`);
|
|
17
|
+
}
|
|
18
|
+
return response.text();
|
|
19
|
+
}
|
|
20
|
+
module.exports = {
|
|
21
|
+
queueWgslUrl,
|
|
22
|
+
loadQueueWgsl
|
|
23
|
+
};
|
|
24
|
+
//# sourceMappingURL=index.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/index.cjs"],"sourcesContent":["const { pathToFileURL, fileURLToPath } = require(\"node:url\");\nconst { readFile } = require(\"node:fs/promises\");\n\nconst queueWgslUrl = new URL(\"./queue.wgsl\", pathToFileURL(__filename));\n\nasync function loadQueueWgsl(options = {}) {\n const { url = queueWgslUrl, fetcher = globalThis.fetch } = options ?? {};\n const wgslUrl = url instanceof URL ? url : new URL(url, queueWgslUrl);\n\n if (!fetcher || wgslUrl.protocol === \"file:\") {\n return readFile(fileURLToPath(wgslUrl), \"utf8\");\n }\n\n const response = await fetcher(wgslUrl);\n if (!response.ok) {\n const status = \"status\" in response ? response.status : \"unknown\";\n const statusText = \"statusText\" in response ? response.statusText : \"\";\n const detail = statusText ? `${status} ${statusText}` : `${status}`;\n throw new Error(`Failed to load WGSL (${detail})`);\n }\n return response.text();\n}\n\nmodule.exports = {\n queueWgslUrl,\n loadQueueWgsl,\n};\n"],"mappings":";AAAA,IAAM,EAAE,eAAe,cAAc,IAAI,QAAQ,KAAU;AAC3D,IAAM,EAAE,SAAS,IAAI,QAAQ,aAAkB;AAE/C,IAAM,eAAe,IAAI,IAAI,gBAAgB,cAAc,UAAU,CAAC;AAEtE,eAAe,cAAc,UAAU,CAAC,GAAG;AACzC,QAAM,EAAE,MAAM,cAAc,UAAU,WAAW,MAAM,IAAI,WAAW,CAAC;AACvE,QAAM,UAAU,eAAe,MAAM,MAAM,IAAI,IAAI,KAAK,YAAY;AAEpE,MAAI,CAAC,WAAW,QAAQ,aAAa,SAAS;AAC5C,WAAO,SAAS,cAAc,OAAO,GAAG,MAAM;AAAA,EAChD;AAEA,QAAM,WAAW,MAAM,QAAQ,OAAO;AACtC,MAAI,CAAC,SAAS,IAAI;AAChB,UAAM,SAAS,YAAY,WAAW,SAAS,SAAS;AACxD,UAAM,aAAa,gBAAgB,WAAW,SAAS,aAAa;AACpE,UAAM,SAAS,aAAa,GAAG,MAAM,IAAI,UAAU,KAAK,GAAG,MAAM;AACjE,UAAM,IAAI,MAAM,wBAAwB,MAAM,GAAG;AAAA,EACnD;AACA,SAAO,SAAS,KAAK;AACvB;AAEA,OAAO,UAAU;AAAA,EACf;AAAA,EACA;AACF;","names":[]}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
// src/index.js
|
|
2
|
+
var queueWgslUrl = new URL("./queue.wgsl", import.meta.url);
|
|
3
|
+
async function loadQueueWgsl(options = {}) {
|
|
4
|
+
const { url = queueWgslUrl, fetcher = globalThis.fetch } = options ?? {};
|
|
5
|
+
const wgslUrl = url instanceof URL ? url : new URL(url, queueWgslUrl);
|
|
6
|
+
if (!fetcher || wgslUrl.protocol === "file:") {
|
|
7
|
+
const { readFile } = await import("fs/promises");
|
|
8
|
+
const { fileURLToPath } = await import("url");
|
|
9
|
+
return readFile(fileURLToPath(wgslUrl), "utf8");
|
|
10
|
+
}
|
|
11
|
+
const response = await fetcher(wgslUrl);
|
|
12
|
+
if (!response.ok) {
|
|
13
|
+
const status = "status" in response ? response.status : "unknown";
|
|
14
|
+
const statusText = "statusText" in response ? response.statusText : "";
|
|
15
|
+
const detail = statusText ? `${status} ${statusText}` : `${status}`;
|
|
16
|
+
throw new Error(`Failed to load WGSL (${detail})`);
|
|
17
|
+
}
|
|
18
|
+
return response.text();
|
|
19
|
+
}
|
|
20
|
+
export {
|
|
21
|
+
loadQueueWgsl,
|
|
22
|
+
queueWgslUrl
|
|
23
|
+
};
|
|
24
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/index.js"],"sourcesContent":["export const queueWgslUrl = new URL(\"./queue.wgsl\", import.meta.url);\n\nexport async function loadQueueWgsl(options = {}) {\n const { url = queueWgslUrl, fetcher = globalThis.fetch } = options ?? {};\n const wgslUrl = url instanceof URL ? url : new URL(url, queueWgslUrl);\n\n if (!fetcher || wgslUrl.protocol === \"file:\") {\n const { readFile } = await import(\"node:fs/promises\");\n const { fileURLToPath } = await import(\"node:url\");\n return readFile(fileURLToPath(wgslUrl), \"utf8\");\n }\n\n const response = await fetcher(wgslUrl);\n if (!response.ok) {\n const status = \"status\" in response ? response.status : \"unknown\";\n const statusText = \"statusText\" in response ? response.statusText : \"\";\n const detail = statusText ? `${status} ${statusText}` : `${status}`;\n throw new Error(`Failed to load WGSL (${detail})`);\n }\n return response.text();\n}\n"],"mappings":";AAAO,IAAM,eAAe,IAAI,IAAI,gBAAgB,YAAY,GAAG;AAEnE,eAAsB,cAAc,UAAU,CAAC,GAAG;AAChD,QAAM,EAAE,MAAM,cAAc,UAAU,WAAW,MAAM,IAAI,WAAW,CAAC;AACvE,QAAM,UAAU,eAAe,MAAM,MAAM,IAAI,IAAI,KAAK,YAAY;AAEpE,MAAI,CAAC,WAAW,QAAQ,aAAa,SAAS;AAC5C,UAAM,EAAE,SAAS,IAAI,MAAM,OAAO,aAAkB;AACpD,UAAM,EAAE,cAAc,IAAI,MAAM,OAAO,KAAU;AACjD,WAAO,SAAS,cAAc,OAAO,GAAG,MAAM;AAAA,EAChD;AAEA,QAAM,WAAW,MAAM,QAAQ,OAAO;AACtC,MAAI,CAAC,SAAS,IAAI;AAChB,UAAM,SAAS,YAAY,WAAW,SAAS,SAAS;AACxD,UAAM,aAAa,gBAAgB,WAAW,SAAS,aAAa;AACpE,UAAM,SAAS,aAAa,GAAG,MAAM,IAAI,UAAU,KAAK,GAAG,MAAM;AACjE,UAAM,IAAI,MAAM,wBAAwB,MAAM,GAAG;AAAA,EACnD;AACA,SAAO,SAAS,KAAK;AACvB;","names":[]}
|
package/dist/queue.wgsl
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
struct Queue {
|
|
2
|
+
head: atomic<u32>,
|
|
3
|
+
tail: atomic<u32>,
|
|
4
|
+
capacity: u32,
|
|
5
|
+
mask: u32,
|
|
6
|
+
};
|
|
7
|
+
|
|
8
|
+
struct Slot {
|
|
9
|
+
seq: atomic<u32>,
|
|
10
|
+
job_type: u32,
|
|
11
|
+
payload_offset: u32,
|
|
12
|
+
payload_words: u32,
|
|
13
|
+
};
|
|
14
|
+
|
|
15
|
+
struct JobMeta {
|
|
16
|
+
job_type: u32,
|
|
17
|
+
payload_offset: u32,
|
|
18
|
+
payload_words: u32,
|
|
19
|
+
_pad: u32,
|
|
20
|
+
};
|
|
21
|
+
|
|
22
|
+
struct Params {
|
|
23
|
+
job_count: u32,
|
|
24
|
+
output_stride: u32,
|
|
25
|
+
_pad: vec2<u32>,
|
|
26
|
+
};
|
|
27
|
+
|
|
28
|
+
@group(0) @binding(0) var<storage, read_write> queue: Queue;
|
|
29
|
+
@group(0) @binding(1) var<storage, read_write> slots: array<Slot>;
|
|
30
|
+
@group(0) @binding(2) var<storage, read> input_jobs: array<JobMeta>;
|
|
31
|
+
@group(0) @binding(3) var<storage, read_write> output_jobs: array<JobMeta>;
|
|
32
|
+
@group(0) @binding(4) var<storage, read> input_payloads: array<u32>;
|
|
33
|
+
@group(0) @binding(5) var<storage, read_write> output_payloads: array<u32>;
|
|
34
|
+
@group(0) @binding(6) var<storage, read_write> status: array<u32>;
|
|
35
|
+
@group(0) @binding(7) var<uniform> params: Params;
|
|
36
|
+
|
|
37
|
+
const MAX_RETRIES: u32 = 512u;
|
|
38
|
+
|
|
39
|
+
fn queue_config_valid() -> bool {
|
|
40
|
+
if (queue.capacity == 0u) {
|
|
41
|
+
return false;
|
|
42
|
+
}
|
|
43
|
+
if ((queue.capacity & (queue.capacity - 1u)) != 0u) {
|
|
44
|
+
return false;
|
|
45
|
+
}
|
|
46
|
+
if (queue.mask != queue.capacity - 1u) {
|
|
47
|
+
return false;
|
|
48
|
+
}
|
|
49
|
+
if (queue.capacity > arrayLength(&slots)) {
|
|
50
|
+
return false;
|
|
51
|
+
}
|
|
52
|
+
return true;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
fn enqueue_job_count() -> u32 {
|
|
56
|
+
let count = min(params.job_count, arrayLength(&input_jobs));
|
|
57
|
+
return min(count, arrayLength(&status));
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
fn dequeue_job_count() -> u32 {
|
|
61
|
+
if (params.output_stride == 0u) {
|
|
62
|
+
return 0u;
|
|
63
|
+
}
|
|
64
|
+
let payload_jobs = arrayLength(&output_payloads) / params.output_stride;
|
|
65
|
+
var count = min(params.job_count, arrayLength(&output_jobs));
|
|
66
|
+
count = min(count, payload_jobs);
|
|
67
|
+
return min(count, arrayLength(&status));
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
fn queue_len() -> u32 {
|
|
71
|
+
let h = atomicLoad(&queue.head);
|
|
72
|
+
let t = atomicLoad(&queue.tail);
|
|
73
|
+
return t - h;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
fn enqueue(idx: u32) -> u32 {
|
|
77
|
+
let job = input_jobs[idx];
|
|
78
|
+
let payload_words = job.payload_words;
|
|
79
|
+
let input_offset = job.payload_offset;
|
|
80
|
+
if (input_offset + payload_words > arrayLength(&input_payloads)) {
|
|
81
|
+
return 0u;
|
|
82
|
+
}
|
|
83
|
+
for (var attempt: u32 = 0u; attempt < MAX_RETRIES; attempt++) {
|
|
84
|
+
let t = atomicLoad(&queue.tail);
|
|
85
|
+
let slot_index = t & queue.mask;
|
|
86
|
+
let seq = atomicLoad(&slots[slot_index].seq);
|
|
87
|
+
let diff = i32(seq) - i32(t);
|
|
88
|
+
|
|
89
|
+
if (diff == 0) {
|
|
90
|
+
let res = atomicCompareExchangeWeak(&queue.tail, t, t + 1u);
|
|
91
|
+
if (res.exchanged) {
|
|
92
|
+
slots[slot_index].job_type = job.job_type;
|
|
93
|
+
slots[slot_index].payload_offset = input_offset;
|
|
94
|
+
slots[slot_index].payload_words = payload_words;
|
|
95
|
+
atomicStore(&slots[slot_index].seq, t + 1u);
|
|
96
|
+
return 1u;
|
|
97
|
+
}
|
|
98
|
+
} else if (diff < 0) {
|
|
99
|
+
return 0u;
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
return 0u;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
fn dequeue(idx: u32) -> u32 {
|
|
107
|
+
for (var attempt: u32 = 0u; attempt < MAX_RETRIES; attempt++) {
|
|
108
|
+
let h = atomicLoad(&queue.head);
|
|
109
|
+
let slot_index = h & queue.mask;
|
|
110
|
+
let seq = atomicLoad(&slots[slot_index].seq);
|
|
111
|
+
let diff = i32(seq) - i32(h + 1u);
|
|
112
|
+
|
|
113
|
+
if (diff == 0) {
|
|
114
|
+
let res = atomicCompareExchangeWeak(&queue.head, h, h + 1u);
|
|
115
|
+
if (res.exchanged) {
|
|
116
|
+
let payload_offset = slots[slot_index].payload_offset;
|
|
117
|
+
let payload_words = slots[slot_index].payload_words;
|
|
118
|
+
let job_type = slots[slot_index].job_type;
|
|
119
|
+
let output_stride = params.output_stride;
|
|
120
|
+
let dst_base = idx * output_stride;
|
|
121
|
+
let copy_words = min(payload_words, output_stride);
|
|
122
|
+
for (var i: u32 = 0u; i < copy_words; i = i + 1u) {
|
|
123
|
+
output_payloads[dst_base + i] = input_payloads[payload_offset + i];
|
|
124
|
+
}
|
|
125
|
+
for (var i: u32 = copy_words; i < output_stride; i = i + 1u) {
|
|
126
|
+
output_payloads[dst_base + i] = 0u;
|
|
127
|
+
}
|
|
128
|
+
output_jobs[idx].job_type = job_type;
|
|
129
|
+
output_jobs[idx].payload_offset = payload_offset;
|
|
130
|
+
output_jobs[idx].payload_words = payload_words;
|
|
131
|
+
output_jobs[idx]._pad = 0u;
|
|
132
|
+
atomicStore(&slots[slot_index].seq, h + queue.capacity);
|
|
133
|
+
return 1u;
|
|
134
|
+
}
|
|
135
|
+
} else if (diff < 0) {
|
|
136
|
+
return 0u;
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
return 0u;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
@compute @workgroup_size(64)
|
|
144
|
+
fn enqueue_main(@builtin(global_invocation_id) gid: vec3<u32>) {
|
|
145
|
+
let idx = gid.x;
|
|
146
|
+
let job_count = enqueue_job_count();
|
|
147
|
+
if (idx >= job_count) {
|
|
148
|
+
return;
|
|
149
|
+
}
|
|
150
|
+
if (!queue_config_valid()) {
|
|
151
|
+
return;
|
|
152
|
+
}
|
|
153
|
+
if (status[idx] == 1u) {
|
|
154
|
+
return;
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
let ok = enqueue(idx);
|
|
158
|
+
if (ok == 1u) {
|
|
159
|
+
status[idx] = 1u;
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
@compute @workgroup_size(64)
|
|
164
|
+
fn dequeue_main(@builtin(global_invocation_id) gid: vec3<u32>) {
|
|
165
|
+
let idx = gid.x;
|
|
166
|
+
let job_count = dequeue_job_count();
|
|
167
|
+
if (idx >= job_count) {
|
|
168
|
+
return;
|
|
169
|
+
}
|
|
170
|
+
if (!queue_config_valid()) {
|
|
171
|
+
return;
|
|
172
|
+
}
|
|
173
|
+
if (status[idx] == 1u) {
|
|
174
|
+
return;
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
let ok = dequeue(idx);
|
|
178
|
+
if (ok == 1u) {
|
|
179
|
+
status[idx] = 1u;
|
|
180
|
+
}
|
|
181
|
+
}
|
package/package.json
CHANGED
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@plasius/gpu-lock-free-queue",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.1",
|
|
4
4
|
"description": "WebGPU lock-free MPMC ring queue with sequence counters.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"sideEffects": false,
|
|
7
7
|
"private": false,
|
|
8
|
+
"main": "./dist/index.cjs",
|
|
9
|
+
"module": "./dist/index.js",
|
|
8
10
|
"files": [
|
|
11
|
+
"dist",
|
|
9
12
|
"src",
|
|
10
13
|
"README.md",
|
|
11
14
|
"CHANGELOG.md",
|
|
@@ -13,11 +16,15 @@
|
|
|
13
16
|
"legal"
|
|
14
17
|
],
|
|
15
18
|
"exports": {
|
|
16
|
-
".":
|
|
17
|
-
|
|
19
|
+
".": {
|
|
20
|
+
"import": "./dist/index.js",
|
|
21
|
+
"require": "./dist/index.cjs"
|
|
22
|
+
},
|
|
23
|
+
"./queue.wgsl": "./dist/queue.wgsl",
|
|
18
24
|
"./package.json": "./package.json"
|
|
19
25
|
},
|
|
20
26
|
"scripts": {
|
|
27
|
+
"build": "tsup && cp src/queue.wgsl dist/queue.wgsl",
|
|
21
28
|
"demo": "python3 -m http.server",
|
|
22
29
|
"test": "npm run test:unit",
|
|
23
30
|
"test:unit": "node --test",
|
|
@@ -40,7 +47,9 @@
|
|
|
40
47
|
"license": "Apache-2.0",
|
|
41
48
|
"devDependencies": {
|
|
42
49
|
"@playwright/test": "^1.57.0",
|
|
43
|
-
"c8": "^10.1.3"
|
|
50
|
+
"c8": "^10.1.3",
|
|
51
|
+
"tsup": "^8.5.0",
|
|
52
|
+
"typescript": "^5.9.3"
|
|
44
53
|
},
|
|
45
54
|
"repository": {
|
|
46
55
|
"type": "git",
|
package/src/index.cjs
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
const { pathToFileURL, fileURLToPath } = require("node:url");
|
|
2
|
+
const { readFile } = require("node:fs/promises");
|
|
3
|
+
|
|
4
|
+
const queueWgslUrl = new URL("./queue.wgsl", pathToFileURL(__filename));
|
|
5
|
+
|
|
6
|
+
async function loadQueueWgsl(options = {}) {
|
|
7
|
+
const { url = queueWgslUrl, fetcher = globalThis.fetch } = options ?? {};
|
|
8
|
+
const wgslUrl = url instanceof URL ? url : new URL(url, queueWgslUrl);
|
|
9
|
+
|
|
10
|
+
if (!fetcher || wgslUrl.protocol === "file:") {
|
|
11
|
+
return readFile(fileURLToPath(wgslUrl), "utf8");
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
const response = await fetcher(wgslUrl);
|
|
15
|
+
if (!response.ok) {
|
|
16
|
+
const status = "status" in response ? response.status : "unknown";
|
|
17
|
+
const statusText = "statusText" in response ? response.statusText : "";
|
|
18
|
+
const detail = statusText ? `${status} ${statusText}` : `${status}`;
|
|
19
|
+
throw new Error(`Failed to load WGSL (${detail})`);
|
|
20
|
+
}
|
|
21
|
+
return response.text();
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
module.exports = {
|
|
25
|
+
queueWgslUrl,
|
|
26
|
+
loadQueueWgsl,
|
|
27
|
+
};
|
package/src/index.js
CHANGED
|
@@ -1,6 +1,21 @@
|
|
|
1
1
|
export const queueWgslUrl = new URL("./queue.wgsl", import.meta.url);
|
|
2
2
|
|
|
3
|
-
export async function loadQueueWgsl() {
|
|
4
|
-
const
|
|
3
|
+
export async function loadQueueWgsl(options = {}) {
|
|
4
|
+
const { url = queueWgslUrl, fetcher = globalThis.fetch } = options ?? {};
|
|
5
|
+
const wgslUrl = url instanceof URL ? url : new URL(url, queueWgslUrl);
|
|
6
|
+
|
|
7
|
+
if (!fetcher || wgslUrl.protocol === "file:") {
|
|
8
|
+
const { readFile } = await import("node:fs/promises");
|
|
9
|
+
const { fileURLToPath } = await import("node:url");
|
|
10
|
+
return readFile(fileURLToPath(wgslUrl), "utf8");
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
const response = await fetcher(wgslUrl);
|
|
14
|
+
if (!response.ok) {
|
|
15
|
+
const status = "status" in response ? response.status : "unknown";
|
|
16
|
+
const statusText = "statusText" in response ? response.statusText : "";
|
|
17
|
+
const detail = statusText ? `${status} ${statusText}` : `${status}`;
|
|
18
|
+
throw new Error(`Failed to load WGSL (${detail})`);
|
|
19
|
+
}
|
|
5
20
|
return response.text();
|
|
6
21
|
}
|
package/src/queue.wgsl
CHANGED
|
@@ -3,30 +3,83 @@ struct Queue {
|
|
|
3
3
|
tail: atomic<u32>,
|
|
4
4
|
capacity: u32,
|
|
5
5
|
mask: u32,
|
|
6
|
-
_pad: vec2<u32>,
|
|
7
6
|
};
|
|
8
7
|
|
|
9
8
|
struct Slot {
|
|
10
9
|
seq: atomic<u32>,
|
|
11
|
-
|
|
12
|
-
|
|
10
|
+
job_type: u32,
|
|
11
|
+
payload_offset: u32,
|
|
12
|
+
payload_words: u32,
|
|
13
|
+
};
|
|
14
|
+
|
|
15
|
+
struct JobMeta {
|
|
16
|
+
job_type: u32,
|
|
17
|
+
payload_offset: u32,
|
|
18
|
+
payload_words: u32,
|
|
19
|
+
_pad: u32,
|
|
13
20
|
};
|
|
14
21
|
|
|
15
22
|
struct Params {
|
|
16
23
|
job_count: u32,
|
|
17
|
-
|
|
24
|
+
output_stride: u32,
|
|
25
|
+
_pad: vec2<u32>,
|
|
18
26
|
};
|
|
19
27
|
|
|
20
28
|
@group(0) @binding(0) var<storage, read_write> queue: Queue;
|
|
21
29
|
@group(0) @binding(1) var<storage, read_write> slots: array<Slot>;
|
|
22
|
-
@group(0) @binding(2) var<storage, read> input_jobs: array<
|
|
23
|
-
@group(0) @binding(3) var<storage, read_write> output_jobs: array<
|
|
24
|
-
@group(0) @binding(4) var<storage,
|
|
25
|
-
@group(0) @binding(5) var<
|
|
30
|
+
@group(0) @binding(2) var<storage, read> input_jobs: array<JobMeta>;
|
|
31
|
+
@group(0) @binding(3) var<storage, read_write> output_jobs: array<JobMeta>;
|
|
32
|
+
@group(0) @binding(4) var<storage, read> input_payloads: array<u32>;
|
|
33
|
+
@group(0) @binding(5) var<storage, read_write> output_payloads: array<u32>;
|
|
34
|
+
@group(0) @binding(6) var<storage, read_write> status: array<u32>;
|
|
35
|
+
@group(0) @binding(7) var<uniform> params: Params;
|
|
26
36
|
|
|
27
37
|
const MAX_RETRIES: u32 = 512u;
|
|
28
38
|
|
|
29
|
-
fn
|
|
39
|
+
fn queue_config_valid() -> bool {
|
|
40
|
+
if (queue.capacity == 0u) {
|
|
41
|
+
return false;
|
|
42
|
+
}
|
|
43
|
+
if ((queue.capacity & (queue.capacity - 1u)) != 0u) {
|
|
44
|
+
return false;
|
|
45
|
+
}
|
|
46
|
+
if (queue.mask != queue.capacity - 1u) {
|
|
47
|
+
return false;
|
|
48
|
+
}
|
|
49
|
+
if (queue.capacity > arrayLength(&slots)) {
|
|
50
|
+
return false;
|
|
51
|
+
}
|
|
52
|
+
return true;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
fn enqueue_job_count() -> u32 {
|
|
56
|
+
let count = min(params.job_count, arrayLength(&input_jobs));
|
|
57
|
+
return min(count, arrayLength(&status));
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
fn dequeue_job_count() -> u32 {
|
|
61
|
+
if (params.output_stride == 0u) {
|
|
62
|
+
return 0u;
|
|
63
|
+
}
|
|
64
|
+
let payload_jobs = arrayLength(&output_payloads) / params.output_stride;
|
|
65
|
+
var count = min(params.job_count, arrayLength(&output_jobs));
|
|
66
|
+
count = min(count, payload_jobs);
|
|
67
|
+
return min(count, arrayLength(&status));
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
fn queue_len() -> u32 {
|
|
71
|
+
let h = atomicLoad(&queue.head);
|
|
72
|
+
let t = atomicLoad(&queue.tail);
|
|
73
|
+
return t - h;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
fn enqueue(idx: u32) -> u32 {
|
|
77
|
+
let job = input_jobs[idx];
|
|
78
|
+
let payload_words = job.payload_words;
|
|
79
|
+
let input_offset = job.payload_offset;
|
|
80
|
+
if (input_offset + payload_words > arrayLength(&input_payloads)) {
|
|
81
|
+
return 0u;
|
|
82
|
+
}
|
|
30
83
|
for (var attempt: u32 = 0u; attempt < MAX_RETRIES; attempt++) {
|
|
31
84
|
let t = atomicLoad(&queue.tail);
|
|
32
85
|
let slot_index = t & queue.mask;
|
|
@@ -36,7 +89,9 @@ fn enqueue(val: u32) -> u32 {
|
|
|
36
89
|
if (diff == 0) {
|
|
37
90
|
let res = atomicCompareExchangeWeak(&queue.tail, t, t + 1u);
|
|
38
91
|
if (res.exchanged) {
|
|
39
|
-
slots[slot_index].
|
|
92
|
+
slots[slot_index].job_type = job.job_type;
|
|
93
|
+
slots[slot_index].payload_offset = input_offset;
|
|
94
|
+
slots[slot_index].payload_words = payload_words;
|
|
40
95
|
atomicStore(&slots[slot_index].seq, t + 1u);
|
|
41
96
|
return 1u;
|
|
42
97
|
}
|
|
@@ -58,8 +113,22 @@ fn dequeue(idx: u32) -> u32 {
|
|
|
58
113
|
if (diff == 0) {
|
|
59
114
|
let res = atomicCompareExchangeWeak(&queue.head, h, h + 1u);
|
|
60
115
|
if (res.exchanged) {
|
|
61
|
-
let
|
|
62
|
-
|
|
116
|
+
let payload_offset = slots[slot_index].payload_offset;
|
|
117
|
+
let payload_words = slots[slot_index].payload_words;
|
|
118
|
+
let job_type = slots[slot_index].job_type;
|
|
119
|
+
let output_stride = params.output_stride;
|
|
120
|
+
let dst_base = idx * output_stride;
|
|
121
|
+
let copy_words = min(payload_words, output_stride);
|
|
122
|
+
for (var i: u32 = 0u; i < copy_words; i = i + 1u) {
|
|
123
|
+
output_payloads[dst_base + i] = input_payloads[payload_offset + i];
|
|
124
|
+
}
|
|
125
|
+
for (var i: u32 = copy_words; i < output_stride; i = i + 1u) {
|
|
126
|
+
output_payloads[dst_base + i] = 0u;
|
|
127
|
+
}
|
|
128
|
+
output_jobs[idx].job_type = job_type;
|
|
129
|
+
output_jobs[idx].payload_offset = payload_offset;
|
|
130
|
+
output_jobs[idx].payload_words = payload_words;
|
|
131
|
+
output_jobs[idx]._pad = 0u;
|
|
63
132
|
atomicStore(&slots[slot_index].seq, h + queue.capacity);
|
|
64
133
|
return 1u;
|
|
65
134
|
}
|
|
@@ -74,14 +143,18 @@ fn dequeue(idx: u32) -> u32 {
|
|
|
74
143
|
@compute @workgroup_size(64)
|
|
75
144
|
fn enqueue_main(@builtin(global_invocation_id) gid: vec3<u32>) {
|
|
76
145
|
let idx = gid.x;
|
|
77
|
-
|
|
146
|
+
let job_count = enqueue_job_count();
|
|
147
|
+
if (idx >= job_count) {
|
|
148
|
+
return;
|
|
149
|
+
}
|
|
150
|
+
if (!queue_config_valid()) {
|
|
78
151
|
return;
|
|
79
152
|
}
|
|
80
153
|
if (status[idx] == 1u) {
|
|
81
154
|
return;
|
|
82
155
|
}
|
|
83
156
|
|
|
84
|
-
let ok = enqueue(
|
|
157
|
+
let ok = enqueue(idx);
|
|
85
158
|
if (ok == 1u) {
|
|
86
159
|
status[idx] = 1u;
|
|
87
160
|
}
|
|
@@ -90,7 +163,11 @@ fn enqueue_main(@builtin(global_invocation_id) gid: vec3<u32>) {
|
|
|
90
163
|
@compute @workgroup_size(64)
|
|
91
164
|
fn dequeue_main(@builtin(global_invocation_id) gid: vec3<u32>) {
|
|
92
165
|
let idx = gid.x;
|
|
93
|
-
|
|
166
|
+
let job_count = dequeue_job_count();
|
|
167
|
+
if (idx >= job_count) {
|
|
168
|
+
return;
|
|
169
|
+
}
|
|
170
|
+
if (!queue_config_valid()) {
|
|
94
171
|
return;
|
|
95
172
|
}
|
|
96
173
|
if (status[idx] == 1u) {
|