@plasius/gpu-lock-free-queue 0.1.2-beta.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -20,53 +20,59 @@ The format is based on **[Keep a Changelog](https://keepachangelog.com/en/1.1.0/
20
20
  - **Security**
21
21
  - (placeholder)
22
22
 
23
- ## [0.1.2-beta.0] - 2026-01-08
24
-
25
- - **Added**
26
- - (placeholder)
23
+ ## [0.2.1] - 2026-01-23
27
24
 
28
25
  - **Changed**
29
- - (placeholder)
26
+ - **Breaking:** Queue payloads are now referenced by fixed metadata offsets into caller-managed payload buffers (no internal payload arena).
27
+ - **Breaking:** Queue header and bindings updated to remove payload arena fields and buffer.
28
+ - Demo and tests updated to reflect the new payload-handle layout.
29
+ - **Breaking:** Queue header now contains only head/tail and capacity/mask fields (payload arena fields removed).
30
+ - Queue helpers now expose a `queue_len` backlog snapshot for schedulers.
31
+ - Demo and tests updated to use job metadata and variable payload copies.
30
32
 
31
33
  - **Fixed**
32
- - (placeholder)
34
+ - Payload references now validate input payload buffer capacity (`payload_offset + payload_words`) before enqueue.
33
35
 
34
- - **Security**
35
- - (placeholder)
36
-
37
- ## [0.1.1-beta.1] - 2026-01-08
38
-
39
- - **Added**
40
- - (placeholder)
36
+ ## [0.2.0] - 2026-01-23
41
37
 
42
38
  - **Changed**
43
- - (placeholder)
39
+ - **Breaking:** WGSL bindings now include a dedicated payload ring buffer plus input/output payload buffers.
40
+ - Queue headers now carry `payload_stride` (u32 words) and job payloads are copied into the ring on enqueue.
41
+ - Demo and tests updated to use payload buffers instead of `input_jobs`/`output_jobs`.
44
42
 
45
43
  - **Fixed**
46
- - (placeholder)
47
-
48
- - **Security**
49
- - (placeholder)
44
+ - Payload job counts now clamp to payload buffer lengths to prevent overruns.
50
45
 
51
- ## [0.1.1-beta.0] - 2026-01-08
46
+ ## [0.1.2] - 2026-01-22
52
47
 
53
48
  - **Added**
54
- - (placeholder)
49
+ - Deterministic demo test pattern mode for stable image hashing in e2e tests.
50
+ - 4x4 demo grid for multi-canvas output.
51
+ - Timestamped demo logging.
52
+ - Demo FPS counter and per-image progress indicators.
53
+ - Loader and WGSL guard tests, plus an e2e WGSL compilation check.
55
54
 
56
55
  - **Changed**
57
- - (placeholder)
56
+ - `loadQueueWgsl` accepts `url`/`fetcher` overrides and falls back to filesystem reads for `file:` URLs.
57
+ - Demo renders 500 interleaved static frames using per-image queues per frame.
58
+ - Demo updates canvases line-by-line for progressive static output.
59
+ - Build outputs now ship as ESM and CJS bundles with the WGSL asset in `dist/`.
58
60
 
59
61
  - **Fixed**
60
- - (placeholder)
62
+ - WGSL entry points now validate queue configuration and clamp job counts to buffer lengths.
63
+ - WGSL load errors now surface with explicit HTTP status details.
64
+ - CD build now installs TypeScript for the tsup build step.
61
65
 
62
66
  - **Security**
63
- - (placeholder)
67
+ - None.
64
68
 
65
69
  ## [0.1.0] - 2026-01-08
66
70
 
67
71
  - **Added**
68
72
  - WebGPU lock-free MPMC queue with sequence counters.
69
73
  - Demo for enqueue/dequeue, FFT spectrogram, and randomness heuristics.
70
- [0.1.1-beta.0]: https://github.com/Plasius-LTD/gpu-lock-free-queue/releases/tag/v0.1.1-beta.0
71
- [0.1.1-beta.1]: https://github.com/Plasius-LTD/gpu-lock-free-queue/releases/tag/v0.1.1-beta.1
72
- [0.1.2-beta.0]: https://github.com/Plasius-LTD/gpu-lock-free-queue/releases/tag/v0.1.2-beta.0
74
+
75
+ [0.1.0]: https://github.com/Plasius-LTD/gpu-lock-free-queue/releases/tag/v0.1.0
76
+ [0.1.2]: https://github.com/Plasius-LTD/gpu-lock-free-queue/releases/tag/v0.1.2
77
+ [0.2.0]: https://github.com/Plasius-LTD/gpu-lock-free-queue/releases/tag/v0.2.0
78
+ [0.2.1]: https://github.com/Plasius-LTD/gpu-lock-free-queue/releases/tag/v0.2.1
package/README.md CHANGED
@@ -6,6 +6,8 @@
6
6
 
7
7
  A minimal WebGPU lock-free MPMC ring queue using a per-slot sequence counter (Vyukov-style). This is a starter implementation focused on correctness, robustness, and low overhead.
8
8
 
9
+ Apache-2.0. ESM + CJS builds. WGSL assets are published in `dist/`.
10
+
9
11
  ## Install
10
12
  ```
11
13
  npm install @plasius/gpu-lock-free-queue
@@ -23,11 +25,24 @@ const shaderCode = await loadQueueWgsl();
23
25
  ## What this is
24
26
  - Lock-free multi-producer, multi-consumer ring queue on the GPU.
25
27
  - Uses per-slot sequence numbers to avoid ABA for slots within a 32-bit epoch.
26
- - Fixed-size jobs (u32) for now; a "job" can be expanded to a fixed-size struct or an index into a separate payload buffer.
28
+ - Fixed-size job metadata with payload offsets into a caller-managed data arena or buffer.
29
+
30
+ ## Buffer layout (breaking change in v0.2.1)
31
+ Bindings are:
32
+ 1. `@binding(0)` queue header: `{ head, tail, capacity, mask }`
33
+ 2. `@binding(1)` slot array (`Slot` with `seq`, `job_type`, `payload_offset`, `payload_words`)
34
+ 3. `@binding(2)` input jobs (`array<JobMeta>` with `job_type`, `payload_offset`, `payload_words`)
35
+ 4. `@binding(3)` output jobs (`array<JobMeta>` with `job_type`, `payload_offset`, `payload_words`)
36
+ 5. `@binding(4)` input payloads (`array<u32>`, payload data referenced by `input_jobs.payload_offset`)
37
+ 6. `@binding(5)` output payloads (`array<u32>`, length `job_count * output_stride`)
38
+ 7. `@binding(6)` status flags (`array<u32>`, length `job_count`)
39
+ 8. `@binding(7)` params (`Params` with `job_count`, `output_stride`)
40
+
41
+ `output_stride` is the per-job output stride (u32 words) used when copying payloads into `output_payloads`.
27
42
 
28
43
  ## Limitations
29
44
  - Sequence counters are 32-bit. At extreme throughput over a long time, counters wrap and ABA can reappear. If you need true long-running safety, consider a reset protocol, sharding, or a future 64-bit atomic extension.
30
- - Jobs are fixed-size and must be power-of-two capacity.
45
+ - Payload lifetimes are managed by the caller. Ensure payload buffers remain valid until consumers finish, or use frame-bounded arenas/generation handles.
31
46
  - This demo is intentionally minimal; it is not yet integrated with a scheduler or backpressure policy.
32
47
 
33
48
  ## Run the demo
@@ -39,6 +54,10 @@ python3 -m http.server
39
54
 
40
55
  Then open `http://localhost:8000` and check the console/output.
41
56
 
57
+ ## Build Outputs
58
+
59
+ `npm run build` emits `dist/index.js`, `dist/index.cjs`, and `dist/queue.wgsl`.
60
+
42
61
  ## Tests
43
62
  ```
44
63
  npm run test:unit
@@ -52,5 +71,5 @@ npm run test:e2e
52
71
  - `src/queue.wgsl`: Lock-free queue implementation.
53
72
  - `src/index.js`: Package entry point for loading the WGSL file.
54
73
 
55
- ## Job shape
56
- Current jobs are `u32` values. If you need richer jobs, use a fixed-size struct (e.g., 16 bytes) or store indices into a separate payload buffer. Variable-length jobs should be modeled as an index + length into a payload arena to keep the queue fixed-size.
74
+ ## Payload shape
75
+ Payloads are variable-length chunks stored in a caller-managed buffer. Each job specifies `job_type`, `payload_offset`, and `payload_words` in `input_jobs`; dequeue copies payloads from `input_payloads` into `output_payloads` using `output_stride` and mirrors the metadata into `output_jobs`. If you need `f32`, store `bitcast<u32>(value)` and reinterpret on the consumer side.
package/dist/index.cjs ADDED
@@ -0,0 +1,24 @@
1
+ // src/index.cjs
2
+ var { pathToFileURL, fileURLToPath } = require("url");
3
+ var { readFile } = require("fs/promises");
4
+ var queueWgslUrl = new URL("./queue.wgsl", pathToFileURL(__filename));
5
+ async function loadQueueWgsl(options = {}) {
6
+ const { url = queueWgslUrl, fetcher = globalThis.fetch } = options ?? {};
7
+ const wgslUrl = url instanceof URL ? url : new URL(url, queueWgslUrl);
8
+ if (!fetcher || wgslUrl.protocol === "file:") {
9
+ return readFile(fileURLToPath(wgslUrl), "utf8");
10
+ }
11
+ const response = await fetcher(wgslUrl);
12
+ if (!response.ok) {
13
+ const status = "status" in response ? response.status : "unknown";
14
+ const statusText = "statusText" in response ? response.statusText : "";
15
+ const detail = statusText ? `${status} ${statusText}` : `${status}`;
16
+ throw new Error(`Failed to load WGSL (${detail})`);
17
+ }
18
+ return response.text();
19
+ }
20
+ module.exports = {
21
+ queueWgslUrl,
22
+ loadQueueWgsl
23
+ };
24
+ //# sourceMappingURL=index.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/index.cjs"],"sourcesContent":["const { pathToFileURL, fileURLToPath } = require(\"node:url\");\nconst { readFile } = require(\"node:fs/promises\");\n\nconst queueWgslUrl = new URL(\"./queue.wgsl\", pathToFileURL(__filename));\n\nasync function loadQueueWgsl(options = {}) {\n const { url = queueWgslUrl, fetcher = globalThis.fetch } = options ?? {};\n const wgslUrl = url instanceof URL ? url : new URL(url, queueWgslUrl);\n\n if (!fetcher || wgslUrl.protocol === \"file:\") {\n return readFile(fileURLToPath(wgslUrl), \"utf8\");\n }\n\n const response = await fetcher(wgslUrl);\n if (!response.ok) {\n const status = \"status\" in response ? response.status : \"unknown\";\n const statusText = \"statusText\" in response ? response.statusText : \"\";\n const detail = statusText ? `${status} ${statusText}` : `${status}`;\n throw new Error(`Failed to load WGSL (${detail})`);\n }\n return response.text();\n}\n\nmodule.exports = {\n queueWgslUrl,\n loadQueueWgsl,\n};\n"],"mappings":";AAAA,IAAM,EAAE,eAAe,cAAc,IAAI,QAAQ,KAAU;AAC3D,IAAM,EAAE,SAAS,IAAI,QAAQ,aAAkB;AAE/C,IAAM,eAAe,IAAI,IAAI,gBAAgB,cAAc,UAAU,CAAC;AAEtE,eAAe,cAAc,UAAU,CAAC,GAAG;AACzC,QAAM,EAAE,MAAM,cAAc,UAAU,WAAW,MAAM,IAAI,WAAW,CAAC;AACvE,QAAM,UAAU,eAAe,MAAM,MAAM,IAAI,IAAI,KAAK,YAAY;AAEpE,MAAI,CAAC,WAAW,QAAQ,aAAa,SAAS;AAC5C,WAAO,SAAS,cAAc,OAAO,GAAG,MAAM;AAAA,EAChD;AAEA,QAAM,WAAW,MAAM,QAAQ,OAAO;AACtC,MAAI,CAAC,SAAS,IAAI;AAChB,UAAM,SAAS,YAAY,WAAW,SAAS,SAAS;AACxD,UAAM,aAAa,gBAAgB,WAAW,SAAS,aAAa;AACpE,UAAM,SAAS,aAAa,GAAG,MAAM,IAAI,UAAU,KAAK,GAAG,MAAM;AACjE,UAAM,IAAI,MAAM,wBAAwB,MAAM,GAAG;AAAA,EACnD;AACA,SAAO,SAAS,KAAK;AACvB;AAEA,OAAO,UAAU;AAAA,EACf;AAAA,EACA;AACF;","names":[]}
package/dist/index.js ADDED
@@ -0,0 +1,24 @@
1
+ // src/index.js
2
+ var queueWgslUrl = new URL("./queue.wgsl", import.meta.url);
3
+ async function loadQueueWgsl(options = {}) {
4
+ const { url = queueWgslUrl, fetcher = globalThis.fetch } = options ?? {};
5
+ const wgslUrl = url instanceof URL ? url : new URL(url, queueWgslUrl);
6
+ if (!fetcher || wgslUrl.protocol === "file:") {
7
+ const { readFile } = await import("fs/promises");
8
+ const { fileURLToPath } = await import("url");
9
+ return readFile(fileURLToPath(wgslUrl), "utf8");
10
+ }
11
+ const response = await fetcher(wgslUrl);
12
+ if (!response.ok) {
13
+ const status = "status" in response ? response.status : "unknown";
14
+ const statusText = "statusText" in response ? response.statusText : "";
15
+ const detail = statusText ? `${status} ${statusText}` : `${status}`;
16
+ throw new Error(`Failed to load WGSL (${detail})`);
17
+ }
18
+ return response.text();
19
+ }
20
+ export {
21
+ loadQueueWgsl,
22
+ queueWgslUrl
23
+ };
24
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/index.js"],"sourcesContent":["export const queueWgslUrl = new URL(\"./queue.wgsl\", import.meta.url);\n\nexport async function loadQueueWgsl(options = {}) {\n const { url = queueWgslUrl, fetcher = globalThis.fetch } = options ?? {};\n const wgslUrl = url instanceof URL ? url : new URL(url, queueWgslUrl);\n\n if (!fetcher || wgslUrl.protocol === \"file:\") {\n const { readFile } = await import(\"node:fs/promises\");\n const { fileURLToPath } = await import(\"node:url\");\n return readFile(fileURLToPath(wgslUrl), \"utf8\");\n }\n\n const response = await fetcher(wgslUrl);\n if (!response.ok) {\n const status = \"status\" in response ? response.status : \"unknown\";\n const statusText = \"statusText\" in response ? response.statusText : \"\";\n const detail = statusText ? `${status} ${statusText}` : `${status}`;\n throw new Error(`Failed to load WGSL (${detail})`);\n }\n return response.text();\n}\n"],"mappings":";AAAO,IAAM,eAAe,IAAI,IAAI,gBAAgB,YAAY,GAAG;AAEnE,eAAsB,cAAc,UAAU,CAAC,GAAG;AAChD,QAAM,EAAE,MAAM,cAAc,UAAU,WAAW,MAAM,IAAI,WAAW,CAAC;AACvE,QAAM,UAAU,eAAe,MAAM,MAAM,IAAI,IAAI,KAAK,YAAY;AAEpE,MAAI,CAAC,WAAW,QAAQ,aAAa,SAAS;AAC5C,UAAM,EAAE,SAAS,IAAI,MAAM,OAAO,aAAkB;AACpD,UAAM,EAAE,cAAc,IAAI,MAAM,OAAO,KAAU;AACjD,WAAO,SAAS,cAAc,OAAO,GAAG,MAAM;AAAA,EAChD;AAEA,QAAM,WAAW,MAAM,QAAQ,OAAO;AACtC,MAAI,CAAC,SAAS,IAAI;AAChB,UAAM,SAAS,YAAY,WAAW,SAAS,SAAS;AACxD,UAAM,aAAa,gBAAgB,WAAW,SAAS,aAAa;AACpE,UAAM,SAAS,aAAa,GAAG,MAAM,IAAI,UAAU,KAAK,GAAG,MAAM;AACjE,UAAM,IAAI,MAAM,wBAAwB,MAAM,GAAG;AAAA,EACnD;AACA,SAAO,SAAS,KAAK;AACvB;","names":[]}
@@ -0,0 +1,181 @@
1
+ struct Queue {
2
+ head: atomic<u32>,
3
+ tail: atomic<u32>,
4
+ capacity: u32,
5
+ mask: u32,
6
+ };
7
+
8
+ struct Slot {
9
+ seq: atomic<u32>,
10
+ job_type: u32,
11
+ payload_offset: u32,
12
+ payload_words: u32,
13
+ };
14
+
15
+ struct JobMeta {
16
+ job_type: u32,
17
+ payload_offset: u32,
18
+ payload_words: u32,
19
+ _pad: u32,
20
+ };
21
+
22
+ struct Params {
23
+ job_count: u32,
24
+ output_stride: u32,
25
+ _pad: vec2<u32>,
26
+ };
27
+
28
+ @group(0) @binding(0) var<storage, read_write> queue: Queue;
29
+ @group(0) @binding(1) var<storage, read_write> slots: array<Slot>;
30
+ @group(0) @binding(2) var<storage, read> input_jobs: array<JobMeta>;
31
+ @group(0) @binding(3) var<storage, read_write> output_jobs: array<JobMeta>;
32
+ @group(0) @binding(4) var<storage, read> input_payloads: array<u32>;
33
+ @group(0) @binding(5) var<storage, read_write> output_payloads: array<u32>;
34
+ @group(0) @binding(6) var<storage, read_write> status: array<u32>;
35
+ @group(0) @binding(7) var<uniform> params: Params;
36
+
37
+ const MAX_RETRIES: u32 = 512u;
38
+
39
+ fn queue_config_valid() -> bool {
40
+ if (queue.capacity == 0u) {
41
+ return false;
42
+ }
43
+ if ((queue.capacity & (queue.capacity - 1u)) != 0u) {
44
+ return false;
45
+ }
46
+ if (queue.mask != queue.capacity - 1u) {
47
+ return false;
48
+ }
49
+ if (queue.capacity > arrayLength(&slots)) {
50
+ return false;
51
+ }
52
+ return true;
53
+ }
54
+
55
+ fn enqueue_job_count() -> u32 {
56
+ let count = min(params.job_count, arrayLength(&input_jobs));
57
+ return min(count, arrayLength(&status));
58
+ }
59
+
60
+ fn dequeue_job_count() -> u32 {
61
+ if (params.output_stride == 0u) {
62
+ return 0u;
63
+ }
64
+ let payload_jobs = arrayLength(&output_payloads) / params.output_stride;
65
+ var count = min(params.job_count, arrayLength(&output_jobs));
66
+ count = min(count, payload_jobs);
67
+ return min(count, arrayLength(&status));
68
+ }
69
+
70
+ fn queue_len() -> u32 {
71
+ let h = atomicLoad(&queue.head);
72
+ let t = atomicLoad(&queue.tail);
73
+ return t - h;
74
+ }
75
+
76
+ fn enqueue(idx: u32) -> u32 {
77
+ let job = input_jobs[idx];
78
+ let payload_words = job.payload_words;
79
+ let input_offset = job.payload_offset;
80
+ if (input_offset + payload_words > arrayLength(&input_payloads)) {
81
+ return 0u;
82
+ }
83
+ for (var attempt: u32 = 0u; attempt < MAX_RETRIES; attempt++) {
84
+ let t = atomicLoad(&queue.tail);
85
+ let slot_index = t & queue.mask;
86
+ let seq = atomicLoad(&slots[slot_index].seq);
87
+ let diff = i32(seq) - i32(t);
88
+
89
+ if (diff == 0) {
90
+ let res = atomicCompareExchangeWeak(&queue.tail, t, t + 1u);
91
+ if (res.exchanged) {
92
+ slots[slot_index].job_type = job.job_type;
93
+ slots[slot_index].payload_offset = input_offset;
94
+ slots[slot_index].payload_words = payload_words;
95
+ atomicStore(&slots[slot_index].seq, t + 1u);
96
+ return 1u;
97
+ }
98
+ } else if (diff < 0) {
99
+ return 0u;
100
+ }
101
+ }
102
+
103
+ return 0u;
104
+ }
105
+
106
+ fn dequeue(idx: u32) -> u32 {
107
+ for (var attempt: u32 = 0u; attempt < MAX_RETRIES; attempt++) {
108
+ let h = atomicLoad(&queue.head);
109
+ let slot_index = h & queue.mask;
110
+ let seq = atomicLoad(&slots[slot_index].seq);
111
+ let diff = i32(seq) - i32(h + 1u);
112
+
113
+ if (diff == 0) {
114
+ let res = atomicCompareExchangeWeak(&queue.head, h, h + 1u);
115
+ if (res.exchanged) {
116
+ let payload_offset = slots[slot_index].payload_offset;
117
+ let payload_words = slots[slot_index].payload_words;
118
+ let job_type = slots[slot_index].job_type;
119
+ let output_stride = params.output_stride;
120
+ let dst_base = idx * output_stride;
121
+ let copy_words = min(payload_words, output_stride);
122
+ for (var i: u32 = 0u; i < copy_words; i = i + 1u) {
123
+ output_payloads[dst_base + i] = input_payloads[payload_offset + i];
124
+ }
125
+ for (var i: u32 = copy_words; i < output_stride; i = i + 1u) {
126
+ output_payloads[dst_base + i] = 0u;
127
+ }
128
+ output_jobs[idx].job_type = job_type;
129
+ output_jobs[idx].payload_offset = payload_offset;
130
+ output_jobs[idx].payload_words = payload_words;
131
+ output_jobs[idx]._pad = 0u;
132
+ atomicStore(&slots[slot_index].seq, h + queue.capacity);
133
+ return 1u;
134
+ }
135
+ } else if (diff < 0) {
136
+ return 0u;
137
+ }
138
+ }
139
+
140
+ return 0u;
141
+ }
142
+
143
+ @compute @workgroup_size(64)
144
+ fn enqueue_main(@builtin(global_invocation_id) gid: vec3<u32>) {
145
+ let idx = gid.x;
146
+ let job_count = enqueue_job_count();
147
+ if (idx >= job_count) {
148
+ return;
149
+ }
150
+ if (!queue_config_valid()) {
151
+ return;
152
+ }
153
+ if (status[idx] == 1u) {
154
+ return;
155
+ }
156
+
157
+ let ok = enqueue(idx);
158
+ if (ok == 1u) {
159
+ status[idx] = 1u;
160
+ }
161
+ }
162
+
163
+ @compute @workgroup_size(64)
164
+ fn dequeue_main(@builtin(global_invocation_id) gid: vec3<u32>) {
165
+ let idx = gid.x;
166
+ let job_count = dequeue_job_count();
167
+ if (idx >= job_count) {
168
+ return;
169
+ }
170
+ if (!queue_config_valid()) {
171
+ return;
172
+ }
173
+ if (status[idx] == 1u) {
174
+ return;
175
+ }
176
+
177
+ let ok = dequeue(idx);
178
+ if (ok == 1u) {
179
+ status[idx] = 1u;
180
+ }
181
+ }
package/package.json CHANGED
@@ -1,11 +1,14 @@
1
1
  {
2
2
  "name": "@plasius/gpu-lock-free-queue",
3
- "version": "0.1.2-beta.0",
3
+ "version": "0.2.1",
4
4
  "description": "WebGPU lock-free MPMC ring queue with sequence counters.",
5
5
  "type": "module",
6
6
  "sideEffects": false,
7
7
  "private": false,
8
+ "main": "./dist/index.cjs",
9
+ "module": "./dist/index.js",
8
10
  "files": [
11
+ "dist",
9
12
  "src",
10
13
  "README.md",
11
14
  "CHANGELOG.md",
@@ -13,11 +16,15 @@
13
16
  "legal"
14
17
  ],
15
18
  "exports": {
16
- ".": "./src/index.js",
17
- "./queue.wgsl": "./src/queue.wgsl",
19
+ ".": {
20
+ "import": "./dist/index.js",
21
+ "require": "./dist/index.cjs"
22
+ },
23
+ "./queue.wgsl": "./dist/queue.wgsl",
18
24
  "./package.json": "./package.json"
19
25
  },
20
26
  "scripts": {
27
+ "build": "tsup && cp src/queue.wgsl dist/queue.wgsl",
21
28
  "demo": "python3 -m http.server",
22
29
  "test": "npm run test:unit",
23
30
  "test:unit": "node --test",
@@ -40,7 +47,9 @@
40
47
  "license": "Apache-2.0",
41
48
  "devDependencies": {
42
49
  "@playwright/test": "^1.57.0",
43
- "c8": "^10.1.3"
50
+ "c8": "^10.1.3",
51
+ "tsup": "^8.5.0",
52
+ "typescript": "^5.9.3"
44
53
  },
45
54
  "repository": {
46
55
  "type": "git",
package/src/index.cjs ADDED
@@ -0,0 +1,27 @@
1
+ const { pathToFileURL, fileURLToPath } = require("node:url");
2
+ const { readFile } = require("node:fs/promises");
3
+
4
+ const queueWgslUrl = new URL("./queue.wgsl", pathToFileURL(__filename));
5
+
6
+ async function loadQueueWgsl(options = {}) {
7
+ const { url = queueWgslUrl, fetcher = globalThis.fetch } = options ?? {};
8
+ const wgslUrl = url instanceof URL ? url : new URL(url, queueWgslUrl);
9
+
10
+ if (!fetcher || wgslUrl.protocol === "file:") {
11
+ return readFile(fileURLToPath(wgslUrl), "utf8");
12
+ }
13
+
14
+ const response = await fetcher(wgslUrl);
15
+ if (!response.ok) {
16
+ const status = "status" in response ? response.status : "unknown";
17
+ const statusText = "statusText" in response ? response.statusText : "";
18
+ const detail = statusText ? `${status} ${statusText}` : `${status}`;
19
+ throw new Error(`Failed to load WGSL (${detail})`);
20
+ }
21
+ return response.text();
22
+ }
23
+
24
+ module.exports = {
25
+ queueWgslUrl,
26
+ loadQueueWgsl,
27
+ };
package/src/index.js CHANGED
@@ -1,6 +1,21 @@
1
1
  export const queueWgslUrl = new URL("./queue.wgsl", import.meta.url);
2
2
 
3
- export async function loadQueueWgsl() {
4
- const response = await fetch(queueWgslUrl);
3
+ export async function loadQueueWgsl(options = {}) {
4
+ const { url = queueWgslUrl, fetcher = globalThis.fetch } = options ?? {};
5
+ const wgslUrl = url instanceof URL ? url : new URL(url, queueWgslUrl);
6
+
7
+ if (!fetcher || wgslUrl.protocol === "file:") {
8
+ const { readFile } = await import("node:fs/promises");
9
+ const { fileURLToPath } = await import("node:url");
10
+ return readFile(fileURLToPath(wgslUrl), "utf8");
11
+ }
12
+
13
+ const response = await fetcher(wgslUrl);
14
+ if (!response.ok) {
15
+ const status = "status" in response ? response.status : "unknown";
16
+ const statusText = "statusText" in response ? response.statusText : "";
17
+ const detail = statusText ? `${status} ${statusText}` : `${status}`;
18
+ throw new Error(`Failed to load WGSL (${detail})`);
19
+ }
5
20
  return response.text();
6
21
  }
package/src/queue.wgsl CHANGED
@@ -3,30 +3,83 @@ struct Queue {
3
3
  tail: atomic<u32>,
4
4
  capacity: u32,
5
5
  mask: u32,
6
- _pad: vec2<u32>,
7
6
  };
8
7
 
9
8
  struct Slot {
10
9
  seq: atomic<u32>,
11
- value: u32,
12
- _pad: vec2<u32>,
10
+ job_type: u32,
11
+ payload_offset: u32,
12
+ payload_words: u32,
13
+ };
14
+
15
+ struct JobMeta {
16
+ job_type: u32,
17
+ payload_offset: u32,
18
+ payload_words: u32,
19
+ _pad: u32,
13
20
  };
14
21
 
15
22
  struct Params {
16
23
  job_count: u32,
17
- _pad: vec3<u32>,
24
+ output_stride: u32,
25
+ _pad: vec2<u32>,
18
26
  };
19
27
 
20
28
  @group(0) @binding(0) var<storage, read_write> queue: Queue;
21
29
  @group(0) @binding(1) var<storage, read_write> slots: array<Slot>;
22
- @group(0) @binding(2) var<storage, read> input_jobs: array<u32>;
23
- @group(0) @binding(3) var<storage, read_write> output_jobs: array<u32>;
24
- @group(0) @binding(4) var<storage, read_write> status: array<u32>;
25
- @group(0) @binding(5) var<uniform> params: Params;
30
+ @group(0) @binding(2) var<storage, read> input_jobs: array<JobMeta>;
31
+ @group(0) @binding(3) var<storage, read_write> output_jobs: array<JobMeta>;
32
+ @group(0) @binding(4) var<storage, read> input_payloads: array<u32>;
33
+ @group(0) @binding(5) var<storage, read_write> output_payloads: array<u32>;
34
+ @group(0) @binding(6) var<storage, read_write> status: array<u32>;
35
+ @group(0) @binding(7) var<uniform> params: Params;
26
36
 
27
37
  const MAX_RETRIES: u32 = 512u;
28
38
 
29
- fn enqueue(val: u32) -> u32 {
39
+ fn queue_config_valid() -> bool {
40
+ if (queue.capacity == 0u) {
41
+ return false;
42
+ }
43
+ if ((queue.capacity & (queue.capacity - 1u)) != 0u) {
44
+ return false;
45
+ }
46
+ if (queue.mask != queue.capacity - 1u) {
47
+ return false;
48
+ }
49
+ if (queue.capacity > arrayLength(&slots)) {
50
+ return false;
51
+ }
52
+ return true;
53
+ }
54
+
55
+ fn enqueue_job_count() -> u32 {
56
+ let count = min(params.job_count, arrayLength(&input_jobs));
57
+ return min(count, arrayLength(&status));
58
+ }
59
+
60
+ fn dequeue_job_count() -> u32 {
61
+ if (params.output_stride == 0u) {
62
+ return 0u;
63
+ }
64
+ let payload_jobs = arrayLength(&output_payloads) / params.output_stride;
65
+ var count = min(params.job_count, arrayLength(&output_jobs));
66
+ count = min(count, payload_jobs);
67
+ return min(count, arrayLength(&status));
68
+ }
69
+
70
+ fn queue_len() -> u32 {
71
+ let h = atomicLoad(&queue.head);
72
+ let t = atomicLoad(&queue.tail);
73
+ return t - h;
74
+ }
75
+
76
+ fn enqueue(idx: u32) -> u32 {
77
+ let job = input_jobs[idx];
78
+ let payload_words = job.payload_words;
79
+ let input_offset = job.payload_offset;
80
+ if (input_offset + payload_words > arrayLength(&input_payloads)) {
81
+ return 0u;
82
+ }
30
83
  for (var attempt: u32 = 0u; attempt < MAX_RETRIES; attempt++) {
31
84
  let t = atomicLoad(&queue.tail);
32
85
  let slot_index = t & queue.mask;
@@ -36,7 +89,9 @@ fn enqueue(val: u32) -> u32 {
36
89
  if (diff == 0) {
37
90
  let res = atomicCompareExchangeWeak(&queue.tail, t, t + 1u);
38
91
  if (res.exchanged) {
39
- slots[slot_index].value = val;
92
+ slots[slot_index].job_type = job.job_type;
93
+ slots[slot_index].payload_offset = input_offset;
94
+ slots[slot_index].payload_words = payload_words;
40
95
  atomicStore(&slots[slot_index].seq, t + 1u);
41
96
  return 1u;
42
97
  }
@@ -58,8 +113,22 @@ fn dequeue(idx: u32) -> u32 {
58
113
  if (diff == 0) {
59
114
  let res = atomicCompareExchangeWeak(&queue.head, h, h + 1u);
60
115
  if (res.exchanged) {
61
- let val = slots[slot_index].value;
62
- output_jobs[idx] = val;
116
+ let payload_offset = slots[slot_index].payload_offset;
117
+ let payload_words = slots[slot_index].payload_words;
118
+ let job_type = slots[slot_index].job_type;
119
+ let output_stride = params.output_stride;
120
+ let dst_base = idx * output_stride;
121
+ let copy_words = min(payload_words, output_stride);
122
+ for (var i: u32 = 0u; i < copy_words; i = i + 1u) {
123
+ output_payloads[dst_base + i] = input_payloads[payload_offset + i];
124
+ }
125
+ for (var i: u32 = copy_words; i < output_stride; i = i + 1u) {
126
+ output_payloads[dst_base + i] = 0u;
127
+ }
128
+ output_jobs[idx].job_type = job_type;
129
+ output_jobs[idx].payload_offset = payload_offset;
130
+ output_jobs[idx].payload_words = payload_words;
131
+ output_jobs[idx]._pad = 0u;
63
132
  atomicStore(&slots[slot_index].seq, h + queue.capacity);
64
133
  return 1u;
65
134
  }
@@ -74,14 +143,18 @@ fn dequeue(idx: u32) -> u32 {
74
143
  @compute @workgroup_size(64)
75
144
  fn enqueue_main(@builtin(global_invocation_id) gid: vec3<u32>) {
76
145
  let idx = gid.x;
77
- if (idx >= params.job_count) {
146
+ let job_count = enqueue_job_count();
147
+ if (idx >= job_count) {
148
+ return;
149
+ }
150
+ if (!queue_config_valid()) {
78
151
  return;
79
152
  }
80
153
  if (status[idx] == 1u) {
81
154
  return;
82
155
  }
83
156
 
84
- let ok = enqueue(input_jobs[idx]);
157
+ let ok = enqueue(idx);
85
158
  if (ok == 1u) {
86
159
  status[idx] = 1u;
87
160
  }
@@ -90,7 +163,11 @@ fn enqueue_main(@builtin(global_invocation_id) gid: vec3<u32>) {
90
163
  @compute @workgroup_size(64)
91
164
  fn dequeue_main(@builtin(global_invocation_id) gid: vec3<u32>) {
92
165
  let idx = gid.x;
93
- if (idx >= params.job_count) {
166
+ let job_count = dequeue_job_count();
167
+ if (idx >= job_count) {
168
+ return;
169
+ }
170
+ if (!queue_config_valid()) {
94
171
  return;
95
172
  }
96
173
  if (status[idx] == 1u) {