@plasius/gpu-lock-free-queue 0.1.2 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -20,6 +20,29 @@ The format is based on **[Keep a Changelog](https://keepachangelog.com/en/1.1.0/
20
20
  - **Security**
21
21
  - (placeholder)
22
22
 
23
+ ## [0.2.1] - 2026-01-23
24
+
25
+ - **Changed**
26
+ - **Breaking:** Queue payloads are now referenced by fixed metadata offsets into caller-managed payload buffers (no internal payload arena).
27
+ - **Breaking:** Queue header and bindings updated to remove payload arena fields and buffer.
28
+ - Demo and tests updated to reflect the new payload-handle layout.
29
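+ - **Breaking:** Queue header now includes payload arena head/tail and capacity/mask fields. (This appears to describe an intermediate layout: the released 0.2.1 header is `{ head, tail, capacity, mask }`, with the payload arena fields removed as noted above.)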
30
+ - Queue helpers now expose a `queue_len` backlog snapshot for schedulers.
31
+ - Demo and tests updated to use job metadata and variable payload copies.
32
+
33
+ - **Fixed**
34
+ - Payload allocations now validate arena capacity before enqueue.
35
+
36
+ ## [0.2.0] - 2026-01-23
37
+
38
+ - **Changed**
39
+ - **Breaking:** WGSL bindings now include a dedicated payload ring buffer plus input/output payload buffers.
40
+ - Queue headers now carry `payload_stride` (u32 words) and job payloads are copied into the ring on enqueue.
41
+ - Demo and tests updated to use payload buffers instead of `input_jobs`/`output_jobs`.
42
+
43
+ - **Fixed**
44
+ - Payload job counts now clamp to payload buffer lengths to prevent overruns.
45
+
23
46
  ## [0.1.2] - 2026-01-22
24
47
 
25
48
  - **Added**
@@ -51,3 +74,5 @@ The format is based on **[Keep a Changelog](https://keepachangelog.com/en/1.1.0/
51
74
 
52
75
  [0.1.0]: https://github.com/Plasius-LTD/gpu-lock-free-queue/releases/tag/v0.1.0
53
76
  [0.1.2]: https://github.com/Plasius-LTD/gpu-lock-free-queue/releases/tag/v0.1.2
77
+ [0.2.0]: https://github.com/Plasius-LTD/gpu-lock-free-queue/releases/tag/v0.2.0
78
+ [0.2.1]: https://github.com/Plasius-LTD/gpu-lock-free-queue/releases/tag/v0.2.1
package/README.md CHANGED
@@ -25,11 +25,24 @@ const shaderCode = await loadQueueWgsl();
25
25
  ## What this is
26
26
  - Lock-free multi-producer, multi-consumer ring queue on the GPU.
27
27
  - Uses per-slot sequence numbers to avoid ABA for slots within a 32-bit epoch.
28
- - Fixed-size jobs (u32) for now; a "job" can be expanded to a fixed-size struct or an index into a separate payload buffer.
28
+ - Fixed-size job metadata with payload offsets into a caller-managed data arena or buffer.
29
+
30
+ ## Buffer layout (breaking change in v0.2.1)
31
+ Bindings are:
32
+ 1. `@binding(0)` queue header: `{ head, tail, capacity, mask }`
33
+ 2. `@binding(1)` slot array (`Slot` with `seq`, `job_type`, `payload_offset`, `payload_words`)
34
+ 3. `@binding(2)` input jobs (`array<JobMeta>` with `job_type`, `payload_offset`, `payload_words`)
35
+ 4. `@binding(3)` output jobs (`array<JobMeta>` with `job_type`, `payload_offset`, `payload_words`)
36
+ 5. `@binding(4)` input payloads (`array<u32>`, payload data referenced by `input_jobs.payload_offset`)
37
+ 6. `@binding(5)` output payloads (`array<u32>`, length `job_count * output_stride`)
38
+ 7. `@binding(6)` status flags (`array<u32>`, length `job_count`)
39
+ 8. `@binding(7)` params (`Params` with `job_count`, `output_stride`)
40
+
41
+ `output_stride` is the per-job output stride (u32 words) used when copying payloads into `output_payloads`.
29
42
 
30
43
  ## Limitations
31
44
  - Sequence counters are 32-bit. At extreme throughput over a long time, counters wrap and ABA can reappear. If you need true long-running safety, consider a reset protocol, sharding, or a future 64-bit atomic extension.
32
- - Jobs are fixed-size and must be power-of-two capacity.
45
+ - Payload lifetimes are managed by the caller. Ensure payload buffers remain valid until consumers finish, or use frame-bounded arenas/generation handles.
33
46
  - This demo is intentionally minimal; it is not yet integrated with a scheduler or backpressure policy.
34
47
 
35
48
  ## Run the demo
@@ -58,5 +71,5 @@ npm run test:e2e
58
71
  - `src/queue.wgsl`: Lock-free queue implementation.
59
72
  - `src/index.js`: Package entry point for loading the WGSL file.
60
73
 
61
- ## Job shape
62
- Current jobs are `u32` values. If you need richer jobs, use a fixed-size struct (e.g., 16 bytes) or store indices into a separate payload buffer. Variable-length jobs should be modeled as an index + length into a payload arena to keep the queue fixed-size.
74
+ ## Payload shape
75
+ Payloads are variable-length chunks stored in a caller-managed buffer. Each job specifies `job_type`, `payload_offset`, and `payload_words` in `input_jobs`; dequeue copies payloads from `input_payloads` into `output_payloads` using `output_stride` and mirrors the metadata into `output_jobs`. Each job's output region is exactly `output_stride` words: payloads longer than `output_stride` are truncated, and shorter payloads are zero-padded. If you need `f32`, store `bitcast<u32>(value)` and reinterpret on the consumer side.
package/dist/index.cjs CHANGED
@@ -1,46 +1,11 @@
1
- var __create = Object.create;
2
- var __defProp = Object.defineProperty;
3
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
- var __getOwnPropNames = Object.getOwnPropertyNames;
5
- var __getProtoOf = Object.getPrototypeOf;
6
- var __hasOwnProp = Object.prototype.hasOwnProperty;
7
- var __export = (target, all) => {
8
- for (var name in all)
9
- __defProp(target, name, { get: all[name], enumerable: true });
10
- };
11
- var __copyProps = (to, from, except, desc) => {
12
- if (from && typeof from === "object" || typeof from === "function") {
13
- for (let key of __getOwnPropNames(from))
14
- if (!__hasOwnProp.call(to, key) && key !== except)
15
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
16
- }
17
- return to;
18
- };
19
- var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
20
- // If the importer is in node compatibility mode or this is not an ESM
21
- // file that has been converted to a CommonJS file using a Babel-
22
- // compatible transform (i.e. "__esModule" has not been set), then set
23
- // "default" to the CommonJS "module.exports" for node compatibility.
24
- isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
25
- mod
26
- ));
27
- var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
28
-
29
- // src/index.js
30
- var index_exports = {};
31
- __export(index_exports, {
32
- loadQueueWgsl: () => loadQueueWgsl,
33
- queueWgslUrl: () => queueWgslUrl
34
- });
35
- module.exports = __toCommonJS(index_exports);
36
- var import_meta = {};
37
- var queueWgslUrl = new URL("./queue.wgsl", import_meta.url);
1
+ // src/index.cjs
2
+ var { pathToFileURL, fileURLToPath } = require("url");
3
+ var { readFile } = require("fs/promises");
4
+ var queueWgslUrl = new URL("./queue.wgsl", pathToFileURL(__filename));
38
5
  async function loadQueueWgsl(options = {}) {
39
6
  const { url = queueWgslUrl, fetcher = globalThis.fetch } = options ?? {};
40
7
  const wgslUrl = url instanceof URL ? url : new URL(url, queueWgslUrl);
41
8
  if (!fetcher || wgslUrl.protocol === "file:") {
42
- const { readFile } = await import("fs/promises");
43
- const { fileURLToPath } = await import("url");
44
9
  return readFile(fileURLToPath(wgslUrl), "utf8");
45
10
  }
46
11
  const response = await fetcher(wgslUrl);
@@ -52,9 +17,8 @@ async function loadQueueWgsl(options = {}) {
52
17
  }
53
18
  return response.text();
54
19
  }
55
- // Annotate the CommonJS export names for ESM import in node:
56
- 0 && (module.exports = {
57
- loadQueueWgsl,
58
- queueWgslUrl
59
- });
20
+ module.exports = {
21
+ queueWgslUrl,
22
+ loadQueueWgsl
23
+ };
60
24
  //# sourceMappingURL=index.cjs.map
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.js"],"sourcesContent":["export const queueWgslUrl = new URL(\"./queue.wgsl\", import.meta.url);\n\nexport async function loadQueueWgsl(options = {}) {\n const { url = queueWgslUrl, fetcher = globalThis.fetch } = options ?? {};\n const wgslUrl = url instanceof URL ? url : new URL(url, queueWgslUrl);\n\n if (!fetcher || wgslUrl.protocol === \"file:\") {\n const { readFile } = await import(\"node:fs/promises\");\n const { fileURLToPath } = await import(\"node:url\");\n return readFile(fileURLToPath(wgslUrl), \"utf8\");\n }\n\n const response = await fetcher(wgslUrl);\n if (!response.ok) {\n const status = \"status\" in response ? response.status : \"unknown\";\n const statusText = \"statusText\" in response ? response.statusText : \"\";\n const detail = statusText ? `${status} ${statusText}` : `${status}`;\n throw new Error(`Failed to load WGSL (${detail})`);\n }\n return response.text();\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAO,IAAM,eAAe,IAAI,IAAI,gBAAgB,YAAY,GAAG;AAEnE,eAAsB,cAAc,UAAU,CAAC,GAAG;AAChD,QAAM,EAAE,MAAM,cAAc,UAAU,WAAW,MAAM,IAAI,WAAW,CAAC;AACvE,QAAM,UAAU,eAAe,MAAM,MAAM,IAAI,IAAI,KAAK,YAAY;AAEpE,MAAI,CAAC,WAAW,QAAQ,aAAa,SAAS;AAC5C,UAAM,EAAE,SAAS,IAAI,MAAM,OAAO,aAAkB;AACpD,UAAM,EAAE,cAAc,IAAI,MAAM,OAAO,KAAU;AACjD,WAAO,SAAS,cAAc,OAAO,GAAG,MAAM;AAAA,EAChD;AAEA,QAAM,WAAW,MAAM,QAAQ,OAAO;AACtC,MAAI,CAAC,SAAS,IAAI;AAChB,UAAM,SAAS,YAAY,WAAW,SAAS,SAAS;AACxD,UAAM,aAAa,gBAAgB,WAAW,SAAS,aAAa;AACpE,UAAM,SAAS,aAAa,GAAG,MAAM,IAAI,UAAU,KAAK,GAAG,MAAM;AACjE,UAAM,IAAI,MAAM,wBAAwB,MAAM,GAAG;AAAA,EACnD;AACA,SAAO,SAAS,KAAK;AACvB;","names":[]}
1
+ {"version":3,"sources":["../src/index.cjs"],"sourcesContent":["const { pathToFileURL, fileURLToPath } = require(\"node:url\");\nconst { readFile } = require(\"node:fs/promises\");\n\nconst queueWgslUrl = new URL(\"./queue.wgsl\", pathToFileURL(__filename));\n\nasync function loadQueueWgsl(options = {}) {\n const { url = queueWgslUrl, fetcher = globalThis.fetch } = options ?? {};\n const wgslUrl = url instanceof URL ? url : new URL(url, queueWgslUrl);\n\n if (!fetcher || wgslUrl.protocol === \"file:\") {\n return readFile(fileURLToPath(wgslUrl), \"utf8\");\n }\n\n const response = await fetcher(wgslUrl);\n if (!response.ok) {\n const status = \"status\" in response ? response.status : \"unknown\";\n const statusText = \"statusText\" in response ? response.statusText : \"\";\n const detail = statusText ? `${status} ${statusText}` : `${status}`;\n throw new Error(`Failed to load WGSL (${detail})`);\n }\n return response.text();\n}\n\nmodule.exports = {\n queueWgslUrl,\n loadQueueWgsl,\n};\n"],"mappings":";AAAA,IAAM,EAAE,eAAe,cAAc,IAAI,QAAQ,KAAU;AAC3D,IAAM,EAAE,SAAS,IAAI,QAAQ,aAAkB;AAE/C,IAAM,eAAe,IAAI,IAAI,gBAAgB,cAAc,UAAU,CAAC;AAEtE,eAAe,cAAc,UAAU,CAAC,GAAG;AACzC,QAAM,EAAE,MAAM,cAAc,UAAU,WAAW,MAAM,IAAI,WAAW,CAAC;AACvE,QAAM,UAAU,eAAe,MAAM,MAAM,IAAI,IAAI,KAAK,YAAY;AAEpE,MAAI,CAAC,WAAW,QAAQ,aAAa,SAAS;AAC5C,WAAO,SAAS,cAAc,OAAO,GAAG,MAAM;AAAA,EAChD;AAEA,QAAM,WAAW,MAAM,QAAQ,OAAO;AACtC,MAAI,CAAC,SAAS,IAAI;AAChB,UAAM,SAAS,YAAY,WAAW,SAAS,SAAS;AACxD,UAAM,aAAa,gBAAgB,WAAW,SAAS,aAAa;AACpE,UAAM,SAAS,aAAa,GAAG,MAAM,IAAI,UAAU,KAAK,GAAG,MAAM;AACjE,UAAM,IAAI,MAAM,wBAAwB,MAAM,GAAG;AAAA,EACnD;AACA,SAAO,SAAS,KAAK;AACvB;AAEA,OAAO,UAAU;AAAA,EACf;AAAA,EACA;AACF;","names":[]}
package/dist/queue.wgsl CHANGED
@@ -3,26 +3,36 @@ struct Queue {
3
3
  tail: atomic<u32>,
4
4
  capacity: u32,
5
5
  mask: u32,
6
- _pad: vec2<u32>,
7
6
  };
8
7
 
9
8
  struct Slot {
10
9
  seq: atomic<u32>,
11
- value: u32,
12
- _pad: vec2<u32>,
10
+ job_type: u32,
11
+ payload_offset: u32,
12
+ payload_words: u32,
13
+ };
14
+
15
+ struct JobMeta {
16
+ job_type: u32,
17
+ payload_offset: u32,
18
+ payload_words: u32,
19
+ _pad: u32,
13
20
  };
14
21
 
15
22
  struct Params {
16
23
  job_count: u32,
17
- _pad: vec3<u32>,
24
+ output_stride: u32,
25
+ _pad: vec2<u32>,
18
26
  };
19
27
 
20
28
  @group(0) @binding(0) var<storage, read_write> queue: Queue;
21
29
  @group(0) @binding(1) var<storage, read_write> slots: array<Slot>;
22
- @group(0) @binding(2) var<storage, read> input_jobs: array<u32>;
23
- @group(0) @binding(3) var<storage, read_write> output_jobs: array<u32>;
24
- @group(0) @binding(4) var<storage, read_write> status: array<u32>;
25
- @group(0) @binding(5) var<uniform> params: Params;
30
+ @group(0) @binding(2) var<storage, read> input_jobs: array<JobMeta>;
31
+ @group(0) @binding(3) var<storage, read_write> output_jobs: array<JobMeta>;
32
+ @group(0) @binding(4) var<storage, read> input_payloads: array<u32>;
33
+ @group(0) @binding(5) var<storage, read_write> output_payloads: array<u32>;
34
+ @group(0) @binding(6) var<storage, read_write> status: array<u32>;
35
+ @group(0) @binding(7) var<uniform> params: Params;
26
36
 
27
37
  const MAX_RETRIES: u32 = 512u;
28
38
 
@@ -48,11 +58,28 @@ fn enqueue_job_count() -> u32 {
48
58
  }
49
59
 
50
60
  fn dequeue_job_count() -> u32 {
51
- let count = min(params.job_count, arrayLength(&output_jobs));
61
+ if (params.output_stride == 0u) {
62
+ return 0u;
63
+ }
64
+ let payload_jobs = arrayLength(&output_payloads) / params.output_stride;
65
+ var count = min(params.job_count, arrayLength(&output_jobs));
66
+ count = min(count, payload_jobs);
52
67
  return min(count, arrayLength(&status));
53
68
  }
54
69
 
55
- fn enqueue(val: u32) -> u32 {
70
+ fn queue_len() -> u32 {
71
+ let h = atomicLoad(&queue.head);
72
+ let t = atomicLoad(&queue.tail);
73
+ return t - h;
74
+ }
75
+
76
+ fn enqueue(idx: u32) -> u32 {
77
+ let job = input_jobs[idx];
78
+ let payload_words = job.payload_words;
79
+ let input_offset = job.payload_offset;
80
+ if (input_offset + payload_words > arrayLength(&input_payloads)) {
81
+ return 0u;
82
+ }
56
83
  for (var attempt: u32 = 0u; attempt < MAX_RETRIES; attempt++) {
57
84
  let t = atomicLoad(&queue.tail);
58
85
  let slot_index = t & queue.mask;
@@ -62,7 +89,9 @@ fn enqueue(val: u32) -> u32 {
62
89
  if (diff == 0) {
63
90
  let res = atomicCompareExchangeWeak(&queue.tail, t, t + 1u);
64
91
  if (res.exchanged) {
65
- slots[slot_index].value = val;
92
+ slots[slot_index].job_type = job.job_type;
93
+ slots[slot_index].payload_offset = input_offset;
94
+ slots[slot_index].payload_words = payload_words;
66
95
  atomicStore(&slots[slot_index].seq, t + 1u);
67
96
  return 1u;
68
97
  }
@@ -84,8 +113,22 @@ fn dequeue(idx: u32) -> u32 {
84
113
  if (diff == 0) {
85
114
  let res = atomicCompareExchangeWeak(&queue.head, h, h + 1u);
86
115
  if (res.exchanged) {
87
- let val = slots[slot_index].value;
88
- output_jobs[idx] = val;
116
+ let payload_offset = slots[slot_index].payload_offset;
117
+ let payload_words = slots[slot_index].payload_words;
118
+ let job_type = slots[slot_index].job_type;
119
+ let output_stride = params.output_stride;
120
+ let dst_base = idx * output_stride;
121
+ let copy_words = min(payload_words, output_stride);
122
+ for (var i: u32 = 0u; i < copy_words; i = i + 1u) {
123
+ output_payloads[dst_base + i] = input_payloads[payload_offset + i];
124
+ }
125
+ for (var i: u32 = copy_words; i < output_stride; i = i + 1u) {
126
+ output_payloads[dst_base + i] = 0u;
127
+ }
128
+ output_jobs[idx].job_type = job_type;
129
+ output_jobs[idx].payload_offset = payload_offset;
130
+ output_jobs[idx].payload_words = payload_words;
131
+ output_jobs[idx]._pad = 0u;
89
132
  atomicStore(&slots[slot_index].seq, h + queue.capacity);
90
133
  return 1u;
91
134
  }
@@ -111,7 +154,7 @@ fn enqueue_main(@builtin(global_invocation_id) gid: vec3<u32>) {
111
154
  return;
112
155
  }
113
156
 
114
- let ok = enqueue(input_jobs[idx]);
157
+ let ok = enqueue(idx);
115
158
  if (ok == 1u) {
116
159
  status[idx] = 1u;
117
160
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@plasius/gpu-lock-free-queue",
3
- "version": "0.1.2",
3
+ "version": "0.2.1",
4
4
  "description": "WebGPU lock-free MPMC ring queue with sequence counters.",
5
5
  "type": "module",
6
6
  "sideEffects": false,
package/src/index.cjs ADDED
@@ -0,0 +1,27 @@
1
+ const { pathToFileURL, fileURLToPath } = require("node:url");
2
+ const { readFile } = require("node:fs/promises");
3
+
4
+ const queueWgslUrl = new URL("./queue.wgsl", pathToFileURL(__filename));
5
+
6
+ async function loadQueueWgsl(options = {}) {
7
+ const { url = queueWgslUrl, fetcher = globalThis.fetch } = options ?? {};
8
+ const wgslUrl = url instanceof URL ? url : new URL(url, queueWgslUrl);
9
+
10
+ if (!fetcher || wgslUrl.protocol === "file:") {
11
+ return readFile(fileURLToPath(wgslUrl), "utf8");
12
+ }
13
+
14
+ const response = await fetcher(wgslUrl);
15
+ if (!response.ok) {
16
+ const status = "status" in response ? response.status : "unknown";
17
+ const statusText = "statusText" in response ? response.statusText : "";
18
+ const detail = statusText ? `${status} ${statusText}` : `${status}`;
19
+ throw new Error(`Failed to load WGSL (${detail})`);
20
+ }
21
+ return response.text();
22
+ }
23
+
24
+ module.exports = {
25
+ queueWgslUrl,
26
+ loadQueueWgsl,
27
+ };
package/src/queue.wgsl CHANGED
@@ -3,26 +3,36 @@ struct Queue {
3
3
  tail: atomic<u32>,
4
4
  capacity: u32,
5
5
  mask: u32,
6
- _pad: vec2<u32>,
7
6
  };
8
7
 
9
8
  struct Slot {
10
9
  seq: atomic<u32>,
11
- value: u32,
12
- _pad: vec2<u32>,
10
+ job_type: u32,
11
+ payload_offset: u32,
12
+ payload_words: u32,
13
+ };
14
+
15
+ struct JobMeta {
16
+ job_type: u32,
17
+ payload_offset: u32,
18
+ payload_words: u32,
19
+ _pad: u32,
13
20
  };
14
21
 
15
22
  struct Params {
16
23
  job_count: u32,
17
- _pad: vec3<u32>,
24
+ output_stride: u32,
25
+ _pad: vec2<u32>,
18
26
  };
19
27
 
20
28
  @group(0) @binding(0) var<storage, read_write> queue: Queue;
21
29
  @group(0) @binding(1) var<storage, read_write> slots: array<Slot>;
22
- @group(0) @binding(2) var<storage, read> input_jobs: array<u32>;
23
- @group(0) @binding(3) var<storage, read_write> output_jobs: array<u32>;
24
- @group(0) @binding(4) var<storage, read_write> status: array<u32>;
25
- @group(0) @binding(5) var<uniform> params: Params;
30
+ @group(0) @binding(2) var<storage, read> input_jobs: array<JobMeta>;
31
+ @group(0) @binding(3) var<storage, read_write> output_jobs: array<JobMeta>;
32
+ @group(0) @binding(4) var<storage, read> input_payloads: array<u32>;
33
+ @group(0) @binding(5) var<storage, read_write> output_payloads: array<u32>;
34
+ @group(0) @binding(6) var<storage, read_write> status: array<u32>;
35
+ @group(0) @binding(7) var<uniform> params: Params;
26
36
 
27
37
  const MAX_RETRIES: u32 = 512u;
28
38
 
@@ -48,11 +58,28 @@ fn enqueue_job_count() -> u32 {
48
58
  }
49
59
 
50
60
  fn dequeue_job_count() -> u32 {
51
- let count = min(params.job_count, arrayLength(&output_jobs));
61
+ if (params.output_stride == 0u) {
62
+ return 0u;
63
+ }
64
+ let payload_jobs = arrayLength(&output_payloads) / params.output_stride;
65
+ var count = min(params.job_count, arrayLength(&output_jobs));
66
+ count = min(count, payload_jobs);
52
67
  return min(count, arrayLength(&status));
53
68
  }
54
69
 
55
- fn enqueue(val: u32) -> u32 {
70
+ fn queue_len() -> u32 {
71
+ let h = atomicLoad(&queue.head);
72
+ let t = atomicLoad(&queue.tail);
73
+ return t - h;
74
+ }
75
+
76
+ fn enqueue(idx: u32) -> u32 {
77
+ let job = input_jobs[idx];
78
+ let payload_words = job.payload_words;
79
+ let input_offset = job.payload_offset;
80
+ if (input_offset + payload_words > arrayLength(&input_payloads)) {
81
+ return 0u;
82
+ }
56
83
  for (var attempt: u32 = 0u; attempt < MAX_RETRIES; attempt++) {
57
84
  let t = atomicLoad(&queue.tail);
58
85
  let slot_index = t & queue.mask;
@@ -62,7 +89,9 @@ fn enqueue(val: u32) -> u32 {
62
89
  if (diff == 0) {
63
90
  let res = atomicCompareExchangeWeak(&queue.tail, t, t + 1u);
64
91
  if (res.exchanged) {
65
- slots[slot_index].value = val;
92
+ slots[slot_index].job_type = job.job_type;
93
+ slots[slot_index].payload_offset = input_offset;
94
+ slots[slot_index].payload_words = payload_words;
66
95
  atomicStore(&slots[slot_index].seq, t + 1u);
67
96
  return 1u;
68
97
  }
@@ -84,8 +113,22 @@ fn dequeue(idx: u32) -> u32 {
84
113
  if (diff == 0) {
85
114
  let res = atomicCompareExchangeWeak(&queue.head, h, h + 1u);
86
115
  if (res.exchanged) {
87
- let val = slots[slot_index].value;
88
- output_jobs[idx] = val;
116
+ let payload_offset = slots[slot_index].payload_offset;
117
+ let payload_words = slots[slot_index].payload_words;
118
+ let job_type = slots[slot_index].job_type;
119
+ let output_stride = params.output_stride;
120
+ let dst_base = idx * output_stride;
121
+ let copy_words = min(payload_words, output_stride);
122
+ for (var i: u32 = 0u; i < copy_words; i = i + 1u) {
123
+ output_payloads[dst_base + i] = input_payloads[payload_offset + i];
124
+ }
125
+ for (var i: u32 = copy_words; i < output_stride; i = i + 1u) {
126
+ output_payloads[dst_base + i] = 0u;
127
+ }
128
+ output_jobs[idx].job_type = job_type;
129
+ output_jobs[idx].payload_offset = payload_offset;
130
+ output_jobs[idx].payload_words = payload_words;
131
+ output_jobs[idx]._pad = 0u;
89
132
  atomicStore(&slots[slot_index].seq, h + queue.capacity);
90
133
  return 1u;
91
134
  }
@@ -111,7 +154,7 @@ fn enqueue_main(@builtin(global_invocation_id) gid: vec3<u32>) {
111
154
  return;
112
155
  }
113
156
 
114
- let ok = enqueue(input_jobs[idx]);
157
+ let ok = enqueue(idx);
115
158
  if (ok == 1u) {
116
159
  status[idx] = 1u;
117
160
  }