@plasius/gpu-worker 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/worker.wgsl CHANGED
@@ -1,265 +1,37 @@
1
- struct SimParams {
2
- count: u32,
3
- steps: u32,
4
- _pad0: vec2<u32>,
5
- dt: f32,
6
- range: f32,
7
- _pad1: vec2<f32>,
8
- bounds_min: vec4<f32>,
9
- bounds_max: vec4<f32>,
10
- sensor: vec4<f32>,
11
- };
12
-
13
- struct Instance {
14
- pos: vec4<f32>,
15
- half: vec4<f32>,
16
- vel: vec4<f32>,
17
- };
18
-
19
- struct Result {
20
- aabb_min: vec4<f32>,
21
- aabb_max: vec4<f32>,
22
- sphere: vec4<f32>,
23
- metrics: vec4<f32>,
24
- };
25
-
26
- struct BounceResult {
27
- pos: vec3<f32>,
28
- vel: vec3<f32>,
29
- mask: u32,
30
- };
31
-
32
- @group(1) @binding(0) var<storage, read_write> instances: array<Instance>;
33
- @group(1) @binding(1) var<storage, read_write> results: array<Result>;
34
- @group(1) @binding(2) var<uniform> sim: SimParams;
35
- @group(1) @binding(3) var<storage, read_write> stats: array<atomic<u32>>;
36
-
37
- const STAT_IN_RANGE: u32 = 0u;
38
- const STAT_FACE_CONTACTS: u32 = 1u;
39
- const STAT_FACE_X_NEG: u32 = 2u;
40
- const STAT_FACE_X_POS: u32 = 3u;
41
- const STAT_FACE_Y_NEG: u32 = 4u;
42
- const STAT_FACE_Y_POS: u32 = 5u;
43
- const STAT_FACE_Z_NEG: u32 = 6u;
44
- const STAT_FACE_Z_POS: u32 = 7u;
45
- const STAT_BODY_CONTACTS: u32 = 8u;
46
-
47
- const BODY_CONTACT_FLAG: u32 = 64u;
48
- const COLLISION_SAMPLES: u32 = 24u;
49
- const COLLISION_BRUTE_FORCE_MAX: u32 = 2048u;
50
- const COLLISION_RESTITUTION: f32 = 0.65;
51
- const COLLISION_PUSH: f32 = 0.5;
52
- const COLLISION_EPSILON: f32 = 1e-5;
53
-
54
- fn apply_bounds(pos: vec3<f32>, vel: vec3<f32>, half: vec3<f32>) -> BounceResult {
55
- let min_bound = sim.bounds_min.xyz + half;
56
- let max_bound = sim.bounds_max.xyz - half;
57
- var p = pos;
58
- var v = vel;
59
- var mask: u32 = 0u;
60
-
61
- if (p.x < min_bound.x) {
62
- p.x = min_bound.x;
63
- v.x = abs(v.x);
64
- mask = mask | 1u;
65
- }
66
- if (p.x > max_bound.x) {
67
- p.x = max_bound.x;
68
- v.x = -abs(v.x);
69
- mask = mask | 2u;
70
- }
71
- if (p.y < min_bound.y) {
72
- p.y = min_bound.y;
73
- v.y = abs(v.y);
74
- mask = mask | 4u;
75
- }
76
- if (p.y > max_bound.y) {
77
- p.y = max_bound.y;
78
- v.y = -abs(v.y);
79
- mask = mask | 8u;
80
- }
81
- if (p.z < min_bound.z) {
82
- p.z = min_bound.z;
83
- v.z = abs(v.z);
84
- mask = mask | 16u;
85
- }
86
- if (p.z > max_bound.z) {
87
- p.z = max_bound.z;
88
- v.z = -abs(v.z);
89
- mask = mask | 32u;
90
- }
91
-
92
- return BounceResult(p, v, mask);
93
- }
94
-
95
- fn hash_u32(x: u32) -> u32 {
96
- var v = x;
97
- v = v ^ (v >> 16u);
98
- v = v * 0x7feb352du;
99
- v = v ^ (v >> 15u);
100
- v = v * 0x846ca68bu;
101
- v = v ^ (v >> 16u);
102
- return v;
1
+ // Minimal GPU worker entry point.
2
+ //
3
+ // This file is intended to be concatenated with the lock-free queue WGSL
4
+ // via assembleWorkerWgsl(). It only handles dequeue and dispatches to a
5
+ // user hook. Replace this file (or provide your own WGSL) to implement
6
+ // real workloads.
7
+
8
+ fn payload_word(job_index: u32, word_index: u32) -> u32 {
9
+ let stride = params.output_stride;
10
+ if (stride == 0u || word_index >= stride) {
11
+ return 0u;
12
+ }
13
+ let base = job_index * stride;
14
+ return output_payloads[base + word_index];
103
15
  }
104
16
 
105
- fn neighbor_index(seed: u32, salt: u32, count: u32) -> u32 {
106
- return hash_u32(seed ^ salt) % count;
107
- }
17
+ // process_job(job_index, job_type, payload_words) must be defined by the
18
+ // job WGSL that you concatenate before this file.
108
19
 
109
20
  @compute @workgroup_size(64)
110
- fn simulate_main(@builtin(global_invocation_id) gid: vec3<u32>) {
21
+ fn worker_main(@builtin(global_invocation_id) gid: vec3<u32>) {
111
22
  let idx = gid.x;
112
- if (idx >= params.job_count) {
113
- return;
114
- }
115
-
116
- let ok = dequeue(idx);
117
- if (ok == 0u) {
23
+ let job_count = dequeue_job_count();
24
+ if (idx >= job_count) {
118
25
  return;
119
26
  }
120
-
121
- let payload_words = output_jobs[idx].payload_words;
122
- if (payload_words == 0u) {
27
+ if (!queue_config_valid()) {
123
28
  return;
124
29
  }
125
- let payload_offset = output_jobs[idx].payload_offset;
126
- if (payload_offset + payload_words > arrayLength(&input_payloads)) {
127
- return;
128
- }
129
- let job = input_payloads[payload_offset];
130
- if (job >= sim.count) {
30
+ let ok = dequeue(idx);
31
+ if (ok == 0u) {
131
32
  return;
132
33
  }
133
34
 
134
- var inst = instances[job];
135
- var pos = inst.pos.xyz;
136
- var vel = inst.vel.xyz;
137
- let half = inst.half.xyz;
138
- var face_mask: u32 = 0u;
139
- var body_hits: u32 = 0u;
140
- let self_radius = length(half);
141
-
142
- for (var step: u32 = 0u; step < sim.steps; step = step + 1u) {
143
- pos = pos + vel * sim.dt;
144
- let bounce = apply_bounds(pos, vel, half);
145
- pos = bounce.pos;
146
- vel = bounce.vel;
147
- face_mask = face_mask | bounce.mask;
148
-
149
- if (sim.count <= COLLISION_BRUTE_FORCE_MAX) {
150
- for (var other_idx: u32 = 0u; other_idx < sim.count; other_idx = other_idx + 1u) {
151
- if (other_idx == job) {
152
- continue;
153
- }
154
- let other = instances[other_idx];
155
- let other_pos = other.pos.xyz;
156
- let other_vel = other.vel.xyz;
157
- let other_radius = length(other.half.xyz);
158
- let delta = pos - other_pos;
159
- let dist_sq = dot(delta, delta);
160
- let min_dist = self_radius + other_radius;
161
-
162
- if (dist_sq < min_dist * min_dist && dist_sq > COLLISION_EPSILON) {
163
- let dist = sqrt(dist_sq);
164
- let normal = delta / dist;
165
- let overlap = min_dist - dist;
166
- pos = pos + normal * (overlap * COLLISION_PUSH);
167
- let rel_vel = vel - other_vel;
168
- let approach = dot(rel_vel, normal);
169
- if (approach < 0.0) {
170
- vel = vel - normal * (approach * (1.0 + COLLISION_RESTITUTION)) * 0.5;
171
- }
172
- body_hits = body_hits + 1u;
173
- }
174
- }
175
- } else {
176
- let step_seed = job ^ (step * 0x9e3779b9u);
177
- for (var sample: u32 = 0u; sample < COLLISION_SAMPLES; sample = sample + 1u) {
178
- let neighbor = neighbor_index(
179
- step_seed,
180
- sample * 0x85ebca6bu + 0x27d4eb2du,
181
- sim.count,
182
- );
183
- if (neighbor == job) {
184
- continue;
185
- }
186
-
187
- let other = instances[neighbor];
188
- let other_pos = other.pos.xyz;
189
- let other_vel = other.vel.xyz;
190
- let other_radius = length(other.half.xyz);
191
- let delta = pos - other_pos;
192
- let dist_sq = dot(delta, delta);
193
- let min_dist = self_radius + other_radius;
194
-
195
- if (dist_sq < min_dist * min_dist && dist_sq > COLLISION_EPSILON) {
196
- let dist = sqrt(dist_sq);
197
- let normal = delta / dist;
198
- let overlap = min_dist - dist;
199
- pos = pos + normal * (overlap * COLLISION_PUSH);
200
- let rel_vel = vel - other_vel;
201
- let approach = dot(rel_vel, normal);
202
- if (approach < 0.0) {
203
- vel = vel - normal * (approach * (1.0 + COLLISION_RESTITUTION)) * 0.5;
204
- }
205
- body_hits = body_hits + 1u;
206
- }
207
- }
208
- }
209
-
210
- let bounce2 = apply_bounds(pos, vel, half);
211
- pos = bounce2.pos;
212
- vel = bounce2.vel;
213
- face_mask = face_mask | bounce2.mask;
214
- }
215
-
216
- let aabb_min = pos - half;
217
- let aabb_max = pos + half;
218
- let radius = length(half);
219
- let dist = length(pos - sim.sensor.xyz);
220
- var in_range: f32 = 0.0;
221
- if (dist <= sim.range + radius) {
222
- in_range = 1.0;
223
- }
224
- let speed = length(vel);
225
-
226
- let boundary_mask = face_mask;
227
- let face_hits = countOneBits(boundary_mask);
228
- if (in_range > 0.0) {
229
- atomicAdd(&stats[STAT_IN_RANGE], 1u);
230
- }
231
- if (face_hits > 0u) {
232
- atomicAdd(&stats[STAT_FACE_CONTACTS], face_hits);
233
- }
234
- if ((boundary_mask & 1u) != 0u) {
235
- atomicAdd(&stats[STAT_FACE_X_NEG], 1u);
236
- }
237
- if ((boundary_mask & 2u) != 0u) {
238
- atomicAdd(&stats[STAT_FACE_X_POS], 1u);
239
- }
240
- if ((boundary_mask & 4u) != 0u) {
241
- atomicAdd(&stats[STAT_FACE_Y_NEG], 1u);
242
- }
243
- if ((boundary_mask & 8u) != 0u) {
244
- atomicAdd(&stats[STAT_FACE_Y_POS], 1u);
245
- }
246
- if ((boundary_mask & 16u) != 0u) {
247
- atomicAdd(&stats[STAT_FACE_Z_NEG], 1u);
248
- }
249
- if ((boundary_mask & 32u) != 0u) {
250
- atomicAdd(&stats[STAT_FACE_Z_POS], 1u);
251
- }
252
- if (body_hits > 0u) {
253
- atomicAdd(&stats[STAT_BODY_CONTACTS], body_hits);
254
- face_mask = boundary_mask | BODY_CONTACT_FLAG;
255
- }
256
-
257
- inst.pos = vec4<f32>(pos, 1.0);
258
- inst.vel = vec4<f32>(vel, 0.0);
259
- instances[job] = inst;
260
-
261
- results[job].aabb_min = vec4<f32>(aabb_min, 0.0);
262
- results[job].aabb_max = vec4<f32>(aabb_max, 0.0);
263
- results[job].sphere = vec4<f32>(pos, radius);
264
- results[job].metrics = vec4<f32>(dist, speed, in_range, f32(face_mask));
35
+ let job_info = output_jobs[idx];
36
+ process_job(idx, job_info.job_type, job_info.payload_words);
265
37
  }