@plasius/gpu-worker 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -4
- package/README.md +66 -9
- package/dist/index.cjs +490 -6
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +478 -7
- package/dist/index.js.map +1 -1
- package/dist/worker.wgsl +24 -252
- package/package.json +1 -1
- package/src/index.js +532 -9
- package/src/worker.wgsl +24 -252
package/dist/worker.wgsl
CHANGED
|
@@ -1,265 +1,37 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
half: vec4<f32>,
|
|
16
|
-
vel: vec4<f32>,
|
|
17
|
-
};
|
|
18
|
-
|
|
19
|
-
struct Result {
|
|
20
|
-
aabb_min: vec4<f32>,
|
|
21
|
-
aabb_max: vec4<f32>,
|
|
22
|
-
sphere: vec4<f32>,
|
|
23
|
-
metrics: vec4<f32>,
|
|
24
|
-
};
|
|
25
|
-
|
|
26
|
-
struct BounceResult {
|
|
27
|
-
pos: vec3<f32>,
|
|
28
|
-
vel: vec3<f32>,
|
|
29
|
-
mask: u32,
|
|
30
|
-
};
|
|
31
|
-
|
|
32
|
-
@group(1) @binding(0) var<storage, read_write> instances: array<Instance>;
|
|
33
|
-
@group(1) @binding(1) var<storage, read_write> results: array<Result>;
|
|
34
|
-
@group(1) @binding(2) var<uniform> sim: SimParams;
|
|
35
|
-
@group(1) @binding(3) var<storage, read_write> stats: array<atomic<u32>>;
|
|
36
|
-
|
|
37
|
-
const STAT_IN_RANGE: u32 = 0u;
|
|
38
|
-
const STAT_FACE_CONTACTS: u32 = 1u;
|
|
39
|
-
const STAT_FACE_X_NEG: u32 = 2u;
|
|
40
|
-
const STAT_FACE_X_POS: u32 = 3u;
|
|
41
|
-
const STAT_FACE_Y_NEG: u32 = 4u;
|
|
42
|
-
const STAT_FACE_Y_POS: u32 = 5u;
|
|
43
|
-
const STAT_FACE_Z_NEG: u32 = 6u;
|
|
44
|
-
const STAT_FACE_Z_POS: u32 = 7u;
|
|
45
|
-
const STAT_BODY_CONTACTS: u32 = 8u;
|
|
46
|
-
|
|
47
|
-
const BODY_CONTACT_FLAG: u32 = 64u;
|
|
48
|
-
const COLLISION_SAMPLES: u32 = 24u;
|
|
49
|
-
const COLLISION_BRUTE_FORCE_MAX: u32 = 2048u;
|
|
50
|
-
const COLLISION_RESTITUTION: f32 = 0.65;
|
|
51
|
-
const COLLISION_PUSH: f32 = 0.5;
|
|
52
|
-
const COLLISION_EPSILON: f32 = 1e-5;
|
|
53
|
-
|
|
54
|
-
fn apply_bounds(pos: vec3<f32>, vel: vec3<f32>, half: vec3<f32>) -> BounceResult {
|
|
55
|
-
let min_bound = sim.bounds_min.xyz + half;
|
|
56
|
-
let max_bound = sim.bounds_max.xyz - half;
|
|
57
|
-
var p = pos;
|
|
58
|
-
var v = vel;
|
|
59
|
-
var mask: u32 = 0u;
|
|
60
|
-
|
|
61
|
-
if (p.x < min_bound.x) {
|
|
62
|
-
p.x = min_bound.x;
|
|
63
|
-
v.x = abs(v.x);
|
|
64
|
-
mask = mask | 1u;
|
|
65
|
-
}
|
|
66
|
-
if (p.x > max_bound.x) {
|
|
67
|
-
p.x = max_bound.x;
|
|
68
|
-
v.x = -abs(v.x);
|
|
69
|
-
mask = mask | 2u;
|
|
70
|
-
}
|
|
71
|
-
if (p.y < min_bound.y) {
|
|
72
|
-
p.y = min_bound.y;
|
|
73
|
-
v.y = abs(v.y);
|
|
74
|
-
mask = mask | 4u;
|
|
75
|
-
}
|
|
76
|
-
if (p.y > max_bound.y) {
|
|
77
|
-
p.y = max_bound.y;
|
|
78
|
-
v.y = -abs(v.y);
|
|
79
|
-
mask = mask | 8u;
|
|
80
|
-
}
|
|
81
|
-
if (p.z < min_bound.z) {
|
|
82
|
-
p.z = min_bound.z;
|
|
83
|
-
v.z = abs(v.z);
|
|
84
|
-
mask = mask | 16u;
|
|
85
|
-
}
|
|
86
|
-
if (p.z > max_bound.z) {
|
|
87
|
-
p.z = max_bound.z;
|
|
88
|
-
v.z = -abs(v.z);
|
|
89
|
-
mask = mask | 32u;
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
return BounceResult(p, v, mask);
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
fn hash_u32(x: u32) -> u32 {
|
|
96
|
-
var v = x;
|
|
97
|
-
v = v ^ (v >> 16u);
|
|
98
|
-
v = v * 0x7feb352du;
|
|
99
|
-
v = v ^ (v >> 15u);
|
|
100
|
-
v = v * 0x846ca68bu;
|
|
101
|
-
v = v ^ (v >> 16u);
|
|
102
|
-
return v;
|
|
1
|
+
// Minimal GPU worker entry point.
|
|
2
|
+
//
|
|
3
|
+
// This file is intended to be concatenated with the lock-free queue WGSL
|
|
4
|
+
// via assembleWorkerWgsl(). It only handles dequeue and dispatches to a
|
|
5
|
+
// user hook. Replace this file (or provide your own WGSL) to implement
|
|
6
|
+
// real workloads.
|
|
7
|
+
|
|
8
|
+
fn payload_word(job_index: u32, word_index: u32) -> u32 {
|
|
9
|
+
let stride = params.output_stride;
|
|
10
|
+
if (stride == 0u || word_index >= stride) {
|
|
11
|
+
return 0u;
|
|
12
|
+
}
|
|
13
|
+
let base = job_index * stride;
|
|
14
|
+
return output_payloads[base + word_index];
|
|
103
15
|
}
|
|
104
16
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
}
|
|
17
|
+
// process_job(job_index, job_type, payload_words) must be defined by the
|
|
18
|
+
// job WGSL that you concatenate before this file.
|
|
108
19
|
|
|
109
20
|
@compute @workgroup_size(64)
|
|
110
|
-
fn
|
|
21
|
+
fn worker_main(@builtin(global_invocation_id) gid: vec3<u32>) {
|
|
111
22
|
let idx = gid.x;
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
let ok = dequeue(idx);
|
|
117
|
-
if (ok == 0u) {
|
|
23
|
+
let job_count = dequeue_job_count();
|
|
24
|
+
if (idx >= job_count) {
|
|
118
25
|
return;
|
|
119
26
|
}
|
|
120
|
-
|
|
121
|
-
let payload_words = output_jobs[idx].payload_words;
|
|
122
|
-
if (payload_words == 0u) {
|
|
27
|
+
if (!queue_config_valid()) {
|
|
123
28
|
return;
|
|
124
29
|
}
|
|
125
|
-
let
|
|
126
|
-
if (
|
|
127
|
-
return;
|
|
128
|
-
}
|
|
129
|
-
let job = input_payloads[payload_offset];
|
|
130
|
-
if (job >= sim.count) {
|
|
30
|
+
let ok = dequeue(idx);
|
|
31
|
+
if (ok == 0u) {
|
|
131
32
|
return;
|
|
132
33
|
}
|
|
133
34
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
var vel = inst.vel.xyz;
|
|
137
|
-
let half = inst.half.xyz;
|
|
138
|
-
var face_mask: u32 = 0u;
|
|
139
|
-
var body_hits: u32 = 0u;
|
|
140
|
-
let self_radius = length(half);
|
|
141
|
-
|
|
142
|
-
for (var step: u32 = 0u; step < sim.steps; step = step + 1u) {
|
|
143
|
-
pos = pos + vel * sim.dt;
|
|
144
|
-
let bounce = apply_bounds(pos, vel, half);
|
|
145
|
-
pos = bounce.pos;
|
|
146
|
-
vel = bounce.vel;
|
|
147
|
-
face_mask = face_mask | bounce.mask;
|
|
148
|
-
|
|
149
|
-
if (sim.count <= COLLISION_BRUTE_FORCE_MAX) {
|
|
150
|
-
for (var other_idx: u32 = 0u; other_idx < sim.count; other_idx = other_idx + 1u) {
|
|
151
|
-
if (other_idx == job) {
|
|
152
|
-
continue;
|
|
153
|
-
}
|
|
154
|
-
let other = instances[other_idx];
|
|
155
|
-
let other_pos = other.pos.xyz;
|
|
156
|
-
let other_vel = other.vel.xyz;
|
|
157
|
-
let other_radius = length(other.half.xyz);
|
|
158
|
-
let delta = pos - other_pos;
|
|
159
|
-
let dist_sq = dot(delta, delta);
|
|
160
|
-
let min_dist = self_radius + other_radius;
|
|
161
|
-
|
|
162
|
-
if (dist_sq < min_dist * min_dist && dist_sq > COLLISION_EPSILON) {
|
|
163
|
-
let dist = sqrt(dist_sq);
|
|
164
|
-
let normal = delta / dist;
|
|
165
|
-
let overlap = min_dist - dist;
|
|
166
|
-
pos = pos + normal * (overlap * COLLISION_PUSH);
|
|
167
|
-
let rel_vel = vel - other_vel;
|
|
168
|
-
let approach = dot(rel_vel, normal);
|
|
169
|
-
if (approach < 0.0) {
|
|
170
|
-
vel = vel - normal * (approach * (1.0 + COLLISION_RESTITUTION)) * 0.5;
|
|
171
|
-
}
|
|
172
|
-
body_hits = body_hits + 1u;
|
|
173
|
-
}
|
|
174
|
-
}
|
|
175
|
-
} else {
|
|
176
|
-
let step_seed = job ^ (step * 0x9e3779b9u);
|
|
177
|
-
for (var sample: u32 = 0u; sample < COLLISION_SAMPLES; sample = sample + 1u) {
|
|
178
|
-
let neighbor = neighbor_index(
|
|
179
|
-
step_seed,
|
|
180
|
-
sample * 0x85ebca6bu + 0x27d4eb2du,
|
|
181
|
-
sim.count,
|
|
182
|
-
);
|
|
183
|
-
if (neighbor == job) {
|
|
184
|
-
continue;
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
let other = instances[neighbor];
|
|
188
|
-
let other_pos = other.pos.xyz;
|
|
189
|
-
let other_vel = other.vel.xyz;
|
|
190
|
-
let other_radius = length(other.half.xyz);
|
|
191
|
-
let delta = pos - other_pos;
|
|
192
|
-
let dist_sq = dot(delta, delta);
|
|
193
|
-
let min_dist = self_radius + other_radius;
|
|
194
|
-
|
|
195
|
-
if (dist_sq < min_dist * min_dist && dist_sq > COLLISION_EPSILON) {
|
|
196
|
-
let dist = sqrt(dist_sq);
|
|
197
|
-
let normal = delta / dist;
|
|
198
|
-
let overlap = min_dist - dist;
|
|
199
|
-
pos = pos + normal * (overlap * COLLISION_PUSH);
|
|
200
|
-
let rel_vel = vel - other_vel;
|
|
201
|
-
let approach = dot(rel_vel, normal);
|
|
202
|
-
if (approach < 0.0) {
|
|
203
|
-
vel = vel - normal * (approach * (1.0 + COLLISION_RESTITUTION)) * 0.5;
|
|
204
|
-
}
|
|
205
|
-
body_hits = body_hits + 1u;
|
|
206
|
-
}
|
|
207
|
-
}
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
let bounce2 = apply_bounds(pos, vel, half);
|
|
211
|
-
pos = bounce2.pos;
|
|
212
|
-
vel = bounce2.vel;
|
|
213
|
-
face_mask = face_mask | bounce2.mask;
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
let aabb_min = pos - half;
|
|
217
|
-
let aabb_max = pos + half;
|
|
218
|
-
let radius = length(half);
|
|
219
|
-
let dist = length(pos - sim.sensor.xyz);
|
|
220
|
-
var in_range: f32 = 0.0;
|
|
221
|
-
if (dist <= sim.range + radius) {
|
|
222
|
-
in_range = 1.0;
|
|
223
|
-
}
|
|
224
|
-
let speed = length(vel);
|
|
225
|
-
|
|
226
|
-
let boundary_mask = face_mask;
|
|
227
|
-
let face_hits = countOneBits(boundary_mask);
|
|
228
|
-
if (in_range > 0.0) {
|
|
229
|
-
atomicAdd(&stats[STAT_IN_RANGE], 1u);
|
|
230
|
-
}
|
|
231
|
-
if (face_hits > 0u) {
|
|
232
|
-
atomicAdd(&stats[STAT_FACE_CONTACTS], face_hits);
|
|
233
|
-
}
|
|
234
|
-
if ((boundary_mask & 1u) != 0u) {
|
|
235
|
-
atomicAdd(&stats[STAT_FACE_X_NEG], 1u);
|
|
236
|
-
}
|
|
237
|
-
if ((boundary_mask & 2u) != 0u) {
|
|
238
|
-
atomicAdd(&stats[STAT_FACE_X_POS], 1u);
|
|
239
|
-
}
|
|
240
|
-
if ((boundary_mask & 4u) != 0u) {
|
|
241
|
-
atomicAdd(&stats[STAT_FACE_Y_NEG], 1u);
|
|
242
|
-
}
|
|
243
|
-
if ((boundary_mask & 8u) != 0u) {
|
|
244
|
-
atomicAdd(&stats[STAT_FACE_Y_POS], 1u);
|
|
245
|
-
}
|
|
246
|
-
if ((boundary_mask & 16u) != 0u) {
|
|
247
|
-
atomicAdd(&stats[STAT_FACE_Z_NEG], 1u);
|
|
248
|
-
}
|
|
249
|
-
if ((boundary_mask & 32u) != 0u) {
|
|
250
|
-
atomicAdd(&stats[STAT_FACE_Z_POS], 1u);
|
|
251
|
-
}
|
|
252
|
-
if (body_hits > 0u) {
|
|
253
|
-
atomicAdd(&stats[STAT_BODY_CONTACTS], body_hits);
|
|
254
|
-
face_mask = boundary_mask | BODY_CONTACT_FLAG;
|
|
255
|
-
}
|
|
256
|
-
|
|
257
|
-
inst.pos = vec4<f32>(pos, 1.0);
|
|
258
|
-
inst.vel = vec4<f32>(vel, 0.0);
|
|
259
|
-
instances[job] = inst;
|
|
260
|
-
|
|
261
|
-
results[job].aabb_min = vec4<f32>(aabb_min, 0.0);
|
|
262
|
-
results[job].aabb_max = vec4<f32>(aabb_max, 0.0);
|
|
263
|
-
results[job].sphere = vec4<f32>(pos, radius);
|
|
264
|
-
results[job].metrics = vec4<f32>(dist, speed, in_range, f32(face_mask));
|
|
35
|
+
let job_info = output_jobs[idx];
|
|
36
|
+
process_job(idx, job_info.job_type, job_info.payload_words);
|
|
265
37
|
}
|