@plasius/gpu-worker 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +23 -1
- package/README.md +22 -2
- package/dist/index.cjs +4 -3
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +4 -3
- package/dist/index.js.map +1 -1
- package/dist/worker.wgsl +232 -101
- package/package.json +3 -2
- package/src/index.js +5 -3
- package/src/worker.wgsl +232 -101
package/CHANGELOG.md
CHANGED
|
@@ -20,6 +20,25 @@ The format is based on **[Keep a Changelog](https://keepachangelog.com/en/1.1.0/
|
|
|
20
20
|
- **Security**
|
|
21
21
|
- (placeholder)
|
|
22
22
|
|
|
23
|
+
## [0.1.1] - 2026-01-23
|
|
24
|
+
|
|
25
|
+
- **Added**
|
|
26
|
+
- `assembleWorkerWgsl` now accepts optional queue WGSL overrides for local demos.
|
|
27
|
+
|
|
28
|
+
- **Changed**
|
|
29
|
+
- Demo now simulates millions of instanced objects with range checks, bounding spheres/AABBs, and face contact stats.
|
|
30
|
+
- **Breaking:** Queue bindings updated to remove the payload arena and use payload offsets into caller-managed buffers.
|
|
31
|
+
- Demo updated to match the new payload-handle layout.
|
|
32
|
+
- **Breaking:** Queue bindings now use job metadata and a variable-size payload arena.
|
|
33
|
+
- Worker job payloads are read from the output payload buffer using `output_stride`.
|
|
34
|
+
- Demo updated to emit job metadata and payload buffers.
|
|
35
|
+
|
|
36
|
+
- **Fixed**
|
|
37
|
+
- Demo can load a local queue WGSL to avoid mismatched dependency versions.
|
|
38
|
+
|
|
39
|
+
- **Security**
|
|
40
|
+
- (placeholder)
|
|
41
|
+
|
|
23
42
|
## [0.1.0] - 2026-01-22
|
|
24
43
|
|
|
25
44
|
- **Added**
|
|
@@ -69,6 +88,9 @@ The format is based on **[Keep a Changelog](https://keepachangelog.com/en/1.1.0/
|
|
|
69
88
|
|
|
70
89
|
---
|
|
71
90
|
|
|
72
|
-
[Unreleased]: https://github.com/Plasius-LTD/gpu-worker/compare/v0.1.
|
|
91
|
+
[Unreleased]: https://github.com/Plasius-LTD/gpu-worker/compare/v0.1.1...HEAD
|
|
73
92
|
[0.1.0-beta.1]: https://github.com/Plasius-LTD/gpu-worker/releases/tag/v0.1.0-beta.1
|
|
74
93
|
[0.1.0]: https://github.com/Plasius-LTD/gpu-worker/releases/tag/v0.1.0
|
|
94
|
+
[0.2.0]: https://github.com/Plasius-LTD/gpu-worker/releases/tag/v0.2.0
|
|
95
|
+
[0.3.0]: https://github.com/Plasius-LTD/gpu-worker/releases/tag/v0.3.0
|
|
96
|
+
[0.1.1]: https://github.com/Plasius-LTD/gpu-worker/releases/tag/v0.1.1
|
package/README.md
CHANGED
|
@@ -22,6 +22,9 @@ const shaderCode = await assembleWorkerWgsl(workerWgsl);
|
|
|
22
22
|
// Pass shaderCode to device.createShaderModule({ code: shaderCode })
|
|
23
23
|
```
|
|
24
24
|
|
|
25
|
+
`assembleWorkerWgsl` also accepts an optional second argument to override the queue WGSL source:
|
|
26
|
+
`assembleWorkerWgsl(workerWgsl, { queueWgsl, queueUrl, fetcher })`.
|
|
27
|
+
|
|
25
28
|
## What this is
|
|
26
29
|
- A minimal GPU worker layer that combines a lock-free queue with user WGSL jobs.
|
|
27
30
|
- A helper to assemble WGSL modules with queue helpers included.
|
|
@@ -33,11 +36,26 @@ dependencies first so the lock-free queue package is available for the browser i
|
|
|
33
36
|
|
|
34
37
|
```
|
|
35
38
|
npm install
|
|
36
|
-
|
|
39
|
+
npm run demo
|
|
37
40
|
```
|
|
38
41
|
|
|
39
42
|
Then open `http://localhost:8000/demo/`.
|
|
40
43
|
|
|
44
|
+
### HTTPS demo
|
|
45
|
+
WebGPU requires a secure context. Browsers treat `http://localhost` as secure, so the plain HTTP demo above works locally; for non-localhost access, run the HTTPS demo server.
|
|
46
|
+
|
|
47
|
+
```
|
|
48
|
+
mkdir -p demo/certs
|
|
49
|
+
mkcert -key-file demo/certs/localhost-key.pem -cert-file demo/certs/localhost.pem localhost 127.0.0.1 ::1
|
|
50
|
+
# or
|
|
51
|
+
openssl req -x509 -newkey rsa:2048 -nodes -keyout demo/certs/localhost-key.pem -out demo/certs/localhost.pem -days 365 -subj "/CN=localhost" -addext "subjectAltName=DNS:localhost,IP:127.0.0.1"
|
|
52
|
+
npm run demo:https
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Then open `https://localhost:8443/demo/`. If you use a different hostname/IP, generate a
|
|
56
|
+
certificate for that name and set `DEMO_HOST`, `DEMO_PORT`, `DEMO_TLS_CERT`, and
|
|
57
|
+
`DEMO_TLS_KEY` as needed.
|
|
58
|
+
|
|
41
59
|
## Build Outputs
|
|
42
60
|
|
|
43
61
|
`npm run build` emits `dist/index.js`, `dist/index.cjs`, and `dist/worker.wgsl`.
|
|
@@ -49,4 +67,6 @@ Then open `http://localhost:8000/demo/`.
|
|
|
49
67
|
- `src/index.js`: Helper functions to load/assemble WGSL.
|
|
50
68
|
|
|
51
69
|
## Job shape
|
|
52
|
-
Jobs are `
|
|
70
|
+
Jobs are variable-length payloads stored in a caller-managed buffer. Each job supplies `job_type`, `payload_offset`, and `payload_words` metadata; the payload itself lives in the input payload buffer, starting at `payload_offset` and spanning `payload_words` words. For simple cases, use a single-word payload containing an index into your workload array.
|
|
71
|
+
|
|
72
|
+
Set `output_stride` in queue params to the maximum payload size you want copied out for a job; `job_type` can be used by schedulers to route work to different kernels. The queue mirrors input metadata into `output_jobs` and optionally copies payloads into `output_payloads`.
|
package/dist/index.cjs
CHANGED
|
@@ -40,10 +40,11 @@ async function loadWorkerWgsl() {
|
|
|
40
40
|
const response = await fetch(workerWgslUrl);
|
|
41
41
|
return response.text();
|
|
42
42
|
}
|
|
43
|
-
async function assembleWorkerWgsl(workerWgsl) {
|
|
44
|
-
const queueWgsl =
|
|
43
|
+
async function assembleWorkerWgsl(workerWgsl, options = {}) {
|
|
44
|
+
const { queueWgsl, queueUrl, fetcher } = options ?? {};
|
|
45
|
+
const queueSource = queueWgsl ?? await (0, import_gpu_lock_free_queue.loadQueueWgsl)({ url: queueUrl, fetcher });
|
|
45
46
|
const body = workerWgsl ?? await loadWorkerWgsl();
|
|
46
|
-
return `${
|
|
47
|
+
return `${queueSource}
|
|
47
48
|
|
|
48
49
|
${body}`;
|
|
49
50
|
}
|
package/dist/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.js"],"sourcesContent":["import { loadQueueWgsl } from \"@plasius/gpu-lock-free-queue\";\n\nexport const workerWgslUrl = (() => {\n if (typeof __IMPORT_META_URL__ !== \"undefined\") {\n return new URL(\"./worker.wgsl\", __IMPORT_META_URL__);\n }\n if (typeof __filename !== \"undefined\" && typeof require !== \"undefined\") {\n const { pathToFileURL } = require(\"node:url\");\n return new URL(\"./worker.wgsl\", pathToFileURL(__filename));\n }\n const base =\n typeof process !== \"undefined\" && process.cwd\n ? `file://${process.cwd()}/`\n : \"file:///\";\n return new URL(\"./worker.wgsl\", base);\n})();\n\nexport async function loadWorkerWgsl() {\n const response = await fetch(workerWgslUrl);\n return response.text();\n}\n\nexport async function assembleWorkerWgsl(workerWgsl) {\n const queueWgsl = await loadQueueWgsl();\n const body = workerWgsl ?? (await loadWorkerWgsl());\n return `${
|
|
1
|
+
{"version":3,"sources":["../src/index.js"],"sourcesContent":["import { loadQueueWgsl } from \"@plasius/gpu-lock-free-queue\";\n\nexport const workerWgslUrl = (() => {\n if (typeof __IMPORT_META_URL__ !== \"undefined\") {\n return new URL(\"./worker.wgsl\", __IMPORT_META_URL__);\n }\n if (typeof __filename !== \"undefined\" && typeof require !== \"undefined\") {\n const { pathToFileURL } = require(\"node:url\");\n return new URL(\"./worker.wgsl\", pathToFileURL(__filename));\n }\n const base =\n typeof process !== \"undefined\" && process.cwd\n ? `file://${process.cwd()}/`\n : \"file:///\";\n return new URL(\"./worker.wgsl\", base);\n})();\n\nexport async function loadWorkerWgsl() {\n const response = await fetch(workerWgslUrl);\n return response.text();\n}\n\nexport async function assembleWorkerWgsl(workerWgsl, options = {}) {\n const { queueWgsl, queueUrl, fetcher } = options ?? {};\n const queueSource =\n queueWgsl ?? (await loadQueueWgsl({ url: queueUrl, fetcher }));\n const body = workerWgsl ?? (await loadWorkerWgsl());\n return `${queueSource}\\n\\n${body}`;\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,iCAA8B;AAEvB,IAAM,iBAAiB,MAAM;AAClC,MAAI,OAA4C;AAC9C,WAAO,IAAI,IAAI,iBAAiB,MAAmB;AAAA,EACrD;AACA,MAAI,OAAO,eAAe,eAAe,OAAO,YAAY,aAAa;AACvE,UAAM,EAAE,cAAc,IAAI,QAAQ,KAAU;AAC5C,WAAO,IAAI,IAAI,iBAAiB,cAAc,UAAU,CAAC;AAAA,EAC3D;AACA,QAAM,OACJ,OAAO,YAAY,eAAe,QAAQ,MACtC,UAAU,QAAQ,IAAI,CAAC,MACvB;AACN,SAAO,IAAI,IAAI,iBAAiB,IAAI;AACtC,GAAG;AAEH,eAAsB,iBAAiB;AACrC,QAAM,WAAW,MAAM,MAAM,aAAa;AAC1C,SAAO,SAAS,KAAK;AACvB;AAEA,eAAsB,mBAAmB,YAAY,UAAU,CAAC,GAAG;AACjE,QAAM,EAAE,WAAW,UAAU,QAAQ,IAAI,WAAW,CAAC;AACrD,QAAM,cACJ,aAAc,UAAM,0CAAc,EAAE,KAAK,UAAU,QAAQ,CAAC;AAC9D,QAAM,OAAO,cAAe,MAAM,eAAe;AACjD,SAAO,GAAG,WAAW;AAAA;AAAA,EAAO,IAAI;AAClC;","names":[]}
|
package/dist/index.js
CHANGED
|
@@ -22,10 +22,11 @@ async function loadWorkerWgsl() {
|
|
|
22
22
|
const response = await fetch(workerWgslUrl);
|
|
23
23
|
return response.text();
|
|
24
24
|
}
|
|
25
|
-
async function assembleWorkerWgsl(workerWgsl) {
|
|
26
|
-
const queueWgsl =
|
|
25
|
+
async function assembleWorkerWgsl(workerWgsl, options = {}) {
|
|
26
|
+
const { queueWgsl, queueUrl, fetcher } = options ?? {};
|
|
27
|
+
const queueSource = queueWgsl ?? await loadQueueWgsl({ url: queueUrl, fetcher });
|
|
27
28
|
const body = workerWgsl ?? await loadWorkerWgsl();
|
|
28
|
-
return `${
|
|
29
|
+
return `${queueSource}
|
|
29
30
|
|
|
30
31
|
${body}`;
|
|
31
32
|
}
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.js"],"sourcesContent":["import { loadQueueWgsl } from \"@plasius/gpu-lock-free-queue\";\n\nexport const workerWgslUrl = (() => {\n if (typeof __IMPORT_META_URL__ !== \"undefined\") {\n return new URL(\"./worker.wgsl\", __IMPORT_META_URL__);\n }\n if (typeof __filename !== \"undefined\" && typeof require !== \"undefined\") {\n const { pathToFileURL } = require(\"node:url\");\n return new URL(\"./worker.wgsl\", pathToFileURL(__filename));\n }\n const base =\n typeof process !== \"undefined\" && process.cwd\n ? `file://${process.cwd()}/`\n : \"file:///\";\n return new URL(\"./worker.wgsl\", base);\n})();\n\nexport async function loadWorkerWgsl() {\n const response = await fetch(workerWgslUrl);\n return response.text();\n}\n\nexport async function assembleWorkerWgsl(workerWgsl) {\n const queueWgsl = await loadQueueWgsl();\n const body = workerWgsl ?? (await loadWorkerWgsl());\n return `${
|
|
1
|
+
{"version":3,"sources":["../src/index.js"],"sourcesContent":["import { loadQueueWgsl } from \"@plasius/gpu-lock-free-queue\";\n\nexport const workerWgslUrl = (() => {\n if (typeof __IMPORT_META_URL__ !== \"undefined\") {\n return new URL(\"./worker.wgsl\", __IMPORT_META_URL__);\n }\n if (typeof __filename !== \"undefined\" && typeof require !== \"undefined\") {\n const { pathToFileURL } = require(\"node:url\");\n return new URL(\"./worker.wgsl\", pathToFileURL(__filename));\n }\n const base =\n typeof process !== \"undefined\" && process.cwd\n ? `file://${process.cwd()}/`\n : \"file:///\";\n return new URL(\"./worker.wgsl\", base);\n})();\n\nexport async function loadWorkerWgsl() {\n const response = await fetch(workerWgslUrl);\n return response.text();\n}\n\nexport async function assembleWorkerWgsl(workerWgsl, options = {}) {\n const { queueWgsl, queueUrl, fetcher } = options ?? {};\n const queueSource =\n queueWgsl ?? (await loadQueueWgsl({ url: queueUrl, fetcher }));\n const body = workerWgsl ?? (await loadWorkerWgsl());\n return `${queueSource}\\n\\n${body}`;\n}\n"],"mappings":";;;;;;;;AAAA,SAAS,qBAAqB;AAEvB,IAAM,iBAAiB,MAAM;AAClC,MAAI,OAAO,oBAAwB,aAAa;AAC9C,WAAO,IAAI,IAAI,iBAAiB,eAAmB;AAAA,EACrD;AACA,MAAI,OAAO,eAAe,eAAe,OAAO,cAAY,aAAa;AACvE,UAAM,EAAE,cAAc,IAAI,UAAQ,KAAU;AAC5C,WAAO,IAAI,IAAI,iBAAiB,cAAc,UAAU,CAAC;AAAA,EAC3D;AACA,QAAM,OACJ,OAAO,YAAY,eAAe,QAAQ,MACtC,UAAU,QAAQ,IAAI,CAAC,MACvB;AACN,SAAO,IAAI,IAAI,iBAAiB,IAAI;AACtC,GAAG;AAEH,eAAsB,iBAAiB;AACrC,QAAM,WAAW,MAAM,MAAM,aAAa;AAC1C,SAAO,SAAS,KAAK;AACvB;AAEA,eAAsB,mBAAmB,YAAY,UAAU,CAAC,GAAG;AACjE,QAAM,EAAE,WAAW,UAAU,QAAQ,IAAI,WAAW,CAAC;AACrD,QAAM,cACJ,aAAc,MAAM,cAAc,EAAE,KAAK,UAAU,QAAQ,CAAC;AAC9D,QAAM,OAAO,cAAe,MAAM,eAAe;AACjD,SAAO,GAAG,WAAW;AAAA;AAAA,EAAO,IAAI;AAClC;","names":[]}
|
package/dist/worker.wgsl
CHANGED
|
@@ -1,62 +1,113 @@
|
|
|
1
|
-
struct
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
_pad1: f32,
|
|
1
|
+
struct SimParams {
|
|
2
|
+
count: u32,
|
|
3
|
+
steps: u32,
|
|
4
|
+
_pad0: vec2<u32>,
|
|
5
|
+
dt: f32,
|
|
6
|
+
range: f32,
|
|
7
|
+
_pad1: vec2<f32>,
|
|
8
|
+
bounds_min: vec4<f32>,
|
|
9
|
+
bounds_max: vec4<f32>,
|
|
10
|
+
sensor: vec4<f32>,
|
|
12
11
|
};
|
|
13
12
|
|
|
14
|
-
|
|
15
|
-
|
|
13
|
+
struct Instance {
|
|
14
|
+
pos: vec4<f32>,
|
|
15
|
+
half: vec4<f32>,
|
|
16
|
+
vel: vec4<f32>,
|
|
17
|
+
};
|
|
18
|
+
|
|
19
|
+
struct Result {
|
|
20
|
+
aabb_min: vec4<f32>,
|
|
21
|
+
aabb_max: vec4<f32>,
|
|
22
|
+
sphere: vec4<f32>,
|
|
23
|
+
metrics: vec4<f32>,
|
|
24
|
+
};
|
|
25
|
+
|
|
26
|
+
struct BounceResult {
|
|
27
|
+
pos: vec3<f32>,
|
|
28
|
+
vel: vec3<f32>,
|
|
29
|
+
mask: u32,
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
@group(1) @binding(0) var<storage, read_write> instances: array<Instance>;
|
|
33
|
+
@group(1) @binding(1) var<storage, read_write> results: array<Result>;
|
|
34
|
+
@group(1) @binding(2) var<uniform> sim: SimParams;
|
|
35
|
+
@group(1) @binding(3) var<storage, read_write> stats: array<atomic<u32>>;
|
|
16
36
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
37
|
+
const STAT_IN_RANGE: u32 = 0u;
|
|
38
|
+
const STAT_FACE_CONTACTS: u32 = 1u;
|
|
39
|
+
const STAT_FACE_X_NEG: u32 = 2u;
|
|
40
|
+
const STAT_FACE_X_POS: u32 = 3u;
|
|
41
|
+
const STAT_FACE_Y_NEG: u32 = 4u;
|
|
42
|
+
const STAT_FACE_Y_POS: u32 = 5u;
|
|
43
|
+
const STAT_FACE_Z_NEG: u32 = 6u;
|
|
44
|
+
const STAT_FACE_Z_POS: u32 = 7u;
|
|
45
|
+
const STAT_BODY_CONTACTS: u32 = 8u;
|
|
46
|
+
|
|
47
|
+
const BODY_CONTACT_FLAG: u32 = 64u;
|
|
48
|
+
const COLLISION_SAMPLES: u32 = 24u;
|
|
49
|
+
const COLLISION_BRUTE_FORCE_MAX: u32 = 2048u;
|
|
50
|
+
const COLLISION_RESTITUTION: f32 = 0.65;
|
|
51
|
+
const COLLISION_PUSH: f32 = 0.5;
|
|
52
|
+
const COLLISION_EPSILON: f32 = 1e-5;
|
|
53
|
+
|
|
54
|
+
fn apply_bounds(pos: vec3<f32>, vel: vec3<f32>, half: vec3<f32>) -> BounceResult {
|
|
55
|
+
let min_bound = sim.bounds_min.xyz + half;
|
|
56
|
+
let max_bound = sim.bounds_max.xyz - half;
|
|
57
|
+
var p = pos;
|
|
58
|
+
var v = vel;
|
|
59
|
+
var mask: u32 = 0u;
|
|
60
|
+
|
|
61
|
+
if (p.x < min_bound.x) {
|
|
62
|
+
p.x = min_bound.x;
|
|
63
|
+
v.x = abs(v.x);
|
|
64
|
+
mask = mask | 1u;
|
|
25
65
|
}
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
66
|
+
if (p.x > max_bound.x) {
|
|
67
|
+
p.x = max_bound.x;
|
|
68
|
+
v.x = -abs(v.x);
|
|
69
|
+
mask = mask | 2u;
|
|
30
70
|
}
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
71
|
+
if (p.y < min_bound.y) {
|
|
72
|
+
p.y = min_bound.y;
|
|
73
|
+
v.y = abs(v.y);
|
|
74
|
+
mask = mask | 4u;
|
|
75
|
+
}
|
|
76
|
+
if (p.y > max_bound.y) {
|
|
77
|
+
p.y = max_bound.y;
|
|
78
|
+
v.y = -abs(v.y);
|
|
79
|
+
mask = mask | 8u;
|
|
80
|
+
}
|
|
81
|
+
if (p.z < min_bound.z) {
|
|
82
|
+
p.z = min_bound.z;
|
|
83
|
+
v.z = abs(v.z);
|
|
84
|
+
mask = mask | 16u;
|
|
85
|
+
}
|
|
86
|
+
if (p.z > max_bound.z) {
|
|
87
|
+
p.z = max_bound.z;
|
|
88
|
+
v.z = -abs(v.z);
|
|
89
|
+
mask = mask | 32u;
|
|
34
90
|
}
|
|
35
|
-
return -1.0;
|
|
36
|
-
}
|
|
37
91
|
|
|
38
|
-
|
|
39
|
-
let t = 0.5 * (dir.y + 1.0);
|
|
40
|
-
return mix(vec3<f32>(0.04, 0.05, 0.08), vec3<f32>(0.65, 0.78, 0.92), t);
|
|
92
|
+
return BounceResult(p, v, mask);
|
|
41
93
|
}
|
|
42
94
|
|
|
43
|
-
fn
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
95
|
+
fn hash_u32(x: u32) -> u32 {
|
|
96
|
+
var v = x;
|
|
97
|
+
v = v ^ (v >> 16u);
|
|
98
|
+
v = v * 0x7feb352du;
|
|
99
|
+
v = v ^ (v >> 15u);
|
|
100
|
+
v = v * 0x846ca68bu;
|
|
101
|
+
v = v ^ (v >> 16u);
|
|
102
|
+
return v;
|
|
48
103
|
}
|
|
49
104
|
|
|
50
|
-
fn
|
|
51
|
-
|
|
52
|
-
let r = u32(round(c.x * 255.0));
|
|
53
|
-
let g = u32(round(c.y * 255.0));
|
|
54
|
-
let b = u32(round(c.z * 255.0));
|
|
55
|
-
return (255u << 24) | (b << 16) | (g << 8) | r;
|
|
105
|
+
fn neighbor_index(seed: u32, salt: u32, count: u32) -> u32 {
|
|
106
|
+
return hash_u32(seed ^ salt) % count;
|
|
56
107
|
}
|
|
57
108
|
|
|
58
109
|
@compute @workgroup_size(64)
|
|
59
|
-
fn
|
|
110
|
+
fn simulate_main(@builtin(global_invocation_id) gid: vec3<u32>) {
|
|
60
111
|
let idx = gid.x;
|
|
61
112
|
if (idx >= params.job_count) {
|
|
62
113
|
return;
|
|
@@ -67,68 +118,148 @@ fn raytrace_main(@builtin(global_invocation_id) gid: vec3<u32>) {
|
|
|
67
118
|
return;
|
|
68
119
|
}
|
|
69
120
|
|
|
70
|
-
let
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
let
|
|
75
|
-
|
|
121
|
+
let payload_words = output_jobs[idx].payload_words;
|
|
122
|
+
if (payload_words == 0u) {
|
|
123
|
+
return;
|
|
124
|
+
}
|
|
125
|
+
let payload_offset = output_jobs[idx].payload_offset;
|
|
126
|
+
if (payload_offset + payload_words > arrayLength(&input_payloads)) {
|
|
127
|
+
return;
|
|
128
|
+
}
|
|
129
|
+
let job = input_payloads[payload_offset];
|
|
130
|
+
if (job >= sim.count) {
|
|
131
|
+
return;
|
|
132
|
+
}
|
|
76
133
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
let
|
|
134
|
+
var inst = instances[job];
|
|
135
|
+
var pos = inst.pos.xyz;
|
|
136
|
+
var vel = inst.vel.xyz;
|
|
137
|
+
let half = inst.half.xyz;
|
|
138
|
+
var face_mask: u32 = 0u;
|
|
139
|
+
var body_hits: u32 = 0u;
|
|
140
|
+
let self_radius = length(half);
|
|
81
141
|
|
|
82
|
-
for (var
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
let px = start_x + x;
|
|
89
|
-
if (px >= render.width) {
|
|
90
|
-
continue;
|
|
91
|
-
}
|
|
142
|
+
for (var step: u32 = 0u; step < sim.steps; step = step + 1u) {
|
|
143
|
+
pos = pos + vel * sim.dt;
|
|
144
|
+
let bounce = apply_bounds(pos, vel, half);
|
|
145
|
+
pos = bounce.pos;
|
|
146
|
+
vel = bounce.vel;
|
|
147
|
+
face_mask = face_mask | bounce.mask;
|
|
92
148
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
var t_hit = -1.0;
|
|
106
|
-
var base = vec3<f32>(0.0);
|
|
107
|
-
var center = vec3<f32>(0.0);
|
|
108
|
-
if (t1 > 0.0) {
|
|
109
|
-
t_hit = t1;
|
|
110
|
-
base = vec3<f32>(0.86, 0.42, 0.32);
|
|
111
|
-
center = vec3<f32>(0.0, 1.0, 0.0);
|
|
112
|
-
}
|
|
113
|
-
if (t2 > 0.0 && (t_hit < 0.0 || t2 < t_hit)) {
|
|
114
|
-
t_hit = t2;
|
|
115
|
-
base = vec3<f32>(0.2, 0.7, 0.9);
|
|
116
|
-
center = vec3<f32>(-1.6, 0.6, -0.5);
|
|
117
|
-
}
|
|
118
|
-
if (t3 > 0.0 && (t_hit < 0.0 || t3 < t_hit)) {
|
|
119
|
-
t_hit = t3;
|
|
120
|
-
base = vec3<f32>(0.32, 0.3, 0.26);
|
|
121
|
-
center = vec3<f32>(0.0, -1000.0, 0.0);
|
|
122
|
-
}
|
|
149
|
+
if (sim.count <= COLLISION_BRUTE_FORCE_MAX) {
|
|
150
|
+
for (var other_idx: u32 = 0u; other_idx < sim.count; other_idx = other_idx + 1u) {
|
|
151
|
+
if (other_idx == job) {
|
|
152
|
+
continue;
|
|
153
|
+
}
|
|
154
|
+
let other = instances[other_idx];
|
|
155
|
+
let other_pos = other.pos.xyz;
|
|
156
|
+
let other_vel = other.vel.xyz;
|
|
157
|
+
let other_radius = length(other.half.xyz);
|
|
158
|
+
let delta = pos - other_pos;
|
|
159
|
+
let dist_sq = dot(delta, delta);
|
|
160
|
+
let min_dist = self_radius + other_radius;
|
|
123
161
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
162
|
+
if (dist_sq < min_dist * min_dist && dist_sq > COLLISION_EPSILON) {
|
|
163
|
+
let dist = sqrt(dist_sq);
|
|
164
|
+
let normal = delta / dist;
|
|
165
|
+
let overlap = min_dist - dist;
|
|
166
|
+
pos = pos + normal * (overlap * COLLISION_PUSH);
|
|
167
|
+
let rel_vel = vel - other_vel;
|
|
168
|
+
let approach = dot(rel_vel, normal);
|
|
169
|
+
if (approach < 0.0) {
|
|
170
|
+
vel = vel - normal * (approach * (1.0 + COLLISION_RESTITUTION)) * 0.5;
|
|
171
|
+
}
|
|
172
|
+
body_hits = body_hits + 1u;
|
|
173
|
+
}
|
|
128
174
|
}
|
|
175
|
+
} else {
|
|
176
|
+
let step_seed = job ^ (step * 0x9e3779b9u);
|
|
177
|
+
for (var sample: u32 = 0u; sample < COLLISION_SAMPLES; sample = sample + 1u) {
|
|
178
|
+
let neighbor = neighbor_index(
|
|
179
|
+
step_seed,
|
|
180
|
+
sample * 0x85ebca6bu + 0x27d4eb2du,
|
|
181
|
+
sim.count,
|
|
182
|
+
);
|
|
183
|
+
if (neighbor == job) {
|
|
184
|
+
continue;
|
|
185
|
+
}
|
|
129
186
|
|
|
130
|
-
|
|
131
|
-
|
|
187
|
+
let other = instances[neighbor];
|
|
188
|
+
let other_pos = other.pos.xyz;
|
|
189
|
+
let other_vel = other.vel.xyz;
|
|
190
|
+
let other_radius = length(other.half.xyz);
|
|
191
|
+
let delta = pos - other_pos;
|
|
192
|
+
let dist_sq = dot(delta, delta);
|
|
193
|
+
let min_dist = self_radius + other_radius;
|
|
194
|
+
|
|
195
|
+
if (dist_sq < min_dist * min_dist && dist_sq > COLLISION_EPSILON) {
|
|
196
|
+
let dist = sqrt(dist_sq);
|
|
197
|
+
let normal = delta / dist;
|
|
198
|
+
let overlap = min_dist - dist;
|
|
199
|
+
pos = pos + normal * (overlap * COLLISION_PUSH);
|
|
200
|
+
let rel_vel = vel - other_vel;
|
|
201
|
+
let approach = dot(rel_vel, normal);
|
|
202
|
+
if (approach < 0.0) {
|
|
203
|
+
vel = vel - normal * (approach * (1.0 + COLLISION_RESTITUTION)) * 0.5;
|
|
204
|
+
}
|
|
205
|
+
body_hits = body_hits + 1u;
|
|
206
|
+
}
|
|
207
|
+
}
|
|
132
208
|
}
|
|
209
|
+
|
|
210
|
+
let bounce2 = apply_bounds(pos, vel, half);
|
|
211
|
+
pos = bounce2.pos;
|
|
212
|
+
vel = bounce2.vel;
|
|
213
|
+
face_mask = face_mask | bounce2.mask;
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
let aabb_min = pos - half;
|
|
217
|
+
let aabb_max = pos + half;
|
|
218
|
+
let radius = length(half);
|
|
219
|
+
let dist = length(pos - sim.sensor.xyz);
|
|
220
|
+
var in_range: f32 = 0.0;
|
|
221
|
+
if (dist <= sim.range + radius) {
|
|
222
|
+
in_range = 1.0;
|
|
223
|
+
}
|
|
224
|
+
let speed = length(vel);
|
|
225
|
+
|
|
226
|
+
let boundary_mask = face_mask;
|
|
227
|
+
let face_hits = countOneBits(boundary_mask);
|
|
228
|
+
if (in_range > 0.0) {
|
|
229
|
+
atomicAdd(&stats[STAT_IN_RANGE], 1u);
|
|
133
230
|
}
|
|
231
|
+
if (face_hits > 0u) {
|
|
232
|
+
atomicAdd(&stats[STAT_FACE_CONTACTS], face_hits);
|
|
233
|
+
}
|
|
234
|
+
if ((boundary_mask & 1u) != 0u) {
|
|
235
|
+
atomicAdd(&stats[STAT_FACE_X_NEG], 1u);
|
|
236
|
+
}
|
|
237
|
+
if ((boundary_mask & 2u) != 0u) {
|
|
238
|
+
atomicAdd(&stats[STAT_FACE_X_POS], 1u);
|
|
239
|
+
}
|
|
240
|
+
if ((boundary_mask & 4u) != 0u) {
|
|
241
|
+
atomicAdd(&stats[STAT_FACE_Y_NEG], 1u);
|
|
242
|
+
}
|
|
243
|
+
if ((boundary_mask & 8u) != 0u) {
|
|
244
|
+
atomicAdd(&stats[STAT_FACE_Y_POS], 1u);
|
|
245
|
+
}
|
|
246
|
+
if ((boundary_mask & 16u) != 0u) {
|
|
247
|
+
atomicAdd(&stats[STAT_FACE_Z_NEG], 1u);
|
|
248
|
+
}
|
|
249
|
+
if ((boundary_mask & 32u) != 0u) {
|
|
250
|
+
atomicAdd(&stats[STAT_FACE_Z_POS], 1u);
|
|
251
|
+
}
|
|
252
|
+
if (body_hits > 0u) {
|
|
253
|
+
atomicAdd(&stats[STAT_BODY_CONTACTS], body_hits);
|
|
254
|
+
face_mask = boundary_mask | BODY_CONTACT_FLAG;
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
inst.pos = vec4<f32>(pos, 1.0);
|
|
258
|
+
inst.vel = vec4<f32>(vel, 0.0);
|
|
259
|
+
instances[job] = inst;
|
|
260
|
+
|
|
261
|
+
results[job].aabb_min = vec4<f32>(aabb_min, 0.0);
|
|
262
|
+
results[job].aabb_max = vec4<f32>(aabb_max, 0.0);
|
|
263
|
+
results[job].sphere = vec4<f32>(pos, radius);
|
|
264
|
+
results[job].metrics = vec4<f32>(dist, speed, in_range, f32(face_mask));
|
|
134
265
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@plasius/gpu-worker",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.1",
|
|
4
4
|
"description": "WebGPU worker runtime with a lock-free job queue for WGSL workloads.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"sideEffects": false,
|
|
@@ -26,6 +26,7 @@
|
|
|
26
26
|
"scripts": {
|
|
27
27
|
"build": "tsup && cp src/worker.wgsl dist/worker.wgsl",
|
|
28
28
|
"demo": "python3 -m http.server",
|
|
29
|
+
"demo:https": "node demo/serve-https.js",
|
|
29
30
|
"test": "npm run test:unit",
|
|
30
31
|
"test:unit": "node --test",
|
|
31
32
|
"test:e2e": "npx playwright install chromium && playwright test",
|
|
@@ -46,7 +47,7 @@
|
|
|
46
47
|
"author": "Plasius LTD <development@plasius.co.uk>",
|
|
47
48
|
"license": "Apache-2.0",
|
|
48
49
|
"dependencies": {
|
|
49
|
-
"@plasius/gpu-lock-free-queue": "^0.
|
|
50
|
+
"@plasius/gpu-lock-free-queue": "^0.2.1"
|
|
50
51
|
},
|
|
51
52
|
"devDependencies": {
|
|
52
53
|
"@playwright/test": "^1.57.0",
|
package/src/index.js
CHANGED
|
@@ -20,8 +20,10 @@ export async function loadWorkerWgsl() {
|
|
|
20
20
|
return response.text();
|
|
21
21
|
}
|
|
22
22
|
|
|
23
|
-
export async function assembleWorkerWgsl(workerWgsl) {
|
|
24
|
-
const queueWgsl =
|
|
23
|
+
export async function assembleWorkerWgsl(workerWgsl, options = {}) {
|
|
24
|
+
const { queueWgsl, queueUrl, fetcher } = options ?? {};
|
|
25
|
+
const queueSource =
|
|
26
|
+
queueWgsl ?? (await loadQueueWgsl({ url: queueUrl, fetcher }));
|
|
25
27
|
const body = workerWgsl ?? (await loadWorkerWgsl());
|
|
26
|
-
return `${
|
|
28
|
+
return `${queueSource}\n\n${body}`;
|
|
27
29
|
}
|
package/src/worker.wgsl
CHANGED
|
@@ -1,62 +1,113 @@
|
|
|
1
|
-
struct
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
_pad1: f32,
|
|
1
|
+
struct SimParams {
|
|
2
|
+
count: u32,
|
|
3
|
+
steps: u32,
|
|
4
|
+
_pad0: vec2<u32>,
|
|
5
|
+
dt: f32,
|
|
6
|
+
range: f32,
|
|
7
|
+
_pad1: vec2<f32>,
|
|
8
|
+
bounds_min: vec4<f32>,
|
|
9
|
+
bounds_max: vec4<f32>,
|
|
10
|
+
sensor: vec4<f32>,
|
|
12
11
|
};
|
|
13
12
|
|
|
14
|
-
|
|
15
|
-
|
|
13
|
+
struct Instance {
|
|
14
|
+
pos: vec4<f32>,
|
|
15
|
+
half: vec4<f32>,
|
|
16
|
+
vel: vec4<f32>,
|
|
17
|
+
};
|
|
18
|
+
|
|
19
|
+
struct Result {
|
|
20
|
+
aabb_min: vec4<f32>,
|
|
21
|
+
aabb_max: vec4<f32>,
|
|
22
|
+
sphere: vec4<f32>,
|
|
23
|
+
metrics: vec4<f32>,
|
|
24
|
+
};
|
|
25
|
+
|
|
26
|
+
struct BounceResult {
|
|
27
|
+
pos: vec3<f32>,
|
|
28
|
+
vel: vec3<f32>,
|
|
29
|
+
mask: u32,
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
@group(1) @binding(0) var<storage, read_write> instances: array<Instance>;
|
|
33
|
+
@group(1) @binding(1) var<storage, read_write> results: array<Result>;
|
|
34
|
+
@group(1) @binding(2) var<uniform> sim: SimParams;
|
|
35
|
+
@group(1) @binding(3) var<storage, read_write> stats: array<atomic<u32>>;
|
|
16
36
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
37
|
+
const STAT_IN_RANGE: u32 = 0u;
|
|
38
|
+
const STAT_FACE_CONTACTS: u32 = 1u;
|
|
39
|
+
const STAT_FACE_X_NEG: u32 = 2u;
|
|
40
|
+
const STAT_FACE_X_POS: u32 = 3u;
|
|
41
|
+
const STAT_FACE_Y_NEG: u32 = 4u;
|
|
42
|
+
const STAT_FACE_Y_POS: u32 = 5u;
|
|
43
|
+
const STAT_FACE_Z_NEG: u32 = 6u;
|
|
44
|
+
const STAT_FACE_Z_POS: u32 = 7u;
|
|
45
|
+
const STAT_BODY_CONTACTS: u32 = 8u;
|
|
46
|
+
|
|
47
|
+
const BODY_CONTACT_FLAG: u32 = 64u;
|
|
48
|
+
const COLLISION_SAMPLES: u32 = 24u;
|
|
49
|
+
const COLLISION_BRUTE_FORCE_MAX: u32 = 2048u;
|
|
50
|
+
const COLLISION_RESTITUTION: f32 = 0.65;
|
|
51
|
+
const COLLISION_PUSH: f32 = 0.5;
|
|
52
|
+
const COLLISION_EPSILON: f32 = 1e-5;
|
|
53
|
+
|
|
54
|
+
fn apply_bounds(pos: vec3<f32>, vel: vec3<f32>, half: vec3<f32>) -> BounceResult {
|
|
55
|
+
let min_bound = sim.bounds_min.xyz + half;
|
|
56
|
+
let max_bound = sim.bounds_max.xyz - half;
|
|
57
|
+
var p = pos;
|
|
58
|
+
var v = vel;
|
|
59
|
+
var mask: u32 = 0u;
|
|
60
|
+
|
|
61
|
+
if (p.x < min_bound.x) {
|
|
62
|
+
p.x = min_bound.x;
|
|
63
|
+
v.x = abs(v.x);
|
|
64
|
+
mask = mask | 1u;
|
|
25
65
|
}
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
66
|
+
if (p.x > max_bound.x) {
|
|
67
|
+
p.x = max_bound.x;
|
|
68
|
+
v.x = -abs(v.x);
|
|
69
|
+
mask = mask | 2u;
|
|
30
70
|
}
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
71
|
+
if (p.y < min_bound.y) {
|
|
72
|
+
p.y = min_bound.y;
|
|
73
|
+
v.y = abs(v.y);
|
|
74
|
+
mask = mask | 4u;
|
|
75
|
+
}
|
|
76
|
+
if (p.y > max_bound.y) {
|
|
77
|
+
p.y = max_bound.y;
|
|
78
|
+
v.y = -abs(v.y);
|
|
79
|
+
mask = mask | 8u;
|
|
80
|
+
}
|
|
81
|
+
if (p.z < min_bound.z) {
|
|
82
|
+
p.z = min_bound.z;
|
|
83
|
+
v.z = abs(v.z);
|
|
84
|
+
mask = mask | 16u;
|
|
85
|
+
}
|
|
86
|
+
if (p.z > max_bound.z) {
|
|
87
|
+
p.z = max_bound.z;
|
|
88
|
+
v.z = -abs(v.z);
|
|
89
|
+
mask = mask | 32u;
|
|
34
90
|
}
|
|
35
|
-
return -1.0;
|
|
36
|
-
}
|
|
37
91
|
|
|
38
|
-
|
|
39
|
-
let t = 0.5 * (dir.y + 1.0);
|
|
40
|
-
return mix(vec3<f32>(0.04, 0.05, 0.08), vec3<f32>(0.65, 0.78, 0.92), t);
|
|
92
|
+
return BounceResult(p, v, mask);
|
|
41
93
|
}
|
|
42
94
|
|
|
43
|
-
fn
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
95
|
+
fn hash_u32(x: u32) -> u32 {
|
|
96
|
+
var v = x;
|
|
97
|
+
v = v ^ (v >> 16u);
|
|
98
|
+
v = v * 0x7feb352du;
|
|
99
|
+
v = v ^ (v >> 15u);
|
|
100
|
+
v = v * 0x846ca68bu;
|
|
101
|
+
v = v ^ (v >> 16u);
|
|
102
|
+
return v;
|
|
48
103
|
}
|
|
49
104
|
|
|
50
|
-
fn
|
|
51
|
-
|
|
52
|
-
let r = u32(round(c.x * 255.0));
|
|
53
|
-
let g = u32(round(c.y * 255.0));
|
|
54
|
-
let b = u32(round(c.z * 255.0));
|
|
55
|
-
return (255u << 24) | (b << 16) | (g << 8) | r;
|
|
105
|
+
fn neighbor_index(seed: u32, salt: u32, count: u32) -> u32 {
|
|
106
|
+
return hash_u32(seed ^ salt) % count;
|
|
56
107
|
}
|
|
57
108
|
|
|
58
109
|
@compute @workgroup_size(64)
|
|
59
|
-
fn
|
|
110
|
+
fn simulate_main(@builtin(global_invocation_id) gid: vec3<u32>) {
|
|
60
111
|
let idx = gid.x;
|
|
61
112
|
if (idx >= params.job_count) {
|
|
62
113
|
return;
|
|
@@ -67,68 +118,148 @@ fn raytrace_main(@builtin(global_invocation_id) gid: vec3<u32>) {
|
|
|
67
118
|
return;
|
|
68
119
|
}
|
|
69
120
|
|
|
70
|
-
let
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
let
|
|
75
|
-
|
|
121
|
+
let payload_words = output_jobs[idx].payload_words;
|
|
122
|
+
if (payload_words == 0u) {
|
|
123
|
+
return;
|
|
124
|
+
}
|
|
125
|
+
let payload_offset = output_jobs[idx].payload_offset;
|
|
126
|
+
if (payload_offset + payload_words > arrayLength(&input_payloads)) {
|
|
127
|
+
return;
|
|
128
|
+
}
|
|
129
|
+
let job = input_payloads[payload_offset];
|
|
130
|
+
if (job >= sim.count) {
|
|
131
|
+
return;
|
|
132
|
+
}
|
|
76
133
|
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
let
|
|
134
|
+
var inst = instances[job];
|
|
135
|
+
var pos = inst.pos.xyz;
|
|
136
|
+
var vel = inst.vel.xyz;
|
|
137
|
+
let half = inst.half.xyz;
|
|
138
|
+
var face_mask: u32 = 0u;
|
|
139
|
+
var body_hits: u32 = 0u;
|
|
140
|
+
let self_radius = length(half);
|
|
81
141
|
|
|
82
|
-
for (var
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
let px = start_x + x;
|
|
89
|
-
if (px >= render.width) {
|
|
90
|
-
continue;
|
|
91
|
-
}
|
|
142
|
+
for (var step: u32 = 0u; step < sim.steps; step = step + 1u) {
|
|
143
|
+
pos = pos + vel * sim.dt;
|
|
144
|
+
let bounce = apply_bounds(pos, vel, half);
|
|
145
|
+
pos = bounce.pos;
|
|
146
|
+
vel = bounce.vel;
|
|
147
|
+
face_mask = face_mask | bounce.mask;
|
|
92
148
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
var t_hit = -1.0;
|
|
106
|
-
var base = vec3<f32>(0.0);
|
|
107
|
-
var center = vec3<f32>(0.0);
|
|
108
|
-
if (t1 > 0.0) {
|
|
109
|
-
t_hit = t1;
|
|
110
|
-
base = vec3<f32>(0.86, 0.42, 0.32);
|
|
111
|
-
center = vec3<f32>(0.0, 1.0, 0.0);
|
|
112
|
-
}
|
|
113
|
-
if (t2 > 0.0 && (t_hit < 0.0 || t2 < t_hit)) {
|
|
114
|
-
t_hit = t2;
|
|
115
|
-
base = vec3<f32>(0.2, 0.7, 0.9);
|
|
116
|
-
center = vec3<f32>(-1.6, 0.6, -0.5);
|
|
117
|
-
}
|
|
118
|
-
if (t3 > 0.0 && (t_hit < 0.0 || t3 < t_hit)) {
|
|
119
|
-
t_hit = t3;
|
|
120
|
-
base = vec3<f32>(0.32, 0.3, 0.26);
|
|
121
|
-
center = vec3<f32>(0.0, -1000.0, 0.0);
|
|
122
|
-
}
|
|
149
|
+
if (sim.count <= COLLISION_BRUTE_FORCE_MAX) {
|
|
150
|
+
for (var other_idx: u32 = 0u; other_idx < sim.count; other_idx = other_idx + 1u) {
|
|
151
|
+
if (other_idx == job) {
|
|
152
|
+
continue;
|
|
153
|
+
}
|
|
154
|
+
let other = instances[other_idx];
|
|
155
|
+
let other_pos = other.pos.xyz;
|
|
156
|
+
let other_vel = other.vel.xyz;
|
|
157
|
+
let other_radius = length(other.half.xyz);
|
|
158
|
+
let delta = pos - other_pos;
|
|
159
|
+
let dist_sq = dot(delta, delta);
|
|
160
|
+
let min_dist = self_radius + other_radius;
|
|
123
161
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
162
|
+
if (dist_sq < min_dist * min_dist && dist_sq > COLLISION_EPSILON) {
|
|
163
|
+
let dist = sqrt(dist_sq);
|
|
164
|
+
let normal = delta / dist;
|
|
165
|
+
let overlap = min_dist - dist;
|
|
166
|
+
pos = pos + normal * (overlap * COLLISION_PUSH);
|
|
167
|
+
let rel_vel = vel - other_vel;
|
|
168
|
+
let approach = dot(rel_vel, normal);
|
|
169
|
+
if (approach < 0.0) {
|
|
170
|
+
vel = vel - normal * (approach * (1.0 + COLLISION_RESTITUTION)) * 0.5;
|
|
171
|
+
}
|
|
172
|
+
body_hits = body_hits + 1u;
|
|
173
|
+
}
|
|
128
174
|
}
|
|
175
|
+
} else {
|
|
176
|
+
let step_seed = job ^ (step * 0x9e3779b9u);
|
|
177
|
+
for (var sample: u32 = 0u; sample < COLLISION_SAMPLES; sample = sample + 1u) {
|
|
178
|
+
let neighbor = neighbor_index(
|
|
179
|
+
step_seed,
|
|
180
|
+
sample * 0x85ebca6bu + 0x27d4eb2du,
|
|
181
|
+
sim.count,
|
|
182
|
+
);
|
|
183
|
+
if (neighbor == job) {
|
|
184
|
+
continue;
|
|
185
|
+
}
|
|
129
186
|
|
|
130
|
-
|
|
131
|
-
|
|
187
|
+
let other = instances[neighbor];
|
|
188
|
+
let other_pos = other.pos.xyz;
|
|
189
|
+
let other_vel = other.vel.xyz;
|
|
190
|
+
let other_radius = length(other.half.xyz);
|
|
191
|
+
let delta = pos - other_pos;
|
|
192
|
+
let dist_sq = dot(delta, delta);
|
|
193
|
+
let min_dist = self_radius + other_radius;
|
|
194
|
+
|
|
195
|
+
if (dist_sq < min_dist * min_dist && dist_sq > COLLISION_EPSILON) {
|
|
196
|
+
let dist = sqrt(dist_sq);
|
|
197
|
+
let normal = delta / dist;
|
|
198
|
+
let overlap = min_dist - dist;
|
|
199
|
+
pos = pos + normal * (overlap * COLLISION_PUSH);
|
|
200
|
+
let rel_vel = vel - other_vel;
|
|
201
|
+
let approach = dot(rel_vel, normal);
|
|
202
|
+
if (approach < 0.0) {
|
|
203
|
+
vel = vel - normal * (approach * (1.0 + COLLISION_RESTITUTION)) * 0.5;
|
|
204
|
+
}
|
|
205
|
+
body_hits = body_hits + 1u;
|
|
206
|
+
}
|
|
207
|
+
}
|
|
132
208
|
}
|
|
209
|
+
|
|
210
|
+
let bounce2 = apply_bounds(pos, vel, half);
|
|
211
|
+
pos = bounce2.pos;
|
|
212
|
+
vel = bounce2.vel;
|
|
213
|
+
face_mask = face_mask | bounce2.mask;
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
let aabb_min = pos - half;
|
|
217
|
+
let aabb_max = pos + half;
|
|
218
|
+
let radius = length(half);
|
|
219
|
+
let dist = length(pos - sim.sensor.xyz);
|
|
220
|
+
var in_range: f32 = 0.0;
|
|
221
|
+
if (dist <= sim.range + radius) {
|
|
222
|
+
in_range = 1.0;
|
|
223
|
+
}
|
|
224
|
+
let speed = length(vel);
|
|
225
|
+
|
|
226
|
+
let boundary_mask = face_mask;
|
|
227
|
+
let face_hits = countOneBits(boundary_mask);
|
|
228
|
+
if (in_range > 0.0) {
|
|
229
|
+
atomicAdd(&stats[STAT_IN_RANGE], 1u);
|
|
133
230
|
}
|
|
231
|
+
if (face_hits > 0u) {
|
|
232
|
+
atomicAdd(&stats[STAT_FACE_CONTACTS], face_hits);
|
|
233
|
+
}
|
|
234
|
+
if ((boundary_mask & 1u) != 0u) {
|
|
235
|
+
atomicAdd(&stats[STAT_FACE_X_NEG], 1u);
|
|
236
|
+
}
|
|
237
|
+
if ((boundary_mask & 2u) != 0u) {
|
|
238
|
+
atomicAdd(&stats[STAT_FACE_X_POS], 1u);
|
|
239
|
+
}
|
|
240
|
+
if ((boundary_mask & 4u) != 0u) {
|
|
241
|
+
atomicAdd(&stats[STAT_FACE_Y_NEG], 1u);
|
|
242
|
+
}
|
|
243
|
+
if ((boundary_mask & 8u) != 0u) {
|
|
244
|
+
atomicAdd(&stats[STAT_FACE_Y_POS], 1u);
|
|
245
|
+
}
|
|
246
|
+
if ((boundary_mask & 16u) != 0u) {
|
|
247
|
+
atomicAdd(&stats[STAT_FACE_Z_NEG], 1u);
|
|
248
|
+
}
|
|
249
|
+
if ((boundary_mask & 32u) != 0u) {
|
|
250
|
+
atomicAdd(&stats[STAT_FACE_Z_POS], 1u);
|
|
251
|
+
}
|
|
252
|
+
if (body_hits > 0u) {
|
|
253
|
+
atomicAdd(&stats[STAT_BODY_CONTACTS], body_hits);
|
|
254
|
+
face_mask = boundary_mask | BODY_CONTACT_FLAG;
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
inst.pos = vec4<f32>(pos, 1.0);
|
|
258
|
+
inst.vel = vec4<f32>(vel, 0.0);
|
|
259
|
+
instances[job] = inst;
|
|
260
|
+
|
|
261
|
+
results[job].aabb_min = vec4<f32>(aabb_min, 0.0);
|
|
262
|
+
results[job].aabb_max = vec4<f32>(aabb_max, 0.0);
|
|
263
|
+
results[job].sphere = vec4<f32>(pos, radius);
|
|
264
|
+
results[job].metrics = vec4<f32>(dist, speed, in_range, f32(face_mask));
|
|
134
265
|
}
|