@holoscript/engine 6.0.3 → 6.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/AutoMesher-CK47F6AV.js +17 -0
- package/dist/GPUBuffers-2LHBCD7X.js +9 -0
- package/dist/WebGPUContext-TNEUYU2Y.js +11 -0
- package/dist/animation/index.cjs +38 -38
- package/dist/animation/index.d.cts +1 -1
- package/dist/animation/index.d.ts +1 -1
- package/dist/animation/index.js +1 -1
- package/dist/audio/index.cjs +16 -6
- package/dist/audio/index.d.cts +1 -1
- package/dist/audio/index.d.ts +1 -1
- package/dist/audio/index.js +1 -1
- package/dist/camera/index.cjs +23 -23
- package/dist/camera/index.d.cts +1 -1
- package/dist/camera/index.d.ts +1 -1
- package/dist/camera/index.js +1 -1
- package/dist/character/index.cjs +6 -4
- package/dist/character/index.js +1 -1
- package/dist/choreography/index.cjs +1194 -0
- package/dist/choreography/index.d.cts +687 -0
- package/dist/choreography/index.d.ts +687 -0
- package/dist/choreography/index.js +1156 -0
- package/dist/chunk-2CSNRI2N.js +217 -0
- package/dist/chunk-33T2WINR.js +266 -0
- package/dist/chunk-35R73OFM.js +1257 -0
- package/dist/chunk-4MMDSUNP.js +1256 -0
- package/dist/chunk-5V6HOU72.js +319 -0
- package/dist/chunk-6QOP6PYF.js +1038 -0
- package/dist/chunk-7KMJVHIL.js +8944 -0
- package/dist/chunk-7VPUC62U.js +1106 -0
- package/dist/chunk-A2Y6RCAT.js +1878 -0
- package/dist/chunk-AHM42MK6.js +8944 -0
- package/dist/chunk-BL7IDTHE.js +218 -0
- package/dist/chunk-CITOMSWL.js +10462 -0
- package/dist/chunk-CXDPKW2K.js +8944 -0
- package/dist/chunk-CXZPLD4S.js +223 -0
- package/dist/chunk-CZYJE7IH.js +5169 -0
- package/dist/chunk-D2OP7YC7.js +6325 -0
- package/dist/chunk-EDRVQHUU.js +1544 -0
- package/dist/chunk-EJSLOOW2.js +3589 -0
- package/dist/chunk-F53SFGW5.js +1878 -0
- package/dist/chunk-HCFPELPY.js +919 -0
- package/dist/chunk-HNEE36PY.js +93 -0
- package/dist/chunk-HYXNV36F.js +1256 -0
- package/dist/chunk-IB7KHVFY.js +821 -0
- package/dist/chunk-IBBO7YYG.js +690 -0
- package/dist/chunk-ILIBGINU.js +5470 -0
- package/dist/chunk-IS4MHLKN.js +5479 -0
- package/dist/chunk-JT2PFKWD.js +5479 -0
- package/dist/chunk-K4CUB4NY.js +1038 -0
- package/dist/chunk-KATDQXRJ.js +10462 -0
- package/dist/chunk-KBQE6ZFJ.js +8944 -0
- package/dist/chunk-KBVD5K7E.js +560 -0
- package/dist/chunk-KCDPVQRY.js +4088 -0
- package/dist/chunk-KN4QJPKN.js +8944 -0
- package/dist/chunk-KWJ3ROSI.js +8944 -0
- package/dist/chunk-L45VF6DD.js +919 -0
- package/dist/chunk-LY4T37YK.js +307 -0
- package/dist/chunk-MDN5WZXA.js +1544 -0
- package/dist/chunk-MGCDP6VU.js +928 -0
- package/dist/chunk-NCX7X6G2.js +8681 -0
- package/dist/chunk-OF54BPVD.js +913 -0
- package/dist/chunk-OWSN2Q3Q.js +690 -0
- package/dist/chunk-PRRB5TTA.js +406 -0
- package/dist/chunk-PXWVQF76.js +4086 -0
- package/dist/chunk-PYCOIDT2.js +812 -0
- package/dist/chunk-PZCSADOV.js +928 -0
- package/dist/chunk-Q2XBVS2K.js +1038 -0
- package/dist/chunk-QDZRXWN5.js +1776 -0
- package/dist/chunk-RNWOZ6WQ.js +913 -0
- package/dist/chunk-ROLFT4CJ.js +1693 -0
- package/dist/chunk-SLTJRZ2N.js +266 -0
- package/dist/chunk-SRUS5XSU.js +4088 -0
- package/dist/chunk-TKCA3WZ5.js +5409 -0
- package/dist/chunk-TNRMXYI2.js +1650 -0
- package/dist/chunk-TQB3GJGM.js +9763 -0
- package/dist/chunk-TUFGXG6K.js +510 -0
- package/dist/chunk-U6KMTGQJ.js +632 -0
- package/dist/chunk-VMGJQST6.js +8681 -0
- package/dist/chunk-X4F4TCG4.js +5470 -0
- package/dist/chunk-ZIFROE75.js +1544 -0
- package/dist/chunk-ZIJQYHSQ.js +1204 -0
- package/dist/combat/index.cjs +4 -4
- package/dist/combat/index.d.cts +1 -1
- package/dist/combat/index.d.ts +1 -1
- package/dist/combat/index.js +1 -1
- package/dist/ecs/index.cjs +1 -1
- package/dist/ecs/index.js +1 -1
- package/dist/environment/index.cjs +14 -14
- package/dist/environment/index.d.cts +1 -1
- package/dist/environment/index.d.ts +1 -1
- package/dist/environment/index.js +1 -1
- package/dist/gpu/index.cjs +4810 -0
- package/dist/gpu/index.js +3714 -0
- package/dist/hologram/index.cjs +27 -1
- package/dist/hologram/index.js +1 -1
- package/dist/index-B2PIsAmR.d.cts +2180 -0
- package/dist/index-B2PIsAmR.d.ts +2180 -0
- package/dist/index-BHySEPX7.d.cts +2921 -0
- package/dist/index-BJV21zuy.d.cts +341 -0
- package/dist/index-BJV21zuy.d.ts +341 -0
- package/dist/index-BQutTphC.d.cts +790 -0
- package/dist/index-ByIq2XrS.d.cts +3910 -0
- package/dist/index-BysHjDSO.d.cts +224 -0
- package/dist/index-BysHjDSO.d.ts +224 -0
- package/dist/index-CKwAJGck.d.ts +455 -0
- package/dist/index-CUl3QstQ.d.cts +3006 -0
- package/dist/index-CUl3QstQ.d.ts +3006 -0
- package/dist/index-CmYtNiI-.d.cts +953 -0
- package/dist/index-CmYtNiI-.d.ts +953 -0
- package/dist/index-CnRzWxi_.d.cts +522 -0
- package/dist/index-CnRzWxi_.d.ts +522 -0
- package/dist/index-CwRWbSC7.d.ts +2921 -0
- package/dist/index-CxKIBstO.d.ts +790 -0
- package/dist/index-DJ6-R8vh.d.cts +455 -0
- package/dist/index-DQKisbcI.d.cts +4968 -0
- package/dist/index-DQKisbcI.d.ts +4968 -0
- package/dist/index-DRT2zJez.d.ts +3910 -0
- package/dist/index-DfNLiAka.d.cts +192 -0
- package/dist/index-DfNLiAka.d.ts +192 -0
- package/dist/index-nMvkoRm8.d.cts +405 -0
- package/dist/index-nMvkoRm8.d.ts +405 -0
- package/dist/index-s9yOFU37.d.cts +604 -0
- package/dist/index-s9yOFU37.d.ts +604 -0
- package/dist/index.cjs +22966 -6960
- package/dist/index.d.cts +864 -20
- package/dist/index.d.ts +864 -20
- package/dist/index.js +3062 -48
- package/dist/input/index.cjs +1 -1
- package/dist/input/index.js +1 -1
- package/dist/orbital/index.cjs +3 -3
- package/dist/orbital/index.d.cts +1 -1
- package/dist/orbital/index.d.ts +1 -1
- package/dist/orbital/index.js +1 -1
- package/dist/particles/index.cjs +16 -16
- package/dist/particles/index.d.cts +1 -1
- package/dist/particles/index.d.ts +1 -1
- package/dist/particles/index.js +1 -1
- package/dist/physics/index.cjs +2377 -21
- package/dist/physics/index.d.cts +1 -1
- package/dist/physics/index.d.ts +1 -1
- package/dist/physics/index.js +35 -1
- package/dist/postfx/index.cjs +3491 -0
- package/dist/postfx/index.js +93 -0
- package/dist/procedural/index.cjs +1 -1
- package/dist/procedural/index.js +1 -1
- package/dist/puppeteer-5VF6KDVO.js +52197 -0
- package/dist/puppeteer-IZVZ3SG4.js +52197 -0
- package/dist/rendering/index.cjs +33 -32
- package/dist/rendering/index.d.cts +1 -1
- package/dist/rendering/index.d.ts +1 -1
- package/dist/rendering/index.js +8 -6
- package/dist/runtime/index.cjs +23 -13
- package/dist/runtime/index.d.cts +1 -1
- package/dist/runtime/index.d.ts +1 -1
- package/dist/runtime/index.js +8 -6
- package/dist/runtime/protocols/index.cjs +349 -0
- package/dist/runtime/protocols/index.js +15 -0
- package/dist/scene/index.cjs +8 -8
- package/dist/scene/index.d.cts +1 -1
- package/dist/scene/index.d.ts +1 -1
- package/dist/scene/index.js +1 -1
- package/dist/shader/index.cjs +3087 -0
- package/dist/shader/index.js +3044 -0
- package/dist/simulation/index.cjs +10680 -0
- package/dist/simulation/index.d.cts +3 -0
- package/dist/simulation/index.d.ts +3 -0
- package/dist/simulation/index.js +307 -0
- package/dist/spatial/index.cjs +2443 -0
- package/dist/spatial/index.d.cts +1545 -0
- package/dist/spatial/index.d.ts +1545 -0
- package/dist/spatial/index.js +2400 -0
- package/dist/terrain/index.cjs +1 -1
- package/dist/terrain/index.d.cts +1 -1
- package/dist/terrain/index.d.ts +1 -1
- package/dist/terrain/index.js +1 -1
- package/dist/transformers.node-4NKAPD5U.js +45620 -0
- package/dist/vm/index.cjs +7 -8
- package/dist/vm/index.d.cts +1 -1
- package/dist/vm/index.d.ts +1 -1
- package/dist/vm/index.js +1 -1
- package/dist/vm-bridge/index.cjs +2 -2
- package/dist/vm-bridge/index.d.cts +2 -2
- package/dist/vm-bridge/index.d.ts +2 -2
- package/dist/vm-bridge/index.js +1 -1
- package/dist/vr/index.cjs +6 -6
- package/dist/vr/index.js +1 -1
- package/dist/world/index.cjs +3 -3
- package/dist/world/index.d.cts +1 -1
- package/dist/world/index.d.ts +1 -1
- package/dist/world/index.js +1 -1
- package/package.json +53 -21
- package/LICENSE +0 -21
|
@@ -0,0 +1,4810 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Bundler runtime helpers: short aliases for the Object reflection primitives
// used by the interop functions below.
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Turn a single-entry object (`{ "path"() { ... } }`) into a run-once initializer.
 * The first call invokes the object's first own property as a function and caches
 * its return value; later calls return the cached value without re-running it.
 */
var __esm = (fn, res) => function __init() {
  if (fn) {
    const initializer = fn[__getOwnPropNames(fn)[0]];
    // Cleared before the call so a re-entrant or repeated call skips the initializer.
    fn = 0;
    res = initializer(0);
  }
  return res;
};

/**
 * Define every enumerable key of `all` on `target` as an enumerable getter.
 * The values of `all` are the getter functions themselves.
 */
var __export = (target, all) => {
  for (const name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirror the own properties of `from` onto `to` as live getters, skipping keys
 * that `to` already owns and the single key named by `except`.
 * Enumerability follows the source descriptor (enumerable when none is found).
 * Returns `to`. `desc` is used only as scratch storage.
 */
var __copyProps = (to, from, except, desc) => {
  const isObject = from && typeof from === "object";
  if (!isObject && typeof from !== "function") {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

/**
 * Build a CommonJS export object for `mod`: a fresh object tagged with a
 * non-enumerable `__esModule: true` that forwards every own property of `mod`.
 */
var __toCommonJS = (mod) => {
  const tagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(tagged, mod);
};
|
|
22
|
+
|
|
23
|
+
// src/gpu/WebGPUContext.ts
|
|
24
|
+
// Export namespace for src/gpu/WebGPUContext.ts. Each entry is registered as a
// getter, so the binding is read when the property is accessed rather than here.
var WebGPUContext_exports = {};
__export(WebGPUContext_exports, {
  WebGPUContext: function () {
    return WebGPUContext;
  },
  createPhysicsSimulation: function () {
    return createPhysicsSimulation;
  },
  getGlobalWebGPUContext: function () {
    return getGlobalWebGPUContext;
  }
});
|
|
30
|
+
/**
 * Return the shared WebGPUContext, constructing it on first use.
 *
 * `options` are only passed to the constructor by the call that creates the
 * instance; once an instance exists it is returned as-is and `options` is ignored.
 *
 * @param {object} [options] Constructor options for the first call.
 * @returns The shared WebGPUContext instance.
 */
function getGlobalWebGPUContext(options) {
  if (globalContext) {
    return globalContext;
  }
  globalContext = new WebGPUContext(options);
  return globalContext;
}
|
|
36
|
+
/**
 * Try to bring up a WebGPU context for physics and report whether it is usable.
 *
 * The GPU path is attempted unless `options.preferGPU` is exactly `false`.
 * Any error raised while creating or initializing the context is logged with
 * `console.warn` and treated as "no GPU".
 *
 * @param {{ preferGPU?: boolean }} [options] Optional; calling with no argument
 *   (or `null`) behaves like `{}`.
 * @returns {Promise<{ usingGPU: boolean, context?: object }>} `{ usingGPU: true, context }`
 *   when the context initialized and reports support, otherwise `{ usingGPU: false }`.
 */
async function createPhysicsSimulation(options = {}) {
  // Null-safe read: a missing options object must not throw before the fallback logic runs.
  if (options?.preferGPU === false) {
    return { usingGPU: false };
  }
  try {
    const context = new WebGPUContext({ fallbackToCPU: true });
    await context.initialize();
    if (context.isSupported()) {
      return { usingGPU: true, context };
    }
  } catch (error) {
    console.warn("GPU initialization failed, using CPU fallback:", error);
  }
  return { usingGPU: false };
}
|
|
50
|
+
var WebGPUContext, globalContext;
var init_WebGPUContext = __esm({
  "src/gpu/WebGPUContext.ts"() {
    "use strict";
    /**
     * Owns a WebGPU adapter/device pair together with the options used to
     * request them, and exposes the adapter's limits and feature set.
     */
    WebGPUContext = class {
      adapter = null;
      device = null;
      limits = null;
      features = /* @__PURE__ */ new Set();
      options;
      initialized = false;
      /**
       * @param {object} [options]
       * @param {string} [options.powerPreference="high-performance"] Passed to requestAdapter().
       * @param {string[]} [options.requiredFeatures=[]] Features that must be present on the adapter.
       * @param {object} [options.requiredLimits={}] Passed to requestDevice().
       * @param {boolean} [options.fallbackToCPU=true] When false, an unavailable GPU is an error.
       */
      constructor(options = {}) {
        this.options = {
          powerPreference: options.powerPreference ?? "high-performance",
          requiredFeatures: options.requiredFeatures ?? [],
          requiredLimits: options.requiredLimits ?? {},
          fallbackToCPU: options.fallbackToCPU ?? true
        };
      }
      /**
       * Initialize WebGPU context
       *
       * Requests an adapter and a device, records the adapter's limits and
       * features, and installs device-lost / uncaptured-error listeners.
       *
       * @returns Promise that resolves when initialization is complete (the
       *   context may still be unsupported; check isSupported()).
       * @throws Error if WebGPU is not supported and fallbackToCPU is false
       */
      async initialize() {
        if (this.initialized) {
          console.warn("WebGPUContext already initialized");
          return;
        }
        // `navigator` is not defined in every runtime; probing it with `in` directly
        // would throw before the fallbackToCPU policy gets a chance to apply.
        if (typeof navigator === "undefined" || !("gpu" in navigator)) {
          this.handleUnsupported("WebGPU not supported in this browser");
          return;
        }
        try {
          this.adapter = await navigator.gpu.requestAdapter({
            powerPreference: this.options.powerPreference
          });
          if (!this.adapter) {
            this.handleUnsupported("Failed to get WebGPU adapter");
            return;
          }
          this.limits = this.adapter.limits;
          this.features = new Set(this.adapter.features);
          const missingFeatures = this.options.requiredFeatures.filter(
            (feature) => !this.features.has(feature)
          );
          if (missingFeatures.length > 0) {
            throw new Error(`Missing required WebGPU features: ${missingFeatures.join(", ")}`);
          }
          const deviceDescriptor = {
            requiredFeatures: this.options.requiredFeatures,
            requiredLimits: this.options.requiredLimits
          };
          this.device = await this.adapter.requestDevice(deviceDescriptor);
          this.device.lost.then((info) => {
            console.error("WebGPU device lost:", info.message, info.reason);
            // A loss caused by our own destroy() needs no recovery.
            if (info.reason !== "destroyed") {
              this.handleDeviceLost(info);
            }
          });
          this.device.addEventListener("uncapturederror", (event) => {
            console.error("WebGPU uncaptured error:", event.error);
          });
          this.initialized = true;
        } catch (error) {
          this.handleUnsupported(`WebGPU initialization failed: ${error}`);
        }
      }
      /**
       * Check if WebGPU is supported and initialized
       */
      isSupported() {
        return this.initialized && this.device !== null;
      }
      /**
       * Get WebGPU device
       * @throws Error if device is not initialized
       */
      getDevice() {
        if (!this.device) {
          throw new Error("WebGPU device not initialized. Call initialize() first.");
        }
        return this.device;
      }
      /**
       * Get WebGPU adapter
       * @throws Error if adapter is not available
       */
      getAdapter() {
        if (!this.adapter) {
          throw new Error("WebGPU adapter not available");
        }
        return this.adapter;
      }
      /**
       * Get the limits recorded from the adapter during initialize()
       * @throws Error if no limits have been recorded
       */
      getLimits() {
        if (!this.limits) {
          throw new Error("WebGPU limits not available");
        }
        return this.limits;
      }
      /**
       * Get supported features (the live Set, not a copy)
       */
      getFeatures() {
        return this.features;
      }
      /**
       * Check if a specific feature is supported
       */
      hasFeature(feature) {
        return this.features.has(feature);
      }
      /**
       * Get capabilities object: a snapshot of support status plus the current
       * adapter, device, limits and features references.
       */
      getCapabilities() {
        return {
          supported: this.isSupported(),
          adapter: this.adapter,
          device: this.device,
          limits: this.limits,
          features: this.features
        };
      }
      /**
       * Get optimal workgroup size for compute shaders
       *
       * Picks the largest of 256, 128 or 64 that does not exceed
       * `limits.maxComputeInvocationsPerWorkgroup`, falling back to 32.
       * Returns 256 when no limits have been recorded yet.
       */
      getOptimalWorkgroupSize() {
        if (!this.limits) return 256;
        const maxInvocations = this.limits.maxComputeInvocationsPerWorkgroup;
        if (maxInvocations >= 256) return 256;
        if (maxInvocations >= 128) return 128;
        if (maxInvocations >= 64) return 64;
        return 32;
      }
      /**
       * Cleanup resources: destroys the device (if any) and resets all state so
       * initialize() can be called again.
       */
      destroy() {
        if (this.device) {
          this.device.destroy();
          this.device = null;
        }
        this.adapter = null;
        this.limits = null;
        this.features.clear();
        this.initialized = false;
      }
      /**
       * Handle unsupported browser/device
       *
       * Warns, then either throws (fallbackToCPU is false) or marks the context
       * as not initialized so callers can use the CPU path.
       * @throws Error if fallbackToCPU is false
       */
      handleUnsupported(reason) {
        console.warn(`\u26A0\uFE0F ${reason}`);
        if (!this.options.fallbackToCPU) {
          throw new Error(`WebGPU required but not available: ${reason}`);
        }
        console.warn("\u{1F4A1} Falling back to CPU-based physics simulation");
        this.initialized = false;
      }
      /**
       * Handle device lost event by attempting a fresh initialize().
       */
      handleDeviceLost(_info) {
        console.error("WebGPU device lost, attempting to recreate...");
        this.initialized = false;
        // Drop the dead handle: if re-initialization falls back to CPU, getDevice()
        // must throw rather than hand out a device that can no longer do work.
        this.device = null;
        this.initialize().catch((error) => {
          console.error("Failed to reinitialize WebGPU device:", error);
          if (this.options.fallbackToCPU) {
            console.warn("Falling back to CPU physics");
          }
        });
      }
      /**
       * Look up GPU adapter information (for debugging)
       *
       * Prefers the `adapter.info` attribute and falls back to the older
       * `requestAdapterInfo()` method when only that is available.
       *
       * @returns The adapter info object, or undefined when there is no adapter
       *   or the runtime exposes neither API.
       */
      async logGPUInfo() {
        if (!this.adapter) {
          console.warn("No adapter available");
          return;
        }
        const info = this.adapter.info ?? (await this.adapter.requestAdapterInfo?.());
        return info;
      }
    };
    globalContext = null;
  }
});
|
|
246
|
+
|
|
247
|
+
// src/gpu/GPUBuffers.ts
|
|
248
|
+
// Export namespace for src/gpu/GPUBuffers.ts. Each entry is registered as a
// getter, so the binding is read when the property is accessed rather than here.
var GPUBuffers_exports = {};
__export(GPUBuffers_exports, {
  GPUBufferManager: function () {
    return GPUBufferManager;
  },
  createInitialParticleData: function () {
    return createInitialParticleData;
  }
});
|
|
253
|
+
/**
 * Build the initial CPU-side arrays for `count` particles, four floats each.
 *
 * Layout per particle (index = i * 4):
 *   positions:  [x, y, z, radius]  with x/y/z drawn uniformly from [min, max)
 *   velocities: [0, 0, 0, mass]
 *   states:     [1, 0, 1, 0]       (channel meaning is defined by the consumer)
 *
 * @param {number} count Number of particles.
 * @param {object} [options]
 * @param {{ min?: number, max?: number }} [options.positionRange] Bounds for
 *   x/y/z; `min` defaults to -5 and `max` to 5 independently, so a partial
 *   range such as `{ min: 0 }` is valid.
 * @param {number} [options.radius=0.05] Stored in positions[i*4 + 3].
 * @param {number} [options.mass=1] Stored in velocities[i*4 + 3].
 * @returns {{ positions: Float32Array, velocities: Float32Array, states: Float32Array }}
 */
function createInitialParticleData(count, options = {}) {
  const { positionRange = {}, radius = 0.05, mass = 1 } = options;
  // Default each bound on its own; a missing bound would otherwise turn every position into NaN.
  const { min = -5, max = 5 } = positionRange;
  const span = max - min;
  const positions = new Float32Array(count * 4);
  const velocities = new Float32Array(count * 4);
  const states = new Float32Array(count * 4);
  for (let i = 0; i < count; i++) {
    const idx = i * 4;
    positions[idx + 0] = Math.random() * span + min;
    positions[idx + 1] = Math.random() * span + min;
    positions[idx + 2] = Math.random() * span + min;
    positions[idx + 3] = radius;
    // Typed arrays start zero-filled, so only the non-zero channels are written.
    velocities[idx + 3] = mass;
    states[idx + 0] = 1;
    states[idx + 2] = 1;
  }
  return { positions, velocities, states };
}
|
|
275
|
+
var GPUBufferManager;
var init_GPUBuffers = __esm({
  "src/gpu/GPUBuffers.ts"() {
    "use strict";
    /**
     * Allocates and manages the GPU buffers for a fixed number of particles:
     * read/write pairs for positions, velocities and states, plus one uniform buffer.
     */
    GPUBufferManager = class {
      context;
      device;
      particleCount;
      buffers = null;
      // Buffer sizes (in bytes)
      positionBufferSize;
      velocityBufferSize;
      stateBufferSize;
      uniformBufferSize;
      /**
       * @param context Object exposing getDevice(); the device is fetched immediately,
       *   so getDevice() must already succeed.
       * @param {number} particleCount Number of particles; each uses four floats per buffer.
       */
      constructor(context, particleCount) {
        this.context = context;
        this.device = context.getDevice();
        this.particleCount = particleCount;
        const bytesPerParticle = 4 * Float32Array.BYTES_PER_ELEMENT;
        this.positionBufferSize = particleCount * bytesPerParticle;
        this.velocityBufferSize = particleCount * bytesPerParticle;
        this.stateBufferSize = particleCount * bytesPerParticle;
        // Matches the eight float32 values written by uploadUniformData().
        this.uniformBufferSize = 32;
      }
      /**
       * Initialize GPU buffers: six storage buffers and one uniform buffer.
       */
      async initialize() {
        const positionsRead = this.createStorageBuffer(this.positionBufferSize, "positions-read");
        const positionsWrite = this.createStorageBuffer(this.positionBufferSize, "positions-write");
        const velocitiesRead = this.createStorageBuffer(this.velocityBufferSize, "velocities-read");
        const velocitiesWrite = this.createStorageBuffer(this.velocityBufferSize, "velocities-write");
        const statesRead = this.createStorageBuffer(this.stateBufferSize, "states-read");
        const statesWrite = this.createStorageBuffer(this.stateBufferSize, "states-write");
        const uniforms = this.device.createBuffer({
          label: "uniforms",
          size: this.uniformBufferSize,
          usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
        });
        this.buffers = {
          positionsRead,
          positionsWrite,
          velocitiesRead,
          velocitiesWrite,
          statesRead,
          statesWrite,
          uniforms
        };
      }
      /**
       * Create a storage buffer usable as a copy source and destination
       */
      createStorageBuffer(size, label) {
        return this.device.createBuffer({
          label,
          size,
          usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC
        });
      }
      /**
       * Upload particle data to the current "read" buffers
       *
       * @param {{ positions: Float32Array, velocities: Float32Array, states: Float32Array }} data
       * @throws Error if initialize() has not been called
       */
      uploadParticleData(data) {
        if (!this.buffers) {
          throw new Error("Buffers not initialized. Call initialize() first.");
        }
        // Pass the typed arrays themselves, not `.buffer`: a view created with
        // subarray() covers only part of its ArrayBuffer, and writing the whole
        // backing buffer would upload the wrong bytes.
        this.device.queue.writeBuffer(this.buffers.positionsRead, 0, data.positions);
        this.device.queue.writeBuffer(this.buffers.velocitiesRead, 0, data.velocities);
        this.device.queue.writeBuffer(this.buffers.statesRead, 0, data.states);
      }
      /**
       * Upload uniform data (simulation parameters)
       *
       * Written as eight consecutive float32 values in this order:
       * dt, gravity, groundY, restitution, friction, particleCount, _pad1, _pad2.
       * NOTE(review): particleCount is stored as a float32 here; confirm the
       * shader's uniform struct reads it as f32.
       *
       * @throws Error if initialize() has not been called
       */
      uploadUniformData(uniforms) {
        if (!this.buffers) {
          throw new Error("Buffers not initialized");
        }
        const data = new Float32Array([
          uniforms.dt,
          uniforms.gravity,
          uniforms.groundY,
          uniforms.restitution,
          uniforms.friction,
          uniforms.particleCount,
          uniforms._pad1 ?? 0,
          uniforms._pad2 ?? 0
        ]);
        this.device.queue.writeBuffer(this.buffers.uniforms, 0, data);
      }
      /**
       * Download particle data from GPU (for rendering or analysis)
       *
       * Copies the current "read" buffers into temporary staging buffers, maps
       * them, and returns independent Float32Array copies.
       *
       * Note: This is an async operation that stalls the pipeline.
       * Use sparingly (e.g., once per frame for rendering).
       *
       * @returns {Promise<{ positions: Float32Array, velocities: Float32Array, states: Float32Array }>}
       * @throws Error if initialize() has not been called; rejects if a copy or map step fails
       */
      async downloadParticleData() {
        if (!this.buffers) {
          throw new Error("Buffers not initialized");
        }
        const staging = [];
        try {
          const positionsStaging = this.createStagingBuffer(this.positionBufferSize);
          staging.push(positionsStaging);
          const velocitiesStaging = this.createStagingBuffer(this.velocityBufferSize);
          staging.push(velocitiesStaging);
          const statesStaging = this.createStagingBuffer(this.stateBufferSize);
          staging.push(statesStaging);
          const commandEncoder = this.device.createCommandEncoder({ label: "readback-encoder" });
          commandEncoder.copyBufferToBuffer(
            this.buffers.positionsRead,
            0,
            positionsStaging,
            0,
            this.positionBufferSize
          );
          commandEncoder.copyBufferToBuffer(
            this.buffers.velocitiesRead,
            0,
            velocitiesStaging,
            0,
            this.velocityBufferSize
          );
          commandEncoder.copyBufferToBuffer(
            this.buffers.statesRead,
            0,
            statesStaging,
            0,
            this.stateBufferSize
          );
          this.device.queue.submit([commandEncoder.finish()]);
          await Promise.all(staging.map((buffer) => buffer.mapAsync(GPUMapMode.READ)));
          // slice() copies the data out before the mapping is released below.
          const positions = new Float32Array(positionsStaging.getMappedRange()).slice();
          const velocities = new Float32Array(velocitiesStaging.getMappedRange()).slice();
          const states = new Float32Array(statesStaging.getMappedRange()).slice();
          positionsStaging.unmap();
          velocitiesStaging.unmap();
          statesStaging.unmap();
          return { positions, velocities, states };
        } finally {
          // Release the temporary buffers on every path, including failed maps.
          for (const buffer of staging) {
            buffer.destroy();
          }
        }
      }
      /**
       * Create a staging buffer for GPU→CPU readback
       */
      createStagingBuffer(size) {
        return this.device.createBuffer({
          size,
          usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
        });
      }
      /**
       * Swap read/write buffers (ping-pong)
       *
       * After a compute pass, the "write" buffers contain the new state.
       * This function swaps read ↔ write so the next pass can read the latest data.
       * Only the references held in this manager are exchanged.
       *
       * @throws Error if initialize() has not been called
       */
      swap() {
        if (!this.buffers) {
          throw new Error("Buffers not initialized");
        }
        const tempPos = this.buffers.positionsRead;
        this.buffers.positionsRead = this.buffers.positionsWrite;
        this.buffers.positionsWrite = tempPos;
        const tempVel = this.buffers.velocitiesRead;
        this.buffers.velocitiesRead = this.buffers.velocitiesWrite;
        this.buffers.velocitiesWrite = tempVel;
        const tempState = this.buffers.statesRead;
        this.buffers.statesRead = this.buffers.statesWrite;
        this.buffers.statesWrite = tempState;
      }
      /**
       * Get buffer set for binding to compute pipeline
       * @throws Error if initialize() has not been called
       */
      getBuffers() {
        if (!this.buffers) {
          throw new Error("Buffers not initialized");
        }
        return this.buffers;
      }
      /**
       * Get particle count
       */
      getParticleCount() {
        return this.particleCount;
      }
      /**
       * Destroy buffers and free GPU memory; a no-op when nothing is allocated.
       */
      destroy() {
        if (!this.buffers) return;
        this.buffers.positionsRead.destroy();
        this.buffers.positionsWrite.destroy();
        this.buffers.velocitiesRead.destroy();
        this.buffers.velocitiesWrite.destroy();
        this.buffers.statesRead.destroy();
        this.buffers.statesWrite.destroy();
        this.buffers.uniforms.destroy();
        this.buffers = null;
      }
    };
  }
});
|
|
476
|
+
|
|
477
|
+
// src/gpu/index.ts
|
|
478
|
+
// Public export namespace for src/gpu/index.ts. Each entry is registered as a
// getter, so the binding is read when the property is accessed rather than here.
var gpu_exports = {};
__export(gpu_exports, {
  AbstractGaussianCodec: function () {
    return AbstractGaussianCodec;
  },
  CodecDecodeError: function () {
    return CodecDecodeError;
  },
  CodecDecompressError: function () {
    return CodecDecompressError;
  },
  CodecEncodeError: function () {
    return CodecEncodeError;
  },
  CodecMemoryError: function () {
    return CodecMemoryError;
  },
  CodecNotSupportedError: function () {
    return CodecNotSupportedError;
  },
  ComputePipeline: function () {
    return ComputePipeline;
  },
  GPUBufferManager: function () {
    return GPUBufferManager;
  },
  GaussianCodecError: function () {
    return GaussianCodecError;
  },
  GaussianCodecRegistry: function () {
    return GaussianCodecRegistry;
  },
  GaussianSplatExtractor: function () {
    return GaussianSplatExtractor;
  },
  GaussianSplatSorter: function () {
    return GaussianSplatSorter;
  },
  GltfGaussianSplatCodec: function () {
    return GltfGaussianSplatCodec;
  },
  InstancedRenderer: function () {
    return InstancedRenderer;
  },
  MpegGscCodec: function () {
    return MpegGscCodec;
  },
  SparseLinearSolver: function () {
    return SparseLinearSolver;
  },
  SpatialGrid: function () {
    return SpatialGrid;
  },
  SpzCodec: function () {
    return SpzCodec;
  },
  WebGPUContext: function () {
    return WebGPUContext;
  },
  createDefaultCodecRegistry: function () {
    return createDefaultCodecRegistry;
  },
  createGPUPhysicsSimulation: function () {
    return createGPUPhysicsSimulation;
  },
  createGaussianSplatSorter: function () {
    return createGaussianSplatSorter;
  },
  createInitialParticleData: function () {
    return createInitialParticleData;
  },
  createPhysicsSimulation: function () {
    return createPhysicsSimulation;
  },
  getGlobalCodecRegistry: function () {
    return getGlobalCodecRegistry;
  },
  getGlobalWebGPUContext: function () {
    return getGlobalWebGPUContext;
  },
  resetGlobalCodecRegistry: function () {
    return resetGlobalCodecRegistry;
  }
});
// CommonJS entry point: expose the namespace with an `__esModule` marker.
module.exports = __toCommonJS(gpu_exports);
|
|
509
|
+
|
|
510
|
+
// src/gpu/ComputePipeline.ts
|
|
511
|
+
var ComputePipeline = class {
|
|
512
|
+
context;
|
|
513
|
+
bufferManager;
|
|
514
|
+
device;
|
|
515
|
+
options;
|
|
516
|
+
pipeline = null;
|
|
517
|
+
bindGroupLayout = null;
|
|
518
|
+
bindGroup = null;
|
|
519
|
+
workgroupsX = 0;
|
|
520
|
+
constructor(context, bufferManager, options) {
|
|
521
|
+
this.context = context;
|
|
522
|
+
this.bufferManager = bufferManager;
|
|
523
|
+
this.device = context.getDevice();
|
|
524
|
+
this.options = {
|
|
525
|
+
shaderCode: options.shaderCode,
|
|
526
|
+
entryPoint: options.entryPoint ?? "main",
|
|
527
|
+
workgroupSize: options.workgroupSize ?? this.context.getOptimalWorkgroupSize()
|
|
528
|
+
};
|
|
529
|
+
}
|
|
530
|
+
/**
|
|
531
|
+
* Initialize compute pipeline and bind groups
|
|
532
|
+
*/
|
|
533
|
+
async initialize() {
|
|
534
|
+
const shaderModule = this.device.createShaderModule({
|
|
535
|
+
label: "particle-physics-shader",
|
|
536
|
+
code: this.options.shaderCode
|
|
537
|
+
});
|
|
538
|
+
const compilationInfo = await shaderModule.getCompilationInfo();
|
|
539
|
+
for (const message of compilationInfo.messages) {
|
|
540
|
+
if (message.type === "error") {
|
|
541
|
+
console.error("Shader error:", message.message, `at line ${message.lineNum}`);
|
|
542
|
+
} else if (message.type === "warning") {
|
|
543
|
+
console.warn("Shader warning:", message.message, `at line ${message.lineNum}`);
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
this.bindGroupLayout = this.device.createBindGroupLayout({
|
|
547
|
+
label: "particle-physics-bind-group-layout",
|
|
548
|
+
entries: [
|
|
549
|
+
// @binding(0): Uniforms (read-only)
|
|
550
|
+
{
|
|
551
|
+
binding: 0,
|
|
552
|
+
visibility: GPUShaderStage.COMPUTE,
|
|
553
|
+
buffer: { type: "uniform" }
|
|
554
|
+
},
|
|
555
|
+
// @binding(1): positions_in (read-only)
|
|
556
|
+
{
|
|
557
|
+
binding: 1,
|
|
558
|
+
visibility: GPUShaderStage.COMPUTE,
|
|
559
|
+
buffer: { type: "read-only-storage" }
|
|
560
|
+
},
|
|
561
|
+
// @binding(2): velocities_in (read-only)
|
|
562
|
+
{
|
|
563
|
+
binding: 2,
|
|
564
|
+
visibility: GPUShaderStage.COMPUTE,
|
|
565
|
+
buffer: { type: "read-only-storage" }
|
|
566
|
+
},
|
|
567
|
+
// @binding(3): states_in (read-only)
|
|
568
|
+
{
|
|
569
|
+
binding: 3,
|
|
570
|
+
visibility: GPUShaderStage.COMPUTE,
|
|
571
|
+
buffer: { type: "read-only-storage" }
|
|
572
|
+
},
|
|
573
|
+
// @binding(4): positions_out (read-write)
|
|
574
|
+
{
|
|
575
|
+
binding: 4,
|
|
576
|
+
visibility: GPUShaderStage.COMPUTE,
|
|
577
|
+
buffer: { type: "storage" }
|
|
578
|
+
},
|
|
579
|
+
// @binding(5): velocities_out (read-write)
|
|
580
|
+
{
|
|
581
|
+
binding: 5,
|
|
582
|
+
visibility: GPUShaderStage.COMPUTE,
|
|
583
|
+
buffer: { type: "storage" }
|
|
584
|
+
},
|
|
585
|
+
// @binding(6): states_out (read-write)
|
|
586
|
+
{
|
|
587
|
+
binding: 6,
|
|
588
|
+
visibility: GPUShaderStage.COMPUTE,
|
|
589
|
+
buffer: { type: "storage" }
|
|
590
|
+
}
|
|
591
|
+
]
|
|
592
|
+
});
|
|
593
|
+
const pipelineLayout = this.device.createPipelineLayout({
|
|
594
|
+
label: "particle-physics-pipeline-layout",
|
|
595
|
+
bindGroupLayouts: [this.bindGroupLayout]
|
|
596
|
+
});
|
|
597
|
+
this.pipeline = this.device.createComputePipeline({
|
|
598
|
+
label: "particle-physics-pipeline",
|
|
599
|
+
layout: pipelineLayout,
|
|
600
|
+
compute: {
|
|
601
|
+
module: shaderModule,
|
|
602
|
+
entryPoint: this.options.entryPoint
|
|
603
|
+
}
|
|
604
|
+
});
|
|
605
|
+
this.createBindGroup();
|
|
606
|
+
const particleCount = this.bufferManager.getParticleCount();
|
|
607
|
+
this.workgroupsX = Math.ceil(particleCount / this.options.workgroupSize);
|
|
608
|
+
}
|
|
609
|
+
/**
|
|
610
|
+
* Create bind group linking buffers to shader bindings
|
|
611
|
+
*/
|
|
612
|
+
createBindGroup() {
|
|
613
|
+
if (!this.bindGroupLayout) {
|
|
614
|
+
throw new Error("Bind group layout not created");
|
|
615
|
+
}
|
|
616
|
+
const buffers = this.bufferManager.getBuffers();
|
|
617
|
+
this.bindGroup = this.device.createBindGroup({
|
|
618
|
+
label: "particle-physics-bind-group",
|
|
619
|
+
layout: this.bindGroupLayout,
|
|
620
|
+
entries: [
|
|
621
|
+
{ binding: 0, resource: { buffer: buffers.uniforms } },
|
|
622
|
+
{ binding: 1, resource: { buffer: buffers.positionsRead } },
|
|
623
|
+
{ binding: 2, resource: { buffer: buffers.velocitiesRead } },
|
|
624
|
+
{ binding: 3, resource: { buffer: buffers.statesRead } },
|
|
625
|
+
{ binding: 4, resource: { buffer: buffers.positionsWrite } },
|
|
626
|
+
{ binding: 5, resource: { buffer: buffers.velocitiesWrite } },
|
|
627
|
+
{ binding: 6, resource: { buffer: buffers.statesWrite } }
|
|
628
|
+
]
|
|
629
|
+
});
|
|
630
|
+
}
|
|
631
|
+
/**
|
|
632
|
+
* Update uniform buffer with simulation parameters
|
|
633
|
+
*/
|
|
634
|
+
updateUniforms(uniforms) {
|
|
635
|
+
this.bufferManager.uploadUniformData(uniforms);
|
|
636
|
+
}
|
|
637
|
+
/**
|
|
638
|
+
* Dispatch compute shader to update all particles
|
|
639
|
+
*
|
|
640
|
+
* @param commandEncoder Optional command encoder (creates new one if not provided)
|
|
641
|
+
* @returns Command encoder (for chaining or submission)
|
|
642
|
+
*/
|
|
643
|
+
dispatch(commandEncoder) {
|
|
644
|
+
if (!this.pipeline || !this.bindGroup) {
|
|
645
|
+
throw new Error("Pipeline not initialized. Call initialize() first.");
|
|
646
|
+
}
|
|
647
|
+
const encoder = commandEncoder ?? this.device.createCommandEncoder({
|
|
648
|
+
label: "particle-physics-compute-encoder"
|
|
649
|
+
});
|
|
650
|
+
const computePass = encoder.beginComputePass({
|
|
651
|
+
label: "particle-physics-compute-pass"
|
|
652
|
+
});
|
|
653
|
+
computePass.setPipeline(this.pipeline);
|
|
654
|
+
computePass.setBindGroup(0, this.bindGroup);
|
|
655
|
+
computePass.dispatchWorkgroups(this.workgroupsX, 1, 1);
|
|
656
|
+
computePass.end();
|
|
657
|
+
return encoder;
|
|
658
|
+
}
|
|
659
|
+
/**
|
|
660
|
+
* Execute a single simulation step
|
|
661
|
+
*
|
|
662
|
+
* Convenience method that dispatches compute shader and submits to queue.
|
|
663
|
+
*
|
|
664
|
+
* @param uniforms Simulation parameters for this step
|
|
665
|
+
*/
|
|
666
|
+
async step(uniforms) {
|
|
667
|
+
this.updateUniforms(uniforms);
|
|
668
|
+
const encoder = this.dispatch();
|
|
669
|
+
this.device.queue.submit([encoder.finish()]);
|
|
670
|
+
this.bufferManager.swap();
|
|
671
|
+
this.createBindGroup();
|
|
672
|
+
}
|
|
673
|
+
/**
|
|
674
|
+
* Execute multiple simulation steps (batch processing)
|
|
675
|
+
*
|
|
676
|
+
* @param steps Number of steps to execute
|
|
677
|
+
* @param uniforms Simulation parameters (same for all steps)
|
|
678
|
+
* @param onProgress Optional progress callback
|
|
679
|
+
*/
|
|
680
|
+
async run(steps, uniforms, onProgress) {
|
|
681
|
+
const _startTime = performance.now();
|
|
682
|
+
for (let i = 0; i < steps; i++) {
|
|
683
|
+
await this.step(uniforms);
|
|
684
|
+
if (onProgress && i % 10 === 0) {
|
|
685
|
+
onProgress(i + 1, steps);
|
|
686
|
+
}
|
|
687
|
+
}
|
|
688
|
+
}
|
|
689
|
+
/**
|
|
690
|
+
* Get pipeline statistics
|
|
691
|
+
*/
|
|
692
|
+
getStats() {
|
|
693
|
+
return {
|
|
694
|
+
particleCount: this.bufferManager.getParticleCount(),
|
|
695
|
+
workgroupSize: this.options.workgroupSize,
|
|
696
|
+
workgroups: this.workgroupsX,
|
|
697
|
+
threadsTotal: this.workgroupsX * this.options.workgroupSize
|
|
698
|
+
};
|
|
699
|
+
}
|
|
700
|
+
/**
|
|
701
|
+
* Cleanup resources
|
|
702
|
+
*/
|
|
703
|
+
destroy() {
|
|
704
|
+
this.pipeline = null;
|
|
705
|
+
this.bindGroupLayout = null;
|
|
706
|
+
this.bindGroup = null;
|
|
707
|
+
}
|
|
708
|
+
};
|
|
709
|
+
async function createGPUPhysicsSimulation(options) {
  // Bundler-style lazy loaders: run the module initializer, then hand back
  // its export namespace. They are resolved one after the other.
  const loadContextModule = () => (init_WebGPUContext(), WebGPUContext_exports);
  const loadBuffersModule = () => (init_GPUBuffers(), GPUBuffers_exports);
  const ContextClass = (await Promise.resolve().then(loadContextModule)).WebGPUContext;
  const BufferManagerClass = (await Promise.resolve().then(loadBuffersModule)).GPUBufferManager;

  // 1. GPU context - initialized first, then checked for support.
  const context = new ContextClass(options.contextOptions);
  await context.initialize();
  if (!context.isSupported()) {
    throw new Error("WebGPU not supported on this device");
  }

  // 2. Particle buffers sized for the requested particle count.
  const bufferManager = new BufferManagerClass(context, options.particleCount);
  await bufferManager.initialize();

  // 3. Compute pipeline wired to the context and the buffers.
  const pipelineOptions = {
    shaderCode: options.shaderCode,
    workgroupSize: options.workgroupSize
  };
  const pipeline = new ComputePipeline(context, bufferManager, pipelineOptions);
  await pipeline.initialize();

  return { context, bufferManager, pipeline };
}
|
|
726
|
+
|
|
727
|
+
// src/gpu/index.ts
|
|
728
|
+
// NOTE(review): presumably the bundler-generated initializer for the GPUBuffers
// module, invoked here at load time; the same helper is called before
// GPUBuffers_exports is read in createGPUPhysicsSimulation - confirm.
init_GPUBuffers();
|
|
729
|
+
|
|
730
|
+
// src/gpu/GaussianSplatExtractor.ts
|
|
731
|
+
/**
 * Reads sorted Gaussian-splat data back from the GPU into CPU memory.
 *
 * Two sets of MAP_READ staging buffers (A and B) are allocated up front and
 * used alternately. At most one readback is in flight at a time; calls made
 * while one is pending resolve to null instead of queueing.
 */
var GaussianSplatExtractor = class {
  context;
  device;
  options;
  readbackBuffersA = null;
  readbackBuffersB = null;
  isUsingA = true;
  ongoingReadback = null;
  frameCounter = 0;

  /**
   * @param context Object exposing getDevice(), which must return the GPU device.
   * @param options Extractor options; only `maxSplats` is read. It sizes the
   *   staging buffers and is the upper bound on splats per extracted frame.
   */
  constructor(context, options) {
    this.context = context;
    this.device = context.getDevice();
    this.options = {
      maxSplats: options.maxSplats
    };
    this.initializeBuffers();
  }

  /** Allocate both staging buffer sets: 32 bytes per compressed splat, 4 bytes per index. */
  initializeBuffers() {
    const compressedSize = this.options.maxSplats * 32;
    const indicesSize = this.options.maxSplats * 4;
    this.readbackBuffersA = {
      compressed: this.createReadbackBuffer(compressedSize),
      indices: this.createReadbackBuffer(indicesSize)
    };
    this.readbackBuffersB = {
      compressed: this.createReadbackBuffer(compressedSize),
      indices: this.createReadbackBuffer(indicesSize)
    };
  }

  /** Create one CPU-mappable buffer that can be the destination of a GPU copy. */
  createReadbackBuffer(size) {
    return this.device.createBuffer({
      size,
      usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
    });
  }

  /**
   * Captures the sorted state of the GaussianSplatSorter.
   * Internally pipelines the GPU readback to avoid stalling the main render loop.
   *
   * @param sorter Object whose getStats() returns `{ splatCount }`.
   * @param camera Object with an iterable `viewProjectionMatrix` and an
   *   indexable `cameraPosition` (first three components are used).
   * @param compressedSource GPU buffer copied from offset 0 for splatCount * 32 bytes.
   * @param indicesSource GPU buffer copied from offset 0 for splatCount * 4 bytes.
   * @returns Promise resolving to the frame packet, or to null when there is
   *   nothing to extract, a readback is already pending, the frame does not
   *   fit the staging buffers, or the readback fails.
   */
  extractFrame(sorter, camera, compressedSource, indicesSource) {
    // Snapshot the count once so the copy sizes and the packet always agree.
    const splatCount = sorter.getStats().splatCount;
    if (splatCount === 0) return Promise.resolve(null);
    if (this.ongoingReadback) {
      return Promise.resolve(null);
    }
    // The staging buffers hold at most maxSplats entries; a larger copy would
    // be rejected by the GPU, so skip the frame before issuing any GPU work.
    if (splatCount > this.options.maxSplats) {
      console.warn("GaussianSplatExtractor: splatCount exceeds maxSplats; skipping readback");
      return Promise.resolve(null);
    }

    // Capture the camera NOW. The readback resolves asynchronously, and by
    // then the caller may have mutated the camera for a later frame, which
    // would pair this frame's sorted data with the wrong view.
    let cameraState;
    try {
      cameraState = {
        viewProjectionMatrix: Array.from(camera.viewProjectionMatrix),
        cameraPosition: [
          camera.cameraPosition[0],
          camera.cameraPosition[1],
          camera.cameraPosition[2]
        ]
      };
    } catch (e) {
      // Keep the "never throws, resolves to null" contract for bad camera input.
      console.warn("GaussianSplatExtractor readback failed", e);
      return Promise.resolve(null);
    }

    const { compressed, indices } = this.isUsingA ? this.readbackBuffersA : this.readbackBuffersB;
    this.isUsingA = !this.isUsingA;
    const compressedSize = splatCount * 32;
    const indicesSize = splatCount * 4;
    const currentFrame = ++this.frameCounter;

    const encoder = this.device.createCommandEncoder({
      label: "splat-extractor-copy-encoder"
    });
    encoder.copyBufferToBuffer(compressedSource, 0, compressed, 0, compressedSize);
    encoder.copyBufferToBuffer(indicesSource, 0, indices, 0, indicesSize);
    this.device.queue.submit([encoder.finish()]);

    this.ongoingReadback = Promise.all([
      compressed.mapAsync(GPUMapMode.READ, 0, compressedSize),
      indices.mapAsync(GPUMapMode.READ, 0, indicesSize)
    ]).then(() => {
      // slice(0) copies the bytes out so they stay valid after unmap().
      const compData = compressed.getMappedRange(0, compressedSize).slice(0);
      const indData = indices.getMappedRange(0, indicesSize).slice(0);
      return {
        frameId: currentFrame,
        cameraState,
        splatCount,
        compressedSplatsBuffer: compData,
        sortedIndicesBuffer: indData
      };
    }).catch((e) => {
      console.warn("GaussianSplatExtractor readback failed", e);
      return null;
    }).finally(() => {
      // Always release both staging buffers, including on failure: if only
      // one mapAsync() succeeded, that buffer would otherwise stay mapped and
      // could not be used as a copy destination again.
      compressed.unmap();
      indices.unmap();
      this.ongoingReadback = null;
    });
    return this.ongoingReadback;
  }
};
|
|
819
|
+
|
|
820
|
+
// wgsl-raw:C:\Users\josep\Documents\GitHub\HoloScript\packages\engine\src\gpu\shaders\radix-sort.wgsl
|
|
821
|
+
// WGSL source of the GPU radix sort, embedded as a string. Compute entry
// points: buildHistogram, blellochScan, globalPrefixScan, scatter.
var radix_sort_default = `/**
 * Wait-Free Hierarchical Radix Sort - WebGPU Compute Shader
 *
 * Implements a 4-pass 8-bit LSD radix sort using Blelloch exclusive prefix sum.
 * Designed for sorting Gaussian splat indices by depth (back-to-front).
 *
 * Architecture:
 * - 4 passes, each sorting 8 bits of a 32-bit key (LSD order: bits 0-7, 8-15, 16-23, 24-31)
 * - Each pass: histogram -> Blelloch scan -> scatter
 * - Wait-free: no global atomics, only workgroup-level shared memory with barriers
 * - Hierarchical block scan for cross-workgroup prefix sums
 * - Every pass is stable (equal digits keep their input order), which LSD
 *   radix sort requires for the earlier passes to survive the later ones
 *
 * Key design decisions for cross-browser compatibility:
 * - No subgroup operations (not supported in Safari/Firefox)
 * - No global atomics (wait-free guarantee)
 * - workgroup_size(256) fits all GPU vendors (NVIDIA, AMD, Apple, Qualcomm)
 * - All shared memory fits within 16KB (WebGPU minimum guarantee)
 *
 * References:
 * - Blelloch (1990): "Prefix Sums and Their Applications"
 * - Merrill & Grimshaw (2010): "Revisiting Sorting on GPUs"
 * - HoloScript W.035, G.030.01 (3.COMPRESS research)
 *
 * @version 1.0.0
 */

// =============================================================================
// Constants
// =============================================================================

const WORKGROUP_SIZE: u32 = 256u;
const RADIX_BITS: u32 = 8u;
const RADIX_SIZE: u32 = 256u; // 2^8 = 256 buckets per pass
const ELEMENTS_PER_THREAD: u32 = 4u;
const BLOCK_SIZE: u32 = 1024u; // WORKGROUP_SIZE * ELEMENTS_PER_THREAD

// =============================================================================
// Uniforms
// =============================================================================

struct SortUniforms {
  totalCount: u32, // Total number of splats to sort
  bitOffset: u32, // Current bit offset (0, 8, 16, 24)
  blockCount: u32, // Number of workgroup blocks
  _pad: u32,
};

@group(0) @binding(0) var<uniform> uniforms: SortUniforms;

// =============================================================================
// Storage Buffers
// =============================================================================

// Keys (depth values, 32-bit uint - quantized camera-space Z)
@group(0) @binding(1) var<storage, read> keysIn: array<u32>;
@group(0) @binding(2) var<storage, read_write> keysOut: array<u32>;

// Values (splat indices, 32-bit uint)
@group(0) @binding(3) var<storage, read> valuesIn: array<u32>;
@group(0) @binding(4) var<storage, read_write> valuesOut: array<u32>;

// Per-block histograms: blockCount * RADIX_SIZE
@group(0) @binding(5) var<storage, read_write> blockHistograms: array<u32>;

// Global prefix sums for each radix digit: RADIX_SIZE
@group(0) @binding(6) var<storage, read_write> globalPrefixes: array<u32>;

// =============================================================================
// Shared Memory
// =============================================================================

// Shared histogram for Blelloch scan (256 entries)
var<workgroup> sharedHist: array<u32, 256>;

// Shared scratch for the scatter pass, one slot per block element:
// sharedKeys stages each element's radix digit, sharedVals receives each
// element's stable rank within its digit bucket.
var<workgroup> sharedKeys: array<u32, 1024>;
var<workgroup> sharedVals: array<u32, 1024>;

// Shared local histogram for 256-way counting
var<workgroup> sharedLocalHist: array<atomic<u32>, 256>;

// =============================================================================
// Helper: Extract radix digit from key
// =============================================================================

fn extractDigit(key: u32, bitOffset: u32) -> u32 {
  return (key >> bitOffset) & 0xFFu;
}

// =============================================================================
// Pass 1: Build Per-Block Histograms
// =============================================================================

/**
 * Each workgroup processes BLOCK_SIZE elements, counting occurrences of each
 * 8-bit radix digit. Results written to blockHistograms[blockIdx * 256 + digit].
 *
 * This is wait-free: each workgroup writes only to its own histogram region.
 */
@compute @workgroup_size(256)
fn buildHistogram(
  @builtin(global_invocation_id) globalId: vec3<u32>,
  @builtin(local_invocation_id) localId: vec3<u32>,
  @builtin(workgroup_id) groupId: vec3<u32>,
) {
  let tid = localId.x;
  let blockIdx = groupId.x;
  let blockStart = blockIdx * BLOCK_SIZE;

  // Clear shared local histogram
  atomicStore(&sharedLocalHist[tid], 0u);
  workgroupBarrier();

  // Each thread counts ELEMENTS_PER_THREAD elements
  for (var i = 0u; i < ELEMENTS_PER_THREAD; i++) {
    let idx = blockStart + tid * ELEMENTS_PER_THREAD + i;
    if (idx < uniforms.totalCount) {
      let key = keysIn[idx];
      let digit = extractDigit(key, uniforms.bitOffset);
      atomicAdd(&sharedLocalHist[digit], 1u);
    }
  }

  workgroupBarrier();

  // Write shared histogram to global memory
  // Each thread writes one bucket value (256 threads, 256 buckets)
  let histValue = atomicLoad(&sharedLocalHist[tid]);
  blockHistograms[blockIdx * RADIX_SIZE + tid] = histValue;
}

// =============================================================================
// Pass 2: Blelloch Exclusive Prefix Sum (Hierarchical)
// =============================================================================

/**
 * Computes exclusive prefix sums across all block histograms for each radix digit.
 *
 * For each digit d (0..255):
 * globalPrefixes[d] = sum of all blockHistograms[block * 256 + d] for block < blockCount
 *
 * This pass processes one radix digit per workgroup.
 * Each workgroup computes prefix sums across blocks for its assigned digit.
 *
 * Wait-free: uses Blelloch scan on shared memory with workgroup barriers only.
 *
 * LIMITATION: only the first 256 blocks are loaded and scanned, so inputs
 * larger than 256 * BLOCK_SIZE (262144) elements are not handled by this pass.
 */
@compute @workgroup_size(256)
fn blellochScan(
  @builtin(local_invocation_id) localId: vec3<u32>,
  @builtin(workgroup_id) groupId: vec3<u32>,
) {
  let tid = localId.x;
  let digit = groupId.x; // One workgroup per radix digit

  // Load block histogram values for this digit into shared memory
  // If blockCount <= 256, each thread loads one value
  // For larger counts, we'd need multi-pass (rare for typical splat counts)
  if (tid < uniforms.blockCount) {
    sharedHist[tid] = blockHistograms[tid * RADIX_SIZE + digit];
  } else {
    sharedHist[tid] = 0u;
  }

  workgroupBarrier();

  // ---- Blelloch Up-Sweep (Reduce) Phase ----
  // Build partial sums bottom-up in a binary tree
  var offset = 1u;
  for (var d = WORKGROUP_SIZE >> 1u; d > 0u; d >>= 1u) {
    if (tid < d) {
      let ai = offset * (2u * tid + 1u) - 1u;
      let bi = offset * (2u * tid + 2u) - 1u;
      if (ai < WORKGROUP_SIZE && bi < WORKGROUP_SIZE) {
        sharedHist[bi] += sharedHist[ai];
      }
    }
    offset <<= 1u;
    workgroupBarrier();
  }

  // Store total sum and clear last element for exclusive scan
  if (tid == 0u) {
    // Total across all blocks for this digit
    globalPrefixes[digit] = sharedHist[WORKGROUP_SIZE - 1u];
    sharedHist[WORKGROUP_SIZE - 1u] = 0u;
  }

  workgroupBarrier();

  // ---- Blelloch Down-Sweep Phase ----
  // Propagate partial sums back down to produce exclusive prefix sums
  for (var d = 1u; d < WORKGROUP_SIZE; d <<= 1u) {
    offset >>= 1u;
    if (tid < d) {
      let ai = offset * (2u * tid + 1u) - 1u;
      let bi = offset * (2u * tid + 2u) - 1u;
      if (ai < WORKGROUP_SIZE && bi < WORKGROUP_SIZE) {
        let temp = sharedHist[ai];
        sharedHist[ai] = sharedHist[bi];
        sharedHist[bi] += temp;
      }
    }
    workgroupBarrier();
  }

  // Write back the exclusive prefix sum for each block
  if (tid < uniforms.blockCount) {
    blockHistograms[tid * RADIX_SIZE + digit] = sharedHist[tid];
  }
}

// =============================================================================
// Pass 2b: Global Prefix Sum over Digit Totals
// =============================================================================

/**
 * After blellochScan, globalPrefixes[d] contains the total count for digit d.
 * This pass computes an exclusive prefix sum over those totals to get the
 * global scatter offset for each digit.
 *
 * Single workgroup: 256 threads for 256 digits.
 */
@compute @workgroup_size(256)
fn globalPrefixScan(
  @builtin(local_invocation_id) localId: vec3<u32>,
) {
  let tid = localId.x;

  // Load digit totals into shared memory
  sharedHist[tid] = globalPrefixes[tid];

  workgroupBarrier();

  // ---- Blelloch Up-Sweep ----
  var offset = 1u;
  for (var d = WORKGROUP_SIZE >> 1u; d > 0u; d >>= 1u) {
    if (tid < d) {
      let ai = offset * (2u * tid + 1u) - 1u;
      let bi = offset * (2u * tid + 2u) - 1u;
      if (ai < WORKGROUP_SIZE && bi < WORKGROUP_SIZE) {
        sharedHist[bi] += sharedHist[ai];
      }
    }
    offset <<= 1u;
    workgroupBarrier();
  }

  // Clear last for exclusive scan
  if (tid == 0u) {
    sharedHist[WORKGROUP_SIZE - 1u] = 0u;
  }
  workgroupBarrier();

  // ---- Blelloch Down-Sweep ----
  for (var d = 1u; d < WORKGROUP_SIZE; d <<= 1u) {
    offset >>= 1u;
    if (tid < d) {
      let ai = offset * (2u * tid + 1u) - 1u;
      let bi = offset * (2u * tid + 2u) - 1u;
      if (ai < WORKGROUP_SIZE && bi < WORKGROUP_SIZE) {
        let temp = sharedHist[ai];
        sharedHist[ai] = sharedHist[bi];
        sharedHist[bi] += temp;
      }
    }
    workgroupBarrier();
  }

  // Write exclusive prefix sums back
  globalPrefixes[tid] = sharedHist[tid];
}

// =============================================================================
// Pass 3: Scatter (Reorder)
// =============================================================================

/**
 * Each workgroup scatters its BLOCK_SIZE elements to globally sorted positions.
 *
 * For element at local position i with digit d:
 * globalDst = globalPrefixes[d] // global offset for digit d
 * + blockHistograms[block*256+d] // offset from prior blocks
 * + localRank // stable rank within this block for digit d
 *
 * localRank is the number of EARLIER elements of the same block that carry
 * the same digit. It is computed deterministically (no atomic counters), so
 * equal digits keep their input order and the pass is stable.
 *
 * Wait-free: each element writes to a unique output position.
 */
@compute @workgroup_size(256)
fn scatter(
  @builtin(local_invocation_id) localId: vec3<u32>,
  @builtin(workgroup_id) groupId: vec3<u32>,
) {
  let tid = localId.x;
  let blockIdx = groupId.x;
  let blockStart = blockIdx * BLOCK_SIZE;

  // Load this thread's elements and stage every digit in shared memory,
  // indexed by the element's position inside the block.
  var myKeys: array<u32, 4>;
  var myVals: array<u32, 4>;
  var myDigits: array<u32, 4>;

  for (var i = 0u; i < ELEMENTS_PER_THREAD; i++) {
    let localIdx = tid * ELEMENTS_PER_THREAD + i;
    let idx = blockStart + localIdx;
    if (idx < uniforms.totalCount) {
      myKeys[i] = keysIn[idx];
      myVals[i] = valuesIn[idx];
      myDigits[i] = extractDigit(myKeys[i], uniforms.bitOffset);
    } else {
      myKeys[i] = 0xFFFFFFFFu; // Sentinel: sorts to end
      myVals[i] = 0u;
      myDigits[i] = 255u;
    }
    sharedKeys[localIdx] = myDigits[i];
  }

  workgroupBarrier();

  // Number of real (in-range) elements in this block. Padding slots lie at
  // the end of the block and are excluded from ranking.
  var validCount = 0u;
  if (blockStart < uniforms.totalCount) {
    validCount = min(BLOCK_SIZE, uniforms.totalCount - blockStart);
  }

  // Serialized per-digit ranking: thread t owns digit t and walks the block
  // in input order, handing out consecutive ranks to the elements that carry
  // its digit. Each slot of sharedVals is written by exactly one thread.
  var nextRank = 0u;
  for (var j = 0u; j < validCount; j++) {
    if (sharedKeys[j] == tid) {
      sharedVals[j] = nextRank;
      nextRank += 1u;
    }
  }

  workgroupBarrier();

  // Each thread scatters its elements
  for (var i = 0u; i < ELEMENTS_PER_THREAD; i++) {
    let localIdx = tid * ELEMENTS_PER_THREAD + i;
    let idx = blockStart + localIdx;
    if (idx < uniforms.totalCount) {
      let digit = myDigits[i];

      // Stable rank of this element within its digit bucket in this block
      let localRank = sharedVals[localIdx];

      // Compute global destination:
      // globalPrefixes[digit] + blockHistograms[blockIdx * 256 + digit] + localRank
      let globalOffset = globalPrefixes[digit];
      let blockOffset = blockHistograms[blockIdx * RADIX_SIZE + digit];
      let dst = globalOffset + blockOffset + localRank;

      if (dst < uniforms.totalCount) {
        keysOut[dst] = myKeys[i];
        valuesOut[dst] = myVals[i];
      }
    }
  }
}
`;
|
|
1197
|
+
|
|
1198
|
+
// wgsl-raw:C:\Users\josep\Documents\GitHub\HoloScript\packages\engine\src\gpu\shaders\splat-compress.wgsl
|
|
1199
|
+
var splat_compress_default = "/**\n * Gaussian Splat Data Compression & Depth Key Generation\n *\n * Compresses Gaussian splat data for efficient GPU sorting and rendering:\n * - RGBA8 color packing: 4 bytes instead of 16 bytes (vec4<f32>)\n * - Compressed ellipse axes: 2D covariance stored as 3x f16 (6 bytes)\n * - Depth key generation: quantized camera-space Z for radix sort\n *\n * Memory layout (compressed, 32 bytes per splat):\n * [0:12] position (vec3<f32>) 12 bytes\n * [12:16] packedColor (u32, RGBA8) 4 bytes\n * [16:22] packedCov2D (3x u16) 6 bytes (f16 cov entries)\n * [22:24] opacity (f16) 2 bytes\n * [24:28] depth (f32) 4 bytes (camera-space Z)\n * [28:32] padding 4 bytes\n *\n * vs. uncompressed (64 bytes per splat):\n * position: vec3<f32> 12 bytes\n * scale: vec3<f32> 12 bytes\n * rotation: vec4<f32> 16 bytes\n * color: vec4<f32> 16 bytes\n * padding 8 bytes\n *\n * Compression ratio: 32/64 = 50% memory reduction\n *\n * Cross-browser notes:\n * - Uses u32 bit packing instead of f16 (f16 requires shader-f16 feature)\n * - All operations use u32 and f32, universally supported\n *\n * @version 1.0.0\n */\n\n// =============================================================================\n// Structures\n// =============================================================================\n\nstruct SplatRaw {\n pos: vec3<f32>,\n scale: vec3<f32>,\n rot: vec4<f32>, // quaternion\n color: vec4<f32>, // RGBA float\n};\n\nstruct SplatCompressed {\n pos: vec3<f32>, // 12 bytes\n packedColor: u32, // 4 bytes (RGBA8)\n packedCov2D_01: u32, // 4 bytes (cov[0] and cov[1] as f16 pair)\n packedCov2D_2_opacity: u32, // 4 bytes (cov[2] as f16, opacity as f16)\n depth: f32, // 4 bytes (camera-space Z)\n _pad: u32, // 4 bytes alignment\n};\n\nstruct CompressUniforms {\n viewMatrix: mat4x4<f32>, // 64 bytes\n projMatrix: mat4x4<f32>, // 64 bytes\n screenWidth: f32, // 4 bytes\n screenHeight: f32, // 4 bytes\n focalX: f32, // 4 bytes\n focalY: f32, // 4 bytes\n splatCount: 
u32, // 4 bytes\n _pad1: u32,\n _pad2: u32,\n _pad3: u32,\n};\n\n// =============================================================================\n// Bindings\n// =============================================================================\n\n@group(0) @binding(0) var<uniform> uniforms: CompressUniforms;\n@group(0) @binding(1) var<storage, read> splatsIn: array<SplatRaw>;\n@group(0) @binding(2) var<storage, read_write> splatsOut: array<SplatCompressed>;\n@group(0) @binding(3) var<storage, read_write> sortKeys: array<u32>;\n@group(0) @binding(4) var<storage, read_write> sortValues: array<u32>;\n\n// =============================================================================\n// f16 Packing Helpers (no shader-f16 feature required)\n// =============================================================================\n\n/**\n * Pack a f32 value into f16 (IEEE 754 half-precision) stored in lower 16 bits of u32.\n * Handles normals, denormals, inf, and nan correctly.\n */\nfn f32ToF16(value: f32) -> u32 {\n let bits = bitcast<u32>(value);\n let sign = (bits >> 16u) & 0x8000u;\n let exponent = (bits >> 23u) & 0xFFu;\n let mantissa = bits & 0x7FFFFFu;\n\n // Handle special cases\n if (exponent == 0u) {\n // Zero or denormal -> zero in f16\n return sign;\n }\n if (exponent == 255u) {\n // Inf or NaN\n if (mantissa != 0u) {\n return sign | 0x7E00u; // NaN\n }\n return sign | 0x7C00u; // Inf\n }\n\n // Bias conversion: f32 bias=127, f16 bias=15\n let newExponent = i32(exponent) - 127 + 15;\n\n if (newExponent <= 0) {\n // Underflow to zero\n return sign;\n }\n if (newExponent >= 31) {\n // Overflow to infinity\n return sign | 0x7C00u;\n }\n\n return sign | (u32(newExponent) << 10u) | (mantissa >> 13u);\n}\n\n/**\n * Unpack f16 (stored in lower 16 bits of u32) back to f32.\n */\nfn f16ToF32(h: u32) -> f32 {\n let sign = (h & 0x8000u) << 16u;\n let exponent = (h >> 10u) & 0x1Fu;\n let mantissa = h & 0x3FFu;\n\n if (exponent == 0u) {\n if (mantissa == 0u) {\n return 
bitcast<f32>(sign); // Signed zero\n }\n // Denormalized: convert to normalized f32\n var m = mantissa;\n var e = 0u;\n while ((m & 0x400u) == 0u) {\n m <<= 1u;\n e++;\n }\n let newExp = (127u - 15u - e) << 23u;\n let newMant = (m & 0x3FFu) << 13u;\n return bitcast<f32>(sign | newExp | newMant);\n }\n if (exponent == 31u) {\n if (mantissa == 0u) {\n return bitcast<f32>(sign | 0x7F800000u); // Inf\n }\n return bitcast<f32>(sign | 0x7FC00000u); // NaN\n }\n\n let newExp = (exponent + 127u - 15u) << 23u;\n let newMant = mantissa << 13u;\n return bitcast<f32>(sign | newExp | newMant);\n}\n\n/**\n * Pack two f16 values into a single u32.\n */\nfn packF16x2(a: f32, b: f32) -> u32 {\n return f32ToF16(a) | (f32ToF16(b) << 16u);\n}\n\n// =============================================================================\n// RGBA8 Color Packing\n// =============================================================================\n\n/**\n * Pack vec4<f32> color (0..1 range) into RGBA8 u32.\n * Layout: R[7:0] G[15:8] B[23:16] A[31:24]\n */\nfn packRGBA8(color: vec4<f32>) -> u32 {\n let r = u32(clamp(color.r * 255.0, 0.0, 255.0));\n let g = u32(clamp(color.g * 255.0, 0.0, 255.0));\n let b = u32(clamp(color.b * 255.0, 0.0, 255.0));\n let a = u32(clamp(color.a * 255.0, 0.0, 255.0));\n return r | (g << 8u) | (b << 16u) | (a << 24u);\n}\n\n/**\n * Unpack RGBA8 u32 back to vec4<f32>.\n */\nfn unpackRGBA8(packed: u32) -> vec4<f32> {\n return vec4<f32>(\n f32(packed & 0xFFu) / 255.0,\n f32((packed >> 8u) & 0xFFu) / 255.0,\n f32((packed >> 16u) & 0xFFu) / 255.0,\n f32((packed >> 24u) & 0xFFu) / 255.0,\n );\n}\n\n// =============================================================================\n// 3D to 2D Covariance Projection (Compressed Ellipse Axes)\n// =============================================================================\n\n/**\n * Compute 2D covariance matrix from 3D Gaussian parameters.\n *\n * The 3D covariance matrix Sigma = R * S * S^T * R^T where:\n * R = rotation matrix from 
quaternion\n * S = diagonal scale matrix\n *\n * Projected to 2D using the Jacobian of the perspective projection:\n * Sigma_2D = J * V * Sigma * V^T * J^T\n *\n * where V is the upper-left 3x3 of the view matrix and J is the Jacobian.\n *\n * Returns: vec3(cov[0][0], cov[0][1], cov[1][1]) - the symmetric 2x2 matrix.\n */\nfn computeCov2D(\n pos: vec3<f32>,\n scale: vec3<f32>,\n rot: vec4<f32>,\n viewMatrix: mat4x4<f32>,\n focalX: f32,\n focalY: f32,\n) -> vec3<f32> {\n // Transform position to camera space\n let camPos = viewMatrix * vec4<f32>(pos, 1.0);\n let tz = camPos.z;\n\n // Avoid division by zero\n let tzSafe = select(tz, 0.001, abs(tz) < 0.001);\n\n // Jacobian of perspective projection\n let tanFovX = 1.0 / focalX;\n let tanFovY = 1.0 / focalY;\n let limX = 1.3 * tanFovX;\n let limY = 1.3 * tanFovY;\n\n let tx = clamp(camPos.x / tzSafe, -limX, limX) * tzSafe;\n let ty = clamp(camPos.y / tzSafe, -limY, limY) * tzSafe;\n\n // Jacobian matrix (2x3)\n let J00 = focalX / tzSafe;\n let J02 = -focalX * tx / (tzSafe * tzSafe);\n let J11 = focalY / tzSafe;\n let J12 = -focalY * ty / (tzSafe * tzSafe);\n\n // Build rotation matrix from quaternion\n let r = rot.x;\n let x = rot.y;\n let y = rot.z;\n let z = rot.w;\n\n let R = mat3x3<f32>(\n vec3<f32>(1.0 - 2.0*(y*y + z*z), 2.0*(x*y - r*z), 2.0*(x*z + r*y)),\n vec3<f32>(2.0*(x*y + r*z), 1.0 - 2.0*(x*x + z*z), 2.0*(y*z - r*x)),\n vec3<f32>(2.0*(x*z - r*y), 2.0*(y*z + r*x), 1.0 - 2.0*(x*x + y*y)),\n );\n\n // Scale matrix (diagonal) applied as column scaling\n let M = mat3x3<f32>(\n R[0] * scale.x,\n R[1] * scale.y,\n R[2] * scale.z,\n );\n\n // 3D covariance: Sigma = M * M^T\n let Sigma = mat3x3<f32>(\n vec3<f32>(dot(M[0], M[0]), dot(M[0], M[1]), dot(M[0], M[2])),\n vec3<f32>(dot(M[1], M[0]), dot(M[1], M[1]), dot(M[1], M[2])),\n vec3<f32>(dot(M[2], M[0]), dot(M[2], M[1]), dot(M[2], M[2])),\n );\n\n // View rotation (upper-left 3x3)\n let V = mat3x3<f32>(\n viewMatrix[0].xyz,\n viewMatrix[1].xyz,\n 
viewMatrix[2].xyz,\n );\n\n // Transform covariance to camera space: T = V * Sigma * V^T\n let T = V * Sigma * transpose(V);\n\n // Apply Jacobian to get 2D covariance\n // cov2D = J * T * J^T (where J is 2x3, T is 3x3)\n let cov00 = J00 * J00 * T[0][0] + 2.0 * J00 * J02 * T[0][2] + J02 * J02 * T[2][2];\n let cov01 = J00 * J11 * T[0][1] + J00 * J12 * T[0][2] + J02 * J11 * T[1][2] + J02 * J12 * T[2][2];\n let cov11 = J11 * J11 * T[1][1] + 2.0 * J11 * J12 * T[1][2] + J12 * J12 * T[2][2];\n\n // Add low-pass filter to avoid aliasing (minimum 0.3px Gaussian)\n let covFiltered = vec3<f32>(cov00 + 0.3, cov01, cov11 + 0.3);\n\n return covFiltered;\n}\n\n// =============================================================================\n// Main Compression + Depth Key Compute Shader\n// =============================================================================\n\n/**\n * Compress raw splats and generate sort keys in a single pass.\n *\n * For each splat:\n * 1. Project to camera space, compute depth\n * 2. Compute 2D covariance (compressed ellipse axes)\n * 3. Pack color as RGBA8\n * 4. Pack covariance as f16x3\n * 5. Generate quantized depth key for radix sort\n * 6. 
Initialize sort value (splat index)\n */\n@compute @workgroup_size(256)\nfn compressAndKey(\n @builtin(global_invocation_id) globalId: vec3<u32>,\n) {\n let idx = globalId.x;\n if (idx >= uniforms.splatCount) {\n return;\n }\n\n let raw = splatsIn[idx];\n\n // Transform to camera space for depth\n let camPos = uniforms.viewMatrix * vec4<f32>(raw.pos, 1.0);\n let depth = camPos.z;\n\n // Frustum culling: skip splats behind camera\n // (They'll be sorted to the end with max depth key)\n var depthKey: u32;\n if (depth < 0.01) {\n depthKey = 0xFFFFFFFFu; // Behind camera -> max depth -> sorted last\n } else {\n // Quantize depth to 32-bit uint for radix sort\n // Use bit-cast of float: IEEE 754 floats sort correctly as uint when positive\n // (which camera-space depth always is for visible splats)\n depthKey = bitcast<u32>(depth);\n }\n\n // Compute 2D covariance (compressed ellipse axes)\n let cov2D = computeCov2D(\n raw.pos,\n raw.scale,\n raw.rot,\n uniforms.viewMatrix,\n uniforms.focalX,\n uniforms.focalY,\n );\n\n // Pack compressed splat\n var compressed: SplatCompressed;\n compressed.pos = raw.pos;\n compressed.packedColor = packRGBA8(raw.color);\n compressed.packedCov2D_01 = packF16x2(cov2D.x, cov2D.y);\n compressed.packedCov2D_2_opacity = packF16x2(cov2D.z, raw.color.a);\n compressed.depth = depth;\n compressed._pad = 0u;\n\n // Write compressed data\n splatsOut[idx] = compressed;\n\n // Write sort key-value pair\n sortKeys[idx] = depthKey;\n sortValues[idx] = idx;\n}\n";
|
|
1200
|
+
|
|
1201
|
+
// wgsl-raw:C:\Users\josep\Documents\GitHub\HoloScript\packages\engine\src\gpu\shaders\splat-render-sorted.wgsl
|
|
1202
|
+
var splat_render_sorted_default = "/**\n * Sorted Gaussian Splat Renderer\n *\n * Renders compressed, depth-sorted Gaussian splats using the output\n * of the radix sort pipeline. Reads RGBA8-packed colors and f16-packed\n * 2D covariance (compressed ellipse axes) for efficient memory bandwidth.\n *\n * Rendering approach:\n * - Instance-based quad rendering (4 vertices per splat)\n * - Back-to-front order (via radix-sorted indices)\n * - Alpha blending with premultiplied alpha\n * - 2D Gaussian falloff using projected covariance (elliptical)\n *\n * Cross-browser compatible:\n * - No f16 shader feature required\n * - No subgroup operations\n * - Standard vertex/fragment pipeline\n *\n * @version 1.0.0\n */\n\n// =============================================================================\n// Structures\n// =============================================================================\n\nstruct SplatCompressed {\n pos: vec3<f32>,\n packedColor: u32,\n packedCov2D_01: u32,\n packedCov2D_2_opacity: u32,\n depth: f32,\n _pad: u32,\n};\n\nstruct RenderUniforms {\n viewProjection: mat4x4<f32>,\n viewMatrix: mat4x4<f32>,\n cameraPosition: vec3<f32>,\n screenWidth: f32,\n screenHeight: f32,\n focalX: f32,\n focalY: f32,\n _pad: f32,\n};\n\nstruct VertexOutput {\n @builtin(position) position: vec4<f32>,\n @location(0) color: vec4<f32>,\n @location(1) conicAndOpacity: vec4<f32>, // conic.xyz + opacity\n @location(2) centerScreen: vec2<f32>,\n};\n\n// =============================================================================\n// Bindings\n// =============================================================================\n\n@group(0) @binding(0) var<uniform> uniforms: RenderUniforms;\n@group(0) @binding(1) var<storage, read> splats: array<SplatCompressed>;\n@group(0) @binding(2) var<storage, read> sortedIndices: array<u32>;\n\n// =============================================================================\n// f16 Unpacking Helpers\n// 
=============================================================================\n\nfn f16ToF32(h: u32) -> f32 {\n let sign = (h & 0x8000u) << 16u;\n let exponent = (h >> 10u) & 0x1Fu;\n let mantissa = h & 0x3FFu;\n\n if (exponent == 0u) {\n if (mantissa == 0u) {\n return bitcast<f32>(sign);\n }\n var m = mantissa;\n var e = 0u;\n while ((m & 0x400u) == 0u) {\n m <<= 1u;\n e++;\n }\n let newExp = (127u - 15u - e) << 23u;\n let newMant = (m & 0x3FFu) << 13u;\n return bitcast<f32>(sign | newExp | newMant);\n }\n if (exponent == 31u) {\n if (mantissa == 0u) {\n return bitcast<f32>(sign | 0x7F800000u);\n }\n return bitcast<f32>(sign | 0x7FC00000u);\n }\n\n let newExp = (exponent + 127u - 15u) << 23u;\n let newMant = mantissa << 13u;\n return bitcast<f32>(sign | newExp | newMant);\n}\n\nfn unpackF16Low(packed: u32) -> f32 {\n return f16ToF32(packed & 0xFFFFu);\n}\n\nfn unpackF16High(packed: u32) -> f32 {\n return f16ToF32((packed >> 16u) & 0xFFFFu);\n}\n\nfn unpackRGBA8(packed: u32) -> vec4<f32> {\n return vec4<f32>(\n f32(packed & 0xFFu) / 255.0,\n f32((packed >> 8u) & 0xFFu) / 255.0,\n f32((packed >> 16u) & 0xFFu) / 255.0,\n f32((packed >> 24u) & 0xFFu) / 255.0,\n );\n}\n\n// =============================================================================\n// Vertex Shader\n// =============================================================================\n\n/**\n * Renders a billboard quad for each sorted Gaussian splat.\n *\n * The quad is sized based on the 2D covariance ellipse to tightly\n * bound the Gaussian at the 3-sigma level.\n *\n * vertex_index 0..3 maps to quad corners:\n * 0: (-1, -1) 1: (1, -1) 2: (-1, 1) 3: (1, 1)\n */\n@vertex\nfn vs_main(\n @builtin(vertex_index) vertexIndex: u32,\n @builtin(instance_index) instanceIndex: u32,\n) -> VertexOutput {\n // Look up sorted splat index\n let splatIndex = sortedIndices[instanceIndex];\n let splat = splats[splatIndex];\n\n // Unpack compressed data\n let color = unpackRGBA8(splat.packedColor);\n let cov00 = 
unpackF16Low(splat.packedCov2D_01);\n let cov01 = unpackF16High(splat.packedCov2D_01);\n let cov11 = unpackF16Low(splat.packedCov2D_2_opacity);\n let opacity = unpackF16High(splat.packedCov2D_2_opacity);\n\n // Project center to screen space\n let clipPos = uniforms.viewProjection * vec4<f32>(splat.pos, 1.0);\n let ndcPos = clipPos.xyz / clipPos.w;\n\n // Screen-space center\n let centerScreen = vec2<f32>(\n (ndcPos.x * 0.5 + 0.5) * uniforms.screenWidth,\n (ndcPos.y * -0.5 + 0.5) * uniforms.screenHeight,\n );\n\n // Compute inverse covariance (conic) for Gaussian evaluation in fragment shader\n // For 2x2 symmetric matrix [[a, b], [b, c]]:\n // det = a*c - b*b\n // inv = [[c, -b], [-b, a]] / det\n let det = cov00 * cov11 - cov01 * cov01;\n let detSafe = max(det, 1e-6);\n let conic = vec3<f32>(cov11 / detSafe, -cov01 / detSafe, cov00 / detSafe);\n\n // Compute eigenvalues for quad sizing (ellipse bounding box)\n let mid = 0.5 * (cov00 + cov11);\n let discriminant = max(mid * mid - det, 0.0);\n let lambda1 = mid + sqrt(discriminant);\n let lambda2 = mid - sqrt(discriminant);\n\n // 3-sigma bounding radius in pixels\n let maxRadius = ceil(3.0 * sqrt(max(lambda1, 0.0)));\n\n // Quad vertex positions (billboard)\n let quadUV = vec2<f32>(\n f32(vertexIndex & 1u) * 2.0 - 1.0,\n f32((vertexIndex >> 1u) & 1u) * 2.0 - 1.0,\n );\n\n let pixelOffset = quadUV * maxRadius;\n let screenPos = centerScreen + pixelOffset;\n\n // Convert back to NDC\n let finalNdc = vec2<f32>(\n (screenPos.x / uniforms.screenWidth) * 2.0 - 1.0,\n -((screenPos.y / uniforms.screenHeight) * 2.0 - 1.0),\n );\n\n var out: VertexOutput;\n out.position = vec4<f32>(finalNdc, ndcPos.z, 1.0);\n out.color = color;\n out.conicAndOpacity = vec4<f32>(conic, opacity);\n out.centerScreen = centerScreen;\n\n return out;\n}\n\n// =============================================================================\n// Fragment Shader\n// =============================================================================\n\n/**\n * 
Evaluates the 2D Gaussian using the inverse covariance (conic).\n *\n * For a pixel at position p relative to the Gaussian center c:\n * power = -0.5 * (d^T * Sigma^{-1} * d)\n * alpha = opacity * exp(power)\n *\n * where d = p - c, and Sigma^{-1} is the conic (inverse covariance).\n */\n@fragment\nfn fs_main(in: VertexOutput) -> @location(0) vec4<f32> {\n // Fragment position in screen space (pixels)\n let fragScreen = in.position.xy;\n\n // Distance from Gaussian center in pixels\n let d = fragScreen - in.centerScreen;\n\n // Evaluate Gaussian: power = -0.5 * (conic.x * dx^2 + 2*conic.y * dx*dy + conic.z * dy^2)\n let power = -0.5 * (in.conicAndOpacity.x * d.x * d.x\n + 2.0 * in.conicAndOpacity.y * d.x * d.y\n + in.conicAndOpacity.z * d.y * d.y);\n\n // Clamp power to avoid numerical issues\n if (power > 0.0) {\n discard;\n }\n\n let alpha = min(0.99, in.conicAndOpacity.w * exp(power));\n\n // Discard nearly transparent fragments\n if (alpha < 1.0 / 255.0) {\n discard;\n }\n\n // Premultiplied alpha output for back-to-front compositing\n return vec4<f32>(in.color.rgb * alpha, alpha);\n}\n";
|
|
1203
|
+
|
|
1204
|
+
// src/gpu/GaussianSplatSorter.ts
|
|
1205
|
+
// Radix-sort configuration. Sort keys are 32-bit unsigned integers and each
// pass processes RADIX_BITS bits of the key.
var RADIX_BITS = 8;
// Histogram buckets per pass: one per possible digit value (2^RADIX_BITS).
var RADIX_SIZE = 1 << RADIX_BITS;
// Passes required to consume all 32 key bits.
var NUM_PASSES = 32 / RADIX_BITS;
// Per-splat strides, expressed as a count of 32-bit words times 4 bytes.
var BYTES_PER_COMPRESSED_SPLAT = 8 * Float32Array.BYTES_PER_ELEMENT;
var BYTES_PER_RAW_SPLAT = 16 * Float32Array.BYTES_PER_ELEMENT;
|
|
1210
|
+
var GaussianSplatSorter = class {
|
|
1211
|
+
context;
|
|
1212
|
+
device;
|
|
1213
|
+
options;
|
|
1214
|
+
// Shader modules
|
|
1215
|
+
sortShaderModule = null;
|
|
1216
|
+
compressShaderModule = null;
|
|
1217
|
+
renderShaderModule = null;
|
|
1218
|
+
// Compute pipelines
|
|
1219
|
+
compressPipeline = null;
|
|
1220
|
+
histogramPipeline = null;
|
|
1221
|
+
blellochScanPipeline = null;
|
|
1222
|
+
globalPrefixPipeline = null;
|
|
1223
|
+
scatterPipeline = null;
|
|
1224
|
+
// Render pipeline
|
|
1225
|
+
renderPipeline = null;
|
|
1226
|
+
// Buffers
|
|
1227
|
+
rawSplatBuffer = null;
|
|
1228
|
+
compressedSplatBuffer = null;
|
|
1229
|
+
sortKeysA = null;
|
|
1230
|
+
sortKeysB = null;
|
|
1231
|
+
sortValuesA = null;
|
|
1232
|
+
sortValuesB = null;
|
|
1233
|
+
blockHistogramsBuffer = null;
|
|
1234
|
+
globalPrefixesBuffer = null;
|
|
1235
|
+
compressUniformBuffer = null;
|
|
1236
|
+
sortUniformBuffer = null;
|
|
1237
|
+
renderUniformBuffer = null;
|
|
1238
|
+
// Bind groups (rebuilt each frame due to ping-pong)
|
|
1239
|
+
compressBindGroup = null;
|
|
1240
|
+
// State
|
|
1241
|
+
splatCount = 0;
|
|
1242
|
+
blockCount = 0;
|
|
1243
|
+
initialized = false;
|
|
1244
|
+
constructor(context, options) {
|
|
1245
|
+
this.context = context;
|
|
1246
|
+
this.device = context.getDevice();
|
|
1247
|
+
this.options = {
|
|
1248
|
+
maxSplats: options.maxSplats,
|
|
1249
|
+
workgroupSize: options.workgroupSize ?? 256,
|
|
1250
|
+
elementsPerThread: options.elementsPerThread ?? 4,
|
|
1251
|
+
enableTimestamps: options.enableTimestamps ?? false,
|
|
1252
|
+
canvasWidth: options.canvasWidth,
|
|
1253
|
+
canvasHeight: options.canvasHeight
|
|
1254
|
+
};
|
|
1255
|
+
}
|
|
1256
|
+
// ===========================================================================
|
|
1257
|
+
// Initialization
|
|
1258
|
+
// ===========================================================================
|
|
1259
|
+
/**
|
|
1260
|
+
* Initialize all GPU resources: shaders, pipelines, buffers.
|
|
1261
|
+
*
|
|
1262
|
+
* Must be called before any sort or render operations.
|
|
1263
|
+
*/
|
|
1264
|
+
async initialize() {
|
|
1265
|
+
if (this.initialized) {
|
|
1266
|
+
console.warn("GaussianSplatSorter already initialized");
|
|
1267
|
+
return;
|
|
1268
|
+
}
|
|
1269
|
+
await this.createShaderModules();
|
|
1270
|
+
this.createComputePipelines();
|
|
1271
|
+
this.createRenderPipeline();
|
|
1272
|
+
this.createBuffers();
|
|
1273
|
+
this.initialized = true;
|
|
1274
|
+
}
|
|
1275
|
+
/**
|
|
1276
|
+
* Create and validate shader modules with cross-browser error reporting.
|
|
1277
|
+
*/
|
|
1278
|
+
async createShaderModules() {
|
|
1279
|
+
const createModule = async (code, label) => {
|
|
1280
|
+
const module2 = this.device.createShaderModule({ label, code });
|
|
1281
|
+
try {
|
|
1282
|
+
const info = await module2.getCompilationInfo();
|
|
1283
|
+
for (const msg of info.messages) {
|
|
1284
|
+
if (msg.type === "error") {
|
|
1285
|
+
throw new Error(
|
|
1286
|
+
`Shader compilation error in ${label}: ${msg.message} (line ${msg.lineNum})`
|
|
1287
|
+
);
|
|
1288
|
+
}
|
|
1289
|
+
if (msg.type === "warning") {
|
|
1290
|
+
console.warn(`Shader warning in ${label}: ${msg.message} (line ${msg.lineNum})`);
|
|
1291
|
+
}
|
|
1292
|
+
}
|
|
1293
|
+
} catch (e) {
|
|
1294
|
+
if (e instanceof Error && e.message?.includes("Shader compilation error")) {
|
|
1295
|
+
throw e;
|
|
1296
|
+
}
|
|
1297
|
+
console.warn(
|
|
1298
|
+
`Could not validate shader ${label}:`,
|
|
1299
|
+
e instanceof Error ? e.message : String(e)
|
|
1300
|
+
);
|
|
1301
|
+
}
|
|
1302
|
+
return module2;
|
|
1303
|
+
};
|
|
1304
|
+
this.sortShaderModule = await createModule(radix_sort_default, "radix-sort");
|
|
1305
|
+
this.compressShaderModule = await createModule(splat_compress_default, "splat-compress");
|
|
1306
|
+
this.renderShaderModule = await createModule(splat_render_sorted_default, "splat-render-sorted");
|
|
1307
|
+
}
|
|
1308
|
+
/**
|
|
1309
|
+
* Create all compute pipelines for the sort.
|
|
1310
|
+
*/
|
|
1311
|
+
createComputePipelines() {
|
|
1312
|
+
if (!this.sortShaderModule || !this.compressShaderModule) {
|
|
1313
|
+
throw new Error("Shader modules not created");
|
|
1314
|
+
}
|
|
1315
|
+
this.compressPipeline = this.device.createComputePipeline({
|
|
1316
|
+
label: "splat-compress-pipeline",
|
|
1317
|
+
layout: "auto",
|
|
1318
|
+
compute: {
|
|
1319
|
+
module: this.compressShaderModule,
|
|
1320
|
+
entryPoint: "compressAndKey"
|
|
1321
|
+
}
|
|
1322
|
+
});
|
|
1323
|
+
this.histogramPipeline = this.device.createComputePipeline({
|
|
1324
|
+
label: "radix-histogram-pipeline",
|
|
1325
|
+
layout: "auto",
|
|
1326
|
+
compute: {
|
|
1327
|
+
module: this.sortShaderModule,
|
|
1328
|
+
entryPoint: "buildHistogram"
|
|
1329
|
+
}
|
|
1330
|
+
});
|
|
1331
|
+
this.blellochScanPipeline = this.device.createComputePipeline({
|
|
1332
|
+
label: "blelloch-scan-pipeline",
|
|
1333
|
+
layout: "auto",
|
|
1334
|
+
compute: {
|
|
1335
|
+
module: this.sortShaderModule,
|
|
1336
|
+
entryPoint: "blellochScan"
|
|
1337
|
+
}
|
|
1338
|
+
});
|
|
1339
|
+
this.globalPrefixPipeline = this.device.createComputePipeline({
|
|
1340
|
+
label: "global-prefix-pipeline",
|
|
1341
|
+
layout: "auto",
|
|
1342
|
+
compute: {
|
|
1343
|
+
module: this.sortShaderModule,
|
|
1344
|
+
entryPoint: "globalPrefixScan"
|
|
1345
|
+
}
|
|
1346
|
+
});
|
|
1347
|
+
this.scatterPipeline = this.device.createComputePipeline({
|
|
1348
|
+
label: "radix-scatter-pipeline",
|
|
1349
|
+
layout: "auto",
|
|
1350
|
+
compute: {
|
|
1351
|
+
module: this.sortShaderModule,
|
|
1352
|
+
entryPoint: "scatter"
|
|
1353
|
+
}
|
|
1354
|
+
});
|
|
1355
|
+
}
|
|
1356
|
+
/**
|
|
1357
|
+
* Create render pipeline for sorted splat rendering.
|
|
1358
|
+
*/
|
|
1359
|
+
createRenderPipeline() {
|
|
1360
|
+
if (!this.renderShaderModule) {
|
|
1361
|
+
throw new Error("Render shader module not created");
|
|
1362
|
+
}
|
|
1363
|
+
this.renderPipeline = this.device.createRenderPipeline({
|
|
1364
|
+
label: "sorted-splat-render-pipeline",
|
|
1365
|
+
layout: "auto",
|
|
1366
|
+
vertex: {
|
|
1367
|
+
module: this.renderShaderModule,
|
|
1368
|
+
entryPoint: "vs_main",
|
|
1369
|
+
buffers: []
|
|
1370
|
+
// All data comes from storage buffers
|
|
1371
|
+
},
|
|
1372
|
+
fragment: {
|
|
1373
|
+
module: this.renderShaderModule,
|
|
1374
|
+
entryPoint: "fs_main",
|
|
1375
|
+
targets: [
|
|
1376
|
+
{
|
|
1377
|
+
format: navigator.gpu.getPreferredCanvasFormat(),
|
|
1378
|
+
blend: {
|
|
1379
|
+
// Premultiplied alpha blending (back-to-front)
|
|
1380
|
+
color: {
|
|
1381
|
+
srcFactor: "one",
|
|
1382
|
+
dstFactor: "one-minus-src-alpha",
|
|
1383
|
+
operation: "add"
|
|
1384
|
+
},
|
|
1385
|
+
alpha: {
|
|
1386
|
+
srcFactor: "one",
|
|
1387
|
+
dstFactor: "one-minus-src-alpha",
|
|
1388
|
+
operation: "add"
|
|
1389
|
+
}
|
|
1390
|
+
}
|
|
1391
|
+
}
|
|
1392
|
+
]
|
|
1393
|
+
},
|
|
1394
|
+
primitive: {
|
|
1395
|
+
topology: "triangle-strip",
|
|
1396
|
+
stripIndexFormat: void 0
|
|
1397
|
+
},
|
|
1398
|
+
depthStencil: {
|
|
1399
|
+
format: "depth24plus",
|
|
1400
|
+
// Disable depth write for sorted splats (they're already in order)
|
|
1401
|
+
depthWriteEnabled: false,
|
|
1402
|
+
depthCompare: "always"
|
|
1403
|
+
}
|
|
1404
|
+
});
|
|
1405
|
+
}
|
|
1406
|
+
/**
|
|
1407
|
+
* Allocate all GPU buffers.
|
|
1408
|
+
*/
|
|
1409
|
+
createBuffers() {
|
|
1410
|
+
const maxSplats = this.options.maxSplats;
|
|
1411
|
+
const blockSize = this.options.workgroupSize * this.options.elementsPerThread;
|
|
1412
|
+
const maxBlocks = Math.ceil(maxSplats / blockSize);
|
|
1413
|
+
this.rawSplatBuffer = this.device.createBuffer({
|
|
1414
|
+
label: "raw-splats",
|
|
1415
|
+
size: maxSplats * BYTES_PER_RAW_SPLAT,
|
|
1416
|
+
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
|
|
1417
|
+
});
|
|
1418
|
+
this.compressedSplatBuffer = this.device.createBuffer({
|
|
1419
|
+
label: "compressed-splats",
|
|
1420
|
+
size: maxSplats * BYTES_PER_COMPRESSED_SPLAT,
|
|
1421
|
+
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
|
|
1422
|
+
});
|
|
1423
|
+
const sortBufferSize = maxSplats * 4;
|
|
1424
|
+
this.sortKeysA = this.createSortBuffer("sort-keys-a", sortBufferSize);
|
|
1425
|
+
this.sortKeysB = this.createSortBuffer("sort-keys-b", sortBufferSize);
|
|
1426
|
+
this.sortValuesA = this.createSortBuffer("sort-values-a", sortBufferSize);
|
|
1427
|
+
this.sortValuesB = this.createSortBuffer("sort-values-b", sortBufferSize);
|
|
1428
|
+
this.blockHistogramsBuffer = this.device.createBuffer({
|
|
1429
|
+
label: "block-histograms",
|
|
1430
|
+
size: maxBlocks * RADIX_SIZE * 4,
|
|
1431
|
+
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
|
|
1432
|
+
});
|
|
1433
|
+
this.globalPrefixesBuffer = this.device.createBuffer({
|
|
1434
|
+
label: "global-prefixes",
|
|
1435
|
+
size: RADIX_SIZE * 4,
|
|
1436
|
+
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
|
|
1437
|
+
});
|
|
1438
|
+
this.compressUniformBuffer = this.device.createBuffer({
|
|
1439
|
+
label: "compress-uniforms",
|
|
1440
|
+
size: 160,
|
|
1441
|
+
// 2 * mat4x4 (128) + 4 floats (16) + 4 u32 (16) = 160
|
|
1442
|
+
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
|
|
1443
|
+
});
|
|
1444
|
+
this.sortUniformBuffer = this.device.createBuffer({
|
|
1445
|
+
label: "sort-uniforms",
|
|
1446
|
+
size: 16,
|
|
1447
|
+
// totalCount (4) + bitOffset (4) + blockCount (4) + pad (4)
|
|
1448
|
+
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
|
|
1449
|
+
});
|
|
1450
|
+
this.renderUniformBuffer = this.device.createBuffer({
|
|
1451
|
+
label: "render-uniforms",
|
|
1452
|
+
size: 160,
|
|
1453
|
+
// viewProj (64) + view (64) + camPos (12) + 5 floats (20)
|
|
1454
|
+
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
|
|
1455
|
+
});
|
|
1456
|
+
}
|
|
1457
|
+
createSortBuffer(label, size) {
|
|
1458
|
+
return this.device.createBuffer({
|
|
1459
|
+
label,
|
|
1460
|
+
size,
|
|
1461
|
+
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC
|
|
1462
|
+
});
|
|
1463
|
+
}
|
|
1464
|
+
// ===========================================================================
|
|
1465
|
+
// Data Upload
|
|
1466
|
+
// ===========================================================================
|
|
1467
|
+
/**
|
|
1468
|
+
* Upload raw splat data to the GPU.
|
|
1469
|
+
*
|
|
1470
|
+
* Expected layout per splat (64 bytes, matching the WGSL `SplatRaw` struct,
* whose vec3<f32> members are 16-byte aligned):
* position: vec3<f32> (12 bytes + 4 bytes padding, offset 0)
* scale: vec3<f32> (12 bytes + 4 bytes padding, offset 16)
* rotation: vec4<f32> (16 bytes, offset 32) - quaternion (w, x, y, z)
* color: vec4<f32> (16 bytes, offset 48) - RGBA [0..1]
|
|
1476
|
+
*
|
|
1477
|
+
* @param data Raw splat data as Float32Array
|
|
1478
|
+
* @param count Number of splats (not bytes)
|
|
1479
|
+
*/
|
|
1480
|
+
uploadSplatData(data, count) {
|
|
1481
|
+
if (!this.rawSplatBuffer) {
|
|
1482
|
+
throw new Error("Buffers not initialized");
|
|
1483
|
+
}
|
|
1484
|
+
if (count > this.options.maxSplats) {
|
|
1485
|
+
throw new Error(`Splat count ${count} exceeds max ${this.options.maxSplats}`);
|
|
1486
|
+
}
|
|
1487
|
+
this.splatCount = count;
|
|
1488
|
+
this.blockCount = Math.ceil(
|
|
1489
|
+
count / (this.options.workgroupSize * this.options.elementsPerThread)
|
|
1490
|
+
);
|
|
1491
|
+
this.device.queue.writeBuffer(
|
|
1492
|
+
this.rawSplatBuffer,
|
|
1493
|
+
0,
|
|
1494
|
+
data.buffer,
|
|
1495
|
+
data.byteOffset,
|
|
1496
|
+
count * BYTES_PER_RAW_SPLAT
|
|
1497
|
+
);
|
|
1498
|
+
}
|
|
1499
|
+
// ===========================================================================
|
|
1500
|
+
// Sort Execution
|
|
1501
|
+
// ===========================================================================
|
|
1502
|
+
/**
|
|
1503
|
+
* Execute the full sort pipeline: compress -> 4-pass radix sort.
|
|
1504
|
+
*
|
|
1505
|
+
* Should be called each frame before rendering when the camera moves.
|
|
1506
|
+
* Uses a single command encoder for all passes to minimize CPU overhead.
|
|
1507
|
+
*
|
|
1508
|
+
* @param camera Current camera state for depth computation
|
|
1509
|
+
* @param commandEncoder Optional encoder to chain with other passes
|
|
1510
|
+
* @returns Command encoder with all sort passes recorded
|
|
1511
|
+
*/
|
|
1512
|
+
sort(camera, commandEncoder) {
|
|
1513
|
+
if (!this.initialized) {
|
|
1514
|
+
throw new Error("Not initialized. Call initialize() first.");
|
|
1515
|
+
}
|
|
1516
|
+
const encoder = commandEncoder ?? this.device.createCommandEncoder({
|
|
1517
|
+
label: "gaussian-splat-sort-encoder"
|
|
1518
|
+
});
|
|
1519
|
+
this.recordCompressPass(encoder, camera);
|
|
1520
|
+
for (let pass = 0; pass < NUM_PASSES; pass++) {
|
|
1521
|
+
const bitOffset = pass * RADIX_BITS;
|
|
1522
|
+
const readFromA = pass % 2 === 0;
|
|
1523
|
+
this.recordSortPass(encoder, bitOffset, readFromA);
|
|
1524
|
+
}
|
|
1525
|
+
return encoder;
|
|
1526
|
+
}
|
|
1527
|
+
/**
|
|
1528
|
+
* Record compression compute pass.
|
|
1529
|
+
*/
|
|
1530
|
+
recordCompressPass(encoder, camera) {
|
|
1531
|
+
if (!this.compressPipeline || !this.compressUniformBuffer) {
|
|
1532
|
+
throw new Error("Compress pipeline not created");
|
|
1533
|
+
}
|
|
1534
|
+
const uniforms = new Float32Array(40);
|
|
1535
|
+
uniforms.set(camera.viewMatrix, 0);
|
|
1536
|
+
uniforms.set(camera.projMatrix, 16);
|
|
1537
|
+
const uintView = new Uint32Array(uniforms.buffer);
|
|
1538
|
+
uniforms[32] = this.options.canvasWidth;
|
|
1539
|
+
uniforms[33] = this.options.canvasHeight;
|
|
1540
|
+
uniforms[34] = camera.focalX;
|
|
1541
|
+
uniforms[35] = camera.focalY;
|
|
1542
|
+
uintView[36] = this.splatCount;
|
|
1543
|
+
uintView[37] = 0;
|
|
1544
|
+
uintView[38] = 0;
|
|
1545
|
+
uintView[39] = 0;
|
|
1546
|
+
this.device.queue.writeBuffer(this.compressUniformBuffer, 0, uniforms);
|
|
1547
|
+
this.compressBindGroup = this.device.createBindGroup({
|
|
1548
|
+
label: "compress-bind-group",
|
|
1549
|
+
layout: this.compressPipeline.getBindGroupLayout(0),
|
|
1550
|
+
entries: [
|
|
1551
|
+
{ binding: 0, resource: { buffer: this.compressUniformBuffer } },
|
|
1552
|
+
{ binding: 1, resource: { buffer: this.rawSplatBuffer } },
|
|
1553
|
+
{ binding: 2, resource: { buffer: this.compressedSplatBuffer } },
|
|
1554
|
+
{ binding: 3, resource: { buffer: this.sortKeysA } },
|
|
1555
|
+
{ binding: 4, resource: { buffer: this.sortValuesA } }
|
|
1556
|
+
]
|
|
1557
|
+
});
|
|
1558
|
+
const computePass = encoder.beginComputePass({ label: "compress-pass" });
|
|
1559
|
+
computePass.setPipeline(this.compressPipeline);
|
|
1560
|
+
computePass.setBindGroup(0, this.compressBindGroup);
|
|
1561
|
+
computePass.dispatchWorkgroups(Math.ceil(this.splatCount / this.options.workgroupSize));
|
|
1562
|
+
computePass.end();
|
|
1563
|
+
}
|
|
1564
|
+
/**
|
|
1565
|
+
* Record one radix sort pass (histogram + scan + scatter).
|
|
1566
|
+
*/
|
|
1567
|
+
recordSortPass(encoder, bitOffset, readFromA) {
|
|
1568
|
+
const keysIn = readFromA ? this.sortKeysA : this.sortKeysB;
|
|
1569
|
+
const keysOut = readFromA ? this.sortKeysB : this.sortKeysA;
|
|
1570
|
+
const valuesIn = readFromA ? this.sortValuesA : this.sortValuesB;
|
|
1571
|
+
const valuesOut = readFromA ? this.sortValuesB : this.sortValuesA;
|
|
1572
|
+
const sortUniforms = new Uint32Array([
|
|
1573
|
+
this.splatCount,
|
|
1574
|
+
bitOffset,
|
|
1575
|
+
this.blockCount,
|
|
1576
|
+
0
|
|
1577
|
+
// pad
|
|
1578
|
+
]);
|
|
1579
|
+
this.device.queue.writeBuffer(this.sortUniformBuffer, 0, sortUniforms);
|
|
1580
|
+
const histBindGroup = this.device.createBindGroup({
|
|
1581
|
+
label: `histogram-bind-group-pass-${bitOffset}`,
|
|
1582
|
+
layout: this.histogramPipeline.getBindGroupLayout(0),
|
|
1583
|
+
entries: [
|
|
1584
|
+
{ binding: 0, resource: { buffer: this.sortUniformBuffer } },
|
|
1585
|
+
{ binding: 1, resource: { buffer: keysIn } },
|
|
1586
|
+
{ binding: 2, resource: { buffer: keysOut } },
|
|
1587
|
+
{ binding: 3, resource: { buffer: valuesIn } },
|
|
1588
|
+
{ binding: 4, resource: { buffer: valuesOut } },
|
|
1589
|
+
{ binding: 5, resource: { buffer: this.blockHistogramsBuffer } },
|
|
1590
|
+
{ binding: 6, resource: { buffer: this.globalPrefixesBuffer } }
|
|
1591
|
+
]
|
|
1592
|
+
});
|
|
1593
|
+
const histPass = encoder.beginComputePass({ label: `histogram-${bitOffset}` });
|
|
1594
|
+
histPass.setPipeline(this.histogramPipeline);
|
|
1595
|
+
histPass.setBindGroup(0, histBindGroup);
|
|
1596
|
+
histPass.dispatchWorkgroups(this.blockCount);
|
|
1597
|
+
histPass.end();
|
|
1598
|
+
const scanBindGroup = this.device.createBindGroup({
|
|
1599
|
+
label: `blelloch-scan-bind-group-pass-${bitOffset}`,
|
|
1600
|
+
layout: this.blellochScanPipeline.getBindGroupLayout(0),
|
|
1601
|
+
entries: [
|
|
1602
|
+
{ binding: 0, resource: { buffer: this.sortUniformBuffer } },
|
|
1603
|
+
{ binding: 1, resource: { buffer: keysIn } },
|
|
1604
|
+
{ binding: 2, resource: { buffer: keysOut } },
|
|
1605
|
+
{ binding: 3, resource: { buffer: valuesIn } },
|
|
1606
|
+
{ binding: 4, resource: { buffer: valuesOut } },
|
|
1607
|
+
{ binding: 5, resource: { buffer: this.blockHistogramsBuffer } },
|
|
1608
|
+
{ binding: 6, resource: { buffer: this.globalPrefixesBuffer } }
|
|
1609
|
+
]
|
|
1610
|
+
});
|
|
1611
|
+
const scanPass = encoder.beginComputePass({ label: `blelloch-scan-${bitOffset}` });
|
|
1612
|
+
scanPass.setPipeline(this.blellochScanPipeline);
|
|
1613
|
+
scanPass.setBindGroup(0, scanBindGroup);
|
|
1614
|
+
scanPass.dispatchWorkgroups(RADIX_SIZE);
|
|
1615
|
+
scanPass.end();
|
|
1616
|
+
const globalPrefixBindGroup = this.device.createBindGroup({
|
|
1617
|
+
label: `global-prefix-bind-group-pass-${bitOffset}`,
|
|
1618
|
+
layout: this.globalPrefixPipeline.getBindGroupLayout(0),
|
|
1619
|
+
entries: [
|
|
1620
|
+
{ binding: 0, resource: { buffer: this.sortUniformBuffer } },
|
|
1621
|
+
{ binding: 1, resource: { buffer: keysIn } },
|
|
1622
|
+
{ binding: 2, resource: { buffer: keysOut } },
|
|
1623
|
+
{ binding: 3, resource: { buffer: valuesIn } },
|
|
1624
|
+
{ binding: 4, resource: { buffer: valuesOut } },
|
|
1625
|
+
{ binding: 5, resource: { buffer: this.blockHistogramsBuffer } },
|
|
1626
|
+
{ binding: 6, resource: { buffer: this.globalPrefixesBuffer } }
|
|
1627
|
+
]
|
|
1628
|
+
});
|
|
1629
|
+
const globalPrefixPass = encoder.beginComputePass({ label: `global-prefix-${bitOffset}` });
|
|
1630
|
+
globalPrefixPass.setPipeline(this.globalPrefixPipeline);
|
|
1631
|
+
globalPrefixPass.setBindGroup(0, globalPrefixBindGroup);
|
|
1632
|
+
globalPrefixPass.dispatchWorkgroups(1);
|
|
1633
|
+
globalPrefixPass.end();
|
|
1634
|
+
const scatterBindGroup = this.device.createBindGroup({
|
|
1635
|
+
label: `scatter-bind-group-pass-${bitOffset}`,
|
|
1636
|
+
layout: this.scatterPipeline.getBindGroupLayout(0),
|
|
1637
|
+
entries: [
|
|
1638
|
+
{ binding: 0, resource: { buffer: this.sortUniformBuffer } },
|
|
1639
|
+
{ binding: 1, resource: { buffer: keysIn } },
|
|
1640
|
+
{ binding: 2, resource: { buffer: keysOut } },
|
|
1641
|
+
{ binding: 3, resource: { buffer: valuesIn } },
|
|
1642
|
+
{ binding: 4, resource: { buffer: valuesOut } },
|
|
1643
|
+
{ binding: 5, resource: { buffer: this.blockHistogramsBuffer } },
|
|
1644
|
+
{ binding: 6, resource: { buffer: this.globalPrefixesBuffer } }
|
|
1645
|
+
]
|
|
1646
|
+
});
|
|
1647
|
+
const scatterPass = encoder.beginComputePass({ label: `scatter-${bitOffset}` });
|
|
1648
|
+
scatterPass.setPipeline(this.scatterPipeline);
|
|
1649
|
+
scatterPass.setBindGroup(0, scatterBindGroup);
|
|
1650
|
+
scatterPass.dispatchWorkgroups(this.blockCount);
|
|
1651
|
+
scatterPass.end();
|
|
1652
|
+
}
|
|
1653
|
+
// ===========================================================================
|
|
1654
|
+
// Rendering
|
|
1655
|
+
// ===========================================================================
|
|
1656
|
+
/**
|
|
1657
|
+
* Record render pass for sorted Gaussian splats.
|
|
1658
|
+
*
|
|
1659
|
+
* @param encoder Command encoder to record into
|
|
1660
|
+
* @param camera Camera state for rendering
|
|
1661
|
+
* @param colorView Color attachment view
|
|
1662
|
+
* @param depthView Depth attachment view
|
|
1663
|
+
* @param clearColor Optional clear color (default: transparent black)
|
|
1664
|
+
*/
|
|
1665
|
+
recordRenderPass(encoder, camera, colorView, depthView, clearColor) {
|
|
1666
|
+
if (!this.renderPipeline || !this.renderUniformBuffer) {
|
|
1667
|
+
throw new Error("Render pipeline not created");
|
|
1668
|
+
}
|
|
1669
|
+
const renderUniforms = new Float32Array(40);
|
|
1670
|
+
renderUniforms.set(camera.viewProjectionMatrix, 0);
|
|
1671
|
+
renderUniforms.set(camera.viewMatrix, 16);
|
|
1672
|
+
renderUniforms[32] = camera.cameraPosition[0];
|
|
1673
|
+
renderUniforms[33] = camera.cameraPosition[1];
|
|
1674
|
+
renderUniforms[34] = camera.cameraPosition[2];
|
|
1675
|
+
renderUniforms[35] = this.options.canvasWidth;
|
|
1676
|
+
renderUniforms[36] = this.options.canvasHeight;
|
|
1677
|
+
renderUniforms[37] = camera.focalX;
|
|
1678
|
+
renderUniforms[38] = camera.focalY;
|
|
1679
|
+
renderUniforms[39] = 0;
|
|
1680
|
+
this.device.queue.writeBuffer(this.renderUniformBuffer, 0, renderUniforms);
|
|
1681
|
+
const sortedIndicesBuffer = this.sortValuesA;
|
|
1682
|
+
const renderBindGroup = this.device.createBindGroup({
|
|
1683
|
+
label: "sorted-splat-render-bind-group",
|
|
1684
|
+
layout: this.renderPipeline.getBindGroupLayout(0),
|
|
1685
|
+
entries: [
|
|
1686
|
+
{ binding: 0, resource: { buffer: this.renderUniformBuffer } },
|
|
1687
|
+
{ binding: 1, resource: { buffer: this.compressedSplatBuffer } },
|
|
1688
|
+
{ binding: 2, resource: { buffer: sortedIndicesBuffer } }
|
|
1689
|
+
]
|
|
1690
|
+
});
|
|
1691
|
+
const renderPass = encoder.beginRenderPass({
|
|
1692
|
+
label: "sorted-splat-render-pass",
|
|
1693
|
+
colorAttachments: [
|
|
1694
|
+
{
|
|
1695
|
+
view: colorView,
|
|
1696
|
+
clearValue: clearColor ?? { r: 0, g: 0, b: 0, a: 0 },
|
|
1697
|
+
loadOp: clearColor ? "clear" : "load",
|
|
1698
|
+
storeOp: "store"
|
|
1699
|
+
}
|
|
1700
|
+
],
|
|
1701
|
+
depthStencilAttachment: {
|
|
1702
|
+
view: depthView,
|
|
1703
|
+
depthClearValue: 1,
|
|
1704
|
+
depthLoadOp: "clear",
|
|
1705
|
+
depthStoreOp: "store"
|
|
1706
|
+
}
|
|
1707
|
+
});
|
|
1708
|
+
renderPass.setPipeline(this.renderPipeline);
|
|
1709
|
+
renderPass.setBindGroup(0, renderBindGroup);
|
|
1710
|
+
renderPass.draw(4, this.splatCount);
|
|
1711
|
+
renderPass.end();
|
|
1712
|
+
}
|
|
1713
|
+
/**
|
|
1714
|
+
* Execute full frame: sort + render in a single command submission.
|
|
1715
|
+
*
|
|
1716
|
+
* This is the main per-frame method for most use cases.
|
|
1717
|
+
*
|
|
1718
|
+
* @param camera Current camera state
|
|
1719
|
+
* @param colorView Color attachment view
|
|
1720
|
+
* @param depthView Depth attachment view
|
|
1721
|
+
* @param clearColor Optional clear color
|
|
1722
|
+
*/
|
|
1723
|
+
frame(camera, colorView, depthView, clearColor) {
|
|
1724
|
+
const encoder = this.device.createCommandEncoder({
|
|
1725
|
+
label: "gaussian-splat-frame-encoder"
|
|
1726
|
+
});
|
|
1727
|
+
this.sort(camera, encoder);
|
|
1728
|
+
this.recordRenderPass(encoder, camera, colorView, depthView, clearColor);
|
|
1729
|
+
this.device.queue.submit([encoder.finish()]);
|
|
1730
|
+
}
|
|
1731
|
+
// ===========================================================================
|
|
1732
|
+
// Statistics & Debugging
|
|
1733
|
+
// ===========================================================================
|
|
1734
|
+
/**
|
|
1735
|
+
* Get current sort statistics.
|
|
1736
|
+
*/
|
|
1737
|
+
getStats() {
|
|
1738
|
+
return {
|
|
1739
|
+
splatCount: this.splatCount,
|
|
1740
|
+
blockCount: this.blockCount,
|
|
1741
|
+
memoryUsageBytes: this.getMemoryUsage()
|
|
1742
|
+
};
|
|
1743
|
+
}
|
|
1744
|
+
/**
|
|
1745
|
+
* Calculate total GPU memory usage in bytes.
|
|
1746
|
+
*/
|
|
1747
|
+
getMemoryUsage() {
|
|
1748
|
+
const maxSplats = this.options.maxSplats;
|
|
1749
|
+
const maxBlocks = Math.ceil(
|
|
1750
|
+
maxSplats / (this.options.workgroupSize * this.options.elementsPerThread)
|
|
1751
|
+
);
|
|
1752
|
+
return maxSplats * BYTES_PER_RAW_SPLAT + // raw splats
|
|
1753
|
+
maxSplats * BYTES_PER_COMPRESSED_SPLAT + // compressed splats
|
|
1754
|
+
maxSplats * 4 * 4 + // 2x keys + 2x values (u32 each)
|
|
1755
|
+
maxBlocks * RADIX_SIZE * 4 + // block histograms
|
|
1756
|
+
RADIX_SIZE * 4 + // global prefixes
|
|
1757
|
+
160 + 16 + 160;
|
|
1758
|
+
}
|
|
1759
|
+
/**
|
|
1760
|
+
* Get the sorted index buffer (for external rendering integration).
|
|
1761
|
+
*
|
|
1762
|
+
* After sort(), the sorted indices are in sortValuesA (for even pass count).
|
|
1763
|
+
*/
|
|
1764
|
+
getSortedIndicesBuffer() {
|
|
1765
|
+
if (!this.sortValuesA) {
|
|
1766
|
+
throw new Error("Buffers not initialized");
|
|
1767
|
+
}
|
|
1768
|
+
return this.sortValuesA;
|
|
1769
|
+
}
|
|
1770
|
+
/**
|
|
1771
|
+
* Get the compressed splat buffer (for external rendering integration).
|
|
1772
|
+
*/
|
|
1773
|
+
getCompressedSplatBuffer() {
|
|
1774
|
+
if (!this.compressedSplatBuffer) {
|
|
1775
|
+
throw new Error("Buffers not initialized");
|
|
1776
|
+
}
|
|
1777
|
+
return this.compressedSplatBuffer;
|
|
1778
|
+
}
|
|
1779
|
+
/**
|
|
1780
|
+
* Update canvas dimensions (e.g., on resize).
|
|
1781
|
+
*/
|
|
1782
|
+
updateDimensions(width, height) {
|
|
1783
|
+
this.options.canvasWidth = width;
|
|
1784
|
+
this.options.canvasHeight = height;
|
|
1785
|
+
}
|
|
1786
|
+
// ===========================================================================
|
|
1787
|
+
// Cleanup
|
|
1788
|
+
// ===========================================================================
|
|
1789
|
+
/**
|
|
1790
|
+
* Destroy all GPU resources.
|
|
1791
|
+
*/
|
|
1792
|
+
destroy() {
|
|
1793
|
+
const buffers = [
|
|
1794
|
+
this.rawSplatBuffer,
|
|
1795
|
+
this.compressedSplatBuffer,
|
|
1796
|
+
this.sortKeysA,
|
|
1797
|
+
this.sortKeysB,
|
|
1798
|
+
this.sortValuesA,
|
|
1799
|
+
this.sortValuesB,
|
|
1800
|
+
this.blockHistogramsBuffer,
|
|
1801
|
+
this.globalPrefixesBuffer,
|
|
1802
|
+
this.compressUniformBuffer,
|
|
1803
|
+
this.sortUniformBuffer,
|
|
1804
|
+
this.renderUniformBuffer
|
|
1805
|
+
];
|
|
1806
|
+
for (const buffer of buffers) {
|
|
1807
|
+
buffer?.destroy();
|
|
1808
|
+
}
|
|
1809
|
+
this.rawSplatBuffer = null;
|
|
1810
|
+
this.compressedSplatBuffer = null;
|
|
1811
|
+
this.sortKeysA = null;
|
|
1812
|
+
this.sortKeysB = null;
|
|
1813
|
+
this.sortValuesA = null;
|
|
1814
|
+
this.sortValuesB = null;
|
|
1815
|
+
this.blockHistogramsBuffer = null;
|
|
1816
|
+
this.globalPrefixesBuffer = null;
|
|
1817
|
+
this.compressUniformBuffer = null;
|
|
1818
|
+
this.sortUniformBuffer = null;
|
|
1819
|
+
this.renderUniformBuffer = null;
|
|
1820
|
+
this.sortShaderModule = null;
|
|
1821
|
+
this.compressShaderModule = null;
|
|
1822
|
+
this.renderShaderModule = null;
|
|
1823
|
+
this.compressPipeline = null;
|
|
1824
|
+
this.histogramPipeline = null;
|
|
1825
|
+
this.blellochScanPipeline = null;
|
|
1826
|
+
this.globalPrefixPipeline = null;
|
|
1827
|
+
this.scatterPipeline = null;
|
|
1828
|
+
this.renderPipeline = null;
|
|
1829
|
+
this.initialized = false;
|
|
1830
|
+
}
|
|
1831
|
+
};
|
|
1832
|
+
/**
 * Create a GaussianSplatSorter on a freshly initialized WebGPU context.
 *
 * @param options Sorter options; options.contextOptions is handed to the
 *   WebGPUContext constructor and the whole object to the sorter
 * @returns Promise resolving to the initialized sorter
 * @throws {Error} When the context reports that WebGPU is not supported
 */
async function createGaussianSplatSorter(options) {
  // Deferred module initialization (bundler-generated lazy import).
  const webgpuModule = await Promise.resolve().then(() => (init_WebGPUContext(), WebGPUContext_exports));
  const ContextClass = webgpuModule.WebGPUContext;
  const gpuContext = new ContextClass(options.contextOptions);
  await gpuContext.initialize();
  const supported = gpuContext.isSupported();
  if (!supported) {
    throw new Error("WebGPU not supported - GaussianSplatSorter requires WebGPU");
  }
  const splatSorter = new GaussianSplatSorter(gpuContext, options);
  await splatSorter.initialize();
  return splatSorter;
}
|
|
1843
|
+
|
|
1844
|
+
// src/gpu/InstancedRenderer.ts
|
|
1845
|
+
/**
 * Instanced sphere renderer for particles on WebGPU.
 *
 * Draws one lit sphere per particle. Per-instance data is 8 floats:
 * xyz position, radius, rgba color.
 */
var InstancedRenderer = class {
  context;
  device;
  canvas;
  gpuContext = null;
  options;
  // Rendering resources
  pipeline = null;
  vertexBuffer = null;
  indexBuffer = null;
  instanceBuffer = null;
  uniformBuffer = null;
  // Depth attachment, cached across frames and recreated when the canvas size changes
  depthTexture = null;
  depthTextureWidth = 0;
  depthTextureHeight = 0;
  // Geometry data
  indexCount = 0;
  vertexCount = 0;
  indexFormat = "uint16";
  // State
  lastFrameTime = 0;
  frameCount = 0;
  /**
   * @param context Object exposing getDevice(), which returns the GPU device
   * @param canvas Canvas to render into
   * @param options Renderer options; maxParticles is required, the rest have defaults
   */
  constructor(context, canvas, options) {
    this.context = context;
    this.device = context.getDevice();
    this.canvas = canvas;
    this.options = {
      maxParticles: options.maxParticles,
      sphereSegments: options.sphereSegments ?? 16,
      enableLOD: options.enableLOD ?? true,
      lodDistances: options.lodDistances ?? [20, 50, 100],
      enableFrustumCulling: options.enableFrustumCulling ?? true
    };
  }
  /**
   * Initialize renderer: configure the canvas context, then build the sphere
   * geometry, the instance/uniform buffers and the render pipeline.
   *
   * @throws {Error} When the canvas cannot provide a WebGPU context
   */
  async initialize() {
    this.gpuContext = this.canvas.getContext("webgpu");
    if (!this.gpuContext) {
      throw new Error("Failed to get WebGPU canvas context");
    }
    const canvasFormat = navigator.gpu.getPreferredCanvasFormat();
    this.gpuContext.configure({
      device: this.device,
      format: canvasFormat,
      alphaMode: "opaque"
    });
    this.createSphereGeometry();
    this.createBuffers();
    this.createRenderPipeline(canvasFormat);
  }
  /**
   * Create unit-sphere geometry on a latitude/longitude grid of
   * (segments + 1) x (segments + 1) vertices.
   */
  createSphereGeometry() {
    const segments = this.options.sphereSegments;
    const vertices = [];
    const indices = [];
    for (let lat = 0; lat <= segments; lat++) {
      const theta = lat * Math.PI / segments;
      const sinTheta = Math.sin(theta);
      const cosTheta = Math.cos(theta);
      for (let lon = 0; lon <= segments; lon++) {
        const phi = lon * 2 * Math.PI / segments;
        const x = Math.cos(phi) * sinTheta;
        const y = cosTheta;
        const z = Math.sin(phi) * sinTheta;
        // On a unit sphere the normal equals the position, so it is pushed twice.
        vertices.push(x, y, z);
        vertices.push(x, y, z);
      }
    }
    for (let lat = 0; lat < segments; lat++) {
      for (let lon = 0; lon < segments; lon++) {
        const first = lat * (segments + 1) + lon;
        const second = first + segments + 1;
        indices.push(first, second, first + 1);
        indices.push(second, second + 1, first + 1);
      }
    }
    this.vertexCount = vertices.length / 6;
    this.indexCount = indices.length;
    // 16-bit indices can address at most 65536 vertices; switch to 32-bit
    // beyond that instead of silently wrapping.
    const needsWideIndices = this.vertexCount > 65536;
    this.indexFormat = needsWideIndices ? "uint32" : "uint16";
    this.createVertexBuffer(new Float32Array(vertices));
    this.createIndexBuffer(needsWideIndices ? new Uint32Array(indices) : new Uint16Array(indices));
  }
  /**
   * Create vertex buffer and upload the interleaved position/normal data.
   *
   * @param vertices Float32Array with 6 floats per vertex
   */
  createVertexBuffer(vertices) {
    this.vertexBuffer = this.device.createBuffer({
      label: "sphere-vertices",
      size: vertices.byteLength,
      usage: GPUBufferUsage.VERTEX | GPUBufferUsage.COPY_DST
    });
    this.device.queue.writeBuffer(this.vertexBuffer, 0, vertices);
  }
  /**
   * Create index buffer and upload the triangle indices.
   *
   * @param indices Uint16Array or Uint32Array of triangle-list indices
   */
  createIndexBuffer(indices) {
    this.indexBuffer = this.device.createBuffer({
      label: "sphere-indices",
      size: indices.byteLength,
      usage: GPUBufferUsage.INDEX | GPUBufferUsage.COPY_DST
    });
    this.device.queue.writeBuffer(this.indexBuffer, 0, indices);
  }
  /**
   * Create instance and uniform buffers. The instance buffer holds
   * maxParticles x 8 floats, the uniform buffer two 4x4 matrices.
   */
  createBuffers() {
    const instanceSize = 8 * Float32Array.BYTES_PER_ELEMENT;
    const instanceBufferSize = this.options.maxParticles * instanceSize;
    this.instanceBuffer = this.device.createBuffer({
      label: "instance-buffer",
      size: instanceBufferSize,
      usage: GPUBufferUsage.VERTEX | GPUBufferUsage.COPY_DST
    });
    const uniformBufferSize = 32 * Float32Array.BYTES_PER_ELEMENT;
    this.uniformBuffer = this.device.createBuffer({
      label: "camera-uniforms",
      size: uniformBufferSize,
      usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
    });
  }
  /**
   * Create render pipeline for the instanced spheres.
   *
   * @param format Texture format of the color target
   */
  createRenderPipeline(format) {
    const shaderCode = `
      struct Uniforms {
        view: mat4x4<f32>,
        projection: mat4x4<f32>,
      };

      @group(0) @binding(0) var<uniform> uniforms: Uniforms;

      struct VertexInput {
        @location(0) position: vec3<f32>,
        @location(1) normal: vec3<f32>,
      };

      struct InstanceInput {
        @location(2) instancePosition: vec4<f32>,  // xyz = pos, w = radius
        @location(3) instanceColor: vec4<f32>,     // rgba
      };

      struct VertexOutput {
        @builtin(position) position: vec4<f32>,
        @location(0) normal: vec3<f32>,
        @location(1) worldPos: vec3<f32>,
        @location(2) color: vec4<f32>,
      };

      @vertex
      fn vertexMain(
        vertex: VertexInput,
        instance: InstanceInput
      ) -> VertexOutput {
        var output: VertexOutput;

        // Scale vertex by instance radius
        let scaledPos = vertex.position * instance.instancePosition.w;

        // Translate to instance position
        let worldPos = scaledPos + instance.instancePosition.xyz;

        // Transform to clip space
        output.position = uniforms.projection * uniforms.view * vec4<f32>(worldPos, 1.0);
        output.normal = vertex.normal;
        output.worldPos = worldPos;
        output.color = instance.instanceColor;

        return output;
      }

      @fragment
      fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
        // Simple Phong lighting
        let lightDir = normalize(vec3<f32>(1.0, 1.0, 1.0));
        let normal = normalize(input.normal);

        let ambient = 0.3;
        let diffuse = max(dot(normal, lightDir), 0.0) * 0.7;
        let lighting = ambient + diffuse;

        return vec4<f32>(input.color.rgb * lighting, input.color.a);
      }
    `;
    const shaderModule = this.device.createShaderModule({
      label: "instanced-shader",
      code: shaderCode
    });
    const pipelineLayout = this.device.createPipelineLayout({
      bindGroupLayouts: [
        this.device.createBindGroupLayout({
          entries: [
            {
              binding: 0,
              visibility: GPUShaderStage.VERTEX,
              buffer: { type: "uniform" }
            }
          ]
        })
      ]
    });
    this.pipeline = this.device.createRenderPipeline({
      label: "instanced-pipeline",
      layout: pipelineLayout,
      vertex: {
        module: shaderModule,
        entryPoint: "vertexMain",
        buffers: [
          // Vertex buffer (per-vertex)
          {
            arrayStride: 6 * Float32Array.BYTES_PER_ELEMENT,
            // position + normal
            attributes: [
              { shaderLocation: 0, offset: 0, format: "float32x3" },
              // position
              { shaderLocation: 1, offset: 3 * 4, format: "float32x3" }
              // normal
            ]
          },
          // Instance buffer (per-instance)
          {
            arrayStride: 8 * Float32Array.BYTES_PER_ELEMENT,
            // position+radius + color
            stepMode: "instance",
            attributes: [
              { shaderLocation: 2, offset: 0, format: "float32x4" },
              // instancePosition
              { shaderLocation: 3, offset: 4 * 4, format: "float32x4" }
              // instanceColor
            ]
          }
        ]
      },
      fragment: {
        module: shaderModule,
        entryPoint: "fragmentMain",
        targets: [{ format }]
      },
      primitive: {
        topology: "triangle-list",
        cullMode: "back"
      },
      depthStencil: {
        format: "depth24plus",
        depthWriteEnabled: true,
        depthCompare: "less"
      }
    });
  }
  /**
   * Update instance buffer with particle data.
   *
   * The upload is limited to maxParticles (the instance buffer capacity) and
   * to the number of complete particles present in `positions`.
   *
   * @param positions Flat array with 4 floats per particle (x, y, z, radius)
   * @param count Number of particles requested
   * @returns Number of instances actually uploaded (0 when no instance buffer exists)
   */
  updateInstances(positions, count) {
    if (!this.instanceBuffer) return 0;
    const available = Math.floor(positions.length / 4);
    const uploadCount = Math.max(0, Math.min(count, this.options.maxParticles, available));
    const instanceData = new Float32Array(uploadCount * 8);
    for (let i = 0; i < uploadCount; i++) {
      const posIdx = i * 4;
      const instIdx = i * 8;
      instanceData[instIdx + 0] = positions[posIdx + 0];
      instanceData[instIdx + 1] = positions[posIdx + 1];
      instanceData[instIdx + 2] = positions[posIdx + 2];
      instanceData[instIdx + 3] = positions[posIdx + 3];
      // Color is derived from height: red and green rise with y.
      const y = positions[posIdx + 1];
      instanceData[instIdx + 4] = 0.4 + y * 0.02;
      instanceData[instIdx + 5] = 0.5 + y * 0.01;
      instanceData[instIdx + 6] = 0.8;
      instanceData[instIdx + 7] = 1;
    }
    this.device.queue.writeBuffer(this.instanceBuffer, 0, instanceData);
    return uploadCount;
  }
  /**
   * Update camera uniforms (view matrix followed by projection matrix).
   *
   * @param camera Object with position, target, fov, aspect, near and far
   */
  updateCamera(camera) {
    if (!this.uniformBuffer) return;
    const view = this.buildViewMatrix(camera.position, camera.target);
    const projection = this.buildProjectionMatrix(
      camera.fov,
      camera.aspect,
      camera.near,
      camera.far
    );
    const uniforms = new Float32Array(32);
    uniforms.set(view, 0);
    uniforms.set(projection, 16);
    this.device.queue.writeBuffer(this.uniformBuffer, 0, uniforms);
  }
  /**
   * Build view matrix (lookAt, column-major, world up = +Y).
   *
   * Degenerate inputs produce a finite matrix instead of NaN: when eye and
   * target coincide the camera looks down -Z, and when the view direction
   * is parallel to +Y, +Z is used as the up reference.
   *
   * @param eye Camera position [x, y, z]
   * @param target Point the camera looks at [x, y, z]
   * @returns Float32Array(16) view matrix
   */
  buildViewMatrix(eye, target) {
    let backward = [eye[0] - target[0], eye[1] - target[1], eye[2] - target[2]];
    if (this.dot(backward, backward) === 0) {
      backward = [0, 0, 1];
    }
    const zAxis = this.normalize(backward);
    let right = this.cross([0, 1, 0], zAxis);
    if (this.dot(right, right) < 1e-12) {
      right = this.cross([0, 0, 1], zAxis);
    }
    const xAxis = this.normalize(right);
    const yAxis = this.cross(zAxis, xAxis);
    return new Float32Array([
      xAxis[0],
      yAxis[0],
      zAxis[0],
      0,
      xAxis[1],
      yAxis[1],
      zAxis[1],
      0,
      xAxis[2],
      yAxis[2],
      zAxis[2],
      0,
      -this.dot(xAxis, eye),
      -this.dot(yAxis, eye),
      -this.dot(zAxis, eye),
      1
    ]);
  }
  /**
   * Build projection matrix (right-handed perspective, column-major).
   *
   * Depth is mapped to the [0, 1] range that WebGPU clips against: the near
   * plane lands on 0 and the far plane on 1. An OpenGL-style [-1, 1] mapping
   * would place the near part of the frustum at negative depth, where it
   * is clipped.
   *
   * @param fov Vertical field of view in radians
   * @param aspect Width / height
   * @param near Near plane distance
   * @param far Far plane distance
   * @returns Float32Array(16) projection matrix
   */
  buildProjectionMatrix(fov, aspect, near, far) {
    const f = 1 / Math.tan(fov / 2);
    const rangeInv = 1 / (near - far);
    return new Float32Array([
      f / aspect,
      0,
      0,
      0,
      0,
      f,
      0,
      0,
      0,
      0,
      far * rangeInv,
      -1,
      0,
      0,
      near * far * rangeInv,
      0
    ]);
  }
  /**
   * Return the cached depth texture, (re)creating it when none exists or
   * the canvas size changed since it was created.
   */
  ensureDepthTexture() {
    const { width, height } = this.canvas;
    const stale = !this.depthTexture || width !== this.depthTextureWidth || height !== this.depthTextureHeight;
    if (stale) {
      this.depthTexture?.destroy();
      this.depthTexture = this.device.createTexture({
        size: [width, height],
        format: "depth24plus",
        usage: GPUTextureUsage.RENDER_ATTACHMENT
      });
      this.depthTextureWidth = width;
      this.depthTextureHeight = height;
    }
    return this.depthTexture;
  }
  /**
   * Render particles: upload instances and camera, then record and submit
   * one render pass.
   *
   * @param positions Flat array with 4 floats per particle (x, y, z, radius)
   * @param particleCount Number of particles to draw
   * @param camera Object with position, target, fov, aspect, near and far
   * @throws {Error} When the renderer has not been initialized (or was destroyed)
   */
  render(positions, particleCount, camera) {
    if (!this.gpuContext || !this.pipeline || !this.vertexBuffer || !this.indexBuffer || !this.instanceBuffer || !this.uniformBuffer) {
      throw new Error("Renderer not initialized");
    }
    // Draw only the instances that were uploaded, never past the buffer capacity.
    const instanceCount = this.updateInstances(positions, particleCount);
    this.updateCamera(camera);
    // Reused across frames; allocating a new texture per frame leaks GPU memory.
    const depthTexture = this.ensureDepthTexture();
    const bindGroup = this.device.createBindGroup({
      layout: this.pipeline.getBindGroupLayout(0),
      entries: [{ binding: 0, resource: { buffer: this.uniformBuffer } }]
    });
    const commandEncoder = this.device.createCommandEncoder({ label: "render-encoder" });
    const renderPass = commandEncoder.beginRenderPass({
      colorAttachments: [
        {
          view: this.gpuContext.getCurrentTexture().createView(),
          loadOp: "clear",
          clearValue: { r: 0.1, g: 0.1, b: 0.15, a: 1 },
          storeOp: "store"
        }
      ],
      depthStencilAttachment: {
        view: depthTexture.createView(),
        depthLoadOp: "clear",
        depthClearValue: 1,
        depthStoreOp: "store"
      }
    });
    renderPass.setPipeline(this.pipeline);
    renderPass.setBindGroup(0, bindGroup);
    renderPass.setVertexBuffer(0, this.vertexBuffer);
    renderPass.setVertexBuffer(1, this.instanceBuffer);
    renderPass.setIndexBuffer(this.indexBuffer, this.indexFormat);
    renderPass.drawIndexed(this.indexCount, instanceCount, 0, 0, 0);
    renderPass.end();
    this.device.queue.submit([commandEncoder.finish()]);
    // Frame counter that restarts once at least a second has elapsed.
    this.frameCount++;
    const now = performance.now();
    if (now - this.lastFrameTime >= 1e3) {
      this.frameCount = 0;
      this.lastFrameTime = now;
    }
  }
  /**
   * Vector math helpers
   */
  /** Unit-length copy of v; a zero-length vector yields [0, 0, 0] rather than NaN. */
  normalize(v) {
    const len = Math.sqrt(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
    if (len === 0) {
      return [0, 0, 0];
    }
    return [v[0] / len, v[1] / len, v[2] / len];
  }
  /** Cross product a x b of two 3-vectors. */
  cross(a, b) {
    return [a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]];
  }
  /** Dot product of two 3-vectors. */
  dot(a, b) {
    return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
  }
  /**
   * Cleanup resources. Destroys the GPU buffers and the depth texture and
   * clears the references, so a later render() reports "Renderer not
   * initialized" instead of using destroyed resources.
   */
  destroy() {
    this.vertexBuffer?.destroy();
    this.indexBuffer?.destroy();
    this.instanceBuffer?.destroy();
    this.uniformBuffer?.destroy();
    this.depthTexture?.destroy();
    this.vertexBuffer = null;
    this.indexBuffer = null;
    this.instanceBuffer = null;
    this.uniformBuffer = null;
    this.depthTexture = null;
    this.depthTextureWidth = 0;
    this.depthTextureHeight = 0;
    this.pipeline = null;
  }
};
+
this.vertexBuffer = this.device.createBuffer({
|
|
1933
|
+
label: "sphere-vertices",
|
|
1934
|
+
size: vertices.byteLength,
|
|
1935
|
+
usage: GPUBufferUsage.VERTEX | GPUBufferUsage.COPY_DST
|
|
1936
|
+
});
|
|
1937
|
+
this.device.queue.writeBuffer(
|
|
1938
|
+
this.vertexBuffer,
|
|
1939
|
+
0,
|
|
1940
|
+
vertices
|
|
1941
|
+
);
|
|
1942
|
+
}
|
|
1943
|
+
/**
|
|
1944
|
+
* Create index buffer
|
|
1945
|
+
*/
|
|
1946
|
+
createIndexBuffer(indices) {
|
|
1947
|
+
this.indexBuffer = this.device.createBuffer({
|
|
1948
|
+
label: "sphere-indices",
|
|
1949
|
+
size: indices.byteLength,
|
|
1950
|
+
usage: GPUBufferUsage.INDEX | GPUBufferUsage.COPY_DST
|
|
1951
|
+
});
|
|
1952
|
+
this.device.queue.writeBuffer(
|
|
1953
|
+
this.indexBuffer,
|
|
1954
|
+
0,
|
|
1955
|
+
indices
|
|
1956
|
+
);
|
|
1957
|
+
}
|
|
1958
|
+
/**
|
|
1959
|
+
* Create instance and uniform buffers
|
|
1960
|
+
*/
|
|
1961
|
+
createBuffers() {
|
|
1962
|
+
const instanceSize = 8 * Float32Array.BYTES_PER_ELEMENT;
|
|
1963
|
+
const instanceBufferSize = this.options.maxParticles * instanceSize;
|
|
1964
|
+
this.instanceBuffer = this.device.createBuffer({
|
|
1965
|
+
label: "instance-buffer",
|
|
1966
|
+
size: instanceBufferSize,
|
|
1967
|
+
usage: GPUBufferUsage.VERTEX | GPUBufferUsage.COPY_DST
|
|
1968
|
+
});
|
|
1969
|
+
const uniformBufferSize = 32 * Float32Array.BYTES_PER_ELEMENT;
|
|
1970
|
+
this.uniformBuffer = this.device.createBuffer({
|
|
1971
|
+
label: "camera-uniforms",
|
|
1972
|
+
size: uniformBufferSize,
|
|
1973
|
+
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
|
|
1974
|
+
});
|
|
1975
|
+
}
|
|
1976
|
+
/**
|
|
1977
|
+
* Create render pipeline
|
|
1978
|
+
*/
|
|
1979
|
+
createRenderPipeline(format) {
|
|
1980
|
+
const shaderCode = `
|
|
1981
|
+
struct Uniforms {
|
|
1982
|
+
view: mat4x4<f32>,
|
|
1983
|
+
projection: mat4x4<f32>,
|
|
1984
|
+
};
|
|
1985
|
+
|
|
1986
|
+
@group(0) @binding(0) var<uniform> uniforms: Uniforms;
|
|
1987
|
+
|
|
1988
|
+
struct VertexInput {
|
|
1989
|
+
@location(0) position: vec3<f32>,
|
|
1990
|
+
@location(1) normal: vec3<f32>,
|
|
1991
|
+
};
|
|
1992
|
+
|
|
1993
|
+
struct InstanceInput {
|
|
1994
|
+
@location(2) instancePosition: vec4<f32>, // xyz = pos, w = radius
|
|
1995
|
+
@location(3) instanceColor: vec4<f32>, // rgba
|
|
1996
|
+
};
|
|
1997
|
+
|
|
1998
|
+
struct VertexOutput {
|
|
1999
|
+
@builtin(position) position: vec4<f32>,
|
|
2000
|
+
@location(0) normal: vec3<f32>,
|
|
2001
|
+
@location(1) worldPos: vec3<f32>,
|
|
2002
|
+
@location(2) color: vec4<f32>,
|
|
2003
|
+
};
|
|
2004
|
+
|
|
2005
|
+
@vertex
|
|
2006
|
+
fn vertexMain(
|
|
2007
|
+
vertex: VertexInput,
|
|
2008
|
+
instance: InstanceInput
|
|
2009
|
+
) -> VertexOutput {
|
|
2010
|
+
var output: VertexOutput;
|
|
2011
|
+
|
|
2012
|
+
// Scale vertex by instance radius
|
|
2013
|
+
let scaledPos = vertex.position * instance.instancePosition.w;
|
|
2014
|
+
|
|
2015
|
+
// Translate to instance position
|
|
2016
|
+
let worldPos = scaledPos + instance.instancePosition.xyz;
|
|
2017
|
+
|
|
2018
|
+
// Transform to clip space
|
|
2019
|
+
output.position = uniforms.projection * uniforms.view * vec4<f32>(worldPos, 1.0);
|
|
2020
|
+
output.normal = vertex.normal;
|
|
2021
|
+
output.worldPos = worldPos;
|
|
2022
|
+
output.color = instance.instanceColor;
|
|
2023
|
+
|
|
2024
|
+
return output;
|
|
2025
|
+
}
|
|
2026
|
+
|
|
2027
|
+
@fragment
|
|
2028
|
+
fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
|
|
2029
|
+
// Simple Phong lighting
|
|
2030
|
+
let lightDir = normalize(vec3<f32>(1.0, 1.0, 1.0));
|
|
2031
|
+
let normal = normalize(input.normal);
|
|
2032
|
+
|
|
2033
|
+
let ambient = 0.3;
|
|
2034
|
+
let diffuse = max(dot(normal, lightDir), 0.0) * 0.7;
|
|
2035
|
+
let lighting = ambient + diffuse;
|
|
2036
|
+
|
|
2037
|
+
return vec4<f32>(input.color.rgb * lighting, input.color.a);
|
|
2038
|
+
}
|
|
2039
|
+
`;
|
|
2040
|
+
const shaderModule = this.device.createShaderModule({
|
|
2041
|
+
label: "instanced-shader",
|
|
2042
|
+
code: shaderCode
|
|
2043
|
+
});
|
|
2044
|
+
const pipelineLayout = this.device.createPipelineLayout({
|
|
2045
|
+
bindGroupLayouts: [
|
|
2046
|
+
this.device.createBindGroupLayout({
|
|
2047
|
+
entries: [
|
|
2048
|
+
{
|
|
2049
|
+
binding: 0,
|
|
2050
|
+
visibility: GPUShaderStage.VERTEX,
|
|
2051
|
+
buffer: { type: "uniform" }
|
|
2052
|
+
}
|
|
2053
|
+
]
|
|
2054
|
+
})
|
|
2055
|
+
]
|
|
2056
|
+
});
|
|
2057
|
+
this.pipeline = this.device.createRenderPipeline({
|
|
2058
|
+
label: "instanced-pipeline",
|
|
2059
|
+
layout: pipelineLayout,
|
|
2060
|
+
vertex: {
|
|
2061
|
+
module: shaderModule,
|
|
2062
|
+
entryPoint: "vertexMain",
|
|
2063
|
+
buffers: [
|
|
2064
|
+
// Vertex buffer (per-vertex)
|
|
2065
|
+
{
|
|
2066
|
+
arrayStride: 6 * Float32Array.BYTES_PER_ELEMENT,
|
|
2067
|
+
// position + normal
|
|
2068
|
+
attributes: [
|
|
2069
|
+
{ shaderLocation: 0, offset: 0, format: "float32x3" },
|
|
2070
|
+
// position
|
|
2071
|
+
{ shaderLocation: 1, offset: 3 * 4, format: "float32x3" }
|
|
2072
|
+
// normal
|
|
2073
|
+
]
|
|
2074
|
+
},
|
|
2075
|
+
// Instance buffer (per-instance)
|
|
2076
|
+
{
|
|
2077
|
+
arrayStride: 8 * Float32Array.BYTES_PER_ELEMENT,
|
|
2078
|
+
// position+radius + color
|
|
2079
|
+
stepMode: "instance",
|
|
2080
|
+
attributes: [
|
|
2081
|
+
{ shaderLocation: 2, offset: 0, format: "float32x4" },
|
|
2082
|
+
// instancePosition
|
|
2083
|
+
{ shaderLocation: 3, offset: 4 * 4, format: "float32x4" }
|
|
2084
|
+
// instanceColor
|
|
2085
|
+
]
|
|
2086
|
+
}
|
|
2087
|
+
]
|
|
2088
|
+
},
|
|
2089
|
+
fragment: {
|
|
2090
|
+
module: shaderModule,
|
|
2091
|
+
entryPoint: "fragmentMain",
|
|
2092
|
+
targets: [{ format }]
|
|
2093
|
+
},
|
|
2094
|
+
primitive: {
|
|
2095
|
+
topology: "triangle-list",
|
|
2096
|
+
cullMode: "back"
|
|
2097
|
+
},
|
|
2098
|
+
depthStencil: {
|
|
2099
|
+
format: "depth24plus",
|
|
2100
|
+
depthWriteEnabled: true,
|
|
2101
|
+
depthCompare: "less"
|
|
2102
|
+
}
|
|
2103
|
+
});
|
|
2104
|
+
}
|
|
2105
|
+
/**
|
|
2106
|
+
* Update instance buffer with particle data
|
|
2107
|
+
*/
|
|
2108
|
+
updateInstances(positions, count) {
|
|
2109
|
+
if (!this.instanceBuffer) return;
|
|
2110
|
+
const instanceData = new Float32Array(count * 8);
|
|
2111
|
+
for (let i = 0; i < count; i++) {
|
|
2112
|
+
const posIdx = i * 4;
|
|
2113
|
+
const instIdx = i * 8;
|
|
2114
|
+
instanceData[instIdx + 0] = positions[posIdx + 0];
|
|
2115
|
+
instanceData[instIdx + 1] = positions[posIdx + 1];
|
|
2116
|
+
instanceData[instIdx + 2] = positions[posIdx + 2];
|
|
2117
|
+
instanceData[instIdx + 3] = positions[posIdx + 3];
|
|
2118
|
+
const y = positions[posIdx + 1];
|
|
2119
|
+
instanceData[instIdx + 4] = 0.4 + y * 0.02;
|
|
2120
|
+
instanceData[instIdx + 5] = 0.5 + y * 0.01;
|
|
2121
|
+
instanceData[instIdx + 6] = 0.8;
|
|
2122
|
+
instanceData[instIdx + 7] = 1;
|
|
2123
|
+
}
|
|
2124
|
+
this.device.queue.writeBuffer(this.instanceBuffer, 0, instanceData);
|
|
2125
|
+
}
|
|
2126
|
+
/**
|
|
2127
|
+
* Update camera uniforms
|
|
2128
|
+
*/
|
|
2129
|
+
updateCamera(camera) {
|
|
2130
|
+
if (!this.uniformBuffer) return;
|
|
2131
|
+
const view = this.buildViewMatrix(camera.position, camera.target);
|
|
2132
|
+
const projection = this.buildProjectionMatrix(
|
|
2133
|
+
camera.fov,
|
|
2134
|
+
camera.aspect,
|
|
2135
|
+
camera.near,
|
|
2136
|
+
camera.far
|
|
2137
|
+
);
|
|
2138
|
+
const uniforms = new Float32Array(32);
|
|
2139
|
+
uniforms.set(view, 0);
|
|
2140
|
+
uniforms.set(projection, 16);
|
|
2141
|
+
this.device.queue.writeBuffer(this.uniformBuffer, 0, uniforms);
|
|
2142
|
+
}
|
|
2143
|
+
/**
|
|
2144
|
+
* Build view matrix (lookAt)
|
|
2145
|
+
*/
|
|
2146
|
+
buildViewMatrix(eye, target) {
|
|
2147
|
+
const zAxis = this.normalize([eye[0] - target[0], eye[1] - target[1], eye[2] - target[2]]);
|
|
2148
|
+
const xAxis = this.normalize(this.cross([0, 1, 0], zAxis));
|
|
2149
|
+
const yAxis = this.cross(zAxis, xAxis);
|
|
2150
|
+
return new Float32Array([
|
|
2151
|
+
xAxis[0],
|
|
2152
|
+
yAxis[0],
|
|
2153
|
+
zAxis[0],
|
|
2154
|
+
0,
|
|
2155
|
+
xAxis[1],
|
|
2156
|
+
yAxis[1],
|
|
2157
|
+
zAxis[1],
|
|
2158
|
+
0,
|
|
2159
|
+
xAxis[2],
|
|
2160
|
+
yAxis[2],
|
|
2161
|
+
zAxis[2],
|
|
2162
|
+
0,
|
|
2163
|
+
-this.dot(xAxis, eye),
|
|
2164
|
+
-this.dot(yAxis, eye),
|
|
2165
|
+
-this.dot(zAxis, eye),
|
|
2166
|
+
1
|
|
2167
|
+
]);
|
|
2168
|
+
}
|
|
2169
|
+
/**
|
|
2170
|
+
* Build projection matrix (perspective)
|
|
2171
|
+
*/
|
|
2172
|
+
buildProjectionMatrix(fov, aspect, near, far) {
|
|
2173
|
+
const f = 1 / Math.tan(fov / 2);
|
|
2174
|
+
const rangeInv = 1 / (near - far);
|
|
2175
|
+
return new Float32Array([
|
|
2176
|
+
f / aspect,
|
|
2177
|
+
0,
|
|
2178
|
+
0,
|
|
2179
|
+
0,
|
|
2180
|
+
0,
|
|
2181
|
+
f,
|
|
2182
|
+
0,
|
|
2183
|
+
0,
|
|
2184
|
+
0,
|
|
2185
|
+
0,
|
|
2186
|
+
(near + far) * rangeInv,
|
|
2187
|
+
-1,
|
|
2188
|
+
0,
|
|
2189
|
+
0,
|
|
2190
|
+
near * far * rangeInv * 2,
|
|
2191
|
+
0
|
|
2192
|
+
]);
|
|
2193
|
+
}
|
|
2194
|
+
/**
|
|
2195
|
+
* Render particles
|
|
2196
|
+
*/
|
|
2197
|
+
render(positions, particleCount, camera) {
|
|
2198
|
+
if (!this.gpuContext || !this.pipeline || !this.vertexBuffer || !this.indexBuffer || !this.instanceBuffer || !this.uniformBuffer) {
|
|
2199
|
+
throw new Error("Renderer not initialized");
|
|
2200
|
+
}
|
|
2201
|
+
this.updateInstances(positions, particleCount);
|
|
2202
|
+
this.updateCamera(camera);
|
|
2203
|
+
const depthTexture = this.device.createTexture({
|
|
2204
|
+
size: [this.canvas.width, this.canvas.height],
|
|
2205
|
+
format: "depth24plus",
|
|
2206
|
+
usage: GPUTextureUsage.RENDER_ATTACHMENT
|
|
2207
|
+
});
|
|
2208
|
+
const bindGroup = this.device.createBindGroup({
|
|
2209
|
+
layout: this.pipeline.getBindGroupLayout(0),
|
|
2210
|
+
entries: [{ binding: 0, resource: { buffer: this.uniformBuffer } }]
|
|
2211
|
+
});
|
|
2212
|
+
const commandEncoder = this.device.createCommandEncoder({ label: "render-encoder" });
|
|
2213
|
+
const renderPass = commandEncoder.beginRenderPass({
|
|
2214
|
+
colorAttachments: [
|
|
2215
|
+
{
|
|
2216
|
+
view: this.gpuContext.getCurrentTexture().createView(),
|
|
2217
|
+
loadOp: "clear",
|
|
2218
|
+
clearValue: { r: 0.1, g: 0.1, b: 0.15, a: 1 },
|
|
2219
|
+
storeOp: "store"
|
|
2220
|
+
}
|
|
2221
|
+
],
|
|
2222
|
+
depthStencilAttachment: {
|
|
2223
|
+
view: depthTexture.createView(),
|
|
2224
|
+
depthLoadOp: "clear",
|
|
2225
|
+
depthClearValue: 1,
|
|
2226
|
+
depthStoreOp: "store"
|
|
2227
|
+
}
|
|
2228
|
+
});
|
|
2229
|
+
renderPass.setPipeline(this.pipeline);
|
|
2230
|
+
renderPass.setBindGroup(0, bindGroup);
|
|
2231
|
+
renderPass.setVertexBuffer(0, this.vertexBuffer);
|
|
2232
|
+
renderPass.setVertexBuffer(1, this.instanceBuffer);
|
|
2233
|
+
renderPass.setIndexBuffer(this.indexBuffer, "uint16");
|
|
2234
|
+
renderPass.drawIndexed(this.indexCount, particleCount, 0, 0, 0);
|
|
2235
|
+
renderPass.end();
|
|
2236
|
+
this.device.queue.submit([commandEncoder.finish()]);
|
|
2237
|
+
this.frameCount++;
|
|
2238
|
+
const now = performance.now();
|
|
2239
|
+
if (now - this.lastFrameTime >= 1e3) {
|
|
2240
|
+
this.frameCount = 0;
|
|
2241
|
+
this.lastFrameTime = now;
|
|
2242
|
+
}
|
|
2243
|
+
}
|
|
2244
|
+
/**
|
|
2245
|
+
* Vector math helpers
|
|
2246
|
+
*/
|
|
2247
|
+
normalize(v) {
|
|
2248
|
+
const len = Math.sqrt(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);
|
|
2249
|
+
return [v[0] / len, v[1] / len, v[2] / len];
|
|
2250
|
+
}
|
|
2251
|
+
cross(a, b) {
|
|
2252
|
+
return [a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]];
|
|
2253
|
+
}
|
|
2254
|
+
dot(a, b) {
|
|
2255
|
+
return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
|
|
2256
|
+
}
|
|
2257
|
+
/**
|
|
2258
|
+
* Cleanup resources
|
|
2259
|
+
*/
|
|
2260
|
+
destroy() {
|
|
2261
|
+
this.vertexBuffer?.destroy();
|
|
2262
|
+
this.indexBuffer?.destroy();
|
|
2263
|
+
this.instanceBuffer?.destroy();
|
|
2264
|
+
this.uniformBuffer?.destroy();
|
|
2265
|
+
this.pipeline = null;
|
|
2266
|
+
}
|
|
2267
|
+
};
|
|
2268
|
+
|
|
2269
|
+
// wgsl-raw:C:\Users\josep\Documents\GitHub\HoloScript\packages\engine\src\gpu\shaders\cg_kernels.wgsl
|
|
2270
|
+
// WGSL source for the conjugate-gradient compute kernels, handed to
// createShaderModule() as a single module containing every entry point.
//
// spmv_vector no longer exits early for out-of-range rows: WGSL requires
// workgroupBarrier() to be reached in uniform control flow, and an early exit
// that depends on the invocation id makes every later barrier non-uniform,
// which fails shader validation for the whole module. Out-of-range invocations
// now contribute 0 and skip only the final store. vector_width is also clamped
// to at least 1 so a zero width cannot produce a stride-0 loop that never ends.
//
// The decorative comment rules are generated here at a 72-column width.
var cg_kernels_default = (() => {
  const WIDTH = 72;
  const banner = "// ".padEnd(WIDTH, "\u2550");
  const section = (title) => `// \u2500\u2500 ${title} `.padEnd(WIDTH, "\u2500");
  return `/**
 * Conjugate Gradient Kernels \u2014 Sparse Linear Algebra on WebGPU
 *
 * Unified bind group layout:
 *   group(0): CSR matrix (SpMV only)
 *   group(1): Vectors (vec_in read, vec_out read_write)
 *   group(2): SolverArgs uniform
 *   group(3): Reduction workspace (dot/final_reduce only)
 *
 * Each entry point references only the groups it needs.
 * With layout:'auto', each pipeline gets a layout derived from
 * only the bindings its entry point actually accesses.
 */

${section("Shared Types")}

struct SolverArgs {
  num_rows: u32,
  vector_width: u32,
  n: u32,
  alpha: f32,
};

${section("Group 0: CSR Matrix")}

@group(0) @binding(0) var<storage, read> csr_val: array<f32>;
@group(0) @binding(1) var<storage, read> csr_col: array<u32>;
@group(0) @binding(2) var<storage, read> csr_row: array<u32>;

${section("Group 1: Vectors")}

@group(1) @binding(0) var<storage, read> vec_in: array<f32>;
@group(1) @binding(1) var<storage, read_write> vec_out: array<f32>;

${section("Group 2: Solver Arguments")}

@group(2) @binding(0) var<uniform> args: SolverArgs;

${section("Group 3: Reduction Workspace")}

@group(3) @binding(0) var<storage, read_write> partial_sums: array<f32>;
@group(3) @binding(1) var<storage, read_write> scalar_result: array<f32>;

${banner}
// 1. SpMV \u2014 CSR-Vector (multi-thread per row)
//    Assigns vector_width threads per row for irregular TET10 sparsity.
//    Uses: groups 0, 1, 2
${banner}

var<workgroup> spmv_shared: array<f32, 256>;

@compute @workgroup_size(256)
fn spmv_vector(
  @builtin(global_invocation_id) global_id: vec3<u32>,
  @builtin(local_invocation_id) local_id: vec3<u32>
) {
  let tid = local_id.x;
  let gid = global_id.x;
  // A width of 0 would give a stride-0 loop below; treat it as 1 thread per row.
  let threads_per_row = max(args.vector_width, 1u);
  let row = gid / threads_per_row;
  let lane = gid % threads_per_row;
  // Every invocation must reach the barriers below, so rows past the end
  // are masked instead of leaving the function.
  let in_range = row < args.num_rows;

  var sum: f32 = 0.0;
  if (in_range) {
    let row_start = csr_row[row];
    let row_end = csr_row[row + 1];
    for (var i = row_start + lane; i < row_end; i = i + threads_per_row) {
      sum += csr_val[i] * vec_in[csr_col[i]];
    }
  }

  spmv_shared[tid] = sum;
  workgroupBarrier();

  for (var s = threads_per_row / 2u; s > 0u; s >>= 1u) {
    if (lane < s) {
      spmv_shared[tid] += spmv_shared[tid + s];
    }
    workgroupBarrier();
  }

  if (in_range && lane == 0u) {
    vec_out[row] = spmv_shared[tid];
  }
}

// Legacy scalar SpMV (1 thread per row, for small/regular matrices)
// Uses: groups 0, 1, 2
@compute @workgroup_size(64)
fn spmv(@builtin(global_invocation_id) global_id: vec3<u32>) {
  let row = global_id.x;
  if (row >= args.num_rows) {
    return;
  }

  let row_start = csr_row[row];
  let row_end = csr_row[row + 1];

  var sum: f32 = 0.0;
  for (var i = row_start; i < row_end; i = i + 1u) {
    sum += csr_val[i] * vec_in[csr_col[i]];
  }

  vec_out[row] = sum;
}

${banner}
// 2. SAXPY: vec_out = alpha * vec_in + vec_out
//    Uses: groups 1, 2
${banner}

@compute @workgroup_size(256)
fn saxpy(@builtin(global_invocation_id) global_id: vec3<u32>) {
  let idx = global_id.x;
  if (idx >= args.n) {
    return;
  }
  vec_out[idx] = args.alpha * vec_in[idx] + vec_out[idx];
}

${banner}
// 3. Fused CG Update: p = r + beta * p
//    vec_in = r (read), vec_out = p (read_write), args.alpha = beta
//    Uses: groups 1, 2
${banner}

@compute @workgroup_size(256)
fn p_update(@builtin(global_invocation_id) global_id: vec3<u32>) {
  let idx = global_id.x;
  if (idx >= args.n) {
    return;
  }
  vec_out[idx] = vec_in[idx] + args.alpha * vec_out[idx];
}

${banner}
// 4. Vector Copy: vec_out = vec_in
//    Uses: groups 1, 2
${banner}

@compute @workgroup_size(256)
fn vec_copy(@builtin(global_invocation_id) global_id: vec3<u32>) {
  let idx = global_id.x;
  if (idx >= args.n) {
    return;
  }
  vec_out[idx] = vec_in[idx];
}

${banner}
// 5. Vector Zero: vec_out = 0
//    Uses: groups 1 (binding 1 only), 2
${banner}

@compute @workgroup_size(256)
fn vec_zero(@builtin(global_invocation_id) global_id: vec3<u32>) {
  let idx = global_id.x;
  if (idx >= args.n) {
    return;
  }
  vec_out[idx] = 0.0;
}

${banner}
// 6. Dot Product \u2014 Phase 1: per-workgroup partial sums
//    result[wg_id] = sum of vec_in[i] * vec_out[i] for this workgroup
//    Uses: groups 1, 2, 3 (binding 0)
${banner}

var<workgroup> dot_shared: array<f32, 256>;

@compute @workgroup_size(256)
fn dot_product(
  @builtin(global_invocation_id) global_id: vec3<u32>,
  @builtin(local_invocation_id) local_id: vec3<u32>,
  @builtin(workgroup_id) workgroup_id: vec3<u32>
) {
  let idx = global_id.x;
  let tid = local_id.x;

  if (idx < args.n) {
    dot_shared[tid] = vec_in[idx] * vec_out[idx];
  } else {
    dot_shared[tid] = 0.0;
  }

  workgroupBarrier();

  for (var s = 128u; s > 0u; s >>= 1u) {
    if (tid < s) {
      dot_shared[tid] += dot_shared[tid + s];
    }
    workgroupBarrier();
  }

  if (tid == 0u) {
    partial_sums[workgroup_id.x] = dot_shared[0];
  }
}

${banner}
// 7. Final Reduce \u2014 Phase 2: sum partial_sums \u2192 scalar_result[0]
//    args.n = number of partial sums to reduce
//    Uses: groups 2, 3 (bindings 0 and 1)
${banner}

var<workgroup> reduce_shared: array<f32, 256>;

@compute @workgroup_size(256)
fn final_reduce(@builtin(local_invocation_id) local_id: vec3<u32>) {
  let tid = local_id.x;
  let count = args.n;

  var acc: f32 = 0.0;
  var i = tid;
  loop {
    if (i >= count) {
      break;
    }
    acc += partial_sums[i];
    i += 256u;
  }
  reduce_shared[tid] = acc;

  workgroupBarrier();

  for (var s = 128u; s > 0u; s >>= 1u) {
    if (tid < s) {
      reduce_shared[tid] += reduce_shared[tid + s];
    }
    workgroupBarrier();
  }

  if (tid == 0u) {
    scalar_result[0] = reduce_shared[0];
  }
}
`;
})();
|
|
2271
|
+
|
|
2272
|
+
// src/gpu/SparseLinearSolver.ts
|
|
2273
|
+
// Invocations per workgroup assumed when converting element counts into
// workgroup counts; the kernels declared with @workgroup_size(256) in
// cg_kernels_default rely on this value.
var WG_SIZE = 256;
|
|
2274
|
+
var SparseLinearSolver = class {
|
|
2275
|
+
constructor(context) {
|
|
2276
|
+
this.context = context;
|
|
2277
|
+
this.device = context.getDevice();
|
|
2278
|
+
}
|
|
2279
|
+
context;
|
|
2280
|
+
device;
|
|
2281
|
+
shaderModule;
|
|
2282
|
+
spmvPipeline;
|
|
2283
|
+
spmvVectorPipeline;
|
|
2284
|
+
saxpyPipeline;
|
|
2285
|
+
dotPipeline;
|
|
2286
|
+
finalReducePipeline;
|
|
2287
|
+
vecCopyPipeline;
|
|
2288
|
+
vecZeroPipeline;
|
|
2289
|
+
pUpdatePipeline;
|
|
2290
|
+
initialized = false;
|
|
2291
|
+
/** Compile shaders and create all compute pipelines */
|
|
2292
|
+
async initialize() {
|
|
2293
|
+
if (this.initialized) return;
|
|
2294
|
+
this.shaderModule = this.device.createShaderModule({
|
|
2295
|
+
label: "CG Kernels",
|
|
2296
|
+
code: cg_kernels_default
|
|
2297
|
+
});
|
|
2298
|
+
const [spmv, spmvVec, saxpy, dot, finalReduce, vecCopy, vecZero, pUpdate] = await Promise.all([
|
|
2299
|
+
this.device.createComputePipelineAsync({
|
|
2300
|
+
label: "SpMV Scalar",
|
|
2301
|
+
layout: "auto",
|
|
2302
|
+
compute: { module: this.shaderModule, entryPoint: "spmv" }
|
|
2303
|
+
}),
|
|
2304
|
+
this.device.createComputePipelineAsync({
|
|
2305
|
+
label: "SpMV Vector",
|
|
2306
|
+
layout: "auto",
|
|
2307
|
+
compute: { module: this.shaderModule, entryPoint: "spmv_vector" }
|
|
2308
|
+
}),
|
|
2309
|
+
this.device.createComputePipelineAsync({
|
|
2310
|
+
label: "SAXPY",
|
|
2311
|
+
layout: "auto",
|
|
2312
|
+
compute: { module: this.shaderModule, entryPoint: "saxpy" }
|
|
2313
|
+
}),
|
|
2314
|
+
this.device.createComputePipelineAsync({
|
|
2315
|
+
label: "Dot Product",
|
|
2316
|
+
layout: "auto",
|
|
2317
|
+
compute: { module: this.shaderModule, entryPoint: "dot_product" }
|
|
2318
|
+
}),
|
|
2319
|
+
this.device.createComputePipelineAsync({
|
|
2320
|
+
label: "Final Reduce",
|
|
2321
|
+
layout: "auto",
|
|
2322
|
+
compute: { module: this.shaderModule, entryPoint: "final_reduce" }
|
|
2323
|
+
}),
|
|
2324
|
+
this.device.createComputePipelineAsync({
|
|
2325
|
+
label: "Vec Copy",
|
|
2326
|
+
layout: "auto",
|
|
2327
|
+
compute: { module: this.shaderModule, entryPoint: "vec_copy" }
|
|
2328
|
+
}),
|
|
2329
|
+
this.device.createComputePipelineAsync({
|
|
2330
|
+
label: "Vec Zero",
|
|
2331
|
+
layout: "auto",
|
|
2332
|
+
compute: { module: this.shaderModule, entryPoint: "vec_zero" }
|
|
2333
|
+
}),
|
|
2334
|
+
this.device.createComputePipelineAsync({
|
|
2335
|
+
label: "P-Update",
|
|
2336
|
+
layout: "auto",
|
|
2337
|
+
compute: { module: this.shaderModule, entryPoint: "p_update" }
|
|
2338
|
+
})
|
|
2339
|
+
]);
|
|
2340
|
+
this.spmvPipeline = spmv;
|
|
2341
|
+
this.spmvVectorPipeline = spmvVec;
|
|
2342
|
+
this.saxpyPipeline = saxpy;
|
|
2343
|
+
this.dotPipeline = dot;
|
|
2344
|
+
this.finalReducePipeline = finalReduce;
|
|
2345
|
+
this.vecCopyPipeline = vecCopy;
|
|
2346
|
+
this.vecZeroPipeline = vecZero;
|
|
2347
|
+
this.pUpdatePipeline = pUpdate;
|
|
2348
|
+
this.initialized = true;
|
|
2349
|
+
}
|
|
2350
|
+
/**
|
|
2351
|
+
* Solve Ax = b using Conjugate Gradient on the GPU.
|
|
2352
|
+
*
|
|
2353
|
+
* Algorithm (Hestenes-Stiefel):
|
|
2354
|
+
* r₀ = b - A·x₀
|
|
2355
|
+
* p₀ = r₀
|
|
2356
|
+
* for k = 0, 1, 2, ...
|
|
2357
|
+
* Ap = A·p
|
|
2358
|
+
* α = (r·r) / (p·Ap)
|
|
2359
|
+
* x = x + α·p
|
|
2360
|
+
* r = r - α·Ap
|
|
2361
|
+
* if ||r||² < tol: break
|
|
2362
|
+
* β = (r_new·r_new) / (r·r)
|
|
2363
|
+
* p = r + β·p ← fused kernel
|
|
2364
|
+
*/
|
|
2365
|
+
async solveCG(A, b, xGuess, options = {}) {
|
|
2366
|
+
if (!this.initialized) {
|
|
2367
|
+
throw new Error("SparseLinearSolver not initialized. Call initialize() first.");
|
|
2368
|
+
}
|
|
2369
|
+
const {
|
|
2370
|
+
maxIterations = 1e3,
|
|
2371
|
+
toleranceSq = 1e-10,
|
|
2372
|
+
convergenceCheckInterval = 50,
|
|
2373
|
+
onProgress
|
|
2374
|
+
} = options;
|
|
2375
|
+
const n = A.num_rows;
|
|
2376
|
+
const vectorWidth = 16;
|
|
2377
|
+
const numWgSpmvVec = Math.ceil(n * vectorWidth / WG_SIZE);
|
|
2378
|
+
const numWgVec = Math.ceil(n / WG_SIZE);
|
|
2379
|
+
const numWgDot = Math.ceil(n / WG_SIZE);
|
|
2380
|
+
const csrVal = this.uploadStorage(A.val, "csr-val");
|
|
2381
|
+
const csrCol = this.uploadStorage(A.col_ind, "csr-col");
|
|
2382
|
+
const csrRow = this.uploadStorage(A.row_ptr, "csr-row");
|
|
2383
|
+
const bufB = this.uploadStorage(b, "vec-b");
|
|
2384
|
+
const bufX = this.uploadStorage(xGuess, "vec-x");
|
|
2385
|
+
const bufR = this.emptyVec(n, "vec-r");
|
|
2386
|
+
const bufP = this.emptyVec(n, "vec-p");
|
|
2387
|
+
const bufAp = this.emptyVec(n, "vec-Ap");
|
|
2388
|
+
const bufPartials = this.emptyVec(numWgDot, "partial-sums");
|
|
2389
|
+
const bufScalar = this.emptyVec(1, "scalar-result");
|
|
2390
|
+
const bufStaging = this.device.createBuffer({
|
|
2391
|
+
label: "staging",
|
|
2392
|
+
size: 4,
|
|
2393
|
+
usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
|
|
2394
|
+
});
|
|
2395
|
+
const bufArgs = this.device.createBuffer({
|
|
2396
|
+
label: "solver-args",
|
|
2397
|
+
size: 16,
|
|
2398
|
+
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
|
|
2399
|
+
});
|
|
2400
|
+
const allBuffers = [csrVal, csrCol, csrRow, bufB, bufX, bufR, bufP, bufAp, bufPartials, bufScalar, bufStaging, bufArgs];
|
|
2401
|
+
{
|
|
2402
|
+
this.writeArgs(bufArgs, n, vectorWidth, n, 0);
|
|
2403
|
+
const enc = this.device.createCommandEncoder({ label: "init-spmv" });
|
|
2404
|
+
this.dispatchSpmv(enc, csrVal, csrCol, csrRow, bufX, bufAp, bufArgs, numWgSpmvVec, true);
|
|
2405
|
+
this.device.queue.submit([enc.finish()]);
|
|
2406
|
+
await this.device.queue.onSubmittedWorkDone();
|
|
2407
|
+
}
|
|
2408
|
+
{
|
|
2409
|
+
const enc = this.device.createCommandEncoder({ label: "init-residual" });
|
|
2410
|
+
enc.copyBufferToBuffer(bufB, 0, bufR, 0, n * 4);
|
|
2411
|
+
this.device.queue.submit([enc.finish()]);
|
|
2412
|
+
await this.device.queue.onSubmittedWorkDone();
|
|
2413
|
+
}
|
|
2414
|
+
{
|
|
2415
|
+
this.writeArgs(bufArgs, n, vectorWidth, n, -1);
|
|
2416
|
+
const enc = this.device.createCommandEncoder({ label: "init-saxpy" });
|
|
2417
|
+
this.dispatchSaxpy(enc, bufAp, bufR, bufArgs, numWgVec);
|
|
2418
|
+
this.device.queue.submit([enc.finish()]);
|
|
2419
|
+
await this.device.queue.onSubmittedWorkDone();
|
|
2420
|
+
}
|
|
2421
|
+
{
|
|
2422
|
+
this.writeArgs(bufArgs, n, vectorWidth, n, 0);
|
|
2423
|
+
const enc = this.device.createCommandEncoder({ label: "init-copy-p" });
|
|
2424
|
+
this.dispatchVecCopy(enc, bufR, bufP, bufArgs, numWgVec);
|
|
2425
|
+
this.device.queue.submit([enc.finish()]);
|
|
2426
|
+
await this.device.queue.onSubmittedWorkDone();
|
|
2427
|
+
}
|
|
2428
|
+
let rDotR = await this.dotProduct(bufR, bufR, bufPartials, bufScalar, bufStaging, bufArgs, n, numWgDot);
|
|
2429
|
+
if (rDotR < toleranceSq) {
|
|
2430
|
+
const x = await this.readback(bufX, n);
|
|
2431
|
+
this.cleanup(allBuffers);
|
|
2432
|
+
return { x, iterations: 0, residualNormSq: rDotR, converged: true };
|
|
2433
|
+
}
|
|
2434
|
+
let iteration = 0;
|
|
2435
|
+
let converged = false;
|
|
2436
|
+
for (iteration = 0; iteration < maxIterations; iteration++) {
|
|
2437
|
+
{
|
|
2438
|
+
this.writeArgs(bufArgs, n, vectorWidth, n, 0);
|
|
2439
|
+
const enc = this.device.createCommandEncoder();
|
|
2440
|
+
this.dispatchSpmv(enc, csrVal, csrCol, csrRow, bufP, bufAp, bufArgs, numWgSpmvVec, true);
|
|
2441
|
+
this.device.queue.submit([enc.finish()]);
|
|
2442
|
+
}
|
|
2443
|
+
const pAp = await this.dotProduct(bufP, bufAp, bufPartials, bufScalar, bufStaging, bufArgs, n, numWgDot);
|
|
2444
|
+
if (Math.abs(pAp) < 1e-30) {
|
|
2445
|
+
converged = rDotR < toleranceSq;
|
|
2446
|
+
break;
|
|
2447
|
+
}
|
|
2448
|
+
const alpha = rDotR / pAp;
|
|
2449
|
+
{
|
|
2450
|
+
this.writeArgs(bufArgs, n, vectorWidth, n, alpha);
|
|
2451
|
+
const enc = this.device.createCommandEncoder();
|
|
2452
|
+
this.dispatchSaxpy(enc, bufP, bufX, bufArgs, numWgVec);
|
|
2453
|
+
this.device.queue.submit([enc.finish()]);
|
|
2454
|
+
await this.device.queue.onSubmittedWorkDone();
|
|
2455
|
+
}
|
|
2456
|
+
{
|
|
2457
|
+
this.writeArgs(bufArgs, n, vectorWidth, n, -alpha);
|
|
2458
|
+
const enc = this.device.createCommandEncoder();
|
|
2459
|
+
this.dispatchSaxpy(enc, bufAp, bufR, bufArgs, numWgVec);
|
|
2460
|
+
this.device.queue.submit([enc.finish()]);
|
|
2461
|
+
await this.device.queue.onSubmittedWorkDone();
|
|
2462
|
+
}
|
|
2463
|
+
const rNewDotRNew = await this.dotProduct(bufR, bufR, bufPartials, bufScalar, bufStaging, bufArgs, n, numWgDot);
|
|
2464
|
+
if (rNewDotRNew < toleranceSq) {
|
|
2465
|
+
rDotR = rNewDotRNew;
|
|
2466
|
+
converged = true;
|
|
2467
|
+
iteration++;
|
|
2468
|
+
onProgress?.(iteration, rNewDotRNew);
|
|
2469
|
+
break;
|
|
2470
|
+
}
|
|
2471
|
+
if (onProgress && iteration % convergenceCheckInterval === 0) {
|
|
2472
|
+
onProgress(iteration, rNewDotRNew);
|
|
2473
|
+
}
|
|
2474
|
+
const beta = rNewDotRNew / rDotR;
|
|
2475
|
+
{
|
|
2476
|
+
this.writeArgs(bufArgs, n, vectorWidth, n, beta);
|
|
2477
|
+
const enc = this.device.createCommandEncoder();
|
|
2478
|
+
this.dispatchPUpdate(enc, bufR, bufP, bufArgs, numWgVec);
|
|
2479
|
+
this.device.queue.submit([enc.finish()]);
|
|
2480
|
+
await this.device.queue.onSubmittedWorkDone();
|
|
2481
|
+
}
|
|
2482
|
+
rDotR = rNewDotRNew;
|
|
2483
|
+
}
|
|
2484
|
+
const solution = await this.readback(bufX, n);
|
|
2485
|
+
this.cleanup(allBuffers);
|
|
2486
|
+
return { x: solution, iterations: iteration, residualNormSq: rDotR, converged };
|
|
2487
|
+
}
|
|
2488
|
+
/**
|
|
2489
|
+
* solveCGDirect — Direct GPU-to-GPU Conjugate Gradient solve.
|
|
2490
|
+
*
|
|
2491
|
+
* Same as solveCG but avoids CPU readback of the solution vector.
|
|
2492
|
+
* Returns the live GPUBuffer containing the result.
|
|
2493
|
+
*
|
|
2494
|
+
* @warning Caller is responsible for destroying the returned xBuffer.
|
|
2495
|
+
*/
|
|
2496
|
+
async solveCGDirect(A, b, x0, options = {}) {
|
|
2497
|
+
const n = A.num_rows;
|
|
2498
|
+
const maxIterations = options.maxIterations ?? 1e3;
|
|
2499
|
+
const toleranceSq = options.toleranceSq ?? 1e-10;
|
|
2500
|
+
const xExtraUsage = options.xExtraUsage ?? 0;
|
|
2501
|
+
const valBuffer = this.uploadStorage(A.val, "val");
|
|
2502
|
+
const colIndBuffer = this.uploadStorage(new Uint32Array(A.col_ind), "col_ind");
|
|
2503
|
+
const rowPtrBuffer = this.uploadStorage(new Uint32Array(A.row_ptr), "row_ptr");
|
|
2504
|
+
const bBuffer = this.uploadStorage(b, "b");
|
|
2505
|
+
const xBuffer = this.uploadStorage(x0, "x", xExtraUsage);
|
|
2506
|
+
const rBuffer = this.emptyVec(n, "r");
|
|
2507
|
+
const pBuffer = this.emptyVec(n, "p");
|
|
2508
|
+
const ApBuffer = this.emptyVec(n, "Ap");
|
|
2509
|
+
const rDotRBuffer = this.emptyVec(1, "rDotR");
|
|
2510
|
+
const rDotRStagingBuffer = this.device.createBuffer({
|
|
2511
|
+
size: 4,
|
|
2512
|
+
usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
|
|
2513
|
+
});
|
|
2514
|
+
const numWgVec = Math.ceil(n / WG_SIZE);
|
|
2515
|
+
const numWgDot = Math.ceil(n / WG_SIZE);
|
|
2516
|
+
const bufArgs = this.device.createBuffer({ size: 16, usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST });
|
|
2517
|
+
const partials = this.emptyVec(numWgDot, "partials");
|
|
2518
|
+
{
|
|
2519
|
+
const enc = this.device.createCommandEncoder();
|
|
2520
|
+
this.dispatchVecCopy(enc, bBuffer, rBuffer, bufArgs, numWgVec);
|
|
2521
|
+
this.dispatchSpmv(enc, valBuffer, colIndBuffer, rowPtrBuffer, xBuffer, ApBuffer, bufArgs, numWgVec, true);
|
|
2522
|
+
this.device.queue.submit([enc.finish()]);
|
|
2523
|
+
}
|
|
2524
|
+
{
|
|
2525
|
+
this.writeArgs(bufArgs, n, 0, n, -1);
|
|
2526
|
+
const enc = this.device.createCommandEncoder();
|
|
2527
|
+
this.dispatchSaxpy(enc, ApBuffer, rBuffer, bufArgs, numWgVec);
|
|
2528
|
+
this.device.queue.submit([enc.finish()]);
|
|
2529
|
+
}
|
|
2530
|
+
{
|
|
2531
|
+
const enc = this.device.createCommandEncoder();
|
|
2532
|
+
this.dispatchVecCopy(enc, rBuffer, pBuffer, bufArgs, numWgVec);
|
|
2533
|
+
this.device.queue.submit([enc.finish()]);
|
|
2534
|
+
}
|
|
2535
|
+
let iteration = 0;
|
|
2536
|
+
let converged = false;
|
|
2537
|
+
let rDotR = await this.dotProduct(rBuffer, rBuffer, partials, rDotRBuffer, rDotRStagingBuffer, bufArgs, n, numWgDot);
|
|
2538
|
+
for (iteration = 0; iteration < maxIterations; iteration++) {
|
|
2539
|
+
if (rDotR < toleranceSq) {
|
|
2540
|
+
converged = true;
|
|
2541
|
+
break;
|
|
2542
|
+
}
|
|
2543
|
+
{
|
|
2544
|
+
const enc = this.device.createCommandEncoder();
|
|
2545
|
+
this.dispatchSpmv(enc, valBuffer, colIndBuffer, rowPtrBuffer, pBuffer, ApBuffer, bufArgs, numWgVec, true);
|
|
2546
|
+
this.device.queue.submit([enc.finish()]);
|
|
2547
|
+
}
|
|
2548
|
+
const pAp = await this.dotProduct(pBuffer, ApBuffer, partials, rDotRBuffer, rDotRStagingBuffer, bufArgs, n, numWgDot);
|
|
2549
|
+
const alpha = rDotR / (pAp + 1e-20);
|
|
2550
|
+
{
|
|
2551
|
+
this.writeArgs(bufArgs, n, 0, n, alpha);
|
|
2552
|
+
const enc = this.device.createCommandEncoder();
|
|
2553
|
+
this.dispatchSaxpy(enc, pBuffer, xBuffer, bufArgs, numWgVec);
|
|
2554
|
+
this.device.queue.submit([enc.finish()]);
|
|
2555
|
+
}
|
|
2556
|
+
{
|
|
2557
|
+
this.writeArgs(bufArgs, n, 0, n, -alpha);
|
|
2558
|
+
const enc = this.device.createCommandEncoder();
|
|
2559
|
+
this.dispatchSaxpy(enc, ApBuffer, rBuffer, bufArgs, numWgVec);
|
|
2560
|
+
this.device.queue.submit([enc.finish()]);
|
|
2561
|
+
}
|
|
2562
|
+
const oldRDotR = rDotR;
|
|
2563
|
+
rDotR = await this.dotProduct(rBuffer, rBuffer, partials, rDotRBuffer, rDotRStagingBuffer, bufArgs, n, numWgDot);
|
|
2564
|
+
const beta = rDotR / (oldRDotR + 1e-20);
|
|
2565
|
+
{
|
|
2566
|
+
this.writeArgs(bufArgs, n, 0, n, beta);
|
|
2567
|
+
const enc = this.device.createCommandEncoder();
|
|
2568
|
+
this.dispatchPUpdate(enc, rBuffer, pBuffer, bufArgs, numWgVec);
|
|
2569
|
+
this.device.queue.submit([enc.finish()]);
|
|
2570
|
+
}
|
|
2571
|
+
}
|
|
2572
|
+
this.cleanup([
|
|
2573
|
+
valBuffer,
|
|
2574
|
+
colIndBuffer,
|
|
2575
|
+
rowPtrBuffer,
|
|
2576
|
+
bBuffer,
|
|
2577
|
+
rBuffer,
|
|
2578
|
+
pBuffer,
|
|
2579
|
+
ApBuffer,
|
|
2580
|
+
rDotRBuffer,
|
|
2581
|
+
rDotRStagingBuffer
|
|
2582
|
+
]);
|
|
2583
|
+
return { xBuffer, iterations: iteration, residualNormSq: rDotR, converged };
|
|
2584
|
+
}
|
|
2585
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
2586
|
+
// Dispatch helpers — each sets the bind groups its entry point needs
|
|
2587
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
2588
|
+
/** SpMV: groups 0 (CSR), 1 (vecs), 2 (args) */
|
|
2589
|
+
dispatchSpmv(enc, val, col, row, x, y, args, numWgs, useVector) {
|
|
2590
|
+
const pipeline = useVector ? this.spmvVectorPipeline : this.spmvPipeline;
|
|
2591
|
+
const pass = enc.beginComputePass({ label: "spmv" });
|
|
2592
|
+
pass.setPipeline(pipeline);
|
|
2593
|
+
pass.setBindGroup(0, this.device.createBindGroup({
|
|
2594
|
+
layout: pipeline.getBindGroupLayout(0),
|
|
2595
|
+
entries: [
|
|
2596
|
+
{ binding: 0, resource: { buffer: val } },
|
|
2597
|
+
{ binding: 1, resource: { buffer: col } },
|
|
2598
|
+
{ binding: 2, resource: { buffer: row } }
|
|
2599
|
+
]
|
|
2600
|
+
}));
|
|
2601
|
+
pass.setBindGroup(1, this.device.createBindGroup({
|
|
2602
|
+
layout: pipeline.getBindGroupLayout(1),
|
|
2603
|
+
entries: [
|
|
2604
|
+
{ binding: 0, resource: { buffer: x } },
|
|
2605
|
+
{ binding: 1, resource: { buffer: y } }
|
|
2606
|
+
]
|
|
2607
|
+
}));
|
|
2608
|
+
pass.setBindGroup(2, this.device.createBindGroup({
|
|
2609
|
+
layout: pipeline.getBindGroupLayout(2),
|
|
2610
|
+
entries: [{ binding: 0, resource: { buffer: args } }]
|
|
2611
|
+
}));
|
|
2612
|
+
pass.dispatchWorkgroups(numWgs);
|
|
2613
|
+
pass.end();
|
|
2614
|
+
}
|
|
2615
|
+
/** SAXPY: groups 1 (vecs), 2 (args) */
|
|
2616
|
+
dispatchSaxpy(enc, x, y, args, numWgs) {
|
|
2617
|
+
const pass = enc.beginComputePass({ label: "saxpy" });
|
|
2618
|
+
pass.setPipeline(this.saxpyPipeline);
|
|
2619
|
+
pass.setBindGroup(1, this.device.createBindGroup({
|
|
2620
|
+
layout: this.saxpyPipeline.getBindGroupLayout(1),
|
|
2621
|
+
entries: [
|
|
2622
|
+
{ binding: 0, resource: { buffer: x } },
|
|
2623
|
+
{ binding: 1, resource: { buffer: y } }
|
|
2624
|
+
]
|
|
2625
|
+
}));
|
|
2626
|
+
pass.setBindGroup(2, this.device.createBindGroup({
|
|
2627
|
+
layout: this.saxpyPipeline.getBindGroupLayout(2),
|
|
2628
|
+
entries: [{ binding: 0, resource: { buffer: args } }]
|
|
2629
|
+
}));
|
|
2630
|
+
pass.dispatchWorkgroups(numWgs);
|
|
2631
|
+
pass.end();
|
|
2632
|
+
}
|
|
2633
|
+
/** Fused p = r + beta*p: groups 1 (vecs), 2 (args) */
|
|
2634
|
+
dispatchPUpdate(enc, r, p, args, numWgs) {
|
|
2635
|
+
const pass = enc.beginComputePass({ label: "p-update" });
|
|
2636
|
+
pass.setPipeline(this.pUpdatePipeline);
|
|
2637
|
+
pass.setBindGroup(1, this.device.createBindGroup({
|
|
2638
|
+
layout: this.pUpdatePipeline.getBindGroupLayout(1),
|
|
2639
|
+
entries: [
|
|
2640
|
+
{ binding: 0, resource: { buffer: r } },
|
|
2641
|
+
{ binding: 1, resource: { buffer: p } }
|
|
2642
|
+
]
|
|
2643
|
+
}));
|
|
2644
|
+
pass.setBindGroup(2, this.device.createBindGroup({
|
|
2645
|
+
layout: this.pUpdatePipeline.getBindGroupLayout(2),
|
|
2646
|
+
entries: [{ binding: 0, resource: { buffer: args } }]
|
|
2647
|
+
}));
|
|
2648
|
+
pass.dispatchWorkgroups(numWgs);
|
|
2649
|
+
pass.end();
|
|
2650
|
+
}
|
|
2651
|
+
/** Vec copy: groups 1 (vecs), 2 (args) */
|
|
2652
|
+
dispatchVecCopy(enc, src, dst, args, numWgs) {
|
|
2653
|
+
const pass = enc.beginComputePass({ label: "vec-copy" });
|
|
2654
|
+
pass.setPipeline(this.vecCopyPipeline);
|
|
2655
|
+
pass.setBindGroup(1, this.device.createBindGroup({
|
|
2656
|
+
layout: this.vecCopyPipeline.getBindGroupLayout(1),
|
|
2657
|
+
entries: [
|
|
2658
|
+
{ binding: 0, resource: { buffer: src } },
|
|
2659
|
+
{ binding: 1, resource: { buffer: dst } }
|
|
2660
|
+
]
|
|
2661
|
+
}));
|
|
2662
|
+
pass.setBindGroup(2, this.device.createBindGroup({
|
|
2663
|
+
layout: this.vecCopyPipeline.getBindGroupLayout(2),
|
|
2664
|
+
entries: [{ binding: 0, resource: { buffer: args } }]
|
|
2665
|
+
}));
|
|
2666
|
+
pass.dispatchWorkgroups(numWgs);
|
|
2667
|
+
pass.end();
|
|
2668
|
+
}
|
|
2669
|
+
/**
|
|
2670
|
+
* Full dot product: v1·v2
|
|
2671
|
+
* Phase 1: dot_product kernel → partial_sums (per-workgroup)
|
|
2672
|
+
* Phase 2: final_reduce → scalar_result[0]
|
|
2673
|
+
* Readback: staging mapAsync → CPU f32
|
|
2674
|
+
*/
|
|
2675
|
+
async dotProduct(v1, v2, partials, scalar, staging, args, n, numWgDot) {
|
|
2676
|
+
{
|
|
2677
|
+
this.writeArgs(args, n, 0, n, 0);
|
|
2678
|
+
const enc = this.device.createCommandEncoder({ label: "dot-phase1" });
|
|
2679
|
+
const pass = enc.beginComputePass();
|
|
2680
|
+
pass.setPipeline(this.dotPipeline);
|
|
2681
|
+
pass.setBindGroup(1, this.device.createBindGroup({
|
|
2682
|
+
layout: this.dotPipeline.getBindGroupLayout(1),
|
|
2683
|
+
entries: [
|
|
2684
|
+
{ binding: 0, resource: { buffer: v1 } },
|
|
2685
|
+
{ binding: 1, resource: { buffer: v2 } }
|
|
2686
|
+
]
|
|
2687
|
+
}));
|
|
2688
|
+
pass.setBindGroup(2, this.device.createBindGroup({
|
|
2689
|
+
layout: this.dotPipeline.getBindGroupLayout(2),
|
|
2690
|
+
entries: [{ binding: 0, resource: { buffer: args } }]
|
|
2691
|
+
}));
|
|
2692
|
+
pass.setBindGroup(3, this.device.createBindGroup({
|
|
2693
|
+
layout: this.dotPipeline.getBindGroupLayout(3),
|
|
2694
|
+
entries: [{ binding: 0, resource: { buffer: partials } }]
|
|
2695
|
+
}));
|
|
2696
|
+
pass.dispatchWorkgroups(numWgDot);
|
|
2697
|
+
pass.end();
|
|
2698
|
+
this.device.queue.submit([enc.finish()]);
|
|
2699
|
+
}
|
|
2700
|
+
{
|
|
2701
|
+
this.writeArgs(args, numWgDot, 0, numWgDot, 0);
|
|
2702
|
+
const enc = this.device.createCommandEncoder({ label: "dot-phase2" });
|
|
2703
|
+
const pass = enc.beginComputePass();
|
|
2704
|
+
pass.setPipeline(this.finalReducePipeline);
|
|
2705
|
+
pass.setBindGroup(2, this.device.createBindGroup({
|
|
2706
|
+
layout: this.finalReducePipeline.getBindGroupLayout(2),
|
|
2707
|
+
entries: [{ binding: 0, resource: { buffer: args } }]
|
|
2708
|
+
}));
|
|
2709
|
+
pass.setBindGroup(3, this.device.createBindGroup({
|
|
2710
|
+
layout: this.finalReducePipeline.getBindGroupLayout(3),
|
|
2711
|
+
entries: [
|
|
2712
|
+
{ binding: 0, resource: { buffer: partials } },
|
|
2713
|
+
{ binding: 1, resource: { buffer: scalar } }
|
|
2714
|
+
]
|
|
2715
|
+
}));
|
|
2716
|
+
pass.dispatchWorkgroups(1);
|
|
2717
|
+
pass.end();
|
|
2718
|
+
enc.copyBufferToBuffer(scalar, 0, staging, 0, 4);
|
|
2719
|
+
this.device.queue.submit([enc.finish()]);
|
|
2720
|
+
}
|
|
2721
|
+
await staging.mapAsync(GPUMapMode.READ);
|
|
2722
|
+
const value = new Float32Array(staging.getMappedRange())[0];
|
|
2723
|
+
staging.unmap();
|
|
2724
|
+
return value;
|
|
2725
|
+
}
|
|
2726
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
2727
|
+
// Buffer helpers
|
|
2728
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
2729
|
+
writeArgs(buf, numRows, vectorWidth, n, alpha) {
|
|
2730
|
+
const data = new ArrayBuffer(16);
|
|
2731
|
+
new Uint32Array(data, 0, 3).set([numRows, vectorWidth, n]);
|
|
2732
|
+
new Float32Array(data, 12, 1).set([alpha]);
|
|
2733
|
+
this.device.queue.writeBuffer(buf, 0, data);
|
|
2734
|
+
}
|
|
2735
|
+
uploadStorage(data, label, extraUsage = 0) {
|
|
2736
|
+
const buf = this.device.createBuffer({
|
|
2737
|
+
label,
|
|
2738
|
+
size: data.byteLength,
|
|
2739
|
+
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | extraUsage,
|
|
2740
|
+
mappedAtCreation: true
|
|
2741
|
+
});
|
|
2742
|
+
if (data instanceof Float32Array) new Float32Array(buf.getMappedRange()).set(data);
|
|
2743
|
+
else new Uint32Array(buf.getMappedRange()).set(data);
|
|
2744
|
+
buf.unmap();
|
|
2745
|
+
return buf;
|
|
2746
|
+
}
|
|
2747
|
+
emptyVec(n, label, extraUsage = 0) {
|
|
2748
|
+
return this.device.createBuffer({
|
|
2749
|
+
label,
|
|
2750
|
+
size: Math.max(4, n * 4),
|
|
2751
|
+
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST | extraUsage
|
|
2752
|
+
});
|
|
2753
|
+
}
|
|
2754
|
+
async readback(buf, n) {
|
|
2755
|
+
const staging = this.device.createBuffer({
|
|
2756
|
+
size: n * 4,
|
|
2757
|
+
usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
|
|
2758
|
+
});
|
|
2759
|
+
const enc = this.device.createCommandEncoder();
|
|
2760
|
+
enc.copyBufferToBuffer(buf, 0, staging, 0, n * 4);
|
|
2761
|
+
this.device.queue.submit([enc.finish()]);
|
|
2762
|
+
await staging.mapAsync(GPUMapMode.READ);
|
|
2763
|
+
const result = new Float32Array(staging.getMappedRange()).slice();
|
|
2764
|
+
staging.unmap();
|
|
2765
|
+
staging.destroy();
|
|
2766
|
+
return result;
|
|
2767
|
+
}
|
|
2768
|
+
cleanup(buffers) {
|
|
2769
|
+
for (const b of buffers) b.destroy();
|
|
2770
|
+
}
|
|
2771
|
+
destroy() {
|
|
2772
|
+
this.initialized = false;
|
|
2773
|
+
}
|
|
2774
|
+
};
|
|
2775
|
+
|
|
2776
|
+
// src/gpu/SpatialGrid.ts
|
|
2777
|
+
/**
 * GPU uniform-grid helper: owns the grid buffers, three compute pipelines
 * (entry points "gridClear", "gridBuild", "gridCollision" in the supplied
 * shader) and the bind groups that connect them.
 *
 * Typical use: construct, `await initialize()`, then either call
 * `execute(positions, velocities)` or record `clearGrid` / `buildGrid` /
 * `detectCollisions` into your own command encoder.
 */
var SpatialGrid = class {
  context;
  device;
  particleCount;
  options;
  // Grid dimensions
  totalCells;
  // Pipelines
  clearPipeline = null;
  buildPipeline = null;
  collisionPipeline = null;
  // Buffers
  uniformBuffer = null;
  gridCellStartBuffer = null;
  gridCellEndBuffer = null;
  gridParticleIndicesBuffer = null;
  collisionForcesBuffer = null;
  // Bind groups
  clearBindGroup = null;
  buildBindGroup = null;
  collisionBindGroup = null;
  // Caller-supplied buffers that the cached build/collision bind groups were
  // created for. Used to detect when a cached bind group has gone stale.
  buildBindGroupPositions = null;
  collisionBindGroupPositions = null;
  collisionBindGroupVelocities = null;
  /**
   * @param context        Object exposing getDevice().
   * @param particleCount  Number of particles processed per dispatch.
   * @param options        { cellSize, gridDimensions: {x, y, z},
   *                         maxParticlesPerCell? (default 64), shaderCode }.
   */
  constructor(context, particleCount, options) {
    this.context = context;
    this.device = context.getDevice();
    this.particleCount = particleCount;
    this.options = {
      cellSize: options.cellSize,
      gridDimensions: options.gridDimensions,
      maxParticlesPerCell: options.maxParticlesPerCell ?? 64,
      shaderCode: options.shaderCode
    };
    this.totalCells = this.options.gridDimensions.x * this.options.gridDimensions.y * this.options.gridDimensions.z;
  }
  /**
   * Initialize spatial grid buffers and pipelines.
   * Shader compilation errors are logged to the console; they do not abort
   * initialization here.
   */
  async initialize() {
    const shaderModule = this.device.createShaderModule({
      label: "spatial-grid-shader",
      code: this.options.shaderCode
    });
    const compilationInfo = await shaderModule.getCompilationInfo();
    for (const message of compilationInfo.messages) {
      if (message.type === "error") {
        console.error("Spatial grid shader error:", message.message);
      }
    }
    this.createBuffers();
    this.createPipelines(shaderModule);
  }
  /**
   * Create GPU buffers for spatial grid
   */
  createBuffers() {
    this.uniformBuffer = this.device.createBuffer({
      label: "spatial-grid-uniforms",
      size: 32,
      // 8 × f32/u32 = 32 bytes
      usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
    });
    // One u32 per cell for each of the start/end counter buffers.
    const cellCounterSize = this.totalCells * Uint32Array.BYTES_PER_ELEMENT;
    this.gridCellStartBuffer = this.device.createBuffer({
      label: "grid-cell-start",
      size: cellCounterSize,
      usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
    });
    this.gridCellEndBuffer = this.device.createBuffer({
      label: "grid-cell-end",
      size: cellCounterSize,
      usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
    });
    // maxParticlesPerCell u32 slots per cell.
    const indicesSize = this.totalCells * this.options.maxParticlesPerCell * Uint32Array.BYTES_PER_ELEMENT;
    this.gridParticleIndicesBuffer = this.device.createBuffer({
      label: "grid-particle-indices",
      size: indicesSize,
      usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
    });
    // Four f32 per particle.
    const forcesSize = this.particleCount * 4 * Float32Array.BYTES_PER_ELEMENT;
    this.collisionForcesBuffer = this.device.createBuffer({
      label: "collision-forces",
      size: forcesSize,
      usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
    });
    this.uploadUniforms();
  }
  /**
   * Upload uniform data to GPU.
   *
   * 32-byte layout (4-byte slots): [0] cellSize (f32), [1..3] grid
   * dimensions x/y/z (u32), [4] particleCount (u32), [5] maxParticlesPerCell
   * (u32), [6..7] written as 0.
   */
  uploadUniforms() {
    if (!this.uniformBuffer) return;
    const data = new ArrayBuffer(32);
    const floatView = new Float32Array(data);
    const uintView = new Uint32Array(data);
    floatView[0] = this.options.cellSize;
    uintView[1] = this.options.gridDimensions.x;
    uintView[2] = this.options.gridDimensions.y;
    uintView[3] = this.options.gridDimensions.z;
    uintView[4] = this.particleCount;
    uintView[5] = this.options.maxParticlesPerCell;
    uintView[6] = 0;
    uintView[7] = 0;
    this.device.queue.writeBuffer(this.uniformBuffer, 0, data);
  }
  /**
   * Create compute pipelines
   */
  createPipelines(shaderModule) {
    const clearLayout = this.createClearBindGroupLayout();
    const buildLayout = this.createBuildBindGroupLayout();
    const collisionLayout = this.createCollisionBindGroupLayout();
    this.clearPipeline = this.device.createComputePipeline({
      label: "grid-clear-pipeline",
      layout: this.device.createPipelineLayout({
        bindGroupLayouts: [clearLayout]
      }),
      compute: {
        module: shaderModule,
        entryPoint: "gridClear"
      }
    });
    this.buildPipeline = this.device.createComputePipeline({
      label: "grid-build-pipeline",
      layout: this.device.createPipelineLayout({
        bindGroupLayouts: [buildLayout]
      }),
      compute: {
        module: shaderModule,
        entryPoint: "gridBuild"
      }
    });
    this.collisionPipeline = this.device.createComputePipeline({
      label: "grid-collision-pipeline",
      layout: this.device.createPipelineLayout({
        bindGroupLayouts: [collisionLayout]
      }),
      compute: {
        module: shaderModule,
        entryPoint: "gridCollision"
      }
    });
  }
  /**
   * Create bind group layout for clear pass
   * (bindings: 0 uniform, 3 and 4 read-write storage).
   */
  createClearBindGroupLayout() {
    return this.device.createBindGroupLayout({
      label: "grid-clear-layout",
      entries: [
        { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "uniform" } },
        { binding: 3, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } },
        { binding: 4, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } }
      ]
    });
  }
  /**
   * Create bind group layout for build pass
   * (bindings: 0 uniform, 1 read-only storage, 4 and 5 read-write storage).
   */
  createBuildBindGroupLayout() {
    return this.device.createBindGroupLayout({
      label: "grid-build-layout",
      entries: [
        { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "uniform" } },
        { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
        { binding: 4, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } },
        { binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } }
      ]
    });
  }
  /**
   * Create bind group layout for collision pass
   * (bindings: 0 uniform, 1 and 2 read-only storage, 3-6 read-write storage).
   */
  createCollisionBindGroupLayout() {
    return this.device.createBindGroupLayout({
      label: "grid-collision-layout",
      entries: [
        { binding: 0, visibility: GPUShaderStage.COMPUTE, buffer: { type: "uniform" } },
        { binding: 1, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
        { binding: 2, visibility: GPUShaderStage.COMPUTE, buffer: { type: "read-only-storage" } },
        { binding: 3, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } },
        { binding: 4, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } },
        { binding: 5, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } },
        { binding: 6, visibility: GPUShaderStage.COMPUTE, buffer: { type: "storage" } }
      ]
    });
  }
  /**
   * Clear grid counters (run before buildGrid each frame).
   *
   * @param commandEncoder  Optional encoder to record into; a new one is
   *                        created when omitted.
   * @returns The encoder the pass was recorded into (not submitted).
   * @throws Error if the grid has not been initialized.
   */
  clearGrid(commandEncoder) {
    if (!this.clearPipeline || !this.uniformBuffer || !this.gridCellStartBuffer || !this.gridCellEndBuffer) {
      throw new Error("Spatial grid not initialized");
    }
    // Only internal buffers are bound here, so the bind group can be cached
    // for the lifetime of those buffers.
    if (!this.clearBindGroup) {
      this.clearBindGroup = this.device.createBindGroup({
        layout: this.clearPipeline.getBindGroupLayout(0),
        entries: [
          { binding: 0, resource: { buffer: this.uniformBuffer } },
          { binding: 3, resource: { buffer: this.gridCellStartBuffer } },
          { binding: 4, resource: { buffer: this.gridCellEndBuffer } }
        ]
      });
    }
    const encoder = commandEncoder ?? this.device.createCommandEncoder({ label: "grid-clear-encoder" });
    const pass = encoder.beginComputePass({ label: "grid-clear-pass" });
    pass.setPipeline(this.clearPipeline);
    pass.setBindGroup(0, this.clearBindGroup);
    // One invocation per cell, dispatched in groups of 256.
    const workgroups = Math.ceil(this.totalCells / 256);
    pass.dispatchWorkgroups(workgroups, 1, 1);
    pass.end();
    return encoder;
  }
  /**
   * Build spatial grid from particle positions.
   *
   * The bind group is cached, but rebuilt whenever a different
   * `positionBuffer` is supplied so the pass never reads a stale buffer.
   *
   * @param positionBuffer  Buffer bound read-only at binding 1.
   * @param commandEncoder  Optional encoder to record into.
   * @returns The encoder the pass was recorded into (not submitted).
   * @throws Error if the grid has not been initialized.
   */
  buildGrid(positionBuffer, commandEncoder) {
    if (!this.buildPipeline || !this.uniformBuffer || !this.gridCellEndBuffer || !this.gridParticleIndicesBuffer) {
      throw new Error("Spatial grid not initialized");
    }
    if (!this.buildBindGroup || this.buildBindGroupPositions !== positionBuffer) {
      this.buildBindGroup = this.device.createBindGroup({
        layout: this.buildPipeline.getBindGroupLayout(0),
        entries: [
          { binding: 0, resource: { buffer: this.uniformBuffer } },
          { binding: 1, resource: { buffer: positionBuffer } },
          { binding: 4, resource: { buffer: this.gridCellEndBuffer } },
          { binding: 5, resource: { buffer: this.gridParticleIndicesBuffer } }
        ]
      });
      this.buildBindGroupPositions = positionBuffer;
    }
    const encoder = commandEncoder ?? this.device.createCommandEncoder({ label: "grid-build-encoder" });
    const pass = encoder.beginComputePass({ label: "grid-build-pass" });
    pass.setPipeline(this.buildPipeline);
    pass.setBindGroup(0, this.buildBindGroup);
    // One invocation per particle, dispatched in groups of 256.
    const workgroups = Math.ceil(this.particleCount / 256);
    pass.dispatchWorkgroups(workgroups, 1, 1);
    pass.end();
    return encoder;
  }
  /**
   * Detect collisions using spatial grid.
   *
   * The bind group is cached, but rebuilt whenever a different position or
   * velocity buffer is supplied.
   *
   * @param positionBuffer  Buffer bound read-only at binding 1.
   * @param velocityBuffer  Buffer bound read-only at binding 2.
   * @param commandEncoder  Optional encoder to record into.
   * @returns The encoder the pass was recorded into (not submitted).
   * @throws Error if the grid has not been initialized.
   */
  detectCollisions(positionBuffer, velocityBuffer, commandEncoder) {
    // Every buffer bound below must exist, otherwise createBindGroup would
    // fail with an unrelated error.
    if (!this.collisionPipeline || !this.uniformBuffer || !this.collisionForcesBuffer || !this.gridCellStartBuffer || !this.gridCellEndBuffer || !this.gridParticleIndicesBuffer) {
      throw new Error("Spatial grid not initialized");
    }
    const inputsChanged = this.collisionBindGroupPositions !== positionBuffer || this.collisionBindGroupVelocities !== velocityBuffer;
    if (!this.collisionBindGroup || inputsChanged) {
      this.collisionBindGroup = this.device.createBindGroup({
        layout: this.collisionPipeline.getBindGroupLayout(0),
        entries: [
          { binding: 0, resource: { buffer: this.uniformBuffer } },
          { binding: 1, resource: { buffer: positionBuffer } },
          { binding: 2, resource: { buffer: velocityBuffer } },
          { binding: 3, resource: { buffer: this.gridCellStartBuffer } },
          { binding: 4, resource: { buffer: this.gridCellEndBuffer } },
          { binding: 5, resource: { buffer: this.gridParticleIndicesBuffer } },
          { binding: 6, resource: { buffer: this.collisionForcesBuffer } }
        ]
      });
      this.collisionBindGroupPositions = positionBuffer;
      this.collisionBindGroupVelocities = velocityBuffer;
    }
    const encoder = commandEncoder ?? this.device.createCommandEncoder({ label: "grid-collision-encoder" });
    const pass = encoder.beginComputePass({ label: "grid-collision-pass" });
    pass.setPipeline(this.collisionPipeline);
    pass.setBindGroup(0, this.collisionBindGroup);
    const workgroups = Math.ceil(this.particleCount / 256);
    pass.dispatchWorkgroups(workgroups, 1, 1);
    pass.end();
    return encoder;
  }
  /**
   * Execute full collision detection pipeline
   *
   * Convenience method that runs all three passes: clear → build → detect,
   * submits them in one command buffer and resolves with the downloaded
   * collision forces.
   */
  async execute(positionBuffer, velocityBuffer) {
    const encoder = this.device.createCommandEncoder({ label: "spatial-grid-full-encoder" });
    this.clearGrid(encoder);
    this.buildGrid(positionBuffer, encoder);
    this.detectCollisions(positionBuffer, velocityBuffer, encoder);
    this.device.queue.submit([encoder.finish()]);
    return await this.downloadCollisionForces();
  }
  /**
   * Download collision forces from GPU.
   *
   * The temporary staging buffer is destroyed in `finally`, so it is
   * released even if the copy or mapping fails.
   *
   * @returns {Promise<Float32Array>} Copy of the whole collision-forces buffer.
   * @throws Error if the collision forces buffer does not exist.
   */
  async downloadCollisionForces() {
    if (!this.collisionForcesBuffer) {
      throw new Error("Collision forces buffer not initialized");
    }
    const stagingBuffer = this.device.createBuffer({
      size: this.collisionForcesBuffer.size,
      usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
    });
    try {
      const encoder = this.device.createCommandEncoder({ label: "collision-forces-readback" });
      encoder.copyBufferToBuffer(
        this.collisionForcesBuffer,
        0,
        stagingBuffer,
        0,
        this.collisionForcesBuffer.size
      );
      this.device.queue.submit([encoder.finish()]);
      await stagingBuffer.mapAsync(GPUMapMode.READ);
      // slice() detaches the result from the mapped range before unmapping.
      const data = new Float32Array(stagingBuffer.getMappedRange()).slice();
      stagingBuffer.unmap();
      return data;
    } finally {
      stagingBuffer.destroy();
    }
  }
  /**
   * Get collision forces buffer (for use in other compute passes)
   * @throws Error if the buffer does not exist (not initialized or destroyed).
   */
  getCollisionForcesBuffer() {
    if (!this.collisionForcesBuffer) {
      throw new Error("Collision forces buffer not initialized");
    }
    return this.collisionForcesBuffer;
  }
  /**
   * Calculate memory usage of the cell counters, index table and force
   * buffer, formatted as "<n.nn> MB". The 32-byte uniform buffer is not
   * included.
   */
  calculateMemoryUsage() {
    const cellCounters = this.totalCells * 4 * 2;
    const indices = this.totalCells * this.options.maxParticlesPerCell * 4;
    const forces = this.particleCount * 4 * 4;
    const total = (cellCounters + indices + forces) / 1024 / 1024;
    return `${total.toFixed(2)} MB`;
  }
  /**
   * Get grid statistics
   */
  getStats() {
    return {
      cellSize: this.options.cellSize,
      gridDimensions: this.options.gridDimensions,
      totalCells: this.totalCells,
      maxParticlesPerCell: this.options.maxParticlesPerCell,
      memoryUsage: this.calculateMemoryUsage()
    };
  }
  /**
   * Cleanup resources.
   *
   * Destroys all owned buffers and drops every reference (buffers,
   * pipelines, bind groups and the remembered input buffers) so that later
   * calls fail with the "not initialized" errors instead of touching
   * destroyed GPU objects.
   */
  destroy() {
    this.uniformBuffer?.destroy();
    this.gridCellStartBuffer?.destroy();
    this.gridCellEndBuffer?.destroy();
    this.gridParticleIndicesBuffer?.destroy();
    this.collisionForcesBuffer?.destroy();
    this.uniformBuffer = null;
    this.gridCellStartBuffer = null;
    this.gridCellEndBuffer = null;
    this.gridParticleIndicesBuffer = null;
    this.collisionForcesBuffer = null;
    this.clearPipeline = null;
    this.buildPipeline = null;
    this.collisionPipeline = null;
    this.clearBindGroup = null;
    this.buildBindGroup = null;
    this.collisionBindGroup = null;
    this.buildBindGroupPositions = null;
    this.collisionBindGroupPositions = null;
    this.collisionBindGroupVelocities = null;
  }
};
|
|
3132
|
+
|
|
3133
|
+
// src/gpu/index.ts
|
|
3134
|
+
init_WebGPUContext();
|
|
3135
|
+
|
|
3136
|
+
// src/gpu/codecs/IGaussianCodec.ts
|
|
3137
|
+
/**
 * Base error for Gaussian codec failures.
 *
 * The message is formatted as "[<codecId>] <operation>: <message>", and the
 * codec id, operation name and optional underlying cause are kept as
 * properties on the instance.
 */
var GaussianCodecError = class extends Error {
  codecId;
  operation;
  cause;
  constructor(message, codecId, operation, cause) {
    const prefix = `[${codecId}] ${operation}`;
    super(`${prefix}: ${message}`);
    Object.assign(this, { codecId, operation, cause, name: "GaussianCodecError" });
  }
};
|
|
3149
|
+
/**
 * Raised when a codec is asked to perform an operation it does not
 * implement. The operation name is used both in the message and as the
 * error's `operation`.
 */
var CodecNotSupportedError = class extends GaussianCodecError {
  constructor(codecId, operation) {
    const explanation = `Operation '${operation}' is not supported by this codec. Check capabilities before calling.`;
    super(explanation, codecId, operation);
    this.name = "CodecNotSupportedError";
  }
};
|
|
3159
|
+
/**
 * Codec error whose operation is fixed to "decode".
 */
var CodecDecodeError = class extends GaussianCodecError {
  constructor(codecId, message, cause) {
    const operation = "decode";
    super(message, codecId, operation, cause);
    this.name = "CodecDecodeError";
  }
};
|
|
3165
|
+
/**
 * Codec error whose operation is fixed to "encode".
 */
var CodecEncodeError = class extends GaussianCodecError {
  constructor(codecId, message, cause) {
    const operation = "encode";
    super(message, codecId, operation, cause);
    this.name = "CodecEncodeError";
  }
};
|
|
3171
|
+
/**
 * Raised when an operation's estimated memory need exceeds the caller's
 * budget. Reported under the "decode" operation; the required and budgeted
 * sizes (in MB) are kept on the instance.
 */
var CodecMemoryError = class extends GaussianCodecError {
  requiredMB;
  budgetMB;
  constructor(codecId, requiredMB, budgetMB) {
    const needed = requiredMB.toFixed(1);
    super(
      `Memory budget exceeded: operation requires ~${needed} MB but budget is ${budgetMB} MB`,
      codecId,
      "decode"
    );
    Object.assign(this, { requiredMB, budgetMB, name: "CodecMemoryError" });
  }
};
|
|
3185
|
+
/**
 * Codec error whose operation is fixed to "decompress".
 */
var CodecDecompressError = class extends GaussianCodecError {
  constructor(codecId, message, cause) {
    const operation = "decompress";
    super(message, codecId, operation, cause);
    this.name = "CodecDecompressError";
  }
};
|
|
3191
|
+
/**
 * Base class for Gaussian codecs.
 *
 * Subclasses are expected to provide getCapabilities() (its `id` is used in
 * the errors raised here). encode/stream/decompress default to rejecting
 * with CodecNotSupportedError; initialize/dispose only toggle `initialized`.
 */
var AbstractGaussianCodec = class {
  initialized = false;
  // Default implementations for optional operations:
  /** Default: rejects with CodecNotSupportedError("encode"). */
  async encode(_data, _options) {
    const codecId = this.getCapabilities().id;
    throw new CodecNotSupportedError(codecId, "encode");
  }
  /** Default: the returned async generator throws CodecNotSupportedError("stream") on first pull. */
  async *stream(_source, _options) {
    const codecId = this.getCapabilities().id;
    throw new CodecNotSupportedError(codecId, "stream");
  }
  /** Default: rejects with CodecNotSupportedError("decompress"). */
  async decompress(_compressed) {
    const codecId = this.getCapabilities().id;
    throw new CodecNotSupportedError(codecId, "decompress");
  }
  /** Marks the codec as initialized. */
  async initialize() {
    this.initialized = true;
  }
  /** Marks the codec as no longer initialized. */
  dispose() {
    this.initialized = false;
  }
  /**
   * Estimate memory footprint in MB for a given Gaussian count.
   * Used by decode() to check memory budgets.
   * Assumes 15 four-byte values (60 bytes) per Gaussian.
   */
  estimateMemoryMB(gaussianCount) {
    const valuesPerGaussian = 15;
    const totalBytes = gaussianCount * (valuesPerGaussian * 4);
    const bytesPerMB = 1024 * 1024;
    return totalBytes / bytesPerMB;
  }
  /**
   * Check if a decode operation would exceed the memory budget.
   * @throws CodecMemoryError if budget would be exceeded
   */
  checkMemoryBudget(gaussianCount, maxMemoryMB) {
    const requiredMB = this.estimateMemoryMB(gaussianCount);
    const overBudget = requiredMB > maxMemoryMB;
    if (!overBudget) return;
    throw new CodecMemoryError(this.getCapabilities().id, requiredMB, maxMemoryMB);
  }
};
|
|
3228
|
+
|
|
3229
|
+
// src/gpu/codecs/SpzCodec.ts
|
|
3230
|
+
// SPZ header magic: the ASCII bytes "NGSP" when stored little-endian.
var SPZ_MAGIC = 0x5053474e;
// Byte offset at which the per-point attribute sections begin.
var SPZ_HEADER_SIZE = 0x10;
// Upper bound on the number of points (also the default maxGaussians).
var SPZ_MAX_POINTS = 10_000_000;
// Default decode memory budget, in megabytes.
var DEFAULT_MAX_MEMORY_MB = 512;
// Scale applied when converting stored colour / SH bytes to coefficients.
var SPZ_COLOR_SCALE = 0.15;
// Multiplier applied to the colour coefficient when converting it to RGB.
// NOTE(review): value matches the degree-0 spherical-harmonic basis constant - confirm.
var SH_C0 = 0.2820948;
var SQRT1_2 = Math.SQRT1_2;
// First two bytes of a gzip stream, used to probe incoming buffers.
var GZIP_MAGIC_0 = 0x1f;
var GZIP_MAGIC_1 = 0x8b;
|
|
3239
|
+
var SpzCodec = class extends AbstractGaussianCodec {
|
|
3240
|
+
codecId;
|
|
3241
|
+
constructor() {
|
|
3242
|
+
super();
|
|
3243
|
+
this.codecId = "khr.spz.v2";
|
|
3244
|
+
}
|
|
3245
|
+
// ─── Capabilities ─────────────────────────────────────────────────────────
|
|
3246
|
+
getCapabilities() {
|
|
3247
|
+
return {
|
|
3248
|
+
id: this.codecId,
|
|
3249
|
+
name: "Niantic SPZ Gaussian Splat Codec",
|
|
3250
|
+
version: "1.0.0",
|
|
3251
|
+
fileExtensions: ["spz"],
|
|
3252
|
+
mimeTypes: ["application/x-spz", "application/gzip"],
|
|
3253
|
+
canEncode: true,
|
|
3254
|
+
canDecode: true,
|
|
3255
|
+
canStream: true,
|
|
3256
|
+
canDecodeTemporal: false,
|
|
3257
|
+
maxSHDegree: 3,
|
|
3258
|
+
maxGaussianCount: SPZ_MAX_POINTS,
|
|
3259
|
+
requiresWasm: false,
|
|
3260
|
+
requiresWebGPU: false,
|
|
3261
|
+
standard: "khronos",
|
|
3262
|
+
maturity: "production"
|
|
3263
|
+
};
|
|
3264
|
+
}
|
|
3265
|
+
// ─── Probe ────────────────────────────────────────────────────────────────
|
|
3266
|
+
canDecode(buffer) {
|
|
3267
|
+
if (buffer.byteLength < 2) return false;
|
|
3268
|
+
const bytes = new Uint8Array(buffer, 0, 2);
|
|
3269
|
+
return bytes[0] === GZIP_MAGIC_0 && bytes[1] === GZIP_MAGIC_1;
|
|
3270
|
+
}
|
|
3271
|
+
// ─── Decompress ───────────────────────────────────────────────────────────
|
|
3272
|
+
async decompress(compressed) {
|
|
3273
|
+
try {
|
|
3274
|
+
return await decompressGzip(compressed);
|
|
3275
|
+
} catch (err) {
|
|
3276
|
+
throw new CodecDecompressError(
|
|
3277
|
+
this.codecId,
|
|
3278
|
+
"Gzip decompression failed. Ensure the data is a valid SPZ file.",
|
|
3279
|
+
err instanceof Error ? err : void 0
|
|
3280
|
+
);
|
|
3281
|
+
}
|
|
3282
|
+
}
|
|
3283
|
+
// ─── Extract Metadata ─────────────────────────────────────────────────────
|
|
3284
|
+
async extractMetadata(buffer) {
|
|
3285
|
+
const raw = await this.decompress(buffer);
|
|
3286
|
+
const header = parseSpzHeader(new DataView(raw));
|
|
3287
|
+
validateSpzHeader(header, this.codecId);
|
|
3288
|
+
const isV3 = header.version >= 3;
|
|
3289
|
+
const rotBytes = isV3 ? 4 : 3;
|
|
3290
|
+
const shDim = shDimForDegree(header.shDegree);
|
|
3291
|
+
const uncompressedSize = SPZ_HEADER_SIZE + header.numPoints * 9 + // positions
|
|
3292
|
+
header.numPoints + // alphas
|
|
3293
|
+
header.numPoints * 3 + // colors
|
|
3294
|
+
header.numPoints * 3 + // scales
|
|
3295
|
+
header.numPoints * rotBytes + // rotations
|
|
3296
|
+
header.numPoints * shDim * 3;
|
|
3297
|
+
return {
|
|
3298
|
+
version: header.version,
|
|
3299
|
+
gaussianCount: header.numPoints,
|
|
3300
|
+
shDegree: header.shDegree,
|
|
3301
|
+
compressedSizeBytes: buffer.byteLength,
|
|
3302
|
+
uncompressedSizeBytes: uncompressedSize,
|
|
3303
|
+
compressionRatio: buffer.byteLength / uncompressedSize,
|
|
3304
|
+
antialiased: (header.flags & 1) !== 0
|
|
3305
|
+
};
|
|
3306
|
+
}
|
|
3307
|
+
// ─── Decode ───────────────────────────────────────────────────────────────
|
|
3308
|
+
async decode(buffer, options) {
|
|
3309
|
+
const startTime = performance.now();
|
|
3310
|
+
const warnings = [];
|
|
3311
|
+
const maxGaussians = options?.maxGaussians ?? SPZ_MAX_POINTS;
|
|
3312
|
+
const maxMemoryMB = options?.maxMemoryMB ?? DEFAULT_MAX_MEMORY_MB;
|
|
3313
|
+
const decodeSH = options?.decodeSH ?? true;
|
|
3314
|
+
const alphaThreshold = options?.alphaThreshold ?? 0;
|
|
3315
|
+
const raw = await this.decompress(buffer);
|
|
3316
|
+
const data = new Uint8Array(raw);
|
|
3317
|
+
const view = new DataView(raw);
|
|
3318
|
+
const header = parseSpzHeader(view);
|
|
3319
|
+
validateSpzHeader(header, this.codecId);
|
|
3320
|
+
const N = Math.min(header.numPoints, maxGaussians);
|
|
3321
|
+
if (N < header.numPoints) {
|
|
3322
|
+
warnings.push(
|
|
3323
|
+
`Clamped Gaussian count from ${header.numPoints.toLocaleString()} to ${N.toLocaleString()} (maxGaussians limit)`
|
|
3324
|
+
);
|
|
3325
|
+
}
|
|
3326
|
+
this.checkMemoryBudget(N, maxMemoryMB);
|
|
3327
|
+
const isV3 = header.version >= 3;
|
|
3328
|
+
const rotBytes = isV3 ? 4 : 3;
|
|
3329
|
+
const shDim = decodeSH ? shDimForDegree(header.shDegree) : 0;
|
|
3330
|
+
const posScale = 1 / (1 << header.fractionalBits);
|
|
3331
|
+
const posStart = SPZ_HEADER_SIZE;
|
|
3332
|
+
const alphaStart = posStart + header.numPoints * 9;
|
|
3333
|
+
const colorStart = alphaStart + header.numPoints;
|
|
3334
|
+
const scaleStart = colorStart + header.numPoints * 3;
|
|
3335
|
+
const rotStart = scaleStart + header.numPoints * 3;
|
|
3336
|
+
const shStart = rotStart + header.numPoints * rotBytes;
|
|
3337
|
+
const expectedSize = shStart + header.numPoints * shDimForDegree(header.shDegree) * 3;
|
|
3338
|
+
if (data.length < expectedSize) {
|
|
3339
|
+
throw new CodecDecodeError(
|
|
3340
|
+
this.codecId,
|
|
3341
|
+
`SPZ buffer too short: ${data.length} bytes, expected at least ${expectedSize} bytes for ${header.numPoints} points with SH degree ${header.shDegree}`
|
|
3342
|
+
);
|
|
3343
|
+
}
|
|
3344
|
+
const positions = new Float32Array(N * 3);
|
|
3345
|
+
const scales = new Float32Array(N * 3);
|
|
3346
|
+
const rotations = new Float32Array(N * 4);
|
|
3347
|
+
const colors = new Float32Array(N * 4);
|
|
3348
|
+
const opacities = new Float32Array(N);
|
|
3349
|
+
let shCoefficients;
|
|
3350
|
+
if (shDim > 0) {
|
|
3351
|
+
shCoefficients = new Float32Array(N * shDim * 3);
|
|
3352
|
+
}
|
|
3353
|
+
for (let i = 0; i < N; i++) {
|
|
3354
|
+
const pOff = posStart + i * 9;
|
|
3355
|
+
for (let c = 0; c < 3; c++) {
|
|
3356
|
+
const byteOff = pOff + c * 3;
|
|
3357
|
+
let fixed32 = data[byteOff] | data[byteOff + 1] << 8 | data[byteOff + 2] << 16;
|
|
3358
|
+
if (fixed32 & 8388608) fixed32 |= 4278190080;
|
|
3359
|
+
fixed32 = fixed32 | 0;
|
|
3360
|
+
positions[i * 3 + c] = fixed32 * posScale;
|
|
3361
|
+
}
|
|
3362
|
+
}
|
|
3363
|
+
for (let i = 0; i < N; i++) {
|
|
3364
|
+
const rawAlpha = data[alphaStart + i] / 255;
|
|
3365
|
+
opacities[i] = rawAlpha;
|
|
3366
|
+
colors[i * 4 + 3] = rawAlpha;
|
|
3367
|
+
}
|
|
3368
|
+
for (let i = 0; i < N; i++) {
|
|
3369
|
+
const cOff = colorStart + i * 3;
|
|
3370
|
+
for (let c = 0; c < 3; c++) {
|
|
3371
|
+
const normalized = data[cOff + c] / 255;
|
|
3372
|
+
const shCoeff = (normalized - 0.5) / SPZ_COLOR_SCALE;
|
|
3373
|
+
const rgb = 0.5 + SH_C0 * shCoeff;
|
|
3374
|
+
colors[i * 4 + c] = Math.max(0, Math.min(1, rgb));
|
|
3375
|
+
}
|
|
3376
|
+
}
|
|
3377
|
+
for (let i = 0; i < N; i++) {
|
|
3378
|
+
const sOff = scaleStart + i * 3;
|
|
3379
|
+
for (let c = 0; c < 3; c++) {
|
|
3380
|
+
const logScale = data[sOff + c] / 16 - 10;
|
|
3381
|
+
scales[i * 3 + c] = Math.exp(logScale);
|
|
3382
|
+
}
|
|
3383
|
+
}
|
|
3384
|
+
for (let i = 0; i < N; i++) {
|
|
3385
|
+
const rOff = rotStart + i * rotBytes;
|
|
3386
|
+
let quat;
|
|
3387
|
+
if (isV3) {
|
|
3388
|
+
quat = decodeQuaternionV3(data, rOff);
|
|
3389
|
+
} else {
|
|
3390
|
+
quat = decodeQuaternionV2(data, rOff);
|
|
3391
|
+
}
|
|
3392
|
+
rotations[i * 4] = quat[0];
|
|
3393
|
+
rotations[i * 4 + 1] = quat[1];
|
|
3394
|
+
rotations[i * 4 + 2] = quat[2];
|
|
3395
|
+
rotations[i * 4 + 3] = quat[3];
|
|
3396
|
+
}
|
|
3397
|
+
if (shCoefficients && shDim > 0) {
|
|
3398
|
+
for (let i = 0; i < N; i++) {
|
|
3399
|
+
for (let s = 0; s < shDim; s++) {
|
|
3400
|
+
const off = shStart + (i * shDim + s) * 3;
|
|
3401
|
+
for (let c = 0; c < 3; c++) {
|
|
3402
|
+
const raw2 = data[off + c];
|
|
3403
|
+
const signed = raw2 > 127 ? raw2 - 256 : raw2;
|
|
3404
|
+
shCoefficients[(i * shDim + s) * 3 + c] = signed / 128 * SPZ_COLOR_SCALE;
|
|
3405
|
+
}
|
|
3406
|
+
}
|
|
3407
|
+
}
|
|
3408
|
+
}
|
|
3409
|
+
let finalCount = N;
|
|
3410
|
+
if (alphaThreshold > 0) {
|
|
3411
|
+
let writeIdx = 0;
|
|
3412
|
+
for (let i = 0; i < N; i++) {
|
|
3413
|
+
if (opacities[i] >= alphaThreshold) {
|
|
3414
|
+
if (writeIdx !== i) {
|
|
3415
|
+
positions[writeIdx * 3] = positions[i * 3];
|
|
3416
|
+
positions[writeIdx * 3 + 1] = positions[i * 3 + 1];
|
|
3417
|
+
positions[writeIdx * 3 + 2] = positions[i * 3 + 2];
|
|
3418
|
+
scales[writeIdx * 3] = scales[i * 3];
|
|
3419
|
+
scales[writeIdx * 3 + 1] = scales[i * 3 + 1];
|
|
3420
|
+
scales[writeIdx * 3 + 2] = scales[i * 3 + 2];
|
|
3421
|
+
rotations[writeIdx * 4] = rotations[i * 4];
|
|
3422
|
+
rotations[writeIdx * 4 + 1] = rotations[i * 4 + 1];
|
|
3423
|
+
rotations[writeIdx * 4 + 2] = rotations[i * 4 + 2];
|
|
3424
|
+
rotations[writeIdx * 4 + 3] = rotations[i * 4 + 3];
|
|
3425
|
+
colors[writeIdx * 4] = colors[i * 4];
|
|
3426
|
+
colors[writeIdx * 4 + 1] = colors[i * 4 + 1];
|
|
3427
|
+
colors[writeIdx * 4 + 2] = colors[i * 4 + 2];
|
|
3428
|
+
colors[writeIdx * 4 + 3] = colors[i * 4 + 3];
|
|
3429
|
+
opacities[writeIdx] = opacities[i];
|
|
3430
|
+
}
|
|
3431
|
+
writeIdx++;
|
|
3432
|
+
}
|
|
3433
|
+
}
|
|
3434
|
+
finalCount = writeIdx;
|
|
3435
|
+
if (finalCount < N) {
|
|
3436
|
+
warnings.push(
|
|
3437
|
+
`Filtered ${N - finalCount} Gaussians below alpha threshold ${alphaThreshold}`
|
|
3438
|
+
);
|
|
3439
|
+
}
|
|
3440
|
+
}
|
|
3441
|
+
const result = {
|
|
3442
|
+
positions: finalCount < N ? positions.slice(0, finalCount * 3) : positions,
|
|
3443
|
+
scales: finalCount < N ? scales.slice(0, finalCount * 3) : scales,
|
|
3444
|
+
rotations: finalCount < N ? rotations.slice(0, finalCount * 4) : rotations,
|
|
3445
|
+
colors: finalCount < N ? colors.slice(0, finalCount * 4) : colors,
|
|
3446
|
+
opacities: finalCount < N ? opacities.slice(0, finalCount) : opacities,
|
|
3447
|
+
shCoefficients: shCoefficients ? finalCount < N ? shCoefficients.slice(0, finalCount * shDim * 3) : shCoefficients : void 0,
|
|
3448
|
+
shDegree: decodeSH ? header.shDegree : 0,
|
|
3449
|
+
count: finalCount
|
|
3450
|
+
};
|
|
3451
|
+
const durationMs = performance.now() - startTime;
|
|
3452
|
+
return {
|
|
3453
|
+
data: result,
|
|
3454
|
+
durationMs,
|
|
3455
|
+
warnings
|
|
3456
|
+
};
|
|
3457
|
+
}
|
|
3458
|
+
// ─── Encode ───────────────────────────────────────────────────────────────
|
|
3459
|
+
async encode(data, options) {
|
|
3460
|
+
const startTime = performance.now();
|
|
3461
|
+
const warnings = [];
|
|
3462
|
+
const shDegree = options?.shDegree ?? data.shDegree;
|
|
3463
|
+
const fractionalBits = options?.fractionalBits ?? 12;
|
|
3464
|
+
const antialiased = options?.antialiased ?? false;
|
|
3465
|
+
const encodingVersion = options?.encodingVersion ?? 3;
|
|
3466
|
+
const N = data.count;
|
|
3467
|
+
if (N > SPZ_MAX_POINTS) {
|
|
3468
|
+
throw new CodecEncodeError(
|
|
3469
|
+
this.codecId,
|
|
3470
|
+
`Cannot encode ${N.toLocaleString()} Gaussians: exceeds maximum of ${SPZ_MAX_POINTS.toLocaleString()}`
|
|
3471
|
+
);
|
|
3472
|
+
}
|
|
3473
|
+
const isV3 = encodingVersion >= 3;
|
|
3474
|
+
const shDim = shDimForDegree(shDegree);
|
|
3475
|
+
const rotBytes = isV3 ? 4 : 3;
|
|
3476
|
+
const payloadSize = SPZ_HEADER_SIZE + N * 9 + // positions
|
|
3477
|
+
N + // alphas
|
|
3478
|
+
N * 3 + // colors
|
|
3479
|
+
N * 3 + // scales
|
|
3480
|
+
N * rotBytes + // rotations
|
|
3481
|
+
N * shDim * 3;
|
|
3482
|
+
const buffer = new ArrayBuffer(payloadSize);
|
|
3483
|
+
const out = new Uint8Array(buffer);
|
|
3484
|
+
const outView = new DataView(buffer);
|
|
3485
|
+
outView.setUint32(0, SPZ_MAGIC, true);
|
|
3486
|
+
outView.setUint32(4, encodingVersion, true);
|
|
3487
|
+
outView.setUint32(8, N, true);
|
|
3488
|
+
out[12] = shDegree;
|
|
3489
|
+
out[13] = fractionalBits;
|
|
3490
|
+
out[14] = antialiased ? 1 : 0;
|
|
3491
|
+
out[15] = 0;
|
|
3492
|
+
const posScale = 1 << fractionalBits;
|
|
3493
|
+
const posStart = SPZ_HEADER_SIZE;
|
|
3494
|
+
for (let i = 0; i < N; i++) {
|
|
3495
|
+
const pOff = posStart + i * 9;
|
|
3496
|
+
for (let c = 0; c < 3; c++) {
|
|
3497
|
+
const fixed = Math.round(data.positions[i * 3 + c] * posScale);
|
|
3498
|
+
const clamped = Math.max(-8388608, Math.min(8388607, fixed));
|
|
3499
|
+
const unsigned = clamped & 16777215;
|
|
3500
|
+
const byteOff = pOff + c * 3;
|
|
3501
|
+
out[byteOff] = unsigned & 255;
|
|
3502
|
+
out[byteOff + 1] = unsigned >> 8 & 255;
|
|
3503
|
+
out[byteOff + 2] = unsigned >> 16 & 255;
|
|
3504
|
+
}
|
|
3505
|
+
}
|
|
3506
|
+
const alphaStart = posStart + N * 9;
|
|
3507
|
+
for (let i = 0; i < N; i++) {
|
|
3508
|
+
out[alphaStart + i] = Math.round(Math.max(0, Math.min(1, data.opacities[i])) * 255);
|
|
3509
|
+
}
|
|
3510
|
+
const colorStart = alphaStart + N;
|
|
3511
|
+
for (let i = 0; i < N; i++) {
|
|
3512
|
+
const cOff = colorStart + i * 3;
|
|
3513
|
+
for (let c = 0; c < 3; c++) {
|
|
3514
|
+
const rgb = data.colors[i * 4 + c];
|
|
3515
|
+
const shCoeff = (rgb - 0.5) / SH_C0;
|
|
3516
|
+
const normalized = shCoeff * SPZ_COLOR_SCALE + 0.5;
|
|
3517
|
+
out[cOff + c] = Math.round(Math.max(0, Math.min(1, normalized)) * 255);
|
|
3518
|
+
}
|
|
3519
|
+
}
|
|
3520
|
+
const scaleStart = colorStart + N * 3;
|
|
3521
|
+
for (let i = 0; i < N; i++) {
|
|
3522
|
+
const sOff = scaleStart + i * 3;
|
|
3523
|
+
for (let c = 0; c < 3; c++) {
|
|
3524
|
+
const scale = data.scales[i * 3 + c];
|
|
3525
|
+
const logScale = Math.log(Math.max(1e-10, scale));
|
|
3526
|
+
const encoded2 = Math.round((logScale + 10) * 16);
|
|
3527
|
+
out[sOff + c] = Math.max(0, Math.min(255, encoded2));
|
|
3528
|
+
}
|
|
3529
|
+
}
|
|
3530
|
+
const rotStart = scaleStart + N * 3;
|
|
3531
|
+
if (isV3) {
|
|
3532
|
+
for (let i = 0; i < N; i++) {
|
|
3533
|
+
const rOff = rotStart + i * 4;
|
|
3534
|
+
const packed = encodeQuaternionV3(
|
|
3535
|
+
data.rotations[i * 4],
|
|
3536
|
+
data.rotations[i * 4 + 1],
|
|
3537
|
+
data.rotations[i * 4 + 2],
|
|
3538
|
+
data.rotations[i * 4 + 3]
|
|
3539
|
+
);
|
|
3540
|
+
out[rOff] = packed & 255;
|
|
3541
|
+
out[rOff + 1] = packed >>> 8 & 255;
|
|
3542
|
+
out[rOff + 2] = packed >>> 16 & 255;
|
|
3543
|
+
out[rOff + 3] = packed >>> 24 & 255;
|
|
3544
|
+
}
|
|
3545
|
+
} else {
|
|
3546
|
+
for (let i = 0; i < N; i++) {
|
|
3547
|
+
const rOff = rotStart + i * 3;
|
|
3548
|
+
const x = data.rotations[i * 4];
|
|
3549
|
+
const y = data.rotations[i * 4 + 1];
|
|
3550
|
+
const z = data.rotations[i * 4 + 2];
|
|
3551
|
+
out[rOff] = Math.round(Math.max(0, Math.min(255, (x + 1) * 127.5)));
|
|
3552
|
+
out[rOff + 1] = Math.round(Math.max(0, Math.min(255, (y + 1) * 127.5)));
|
|
3553
|
+
out[rOff + 2] = Math.round(Math.max(0, Math.min(255, (z + 1) * 127.5)));
|
|
3554
|
+
}
|
|
3555
|
+
}
|
|
3556
|
+
if (shDim > 0 && data.shCoefficients) {
|
|
3557
|
+
const shStartOffset = rotStart + N * rotBytes;
|
|
3558
|
+
for (let i = 0; i < N; i++) {
|
|
3559
|
+
for (let s = 0; s < shDim; s++) {
|
|
3560
|
+
const off = shStartOffset + (i * shDim + s) * 3;
|
|
3561
|
+
for (let c = 0; c < 3; c++) {
|
|
3562
|
+
const coeff = data.shCoefficients[(i * shDim + s) * 3 + c];
|
|
3563
|
+
const scaled = coeff / SPZ_COLOR_SCALE * 128;
|
|
3564
|
+
const clamped = Math.round(Math.max(-128, Math.min(127, scaled)));
|
|
3565
|
+
out[off + c] = clamped < 0 ? clamped + 256 : clamped;
|
|
3566
|
+
}
|
|
3567
|
+
}
|
|
3568
|
+
}
|
|
3569
|
+
}
|
|
3570
|
+
const compressed = await compressGzip(buffer);
|
|
3571
|
+
const metadata = {
|
|
3572
|
+
version: encodingVersion,
|
|
3573
|
+
gaussianCount: N,
|
|
3574
|
+
shDegree,
|
|
3575
|
+
compressedSizeBytes: compressed.byteLength,
|
|
3576
|
+
uncompressedSizeBytes: payloadSize,
|
|
3577
|
+
compressionRatio: compressed.byteLength / payloadSize,
|
|
3578
|
+
antialiased
|
|
3579
|
+
};
|
|
3580
|
+
const encoded = {
|
|
3581
|
+
data: compressed,
|
|
3582
|
+
codecId: this.codecId,
|
|
3583
|
+
metadata
|
|
3584
|
+
};
|
|
3585
|
+
return {
|
|
3586
|
+
data: encoded,
|
|
3587
|
+
durationMs: performance.now() - startTime,
|
|
3588
|
+
warnings
|
|
3589
|
+
};
|
|
3590
|
+
}
|
|
3591
|
+
// ─── Stream Decode ────────────────────────────────────────────────────────
|
|
3592
|
+
async *stream(source, options) {
|
|
3593
|
+
const signal = options?.signal;
|
|
3594
|
+
let buffer;
|
|
3595
|
+
if (typeof source === "string") {
|
|
3596
|
+
const response = await fetch(source, { signal });
|
|
3597
|
+
if (!response.ok) {
|
|
3598
|
+
throw new CodecDecodeError(this.codecId, `HTTP ${response.status}: ${response.statusText}`);
|
|
3599
|
+
}
|
|
3600
|
+
const contentLength = parseInt(response.headers.get("content-length") ?? "0", 10);
|
|
3601
|
+
if (!response.body) {
|
|
3602
|
+
buffer = await response.arrayBuffer();
|
|
3603
|
+
options?.onProgress?.({
|
|
3604
|
+
bytesLoaded: buffer.byteLength,
|
|
3605
|
+
bytesTotal: buffer.byteLength,
|
|
3606
|
+
gaussiansDecoded: 0,
|
|
3607
|
+
gaussiansTotal: -1,
|
|
3608
|
+
phase: "downloading"
|
|
3609
|
+
});
|
|
3610
|
+
} else {
|
|
3611
|
+
const reader = response.body.getReader();
|
|
3612
|
+
const chunks = [];
|
|
3613
|
+
let loaded = 0;
|
|
3614
|
+
while (true) {
|
|
3615
|
+
if (signal?.aborted) {
|
|
3616
|
+
reader.cancel();
|
|
3617
|
+
return;
|
|
3618
|
+
}
|
|
3619
|
+
const { done, value } = await reader.read();
|
|
3620
|
+
if (done) break;
|
|
3621
|
+
chunks.push(value);
|
|
3622
|
+
loaded += value.byteLength;
|
|
3623
|
+
options?.onProgress?.({
|
|
3624
|
+
bytesLoaded: loaded,
|
|
3625
|
+
bytesTotal: contentLength || -1,
|
|
3626
|
+
gaussiansDecoded: 0,
|
|
3627
|
+
gaussiansTotal: -1,
|
|
3628
|
+
phase: "downloading"
|
|
3629
|
+
});
|
|
3630
|
+
}
|
|
3631
|
+
const result = new Uint8Array(loaded);
|
|
3632
|
+
let offset = 0;
|
|
3633
|
+
for (const chunk of chunks) {
|
|
3634
|
+
result.set(chunk, offset);
|
|
3635
|
+
offset += chunk.byteLength;
|
|
3636
|
+
}
|
|
3637
|
+
buffer = result.buffer;
|
|
3638
|
+
}
|
|
3639
|
+
} else {
|
|
3640
|
+
const reader = source.getReader();
|
|
3641
|
+
const chunks = [];
|
|
3642
|
+
let loaded = 0;
|
|
3643
|
+
while (true) {
|
|
3644
|
+
const { done, value } = await reader.read();
|
|
3645
|
+
if (done) break;
|
|
3646
|
+
chunks.push(value);
|
|
3647
|
+
loaded += value.byteLength;
|
|
3648
|
+
}
|
|
3649
|
+
const result = new Uint8Array(loaded);
|
|
3650
|
+
let offset = 0;
|
|
3651
|
+
for (const chunk of chunks) {
|
|
3652
|
+
result.set(chunk, offset);
|
|
3653
|
+
offset += chunk.byteLength;
|
|
3654
|
+
}
|
|
3655
|
+
buffer = result.buffer;
|
|
3656
|
+
}
|
|
3657
|
+
options?.onProgress?.({
|
|
3658
|
+
bytesLoaded: buffer.byteLength,
|
|
3659
|
+
bytesTotal: buffer.byteLength,
|
|
3660
|
+
gaussiansDecoded: 0,
|
|
3661
|
+
gaussiansTotal: -1,
|
|
3662
|
+
phase: "decompressing"
|
|
3663
|
+
});
|
|
3664
|
+
const decoded = await this.decode(buffer, options);
|
|
3665
|
+
options?.onProgress?.({
|
|
3666
|
+
bytesLoaded: buffer.byteLength,
|
|
3667
|
+
bytesTotal: buffer.byteLength,
|
|
3668
|
+
gaussiansDecoded: decoded.data.count,
|
|
3669
|
+
gaussiansTotal: decoded.data.count,
|
|
3670
|
+
phase: "complete"
|
|
3671
|
+
});
|
|
3672
|
+
yield decoded;
|
|
3673
|
+
}
|
|
3674
|
+
};
|
|
3675
|
+
/**
 * Inflate a gzip-compressed buffer.
 *
 * Uses the native DecompressionStream when available and otherwise falls
 * back to a global `pako` instance.
 *
 * @param {ArrayBuffer} compressed - gzip-compressed bytes.
 * @returns {Promise<ArrayBuffer>} Buffer containing exactly the inflated bytes.
 * @throws {Error} When neither DecompressionStream nor pako is available;
 *   decompression failures reject with the underlying stream/pako error.
 */
async function decompressGzip(compressed) {
  if (typeof DecompressionStream !== "undefined") {
    const ds = new DecompressionStream("gzip");
    const writer = ds.writable.getWriter();
    const reader = ds.readable.getReader();
    // Feed the input without awaiting it here: the write only settles once
    // the readable side is being drained below.
    const writeDone = writer.write(new Uint8Array(compressed)).then(() => writer.close());
    // A corrupt stream also errors the readable side, so the failure reaches
    // the caller through reader.read(); this handler only prevents the same
    // error from additionally surfacing as an unhandled rejection.
    writeDone.catch(() => {});
    const chunks = [];
    let totalLength = 0;
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      chunks.push(value);
      totalLength += value.byteLength;
    }
    const result = new Uint8Array(totalLength);
    let offset = 0;
    for (const chunk of chunks) {
      result.set(chunk, offset);
      offset += chunk.byteLength;
    }
    return result.buffer;
  }
  const pako = globalThis.pako;
  if (pako) {
    const decompressed = pako.inflate(new Uint8Array(compressed));
    const { buffer, byteOffset, byteLength } = decompressed;
    // The returned typed array may be a view into a larger backing buffer;
    // hand back only the bytes that belong to the inflated data.
    return byteOffset === 0 && byteLength === buffer.byteLength ? buffer : buffer.slice(byteOffset, byteOffset + byteLength);
  }
  throw new Error(
    "SPZ decompression requires DecompressionStream API (modern browsers) or pako library."
  );
}
|
|
3708
|
+
/**
 * Gzip-compress a buffer with the native CompressionStream.
 *
 * @param {ArrayBuffer} data - Bytes to compress.
 * @returns {Promise<ArrayBuffer>} The gzip stream as one contiguous buffer.
 * @throws {Error} When CompressionStream is not available.
 */
async function compressGzip(data) {
  if (typeof CompressionStream === "undefined") {
    throw new Error("SPZ encoding requires CompressionStream API (modern browsers).");
  }
  const gzipStream = new CompressionStream("gzip");
  const sink = gzipStream.writable.getWriter();
  const output = gzipStream.readable.getReader();
  // Queue the whole input and the close; draining happens below.
  sink.write(new Uint8Array(data));
  sink.close();

  const parts = [];
  let size = 0;
  let step = await output.read();
  while (!step.done) {
    parts.push(step.value);
    size += step.value.byteLength;
    step = await output.read();
  }

  // Concatenate the emitted chunks into one buffer.
  const merged = new Uint8Array(size);
  let cursor = 0;
  for (const part of parts) {
    merged.set(part, cursor);
    cursor += part.byteLength;
  }
  return merged.buffer;
}
|
|
3733
|
+
/**
 * Read the fixed SPZ header fields from the first 16 bytes of a DataView.
 *
 * Three little-endian uint32 values (magic, version, numPoints) are followed
 * by four single bytes (shDegree, fractionalBits, flags, reserved).
 *
 * @param {DataView} view - View positioned at the start of the payload.
 * @returns {{magic: number, version: number, numPoints: number, shDegree: number, fractionalBits: number, flags: number, reserved: number}}
 */
function parseSpzHeader(view) {
  const LITTLE_ENDIAN = true;
  const [magic, version, numPoints] = [0, 4, 8].map(
    (byteOffset) => view.getUint32(byteOffset, LITTLE_ENDIAN)
  );
  const [shDegree, fractionalBits, flags, reserved] = [12, 13, 14, 15].map(
    (byteOffset) => view.getUint8(byteOffset)
  );
  return { magic, version, numPoints, shDegree, fractionalBits, flags, reserved };
}
|
|
3744
|
+
/**
 * Reject SPZ headers this codec cannot decode.
 *
 * Checks run in order: magic number, version (1-3), point count against
 * SPZ_MAX_POINTS, and SH degree (at most 3).
 *
 * @param header - Parsed header with `magic`, `version`, `numPoints`, `shDegree`.
 * @param codecId - Codec identifier attached to the thrown error.
 * @throws CodecDecodeError on the first failing check.
 */
function validateSpzHeader(header, codecId) {
  const reject = (message) => {
    throw new CodecDecodeError(codecId, message);
  };
  const hex = (value) => value.toString(16).toUpperCase();

  if (header.magic !== SPZ_MAGIC) {
    reject(`Invalid SPZ magic: 0x${hex(header.magic)}, expected 0x${hex(SPZ_MAGIC)} ("NGSP")`);
  }
  if (header.version < 1 || header.version > 3) {
    reject(`Unsupported SPZ version: ${header.version} (supported: 1-3)`);
  }
  if (header.numPoints > SPZ_MAX_POINTS) {
    reject(
      `SPZ file contains ${header.numPoints.toLocaleString()} points, exceeding maximum of ${SPZ_MAX_POINTS.toLocaleString()}`
    );
  }
  if (header.shDegree > 3) {
    reject(`Invalid SPZ SH degree: ${header.shDegree} (max 3)`);
  }
}
|
|
3767
|
+
/**
 * Number of SH coefficients stored per color channel for a given degree.
 *
 * @param {number} degree - Spherical-harmonic degree.
 * @returns {number} 3, 8 or 15 for degrees 1, 2 and 3; 0 for degree 0 and
 *   for any other value.
 */
function shDimForDegree(degree) {
  if (degree === 1) return 3;
  if (degree === 2) return 8;
  if (degree === 3) return 15;
  // Degree 0 and every unrecognized value carry no extra coefficients.
  return 0;
}
|
|
3781
|
+
/**
 * Decode a 3-byte rotation: x, y, z are stored as bytes mapped to [-1, 1]
 * and w is rebuilt as the non-negative value that makes the quaternion unit
 * length (clamped at 0 when x^2 + y^2 + z^2 exceeds 1).
 *
 * @param data - Indexable byte source.
 * @param {number} offset - Index of the first rotation byte.
 * @returns {number[]} Quaternion as [x, y, z, w].
 */
function decodeQuaternionV2(data, offset) {
  const unpack = (byte) => byte / 127.5 - 1;
  const qx = unpack(data[offset]);
  const qy = unpack(data[offset + 1]);
  const qz = unpack(data[offset + 2]);
  const wSquared = 1 - qx * qx - qy * qy - qz * qz;
  return [qx, qy, qz, Math.sqrt(Math.max(0, wSquared))];
}
|
|
3788
|
+
/**
 * Decode a 4-byte "smallest three" rotation.
 *
 * The little-endian 32-bit word holds the index of the omitted (largest)
 * component in its top two bits. The other three components follow from bit 0
 * upward, in ascending component order, each as a 9-bit magnitude plus one
 * sign bit. The omitted component is rebuilt as the non-negative value that
 * gives the quaternion unit length.
 *
 * @param data - Indexable byte source.
 * @param {number} offset - Index of the first rotation byte.
 * @returns {number[]} Quaternion with four components.
 */
function decodeQuaternionV3(data, offset) {
  const word = data[offset] | data[offset + 1] << 8 | data[offset + 2] << 16 | data[offset + 3] << 24;
  const omitted = word >>> 30 & 3;
  const MAX_MAGNITUDE = 511; // largest 9-bit value
  const result = [0, 0, 0, 0];
  let squaredSum = 0;
  let remaining = word; // consumed 10 bits at a time
  for (const component of [0, 1, 2, 3]) {
    if (component === omitted) continue;
    const magnitude = remaining & MAX_MAGNITUDE;
    const isNegative = (remaining >>> 9 & 1) === 1;
    remaining >>>= 10;
    const unsignedValue = SQRT1_2 * magnitude / MAX_MAGNITUDE;
    const signedValue = isNegative ? -unsignedValue : unsignedValue;
    result[component] = signedValue;
    squaredSum += signedValue * signedValue;
  }
  result[omitted] = Math.sqrt(Math.max(0, 1 - squaredSum));
  return result;
}
|
|
3808
|
+
/**
 * Pack a quaternion into the 32-bit "smallest three" form.
 *
 * The input is normalized (a zero-length or NaN-length input becomes the
 * identity 0,0,0,1). The component with the largest magnitude is dropped and
 * its index written to bits 30-31; the quaternion is negated if needed so
 * that the dropped component is non-negative. The three remaining components
 * are written from bit 0 upward, in ascending component order, as a 9-bit
 * magnitude (relative to SQRT1_2) plus a sign bit.
 *
 * @param {number} x
 * @param {number} y
 * @param {number} z
 * @param {number} w
 * @returns {number} Unsigned 32-bit packed value.
 */
function encodeQuaternionV3(x, y, z, w) {
  const norm = Math.sqrt(x * x + y * y + z * z + w * w);
  let unit;
  if (norm > 0) {
    const inverse = 1 / norm;
    unit = [x * inverse, y * inverse, z * inverse, w * inverse];
  } else {
    unit = [0, 0, 0, 1];
  }

  // First component with the strictly greatest magnitude wins ties.
  const largest = unit.reduce(
    (best, component, index) => Math.abs(component) > Math.abs(unit[best]) ? index : best,
    0
  );

  // q and -q are the same rotation; keep the dropped component non-negative.
  const oriented = unit[largest] < 0 ? unit.map((component) => -component) : unit;

  const MAX_MAGNITUDE = 511; // largest 9-bit value
  let word = 0;
  let shift = 0;
  oriented.forEach((component, index) => {
    if (index === largest) return;
    const sign = component < 0 ? 1 : 0;
    const magnitude = Math.min(MAX_MAGNITUDE, Math.round(Math.abs(component) / SQRT1_2 * MAX_MAGNITUDE));
    word |= magnitude << shift;
    word |= sign << shift + 9;
    shift += 10;
  });
  word |= largest << 30;
  return word >>> 0;
}
|
|
3849
|
+
|
|
3850
|
+
// src/gpu/codecs/GltfGaussianSplatCodec.ts
|
|
3851
|
+
// GLB container constants (all multi-byte fields are read little-endian).
var GLB_MAGIC = 0x46546c67; // ASCII "glTF"
var GLB_HEADER_SIZE = 12; // magic(4) + version(4) + length(4)
var GLB_CHUNK_HEADER_SIZE = 8; // chunkLength(4) + chunkType(4)
var GLB_CHUNK_TYPE_JSON = 0x4e4f534a; // ASCII "JSON"
var GLB_CHUNK_TYPE_BIN = 0x004e4942; // ASCII "BIN\0"

// glTF extension names looked up on mesh primitives.
var EXT_GAUSSIAN_SPLATTING = "KHR_gaussian_splatting";
var EXT_GAUSSIAN_SPLATTING_COMPRESSION_SPZ = "KHR_gaussian_splatting_compression_spz";

// Default decode limits.
var MAX_GAUSSIAN_COUNT = 10000000;
var DEFAULT_MAX_MEMORY_MB2 = 512;

// Spherical-harmonic DC term constants.
var SH_C02 = 0.2820947917738781;
var SH_DC_BIAS = 0.5;

// Number of components per glTF accessor `type`.
var GLTF_TYPE_SIZES = {
  SCALAR: 1,
  VEC2: 2,
  VEC3: 3,
  VEC4: 4,
  MAT2: 4,
  MAT3: 9,
  MAT4: 16
};

// Byte size per glTF accessor `componentType`.
var GLTF_COMPONENT_SIZES = {
  5120: 1, // BYTE
  5121: 1, // UNSIGNED_BYTE
  5122: 2, // SHORT
  5123: 2, // UNSIGNED_SHORT
  5125: 4, // UNSIGNED_INT
  5126: 4 // FLOAT
};
|
|
3879
|
+
/**
 * Apply the sRGB transfer curve to one linear color channel.
 *
 * Values at or below 0.0031308 use the linear segment (x * 12.92); larger
 * values use 1.055 * x^(1/2.4) - 0.055.
 *
 * @param {number} linear - Linear-light channel value.
 * @returns {number} sRGB-encoded channel value (not clamped).
 */
function linearToSrgb(linear) {
  const LINEAR_SEGMENT_LIMIT = 0.0031308;
  if (linear <= LINEAR_SEGMENT_LIMIT) {
    return linear * 12.92;
  }
  const gammaEncoded = Math.pow(linear, 1 / 2.4);
  return 1.055 * gammaEncoded - 0.055;
}
|
|
3882
|
+
var GltfGaussianSplatCodec = class extends AbstractGaussianCodec {
|
|
3883
|
+
codecId;
|
|
3884
|
+
spzCodec;
|
|
3885
|
+
constructor() {
|
|
3886
|
+
super();
|
|
3887
|
+
this.codecId = "khr.gltf.gaussian";
|
|
3888
|
+
this.spzCodec = new SpzCodec();
|
|
3889
|
+
}
|
|
3890
|
+
// ─── Capabilities ─────────────────────────────────────────────────────────
|
|
3891
|
+
getCapabilities() {
|
|
3892
|
+
return {
|
|
3893
|
+
id: this.codecId,
|
|
3894
|
+
name: "glTF Gaussian Splat Codec (KHR_gaussian_splatting)",
|
|
3895
|
+
version: "1.0.0",
|
|
3896
|
+
fileExtensions: ["glb", "gltf"],
|
|
3897
|
+
mimeTypes: ["model/gltf-binary", "model/gltf+json"],
|
|
3898
|
+
canEncode: false,
|
|
3899
|
+
canDecode: true,
|
|
3900
|
+
canStream: false,
|
|
3901
|
+
canDecodeTemporal: false,
|
|
3902
|
+
maxSHDegree: 3,
|
|
3903
|
+
maxGaussianCount: MAX_GAUSSIAN_COUNT,
|
|
3904
|
+
requiresWasm: false,
|
|
3905
|
+
requiresWebGPU: false,
|
|
3906
|
+
standard: "khronos",
|
|
3907
|
+
maturity: "beta"
|
|
3908
|
+
};
|
|
3909
|
+
}
|
|
3910
|
+
// ─── Probe ────────────────────────────────────────────────────────────────
|
|
3911
|
+
/**
|
|
3912
|
+
* Check if a buffer contains a GLB file by inspecting the magic bytes,
|
|
3913
|
+
* or if it starts with a JSON object (potential .gltf).
|
|
3914
|
+
*/
|
|
3915
|
+
canDecode(buffer) {
|
|
3916
|
+
if (buffer.byteLength < 4) return false;
|
|
3917
|
+
const view = new DataView(buffer);
|
|
3918
|
+
if (view.getUint32(0, true) === GLB_MAGIC) {
|
|
3919
|
+
return true;
|
|
3920
|
+
}
|
|
3921
|
+
const firstByte = new Uint8Array(buffer, 0, 1)[0];
|
|
3922
|
+
if (firstByte === 123) {
|
|
3923
|
+
try {
|
|
3924
|
+
const decoder = new TextDecoder("utf-8");
|
|
3925
|
+
const preview = decoder.decode(buffer.slice(0, Math.min(buffer.byteLength, 4096)));
|
|
3926
|
+
return preview.includes(EXT_GAUSSIAN_SPLATTING);
|
|
3927
|
+
} catch {
|
|
3928
|
+
return false;
|
|
3929
|
+
}
|
|
3930
|
+
}
|
|
3931
|
+
return false;
|
|
3932
|
+
}
|
|
3933
|
+
// ─── Extract Metadata ─────────────────────────────────────────────────────
|
|
3934
|
+
async extractMetadata(buffer) {
|
|
3935
|
+
const parsed = this.parseGltfContainer(buffer);
|
|
3936
|
+
const gltf = parsed.json;
|
|
3937
|
+
const { primitive, gaussianExt, spzExt } = this.findGaussianPrimitive(gltf);
|
|
3938
|
+
let gaussianCount = 0;
|
|
3939
|
+
let shDegree = 0;
|
|
3940
|
+
if (spzExt) {
|
|
3941
|
+
const spzData = this.extractBufferViewData(parsed, spzExt.bufferView);
|
|
3942
|
+
const spzMeta = await this.spzCodec.extractMetadata(spzData);
|
|
3943
|
+
gaussianCount = spzMeta.gaussianCount;
|
|
3944
|
+
shDegree = spzMeta.shDegree;
|
|
3945
|
+
} else {
|
|
3946
|
+
const posAccessorIndex = primitive.attributes["POSITION"];
|
|
3947
|
+
if (posAccessorIndex !== void 0 && gltf.accessors) {
|
|
3948
|
+
gaussianCount = gltf.accessors[posAccessorIndex].count;
|
|
3949
|
+
}
|
|
3950
|
+
shDegree = this.detectShDegree(primitive);
|
|
3951
|
+
}
|
|
3952
|
+
return {
|
|
3953
|
+
version: 1,
|
|
3954
|
+
gaussianCount,
|
|
3955
|
+
shDegree,
|
|
3956
|
+
compressedSizeBytes: buffer.byteLength,
|
|
3957
|
+
uncompressedSizeBytes: gaussianCount * 60,
|
|
3958
|
+
// estimate
|
|
3959
|
+
compressionRatio: spzExt ? buffer.byteLength / (gaussianCount * 60) : 1,
|
|
3960
|
+
antialiased: false,
|
|
3961
|
+
extensions: {
|
|
3962
|
+
colorSpace: gaussianExt.colorSpace ?? "srgb_rec709_display",
|
|
3963
|
+
hasSpzCompression: !!spzExt
|
|
3964
|
+
}
|
|
3965
|
+
};
|
|
3966
|
+
}
|
|
3967
|
+
// ─── Decode ───────────────────────────────────────────────────────────────
|
|
3968
|
+
async decode(buffer, options) {
|
|
3969
|
+
const startTime = performance.now();
|
|
3970
|
+
const warnings = [];
|
|
3971
|
+
const maxGaussians = options?.maxGaussians ?? MAX_GAUSSIAN_COUNT;
|
|
3972
|
+
const maxMemoryMB = options?.maxMemoryMB ?? DEFAULT_MAX_MEMORY_MB2;
|
|
3973
|
+
const decodeSH = options?.decodeSH ?? true;
|
|
3974
|
+
const parsed = this.parseGltfContainer(buffer);
|
|
3975
|
+
const gltf = parsed.json;
|
|
3976
|
+
const { primitive, gaussianExt, spzExt } = this.findGaussianPrimitive(gltf);
|
|
3977
|
+
let result;
|
|
3978
|
+
if (spzExt) {
|
|
3979
|
+
warnings.push("Decoding via SPZ compression extension delegation");
|
|
3980
|
+
const spzData = this.extractBufferViewData(parsed, spzExt.bufferView);
|
|
3981
|
+
result = await this.spzCodec.decode(spzData, options);
|
|
3982
|
+
const colorSpace = gaussianExt.colorSpace ?? "srgb_rec709_display";
|
|
3983
|
+
if (colorSpace === "lin_rec709_display") {
|
|
3984
|
+
this.convertColorsLinearToSrgb(result.data.colors, result.data.count);
|
|
3985
|
+
warnings.push("Converted linear RGB to sRGB for display");
|
|
3986
|
+
}
|
|
3987
|
+
} else {
|
|
3988
|
+
result = this.decodeBaseline(
|
|
3989
|
+
parsed,
|
|
3990
|
+
gltf,
|
|
3991
|
+
primitive,
|
|
3992
|
+
gaussianExt,
|
|
3993
|
+
maxGaussians,
|
|
3994
|
+
maxMemoryMB,
|
|
3995
|
+
decodeSH,
|
|
3996
|
+
warnings
|
|
3997
|
+
);
|
|
3998
|
+
}
|
|
3999
|
+
const durationMs = performance.now() - startTime;
|
|
4000
|
+
return {
|
|
4001
|
+
data: result.data,
|
|
4002
|
+
durationMs,
|
|
4003
|
+
warnings: [...warnings, ...result.warnings]
|
|
4004
|
+
};
|
|
4005
|
+
}
|
|
4006
|
+
// ─── Lifecycle ──────────────────────────────────────────────────────────────
|
|
4007
|
+
async initialize() {
|
|
4008
|
+
await super.initialize();
|
|
4009
|
+
await this.spzCodec.initialize();
|
|
4010
|
+
}
|
|
4011
|
+
dispose() {
|
|
4012
|
+
this.spzCodec.dispose();
|
|
4013
|
+
super.dispose();
|
|
4014
|
+
}
|
|
4015
|
+
// ─── Private: glTF Container Parsing ────────────────────────────────────
|
|
4016
|
+
/**
|
|
4017
|
+
* Parse a glTF container, handling both GLB and JSON-only formats.
|
|
4018
|
+
*
|
|
4019
|
+
* @returns Parsed glTF JSON and optional binary buffer
|
|
4020
|
+
*/
|
|
4021
|
+
parseGltfContainer(buffer) {
|
|
4022
|
+
const view = new DataView(buffer);
|
|
4023
|
+
if (buffer.byteLength >= GLB_HEADER_SIZE && view.getUint32(0, true) === GLB_MAGIC) {
|
|
4024
|
+
return this.parseGlb(buffer, view);
|
|
4025
|
+
}
|
|
4026
|
+
return this.parseGltfJson(buffer);
|
|
4027
|
+
}
|
|
4028
|
+
/**
|
|
4029
|
+
* Parse a GLB binary container.
|
|
4030
|
+
*
|
|
4031
|
+
* GLB layout:
|
|
4032
|
+
* [Header: 12 bytes] magic(4) + version(4) + length(4)
|
|
4033
|
+
* [JSON chunk: 8 + N bytes] chunkLength(4) + chunkType(4) + jsonData(N)
|
|
4034
|
+
* [BIN chunk: 8 + M bytes] chunkLength(4) + chunkType(4) + binData(M)
|
|
4035
|
+
*/
|
|
4036
|
+
parseGlb(buffer, view) {
|
|
4037
|
+
const version = view.getUint32(4, true);
|
|
4038
|
+
const totalLength = view.getUint32(8, true);
|
|
4039
|
+
if (version < 2) {
|
|
4040
|
+
throw new CodecDecodeError(
|
|
4041
|
+
this.codecId,
|
|
4042
|
+
`Unsupported GLB version: ${version} (expected >= 2)`
|
|
4043
|
+
);
|
|
4044
|
+
}
|
|
4045
|
+
if (totalLength > buffer.byteLength) {
|
|
4046
|
+
throw new CodecDecodeError(
|
|
4047
|
+
this.codecId,
|
|
4048
|
+
`GLB header declares ${totalLength} bytes but buffer is only ${buffer.byteLength} bytes`
|
|
4049
|
+
);
|
|
4050
|
+
}
|
|
4051
|
+
let jsonChunk;
|
|
4052
|
+
let binChunk;
|
|
4053
|
+
let offset = GLB_HEADER_SIZE;
|
|
4054
|
+
while (offset < totalLength) {
|
|
4055
|
+
if (offset + GLB_CHUNK_HEADER_SIZE > totalLength) break;
|
|
4056
|
+
const chunkLength = view.getUint32(offset, true);
|
|
4057
|
+
const chunkType = view.getUint32(offset + 4, true);
|
|
4058
|
+
const chunkDataStart = offset + GLB_CHUNK_HEADER_SIZE;
|
|
4059
|
+
const chunkDataEnd = chunkDataStart + chunkLength;
|
|
4060
|
+
if (chunkDataEnd > totalLength) {
|
|
4061
|
+
throw new CodecDecodeError(
|
|
4062
|
+
this.codecId,
|
|
4063
|
+
`GLB chunk at offset ${offset} extends beyond file (${chunkDataEnd} > ${totalLength})`
|
|
4064
|
+
);
|
|
4065
|
+
}
|
|
4066
|
+
if (chunkType === GLB_CHUNK_TYPE_JSON) {
|
|
4067
|
+
const jsonBytes = new Uint8Array(buffer, chunkDataStart, chunkLength);
|
|
4068
|
+
const decoder = new TextDecoder("utf-8");
|
|
4069
|
+
const jsonText = decoder.decode(jsonBytes);
|
|
4070
|
+
try {
|
|
4071
|
+
jsonChunk = JSON.parse(jsonText);
|
|
4072
|
+
} catch (e) {
|
|
4073
|
+
throw new CodecDecodeError(
|
|
4074
|
+
this.codecId,
|
|
4075
|
+
"Failed to parse GLB JSON chunk",
|
|
4076
|
+
e instanceof Error ? e : void 0
|
|
4077
|
+
);
|
|
4078
|
+
}
|
|
4079
|
+
} else if (chunkType === GLB_CHUNK_TYPE_BIN) {
|
|
4080
|
+
binChunk = buffer.slice(chunkDataStart, chunkDataEnd);
|
|
4081
|
+
}
|
|
4082
|
+
offset = chunkDataEnd;
|
|
4083
|
+
while (offset % 4 !== 0 && offset < totalLength) offset++;
|
|
4084
|
+
}
|
|
4085
|
+
if (!jsonChunk) {
|
|
4086
|
+
throw new CodecDecodeError(this.codecId, "GLB file does not contain a JSON chunk");
|
|
4087
|
+
}
|
|
4088
|
+
return {
|
|
4089
|
+
json: jsonChunk,
|
|
4090
|
+
binaryChunks: binChunk ? [binChunk] : []
|
|
4091
|
+
};
|
|
4092
|
+
}
|
|
4093
|
+
/**
|
|
4094
|
+
* Parse a JSON-only glTF file.
|
|
4095
|
+
*/
|
|
4096
|
+
parseGltfJson(buffer) {
|
|
4097
|
+
const decoder = new TextDecoder("utf-8");
|
|
4098
|
+
const jsonText = decoder.decode(buffer);
|
|
4099
|
+
let json;
|
|
4100
|
+
try {
|
|
4101
|
+
json = JSON.parse(jsonText);
|
|
4102
|
+
} catch (e) {
|
|
4103
|
+
throw new CodecDecodeError(
|
|
4104
|
+
this.codecId,
|
|
4105
|
+
"Failed to parse glTF JSON",
|
|
4106
|
+
e instanceof Error ? e : void 0
|
|
4107
|
+
);
|
|
4108
|
+
}
|
|
4109
|
+
return {
|
|
4110
|
+
json,
|
|
4111
|
+
binaryChunks: []
|
|
4112
|
+
};
|
|
4113
|
+
}
|
|
4114
|
+
// ─── Private: Gaussian Primitive Discovery ──────────────────────────────
|
|
4115
|
+
/**
|
|
4116
|
+
* Find the first mesh primitive that carries KHR_gaussian_splatting.
|
|
4117
|
+
*
|
|
4118
|
+
* @throws CodecDecodeError if no Gaussian splatting primitive is found
|
|
4119
|
+
*/
|
|
4120
|
+
findGaussianPrimitive(gltf) {
|
|
4121
|
+
if (!gltf.meshes || gltf.meshes.length === 0) {
|
|
4122
|
+
throw new CodecDecodeError(this.codecId, "glTF file contains no meshes");
|
|
4123
|
+
}
|
|
4124
|
+
for (const mesh of gltf.meshes) {
|
|
4125
|
+
for (const primitive of mesh.primitives) {
|
|
4126
|
+
const gsExt = primitive.extensions?.[EXT_GAUSSIAN_SPLATTING];
|
|
4127
|
+
if (gsExt) {
|
|
4128
|
+
const spzExt = primitive.extensions?.[EXT_GAUSSIAN_SPLATTING_COMPRESSION_SPZ];
|
|
4129
|
+
return { primitive, gaussianExt: gsExt, spzExt };
|
|
4130
|
+
}
|
|
4131
|
+
}
|
|
4132
|
+
}
|
|
4133
|
+
throw new CodecDecodeError(
|
|
4134
|
+
this.codecId,
|
|
4135
|
+
`No mesh primitive with ${EXT_GAUSSIAN_SPLATTING} extension found in glTF file`
|
|
4136
|
+
);
|
|
4137
|
+
}
|
|
4138
|
+
// ─── Private: BufferView Data Extraction ────────────────────────────────
|
|
4139
|
+
/**
|
|
4140
|
+
* Extract raw bytes from a glTF bufferView.
|
|
4141
|
+
*/
|
|
4142
|
+
extractBufferViewData(parsed, bufferViewIndex) {
|
|
4143
|
+
const gltf = parsed.json;
|
|
4144
|
+
if (!gltf.bufferViews || bufferViewIndex >= gltf.bufferViews.length) {
|
|
4145
|
+
throw new CodecDecodeError(
|
|
4146
|
+
this.codecId,
|
|
4147
|
+
`BufferView index ${bufferViewIndex} is out of range (${gltf.bufferViews?.length ?? 0} bufferViews)`
|
|
4148
|
+
);
|
|
4149
|
+
}
|
|
4150
|
+
const bv = gltf.bufferViews[bufferViewIndex];
|
|
4151
|
+
const bufferIndex = bv.buffer;
|
|
4152
|
+
const byteOffset = bv.byteOffset ?? 0;
|
|
4153
|
+
const byteLength = bv.byteLength;
|
|
4154
|
+
const bufferData = this.resolveBuffer(parsed, bufferIndex);
|
|
4155
|
+
if (byteOffset + byteLength > bufferData.byteLength) {
|
|
4156
|
+
throw new CodecDecodeError(
|
|
4157
|
+
this.codecId,
|
|
4158
|
+
`BufferView [${bufferViewIndex}] range (${byteOffset}..${byteOffset + byteLength}) exceeds buffer [${bufferIndex}] size (${bufferData.byteLength})`
|
|
4159
|
+
);
|
|
4160
|
+
}
|
|
4161
|
+
return bufferData.slice(byteOffset, byteOffset + byteLength);
|
|
4162
|
+
}
|
|
4163
|
+
/**
|
|
4164
|
+
* Resolve a glTF buffer index to its ArrayBuffer data.
|
|
4165
|
+
*
|
|
4166
|
+
* For GLB: buffer 0 is the BIN chunk.
|
|
4167
|
+
* For external URIs: not supported (would require async fetch).
|
|
4168
|
+
*/
|
|
4169
|
+
resolveBuffer(parsed, bufferIndex) {
|
|
4170
|
+
if (bufferIndex === 0 && parsed.binaryChunks.length > 0) {
|
|
4171
|
+
return parsed.binaryChunks[0];
|
|
4172
|
+
}
|
|
4173
|
+
const gltf = parsed.json;
|
|
4174
|
+
if (gltf.buffers && bufferIndex < gltf.buffers.length) {
|
|
4175
|
+
const buffer = gltf.buffers[bufferIndex];
|
|
4176
|
+
if (buffer.uri && buffer.uri.startsWith("data:")) {
|
|
4177
|
+
return this.decodeDataUri(buffer.uri);
|
|
4178
|
+
}
|
|
4179
|
+
}
|
|
4180
|
+
throw new CodecDecodeError(
|
|
4181
|
+
this.codecId,
|
|
4182
|
+
`Cannot resolve buffer [${bufferIndex}]: external URI buffers are not supported in synchronous decode. Use GLB format or data URIs for embedded data.`
|
|
4183
|
+
);
|
|
4184
|
+
}
|
|
4185
|
+
/**
|
|
4186
|
+
* Decode a data URI to an ArrayBuffer.
|
|
4187
|
+
*/
|
|
4188
|
+
decodeDataUri(uri) {
|
|
4189
|
+
const commaIndex = uri.indexOf(",");
|
|
4190
|
+
if (commaIndex === -1) {
|
|
4191
|
+
throw new CodecDecodeError(this.codecId, "Invalid data URI format");
|
|
4192
|
+
}
|
|
4193
|
+
const header = uri.substring(0, commaIndex);
|
|
4194
|
+
const data = uri.substring(commaIndex + 1);
|
|
4195
|
+
if (header.includes(";base64")) {
|
|
4196
|
+
const binary = atob(data);
|
|
4197
|
+
const bytes = new Uint8Array(binary.length);
|
|
4198
|
+
for (let i = 0; i < binary.length; i++) {
|
|
4199
|
+
bytes[i] = binary.charCodeAt(i);
|
|
4200
|
+
}
|
|
4201
|
+
return bytes.buffer;
|
|
4202
|
+
}
|
|
4203
|
+
const decoded = decodeURIComponent(data);
|
|
4204
|
+
const encoder = new TextEncoder();
|
|
4205
|
+
return encoder.encode(decoded).buffer;
|
|
4206
|
+
}
|
|
4207
|
+
// ─── Private: Accessor Data Reading ─────────────────────────────────────
|
|
4208
|
+
/**
|
|
4209
|
+
* Read accessor data as a Float32Array.
|
|
4210
|
+
*
|
|
4211
|
+
* Handles component type conversion and normalization for:
|
|
4212
|
+
* - FLOAT: Direct read
|
|
4213
|
+
* - BYTE/SHORT (normalized): Scale to [-1, 1]
|
|
4214
|
+
* - UNSIGNED_BYTE/UNSIGNED_SHORT (normalized): Scale to [0, 1]
|
|
4215
|
+
* - BYTE/SHORT/UNSIGNED_BYTE/UNSIGNED_SHORT (non-normalized): Direct int-to-float
|
|
4216
|
+
*/
|
|
4217
|
+
readAccessorFloat32(parsed, accessorIndex) {
|
|
4218
|
+
const gltf = parsed.json;
|
|
4219
|
+
if (!gltf.accessors || accessorIndex >= gltf.accessors.length) {
|
|
4220
|
+
throw new CodecDecodeError(this.codecId, `Accessor index ${accessorIndex} is out of range`);
|
|
4221
|
+
}
|
|
4222
|
+
const accessor = gltf.accessors[accessorIndex];
|
|
4223
|
+
const componentCount = GLTF_TYPE_SIZES[accessor.type] ?? 1;
|
|
4224
|
+
const totalElements = accessor.count * componentCount;
|
|
4225
|
+
const result = new Float32Array(totalElements);
|
|
4226
|
+
if (accessor.bufferView === void 0) {
|
|
4227
|
+
return result;
|
|
4228
|
+
}
|
|
4229
|
+
const bv = gltf.bufferViews[accessor.bufferView];
|
|
4230
|
+
const bufferData = this.resolveBuffer(parsed, bv.buffer);
|
|
4231
|
+
const byteOffset = (bv.byteOffset ?? 0) + (accessor.byteOffset ?? 0);
|
|
4232
|
+
const componentSize = GLTF_COMPONENT_SIZES[accessor.componentType] ?? 4;
|
|
4233
|
+
const stride = bv.byteStride ?? componentCount * componentSize;
|
|
4234
|
+
const dataView = new DataView(bufferData);
|
|
4235
|
+
for (let i = 0; i < accessor.count; i++) {
|
|
4236
|
+
const elementOffset = byteOffset + i * stride;
|
|
4237
|
+
for (let c = 0; c < componentCount; c++) {
|
|
4238
|
+
const compOffset = elementOffset + c * componentSize;
|
|
4239
|
+
let value;
|
|
4240
|
+
switch (accessor.componentType) {
|
|
4241
|
+
case 5126 /* FLOAT */:
|
|
4242
|
+
value = dataView.getFloat32(compOffset, true);
|
|
4243
|
+
break;
|
|
4244
|
+
case 5120 /* BYTE */:
|
|
4245
|
+
value = dataView.getInt8(compOffset);
|
|
4246
|
+
if (accessor.normalized) value = Math.max(value / 127, -1);
|
|
4247
|
+
break;
|
|
4248
|
+
case 5121 /* UNSIGNED_BYTE */:
|
|
4249
|
+
value = dataView.getUint8(compOffset);
|
|
4250
|
+
if (accessor.normalized) value = value / 255;
|
|
4251
|
+
break;
|
|
4252
|
+
case 5122 /* SHORT */:
|
|
4253
|
+
value = dataView.getInt16(compOffset, true);
|
|
4254
|
+
if (accessor.normalized) value = Math.max(value / 32767, -1);
|
|
4255
|
+
break;
|
|
4256
|
+
case 5123 /* UNSIGNED_SHORT */:
|
|
4257
|
+
value = dataView.getUint16(compOffset, true);
|
|
4258
|
+
if (accessor.normalized) value = value / 65535;
|
|
4259
|
+
break;
|
|
4260
|
+
case 5125 /* UNSIGNED_INT */:
|
|
4261
|
+
value = dataView.getUint32(compOffset, true);
|
|
4262
|
+
break;
|
|
4263
|
+
default:
|
|
4264
|
+
value = 0;
|
|
4265
|
+
}
|
|
4266
|
+
result[i * componentCount + c] = value;
|
|
4267
|
+
}
|
|
4268
|
+
}
|
|
4269
|
+
return result;
|
|
4270
|
+
}
|
|
4271
|
+
// ─── Private: Baseline Decode ─────────────────────────────────────────────
|
|
4272
|
+
/**
|
|
4273
|
+
* Decode uncompressed KHR_gaussian_splatting attributes from glTF accessors.
|
|
4274
|
+
*/
|
|
4275
|
+
decodeBaseline(parsed, gltf, primitive, gaussianExt, maxGaussians, maxMemoryMB, decodeSH, warnings) {
|
|
4276
|
+
const attrs = primitive.attributes;
|
|
4277
|
+
const posIndex = attrs["POSITION"];
|
|
4278
|
+
if (posIndex === void 0) {
|
|
4279
|
+
throw new CodecDecodeError(
|
|
4280
|
+
this.codecId,
|
|
4281
|
+
"Missing required POSITION attribute in Gaussian splatting primitive"
|
|
4282
|
+
);
|
|
4283
|
+
}
|
|
4284
|
+
const posAccessor = gltf.accessors[posIndex];
|
|
4285
|
+
const totalCount = posAccessor.count;
|
|
4286
|
+
const N = Math.min(totalCount, maxGaussians);
|
|
4287
|
+
if (N < totalCount) {
|
|
4288
|
+
warnings.push(
|
|
4289
|
+
`Clamped Gaussian count from ${totalCount.toLocaleString()} to ${N.toLocaleString()} (maxGaussians limit)`
|
|
4290
|
+
);
|
|
4291
|
+
}
|
|
4292
|
+
this.checkMemoryBudget(N, maxMemoryMB);
|
|
4293
|
+
const rawPositions = this.readAccessorFloat32(parsed, posIndex);
|
|
4294
|
+
const positions = N < totalCount ? rawPositions.slice(0, N * 3) : rawPositions;
|
|
4295
|
+
const rotIndex = attrs["KHR_gaussian_splatting:ROTATION"] ?? attrs["_ROTATION"];
|
|
4296
|
+
let rotations;
|
|
4297
|
+
if (rotIndex !== void 0) {
|
|
4298
|
+
const rawRotations = this.readAccessorFloat32(parsed, rotIndex);
|
|
4299
|
+
rotations = N < totalCount ? rawRotations.slice(0, N * 4) : rawRotations;
|
|
4300
|
+
} else {
|
|
4301
|
+
warnings.push("Missing ROTATION attribute; using identity quaternions");
|
|
4302
|
+
rotations = new Float32Array(N * 4);
|
|
4303
|
+
for (let i = 0; i < N; i++) {
|
|
4304
|
+
rotations[i * 4 + 3] = 1;
|
|
4305
|
+
}
|
|
4306
|
+
}
|
|
4307
|
+
const scaleIndex = attrs["KHR_gaussian_splatting:SCALE"] ?? attrs["_SCALE"];
|
|
4308
|
+
let scales;
|
|
4309
|
+
if (scaleIndex !== void 0) {
|
|
4310
|
+
const rawScales = this.readAccessorFloat32(parsed, scaleIndex);
|
|
4311
|
+
scales = new Float32Array(N * 3);
|
|
4312
|
+
const count = Math.min(rawScales.length, N * 3);
|
|
4313
|
+
for (let i = 0; i < count; i++) {
|
|
4314
|
+
scales[i] = Math.exp(rawScales[i]);
|
|
4315
|
+
}
|
|
4316
|
+
} else {
|
|
4317
|
+
warnings.push("Missing SCALE attribute; using uniform scale 0.01");
|
|
4318
|
+
scales = new Float32Array(N * 3);
|
|
4319
|
+
scales.fill(0.01);
|
|
4320
|
+
}
|
|
4321
|
+
const opacityIndex = attrs["KHR_gaussian_splatting:OPACITY"] ?? attrs["_OPACITY"];
|
|
4322
|
+
let opacities;
|
|
4323
|
+
if (opacityIndex !== void 0) {
|
|
4324
|
+
const rawOpacities = this.readAccessorFloat32(parsed, opacityIndex);
|
|
4325
|
+
opacities = N < totalCount ? rawOpacities.slice(0, N) : rawOpacities;
|
|
4326
|
+
} else {
|
|
4327
|
+
warnings.push("Missing OPACITY attribute; using full opacity (1.0)");
|
|
4328
|
+
opacities = new Float32Array(N);
|
|
4329
|
+
opacities.fill(1);
|
|
4330
|
+
}
|
|
4331
|
+
const colors = new Float32Array(N * 4);
|
|
4332
|
+
const colorIndex = attrs["COLOR_0"];
|
|
4333
|
+
const shDc0Index = attrs["KHR_gaussian_splatting:SH_DEGREE_0_COEF_0"] ?? attrs["_SH_DEGREE_0_COEF_0"];
|
|
4334
|
+
if (colorIndex !== void 0) {
|
|
4335
|
+
const rawColors = this.readAccessorFloat32(parsed, colorIndex);
|
|
4336
|
+
const colorAccessor = gltf.accessors[colorIndex];
|
|
4337
|
+
const colorComponents = GLTF_TYPE_SIZES[colorAccessor.type] ?? 4;
|
|
4338
|
+
for (let i = 0; i < N; i++) {
|
|
4339
|
+
if (colorComponents >= 3) {
|
|
4340
|
+
colors[i * 4] = rawColors[i * colorComponents];
|
|
4341
|
+
colors[i * 4 + 1] = rawColors[i * colorComponents + 1];
|
|
4342
|
+
colors[i * 4 + 2] = rawColors[i * colorComponents + 2];
|
|
4343
|
+
}
|
|
4344
|
+
colors[i * 4 + 3] = colorComponents >= 4 ? rawColors[i * colorComponents + 3] : opacities[i];
|
|
4345
|
+
}
|
|
4346
|
+
} else if (shDc0Index !== void 0) {
|
|
4347
|
+
const rawSh0 = this.readAccessorFloat32(parsed, shDc0Index);
|
|
4348
|
+
for (let i = 0; i < N; i++) {
|
|
4349
|
+
for (let c = 0; c < 3; c++) {
|
|
4350
|
+
const shCoeff = rawSh0[i * 3 + c];
|
|
4351
|
+
colors[i * 4 + c] = Math.max(0, Math.min(1, SH_C02 * shCoeff + SH_DC_BIAS));
|
|
4352
|
+
}
|
|
4353
|
+
colors[i * 4 + 3] = opacities[i];
|
|
4354
|
+
}
|
|
4355
|
+
} else {
|
|
4356
|
+
warnings.push("No color or SH degree 0 data found; using mid-gray");
|
|
4357
|
+
for (let i = 0; i < N; i++) {
|
|
4358
|
+
colors[i * 4] = 0.5;
|
|
4359
|
+
colors[i * 4 + 1] = 0.5;
|
|
4360
|
+
colors[i * 4 + 2] = 0.5;
|
|
4361
|
+
colors[i * 4 + 3] = opacities[i];
|
|
4362
|
+
}
|
|
4363
|
+
}
|
|
4364
|
+
const colorSpace = gaussianExt.colorSpace ?? "srgb_rec709_display";
|
|
4365
|
+
if (colorSpace === "lin_rec709_display") {
|
|
4366
|
+
this.convertColorsLinearToSrgb(colors, N);
|
|
4367
|
+
warnings.push("Converted linear RGB to sRGB for display");
|
|
4368
|
+
}
|
|
4369
|
+
let shCoefficients;
|
|
4370
|
+
let shDegree = 0;
|
|
4371
|
+
if (decodeSH) {
|
|
4372
|
+
shDegree = this.detectShDegree(primitive);
|
|
4373
|
+
if (shDegree > 0) {
|
|
4374
|
+
const shDim = shDimForDegree2(shDegree);
|
|
4375
|
+
shCoefficients = new Float32Array(N * shDim * 3);
|
|
4376
|
+
let shOffset = 0;
|
|
4377
|
+
for (let d = 1; d <= shDegree; d++) {
|
|
4378
|
+
const coeffsPerDegree = 2 * d + 1;
|
|
4379
|
+
for (let coef = 0; coef < coeffsPerDegree; coef++) {
|
|
4380
|
+
const attrName = attrs[`KHR_gaussian_splatting:SH_DEGREE_${d}_COEF_${coef}`] ?? attrs[`_SH_DEGREE_${d}_COEF_${coef}`];
|
|
4381
|
+
if (attrName !== void 0) {
|
|
4382
|
+
const rawSh = this.readAccessorFloat32(parsed, attrName);
|
|
4383
|
+
for (let i = 0; i < N; i++) {
|
|
4384
|
+
for (let c = 0; c < 3; c++) {
|
|
4385
|
+
shCoefficients[(i * shDim + shOffset) * 3 + c] = rawSh[i * 3 + c];
|
|
4386
|
+
}
|
|
4387
|
+
}
|
|
4388
|
+
}
|
|
4389
|
+
shOffset++;
|
|
4390
|
+
}
|
|
4391
|
+
}
|
|
4392
|
+
}
|
|
4393
|
+
}
|
|
4394
|
+
const data = {
|
|
4395
|
+
positions,
|
|
4396
|
+
scales,
|
|
4397
|
+
rotations,
|
|
4398
|
+
colors,
|
|
4399
|
+
opacities,
|
|
4400
|
+
shCoefficients,
|
|
4401
|
+
shDegree,
|
|
4402
|
+
count: N
|
|
4403
|
+
};
|
|
4404
|
+
return {
|
|
4405
|
+
data,
|
|
4406
|
+
durationMs: 0,
|
|
4407
|
+
// Will be overridden by caller
|
|
4408
|
+
warnings
|
|
4409
|
+
};
|
|
4410
|
+
}
|
|
4411
|
+
// ─── Private: SH Degree Detection ─────────────────────────────────────────
|
|
4412
|
+
/**
|
|
4413
|
+
* Detect the highest SH degree present in a primitive's attributes.
|
|
4414
|
+
*/
|
|
4415
|
+
detectShDegree(primitive) {
|
|
4416
|
+
const attrs = primitive.attributes;
|
|
4417
|
+
let degree = 0;
|
|
4418
|
+
if (attrs["KHR_gaussian_splatting:SH_DEGREE_3_COEF_0"] !== void 0 || attrs["_SH_DEGREE_3_COEF_0"] !== void 0) {
|
|
4419
|
+
degree = 3;
|
|
4420
|
+
} else if (attrs["KHR_gaussian_splatting:SH_DEGREE_2_COEF_0"] !== void 0 || attrs["_SH_DEGREE_2_COEF_0"] !== void 0) {
|
|
4421
|
+
degree = 2;
|
|
4422
|
+
} else if (attrs["KHR_gaussian_splatting:SH_DEGREE_1_COEF_0"] !== void 0 || attrs["_SH_DEGREE_1_COEF_0"] !== void 0) {
|
|
4423
|
+
degree = 1;
|
|
4424
|
+
}
|
|
4425
|
+
return degree;
|
|
4426
|
+
}
|
|
4427
|
+
// ─── Private: Color Space Conversion ──────────────────────────────────────
|
|
4428
|
+
/**
|
|
4429
|
+
* Convert color array from linear RGB to sRGB in-place.
|
|
4430
|
+
*/
|
|
4431
|
+
convertColorsLinearToSrgb(colors, count) {
|
|
4432
|
+
for (let i = 0; i < count; i++) {
|
|
4433
|
+
colors[i * 4] = linearToSrgb(Math.max(0, Math.min(1, colors[i * 4])));
|
|
4434
|
+
colors[i * 4 + 1] = linearToSrgb(Math.max(0, Math.min(1, colors[i * 4 + 1])));
|
|
4435
|
+
colors[i * 4 + 2] = linearToSrgb(Math.max(0, Math.min(1, colors[i * 4 + 2])));
|
|
4436
|
+
}
|
|
4437
|
+
}
|
|
4438
|
+
};
|
|
4439
|
+
/**
 * Number of spherical-harmonics coefficients (per color channel) stored for
 * degrees above zero.
 *
 * @param degree - SH degree; only the numbers 1, 2 and 3 are recognised.
 * @returns 3, 8 or 15 for degrees 1, 2 and 3; 0 for degree 0 and for any
 *   other input.
 */
function shDimForDegree2(degree) {
  // Map lookup matches numbers only, mirroring a strict-equality comparison.
  const dimensionByDegree = new Map([
    [1, 3],
    [2, 8],
    [3, 15]
  ]);
  return dimensionByDegree.get(degree) ?? 0;
}
|
|
4453
|
+
|
|
4454
|
+
// src/gpu/codecs/MpegGscCodec.ts
|
|
4455
|
+
// Numeric magic compared against the first four bytes of a candidate file.
// NOTE(review): canDecode reads the value little-endian, so the bytes on disk
// are 43 53 47 4D ("CSGM") — confirm this is the intended byte order.
var MPEG_GSC_MAGIC = 0x4d475343;

/**
 * Placeholder codec for MPEG Gaussian Splat Coding.
 *
 * It can recognise its magic number, but every decoding entry point rejects
 * with CodecNotSupportedError.
 */
var MpegGscCodec = class extends AbstractGaussianCodec {
  codecId = "mpeg.gsc.v1";

  // ─── Capabilities ─────────────────────────────────────────────────────────

  /** Describes this codec; all encode/decode capability flags are false. */
  getCapabilities() {
    const capabilities = {
      id: this.codecId,
      name: "MPEG Gaussian Splat Coding (Stub)",
      version: "0.1.0-stub",
      // Extensions and MIME types are TBD until the standard is finalized.
      fileExtensions: [],
      mimeTypes: [],
      canEncode: false,
      canDecode: false,
      canStream: false,
      canDecodeTemporal: false,
      maxSHDegree: 3,
      // -1 means unlimited (TBD).
      maxGaussianCount: -1,
      // Likely will require WASM for HEVC decode.
      requiresWasm: true,
      requiresWebGPU: false,
      standard: "mpeg",
      maturity: "stub"
    };
    return capabilities;
  }

  // ─── Probe ────────────────────────────────────────────────────────────────

  /**
   * Checks whether `buffer` starts with the MPEG GSC magic number.
   *
   * @param buffer - Data to probe; must be acceptable to `new DataView`.
   * @returns true when at least 4 bytes are present and the first 32-bit
   *   little-endian word equals MPEG_GSC_MAGIC.
   */
  canDecode(buffer) {
    return buffer.byteLength >= 4 && new DataView(buffer).getUint32(0, true) === MPEG_GSC_MAGIC;
  }

  // ─── Stubs ────────────────────────────────────────────────────────────────

  /** Always rejects with CodecNotSupportedError. */
  async extractMetadata(_buffer) {
    const reason = "extractMetadata (MPEG GSC standard is not yet finalized)";
    throw new CodecNotSupportedError(this.codecId, reason);
  }

  /** Always rejects with CodecNotSupportedError. */
  async decode(_buffer, _options) {
    const reason = "decode (MPEG GSC standard is not yet finalized - currently in MPEG Exploration phase as of 2026-03-01)";
    throw new CodecNotSupportedError(this.codecId, reason);
  }

  /** Always rejects with CodecNotSupportedError. */
  async decompress(_compressed) {
    const reason = "decompress (MPEG GSC standard is not yet finalized)";
    throw new CodecNotSupportedError(this.codecId, reason);
  }

  // ─── Informational Methods ────────────────────────────────────────────────

  /**
   * Returns a static snapshot describing where MPEG GSC standardization
   * stands. The values are hard-coded in this stub, not fetched.
   */
  getStandardizationStatus() {
    const compressionApproaches = [
      "GPCC v1 (Geometry-based Point Cloud Compression)",
      "HEVC (High Efficiency Video Coding) for attribute maps",
      "Custom Gaussian-specific entropy coding"
    ];
    return {
      phase: "exploration",
      workingGroups: ["WG4", "WG5", "WG7"],
      lastMeetingDate: "2026-01-23",
      lastMeetingName: "41st JVET / 153rd MPEG",
      compressionApproaches,
      expectedTimeline: "TBD - no formal standardization kicked off",
      referenceUrl: "https://mpeg.expert/gsc/index.html"
    };
  }

  /**
   * Reports whether a newer codec build is available.
   *
   * @returns Always resolves to false; no lookup is performed.
   */
  async checkForUpdates() {
    return false;
  }
};
|
|
4541
|
+
|
|
4542
|
+
// src/gpu/codecs/GaussianCodecRegistry.ts
|
|
4543
|
+
/**
 * Registry of Gaussian-splat codecs with priority-based auto-detection and
 * lazy, de-duplicated codec initialization.
 */
var GaussianCodecRegistry = class {
  /**
   * Entries keyed by capability id. Each entry holds
   * `{ codec, capabilities, priority, initialized, initPromise }`.
   */
  codecs = /* @__PURE__ */ new Map();

  // ─── Registration ─────────────────────────────────────────────────────────

  /**
   * Register a codec. An existing entry with the same capability id is
   * replaced.
   *
   * @param codec - Codec instance to register
   * @param priority - Priority for codec selection (default: 0, higher = preferred)
   * @returns The registry instance (for chaining)
   */
  register(codec, priority = 0) {
    const capabilities = codec.getCapabilities();
    this.codecs.set(capabilities.id, {
      codec,
      capabilities,
      priority,
      initialized: false,
      // In-flight initialize() promise, shared by concurrent callers.
      initPromise: null
    });
    return this;
  }

  /**
   * Unregister a codec by ID, disposing it first.
   *
   * @param codecId - ID of the codec to unregister
   * @returns true if the codec was found and removed
   */
  unregister(codecId) {
    const entry = this.codecs.get(codecId);
    if (!entry) {
      return false;
    }
    entry.codec.dispose();
    this.codecs.delete(codecId);
    return true;
  }

  // ─── Codec Access ─────────────────────────────────────────────────────────

  /**
   * Get a specific codec by ID.
   *
   * @param codecId - Codec identifier
   * @returns The codec instance, or undefined if not registered
   */
  getCodec(codecId) {
    return this.codecs.get(codecId)?.codec;
  }

  /**
   * Get a specific codec by ID, throwing if not found.
   *
   * @param codecId - Codec identifier
   * @returns The codec instance
   * @throws Error if the codec is not registered
   */
  requireCodec(codecId) {
    const codec = this.getCodec(codecId);
    if (!codec) {
      throw new Error(
        `Codec '${codecId}' is not registered. Available codecs: ${this.getRegisteredIds().join(", ")}`
      );
    }
    return codec;
  }

  /** Get all registered codec IDs, in registration order. */
  getRegisteredIds() {
    return Array.from(this.codecs.keys());
  }

  /** Get the capabilities captured at registration for every codec. */
  getAllCapabilities() {
    return Array.from(this.codecs.values(), (entry) => entry.capabilities);
  }

  /** Check if a specific codec is registered. */
  hasCodec(codecId) {
    return this.codecs.has(codecId);
  }

  // ─── Auto-Detection ───────────────────────────────────────────────────────

  /**
   * Auto-detect the best codec for a given file.
   *
   * Detection order:
   * 1. Explicit `codecId` (bypasses detection and the maturity filter)
   * 2. Codecs whose `canDecode(headerBytes)` returns true
   * 3. If none matched: codecs listing the URL's file extension
   * 4. Among the matches, the highest priority wins (ties keep registration order)
   *
   * @param options - `{ codecId?, headerBytes?, url?, maturityFilter? }`
   * @returns Best-matching codec, or undefined if no codec can handle the file
   */
  detectCodec(options) {
    if (options.codecId) {
      return this.getCodec(options.codecId);
    }
    // Entries passing the maturity filter and the supplied predicate.
    const collect = (predicate) => {
      const matches = [];
      for (const entry of this.codecs.values()) {
        if (this.matchesMaturity(entry, options.maturityFilter) && predicate(entry)) {
          matches.push(entry);
        }
      }
      return matches;
    };

    let candidates = [];
    if (options.headerBytes) {
      candidates = collect((entry) => entry.codec.canDecode(options.headerBytes));
    }
    if (candidates.length === 0 && options.url) {
      const ext = this.extractExtension(options.url);
      if (ext) {
        candidates = collect((entry) => entry.capabilities.fileExtensions.includes(ext));
      }
    }
    if (candidates.length === 0) return void 0;
    candidates.sort((a, b) => b.priority - a.priority);
    return candidates[0].codec;
  }

  /**
   * Auto-detect and decode a buffer.
   *
   * The first 64 bytes (or fewer) are offered to the codecs for magic-byte
   * detection; `options.url` is used as an extension fallback.
   *
   * @param buffer - Raw binary data
   * @param options - Decode options + detection options
   * @returns Decoded Gaussian data
   * @throws Error if no codec can handle the data
   */
  async decode(buffer, options) {
    const codec = this.detectCodec({
      codecId: options?.codecId,
      headerBytes: buffer.slice(0, Math.min(buffer.byteLength, 64)),
      url: options?.url,
      maturityFilter: options?.maturityFilter
    });
    if (!codec) {
      throw new Error(
        `No codec found that can decode this data. Registered codecs: ${this.getRegisteredIds().join(", ")}. Ensure the correct codec is registered or specify codecId explicitly.`
      );
    }
    await this.ensureInitialized(codec);
    return codec.decode(buffer, options);
  }

  /**
   * Pick a codec from the URL, fetch the resource and decode it.
   *
   * The codec is chosen from `options.codecId` or the URL's extension before
   * any network access. The response body is read completely into memory and
   * then decoded (it is not decoded incrementally). Unless overridden, only
   * codecs with maturity "production" or "beta" are considered.
   *
   * @param url - URL to fetch and decode
   * @param options - Decode and detection options
   * @returns Decoded Gaussian data
   * @throws Error if no codec matches or the HTTP response is not OK
   */
  async decodeFromUrl(url, options) {
    const codec = this.detectCodec({
      codecId: options?.codecId,
      url,
      maturityFilter: options?.maturityFilter ?? ["production", "beta"]
    });
    if (!codec) {
      throw new Error(
        `No codec found for URL '${url}'. Registered codecs: ${this.getRegisteredIds().join(", ")}`
      );
    }
    await this.ensureInitialized(codec);
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText} for ${url}`);
    }
    const buffer = await response.arrayBuffer();
    return codec.decode(buffer, options);
  }

  // ─── Lifecycle ────────────────────────────────────────────────────────────

  /**
   * Initialize all registered codecs that are not initialized yet.
   *
   * Useful for pre-warming at application startup.
   */
  async initializeAll() {
    const entries = Array.from(this.codecs.values());
    await Promise.all(entries.map((entry) => this.initializeEntry(entry)));
  }

  /** Dispose all registered codecs and clear the registry. */
  disposeAll() {
    for (const entry of this.codecs.values()) {
      entry.codec.dispose();
    }
    this.codecs.clear();
  }

  // ─── Private Helpers ──────────────────────────────────────────────────────

  /** Initializes the registry entry matching `codec`'s capability id, if any. */
  async ensureInitialized(codec) {
    const entry = this.codecs.get(codec.getCapabilities().id);
    if (entry) {
      await this.initializeEntry(entry);
    }
  }

  /**
   * Runs `initialize()` for an entry at most once at a time.
   *
   * Callers arriving while an initialization is in flight share its promise,
   * so overlapping decode()/initializeAll() calls do not initialize a codec
   * twice. A failed attempt is not cached; the next caller retries.
   */
  initializeEntry(entry) {
    if (entry.initialized) {
      return Promise.resolve();
    }
    if (entry.initPromise) {
      return entry.initPromise;
    }
    const pending = (async () => {
      await entry.codec.initialize();
      entry.initialized = true;
    })();
    entry.initPromise = pending;
    // Clear the slot only after it has been assigned, so a synchronous throw
    // from initialize() cannot leave a rejected promise cached.
    const release = () => {
      if (entry.initPromise === pending) {
        entry.initPromise = null;
      }
    };
    pending.then(release, release);
    return pending;
  }

  /**
   * Extracts the lower-cased file extension from a URL or path.
   *
   * Query string and fragment are ignored, and only the last path segment is
   * examined, so dots in the host name or in directory names are never taken
   * for an extension.
   *
   * @returns The extension without the dot, or undefined when there is none.
   */
  extractExtension(url) {
    const withoutQuery = url.split(/[?#]/)[0];
    const path = withoutQuery.replace(/^(?:[a-z][a-z0-9+.-]*:)?\/\/[^/]*/i, "");
    const fileName = path.slice(path.lastIndexOf("/") + 1);
    const dotIndex = fileName.lastIndexOf(".");
    if (dotIndex === -1 || dotIndex === fileName.length - 1) {
      return void 0;
    }
    return fileName.slice(dotIndex + 1).toLowerCase();
  }

  /** True when no filter is given or the entry's maturity is listed in it. */
  matchesMaturity(entry, filter) {
    if (!filter) return true;
    return filter.includes(entry.capabilities.maturity);
  }
};
|
|
4761
|
+
/**
 * Builds a fresh registry holding the three bundled codecs.
 *
 * Priorities: SpzCodec 100, GltfGaussianSplatCodec 50, MpegGscCodec 0
 * (higher is preferred during detection).
 *
 * @returns A new GaussianCodecRegistry instance.
 */
function createDefaultCodecRegistry() {
  // register() returns the registry, so the calls can be chained.
  return new GaussianCodecRegistry()
    .register(new SpzCodec(), 100)
    .register(new GltfGaussianSplatCodec(), 50)
    .register(new MpegGscCodec(), 0);
}
|
|
4768
|
+
// Lazily created shared registry; null until first requested or after reset.
var globalRegistry = null;

/**
 * Returns the shared registry, creating it with the default codecs on the
 * first call (and again after it has been reset to null).
 */
function getGlobalCodecRegistry() {
  return globalRegistry ? globalRegistry : (globalRegistry = createDefaultCodecRegistry());
}
|
|
4775
|
+
/**
 * Disposes every codec of the shared registry and forgets the registry, so
 * the next getGlobalCodecRegistry() call builds a new one. Does nothing when
 * no shared registry exists.
 */
function resetGlobalCodecRegistry() {
  if (!globalRegistry) {
    return;
  }
  globalRegistry.disposeAll();
  globalRegistry = null;
}
|
|
4781
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
4782
|
+
0 && (module.exports = {
|
|
4783
|
+
AbstractGaussianCodec,
|
|
4784
|
+
CodecDecodeError,
|
|
4785
|
+
CodecDecompressError,
|
|
4786
|
+
CodecEncodeError,
|
|
4787
|
+
CodecMemoryError,
|
|
4788
|
+
CodecNotSupportedError,
|
|
4789
|
+
ComputePipeline,
|
|
4790
|
+
GPUBufferManager,
|
|
4791
|
+
GaussianCodecError,
|
|
4792
|
+
GaussianCodecRegistry,
|
|
4793
|
+
GaussianSplatExtractor,
|
|
4794
|
+
GaussianSplatSorter,
|
|
4795
|
+
GltfGaussianSplatCodec,
|
|
4796
|
+
InstancedRenderer,
|
|
4797
|
+
MpegGscCodec,
|
|
4798
|
+
SparseLinearSolver,
|
|
4799
|
+
SpatialGrid,
|
|
4800
|
+
SpzCodec,
|
|
4801
|
+
WebGPUContext,
|
|
4802
|
+
createDefaultCodecRegistry,
|
|
4803
|
+
createGPUPhysicsSimulation,
|
|
4804
|
+
createGaussianSplatSorter,
|
|
4805
|
+
createInitialParticleData,
|
|
4806
|
+
createPhysicsSimulation,
|
|
4807
|
+
getGlobalCodecRegistry,
|
|
4808
|
+
getGlobalWebGPUContext,
|
|
4809
|
+
resetGlobalCodecRegistry
|
|
4810
|
+
});
|