rayzee 6.4.0 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -5
- package/dist/rayzee.es.js +4953 -4225
- package/dist/rayzee.es.js.map +1 -1
- package/dist/rayzee.umd.js +157 -236
- package/dist/rayzee.umd.js.map +1 -1
- package/package.json +1 -1
- package/src/EngineDefaults.js +29 -13
- package/src/PathTracerApp.js +119 -26
- package/src/Pipeline/PipelineContext.js +1 -2
- package/src/Pipeline/RenderPipeline.js +1 -1
- package/src/Pipeline/RenderStage.js +1 -1
- package/src/Processor/CameraOptimizer.js +0 -5
- package/src/Processor/GeometryExtractor.js +22 -1
- package/src/Processor/KernelManager.js +277 -0
- package/src/Processor/PackedRayBuffer.js +265 -0
- package/src/Processor/QueueManager.js +173 -0
- package/src/Processor/SceneProcessor.js +1 -0
- package/src/Processor/ShaderBuilder.js +11 -316
- package/src/Processor/StorageTexturePool.js +29 -15
- package/src/Processor/TextureCreator.js +6 -0
- package/src/Processor/VRAMTracker.js +169 -0
- package/src/Processor/utils.js +11 -110
- package/src/RenderSettings.js +1 -3
- package/src/Stages/ASVGF.js +76 -20
- package/src/Stages/BilateralFilter.js +34 -10
- package/src/Stages/EdgeFilter.js +2 -3
- package/src/Stages/MotionVector.js +16 -9
- package/src/Stages/NormalDepth.js +17 -5
- package/src/Stages/PathTracer.js +671 -1456
- package/src/Stages/PathTracerStage.js +1451 -0
- package/src/Stages/SSRC.js +32 -15
- package/src/Stages/Variance.js +35 -12
- package/src/TSL/BVHTraversal.js +7 -1
- package/src/TSL/Common.js +12 -2
- package/src/TSL/CompactKernel.js +110 -0
- package/src/TSL/DebugKernel.js +98 -0
- package/src/TSL/Environment.js +13 -11
- package/src/TSL/ExtendKernel.js +75 -0
- package/src/TSL/FinalWriteKernel.js +121 -0
- package/src/TSL/GenerateKernel.js +109 -0
- package/src/TSL/LightsSampling.js +2 -2
- package/src/TSL/MaterialTransmission.js +32 -2
- package/src/TSL/PathTracerCore.js +43 -912
- package/src/TSL/ShadeKernel.js +873 -0
- package/src/TSL/Struct.js +5 -0
- package/src/TSL/Subsurface.js +232 -0
- package/src/TSL/patches.js +81 -4
- package/src/index.js +3 -0
- package/src/managers/CameraManager.js +1 -1
- package/src/managers/DenoisingManager.js +40 -75
- package/src/managers/EnvironmentManager.js +30 -39
- package/src/managers/MaterialDataManager.js +60 -1
- package/src/managers/OverlayManager.js +7 -22
- package/src/managers/UniformManager.js +1 -3
- package/src/managers/helpers/TileHelper.js +2 -2
- package/src/Stages/AdaptiveSampling.js +0 -483
- package/src/TSL/PathTracer.js +0 -384
- package/src/managers/TileManager.js +0 -298
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* KernelManager.js
|
|
3
|
+
*
|
|
4
|
+
* Builds, caches, and dispatches individual compute nodes for the wavefront
|
|
5
|
+
* path tracing pipeline. Each kernel is a separate `Fn().compute()` node.
|
|
6
|
+
*
|
|
7
|
+
* Manages workgroup sizes, dispatch dimensions, and provides a unified
|
|
8
|
+
* dispatch interface that wraps `renderer.compute(node)`.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
/** Default workgroup sizes per kernel type */
|
|
12
|
+
const WORKGROUP_SIZES = {
	generate: [ 16, 16, 1 ], // 2D screen-space
	extend: [ 256, 1, 1 ], // 1D ray-parallel
	sort: [ 256, 1, 1 ], // 1D ray-parallel
	shade: [ 256, 1, 1 ], // 1D ray-parallel (sorted)
	connect: [ 256, 1, 1 ], // 1D shadow-ray-parallel
	accumulate: [ 256, 1, 1 ], // 1D shadow-ray-parallel
	compact: [ 256, 1, 1 ], // 1D ray-parallel
	resetCounters: [ 1, 1, 1 ], // Single thread
	finalWrite: [ 16, 16, 1 ], // 2D screen-space
};

/**
 * Registry of named compute nodes plus the bookkeeping needed to dispatch
 * them through `renderer.compute( node )` and to time those submissions.
 */
export class KernelManager {

	/**
	 * @param {WebGPURenderer} renderer - Three.js WebGPU renderer
	 */
	constructor( renderer ) {

		/**
		 * @type {WebGPURenderer}
		 */
		this.renderer = renderer;

		/**
		 * Map of kernel name → ComputeNode.
		 * @type {Map<string, ComputeNode>}
		 */
		this.kernels = new Map();

		/**
		 * Map of kernel name → workgroup size [x, y, z].
		 * @type {Map<string, number[]>}
		 */
		this.workgroupSizes = new Map();

		/**
		 * First-dispatch timing per kernel. `lastDispatchMs` is written once,
		 * on the first dispatch of a kernel, and is not updated afterwards.
		 * @type {Map<string, {compiledOnce: boolean, lastDispatchMs: number}>}
		 */
		this.timing = new Map();

		/**
		 * Optional per-kernel CPU-side submission timing (encode/dispatch cost only;
		 * does NOT measure GPU execution time). Toggle via enableProfiling().
		 * @type {boolean}
		 */
		this.profiling = false;

		/**
		 * Aggregated profile: kernel name → { calls, totalMs }.
		 * @type {Map<string, {calls: number, totalMs: number}>}
		 */
		this.profile = new Map();

		// Copy each default so every manager owns its arrays: the array handed out
		// by getWorkgroupSize() must not alias the module-level defaults, otherwise
		// a caller mutating it would silently change the sizes of all instances.
		for ( const [ name, wgSize ] of Object.entries( WORKGROUP_SIZES ) ) {

			this.workgroupSizes.set( name, [ ...wgSize ] );

		}

	}

	/**
	 * Register a pre-built compute node. Registering an existing name replaces
	 * the node and resets its first-dispatch timing entry.
	 * @param {string} name - Kernel name (e.g. 'generate', 'extend')
	 * @param {ComputeNode} computeNode - Built via `Fn().compute([dx,dy,dz], [wgx,wgy,wgz])`
	 */
	register( name, computeNode ) {

		this.kernels.set( name, computeNode );
		this.timing.set( name, { compiledOnce: false, lastDispatchMs: 0 } );

	}

	/**
	 * Dispatch a kernel by name.
	 *
	 * The first dispatch of a kernel is always timed and logged (it includes
	 * compilation) and is not added to the profile. Later dispatches are timed
	 * only while profiling is enabled.
	 *
	 * @param {string} name - Kernel name
	 * @throws {Error} If no kernel is registered under `name`.
	 */
	dispatch( name ) {

		const node = this.kernels.get( name );

		if ( ! node ) {

			throw new Error( `KernelManager: Unknown kernel '${name}'` );

		}

		const timingEntry = this.timing.get( name );
		const isFirstDispatch = Boolean( timingEntry ) && ! timingEntry.compiledOnce;

		// Fast path: nothing to measure.
		if ( ! isFirstDispatch && ! this.profiling ) {

			this.renderer.compute( node );
			return;

		}

		const t0 = performance.now();
		this.renderer.compute( node );
		const t1 = performance.now();

		if ( isFirstDispatch ) {

			timingEntry.compiledOnce = true;
			timingEntry.lastDispatchMs = t1 - t0;
			console.log( `[Wavefront] Kernel '${name}' first dispatch (includes compilation): ${( t1 - t0 ).toFixed( 1 )}ms` );
			return;

		}

		let entry = this.profile.get( name );

		if ( ! entry ) {

			entry = { calls: 0, totalMs: 0 };
			this.profile.set( name, entry );

		}

		entry.calls ++;
		entry.totalMs += t1 - t0;

	}

	/**
	 * Update dispatch dimensions for a kernel by assigning `node.dispatchSize`.
	 * Unknown kernel names are ignored (unlike dispatch(), which throws).
	 * @param {string} name - Kernel name
	 * @param {number[]} count - Dispatch dimensions [x, y, z]
	 */
	setDispatchCount( name, count ) {

		const node = this.kernels.get( name );
		if ( ! node ) return;
		node.dispatchSize = count;

	}

	/**
	 * Calculate 2D dispatch dimensions for a screen-space kernel.
	 * Falls back to a 16×16 workgroup when the kernel name has no size entry.
	 * @param {number} width - Render width in pixels
	 * @param {number} height - Render height in pixels
	 * @param {string} kernelName - Kernel name for WG size lookup
	 * @returns {number[]} [dispatchX, dispatchY, 1]
	 */
	calcScreenDispatch( width, height, kernelName ) {

		const wg = this.workgroupSizes.get( kernelName ) || [ 16, 16, 1 ];

		return [
			Math.ceil( width / wg[ 0 ] ),
			Math.ceil( height / wg[ 1 ] ),
			1
		];

	}

	/**
	 * Calculate 1D dispatch dimensions for a ray-parallel kernel.
	 * Falls back to a workgroup width of 256 when the kernel name has no size entry.
	 * @param {number} rayCount - Number of rays to process
	 * @param {string} kernelName - Kernel name for WG size lookup
	 * @returns {number[]} [dispatchX, 1, 1]
	 */
	calcRayDispatch( rayCount, kernelName ) {

		const wg = this.workgroupSizes.get( kernelName ) || [ 256, 1, 1 ];

		return [
			Math.ceil( rayCount / wg[ 0 ] ),
			1,
			1
		];

	}

	/**
	 * Get the workgroup size for a kernel.
	 * @param {string} name
	 * @returns {number[]} This manager's size array, or [256, 1, 1] for unknown names.
	 */
	getWorkgroupSize( name ) {

		return this.workgroupSizes.get( name ) || [ 256, 1, 1 ];

	}

	/**
	 * Check if a kernel has been registered.
	 * @param {string} name
	 * @returns {boolean}
	 */
	has( name ) {

		return this.kernels.has( name );

	}

	/**
	 * Get the underlying compute node.
	 * @param {string} name
	 * @returns {ComputeNode|undefined}
	 */
	get( name ) {

		return this.kernels.get( name );

	}

	/**
	 * Get first-dispatch compilation timing for all kernels.
	 * @returns {Object} name → { compiledOnce, lastDispatchMs } (shallow copies)
	 */
	getTimingReport() {

		const report = {};

		for ( const [ name, data ] of this.timing ) {

			report[ name ] = { ...data };

		}

		return report;

	}

	/**
	 * Toggle per-kernel CPU-submission profiling. Measures only encode/dispatch
	 * cost on CPU (GPU work is async and NOT included). Enabling clears any
	 * previously accumulated profile; disabling keeps it readable.
	 * @param {boolean} enabled
	 */
	enableProfiling( enabled ) {

		this.profiling = enabled;
		if ( enabled ) this.profile.clear();

	}

	/**
	 * Get accumulated profiling data.
	 * @returns {Array<{name: string, calls: number, totalMs: number, avgMs: (number|null)}>}
	 *   One row per profiled kernel, sorted by descending totalMs, followed by a
	 *   final 'TOTAL' row whose avgMs is null.
	 */
	getProfileReport() {

		const rows = [];
		let sum = 0;

		for ( const [ name, { calls, totalMs } ] of this.profile ) {

			sum += totalMs;
			rows.push( { name, calls, totalMs: + totalMs.toFixed( 2 ), avgMs: + ( totalMs / calls ).toFixed( 3 ) } );

		}

		rows.sort( ( a, b ) => b.totalMs - a.totalMs );
		rows.push( { name: 'TOTAL', calls: rows.reduce( ( s, r ) => s + r.calls, 0 ), totalMs: + sum.toFixed( 2 ), avgMs: null } );
		return rows;

	}

	/**
	 * Forget all registered kernels and collected timing/profile data.
	 * Workgroup sizes are kept.
	 */
	dispose() {

		this.kernels.clear();
		this.timing.clear();
		this.profile.clear();

	}

}
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Packed buffer manager for wavefront path tracing — one storage buffer per data category.
|
|
3
|
+
* RAY/HIT are SoA-within-a-buffer (field `slot` of element `id` lives at `id + slot*_cap`).
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import {
|
|
7
|
+
storage, uintBitsToFloat, floatBitsToUint, vec2, vec3, vec4, uvec4, uint, int,
|
|
8
|
+
packSnorm2x16, packUnorm2x16, unpackSnorm2x16, unpackUnorm2x16,
|
|
9
|
+
} from 'three/tsl';
|
|
10
|
+
import { StorageInstancedBufferAttribute } from 'three/webgpu';
|
|
11
|
+
|
|
12
|
+
// Number of vec4 slots stored per ray / per hit (one entry per key of RAY / HIT below).
export const RAY_STRIDE = 7;
export const HIT_STRIDE = 2;

// Per-pixel G-buffer (first-hit MRT staging): a single uvec4 per pixel, half-precision packed
// with pack2x16 (no f32 bitcast):
//   .x = packSnorm2x16(normal.xy)   .y = packSnorm2x16(normal.z, depth)
//   .z = packUnorm2x16(albedo.rg)   .w = packUnorm2x16(albedo.b, 0)
// Kept in its own buffer rather than in RAY because it is per-pixel, not per-ray×S; written by
// Generate/Shade at bounce 0 and read only by FinalWrite.
export const GBUFFER_STRIDE = 1;

// Slot index of each vec4 field of a ray.
export const RAY = {
	ORIGIN_META: 0, // vec4(origin.xyz, uintBitsToFloat(perRayBounces | sssSteps<<8)); pixelIndex+sampleIndex derived from rayID
	DIR_FLAGS: 1, // vec4(direction.xyz, uintBitsToFloat(bounceFlags))
	THROUGHPUT_PDF: 2, // vec4(throughput.xyz, pdf)
	RADIANCE_ALPHA: 3, // vec4(radiance.xyz, alpha)
	MEDIUM_STACK: 4, // vec4(uintBitsToFloat(stackDepth|transTraversals<<8|wavelength<<16), ior1, ior2, ior3)
	MEDIUM_SIGMA_A: 5, // vec4(sigmaA.xyz, _) — Beer-Lambert absorption coeff of the active medium (KHR_materials_volume + SSS)
	SSS_SIGMA_S: 6, // vec4(sigmaS.xyz, g) — SSS scattering coeff + Henyey-Greenstein anisotropy (sigmaS==0 ⇒ glass)
};

// Slot index of each vec4 field of a hit record.
export const HIT = {
	DIST_TRI_BARY: 0, // vec4(distance, uintBitsToFloat(triIndex), bary.u, bary.v)
	NORMAL_MAT: 1, // vec4(geoNormal.xyz, uintBitsToFloat(matIndex | meshIndex<<16))
};

// Length of one SoA region. It is read as a plain JS number when an accessor builds its index,
// so it is baked into the shader graph at build time; single instance, rebuilt on resize.
let _cap = 0;

// Element index of field `slot` for element `id`: `id + slot * _cap`.
// Slot 0 needs no offset, so `id` is returned untouched (no add node is created).
const soa = ( id, slot ) => {

	if ( slot === 0 ) return id;

	return id.add( slot * _cap );

};
|
|
38
|
+
|
|
39
|
+
export class PackedRayBuffer {

	/**
	 * Capacity (in rays) that allocate()/resize() reserve for `maxRays`.
	 *
	 * 1.25× headroom, NO pow2 rounding — the pow2 jump nearly doubled VRAM
	 * (e.g. 2048²: 5.24M→8.39M) for no realloc benefit: the app's discrete
	 * resolution presets always exceed the 1.25× margin on a tier change, so
	 * they rebuild anyway.
	 *
	 * @param {number} maxRays
	 * @returns {number}
	 */
	static requiredCapacity( maxRays ) {

		return Math.ceil( maxRays * 1.25 );

	}

	/**
	 * @param {number} [maxRays=0] - When > 0, buffers are allocated immediately.
	 */
	constructor( maxRays = 0 ) {

		this.capacity = 0;
		this._attrs = {};

		// Each: { rw: StorageBufferNode, ro: StorageBufferNode } over one shared GPU buffer.
		this.rayBuffer = null;
		this.rngBuffer = null;
		this.hitBuffer = null;

		if ( maxRays > 0 ) this.allocate( maxRays );

	}

	/**
	 * Drop the current buffers and create new ray / rng / hit storage sized for
	 * `requiredCapacity( maxRays )` rays. Also updates the module-level SoA
	 * region stride used by the accessor helpers.
	 *
	 * @param {number} maxRays
	 */
	allocate( maxRays ) {

		this.dispose();

		// Single source of truth for the headroom policy, shared with resize().
		const capacity = PackedRayBuffer.requiredCapacity( maxRays );
		this.capacity = capacity;
		_cap = capacity;

		// count=0 so StorageBufferNode.getHash() shares the buffer → RW and RO nodes bind the same GPU data.
		const rayCount = capacity * RAY_STRIDE;
		const rayAttr = new StorageInstancedBufferAttribute( new Float32Array( rayCount * 4 ), 4 );
		this._attrs.ray = rayAttr;
		this.rayBuffer = {
			rw: storage( rayAttr, 'vec4' ),
			ro: storage( rayAttr, 'vec4' ).toReadOnly(),
		};

		// One uint per ray.
		const rngAttr = new StorageInstancedBufferAttribute( new Uint32Array( capacity ), 1 );
		this._attrs.rng = rngAttr;
		this.rngBuffer = {
			rw: storage( rngAttr, 'uint' ),
			ro: storage( rngAttr, 'uint' ).toReadOnly(),
		};

		const hitCount = capacity * HIT_STRIDE;
		const hitAttr = new StorageInstancedBufferAttribute( new Float32Array( hitCount * 4 ), 4 );
		this._attrs.hit = hitAttr;
		this.hitBuffer = {
			rw: storage( hitAttr, 'vec4' ),
			ro: storage( hitAttr, 'vec4' ).toReadOnly(),
		};

		// 16 bytes per vec4 element, 4 bytes per rng uint.
		const totalMB = (
			rayCount * 16 + capacity * 4 + hitCount * 16
		) / ( 1024 * 1024 );

		console.log(
			`PackedRayBuffer: capacity=${capacity}, total=${totalMB.toFixed( 1 )} MB ` +
			`(ray=${( rayCount * 16 / 1048576 ).toFixed( 0 )}MB hit=${( hitCount * 16 / 1048576 ).toFixed( 0 )}MB) [SoA ray/hit]`
		);

	}

	/**
	 * Reallocates only if `maxRays` needs more capacity than is currently held.
	 *
	 * @param {number} maxRays
	 * @returns {boolean} true if a reallocation happened.
	 */
	resize( maxRays ) {

		const needed = PackedRayBuffer.requiredCapacity( maxRays );
		if ( needed <= this.capacity && this.capacity > 0 ) return false;
		this.allocate( maxRays );
		return true;

	}

	/**
	 * Drop all references to the attributes and storage nodes and reset capacity to 0.
	 */
	dispose() {

		this._attrs = {};
		this.rayBuffer = null;
		this.rngBuffer = null;
		this.hitBuffer = null;
		this.capacity = 0;

	}

}
|
|
128
|
+
|
|
129
|
+
// TSL accessor helpers — call inside Fn() scopes. `buf` is the .rw/.ro StorageBufferNode, `id` a uint node.
|
|
130
|
+
|
|
131
|
+
// Ray origin: xyz of the ORIGIN_META slot.
export const readRayOrigin = ( buf, id ) => {

	const originMeta = buf.element( soa( id, RAY.ORIGIN_META ) );
	return originMeta.xyz;

};

// Ray direction: xyz of the DIR_FLAGS slot.
export const readRayDirection = ( buf, id ) => {

	const dirFlags = buf.element( soa( id, RAY.DIR_FLAGS ) );
	return dirFlags.xyz;

};

// Bounce flags: w of the DIR_FLAGS slot, reinterpreted from float bits to uint.
export const readRayBounceFlags = ( buf, id ) => {

	const dirFlags = buf.element( soa( id, RAY.DIR_FLAGS ) );
	return floatBitsToUint( dirFlags.w );

};

// Path throughput: xyz of the THROUGHPUT_PDF slot.
export const readRayThroughput = ( buf, id ) => {

	const throughputPdf = buf.element( soa( id, RAY.THROUGHPUT_PDF ) );
	return throughputPdf.xyz;

};

// Pdf: w of the THROUGHPUT_PDF slot.
export const readRayPdf = ( buf, id ) => {

	const throughputPdf = buf.element( soa( id, RAY.THROUGHPUT_PDF ) );
	return throughputPdf.w;

};

// Whole RADIANCE_ALPHA element (radiance.xyz, alpha), returned unswizzled.
export const readRayRadiance = ( buf, id ) => {

	const radianceAlpha = buf.element( soa( id, RAY.RADIANCE_ALPHA ) );
	return radianceAlpha;

};
|
|
148
|
+
|
|
149
|
+
// ── Per-pixel G-buffer (first-hit MRT). 1 uvec4/pixel (element p), pack2x16 lanes. ──
|
|
150
|
+
// normal: raw unit vec3; depth: linear [0,1]; albedo: vec3 [0,1]. Packed values live in u32 lanes
|
|
151
|
+
// verbatim (no f32 bitcast) so NaN-range bit patterns (snorm ±1 → 0x7FFF) survive store/load intact.
|
|
152
|
+
// Packs normal / depth / albedo into the four u32 lanes of element `pixelIndex`:
//   x = snorm(normal.xy), y = snorm(normal.z, depth), z = unorm(albedo.xy), w = unorm(albedo.z, 0).
export const writeGBuffer = ( buf, pixelIndex, normal, depth, albedo ) => {

	const target = buf.element( pixelIndex );
	const normalXY = packSnorm2x16( vec2( normal.x, normal.y ) );
	const normalZDepth = packSnorm2x16( vec2( normal.z, depth ) );
	const albedoXY = packUnorm2x16( vec2( albedo.x, albedo.y ) );
	const albedoZ = packUnorm2x16( vec2( albedo.z, 0.0 ) );

	return target.assign( uvec4( normalXY, normalZDepth, albedoXY, albedoZ ) );

};

// Raw packed element for `pixelIndex`; decode with the gbDecode* helpers.
export const readGBuffer = ( buf, pixelIndex ) => {

	return buf.element( pixelIndex );

};

// Decode for FinalWrite: xyz is the unpacked normal remapped with *0.5+0.5 (matches the prior
// path), w is the unpacked depth left as-is.
export const gbDecodeNormalDepth = ( packed ) => {

	const normalXY = unpackSnorm2x16( packed.x );
	const normalZDepth = unpackSnorm2x16( packed.y );
	const rawNormal = vec3( normalXY.x, normalXY.y, normalZDepth.x );

	return vec4( rawNormal.mul( 0.5 ).add( 0.5 ), normalZDepth.y );

};

// Albedo: both halves of lane z plus the first half of lane w.
export const gbDecodeAlbedo = ( packed ) => {

	const albedoXY = unpackUnorm2x16( packed.z );
	const albedoZ = unpackUnorm2x16( packed.w ).x;

	return vec3( albedoXY, albedoZ );

};
|
|
171
|
+
|
|
172
|
+
// .w packs per-ray bounce state: perRayBounces (bits 0-7) | sssSteps (bits 8-15). pixelIndex +
|
|
173
|
+
// sampleIndex are NOT stored — derived from rayID (= subSample*w*h + pixelIndex) in-kernel.
|
|
174
|
+
// Stores the origin in xyz and `bounces | sssSteps << 8` (bit-cast to float) in w.
export const writeRayOriginMeta = ( buf, id, origin, bounces, sssSteps ) => {

	const target = buf.element( soa( id, RAY.ORIGIN_META ) );
	const packedState = uint( bounces ).bitOr( uint( sssSteps ).shiftLeft( 8 ) );

	return target.assign( vec4( origin, uintBitsToFloat( packedState ) ) );

};

// Stores the direction in xyz and the bounce flags (bit-cast to float) in w.
export const writeRayDirFlags = ( buf, id, direction, bounceFlags ) => {

	const target = buf.element( soa( id, RAY.DIR_FLAGS ) );

	return target.assign( vec4( direction, uintBitsToFloat( bounceFlags ) ) );

};

// Stores the throughput in xyz and the pdf in w.
export const writeRayThroughputPdf = ( buf, id, throughput, pdf ) => {

	const target = buf.element( soa( id, RAY.THROUGHPUT_PDF ) );

	return target.assign( vec4( throughput, pdf ) );

};

// Stores `radiance` as the whole RADIANCE_ALPHA element.
export const writeRayRadiance = ( buf, id, radiance ) => {

	const target = buf.element( soa( id, RAY.RADIANCE_ALPHA ) );

	return target.assign( radiance );

};
|
|
191
|
+
|
|
192
|
+
// Hit distance: x of the DIST_TRI_BARY slot.
export const readHitDistance = ( buf, id ) => {

	const distTriBary = buf.element( soa( id, HIT.DIST_TRI_BARY ) );
	return distTriBary.x;

};

// Triangle index: y of the DIST_TRI_BARY slot, reinterpreted from float bits to uint.
export const readHitTriangleIndex = ( buf, id ) => {

	const distTriBary = buf.element( soa( id, HIT.DIST_TRI_BARY ) );
	return floatBitsToUint( distTriBary.y );

};

// Barycentrics (u, v): zw of the DIST_TRI_BARY slot.
export const readHitBarycentrics = ( buf, id ) => {

	const distTriBary = buf.element( soa( id, HIT.DIST_TRI_BARY ) );
	return distTriBary.zw;

};

// Geometric normal: xyz of the NORMAL_MAT slot.
export const readHitNormal = ( buf, id ) => {

	const normalMat = buf.element( soa( id, HIT.NORMAL_MAT ) );
	return normalMat.xyz;

};

// Material index: low 16 bits of the packed w word of the NORMAL_MAT slot.
export const readHitMaterialIndex = ( buf, id ) => {

	const packedWord = floatBitsToUint( buf.element( soa( id, HIT.NORMAL_MAT ) ).w );
	return uint( packedWord.bitAnd( 0xFFFF ) );

};

// Mesh index: the packed w word of the NORMAL_MAT slot shifted right by 16.
export const readHitMeshIndex = ( buf, id ) => {

	const packedWord = floatBitsToUint( buf.element( soa( id, HIT.NORMAL_MAT ) ).w );
	return packedWord.shiftRight( 16 );

};

// Writes both hit slots: (distance, triIndex bits, baryU, baryV) and
// (normal.xyz, bits of matIndex | meshIndex << 16).
export const writeHitPacked = ( buf, id, distance, triIndex, baryU, baryV, normal, matIndex, meshIndex ) => {

	const distTriBary = buf.element( soa( id, HIT.DIST_TRI_BARY ) );
	distTriBary.assign( vec4( distance, uintBitsToFloat( triIndex ), baryU, baryV ) );

	const normalMat = buf.element( soa( id, HIT.NORMAL_MAT ) );
	const packedIndices = matIndex.bitOr( meshIndex.shiftLeft( 16 ) );
	normalMat.assign( vec4( normal, uintBitsToFloat( packedIndices ) ) );

};
|
|
218
|
+
|
|
219
|
+
// MEDIUM_STACK slot: x is a packed word — stackDepth (bits 0-7) | transTraversals << 8 |
// wavelength << 16 (nm, 0 = achromatic); y/z/w carry ior1/ior2/ior3.
export const readMediumStack = ( buf, id ) => {

	const slotValue = buf.element( soa( id, RAY.MEDIUM_STACK ) );
	const word = floatBitsToUint( slotValue.x );

	const stackDepth = word.bitAnd( 0xFF );
	const transTraversals = word.shiftRight( 8 ).bitAnd( 0xFF );
	const wavelength = word.shiftRight( 16 ).bitAnd( 0xFFFF );

	return {
		stackDepth,
		transTraversals,
		wavelength,
		ior1: slotValue.y,
		ior2: slotValue.z,
		ior3: slotValue.w,
	};

};

// Inverse of readMediumStack. The packed fields are OR-ed together without masking.
export const writeMediumStack = ( buf, id, stackDepth, transTraversals, ior1, ior2, ior3, wavelength = uint( 0 ) ) => {

	const target = buf.element( soa( id, RAY.MEDIUM_STACK ) );
	const word = stackDepth.bitOr( transTraversals.shiftLeft( 8 ) ).bitOr( wavelength.shiftLeft( 16 ) );

	return target.assign( vec4( uintBitsToFloat( word ), ior1, ior2, ior3 ) );

};

// MEDIUM_SIGMA_A slot: Beer-Lambert sigmaA of the active medium; single-slot, absorption gated on stackDepth>0.
export const readMediumSigmaA = ( buf, id ) => {

	const slotValue = buf.element( soa( id, RAY.MEDIUM_SIGMA_A ) );
	return slotValue.xyz;

};

// Stores sigmaA in xyz; w is written as 0.
export const writeMediumSigmaA = ( buf, id, sigmaA ) => {

	const target = buf.element( soa( id, RAY.MEDIUM_SIGMA_A ) );
	return target.assign( vec4( sigmaA, 0.0 ) );

};
|
|
246
|
+
|
|
247
|
+
// Per-ray bounce state packed into ORIGIN_META.w (written by writeRayOriginMeta alongside the origin):
|
|
248
|
+
// perRayBounces = bits 0-7 (camera-bounce depth; the loop index can't track it once free bounces decouple it)
|
|
249
|
+
// sssSteps = bits 8-15 (SSS random-walk step counter)
|
|
250
|
+
// sampleIndex (the multi-sample sub-sample 0..S-1) is derived in-kernel from rayID, not stored.
|
|
251
|
+
// perRayBounces: bits 0-7 of the packed ORIGIN_META.w word, as int.
export const readPathBounces = ( buf, id ) => {

	const word = floatBitsToUint( buf.element( soa( id, RAY.ORIGIN_META ) ).w );
	return int( word.bitAnd( 0xFF ) );

};

// sssSteps: bits 8-15 of the packed ORIGIN_META.w word, as int.
export const readSssSteps = ( buf, id ) => {

	const word = floatBitsToUint( buf.element( soa( id, RAY.ORIGIN_META ) ).w );
	return int( word.shiftRight( 8 ).bitAnd( 0xFF ) );

};
|
|
255
|
+
|
|
256
|
+
// SSS_SIGMA_S slot: SSS sigmaS (xyz) + Henyey-Greenstein g (w).
// sigmaS==0 marks glass (Beer-Lambert path, not random walk).
export const readSSSMedium = ( buf, id ) => {

	const slotValue = buf.element( soa( id, RAY.SSS_SIGMA_S ) );
	const sigmaS = slotValue.xyz;
	const g = slotValue.w;

	return { sigmaS, g };

};

// Stores sigmaS in xyz and g in w of the SSS_SIGMA_S slot.
export const writeSSSMedium = ( buf, id, sigmaS, g ) => {

	const target = buf.element( soa( id, RAY.SSS_SIGMA_S ) );
	return target.assign( vec4( sigmaS, g ) );

};
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* QueueManager.js — wavefront ray queues: active indices (ping-pong), sorted indices, atomic counters.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { storage } from 'three/tsl';
|
|
6
|
+
import { StorageInstancedBufferAttribute } from 'three/webgpu';
|
|
7
|
+
|
|
8
|
+
/** Counter indices — must match ResetCounters kernel */
|
|
9
|
+
export const COUNTER = {
	// Index of the active-ray counter.
	ACTIVE_RAY_COUNT: 0,
	// Rays entering the current bounce; snapshotted before ACTIVE_RAY_COUNT is reset so an
	// over-sized dispatch is safe.
	ENTERING_COUNT: 1,
	// Number of counters (length of the counter buffer).
	COUNT: 2,
};

/** Ray flag bits packed into rayBounceFlags (uint) */
export const RAY_FLAG = {
	BOUNCE_MASK: 0xFF, // bits 0-7: bounce count (0-255)
	ACTIVE: 0x100, // bit 8: ray is alive
	SPECULAR: 0x200, // bit 9: last bounce was specular
	INSIDE_MEDIUM: 0x400, // bit 10: ray is inside a transmissive medium
	// bits 11-15: ray type
	RAY_TYPE_SHIFT: 11,
	RAY_TYPE_MASK: 0xF800, // 0x1F << 11
	// bits 16-31: spare per-ray state carried across bounces
	HAS_HIT_OPAQUE: 0x10000, // bit 16: ray chain has hit non-transmissive geometry (transparent-bg alpha; megakernel hasHitOpaqueSurface)
	AUX_LOCKED: 0x20000, // bit 17: OIDN aux (normal/albedo) locked onto first non-specular hit (megakernel auxLocked)
};
|
|
29
|
+
|
|
30
|
+
export class QueueManager {

	/**
	 * @param {number} maxRays - When > 0, forwarded unchanged to allocate() as the
	 *   queue capacity (typically width * height; see the note on allocate()).
	 */
	constructor( maxRays = 0 ) {

		this.capacity = 0;
		this.counters = null;
		this.bounceCounts = null;
		// A/B alternate: one read by current bounce, other written by compaction
		this.activeIndices = null;
		this.activeIndicesRO = null;
		this.pingPong = 0; // 0 = read A / write B, 1 = read B / write A

		// Length of the per-bounce snapshot buffer created by allocate().
		this.MAX_BOUNCE_SNAPSHOTS = 32;

		// Backing attributes. Declared up front so every getter returns null
		// (not undefined) before allocate() and after dispose().
		this._countersAttr = null;
		this._bounceCountsAttr = null;
		this._attrA = null;
		this._attrB = null;

		if ( maxRays > 0 ) {

			this.allocate( maxRays );

		}

	}

	/**
	 * Release the current buffers and create the counters, bounce snapshots and
	 * both active-index queues for `capacity` entries. Resets the ping-pong state.
	 *
	 * NOTE(review): the original comment says capacity must match
	 * `RayBufferPool.allocatedCapacity`; presumably this now means the ray
	 * buffer's capacity (PackedRayBuffer.capacity) — confirm against the caller.
	 *
	 * @param {number} capacity - Number of uint slots in each active-index queue.
	 */
	allocate( capacity ) {

		this.dispose();
		this.capacity = capacity;

		// explicit attribute (not attributeArray) so it can be referenced for async readback
		this._countersAttr = new StorageInstancedBufferAttribute( new Uint32Array( COUNTER.COUNT ), 1 );
		this.counters = storage( this._countersAttr, 'uint' ).toAtomic();

		// per-bounce ACTIVE_RAY_COUNT snapshots; read back async to size/skip late bounces next frame
		this.MAX_BOUNCE_SNAPSHOTS = 32;
		this._bounceCountsAttr = new StorageInstancedBufferAttribute(
			new Uint32Array( this.MAX_BOUNCE_SNAPSHOTS ), 1,
		);
		this.bounceCounts = storage( this._bounceCountsAttr, 'uint' );

		const attrA = new StorageInstancedBufferAttribute( new Uint32Array( capacity ), 1 );
		const attrB = new StorageInstancedBufferAttribute( new Uint32Array( capacity ), 1 );
		this._attrA = attrA;
		this._attrB = attrB;

		this.activeIndices = {
			a: storage( attrA, 'uint' ),
			b: storage( attrB, 'uint' ),
		};

		// RO reuses the same attribute so RW/RO share one GPU buffer
		this.activeIndicesRO = {
			a: storage( attrA, 'uint' ).toReadOnly(),
			b: storage( attrB, 'uint' ).toReadOnly(),
		};

		this.pingPong = 0;

		// Counters plus the two index queues (4 bytes per uint).
		const totalBytes = (
			COUNTER.COUNT * 4 +
			capacity * 4 * 2
		);

		console.log(
			`QueueManager: Allocated capacity=${capacity}, ` +
			`total=${( totalBytes / ( 1024 * 1024 ) ).toFixed( 1 )} MB`
		);

	}

	/**
	 * Reallocate only when `capacity` exceeds what is currently held.
	 * @param {number} capacity
	 * @returns {boolean} true if reallocation occurred
	 */
	resize( capacity ) {

		if ( capacity <= this.capacity && this.capacity > 0 ) return false;
		this.allocate( capacity );
		return true;

	}

	/** @returns {StorageBufferNode|null} Atomic counter storage node. */
	getCounters() {

		return this.counters;

	}

	/** @returns {StorageBufferNode} Read-only view of the queue read in the current ping-pong state. */
	getActiveReadRO() {

		return this.pingPong === 0 ? this.activeIndicesRO.a : this.activeIndicesRO.b;

	}

	/** @returns {StorageBufferNode} RW view of the read-side queue (compaction input). */
	getActiveRead() {

		return this.pingPong === 0 ? this.activeIndices.a : this.activeIndices.b;

	}

	/** @returns {StorageBufferNode} RW view of the write-side queue (the opposite of getActiveRead()). */
	getActiveWrite() {

		return this.pingPong === 0 ? this.activeIndices.b : this.activeIndices.a;

	}

	/** @returns {StorageInstancedBufferAttribute|null} Raw attribute for `renderer.getArrayBufferAsync(...)` readback. */
	getCountersAttribute() {

		return this._countersAttr;

	}

	/** @returns {StorageBufferNode|null} Per-bounce snapshot storage node. */
	getBounceCounts() {

		return this.bounceCounts;

	}

	/** @returns {StorageInstancedBufferAttribute|null} Raw attribute behind getBounceCounts(). */
	getBounceCountsAttribute() {

		return this._bounceCountsAttr;

	}

	/** Flip which queue is read and which is written. */
	swap() {

		this.pingPong = 1 - this.pingPong;

	}

	/** Return to the initial state (read A / write B). */
	resetPingPong() {

		this.pingPong = 0;

	}

	/**
	 * Drop every storage node and backing attribute and reset capacity to 0, so
	 * the typed arrays are no longer referenced and getters stop returning
	 * stale objects.
	 */
	dispose() {

		this.counters = null;
		this.bounceCounts = null;
		this.activeIndices = null;
		this.activeIndicesRO = null;
		this._countersAttr = null;
		this._bounceCountsAttr = null;
		this._attrA = null;
		this._attrB = null;
		this.capacity = 0;

	}

}
|