rayzee 6.4.0 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +24 -5
  2. package/dist/rayzee.es.js +4953 -4225
  3. package/dist/rayzee.es.js.map +1 -1
  4. package/dist/rayzee.umd.js +157 -236
  5. package/dist/rayzee.umd.js.map +1 -1
  6. package/package.json +1 -1
  7. package/src/EngineDefaults.js +29 -13
  8. package/src/PathTracerApp.js +119 -26
  9. package/src/Pipeline/PipelineContext.js +1 -2
  10. package/src/Pipeline/RenderPipeline.js +1 -1
  11. package/src/Pipeline/RenderStage.js +1 -1
  12. package/src/Processor/CameraOptimizer.js +0 -5
  13. package/src/Processor/GeometryExtractor.js +22 -1
  14. package/src/Processor/KernelManager.js +277 -0
  15. package/src/Processor/PackedRayBuffer.js +265 -0
  16. package/src/Processor/QueueManager.js +173 -0
  17. package/src/Processor/SceneProcessor.js +1 -0
  18. package/src/Processor/ShaderBuilder.js +11 -316
  19. package/src/Processor/StorageTexturePool.js +29 -15
  20. package/src/Processor/TextureCreator.js +6 -0
  21. package/src/Processor/VRAMTracker.js +169 -0
  22. package/src/Processor/utils.js +11 -110
  23. package/src/RenderSettings.js +1 -3
  24. package/src/Stages/ASVGF.js +76 -20
  25. package/src/Stages/BilateralFilter.js +34 -10
  26. package/src/Stages/EdgeFilter.js +2 -3
  27. package/src/Stages/MotionVector.js +16 -9
  28. package/src/Stages/NormalDepth.js +17 -5
  29. package/src/Stages/PathTracer.js +671 -1456
  30. package/src/Stages/PathTracerStage.js +1451 -0
  31. package/src/Stages/SSRC.js +32 -15
  32. package/src/Stages/Variance.js +35 -12
  33. package/src/TSL/BVHTraversal.js +7 -1
  34. package/src/TSL/Common.js +12 -2
  35. package/src/TSL/CompactKernel.js +110 -0
  36. package/src/TSL/DebugKernel.js +98 -0
  37. package/src/TSL/Environment.js +13 -11
  38. package/src/TSL/ExtendKernel.js +75 -0
  39. package/src/TSL/FinalWriteKernel.js +121 -0
  40. package/src/TSL/GenerateKernel.js +109 -0
  41. package/src/TSL/LightsSampling.js +2 -2
  42. package/src/TSL/MaterialTransmission.js +32 -2
  43. package/src/TSL/PathTracerCore.js +43 -912
  44. package/src/TSL/ShadeKernel.js +873 -0
  45. package/src/TSL/Struct.js +5 -0
  46. package/src/TSL/Subsurface.js +232 -0
  47. package/src/TSL/patches.js +81 -4
  48. package/src/index.js +3 -0
  49. package/src/managers/CameraManager.js +1 -1
  50. package/src/managers/DenoisingManager.js +40 -75
  51. package/src/managers/EnvironmentManager.js +30 -39
  52. package/src/managers/MaterialDataManager.js +60 -1
  53. package/src/managers/OverlayManager.js +7 -22
  54. package/src/managers/UniformManager.js +1 -3
  55. package/src/managers/helpers/TileHelper.js +2 -2
  56. package/src/Stages/AdaptiveSampling.js +0 -483
  57. package/src/TSL/PathTracer.js +0 -384
  58. package/src/managers/TileManager.js +0 -298
@@ -0,0 +1,277 @@
1
+ /**
2
+ * KernelManager.js
3
+ *
4
+ * Builds, caches, and dispatches individual compute nodes for the wavefront
5
+ * path tracing pipeline. Each kernel is a separate `Fn().compute()` node.
6
+ *
7
+ * Manages workgroup sizes, dispatch dimensions, and provides a unified
8
+ * dispatch interface that wraps `renderer.compute(node)`.
9
+ */
10
+
11
/**
 * Default workgroup size `[x, y, z]` for every kernel type.
 * Screen-space kernels use 2D tiles, per-ray kernels use 1D groups,
 * and the counter reset runs as a single thread.
 */
const WORKGROUP_SIZES = {

	// 2D screen-space
	generate: [ 16, 16, 1 ],

	// 1D ray-parallel
	extend: [ 256, 1, 1 ],
	sort: [ 256, 1, 1 ],

	// 1D ray-parallel (sorted)
	shade: [ 256, 1, 1 ],

	// 1D shadow-ray-parallel
	connect: [ 256, 1, 1 ],
	accumulate: [ 256, 1, 1 ],

	// 1D ray-parallel
	compact: [ 256, 1, 1 ],

	// Single thread
	resetCounters: [ 1, 1, 1 ],

	// 2D screen-space
	finalWrite: [ 16, 16, 1 ],

};
23
+
24
export class KernelManager {

	/**
	 * @param {WebGPURenderer} renderer - Three.js WebGPU renderer
	 */
	constructor( renderer ) {

		/**
		 * @type {WebGPURenderer}
		 */
		this.renderer = renderer;

		/**
		 * Map of kernel name → ComputeNode.
		 * @type {Map<string, ComputeNode>}
		 */
		this.kernels = new Map();

		/**
		 * Map of kernel name → workgroup size [x, y, z].
		 * @type {Map<string, number[]>}
		 */
		this.workgroupSizes = new Map();

		/**
		 * First-dispatch timing per kernel. `lastDispatchMs` is only written on the
		 * first dispatch after register(), so it reflects the compile-inclusive dispatch.
		 * @type {Map<string, {compiledOnce: boolean, lastDispatchMs: number}>}
		 */
		this.timing = new Map();

		/**
		 * Optional per-kernel CPU-side submission timing (encode/dispatch cost only;
		 * does NOT measure GPU execution time). Toggle via enableProfiling().
		 * @type {boolean}
		 */
		this.profiling = false;

		/**
		 * Aggregated profile: kernel name → { calls, totalMs }.
		 * @type {Map<string, {calls: number, totalMs: number}>}
		 */
		this.profile = new Map();

		// Copy every default so a caller mutating an array returned by getWorkgroupSize()
		// cannot corrupt the module-level defaults shared by all KernelManager instances.
		for ( const [ name, wgSize ] of Object.entries( WORKGROUP_SIZES ) ) {

			this.workgroupSizes.set( name, [ ...wgSize ] );

		}

	}

	/**
	 * Register a pre-built compute node. Re-registering a name replaces the node and
	 * resets its first-dispatch timing entry.
	 * @param {string} name - Kernel name (e.g. 'generate', 'extend')
	 * @param {ComputeNode} computeNode - Built via `Fn().compute([dx,dy,dz], [wgx,wgy,wgz])`
	 */
	register( name, computeNode ) {

		this.kernels.set( name, computeNode );
		this.timing.set( name, { compiledOnce: false, lastDispatchMs: 0 } );

	}

	/**
	 * Dispatch a kernel by name via `renderer.compute(node)`.
	 *
	 * The first dispatch after register() is always timed and logged (it includes
	 * compilation) and is NOT added to the profile. Later dispatches are timed only
	 * while profiling is enabled.
	 *
	 * @param {string} name - Kernel name
	 * @throws {Error} If no kernel is registered under `name`
	 */
	dispatch( name ) {

		const node = this.kernels.get( name );

		if ( ! node ) {

			throw new Error( `KernelManager: Unknown kernel '${name}'` );

		}

		const timingEntry = this.timing.get( name );
		const isFirstDispatch = Boolean( timingEntry ) && ! timingEntry.compiledOnce;

		// Fast path: nothing to measure
		if ( ! isFirstDispatch && ! this.profiling ) {

			this.renderer.compute( node );
			return;

		}

		const t0 = performance.now();
		this.renderer.compute( node );
		const elapsedMs = performance.now() - t0;

		if ( isFirstDispatch ) {

			timingEntry.compiledOnce = true;
			timingEntry.lastDispatchMs = elapsedMs;
			console.log( `[Wavefront] Kernel '${name}' first dispatch (includes compilation): ${elapsedMs.toFixed( 1 )}ms` );
			return;

		}

		let entry = this.profile.get( name );
		if ( ! entry ) {

			entry = { calls: 0, totalMs: 0 };
			this.profile.set( name, entry );

		}

		entry.calls ++;
		entry.totalMs += elapsedMs;

	}

	/**
	 * Update dispatch dimensions for a kernel. Unknown kernel names are ignored.
	 * @param {string} name - Kernel name
	 * @param {number[]} count - Dispatch dimensions [x, y, z]
	 */
	setDispatchCount( name, count ) {

		const node = this.kernels.get( name );
		if ( ! node ) return;
		node.dispatchSize = count;

	}

	/**
	 * Calculate 2D dispatch dimensions for a screen-space kernel.
	 * Falls back to a 16×16 workgroup when the kernel has no configured size.
	 * @param {number} width - Render width in pixels
	 * @param {number} height - Render height in pixels
	 * @param {string} kernelName - Kernel name for WG size lookup
	 * @returns {number[]} [dispatchX, dispatchY, 1]
	 */
	calcScreenDispatch( width, height, kernelName ) {

		const [ wgX, wgY ] = this.workgroupSizes.get( kernelName ) || [ 16, 16, 1 ];
		return [ Math.ceil( width / wgX ), Math.ceil( height / wgY ), 1 ];

	}

	/**
	 * Calculate 1D dispatch dimensions for a ray-parallel kernel.
	 * Falls back to a 256-wide workgroup when the kernel has no configured size.
	 * @param {number} rayCount - Number of rays to process
	 * @param {string} kernelName - Kernel name for WG size lookup
	 * @returns {number[]} [dispatchX, 1, 1]
	 */
	calcRayDispatch( rayCount, kernelName ) {

		const [ wgX ] = this.workgroupSizes.get( kernelName ) || [ 256, 1, 1 ];
		return [ Math.ceil( rayCount / wgX ), 1, 1 ];

	}

	/**
	 * Get the workgroup size for a kernel ([256, 1, 1] when none is configured).
	 * @param {string} name
	 * @returns {number[]}
	 */
	getWorkgroupSize( name ) {

		return this.workgroupSizes.get( name ) || [ 256, 1, 1 ];

	}

	/**
	 * Check if a kernel has been registered.
	 * @param {string} name
	 * @returns {boolean}
	 */
	has( name ) {

		return this.kernels.has( name );

	}

	/**
	 * Get the underlying compute node.
	 * @param {string} name
	 * @returns {ComputeNode|undefined}
	 */
	get( name ) {

		return this.kernels.get( name );

	}

	/**
	 * Get first-dispatch compilation timing for all kernels.
	 * Each entry is a shallow copy, so callers cannot mutate internal state.
	 * @returns {Object} name → { compiledOnce, lastDispatchMs }
	 */
	getTimingReport() {

		const report = {};

		for ( const [ name, data ] of this.timing ) {

			report[ name ] = { ...data };

		}

		return report;

	}

	/**
	 * Toggle per-kernel CPU-submission profiling. Measures only encode/dispatch
	 * cost on CPU (GPU work is async and NOT included). Enabling clears any
	 * previously accumulated profile; disabling keeps it readable.
	 * @param {boolean} enabled
	 */
	enableProfiling( enabled ) {

		this.profiling = enabled;
		if ( enabled ) this.profile.clear();

	}

	/**
	 * Get accumulated profiling data as table-friendly rows.
	 * Rows are sorted by descending totalMs and followed by one `TOTAL` row whose
	 * `avgMs` is null.
	 * @returns {Array<{name: string, calls: number, totalMs: number, avgMs: (number|null)}>}
	 */
	getProfileReport() {

		const rows = [];
		let sumMs = 0;
		let sumCalls = 0;

		for ( const [ name, { calls, totalMs } ] of this.profile ) {

			sumMs += totalMs;
			sumCalls += calls;
			rows.push( {
				name,
				calls,
				totalMs: Number( totalMs.toFixed( 2 ) ),
				avgMs: Number( ( totalMs / calls ).toFixed( 3 ) ),
			} );

		}

		rows.sort( ( a, b ) => b.totalMs - a.totalMs );
		rows.push( { name: 'TOTAL', calls: sumCalls, totalMs: Number( sumMs.toFixed( 2 ) ), avgMs: null } );
		return rows;

	}

	/**
	 * Drop all registered kernels and collected timing/profile data.
	 * Configured workgroup sizes are kept.
	 */
	dispose() {

		this.kernels.clear();
		this.timing.clear();
		this.profile.clear();

	}

}
@@ -0,0 +1,265 @@
1
+ /**
2
+ * Packed buffer manager for wavefront path tracing — one storage buffer per data category.
3
+ * RAY/HIT are SoA-within-a-buffer (field `slot` of element `id` lives at `id + slot*_cap`).
4
+ */
5
+
6
+ import {
7
+ storage, uintBitsToFloat, floatBitsToUint, vec2, vec3, vec4, uvec4, uint, int,
8
+ packSnorm2x16, packUnorm2x16, unpackSnorm2x16, unpackUnorm2x16,
9
+ } from 'three/tsl';
10
+ import { StorageInstancedBufferAttribute } from 'three/webgpu';
11
+
12
/** Number of vec4 regions stored per ray (one per `RAY` slot below). */
export const RAY_STRIDE = 7;

/** Number of vec4 regions stored per hit record (one per `HIT` slot below). */
export const HIT_STRIDE = 2;

/**
 * Per-pixel G-buffer (first-hit MRT staging): 1 uvec4/pixel, half-precision packed (pack2x16, no f32 bitcast).
 *   .x = packSnorm2x16(normal.xy)
 *   .y = packSnorm2x16(normal.z, depth)
 *   .z = packUnorm2x16(albedo.rg)
 *   .w = packUnorm2x16(albedo.b, 0)
 * Separate buffer from RAY (per-pixel, not per-ray×S) — written by Generate/Shade bounce-0, read only by FinalWrite.
 */
export const GBUFFER_STRIDE = 1;

/** Slot (SoA region) indices of the per-ray vec4 fields. */
export const RAY = {

	// vec4(origin.xyz, uintBitsToFloat(perRayBounces | sssSteps<<8)); pixelIndex+sampleIndex derived from rayID
	ORIGIN_META: 0,

	// vec4(direction.xyz, uintBitsToFloat(bounceFlags))
	DIR_FLAGS: 1,

	// vec4(throughput.xyz, pdf)
	THROUGHPUT_PDF: 2,

	// vec4(radiance.xyz, alpha)
	RADIANCE_ALPHA: 3,

	// vec4(uintBitsToFloat(stackDepth|transTraversals<<8|wavelength<<16), ior1, ior2, ior3)
	MEDIUM_STACK: 4,

	// vec4(sigmaA.xyz, _) — Beer-Lambert absorption coeff of the active medium (KHR_materials_volume + SSS)
	MEDIUM_SIGMA_A: 5,

	// vec4(sigmaS.xyz, g) — SSS scattering coeff + Henyey-Greenstein anisotropy (sigmaS==0 ⇒ glass)
	SSS_SIGMA_S: 6,

};

/** Slot (SoA region) indices of the per-hit vec4 fields. */
export const HIT = {

	// vec4(distance, uintBitsToFloat(triIndex), bary.u, bary.v)
	DIST_TRI_BARY: 0,

	// vec4(geoNormal.xyz, uintBitsToFloat(matIndex | meshIndex<<16))
	NORMAL_MAT: 1,

};
33
+
34
// Distance (in elements) between consecutive SoA regions. The value is folded into the shader
// graph as a plain number when an accessor is called, so graphs are rebuilt on resize;
// a single PackedRayBuffer instance is assumed.
let _cap = 0;

// Element index of field `slot` for record `id`: slot 0 is `id` itself, slot N is `id + N*_cap`.
const soa = ( id, slot ) => {

	if ( slot === 0 ) return id;
	return id.add( slot * _cap );

};
38
+
39
export class PackedRayBuffer {

	/**
	 * Capacity (in rays) that allocate()/resize() use for `maxRays`: 1.25× headroom, rounded up.
	 * NO pow2 rounding — the pow2 jump nearly doubled VRAM (e.g. 2048²: 5.24M→8.39M) for no realloc
	 * benefit: the app's discrete resolution presets always exceed the 1.25× margin on a tier change,
	 * so they rebuild anyway.
	 * This is the single source of truth for the headroom policy.
	 * @param {number} maxRays
	 * @returns {number}
	 */
	static requiredCapacity( maxRays ) {

		return Math.ceil( maxRays * 1.25 );

	}

	/**
	 * @param {number} [maxRays=0] - When > 0, buffers are allocated immediately.
	 */
	constructor( maxRays = 0 ) {

		this.capacity = 0;
		this._attrs = {};

		// Each: { rw: StorageBufferNode, ro: StorageBufferNode } over one shared GPU buffer.
		this.rayBuffer = null;
		this.rngBuffer = null;
		this.hitBuffer = null;

		if ( maxRays > 0 ) this.allocate( maxRays );

	}

	/**
	 * Drop any existing buffers and allocate fresh ray / rng / hit storage sized by
	 * requiredCapacity( maxRays ). Also updates the module-level SoA stride.
	 * @param {number} maxRays
	 */
	allocate( maxRays ) {

		this.dispose();

		const capacity = PackedRayBuffer.requiredCapacity( maxRays );
		this.capacity = capacity;
		_cap = capacity;

		// count=0 so StorageBufferNode.getHash() shares the buffer → RW and RO nodes bind the same GPU data.
		const rayCount = capacity * RAY_STRIDE;
		const rayAttr = new StorageInstancedBufferAttribute( new Float32Array( rayCount * 4 ), 4 );
		this._attrs.ray = rayAttr;
		this.rayBuffer = {
			rw: storage( rayAttr, 'vec4' ),
			ro: storage( rayAttr, 'vec4' ).toReadOnly(),
		};

		const rngAttr = new StorageInstancedBufferAttribute( new Uint32Array( capacity ), 1 );
		this._attrs.rng = rngAttr;
		this.rngBuffer = {
			rw: storage( rngAttr, 'uint' ),
			ro: storage( rngAttr, 'uint' ).toReadOnly(),
		};

		const hitCount = capacity * HIT_STRIDE;
		const hitAttr = new StorageInstancedBufferAttribute( new Float32Array( hitCount * 4 ), 4 );
		this._attrs.hit = hitAttr;
		this.hitBuffer = {
			rw: storage( hitAttr, 'vec4' ),
			ro: storage( hitAttr, 'vec4' ).toReadOnly(),
		};

		// 16 bytes per vec4 element, 4 bytes per rng word
		const totalMB = (
			rayCount * 16 + capacity * 4 + hitCount * 16
		) / ( 1024 * 1024 );

		console.log(
			`PackedRayBuffer: capacity=${capacity}, total=${totalMB.toFixed( 1 )} MB ` +
			`(ray=${( rayCount * 16 / 1048576 ).toFixed( 0 )}MB hit=${( hitCount * 16 / 1048576 ).toFixed( 0 )}MB) [SoA ray/hit]`
		);

	}

	/**
	 * Reallocates only if `maxRays` needs more capacity than is currently allocated.
	 * @param {number} maxRays
	 * @returns {boolean} true if a reallocation happened
	 */
	resize( maxRays ) {

		const needed = PackedRayBuffer.requiredCapacity( maxRays );
		if ( needed <= this.capacity && this.capacity > 0 ) return false;
		this.allocate( maxRays );
		return true;

	}

	/**
	 * Drop references to all attributes and buffer nodes and reset capacity to 0.
	 */
	dispose() {

		this._attrs = {};
		this.rayBuffer = null;
		this.rngBuffer = null;
		this.hitBuffer = null;
		this.capacity = 0;

	}

}
128
+
129
// TSL accessor helpers — call inside Fn() scopes. `buf` is the .rw/.ro StorageBufferNode, `id` a uint node.

/** Ray origin: xyz of the ORIGIN_META region. */
export const readRayOrigin = ( buf, id ) => {

	const originMeta = buf.element( soa( id, RAY.ORIGIN_META ) );
	return originMeta.xyz;

};

/** Ray direction: xyz of the DIR_FLAGS region. */
export const readRayDirection = ( buf, id ) => {

	const dirFlags = buf.element( soa( id, RAY.DIR_FLAGS ) );
	return dirFlags.xyz;

};

/** Bounce flags: the w lane of DIR_FLAGS reinterpreted as uint bits. */
export const readRayBounceFlags = ( buf, id ) => {

	const dirFlags = buf.element( soa( id, RAY.DIR_FLAGS ) );
	return floatBitsToUint( dirFlags.w );

};

/** Path throughput: xyz of the THROUGHPUT_PDF region. */
export const readRayThroughput = ( buf, id ) => {

	const throughputPdf = buf.element( soa( id, RAY.THROUGHPUT_PDF ) );
	return throughputPdf.xyz;

};

/** Pdf: the w lane of the THROUGHPUT_PDF region. */
export const readRayPdf = ( buf, id ) => {

	const throughputPdf = buf.element( soa( id, RAY.THROUGHPUT_PDF ) );
	return throughputPdf.w;

};

/** Full vec4 of the RADIANCE_ALPHA region (radiance.xyz, alpha). */
export const readRayRadiance = ( buf, id ) => {

	const radianceSlot = soa( id, RAY.RADIANCE_ALPHA );
	return buf.element( radianceSlot );

};
148
+
149
// ── Per-pixel G-buffer (first-hit MRT). 1 uvec4/pixel (element p), pack2x16 lanes. ──
// normal: raw unit vec3; depth: linear [0,1]; albedo: vec3 [0,1]. Packed values live in u32 lanes
// verbatim (no f32 bitcast) so NaN-range bit patterns (snorm ±1 → 0x7FFF) survive store/load intact.

/** Pack normal, depth and albedo into the uvec4 at `pixelIndex`. */
export const writeGBuffer = ( buf, pixelIndex, normal, depth, albedo ) => {

	const target = buf.element( pixelIndex );
	const normalXY = packSnorm2x16( vec2( normal.x, normal.y ) );
	const normalZDepth = packSnorm2x16( vec2( normal.z, depth ) );
	const albedoRG = packUnorm2x16( vec2( albedo.x, albedo.y ) );
	const albedoB = packUnorm2x16( vec2( albedo.z, 0.0 ) );
	return target.assign( uvec4( normalXY, normalZDepth, albedoRG, albedoB ) );

};

/** Raw packed uvec4 for `pixelIndex`; decode with the gbDecode* helpers. */
export const readGBuffer = ( buf, pixelIndex ) => {

	return buf.element( pixelIndex );

};

// Decode for FinalWrite. normalDepth.xyz matches the prior path (normal*0.5+0.5), .w = raw depth.
export const gbDecodeNormalDepth = ( packed ) => {

	const normalXY = unpackSnorm2x16( packed.x );
	const normalZDepth = unpackSnorm2x16( packed.y );
	const encodedNormal = vec3( normalXY.x, normalXY.y, normalZDepth.x ).mul( 0.5 ).add( 0.5 );
	return vec4( encodedNormal, normalZDepth.y );

};

/** Albedo vec3 rebuilt from the .z (rg) and .w (b) unorm lanes. */
export const gbDecodeAlbedo = ( packed ) => {

	const albedoRG = unpackUnorm2x16( packed.z );
	const albedoB = unpackUnorm2x16( packed.w ).x;
	return vec3( albedoRG, albedoB );

};
171
+
172
// .w packs per-ray bounce state: perRayBounces (bits 0-7) | sssSteps (bits 8-15). pixelIndex +
// sampleIndex are NOT stored — derived from rayID (= subSample*w*h + pixelIndex) in-kernel.
export const writeRayOriginMeta = ( buf, id, origin, bounces, sssSteps ) => {

	const target = buf.element( soa( id, RAY.ORIGIN_META ) );
	const packedState = uint( bounces ).bitOr( uint( sssSteps ).shiftLeft( 8 ) );
	return target.assign( vec4( origin, uintBitsToFloat( packedState ) ) );

};

/** Store direction.xyz plus the uint bounce flags bit-cast into the w lane. */
export const writeRayDirFlags = ( buf, id, direction, bounceFlags ) => {

	const target = buf.element( soa( id, RAY.DIR_FLAGS ) );
	return target.assign( vec4( direction, uintBitsToFloat( bounceFlags ) ) );

};

/** Store throughput.xyz and pdf in the THROUGHPUT_PDF region. */
export const writeRayThroughputPdf = ( buf, id, throughput, pdf ) => {

	const target = buf.element( soa( id, RAY.THROUGHPUT_PDF ) );
	return target.assign( vec4( throughput, pdf ) );

};

/** Store the given vec4 (radiance.xyz, alpha) in the RADIANCE_ALPHA region. */
export const writeRayRadiance = ( buf, id, radiance ) => {

	const target = buf.element( soa( id, RAY.RADIANCE_ALPHA ) );
	return target.assign( radiance );

};
191
+
192
/** Hit distance: x lane of DIST_TRI_BARY. */
export const readHitDistance = ( buf, id ) => {

	const distTriBary = buf.element( soa( id, HIT.DIST_TRI_BARY ) );
	return distTriBary.x;

};

/** Triangle index: y lane of DIST_TRI_BARY reinterpreted as uint bits. */
export const readHitTriangleIndex = ( buf, id ) => {

	const distTriBary = buf.element( soa( id, HIT.DIST_TRI_BARY ) );
	return floatBitsToUint( distTriBary.y );

};

/** Barycentrics (u, v): zw lanes of DIST_TRI_BARY. */
export const readHitBarycentrics = ( buf, id ) => {

	const distTriBary = buf.element( soa( id, HIT.DIST_TRI_BARY ) );
	return distTriBary.zw;

};

/** Geometric normal: xyz of NORMAL_MAT. */
export const readHitNormal = ( buf, id ) => {

	const normalMat = buf.element( soa( id, HIT.NORMAL_MAT ) );
	return normalMat.xyz;

};

/** Material index: low 16 bits of the packed w lane of NORMAL_MAT. */
export const readHitMaterialIndex = ( buf, id ) => {

	const packedBits = floatBitsToUint( buf.element( soa( id, HIT.NORMAL_MAT ) ).w );
	return uint( packedBits.bitAnd( 0xFFFF ) );

};

/** Mesh index: high 16 bits of the packed w lane of NORMAL_MAT. */
export const readHitMeshIndex = ( buf, id ) => {

	const packedBits = floatBitsToUint( buf.element( soa( id, HIT.NORMAL_MAT ) ).w );
	return packedBits.shiftRight( 16 );

};

/**
 * Write both hit regions for `id`:
 *   DIST_TRI_BARY = vec4(distance, bits(triIndex), baryU, baryV)
 *   NORMAL_MAT    = vec4(normal, bits(matIndex | meshIndex<<16))
 */
export const writeHitPacked = ( buf, id, distance, triIndex, baryU, baryV, normal, matIndex, meshIndex ) => {

	const distTriBary = buf.element( soa( id, HIT.DIST_TRI_BARY ) );
	distTriBary.assign( vec4( distance, uintBitsToFloat( triIndex ), baryU, baryV ) );

	const normalMat = buf.element( soa( id, HIT.NORMAL_MAT ) );
	const packedIndices = matIndex.bitOr( meshIndex.shiftLeft( 16 ) );
	normalMat.assign( vec4( normal, uintBitsToFloat( packedIndices ) ) );

};
218
+
219
// RAY.MEDIUM_STACK: the x lane packs stackDepth | transTraversals<<8 | wavelength<<16 (nm, 0=achromatic);
// the y/z/w lanes carry ior1/ior2/ior3.
export const readMediumStack = ( buf, id ) => {

	const packed = buf.element( soa( id, RAY.MEDIUM_STACK ) );
	const bits = floatBitsToUint( packed.x );

	const stackDepth = bits.bitAnd( 0xFF );
	const transTraversals = bits.shiftRight( 8 ).bitAnd( 0xFF );
	const wavelength = bits.shiftRight( 16 ).bitAnd( 0xFFFF );
	const ior1 = packed.y;
	const ior2 = packed.z;
	const ior3 = packed.w;

	return { stackDepth, transTraversals, wavelength, ior1, ior2, ior3 };

};

/** Inverse of readMediumStack; `wavelength` defaults to uint(0). */
export const writeMediumStack = ( buf, id, stackDepth, transTraversals, ior1, ior2, ior3, wavelength = uint( 0 ) ) => {

	const target = buf.element( soa( id, RAY.MEDIUM_STACK ) );
	const bits = stackDepth.bitOr( transTraversals.shiftLeft( 8 ) ).bitOr( wavelength.shiftLeft( 16 ) );
	return target.assign( vec4( uintBitsToFloat( bits ), ior1, ior2, ior3 ) );

};
240
+
241
// RAY.MEDIUM_SIGMA_A: Beer-Lambert sigmaA of the active medium; single-slot, absorption gated on stackDepth>0.
export const readMediumSigmaA = ( buf, id ) => {

	const sigmaSlot = buf.element( soa( id, RAY.MEDIUM_SIGMA_A ) );
	return sigmaSlot.xyz;

};

/** Store sigmaA.xyz; the w lane is written as 0. */
export const writeMediumSigmaA = ( buf, id, sigmaA ) => {

	const sigmaSlot = buf.element( soa( id, RAY.MEDIUM_SIGMA_A ) );
	return sigmaSlot.assign( vec4( sigmaA, 0.0 ) );

};
246
+
247
// Per-ray bounce state packed into ORIGIN_META.w (written by writeRayOriginMeta alongside the origin):
//   perRayBounces = bits 0-7  (camera-bounce depth; the loop index can't track it once free bounces decouple it)
//   sssSteps      = bits 8-15 (SSS random-walk step counter)
// sampleIndex (the multi-sample sub-sample 0..S-1) is derived in-kernel from rayID, not stored.
export const readPathBounces = ( buf, id ) => {

	const stateBits = floatBitsToUint( buf.element( soa( id, RAY.ORIGIN_META ) ).w );
	return int( stateBits.bitAnd( 0xFF ) );

};

export const readSssSteps = ( buf, id ) => {

	const stateBits = floatBitsToUint( buf.element( soa( id, RAY.ORIGIN_META ) ).w );
	return int( stateBits.shiftRight( 8 ).bitAnd( 0xFF ) );

};
255
+
256
// RAY.SSS_SIGMA_S: SSS sigmaS + Henyey-Greenstein g. sigmaS==0 marks glass (Beer-Lambert path, not random walk).
export const readSSSMedium = ( buf, id ) => {

	const medium = buf.element( soa( id, RAY.SSS_SIGMA_S ) );
	const sigmaS = medium.xyz;
	const g = medium.w;
	return { sigmaS, g };

};

/** Store sigmaS.xyz and g in the SSS_SIGMA_S region. */
export const writeSSSMedium = ( buf, id, sigmaS, g ) => {

	const medium = buf.element( soa( id, RAY.SSS_SIGMA_S ) );
	return medium.assign( vec4( sigmaS, g ) );

};
@@ -0,0 +1,173 @@
1
+ /**
2
+ * QueueManager.js — wavefront ray queues: active indices (ping-pong), sorted indices, atomic counters.
3
+ */
4
+
5
+ import { storage } from 'three/tsl';
6
+ import { StorageInstancedBufferAttribute } from 'three/webgpu';
7
+
8
/**
 * Indices into the atomic counter buffer — must match ResetCounters kernel.
 * `COUNT` is the number of counters, not a counter itself.
 */
export const COUNTER = {

	ACTIVE_RAY_COUNT: 0,

	// rays entering current bounce; snapshotted before ACTIVE_RAY_COUNT reset so over-sized dispatch is safe.
	ENTERING_COUNT: 1,

	COUNT: 2,

};
15
+
16
/**
 * Ray flag bits packed into rayBounceFlags (uint).
 *   bits 0-7   bounce count (0-255)
 *   bit  8     ray is alive
 *   bit  9     last bounce was specular
 *   bit  10    ray is inside a transmissive medium
 *   bits 11-15 ray type
 *   bits 16-31 spare per-ray state carried across bounces
 */
export const RAY_FLAG = {

	BOUNCE_MASK: 0xFF,
	ACTIVE: 1 << 8,
	SPECULAR: 1 << 9,
	INSIDE_MEDIUM: 1 << 10,

	RAY_TYPE_SHIFT: 11,
	RAY_TYPE_MASK: 0x1F << 11,

	// bit 16: ray chain has hit non-transmissive geometry (transparent-bg alpha; megakernel hasHitOpaqueSurface)
	HAS_HIT_OPAQUE: 1 << 16,

	// bit 17: OIDN aux (normal/albedo) locked onto first non-specular hit (megakernel auxLocked)
	AUX_LOCKED: 1 << 17,

};
29
+
30
export class QueueManager {

	/**
	 * @param {number} maxRays - Maximum number of rays (typically width * height).
	 *   When > 0 the queues are allocated immediately with this capacity.
	 */
	constructor( maxRays = 0 ) {

		this.capacity = 0;

		// Atomic counter storage node and its backing attribute
		this.counters = null;
		this._countersAttr = null;

		// per-bounce ACTIVE_RAY_COUNT snapshots and their backing attribute
		this.MAX_BOUNCE_SNAPSHOTS = 32;
		this.bounceCounts = null;
		this._bounceCountsAttr = null;

		// A/B alternate: one read by current bounce, other written by compaction
		this.activeIndices = null;
		this.activeIndicesRO = null;
		this._attrA = null;
		this._attrB = null;
		this.pingPong = 0; // 0 = read A / write B, 1 = read B / write A

		if ( maxRays > 0 ) {

			this.allocate( maxRays );

		}

	}

	/**
	 * Release any existing queues and allocate new ones.
	 * NOTE(review): the original comment says capacity must match
	 * `RayBufferPool.allocatedCapacity`; that name is not visible here — presumably
	 * the ray buffer's allocated capacity (e.g. PackedRayBuffer.capacity). Confirm against the caller.
	 * @param {number} capacity - Number of index slots per ping-pong queue
	 */
	allocate( capacity ) {

		this.dispose();
		this.capacity = capacity;

		// explicit attribute (not attributeArray) so it can be referenced for async readback
		this._countersAttr = new StorageInstancedBufferAttribute( new Uint32Array( COUNTER.COUNT ), 1 );
		this.counters = storage( this._countersAttr, 'uint' ).toAtomic();

		// per-bounce ACTIVE_RAY_COUNT snapshots; read back async to size/skip late bounces next frame
		this.MAX_BOUNCE_SNAPSHOTS = 32;
		this._bounceCountsAttr = new StorageInstancedBufferAttribute(
			new Uint32Array( this.MAX_BOUNCE_SNAPSHOTS ), 1,
		);
		this.bounceCounts = storage( this._bounceCountsAttr, 'uint' );

		const attrA = new StorageInstancedBufferAttribute( new Uint32Array( capacity ), 1 );
		const attrB = new StorageInstancedBufferAttribute( new Uint32Array( capacity ), 1 );
		this._attrA = attrA;
		this._attrB = attrB;

		this.activeIndices = {
			a: storage( attrA, 'uint' ),
			b: storage( attrB, 'uint' ),
		};

		// RO reuses the same attribute so RW/RO share one GPU buffer
		this.activeIndicesRO = {
			a: storage( attrA, 'uint' ).toReadOnly(),
			b: storage( attrB, 'uint' ).toReadOnly(),
		};

		this.pingPong = 0;

		// Counters plus the two index queues (the small bounce-snapshot buffer is not included)
		const totalBytes = (
			COUNTER.COUNT * 4 +
			capacity * 4 * 2
		);

		console.log(
			`QueueManager: Allocated capacity=${capacity}, ` +
			`total=${( totalBytes / ( 1024 * 1024 ) ).toFixed( 1 )} MB`
		);

	}

	/**
	 * Grow the queues if `capacity` exceeds the current allocation.
	 * @param {number} capacity
	 * @returns {boolean} true if reallocation occurred
	 */
	resize( capacity ) {

		if ( capacity <= this.capacity && this.capacity > 0 ) return false;
		this.allocate( capacity );
		return true;

	}

	/** @returns {?Object} Atomic counter storage node, or null before allocation */
	getCounters() {

		return this.counters;

	}

	/** Read-only view of the queue read by the current bounce. */
	getActiveReadRO() {

		return this.pingPong === 0 ? this.activeIndicesRO.a : this.activeIndicesRO.b;

	}

	// RW version for compaction input
	getActiveRead() {

		return this.pingPong === 0 ? this.activeIndices.a : this.activeIndices.b;

	}

	/** Queue written during the current bounce (the one NOT being read). */
	getActiveWrite() {

		return this.pingPong === 0 ? this.activeIndices.b : this.activeIndices.a;

	}

	// raw attribute for `renderer.getArrayBufferAsync(...)` readback
	getCountersAttribute() {

		return this._countersAttr;

	}

	/** @returns {?Object} Bounce-count snapshot storage node, or null before allocation */
	getBounceCounts() {

		return this.bounceCounts;

	}

	/** @returns {?Object} Raw attribute behind the bounce-count snapshots, or null before allocation */
	getBounceCountsAttribute() {

		return this._bounceCountsAttr;

	}

	/** Flip which queue is read and which is written. */
	swap() {

		this.pingPong = 1 - this.pingPong;

	}

	/** Return to the initial orientation (read A / write B). */
	resetPingPong() {

		this.pingPong = 0;

	}

	/**
	 * Drop every storage node AND its backing attribute so the typed arrays are no
	 * longer retained by this instance. GPU-side resources are not explicitly
	 * destroyed here.
	 */
	dispose() {

		this.counters = null;
		this._countersAttr = null;
		this.bounceCounts = null;
		this._bounceCountsAttr = null;
		this.activeIndices = null;
		this.activeIndicesRO = null;
		this._attrA = null;
		this._attrB = null;
		this.capacity = 0;

	}

}