rayzee 6.5.0 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/README.md +24 -5
  2. package/dist/rayzee.es.js +7554 -7014
  3. package/dist/rayzee.es.js.map +1 -1
  4. package/dist/rayzee.umd.js +157 -236
  5. package/dist/rayzee.umd.js.map +1 -1
  6. package/package.json +1 -1
  7. package/src/EngineDefaults.js +12 -9
  8. package/src/PathTracerApp.js +118 -26
  9. package/src/Pipeline/PipelineContext.js +1 -2
  10. package/src/Pipeline/RenderPipeline.js +1 -1
  11. package/src/Pipeline/RenderStage.js +1 -1
  12. package/src/Processor/CameraOptimizer.js +0 -5
  13. package/src/Processor/GeometryExtractor.js +6 -0
  14. package/src/Processor/KernelManager.js +277 -0
  15. package/src/Processor/PackedRayBuffer.js +265 -0
  16. package/src/Processor/QueueManager.js +173 -0
  17. package/src/Processor/SceneProcessor.js +1 -0
  18. package/src/Processor/ShaderBuilder.js +11 -317
  19. package/src/Processor/StorageTexturePool.js +29 -15
  20. package/src/Processor/VRAMTracker.js +169 -0
  21. package/src/Processor/utils.js +11 -110
  22. package/src/RenderSettings.js +0 -3
  23. package/src/Stages/ASVGF.js +76 -20
  24. package/src/Stages/BilateralFilter.js +34 -10
  25. package/src/Stages/EdgeFilter.js +2 -3
  26. package/src/Stages/MotionVector.js +16 -9
  27. package/src/Stages/NormalDepth.js +17 -5
  28. package/src/Stages/PathTracer.js +671 -1456
  29. package/src/Stages/PathTracerStage.js +1451 -0
  30. package/src/Stages/SSRC.js +32 -15
  31. package/src/Stages/Variance.js +35 -12
  32. package/src/TSL/CompactKernel.js +110 -0
  33. package/src/TSL/DebugKernel.js +98 -0
  34. package/src/TSL/Environment.js +13 -11
  35. package/src/TSL/ExtendKernel.js +75 -0
  36. package/src/TSL/FinalWriteKernel.js +121 -0
  37. package/src/TSL/GenerateKernel.js +109 -0
  38. package/src/TSL/LightsSampling.js +2 -2
  39. package/src/TSL/PathTracerCore.js +43 -1039
  40. package/src/TSL/ShadeKernel.js +873 -0
  41. package/src/TSL/patches.js +81 -4
  42. package/src/index.js +3 -0
  43. package/src/managers/CameraManager.js +1 -1
  44. package/src/managers/DenoisingManager.js +40 -75
  45. package/src/managers/EnvironmentManager.js +30 -39
  46. package/src/managers/OverlayManager.js +7 -22
  47. package/src/managers/UniformManager.js +0 -3
  48. package/src/managers/helpers/TileHelper.js +2 -2
  49. package/src/Stages/AdaptiveSampling.js +0 -483
  50. package/src/TSL/PathTracer.js +0 -384
  51. package/src/managers/TileManager.js +0 -298
@@ -12,10 +12,11 @@
12
12
  // Textures read: pathtracer:color, pathtracer:normalDepth, motionVector:screenSpace
13
13
 
14
14
  import { uniform } from 'three/tsl';
15
- import { StorageTexture, TextureNode } from 'three/webgpu';
16
- import { HalfFloatType, RGBAFormat, NearestFilter, LinearFilter } from 'three';
15
+ import { StorageTexture, TextureNode, RenderTarget } from 'three/webgpu';
16
+ import { HalfFloatType, RGBAFormat, NearestFilter, LinearFilter, Box2, Vector2 } from 'three';
17
17
  import { RenderStage, StageExecutionMode } from '../Pipeline/RenderStage.js';
18
18
  import { buildTemporalPass, buildSpatialPass } from '../TSL/SSRC.js';
19
+ import { MAX_STORAGE_TEXTURE_SIZE } from '../EngineDefaults.js';
19
20
 
20
21
  export class SSRC extends RenderStage {
21
22
 
@@ -51,18 +52,30 @@ export class SSRC extends RenderStage {
51
52
  this._readPass1CacheTexNode = new TextureNode(); // current cache (for spatial pass)
52
53
 
53
54
  // ─── StorageTextures (5 total) ───
54
- const w = 1, h = 1; // resized on first render
55
+ // StorageTextures stay at max alloc see resize crash fix (three.js #33061).
56
+ const w = 1, h = 1; // RTs/uniforms resized on first render
55
57
 
56
58
  // Ping-pong temporal cache: .rgb = radiance, .w = history count
57
- this._cacheTexA = this._createStorageTex( w, h, NearestFilter );
58
- this._cacheTexB = this._createStorageTex( w, h, NearestFilter );
59
+ this._cacheTexA = this._createStorageTex( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE, NearestFilter );
60
+ this._cacheTexB = this._createStorageTex( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE, NearestFilter );
59
61
 
60
62
  // Ping-pong previous-frame normalDepth (for edge-stopping in temporal pass)
61
- this._prevNDTexA = this._createStorageTex( w, h, NearestFilter );
62
- this._prevNDTexB = this._createStorageTex( w, h, NearestFilter );
63
+ this._prevNDTexA = this._createStorageTex( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE, NearestFilter );
64
+ this._prevNDTexB = this._createStorageTex( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE, NearestFilter );
63
65
 
64
66
  // Final output (LinearFilter for Display fragment shader sampling)
65
- this._outputTex = this._createStorageTex( w, h, LinearFilter );
67
+ this._outputTex = this._createStorageTex( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE, LinearFilter );
68
+
69
+ // Active-region copy target — published downstream (storage tex is over-allocated)
70
+ this._srcRegion = new Box2( new Vector2( 0, 0 ), new Vector2( 0, 0 ) );
71
+ this.outputTarget = new RenderTarget( w, h, {
72
+ type: HalfFloatType,
73
+ format: RGBAFormat,
74
+ minFilter: LinearFilter,
75
+ magFilter: LinearFilter,
76
+ depthBuffer: false,
77
+ stencilBuffer: false
78
+ } );
66
79
 
67
80
  // ─── State ───
68
81
  this._currentPingPong = 0; // 0: read B, write A; 1: read A, write B
@@ -99,7 +112,7 @@ export class SSRC extends RenderStage {
99
112
  if ( colorTex?.image ) {
100
113
 
101
114
  const { width, height } = colorTex.image;
102
- if ( width !== this._cacheTexA.image.width || height !== this._cacheTexA.image.height ) {
115
+ if ( width !== this.outputTarget.width || height !== this.outputTarget.height ) {
103
116
 
104
117
  this.setSize( width, height );
105
118
 
@@ -145,8 +158,13 @@ export class SSRC extends RenderStage {
145
158
  // Advance frames-since-reset counter (capped to avoid overflow)
146
159
  this._framesSinceReset.value = Math.min( this._framesSinceReset.value + 1, 9999 );
147
160
 
161
+ // Copy active region out of the over-allocated StorageTexture into the
162
+ // right-sized RenderTarget; downstream stages UV-sample the latter.
163
+ this._srcRegion.max.set( this.outputTarget.width, this.outputTarget.height );
164
+ this.renderer.copyTextureToTexture( this._outputTex, this.outputTarget.texture, this._srcRegion );
165
+
148
166
  // Publish final output
149
- context.setTexture( 'ssrc:output', this._outputTex );
167
+ context.setTexture( 'ssrc:output', this.outputTarget.texture );
150
168
 
151
169
  // Advance ping-pong
152
170
  this._currentPingPong = 1 - this._currentPingPong;
@@ -163,11 +181,9 @@ export class SSRC extends RenderStage {
163
181
 
164
182
  if ( width < 1 || height < 1 ) return;
165
183
 
166
- this._cacheTexA.setSize( width, height );
167
- this._cacheTexB.setSize( width, height );
168
- this._prevNDTexA.setSize( width, height );
169
- this._prevNDTexB.setSize( width, height );
170
- this._outputTex.setSize( width, height );
184
+ // StorageTextures stay at their max allocation (see constructor).
185
+ this.outputTarget.setSize( width, height );
186
+ this.outputTarget.texture.needsUpdate = true;
171
187
 
172
188
  this.resW.value = width;
173
189
  this.resH.value = height;
@@ -194,6 +210,7 @@ export class SSRC extends RenderStage {
194
210
  this._prevNDTexA.dispose();
195
211
  this._prevNDTexB.dispose();
196
212
  this._outputTex.dispose();
213
+ this.outputTarget?.dispose();
197
214
  this._colorTexNode?.dispose();
198
215
  this._ndTexNode?.dispose();
199
216
  this._motionTexNode?.dispose();
@@ -1,9 +1,10 @@
1
1
  import { Fn, wgslFn, float, int, uint, ivec2, uvec2, uniform, If, max,
2
2
  textureLoad, textureStore, workgroupArray, workgroupBarrier, localId, workgroupId } from 'three/tsl';
3
- import { TextureNode, StorageTexture } from 'three/webgpu';
4
- import { FloatType, RGBAFormat, LinearFilter } from 'three';
3
+ import { RenderTarget, TextureNode, StorageTexture } from 'three/webgpu';
4
+ import { FloatType, RGBAFormat, LinearFilter, Box2, Vector2 } from 'three';
5
5
  import { RenderStage, StageExecutionMode } from '../Pipeline/RenderStage.js';
6
6
  import { luminance } from '../TSL/Common.js';
7
+ import { MAX_STORAGE_TEXTURE_SIZE } from '../EngineDefaults.js';
7
8
 
8
9
  // ── wgslFn helpers ──────────────────────────────────────────
9
10
 
@@ -40,8 +41,7 @@ const temporalAccumulate = /*@__PURE__*/ wgslFn( `
40
41
  * WebGPU Variance Estimation Stage (Compute Shader)
41
42
  *
42
43
  * Computes temporal and spatial variance from the path tracer output.
43
- * Used by AdaptiveSampling for sampling guidance and by
44
- * BilateralFilter for variance-guided filtering.
44
+ * Used by BilateralFilter for variance-guided filtering.
45
45
  *
46
46
  * Uses compute shader with workgroup shared memory for the 3×3
47
47
  * spatial variance computation. Each 8×8 workgroup loads a 10×10
@@ -98,18 +98,34 @@ export class Variance extends RenderStage {
98
98
  // FloatType (f32) required — HalfFloat's ~3.3 decimal digits cause catastrophic
99
99
  // cancellation in (meanSq - mean²) for converged pixels, producing a variance
100
100
  // floor of ~0.0001 that the (frame+1)² scaling amplifies to enormous values.
101
- this._storageTexA = new StorageTexture( w, h );
101
+ // StorageTextures over-allocated at max defensive against three.js #33061
102
+ // (TSL compute pipeline keeps stale GPUTextureView after StorageTexture.setSize).
103
+ this._storageTexA = new StorageTexture( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE );
102
104
  this._storageTexA.type = FloatType;
103
105
  this._storageTexA.format = RGBAFormat;
104
106
  this._storageTexA.minFilter = LinearFilter;
105
107
  this._storageTexA.magFilter = LinearFilter;
106
108
 
107
- this._storageTexB = new StorageTexture( w, h );
109
+ this._storageTexB = new StorageTexture( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE );
108
110
  this._storageTexB.type = FloatType;
109
111
  this._storageTexB.format = RGBAFormat;
110
112
  this._storageTexB.minFilter = LinearFilter;
111
113
  this._storageTexB.magFilter = LinearFilter;
112
114
 
115
+ this._srcRegion = new Box2( new Vector2( 0, 0 ), new Vector2( 0, 0 ) );
116
+
117
+ // Right-sized RenderTarget published to downstream (UV-sampled). The
118
+ // over-allocated StorageTexture itself must not be published — UV
119
+ // sampling a 2048 texture would read the wrong region.
120
+ this._outputTarget = new RenderTarget( w, h, {
121
+ type: FloatType,
122
+ format: RGBAFormat,
123
+ minFilter: LinearFilter,
124
+ magFilter: LinearFilter,
125
+ depthBuffer: false,
126
+ stencilBuffer: false
127
+ } );
128
+
113
129
  this.currentMoments = 0; // 0 = write A, read B; 1 = write B, read A
114
130
  this._compiled = false;
115
131
  this._needsWarmReset = false;
@@ -275,8 +291,8 @@ export class Variance extends RenderStage {
275
291
  const img = colorTex.image;
276
292
  if ( img && img.width > 0 && img.height > 0 ) {
277
293
 
278
- if ( img.width !== this._storageTexA.image.width ||
279
- img.height !== this._storageTexA.image.height ) {
294
+ if ( img.width !== this._outputTarget.width ||
295
+ img.height !== this._outputTarget.height ) {
280
296
 
281
297
  this.setSize( img.width, img.height );
282
298
 
@@ -333,8 +349,13 @@ export class Variance extends RenderStage {
333
349
  // Swap for next frame
334
350
  this.currentMoments = 1 - this.currentMoments;
335
351
 
336
- // Publish (StorageTexture works as regular Texture for downstream sampling)
337
- context.setTexture( 'variance:output', writeTarget );
352
+ // Copy the active region out of the over-allocated StorageTexture into the
353
+ // right-sized RenderTarget; downstream stages UV-sample the latter.
354
+ this._srcRegion.max.set( this._outputTarget.width, this._outputTarget.height );
355
+ this.renderer.copyTextureToTexture( writeTarget, this._outputTarget.texture, this._srcRegion );
356
+
357
+ // Publish the RenderTarget (not the over-allocated StorageTexture)
358
+ context.setTexture( 'variance:output', this._outputTarget.texture );
338
359
 
339
360
  }
340
361
 
@@ -352,8 +373,9 @@ export class Variance extends RenderStage {
352
373
 
353
374
  setSize( width, height ) {
354
375
 
355
- this._storageTexA.setSize( width, height );
356
- this._storageTexB.setSize( width, height );
376
+ // StorageTextures stay at their max allocation (see constructor).
377
+ this._outputTarget.setSize( width, height );
378
+ this._outputTarget.texture.needsUpdate = true;
357
379
  this.resW.value = width;
358
380
  this.resH.value = height;
359
381
 
@@ -371,6 +393,7 @@ export class Variance extends RenderStage {
371
393
  this._computeNodeB?.dispose();
372
394
  this._storageTexA?.dispose();
373
395
  this._storageTexB?.dispose();
396
+ this._outputTarget?.dispose();
374
397
  this._colorTexNode?.dispose();
375
398
  this._readTexNodeA?.dispose();
376
399
  this._readTexNodeB?.dispose();
@@ -0,0 +1,110 @@
1
+ /**
2
+ * CompactKernel.js — wavefront stream compaction: active rays → dense index array for next bounce (256×1, 1D).
3
+ */
4
+
5
+ import {
6
+ Fn, uint, select,
7
+ If,
8
+ instanceIndex,
9
+ atomicAdd, atomicLoad,
10
+ subgroupExclusiveAdd, subgroupAdd, subgroupBroadcast,
11
+ Return,
12
+ } from 'three/tsl';
13
+
14
+ import { readRayBounceFlags } from '../Processor/PackedRayBuffer.js';
15
+ import { RAY_FLAG, COUNTER } from '../Processor/QueueManager.js';
16
+
17
+ const WG_SIZE = 256;
18
+
19
+ export function buildCompactKernel( params ) {
20
+
21
+ const {
22
+ rayBufferRO,
23
+ activeIndicesReadRO,
24
+ activeIndicesWriteRW,
25
+ counters,
26
+ currentActiveCount,
27
+ } = params;
28
+
29
+ const computeFn = Fn( () => {
30
+
31
+ const threadIdx = instanceIndex;
32
+
33
+ // ACTIVE_RAY_COUNT is zeroed before compact, so the dense-list length comes from ENTERING_COUNT.
34
+ const bound = counters ? atomicLoad( counters.element( uint( COUNTER.ENTERING_COUNT ) ) ) : currentActiveCount;
35
+ If( threadIdx.greaterThanEqual( bound ), () => {
36
+
37
+ Return();
38
+
39
+ } );
40
+
41
+ const rayID = activeIndicesReadRO.element( threadIdx );
42
+
43
+ const flags = readRayBounceFlags( rayBufferRO, rayID );
44
+
45
+ If( flags.bitAnd( uint( RAY_FLAG.ACTIVE ) ).notEqual( uint( 0 ) ), () => {
46
+
47
+ const writeIdx = atomicAdd( counters.element( uint( COUNTER.ACTIVE_RAY_COUNT ) ), uint( 1 ) );
48
+ activeIndicesWriteRW.element( writeIdx ).assign( rayID );
49
+
50
+ } );
51
+
52
+ } );
53
+
54
+ return computeFn;
55
+
56
+ }
57
+
58
+ /**
59
+ * Subgroup prefix-sum compaction: one global atomicAdd per subgroup instead of per survivor.
60
+ * Requires renderer.hasFeature('subgroups'); control flow must stay uniform (no divergent Return).
61
+ */
62
+ export function buildCompactSubgroupKernel( params ) {
63
+
64
+ const {
65
+ rayBufferRO,
66
+ activeIndicesReadRO,
67
+ activeIndicesWriteRW,
68
+ counters,
69
+ currentActiveCount,
70
+ } = params;
71
+
72
+ const computeFn = Fn( () => {
73
+
74
+ const threadIdx = instanceIndex;
75
+ const bound = counters ? atomicLoad( counters.element( uint( COUNTER.ENTERING_COUNT ) ) ) : currentActiveCount;
76
+
77
+ // No early Return: all lanes must reach the subgroup ops; out-of-range lanes contribute 0 and read stale-but-in-capacity slots.
78
+ const inRange = threadIdx.lessThan( bound );
79
+ const rayID = activeIndicesReadRO.element( threadIdx );
80
+ const flags = readRayBounceFlags( rayBufferRO, rayID );
81
+ const isActive = inRange.and( flags.bitAnd( uint( RAY_FLAG.ACTIVE ) ).notEqual( uint( 0 ) ) );
82
+ const activeU = select( isActive, uint( 1 ), uint( 0 ) );
83
+
84
+ // .toVar() materializes the subgroup ops at uniform control flow; inlining into the divergent If(isActive) write is rejected by WGSL.
85
+ const localOffset = subgroupExclusiveAdd( activeU ).toVar();
86
+ const sgCount = subgroupAdd( activeU ).toVar();
87
+
88
+ // laneId via exclusiveAdd(1) since TSL lacks subgroup_invocation_id; lane 0 does the single per-subgroup atomicAdd.
89
+ const laneId = subgroupExclusiveAdd( uint( 1 ) ).toVar();
90
+ const base = uint( 0 ).toVar();
91
+ If( laneId.equal( uint( 0 ) ), () => {
92
+
93
+ base.assign( atomicAdd( counters.element( uint( COUNTER.ACTIVE_RAY_COUNT ) ), sgCount ) );
94
+
95
+ } );
96
+ const sgBase = subgroupBroadcast( base, uint( 0 ) ).toVar();
97
+
98
+ If( isActive, () => {
99
+
100
+ activeIndicesWriteRW.element( sgBase.add( localOffset ) ).assign( rayID );
101
+
102
+ } );
103
+
104
+ } );
105
+
106
+ return computeFn;
107
+
108
+ }
109
+
110
+ export { WG_SIZE as COMPACT_WG_SIZE };
@@ -0,0 +1,98 @@
1
+ /**
2
+ * DebugKernel.js — wavefront debug visualization (16×16, 2D screen-space dispatch).
3
+ *
4
+ * Single-pass primary-ray debug viz for visMode 1-10 (mode 11 = NaN/Inf is a FinalWrite
5
+ * post-branch on the accumulated color, handled there). Generates a camera ray per pixel and
6
+ * delegates to the renderer-agnostic TraceDebugMode for the per-mode color; mode 9 (stratified
7
+ * sample pattern) is computed inline. Writes the color directly to the output (no accumulation).
8
+ */
9
+
10
+ import {
11
+ Fn, float, vec2, vec4, int, uint, uvec2,
12
+ If, textureStore,
13
+ localId, workgroupId,
14
+ } from 'three/tsl';
15
+
16
+ import { generateRayFromCamera } from './BVHTraversal.js';
17
+ import { Ray } from './Struct.js';
18
+ import { TraceDebugMode } from './Debugger.js';
19
+ import { pcgHash, getStratifiedSample } from './Random.js';
20
+
21
+ const WG_SIZE = 16;
22
+
23
+ export function buildDebugKernel( params ) {
24
+
25
+ const {
26
+ writeColorTex, writeNDTex, writeAlbedoTex,
27
+ resolution, renderWidth, renderHeight,
28
+ cameraWorldMatrix, cameraProjectionMatrixInverse, cameraProjectionMatrix, cameraViewMatrix,
29
+ enableDOF, focalLength, aperture, focusDistance, sceneScale, apertureScale, anamorphicRatio,
30
+ bvhBuffer, triangleBuffer, materialBuffer,
31
+ envTexture, environmentMatrix, environmentIntensity, enableEnvironmentLight,
32
+ visMode, debugVisScale,
33
+ albedoMaps, normalMaps, bumpMaps, metalnessMaps, roughnessMaps, emissiveMaps,
34
+ frame,
35
+ samplesPerPass = 1,
36
+ } = params;
37
+
38
+ const computeFn = Fn( () => {
39
+
40
+ const gx = int( workgroupId.x ).mul( WG_SIZE ).add( int( localId.x ) );
41
+ const gy = int( workgroupId.y ).mul( WG_SIZE ).add( int( localId.y ) );
42
+
43
+ If( gx.lessThan( renderWidth ).and( gy.lessThan( renderHeight ) ), () => {
44
+
45
+ const pixelCoord = vec2( float( gx ).add( 0.5 ), float( gy ).add( 0.5 ) );
46
+ const pixelIndex = gy.mul( int( resolution.x ) ).add( gx );
47
+ const seed = pcgHash( { state: uint( pixelIndex ).add( uint( 1 ) ) } ).toVar();
48
+
49
+ // Center-pixel primary ray (no AA jitter — debug viz wants a stable, sharp image).
50
+ const screenPosition = pixelCoord.div( resolution ).mul( 2.0 ).sub( 1.0 ).toVar();
51
+ screenPosition.y.assign( screenPosition.y.negate() );
52
+
53
+ const ray = Ray.wrap( generateRayFromCamera(
54
+ screenPosition, seed,
55
+ cameraWorldMatrix, cameraProjectionMatrixInverse,
56
+ enableDOF, focalLength, aperture, focusDistance, sceneScale, apertureScale, anamorphicRatio,
57
+ ) );
58
+
59
+ const color = vec4( 1.0, 0.0, 1.0, 1.0 ).toVar();
60
+
61
+ // Mode 9: visualize the stratified AA-jitter pattern (R,G = jitter).
62
+ If( visMode.equal( int( 9 ) ), () => {
63
+
64
+ // Use the real per-frame sample count so >1 SPP shows the stratified lattice (totalRays≤1 → plain random).
65
+ const jitter = getStratifiedSample( pixelCoord, int( 0 ), int( samplesPerPass ), seed, resolution, frame );
66
+ color.assign( vec4( jitter, 1.0, 1.0 ) );
67
+
68
+ } ).Else( () => {
69
+
70
+ // Modes 1-8, 10 — shared per-mode debug color (primary-ray trace + counters).
71
+ color.assign( TraceDebugMode(
72
+ ray.origin, ray.direction,
73
+ bvhBuffer, triangleBuffer, materialBuffer,
74
+ envTexture, environmentMatrix, environmentIntensity, enableEnvironmentLight,
75
+ visMode, debugVisScale,
76
+ pixelCoord, resolution,
77
+ albedoMaps, normalMaps, bumpMaps, metalnessMaps, roughnessMaps, emissiveMaps,
78
+ cameraProjectionMatrix, cameraViewMatrix,
79
+ frame,
80
+ ) );
81
+
82
+ } );
83
+
84
+ const uintCoord = uvec2( uint( gx ), uint( gy ) );
85
+ textureStore( writeColorTex, uintCoord, color ).toWriteOnly();
86
+ // Benign MRT so the denoiser/display never read stale normal/albedo on a debug frame.
87
+ textureStore( writeNDTex, uintCoord, vec4( 0.5, 0.5, 1.0, 1.0 ) ).toWriteOnly();
88
+ textureStore( writeAlbedoTex, uintCoord, vec4( color.xyz, 1.0 ) ).toWriteOnly();
89
+
90
+ } );
91
+
92
+ } );
93
+
94
+ return computeFn;
95
+
96
+ }
97
+
98
+ export { WG_SIZE as DEBUG_WG_SIZE };
@@ -1,4 +1,4 @@
1
- import { Fn, wgslFn, vec2, vec4, float, int, If, texture, dot, sin, sqrt, floor, fract, min, max, mix, clamp } from 'three/tsl';
1
+ import { Fn, wgslFn, vec2, vec4, ivec2, float, int, If, texture, dot, sin, sqrt, floor, fract, min, max, mix, clamp } from 'three/tsl';
2
2
 
3
3
  import { REC709_LUMINANCE_COEFFICIENTS } from './Common.js';
4
4
 
@@ -74,7 +74,7 @@ export const sampleEquirect = Fn( ( [ environment, direction, environmentMatrix,
74
74
  // Exact implementation from three-gpu-pathtracer
75
75
  export const sampleEquirectProbability = Fn( ( [
76
76
  environment,
77
- envCDFBuffer,
77
+ envCDFTexture,
78
78
  environmentMatrix,
79
79
  environmentIntensity,
80
80
  envTotalSum,
@@ -84,9 +84,8 @@ export const sampleEquirectProbability = Fn( ( [
84
84
  colorOutput
85
85
  ] ) => {
86
86
 
87
- // Packed CDF layout: [marginal (envResolution.y floats) | conditional (envResolution.x * envResolution.y floats)]
88
- // The conditional offset equals the marginal length, which is envResolution.y.
89
- const condOffset = int( envResolution.y ).toVar();
87
+ // CDF texture layout: (W+1)×H R32F conditional[cy*W+cx] at texel (cx,cy); marginal[cy] at column W.
88
+ const cdfMarginalCol = int( envResolution.x ).toVar();
90
89
 
91
90
  // Sample marginal CDF for V coordinate (1D, linear interpolation)
92
91
  const marginalSize = envResolution.y;
@@ -94,7 +93,11 @@ export const sampleEquirectProbability = Fn( ( [
94
93
  const mI0 = int( floor( mIdx ) );
95
94
  const mI1 = min( mI0.add( 1 ), int( marginalSize ).sub( 1 ) );
96
95
  const mFrac = fract( mIdx );
97
- const v = mix( envCDFBuffer.element( mI0 ), envCDFBuffer.element( mI1 ), mFrac ).toVar();
96
+ const v = mix(
97
+ envCDFTexture.load( ivec2( cdfMarginalCol, mI0 ) ).x,
98
+ envCDFTexture.load( ivec2( cdfMarginalCol, mI1 ) ).x,
99
+ mFrac,
100
+ ).toVar();
98
101
 
99
102
  // Sample conditional CDF for U coordinate (2D grid, bilinear interpolation)
100
103
  const condW = envResolution.x;
@@ -107,11 +110,10 @@ export const sampleEquirectProbability = Fn( ( [
107
110
  const cy1 = min( cy0.add( 1 ), int( condH ).sub( 1 ) );
108
111
  const fx = fract( cxf );
109
112
  const fy = fract( cyf );
110
- const condWi = int( condW );
111
- const v00 = envCDFBuffer.element( condOffset.add( cy0.mul( condWi ).add( cx0 ) ) );
112
- const v10 = envCDFBuffer.element( condOffset.add( cy0.mul( condWi ).add( cx1 ) ) );
113
- const v01 = envCDFBuffer.element( condOffset.add( cy1.mul( condWi ).add( cx0 ) ) );
114
- const v11 = envCDFBuffer.element( condOffset.add( cy1.mul( condWi ).add( cx1 ) ) );
113
+ const v00 = envCDFTexture.load( ivec2( cx0, cy0 ) ).x;
114
+ const v10 = envCDFTexture.load( ivec2( cx1, cy0 ) ).x;
115
+ const v01 = envCDFTexture.load( ivec2( cx0, cy1 ) ).x;
116
+ const v11 = envCDFTexture.load( ivec2( cx1, cy1 ) ).x;
115
117
  const u = mix( mix( v00, v10, fx ), mix( v01, v11, fx ), fy ).toVar();
116
118
 
117
119
  const uv = vec2( u, v ).toVar();
@@ -0,0 +1,75 @@
1
+ /**
2
+ * ExtendKernel.js — wavefront BVH traversal (256×1, 1D ray-parallel dispatch).
3
+ */
4
+
5
+ import {
6
+ Fn, uint,
7
+ If,
8
+ instanceIndex,
9
+ atomicLoad,
10
+ Return,
11
+ } from 'three/tsl';
12
+
13
+ import { traverseBVH } from './BVHTraversal.js';
14
+ import { Ray, HitInfo } from './Struct.js';
15
+ import {
16
+ readRayOrigin, readRayDirection, readMediumStack,
17
+ writeHitPacked,
18
+ } from '../Processor/PackedRayBuffer.js';
19
+ import { COUNTER } from '../Processor/QueueManager.js';
20
+
21
+ const WG_SIZE = 256;
22
+
23
+ export function buildExtendKernel( params ) {
24
+
25
+ const {
26
+ bvhBuffer, triangleBuffer, materialBuffer,
27
+ rayBufferRO,
28
+ hitBufferRW,
29
+ activeIndicesRO,
30
+ counters,
31
+ maxRayCount,
32
+ } = params;
33
+
34
+ const computeFn = Fn( () => {
35
+
36
+ const threadIdx = instanceIndex;
37
+
38
+ // kernels bound on ENTERING_COUNT so an over-sized (margin) dispatch is safe.
39
+ const bound = counters ? atomicLoad( counters.element( uint( COUNTER.ENTERING_COUNT ) ) ) : maxRayCount;
40
+ If( threadIdx.greaterThanEqual( bound ), () => {
41
+
42
+ Return();
43
+
44
+ } );
45
+
46
+ const rayID = activeIndicesRO.element( threadIdx );
47
+
48
+ const origin = readRayOrigin( rayBufferRO, rayID ).toVar();
49
+ const direction = readRayDirection( rayBufferRO, rayID ).toVar();
50
+
51
+ const ray = Ray( { origin, direction } );
52
+
53
+ // insideMedium bypasses front/back culling so the ray can hit a glass/SSS back-facing boundary.
54
+ const insideMedium = readMediumStack( rayBufferRO, rayID ).stackDepth.greaterThan( uint( 0 ) );
55
+ const hitInfo = HitInfo.wrap( traverseBVH(
56
+ ray, bvhBuffer, triangleBuffer, insideMedium,
57
+ ) ).toVar();
58
+
59
+ writeHitPacked(
60
+ hitBufferRW, rayID,
61
+ hitInfo.dst,
62
+ uint( hitInfo.triangleIndex ),
63
+ hitInfo.uv.x, hitInfo.uv.y,
64
+ hitInfo.normal,
65
+ uint( hitInfo.materialIndex ),
66
+ uint( hitInfo.meshIndex ),
67
+ );
68
+
69
+ } );
70
+
71
+ return computeFn;
72
+
73
+ }
74
+
75
+ export { WG_SIZE as EXTEND_WG_SIZE };
@@ -0,0 +1,121 @@
1
+ /**
2
+ * FinalWriteKernel.js — wavefront final output: temporal accumulation + MRT StorageTexture writes (16×16, 2D).
3
+ */
4
+
5
+ import {
6
+ Fn, wgslFn, float, vec2, vec4, int, uint, uvec2,
7
+ If, mix, select, texture, textureStore,
8
+ localId, workgroupId,
9
+ } from 'three/tsl';
10
+
11
+ import {
12
+ readRayRadiance, readGBuffer, gbDecodeNormalDepth, gbDecodeAlbedo,
13
+ } from '../Processor/PackedRayBuffer.js';
14
+
15
+ const WG_SIZE = 16;
16
+
17
+ // Debug mode 11: NaN/Inf detector — red where the accumulated color is NaN/Inf, black elsewhere.
18
+ const nanInfToRed = /*@__PURE__*/ wgslFn( `
19
+ fn nanInfToRed( c: vec3f ) -> vec3f {
20
+ let isNan = c.x != c.x || c.y != c.y || c.z != c.z;
21
+ let isInf = abs( c.x ) > 1e30f || abs( c.y ) > 1e30f || abs( c.z ) > 1e30f;
22
+ if ( isNan || isInf ) { return vec3f( 1.0f, 0.0f, 0.0f ); }
23
+ return vec3f( 0.0f );
24
+ }
25
+ ` );
26
+
27
+ export function buildFinalWriteKernel( params ) {
28
+
29
+ const {
30
+ rayBufferRO, gBufferRO,
31
+ writeColorTex, writeNDTex, writeAlbedoTex,
32
+ resolution, frame,
33
+ enableAccumulation, hasPreviousAccumulated, accumulationAlpha, cameraIsMoving,
34
+ transparentBackground,
35
+ prevAccumTexture, prevNormalDepthTexture, prevAlbedoTexture,
36
+ renderWidth, renderHeight,
37
+ // Multi-sample: average S sample-slots per pixel (slot = pixel + k*w*h, w*h from the resolution uniform).
38
+ samplesPerPass = 1,
39
+ visMode,
40
+ } = params;
41
+
42
+ const S = samplesPerPass | 0;
43
+
44
+ const computeFn = Fn( () => {
45
+
46
+ const gx = int( workgroupId.x ).mul( WG_SIZE ).add( int( localId.x ) );
47
+ const gy = int( workgroupId.y ).mul( WG_SIZE ).add( int( localId.y ) );
48
+
49
+ If( gx.lessThan( renderWidth ).and( gy.lessThan( renderHeight ) ), () => {
50
+
51
+ const pixelIndex = gy.mul( int( resolution.x ) ).add( gx );
52
+ const rayID = uint( pixelIndex );
53
+
54
+ // Average the S sub-samples; MRT (normal/depth/albedo) from sub-sample 0.
55
+ const sampleColor = ( () => {
56
+
57
+ if ( S <= 1 ) return readRayRadiance( rayBufferRO, rayID );
58
+ const acc = readRayRadiance( rayBufferRO, rayID ).toVar();
59
+ const mrps = uint( resolution.x ).mul( uint( resolution.y ) ).toVar(); // w*h from the resolution uniform, not baked
60
+ for ( let k = 1; k < S; k ++ ) {
61
+
62
+ acc.addAssign( readRayRadiance( rayBufferRO, rayID.add( uint( k ).mul( mrps ) ) ) );
63
+
64
+ }
65
+
66
+ acc.assign( acc.div( float( S ) ) );
67
+ return acc;
68
+
69
+ } )();
70
+ // MRT comes from the per-pixel G-buffer (rayID == pixelIndex here, i.e. sub-sample 0). Half-packed: decode.
71
+ const gbuf = readGBuffer( gBufferRO, rayID );
72
+ const normalDepth = gbDecodeNormalDepth( gbuf );
73
+ const albedoID = vec4( gbDecodeAlbedo( gbuf ), 0.0 );
74
+
75
+ const finalColor = sampleColor.xyz.toVar();
76
+ const finalNormalDepth = normalDepth.toVar();
77
+ const finalAlbedo = albedoID.xyz.toVar();
78
+ const outputAlpha = select( transparentBackground, sampleColor.w, float( 1.0 ) ).toVar();
79
+
80
+ const pixelCoord = vec2( float( gx ).add( 0.5 ), float( gy ).add( 0.5 ) );
81
+ const prevUV = pixelCoord.div( resolution );
82
+
83
+ // visMode 11 (NaN/Inf) bypasses accumulation (megakernel parity main_TSL_PathTracer.js:355) so the
84
+ // detector runs on each frame's fresh color — else mix() propagates a transient NaN and it stays red forever.
85
+ If( enableAccumulation.and( cameraIsMoving.not() ).and( frame.greaterThan( uint( 0 ) ) ).and( hasPreviousAccumulated ).and( visMode.notEqual( int( 11 ) ) ), () => {
86
+
87
+ const prevAccumSample = texture( prevAccumTexture, prevUV, 0 ).toVar();
88
+
89
+ finalColor.assign( mix( prevAccumSample.xyz, sampleColor.xyz, accumulationAlpha ) );
90
+ finalNormalDepth.assign( mix( texture( prevNormalDepthTexture, prevUV, 0 ), finalNormalDepth, accumulationAlpha ) );
91
+ finalAlbedo.assign( mix( texture( prevAlbedoTexture, prevUV, 0 ).xyz, finalAlbedo, accumulationAlpha ) );
92
+
93
+ If( transparentBackground, () => {
94
+
95
+ outputAlpha.assign( mix( prevAccumSample.w, sampleColor.w, accumulationAlpha ) );
96
+
97
+ } );
98
+
99
+ } );
100
+
101
+ // Debug mode 11: flag NaN/Inf on the accumulated color (red on NaN/Inf, black elsewhere).
102
+ If( visMode.equal( int( 11 ) ), () => {
103
+
104
+ finalColor.assign( nanInfToRed( finalColor ) );
105
+
106
+ } );
107
+
108
+ const uintCoord = uvec2( uint( gx ), uint( gy ) );
109
+ textureStore( writeColorTex, uintCoord, vec4( finalColor, outputAlpha ) ).toWriteOnly();
110
+ textureStore( writeNDTex, uintCoord, finalNormalDepth ).toWriteOnly();
111
+ textureStore( writeAlbedoTex, uintCoord, vec4( finalAlbedo, 1.0 ) ).toWriteOnly();
112
+
113
+ } );
114
+
115
+ } );
116
+
117
+ return computeFn;
118
+
119
+ }
120
+
121
+ export { WG_SIZE as FINALWRITE_WG_SIZE };