rayzee 6.5.0 → 7.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -5
- package/dist/rayzee.es.js +7624 -7063
- package/dist/rayzee.es.js.map +1 -1
- package/dist/rayzee.umd.js +157 -236
- package/dist/rayzee.umd.js.map +1 -1
- package/package.json +1 -1
- package/src/EngineDefaults.js +26 -9
- package/src/PathTracerApp.js +118 -26
- package/src/Pipeline/PipelineContext.js +1 -2
- package/src/Pipeline/RenderPipeline.js +1 -1
- package/src/Pipeline/RenderStage.js +1 -1
- package/src/Processor/CameraOptimizer.js +0 -5
- package/src/Processor/GeometryExtractor.js +6 -0
- package/src/Processor/KernelManager.js +277 -0
- package/src/Processor/PackedRayBuffer.js +291 -0
- package/src/Processor/QueueManager.js +173 -0
- package/src/Processor/SceneProcessor.js +1 -0
- package/src/Processor/ShaderBuilder.js +11 -317
- package/src/Processor/StorageTexturePool.js +29 -15
- package/src/Processor/VRAMTracker.js +169 -0
- package/src/Processor/utils.js +11 -110
- package/src/RenderSettings.js +0 -3
- package/src/Stages/ASVGF.js +151 -78
- package/src/Stages/BilateralFilter.js +34 -10
- package/src/Stages/EdgeFilter.js +2 -3
- package/src/Stages/MotionVector.js +16 -9
- package/src/Stages/NormalDepth.js +17 -5
- package/src/Stages/PathTracer.js +671 -1456
- package/src/Stages/PathTracerStage.js +1451 -0
- package/src/Stages/SSRC.js +32 -15
- package/src/Stages/Variance.js +35 -12
- package/src/TSL/CompactKernel.js +110 -0
- package/src/TSL/DebugKernel.js +98 -0
- package/src/TSL/Environment.js +13 -11
- package/src/TSL/ExtendKernel.js +75 -0
- package/src/TSL/FinalWriteKernel.js +121 -0
- package/src/TSL/GenerateKernel.js +111 -0
- package/src/TSL/LightsSampling.js +2 -2
- package/src/TSL/PathTracerCore.js +43 -1039
- package/src/TSL/ShadeKernel.js +876 -0
- package/src/TSL/patches.js +81 -4
- package/src/index.js +3 -0
- package/src/managers/CameraManager.js +1 -1
- package/src/managers/DenoisingManager.js +40 -75
- package/src/managers/EnvironmentManager.js +30 -39
- package/src/managers/OverlayManager.js +7 -22
- package/src/managers/UniformManager.js +0 -3
- package/src/managers/helpers/TileHelper.js +2 -2
- package/src/Stages/AdaptiveSampling.js +0 -483
- package/src/TSL/PathTracer.js +0 -384
- package/src/managers/TileManager.js +0 -298
package/src/Stages/SSRC.js
CHANGED
|
@@ -12,10 +12,11 @@
|
|
|
12
12
|
// Textures read: pathtracer:color, pathtracer:normalDepth, motionVector:screenSpace
|
|
13
13
|
|
|
14
14
|
import { uniform } from 'three/tsl';
|
|
15
|
-
import { StorageTexture, TextureNode } from 'three/webgpu';
|
|
16
|
-
import { HalfFloatType, RGBAFormat, NearestFilter, LinearFilter } from 'three';
|
|
15
|
+
import { StorageTexture, TextureNode, RenderTarget } from 'three/webgpu';
|
|
16
|
+
import { HalfFloatType, RGBAFormat, NearestFilter, LinearFilter, Box2, Vector2 } from 'three';
|
|
17
17
|
import { RenderStage, StageExecutionMode } from '../Pipeline/RenderStage.js';
|
|
18
18
|
import { buildTemporalPass, buildSpatialPass } from '../TSL/SSRC.js';
|
|
19
|
+
import { MAX_STORAGE_TEXTURE_SIZE } from '../EngineDefaults.js';
|
|
19
20
|
|
|
20
21
|
export class SSRC extends RenderStage {
|
|
21
22
|
|
|
@@ -51,18 +52,30 @@ export class SSRC extends RenderStage {
|
|
|
51
52
|
this._readPass1CacheTexNode = new TextureNode(); // current cache (for spatial pass)
|
|
52
53
|
|
|
53
54
|
// ─── StorageTextures (5 total) ───
|
|
54
|
-
|
|
55
|
+
// StorageTextures stay at max alloc — see resize crash fix (three.js #33061).
|
|
56
|
+
const w = 1, h = 1; // RTs/uniforms resized on first render
|
|
55
57
|
|
|
56
58
|
// Ping-pong temporal cache: .rgb = radiance, .w = history count
|
|
57
|
-
this._cacheTexA = this._createStorageTex(
|
|
58
|
-
this._cacheTexB = this._createStorageTex(
|
|
59
|
+
this._cacheTexA = this._createStorageTex( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE, NearestFilter );
|
|
60
|
+
this._cacheTexB = this._createStorageTex( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE, NearestFilter );
|
|
59
61
|
|
|
60
62
|
// Ping-pong previous-frame normalDepth (for edge-stopping in temporal pass)
|
|
61
|
-
this._prevNDTexA = this._createStorageTex(
|
|
62
|
-
this._prevNDTexB = this._createStorageTex(
|
|
63
|
+
this._prevNDTexA = this._createStorageTex( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE, NearestFilter );
|
|
64
|
+
this._prevNDTexB = this._createStorageTex( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE, NearestFilter );
|
|
63
65
|
|
|
64
66
|
// Final output (LinearFilter for Display fragment shader sampling)
|
|
65
|
-
this._outputTex = this._createStorageTex(
|
|
67
|
+
this._outputTex = this._createStorageTex( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE, LinearFilter );
|
|
68
|
+
|
|
69
|
+
// Active-region copy target — published downstream (storage tex is over-allocated)
|
|
70
|
+
this._srcRegion = new Box2( new Vector2( 0, 0 ), new Vector2( 0, 0 ) );
|
|
71
|
+
this.outputTarget = new RenderTarget( w, h, {
|
|
72
|
+
type: HalfFloatType,
|
|
73
|
+
format: RGBAFormat,
|
|
74
|
+
minFilter: LinearFilter,
|
|
75
|
+
magFilter: LinearFilter,
|
|
76
|
+
depthBuffer: false,
|
|
77
|
+
stencilBuffer: false
|
|
78
|
+
} );
|
|
66
79
|
|
|
67
80
|
// ─── State ───
|
|
68
81
|
this._currentPingPong = 0; // 0: read B, write A; 1: read A, write B
|
|
@@ -99,7 +112,7 @@ export class SSRC extends RenderStage {
|
|
|
99
112
|
if ( colorTex?.image ) {
|
|
100
113
|
|
|
101
114
|
const { width, height } = colorTex.image;
|
|
102
|
-
if ( width !== this.
|
|
115
|
+
if ( width !== this.outputTarget.width || height !== this.outputTarget.height ) {
|
|
103
116
|
|
|
104
117
|
this.setSize( width, height );
|
|
105
118
|
|
|
@@ -145,8 +158,13 @@ export class SSRC extends RenderStage {
|
|
|
145
158
|
// Advance frames-since-reset counter (capped to avoid overflow)
|
|
146
159
|
this._framesSinceReset.value = Math.min( this._framesSinceReset.value + 1, 9999 );
|
|
147
160
|
|
|
161
|
+
// Copy active region out of the over-allocated StorageTexture into the
|
|
162
|
+
// right-sized RenderTarget; downstream stages UV-sample the latter.
|
|
163
|
+
this._srcRegion.max.set( this.outputTarget.width, this.outputTarget.height );
|
|
164
|
+
this.renderer.copyTextureToTexture( this._outputTex, this.outputTarget.texture, this._srcRegion );
|
|
165
|
+
|
|
148
166
|
// Publish final output
|
|
149
|
-
context.setTexture( 'ssrc:output', this.
|
|
167
|
+
context.setTexture( 'ssrc:output', this.outputTarget.texture );
|
|
150
168
|
|
|
151
169
|
// Advance ping-pong
|
|
152
170
|
this._currentPingPong = 1 - this._currentPingPong;
|
|
@@ -163,11 +181,9 @@ export class SSRC extends RenderStage {
|
|
|
163
181
|
|
|
164
182
|
if ( width < 1 || height < 1 ) return;
|
|
165
183
|
|
|
166
|
-
|
|
167
|
-
this.
|
|
168
|
-
this.
|
|
169
|
-
this._prevNDTexB.setSize( width, height );
|
|
170
|
-
this._outputTex.setSize( width, height );
|
|
184
|
+
// StorageTextures stay at their max allocation (see constructor).
|
|
185
|
+
this.outputTarget.setSize( width, height );
|
|
186
|
+
this.outputTarget.texture.needsUpdate = true;
|
|
171
187
|
|
|
172
188
|
this.resW.value = width;
|
|
173
189
|
this.resH.value = height;
|
|
@@ -194,6 +210,7 @@ export class SSRC extends RenderStage {
|
|
|
194
210
|
this._prevNDTexA.dispose();
|
|
195
211
|
this._prevNDTexB.dispose();
|
|
196
212
|
this._outputTex.dispose();
|
|
213
|
+
this.outputTarget?.dispose();
|
|
197
214
|
this._colorTexNode?.dispose();
|
|
198
215
|
this._ndTexNode?.dispose();
|
|
199
216
|
this._motionTexNode?.dispose();
|
package/src/Stages/Variance.js
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import { Fn, wgslFn, float, int, uint, ivec2, uvec2, uniform, If, max,
|
|
2
2
|
textureLoad, textureStore, workgroupArray, workgroupBarrier, localId, workgroupId } from 'three/tsl';
|
|
3
|
-
import { TextureNode, StorageTexture } from 'three/webgpu';
|
|
4
|
-
import { FloatType, RGBAFormat, LinearFilter } from 'three';
|
|
3
|
+
import { RenderTarget, TextureNode, StorageTexture } from 'three/webgpu';
|
|
4
|
+
import { FloatType, RGBAFormat, LinearFilter, Box2, Vector2 } from 'three';
|
|
5
5
|
import { RenderStage, StageExecutionMode } from '../Pipeline/RenderStage.js';
|
|
6
6
|
import { luminance } from '../TSL/Common.js';
|
|
7
|
+
import { MAX_STORAGE_TEXTURE_SIZE } from '../EngineDefaults.js';
|
|
7
8
|
|
|
8
9
|
// ── wgslFn helpers ──────────────────────────────────────────
|
|
9
10
|
|
|
@@ -40,8 +41,7 @@ const temporalAccumulate = /*@__PURE__*/ wgslFn( `
|
|
|
40
41
|
* WebGPU Variance Estimation Stage (Compute Shader)
|
|
41
42
|
*
|
|
42
43
|
* Computes temporal and spatial variance from the path tracer output.
|
|
43
|
-
* Used by
|
|
44
|
-
* BilateralFilter for variance-guided filtering.
|
|
44
|
+
* Used by BilateralFilter for variance-guided filtering.
|
|
45
45
|
*
|
|
46
46
|
* Uses compute shader with workgroup shared memory for the 3×3
|
|
47
47
|
* spatial variance computation. Each 8×8 workgroup loads a 10×10
|
|
@@ -98,18 +98,34 @@ export class Variance extends RenderStage {
|
|
|
98
98
|
// FloatType (f32) required — HalfFloat's ~3.3 decimal digits cause catastrophic
|
|
99
99
|
// cancellation in (meanSq - mean²) for converged pixels, producing a variance
|
|
100
100
|
// floor of ~0.0001 that the (frame+1)² scaling amplifies to enormous values.
|
|
101
|
-
|
|
101
|
+
// StorageTextures over-allocated at max — defensive against three.js #33061
|
|
102
|
+
// (TSL compute pipeline keeps stale GPUTextureView after StorageTexture.setSize).
|
|
103
|
+
this._storageTexA = new StorageTexture( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE );
|
|
102
104
|
this._storageTexA.type = FloatType;
|
|
103
105
|
this._storageTexA.format = RGBAFormat;
|
|
104
106
|
this._storageTexA.minFilter = LinearFilter;
|
|
105
107
|
this._storageTexA.magFilter = LinearFilter;
|
|
106
108
|
|
|
107
|
-
this._storageTexB = new StorageTexture(
|
|
109
|
+
this._storageTexB = new StorageTexture( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE );
|
|
108
110
|
this._storageTexB.type = FloatType;
|
|
109
111
|
this._storageTexB.format = RGBAFormat;
|
|
110
112
|
this._storageTexB.minFilter = LinearFilter;
|
|
111
113
|
this._storageTexB.magFilter = LinearFilter;
|
|
112
114
|
|
|
115
|
+
this._srcRegion = new Box2( new Vector2( 0, 0 ), new Vector2( 0, 0 ) );
|
|
116
|
+
|
|
117
|
+
// Right-sized RenderTarget published to downstream (UV-sampled). The
|
|
118
|
+
// over-allocated StorageTexture itself must not be published — UV
|
|
119
|
+
// sampling a 2048 texture would read the wrong region.
|
|
120
|
+
this._outputTarget = new RenderTarget( w, h, {
|
|
121
|
+
type: FloatType,
|
|
122
|
+
format: RGBAFormat,
|
|
123
|
+
minFilter: LinearFilter,
|
|
124
|
+
magFilter: LinearFilter,
|
|
125
|
+
depthBuffer: false,
|
|
126
|
+
stencilBuffer: false
|
|
127
|
+
} );
|
|
128
|
+
|
|
113
129
|
this.currentMoments = 0; // 0 = write A, read B; 1 = write B, read A
|
|
114
130
|
this._compiled = false;
|
|
115
131
|
this._needsWarmReset = false;
|
|
@@ -275,8 +291,8 @@ export class Variance extends RenderStage {
|
|
|
275
291
|
const img = colorTex.image;
|
|
276
292
|
if ( img && img.width > 0 && img.height > 0 ) {
|
|
277
293
|
|
|
278
|
-
if ( img.width !== this.
|
|
279
|
-
img.height !== this.
|
|
294
|
+
if ( img.width !== this._outputTarget.width ||
|
|
295
|
+
img.height !== this._outputTarget.height ) {
|
|
280
296
|
|
|
281
297
|
this.setSize( img.width, img.height );
|
|
282
298
|
|
|
@@ -333,8 +349,13 @@ export class Variance extends RenderStage {
|
|
|
333
349
|
// Swap for next frame
|
|
334
350
|
this.currentMoments = 1 - this.currentMoments;
|
|
335
351
|
|
|
336
|
-
//
|
|
337
|
-
|
|
352
|
+
// Copy the active region out of the over-allocated StorageTexture into the
|
|
353
|
+
// right-sized RenderTarget; downstream stages UV-sample the latter.
|
|
354
|
+
this._srcRegion.max.set( this._outputTarget.width, this._outputTarget.height );
|
|
355
|
+
this.renderer.copyTextureToTexture( writeTarget, this._outputTarget.texture, this._srcRegion );
|
|
356
|
+
|
|
357
|
+
// Publish the RenderTarget (not the over-allocated StorageTexture)
|
|
358
|
+
context.setTexture( 'variance:output', this._outputTarget.texture );
|
|
338
359
|
|
|
339
360
|
}
|
|
340
361
|
|
|
@@ -352,8 +373,9 @@ export class Variance extends RenderStage {
|
|
|
352
373
|
|
|
353
374
|
setSize( width, height ) {
|
|
354
375
|
|
|
355
|
-
|
|
356
|
-
this.
|
|
376
|
+
// StorageTextures stay at their max allocation (see constructor).
|
|
377
|
+
this._outputTarget.setSize( width, height );
|
|
378
|
+
this._outputTarget.texture.needsUpdate = true;
|
|
357
379
|
this.resW.value = width;
|
|
358
380
|
this.resH.value = height;
|
|
359
381
|
|
|
@@ -371,6 +393,7 @@ export class Variance extends RenderStage {
|
|
|
371
393
|
this._computeNodeB?.dispose();
|
|
372
394
|
this._storageTexA?.dispose();
|
|
373
395
|
this._storageTexB?.dispose();
|
|
396
|
+
this._outputTarget?.dispose();
|
|
374
397
|
this._colorTexNode?.dispose();
|
|
375
398
|
this._readTexNodeA?.dispose();
|
|
376
399
|
this._readTexNodeB?.dispose();
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CompactKernel.js — wavefront stream compaction: active rays → dense index array for next bounce (256×1, 1D).
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import {
|
|
6
|
+
Fn, uint, select,
|
|
7
|
+
If,
|
|
8
|
+
instanceIndex,
|
|
9
|
+
atomicAdd, atomicLoad,
|
|
10
|
+
subgroupExclusiveAdd, subgroupAdd, subgroupBroadcast,
|
|
11
|
+
Return,
|
|
12
|
+
} from 'three/tsl';
|
|
13
|
+
|
|
14
|
+
import { readRayBounceFlags } from '../Processor/PackedRayBuffer.js';
|
|
15
|
+
import { RAY_FLAG, COUNTER } from '../Processor/QueueManager.js';
|
|
16
|
+
|
|
17
|
+
const WG_SIZE = 256;
|
|
18
|
+
|
|
19
|
+
/**
 * Builds the per-thread stream-compaction kernel body.
 *
 * Every thread whose index is below the ENTERING_COUNT counter reads one ray ID from the
 * dense input list. If that ray's bounce flags still contain RAY_FLAG.ACTIVE, the thread
 * reserves an output slot with an atomicAdd on ACTIVE_RAY_COUNT and stores the ray ID there.
 *
 * @param {Object} params
 * @param {Object} params.rayBufferRO - Ray buffer handed to readRayBounceFlags.
 * @param {Object} params.activeIndicesReadRO - Dense ray-ID list read by this pass.
 * @param {Object} params.activeIndicesWriteRW - Dense ray-ID list written by this pass.
 * @param {Object} params.counters - Atomic counter buffer; required, because ACTIVE_RAY_COUNT
 *   is the write cursor for survivors.
 * @param {*} [params.currentActiveCount] - Still accepted for backward compatibility but not
 *   read: a counter-less fallback could never reach the survivor write.
 * @returns {Object} The value returned by TSL `Fn` for the kernel body.
 * @throws {TypeError} While the kernel body is being evaluated, if `params.counters` is missing.
 */
export function buildCompactKernel( params ) {

	const { rayBufferRO, activeIndicesReadRO, activeIndicesWriteRW, counters } = params;

	const computeFn = Fn( () => {

		// Fail with a descriptive message instead of the opaque "cannot read properties of
		// undefined" the survivor write would raise further down.
		if ( ! counters ) {

			throw new TypeError( 'buildCompactKernel: params.counters is required (ACTIVE_RAY_COUNT is the compaction write cursor).' );

		}

		const threadIdx = instanceIndex;

		// ACTIVE_RAY_COUNT is zeroed before compact, so the dense-list length comes from ENTERING_COUNT.
		const bound = atomicLoad( counters.element( uint( COUNTER.ENTERING_COUNT ) ) );
		If( threadIdx.greaterThanEqual( bound ), () => {

			Return();

		} );

		const rayID = activeIndicesReadRO.element( threadIdx );

		const flags = readRayBounceFlags( rayBufferRO, rayID );

		// Survivors claim the next free slot of the output list (one atomicAdd per survivor).
		If( flags.bitAnd( uint( RAY_FLAG.ACTIVE ) ).notEqual( uint( 0 ) ), () => {

			const writeIdx = atomicAdd( counters.element( uint( COUNTER.ACTIVE_RAY_COUNT ) ), uint( 1 ) );
			activeIndicesWriteRW.element( writeIdx ).assign( rayID );

		} );

	} );

	return computeFn;

}
|
|
57
|
+
|
|
58
|
+
/**
 * Subgroup prefix-sum compaction: one global atomicAdd per subgroup instead of per survivor.
 * Requires renderer.hasFeature('subgroups'); control flow must stay uniform (no divergent Return).
 *
 * @param {Object} params
 * @param {Object} params.rayBufferRO - Ray buffer handed to readRayBounceFlags.
 * @param {Object} params.activeIndicesReadRO - Dense ray-ID list read by this pass.
 * @param {Object} params.activeIndicesWriteRW - Dense ray-ID list written by this pass.
 * @param {Object} params.counters - Atomic counter buffer; required, because lane 0 of each
 *   subgroup reserves its output range through ACTIVE_RAY_COUNT.
 * @param {*} [params.currentActiveCount] - Still accepted for backward compatibility but not
 *   read: a counter-less fallback could never reach the per-subgroup atomicAdd.
 * @returns {Object} The value returned by TSL `Fn` for the kernel body.
 * @throws {TypeError} While the kernel body is being evaluated, if `params.counters` is missing.
 */
export function buildCompactSubgroupKernel( params ) {

	const { rayBufferRO, activeIndicesReadRO, activeIndicesWriteRW, counters } = params;

	const computeFn = Fn( () => {

		// Fail with a descriptive message instead of the opaque "cannot read properties of
		// undefined" the per-subgroup atomicAdd would raise further down.
		if ( ! counters ) {

			throw new TypeError( 'buildCompactSubgroupKernel: params.counters is required (ACTIVE_RAY_COUNT is the compaction write cursor).' );

		}

		const threadIdx = instanceIndex;
		const bound = atomicLoad( counters.element( uint( COUNTER.ENTERING_COUNT ) ) );

		// No early Return: all lanes must reach the subgroup ops; out-of-range lanes contribute 0 and read stale-but-in-capacity slots.
		const inRange = threadIdx.lessThan( bound );
		const rayID = activeIndicesReadRO.element( threadIdx );
		const flags = readRayBounceFlags( rayBufferRO, rayID );
		const isActive = inRange.and( flags.bitAnd( uint( RAY_FLAG.ACTIVE ) ).notEqual( uint( 0 ) ) );
		const activeU = select( isActive, uint( 1 ), uint( 0 ) );

		// .toVar() materializes the subgroup ops at uniform control flow; inlining into the divergent If(isActive) write is rejected by WGSL.
		const localOffset = subgroupExclusiveAdd( activeU ).toVar();
		const sgCount = subgroupAdd( activeU ).toVar();

		// laneId via exclusiveAdd(1) since TSL lacks subgroup_invocation_id; lane 0 does the single per-subgroup atomicAdd.
		const laneId = subgroupExclusiveAdd( uint( 1 ) ).toVar();
		const base = uint( 0 ).toVar();
		If( laneId.equal( uint( 0 ) ), () => {

			base.assign( atomicAdd( counters.element( uint( COUNTER.ACTIVE_RAY_COUNT ) ), sgCount ) );

		} );

		// Share lane 0's reserved base offset with the rest of the subgroup.
		const sgBase = subgroupBroadcast( base, uint( 0 ) ).toVar();

		// Each survivor lands at base + (number of survivors in lower lanes).
		If( isActive, () => {

			activeIndicesWriteRW.element( sgBase.add( localOffset ) ).assign( rayID );

		} );

	} );

	return computeFn;

}
|
|
109
|
+
|
|
110
|
+
export { WG_SIZE as COMPACT_WG_SIZE };
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DebugKernel.js — wavefront debug visualization (16×16, 2D screen-space dispatch).
|
|
3
|
+
*
|
|
4
|
+
* Single-pass primary-ray debug viz for visMode 1-10 (mode 11 = NaN/Inf is a FinalWrite
|
|
5
|
+
* post-branch on the accumulated color, handled there). Generates a camera ray per pixel and
|
|
6
|
+
* delegates to the renderer-agnostic TraceDebugMode for the per-mode color; mode 9 (stratified
|
|
7
|
+
* sample pattern) is computed inline. Writes the color directly to the output (no accumulation).
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import {
|
|
11
|
+
Fn, float, vec2, vec4, int, uint, uvec2,
|
|
12
|
+
If, textureStore,
|
|
13
|
+
localId, workgroupId,
|
|
14
|
+
} from 'three/tsl';
|
|
15
|
+
|
|
16
|
+
import { generateRayFromCamera } from './BVHTraversal.js';
|
|
17
|
+
import { Ray } from './Struct.js';
|
|
18
|
+
import { TraceDebugMode } from './Debugger.js';
|
|
19
|
+
import { pcgHash, getStratifiedSample } from './Random.js';
|
|
20
|
+
|
|
21
|
+
const WG_SIZE = 16;
|
|
22
|
+
|
|
23
|
+
/**
 * Assembles the debug-visualization compute function.
 *
 * For every invocation inside the render rectangle the kernel generates an un-jittered
 * primary camera ray, picks a color (mode 9 inline, every other mode through
 * TraceDebugMode) and stores it straight into the color target. The normal/depth and
 * albedo targets receive placeholder values in the same pass.
 *
 * @param {Object} params - Uniforms, buffers and textures consumed by the kernel (see the
 *   destructuring below for the full list). `samplesPerPass` defaults to 1.
 * @returns {Object} The value returned by TSL `Fn` for the kernel body.
 */
export function buildDebugKernel( params ) {

	const {
		writeColorTex, writeNDTex, writeAlbedoTex,
		resolution, renderWidth, renderHeight,
		cameraWorldMatrix, cameraProjectionMatrixInverse, cameraProjectionMatrix, cameraViewMatrix,
		enableDOF, focalLength, aperture, focusDistance, sceneScale, apertureScale, anamorphicRatio,
		bvhBuffer, triangleBuffer, materialBuffer,
		envTexture, environmentMatrix, environmentIntensity, enableEnvironmentLight,
		visMode, debugVisScale,
		albedoMaps, normalMaps, bumpMaps, metalnessMaps, roughnessMaps, emissiveMaps,
		frame,
		samplesPerPass = 1,
	} = params;

	// Argument groups forwarded unchanged to the shared helpers.
	const lensArgs = [ enableDOF, focalLength, aperture, focusDistance, sceneScale, apertureScale, anamorphicRatio ];
	const sceneArgs = [ bvhBuffer, triangleBuffer, materialBuffer ];
	const envArgs = [ envTexture, environmentMatrix, environmentIntensity, enableEnvironmentLight ];
	const materialMapArgs = [ albedoMaps, normalMaps, bumpMaps, metalnessMaps, roughnessMaps, emissiveMaps ];

	// Global invocation coordinate along one axis: workgroup origin plus local offset.
	const invocationCoord = ( axis ) => int( workgroupId[ axis ] ).mul( WG_SIZE ).add( int( localId[ axis ] ) );

	return Fn( () => {

		const col = invocationCoord( 'x' );
		const row = invocationCoord( 'y' );

		// The dispatch may overshoot the render rectangle; only in-bounds invocations do work.
		const insideRender = col.lessThan( renderWidth ).and( row.lessThan( renderHeight ) );

		If( insideRender, () => {

			const pixelCoord = vec2( float( col ).add( 0.5 ), float( row ).add( 0.5 ) );
			const linearIndex = row.mul( int( resolution.x ) ).add( col );
			const rngSeed = pcgHash( { state: uint( linearIndex ).add( uint( 1 ) ) } ).toVar();

			// Pixel-centre ray without AA jitter keeps the debug image stable and sharp.
			const ndc = pixelCoord.div( resolution ).mul( 2.0 ).sub( 1.0 ).toVar();
			ndc.y.assign( ndc.y.negate() );

			const primaryRay = Ray.wrap( generateRayFromCamera(
				ndc, rngSeed,
				cameraWorldMatrix, cameraProjectionMatrixInverse,
				...lensArgs,
			) );

			// Magenta default; overwritten by one of the two branches below.
			const debugColor = vec4( 1.0, 0.0, 1.0, 1.0 ).toVar();

			If( visMode.equal( int( 9 ) ), () => {

				// Mode 9 shows the stratified jitter itself in R,G, using the real per-pass
				// sample count so that more than one sample per pass reveals the lattice.
				const jitter = getStratifiedSample( pixelCoord, int( 0 ), int( samplesPerPass ), rngSeed, resolution, frame );
				debugColor.assign( vec4( jitter, 1.0, 1.0 ) );

			} ).Else( () => {

				// All remaining modes share the renderer-agnostic per-mode color routine.
				debugColor.assign( TraceDebugMode(
					primaryRay.origin, primaryRay.direction,
					...sceneArgs,
					...envArgs,
					visMode, debugVisScale,
					pixelCoord, resolution,
					...materialMapArgs,
					cameraProjectionMatrix, cameraViewMatrix,
					frame,
				) );

			} );

			const texel = uvec2( uint( col ), uint( row ) );
			textureStore( writeColorTex, texel, debugColor ).toWriteOnly();

			// Placeholder normal/depth and albedo so later stages never read stale data on a debug frame.
			textureStore( writeNDTex, texel, vec4( 0.5, 0.5, 1.0, 1.0 ) ).toWriteOnly();
			textureStore( writeAlbedoTex, texel, vec4( debugColor.xyz, 1.0 ) ).toWriteOnly();

		} );

	} );

}
|
|
97
|
+
|
|
98
|
+
export { WG_SIZE as DEBUG_WG_SIZE };
|
package/src/TSL/Environment.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Fn, wgslFn, vec2, vec4, float, int, If, texture, dot, sin, sqrt, floor, fract, min, max, mix, clamp } from 'three/tsl';
|
|
1
|
+
import { Fn, wgslFn, vec2, vec4, ivec2, float, int, If, texture, dot, sin, sqrt, floor, fract, min, max, mix, clamp } from 'three/tsl';
|
|
2
2
|
|
|
3
3
|
import { REC709_LUMINANCE_COEFFICIENTS } from './Common.js';
|
|
4
4
|
|
|
@@ -74,7 +74,7 @@ export const sampleEquirect = Fn( ( [ environment, direction, environmentMatrix,
|
|
|
74
74
|
// Exact implementation from three-gpu-pathtracer
|
|
75
75
|
export const sampleEquirectProbability = Fn( ( [
|
|
76
76
|
environment,
|
|
77
|
-
|
|
77
|
+
envCDFTexture,
|
|
78
78
|
environmentMatrix,
|
|
79
79
|
environmentIntensity,
|
|
80
80
|
envTotalSum,
|
|
@@ -84,9 +84,8 @@ export const sampleEquirectProbability = Fn( ( [
|
|
|
84
84
|
colorOutput
|
|
85
85
|
] ) => {
|
|
86
86
|
|
|
87
|
-
//
|
|
88
|
-
|
|
89
|
-
const condOffset = int( envResolution.y ).toVar();
|
|
87
|
+
// CDF texture layout: (W+1)×H R32F — conditional[cy*W+cx] at texel (cx,cy); marginal[cy] at column W.
|
|
88
|
+
const cdfMarginalCol = int( envResolution.x ).toVar();
|
|
90
89
|
|
|
91
90
|
// Sample marginal CDF for V coordinate (1D, linear interpolation)
|
|
92
91
|
const marginalSize = envResolution.y;
|
|
@@ -94,7 +93,11 @@ export const sampleEquirectProbability = Fn( ( [
|
|
|
94
93
|
const mI0 = int( floor( mIdx ) );
|
|
95
94
|
const mI1 = min( mI0.add( 1 ), int( marginalSize ).sub( 1 ) );
|
|
96
95
|
const mFrac = fract( mIdx );
|
|
97
|
-
const v = mix(
|
|
96
|
+
const v = mix(
|
|
97
|
+
envCDFTexture.load( ivec2( cdfMarginalCol, mI0 ) ).x,
|
|
98
|
+
envCDFTexture.load( ivec2( cdfMarginalCol, mI1 ) ).x,
|
|
99
|
+
mFrac,
|
|
100
|
+
).toVar();
|
|
98
101
|
|
|
99
102
|
// Sample conditional CDF for U coordinate (2D grid, bilinear interpolation)
|
|
100
103
|
const condW = envResolution.x;
|
|
@@ -107,11 +110,10 @@ export const sampleEquirectProbability = Fn( ( [
|
|
|
107
110
|
const cy1 = min( cy0.add( 1 ), int( condH ).sub( 1 ) );
|
|
108
111
|
const fx = fract( cxf );
|
|
109
112
|
const fy = fract( cyf );
|
|
110
|
-
const
|
|
111
|
-
const
|
|
112
|
-
const
|
|
113
|
-
const
|
|
114
|
-
const v11 = envCDFBuffer.element( condOffset.add( cy1.mul( condWi ).add( cx1 ) ) );
|
|
113
|
+
const v00 = envCDFTexture.load( ivec2( cx0, cy0 ) ).x;
|
|
114
|
+
const v10 = envCDFTexture.load( ivec2( cx1, cy0 ) ).x;
|
|
115
|
+
const v01 = envCDFTexture.load( ivec2( cx0, cy1 ) ).x;
|
|
116
|
+
const v11 = envCDFTexture.load( ivec2( cx1, cy1 ) ).x;
|
|
115
117
|
const u = mix( mix( v00, v10, fx ), mix( v01, v11, fx ), fy ).toVar();
|
|
116
118
|
|
|
117
119
|
const uv = vec2( u, v ).toVar();
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ExtendKernel.js — wavefront BVH traversal (256×1, 1D ray-parallel dispatch).
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import {
|
|
6
|
+
Fn, uint,
|
|
7
|
+
If,
|
|
8
|
+
instanceIndex,
|
|
9
|
+
atomicLoad,
|
|
10
|
+
Return,
|
|
11
|
+
} from 'three/tsl';
|
|
12
|
+
|
|
13
|
+
import { traverseBVH } from './BVHTraversal.js';
|
|
14
|
+
import { Ray, HitInfo } from './Struct.js';
|
|
15
|
+
import {
|
|
16
|
+
readRayOrigin, readRayDirection, readMediumStack,
|
|
17
|
+
writeHitPacked,
|
|
18
|
+
} from '../Processor/PackedRayBuffer.js';
|
|
19
|
+
import { COUNTER } from '../Processor/QueueManager.js';
|
|
20
|
+
|
|
21
|
+
const WG_SIZE = 256;
|
|
22
|
+
|
|
23
|
+
/**
 * Builds the wavefront "extend" kernel body: one thread per active ray traverses the BVH
 * and stores the packed hit record for that ray.
 *
 * @param {Object} params
 * @param {Object} params.bvhBuffer - BVH buffer forwarded to traverseBVH.
 * @param {Object} params.triangleBuffer - Triangle buffer forwarded to traverseBVH.
 * @param {Object} params.rayBufferRO - Ray buffer read for origin, direction and medium stack.
 * @param {Object} params.hitBufferRW - Hit buffer written through writeHitPacked.
 * @param {Object} params.activeIndicesRO - Dense list mapping thread index to ray ID.
 * @param {Object} [params.counters] - Atomic counter buffer; when present the thread bound is
 *   read from ENTERING_COUNT.
 * @param {*} [params.maxRayCount] - Thread bound used when `counters` is not supplied.
 * @returns {Object} The value returned by TSL `Fn` for the kernel body.
 *
 * `params.materialBuffer` may still be passed by callers; the kernel does not read it.
 */
export function buildExtendKernel( params ) {

	const {
		bvhBuffer, triangleBuffer,
		rayBufferRO,
		hitBufferRW,
		activeIndicesRO,
		counters,
		maxRayCount,
	} = params;

	const computeFn = Fn( () => {

		const threadIdx = instanceIndex;

		// kernels bound on ENTERING_COUNT so an over-sized (margin) dispatch is safe.
		const bound = counters ? atomicLoad( counters.element( uint( COUNTER.ENTERING_COUNT ) ) ) : maxRayCount;
		If( threadIdx.greaterThanEqual( bound ), () => {

			Return();

		} );

		const rayID = activeIndicesRO.element( threadIdx );

		const origin = readRayOrigin( rayBufferRO, rayID ).toVar();
		const direction = readRayDirection( rayBufferRO, rayID ).toVar();

		const ray = Ray( { origin, direction } );

		// insideMedium bypasses front/back culling so the ray can hit a glass/SSS back-facing boundary.
		const insideMedium = readMediumStack( rayBufferRO, rayID ).stackDepth.greaterThan( uint( 0 ) );
		const hitInfo = HitInfo.wrap( traverseBVH(
			ray, bvhBuffer, triangleBuffer, insideMedium,
		) ).toVar();

		// Index fields are converted to uint before being packed alongside distance, UV and normal.
		writeHitPacked(
			hitBufferRW, rayID,
			hitInfo.dst,
			uint( hitInfo.triangleIndex ),
			hitInfo.uv.x, hitInfo.uv.y,
			hitInfo.normal,
			uint( hitInfo.materialIndex ),
			uint( hitInfo.meshIndex ),
		);

	} );

	return computeFn;

}
|
|
74
|
+
|
|
75
|
+
export { WG_SIZE as EXTEND_WG_SIZE };
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* FinalWriteKernel.js — wavefront final output: temporal accumulation + MRT StorageTexture writes (16×16, 2D).
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import {
|
|
6
|
+
Fn, wgslFn, float, vec2, vec4, int, uint, uvec2,
|
|
7
|
+
If, mix, select, texture, textureStore,
|
|
8
|
+
localId, workgroupId,
|
|
9
|
+
} from 'three/tsl';
|
|
10
|
+
|
|
11
|
+
import {
|
|
12
|
+
readRayRadiance, readGBuffer, gbDecodeNormalDepth, gbDecodeAlbedo,
|
|
13
|
+
} from '../Processor/PackedRayBuffer.js';
|
|
14
|
+
|
|
15
|
+
const WG_SIZE = 16;
|
|
16
|
+
|
|
17
|
+
// Debug mode 11: NaN/Inf detector — red where the accumulated color is NaN/Inf, black elsewhere.
|
|
18
|
+
// nanInfToRed( c ): returns vec3f( 1, 0, 0 ) when any channel of `c` is NaN (detected by the
// self-inequality c.x != c.x) or has a magnitude above 1e30 (the threshold this helper
// treats as Inf); otherwise returns vec3f( 0 ).
const nanInfToRed = /*@__PURE__*/ wgslFn( `
|
|
19
|
+
fn nanInfToRed( c: vec3f ) -> vec3f {
|
|
20
|
+
let isNan = c.x != c.x || c.y != c.y || c.z != c.z;
|
|
21
|
+
let isInf = abs( c.x ) > 1e30f || abs( c.y ) > 1e30f || abs( c.z ) > 1e30f;
|
|
22
|
+
if ( isNan || isInf ) { return vec3f( 1.0f, 0.0f, 0.0f ); }
|
|
23
|
+
return vec3f( 0.0f );
|
|
24
|
+
}
|
|
25
|
+
` );
|
|
26
|
+
|
|
27
|
+
/**
 * Assembles the final-write compute function.
 *
 * For every invocation inside the render rectangle the kernel averages the pixel's
 * sample slots, optionally blends color / normal-depth / albedo with the previous
 * accumulated frame, applies the mode-11 NaN/Inf visualization, and stores the three
 * results into their storage textures.
 *
 * @param {Object} params - Buffers, textures and uniforms consumed by the kernel (see the
 *   destructuring below for the full list). `samplesPerPass` defaults to 1 and is
 *   truncated to an integer.
 * @returns {Object} The value returned by TSL `Fn` for the kernel body.
 */
export function buildFinalWriteKernel( params ) {

	const {
		rayBufferRO, gBufferRO,
		writeColorTex, writeNDTex, writeAlbedoTex,
		resolution, frame,
		enableAccumulation, hasPreviousAccumulated, accumulationAlpha, cameraIsMoving,
		transparentBackground,
		prevAccumTexture, prevNormalDepthTexture, prevAlbedoTexture,
		renderWidth, renderHeight,
		// Multi-sample: slot k of a pixel lives at pixel + k*w*h, with w*h taken from the resolution uniform.
		samplesPerPass = 1,
		visMode,
	} = params;

	const sampleCount = samplesPerPass | 0;

	// Global invocation coordinate along one axis: workgroup origin plus local offset.
	const invocationCoord = ( axis ) => int( workgroupId[ axis ] ).mul( WG_SIZE ).add( int( localId[ axis ] ) );

	// Radiance for one pixel: slot 0 alone, or the mean over all `sampleCount` slots.
	const averagedRadiance = ( rayID ) => {

		const firstSlot = readRayRadiance( rayBufferRO, rayID );
		if ( sampleCount <= 1 ) return firstSlot;

		const sum = firstSlot.toVar();
		// Slot stride is computed from the resolution uniform at run time rather than baked in.
		const slotStride = uint( resolution.x ).mul( uint( resolution.y ) ).toVar();

		let slot = 1;
		while ( slot < sampleCount ) {

			sum.addAssign( readRayRadiance( rayBufferRO, rayID.add( uint( slot ).mul( slotStride ) ) ) );
			slot ++;

		}

		sum.assign( sum.div( float( sampleCount ) ) );
		return sum;

	};

	return Fn( () => {

		const col = invocationCoord( 'x' );
		const row = invocationCoord( 'y' );

		// The dispatch may overshoot the render rectangle; only in-bounds invocations do work.
		const insideRender = col.lessThan( renderWidth ).and( row.lessThan( renderHeight ) );

		If( insideRender, () => {

			const linearIndex = row.mul( int( resolution.x ) ).add( col );
			const rayID = uint( linearIndex );

			const sampleColor = averagedRadiance( rayID );

			// Normal/depth and albedo come from the per-pixel G-buffer entry (slot 0), which is stored packed.
			const packedG = readGBuffer( gBufferRO, rayID );
			const decodedND = gbDecodeNormalDepth( packedG );
			const decodedAlbedo = vec4( gbDecodeAlbedo( packedG ), 0.0 );

			const outColor = sampleColor.xyz.toVar();
			const outNormalDepth = decodedND.toVar();
			const outAlbedo = decodedAlbedo.xyz.toVar();
			const outAlpha = select( transparentBackground, sampleColor.w, float( 1.0 ) ).toVar();

			const pixelCentre = vec2( float( col ).add( 0.5 ), float( row ).add( 0.5 ) );
			const historyUV = pixelCentre.div( resolution );

			// Mode 11 skips the blend (the original note cites megakernel parity) so the NaN/Inf
			// check sees each frame's fresh color instead of a NaN carried forward by mix().
			const blendWithHistory = enableAccumulation
				.and( cameraIsMoving.not() )
				.and( frame.greaterThan( uint( 0 ) ) )
				.and( hasPreviousAccumulated )
				.and( visMode.notEqual( int( 11 ) ) );

			If( blendWithHistory, () => {

				const history = texture( prevAccumTexture, historyUV, 0 ).toVar();

				outColor.assign( mix( history.xyz, sampleColor.xyz, accumulationAlpha ) );
				outNormalDepth.assign( mix( texture( prevNormalDepthTexture, historyUV, 0 ), outNormalDepth, accumulationAlpha ) );
				outAlbedo.assign( mix( texture( prevAlbedoTexture, historyUV, 0 ).xyz, outAlbedo, accumulationAlpha ) );

				// Alpha is only accumulated when the background is transparent; otherwise it stays 1.
				If( transparentBackground, () => {

					outAlpha.assign( mix( history.w, sampleColor.w, accumulationAlpha ) );

				} );

			} );

			// Mode 11: replace the color with the NaN/Inf indicator (red when flagged, black otherwise).
			If( visMode.equal( int( 11 ) ), () => {

				outColor.assign( nanInfToRed( outColor ) );

			} );

			const texel = uvec2( uint( col ), uint( row ) );
			textureStore( writeColorTex, texel, vec4( outColor, outAlpha ) ).toWriteOnly();
			textureStore( writeNDTex, texel, outNormalDepth ).toWriteOnly();
			textureStore( writeAlbedoTex, texel, vec4( outAlbedo, 1.0 ) ).toWriteOnly();

		} );

	} );

}
|
|
120
|
+
|
|
121
|
+
export { WG_SIZE as FINALWRITE_WG_SIZE };
|