rayzee 6.4.0 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -5
- package/dist/rayzee.es.js +4953 -4225
- package/dist/rayzee.es.js.map +1 -1
- package/dist/rayzee.umd.js +157 -236
- package/dist/rayzee.umd.js.map +1 -1
- package/package.json +1 -1
- package/src/EngineDefaults.js +29 -13
- package/src/PathTracerApp.js +119 -26
- package/src/Pipeline/PipelineContext.js +1 -2
- package/src/Pipeline/RenderPipeline.js +1 -1
- package/src/Pipeline/RenderStage.js +1 -1
- package/src/Processor/CameraOptimizer.js +0 -5
- package/src/Processor/GeometryExtractor.js +22 -1
- package/src/Processor/KernelManager.js +277 -0
- package/src/Processor/PackedRayBuffer.js +265 -0
- package/src/Processor/QueueManager.js +173 -0
- package/src/Processor/SceneProcessor.js +1 -0
- package/src/Processor/ShaderBuilder.js +11 -316
- package/src/Processor/StorageTexturePool.js +29 -15
- package/src/Processor/TextureCreator.js +6 -0
- package/src/Processor/VRAMTracker.js +169 -0
- package/src/Processor/utils.js +11 -110
- package/src/RenderSettings.js +1 -3
- package/src/Stages/ASVGF.js +76 -20
- package/src/Stages/BilateralFilter.js +34 -10
- package/src/Stages/EdgeFilter.js +2 -3
- package/src/Stages/MotionVector.js +16 -9
- package/src/Stages/NormalDepth.js +17 -5
- package/src/Stages/PathTracer.js +671 -1456
- package/src/Stages/PathTracerStage.js +1451 -0
- package/src/Stages/SSRC.js +32 -15
- package/src/Stages/Variance.js +35 -12
- package/src/TSL/BVHTraversal.js +7 -1
- package/src/TSL/Common.js +12 -2
- package/src/TSL/CompactKernel.js +110 -0
- package/src/TSL/DebugKernel.js +98 -0
- package/src/TSL/Environment.js +13 -11
- package/src/TSL/ExtendKernel.js +75 -0
- package/src/TSL/FinalWriteKernel.js +121 -0
- package/src/TSL/GenerateKernel.js +109 -0
- package/src/TSL/LightsSampling.js +2 -2
- package/src/TSL/MaterialTransmission.js +32 -2
- package/src/TSL/PathTracerCore.js +43 -912
- package/src/TSL/ShadeKernel.js +873 -0
- package/src/TSL/Struct.js +5 -0
- package/src/TSL/Subsurface.js +232 -0
- package/src/TSL/patches.js +81 -4
- package/src/index.js +3 -0
- package/src/managers/CameraManager.js +1 -1
- package/src/managers/DenoisingManager.js +40 -75
- package/src/managers/EnvironmentManager.js +30 -39
- package/src/managers/MaterialDataManager.js +60 -1
- package/src/managers/OverlayManager.js +7 -22
- package/src/managers/UniformManager.js +1 -3
- package/src/managers/helpers/TileHelper.js +2 -2
- package/src/Stages/AdaptiveSampling.js +0 -483
- package/src/TSL/PathTracer.js +0 -384
- package/src/managers/TileManager.js +0 -298
package/src/Stages/SSRC.js
CHANGED
|
@@ -12,10 +12,11 @@
|
|
|
12
12
|
// Textures read: pathtracer:color, pathtracer:normalDepth, motionVector:screenSpace
|
|
13
13
|
|
|
14
14
|
import { uniform } from 'three/tsl';
|
|
15
|
-
import { StorageTexture, TextureNode } from 'three/webgpu';
|
|
16
|
-
import { HalfFloatType, RGBAFormat, NearestFilter, LinearFilter } from 'three';
|
|
15
|
+
import { StorageTexture, TextureNode, RenderTarget } from 'three/webgpu';
|
|
16
|
+
import { HalfFloatType, RGBAFormat, NearestFilter, LinearFilter, Box2, Vector2 } from 'three';
|
|
17
17
|
import { RenderStage, StageExecutionMode } from '../Pipeline/RenderStage.js';
|
|
18
18
|
import { buildTemporalPass, buildSpatialPass } from '../TSL/SSRC.js';
|
|
19
|
+
import { MAX_STORAGE_TEXTURE_SIZE } from '../EngineDefaults.js';
|
|
19
20
|
|
|
20
21
|
export class SSRC extends RenderStage {
|
|
21
22
|
|
|
@@ -51,18 +52,30 @@ export class SSRC extends RenderStage {
|
|
|
51
52
|
this._readPass1CacheTexNode = new TextureNode(); // current cache (for spatial pass)
|
|
52
53
|
|
|
53
54
|
// ─── StorageTextures (5 total) ───
|
|
54
|
-
|
|
55
|
+
// StorageTextures stay at max alloc — see resize crash fix (three.js #33061).
|
|
56
|
+
const w = 1, h = 1; // RTs/uniforms resized on first render
|
|
55
57
|
|
|
56
58
|
// Ping-pong temporal cache: .rgb = radiance, .w = history count
|
|
57
|
-
this._cacheTexA = this._createStorageTex(
|
|
58
|
-
this._cacheTexB = this._createStorageTex(
|
|
59
|
+
this._cacheTexA = this._createStorageTex( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE, NearestFilter );
|
|
60
|
+
this._cacheTexB = this._createStorageTex( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE, NearestFilter );
|
|
59
61
|
|
|
60
62
|
// Ping-pong previous-frame normalDepth (for edge-stopping in temporal pass)
|
|
61
|
-
this._prevNDTexA = this._createStorageTex(
|
|
62
|
-
this._prevNDTexB = this._createStorageTex(
|
|
63
|
+
this._prevNDTexA = this._createStorageTex( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE, NearestFilter );
|
|
64
|
+
this._prevNDTexB = this._createStorageTex( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE, NearestFilter );
|
|
63
65
|
|
|
64
66
|
// Final output (LinearFilter for Display fragment shader sampling)
|
|
65
|
-
this._outputTex = this._createStorageTex(
|
|
67
|
+
this._outputTex = this._createStorageTex( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE, LinearFilter );
|
|
68
|
+
|
|
69
|
+
// Active-region copy target — published downstream (storage tex is over-allocated)
|
|
70
|
+
this._srcRegion = new Box2( new Vector2( 0, 0 ), new Vector2( 0, 0 ) );
|
|
71
|
+
this.outputTarget = new RenderTarget( w, h, {
|
|
72
|
+
type: HalfFloatType,
|
|
73
|
+
format: RGBAFormat,
|
|
74
|
+
minFilter: LinearFilter,
|
|
75
|
+
magFilter: LinearFilter,
|
|
76
|
+
depthBuffer: false,
|
|
77
|
+
stencilBuffer: false
|
|
78
|
+
} );
|
|
66
79
|
|
|
67
80
|
// ─── State ───
|
|
68
81
|
this._currentPingPong = 0; // 0: read B, write A; 1: read A, write B
|
|
@@ -99,7 +112,7 @@ export class SSRC extends RenderStage {
|
|
|
99
112
|
if ( colorTex?.image ) {
|
|
100
113
|
|
|
101
114
|
const { width, height } = colorTex.image;
|
|
102
|
-
if ( width !== this.
|
|
115
|
+
if ( width !== this.outputTarget.width || height !== this.outputTarget.height ) {
|
|
103
116
|
|
|
104
117
|
this.setSize( width, height );
|
|
105
118
|
|
|
@@ -145,8 +158,13 @@ export class SSRC extends RenderStage {
|
|
|
145
158
|
// Advance frames-since-reset counter (capped to avoid overflow)
|
|
146
159
|
this._framesSinceReset.value = Math.min( this._framesSinceReset.value + 1, 9999 );
|
|
147
160
|
|
|
161
|
+
// Copy active region out of the over-allocated StorageTexture into the
|
|
162
|
+
// right-sized RenderTarget; downstream stages UV-sample the latter.
|
|
163
|
+
this._srcRegion.max.set( this.outputTarget.width, this.outputTarget.height );
|
|
164
|
+
this.renderer.copyTextureToTexture( this._outputTex, this.outputTarget.texture, this._srcRegion );
|
|
165
|
+
|
|
148
166
|
// Publish final output
|
|
149
|
-
context.setTexture( 'ssrc:output', this.
|
|
167
|
+
context.setTexture( 'ssrc:output', this.outputTarget.texture );
|
|
150
168
|
|
|
151
169
|
// Advance ping-pong
|
|
152
170
|
this._currentPingPong = 1 - this._currentPingPong;
|
|
@@ -163,11 +181,9 @@ export class SSRC extends RenderStage {
|
|
|
163
181
|
|
|
164
182
|
if ( width < 1 || height < 1 ) return;
|
|
165
183
|
|
|
166
|
-
|
|
167
|
-
this.
|
|
168
|
-
this.
|
|
169
|
-
this._prevNDTexB.setSize( width, height );
|
|
170
|
-
this._outputTex.setSize( width, height );
|
|
184
|
+
// StorageTextures stay at their max allocation (see constructor).
|
|
185
|
+
this.outputTarget.setSize( width, height );
|
|
186
|
+
this.outputTarget.texture.needsUpdate = true;
|
|
171
187
|
|
|
172
188
|
this.resW.value = width;
|
|
173
189
|
this.resH.value = height;
|
|
@@ -194,6 +210,7 @@ export class SSRC extends RenderStage {
|
|
|
194
210
|
this._prevNDTexA.dispose();
|
|
195
211
|
this._prevNDTexB.dispose();
|
|
196
212
|
this._outputTex.dispose();
|
|
213
|
+
this.outputTarget?.dispose();
|
|
197
214
|
this._colorTexNode?.dispose();
|
|
198
215
|
this._ndTexNode?.dispose();
|
|
199
216
|
this._motionTexNode?.dispose();
|
package/src/Stages/Variance.js
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import { Fn, wgslFn, float, int, uint, ivec2, uvec2, uniform, If, max,
|
|
2
2
|
textureLoad, textureStore, workgroupArray, workgroupBarrier, localId, workgroupId } from 'three/tsl';
|
|
3
|
-
import { TextureNode, StorageTexture } from 'three/webgpu';
|
|
4
|
-
import { FloatType, RGBAFormat, LinearFilter } from 'three';
|
|
3
|
+
import { RenderTarget, TextureNode, StorageTexture } from 'three/webgpu';
|
|
4
|
+
import { FloatType, RGBAFormat, LinearFilter, Box2, Vector2 } from 'three';
|
|
5
5
|
import { RenderStage, StageExecutionMode } from '../Pipeline/RenderStage.js';
|
|
6
6
|
import { luminance } from '../TSL/Common.js';
|
|
7
|
+
import { MAX_STORAGE_TEXTURE_SIZE } from '../EngineDefaults.js';
|
|
7
8
|
|
|
8
9
|
// ── wgslFn helpers ──────────────────────────────────────────
|
|
9
10
|
|
|
@@ -40,8 +41,7 @@ const temporalAccumulate = /*@__PURE__*/ wgslFn( `
|
|
|
40
41
|
* WebGPU Variance Estimation Stage (Compute Shader)
|
|
41
42
|
*
|
|
42
43
|
* Computes temporal and spatial variance from the path tracer output.
|
|
43
|
-
* Used by
|
|
44
|
-
* BilateralFilter for variance-guided filtering.
|
|
44
|
+
* Used by BilateralFilter for variance-guided filtering.
|
|
45
45
|
*
|
|
46
46
|
* Uses compute shader with workgroup shared memory for the 3×3
|
|
47
47
|
* spatial variance computation. Each 8×8 workgroup loads a 10×10
|
|
@@ -98,18 +98,34 @@ export class Variance extends RenderStage {
|
|
|
98
98
|
// FloatType (f32) required — HalfFloat's ~3.3 decimal digits cause catastrophic
|
|
99
99
|
// cancellation in (meanSq - mean²) for converged pixels, producing a variance
|
|
100
100
|
// floor of ~0.0001 that the (frame+1)² scaling amplifies to enormous values.
|
|
101
|
-
|
|
101
|
+
// StorageTextures over-allocated at max — defensive against three.js #33061
|
|
102
|
+
// (TSL compute pipeline keeps stale GPUTextureView after StorageTexture.setSize).
|
|
103
|
+
this._storageTexA = new StorageTexture( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE );
|
|
102
104
|
this._storageTexA.type = FloatType;
|
|
103
105
|
this._storageTexA.format = RGBAFormat;
|
|
104
106
|
this._storageTexA.minFilter = LinearFilter;
|
|
105
107
|
this._storageTexA.magFilter = LinearFilter;
|
|
106
108
|
|
|
107
|
-
this._storageTexB = new StorageTexture(
|
|
109
|
+
this._storageTexB = new StorageTexture( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE );
|
|
108
110
|
this._storageTexB.type = FloatType;
|
|
109
111
|
this._storageTexB.format = RGBAFormat;
|
|
110
112
|
this._storageTexB.minFilter = LinearFilter;
|
|
111
113
|
this._storageTexB.magFilter = LinearFilter;
|
|
112
114
|
|
|
115
|
+
this._srcRegion = new Box2( new Vector2( 0, 0 ), new Vector2( 0, 0 ) );
|
|
116
|
+
|
|
117
|
+
// Right-sized RenderTarget published to downstream (UV-sampled). The
|
|
118
|
+
// over-allocated StorageTexture itself must not be published — UV
|
|
119
|
+
// sampling a 2048 texture would read the wrong region.
|
|
120
|
+
this._outputTarget = new RenderTarget( w, h, {
|
|
121
|
+
type: FloatType,
|
|
122
|
+
format: RGBAFormat,
|
|
123
|
+
minFilter: LinearFilter,
|
|
124
|
+
magFilter: LinearFilter,
|
|
125
|
+
depthBuffer: false,
|
|
126
|
+
stencilBuffer: false
|
|
127
|
+
} );
|
|
128
|
+
|
|
113
129
|
this.currentMoments = 0; // 0 = write A, read B; 1 = write B, read A
|
|
114
130
|
this._compiled = false;
|
|
115
131
|
this._needsWarmReset = false;
|
|
@@ -275,8 +291,8 @@ export class Variance extends RenderStage {
|
|
|
275
291
|
const img = colorTex.image;
|
|
276
292
|
if ( img && img.width > 0 && img.height > 0 ) {
|
|
277
293
|
|
|
278
|
-
if ( img.width !== this.
|
|
279
|
-
img.height !== this.
|
|
294
|
+
if ( img.width !== this._outputTarget.width ||
|
|
295
|
+
img.height !== this._outputTarget.height ) {
|
|
280
296
|
|
|
281
297
|
this.setSize( img.width, img.height );
|
|
282
298
|
|
|
@@ -333,8 +349,13 @@ export class Variance extends RenderStage {
|
|
|
333
349
|
// Swap for next frame
|
|
334
350
|
this.currentMoments = 1 - this.currentMoments;
|
|
335
351
|
|
|
336
|
-
//
|
|
337
|
-
|
|
352
|
+
// Copy the active region out of the over-allocated StorageTexture into the
|
|
353
|
+
// right-sized RenderTarget; downstream stages UV-sample the latter.
|
|
354
|
+
this._srcRegion.max.set( this._outputTarget.width, this._outputTarget.height );
|
|
355
|
+
this.renderer.copyTextureToTexture( writeTarget, this._outputTarget.texture, this._srcRegion );
|
|
356
|
+
|
|
357
|
+
// Publish the RenderTarget (not the over-allocated StorageTexture)
|
|
358
|
+
context.setTexture( 'variance:output', this._outputTarget.texture );
|
|
338
359
|
|
|
339
360
|
}
|
|
340
361
|
|
|
@@ -352,8 +373,9 @@ export class Variance extends RenderStage {
|
|
|
352
373
|
|
|
353
374
|
setSize( width, height ) {
|
|
354
375
|
|
|
355
|
-
|
|
356
|
-
this.
|
|
376
|
+
// StorageTextures stay at their max allocation (see constructor).
|
|
377
|
+
this._outputTarget.setSize( width, height );
|
|
378
|
+
this._outputTarget.texture.needsUpdate = true;
|
|
357
379
|
this.resW.value = width;
|
|
358
380
|
this.resH.value = height;
|
|
359
381
|
|
|
@@ -371,6 +393,7 @@ export class Variance extends RenderStage {
|
|
|
371
393
|
this._computeNodeB?.dispose();
|
|
372
394
|
this._storageTexA?.dispose();
|
|
373
395
|
this._storageTexB?.dispose();
|
|
396
|
+
this._outputTarget?.dispose();
|
|
374
397
|
this._colorTexNode?.dispose();
|
|
375
398
|
this._readTexNodeA?.dispose();
|
|
376
399
|
this._readTexNodeB?.dispose();
|
package/src/TSL/BVHTraversal.js
CHANGED
|
@@ -19,6 +19,7 @@ import {
|
|
|
19
19
|
lessThan,
|
|
20
20
|
mat3,
|
|
21
21
|
array,
|
|
22
|
+
bool as tslBool,
|
|
22
23
|
} from 'three/tsl';
|
|
23
24
|
|
|
24
25
|
import { Ray, HitInfo } from './Struct.js';
|
|
@@ -178,8 +179,13 @@ export const traverseBVH = Fn( ( [
|
|
|
178
179
|
ray,
|
|
179
180
|
bvhBuffer,
|
|
180
181
|
triangleBuffer,
|
|
182
|
+
insideMedium, // optional: when true (ray inside a medium), bypass front/back culling
|
|
181
183
|
] ) => {
|
|
182
184
|
|
|
185
|
+
// Interior medium rays (SSS/transmission) must be able to hit boundary faces from
|
|
186
|
+
// either side to find the exit; exterior rays honor the authored side as before.
|
|
187
|
+
const inMedium = insideMedium ?? tslBool( false );
|
|
188
|
+
|
|
183
189
|
const closestHit = HitInfo( {
|
|
184
190
|
didHit: false,
|
|
185
191
|
dst: float( 1e20 ),
|
|
@@ -280,7 +286,7 @@ export const traverseBVH = Fn( ( [
|
|
|
280
286
|
|
|
281
287
|
// Side culling (inline; per-mesh visibility is at the BLAS-pointer level).
|
|
282
288
|
// 0=front (reject back-facing), 1=back (reject front-facing), 2=double (pass).
|
|
283
|
-
const sidePass = side.equal( int( 2 ) )
|
|
289
|
+
const sidePass = inMedium.or( side.equal( int( 2 ) ) )
|
|
284
290
|
.or( side.equal( int( 0 ) ).and( rayDotNormal.lessThan( - 0.0001 ) ) )
|
|
285
291
|
.or( side.equal( int( 1 ) ).and( rayDotNormal.greaterThan( 0.0001 ) ) );
|
|
286
292
|
If( sidePass, () => {
|
package/src/TSL/Common.js
CHANGED
|
@@ -201,13 +201,14 @@ export const applySoftSuppressionRGB = wgslFn( `
|
|
|
201
201
|
`, [ applySoftSuppression ] );
|
|
202
202
|
|
|
203
203
|
// Pre-computed material classification for faster branching
|
|
204
|
-
export const classifyMaterial = Fn( ( [ metalness, roughness, transmission, clearcoat, emissive ] ) => {
|
|
204
|
+
export const classifyMaterial = Fn( ( [ metalness, roughness, transmission, clearcoat, emissive, subsurface ] ) => {
|
|
205
205
|
|
|
206
206
|
const isMetallic = metalness.greaterThan( 0.7 ).toVar();
|
|
207
207
|
const isRough = roughness.greaterThan( 0.8 );
|
|
208
208
|
const isSmooth = roughness.lessThan( 0.3 ).toVar();
|
|
209
209
|
const isTransmissive = transmission.greaterThan( 0.5 ).toVar();
|
|
210
210
|
const hasClearcoat = clearcoat.greaterThan( 0.5 ).toVar();
|
|
211
|
+
const isSubsurface = subsurface.greaterThan( 0.0 ); // only feeds complexityScore below
|
|
211
212
|
|
|
212
213
|
// Fast emissive check using sum
|
|
213
214
|
const emissiveMag = emissive.x.add( emissive.y ).add( emissive.z );
|
|
@@ -218,7 +219,8 @@ export const classifyMaterial = Fn( ( [ metalness, roughness, transmission, clea
|
|
|
218
219
|
.add( float( 0.25 ).mul( float( isSmooth ) ) )
|
|
219
220
|
.add( float( 0.45 ).mul( float( isTransmissive ) ) )
|
|
220
221
|
.add( float( 0.35 ).mul( float( hasClearcoat ) ) )
|
|
221
|
-
.add( float( 0.3 ).mul( float( isEmissive ) ) )
|
|
222
|
+
.add( float( 0.3 ).mul( float( isEmissive ) ) )
|
|
223
|
+
.add( float( 0.4 ).mul( float( isSubsurface ) ) ); // SSS walks are deep + high-value → keep alive in RR
|
|
222
224
|
|
|
223
225
|
// Add material interaction complexity
|
|
224
226
|
const interactionComplexity = float( 0.0 ).toVar();
|
|
@@ -340,6 +342,9 @@ export const getMaterial = Fn( ( [ materialIndex, materialBuffer ] ) => {
|
|
|
340
342
|
const data24 = getDatafromStorageBuffer( materialBuffer, materialIndex, int( S.BUMP_TRANSFORM_B ), int( MATERIAL_SLOTS ) ).toVar();
|
|
341
343
|
const data25 = getDatafromStorageBuffer( materialBuffer, materialIndex, int( S.DISPLACEMENT_TRANSFORM_A ), int( MATERIAL_SLOTS ) ).toVar();
|
|
342
344
|
const data26 = getDatafromStorageBuffer( materialBuffer, materialIndex, int( S.DISPLACEMENT_TRANSFORM_B ), int( MATERIAL_SLOTS ) ).toVar();
|
|
345
|
+
const data27 = getDatafromStorageBuffer( materialBuffer, materialIndex, int( S.SUBSURFACE_A ), int( MATERIAL_SLOTS ) ).toVar();
|
|
346
|
+
const data28 = getDatafromStorageBuffer( materialBuffer, materialIndex, int( S.SUBSURFACE_B ), int( MATERIAL_SLOTS ) ).toVar();
|
|
347
|
+
const data29 = getDatafromStorageBuffer( materialBuffer, materialIndex, int( S.SUBSURFACE_C ), int( MATERIAL_SLOTS ) ).toVar();
|
|
343
348
|
|
|
344
349
|
return RayTracingMaterial( {
|
|
345
350
|
color: vec4( data0.rgb, 1.0 ),
|
|
@@ -361,6 +366,11 @@ export const getMaterial = Fn( ( [ materialIndex, materialBuffer ] ) => {
|
|
|
361
366
|
iridescence: data7.r,
|
|
362
367
|
iridescenceIOR: data7.g,
|
|
363
368
|
iridescenceThicknessRange: data7.ba,
|
|
369
|
+
subsurfaceColor: data27.rgb,
|
|
370
|
+
subsurface: data27.a,
|
|
371
|
+
subsurfaceRadius: data28.rgb,
|
|
372
|
+
subsurfaceRadiusScale: data28.a,
|
|
373
|
+
subsurfaceAnisotropy: data29.r,
|
|
364
374
|
albedoMapIndex: int( data8.r ),
|
|
365
375
|
normalMapIndex: int( data8.g ),
|
|
366
376
|
roughnessMapIndex: int( data8.b ),
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CompactKernel.js — wavefront stream compaction: active rays → dense index array for next bounce (256×1, 1D).
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import {
|
|
6
|
+
Fn, uint, select,
|
|
7
|
+
If,
|
|
8
|
+
instanceIndex,
|
|
9
|
+
atomicAdd, atomicLoad,
|
|
10
|
+
subgroupExclusiveAdd, subgroupAdd, subgroupBroadcast,
|
|
11
|
+
Return,
|
|
12
|
+
} from 'three/tsl';
|
|
13
|
+
|
|
14
|
+
import { readRayBounceFlags } from '../Processor/PackedRayBuffer.js';
|
|
15
|
+
import { RAY_FLAG, COUNTER } from '../Processor/QueueManager.js';
|
|
16
|
+
|
|
17
|
+
// Re-exported below as COMPACT_WG_SIZE. Not referenced inside either kernel body in this file;
// presumably the workgroup size the dispatcher uses for the 1D (256×1) compaction pass — confirm against the caller.
const WG_SIZE = 256;
|
|
18
|
+
|
|
19
|
+
/**
 * Builds the per-survivor stream-compaction compute function.
 *
 * Each in-range thread reads one entry of the current dense index list, looks up
 * that ray's bounce flags and, when RAY_FLAG.ACTIVE is set, appends the ray ID to
 * the next-bounce list at a slot reserved with atomicAdd on ACTIVE_RAY_COUNT.
 *
 * @param {Object} params
 * @param {Object} params.rayBufferRO - Ray storage handed to readRayBounceFlags.
 * @param {Object} params.activeIndicesReadRO - Dense ray-ID list consumed by this pass.
 * @param {Object} params.activeIndicesWriteRW - Dense ray-ID list produced by this pass.
 * @param {Object} params.counters - Atomic counter buffer. Required: it supplies both the
 *   ENTERING_COUNT bound and the ACTIVE_RAY_COUNT append cursor.
 * @param {*} [params.currentActiveCount] - Accepted for backward compatibility and ignored.
 *   It used to be the bound when `counters` was absent, but the append below cannot work
 *   without `counters`, so that path never produced a usable kernel.
 * @returns {Function} The TSL Fn to be turned into a compute node by the caller.
 * @throws {TypeError} While the compute function is being built, if `params.counters` is missing.
 */
export function buildCompactKernel( params ) {

	const {
		rayBufferRO,
		activeIndicesReadRO,
		activeIndicesWriteRW,
		counters,
	} = params;

	const computeFn = Fn( () => {

		// The survivor append dereferences `counters` unconditionally, so fail with a clear
		// message instead of an opaque "reading 'element'" error. The check lives inside the
		// Fn callback so a kernel that is built but never compiled keeps working as before.
		if ( ! counters ) {

			throw new TypeError( 'buildCompactKernel: params.counters is required (ENTERING_COUNT bound, ACTIVE_RAY_COUNT append cursor).' );

		}

		const threadIdx = instanceIndex;

		// ACTIVE_RAY_COUNT is zeroed before compact, so the dense-list length comes from ENTERING_COUNT.
		const bound = atomicLoad( counters.element( uint( COUNTER.ENTERING_COUNT ) ) );

		// Threads past the end of the dense list have nothing to compact.
		If( threadIdx.greaterThanEqual( bound ), () => {

			Return();

		} );

		const rayID = activeIndicesReadRO.element( threadIdx );
		const flags = readRayBounceFlags( rayBufferRO, rayID );

		If( flags.bitAnd( uint( RAY_FLAG.ACTIVE ) ).notEqual( uint( 0 ) ), () => {

			// atomicAdd returns the pre-increment value, i.e. this survivor's private output slot.
			const writeIdx = atomicAdd( counters.element( uint( COUNTER.ACTIVE_RAY_COUNT ) ), uint( 1 ) );
			activeIndicesWriteRW.element( writeIdx ).assign( rayID );

		} );

	} );

	return computeFn;

}
|
|
57
|
+
|
|
58
|
+
/**
 * Subgroup prefix-sum compaction: one global atomicAdd per subgroup instead of per survivor.
 * Requires renderer.hasFeature('subgroups'); control flow must stay uniform (no divergent Return).
 *
 * Every lane computes a 0/1 "survivor" value, the subgroup exclusive prefix sum gives each
 * survivor its offset inside the subgroup, one lane reserves `sgCount` slots on
 * ACTIVE_RAY_COUNT, and the reserved base is broadcast so survivors can write their ray ID
 * at `base + offset`.
 *
 * @param {Object} params
 * @param {Object} params.rayBufferRO - Ray storage handed to readRayBounceFlags.
 * @param {Object} params.activeIndicesReadRO - Dense ray-ID list consumed by this pass.
 * @param {Object} params.activeIndicesWriteRW - Dense ray-ID list produced by this pass.
 * @param {Object} params.counters - Atomic counter buffer. Required: it supplies both the
 *   ENTERING_COUNT bound and the ACTIVE_RAY_COUNT append cursor.
 * @param {*} [params.currentActiveCount] - Accepted for backward compatibility and ignored.
 *   It used to be the bound when `counters` was absent, but the per-subgroup atomicAdd
 *   cannot work without `counters`, so that path never produced a usable kernel.
 * @returns {Function} The TSL Fn to be turned into a compute node by the caller.
 * @throws {TypeError} While the compute function is being built, if `params.counters` is missing.
 */
export function buildCompactSubgroupKernel( params ) {

	const {
		rayBufferRO,
		activeIndicesReadRO,
		activeIndicesWriteRW,
		counters,
	} = params;

	const computeFn = Fn( () => {

		// The per-subgroup reservation dereferences `counters` unconditionally, so fail with a
		// clear message instead of an opaque "reading 'element'" error. The check lives inside
		// the Fn callback so a kernel that is built but never compiled keeps working as before.
		if ( ! counters ) {

			throw new TypeError( 'buildCompactSubgroupKernel: params.counters is required (ENTERING_COUNT bound, ACTIVE_RAY_COUNT append cursor).' );

		}

		const threadIdx = instanceIndex;
		const bound = atomicLoad( counters.element( uint( COUNTER.ENTERING_COUNT ) ) );

		// No early Return: all lanes must reach the subgroup ops; out-of-range lanes contribute 0 and read stale-but-in-capacity slots.
		const inRange = threadIdx.lessThan( bound );
		const rayID = activeIndicesReadRO.element( threadIdx );
		const flags = readRayBounceFlags( rayBufferRO, rayID );
		const isActive = inRange.and( flags.bitAnd( uint( RAY_FLAG.ACTIVE ) ).notEqual( uint( 0 ) ) );
		const activeU = select( isActive, uint( 1 ), uint( 0 ) );

		// .toVar() materializes the subgroup ops at uniform control flow; inlining into the divergent If(isActive) write is rejected by WGSL.
		const localOffset = subgroupExclusiveAdd( activeU ).toVar(); // survivors before this lane
		const sgCount = subgroupAdd( activeU ).toVar(); // survivors in the whole subgroup

		// laneId via exclusiveAdd(1) since TSL lacks subgroup_invocation_id; lane 0 does the single per-subgroup atomicAdd.
		const laneId = subgroupExclusiveAdd( uint( 1 ) ).toVar();
		const base = uint( 0 ).toVar();
		If( laneId.equal( uint( 0 ) ), () => {

			// Reserve a contiguous run of sgCount output slots; atomicAdd yields the run's first index.
			base.assign( atomicAdd( counters.element( uint( COUNTER.ACTIVE_RAY_COUNT ) ), sgCount ) );

		} );

		// Hand the reserved base from lane 0 to every lane of the subgroup.
		const sgBase = subgroupBroadcast( base, uint( 0 ) ).toVar();

		If( isActive, () => {

			activeIndicesWriteRW.element( sgBase.add( localOffset ) ).assign( rayID );

		} );

	} );

	return computeFn;

}
|
|
109
|
+
|
|
110
|
+
export { WG_SIZE as COMPACT_WG_SIZE };
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DebugKernel.js — wavefront debug visualization (16×16, 2D screen-space dispatch).
|
|
3
|
+
*
|
|
4
|
+
* Single-pass primary-ray debug viz for visMode 1-10 (mode 11 = NaN/Inf is a FinalWrite
|
|
5
|
+
* post-branch on the accumulated color, handled there). Generates a camera ray per pixel and
|
|
6
|
+
* delegates to the renderer-agnostic TraceDebugMode for the per-mode color; mode 9 (stratified
|
|
7
|
+
* sample pattern) is computed inline. Writes the color directly to the output (no accumulation).
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import {
|
|
11
|
+
Fn, float, vec2, vec4, int, uint, uvec2,
|
|
12
|
+
If, textureStore,
|
|
13
|
+
localId, workgroupId,
|
|
14
|
+
} from 'three/tsl';
|
|
15
|
+
|
|
16
|
+
import { generateRayFromCamera } from './BVHTraversal.js';
|
|
17
|
+
import { Ray } from './Struct.js';
|
|
18
|
+
import { TraceDebugMode } from './Debugger.js';
|
|
19
|
+
import { pcgHash, getStratifiedSample } from './Random.js';
|
|
20
|
+
|
|
21
|
+
// Workgroup edge length in threads (16×16). The kernel rebuilds the global pixel coordinate as
// workgroupId * WG_SIZE + localId on each axis; re-exported below as DEBUG_WG_SIZE.
const WG_SIZE = 16;
|
|
22
|
+
|
|
23
|
+
/**
 * Builds the single-pass debug-visualization compute function.
 *
 * One thread per pixel: generates an un-jittered primary camera ray, picks the debug
 * color for the current `visMode` (mode 9 is computed inline from the stratified
 * sampler, every other mode is delegated to TraceDebugMode) and stores it straight
 * into the color / normal-depth / albedo storage textures.
 *
 * @param {Object} params - Textures, camera and environment uniforms, scene buffers and
 *   material map arrays forwarded to generateRayFromCamera and TraceDebugMode.
 * @param {number} [params.samplesPerPass=1] - Sample count fed to getStratifiedSample in mode 9.
 * @returns {Function} The TSL Fn to be turned into a compute node by the caller.
 */
export function buildDebugKernel( params ) {

	// Output targets and dispatch extents.
	const { writeColorTex, writeNDTex, writeAlbedoTex } = params;
	const { resolution, renderWidth, renderHeight } = params;

	// Camera and lens.
	const { cameraWorldMatrix, cameraProjectionMatrixInverse, cameraProjectionMatrix, cameraViewMatrix } = params;
	const { enableDOF, focalLength, aperture, focusDistance, sceneScale, apertureScale, anamorphicRatio } = params;

	// Scene data, environment and debug controls.
	const { bvhBuffer, triangleBuffer, materialBuffer } = params;
	const { envTexture, environmentMatrix, environmentIntensity, enableEnvironmentLight } = params;
	const { visMode, debugVisScale, frame, samplesPerPass = 1 } = params;
	const { albedoMaps, normalMaps, bumpMaps, metalnessMaps, roughnessMaps, emissiveMaps } = params;

	return Fn( () => {

		// Global pixel coordinate of this invocation.
		const px = int( workgroupId.x ).mul( WG_SIZE ).add( int( localId.x ) );
		const py = int( workgroupId.y ).mul( WG_SIZE ).add( int( localId.y ) );
		const insideRender = px.lessThan( renderWidth ).and( py.lessThan( renderHeight ) );

		// Threads of edge workgroups that fall outside the render area do nothing.
		If( insideRender, () => {

			const pixelCoord = vec2( float( px ).add( 0.5 ), float( py ).add( 0.5 ) );
			const linearIndex = py.mul( int( resolution.x ) ).add( px );
			const seed = pcgHash( { state: uint( linearIndex ).add( uint( 1 ) ) } ).toVar();

			// Center-pixel primary ray (no AA jitter — debug viz wants a stable, sharp image).
			const screenPosition = pixelCoord.div( resolution ).mul( 2.0 ).sub( 1.0 ).toVar();
			screenPosition.y.assign( screenPosition.y.negate() );

			const primaryRay = Ray.wrap( generateRayFromCamera(
				screenPosition, seed,
				cameraWorldMatrix, cameraProjectionMatrixInverse,
				enableDOF, focalLength, aperture, focusDistance, sceneScale, apertureScale, anamorphicRatio,
			) );

			// Magenta placeholder; both branches below overwrite it.
			const debugColor = vec4( 1.0, 0.0, 1.0, 1.0 ).toVar();

			If( visMode.equal( int( 9 ) ), () => {

				// Mode 9: visualize the stratified AA-jitter pattern (R,G = jitter).
				// Use the real per-frame sample count so >1 SPP shows the stratified lattice (totalRays≤1 → plain random).
				const jitter = getStratifiedSample( pixelCoord, int( 0 ), int( samplesPerPass ), seed, resolution, frame );
				debugColor.assign( vec4( jitter, 1.0, 1.0 ) );

			} ).Else( () => {

				// Modes 1-8, 10 — shared per-mode debug color (primary-ray trace + counters).
				const traced = TraceDebugMode(
					primaryRay.origin, primaryRay.direction,
					bvhBuffer, triangleBuffer, materialBuffer,
					envTexture, environmentMatrix, environmentIntensity, enableEnvironmentLight,
					visMode, debugVisScale,
					pixelCoord, resolution,
					albedoMaps, normalMaps, bumpMaps, metalnessMaps, roughnessMaps, emissiveMaps,
					cameraProjectionMatrix, cameraViewMatrix,
					frame,
				);
				debugColor.assign( traced );

			} );

			const texel = uvec2( uint( px ), uint( py ) );
			textureStore( writeColorTex, texel, debugColor ).toWriteOnly();
			// Benign MRT so the denoiser/display never read stale normal/albedo on a debug frame.
			textureStore( writeNDTex, texel, vec4( 0.5, 0.5, 1.0, 1.0 ) ).toWriteOnly();
			textureStore( writeAlbedoTex, texel, vec4( debugColor.xyz, 1.0 ) ).toWriteOnly();

		} );

	} );

}
|
|
97
|
+
|
|
98
|
+
export { WG_SIZE as DEBUG_WG_SIZE };
|
package/src/TSL/Environment.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Fn, wgslFn, vec2, vec4, float, int, If, texture, dot, sin, sqrt, floor, fract, min, max, mix, clamp } from 'three/tsl';
|
|
1
|
+
import { Fn, wgslFn, vec2, vec4, ivec2, float, int, If, texture, dot, sin, sqrt, floor, fract, min, max, mix, clamp } from 'three/tsl';
|
|
2
2
|
|
|
3
3
|
import { REC709_LUMINANCE_COEFFICIENTS } from './Common.js';
|
|
4
4
|
|
|
@@ -74,7 +74,7 @@ export const sampleEquirect = Fn( ( [ environment, direction, environmentMatrix,
|
|
|
74
74
|
// Exact implementation from three-gpu-pathtracer
|
|
75
75
|
export const sampleEquirectProbability = Fn( ( [
|
|
76
76
|
environment,
|
|
77
|
-
|
|
77
|
+
envCDFTexture,
|
|
78
78
|
environmentMatrix,
|
|
79
79
|
environmentIntensity,
|
|
80
80
|
envTotalSum,
|
|
@@ -84,9 +84,8 @@ export const sampleEquirectProbability = Fn( ( [
|
|
|
84
84
|
colorOutput
|
|
85
85
|
] ) => {
|
|
86
86
|
|
|
87
|
-
//
|
|
88
|
-
|
|
89
|
-
const condOffset = int( envResolution.y ).toVar();
|
|
87
|
+
// CDF texture layout: (W+1)×H R32F — conditional[cy*W+cx] at texel (cx,cy); marginal[cy] at column W.
|
|
88
|
+
const cdfMarginalCol = int( envResolution.x ).toVar();
|
|
90
89
|
|
|
91
90
|
// Sample marginal CDF for V coordinate (1D, linear interpolation)
|
|
92
91
|
const marginalSize = envResolution.y;
|
|
@@ -94,7 +93,11 @@ export const sampleEquirectProbability = Fn( ( [
|
|
|
94
93
|
const mI0 = int( floor( mIdx ) );
|
|
95
94
|
const mI1 = min( mI0.add( 1 ), int( marginalSize ).sub( 1 ) );
|
|
96
95
|
const mFrac = fract( mIdx );
|
|
97
|
-
const v = mix(
|
|
96
|
+
const v = mix(
|
|
97
|
+
envCDFTexture.load( ivec2( cdfMarginalCol, mI0 ) ).x,
|
|
98
|
+
envCDFTexture.load( ivec2( cdfMarginalCol, mI1 ) ).x,
|
|
99
|
+
mFrac,
|
|
100
|
+
).toVar();
|
|
98
101
|
|
|
99
102
|
// Sample conditional CDF for U coordinate (2D grid, bilinear interpolation)
|
|
100
103
|
const condW = envResolution.x;
|
|
@@ -107,11 +110,10 @@ export const sampleEquirectProbability = Fn( ( [
|
|
|
107
110
|
const cy1 = min( cy0.add( 1 ), int( condH ).sub( 1 ) );
|
|
108
111
|
const fx = fract( cxf );
|
|
109
112
|
const fy = fract( cyf );
|
|
110
|
-
const
|
|
111
|
-
const
|
|
112
|
-
const
|
|
113
|
-
const
|
|
114
|
-
const v11 = envCDFBuffer.element( condOffset.add( cy1.mul( condWi ).add( cx1 ) ) );
|
|
113
|
+
const v00 = envCDFTexture.load( ivec2( cx0, cy0 ) ).x;
|
|
114
|
+
const v10 = envCDFTexture.load( ivec2( cx1, cy0 ) ).x;
|
|
115
|
+
const v01 = envCDFTexture.load( ivec2( cx0, cy1 ) ).x;
|
|
116
|
+
const v11 = envCDFTexture.load( ivec2( cx1, cy1 ) ).x;
|
|
115
117
|
const u = mix( mix( v00, v10, fx ), mix( v01, v11, fx ), fy ).toVar();
|
|
116
118
|
|
|
117
119
|
const uv = vec2( u, v ).toVar();
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ExtendKernel.js — wavefront BVH traversal (256×1, 1D ray-parallel dispatch).
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import {
|
|
6
|
+
Fn, uint,
|
|
7
|
+
If,
|
|
8
|
+
instanceIndex,
|
|
9
|
+
atomicLoad,
|
|
10
|
+
Return,
|
|
11
|
+
} from 'three/tsl';
|
|
12
|
+
|
|
13
|
+
import { traverseBVH } from './BVHTraversal.js';
|
|
14
|
+
import { Ray, HitInfo } from './Struct.js';
|
|
15
|
+
import {
|
|
16
|
+
readRayOrigin, readRayDirection, readMediumStack,
|
|
17
|
+
writeHitPacked,
|
|
18
|
+
} from '../Processor/PackedRayBuffer.js';
|
|
19
|
+
import { COUNTER } from '../Processor/QueueManager.js';
|
|
20
|
+
|
|
21
|
+
// Re-exported below as EXTEND_WG_SIZE. Not referenced inside the kernel body in this file;
// presumably the workgroup size the dispatcher uses for the 1D (256×1) ray-parallel pass — confirm against the caller.
const WG_SIZE = 256;
|
|
22
|
+
|
|
23
|
+
/**
 * Builds the wavefront "extend" compute function: one thread per active ray traverses
 * the BVH and stores the packed closest-hit record for that ray.
 *
 * @param {Object} params
 * @param {Object} params.bvhBuffer - BVH node storage forwarded to traverseBVH.
 * @param {Object} params.triangleBuffer - Triangle storage forwarded to traverseBVH.
 * @param {Object} [params.materialBuffer] - Accepted for call-site compatibility; this kernel does not read it.
 * @param {Object} params.rayBufferRO - Packed ray storage (origin, direction, medium stack).
 * @param {Object} params.hitBufferRW - Packed hit storage written through writeHitPacked.
 * @param {Object} params.activeIndicesRO - Dense list mapping thread index to ray ID.
 * @param {Object} [params.counters] - Atomic counter buffer; when given, ENTERING_COUNT bounds the dispatch.
 * @param {*} [params.maxRayCount] - Bound used when `counters` is not supplied.
 * @returns {Function} The TSL Fn to be turned into a compute node by the caller.
 */
export function buildExtendKernel( params ) {

	const {
		bvhBuffer, triangleBuffer,
		rayBufferRO,
		hitBufferRW,
		activeIndicesRO,
		counters,
		maxRayCount,
	} = params;

	const computeFn = Fn( () => {

		const threadIdx = instanceIndex;

		// kernels bound on ENTERING_COUNT so an over-sized (margin) dispatch is safe.
		const bound = counters ? atomicLoad( counters.element( uint( COUNTER.ENTERING_COUNT ) ) ) : maxRayCount;
		If( threadIdx.greaterThanEqual( bound ), () => {

			Return();

		} );

		// Indirection through the dense list: thread index -> ray slot.
		const rayID = activeIndicesRO.element( threadIdx );

		const origin = readRayOrigin( rayBufferRO, rayID ).toVar();
		const direction = readRayDirection( rayBufferRO, rayID ).toVar();

		const ray = Ray( { origin, direction } );

		// insideMedium bypasses front/back culling so the ray can hit a glass/SSS back-facing boundary.
		const insideMedium = readMediumStack( rayBufferRO, rayID ).stackDepth.greaterThan( uint( 0 ) );
		const hitInfo = HitInfo.wrap( traverseBVH(
			ray, bvhBuffer, triangleBuffer, insideMedium,
		) ).toVar();

		// Store the closest-hit record in this ray's slot; the index fields are converted to uint for packing.
		writeHitPacked(
			hitBufferRW, rayID,
			hitInfo.dst,
			uint( hitInfo.triangleIndex ),
			hitInfo.uv.x, hitInfo.uv.y,
			hitInfo.normal,
			uint( hitInfo.materialIndex ),
			uint( hitInfo.meshIndex ),
		);

	} );

	return computeFn;

}
|
|
74
|
+
|
|
75
|
+
export { WG_SIZE as EXTEND_WG_SIZE };
|