npm - rayzee - Versions diffs - 6.5.0 → 7.0.0 - Mend

rayzee 6.5.0 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

package/README.md +24 -5
package/dist/rayzee.es.js +7554 -7014
package/dist/rayzee.es.js.map +1 -1
package/dist/rayzee.umd.js +157 -236
package/dist/rayzee.umd.js.map +1 -1
package/package.json +1 -1
package/src/EngineDefaults.js +12 -9
package/src/PathTracerApp.js +118 -26
package/src/Pipeline/PipelineContext.js +1 -2
package/src/Pipeline/RenderPipeline.js +1 -1
package/src/Pipeline/RenderStage.js +1 -1
package/src/Processor/CameraOptimizer.js +0 -5
package/src/Processor/GeometryExtractor.js +6 -0
package/src/Processor/KernelManager.js +277 -0
package/src/Processor/PackedRayBuffer.js +265 -0
package/src/Processor/QueueManager.js +173 -0
package/src/Processor/SceneProcessor.js +1 -0
package/src/Processor/ShaderBuilder.js +11 -317
package/src/Processor/StorageTexturePool.js +29 -15
package/src/Processor/VRAMTracker.js +169 -0
package/src/Processor/utils.js +11 -110
package/src/RenderSettings.js +0 -3
package/src/Stages/ASVGF.js +76 -20
package/src/Stages/BilateralFilter.js +34 -10
package/src/Stages/EdgeFilter.js +2 -3
package/src/Stages/MotionVector.js +16 -9
package/src/Stages/NormalDepth.js +17 -5
package/src/Stages/PathTracer.js +671 -1456
package/src/Stages/PathTracerStage.js +1451 -0
package/src/Stages/SSRC.js +32 -15
package/src/Stages/Variance.js +35 -12
package/src/TSL/CompactKernel.js +110 -0
package/src/TSL/DebugKernel.js +98 -0
package/src/TSL/Environment.js +13 -11
package/src/TSL/ExtendKernel.js +75 -0
package/src/TSL/FinalWriteKernel.js +121 -0
package/src/TSL/GenerateKernel.js +109 -0
package/src/TSL/LightsSampling.js +2 -2
package/src/TSL/PathTracerCore.js +43 -1039
package/src/TSL/ShadeKernel.js +873 -0
package/src/TSL/patches.js +81 -4
package/src/index.js +3 -0
package/src/managers/CameraManager.js +1 -1
package/src/managers/DenoisingManager.js +40 -75
package/src/managers/EnvironmentManager.js +30 -39
package/src/managers/OverlayManager.js +7 -22
package/src/managers/UniformManager.js +0 -3
package/src/managers/helpers/TileHelper.js +2 -2
package/src/Stages/AdaptiveSampling.js +0 -483
package/src/TSL/PathTracer.js +0 -384
package/src/managers/TileManager.js +0 -298

package/src/Stages/SSRC.js CHANGED Viewed

@@ -12,10 +12,11 @@
 // Textures read:       pathtracer:color, pathtracer:normalDepth, motionVector:screenSpace
 import { uniform } from 'three/tsl';
-import { StorageTexture, TextureNode } from 'three/webgpu';
-import { HalfFloatType, RGBAFormat, NearestFilter, LinearFilter } from 'three';
+import { StorageTexture, TextureNode, RenderTarget } from 'three/webgpu';
+import { HalfFloatType, RGBAFormat, NearestFilter, LinearFilter, Box2, Vector2 } from 'three';
 import { RenderStage, StageExecutionMode } from '../Pipeline/RenderStage.js';
 import { buildTemporalPass, buildSpatialPass } from '../TSL/SSRC.js';
+import { MAX_STORAGE_TEXTURE_SIZE } from '../EngineDefaults.js';
 export class SSRC extends RenderStage {
@@ -51,18 +52,30 @@ export class SSRC extends RenderStage {
 		this._readPass1CacheTexNode = new TextureNode(); // current cache (for spatial pass)
 		// ─── StorageTextures (5 total) ───
-		const w = 1, h = 1; // resized on first render
+		// StorageTextures stay at max alloc — see resize crash fix (three.js #33061).
+		const w = 1, h = 1; // RTs/uniforms resized on first render
 		// Ping-pong temporal cache: .rgb = radiance, .w = history count
-		this._cacheTexA = this._createStorageTex( w, h, NearestFilter );
-		this._cacheTexB = this._createStorageTex( w, h, NearestFilter );
+		this._cacheTexA = this._createStorageTex( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE, NearestFilter );
+		this._cacheTexB = this._createStorageTex( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE, NearestFilter );
 		// Ping-pong previous-frame normalDepth (for edge-stopping in temporal pass)
-		this._prevNDTexA = this._createStorageTex( w, h, NearestFilter );
-		this._prevNDTexB = this._createStorageTex( w, h, NearestFilter );
+		this._prevNDTexA = this._createStorageTex( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE, NearestFilter );
+		this._prevNDTexB = this._createStorageTex( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE, NearestFilter );
 		// Final output (LinearFilter for Display fragment shader sampling)
-		this._outputTex = this._createStorageTex( w, h, LinearFilter );
+		this._outputTex = this._createStorageTex( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE, LinearFilter );
+		// Active-region copy target — published downstream (storage tex is over-allocated)
+		this._srcRegion = new Box2( new Vector2( 0, 0 ), new Vector2( 0, 0 ) );
+		this.outputTarget = new RenderTarget( w, h, {
+			type: HalfFloatType,
+			format: RGBAFormat,
+			minFilter: LinearFilter,
+			magFilter: LinearFilter,
+			depthBuffer: false,
+			stencilBuffer: false
+		} );
 		// ─── State ───
 		this._currentPingPong = 0; // 0: read B, write A; 1: read A, write B
@@ -99,7 +112,7 @@ export class SSRC extends RenderStage {
 		if ( colorTex?.image ) {
 			const { width, height } = colorTex.image;
-			if ( width !== this._cacheTexA.image.width || height !== this._cacheTexA.image.height ) {
+			if ( width !== this.outputTarget.width || height !== this.outputTarget.height ) {
 				this.setSize( width, height );
@@ -145,8 +158,13 @@ export class SSRC extends RenderStage {
 		// Advance frames-since-reset counter (capped to avoid overflow)
 		this._framesSinceReset.value = Math.min( this._framesSinceReset.value + 1, 9999 );
+		// Copy active region out of the over-allocated StorageTexture into the
+		// right-sized RenderTarget; downstream stages UV-sample the latter.
+		this._srcRegion.max.set( this.outputTarget.width, this.outputTarget.height );
+		this.renderer.copyTextureToTexture( this._outputTex, this.outputTarget.texture, this._srcRegion );
 		// Publish final output
-		context.setTexture( 'ssrc:output', this._outputTex );
+		context.setTexture( 'ssrc:output', this.outputTarget.texture );
 		// Advance ping-pong
 		this._currentPingPong = 1 - this._currentPingPong;
@@ -163,11 +181,9 @@ export class SSRC extends RenderStage {
 		if ( width < 1 || height < 1 ) return;
-		this._cacheTexA.setSize( width, height );
-		this._cacheTexB.setSize( width, height );
-		this._prevNDTexA.setSize( width, height );
-		this._prevNDTexB.setSize( width, height );
-		this._outputTex.setSize( width, height );
+		// StorageTextures stay at their max allocation (see constructor).
+		this.outputTarget.setSize( width, height );
+		this.outputTarget.texture.needsUpdate = true;
 		this.resW.value = width;
 		this.resH.value = height;
@@ -194,6 +210,7 @@ export class SSRC extends RenderStage {
 		this._prevNDTexA.dispose();
 		this._prevNDTexB.dispose();
 		this._outputTex.dispose();
+		this.outputTarget?.dispose();
 		this._colorTexNode?.dispose();
 		this._ndTexNode?.dispose();
 		this._motionTexNode?.dispose();

package/src/Stages/Variance.js CHANGED Viewed

@@ -1,9 +1,10 @@
 import { Fn, wgslFn, float, int, uint, ivec2, uvec2, uniform, If, max,
 	textureLoad, textureStore, workgroupArray, workgroupBarrier, localId, workgroupId } from 'three/tsl';
-import { TextureNode, StorageTexture } from 'three/webgpu';
-import { FloatType, RGBAFormat, LinearFilter } from 'three';
+import { RenderTarget, TextureNode, StorageTexture } from 'three/webgpu';
+import { FloatType, RGBAFormat, LinearFilter, Box2, Vector2 } from 'three';
 import { RenderStage, StageExecutionMode } from '../Pipeline/RenderStage.js';
 import { luminance } from '../TSL/Common.js';
+import { MAX_STORAGE_TEXTURE_SIZE } from '../EngineDefaults.js';
 // ── wgslFn helpers ──────────────────────────────────────────
@@ -40,8 +41,7 @@ const temporalAccumulate = /*@__PURE__*/ wgslFn( `
  * WebGPU Variance Estimation Stage (Compute Shader)
  *
  * Computes temporal and spatial variance from the path tracer output.
- * Used by AdaptiveSampling for sampling guidance and by
- * BilateralFilter for variance-guided filtering.
+ * Used by BilateralFilter for variance-guided filtering.
  *
  * Uses compute shader with workgroup shared memory for the 3×3
  * spatial variance computation. Each 8×8 workgroup loads a 10×10
@@ -98,18 +98,34 @@ export class Variance extends RenderStage {
 		// FloatType (f32) required — HalfFloat's ~3.3 decimal digits cause catastrophic
 		// cancellation in (meanSq - mean²) for converged pixels, producing a variance
 		// floor of ~0.0001 that the (frame+1)² scaling amplifies to enormous values.
-		this._storageTexA = new StorageTexture( w, h );
+		// StorageTextures over-allocated at max — defensive against three.js #33061
+		// (TSL compute pipeline keeps stale GPUTextureView after StorageTexture.setSize).
+		this._storageTexA = new StorageTexture( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE );
 		this._storageTexA.type = FloatType;
 		this._storageTexA.format = RGBAFormat;
 		this._storageTexA.minFilter = LinearFilter;
 		this._storageTexA.magFilter = LinearFilter;
-		this._storageTexB = new StorageTexture( w, h );
+		this._storageTexB = new StorageTexture( MAX_STORAGE_TEXTURE_SIZE, MAX_STORAGE_TEXTURE_SIZE );
 		this._storageTexB.type = FloatType;
 		this._storageTexB.format = RGBAFormat;
 		this._storageTexB.minFilter = LinearFilter;
 		this._storageTexB.magFilter = LinearFilter;
+		this._srcRegion = new Box2( new Vector2( 0, 0 ), new Vector2( 0, 0 ) );
+		// Right-sized RenderTarget published to downstream (UV-sampled). The
+		// over-allocated StorageTexture itself must not be published — UV
+		// sampling a 2048 texture would read the wrong region.
+		this._outputTarget = new RenderTarget( w, h, {
+			type: FloatType,
+			format: RGBAFormat,
+			minFilter: LinearFilter,
+			magFilter: LinearFilter,
+			depthBuffer: false,
+			stencilBuffer: false
+		} );
 		this.currentMoments = 0; // 0 = write A, read B; 1 = write B, read A
 		this._compiled = false;
 		this._needsWarmReset = false;
@@ -275,8 +291,8 @@ export class Variance extends RenderStage {
 		const img = colorTex.image;
 		if ( img && img.width > 0 && img.height > 0 ) {
-			if ( img.width !== this._storageTexA.image.width ||
-				img.height !== this._storageTexA.image.height ) {
+			if ( img.width !== this._outputTarget.width ||
+				img.height !== this._outputTarget.height ) {
 				this.setSize( img.width, img.height );
@@ -333,8 +349,13 @@ export class Variance extends RenderStage {
 		// Swap for next frame
 		this.currentMoments = 1 - this.currentMoments;
-		// Publish (StorageTexture works as regular Texture for downstream sampling)
-		context.setTexture( 'variance:output', writeTarget );
+		// Copy the active region out of the over-allocated StorageTexture into the
+		// right-sized RenderTarget; downstream stages UV-sample the latter.
+		this._srcRegion.max.set( this._outputTarget.width, this._outputTarget.height );
+		this.renderer.copyTextureToTexture( writeTarget, this._outputTarget.texture, this._srcRegion );
+		// Publish the RenderTarget (not the over-allocated StorageTexture)
+		context.setTexture( 'variance:output', this._outputTarget.texture );
 	}
@@ -352,8 +373,9 @@ export class Variance extends RenderStage {
 	setSize( width, height ) {
-		this._storageTexA.setSize( width, height );
-		this._storageTexB.setSize( width, height );
+		// StorageTextures stay at their max allocation (see constructor).
+		this._outputTarget.setSize( width, height );
+		this._outputTarget.texture.needsUpdate = true;
 		this.resW.value = width;
 		this.resH.value = height;
@@ -371,6 +393,7 @@ export class Variance extends RenderStage {
 		this._computeNodeB?.dispose();
 		this._storageTexA?.dispose();
 		this._storageTexB?.dispose();
+		this._outputTarget?.dispose();
 		this._colorTexNode?.dispose();
 		this._readTexNodeA?.dispose();
 		this._readTexNodeB?.dispose();

package/src/TSL/CompactKernel.js ADDED Viewed

@@ -0,0 +1,110 @@
+/**
+ * CompactKernel.js — wavefront stream compaction: active rays → dense index array for next bounce (256×1, 1D).
+ */
+import {
+	Fn, uint, select,
+	If,
+	instanceIndex,
+	atomicAdd, atomicLoad,
+	subgroupExclusiveAdd, subgroupAdd, subgroupBroadcast,
+	Return,
+} from 'three/tsl';
+import { readRayBounceFlags } from '../Processor/PackedRayBuffer.js';
+import { RAY_FLAG, COUNTER } from '../Processor/QueueManager.js';
+const WG_SIZE = 256;
+export function buildCompactKernel( params ) {
+	const {
+		rayBufferRO,
+		activeIndicesReadRO,
+		activeIndicesWriteRW,
+		counters,
+		currentActiveCount,
+	} = params;
+	const computeFn = Fn( () => {
+		const threadIdx = instanceIndex;
+		// ACTIVE_RAY_COUNT is zeroed before compact, so the dense-list length comes from ENTERING_COUNT.
+		const bound = counters ? atomicLoad( counters.element( uint( COUNTER.ENTERING_COUNT ) ) ) : currentActiveCount;
+		If( threadIdx.greaterThanEqual( bound ), () => {
+			Return();
+		} );
+		const rayID = activeIndicesReadRO.element( threadIdx );
+		const flags = readRayBounceFlags( rayBufferRO, rayID );
+		If( flags.bitAnd( uint( RAY_FLAG.ACTIVE ) ).notEqual( uint( 0 ) ), () => {
+			const writeIdx = atomicAdd( counters.element( uint( COUNTER.ACTIVE_RAY_COUNT ) ), uint( 1 ) );
+			activeIndicesWriteRW.element( writeIdx ).assign( rayID );
+		} );
+	} );
+	return computeFn;
+}
+/**
+ * Subgroup prefix-sum compaction: one global atomicAdd per subgroup instead of per survivor.
+ * Requires renderer.hasFeature('subgroups'); control flow must stay uniform (no divergent Return).
+ */
+export function buildCompactSubgroupKernel( params ) {
+	const {
+		rayBufferRO,
+		activeIndicesReadRO,
+		activeIndicesWriteRW,
+		counters,
+		currentActiveCount,
+	} = params;
+	const computeFn = Fn( () => {
+		const threadIdx = instanceIndex;
+		const bound = counters ? atomicLoad( counters.element( uint( COUNTER.ENTERING_COUNT ) ) ) : currentActiveCount;
+		// No early Return: all lanes must reach the subgroup ops; out-of-range lanes contribute 0 and read stale-but-in-capacity slots.
+		const inRange = threadIdx.lessThan( bound );
+		const rayID = activeIndicesReadRO.element( threadIdx );
+		const flags = readRayBounceFlags( rayBufferRO, rayID );
+		const isActive = inRange.and( flags.bitAnd( uint( RAY_FLAG.ACTIVE ) ).notEqual( uint( 0 ) ) );
+		const activeU = select( isActive, uint( 1 ), uint( 0 ) );
+		// .toVar() materializes the subgroup ops at uniform control flow; inlining into the divergent If(isActive) write is rejected by WGSL.
+		const localOffset = subgroupExclusiveAdd( activeU ).toVar();
+		const sgCount = subgroupAdd( activeU ).toVar();
+		// laneId via exclusiveAdd(1) since TSL lacks subgroup_invocation_id; lane 0 does the single per-subgroup atomicAdd.
+		const laneId = subgroupExclusiveAdd( uint( 1 ) ).toVar();
+		const base = uint( 0 ).toVar();
+		If( laneId.equal( uint( 0 ) ), () => {
+			base.assign( atomicAdd( counters.element( uint( COUNTER.ACTIVE_RAY_COUNT ) ), sgCount ) );
+		} );
+		const sgBase = subgroupBroadcast( base, uint( 0 ) ).toVar();
+		If( isActive, () => {
+			activeIndicesWriteRW.element( sgBase.add( localOffset ) ).assign( rayID );
+		} );
+	} );
+	return computeFn;
+}
+export { WG_SIZE as COMPACT_WG_SIZE };

package/src/TSL/DebugKernel.js ADDED Viewed

@@ -0,0 +1,98 @@
+/**
+ * DebugKernel.js — wavefront debug visualization (16×16, 2D screen-space dispatch).
+ *
+ * Single-pass primary-ray debug viz for visMode 1-10 (mode 11 = NaN/Inf is handled in FinalWrite
+ * as a post-branch on that pass's fresh color, because mode 11 bypasses accumulation). Generates a camera ray per pixel and
+ * delegates to the renderer-agnostic TraceDebugMode for the per-mode color; mode 9 (stratified
+ * sample pattern) is computed inline. Writes the color directly to the output (no accumulation).
+ */
+import {
+	Fn, float, vec2, vec4, int, uint, uvec2,
+	If, textureStore,
+	localId, workgroupId,
+} from 'three/tsl';
+import { generateRayFromCamera } from './BVHTraversal.js';
+import { Ray } from './Struct.js';
+import { TraceDebugMode } from './Debugger.js';
+import { pcgHash, getStratifiedSample } from './Random.js';
+const WG_SIZE = 16;
+/**
+ * Builds the TSL compute function for the debug-visualization pass.
+ *
+ * For every pixel inside renderWidth × renderHeight the returned function generates one
+ * un-jittered camera ray, picks a color (inline stratified-sample view for visMode 9,
+ * TraceDebugMode for every other mode) and stores it directly into the three output textures.
+ *
+ * @param {Object} params - Nodes, uniforms and buffers forwarded to generateRayFromCamera,
+ *   getStratifiedSample and TraceDebugMode, plus the three write targets.
+ * @param {number} [params.samplesPerPass=1] - Sample count passed to getStratifiedSample in visMode 9.
+ * @returns {Function} The Fn-wrapped compute function; dispatching it is the caller's job.
+ */
+export function buildDebugKernel( params ) {
+	const {
+		writeColorTex, writeNDTex, writeAlbedoTex,
+		resolution, renderWidth, renderHeight,
+		cameraWorldMatrix, cameraProjectionMatrixInverse, cameraProjectionMatrix, cameraViewMatrix,
+		enableDOF, focalLength, aperture, focusDistance, sceneScale, apertureScale, anamorphicRatio,
+		bvhBuffer, triangleBuffer, materialBuffer,
+		envTexture, environmentMatrix, environmentIntensity, enableEnvironmentLight,
+		visMode, debugVisScale,
+		albedoMaps, normalMaps, bumpMaps, metalnessMaps, roughnessMaps, emissiveMaps,
+		frame,
+		samplesPerPass = 1,
+	} = params;
+	const computeFn = Fn( () => {
+		// Global pixel coordinate = workgroup id × WG_SIZE + id within the workgroup.
+		const gx = int( workgroupId.x ).mul( WG_SIZE ).add( int( localId.x ) );
+		const gy = int( workgroupId.y ).mul( WG_SIZE ).add( int( localId.y ) );
+		// Threads outside the render area do nothing (presumably the dispatch is rounded up to whole workgroups — confirm at the call site).
+		If( gx.lessThan( renderWidth ).and( gy.lessThan( renderHeight ) ), () => {
+			const pixelCoord = vec2( float( gx ).add( 0.5 ), float( gy ).add( 0.5 ) );
+			// Row-major pixel index with resolution.x as the row stride; the seed hashes pixelIndex + 1.
+			const pixelIndex = gy.mul( int( resolution.x ) ).add( gx );
+			const seed = pcgHash( { state: uint( pixelIndex ).add( uint( 1 ) ) } ).toVar();
+			// Center-pixel primary ray (no AA jitter — debug viz wants a stable, sharp image).
+			// Pixel center → [-1, 1] range, then flip Y.
+			const screenPosition = pixelCoord.div( resolution ).mul( 2.0 ).sub( 1.0 ).toVar();
+			screenPosition.y.assign( screenPosition.y.negate() );
+			const ray = Ray.wrap( generateRayFromCamera(
+				screenPosition, seed,
+				cameraWorldMatrix, cameraProjectionMatrixInverse,
+				enableDOF, focalLength, aperture, focusDistance, sceneScale, apertureScale, anamorphicRatio,
+			) );
+			// Magenta placeholder; one of the two branches below always overwrites it.
+			const color = vec4( 1.0, 0.0, 1.0, 1.0 ).toVar();
+			// Mode 9: visualize the stratified AA-jitter pattern (R,G = jitter).
+			If( visMode.equal( int( 9 ) ), () => {
+				// Use the real per-frame sample count so >1 SPP shows the stratified lattice (totalRays≤1 → plain random).
+				const jitter = getStratifiedSample( pixelCoord, int( 0 ), int( samplesPerPass ), seed, resolution, frame );
+				color.assign( vec4( jitter, 1.0, 1.0 ) );
+			} ).Else( () => {
+				// Modes 1-8, 10 — shared per-mode debug color (primary-ray trace + counters).
+				color.assign( TraceDebugMode(
+					ray.origin, ray.direction,
+					bvhBuffer, triangleBuffer, materialBuffer,
+					envTexture, environmentMatrix, environmentIntensity, enableEnvironmentLight,
+					visMode, debugVisScale,
+					pixelCoord, resolution,
+					albedoMaps, normalMaps, bumpMaps, metalnessMaps, roughnessMaps, emissiveMaps,
+					cameraProjectionMatrix, cameraViewMatrix,
+					frame,
+				) );
+			} );
+			const uintCoord = uvec2( uint( gx ), uint( gy ) );
+			// Written as-is: this pass does no blending with earlier frames.
+			textureStore( writeColorTex, uintCoord, color ).toWriteOnly();
+			// Benign MRT so the denoiser/display never read stale normal/albedo on a debug frame.
+			textureStore( writeNDTex, uintCoord, vec4( 0.5, 0.5, 1.0, 1.0 ) ).toWriteOnly();
+			textureStore( writeAlbedoTex, uintCoord, vec4( color.xyz, 1.0 ) ).toWriteOnly();
+		} );
+	} );
+	return computeFn;
+}
+export { WG_SIZE as DEBUG_WG_SIZE };

package/src/TSL/Environment.js CHANGED Viewed

@@ -1,4 +1,4 @@
-import { Fn, wgslFn, vec2, vec4, float, int, If, texture, dot, sin, sqrt, floor, fract, min, max, mix, clamp } from 'three/tsl';
+import { Fn, wgslFn, vec2, vec4, ivec2, float, int, If, texture, dot, sin, sqrt, floor, fract, min, max, mix, clamp } from 'three/tsl';
 import { REC709_LUMINANCE_COEFFICIENTS } from './Common.js';
@@ -74,7 +74,7 @@ export const sampleEquirect = Fn( ( [ environment, direction, environmentMatrix,
 // Exact implementation from three-gpu-pathtracer
 export const sampleEquirectProbability = Fn( ( [
 	environment,
-	envCDFBuffer,
+	envCDFTexture,
 	environmentMatrix,
 	environmentIntensity,
 	envTotalSum,
@@ -84,9 +84,8 @@ export const sampleEquirectProbability = Fn( ( [
 	colorOutput
 ] ) => {
-	// Packed CDF layout: [marginal (envResolution.y floats) | conditional (envResolution.x * envResolution.y floats)]
-	// The conditional offset equals the marginal length, which is envResolution.y.
-	const condOffset = int( envResolution.y ).toVar();
+	// CDF texture layout: (W+1)×H R32F — conditional[cy*W+cx] at texel (cx,cy); marginal[cy] at column W.
+	const cdfMarginalCol = int( envResolution.x ).toVar();
 	// Sample marginal CDF for V coordinate (1D, linear interpolation)
 	const marginalSize = envResolution.y;
@@ -94,7 +93,11 @@ export const sampleEquirectProbability = Fn( ( [
 	const mI0 = int( floor( mIdx ) );
 	const mI1 = min( mI0.add( 1 ), int( marginalSize ).sub( 1 ) );
 	const mFrac = fract( mIdx );
-	const v = mix( envCDFBuffer.element( mI0 ), envCDFBuffer.element( mI1 ), mFrac ).toVar();
+	const v = mix(
+		envCDFTexture.load( ivec2( cdfMarginalCol, mI0 ) ).x,
+		envCDFTexture.load( ivec2( cdfMarginalCol, mI1 ) ).x,
+		mFrac,
+	).toVar();
 	// Sample conditional CDF for U coordinate (2D grid, bilinear interpolation)
 	const condW = envResolution.x;
@@ -107,11 +110,10 @@ export const sampleEquirectProbability = Fn( ( [
 	const cy1 = min( cy0.add( 1 ), int( condH ).sub( 1 ) );
 	const fx = fract( cxf );
 	const fy = fract( cyf );
-	const condWi = int( condW );
-	const v00 = envCDFBuffer.element( condOffset.add( cy0.mul( condWi ).add( cx0 ) ) );
-	const v10 = envCDFBuffer.element( condOffset.add( cy0.mul( condWi ).add( cx1 ) ) );
-	const v01 = envCDFBuffer.element( condOffset.add( cy1.mul( condWi ).add( cx0 ) ) );
-	const v11 = envCDFBuffer.element( condOffset.add( cy1.mul( condWi ).add( cx1 ) ) );
+	const v00 = envCDFTexture.load( ivec2( cx0, cy0 ) ).x;
+	const v10 = envCDFTexture.load( ivec2( cx1, cy0 ) ).x;
+	const v01 = envCDFTexture.load( ivec2( cx0, cy1 ) ).x;
+	const v11 = envCDFTexture.load( ivec2( cx1, cy1 ) ).x;
 	const u = mix( mix( v00, v10, fx ), mix( v01, v11, fx ), fy ).toVar();
 	const uv = vec2( u, v ).toVar();

package/src/TSL/ExtendKernel.js ADDED Viewed

@@ -0,0 +1,75 @@
+/**
+ * ExtendKernel.js — wavefront BVH traversal (256×1, 1D ray-parallel dispatch).
+ */
+import {
+	Fn, uint,
+	If,
+	instanceIndex,
+	atomicLoad,
+	Return,
+} from 'three/tsl';
+import { traverseBVH } from './BVHTraversal.js';
+import { Ray, HitInfo } from './Struct.js';
+import {
+	readRayOrigin, readRayDirection, readMediumStack,
+	writeHitPacked,
+} from '../Processor/PackedRayBuffer.js';
+import { COUNTER } from '../Processor/QueueManager.js';
+const WG_SIZE = 256;
+/**
+ * Builds the TSL compute function for the extend (intersection) pass.
+ *
+ * Each thread below the bound takes one ray ID from the dense active list, rebuilds the ray from
+ * the packed ray buffer, traverses the BVH and writes the packed hit record for that ray.
+ *
+ * @param {Object} params
+ * @param {Object} params.bvhBuffer - BVH data passed to traverseBVH.
+ * @param {Object} params.triangleBuffer - Triangle data passed to traverseBVH.
+ * @param {Object} [params.materialBuffer] - Destructured but not used anywhere in this kernel.
+ * @param {Object} params.rayBufferRO - Packed ray buffer (origin, direction, medium stack are read).
+ * @param {Object} params.hitBufferRW - Packed hit buffer written through writeHitPacked.
+ * @param {Object} params.activeIndicesRO - Dense list of active ray IDs, indexed by thread.
+ * @param {Object} [params.counters] - Atomic counter buffer; when given, the thread bound is
+ *   COUNTER.ENTERING_COUNT.
+ * @param {*} [params.maxRayCount] - Thread bound used only when counters is not given.
+ * @returns {Function} The Fn-wrapped compute function; dispatching it is the caller's job.
+ */
+export function buildExtendKernel( params ) {
+	const {
+		bvhBuffer, triangleBuffer, materialBuffer,
+		rayBufferRO,
+		hitBufferRW,
+		activeIndicesRO,
+		counters,
+		maxRayCount,
+	} = params;
+	const computeFn = Fn( () => {
+		const threadIdx = instanceIndex;
+		// kernels bound on ENTERING_COUNT so an over-sized (margin) dispatch is safe.
+		const bound = counters ? atomicLoad( counters.element( uint( COUNTER.ENTERING_COUNT ) ) ) : maxRayCount;
+		If( threadIdx.greaterThanEqual( bound ), () => {
+			Return();
+		} );
+		// Indirection: thread index → ray ID through the dense active list.
+		const rayID = activeIndicesRO.element( threadIdx );
+		const origin = readRayOrigin( rayBufferRO, rayID ).toVar();
+		const direction = readRayDirection( rayBufferRO, rayID ).toVar();
+		const ray = Ray( { origin, direction } );
+		// insideMedium bypasses front/back culling so the ray can hit a glass/SSS back-facing boundary.
+		const insideMedium = readMediumStack( rayBufferRO, rayID ).stackDepth.greaterThan( uint( 0 ) );
+		const hitInfo = HitInfo.wrap( traverseBVH(
+			ray, bvhBuffer, triangleBuffer, insideMedium,
+		) ).toVar();
+		// The hit record is stored under rayID (not threadIdx).
+		writeHitPacked(
+			hitBufferRW, rayID,
+			hitInfo.dst,
+			uint( hitInfo.triangleIndex ),
+			hitInfo.uv.x, hitInfo.uv.y,
+			hitInfo.normal,
+			uint( hitInfo.materialIndex ),
+			uint( hitInfo.meshIndex ),
+		);
+	} );
+	return computeFn;
+}
+export { WG_SIZE as EXTEND_WG_SIZE };

package/src/TSL/FinalWriteKernel.js ADDED Viewed

@@ -0,0 +1,121 @@
+/**
+ * FinalWriteKernel.js — wavefront final output: temporal accumulation + MRT StorageTexture writes (16×16, 2D).
+ */
+import {
+	Fn, wgslFn, float, vec2, vec4, int, uint, uvec2,
+	If, mix, select, texture, textureStore,
+	localId, workgroupId,
+} from 'three/tsl';
+import {
+	readRayRadiance, readGBuffer, gbDecodeNormalDepth, gbDecodeAlbedo,
+} from '../Processor/PackedRayBuffer.js';
+const WG_SIZE = 16;
+// Debug mode 11: NaN/Inf detector — returns red when any component of c is NaN (c != c) or has
+// magnitude above 1e30, black otherwise. "Inf" is therefore a threshold test: very large finite
+// values are flagged too.
+// NOTE(review): WGSL permits implementations to assume NaNs/infinities are absent, so the
+// `x != x` test may be optimized away on some backends — confirm on the target GPUs.
+const nanInfToRed = /*@__PURE__*/ wgslFn( `
+	fn nanInfToRed( c: vec3f ) -> vec3f {
+		let isNan = c.x != c.x || c.y != c.y || c.z != c.z;
+		let isInf = abs( c.x ) > 1e30f || abs( c.y ) > 1e30f || abs( c.z ) > 1e30f;
+		if ( isNan || isInf ) { return vec3f( 1.0f, 0.0f, 0.0f ); }
+		return vec3f( 0.0f );
+	}
+` );
+/**
+ * Builds the TSL compute function for the final-write pass.
+ *
+ * For every pixel inside renderWidth × renderHeight the returned function averages the pixel's
+ * S radiance sub-samples, decodes normal/depth and albedo from the G-buffer, optionally blends
+ * all three with the previous frame's textures, applies the visMode 11 NaN/Inf detector, and
+ * stores the results into the color, normal/depth and albedo textures.
+ *
+ * @param {Object} params - Buffers, textures and uniform nodes listed in the destructuring below.
+ * @param {number} [params.samplesPerPass=1] - Sub-samples per pixel; truncated to a 32-bit
+ *   integer when the kernel is built.
+ * @returns {Function} The Fn-wrapped compute function; dispatching it is the caller's job.
+ */
+export function buildFinalWriteKernel( params ) {
+	const {
+		rayBufferRO, gBufferRO,
+		writeColorTex, writeNDTex, writeAlbedoTex,
+		resolution, frame,
+		enableAccumulation, hasPreviousAccumulated, accumulationAlpha, cameraIsMoving,
+		transparentBackground,
+		prevAccumTexture, prevNormalDepthTexture, prevAlbedoTexture,
+		renderWidth, renderHeight,
+		// Multi-sample: average S sample-slots per pixel (slot = pixel + k*w*h, w*h from the resolution uniform).
+		samplesPerPass = 1,
+		visMode,
+	} = params;
+	// S is a plain JS integer, so the sub-sample loop below is unrolled while the kernel is built.
+	const S = samplesPerPass | 0;
+	const computeFn = Fn( () => {
+		// Global pixel coordinate = workgroup id × WG_SIZE + id within the workgroup.
+		const gx = int( workgroupId.x ).mul( WG_SIZE ).add( int( localId.x ) );
+		const gy = int( workgroupId.y ).mul( WG_SIZE ).add( int( localId.y ) );
+		If( gx.lessThan( renderWidth ).and( gy.lessThan( renderHeight ) ), () => {
+			// Row-major pixel index with resolution.x as the row stride; doubles as the sub-sample-0 ray ID.
+			const pixelIndex = gy.mul( int( resolution.x ) ).add( gx );
+			const rayID = uint( pixelIndex );
+			// Average the S sub-samples; MRT (normal/depth/albedo) from sub-sample 0.
+			const sampleColor = ( () => {
+				if ( S <= 1 ) return readRayRadiance( rayBufferRO, rayID );
+				const acc = readRayRadiance( rayBufferRO, rayID ).toVar();
+				const mrps = uint( resolution.x ).mul( uint( resolution.y ) ).toVar(); // w*h from the resolution uniform, not baked
+				for ( let k = 1; k < S; k ++ ) {
+					acc.addAssign( readRayRadiance( rayBufferRO, rayID.add( uint( k ).mul( mrps ) ) ) );
+				}
+				acc.assign( acc.div( float( S ) ) );
+				return acc;
+			} )();
+			// MRT comes from the per-pixel G-buffer (rayID == pixelIndex here, i.e. sub-sample 0). Half-packed: decode.
+			const gbuf = readGBuffer( gBufferRO, rayID );
+			const normalDepth = gbDecodeNormalDepth( gbuf );
+			const albedoID = vec4( gbDecodeAlbedo( gbuf ), 0.0 );
+			const finalColor = sampleColor.xyz.toVar();
+			const finalNormalDepth = normalDepth.toVar();
+			const finalAlbedo = albedoID.xyz.toVar();
+			// Alpha is the sample's own alpha only for transparent backgrounds; otherwise opaque.
+			const outputAlpha = select( transparentBackground, sampleColor.w, float( 1.0 ) ).toVar();
+			// UV of this pixel's center, used to sample the previous-frame textures.
+			const pixelCoord = vec2( float( gx ).add( 0.5 ), float( gy ).add( 0.5 ) );
+			const prevUV = pixelCoord.div( resolution );
+			// visMode 11 (NaN/Inf) bypasses accumulation (megakernel parity main_TSL_PathTracer.js:355) so the
+			// detector runs on each frame's fresh color — else mix() propagates a transient NaN and it stays red forever.
+			If( enableAccumulation.and( cameraIsMoving.not() ).and( frame.greaterThan( uint( 0 ) ) ).and( hasPreviousAccumulated ).and( visMode.notEqual( int( 11 ) ) ), () => {
+				const prevAccumSample = texture( prevAccumTexture, prevUV, 0 ).toVar();
+				// Blend previous → current by accumulationAlpha for color, normal/depth and albedo alike.
+				finalColor.assign( mix( prevAccumSample.xyz, sampleColor.xyz, accumulationAlpha ) );
+				finalNormalDepth.assign( mix( texture( prevNormalDepthTexture, prevUV, 0 ), finalNormalDepth, accumulationAlpha ) );
+				finalAlbedo.assign( mix( texture( prevAlbedoTexture, prevUV, 0 ).xyz, finalAlbedo, accumulationAlpha ) );
+				If( transparentBackground, () => {
+					outputAlpha.assign( mix( prevAccumSample.w, sampleColor.w, accumulationAlpha ) );
+				} );
+			} );
+			// Debug mode 11: flag NaN/Inf in finalColor — this pass's fresh sample color, because the
+			// accumulation branch above is skipped for mode 11 (red on NaN/Inf, black elsewhere).
+			If( visMode.equal( int( 11 ) ), () => {
+				finalColor.assign( nanInfToRed( finalColor ) );
+			} );
+			const uintCoord = uvec2( uint( gx ), uint( gy ) );
+			textureStore( writeColorTex, uintCoord, vec4( finalColor, outputAlpha ) ).toWriteOnly();
+			textureStore( writeNDTex, uintCoord, finalNormalDepth ).toWriteOnly();
+			textureStore( writeAlbedoTex, uintCoord, vec4( finalAlbedo, 1.0 ) ).toWriteOnly();
+		} );
+	} );
+	return computeFn;
+}
+export { WG_SIZE as FINALWRITE_WG_SIZE };