rayzee 5.3.7 → 5.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  import { Fn, wgslFn, vec4, float, int, uint, ivec2, uvec2, uniform, If, max,
2
- textureLoad, textureStore, workgroupArray, workgroupBarrier, localId, workgroupId } from 'three/tsl';
3
- import { RenderTarget, TextureNode, StorageTexture } from 'three/webgpu';
2
+ textureLoad, textureStore, workgroupArray, workgroupBarrier, localId, workgroupId,
3
+ attributeArray } from 'three/tsl';
4
+ import { RenderTarget, TextureNode, StorageTexture, ReadbackBuffer } from 'three/webgpu';
4
5
  import { FloatType, RGBAFormat, NearestFilter } from 'three';
5
6
  import { RenderStage, StageExecutionMode } from '../Pipeline/RenderStage.js';
6
7
  import { luminance } from '../TSL/Common.js';
@@ -54,8 +55,11 @@ const adaptExposure = /*@__PURE__*/ wgslFn( `
54
55
  * 2. Reduction (compute): parallel reduction 64×64 → 1×1 via shared memory
55
56
  * Single workgroup of 256 threads, each loads 16 texels.
56
57
  * Computes geometric mean: exp(Σlog(L) / N)
57
- * 3. Adaptation (compute): temporal smoothing with prev exposure
58
- * 4. Async readback (1×1): apply to renderer.toneMappingExposure
58
+ * 3. Adaptation (compute): temporal smoothing with prev exposure; writes
59
+ * vec4(exposure, luminance, targetExposure, 1) into a 1-element storage buffer.
60
+ * 4. Async readback via `renderer.getArrayBufferAsync(attr, ReadbackBuffer)`:
61
+ * the ReadbackBuffer pools its staging GPUBuffer across frames, avoiding
62
+ * per-frame allocation churn. Apply to renderer.toneMappingExposure.
59
63
  *
60
64
  * WebGPU advantage: async readback (no GPU pipeline stall).
61
65
  * 1-frame delay is imperceptible for slowly-changing exposure.
@@ -156,15 +160,12 @@ export class AutoExposure extends RenderStage {
156
160
  // from StorageTexture return zeros — must copy to RenderTarget first)
157
161
  this._reductionReadTarget = new RenderTarget( 1, 1, rtOpts );
158
162
 
159
- // Adaptation StorageTexture (1) compute writes here
160
- this._adaptationStorageTex = new StorageTexture( 1, 1 );
161
- this._adaptationStorageTex.type = FloatType;
162
- this._adaptationStorageTex.format = RGBAFormat;
163
- this._adaptationStorageTex.minFilter = NearestFilter;
164
- this._adaptationStorageTex.magFilter = NearestFilter;
165
-
166
- // Adaptation target (1×1) — readable copy for async readback
167
- this._adaptationTarget = new RenderTarget( 1, 1, rtOpts );
163
+ // Adaptation result vec4 storage buffer attribute. Compute writes
164
+ // vec4(exposure, luminance, targetExposure, 1) here; CPU reads via
165
+ // getArrayBufferAsync + a pooled ReadbackBuffer (16 bytes).
166
+ this._adaptationResult = attributeArray( 1, 'vec4' );
167
+ this._readbackBuffer = new ReadbackBuffer( 16 );
168
+ this._readbackBuffer.name = 'AutoExposureAdaptation';
168
169
 
169
170
  }
170
171
 
@@ -351,16 +352,15 @@ export class AutoExposure extends RenderStage {
351
352
  * Adaptation (compute): temporal smoothing
352
353
  *
353
354
  * Single-thread compute dispatch [1, 1, 1], workgroup [1, 1, 1].
354
- * Reads geometric mean from reduction RenderTarget (copied from StorageTexture),
355
- * reads previous adaptation from adaptation RenderTarget,
356
- * applies asymmetric temporal smoothing, writes to adaptation StorageTexture.
357
- *
358
- * Output: R = exposure, G = luminance, B = targetExposure, A = 1
355
+ * Reads geometric mean from reduction RenderTarget, applies asymmetric
356
+ * temporal smoothing using the previous-exposure uniform, and writes
357
+ * vec4(exposure, luminance, targetExposure, 1) into a 1-element storage
358
+ * buffer which the CPU reads via getArrayBufferAsync + ReadbackBuffer.
359
359
  */
360
360
  _buildAdaptationCompute() {
361
361
 
362
362
  const reductionTex = this._reductionReadTexNode;
363
- const outputTex = this._adaptationStorageTex;
363
+ const resultBuf = this._adaptationResult;
364
364
  const keyValue = this.keyValueU;
365
365
  const minExp = this.minExposureU;
366
366
  const maxExp = this.maxExposureU;
@@ -381,11 +381,7 @@ export class AutoExposure extends RenderStage {
381
381
  dt, isFirst
382
382
  );
383
383
 
384
- textureStore(
385
- outputTex,
386
- uvec2( uint( 0 ), uint( 0 ) ),
387
- result
388
- ).toWriteOnly();
384
+ resultBuf.element( uint( 0 ) ).assign( result );
389
385
 
390
386
  } );
391
387
 
@@ -463,22 +459,31 @@ export class AutoExposure extends RenderStage {
463
459
 
464
460
  this._reductionReadTexNode.value = this._reductionReadTarget.texture;
465
461
  this.renderer.compute( this._adaptationComputeNode );
466
- this.renderer.copyTextureToTexture( this._adaptationStorageTex, this._adaptationTarget.texture );
467
462
 
468
- // ── Async readback (WebGPU advantage) ────────────
463
+ // ── Async readback via pooled ReadbackBuffer ─────
464
+ // getArrayBufferAsync reuses the ReadbackBuffer's internal staging
465
+ // GPUBuffer across frames. ReadbackBuffer.release() must be called
466
+ // before it can be reused — the _pendingReadback flag gates reentry.
469
467
 
470
468
  if ( ! this._pendingReadback ) {
471
469
 
472
470
  this._pendingReadback = true;
473
471
  const generation = this._readbackGeneration;
474
472
 
475
- this.renderer.readRenderTargetPixelsAsync(
476
- this._adaptationTarget, 0, 0, 1, 1
477
- ).then( ( data ) => {
473
+ this.renderer.getArrayBufferAsync(
474
+ this._adaptationResult.value, this._readbackBuffer
475
+ ).then( ( readback ) => {
478
476
 
477
+ // Copy the 4 floats out of the mapped buffer before release(),
478
+ // because release() nulls readback.buffer and unmaps the GPU buffer.
479
+ const data = readback && readback.buffer
480
+ ? new Float32Array( readback.buffer.slice( 0 ) )
481
+ : null;
482
+ this._readbackBuffer.release();
479
483
  this._pendingReadback = false;
484
+
480
485
  // Discard stale readback from before a reset
481
- if ( generation === this._readbackGeneration ) {
486
+ if ( data && generation === this._readbackGeneration ) {
482
487
 
483
488
  this._applyReadback( data );
484
489
 
@@ -486,6 +491,12 @@ export class AutoExposure extends RenderStage {
486
491
 
487
492
  } ).catch( () => {
488
493
 
494
+ try {
495
+
496
+ this._readbackBuffer.release();
497
+
498
+ } catch { /* buffer may not be mapped on error */ }
499
+
489
500
  this._pendingReadback = false;
490
501
 
491
502
  } );
@@ -612,8 +623,7 @@ export class AutoExposure extends RenderStage {
612
623
  this._downsampleStorageTex?.dispose();
613
624
  this._reductionStorageTex?.dispose();
614
625
  this._reductionReadTarget?.dispose();
615
- this._adaptationStorageTex?.dispose();
616
- this._adaptationTarget?.dispose();
626
+ this._readbackBuffer?.dispose();
617
627
 
618
628
  }
619
629
 
@@ -304,8 +304,8 @@ export class BilateralFilter extends RenderStage {
304
304
  // Update dispatch dimensions
305
305
  this._dispatchX = Math.ceil( width / 8 );
306
306
  this._dispatchY = Math.ceil( height / 8 );
307
- this._computeNodeA.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
308
- this._computeNodeB.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
307
+ this._computeNodeA.dispatchSize = [ this._dispatchX, this._dispatchY, 1 ];
308
+ this._computeNodeB.dispatchSize = [ this._dispatchX, this._dispatchY, 1 ];
309
309
 
310
310
  }
311
311
 
@@ -111,7 +111,8 @@ export class Display extends RenderStage {
111
111
  dispose() {
112
112
 
113
113
  this.displayMaterial?.dispose();
114
- this.displayQuad?.dispose();
114
+ // QuadMesh extends Mesh — no dispose method; material already disposed.
115
+ this.displayQuad = null;
115
116
 
116
117
  }
117
118
 
@@ -50,7 +50,9 @@ export class EdgeFilter extends RenderStage {
50
50
 
51
51
  // Output StorageTexture (compute writes here)
52
52
  // Pre-allocated at max size — NEVER resize/dispose after this.
53
- // StorageTexture.setSize() breaks textureStore bind groups (Three.js bug #32969).
53
+ // Kept as a defensive pattern: bug #32969 (setSize bind-group staleness)
54
+ // was fixed in r184 (PR #33028), but #33061 (TSL compute pipeline
55
+ // re-compile returns zeros) is still open.
54
56
  const MAX_STORAGE_SIZE = 2048;
55
57
  const w = options.width || 1;
56
58
  const h = options.height || 1;
@@ -254,7 +256,8 @@ export class EdgeFilter extends RenderStage {
254
256
  setSize( width, height ) {
255
257
 
256
258
  // Only resize the RenderTarget — StorageTexture stays at max allocation
257
- // (StorageTexture.setSize() breaks textureStore bind groups, Three.js bug #32969)
259
+ // (see constructor note: pre-allocation is a defensive pattern, retained
260
+ // after r184 fixed #32969, because #33061 is still open.)
258
261
  this.outputTarget.setSize( width, height );
259
262
  this.outputTarget.texture.needsUpdate = true;
260
263
  this.resW.value = width;
@@ -263,7 +266,7 @@ export class EdgeFilter extends RenderStage {
263
266
  // Update dispatch dimensions
264
267
  this._dispatchX = Math.ceil( width / 16 );
265
268
  this._dispatchY = Math.ceil( height / 16 );
266
- this._computeNode.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
269
+ this._computeNode.dispatchSize = [ this._dispatchX, this._dispatchY, 1 ];
267
270
 
268
271
  }
269
272
 
@@ -515,13 +515,13 @@ export class MotionVector extends RenderStage {
515
515
  this._dispatchY = Math.ceil( height / 16 );
516
516
  if ( this._screenSpaceComputeNode ) {
517
517
 
518
- this._screenSpaceComputeNode.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
518
+ this._screenSpaceComputeNode.dispatchSize = [ this._dispatchX, this._dispatchY, 1 ];
519
519
 
520
520
  }
521
521
 
522
522
  if ( this._worldSpaceComputeNode ) {
523
523
 
524
- this._worldSpaceComputeNode.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
524
+ this._worldSpaceComputeNode.dispatchSize = [ this._dispatchX, this._dispatchY, 1 ];
525
525
 
526
526
  }
527
527
 
@@ -352,7 +352,7 @@ export class NormalDepth extends RenderStage {
352
352
  this._dispatchY = Math.ceil( height / 8 );
353
353
  if ( this._computeNode ) {
354
354
 
355
- this._computeNode.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
355
+ this._computeNode.dispatchSize = [ this._dispatchX, this._dispatchY, 1 ];
356
356
 
357
357
  }
358
358
 
@@ -1135,9 +1135,6 @@ export class PathTracer extends RenderStage {
1135
1135
  // Update frame uniform
1136
1136
  this.frame.value = frameValue;
1137
1137
 
1138
- // Force-compile compute nodes on first frame
1139
- this.shaderBuilder.forceCompile( this.renderer );
1140
-
1141
1138
  // Set dispatch region — tile-only dispatch for tiled mode, full-screen otherwise
1142
1139
  if ( tileInfo.tileIndex >= 0 && tileInfo.tileBounds ) {
1143
1140
 
@@ -1616,6 +1613,7 @@ export class PathTracer extends RenderStage {
1616
1613
  this.materialData?.dispose();
1617
1614
  this.environment?.dispose();
1618
1615
  this.shaderBuilder?.dispose();
1616
+ this.uniforms?.dispose();
1619
1617
 
1620
1618
  // Dispose storage textures
1621
1619
  this.storageTextures?.dispose();
@@ -175,10 +175,10 @@ export class SSRC extends RenderStage {
175
175
  this._dispatchX = Math.ceil( width / 8 );
176
176
  this._dispatchY = Math.ceil( height / 8 );
177
177
 
178
- const count = [ this._dispatchX, this._dispatchY, 1 ];
179
- if ( this._pass1NodeA ) this._pass1NodeA.setCount( count );
180
- if ( this._pass1NodeB ) this._pass1NodeB.setCount( count );
181
- if ( this._pass2Node ) this._pass2Node.setCount( count );
178
+ const dispatchSize = [ this._dispatchX, this._dispatchY, 1 ];
179
+ if ( this._pass1NodeA ) this._pass1NodeA.dispatchSize = dispatchSize;
180
+ if ( this._pass1NodeB ) this._pass1NodeB.dispatchSize = dispatchSize;
181
+ if ( this._pass2Node ) this._pass2Node.dispatchSize = dispatchSize;
182
182
 
183
183
  this._resetCache();
184
184
 
@@ -360,8 +360,8 @@ export class Variance extends RenderStage {
360
360
  // Update dispatch dimensions
361
361
  this._dispatchX = Math.ceil( width / 8 );
362
362
  this._dispatchY = Math.ceil( height / 8 );
363
- this._computeNodeA.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
364
- this._computeNodeB.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
363
+ this._computeNodeA.dispatchSize = [ this._dispatchX, this._dispatchY, 1 ];
364
+ this._computeNodeB.dispatchSize = [ this._dispatchX, this._dispatchY, 1 ];
365
365
 
366
366
  }
367
367
 
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Monkey-patch to disable WGSL global-variable promotion for compute shaders.
3
+ *
4
+ * Three.js r184 introduced `WGSLNodeBuilder.allowGlobalVariables = true` which
5
+ * emits `.toVar()` declarations at WGSL module scope as `var<private> name : T`
6
+ * instead of function-local `var name : T` inside `fn main()` (as r183 did).
7
+ *
8
+ * For shaders with hundreds of `.toVar()` calls inside loops (e.g. our BVH
9
+ * traversal + BRDF path tracer), `var<private>` increases GPU register pressure
10
+ * because the Dawn/Chromium WGSL compiler cannot aggressively register-allocate
11
+ * variables with a stable per-invocation memory address. We measured a ~8% fps
12
+ * regression (120 → 110) on the path tracer after upgrading r183 → r184 that
13
+ * traced entirely to GPU execution, not CPU.
14
+ *
15
+ * This patch wraps `WebGPUBackend.createNodeBuilder` so every node builder
16
+ * created for a compute node (`material === null`) reports
17
+ * `allowGlobalVariables = false`, restoring r183's function-scoped `var`
18
+ * emission inside `fn main()`; render (vertex/fragment) builders keep `true`.
19
+ * No behavior change is intended: for per-invocation storage, `var<private>`
20
+ * and function-local `var` behave the same; only the compiler's
+ * register-allocation latitude differs.
21
+ *
22
+ * Relevant upstream lines:
23
+ * - `node_modules/three/src/renderers/webgpu/nodes/WGSLNodeBuilder.js:247`
24
+ * (`this.allowGlobalVariables = true`)
25
+ * - `...WGSLNodeBuilder.js:2458` (module-scope vars block)
26
+ * - `...WGSLNodeBuilder.js:2467` (function-body vars block)
27
+ *
28
+ * Revisit if upstream adds an official opt-out or fixes register pressure.
29
+ * Import this module once at app startup (side-effect only).
30
+ */
31
+
32
+ import { WebGPUBackend } from 'three/webgpu';
33
+
34
+ const _origCreateNodeBuilder = WebGPUBackend.prototype.createNodeBuilder;
35
+
36
+ // WGSLNodeBuilder's `allowGlobalVariables` switch is ONLY consumed by the
37
+ // compute-shader template (see `_getWGSLComputeCode`). The vertex/fragment
38
+ // templates always emit `shaderData.vars` at module scope and therefore
39
+ // REQUIRE `allowGlobalVariables=true` (emitting function-local `var` at
40
+ // module scope is invalid WGSL and crashes pipeline creation with
41
+ // "Invalid ShaderModule"). We install a per-instance accessor that returns
42
+ // `false` only when the builder is for a compute node (material === null)
43
+ // and `true` otherwise, so render pipelines keep r184 behavior untouched.
44
+ WebGPUBackend.prototype.createNodeBuilder = function ( object, renderer ) {
45
+
46
+ const builder = _origCreateNodeBuilder.call( this, object, renderer );
47
+
48
+ Object.defineProperty( builder, 'allowGlobalVariables', {
49
+ get() {
50
+
51
+ return this.material !== null;
52
+
53
+ },
54
+ set() { /* ignore — the value is derived from material presence */ },
55
+ configurable: true,
56
+ } );
57
+
58
+ return builder;
59
+
60
+ };
package/src/index.js CHANGED
@@ -6,7 +6,7 @@
6
6
  */
7
7
 
8
8
  // Patches (side-effect imports — must run before any WebGPU node builder is created)
9
- import './TSL/storageTexturePatch.js';
9
+ import './TSL/wgslGlobalVarsPatch.js';
10
10
 
11
11
  // Main application
12
12
  export { PathTracerApp } from './PathTracerApp.js';
@@ -255,6 +255,26 @@ export class LightManager extends EventDispatcher {
255
255
 
256
256
  }
257
257
 
258
+ /**
259
+ * Releases all scene lights, helper nodes, and callback refs.
260
+ * Safe to call multiple times.
261
+ */
262
+ dispose() {
263
+
264
+ if ( this._disposed ) return;
265
+ this._disposed = true;
266
+
267
+ this.sceneHelpers?.clear();
268
+ this._removeAllLights();
269
+
270
+ // Drop external refs so GC can collect scene/pathTracer
271
+ this._onReset = null;
272
+ this.pathTracer = null;
273
+ this.sceneHelpers = null;
274
+ this.scene = null;
275
+
276
+ }
277
+
258
278
  // ── Private ───────────────────────────────────────────────────
259
279
 
260
280
  /** Syncs helpers in sceneHelpers with current scene lights. */
@@ -265,4 +265,23 @@ export class UniformManager {
265
265
 
266
266
  }
267
267
 
268
+ /**
269
+ * Releases uniform node references. Safe to call multiple times.
270
+ *
271
+ * Note: TSL uniform nodes are registered in the shader graph — once a
272
+ * compiled pipeline references them they are kept alive by the renderer
273
+ * until the pipeline is disposed. Clearing our maps here just drops the
274
+ * JS-side strong refs so UniformManager itself can be collected.
275
+ */
276
+ dispose() {
277
+
278
+ if ( this._disposed ) return;
279
+ this._disposed = true;
280
+
281
+ this._uniforms.clear();
282
+ this._booleans.clear();
283
+ this._lightBuffers = {};
284
+
285
+ }
286
+
268
287
  }
@@ -121,7 +121,9 @@ export class OutlineHelper {
121
121
  this.visible = false;
122
122
  this._outlineNode?.dispose();
123
123
  this._material?.dispose();
124
- this._quad?.dispose();
124
+ // QuadMesh extends Mesh — no dispose method on the mesh itself;
125
+ // its material is already disposed above. Just drop the ref.
126
+ this._quad = null;
125
127
 
126
128
  }
127
129