rayzee 5.3.8 → 5.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/dist/rayzee.es.js +2933 -2888
  2. package/dist/rayzee.es.js.map +1 -1
  3. package/dist/rayzee.umd.js +53 -53
  4. package/dist/rayzee.umd.js.map +1 -1
  5. package/package.json +2 -2
  6. package/src/Passes/AIUpscaler.js +30 -6
  7. package/src/Passes/OIDNDenoiser.js +57 -15
  8. package/src/PathTracerApp.js +154 -21
  9. package/src/Pipeline/RenderPipeline.js +10 -1
  10. package/src/Processor/AssetLoader.js +40 -18
  11. package/src/Processor/EquirectHDRInfo.js +38 -29
  12. package/src/Processor/InstanceTable.js +16 -0
  13. package/src/Processor/SceneProcessor.js +22 -33
  14. package/src/Processor/ShaderBuilder.js +67 -22
  15. package/src/Processor/TLASBuilder.js +9 -4
  16. package/src/Stages/ASVGF.js +4 -4
  17. package/src/Stages/AdaptiveSampling.js +2 -2
  18. package/src/Stages/AutoExposure.js +42 -32
  19. package/src/Stages/BilateralFilter.js +2 -2
  20. package/src/Stages/Display.js +2 -1
  21. package/src/Stages/EdgeFilter.js +6 -3
  22. package/src/Stages/MotionVector.js +2 -2
  23. package/src/Stages/NormalDepth.js +1 -1
  24. package/src/Stages/PathTracer.js +88 -46
  25. package/src/Stages/SSRC.js +4 -4
  26. package/src/Stages/Variance.js +2 -2
  27. package/src/TSL/BVHTraversal.js +15 -63
  28. package/src/TSL/Clearcoat.js +1 -1
  29. package/src/TSL/Displacement.js +1 -1
  30. package/src/TSL/EmissiveSampling.js +17 -13
  31. package/src/TSL/Environment.js +12 -9
  32. package/src/TSL/LightBVHSampling.js +3 -2
  33. package/src/TSL/LightsCore.js +1 -1
  34. package/src/TSL/LightsDirect.js +1 -1
  35. package/src/TSL/LightsIndirect.js +0 -1
  36. package/src/TSL/LightsSampling.js +2 -2
  37. package/src/TSL/MaterialTransmission.js +1 -1
  38. package/src/TSL/PathTracer.js +4 -4
  39. package/src/TSL/PathTracerCore.js +6 -6
  40. package/src/TSL/Struct.js +1 -1
  41. package/src/TSL/patches.js +145 -0
  42. package/src/index.js +1 -1
  43. package/src/managers/EnvironmentManager.js +32 -56
  44. package/src/managers/LightManager.js +20 -0
  45. package/src/managers/UniformManager.js +22 -0
  46. package/src/managers/helpers/OutlineHelper.js +3 -1
  47. package/src/TSL/storageTexturePatch.js +0 -31
  48. package/src/TSL/structProxy.js +0 -87
@@ -220,53 +220,62 @@ export class EquirectHDRInfo {
220
220
 
221
221
  const { floatData, width, height } = extractFloatData( hdr );
222
222
 
223
- // Reuse worker across calls; create on first use
224
- if ( ! this._worker ) {
223
+ // Fresh worker per call — terminated in finally to avoid ~30 MB residency.
224
+ try {
225
225
 
226
- try {
226
+ this._worker = new Worker( CDF_WORKER_URL, { type: 'module' } );
227
227
 
228
- this._worker = new Worker( CDF_WORKER_URL, { type: 'module' } );
228
+ } catch ( e ) {
229
229
 
230
- } catch ( e ) {
230
+ if ( e.name !== 'SecurityError' ) throw e;
231
+ this._worker = await fetchAsWorker( CDF_WORKER_URL );
231
232
 
232
- if ( e.name !== 'SecurityError' ) throw e;
233
- this._worker = await fetchAsWorker( CDF_WORKER_URL );
233
+ }
234
234
 
235
- }
235
+ try {
236
236
 
237
- }
237
+ const result = await new Promise( ( resolve, reject ) => {
238
238
 
239
- const result = await new Promise( ( resolve, reject ) => {
239
+ this._worker.onmessage = ( e ) => {
240
240
 
241
- this._worker.onmessage = ( e ) => {
241
+ if ( e.data.error ) {
242
242
 
243
- if ( e.data.error ) {
243
+ reject( new Error( e.data.error ) );
244
244
 
245
- reject( new Error( e.data.error ) );
245
+ } else {
246
246
 
247
- } else {
247
+ resolve( e.data );
248
248
 
249
- resolve( e.data );
249
+ }
250
250
 
251
- }
251
+ };
252
252
 
253
- };
253
+ this._worker.onerror = reject;
254
254
 
255
- this._worker.onerror = reject;
255
+ // Transfer floatData to worker (zero-copy)
256
+ this._worker.postMessage(
257
+ { floatData, width, height },
258
+ [ floatData.buffer ]
259
+ );
256
260
 
257
- // Transfer floatData to worker (zero-copy)
258
- this._worker.postMessage(
259
- { floatData, width, height },
260
- [ floatData.buffer ]
261
- );
261
+ } );
262
262
 
263
- } );
263
+ this.marginalData = result.marginalData;
264
+ this.conditionalData = result.conditionalData;
265
+ this.totalSum = result.totalSum;
266
+ this.width = result.width;
267
+ this.height = result.height;
264
268
 
265
- this.marginalData = result.marginalData;
266
- this.conditionalData = result.conditionalData;
267
- this.totalSum = result.totalSum;
268
- this.width = result.width;
269
- this.height = result.height;
269
+ } finally {
270
+
271
+ if ( this._worker ) {
272
+
273
+ this._worker.terminate();
274
+ this._worker = null;
275
+
276
+ }
277
+
278
+ }
270
279
 
271
280
  }
272
281
 
@@ -57,10 +57,26 @@ export class InstanceTable {
57
57
  worldAABB: null, // Computed from triangle data
58
58
  originalToBvhMap,
59
59
  bvhData,
60
+ visible: true, // Per-mesh visibility (baked into TLAS leaf slot [2])
61
+ tlasLeafIndex: - 1, // Set by TLASBuilder.flatten() — enables in-place visibility patching
60
62
  };
61
63
 
62
64
  }
63
65
 
66
+ /**
67
+ * Set per-mesh visibility flag. Does NOT update the GPU buffer —
68
+ * caller must patch combinedBvhData[tlasLeafIndex*16 + 2] and mark bvh attr dirty.
69
+ *
70
+ * @param {number} meshIndex
71
+ * @param {boolean} visible
72
+ */
73
+ setVisibility( meshIndex, visible ) {
74
+
75
+ const entry = this.entries[ meshIndex ];
76
+ if ( entry ) entry.visible = visible;
77
+
78
+ }
79
+
64
80
  /**
65
81
  * Compute world-space AABBs for all entries from their BLAS root node data.
66
82
  * O(1) per mesh for inner roots; falls back to triangle scan for leaf roots (rare).
@@ -476,48 +476,36 @@ export class SceneProcessor {
476
476
 
477
477
  const validEntries = this.instanceTable.entries.filter( e => e !== null );
478
478
 
479
- if ( validEntries.length === 1 ) {
480
-
481
- // Single mesh — use BLAS directly as flat BVH (no TLAS wrapper).
482
- // Avoids per-ray TLAS overhead and the extra branch in traversal.
483
- const entry = validEntries[ 0 ];
484
- this.bvhData = entry.bvhData;
485
- this.instanceTable.assignOffsets( 0 ); // BLAS at offset 0
486
- this._buildGlobalOriginalToBvhMap();
487
- entry.originalToBvhMap = null;
488
- entry.bvhData = null;
489
-
490
- } else {
491
-
492
- // Multi-mesh — build TLAS over mesh AABBs
493
- this.instanceTable.computeAABBs( this.triangleData );
494
- const { root: tlasRoot, nodeCount: tlasNodeCount } = this.tlasBuilder.build( validEntries );
479
+ // Always build a TLAS even for a single mesh — so the BLAS-pointer leaf
480
+ // carries packed per-mesh visibility in its slot [2]. The 1-node TLAS
481
+ // overhead (one extra leaf fetch per ray) is negligible and eliminates
482
+ // a dedicated visibility storage buffer binding.
483
+ this.instanceTable.computeAABBs( this.triangleData );
484
+ const { root: tlasRoot, nodeCount: tlasNodeCount } = this.tlasBuilder.build( validEntries );
495
485
 
496
- this.instanceTable.assignOffsets( tlasNodeCount );
497
- const totalNodes = this.instanceTable.totalNodeCount;
486
+ this.instanceTable.assignOffsets( tlasNodeCount );
487
+ const totalNodes = this.instanceTable.totalNodeCount;
498
488
 
499
- const tlasData = this.tlasBuilder.flatten( tlasRoot, validEntries );
489
+ const tlasData = this.tlasBuilder.flatten( tlasRoot, validEntries );
500
490
 
501
- // Assemble combined buffer: [TLAS][BLAS_0][BLAS_1]...[BLAS_M]
502
- this.bvhData = new Float32Array( totalNodes * 16 );
503
- this.bvhData.set( tlasData );
491
+ // Assemble combined buffer: [TLAS][BLAS_0][BLAS_1]...[BLAS_M]
492
+ this.bvhData = new Float32Array( totalNodes * 16 );
493
+ this.bvhData.set( tlasData );
504
494
 
505
- for ( const entry of validEntries ) {
495
+ for ( const entry of validEntries ) {
506
496
 
507
- const destOffset = entry.blasOffset * 16;
508
- this.bvhData.set( entry.bvhData, destOffset );
509
- this._offsetBLASInPlace( destOffset, entry.bvhData.length / 16, entry.blasOffset, entry.triOffset );
497
+ const destOffset = entry.blasOffset * 16;
498
+ this.bvhData.set( entry.bvhData, destOffset );
499
+ this._offsetBLASInPlace( destOffset, entry.bvhData.length / 16, entry.blasOffset, entry.triOffset );
510
500
 
511
- }
512
-
513
- this._buildGlobalOriginalToBvhMap();
501
+ }
514
502
 
515
- for ( const entry of validEntries ) {
503
+ this._buildGlobalOriginalToBvhMap();
516
504
 
517
- entry.originalToBvhMap = null;
518
- entry.bvhData = null;
505
+ for ( const entry of validEntries ) {
519
506
 
520
- }
507
+ entry.originalToBvhMap = null;
508
+ entry.bvhData = null;
521
509
 
522
510
  }
523
511
 
@@ -1384,6 +1372,7 @@ export class SceneProcessor {
1384
1372
  }
1385
1373
 
1386
1374
  pathTracer.setBVHData( this.bvhData );
1375
+ pathTracer.setInstanceTable( this.instanceTable );
1387
1376
 
1388
1377
  if ( this.materialData ) {
1389
1378
 
@@ -16,7 +16,6 @@ import { Fn, texture, vec2, float, int, uniform, If,
16
16
  import { TextureNode } from 'three/webgpu';
17
17
  import { LinearFilter, DataArrayTexture } from 'three';
18
18
  import { pathTracerMain } from '../TSL/PathTracer.js';
19
- import { setMeshVisibilityBuffer } from '../TSL/BVHTraversal.js';
20
19
  import { setShadowAlbedoMaps, setAlphaShadowsUniform } from '../TSL/LightsDirect.js';
21
20
  import { BuildTimer } from './BuildTimer.js';
22
21
 
@@ -50,9 +49,18 @@ export class ShaderBuilder {
50
49
  this._dispatchX = 0;
51
50
  this._dispatchY = 0;
52
51
 
52
+ // Reused per-frame dispatchSize array — avoids GC pressure from
53
+ // allocating [x,y,z] on every setFullScreenDispatch/setTileDispatch call.
54
+ // WebGPUBackend only reads indices 0..2 of this array during compute dispatch.
55
+ this._dispatchSize = [ 0, 0, 1 ];
56
+
53
57
  // Scene texture nodes cache (for in-place updates on model change)
54
58
  this._sceneTextureNodes = null;
55
59
 
60
+ // Whether the GPU compute pipeline has been compiled (via a real dispatch).
61
+ // Reset on setupCompute() rebuilds and on dispose().
62
+ this._compiled = false;
63
+
56
64
  }
57
65
 
58
66
  /**
@@ -89,6 +97,9 @@ export class ShaderBuilder {
89
97
  writeTex.color, writeTex.normalDepth, writeTex.albedo
90
98
  );
91
99
 
100
+ // New compute node → needs a fresh GPU pipeline compile
101
+ this._compiled = false;
102
+
92
103
  timer.end( 'Build compute node (TSL)' );
93
104
 
94
105
  timer.print();
@@ -131,7 +142,13 @@ export class ShaderBuilder {
131
142
  this._dispatchX = Math.ceil( width / WG_SIZE );
132
143
  this._dispatchY = Math.ceil( height / WG_SIZE );
133
144
 
134
- if ( this.computeNode ) this.computeNode.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
145
+ if ( this.computeNode ) {
146
+
147
+ this._dispatchSize[ 0 ] = this._dispatchX;
148
+ this._dispatchSize[ 1 ] = this._dispatchY;
149
+ this.computeNode.dispatchSize = this._dispatchSize;
150
+
151
+ }
135
152
 
136
153
  this.renderWidth.value = width;
137
154
  this.renderHeight.value = height;
@@ -158,7 +175,13 @@ export class ShaderBuilder {
158
175
  const dispatchX = Math.ceil( tileWidth / WG_SIZE );
159
176
  const dispatchY = Math.ceil( tileHeight / WG_SIZE );
160
177
 
161
- if ( this.computeNode ) this.computeNode.setCount( [ dispatchX, dispatchY, 1 ] );
178
+ if ( this.computeNode ) {
179
+
180
+ this._dispatchSize[ 0 ] = dispatchX;
181
+ this._dispatchSize[ 1 ] = dispatchY;
182
+ this.computeNode.dispatchSize = this._dispatchSize;
183
+
184
+ }
162
185
 
163
186
  }
164
187
 
@@ -170,13 +193,36 @@ export class ShaderBuilder {
170
193
  this.tileOffsetX.value = 0;
171
194
  this.tileOffsetY.value = 0;
172
195
 
173
- if ( this.computeNode ) this.computeNode.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
196
+ if ( this.computeNode ) {
197
+
198
+ this._dispatchSize[ 0 ] = this._dispatchX;
199
+ this._dispatchSize[ 1 ] = this._dispatchY;
200
+ this.computeNode.dispatchSize = this._dispatchSize;
201
+
202
+ }
174
203
 
175
204
  }
176
205
 
177
- forceCompile() {
206
+ /**
207
+ * Front-load GPU compute pipeline creation via a single dispatch.
208
+ *
209
+ * Three.js WebGPU has no `createComputePipelineAsync` path — compute
210
+ * pipelines always compile synchronously on first `renderer.compute(node)`.
211
+ * Calling this at build time (while a "Compiling shaders…" status is
212
+ * already visible) moves the stall off the first animate frame.
213
+ *
214
+ * The dispatch writes to ping-pong storage textures whose contents are
215
+ * discarded by the subsequent `reset()` (frame counter back to 0 →
216
+ * `hasPreviousAccumulated = 0` → prev textures are not read).
217
+ *
218
+ * @param {object} renderer - WebGPURenderer
219
+ */
220
+ forceCompile( renderer ) {
178
221
 
179
- // No-op — compilation happens on first renderer.compute() call.
222
+ if ( this._compiled || ! this.computeNode || ! renderer ) return;
223
+
224
+ this._compiled = true;
225
+ renderer.compute( this.computeNode );
180
226
 
181
227
  }
182
228
 
@@ -187,11 +233,9 @@ export class ShaderBuilder {
187
233
  const triStorage = stage.triangleStorageNode;
188
234
  const bvhStorage = stage.bvhStorageNode;
189
235
  const matStorage = stage.materialData.materialStorageNode;
190
- const emissiveTriStorage = stage.emissiveTriangleStorageNode;
191
- const lightBVHStorage = stage.lightBVHStorageNode;
192
-
193
- // Set per-mesh visibility buffer (module-level in BVHTraversal.js, read during graph construction)
194
- setMeshVisibilityBuffer( stage.meshVisibilityStorageNode );
236
+ // Packed light buffer — [lightBVH | emissive triangles]. One node fed to both
237
+ // TSL params; emissive reads offset by stage.emissiveVec4Offset.
238
+ const lightBufferStorage = stage.lightStorageNode;
195
239
 
196
240
  // Set alpha-shadow uniform (module-level in LightsDirect.js, read at runtime)
197
241
  setAlphaShadowsUniform( stage.uniforms.get( 'enableAlphaShadows' ) );
@@ -202,9 +246,9 @@ export class ShaderBuilder {
202
246
  const adaptiveSamplingTex = new TextureNode();
203
247
  this.adaptiveSamplingTexNode = adaptiveSamplingTex;
204
248
 
205
- // Environment importance sampling CDF (storage buffers)
206
- const marginalCDFStorage = stage.environment.envMarginalStorageNode;
207
- const conditionalCDFStorage = stage.environment.envConditionalStorageNode;
249
+ // Environment importance sampling CDF — packed storage buffer
250
+ // Layout: [marginal (envResolution.y floats) | conditional (envResolution.x * envResolution.y floats)]
251
+ const envCDFStorage = stage.environment.envCDFStorageNode;
208
252
 
209
253
  // Previous-frame texture nodes — initialized from readTarget textures
210
254
  const readTextures = storageTextures.getReadTextures();
@@ -238,8 +282,8 @@ export class ShaderBuilder {
238
282
  setShadowAlbedoMaps( albedoMapsTex );
239
283
 
240
284
  const result = {
241
- triStorage, bvhStorage, matStorage, emissiveTriStorage, lightBVHStorage,
242
- envTex, adaptiveSamplingTex, marginalCDFStorage, conditionalCDFStorage,
285
+ triStorage, bvhStorage, matStorage, lightBufferStorage,
286
+ envTex, adaptiveSamplingTex, envCDFStorage,
243
287
  albedoMapsTex, normalMapsTex, bumpMapsTex,
244
288
  metalnessMapsTex, roughnessMapsTex, emissiveMapsTex, displacementMapsTex,
245
289
  };
@@ -257,8 +301,8 @@ export class ShaderBuilder {
257
301
  writeColorTex, writeNDTex, writeAlbedoTex ) {
258
302
 
259
303
  const {
260
- triStorage, bvhStorage, matStorage, emissiveTriStorage, lightBVHStorage,
261
- envTex, adaptiveSamplingTex, marginalCDFStorage, conditionalCDFStorage,
304
+ triStorage, bvhStorage, matStorage, lightBufferStorage,
305
+ envTex, adaptiveSamplingTex, envCDFStorage,
262
306
  albedoMapsTex, normalMapsTex, bumpMapsTex,
263
307
  metalnessMapsTex, roughnessMapsTex, emissiveMapsTex, displacementMapsTex,
264
308
  } = textureNodes;
@@ -319,8 +363,7 @@ export class ShaderBuilder {
319
363
  envTexture: envTex,
320
364
  environmentIntensity: stage.environmentIntensity,
321
365
  envMatrix: stage.environmentMatrix,
322
- envMarginalWeights: marginalCDFStorage,
323
- envConditionalWeights: conditionalCDFStorage,
366
+ envCDFBuffer: envCDFStorage,
324
367
  envTotalSum: stage.envTotalSum,
325
368
  envResolution: stage.envResolution,
326
369
  enableEnvironmentLight: stage.enableEnvironment,
@@ -334,11 +377,12 @@ export class ShaderBuilder {
334
377
  globalIlluminationIntensity: stage.globalIlluminationIntensity,
335
378
  totalTriangleCount: stage.totalTriangleCount,
336
379
  enableEmissiveTriangleSampling: stage.enableEmissiveTriangleSampling,
337
- emissiveTriangleBuffer: emissiveTriStorage,
380
+ emissiveTriangleBuffer: lightBufferStorage,
338
381
  emissiveTriangleCount: stage.emissiveTriangleCount,
339
382
  emissiveTotalPower: stage.emissiveTotalPower,
340
383
  emissiveBoost: stage.emissiveBoost,
341
- lightBVHBuffer: lightBVHStorage,
384
+ emissiveVec4Offset: stage.emissiveVec4Offset,
385
+ lightBVHBuffer: lightBufferStorage,
342
386
  lightBVHNodeCount: stage.lightBVHNodeCount,
343
387
  debugVisScale: stage.debugVisScale,
344
388
  enableAccumulation: stage.enableAccumulation,
@@ -379,6 +423,7 @@ export class ShaderBuilder {
379
423
  this.prevAlbedoTexNode = null;
380
424
  this.adaptiveSamplingTexNode = null;
381
425
  this._sceneTextureNodes = null;
426
+ this._compiled = false;
382
427
 
383
428
  }
384
429
 
@@ -199,10 +199,13 @@ export class TLASBuilder {
199
199
  /**
200
200
  * Flatten TLAS tree into Float32Array.
201
201
  * Inner nodes: same format as BVH.
202
- * Leaf nodes: [blasRootNodeIndex, 0, 0, -2] (BLAS-pointer marker).
202
+ * Leaf nodes: [blasRootNodeIndex, meshIndex, visibility, -2] (BLAS-pointer marker).
203
+ *
204
+ * Side effect: records each entry's flat leaf index on `entry.tlasLeafIndex` so that
205
+ * visibility can later be patched in place (combinedBvhData[tlasLeafIndex*16 + 2]).
203
206
  *
204
207
  * @param {TLASNode} root
205
- * @param {Array<{blasOffset: number}>} entries - Instance table entries with assigned blasOffsets
208
+ * @param {Array<{blasOffset: number, visible: boolean, tlasLeafIndex: number}>} entries
206
209
  * @returns {Float32Array}
207
210
  */
208
211
  flatten( root, entries ) {
@@ -268,10 +271,12 @@ export class TLASBuilder {
268
271
  // Leaf node — BLAS pointer
269
272
  const entry = entries[ n.entryIndex ];
270
273
  data[ o ] = entry.blasOffset; // Absolute node index of BLAS root in combined buffer
271
- data[ o + 1 ] = n.entryIndex; // meshIndex for per-mesh visibility check
272
- // data[o+2] = 0
274
+ data[ o + 1 ] = n.entryIndex; // meshIndex (kept for debug/ID — traversal uses slot [2])
275
+ data[ o + 2 ] = entry.visible === false ? 0.0 : 1.0; // Per-mesh visibility (packed — frees a binding)
273
276
  data[ o + 3 ] = BVH_LEAF_MARKERS.BLAS_POINTER_LEAF; // -2 marker
274
277
 
278
+ entry.tlasLeafIndex = i;
279
+
275
280
  }
276
281
 
277
282
  }
@@ -803,10 +803,10 @@ export class ASVGF extends RenderStage {
803
803
  // Update dispatch dimensions
804
804
  this._dispatchX = Math.ceil( width / 8 );
805
805
  this._dispatchY = Math.ceil( height / 8 );
806
- this._gradientNode.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
807
- this._temporalNodeA.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
808
- this._temporalNodeB.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
809
- this._heatmapComputeNode.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
806
+ this._gradientNode.dispatchSize = [ this._dispatchX, this._dispatchY, 1 ];
807
+ this._temporalNodeA.dispatchSize = [ this._dispatchX, this._dispatchY, 1 ];
808
+ this._temporalNodeB.dispatchSize = [ this._dispatchX, this._dispatchY, 1 ];
809
+ this._heatmapComputeNode.dispatchSize = [ this._dispatchX, this._dispatchY, 1 ];
810
810
 
811
811
  }
812
812
 
@@ -422,8 +422,8 @@ export class AdaptiveSampling extends RenderStage {
422
422
  // Update dispatch dimensions
423
423
  this._dispatchX = Math.ceil( width / 16 );
424
424
  this._dispatchY = Math.ceil( height / 16 );
425
- this._computeNode.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
426
- this._heatmapComputeNode.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
425
+ this._computeNode.dispatchSize = [ this._dispatchX, this._dispatchY, 1 ];
426
+ this._heatmapComputeNode.dispatchSize = [ this._dispatchX, this._dispatchY, 1 ];
427
427
 
428
428
  }
429
429
 
@@ -1,6 +1,7 @@
1
1
  import { Fn, wgslFn, vec4, float, int, uint, ivec2, uvec2, uniform, If, max,
2
- textureLoad, textureStore, workgroupArray, workgroupBarrier, localId, workgroupId } from 'three/tsl';
3
- import { RenderTarget, TextureNode, StorageTexture } from 'three/webgpu';
2
+ textureLoad, textureStore, workgroupArray, workgroupBarrier, localId, workgroupId,
3
+ attributeArray } from 'three/tsl';
4
+ import { RenderTarget, TextureNode, StorageTexture, ReadbackBuffer } from 'three/webgpu';
4
5
  import { FloatType, RGBAFormat, NearestFilter } from 'three';
5
6
  import { RenderStage, StageExecutionMode } from '../Pipeline/RenderStage.js';
6
7
  import { luminance } from '../TSL/Common.js';
@@ -54,8 +55,11 @@ const adaptExposure = /*@__PURE__*/ wgslFn( `
54
55
  * 2. Reduction (compute): parallel reduction 64×64 → 1×1 via shared memory
55
56
  * Single workgroup of 256 threads, each loads 16 texels.
56
57
  * Computes geometric mean: exp(Σlog(L) / N)
57
- * 3. Adaptation (compute): temporal smoothing with prev exposure
58
- * 4. Async readback (1×1): apply to renderer.toneMappingExposure
58
+ * 3. Adaptation (compute): temporal smoothing with prev exposure; writes
59
+ * vec4(exposure, luminance, targetExposure, 1) into a 1-element storage buffer.
60
+ * 4. Async readback via `renderer.getArrayBufferAsync(attr, ReadbackBuffer)`:
61
+ * the ReadbackBuffer pools its staging GPUBuffer across frames, avoiding
62
+ * per-frame allocation churn. Apply to renderer.toneMappingExposure.
59
63
  *
60
64
  * WebGPU advantage: async readback (no GPU pipeline stall).
61
65
  * 1-frame delay is imperceptible for slowly-changing exposure.
@@ -156,15 +160,12 @@ export class AutoExposure extends RenderStage {
156
160
  // from StorageTexture return zeros — must copy to RenderTarget first)
157
161
  this._reductionReadTarget = new RenderTarget( 1, 1, rtOpts );
158
162
 
159
- // Adaptation StorageTexture (1×1) — compute writes here
160
- this._adaptationStorageTex = new StorageTexture( 1, 1 );
161
- this._adaptationStorageTex.type = FloatType;
162
- this._adaptationStorageTex.format = RGBAFormat;
163
- this._adaptationStorageTex.minFilter = NearestFilter;
164
- this._adaptationStorageTex.magFilter = NearestFilter;
165
-
166
- // Adaptation target (1×1) — readable copy for async readback
167
- this._adaptationTarget = new RenderTarget( 1, 1, rtOpts );
163
+ // Adaptation result vec4 storage buffer attribute. Compute writes
164
+ // vec4(exposure, luminance, targetExposure, 1) here; CPU reads via
165
+ // getArrayBufferAsync + a pooled ReadbackBuffer (16 bytes).
166
+ this._adaptationResult = attributeArray( 1, 'vec4' );
167
+ this._readbackBuffer = new ReadbackBuffer( 16 );
168
+ this._readbackBuffer.name = 'AutoExposureAdaptation';
168
169
 
169
170
  }
170
171
 
@@ -351,16 +352,15 @@ export class AutoExposure extends RenderStage {
351
352
  * Adaptation (compute): temporal smoothing
352
353
  *
353
354
  * Single-thread compute dispatch [1, 1, 1], workgroup [1, 1, 1].
354
- * Reads geometric mean from reduction RenderTarget (copied from StorageTexture),
355
- * reads previous adaptation from adaptation RenderTarget,
356
- * applies asymmetric temporal smoothing, writes to adaptation StorageTexture.
357
- *
358
- * Output: R = exposure, G = luminance, B = targetExposure, A = 1
355
+ * Reads geometric mean from reduction RenderTarget, applies asymmetric
356
+ * temporal smoothing using the previous-exposure uniform, and writes
357
+ * vec4(exposure, luminance, targetExposure, 1) into a 1-element storage
358
+ * buffer which the CPU reads via getArrayBufferAsync + ReadbackBuffer.
359
359
  */
360
360
  _buildAdaptationCompute() {
361
361
 
362
362
  const reductionTex = this._reductionReadTexNode;
363
- const outputTex = this._adaptationStorageTex;
363
+ const resultBuf = this._adaptationResult;
364
364
  const keyValue = this.keyValueU;
365
365
  const minExp = this.minExposureU;
366
366
  const maxExp = this.maxExposureU;
@@ -381,11 +381,7 @@ export class AutoExposure extends RenderStage {
381
381
  dt, isFirst
382
382
  );
383
383
 
384
- textureStore(
385
- outputTex,
386
- uvec2( uint( 0 ), uint( 0 ) ),
387
- result
388
- ).toWriteOnly();
384
+ resultBuf.element( uint( 0 ) ).assign( result );
389
385
 
390
386
  } );
391
387
 
@@ -463,22 +459,31 @@ export class AutoExposure extends RenderStage {
463
459
 
464
460
  this._reductionReadTexNode.value = this._reductionReadTarget.texture;
465
461
  this.renderer.compute( this._adaptationComputeNode );
466
- this.renderer.copyTextureToTexture( this._adaptationStorageTex, this._adaptationTarget.texture );
467
462
 
468
- // ── Async readback (WebGPU advantage) ────────────
463
+ // ── Async readback via pooled ReadbackBuffer ─────
464
+ // getArrayBufferAsync reuses the ReadbackBuffer's internal staging
465
+ // GPUBuffer across frames. ReadbackBuffer.release() must be called
466
+ // before it can be reused — the _pendingReadback flag gates reentry.
469
467
 
470
468
  if ( ! this._pendingReadback ) {
471
469
 
472
470
  this._pendingReadback = true;
473
471
  const generation = this._readbackGeneration;
474
472
 
475
- this.renderer.readRenderTargetPixelsAsync(
476
- this._adaptationTarget, 0, 0, 1, 1
477
- ).then( ( data ) => {
473
+ this.renderer.getArrayBufferAsync(
474
+ this._adaptationResult.value, this._readbackBuffer
475
+ ).then( ( readback ) => {
478
476
 
477
+ // Copy the 4 floats out of the mapped buffer before release(),
478
+ // because release() nulls readback.buffer and unmaps the GPU buffer.
479
+ const data = readback && readback.buffer
480
+ ? new Float32Array( readback.buffer.slice( 0 ) )
481
+ : null;
482
+ this._readbackBuffer.release();
479
483
  this._pendingReadback = false;
484
+
480
485
  // Discard stale readback from before a reset
481
- if ( generation === this._readbackGeneration ) {
486
+ if ( data && generation === this._readbackGeneration ) {
482
487
 
483
488
  this._applyReadback( data );
484
489
 
@@ -486,6 +491,12 @@ export class AutoExposure extends RenderStage {
486
491
 
487
492
  } ).catch( () => {
488
493
 
494
+ try {
495
+
496
+ this._readbackBuffer.release();
497
+
498
+ } catch { /* buffer may not be mapped on error */ }
499
+
489
500
  this._pendingReadback = false;
490
501
 
491
502
  } );
@@ -612,8 +623,7 @@ export class AutoExposure extends RenderStage {
612
623
  this._downsampleStorageTex?.dispose();
613
624
  this._reductionStorageTex?.dispose();
614
625
  this._reductionReadTarget?.dispose();
615
- this._adaptationStorageTex?.dispose();
616
- this._adaptationTarget?.dispose();
626
+ this._readbackBuffer?.dispose();
617
627
 
618
628
  }
619
629
 
@@ -304,8 +304,8 @@ export class BilateralFilter extends RenderStage {
304
304
  // Update dispatch dimensions
305
305
  this._dispatchX = Math.ceil( width / 8 );
306
306
  this._dispatchY = Math.ceil( height / 8 );
307
- this._computeNodeA.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
308
- this._computeNodeB.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
307
+ this._computeNodeA.dispatchSize = [ this._dispatchX, this._dispatchY, 1 ];
308
+ this._computeNodeB.dispatchSize = [ this._dispatchX, this._dispatchY, 1 ];
309
309
 
310
310
  }
311
311
 
@@ -111,7 +111,8 @@ export class Display extends RenderStage {
111
111
  dispose() {
112
112
 
113
113
  this.displayMaterial?.dispose();
114
- this.displayQuad?.dispose();
114
+ // QuadMesh extends Mesh — no dispose method; material already disposed.
115
+ this.displayQuad = null;
115
116
 
116
117
  }
117
118
 
@@ -50,7 +50,9 @@ export class EdgeFilter extends RenderStage {
50
50
 
51
51
  // Output StorageTexture (compute writes here)
52
52
  // Pre-allocated at max size — NEVER resize/dispose after this.
53
- // StorageTexture.setSize() breaks textureStore bind groups (Three.js bug #32969).
53
+ // Kept as a defensive pattern: bug #32969 (setSize bind-group staleness)
54
+ // was fixed in r184 (PR #33028), but #33061 (TSL compute pipeline
55
+ // re-compile returns zeros) is still open.
54
56
  const MAX_STORAGE_SIZE = 2048;
55
57
  const w = options.width || 1;
56
58
  const h = options.height || 1;
@@ -254,7 +256,8 @@ export class EdgeFilter extends RenderStage {
254
256
  setSize( width, height ) {
255
257
 
256
258
  // Only resize the RenderTarget — StorageTexture stays at max allocation
257
- // (StorageTexture.setSize() breaks textureStore bind groups, Three.js bug #32969)
259
+ // (see constructor note: pre-allocation is a defensive pattern, retained
260
+ // after r184 fixed #32969, because #33061 is still open.)
258
261
  this.outputTarget.setSize( width, height );
259
262
  this.outputTarget.texture.needsUpdate = true;
260
263
  this.resW.value = width;
@@ -263,7 +266,7 @@ export class EdgeFilter extends RenderStage {
263
266
  // Update dispatch dimensions
264
267
  this._dispatchX = Math.ceil( width / 16 );
265
268
  this._dispatchY = Math.ceil( height / 16 );
266
- this._computeNode.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
269
+ this._computeNode.dispatchSize = [ this._dispatchX, this._dispatchY, 1 ];
267
270
 
268
271
  }
269
272
 
@@ -515,13 +515,13 @@ export class MotionVector extends RenderStage {
515
515
  this._dispatchY = Math.ceil( height / 16 );
516
516
  if ( this._screenSpaceComputeNode ) {
517
517
 
518
- this._screenSpaceComputeNode.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
518
+ this._screenSpaceComputeNode.dispatchSize = [ this._dispatchX, this._dispatchY, 1 ];
519
519
 
520
520
  }
521
521
 
522
522
  if ( this._worldSpaceComputeNode ) {
523
523
 
524
- this._worldSpaceComputeNode.setCount( [ this._dispatchX, this._dispatchY, 1 ] );
524
+ this._worldSpaceComputeNode.dispatchSize = [ this._dispatchX, this._dispatchY, 1 ];
525
525
 
526
526
  }
527
527