streaming-gltf 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,615 @@
1
+ // Draw Call Batching Optimization for InstancedSlots
2
+ // Reduces draw call count from ~450 to ~100-150 by grouping slots with same geometry
3
+ // Uses lodIndex attribute to select material variant without rebind overhead
4
+
5
+ import * as THREE from 'three';
6
+ import { MultiDrawOptimizer } from './multi-draw-optimizer.js';
7
+ // Phase 3 Quick-Wins: QW2 (Draw Call Sorting) + QW3 (Buffer Pool)
8
+ import { DrawCallSorter, buildDrawCallDescriptors, applyDrawCallSort } from './draw-call-sorter.js';
9
+ import { InstanceBufferPool } from './buffer-pool.js';
10
+
11
/**
 * InstancedBatch: one THREE.InstancedMesh shared by every InstancedSlot that
 * resolves to the same geometry.
 *
 * Each instance carries:
 *   - a transform, stored either in the mesh's `instanceMatrix` attribute or,
 *     when GPU instance textures are enabled, in a float DataTexture holding
 *     one mat4 (4 RGBA texels) per instance;
 *   - an `instanceBoundSphere` (xyz = centre, w = radius) consumed by the
 *     vertex-shader frustum cull;
 *   - an `instanceLodIndex` byte that is forwarded to the shader.
 *
 * Author's stated targets (not verified here): ~450 draw calls before
 * batching, ~100-150 after, render time 8.4ms -> 5-6ms.
 */
export class InstancedBatch {
  /**
   * @param {object} pool - Owning pool. Only `pool._enableGpuInstanceTex` is
   *   read here; any value other than `false` enables the instance texture.
   * @param {string} geoKey - Opaque identifier of the batched geometry (used
   *   for the mesh name and in stats).
   * @param {THREE.BufferGeometry} geometry - Geometry shared by all instances.
   *   The batch attaches its per-instance attributes to this geometry.
   * @param {object|null} [globalMaterialPool=null] - Optional provider of a
   *   shared 'far' tier material (`getMaterialForTier('far')`), used when its
   *   `_useGlobalMaterialPool` flag is truthy.
   * @param {object|null} [bufferPool=null] - Stored on the instance; not used
   *   by this class itself.
   */
  constructor(pool, geoKey, geometry, globalMaterialPool = null, bufferPool = null) {
    this.pool = pool;
    this.geoKey = geoKey;
    this.geometry = geometry;
    this.capacity = 32; // doubled by _grow() whenever the batch fills up
    this.globalMaterialPool = globalMaterialPool;
    this.bufferPool = bufferPool;

    // key: `${assetUrl}|${meshDescIdx}|${lodIdx}` -> BatchedInstancedSlot
    this.slots = new Map();

    // Uniform objects shared by reference with the compiled shader.
    this._uniforms = { projViewMatrix: { value: new THREE.Matrix4() } };

    // GPU-driven per-instance transform: each batch needs its OWN material so
    // it can bind its own instance texture uniform (a shared pool material
    // could only bind one batch's texture).
    this._gpuInstanceTex = pool._enableGpuInstanceTex !== false;
    const useGlobalPool = Boolean(globalMaterialPool && globalMaterialPool._useGlobalMaterialPool);

    let material;
    if (this._gpuInstanceTex) {
      const baseFar = useGlobalPool
        ? globalMaterialPool.getMaterialForTier('far')
        : new THREE.MeshLambertMaterial({ vertexColors: true });
      material = baseFar.clone();
      InstancedBatch._applyVertexColorGamma(material);
      // The texture must exist before patching so the uniforms are in place
      // by the time the shader is compiled.
      this._initInstanceTexture(this.capacity);
      _patchInstancedSlotMaterial(material, this._uniforms);
      this._ownsMaterial = true;
    } else if (useGlobalPool) {
      // Shared across all batches: this batch must never dispose it.
      material = globalMaterialPool.getMaterialForTier('far');
      this._ownsMaterial = false;
    } else {
      material = new THREE.MeshLambertMaterial({ vertexColors: true });
      InstancedBatch._applyVertexColorGamma(material);
      _patchInstancedSlotMaterial(material, this._uniforms);
      this._ownsMaterial = true;
    }
    this.material = material;

    // Culling is done per instance in the vertex shader, so three.js
    // object-level frustum culling is switched off.
    this.mesh = new THREE.InstancedMesh(geometry, material, this.capacity);
    this.mesh.frustumCulled = false;
    this.mesh.instanceMatrix.setUsage(THREE.DynamicDrawUsage);
    this.mesh.name = `batch:${geoKey}`;

    // Per-instance bound sphere (x, y, z, radius).
    this._boundArray = new Float32Array(this.capacity * 4);
    this._boundAttr = new THREE.InstancedBufferAttribute(this._boundArray, 4);
    this._boundAttr.setUsage(THREE.DynamicDrawUsage);
    this.mesh.geometry.setAttribute('instanceBoundSphere', this._boundAttr);

    // Per-instance LOD index, one byte each.
    this._lodIndexArray = new Uint8Array(this.capacity);
    this._lodIndexAttr = new THREE.InstancedBufferAttribute(this._lodIndexArray, 1);
    this._lodIndexAttr.setUsage(THREE.DynamicDrawUsage);
    this.mesh.geometry.setAttribute('instanceLodIndex', this._lodIndexAttr);

    // All-zero matrices collapse an instance to a point, i.e. invisible.
    // NOTE(review): in GPU-texture mode the stock normal/world-position shader
    // chunks presumably still read this attribute — confirm lighting there.
    const zero = new THREE.Matrix4().set(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    for (let i = 0; i < this.capacity; i++) this.mesh.setMatrixAt(i, zero);
    this.mesh.count = 0;
    this.mesh.instanceMatrix.needsUpdate = true;

    // Slot allocation tracking
    this._nextSlotIdx = 0; // first never-used index
    this._freeSlots = []; // released indices available for reuse
    this._dirtySlots = new Set(); // CPU-path indices awaiting upload

    this._stats = {
      totalInstances: 0,
      drawCalls: 1, // always 1 for a batch
      savedDrawCalls: 0, // recomputed by getStats()
    };
  }

  /**
   * Install the vertex-colour gamma (pow 2.2) fragment patch on a material.
   * Replaces any existing `onBeforeCompile` on that material.
   * @param {THREE.Material} material
   */
  static _applyVertexColorGamma(material) {
    material.onBeforeCompile = (shader) => {
      shader.fragmentShader = shader.fragmentShader.replace(
        '#include <color_fragment>',
        `#if defined( USE_COLOR_ALPHA )
          diffuseColor.rgb *= pow(vColor.rgb, vec3(2.2));
          diffuseColor.a *= vColor.a;
        #elif defined( USE_COLOR )
          diffuseColor.rgb *= pow(vColor, vec3(2.2));
        #endif`
      );
    };
  }

  /**
   * Allocate an instance index, reusing a released one when available and
   * doubling capacity when the batch is full.
   * @param {number} lodIdx - LOD index stored for the instance.
   * @returns {number} The allocated instance index.
   */
  acquireSlotInBatch(lodIdx) {
    let idx;
    if (this._freeSlots.length) {
      idx = this._freeSlots.pop();
    } else {
      if (this._nextSlotIdx >= this.capacity) {
        this._grow(this.capacity * 2);
      }
      idx = this._nextSlotIdx++;
    }

    this._lodIndexArray[idx] = lodIdx;
    this._lodIndexAttr.needsUpdate = true;

    // mesh.count is a high-water mark; it never shrinks on release.
    if (idx + 1 > this.mesh.count) this.mesh.count = idx + 1;
    this._stats.totalInstances++;

    return idx;
  }

  /**
   * Return an instance index to the free list and hide the instance by
   * zeroing its transform and bound sphere.
   * @param {number} idx - Index previously returned by acquireSlotInBatch.
   */
  releaseSlotInBatch(idx) {
    this._freeSlots.push(idx);

    // Zero the transform in place (same result as writing an all-zero
    // Matrix4, without allocating one per release).
    const base = idx * 16;
    if (this._gpuInstanceTex) {
      this._instTexData.fill(0, base, base + 16);
      this._instTexDirty = true;
    } else {
      this.mesh.instanceMatrix.array.fill(0, base, base + 16);
      this._dirtySlots.add(idx);
    }

    this._boundArray.fill(0, idx * 4, idx * 4 + 4);
    this._boundAttr.needsUpdate = true;

    this._stats.totalInstances--;
  }

  // --- GPU instance transform texture ---------------------------------------

  /**
   * (Re)create the float texture that stores one mat4 per instance and point
   * the shared uniform objects at it. Does not dispose a previous texture;
   * callers that replace one are responsible for that.
   * @param {number} capacity - Number of instances the texture must hold.
   */
  _initInstanceTexture(capacity) {
    // Layout: a single row, 4 RGBA texels (one mat4) per instance.
    // NOTE(review): width grows as capacity * 4, so very large batches can
    // exceed the device's maximum texture size — confirm expected limits.
    this._instTexWidth = capacity * 4;
    this._instTexData = new Float32Array(this._instTexWidth * 4);
    const tex = new THREE.DataTexture(this._instTexData, this._instTexWidth, 1, THREE.RGBAFormat, THREE.FloatType);
    // Nearest filtering: exact texel reads, and no dependency on linear
    // filtering of float textures (OES_texture_float_linear).
    tex.minFilter = THREE.NearestFilter;
    tex.magFilter = THREE.NearestFilter;
    tex.generateMipmaps = false;
    tex.needsUpdate = true;
    this._instTex = tex;
    if (this._uniforms.instanceTex) {
      // Mutate .value so shaders holding the uniform object see the new texture.
      this._uniforms.instanceTex.value = tex;
      this._uniforms.instanceTexWidth.value = this._instTexWidth;
    } else {
      this._uniforms.instanceTex = { value: tex };
      this._uniforms.instanceTexWidth = { value: this._instTexWidth };
    }
    this._instTexDirty = false;
  }

  /**
   * Write a matrix into the instance texture (GPU-texture mode only).
   * @param {number} idx - Instance index.
   * @param {{elements: ArrayLike<number>}} matrix - 16 column-major elements.
   */
  setInstanceTransform(idx, matrix) {
    const e = matrix.elements;
    const base = idx * 16;
    for (let i = 0; i < 16; i++) this._instTexData[base + i] = e[i];
    this._instTexDirty = true;
  }

  /** Mark the instance texture for upload if any transform changed. */
  flushInstanceTexture() {
    if (this._instTexDirty) {
      this._instTex.needsUpdate = true;
      this._instTexDirty = false;
    }
  }

  /**
   * Set an instance transform via whichever storage this batch uses.
   * @param {number} idx
   * @param {THREE.Matrix4} matrix
   */
  setMatrixInBatch(idx, matrix) {
    if (this._gpuInstanceTex) {
      this.setInstanceTransform(idx, matrix);
      return;
    }
    this.mesh.setMatrixAt(idx, matrix);
    this._dirtySlots.add(idx);
  }

  /**
   * Set the bound sphere used by the vertex-shader frustum cull.
   * A radius of 0 disables culling for that instance.
   */
  setBoundSphereInBatch(idx, cx, cy, cz, r) {
    const o = idx * 4;
    this._boundArray[o] = cx;
    this._boundArray[o + 1] = cy;
    this._boundArray[o + 2] = cz;
    this._boundArray[o + 3] = r;
    this._boundAttr.needsUpdate = true;
  }

  /** Change the LOD index stored for an existing instance. */
  updateLodIndexInBatch(idx, lodIdx) {
    this._lodIndexArray[idx] = lodIdx;
    this._lodIndexAttr.needsUpdate = true;
  }

  /**
   * Push pending transform changes to the GPU. Dirty slots are ALWAYS
   * flushed; when the attribute supports update ranges, only the
   * [min..max] dirty span is uploaded.
   */
  flushUpdates() {
    if (this._gpuInstanceTex) {
      this.flushInstanceTexture();
      return;
    }
    if (this._dirtySlots.size === 0) return;

    const im = this.mesh.instanceMatrix;
    if (im.clearUpdateRanges && im.addUpdateRange) {
      let lo = Infinity;
      let hi = -1;
      for (const s of this._dirtySlots) {
        if (s < lo) lo = s;
        if (s > hi) hi = s;
      }
      const span = (hi - lo + 1) * 16;
      im.clearUpdateRanges();
      // With no range registered the whole attribute is uploaded.
      if (span > 0 && span < (this.mesh.count || this.capacity) * 16) {
        im.addUpdateRange(lo * 16, span);
      }
    }
    im.needsUpdate = true;
    this._dirtySlots.clear();
  }

  /**
   * Replace the mesh with a larger one, carrying over transforms, bound
   * spheres and LOD indices, and swapping it into the old mesh's parent.
   * @param {number} newCap - New instance capacity.
   */
  _grow(newCap) {
    const old = this.mesh;
    const next = new THREE.InstancedMesh(this.geometry, this.material, newCap);
    next.frustumCulled = false;
    next.instanceMatrix.setUsage(THREE.DynamicDrawUsage);
    next.name = old.name;

    if (this._gpuInstanceTex) {
      // Grow the instance texture, preserving existing matrices, then free
      // the superseded texture so its GPU memory is not leaked.
      const prevData = this._instTexData;
      const prevTex = this._instTex;
      this._initInstanceTexture(newCap);
      this._instTexData.set(prevData);
      this._instTex.needsUpdate = true;
      prevTex?.dispose();
    } else {
      // Start from all-zero (invisible) matrices, as the constructor does, so
      // freshly acquired slots never flash at the InstancedMesh default
      // transform before their real matrix is written.
      const dst = next.instanceMatrix.array;
      dst.fill(0);
      dst.set(old.instanceMatrix.array.subarray(0, this._nextSlotIdx * 16));
      next.instanceMatrix.needsUpdate = true;
    }
    next.count = old.count;

    // Grow bound sphere attribute
    const newBounds = new Float32Array(newCap * 4);
    newBounds.set(this._boundArray);
    this._boundArray = newBounds;
    this._boundAttr = new THREE.InstancedBufferAttribute(newBounds, 4);
    this._boundAttr.setUsage(THREE.DynamicDrawUsage);
    next.geometry.setAttribute('instanceBoundSphere', this._boundAttr);

    // Grow LOD index attribute
    const newLodIndices = new Uint8Array(newCap);
    newLodIndices.set(this._lodIndexArray);
    this._lodIndexArray = newLodIndices;
    this._lodIndexAttr = new THREE.InstancedBufferAttribute(newLodIndices, 1);
    this._lodIndexAttr.setUsage(THREE.DynamicDrawUsage);
    next.geometry.setAttribute('instanceLodIndex', this._lodIndexAttr);

    // Replace in parent scene
    const parent = old.parent;
    if (parent) {
      parent.remove(old);
      parent.add(next);
    }
    old.dispose();

    this.mesh = next;
    this.capacity = newCap;
    // The new attribute is uploaded in full, so pending dirty marks are moot.
    this._dirtySlots = new Set();
  }

  /**
   * Detach the mesh and release the GPU resources held by this batch.
   * A material borrowed from the global pool is left alone because other
   * batches still render with it.
   */
  dispose() {
    this.mesh.parent?.remove(this.mesh);
    this.mesh.geometry.dispose();
    if (this._ownsMaterial !== false) this.mesh.material.dispose();
    this._instTex?.dispose();
    this.mesh.dispose();
    this.slots.clear();
  }

  /**
   * @returns {{totalInstances: number, drawCalls: number, savedDrawCalls: number, geometry: string, capacity: number}}
   *   `savedDrawCalls` counts the registered slots beyond the first, i.e. the
   *   separate per-slot draw calls this single batch stands in for.
   */
  getStats() {
    return {
      ...this._stats,
      savedDrawCalls: Math.max(0, this.slots.size - 1),
      geometry: this.geoKey,
      capacity: this.capacity,
    };
  }
}
334
+
335
/**
 * Slot facade that stores its instances in a parent InstancedBatch instead
 * of owning a mesh. It keeps an entity -> batch-index map and forwards every
 * matrix / bound-sphere / flush operation to the batch.
 */
export class BatchedInstancedSlot {
  /**
   * @param {object} pool - Owning pool (stored only).
   * @param {object} batch - Parent batch; must expose `geometry`, `material`,
   *   `acquireSlotInBatch`, `releaseSlotInBatch`, `setMatrixInBatch`,
   *   `setBoundSphereInBatch` and `flushUpdates`.
   * @param {object} asset - Asset this slot belongs to (stored only).
   * @param {number} meshDescIdx - Mesh descriptor index within the asset.
   * @param {number} lodIdx - LOD index passed to the batch for each instance.
   */
  constructor(pool, batch, asset, meshDescIdx, lodIdx) {
    this.pool = pool;
    this.batch = batch;
    this.asset = asset;
    this.meshDescIdx = meshDescIdx;
    this.lodIdx = lodIdx;
    this.geometry = batch.geometry;
    this.material = batch.material;

    this.slots = new Map(); // entity -> instance index within the batch
    this._isBatched = true;
  }

  /**
   * Get the batch index for an entity, allocating one on first use.
   * Re-acquiring for an entity that already holds an index returns that same
   * index; allocating again would overwrite the map entry and strand the
   * earlier batch slot, which could then never be released.
   * @param {*} entity - Map key identifying the instance owner.
   * @returns {number} Instance index within the batch.
   */
  acquireSlot(entity) {
    const existing = this.slots.get(entity);
    if (existing != null) return existing;

    const idx = this.batch.acquireSlotInBatch(this.lodIdx);
    this.slots.set(entity, idx);
    return idx;
  }

  /**
   * Release the entity's batch index, if it holds one. Unknown entities are
   * ignored, so releasing twice is harmless.
   * @param {*} entity
   */
  releaseSlot(entity) {
    const idx = this.slots.get(entity);
    if (idx == null) return;
    this.slots.delete(entity);
    this.batch.releaseSlotInBatch(idx);
  }

  /** Forward a transform write to the batch. */
  setMatrixForSlot(idx, matrix) {
    this.batch.setMatrixInBatch(idx, matrix);
  }

  /** Forward a bound-sphere write (centre xyz, radius) to the batch. */
  setBoundSphereForSlot(idx, cx, cy, cz, r) {
    this.batch.setBoundSphereInBatch(idx, cx, cy, cz, r);
  }

  /** Ask the batch to upload its pending changes. */
  flushMatrixUpdates() {
    this.batch.flushUpdates();
  }

  // No-op: the batch handles growth.
  _grow() {}

  // No-op: this slot owns no GPU resources; they belong to the batch.
  dispose() {}
}
388
+
389
/**
 * Probe a WebGL context for the features the batching code cares about and
 * log the result.
 *
 * @param {WebGLRenderingContext|WebGL2RenderingContext|null|undefined} gl
 * @returns {{version: string, vendor: string, renderer: string,
 *   baseVertex: boolean, multiDraw: boolean, instanceDivisor: boolean}}
 *   With no context, the string fields fall back to their defaults and every
 *   feature flag is false.
 */
export function detectWebGL2Capabilities(gl) {
  // True when at least one of the named extensions is exposed by the context.
  const hasAnyExtension = (...names) => names.some((name) => Boolean(gl?.getExtension(name)));

  const capabilities = {
    version: gl?.getParameter(gl?.VERSION) || 'WebGL 1.0',
    vendor: gl?.getParameter(gl?.VENDOR) || 'unknown',
    renderer: gl?.getParameter(gl?.RENDERER) || 'unknown',
    // Base-vertex draws: WebGL exposes these through
    // WEBGL_draw_instanced_base_vertex_base_instance. The OpenGL ES name is
    // still probed so nothing that used to be detected is lost.
    baseVertex: hasAnyExtension(
      'WEBGL_draw_instanced_base_vertex_base_instance',
      'OES_draw_elements_base_vertex',
    ),
    // Multi-draw: the WebGL extension is WEBGL_multi_draw (the ANGLE_ name is
    // kept as a fallback probe).
    multiDraw: hasAnyExtension('WEBGL_multi_draw', 'ANGLE_multi_draw'),
    // Instanced attribute divisors are core in WebGL 2 (vertexAttribDivisor
    // lives on the context); WebGL 1 needs ANGLE_instanced_arrays.
    instanceDivisor:
      typeof gl?.vertexAttribDivisor === 'function' || hasAnyExtension('ANGLE_instanced_arrays'),
  };

  console.log('[batching] WebGL capabilities:', capabilities);
  return capabilities;
}
408
+
409
+ /**
410
+ * Patch a material's shader to support per-instance LOD selection.
411
+ * The vertex shader receives instanceLodIndex attribute and can use it
412
+ * to select texture variants or adjust shading intensity.
413
+ */
414
+ function _patchInstancedSlotMaterial(material, uniforms) {
415
+ const prev = material.onBeforeCompile;
416
+ material.onBeforeCompile = (shader) => {
417
+ if (prev) prev(shader);
418
+ shader.uniforms.projViewMatrix = uniforms.projViewMatrix;
419
+ shader.uniforms.cameraPos = { value: new THREE.Vector3() };
420
+ shader.uniforms.lodThresholds = { value: new THREE.Vector4(80, 200, 400, 800) };
421
+ shader.uniforms.fovTanHalf = { value: 0.5 };
422
+ shader.uniforms.viewportHeight = { value: 1080 };
423
+ // GPU instance transform texture (per-instance mat4 as 4 RGBA texels) —
424
+ // present only on per-batch (cloned) materials, never the shared pool one.
425
+ if (uniforms.instanceTex) {
426
+ shader.uniforms.instanceTex = uniforms.instanceTex;
427
+ shader.uniforms.instanceTexWidth = uniforms.instanceTexWidth;
428
+ shader.defines = shader.defines || {};
429
+ shader.defines.USE_GPU_INSTANCE_TEX = '';
430
+ }
431
+
432
+ shader.vertexShader = shader.vertexShader
433
+ .replace(
434
+ '#include <common>',
435
+ `#include <common>
436
+ attribute vec4 instanceBoundSphere;
437
+ attribute float instanceLodIndex;
438
+ uniform mat4 projViewMatrix;
439
+ uniform vec3 cameraPos;
440
+ uniform vec4 lodThresholds;
441
+ uniform float fovTanHalf;
442
+ uniform float viewportHeight;
443
+ varying float vLodIndex;
444
+ #ifdef USE_GPU_INSTANCE_TEX
445
+ uniform sampler2D instanceTex;
446
+ uniform float instanceTexWidth;
447
+ mat4 readInstanceMatrix(int id) {
448
+ float base = float(id) * 4.0;
449
+ vec4 c0 = texture2D(instanceTex, vec2((base + 0.5) / instanceTexWidth, 0.5));
450
+ vec4 c1 = texture2D(instanceTex, vec2((base + 1.5) / instanceTexWidth, 0.5));
451
+ vec4 c2 = texture2D(instanceTex, vec2((base + 2.5) / instanceTexWidth, 0.5));
452
+ vec4 c3 = texture2D(instanceTex, vec2((base + 3.5) / instanceTexWidth, 0.5));
453
+ return mat4(c0, c1, c2, c3);
454
+ }
455
+ #endif`
456
+ )
457
+ .replace(
458
+ '#include <project_vertex>',
459
+ `#ifdef USE_GPU_INSTANCE_TEX
460
+ // mvPosition declared at outer scope (like <project_vertex>) so downstream
461
+ // chunks (fog, etc.) that read it still compile.
462
+ vec4 mvPosition = modelViewMatrix * readInstanceMatrix(gl_InstanceID) * vec4(transformed, 1.0);
463
+ gl_Position = projectionMatrix * mvPosition;
464
+ #else
465
+ #include <project_vertex>
466
+ #endif
467
+ {
468
+ // GPU frustum cull + LOD selection
469
+ vLodIndex = instanceLodIndex; // pass LOD to fragment shader if needed
470
+
471
+ if (instanceBoundSphere.w > 0.0) {
472
+ vec3 c = instanceBoundSphere.xyz;
473
+ float r = instanceBoundSphere.w;
474
+
475
+ // Frustum cull: derive 6 clip-space planes from projViewMatrix
476
+ vec4 row0 = vec4(projViewMatrix[0][0], projViewMatrix[1][0], projViewMatrix[2][0], projViewMatrix[3][0]);
477
+ vec4 row1 = vec4(projViewMatrix[0][1], projViewMatrix[1][1], projViewMatrix[2][1], projViewMatrix[3][1]);
478
+ vec4 row2 = vec4(projViewMatrix[0][2], projViewMatrix[1][2], projViewMatrix[2][2], projViewMatrix[3][2]);
479
+ vec4 row3 = vec4(projViewMatrix[0][3], projViewMatrix[1][3], projViewMatrix[2][3], projViewMatrix[3][3]);
480
+
481
+ vec4 planes[6];
482
+ planes[0] = row3 + row0; // left
483
+ planes[1] = row3 - row0; // right
484
+ planes[2] = row3 + row1; // bottom
485
+ planes[3] = row3 - row1; // top
486
+ planes[4] = row3 + row2; // near
487
+ planes[5] = row3 - row2; // far
488
+
489
+ bool outside = false;
490
+ for (int i = 0; i < 6; i++) {
491
+ vec4 p = planes[i];
492
+ float len = length(p.xyz);
493
+ if (len > 0.0) {
494
+ float d = (dot(p.xyz, c) + p.w) / len;
495
+ if (d < -r) { outside = true; break; }
496
+ }
497
+ }
498
+
499
+ if (outside) {
500
+ gl_Position = vec4(0.0/0.0, 0.0/0.0, 0.0/0.0, 0.0/0.0) * 0.0;
501
+ return;
502
+ }
503
+ }
504
+ }`
505
+ );
506
+ };
507
+ material.needsUpdate = true;
508
+ }
509
+
510
/**
 * Switch a ModelPool over to draw call batching.
 *
 * Effects on `pool`:
 *   - `_geometryBatches` (Map: geometry uuid -> InstancedBatch) is created;
 *   - `_webglCapabilities` is filled from the renderer's GL context;
 *   - `_initializeMultiDraw()` is invoked when the pool defines it;
 *   - `_getInstancedSlot` is replaced by a batching implementation;
 *   - `getStats` and `update` are wrapped to add batching stats and to flush
 *     every batch after each update.
 *
 * Calling this again on an already-enabled pool is a no-op: wrapping
 * `update` twice would flush twice per frame, and resetting the batch map
 * would orphan meshes that are already in the scene.
 *
 * @param {object} pool - Pool exposing `renderer`, `scene` and `update`.
 */
export function enableDrawCallBatching(pool) {
  if (pool._drawCallBatchingEnabled) {
    console.warn('[batching] Draw call batching already enabled; ignoring repeated call.');
    return;
  }
  pool._drawCallBatchingEnabled = true;

  // Map: geometry key -> InstancedBatch
  pool._geometryBatches = new Map();

  // Capability detection is best-effort and must never block batching.
  try {
    const renderer = pool.renderer;
    // Prefer the context the renderer already owns; fall back to asking the
    // canvas when the renderer does not expose getContext().
    const gl = typeof renderer.getContext === 'function'
      ? renderer.getContext()
      : renderer.domElement.getContext('webgl2') || renderer.domElement.getContext('webgl');
    pool._webglCapabilities = detectWebGL2Capabilities(gl);
  } catch (e) {
    console.warn('[batching] Failed to detect WebGL capabilities', e);
    pool._webglCapabilities = { version: 'unknown' };
  }

  // Runs after _geometryBatches exists so the optimizer can see it.
  if (pool._initializeMultiDraw) {
    pool._initializeMultiDraw();
  }

  // Batching replacement for the pool's slot lookup. Returns null when the
  // LOD is not an 'unskinned' primitive or its geometry is not loaded yet.
  pool._getInstancedSlot = function (asset, meshDescIdx, lodIdx) {
    const desc = asset.meshLodDescs[meshDescIdx];
    if (!desc) return null;
    const lod = desc.lods[lodIdx];
    if (!lod || (lod.kind || 'textured') !== 'unskinned') return null;

    const geo = asset.geoCache.get(`${desc.meshIndex}:${desc.primIndex}:${lodIdx}`);
    if (!geo) return null; // not loaded yet

    // The batch key MUST identify the actual geometry. meshIndex:primIndex
    // collides across distinct assets (every asset has a 0:0), so the
    // resolved geometry's uuid is used: copies of the same asset share a
    // batch while distinct assets each get their own.
    const geoKey = geo.uuid;

    let batch = this._geometryBatches.get(geoKey);
    if (!batch) {
      batch = new InstancedBatch(this, geoKey, geo, this._globalMaterialPool);
      this._geometryBatches.set(geoKey, batch);
      this.scene.add(batch.mesh);
    }

    // One slot facade per (asset, mesh descriptor, LOD) within the batch.
    const slotKey = `${asset.url}|${meshDescIdx}|${lodIdx}`;
    let slot = batch.slots.get(slotKey);
    if (!slot) {
      slot = new BatchedInstancedSlot(this, batch, asset, meshDescIdx, lodIdx);
      batch.slots.set(slotKey, slot);
    }
    return slot;
  };

  // Add batching stats to the pool's stats; caller arguments are forwarded.
  const originalGetStats = pool.getStats ? pool.getStats.bind(pool) : () => ({});
  pool.getStats = function (...args) {
    const stats = originalGetStats(...args);
    const batchStats = Array.from(this._geometryBatches.values(), (b) => b.getStats());
    const totalDrawCalls = batchStats.length;
    const totalInstances = batchStats.reduce((sum, s) => sum + s.totalInstances, 0);
    const totalSavedDrawCalls = batchStats.reduce((sum, s) => sum + s.savedDrawCalls, 0);

    return {
      ...stats,
      batching: {
        enabled: true,
        batches: this._geometryBatches.size,
        totalDrawCalls,
        totalInstances,
        estimatedSavedDrawCalls: totalSavedDrawCalls,
        reduction: totalSavedDrawCalls
          ? `${Math.round((totalSavedDrawCalls / (totalDrawCalls + totalSavedDrawCalls)) * 100)}%`
          : '0%',
      },
    };
  };

  // CRITICAL: flush batched instance data to the GPU every frame. Batched
  // slots live in _geometryBatches, not in the pool's own slot list, so the
  // pool's update() would otherwise never upload them. Arguments (camera,
  // delta time, ...) are forwarded untouched to the original update().
  const originalUpdate = pool.update.bind(pool);
  pool.update = function (...args) {
    const result = originalUpdate(...args);
    for (const batch of this._geometryBatches.values()) {
      if (batch.flushUpdates) batch.flushUpdates();
    }
    return result;
  };

  console.log('[batching] Draw call batching enabled (with per-frame batch flush).');
}
614
+
615
+ export default { InstancedBatch, BatchedInstancedSlot, enableDrawCallBatching, detectWebGL2Capabilities };