streaming-gltf 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +90 -0
- package/examples/local-progressive/batched-far-tier.js +296 -0
- package/examples/local-progressive/buffer-pool.js +182 -0
- package/examples/local-progressive/deferred-load-queue.js +253 -0
- package/examples/local-progressive/draw-call-batching.js +615 -0
- package/examples/local-progressive/draw-call-sorter.js +146 -0
- package/examples/local-progressive/frustum-cache.js +104 -0
- package/examples/local-progressive/lod-unload-manager.js +162 -0
- package/examples/local-progressive/lod-worker.js +297 -0
- package/examples/local-progressive/material-pool.js +241 -0
- package/examples/local-progressive/model-pool.js +2961 -0
- package/examples/local-progressive/multi-draw-optimizer.js +347 -0
- package/examples/local-progressive/multi-draw-utils.js +199 -0
- package/examples/local-progressive/stress.js +655 -0
- package/examples/local-progressive/vertex-compression.js +128 -0
- package/index.js +23 -0
- package/package.json +48 -0
- package/tools/bake-all.mjs +126 -0
- package/tools/bake-progressive.mjs +663 -0
- package/tools/bake-streaming.mjs +453 -0
|
@@ -0,0 +1,615 @@
|
|
|
1
|
+
// Draw Call Batching Optimization for InstancedSlots
|
|
2
|
+
// Reduces draw call count from ~450 to ~100-150 by grouping slots with same geometry
|
|
3
|
+
// Each instance carries a lodIndex attribute (forwarded to the shader as vLodIndex) so all LODs can share one material without rebinding
|
|
4
|
+
|
|
5
|
+
import * as THREE from 'three';
|
|
6
|
+
import { MultiDrawOptimizer } from './multi-draw-optimizer.js';
|
|
7
|
+
// Phase 3 Quick-Wins: QW2 (Draw Call Sorting) + QW3 (Buffer Pool)
|
|
8
|
+
import { DrawCallSorter, buildDrawCallDescriptors, applyDrawCallSort } from './draw-call-sorter.js';
|
|
9
|
+
import { InstanceBufferPool } from './buffer-pool.js';
|
|
10
|
+
|
|
11
|
+
/**
 * InstancedBatch: one THREE.InstancedMesh shared by every InstancedSlot that
 * resolves to the same geometry, so those slots are submitted as a single
 * instanced draw call.
 *
 * Per-instance data kept by the batch:
 *   - instanceBoundSphere (vec4: center xyz + radius), read by the patched
 *     vertex shader for its frustum cull; a radius of 0 disables the cull.
 *   - instanceLodIndex (one byte), the LOD index the instance was acquired
 *     with. The patched shader in this file only forwards it as a varying.
 *   - the instance matrix, stored either in the InstancedMesh's instanceMatrix
 *     attribute (CPU path) or in a float DataTexture holding 4 RGBA texels per
 *     instance (GPU instance-texture path, the default).
 *
 * Slot indices are handed out by acquireSlotInBatch() and recycled through a
 * free list by releaseSlotInBatch(). Capacity starts at 32 and doubles.
 */
export class InstancedBatch {
  /**
   * @param {object} pool - Owning pool; `_enableGpuInstanceTex === false` selects the CPU path.
   * @param {string} geoKey - Identifier of the batched geometry (used for the mesh name and stats).
   * @param {THREE.BufferGeometry} geometry - Geometry shared by all instances.
   * @param {object|null} [globalMaterialPool] - Optional pool providing `getMaterialForTier('far')`.
   * @param {object|null} [bufferPool] - Optional buffer pool; stored but not used by this class.
   */
  constructor(pool, geoKey, geometry, globalMaterialPool = null, bufferPool = null) {
    this.pool = pool;
    this.geoKey = geoKey;
    this.geometry = geometry;
    this.capacity = 32; // grows as needed
    this.globalMaterialPool = globalMaterialPool;
    this.bufferPool = bufferPool;

    // key: `${assetUrl}|${meshDescIdx}|${lodIdx}` -> BatchedInstancedSlot
    this.slots = new Map();

    // Uniform objects are shared by reference with the compiled shader.
    this._uniforms = { projViewMatrix: { value: new THREE.Matrix4() } };

    // One reusable all-zero matrix: writing it to a slot collapses the
    // instance to a point, which hides it.
    this._zeroMatrix = new THREE.Matrix4().set(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);

    // GPU instance-texture path: each batch needs its OWN material so it can
    // bind its own instance texture uniform.
    this._gpuInstanceTex = pool._enableGpuInstanceTex !== false;
    const useGlobalPool = Boolean(globalMaterialPool && globalMaterialPool._useGlobalMaterialPool);

    // Whether dispose() may release the material. The shared pool material is
    // used by other batches and must not be disposed from here.
    this._ownsMaterial = true;

    let material;
    if (this._gpuInstanceTex) {
      material = useGlobalPool
        ? globalMaterialPool.getMaterialForTier('far').clone()
        : new THREE.MeshLambertMaterial({ vertexColors: true });
      material.onBeforeCompile = InstancedBatch._applyVertexColorGamma;
      // Must run before patching so the patch sees the instanceTex uniform.
      this._initInstanceTexture(this.capacity);
      _patchInstancedSlotMaterial(material, this._uniforms);
    } else if (useGlobalPool) {
      // Shared across all batches; intentionally left unpatched, as before.
      material = globalMaterialPool.getMaterialForTier('far');
      this._ownsMaterial = false;
    } else {
      material = new THREE.MeshLambertMaterial({ vertexColors: true });
      material.onBeforeCompile = InstancedBatch._applyVertexColorGamma;
      _patchInstancedSlotMaterial(material, this._uniforms);
    }
    this.material = material;

    this.mesh = new THREE.InstancedMesh(geometry, material, this.capacity);
    this.mesh.frustumCulled = false; // culling is done per instance in the shader
    this.mesh.instanceMatrix.setUsage(THREE.DynamicDrawUsage);
    this.mesh.name = `batch:${geoKey}`;

    // Per-instance bound sphere for the shader-side frustum cull.
    this._boundArray = new Float32Array(this.capacity * 4);
    this._boundAttr = new THREE.InstancedBufferAttribute(this._boundArray, 4);
    this._boundAttr.setUsage(THREE.DynamicDrawUsage);
    this.mesh.geometry.setAttribute('instanceBoundSphere', this._boundAttr);

    // Per-instance LOD index.
    this._lodIndexArray = new Uint8Array(this.capacity);
    this._lodIndexAttr = new THREE.InstancedBufferAttribute(this._lodIndexArray, 1);
    this._lodIndexAttr.setUsage(THREE.DynamicDrawUsage);
    this.mesh.geometry.setAttribute('instanceLodIndex', this._lodIndexAttr);

    // Start with every matrix zeroed (invisible) and nothing drawn.
    for (let i = 0; i < this.capacity; i++) this.mesh.setMatrixAt(i, this._zeroMatrix);
    this.mesh.count = 0;
    this.mesh.instanceMatrix.needsUpdate = true;

    // Slot allocation tracking.
    this._nextSlotIdx = 0;
    this._freeSlots = [];
    this._liveSlots = new Set(); // indices currently handed out
    this._dirtySlots = new Set(); // CPU path: slots whose matrix needs upload

    this._stats = {
      totalInstances: 0,
      drawCalls: 1, // always 1 for a batch
      savedDrawCalls: 0, // recomputed in getStats()
    };
  }

  /**
   * onBeforeCompile hook: replaces the stock color_fragment chunk so vertex
   * colors are raised to the power 2.2 before modulating the diffuse color.
   */
  static _applyVertexColorGamma(shader) {
    shader.fragmentShader = shader.fragmentShader.replace(
      '#include <color_fragment>',
      `#if defined( USE_COLOR_ALPHA )
diffuseColor.rgb *= pow(vColor.rgb, vec3(2.2));
diffuseColor.a *= vColor.a;
#elif defined( USE_COLOR )
diffuseColor.rgb *= pow(vColor, vec3(2.2));
#endif`
    );
  }

  /**
   * Allocate a slot index for a new instance, growing the batch when full.
   * @param {number} lodIdx - LOD index stored for the instance.
   * @returns {number} Slot index inside this batch.
   */
  acquireSlotInBatch(lodIdx) {
    let idx;
    if (this._freeSlots.length > 0) {
      idx = this._freeSlots.pop();
    } else {
      if (this._nextSlotIdx >= this.capacity) {
        this._grow(this.capacity * 2);
      }
      idx = this._nextSlotIdx++;
    }
    this._liveSlots.add(idx);

    this._lodIndexArray[idx] = lodIdx;
    this._lodIndexAttr.needsUpdate = true;

    // mesh.count is a high-water mark: it must cover the highest live index.
    if (idx + 1 > this.mesh.count) this.mesh.count = idx + 1;
    this._stats.totalInstances++;

    return idx;
  }

  /**
   * Release a slot for reuse and hide its instance. Releasing an index that
   * is not currently acquired is ignored, so a double release cannot put the
   * same index on the free list twice.
   * @param {number} idx - Slot index returned by acquireSlotInBatch().
   */
  releaseSlotInBatch(idx) {
    if (!this._liveSlots.delete(idx)) return;
    this._freeSlots.push(idx);

    // Zero the matrix to hide this instance.
    if (this._gpuInstanceTex) {
      this.setInstanceTransform(idx, this._zeroMatrix);
    } else {
      this.mesh.setMatrixAt(idx, this._zeroMatrix);
      this._dirtySlots.add(idx);
    }

    // Zero the bound sphere (radius 0 also disables the shader cull).
    this._boundArray.fill(0, idx * 4, idx * 4 + 4);
    this._boundAttr.needsUpdate = true;

    this._stats.totalInstances--;
  }

  // --- GPU instance transform texture ---------------------------------------

  /**
   * (Re)create the float data texture that stores one mat4 per instance as
   * four consecutive RGBA texels in a single row, and point the shared
   * uniforms at it. Does not dispose any previous texture; callers do that.
   * NOTE(review): the row is capacity * 4 texels wide; presumably this must
   * stay within the device's MAX_TEXTURE_SIZE — confirm for very large batches.
   * @param {number} capacity - Number of instances the texture must hold.
   */
  _initInstanceTexture(capacity) {
    this._instTexWidth = capacity * 4;
    this._instTexData = new Float32Array(this._instTexWidth * 4);
    const tex = new THREE.DataTexture(
      this._instTexData,
      this._instTexWidth,
      1,
      THREE.RGBAFormat,
      THREE.FloatType
    );
    // Nearest filtering: exact texel reads, and no dependency on linear
    // filtering of float textures (OES_texture_float_linear is not guaranteed).
    tex.minFilter = THREE.NearestFilter;
    tex.magFilter = THREE.NearestFilter;
    tex.generateMipmaps = false;
    tex.needsUpdate = true;
    this._instTex = tex;

    // Mutate existing uniform objects in place when present: the compiled
    // shader holds them by reference.
    if (this._uniforms.instanceTex) {
      this._uniforms.instanceTex.value = tex;
      this._uniforms.instanceTexWidth.value = this._instTexWidth;
    } else {
      this._uniforms.instanceTex = { value: tex };
      this._uniforms.instanceTexWidth = { value: this._instTexWidth };
    }
    this._instTexDirty = false;
  }

  /**
   * Write one instance matrix into the CPU copy of the instance texture.
   * @param {number} idx - Slot index.
   * @param {THREE.Matrix4} matrix - Matrix whose 16 elements are copied.
   */
  setInstanceTransform(idx, matrix) {
    const src = matrix.elements;
    const base = idx * 16;
    for (let k = 0; k < 16; k++) {
      this._instTexData[base + k] = src[k];
    }
    this._instTexDirty = true;
  }

  /** Mark the instance texture for upload if anything was written to it. */
  flushInstanceTexture() {
    if (!this._instTexDirty) return;
    this._instTex.needsUpdate = true;
    this._instTexDirty = false;
  }

  /**
   * Update an instance matrix via whichever storage path is active.
   * @param {number} idx - Slot index.
   * @param {THREE.Matrix4} matrix - New instance matrix.
   */
  setMatrixInBatch(idx, matrix) {
    if (this._gpuInstanceTex) {
      this.setInstanceTransform(idx, matrix);
      return;
    }
    this.mesh.setMatrixAt(idx, matrix);
    this._dirtySlots.add(idx);
  }

  /** Update an instance's bound sphere (center cx, cy, cz and radius r). */
  setBoundSphereInBatch(idx, cx, cy, cz, r) {
    const o = idx * 4;
    this._boundArray[o] = cx;
    this._boundArray[o + 1] = cy;
    this._boundArray[o + 2] = cz;
    this._boundArray[o + 3] = r;
    this._boundAttr.needsUpdate = true;
  }

  /** Update the stored LOD index of an instance. */
  updateLodIndexInBatch(idx, lodIdx) {
    this._lodIndexArray[idx] = lodIdx;
    this._lodIndexAttr.needsUpdate = true;
  }

  /**
   * Flush pending matrix writes to the GPU. Dirty slots are always flushed;
   * when the attribute supports update ranges, only the [min..max] dirty span
   * is uploaded unless that span already covers every drawn instance.
   */
  flushUpdates() {
    if (this._gpuInstanceTex) {
      this.flushInstanceTexture();
      return;
    }
    if (this._dirtySlots.size === 0) return;

    const im = this.mesh.instanceMatrix;
    if (im.clearUpdateRanges && im.addUpdateRange) {
      let lo = Infinity;
      let hi = -1;
      for (const s of this._dirtySlots) {
        if (s < lo) lo = s;
        if (s > hi) hi = s;
      }
      const span = (hi - lo + 1) * 16;
      im.clearUpdateRanges();
      if (span > 0 && span < (this.mesh.count || this.capacity) * 16) {
        im.addUpdateRange(lo * 16, span);
      }
    }
    im.needsUpdate = true;
    this._dirtySlots.clear();
  }

  /**
   * Replace the InstancedMesh with a larger one, carrying over all instance
   * data and swapping it into the old mesh's parent.
   * @param {number} newCap - New capacity in instances.
   */
  _grow(newCap) {
    const old = this.mesh;
    const next = new THREE.InstancedMesh(this.geometry, this.material, newCap);
    next.frustumCulled = false;
    next.instanceMatrix.setUsage(THREE.DynamicDrawUsage);
    next.name = old.name;

    if (this._gpuInstanceTex) {
      // A texture cannot be resized in place: build a larger one, copy the
      // existing matrices across, then free the old one so it does not leak.
      const oldData = this._instTexData;
      const oldTex = this._instTex;
      this._initInstanceTexture(newCap);
      this._instTexData.set(oldData);
      this._instTex.needsUpdate = true;
      oldTex.dispose();
    } else {
      // A new InstancedMesh starts with identity matrices; zero them so slots
      // that have not been written yet stay hidden, as in the constructor.
      next.instanceMatrix.array.fill(0);
      const m = new THREE.Matrix4();
      for (let i = 0; i < this._nextSlotIdx; i++) {
        old.getMatrixAt(i, m);
        next.setMatrixAt(i, m);
      }
      next.instanceMatrix.needsUpdate = true;
    }
    next.count = old.count;

    // Grow the bound sphere attribute.
    const newBounds = new Float32Array(newCap * 4);
    newBounds.set(this._boundArray);
    this._boundArray = newBounds;
    this._boundAttr = new THREE.InstancedBufferAttribute(newBounds, 4);
    this._boundAttr.setUsage(THREE.DynamicDrawUsage);
    next.geometry.setAttribute('instanceBoundSphere', this._boundAttr);

    // Grow the LOD index attribute.
    const newLodIndices = new Uint8Array(newCap);
    newLodIndices.set(this._lodIndexArray);
    this._lodIndexArray = newLodIndices;
    this._lodIndexAttr = new THREE.InstancedBufferAttribute(newLodIndices, 1);
    this._lodIndexAttr.setUsage(THREE.DynamicDrawUsage);
    next.geometry.setAttribute('instanceLodIndex', this._lodIndexAttr);

    // Swap meshes in the scene graph.
    const parent = old.parent;
    if (parent) {
      parent.remove(old);
      parent.add(next);
    }
    old.dispose();

    this.mesh = next;
    this.capacity = newCap;
    // The whole new matrix buffer is uploaded, so pending dirty marks are moot.
    this._dirtySlots = new Set();
  }

  /**
   * Remove the mesh from its parent and release GPU resources owned by the
   * batch: the instance texture, the mesh, and the material unless it is the
   * shared global-pool material.
   * NOTE(review): the geometry is disposed as before; it is passed in by the
   * caller, so confirm nothing else still renders with it.
   */
  dispose() {
    this.mesh.parent?.remove(this.mesh);
    this.mesh.geometry.dispose();
    if (this._ownsMaterial) this.mesh.material.dispose();
    this._instTex?.dispose();
    this.mesh.dispose();
    this.slots.clear();
  }

  /**
   * @returns {object} Batch statistics. `savedDrawCalls` estimates the draw
   *   calls avoided as the number of registered slots beyond the first.
   */
  getStats() {
    return {
      ...this._stats,
      savedDrawCalls: Math.max(0, this.slots.size - 1),
      geometry: this.geoKey,
      capacity: this.capacity,
    };
  }
}
|
|
334
|
+
|
|
335
|
+
/**
 * Slot facade used when batching is enabled. It exposes the slot methods
 * (acquire/release/setMatrix/setBoundSphere/flush) but owns no mesh: every
 * call is delegated to the parent InstancedBatch.
 */
export class BatchedInstancedSlot {
  /**
   * @param {object} pool - Owning pool.
   * @param {object} batch - Parent batch; must provide `geometry`, `material`
   *   and the `*InBatch` / `flushUpdates` methods used below.
   * @param {object} asset - Asset this slot belongs to.
   * @param {number} meshDescIdx - Index of the mesh descriptor within the asset.
   * @param {number} lodIdx - LOD index passed to the batch on every acquire.
   */
  constructor(pool, batch, asset, meshDescIdx, lodIdx) {
    this.pool = pool;
    this.batch = batch;
    this.asset = asset;
    this.meshDescIdx = meshDescIdx;
    this.lodIdx = lodIdx;
    this.geometry = batch.geometry;
    this.material = batch.material;

    // entity -> slot index within the batch
    this.slots = new Map();
    this._isBatched = true;
  }

  /**
   * Get a batch slot for `entity`. If the entity already holds one, that
   * index is returned instead of allocating again, so a repeated acquire
   * cannot orphan the first batch slot.
   * @returns {number} Slot index within the parent batch.
   */
  acquireSlot(entity) {
    const existing = this.slots.get(entity);
    if (existing != null) return existing;

    const idx = this.batch.acquireSlotInBatch(this.lodIdx);
    this.slots.set(entity, idx);
    return idx;
  }

  /** Return the entity's slot to the batch; no-op if it holds none. */
  releaseSlot(entity) {
    const idx = this.slots.get(entity);
    if (idx == null) return;
    this.slots.delete(entity);
    this.batch.releaseSlotInBatch(idx);
  }

  /** Forward a matrix write to the parent batch. */
  setMatrixForSlot(idx, matrix) {
    this.batch.setMatrixInBatch(idx, matrix);
  }

  /** Forward a bound-sphere write to the parent batch. */
  setBoundSphereForSlot(idx, cx, cy, cz, r) {
    this.batch.setBoundSphereInBatch(idx, cx, cy, cz, r);
  }

  /** Ask the parent batch to upload pending changes. */
  flushMatrixUpdates() {
    this.batch.flushUpdates();
  }

  // No-op: the batch handles growth.
  _grow() {}

  // No-op: slots are not disposed individually; resources belong to the batch.
  dispose() {}
}
|
|
388
|
+
|
|
389
|
+
/**
 * Probe a WebGL context for the capabilities relevant to batching and log
 * the result.
 *
 * Extensions are looked up under their WebGL registry names first
 * (`WEBGL_multi_draw`, `WEBGL_draw_instanced_base_vertex_base_instance`),
 * then under the names this function queried previously, so any context that
 * reported support before still does.
 *
 * @param {WebGLRenderingContext|WebGL2RenderingContext|null|undefined} gl
 * @returns {{version: string, vendor: string, renderer: string,
 *   baseVertex: boolean, multiDraw: boolean, instanceDivisor: boolean}}
 *   With a missing context the strings fall back to defaults and every flag
 *   is false.
 */
export function detectWebGL2Capabilities(gl) {
  // True when the context exposes at least one of the named extensions.
  const hasExtension = (...names) => names.some((name) => Boolean(gl?.getExtension(name)));

  const capabilities = {
    version: gl?.getParameter(gl.VERSION) || 'WebGL 1.0',
    vendor: gl?.getParameter(gl.VENDOR) || 'unknown',
    renderer: gl?.getParameter(gl.RENDERER) || 'unknown',
    // Base-vertex / base-instance draws.
    baseVertex: hasExtension(
      'WEBGL_draw_instanced_base_vertex_base_instance',
      'OES_draw_elements_base_vertex'
    ),
    // Multi-draw submission.
    multiDraw: hasExtension('WEBGL_multi_draw', 'ANGLE_multi_draw'),
    // Instanced attributes: core in WebGL 2 (vertexAttribDivisor on the
    // context), an extension in WebGL 1.
    instanceDivisor:
      typeof gl?.vertexAttribDivisor === 'function' || hasExtension('ANGLE_instanced_arrays'),
  };

  console.log('[batching] WebGL capabilities:', capabilities);
  return capabilities;
}
|
|
408
|
+
|
|
409
|
+
/**
 * Patch a material so its vertex shader understands the per-instance data
 * kept by a batch.
 *
 * The installed onBeforeCompile hook:
 *   - runs any previously installed hook first (same `this` and arguments);
 *   - declares the instanceBoundSphere / instanceLodIndex attributes and the
 *     uniforms below, and forwards instanceLodIndex as the vLodIndex varying;
 *   - when `uniforms.instanceTex` exists, defines USE_GPU_INSTANCE_TEX and
 *     rebuilds each instance matrix from the instance texture (four RGBA
 *     texels per instance, indexed by gl_InstanceID);
 *   - culls an instance whose bound sphere (radius > 0) lies fully outside
 *     one of the six planes derived from projViewMatrix, by writing a NaN
 *     position.
 *
 * Because the define is added conditionally inside the hook, a
 * customProgramCacheKey is also installed so the texture and attribute
 * variants never share one cached program.
 *
 * NOTE(review): in the texture variant only <project_vertex> is replaced;
 * other stock chunks presumably still read the instanceMatrix attribute —
 * confirm normals/world position are not needed by the materials used here.
 *
 * @param {THREE.Material} material - Material to patch in place.
 * @param {object} uniforms - Shared uniform objects: `projViewMatrix`, and
 *   optionally `instanceTex` / `instanceTexWidth`.
 */
function _patchInstancedSlotMaterial(material, uniforms) {
  const prev = material.onBeforeCompile;
  const prevCacheKey = material.customProgramCacheKey;

  // Evaluated lazily so it reflects the same condition the hook tests.
  material.customProgramCacheKey = () => {
    const inherited = typeof prevCacheKey === 'function' ? prevCacheKey.call(material) : '';
    const chained = prev ? prev.toString() : '';
    const variant = uniforms.instanceTex ? 'gpuTex' : 'attr';
    return `${inherited}|${chained}|instancedSlot:${variant}`;
  };

  material.onBeforeCompile = (shader, renderer) => {
    if (prev) prev.call(material, shader, renderer);

    shader.uniforms.projViewMatrix = uniforms.projViewMatrix;
    // Declared for the shader interface; the patched code below does not
    // read these four.
    shader.uniforms.cameraPos = { value: new THREE.Vector3() };
    shader.uniforms.lodThresholds = { value: new THREE.Vector4(80, 200, 400, 800) };
    shader.uniforms.fovTanHalf = { value: 0.5 };
    shader.uniforms.viewportHeight = { value: 1080 };

    // Instance texture uniforms exist only on per-batch materials.
    if (uniforms.instanceTex) {
      shader.uniforms.instanceTex = uniforms.instanceTex;
      shader.uniforms.instanceTexWidth = uniforms.instanceTexWidth;
      shader.defines = shader.defines || {};
      shader.defines.USE_GPU_INSTANCE_TEX = '';
    }

    shader.vertexShader = shader.vertexShader
      .replace(
        '#include <common>',
        `#include <common>
attribute vec4 instanceBoundSphere;
attribute float instanceLodIndex;
uniform mat4 projViewMatrix;
uniform vec3 cameraPos;
uniform vec4 lodThresholds;
uniform float fovTanHalf;
uniform float viewportHeight;
varying float vLodIndex;
#ifdef USE_GPU_INSTANCE_TEX
uniform sampler2D instanceTex;
uniform float instanceTexWidth;
mat4 readInstanceMatrix(int id) {
  float base = float(id) * 4.0;
  vec4 c0 = texture2D(instanceTex, vec2((base + 0.5) / instanceTexWidth, 0.5));
  vec4 c1 = texture2D(instanceTex, vec2((base + 1.5) / instanceTexWidth, 0.5));
  vec4 c2 = texture2D(instanceTex, vec2((base + 2.5) / instanceTexWidth, 0.5));
  vec4 c3 = texture2D(instanceTex, vec2((base + 3.5) / instanceTexWidth, 0.5));
  return mat4(c0, c1, c2, c3);
}
#endif`
      )
      .replace(
        '#include <project_vertex>',
        `#ifdef USE_GPU_INSTANCE_TEX
// mvPosition declared at outer scope (like <project_vertex>) so downstream
// chunks (fog, etc.) that read it still compile.
vec4 mvPosition = modelViewMatrix * readInstanceMatrix(gl_InstanceID) * vec4(transformed, 1.0);
gl_Position = projectionMatrix * mvPosition;
#else
#include <project_vertex>
#endif
{
  // GPU frustum cull + LOD selection
  vLodIndex = instanceLodIndex; // pass LOD to fragment shader if needed

  if (instanceBoundSphere.w > 0.0) {
    vec3 c = instanceBoundSphere.xyz;
    float r = instanceBoundSphere.w;

    // Frustum cull: derive 6 clip-space planes from projViewMatrix
    vec4 row0 = vec4(projViewMatrix[0][0], projViewMatrix[1][0], projViewMatrix[2][0], projViewMatrix[3][0]);
    vec4 row1 = vec4(projViewMatrix[0][1], projViewMatrix[1][1], projViewMatrix[2][1], projViewMatrix[3][1]);
    vec4 row2 = vec4(projViewMatrix[0][2], projViewMatrix[1][2], projViewMatrix[2][2], projViewMatrix[3][2]);
    vec4 row3 = vec4(projViewMatrix[0][3], projViewMatrix[1][3], projViewMatrix[2][3], projViewMatrix[3][3]);

    vec4 planes[6];
    planes[0] = row3 + row0; // left
    planes[1] = row3 - row0; // right
    planes[2] = row3 + row1; // bottom
    planes[3] = row3 - row1; // top
    planes[4] = row3 + row2; // near
    planes[5] = row3 - row2; // far

    bool outside = false;
    for (int i = 0; i < 6; i++) {
      vec4 p = planes[i];
      float len = length(p.xyz);
      if (len > 0.0) {
        float d = (dot(p.xyz, c) + p.w) / len;
        if (d < -r) { outside = true; break; }
      }
    }

    if (outside) {
      gl_Position = vec4(0.0/0.0, 0.0/0.0, 0.0/0.0, 0.0/0.0) * 0.0;
      return;
    }
  }
}`
      );
  };
  material.needsUpdate = true;
}
|
|
509
|
+
|
|
510
|
+
/**
 * Enable draw call batching on a pool by patching it in place.
 * Call enableDrawCallBatching(pool) once, before slots are requested.
 *
 * Effects on `pool`:
 *   - `_geometryBatches`: Map of geometry uuid -> InstancedBatch.
 *   - `_webglCapabilities`: result of detectWebGL2Capabilities(), or
 *     `{ version: 'unknown' }` if detection throws.
 *   - `_initializeMultiDraw()` is invoked if the pool defines it.
 *   - `_getInstancedSlot` is replaced by a version returning slots that live
 *     inside a per-geometry batch (null when the LOD is not 'unskinned' or
 *     its geometry is not cached yet).
 *   - `getStats` gains a `batching` section.
 *   - `update` additionally flushes every batch after the original update.
 *
 * Calling it again on the same pool is ignored, so the wrappers are not
 * stacked and existing batches are not dropped.
 *
 * @param {object} pool - Pool exposing `renderer`, `scene`, `update` and the
 *   asset shape read below (`meshLodDescs`, `geoCache`, `url`).
 */
export function enableDrawCallBatching(pool) {
  if (pool._drawCallBatchingEnabled) {
    console.warn('[batching] Draw call batching already enabled; ignoring repeat call.');
    return;
  }
  pool._drawCallBatchingEnabled = true;

  // Map: geometry key -> InstancedBatch
  pool._geometryBatches = new Map();

  // Detect WebGL capabilities. Prefer the renderer's own context; asking the
  // canvas again is only a fallback for renderers without getContext().
  try {
    const renderer = pool.renderer;
    let gl = typeof renderer.getContext === 'function' ? renderer.getContext() : null;
    if (!gl) {
      const canvas = renderer.domElement;
      gl = canvas.getContext('webgl2') || canvas.getContext('webgl');
    }
    pool._webglCapabilities = detectWebGL2Capabilities(gl);
  } catch (e) {
    console.warn('[batching] Failed to detect WebGL capabilities', e);
    pool._webglCapabilities = { version: 'unknown' };
  }

  // Runs after _geometryBatches exists so the pool's multi-draw setup can
  // reference it.
  if (pool._initializeMultiDraw) {
    pool._initializeMultiDraw();
  }

  // Replace _getInstancedSlot to use batching.
  pool._getInstancedSlot = function (asset, meshDescIdx, lodIdx) {
    const desc = asset.meshLodDescs[meshDescIdx];
    if (!desc) return null;
    const lod = desc.lods[lodIdx];
    if (!lod || (lod.kind || 'textured') !== 'unskinned') return null;

    const geo = asset.geoCache.get(`${desc.meshIndex}:${desc.primIndex}:${lodIdx}`);
    if (!geo) return null; // not loaded yet

    // The batch key must identify the actual geometry: meshIndex:primIndex
    // repeats across distinct assets, whereas the resolved geometry's uuid is
    // shared only by slots that really use the same geometry object.
    const geoKey = geo.uuid;

    let batch = this._geometryBatches.get(geoKey);
    if (!batch) {
      batch = new InstancedBatch(this, geoKey, geo, this._globalMaterialPool);
      this._geometryBatches.set(geoKey, batch);
      this.scene.add(batch.mesh);
    }

    // One BatchedInstancedSlot per (asset, mesh descriptor, LOD) in the batch.
    const slotKey = `${asset.url}|${meshDescIdx}|${lodIdx}`;
    let slot = batch.slots.get(slotKey);
    if (!slot) {
      slot = new BatchedInstancedSlot(this, batch, asset, meshDescIdx, lodIdx);
      batch.slots.set(slotKey, slot);
    }
    return slot;
  };

  // Add batching stats to the pool's stats, forwarding caller arguments.
  const originalGetStats = pool.getStats ? pool.getStats.bind(pool) : () => ({});
  pool.getStats = function (...args) {
    const stats = originalGetStats(...args);
    const batchStats = Array.from(this._geometryBatches.values(), (b) => b.getStats());
    const totalDrawCalls = batchStats.length;
    const totalInstances = batchStats.reduce((sum, s) => sum + s.totalInstances, 0);
    const totalSavedDrawCalls = batchStats.reduce((sum, s) => sum + s.savedDrawCalls, 0);

    return {
      ...stats,
      batching: {
        enabled: true,
        batches: this._geometryBatches.size,
        totalDrawCalls,
        totalInstances,
        estimatedSavedDrawCalls: totalSavedDrawCalls,
        reduction: totalSavedDrawCalls
          ? `${Math.round((totalSavedDrawCalls / (totalDrawCalls + totalSavedDrawCalls)) * 100)}%`
          : '0%',
      },
    };
  };

  // Batched slots live in _geometryBatches, so the original update() is
  // wrapped to flush every batch after the per-frame writes. Caller arguments
  // are forwarded unchanged to the original update().
  const originalUpdate = pool.update.bind(pool);
  pool.update = function (...args) {
    const result = originalUpdate(...args);
    for (const batch of this._geometryBatches.values()) {
      if (batch.flushUpdates) batch.flushUpdates();
    }
    return result;
  };

  console.log('[batching] Draw call batching enabled (with per-frame batch flush).');
}
|
|
614
|
+
|
|
615
|
+
// Default export: the same four public bindings that are also exported by name above.
export default { InstancedBatch, BatchedInstancedSlot, enableDrawCallBatching, detectWebGL2Capabilities };
|