npm - @codexo/exojs - Versions diffs - 2.1.0 → 2.1.1 - Mend

@codexo/exojs 2.1.0 → 2.1.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (29)

package/CHANGELOG.md +20 -0
package/README.md +1 -3
package/dist/esm/particles/emitters/ParticleOptions.d.ts +11 -0
package/dist/esm/particles/emitters/ParticleOptions.js +11 -0
package/dist/esm/particles/emitters/ParticleOptions.js.map +1 -1
package/dist/esm/rendering/Container.d.ts +1 -1
package/dist/esm/rendering/Container.js +5 -2
package/dist/esm/rendering/Container.js.map +1 -1
package/dist/esm/rendering/webgl2/WebGl2ShaderRuntime.js +7 -0
package/dist/esm/rendering/webgl2/WebGl2ShaderRuntime.js.map +1 -1
package/dist/esm/rendering/webgpu/WebGpuParticleRenderer.js +66 -43
package/dist/esm/rendering/webgpu/WebGpuParticleRenderer.js.map +1 -1
package/dist/esm/rendering/webgpu/WebGpuPrimitiveRenderer.d.ts +2 -6
package/dist/esm/rendering/webgpu/WebGpuPrimitiveRenderer.js +160 -93
package/dist/esm/rendering/webgpu/WebGpuPrimitiveRenderer.js.map +1 -1
package/dist/esm/rendering/webgpu/WebGpuRenderManager.js +50 -39
package/dist/esm/rendering/webgpu/WebGpuRenderManager.js.map +1 -1
package/dist/esm/rendering/webgpu/WebGpuSpriteRenderer.js +75 -32
package/dist/esm/rendering/webgpu/WebGpuSpriteRenderer.js.map +1 -1
package/dist/exo.d.ts +14 -7
package/dist/exo.esm.js +374 -209
package/dist/exo.esm.js.map +1 -1
package/dist/exo.esm.min.js +1 -1
package/dist/exo.esm.min.js.map +1 -1
package/dist/exo.global.js +374 -209
package/dist/exo.global.js.map +1 -1
package/dist/exo.global.min.js +1 -1
package/dist/exo.global.min.js.map +1 -1
package/package.json +3 -4

package/dist/exo.esm.js CHANGED

@@ -5291,6 +5291,13 @@ function createWebGl2ShaderRuntime(gl) {
             gl.useProgram(null);
         },
         sync: () => {
+            // Bind the program before syncing uniforms. WebGl2RenderManager
+            // does not call bindShader() on the active renderer's shader
+            // during normal draw flow, so sync() is the first entry point
+            // that must establish program binding — otherwise uniform*
+            // targets the wrong (or no) program and the subsequent draw
+            // call fails with "no valid shader program in use".
+            gl.useProgram(program);
             syncUniforms();
         },
         destroy: (shader) => {
@@ -7423,15 +7430,8 @@ function getWebGpuBlendState(blendMode) {
 /// <reference types="@webgpu/types" />
 const primitiveShaderSource = `
-struct TransformUniforms {
-    matrix: mat4x4<f32>,
-};
-@group(0) @binding(0)
-var<uniform> uniforms: TransformUniforms;
 struct VertexInput {
-    @location(0) position: vec2<f32>,
+    @location(0) position: vec4<f32>,
     @location(1) color: vec4<f32>,
 };
@@ -7444,7 +7444,7 @@ struct VertexOutput {
 fn vertexMain(input: VertexInput) -> VertexOutput {
     var output: VertexOutput;
-    output.position = uniforms.matrix * vec4<f32>(input.position, 0.0, 1.0);
+    output.position = input.position;
     output.color = vec4<f32>(input.color.rgb * input.color.a, input.color.a);
     return output;
@@ -7455,23 +7455,24 @@ fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
     return input.color;
 }
 `;
-const vertexStrideBytes$1 = 12;
-const transformByteLength = 64;
+// 4 floats (pre-transformed clip-space position) + 1 u32 (color) = 20 bytes.
+// The CPU applies (view * shape.globalTransform) to each vertex before writing
+// it into the vertex buffer, so the shader outputs the position as-is. This
+// matches the sprite renderer's approach and eliminates the need for a per-
+// drawcall uniform binding.
+const vertexStrideBytes$1 = 20;
+const wordsPerVertex$1 = vertexStrideBytes$1 / Uint32Array.BYTES_PER_ELEMENT;
 class WebGpuPrimitiveRenderer extends AbstractWebGpuRenderer {
     constructor() {
         super(...arguments);
         this._combinedTransform = new Matrix();
         this._drawCalls = [];
         this._drawCallCount = 0;
-        this._transformData = new Float32Array(transformByteLength / Float32Array.BYTES_PER_ELEMENT);
         this._pipelines = new Map();
         this._renderManager = null;
         this._device = null;
         this._shaderModule = null;
-        this._bindGroupLayout = null;
         this._pipelineLayout = null;
-        this._uniformBuffer = null;
-        this._bindGroup = null;
         this._vertexBuffer = null;
         this._indexBuffer = null;
         this._vertexBufferCapacity = 0;
@@ -7479,6 +7480,7 @@ class WebGpuPrimitiveRenderer extends AbstractWebGpuRenderer {
         this._vertexData = new ArrayBuffer(0);
         this._float32View = new Float32Array(this._vertexData);
         this._uint32View = new Uint32Array(this._vertexData);
+        this._packedIndexData = new Uint16Array(0);
         this._generatedIndexData = new Uint16Array(0);
         this._sequentialIndexData = new Uint16Array(0);
     }
@@ -7516,60 +7518,121 @@ class WebGpuPrimitiveRenderer extends AbstractWebGpuRenderer {
     flush() {
         const runtime = this._renderManager;
         const device = this._device;
-        const bindGroup = this._bindGroup;
-        const uniformBuffer = this._uniformBuffer;
-        if (!runtime || !device || !bindGroup || !uniformBuffer) {
+        if (!runtime || !device) {
             return;
         }
         if (this._drawCallCount === 0 && !runtime.clearRequested) {
             return;
         }
-        const encoder = device.createCommandEncoder();
-        const pass = encoder.beginRenderPass({
-            colorAttachments: [runtime.createColorAttachment()],
-        });
-        runtime.stats.renderPasses++;
         const scissor = runtime.getScissorRect();
         const maskClipsAll = scissor !== null && (scissor.width <= 0 || scissor.height <= 0);
-        if (scissor !== null && !maskClipsAll) {
-            pass.setScissorRect(scissor.x, scissor.y, scissor.width, scissor.height);
-        }
-        if (!maskClipsAll) {
+        // Phase 1: resolve drawcalls and record each one's offsets into the
+        // shared packed buffers. Transform gets baked into the vertex data
+        // during phase 2 so no per-drawcall uniform binding is needed.
+        const plan = [];
+        const resolvedDrawCalls = [];
+        let totalVertices = 0;
+        let totalIndices = 0;
+        if (this._drawCallCount > 0 && !maskClipsAll) {
             for (let drawCallIndex = 0; drawCallIndex < this._drawCallCount; drawCallIndex++) {
                 const drawCall = this._drawCalls[drawCallIndex];
                 const shape = drawCall.shape;
-                const vertices = shape.geometry.vertices;
-                const resolvedDrawCall = this._resolveDrawCall(shape);
-                if (resolvedDrawCall === null) {
+                const resolved = this._resolveDrawCall(shape);
+                resolvedDrawCalls.push(resolved);
+                if (resolved === null) {
                     continue;
                 }
                 const pipeline = this._getPipeline({
-                    topology: resolvedDrawCall.topology,
-                    usesStripIndex: resolvedDrawCall.usesStripIndex,
+                    topology: resolved.topology,
+                    usesStripIndex: resolved.usesStripIndex,
                     blendMode: drawCall.blendMode,
                     format: runtime.renderTargetFormat,
                 });
-                this._ensureVertexCapacity(resolvedDrawCall.vertexCount);
-                this._writeVertexData(vertices, shape.color.toRgba());
-                this._writeTransformData(runtime, shape);
-                device.queue.writeBuffer(this._vertexBuffer, 0, this._vertexData, 0, resolvedDrawCall.vertexCount * vertexStrideBytes$1);
-                device.queue.writeBuffer(uniformBuffer, 0, this._transformData.buffer, this._transformData.byteOffset, this._transformData.byteLength);
-                pass.setPipeline(pipeline);
-                pass.setBindGroup(0, bindGroup);
-                pass.setVertexBuffer(0, this._vertexBuffer);
-                if (resolvedDrawCall.indices !== null && resolvedDrawCall.indexCount > 0) {
-                    this._ensureIndexCapacity(resolvedDrawCall.indexCount);
-                    device.queue.writeBuffer(this._indexBuffer, 0, resolvedDrawCall.indices.buffer, resolvedDrawCall.indices.byteOffset, resolvedDrawCall.indexCount * Uint16Array.BYTES_PER_ELEMENT);
-                    pass.setIndexBuffer(this._indexBuffer, 'uint16');
-                    pass.drawIndexed(resolvedDrawCall.indexCount);
+                plan.push({
+                    pipeline,
+                    vertexByteOffset: totalVertices * vertexStrideBytes$1,
+                    vertexCount: resolved.vertexCount,
+                    indexByteOffset: totalIndices * Uint16Array.BYTES_PER_ELEMENT,
+                    indexCount: resolved.indexCount,
+                });
+                totalVertices += resolved.vertexCount;
+                totalIndices += resolved.indexCount;
+            }
+        }
+        // If nothing will actually render, still honor a pending clear with
+        // a single empty pass so createColorAttachment consumes the clear
+        // state exactly once.
+        if (plan.length === 0) {
+            if (runtime.clearRequested) {
+                const encoder = device.createCommandEncoder();
+                const pass = encoder.beginRenderPass({
+                    colorAttachments: [runtime.createColorAttachment()],
+                });
+                runtime.stats.renderPasses++;
+                pass.end();
+                runtime.submit(encoder.finish());
+            }
+            this._drawCallCount = 0;
+            return;
+        }
+        // Phase 2: size GPU buffers for the whole-frame totals, then pack
+        // every drawcall's CPU-side data. _writeShapeVertices applies
+        // (view * shape.globalTransform) per-vertex so the shader simply
+        // outputs input.position unchanged.
+        this._ensureVertexCapacity(totalVertices);
+        if (totalIndices > 0) {
+            this._ensureIndexCapacity(totalIndices);
+            if (this._packedIndexData.length < totalIndices) {
+                this._packedIndexData = new Uint16Array(Math.max(totalIndices, this._packedIndexData.length === 0 ? 1 : this._packedIndexData.length * 2));
+            }
+        }
+        {
+            let vOffset = 0;
+            let iOffset = 0;
+            for (let i = 0; i < this._drawCallCount; i++) {
+                const resolved = resolvedDrawCalls[i];
+                if (resolved === null) {
+                    continue;
                 }
-                else {
-                    pass.draw(resolvedDrawCall.vertexCount);
+                const drawCall = this._drawCalls[i];
+                const shape = drawCall.shape;
+                this._writeShapeVertices(runtime, shape, vOffset);
+                if (resolved.indices !== null && resolved.indexCount > 0) {
+                    this._packedIndexData.set(resolved.indices.subarray(0, resolved.indexCount), iOffset);
+                    iOffset += resolved.indexCount;
                 }
-                runtime.stats.batches++;
-                runtime.stats.drawCalls++;
+                vOffset += resolved.vertexCount;
             }
         }
+        // Phase 3: single writeBuffer per GPU buffer covers the whole frame.
+        device.queue.writeBuffer(this._vertexBuffer, 0, this._vertexData, 0, totalVertices * vertexStrideBytes$1);
+        if (totalIndices > 0) {
+            device.queue.writeBuffer(this._indexBuffer, 0, this._packedIndexData.buffer, this._packedIndexData.byteOffset, totalIndices * Uint16Array.BYTES_PER_ELEMENT);
+        }
+        // Phase 4: single render pass. Per-draw state is just pipeline and
+        // vertex/index subrange offsets — the transform has already been
+        // baked into the vertex data.
+        const encoder = device.createCommandEncoder();
+        const pass = encoder.beginRenderPass({
+            colorAttachments: [runtime.createColorAttachment()],
+        });
+        runtime.stats.renderPasses++;
+        if (scissor !== null) {
+            pass.setScissorRect(scissor.x, scissor.y, scissor.width, scissor.height);
+        }
+        for (const planned of plan) {
+            pass.setPipeline(planned.pipeline);
+            pass.setVertexBuffer(0, this._vertexBuffer, planned.vertexByteOffset);
+            if (planned.indexCount > 0) {
+                pass.setIndexBuffer(this._indexBuffer, 'uint16', planned.indexByteOffset);
+                pass.drawIndexed(planned.indexCount);
+            }
+            else {
+                pass.draw(planned.vertexCount);
+            }
+            runtime.stats.batches++;
+            runtime.stats.drawCalls++;
+        }
         pass.end();
         runtime.submit(encoder.finish());
         this._drawCallCount = 0;
@@ -7582,65 +7645,76 @@ class WebGpuPrimitiveRenderer extends AbstractWebGpuRenderer {
         this._renderManager = runtime;
         this._device = this._renderManager.device;
         this._shaderModule = this._device.createShaderModule({ code: primitiveShaderSource });
-        this._bindGroupLayout = this._device.createBindGroupLayout({
-            entries: [{
-                    binding: 0,
-                    visibility: GPUShaderStage.VERTEX,
-                    buffer: {
-                        type: 'uniform',
-                    },
-                }],
-        });
+        // Transform is applied per-vertex on the CPU, so no uniform binding
+        // is needed — the shader outputs input.position directly.
         this._pipelineLayout = this._device.createPipelineLayout({
-            bindGroupLayouts: [this._bindGroupLayout],
-        });
-        this._uniformBuffer = this._device.createBuffer({
-            size: transformByteLength,
-            usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
-        });
-        this._bindGroup = this._device.createBindGroup({
-            layout: this._bindGroupLayout,
-            entries: [{
-                    binding: 0,
-                    resource: {
-                        buffer: this._uniformBuffer,
-                    },
-                }],
+            bindGroupLayouts: [],
         });
     }
     onDisconnect() {
         this.flush();
         this._destroyBuffers();
         this._pipelines.clear();
-        this._uniformBuffer?.destroy();
-        this._uniformBuffer = null;
-        this._bindGroup = null;
-        this._bindGroupLayout = null;
         this._pipelineLayout = null;
         this._shaderModule = null;
         this._device = null;
         this._renderManager = null;
         this._drawCallCount = 0;
     }
-    _writeTransformData(runtime, shape) {
+    _writeShapeVertices(runtime, shape, vertexStart) {
+        // Matrix.combine is `other * this` (see Matrix.rotate and
+        // SceneNode.getGlobalTransform, both of which chain via
+        // local.combine(parent.global) to yield parent.global * local).
+        //
+        // We need view * global applied to a local vertex, so start with
+        // global and combine with view — that gives
+        // _combinedTransform = view * global.
         const matrix = this._combinedTransform
-            .copy(runtime.view.getTransform())
-            .combine(shape.getGlobalTransform());
-        this._transformData.set([
-            matrix.a, matrix.c, 0, 0,
-            matrix.b, matrix.d, 0, 0,
-            0, 0, 1, 0,
-            matrix.x, matrix.y, 0, matrix.z,
-        ]);
-    }
-    _writeVertexData(vertices, color) {
+            .copy(shape.getGlobalTransform())
+            .combine(runtime.view.getTransform());
+        // Match the original uniform-based WGSL layout exactly.
+        //
+        // The shader packs the Matrix's 9 fields into a 4x4 mat (column-major
+        // in WGSL):
+        //   col 0 = [a, c, 0, 0]
+        //   col 1 = [b, d, 0, 0]
+        //   col 2 = [0, 0, 1, 0]
+        //   col 3 = [x, y, 0, z]
+        //
+        // Multiplied by vec4(px, py, 0, 1):
+        //   out = col0*px + col1*py + col2*0 + col3*1
+        //   out.x = a*px + b*py + x
+        //   out.y = c*px + d*py + y
+        //   out.z = 0
+        //   out.w = z
+        //
+        // The Matrix class represents the affine matrix in the order
+        //   [a b x]
+        //   [c d y]
+        //   [e f z]
+        // so a/b/c/d are rotation+scale (note: b on the TOP row, c on the
+        // LEFT column, not the other way around) and x/y/z the translation /
+        // w component. Matrix.toArray(false) confirms this layout.
+        const a = matrix.a;
+        const b = matrix.b;
+        const c = matrix.c;
+        const d = matrix.d;
+        const tx = matrix.x;
+        const ty = matrix.y;
+        const tw = matrix.z;
+        const color = shape.color.toRgba();
+        const vertices = shape.geometry.vertices;
         const vertexCount = vertices.length / 2;
         for (let i = 0; i < vertexCount; i++) {
             const sourceIndex = i * 2;
-            const targetIndex = i * 3;
-            this._float32View[targetIndex] = vertices[sourceIndex];
-            this._float32View[targetIndex + 1] = vertices[sourceIndex + 1];
-            this._uint32View[targetIndex + 2] = color;
+            const targetIndex = (vertexStart + i) * wordsPerVertex$1;
+            const px = vertices[sourceIndex];
+            const py = vertices[sourceIndex + 1];
+            this._float32View[targetIndex + 0] = a * px + b * py + tx;
+            this._float32View[targetIndex + 1] = c * px + d * py + ty;
+            this._float32View[targetIndex + 2] = 0;
+            this._float32View[targetIndex + 3] = tw;
+            this._uint32View[targetIndex + 4] = color;
         }
     }
     _ensureVertexCapacity(vertexCount) {
@@ -7687,10 +7761,10 @@ class WebGpuPrimitiveRenderer extends AbstractWebGpuRenderer {
                         attributes: [{
                                 shaderLocation: 0,
                                 offset: 0,
-                                format: 'float32x2',
+                                format: 'float32x4',
                             }, {
                                 shaderLocation: 1,
-                                offset: 8,
+                                offset: 16,
                                 format: 'unorm8x4',
                             }],
                     }],
@@ -7844,7 +7918,7 @@ const spriteShaderSource = `
 struct ProjectionUniforms {
     matrix: mat4x4<f32>,
 };
 @group(0) @binding(0)
 var<uniform> projection: ProjectionUniforms;
@@ -7881,7 +7955,7 @@ var spriteSampler5: sampler;
 var spriteSampler6: sampler;
 @group(1) @binding(15)
 var spriteSampler7: sampler;
 struct VertexInput {
     @location(0) position: vec2<f32>,
     @location(1) texcoord: vec2<f32>,
@@ -7889,7 +7963,7 @@ struct VertexInput {
     @location(3) premultiplySample: u32,
     @location(4) textureSlot: u32,
 };
 struct VertexOutput {
     @builtin(position) position: vec4<f32>,
     @location(0) texcoord: vec2<f32>,
@@ -7897,12 +7971,12 @@ struct VertexOutput {
     @location(2) @interpolate(flat) premultiplySample: u32,
     @location(3) @interpolate(flat) textureSlot: u32,
 };
-@vertex
-fn vertexMain(input: VertexInput) -> VertexOutput {
-    var output: VertexOutput;
-    output.position = projection.matrix * vec4<f32>(input.position, 0.0, 1.0);
+@vertex
+fn vertexMain(input: VertexInput) -> VertexOutput {
+    var output: VertexOutput;
+    output.position = projection.matrix * vec4<f32>(input.position, 0.0, 1.0);
     output.texcoord = input.texcoord;
     output.color = vec4(input.color.rgb * input.color.a, input.color.a);
     output.premultiplySample = input.premultiplySample;
@@ -7911,38 +7985,46 @@ fn vertexMain(input: VertexInput) -> VertexOutput {
     return output;
 }
-fn sampleTexture(slot: u32, uv: vec2<f32>) -> vec4<f32> {
+fn sampleTexture(slot: u32, uv: vec2<f32>, ddx: vec2<f32>, ddy: vec2<f32>) -> vec4<f32> {
     switch slot {
         case 0u: {
-            return textureSample(spriteTexture0, spriteSampler0, uv);
+            return textureSampleGrad(spriteTexture0, spriteSampler0, uv, ddx, ddy);
         }
         case 1u: {
-            return textureSample(spriteTexture1, spriteSampler1, uv);
+            return textureSampleGrad(spriteTexture1, spriteSampler1, uv, ddx, ddy);
         }
         case 2u: {
-            return textureSample(spriteTexture2, spriteSampler2, uv);
+            return textureSampleGrad(spriteTexture2, spriteSampler2, uv, ddx, ddy);
         }
         case 3u: {
-            return textureSample(spriteTexture3, spriteSampler3, uv);
+            return textureSampleGrad(spriteTexture3, spriteSampler3, uv, ddx, ddy);
         }
         case 4u: {
-            return textureSample(spriteTexture4, spriteSampler4, uv);
+            return textureSampleGrad(spriteTexture4, spriteSampler4, uv, ddx, ddy);
         }
         case 5u: {
-            return textureSample(spriteTexture5, spriteSampler5, uv);
+            return textureSampleGrad(spriteTexture5, spriteSampler5, uv, ddx, ddy);
         }
         case 6u: {
-            return textureSample(spriteTexture6, spriteSampler6, uv);
+            return textureSampleGrad(spriteTexture6, spriteSampler6, uv, ddx, ddy);
         }
         default: {
-            return textureSample(spriteTexture7, spriteSampler7, uv);
+            return textureSampleGrad(spriteTexture7, spriteSampler7, uv, ddx, ddy);
         }
     }
 }
 @fragment
 fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
-    let sample = sampleTexture(input.textureSlot, input.texcoord);
+    // Compute screen-space derivatives in uniform control flow before the
+    // per-slot switch. WGSL requires textureSample (implicit LOD) to run in
+    // uniform control flow, which multi-texture batching breaks because the
+    // slot varies per fragment. textureSampleGrad takes explicit derivatives
+    // and is valid regardless of control-flow uniformity, while preserving
+    // mipmap-correct LOD when sprites use mipmapped textures.
+    let ddx = dpdx(input.texcoord);
+    let ddy = dpdy(input.texcoord);
+    let sample = sampleTexture(input.textureSlot, input.texcoord, ddx, ddy);
     let resolvedSample = select(sample, vec4(sample.rgb * sample.a, sample.a), input.premultiplySample == 1u);
     return resolvedSample * input.color;
@@ -8092,6 +8174,44 @@ class WebGpuSpriteRenderer extends AbstractWebGpuRenderer {
         if (this._drawCallCount === 0 && !renderManager.clearRequested) {
             return;
         }
+        // Grow vertex/index buffers up front for the TOTAL sprite count. Two
+        // reasons this must happen before the render pass begins:
+        //   1. _ensureBatchCapacity destroys old buffers and creates new ones
+        //      when capacity grows, so running it after setVertexBuffer /
+        //      setIndexBuffer would leave the pass bound to destroyed buffers.
+        //   2. All batches are packed into the vertex buffer at distinct
+        //      sprite offsets, so the buffer must hold every sprite in the
+        //      flush, not just one batch worth.
+        if (this._drawCallCount > 0) {
+            this._ensureBatchCapacity(this._drawCallCount);
+        }
+        // Walk the batches once, packing each batch's vertex data into the
+        // CPU-side buffer at its own sprite-aligned offset. Each batch's
+        // metadata is recorded for the draw loop below.
+        //
+        // This replaces an earlier per-batch queue.writeBuffer(..., offset: 0)
+        // pattern where every writeBuffer targeted the same GPU offset. All
+        // writeBuffers in a frame execute before queue.submit(commandBuffer),
+        // so only the last batch's vertex data survived — which meant any
+        // flush containing more than one batch rendered every batch using
+        // the LAST batch's vertices (background vanished, sprites duplicated
+        // at wrong sizes, etc. whenever blend mode / texture slot / pipeline
+        // caused a split into multiple batches).
+        const batchPlan = [];
+        let packedSpriteCount = 0;
+        for (let start = 0; start < this._drawCallCount;) {
+            const batch = this._getBatchRange(start);
+            const spriteCount = batch.end - batch.start;
+            this._writeBatchVertexData(batch, packedSpriteCount);
+            batchPlan.push({
+                firstSprite: packedSpriteCount,
+                spriteCount,
+                blendMode: batch.blendMode,
+                textures: batch.textures,
+            });
+            packedSpriteCount += spriteCount;
+            start = batch.end;
+        }
         const viewMatrix = renderManager.view.getTransform();
         this._projectionData.set([
             viewMatrix.a, viewMatrix.c, 0, 0,
@@ -8111,23 +8231,20 @@ class WebGpuSpriteRenderer extends AbstractWebGpuRenderer {
             pass.setScissorRect(scissor.x, scissor.y, scissor.width, scissor.height);
         }
         if (this._drawCallCount > 0 && !maskClipsAll) {
+            // Single upload for the whole packed vertex buffer — every batch
+            // reads from its own sprite range via drawIndexed's firstIndex.
+            device.queue.writeBuffer(this._vertexBuffer, 0, this._vertexData, 0, packedSpriteCount * spriteVertexCount * vertexStrideBytes);
             pass.setBindGroup(0, uniformBindGroup);
             pass.setVertexBuffer(0, this._vertexBuffer);
             pass.setIndexBuffer(this._indexBuffer, 'uint32');
-            for (let start = 0; start < this._drawCallCount;) {
-                const batch = this._getBatchRange(start);
-                const pipeline = this._getPipeline(batch.blendMode, renderManager.renderTargetFormat);
-                const spriteCount = batch.end - batch.start;
-                this._ensureBatchCapacity(spriteCount);
-                this._writeBatchVertexData(batch);
-                device.queue.writeBuffer(this._vertexBuffer, 0, this._vertexData, 0, spriteCount * spriteVertexCount * vertexStrideBytes);
-                const textureBindGroup = this._createTextureBindGroup(device, renderManager, batch.textures);
+            for (const plan of batchPlan) {
+                const pipeline = this._getPipeline(plan.blendMode, renderManager.renderTargetFormat);
+                const textureBindGroup = this._createTextureBindGroup(device, renderManager, plan.textures);
                 pass.setPipeline(pipeline);
                 pass.setBindGroup(1, textureBindGroup);
-                pass.drawIndexed(batch.spriteCount * spriteIndexCount, 1, 0, 0, 0);
+                pass.drawIndexed(plan.spriteCount * spriteIndexCount, 1, plan.firstSprite * spriteIndexCount, 0, 0);
                 renderManager.stats.batches++;
                 renderManager.stats.drawCalls++;
-                start = batch.end;
             }
         }
         pass.end();
@@ -8152,7 +8269,7 @@ class WebGpuSpriteRenderer extends AbstractWebGpuRenderer {
         });
         const indexData = new Uint32Array(nextCapacity * spriteIndexCount);
         const indexBuffer = this._device.createBuffer({
-            size: indexData.byteLength * Uint32Array.BYTES_PER_ELEMENT,
+            size: indexData.byteLength,
             usage: GPUBufferUsage.INDEX | GPUBufferUsage.COPY_DST,
         });
         for (let spriteIndex = 0; spriteIndex < nextCapacity; spriteIndex++) {
@@ -8175,12 +8292,12 @@ class WebGpuSpriteRenderer extends AbstractWebGpuRenderer {
         this._vertexBuffer = vertexBuffer;
         this._indexBuffer = indexBuffer;
     }
-    _writeBatchVertexData(batch) {
+    _writeBatchVertexData(batch, firstSprite) {
         const renderManager = this._renderManager;
         if (!renderManager) {
             return;
         }
-        let vertexOffset = 0;
+        let vertexOffset = firstSprite * spriteVertexCount * wordsPerVertex;
         for (let drawCallIndex = batch.start; drawCallIndex < batch.end; drawCallIndex++) {
             const drawCall = this._drawCalls[drawCallIndex];
             const textureSlot = batch.textureSlots.get(drawCall.texture) ?? 0;
@@ -8462,54 +8579,77 @@ class WebGpuParticleRenderer extends AbstractWebGpuRenderer {
         if (this._drawCallCount === 0 && !runtime.clearRequested) {
             return;
         }
-        const encoder = device.createCommandEncoder();
-        const pass = encoder.beginRenderPass({
-            colorAttachments: [runtime.createColorAttachment()],
-        });
-        runtime.stats.renderPasses++;
         const scissor = runtime.getScissorRect();
         const maskClipsAll = scissor !== null && (scissor.width <= 0 || scissor.height <= 0);
-        if (scissor !== null && !maskClipsAll) {
-            pass.setScissorRect(scissor.x, scissor.y, scissor.width, scissor.height);
-        }
-        if (!maskClipsAll) {
-            pass.setBindGroup(0, uniformBindGroup);
-            for (let drawCallIndex = 0; drawCallIndex < this._drawCallCount; drawCallIndex++) {
-                const drawCall = this._drawCalls[drawCallIndex];
-                const system = drawCall.system;
-                const particleCount = system.particles.length;
-                if (particleCount === 0) {
-                    continue;
-                }
-                const pipeline = this._getPipeline(drawCall.blendMode, runtime.renderTargetFormat);
-                const textureBinding = runtime.getTextureBinding(drawCall.texture);
-                const textureBindGroup = device.createBindGroup({
-                    layout: this._textureBindGroupLayout,
-                    entries: [{
-                            binding: 0,
-                            resource: textureBinding.view,
-                        }, {
-                            binding: 1,
-                            resource: textureBinding.sampler,
-                        }],
+        // If no drawcalls will actually render (none queued, or the scissor
+        // clips everything), but a clear is pending, open a single empty
+        // pass so createColorAttachment consumes the clear state.
+        if (this._drawCallCount === 0 || maskClipsAll) {
+            if (runtime.clearRequested) {
+                const encoder = device.createCommandEncoder();
+                const pass = encoder.beginRenderPass({
+                    colorAttachments: [runtime.createColorAttachment()],
                 });
-                this._ensureCapacity(particleCount);
-                this._writeInstanceData(system.vertices, system.texCoords, system.particles);
-                this._writeUniformData(runtime, system, drawCall.texture);
-                device.queue.writeBuffer(this._instanceBuffer, 0, this._instanceData, 0, particleCount * instanceStrideBytes);
-                device.queue.writeBuffer(uniformBuffer, 0, this._uniformData.buffer, this._uniformData.byteOffset, this._uniformData.byteLength);
-                pass.setPipeline(pipeline);
-                pass.setBindGroup(1, textureBindGroup);
-                pass.setVertexBuffer(0, staticVertexBuffer);
-                pass.setVertexBuffer(1, this._instanceBuffer);
-                pass.setIndexBuffer(indexBuffer, 'uint16');
-                pass.drawIndexed(indicesPerParticle, particleCount, 0, 0, 0);
-                runtime.stats.batches++;
-                runtime.stats.drawCalls++;
+                runtime.stats.renderPasses++;
+                pass.end();
+                runtime.submit(encoder.finish());
             }
+            this._drawCallCount = 0;
+            return;
+        }
+        // One command encoder / pass per drawcall. Each particle system's
+        // queue.writeBuffer calls target offset 0 of the instance and uniform
+        // buffers — a single pass with multiple systems would see all
+        // writeBuffers serialize before submit, leaving only the last
+        // system's data in those buffers and making every earlier draw read
+        // the wrong data. Also: _ensureCapacity may destroy and recreate the
+        // instance buffer on growth; keeping one drawcall per pass means
+        // that destroy happens strictly between submits, so no pass holds a
+        // reference to a buffer that has since been destroyed.
+        for (let drawCallIndex = 0; drawCallIndex < this._drawCallCount; drawCallIndex++) {
+            const drawCall = this._drawCalls[drawCallIndex];
+            const system = drawCall.system;
+            const particleCount = system.particles.length;
+            if (particleCount === 0) {
+                continue;
+            }
+            const pipeline = this._getPipeline(drawCall.blendMode, runtime.renderTargetFormat);
+            const textureBinding = runtime.getTextureBinding(drawCall.texture);
+            const textureBindGroup = device.createBindGroup({
+                layout: this._textureBindGroupLayout,
+                entries: [{
+                        binding: 0,
+                        resource: textureBinding.view,
+                    }, {
+                        binding: 1,
+                        resource: textureBinding.sampler,
+                    }],
+            });
+            this._ensureCapacity(particleCount);
+            this._writeInstanceData(system.vertices, system.texCoords, system.particles);
+            this._writeUniformData(runtime, system, drawCall.texture);
+            device.queue.writeBuffer(this._instanceBuffer, 0, this._instanceData, 0, particleCount * instanceStrideBytes);
+            device.queue.writeBuffer(uniformBuffer, 0, this._uniformData.buffer, this._uniformData.byteOffset, this._uniformData.byteLength);
+            const encoder = device.createCommandEncoder();
+            const pass = encoder.beginRenderPass({
+                colorAttachments: [runtime.createColorAttachment()],
+            });
+            runtime.stats.renderPasses++;
+            if (scissor !== null) {
+                pass.setScissorRect(scissor.x, scissor.y, scissor.width, scissor.height);
+            }
+            pass.setBindGroup(0, uniformBindGroup);
+            pass.setPipeline(pipeline);
+            pass.setBindGroup(1, textureBindGroup);
+            pass.setVertexBuffer(0, staticVertexBuffer);
+            pass.setVertexBuffer(1, this._instanceBuffer);
+            pass.setIndexBuffer(indexBuffer, 'uint16');
+            pass.drawIndexed(indicesPerParticle, particleCount, 0, 0, 0);
+            runtime.stats.batches++;
+            runtime.stats.drawCalls++;
+            pass.end();
+            runtime.submit(encoder.finish());
         }
-        pass.end();
-        runtime.submit(encoder.finish());
         this._drawCallCount = 0;
     }
     destroy() {
@@ -9092,10 +9232,11 @@ class WebGpuRenderManager {
         if (typeof gpuNavigator.gpu.getPreferredCanvasFormat !== 'function') {
             throw new Error('WebGPU is available, but navigator.gpu.getPreferredCanvasFormat is not implemented.');
         }
-        const context = this._canvas.getContext('webgpu');
-        if (context === null) {
-            throw new Error('Could not create WebGPU canvas context.');
-        }
+        // Request the adapter before acquiring a WebGPU canvas context.
+        // getContext('webgpu') is exclusive per canvas — once it succeeds, the
+        // same canvas can no longer produce a WebGL2 context. Doing it the
+        // other way round means an unavailable adapter still locks the canvas
+        // and breaks the automatic WebGL2 fallback in Application.
         let adapter = null;
         try {
             adapter = await gpuNavigator.gpu.requestAdapter();
@@ -9106,6 +9247,10 @@ class WebGpuRenderManager {
         if (adapter === null) {
             throw new Error('Could not acquire a WebGPU adapter.');
         }
+        const context = this._canvas.getContext('webgpu');
+        if (context === null) {
+            throw new Error('Could not create WebGPU canvas context.');
+        }
         if (typeof adapter.requestDevice !== 'function') {
             throw new Error('WebGPU adapter does not expose requestDevice().');
         }
@@ -9396,41 +9541,47 @@ class WebGpuRenderManager {
     _getMipmapResources() {
         if (this._mipmapShaderModule === null || this._mipmapBindGroupLayout === null || this._mipmapPipelineLayout === null || this._mipmapPipeline === null || this._mipmapSampler === null) {
             this._mipmapShaderModule = this.device.createShaderModule({
-                code: `
-struct VertexOutput {
-    @builtin(position) position: vec4<f32>,
-    @location(0) texcoord: vec2<f32>,
-};
-@group(0) @binding(0)
-var sourceTexture: texture_2d<f32>;
-@group(0) @binding(1)
-var sourceSampler: sampler;
-@vertex
-fn vertexMain(@builtin(vertex_index) vertexIndex: u32) -> VertexOutput {
-    var positions = array<vec2<f32>, 3>(
-        vec2<f32>(-1.0, -1.0),
-        vec2<f32>(3.0, -1.0),
-        vec2<f32>(-1.0, 3.0)
-    );
-    var texcoords = array<vec2<f32>, 3>(
-        vec2<f32>(0.0, 0.0),
-        vec2<f32>(2.0, 0.0),
-        vec2<f32>(0.0, 2.0)
-    );
-    var output: VertexOutput;
-    output.position = vec4<f32>(positions[vertexIndex], 0.0, 1.0);
-    output.texcoord = texcoords[vertexIndex];
-    return output;
-}
-@fragment
-fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
-    return textureSample(sourceTexture, sourceSampler, input.texcoord);
-}
+                code: `
+struct VertexOutput {
+    @builtin(position) position: vec4<f32>,
+    @location(0) texcoord: vec2<f32>,
+};
+@group(0) @binding(0)
+var sourceTexture: texture_2d<f32>;
+@group(0) @binding(1)
+var sourceSampler: sampler;
+@vertex
+fn vertexMain(@builtin(vertex_index) vertexIndex: u32) -> VertexOutput {
+    var positions = array<vec2<f32>, 3>(
+        vec2<f32>(-1.0, -1.0),
+        vec2<f32>(3.0, -1.0),
+        vec2<f32>(-1.0, 3.0)
+    );
+    // Y is flipped vs the position array: NDC Y points up, but texture UV
+    // Y points down (UV (0,0) is the top-left of the source). Matching the
+    // two ensures that the output texture's top-left pixel samples from the
+    // source's top-left, so every mip level has the same orientation as the
+    // level above it. Prior to this, odd mip levels were rendered upside
+    // down, producing visible texture flips at view-size doublings.
+    var texcoords = array<vec2<f32>, 3>(
+        vec2<f32>(0.0, 1.0),
+        vec2<f32>(2.0, 1.0),
+        vec2<f32>(0.0, -1.0)
+    );
+    var output: VertexOutput;
+    output.position = vec4<f32>(positions[vertexIndex], 0.0, 1.0);
+    output.texcoord = texcoords[vertexIndex];
+    return output;
+}
+@fragment
+fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
+    return textureSample(sourceTexture, sourceSampler, input.texcoord);
+}
 `,
             });
             this._mipmapBindGroupLayout = this.device.createBindGroupLayout({
@@ -13037,8 +13188,11 @@ class Container extends RenderNode {
     get bottom() {
         return (this.y + this.height - this.origin.y);
     }
-    addChild(child) {
-        return this.addChildAt(child, this._children.length);
+    addChild(...children) {
+        for (const child of children) {
+            this.addChildAt(child, this._children.length);
+        }
+        return this;
     }
     addChildAt(child, index) {
         if (index < 0 || index > this._children.length) {
@@ -15155,6 +15309,17 @@ class ParticleOptions {
     set elapsedLifetime(elapsedLifetime) {
         this._elapsedLifetime.copy(elapsedLifetime);
     }
+    /**
+     * Spawn position for particles emitted with these options, expressed in
+     * the owning ParticleSystem's LOCAL coordinate space — the system's own
+     * `getGlobalTransform()` is applied on top during rendering (both the
+     * WebGL2 and WebGPU shaders do `projection * translation * rotated`).
+     *
+     * Setting a world-space value here (e.g. `system.x + offset`) will
+     * double-translate the emitter because the shader will translate again.
+     * For an emitter anchored at the system origin, use small offsets around
+     * `(0, 0)` and position the system itself via `system.setPosition(...)`.
+     */
     get position() {
         return this._position;
     }