@codexo/exojs 2.1.0 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/CHANGELOG.md +20 -0
  2. package/README.md +1 -3
  3. package/dist/esm/particles/emitters/ParticleOptions.d.ts +11 -0
  4. package/dist/esm/particles/emitters/ParticleOptions.js +11 -0
  5. package/dist/esm/particles/emitters/ParticleOptions.js.map +1 -1
  6. package/dist/esm/rendering/Container.d.ts +1 -1
  7. package/dist/esm/rendering/Container.js +5 -2
  8. package/dist/esm/rendering/Container.js.map +1 -1
  9. package/dist/esm/rendering/webgl2/WebGl2ShaderRuntime.js +7 -0
  10. package/dist/esm/rendering/webgl2/WebGl2ShaderRuntime.js.map +1 -1
  11. package/dist/esm/rendering/webgpu/WebGpuParticleRenderer.js +66 -43
  12. package/dist/esm/rendering/webgpu/WebGpuParticleRenderer.js.map +1 -1
  13. package/dist/esm/rendering/webgpu/WebGpuPrimitiveRenderer.d.ts +2 -6
  14. package/dist/esm/rendering/webgpu/WebGpuPrimitiveRenderer.js +160 -93
  15. package/dist/esm/rendering/webgpu/WebGpuPrimitiveRenderer.js.map +1 -1
  16. package/dist/esm/rendering/webgpu/WebGpuRenderManager.js +50 -39
  17. package/dist/esm/rendering/webgpu/WebGpuRenderManager.js.map +1 -1
  18. package/dist/esm/rendering/webgpu/WebGpuSpriteRenderer.js +75 -32
  19. package/dist/esm/rendering/webgpu/WebGpuSpriteRenderer.js.map +1 -1
  20. package/dist/exo.d.ts +14 -7
  21. package/dist/exo.esm.js +374 -209
  22. package/dist/exo.esm.js.map +1 -1
  23. package/dist/exo.esm.min.js +1 -1
  24. package/dist/exo.esm.min.js.map +1 -1
  25. package/dist/exo.global.js +374 -209
  26. package/dist/exo.global.js.map +1 -1
  27. package/dist/exo.global.min.js +1 -1
  28. package/dist/exo.global.min.js.map +1 -1
  29. package/package.json +3 -4
@@ -5294,6 +5294,13 @@ var Exo = (function (exports) {
5294
5294
  gl.useProgram(null);
5295
5295
  },
5296
5296
  sync: () => {
5297
+ // Bind the program before syncing uniforms. WebGl2RenderManager
5298
+ // does not call bindShader() on the active renderer's shader
5299
+ // during normal draw flow, so sync() is the first entry point
5300
+ // that must establish program binding — otherwise uniform*
5301
+ // targets the wrong (or no) program and the subsequent draw
5302
+ // call fails with "no valid shader program in use".
5303
+ gl.useProgram(program);
5297
5304
  syncUniforms();
5298
5305
  },
5299
5306
  destroy: (shader) => {
@@ -7426,15 +7433,8 @@ var Exo = (function (exports) {
7426
7433
 
7427
7434
  /// <reference types="@webgpu/types" />
7428
7435
  const primitiveShaderSource = `
7429
- struct TransformUniforms {
7430
- matrix: mat4x4<f32>,
7431
- };
7432
-
7433
- @group(0) @binding(0)
7434
- var<uniform> uniforms: TransformUniforms;
7435
-
7436
7436
  struct VertexInput {
7437
- @location(0) position: vec2<f32>,
7437
+ @location(0) position: vec4<f32>,
7438
7438
  @location(1) color: vec4<f32>,
7439
7439
  };
7440
7440
 
@@ -7447,7 +7447,7 @@ struct VertexOutput {
7447
7447
  fn vertexMain(input: VertexInput) -> VertexOutput {
7448
7448
  var output: VertexOutput;
7449
7449
 
7450
- output.position = uniforms.matrix * vec4<f32>(input.position, 0.0, 1.0);
7450
+ output.position = input.position;
7451
7451
  output.color = vec4<f32>(input.color.rgb * input.color.a, input.color.a);
7452
7452
 
7453
7453
  return output;
@@ -7458,23 +7458,24 @@ fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
7458
7458
  return input.color;
7459
7459
  }
7460
7460
  `;
7461
- const vertexStrideBytes$1 = 12;
7462
- const transformByteLength = 64;
7461
+ // 4 floats (pre-transformed clip-space position) + 1 u32 (color) = 20 bytes.
7462
+ // The CPU applies (view * shape.globalTransform) to each vertex before writing
7463
+ // it into the vertex buffer, so the shader outputs the position as-is. This
7464
+ // matches the sprite renderer's approach and eliminates the need for a per-
7465
+ // drawcall uniform binding.
7466
+ const vertexStrideBytes$1 = 20;
7467
+ const wordsPerVertex$1 = vertexStrideBytes$1 / Uint32Array.BYTES_PER_ELEMENT;
7463
7468
  class WebGpuPrimitiveRenderer extends AbstractWebGpuRenderer {
7464
7469
  constructor() {
7465
7470
  super(...arguments);
7466
7471
  this._combinedTransform = new Matrix();
7467
7472
  this._drawCalls = [];
7468
7473
  this._drawCallCount = 0;
7469
- this._transformData = new Float32Array(transformByteLength / Float32Array.BYTES_PER_ELEMENT);
7470
7474
  this._pipelines = new Map();
7471
7475
  this._renderManager = null;
7472
7476
  this._device = null;
7473
7477
  this._shaderModule = null;
7474
- this._bindGroupLayout = null;
7475
7478
  this._pipelineLayout = null;
7476
- this._uniformBuffer = null;
7477
- this._bindGroup = null;
7478
7479
  this._vertexBuffer = null;
7479
7480
  this._indexBuffer = null;
7480
7481
  this._vertexBufferCapacity = 0;
@@ -7482,6 +7483,7 @@ fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
7482
7483
  this._vertexData = new ArrayBuffer(0);
7483
7484
  this._float32View = new Float32Array(this._vertexData);
7484
7485
  this._uint32View = new Uint32Array(this._vertexData);
7486
+ this._packedIndexData = new Uint16Array(0);
7485
7487
  this._generatedIndexData = new Uint16Array(0);
7486
7488
  this._sequentialIndexData = new Uint16Array(0);
7487
7489
  }
@@ -7519,60 +7521,121 @@ fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
7519
7521
  flush() {
7520
7522
  const runtime = this._renderManager;
7521
7523
  const device = this._device;
7522
- const bindGroup = this._bindGroup;
7523
- const uniformBuffer = this._uniformBuffer;
7524
- if (!runtime || !device || !bindGroup || !uniformBuffer) {
7524
+ if (!runtime || !device) {
7525
7525
  return;
7526
7526
  }
7527
7527
  if (this._drawCallCount === 0 && !runtime.clearRequested) {
7528
7528
  return;
7529
7529
  }
7530
- const encoder = device.createCommandEncoder();
7531
- const pass = encoder.beginRenderPass({
7532
- colorAttachments: [runtime.createColorAttachment()],
7533
- });
7534
- runtime.stats.renderPasses++;
7535
7530
  const scissor = runtime.getScissorRect();
7536
7531
  const maskClipsAll = scissor !== null && (scissor.width <= 0 || scissor.height <= 0);
7537
- if (scissor !== null && !maskClipsAll) {
7538
- pass.setScissorRect(scissor.x, scissor.y, scissor.width, scissor.height);
7539
- }
7540
- if (!maskClipsAll) {
7532
+ // Phase 1: resolve drawcalls and record each one's offsets into the
7533
+ // shared packed buffers. Transform gets baked into the vertex data
7534
+ // during phase 2 so no per-drawcall uniform binding is needed.
7535
+ const plan = [];
7536
+ const resolvedDrawCalls = [];
7537
+ let totalVertices = 0;
7538
+ let totalIndices = 0;
7539
+ if (this._drawCallCount > 0 && !maskClipsAll) {
7541
7540
  for (let drawCallIndex = 0; drawCallIndex < this._drawCallCount; drawCallIndex++) {
7542
7541
  const drawCall = this._drawCalls[drawCallIndex];
7543
7542
  const shape = drawCall.shape;
7544
- const vertices = shape.geometry.vertices;
7545
- const resolvedDrawCall = this._resolveDrawCall(shape);
7546
- if (resolvedDrawCall === null) {
7543
+ const resolved = this._resolveDrawCall(shape);
7544
+ resolvedDrawCalls.push(resolved);
7545
+ if (resolved === null) {
7547
7546
  continue;
7548
7547
  }
7549
7548
  const pipeline = this._getPipeline({
7550
- topology: resolvedDrawCall.topology,
7551
- usesStripIndex: resolvedDrawCall.usesStripIndex,
7549
+ topology: resolved.topology,
7550
+ usesStripIndex: resolved.usesStripIndex,
7552
7551
  blendMode: drawCall.blendMode,
7553
7552
  format: runtime.renderTargetFormat,
7554
7553
  });
7555
- this._ensureVertexCapacity(resolvedDrawCall.vertexCount);
7556
- this._writeVertexData(vertices, shape.color.toRgba());
7557
- this._writeTransformData(runtime, shape);
7558
- device.queue.writeBuffer(this._vertexBuffer, 0, this._vertexData, 0, resolvedDrawCall.vertexCount * vertexStrideBytes$1);
7559
- device.queue.writeBuffer(uniformBuffer, 0, this._transformData.buffer, this._transformData.byteOffset, this._transformData.byteLength);
7560
- pass.setPipeline(pipeline);
7561
- pass.setBindGroup(0, bindGroup);
7562
- pass.setVertexBuffer(0, this._vertexBuffer);
7563
- if (resolvedDrawCall.indices !== null && resolvedDrawCall.indexCount > 0) {
7564
- this._ensureIndexCapacity(resolvedDrawCall.indexCount);
7565
- device.queue.writeBuffer(this._indexBuffer, 0, resolvedDrawCall.indices.buffer, resolvedDrawCall.indices.byteOffset, resolvedDrawCall.indexCount * Uint16Array.BYTES_PER_ELEMENT);
7566
- pass.setIndexBuffer(this._indexBuffer, 'uint16');
7567
- pass.drawIndexed(resolvedDrawCall.indexCount);
7554
+ plan.push({
7555
+ pipeline,
7556
+ vertexByteOffset: totalVertices * vertexStrideBytes$1,
7557
+ vertexCount: resolved.vertexCount,
7558
+ indexByteOffset: totalIndices * Uint16Array.BYTES_PER_ELEMENT,
7559
+ indexCount: resolved.indexCount,
7560
+ });
7561
+ totalVertices += resolved.vertexCount;
7562
+ totalIndices += resolved.indexCount;
7563
+ }
7564
+ }
7565
+ // If nothing will actually render, still honor a pending clear with
7566
+ // a single empty pass so createColorAttachment consumes the clear
7567
+ // state exactly once.
7568
+ if (plan.length === 0) {
7569
+ if (runtime.clearRequested) {
7570
+ const encoder = device.createCommandEncoder();
7571
+ const pass = encoder.beginRenderPass({
7572
+ colorAttachments: [runtime.createColorAttachment()],
7573
+ });
7574
+ runtime.stats.renderPasses++;
7575
+ pass.end();
7576
+ runtime.submit(encoder.finish());
7577
+ }
7578
+ this._drawCallCount = 0;
7579
+ return;
7580
+ }
7581
+ // Phase 2: size GPU buffers for the whole-frame totals, then pack
7582
+ // every drawcall's CPU-side data. _writeShapeVertices applies
7583
+ // (view * shape.globalTransform) per-vertex so the shader simply
7584
+ // outputs input.position unchanged.
7585
+ this._ensureVertexCapacity(totalVertices);
7586
+ if (totalIndices > 0) {
7587
+ this._ensureIndexCapacity(totalIndices);
7588
+ if (this._packedIndexData.length < totalIndices) {
7589
+ this._packedIndexData = new Uint16Array(Math.max(totalIndices, this._packedIndexData.length === 0 ? 1 : this._packedIndexData.length * 2));
7590
+ }
7591
+ }
7592
+ {
7593
+ let vOffset = 0;
7594
+ let iOffset = 0;
7595
+ for (let i = 0; i < this._drawCallCount; i++) {
7596
+ const resolved = resolvedDrawCalls[i];
7597
+ if (resolved === null) {
7598
+ continue;
7568
7599
  }
7569
- else {
7570
- pass.draw(resolvedDrawCall.vertexCount);
7600
+ const drawCall = this._drawCalls[i];
7601
+ const shape = drawCall.shape;
7602
+ this._writeShapeVertices(runtime, shape, vOffset);
7603
+ if (resolved.indices !== null && resolved.indexCount > 0) {
7604
+ this._packedIndexData.set(resolved.indices.subarray(0, resolved.indexCount), iOffset);
7605
+ iOffset += resolved.indexCount;
7571
7606
  }
7572
- runtime.stats.batches++;
7573
- runtime.stats.drawCalls++;
7607
+ vOffset += resolved.vertexCount;
7574
7608
  }
7575
7609
  }
7610
+ // Phase 3: single writeBuffer per GPU buffer covers the whole frame.
7611
+ device.queue.writeBuffer(this._vertexBuffer, 0, this._vertexData, 0, totalVertices * vertexStrideBytes$1);
7612
+ if (totalIndices > 0) {
7613
+ device.queue.writeBuffer(this._indexBuffer, 0, this._packedIndexData.buffer, this._packedIndexData.byteOffset, totalIndices * Uint16Array.BYTES_PER_ELEMENT);
7614
+ }
7615
+ // Phase 4: single render pass. Per-draw state is just pipeline and
7616
+ // vertex/index subrange offsets — the transform has already been
7617
+ // baked into the vertex data.
7618
+ const encoder = device.createCommandEncoder();
7619
+ const pass = encoder.beginRenderPass({
7620
+ colorAttachments: [runtime.createColorAttachment()],
7621
+ });
7622
+ runtime.stats.renderPasses++;
7623
+ if (scissor !== null) {
7624
+ pass.setScissorRect(scissor.x, scissor.y, scissor.width, scissor.height);
7625
+ }
7626
+ for (const planned of plan) {
7627
+ pass.setPipeline(planned.pipeline);
7628
+ pass.setVertexBuffer(0, this._vertexBuffer, planned.vertexByteOffset);
7629
+ if (planned.indexCount > 0) {
7630
+ pass.setIndexBuffer(this._indexBuffer, 'uint16', planned.indexByteOffset);
7631
+ pass.drawIndexed(planned.indexCount);
7632
+ }
7633
+ else {
7634
+ pass.draw(planned.vertexCount);
7635
+ }
7636
+ runtime.stats.batches++;
7637
+ runtime.stats.drawCalls++;
7638
+ }
7576
7639
  pass.end();
7577
7640
  runtime.submit(encoder.finish());
7578
7641
  this._drawCallCount = 0;
@@ -7585,65 +7648,76 @@ fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
7585
7648
  this._renderManager = runtime;
7586
7649
  this._device = this._renderManager.device;
7587
7650
  this._shaderModule = this._device.createShaderModule({ code: primitiveShaderSource });
7588
- this._bindGroupLayout = this._device.createBindGroupLayout({
7589
- entries: [{
7590
- binding: 0,
7591
- visibility: GPUShaderStage.VERTEX,
7592
- buffer: {
7593
- type: 'uniform',
7594
- },
7595
- }],
7596
- });
7651
+ // Transform is applied per-vertex on the CPU, so no uniform binding
7652
+ // is needed — the shader outputs input.position directly.
7597
7653
  this._pipelineLayout = this._device.createPipelineLayout({
7598
- bindGroupLayouts: [this._bindGroupLayout],
7599
- });
7600
- this._uniformBuffer = this._device.createBuffer({
7601
- size: transformByteLength,
7602
- usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
7603
- });
7604
- this._bindGroup = this._device.createBindGroup({
7605
- layout: this._bindGroupLayout,
7606
- entries: [{
7607
- binding: 0,
7608
- resource: {
7609
- buffer: this._uniformBuffer,
7610
- },
7611
- }],
7654
+ bindGroupLayouts: [],
7612
7655
  });
7613
7656
  }
7614
7657
  onDisconnect() {
7615
7658
  this.flush();
7616
7659
  this._destroyBuffers();
7617
7660
  this._pipelines.clear();
7618
- this._uniformBuffer?.destroy();
7619
- this._uniformBuffer = null;
7620
- this._bindGroup = null;
7621
- this._bindGroupLayout = null;
7622
7661
  this._pipelineLayout = null;
7623
7662
  this._shaderModule = null;
7624
7663
  this._device = null;
7625
7664
  this._renderManager = null;
7626
7665
  this._drawCallCount = 0;
7627
7666
  }
7628
- _writeTransformData(runtime, shape) {
7667
+ _writeShapeVertices(runtime, shape, vertexStart) {
7668
+ // Matrix.combine is `other * this` (see Matrix.rotate and
7669
+ // SceneNode.getGlobalTransform, both of which chain via
7670
+ // local.combine(parent.global) to yield parent.global * local).
7671
+ //
7672
+ // We need view * global applied to a local vertex, so start with
7673
+ // global and combine with view — that gives
7674
+ // _combinedTransform = view * global.
7629
7675
  const matrix = this._combinedTransform
7630
- .copy(runtime.view.getTransform())
7631
- .combine(shape.getGlobalTransform());
7632
- this._transformData.set([
7633
- matrix.a, matrix.c, 0, 0,
7634
- matrix.b, matrix.d, 0, 0,
7635
- 0, 0, 1, 0,
7636
- matrix.x, matrix.y, 0, matrix.z,
7637
- ]);
7638
- }
7639
- _writeVertexData(vertices, color) {
7676
+ .copy(shape.getGlobalTransform())
7677
+ .combine(runtime.view.getTransform());
7678
+ // Match the original uniform-based WGSL layout exactly.
7679
+ //
7680
+ // The shader packs the Matrix's 9 fields into a 4x4 mat (column-major
7681
+ // in WGSL):
7682
+ // col 0 = [a, c, 0, 0]
7683
+ // col 1 = [b, d, 0, 0]
7684
+ // col 2 = [0, 0, 1, 0]
7685
+ // col 3 = [x, y, 0, z]
7686
+ //
7687
+ // Multiplied by vec4(px, py, 0, 1):
7688
+ // out = col0*px + col1*py + col2*0 + col3*1
7689
+ // out.x = a*px + b*py + x
7690
+ // out.y = c*px + d*py + y
7691
+ // out.z = 0
7692
+ // out.w = z
7693
+ //
7694
+ // The Matrix class represents the affine matrix in the order
7695
+ // [a b x]
7696
+ // [c d y]
7697
+ // [e f z]
7698
+ // so a/b/c/d are rotation+scale (note: b on the TOP row, c on the
7699
+ // LEFT column, not the other way around) and x/y/z the translation /
7700
+ // w component. Matrix.toArray(false) confirms this layout.
7701
+ const a = matrix.a;
7702
+ const b = matrix.b;
7703
+ const c = matrix.c;
7704
+ const d = matrix.d;
7705
+ const tx = matrix.x;
7706
+ const ty = matrix.y;
7707
+ const tw = matrix.z;
7708
+ const color = shape.color.toRgba();
7709
+ const vertices = shape.geometry.vertices;
7640
7710
  const vertexCount = vertices.length / 2;
7641
7711
  for (let i = 0; i < vertexCount; i++) {
7642
7712
  const sourceIndex = i * 2;
7643
- const targetIndex = i * 3;
7644
- this._float32View[targetIndex] = vertices[sourceIndex];
7645
- this._float32View[targetIndex + 1] = vertices[sourceIndex + 1];
7646
- this._uint32View[targetIndex + 2] = color;
7713
+ const targetIndex = (vertexStart + i) * wordsPerVertex$1;
7714
+ const px = vertices[sourceIndex];
7715
+ const py = vertices[sourceIndex + 1];
7716
+ this._float32View[targetIndex + 0] = a * px + b * py + tx;
7717
+ this._float32View[targetIndex + 1] = c * px + d * py + ty;
7718
+ this._float32View[targetIndex + 2] = 0;
7719
+ this._float32View[targetIndex + 3] = tw;
7720
+ this._uint32View[targetIndex + 4] = color;
7647
7721
  }
7648
7722
  }
7649
7723
  _ensureVertexCapacity(vertexCount) {
@@ -7690,10 +7764,10 @@ fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
7690
7764
  attributes: [{
7691
7765
  shaderLocation: 0,
7692
7766
  offset: 0,
7693
- format: 'float32x2',
7767
+ format: 'float32x4',
7694
7768
  }, {
7695
7769
  shaderLocation: 1,
7696
- offset: 8,
7770
+ offset: 16,
7697
7771
  format: 'unorm8x4',
7698
7772
  }],
7699
7773
  }],
@@ -7847,7 +7921,7 @@ fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
7847
7921
  struct ProjectionUniforms {
7848
7922
  matrix: mat4x4<f32>,
7849
7923
  };
7850
-
7924
+
7851
7925
  @group(0) @binding(0)
7852
7926
  var<uniform> projection: ProjectionUniforms;
7853
7927
 
@@ -7884,7 +7958,7 @@ var spriteSampler5: sampler;
7884
7958
  var spriteSampler6: sampler;
7885
7959
  @group(1) @binding(15)
7886
7960
  var spriteSampler7: sampler;
7887
-
7961
+
7888
7962
  struct VertexInput {
7889
7963
  @location(0) position: vec2<f32>,
7890
7964
  @location(1) texcoord: vec2<f32>,
@@ -7892,7 +7966,7 @@ struct VertexInput {
7892
7966
  @location(3) premultiplySample: u32,
7893
7967
  @location(4) textureSlot: u32,
7894
7968
  };
7895
-
7969
+
7896
7970
  struct VertexOutput {
7897
7971
  @builtin(position) position: vec4<f32>,
7898
7972
  @location(0) texcoord: vec2<f32>,
@@ -7900,12 +7974,12 @@ struct VertexOutput {
7900
7974
  @location(2) @interpolate(flat) premultiplySample: u32,
7901
7975
  @location(3) @interpolate(flat) textureSlot: u32,
7902
7976
  };
7903
-
7904
- @vertex
7905
- fn vertexMain(input: VertexInput) -> VertexOutput {
7906
- var output: VertexOutput;
7907
-
7908
- output.position = projection.matrix * vec4<f32>(input.position, 0.0, 1.0);
7977
+
7978
+ @vertex
7979
+ fn vertexMain(input: VertexInput) -> VertexOutput {
7980
+ var output: VertexOutput;
7981
+
7982
+ output.position = projection.matrix * vec4<f32>(input.position, 0.0, 1.0);
7909
7983
  output.texcoord = input.texcoord;
7910
7984
  output.color = vec4(input.color.rgb * input.color.a, input.color.a);
7911
7985
  output.premultiplySample = input.premultiplySample;
@@ -7914,38 +7988,46 @@ fn vertexMain(input: VertexInput) -> VertexOutput {
7914
7988
  return output;
7915
7989
  }
7916
7990
 
7917
- fn sampleTexture(slot: u32, uv: vec2<f32>) -> vec4<f32> {
7991
+ fn sampleTexture(slot: u32, uv: vec2<f32>, ddx: vec2<f32>, ddy: vec2<f32>) -> vec4<f32> {
7918
7992
  switch slot {
7919
7993
  case 0u: {
7920
- return textureSample(spriteTexture0, spriteSampler0, uv);
7994
+ return textureSampleGrad(spriteTexture0, spriteSampler0, uv, ddx, ddy);
7921
7995
  }
7922
7996
  case 1u: {
7923
- return textureSample(spriteTexture1, spriteSampler1, uv);
7997
+ return textureSampleGrad(spriteTexture1, spriteSampler1, uv, ddx, ddy);
7924
7998
  }
7925
7999
  case 2u: {
7926
- return textureSample(spriteTexture2, spriteSampler2, uv);
8000
+ return textureSampleGrad(spriteTexture2, spriteSampler2, uv, ddx, ddy);
7927
8001
  }
7928
8002
  case 3u: {
7929
- return textureSample(spriteTexture3, spriteSampler3, uv);
8003
+ return textureSampleGrad(spriteTexture3, spriteSampler3, uv, ddx, ddy);
7930
8004
  }
7931
8005
  case 4u: {
7932
- return textureSample(spriteTexture4, spriteSampler4, uv);
8006
+ return textureSampleGrad(spriteTexture4, spriteSampler4, uv, ddx, ddy);
7933
8007
  }
7934
8008
  case 5u: {
7935
- return textureSample(spriteTexture5, spriteSampler5, uv);
8009
+ return textureSampleGrad(spriteTexture5, spriteSampler5, uv, ddx, ddy);
7936
8010
  }
7937
8011
  case 6u: {
7938
- return textureSample(spriteTexture6, spriteSampler6, uv);
8012
+ return textureSampleGrad(spriteTexture6, spriteSampler6, uv, ddx, ddy);
7939
8013
  }
7940
8014
  default: {
7941
- return textureSample(spriteTexture7, spriteSampler7, uv);
8015
+ return textureSampleGrad(spriteTexture7, spriteSampler7, uv, ddx, ddy);
7942
8016
  }
7943
8017
  }
7944
8018
  }
7945
8019
 
7946
8020
  @fragment
7947
8021
  fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
7948
- let sample = sampleTexture(input.textureSlot, input.texcoord);
8022
+ // Compute screen-space derivatives in uniform control flow before the
8023
+ // per-slot switch. WGSL requires textureSample (implicit LOD) to run in
8024
+ // uniform control flow, which multi-texture batching breaks because the
8025
+ // slot varies per fragment. textureSampleGrad takes explicit derivatives
8026
+ // and is valid regardless of control-flow uniformity, while preserving
8027
+ // mipmap-correct LOD when sprites use mipmapped textures.
8028
+ let ddx = dpdx(input.texcoord);
8029
+ let ddy = dpdy(input.texcoord);
8030
+ let sample = sampleTexture(input.textureSlot, input.texcoord, ddx, ddy);
7949
8031
  let resolvedSample = select(sample, vec4(sample.rgb * sample.a, sample.a), input.premultiplySample == 1u);
7950
8032
 
7951
8033
  return resolvedSample * input.color;
@@ -8095,6 +8177,44 @@ fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
8095
8177
  if (this._drawCallCount === 0 && !renderManager.clearRequested) {
8096
8178
  return;
8097
8179
  }
8180
+ // Grow vertex/index buffers up front for the TOTAL sprite count. Two
8181
+ // reasons this must happen before the render pass begins:
8182
+ // 1. _ensureBatchCapacity destroys old buffers and creates new ones
8183
+ // when capacity grows, so running it after setVertexBuffer /
8184
+ // setIndexBuffer would leave the pass bound to destroyed buffers.
8185
+ // 2. All batches are packed into the vertex buffer at distinct
8186
+ // sprite offsets, so the buffer must hold every sprite in the
8187
+ // flush, not just one batch worth.
8188
+ if (this._drawCallCount > 0) {
8189
+ this._ensureBatchCapacity(this._drawCallCount);
8190
+ }
8191
+ // Walk the batches once, packing each batch's vertex data into the
8192
+ // CPU-side buffer at its own sprite-aligned offset. Each batch's
8193
+ // metadata is recorded for the draw loop below.
8194
+ //
8195
+ // This replaces an earlier per-batch queue.writeBuffer(..., offset: 0)
8196
+ // pattern where every writeBuffer targeted the same GPU offset. All
8197
+ // writeBuffers in a frame execute before queue.submit(commandBuffer),
8198
+ // so only the last batch's vertex data survived — which meant any
8199
+ // flush containing more than one batch rendered every batch using
8200
+ // the LAST batch's vertices (background vanished, sprites duplicated
8201
+ // at wrong sizes, etc. whenever blend mode / texture slot / pipeline
8202
+ // caused a split into multiple batches).
8203
+ const batchPlan = [];
8204
+ let packedSpriteCount = 0;
8205
+ for (let start = 0; start < this._drawCallCount;) {
8206
+ const batch = this._getBatchRange(start);
8207
+ const spriteCount = batch.end - batch.start;
8208
+ this._writeBatchVertexData(batch, packedSpriteCount);
8209
+ batchPlan.push({
8210
+ firstSprite: packedSpriteCount,
8211
+ spriteCount,
8212
+ blendMode: batch.blendMode,
8213
+ textures: batch.textures,
8214
+ });
8215
+ packedSpriteCount += spriteCount;
8216
+ start = batch.end;
8217
+ }
8098
8218
  const viewMatrix = renderManager.view.getTransform();
8099
8219
  this._projectionData.set([
8100
8220
  viewMatrix.a, viewMatrix.c, 0, 0,
@@ -8114,23 +8234,20 @@ fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
8114
8234
  pass.setScissorRect(scissor.x, scissor.y, scissor.width, scissor.height);
8115
8235
  }
8116
8236
  if (this._drawCallCount > 0 && !maskClipsAll) {
8237
+ // Single upload for the whole packed vertex buffer — every batch
8238
+ // reads from its own sprite range via drawIndexed's firstIndex.
8239
+ device.queue.writeBuffer(this._vertexBuffer, 0, this._vertexData, 0, packedSpriteCount * spriteVertexCount * vertexStrideBytes);
8117
8240
  pass.setBindGroup(0, uniformBindGroup);
8118
8241
  pass.setVertexBuffer(0, this._vertexBuffer);
8119
8242
  pass.setIndexBuffer(this._indexBuffer, 'uint32');
8120
- for (let start = 0; start < this._drawCallCount;) {
8121
- const batch = this._getBatchRange(start);
8122
- const pipeline = this._getPipeline(batch.blendMode, renderManager.renderTargetFormat);
8123
- const spriteCount = batch.end - batch.start;
8124
- this._ensureBatchCapacity(spriteCount);
8125
- this._writeBatchVertexData(batch);
8126
- device.queue.writeBuffer(this._vertexBuffer, 0, this._vertexData, 0, spriteCount * spriteVertexCount * vertexStrideBytes);
8127
- const textureBindGroup = this._createTextureBindGroup(device, renderManager, batch.textures);
8243
+ for (const plan of batchPlan) {
8244
+ const pipeline = this._getPipeline(plan.blendMode, renderManager.renderTargetFormat);
8245
+ const textureBindGroup = this._createTextureBindGroup(device, renderManager, plan.textures);
8128
8246
  pass.setPipeline(pipeline);
8129
8247
  pass.setBindGroup(1, textureBindGroup);
8130
- pass.drawIndexed(batch.spriteCount * spriteIndexCount, 1, 0, 0, 0);
8248
+ pass.drawIndexed(plan.spriteCount * spriteIndexCount, 1, plan.firstSprite * spriteIndexCount, 0, 0);
8131
8249
  renderManager.stats.batches++;
8132
8250
  renderManager.stats.drawCalls++;
8133
- start = batch.end;
8134
8251
  }
8135
8252
  }
8136
8253
  pass.end();
@@ -8155,7 +8272,7 @@ fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
8155
8272
  });
8156
8273
  const indexData = new Uint32Array(nextCapacity * spriteIndexCount);
8157
8274
  const indexBuffer = this._device.createBuffer({
8158
- size: indexData.byteLength * Uint32Array.BYTES_PER_ELEMENT,
8275
+ size: indexData.byteLength,
8159
8276
  usage: GPUBufferUsage.INDEX | GPUBufferUsage.COPY_DST,
8160
8277
  });
8161
8278
  for (let spriteIndex = 0; spriteIndex < nextCapacity; spriteIndex++) {
@@ -8178,12 +8295,12 @@ fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
8178
8295
  this._vertexBuffer = vertexBuffer;
8179
8296
  this._indexBuffer = indexBuffer;
8180
8297
  }
8181
- _writeBatchVertexData(batch) {
8298
+ _writeBatchVertexData(batch, firstSprite) {
8182
8299
  const renderManager = this._renderManager;
8183
8300
  if (!renderManager) {
8184
8301
  return;
8185
8302
  }
8186
- let vertexOffset = 0;
8303
+ let vertexOffset = firstSprite * spriteVertexCount * wordsPerVertex;
8187
8304
  for (let drawCallIndex = batch.start; drawCallIndex < batch.end; drawCallIndex++) {
8188
8305
  const drawCall = this._drawCalls[drawCallIndex];
8189
8306
  const textureSlot = batch.textureSlots.get(drawCall.texture) ?? 0;
@@ -8465,54 +8582,77 @@ fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
8465
8582
  if (this._drawCallCount === 0 && !runtime.clearRequested) {
8466
8583
  return;
8467
8584
  }
8468
- const encoder = device.createCommandEncoder();
8469
- const pass = encoder.beginRenderPass({
8470
- colorAttachments: [runtime.createColorAttachment()],
8471
- });
8472
- runtime.stats.renderPasses++;
8473
8585
  const scissor = runtime.getScissorRect();
8474
8586
  const maskClipsAll = scissor !== null && (scissor.width <= 0 || scissor.height <= 0);
8475
- if (scissor !== null && !maskClipsAll) {
8476
- pass.setScissorRect(scissor.x, scissor.y, scissor.width, scissor.height);
8477
- }
8478
- if (!maskClipsAll) {
8479
- pass.setBindGroup(0, uniformBindGroup);
8480
- for (let drawCallIndex = 0; drawCallIndex < this._drawCallCount; drawCallIndex++) {
8481
- const drawCall = this._drawCalls[drawCallIndex];
8482
- const system = drawCall.system;
8483
- const particleCount = system.particles.length;
8484
- if (particleCount === 0) {
8485
- continue;
8486
- }
8487
- const pipeline = this._getPipeline(drawCall.blendMode, runtime.renderTargetFormat);
8488
- const textureBinding = runtime.getTextureBinding(drawCall.texture);
8489
- const textureBindGroup = device.createBindGroup({
8490
- layout: this._textureBindGroupLayout,
8491
- entries: [{
8492
- binding: 0,
8493
- resource: textureBinding.view,
8494
- }, {
8495
- binding: 1,
8496
- resource: textureBinding.sampler,
8497
- }],
8587
+ // If no drawcalls will actually render (none queued, or the scissor
8588
+ // clips everything), but a clear is pending, open a single empty
8589
+ // pass so createColorAttachment consumes the clear state.
8590
+ if (this._drawCallCount === 0 || maskClipsAll) {
8591
+ if (runtime.clearRequested) {
8592
+ const encoder = device.createCommandEncoder();
8593
+ const pass = encoder.beginRenderPass({
8594
+ colorAttachments: [runtime.createColorAttachment()],
8498
8595
  });
8499
- this._ensureCapacity(particleCount);
8500
- this._writeInstanceData(system.vertices, system.texCoords, system.particles);
8501
- this._writeUniformData(runtime, system, drawCall.texture);
8502
- device.queue.writeBuffer(this._instanceBuffer, 0, this._instanceData, 0, particleCount * instanceStrideBytes);
8503
- device.queue.writeBuffer(uniformBuffer, 0, this._uniformData.buffer, this._uniformData.byteOffset, this._uniformData.byteLength);
8504
- pass.setPipeline(pipeline);
8505
- pass.setBindGroup(1, textureBindGroup);
8506
- pass.setVertexBuffer(0, staticVertexBuffer);
8507
- pass.setVertexBuffer(1, this._instanceBuffer);
8508
- pass.setIndexBuffer(indexBuffer, 'uint16');
8509
- pass.drawIndexed(indicesPerParticle, particleCount, 0, 0, 0);
8510
- runtime.stats.batches++;
8511
- runtime.stats.drawCalls++;
8596
+ runtime.stats.renderPasses++;
8597
+ pass.end();
8598
+ runtime.submit(encoder.finish());
8512
8599
  }
8600
+ this._drawCallCount = 0;
8601
+ return;
8602
+ }
8603
+ // One command encoder / pass per drawcall. Each particle system's
8604
+ // queue.writeBuffer calls target offset 0 of the instance and uniform
8605
+ // buffers — a single pass with multiple systems would see all
8606
+ // writeBuffers serialize before submit, leaving only the last
8607
+ // system's data in those buffers and making every earlier draw read
8608
+ // the wrong data. Also: _ensureCapacity may destroy and recreate the
8609
+ // instance buffer on growth; keeping one drawcall per pass means
8610
+ // that destroy happens strictly between submits, so no pass holds a
8611
+ // reference to a buffer that has since been destroyed.
8612
+ for (let drawCallIndex = 0; drawCallIndex < this._drawCallCount; drawCallIndex++) {
8613
+ const drawCall = this._drawCalls[drawCallIndex];
8614
+ const system = drawCall.system;
8615
+ const particleCount = system.particles.length;
8616
+ if (particleCount === 0) {
8617
+ continue;
8618
+ }
8619
+ const pipeline = this._getPipeline(drawCall.blendMode, runtime.renderTargetFormat);
8620
+ const textureBinding = runtime.getTextureBinding(drawCall.texture);
8621
+ const textureBindGroup = device.createBindGroup({
8622
+ layout: this._textureBindGroupLayout,
8623
+ entries: [{
8624
+ binding: 0,
8625
+ resource: textureBinding.view,
8626
+ }, {
8627
+ binding: 1,
8628
+ resource: textureBinding.sampler,
8629
+ }],
8630
+ });
8631
+ this._ensureCapacity(particleCount);
8632
+ this._writeInstanceData(system.vertices, system.texCoords, system.particles);
8633
+ this._writeUniformData(runtime, system, drawCall.texture);
8634
+ device.queue.writeBuffer(this._instanceBuffer, 0, this._instanceData, 0, particleCount * instanceStrideBytes);
8635
+ device.queue.writeBuffer(uniformBuffer, 0, this._uniformData.buffer, this._uniformData.byteOffset, this._uniformData.byteLength);
8636
+ const encoder = device.createCommandEncoder();
8637
+ const pass = encoder.beginRenderPass({
8638
+ colorAttachments: [runtime.createColorAttachment()],
8639
+ });
8640
+ runtime.stats.renderPasses++;
8641
+ if (scissor !== null) {
8642
+ pass.setScissorRect(scissor.x, scissor.y, scissor.width, scissor.height);
8643
+ }
8644
+ pass.setBindGroup(0, uniformBindGroup);
8645
+ pass.setPipeline(pipeline);
8646
+ pass.setBindGroup(1, textureBindGroup);
8647
+ pass.setVertexBuffer(0, staticVertexBuffer);
8648
+ pass.setVertexBuffer(1, this._instanceBuffer);
8649
+ pass.setIndexBuffer(indexBuffer, 'uint16');
8650
+ pass.drawIndexed(indicesPerParticle, particleCount, 0, 0, 0);
8651
+ runtime.stats.batches++;
8652
+ runtime.stats.drawCalls++;
8653
+ pass.end();
8654
+ runtime.submit(encoder.finish());
8513
8655
  }
8514
- pass.end();
8515
- runtime.submit(encoder.finish());
8516
8656
  this._drawCallCount = 0;
8517
8657
  }
8518
8658
  destroy() {
@@ -9095,10 +9235,11 @@ fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
9095
9235
  if (typeof gpuNavigator.gpu.getPreferredCanvasFormat !== 'function') {
9096
9236
  throw new Error('WebGPU is available, but navigator.gpu.getPreferredCanvasFormat is not implemented.');
9097
9237
  }
9098
- const context = this._canvas.getContext('webgpu');
9099
- if (context === null) {
9100
- throw new Error('Could not create WebGPU canvas context.');
9101
- }
9238
+ // Request the adapter before acquiring a WebGPU canvas context.
9239
+ // getContext('webgpu') is exclusive per canvas — once it succeeds, the
9240
+ // same canvas can no longer produce a WebGL2 context. Doing it the
9241
+ // other way round means an unavailable adapter still locks the canvas
9242
+ // and breaks the automatic WebGL2 fallback in Application.
9102
9243
  let adapter = null;
9103
9244
  try {
9104
9245
  adapter = await gpuNavigator.gpu.requestAdapter();
@@ -9109,6 +9250,10 @@ fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
9109
9250
  if (adapter === null) {
9110
9251
  throw new Error('Could not acquire a WebGPU adapter.');
9111
9252
  }
9253
+ const context = this._canvas.getContext('webgpu');
9254
+ if (context === null) {
9255
+ throw new Error('Could not create WebGPU canvas context.');
9256
+ }
9112
9257
  if (typeof adapter.requestDevice !== 'function') {
9113
9258
  throw new Error('WebGPU adapter does not expose requestDevice().');
9114
9259
  }
@@ -9399,41 +9544,47 @@ fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
9399
9544
  _getMipmapResources() {
9400
9545
  if (this._mipmapShaderModule === null || this._mipmapBindGroupLayout === null || this._mipmapPipelineLayout === null || this._mipmapPipeline === null || this._mipmapSampler === null) {
9401
9546
  this._mipmapShaderModule = this.device.createShaderModule({
9402
- code: `
9403
- struct VertexOutput {
9404
- @builtin(position) position: vec4<f32>,
9405
- @location(0) texcoord: vec2<f32>,
9406
- };
9407
-
9408
- @group(0) @binding(0)
9409
- var sourceTexture: texture_2d<f32>;
9410
- @group(0) @binding(1)
9411
- var sourceSampler: sampler;
9412
-
9413
- @vertex
9414
- fn vertexMain(@builtin(vertex_index) vertexIndex: u32) -> VertexOutput {
9415
- var positions = array<vec2<f32>, 3>(
9416
- vec2<f32>(-1.0, -1.0),
9417
- vec2<f32>(3.0, -1.0),
9418
- vec2<f32>(-1.0, 3.0)
9419
- );
9420
- var texcoords = array<vec2<f32>, 3>(
9421
- vec2<f32>(0.0, 0.0),
9422
- vec2<f32>(2.0, 0.0),
9423
- vec2<f32>(0.0, 2.0)
9424
- );
9425
- var output: VertexOutput;
9426
-
9427
- output.position = vec4<f32>(positions[vertexIndex], 0.0, 1.0);
9428
- output.texcoord = texcoords[vertexIndex];
9429
-
9430
- return output;
9431
- }
9432
-
9433
- @fragment
9434
- fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
9435
- return textureSample(sourceTexture, sourceSampler, input.texcoord);
9436
- }
9547
+ code: `
9548
+ struct VertexOutput {
9549
+ @builtin(position) position: vec4<f32>,
9550
+ @location(0) texcoord: vec2<f32>,
9551
+ };
9552
+
9553
+ @group(0) @binding(0)
9554
+ var sourceTexture: texture_2d<f32>;
9555
+ @group(0) @binding(1)
9556
+ var sourceSampler: sampler;
9557
+
9558
+ @vertex
9559
+ fn vertexMain(@builtin(vertex_index) vertexIndex: u32) -> VertexOutput {
9560
+ var positions = array<vec2<f32>, 3>(
9561
+ vec2<f32>(-1.0, -1.0),
9562
+ vec2<f32>(3.0, -1.0),
9563
+ vec2<f32>(-1.0, 3.0)
9564
+ );
9565
+ // Y is flipped vs the position array: NDC Y points up, but texture UV
9566
+ // Y points down (UV (0,0) is the top-left of the source). Matching the
9567
+ // two ensures that the output texture's top-left pixel samples from the
9568
+ // source's top-left, so every mip level has the same orientation as the
9569
+ // level above it. Prior to this, odd mip levels were rendered upside
9570
+ // down, producing visible texture flips at view-size doublings.
9571
+ var texcoords = array<vec2<f32>, 3>(
9572
+ vec2<f32>(0.0, 1.0),
9573
+ vec2<f32>(2.0, 1.0),
9574
+ vec2<f32>(0.0, -1.0)
9575
+ );
9576
+ var output: VertexOutput;
9577
+
9578
+ output.position = vec4<f32>(positions[vertexIndex], 0.0, 1.0);
9579
+ output.texcoord = texcoords[vertexIndex];
9580
+
9581
+ return output;
9582
+ }
9583
+
9584
+ @fragment
9585
+ fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
9586
+ return textureSample(sourceTexture, sourceSampler, input.texcoord);
9587
+ }
9437
9588
  `,
9438
9589
  });
9439
9590
  this._mipmapBindGroupLayout = this.device.createBindGroupLayout({
@@ -13040,8 +13191,11 @@ fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
13040
13191
  get bottom() {
13041
13192
  return (this.y + this.height - this.origin.y);
13042
13193
  }
13043
- addChild(child) {
13044
- return this.addChildAt(child, this._children.length);
13194
+ addChild(...children) {
13195
+ for (const child of children) {
13196
+ this.addChildAt(child, this._children.length);
13197
+ }
13198
+ return this;
13045
13199
  }
13046
13200
  addChildAt(child, index) {
13047
13201
  if (index < 0 || index > this._children.length) {
@@ -15158,6 +15312,17 @@ fn fragmentMain(input: VertexOutput) -> @location(0) vec4<f32> {
15158
15312
  set elapsedLifetime(elapsedLifetime) {
15159
15313
  this._elapsedLifetime.copy(elapsedLifetime);
15160
15314
  }
15315
+ /**
15316
+ * Spawn position for particles emitted with these options, expressed in
15317
+ * the owning ParticleSystem's LOCAL coordinate space — the system's own
15318
+ * `getGlobalTransform()` is applied on top during rendering (both the
15319
+ * WebGL2 and WebGPU shaders do `projection * translation * rotated`).
15320
+ *
15321
+ * Setting a world-space value here (e.g. `system.x + offset`) will
15322
+ * double-translate the emitter because the shader will translate again.
15323
+ * For an emitter anchored at the system origin, use small offsets around
15324
+ * `(0, 0)` and position the system itself via `system.setPosition(...)`.
15325
+ */
15161
15326
  get position() {
15162
15327
  return this._position;
15163
15328
  }