@spatialwalk/avatarkit 1.0.0-beta.75 → 1.0.0-beta.77

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8243,7 +8243,7 @@ const _AnimationPlayer = class _AnimationPlayer {
8243
8243
  if (this.streamingPlayer) {
8244
8244
  return;
8245
8245
  }
8246
- const { StreamingAudioPlayer } = await import("./StreamingAudioPlayer-8jk86K8D.js");
8246
+ const { StreamingAudioPlayer } = await import("./StreamingAudioPlayer-BtOgYxcz.js");
8247
8247
  const { AvatarSDK: AvatarSDK2 } = await Promise.resolve().then(() => AvatarSDK$1);
8248
8248
  const audioFormat = AvatarSDK2.getAudioFormat();
8249
8249
  this.streamingPlayer = new StreamingAudioPlayer({
@@ -8756,6 +8756,52 @@ class AvatarCoreMemoryManager {
8756
8756
  flatData.set(this.module.HEAPF32.subarray(floatOffset, floatOffset + totalFloats));
8757
8757
  return flatData;
8758
8758
  }
8759
+ /**
8760
+ * 🆕 Read the AvatarFaceGeometryArray struct data (WebGPU-optimized path)
8761
+ * Each face geometry: center[3] + scale + quat[4] = 8 floats
8762
+ */
8763
+ readFaceGeometryArray(arrayPtr) {
8764
+ if (!arrayPtr) {
8765
+ throw new Error("Invalid face geometry array pointer");
8766
+ }
8767
+ const geometriesPtr = this.module.getValue(arrayPtr, "i32");
8768
+ const geometryCount = this.module.getValue(arrayPtr + 4, "i32");
8769
+ if (geometryCount === 0 || !geometriesPtr) {
8770
+ return null;
8771
+ }
8772
+ const floatsPerGeometry = 8;
8773
+ const totalFloats = geometryCount * floatsPerGeometry;
8774
+ const floatOffset = geometriesPtr / 4;
8775
+ return this.module.HEAPF32.subarray(floatOffset, floatOffset + totalFloats);
8776
+ }
8777
+ /**
8778
+ * 🆕 Read the AvatarOriginalSplatArray struct data (WebGPU-optimized path)
8779
+ * Each original splat: 15 floats + 1 int32 = 64 bytes
8780
+ */
8781
+ readOriginalSplatArray(arrayPtr) {
8782
+ if (!arrayPtr) {
8783
+ throw new Error("Invalid original splat array pointer");
8784
+ }
8785
+ const splatsPtr = this.module.getValue(arrayPtr, "i32");
8786
+ const splatCount = this.module.getValue(arrayPtr + 4, "i32");
8787
+ if (splatCount === 0 || !splatsPtr) {
8788
+ return null;
8789
+ }
8790
+ const floatsPerSplat = 16;
8791
+ const totalFloats = splatCount * floatsPerSplat;
8792
+ const splatData = new Float32Array(totalFloats);
8793
+ const startFloatOffset = splatsPtr / 4;
8794
+ for (let i2 = 0; i2 < splatCount; i2++) {
8795
+ const splatFloatOffset = startFloatOffset + i2 * 16;
8796
+ for (let j2 = 0; j2 < 15; j2++) {
8797
+ splatData[i2 * 16 + j2] = this.module.HEAPF32[splatFloatOffset + j2];
8798
+ }
8799
+ const bindingByteOffset = splatsPtr + i2 * 64 + 60;
8800
+ const bindingInt = this.module.getValue(bindingByteOffset, "i32");
8801
+ splatData[i2 * 16 + 15] = bindingInt;
8802
+ }
8803
+ return { data: splatData, count: splatCount };
8804
+ }
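readOriginalSplatArray above packs 15 floats plus the int32 binding index into 16 floats per splat. A minimal consumer-side sketch (not part of the package) of slicing one record, following the per-splat layout documented in the transform compute shader later in this diff:

    // Sketch only; field order per the shader comment:
    // [position.xyz, scale.xyz, rotation.xyzw, color.rgba, opacity, binding (as float)].
    function unpackSplat(result, i) {
      const base = i * 16;
      const d = result.data;
      return {
        position: d.subarray(base, base + 3),
        scale: d.subarray(base + 3, base + 6),
        rotation: d.subarray(base + 6, base + 10),
        color: d.subarray(base + 10, base + 14),
        opacity: d[base + 14],
        binding: d[base + 15] | 0 // face index, round-tripped through a float slot
      };
    }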
8759
8805
  /**
8760
8806
  * Read the AvatarMeshData struct data
8761
8807
  */
@@ -9017,7 +9063,13 @@ class AvatarCoreAdapter {
9017
9063
  // core, character, x, y, z
9018
9064
  resetEyeTracking: this.wasmModule.cwrap("avatar_core_reset_eye_tracking", "number", ["number"]),
9019
9065
  // FLAME information query
9020
- getFlameInfo: this.wasmModule.cwrap("avatar_core_get_flame_info", "number", ["number", "number", "number", "number"])
9066
+ getFlameInfo: this.wasmModule.cwrap("avatar_core_get_flame_info", "number", ["number", "number", "number", "number"]),
9067
+ // 🆕 GPU-related APIs
9068
+ computeFrameAsFaceGeometry: this.wasmModule.cwrap("avatar_core_compute_frame_as_face_geometry", "number", ["number", "number", "number", "number"]),
9069
+ getOriginalSplats: this.wasmModule.cwrap("avatar_core_get_original_splats", "number", ["number", "number", "number"]),
9070
+ getFLAMETemplateData: this.wasmModule.cwrap("avatar_core_get_flame_template_data", "number", ["number", "number", "number"]),
9071
+ freeFaceGeometry: this.wasmModule.cwrap("avatar_core_free_face_geometry", null, ["number"]),
9072
+ freeOriginalSplats: this.wasmModule.cwrap("avatar_core_free_original_splats", null, ["number"])
9021
9073
  };
9022
9074
  }
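The new entries follow the same Emscripten cwrap pattern as the existing bindings: "number" stands in for both integers and WASM pointers, and a null return type marks a void C function. A rough standalone sketch of what one added binding amounts to, assuming a standard Emscripten module object:

    // Equivalent form of the getOriginalSplats entry above (sketch).
    const getOriginalSplats = wasmModule.cwrap(
      "avatar_core_get_original_splats", // exported C symbol
      "number",                          // return value: status code, checked via checkError()
      ["number", "number", "number"]     // core handle, character handle, output struct pointer
    );
    // const status = getOriginalSplats(coreHandle, characterHandle, outputPtr);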
9023
9075
  /**
@@ -9670,6 +9722,181 @@ class AvatarCoreAdapter {
9670
9722
  }
9671
9723
  return null;
9672
9724
  }
9725
+ // ==================== 🆕 GPU-related methods ====================
9726
+ /**
9727
+ * 🆕 GPU path: compute a frame and return face geometry data
9728
+ */
9729
+ async computeFrameAsFaceGeometry(params) {
9730
+ if (!this.isCharacterLoaded) {
9731
+ throw new Error("Character not loaded");
9732
+ }
9733
+ let outputPtr = null;
9734
+ let paramsPtr = null;
9735
+ try {
9736
+ const frameIndex = (params == null ? void 0 : params.frameIndex) ?? 0;
9737
+ const characterId = params == null ? void 0 : params.characterId;
9738
+ paramsPtr = await this.getAnimationFrameParams(frameIndex, characterId);
9739
+ outputPtr = this.wasmModule._malloc(12);
9740
+ const result2 = this.api.computeFrameAsFaceGeometry(
9741
+ this.coreHandle,
9742
+ this.characterHandle,
9743
+ paramsPtr,
9744
+ outputPtr
9745
+ );
9746
+ this.checkError(result2, "avatar_core_compute_frame_as_face_geometry");
9747
+ return this.memoryManager.readFaceGeometryArray(outputPtr);
9748
+ } catch (error) {
9749
+ const errorMessage = error instanceof Error ? error.message : String(error);
9750
+ logger.errorWithError("❌ computeFrameAsFaceGeometry failed:", errorMessage);
9751
+ throw error;
9752
+ } finally {
9753
+ if (paramsPtr !== null) {
9754
+ this.wasmModule._free(paramsPtr);
9755
+ }
9756
+ if (outputPtr !== null) {
9757
+ this.api.freeFaceGeometry(outputPtr);
9758
+ this.wasmModule._free(outputPtr);
9759
+ }
9760
+ }
9761
+ }
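For reference, the 12-byte output struct allocated here is the one readFaceGeometryArray parses: a pointer to the face-geometry floats at offset 0 and the geometry count at offset 4 (the remaining 4 bytes are not read on the JS side). A hedged caller-side sketch, where `adapter` stands for an AvatarCoreAdapter instance:

    // Each face geometry is 8 floats: center[3], scale, quat[4].
    const view = await adapter.computeFrameAsFaceGeometry({ frameIndex: 0 });
    const faceGeometries = view ? new Float32Array(view) : null; // own copy, detached from HEAPF32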
9762
+ /**
9763
+ * 🆕 Fetch the original 3DGS splat data (one-time call)
9764
+ */
9765
+ async getOriginalSplatsData() {
9766
+ if (!this.isCharacterLoaded) {
9767
+ throw new Error("Character not loaded");
9768
+ }
9769
+ let outputPtr = null;
9770
+ try {
9771
+ outputPtr = this.wasmModule._malloc(8);
9772
+ const result2 = this.api.getOriginalSplats(
9773
+ this.coreHandle,
9774
+ this.characterHandle,
9775
+ outputPtr
9776
+ );
9777
+ this.checkError(result2, "avatar_core_get_original_splats");
9778
+ const splatData = this.memoryManager.readOriginalSplatArray(outputPtr);
9779
+ if (splatData) {
9780
+ logger.log(`✅ Loaded ${splatData.count} original splats for WebGPU (${(splatData.data.byteLength / 1024 / 1024).toFixed(2)} MB)`);
9781
+ }
9782
+ return splatData;
9783
+ } catch (error) {
9784
+ const errorMessage = error instanceof Error ? error.message : String(error);
9785
+ logger.errorWithError("❌ getOriginalSplatsData failed:", errorMessage);
9786
+ throw error;
9787
+ } finally {
9788
+ if (outputPtr !== null) {
9789
+ this.api.freeOriginalSplats(outputPtr);
9790
+ this.wasmModule._free(outputPtr);
9791
+ }
9792
+ }
9793
+ }
9794
+ /**
9795
+ * 🆕 Fetch the character's shape parameters
9796
+ */
9797
+ async getCharacterShapeParams(characterId) {
9798
+ if (!this.isInitialized) {
9799
+ throw new Error("Avatar Core not initialized");
9800
+ }
9801
+ const charHandle = characterId ? this.characterHandles.get(characterId) || null : this.characterHandle;
9802
+ if (!charHandle) {
9803
+ throw new Error("Character not loaded");
9804
+ }
9805
+ try {
9806
+ const paramsPtr = this.wasmModule._malloc(300 * 4);
9807
+ const result2 = this.api.getCharacterShapeParams(charHandle, paramsPtr);
9808
+ this.checkError(result2, "avatar_core_get_character_shape_params");
9809
+ const buffer = this.wasmModule.HEAPU8.buffer;
9810
+ const params = Array.from(new Float32Array(buffer, paramsPtr, 300));
9811
+ this.wasmModule._free(paramsPtr);
9812
+ return { params };
9813
+ } catch (error) {
9814
+ logger.errorWithError("getCharacterShapeParams failed:", error);
9815
+ throw error;
9816
+ }
9817
+ }
9818
+ /**
9819
+ * 🆕 Fetch the FLAME template data (for the GPU FLAME pipeline)
9820
+ */
9821
+ async getFLAMETemplateData(characterId) {
9822
+ if (!this.isInitialized) {
9823
+ throw new Error("Avatar Core not initialized");
9824
+ }
9825
+ const characterHandle = characterId ? this.characterHandles.get(characterId) || null : this.characterHandle;
9826
+ let structPtr = null;
9827
+ try {
9828
+ structPtr = this.wasmModule._malloc(64);
9829
+ const result2 = this.api.getFLAMETemplateData(
9830
+ this.coreHandle,
9831
+ characterHandle || 0,
9832
+ structPtr
9833
+ );
9834
+ this.checkError(result2, "avatar_core_get_flame_template_data");
9835
+ const vTemplatePtr = this.wasmModule.getValue(structPtr, "i32");
9836
+ const vertexCount = this.wasmModule.getValue(structPtr + 4, "i32");
9837
+ const shapedirsPtr = this.wasmModule.getValue(structPtr + 8, "i32");
9838
+ const shapeParamCount = this.wasmModule.getValue(structPtr + 12, "i32");
9839
+ const posedirsPtr = this.wasmModule.getValue(structPtr + 16, "i32");
9840
+ const poseParamCount = this.wasmModule.getValue(structPtr + 20, "i32");
9841
+ const jRegressorPtr = this.wasmModule.getValue(structPtr + 24, "i32");
9842
+ const jointCount = this.wasmModule.getValue(structPtr + 28, "i32");
9843
+ const lbsWeightsPtr = this.wasmModule.getValue(structPtr + 32, "i32");
9844
+ const parentsPtr = this.wasmModule.getValue(structPtr + 36, "i32");
9845
+ const facesPtr = this.wasmModule.getValue(structPtr + 40, "i32");
9846
+ const faceCount = this.wasmModule.getValue(structPtr + 44, "i32");
9847
+ const staticOffsetPtr = this.wasmModule.getValue(structPtr + 48, "i32");
9848
+ const staticOffsetCount = this.wasmModule.getValue(structPtr + 52, "i32");
9849
+ const buffer = this.wasmModule.HEAPU8.buffer;
9850
+ const vTemplate = new Float32Array(buffer, vTemplatePtr, vertexCount * 3).slice();
9851
+ const shapedirs = new Float32Array(buffer, shapedirsPtr, vertexCount * 3 * shapeParamCount).slice();
9852
+ const posedirs = new Float32Array(buffer, posedirsPtr, vertexCount * 3 * poseParamCount).slice();
9853
+ const effectiveJointCount = jointCount > 0 ? jointCount : 5;
9854
+ const jRegressor = new Float32Array(buffer, jRegressorPtr, effectiveJointCount * vertexCount).slice();
9855
+ const lbsWeights = new Float32Array(buffer, lbsWeightsPtr, vertexCount * effectiveJointCount).slice();
9856
+ const parents = new Int32Array(buffer, parentsPtr, effectiveJointCount).slice();
9857
+ const faces = new Uint32Array(buffer, facesPtr, faceCount * 3).slice();
9858
+ const staticOffset = staticOffsetPtr && staticOffsetCount > 0 ? new Float32Array(buffer, staticOffsetPtr, staticOffsetCount * 3).slice() : null;
9859
+ const optimizedShapedirs = transposeBlendshapeData(shapedirs, vertexCount, shapeParamCount);
9860
+ const optimizedPosedirs = transposeBlendshapeData(posedirs, vertexCount, poseParamCount);
9861
+ logger.log(`FLAME template data retrieved (${((vTemplate.byteLength + optimizedShapedirs.byteLength + optimizedPosedirs.byteLength) / 1024 / 1024).toFixed(2)} MB)`);
9862
+ return {
9863
+ vTemplate,
9864
+ vertexCount,
9865
+ shapedirs: optimizedShapedirs,
9866
+ shapeParamCount,
9867
+ posedirs: optimizedPosedirs,
9868
+ poseParamCount,
9869
+ jRegressor,
9870
+ jointCount: effectiveJointCount,
9871
+ lbsWeights,
9872
+ parents,
9873
+ faces,
9874
+ faceCount,
9875
+ staticOffset,
9876
+ staticOffsetCount
9877
+ };
9878
+ } catch (error) {
9879
+ logger.errorWithError("getFLAMETemplateData failed:", error);
9880
+ throw error;
9881
+ } finally {
9882
+ if (structPtr !== null) {
9883
+ this.wasmModule._free(structPtr);
9884
+ }
9885
+ }
9886
+ }
9887
+ }
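getFLAMETemplateData above walks a 64-byte descriptor field by field; collected in one place (field names mirror the local variables in that method), the offsets it reads are:

    // Sketch of the descriptor layout; every field is a 4-byte i32 (pointer or count).
    //  +0 vTemplate*      +4  vertexCount      +8  shapedirs*   +12 shapeParamCount
    // +16 posedirs*       +20 poseParamCount   +24 jRegressor*  +28 jointCount
    // +32 lbsWeights*     +36 parents*         +40 faces*       +44 faceCount
    // +48 staticOffset*   +52 staticOffsetCount                 (+56..63 not read here)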
9888
+ function transposeBlendshapeData(data, vertexCount, paramCount) {
9889
+ const result2 = new Float32Array(data.length);
9890
+ for (let p2 = 0; p2 < paramCount; p2++) {
9891
+ for (let v2 = 0; v2 < vertexCount; v2++) {
9892
+ for (let c2 = 0; c2 < 3; c2++) {
9893
+ const srcIdx = v2 * 3 * paramCount + c2 * paramCount + p2;
9894
+ const dstIdx = p2 * vertexCount * 3 + v2 * 3 + c2;
9895
+ result2[dstIdx] = data[srcIdx];
9896
+ }
9897
+ }
9898
+ }
9899
+ return result2;
9673
9900
  }
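transposeBlendshapeData converts the CPU blendshape layout [vertex][xyz][param] into the GPU-friendly layout [param][vertex][xyz] that the WGSL shape/pose shaders further down index with p * vertexCount * 3 + v * 3 + xyz. A tiny worked example:

    // vertexCount = 2, paramCount = 2; take v = 1, c = 2 (z component), p = 0:
    //   srcIdx = v * 3 * paramCount + c * paramCount + p = 1 * 6 + 2 * 2 + 0 = 10
    //   dstIdx = p * vertexCount * 3 + v * 3 + c         = 0 + 1 * 3 + 2     = 5
    // so result2[5] = data[10]: the z offset of vertex 1 for parameter 0.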
9674
9901
  class AvatarSDK {
9675
9902
  /**
@@ -9936,7 +10163,7 @@ class AvatarSDK {
9936
10163
  }
9937
10164
  __publicField(AvatarSDK, "_isInitialized", false);
9938
10165
  __publicField(AvatarSDK, "_configuration", null);
9939
- __publicField(AvatarSDK, "_version", "1.0.0-beta.75");
10166
+ __publicField(AvatarSDK, "_version", "1.0.0-beta.77");
9940
10167
  __publicField(AvatarSDK, "_avatarCore", null);
9941
10168
  __publicField(AvatarSDK, "_dynamicSdkConfig", null);
9942
10169
  const AvatarSDK$1 = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
@@ -11773,6 +12000,8 @@ class AvatarController {
11773
12000
  // Character handle for multi-character support
11774
12001
  __publicField(this, "characterId", null);
11775
12002
  // Character ID for multi-character support (used for eye tracking)
12003
+ __publicField(this, "useGPUPath", false);
12004
+ // 🆕 Whether to use the GPU path (skips splatData computation)
11776
12005
  // ========== Post-processing Configuration ==========
11777
12006
  __publicField(this, "postProcessingConfig", null);
11778
12007
  // ========== Playback Loop ==========
@@ -12401,13 +12630,19 @@ class AvatarController {
12401
12630
  }
12402
12631
  /**
12403
12632
  * Set render callback (called by AvatarView)
12633
+ * @param callback Render callback function
12634
+ * @param characterHandle Character handle
12635
+ * @param useGPUPath Whether to use the GPU path (skips splatData computation)
12404
12636
  * @internal
12405
12637
  */
12406
- setRenderCallback(callback, characterHandle) {
12638
+ setRenderCallback(callback, characterHandle, useGPUPath) {
12407
12639
  this.renderCallback = callback;
12408
12640
  if (characterHandle !== void 0) {
12409
12641
  this.characterHandle = characterHandle;
12410
12642
  }
12643
+ if (useGPUPath !== void 0) {
12644
+ this.useGPUPath = useGPUPath;
12645
+ }
12411
12646
  }
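A hypothetical call site for the extended signature (the real caller, AvatarView, is not part of this diff); `renderer` is likewise an assumption:

    controller.setRenderCallback((splatData, frameIndex) => {
      // With useGPUPath = true the controller passes an empty Float32Array (see the
      // playback-loop change below) and splat data is expected to come from GPU buffers.
      renderer.render(splatData, frameIndex);
    }, characterHandle, /* useGPUPath */ true);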
12412
12647
  /**
12413
12648
  * Set character ID (for multi-character support, used for eye tracking)
@@ -12784,16 +13019,22 @@ class AvatarController {
12784
13019
  }
12785
13020
  }
12786
13021
  if (arrayIndex >= 0 && arrayIndex < this.currentKeyframes.length) {
12787
- const currentFrame = this.currentKeyframes[arrayIndex];
12788
- let wasmParams = convertProtoFlameToWasmParams(currentFrame);
12789
- if (this.postProcessingConfig) {
12790
- wasmParams = this.applyPostProcessingToParams(wasmParams);
12791
- }
12792
- const avatarCore = AvatarSDK.getAvatarCore();
12793
- if (avatarCore) {
12794
- const splatData = await avatarCore.computeFrameFlatFromParams(wasmParams, this.characterHandle ?? void 0);
12795
- if (splatData && this.renderCallback) {
12796
- this.renderCallback(splatData, frameIndex);
13022
+ if (this.useGPUPath) {
13023
+ if (this.renderCallback) {
13024
+ this.renderCallback(new Float32Array(0), frameIndex);
13025
+ }
13026
+ } else {
13027
+ const currentFrame = this.currentKeyframes[arrayIndex];
13028
+ let wasmParams = convertProtoFlameToWasmParams(currentFrame);
13029
+ if (this.postProcessingConfig) {
13030
+ wasmParams = this.applyPostProcessingToParams(wasmParams);
13031
+ }
13032
+ const avatarCore = AvatarSDK.getAvatarCore();
13033
+ if (avatarCore) {
13034
+ const splatData = await avatarCore.computeFrameFlatFromParams(wasmParams, this.characterHandle ?? void 0);
13035
+ if (splatData && this.renderCallback) {
13036
+ this.renderCallback(splatData, frameIndex);
13037
+ }
12797
13038
  }
12798
13039
  }
12799
13040
  }
@@ -14530,122 +14771,1720 @@ class WebGLRenderer {
14530
14771
  }
14531
14772
  const renderShaderCode = "/**\n * WebGPU 3DGS 渲染着色器\n *\n * 实例化渲染:每个 splat 绘制一个四边形\n * 对应 WebGL 版本的 GLSL 着色器\n */\n\n// ============ Uniform Bindings ============\n\nstruct Uniforms {\n viewMatrix: mat4x4f,\n projectionMatrix: mat4x4f,\n screenSize: vec2f,\n enableFrustumCulling: u32,\n}\n\n@group(0) @binding(0) var<uniform> uniforms: Uniforms;\n\n// ============ Storage Buffer Bindings (间接索引渲染) ============\n\n@group(1) @binding(0) var<storage, read> sortIndices: array<u32>;\n@group(1) @binding(1) var<storage, read> splatData: array<f32>;\n\n// ============ Vertex Shader ============\n\nstruct VertexInput {\n // 共享四边形顶点 (per-vertex)\n @location(0) quadVertex: vec2f,\n}\n\nstruct VertexOutput {\n @builtin(position) position: vec4f,\n @location(0) relativePosition: vec2f,\n @location(1) color: vec4f,\n}\n\n// 常量定义\nconst BOUNDS_RADIUS: f32 = 3.0;\n\n/**\n * 计算2D协方差矩阵(复刻 WebGL 版本)\n */\nfn calcCovariance2D(\n viewPos: vec3f,\n cov3Da: vec3f,\n cov3Db: vec3f,\n viewMatrix: mat4x4f,\n projectionMatrix: mat4x4f,\n screenSize: vec2f\n) -> vec3f {\n let invViewPosZ = 1.0 / viewPos.z;\n let invViewPosZSquared = invViewPosZ * invViewPosZ;\n\n // FOV 限制\n let tanHalfFovX = 1.0 / projectionMatrix[0][0];\n let tanHalfFovY = 1.0 / projectionMatrix[1][1];\n let limX = 1.3 * tanHalfFovX;\n let limY = 1.3 * tanHalfFovY;\n\n var clampedViewPos = viewPos;\n clampedViewPos.x = clamp(viewPos.x * invViewPosZ, -limX, limX) * viewPos.z;\n clampedViewPos.y = clamp(viewPos.y * invViewPosZ, -limY, limY) * viewPos.z;\n\n // 焦距计算\n let focalX = screenSize.x * projectionMatrix[0][0] / 2.0;\n let focalY = screenSize.y * projectionMatrix[1][1] / 2.0;\n\n // 雅可比矩阵 J\n let J = mat3x3f(\n focalX * invViewPosZ, 0.0, -(focalX * clampedViewPos.x) * invViewPosZSquared,\n 0.0, focalY * invViewPosZ, -(focalY * clampedViewPos.y) * invViewPosZSquared,\n 0.0, 0.0, 0.0\n );\n\n // 视图变换矩阵 W (仅旋转部分) - 对齐 Android SDK,不使用转置\n let W = mat3x3f(\n viewMatrix[0].xyz,\n viewMatrix[1].xyz,\n viewMatrix[2].xyz\n );\n\n // 投影变换 T = J * W\n let T = J * W;\n\n // 3D 协方差矩阵 Vrk(对称矩阵)\n let Vrk = mat3x3f(\n cov3Da.x, cov3Da.y, cov3Da.z,\n cov3Da.y, cov3Db.x, cov3Db.y,\n cov3Da.z, cov3Db.y, cov3Db.z\n );\n\n // 2D 协方差矩阵: cov = T * Vrk * T^T\n let cov = T * Vrk * transpose(T);\n\n // 低通滤波器\n var result = vec3f(cov[0][0], cov[0][1], cov[1][1]);\n result.x += 0.3;\n result.z += 0.3;\n\n return result;\n}\n\n/**\n * 分解协方差矩阵\n */\nfn decomposeCovariance(cov2D: vec3f) -> array<vec2f, 2> {\n let a = cov2D.x;\n let b = cov2D.y;\n let d = cov2D.z;\n\n let det = a * d - b * b;\n let trace = a + d;\n\n let mean = 0.5 * trace;\n let dist = max(0.1, sqrt(mean * mean - det));\n\n // 特征值\n var lambda1 = mean + dist;\n var lambda2 = mean - dist;\n\n // 确保特征值为正\n lambda1 = max(lambda1, 0.01);\n lambda2 = max(lambda2, 0.01);\n\n // 特征向量(复刻 WebGL MetalSplatter 算法)\n var eigenvector1: vec2f;\n if (abs(b) < 1e-6) {\n eigenvector1 = select(vec2f(0.0, 1.0), vec2f(1.0, 0.0), a > d);\n } else {\n eigenvector1 = normalize(vec2f(b, d - lambda2));\n }\n\n // 正交特征向量\n let eigenvector2 = vec2f(eigenvector1.y, -eigenvector1.x);\n\n let v1 = eigenvector1 * sqrt(lambda1);\n let v2 = eigenvector2 * sqrt(lambda2);\n\n return array<vec2f, 2>(v1, v2);\n}\n\n@vertex\nfn vertexMain(\n input: VertexInput,\n @builtin(instance_index) instanceIndex: u32\n) -> VertexOutput {\n var output: VertexOutput;\n\n // 🚀 间接索引:通过排序索引读取实际数据\n let sortedIdx = sortIndices[instanceIndex];\n let dataOffset = sortedIdx * 13u;\n\n // 从 storage buffer 读取 splat 数据\n let position = vec3f(\n splatData[dataOffset 
+ 0u],\n splatData[dataOffset + 1u],\n splatData[dataOffset + 2u]\n );\n let color = vec4f(\n splatData[dataOffset + 3u],\n splatData[dataOffset + 4u],\n splatData[dataOffset + 5u],\n splatData[dataOffset + 6u]\n );\n let covA = vec3f(\n splatData[dataOffset + 7u],\n splatData[dataOffset + 8u],\n splatData[dataOffset + 9u]\n );\n let covB = vec3f(\n splatData[dataOffset + 10u],\n splatData[dataOffset + 11u],\n splatData[dataOffset + 12u]\n );\n\n // 转换到视图空间\n let viewPosition4 = uniforms.viewMatrix * vec4f(position, 1.0);\n let viewPosition3 = viewPosition4.xyz;\n\n // 计算 2D 协方差矩阵\n let cov2D = calcCovariance2D(\n viewPosition3,\n covA,\n covB,\n uniforms.viewMatrix,\n uniforms.projectionMatrix,\n uniforms.screenSize\n );\n\n // 分解协方差矩阵\n let axes = decomposeCovariance(cov2D);\n let axis1 = axes[0];\n let axis2 = axes[1];\n\n // 投影到屏幕空间\n let projectedCenter = uniforms.projectionMatrix * viewPosition4;\n\n // 视锥体剔除\n if (uniforms.enableFrustumCulling == 1u) {\n let bounds = 1.2 * projectedCenter.w;\n if (projectedCenter.z < 0.0 ||\n projectedCenter.z > projectedCenter.w ||\n projectedCenter.x < -bounds ||\n projectedCenter.x > bounds ||\n projectedCenter.y < -bounds ||\n projectedCenter.y > bounds) {\n // 剔除到屏幕外\n output.position = vec4f(2.0, 2.0, 0.0, 1.0);\n output.relativePosition = vec2f(0.0);\n output.color = vec4f(0.0);\n return output;\n }\n }\n\n // 使用实例化的四边形顶点\n let relativeCoord = input.quadVertex;\n\n // 计算椭圆变换后的相对位置(像素单位)\n let ellipseRelativePos = relativeCoord.x * axis1 + relativeCoord.y * axis2;\n\n // 计算屏幕空间偏移\n let projectedScreenDelta = ellipseRelativePos * 2.0 * BOUNDS_RADIUS / uniforms.screenSize;\n\n // 最终顶点位置\n output.position = vec4f(\n projectedCenter.x + projectedScreenDelta.x * projectedCenter.w,\n projectedCenter.y + projectedScreenDelta.y * projectedCenter.w,\n projectedCenter.z,\n projectedCenter.w\n );\n\n // 传递给 fragment shader\n output.relativePosition = relativeCoord * BOUNDS_RADIUS;\n output.color = color;\n\n return output;\n}\n\n// ============ Fragment Shader ============\n\nconst BOUNDS_RADIUS_SQUARED: f32 = BOUNDS_RADIUS * BOUNDS_RADIUS;\n\nfn splatFragmentAlpha(relativePosition: vec2f, splatAlpha: f32) -> f32 {\n // 复刻 WebGL MetalSplatter 计算方式\n let negativeMagnitudeSquared = -dot(relativePosition, relativePosition);\n\n // 边界检查:超出椭圆边界的点被剔除\n if (negativeMagnitudeSquared < -BOUNDS_RADIUS_SQUARED) {\n return 0.0;\n }\n\n // 高斯衰减\n return exp(0.5 * negativeMagnitudeSquared) * splatAlpha;\n}\n\n@fragment\nfn fragmentMain(input: VertexOutput) -> @location(0) vec4f {\n let alpha = splatFragmentAlpha(input.relativePosition, input.color.a);\n\n // ✅ 优化:提前丢弃几乎透明的片段(提升性能和质量,对齐 Android SDK)\n if (alpha < 0.001) {\n discard;\n }\n\n // 预乘 alpha 输出(匹配 alphaMode: 'premultiplied')\n return vec4f(input.color.rgb * alpha, alpha);\n}\n";
14532
14773
  const blitShaderCode = "/**\n * WebGPU Blit Shader\n * 用于将 render texture 绘制到屏幕,应用 transform\n */\n\nstruct BlitUniforms {\n offset: vec2f, // 屏幕空间偏移(NDC坐标)\n scale: f32, // 缩放因子\n}\n\n@group(0) @binding(0) var<uniform> blitUniforms: BlitUniforms;\n@group(1) @binding(0) var texture: texture_2d<f32>;\n@group(1) @binding(1) var textureSampler: sampler;\n\nstruct VertexInput {\n @location(0) position: vec2f,\n @location(1) texCoord: vec2f,\n}\n\nstruct VertexOutput {\n @builtin(position) position: vec4f,\n @location(0) texCoord: vec2f,\n}\n\n@vertex\nfn vertexMain(input: VertexInput) -> VertexOutput {\n var output: VertexOutput;\n // 应用缩放和偏移\n let pos = input.position * blitUniforms.scale + blitUniforms.offset;\n output.position = vec4f(pos, 0.0, 1.0);\n // WebGPU framebuffer 纹理坐标需要翻转 Y 轴\n // framebuffer 的内容是从上到下存储的,但纹理坐标 (0,0) 在左上角,所以需要翻转\n output.texCoord = vec2f(input.texCoord.x, 1.0 - input.texCoord.y);\n return output;\n}\n\n@fragment\nfn fragmentMain(input: VertexOutput) -> @location(0) vec4f {\n return textureSample(texture, textureSampler, input.texCoord);\n}\n\n";
14533
- class WebGPURenderer {
14534
- constructor(canvas, backgroundColor, alpha = true) {
14535
- __publicField(this, "canvas");
14536
- __publicField(this, "backgroundColor");
14537
- __publicField(this, "device", null);
14538
- __publicField(this, "context", null);
14539
- __publicField(this, "renderPipeline", null);
14540
- __publicField(this, "renderTexturePipeline", null);
14541
- // 用于渲染到 render texture
14542
- __publicField(this, "quadVertexBuffer", null);
14543
- __publicField(this, "uniformBuffer", null);
14544
- __publicField(this, "uniformBindGroup", null);
14545
- // 🚀 间接索引渲染 buffers
14546
- __publicField(this, "sortIndexBuffer", null);
14547
- __publicField(this, "splatDataBuffer", null);
14548
- __publicField(this, "storageBindGroup", null);
14549
- __publicField(this, "bindGroupNeedsUpdate", false);
14550
- // 标记 bind group 是否需要更新
14774
+ const transformShaderCode = "/**\n * WebGPU 3DGS Transform Compute Shader\n *\n * 功能: 在GPU上执行3DGS变换 + 协方差计算\n * 输入: Original Splats (110K点) + Face Geometry (15.4K面)\n * 输出: Transformed Splats with Covariance (GPU格式)\n */\n\n// ============================================================================\n// 数据结构定义\n// ============================================================================\n\n// ============================================================================\n// Bindings (使用flat array避免struct padding问题)\n// ============================================================================\n\n// Original Splats: 每个splat 16 floats (64 bytes紧密排列)\n// [position.xyz, scale.xyz, rotation.xyzw, color.rgba, opacity, binding(as float)]\n@group(0) @binding(0) var<storage, read> originalSplatsData: array<f32>;\n\n// Face Geometries: 每个face 8 floats (32 bytes紧密排列)\n// [center.xyz, scale, quat.xyzw]\n@group(0) @binding(1) var<storage, read> faceGeometriesData: array<f32>;\n\n// 输出为flat float array: [pos.xyz, color.rgba, cov[6]] = 13 floats per splat\n@group(0) @binding(2) var<storage, read_write> transformedData: array<f32>;\n\n// 🚀 性能优化: 单独输出紧凑的positions (用于排序)\n// [xyz] = 3 floats per splat\n@group(0) @binding(3) var<storage, read_write> positionsOutput: array<f32>;\n\n// 🆕 GPU排序优化: ViewMatrix uniform (用于计算view-space depth)\nstruct Uniforms {\n viewMatrix: mat4x4f,\n}\n@group(0) @binding(4) var<uniform> uniforms: Uniforms;\n\n// 🆕 GPU排序优化: 输出深度值 (Uint32格式, 已处理降序)\n@group(0) @binding(5) var<storage, read_write> depthsOutput: array<u32>;\n\n// ============================================================================\n// 辅助函数\n// ============================================================================\n\n/**\n * 四元数归一化\n */\nfn normalizeQuaternion(q: vec4f) -> vec4f {\n let norm = length(q);\n if (norm < 1e-8) {\n return vec4f(0.0, 0.0, 0.0, 1.0); // 单位四元数\n }\n return q / norm;\n}\n\n/**\n * 四元数乘法 (q1 * q2)\n * 注意: 四元数乘法不可交换\n */\nfn multiplyQuaternions(q1: vec4f, q2: vec4f) -> vec4f {\n return vec4f(\n q1.w * q2.x + q1.x * q2.w + q1.y * q2.z - q1.z * q2.y, // x\n q1.w * q2.y - q1.x * q2.z + q1.y * q2.w + q1.z * q2.x, // y\n q1.w * q2.z + q1.x * q2.y - q1.y * q2.x + q1.z * q2.w, // z\n q1.w * q2.w - q1.x * q2.x - q1.y * q2.y - q1.z * q2.z // w\n );\n}\n\n/**\n * 用四元数旋转向量\n * v_rotated = q * v * q_conjugate\n */\nfn rotateVectorByQuaternion(q: vec4f, v: vec3f) -> vec3f {\n // 优化版本: v' = v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v)\n let qxyz = q.xyz;\n let qw = q.w;\n let t = 2.0 * cross(qxyz, v);\n return v + qw * t + cross(qxyz, t);\n}\n\n/**\n * 将四元数转换为3x3旋转矩阵\n * ⚠️ CRITICAL: C++存储的是TRANSPOSED matrix!\n * 所以quaternion → matrix转换后需要再次转置才能匹配C++的orientation_mat\n */\nfn quaternionToMatrix(q: vec4f) -> mat3x3f {\n let qx = q.x;\n let qy = q.y;\n let qz = q.z;\n let qw = q.w;\n\n // 标准quaternion到matrix转换\n let m00 = 1.0 - 2.0 * (qy*qy + qz*qz);\n let m01 = 2.0 * (qx*qy - qz*qw);\n let m02 = 2.0 * (qx*qz + qy*qw);\n\n let m10 = 2.0 * (qx*qy + qz*qw);\n let m11 = 1.0 - 2.0 * (qx*qx + qz*qz);\n let m12 = 2.0 * (qy*qz - qx*qw);\n\n let m20 = 2.0 * (qx*qz - qy*qw);\n let m21 = 2.0 * (qy*qz + qx*qw);\n let m22 = 1.0 - 2.0 * (qx*qx + qy*qy);\n\n // WGSL mat3x3f is column-major\n // Standard quaternion-to-matrix conversion (no transpose)\n return mat3x3f(\n vec3f(m00, m10, m20), // column 0\n vec3f(m01, m11, m21), // column 1\n vec3f(m02, m12, m22) // column 2\n );\n}\n\n/**\n * 从四元数构建旋转矩阵并计算协方差\n * Covariance = (R*S) * (R*S)^T\n */\nfn computeCovariance3D(scale: vec3f, rotation: vec4f) 
-> array<f32, 6> {\n // 1. 归一化四元数\n let q = normalizeQuaternion(rotation);\n let qx = q.x;\n let qy = q.y;\n let qz = q.z;\n let qw = q.w;\n\n // 2. 构建旋转矩阵 R (3x3)\n let r00 = 1.0 - 2.0 * (qy*qy + qz*qz);\n let r01 = 2.0 * (qx*qy - qz*qw);\n let r02 = 2.0 * (qx*qz + qy*qw);\n\n let r10 = 2.0 * (qx*qy + qz*qw);\n let r11 = 1.0 - 2.0 * (qx*qx + qz*qz);\n let r12 = 2.0 * (qy*qz - qx*qw);\n\n let r20 = 2.0 * (qx*qz - qy*qw);\n let r21 = 2.0 * (qy*qz + qx*qw);\n let r22 = 1.0 - 2.0 * (qx*qx + qy*qy);\n\n // 3. 计算 R * S\n let sx = scale.x;\n let sy = scale.y;\n let sz = scale.z;\n\n let rs00 = r00 * sx;\n let rs01 = r01 * sy;\n let rs02 = r02 * sz;\n\n let rs10 = r10 * sx;\n let rs11 = r11 * sy;\n let rs12 = r12 * sz;\n\n let rs20 = r20 * sx;\n let rs21 = r21 * sy;\n let rs22 = r22 * sz;\n\n // 4. 计算协方差矩阵上三角 = (R*S) * (R*S)^T\n var cov: array<f32, 6>;\n cov[0] = rs00*rs00 + rs01*rs01 + rs02*rs02; // cov[0][0]\n cov[1] = rs00*rs10 + rs01*rs11 + rs02*rs12; // cov[0][1]\n cov[2] = rs00*rs20 + rs01*rs21 + rs02*rs22; // cov[0][2]\n cov[3] = rs10*rs10 + rs11*rs11 + rs12*rs12; // cov[1][1]\n cov[4] = rs10*rs20 + rs11*rs21 + rs12*rs22; // cov[1][2]\n cov[5] = rs20*rs20 + rs21*rs21 + rs22*rs22; // cov[2][2]\n\n return cov;\n}\n\n/**\n * 计算可排序深度\n *\n * View space: Z轴负方向,物体Z < 0,越远越小\n * RadixSort: ascending (小到大)\n * 目标: far-to-near (远到近)\n *\n * depth = viewPos.z (负数,远点如-10,近点如-2)\n * 转sortable: 负数小 → sortable小\n * Ascending: 小在前 → 远在前 ✅\n *\n * 🚀 优化: 只需要 viewPos.z,因此只提取 viewMatrix 第3行的点积\n * viewPos.z = row3 · [worldPosition, 1]\n */\nfn computeSortableDepth(worldPosition: vec3f) -> u32 {\n // 🚀 优化: 直接计算 viewPos.z,无需完整矩阵乘法\n // WGSL 列主序: uniforms.viewMatrix[col][row]\n // 第3行 = [viewMatrix[0][2], viewMatrix[1][2], viewMatrix[2][2], viewMatrix[3][2]]\n let depth = uniforms.viewMatrix[0][2] * worldPosition.x +\n uniforms.viewMatrix[1][2] * worldPosition.y +\n uniforms.viewMatrix[2][2] * worldPosition.z +\n uniforms.viewMatrix[3][2];\n\n let depthBits = bitcast<u32>(depth);\n let depthSortable = depthBits ^ select(0x80000000u, 0xffffffffu, depth < 0.0);\n return depthSortable;\n}\n\n// ============================================================================\n// Compute Shader Main\n// ============================================================================\n\n@compute @workgroup_size(256)\nfn main(@builtin(global_invocation_id) global_id: vec3u) {\n let idx = global_id.x;\n\n // 边界检查 (originalSplatsData长度 / 16 = splat数量)\n let splatCount = arrayLength(&originalSplatsData) / 16u;\n if (idx >= splatCount) {\n return;\n }\n\n // ============================================================================\n // 读取Original Splat (16 floats per splat)\n // [position.xyz, scale.xyz, rotation.xyzw, color.rgba, opacity, binding]\n // ============================================================================\n let splatOffset = idx * 16u;\n\n let position = vec3f(\n originalSplatsData[splatOffset + 0u],\n originalSplatsData[splatOffset + 1u],\n originalSplatsData[splatOffset + 2u]\n );\n\n let scale = vec3f(\n originalSplatsData[splatOffset + 3u],\n originalSplatsData[splatOffset + 4u],\n originalSplatsData[splatOffset + 5u]\n );\n\n let rotation = vec4f(\n originalSplatsData[splatOffset + 6u],\n originalSplatsData[splatOffset + 7u],\n originalSplatsData[splatOffset + 8u],\n originalSplatsData[splatOffset + 9u]\n );\n\n let color = vec4f(\n originalSplatsData[splatOffset + 10u],\n originalSplatsData[splatOffset + 11u],\n originalSplatsData[splatOffset + 12u],\n originalSplatsData[splatOffset + 13u]\n 
);\n\n let opacity = originalSplatsData[splatOffset + 14u];\n let binding = i32(originalSplatsData[splatOffset + 15u]);\n\n // ============================================================================\n // 获取绑定的Face Geometry (带边界检查)\n // ============================================================================\n let faceCount = arrayLength(&faceGeometriesData) / 8u;\n if (binding < 0 || u32(binding) >= faceCount) {\n // 绑定无效,跳过 (设置为无效点)\n let baseOffset = idx * 13u;\n for (var i = 0u; i < 13u; i++) {\n transformedData[baseOffset + i] = 0.0;\n }\n return;\n }\n\n // ============================================================================\n // 读取Face Geometry (8 floats per face)\n // [center.xyz, scale, quat.xyzw]\n // ============================================================================\n let faceOffset = u32(binding) * 8u;\n\n let faceCenter = vec3f(\n faceGeometriesData[faceOffset + 0u],\n faceGeometriesData[faceOffset + 1u],\n faceGeometriesData[faceOffset + 2u]\n );\n\n let faceScale = faceGeometriesData[faceOffset + 3u];\n\n let faceQuat = vec4f(\n faceGeometriesData[faceOffset + 4u],\n faceGeometriesData[faceOffset + 5u],\n faceGeometriesData[faceOffset + 6u],\n faceGeometriesData[faceOffset + 7u]\n );\n\n // ============================================================================\n // 1. 位置变换: position = orientation_mat * (original_pos * face_scale) + face_center\n // 匹配C++: transformed_splat.position = face_geometry.orientation_mat * original_splat.position * face_geometry.scaling + face_geometry.center;\n // ============================================================================\n let orientationMat = quaternionToMatrix(faceQuat);\n let scaledPosition = position * faceScale;\n let rotated = orientationMat * scaledPosition;\n let transformedPosition = rotated + faceCenter;\n\n // ============================================================================\n // 2. 缩放变换: scale = original_scale * face_scale\n // ============================================================================\n let transformedScale = scale * faceScale;\n\n // ============================================================================\n // 3. 旋转变换: rotation = quat_multiply(normalize(original_rotation), face_quat)\n // ============================================================================\n let normalizedOriginalRotation = normalizeQuaternion(rotation);\n let transformedRotation = multiplyQuaternions(normalizedOriginalRotation, faceQuat);\n\n // ============================================================================\n // 4. 计算3D协方差矩阵\n // ============================================================================\n let covariance = computeCovariance3D(transformedScale, transformedRotation);\n\n // ============================================================================\n // 5. 
输出结果到flat array (13 floats per splat)\n // ============================================================================\n let baseOffset = idx * 13u;\n\n // position[3]\n transformedData[baseOffset + 0u] = transformedPosition.x;\n transformedData[baseOffset + 1u] = transformedPosition.y;\n transformedData[baseOffset + 2u] = transformedPosition.z;\n\n // color[4]: 从SH 0阶系数转换为RGB\n // SH_C0 = 0.28209479177387814\n // RGB = SH_C0 * sh[0] + 0.5\n let SH_C0 = 0.28209479177387814;\n let r = clamp(SH_C0 * color.r + 0.5, 0.0, 1.0);\n let g = clamp(SH_C0 * color.g + 0.5, 0.0, 1.0);\n let b = clamp(SH_C0 * color.b + 0.5, 0.0, 1.0);\n\n transformedData[baseOffset + 3u] = r;\n transformedData[baseOffset + 4u] = g;\n transformedData[baseOffset + 5u] = b;\n transformedData[baseOffset + 6u] = opacity;\n\n // covariance[6]\n transformedData[baseOffset + 7u] = covariance[0];\n transformedData[baseOffset + 8u] = covariance[1];\n transformedData[baseOffset + 9u] = covariance[2];\n transformedData[baseOffset + 10u] = covariance[3];\n transformedData[baseOffset + 11u] = covariance[4];\n transformedData[baseOffset + 12u] = covariance[5];\n\n // ============================================================================\n // 6. 🚀 同时输出紧凑的positions (用于排序,零额外开销)\n // ============================================================================\n let posOffset = idx * 3u;\n positionsOutput[posOffset + 0u] = transformedPosition.x;\n positionsOutput[posOffset + 1u] = transformedPosition.y;\n positionsOutput[posOffset + 2u] = transformedPosition.z;\n\n // ============================================================================\n // 7. 🆕 GPU排序优化: 输出可排序深度值 (Uint32, 降序)\n // ============================================================================\n depthsOutput[idx] = computeSortableDepth(transformedPosition);\n}\n\n";
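The depth encoding in computeSortableDepth (inside the shader string above) remaps IEEE-754 float bits so that an ascending radix sort over u32 keys yields far-to-near order in view space. A JS sketch of the same bit trick:

    function sortableDepth(depth) {
      const view = new DataView(new ArrayBuffer(4));
      view.setFloat32(0, depth);
      const bits = view.getUint32(0);
      return (bits ^ (depth < 0 ? 0xffffffff : 0x80000000)) >>> 0;
    }
    // View-space z is negative in front of the camera, so e.g.
    // sortableDepth(-10) < sortableDepth(-2): farther splats sort first.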
14775
+ class TransformPipeline {
14776
+ constructor(device) {
14777
+ __publicField(this, "device");
14778
+ __publicField(this, "computePipeline", null);
14779
+ __publicField(this, "bindGroup", null);
14780
+ // GPU Buffers
14781
+ __publicField(this, "originalSplatsBuffer", null);
14782
+ __publicField(this, "faceGeometryBuffer", null);
14783
+ __publicField(this, "transformedOutputBuffer", null);
14784
+ __publicField(this, "positionsOutputBuffer", null);
14785
+ // 🚀 紧凑的positions输出
14786
+ __publicField(this, "viewMatrixBuffer", null);
14787
+ // 🆕 View matrix uniform
14788
+ __publicField(this, "depthsOutputBuffer", null);
14789
+ // 🆕 深度输出 (Uint32, GPU排序用)
14790
+ // 数据规模
14551
14791
  __publicField(this, "splatCount", 0);
14552
- __publicField(this, "presentationFormat", "bgra8unorm");
14553
- __publicField(this, "alpha");
14554
- // Render texture framebuffer
14555
- __publicField(this, "renderTexture", null);
14556
- __publicField(this, "renderTextureView", null);
14557
- __publicField(this, "depthTexture", null);
14558
- __publicField(this, "framebufferWidth", 0);
14559
- __publicField(this, "framebufferHeight", 0);
14560
- // Blit pipeline for drawing render texture to screen
14561
- __publicField(this, "blitPipeline", null);
14562
- __publicField(this, "blitUniformBuffer", null);
14563
- __publicField(this, "blitQuadBuffer", null);
14564
- __publicField(this, "blitSampler", null);
14565
- this.canvas = canvas;
14566
- this.backgroundColor = backgroundColor || [0, 0, 0, 0];
14567
- this.alpha = alpha;
14792
+ __publicField(this, "faceCount", 0);
14793
+ // 🆕 GPU FLAME support: marks whether an external GPU buffer is in use
14794
+ __publicField(this, "usesExternalFaceGeometryBuffer", false);
14795
+ this.device = device;
14568
14796
  }
14569
14797
  /**
14570
- * 初始化 WebGPU 渲染器
14798
+ * Initialize the pipeline
14571
14799
  */
14572
14800
  async initialize() {
14573
- const adapter = await navigator.gpu.requestAdapter({
14574
- powerPreference: "high-performance"
14801
+ const shaderModule = this.device.createShaderModule({
14802
+ label: "Transform Compute Shader",
14803
+ code: transformShaderCode
14575
14804
  });
14576
- if (!adapter) {
14577
- throw new Error("WebGPU: No GPU adapter found");
14805
+ this.computePipeline = await this.device.createComputePipelineAsync({
14806
+ label: "Transform Compute Pipeline",
14807
+ layout: "auto",
14808
+ compute: {
14809
+ module: shaderModule,
14810
+ entryPoint: "main"
14811
+ }
14812
+ });
14813
+ logger.log("✅ Transform Pipeline initialized");
14814
+ }
14815
+ /**
14816
+ * Upload the original splats (one-time call)
14817
+ * @param originalSplatsData Float32Array, 16 floats per splat (64 bytes)
14818
+ * @param splatCount Number of splats
14819
+ */
14820
+ uploadOriginalSplats(originalSplatsData, splatCount) {
14821
+ var _a, _b;
14822
+ if (!this.device) {
14823
+ throw new Error("Device not initialized");
14578
14824
  }
14579
- this.device = await adapter.requestDevice();
14580
- this.context = this.canvas.getContext("webgpu");
14581
- if (!this.context) {
14582
- throw new Error("WebGPU: Failed to get canvas context");
14825
+ this.splatCount = splatCount;
14826
+ const bufferSize = originalSplatsData.byteLength;
14827
+ if (this.originalSplatsBuffer) {
14828
+ this.originalSplatsBuffer.destroy();
14583
14829
  }
14584
- this.presentationFormat = navigator.gpu.getPreferredCanvasFormat();
14585
- this.context.configure({
14586
- device: this.device,
14587
- format: this.presentationFormat,
14588
- alphaMode: this.alpha ? "premultiplied" : "opaque"
14830
+ this.originalSplatsBuffer = this.device.createBuffer({
14831
+ label: "Original Splats Buffer",
14832
+ size: bufferSize,
14833
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
14834
+ });
14835
+ this.device.queue.writeBuffer(
14836
+ this.originalSplatsBuffer,
14837
+ 0,
14838
+ originalSplatsData.buffer,
14839
+ originalSplatsData.byteOffset,
14840
+ originalSplatsData.byteLength
14841
+ );
14842
+ this.createTransformedOutputBuffer();
14843
+ this.createViewMatrixBuffer();
14844
+ logger.log(`✅ [TransformPipeline] Original Splats uploaded: ${splatCount} splats (${(bufferSize / 1024 / 1024).toFixed(2)} MB)`, {
14845
+ originalSplatsBufferSize: this.originalSplatsBuffer.size,
14846
+ transformedOutputBufferSize: ((_a = this.transformedOutputBuffer) == null ? void 0 : _a.size) || 0,
14847
+ viewMatrixBufferSize: ((_b = this.viewMatrixBuffer) == null ? void 0 : _b.size) || 0,
14848
+ bindGroupCreated: false
14849
+ // the bind group is created on the first updateFaceGeometry call
14589
14850
  });
14590
- this.createUniformBuffer();
14591
- this.createQuadVertexBuffer();
14592
- await this.createRenderPipeline();
14593
- await this.createBlitPipeline();
14594
14851
  }
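A plausible one-time wiring of the adapter call added earlier in this diff to this upload path (sketch; `avatarCore` and `transformPipeline` are assumed instances of AvatarCoreAdapter and TransformPipeline):

    const splats = await avatarCore.getOriginalSplatsData(); // { data, count } or null
    if (splats) {
      transformPipeline.uploadOriginalSplats(splats.data, splats.count); // 16 floats per splat
    }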
14595
14852
  /**
14596
- * 创建 Uniform Buffer
14853
+ * 🆕 Set an external GPU face geometry buffer (GPU FLAME path)
14854
+ * @param externalBuffer External GPU buffer (the faceGeometriesBuffer from the FLAME pipeline)
14855
+ * @param faceCount Number of faces
14597
14856
  */
14598
- createUniformBuffer() {
14599
- if (!this.device)
14857
+ setFaceGeometryBufferFromGPU(externalBuffer, faceCount) {
14858
+ if (!this.device) {
14859
+ throw new Error("Device not initialized");
14860
+ }
14861
+ if (this.faceGeometryBuffer && !this.usesExternalFaceGeometryBuffer) {
14862
+ this.faceGeometryBuffer.destroy();
14863
+ }
14864
+ this.faceGeometryBuffer = externalBuffer;
14865
+ this.faceCount = faceCount;
14866
+ this.usesExternalFaceGeometryBuffer = true;
14867
+ this.createBindGroup();
14868
+ }
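GPU FLAME path sketch: the face-geometry buffer produced by the FLAME pipeline is bound directly, so per-frame geometries never round-trip through the CPU. The `flamePipeline` handle and its `faceGeometriesBuffer` property are assumptions based on the comment above:

    transformPipeline.setFaceGeometryBufferFromGPU(flamePipeline.faceGeometriesBuffer, faceCount);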
14869
+ /**
14870
+ * Update the face geometry buffer (called every frame) - CPU path
14871
+ * @param faceGeometryData Float32Array, 8 floats per face (32 bytes)
14872
+ */
14873
+ updateFaceGeometry(faceGeometryData) {
14874
+ if (!this.device) {
14875
+ throw new Error("Device not initialized");
14876
+ }
14877
+ const faceCount = faceGeometryData.length / 8;
14878
+ const bufferSize = faceGeometryData.byteLength;
14879
+ const needsRebuild = !this.faceGeometryBuffer || this.faceCount !== faceCount || this.usesExternalFaceGeometryBuffer;
14880
+ if (needsRebuild) {
14881
+ this.faceCount = faceCount;
14882
+ if (this.faceGeometryBuffer && !this.usesExternalFaceGeometryBuffer) {
14883
+ this.faceGeometryBuffer.destroy();
14884
+ }
14885
+ this.faceGeometryBuffer = this.device.createBuffer({
14886
+ label: "Face Geometry Buffer (CPU path)",
14887
+ size: bufferSize,
14888
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
14889
+ });
14890
+ this.usesExternalFaceGeometryBuffer = false;
14891
+ this.createBindGroup();
14892
+ }
14893
+ if (!this.faceGeometryBuffer) {
14894
+ throw new Error("FaceGeometry buffer not created");
14895
+ }
14896
+ this.device.queue.writeBuffer(
14897
+ this.faceGeometryBuffer,
14898
+ 0,
14899
+ faceGeometryData.buffer,
14900
+ faceGeometryData.byteOffset,
14901
+ faceGeometryData.byteLength
14902
+ );
14903
+ }
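Per-frame sketch of the CPU face-geometry path (identifiers outside this diff are assumptions):

    const faceGeometries = await avatarCore.computeFrameAsFaceGeometry({ frameIndex });
    if (faceGeometries) {
      transformPipeline.updateFaceGeometry(faceGeometries); // 8 floats per face
    }
    transformPipeline.updateViewMatrix(viewMatrix);          // 16-float column-major mat4
    transformPipeline.executeInEncoder(commandEncoder);      // records the compute pass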
14904
+ /**
14905
+ * Run the transform compute pass (within the given command encoder)
14906
+ * @param commandEncoder External command encoder (shared with the render pass to guarantee ordering)
14907
+ */
14908
+ executeInEncoder(commandEncoder) {
14909
+ if (!this.device || !this.computePipeline || !this.bindGroup) {
14600
14910
  return;
14601
- const uniformBufferSize = 160;
14602
- this.uniformBuffer = this.device.createBuffer({
14603
- label: "Uniform Buffer",
14604
- size: uniformBufferSize,
14605
- usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
14911
+ }
14912
+ if (this.splatCount === 0) {
14913
+ return;
14914
+ }
14915
+ const passEncoder = commandEncoder.beginComputePass({
14916
+ label: "Transform Compute Pass"
14606
14917
  });
14918
+ passEncoder.setPipeline(this.computePipeline);
14919
+ passEncoder.setBindGroup(0, this.bindGroup);
14920
+ const workgroupCount = Math.ceil(this.splatCount / 256);
14921
+ passEncoder.dispatchWorkgroups(workgroupCount);
14922
+ passEncoder.end();
14607
14923
  }
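For scale, the workgroup math above works out as follows for the ~110K splats mentioned in the transform shader header:

    // Math.ceil(110000 / 256) = 430 workgroups of 256 threads each,
    // matching @workgroup_size(256) in transformShaderCode.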
14608
14924
  /**
14609
- * 创建四边形顶点缓冲区(实例化渲染用)
14925
+ * Get the transformed output buffer (consumed by the renderer)
14610
14926
  */
14611
- createQuadVertexBuffer() {
14612
- if (!this.device)
14927
+ getTransformedOutputBuffer() {
14928
+ return this.transformedOutputBuffer;
14929
+ }
14930
+ /**
14931
+ * 🚀 获取Positions Output Buffer (供排序使用)
14932
+ */
14933
+ getPositionsOutputBuffer() {
14934
+ return this.positionsOutputBuffer;
14935
+ }
14936
+ /**
14937
+ * 🆕 获取Depths Output Buffer (供GPU排序使用)
14938
+ */
14939
+ getDepthsOutputBuffer() {
14940
+ return this.depthsOutputBuffer;
14941
+ }
14942
+ /**
14943
+ * 🆕 更新View Matrix (每帧调用)
14944
+ * @param viewMatrix 4x4 view matrix
14945
+ */
14946
+ updateViewMatrix(viewMatrix) {
14947
+ if (!this.device || !this.viewMatrixBuffer) {
14613
14948
  return;
14614
- const quadVertices = new Float32Array([
14615
- -1,
14616
- -1,
14617
- // 左下
14618
- -1,
14619
- 1,
14620
- // 左上
14621
- 1,
14622
- -1,
14623
- // 右下
14624
- 1,
14625
- 1
14626
- // 右上
14627
- ]);
14628
- this.quadVertexBuffer = this.device.createBuffer({
14629
- label: "Quad Vertex Buffer",
14630
- size: quadVertices.byteLength,
14631
- usage: GPUBufferUsage.VERTEX,
14632
- mappedAtCreation: true
14949
+ }
14950
+ this.device.queue.writeBuffer(
14951
+ this.viewMatrixBuffer,
14952
+ 0,
14953
+ viewMatrix.buffer,
14954
+ viewMatrix.byteOffset,
14955
+ viewMatrix.byteLength
14956
+ );
14957
+ }
14958
+ /**
14959
+ * 🆕 Get the splat count
14960
+ */
14961
+ getSplatCount() {
14962
+ return this.splatCount;
14963
+ }
14964
+ /**
14965
+ * Create the transformed output buffer
14966
+ * Layout: position[3] + color[4] + covariance[6] = 13 floats = 52 bytes
14967
+ */
14968
+ createTransformedOutputBuffer() {
14969
+ if (!this.device || this.splatCount === 0) return;
14970
+ const floatsPerSplat = 13;
14971
+ const bufferSize = this.splatCount * floatsPerSplat * 4;
14972
+ if (this.transformedOutputBuffer) {
14973
+ this.transformedOutputBuffer.destroy();
14974
+ }
14975
+ this.transformedOutputBuffer = this.device.createBuffer({
14976
+ label: "Transformed Output Buffer",
14977
+ size: bufferSize,
14978
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST
14633
14979
  });
14634
- new Float32Array(this.quadVertexBuffer.getMappedRange()).set(quadVertices);
14635
- this.quadVertexBuffer.unmap();
14980
+ this.createPositionsOutputBuffer();
14981
+ this.createDepthsOutputBuffer();
14636
14982
  }
14637
14983
  /**
14638
- * 创建 Render Pipeline
14984
+ * 🚀 Create the positions output buffer (used for sorting)
14985
+ * Layout: position[3] = 3 floats = 12 bytes per splat
14639
14986
  */
14640
- async createRenderPipeline() {
14641
- if (!this.device)
14642
- return;
14643
- const shaderModule = this.device.createShaderModule({
14644
- label: "3DGS Render Shader",
14645
- code: renderShaderCode
14987
+ createPositionsOutputBuffer() {
14988
+ if (!this.device || this.splatCount === 0) return;
14989
+ const floatsPerPosition = 3;
14990
+ const bufferSize = this.splatCount * floatsPerPosition * 4;
14991
+ if (this.positionsOutputBuffer) {
14992
+ this.positionsOutputBuffer.destroy();
14993
+ }
14994
+ this.positionsOutputBuffer = this.device.createBuffer({
14995
+ label: "Positions Output Buffer (for sorting)",
14996
+ size: bufferSize,
14997
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
14646
14998
  });
14647
- const uniformBindGroupLayout = this.device.createBindGroupLayout({
14648
- label: "Uniform Bind Group Layout",
14999
+ }
15000
+ /**
15001
+ * 🆕 创建View Matrix Buffer
15002
+ */
15003
+ createViewMatrixBuffer() {
15004
+ if (!this.device) return;
15005
+ const bufferSize = 64;
15006
+ this.viewMatrixBuffer = this.device.createBuffer({
15007
+ label: "View Matrix Uniform",
15008
+ size: bufferSize,
15009
+ usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
15010
+ });
15011
+ }
15012
+ /**
15013
+ * 🆕 创建Depths Output Buffer (用于GPU排序)
15014
+ * 格式: depth (Uint32) = 4 bytes per splat
15015
+ */
15016
+ createDepthsOutputBuffer() {
15017
+ if (!this.device || this.splatCount === 0) return;
15018
+ const bufferSize = this.splatCount * 4;
15019
+ this.depthsOutputBuffer = this.device.createBuffer({
15020
+ label: "Depths Output Buffer (for GPU sorting)",
15021
+ size: bufferSize,
15022
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
15023
+ });
15024
+ }
15025
+ /**
15026
+ * 创建Bind Group
15027
+ */
15028
+ createBindGroup() {
15029
+ if (!this.device || !this.computePipeline || !this.originalSplatsBuffer || !this.faceGeometryBuffer || !this.transformedOutputBuffer || !this.positionsOutputBuffer || !this.viewMatrixBuffer || // 🆕 新增检查
15030
+ !this.depthsOutputBuffer) {
15031
+ return;
15032
+ }
15033
+ const bindGroupLayout = this.computePipeline.getBindGroupLayout(0);
15034
+ this.bindGroup = this.device.createBindGroup({
15035
+ label: "Transform Bind Group",
15036
+ layout: bindGroupLayout,
15037
+ entries: [
15038
+ {
15039
+ binding: 0,
15040
+ // originalSplats
15041
+ resource: { buffer: this.originalSplatsBuffer }
15042
+ },
15043
+ {
15044
+ binding: 1,
15045
+ // faceGeometries
15046
+ resource: { buffer: this.faceGeometryBuffer }
15047
+ },
15048
+ {
15049
+ binding: 2,
15050
+ // transformedSplats
15051
+ resource: { buffer: this.transformedOutputBuffer }
15052
+ },
15053
+ {
15054
+ binding: 3,
15055
+ // positionsOutput
15056
+ resource: { buffer: this.positionsOutputBuffer }
15057
+ },
15058
+ {
15059
+ binding: 4,
15060
+ // 🆕 viewMatrix (uniform)
15061
+ resource: { buffer: this.viewMatrixBuffer }
15062
+ },
15063
+ {
15064
+ binding: 5,
15065
+ // 🆕 depthsOutput
15066
+ resource: { buffer: this.depthsOutputBuffer }
15067
+ }
15068
+ ]
15069
+ });
15070
+ }
15071
+ /**
15072
+ * 清理资源
15073
+ */
15074
+ destroy() {
15075
+ var _a, _b, _c, _d, _e2;
15076
+ (_a = this.originalSplatsBuffer) == null ? void 0 : _a.destroy();
15077
+ if (this.faceGeometryBuffer && !this.usesExternalFaceGeometryBuffer) {
15078
+ this.faceGeometryBuffer.destroy();
15079
+ }
15080
+ (_b = this.transformedOutputBuffer) == null ? void 0 : _b.destroy();
15081
+ (_c = this.positionsOutputBuffer) == null ? void 0 : _c.destroy();
15082
+ (_d = this.viewMatrixBuffer) == null ? void 0 : _d.destroy();
15083
+ (_e2 = this.depthsOutputBuffer) == null ? void 0 : _e2.destroy();
15084
+ this.originalSplatsBuffer = null;
15085
+ this.faceGeometryBuffer = null;
15086
+ this.transformedOutputBuffer = null;
15087
+ this.positionsOutputBuffer = null;
15088
+ this.viewMatrixBuffer = null;
15089
+ this.depthsOutputBuffer = null;
15090
+ this.bindGroup = null;
15091
+ }
15092
+ }
15093
+ const flameCommonWGSL = "/**\n * FLAME Common Definitions\n *\n * 共享的结构体、常量和工具函数\n * 🔧 Updated: Added staticOffsetCount to FLAMEMetadata\n */\n\n// ============================================================================\n// 常量定义\n// ============================================================================\n\nconst PI: f32 = 3.14159265359;\n\n// ============================================================================\n// 结构体定义 (与 FLAMEGPUBuffers 对齐)\n// ============================================================================\n\n/**\n * FLAME 帧参数 (Uniform Buffer)\n * 🚀 优化: 移除 shapeParams (已分离为独立 Storage Buffer)\n * Layout (std140, vec4 对齐):\n * - exprParams: 25 vec4 (100 floats)\n * - rotation: 1 vec4 (3 floats + padding)\n * - translation: 1 vec4 (3 floats + padding)\n * - neckPose: 1 vec4 (3 floats + padding)\n * - jawPose: 1 vec4 (3 floats + padding)\n * - eyesPose: 2 vec4 (6 floats)\n * - eyelid: 1 vec4 (2 floats + padding)\n * Total: 32 vec4 = 512 bytes (was 1744 bytes, 节省 71%)\n */\nstruct FLAMEParams {\n exprParams: array<vec4<f32>, 25>, // [100] expression parameters\n rotation: vec4<f32>, // [3] global rotation (axis-angle) + padding\n translation: vec4<f32>, // [3] global translation + padding\n neckPose: vec4<f32>, // [3] neck pose + padding\n jawPose: vec4<f32>, // [3] jaw pose + padding\n eyesPose: array<vec4<f32>, 2>, // [6] eyes pose (2 vec4)\n eyelid: vec4<f32>, // [2] eyelid + padding\n}\n\n/**\n * FLAME 元数据 (Uniform Buffer)\n */\nstruct FLAMEMetadata {\n vertexCount: u32,\n faceCount: u32,\n jointCount: u32,\n shapeParamCount: u32,\n poseParamCount: u32,\n staticOffsetCount: u32, // 🆕 静态偏移顶点数量\n activeShapeCount: u32, // 🚀 活跃shape参数数量\n _padding0: u32, // 对齐到 32 bytes\n}\n\n// ============================================================================\n// 工具函数\n// ============================================================================\n\n/**\n * Rodrigues 公式: 轴角表示转换为旋转矩阵\n * @param axisAngle 轴角表示 (vec3)\n * @return 3x3 旋转矩阵\n */\nfn rodrigues(axisAngle: vec3<f32>) -> mat3x3<f32> {\n // 🔧 匹配 CPU 实现:将 epsilon 加到 axis_angle 的每个分量上\n // CPU: Vector3f axis_angle_with_epsilon(axis_angle.x + epsilon, axis_angle.y + epsilon, axis_angle.z + epsilon);\n const EPSILON = 1e-8;\n let axisAngleWithEpsilon = axisAngle + vec3<f32>(EPSILON);\n\n // 计算旋转角度 θ = ||axis_angle + epsilon||\n let theta = length(axisAngleWithEpsilon);\n\n // 归一化得到旋转轴\n let axis = axisAngleWithEpsilon / theta;\n let c = cos(theta);\n let s = sin(theta);\n let t = 1.0 - c;\n\n let x = axis.x;\n let y = axis.y;\n let z = axis.z;\n\n // 旋转矩阵 (列主序) - 使用 Rodrigues 公式:R = I + sin(θ)*K + (1-cos(θ))*K²\n // 其中 K 是反对称矩阵,K² 是 K 的平方\n return mat3x3<f32>(\n vec3<f32>(t*x*x + c, t*x*y + s*z, t*x*z - s*y),\n vec3<f32>(t*x*y - s*z, t*y*y + c, t*y*z + s*x),\n vec3<f32>(t*x*z + s*y, t*y*z - s*x, t*z*z + c)\n );\n}\n\n/**\n * 构建 4x4 变换矩阵 (从旋转和平移)\n * @param rotation 3x3 旋转矩阵\n * @param translation 平移向量\n * @return 4x4 变换矩阵\n */\nfn makeTransform(rotation: mat3x3<f32>, translation: vec3<f32>) -> mat4x4<f32> {\n return mat4x4<f32>(\n vec4<f32>(rotation[0], 0.0),\n vec4<f32>(rotation[1], 0.0),\n vec4<f32>(rotation[2], 0.0),\n vec4<f32>(translation, 1.0)\n );\n}\n\n/**\n * 🚀 优化: 从 vec4 数组提取 float,使用数组索引消除分支\n */\nfn extractFloatExpr(arr: array<vec4<f32>, 25>, idx: u32) -> f32 {\n let vecIdx = idx / 4u;\n let offset = idx % 4u;\n let v = arr[vecIdx];\n // 使用数组字面量代替 if-else,GPU 可以优化为 swizzle\n return array<f32, 4>(v.x, v.y, v.z, v.w)[offset];\n}\n\n/**\n * 矩阵乘法 (mat3x3 * vec3)\n */\nfn matMulVec3(m: mat3x3<f32>, v: 
vec3<f32>) -> vec3<f32> {\n return vec3<f32>(\n dot(m[0], v),\n dot(m[1], v),\n dot(m[2], v)\n );\n}\n\n/**\n * 矩阵乘法 (mat3x3 * mat3x3)\n */\nfn matMul3x3(a: mat3x3<f32>, b: mat3x3<f32>) -> mat3x3<f32> {\n return mat3x3<f32>(\n matMulVec3(a, b[0]),\n matMulVec3(a, b[1]),\n matMulVec3(a, b[2])\n );\n}\n\n/**\n * 计算三角形法向量\n */\nfn computeTriangleNormal(v0: vec3<f32>, v1: vec3<f32>, v2: vec3<f32>) -> vec3<f32> {\n let edge1 = v1 - v0;\n let edge2 = v2 - v0;\n return normalize(cross(edge1, edge2));\n}\n\n/**\n * 计算三角形质心\n */\nfn computeTriangleCentroid(v0: vec3<f32>, v1: vec3<f32>, v2: vec3<f32>) -> vec3<f32> {\n return (v0 + v1 + v2) / 3.0;\n}\n\n/**\n * 四元数转旋转矩阵\n * @param q 四元数 (x, y, z, w)\n * @return 3x3 旋转矩阵\n */\nfn quaternionToMatrix(q: vec4<f32>) -> mat3x3<f32> {\n let x = q.x;\n let y = q.y;\n let z = q.z;\n let w = q.w;\n\n let x2 = x * x;\n let y2 = y * y;\n let z2 = z * z;\n let xy = x * y;\n let xz = x * z;\n let yz = y * z;\n let wx = w * x;\n let wy = w * y;\n let wz = w * z;\n\n return mat3x3<f32>(\n vec3<f32>(1.0 - 2.0*(y2 + z2), 2.0*(xy + wz), 2.0*(xz - wy)),\n vec3<f32>(2.0*(xy - wz), 1.0 - 2.0*(x2 + z2), 2.0*(yz + wx)),\n vec3<f32>(2.0*(xz + wy), 2.0*(yz - wx), 1.0 - 2.0*(x2 + y2))\n );\n}\n\n";
15094
+ const flameShapeBlendWGSL = "@group(0) @binding(0) var<uniform> metadata: FLAMEMetadata;\n@group(0) @binding(1) var<storage, read> activeShapeIndices: array<u32>; // 🚀 活跃shape参数索引\n@group(0) @binding(2) var<storage, read> activeShapeValues: array<f32>; // 🚀 活跃shape参数值\n@group(0) @binding(3) var<uniform> params: FLAMEParams;\n\n@group(1) @binding(0) var<storage, read> vTemplate: array<f32>;\n@group(1) @binding(1) var<storage, read> shapedirs: array<f32>;\n@group(1) @binding(2) var<storage, read_write> vShaped: array<f32>;\n@group(1) @binding(3) var<storage, read> staticOffset: array<f32>;\n\n@compute @workgroup_size(256)\nfn main(@builtin(global_invocation_id) globalId: vec3<u32>) {\n let vertexIdx = globalId.x;\n let vertexCount = metadata.vertexCount;\n \n // 🔧 边界检查:确保不处理超出范围的顶点\n if (vertexIdx >= vertexCount) {\n return;\n }\n\n let baseIdx = vertexIdx * 3u;\n var vertex = vec3<f32>(\n vTemplate[baseIdx],\n vTemplate[baseIdx + 1u],\n vTemplate[baseIdx + 2u]\n );\n\n let numExprParams = 100u;\n let numActiveShapeParams = metadata.activeShapeCount; // 🚀 使用活跃参数数量\n\n // 🚀 优化: 只循环活跃的shape参数(零参数过滤)\n for (var i = 0u; i < numActiveShapeParams; i++) {\n let shapeParamIdx = activeShapeIndices[i]; // 原始参数索引 [0, 300)\n let shapeParam = activeShapeValues[i]; // 参数值\n\n let offset = shapeParamIdx * vertexCount * 3u + vertexIdx * 3u;\n let dx = shapedirs[offset];\n let dy = shapedirs[offset + 1u];\n let dz = shapedirs[offset + 2u];\n\n vertex += vec3<f32>(dx, dy, dz) * shapeParam;\n }\n\n for (var e = 0u; e < numExprParams; e++) {\n let exprParam = extractFloatExpr(params.exprParams, e);\n let paramIdx = 300u + e; // shape参数固定为300个\n\n let offset = paramIdx * vertexCount * 3u + vertexIdx * 3u;\n let dx = shapedirs[offset];\n let dy = shapedirs[offset + 1u];\n let dz = shapedirs[offset + 2u];\n\n vertex += vec3<f32>(dx, dy, dz) * exprParam;\n }\n\n if (vertexIdx < metadata.staticOffsetCount) {\n vertex.x += staticOffset[baseIdx];\n vertex.y += staticOffset[baseIdx + 1u];\n vertex.z += staticOffset[baseIdx + 2u];\n }\n\n vShaped[baseIdx] = vertex.x;\n vShaped[baseIdx + 1u] = vertex.y;\n vShaped[baseIdx + 2u] = vertex.z;\n}\n\n";
15095
+ const flamePoseDeformWGSL = "@group(0) @binding(0) var<uniform> params: FLAMEParams;\n@group(0) @binding(1) var<uniform> metadata: FLAMEMetadata;\n\n@group(1) @binding(0) var<storage, read> vShaped: array<f32>;\n@group(1) @binding(1) var<storage, read> posedirs: array<f32>;\n@group(1) @binding(2) var<storage, read_write> vPosed: array<f32>;\n\n/**\n * 计算 pose_feature 向量 (36 维)\n *\n * 从 5 个关节的旋转参数计算:\n * - Joint 0 (global): 跳过\n * - Joints 1-4 (neck, jaw, left_eye, right_eye): 各贡献 9 个元素\n *\n * 🔧 关键修复: WGSL mat3x3 是列主序的!\n * - mat[col][row] = M[row][col]\n * - 要按 C++ 行主序展平 (M[0][0], M[0][1], M[0][2], M[1][0], ...)\n * - 必须用 mat[col][row] 访问 M[row][col]\n */\nfn computePoseFeature() -> array<f32, 36> {\n var pose_feature: array<f32, 36>;\n\n // Joint 1: neck (indices 0-8)\n let R_neck = rodrigues(vec3<f32>(params.neckPose.x, params.neckPose.y, params.neckPose.z));\n let I = mat3x3<f32>(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0);\n let rel_neck = R_neck - I;\n\n // 🔧 修复: 按行主序展平 - mat[col][row] 对应 M[row][col]\n // 第 0 行: M[0][0], M[0][1], M[0][2] = mat[0][0], mat[1][0], mat[2][0]\n pose_feature[0] = rel_neck[0][0]; pose_feature[1] = rel_neck[1][0]; pose_feature[2] = rel_neck[2][0];\n // 第 1 行: M[1][0], M[1][1], M[1][2] = mat[0][1], mat[1][1], mat[2][1]\n pose_feature[3] = rel_neck[0][1]; pose_feature[4] = rel_neck[1][1]; pose_feature[5] = rel_neck[2][1];\n // 第 2 行: M[2][0], M[2][1], M[2][2] = mat[0][2], mat[1][2], mat[2][2]\n pose_feature[6] = rel_neck[0][2]; pose_feature[7] = rel_neck[1][2]; pose_feature[8] = rel_neck[2][2];\n\n // Joint 2: jaw (indices 9-17)\n let R_jaw = rodrigues(vec3<f32>(params.jawPose.x, params.jawPose.y, params.jawPose.z));\n let rel_jaw = R_jaw - I;\n\n pose_feature[9] = rel_jaw[0][0]; pose_feature[10] = rel_jaw[1][0]; pose_feature[11] = rel_jaw[2][0];\n pose_feature[12] = rel_jaw[0][1]; pose_feature[13] = rel_jaw[1][1]; pose_feature[14] = rel_jaw[2][1];\n pose_feature[15] = rel_jaw[0][2]; pose_feature[16] = rel_jaw[1][2]; pose_feature[17] = rel_jaw[2][2];\n\n // Joint 3: left_eye (indices 18-26)\n let R_left_eye = rodrigues(vec3<f32>(params.eyesPose[0].x, params.eyesPose[0].y, params.eyesPose[0].z));\n let rel_left_eye = R_left_eye - I;\n\n pose_feature[18] = rel_left_eye[0][0]; pose_feature[19] = rel_left_eye[1][0]; pose_feature[20] = rel_left_eye[2][0];\n pose_feature[21] = rel_left_eye[0][1]; pose_feature[22] = rel_left_eye[1][1]; pose_feature[23] = rel_left_eye[2][1];\n pose_feature[24] = rel_left_eye[0][2]; pose_feature[25] = rel_left_eye[1][2]; pose_feature[26] = rel_left_eye[2][2];\n\n // Joint 4: right_eye (indices 27-35)\n let R_right_eye = rodrigues(vec3<f32>(params.eyesPose[1].x, params.eyesPose[1].y, params.eyesPose[1].z));\n let rel_right_eye = R_right_eye - I;\n\n pose_feature[27] = rel_right_eye[0][0]; pose_feature[28] = rel_right_eye[1][0]; pose_feature[29] = rel_right_eye[2][0];\n pose_feature[30] = rel_right_eye[0][1]; pose_feature[31] = rel_right_eye[1][1]; pose_feature[32] = rel_right_eye[2][1];\n pose_feature[33] = rel_right_eye[0][2]; pose_feature[34] = rel_right_eye[1][2]; pose_feature[35] = rel_right_eye[2][2];\n\n return pose_feature;\n}\n\n// ============================================================================\n// Compute Shader\n// ============================================================================\n\n@compute @workgroup_size(256)\nfn main(@builtin(global_invocation_id) globalId: vec3<u32>) {\n let vertexIdx = globalId.x;\n let vertexCount = metadata.vertexCount;\n \n // 🔧 边界检查:确保不处理超出范围的顶点\n if (vertexIdx >= 
vertexCount) {\n return;\n }\n\n let baseIdx = vertexIdx * 3u;\n var vertex = vec3<f32>(\n vShaped[baseIdx],\n vShaped[baseIdx + 1u],\n vShaped[baseIdx + 2u]\n );\n\n // 计算 pose_feature (36 维)\n let pose_feature = computePoseFeature();\n let poseParamCount = metadata.poseParamCount; // 36\n\n var pose_offset = vec3<f32>(0.0, 0.0, 0.0);\n\n // 🔧 使用转置布局 [param][vertex][xyz] (GPU优化后的布局)\n // GPU 数据已通过 transposeBlendshapeData 转置,布局为 [param][vertex][xyz]\n // 索引公式: p * vertexCount * 3 + v * 3 + xyz\n for (var p = 0u; p < poseParamCount; p++) {\n let feature = pose_feature[p];\n\n let offset = p * vertexCount * 3u + vertexIdx * 3u;\n let dx = posedirs[offset];\n let dy = posedirs[offset + 1u];\n let dz = posedirs[offset + 2u];\n\n pose_offset += vec3<f32>(dx, dy, dz) * feature;\n }\n\n // 应用姿态偏移\n vertex += pose_offset;\n\n // 写回结果\n vPosed[baseIdx] = vertex.x;\n vPosed[baseIdx + 1u] = vertex.y;\n vPosed[baseIdx + 2u] = vertex.z;\n}\n\n";
15096
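The pose-deformation kernel reads `posedirs` in the transposed `[param][vertex][xyz]` layout noted in its comments, i.e. `offset = p * vertexCount * 3 + v * 3`. A sequential equivalent of the same accumulation, shown only to make the indexing explicit (a sketch, not the package's CPU path):

```ts
// Sketch: accumulate pose-blendshape offsets for one vertex with the transposed layout.
function poseOffsetForVertex(
  posedirs: Float32Array,    // [poseParamCount × vertexCount × 3], transposed layout
  poseFeature: Float32Array, // 36 values from the relative joint rotations
  vertexCount: number,
  v: number
): [number, number, number] {
  let x = 0, y = 0, z = 0;
  for (let p = 0; p < poseFeature.length; p++) {
    const o = p * vertexCount * 3 + v * 3;
    x += posedirs[o] * poseFeature[p];
    y += posedirs[o + 1] * poseFeature[p];
    z += posedirs[o + 2] * poseFeature[p];
  }
  return [x, y, z];
}
```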
+ const flameJointRegressWGSL = "@group(0) @binding(1) var<uniform> metadata: FLAMEMetadata;\n\n@group(1) @binding(0) var<storage, read> vShaped: array<f32>; // 🔧 修复: 使用v_shaped而不是v_posed\n@group(1) @binding(1) var<storage, read> jRegressor: array<f32>;\n@group(1) @binding(2) var<storage, read_write> joints: array<f32>;\n\n\n// 每个 workgroup 的局部累加缓冲区 (256 threads × 3 coords)\nvar<workgroup> sharedSumX: array<f32, 256>;\nvar<workgroup> sharedSumY: array<f32, 256>;\nvar<workgroup> sharedSumZ: array<f32, 256>;\n\n// ============================================================================\n// Compute Shader\n// ============================================================================\n\n/**\n * 计算策略:\n * - 对于 5 个关节,使用 5 个 workgroups (每个 workgroup 256 threads)\n * - 每个 workgroup 处理一个关节的所有顶点\n * - 使用 shared memory 进行树状归约\n *\n * Dispatch: (5, 1, 1) workgroups × (256, 1, 1) threads\n */\n@compute @workgroup_size(256, 1, 1)\nfn main(\n @builtin(global_invocation_id) globalId: vec3<u32>,\n @builtin(local_invocation_id) localId: vec3<u32>,\n @builtin(workgroup_id) workgroupId: vec3<u32>\n) {\n let jointIdx = workgroupId.x;\n let threadIdx = localId.x;\n let vertexCount = metadata.vertexCount;\n let jointCount = metadata.jointCount;\n\n // 🔧 边界检查:确保不处理超出范围的关节\n if (jointIdx >= jointCount) {\n return;\n }\n\n // 初始化局部累加器\n var localSumX: f32 = 0.0;\n var localSumY: f32 = 0.0;\n var localSumZ: f32 = 0.0;\n\n // 每个线程处理多个顶点(循环展开)\n // 8031 vertices / 256 threads ≈ 32 iterations per thread\n let stride = 256u;\n var vertexIdx = threadIdx;\n\n // 🔧 修复: 使用v_shaped而不是v_posed(与CPU逻辑一致)\n while (vertexIdx < vertexCount) {\n let regressorIdx = jointIdx * vertexCount + vertexIdx;\n let weight = jRegressor[regressorIdx];\n let vIdx = vertexIdx * 3u;\n \n // 🔧 边界检查:确保不访问超出范围的顶点\n if (vIdx + 2u < arrayLength(&vShaped)) {\n let vx = vShaped[vIdx];\n let vy = vShaped[vIdx + 1u];\n let vz = vShaped[vIdx + 2u];\n\n localSumX += weight * vx;\n localSumY += weight * vy;\n localSumZ += weight * vz;\n }\n\n vertexIdx += stride;\n }\n\n // 写入 shared memory\n sharedSumX[threadIdx] = localSumX;\n sharedSumY[threadIdx] = localSumY;\n sharedSumZ[threadIdx] = localSumZ;\n\n // 同步所有线程\n workgroupBarrier();\n\n // 🚀 优化: 树状归约 (256 → 128 → 64 → 32,然后展开最后 5 次迭代)\n // 前 3 次迭代需要 barrier (128 → 64 → 32)\n var activeThreads = 128u;\n for (var i = 0u; i < 3u; i++) {\n if (threadIdx < activeThreads) {\n sharedSumX[threadIdx] += sharedSumX[threadIdx + activeThreads];\n sharedSumY[threadIdx] += sharedSumY[threadIdx + activeThreads];\n sharedSumZ[threadIdx] += sharedSumZ[threadIdx + activeThreads];\n }\n workgroupBarrier();\n activeThreads = activeThreads / 2u;\n }\n\n // 🔧 展开最后 5 次迭代 (32 → 16 → 8 → 4 → 2 → 1)\n // 注意:即使在同一 subgroup 内,也需要 barrier 以确保正确性\n if (threadIdx < 32u) {\n sharedSumX[threadIdx] += sharedSumX[threadIdx + 16u];\n sharedSumY[threadIdx] += sharedSumY[threadIdx + 16u];\n sharedSumZ[threadIdx] += sharedSumZ[threadIdx + 16u];\n }\n workgroupBarrier();\n \n if (threadIdx < 16u) {\n sharedSumX[threadIdx] += sharedSumX[threadIdx + 8u];\n sharedSumY[threadIdx] += sharedSumY[threadIdx + 8u];\n sharedSumZ[threadIdx] += sharedSumZ[threadIdx + 8u];\n }\n workgroupBarrier();\n \n if (threadIdx < 8u) {\n sharedSumX[threadIdx] += sharedSumX[threadIdx + 4u];\n sharedSumY[threadIdx] += sharedSumY[threadIdx + 4u];\n sharedSumZ[threadIdx] += sharedSumZ[threadIdx + 4u];\n }\n workgroupBarrier();\n \n if (threadIdx < 4u) {\n sharedSumX[threadIdx] += sharedSumX[threadIdx + 2u];\n sharedSumY[threadIdx] += sharedSumY[threadIdx + 2u];\n 
sharedSumZ[threadIdx] += sharedSumZ[threadIdx + 2u];\n }\n workgroupBarrier();\n \n if (threadIdx < 2u) {\n sharedSumX[threadIdx] += sharedSumX[threadIdx + 1u];\n sharedSumY[threadIdx] += sharedSumY[threadIdx + 1u];\n sharedSumZ[threadIdx] += sharedSumZ[threadIdx + 1u];\n }\n workgroupBarrier();\n\n // 线程 0 写入最终结果\n if (threadIdx == 0u) {\n let outputIdx = jointIdx * 3u;\n joints[outputIdx] = sharedSumX[0];\n joints[outputIdx + 1u] = sharedSumY[0];\n joints[outputIdx + 2u] = sharedSumZ[0];\n }\n}\n\n";
15097
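The joint-regression kernel is a parallel tree reduction of a dense matrix-vector product: per joint, a weighted sum of all `v_shaped` vertices. Written sequentially it is simply the loop below (illustrative sketch only; the shader splits it across 256 threads and reduces in shared memory):

```ts
// Sketch: the per-joint weighted sum that each workgroup reduces.
function regressJoint(
  jRegressor: Float32Array, // [jointCount × vertexCount]
  vShaped: Float32Array,    // [vertexCount × 3]
  vertexCount: number,
  joint: number
): [number, number, number] {
  let x = 0, y = 0, z = 0;
  for (let v = 0; v < vertexCount; v++) {
    const w = jRegressor[joint * vertexCount + v];
    x += w * vShaped[v * 3];
    y += w * vShaped[v * 3 + 1];
    z += w * vShaped[v * 3 + 2];
  }
  return [x, y, z];
}
```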
+ const flameFKinematicsWGSL = "@group(0) @binding(0) var<uniform> params: FLAMEParams;\n@group(0) @binding(1) var<uniform> metadata: FLAMEMetadata;\n\n@group(1) @binding(0) var<storage, read> joints: array<f32>;\n@group(1) @binding(1) var<storage, read_write> jointTransforms: array<f32>;\n\n/**\n * 写入 4x4 矩阵到扁平数组 (列主序)\n */\nfn writeMat4(buffer: ptr<storage, array<f32>, read_write>, offset: u32, m: mat4x4<f32>) {\n (*buffer)[offset + 0u] = m[0][0];\n (*buffer)[offset + 1u] = m[0][1];\n (*buffer)[offset + 2u] = m[0][2];\n (*buffer)[offset + 3u] = m[0][3];\n (*buffer)[offset + 4u] = m[1][0];\n (*buffer)[offset + 5u] = m[1][1];\n (*buffer)[offset + 6u] = m[1][2];\n (*buffer)[offset + 7u] = m[1][3];\n (*buffer)[offset + 8u] = m[2][0];\n (*buffer)[offset + 9u] = m[2][1];\n (*buffer)[offset + 10u] = m[2][2];\n (*buffer)[offset + 11u] = m[2][3];\n (*buffer)[offset + 12u] = m[3][0];\n (*buffer)[offset + 13u] = m[3][1];\n (*buffer)[offset + 14u] = m[3][2];\n (*buffer)[offset + 15u] = m[3][3];\n}\n\n/**\n * 4x4 矩阵求逆 (简化版,假设是变换矩阵)\n * 对于变换矩阵 [R | t; 0 | 1],逆矩阵为 [R^T | -R^T*t; 0 | 1]\n *\n * 🔧 关键:WGSL 使用列主序!\n * m[0] = 第1列, m[1] = 第2列, m[2] = 第3列, m[3] = 第4列\n */\nfn invertTransform(m: mat4x4<f32>) -> mat4x4<f32> {\n // 🔧 正确提取旋转部分 (列主序)\n // m[col][row] -> M[row][col]\n let r00 = m[0][0]; let r01 = m[1][0]; let r02 = m[2][0]; // 第1行\n let r10 = m[0][1]; let r11 = m[1][1]; let r12 = m[2][1]; // 第2行\n let r20 = m[0][2]; let r21 = m[1][2]; let r22 = m[2][2]; // 第3行\n\n // 🔧 正确提取平移部分 (第4列)\n let tx = m[3][0]; // M[0][3]\n let ty = m[3][1]; // M[1][3]\n let tz = m[3][2]; // M[2][3]\n\n // 计算 R^T (旋转矩阵的转置)\n let rt00 = r00; let rt01 = r10; let rt02 = r20;\n let rt10 = r01; let rt11 = r11; let rt12 = r21;\n let rt20 = r02; let rt21 = r12; let rt22 = r22;\n\n // 计算 -R^T * t\n let ntx = -(rt00 * tx + rt01 * ty + rt02 * tz);\n let nty = -(rt10 * tx + rt11 * ty + rt12 * tz);\n let ntz = -(rt20 * tx + rt21 * ty + rt22 * tz);\n\n // 🔧 构建逆矩阵 (列主序)\n return mat4x4<f32>(\n vec4<f32>(rt00, rt10, rt20, 0.0), // 第1列\n vec4<f32>(rt01, rt11, rt21, 0.0), // 第2列\n vec4<f32>(rt02, rt12, rt22, 0.0), // 第3列\n vec4<f32>(ntx, nty, ntz, 1.0) // 第4列\n );\n}\n\n// ============================================================================\n// Compute Shader\n// ============================================================================\n\n@group(1) @binding(2) var<storage, read> parents: array<i32>;\n\n@compute @workgroup_size(1)\nfn main() {\n // 🔧 严格按照 CPU 路径逻辑:FLAME 标准有 5 个关节\n // CPU: poseToRotationMatrices 硬编码 5 个旋转矩阵\n // CPU: skinVerticesFlat 硬编码 5 个关节\n const NUM_JOINTS = 5u;\n \n // 🔧 使用 metadata 以防止编译器优化掉 binding\n // 确保 metadata 被实际使用(即使我们硬编码了 5)\n let jointCountFromMetadata = metadata.jointCount;\n // 验证:如果 metadata 中的 jointCount 不是 5,可能会有问题,但我们仍然使用硬编码的 5\n // 这只是为了防止编译器优化掉 metadata binding\n if (jointCountFromMetadata < NUM_JOINTS) {\n // 这个分支永远不会执行,但确保 metadata 被读取\n }\n \n // ========== Step 1: 读取关节位置 (绝对位置) ==========\n // 🔧 只读取前 5 个关节(与 CPU 逻辑一致)\n var J: array<vec3<f32>, NUM_JOINTS>;\n for (var i = 0u; i < NUM_JOINTS; i++) {\n let idx = i * 3u;\n J[i] = vec3<f32>(joints[idx], joints[idx + 1u], joints[idx + 2u]);\n }\n\n // ========== Step 2: 计算相对关节位置 ==========\n // C++: rel_joints[i] = joints[i] - joints[parents[i]]\n // CPU: for (size_t i = 1; i < num_joints; i++) { rel_joints[i] = rel_joints[i] - joints[parents[i]]; }\n var rel_J: array<vec3<f32>, NUM_JOINTS>;\n rel_J[0] = J[0]; // root: 无父关节\n for (var i = 1u; i < NUM_JOINTS; i++) {\n let parentIdx = parents[i];\n if (parentIdx >= 0) {\n rel_J[i] = J[i] - 
J[u32(parentIdx)];\n } else {\n rel_J[i] = J[i]; // 如果 parent 无效,使用绝对位置\n }\n }\n\n // ========== Step 3: 准备 pose 旋转矩阵 ==========\n // 🔧 严格按照 CPU: poseToRotationMatrices 从 15 维 full_pose 转换为 5 个旋转矩阵\n // full_pose[0:3] = rotation, [3:6] = neck, [6:9] = jaw, [9:12] = left_eye, [12:15] = right_eye\n let globalRotation = params.rotation.xyz; // full_pose[0:3]\n let neckPose = params.neckPose.xyz; // full_pose[3:6]\n let jawPose = params.jawPose.xyz; // full_pose[6:9]\n let leftEyePose = params.eyesPose[0].xyz; // full_pose[9:12] = [left_eye_pitch, left_eye_yaw, left_eye_roll]\n let rightEyePose = params.eyesPose[1].xyz; // full_pose[12:15] = [right_eye_pitch, right_eye_yaw, right_eye_roll]\n\n var R: array<mat3x3<f32>, NUM_JOINTS>;\n R[0] = rodrigues(globalRotation);\n R[1] = rodrigues(neckPose);\n R[2] = rodrigues(jawPose);\n R[3] = rodrigues(leftEyePose);\n R[4] = rodrigues(rightEyePose);\n\n // ========== Step 4: 创建局部变换矩阵 ==========\n // C++: transforms_mat[i] = Matrix4f(pose_matrices[i], rel_joints[i])\n var transforms_mat: array<mat4x4<f32>, NUM_JOINTS>;\n for (var i = 0u; i < NUM_JOINTS; i++) {\n transforms_mat[i] = makeTransform(R[i], rel_J[i]);\n }\n\n // ========== Step 5: 构建变换链 ==========\n // C++: transform_chain[0] = transforms_mat[0]\n // for (size_t i = 1; i < num_joints; i++) {\n // transform_chain[i] = transform_chain[parent] * transforms_mat[i]\n // }\n var transform_chain: array<mat4x4<f32>, NUM_JOINTS>;\n transform_chain[0] = transforms_mat[0]; // root\n \n for (var i = 1u; i < NUM_JOINTS; i++) {\n let parentIdx = parents[i];\n if (parentIdx >= 0 && parentIdx < i32(NUM_JOINTS)) {\n transform_chain[i] = transform_chain[u32(parentIdx)] * transforms_mat[i];\n } else {\n // CPU: 如果 parent 无效,使用 Identity\n transform_chain[i] = transforms_mat[i];\n }\n }\n\n // ========== Step 6: 应用 bind pose inverse ==========\n // C++: bind_pose = Matrix4f(Identity, joints[i])\n // rel_transforms[i] = transform_chain[i] * bind_pose.inverse()\n for (var i = 0u; i < NUM_JOINTS; i++) {\n let bind_pose = mat4x4<f32>(\n vec4<f32>(1.0, 0.0, 0.0, 0.0),\n vec4<f32>(0.0, 1.0, 0.0, 0.0),\n vec4<f32>(0.0, 0.0, 1.0, 0.0),\n vec4<f32>(J[i].x, J[i].y, J[i].z, 1.0)\n );\n\n let bind_pose_inv = invertTransform(bind_pose);\n let rel_transform = transform_chain[i] * bind_pose_inv;\n\n writeMat4(&jointTransforms, i * 16u, rel_transform);\n }\n}\n\n";
15098
+ const flameLBSWGSL = "@group(0) @binding(0) var<uniform> metadata: FLAMEMetadata;\n\n@group(1) @binding(0) var<storage, read> vPosed: array<f32>;\n@group(1) @binding(1) var<storage, read> jointTransforms: array<f32>;\n@group(1) @binding(2) var<storage, read> lbsWeights: array<f32>;\n@group(1) @binding(3) var<storage, read_write> vDeformed: array<f32>;\n\n// 🚀 优化: 使用 shared memory 缓存 joint transforms (5 joints × 16 floats = 80 floats)\n// 所有线程都需要读取相同的 5 个 joint transforms,缓存到 shared memory 可以显著提升性能\nvar<workgroup> sharedJointTransforms: array<f32, 80>; // 5 joints × 16 floats\n\nfn readMat4FromShared(jointIdx: u32) -> mat4x4<f32> {\n let offset = jointIdx * 16u;\n return mat4x4<f32>(\n vec4<f32>(sharedJointTransforms[offset + 0u], sharedJointTransforms[offset + 1u], sharedJointTransforms[offset + 2u], sharedJointTransforms[offset + 3u]),\n vec4<f32>(sharedJointTransforms[offset + 4u], sharedJointTransforms[offset + 5u], sharedJointTransforms[offset + 6u], sharedJointTransforms[offset + 7u]),\n vec4<f32>(sharedJointTransforms[offset + 8u], sharedJointTransforms[offset + 9u], sharedJointTransforms[offset + 10u], sharedJointTransforms[offset + 11u]),\n vec4<f32>(sharedJointTransforms[offset + 12u], sharedJointTransforms[offset + 13u], sharedJointTransforms[offset + 14u], sharedJointTransforms[offset + 15u])\n );\n}\n\nfn transformPoint(m: mat4x4<f32>, p: vec3<f32>) -> vec3<f32> {\n let p4 = vec4<f32>(p, 1.0);\n let transformed = m * p4;\n return transformed.xyz;\n}\n\n@compute @workgroup_size(256)\nfn main(\n @builtin(global_invocation_id) globalId: vec3<u32>,\n @builtin(local_invocation_id) localId: vec3<u32>\n) {\n let vertexIdx = globalId.x;\n let threadIdx = localId.x;\n\n // 🚀 优化: 使用 shared memory 加载 joint transforms\n // 每个 workgroup 只需要加载一次,所有线程共享\n // 使用前 5 个线程加载 5 个 joint transforms (每个 16 floats)\n const NUM_JOINTS = 5u;\n if (threadIdx < NUM_JOINTS) {\n let globalOffset = threadIdx * 16u;\n let sharedOffset = threadIdx * 16u;\n // 加载 16 floats (一个 mat4x4)\n for (var i = 0u; i < 16u; i++) {\n sharedJointTransforms[sharedOffset + i] = jointTransforms[globalOffset + i];\n }\n }\n\n // 同步所有线程,确保 shared memory 已加载完成\n // 🔧 必须在边界检查之前执行 barrier,以保证 uniform control flow\n workgroupBarrier();\n\n // 🔧 边界检查:确保不处理超出范围的顶点(在 barrier 之后)\n let vertexCount = metadata.vertexCount;\n if (vertexIdx >= vertexCount) {\n return;\n }\n\n let baseIdx = vertexIdx * 3u;\n let vertex = vec3<f32>(\n vPosed[baseIdx],\n vPosed[baseIdx + 1u],\n vPosed[baseIdx + 2u]\n );\n\n // 🔧 严格按照 CPU 路径逻辑:FLAME 标准有 5 个关节\n // 🔧 使用 metadata 以防止编译器优化掉 binding\n let _jointCount = metadata.jointCount; // 防止优化,但实际使用硬编码的 5\n let weightBase = vertexIdx * NUM_JOINTS;\n\n // 🔧 完全按照 CPU 逻辑:只使用前 5 个关节的权重和变换矩阵\n let w0 = lbsWeights[weightBase + 0u];\n let w1 = lbsWeights[weightBase + 1u];\n let w2 = lbsWeights[weightBase + 2u];\n let w3 = lbsWeights[weightBase + 3u];\n let w4 = lbsWeights[weightBase + 4u];\n\n // 🚀 优化: 从 shared memory 读取 joint transforms\n let T0 = readMat4FromShared(0u);\n let T1 = readMat4FromShared(1u);\n let T2 = readMat4FromShared(2u);\n let T3 = readMat4FromShared(3u);\n let T4 = readMat4FromShared(4u);\n\n // 🔧 完全按照 CPU 逻辑:内联 transformPoint 并加权\n // 🔧 CPU的skinVerticesFlat不包含全局平移,所以这里也不应用\n var result = transformPoint(T0, vertex) * w0 +\n transformPoint(T1, vertex) * w1 +\n transformPoint(T2, vertex) * w2 +\n transformPoint(T3, vertex) * w3 +\n transformPoint(T4, vertex) * w4;\n\n // ❌ 移除:CPU的skinVerticesFlat不包含全局平移\n // 全局平移应该在LBS之后、Face Geometry之前单独应用(如果需要)\n // result += params.translation.xyz;\n\n 
vDeformed[baseIdx] = result.x;\n vDeformed[baseIdx + 1u] = result.y;\n vDeformed[baseIdx + 2u] = result.z;\n}\n\n";
15099
+ const flameFaceGeometryWGSL = "/**\n * FLAME Face Geometry Compute Shader\n *\n * 为每个三角形面片计算几何信息:\n * - center: 面片重心 (v0 + v1 + v2) / 3\n * - scale: 面片缩放(基于面积)\n * - quaternion: 面片方向(从局部坐标系转换)\n *\n * 这些几何信息用于后续的 3DGS splat 变换\n *\n * Input:\n * - v_deformed: [vertexCount × 3] 最终变形顶点\n * - faces: [faceCount × 3] 面片索引\n *\n * Output:\n * - faceGeometries: [faceCount × 8] (center xyz, scale, quat xyzw)\n */\n\n// 导入公共定义\n// (WGSL 不支持 #include,在 TypeScript 中手动拼接)\n\n@group(0) @binding(0) var<uniform> params: FLAMEParams;\n@group(0) @binding(1) var<uniform> metadata: FLAMEMetadata;\n\n@group(1) @binding(0) var<storage, read> vDeformed: array<f32>;\n@group(1) @binding(1) var<storage, read> faces: array<u32>;\n@group(1) @binding(2) var<storage, read_write> faceGeometries: array<f32>;\n\n/**\n * 安全归一化向量(处理零长度情况)\n * 🔧 与CPU实现一致:长度为0时返回零向量\n */\nfn safeNormalize(v: vec3<f32>) -> vec3<f32> {\n let len = length(v);\n if (len > 1e-8) {\n return v / len;\n } else {\n // 🔧 与CPU一致:返回零向量(而不是默认方向)\n return vec3<f32>(0.0, 0.0, 0.0);\n }\n}\n\n/**\n * 3x3 旋转矩阵转四元数 (xyzw 顺序)\n *\n * 使用 Shepperd's method 确保数值稳定性\n */\nfn matrixToQuaternion(m: mat3x3<f32>) -> vec4<f32> {\n let trace = m[0][0] + m[1][1] + m[2][2];\n\n var quat: vec4<f32>;\n\n if (trace > 0.0) {\n // w 是最大分量\n let s = sqrt(trace + 1.0) * 2.0; // s = 4 * w\n quat.w = 0.25 * s;\n quat.x = (m[2][1] - m[1][2]) / s;\n quat.y = (m[0][2] - m[2][0]) / s;\n quat.z = (m[1][0] - m[0][1]) / s;\n } else if (m[0][0] > m[1][1] && m[0][0] > m[2][2]) {\n // x 是最大分量\n let s = sqrt(1.0 + m[0][0] - m[1][1] - m[2][2]) * 2.0; // s = 4 * x\n quat.w = (m[2][1] - m[1][2]) / s;\n quat.x = 0.25 * s;\n quat.y = (m[0][1] + m[1][0]) / s;\n quat.z = (m[0][2] + m[2][0]) / s;\n } else if (m[1][1] > m[2][2]) {\n // y 是最大分量\n let s = sqrt(1.0 + m[1][1] - m[0][0] - m[2][2]) * 2.0; // s = 4 * y\n quat.w = (m[0][2] - m[2][0]) / s;\n quat.x = (m[0][1] + m[1][0]) / s;\n quat.y = 0.25 * s;\n quat.z = (m[1][2] + m[2][1]) / s;\n } else {\n // z 是最大分量\n let s = sqrt(1.0 + m[2][2] - m[0][0] - m[1][1]) * 2.0; // s = 4 * z\n quat.w = (m[1][0] - m[0][1]) / s;\n quat.x = (m[0][2] + m[2][0]) / s;\n quat.y = (m[1][2] + m[2][1]) / s;\n quat.z = 0.25 * s;\n }\n\n // 手动归一化(与CPU实现一致)\n let len = length(quat);\n if (len > 1e-8) {\n // 🔧 确保 w 分量为正,消除符号歧义(q 和 -q 表示同一个旋转)\n // 在归一化之前检查未归一化的w(与CPU实现完全一致)\n var normalized = quat;\n if (quat.w < 0.0) {\n normalized.x = -quat.x;\n normalized.y = -quat.y;\n normalized.z = -quat.z;\n normalized.w = -quat.w;\n }\n \n // 然后归一化\n return normalized / len;\n } else {\n // 默认单位四元数\n return vec4<f32>(0.0, 0.0, 0.0, 1.0);\n }\n}\n\nfn computeFaceOrientationAndScaling(\n v0: vec3<f32>,\n v1: vec3<f32>,\n v2: vec3<f32>\n) -> mat3x3<f32> {\n let edge1 = v1 - v0;\n let edge2 = v2 - v0;\n\n let axis0 = safeNormalize(edge1);\n let tempAxis1 = cross(axis0, edge2);\n let axis1 = safeNormalize(tempAxis1);\n let tempAxis2 = cross(axis1, axis0);\n let axis2 = safeNormalize(tempAxis2) * -1.0;\n\n return mat3x3<f32>(\n axis0.x, axis1.x, axis2.x,\n axis0.y, axis1.y, axis2.y,\n axis0.z, axis1.z, axis2.z\n );\n}\n\n@compute @workgroup_size(256)\nfn main(@builtin(global_invocation_id) globalId: vec3<u32>) {\n let faceIdx = globalId.x;\n \n // 🔧 读取metadata以防止编译器优化掉binding\n let _faceCount = metadata.faceCount;\n let _vertexCount = metadata.vertexCount;\n\n let faceBaseIdx = faceIdx * 3u;\n let idx0 = faces[faceBaseIdx];\n let idx1 = faces[faceBaseIdx + 1u];\n let idx2 = faces[faceBaseIdx + 2u];\n\n // 🔧 读取顶点并应用全局平移(与CPU逻辑一致:CPU的Face Geometry使用已应用平移的顶点)\n let v0 = vec3<f32>(\n 
vDeformed[idx0 * 3u],\n vDeformed[idx0 * 3u + 1u],\n vDeformed[idx0 * 3u + 2u]\n ) + params.translation.xyz;\n\n let v1 = vec3<f32>(\n vDeformed[idx1 * 3u],\n vDeformed[idx1 * 3u + 1u],\n vDeformed[idx1 * 3u + 2u]\n ) + params.translation.xyz;\n\n let v2 = vec3<f32>(\n vDeformed[idx2 * 3u],\n vDeformed[idx2 * 3u + 1u],\n vDeformed[idx2 * 3u + 2u]\n ) + params.translation.xyz;\n\n let center = (v0 + v1 + v2) / 3.0;\n\n let edge1 = v1 - v0;\n let edge2 = v2 - v0;\n let orientationMatrix = computeFaceOrientationAndScaling(v0, v1, v2);\n\n let axis0 = safeNormalize(edge1);\n let tempAxis1 = cross(axis0, edge2);\n let axis1 = safeNormalize(tempAxis1);\n let tempAxis2 = cross(axis1, axis0);\n let axis2 = safeNormalize(tempAxis2) * -1.0;\n\n let s0 = length(edge1);\n let s1 = abs(dot(axis2, edge2));\n let scale = (s0 + s1) / 2.0;\n\n let quat = matrixToQuaternion(orientationMatrix);\n\n let outputIdx = faceIdx * 8u;\n faceGeometries[outputIdx] = center.x;\n faceGeometries[outputIdx + 1u] = center.y;\n faceGeometries[outputIdx + 2u] = center.z;\n faceGeometries[outputIdx + 3u] = scale;\n faceGeometries[outputIdx + 4u] = quat.x;\n faceGeometries[outputIdx + 5u] = quat.y;\n faceGeometries[outputIdx + 6u] = quat.z;\n faceGeometries[outputIdx + 7u] = quat.w;\n}\n\n";
15100
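Each record the face-geometry kernel writes is 8 floats: `center.xyz`, `scale`, `quat.xyzw`. A sketch of unpacking one record from a CPU readback of that buffer, purely for illustration (the field names below are assumptions, not package types):

```ts
// Sketch: unpack one face-geometry record (8 floats) from a readback Float32Array.
interface FaceGeometry {
  center: [number, number, number];
  scale: number;
  quat: [number, number, number, number]; // xyzw
}

function readFaceGeometry(data: Float32Array, faceIdx: number): FaceGeometry {
  const o = faceIdx * 8;
  return {
    center: [data[o], data[o + 1], data[o + 2]],
    scale: data[o + 3],
    quat: [data[o + 4], data[o + 5], data[o + 6], data[o + 7]],
  };
}
```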
+ class FLAMEPipeline {
15101
+ // Face geometry output [faceCount × 8]
15102
+ constructor(device, buffers, vertexCount, faceCount, jointCount) {
15103
+ __publicField(this, "device");
15104
+ __publicField(this, "buffers");
15105
+ // Metadata
15106
+ __publicField(this, "vertexCount");
15107
+ __publicField(this, "faceCount");
15108
+ __publicField(this, "jointCount");
15109
+ // Compute pipelines
15110
+ __publicField(this, "shapeBlendPipeline");
15111
+ __publicField(this, "poseDeformPipeline");
15112
+ __publicField(this, "jointRegressPipeline");
15113
+ __publicField(this, "fkPipeline");
15114
+ __publicField(this, "lbsPipeline");
15115
+ __publicField(this, "faceGeometryPipeline");
15116
+ __publicField(this, "shapeBlendParamsBindGroup");
15117
+ __publicField(this, "poseDeformParamsBindGroup");
15118
+ __publicField(this, "jointRegressMetadataBindGroup");
15119
+ __publicField(this, "fkParamsBindGroup");
15120
+ __publicField(this, "lbsMetadataBindGroup");
15121
+ __publicField(this, "shapeBlendBindGroup");
15122
+ __publicField(this, "poseDeformBindGroup");
15123
+ __publicField(this, "jointRegressBindGroup");
15124
+ __publicField(this, "fkBindGroup");
15125
+ __publicField(this, "lbsBindGroup");
15126
+ __publicField(this, "faceGeometryParamsBindGroup");
15127
+ __publicField(this, "faceGeometryBindGroup");
15128
+ // Intermediate buffers
15129
+ __publicField(this, "vShapedBuffer");
15130
+ // Shape blending output
15131
+ __publicField(this, "vPosedBuffer");
15132
+ // Pose deformation output
15133
+ __publicField(this, "jointsBuffer");
15134
+ // Joint regression output [jointCount × 3]
15135
+ __publicField(this, "jointTransformsBuffer");
15136
+ // FK output [jointCount × 16] (mat4)
15137
+ __publicField(this, "vDeformedBuffer");
15138
+ // LBS output
15139
+ __publicField(this, "faceGeometriesBuffer");
15140
+ this.device = device;
15141
+ this.buffers = buffers;
15142
+ this.vertexCount = vertexCount;
15143
+ this.faceCount = faceCount;
15144
+ this.jointCount = jointCount;
15145
+ this.initialize();
15146
+ }
15147
+ initialize() {
15148
+ logger.log("🔧 Initializing FLAME Pipeline...");
15149
+ this.createIntermediateBuffers();
15150
+ this.clearIntermediateBuffers();
15151
+ this.createComputePipelines();
15152
+ this.createBindGroups();
15153
+ logger.log("✅ FLAME Pipeline initialized");
15154
+ }
15155
+ /**
15156
+ * Create intermediate buffers
15157
+ */
15158
+ createIntermediateBuffers() {
15159
+ const vertexBufferSize = this.vertexCount * 3 * 4;
15160
+ const safeJointCount = Math.max(1, this.jointCount);
15161
+ const jointBufferSize = safeJointCount * 3 * 4;
15162
+ const transformBufferSize = safeJointCount * 16 * 4;
15163
+ const faceGeometryBufferSize = this.faceCount * 8 * 4;
15164
+ const minBufferSize = 4;
15165
+ this.vShapedBuffer = this.device.createBuffer({
15166
+ label: "v_shaped",
15167
+ size: Math.max(vertexBufferSize, minBufferSize),
15168
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST
15169
+ });
15170
+ this.vPosedBuffer = this.device.createBuffer({
15171
+ label: "v_posed",
15172
+ size: Math.max(vertexBufferSize, minBufferSize),
15173
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST
15174
+ });
15175
+ this.jointsBuffer = this.device.createBuffer({
15176
+ label: "joints",
15177
+ size: Math.max(jointBufferSize, minBufferSize),
15178
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST
15179
+ });
15180
+ this.jointTransformsBuffer = this.device.createBuffer({
15181
+ label: "joint_transforms",
15182
+ size: Math.max(transformBufferSize, minBufferSize),
15183
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST
15184
+ });
15185
+ this.vDeformedBuffer = this.device.createBuffer({
15186
+ label: "v_deformed",
15187
+ size: vertexBufferSize,
15188
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST
15189
+ });
15190
+ this.faceGeometriesBuffer = this.device.createBuffer({
15191
+ label: "face_geometries",
15192
+ size: faceGeometryBufferSize,
15193
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST
15194
+ });
15195
+ }
15196
+ /**
15197
+ * Zero all intermediate buffers (avoids uninitialized garbage data)
15198
+ * 🔧 Key fix: if a vertex's weights are all 0, the LBS shader skips the write, leaving garbage data in place
15199
+ */
15200
+ clearIntermediateBuffers() {
15201
+ const encoder = this.device.createCommandEncoder({ label: "Clear FLAME Buffers" });
15202
+ encoder.clearBuffer(this.vShapedBuffer);
15203
+ encoder.clearBuffer(this.vPosedBuffer);
15204
+ encoder.clearBuffer(this.jointsBuffer);
15205
+ encoder.clearBuffer(this.jointTransformsBuffer);
15206
+ encoder.clearBuffer(this.vDeformedBuffer);
15207
+ encoder.clearBuffer(this.faceGeometriesBuffer);
15208
+ this.device.queue.submit([encoder.finish()]);
15209
+ logger.log("🧹 Cleared all intermediate FLAME buffers");
15210
+ }
15211
+ /**
15212
+ * Create compute pipelines
15213
+ */
15214
+ createComputePipelines() {
15215
+ this.shapeBlendPipeline = this.createPipeline(
15216
+ flameShapeBlendWGSL,
15217
+ "Shape Blending Pipeline"
15218
+ );
15219
+ this.poseDeformPipeline = this.createPipeline(
15220
+ flamePoseDeformWGSL,
15221
+ "Pose Deformation Pipeline"
15222
+ );
15223
+ this.jointRegressPipeline = this.createPipeline(
15224
+ flameJointRegressWGSL,
15225
+ "Joint Regression Pipeline"
15226
+ );
15227
+ this.fkPipeline = this.createPipeline(
15228
+ flameFKinematicsWGSL,
15229
+ "Forward Kinematics Pipeline"
15230
+ );
15231
+ this.lbsPipeline = this.createPipeline(
15232
+ flameLBSWGSL,
15233
+ "LBS Skinning Pipeline"
15234
+ );
15235
+ this.faceGeometryPipeline = this.createPipeline(
15236
+ flameFaceGeometryWGSL,
15237
+ "Face Geometry Pipeline"
15238
+ );
15239
+ }
15240
+ /**
15241
+ * Create a single compute pipeline
15242
+ */
15243
+ createPipeline(shaderCode, label) {
15244
+ const fullShaderCode = flameCommonWGSL + "\n" + shaderCode;
15245
+ const shaderModule = this.device.createShaderModule({
15246
+ label: `${label} Shader`,
15247
+ code: fullShaderCode
15248
+ });
15249
+ return this.device.createComputePipeline({
15250
+ label,
15251
+ layout: "auto",
15252
+ compute: {
15253
+ module: shaderModule,
15254
+ entryPoint: "main"
15255
+ }
15256
+ });
15257
+ }
15258
+ /**
15259
+ * Create bind groups
15260
+ */
15261
+ createBindGroups() {
15262
+ this.shapeBlendParamsBindGroup = this.device.createBindGroup({
15263
+ label: "Shape Blend Params Bind Group",
15264
+ layout: this.shapeBlendPipeline.getBindGroupLayout(0),
15265
+ entries: [
15266
+ { binding: 0, resource: { buffer: this.buffers.metadata } },
15267
+ { binding: 1, resource: { buffer: this.buffers.activeShapeIndices } },
15268
+ // 🚀 Active shape parameter indices
15269
+ { binding: 2, resource: { buffer: this.buffers.activeShapeValues } },
15270
+ // 🚀 Active shape parameter values
15271
+ { binding: 3, resource: { buffer: this.buffers.frameParams } }
15272
+ // Dynamic uniform buffer
15273
+ ]
15274
+ });
15275
+ this.poseDeformParamsBindGroup = this.device.createBindGroup({
15276
+ label: "Pose Deform Params Bind Group",
15277
+ layout: this.poseDeformPipeline.getBindGroupLayout(0),
15278
+ entries: [
15279
+ { binding: 0, resource: { buffer: this.buffers.frameParams } },
15280
+ { binding: 1, resource: { buffer: this.buffers.metadata } }
15281
+ ]
15282
+ });
15283
+ this.fkParamsBindGroup = this.device.createBindGroup({
15284
+ label: "FK Params Bind Group",
15285
+ layout: this.fkPipeline.getBindGroupLayout(0),
15286
+ entries: [
15287
+ { binding: 0, resource: { buffer: this.buffers.frameParams } },
15288
+ { binding: 1, resource: { buffer: this.buffers.metadata } }
15289
+ ]
15290
+ });
15291
+ this.jointRegressMetadataBindGroup = this.device.createBindGroup({
15292
+ label: "Joint Regress Metadata Bind Group",
15293
+ layout: this.jointRegressPipeline.getBindGroupLayout(0),
15294
+ entries: [
15295
+ { binding: 1, resource: { buffer: this.buffers.metadata } }
15296
+ ]
15297
+ });
15298
+ this.lbsMetadataBindGroup = this.device.createBindGroup({
15299
+ label: "LBS Metadata Bind Group",
15300
+ layout: this.lbsPipeline.getBindGroupLayout(0),
15301
+ entries: [
15302
+ { binding: 0, resource: { buffer: this.buffers.metadata } }
15303
+ ]
15304
+ });
15305
+ const staticOffsetBuffer = this.buffers.staticOffset || this.device.createBuffer({
15306
+ label: "dummy_static_offset",
15307
+ size: 4,
15308
+ // Minimum buffer size
15309
+ usage: GPUBufferUsage.STORAGE
15310
+ });
15311
+ this.shapeBlendBindGroup = this.device.createBindGroup({
15312
+ label: "Shape Blend Bind Group",
15313
+ layout: this.shapeBlendPipeline.getBindGroupLayout(1),
15314
+ entries: [
15315
+ { binding: 0, resource: { buffer: this.buffers.vTemplate } },
15316
+ { binding: 1, resource: { buffer: this.buffers.shapedirs } },
15317
+ { binding: 2, resource: { buffer: this.vShapedBuffer } },
15318
+ { binding: 3, resource: { buffer: staticOffsetBuffer } }
15319
+ ]
15320
+ });
15321
+ this.poseDeformBindGroup = this.device.createBindGroup({
15322
+ label: "Pose Deform Bind Group",
15323
+ layout: this.poseDeformPipeline.getBindGroupLayout(1),
15324
+ entries: [
15325
+ { binding: 0, resource: { buffer: this.vShapedBuffer } },
15326
+ { binding: 1, resource: { buffer: this.buffers.posedirs } },
15327
+ { binding: 2, resource: { buffer: this.vPosedBuffer } }
15328
+ ]
15329
+ });
15330
+ this.jointRegressBindGroup = this.device.createBindGroup({
15331
+ label: "Joint Regress Bind Group",
15332
+ layout: this.jointRegressPipeline.getBindGroupLayout(1),
15333
+ entries: [
15334
+ { binding: 0, resource: { buffer: this.vShapedBuffer } },
15335
+ // 🔧 Fix: use v_shaped
15336
+ { binding: 1, resource: { buffer: this.buffers.jRegressor } },
15337
+ { binding: 2, resource: { buffer: this.jointsBuffer } }
15338
+ ]
15339
+ });
15340
+ this.fkBindGroup = this.device.createBindGroup({
15341
+ label: "FK Bind Group",
15342
+ layout: this.fkPipeline.getBindGroupLayout(1),
15343
+ entries: [
15344
+ { binding: 0, resource: { buffer: this.jointsBuffer } },
15345
+ { binding: 1, resource: { buffer: this.jointTransformsBuffer } },
15346
+ { binding: 2, resource: { buffer: this.buffers.parents } }
15347
+ // 🔧 Add the parents buffer
15348
+ ]
15349
+ });
15350
+ this.lbsBindGroup = this.device.createBindGroup({
15351
+ label: "LBS Bind Group",
15352
+ layout: this.lbsPipeline.getBindGroupLayout(1),
15353
+ entries: [
15354
+ { binding: 0, resource: { buffer: this.vPosedBuffer } },
15355
+ { binding: 1, resource: { buffer: this.jointTransformsBuffer } },
15356
+ { binding: 2, resource: { buffer: this.buffers.lbsWeights } },
15357
+ { binding: 3, resource: { buffer: this.vDeformedBuffer } }
15358
+ ]
15359
+ });
15360
+ this.faceGeometryParamsBindGroup = this.device.createBindGroup({
15361
+ label: "Face Geometry Params Bind Group",
15362
+ layout: this.faceGeometryPipeline.getBindGroupLayout(0),
15363
+ entries: [
15364
+ { binding: 0, resource: { buffer: this.buffers.frameParams } },
15365
+ { binding: 1, resource: { buffer: this.buffers.metadata } }
15366
+ ]
15367
+ });
15368
+ this.faceGeometryBindGroup = this.device.createBindGroup({
15369
+ label: "Face Geometry Bind Group",
15370
+ layout: this.faceGeometryPipeline.getBindGroupLayout(1),
15371
+ entries: [
15372
+ { binding: 0, resource: { buffer: this.vDeformedBuffer } },
15373
+ { binding: 1, resource: { buffer: this.buffers.faces } },
15374
+ { binding: 2, resource: { buffer: this.faceGeometriesBuffer } }
15375
+ ]
15376
+ });
15377
+ }
15378
+ /**
15379
+ * Compute one FLAME frame (main entry point)
15380
+ * 🚀 Optimization: split into 6 independent passes to support detailed GPU profiling
15381
+ */
15382
+ compute(commandEncoder) {
15383
+ const vertexWorkgroups = Math.ceil(this.vertexCount / 256);
15384
+ const faceWorkgroups = Math.ceil(this.faceCount / 256);
15385
+ const shapePass = commandEncoder.beginComputePass({
15386
+ label: "FLAME Shape Blending"
15387
+ });
15388
+ shapePass.setPipeline(this.shapeBlendPipeline);
15389
+ shapePass.setBindGroup(0, this.shapeBlendParamsBindGroup);
15390
+ shapePass.setBindGroup(1, this.shapeBlendBindGroup);
15391
+ shapePass.dispatchWorkgroups(vertexWorkgroups);
15392
+ shapePass.end();
15393
+ const NUM_JOINTS = 5;
15394
+ const actualJointCount = Math.min(this.jointCount, NUM_JOINTS);
15395
+ const jointRegressPass = commandEncoder.beginComputePass({
15396
+ label: "FLAME Joint Regression"
15397
+ });
15398
+ jointRegressPass.setPipeline(this.jointRegressPipeline);
15399
+ jointRegressPass.setBindGroup(0, this.jointRegressMetadataBindGroup);
15400
+ jointRegressPass.setBindGroup(1, this.jointRegressBindGroup);
15401
+ jointRegressPass.dispatchWorkgroups(actualJointCount, 1, 1);
15402
+ jointRegressPass.end();
15403
+ const posePass = commandEncoder.beginComputePass({
15404
+ label: "FLAME Pose Deformation"
15405
+ });
15406
+ posePass.setPipeline(this.poseDeformPipeline);
15407
+ posePass.setBindGroup(0, this.poseDeformParamsBindGroup);
15408
+ posePass.setBindGroup(1, this.poseDeformBindGroup);
15409
+ posePass.dispatchWorkgroups(vertexWorkgroups);
15410
+ posePass.end();
15411
+ const fkPass = commandEncoder.beginComputePass({
15412
+ label: "FLAME Forward Kinematics"
15413
+ });
15414
+ fkPass.setPipeline(this.fkPipeline);
15415
+ fkPass.setBindGroup(0, this.fkParamsBindGroup);
15416
+ fkPass.setBindGroup(1, this.fkBindGroup);
15417
+ fkPass.dispatchWorkgroups(1, 1, 1);
15418
+ fkPass.end();
15419
+ const lbsPass = commandEncoder.beginComputePass({
15420
+ label: "FLAME LBS"
15421
+ });
15422
+ lbsPass.setPipeline(this.lbsPipeline);
15423
+ lbsPass.setBindGroup(0, this.lbsMetadataBindGroup);
15424
+ lbsPass.setBindGroup(1, this.lbsBindGroup);
15425
+ lbsPass.dispatchWorkgroups(vertexWorkgroups);
15426
+ lbsPass.end();
15427
+ const faceGeomPass = commandEncoder.beginComputePass({
15428
+ label: "FLAME Face Geometry"
15429
+ });
15430
+ faceGeomPass.setPipeline(this.faceGeometryPipeline);
15431
+ faceGeomPass.setBindGroup(0, this.faceGeometryParamsBindGroup);
15432
+ faceGeomPass.setBindGroup(1, this.faceGeometryBindGroup);
15433
+ faceGeomPass.dispatchWorkgroups(faceWorkgroups);
15434
+ faceGeomPass.end();
15435
+ return {
15436
+ faceGeometries: this.faceGeometriesBuffer,
15437
+ faceCount: this.faceCount
15438
+ };
15439
+ }
15440
+ /**
15441
+ * Release resources
15442
+ */
15443
+ destroy() {
15444
+ var _a, _b, _c, _d, _e2, _f;
15445
+ (_a = this.vShapedBuffer) == null ? void 0 : _a.destroy();
15446
+ (_b = this.vPosedBuffer) == null ? void 0 : _b.destroy();
15447
+ (_c = this.jointsBuffer) == null ? void 0 : _c.destroy();
15448
+ (_d = this.jointTransformsBuffer) == null ? void 0 : _d.destroy();
15449
+ (_e2 = this.vDeformedBuffer) == null ? void 0 : _e2.destroy();
15450
+ (_f = this.faceGeometriesBuffer) == null ? void 0 : _f.destroy();
15451
+ logger.log("🧹 FLAME Pipeline destroyed");
15452
+ }
15453
+ }
15454
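For orientation, a per-frame driving sketch for the class above, assuming `device`, an initialized `flamePipeline`, and frame parameters already written to the uniform buffer (variable names are illustrative):

```ts
// Sketch: record and submit one FLAME frame (all six compute passes).
const encoder = device.createCommandEncoder({ label: "FLAME frame" });
const { faceGeometries, faceCount } = flamePipeline.compute(encoder);
device.queue.submit([encoder.finish()]);
// faceGeometries now holds faceCount × 8 floats (center xyz, scale, quat xyzw),
// ready to be bound by a downstream splat-transform pass.
```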
+ class FLAMEGPUBuffers {
15455
+ constructor() {
15456
+ __publicField(this, "device", null);
15457
+ __publicField(this, "buffers", null);
15458
+ // Cached metadata
15459
+ __publicField(this, "vertexCount", 0);
15460
+ __publicField(this, "faceCount", 0);
15461
+ __publicField(this, "jointCount", 0);
15462
+ __publicField(this, "shapeParamCount", 0);
15463
+ __publicField(this, "poseParamCount", 0);
15464
+ __publicField(this, "staticOffsetCount", 0);
15465
+ __publicField(this, "activeShapeCount", 0);
15466
+ // 🚀 Number of active shape parameters
15467
+ // 🚀 Optimization: cache the parameter data array instead of allocating a new one each frame (less GC pressure)
15468
+ // Smaller size: shapeParams[300] removed, only dynamic parameters kept
15469
+ __publicField(this, "paramDataCache", new Float32Array(32 * 4));
15470
+ }
15471
+ // 32 vec4 = 128 floats (expr[100] + poses[28])
15472
+ /**
15473
+ * Initialize GPU buffers and upload template data
15474
+ * 🚀 Optimization: requires a characterHandle to obtain the static shape parameters
15475
+ * @param activeShapeParams Active shape parameters (zero-parameter filtering optimization, optional)
15476
+ */
15477
+ initialize(device, templateData, _shapeParams, activeShapeParams) {
15478
+ var _a;
15479
+ this.device = device;
15480
+ this.vertexCount = templateData.vertexCount;
15481
+ this.faceCount = templateData.faceCount;
15482
+ this.jointCount = templateData.jointCount;
15483
+ this.shapeParamCount = templateData.shapeParamCount;
15484
+ this.poseParamCount = templateData.poseParamCount;
15485
+ const ORIGINAL_FLAME_VERTEX_COUNT = 5023;
15486
+ this.staticOffsetCount = Math.min(templateData.staticOffsetCount, ORIGINAL_FLAME_VERTEX_COUNT);
15487
+ if (this.vertexCount === 0) {
15488
+ throw new Error(`Invalid vertexCount: ${this.vertexCount}`);
15489
+ }
15490
+ if (this.faceCount === 0) {
15491
+ throw new Error(`Invalid faceCount: ${this.faceCount}`);
15492
+ }
15493
+ if (this.jointCount === 0) {
15494
+ throw new Error(`Invalid jointCount: ${this.jointCount}`);
15495
+ }
15496
+ const expectedJRegressorSize = this.jointCount * this.vertexCount * 4;
15497
+ if (templateData.jRegressor.byteLength !== expectedJRegressorSize) {
15498
+ throw new Error(`Invalid jRegressor size: expected ${expectedJRegressorSize}, got ${templateData.jRegressor.byteLength}`);
15499
+ }
15500
+ logger.log("🔧 FLAME metadata validation:", {
15501
+ vertexCount: this.vertexCount,
15502
+ faceCount: this.faceCount,
15503
+ jointCount: this.jointCount,
15504
+ jRegressorSize: templateData.jRegressor.byteLength,
15505
+ expectedJRegressorSize
15506
+ });
15507
+ this.activeShapeCount = (activeShapeParams == null ? void 0 : activeShapeParams.count) || 0;
15508
+ this.buffers = {
15509
+ vTemplate: this.createStorageBuffer("v_template", templateData.vTemplate),
15510
+ shapedirs: this.createStorageBuffer("shapedirs", templateData.shapedirs),
15511
+ posedirs: this.createStorageBuffer("posedirs", templateData.posedirs),
15512
+ jRegressor: this.createStorageBuffer("J_regressor", templateData.jRegressor),
15513
+ lbsWeights: this.createStorageBuffer("lbs_weights", templateData.lbsWeights),
15514
+ parents: this.createStorageBuffer("parents", templateData.parents),
15515
+ faces: this.createStorageBuffer("faces", templateData.faces),
15516
+ staticOffset: templateData.staticOffset ? this.createStorageBuffer(
15517
+ "static_offset",
15518
+ templateData.staticOffset.slice(0, this.staticOffsetCount * 3)
15519
+ // Only upload the first 5023 vertices
15520
+ ) : null,
15521
+ // 🚀 Optimization: use active shape parameters (zero-parameter filtering)
15522
+ activeShapeIndices: activeShapeParams ? this.createStorageBuffer("active_shape_indices", activeShapeParams.activeIndices) : this.createStorageBuffer("active_shape_indices", new Uint32Array(0)),
15523
+ // Fallback: empty array
15524
+ activeShapeValues: activeShapeParams ? this.createStorageBuffer("active_shape_values", activeShapeParams.activeValues) : this.createStorageBuffer("active_shape_values", new Float32Array(0)),
15525
+ // Fallback: empty array
15526
+ frameParams: this.createFrameParamsBuffer(),
15527
+ metadata: this.createMetadataBuffer()
15528
+ };
15529
+ const totalSize = (templateData.vTemplate.byteLength + templateData.shapedirs.byteLength + templateData.posedirs.byteLength + templateData.jRegressor.byteLength + templateData.lbsWeights.byteLength + templateData.parents.byteLength + templateData.faces.byteLength + (((_a = templateData.staticOffset) == null ? void 0 : _a.byteLength) || 0)) / 1024 / 1024;
15530
+ logger.log(`✅ FLAME GPU buffers initialized (${totalSize.toFixed(2)} MB uploaded)`);
15531
+ logger.log(` Vertices: ${this.vertexCount}, Faces: ${this.faceCount}, Joints: ${this.jointCount}`);
15532
+ if (this.staticOffsetCount > 0) {
15533
+ logger.log(` Static offset: ${this.staticOffsetCount} vertices`);
15534
+ }
15535
+ }
15536
+ /**
15537
+ * Create a storage buffer and upload data
15538
+ */
15539
+ createStorageBuffer(label, data) {
15540
+ const minBufferSize = 4;
15541
+ const bufferSize = Math.max(data.byteLength, minBufferSize);
15542
+ const buffer = this.device.createBuffer({
15543
+ label: `FLAME ${label}`,
15544
+ size: bufferSize,
15545
+ // 🔧 Add COPY_SRC to support debug readback
15546
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
15547
+ mappedAtCreation: true
15548
+ });
15549
+ if (data instanceof Float32Array) {
15550
+ new Float32Array(buffer.getMappedRange()).set(data);
15551
+ } else if (data instanceof Int32Array) {
15552
+ new Int32Array(buffer.getMappedRange()).set(data);
15553
+ } else if (data instanceof Uint32Array) {
15554
+ new Uint32Array(buffer.getMappedRange()).set(data);
15555
+ }
15556
+ buffer.unmap();
15557
+ return buffer;
15558
+ }
15559
+ /**
15560
+ * Create the per-frame parameter uniform buffer
15561
+ * 🚀 Optimization: shapeParams removed, 70% smaller
15562
+ *
15563
+ * Layout (std140):
15564
+ * - exprParams: vec4[25] (100 floats, padded)
15565
+ * - rotation: vec4 (3 floats + padding)
15566
+ * - translation: vec4 (3 floats + padding)
15567
+ * - neckPose: vec4 (3 floats + padding)
15568
+ * - jawPose: vec4 (3 floats + padding)
15569
+ * - eyesPose: vec4[2] (6 floats, split into 2 vec4)
15570
+ * - eyelid: vec4 (2 floats + padding)
15571
+ */
15572
+ createFrameParamsBuffer() {
15573
+ const size = 25 * 16 + // exprParams (25 vec4)
15574
+ 16 + // rotation (1 vec4)
15575
+ 16 + // translation (1 vec4)
15576
+ 16 + // neckPose (1 vec4)
15577
+ 16 + // jawPose (1 vec4)
15578
+ 2 * 16 + // eyesPose (2 vec4)
15579
+ 16;
15580
+ return this.device.createBuffer({
15581
+ label: "FLAME frame params",
15582
+ size,
15583
+ // 🔧 Add COPY_SRC to support debug readback
15584
+ usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC
15585
+ });
15586
+ }
15587
+ /**
15588
+ * Create the metadata uniform buffer
15589
+ *
15590
+ * Layout:
15591
+ * - vertexCount: u32
15592
+ * - faceCount: u32
15593
+ * - jointCount: u32
15594
+ * - shapeParamCount: u32
15595
+ * - poseParamCount: u32
15596
+ * - staticOffsetCount: u32
15597
+ * (padding to 256 bytes for alignment)
15598
+ */
15599
+ createMetadataBuffer() {
15600
+ const buffer = this.device.createBuffer({
15601
+ label: "FLAME metadata",
15602
+ size: 256,
15603
+ // Large enough and aligned
15604
+ // 🔧 Add COPY_SRC to support debug readback
15605
+ usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC,
15606
+ mappedAtCreation: true
15607
+ });
15608
+ const view = new Uint32Array(buffer.getMappedRange());
15609
+ view[0] = this.vertexCount;
15610
+ view[1] = this.faceCount;
15611
+ view[2] = this.jointCount;
15612
+ view[3] = this.shapeParamCount;
15613
+ view[4] = this.poseParamCount;
15614
+ view[5] = this.staticOffsetCount;
15615
+ view[6] = this.activeShapeCount;
15616
+ buffer.unmap();
15617
+ return buffer;
15618
+ }
15619
+ /**
15620
+ * Update per-frame parameters
15621
+ * 🚀 Optimization: shapeParams packing removed, 70% less data uploaded
15622
+ */
15623
+ updateFrameParams(params) {
15624
+ if (!this.buffers || !this.device) {
15625
+ throw new Error("FLAME GPU buffers not initialized");
15626
+ }
15627
+ const data = this.paramDataCache;
15628
+ let offset = 0;
15629
+ for (let i2 = 0; i2 < 100; i2 += 4) {
15630
+ data[offset++] = params.exprParams[i2] || 0;
15631
+ data[offset++] = params.exprParams[i2 + 1] || 0;
15632
+ data[offset++] = params.exprParams[i2 + 2] || 0;
15633
+ data[offset++] = params.exprParams[i2 + 3] || 0;
15634
+ }
15635
+ data[offset++] = params.rotation[0] || 0;
15636
+ data[offset++] = params.rotation[1] || 0;
15637
+ data[offset++] = params.rotation[2] || 0;
15638
+ data[offset++] = 0;
15639
+ data[offset++] = params.translation[0] || 0;
15640
+ data[offset++] = params.translation[1] || 0;
15641
+ data[offset++] = params.translation[2] || 0;
15642
+ data[offset++] = 0;
15643
+ data[offset++] = params.neckPose[0] || 0;
15644
+ data[offset++] = params.neckPose[1] || 0;
15645
+ data[offset++] = params.neckPose[2] || 0;
15646
+ data[offset++] = 0;
15647
+ data[offset++] = params.jawPose[0] || 0;
15648
+ data[offset++] = params.jawPose[1] || 0;
15649
+ data[offset++] = params.jawPose[2] || 0;
15650
+ data[offset++] = 0;
15651
+ data[offset++] = params.eyesPose[0] || 0;
15652
+ data[offset++] = params.eyesPose[1] || 0;
15653
+ data[offset++] = params.eyesPose[2] || 0;
15654
+ data[offset++] = 0;
15655
+ data[offset++] = params.eyesPose[3] || 0;
15656
+ data[offset++] = params.eyesPose[4] || 0;
15657
+ data[offset++] = params.eyesPose[5] || 0;
15658
+ data[offset++] = 0;
15659
+ data[offset++] = params.eyelid[0] || 0;
15660
+ data[offset++] = params.eyelid[1] || 0;
15661
+ data[offset++] = 0;
15662
+ data[offset++] = 0;
15663
+ this.device.queue.writeBuffer(this.buffers.frameParams, 0, data);
15664
+ }
15665
+ /**
15666
+ * Get all buffers
15667
+ */
15668
+ getBuffers() {
15669
+ if (!this.buffers) {
15670
+ throw new Error("FLAME GPU buffers not initialized");
15671
+ }
15672
+ return this.buffers;
15673
+ }
15674
+ /**
15675
+ * Get metadata
15676
+ */
15677
+ getMetadata() {
15678
+ return {
15679
+ vertexCount: this.vertexCount,
15680
+ faceCount: this.faceCount,
15681
+ jointCount: this.jointCount,
15682
+ shapeParamCount: this.shapeParamCount,
15683
+ poseParamCount: this.poseParamCount,
15684
+ staticOffsetCount: this.staticOffsetCount
15685
+ };
15686
+ }
15687
+ /**
15688
+ * Release resources
15689
+ */
15690
+ destroy() {
15691
+ if (this.buffers) {
15692
+ Object.values(this.buffers).forEach((buffer) => {
15693
+ if (buffer) {
15694
+ buffer.destroy();
15695
+ }
15696
+ });
15697
+ this.buffers = null;
15698
+ }
15699
+ this.device = null;
15700
+ logger.log("🗑️ FLAME GPU buffers destroyed");
15701
+ }
15702
+ }
15703
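A typical setup-and-update flow for `FLAMEGPUBuffers`, sketched on the assumption that `templateData` and the optional `activeShapeParams` come from the WASM adapter (all variable names here are illustrative):

```ts
// Sketch: upload the static FLAME template once, then stream per-frame parameters.
const flameBuffers = new FLAMEGPUBuffers();
flameBuffers.initialize(device, templateData, undefined /* _shapeParams, unused */, activeShapeParams);

// Per frame: pack expression/pose parameters into the 512-byte frame-params uniform buffer.
flameBuffers.updateFrameParams({
  exprParams,            // 100 floats
  rotation, translation, // 3 floats each
  neckPose, jawPose,     // 3 floats each
  eyesPose,              // 6 floats (left eye + right eye)
  eyelid,                // 2 floats
});
```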
+ const RADIX_SIZE = 256;
15704
+ const WORKGROUP_SIZE = 256;
15705
+ const ELEMENTS_PER_THREAD = 4;
15706
+ const ELEMENTS_PER_WG = WORKGROUP_SIZE * ELEMENTS_PER_THREAD;
15707
+ const computeDepthShader = (
15708
+ /* wgsl */
15709
+ `
15710
+ struct Uniforms {
15711
+ cameraPosition: vec3<f32>,
15712
+ _pad0: f32,
15713
+ cameraForward: vec3<f32>,
15714
+ _pad1: f32,
15715
+ splatCount: u32,
15716
+ paddedCount: u32,
15717
+ _pad2: u32,
15718
+ _pad3: u32,
15719
+ }
15720
+
15721
+ @group(0) @binding(0) var<uniform> uniforms: Uniforms;
15722
+ @group(0) @binding(1) var<storage, read> positions: array<f32>;
15723
+ @group(0) @binding(2) var<storage, read_write> keys: array<u32>;
15724
+ @group(0) @binding(3) var<storage, read_write> values: array<u32>;
15725
+
15726
+ @compute @workgroup_size(${WORKGROUP_SIZE})
15727
+ fn main(@builtin(global_invocation_id) globalId: vec3<u32>) {
15728
+ let idx = globalId.x;
15729
+
15730
+ if (idx >= uniforms.paddedCount) {
15731
+ return;
15732
+ }
15733
+
15734
+ if (idx >= uniforms.splatCount) {
15735
+ // Padding region: use the maximum key so these entries sort to the end
15736
+ keys[idx] = 0xFFFFFFFFu;
15737
+ values[idx] = idx;
15738
+ return;
15739
+ }
15740
+
15741
+ let posOffset = idx * 3u;
15742
+ let pos = vec3<f32>(
15743
+ positions[posOffset],
15744
+ positions[posOffset + 1u],
15745
+ positions[posOffset + 2u]
15746
+ );
15747
+
15748
+ // Compute depth
15749
+ let diff = pos - uniforms.cameraPosition;
15750
+ let depth = dot(diff, uniforms.cameraForward);
15751
+
15752
+ // Convert to a sortable key (identical to the CPU sortSplats.ts)
15753
+ let d = bitcast<i32>(depth);
15754
+ let signMask = d >> 31;
15755
+ let negSignMask = -signMask;
15756
+ let mask = negSignMask | i32(0x80000000u);
15757
+ let sortableKey = u32(d ^ mask);
15758
+
15759
+ // Do not invert! The CPU sorts ascending and then reverses the result
15760
+ keys[idx] = sortableKey;
15761
+ values[idx] = idx;
15762
+ }
15763
+ `
15764
+ );
15765
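The depth pass converts each float depth into an unsigned key whose integer ordering matches the float ordering, so a plain `u32` radix sort orders splats by depth. The conventional CPU form of that mapping, shown as a sketch (the CPU reference sortSplats.ts mentioned in the shader comment is not part of this diff):

```ts
// Sketch: map an IEEE-754 float to a u32 that sorts in the same order as the float.
// Positive floats get the sign bit set; negative floats have all bits flipped.
const f32 = new Float32Array(1);
const u32 = new Uint32Array(f32.buffer);

function floatToSortableKey(depth: number): number {
  f32[0] = depth;
  const bits = u32[0];
  const mask = (bits & 0x80000000) !== 0 ? 0xffffffff : 0x80000000;
  return (bits ^ mask) >>> 0;
}
```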
+ const histogramShader = (
15766
+ /* wgsl */
15767
+ `
15768
+ struct Params {
15769
+ count: u32,
15770
+ shift: u32, // bit shift of the current pass (0, 8, 16, 24)
15771
+ numWorkgroups: u32,
15772
+ _pad: u32,
15773
+ }
15774
+
15775
+ @group(0) @binding(0) var<uniform> params: Params;
15776
+ @group(0) @binding(1) var<storage, read> keys: array<u32>;
15777
+ @group(0) @binding(2) var<storage, read_write> histograms: array<u32>; // numWorkgroups * 256
15778
+
15779
+ var<workgroup> localHist: array<atomic<u32>, ${RADIX_SIZE}>;
15780
+
15781
+ @compute @workgroup_size(${WORKGROUP_SIZE})
15782
+ fn main(
15783
+ @builtin(local_invocation_id) localId: vec3<u32>,
15784
+ @builtin(workgroup_id) groupId: vec3<u32>
15785
+ ) {
15786
+ let tid = localId.x;
15787
+ let gid = groupId.x;
15788
+
15789
+ // Initialize the local histogram
15790
+ atomicStore(&localHist[tid], 0u);
15791
+ workgroupBarrier();
15792
+
15793
+ // Each thread processes multiple elements
15794
+ let startIdx = gid * ${ELEMENTS_PER_WG}u + tid;
15795
+ for (var i = 0u; i < ${ELEMENTS_PER_THREAD}u; i++) {
15796
+ let idx = startIdx + i * ${WORKGROUP_SIZE}u;
15797
+ if (idx < params.count) {
15798
+ let key = keys[idx];
15799
+ let bucket = (key >> params.shift) & 0xFFu;
15800
+ atomicAdd(&localHist[bucket], 1u);
15801
+ }
15802
+ }
15803
+
15804
+ workgroupBarrier();
15805
+
15806
+ // Write to the global histogram
15807
+ // Layout: histograms[bucket * numWorkgroups + gid] = this workgroup's count for that bucket
15808
+ let histOffset = tid * params.numWorkgroups + gid;
15809
+ histograms[histOffset] = atomicLoad(&localHist[tid]);
15810
+ }
15811
+ `
15812
+ );
15813
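The histogram layout is bucket-major (`histograms[bucket * numWorkgroups + gid]`), so a single exclusive scan over the flat array yields, for every (bucket, workgroup) pair, the global write offset the scatter pass consumes. The same counting step, written sequentially as a sketch:

```ts
// Sketch: per-workgroup digit histogram for one 8-bit radix pass, bucket-major layout.
function buildHistograms(keys: Uint32Array, shift: number, elementsPerWG: number): Uint32Array {
  const numWorkgroups = Math.ceil(keys.length / elementsPerWG);
  const histograms = new Uint32Array(256 * numWorkgroups);
  for (let i = 0; i < keys.length; i++) {
    const wg = Math.floor(i / elementsPerWG);
    const bucket = (keys[i] >>> shift) & 0xff;
    histograms[bucket * numWorkgroups + wg]++;
  }
  return histograms;
}
```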
+ const scanShader = (
15814
+ /* wgsl */
15815
+ `
15816
+ struct Params {
15817
+ count: u32, // number of elements to scan
15818
+ _pad0: u32,
15819
+ _pad1: u32,
15820
+ _pad2: u32,
15821
+ }
15822
+
15823
+ @group(0) @binding(0) var<uniform> params: Params;
15824
+ @group(0) @binding(1) var<storage, read_write> data: array<u32>;
15825
+ @group(0) @binding(2) var<storage, read_write> blockSums: array<u32>;
15826
+
15827
+ var<workgroup> temp: array<u32, ${WORKGROUP_SIZE * 2}>;
15828
+
15829
+ @compute @workgroup_size(${WORKGROUP_SIZE})
15830
+ fn main(
15831
+ @builtin(local_invocation_id) localId: vec3<u32>,
15832
+ @builtin(workgroup_id) groupId: vec3<u32>
15833
+ ) {
15834
+ let tid = localId.x;
15835
+ let gid = groupId.x;
15836
+ let blockSize = ${WORKGROUP_SIZE * 2}u;
15837
+ let offset = gid * blockSize;
15838
+
15839
+ // Load data into shared memory
15840
+ let idx0 = offset + tid;
15841
+ let idx1 = offset + tid + ${WORKGROUP_SIZE}u;
15842
+ temp[tid] = select(0u, data[idx0], idx0 < params.count);
15843
+ temp[tid + ${WORKGROUP_SIZE}u] = select(0u, data[idx1], idx1 < params.count);
15844
+
15845
+ // Up-sweep (reduce)
15846
+ var stride = 1u;
15847
+ for (var d = blockSize >> 1u; d > 0u; d >>= 1u) {
15848
+ workgroupBarrier();
15849
+ if (tid < d) {
15850
+ let ai = stride * (2u * tid + 1u) - 1u;
15851
+ let bi = stride * (2u * tid + 2u) - 1u;
15852
+ temp[bi] += temp[ai];
15853
+ }
15854
+ stride <<= 1u;
15855
+ }
15856
+
15857
+ // Save the block sum and zero the last element
15858
+ if (tid == 0u) {
15859
+ blockSums[gid] = temp[blockSize - 1u];
15860
+ temp[blockSize - 1u] = 0u;
15861
+ }
15862
+
15863
+ // Down-sweep
15864
+ for (var d = 1u; d < blockSize; d <<= 1u) {
15865
+ stride >>= 1u;
15866
+ workgroupBarrier();
15867
+ if (tid < d) {
15868
+ let ai = stride * (2u * tid + 1u) - 1u;
15869
+ let bi = stride * (2u * tid + 2u) - 1u;
15870
+ let t = temp[ai];
15871
+ temp[ai] = temp[bi];
15872
+ temp[bi] += t;
15873
+ }
15874
+ }
15875
+
15876
+ workgroupBarrier();
15877
+
15878
+ // Write back
15879
+ if (idx0 < params.count) { data[idx0] = temp[tid]; }
15880
+ if (idx1 < params.count) { data[idx1] = temp[tid + ${WORKGROUP_SIZE}u]; }
15881
+ }
15882
+ `
15883
+ );
15884
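The scan shader is a work-efficient (Blelloch) exclusive prefix sum over blocks of 512 elements; each block's total is spilled to `blockSums` so a second-level scan plus the add-block-sums pass below can stitch the blocks together. For reference, the result it computes is the ordinary exclusive scan (a sequential sketch):

```ts
// Sketch: exclusive prefix sum, out[i] = sum of input[0..i-1] and out[0] = 0.
function exclusiveScan(input: Uint32Array): Uint32Array {
  const out = new Uint32Array(input.length);
  let sum = 0;
  for (let i = 0; i < input.length; i++) {
    out[i] = sum;
    sum += input[i];
  }
  return out;
}
```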
+ const addBlockSumsShader = (
15885
+ /* wgsl */
15886
+ `
15887
+ struct Params {
15888
+ count: u32,
15889
+ _pad0: u32,
15890
+ _pad1: u32,
15891
+ _pad2: u32,
15892
+ }
15893
+
15894
+ @group(0) @binding(0) var<uniform> params: Params;
15895
+ @group(0) @binding(1) var<storage, read_write> data: array<u32>;
15896
+ @group(0) @binding(2) var<storage, read> blockSums: array<u32>;
15897
+
15898
+ @compute @workgroup_size(${WORKGROUP_SIZE})
15899
+ fn main(
15900
+ @builtin(local_invocation_id) localId: vec3<u32>,
15901
+ @builtin(workgroup_id) groupId: vec3<u32>
15902
+ ) {
15903
+ let tid = localId.x;
15904
+ let gid = groupId.x;
15905
+
15906
+ if (gid == 0u) { return; } // the first block needs no addition
15907
+
15908
+ let blockSize = ${WORKGROUP_SIZE * 2}u;
15909
+ let offset = gid * blockSize;
15910
+ let blockSum = blockSums[gid];
15911
+
15912
+ let idx0 = offset + tid;
15913
+ let idx1 = offset + tid + ${WORKGROUP_SIZE}u;
15914
+
15915
+ if (idx0 < params.count) { data[idx0] += blockSum; }
15916
+ if (idx1 < params.count) { data[idx1] += blockSum; }
15917
+ }
15918
+ `
15919
+ );
15920
+ const scatterShader = (
15921
+ /* wgsl */
15922
+ `
15923
+ struct Params {
15924
+ count: u32,
15925
+ shift: u32,
15926
+ numWorkgroups: u32,
15927
+ _pad: u32,
15928
+ }
15929
+
15930
+ @group(0) @binding(0) var<uniform> params: Params;
15931
+ @group(0) @binding(1) var<storage, read> keysIn: array<u32>;
15932
+ @group(0) @binding(2) var<storage, read> valuesIn: array<u32>;
15933
+ @group(0) @binding(3) var<storage, read> globalOffsets: array<u32>;
15934
+ @group(0) @binding(4) var<storage, read_write> keysOut: array<u32>;
15935
+ @group(0) @binding(5) var<storage, read_write> valuesOut: array<u32>;
15936
+
15937
+ var<workgroup> localOffsets: array<u32, ${RADIX_SIZE}>;
15938
+ var<workgroup> localCounts: array<u32, ${RADIX_SIZE}>;
15939
+ var<workgroup> elementOffsets: array<u32, ${ELEMENTS_PER_WG}>;
15940
+
15941
+ @compute @workgroup_size(${WORKGROUP_SIZE})
15942
+ fn main(
15943
+ @builtin(local_invocation_id) localId: vec3<u32>,
15944
+ @builtin(workgroup_id) groupId: vec3<u32>
15945
+ ) {
15946
+ let tid = localId.x;
15947
+ let gid = groupId.x;
15948
+
15949
+ // Load this workgroup's global offsets
15950
+ let globalIdx = tid * params.numWorkgroups + gid;
15951
+ localOffsets[tid] = globalOffsets[globalIdx];
15952
+ localCounts[tid] = 0u;
15953
+
15954
+ workgroupBarrier();
15955
+
15956
+ let baseIdx = gid * ${ELEMENTS_PER_WG}u;
15957
+ let endIdx = min(baseIdx + ${ELEMENTS_PER_WG}u, params.count);
15958
+ let elemCount = endIdx - baseIdx;
15959
+
15960
+ // Phase 1: thread 0 computes the local offset of every element (guarantees stability)
15961
+ if (tid == 0u) {
15962
+ for (var i = 0u; i < elemCount; i++) {
15963
+ let idx = baseIdx + i;
15964
+ let key = keysIn[idx];
15965
+ let bucket = (key >> params.shift) & 0xFFu;
15966
+
15967
+ elementOffsets[i] = localCounts[bucket];
15968
+ localCounts[bucket] = localCounts[bucket] + 1u;
15969
+ }
15970
+ }
15971
+
15972
+ workgroupBarrier();
15973
+
15974
+ // Phase 2: all threads write in parallel
15975
+ for (var i = tid; i < elemCount; i += ${WORKGROUP_SIZE}u) {
15976
+ let idx = baseIdx + i;
15977
+ let key = keysIn[idx];
15978
+ let value = valuesIn[idx];
15979
+ let bucket = (key >> params.shift) & 0xFFu;
15980
+
15981
+ let dstIdx = localOffsets[bucket] + elementOffsets[i];
15982
+ keysOut[dstIdx] = key;
15983
+ valuesOut[dstIdx] = value;
15984
+ }
15985
+ }
15986
+ `
15987
+ );
15988
+ const reverseShader = (
15989
+ /* wgsl */
15990
+ `
15991
+ struct Params {
15992
+ count: u32,
15993
+ _pad0: u32,
15994
+ _pad1: u32,
15995
+ _pad2: u32,
15996
+ }
15997
+
15998
+ @group(0) @binding(0) var<uniform> params: Params;
15999
+ @group(0) @binding(1) var<storage, read> valuesIn: array<u32>;
16000
+ @group(0) @binding(2) var<storage, read_write> valuesOut: array<u32>;
16001
+
16002
+ @compute @workgroup_size(${WORKGROUP_SIZE})
16003
+ fn main(@builtin(global_invocation_id) globalId: vec3<u32>) {
16004
+ let idx = globalId.x;
16005
+ if (idx >= params.count) {
16006
+ return;
16007
+ }
16008
+
16009
+ // Reverse: element i goes to position (count - 1 - i)
16010
+ valuesOut[idx] = valuesIn[params.count - 1u - idx];
16011
+ }
16012
+ `
16013
+ );
16014
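Taken together, the shaders above implement a GPU LSD radix sort: a depth/key pass, four 8-bit passes (shifts 0, 8, 16, 24) of histogram, scan with block-sum fix-up, and stable scatter, then a final reverse that mirrors the CPU path's "sort ascending, then flip" behaviour. A compact CPU reference of the same algorithm, for orientation only (not the package's CPU implementation):

```ts
// Sketch: 32-bit LSD radix sort of keys, 8 bits per pass, returning indices in reversed order.
function radixSortIndicesDescending(keys: Uint32Array): Uint32Array {
  const n = keys.length;
  let k = Uint32Array.from(keys);
  let v = Uint32Array.from({ length: n }, (_, i) => i);
  let k2 = new Uint32Array(n);
  let v2 = new Uint32Array(n);
  for (let shift = 0; shift < 32; shift += 8) {
    const counts = new Uint32Array(256);
    for (let i = 0; i < n; i++) counts[(k[i] >>> shift) & 0xff]++;
    const offsets = new Uint32Array(256);              // exclusive scan of the counts
    for (let b = 1; b < 256; b++) offsets[b] = offsets[b - 1] + counts[b - 1];
    for (let i = 0; i < n; i++) {                      // stable scatter
      const b = (k[i] >>> shift) & 0xff;
      const dst = offsets[b]++;
      k2[dst] = k[i];
      v2[dst] = v[i];
    }
    [k, k2] = [k2, k];                                 // ping-pong the double buffers
    [v, v2] = [v2, v];
  }
  return v.reverse();                                  // ascending by key, then reversed
}
```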
+ class GPURadixSort {
16015
+ constructor(options) {
16016
+ __publicField(this, "device");
16017
+ __publicField(this, "maxSplatCount");
16018
+ __publicField(this, "paddedCount");
16019
+ __publicField(this, "numWorkgroups");
16020
+ // Pipelines
16021
+ __publicField(this, "depthPipeline", null);
16022
+ __publicField(this, "histogramPipeline", null);
16023
+ __publicField(this, "scanPipeline", null);
16024
+ __publicField(this, "addBlockSumsPipeline", null);
16025
+ __publicField(this, "scatterPipeline", null);
16026
+ __publicField(this, "reversePipeline", null);
16027
+ // Buffers
16028
+ __publicField(this, "uniformBuffer", null);
16029
+ __publicField(this, "paramsBuffer", null);
16030
+ __publicField(this, "scanParamsBuffer", null);
16031
+ // Double buffering for keys/values
16032
+ __publicField(this, "keysBuffer0", null);
16033
+ __publicField(this, "keysBuffer1", null);
16034
+ __publicField(this, "valuesBuffer0", null);
16035
+ __publicField(this, "valuesBuffer1", null);
16036
+ // Histogram and scan buffers
16037
+ __publicField(this, "histogramBuffer", null);
16038
+ __publicField(this, "blockSumsBuffer", null);
16039
+ __publicField(this, "blockSumsBuffer2", null);
16040
+ // For multi-level scan
16041
+ // External
16042
+ __publicField(this, "positionsBuffer", null);
16043
+ this.device = options.device;
16044
+ this.maxSplatCount = options.maxSplatCount;
16045
+ this.paddedCount = Math.ceil(options.maxSplatCount / ELEMENTS_PER_WG) * ELEMENTS_PER_WG;
16046
+ this.numWorkgroups = this.paddedCount / ELEMENTS_PER_WG;
16047
+ this.initialize();
16048
+ }
16049
+ initialize() {
16050
+ this.depthPipeline = this.device.createComputePipeline({
16051
+ label: "Radix Sort - Depth",
16052
+ layout: "auto",
16053
+ compute: {
16054
+ module: this.device.createShaderModule({ code: computeDepthShader }),
16055
+ entryPoint: "main"
16056
+ }
16057
+ });
16058
+ this.histogramPipeline = this.device.createComputePipeline({
16059
+ label: "Radix Sort - Histogram",
16060
+ layout: "auto",
16061
+ compute: {
16062
+ module: this.device.createShaderModule({ code: histogramShader }),
16063
+ entryPoint: "main"
16064
+ }
16065
+ });
16066
+ this.scanPipeline = this.device.createComputePipeline({
16067
+ label: "Radix Sort - Scan",
16068
+ layout: "auto",
16069
+ compute: {
16070
+ module: this.device.createShaderModule({ code: scanShader }),
16071
+ entryPoint: "main"
16072
+ }
16073
+ });
16074
+ this.addBlockSumsPipeline = this.device.createComputePipeline({
16075
+ label: "Radix Sort - Add Block Sums",
16076
+ layout: "auto",
16077
+ compute: {
16078
+ module: this.device.createShaderModule({ code: addBlockSumsShader }),
16079
+ entryPoint: "main"
16080
+ }
16081
+ });
16082
+ this.scatterPipeline = this.device.createComputePipeline({
16083
+ label: "Radix Sort - Scatter",
16084
+ layout: "auto",
16085
+ compute: {
16086
+ module: this.device.createShaderModule({ code: scatterShader }),
16087
+ entryPoint: "main"
16088
+ }
16089
+ });
16090
+ this.reversePipeline = this.device.createComputePipeline({
16091
+ label: "Radix Sort - Reverse",
16092
+ layout: "auto",
16093
+ compute: {
16094
+ module: this.device.createShaderModule({ code: reverseShader }),
16095
+ entryPoint: "main"
16096
+ }
16097
+ });
16098
+ const n2 = this.paddedCount;
16099
+ this.uniformBuffer = this.device.createBuffer({
16100
+ label: "Radix Sort - Uniforms",
16101
+ size: 48,
16102
+ usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
16103
+ });
16104
+ this.paramsBuffer = this.device.createBuffer({
16105
+ label: "Radix Sort - Params",
16106
+ size: 16,
16107
+ usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
16108
+ });
16109
+ this.scanParamsBuffer = this.device.createBuffer({
16110
+ label: "Radix Sort - Scan Params",
16111
+ size: 16,
16112
+ usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
16113
+ });
16114
+ this.keysBuffer0 = this.device.createBuffer({
16115
+ label: "Radix Sort - Keys 0",
16116
+ size: n2 * 4,
16117
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
16118
+ });
16119
+ this.keysBuffer1 = this.device.createBuffer({
16120
+ label: "Radix Sort - Keys 1",
16121
+ size: n2 * 4,
16122
+ usage: GPUBufferUsage.STORAGE
16123
+ });
16124
+ this.valuesBuffer0 = this.device.createBuffer({
16125
+ label: "Radix Sort - Values 0",
16126
+ size: n2 * 4,
16127
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
16128
+ });
16129
+ this.valuesBuffer1 = this.device.createBuffer({
16130
+ label: "Radix Sort - Values 1",
16131
+ size: n2 * 4,
16132
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
16133
+ });
16134
+ const histogramSize = RADIX_SIZE * this.numWorkgroups * 4;
16135
+ this.histogramBuffer = this.device.createBuffer({
16136
+ label: "Radix Sort - Histogram",
16137
+ size: histogramSize,
16138
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
16139
+ });
16140
+ const scanBlockSize = WORKGROUP_SIZE * 2;
16141
+ const numScanBlocks = Math.ceil(histogramSize / 4 / scanBlockSize);
16142
+ this.blockSumsBuffer = this.device.createBuffer({
16143
+ label: "Radix Sort - Block Sums",
16144
+ size: Math.max(numScanBlocks * 4, 16),
16145
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
16146
+ });
16147
+ this.blockSumsBuffer2 = this.device.createBuffer({
16148
+ label: "Radix Sort - Block Sums 2",
16149
+ size: 16,
16150
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
16151
+ });
16152
+ logger.log(`✅ [GPURadixSort] Initialized: maxSplatCount=${this.maxSplatCount}, paddedCount=${this.paddedCount}, numWorkgroups=${this.numWorkgroups}`);
16153
+ }
16154
+ setPositionsBuffer(buffer) {
16155
+ this.positionsBuffer = buffer;
16156
+ }
16157
+ async sortAsync(viewMatrix, splatCount) {
16158
+ if (!this.positionsBuffer) {
16159
+ throw new Error("Positions buffer not set");
16160
+ }
16161
+ const cameraPosition = [
16162
+ -viewMatrix[12],
16163
+ -viewMatrix[13],
16164
+ -viewMatrix[14]
16165
+ ];
16166
+ const cameraForward = [
16167
+ -viewMatrix[2],
16168
+ -viewMatrix[6],
16169
+ -viewMatrix[10]
16170
+ ];
16171
+ await this.runDepthPass(cameraPosition, cameraForward, splatCount);
16172
+ let keysIn = this.keysBuffer0;
16173
+ let keysOut = this.keysBuffer1;
16174
+ let valuesIn = this.valuesBuffer0;
16175
+ let valuesOut = this.valuesBuffer1;
16176
+ for (let pass = 0; pass < 4; pass++) {
16177
+ const shift = pass * 8;
16178
+ await this.runHistogramPass(keysIn, this.paddedCount, shift);
16179
+ await this.runPrefixSum();
16180
+ await this.runScatterPass(keysIn, valuesIn, keysOut, valuesOut, this.paddedCount, shift);
16181
+ const tempK = keysIn;
16182
+ keysIn = keysOut;
16183
+ keysOut = tempK;
16184
+ const tempV = valuesIn;
16185
+ valuesIn = valuesOut;
16186
+ valuesOut = tempV;
16187
+ }
16188
+ await this.runReversePass(valuesIn, valuesOut, splatCount);
16189
+ return valuesOut;
16190
+ }
16191
+ async runReversePass(valuesIn, valuesOut, splatCount) {
16192
+ const paramsData = new Uint32Array([splatCount, 0, 0, 0]);
16193
+ this.device.queue.writeBuffer(this.paramsBuffer, 0, paramsData);
16194
+ const bindGroup = this.device.createBindGroup({
16195
+ layout: this.reversePipeline.getBindGroupLayout(0),
16196
+ entries: [
16197
+ { binding: 0, resource: { buffer: this.paramsBuffer } },
16198
+ { binding: 1, resource: { buffer: valuesIn } },
16199
+ { binding: 2, resource: { buffer: valuesOut } }
16200
+ ]
16201
+ });
16202
+ const encoder = this.device.createCommandEncoder();
16203
+ const pass = encoder.beginComputePass();
16204
+ pass.setPipeline(this.reversePipeline);
16205
+ pass.setBindGroup(0, bindGroup);
16206
+ pass.dispatchWorkgroups(Math.ceil(splatCount / WORKGROUP_SIZE));
16207
+ pass.end();
16208
+ this.device.queue.submit([encoder.finish()]);
16209
+ }
16210
+ async runDepthPass(cameraPosition, cameraForward, splatCount) {
16211
+ const uniformData = new ArrayBuffer(48);
16212
+ const floatView = new Float32Array(uniformData);
16213
+ const uintView = new Uint32Array(uniformData);
16214
+ floatView[0] = cameraPosition[0];
16215
+ floatView[1] = cameraPosition[1];
16216
+ floatView[2] = cameraPosition[2];
16217
+ floatView[3] = 0;
16218
+ floatView[4] = cameraForward[0];
16219
+ floatView[5] = cameraForward[1];
16220
+ floatView[6] = cameraForward[2];
16221
+ floatView[7] = 0;
16222
+ uintView[8] = splatCount;
16223
+ uintView[9] = this.paddedCount;
16224
+ this.device.queue.writeBuffer(this.uniformBuffer, 0, uniformData);
16225
+ const bindGroup = this.device.createBindGroup({
16226
+ layout: this.depthPipeline.getBindGroupLayout(0),
16227
+ entries: [
16228
+ { binding: 0, resource: { buffer: this.uniformBuffer } },
16229
+ { binding: 1, resource: { buffer: this.positionsBuffer } },
16230
+ { binding: 2, resource: { buffer: this.keysBuffer0 } },
16231
+ { binding: 3, resource: { buffer: this.valuesBuffer0 } }
16232
+ ]
16233
+ });
16234
+ const encoder = this.device.createCommandEncoder();
16235
+ const pass = encoder.beginComputePass();
16236
+ pass.setPipeline(this.depthPipeline);
16237
+ pass.setBindGroup(0, bindGroup);
16238
+ pass.dispatchWorkgroups(Math.ceil(this.paddedCount / WORKGROUP_SIZE));
16239
+ pass.end();
16240
+ this.device.queue.submit([encoder.finish()]);
16241
+ }
16242
+ async runHistogramPass(keysBuffer, count, shift) {
16243
+ const paramsData = new Uint32Array([count, shift, this.numWorkgroups, 0]);
16244
+ this.device.queue.writeBuffer(this.paramsBuffer, 0, paramsData);
16245
+ const bindGroup = this.device.createBindGroup({
16246
+ layout: this.histogramPipeline.getBindGroupLayout(0),
16247
+ entries: [
16248
+ { binding: 0, resource: { buffer: this.paramsBuffer } },
16249
+ { binding: 1, resource: { buffer: keysBuffer } },
16250
+ { binding: 2, resource: { buffer: this.histogramBuffer } }
16251
+ ]
16252
+ });
16253
+ const encoder = this.device.createCommandEncoder();
16254
+ encoder.clearBuffer(this.histogramBuffer);
16255
+ const pass = encoder.beginComputePass();
16256
+ pass.setPipeline(this.histogramPipeline);
16257
+ pass.setBindGroup(0, bindGroup);
16258
+ pass.dispatchWorkgroups(this.numWorkgroups);
16259
+ pass.end();
16260
+ this.device.queue.submit([encoder.finish()]);
16261
+ }
16262
+ async runPrefixSum() {
16263
+ const histogramCount = RADIX_SIZE * this.numWorkgroups;
16264
+ const scanBlockSize = WORKGROUP_SIZE * 2;
16265
+ const numBlocks = Math.ceil(histogramCount / scanBlockSize);
16266
+ const scanParams = new Uint32Array([histogramCount, 0, 0, 0]);
16267
+ this.device.queue.writeBuffer(this.scanParamsBuffer, 0, scanParams);
16268
+ const scanBindGroup = this.device.createBindGroup({
16269
+ layout: this.scanPipeline.getBindGroupLayout(0),
16270
+ entries: [
16271
+ { binding: 0, resource: { buffer: this.scanParamsBuffer } },
16272
+ { binding: 1, resource: { buffer: this.histogramBuffer } },
16273
+ { binding: 2, resource: { buffer: this.blockSumsBuffer } }
16274
+ ]
16275
+ });
16276
+ const encoder = this.device.createCommandEncoder();
16277
+ encoder.clearBuffer(this.blockSumsBuffer);
16278
+ encoder.clearBuffer(this.blockSumsBuffer2);
16279
+ const pass1 = encoder.beginComputePass();
16280
+ pass1.setPipeline(this.scanPipeline);
16281
+ pass1.setBindGroup(0, scanBindGroup);
16282
+ pass1.dispatchWorkgroups(numBlocks);
16283
+ pass1.end();
16284
+ this.device.queue.submit([encoder.finish()]);
16285
+ if (numBlocks > 1) {
16286
+ const blockSumsParams = new Uint32Array([numBlocks, 0, 0, 0]);
16287
+ this.device.queue.writeBuffer(this.scanParamsBuffer, 0, blockSumsParams);
16288
+ const blockSumsScanBindGroup = this.device.createBindGroup({
16289
+ layout: this.scanPipeline.getBindGroupLayout(0),
16290
+ entries: [
16291
+ { binding: 0, resource: { buffer: this.scanParamsBuffer } },
16292
+ { binding: 1, resource: { buffer: this.blockSumsBuffer } },
16293
+ { binding: 2, resource: { buffer: this.blockSumsBuffer2 } }
16294
+ ]
16295
+ });
16296
+ const encoder2 = this.device.createCommandEncoder();
16297
+ const pass2 = encoder2.beginComputePass();
16298
+ pass2.setPipeline(this.scanPipeline);
16299
+ pass2.setBindGroup(0, blockSumsScanBindGroup);
16300
+ pass2.dispatchWorkgroups(1);
16301
+ pass2.end();
16302
+ this.device.queue.submit([encoder2.finish()]);
16303
+ this.device.queue.writeBuffer(this.scanParamsBuffer, 0, scanParams);
16304
+ const addBindGroup = this.device.createBindGroup({
16305
+ layout: this.addBlockSumsPipeline.getBindGroupLayout(0),
16306
+ entries: [
16307
+ { binding: 0, resource: { buffer: this.scanParamsBuffer } },
16308
+ { binding: 1, resource: { buffer: this.histogramBuffer } },
16309
+ { binding: 2, resource: { buffer: this.blockSumsBuffer } }
16310
+ ]
16311
+ });
16312
+ const encoder3 = this.device.createCommandEncoder();
16313
+ const pass3 = encoder3.beginComputePass();
16314
+ pass3.setPipeline(this.addBlockSumsPipeline);
16315
+ pass3.setBindGroup(0, addBindGroup);
16316
+ pass3.dispatchWorkgroups(numBlocks);
16317
+ pass3.end();
16318
+ this.device.queue.submit([encoder3.finish()]);
16319
+ }
16320
+ }
16321
+ async runScatterPass(keysIn, valuesIn, keysOut, valuesOut, count, shift) {
16322
+ const paramsData = new Uint32Array([count, shift, this.numWorkgroups, 0]);
16323
+ this.device.queue.writeBuffer(this.paramsBuffer, 0, paramsData);
16324
+ const bindGroup = this.device.createBindGroup({
16325
+ layout: this.scatterPipeline.getBindGroupLayout(0),
16326
+ entries: [
16327
+ { binding: 0, resource: { buffer: this.paramsBuffer } },
16328
+ { binding: 1, resource: { buffer: keysIn } },
16329
+ { binding: 2, resource: { buffer: valuesIn } },
16330
+ { binding: 3, resource: { buffer: this.histogramBuffer } },
16331
+ { binding: 4, resource: { buffer: keysOut } },
16332
+ { binding: 5, resource: { buffer: valuesOut } }
16333
+ ]
16334
+ });
16335
+ const encoder = this.device.createCommandEncoder();
16336
+ const pass = encoder.beginComputePass();
16337
+ pass.setPipeline(this.scatterPipeline);
16338
+ pass.setBindGroup(0, bindGroup);
16339
+ pass.dispatchWorkgroups(this.numWorkgroups);
16340
+ pass.end();
16341
+ this.device.queue.submit([encoder.finish()]);
16342
+ }
16343
+ destroy() {
16344
+ var _a, _b, _c, _d, _e2, _f, _g, _h, _i2, _j;
16345
+ (_a = this.uniformBuffer) == null ? void 0 : _a.destroy();
16346
+ (_b = this.paramsBuffer) == null ? void 0 : _b.destroy();
16347
+ (_c = this.scanParamsBuffer) == null ? void 0 : _c.destroy();
16348
+ (_d = this.keysBuffer0) == null ? void 0 : _d.destroy();
16349
+ (_e2 = this.keysBuffer1) == null ? void 0 : _e2.destroy();
16350
+ (_f = this.valuesBuffer0) == null ? void 0 : _f.destroy();
16351
+ (_g = this.valuesBuffer1) == null ? void 0 : _g.destroy();
16352
+ (_h = this.histogramBuffer) == null ? void 0 : _h.destroy();
16353
+ (_i2 = this.blockSumsBuffer) == null ? void 0 : _i2.destroy();
16354
+ (_j = this.blockSumsBuffer2) == null ? void 0 : _j.destroy();
16355
+ }
16356
+ }
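// Hypothetical usage sketch: this mirrors how updateSortIndexFromGPU in WebGPURenderer (further
// below) drives GPURadixSort each frame; the wrapper name and its parameter list are invented here.
async function sortSplatOrderOnGPU(device, radixSort, positionsBuffer, viewMatrix, splatCount, sortIndexBuffer) {
  radixSort.setPositionsBuffer(positionsBuffer);
  // sortAsync returns a GPU buffer holding splat indices in draw order
  const sortedIndices = await radixSort.sortAsync(viewMatrix, splatCount);
  const encoder = device.createCommandEncoder({ label: "Copy Sort Result" });
  encoder.copyBufferToBuffer(sortedIndices, 0, sortIndexBuffer, 0, splatCount * 4);
  device.queue.submit([encoder.finish()]);
}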
16357
+ class WebGPURenderer {
16358
+ constructor(canvas, backgroundColor, alpha = true) {
16359
+ __publicField(this, "canvas");
16360
+ __publicField(this, "backgroundColor");
16361
+ __publicField(this, "device", null);
16362
+ __publicField(this, "context", null);
16363
+ __publicField(this, "renderPipeline", null);
16364
+ __publicField(this, "renderTexturePipeline", null);
16365
+ // used for rendering into the render texture
16366
+ __publicField(this, "quadVertexBuffer", null);
16367
+ __publicField(this, "uniformBuffer", null);
16368
+ __publicField(this, "uniformBindGroup", null);
16369
+ // 🚀 indirect-index rendering buffers
16370
+ __publicField(this, "sortIndexBuffer", null);
16371
+ __publicField(this, "splatDataBuffer", null);
16372
+ __publicField(this, "storageBindGroup", null);
16373
+ __publicField(this, "bindGroupNeedsUpdate", false);
16374
+ // marks whether the bind group needs updating
16375
+ // 🆕 Transform Pipeline (GPU 3DGS transform optimization)
16376
+ __publicField(this, "transformPipeline", null);
16377
+ __publicField(this, "useGPUTransform", false);
16378
+ // whether the GPU transform path is used
16379
+ // 🆕 FLAME Pipeline (GPU FLAME forward-pass optimization)
16380
+ __publicField(this, "flamePipeline", null);
16381
+ __publicField(this, "flameGPUBuffers", null);
16382
+ __publicField(this, "useGPUFLAME", false);
16383
+ // whether the GPU FLAME path is used
16384
+ // 🆕 GPU Radix Sort (GPU depth-sorting optimization)
16385
+ __publicField(this, "gpuRadixSort", null);
16386
+ __publicField(this, "useGPURadixSort", true);
16387
+ // whether GPU sorting is used
16388
+ __publicField(this, "splatCount", 0);
16389
+ __publicField(this, "presentationFormat", "bgra8unorm");
16390
+ __publicField(this, "alpha");
16391
+ // Render texture framebuffer
16392
+ __publicField(this, "renderTexture", null);
16393
+ __publicField(this, "renderTextureView", null);
16394
+ __publicField(this, "depthTexture", null);
16395
+ __publicField(this, "framebufferWidth", 0);
16396
+ __publicField(this, "framebufferHeight", 0);
16397
+ // Blit pipeline for drawing render texture to screen
16398
+ __publicField(this, "blitPipeline", null);
16399
+ __publicField(this, "blitUniformBuffer", null);
16400
+ __publicField(this, "blitQuadBuffer", null);
16401
+ __publicField(this, "blitSampler", null);
16402
+ this.canvas = canvas;
16403
+ this.backgroundColor = backgroundColor || [0, 0, 0, 0];
16404
+ this.alpha = alpha;
16405
+ }
16406
+ /**
16407
+ * Initialize the WebGPU renderer
16408
+ */
16409
+ async initialize() {
16410
+ const adapter = await navigator.gpu.requestAdapter({
16411
+ powerPreference: "high-performance"
16412
+ });
16413
+ if (!adapter) {
16414
+ throw new Error("WebGPU: No GPU adapter found");
16415
+ }
16416
+ this.device = await adapter.requestDevice();
16417
+ this.context = this.canvas.getContext("webgpu");
16418
+ if (!this.context) {
16419
+ throw new Error("WebGPU: Failed to get canvas context");
16420
+ }
16421
+ this.presentationFormat = navigator.gpu.getPreferredCanvasFormat();
16422
+ this.context.configure({
16423
+ device: this.device,
16424
+ format: this.presentationFormat,
16425
+ alphaMode: this.alpha ? "premultiplied" : "opaque"
16426
+ });
16427
+ this.createUniformBuffer();
16428
+ this.createQuadVertexBuffer();
16429
+ await this.createRenderPipeline();
16430
+ await this.createBlitPipeline();
16431
+ this.transformPipeline = new TransformPipeline(this.device);
16432
+ await this.transformPipeline.initialize();
16433
+ }
16434
+ /**
16435
+ * Create the uniform buffer
16436
+ */
16437
+ createUniformBuffer() {
16438
+ if (!this.device)
16439
+ return;
16440
+ const uniformBufferSize = 160;
16441
+ this.uniformBuffer = this.device.createBuffer({
16442
+ label: "Uniform Buffer",
16443
+ size: uniformBufferSize,
16444
+ usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
16445
+ });
16446
+ }
16447
+ /**
16448
+ * Create the quad vertex buffer (used for instanced rendering)
16449
+ */
16450
+ createQuadVertexBuffer() {
16451
+ if (!this.device)
16452
+ return;
16453
+ const quadVertices = new Float32Array([
16454
+ -1,
16455
+ -1,
16456
+ // bottom-left
16457
+ -1,
16458
+ 1,
16459
+ // top-left
16460
+ 1,
16461
+ -1,
16462
+ // bottom-right
16463
+ 1,
16464
+ 1
16465
+ // top-right
16466
+ ]);
16467
+ this.quadVertexBuffer = this.device.createBuffer({
16468
+ label: "Quad Vertex Buffer",
16469
+ size: quadVertices.byteLength,
16470
+ usage: GPUBufferUsage.VERTEX,
16471
+ mappedAtCreation: true
16472
+ });
16473
+ new Float32Array(this.quadVertexBuffer.getMappedRange()).set(quadVertices);
16474
+ this.quadVertexBuffer.unmap();
16475
+ }
16476
+ /**
16477
+ * Create the render pipeline
16478
+ */
16479
+ async createRenderPipeline() {
16480
+ if (!this.device)
16481
+ return;
16482
+ const shaderModule = this.device.createShaderModule({
16483
+ label: "3DGS Render Shader",
16484
+ code: renderShaderCode
16485
+ });
16486
+ const uniformBindGroupLayout = this.device.createBindGroupLayout({
16487
+ label: "Uniform Bind Group Layout",
14649
16488
  entries: [
14650
16489
  {
14651
16490
  binding: 0,
@@ -14948,59 +16787,335 @@ class WebGPURenderer {
14948
16787
  if (this.splatDataBuffer) {
14949
16788
  this.splatDataBuffer.destroy();
14950
16789
  }
14951
- this.splatDataBuffer = this.device.createBuffer({
14952
- label: "Splat Data Buffer",
14953
- size: packedData.byteLength,
14954
- usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
14955
- });
16790
+ this.splatDataBuffer = this.device.createBuffer({
16791
+ label: "Splat Data Buffer",
16792
+ size: packedData.byteLength,
16793
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
16794
+ });
16795
+ this.bindGroupNeedsUpdate = true;
16796
+ }
16797
+ this.device.queue.writeBuffer(
16798
+ this.splatDataBuffer,
16799
+ 0,
16800
+ packedData.buffer,
16801
+ packedData.byteOffset,
16802
+ packedData.byteLength
16803
+ );
16804
+ if (sortOrder) {
16805
+ const indexBufferSize = sortOrder.byteLength;
16806
+ if (!this.sortIndexBuffer || this.sortIndexBuffer.size !== indexBufferSize) {
16807
+ if (this.sortIndexBuffer) {
16808
+ this.sortIndexBuffer.destroy();
16809
+ }
16810
+ this.sortIndexBuffer = this.device.createBuffer({
16811
+ label: "Sort Index Buffer",
16812
+ size: indexBufferSize,
16813
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC
16814
+ });
16815
+ this.bindGroupNeedsUpdate = true;
16816
+ }
16817
+ this.device.queue.writeBuffer(
16818
+ this.sortIndexBuffer,
16819
+ 0,
16820
+ sortOrder.buffer,
16821
+ sortOrder.byteOffset,
16822
+ sortOrder.byteLength
16823
+ );
16824
+ if (this.bindGroupNeedsUpdate && this.renderPipeline && this.sortIndexBuffer && this.splatDataBuffer) {
16825
+ const storageBindGroupLayout = this.renderPipeline.getBindGroupLayout(1);
16826
+ this.storageBindGroup = this.device.createBindGroup({
16827
+ label: "Storage Bind Group",
16828
+ layout: storageBindGroupLayout,
16829
+ entries: [
16830
+ {
16831
+ binding: 0,
16832
+ resource: { buffer: this.sortIndexBuffer }
16833
+ },
16834
+ {
16835
+ binding: 1,
16836
+ resource: { buffer: this.splatDataBuffer }
16837
+ }
16838
+ ]
16839
+ });
16840
+ this.bindGroupNeedsUpdate = false;
16841
+ }
16842
+ }
16843
+ }
16844
+ /**
16845
+ * 🆕 Upload original splat data to the GPU (called once, at character load time)
16846
+ * @param originalSplatsData Float32Array, 16 floats (64 bytes) per splat
16847
+ * @param splatCount number of splats
16848
+ */
16849
+ loadOriginalSplats(originalSplatsData, splatCount) {
16850
+ if (!this.transformPipeline) {
16851
+ logger.warn("⚠️ Transform pipeline not initialized, skipping original splats upload");
16852
+ return;
16853
+ }
16854
+ this.transformPipeline.uploadOriginalSplats(originalSplatsData, splatCount);
16855
+ this.splatCount = splatCount;
16856
+ this.useGPUTransform = true;
16857
+ if (this.useGPURadixSort && this.device) {
16858
+ try {
16859
+ this.gpuRadixSort = new GPURadixSort({
16860
+ device: this.device,
16861
+ maxSplatCount: splatCount
16862
+ });
16863
+ logger.log(`✅ [WebGPURenderer] GPU Radix Sort initialized for ${splatCount} splats`);
16864
+ } catch (e2) {
16865
+ logger.warn("⚠️ [WebGPURenderer] Failed to initialize GPU Radix Sort, falling back to CPU", e2);
16866
+ this.useGPURadixSort = false;
16867
+ }
16868
+ }
16869
+ if (!this.sortIndexBuffer || this.sortIndexBuffer.size !== splatCount * 4) {
16870
+ if (this.sortIndexBuffer) {
16871
+ this.sortIndexBuffer.destroy();
16872
+ }
16873
+ this.sortIndexBuffer = this.device.createBuffer({
16874
+ label: "Sort Index Buffer",
16875
+ size: splatCount * 4,
16876
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC
16877
+ });
16878
+ const defaultSortOrder = new Uint32Array(splatCount);
16879
+ for (let i2 = 0; i2 < splatCount; i2++) {
16880
+ defaultSortOrder[i2] = i2;
16881
+ }
16882
+ this.device.queue.writeBuffer(this.sortIndexBuffer, 0, defaultSortOrder);
16883
+ this.bindGroupNeedsUpdate = true;
16884
+ }
16885
+ this.splatDataBuffer = this.transformPipeline.getTransformedOutputBuffer();
16886
+ if (this.renderPipeline && this.sortIndexBuffer && this.splatDataBuffer) {
16887
+ const storageBindGroupLayout = this.renderPipeline.getBindGroupLayout(1);
16888
+ this.storageBindGroup = this.device.createBindGroup({
16889
+ label: "Initial Storage Bind Group",
16890
+ layout: storageBindGroupLayout,
16891
+ entries: [
16892
+ {
16893
+ binding: 0,
16894
+ resource: { buffer: this.sortIndexBuffer }
16895
+ },
16896
+ {
16897
+ binding: 1,
16898
+ resource: { buffer: this.splatDataBuffer }
16899
+ }
16900
+ ]
16901
+ });
16902
+ this.bindGroupNeedsUpdate = false;
16903
+ logger.log("✅ [WebGPURenderer] loadOriginalSplats: Initial storage bind group created", {
16904
+ sortIndexBufferSize: this.sortIndexBuffer.size,
16905
+ splatDataBufferSize: this.splatDataBuffer.size,
16906
+ splatCount
16907
+ });
16908
+ } else {
16909
+ logger.warn("⚠️ [WebGPURenderer] loadOriginalSplats: failed to create initial storage bind group", {
16910
+ hasRenderPipeline: !!this.renderPipeline,
16911
+ hasSortIndexBuffer: !!this.sortIndexBuffer,
16912
+ hasSplatDataBuffer: !!this.splatDataBuffer
16913
+ });
16914
+ }
16915
+ logger.log(`✅ [WebGPURenderer] loadOriginalSplats: Original splats uploaded to GPU: ${splatCount} splats`);
16916
+ }
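// Illustrative sketch: loadOriginalSplats only fixes the stride, 16 floats (64 bytes) per splat.
// A caller packs one 16-float record per splat and uploads the whole array once at character load
// time; what each of the 16 slots holds is not spelled out here, so the packer is left to the caller.
function uploadOriginalSplats(renderer, splatCount, packSplat) {
  const FLOATS_PER_SPLAT = 16;
  const packed = new Float32Array(splatCount * FLOATS_PER_SPLAT);
  for (let i = 0; i < splatCount; i++) {
    // packSplat(slot, index) writes one splat's 16-float record into its slice
    packSplat(packed.subarray(i * FLOATS_PER_SPLAT, (i + 1) * FLOATS_PER_SPLAT), i);
  }
  renderer.loadOriginalSplats(packed, splatCount);
}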
16917
+ /**
16918
+ * 🆕 Update face geometry (called every frame; used by the GPU transform optimization)
16919
+ * @param faceGeometryData Float32Array, 8 floats (32 bytes) per face
16920
+ */
16921
+ updateFaceGeometry(faceGeometryData) {
16922
+ if (!this.transformPipeline) {
16923
+ logger.warn("⚠️ Transform pipeline not initialized, skipping face geometry update");
16924
+ return;
16925
+ }
16926
+ this.transformPipeline.updateFaceGeometry(faceGeometryData);
16927
+ }
16928
+ /**
16929
+ * 🆕 Load FLAME template data onto the GPU (called once, at character load time)
16930
+ * @param templateData FLAME template data
16931
+ * @param shapeParams shape parameters [300]
16932
+ * @param activeShapeParams active shape parameters (zero-parameter filtering optimization, optional)
16933
+ */
16934
+ loadFLAMETemplateData(templateData, shapeParams, activeShapeParams) {
16935
+ if (!this.device) {
16936
+ throw new Error("Device not initialized");
16937
+ }
16938
+ this.flameGPUBuffers = new FLAMEGPUBuffers();
16939
+ this.flameGPUBuffers.initialize(this.device, templateData, shapeParams, activeShapeParams);
16940
+ const metadata = this.flameGPUBuffers.getMetadata();
16941
+ if (metadata.vertexCount === 0 || metadata.faceCount === 0 || metadata.jointCount === 0) {
16942
+ throw new Error(`Invalid FLAME metadata: vertexCount=${metadata.vertexCount}, faceCount=${metadata.faceCount}, jointCount=${metadata.jointCount}`);
16943
+ }
16944
+ logger.log("🔧 FLAME Pipeline metadata:", {
16945
+ vertexCount: metadata.vertexCount,
16946
+ faceCount: metadata.faceCount,
16947
+ jointCount: metadata.jointCount,
16948
+ shapeParamCount: metadata.shapeParamCount,
16949
+ poseParamCount: metadata.poseParamCount,
16950
+ staticOffsetCount: metadata.staticOffsetCount
16951
+ });
16952
+ this.flamePipeline = new FLAMEPipeline(
16953
+ this.device,
16954
+ this.flameGPUBuffers.getBuffers(),
16955
+ metadata.vertexCount,
16956
+ metadata.faceCount,
16957
+ metadata.jointCount
16958
+ );
16959
+ this.useGPUFLAME = true;
16960
+ logger.log("✅ FLAME Pipeline initialized and GPU FLAME path enabled");
16961
+ }
16962
+ /**
16963
+ * 🆕 Update FLAME frame parameters (called every frame)
16964
+ * @param frameParams FLAME frame parameters
16965
+ */
16966
+ updateFLAMEFrameParams(frameParams) {
16967
+ if (!this.flameGPUBuffers) {
16968
+ return;
16969
+ }
16970
+ this.flameGPUBuffers.updateFrameParams(frameParams);
16971
+ }
16972
+ /**
16973
+ * 🆕 Whether the GPU transform path is in use
16974
+ */
16975
+ getUseGPUTransform() {
16976
+ return this.useGPUTransform;
16977
+ }
16978
+ /**
16979
+ * 🆕 Whether the GPU FLAME path is in use
16980
+ */
16981
+ getUseGPUFLAME() {
16982
+ return this.useGPUFLAME;
16983
+ }
16984
+ /**
16985
+ * 🆕 Render using face geometry (GPU transform optimization path)
16986
+ * Data flow: Face Geometry → GPU Transform → Render
16987
+ *
16988
+ * Two modes are supported:
16989
+ * 1. CPU FLAME path: pass faceGeometryData (computed on the CPU)
16990
+ * 2. GPU FLAME path: pass frameParams (FLAME evaluated on the GPU)
16991
+ */
16992
+ async renderWithFaceGeometry(faceGeometryDataOrFrameParams, viewMatrix, projectionMatrix, screenSize, transform) {
16993
+ if (!this.transformPipeline || !this.useGPUTransform) {
16994
+ logger.error(`❌ Transform pipeline not ready or GPU Transform not enabled: hasTransformPipeline=${!!this.transformPipeline}, useGPUTransform=${this.useGPUTransform}`);
16995
+ return;
16996
+ }
16997
+ if (!this.device || !this.context || !this.renderPipeline || !this.uniformBindGroup) {
16998
+ logger.error(`❌ [WebGPURenderer] Render resources not ready: hasDevice=${!!this.device}, hasContext=${!!this.context}, hasRenderPipeline=${!!this.renderPipeline}, hasUniformBindGroup=${!!this.uniformBindGroup}`);
16999
+ return;
17000
+ }
17001
+ const [width, height] = screenSize;
17002
+ const needsTransform = transform && (transform.x !== 0 || transform.y !== 0 || transform.scale !== 1);
17003
+ let faceGeometryBuffer = null;
17004
+ const isFLAMEFrameParams = !(faceGeometryDataOrFrameParams instanceof Float32Array);
17005
+ const computeEncoder = this.device.createCommandEncoder({
17006
+ label: "FLAME + Transform Command Encoder"
17007
+ });
17008
+ if (this.useGPUFLAME && this.flamePipeline && isFLAMEFrameParams) {
17009
+ const frameParams = faceGeometryDataOrFrameParams;
17010
+ this.updateFLAMEFrameParams(frameParams);
17011
+ const flameOutput = this.flamePipeline.compute(computeEncoder);
17012
+ faceGeometryBuffer = flameOutput.faceGeometries;
17013
+ if (faceGeometryBuffer) {
17014
+ this.transformPipeline.setFaceGeometryBufferFromGPU(faceGeometryBuffer, flameOutput.faceCount);
17015
+ }
17016
+ } else {
17017
+ const faceGeometryData = faceGeometryDataOrFrameParams;
17018
+ this.transformPipeline.updateFaceGeometry(faceGeometryData);
17019
+ }
17020
+ this.transformPipeline.updateViewMatrix(viewMatrix);
17021
+ this.updateUniforms(viewMatrix, projectionMatrix, screenSize);
17022
+ this.transformPipeline.executeInEncoder(computeEncoder);
17023
+ const transformedBuffer = this.transformPipeline.getTransformedOutputBuffer();
17024
+ if (!transformedBuffer) {
17025
+ logger.error("❌ Transformed buffer not available");
17026
+ return;
17027
+ }
17028
+ const bufferChanged = !this.splatDataBuffer || this.splatDataBuffer !== transformedBuffer;
17029
+ if (bufferChanged) {
17030
+ this.splatDataBuffer = transformedBuffer;
14956
17031
  this.bindGroupNeedsUpdate = true;
14957
17032
  }
14958
- this.device.queue.writeBuffer(
14959
- this.splatDataBuffer,
14960
- 0,
14961
- packedData.buffer,
14962
- packedData.byteOffset,
14963
- packedData.byteLength
14964
- );
14965
- if (sortOrder) {
14966
- const indexBufferSize = sortOrder.byteLength;
14967
- if (!this.sortIndexBuffer || this.sortIndexBuffer.size !== indexBufferSize) {
14968
- if (this.sortIndexBuffer) {
14969
- this.sortIndexBuffer.destroy();
14970
- }
14971
- this.sortIndexBuffer = this.device.createBuffer({
14972
- label: "Sort Index Buffer",
14973
- size: indexBufferSize,
14974
- usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
14975
- });
14976
- this.bindGroupNeedsUpdate = true;
17033
+ this.device.queue.submit([computeEncoder.finish()]);
17034
+ await this.updateSortIndexFromGPU(viewMatrix);
17035
+ if (this.bindGroupNeedsUpdate && this.renderPipeline && this.sortIndexBuffer && this.splatDataBuffer && this.device) {
17036
+ const storageBindGroupLayout = this.renderPipeline.getBindGroupLayout(1);
17037
+ this.storageBindGroup = this.device.createBindGroup({
17038
+ label: "Storage Bind Group",
17039
+ layout: storageBindGroupLayout,
17040
+ entries: [
17041
+ {
17042
+ binding: 0,
17043
+ resource: { buffer: this.sortIndexBuffer }
17044
+ },
17045
+ {
17046
+ binding: 1,
17047
+ resource: { buffer: this.splatDataBuffer }
17048
+ }
17049
+ ]
17050
+ });
17051
+ this.bindGroupNeedsUpdate = false;
17052
+ }
17053
+ if (!this.storageBindGroup) {
17054
+ logger.error(`❌ Storage bind group not ready: bindGroupNeedsUpdate=${this.bindGroupNeedsUpdate}, hasRenderPipeline=${!!this.renderPipeline}, hasSortIndexBuffer=${!!this.sortIndexBuffer}, hasSplatDataBuffer=${!!this.splatDataBuffer}, hasDevice=${!!this.device}`);
17055
+ return;
17056
+ }
17057
+ const renderEncoder = this.device.createCommandEncoder({
17058
+ label: "Render Command Encoder"
17059
+ });
17060
+ if (needsTransform) {
17061
+ if (!this.renderTexture || this.framebufferWidth !== width || this.framebufferHeight !== height) {
17062
+ this.createRenderTexture(width, height);
14977
17063
  }
14978
- this.device.queue.writeBuffer(
14979
- this.sortIndexBuffer,
14980
- 0,
14981
- sortOrder.buffer,
14982
- sortOrder.byteOffset,
14983
- sortOrder.byteLength
14984
- );
14985
- if (this.bindGroupNeedsUpdate && this.renderPipeline && this.sortIndexBuffer && this.splatDataBuffer) {
14986
- const storageBindGroupLayout = this.renderPipeline.getBindGroupLayout(1);
14987
- this.storageBindGroup = this.device.createBindGroup({
14988
- label: "Storage Bind Group",
14989
- layout: storageBindGroupLayout,
14990
- entries: [
14991
- {
14992
- binding: 0,
14993
- resource: { buffer: this.sortIndexBuffer }
17064
+ const renderPass = renderEncoder.beginRenderPass({
17065
+ label: "Render to Texture Pass",
17066
+ colorAttachments: [
17067
+ {
17068
+ view: this.renderTextureView,
17069
+ clearValue: {
17070
+ r: this.backgroundColor[0],
17071
+ g: this.backgroundColor[1],
17072
+ b: this.backgroundColor[2],
17073
+ a: this.backgroundColor[3]
14994
17074
  },
14995
- {
14996
- binding: 1,
14997
- resource: { buffer: this.splatDataBuffer }
14998
- }
14999
- ]
15000
- });
15001
- this.bindGroupNeedsUpdate = false;
15002
- }
17075
+ loadOp: "clear",
17076
+ storeOp: "store"
17077
+ }
17078
+ ],
17079
+ depthStencilAttachment: {
17080
+ view: this.depthTexture.createView(),
17081
+ depthLoadOp: "clear",
17082
+ depthStoreOp: "store",
17083
+ depthClearValue: 1
17084
+ }
17085
+ });
17086
+ renderPass.setPipeline(this.renderTexturePipeline);
17087
+ renderPass.setBindGroup(0, this.uniformBindGroup);
17088
+ renderPass.setBindGroup(1, this.storageBindGroup);
17089
+ renderPass.setVertexBuffer(0, this.quadVertexBuffer);
17090
+ renderPass.draw(4, this.splatCount);
17091
+ renderPass.end();
17092
+ this.blitToScreen(renderEncoder, transform);
17093
+ } else {
17094
+ const textureView = this.context.getCurrentTexture().createView();
17095
+ const renderPass = renderEncoder.beginRenderPass({
17096
+ label: "Render Pass",
17097
+ colorAttachments: [
17098
+ {
17099
+ view: textureView,
17100
+ clearValue: {
17101
+ r: this.backgroundColor[0],
17102
+ g: this.backgroundColor[1],
17103
+ b: this.backgroundColor[2],
17104
+ a: this.backgroundColor[3]
17105
+ },
17106
+ loadOp: "clear",
17107
+ storeOp: "store"
17108
+ }
17109
+ ]
17110
+ });
17111
+ renderPass.setPipeline(this.renderPipeline);
17112
+ renderPass.setBindGroup(0, this.uniformBindGroup);
17113
+ renderPass.setBindGroup(1, this.storageBindGroup);
17114
+ renderPass.setVertexBuffer(0, this.quadVertexBuffer);
17115
+ renderPass.draw(4, this.splatCount);
17116
+ renderPass.end();
15003
17117
  }
17118
+ this.device.queue.submit([renderEncoder.finish()]);
15004
17119
  }
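// Illustrative call pattern for the two modes documented on renderWithFaceGeometry; the surrounding
// variables are placeholders, only the method signature comes from the code above.
async function drawAvatarFrame(renderer, input, viewMatrix, projectionMatrix, width, height) {
  // A Float32Array selects the CPU FLAME path (precomputed face geometry, 8 floats per face);
  // any other object is treated as FLAME frame params and evaluated on the GPU, matching the
  // instanceof check inside renderWithFaceGeometry.
  await renderer.renderWithFaceGeometry(input, viewMatrix, projectionMatrix, [width, height]);
}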
15005
17120
  /**
15006
17121
  * Render a single frame
@@ -15013,9 +17128,54 @@ class WebGPURenderer {
15013
17128
  const [width, height] = screenSize;
15014
17129
  const needsTransform = transform && (transform.x !== 0 || transform.y !== 0 || transform.scale !== 1);
15015
17130
  this.updateUniforms(viewMatrix, projectionMatrix, screenSize);
17131
+ if (this.useGPUTransform && this.transformPipeline) {
17132
+ this.transformPipeline.updateViewMatrix(viewMatrix);
17133
+ const commandEncoder2 = this.device.createCommandEncoder({
17134
+ label: "Transform + Render Command Encoder"
17135
+ });
17136
+ this.transformPipeline.executeInEncoder(commandEncoder2);
17137
+ const transformedBuffer = this.transformPipeline.getTransformedOutputBuffer();
17138
+ if (transformedBuffer) {
17139
+ if (this.splatDataBuffer !== transformedBuffer) {
17140
+ this.splatDataBuffer = transformedBuffer;
17141
+ this.bindGroupNeedsUpdate = true;
17142
+ }
17143
+ }
17144
+ this.renderWithCommandEncoder(commandEncoder2, viewMatrix, projectionMatrix, screenSize, transform, needsTransform || false, width, height);
17145
+ this.device.queue.submit([commandEncoder2.finish()]);
17146
+ return;
17147
+ }
15016
17148
  const commandEncoder = this.device.createCommandEncoder({
15017
17149
  label: "Render Command Encoder"
15018
17150
  });
17151
+ this.renderWithCommandEncoder(commandEncoder, viewMatrix, projectionMatrix, screenSize, transform, needsTransform || false, width, height);
17152
+ this.device.queue.submit([commandEncoder.finish()]);
17153
+ }
17154
+ /**
17155
+ * 🆕 Render logic (extracted into a shared method used by both the transform and legacy paths)
17156
+ */
17157
+ renderWithCommandEncoder(commandEncoder, _viewMatrix, _projectionMatrix, _screenSize, transform, needsTransform, width, height) {
17158
+ if (this.bindGroupNeedsUpdate && this.renderPipeline && this.sortIndexBuffer && this.splatDataBuffer && this.device) {
17159
+ const storageBindGroupLayout = this.renderPipeline.getBindGroupLayout(1);
17160
+ this.storageBindGroup = this.device.createBindGroup({
17161
+ label: "Storage Bind Group",
17162
+ layout: storageBindGroupLayout,
17163
+ entries: [
17164
+ {
17165
+ binding: 0,
17166
+ resource: { buffer: this.sortIndexBuffer }
17167
+ },
17168
+ {
17169
+ binding: 1,
17170
+ resource: { buffer: this.splatDataBuffer }
17171
+ }
17172
+ ]
17173
+ });
17174
+ this.bindGroupNeedsUpdate = false;
17175
+ }
17176
+ if (!this.device || !this.context || !this.renderPipeline || !this.storageBindGroup) {
17177
+ return;
17178
+ }
15019
17179
  if (needsTransform) {
15020
17180
  if (!this.renderTexture || this.framebufferWidth !== width || this.framebufferHeight !== height) {
15021
17181
  this.createRenderTexture(width, height);
@@ -15074,7 +17234,6 @@ class WebGPURenderer {
15074
17234
  renderPass.draw(4, this.splatCount);
15075
17235
  renderPass.end();
15076
17236
  }
15077
- this.device.queue.submit([commandEncoder.finish()]);
15078
17237
  }
15079
17238
  /**
15080
17239
  * 将 render texture 绘制到屏幕(应用 transform)
@@ -15164,11 +17323,99 @@ class WebGPURenderer {
15164
17323
  updateBackgroundColor(backgroundColor) {
15165
17324
  this.backgroundColor = backgroundColor;
15166
17325
  }
17326
+ /**
17327
+ * 🔍 Key fix: read the transformed positions back from the GPU, depth-sort them, and update sortIndexBuffer
17328
+ * This fixes the first-frame rendering glitch on the GPU path (unsorted splats were drawn in the wrong order)
17329
+ */
17330
+ async updateSortIndexFromGPU(viewMatrix) {
17331
+ if (!this.device || !this.transformPipeline || !this.sortIndexBuffer) {
17332
+ return;
17333
+ }
17334
+ if (this.useGPURadixSort && this.gpuRadixSort) {
17335
+ const positionsBuffer2 = this.transformPipeline.getPositionsOutputBuffer();
17336
+ if (!positionsBuffer2) {
17337
+ logger.warn("⚠️ [WebGPURenderer] updateSortIndexFromGPU: positionsBuffer not available");
17338
+ return;
17339
+ }
17340
+ this.gpuRadixSort.setPositionsBuffer(positionsBuffer2);
17341
+ const sortedIndicesBuffer = await this.gpuRadixSort.sortAsync(viewMatrix, this.splatCount);
17342
+ const copyEncoder = this.device.createCommandEncoder({ label: "Copy Sort Result" });
17343
+ copyEncoder.copyBufferToBuffer(
17344
+ sortedIndicesBuffer,
17345
+ 0,
17346
+ this.sortIndexBuffer,
17347
+ 0,
17348
+ this.splatCount * 4
17349
+ );
17350
+ this.device.queue.submit([copyEncoder.finish()]);
17351
+ return;
17352
+ }
17353
+ performance.now();
17354
+ const cameraPosition = [
17355
+ -viewMatrix[12],
17356
+ -viewMatrix[13],
17357
+ -viewMatrix[14]
17358
+ ];
17359
+ const cameraForward = [
17360
+ -viewMatrix[2],
17361
+ -viewMatrix[6],
17362
+ -viewMatrix[10]
17363
+ ];
17364
+ const positionsBuffer = this.transformPipeline.getPositionsOutputBuffer();
17365
+ if (!positionsBuffer) {
17366
+ logger.warn("⚠️ [WebGPURenderer] updateSortIndexFromGPU: positionsBuffer not available");
17367
+ return;
17368
+ }
17369
+ const positionsSize = this.splatCount * 3 * 4;
17370
+ const stagingBuffer = this.device.createBuffer({
17371
+ size: positionsSize,
17372
+ usage: GPUBufferUsage.COPY_DST | GPUBufferUsage.MAP_READ
17373
+ });
17374
+ const readbackStart = performance.now();
17375
+ const readbackEncoder = this.device.createCommandEncoder();
17376
+ readbackEncoder.copyBufferToBuffer(
17377
+ positionsBuffer,
17378
+ 0,
17379
+ stagingBuffer,
17380
+ 0,
17381
+ positionsSize
17382
+ );
17383
+ this.device.queue.submit([readbackEncoder.finish()]);
17384
+ await stagingBuffer.mapAsync(GPUMapMode.READ);
17385
+ const positionsMapped = stagingBuffer.getMappedRange();
17386
+ const positions = new Float32Array(positionsMapped);
17387
+ performance.now() - readbackStart;
17388
+ const convertStart = performance.now();
17389
+ const floatsPerPoint = 13;
17390
+ const packedData = new Float32Array(this.splatCount * floatsPerPoint);
17391
+ for (let i2 = 0; i2 < this.splatCount; i2++) {
17392
+ const offset = i2 * floatsPerPoint;
17393
+ const posOffset = i2 * 3;
17394
+ packedData[offset] = positions[posOffset];
17395
+ packedData[offset + 1] = positions[posOffset + 1];
17396
+ packedData[offset + 2] = positions[posOffset + 2];
17397
+ }
17398
+ performance.now() - convertStart;
17399
+ const sortStart = performance.now();
17400
+ const sortOrder = sortSplats(packedData, cameraPosition, cameraForward);
17401
+ performance.now() - sortStart;
17402
+ const writeStart = performance.now();
17403
+ this.device.queue.writeBuffer(
17404
+ this.sortIndexBuffer,
17405
+ 0,
17406
+ sortOrder.buffer,
17407
+ sortOrder.byteOffset,
17408
+ sortOrder.byteLength
17409
+ );
17410
+ performance.now() - writeStart;
17411
+ stagingBuffer.unmap();
17412
+ stagingBuffer.destroy();
17413
+ }
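// Conceptual sketch of the CPU fallback above: sortSplats itself is not shown in this diff, so the
// depth-key ordering below is an assumption rather than its actual implementation. Positions here
// are tightly packed xyz triples instead of the 13-float records used above.
function depthSortIndices(positions, cameraPosition, cameraForward, count) {
  const depths = new Float32Array(count);
  const order = new Uint32Array(count);
  for (let i = 0; i < count; i++) {
    const dx = positions[i * 3] - cameraPosition[0];
    const dy = positions[i * 3 + 1] - cameraPosition[1];
    const dz = positions[i * 3 + 2] - cameraPosition[2];
    depths[i] = dx * cameraForward[0] + dy * cameraForward[1] + dz * cameraForward[2];
    order[i] = i;
  }
  // far-to-near, so splats composite back-to-front
  return order.sort((a, b) => depths[b] - depths[a]);
}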
15167
17414
  /**
15168
17415
  * Clean up resources
15169
17416
  */
15170
17417
  dispose() {
15171
- var _a, _b, _c, _d, _e2, _f, _g, _h, _i2;
17418
+ var _a, _b, _c, _d, _e2, _f, _g, _h, _i2, _j, _k, _l, _m;
15172
17419
  (_a = this.sortIndexBuffer) == null ? void 0 : _a.destroy();
15173
17420
  (_b = this.splatDataBuffer) == null ? void 0 : _b.destroy();
15174
17421
  (_c = this.quadVertexBuffer) == null ? void 0 : _c.destroy();
@@ -15177,7 +17424,11 @@ class WebGPURenderer {
15177
17424
  (_f = this.depthTexture) == null ? void 0 : _f.destroy();
15178
17425
  (_g = this.blitUniformBuffer) == null ? void 0 : _g.destroy();
15179
17426
  (_h = this.blitQuadBuffer) == null ? void 0 : _h.destroy();
15180
- (_i2 = this.device) == null ? void 0 : _i2.destroy();
17427
+ (_i2 = this.transformPipeline) == null ? void 0 : _i2.destroy();
17428
+ (_j = this.flamePipeline) == null ? void 0 : _j.destroy();
17429
+ (_k = this.flameGPUBuffers) == null ? void 0 : _k.destroy();
17430
+ (_l = this.gpuRadixSort) == null ? void 0 : _l.destroy();
17431
+ (_m = this.device) == null ? void 0 : _m.destroy();
15181
17432
  this.sortIndexBuffer = null;
15182
17433
  this.splatDataBuffer = null;
15183
17434
  this.quadVertexBuffer = null;
@@ -15191,6 +17442,10 @@ class WebGPURenderer {
15191
17442
  this.blitQuadBuffer = null;
15192
17443
  this.blitPipeline = null;
15193
17444
  this.blitSampler = null;
17445
+ this.transformPipeline = null;
17446
+ this.flamePipeline = null;
17447
+ this.flameGPUBuffers = null;
17448
+ this.gpuRadixSort = null;
15194
17449
  this.device = null;
15195
17450
  this.context = null;
15196
17451
  this.renderPipeline = null;
@@ -15317,6 +17572,33 @@ class RenderSystem {
15317
17572
  const renderTime = performance.now() - startRender;
15318
17573
  this.renderTime = renderTime;
15319
17574
  }
17575
+ /**
17576
+ * 🆕 Render using face geometry (GPU transform optimization path)
17577
+ * Data flow: Face Geometry → GPU Transform → Render
17578
+ */
17579
+ async renderFrameWithFaceGeometry(faceGeometryData, viewMatrix, projectionMatrix, screenSize, transform, cpuFaceGeometriesForComparison, avatarCore) {
17580
+ if (!this.renderer || this.backend !== "webgpu") {
17581
+ logger.warn("⚠️ renderFrameWithFaceGeometry only works with WebGPU, falling back to standard render");
17582
+ return;
17583
+ }
17584
+ this.updateCameraMatrices();
17585
+ const webgpuRenderer = this.renderer;
17586
+ if (typeof webgpuRenderer.renderWithFaceGeometry === "function") {
17587
+ await webgpuRenderer.renderWithFaceGeometry(
17588
+ faceGeometryData,
17589
+ viewMatrix ?? this.viewMatrix,
17590
+ projectionMatrix ?? this.projectionMatrix,
17591
+ screenSize ?? [this.canvas.width, this.canvas.height],
17592
+ transform ?? (this.offsetX !== 0 || this.offsetY !== 0 || this.scale !== 1 ? { x: this.offsetX, y: this.offsetY, scale: this.scale } : void 0),
17593
+ cpuFaceGeometriesForComparison,
17594
+ avatarCore
17595
+ );
17596
+ } else {
17597
+ logger.error("❌ WebGPU renderer does not support renderWithFaceGeometry");
17598
+ return;
17599
+ }
17600
+ this.renderTime = performance.now();
17601
+ }
15320
17602
  /**
15321
17603
  * Set transform for render texture blit
15322
17604
  * @param x - Horizontal offset in normalized coordinates (-1 to 1, where -1 = left edge, 0 = center, 1 = right edge)
@@ -15916,6 +18198,7 @@ class AvatarView {
15916
18198
  logger.log("[AvatarView] Initializing render system...");
15917
18199
  const cameraConfig = this.resolveCameraConfig(resources);
15918
18200
  await this.initializeRenderSystem(cameraConfig);
18201
+ await this.initializeGPUPath(avatarCore);
15919
18202
  if (APP_CONFIG.debug)
15920
18203
  logger.log("[AvatarView] Starting rendering...");
15921
18204
  await this.renderFirstFrame();
@@ -15951,6 +18234,78 @@ class AvatarView {
15951
18234
  if (APP_CONFIG.debug)
15952
18235
  logger.log("[AvatarView] Render system initialized successfully");
15953
18236
  }
18237
+ /**
18238
+ * 🆕 Initialize the GPU path (Transform + FLAME Pipeline)
18239
+ * @internal
18240
+ */
18241
+ async initializeGPUPath(avatarCore) {
18242
+ try {
18243
+ logger.log("[AvatarView] 🚀 Initializing GPU Transform optimization...");
18244
+ const renderer = this.renderSystem.renderer;
18245
+ logger.log("[AvatarView] GPU Init - hasRenderer:", !!renderer, "hasLoadOriginalSplats:", typeof (renderer == null ? void 0 : renderer.loadOriginalSplats) === "function");
18246
+ const originalSplatsResult = await avatarCore.getOriginalSplatsData();
18247
+ logger.log("[AvatarView] GPU Init - originalSplatsResult:", !!originalSplatsResult, "hasData:", !!(originalSplatsResult == null ? void 0 : originalSplatsResult.data), "count:", originalSplatsResult == null ? void 0 : originalSplatsResult.count);
18248
+ if (originalSplatsResult && originalSplatsResult.data) {
18249
+ if (renderer && typeof renderer.loadOriginalSplats === "function") {
18250
+ renderer.loadOriginalSplats(
18251
+ originalSplatsResult.data,
18252
+ originalSplatsResult.count
18253
+ );
18254
+ logger.log(`[AvatarView] ✅ Original splats uploaded to GPU: ${originalSplatsResult.count} splats`);
18255
+ if (typeof renderer.getUseGPUTransform === "function") {
18256
+ logger.log("[AvatarView] GPU Init - useGPUTransform after upload:", renderer.getUseGPUTransform());
18257
+ }
18258
+ } else {
18259
+ logger.warn("[AvatarView] ⚠️ GPU Init - renderer.loadOriginalSplats not available!");
18260
+ }
18261
+ } else {
18262
+ logger.warn("[AvatarView] ⚠️ GPU Init - No original splats data from WASM!");
18263
+ }
18264
+ try {
18265
+ const templateData = await avatarCore.getFLAMETemplateData(this.characterId);
18266
+ if (templateData && renderer && typeof renderer.loadFLAMETemplateData === "function") {
18267
+ const shapeParamsResult = await avatarCore.getCharacterShapeParams(this.characterId);
18268
+ if (shapeParamsResult && shapeParamsResult.params) {
18269
+ const shapeParams = new Float32Array(shapeParamsResult.params);
18270
+ const activeShapeIndices = [];
18271
+ const activeShapeValues = [];
18272
+ const EPSILON = 1e-6;
18273
+ for (let i2 = 0; i2 < shapeParams.length && i2 < 300; i2++) {
18274
+ if (Math.abs(shapeParams[i2]) > EPSILON) {
18275
+ activeShapeIndices.push(i2);
18276
+ activeShapeValues.push(shapeParams[i2]);
18277
+ }
18278
+ }
18279
+ renderer.loadFLAMETemplateData(templateData, shapeParams, {
18280
+ activeIndices: new Uint32Array(activeShapeIndices),
18281
+ activeValues: new Float32Array(activeShapeValues),
18282
+ count: activeShapeIndices.length
18283
+ });
18284
+ if (APP_CONFIG.debug)
18285
+ logger.log(`[AvatarView] ✅ FLAME template data uploaded to GPU: ${templateData.vertexCount} vertices`);
18286
+ }
18287
+ }
18288
+ } catch (flameError) {
18289
+ logger.warn("[AvatarView] Failed to load FLAME template data:", flameError instanceof Error ? flameError.message : String(flameError));
18290
+ }
18291
+ const useGPUPath = typeof (renderer == null ? void 0 : renderer.getUseGPUTransform) === "function" && renderer.getUseGPUTransform() && typeof (renderer == null ? void 0 : renderer.getUseGPUFLAME) === "function" && renderer.getUseGPUFLAME();
18292
+ if (useGPUPath) {
18293
+ this.avatarController.setRenderCallback(
18294
+ (splatData, frameIndex) => {
18295
+ this.renderRealtimeFrame(splatData, frameIndex);
18296
+ },
18297
+ this.characterHandle,
18298
+ true
18299
+ // useGPUPath = true
18300
+ );
18301
+ logger.log("[AvatarView] ✅ GPU path enabled for AvatarController (skipping splatData computation)");
18302
+ }
18303
+ if (APP_CONFIG.debug)
18304
+ logger.log("[AvatarView] ✅ GPU Transform optimization initialized");
18305
+ } catch (error) {
18306
+ logger.warn("[AvatarView] Failed to initialize GPU path, falling back to CPU:", error instanceof Error ? error.message : String(error));
18307
+ }
18308
+ }
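// Worked illustration of the zero-parameter filtering above; the values are made up. Only non-zero
// FLAME shape coefficients are forwarded as (index, value) pairs, so a mostly-zero 300-entry vector
// collapses to a handful of active entries before it reaches loadFLAMETemplateData.
const exampleShapeParams = new Float32Array(300);
exampleShapeParams[2] = 0.8;
exampleShapeParams[17] = -0.25;
// After the filtering loop: activeIndices = Uint32Array [2, 17],
// activeValues = Float32Array [0.8, -0.25], count = 2.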
15954
18309
  /**
15955
18310
  * Get default camera configuration
15956
18311
  * @internal
@@ -16048,27 +18403,76 @@ class AvatarView {
16048
18403
  if (!avatarCore) {
16049
18404
  throw new Error("AvatarCore not available");
16050
18405
  }
16051
- const neutralParams = {
16052
- shape_params: Array.from({ length: 100 }, () => 0),
16053
- expr_params: Array.from({ length: 50 }, () => 0),
16054
- rotation: [0, 0, 0],
16055
- translation: [0, 0, 0],
16056
- neck_pose: [0, 0, 0],
16057
- jaw_pose: [0, 0, 0],
16058
- eyes_pose: [0, 0, 0, 0, 0, 0]
16059
- };
16060
- const splatData = await avatarCore.computeFrameFlatFromParams(neutralParams, this.characterHandle ?? void 0);
16061
- if (splatData) {
16062
- this.renderSystem.loadSplatsFromPackedData(splatData);
16063
- this.renderSystem.renderFrame();
16064
- if (APP_CONFIG.debug)
16065
- logger.log("[AvatarView] First frame rendered successfully");
16066
- (_a = this.onFirstRendering) == null ? void 0 : _a.call(this);
16067
- this.reportAvatarActive();
16068
- this.startAvatarActiveHeartbeat();
18406
+ const backend = this.renderSystem.getBackend();
18407
+ const renderer = this.renderSystem.renderer;
18408
+ const hasGetUseGPUTransform = typeof (renderer == null ? void 0 : renderer.getUseGPUTransform) === "function";
18409
+ const gpuTransformFlag = hasGetUseGPUTransform ? renderer.getUseGPUTransform() : false;
18410
+ const useGPUTransform = backend === "webgpu" && renderer && hasGetUseGPUTransform && gpuTransformFlag;
18411
+ logger.log("[AvatarView] renderFirstFrame - GPU path check:", {
18412
+ backend,
18413
+ hasRenderer: !!renderer,
18414
+ hasGetUseGPUTransform,
18415
+ gpuTransformFlag,
18416
+ useGPUTransform
18417
+ });
18418
+ const useGPUFLAME = typeof (renderer == null ? void 0 : renderer.getUseGPUFLAME) === "function" && renderer.getUseGPUFLAME();
18419
+ if (useGPUTransform && useGPUFLAME) {
18420
+ logger.log("[AvatarView] 🚀🔥 Using FULL GPU path for first frame (GPU FLAME + GPU Transform)!");
18421
+ try {
18422
+ const neutralFrameParams = {
18423
+ exprParams: new Float32Array(100),
18424
+ rotation: new Float32Array([0, 0, 0]),
18425
+ translation: new Float32Array([0, 0, 0]),
18426
+ neckPose: new Float32Array([0, 0, 0]),
18427
+ jawPose: new Float32Array([0, 0, 0]),
18428
+ eyesPose: new Float32Array([0, 0, 0, 0, 0, 0]),
18429
+ eyelid: new Float32Array([0, 0])
18430
+ };
18431
+ await this.renderSystem.renderFrameWithFaceGeometry(neutralFrameParams);
18432
+ logger.log("[AvatarView] ✅ First frame rendered successfully (FULL GPU path)");
18433
+ } catch (gpuFlameError) {
18434
+ logger.error("[AvatarView] ❌ GPU FLAME path failed, falling back to CPU FLAME");
18435
+ const faceGeometryData = await avatarCore.computeFrameAsFaceGeometry({ frameIndex: 0, characterId: this.characterId });
18436
+ if (faceGeometryData) {
18437
+ await this.renderSystem.renderFrameWithFaceGeometry(faceGeometryData);
18438
+ logger.log("[AvatarView] ✅ First frame rendered successfully (fallback to CPU FLAME)");
18439
+ } else {
18440
+ throw new Error("Failed to compute first frame face geometry data");
18441
+ }
18442
+ }
18443
+ } else if (useGPUTransform) {
18444
+ logger.log("[AvatarView] 🚀 Using GPU Transform path for first frame (CPU FLAME)!");
18445
+ const faceGeometryData = await avatarCore.computeFrameAsFaceGeometry({ frameIndex: 0, characterId: this.characterId });
18446
+ if (faceGeometryData) {
18447
+ await this.renderSystem.renderFrameWithFaceGeometry(faceGeometryData);
18448
+ logger.log("[AvatarView] ✅ First frame rendered successfully (GPU Transform path)");
18449
+ } else {
18450
+ throw new Error("Failed to compute first frame face geometry data");
18451
+ }
16069
18452
  } else {
16070
- throw new Error("Failed to compute first frame splat data");
18453
+ logger.log("[AvatarView] 📊 Using CPU path for first frame (GPU not ready)");
18454
+ const neutralParams = {
18455
+ shape_params: Array.from({ length: 100 }, () => 0),
18456
+ expr_params: Array.from({ length: 50 }, () => 0),
18457
+ rotation: [0, 0, 0],
18458
+ translation: [0, 0, 0],
18459
+ neck_pose: [0, 0, 0],
18460
+ jaw_pose: [0, 0, 0],
18461
+ eyes_pose: [0, 0, 0, 0, 0, 0]
18462
+ };
18463
+ const splatData = await avatarCore.computeFrameFlatFromParams(neutralParams, this.characterHandle ?? void 0);
18464
+ if (splatData) {
18465
+ this.renderSystem.loadSplatsFromPackedData(splatData);
18466
+ this.renderSystem.renderFrame();
18467
+ if (APP_CONFIG.debug)
18468
+ logger.log("[AvatarView] First frame rendered successfully (CPU path)");
18469
+ } else {
18470
+ throw new Error("Failed to compute first frame splat data");
18471
+ }
16071
18472
  }
18473
+ (_a = this.onFirstRendering) == null ? void 0 : _a.call(this);
18474
+ this.reportAvatarActive();
18475
+ this.startAvatarActiveHeartbeat();
16072
18476
  }
16073
18477
  /**
16074
18478
  * Update FPS statistics (called in requestAnimationFrame callback)
@@ -16132,17 +18536,46 @@ class AvatarView {
16132
18536
  if (!avatarCore) {
16133
18537
  return;
16134
18538
  }
16135
- const splatData = await avatarCore.computeCompleteFrameFlat({ frameIndex: this.idleCurrentFrameIndex }, this.characterHandle ?? void 0);
16136
- this.idleCurrentFrameIndex++;
16137
- if (splatData) {
18539
+ const backend = this.renderSystem.getBackend();
18540
+ const renderer = this.renderSystem.renderer;
18541
+ const useGPUTransform = backend === "webgpu" && renderer && typeof renderer.getUseGPUTransform === "function" && renderer.getUseGPUTransform();
18542
+ const useGPUFLAME = typeof (renderer == null ? void 0 : renderer.getUseGPUFLAME) === "function" && renderer.getUseGPUFLAME();
18543
+ if (useGPUTransform && useGPUFLAME) {
18544
+ const flameParams = await avatarCore.getCurrentFrameParams(this.idleCurrentFrameIndex, this.characterId);
18545
+ this.idleCurrentFrameIndex++;
16138
18546
  if (this.renderingState !== "idle") {
16139
18547
  return;
16140
18548
  }
16141
18549
  if (this.isPureRenderingMode) {
16142
18550
  return;
16143
18551
  }
16144
- this.renderSystem.loadSplatsFromPackedData(splatData);
16145
- this.renderSystem.renderFrame();
18552
+ const frameParams = this.convertFlameParamsToGPUFormat(flameParams);
18553
+ await this.renderSystem.renderFrameWithFaceGeometry(frameParams);
18554
+ } else if (useGPUTransform) {
18555
+ const faceGeometryData = await avatarCore.computeFrameAsFaceGeometry({ frameIndex: this.idleCurrentFrameIndex, characterId: this.characterId });
18556
+ this.idleCurrentFrameIndex++;
18557
+ if (faceGeometryData) {
18558
+ if (this.renderingState !== "idle") {
18559
+ return;
18560
+ }
18561
+ if (this.isPureRenderingMode) {
18562
+ return;
18563
+ }
18564
+ await this.renderSystem.renderFrameWithFaceGeometry(faceGeometryData);
18565
+ }
18566
+ } else {
18567
+ const splatData = await avatarCore.computeCompleteFrameFlat({ frameIndex: this.idleCurrentFrameIndex }, this.characterHandle ?? void 0);
18568
+ this.idleCurrentFrameIndex++;
18569
+ if (splatData) {
18570
+ if (this.renderingState !== "idle") {
18571
+ return;
18572
+ }
18573
+ if (this.isPureRenderingMode) {
18574
+ return;
18575
+ }
18576
+ this.renderSystem.loadSplatsFromPackedData(splatData);
18577
+ this.renderSystem.renderFrame();
18578
+ }
16146
18579
  }
16147
18580
  this.idleAnimationLoopId = requestAnimationFrame(renderFrame);
16148
18581
  } catch (error) {
@@ -16209,10 +18642,18 @@ class AvatarView {
16209
18642
  const wasmParams = convertProtoFlameToWasmParams(currentFrame);
16210
18643
  const avatarCore = AvatarSDK.getAvatarCore();
16211
18644
  if (avatarCore) {
16212
- const sd = await avatarCore.computeFrameFlatFromParams(wasmParams, this.characterHandle ?? void 0);
16213
- if (sd) {
16214
- this.renderSystem.loadSplatsFromPackedData(sd);
16215
- this.renderSystem.renderFrame();
18645
+ const backend = this.renderSystem.getBackend();
18646
+ const renderer = this.renderSystem.renderer;
18647
+ const useGPUTransform = backend === "webgpu" && renderer && typeof renderer.getUseGPUTransform === "function" && renderer.getUseGPUTransform();
18648
+ if (useGPUTransform) {
18649
+ const frameParams = this.convertFlameParamsToGPUFormat(wasmParams);
18650
+ await this.renderSystem.renderFrameWithFaceGeometry(frameParams);
18651
+ } else {
18652
+ const sd = await avatarCore.computeFrameFlatFromParams(wasmParams, this.characterHandle ?? void 0);
18653
+ if (sd) {
18654
+ this.renderSystem.loadSplatsFromPackedData(sd);
18655
+ this.renderSystem.renderFrame();
18656
+ }
16216
18657
  }
16217
18658
  }
16218
18659
  if (progress >= 1) {
@@ -16294,18 +18735,48 @@ class AvatarView {
16294
18735
  * Render realtime frame (called by playback layer callback)
16295
18736
  * @internal
16296
18737
  */
16297
- renderRealtimeFrame(splatData, frameIndex) {
18738
+ async renderRealtimeFrame(splatData, frameIndex) {
16298
18739
  if (!this.renderSystem || this.renderingState !== "speaking") {
16299
18740
  return;
16300
18741
  }
16301
- this.renderSystem.loadSplatsFromPackedData(splatData);
16302
- this.renderSystem.renderFrame();
18742
+ const backend = this.renderSystem.getBackend();
18743
+ const renderer = this.renderSystem.renderer;
18744
+ const useGPUTransform = backend === "webgpu" && renderer && typeof renderer.getUseGPUTransform === "function" && renderer.getUseGPUTransform();
18745
+ if (useGPUTransform) {
18746
+ if (frameIndex >= 0 && frameIndex < this.currentKeyframes.length) {
18747
+ const flame = this.currentKeyframes[frameIndex];
18748
+ const wasmParams = convertProtoFlameToWasmParams(flame);
18749
+ const frameParams = this.convertFlameParamsToGPUFormat(wasmParams);
18750
+ await this.renderSystem.renderFrameWithFaceGeometry(frameParams);
18751
+ this.lastRealtimeProtoFrame = flame;
18752
+ }
18753
+ } else {
18754
+ this.renderSystem.loadSplatsFromPackedData(splatData);
18755
+ this.renderSystem.renderFrame();
18756
+ if (frameIndex >= 0 && frameIndex < this.currentKeyframes.length) {
18757
+ this.lastRealtimeProtoFrame = this.currentKeyframes[frameIndex];
18758
+ }
18759
+ }
16303
18760
  this.lastRenderedFrameIndex = frameIndex;
16304
18761
  if (frameIndex >= 0 && frameIndex < this.currentKeyframes.length) {
16305
- this.lastRealtimeProtoFrame = this.currentKeyframes[frameIndex];
16306
- this.currentPlayingFrame = this.lastRealtimeProtoFrame;
18762
+ this.currentPlayingFrame = this.currentKeyframes[frameIndex];
16307
18763
  }
16308
18764
  }
18765
+ /**
18766
+ * 🆕 Convert FlameParams into FLAMEFrameParams (GPU format)
18767
+ * @internal
18768
+ */
18769
+ convertFlameParamsToGPUFormat(params) {
18770
+ return {
18771
+ exprParams: new Float32Array(params.expr_params || Array(100).fill(0)),
18772
+ rotation: new Float32Array(params.rotation || [0, 0, 0]),
18773
+ translation: new Float32Array(params.translation || [0, 0, 0]),
18774
+ neckPose: new Float32Array(params.neck_pose || [0, 0, 0]),
18775
+ jawPose: new Float32Array(params.jaw_pose || [0, 0, 0]),
18776
+ eyesPose: new Float32Array(params.eyes_pose || [0, 0, 0, 0, 0, 0]),
18777
+ eyelid: new Float32Array(params.eyelid || [0, 0])
18778
+ };
18779
+ }
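// Illustrative input/output for convertFlameParamsToGPUFormat; the parameter values are made up.
const exampleWasmParams = { expr_params: [0.1, 0.2], rotation: [0, 0.05, 0], jaw_pose: [0.02, 0, 0] };
// Converting exampleWasmParams yields typed arrays with zero-filled defaults for the missing fields:
// exprParams -> Float32Array [0.1, 0.2], rotation -> [0, 0.05, 0], translation -> [0, 0, 0],
// neckPose -> [0, 0, 0], jawPose -> [0.02, 0, 0], eyesPose -> [0, 0, 0, 0, 0, 0], eyelid -> [0, 0].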
16309
18780
  /**
16310
18781
  * State transition method
16311
18782
  * Unified state transition management to ensure state consistency
@@ -16666,13 +19137,21 @@ class AvatarView {
16666
19137
  if (!avatarCore) {
16667
19138
  throw new Error("AvatarCore not available");
16668
19139
  }
16669
- const splatData = await avatarCore.computeFrameFlatFromParams(
16670
- wasmParams,
16671
- this.characterHandle ?? void 0
16672
- );
16673
- if (splatData) {
16674
- this.renderSystem.loadSplatsFromPackedData(splatData);
16675
- this.renderSystem.renderFrame();
19140
+ const backend = this.renderSystem.getBackend();
19141
+ const renderer = this.renderSystem.renderer;
19142
+ const useGPUTransform = backend === "webgpu" && renderer && typeof renderer.getUseGPUTransform === "function" && renderer.getUseGPUTransform();
19143
+ if (useGPUTransform) {
19144
+ const frameParams = this.convertFlameParamsToGPUFormat(wasmParams);
19145
+ await this.renderSystem.renderFrameWithFaceGeometry(frameParams);
19146
+ } else {
19147
+ const splatData = await avatarCore.computeFrameFlatFromParams(
19148
+ wasmParams,
19149
+ this.characterHandle ?? void 0
19150
+ );
19151
+ if (splatData) {
19152
+ this.renderSystem.loadSplatsFromPackedData(splatData);
19153
+ this.renderSystem.renderFrame();
19154
+ }
16676
19155
  }
16677
19156
  } catch (error) {
16678
19157
  logger.error("[AvatarView] Failed to render flame:", error instanceof Error ? error.message : String(error));
@@ -16781,9 +19260,6 @@ class AvatarView {
16781
19260
  const { x: x2, y: y2, scale } = value;
16782
19261
  logger.log(`[AvatarView] Setting transform: x=${x2}, y=${y2}, scale=${scale}`);
16783
19262
  this.renderSystem.setTransform(x2, y2, scale);
16784
- if (this.isInitialized && this.renderSystem) {
16785
- this.renderSystem.renderFrame();
16786
- }
16787
19263
  }
16788
19264
  /**
16789
19265
  * Report avatar_active event