@plasius/gpu-renderer 0.2.7 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@plasius/gpu-renderer",
3
- "version": "0.2.7",
3
+ "version": "0.2.8",
4
4
  "description": "Framework-agnostic WebGPU renderer runtime for Plasius projects.",
5
5
  "type": "module",
6
6
  "sideEffects": false,
@@ -40,6 +40,7 @@ const PATH_VERTEX_RECORD_BYTES = 16;
40
40
  const GPU_SUBMITTED_WORK_TIMEOUT_MS = 5_000;
41
41
  const GPU_READBACK_COMPLETION_TIMEOUT_MS = 60_000;
42
42
  const GPU_MAX_SUBMITTED_WORK_TIMEOUT_MS = 60_000;
43
+ const GPU_MAX_SUBMITTED_WORK_DEADLINE_MS = 180_000;
43
44
  const CONFIG_BUFFER_BYTES = 320;
44
45
  const COUNTER_DISPATCH_ARGS_OFFSET = 16;
45
46
  const INDIRECT_DISPATCH_ARGS_BYTES = 12;
@@ -933,104 +934,9 @@ function normalizeVectorOrFallback(vector, fallback) {
933
934
  return normalize(Array.isArray(vector) ? vector : fallback, fallback);
934
935
  }
935
936
 
936
- function buildTriangleTangentBasis(v0, v1, v2, uv0, uv1, uv2, fallbackNormal) {
937
- const edge1 = subtract(v1, v0);
938
- const edge2 = subtract(v2, v0);
939
- const deltaUv1 = [uv1[0] - uv0[0], uv1[1] - uv0[1]];
940
- const deltaUv2 = [uv2[0] - uv0[0], uv2[1] - uv0[1]];
941
- const determinant = deltaUv1[0] * deltaUv2[1] - deltaUv1[1] * deltaUv2[0];
942
- if (Math.abs(determinant) < 1e-6) {
943
- const tangentFallback = Math.abs(fallbackNormal[1]) < 0.999 ? [0, 1, 0] : [1, 0, 0];
944
- const tangent = normalize(cross(tangentFallback, fallbackNormal), [1, 0, 0]);
945
- const bitangent = normalize(cross(fallbackNormal, tangent), [0, 0, 1]);
946
- return { tangent, bitangent };
947
- }
948
- const inverse = 1 / determinant;
949
- const tangent = normalize(
950
- [
951
- inverse * (edge1[0] * deltaUv2[1] - edge2[0] * deltaUv1[1]),
952
- inverse * (edge1[1] * deltaUv2[1] - edge2[1] * deltaUv1[1]),
953
- inverse * (edge1[2] * deltaUv2[1] - edge2[2] * deltaUv1[1]),
954
- ],
955
- [1, 0, 0]
956
- );
957
- const bitangent = normalize(
958
- [
959
- inverse * (-edge1[0] * deltaUv2[0] + edge2[0] * deltaUv1[0]),
960
- inverse * (-edge1[1] * deltaUv2[0] + edge2[1] * deltaUv1[0]),
961
- inverse * (-edge1[2] * deltaUv2[0] + edge2[2] * deltaUv1[0]),
962
- ],
963
- [0, 0, 1]
964
- );
965
- return { tangent, bitangent };
966
- }
967
-
968
- function applyNormalMap(normal, tangent, bitangent, normalTexture, uv) {
969
- if (!normalTexture) {
970
- return normalizeVectorOrFallback(normal, [0, 1, 0]);
971
- }
972
- const sample = sampleTextureRgba(normalTexture, uv, "linear");
973
- const strength = clampUnit(normalTexture.scale ?? 1);
974
- const tangentNormal = normalize(
975
- [
976
- (sample[0] * 2 - 1) * strength,
977
- (sample[1] * 2 - 1) * strength,
978
- 1 + (sample[2] * 2 - 1 - 1) * strength,
979
- ],
980
- [0, 0, 1]
981
- );
982
- return normalize(
983
- [
984
- tangent[0] * tangentNormal[0] + bitangent[0] * tangentNormal[1] + normal[0] * tangentNormal[2],
985
- tangent[1] * tangentNormal[0] + bitangent[1] * tangentNormal[1] + normal[1] * tangentNormal[2],
986
- tangent[2] * tangentNormal[0] + bitangent[2] * tangentNormal[1] + normal[2] * tangentNormal[2],
987
- ],
988
- normal
989
- );
990
- }
991
-
992
- function sampleBaseColor(mesh, uv) {
993
- const sample = mesh.baseColorTexture ? sampleTextureRgba(mesh.baseColorTexture, uv, "srgb") : [1, 1, 1, 1];
994
- return [
995
- clampUnit(mesh.color[0] * sample[0]),
996
- clampUnit(mesh.color[1] * sample[1]),
997
- clampUnit(mesh.color[2] * sample[2]),
998
- clampUnit((mesh.color[3] ?? 1) * sample[3]),
999
- ];
1000
- }
1001
-
1002
- function sampleSurfaceMaterial(mesh, uv) {
1003
- const textureSample = mesh.metallicRoughnessTexture
1004
- ? sampleTextureRgba(mesh.metallicRoughnessTexture, uv, "linear")
1005
- : [1, 1, 1, 1];
1006
- return {
1007
- roughness: clamp(mesh.roughness * textureSample[1], 0, 1),
1008
- metallic: clamp(mesh.metallic * textureSample[2], 0, 1),
1009
- };
1010
- }
1011
-
1012
- function averageColors(colors) {
1013
- const count = Math.max(colors.length, 1);
1014
- return colors.reduce(
1015
- (accumulator, color) => [
1016
- accumulator[0] + color[0] / count,
1017
- accumulator[1] + color[1] / count,
1018
- accumulator[2] + color[2] / count,
1019
- accumulator[3] + color[3] / count,
1020
- ],
1021
- [0, 0, 0, 0]
1022
- );
1023
- }
1024
-
1025
- function averageNumbers(values, fallback = 0) {
1026
- if (!Array.isArray(values) || values.length === 0) {
1027
- return fallback;
1028
- }
1029
- return values.reduce((sum, value) => sum + value, 0) / values.length;
1030
- }
1031
-
1032
- function createMeshTriangleRecords(meshes) {
937
+ function createMeshTriangleRecords(meshes, gpuMaterialSource = null) {
1033
938
  const source = Array.isArray(meshes) ? meshes : [];
939
+ const resolvedMaterialSource = gpuMaterialSource ?? createWavefrontGpuMaterialSource(source);
1034
940
  let nextTriangleId = 0;
1035
941
  return source.flatMap((meshInput, meshIndex) => {
1036
942
  const mesh = normalizeWavefrontMesh(meshInput, meshIndex);
@@ -1049,16 +955,6 @@ function createMeshTriangleRecords(meshes) {
1049
955
  const uv0 = mesh.uvs ? readVector2(mesh.uvs, a) : [0, 0];
1050
956
  const uv1 = mesh.uvs ? readVector2(mesh.uvs, b) : [0, 0];
1051
957
  const uv2 = mesh.uvs ? readVector2(mesh.uvs, c) : [0, 0];
1052
- const tangentBasis = buildTriangleTangentBasis(v0, v1, v2, uv0, uv1, uv2, faceNormal);
1053
- const shadedN0 = applyNormalMap(n0, tangentBasis.tangent, tangentBasis.bitangent, mesh.normalTexture, uv0);
1054
- const shadedN1 = applyNormalMap(n1, tangentBasis.tangent, tangentBasis.bitangent, mesh.normalTexture, uv1);
1055
- const shadedN2 = applyNormalMap(n2, tangentBasis.tangent, tangentBasis.bitangent, mesh.normalTexture, uv2);
1056
- const sampledColors = [sampleBaseColor(mesh, uv0), sampleBaseColor(mesh, uv1), sampleBaseColor(mesh, uv2)];
1057
- const sampledMaterials = [
1058
- sampleSurfaceMaterial(mesh, uv0),
1059
- sampleSurfaceMaterial(mesh, uv1),
1060
- sampleSurfaceMaterial(mesh, uv2),
1061
- ];
1062
958
  const bounds = triangleBounds(v0, v1, v2);
1063
959
 
1064
960
  triangles.push(
@@ -1073,17 +969,17 @@ function createMeshTriangleRecords(meshes) {
1073
969
  v0: Object.freeze(v0),
1074
970
  v1: Object.freeze(v1),
1075
971
  v2: Object.freeze(v2),
1076
- n0: Object.freeze(shadedN0),
1077
- n1: Object.freeze(shadedN1),
1078
- n2: Object.freeze(shadedN2),
972
+ n0: Object.freeze(n0),
973
+ n1: Object.freeze(n1),
974
+ n2: Object.freeze(n2),
1079
975
  uv0: Object.freeze(uv0),
1080
976
  uv1: Object.freeze(uv1),
1081
977
  uv2: Object.freeze(uv2),
1082
- color: Object.freeze(averageColors(sampledColors)),
978
+ color: mesh.color,
1083
979
  emission: mesh.emission,
1084
980
  material: Object.freeze([
1085
- averageNumbers(sampledMaterials.map((sample) => sample.roughness), mesh.roughness),
1086
- averageNumbers(sampledMaterials.map((sample) => sample.metallic), mesh.metallic),
981
+ mesh.roughness,
982
+ mesh.metallic,
1087
983
  mesh.opacity,
1088
984
  mesh.ior,
1089
985
  ]),
@@ -1105,6 +1001,27 @@ function createMeshTriangleRecords(meshes) {
1105
1001
  mesh.specularColor[2] ?? 1,
1106
1002
  1,
1107
1003
  ]),
1004
+ baseColorAtlas: Object.freeze(
1005
+ resolvedMaterialSource.baseColorAtlas.resolveRect(mesh.baseColorTexture)
1006
+ ),
1007
+ metallicRoughnessAtlas: Object.freeze(
1008
+ resolvedMaterialSource.metallicRoughnessAtlas.resolveRect(mesh.metallicRoughnessTexture)
1009
+ ),
1010
+ normalAtlas: Object.freeze(
1011
+ resolvedMaterialSource.normalAtlas.resolveRect(mesh.normalTexture)
1012
+ ),
1013
+ occlusionAtlas: Object.freeze(
1014
+ resolvedMaterialSource.occlusionAtlas.resolveRect(mesh.occlusionTexture)
1015
+ ),
1016
+ emissiveAtlas: Object.freeze(
1017
+ resolvedMaterialSource.emissiveAtlas.resolveRect(mesh.emissiveTexture)
1018
+ ),
1019
+ textureSettings: Object.freeze([
1020
+ clampUnit(mesh.normalTexture?.scale ?? mesh.normalTexture?.strength ?? 1),
1021
+ clampUnit(mesh.occlusionTexture?.strength ?? 1),
1022
+ clampUnit(mesh.emissiveTexture?.strength ?? 1),
1023
+ 0,
1024
+ ]),
1108
1025
  bounds: Object.freeze({
1109
1026
  min: Object.freeze(bounds.min),
1110
1027
  max: Object.freeze(bounds.max),
@@ -1187,9 +1104,10 @@ function buildBvh(triangles, maxLeafTriangles = 4) {
1187
1104
  });
1188
1105
  }
1189
1106
 
1190
- export function createWavefrontMeshAcceleration(meshes = []) {
1107
+ export function createWavefrontMeshAcceleration(meshes = [], gpuMaterialSource = null) {
1191
1108
  const source = Array.isArray(meshes) ? meshes : [meshes];
1192
- const triangles = createMeshTriangleRecords(source);
1109
+ const resolvedMaterialSource = gpuMaterialSource ?? createWavefrontGpuMaterialSource(source);
1110
+ const triangles = createMeshTriangleRecords(source, resolvedMaterialSource);
1193
1111
  return buildBvh(triangles);
1194
1112
  }
1195
1113
 
@@ -1524,12 +1442,13 @@ export function createWavefrontBvhBuildLevels(triangleCountInput) {
1524
1442
  }
1525
1443
 
1526
1444
  function resolveAccelerationBuildMode(options = {}) {
1527
- const mode = options.accelerationBuildMode ?? (options.displayQuality === true ? "gpu" : "cpu-debug");
1528
- if (mode !== "gpu" && mode !== "cpu-debug") {
1529
- throw new Error("accelerationBuildMode must be either \"gpu\" or \"cpu-debug\".");
1530
- }
1531
- if (options.displayQuality === true && mode !== "gpu") {
1532
- throw new Error("Display-quality path tracing requires GPU-built mesh acceleration.");
1445
+ const requestedMode =
1446
+ options.accelerationBuildMode ?? (options.displayQuality === true ? "cpu-upload" : "cpu-debug");
1447
+ const mode = requestedMode === "cpu-debug" ? "cpu-upload" : requestedMode;
1448
+ if (mode !== "gpu" && mode !== "cpu-upload") {
1449
+ throw new Error(
1450
+ "accelerationBuildMode must be either \"gpu\", \"cpu-upload\", or the legacy alias \"cpu-debug\"."
1451
+ );
1533
1452
  }
1534
1453
  return mode;
1535
1454
  }
@@ -2081,8 +2000,8 @@ export function createWavefrontPathTracingComputeConfig(options = {}) {
2081
2000
  ? createWavefrontGpuMeshSource(meshes, gpuMaterialSource)
2082
2001
  : createWavefrontGpuMeshSource([]);
2083
2002
  const meshAcceleration =
2084
- accelerationBuildMode === "cpu-debug"
2085
- ? createWavefrontMeshAcceleration(meshes)
2003
+ accelerationBuildMode === "cpu-upload"
2004
+ ? createWavefrontMeshAcceleration(meshes, gpuMaterialSource)
2086
2005
  : Object.freeze({ nodes: Object.freeze([]), triangles: Object.freeze([]) });
2087
2006
  const emissiveTriangleIndices = createWavefrontEmissiveTriangleIndexSource(
2088
2007
  meshes,
@@ -6076,9 +5995,28 @@ function nowMs() {
6076
5995
  return Date.now();
6077
5996
  }
6078
5997
 
6079
- function estimateSubmittedGpuWorkTimeoutMs(config, tileCount, overrideTimeoutMs = null) {
5998
+ function estimateAccelerationBuildWaitFactor(config) {
5999
+ if (config?.gpuAccelerationBuildRequired !== true) {
6000
+ return 1;
6001
+ }
6002
+ const bvhSortStageCount = Array.isArray(config?.bvhSortStages) ? config.bvhSortStages.length : 0;
6003
+ const bvhBuildLevelCount = Array.isArray(config?.bvhBuildLevels) ? config.bvhBuildLevels.length : 0;
6004
+ const accelerationStageCount = 2 + bvhSortStageCount + bvhBuildLevelCount;
6005
+ return Math.max(1, 1 + accelerationStageCount / 96);
6006
+ }
6007
+
6008
+ function estimateSubmittedGpuWorkTiming(
6009
+ config,
6010
+ tileCount,
6011
+ overrideTimeoutMs = null,
6012
+ options = {}
6013
+ ) {
6080
6014
  if (Number.isFinite(overrideTimeoutMs)) {
6081
- return Math.max(1, Math.trunc(Number(overrideTimeoutMs)));
6015
+ const overrideMs = Math.max(1, Math.trunc(Number(overrideTimeoutMs)));
6016
+ return Object.freeze({
6017
+ timeoutMs: overrideMs,
6018
+ maxWaitMs: overrideMs,
6019
+ });
6082
6020
  }
6083
6021
  const samplesPerPixel = Math.max(
6084
6022
  1,
@@ -6090,10 +6028,28 @@ function estimateSubmittedGpuWorkTimeoutMs(config, tileCount, overrideTimeoutMs
6090
6028
  const tiles = Math.max(1, Number(tileCount ?? 1));
6091
6029
  const estimatedPasses =
6092
6030
  tiles * (samplesPerPixel * (maxDepth + 1 + deferredResolvePasses) + denoisePasses + 1);
6093
- return Math.min(
6031
+ const triangleCount = Math.max(0, Number(config?.triangleCount ?? 0));
6032
+ const geometryFactor = Math.max(1, triangleCount / 131072);
6033
+ const includeAccelerationBuild = options.includeAccelerationBuild === true;
6034
+ const accelerationFactor = includeAccelerationBuild
6035
+ ? estimateAccelerationBuildWaitFactor(config)
6036
+ : 1;
6037
+ const estimatedWindowMs = Math.round(
6038
+ (GPU_SUBMITTED_WORK_TIMEOUT_MS + estimatedPasses * 5) * geometryFactor * accelerationFactor
6039
+ );
6040
+ const timeoutMs = Math.min(
6094
6041
  GPU_MAX_SUBMITTED_WORK_TIMEOUT_MS,
6095
- GPU_SUBMITTED_WORK_TIMEOUT_MS + estimatedPasses * 5
6042
+ Math.max(GPU_SUBMITTED_WORK_TIMEOUT_MS, estimatedWindowMs)
6043
+ );
6044
+ const maxWaitMultiplier = includeAccelerationBuild ? 3 : 2;
6045
+ const maxWaitMs = Math.min(
6046
+ GPU_MAX_SUBMITTED_WORK_DEADLINE_MS,
6047
+ Math.max(timeoutMs, estimatedWindowMs * maxWaitMultiplier)
6096
6048
  );
6049
+ return Object.freeze({
6050
+ timeoutMs,
6051
+ maxWaitMs,
6052
+ });
6097
6053
  }
6098
6054
 
6099
6055
  export async function createWavefrontPathTracingComputeRenderer(options = {}) {
@@ -7124,6 +7080,10 @@ export async function createWavefrontPathTracingComputeRenderer(options = {}) {
7124
7080
  ? Number(options.timeoutMs)
7125
7081
  : GPU_SUBMITTED_WORK_TIMEOUT_MS
7126
7082
  );
7083
+ const maxWaitMs = Math.max(
7084
+ timeoutMs,
7085
+ Number.isFinite(options.maxWaitMs) ? Number(options.maxWaitMs) : timeoutMs
7086
+ );
7127
7087
  const allowTimeout = options.allowTimeout !== false;
7128
7088
  const completionPromise = device.queue.onSubmittedWorkDone().then(
7129
7089
  () => ({ status: "done" }),
@@ -7139,47 +7099,62 @@ export async function createWavefrontPathTracingComputeRenderer(options = {}) {
7139
7099
  );
7140
7100
  })
7141
7101
  : null;
7142
- let timeoutHandle = null;
7143
- let resolveTimeoutPromise = null;
7144
- let timeoutSettled = false;
7145
- const settleTimeoutPromise = (value) => {
7146
- if (timeoutSettled) {
7147
- return;
7102
+ const startedAtMs = nowMs();
7103
+ while (true) {
7104
+ const elapsedMs = Math.max(0, nowMs() - startedAtMs);
7105
+ const remainingMs = Math.max(0, maxWaitMs - elapsedMs);
7106
+ if (remainingMs <= 0) {
7107
+ if (!allowTimeout) {
7108
+ throw new Error(`Timed out after ${Math.round(maxWaitMs)} ms waiting for submitted GPU work.`);
7109
+ }
7110
+ console.warn(
7111
+ `[plasius.wavefront] Submitted GPU work did not report completion within ${Math.round(maxWaitMs)} ms; continuing.`
7112
+ );
7113
+ return false;
7148
7114
  }
7149
- timeoutSettled = true;
7150
- resolveTimeoutPromise?.(value);
7151
- };
7152
- const timeoutPromise = new Promise((resolve) => {
7153
- resolveTimeoutPromise = resolve;
7154
- timeoutHandle = setTimeout(() => settleTimeoutPromise({ status: "timeout" }), timeoutMs);
7155
- });
7156
- let result;
7157
- try {
7158
- result = await Promise.race(
7159
- [completionPromise, timeoutPromise, lossPromise].filter(Boolean)
7160
- );
7161
- } finally {
7162
- if (timeoutHandle !== null) {
7163
- clearTimeout(timeoutHandle);
7164
- settleTimeoutPromise({ status: "cancelled" });
7115
+ const waitWindowMs = Math.max(1, Math.min(timeoutMs, remainingMs));
7116
+ let timeoutHandle = null;
7117
+ let resolveTimeoutPromise = null;
7118
+ let timeoutSettled = false;
7119
+ const settleTimeoutPromise = (value) => {
7120
+ if (timeoutSettled) {
7121
+ return;
7122
+ }
7123
+ timeoutSettled = true;
7124
+ resolveTimeoutPromise?.(value);
7125
+ };
7126
+ const timeoutPromise = new Promise((resolve) => {
7127
+ resolveTimeoutPromise = resolve;
7128
+ timeoutHandle = setTimeout(
7129
+ () => settleTimeoutPromise({ status: "timeout" }),
7130
+ waitWindowMs
7131
+ );
7132
+ });
7133
+ let result;
7134
+ try {
7135
+ result = await Promise.race(
7136
+ [completionPromise, timeoutPromise, lossPromise].filter(Boolean)
7137
+ );
7138
+ } finally {
7139
+ if (timeoutHandle !== null) {
7140
+ clearTimeout(timeoutHandle);
7141
+ settleTimeoutPromise({ status: "cancelled" });
7142
+ }
7165
7143
  }
7166
- }
7167
- if (result?.status === "timeout") {
7168
- if (!allowTimeout) {
7169
- throw new Error(`Timed out after ${timeoutMs} ms waiting for submitted GPU work.`);
7144
+ if (result?.status === "done") {
7145
+ return true;
7146
+ }
7147
+ if (result?.status !== "timeout") {
7148
+ return true;
7170
7149
  }
7171
- console.warn(
7172
- `[plasius.wavefront] Submitted GPU work did not report completion within ${timeoutMs} ms; continuing.`
7173
- );
7174
- return false;
7175
7150
  }
7176
- return true;
7177
7151
  }
7178
7152
 
7179
7153
  function dispatchFrameAwaitingGpu(
7180
7154
  frameIndex,
7181
7155
  parallelism,
7182
- renderedSamplesPerPixel = config.samplesPerPixel
7156
+ renderedSamplesPerPixel = config.samplesPerPixel,
7157
+ optionsForFrame = {}
7183
7158
  ) {
7184
7159
  const samplePassesPerSample = config.maxDepth + 1 + (config.deferredPathResolve ? 1 : 0);
7185
7160
  const denoisePassCount = config.denoise ? (renderedSamplesPerPixel < 4 ? 2 : 1) : 0;
@@ -7188,25 +7163,50 @@ export async function createWavefrontPathTracingComputeRenderer(options = {}) {
7188
7163
  1,
7189
7164
  Math.floor(
7190
7165
  Math.max(config.maxFramePassesPerSubmission - tailPassCount, 1) /
7191
- Math.max(samplePassesPerSample, 1)
7166
+ Math.max(samplePassesPerSample, 1)
7192
7167
  )
7193
7168
  );
7194
- let submissionCount = 0;
7169
+ const sampleRangeStart = clamp(
7170
+ readNonNegativeInteger("sampleRangeStart", optionsForFrame.sampleRangeStart, 0),
7171
+ 0,
7172
+ renderedSamplesPerPixel
7173
+ );
7174
+ const sampleRangeEnd = clamp(
7175
+ readPositiveInteger("sampleRangeEnd", optionsForFrame.sampleRangeEnd, renderedSamplesPerPixel),
7176
+ sampleRangeStart,
7177
+ renderedSamplesPerPixel
7178
+ );
7179
+ const includeDenoise = optionsForFrame.includeDenoise === true;
7180
+ const includePresent = optionsForFrame.includePresent === true;
7181
+ const tileStartIndex = clamp(
7182
+ readNonNegativeInteger("tileStartIndex", optionsForFrame.tileStartIndex, 0),
7183
+ 0,
7184
+ tiles.length
7185
+ );
7186
+ const tileEndIndex = clamp(
7187
+ readPositiveInteger("tileEndIndex", optionsForFrame.tileEndIndex, tiles.length),
7188
+ tileStartIndex,
7189
+ tiles.length
7190
+ );
7191
+ let submissionCount = Math.max(
7192
+ 0,
7193
+ readNonNegativeInteger("startingSubmissionCount", optionsForFrame.startingSubmissionCount, 0)
7194
+ );
7195
+ let slot = Math.max(0, readNonNegativeInteger("startingSlot", optionsForFrame.startingSlot, 0));
7195
7196
 
7196
- for (const tile of tiles) {
7197
+ for (const tile of tiles.slice(tileStartIndex, tileEndIndex)) {
7197
7198
  for (
7198
- let sampleStart = 0;
7199
- sampleStart < renderedSamplesPerPixel;
7199
+ let sampleStart = sampleRangeStart;
7200
+ sampleStart < sampleRangeEnd;
7200
7201
  sampleStart += sampleBatchSize
7201
7202
  ) {
7202
- const sampleEnd = Math.min(renderedSamplesPerPixel, sampleStart + sampleBatchSize);
7203
+ const sampleEnd = Math.min(sampleRangeEnd, sampleStart + sampleBatchSize);
7203
7204
  const batch = createGpuSubmissionBatcher({
7204
7205
  device,
7205
7206
  frameIndex,
7206
7207
  maxFramePassesPerSubmission: config.maxFramePassesPerSubmission,
7207
7208
  startingSubmissionCount: submissionCount,
7208
7209
  });
7209
- let slot = 0;
7210
7210
  for (let sampleIndex = sampleStart; sampleIndex < sampleEnd; sampleIndex += 1) {
7211
7211
  const configOffset = writeFrameConfigSlot(slot, tile, frameIndex, {
7212
7212
  sampleIndex,
@@ -7223,11 +7223,12 @@ export async function createWavefrontPathTracingComputeRenderer(options = {}) {
7223
7223
  encodeTileOutput(batch.reserve(1), tile, configOffset, parallelism);
7224
7224
  }
7225
7225
  }
7226
- if (!config.deferredPathResolve && sampleEnd >= renderedSamplesPerPixel) {
7226
+ if (!config.deferredPathResolve && sampleRangeEnd >= renderedSamplesPerPixel) {
7227
7227
  const outputConfigOffset = writeFrameConfigSlot(slot, tile, frameIndex, {
7228
7228
  sampleIndex: 0,
7229
7229
  sampleWeight: 1 / renderedSamplesPerPixel,
7230
7230
  });
7231
+ slot += 1;
7231
7232
  encodeTileOutput(batch.reserve(1), tile, outputConfigOffset, parallelism);
7232
7233
  }
7233
7234
  batch.flush();
@@ -7235,30 +7236,38 @@ export async function createWavefrontPathTracingComputeRenderer(options = {}) {
7235
7236
  }
7236
7237
  }
7237
7238
 
7238
- const tail = createGpuSubmissionBatcher({
7239
- device,
7240
- frameIndex,
7241
- maxFramePassesPerSubmission: config.maxFramePassesPerSubmission,
7242
- startingSubmissionCount: submissionCount,
7243
- });
7244
- if (config.denoise) {
7245
- const denoiseConfigOffset = writeFrameConfigSlot(
7246
- 0,
7247
- { x: 0, y: 0, width: config.width, height: config.height },
7239
+ if (includeDenoise || includePresent) {
7240
+ const tail = createGpuSubmissionBatcher({
7241
+ device,
7248
7242
  frameIndex,
7249
- { sampleIndex: 0, sampleWeight: 1 / renderedSamplesPerPixel }
7250
- );
7251
- encodeDenoise(
7252
- tail.reserve(denoisePassCount),
7253
- denoiseConfigOffset,
7254
- parallelism,
7255
- renderedSamplesPerPixel
7256
- );
7243
+ maxFramePassesPerSubmission: config.maxFramePassesPerSubmission,
7244
+ startingSubmissionCount: submissionCount,
7245
+ });
7246
+ if (includeDenoise && config.denoise) {
7247
+ const denoiseConfigOffset = writeFrameConfigSlot(
7248
+ slot,
7249
+ { x: 0, y: 0, width: config.width, height: config.height },
7250
+ frameIndex,
7251
+ { sampleIndex: 0, sampleWeight: 1 / renderedSamplesPerPixel }
7252
+ );
7253
+ slot += 1;
7254
+ encodeDenoise(
7255
+ tail.reserve(denoisePassCount),
7256
+ denoiseConfigOffset,
7257
+ parallelism,
7258
+ renderedSamplesPerPixel
7259
+ );
7260
+ }
7261
+ if (includePresent) {
7262
+ encodePresent(tail.reserve(1));
7263
+ }
7264
+ tail.flush();
7265
+ submissionCount += tail.getSubmissionCount();
7257
7266
  }
7258
- encodePresent(tail.reserve(1));
7259
- tail.flush();
7260
- submissionCount += tail.getSubmissionCount();
7261
- return submissionCount;
7267
+ return Object.freeze({
7268
+ submissionCount,
7269
+ slot,
7270
+ });
7262
7271
  }
7263
7272
 
7264
7273
  async function readOutputProbe(optionsForProbe = {}) {
@@ -7307,26 +7316,59 @@ export async function createWavefrontPathTracingComputeRenderer(options = {}) {
7307
7316
  const samplingPlan = resolveRenderedSamplesPerPixel(renderOptions, awaitGPUCompletion);
7308
7317
  const useThrottledHighSamplePath =
7309
7318
  awaitGPUCompletion && samplingPlan.renderedSamplesPerPixel >= 8;
7310
- const submittedWorkTimeoutMs = estimateSubmittedGpuWorkTimeoutMs(
7311
- { ...config, renderedSamplesPerPixel: samplingPlan.renderedSamplesPerPixel },
7312
- tiles.length,
7313
- renderOptions.submittedWorkTimeoutMs
7314
- );
7315
7319
  const frameStartTimeMs = nowMs();
7316
- const submissionWaitOptions = awaitGPUCompletion
7317
- ? { timeoutMs: submittedWorkTimeoutMs, allowTimeout: false }
7318
- : { timeoutMs: submittedWorkTimeoutMs };
7319
7320
  let frameStats;
7320
7321
  if (useThrottledHighSamplePath) {
7321
7322
  frame += 1;
7322
7323
  const frameIndex = frame + config.frameIndex;
7323
7324
  const parallelismCounters = createGpuParallelismCounters();
7324
7325
  const accelerationBuildSubmitted = dispatchGpuAccelerationBuild(frameIndex, parallelismCounters);
7325
- const frameSubmissionCount = dispatchFrameAwaitingGpu(
7326
- frameIndex,
7327
- parallelismCounters,
7328
- samplingPlan.renderedSamplesPerPixel
7329
- );
7326
+ let frameSubmissionCount = 0;
7327
+ let frameConfigSlot = 0;
7328
+ if (accelerationBuildSubmitted) {
7329
+ const accelerationWaitOptions = {
7330
+ ...estimateSubmittedGpuWorkTiming(
7331
+ { ...config, renderedSamplesPerPixel: 1 },
7332
+ 1,
7333
+ renderOptions.submittedWorkTimeoutMs,
7334
+ { includeAccelerationBuild: true }
7335
+ ),
7336
+ allowTimeout: false,
7337
+ };
7338
+ await waitForSubmittedGpuWork(accelerationWaitOptions);
7339
+ }
7340
+ for (let tileIndex = 0; tileIndex < tiles.length; tileIndex += 1) {
7341
+ const tileRangeDispatch = dispatchFrameAwaitingGpu(
7342
+ frameIndex,
7343
+ parallelismCounters,
7344
+ samplingPlan.renderedSamplesPerPixel,
7345
+ {
7346
+ sampleRangeStart: 0,
7347
+ sampleRangeEnd: samplingPlan.renderedSamplesPerPixel,
7348
+ tileStartIndex: tileIndex,
7349
+ tileEndIndex: tileIndex + 1,
7350
+ startingSubmissionCount: frameSubmissionCount,
7351
+ startingSlot: frameConfigSlot,
7352
+ includeDenoise: tileIndex + 1 >= tiles.length,
7353
+ includePresent: tileIndex + 1 >= tiles.length,
7354
+ }
7355
+ );
7356
+ frameSubmissionCount = tileRangeDispatch.submissionCount;
7357
+ frameConfigSlot = tileRangeDispatch.slot;
7358
+ const tileWaitOptions = {
7359
+ ...estimateSubmittedGpuWorkTiming(
7360
+ { ...config, renderedSamplesPerPixel: samplingPlan.renderedSamplesPerPixel },
7361
+ 1,
7362
+ renderOptions.submittedWorkTimeoutMs,
7363
+ {
7364
+ includeDenoise: tileIndex + 1 >= tiles.length && config.denoise,
7365
+ includePresent: tileIndex + 1 >= tiles.length,
7366
+ }
7367
+ ),
7368
+ allowTimeout: false,
7369
+ };
7370
+ await waitForSubmittedGpuWork(tileWaitOptions);
7371
+ }
7330
7372
  frameStats = createFrameStats({
7331
7373
  frameIndex,
7332
7374
  accelerationBuildSubmitted,
@@ -7338,10 +7380,26 @@ export async function createWavefrontPathTracingComputeRenderer(options = {}) {
7338
7380
  budgetConstrained: samplingPlan.budgetConstrained,
7339
7381
  });
7340
7382
  } else {
7383
+ const submittedWorkTiming = estimateSubmittedGpuWorkTiming(
7384
+ { ...config, renderedSamplesPerPixel: samplingPlan.renderedSamplesPerPixel },
7385
+ tiles.length,
7386
+ renderOptions.submittedWorkTimeoutMs,
7387
+ { includeAccelerationBuild: config.gpuAccelerationBuildRequired && !accelerationBuilt }
7388
+ );
7389
+ const submissionWaitOptions = awaitGPUCompletion
7390
+ ? {
7391
+ timeoutMs: submittedWorkTiming.timeoutMs,
7392
+ maxWaitMs: submittedWorkTiming.maxWaitMs,
7393
+ allowTimeout: false,
7394
+ }
7395
+ : {
7396
+ timeoutMs: submittedWorkTiming.timeoutMs,
7397
+ maxWaitMs: submittedWorkTiming.maxWaitMs,
7398
+ };
7341
7399
  frameStats = renderOnce(renderOptions, samplingPlan);
7342
- }
7343
- if (awaitGPUCompletion) {
7344
- await waitForSubmittedGpuWork(submissionWaitOptions);
7400
+ if (awaitGPUCompletion) {
7401
+ await waitForSubmittedGpuWork(submissionWaitOptions);
7402
+ }
7345
7403
  }
7346
7404
  const frameTimeMs = Math.max(0, nowMs() - frameStartTimeMs);
7347
7405
  if (awaitGPUCompletion) {