@gridspace/raster-path 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -48,27 +48,33 @@
48
48
  * ═══════════════════════════════════════════════════════════════════════════
49
49
  */
50
50
 
51
+ let config = {};
51
52
  let device = null;
53
+ let deviceCapabilities = null;
52
54
  let isInitialized = false;
53
55
  let cachedRasterizePipeline = null;
54
56
  let cachedRasterizeShaderModule = null;
55
57
  let cachedToolpathPipeline = null;
56
58
  let cachedToolpathShaderModule = null;
57
- let config = null;
58
- let deviceCapabilities = null;
59
+ let cachedRadialBatchPipeline = null;
60
+ let cachedRadialBatchShaderModule = null;
61
+ let lastlog;
59
62
 
60
63
  const EMPTY_CELL = -1e10;
61
- const log_pre = '[Raster Worker]';
62
-
63
- // url params to control logging
64
- let { search } = self.location;
65
- let verbose = search.indexOf('debug') >= 0;
66
- let quiet = search.indexOf('quiet') >= 0;
64
+ const log_pre = '[Worker]';
65
+ const diagnostic = false;
67
66
 
68
67
  const debug = {
69
68
  error: function() { console.error(log_pre, ...arguments) },
70
69
  warn: function() { console.warn(log_pre, ...arguments) },
71
- log: function() { !quiet && console.log(log_pre, ...arguments) },
70
+ log: function() {
71
+ if (!config.quiet) {
72
+ let now = performance.now();
73
+ let since = ((now - (lastlog ?? now)) | 0).toString().padStart(4,' ');
74
+ console.log(log_pre, `[${since}]`, ...arguments);
75
+ lastlog = now;
76
+ }
77
+ },
72
78
  ok: function() { console.log(log_pre, '✅', ...arguments) },
73
79
  };
74
80
 
@@ -140,6 +146,15 @@ async function initWebGPU() {
140
146
  compute: { module: cachedToolpathShaderModule, entryPoint: 'main' },
141
147
  });
142
148
 
149
+ // Pre-compile radial batch shader module
150
+ cachedRadialBatchShaderModule = device.createShaderModule({ code: radialRasterizeShaderCode });
151
+
152
+ // Pre-create radial batch pipeline
153
+ cachedRadialBatchPipeline = device.createComputePipeline({
154
+ layout: 'auto',
155
+ compute: { module: cachedRadialBatchShaderModule, entryPoint: 'main' },
156
+ });
157
+
143
158
  // Store device capabilities
144
159
  deviceCapabilities = {
145
160
  maxStorageBufferBindingSize: device.limits.maxStorageBufferBindingSize,
@@ -459,8 +474,8 @@ fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
459
474
  }
460
475
  `;
461
476
 
462
- // Radial V2: Rasterization with rotating ray planes and X-bucketing
463
- const radialRasterizeV2ShaderCode = `// Radial V2 rasterization with X-bucketing and rotating ray planes
477
+ // Radial: Rasterization with rotating ray planes and X-bucketing
478
+ const radialRasterizeShaderCode = `// Radial V2 rasterization with X-bucketing and rotating ray planes
464
479
  // Sentinel value for empty cells (far below any real geometry)
465
480
  const EMPTY_CELL: f32 = -1e10;
466
481
  const PI: f32 = 3.14159265359;
@@ -479,6 +494,7 @@ struct Uniforms {
479
494
  filter_mode: u32, // 0 = max Z (terrain), 1 = min Z (tool)
480
495
  num_buckets: u32, // Total number of X-buckets
481
496
  start_angle: f32, // Starting angle offset in radians (for batching)
497
+ bucket_offset: u32, // Offset for bucket batching (bucket_idx in batch writes to bucket_offset + bucket_idx in output)
482
498
  }
483
499
 
484
500
  struct BucketInfo {
@@ -587,6 +603,10 @@ fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
587
603
 
588
604
  // Step 2: Rotate position (scan_x, scan_y, scan_z) around X-axis by 'angle'
589
605
  // X stays the same, rotate YZ plane: y' = y*cos - z*sin, z' = y*sin + z*cos
606
+ // NOTE: This uses right-handed rotation (positive angle rotates +Y towards +Z)
607
+ // To reverse rotation direction (left-handed or opposite), flip signs:
608
+ // y' = y*cos + z*sin (flip sign on z term)
609
+ // z' = -y*sin + z*cos (flip sign on y term)
590
610
  let ray_origin_x = scan_x;
591
611
  let ray_origin_y = scan_y * cos(angle) - scan_z * sin(angle);
592
612
  let ray_origin_z = scan_y * sin(angle) + scan_z * cos(angle);
@@ -594,6 +614,7 @@ fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
594
614
 
595
615
  // Step 3: Rotate ray direction (0, 0, -1) around X-axis by 'angle'
596
616
  // X component stays 0, rotate YZ: dy = 0*cos - (-1)*sin = sin, dz = 0*sin + (-1)*cos = -cos
617
+ // NOTE: For reversed rotation, use: vec3<f32>(0.0, -sin(angle), -cos(angle))
597
618
  let ray_dir = vec3<f32>(0.0, sin(angle), -cos(angle));
598
619
 
599
620
  // Initialize best distance (closest hit)
@@ -636,11 +657,11 @@ fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
636
657
  }
637
658
 
638
659
  // Write output
639
- // Layout: bucket_idx * numAngles * bucketWidth * gridHeight
660
+ // Layout: (bucket_offset + bucket_idx) * numAngles * bucketWidth * gridHeight
640
661
  // + angle_idx * bucketWidth * gridHeight
641
662
  // + grid_y * bucketWidth
642
663
  // + local_x
643
- let output_idx = bucket_idx * uniforms.num_angles * uniforms.bucket_grid_width * uniforms.grid_y_height
664
+ let output_idx = (uniforms.bucket_offset + bucket_idx) * uniforms.num_angles * uniforms.bucket_grid_width * uniforms.grid_y_height
644
665
  + angle_idx * uniforms.bucket_grid_width * uniforms.grid_y_height
645
666
  + grid_y * uniforms.bucket_grid_width
646
667
  + local_x;
@@ -687,8 +708,6 @@ function buildSpatialGrid(triangles, bounds, cellSize = 5.0) {
687
708
  const gridHeight = Math.max(1, Math.ceil((bounds.max.y - bounds.min.y) / cellSize));
688
709
  const totalCells = gridWidth * gridHeight;
689
710
 
690
- // debug.log(`Building spatial grid ${gridWidth}x${gridHeight} (${cellSize}mm cells)`);
691
-
692
711
  const grid = new Array(totalCells);
693
712
  for (let i = 0; i < totalCells; i++) {
694
713
  grid[i] = [];
@@ -745,7 +764,13 @@ function buildSpatialGrid(triangles, bounds, cellSize = 5.0) {
745
764
  cellOffsets[totalCells] = currentOffset;
746
765
 
747
766
  const avgPerCell = totalTriangleRefs / totalCells;
748
- // debug.log(`Spatial grid: ${totalTriangleRefs} refs (avg ${avgPerCell.toFixed(1)} per cell)`);
767
+
768
+ // Calculate actual tool diameter from bounds for logging
769
+ const toolWidth = bounds.max.x - bounds.min.x;
770
+ const toolHeight = bounds.max.y - bounds.min.y;
771
+ const toolDiameter = Math.max(toolWidth, toolHeight);
772
+
773
+ debug.log(`Spatial grid: ${gridWidth}x${gridHeight} ${totalTriangleRefs} tri-refs ~${avgPerCell.toFixed(0)}/${cellSize}mm cell (tool: ${toolDiameter.toFixed(2)}mm)`);
749
774
 
750
775
  return {
751
776
  gridWidth,
@@ -774,7 +799,7 @@ async function rasterizeMeshSingle(triangles, stepSize, filterMode, options = {}
774
799
  debug.log(`First-time init: ${(initEnd - initStart).toFixed(1)}ms`);
775
800
  }
776
801
 
777
- // debug.log(`Rasterizing ${triangles.length / 9} triangles (step ${stepSize}mm, mode ${filterMode})...`);
802
+ // debug.log(`Raster ${triangles.length / 9} triangles (step ${stepSize}mm, mode ${filterMode})...`);
778
803
 
779
804
  // Extract options
780
805
  // boundsOverride: Optional manual bounds to avoid recalculating from triangles
@@ -805,14 +830,16 @@ async function rasterizeMeshSingle(triangles, stepSize, filterMode, options = {}
805
830
  const floatsPerPoint = filterMode === 0 ? 1 : 3;
806
831
  const outputSize = totalGridPoints * floatsPerPoint * 4;
807
832
  const maxBufferSize = device.limits.maxBufferSize || 268435456; // 256MB default
808
- const modeStr = filterMode === 0 ? 'terrain (dense Z-only)' : 'tool (sparse XYZ)';
833
+ // const modeStr = filterMode === 0 ? 'terrain (dense Z-only)' : 'tool (sparse XYZ)';
809
834
  // debug.log(`Output buffer size: ${(outputSize / 1024 / 1024).toFixed(2)} MB for ${modeStr} (max: ${(maxBufferSize / 1024 / 1024).toFixed(2)} MB)`);
810
835
 
811
836
  if (outputSize > maxBufferSize) {
812
837
  throw new Error(`Output buffer too large: ${(outputSize / 1024 / 1024).toFixed(2)} MB exceeds device limit of ${(maxBufferSize / 1024 / 1024).toFixed(2)} MB. Try a larger step size.`);
813
838
  }
814
839
 
840
+ console.time(`${log_pre} Build Spatial Grid`);
815
841
  const spatialGrid = buildSpatialGrid(triangles, bounds);
842
+ console.timeEnd(`${log_pre} Build Spatial Grid`);
816
843
 
817
844
  // Create buffers
818
845
  const triangleBuffer = device.createBuffer({
@@ -953,7 +980,7 @@ async function rasterizeMeshSingle(triangles, stepSize, filterMode, options = {}
953
980
  result = new Float32Array(outputData);
954
981
  pointCount = totalGridPoints;
955
982
 
956
- if (verbose) {
983
+ if (config.debug) {
957
984
  // Count valid points for logging (sentinel value = -1e10)
958
985
  let zeroCount = 0;
959
986
  let validCount = 0;
@@ -1293,7 +1320,6 @@ async function rasterizeMesh(triangles, stepSize, filterMode, options = {}) {
1293
1320
 
1294
1321
  const tileResult = await rasterizeMeshSingle(triangles, stepSize, filterMode, {
1295
1322
  ...tiles[i].bounds,
1296
- rotationAngleDeg: options.rotationAngleDeg
1297
1323
  });
1298
1324
 
1299
1325
  const tileTime = performance.now() - tileStart;
@@ -2135,412 +2161,22 @@ function stitchToolpathTiles(tileResults, globalBounds, gridStep, xStep, yStep)
2135
2161
  };
2136
2162
  }
2137
2163
 
2138
- // Radial rasterization - two-pass tiled with bit-attention
2139
- // Workload budget: triangles × scanlines should stay under this to avoid timeouts
2140
- const MAX_WORKLOAD_PER_TILE = 10_000_000; // triangles × gridHeight budget per tile
2141
-
2142
- let cachedRadialCullPipeline = null;
2143
- let cachedRadialRasterizePipeline = null;
2144
-
2145
- async function radialRasterize(triangles, stepSize, rotationStepDegrees, zFloor = 0, boundsOverride = null, params = {}) {
2146
- const startTime = performance.now();
2147
-
2148
- if (!isInitialized) {
2149
- await initWebGPU();
2150
- }
2151
-
2152
- // Check if SharedArrayBuffer is available and triangle data is large (>1M triangles = 9M floats = 36MB)
2153
- const numTriangles = triangles.length / 9;
2154
- const useSharedBuffer = typeof SharedArrayBuffer !== 'undefined' &&
2155
- numTriangles > 1000000 &&
2156
- !(triangles.buffer instanceof SharedArrayBuffer);
2157
-
2158
- if (useSharedBuffer) {
2159
- debug.log(`Large dataset (${numTriangles.toLocaleString()} triangles), converting to SharedArrayBuffer`);
2160
- const sab = new SharedArrayBuffer(triangles.byteLength);
2161
- const sharedTriangles = new Float32Array(sab);
2162
- sharedTriangles.set(triangles);
2163
- triangles = sharedTriangles;
2164
- }
2165
-
2166
- const bounds = boundsOverride || calculateBounds(triangles);
2167
-
2168
- // Calculate max radius (distance from X-axis)
2169
- let maxRadius = 0;
2170
- for (let i = 0; i < triangles.length; i += 3) {
2171
- const y = triangles[i + 1];
2172
- const z = triangles[i + 2];
2173
- const radius = Math.sqrt(y * y + z * z);
2174
- maxRadius = Math.max(maxRadius, radius);
2175
- }
2176
-
2177
- // Add margin for ray origins to start outside mesh
2178
- maxRadius *= 1.2;
2179
-
2180
- const circumference = 2 * Math.PI * maxRadius;
2181
- const xRange = bounds.max.x - bounds.min.x;
2182
- const rotationStepRadians = rotationStepDegrees * (Math.PI / 180);
2183
-
2184
- // Calculate grid height (number of angular scanlines)
2185
- const gridHeight = Math.ceil(360 / rotationStepDegrees) + 1;
2186
-
2187
- // Calculate number of tiles based on user config or auto-calculation
2188
- let numTiles, trianglesPerTileTarget;
2189
-
2190
- if (params.trianglesPerTile) {
2191
- // User specified explicit triangles per tile
2192
- trianglesPerTileTarget = params.trianglesPerTile;
2193
- numTiles = Math.max(1, Math.ceil(numTriangles / trianglesPerTileTarget));
2194
- debug.log(`Radial: ${numTriangles} triangles, ${gridHeight} scanlines, ${numTiles} X-tiles (${trianglesPerTileTarget} target tri/tile)`);
2195
- } else {
2196
- // Auto-calculate based on workload budget
2197
- // Total work = numTriangles × gridHeight × culling_efficiency
2198
- // More tiles = smaller X-slices = better culling = less work per tile
2199
- const totalWorkload = numTriangles * gridHeight * 0.5; // 0.5 = expected culling efficiency
2200
- numTiles = Math.max(1, Math.ceil(totalWorkload / MAX_WORKLOAD_PER_TILE));
2201
- trianglesPerTileTarget = Math.ceil(numTriangles / numTiles);
2202
- debug.log(`Radial: ${numTriangles} triangles, ${gridHeight} scanlines, ${numTiles} X-tiles (${trianglesPerTileTarget} avg tri/tile, auto-calculated)`);
2203
- }
2204
-
2205
- const tileWidth = xRange / numTiles;
2206
-
2207
- // Create pipelines on first use
2208
- if (!cachedRadialCullPipeline) {
2209
- const cullShaderModule = device.createShaderModule({ code: radialCullShaderCode });
2210
- cachedRadialCullPipeline = device.createComputePipeline({
2211
- layout: 'auto',
2212
- compute: { module: cullShaderModule, entryPoint: 'main' }
2213
- });
2214
- debug.log('Created radial cull pipeline');
2215
- }
2216
-
2217
- if (!cachedRadialRasterizePipeline) {
2218
- const rasterShaderModule = device.createShaderModule({ code: radialRasterizeShaderCode });
2219
- cachedRadialRasterizePipeline = device.createComputePipeline({
2220
- layout: 'auto',
2221
- compute: { module: rasterShaderModule, entryPoint: 'main' }
2222
- });
2223
- debug.log('Created radial rasterize pipeline');
2224
- }
2225
-
2226
- // Create shared triangle buffer
2227
- const triangleBuffer = device.createBuffer({
2228
- size: triangles.byteLength,
2229
- usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
2230
- });
2231
- device.queue.writeBuffer(triangleBuffer, 0, triangles);
2232
-
2233
- // Calculate attention bit array size
2234
- const numWords = Math.ceil(numTriangles / 32);
2235
- debug.log(`Attention array: ${numWords} words (${numWords * 4} bytes)`);
2236
-
2237
- // Helper function to process a single tile
2238
- async function processTile(tileIdx) {
2239
- const prefix = `Tile ${tileIdx + 1}/${numTiles}:`;
2240
- try {
2241
- const tile_min_x = bounds.min.x + tileIdx * tileWidth;
2242
- const tile_max_x = bounds.min.x + (tileIdx + 1) * tileWidth;
2243
-
2244
- debug.log(`${prefix} X=[${tile_min_x.toFixed(2)}, ${tile_max_x.toFixed(2)}]`);
2245
-
2246
- // Pass 1: Cull triangles for this X-tile
2247
- const tileStartTime = performance.now();
2248
-
2249
- const attentionBuffer = device.createBuffer({
2250
- size: numWords * 4,
2251
- usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST,
2252
- });
2253
-
2254
- // Clear attention buffer
2255
- const zeros = new Uint32Array(numWords);
2256
- device.queue.writeBuffer(attentionBuffer, 0, zeros);
2257
-
2258
- const cullUniformData = new Float32Array(4);
2259
- cullUniformData[0] = tile_min_x;
2260
- cullUniformData[1] = tile_max_x;
2261
- const cullUniformU32 = new Uint32Array(cullUniformData.buffer);
2262
- cullUniformU32[2] = numTriangles;
2263
-
2264
- const cullUniformBuffer = device.createBuffer({
2265
- size: 16,
2266
- usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
2267
- });
2268
- device.queue.writeBuffer(cullUniformBuffer, 0, cullUniformData);
2269
-
2270
- // CRITICAL: Wait for writeBuffer operations before compute dispatch
2271
- await device.queue.onSubmittedWorkDone();
2272
-
2273
- const cullBindGroup = device.createBindGroup({
2274
- layout: cachedRadialCullPipeline.getBindGroupLayout(0),
2275
- entries: [
2276
- { binding: 0, resource: { buffer: triangleBuffer } },
2277
- { binding: 1, resource: { buffer: attentionBuffer } },
2278
- { binding: 2, resource: { buffer: cullUniformBuffer } },
2279
- ],
2280
- });
2281
-
2282
- const cullEncoder = device.createCommandEncoder();
2283
- const cullPass = cullEncoder.beginComputePass();
2284
- cullPass.setPipeline(cachedRadialCullPipeline);
2285
- cullPass.setBindGroup(0, cullBindGroup);
2286
- cullPass.dispatchWorkgroups(Math.ceil(numTriangles / 256));
2287
- cullPass.end();
2288
- device.queue.submit([cullEncoder.finish()]);
2289
- await device.queue.onSubmittedWorkDone();
2290
-
2291
- const cullTime = performance.now() - tileStartTime;
2292
- // debug.log(` Culling: ${cullTime.toFixed(1)}ms`);
2293
-
2294
- // Pass 1.5: Read back attention bits and compact to triangle index list
2295
- const compactStartTime = performance.now();
2296
-
2297
- // Create staging buffer to read attention bits
2298
- const attentionStagingBuffer = device.createBuffer({
2299
- size: numWords * 4,
2300
- usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
2301
- });
2302
-
2303
- const copyEncoder = device.createCommandEncoder();
2304
- copyEncoder.copyBufferToBuffer(attentionBuffer, 0, attentionStagingBuffer, 0, numWords * 4);
2305
- device.queue.submit([copyEncoder.finish()]);
2306
- await device.queue.onSubmittedWorkDone();
2307
-
2308
- await attentionStagingBuffer.mapAsync(GPUMapMode.READ);
2309
- const attentionBits = new Uint32Array(attentionStagingBuffer.getMappedRange());
2310
-
2311
- // CPU compaction: build list of marked triangle indices
2312
- const compactIndices = [];
2313
- for (let triIdx = 0; triIdx < numTriangles; triIdx++) {
2314
- const wordIdx = Math.floor(triIdx / 32);
2315
- const bitIdx = triIdx % 32;
2316
- const isMarked = (attentionBits[wordIdx] & (1 << bitIdx)) !== 0;
2317
- if (isMarked) {
2318
- compactIndices.push(triIdx);
2319
- }
2320
- }
2321
-
2322
- attentionStagingBuffer.unmap();
2323
- attentionStagingBuffer.destroy();
2324
-
2325
- const compactTime = performance.now() - compactStartTime;
2326
- const cullingEfficiency = ((numTriangles - compactIndices.length) / numTriangles * 100).toFixed(1);
2327
- debug.log(`${prefix} Compacted ${numTriangles} → ${compactIndices.length} triangles (${cullingEfficiency}% culled) in ${compactTime.toFixed(1)}ms`);
2328
-
2329
- // Create compact triangle index buffer
2330
- const compactIndexBuffer = device.createBuffer({
2331
- size: Math.max(4, compactIndices.length * 4), // At least 4 bytes
2332
- usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
2333
- });
2334
- if (compactIndices.length > 0) {
2335
- device.queue.writeBuffer(compactIndexBuffer, 0, new Uint32Array(compactIndices));
2336
- }
2337
-
2338
- // Pass 2: Rasterize this X-tile
2339
- const rasterStartTime = performance.now();
2340
-
2341
- const gridWidth = Math.ceil(tileWidth / stepSize) + 1;
2342
- const gridHeight = Math.ceil(360 / rotationStepDegrees) + 1; // Number of angular samples
2343
- const totalCells = gridWidth * gridHeight;
2344
-
2345
- debug.log(`${prefix} Grid: ${gridWidth}×${gridHeight} = ${totalCells} cells (rotationStep=${rotationStepDegrees}°)`);
2346
-
2347
- const outputBuffer = device.createBuffer({
2348
- size: totalCells * 4,
2349
- usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST,
2350
- });
2351
-
2352
- // Initialize with EMPTY_CELL
2353
- const initData = new Float32Array(totalCells);
2354
- initData.fill(EMPTY_CELL);
2355
- device.queue.writeBuffer(outputBuffer, 0, initData);
2356
-
2357
- const rotationOffsetRadians = (params.radialRotationOffset ?? 0) * (Math.PI / 180);
2358
-
2359
- const rasterUniformData = new Float32Array(16);
2360
- rasterUniformData[0] = tile_min_x;
2361
- rasterUniformData[1] = tile_max_x;
2362
- rasterUniformData[2] = maxRadius;
2363
- rasterUniformData[3] = rotationStepRadians;
2364
- rasterUniformData[4] = stepSize;
2365
- rasterUniformData[5] = zFloor;
2366
- rasterUniformData[6] = rotationOffsetRadians;
2367
- rasterUniformData[7] = 0; // padding
2368
- const rasterUniformU32 = new Uint32Array(rasterUniformData.buffer);
2369
- rasterUniformU32[8] = gridWidth;
2370
- rasterUniformU32[9] = gridHeight;
2371
- rasterUniformU32[10] = compactIndices.length; // Use compact count instead of numTriangles
2372
- rasterUniformU32[11] = 0; // No longer using attention words
2373
-
2374
- const rasterUniformBuffer = device.createBuffer({
2375
- size: 64,
2376
- usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
2377
- });
2378
- device.queue.writeBuffer(rasterUniformBuffer, 0, rasterUniformData);
2379
-
2380
- // CRITICAL: Wait for writeBuffer operations before compute dispatch
2381
- await device.queue.onSubmittedWorkDone();
2382
-
2383
- const rasterBindGroup = device.createBindGroup({
2384
- layout: cachedRadialRasterizePipeline.getBindGroupLayout(0),
2385
- entries: [
2386
- { binding: 0, resource: { buffer: triangleBuffer } },
2387
- { binding: 1, resource: { buffer: compactIndexBuffer } }, // Use compact indices instead of attention bits
2388
- { binding: 2, resource: { buffer: outputBuffer } },
2389
- { binding: 3, resource: { buffer: rasterUniformBuffer } },
2390
- ],
2391
- });
2392
-
2393
- const rasterEncoder = device.createCommandEncoder();
2394
- const rasterPass = rasterEncoder.beginComputePass();
2395
- rasterPass.setPipeline(cachedRadialRasterizePipeline);
2396
- rasterPass.setBindGroup(0, rasterBindGroup);
2397
- const workgroupsX = Math.ceil(gridWidth / 16);
2398
- const workgroupsY = Math.ceil(gridHeight / 16);
2399
- rasterPass.dispatchWorkgroups(workgroupsX, workgroupsY);
2400
- rasterPass.end();
2401
-
2402
- const stagingBuffer = device.createBuffer({
2403
- size: totalCells * 4,
2404
- usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
2405
- });
2406
-
2407
- rasterEncoder.copyBufferToBuffer(outputBuffer, 0, stagingBuffer, 0, totalCells * 4);
2408
- device.queue.submit([rasterEncoder.finish()]);
2409
- await device.queue.onSubmittedWorkDone();
2410
-
2411
- await stagingBuffer.mapAsync(GPUMapMode.READ);
2412
- const outputData = new Float32Array(stagingBuffer.getMappedRange());
2413
- const tileResult = new Float32Array(outputData);
2414
- stagingBuffer.unmap();
2415
-
2416
- const rasterTime = performance.now() - rasterStartTime;
2417
- debug.log(`${prefix} Rasterization: ${rasterTime.toFixed(1)}ms`);
2418
-
2419
- // Cleanup tile buffers
2420
- attentionBuffer.destroy();
2421
- cullUniformBuffer.destroy();
2422
- compactIndexBuffer.destroy();
2423
- outputBuffer.destroy();
2424
- rasterUniformBuffer.destroy();
2425
- stagingBuffer.destroy();
2426
-
2427
- return {
2428
- tileIdx,
2429
- data: tileResult,
2430
- gridWidth,
2431
- gridHeight,
2432
- minX: tile_min_x,
2433
- maxX: tile_max_x
2434
- };
2435
- } catch (error) {
2436
- debug.error(`Error processing tile ${tileIdx + 1}/${numTiles}:`, error);
2437
- throw new Error(`Tile ${tileIdx + 1} failed: ${error.message}`);
2438
- }
2439
- }
2440
-
2441
- // Process tiles with rolling window to maintain constant concurrency
2442
- // This keeps GPU busy while preventing browser from allocating too many buffers at once
2443
- const maxConcurrentTiles = params.maxConcurrentTiles ?? 50;
2444
- debug.log(`Processing ${numTiles} tiles (max ${maxConcurrentTiles} concurrent)...`);
2445
-
2446
- let completedTiles = 0;
2447
- let nextTileIdx = 0;
2448
- const activeTiles = new Map(); // promise -> tileIdx
2449
- const tileResults = new Array(numTiles);
2450
-
2451
- // Helper to start a tile and track it
2452
- const startTile = (tileIdx) => {
2453
- const promise = processTile(tileIdx).then(result => {
2454
- completedTiles++;
2455
- const percent = Math.round((completedTiles / numTiles) * 100);
2456
-
2457
- // Report progress
2458
- self.postMessage({
2459
- type: 'rasterize-progress',
2460
- data: {
2461
- percent,
2462
- current: completedTiles,
2463
- total: numTiles
2464
- }
2465
- });
2466
-
2467
- tileResults[tileIdx] = result;
2468
- activeTiles.delete(promise);
2469
- return result;
2470
- });
2471
- activeTiles.set(promise, tileIdx);
2472
- return promise;
2473
- };
2474
-
2475
- // Start initial window of tiles
2476
- while (nextTileIdx < numTiles && activeTiles.size < maxConcurrentTiles) {
2477
- startTile(nextTileIdx++);
2478
- }
2479
-
2480
- // As tiles complete, start new ones to maintain window size
2481
- while (activeTiles.size > 0) {
2482
- // Wait for at least one tile to complete
2483
- await Promise.race(activeTiles.keys());
2484
-
2485
- // Start as many new tiles as needed to fill window
2486
- while (nextTileIdx < numTiles && activeTiles.size < maxConcurrentTiles) {
2487
- startTile(nextTileIdx++);
2488
- }
2489
- }
2490
-
2491
- triangleBuffer.destroy();
2492
-
2493
- const totalTime = performance.now() - startTime;
2494
- debug.log(`Radial complete in ${totalTime.toFixed(1)}ms`);
2495
-
2496
- // Stitch tiles together into a single dense array
2497
- const fullGridHeight = Math.ceil(360 / rotationStepDegrees) + 1; // Number of angular samples
2498
- const fullGridWidth = Math.ceil(xRange / stepSize) + 1;
2499
- const stitchedData = new Float32Array(fullGridWidth * fullGridHeight);
2500
- stitchedData.fill(EMPTY_CELL);
2501
-
2502
- for (const tile of tileResults) {
2503
- const tileXOffset = Math.round((tile.minX - bounds.min.x) / stepSize);
2504
-
2505
- for (let ty = 0; ty < tile.gridHeight; ty++) {
2506
- for (let tx = 0; tx < tile.gridWidth; tx++) {
2507
- const tileIdx = ty * tile.gridWidth + tx;
2508
- const fullX = tileXOffset + tx;
2509
- const fullY = ty;
2510
-
2511
- if (fullX >= 0 && fullX < fullGridWidth && fullY >= 0 && fullY < fullGridHeight) {
2512
- const fullIdx = fullY * fullGridWidth + fullX;
2513
- stitchedData[fullIdx] = tile.data[tileIdx];
2514
- }
2515
- }
2516
- }
2517
- }
2518
-
2519
- // For toolpath generation, bounds.max.y must match the actual grid dimensions
2520
- // so that createHeightMapFromPoints calculates the correct height:
2521
- // height = ceil((max.y - min.y) / stepSize) + 1 = gridHeight
2522
- // Therefore: max.y = (gridHeight - 1) * stepSize
2523
- const boundsMaxY = (fullGridHeight - 1) * stepSize;
2524
-
2525
- return {
2526
- positions: stitchedData,
2527
- pointCount: stitchedData.length,
2528
- bounds: {
2529
- min: { x: bounds.min.x, y: 0, z: 0 },
2530
- max: { x: bounds.max.x, y: boundsMaxY, z: maxRadius }
2531
- },
2532
- conversionTime: totalTime,
2533
- gridWidth: fullGridWidth,
2534
- gridHeight: fullGridHeight,
2535
- isDense: true,
2536
- maxRadius,
2537
- circumference,
2538
- rotationStepDegrees // NEW: needed for wrapping and toolpath generation
2539
- };
2540
- }
2541
-
2542
- // Radial V2: Rasterize model with rotating ray planes and X-bucketing
2543
- async function radialRasterizeV2(triangles, bucketData, resolution, angleStep, numAngles, maxRadius, toolWidth, zFloor, bounds, startAngle = 0) {
2164
+ // Radial: Rasterize model with rotating ray planes and X-bucketing
2165
+ async function radialRasterize({
2166
+ triangles,
2167
+ bucketData,
2168
+ resolution,
2169
+ angleStep,
2170
+ numAngles,
2171
+ maxRadius,
2172
+ toolWidth,
2173
+ zFloor,
2174
+ bounds,
2175
+ startAngle = 0,
2176
+ reusableBuffers = null,
2177
+ returnBuffersForReuse = false,
2178
+ batchInfo = {}
2179
+ }) {
2544
2180
  if (!device) {
2545
2181
  throw new Error('WebGPU not initialized');
2546
2182
  }
@@ -2567,50 +2203,65 @@ async function radialRasterizeV2(triangles, bucketData, resolution, angleStep, n
2567
2203
  const avgTriangles = bucketTriangleCounts.reduce((a, b) => a + b, 0) / bucketTriangleCounts.length;
2568
2204
  const workPerWorkgroup = maxTriangles * numAngles * bucketGridWidth * gridYHeight;
2569
2205
 
2570
- debug.log(`[Worker] Radial V2: ${gridWidth}x${gridYHeight} grid, ${numAngles} angles, ${bucketData.buckets.length} buckets`);
2571
- debug.log(`[Worker] Load: min=${minTriangles} max=${maxTriangles} avg=${avgTriangles.toFixed(0)} (${(maxTriangles/avgTriangles).toFixed(2)}x imbalance, worst=${(workPerWorkgroup/1e6).toFixed(1)}M tests)`);
2206
+ // Determine bucket batching to avoid GPU timeouts
2207
+ // Target: keep max work per batch under ~1M ray-triangle tests
2208
+ const maxWorkPerBatch = 1e6;
2209
+ const estimatedWorkPerBucket = avgTriangles * numAngles * bucketGridWidth * gridYHeight;
2210
+
2211
+ // Calculate buckets per batch, but enforce reasonable limits
2212
+ // - Minimum: 4 buckets per batch (unless total < 4)
2213
+ // - Maximum: all buckets if work is reasonable
2214
+ let maxBucketsPerBatch;
2215
+ if (estimatedWorkPerBucket === 0) {
2216
+ maxBucketsPerBatch = bucketData.numBuckets; // Empty model
2217
+ } else {
2218
+ const idealBucketsPerBatch = Math.floor(maxWorkPerBatch / estimatedWorkPerBucket);
2219
+ const minBucketsPerBatch = Math.min(4, bucketData.numBuckets);
2220
+ maxBucketsPerBatch = Math.max(minBucketsPerBatch, idealBucketsPerBatch);
2221
+ // Cap at total buckets
2222
+ maxBucketsPerBatch = Math.min(maxBucketsPerBatch, bucketData.numBuckets);
2223
+ }
2572
2224
 
2573
- // Create GPU buffers
2574
- const triangleBuffer = device.createBuffer({
2575
- size: triangles.byteLength,
2576
- usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
2577
- mappedAtCreation: true
2578
- });
2579
- new Float32Array(triangleBuffer.getMappedRange()).set(triangles);
2580
- triangleBuffer.unmap();
2225
+ const numBucketBatches = Math.ceil(bucketData.numBuckets / maxBucketsPerBatch);
2581
2226
 
2582
- // Create bucket info buffer (f32, f32, u32, u32 per bucket)
2583
- const bucketInfoSize = bucketData.buckets.length * 16; // 4 fields * 4 bytes
2584
- const bucketInfoBuffer = device.createBuffer({
2585
- size: bucketInfoSize,
2586
- usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
2587
- mappedAtCreation: true
2588
- });
2227
+ if (diagnostic) {
2228
+ debug.log(`Radial: ${gridWidth}x${gridYHeight} grid, ${numAngles} angles, ${bucketData.buckets.length} buckets`);
2229
+ debug.log(`Load: min=${minTriangles} max=${maxTriangles} avg=${avgTriangles.toFixed(0)} (${(maxTriangles/avgTriangles).toFixed(2)}x imbalance, worst=${(workPerWorkgroup/1e6).toFixed(1)}M tests)`);
2230
+ debug.log(`Estimated work/bucket: ${(estimatedWorkPerBucket/1e6).toFixed(1)}M tests`);
2231
+ if (numBucketBatches > 1) {
2232
+ debug.log(`Bucket batching: ${numBucketBatches} batches of ~${maxBucketsPerBatch} buckets to avoid timeout`);
2233
+ }
2234
+ }
2589
2235
 
2590
- const bucketView = new ArrayBuffer(bucketInfoSize);
2591
- const bucketFloatView = new Float32Array(bucketView);
2592
- const bucketUintView = new Uint32Array(bucketView);
2236
+ // Reuse buffers if provided, otherwise create new ones
2237
+ let triangleBuffer, triangleIndicesBuffer;
2238
+ let shouldCleanupBuffers = false;
2593
2239
 
2594
- for (let i = 0; i < bucketData.buckets.length; i++) {
2595
- const bucket = bucketData.buckets[i];
2596
- const offset = i * 4;
2597
- bucketFloatView[offset] = bucket.minX; // f32
2598
- bucketFloatView[offset + 1] = bucket.maxX; // f32
2599
- bucketUintView[offset + 2] = bucket.startIndex; // u32
2600
- bucketUintView[offset + 3] = bucket.count; // u32
2601
- }
2240
+ if (reusableBuffers) {
2241
+ // Reuse cached buffers from previous angle batch
2242
+ triangleBuffer = reusableBuffers.triangleBuffer;
2243
+ triangleIndicesBuffer = reusableBuffers.triangleIndicesBuffer;
2244
+ } else {
2245
+ // Create new GPU buffers (first batch or non-batched operation)
2246
+ shouldCleanupBuffers = true;
2602
2247
 
2603
- new Uint8Array(bucketInfoBuffer.getMappedRange()).set(new Uint8Array(bucketView));
2604
- bucketInfoBuffer.unmap();
2248
+ triangleBuffer = device.createBuffer({
2249
+ size: triangles.byteLength,
2250
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
2251
+ mappedAtCreation: true
2252
+ });
2253
+ new Float32Array(triangleBuffer.getMappedRange()).set(triangles);
2254
+ triangleBuffer.unmap();
2605
2255
 
2606
- // Create triangle indices buffer
2607
- const triangleIndicesBuffer = device.createBuffer({
2608
- size: bucketData.triangleIndices.byteLength,
2609
- usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
2610
- mappedAtCreation: true
2611
- });
2612
- new Uint32Array(triangleIndicesBuffer.getMappedRange()).set(bucketData.triangleIndices);
2613
- triangleIndicesBuffer.unmap();
2256
+ // Create triangle indices buffer
2257
+ triangleIndicesBuffer = device.createBuffer({
2258
+ size: bucketData.triangleIndices.byteLength,
2259
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
2260
+ mappedAtCreation: true
2261
+ });
2262
+ new Uint32Array(triangleIndicesBuffer.getMappedRange()).set(bucketData.triangleIndices);
2263
+ triangleIndicesBuffer.unmap();
2264
+ }
2614
2265
 
2615
2266
  // Create output buffer (all angles, all buckets)
2616
2267
  const outputSize = numAngles * bucketData.numBuckets * bucketGridWidth * gridYHeight * 4;
@@ -2620,84 +2271,115 @@ async function radialRasterizeV2(triangles, bucketData, resolution, angleStep, n
2620
2271
  });
2621
2272
 
2622
2273
  // CRITICAL: Initialize output buffer with zFloor to avoid reading garbage data
2623
- // Use mappedAtCreation for deterministic initialization (not writeBuffer!)
2624
- const initEncoder = device.createCommandEncoder();
2625
2274
  const initData = new Float32Array(outputSize / 4);
2626
2275
  initData.fill(zFloor);
2627
2276
  device.queue.writeBuffer(outputBuffer, 0, initData);
2628
- // Force initialization to complete
2629
- device.queue.submit([initEncoder.finish()]);
2630
- await device.queue.onSubmittedWorkDone();
2277
+ // Note: No need to wait - GPU will execute writeBuffer before compute shader
2631
2278
 
2632
- // Create uniforms with proper alignment (f32 and u32 mixed)
2633
- // Struct layout: f32, f32, u32, f32, f32, u32, f32, u32, f32, f32, u32, u32, f32
2634
- const uniformBuffer = device.createBuffer({
2635
- size: 52, // 13 fields * 4 bytes
2636
- usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
2637
- mappedAtCreation: true
2638
- });
2639
-
2640
- const uniformView = new ArrayBuffer(52);
2641
- const floatView = new Float32Array(uniformView);
2642
- const uintView = new Uint32Array(uniformView);
2643
-
2644
- floatView[0] = resolution; // f32
2645
- floatView[1] = angleStep * (Math.PI / 180); // f32
2646
- uintView[2] = numAngles; // u32
2647
- floatView[3] = maxRadius; // f32
2648
- floatView[4] = toolWidth; // f32
2649
- uintView[5] = gridYHeight; // u32
2650
- floatView[6] = bucketData.buckets[0].maxX - bucketData.buckets[0].minX; // f32 bucketWidth
2651
- uintView[7] = bucketGridWidth; // u32
2652
- floatView[8] = bucketMinX; // f32 global_min_x (use bucket range)
2653
- floatView[9] = zFloor; // f32
2654
- uintView[10] = 0; // u32 filterMode
2655
- uintView[11] = bucketData.numBuckets; // u32
2656
- floatView[12] = startAngle * (Math.PI / 180); // f32 start_angle (radians)
2657
-
2658
- new Uint8Array(uniformBuffer.getMappedRange()).set(new Uint8Array(uniformView));
2659
- uniformBuffer.unmap();
2660
-
2661
- // Create shader and pipeline
2662
- const shaderModule = device.createShaderModule({ code: radialRasterizeV2ShaderCode });
2663
- const pipeline = device.createComputePipeline({
2664
- layout: 'auto',
2665
- compute: {
2666
- module: shaderModule,
2667
- entryPoint: 'main'
2668
- }
2669
- });
2279
+ // Prep complete, GPU starting
2280
+ timings.prep = performance.now() - timings.start;
2281
+ const gpuStart = performance.now();
2670
2282
 
2671
- // Create bind group
2672
- const bindGroup = device.createBindGroup({
2673
- layout: pipeline.getBindGroupLayout(0),
2674
- entries: [
2675
- { binding: 0, resource: { buffer: triangleBuffer } },
2676
- { binding: 1, resource: { buffer: outputBuffer } },
2677
- { binding: 2, resource: { buffer: uniformBuffer } },
2678
- { binding: 3, resource: { buffer: bucketInfoBuffer } },
2679
- { binding: 4, resource: { buffer: triangleIndicesBuffer } }
2680
- ]
2681
- });
2283
+ // Use cached pipeline (created in initWebGPU)
2284
+ const pipeline = cachedRadialBatchPipeline;
2682
2285
 
2683
- console.time('RADIAL COMPUTE');
2684
- // Dispatch
2286
+ // Process buckets in batches to avoid GPU timeouts
2685
2287
  const commandEncoder = device.createCommandEncoder();
2686
2288
  const passEncoder = commandEncoder.beginComputePass();
2687
2289
  passEncoder.setPipeline(pipeline);
2688
- passEncoder.setBindGroup(0, bindGroup);
2689
2290
 
2690
- // Prep complete, GPU starting
2691
- timings.prep = performance.now() - timings.start;
2692
- const gpuStart = performance.now();
2693
-
2694
- // Dispatch: (numAngles/8, gridYHeight/8, numBuckets)
2695
2291
  const dispatchX = Math.ceil(numAngles / 8);
2696
2292
  const dispatchY = Math.ceil(gridYHeight / 8);
2697
- const dispatchZ = bucketData.numBuckets;
2698
- debug.log(`[Worker] Dispatch: (${dispatchX}, ${dispatchY}, ${dispatchZ}) = ${dispatchX * 8} angles, ${dispatchY * 8} Y cells, ${dispatchZ} buckets`);
2699
2293
 
2700
- passEncoder.dispatchWorkgroups(dispatchX, dispatchY, dispatchZ);
2294
+ // Collect buffers to destroy after GPU completes
2295
+ const batchBuffersToDestroy = [];
2296
+ debug.log(`Dispatch (${dispatchX}, ${dispatchY}, ${maxBucketsPerBatch}) in ${numBucketBatches} Chunks`);
2297
+
2298
+ for (let batchIdx = 0; batchIdx < numBucketBatches; batchIdx++) {
2299
+ const startBucket = batchIdx * maxBucketsPerBatch;
2300
+ const endBucket = Math.min(startBucket + maxBucketsPerBatch, bucketData.numBuckets);
2301
+ const bucketsInBatch = endBucket - startBucket;
2302
+
2303
+ // Create bucket info buffer for this batch
2304
+ const bucketInfoSize = bucketsInBatch * 16; // 4 fields * 4 bytes per bucket
2305
+ const bucketInfoBuffer = device.createBuffer({
2306
+ size: bucketInfoSize,
2307
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
2308
+ mappedAtCreation: true
2309
+ });
2310
+
2311
+ const bucketView = new ArrayBuffer(bucketInfoSize);
2312
+ const bucketFloatView = new Float32Array(bucketView);
2313
+ const bucketUintView = new Uint32Array(bucketView);
2314
+
2315
+ for (let i = 0; i < bucketsInBatch; i++) {
2316
+ const bucket = bucketData.buckets[startBucket + i];
2317
+ const offset = i * 4;
2318
+ bucketFloatView[offset] = bucket.minX; // f32
2319
+ bucketFloatView[offset + 1] = bucket.maxX; // f32
2320
+ bucketUintView[offset + 2] = bucket.startIndex; // u32
2321
+ bucketUintView[offset + 3] = bucket.count; // u32
2322
+ }
2323
+
2324
+ new Uint8Array(bucketInfoBuffer.getMappedRange()).set(new Uint8Array(bucketView));
2325
+ bucketInfoBuffer.unmap();
2326
+
2327
+ // Create uniforms for this batch
2328
+ // Struct layout: f32, f32, u32, f32, f32, u32, f32, u32, f32, f32, u32, u32, f32, u32
2329
+ const uniformBuffer = device.createBuffer({
2330
+ size: 56, // 14 fields * 4 bytes
2331
+ usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
2332
+ mappedAtCreation: true
2333
+ });
2334
+
2335
+ const uniformView = new ArrayBuffer(56);
2336
+ const floatView = new Float32Array(uniformView);
2337
+ const uintView = new Uint32Array(uniformView);
2338
+
2339
+ floatView[0] = resolution; // f32
2340
+ floatView[1] = angleStep * (Math.PI / 180); // f32
2341
+ uintView[2] = numAngles; // u32
2342
+ floatView[3] = maxRadius; // f32
2343
+ floatView[4] = toolWidth; // f32
2344
+ uintView[5] = gridYHeight; // u32
2345
+ floatView[6] = bucketData.buckets[0].maxX - bucketData.buckets[0].minX; // f32 bucketWidth
2346
+ uintView[7] = bucketGridWidth; // u32
2347
+ floatView[8] = bucketMinX; // f32 global_min_x
2348
+ floatView[9] = zFloor; // f32
2349
+ uintView[10] = 0; // u32 filterMode
2350
+ uintView[11] = bucketData.numBuckets; // u32 (total buckets, for validation)
2351
+ floatView[12] = startAngle * (Math.PI / 180); // f32 start_angle (radians)
2352
+ uintView[13] = startBucket; // u32 bucket_offset
2353
+
2354
+ new Uint8Array(uniformBuffer.getMappedRange()).set(new Uint8Array(uniformView));
2355
+ uniformBuffer.unmap();
2356
+
2357
+ // Create bind group for this batch
2358
+ const bindGroup = device.createBindGroup({
2359
+ layout: pipeline.getBindGroupLayout(0),
2360
+ entries: [
2361
+ { binding: 0, resource: { buffer: triangleBuffer } },
2362
+ { binding: 1, resource: { buffer: outputBuffer } },
2363
+ { binding: 2, resource: { buffer: uniformBuffer } },
2364
+ { binding: 3, resource: { buffer: bucketInfoBuffer } },
2365
+ { binding: 4, resource: { buffer: triangleIndicesBuffer } }
2366
+ ]
2367
+ });
2368
+
2369
+ passEncoder.setBindGroup(0, bindGroup);
2370
+
2371
+ // Dispatch for this batch
2372
+ const dispatchZ = bucketsInBatch;
2373
+ if (diagnostic) {
2374
+ debug.log(` Batch ${batchIdx + 1}/${numBucketBatches}: Dispatch (${dispatchX}, ${dispatchY}, ${dispatchZ}) = buckets ${startBucket}-${endBucket - 1}`);
2375
+ }
2376
+
2377
+ passEncoder.dispatchWorkgroups(dispatchX, dispatchY, dispatchZ);
2378
+
2379
+ // Save buffers to destroy after GPU completes
2380
+ batchBuffersToDestroy.push(uniformBuffer, bucketInfoBuffer);
2381
+ }
2382
+
2701
2383
  passEncoder.end();
2702
2384
 
2703
2385
  // Read back
@@ -2709,21 +2391,21 @@ async function radialRasterizeV2(triangles, bucketData, resolution, angleStep, n
2709
2391
  commandEncoder.copyBufferToBuffer(outputBuffer, 0, stagingBuffer, 0, outputSize);
2710
2392
  device.queue.submit([commandEncoder.finish()]);
2711
2393
 
2712
- // CRITICAL: Wait for GPU to finish before reading results
2394
+ // Wait for GPU to finish before reading results
2713
2395
  await device.queue.onSubmittedWorkDone();
2714
- console.timeEnd('RADIAL COMPUTE');
2715
-
2716
2396
  await stagingBuffer.mapAsync(GPUMapMode.READ);
2717
- const outputData = new Float32Array(stagingBuffer.getMappedRange());
2718
- const outputCopy = new Float32Array(outputData);
2397
+ // const outputData = new Float32Array(stagingBuffer.getMappedRange());
2398
+ // const outputCopy = new Float32Array(outputData);
2399
+ const outputCopy = new Float32Array(stagingBuffer.getMappedRange().slice());
2719
2400
  stagingBuffer.unmap();
2720
2401
 
2721
- // Cleanup
2722
- triangleBuffer.destroy();
2723
- bucketInfoBuffer.destroy();
2724
- triangleIndicesBuffer.destroy();
2402
+ // Now safe to destroy batch buffers (GPU has completed)
2403
+ for (const buffer of batchBuffersToDestroy) {
2404
+ buffer.destroy();
2405
+ }
2406
+
2407
+ // Cleanup main buffers
2725
2408
  outputBuffer.destroy();
2726
- uniformBuffer.destroy();
2727
2409
  stagingBuffer.destroy();
2728
2410
 
2729
2411
  timings.gpu = performance.now() - gpuStart;
@@ -2779,12 +2461,256 @@ async function radialRasterizeV2(triangles, bucketData, resolution, angleStep, n
2779
2461
  timings.stitch = performance.now() - stitchStart;
2780
2462
  const totalTime = performance.now() - timings.start;
2781
2463
 
2782
- debug.log(`[Worker] Radial V2 complete: ${totalTime.toFixed(0)}ms`);
2783
- debug.log(`[Worker] Prep: ${timings.prep.toFixed(0)}ms (${(timings.prep/totalTime*100).toFixed(0)}%)`);
2784
- debug.log(`[Worker] GPU: ${timings.gpu.toFixed(0)}ms (${(timings.gpu/totalTime*100).toFixed(0)}%)`);
2785
- debug.log(`[Worker] Stitch: ${timings.stitch.toFixed(0)}ms (${(timings.stitch/totalTime*100).toFixed(0)}%)`);
2464
+ Object.assign(batchInfo, {
2465
+ 'prep': (timings.prep | 0),
2466
+ 'raster': (timings.gpu | 0),
2467
+ 'stitch': (timings.stitch | 0)
2468
+ });
2469
+
2470
+ const result = { strips, timings };
2786
2471
 
2787
- return { strips, timings };
2472
+ // Decide what to do with triangle/indices buffers
2473
+ // Note: bucketInfoBuffer is now created/destroyed per bucket batch within the loop
2474
+ if (returnBuffersForReuse && shouldCleanupBuffers) {
2475
+ // First batch in multi-batch operation: return buffers for subsequent batches to reuse
2476
+ result.reusableBuffers = {
2477
+ triangleBuffer,
2478
+ triangleIndicesBuffer
2479
+ };
2480
+ } else if (shouldCleanupBuffers) {
2481
+ // Single batch operation OR we're NOT supposed to return buffers: destroy them now
2482
+ triangleBuffer.destroy();
2483
+ triangleIndicesBuffer.destroy();
2484
+ }
2485
+ // else: we're reusing buffers from a previous angle batch, don't destroy them (caller will destroy after all angle batches)
2486
+
2487
+ return result;
2488
+ }
2489
+
2490
+ // Radial: Complete pipeline - rasterize model + generate toolpaths for all strips. Rasterizes in angle batches via radialRasterize, runs runToolpathComputeWithBuffers on every non-empty strip, posts 'toolpath-progress' via self.postMessage, and resolves to { strips, totalPoints, numStrips }.
2491
+ async function generateRadialToolpaths({
2492
+ triangles,
2493
+ bucketData,
2494
+ toolData,
2495
+ resolution,
2496
+ angleStep,
2497
+ numAngles,
2498
+ maxRadius,
2499
+ toolWidth,
2500
+ zFloor,
2501
+ bounds,
2502
+ xStep,
2503
+ yStep // NOTE(review): destructured but not referenced anywhere in this function body - confirm it is intentionally unused
2504
+ }) {
2505
+ debug.log('radial-generate-toolpaths', { triangles: triangles.length, numAngles, resolution });
2506
+
2507
+ // Batch processing: rasterize angle ranges to avoid memory allocation failure
2508
+ // Calculate batch size from the fixed MAX_BUFFER_SIZE_MB budget below (no device limit is queried here)
2509
+ const MAX_BUFFER_SIZE_MB = 1800; // Stay under 2GB WebGPU limit with headroom
2510
+ const bytesPerCell = 4; // f32
2511
+
2512
+ const xSize = bounds.max.x - bounds.min.x;
2513
+ const ySize = bounds.max.y - bounds.min.y;
2514
+ const gridXSize = Math.ceil(xSize / resolution);
2515
+ const gridYHeight = Math.ceil(ySize / resolution);
2516
+
2517
+ // Calculate total memory requirement (one f32 per grid cell per angle, expressed in MB)
2518
+ const cellsPerAngle = gridXSize * gridYHeight;
2519
+ const bytesPerAngle = cellsPerAngle * bytesPerCell;
2520
+ const totalMemoryMB = (numAngles * bytesPerAngle) / (1024 * 1024);
2521
+
2522
+ // Only batch if total memory exceeds threshold (or if config.batchDivisor > 1 forces extra batches)
2523
+ const batchDivisor = config?.batchDivisor || 1; // missing or 0 divisor falls back to 1
2524
+ let ANGLES_PER_BATCH, numBatches;
2525
+ if (totalMemoryMB > MAX_BUFFER_SIZE_MB) {
2526
+ // Need to batch: largest angle count whose output fits in the budget
2527
+ const maxAnglesPerBatch = Math.floor((MAX_BUFFER_SIZE_MB * 1024 * 1024) / bytesPerAngle);
2528
+ // Apply batch divisor for overhead testing
2529
+ const adjustedMaxAngles = Math.floor(maxAnglesPerBatch / batchDivisor);
2530
+
2531
+ ANGLES_PER_BATCH = Math.max(1, Math.min(adjustedMaxAngles, numAngles)); // clamp to [1, numAngles]
2532
+ numBatches = Math.ceil(numAngles / ANGLES_PER_BATCH);
2533
+ const batchSizeMB = (ANGLES_PER_BATCH * bytesPerAngle / 1024 / 1024).toFixed(1);
2534
+ debug.log(`Grid: ${gridXSize} x ${gridYHeight}, ${cellsPerAngle.toLocaleString()} cells/angle`);
2535
+ debug.log(`Total memory: ${totalMemoryMB.toFixed(1)}MB exceeds limit, batching required`);
2536
+ if (batchDivisor > 1) {
2537
+ debug.log(`batchDivisor: ${batchDivisor}x (testing overhead: ${maxAnglesPerBatch} → ${adjustedMaxAngles} angles/batch)`);
2538
+ }
2539
+ debug.log(`Batch size: ${ANGLES_PER_BATCH} angles (~${batchSizeMB}MB per batch)`);
2540
+ debug.log(`Processing ${numAngles} angles in ${numBatches} batch(es)`);
2541
+ } else {
2542
+ // Process all angles at once (but still respect batchDivisor for testing)
2543
+ if (batchDivisor > 1) {
2544
+ ANGLES_PER_BATCH = Math.max(10, Math.floor(numAngles / batchDivisor)); // never fewer than 10 angles per artificial batch
2545
+ numBatches = Math.ceil(numAngles / ANGLES_PER_BATCH);
2546
+ debug.log(`Grid: ${gridXSize} x ${gridYHeight}, ${cellsPerAngle.toLocaleString()} cells/angle`);
2547
+ debug.log(`Total memory: ${totalMemoryMB.toFixed(1)}MB (fits in buffer normally)`);
2548
+ debug.log(`batchDivisor: ${batchDivisor}x (artificially creating ${numBatches} batches for overhead testing)`);
2549
+ } else {
2550
+ ANGLES_PER_BATCH = numAngles;
2551
+ numBatches = 1;
2552
+ debug.log(`Grid: ${gridXSize} x ${gridYHeight}, ${cellsPerAngle.toLocaleString()} cells/angle`);
2553
+ debug.log(`Total memory: ${totalMemoryMB.toFixed(1)}MB fits in buffer, processing all ${numAngles} angles in single batch`);
2554
+ }
2555
+ }
2556
+
2557
+ const allStripToolpaths = [];
2558
+ let totalToolpathPoints = 0;
2559
+ const pipelineStartTime = performance.now();
2560
+
2561
+ // Prepare sparse tool once; the same sparseToolData is handed to every batch's toolpath buffers
2562
+ const sparseToolData = createSparseToolFromPoints(toolData.positions);
2563
+ debug.log(`Created sparse tool: ${sparseToolData.count} points (reusing for all strips)`);
2564
+
2565
+ // Rasterization buffers are captured from the first batch's result and reused when batching (numBatches > 1)
2566
+ // These buffers (triangles, triangle indices) don't change between angle batches; bucketInfoBuffer is not among them (see note in cleanup below)
2567
+ let batchReuseBuffers = null;
2568
+ let batchTracking = [];
2569
+
2570
+ for (let batchIdx = 0; batchIdx < numBatches; batchIdx++) {
2571
+ const batchStartTime = performance.now();
2572
+ const startAngleIdx = batchIdx * ANGLES_PER_BATCH;
2573
+ const endAngleIdx = Math.min(startAngleIdx + ANGLES_PER_BATCH, numAngles);
2574
+ const batchNumAngles = endAngleIdx - startAngleIdx;
2575
+ const batchStartAngle = startAngleIdx * angleStep; // same units as angleStep; logged with a degree suffix below
2576
+
2577
+ const batchInfo = {
2578
+ from: startAngleIdx,
2579
+ to: endAngleIdx
2580
+ };
2581
+ batchTracking.push(batchInfo);
2582
+
2583
+ debug.log(`Batch ${batchIdx + 1}/${numBatches}: angles ${startAngleIdx}-${endAngleIdx - 1} (${batchNumAngles} angles), startAngle=${batchStartAngle.toFixed(1)}°`);
2584
+
2585
+ // Rasterize this batch of strips
2586
+ const rasterStartTime = performance.now();
2587
+ const shouldReturnBuffers = (batchIdx === 0 && numBatches > 1); // First batch of multi-batch operation
2588
+ const batchModelResult = await radialRasterize({
2589
+ triangles,
2590
+ bucketData,
2591
+ resolution,
2592
+ angleStep,
2593
+ numAngles: batchNumAngles,
2594
+ maxRadius,
2595
+ toolWidth,
2596
+ zFloor,
2597
+ bounds,
2598
+ startAngle: batchStartAngle,
2599
+ reusableBuffers: batchReuseBuffers,
2600
+ returnBuffersForReuse: shouldReturnBuffers,
2601
+ batchInfo
2602
+ });
2603
+
2604
+ const rasterTime = performance.now() - rasterStartTime; // NOTE(review): rasterTime is not read anywhere below - confirm whether it should feed batchInfo
2605
+
2606
+ // Capture buffers from first batch for reuse
2607
+ if (batchIdx === 0 && batchModelResult.reusableBuffers) {
2608
+ batchReuseBuffers = batchModelResult.reusableBuffers;
2609
+ }
2610
+
2611
+ // Find max dimensions for this batch
2612
+ let maxStripWidth = 0;
2613
+ let maxStripHeight = 0;
2614
+ for (const strip of batchModelResult.strips) {
2615
+ maxStripWidth = Math.max(maxStripWidth, strip.gridWidth);
2616
+ maxStripHeight = Math.max(maxStripHeight, strip.gridHeight);
2617
+ }
2618
+
2619
+ // Create reusable toolpath buffers for this batch from the largest strip dimensions; NOTE(review): maxStripHeight is passed as both the 2nd and 5th argument - confirm the 5th is meant to be a Y step
2620
+ const reusableBuffers = createReusableToolpathBuffers(maxStripWidth, maxStripHeight, sparseToolData, xStep, maxStripHeight);
2621
+
2622
+ // Generate toolpaths for this batch
2623
+ const toolpathStartTime = performance.now();
2624
+
2625
+ for (let i = 0; i < batchModelResult.strips.length; i++) {
2626
+ const strip = batchModelResult.strips[i];
2627
+ const globalStripIdx = startAngleIdx + i;
2628
+
2629
+ if (globalStripIdx % 10 === 0 || globalStripIdx === numAngles - 1) {
2630
+ self.postMessage({
2631
+ type: 'toolpath-progress',
2632
+ data: {
2633
+ percent: Math.round(((globalStripIdx + 1) / numAngles) * 100),
2634
+ current: globalStripIdx + 1,
2635
+ total: numAngles,
2636
+ layer: globalStripIdx + 1
2637
+ }
2638
+ });
2639
+ }
2640
+
2641
+ if (!strip.positions || strip.positions.length === 0) continue;
2642
+
2643
+ // DEBUG: Diagnostic logging (only for global strips 0 and 360, and only when the diagnostic flag is set)
2644
+ if (diagnostic && (globalStripIdx === 0 || globalStripIdx === 360)) {
2645
+ debug.log(`NF3EWAE6 | Strip ${globalStripIdx} (${strip.angle.toFixed(1)}°) INPUT terrain first 5 Z values: ${strip.positions.slice(0, 5).map(v => v.toFixed(3)).join(',')}`);
2646
+ }
2647
+
2648
+ const stripToolpathResult = await runToolpathComputeWithBuffers(
2649
+ strip.positions,
2650
+ strip.gridWidth,
2651
+ strip.gridHeight,
2652
+ xStep,
2653
+ strip.gridHeight,
2654
+ zFloor,
2655
+ reusableBuffers,
2656
+ pipelineStartTime
2657
+ );
2658
+
2659
+ // DEBUG: Verify toolpath generation output
2660
+ if (diagnostic && (globalStripIdx === 0 || globalStripIdx === 360)) {
2661
+ debug.log(`NF3EWAE6 | Strip ${globalStripIdx} (${strip.angle.toFixed(1)}°) OUTPUT toolpath first 5 Z values: ${stripToolpathResult.pathData.slice(0, 5).map(v => v.toFixed(3)).join(',')}`);
2662
+ }
2663
+
2664
+ allStripToolpaths.push({
2665
+ angle: strip.angle,
2666
+ pathData: stripToolpathResult.pathData,
2667
+ numScanlines: stripToolpathResult.numScanlines,
2668
+ pointsPerLine: stripToolpathResult.pointsPerLine,
2669
+ terrainBounds: strip.bounds
2670
+ });
2671
+
2672
+ totalToolpathPoints += stripToolpathResult.pathData.length;
2673
+ }
2674
+ const toolpathTime = performance.now() - toolpathStartTime;
2675
+
2676
+ // Free batch terrain data (drop references to each strip's positions array)
2677
+ for (const strip of batchModelResult.strips) {
2678
+ strip.positions = null;
2679
+ }
2680
+ destroyReusableToolpathBuffers(reusableBuffers);
2681
+
2682
+ const batchTotalTime = performance.now() - batchStartTime;
2683
+
2684
+ Object.assign(batchInfo, {
2685
+ 'prep': batchInfo.prep || 0,
2686
+ 'gpu': batchInfo.gpu || 0, // NOTE(review): the rasterizer's visible Object.assign sets prep/raster/stitch but not gpu, so this likely stays 0 - confirm
2687
+ 'stitch': batchInfo.stitch || 0,
2688
+ 'raster': batchInfo.raster || 0,
2689
+ 'mkbuf': 0, // constant placeholder; no buffer-creation time is measured here
2690
+ 'paths': (toolpathTime | 0),
2691
+ 'strips': allStripToolpaths.length, // cumulative strip count across batches so far, not this batch alone
2692
+ 'total': (batchTotalTime | 0)
2693
+ });
2694
+ }
2695
+
2696
+ console.table(batchTracking);
2697
+
2698
+ // Cleanup cached rasterization buffers after all batches complete; NOTE(review): there is no try/finally here, so an exception thrown inside a batch would skip this cleanup - confirm acceptable
2699
+ if (batchReuseBuffers) {
2700
+ batchReuseBuffers.triangleBuffer.destroy();
2701
+ batchReuseBuffers.triangleIndicesBuffer.destroy();
2702
+ // Note: bucketInfoBuffer is no longer in reusableBuffers (created/destroyed per bucket batch)
2703
+ debug.log(`Destroyed cached GPU buffers after all batches`);
2704
+ }
2705
+
2706
+ const pipelineTotalTime = performance.now() - pipelineStartTime;
2707
+ debug.log(`Complete radial toolpath: ${allStripToolpaths.length} strips, ${totalToolpathPoints} total points in ${pipelineTotalTime.toFixed(0)}ms`);
2708
+
2709
+ return {
2710
+ strips: allStripToolpaths,
2711
+ totalPoints: totalToolpathPoints,
2712
+ numStrips: allStripToolpaths.length
2713
+ };
2788
2714
  }
2789
2715
 
2790
2716
  // Handle messages from main thread
@@ -2800,7 +2726,8 @@ self.onmessage = async function(e) {
2800
2726
  gpuMemorySafetyMargin: 0.8,
2801
2727
  tileOverlapMM: 10,
2802
2728
  autoTiling: true,
2803
- minTileSize: 50
2729
+ minTileSize: 50,
2730
+ batchDivisor: 1 // For testing batching overhead: 1=optimal, 2=2x batches, 4=4x batches, etc.
2804
2731
  };
2805
2732
  const success = await initWebGPU();
2806
2733
  self.postMessage({
@@ -2848,149 +2775,11 @@ self.onmessage = async function(e) {
2848
2775
  break;
2849
2776
 
2850
2777
  case 'radial-generate-toolpaths':
2851
- // Complete radial pipeline: rasterize model + generate toolpaths for all strips
2852
- const {
2853
- triangles: radialModelTriangles,
2854
- bucketData: radialBucketData,
2855
- toolData: radialToolData,
2856
- resolution: radialResolution,
2857
- angleStep: radialAngleStep,
2858
- numAngles: radialNumAngles,
2859
- maxRadius: radialMaxRadius,
2860
- toolWidth: radialToolWidth,
2861
- zFloor: radialToolpathZFloor,
2862
- bounds: radialToolpathBounds,
2863
- xStep: radialXStep,
2864
- yStep: radialYStep,
2865
- gridStep: radialGridStep
2866
- } = data;
2867
-
2868
- debug.log('[Worker] Starting complete radial toolpath pipeline...');
2869
-
2870
- // Batch processing: rasterize angle ranges to avoid memory allocation failure
2871
- const ANGLES_PER_BATCH = 360; // Process 360 angles at a time
2872
- const numBatches = Math.ceil(radialNumAngles / ANGLES_PER_BATCH);
2873
-
2874
- debug.log(`[Worker] Processing ${radialNumAngles} angles in ${numBatches} batch(es) of up to ${ANGLES_PER_BATCH} angles`);
2875
-
2876
- const allStripToolpaths = [];
2877
- let totalToolpathPoints = 0;
2878
- const pipelineStartTime = performance.now();
2879
-
2880
- // Prepare sparse tool once
2881
- const sparseToolData = createSparseToolFromPoints(radialToolData.positions);
2882
- debug.log(`[Worker] Created sparse tool: ${sparseToolData.count} points (reusing for all strips)`);
2883
-
2884
- for (let batchIdx = 0; batchIdx < numBatches; batchIdx++) {
2885
- const startAngleIdx = batchIdx * ANGLES_PER_BATCH;
2886
- const endAngleIdx = Math.min(startAngleIdx + ANGLES_PER_BATCH, radialNumAngles);
2887
- const batchNumAngles = endAngleIdx - startAngleIdx;
2888
- const batchStartAngle = startAngleIdx * radialAngleStep;
2889
-
2890
- debug.log(`[Worker] Batch ${batchIdx + 1}/${numBatches}: angles ${startAngleIdx}-${endAngleIdx - 1} (${batchNumAngles} angles), startAngle=${batchStartAngle.toFixed(1)}°`);
2891
-
2892
- // Rasterize this batch of strips
2893
- const batchModelResult = await radialRasterizeV2(
2894
- radialModelTriangles,
2895
- radialBucketData,
2896
- radialResolution,
2897
- radialAngleStep,
2898
- batchNumAngles,
2899
- radialMaxRadius,
2900
- radialToolWidth,
2901
- radialToolpathZFloor,
2902
- radialToolpathBounds,
2903
- batchStartAngle // Start angle for this batch
2904
- );
2905
-
2906
- debug.log(`[Worker] Batch ${batchIdx + 1}: Rasterized ${batchModelResult.strips.length} strips, first angle=${batchModelResult.strips[0]?.angle.toFixed(1)}°, last angle=${batchModelResult.strips[batchModelResult.strips.length - 1]?.angle.toFixed(1)}°`);
2907
-
2908
- // Find max dimensions for this batch
2909
- let maxStripWidth = 0;
2910
- let maxStripHeight = 0;
2911
- for (const strip of batchModelResult.strips) {
2912
- maxStripWidth = Math.max(maxStripWidth, strip.gridWidth);
2913
- maxStripHeight = Math.max(maxStripHeight, strip.gridHeight);
2914
- }
2915
-
2916
- // Create reusable buffers for this batch
2917
- const reusableBuffers = createReusableToolpathBuffers(maxStripWidth, maxStripHeight, sparseToolData, radialXStep, maxStripHeight);
2918
-
2919
- // Generate toolpaths for this batch
2920
- debug.log(`[Worker] Batch ${batchIdx + 1}: Generating toolpaths for ${batchModelResult.strips.length} strips...`);
2921
- for (let i = 0; i < batchModelResult.strips.length; i++) {
2922
- const strip = batchModelResult.strips[i];
2923
- const globalStripIdx = startAngleIdx + i;
2924
-
2925
- if (globalStripIdx % 10 === 0 || globalStripIdx === radialNumAngles - 1) {
2926
- self.postMessage({
2927
- type: 'toolpath-progress',
2928
- data: {
2929
- percent: Math.round(((globalStripIdx + 1) / radialNumAngles) * 100),
2930
- current: globalStripIdx + 1,
2931
- total: radialNumAngles,
2932
- layer: globalStripIdx + 1
2933
- }
2934
- });
2935
- }
2936
-
2937
- if (!strip.positions || strip.positions.length === 0) continue;
2938
-
2939
- // DEBUG: Diagnostic logging (BUILD_ID gets injected during build)
2940
- // Used to trace data flow through radial toolpath pipeline
2941
- if (globalStripIdx === 0 || globalStripIdx === 360) {
2942
- debug.log(`[Worker] 4XFJ410L | Strip ${globalStripIdx} (${strip.angle.toFixed(1)}°) INPUT terrain first 5 Z values: ${strip.positions.slice(0, 5).map(v => v.toFixed(3)).join(',')}`);
2943
- }
2944
-
2945
- const stripToolpathResult = await runToolpathComputeWithBuffers(
2946
- strip.positions,
2947
- strip.gridWidth,
2948
- strip.gridHeight,
2949
- radialXStep,
2950
- strip.gridHeight,
2951
- radialToolpathZFloor,
2952
- reusableBuffers,
2953
- pipelineStartTime
2954
- );
2955
-
2956
- // DEBUG: Verify toolpath generation output
2957
- if (globalStripIdx === 0 || globalStripIdx === 360) {
2958
- debug.log(`[Worker] 4XFJ410L | Strip ${globalStripIdx} (${strip.angle.toFixed(1)}°) OUTPUT toolpath first 5 Z values: ${stripToolpathResult.pathData.slice(0, 5).map(v => v.toFixed(3)).join(',')}`);
2959
- }
2960
-
2961
- allStripToolpaths.push({
2962
- angle: strip.angle,
2963
- pathData: stripToolpathResult.pathData,
2964
- numScanlines: stripToolpathResult.numScanlines,
2965
- pointsPerLine: stripToolpathResult.pointsPerLine,
2966
- terrainBounds: strip.bounds // Include terrain bounds for display
2967
- });
2968
-
2969
- totalToolpathPoints += stripToolpathResult.pathData.length;
2970
- }
2971
-
2972
- destroyReusableToolpathBuffers(reusableBuffers);
2973
-
2974
- debug.log(`[Worker] Batch ${batchIdx + 1}: Completed, allStripToolpaths now has ${allStripToolpaths.length} strips total`);
2975
-
2976
- // Free batch terrain data
2977
- for (const strip of batchModelResult.strips) {
2978
- strip.positions = null;
2979
- }
2980
- }
2981
-
2982
- const pipelineTotalTime = performance.now() - pipelineStartTime;
2983
- debug.log(`[Worker] Complete radial toolpath: ${allStripToolpaths.length} strips, ${totalToolpathPoints} total points in ${pipelineTotalTime.toFixed(0)}ms`);
2984
-
2985
- const toolpathTransferBuffers = allStripToolpaths.map(strip => strip.pathData.buffer);
2986
-
2778
+ const radialToolpathResult = await generateRadialToolpaths(data);
2779
+ const toolpathTransferBuffers = radialToolpathResult.strips.map(strip => strip.pathData.buffer);
2987
2780
  self.postMessage({
2988
2781
  type: 'radial-toolpaths-complete',
2989
- data: {
2990
- strips: allStripToolpaths,
2991
- totalPoints: totalToolpathPoints,
2992
- numStrips: allStripToolpaths.length
2993
- }
2782
+ data: radialToolpathResult
2994
2783
  }, toolpathTransferBuffers);
2995
2784
  break;
2996
2785