@gridspace/raster-path 1.0.8 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,10 +4,11 @@ Fast browser-based terrain + tool path generator using WebGPU compute shaders.
4
4
 
5
5
  ## Features
6
6
 
7
- - **Dual Mode Rasterization**: Planar (traditional XY grid) and Radial (cylindrical unwrap) modes
7
+ - **Multiple Operational Modes**: Planar (XY grid), Radial (cylindrical), and Tracing (path-following)
8
8
  - **CNC Toolpath Generation**: Generate toolpaths by simulating tool movement over terrain
9
9
  - **GPU Accelerated**: 20-100× faster than CPU-based solutions
10
- - **Unified API**: Clean three-method interface that works uniformly across both modes
10
+ - **Optimized Radial Variants**: V2 (default), V3 (memory-optimized), and V4 (slice-based lathe)
11
+ - **Unified API**: Clean three-method interface that works uniformly across all modes
11
12
  - **ESM Module**: Importable package for browser applications
12
13
 
13
14
  ## Quick Start
@@ -53,7 +54,7 @@ raster.terminate();
53
54
  ### Radial Mode (for cylindrical parts)
54
55
 
55
56
  ```javascript
56
- // Initialize for radial mode
57
+ // Initialize for radial mode (V2 default)
57
58
  const raster = new RasterPath({
58
59
  mode: 'radial',
59
60
  resolution: 0.1, // Radial resolution (mm)
@@ -79,6 +80,62 @@ const toolpathData = await raster.generateToolpaths({
79
80
  console.log(`Generated ${toolpathData.numStrips} strips, ${toolpathData.totalPoints} points`);
80
81
  ```
81
82
 
83
+ **Radial Variants:**
84
+ ```javascript
85
+ // Use V3 (memory-optimized) for large models
86
+ const rasterV3 = new RasterPath({
87
+ mode: 'radial',
88
+ resolution: 0.1,
89
+ rotationStep: 1.0,
90
+ radialV3: true
91
+ });
92
+
93
+ // Use V4 (slice-based lathe, experimental) with pre-sliced data
94
+ const rasterV4 = new RasterPath({
95
+ mode: 'radial',
96
+ resolution: 0.5,
97
+ rotationStep: 1.0,
98
+ radialV4: true
99
+ });
100
+ ```
101
+
102
+ ### Tracing Mode (for path-following)
103
+
104
+ ```javascript
105
+ // Initialize for tracing mode
106
+ const raster = new RasterPath({
107
+ mode: 'tracing',
108
+ resolution: 0.1 // Terrain rasterization resolution
109
+ });
110
+ await raster.init();
111
+
112
+ // Load tool and terrain
113
+ await raster.loadTool({ triangles: toolTriangles });
114
+ await raster.loadTerrain({
115
+ triangles: terrainTriangles,
116
+ zFloor: -100
117
+ });
118
+
119
+ // Define input paths as arrays of XY coordinate pairs
120
+ const paths = [
121
+ new Float32Array([x1, y1, x2, y2, x3, y3, ...]), // Path 1
122
+ new Float32Array([x1, y1, x2, y2, ...]) // Path 2
123
+ ];
124
+
125
+ // Generate toolpaths by tracing along paths
126
+ const toolpathData = await raster.generateToolpaths({
127
+ paths: paths,
128
+ step: 0.5, // Sample every 0.5mm along each path
129
+ zFloor: -100
130
+ });
131
+
132
+ // Output is array of XYZ coordinate arrays (one per path)
133
+ console.log(`Generated ${toolpathData.pathResults.length} traced paths`);
134
+ toolpathData.pathResults.forEach((path, i) => {
135
+ console.log(` Path ${i}: ${path.length / 3} points`);
136
+ });
137
+ ```
138
+
82
139
  ### Demo UI
83
140
 
84
141
  ```bash
@@ -101,6 +158,9 @@ Open http://localhost:3000 and drag STL files onto the interface.
101
158
 
102
159
  ### Radial Mode (Cylindrical Rasterization)
103
160
 
161
+ Three variants are available with different performance characteristics:
162
+
163
+ #### V2 (Default) - Ray-Based Rasterization
104
164
  1. **Tool Rasterization**: Rasterize tool in planar mode (same as above)
105
165
  2. **Terrain Preparation**: Center terrain in YZ plane and store triangles
106
166
  3. **Toolpath Generation**:
@@ -110,6 +170,56 @@ Open http://localhost:3000 and drag STL files onto the interface.
110
170
  - Calculate tool-terrain collisions along each radial strip
111
171
  - Output array of strips (one per angle), each containing Z-heights along X-axis
112
172
 
173
+ #### V3 - Bucket-Angle Pipeline (Memory Optimized)
174
+ Enable with `radialV3: true` option.
175
+
176
+ **Algorithm:**
177
+ 1. **Tool Rasterization**: Same as V2
178
+ 2. **Terrain Preparation**: Bucket triangles by X-coordinate
179
+ 3. **Toolpath Generation** (for each rotation angle):
180
+ - Rotate all triangles in bucket by angle (GPU parallel)
181
+ - Filter by Y-bounds (skip triangles outside tool radius)
182
+ - Rasterize all buckets in single dispatch → dense terrain strip
183
+ - Generate toolpath from strip immediately
184
+
185
+ **Advantages over V2:**
186
+ - Lower memory usage (only one angle's data in GPU at a time)
187
+ - Y-axis filtering reduces unnecessary triangle processing
188
+ - Better cache locality by processing each bucket completely
189
+
190
+ #### V4 - Slice-Based Lathe (Experimental)
191
+ Enable with `radialV4: true` option.
192
+
193
+ **Algorithm:**
194
+ 1. **Tool Rasterization**: Same as V2
195
+ 2. **Terrain Slicing** (CPU): Slice model along X-axis at dense intervals
196
+ - Each slice is a YZ plane intersection → array of line segments
197
+ 3. **Toolpath Generation** (for each rotation angle):
198
+ - Rotate all slice lines around X-axis (CPU)
199
+ - GPU shader traces tool through rotated slices
200
+ - For each X position, ray-cast through corresponding slice to find max Z collision
201
+
202
+ **Advantages:**
203
+ - No rasterization overhead, works directly with geometry
204
+ - CPU/GPU balanced workload
205
+ - Based on proven Kiri:Moto lathePath algorithm
206
+
207
+ **Note:** V4 expects pre-sliced data and is designed for integration with external slicing engines.
208
+
209
+ ### Tracing Mode (Path-Following Toolpath)
210
+
211
+ 1. **Tool Rasterization**: Rasterize tool in planar mode
212
+ 2. **Terrain Rasterization**: Rasterize terrain on XY grid (same as planar mode)
213
+ 3. **Path Sampling**: Sample each input polyline at the specified step resolution (e.g., every 0.5mm)
214
+ 4. **Toolpath Generation**:
215
+ - For each sampled point on each path:
216
+ - Convert world coordinates to terrain grid coordinates
217
+ - Test tool collision at that grid position using planar algorithm
218
+ - Calculate maximum collision Z-height
219
+ - Output array of XYZ coordinate arrays (one per input path)
220
+
221
+ **Use Case:** Generate toolpaths that follow pre-defined paths (e.g., outlines, contours) rather than scanning the entire grid.
222
+
113
223
  ## Performance
114
224
 
115
225
  Example (84×84×28mm model, 6,120 triangles):
@@ -148,9 +258,11 @@ build/ # Built files (generated by npm run build)
148
258
  Constructor: `new RasterPath(options)`
149
259
 
150
260
  **Options**:
151
- - `mode` (string): `'planar'` or `'radial'`
261
+ - `mode` (string): `'planar'`, `'radial'`, or `'tracing'`
152
262
  - `resolution` (number): Grid resolution in mm (e.g., 0.1)
153
263
  - `rotationStep` (number, radial only): Degrees between rays (e.g., 1.0)
264
+ - `radialV3` (boolean, radial only): Enable V3 memory-optimized pipeline (default: false)
265
+ - `radialV4` (boolean, radial only): Enable V4 slice-based lathe pipeline (default: false). Takes precedence over `radialV3` when both are set
154
266
 
155
267
  #### `async init()`
156
268
  Initialize WebGPU worker. Must be called before other methods.
@@ -221,13 +333,22 @@ await raster.loadTerrain({
221
333
 
222
334
  ---
223
335
 
224
- #### `async generateToolpaths({ xStep, yStep, zFloor, onProgress })`
336
+ #### `async generateToolpaths(options)`
225
337
  Generate toolpaths from loaded tool and terrain. Must call `loadTool()` and `loadTerrain()` first.
226
338
 
227
- **Parameters**:
339
+ **Parameters (mode-dependent)**:
340
+
341
+ **Planar and Radial modes:**
228
342
  - `xStep` (number): Sample every Nth point in X direction
229
343
  - `yStep` (number): Sample every Nth point in Y direction
230
344
  - `zFloor` (number): Z floor value for out-of-bounds areas
345
+ - `radiusOffset` (number, radial only): Radial offset in mm
346
+ - `onProgress` (function, optional): Progress callback `(progress: number) => void`
347
+
348
+ **Tracing mode:**
349
+ - `paths` (Array<Float32Array>): Array of input polylines, each a flat array of XY coordinate pairs (`[x1, y1, x2, y2, ...]`)
350
+ - `step` (number): Sample resolution along paths in world units (e.g., 0.5mm)
351
+ - `zFloor` (number): Z floor value for out-of-bounds areas
231
352
  - `onProgress` (function, optional): Progress callback `(percent: number, info: { current, total, chunkIndex, totalChunks }) => void`
232
353
 
233
354
  **Returns**:
@@ -243,15 +364,30 @@ Generate toolpaths from loaded tool and terrain. Must call `loadTool()` and `loa
243
364
  - `numStrips` (number): Total number of strips
244
365
  - `totalPoints` (number): Sum of all points across strips
245
366
 
246
- **Example**:
367
+ - Tracing mode: `Promise<object>` with:
368
+ - `pathResults` (Array<Float32Array>): Array of flat XYZ coordinate arrays (`[x1, y1, z1, x2, y2, z2, ...]`), one per input path, in input order
369
+ - `totalPoints` (number): Sum of all points across paths
370
+
371
+ **Examples**:
247
372
  ```javascript
248
- // Works for both planar and radial modes!
373
+ // Planar and radial modes
249
374
  const toolpathData = await raster.generateToolpaths({
250
375
  xStep: 5,
251
376
  yStep: 5,
252
377
  zFloor: -100,
253
378
  radiusOffset: 20 // radial mode only
254
379
  });
380
+
381
+ // Tracing mode
382
+ const paths = [
383
+ new Float32Array([x1, y1, x2, y2, ...]),
384
+ new Float32Array([x1, y1, x2, y2, ...])
385
+ ];
386
+ const toolpathData = await raster.generateToolpaths({
387
+ paths: paths,
388
+ step: 0.5, // Sample every 0.5mm
389
+ zFloor: -100
390
+ });
255
391
  ```
256
392
 
257
393
  ---
@@ -112,6 +112,7 @@ export class RasterPath {
112
112
  autoTiling: config.autoTiling ?? true,
113
113
  batchDivisor: config.batchDivisor ?? 1, // For testing batching overhead
114
114
  radialV3: config.radialV3 ?? false, // Use radial V3 pipeline (rotate-filter-toolpath)
115
+ radialV4: config.radialV4 ?? false, // Use radial V4 pipeline (slice-based lathe)
115
116
  debug: config.debug,
116
117
  quiet: config.quiet
117
118
  };
@@ -463,7 +464,12 @@ export class RasterPath {
463
464
  // Set up progress handler if callback provided
464
465
  if (onProgress) {
465
466
  const progressHandler = (data) => {
466
- onProgress(data.percent, { current: data.current, total: data.total, pathIndex: data.pathIndex });
467
+ onProgress(data.percent, {
468
+ current: data.current,
469
+ total: data.total,
470
+ chunkIndex: data.chunkIndex,
471
+ totalChunks: data.totalChunks
472
+ });
467
473
  };
468
474
  this.messageHandlers.set('tracing-progress', progressHandler);
469
475
  }
@@ -532,10 +538,12 @@ export class RasterPath {
532
538
  resolve(data);
533
539
  };
534
540
 
535
- // Send entire pipeline to worker (use V3 if configured)
536
- const messageType = this.config.radialV3
537
- ? 'radial-generate-toolpaths-v3'
538
- : 'radial-generate-toolpaths';
541
+ // Send entire pipeline to worker (use V3 or V4 if configured)
542
+ const messageType = this.config.radialV4
543
+ ? 'radial-generate-toolpaths-v4'
544
+ : this.config.radialV3
545
+ ? 'radial-generate-toolpaths-v3'
546
+ : 'radial-generate-toolpaths';
539
547
 
540
548
  this.#sendMessage(
541
549
  messageType,
@@ -567,7 +575,7 @@ export class RasterPath {
567
575
  const { type, success, data } = e.data;
568
576
 
569
577
  // Handle progress messages (don't delete handler)
570
- if (type === 'rasterize-progress' || type === 'toolpath-progress') {
578
+ if (type === 'rasterize-progress' || type === 'toolpath-progress' || type === 'tracing-progress') {
571
579
  const handler = this.messageHandlers.get(type);
572
580
  if (handler) {
573
581
  handler(data);
@@ -2387,7 +2387,7 @@ async function generateRadialToolpaths({
2387
2387
  if (!strip.positions || strip.positions.length === 0)
2388
2388
  continue;
2389
2389
  if (diagnostic && (globalStripIdx === 0 || globalStripIdx === 360)) {
2390
- debug.log(`JRZ5FG9R | Strip ${globalStripIdx} (${strip.angle.toFixed(1)}\xB0) INPUT terrain first 5 Z values: ${strip.positions.slice(0, 5).map((v) => v.toFixed(3)).join(",")}`);
2390
+ debug.log(`E1ZZPX44 | Strip ${globalStripIdx} (${strip.angle.toFixed(1)}\xB0) INPUT terrain first 5 Z values: ${strip.positions.slice(0, 5).map((v) => v.toFixed(3)).join(",")}`);
2391
2391
  }
2392
2392
  const stripToolpathResult = await runToolpathComputeWithBuffers(
2393
2393
  strip.positions,
@@ -2400,7 +2400,7 @@ async function generateRadialToolpaths({
2400
2400
  pipelineStartTime
2401
2401
  );
2402
2402
  if (diagnostic && (globalStripIdx === 0 || globalStripIdx === 360)) {
2403
- debug.log(`JRZ5FG9R | Strip ${globalStripIdx} (${strip.angle.toFixed(1)}\xB0) OUTPUT toolpath first 5 Z values: ${stripToolpathResult.pathData.slice(0, 5).map((v) => v.toFixed(3)).join(",")}`);
2403
+ debug.log(`E1ZZPX44 | Strip ${globalStripIdx} (${strip.angle.toFixed(1)}\xB0) OUTPUT toolpath first 5 Z values: ${stripToolpathResult.pathData.slice(0, 5).map((v) => v.toFixed(3)).join(",")}`);
2404
2404
  }
2405
2405
  allStripToolpaths.push({
2406
2406
  angle: strip.angle,
@@ -2862,59 +2862,115 @@ async function generateTracingToolpaths({
2862
2862
  usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST | GPUBufferUsage.COPY_SRC
2863
2863
  });
2864
2864
  device.queue.writeBuffer(maxZBuffer, 0, maxZInitData);
2865
- const outputPaths = [];
2865
+ debug.log("PHASE 1: Sampling all paths...");
2866
+ const pathIndex = [];
2867
+ const sampledSegments = [];
2866
2868
  let totalSampledPoints = 0;
2867
2869
  for (let pathIdx = 0; pathIdx < paths.length; pathIdx++) {
2868
- const pathStartTime = performance.now();
2869
2870
  const inputPath = paths[pathIdx];
2870
- debug.log(`Processing path ${pathIdx + 1}/${paths.length}: ${inputPath.length / 2} input vertices`);
2871
+ debug.log(`Path ${pathIdx + 1}/${paths.length}: ${inputPath.length / 2} input vertices`);
2871
2872
  const sampledPath = samplePath(inputPath, step);
2872
- const numSampledPoints = sampledPath.length / 2;
2873
- totalSampledPoints += numSampledPoints;
2874
- debug.log(` Sampled to ${numSampledPoints} points`);
2875
- const firstX = sampledPath[0];
2876
- const firstY = sampledPath[1];
2873
+ const numPoints = sampledPath.length / 2;
2874
+ pathIndex.push({
2875
+ startOffset: totalSampledPoints,
2876
+ endOffset: totalSampledPoints + numPoints,
2877
+ numPoints
2878
+ });
2879
+ sampledSegments.push(sampledPath);
2880
+ totalSampledPoints += numPoints;
2881
+ debug.log(` Sampled to ${numPoints} points`);
2882
+ }
2883
+ const unifiedSampledXY = new Float32Array(totalSampledPoints * 2);
2884
+ let writeOffset = 0;
2885
+ for (let pathIdx = 0; pathIdx < sampledSegments.length; pathIdx++) {
2886
+ const sampledPath = sampledSegments[pathIdx];
2887
+ unifiedSampledXY.set(sampledPath, writeOffset * 2);
2888
+ writeOffset += sampledPath.length / 2;
2889
+ }
2890
+ debug.log(`Unified buffer: ${totalSampledPoints} total points from ${paths.length} paths`);
2891
+ if (totalSampledPoints > 0) {
2892
+ const firstX = unifiedSampledXY[0];
2893
+ const firstY = unifiedSampledXY[1];
2877
2894
  const gridX = (firstX - terrainBounds.min.x) / gridStep;
2878
2895
  const gridY = (firstY - terrainBounds.min.y) / gridStep;
2879
- debug.log(` First point: world(${firstX.toFixed(2)}, ${firstY.toFixed(2)}) -> grid(${gridX.toFixed(2)}, ${gridY.toFixed(2)})`);
2880
- debug.log(` Terrain: ${terrainData.width}x${terrainData.height}, bounds: (${terrainBounds.min.x.toFixed(2)}, ${terrainBounds.min.y.toFixed(2)}) to (${terrainBounds.max.x.toFixed(2)}, ${terrainBounds.max.y.toFixed(2)})`);
2881
- const inputBufferSize = sampledPath.byteLength;
2882
- const outputBufferSize = numSampledPoints * 4;
2883
- const estimatedMemory = inputBufferSize + outputBufferSize;
2884
- const configuredLimit = config.maxGPUMemoryMB * 1024 * 1024;
2885
- const deviceLimit = deviceCapabilities.maxStorageBufferBindingSize;
2886
- const maxSafeSize = Math.min(configuredLimit, deviceLimit) * config.gpuMemorySafetyMargin;
2887
- if (estimatedMemory > maxSafeSize) {
2896
+ debug.log(`First point: world(${firstX.toFixed(2)}, ${firstY.toFixed(2)}) -> grid(${gridX.toFixed(2)}, ${gridY.toFixed(2)})`);
2897
+ }
2898
+ debug.log("PHASE 2: Calculating memory budget and chunking...");
2899
+ const bytesPerPoint = 8 + 4 + 4;
2900
+ const configuredLimit = config.maxGPUMemoryMB * 1024 * 1024;
2901
+ const deviceLimit = deviceCapabilities.maxStorageBufferBindingSize;
2902
+ const maxSafeSize = Math.min(configuredLimit, deviceLimit) * config.gpuMemorySafetyMargin;
2903
+ const fixedOverhead = terrainPositions.byteLength + sparseToolData.count * 16 + paths.length * 4 + 48;
2904
+ if (fixedOverhead > maxSafeSize) {
2905
+ if (shouldCleanupBuffers) {
2888
2906
  terrainBuffer.destroy();
2889
2907
  toolBuffer.destroy();
2890
- throw new Error(
2891
- `Path ${pathIdx + 1} exceeds GPU memory limits: ${(estimatedMemory / 1024 / 1024).toFixed(1)}MB > ${(maxSafeSize / 1024 / 1024).toFixed(1)}MB safe limit. Consider reducing step parameter or splitting path.`
2892
- );
2893
2908
  }
2894
- const inputBuffer = device.createBuffer({
2895
- size: sampledPath.byteLength,
2896
- usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
2897
- });
2898
- device.queue.writeBuffer(inputBuffer, 0, sampledPath);
2899
- const outputBuffer = device.createBuffer({
2900
- size: outputBufferSize,
2901
- usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
2909
+ throw new Error(
2910
+ `Fixed buffers (terrain + tool) exceed GPU memory: ${(fixedOverhead / 1024 / 1024).toFixed(1)}MB > ${(maxSafeSize / 1024 / 1024).toFixed(1)}MB. Try reducing terrain resolution or tool density.`
2911
+ );
2912
+ }
2913
+ const availableForPaths = maxSafeSize - fixedOverhead;
2914
+ const maxPointsPerChunkMemory = Math.floor(availableForPaths / bytesPerPoint);
2915
+ const maxWorkgroupsPerDimension = deviceCapabilities.maxComputeWorkgroupsPerDimension || 65535;
2916
+ const threadsPerWorkgroup = 64;
2917
+ const maxPointsPerChunkGPU = maxWorkgroupsPerDimension * threadsPerWorkgroup;
2918
+ const maxPointsPerChunk = Math.min(maxPointsPerChunkMemory, maxPointsPerChunkGPU);
2919
+ debug.log(`Memory budget: ${(maxSafeSize / 1024 / 1024).toFixed(1)}MB safe, ${(availableForPaths / 1024 / 1024).toFixed(1)}MB available for paths`);
2920
+ debug.log(`Memory-based max: ${maxPointsPerChunkMemory.toLocaleString()} points`);
2921
+ debug.log(`GPU dispatch max: ${maxPointsPerChunkGPU.toLocaleString()} points (${maxWorkgroupsPerDimension.toLocaleString()} workgroups)`);
2922
+ debug.log(`Max points per chunk: ${maxPointsPerChunk.toLocaleString()} (limited by ${maxPointsPerChunk === maxPointsPerChunkGPU ? "GPU" : "memory"})`);
2923
+ const chunks = [];
2924
+ let currentStart = 0;
2925
+ while (currentStart < totalSampledPoints) {
2926
+ const currentEnd = Math.min(currentStart + maxPointsPerChunk, totalSampledPoints);
2927
+ chunks.push({
2928
+ startPoint: currentStart,
2929
+ endPoint: currentEnd,
2930
+ numPoints: currentEnd - currentStart
2902
2931
  });
2932
+ currentStart = currentEnd;
2933
+ }
2934
+ debug.log(`Created ${chunks.length} chunk(s) for processing`);
2935
+ debug.log("PHASE 3: Creating reusable GPU buffers...");
2936
+ const inputBuffer = device.createBuffer({
2937
+ size: maxPointsPerChunk * 8,
2938
+ // 2 floats per point
2939
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
2940
+ });
2941
+ const outputBuffer = device.createBuffer({
2942
+ size: maxPointsPerChunk * 4,
2943
+ // 1 float per point
2944
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
2945
+ });
2946
+ const uniformBuffer = device.createBuffer({
2947
+ size: 48,
2948
+ usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
2949
+ });
2950
+ const stagingBuffer = device.createBuffer({
2951
+ size: maxPointsPerChunk * 4,
2952
+ usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
2953
+ });
2954
+ const unifiedOutputZ = new Float32Array(totalSampledPoints);
2955
+ debug.log(`Buffers created for ${maxPointsPerChunk.toLocaleString()} points per chunk`);
2956
+ debug.log("PHASE 4: Processing chunks...");
2957
+ for (let chunkIdx = 0; chunkIdx < chunks.length; chunkIdx++) {
2958
+ const chunk = chunks[chunkIdx];
2959
+ const { startPoint, endPoint, numPoints } = chunk;
2960
+ debug.log(`Processing chunk ${chunkIdx + 1}/${chunks.length}: points ${startPoint}-${endPoint} (${numPoints} points)`);
2961
+ const chunkInputXY = unifiedSampledXY.subarray(startPoint * 2, endPoint * 2);
2962
+ device.queue.writeBuffer(inputBuffer, 0, chunkInputXY);
2903
2963
  const uniformData = new Uint32Array(12);
2904
2964
  uniformData[0] = terrainData.width;
2905
2965
  uniformData[1] = terrainData.height;
2906
2966
  uniformData[2] = sparseToolData.count;
2907
- uniformData[3] = numSampledPoints;
2908
- uniformData[4] = pathIdx;
2967
+ uniformData[3] = numPoints;
2968
+ uniformData[4] = 0;
2909
2969
  const uniformDataFloat = new Float32Array(uniformData.buffer);
2910
2970
  uniformDataFloat[5] = terrainBounds.min.x;
2911
2971
  uniformDataFloat[6] = terrainBounds.min.y;
2912
2972
  uniformDataFloat[7] = gridStep;
2913
2973
  uniformDataFloat[8] = zFloor;
2914
- const uniformBuffer = device.createBuffer({
2915
- size: uniformData.byteLength,
2916
- usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST
2917
- });
2918
2974
  device.queue.writeBuffer(uniformBuffer, 0, uniformData);
2919
2975
  await device.queue.onSubmittedWorkDone();
2920
2976
  const bindGroup = device.createBindGroup({
@@ -2925,6 +2981,7 @@ async function generateTracingToolpaths({
2925
2981
  { binding: 2, resource: { buffer: inputBuffer } },
2926
2982
  { binding: 3, resource: { buffer: outputBuffer } },
2927
2983
  { binding: 4, resource: { buffer: maxZBuffer } },
2984
+ // Keep for shader compatibility
2928
2985
  { binding: 5, resource: { buffer: uniformBuffer } }
2929
2986
  ]
2930
2987
  });
@@ -2932,60 +2989,60 @@ async function generateTracingToolpaths({
2932
2989
  const passEncoder = commandEncoder.beginComputePass();
2933
2990
  passEncoder.setPipeline(cachedTracingPipeline);
2934
2991
  passEncoder.setBindGroup(0, bindGroup);
2935
- const workgroupsX = Math.ceil(numSampledPoints / 64);
2992
+ const workgroupsX = Math.ceil(numPoints / 64);
2936
2993
  passEncoder.dispatchWorkgroups(workgroupsX);
2937
2994
  passEncoder.end();
2938
- const stagingBuffer = device.createBuffer({
2939
- size: outputBufferSize,
2940
- usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
2941
- });
2942
- commandEncoder.copyBufferToBuffer(outputBuffer, 0, stagingBuffer, 0, outputBufferSize);
2995
+ commandEncoder.copyBufferToBuffer(outputBuffer, 0, stagingBuffer, 0, numPoints * 4);
2943
2996
  device.queue.submit([commandEncoder.finish()]);
2944
2997
  await device.queue.onSubmittedWorkDone();
2945
2998
  await stagingBuffer.mapAsync(GPUMapMode.READ);
2946
- const outputDepths = new Float32Array(stagingBuffer.getMappedRange());
2947
- const depthsCopy = new Float32Array(outputDepths);
2999
+ const chunkOutputZ = new Float32Array(stagingBuffer.getMappedRange(), 0, numPoints);
3000
+ unifiedOutputZ.set(chunkOutputZ, startPoint);
2948
3001
  stagingBuffer.unmap();
2949
- const outputXYZ = new Float32Array(numSampledPoints * 3);
2950
- for (let i = 0; i < numSampledPoints; i++) {
2951
- outputXYZ[i * 3 + 0] = sampledPath[i * 2 + 0];
2952
- outputXYZ[i * 3 + 1] = sampledPath[i * 2 + 1];
2953
- outputXYZ[i * 3 + 2] = depthsCopy[i];
2954
- }
2955
- outputPaths.push(outputXYZ);
2956
- inputBuffer.destroy();
2957
- outputBuffer.destroy();
2958
- uniformBuffer.destroy();
2959
- stagingBuffer.destroy();
2960
- const pathTime = performance.now() - pathStartTime;
2961
- debug.log(` Path ${pathIdx + 1} complete: ${numSampledPoints} points in ${pathTime.toFixed(1)}ms`);
3002
+ debug.log(` Chunk ${chunkIdx + 1} complete: ${numPoints} points processed`);
2962
3003
  if (onProgress) {
2963
3004
  onProgress({
2964
3005
  type: "tracing-progress",
2965
3006
  data: {
2966
- percent: Math.round((pathIdx + 1) / paths.length * 100),
2967
- current: pathIdx + 1,
2968
- total: paths.length,
2969
- pathIndex: pathIdx
3007
+ percent: Math.round(endPoint / totalSampledPoints * 100),
3008
+ current: endPoint,
3009
+ total: totalSampledPoints,
3010
+ chunkIndex: chunkIdx + 1,
3011
+ totalChunks: chunks.length
2970
3012
  }
2971
3013
  });
2972
3014
  }
2973
3015
  }
2974
- const maxZStagingBuffer = device.createBuffer({
2975
- size: maxZInitData.byteLength,
2976
- usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
2977
- });
2978
- const maxZCommandEncoder = device.createCommandEncoder();
2979
- maxZCommandEncoder.copyBufferToBuffer(maxZBuffer, 0, maxZStagingBuffer, 0, maxZInitData.byteLength);
2980
- device.queue.submit([maxZCommandEncoder.finish()]);
2981
- await device.queue.onSubmittedWorkDone();
2982
- await maxZStagingBuffer.mapAsync(GPUMapMode.READ);
2983
- const maxZBitsI32 = new Int32Array(maxZStagingBuffer.getMappedRange());
2984
- const maxZBitsCopy = new Int32Array(maxZBitsI32);
2985
- maxZStagingBuffer.unmap();
2986
- const maxZValues = new Float32Array(maxZBitsCopy.buffer);
3016
+ inputBuffer.destroy();
3017
+ outputBuffer.destroy();
3018
+ uniformBuffer.destroy();
3019
+ stagingBuffer.destroy();
3020
+ debug.log("All chunks processed");
3021
+ debug.log("PHASE 5: Remapping to individual paths and computing maxZ...");
3022
+ const outputPaths = [];
3023
+ const maxZValues = new Array(paths.length).fill(zFloor);
3024
+ for (let pathIdx = 0; pathIdx < pathIndex.length; pathIdx++) {
3025
+ const { startOffset, numPoints } = pathIndex[pathIdx];
3026
+ if (numPoints === 0) {
3027
+ outputPaths.push(new Float32Array(0));
3028
+ debug.log(`Path ${pathIdx + 1}: empty`);
3029
+ continue;
3030
+ }
3031
+ const pathXYZ = new Float32Array(numPoints * 3);
3032
+ for (let i = 0; i < numPoints; i++) {
3033
+ const unifiedIdx = startOffset + i;
3034
+ const x = unifiedSampledXY[unifiedIdx * 2 + 0];
3035
+ const y = unifiedSampledXY[unifiedIdx * 2 + 1];
3036
+ const z = unifiedOutputZ[unifiedIdx];
3037
+ pathXYZ[i * 3 + 0] = x;
3038
+ pathXYZ[i * 3 + 1] = y;
3039
+ pathXYZ[i * 3 + 2] = z;
3040
+ maxZValues[pathIdx] = Math.max(maxZValues[pathIdx], z);
3041
+ }
3042
+ outputPaths.push(pathXYZ);
3043
+ debug.log(`Path ${pathIdx + 1}: ${numPoints} points, maxZ=${maxZValues[pathIdx].toFixed(2)}`);
3044
+ }
2987
3045
  maxZBuffer.destroy();
2988
- maxZStagingBuffer.destroy();
2989
3046
  if (shouldCleanupBuffers) {
2990
3047
  terrainBuffer.destroy();
2991
3048
  toolBuffer.destroy();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@gridspace/raster-path",
3
- "version": "1.0.8",
3
+ "version": "1.1.0",
4
4
  "private": false,
5
5
  "description": "Terrain and Tool Raster Path Finder using WebGPU",
6
6
  "type": "module",
@@ -272,70 +272,172 @@ export async function generateTracingToolpaths({
272
272
  });
273
273
  device.queue.writeBuffer(maxZBuffer, 0, maxZInitData);
274
274
 
275
- // Process each path
276
- const outputPaths = [];
275
+ // ═══════════════════════════════════════════════════════════════════════
276
+ // PHASE 1: Sample all paths and build unified buffer
277
+ // ═══════════════════════════════════════════════════════════════════════
278
+ debug.log('PHASE 1: Sampling all paths...');
279
+ const pathIndex = []; // Maps path ID → unified buffer offsets
280
+ const sampledSegments = [];
277
281
  let totalSampledPoints = 0;
278
282
 
279
283
  for (let pathIdx = 0; pathIdx < paths.length; pathIdx++) {
280
- const pathStartTime = performance.now();
281
284
  const inputPath = paths[pathIdx];
282
-
283
- debug.log(`Processing path ${pathIdx + 1}/${paths.length}: ${inputPath.length / 2} input vertices`);
285
+ debug.log(`Path ${pathIdx + 1}/${paths.length}: ${inputPath.length / 2} input vertices`);
284
286
 
285
287
  // Sample path at specified resolution
286
288
  const sampledPath = samplePath(inputPath, step);
287
- const numSampledPoints = sampledPath.length / 2;
288
- totalSampledPoints += numSampledPoints;
289
+ const numPoints = sampledPath.length / 2;
290
+
291
+ pathIndex.push({
292
+ startOffset: totalSampledPoints,
293
+ endOffset: totalSampledPoints + numPoints,
294
+ numPoints: numPoints
295
+ });
289
296
 
290
- debug.log(` Sampled to ${numSampledPoints} points`);
297
+ sampledSegments.push(sampledPath);
298
+ totalSampledPoints += numPoints;
291
299
 
292
- // Debug: Log first sampled point and its grid coordinates
293
- const firstX = sampledPath[0];
294
- const firstY = sampledPath[1];
300
+ debug.log(` Sampled to ${numPoints} points`);
301
+ }
302
+
303
+ // Concatenate all sampled paths into unified buffer
304
+ const unifiedSampledXY = new Float32Array(totalSampledPoints * 2);
305
+ let writeOffset = 0;
306
+
307
+ for (let pathIdx = 0; pathIdx < sampledSegments.length; pathIdx++) {
308
+ const sampledPath = sampledSegments[pathIdx];
309
+ unifiedSampledXY.set(sampledPath, writeOffset * 2);
310
+ writeOffset += sampledPath.length / 2;
311
+ }
312
+
313
+ debug.log(`Unified buffer: ${totalSampledPoints} total points from ${paths.length} paths`);
314
+
315
+ // Debug: Log first sampled point
316
+ if (totalSampledPoints > 0) {
317
+ const firstX = unifiedSampledXY[0];
318
+ const firstY = unifiedSampledXY[1];
295
319
  const gridX = (firstX - terrainBounds.min.x) / gridStep;
296
320
  const gridY = (firstY - terrainBounds.min.y) / gridStep;
297
- debug.log(` First point: world(${firstX.toFixed(2)}, ${firstY.toFixed(2)}) -> grid(${gridX.toFixed(2)}, ${gridY.toFixed(2)})`);
298
- debug.log(` Terrain: ${terrainData.width}x${terrainData.height}, bounds: (${terrainBounds.min.x.toFixed(2)}, ${terrainBounds.min.y.toFixed(2)}) to (${terrainBounds.max.x.toFixed(2)}, ${terrainBounds.max.y.toFixed(2)})`);
299
-
300
- // Check GPU memory limits
301
- const inputBufferSize = sampledPath.byteLength;
302
- const outputBufferSize = numSampledPoints * 4; // 4 bytes per float (Z only)
303
- const estimatedMemory = inputBufferSize + outputBufferSize;
304
- const configuredLimit = config.maxGPUMemoryMB * 1024 * 1024;
305
- const deviceLimit = deviceCapabilities.maxStorageBufferBindingSize;
306
- const maxSafeSize = Math.min(configuredLimit, deviceLimit) * config.gpuMemorySafetyMargin;
307
-
308
- if (estimatedMemory > maxSafeSize) {
321
+ debug.log(`First point: world(${firstX.toFixed(2)}, ${firstY.toFixed(2)}) -> grid(${gridX.toFixed(2)}, ${gridY.toFixed(2)})`);
322
+ }
323
+
324
+ // ═══════════════════════════════════════════════════════════════════════
325
+ // PHASE 2: Calculate memory budget and create chunks
326
+ // ═══════════════════════════════════════════════════════════════════════
327
+ debug.log('PHASE 2: Calculating memory budget and chunking...');
328
+ const bytesPerPoint = 8 + 4 + 4; // XY input (8) + Z output (4) + Z staging (4)
329
+ const configuredLimit = config.maxGPUMemoryMB * 1024 * 1024;
330
+ const deviceLimit = deviceCapabilities.maxStorageBufferBindingSize;
331
+ const maxSafeSize = Math.min(configuredLimit, deviceLimit) * config.gpuMemorySafetyMargin;
332
+
333
+ // Fixed overhead: terrain, tool, maxZ, uniforms
334
+ const fixedOverhead = terrainPositions.byteLength +
335
+ (sparseToolData.count * 16) +
336
+ (paths.length * 4) +
337
+ 48;
338
+
339
+ if (fixedOverhead > maxSafeSize) {
340
+ if (shouldCleanupBuffers) {
309
341
  terrainBuffer.destroy();
310
342
  toolBuffer.destroy();
311
- throw new Error(
312
- `Path ${pathIdx + 1} exceeds GPU memory limits: ` +
313
- `${(estimatedMemory / 1024 / 1024).toFixed(1)}MB > ` +
314
- `${(maxSafeSize / 1024 / 1024).toFixed(1)}MB safe limit. ` +
315
- `Consider reducing step parameter or splitting path.`
316
- );
317
343
  }
344
+ throw new Error(
345
+ `Fixed buffers (terrain + tool) exceed GPU memory: ` +
346
+ `${(fixedOverhead / 1024 / 1024).toFixed(1)}MB > ` +
347
+ `${(maxSafeSize / 1024 / 1024).toFixed(1)}MB. ` +
348
+ `Try reducing terrain resolution or tool density.`
349
+ );
350
+ }
318
351
 
319
- // Create GPU buffers for this path
320
- const inputBuffer = device.createBuffer({
321
- size: sampledPath.byteLength,
322
- usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
352
+ const availableForPaths = maxSafeSize - fixedOverhead;
353
+ const maxPointsPerChunkMemory = Math.floor(availableForPaths / bytesPerPoint);
354
+
355
+ // GPU dispatch limit: 65535 workgroups per dimension, 64 threads per workgroup
356
+ const maxWorkgroupsPerDimension = deviceCapabilities.maxComputeWorkgroupsPerDimension || 65535;
357
+ const threadsPerWorkgroup = 64;
358
+ const maxPointsPerChunkGPU = maxWorkgroupsPerDimension * threadsPerWorkgroup;
359
+
360
+ // Use the smaller of memory limit and GPU dispatch limit
361
+ const maxPointsPerChunk = Math.min(maxPointsPerChunkMemory, maxPointsPerChunkGPU);
362
+
363
+ debug.log(`Memory budget: ${(maxSafeSize / 1024 / 1024).toFixed(1)}MB safe, ${(availableForPaths / 1024 / 1024).toFixed(1)}MB available for paths`);
364
+ debug.log(`Memory-based max: ${maxPointsPerChunkMemory.toLocaleString()} points`);
365
+ debug.log(`GPU dispatch max: ${maxPointsPerChunkGPU.toLocaleString()} points (${maxWorkgroupsPerDimension.toLocaleString()} workgroups)`);
366
+ debug.log(`Max points per chunk: ${maxPointsPerChunk.toLocaleString()} (limited by ${maxPointsPerChunk === maxPointsPerChunkGPU ? 'GPU' : 'memory'})`);
367
+
368
+ // Create chunks
369
+ const chunks = [];
370
+ let currentStart = 0;
371
+ while (currentStart < totalSampledPoints) {
372
+ const currentEnd = Math.min(currentStart + maxPointsPerChunk, totalSampledPoints);
373
+ chunks.push({
374
+ startPoint: currentStart,
375
+ endPoint: currentEnd,
376
+ numPoints: currentEnd - currentStart
323
377
  });
324
- device.queue.writeBuffer(inputBuffer, 0, sampledPath);
378
+ currentStart = currentEnd;
379
+ }
325
380
 
326
- const outputBuffer = device.createBuffer({
327
- size: outputBufferSize,
328
- usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
329
- });
381
+ debug.log(`Created ${chunks.length} chunk(s) for processing`);
382
+
383
+ // ═══════════════════════════════════════════════════════════════════════
384
+ // PHASE 3: Create reusable GPU buffers (buffer pool pattern)
385
+ // ═══════════════════════════════════════════════════════════════════════
386
+ debug.log('PHASE 3: Creating reusable GPU buffers...');
387
+
388
+ // Input buffer: XY pairs for sampled points
389
+ const inputBuffer = device.createBuffer({
390
+ size: maxPointsPerChunk * 8, // 2 floats per point
391
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
392
+ });
393
+
394
+ // Output buffer: Z depths
395
+ const outputBuffer = device.createBuffer({
396
+ size: maxPointsPerChunk * 4, // 1 float per point
397
+ usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC,
398
+ });
399
+
400
+ // Uniform buffer
401
+ const uniformBuffer = device.createBuffer({
402
+ size: 48,
403
+ usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
404
+ });
405
+
406
+ // Staging buffer for readback
407
+ const stagingBuffer = device.createBuffer({
408
+ size: maxPointsPerChunk * 4,
409
+ usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
410
+ });
411
+
412
+ // Unified output array (filled chunk-by-chunk)
413
+ const unifiedOutputZ = new Float32Array(totalSampledPoints);
414
+
415
+ debug.log(`Buffers created for ${maxPointsPerChunk.toLocaleString()} points per chunk`);
330
416
 
331
- // Create uniforms (aligned to match shader struct)
332
- // Struct: 5 u32s + 4 f32s = 36 bytes, padded to 48 bytes for alignment
417
+ // ═══════════════════════════════════════════════════════════════════════
418
+ // PHASE 4: Process each chunk with single GPU dispatch
419
+ // ═══════════════════════════════════════════════════════════════════════
420
+ debug.log('PHASE 4: Processing chunks...');
421
+
422
+ for (let chunkIdx = 0; chunkIdx < chunks.length; chunkIdx++) {
423
+ const chunk = chunks[chunkIdx];
424
+ const { startPoint, endPoint, numPoints } = chunk;
425
+
426
+ debug.log(`Processing chunk ${chunkIdx + 1}/${chunks.length}: points ${startPoint}-${endPoint} (${numPoints} points)`);
427
+
428
+ // Extract chunk slice from unified buffer
429
+ const chunkInputXY = unifiedSampledXY.subarray(startPoint * 2, endPoint * 2);
430
+
431
+ // Upload to GPU (reuse same buffers)
432
+ device.queue.writeBuffer(inputBuffer, 0, chunkInputXY);
433
+
434
+ // Update uniforms for this chunk
333
435
  const uniformData = new Uint32Array(12); // 48 bytes
334
436
  uniformData[0] = terrainData.width;
335
437
  uniformData[1] = terrainData.height;
336
438
  uniformData[2] = sparseToolData.count;
337
- uniformData[3] = numSampledPoints;
338
- uniformData[4] = pathIdx; // path_index for maxZ buffer indexing
439
+ uniformData[3] = numPoints; // point_count for THIS CHUNK
440
+ uniformData[4] = 0; // path_index (unused, maxZ computed on CPU)
339
441
 
340
442
  const uniformDataFloat = new Float32Array(uniformData.buffer);
341
443
  uniformDataFloat[5] = terrainBounds.min.x;
@@ -343,16 +445,12 @@ export async function generateTracingToolpaths({
343
445
  uniformDataFloat[7] = gridStep;
344
446
  uniformDataFloat[8] = zFloor;
345
447
 
346
- const uniformBuffer = device.createBuffer({
347
- size: uniformData.byteLength,
348
- usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
349
- });
350
448
  device.queue.writeBuffer(uniformBuffer, 0, uniformData);
351
449
 
352
- // Wait for buffer uploads
450
+ // Wait for uploads
353
451
  await device.queue.onSubmittedWorkDone();
354
452
 
355
- // Create bind group
453
+ // Create bind group (same bindings as before)
356
454
  const bindGroup = device.createBindGroup({
357
455
  layout: cachedTracingPipeline.getBindGroupLayout(0),
358
456
  entries: [
@@ -360,28 +458,23 @@ export async function generateTracingToolpaths({
360
458
  { binding: 1, resource: { buffer: toolBuffer } },
361
459
  { binding: 2, resource: { buffer: inputBuffer } },
362
460
  { binding: 3, resource: { buffer: outputBuffer } },
363
- { binding: 4, resource: { buffer: maxZBuffer } },
461
+ { binding: 4, resource: { buffer: maxZBuffer } }, // Keep for shader compatibility
364
462
  { binding: 5, resource: { buffer: uniformBuffer } },
365
463
  ],
366
464
  });
367
465
 
368
- // Dispatch compute shader
466
+ // Single GPU dispatch for entire chunk
369
467
  const commandEncoder = device.createCommandEncoder();
370
468
  const passEncoder = commandEncoder.beginComputePass();
371
469
  passEncoder.setPipeline(cachedTracingPipeline);
372
470
  passEncoder.setBindGroup(0, bindGroup);
373
471
 
374
- const workgroupsX = Math.ceil(numSampledPoints / 64);
472
+ const workgroupsX = Math.ceil(numPoints / 64);
375
473
  passEncoder.dispatchWorkgroups(workgroupsX);
376
474
  passEncoder.end();
377
475
 
378
476
  // Copy output to staging buffer
379
- const stagingBuffer = device.createBuffer({
380
- size: outputBufferSize,
381
- usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
382
- });
383
-
384
- commandEncoder.copyBufferToBuffer(outputBuffer, 0, stagingBuffer, 0, outputBufferSize);
477
+ commandEncoder.copyBufferToBuffer(outputBuffer, 0, stagingBuffer, 0, numPoints * 4);
385
478
  device.queue.submit([commandEncoder.finish()]);
386
479
 
387
480
  // Wait for GPU to finish
@@ -389,65 +482,79 @@ export async function generateTracingToolpaths({
389
482
 
390
483
  // Read back results
391
484
  await stagingBuffer.mapAsync(GPUMapMode.READ);
392
- const outputDepths = new Float32Array(stagingBuffer.getMappedRange());
393
- const depthsCopy = new Float32Array(outputDepths);
394
- stagingBuffer.unmap();
395
-
396
- // Build XYZ output array
397
- const outputXYZ = new Float32Array(numSampledPoints * 3);
398
- for (let i = 0; i < numSampledPoints; i++) {
399
- outputXYZ[i * 3 + 0] = sampledPath[i * 2 + 0]; // X
400
- outputXYZ[i * 3 + 1] = sampledPath[i * 2 + 1]; // Y
401
- outputXYZ[i * 3 + 2] = depthsCopy[i]; // Z
402
- }
485
+ const chunkOutputZ = new Float32Array(stagingBuffer.getMappedRange(), 0, numPoints);
403
486
 
404
- outputPaths.push(outputXYZ);
487
+ // Copy to unified output array
488
+ unifiedOutputZ.set(chunkOutputZ, startPoint);
405
489
 
406
- // Cleanup path-specific buffers
407
- inputBuffer.destroy();
408
- outputBuffer.destroy();
409
- uniformBuffer.destroy();
410
- stagingBuffer.destroy();
490
+ stagingBuffer.unmap();
411
491
 
412
- const pathTime = performance.now() - pathStartTime;
413
- debug.log(` Path ${pathIdx + 1} complete: ${numSampledPoints} points in ${pathTime.toFixed(1)}ms`);
492
+ debug.log(` Chunk ${chunkIdx + 1} complete: ${numPoints} points processed`);
414
493
 
415
- // Report progress
494
+ // Report progress (point-based, not path-based)
416
495
  if (onProgress) {
417
496
  onProgress({
418
497
  type: 'tracing-progress',
419
498
  data: {
420
- percent: Math.round(((pathIdx + 1) / paths.length) * 100),
421
- current: pathIdx + 1,
422
- total: paths.length,
423
- pathIndex: pathIdx
499
+ percent: Math.round((endPoint / totalSampledPoints) * 100),
500
+ current: endPoint,
501
+ total: totalSampledPoints,
502
+ chunkIndex: chunkIdx + 1,
503
+ totalChunks: chunks.length
424
504
  }
425
505
  });
426
506
  }
427
507
  }
428
508
 
429
- // Read back maxZ buffer
430
- const maxZStagingBuffer = device.createBuffer({
431
- size: maxZInitData.byteLength,
432
- usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
433
- });
509
+ // Cleanup reusable buffers
510
+ inputBuffer.destroy();
511
+ outputBuffer.destroy();
512
+ uniformBuffer.destroy();
513
+ stagingBuffer.destroy();
514
+
515
+ debug.log('All chunks processed');
516
+
517
+ // ═══════════════════════════════════════════════════════════════════════
518
+ // PHASE 5: Remap unified output back to individual paths & compute maxZ
519
+ // ═══════════════════════════════════════════════════════════════════════
520
+ debug.log('PHASE 5: Remapping to individual paths and computing maxZ...');
521
+
522
+ const outputPaths = [];
523
+ const maxZValues = new Array(paths.length).fill(zFloor);
524
+
525
+ for (let pathIdx = 0; pathIdx < pathIndex.length; pathIdx++) {
526
+ const { startOffset, numPoints } = pathIndex[pathIdx];
527
+
528
+ if (numPoints === 0) {
529
+ outputPaths.push(new Float32Array(0)); // Empty path
530
+ debug.log(`Path ${pathIdx + 1}: empty`);
531
+ continue;
532
+ }
533
+
534
+ // Allocate XYZ output
535
+ const pathXYZ = new Float32Array(numPoints * 3);
434
536
 
435
- const maxZCommandEncoder = device.createCommandEncoder();
436
- maxZCommandEncoder.copyBufferToBuffer(maxZBuffer, 0, maxZStagingBuffer, 0, maxZInitData.byteLength);
437
- device.queue.submit([maxZCommandEncoder.finish()]);
438
- await device.queue.onSubmittedWorkDone();
537
+ // Copy from unified buffers + compute maxZ
538
+ for (let i = 0; i < numPoints; i++) {
539
+ const unifiedIdx = startOffset + i;
540
+ const x = unifiedSampledXY[unifiedIdx * 2 + 0];
541
+ const y = unifiedSampledXY[unifiedIdx * 2 + 1];
542
+ const z = unifiedOutputZ[unifiedIdx];
439
543
 
440
- await maxZStagingBuffer.mapAsync(GPUMapMode.READ);
441
- const maxZBitsI32 = new Int32Array(maxZStagingBuffer.getMappedRange());
442
- const maxZBitsCopy = new Int32Array(maxZBitsI32);
443
- maxZStagingBuffer.unmap();
544
+ pathXYZ[i * 3 + 0] = x;
545
+ pathXYZ[i * 3 + 1] = y;
546
+ pathXYZ[i * 3 + 2] = z;
444
547
 
445
- // Convert i32 bits back to f32 values
446
- const maxZValues = new Float32Array(maxZBitsCopy.buffer);
548
+ // Track max Z for this path (CPU-side)
549
+ maxZValues[pathIdx] = Math.max(maxZValues[pathIdx], z);
550
+ }
447
551
 
448
- // Cleanup buffers
552
+ outputPaths.push(pathXYZ);
553
+ debug.log(`Path ${pathIdx + 1}: ${numPoints} points, maxZ=${maxZValues[pathIdx].toFixed(2)}`);
554
+ }
555
+
556
+ // Cleanup maxZ buffer (was only used for shader compatibility)
449
557
  maxZBuffer.destroy();
450
- maxZStagingBuffer.destroy();
451
558
 
452
559
  // Cleanup temporary buffers only (don't destroy cached buffers)
453
560
  if (shouldCleanupBuffers) {
@@ -468,25 +575,19 @@ export async function generateTracingToolpaths({
468
575
  }
469
576
 
470
577
  /**
471
- * TODO: Batched path processing
578
+ * IMPLEMENTATION NOTE: Unified Batching System
472
579
  *
473
- * OPTIMIZATION OPPORTUNITY:
474
- * Currently processes one path at a time. For better GPU utilization:
580
+ * generateTracingToolpaths() uses a unified batching approach for optimal performance:
475
581
  *
476
- * 1. Concatenate all sampled paths into single input buffer
477
- * 2. Create offset table: [path1Start, path1End, path2Start, path2End, ...]
478
- * 3. Single GPU dispatch processes all paths
479
- * 4. Split output buffer back into individual path arrays
582
+ * 1. All paths are sampled and concatenated into a single unified buffer
583
+ * 2. The unified buffer is chunked by GPU memory and dispatch limits (handles giant paths)
584
+ * 3. Each chunk is processed with a single GPU dispatch (reduces overhead)
585
+ * 4. Output is remapped back to individual path arrays
586
+ * 5. MaxZ is computed on CPU (avoids complex GPU atomic coordination)
480
587
  *
481
588
  * BENEFITS:
482
- * - Reduce GPU dispatch overhead (N dispatches → 1 dispatch)
483
- * - Better GPU occupancy (more threads active)
484
- * - Fewer buffer create/destroy cycles
485
- *
486
- * COMPLEXITY:
487
- * - Need offset management in shader or CPU-side splitting
488
- * - Memory limit checking becomes more complex
489
- * - Progress reporting granularity reduced (can still report workgroup completion)
490
- *
491
- * ESTIMATE: 2-5x speedup for many small paths, minimal benefit for few large paths
589
+ * - Handles paths that exceed GPU memory limits (automatic chunking)
590
+ * - Reduces GPU dispatch overhead (10-100x for many small paths)
591
+ * - Better progress tracking (point-based instead of path-based)
592
+ * - Buffer pool pattern reduces allocation overhead
492
593
  */
@@ -112,6 +112,7 @@ export class RasterPath {
112
112
  autoTiling: config.autoTiling ?? true,
113
113
  batchDivisor: config.batchDivisor ?? 1, // For testing batching overhead
114
114
  radialV3: config.radialV3 ?? false, // Use radial V3 pipeline (rotate-filter-toolpath)
115
+ radialV4: config.radialV4 ?? false, // Use radial V4 pipeline (slice-based lathe)
115
116
  debug: config.debug,
116
117
  quiet: config.quiet
117
118
  };
@@ -463,7 +464,12 @@ export class RasterPath {
463
464
  // Set up progress handler if callback provided
464
465
  if (onProgress) {
465
466
  const progressHandler = (data) => {
466
- onProgress(data.percent, { current: data.current, total: data.total, pathIndex: data.pathIndex });
467
+ onProgress(data.percent, {
468
+ current: data.current,
469
+ total: data.total,
470
+ chunkIndex: data.chunkIndex,
471
+ totalChunks: data.totalChunks
472
+ });
467
473
  };
468
474
  this.messageHandlers.set('tracing-progress', progressHandler);
469
475
  }
@@ -532,10 +538,12 @@ export class RasterPath {
532
538
  resolve(data);
533
539
  };
534
540
 
535
- // Send entire pipeline to worker (use V3 if configured)
536
- const messageType = this.config.radialV3
537
- ? 'radial-generate-toolpaths-v3'
538
- : 'radial-generate-toolpaths';
541
+ // Send entire pipeline to worker (use V3 or V4 if configured)
542
+ const messageType = this.config.radialV4
543
+ ? 'radial-generate-toolpaths-v4'
544
+ : this.config.radialV3
545
+ ? 'radial-generate-toolpaths-v3'
546
+ : 'radial-generate-toolpaths';
539
547
 
540
548
  this.#sendMessage(
541
549
  messageType,
@@ -567,7 +575,7 @@ export class RasterPath {
567
575
  const { type, success, data } = e.data;
568
576
 
569
577
  // Handle progress messages (don't delete handler)
570
- if (type === 'rasterize-progress' || type === 'toolpath-progress') {
578
+ if (type === 'rasterize-progress' || type === 'toolpath-progress' || type === 'tracing-progress') {
571
579
  const handler = this.messageHandlers.get(type);
572
580
  if (handler) {
573
581
  handler(data);