@gridspace/raster-path 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,104 @@
1
+ /**
2
+ * ═══════════════════════════════════════════════════════════════════════════
3
+ * Raster Tool - Sparse Tool Representation
4
+ * ═══════════════════════════════════════════════════════════════════════════
5
+ *
6
+ * Utilities for creating sparse tool representations from rasterized tool data.
7
+ * Converts dense grid data into compact offset-based format for GPU toolpath
8
+ * generation.
9
+ *
10
+ * EXPORTS:
11
+ * ────────
12
+ * Functions:
13
+ * - createSparseToolFromPoints(points) - Convert tool points to sparse format
14
+ *
15
+ * DATA FORMATS:
16
+ * ─────────────
17
+ * Input (Dense Grid):
18
+ * - Float32Array of [gridX, gridY, Z, gridX, gridY, Z, ...]
19
+ * - gridX, gridY are integer grid indices (not world coordinates)
20
+ * - Z is height in world units (mm)
21
+ *
22
+ * Output (Sparse Tool):
23
+ * {
24
+ * count: number, // Number of tool points
25
+ * xOffsets: Int32Array, // X offset from tool center (grid cells)
26
+ * yOffsets: Int32Array, // Y offset from tool center (grid cells)
27
+ * zValues: Float32Array, // Z height (mm)
28
+ * referenceZ: number // Tool tip Z (lowest point)
29
+ * }
30
+ *
31
+ * ALGORITHM:
32
+ * ──────────
33
+ * 1. Find bounding box in grid space (integer coordinates)
34
+ * 2. Calculate tool center (grid coordinates)
35
+ * 3. Convert each point to offset from center
36
+ * 4. Store as parallel arrays for GPU consumption
37
+ *
38
+ * This sparse representation reduces memory usage and improves GPU cache
39
+ * coherency during toolpath generation, as the tool can be "stamped" at
40
+ * each position using simple offset arithmetic.
41
+ *
42
+ * ═══════════════════════════════════════════════════════════════════════════
43
+ */
44
+
45
// Create sparse tool representation from rasterized points
// Points come from GPU as [gridX, gridY, Z] - pure integer grid coordinates for X/Y
/**
 * Convert dense [gridX, gridY, Z, ...] tool points into a sparse,
 * center-relative representation suitable for GPU "stamping".
 *
 * @param {Float32Array|number[]} points - Flat triples; gridX/gridY are grid
 *   indices (integer-valued floats), Z is height in world units (mm).
 * @returns {{count: number, xOffsets: Int32Array, yOffsets: Int32Array,
 *   zValues: Float32Array, referenceZ: number}} Parallel offset arrays plus
 *   the tool-tip Z (lowest point) as referenceZ.
 * @throws {Error} If points is missing/empty or not a multiple of 3 in length.
 */
export function createSparseToolFromPoints(points) {
  if (!points || points.length === 0) {
    throw new Error('No tool points provided');
  }
  if (points.length % 3 !== 0) {
    // Malformed input would otherwise silently produce NaN offsets.
    throw new Error('Tool point data must be flat [gridX, gridY, Z] triples');
  }

  const count = points.length / 3;

  // Pass 1: bounding box in grid space and the tool tip Z (lowest point).
  let minGridX = Infinity, minGridY = Infinity, minZ = Infinity;
  let maxGridX = -Infinity, maxGridY = -Infinity;

  for (let i = 0; i < points.length; i += 3) {
    const gridX = points[i]; // Already a grid index
    const gridY = points[i + 1]; // Already a grid index
    const z = points[i + 2];

    minGridX = Math.min(minGridX, gridX);
    maxGridX = Math.max(maxGridX, gridX);
    minGridY = Math.min(minGridY, gridY);
    maxGridY = Math.max(maxGridY, gridY);
    minZ = Math.min(minZ, z);
  }

  // Tool center in grid coordinates (pure integer arithmetic).
  const width = Math.floor(maxGridX - minGridX) + 1;
  const height = Math.floor(maxGridY - minGridY) + 1;
  const centerX = Math.floor(minGridX) + Math.floor(width / 2);
  const centerY = Math.floor(minGridY) + Math.floor(height / 2);

  // Pass 2: write offsets straight into preallocated typed arrays (the count
  // is known up front, so no intermediate growable arrays are needed).
  const xOffsets = new Int32Array(count);
  const yOffsets = new Int32Array(count);
  const zValues = new Float32Array(count);

  for (let i = 0, j = 0; i < points.length; i += 3, j++) {
    xOffsets[j] = Math.floor(points[i]) - centerX;
    yOffsets[j] = Math.floor(points[i + 1]) - centerY;
    // Z is stored as the ABSOLUTE height; referenceZ carries the tip, so
    // consumers derive tip-relative heights as (z - referenceZ) if needed.
    zValues[j] = points[i + 2];
  }

  return {
    count,
    xOffsets,
    yOffsets,
    zValues,
    referenceZ: minZ
  };
}
@@ -0,0 +1,152 @@
1
+ /**
2
+ * ═══════════════════════════════════════════════════════════════════════════
3
+ * WebGPU Worker - GPU Compute Operations
4
+ * ═══════════════════════════════════════════════════════════════════════════
5
+ *
6
+ * Offloads all WebGPU compute operations to a worker thread to prevent UI blocking.
7
+ * Handles both planar (XY grid) and radial (cylindrical) rasterization modes.
8
+ *
9
+ * MESSAGE PROTOCOL:
10
+ * ─────────────────
11
+ * Main Thread → Worker:
12
+ * 'init' - Initialize WebGPU device
13
+  *   'rasterize'                - Rasterize geometry to XY grid
14
+  *   'generate-toolpath'        - Generate planar toolpath from rasters
+  *   'update-config'            - Update worker configuration
15
+ * 'radial-generate-toolpaths'- Generate radial toolpaths (does rasterization + toolpath)
16
+ * 'calibrate' - Run GPU workload calibration
17
+ *
18
+ * Worker → Main Thread:
19
+ * 'webgpu-ready' - Initialization complete
20
+ * 'rasterize-complete' - Planar rasterization complete
21
+ * 'rasterize-progress' - Progress update (0-1)
22
+ * 'toolpath-complete' - Planar toolpath complete
23
+ * 'toolpath-progress' - Progress update (0-1)
24
+ * 'radial-toolpaths-complete' - Radial toolpaths complete
25
+ * 'calibrate-complete' - Calibration results
26
+ *
27
+ * ARCHITECTURE:
28
+ * ─────────────
29
+ * 1. PLANAR MODE:
30
+ * - Rasterize terrain: XY grid, keep max Z per cell
31
+ * - Rasterize tool: XY grid, keep min Z per cell
32
+ * - Generate toolpath: Scan tool over terrain, compute Z-heights
33
+ *
34
+ * 2. RADIAL MODE:
35
+ * - Batched processing: 360 angles per batch
36
+ * - X-bucketing: Spatial partitioning to reduce triangle tests
37
+ * - For each angle:
38
+ * * Cast ray from origin
39
+ * * Rasterize terrain triangles along ray
40
+ * * Calculate tool-terrain collision
41
+ * * Output Z-heights along X-axis
42
+ *
43
+ * MEMORY MANAGEMENT:
44
+ * ──────────────────
45
+ * - All GPU buffers are preallocated to known maximum sizes
46
+ * - Triangle data transferred once per operation
47
+ * - Output buffers mapped asynchronously to avoid blocking
48
+ * - Worker maintains pipeline cache to avoid recompilation
49
+ *
50
+ * ═══════════════════════════════════════════════════════════════════════════
51
+ */
52
+
53
+ import { initWebGPU, setConfig, updateConfig, deviceCapabilities, debug, device } from './raster-config.js';
54
+ import { rasterizeMesh } from './raster-planar.js';
55
+ import { generateToolpath } from './path-planar.js';
56
+ import { generateRadialToolpaths } from './path-radial.js';
57
+ import { calibrateGPU } from './workload-calibrate.js';
58
+
59
// Global error handler for uncaught errors in worker
// (watchdog/driver failures can surface here rather than in a message handler)
self.addEventListener('error', (event) => {
  // event.error may be undefined for cross-origin script errors; fall back to message
  debug.error('Uncaught error:', event.error || event.message);
  debug.error('Stack:', event.error?.stack);
});

// Catch promise rejections that escaped every try/catch in the message handler
self.addEventListener('unhandledrejection', (event) => {
  debug.error('Unhandled promise rejection:', event.reason);
});
68
+
69
// Dispatch messages arriving from the main thread. Every message is a
// { type, data } envelope; each operation replies with a matching
// '*-complete' message (transferring large buffers where possible) and any
// failure is reported back as a single 'error' message.
self.onmessage = async function (e) {
  const { type, data } = e.data;

  try {
    switch (type) {
      case 'init': {
        // Store config (built-in defaults when the caller supplies none).
        setConfig(data?.config || {
          maxGPUMemoryMB: 256,
          gpuMemorySafetyMargin: 0.8,
          tileOverlapMM: 10,
          autoTiling: true,
          batchDivisor: 1, // For testing batching overhead: 1=optimal, 2=2x batches, 4=4x batches, etc.
          maxConcurrentThreads: 32768 // GPU watchdog limit: max threads across all workgroups in a dispatch
        });
        const success = await initWebGPU();
        self.postMessage({
          type: 'webgpu-ready',
          data: { success, capabilities: deviceCapabilities }
        });
        break;
      }

      case 'update-config': {
        updateConfig(data.config);
        debug.log('Config updated');
        break;
      }

      case 'rasterize': {
        const { triangles, stepSize, filterMode, boundsOverride } = data;
        const result = await rasterizeMesh(triangles, stepSize, filterMode, boundsOverride || {});
        // Transfer (not copy) the positions buffer back to the main thread.
        self.postMessage({
          type: 'rasterize-complete',
          data: result,
        }, [result.positions.buffer]);
        break;
      }

      case 'generate-toolpath': {
        const { terrainPositions, toolPositions, xStep, yStep, zFloor, gridStep, terrainBounds, singleScanline } = data;
        const result = await generateToolpath(
          terrainPositions, toolPositions, xStep, yStep, zFloor, gridStep, terrainBounds, singleScanline
        );
        self.postMessage({
          type: 'toolpath-complete',
          data: result
        }, [result.pathData.buffer]);
        break;
      }

      case 'radial-generate-toolpaths': {
        const result = await generateRadialToolpaths(data);
        // One transferable pathData buffer per strip.
        const transfers = result.strips.map((strip) => strip.pathData.buffer);
        self.postMessage({
          type: 'radial-toolpaths-complete',
          data: result
        }, transfers);
        break;
      }

      case 'calibrate': {
        const result = await calibrateGPU(device, data?.options || {});
        self.postMessage({
          type: 'calibrate-complete',
          data: result
        });
        break;
      }

      default: {
        self.postMessage({
          type: 'error',
          message: 'Unknown message type: ' + type
        });
      }
    }
  } catch (error) {
    debug.error('Error:', error);
    self.postMessage({
      type: 'error',
      message: error.message,
      stack: error.stack
    });
  }
};
@@ -0,0 +1,416 @@
1
+ /**
2
+ * ═══════════════════════════════════════════════════════════════════════════
3
+ * Workload Calibrate - GPU Watchdog Limit Detection
4
+ * ═══════════════════════════════════════════════════════════════════════════
5
+ *
6
+ * Calibrates GPU capabilities by testing workgroup configurations until
7
+ * watchdog kills are detected. Uses actual ray-triangle intersection code
8
+ * to simulate real workload characteristics.
9
+ *
10
+ * EXPORTS:
11
+ * ────────
12
+ * Functions:
13
+ * - calibrateGPU(device, options) - Run calibration and return limits
14
+ *
15
+ * DETECTION STRATEGY:
16
+ * ───────────────────
17
+ * Problem: Watchdog kills are SILENT - threads just stop executing
18
+ * Solution: Initialize output buffer to zeros, each thread writes 1 on completion
19
+ * Detection: Any zeros remaining = that thread was killed by watchdog
20
+ *
21
+ * TEST PARAMETERS:
22
+ * ────────────────
23
+ * 1. Workgroup dimensions (x, y, z)
24
+ * - Start: 16x16x1 (256 threads)
25
+ * - Increase: 32x32x1, 64x64x1, etc.
26
+ *
27
+ * 2. Work intensity (triangle_tests)
28
+ * - Start: 1000 intersection tests per thread
29
+ * - Increase: 2000, 5000, 10000, etc.
30
+ *
31
+ * BINARY SEARCH:
32
+ * ──────────────
33
+ * For each workgroup size:
34
+ * 1. Start with low work intensity (known to pass)
35
+ * 2. Binary search for max intensity before watchdog kill
36
+ * 3. Record (workgroup_size, max_intensity) pair
37
+ *
38
+ * OUTPUT:
39
+ * ───────
40
+ * {
41
+ * maxWorkgroupSize: { x: 64, y: 64, z: 1 }, // Largest safe config
42
+ * maxWorkPerThread: 50000, // Max intersection tests
43
+ * safeWorkloadMatrix: [ // Safe configs tested
44
+ * { workgroupSize: [16,16,1], maxWork: 100000, timingMs: 45 },
45
+ * { workgroupSize: [32,32,1], maxWork: 50000, timingMs: 123 },
46
+ * ...
47
+ * ],
48
+ * deviceInfo: {
49
+ * maxComputeWorkgroupSizeX: 256,
50
+ * maxComputeWorkgroupsPerDimension: 65535,
51
+ * ...
52
+ * }
53
+ * }
54
+ *
55
+ * USAGE:
56
+ * ──────
57
+ * const limits = await calibrateGPU(device, {
58
+ * minWorkgroupSize: [8, 8, 1],
59
+ * maxWorkgroupSize: [64, 64, 1],
60
+ * minWork: 1000,
61
+ * maxWork: 100000,
62
+ * });
63
+ *
64
+ * ═══════════════════════════════════════════════════════════════════════════
65
+ */
66
+
67
// WGSL source placeholder — presumably substituted with the real shader text
// by a build step keyed on the 'SHADER:workload-calibrate' token; TODO confirm
// against the package's build configuration.
const calibrateShaderCode = 'SHADER:workload-calibrate';
68
+
69
// Test multiple workgroup dispatches (simulates real-world usage)
/**
 * Run dispatchCount workgroups of the calibration shader and detect silent
 * watchdog kills: the completion buffer starts zeroed, each thread writes a
 * flag on completion, and any zero left after readback means that thread
 * never finished.
 *
 * @param {GPUDevice} device - WebGPU device to test.
 * @param {GPUComputePipeline} pipeline - Compiled calibration pipeline.
 * @param {number[]} workgroupSize - [x, y, z] thread dimensions per workgroup.
 * @param {number} triangleTests - Intersection tests per thread (work intensity).
 * @param {number} dispatchCount - Number of workgroups to dispatch.
 * @returns {Promise<{success: boolean, failedThreads: number,
 *   totalThreads: number, dispatchCount: number, elapsed: number}>}
 */
async function testWorkloadDispatch(device, pipeline, workgroupSize, triangleTests, dispatchCount) {
  const [x, y, z] = workgroupSize;
  const threadsPerWorkgroup = x * y * z;
  const totalThreads = threadsPerWorkgroup * dispatchCount;

  // Create completion flags buffer for ALL workgroups (one u32 per thread)
  const completionBuffer = device.createBuffer({
    size: totalThreads * 4,
    usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST,
  });

  // Initialize to zeros so killed threads are detectable as leftover zeros
  const zeroData = new Uint32Array(totalThreads);
  device.queue.writeBuffer(completionBuffer, 0, zeroData);

  // Create uniforms: workgroup dims plus requested work intensity
  const uniformData = new Uint32Array([x, y, z, triangleTests]);
  const uniformBuffer = device.createBuffer({
    size: uniformData.byteLength,
    usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
  });
  device.queue.writeBuffer(uniformBuffer, 0, uniformData);

  // Ensure the zero-fill and uniform writes have landed before timing starts
  await device.queue.onSubmittedWorkDone();

  // Create bind group
  const bindGroup = device.createBindGroup({
    layout: pipeline.getBindGroupLayout(0),
    entries: [
      { binding: 0, resource: { buffer: completionBuffer } },
      { binding: 1, resource: { buffer: uniformBuffer } },
    ],
  });

  // Dispatch multiple workgroups
  const startTime = performance.now();
  const commandEncoder = device.createCommandEncoder();
  const passEncoder = commandEncoder.beginComputePass();
  passEncoder.setPipeline(pipeline);
  passEncoder.setBindGroup(0, bindGroup);

  // Dispatch NxN workgroups (e.g., 10×10 = 100 workgroups)
  // NOTE(review): dispatchX * dispatchY can exceed dispatchCount when
  // dispatchCount is not a product of the two (e.g. 5 → 3×2 = 6); the extra
  // workgroups fall outside the completion buffer sizing — presumably the
  // shader bounds-checks its write index. Confirm against the WGSL source.
  const dispatchX = Math.ceil(Math.sqrt(dispatchCount));
  const dispatchY = Math.ceil(dispatchCount / dispatchX);
  passEncoder.dispatchWorkgroups(dispatchX, dispatchY, 1);
  passEncoder.end();

  // Readback staging buffer (mappable copy destination)
  const stagingBuffer = device.createBuffer({
    size: totalThreads * 4,
    usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
  });

  commandEncoder.copyBufferToBuffer(completionBuffer, 0, stagingBuffer, 0, totalThreads * 4);
  device.queue.submit([commandEncoder.finish()]);

  await device.queue.onSubmittedWorkDone();
  const elapsed = performance.now() - startTime;

  // Copy mapped data out before unmapping invalidates the view
  await stagingBuffer.mapAsync(GPUMapMode.READ);
  const completionData = new Uint32Array(stagingBuffer.getMappedRange());
  const completionCopy = new Uint32Array(completionData);
  stagingBuffer.unmap();

  // Check for failures: any zero flag means that thread was killed
  let failedThreads = 0;
  for (let i = 0; i < totalThreads; i++) {
    if (completionCopy[i] === 0) {
      failedThreads++;
    }
  }

  // Cleanup GPU resources eagerly rather than waiting for GC
  completionBuffer.destroy();
  uniformBuffer.destroy();
  stagingBuffer.destroy();

  return {
    success: failedThreads === 0,
    failedThreads,
    totalThreads,
    dispatchCount,
    elapsed,
  };
}
155
+
156
// Test a specific workload configuration (single workgroup)
/**
 * Run exactly one workgroup of the calibration shader and report whether
 * every thread completed.
 *
 * This used to be a near-verbatim copy of testWorkloadDispatch (same buffers,
 * same encoding, same readback) with the dispatch hard-coded to (1, 1, 1).
 * Delegating with dispatchCount = 1 is equivalent — ceil(sqrt(1)) = 1 and
 * ceil(1 / 1) = 1 reproduce the (1, 1, 1) dispatch — and keeps the two code
 * paths from drifting apart.
 *
 * @param {GPUDevice} device - WebGPU device to test.
 * @param {GPUComputePipeline} pipeline - Compiled calibration pipeline.
 * @param {number[]} workgroupSize - [x, y, z] thread dimensions.
 * @param {number} triangleTests - Intersection tests per thread.
 * @returns {Promise<{success: boolean, failedThreads: number,
 *   totalThreads: number, elapsed: number}>}
 */
async function testWorkload(device, pipeline, workgroupSize, triangleTests) {
  const { success, failedThreads, totalThreads, elapsed } =
    await testWorkloadDispatch(device, pipeline, workgroupSize, triangleTests, 1);
  // Preserve the original return shape (no dispatchCount field).
  return { success, failedThreads, totalThreads, elapsed };
}
242
+
243
// Binary-search the largest work intensity (triangle tests per thread) that
// still completes with zero watchdog-killed threads at this workgroup size.
// Note: if every probe fails, minWork itself is returned unchanged — callers
// are expected to have verified that minWork passes beforehand.
async function findMaxWork(device, pipeline, workgroupSize, minWork, maxWork) {
  let lo = minWork;
  let hi = maxWork;
  let best = minWork;

  while (lo <= hi) {
    const probe = Math.floor((lo + hi) / 2);
    const outcome = await testWorkload(device, pipeline, workgroupSize, probe);

    if (outcome.success) {
      // Probe passed: remember it and look for something heavier.
      best = probe;
      lo = probe + 1;
    } else {
      // Probe failed: everything at or above it is unsafe.
      hi = probe - 1;
    }
  }

  return best;
}
263
+
264
// Calibrate dispatch count limits (how many workgroups can be queued)
/**
 * Binary-search the largest number of workgroups that can be queued in one
 * dispatch without the GPU watchdog killing threads.
 *
 * @param {GPUDevice} device - WebGPU device to test.
 * @param {object} [options]
 * @param {number[]} [options.workgroupSize=[16,16,1]] - Known-good workgroup dims.
 * @param {number} [options.triangleTests=10000] - Work intensity per thread.
 * @param {number} [options.minDispatch=1] - Lower search bound.
 * @param {number} [options.maxDispatch=100000] - Upper search bound.
 * @param {boolean} [options.verbose=true] - Log progress to the console.
 * @returns {Promise<{maxSafeDispatchCount: number, workgroupSize: number[],
 *   triangleTests: number, results: object[]}>} results holds every probe's
 *   outcome in probe order (not sorted by dispatch count).
 */
export async function calibrateDispatchLimits(device, options = {}) {
  const {
    workgroupSize = [16, 16, 1], // Use known-good size
    triangleTests = 10000, // Moderate work per thread
    minDispatch = 1,
    maxDispatch = 100000, // Test up to 100k workgroups
    verbose = true,
  } = options;

  // Fresh pipeline per calibration run (calibrateShaderCode is module-level)
  const shaderModule = device.createShaderModule({ code: calibrateShaderCode });
  const pipeline = device.createComputePipeline({
    layout: 'auto',
    compute: { module: shaderModule, entryPoint: 'main' },
  });

  if (verbose) {
    console.log('[Calibrate] Testing dispatch count limits...');
    console.log(`[Calibrate] Workgroup size: ${workgroupSize.join('x')}, work: ${triangleTests} tests/thread`);
  }

  // Binary search for max dispatch count
  // NOTE(review): if even minDispatch fails, lastSuccess still reports
  // minDispatch as "safe" — verify the minimum passes before relying on it.
  let low = minDispatch;
  let high = maxDispatch;
  let lastSuccess = minDispatch;
  const results = [];

  while (low <= high) {
    const mid = Math.floor((low + high) / 2);

    if (verbose) {
      console.log(`[Calibrate] Testing ${mid} workgroups...`);
    }

    const result = await testWorkloadDispatch(device, pipeline, workgroupSize, triangleTests, mid);
    results.push({ ...result, dispatchCount: mid });

    if (result.success) {
      lastSuccess = mid;
      if (verbose) {
        const totalThreads = result.totalThreads.toLocaleString();
        const totalWork = (result.totalThreads * triangleTests).toLocaleString();
        console.log(`[Calibrate] ✓ ${mid} workgroups OK (${totalThreads} threads, ${totalWork} total tests) in ${result.elapsed.toFixed(1)}ms`);
      }
      low = mid + 1;
    } else {
      if (verbose) {
        console.log(`[Calibrate] ❌ ${mid} workgroups FAILED (${result.failedThreads}/${result.totalThreads} threads killed)`);
      }
      high = mid - 1;
    }
  }

  if (verbose) {
    console.log(`[Calibrate] Max safe dispatch count: ${lastSuccess} workgroups`);
  }

  return {
    maxSafeDispatchCount: lastSuccess,
    workgroupSize,
    triangleTests,
    results,
  };
}
328
+
329
// Main calibration function
/**
 * Calibrate safe GPU compute limits by probing increasing workgroup sizes
 * and, for each, binary-searching the maximum per-thread work intensity that
 * completes without watchdog kills.
 *
 * @param {GPUDevice} device - WebGPU device to calibrate.
 * @param {object} [options]
 * @param {number[][]} [options.workgroupSizes] - Sizes to probe, smallest first.
 * @param {number} [options.minWork=1000] - Minimum work intensity (must pass).
 * @param {number} [options.maxWork=100000] - Upper bound for the work search.
 * @param {boolean} [options.verbose=true] - Log progress to the console.
 * @returns {Promise<{maxWorkgroupSize: number[], maxWorkPerThread: number,
 *   safeWorkloadMatrix: object[], deviceInfo: object}>}
 * @throws {Error} If no workgroup size completes even the minimum workload
 *   (previously this crashed with a TypeError on an empty results array).
 */
export async function calibrateGPU(device, options = {}) {
  const {
    workgroupSizes = [
      [8, 8, 1],
      [16, 16, 1],
      [32, 32, 1],
      [64, 64, 1],
    ],
    minWork = 1000,
    maxWork = 100000,
    verbose = true,
  } = options;

  // Compile calibration shader
  const shaderModule = device.createShaderModule({ code: calibrateShaderCode });
  const pipeline = device.createComputePipeline({
    layout: 'auto',
    compute: { module: shaderModule, entryPoint: 'main' },
  });

  const results = [];

  if (verbose) {
    console.log('[Calibrate] Starting GPU calibration...');
    console.log('[Calibrate] Testing workgroup sizes:', workgroupSizes);
  }

  for (const size of workgroupSizes) {
    const [x, y, z] = size;
    const totalThreads = x * y * z;

    if (verbose) {
      console.log(`[Calibrate] Testing ${x}x${y}x${z} (${totalThreads} threads)...`);
    }

    // First, verify minimal work succeeds
    const minTest = await testWorkload(device, pipeline, size, minWork);
    if (!minTest.success) {
      if (verbose) {
        console.log(`[Calibrate] ❌ Failed even at minimum work (${minWork} tests)`);
      }
      break; // This workgroup size is too large; larger ones would be too
    }

    // Binary search for maximum work, then re-run at that level for timing
    const maxWorkFound = await findMaxWork(device, pipeline, size, minWork, maxWork);
    const finalTest = await testWorkload(device, pipeline, size, maxWorkFound);

    results.push({
      workgroupSize: size,
      totalThreads,
      maxWork: maxWorkFound,
      timingMs: finalTest.elapsed,
      msPerThread: finalTest.elapsed / totalThreads,
      testsPerSecond: (maxWorkFound * totalThreads) / (finalTest.elapsed / 1000),
    });

    if (verbose) {
      console.log(`[Calibrate] ✓ Max work: ${maxWorkFound} tests (${finalTest.elapsed.toFixed(1)}ms)`);
      console.log(`[Calibrate]   ${(maxWorkFound * totalThreads).toLocaleString()} total ray-triangle tests`);
    }
  }

  // FIX: if the very first workgroup size failed at minWork, results is
  // empty; the old code then dereferenced results[-1] (TypeError) and
  // Math.min(...[]) evaluated to Infinity. Fail loudly instead.
  if (results.length === 0) {
    throw new Error('GPU calibration failed: no workgroup size completed the minimum workload');
  }

  // Determine overall limits (sizes are probed smallest-to-largest, so the
  // last entry is the largest size that passed)
  const maxWorkgroupResult = results[results.length - 1];
  const minWorkPerThread = Math.min(...results.map(r => r.maxWork));

  const calibration = {
    maxWorkgroupSize: maxWorkgroupResult.workgroupSize,
    maxWorkPerThread: minWorkPerThread, // Conservative: min across all sizes
    safeWorkloadMatrix: results,
    deviceInfo: {
      maxComputeWorkgroupSizeX: device.limits.maxComputeWorkgroupSizeX,
      maxComputeWorkgroupSizeY: device.limits.maxComputeWorkgroupSizeY,
      maxComputeWorkgroupSizeZ: device.limits.maxComputeWorkgroupSizeZ,
      maxComputeWorkgroupsPerDimension: device.limits.maxComputeWorkgroupsPerDimension,
    },
  };

  if (verbose) {
    console.log('[Calibrate] Calibration complete:');
    console.log(`[Calibrate]   Max safe workgroup: ${maxWorkgroupResult.workgroupSize.join('x')}`);
    console.log(`[Calibrate]   Max work per thread: ${minWorkPerThread.toLocaleString()}`);
  }

  return calibration;
}