@gridspace/raster-path 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -5
- package/build/app.js +363 -39
- package/build/index.html +40 -2
- package/build/raster-path.js +16 -24
- package/build/raster-worker.js +2450 -0
- package/build/style.css +65 -0
- package/package.json +12 -4
- package/scripts/build-shaders.js +32 -8
- package/src/core/path-planar.js +788 -0
- package/src/core/path-radial.js +651 -0
- package/src/core/raster-config.js +185 -0
- package/src/{index.js → core/raster-path.js} +16 -24
- package/src/core/raster-planar.js +754 -0
- package/src/core/raster-tool.js +104 -0
- package/src/core/raster-worker.js +152 -0
- package/src/core/workload-calibrate.js +416 -0
- package/src/shaders/{radial-raster-v2.wgsl → radial-raster.wgsl} +8 -2
- package/src/shaders/workload-calibrate.wgsl +106 -0
- package/src/test/batch-divisor-benchmark.cjs +286 -0
- package/src/test/calibrate-test.cjs +136 -0
- package/src/test/extreme-work-test.cjs +167 -0
- package/src/test/lathe-cylinder-2-debug.cjs +334 -0
- package/src/test/lathe-cylinder-2-test.cjs +157 -0
- package/src/test/lathe-cylinder-test.cjs +198 -0
- package/src/test/radial-thread-limit-test.cjs +152 -0
- package/src/test/work-estimation-profile.cjs +406 -0
- package/src/test/workload-calculator-demo.cjs +113 -0
- package/src/test/workload-calibration.cjs +310 -0
- package/src/web/app.js +363 -39
- package/src/web/index.html +40 -2
- package/src/web/style.css +65 -0
- package/src/workload-calculator.js +318 -0
- package/build/webgpu-worker.js +0 -3011
- package/src/web/webgpu-worker.js +0 -2520
|
@@ -17,6 +17,7 @@ struct Uniforms {
|
|
|
17
17
|
filter_mode: u32, // 0 = max Z (terrain), 1 = min Z (tool)
|
|
18
18
|
num_buckets: u32, // Total number of X-buckets
|
|
19
19
|
start_angle: f32, // Starting angle offset in radians (for batching)
|
|
20
|
+
bucket_offset: u32, // Offset for bucket batching (bucket_idx in batch writes to bucket_offset + bucket_idx in output)
|
|
20
21
|
}
|
|
21
22
|
|
|
22
23
|
struct BucketInfo {
|
|
@@ -125,6 +126,10 @@ fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
|
|
|
125
126
|
|
|
126
127
|
// Step 2: Rotate position (scan_x, scan_y, scan_z) around X-axis by 'angle'
|
|
127
128
|
// X stays the same, rotate YZ plane: y' = y*cos - z*sin, z' = y*sin + z*cos
|
|
129
|
+
// NOTE: This uses right-handed rotation (positive angle rotates +Y towards +Z)
|
|
130
|
+
// To reverse rotation direction (left-handed or opposite), flip signs:
|
|
131
|
+
// y' = y*cos + z*sin (flip sign on z term)
|
|
132
|
+
// z' = -y*sin + z*cos (flip sign on y term)
|
|
128
133
|
let ray_origin_x = scan_x;
|
|
129
134
|
let ray_origin_y = scan_y * cos(angle) - scan_z * sin(angle);
|
|
130
135
|
let ray_origin_z = scan_y * sin(angle) + scan_z * cos(angle);
|
|
@@ -132,6 +137,7 @@ fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
|
|
|
132
137
|
|
|
133
138
|
// Step 3: Rotate ray direction (0, 0, -1) around X-axis by 'angle'
|
|
134
139
|
// X component stays 0, rotate YZ: dy = 0*cos - (-1)*sin = sin, dz = 0*sin + (-1)*cos = -cos
|
|
140
|
+
// NOTE: For reversed rotation, use: vec3<f32>(0.0, -sin(angle), -cos(angle))
|
|
135
141
|
let ray_dir = vec3<f32>(0.0, sin(angle), -cos(angle));
|
|
136
142
|
|
|
137
143
|
// Initialize best distance (closest hit)
|
|
@@ -174,11 +180,11 @@ fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
|
|
|
174
180
|
}
|
|
175
181
|
|
|
176
182
|
// Write output
|
|
177
|
-
// Layout: bucket_idx * numAngles * bucketWidth * gridHeight
|
|
183
|
+
// Layout: (bucket_offset + bucket_idx) * numAngles * bucketWidth * gridHeight
|
|
178
184
|
// + angle_idx * bucketWidth * gridHeight
|
|
179
185
|
// + grid_y * bucketWidth
|
|
180
186
|
// + local_x
|
|
181
|
-
let output_idx = bucket_idx * uniforms.num_angles * uniforms.bucket_grid_width * uniforms.grid_y_height
|
|
187
|
+
let output_idx = (uniforms.bucket_offset + bucket_idx) * uniforms.num_angles * uniforms.bucket_grid_width * uniforms.grid_y_height
|
|
182
188
|
+ angle_idx * uniforms.bucket_grid_width * uniforms.grid_y_height
|
|
183
189
|
+ grid_y * uniforms.bucket_grid_width
|
|
184
190
|
+ local_x;
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
// Workload Calibration Shader
|
|
2
|
+
// Tests GPU watchdog limits by doing configurable amount of work per thread
|
|
3
|
+
|
|
4
|
+
// Host-supplied parameters for one calibration dispatch.
struct Uniforms {
    // Strides main() uses to flatten global_invocation_id into a buffer index.
    workgroup_size_x: u32,
    workgroup_size_y: u32,
    // Not read anywhere in this shader.
    workgroup_size_z: u32,
    // Loop bound: number of ray/triangle tests each thread performs.
    triangle_tests: u32,
}

// One u32 slot per thread; main() stores a non-zero value once its loop is done.
@group(0) @binding(0) var<storage, read_write> completion_flags: array<u32>;
// Dispatch parameters described by Uniforms above.
@group(0) @binding(1) var<uniform> uniforms: Uniforms;
|
|
13
|
+
|
|
14
|
+
// Möller–Trumbore ray/triangle intersection.
// NOTE(review): the author states this matches the production kernel so the
// calibration workload has the same ALU/cache profile; the production shader
// is not visible from here.
//
//   ray_origin, ray_dir : start point and direction of the ray
//   v0, v1, v2          : triangle vertices
//
// Returns vec2(hit, z): hit is 1.0 when the ray strikes the triangle in front
// of its origin (0.0 otherwise); z is the Z coordinate of the hit point and is
// 0.0 on a miss.
fn ray_triangle_intersect(
    ray_origin: vec3<f32>,
    ray_dir: vec3<f32>,
    v0: vec3<f32>,
    v1: vec3<f32>,
    v2: vec3<f32>
) -> vec2<f32> {
    let EPSILON = 0.0001;
    let miss = vec2<f32>(0.0, 0.0);

    // Two triangle edges sharing v0.
    let edge_a = v1 - v0;
    let edge_b = v2 - v0;

    // Determinant; a near-zero value means the ray is parallel to the triangle.
    let p_vec = cross(ray_dir, edge_b);
    let det = dot(edge_a, p_vec);
    if (abs(det) < EPSILON) {
        return miss;
    }

    let inv_det = 1.0 / det;
    let to_origin = ray_origin - v0;

    // First barycentric coordinate must lie in [0, 1].
    let bary_u = inv_det * dot(to_origin, p_vec);
    if (bary_u < 0.0 || bary_u > 1.0) {
        return miss;
    }

    // Second barycentric coordinate; u + v must not exceed 1.
    let q_vec = cross(to_origin, edge_a);
    let bary_v = inv_det * dot(ray_dir, q_vec);
    if (bary_v < 0.0 || bary_u + bary_v > 1.0) {
        return miss;
    }

    // Distance along the ray; only hits in front of the origin count.
    let t = inv_det * dot(edge_b, q_vec);
    if (!(t > EPSILON)) {
        return miss;
    }

    return vec2<f32>(1.0, ray_origin.z + t * ray_dir.z);
}
|
|
68
|
+
|
|
69
|
+
// Calibration entry point. Each thread performs `uniforms.triangle_tests`
// ray/triangle intersection tests against a synthetic triangle and then marks
// its slot in `completion_flags` so the host can see that it ran to the end.
//
// NOTE(review): the workgroup size is fixed at 16x16x1 here while the index
// strides come from uniforms.workgroup_size_x/y; confirm the host either
// rewrites this attribute or passes strides that match the dispatched grid.
@compute @workgroup_size(16, 16, 1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
    // Flatten the 3D invocation id into this thread's slot index.
    let thread_index = global_id.z * (uniforms.workgroup_size_x * uniforms.workgroup_size_y) +
                       global_id.y * uniforms.workgroup_size_x +
                       global_id.x;

    // Synthetic triangle vertices (deterministic, no memory reads needed)
    let v0 = vec3<f32>(0.0, 0.0, 0.0);
    let v1 = vec3<f32>(1.0, 0.0, 0.0);
    let v2 = vec3<f32>(0.5, 1.0, 0.0);

    // Ray parameters derived from the thread id (deterministic): a ray from
    // z = 10 pointing straight down -Z.
    let ray_origin = vec3<f32>(
        f32(global_id.x) * 0.1,
        f32(global_id.y) * 0.1,
        10.0
    );
    let ray_dir = vec3<f32>(0.0, 0.0, -1.0);

    // Perform N intersection tests (configurable workload)
    var hit_count = 0u;
    for (var i = 0u; i < uniforms.triangle_tests; i++) {
        // Vary the triangle per iteration so the tests are not loop-invariant.
        let offset = f32(i) * 0.001;
        let v0_offset = v0 + vec3<f32>(offset, 0.0, 0.0);
        let v1_offset = v1 + vec3<f32>(0.0, offset, 0.0);
        let v2_offset = v2 + vec3<f32>(offset, offset, 0.0);

        let result = ray_triangle_intersect(ray_origin, ray_dir, v0_offset, v1_offset, v2_offset);
        if (result.x > 0.5) {
            hit_count += 1u;
        }
    }

    // Write completion flag (1 = thread completed all work). A slot that is
    // still 0 afterwards is intended to mean the thread never got this far.
    //
    // The stored value is made to depend on hit_count: previously hit_count was
    // never read, so the whole loop was dead code a shader compiler is free to
    // delete, leaving no workload to calibrate. hit_count can never exceed
    // triangle_tests, so the value written is still always 1.
    completion_flags[thread_index] = select(1u, 2u, hit_count > uniforms.triangle_tests);
}
|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
// batch-divisor-benchmark.cjs
|
|
2
|
+
// Benchmark test to measure batching overhead with different batch divisors
|
|
3
|
+
// Usage: node batch-divisor-benchmark.cjs [divisor1,divisor2,...]
|
|
4
|
+
// Example: node batch-divisor-benchmark.cjs 1,2,4,8,16,32
|
|
5
|
+
|
|
6
|
+
const { app, BrowserWindow } = require('electron');
|
|
7
|
+
const path = require('path');
|
|
8
|
+
const fs = require('fs');
|
|
9
|
+
|
|
10
|
+
// Where the raw benchmark results are written.
const OUTPUT_DIR = path.join(__dirname, '../../test-output');
const RESULTS_FILE = path.join(OUTPUT_DIR, 'batch-divisor-results.json');

// Divisors used when none are given on the command line.
const DEFAULT_BATCH_DIVISORS = [1, 2, 4, 8, 16, 32];

// `recursive: true` makes this a no-op when the directory already exists,
// so no separate existence check is needed.
fs.mkdirSync(OUTPUT_DIR, { recursive: true });

/**
 * Parse a comma-separated divisor list such as "1,2,4" into integers.
 *
 * Empty tokens (e.g. from a trailing comma) are ignored. A token that does not
 * parse as a base-10 integer aborts the run, because a NaN divisor would
 * otherwise be injected verbatim into the renderer test script.
 *
 * @param {string} spec Raw command-line argument.
 * @returns {number[]} Parsed divisors, or the defaults when `spec` has no tokens.
 */
function parseBatchDivisors(spec) {
    const tokens = spec
        .split(',')
        .map(token => token.trim())
        .filter(token => token.length > 0);

    if (tokens.length === 0) {
        return DEFAULT_BATCH_DIVISORS.slice();
    }

    const divisors = tokens.map(token => Number.parseInt(token, 10));
    const badTokens = tokens.filter((_, index) => Number.isNaN(divisors[index]));
    if (badTokens.length > 0) {
        console.error(`Invalid batch divisor(s): ${badTokens.join(', ')}`);
        console.error('Usage: node batch-divisor-benchmark.cjs [divisor1,divisor2,...]');
        process.exit(1);
    }
    return divisors;
}

// Parse batch divisors from command line args or use defaults
const args = process.argv.slice(2);
const BATCH_DIVISORS = args.length > 0
    ? parseBatchDivisors(args[0])
    : DEFAULT_BATCH_DIVISORS.slice();

console.log('=== Batch Divisor Benchmark ===');
console.log('Testing with divisors:', BATCH_DIVISORS.join(', '));
console.log('');

// Shared benchmark state: the hidden window, the index of the divisor being
// tested, and the per-divisor results collected so far.
let mainWindow;
let currentDivisorIndex = 0;
const results = [];
|
|
30
|
+
|
|
31
|
+
/**
 * Create the hidden, WebGPU-enabled window that hosts the benchmark page.
 *
 * Loads build/index.html, starts the first benchmark run once the page has
 * finished loading, and echoes renderer console lines that contain both
 * "Batch" and "timing:" to the main-process console.
 */
function createWindow() {
    const windowOptions = {
        width: 1200,
        height: 800,
        show: false,
        webPreferences: {
            nodeIntegration: false,
            contextIsolation: true,
            enableBlinkFeatures: 'WebGPU',
        },
    };

    mainWindow = new BrowserWindow(windowOptions);
    mainWindow.loadFile(path.join(__dirname, '../../build/index.html'));

    const contents = mainWindow.webContents;

    // Kick off the benchmark sequence as soon as the page is ready.
    contents.on('did-finish-load', async () => {
        console.log('✓ Page loaded');
        await runNextTest();
    });

    // Forward only the renderer's batch timing logs.
    contents.on('console-message', (event, level, message) => {
        const isBatchTiming = message.includes('Batch') && message.includes('timing:');
        if (isBatchTiming) {
            console.log('[TIMING]', message);
        }
    });
}
|
|
59
|
+
|
|
60
|
+
/**
 * Run the benchmark for the divisor at `currentDivisorIndex`.
 *
 * Executes a self-contained script in the renderer that loads the terrain and
 * tool STL fixtures, builds a radial RasterPath with the given batch divisor,
 * and times tool load, terrain load and toolpath generation. The timings are
 * appended to `results`, the index is advanced, and the next run is scheduled
 * after a one-second pause. Once every divisor has been tested the results
 * are handed to analyzeResults().
 *
 * Any failure (including one thrown by analyzeResults) is logged and ends the
 * process with exit code 1, so the hidden app can never be left hanging on an
 * unhandled rejection.
 *
 * @returns {Promise<void>}
 */
async function runNextTest() {
    try {
        if (currentDivisorIndex >= BATCH_DIVISORS.length) {
            // All tests complete - analyze and report
            await analyzeResults();
            return;
        }

        const divisor = BATCH_DIVISORS[currentDivisorIndex];
        console.log(`\n${'='.repeat(60)}`);
        console.log(`Testing with BATCH_DIVISOR = ${divisor}`);
        console.log('='.repeat(60));

        // Script evaluated inside the renderer; only `divisor` is interpolated.
        const testScript = `
            (async function() {
                const divisor = ${divisor};

                if (!navigator.gpu) {
                    return { error: 'WebGPU not available' };
                }

                // Import RasterPath
                const { RasterPath } = await import('./raster-path.js');

                // Load STL files (same as radial-test.cjs - large enough to require batching)
                const terrainResponse = await fetch('../benchmark/fixtures/terrain.stl');
                const terrainBuffer = await terrainResponse.arrayBuffer();

                const toolResponse = await fetch('../benchmark/fixtures/tool.stl');
                const toolBuffer = await toolResponse.arrayBuffer();

                // Parse binary STL: 80-byte header, uint32 triangle count, then
                // per triangle a 12-byte normal, nine float32 vertex coordinates
                // and a 2-byte attribute count.
                function parseBinarySTL(buffer) {
                    const dataView = new DataView(buffer);
                    const numTriangles = dataView.getUint32(80, true);
                    const positions = new Float32Array(numTriangles * 9);
                    let offset = 84;

                    for (let i = 0; i < numTriangles; i++) {
                        offset += 12; // Skip normal
                        for (let j = 0; j < 9; j++) {
                            positions[i * 9 + j] = dataView.getFloat32(offset, true);
                            offset += 4;
                        }
                        offset += 2; // Skip attribute byte count
                    }
                    return positions;
                }

                const terrainTriangles = parseBinarySTL(terrainBuffer);
                const toolTriangles = parseBinarySTL(toolBuffer);

                // Test parameters - configured to require batching
                const resolution = 0.05; // 0.05mm - finer resolution to force batching
                const rotationStep = 1.0; // 1 degree between rays = 360 angles
                const xStep = 1;
                const yStep = 1;
                const zFloor = 0;
                const radiusOffset = 20;

                console.log('Test parameters:');
                console.log(' Resolution:', resolution, 'mm');
                console.log(' Rotation step:', rotationStep, '°');
                console.log(' Batch divisor:', divisor);

                // Create RasterPath instance with specific batch divisor
                const raster = new RasterPath({
                    mode: 'radial',
                    resolution: resolution,
                    rotationStep: rotationStep,
                    batchDivisor: divisor
                });
                await raster.init();

                // Load tool
                const t0 = performance.now();
                await raster.loadTool({ triangles: toolTriangles });
                const toolTime = performance.now() - t0;

                // Load terrain
                const t1 = performance.now();
                await raster.loadTerrain({ triangles: terrainTriangles, zFloor: zFloor });
                const terrainTime = performance.now() - t1;

                // Generate toolpaths (this is where batching happens)
                const t2 = performance.now();
                const toolpathData = await raster.generateToolpaths({
                    xStep: xStep,
                    yStep: yStep,
                    zFloor: zFloor,
                    radiusOffset: radiusOffset
                });
                const toolpathTime = performance.now() - t2;

                // Cleanup
                raster.terminate();

                return {
                    success: true,
                    divisor: divisor,
                    timing: {
                        tool: toolTime,
                        terrain: terrainTime,
                        toolpath: toolpathTime,
                        total: toolTime + terrainTime + toolpathTime
                    },
                    result: {
                        numStrips: toolpathData.numStrips,
                        totalPoints: toolpathData.totalPoints
                    }
                };
            })();
        `;

        const result = await mainWindow.webContents.executeJavaScript(testScript);

        // Treat a missing result the same as an explicit error report.
        if (!result || result.error) {
            console.error('❌ Test failed:', result ? result.error : 'renderer returned no result');
            app.exit(1);
            return;
        }

        results.push(result);

        console.log(`\nResults for BATCH_DIVISOR = ${divisor}:`);
        console.log(` Tool load time: ${result.timing.tool.toFixed(1)}ms`);
        console.log(` Terrain load time: ${result.timing.terrain.toFixed(1)}ms`);
        console.log(` Toolpath time: ${result.timing.toolpath.toFixed(1)}ms`);
        console.log(` Total time: ${result.timing.total.toFixed(1)}ms`);
        console.log(` Strips generated: ${result.result.numStrips}`);
        console.log(` Total points: ${result.result.totalPoints}`);

        // Move to next test
        currentDivisorIndex++;

        // Small delay before next test to ensure clean state
        setTimeout(() => runNextTest(), 1000);
    } catch (error) {
        console.error('Error running test:', error);
        app.exit(1);
    }
}
|
|
203
|
+
|
|
204
|
+
/**
 * Save the collected benchmark results and print an overhead analysis.
 *
 * The first entry of `results` is the baseline every other run is compared
 * against (divisor 1 with the default divisor list). Writes the raw results to
 * RESULTS_FILE, prints a comparison table, a per-batch overhead estimate and a
 * recommendation based on the overhead of the last run, then exits the app
 * with code 0. Exits with code 1 when no results were collected.
 *
 * @returns {Promise<void>}
 */
async function analyzeResults() {
    // Render a delta with an explicit sign ("+12", "-3") instead of "+-3".
    const signed = (value, digits) => `${value >= 0 ? '+' : ''}${value.toFixed(digits)}`;

    console.log('\n' + '='.repeat(60));
    console.log('ANALYSIS');
    console.log('='.repeat(60));

    // Save raw results
    const resultsData = {
        timestamp: new Date().toISOString(),
        divisors: BATCH_DIVISORS,
        results: results
    };
    fs.writeFileSync(RESULTS_FILE, JSON.stringify(resultsData, null, 2));
    console.log(`\n✓ Raw results saved to: ${RESULTS_FILE}`);

    if (results.length === 0) {
        console.error('\n❌ No benchmark results were collected; nothing to analyze.');
        app.exit(1);
        return;
    }

    // Analyze overhead
    console.log('\n--- Timing Comparison ---');
    const baseline = results[0]; // First divisor tested (1 by default)
    const baselineTotal = baseline.timing.total;
    console.log(`\nBaseline (divisor=${baseline.divisor}): ${baselineTotal.toFixed(1)}ms total`);
    console.log(` Breakdown: ${baseline.timing.tool.toFixed(1)}ms tool + ${baseline.timing.terrain.toFixed(1)}ms terrain + ${baseline.timing.toolpath.toFixed(1)}ms toolpath`);

    console.log('\nOverhead Analysis:');
    console.log('┌──────────┬───────────┬────────────┬──────────────┬──────────────┐');
    console.log('│ Divisor  │ Total (ms)│ vs Baseline│ Overhead (ms)│ Overhead (%) │');
    console.log('├──────────┼───────────┼────────────┼──────────────┼──────────────┤');

    for (const result of results) {
        const overhead = result.timing.total - baselineTotal;
        const overheadPercent = (overhead / baselineTotal) * 100;
        const comparison = result === baseline ? 'baseline' : `${signed(overhead, 0)}ms`;

        console.log(
            `│ ${String(result.divisor).padEnd(8)} │ ` +
            `${result.timing.total.toFixed(1).padStart(9)} │ ` +
            `${comparison.padStart(10)} │ ` +
            `${overhead.toFixed(1).padStart(12)} │ ` +
            `${overheadPercent.toFixed(1).padStart(12)}% │`
        );
    }
    console.log('└──────────┴───────────┴────────────┴──────────────┴──────────────┘');

    // Calculate per-batch overhead
    if (results.length > 1) {
        console.log('\n--- Per-Batch Overhead Estimation ---');
        // Assumes a divisor of N creates N times as many batches, so a run has
        // (its divisor - baseline divisor) more batch units than the baseline.
        for (const result of results.slice(1)) {
            const extraBatches = result.divisor - baseline.divisor;
            const overhead = result.timing.total - baselineTotal;

            // Avoid dividing by zero (or a negative count) when the run does
            // not use more batches than the baseline.
            if (!(extraBatches > 0)) {
                console.log(`Divisor ${result.divisor}: ${overhead.toFixed(1)}ms overhead; no extra batches relative to baseline, per-batch estimate skipped`);
                continue;
            }

            const perBatchOverhead = overhead / extraBatches;
            console.log(`Divisor ${result.divisor}: ${overhead.toFixed(1)}ms overhead / ${extraBatches} extra batch(es) ≈ ${perBatchOverhead.toFixed(1)}ms per batch boundary`);
        }
    }

    // Recommendations, based on the overhead of the last divisor tested
    console.log('\n--- Recommendations ---');
    const maxResult = results[results.length - 1];
    const maxOverheadPercent = ((maxResult.timing.total - baselineTotal) / baselineTotal) * 100;

    if (maxOverheadPercent < 15) {
        console.log('✓ LOW OVERHEAD (<15%): Batching overhead is acceptable.');
        console.log(' Focus on other optimizations (shader efficiency, toolpath generation).');
    } else if (maxOverheadPercent < 30) {
        console.log('⚠ MEDIUM OVERHEAD (15-30%): Consider batch size tuning.');
        console.log(' Investigate buffer creation/destruction costs.');
        console.log(' Consider reusing buffers across batches.');
    } else {
        console.log('⚠ HIGH OVERHEAD (>30%): Priority optimization needed!');
        console.log(' Critical to reduce batch overhead before increasing batch count.');
        console.log(' Primary suspects:');
        console.log(' - createReusableToolpathBuffers() per batch');
        console.log(' - destroyReusableToolpathBuffers() per batch');
        console.log(' - GPU context switching between batches');
        console.log(' Recommendation: Implement buffer pooling or batch-level buffer reuse.');
    }

    console.log('\n✅ Benchmark complete!');
    app.exit(0);
}
|
|
284
|
+
|
|
285
|
+
// Quit as soon as the benchmark window is closed, on every platform.
app.on('window-all-closed', () => {
    app.quit();
});

// Open the benchmark window once Electron has finished initializing.
app.whenReady().then(() => createWindow());
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
// calibrate-test.cjs
|
|
2
|
+
// Test GPU workload calibration
|
|
3
|
+
|
|
4
|
+
const { app, BrowserWindow } = require('electron');
|
|
5
|
+
const path = require('path');
|
|
6
|
+
|
|
7
|
+
let mainWindow;
|
|
8
|
+
|
|
9
|
+
/**
 * Create the hidden, WebGPU-enabled window and run the calibration test.
 *
 * Once build/index.html has loaded, a script is executed in the renderer that
 * initializes a planar RasterPath, posts a 'calibrate' request (type
 * 'dispatch') to its worker, waits for the 'calibrate-complete' reply and logs
 * the reported limits. The process exits with 0 on success and 1 on any error.
 * All renderer console output is mirrored to the main-process console.
 */
function createWindow() {
    mainWindow = new BrowserWindow({
        width: 1200,
        height: 800,
        show: false,
        webPreferences: {
            nodeIntegration: false,
            contextIsolation: true,
            enableBlinkFeatures: 'WebGPU',
        }
    });

    const htmlPath = path.join(__dirname, '../../build/index.html');
    mainWindow.loadFile(htmlPath);

    mainWindow.webContents.on('did-finish-load', async () => {
        console.log('✓ Page loaded');

        // Script evaluated inside the renderer. Backslashes, backticks and
        // "${" are escaped so they survive this outer template literal.
        const testScript = `
            (async function() {
                console.log('=== GPU Workload Calibration Test ===');

                if (!navigator.gpu) {
                    return { error: 'WebGPU not available' };
                }
                console.log('✓ WebGPU available');

                // Import RasterPath
                const { RasterPath } = await import('./raster-path.js');

                // Create RasterPath instance (initializes worker)
                console.log('\\nInitializing worker...');
                const raster = new RasterPath({ mode: 'planar', resolution: 0.1 });
                await raster.init();
                console.log('✓ Worker initialized');

                // Send calibration request
                console.log('\\nRunning GPU dispatch count calibration...');
                console.log('This will test how many workgroups can be dispatched simultaneously.');

                const startTime = performance.now();

                // Temporarily intercept worker messages to catch the
                // calibration reply. The previous handler is restored once the
                // promise settles, and is only invoked when one was installed.
                const calibrationPromise = new Promise((resolve, reject) => {
                    const worker = raster.worker;
                    const previousHandler = worker.onmessage;
                    const restoreHandler = () => {
                        worker.onmessage = previousHandler;
                    };

                    worker.onmessage = (e) => {
                        if (e.data.type === 'calibrate-complete') {
                            restoreHandler();
                            resolve(e.data.data);
                        } else if (e.data.type === 'error') {
                            restoreHandler();
                            reject(new Error(e.data.message));
                        } else if (typeof previousHandler === 'function') {
                            previousHandler.call(worker, e); // Pass through other messages
                        }
                    };
                });

                raster.worker.postMessage({
                    type: 'calibrate',
                    data: {
                        calibrationType: 'dispatch',
                        options: {
                            workgroupSize: [4, 4, 1], // VERY SMALL workgroup (16 threads)
                            triangleTests: 1000,
                            minDispatch: 1,
                            maxDispatch: 1000,
                            verbose: true,
                        }
                    }
                });

                const results = await calibrationPromise;
                const elapsed = performance.now() - startTime;

                console.log('\\n✓ Calibration complete in', elapsed.toFixed(0) + 'ms');
                console.log('\\n=== Results ===');
                console.log('Max safe dispatch count:', results.maxSafeDispatchCount.toLocaleString());
                console.log('Workgroup size:', results.workgroupSize.join('x'));
                console.log('Triangle tests per thread:', results.triangleTests.toLocaleString());

                const maxThreads = results.maxSafeDispatchCount * results.workgroupSize[0] * results.workgroupSize[1] * results.workgroupSize[2];
                const maxTests = maxThreads * results.triangleTests;
                console.log('\\nMax concurrent threads:', maxThreads.toLocaleString());
                console.log('Max total ray tests:', maxTests.toLocaleString());

                console.log('\\n=== Dispatch Test Results ===');
                for (const entry of results.results) {
                    const status = entry.success ? '✓' : '❌';
                    const threads = entry.totalThreads.toLocaleString();
                    const time = entry.elapsed.toFixed(1);
                    const failed = entry.failedThreads > 0 ? \` (\${entry.failedThreads} failed)\` : '';
                    console.log(\` \${status} \${entry.dispatchCount.toString().padStart(6)} workgroups: \${threads.padStart(10)} threads in \${time.padStart(7)}ms\${failed}\`);
                }

                return { success: true, results };
            })();
        `;

        try {
            const result = await mainWindow.webContents.executeJavaScript(testScript);

            if (!result) {
                // Guard against a renderer script that resolved without a value.
                console.error('❌ Test failed:', 'renderer returned no result');
                app.exit(1);
            } else if (result.error) {
                console.error('❌ Test failed:', result.error);
                app.exit(1);
            } else if (!result.success) {
                console.error('❌ Test returned unsuccessful result');
                app.exit(1);
            } else {
                console.log('\n✅ Calibration test complete');
                app.exit(0);
            }
        } catch (error) {
            console.error('❌ Test error:', error);
            app.exit(1);
        }
    });

    // Mirror every renderer console line to the main-process console.
    mainWindow.webContents.on('console-message', (event, level, message) => {
        console.log(message);
    });
}
|
|
129
|
+
|
|
130
|
+
// Open the calibration window once Electron has finished initializing.
app.whenReady().then(() => createWindow());

// When the last window closes, quit everywhere except macOS.
app.on('window-all-closed', () => {
    const isMac = process.platform === 'darwin';
    if (isMac) {
        return;
    }
    app.quit();
});
|