kernel-craft 0.1.2__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kernel-craft
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: CUDA kernels for machine learning systems optimization
5
5
  Author-email: kernel-craft contributors <contact@example.com>
6
6
  License-Expression: MIT
@@ -45,7 +45,7 @@ python -m build
45
45
 
46
46
  The `.so` file will be at `src/python/build/kernel_craft_python.cpython-*.so`.
47
47
 
48
- ### Option 2: Build with CMake
48
+ ### Option 3: Build with CMake
49
49
 
50
50
  ```bash
51
51
  cd /path/to/kernel-craft
@@ -24,7 +24,7 @@ python -m build
24
24
 
25
25
  The `.so` file will be at `src/python/build/kernel_craft_python.cpython-*.so`.
26
26
 
27
- ### Option 2: Build with CMake
27
+ ### Option 3: Build with CMake
28
28
 
29
29
  ```bash
30
30
  cd /path/to/kernel-craft
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kernel-craft
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: CUDA kernels for machine learning systems optimization
5
5
  Author-email: kernel-craft contributors <contact@example.com>
6
6
  License-Expression: MIT
@@ -45,7 +45,7 @@ python -m build
45
45
 
46
46
  The `.so` file will be at `src/python/build/kernel_craft_python.cpython-*.so`.
47
47
 
48
- ### Option 2: Build with CMake
48
+ ### Option 3: Build with CMake
49
49
 
50
50
  ```bash
51
51
  cd /path/to/kernel-craft
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "kernel-craft"
7
- version = "0.1.2"
7
+ version = "0.1.4"
8
8
  description = "CUDA kernels for machine learning systems optimization"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11, <3.13"
@@ -0,0 +1,569 @@
1
+ """Python tests for kernel_craft Python bindings."""
2
+
3
+ import numpy as np
4
+ import pytest
5
+
6
+
7
+ def conv_cpu(input_arr, kernel):
8
+ """CPU reference implementation for convolution."""
9
+ height, width = input_arr.shape
10
+ ksize = kernel.shape[0]
11
+ kHalf = ksize // 2
12
+ output = np.zeros_like(input_arr)
13
+ for oy in range(height):
14
+ for ox in range(width):
15
+ sum_val = 0.0
16
+ for ky in range(ksize):
17
+ iy = oy + ky - kHalf
18
+ if iy < 0 or iy >= height:
19
+ continue
20
+ for kx in range(ksize):
21
+ ix = ox + kx - kHalf
22
+ if ix < 0 or ix >= width:
23
+ continue
24
+ sum_val += input_arr[iy, ix] * kernel[ky, kx]
25
+ output[oy, ox] = sum_val
26
+ return output
27
+
28
+
29
+ class TestConvNaiveNumpy:
30
+ """Tests for conv_naive with numpy arrays."""
31
+
32
+ def test_basic(self):
33
+ """Test basic convolution produces correct shape."""
34
+ import kernel_craft_python as kc
35
+ input_arr = np.random.rand(8, 8).astype(np.float32)
36
+ kernel = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
37
+ result = kc.conv_naive(input_arr, kernel)
38
+ assert result.shape == (8, 8)
39
+
40
+ def test_correctness(self):
41
+ """Test output matches CPU reference."""
42
+ import kernel_craft_python as kc
43
+ np.random.seed(42)
44
+ input_arr = np.random.rand(16, 16).astype(np.float32)
45
+ kernel = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float32)
46
+ result_gpu = kc.conv_naive(input_arr, kernel)
47
+ result_cpu = conv_cpu(input_arr, kernel)
48
+ np.testing.assert_allclose(result_gpu, result_cpu, rtol=1e-5)
49
+
50
+ def test_large_kernel(self):
51
+ """Test with larger 5x5 kernel."""
52
+ import kernel_craft_python as kc
53
+ np.random.seed(42)
54
+ input_arr = np.random.rand(32, 32).astype(np.float32)
55
+ kernel = np.random.rand(5, 5).astype(np.float32)
56
+ result_gpu = kc.conv_naive(input_arr, kernel)
57
+ result_cpu = conv_cpu(input_arr, kernel)
58
+ np.testing.assert_allclose(result_gpu, result_cpu, rtol=1e-4)
59
+
60
+ def test_invalid_input_dim(self):
61
+ """Test that 1D input raises error."""
62
+ import kernel_craft_python as kc
63
+ input_arr = np.random.rand(16).astype(np.float32)
64
+ kernel = np.array([[1, 0], [0, 1]], dtype=np.float32)
65
+ with pytest.raises(RuntimeError):
66
+ kc.conv_naive(input_arr, kernel)
67
+
68
+ def test_invalid_kernel_dim(self):
69
+ """Test that non-2D kernel raises error."""
70
+ import kernel_craft_python as kc
71
+ input_arr = np.random.rand(16, 16).astype(np.float32)
72
+ kernel = np.random.rand(4).astype(np.float32)
73
+ with pytest.raises(RuntimeError):
74
+ kc.conv_naive(input_arr, kernel)
75
+
76
+ def test_even_kernel_raises(self):
77
+ """Test that even-sized kernel raises error."""
78
+ import kernel_craft_python as kc
79
+ input_arr = np.random.rand(16, 16).astype(np.float32)
80
+ kernel = np.ones((4, 4), dtype=np.float32)
81
+ with pytest.raises(RuntimeError):
82
+ kc.conv_naive(input_arr, kernel)
83
+
84
+
85
+ class TestConvTiledNumpy:
86
+ """Tests for conv_tiled with numpy arrays."""
87
+
88
+ def test_tile_8x8(self):
89
+ """Test tiled convolution with 8x8 tiles."""
90
+ import kernel_craft_python as kc
91
+ np.random.seed(42)
92
+ input_arr = np.random.rand(16, 16).astype(np.float32)
93
+ kernel = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float32)
94
+ result = kc.conv_tiled(input_arr, kernel, 8, 8)
95
+ result_ref = kc.conv_naive(input_arr, kernel)
96
+ np.testing.assert_allclose(result, result_ref, rtol=1e-5)
97
+
98
+ def test_tile_16x16(self):
99
+ """Test tiled convolution with 16x16 tiles."""
100
+ import kernel_craft_python as kc
101
+ np.random.seed(42)
102
+ input_arr = np.random.rand(32, 32).astype(np.float32)
103
+ kernel = np.random.rand(3, 3).astype(np.float32)
104
+ result = kc.conv_tiled(input_arr, kernel, 16, 16)
105
+ result_ref = kc.conv_naive(input_arr, kernel)
106
+ np.testing.assert_allclose(result, result_ref, rtol=1e-5)
107
+
108
+ def test_tile_32x32(self):
109
+ """Test tiled convolution with 32x32 tiles on larger image."""
110
+ import kernel_craft_python as kc
111
+ np.random.seed(42)
112
+ input_arr = np.random.rand(64, 64).astype(np.float32)
113
+ kernel = np.random.rand(5, 5).astype(np.float32)
114
+ result = kc.conv_tiled(input_arr, kernel, 32, 32)
115
+ result_ref = kc.conv_naive(input_arr, kernel)
116
+ np.testing.assert_allclose(result, result_ref, rtol=1e-5)
117
+
118
+ def test_different_tile_w_h(self):
119
+ """Test different tile width and height."""
120
+ import kernel_craft_python as kc
121
+ np.random.seed(42)
122
+ input_arr = np.random.rand(24, 24).astype(np.float32)
123
+ kernel = np.random.rand(3, 3).astype(np.float32)
124
+ result = kc.conv_tiled(input_arr, kernel, 8, 16)
125
+ result_ref = kc.conv_naive(input_arr, kernel)
126
+ np.testing.assert_allclose(result, result_ref, rtol=1e-5)
127
+
128
+
129
+ class TestConvNaiveTorch:
130
+ """Tests for conv_naive with PyTorch tensors."""
131
+
132
+ def test_basic(self):
133
+ """Test basic convolution with PyTorch tensor."""
134
+ try:
135
+ import torch
136
+ import kernel_craft_python as kc
137
+ except ImportError:
138
+ pytest.skip("PyTorch not installed")
139
+
140
+ input_tensor = torch.rand(8, 8, dtype=torch.float32, device='cuda')
141
+ kernel = torch.tensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=torch.float32, device='cuda')
142
+ result = kc.conv_naive(input_tensor, kernel)
143
+ assert result.shape == (8, 8)
144
+ assert result.device.type == 'cuda'
145
+
146
+ def test_correctness(self):
147
+ """Test output matches numpy reference."""
148
+ try:
149
+ import torch
150
+ import kernel_craft_python as kc
151
+ except ImportError:
152
+ pytest.skip("PyTorch not installed")
153
+
154
+ np.random.seed(42)
155
+ input_np = np.random.rand(16, 16).astype(np.float32)
156
+ kernel_np = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float32)
157
+
158
+ result_gpu = kc.conv_naive(
159
+ torch.from_numpy(input_np).cuda(),
160
+ torch.from_numpy(kernel_np).cuda()
161
+ )
162
+ result_cpu = conv_cpu(input_np, kernel_np)
163
+ np.testing.assert_allclose(result_gpu.cpu().numpy(), result_cpu, rtol=1e-4)
164
+
165
+
166
+ class TestConvTiledTorch:
167
+ """Tests for conv_tiled with PyTorch tensors."""
168
+
169
+ def test_tile_sizes(self):
170
+ """Test different tile sizes with PyTorch."""
171
+ try:
172
+ import torch
173
+ import kernel_craft_python as kc
174
+ except ImportError:
175
+ pytest.skip("PyTorch not installed")
176
+
177
+ input_tensor = torch.rand(32, 32, dtype=torch.float32, device='cuda')
178
+ kernel = torch.rand(3, 3, dtype=torch.float32, device='cuda')
179
+
180
+ for tw, th in [(8, 8), (16, 16), (32, 32)]:
181
+ result = kc.conv_tiled(input_tensor, kernel, tw, th)
182
+ assert result.shape == (32, 32)
183
+ assert result.device.type == 'cuda'
184
+
185
+
186
+ class TestMemoryPool:
187
+ """Tests for memory pool (batch processing)."""
188
+
189
+ def test_batch_processing_numpy(self):
190
+ """Test batch processing with numpy arrays."""
191
+ import kernel_craft_python as kc
192
+
193
+ np.random.seed(42)
194
+ num_batches = 4
195
+ width, height = 64, 64
196
+ ksize = 3
197
+
198
+ inputs = [np.random.rand(height, width).astype(np.float32) for _ in range(num_batches)]
199
+ kernel = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float32)
200
+
201
+ results = []
202
+ for inp in inputs:
203
+ result = kc.conv_tiled(inp, kernel, 8, 8)
204
+ results.append(result)
205
+
206
+ assert len(results) == num_batches
207
+ for r in results:
208
+ assert r.shape == (height, width)
209
+
210
+ def test_batch_correctness(self):
211
+ """Test that batch processing produces correct results."""
212
+ import kernel_craft_python as kc
213
+
214
+ np.random.seed(42)
215
+ input_arr = np.random.rand(16, 16).astype(np.float32)
216
+ kernel = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float32)
217
+
218
+ result1 = kc.conv_tiled(input_arr, kernel, 8, 8)
219
+ result2 = kc.conv_tiled(input_arr, kernel, 8, 8)
220
+
221
+ np.testing.assert_allclose(result1, result2, rtol=1e-5)
222
+
223
+
224
+ class TestMixedPrecision:
225
+ """Tests for mixed precision (FP16) convolution."""
226
+
227
+ def test_fp16_conversion(self):
228
+ """Test FP16 conversion and back."""
229
+ import kernel_craft_python as kc
230
+
231
+ np.random.seed(42)
232
+ input_fp32 = np.random.rand(16, 16).astype(np.float32)
233
+ kernel = np.random.rand(3, 3).astype(np.float32)
234
+
235
+ result_fp32 = kc.conv_tiled(input_fp32, kernel, 8, 8)
236
+
237
+ input_fp16 = input_fp32.astype(np.float16)
238
+ kernel_fp16 = kernel.astype(np.float16)
239
+
240
+ result_fp16 = kc.conv_tiled(input_fp16.astype(np.float32), kernel_fp16.astype(np.float32), 8, 8)
241
+
242
+ assert result_fp32.shape == result_fp16.shape
243
+
244
+
245
+ class TestPerformance:
246
+ """Performance-related tests."""
247
+
248
+ def test_multiple_tile_sizes(self):
249
+ """Test that all tile sizes produce consistent results."""
250
+ import kernel_craft_python as kc
251
+
252
+ np.random.seed(42)
253
+ input_arr = np.random.rand(32, 32).astype(np.float32)
254
+ kernel = np.random.rand(3, 3).astype(np.float32)
255
+
256
+ result_ref = kc.conv_naive(input_arr, kernel)
257
+
258
+ for tw, th in [(8, 8), (16, 16)]:
259
+ result = kc.conv_tiled(input_arr, kernel, tw, th)
260
+ np.testing.assert_allclose(result, result_ref, rtol=1e-4)
261
+
262
+ def test_large_input(self):
263
+ """Test with larger input for performance characterization."""
264
+ import kernel_craft_python as kc
265
+
266
+ np.random.seed(42)
267
+ input_arr = np.random.rand(512, 512).astype(np.float32)
268
+ kernel = np.random.rand(3, 3).astype(np.float32)
269
+
270
+ result = kc.conv_tiled(input_arr, kernel, 8, 8)
271
+ assert result.shape == (512, 512)
272
+
273
+ result_ref = kc.conv_naive(input_arr, kernel)
274
+ np.testing.assert_allclose(result, result_ref, rtol=1e-4)
275
+
276
+
277
+ class TestEdgeCases:
278
+ """Tests for edge cases and boundary conditions."""
279
+
280
+ def test_minimum_size_input(self):
281
+ """Test with minimum size 3x3 input."""
282
+ import kernel_craft_python as kc
283
+ input_arr = np.ones((3, 3), dtype=np.float32)
284
+ kernel = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
285
+ result = kc.conv_naive(input_arr, kernel)
286
+ assert result.shape == (3, 3)
287
+
288
+ def test_3x3_input_with_5x5_kernel(self):
289
+ """Test 3x3 input with 5x5 kernel."""
290
+ import kernel_craft_python as kc
291
+ np.random.seed(42)
292
+ input_arr = np.random.rand(3, 3).astype(np.float32)
293
+ kernel = np.random.rand(5, 5).astype(np.float32)
294
+ result = kc.conv_naive(input_arr, kernel)
295
+ assert result.shape == (3, 3)
296
+
297
+ def test_zero_input(self):
298
+ """Test with all-zero input."""
299
+ import kernel_craft_python as kc
300
+ input_arr = np.zeros((16, 16), dtype=np.float32)
301
+ kernel = np.random.rand(3, 3).astype(np.float32)
302
+ result = kc.conv_naive(input_arr, kernel)
303
+ np.testing.assert_array_equal(result, np.zeros_like(input_arr))
304
+
305
+ def test_zero_kernel(self):
306
+ """Test with all-zero kernel."""
307
+ import kernel_craft_python as kc
308
+ np.random.seed(42)
309
+ input_arr = np.random.rand(16, 16).astype(np.float32)
310
+ kernel = np.zeros((3, 3), dtype=np.float32)
311
+ result = kc.conv_naive(input_arr, kernel)
312
+ np.testing.assert_array_equal(result, np.zeros_like(input_arr))
313
+
314
+ def test_constant_input(self):
315
+ """Test with constant input."""
316
+ import kernel_craft_python as kc
317
+ input_arr = np.ones((16, 16), dtype=np.float32) * 5.0
318
+ kernel = np.ones((3, 3), dtype=np.float32)
319
+ result = kc.conv_naive(input_arr, kernel)
320
+ assert result.max() > 0
321
+
322
+ def test_single_pixel_output(self):
323
+ """Test with 1x1 output tile."""
324
+ import kernel_craft_python as kc
325
+ np.random.seed(42)
326
+ input_arr = np.random.rand(5, 5).astype(np.float32)
327
+ kernel = np.random.rand(3, 3).astype(np.float32)
328
+ result = kc.conv_tiled(input_arr, kernel, 1, 1)
329
+ result_ref = kc.conv_naive(input_arr, kernel)
330
+ np.testing.assert_allclose(result, result_ref, rtol=1e-4)
331
+
332
+ def test_odd_input_dimensions(self):
333
+ """Test with non-power-of-2 dimensions."""
334
+ import kernel_craft_python as kc
335
+ np.random.seed(42)
336
+ for wh in [(13, 13), (17, 23), (31, 47)]:
337
+ input_arr = np.random.rand(*wh).astype(np.float32)
338
+ kernel = np.random.rand(3, 3).astype(np.float32)
339
+ result = kc.conv_tiled(input_arr, kernel, 8, 8)
340
+ result_ref = kc.conv_naive(input_arr, kernel)
341
+ np.testing.assert_allclose(result, result_ref, rtol=1e-4)
342
+
343
+
344
+ class TestKernelTypes:
345
+ """Tests with different kernel types."""
346
+
347
+ def test_identity_kernel(self):
348
+ """Test with identity kernel."""
349
+ import kernel_craft_python as kc
350
+ np.random.seed(42)
351
+ input_arr = np.random.rand(16, 16).astype(np.float32)
352
+ kernel = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]], dtype=np.float32)
353
+ result = kc.conv_naive(input_arr, kernel)
354
+ np.testing.assert_allclose(result, input_arr, rtol=1e-5)
355
+
356
+ def test_sobel_x_kernel(self):
357
+ """Test with Sobel X edge detection kernel."""
358
+ import kernel_craft_python as kc
359
+ np.random.seed(42)
360
+ input_arr = np.random.rand(16, 16).astype(np.float32)
361
+ kernel = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], dtype=np.float32)
362
+ result_gpu = kc.conv_naive(input_arr, kernel)
363
+ result_cpu = conv_cpu(input_arr, kernel)
364
+ np.testing.assert_allclose(result_gpu, result_cpu, rtol=1e-4)
365
+
366
+ def test_sobel_y_kernel(self):
367
+ """Test with Sobel Y edge detection kernel."""
368
+ import kernel_craft_python as kc
369
+ np.random.seed(42)
370
+ input_arr = np.random.rand(16, 16).astype(np.float32)
371
+ kernel = np.array([[-1, -2, -1], [0, 0, 0], [1, 2, 1]], dtype=np.float32)
372
+ result_gpu = kc.conv_naive(input_arr, kernel)
373
+ result_cpu = conv_cpu(input_arr, kernel)
374
+ np.testing.assert_allclose(result_gpu, result_cpu, rtol=1e-4)
375
+
376
+ def test_laplacian_kernel(self):
377
+ """Test with Laplacian kernel for edge detection."""
378
+ import kernel_craft_python as kc
379
+ np.random.seed(42)
380
+ input_arr = np.random.rand(16, 16).astype(np.float32)
381
+ kernel = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float32)
382
+ result_gpu = kc.conv_naive(input_arr, kernel)
383
+ result_cpu = conv_cpu(input_arr, kernel)
384
+ np.testing.assert_allclose(result_gpu, result_cpu, rtol=1e-5)
385
+
386
+ def test_gaussian_like_kernel(self):
387
+ """Test with Gaussian-like blur kernel."""
388
+ import kernel_craft_python as kc
389
+ np.random.seed(42)
390
+ input_arr = np.random.rand(16, 16).astype(np.float32)
391
+ kernel = np.array([[1, 2, 1], [2, 4, 2], [1, 2, 1]], dtype=np.float32) / 16.0
392
+ result_gpu = kc.conv_naive(input_arr, kernel)
393
+ result_cpu = conv_cpu(input_arr, kernel)
394
+ np.testing.assert_allclose(result_gpu, result_cpu, rtol=1e-5)
395
+
396
+ def test_7x7_kernel(self):
397
+ """Test with larger 7x7 kernel."""
398
+ import kernel_craft_python as kc
399
+ np.random.seed(42)
400
+ input_arr = np.random.rand(32, 32).astype(np.float32)
401
+ kernel = np.random.rand(7, 7).astype(np.float32)
402
+ result_gpu = kc.conv_naive(input_arr, kernel)
403
+ result_cpu = conv_cpu(input_arr, kernel)
404
+ np.testing.assert_allclose(result_gpu, result_cpu, rtol=1e-4)
405
+
406
+
407
+ class TestNumpyTorchInterop:
408
+ """Tests for numpy <-> torch interoperability."""
409
+
410
+ def test_numpy_to_torch_and_back(self):
411
+ """Test round-trip between numpy and torch."""
412
+ try:
413
+ import torch
414
+ import kernel_craft_python as kc
415
+ except ImportError:
416
+ pytest.skip("PyTorch not installed")
417
+
418
+ np.random.seed(42)
419
+ input_np = np.random.rand(16, 16).astype(np.float32)
420
+ kernel_np = np.random.rand(3, 3).astype(np.float32)
421
+
422
+ input_torch = torch.from_numpy(input_np).cuda()
423
+ kernel_torch = torch.from_numpy(kernel_np).cuda()
424
+
425
+ result_torch = kc.conv_naive(input_torch, kernel_torch)
426
+
427
+ result_back = result_torch.cpu().numpy()
428
+ result_ref = conv_cpu(input_np, kernel_np)
429
+ np.testing.assert_allclose(result_back, result_ref, rtol=1e-4)
430
+
431
+ def test_inplace_not_required(self):
432
+ """Test that input tensor is not modified."""
433
+ try:
434
+ import torch
435
+ import kernel_craft_python as kc
436
+ except ImportError:
437
+ pytest.skip("PyTorch not installed")
438
+
439
+ np.random.seed(42)
440
+ input_np = np.random.rand(16, 16).astype(np.float32)
441
+ kernel_np = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float32)
442
+
443
+ input_torch = torch.from_numpy(input_np).cuda()
444
+ input_original = input_torch.clone()
445
+
446
+ _ = kc.conv_naive(input_torch, torch.from_numpy(kernel_np).cuda())
447
+
448
+ np.testing.assert_allclose(input_torch.cpu().numpy(), input_original.cpu().numpy())
449
+
450
+ def test_different_tiled_results_torch(self):
451
+ """Test tiled matches naive for torch on various sizes."""
452
+ try:
453
+ import torch
454
+ import kernel_craft_python as kc
455
+ except ImportError:
456
+ pytest.skip("PyTorch not installed")
457
+
458
+ for size in [8, 16, 32, 64]:
459
+ np.random.seed(42)
460
+ input_arr = np.random.rand(size, size).astype(np.float32)
461
+ kernel = np.random.rand(3, 3).astype(np.float32)
462
+
463
+ naive_result = kc.conv_naive(
464
+ torch.from_numpy(input_arr).cuda(),
465
+ torch.from_numpy(kernel).cuda()
466
+ )
467
+ tiled_result = kc.conv_tiled(
468
+ torch.from_numpy(input_arr).cuda(),
469
+ torch.from_numpy(kernel).cuda(),
470
+ 8, 8
471
+ )
472
+
473
+ np.testing.assert_allclose(
474
+ naive_result.cpu().numpy(),
475
+ tiled_result.cpu().numpy(),
476
+ rtol=1e-4
477
+ )
478
+
479
+
480
+ class TestMemoryAndBatch:
481
+ """Memory and batch processing tests."""
482
+
483
+ def test_multiple_consecutive_calls(self):
484
+ """Test multiple consecutive kernel calls."""
485
+ import kernel_craft_python as kc
486
+
487
+ np.random.seed(42)
488
+ kernel = np.random.rand(3, 3).astype(np.float32)
489
+
490
+ results = []
491
+ for _ in range(5):
492
+ input_arr = np.random.rand(32, 32).astype(np.float32)
493
+ result = kc.conv_naive(input_arr, kernel)
494
+ results.append(result)
495
+
496
+ assert len(results) == 5
497
+ for r in results:
498
+ assert r.shape == (32, 32)
499
+
500
+ def test_reuse_kernel(self):
501
+ """Test reusing the same kernel multiple times."""
502
+ import kernel_craft_python as kc
503
+
504
+ np.random.seed(42)
505
+ kernel = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float32)
506
+
507
+ for i in range(3):
508
+ input_arr = np.random.rand(16, 16).astype(np.float32)
509
+ result = kc.conv_tiled(input_arr, kernel, 8, 8)
510
+ result_ref = conv_cpu(input_arr, kernel)
511
+ np.testing.assert_allclose(result, result_ref, rtol=1e-4)
512
+
513
+ def test_batch_varying_sizes(self):
514
+ """Test batch with varying input sizes."""
515
+ import kernel_craft_python as kc
516
+
517
+ np.random.seed(42)
518
+ sizes = [(8, 8), (16, 16), (24, 24), (32, 32)]
519
+ kernel = np.random.rand(3, 3).astype(np.float32)
520
+
521
+ results = []
522
+ for h, w in sizes:
523
+ input_arr = np.random.rand(h, w).astype(np.float32)
524
+ result = kc.conv_tiled(input_arr, kernel, 8, 8)
525
+ results.append((result.shape, w == result.shape[1]))
526
+
527
+ for (shape, w_ok) in results:
528
+ assert w_ok, f"Width mismatch: {shape}"
529
+
530
+
531
+ class TestLargeScale:
532
+ """Large scale tests."""
533
+
534
+ def test_1024x1024(self):
535
+ """Test with 1024x1024 image."""
536
+ import kernel_craft_python as kc
537
+
538
+ np.random.seed(42)
539
+ input_arr = np.random.rand(1024, 1024).astype(np.float32)
540
+ kernel = np.random.rand(3, 3).astype(np.float32)
541
+
542
+ result = kc.conv_tiled(input_arr, kernel, 8, 8)
543
+ assert result.shape == (1024, 1024)
544
+
545
+ def test_2048x2048(self):
546
+ """Test with 2048x2048 image."""
547
+ import kernel_craft_python as kc
548
+
549
+ np.random.seed(42)
550
+ input_arr = np.random.rand(2048, 2048).astype(np.float32)
551
+ kernel = np.random.rand(3, 3).astype(np.float32)
552
+
553
+ result = kc.conv_tiled(input_arr, kernel, 16, 16)
554
+ assert result.shape == (2048, 2048)
555
+
556
+ def test_4096x4096(self):
557
+ """Test with 4096x4096 image."""
558
+ import kernel_craft_python as kc
559
+
560
+ np.random.seed(42)
561
+ input_arr = np.random.rand(4096, 4096).astype(np.float32)
562
+ kernel = np.random.rand(3, 3).astype(np.float32)
563
+
564
+ result = kc.conv_tiled(input_arr, kernel, 32, 32)
565
+ assert result.shape == (4096, 4096)
566
+
567
+
568
+ if __name__ == "__main__":
569
+ pytest.main([__file__, "-v"])
@@ -1,187 +0,0 @@
1
- """Python tests for kernel_craft Python bindings."""
2
-
3
- import numpy as np
4
- import pytest
5
-
6
-
7
- def conv_cpu(input_arr, kernel):
8
- """CPU reference implementation for convolution."""
9
- height, width = input_arr.shape
10
- ksize = kernel.shape[0]
11
- kHalf = ksize // 2
12
- output = np.zeros_like(input_arr)
13
- for oy in range(height):
14
- for ox in range(width):
15
- sum_val = 0.0
16
- for ky in range(ksize):
17
- iy = oy + ky - kHalf
18
- if iy < 0 or iy >= height:
19
- continue
20
- for kx in range(ksize):
21
- ix = ox + kx - kHalf
22
- if ix < 0 or ix >= width:
23
- continue
24
- sum_val += input_arr[iy, ix] * kernel[ky, kx]
25
- output[oy, ox] = sum_val
26
- return output
27
-
28
-
29
- class TestConvNaiveNumpy:
30
- """Tests for conv_naive with numpy arrays."""
31
-
32
- def test_basic(self):
33
- """Test basic convolution produces correct shape."""
34
- import kernel_craft_python as kc
35
- input_arr = np.random.rand(8, 8).astype(np.float32)
36
- kernel = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
37
- result = kc.conv_naive(input_arr, kernel)
38
- assert result.shape == (8, 8)
39
-
40
- def test_correctness(self):
41
- """Test output matches CPU reference."""
42
- import kernel_craft_python as kc
43
- np.random.seed(42)
44
- input_arr = np.random.rand(16, 16).astype(np.float32)
45
- kernel = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float32)
46
- result_gpu = kc.conv_naive(input_arr, kernel)
47
- result_cpu = conv_cpu(input_arr, kernel)
48
- np.testing.assert_allclose(result_gpu, result_cpu, rtol=1e-5)
49
-
50
- def test_large_kernel(self):
51
- """Test with larger 5x5 kernel."""
52
- import kernel_craft_python as kc
53
- np.random.seed(42)
54
- input_arr = np.random.rand(32, 32).astype(np.float32)
55
- kernel = np.random.rand(5, 5).astype(np.float32)
56
- result_gpu = kc.conv_naive(input_arr, kernel)
57
- result_cpu = conv_cpu(input_arr, kernel)
58
- np.testing.assert_allclose(result_gpu, result_cpu, rtol=1e-4)
59
-
60
- def test_invalid_input_dim(self):
61
- """Test that 1D input raises error."""
62
- import kernel_craft_python as kc
63
- input_arr = np.random.rand(16).astype(np.float32)
64
- kernel = np.array([[1, 0], [0, 1]], dtype=np.float32)
65
- with pytest.raises(RuntimeError):
66
- kc.conv_naive(input_arr, kernel)
67
-
68
- def test_invalid_kernel_dim(self):
69
- """Test that non-2D kernel raises error."""
70
- import kernel_craft_python as kc
71
- input_arr = np.random.rand(16, 16).astype(np.float32)
72
- kernel = np.random.rand(4).astype(np.float32)
73
- with pytest.raises(RuntimeError):
74
- kc.conv_naive(input_arr, kernel)
75
-
76
- def test_even_kernel_raises(self):
77
- """Test that even-sized kernel raises error."""
78
- import kernel_craft_python as kc
79
- input_arr = np.random.rand(16, 16).astype(np.float32)
80
- kernel = np.ones((4, 4), dtype=np.float32)
81
- with pytest.raises(RuntimeError):
82
- kc.conv_naive(input_arr, kernel)
83
-
84
-
85
- class TestConvTiledNumpy:
86
- """Tests for conv_tiled with numpy arrays."""
87
-
88
- def test_tile_8x8(self):
89
- """Test tiled convolution with 8x8 tiles."""
90
- import kernel_craft_python as kc
91
- np.random.seed(42)
92
- input_arr = np.random.rand(16, 16).astype(np.float32)
93
- kernel = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float32)
94
- result = kc.conv_tiled(input_arr, kernel, 8, 8)
95
- result_ref = kc.conv_naive(input_arr, kernel)
96
- np.testing.assert_allclose(result, result_ref, rtol=1e-5)
97
-
98
- def test_tile_16x16(self):
99
- """Test tiled convolution with 16x16 tiles."""
100
- import kernel_craft_python as kc
101
- np.random.seed(42)
102
- input_arr = np.random.rand(32, 32).astype(np.float32)
103
- kernel = np.random.rand(3, 3).astype(np.float32)
104
- result = kc.conv_tiled(input_arr, kernel, 16, 16)
105
- result_ref = kc.conv_naive(input_arr, kernel)
106
- np.testing.assert_allclose(result, result_ref, rtol=1e-5)
107
-
108
- def test_tile_32x32(self):
109
- """Test tiled convolution with 32x32 tiles on larger image."""
110
- import kernel_craft_python as kc
111
- np.random.seed(42)
112
- input_arr = np.random.rand(64, 64).astype(np.float32)
113
- kernel = np.random.rand(5, 5).astype(np.float32)
114
- result = kc.conv_tiled(input_arr, kernel, 32, 32)
115
- result_ref = kc.conv_naive(input_arr, kernel)
116
- np.testing.assert_allclose(result, result_ref, rtol=1e-5)
117
-
118
- def test_different_tile_w_h(self):
119
- """Test different tile width and height."""
120
- import kernel_craft_python as kc
121
- np.random.seed(42)
122
- input_arr = np.random.rand(24, 24).astype(np.float32)
123
- kernel = np.random.rand(3, 3).astype(np.float32)
124
- result = kc.conv_tiled(input_arr, kernel, 8, 16)
125
- result_ref = kc.conv_naive(input_arr, kernel)
126
- np.testing.assert_allclose(result, result_ref, rtol=1e-5)
127
-
128
-
129
- class TestConvNaiveTorch:
130
- """Tests for conv_naive with PyTorch tensors."""
131
-
132
- def test_basic(self):
133
- """Test basic convolution with PyTorch tensor."""
134
- try:
135
- import torch
136
- import kernel_craft_python as kc
137
- except ImportError:
138
- pytest.skip("PyTorch not installed")
139
-
140
- input_tensor = torch.rand(8, 8, dtype=torch.float32, device='cuda')
141
- kernel = torch.tensor([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=torch.float32, device='cuda')
142
- result = kc.conv_naive(input_tensor, kernel)
143
- assert result.shape == (8, 8)
144
- assert result.device.type == 'cuda'
145
-
146
- def test_correctness(self):
147
- """Test output matches numpy reference."""
148
- try:
149
- import torch
150
- import kernel_craft_python as kc
151
- except ImportError:
152
- pytest.skip("PyTorch not installed")
153
-
154
- np.random.seed(42)
155
- input_np = np.random.rand(16, 16).astype(np.float32)
156
- kernel_np = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float32)
157
-
158
- result_gpu = kc.conv_naive(
159
- torch.from_numpy(input_np).cuda(),
160
- torch.from_numpy(kernel_np).cuda()
161
- )
162
- result_cpu = conv_cpu(input_np, kernel_np)
163
- np.testing.assert_allclose(result_gpu.cpu().numpy(), result_cpu, rtol=1e-4)
164
-
165
-
166
- class TestConvTiledTorch:
167
- """Tests for conv_tiled with PyTorch tensors."""
168
-
169
- def test_tile_sizes(self):
170
- """Test different tile sizes with PyTorch."""
171
- try:
172
- import torch
173
- import kernel_craft_python as kc
174
- except ImportError:
175
- pytest.skip("PyTorch not installed")
176
-
177
- input_tensor = torch.rand(32, 32, dtype=torch.float32, device='cuda')
178
- kernel = torch.rand(3, 3, dtype=torch.float32, device='cuda')
179
-
180
- for tw, th in [(8, 8), (16, 16), (32, 32)]:
181
- result = kc.conv_tiled(input_tensor, kernel, tw, th)
182
- assert result.shape == (32, 32)
183
- assert result.device.type == 'cuda'
184
-
185
-
186
- if __name__ == "__main__":
187
- pytest.main([__file__, "-v"])
File without changes