kernel-craft 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kernel_craft-0.1.0/PKG-INFO +108 -0
- kernel_craft-0.1.0/README.md +87 -0
- kernel_craft-0.1.0/kernel_craft.egg-info/PKG-INFO +108 -0
- kernel_craft-0.1.0/kernel_craft.egg-info/SOURCES.txt +10 -0
- kernel_craft-0.1.0/kernel_craft.egg-info/dependency_links.txt +1 -0
- kernel_craft-0.1.0/kernel_craft.egg-info/requires.txt +6 -0
- kernel_craft-0.1.0/kernel_craft.egg-info/top_level.txt +1 -0
- kernel_craft-0.1.0/kernel_craft_python/kernel_craft_python.cpython-312-x86_64-linux-gnu.so +0 -0
- kernel_craft-0.1.0/pyproject.toml +39 -0
- kernel_craft-0.1.0/setup.cfg +4 -0
- kernel_craft-0.1.0/tests/conftest.py +9 -0
- kernel_craft-0.1.0/tests/test_bindings.py +187 -0
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kernel-craft
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CUDA kernels for machine learning systems optimization
|
|
5
|
+
Author-email: kernel-craft contributors <contact@example.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/anomalyco/kernel-craft
|
|
8
|
+
Keywords: cuda,gpu,convolution,machine-learning,deep-learning
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
12
|
+
Classifier: Environment :: GPU :: NVIDIA CUDA
|
|
13
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
14
|
+
Requires-Python: <3.13,>=3.11
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
Requires-Dist: numpy>=1.20
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
19
|
+
Requires-Dist: twine>=4.0.0; extra == "dev"
|
|
20
|
+
Requires-Dist: build>=1.0.0; extra == "dev"
|
|
21
|
+
|
|
22
|
+
# kernel-craft Python API
|
|
23
|
+
|
|
24
|
+
CUDA convolution kernels exposed to Python with numpy and PyTorch support.
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
Build from source with CMake:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
cd /path/to/kernel-craft
|
|
32
|
+
mkdir build && cd build
|
|
33
|
+
cmake ..
|
|
34
|
+
make kernel_craft_python
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
The module will be at `src/python/build/kernel_craft_python.cpython-*.so`.
|
|
38
|
+
|
|
39
|
+
## Usage
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
import sys
|
|
43
|
+
sys.path.insert(0, 'src/python/build')
|
|
44
|
+
|
|
45
|
+
import kernel_craft_python as kc
|
|
46
|
+
import numpy as np
|
|
47
|
+
|
|
48
|
+
# Input: 2D float32 numpy array
|
|
49
|
+
input = np.random.randn(256, 256).astype(np.float32)
|
|
50
|
+
kernel = np.random.randn(3, 3).astype(np.float32)
|
|
51
|
+
|
|
52
|
+
# Naive convolution
|
|
53
|
+
out = kc.conv_naive(input, kernel) # -> np.ndarray
|
|
54
|
+
|
|
55
|
+
# Tiled convolution with configurable tile size
|
|
56
|
+
out = kc.conv_tiled(input, kernel, tile_w=8, tile_h=8) # -> np.ndarray
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Version
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
import kernel_craft_python as kc
|
|
63
|
+
print(kc.__version__) # "0.1.0"
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## PyTorch Tensors
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
import torch
|
|
70
|
+
import kernel_craft_python as kc
|
|
71
|
+
|
|
72
|
+
# Input: 2D float32 PyTorch tensor on CUDA
|
|
73
|
+
input = torch.rand(256, 256, dtype=torch.float32, device='cuda')
|
|
74
|
+
kernel = torch.rand(3, 3, dtype=torch.float32, device='cuda')
|
|
75
|
+
|
|
76
|
+
# Naive convolution
|
|
77
|
+
out = kc.conv_naive(input, kernel) # -> torch.Tensor on GPU
|
|
78
|
+
|
|
79
|
+
# Tiled convolution
|
|
80
|
+
out = kc.conv_tiled(input, kernel, tile_w=16, tile_h=16) # -> torch.Tensor on GPU
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## API Reference
|
|
84
|
+
|
|
85
|
+
| Function | Input Type | Output Type |
|
|
86
|
+
|----------|-----------|--------------|
|
|
87
|
+
| `conv_naive(input, kernel)` | np.ndarray or Tensor | np.ndarray or Tensor |
|
|
88
|
+
| `conv_tiled(input, kernel, tile_w, tile_h)` | np.ndarray or Tensor | np.ndarray or Tensor |
|
|
89
|
+
|
|
90
|
+
### Parameters
|
|
91
|
+
|
|
92
|
+
- `input`: Input image (2D, float32)
|
|
93
|
+
- `kernel`: Convolution kernel (2D, float32, square with an odd side length)
|
|
94
|
+
- `tile_w`: Tile width for tiled convolution (default: 8)
|
|
95
|
+
- `tile_h`: Tile height for tiled convolution (default: 8)
|
|
96
|
+
|
|
97
|
+
### Supported Tile Sizes
|
|
98
|
+
|
|
99
|
+
- 8x8 (default, best overall performance)
|
|
100
|
+
- 16x16
|
|
101
|
+
- 32x32
|
|
102
|
+
|
|
103
|
+
### Error Handling
|
|
104
|
+
|
|
105
|
+
All functions raise `RuntimeError` with descriptive messages for:
|
|
106
|
+
- Invalid input dimensions (must be 2D)
|
|
107
|
+
- Invalid kernel dimensions (must be 2D, square, odd-sized)
|
|
108
|
+
- Invalid dtype (must be float32)
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# kernel-craft Python API
|
|
2
|
+
|
|
3
|
+
CUDA convolution kernels exposed to Python with numpy and PyTorch support.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
Build from source with CMake:
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
cd /path/to/kernel-craft
|
|
11
|
+
mkdir build && cd build
|
|
12
|
+
cmake ..
|
|
13
|
+
make kernel_craft_python
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
The module will be at `src/python/build/kernel_craft_python.cpython-*.so`.
|
|
17
|
+
|
|
18
|
+
## Usage
|
|
19
|
+
|
|
20
|
+
```python
|
|
21
|
+
import sys
|
|
22
|
+
sys.path.insert(0, 'src/python/build')
|
|
23
|
+
|
|
24
|
+
import kernel_craft_python as kc
|
|
25
|
+
import numpy as np
|
|
26
|
+
|
|
27
|
+
# Input: 2D float32 numpy array
|
|
28
|
+
input = np.random.randn(256, 256).astype(np.float32)
|
|
29
|
+
kernel = np.random.randn(3, 3).astype(np.float32)
|
|
30
|
+
|
|
31
|
+
# Naive convolution
|
|
32
|
+
out = kc.conv_naive(input, kernel) # -> np.ndarray
|
|
33
|
+
|
|
34
|
+
# Tiled convolution with configurable tile size
|
|
35
|
+
out = kc.conv_tiled(input, kernel, tile_w=8, tile_h=8) # -> np.ndarray
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Version
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
import kernel_craft_python as kc
|
|
42
|
+
print(kc.__version__) # "0.1.0"
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## PyTorch Tensors
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
import torch
|
|
49
|
+
import kernel_craft_python as kc
|
|
50
|
+
|
|
51
|
+
# Input: 2D float32 PyTorch tensor on CUDA
|
|
52
|
+
input = torch.rand(256, 256, dtype=torch.float32, device='cuda')
|
|
53
|
+
kernel = torch.rand(3, 3, dtype=torch.float32, device='cuda')
|
|
54
|
+
|
|
55
|
+
# Naive convolution
|
|
56
|
+
out = kc.conv_naive(input, kernel) # -> torch.Tensor on GPU
|
|
57
|
+
|
|
58
|
+
# Tiled convolution
|
|
59
|
+
out = kc.conv_tiled(input, kernel, tile_w=16, tile_h=16) # -> torch.Tensor on GPU
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## API Reference
|
|
63
|
+
|
|
64
|
+
| Function | Input Type | Output Type |
|
|
65
|
+
|----------|-----------|--------------|
|
|
66
|
+
| `conv_naive(input, kernel)` | np.ndarray or Tensor | np.ndarray or Tensor |
|
|
67
|
+
| `conv_tiled(input, kernel, tile_w, tile_h)` | np.ndarray or Tensor | np.ndarray or Tensor |
|
|
68
|
+
|
|
69
|
+
### Parameters
|
|
70
|
+
|
|
71
|
+
- `input`: Input image (2D, float32)
|
|
72
|
+
- `kernel`: Convolution kernel (2D, float32, square with an odd side length)
|
|
73
|
+
- `tile_w`: Tile width for tiled convolution (default: 8)
|
|
74
|
+
- `tile_h`: Tile height for tiled convolution (default: 8)
|
|
75
|
+
|
|
76
|
+
### Supported Tile Sizes
|
|
77
|
+
|
|
78
|
+
- 8x8 (default, best overall performance)
|
|
79
|
+
- 16x16
|
|
80
|
+
- 32x32
|
|
81
|
+
|
|
82
|
+
### Error Handling
|
|
83
|
+
|
|
84
|
+
All functions raise `RuntimeError` with descriptive messages for:
|
|
85
|
+
- Invalid input dimensions (must be 2D)
|
|
86
|
+
- Invalid kernel dimensions (must be 2D, square, odd-sized)
|
|
87
|
+
- Invalid dtype (must be float32)
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kernel-craft
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CUDA kernels for machine learning systems optimization
|
|
5
|
+
Author-email: kernel-craft contributors <contact@example.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/anomalyco/kernel-craft
|
|
8
|
+
Keywords: cuda,gpu,convolution,machine-learning,deep-learning
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
12
|
+
Classifier: Environment :: GPU :: NVIDIA CUDA
|
|
13
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
14
|
+
Requires-Python: <3.13,>=3.11
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
Requires-Dist: numpy>=1.20
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
19
|
+
Requires-Dist: twine>=4.0.0; extra == "dev"
|
|
20
|
+
Requires-Dist: build>=1.0.0; extra == "dev"
|
|
21
|
+
|
|
22
|
+
# kernel-craft Python API
|
|
23
|
+
|
|
24
|
+
CUDA convolution kernels exposed to Python with numpy and PyTorch support.
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
Build from source with CMake:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
cd /path/to/kernel-craft
|
|
32
|
+
mkdir build && cd build
|
|
33
|
+
cmake ..
|
|
34
|
+
make kernel_craft_python
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
The module will be at `src/python/build/kernel_craft_python.cpython-*.so`.
|
|
38
|
+
|
|
39
|
+
## Usage
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
import sys
|
|
43
|
+
sys.path.insert(0, 'src/python/build')
|
|
44
|
+
|
|
45
|
+
import kernel_craft_python as kc
|
|
46
|
+
import numpy as np
|
|
47
|
+
|
|
48
|
+
# Input: 2D float32 numpy array
|
|
49
|
+
input = np.random.randn(256, 256).astype(np.float32)
|
|
50
|
+
kernel = np.random.randn(3, 3).astype(np.float32)
|
|
51
|
+
|
|
52
|
+
# Naive convolution
|
|
53
|
+
out = kc.conv_naive(input, kernel) # -> np.ndarray
|
|
54
|
+
|
|
55
|
+
# Tiled convolution with configurable tile size
|
|
56
|
+
out = kc.conv_tiled(input, kernel, tile_w=8, tile_h=8) # -> np.ndarray
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Version
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
import kernel_craft_python as kc
|
|
63
|
+
print(kc.__version__) # "0.1.0"
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## PyTorch Tensors
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
import torch
|
|
70
|
+
import kernel_craft_python as kc
|
|
71
|
+
|
|
72
|
+
# Input: 2D float32 PyTorch tensor on CUDA
|
|
73
|
+
input = torch.rand(256, 256, dtype=torch.float32, device='cuda')
|
|
74
|
+
kernel = torch.rand(3, 3, dtype=torch.float32, device='cuda')
|
|
75
|
+
|
|
76
|
+
# Naive convolution
|
|
77
|
+
out = kc.conv_naive(input, kernel) # -> torch.Tensor on GPU
|
|
78
|
+
|
|
79
|
+
# Tiled convolution
|
|
80
|
+
out = kc.conv_tiled(input, kernel, tile_w=16, tile_h=16) # -> torch.Tensor on GPU
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## API Reference
|
|
84
|
+
|
|
85
|
+
| Function | Input Type | Output Type |
|
|
86
|
+
|----------|-----------|--------------|
|
|
87
|
+
| `conv_naive(input, kernel)` | np.ndarray or Tensor | np.ndarray or Tensor |
|
|
88
|
+
| `conv_tiled(input, kernel, tile_w, tile_h)` | np.ndarray or Tensor | np.ndarray or Tensor |
|
|
89
|
+
|
|
90
|
+
### Parameters
|
|
91
|
+
|
|
92
|
+
- `input`: Input image (2D, float32)
|
|
93
|
+
- `kernel`: Convolution kernel (2D, float32, square with an odd side length)
|
|
94
|
+
- `tile_w`: Tile width for tiled convolution (default: 8)
|
|
95
|
+
- `tile_h`: Tile height for tiled convolution (default: 8)
|
|
96
|
+
|
|
97
|
+
### Supported Tile Sizes
|
|
98
|
+
|
|
99
|
+
- 8x8 (default, best overall performance)
|
|
100
|
+
- 16x16
|
|
101
|
+
- 32x32
|
|
102
|
+
|
|
103
|
+
### Error Handling
|
|
104
|
+
|
|
105
|
+
All functions raise `RuntimeError` with descriptive messages for:
|
|
106
|
+
- Invalid input dimensions (must be 2D)
|
|
107
|
+
- Invalid kernel dimensions (must be 2D, square, odd-sized)
|
|
108
|
+
- Invalid dtype (must be float32)
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
kernel_craft.egg-info/PKG-INFO
|
|
4
|
+
kernel_craft.egg-info/SOURCES.txt
|
|
5
|
+
kernel_craft.egg-info/dependency_links.txt
|
|
6
|
+
kernel_craft.egg-info/requires.txt
|
|
7
|
+
kernel_craft.egg-info/top_level.txt
|
|
8
|
+
kernel_craft_python/kernel_craft_python.cpython-312-x86_64-linux-gnu.so
|
|
9
|
+
tests/conftest.py
|
|
10
|
+
tests/test_bindings.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
kernel_craft_python
|
|
Binary file
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
[build-system]
# The [project] table declares `license = "MIT"` as a plain SPDX string
# (PEP 639). setuptools only accepts that form from the 77.x series onward;
# older releases require a `{text = ...}` / `{file = ...}` table and fail
# validation, so the minimum build requirement must reflect that.
requires = ["setuptools>=77.0.3", "wheel"]
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "kernel-craft"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "CUDA kernels for machine learning systems optimization"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11, <3.13"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "kernel-craft contributors", email = "contact@example.com"}
|
|
14
|
+
]
|
|
15
|
+
keywords = ["cuda", "gpu", "convolution", "machine-learning", "deep-learning"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
20
|
+
"Environment :: GPU :: NVIDIA CUDA",
|
|
21
|
+
"Operating System :: POSIX :: Linux",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
dependencies = [
|
|
25
|
+
"numpy>=1.20",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
[project.optional-dependencies]
|
|
29
|
+
dev = ["pytest>=7.0", "twine>=4.0.0", "build>=1.0.0"]
|
|
30
|
+
|
|
31
|
+
[project.urls]
|
|
32
|
+
Homepage = "https://github.com/anomalyco/kernel-craft"
|
|
33
|
+
|
|
34
|
+
[tool.setuptools.packages.find]
|
|
35
|
+
where = ["."]
|
|
36
|
+
include = ["kernel_craft_python*"]
|
|
37
|
+
|
|
38
|
+
[tool.setuptools.package-data]
|
|
39
|
+
kernel_craft_python = ["*.so"]
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""pytest configuration for kernel_craft Python tests."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
# Put the sibling "build" directory (two levels above this file, then
# "build") at the front of the import path, but only when it is present.
build_dir = Path(__file__).parents[1].joinpath("build")
if build_dir.exists():
    sys.path.insert(0, str(build_dir))
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""Python tests for kernel_craft Python bindings."""
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def conv_cpu(input_arr, kernel):
    """Compute the CPU reference result for the 2D sliding-window filter.

    Each output pixel is the sum of ``input_arr[oy + ky - kh // 2,
    ox + kx - kw // 2] * kernel[ky, kx]`` over all kernel taps. The kernel is
    applied as given (it is not flipped), and taps that fall outside the
    image contribute nothing, which is equivalent to zero padding. The
    output has the same shape and dtype as ``input_arr``.

    Args:
        input_arr: 2D array holding the image.
        kernel: 2D array of filter taps. Kernel height and width are read
            independently, so rectangular kernels are supported.

    Returns:
        A new array shaped like ``input_arr`` with ``input_arr``'s dtype.

    Raises:
        ValueError: If ``input_arr`` or ``kernel`` is not 2D (shape
            unpacking fails).
    """
    height, width = input_arr.shape
    k_rows, k_cols = kernel.shape
    half_rows = k_rows // 2
    half_cols = k_cols // 2

    # Accumulate in float64 so the reference does not depend on how the
    # installed NumPy promotes Python-float / float32 scalar arithmetic.
    source = input_arr.astype(np.float64)
    acc = np.zeros((height, width), dtype=np.float64)

    for ky in range(k_rows):
        dy = ky - half_rows
        # Output rows whose source row (oy + dy) lies inside the image.
        oy_start = max(0, -dy)
        oy_stop = min(height, height - dy)
        if oy_start >= oy_stop:
            continue
        for kx in range(k_cols):
            dx = kx - half_cols
            # Output columns whose source column (ox + dx) lies inside.
            ox_start = max(0, -dx)
            ox_stop = min(width, width - dx)
            if ox_start >= ox_stop:
                continue
            # One vectorized add per tap instead of a Python loop per pixel.
            acc[oy_start:oy_stop, ox_start:ox_stop] += (
                source[oy_start + dy:oy_stop + dy, ox_start + dx:ox_stop + dx]
                * float(kernel[ky, kx])
            )

    return acc.astype(input_arr.dtype)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class TestConvNaiveNumpy:
    """Tests for ``conv_naive`` called with NumPy arrays.

    The extension module is imported inside each test, so a missing or
    broken build is reported by every test that needs it.
    """

    def test_basic(self):
        """Output has the same shape as the input image."""
        import kernel_craft_python as kc

        input_arr = np.random.rand(8, 8).astype(np.float32)
        kernel = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
        result = kc.conv_naive(input_arr, kernel)
        assert result.shape == (8, 8)

    def test_correctness(self):
        """Output matches the CPU reference for a 3x3 Laplacian kernel."""
        import kernel_craft_python as kc

        np.random.seed(42)
        input_arr = np.random.rand(16, 16).astype(np.float32)
        kernel = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float32)
        result_gpu = kc.conv_naive(input_arr, kernel)
        result_cpu = conv_cpu(input_arr, kernel)
        # The Laplacian taps sum to zero, so outputs can land close to 0
        # where a purely relative tolerance is tighter than float32 rounding.
        # A small absolute tolerance keeps the comparison meaningful there.
        np.testing.assert_allclose(result_gpu, result_cpu, rtol=1e-5, atol=1e-6)

    def test_large_kernel(self):
        """Output matches the CPU reference for a 5x5 kernel."""
        import kernel_craft_python as kc

        np.random.seed(42)
        input_arr = np.random.rand(32, 32).astype(np.float32)
        kernel = np.random.rand(5, 5).astype(np.float32)
        result_gpu = kc.conv_naive(input_arr, kernel)
        result_cpu = conv_cpu(input_arr, kernel)
        np.testing.assert_allclose(result_gpu, result_cpu, rtol=1e-4)

    def test_invalid_input_dim(self):
        """A 1D input raises ``RuntimeError``."""
        import kernel_craft_python as kc

        input_arr = np.random.rand(16).astype(np.float32)
        # The kernel is deliberately valid (3x3, odd) so that the only thing
        # wrong with this call is the input's dimensionality.
        kernel = np.ones((3, 3), dtype=np.float32)
        with pytest.raises(RuntimeError):
            kc.conv_naive(input_arr, kernel)

    def test_invalid_kernel_dim(self):
        """A non-2D kernel raises ``RuntimeError``."""
        import kernel_craft_python as kc

        input_arr = np.random.rand(16, 16).astype(np.float32)
        # Odd length, so an "even-sized kernel" check cannot be what fires.
        kernel = np.random.rand(3).astype(np.float32)
        with pytest.raises(RuntimeError):
            kc.conv_naive(input_arr, kernel)

    def test_even_kernel_raises(self):
        """An even-sized (4x4) kernel raises ``RuntimeError``."""
        import kernel_craft_python as kc

        input_arr = np.random.rand(16, 16).astype(np.float32)
        kernel = np.ones((4, 4), dtype=np.float32)
        with pytest.raises(RuntimeError):
            kc.conv_naive(input_arr, kernel)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class TestConvTiledNumpy:
    """Checks that ``conv_tiled`` agrees with ``conv_naive`` on NumPy arrays."""

    @staticmethod
    def _assert_tiled_matches_naive(kc, image, taps, tile_w, tile_h):
        """Run both kernels on the same data and compare with rtol=1e-5."""
        tiled = kc.conv_tiled(image, taps, tile_w, tile_h)
        naive = kc.conv_naive(image, taps)
        np.testing.assert_allclose(tiled, naive, rtol=1e-5)

    def test_tile_8x8(self):
        """8x8 tiles, 16x16 image, 3x3 Laplacian kernel."""
        import kernel_craft_python as kc

        np.random.seed(42)
        image = np.random.rand(16, 16).astype(np.float32)
        laplacian = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float32)
        self._assert_tiled_matches_naive(kc, image, laplacian, 8, 8)

    def test_tile_16x16(self):
        """16x16 tiles, 32x32 image, random 3x3 kernel."""
        import kernel_craft_python as kc

        np.random.seed(42)
        image = np.random.rand(32, 32).astype(np.float32)
        taps = np.random.rand(3, 3).astype(np.float32)
        self._assert_tiled_matches_naive(kc, image, taps, 16, 16)

    def test_tile_32x32(self):
        """32x32 tiles, 64x64 image, random 5x5 kernel."""
        import kernel_craft_python as kc

        np.random.seed(42)
        image = np.random.rand(64, 64).astype(np.float32)
        taps = np.random.rand(5, 5).astype(np.float32)
        self._assert_tiled_matches_naive(kc, image, taps, 32, 32)

    def test_different_tile_w_h(self):
        """Tile arguments 8 and 16 (unequal), 24x24 image, random 3x3 kernel."""
        import kernel_craft_python as kc

        np.random.seed(42)
        image = np.random.rand(24, 24).astype(np.float32)
        taps = np.random.rand(3, 3).astype(np.float32)
        self._assert_tiled_matches_naive(kc, image, taps, 8, 16)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class TestConvNaiveTorch:
    """Tests for ``conv_naive`` called with PyTorch CUDA tensors."""

    @staticmethod
    def _torch_with_cuda():
        """Return the ``torch`` module, skipping the calling test if unusable.

        The test is skipped when PyTorch cannot be imported or when PyTorch
        reports no CUDA device; every tensor in these tests is created with
        ``device='cuda'``.
        """
        torch = pytest.importorskip("torch", reason="PyTorch not installed")
        if not torch.cuda.is_available():
            pytest.skip("CUDA is not available to PyTorch")
        return torch

    def test_basic(self):
        """Result keeps the input shape and stays on the CUDA device."""
        torch = self._torch_with_cuda()
        # Imported outside any skip logic: a missing extension module is a
        # real failure and must not be reported as "PyTorch not installed".
        import kernel_craft_python as kc

        input_tensor = torch.rand(8, 8, dtype=torch.float32, device='cuda')
        kernel = torch.tensor(
            [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
            dtype=torch.float32,
            device='cuda',
        )
        result = kc.conv_naive(input_tensor, kernel)
        assert result.shape == (8, 8)
        assert result.device.type == 'cuda'

    def test_correctness(self):
        """Output matches the CPU reference for a 3x3 Laplacian kernel."""
        torch = self._torch_with_cuda()
        import kernel_craft_python as kc

        np.random.seed(42)
        input_np = np.random.rand(16, 16).astype(np.float32)
        kernel_np = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float32)

        result_gpu = kc.conv_naive(
            torch.from_numpy(input_np).cuda(),
            torch.from_numpy(kernel_np).cuda(),
        )
        result_cpu = conv_cpu(input_np, kernel_np)
        # The Laplacian taps sum to zero, so some outputs can be close to 0;
        # a small absolute tolerance avoids failing on float32 rounding there.
        np.testing.assert_allclose(
            result_gpu.cpu().numpy(), result_cpu, rtol=1e-4, atol=1e-6
        )
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
class TestConvTiledTorch:
    """Tests for ``conv_tiled`` called with PyTorch CUDA tensors."""

    @staticmethod
    def _torch_with_cuda():
        """Return the ``torch`` module, skipping the calling test if unusable.

        The test is skipped when PyTorch cannot be imported or when PyTorch
        reports no CUDA device; every tensor in this test is created with
        ``device='cuda'``.
        """
        torch = pytest.importorskip("torch", reason="PyTorch not installed")
        if not torch.cuda.is_available():
            pytest.skip("CUDA is not available to PyTorch")
        return torch

    def test_tile_sizes(self):
        """Each tile size yields a (32, 32) result that stays on CUDA."""
        torch = self._torch_with_cuda()
        # Imported outside any skip logic: a missing extension module is a
        # real failure and must not be reported as "PyTorch not installed".
        import kernel_craft_python as kc

        input_tensor = torch.rand(32, 32, dtype=torch.float32, device='cuda')
        kernel = torch.rand(3, 3, dtype=torch.float32, device='cuda')

        for tw, th in [(8, 8), (16, 16), (32, 32)]:
            result = kc.conv_tiled(input_tensor, kernel, tw, th)
            assert result.shape == (32, 32)
            assert result.device.type == 'cuda'
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
if __name__ == "__main__":
    # pytest.main() returns the run's exit status; hand it to the interpreter
    # so running this file directly exits non-zero when a test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))
|