kernel-craft 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,108 @@
1
+ Metadata-Version: 2.4
2
+ Name: kernel-craft
3
+ Version: 0.1.0
4
+ Summary: CUDA kernels for machine learning systems optimization
5
+ Author-email: kernel-craft contributors <contact@example.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/anomalyco/kernel-craft
8
+ Keywords: cuda,gpu,convolution,machine-learning,deep-learning
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
+ Classifier: Environment :: GPU :: NVIDIA CUDA
13
+ Classifier: Operating System :: POSIX :: Linux
14
+ Requires-Python: <3.13,>=3.11
15
+ Description-Content-Type: text/markdown
16
+ Requires-Dist: numpy>=1.20
17
+ Provides-Extra: dev
18
+ Requires-Dist: pytest>=7.0; extra == "dev"
19
+ Requires-Dist: twine>=4.0.0; extra == "dev"
20
+ Requires-Dist: build>=1.0.0; extra == "dev"
21
+
22
+ # kernel-craft Python API
23
+
24
+ CUDA convolution kernels exposed to Python with numpy and PyTorch support.
25
+
26
+ ## Installation
27
+
28
+ Build from source with CMake:
29
+
30
+ ```bash
31
+ cd /path/to/kernel-craft
32
+ mkdir build && cd build
33
+ cmake ..
34
+ make kernel_craft_python
35
+ ```
36
+
37
+ The module will be at `src/python/build/kernel_craft_python.cpython-*.so`.
38
+
39
+ ## Usage
40
+
41
+ ```python
42
+ import sys
43
+ sys.path.insert(0, 'src/python/build')
44
+
45
+ import kernel_craft_python as kc
46
+ import numpy as np
47
+
48
+ # Input: 2D float32 numpy array
49
+ input = np.random.randn(256, 256).astype(np.float32)
50
+ kernel = np.random.randn(3, 3).astype(np.float32)
51
+
52
+ # Naive convolution
53
+ out = kc.conv_naive(input, kernel) # -> np.ndarray
54
+
55
+ # Tiled convolution with configurable tile size
56
+ out = kc.conv_tiled(input, kernel, tile_w=8, tile_h=8) # -> np.ndarray
57
+ ```
58
+
59
+ ## Version
60
+
61
+ ```python
62
+ import kernel_craft_python as kc
63
+ print(kc.__version__) # "0.1.0"
64
+ ```
65
+
66
+ ## PyTorch Tensors
67
+
68
+ ```python
69
+ import torch
70
+ import kernel_craft_python as kc
71
+
72
+ # Input: 2D float32 PyTorch tensor on CUDA
73
+ input = torch.rand(256, 256, dtype=torch.float32, device='cuda')
74
+ kernel = torch.rand(3, 3, dtype=torch.float32, device='cuda')
75
+
76
+ # Naive convolution
77
+ out = kc.conv_naive(input, kernel) # -> torch.Tensor on GPU
78
+
79
+ # Tiled convolution
80
+ out = kc.conv_tiled(input, kernel, tile_w=16, tile_h=16) # -> torch.Tensor on GPU
81
+ ```
82
+
83
+ ## API Reference
84
+
85
+ | Function | Input Type | Output Type |
86
+ |----------|-----------|--------------|
87
+ | `conv_naive(input, kernel)` | np.ndarray or Tensor | np.ndarray or Tensor |
88
+ | `conv_tiled(input, kernel, tile_w, tile_h)` | np.ndarray or Tensor | np.ndarray or Tensor |
89
+
90
+ ### Parameters
91
+
92
+ - `input`: Input image (2D, float32)
93
+ - `kernel`: Convolution kernel (2D, float32, square with an odd side length, e.g. 3x3 or 5x5)
94
+ - `tile_w`: Tile width for tiled convolution (default: 8)
95
+ - `tile_h`: Tile height for tiled convolution (default: 8)
96
+
97
+ ### Supported Tile Sizes
98
+
99
+ - 8x8 (default, best overall performance)
100
+ - 16x16
101
+ - 32x32
102
+
103
+ ### Error Handling
104
+
105
+ All functions raise `RuntimeError` with descriptive messages for:
106
+ - Invalid input dimensions (must be 2D)
107
+ - Invalid kernel dimensions (must be 2D, square, odd-sized)
108
+ - Invalid dtype (must be float32)
@@ -0,0 +1,87 @@
1
+ # kernel-craft Python API
2
+
3
+ CUDA convolution kernels exposed to Python with numpy and PyTorch support.
4
+
5
+ ## Installation
6
+
7
+ Build from source with CMake:
8
+
9
+ ```bash
10
+ cd /path/to/kernel-craft
11
+ mkdir build && cd build
12
+ cmake ..
13
+ make kernel_craft_python
14
+ ```
15
+
16
+ The module will be at `src/python/build/kernel_craft_python.cpython-*.so`.
17
+
18
+ ## Usage
19
+
20
+ ```python
21
+ import sys
22
+ sys.path.insert(0, 'src/python/build')
23
+
24
+ import kernel_craft_python as kc
25
+ import numpy as np
26
+
27
+ # Input: 2D float32 numpy array
28
+ input = np.random.randn(256, 256).astype(np.float32)
29
+ kernel = np.random.randn(3, 3).astype(np.float32)
30
+
31
+ # Naive convolution
32
+ out = kc.conv_naive(input, kernel) # -> np.ndarray
33
+
34
+ # Tiled convolution with configurable tile size
35
+ out = kc.conv_tiled(input, kernel, tile_w=8, tile_h=8) # -> np.ndarray
36
+ ```
37
+
38
+ ## Version
39
+
40
+ ```python
41
+ import kernel_craft_python as kc
42
+ print(kc.__version__) # "0.1.0"
43
+ ```
44
+
45
+ ## PyTorch Tensors
46
+
47
+ ```python
48
+ import torch
49
+ import kernel_craft_python as kc
50
+
51
+ # Input: 2D float32 PyTorch tensor on CUDA
52
+ input = torch.rand(256, 256, dtype=torch.float32, device='cuda')
53
+ kernel = torch.rand(3, 3, dtype=torch.float32, device='cuda')
54
+
55
+ # Naive convolution
56
+ out = kc.conv_naive(input, kernel) # -> torch.Tensor on GPU
57
+
58
+ # Tiled convolution
59
+ out = kc.conv_tiled(input, kernel, tile_w=16, tile_h=16) # -> torch.Tensor on GPU
60
+ ```
61
+
62
+ ## API Reference
63
+
64
+ | Function | Input Type | Output Type |
65
+ |----------|-----------|--------------|
66
+ | `conv_naive(input, kernel)` | np.ndarray or Tensor | np.ndarray or Tensor |
67
+ | `conv_tiled(input, kernel, tile_w, tile_h)` | np.ndarray or Tensor | np.ndarray or Tensor |
68
+
69
+ ### Parameters
70
+
71
+ - `input`: Input image (2D, float32)
72
+ - `kernel`: Convolution kernel (2D, float32, square with an odd side length, e.g. 3x3 or 5x5)
73
+ - `tile_w`: Tile width for tiled convolution (default: 8)
74
+ - `tile_h`: Tile height for tiled convolution (default: 8)
75
+
76
+ ### Supported Tile Sizes
77
+
78
+ - 8x8 (default, best overall performance)
79
+ - 16x16
80
+ - 32x32
81
+
82
+ ### Error Handling
83
+
84
+ All functions raise `RuntimeError` with descriptive messages for:
85
+ - Invalid input dimensions (must be 2D)
86
+ - Invalid kernel dimensions (must be 2D, square, odd-sized)
87
+ - Invalid dtype (must be float32)
@@ -0,0 +1,108 @@
1
+ Metadata-Version: 2.4
2
+ Name: kernel-craft
3
+ Version: 0.1.0
4
+ Summary: CUDA kernels for machine learning systems optimization
5
+ Author-email: kernel-craft contributors <contact@example.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/anomalyco/kernel-craft
8
+ Keywords: cuda,gpu,convolution,machine-learning,deep-learning
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
+ Classifier: Environment :: GPU :: NVIDIA CUDA
13
+ Classifier: Operating System :: POSIX :: Linux
14
+ Requires-Python: <3.13,>=3.11
15
+ Description-Content-Type: text/markdown
16
+ Requires-Dist: numpy>=1.20
17
+ Provides-Extra: dev
18
+ Requires-Dist: pytest>=7.0; extra == "dev"
19
+ Requires-Dist: twine>=4.0.0; extra == "dev"
20
+ Requires-Dist: build>=1.0.0; extra == "dev"
21
+
22
+ # kernel-craft Python API
23
+
24
+ CUDA convolution kernels exposed to Python with numpy and PyTorch support.
25
+
26
+ ## Installation
27
+
28
+ Build from source with CMake:
29
+
30
+ ```bash
31
+ cd /path/to/kernel-craft
32
+ mkdir build && cd build
33
+ cmake ..
34
+ make kernel_craft_python
35
+ ```
36
+
37
+ The module will be at `src/python/build/kernel_craft_python.cpython-*.so`.
38
+
39
+ ## Usage
40
+
41
+ ```python
42
+ import sys
43
+ sys.path.insert(0, 'src/python/build')
44
+
45
+ import kernel_craft_python as kc
46
+ import numpy as np
47
+
48
+ # Input: 2D float32 numpy array
49
+ input = np.random.randn(256, 256).astype(np.float32)
50
+ kernel = np.random.randn(3, 3).astype(np.float32)
51
+
52
+ # Naive convolution
53
+ out = kc.conv_naive(input, kernel) # -> np.ndarray
54
+
55
+ # Tiled convolution with configurable tile size
56
+ out = kc.conv_tiled(input, kernel, tile_w=8, tile_h=8) # -> np.ndarray
57
+ ```
58
+
59
+ ## Version
60
+
61
+ ```python
62
+ import kernel_craft_python as kc
63
+ print(kc.__version__) # "0.1.0"
64
+ ```
65
+
66
+ ## PyTorch Tensors
67
+
68
+ ```python
69
+ import torch
70
+ import kernel_craft_python as kc
71
+
72
+ # Input: 2D float32 PyTorch tensor on CUDA
73
+ input = torch.rand(256, 256, dtype=torch.float32, device='cuda')
74
+ kernel = torch.rand(3, 3, dtype=torch.float32, device='cuda')
75
+
76
+ # Naive convolution
77
+ out = kc.conv_naive(input, kernel) # -> torch.Tensor on GPU
78
+
79
+ # Tiled convolution
80
+ out = kc.conv_tiled(input, kernel, tile_w=16, tile_h=16) # -> torch.Tensor on GPU
81
+ ```
82
+
83
+ ## API Reference
84
+
85
+ | Function | Input Type | Output Type |
86
+ |----------|-----------|--------------|
87
+ | `conv_naive(input, kernel)` | np.ndarray or Tensor | np.ndarray or Tensor |
88
+ | `conv_tiled(input, kernel, tile_w, tile_h)` | np.ndarray or Tensor | np.ndarray or Tensor |
89
+
90
+ ### Parameters
91
+
92
+ - `input`: Input image (2D, float32)
93
+ - `kernel`: Convolution kernel (2D, float32, square with an odd side length, e.g. 3x3 or 5x5)
94
+ - `tile_w`: Tile width for tiled convolution (default: 8)
95
+ - `tile_h`: Tile height for tiled convolution (default: 8)
96
+
97
+ ### Supported Tile Sizes
98
+
99
+ - 8x8 (default, best overall performance)
100
+ - 16x16
101
+ - 32x32
102
+
103
+ ### Error Handling
104
+
105
+ All functions raise `RuntimeError` with descriptive messages for:
106
+ - Invalid input dimensions (must be 2D)
107
+ - Invalid kernel dimensions (must be 2D, square, odd-sized)
108
+ - Invalid dtype (must be float32)
@@ -0,0 +1,10 @@
1
+ README.md
2
+ pyproject.toml
3
+ kernel_craft.egg-info/PKG-INFO
4
+ kernel_craft.egg-info/SOURCES.txt
5
+ kernel_craft.egg-info/dependency_links.txt
6
+ kernel_craft.egg-info/requires.txt
7
+ kernel_craft.egg-info/top_level.txt
8
+ kernel_craft_python/kernel_craft_python.cpython-312-x86_64-linux-gnu.so
9
+ tests/conftest.py
10
+ tests/test_bindings.py
@@ -0,0 +1,6 @@
1
+ numpy>=1.20
2
+
3
+ [dev]
4
+ pytest>=7.0
5
+ twine>=4.0.0
6
+ build>=1.0.0
@@ -0,0 +1 @@
1
+ kernel_craft_python
@@ -0,0 +1,39 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "kernel-craft"
7
+ version = "0.1.0"
8
+ description = "CUDA kernels for machine learning systems optimization"
9
+ readme = "README.md"
10
+ requires-python = ">=3.11, <3.13"
11
+ license = "MIT"
12
+ authors = [
13
+ {name = "kernel-craft contributors", email = "contact@example.com"}
14
+ ]
15
+ keywords = ["cuda", "gpu", "convolution", "machine-learning", "deep-learning"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Programming Language :: Python :: 3",
19
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
20
+ "Environment :: GPU :: NVIDIA CUDA",
21
+ "Operating System :: POSIX :: Linux",
22
+ ]
23
+
24
+ dependencies = [
25
+ "numpy>=1.20",
26
+ ]
27
+
28
+ [project.optional-dependencies]
29
+ dev = ["pytest>=7.0", "twine>=4.0.0", "build>=1.0.0"]
30
+
31
+ [project.urls]
32
+ Homepage = "https://github.com/anomalyco/kernel-craft"
33
+
34
+ [tool.setuptools.packages.find]
35
+ where = ["."]
36
+ include = ["kernel_craft_python*"]
37
+
38
+ [tool.setuptools.package-data]
39
+ kernel_craft_python = ["*.so"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,9 @@
1
+ """pytest configuration for kernel_craft Python tests."""
2
+
3
+ import sys
4
+ from pathlib import Path
5
+
6
# Make a locally built extension importable: when a "build" directory sits
# next to the tests directory, put it at the front of the import path.
_tests_dir = Path(__file__).parent
build_dir = _tests_dir.parent / "build"
if build_dir.exists():
    sys.path.insert(0, str(build_dir))
@@ -0,0 +1,187 @@
1
+ """Python tests for kernel_craft Python bindings."""
2
+
3
+ import numpy as np
4
+ import pytest
5
+
6
+
7
def conv_cpu(input_arr, kernel):
    """CPU reference implementation for convolution.

    Computes a same-size 2D cross-correlation (the kernel is applied as-is,
    not flipped). Kernel taps that would read outside ``input_arr`` are
    skipped, which is equivalent to zero padding at the borders.

    Args:
        input_arr: 2D array holding the image.
        kernel: 2D array of weights. The kernel is centred independently
            along each axis (``shape // 2``), so rectangular kernels are
            handled as well as square ones.

    Returns:
        A new array with the shape and dtype of ``input_arr``.
    """
    height, width = input_arr.shape
    k_rows, k_cols = kernel.shape
    half_y, half_x = k_rows // 2, k_cols // 2

    # Accumulate in float64 explicitly so the precision of the reference does
    # not depend on how NumPy promotes a Python float mixed with float32.
    src = np.asarray(input_arr, dtype=np.float64)
    weights = np.asarray(kernel, dtype=np.float64)
    acc = np.zeros((height, width), dtype=np.float64)

    # One vectorised update per kernel tap: tap (ky, kx) reads the input at
    # offset (dy, dx), so it only contributes to the output pixels whose
    # shifted source pixel is still inside the image.
    for ky in range(k_rows):
        dy = ky - half_y
        row_lo, row_hi = max(0, -dy), min(height, height - dy)
        if row_lo >= row_hi:
            continue
        for kx in range(k_cols):
            dx = kx - half_x
            col_lo, col_hi = max(0, -dx), min(width, width - dx)
            if col_lo >= col_hi:
                continue
            acc[row_lo:row_hi, col_lo:col_hi] += (
                weights[ky, kx]
                * src[row_lo + dy:row_hi + dy, col_lo + dx:col_hi + dx]
            )

    return acc.astype(input_arr.dtype)
27
+
28
+
29
class TestConvNaiveNumpy:
    """Tests for conv_naive with numpy arrays."""

    def test_basic(self):
        """Test basic convolution produces correct shape."""
        import kernel_craft_python as kc

        input_arr = np.random.rand(8, 8).astype(np.float32)
        kernel = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
        result = kc.conv_naive(input_arr, kernel)
        assert result.shape == (8, 8)

    def test_correctness(self):
        """Test output matches CPU reference."""
        import kernel_craft_python as kc

        np.random.seed(42)
        input_arr = np.random.rand(16, 16).astype(np.float32)
        kernel = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float32)
        result_gpu = kc.conv_naive(input_arr, kernel)
        result_cpu = conv_cpu(input_arr, kernel)
        # The Laplacian taps sum to zero, so outputs can land arbitrarily
        # close to 0 where a purely relative tolerance is unsatisfiable;
        # allow a small absolute slack as well.
        np.testing.assert_allclose(result_gpu, result_cpu, rtol=1e-5, atol=1e-6)

    def test_large_kernel(self):
        """Test with larger 5x5 kernel."""
        import kernel_craft_python as kc

        np.random.seed(42)
        input_arr = np.random.rand(32, 32).astype(np.float32)
        kernel = np.random.rand(5, 5).astype(np.float32)
        result_gpu = kc.conv_naive(input_arr, kernel)
        result_cpu = conv_cpu(input_arr, kernel)
        np.testing.assert_allclose(result_gpu, result_cpu, rtol=1e-4)

    def test_invalid_input_dim(self):
        """Test that 1D input raises error."""
        import kernel_craft_python as kc

        input_arr = np.random.rand(16).astype(np.float32)
        # Use an otherwise valid (2D, square, odd-sized) kernel so that the
        # 1D input is the only thing that can trigger the error.
        kernel = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.float32)
        with pytest.raises(RuntimeError):
            kc.conv_naive(input_arr, kernel)

    def test_invalid_kernel_dim(self):
        """Test that non-2D kernel raises error."""
        import kernel_craft_python as kc

        input_arr = np.random.rand(16, 16).astype(np.float32)
        kernel = np.random.rand(4).astype(np.float32)
        with pytest.raises(RuntimeError):
            kc.conv_naive(input_arr, kernel)

    def test_even_kernel_raises(self):
        """Test that even-sized kernel raises error."""
        import kernel_craft_python as kc

        input_arr = np.random.rand(16, 16).astype(np.float32)
        kernel = np.ones((4, 4), dtype=np.float32)
        with pytest.raises(RuntimeError):
            kc.conv_naive(input_arr, kernel)
83
+
84
+
85
class TestConvTiledNumpy:
    """Checks conv_tiled against conv_naive on numpy inputs."""

    def test_tile_8x8(self):
        """8x8 tiles on a 16x16 image must reproduce the naive result."""
        import kernel_craft_python as kc

        np.random.seed(42)
        image = np.random.rand(16, 16).astype(np.float32)
        laplacian = np.array(
            [[0, 1, 0], [1, -4, 1], [0, 1, 0]],
            dtype=np.float32,
        )
        tiled = kc.conv_tiled(image, laplacian, 8, 8)
        naive = kc.conv_naive(image, laplacian)
        np.testing.assert_allclose(tiled, naive, rtol=1e-5)

    def test_tile_16x16(self):
        """16x16 tiles on a 32x32 image must reproduce the naive result."""
        import kernel_craft_python as kc

        np.random.seed(42)
        image = np.random.rand(32, 32).astype(np.float32)
        weights = np.random.rand(3, 3).astype(np.float32)
        tiled = kc.conv_tiled(image, weights, 16, 16)
        naive = kc.conv_naive(image, weights)
        np.testing.assert_allclose(tiled, naive, rtol=1e-5)

    def test_tile_32x32(self):
        """32x32 tiles with a 5x5 kernel on a 64x64 image match naive."""
        import kernel_craft_python as kc

        np.random.seed(42)
        image = np.random.rand(64, 64).astype(np.float32)
        weights = np.random.rand(5, 5).astype(np.float32)
        tiled = kc.conv_tiled(image, weights, 32, 32)
        naive = kc.conv_naive(image, weights)
        np.testing.assert_allclose(tiled, naive, rtol=1e-5)

    def test_different_tile_w_h(self):
        """A non-square tile (width 8, height 16) still matches naive."""
        import kernel_craft_python as kc

        np.random.seed(42)
        image = np.random.rand(24, 24).astype(np.float32)
        weights = np.random.rand(3, 3).astype(np.float32)
        tiled = kc.conv_tiled(image, weights, 8, 16)
        naive = kc.conv_naive(image, weights)
        np.testing.assert_allclose(tiled, naive, rtol=1e-5)
127
+
128
+
129
class TestConvNaiveTorch:
    """Tests for conv_naive with PyTorch tensors."""

    @staticmethod
    def _require_torch_cuda():
        """Return the torch module, skipping the test if it cannot use CUDA.

        Only the optional PyTorch dependency and the presence of a CUDA
        device are treated as skip conditions. The extension module itself is
        imported by the tests without a guard, so a missing or broken build
        is reported as a failure rather than hidden behind a
        "PyTorch not installed" skip.
        """
        try:
            import torch
        except ImportError:
            pytest.skip("PyTorch not installed")
        if not torch.cuda.is_available():
            pytest.skip("CUDA device not available")
        return torch

    def test_basic(self):
        """Test basic convolution with PyTorch tensor."""
        torch = self._require_torch_cuda()
        import kernel_craft_python as kc

        input_tensor = torch.rand(8, 8, dtype=torch.float32, device='cuda')
        kernel = torch.tensor(
            [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
            dtype=torch.float32,
            device='cuda',
        )
        result = kc.conv_naive(input_tensor, kernel)
        assert result.shape == (8, 8)
        assert result.device.type == 'cuda'

    def test_correctness(self):
        """Test output matches numpy reference."""
        torch = self._require_torch_cuda()
        import kernel_craft_python as kc

        np.random.seed(42)
        input_np = np.random.rand(16, 16).astype(np.float32)
        kernel_np = np.array([[0, 1, 0], [1, -4, 1], [0, 1, 0]], dtype=np.float32)

        result_gpu = kc.conv_naive(
            torch.from_numpy(input_np).cuda(),
            torch.from_numpy(kernel_np).cuda(),
        )
        result_cpu = conv_cpu(input_np, kernel_np)
        # The Laplacian taps sum to zero, so outputs can be very close to 0;
        # a small absolute tolerance keeps the comparison meaningful there.
        np.testing.assert_allclose(
            result_gpu.cpu().numpy(), result_cpu, rtol=1e-4, atol=1e-6
        )
164
+
165
+
166
class TestConvTiledTorch:
    """Tests for conv_tiled with PyTorch tensors."""

    def test_tile_sizes(self):
        """Test different tile sizes with PyTorch."""
        # Skip only for the optional pieces (PyTorch, a CUDA device). The
        # extension import is left unguarded so that a missing build shows up
        # as a failure instead of a misleading "PyTorch not installed" skip.
        try:
            import torch
        except ImportError:
            pytest.skip("PyTorch not installed")
        if not torch.cuda.is_available():
            pytest.skip("CUDA device not available")
        import kernel_craft_python as kc

        input_tensor = torch.rand(32, 32, dtype=torch.float32, device='cuda')
        kernel = torch.rand(3, 3, dtype=torch.float32, device='cuda')

        for tw, th in [(8, 8), (16, 16), (32, 32)]:
            result = kc.conv_tiled(input_tensor, kernel, tw, th)
            assert result.shape == (32, 32)
            assert result.device.type == 'cuda'
184
+
185
+
186
if __name__ == "__main__":
    # pytest.main returns the run's exit code; propagate it so that running
    # this file as a script exits non-zero when a test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))