kernel-craft 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kernel-craft
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CUDA kernels for machine learning systems optimization
|
|
5
|
+
Author-email: kernel-craft contributors <contact@example.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/anomalyco/kernel-craft
|
|
8
|
+
Keywords: cuda,gpu,convolution,machine-learning,deep-learning
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
12
|
+
Classifier: Environment :: GPU :: NVIDIA CUDA
|
|
13
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
14
|
+
Requires-Python: <3.13,>=3.11
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
Requires-Dist: numpy>=1.20
|
|
17
|
+
Provides-Extra: dev
|
|
18
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
19
|
+
Requires-Dist: twine>=4.0.0; extra == "dev"
|
|
20
|
+
Requires-Dist: build>=1.0.0; extra == "dev"
|
|
21
|
+
|
|
22
|
+
# kernel-craft Python API
|
|
23
|
+
|
|
24
|
+
CUDA convolution kernels exposed to Python, with NumPy and PyTorch support.
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
Build from source with CMake:
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
cd /path/to/kernel-craft
|
|
32
|
+
mkdir build && cd build
|
|
33
|
+
cmake ..
|
|
34
|
+
make kernel_craft_python
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
The module will be at `src/python/build/kernel_craft_python.cpython-*.so`.
|
|
38
|
+
|
|
39
|
+
## Usage
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
import sys
|
|
43
|
+
sys.path.insert(0, 'src/python/build')
|
|
44
|
+
|
|
45
|
+
import kernel_craft_python as kc
|
|
46
|
+
import numpy as np
|
|
47
|
+
|
|
48
|
+
# Input: 2D float32 numpy array
|
|
49
|
+
input = np.random.randn(256, 256).astype(np.float32)
|
|
50
|
+
kernel = np.random.randn(3, 3).astype(np.float32)
|
|
51
|
+
|
|
52
|
+
# Naive convolution
|
|
53
|
+
out = kc.conv_naive(input, kernel) # -> np.ndarray
|
|
54
|
+
|
|
55
|
+
# Tiled convolution with configurable tile size
|
|
56
|
+
out = kc.conv_tiled(input, kernel, tile_w=8, tile_h=8) # -> np.ndarray
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Version
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
import kernel_craft_python as kc
|
|
63
|
+
print(kc.__version__) # "0.1.0"
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## PyTorch Tensors
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
import torch
|
|
70
|
+
import kernel_craft_python as kc
|
|
71
|
+
|
|
72
|
+
# Input: 2D float32 PyTorch tensor on CUDA
|
|
73
|
+
input = torch.rand(256, 256, dtype=torch.float32, device='cuda')
|
|
74
|
+
kernel = torch.rand(3, 3, dtype=torch.float32, device='cuda')
|
|
75
|
+
|
|
76
|
+
# Naive convolution
|
|
77
|
+
out = kc.conv_naive(input, kernel) # -> torch.Tensor on GPU
|
|
78
|
+
|
|
79
|
+
# Tiled convolution
|
|
80
|
+
out = kc.conv_tiled(input, kernel, tile_w=16, tile_h=16) # -> torch.Tensor on GPU
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## API Reference
|
|
84
|
+
|
|
85
|
+
| Function | Input Type | Output Type |
|
|
86
|
+
|----------|-----------|--------------|
|
|
87
|
+
| `conv_naive(input, kernel)` | np.ndarray or torch.Tensor | np.ndarray or torch.Tensor (matches input) |
|
|
88
|
+
| `conv_tiled(input, kernel, tile_w=8, tile_h=8)` | np.ndarray or torch.Tensor | np.ndarray or torch.Tensor (matches input) |
|
|
89
|
+
|
|
90
|
+
### Parameters
|
|
91
|
+
|
|
92
|
+
- `input`: Input image (2D, float32)
|
|
93
|
+
- `kernel`: Convolution kernel (2D, float32, square with an odd side length)
|
|
94
|
+
- `tile_w`: Tile width for tiled convolution (default: 8)
|
|
95
|
+
- `tile_h`: Tile height for tiled convolution (default: 8)
|
|
96
|
+
|
|
97
|
+
### Supported Tile Sizes
|
|
98
|
+
|
|
99
|
+
- 8x8 (default, best overall performance)
|
|
100
|
+
- 16x16
|
|
101
|
+
- 32x32
|
|
102
|
+
|
|
103
|
+
### Error Handling
|
|
104
|
+
|
|
105
|
+
All functions raise `RuntimeError` with descriptive messages for:
|
|
106
|
+
- Invalid input dimensions (must be 2D)
|
|
107
|
+
- Invalid kernel dimensions (must be 2D, square, odd-sized)
|
|
108
|
+
- Invalid dtype (must be float32)
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
kernel_craft_python/kernel_craft_python.cpython-312-x86_64-linux-gnu.so,sha256=OzoOlFBKuu9PYPNCAAVFT4eYVc5eAD0iiD4LUb-XI9g,384984
|
|
2
|
+
kernel_craft-0.1.0.dist-info/METADATA,sha256=Ac68AWPDnUo8AtwbN5DwLXg_Lf5xAnIshLZh7W5G6c4,2957
|
|
3
|
+
kernel_craft-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
4
|
+
kernel_craft-0.1.0.dist-info/top_level.txt,sha256=RziAgd9_MYWWvPDd2hwcRwUZP2a2BL9rnIVF6eYOz3o,20
|
|
5
|
+
kernel_craft-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
kernel_craft_python
|
|
Binary file
|