froog 0.2.2__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- froog-0.2.4/PKG-INFO +75 -0
- froog-0.2.4/README.md +63 -0
- froog-0.2.4/froog.egg-info/PKG-INFO +75 -0
- {froog-0.2.2 → froog-0.2.4}/froog.egg-info/SOURCES.txt +9 -9
- {froog-0.2.2 → froog-0.2.4}/froog.egg-info/requires.txt +1 -0
- froog-0.2.4/froog.egg-info/top_level.txt +1 -0
- froog-0.2.4/ribbit/__init__.py +3 -0
- {froog-0.2.2/froog → froog-0.2.4/ribbit}/gradcheck.py +4 -4
- {froog-0.2.2/froog → froog-0.2.4/ribbit}/nn.py +2 -2
- {froog-0.2.2/froog → froog-0.2.4/ribbit}/ops.py +2 -2
- {froog-0.2.2/froog → froog-0.2.4/ribbit}/ops_gpu.py +166 -29
- {froog-0.2.2/froog → froog-0.2.4/ribbit}/optim.py +1 -1
- {froog-0.2.2/froog → froog-0.2.4/ribbit}/tensor.py +3 -3
- {froog-0.2.2/froog → froog-0.2.4/ribbit}/utils.py +2 -2
- {froog-0.2.2 → froog-0.2.4}/setup.py +5 -5
- {froog-0.2.2 → froog-0.2.4}/tests/test_conv_speed.py +4 -4
- froog-0.2.4/tests/test_models.py +134 -0
- {froog-0.2.2 → froog-0.2.4}/tests/test_ops.py +20 -16
- {froog-0.2.2 → froog-0.2.4}/tests/test_optim.py +6 -6
- {froog-0.2.2 → froog-0.2.4}/tests/test_tensor.py +13 -13
- froog-0.2.2/PKG-INFO +0 -73
- froog-0.2.2/README.md +0 -61
- froog-0.2.2/froog/__init__.py +0 -3
- froog-0.2.2/froog.egg-info/PKG-INFO +0 -73
- froog-0.2.2/froog.egg-info/top_level.txt +0 -1
- froog-0.2.2/tests/test_nn.py +0 -40
- {froog-0.2.2 → froog-0.2.4}/LICENSE +0 -0
- {froog-0.2.2 → froog-0.2.4}/froog.egg-info/dependency_links.txt +0 -0
- {froog-0.2.2 → froog-0.2.4}/setup.cfg +0 -0
froog-0.2.4/PKG-INFO
ADDED
@@ -0,0 +1,75 @@
+Metadata-Version: 2.1
+Name: froog
+Version: 0.2.4
+Summary: a beautifully simplistic ml framework
+Author: Kevin Buhler
+License: MIT
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+
+# ribbit <img src="https://github.com/kevbuh/ribbit/actions/workflows/test.yml/badge.svg" alt="unit test badge" >
+<div align="center" >
+  <img src="https://raw.githubusercontent.com/kevbuh/ribbit/main/assets/froog.png" alt="froog the frog" height="200">
+  <br/>
+  ribbit: fast real-time optimization of gradients
+  <br/>
+  a beautifully compact machine-learning library
+  <br/>
+  <a href="https://github.com/kevbuh/ribbit">homepage</a> | <a href="https://github.com/kevbuh/ribbit/tree/main/docs">documentation</a> | <a href="https://pypi.org/project/ribbit/">pip</a>
+  <br/>
+  <br/>
+</div>
+
+RIBBIT is a SUPER SIMPLE machine learning framework with the goal of creating tools with AI --> easily and efficiently.
+
+It's an end-to-end framework, encapsulating everything from ensemble trees to deep neural networks (still working on all that lol)
+
+# Installation
+```bash
+pip install ribbit
+```
+
+### Overview of Features
+- <a href="https://github.com/kevbuh/ribbit/blob/main/ribbit/tensor.py">Custom Tensors</a>
+  - Backpropagation
+  - Automatic Differentiation (autograd)
+  - Forward and backward passes
+- <a href="https://github.com/kevbuh/ribbit/blob/main/ribbit/ops.py">ML Operations</a>
+  - 2D Convolutions (im2col)
+  - Numerical gradient checking
+  - Acceleration methods (Adam)
+  - Avg & Max pooling
+- <a href="https://github.com/kevbuh/ribbit/blob/main/models/efficientnet.py">EfficientNet</a> inference
+- <a href="https://github.com/kevbuh/ribbit/blob/main/ribbit/ops_gpu.py">GPU Support</a>
+- and a bunch <a href="https://github.com/kevbuh/ribbit/tree/main/ribbit">more</a>
+
+### Sneak Peek
+```python
+from ribbit.tensor import Tensor
+from ribbit.utils import Linear
+import ribbit.optim as optim
+
+class mnistMLP:
+  def __init__(self):
+    self.l1 = Tensor(Linear(784, 128))
+    self.l2 = Tensor(Linear(128, 10))
+
+  def forward(self, x):
+    return x.dot(self.l1).relu().dot(self.l2).logsoftmax()
+
+model = mnistMLP()
+optim = optim.SGD([model.l1, model.l2], lr=0.001)
+```
+
+# Bounties
+THERES LOT OF STUFF TO WORK ON! VISIT THE <a href="https://github.com/kevbuh/ribbit/blob/main/docs/bounties.md">BOUNTY SHOP</a>
+
+Pull requests will be merged if they:
+* increase simplicity
+* increase functionality
+* increase efficiency
+
+more info on <a href="https://github.com/kevbuh/ribbit/blob/main/docs/contributing.md">contributing</a>
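Note: the Sneak Peek above stops at constructing the optimizer. A hedged sketch of how one training step might be driven, assuming tinygrad-style `Tensor.backward()`, `mean()`, and `optim.step()` calls (not verbatim ribbit code; `x_batch` and `y_onehot` are hypothetical inputs):

```python
# Hypothetical single training step for the mnistMLP above; the loss
# construction and the backward()/step() calls are assumptions.
out = model.forward(Tensor(x_batch))      # x_batch: (bs, 784) float32
loss = out.mul(Tensor(y_onehot)).mean()   # NLL against the logsoftmax output
loss.backward()
optim.step()
```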
froog-0.2.4/README.md
ADDED
@@ -0,0 +1,63 @@
+# ribbit <img src="https://github.com/kevbuh/ribbit/actions/workflows/test.yml/badge.svg" alt="unit test badge" >
+<div align="center" >
+  <img src="https://raw.githubusercontent.com/kevbuh/ribbit/main/assets/froog.png" alt="froog the frog" height="200">
+  <br/>
+  ribbit: fast real-time optimization of gradients
+  <br/>
+  a beautifully compact machine-learning library
+  <br/>
+  <a href="https://github.com/kevbuh/ribbit">homepage</a> | <a href="https://github.com/kevbuh/ribbit/tree/main/docs">documentation</a> | <a href="https://pypi.org/project/ribbit/">pip</a>
+  <br/>
+  <br/>
+</div>
+
+RIBBIT is a SUPER SIMPLE machine learning framework with the goal of creating tools with AI --> easily and efficiently.
+
+It's an end-to-end framework, encapsulating everything from ensemble trees to deep neural networks (still working on all that lol)
+
+# Installation
+```bash
+pip install ribbit
+```
+
+### Overview of Features
+- <a href="https://github.com/kevbuh/ribbit/blob/main/ribbit/tensor.py">Custom Tensors</a>
+  - Backpropagation
+  - Automatic Differentiation (autograd)
+  - Forward and backward passes
+- <a href="https://github.com/kevbuh/ribbit/blob/main/ribbit/ops.py">ML Operations</a>
+  - 2D Convolutions (im2col)
+  - Numerical gradient checking
+  - Acceleration methods (Adam)
+  - Avg & Max pooling
+- <a href="https://github.com/kevbuh/ribbit/blob/main/models/efficientnet.py">EfficientNet</a> inference
+- <a href="https://github.com/kevbuh/ribbit/blob/main/ribbit/ops_gpu.py">GPU Support</a>
+- and a bunch <a href="https://github.com/kevbuh/ribbit/tree/main/ribbit">more</a>
+
+### Sneak Peek
+```python
+from ribbit.tensor import Tensor
+from ribbit.utils import Linear
+import ribbit.optim as optim
+
+class mnistMLP:
+  def __init__(self):
+    self.l1 = Tensor(Linear(784, 128))
+    self.l2 = Tensor(Linear(128, 10))
+
+  def forward(self, x):
+    return x.dot(self.l1).relu().dot(self.l2).logsoftmax()
+
+model = mnistMLP()
+optim = optim.SGD([model.l1, model.l2], lr=0.001)
+```
+
+# Bounties
+THERES LOT OF STUFF TO WORK ON! VISIT THE <a href="https://github.com/kevbuh/ribbit/blob/main/docs/bounties.md">BOUNTY SHOP</a>
+
+Pull requests will be merged if they:
+* increase simplicity
+* increase functionality
+* increase efficiency
+
+more info on <a href="https://github.com/kevbuh/ribbit/blob/main/docs/contributing.md">contributing</a>
froog-0.2.4/froog.egg-info/PKG-INFO
ADDED
@@ -0,0 +1,75 @@
+Metadata-Version: 2.1
+Name: froog
+Version: 0.2.4
+Summary: a beautifully simplistic ml framework
+Author: Kevin Buhler
+License: MIT
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+
+# ribbit <img src="https://github.com/kevbuh/ribbit/actions/workflows/test.yml/badge.svg" alt="unit test badge" >
+<div align="center" >
+  <img src="https://raw.githubusercontent.com/kevbuh/ribbit/main/assets/froog.png" alt="froog the frog" height="200">
+  <br/>
+  ribbit: fast real-time optimization of gradients
+  <br/>
+  a beautifully compact machine-learning library
+  <br/>
+  <a href="https://github.com/kevbuh/ribbit">homepage</a> | <a href="https://github.com/kevbuh/ribbit/tree/main/docs">documentation</a> | <a href="https://pypi.org/project/ribbit/">pip</a>
+  <br/>
+  <br/>
+</div>
+
+RIBBIT is a SUPER SIMPLE machine learning framework with the goal of creating tools with AI --> easily and efficiently.
+
+It's an end-to-end framework, encapsulating everything from ensemble trees to deep neural networks (still working on all that lol)
+
+# Installation
+```bash
+pip install ribbit
+```
+
+### Overview of Features
+- <a href="https://github.com/kevbuh/ribbit/blob/main/ribbit/tensor.py">Custom Tensors</a>
+  - Backpropagation
+  - Automatic Differentiation (autograd)
+  - Forward and backward passes
+- <a href="https://github.com/kevbuh/ribbit/blob/main/ribbit/ops.py">ML Operations</a>
+  - 2D Convolutions (im2col)
+  - Numerical gradient checking
+  - Acceleration methods (Adam)
+  - Avg & Max pooling
+- <a href="https://github.com/kevbuh/ribbit/blob/main/models/efficientnet.py">EfficientNet</a> inference
+- <a href="https://github.com/kevbuh/ribbit/blob/main/ribbit/ops_gpu.py">GPU Support</a>
+- and a bunch <a href="https://github.com/kevbuh/ribbit/tree/main/ribbit">more</a>
+
+### Sneak Peek
+```python
+from ribbit.tensor import Tensor
+from ribbit.utils import Linear
+import ribbit.optim as optim
+
+class mnistMLP:
+  def __init__(self):
+    self.l1 = Tensor(Linear(784, 128))
+    self.l2 = Tensor(Linear(128, 10))
+
+  def forward(self, x):
+    return x.dot(self.l1).relu().dot(self.l2).logsoftmax()
+
+model = mnistMLP()
+optim = optim.SGD([model.l1, model.l2], lr=0.001)
+```
+
+# Bounties
+THERES LOT OF STUFF TO WORK ON! VISIT THE <a href="https://github.com/kevbuh/ribbit/blob/main/docs/bounties.md">BOUNTY SHOP</a>
+
+Pull requests will be merged if they:
+* increase simplicity
+* increase functionality
+* increase efficiency
+
+more info on <a href="https://github.com/kevbuh/ribbit/blob/main/docs/contributing.md">contributing</a>
{froog-0.2.2 → froog-0.2.4}/froog.egg-info/SOURCES.txt
@@ -1,21 +1,21 @@
 LICENSE
 README.md
 setup.py
-froog/__init__.py
-froog/gradcheck.py
-froog/nn.py
-froog/ops.py
-froog/ops_gpu.py
-froog/optim.py
-froog/tensor.py
-froog/utils.py
 froog.egg-info/PKG-INFO
 froog.egg-info/SOURCES.txt
 froog.egg-info/dependency_links.txt
 froog.egg-info/requires.txt
 froog.egg-info/top_level.txt
+ribbit/__init__.py
+ribbit/gradcheck.py
+ribbit/nn.py
+ribbit/ops.py
+ribbit/ops_gpu.py
+ribbit/optim.py
+ribbit/tensor.py
+ribbit/utils.py
 tests/test_conv_speed.py
-tests/test_nn.py
+tests/test_models.py
 tests/test_ops.py
 tests/test_optim.py
 tests/test_tensor.py
froog-0.2.4/froog.egg-info/top_level.txt
ADDED
@@ -0,0 +1 @@
+ribbit
{froog-0.2.2/froog → froog-0.2.4/ribbit}/gradcheck.py
@@ -7,8 +7,8 @@
 # |___| |___| |_||_______||_______||_______|
 
 import numpy as np
-from froog.tensor import Tensor
-from froog.utils import mask_like
+from ribbit.tensor import Tensor
+from ribbit.utils import mask_like
 
 def jacobian(model, input):
   output = model(input)
@@ -30,7 +30,7 @@ def numerical_jacobian(model, input, eps = 1e-6):
   # Computes :
   #   First-order partial derivatives using Finite-Difference Approximation with Central Difference Method (CDM)
   # Params:
-  #   model : A froog model
+  #   model : A ribbit model
   #   input : An input
   #   eps : Perturbation step
   # Returns:
@@ -58,7 +58,7 @@ def gradcheck(model, input, eps = 1e-06, atol = 1e-5, rtol = 0.001):
   """
   Checks whether computed gradient is close to numerical approximation of the Jacobian
   Params:
-    model : froog model
+    model : ribbit model
     eps : eps used to see if gradient is within tolerances
     atol : absolute tolerance
     rtol : relative tolerance
{froog-0.2.2/froog → froog-0.2.4/ribbit}/nn.py
@@ -6,7 +6,7 @@
 # | | | | | || || || |_| |
 # |___| |___| |_||_______||_______||_______|
 
-from froog.tensor import Tensor
+from ribbit.tensor import Tensor
 
 def swish(x):
   return x.mul(x.sigmoid())
@@ -44,7 +44,7 @@ class BatchNorm2D:
     # TODO: need running_mean and running_var
     self.running_mean = Tensor.zeros(sz)
     self.running_var = Tensor.zeros(sz)
-    self.num_batches_tracked = Tensor.zeros(
+    self.num_batches_tracked = Tensor.zeros(1)
 
   def __call__(self, x):
     x = x.sub(self.running_mean.reshape(shape=[1, -1, 1, 1]))
{froog-0.2.2/froog → froog-0.2.4/ribbit}/ops.py
@@ -7,8 +7,8 @@
 # |___| |___| |_||_______||_______||_______|
 
 import numpy as np
-from froog.tensor import Function, register
-from froog.utils import im2col, col2im
+from ribbit.tensor import Function, register
+from ribbit.utils import im2col, col2im
 
 # *****************************************************
 # ____ ___ _____ __________ ____ ____ _____
{froog-0.2.2/froog → froog-0.2.4/ribbit}/ops_gpu.py
@@ -19,6 +19,12 @@ def buffer_new(ctx, shape):
   res_g.dtype = np.float32
   return res_g
 
+def buffer_zeros(ctx, shape):
+  res_g = cl.Buffer(ctx.cl_ctx, cl.mem_flags.WRITE_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=np.zeros(shape))
+  res_g.shape = shape
+  res_g.dtype = np.float32
+  return res_g
+
 def buffer_like(ctx, x):
   return buffer_new(ctx, x.shape)
 
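Note: one thing worth flagging in the new `buffer_zeros`: `np.zeros(shape)` allocates float64 by default, while `res_g.dtype` is declared as `np.float32`. A dtype-consistent variant is a small change (a hypothetical sketch, not ribbit code; it mirrors the source's practice of attaching `shape`/`dtype` attributes to the buffer):

```python
import numpy as np
import pyopencl as cl

def buffer_zeros_f32(ctx, shape):
  # np.zeros(shape) defaults to float64; pass dtype explicitly so the host
  # buffer matches the float32 the other GPU ops assume
  res_g = cl.Buffer(ctx.cl_ctx, cl.mem_flags.WRITE_ONLY | cl.mem_flags.COPY_HOST_PTR,
                    hostbuf=np.zeros(shape, dtype=np.float32))
  res_g.shape = shape
  res_g.dtype = np.float32
  return res_g
```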
@@ -27,35 +33,40 @@ def clbuild(cl_ctx, prg):
 return cl.Program(cl_ctx, prg).build()
 
 def binary_op(ctx, code, x, y):
+  if x.shape != y.shape:
+    raise Exception(f"binary op shape mismatch: {x.shape} != {y.shape}")
   ret = buffer_like(ctx, x)
   prg = clbuild(ctx.cl_ctx, """
-  __kernel void
+  __kernel void binop(
     __global const float *a_g, __global const float *b_g, __global float *res_g)
   {
     int gid = get_global_id(0);
-
+    float a = a_g[gid];
+    float b = b_g[gid];
+    res_g[gid] = """+code+""";
   }
   """)
-  prg.
+  prg.binop(ctx.cl_queue, [np.prod(ret.shape)], None, x, y, ret) # (queue, size, ???, arg1, arg2, dest)
   return ret
 
 def unary_op(ctx, code, x):
   ret = buffer_like(ctx, x)
   prg = clbuild(ctx.cl_ctx, """
-  __kernel void
+  __kernel void unop(
     __global const float *a_g, __global float *res_g)
   {
     int gid = get_global_id(0);
-
+    float a = a_g[gid];
+    res_g[gid] = """+code+"""
   }
   """)
-  prg.
+  prg.unop(ctx.cl_queue, [np.prod(ret.shape)], None, x, ret)
   return ret
 
 class Add(Function):
   @staticmethod
   def forward(ctx, x, y):
-    return binary_op(ctx, '
+    return binary_op(ctx, 'a+b', x, y)
 
   @staticmethod
   def backward(ctx, grad_output):
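Note: after string substitution, `binary_op(ctx, 'a+b', x, y)` hands `clbuild` kernel source equivalent to the following (reconstructed from the hunk above):

```python
# the OpenCL source binary_op builds for Add, once 'a+b' is spliced in
kernel_src = """
__kernel void binop(
  __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  float a = a_g[gid];
  float b = b_g[gid];
  res_g[gid] = a+b;
}
"""
```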
@@ -65,11 +76,11 @@ register('add', Add, gpu=True)
 class Sub(Function):
   @staticmethod
   def forward(ctx, x, y):
-    return binary_op(ctx, '
+    return binary_op(ctx, 'a-b', x, y)
 
   @staticmethod
   def backward(ctx, grad_output):
-    not_grad_output = unary_op(ctx, '
+    not_grad_output = unary_op(ctx, '-a', grad_output)
     return grad_output, not_grad_output
 register('sub', Sub, gpu=True)
 
@@ -78,36 +89,27 @@ class Mul(Function):
   def forward(ctx, x, y):
     ctx.save_for_backward(x, y)
 
-
-    if y.shape == (1,):
-      return binary_op(ctx, 'res_g[gid] = a_g[gid] * b_g[0];', x, y)
-    elif x.shape == y.shape:
-      return binary_op(ctx, 'res_g[gid] = a_g[gid] * b_g[gid];', x, y)
-    else:
-      raise Exception("mismatched shapes %r %r" % (x.shape, y.shape))
-
-    return ret
+    return binary_op(ctx, 'a*b', x, y)
 
   @staticmethod
   def backward(ctx, grad_output):
     x,y = ctx.saved_tensors
-    return binary_op(ctx, '
-    binary_op(ctx, 'res_g[gid] = a_g[gid] * b_g[gid];', x, grad_output)
+    return binary_op(ctx, 'a*b', y, grad_output), binary_op(ctx, 'a*b', x, grad_output)
 register('mul', Mul, gpu=True)
 
 class Pow(Function):
   @staticmethod
   def forward(ctx, x, y):
     ctx.save_for_backward(x, y)
-    return binary_op(ctx, '
+    return binary_op(ctx, 'pow(a,b)', x, y)
 
   @staticmethod
   def backward(ctx, grad_output):
     x,y = ctx.saved_tensors
-    gradx = binary_op(ctx, '
-    binary_op(ctx, '
-    grady = binary_op(ctx, '
-    binary_op(ctx, '
+    gradx = binary_op(ctx, 'a*b', grad_output,
+      binary_op(ctx, 'b * (pow((float)a, (float)(b-1.0)));', x, y))
+    grady = binary_op(ctx, 'a*b', grad_output,
+      binary_op(ctx, 'pow((float)a, (float)b) * log(a);', x, y))
     return gradx, grady
 register('pow', Pow, gpu=True)
 
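Note: the Pow backward above implements d(x^y)/dx = y·x^(y-1) and d(x^y)/dy = x^y·log(x); the log term assumes x > 0. A NumPy sanity check of the same formulas:

```python
import numpy as np
x = np.array([1.5, 2.0, 3.0], dtype=np.float32)
y = np.array([2.0, 3.0, 0.5], dtype=np.float32)
grad = np.ones_like(x)            # upstream gradient
gradx = grad * y * x**(y - 1)     # matches 'b * pow(a, b-1)' in the kernel
grady = grad * x**y * np.log(x)   # matches 'pow(a, b) * log(a)' in the kernel
```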
@@ -133,7 +135,17 @@ class Sum(Function):
   @staticmethod
   def backward(ctx, grad_output):
     input, = ctx.saved_tensors
-
+    ret = buffer_like(ctx, input)
+    prg = clbuild(ctx.cl_ctx, """
+    __kernel void fill(
+      __global const float *a_g, __global float *res_g)
+    {
+      int gid = get_global_id(0);
+      res_g[gid] = a_g[0];
+    }
+    """)
+    prg.fill(ctx.cl_queue, [np.prod(ret.shape)], None, grad_output, ret)
+    return ret
 register('sum', Sum, gpu=True)
 
 class Dot(Function):
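Note: `sum()` reduces the whole tensor to a single element, so its gradient is the upstream scalar broadcast back to the input shape; the new `fill` kernel does exactly that. The NumPy equivalent:

```python
import numpy as np

def sum_backward(grad_output, input_shape):
  # broadcast the single upstream value to every input position
  return np.full(input_shape, grad_output, dtype=np.float32)
```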
@@ -210,7 +222,18 @@ class Reshape(Function):
   @staticmethod
   def forward(ctx, x, shape):
     ctx.save_for_backward(x.shape)
-
+    ss = list(shape)
+
+    # ???
+    tsum = 1
+    for s in ss:
+      if s != -1:
+        tsum *= s
+    for i,s in enumerate(ss):
+      if s == -1:
+        ss[i] = np.prod(x.shape) // tsum
+    assert np.prod(x.shape) == np.prod(ss)
+    x.shape = tuple(ss)
     return x
 
   @staticmethod
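Note: the block marked `# ???` infers a single `-1` entry in the target shape so the element count is preserved, i.e. NumPy-style reshape semantics. In plain Python (a sketch, not ribbit's API):

```python
def infer_shape(old_shape, new_shape):
  # replace one -1 with whatever dimension keeps the element count equal
  total = 1
  for s in old_shape:
    total *= s
  known = 1
  for s in new_shape:
    if s != -1:
      known *= s
  out = tuple(total // known if s == -1 else s for s in new_shape)
  prod = 1
  for s in out:
    prod *= s
  assert prod == total  # mirrors the assert in the diff
  return out

assert infer_shape((2, 3, 4), (6, -1)) == (6, 4)
```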
@@ -226,12 +249,12 @@ class ReLU(Function):
   @staticmethod
   def forward(ctx, input):
     ctx.save_for_backward(input)
-    return unary_op(ctx, '
+    return unary_op(ctx, 'max(a, (float)0.);', input)
 
   @staticmethod
   def backward(ctx, grad_output):
     input, = ctx.saved_tensors
-    return binary_op(ctx, '
+    return binary_op(ctx, 'a * (b >= 0);', grad_output, input)
 register('relu', ReLU, gpu=True)
 
 class LogSoftmax(Function):
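Note: NumPy equivalents of the two ReLU kernels above; the backward masks the upstream gradient wherever the saved input was negative:

```python
import numpy as np

def relu_forward(x):
  return np.maximum(x, 0)         # 'max(a, (float)0.)'

def relu_backward(grad_output, x):
  return grad_output * (x >= 0)   # 'a * (b >= 0)'
```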
@@ -294,3 +317,117 @@ class LogSoftmax(Function):
     return grad_input
 register('logsoftmax', LogSoftmax, gpu=True)
 
+# ***** CONV OPS ********
+
+class Conv2D(Function):
+  @staticmethod
+  def forward(ctx, x, w, stride=1, groups=1):
+    if type(ctx.stride) == int: # ctx stores function params
+      ctx.stride = (ctx.stride, ctx.stride)
+
+    cout, cin, H, W = w.shape
+    y_stride, x_stride = ctx.stride
+    bs,cin_,iy,ix = x.shape
+    oy,ox = (iy-(H-y_stride))//y_stride, (ix-(W-x_stride))//x_stride
+
+    assert cin*ctx.groups == cin_
+    assert cout % ctx.groups == 0
+
+    rcout = cout//ctx.groups
+
+    # output buffer
+    ret = buffer_new(ctx, (bs, cout, oy, ox))
+    prg = clbuild(ctx.cl_ctx, """
+
+    __kernel void conv(__global const float *input, __global const float *weight, __global float *output,
+      int H, int W, int groups, int rcout, int cin, int oy, int ox, int iy, int ix, int ys, int xs) {
+
+      int B = get_global_id(0); // range 0-bs
+      int Y = get_global_id(1); // range 0-oy
+      int X = get_global_id(2); // range 0-ox
+      int IY = Y*ys;
+      int IX = X*xs;
+
+      // input = (bs, groups, cin, iy, ix)
+      // weight = (groups, rcout, cin, H, W)
+      // output = (bs, groups, rcout, oy, ox)
+      for (int g = 0; g < groups; g++) {
+        for (int c = 0; c < rcout; c++) {
+          float acc = 0.0;
+          for (int ci = 0; ci < cin; ci++) {
+            for (int y = IY; y < IY+H; y++) {
+              for (int x = IX; x < IX+W; x++) {
+                acc += input[B*groups*cin*iy*ix + g*cin*iy*ix + ci*iy*ix + y*ix + x] * \
+                  weight[g*rcout*cin*H*W + c*cin*H*W + ci*H*W + (y-IY)*W + (x-IX)];
+              }
+            }
+          }
+          output[B*groups*rcout*oy*ox + g*rcout*oy*ox + c*oy*ox + Y*ox + X] = acc;
+        }
+      }
+    }
+    """)
+
+    prg.conv(ctx.cl_queue, [bs, oy, ox], None,
+      x, w, ret,
+      np.int32(H), np.int32(W),
+      np.int32(groups), np.int32(rcout), np.int32(cin),
+      np.int32(oy), np.int32(ox),
+      np.int32(iy), np.int32(ix),
+      np.int32(y_stride), np.int32(x_stride)
+    )
+    return ret
+
+  @staticmethod
+  def backward(ctx, grad_output):
+    raise Exception("not implemented")
+
+register('conv2d', Conv2D, gpu=True)
+
+class Pad2D(Function):
+  @staticmethod
+  def forward(ctx, x, padding=None):
+    bs,cin,iy,ix = x.shape
+    oy,ox = iy+padding[0]+padding[1], ix+padding[2]+padding[3] # top, bottom, left, right
+    ret = buffer_zeros(ctx, (bs, cin, oy, ox))
+
+    prg = clbuild(ctx.cl_ctx, """
+    __kernel void pad2d(
+      __global const float *input, __global float *output,
+      int cin, int py, int px, int oy, int ox, int iy, int ix
+    )
+    {
+      int B = get_global_id(0);
+      int C = get_global_id(1);
+      int Y = get_global_id(2);
+      int iptr = B*cin*iy*ix + C*iy*ix + Y*ix;
+      int optr = B*cin*oy*ox + C*oy*ox + (Y+py)*ox + px;
+      for (int x = 0; x < ix; x++) {
+        output[optr+x] = input[iptr+x];
+      }
+    }
+    """)
+    prg.pad2d(ctx.cl_queue, [bs, cin, iy], None,
+      x, ret,
+      np.int32(cin), np.int32(padding[0]), np.int32(padding[2]),
+      np.int32(oy), np.int32(ox), np.int32(iy), np.int32(ix)
+    )
+    return ret
+
+  @staticmethod
+  def backward(ctx, grad_output):
+    raise Exception("write this")
+register('pad2d', Pad2D, gpu=True)
+
+class Sigmoid(Function):
+  @staticmethod
+  def forward(ctx, input):
+    ret = unary_op(ctx, '1./(1+exp(-a))', input)
+    ctx.save_for_backward(ret)
+    return ret
+
+  @staticmethod
+  def backward(ctx, grad_output):
+    ret, = ctx.saved_tensors
+    return binary_op(ctx, 'a * (b * (1 - b));', grad_output, ret)
+register('sigmoid', Sigmoid, gpu=True)
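Note: the conv kernel computes a valid-padding, strided, grouped convolution; its output-size formula `(iy-(H-ys))//ys` equals the usual `(iy-H)//ys + 1`. A hedged NumPy reference for the same loop nest (the function name and structure are mine, not ribbit's):

```python
import numpy as np

def conv2d_ref(x, w, ys=1, xs=1, groups=1):
  # x: (bs, cin*groups, iy, ix), w: (cout, cin, H, W), valid padding
  bs, cin_, iy, ix = x.shape
  cout, cin, H, W = w.shape
  assert cin * groups == cin_ and cout % groups == 0
  rcout = cout // groups
  oy, ox = (iy - H) // ys + 1, (ix - W) // xs + 1
  xg = x.reshape(bs, groups, cin, iy, ix)
  wg = w.reshape(groups, rcout, cin, H, W)
  out = np.zeros((bs, groups, rcout, oy, ox), dtype=np.float32)
  for Y in range(oy):
    for X in range(ox):
      patch = xg[:, :, :, Y*ys:Y*ys+H, X*xs:X*xs+W]  # (bs, groups, cin, H, W)
      out[:, :, :, Y, X] = np.einsum('bgchw,grchw->bgr', patch, wg)
  return out.reshape(bs, cout, oy, ox)
```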
{froog-0.2.2/froog → froog-0.2.4/ribbit}/tensor.py
@@ -145,7 +145,7 @@ class Tensor:
     init_gpu()
     assert self.data.dtype == np.float32 # GPU only allows float32
     # hostbuf is the data buffer on host machine with the data to be copied to the OpenCL buffer
-    data = cl.Buffer(cl_ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=self.data) # from pyopencl docs
+    data = cl.Buffer(cl_ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=self.data.ravel()) # from pyopencl docs
     data.shape = self.shape
     data.dtype = self.data.dtype
     ret = Tensor(data)
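Note: the `.ravel()` change likely matters because `COPY_HOST_PTR` copies the raw host buffer, so a non-contiguous array (e.g. a transposed view) would upload the wrong bytes; `ravel()` returns a flat array, copying only when the data is not already contiguous:

```python
import numpy as np
a = np.arange(6, dtype=np.float32).reshape(2, 3).T  # non-contiguous view
flat = a.ravel()  # forces a contiguous flat copy here, safe to hand to cl.Buffer
```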
@@ -232,6 +232,6 @@ def register(name, fxn, gpu=False):
   setattr(Tensor, "__%s__" % name, dispatch)
   setattr(Tensor, "__i%s__" % name, lambda self,x: self.assign(dispatch(self,x)))
 
-import froog.ops
+import ribbit.ops # this registers all the operations
 if GPU:
-  import froog.ops_gpu
+  import ribbit.ops_gpu
{froog-0.2.2/froog → froog-0.2.4/ribbit}/utils.py
@@ -7,6 +7,7 @@
 # |___| |___| |_||_______||_______||_______|
 
 import numpy as np
+import os
 from functools import lru_cache
 
 def Linear(*tensor_size):
@@ -17,7 +18,7 @@ def Linear(*tensor_size):
 def fetch(url):
   import requests, os, hashlib, tempfile
   fp = os.path.join(tempfile.gettempdir(), hashlib.md5(url.encode('utf-8')).hexdigest())
-  if os.path.isfile(fp):
+  if os.path.isfile(fp) and os.stat(fp).st_size > 0:
     print(f"opening cache from {url}...")
     with open(fp, "rb") as f:
       dat = f.read()
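Note: the new `st_size > 0` guard stops an empty file left by an interrupted download from being served as a cache hit. A sketch of the whole cached-fetch pattern with that guard; the download branch is reconstructed from context, not verbatim ribbit code:

```python
import hashlib, os, tempfile
import requests

def fetch(url):
  fp = os.path.join(tempfile.gettempdir(), hashlib.md5(url.encode('utf-8')).hexdigest())
  if os.path.isfile(fp) and os.stat(fp).st_size > 0:
    with open(fp, "rb") as f:      # cache hit: a non-empty file on disk
      return f.read()
  dat = requests.get(url).content  # cache miss, or an empty file from a failed run
  with open(fp, "wb") as f:
    f.write(dat)
  return dat
```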
@@ -52,7 +53,6 @@ def get_im2col_index(oy, ox, cin, H, W):
   idx = idx_channel * OY * OX + idx_y * OX + idx_x
   return idx
 
-# TODO: whats this doing?
 @lru_cache
 def rearrange_col2im_index(oy, ox, cin, H, W):
   idx = get_im2col_index(oy, ox, cin, H, W)