froog 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
froog-0.2.4/PKG-INFO ADDED
@@ -0,0 +1,75 @@
+ Metadata-Version: 2.1
+ Name: froog
+ Version: 0.2.4
+ Summary: a beautifully simplistic ml framework
+ Author: Kevin Buhler
+ License: MIT
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: MIT License
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+
+ # ribbit <img src="https://github.com/kevbuh/ribbit/actions/workflows/test.yml/badge.svg" alt="unit test badge" >
+ <div align="center" >
+ <img src="https://raw.githubusercontent.com/kevbuh/ribbit/main/assets/froog.png" alt="froog the frog" height="200">
+ <br/>
+ ribbit: fast real-time optimization of gradients
+ <br/>
+ a beautifully compact machine-learning library
+ <br/>
+ <a href="https://github.com/kevbuh/ribbit">homepage</a> | <a href="https://github.com/kevbuh/ribbit/tree/main/docs">documentation</a> | <a href="https://pypi.org/project/ribbit/">pip</a>
+ <br/>
+ <br/>
+ </div>
+
+ RIBBIT is a SUPER SIMPLE machine learning framework with the goal of creating tools with AI --> easily and efficiently.
+
+ It's an end-to-end framework, encapsulating everything from ensemble trees to deep neural networks (still working on all that lol)
+
+ # Installation
+ ```bash
+ pip install ribbit
+ ```
+
+ ### Overview of Features
+ - <a href="https://github.com/kevbuh/ribbit/blob/main/ribbit/tensor.py">Custom Tensors</a>
+ - Backpropagation
+ - Automatic Differentiation (autograd)
+ - Forward and backward passes
+ - <a href="https://github.com/kevbuh/ribbit/blob/main/ribbit/ops.py">ML Operations</a>
+ - 2D Convolutions (im2col)
+ - Numerical gradient checking
+ - Acceleration methods (Adam)
+ - Avg & Max pooling
+ - <a href="https://github.com/kevbuh/ribbit/blob/main/models/efficientnet.py">EfficientNet</a> inference
+ - <a href="https://github.com/kevbuh/ribbit/blob/main/ribbit/ops_gpu.py">GPU Support</a>
+ - and a bunch <a href="https://github.com/kevbuh/ribbit/tree/main/ribbit">more</a>
+
+ ### Sneak Peek
+ ```python
+ from ribbit.tensor import Tensor
+ from ribbit.utils import Linear
+ import ribbit.optim as optim
+
+ class mnistMLP:
+   def __init__(self):
+     self.l1 = Tensor(Linear(784, 128))
+     self.l2 = Tensor(Linear(128, 10))
+
+   def forward(self, x):
+     return x.dot(self.l1).relu().dot(self.l2).logsoftmax()
+
+ model = mnistMLP()
+ optim = optim.SGD([model.l1, model.l2], lr=0.001)
+ ```
+
+ # Bounties
+ THERES LOT OF STUFF TO WORK ON! VISIT THE <a href="https://github.com/kevbuh/ribbit/blob/main/docs/bounties.md">BOUNTY SHOP</a>
+
+ Pull requests will be merged if they:
+ * increase simplicity
+ * increase functionality
+ * increase efficiency
+
+ more info on <a href="https://github.com/kevbuh/ribbit/blob/main/docs/contributing.md">contributing</a>
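The Sneak Peek in the README above stops right after constructing the SGD optimizer. For orientation, a single training step with this API might look like the sketch below; it assumes a tinygrad-style surface (Tensor wrapping a numpy array, with mul(), mean(), backward(), and optim.step() available), so treat it as illustrative rather than as code shipped in this package.

```python
import numpy as np
from ribbit.tensor import Tensor

# Hypothetical single training step for the mnistMLP defined in the Sneak Peek.
x = Tensor(np.random.randn(64, 784).astype(np.float32))   # a batch of flattened 28x28 images
y = Tensor(np.zeros((64, 10), dtype=np.float32))          # target encoding (placeholder values)

out = model.forward(x)        # log-probabilities from logsoftmax
loss = out.mul(y).mean()      # NLL-style loss; sign/scaling depends on how y is encoded
loss.backward()               # autograd fills in .grad on l1 and l2
optim.step()                  # SGD update of the registered parameters
```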
froog-0.2.4/README.md ADDED
@@ -0,0 +1,63 @@
+ # ribbit <img src="https://github.com/kevbuh/ribbit/actions/workflows/test.yml/badge.svg" alt="unit test badge" >
+ <div align="center" >
+ <img src="https://raw.githubusercontent.com/kevbuh/ribbit/main/assets/froog.png" alt="froog the frog" height="200">
+ <br/>
+ ribbit: fast real-time optimization of gradients
+ <br/>
+ a beautifully compact machine-learning library
+ <br/>
+ <a href="https://github.com/kevbuh/ribbit">homepage</a> | <a href="https://github.com/kevbuh/ribbit/tree/main/docs">documentation</a> | <a href="https://pypi.org/project/ribbit/">pip</a>
+ <br/>
+ <br/>
+ </div>
+
+ RIBBIT is a SUPER SIMPLE machine learning framework with the goal of creating tools with AI --> easily and efficiently.
+
+ It's an end-to-end framework, encapsulating everything from ensemble trees to deep neural networks (still working on all that lol)
+
+ # Installation
+ ```bash
+ pip install ribbit
+ ```
+
+ ### Overview of Features
+ - <a href="https://github.com/kevbuh/ribbit/blob/main/ribbit/tensor.py">Custom Tensors</a>
+ - Backpropagation
+ - Automatic Differentiation (autograd)
+ - Forward and backward passes
+ - <a href="https://github.com/kevbuh/ribbit/blob/main/ribbit/ops.py">ML Operations</a>
+ - 2D Convolutions (im2col)
+ - Numerical gradient checking
+ - Acceleration methods (Adam)
+ - Avg & Max pooling
+ - <a href="https://github.com/kevbuh/ribbit/blob/main/models/efficientnet.py">EfficientNet</a> inference
+ - <a href="https://github.com/kevbuh/ribbit/blob/main/ribbit/ops_gpu.py">GPU Support</a>
+ - and a bunch <a href="https://github.com/kevbuh/ribbit/tree/main/ribbit">more</a>
+
+ ### Sneak Peek
+ ```python
+ from ribbit.tensor import Tensor
+ from ribbit.utils import Linear
+ import ribbit.optim as optim
+
+ class mnistMLP:
+   def __init__(self):
+     self.l1 = Tensor(Linear(784, 128))
+     self.l2 = Tensor(Linear(128, 10))
+
+   def forward(self, x):
+     return x.dot(self.l1).relu().dot(self.l2).logsoftmax()
+
+ model = mnistMLP()
+ optim = optim.SGD([model.l1, model.l2], lr=0.001)
+ ```
+
+ # Bounties
+ THERES LOT OF STUFF TO WORK ON! VISIT THE <a href="https://github.com/kevbuh/ribbit/blob/main/docs/bounties.md">BOUNTY SHOP</a>
+
+ Pull requests will be merged if they:
+ * increase simplicity
+ * increase functionality
+ * increase efficiency
+
+ more info on <a href="https://github.com/kevbuh/ribbit/blob/main/docs/contributing.md">contributing</a>
@@ -0,0 +1,75 @@
+ Metadata-Version: 2.1
+ Name: froog
+ Version: 0.2.4
+ Summary: a beautifully simplistic ml framework
+ Author: Kevin Buhler
+ License: MIT
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: MIT License
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+
+ # ribbit <img src="https://github.com/kevbuh/ribbit/actions/workflows/test.yml/badge.svg" alt="unit test badge" >
+ <div align="center" >
+ <img src="https://raw.githubusercontent.com/kevbuh/ribbit/main/assets/froog.png" alt="froog the frog" height="200">
+ <br/>
+ ribbit: fast real-time optimization of gradients
+ <br/>
+ a beautifully compact machine-learning library
+ <br/>
+ <a href="https://github.com/kevbuh/ribbit">homepage</a> | <a href="https://github.com/kevbuh/ribbit/tree/main/docs">documentation</a> | <a href="https://pypi.org/project/ribbit/">pip</a>
+ <br/>
+ <br/>
+ </div>
+
+ RIBBIT is a SUPER SIMPLE machine learning framework with the goal of creating tools with AI --> easily and efficiently.
+
+ It's an end-to-end framework, encapsulating everything from ensemble trees to deep neural networks (still working on all that lol)
+
+ # Installation
+ ```bash
+ pip install ribbit
+ ```
+
+ ### Overview of Features
+ - <a href="https://github.com/kevbuh/ribbit/blob/main/ribbit/tensor.py">Custom Tensors</a>
+ - Backpropagation
+ - Automatic Differentiation (autograd)
+ - Forward and backward passes
+ - <a href="https://github.com/kevbuh/ribbit/blob/main/ribbit/ops.py">ML Operations</a>
+ - 2D Convolutions (im2col)
+ - Numerical gradient checking
+ - Acceleration methods (Adam)
+ - Avg & Max pooling
+ - <a href="https://github.com/kevbuh/ribbit/blob/main/models/efficientnet.py">EfficientNet</a> inference
+ - <a href="https://github.com/kevbuh/ribbit/blob/main/ribbit/ops_gpu.py">GPU Support</a>
+ - and a bunch <a href="https://github.com/kevbuh/ribbit/tree/main/ribbit">more</a>
+
+ ### Sneak Peek
+ ```python
+ from ribbit.tensor import Tensor
+ from ribbit.utils import Linear
+ import ribbit.optim as optim
+
+ class mnistMLP:
+   def __init__(self):
+     self.l1 = Tensor(Linear(784, 128))
+     self.l2 = Tensor(Linear(128, 10))
+
+   def forward(self, x):
+     return x.dot(self.l1).relu().dot(self.l2).logsoftmax()
+
+ model = mnistMLP()
+ optim = optim.SGD([model.l1, model.l2], lr=0.001)
+ ```
+
+ # Bounties
+ THERES LOT OF STUFF TO WORK ON! VISIT THE <a href="https://github.com/kevbuh/ribbit/blob/main/docs/bounties.md">BOUNTY SHOP</a>
+
+ Pull requests will be merged if they:
+ * increase simplicity
+ * increase functionality
+ * increase efficiency
+
+ more info on <a href="https://github.com/kevbuh/ribbit/blob/main/docs/contributing.md">contributing</a>
@@ -1,21 +1,21 @@
  LICENSE
  README.md
  setup.py
- froog/__init__.py
- froog/gradcheck.py
- froog/nn.py
- froog/ops.py
- froog/ops_gpu.py
- froog/optim.py
- froog/tensor.py
- froog/utils.py
  froog.egg-info/PKG-INFO
  froog.egg-info/SOURCES.txt
  froog.egg-info/dependency_links.txt
  froog.egg-info/requires.txt
  froog.egg-info/top_level.txt
+ ribbit/__init__.py
+ ribbit/gradcheck.py
+ ribbit/nn.py
+ ribbit/ops.py
+ ribbit/ops_gpu.py
+ ribbit/optim.py
+ ribbit/tensor.py
+ ribbit/utils.py
  tests/test_conv_speed.py
- tests/test_nn.py
+ tests/test_models.py
  tests/test_ops.py
  tests/test_optim.py
  tests/test_tensor.py
@@ -1,2 +1,3 @@
  numpy
  requests
+ matplotlib
@@ -0,0 +1 @@
+ ribbit
@@ -0,0 +1,3 @@
+ import ribbit.optim
+ import ribbit.tensor
+ import ribbit.utils
@@ -7,8 +7,8 @@
  # |___| |___| |_||_______||_______||_______|

  import numpy as np
- from froog.tensor import Tensor
- from froog.utils import mask_like
+ from ribbit.tensor import Tensor
+ from ribbit.utils import mask_like

  def jacobian(model, input):
    output = model(input)
@@ -30,7 +30,7 @@ def numerical_jacobian(model, input, eps = 1e-6):
    # Computes :
    # First-order partial derivatives using Finite-Difference Approximation with Central Difference Method (CDM)
    # Params:
-   # model : A froog model
+   # model : A ribbit model
    # input : An input
    # eps : Perturbation step
    # Returns:
@@ -58,7 +58,7 @@ def gradcheck(model, input, eps = 1e-06, atol = 1e-5, rtol = 0.001):
    """
    Checks whether computed gradient is close to numerical approximation of the Jacobian
    Params:
-     model : froog model
+     model : ribbit model
      eps : eps used to see if gradient is within tolerances
      atol : absolute tolerance
      rtol : relative tolerance
@@ -6,7 +6,7 @@
  # | | | | | || || || |_| |
  # |___| |___| |_||_______||_______||_______|

- from froog.tensor import Tensor
+ from ribbit.tensor import Tensor

  def swish(x):
    return x.mul(x.sigmoid())
@@ -44,7 +44,7 @@ class BatchNorm2D:
      # TODO: need running_mean and running_var
      self.running_mean = Tensor.zeros(sz)
      self.running_var = Tensor.zeros(sz)
-     self.num_batches_tracked = Tensor.zeros(0)
+     self.num_batches_tracked = Tensor.zeros(1)

    def __call__(self, x):
      x = x.sub(self.running_mean.reshape(shape=[1, -1, 1, 1]))
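The change from Tensor.zeros(0) to Tensor.zeros(1) above matters because, assuming Tensor.zeros mirrors numpy's zeros, a size of 0 allocates an empty buffer with no element in which to track anything. A numpy-only illustration:

```python
import numpy as np

assert np.zeros(0).shape == (0,) and np.zeros(0).size == 0   # empty: nothing to count batches in
assert np.zeros(1).shape == (1,) and np.zeros(1)[0] == 0.0   # a single counter initialized to zero
```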
@@ -7,8 +7,8 @@
  # |___| |___| |_||_______||_______||_______|

  import numpy as np
- from froog.tensor import Function, register
- from froog.utils import im2col, col2im
+ from ribbit.tensor import Function, register
+ from ribbit.utils import im2col, col2im

  # *****************************************************
  # ____ ___ _____ __________ ____ ____ _____
@@ -19,6 +19,12 @@ def buffer_new(ctx, shape):
    res_g.dtype = np.float32
    return res_g

+ def buffer_zeros(ctx, shape):
+   res_g = cl.Buffer(ctx.cl_ctx, cl.mem_flags.WRITE_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=np.zeros(shape))
+   res_g.shape = shape
+   res_g.dtype = np.float32
+   return res_g
+

  def buffer_like(ctx, x):
    return buffer_new(ctx, x.shape)
@@ -27,35 +33,40 @@ def clbuild(cl_ctx, prg):
    return cl.Program(cl_ctx, prg).build()

  def binary_op(ctx, code, x, y):
+   if x.shape != y.shape:
+     raise Exception(f"binary op shape mismatch: {x.shape} != {y.shape}")
    ret = buffer_like(ctx, x)
    prg = clbuild(ctx.cl_ctx, """
-   __kernel void add(
+   __kernel void binop(
      __global const float *a_g, __global const float *b_g, __global float *res_g)
    {
      int gid = get_global_id(0);
-     """+code+"""
+     float a = a_g[gid];
+     float b = b_g[gid];
+     res_g[gid] = """+code+""";
    }
    """)
-   prg.add(ctx.cl_queue, [np.prod(ret.shape)], None, x, y, ret) # (queue, size, ???, arg1, arg2, dest)
+   prg.binop(ctx.cl_queue, [np.prod(ret.shape)], None, x, y, ret) # (queue, size, ???, arg1, arg2, dest)
    return ret

  def unary_op(ctx, code, x):
    ret = buffer_like(ctx, x)
    prg = clbuild(ctx.cl_ctx, """
-   __kernel void relu(
+   __kernel void unop(
      __global const float *a_g, __global float *res_g)
    {
      int gid = get_global_id(0);
-     """+code+"""
+     float a = a_g[gid];
+     res_g[gid] = """+code+"""
    }
    """)
-   prg.relu(ctx.cl_queue, [np.prod(ret.shape)], None, x, ret)
+   prg.unop(ctx.cl_queue, [np.prod(ret.shape)], None, x, ret)
    return ret

  class Add(Function):
    @staticmethod
    def forward(ctx, x, y):
-     return binary_op(ctx, 'res_g[gid] = a_g[gid] + b_g[gid];', x, y)
+     return binary_op(ctx, 'a+b', x, y)

    @staticmethod
    def backward(ctx, grad_output):
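The refactor above has binary_op and unary_op splice a short expression such as 'a+b' into a fixed kernel template instead of taking a whole OpenCL statement. Purely as an illustration (no pyopencl required), this is the source string the new binop kernel would be assembled from:

```python
# Illustration only: the kernel source binary_op builds when code = 'a+b'.
code = "a+b"
kernel_src = """
__kernel void binop(
  __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  float a = a_g[gid];
  float b = b_g[gid];
  res_g[gid] = """ + code + """;
}
"""
print(kernel_src)  # the same template serves add/sub/mul/pow by swapping the expression
```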
@@ -65,11 +76,11 @@ register('add', Add, gpu=True)
  class Sub(Function):
    @staticmethod
    def forward(ctx, x, y):
-     return binary_op(ctx, 'res_g[gid] = a_g[gid] - b_g[gid];', x, y)
+     return binary_op(ctx, 'a-b', x, y)

    @staticmethod
    def backward(ctx, grad_output):
-     not_grad_output = unary_op(ctx, 'res_g[gid] = -a_g[gid];', grad_output)
+     not_grad_output = unary_op(ctx, '-a', grad_output)
      return grad_output, not_grad_output
  register('sub', Sub, gpu=True)

@@ -78,36 +89,27 @@ class Mul(Function):
    def forward(ctx, x, y):
      ctx.save_for_backward(x, y)

-     # HACK
-     if y.shape == (1,):
-       return binary_op(ctx, 'res_g[gid] = a_g[gid] * b_g[0];', x, y)
-     elif x.shape == y.shape:
-       return binary_op(ctx, 'res_g[gid] = a_g[gid] * b_g[gid];', x, y)
-     else:
-       raise Exception("mismatched shapes %r %r" % (x.shape, y.shape))
-
-     return ret
+     return binary_op(ctx, 'a*b', x, y)

    @staticmethod
    def backward(ctx, grad_output):
      x,y = ctx.saved_tensors
-     return binary_op(ctx, 'res_g[gid] = a_g[gid] * b_g[gid];', y, grad_output),\
-            binary_op(ctx, 'res_g[gid] = a_g[gid] * b_g[gid];', x, grad_output)
+     return binary_op(ctx, 'a*b', y, grad_output), binary_op(ctx, 'a*b', x, grad_output)
  register('mul', Mul, gpu=True)

  class Pow(Function):
    @staticmethod
    def forward(ctx, x, y):
      ctx.save_for_backward(x, y)
-     return binary_op(ctx, 'res_g[gid] = pow(a_g[gid], b_g[gid]);', x, y)
+     return binary_op(ctx, 'pow(a,b)', x, y)

    @staticmethod
    def backward(ctx, grad_output):
      x,y = ctx.saved_tensors
-     gradx = binary_op(ctx, 'res_g[gid] = a_g[gid] * b_g[gid];', grad_output,
-                       binary_op(ctx, 'res_g[gid] = b_g[gid] * (pow((float)a_g[gid], (float)(b_g[gid]-1.0)));', x, y))
-     grady = binary_op(ctx, 'res_g[gid] = a_g[gid] * b_g[gid];', grad_output,
-                       binary_op(ctx, 'res_g[gid] = pow((float)a_g[gid], (float)b_g[gid]) * log(a_g[gid]);', x, y))
+     gradx = binary_op(ctx, 'a*b', grad_output,
+                       binary_op(ctx, 'b * (pow((float)a, (float)(b-1.0)));', x, y))
+     grady = binary_op(ctx, 'a*b', grad_output,
+                       binary_op(ctx, 'pow((float)a, (float)b) * log(a);', x, y))
      return gradx, grady
  register('pow', Pow, gpu=True)
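The Pow backward above encodes the textbook derivatives d(x^y)/dx = y·x^(y-1) and d(x^y)/dy = x^y·log(x) as kernel expressions. A quick central-difference sanity check of those formulas in plain numpy, independent of the GPU path:

```python
import numpy as np

x, y, eps = 2.0, 3.0, 1e-6
num_dx = ((x + eps) ** y - (x - eps) ** y) / (2 * eps)    # numerical d/dx of x**y
num_dy = (x ** (y + eps) - x ** (y - eps)) / (2 * eps)    # numerical d/dy of x**y
assert np.isclose(num_dx, y * x ** (y - 1), rtol=1e-4)    # analytic: y * x**(y-1)
assert np.isclose(num_dy, x ** y * np.log(x), rtol=1e-4)  # analytic: x**y * log(x)
```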
@@ -133,7 +135,17 @@ class Sum(Function):
    @staticmethod
    def backward(ctx, grad_output):
      input, = ctx.saved_tensors
-     return binary_op(ctx, 'res_g[gid] = b_g[0];', input, grad_output) # Quick hack for fill
+     ret = buffer_like(ctx, input)
+     prg = clbuild(ctx.cl_ctx, """
+     __kernel void fill(
+       __global const float *a_g, __global float *res_g)
+     {
+       int gid = get_global_id(0);
+       res_g[gid] = a_g[0];
+     }
+     """)
+     prg.fill(ctx.cl_queue, [np.prod(ret.shape)], None, grad_output, ret)
+     return ret
  register('sum', Sum, gpu=True)

  class Dot(Function):
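The dedicated fill kernel above broadcasts the scalar gradient of a sum back over every element of the input. The numpy equivalent of what it computes (illustration only, not package code):

```python
import numpy as np

inp = np.arange(6, dtype=np.float32).reshape(2, 3)
grad_output = np.float32(1.0)                  # dL/d(sum) is a single scalar
grad_input = np.full(inp.shape, grad_output)   # the fill kernel writes a_g[0] to every output slot
assert grad_input.shape == inp.shape and np.all(grad_input == 1.0)
```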
@@ -210,7 +222,18 @@ class Reshape(Function):
    @staticmethod
    def forward(ctx, x, shape):
      ctx.save_for_backward(x.shape)
-     x.shape = shape
+     ss = list(shape)
+
+     # ???
+     tsum = 1
+     for s in ss:
+       if s != -1:
+         tsum *= s
+     for i,s in enumerate(ss):
+       if s == -1:
+         ss[i] = np.prod(x.shape) // tsum
+     assert np.prod(x.shape) == np.prod(ss)
+     x.shape = tuple(ss)
      return x

    @staticmethod
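The block added to Reshape.forward above (the part under the `# ???` comment) infers a single -1 entry in the requested shape so that the element count is preserved. The same logic as a standalone, hypothetical helper:

```python
import numpy as np

def infer_reshape(old_shape, new_shape):
  # Resolve one -1 entry so that prod(new_shape) == prod(old_shape).
  ss = list(new_shape)
  known = 1
  for s in ss:
    if s != -1:
      known *= s
  ss = [int(np.prod(old_shape)) // known if s == -1 else s for s in ss]
  assert np.prod(old_shape) == np.prod(ss)
  return tuple(ss)

assert infer_reshape((2, 3, 4), (6, -1)) == (6, 4)
assert infer_reshape((128, 784), (-1, 28, 28)) == (128, 28, 28)
```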
@@ -226,12 +249,12 @@ class ReLU(Function):
    @staticmethod
    def forward(ctx, input):
      ctx.save_for_backward(input)
-     return unary_op(ctx, 'res_g[gid] = max(a_g[gid], (float)0.);', input)
+     return unary_op(ctx, 'max(a, (float)0.);', input)

    @staticmethod
    def backward(ctx, grad_output):
      input, = ctx.saved_tensors
-     return binary_op(ctx, 'res_g[gid] = a_g[gid] * (b_g[gid] >= 0);', grad_output, input)
+     return binary_op(ctx, 'a * (b >= 0);', grad_output, input)
  register('relu', ReLU, gpu=True)

  class LogSoftmax(Function):
@@ -294,3 +317,117 @@ class LogSoftmax(Function):
      return grad_input
  register('logsoftmax', LogSoftmax, gpu=True)

+ # ***** CONV OPS ********
+
+ class Conv2D(Function):
+   @staticmethod
+   def forward(ctx, x, w, stride=1, groups=1):
+     if type(ctx.stride) == int: # ctx stores function params
+       ctx.stride = (ctx.stride, ctx.stride)
+
+     cout, cin, H, W = w.shape
+     y_stride, x_stride = ctx.stride
+     bs,cin_,iy,ix = x.shape
+     oy,ox = (iy-(H-y_stride))//y_stride, (ix-(W-x_stride))//x_stride
+
+     assert cin*ctx.groups == cin_
+     assert cout % ctx.groups == 0
+
+     rcout = cout//ctx.groups
+
+     # output buffer
+     ret = buffer_new(ctx, (bs, cout, oy, ox))
+     prg = clbuild(ctx.cl_ctx, """
+
+     __kernel void conv(__global const float *input, __global const float *weight, __global float *output,
+       int H, int W, int groups, int rcout, int cin, int oy, int ox, int iy, int ix, int ys, int xs) {
+
+       int B = get_global_id(0); // range 0-bs
+       int Y = get_global_id(1); // range 0-oy
+       int X = get_global_id(2); // range 0-ox
+       int IY = Y*ys;
+       int IX = X*xs;
+
+       // input = (bs, groups, cin, iy, ix)
+       // weight = (groups, rcout, cin, H, W)
+       // output = (bs, groups, rcout, oy, ox)
+       for (int g = 0; g < groups; g++) {
+         for (int c = 0; c < rcout; c++) {
+           float acc = 0.0;
+           for (int ci = 0; ci < cin; ci++) {
+             for (int y = IY; y < IY+H; y++) {
+               for (int x = IX; x < IX+W; x++) {
+                 acc += input[B*groups*cin*iy*ix + g*cin*iy*ix + ci*iy*ix + y*ix + x] * \
+                   weight[g*rcout*cin*H*W + c*cin*H*W + ci*H*W + (y-IY)*W + (x-IX)];
+               }
+             }
+           }
+           output[B*groups*rcout*oy*ox + g*rcout*oy*ox + c*oy*ox + Y*ox + X] = acc;
+         }
+       }
+     }
+     """)
+
+     prg.conv(ctx.cl_queue, [bs, oy, ox], None,
+       x, w, ret,
+       np.int32(H), np.int32(W),
+       np.int32(groups), np.int32(rcout), np.int32(cin),
+       np.int32(oy), np.int32(ox),
+       np.int32(iy), np.int32(ix),
+       np.int32(y_stride), np.int32(x_stride)
+     )
+     return ret
+
+   @staticmethod
+   def backward(ctx, grad_output):
+     raise Exception("not implemented")
+
+ register('conv2d', Conv2D, gpu=True)
+
+ class Pad2D(Function):
+   @staticmethod
+   def forward(ctx, x, padding=None):
+     bs,cin,iy,ix = x.shape
+     oy,ox = iy+padding[0]+padding[1], ix+padding[2]+padding[3] # top, bottom, left, right
+     ret = buffer_zeros(ctx, (bs, cin, oy, ox))
+
+     prg = clbuild(ctx.cl_ctx, """
+     __kernel void pad2d(
+       __global const float *input, __global float *output,
+       int cin, int py, int px, int oy, int ox, int iy, int ix
+     )
+     {
+       int B = get_global_id(0);
+       int C = get_global_id(1);
+       int Y = get_global_id(2);
+       int iptr = B*cin*iy*ix + C*iy*ix + Y*ix;
+       int optr = B*cin*oy*ox + C*oy*ox + (Y+py)*ox + px;
+       for (int x = 0; x < ix; x++) {
+         output[optr+x] = input[iptr+x];
+       }
+     }
+     """)
+     prg.pad2d(ctx.cl_queue, [bs, cin, iy], None,
+       x, ret,
+       np.int32(cin), np.int32(padding[0]), np.int32(padding[2]),
+       np.int32(oy), np.int32(ox), np.int32(iy), np.int32(ix)
+     )
+     return ret
+
+   @staticmethod
+   def backward(ctx, grad_output):
+     raise Exception("write this")
+ register('pad2d', Pad2D, gpu=True)
+
+ class Sigmoid(Function):
+   @staticmethod
+   def forward(ctx, input):
+     ret = unary_op(ctx, '1./(1+exp(-a))', input)
+     ctx.save_for_backward(ret)
+     return ret
+
+   @staticmethod
+   def backward(ctx, grad_output):
+     ret, = ctx.saved_tensors
+     return binary_op(ctx, 'a * (b * (1 - b));', grad_output, ret)
+ register('sigmoid', Sigmoid, gpu=True)
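The Conv2D kernel added above is a direct, unpadded grouped convolution; its output size comes from oy = (iy - (H - ys)) // ys and likewise for ox. A small worked check of that arithmetic (illustration only):

```python
# Output-size arithmetic used in Conv2D.forward above: oy = (iy - (H - ys)) // ys.
iy = ix = 28; H = W = 3
assert (iy - (H - 1)) // 1 == 26   # 28x28 input, 3x3 kernel, stride 1 -> 26x26 output
assert (iy - (H - 2)) // 2 == 13   # same input and kernel, stride 2 -> 13x13 output
```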
@@ -7,7 +7,7 @@
  # |___| |___| |_||_______||_______||_______|

  import numpy as np
- from froog.tensor import Tensor, GPU
+ from ribbit.tensor import Tensor, GPU

  class Optimizer:
    def __init__(self, params):
@@ -145,7 +145,7 @@ class Tensor:
      init_gpu()
      assert self.data.dtype == np.float32 # GPU only allows float32
      # hostbuf is the data buffer on host machine with the data to be copied to the OpenCL buffer
-     data = cl.Buffer(cl_ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=self.data) # from pyopencl docs
+     data = cl.Buffer(cl_ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=self.data.ravel()) # from pyopencl docs
      data.shape = self.shape
      data.dtype = self.data.dtype
      ret = Tensor(data)
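One plausible reason for the .ravel() added above: COPY_HOST_PTR uploads the host buffer as raw bytes, and a non-contiguous numpy view (a transpose, a strided slice) would otherwise hand pyopencl memory whose layout does not match the tensor's logical shape. A numpy-only illustration of the contiguity issue:

```python
import numpy as np

a = np.arange(6, dtype=np.float32).reshape(2, 3).T   # a transposed, non-contiguous view
assert not a.flags["C_CONTIGUOUS"]

flat = a.ravel()                                      # ravel() returns a contiguous copy here
assert flat.flags["C_CONTIGUOUS"] and flat.shape == (6,)
```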
@@ -232,6 +232,6 @@ def register(name, fxn, gpu=False):
    setattr(Tensor, "__%s__" % name, dispatch)
    setattr(Tensor, "__i%s__" % name, lambda self,x: self.assign(dispatch(self,x)))

- import froog.ops # this registers all the operations
+ import ribbit.ops # this registers all the operations
  if GPU:
-   import froog.ops_gpu
+   import ribbit.ops_gpu
@@ -7,6 +7,7 @@
  # |___| |___| |_||_______||_______||_______|

  import numpy as np
+ import os
  from functools import lru_cache

  def Linear(*tensor_size):
@@ -17,7 +18,7 @@ def Linear(*tensor_size):
  def fetch(url):
    import requests, os, hashlib, tempfile
    fp = os.path.join(tempfile.gettempdir(), hashlib.md5(url.encode('utf-8')).hexdigest())
-   if os.path.isfile(fp):
+   if os.path.isfile(fp) and os.stat(fp).st_size > 0:
      print(f"opening cache from {url}...")
      with open(fp, "rb") as f:
        dat = f.read()
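The added st_size check above keeps fetch() from treating a zero-byte cache file (for example, one left behind by an interrupted download) as a valid hit. A small standalone illustration of the new condition, using a hypothetical cache path:

```python
import os, tempfile

fp = os.path.join(tempfile.gettempdir(), "example-empty-cache")  # hypothetical cache entry
open(fp, "wb").close()                                           # simulate a failed, empty download

use_cache = os.path.isfile(fp) and os.stat(fp).st_size > 0
assert use_cache is False   # the new check forces a re-download instead of reusing the empty file
os.remove(fp)
```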
@@ -52,7 +53,6 @@ def get_im2col_index(oy, ox, cin, H, W):
    idx = idx_channel * OY * OX + idx_y * OX + idx_x
    return idx

- # TODO: whats this doing?
  @lru_cache
  def rearrange_col2im_index(oy, ox, cin, H, W):
    idx = get_im2col_index(oy, ox, cin, H, W)