froog 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- froog/__init__.py +34 -1
- froog/{gradcheck.py → gradient.py} +4 -11
- froog/ops.py +307 -114
- froog/optim.py +104 -32
- froog/tensor.py +219 -219
- froog/utils.py +8 -7
- froog-0.5.0.dist-info/METADATA +205 -0
- froog-0.5.0.dist-info/RECORD +10 -0
- froog/nn.py +0 -60
- froog/ops_gpu.py +0 -598
- froog-0.4.2.dist-info/LICENSE +0 -1
- froog-0.4.2.dist-info/METADATA +0 -233
- froog-0.4.2.dist-info/RECORD +0 -13
- {froog-0.4.2.dist-info → froog-0.5.0.dist-info}/WHEEL +0 -0
- {froog-0.4.2.dist-info → froog-0.5.0.dist-info}/top_level.txt +0 -0
froog/optim.py
CHANGED
@@ -7,67 +7,139 @@
|
|
7
7
|
# |___| |___| |_||_______||_______||_______|
|
8
8
|
|
9
9
|
import numpy as np
|
10
|
-
from
|
10
|
+
from typing import List
|
11
|
+
from froog.tensor import Tensor
|
11
12
|
|
12
13
|
class Optimizer:
|
13
|
-
def __init__(self, params):
|
14
|
+
def __init__(self, params: List[Tensor]) -> None:
|
14
15
|
self.params = params
|
15
16
|
|
16
17
|
class SGD(Optimizer):
|
17
18
|
"""
|
18
19
|
Stochastic Gradient Descent
|
19
20
|
"""
|
20
|
-
def __init__(self, params, lr=0.001):
|
21
|
+
def __init__(self, params: List[Tensor], lr: float = 0.001, weight_decay: float = 0, clip_value: float = 0) -> None:
|
21
22
|
super(SGD, self).__init__(params)
|
22
|
-
self.lr = Tensor([lr], gpu=params[0].gpu)
|
23
|
+
self.lr = Tensor([lr], gpu=params[0].gpu if params else False)
|
24
|
+
self.weight_decay = weight_decay
|
25
|
+
self.clip_value = clip_value
|
23
26
|
|
24
|
-
def step(self):
|
27
|
+
def step(self) -> None:
|
25
28
|
for t in self.params:
|
26
|
-
|
29
|
+
if t.grad is None:
|
30
|
+
continue
|
31
|
+
|
32
|
+
if t.gpu:
|
33
|
+
from froog.gpu import get_device, download_tensor, upload_tensor
|
34
|
+
|
35
|
+
# device = get_device()
|
36
|
+
t_cpu = download_tensor(t)
|
37
|
+
grad_cpu = download_tensor(t.grad)
|
38
|
+
lr_cpu = download_tensor(self.lr)
|
39
|
+
|
40
|
+
if self.weight_decay > 0:
|
41
|
+
grad_cpu += self.weight_decay * t_cpu
|
42
|
+
|
43
|
+
if self.clip_value > 0:
|
44
|
+
grad_cpu = np.clip(grad_cpu, -self.clip_value, self.clip_value)
|
45
|
+
|
46
|
+
t_cpu -= grad_cpu * lr_cpu
|
47
|
+
t.data = upload_tensor(t_cpu)
|
48
|
+
else:
|
49
|
+
if self.weight_decay > 0:
|
50
|
+
t.grad.data += self.weight_decay * t.data
|
51
|
+
|
52
|
+
if self.clip_value > 0:
|
53
|
+
t.grad.data = np.clip(t.grad.data, -self.clip_value, self.clip_value)
|
54
|
+
|
55
|
+
t -= t.grad * self.lr
|
27
56
|
|
28
57
|
class Adam(Optimizer):
|
29
58
|
"""
|
30
|
-
Default ADAM
|
59
|
+
Default ADAM optimizer from https://arxiv.org/pdf/1412.6980.pdf algorithm
|
31
60
|
"""
|
32
|
-
def __init__(self, params, lr=0.001, b1=0.9, b2=0.999, eps=
|
61
|
+
def __init__(self, params: List[Tensor], lr: float = 0.001, b1: float = 0.9, b2: float = 0.999, eps: float = 1e-8, max_grad: float = 10.0) -> None:
|
33
62
|
super(Adam, self).__init__(params)
|
34
63
|
self.lr = lr
|
35
64
|
self.b1 = b1
|
36
65
|
self.b2 = b2
|
37
|
-
self.eps = eps
|
66
|
+
self.eps = eps
|
38
67
|
self.t = 0
|
68
|
+
self.max_grad = max_grad
|
69
|
+
self.on_gpu = any(t.gpu for t in self.params if t is not None)
|
70
|
+
|
71
|
+
if self.on_gpu:
|
72
|
+
from froog.gpu import download_tensor
|
73
|
+
self.m = [np.zeros_like(download_tensor(t.data)) for t in self.params]
|
74
|
+
self.v = [np.zeros_like(download_tensor(t.data)) for t in self.params]
|
75
|
+
else:
|
76
|
+
self.m = [np.zeros_like(t.data) for t in self.params]
|
77
|
+
self.v = [np.zeros_like(t.data) for t in self.params]
|
39
78
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
def step(self):
|
79
|
+
def step(self) -> None:
|
80
|
+
from froog.gpu import download_tensor, upload_tensor
|
81
|
+
|
44
82
|
self.t += 1
|
45
|
-
a = self.lr * (
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
t.
|
83
|
+
a = self.lr * (np.sqrt(1 - np.power(self.b2, self.t)) / (1 - np.power(self.b1, self.t)))
|
84
|
+
|
85
|
+
for i, t in enumerate(self.params):
|
86
|
+
if t.grad is None:
|
87
|
+
continue
|
88
|
+
|
89
|
+
if t.gpu:
|
90
|
+
try:
|
91
|
+
t_data_cpu = download_tensor(t.data)
|
92
|
+
grad_cpu = download_tensor(t.grad.data)
|
93
|
+
|
94
|
+
if np.isnan(grad_cpu).any() or np.isinf(grad_cpu).any():
|
95
|
+
print(f"Warning: NaN or Inf detected in gradients for parameter {i}")
|
96
|
+
grad_cpu = np.nan_to_num(grad_cpu, nan=0.0, posinf=self.max_grad, neginf=-self.max_grad)
|
97
|
+
|
98
|
+
if self.max_grad > 0:
|
99
|
+
grad_cpu = np.clip(grad_cpu, -self.max_grad, self.max_grad)
|
100
|
+
|
101
|
+
self.m[i] = self.b1 * self.m[i] + (1 - self.b1) * grad_cpu
|
102
|
+
self.v[i] = self.b2 * self.v[i] + (1 - self.b2) * np.square(grad_cpu)
|
103
|
+
|
104
|
+
denom = np.sqrt(self.v[i]) + self.eps
|
105
|
+
update = a * self.m[i] / denom
|
106
|
+
|
107
|
+
if np.isnan(update).any() or np.isinf(update).any():
|
108
|
+
print(f"Warning: NaN or Inf detected in update for parameter {i}")
|
109
|
+
max_update = np.finfo(np.float32).max / 100
|
110
|
+
update = np.nan_to_num(update, nan=0.0, posinf=max_update, neginf=-max_update)
|
111
|
+
|
112
|
+
t_data_cpu -= update
|
113
|
+
|
114
|
+
if np.isnan(t_data_cpu).any() or np.isinf(t_data_cpu).any():
|
115
|
+
print(f"Warning: NaN or Inf detected in parameter {i} after update")
|
116
|
+
max_val = np.finfo(np.float32).max / 10
|
117
|
+
t_data_cpu = np.nan_to_num(t_data_cpu, nan=0.0, posinf=max_val, neginf=-max_val)
|
118
|
+
|
119
|
+
t.data = upload_tensor(t_data_cpu)
|
120
|
+
except Exception as e:
|
121
|
+
print(f"Error in Adam update for GPU tensor {i}: {e}")
|
122
|
+
continue
|
123
|
+
else:
|
124
|
+
if self.max_grad > 0:
|
125
|
+
np.clip(t.grad.data, -self.max_grad, self.max_grad, out=t.grad.data)
|
126
|
+
|
127
|
+
self.m[i] = self.b1 * self.m[i] + (1 - self.b1) * t.grad.data
|
128
|
+
self.v[i] = self.b2 * self.v[i] + (1 - self.b2) * np.square(t.grad.data)
|
129
|
+
t.data -= a * self.m[i] / (np.sqrt(self.v[i]) + self.eps)
|
52
130
|
|
53
131
|
class RMSprop(Optimizer):
|
54
132
|
"""
|
55
|
-
|
56
|
-
https://optimization.cbe.cornell.edu/index.php?title=RMSProp
|
57
|
-
RMSprop divides the learning rate by an exponentially decaying average of squared gradients.
|
58
|
-
|
59
|
-
Notes:
|
60
|
-
The reason RPROP doesn't work is that it violates the central idea behind stochastic gradient descent,
|
61
|
-
which is when we have small enough learning rate, it averages the gradients over successive mini-batches.
|
133
|
+
RMSprop optimizer with epsilon for numerical stability.
|
62
134
|
"""
|
63
|
-
def __init__(self, params, decay=0.9, lr=0.001, eps=1e-8):
|
135
|
+
def __init__(self, params: List[Tensor], decay: float = 0.9, lr: float = 0.001, eps: float = 1e-8) -> None:
|
64
136
|
super(RMSprop, self).__init__(params)
|
65
137
|
self.lr = lr
|
66
138
|
self.decay = decay
|
67
139
|
self.eps = eps
|
68
|
-
self.v = [np.zeros_like(t.data) for t in self.params]
|
140
|
+
self.v: List[np.ndarray] = [np.zeros_like(t.data) for t in self.params]
|
69
141
|
|
70
|
-
def step(self):
|
71
|
-
for i,t in enumerate(self.params):
|
72
|
-
self.v[i] = self.decay * self.v[i] + (1-self.decay) * np.square(t.grad.data)
|
142
|
+
def step(self) -> None:
|
143
|
+
for i, t in enumerate(self.params):
|
144
|
+
self.v[i] = self.decay * self.v[i] + (1 - self.decay) * np.square(t.grad.data)
|
73
145
|
t.data -= self.lr / (np.sqrt(self.v[i]) + self.eps) * t.grad.data
|
froog/tensor.py
CHANGED
@@ -5,232 +5,232 @@
|
|
5
5
|
# | ___|| __ || |_| || |_| || || |
|
6
6
|
# | | | | | || || || |_| |
|
7
7
|
# |___| |___| |_||_______||_______||_______|
|
8
|
-
#
|
9
|
-
# inspired by pytorch
|
10
|
-
# inspired by tinygrad
|
11
|
-
# inspired by https://github.com/karpathy/micrograd/blob/master/micrograd/engine.py
|
12
8
|
|
13
9
|
import os
|
14
10
|
import numpy as np
|
15
11
|
from inspect import signature
|
12
|
+
from typing import Tuple, List, Union, Optional, Any, TypeVar, cast
|
13
|
+
from froog.gpu import get_device, upload_tensor, download_tensor, is_buffer
|
16
14
|
|
17
|
-
|
18
|
-
import pyopencl as cl
|
19
|
-
GPU = True
|
20
|
-
except ImportError:
|
21
|
-
# no GPU support
|
22
|
-
GPU = False
|
23
|
-
|
24
|
-
cl_ctx, cl_queue = None, None
|
25
|
-
def init_gpu():
|
26
|
-
"""
|
27
|
-
creates global OpenCL context and queue
|
28
|
-
"""
|
29
|
-
global cl_ctx, cl_queue
|
30
|
-
if cl_queue is None:
|
31
|
-
try:
|
32
|
-
# if you have an m2 mac
|
33
|
-
cl_ctx = cl.create_some_context(answers=[0])
|
34
|
-
except (cl._cl.RuntimeError, TypeError):
|
35
|
-
cl_ctx = cl.create_some_context(interactive=False)
|
36
|
-
cl_queue = cl.CommandQueue(cl_ctx)
|
37
|
-
|
38
|
-
# ************ Main Classes ************
|
39
|
-
# ********** Tensor, Function **********
|
40
|
-
# _____________ _______ ____ ____
|
41
|
-
# /_ __/ ____/ | / / ___// __ \/ __ \
|
42
|
-
# / / / __/ / |/ /\__ \/ / / / /_/ /
|
43
|
-
# / / / /___/ /| /___/ / /_/ / _, _/
|
44
|
-
# /_/ /_____/_/ |_//____/\____/_/ |_|
|
15
|
+
T = TypeVar('T', bound='Tensor')
|
45
16
|
|
46
17
|
class Tensor:
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
18
|
+
did_float_warning = False
|
19
|
+
ops = {}
|
20
|
+
ops_gpu = {}
|
21
|
+
|
22
|
+
def __init__(self, data: Union[List, np.ndarray, Any], gpu: bool = False):
|
23
|
+
if isinstance(data, list): data = np.array(data, dtype=np.float32)
|
24
|
+
elif is_buffer(data): self.gpu = True
|
25
|
+
elif not isinstance(data, np.ndarray): raise TypeError(f"Error constructing tensor with {data}")
|
26
|
+
if isinstance(data, np.ndarray):
|
27
|
+
if data.dtype != np.float32 and not Tensor.did_float_warning:
|
28
|
+
if os.getenv("WARNING") == "1": print(f"warning, {data.shape} isn't float32. float64 needed for numerical jacobian")
|
29
|
+
if not os.getenv("DEBUG") == "1": Tensor.did_float_warning = True
|
30
|
+
self.gpu = False
|
31
|
+
self.data = data
|
32
|
+
self.grad: Optional[Tensor] = None
|
33
|
+
self._ctx = None
|
34
|
+
if gpu: self.gpu_()
|
35
|
+
|
36
|
+
def __repr__(self) -> str: return f"Tensor data: {self.data}, gradients: {self.grad.data if self.grad else None}"
|
37
|
+
def assign(self, x: T) -> None: self.data = x.data
|
38
|
+
|
39
|
+
@property
|
40
|
+
def shape(self) -> Tuple[int, ...]:
|
41
|
+
if self.gpu:
|
42
|
+
device = get_device()
|
43
|
+
if device is not None and hasattr(device, 'buffer_metadata'):
|
44
|
+
buffer_id = id(self.data)
|
45
|
+
if buffer_id in device.buffer_metadata: return device.buffer_metadata[buffer_id]['shape']
|
46
|
+
try:
|
47
|
+
data = download_tensor(self)
|
48
|
+
return data.shape
|
49
|
+
except Exception as e:
|
50
|
+
print(f"Warning: Failed to get shape from GPU tensor: {e}")
|
51
|
+
return (1,)
|
52
|
+
return self.data.shape
|
53
|
+
|
54
|
+
@property
|
55
|
+
def size(self, dim=None) -> Union[int, Tuple[int, ...]]:
|
56
|
+
if dim is not None: return self.shape[dim]
|
57
|
+
return int(np.prod(self.shape))
|
58
|
+
|
59
|
+
@property
|
60
|
+
def ndim(self) -> int: return len(self.shape)
|
61
|
+
|
62
|
+
@property
|
63
|
+
def transpose(self) -> T:
|
64
|
+
if isinstance(self.data, np.ndarray): return Tensor(self.data.T, gpu=self.gpu)
|
65
|
+
else:
|
66
|
+
cpu_tensor = self.to_cpu()
|
67
|
+
return Tensor(cpu_tensor.data.T, gpu=self.gpu)
|
68
|
+
|
69
|
+
@property
|
70
|
+
def dtype(self) -> np.dtype:
|
71
|
+
if self.gpu:
|
72
|
+
device = get_device()
|
73
|
+
if device is not None and hasattr(device, 'buffer_metadata'):
|
74
|
+
buffer_id = id(self.data)
|
75
|
+
if buffer_id in device.buffer_metadata: return device.buffer_metadata[buffer_id]['dtype']
|
76
|
+
return np.float32
|
77
|
+
return self.data.dtype
|
78
|
+
|
79
|
+
@property
|
80
|
+
def is_gpu(self) -> bool: return self.gpu
|
81
|
+
|
82
|
+
@staticmethod
|
83
|
+
def zeros(*shape: int) -> T: return Tensor(np.zeros(shape, dtype=np.float32))
|
84
|
+
|
85
|
+
@staticmethod
|
86
|
+
def ones(*shape: int) -> T: return Tensor(np.ones(shape, dtype=np.float32))
|
87
|
+
|
88
|
+
@staticmethod
|
89
|
+
def randn(*shape: int) -> T: return Tensor(np.random.randn(*shape).astype(np.float32))
|
90
|
+
|
91
|
+
@staticmethod
|
92
|
+
def eye(dim: int) -> T: return Tensor(np.eye(dim).astype(np.float32))
|
93
|
+
|
94
|
+
@staticmethod
|
95
|
+
def arange(start: Union[int, float], stop: Optional[Union[int, float]] = None, step: Union[int, float] = 1) -> T:
|
96
|
+
if stop is None:
|
97
|
+
stop = start
|
98
|
+
start = 0
|
99
|
+
return Tensor(np.arange(start, stop, step, dtype=np.float32))
|
100
|
+
|
101
|
+
def flatten(self) -> T: return Tensor(self.data.reshape(-1), gpu=self.gpu)
|
102
|
+
def detach(self) -> T: return Tensor(self.data.copy(), gpu=self.gpu)
|
103
|
+
def view(self, *shape: int) -> T: return Tensor(self.data.reshape(shape), gpu=self.gpu)
|
104
|
+
def to_float(self) -> T: return Tensor(self.data.astype(np.float32), gpu=self.gpu)
|
105
|
+
def to_int(self) -> T: return Tensor(self.data.astype(np.int32), gpu=self.gpu)
|
106
|
+
def to_bool(self) -> T: return Tensor(self.data.astype(bool), gpu=self.gpu)
|
107
|
+
|
108
|
+
def unsqueeze(self, dim: int) -> T:
|
109
|
+
shape = list(self.shape)
|
110
|
+
if dim < 0: dim = len(shape) + 1 + dim
|
111
|
+
shape.insert(dim, 1)
|
112
|
+
return Tensor(self.data.reshape(shape), gpu=self.gpu)
|
113
|
+
|
114
|
+
def squeeze(self, dim: Optional[int] = None) -> T:
|
115
|
+
if dim is None: return Tensor(self.data.squeeze(), gpu=self.gpu)
|
116
|
+
shape = list(self.shape)
|
117
|
+
if dim < 0: dim = len(shape) + dim
|
118
|
+
if 0 <= dim < len(shape) and shape[dim] == 1: shape.pop(dim)
|
119
|
+
return Tensor(self.data.reshape(shape), gpu=self.gpu)
|
120
|
+
|
121
|
+
def backward(self, allow_fill: bool = True) -> None:
|
122
|
+
if self._ctx is None: return
|
123
|
+
if self.grad is None and allow_fill:
|
124
|
+
assert self.shape == (1,)
|
125
|
+
self.grad = Tensor(np.ones(self.shape, dtype=self.dtype), gpu=self.gpu)
|
126
|
+
assert self.grad is not None
|
127
|
+
grads = self._ctx.backward(self._ctx, self.grad.data)
|
128
|
+
if len(self._ctx.parents) == 1: grads = [grads]
|
129
|
+
for t, g in zip(self._ctx.parents, grads):
|
130
|
+
if g is None: continue
|
131
|
+
t_shape = t.shape
|
132
|
+
if is_buffer(g):
|
133
|
+
device = get_device()
|
134
|
+
if device is not None and hasattr(device, 'buffer_metadata'):
|
135
|
+
buffer_id = id(g)
|
136
|
+
if buffer_id in device.buffer_metadata: g_shape = device.buffer_metadata[buffer_id]['shape']
|
137
|
+
else:
|
138
|
+
try:
|
139
|
+
g_cpu = download_tensor(g)
|
140
|
+
g_shape = g_cpu.shape
|
141
|
+
except:
|
142
|
+
print(f"Warning: Could not determine shape of gradient in {self._ctx}")
|
143
|
+
g_shape = t_shape
|
144
|
+
else: g_shape = g.shape
|
145
|
+
if g_shape != t_shape:
|
146
|
+
print(f"grad shape must match tensor shape in {self._ctx}, {g_shape} != {t_shape}")
|
147
|
+
assert False
|
148
|
+
t.grad = Tensor(g)
|
149
|
+
t.backward(allow_fill=False)
|
150
|
+
|
151
|
+
def mean(self) -> T:
|
152
|
+
div = Tensor(np.array([1 / self.size], dtype=np.float32), gpu=self.gpu)
|
153
|
+
return self.sum().mul(div)
|
154
|
+
|
155
|
+
def sqrt(self) -> T:
|
156
|
+
root = Tensor(np.zeros(self.shape, dtype=np.float32) + 0.5, gpu=self.gpu)
|
157
|
+
return self.pow(root)
|
158
|
+
|
159
|
+
def div(self, y: T) -> T:
|
160
|
+
root = Tensor(np.zeros(self.shape, dtype=np.float32) - 1, gpu=self.gpu)
|
161
|
+
return self.mul(y.pow(root))
|
55
162
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
if
|
61
|
-
|
62
|
-
self.gpu = False
|
63
|
-
|
64
|
-
self.data = data
|
65
|
-
self.grad = None
|
66
|
-
|
67
|
-
if gpu:
|
68
|
-
self.gpu_()
|
69
|
-
|
70
|
-
# internal variables used for autograd graph construction
|
71
|
-
self._ctx = None # these are where the backward gradient computation are saved
|
72
|
-
|
73
|
-
def __repr__(self):
|
74
|
-
return f"Tensor data: {self.data}, gradients: {self.grad.data if self.grad else None}"
|
75
|
-
|
76
|
-
def assign(self, x):
|
77
|
-
self.data = x.data
|
78
|
-
|
79
|
-
@property
|
80
|
-
def shape(self):
|
81
|
-
return self.data.shape
|
82
|
-
|
83
|
-
@staticmethod
|
84
|
-
def zeros(*shape):
|
85
|
-
return Tensor(np.zeros(shape, dtype=np.float32))
|
86
|
-
|
87
|
-
@staticmethod
|
88
|
-
def ones(*shape):
|
89
|
-
return Tensor(np.ones(shape, dtype=np.float32))
|
90
|
-
|
91
|
-
@staticmethod
|
92
|
-
def randn(*shape):
|
93
|
-
return Tensor(np.random.randn(*shape).astype(np.float32))
|
94
|
-
|
95
|
-
@staticmethod
|
96
|
-
def eye(dim):
|
97
|
-
return Tensor(np.eye(dim).astype(np.float32))
|
98
|
-
|
99
|
-
def backward(self, allow_fill=True):
|
100
|
-
if self._ctx is None:
|
101
|
-
return
|
102
|
-
|
103
|
-
if self.grad is None and allow_fill:
|
104
|
-
# allow_fill gives backprop a starting point, fills in the first grad with one is its None
|
105
|
-
assert self.data.shape == (1,) # numpy returns tuples as shapes
|
106
|
-
self.grad = Tensor(np.ones(self.data.shape, dtype=self.data.dtype), gpu=self.gpu)
|
107
|
-
|
108
|
-
assert self.grad is not None
|
109
|
-
|
110
|
-
# THIS IS WHERE AUTO GRAD IS DONE
|
111
|
-
grads = self._ctx.backward(self._ctx, self.grad.data) # get gradients respective to what op happened
|
112
|
-
if len(self._ctx.parents) == 1:
|
113
|
-
grads = [grads]
|
114
|
-
for t, g in zip(self._ctx.parents, grads):
|
115
|
-
if g is None:
|
116
|
-
continue
|
117
|
-
if g.shape != t.data.shape:
|
118
|
-
print(f"grad shape must match tensor shape in {self._ctx}, {g.shape} != {t.data.shape}")
|
119
|
-
assert False
|
120
|
-
t.grad = Tensor(g) # access actual gradients using grad.data
|
121
|
-
t.backward(allow_fill=False)
|
122
|
-
|
123
|
-
# ****** cpu/gpu ******
|
163
|
+
def to_cpu(self) -> T:
|
164
|
+
if not self.gpu: return cast(T, self)
|
165
|
+
data = download_tensor(self)
|
166
|
+
ret = Tensor(data)
|
167
|
+
if self.grad: ret.grad = self.grad.to_cpu()
|
168
|
+
return ret
|
124
169
|
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
def to_gpu(self):
|
141
|
-
if not GPU:
|
142
|
-
raise Exception("no gpu support! install pyopencl")
|
143
|
-
if not self.gpu:
|
144
|
-
init_gpu()
|
145
|
-
assert self.data.dtype == np.float32 # GPU only allows float32
|
146
|
-
# hostbuf is the data buffer on host machine with the data to be copied to the OpenCL buffer
|
147
|
-
data = cl.Buffer(cl_ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=self.data.ravel()) # from pyopencl docs
|
148
|
-
data.shape = self.shape
|
149
|
-
data.dtype = self.data.dtype
|
150
|
-
ret = Tensor(data)
|
151
|
-
if self.grad:
|
152
|
-
ret.grad = self.grad.to_gpu()
|
153
|
-
return ret
|
154
|
-
else:
|
155
|
-
return self
|
156
|
-
|
157
|
-
ops = {} # stores operations that are done on the CPU
|
158
|
-
ops_gpu = {} # stores operations that are done on the GPU
|
159
|
-
|
160
|
-
# ****** basic tensor math ops ******
|
161
|
-
|
162
|
-
def mean(self):
|
163
|
-
div = Tensor(np.array([1 / np.prod(self.shape)], dtype=self.data.dtype), gpu=self.gpu)
|
164
|
-
return self.sum().mul(div)
|
165
|
-
|
166
|
-
def sqrt(self):
|
167
|
-
root = Tensor(np.zeros(self.shape, dtype=self.data.dtype)+0.5, gpu=self.gpu)
|
168
|
-
return self.pow(root)
|
169
|
-
|
170
|
-
def div(self, y):
|
171
|
-
root = Tensor(np.zeros(self.shape, dtype=self.data.dtype)-1, gpu=self.gpu)
|
172
|
-
return self.mul(y.pow(root))
|
173
|
-
|
174
|
-
# ________ ___ ______________________ _ __
|
175
|
-
# / ____/ / / / | / / ____/_ __/ _/ __ \/ | / /
|
176
|
-
# / /_ / / / / |/ / / / / / // / / / |/ /
|
177
|
-
# / __/ / /_/ / /| / /___ / / _/ // /_/ / /| /
|
178
|
-
# /_/ \____/_/ |_/\____/ /_/ /___/\____/_/ |_/
|
179
|
-
|
170
|
+
def gpu_(self) -> None:
|
171
|
+
if not self.gpu and (device := get_device()) is not None and device.name != "CPU":
|
172
|
+
self.data = upload_tensor(self.data)
|
173
|
+
self.gpu = True
|
174
|
+
if self.grad: self.grad.gpu_()
|
175
|
+
|
176
|
+
def to_gpu(self) -> T:
|
177
|
+
if (device := get_device()) is None or device.name == "CPU": raise Exception("no gpu support! install pyopencl or use a Metal-compatible device")
|
178
|
+
if self.gpu: return cast(T, self)
|
179
|
+
gpu_data = upload_tensor(self.data)
|
180
|
+
ret = Tensor(gpu_data)
|
181
|
+
ret.gpu = True
|
182
|
+
if self.grad: ret.grad = self.grad.to_gpu()
|
183
|
+
return ret
|
184
|
+
|
180
185
|
class Function:
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
self
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
setattr(
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
setattr(Tensor, "__i%s__" % name, lambda self,x: self.assign(dispatch(self,x)))
|
233
|
-
|
234
|
-
import froog.ops # this registers all the operations
|
235
|
-
if GPU:
|
236
|
-
import froog.ops_gpu
|
186
|
+
def __init__(self, *tensors: Tensor) -> None:
|
187
|
+
self.parents = tensors
|
188
|
+
self.saved_tensors: List[Any] = []
|
189
|
+
|
190
|
+
def save_for_backward(self, *x: Any) -> None:
|
191
|
+
self.saved_tensors.extend(x)
|
192
|
+
|
193
|
+
def apply(self, *x: Any, **kwargs: Any) -> Tensor:
|
194
|
+
op = self
|
195
|
+
ctx = op(*x)
|
196
|
+
params = signature(op.forward).parameters
|
197
|
+
for p in params.values():
|
198
|
+
if p.default is not p.empty: setattr(ctx, p.name, p.default)
|
199
|
+
for k, v in kwargs.items(): setattr(ctx, k, v)
|
200
|
+
ret = Tensor(op.forward(ctx, *[t.data for t in x], **kwargs))
|
201
|
+
ret._ctx = ctx
|
202
|
+
return ret
|
203
|
+
|
204
|
+
def register(name: str, fxn: Any, gpu: bool = False) -> None:
|
205
|
+
if gpu:
|
206
|
+
setattr(Tensor, name, lambda self, *x, **kwargs: fxn.apply(fxn, self, *x, **kwargs))
|
207
|
+
Tensor.ops_gpu[name] = fxn
|
208
|
+
else: Tensor.ops[name] = fxn
|
209
|
+
|
210
|
+
def dispatch(self: Tensor, *x: Any, **kwargs: Any) -> Tensor:
|
211
|
+
try:
|
212
|
+
op_func = (Tensor.ops_gpu if self.gpu else Tensor.ops)[name]
|
213
|
+
return op_func.apply(op_func, self, *x, **kwargs)
|
214
|
+
except Exception as e:
|
215
|
+
print(f"Error in {name} operation: {e}")
|
216
|
+
if os.getenv("DEBUG") == "1":
|
217
|
+
print(f" Self: {self}")
|
218
|
+
for i, arg in enumerate(x): print(f" Arg {i}: {arg}")
|
219
|
+
print(f" Kwargs: {kwargs}")
|
220
|
+
raise
|
221
|
+
|
222
|
+
setattr(Tensor, name, dispatch)
|
223
|
+
|
224
|
+
if name in ['add', 'sub', 'mul', 'div']:
|
225
|
+
setattr(Tensor, "__%s__" % name, dispatch)
|
226
|
+
setattr(Tensor, "__i%s__" % name, lambda self, x: self.assign(dispatch(self, x)))
|
227
|
+
|
228
|
+
if (device := get_device()) is not None and device.name != "CPU":
|
229
|
+
if device.__class__.__name__ == "MetalDevice":
|
230
|
+
try: import froog.gpu.metal.ops_metal
|
231
|
+
except ImportError:
|
232
|
+
if os.getenv("DEBUG") == "1": print("Failed to import Metal operations")
|
233
|
+
elif device.__class__.__name__ == "OpenCLDevice":
|
234
|
+
try: import froog.gpu.cl.ops_cl
|
235
|
+
except ImportError:
|
236
|
+
if os.getenv("DEBUG") == "1": print("Failed to import OpenCL operations")
|