froog 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
froog/optim.py CHANGED
@@ -7,67 +7,139 @@
  # |___| |___| |_||_______||_______||_______|

  import numpy as np
- from froog.tensor import Tensor, GPU
+ from typing import List
+ from froog.tensor import Tensor

  class Optimizer:
-   def __init__(self, params):
+   def __init__(self, params: List[Tensor]) -> None:
      self.params = params

  class SGD(Optimizer):
    """
    Stochastic Gradient Descent
    """
-   def __init__(self, params, lr=0.001):
+   def __init__(self, params: List[Tensor], lr: float = 0.001, weight_decay: float = 0, clip_value: float = 0) -> None:
      super(SGD, self).__init__(params)
-     self.lr = Tensor([lr], gpu=params[0].gpu)
+     self.lr = Tensor([lr], gpu=params[0].gpu if params else False)
+     self.weight_decay = weight_decay
+     self.clip_value = clip_value

-   def step(self):
+   def step(self) -> None:
      for t in self.params:
-       t -= t.grad * self.lr
+       if t.grad is None:
+         continue
+
+       if t.gpu:
+         from froog.gpu import get_device, download_tensor, upload_tensor
+
+         # device = get_device()
+         t_cpu = download_tensor(t)
+         grad_cpu = download_tensor(t.grad)
+         lr_cpu = download_tensor(self.lr)
+
+         if self.weight_decay > 0:
+           grad_cpu += self.weight_decay * t_cpu
+
+         if self.clip_value > 0:
+           grad_cpu = np.clip(grad_cpu, -self.clip_value, self.clip_value)
+
+         t_cpu -= grad_cpu * lr_cpu
+         t.data = upload_tensor(t_cpu)
+       else:
+         if self.weight_decay > 0:
+           t.grad.data += self.weight_decay * t.data
+
+         if self.clip_value > 0:
+           t.grad.data = np.clip(t.grad.data, -self.clip_value, self.clip_value)
+
+         t -= t.grad * self.lr

  class Adam(Optimizer):
    """
-   Default ADAM opimizer from https://arxiv.org/pdf/1412.6980.pdf algorithm
+   Default ADAM optimizer from https://arxiv.org/pdf/1412.6980.pdf algorithm
    """
-   def __init__(self, params, lr=0.001, b1=0.9, b2=0.999, eps=10e-8):
+   def __init__(self, params: List[Tensor], lr: float = 0.001, b1: float = 0.9, b2: float = 0.999, eps: float = 1e-8, max_grad: float = 10.0) -> None:
      super(Adam, self).__init__(params)
      self.lr = lr
      self.b1 = b1
      self.b2 = b2
-     self.eps = eps # should be 1e-8?
+     self.eps = eps
      self.t = 0
+     self.max_grad = max_grad
+     self.on_gpu = any(t.gpu for t in self.params if t is not None)
+
+     if self.on_gpu:
+       from froog.gpu import download_tensor
+       self.m = [np.zeros_like(download_tensor(t.data)) for t in self.params]
+       self.v = [np.zeros_like(download_tensor(t.data)) for t in self.params]
+     else:
+       self.m = [np.zeros_like(t.data) for t in self.params]
+       self.v = [np.zeros_like(t.data) for t in self.params]

-     self.m = [np.zeros_like(t.data) for t in self.params]
-     self.v = [np.zeros_like(t.data) for t in self.params]
-
-   def step(self):
+   def step(self) -> None:
+     from froog.gpu import download_tensor, upload_tensor
+
      self.t += 1
-     a = self.lr * (
-       np.sqrt(1 - np.power(self.b2, self.t)) /
-       (1 - np.power(self.b1, self.t)))
-     for i,t in enumerate(self.params):
-       self.m[i] = self.b1 * self.m[i] + (1 - self.b1) * t.grad.data
-       self.v[i] = self.b2 * self.v[i] + (1 - self.b2) * np.square(t.grad.data)
-       t.data -= a * self.m[i] / (np.sqrt(self.v[i]) + self.eps)
+     a = self.lr * (np.sqrt(1 - np.power(self.b2, self.t)) / (1 - np.power(self.b1, self.t)))
+
+     for i, t in enumerate(self.params):
+       if t.grad is None:
+         continue
+
+       if t.gpu:
+         try:
+           t_data_cpu = download_tensor(t.data)
+           grad_cpu = download_tensor(t.grad.data)
+
+           if np.isnan(grad_cpu).any() or np.isinf(grad_cpu).any():
+             print(f"Warning: NaN or Inf detected in gradients for parameter {i}")
+             grad_cpu = np.nan_to_num(grad_cpu, nan=0.0, posinf=self.max_grad, neginf=-self.max_grad)
+
+           if self.max_grad > 0:
+             grad_cpu = np.clip(grad_cpu, -self.max_grad, self.max_grad)
+
+           self.m[i] = self.b1 * self.m[i] + (1 - self.b1) * grad_cpu
+           self.v[i] = self.b2 * self.v[i] + (1 - self.b2) * np.square(grad_cpu)
+
+           denom = np.sqrt(self.v[i]) + self.eps
+           update = a * self.m[i] / denom
+
+           if np.isnan(update).any() or np.isinf(update).any():
+             print(f"Warning: NaN or Inf detected in update for parameter {i}")
+             max_update = np.finfo(np.float32).max / 100
+             update = np.nan_to_num(update, nan=0.0, posinf=max_update, neginf=-max_update)
+
+           t_data_cpu -= update
+
+           if np.isnan(t_data_cpu).any() or np.isinf(t_data_cpu).any():
+             print(f"Warning: NaN or Inf detected in parameter {i} after update")
+             max_val = np.finfo(np.float32).max / 10
+             t_data_cpu = np.nan_to_num(t_data_cpu, nan=0.0, posinf=max_val, neginf=-max_val)
+
+           t.data = upload_tensor(t_data_cpu)
+         except Exception as e:
+           print(f"Error in Adam update for GPU tensor {i}: {e}")
+           continue
+       else:
+         if self.max_grad > 0:
+           np.clip(t.grad.data, -self.max_grad, self.max_grad, out=t.grad.data)
+
+         self.m[i] = self.b1 * self.m[i] + (1 - self.b1) * t.grad.data
+         self.v[i] = self.b2 * self.v[i] + (1 - self.b2) * np.square(t.grad.data)
+         t.data -= a * self.m[i] / (np.sqrt(self.v[i]) + self.eps)

  class RMSprop(Optimizer):
    """
-   This version has epsilon
-   https://optimization.cbe.cornell.edu/index.php?title=RMSProp
-   RMSprop divides the learning rate by an exponentially decaying average of squared gradients.
-
-   Notes:
-   The reason RPROP doesn't work is that it violates the central idea behind stochastic gradient descent,
-   which is when we have small enough learning rate, it averages the gradients over successive mini-batches.
+   RMSprop optimizer with epsilon for numerical stability.
    """
-   def __init__(self, params, decay=0.9, lr=0.001, eps=1e-8):
+   def __init__(self, params: List[Tensor], decay: float = 0.9, lr: float = 0.001, eps: float = 1e-8) -> None:
      super(RMSprop, self).__init__(params)
      self.lr = lr
      self.decay = decay
      self.eps = eps
-     self.v = [np.zeros_like(t.data) for t in self.params]
+     self.v: List[np.ndarray] = [np.zeros_like(t.data) for t in self.params]

-   def step(self):
-     for i,t in enumerate(self.params):
-       self.v[i] = self.decay * self.v[i] + (1-self.decay) * np.square(t.grad.data)
+   def step(self) -> None:
+     for i, t in enumerate(self.params):
+       self.v[i] = self.decay * self.v[i] + (1 - self.decay) * np.square(t.grad.data)
        t.data -= self.lr / (np.sqrt(self.v[i]) + self.eps) * t.grad.data
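
The optimizer changes above add weight decay and gradient clipping to SGD, and gradient clipping plus NaN/Inf guards to Adam, with explicit download/upload round-trips for GPU-resident parameters. A minimal usage sketch based only on the signatures in this hunk; the two-parameter model and the dot/relu ops come from froog.ops and are assumed here purely for illustration:

from froog.tensor import Tensor
from froog.optim import SGD, Adam

# hypothetical two-parameter model, used only to produce gradients
w1 = Tensor.randn(784, 128)
w2 = Tensor.randn(128, 10)

optim = SGD([w1, w2], lr=0.001, weight_decay=1e-4, clip_value=1.0)  # new keyword arguments in 0.5.0
# optim = Adam([w1, w2], lr=0.001, max_grad=10.0)                   # Adam's new clipping knob

x = Tensor.randn(32, 784)
loss = x.dot(w1).relu().dot(w2).mean()  # mean() yields a (1,)-shaped tensor so backward() can seed it
loss.backward()
optim.step()                            # parameters whose grad is None are now skipped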
froog/tensor.py CHANGED
@@ -5,232 +5,232 @@
  # | ___|| __ || |_| || |_| || || |
  # | | | | | || || || |_| |
  # |___| |___| |_||_______||_______||_______|
- #
- # inspired by pytorch
- # inspired by tinygrad
- # inspired by https://github.com/karpathy/micrograd/blob/master/micrograd/engine.py

  import os
  import numpy as np
  from inspect import signature
+ from typing import Tuple, List, Union, Optional, Any, TypeVar, cast
+ from froog.gpu import get_device, upload_tensor, download_tensor, is_buffer

- try:
-   import pyopencl as cl
-   GPU = True
- except ImportError:
-   # no GPU support
-   GPU = False
-
- cl_ctx, cl_queue = None, None
- def init_gpu():
-   """
-   creates global OpenCL context and queue
-   """
-   global cl_ctx, cl_queue
-   if cl_queue is None:
-     try:
-       # if you have an m2 mac
-       cl_ctx = cl.create_some_context(answers=[0])
-     except (cl._cl.RuntimeError, TypeError):
-       cl_ctx = cl.create_some_context(interactive=False)
-     cl_queue = cl.CommandQueue(cl_ctx)
-
- # ************ Main Classes ************
- # ********** Tensor, Function **********
- # _____________ _______ ____ ____
- # /_ __/ ____/ | / / ___// __ \/ __ \
- # / / / __/ / |/ /\__ \/ / / / /_/ /
- # / / / /___/ /| /___/ / /_/ / _, _/
- # /_/ /_____/_/ |_//____/\____/_/ |_|
+ T = TypeVar('T', bound='Tensor')

  class Tensor:
-   did_float_warning = False
-   def __init__(self, data, gpu=False):
-     if isinstance(data, list):
-       data = np.array(data, dtype=np.float32)
-     elif GPU and isinstance(data, cl._cl.Buffer):
-       self.gpu = True
-     elif not isinstance(data, np.ndarray):
-       raise TypeError(f"Error constructing tensor with {data}")
+   did_float_warning = False
+   ops = {}
+   ops_gpu = {}
+
+   def __init__(self, data: Union[List, np.ndarray, Any], gpu: bool = False):
+     if isinstance(data, list): data = np.array(data, dtype=np.float32)
+     elif is_buffer(data): self.gpu = True
+     elif not isinstance(data, np.ndarray): raise TypeError(f"Error constructing tensor with {data}")
+     if isinstance(data, np.ndarray):
+       if data.dtype != np.float32 and not Tensor.did_float_warning:
+         if os.getenv("WARNING") == "1": print(f"warning, {data.shape} isn't float32. float64 needed for numerical jacobian")
+         if not os.getenv("DEBUG") == "1": Tensor.did_float_warning = True
+       self.gpu = False
+     self.data = data
+     self.grad: Optional[Tensor] = None
+     self._ctx = None
+     if gpu: self.gpu_()
+
+   def __repr__(self) -> str: return f"Tensor data: {self.data}, gradients: {self.grad.data if self.grad else None}"
+   def assign(self, x: T) -> None: self.data = x.data
+
+   @property
+   def shape(self) -> Tuple[int, ...]:
+     if self.gpu:
+       device = get_device()
+       if device is not None and hasattr(device, 'buffer_metadata'):
+         buffer_id = id(self.data)
+         if buffer_id in device.buffer_metadata: return device.buffer_metadata[buffer_id]['shape']
+       try:
+         data = download_tensor(self)
+         return data.shape
+       except Exception as e:
+         print(f"Warning: Failed to get shape from GPU tensor: {e}")
+         return (1,)
+     return self.data.shape
+
+   @property
+   def size(self, dim=None) -> Union[int, Tuple[int, ...]]:
+     if dim is not None: return self.shape[dim]
+     return int(np.prod(self.shape))
+
+   @property
+   def ndim(self) -> int: return len(self.shape)
+
+   @property
+   def transpose(self) -> T:
+     if isinstance(self.data, np.ndarray): return Tensor(self.data.T, gpu=self.gpu)
+     else:
+       cpu_tensor = self.to_cpu()
+       return Tensor(cpu_tensor.data.T, gpu=self.gpu)
+
+   @property
+   def dtype(self) -> np.dtype:
+     if self.gpu:
+       device = get_device()
+       if device is not None and hasattr(device, 'buffer_metadata'):
+         buffer_id = id(self.data)
+         if buffer_id in device.buffer_metadata: return device.buffer_metadata[buffer_id]['dtype']
+       return np.float32
+     return self.data.dtype
+
+   @property
+   def is_gpu(self) -> bool: return self.gpu
+
+   @staticmethod
+   def zeros(*shape: int) -> T: return Tensor(np.zeros(shape, dtype=np.float32))
+
+   @staticmethod
+   def ones(*shape: int) -> T: return Tensor(np.ones(shape, dtype=np.float32))
+
+   @staticmethod
+   def randn(*shape: int) -> T: return Tensor(np.random.randn(*shape).astype(np.float32))
+
+   @staticmethod
+   def eye(dim: int) -> T: return Tensor(np.eye(dim).astype(np.float32))
+
+   @staticmethod
+   def arange(start: Union[int, float], stop: Optional[Union[int, float]] = None, step: Union[int, float] = 1) -> T:
+     if stop is None:
+       stop = start
+       start = 0
+     return Tensor(np.arange(start, stop, step, dtype=np.float32))
+
+   def flatten(self) -> T: return Tensor(self.data.reshape(-1), gpu=self.gpu)
+   def detach(self) -> T: return Tensor(self.data.copy(), gpu=self.gpu)
+   def view(self, *shape: int) -> T: return Tensor(self.data.reshape(shape), gpu=self.gpu)
+   def to_float(self) -> T: return Tensor(self.data.astype(np.float32), gpu=self.gpu)
+   def to_int(self) -> T: return Tensor(self.data.astype(np.int32), gpu=self.gpu)
+   def to_bool(self) -> T: return Tensor(self.data.astype(bool), gpu=self.gpu)
+
+   def unsqueeze(self, dim: int) -> T:
+     shape = list(self.shape)
+     if dim < 0: dim = len(shape) + 1 + dim
+     shape.insert(dim, 1)
+     return Tensor(self.data.reshape(shape), gpu=self.gpu)
+
+   def squeeze(self, dim: Optional[int] = None) -> T:
+     if dim is None: return Tensor(self.data.squeeze(), gpu=self.gpu)
+     shape = list(self.shape)
+     if dim < 0: dim = len(shape) + dim
+     if 0 <= dim < len(shape) and shape[dim] == 1: shape.pop(dim)
+     return Tensor(self.data.reshape(shape), gpu=self.gpu)
+
+   def backward(self, allow_fill: bool = True) -> None:
+     if self._ctx is None: return
+     if self.grad is None and allow_fill:
+       assert self.shape == (1,)
+       self.grad = Tensor(np.ones(self.shape, dtype=self.dtype), gpu=self.gpu)
+     assert self.grad is not None
+     grads = self._ctx.backward(self._ctx, self.grad.data)
+     if len(self._ctx.parents) == 1: grads = [grads]
+     for t, g in zip(self._ctx.parents, grads):
+       if g is None: continue
+       t_shape = t.shape
+       if is_buffer(g):
+         device = get_device()
+         if device is not None and hasattr(device, 'buffer_metadata'):
+           buffer_id = id(g)
+           if buffer_id in device.buffer_metadata: g_shape = device.buffer_metadata[buffer_id]['shape']
+           else:
+             try:
+               g_cpu = download_tensor(g)
+               g_shape = g_cpu.shape
+             except:
+               print(f"Warning: Could not determine shape of gradient in {self._ctx}")
+               g_shape = t_shape
+       else: g_shape = g.shape
+       if g_shape != t_shape:
+         print(f"grad shape must match tensor shape in {self._ctx}, {g_shape} != {t_shape}")
+         assert False
+       t.grad = Tensor(g)
+       t.backward(allow_fill=False)
+
+   def mean(self) -> T:
+     div = Tensor(np.array([1 / self.size], dtype=np.float32), gpu=self.gpu)
+     return self.sum().mul(div)
+
+   def sqrt(self) -> T:
+     root = Tensor(np.zeros(self.shape, dtype=np.float32) + 0.5, gpu=self.gpu)
+     return self.pow(root)
+
+   def div(self, y: T) -> T:
+     root = Tensor(np.zeros(self.shape, dtype=np.float32) - 1, gpu=self.gpu)
+     return self.mul(y.pow(root))

-     if isinstance(data, np.ndarray):
-       if data.dtype != np.float32 and not Tensor.did_float_warning:
-         # TODO: set env flag to print all warnings, float64 needed for numerical jacobian
-         print(f"warning, {data.shape} isn't float32")
-         if not os.getenv("DEBUG") == "1":
-           Tensor.did_float_warning = True
-       self.gpu = False
-
-     self.data = data
-     self.grad = None
-
-     if gpu:
-       self.gpu_()
-
-     # internal variables used for autograd graph construction
-     self._ctx = None # these are where the backward gradient computation are saved
-
-   def __repr__(self):
-     return f"Tensor data: {self.data}, gradients: {self.grad.data if self.grad else None}"
-
-   def assign(self, x):
-     self.data = x.data
-
-   @property
-   def shape(self):
-     return self.data.shape
-
-   @staticmethod
-   def zeros(*shape):
-     return Tensor(np.zeros(shape, dtype=np.float32))
-
-   @staticmethod
-   def ones(*shape):
-     return Tensor(np.ones(shape, dtype=np.float32))
-
-   @staticmethod
-   def randn(*shape):
-     return Tensor(np.random.randn(*shape).astype(np.float32))
-
-   @staticmethod
-   def eye(dim):
-     return Tensor(np.eye(dim).astype(np.float32))
-
-   def backward(self, allow_fill=True):
-     if self._ctx is None:
-       return
-
-     if self.grad is None and allow_fill:
-       # allow_fill gives backprop a starting point, fills in the first grad with one is its None
-       assert self.data.shape == (1,) # numpy returns tuples as shapes
-       self.grad = Tensor(np.ones(self.data.shape, dtype=self.data.dtype), gpu=self.gpu)
-
-     assert self.grad is not None
-
-     # THIS IS WHERE AUTO GRAD IS DONE
-     grads = self._ctx.backward(self._ctx, self.grad.data) # get gradients respective to what op happened
-     if len(self._ctx.parents) == 1:
-       grads = [grads]
-     for t, g in zip(self._ctx.parents, grads):
-       if g is None:
-         continue
-       if g.shape != t.data.shape:
-         print(f"grad shape must match tensor shape in {self._ctx}, {g.shape} != {t.data.shape}")
-         assert False
-       t.grad = Tensor(g) # access actual gradients using grad.data
-       t.backward(allow_fill=False)
-
-   # ****** cpu/gpu ******
+   def to_cpu(self) -> T:
+     if not self.gpu: return cast(T, self)
+     data = download_tensor(self)
+     ret = Tensor(data)
+     if self.grad: ret.grad = self.grad.to_cpu()
+     return ret

-   def to_cpu(self):
-     if self.gpu:
-       data = np.empty(self.shape, dtype=np.float32)
-       cl.enqueue_copy(cl_queue, data, self.data) # copy data from cpu to gpu (queue, dest, src)
-       ret = Tensor(data)
-       if self.grad:
-         ret.grad = self.grad.to_cpu()
-       return ret
-     else:
-       return self
-
-   def gpu_(self):
-     self.data = self.to_gpu().data
-     self.gpu = True
-
-   def to_gpu(self):
-     if not GPU:
-       raise Exception("no gpu support! install pyopencl")
-     if not self.gpu:
-       init_gpu()
-       assert self.data.dtype == np.float32 # GPU only allows float32
-       # hostbuf is the data buffer on host machine with the data to be copied to the OpenCL buffer
-       data = cl.Buffer(cl_ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=self.data.ravel()) # from pyopencl docs
-       data.shape = self.shape
-       data.dtype = self.data.dtype
-       ret = Tensor(data)
-       if self.grad:
-         ret.grad = self.grad.to_gpu()
-       return ret
-     else:
-       return self
-
-   ops = {} # stores operations that are done on the CPU
-   ops_gpu = {} # stores operations that are done on the GPU
-
-   # ****** basic tensor math ops ******
-
-   def mean(self):
-     div = Tensor(np.array([1 / np.prod(self.shape)], dtype=self.data.dtype), gpu=self.gpu)
-     return self.sum().mul(div)
-
-   def sqrt(self):
-     root = Tensor(np.zeros(self.shape, dtype=self.data.dtype)+0.5, gpu=self.gpu)
-     return self.pow(root)
-
-   def div(self, y):
-     root = Tensor(np.zeros(self.shape, dtype=self.data.dtype)-1, gpu=self.gpu)
-     return self.mul(y.pow(root))
-
- # ________ ___ ______________________ _ __
- # / ____/ / / / | / / ____/_ __/ _/ __ \/ | / /
- # / /_ / / / / |/ / / / / / // / / / |/ /
- # / __/ / /_/ / /| / /___ / / _/ // /_/ / /| /
- # /_/ \____/_/ |_/\____/ /_/ /___/\____/_/ |_/
-
+   def gpu_(self) -> None:
+     if not self.gpu and (device := get_device()) is not None and device.name != "CPU":
+       self.data = upload_tensor(self.data)
+       self.gpu = True
+       if self.grad: self.grad.gpu_()
+
+   def to_gpu(self) -> T:
+     if (device := get_device()) is None or device.name == "CPU": raise Exception("no gpu support! install pyopencl or use a Metal-compatible device")
+     if self.gpu: return cast(T, self)
+     gpu_data = upload_tensor(self.data)
+     ret = Tensor(gpu_data)
+     ret.gpu = True
+     if self.grad: ret.grad = self.grad.to_gpu()
+     return ret
+
  class Function:
-   """
-   An instantiation of the Function class includes the context
-   """
-   def __init__(self, *tensors):
-     self.parents = tensors
-     self.saved_tensors = []
-
-   def save_for_backward(self, *x):
-     self.saved_tensors.extend(x)
-
-   def apply(self, *x, **kwargs):
-     """
-     self : is the tensor with data
-     *x : the input to the method
-     """
-     op = self # self is the operation class
-     ctx = op(*x)
-     params = signature(op.forward).parameters # gets the function params e.g. (ctx, x, y)
-     for p in params.values(): # loops through each param
-       if p.default is not p.empty: # p.default is the param value
-         setattr(ctx, p.name, p.default) # add any func params to ctx
-     for k, v in kwargs.items():
-       setattr(ctx, k, v) # add any kwargs to ctx
-
-     # this performs the actual operation (e.g., addition, multiplication, etc.) on the tensor data
-     ret = Tensor(op.forward(ctx, *[t.data for t in x], **kwargs))
-     ret._ctx = ctx
-     return ret
-
- def register(name, fxn, gpu=False):
-   """
-   mechanism that allows you to chain methods in an intuitive and Pythonic way
-   e.g. x.dot(w).relu(), where w is a tensor
-
-   partialmethod is used to create a new method that has some of the arguments to
-   another method already filled in the apply method of that instance is added
-   """
-   if gpu:
-     Tensor.ops_gpu[name] = fxn
-   else:
-     Tensor.ops[name] = fxn
-
-   def dispatch(self, *x, **kwargs):
-     op_func = (Tensor.ops_gpu if self.gpu else Tensor.ops)[name]
-     op_func.cl_ctx, op_func.cl_queue = cl_ctx, cl_queue
-     return op_func.apply(op_func, self, *x, **kwargs)
-
-   setattr(Tensor, name, dispatch)
-
-   if name in ['add', 'sub', 'mul', 'div']:
-     setattr(Tensor, "__%s__" % name, dispatch)
-     setattr(Tensor, "__i%s__" % name, lambda self,x: self.assign(dispatch(self,x)))
-
- import froog.ops # this registers all the operations
- if GPU:
-   import froog.ops_gpu
+   def __init__(self, *tensors: Tensor) -> None:
+     self.parents = tensors
+     self.saved_tensors: List[Any] = []
+
+   def save_for_backward(self, *x: Any) -> None:
+     self.saved_tensors.extend(x)
+
+   def apply(self, *x: Any, **kwargs: Any) -> Tensor:
+     op = self
+     ctx = op(*x)
+     params = signature(op.forward).parameters
+     for p in params.values():
+       if p.default is not p.empty: setattr(ctx, p.name, p.default)
+     for k, v in kwargs.items(): setattr(ctx, k, v)
+     ret = Tensor(op.forward(ctx, *[t.data for t in x], **kwargs))
+     ret._ctx = ctx
+     return ret
+
+ def register(name: str, fxn: Any, gpu: bool = False) -> None:
+   if gpu:
+     setattr(Tensor, name, lambda self, *x, **kwargs: fxn.apply(fxn, self, *x, **kwargs))
+     Tensor.ops_gpu[name] = fxn
+   else: Tensor.ops[name] = fxn
+
+   def dispatch(self: Tensor, *x: Any, **kwargs: Any) -> Tensor:
+     try:
+       op_func = (Tensor.ops_gpu if self.gpu else Tensor.ops)[name]
+       return op_func.apply(op_func, self, *x, **kwargs)
+     except Exception as e:
+       print(f"Error in {name} operation: {e}")
+       if os.getenv("DEBUG") == "1":
+         print(f" Self: {self}")
+         for i, arg in enumerate(x): print(f" Arg {i}: {arg}")
+         print(f" Kwargs: {kwargs}")
+       raise
+
+   setattr(Tensor, name, dispatch)
+
+   if name in ['add', 'sub', 'mul', 'div']:
+     setattr(Tensor, "__%s__" % name, dispatch)
+     setattr(Tensor, "__i%s__" % name, lambda self, x: self.assign(dispatch(self, x)))
+
+ if (device := get_device()) is not None and device.name != "CPU":
+   if device.__class__.__name__ == "MetalDevice":
+     try: import froog.gpu.metal.ops_metal
+     except ImportError:
+       if os.getenv("DEBUG") == "1": print("Failed to import Metal operations")
+   elif device.__class__.__name__ == "OpenCLDevice":
+     try: import froog.gpu.cl.ops_cl
+     except ImportError:
+       if os.getenv("DEBUG") == "1": print("Failed to import OpenCL operations")
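
The tensor rewrite above replaces the module-level pyopencl globals with the froog.gpu device layer (get_device, upload_tensor, download_tensor, is_buffer) and adds several shape and dtype helpers. A small CPU-only sketch of those helpers, illustrative only; the GPU paths additionally require get_device() to report a Metal or OpenCL device:

from froog.tensor import Tensor

t = Tensor.arange(6)                        # float32 values 0..5, shape (6,)
m = t.view(2, 3)                            # reshape via numpy; returns a new Tensor with no autograd link
print(m.shape, m.ndim, m.dtype)             # (2, 3) 2 float32
print(m.unsqueeze(0).shape)                 # (1, 2, 3)
print(m.unsqueeze(0).squeeze(0).shape)      # back to (2, 3)
print(m.transpose.shape)                    # transpose is a property, returns a (3, 2) Tensor
print(m.flatten().shape, m.detach().shape)  # (6,) and (2, 3)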