froog 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
froog/optim.py CHANGED
@@ -7,67 +7,139 @@
  # |___| |___| |_||_______||_______||_______|

  import numpy as np
- from froog.tensor import Tensor, GPU
+ from typing import List
+ from froog.tensor import Tensor

  class Optimizer:
-   def __init__(self, params):
+   def __init__(self, params: List[Tensor]) -> None:
      self.params = params

  class SGD(Optimizer):
    """
    Stochastic Gradient Descent
    """
-   def __init__(self, params, lr=0.001):
+   def __init__(self, params: List[Tensor], lr: float = 0.001, weight_decay: float = 0, clip_value: float = 0) -> None:
      super(SGD, self).__init__(params)
-     self.lr = Tensor([lr], gpu=params[0].gpu)
+     self.lr = Tensor([lr], gpu=params[0].gpu if params else False)
+     self.weight_decay = weight_decay
+     self.clip_value = clip_value

-   def step(self):
+   def step(self) -> None:
      for t in self.params:
-       t -= t.grad * self.lr
+       if t.grad is None:
+         continue
+
+       if t.gpu:
+         from froog.gpu import get_device, download_tensor, upload_tensor
+
+         # device = get_device()
+         t_cpu = download_tensor(t)
+         grad_cpu = download_tensor(t.grad)
+         lr_cpu = download_tensor(self.lr)
+
+         if self.weight_decay > 0:
+           grad_cpu += self.weight_decay * t_cpu
+
+         if self.clip_value > 0:
+           grad_cpu = np.clip(grad_cpu, -self.clip_value, self.clip_value)
+
+         t_cpu -= grad_cpu * lr_cpu
+         t.data = upload_tensor(t_cpu)
+       else:
+         if self.weight_decay > 0:
+           t.grad.data += self.weight_decay * t.data
+
+         if self.clip_value > 0:
+           t.grad.data = np.clip(t.grad.data, -self.clip_value, self.clip_value)
+
+         t -= t.grad * self.lr

  class Adam(Optimizer):
    """
-   Default ADAM opimizer from https://arxiv.org/pdf/1412.6980.pdf algorithm
+   Default ADAM optimizer from https://arxiv.org/pdf/1412.6980.pdf algorithm
    """
-   def __init__(self, params, lr=0.001, b1=0.9, b2=0.999, eps=10e-8):
+   def __init__(self, params: List[Tensor], lr: float = 0.001, b1: float = 0.9, b2: float = 0.999, eps: float = 1e-8, max_grad: float = 10.0) -> None:
      super(Adam, self).__init__(params)
      self.lr = lr
      self.b1 = b1
      self.b2 = b2
-     self.eps = eps # should be 1e-8?
+     self.eps = eps
      self.t = 0
+     self.max_grad = max_grad
+     self.on_gpu = any(t.gpu for t in self.params if t is not None)
+
+     if self.on_gpu:
+       from froog.gpu import download_tensor
+       self.m = [np.zeros_like(download_tensor(t.data)) for t in self.params]
+       self.v = [np.zeros_like(download_tensor(t.data)) for t in self.params]
+     else:
+       self.m = [np.zeros_like(t.data) for t in self.params]
+       self.v = [np.zeros_like(t.data) for t in self.params]

-     self.m = [np.zeros_like(t.data) for t in self.params]
-     self.v = [np.zeros_like(t.data) for t in self.params]
-
-   def step(self):
+   def step(self) -> None:
+     from froog.gpu import download_tensor, upload_tensor
+
      self.t += 1
-     a = self.lr * (
-       np.sqrt(1 - np.power(self.b2, self.t)) /
-       (1 - np.power(self.b1, self.t)))
-     for i,t in enumerate(self.params):
-       self.m[i] = self.b1 * self.m[i] + (1 - self.b1) * t.grad.data
-       self.v[i] = self.b2 * self.v[i] + (1 - self.b2) * np.square(t.grad.data)
-       t.data -= a * self.m[i] / (np.sqrt(self.v[i]) + self.eps)
+     a = self.lr * (np.sqrt(1 - np.power(self.b2, self.t)) / (1 - np.power(self.b1, self.t)))
+
+     for i, t in enumerate(self.params):
+       if t.grad is None:
+         continue
+
+       if t.gpu:
+         try:
+           t_data_cpu = download_tensor(t.data)
+           grad_cpu = download_tensor(t.grad.data)
+
+           if np.isnan(grad_cpu).any() or np.isinf(grad_cpu).any():
+             print(f"Warning: NaN or Inf detected in gradients for parameter {i}")
+             grad_cpu = np.nan_to_num(grad_cpu, nan=0.0, posinf=self.max_grad, neginf=-self.max_grad)
+
+           if self.max_grad > 0:
+             grad_cpu = np.clip(grad_cpu, -self.max_grad, self.max_grad)
+
+           self.m[i] = self.b1 * self.m[i] + (1 - self.b1) * grad_cpu
+           self.v[i] = self.b2 * self.v[i] + (1 - self.b2) * np.square(grad_cpu)
+
+           denom = np.sqrt(self.v[i]) + self.eps
+           update = a * self.m[i] / denom
+
+           if np.isnan(update).any() or np.isinf(update).any():
+             print(f"Warning: NaN or Inf detected in update for parameter {i}")
+             max_update = np.finfo(np.float32).max / 100
+             update = np.nan_to_num(update, nan=0.0, posinf=max_update, neginf=-max_update)
+
+           t_data_cpu -= update
+
+           if np.isnan(t_data_cpu).any() or np.isinf(t_data_cpu).any():
+             print(f"Warning: NaN or Inf detected in parameter {i} after update")
+             max_val = np.finfo(np.float32).max / 10
+             t_data_cpu = np.nan_to_num(t_data_cpu, nan=0.0, posinf=max_val, neginf=-max_val)
+
+           t.data = upload_tensor(t_data_cpu)
+         except Exception as e:
+           print(f"Error in Adam update for GPU tensor {i}: {e}")
+           continue
+       else:
+         if self.max_grad > 0:
+           np.clip(t.grad.data, -self.max_grad, self.max_grad, out=t.grad.data)
+
+         self.m[i] = self.b1 * self.m[i] + (1 - self.b1) * t.grad.data
+         self.v[i] = self.b2 * self.v[i] + (1 - self.b2) * np.square(t.grad.data)
+         t.data -= a * self.m[i] / (np.sqrt(self.v[i]) + self.eps)

  class RMSprop(Optimizer):
    """
-   This version has epsilon
-   https://optimization.cbe.cornell.edu/index.php?title=RMSProp
-   RMSprop divides the learning rate by an exponentially decaying average of squared gradients.
-
-   Notes:
-   The reason RPROP doesn't work is that it violates the central idea behind stochastic gradient descent,
-   which is when we have small enough learning rate, it averages the gradients over successive mini-batches.
+   RMSprop optimizer with epsilon for numerical stability.
    """
-   def __init__(self, params, decay=0.9, lr=0.001, eps=1e-8):
+   def __init__(self, params: List[Tensor], decay: float = 0.9, lr: float = 0.001, eps: float = 1e-8) -> None:
      super(RMSprop, self).__init__(params)
      self.lr = lr
      self.decay = decay
      self.eps = eps
-     self.v = [np.zeros_like(t.data) for t in self.params]
+     self.v: List[np.ndarray] = [np.zeros_like(t.data) for t in self.params]

-   def step(self):
-     for i,t in enumerate(self.params):
-       self.v[i] = self.decay * self.v[i] + (1-self.decay) * np.square(t.grad.data)
+   def step(self) -> None:
+     for i, t in enumerate(self.params):
+       self.v[i] = self.decay * self.v[i] + (1 - self.decay) * np.square(t.grad.data)
        t.data -= self.lr / (np.sqrt(self.v[i]) + self.eps) * t.grad.data
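
The optimizer changes above add weight decay and gradient clipping to SGD, and gradient clipping plus NaN/Inf guards to Adam, with explicit download/upload round-trips for GPU-resident parameters. A minimal usage sketch based only on the signatures in this hunk; the two-parameter model and the dot/relu ops come from froog.ops and are assumed here purely for illustration:

from froog.tensor import Tensor
from froog.optim import SGD, Adam

# hypothetical two-parameter model, used only to produce gradients
w1 = Tensor.randn(784, 128)
w2 = Tensor.randn(128, 10)

optim = SGD([w1, w2], lr=0.001, weight_decay=1e-4, clip_value=1.0)  # new keyword arguments in 0.5.0
# optim = Adam([w1, w2], lr=0.001, max_grad=10.0)                   # Adam's new clipping knob

x = Tensor.randn(32, 784)
loss = x.dot(w1).relu().dot(w2).mean()  # mean() yields a (1,)-shaped tensor so backward() can seed it
loss.backward()
optim.step()                            # parameters whose grad is None are now skipped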
froog/tensor.py CHANGED
@@ -5,232 +5,232 @@
  # | ___|| __ || |_| || |_| || || |
  # | | | | | || || || |_| |
  # |___| |___| |_||_______||_______||_______|
- #
- # inspired by pytorch
- # inspired by tinygrad
- # inspired by https://github.com/karpathy/micrograd/blob/master/micrograd/engine.py

  import os
  import numpy as np
  from inspect import signature
+ from typing import Tuple, List, Union, Optional, Any, TypeVar, cast
+ from froog.gpu import get_device, upload_tensor, download_tensor, is_buffer

- try:
-   import pyopencl as cl
-   GPU = True
- except ImportError:
-   # no GPU support
-   GPU = False
-
- cl_ctx, cl_queue = None, None
- def init_gpu():
-   """
-   creates global OpenCL context and queue
-   """
-   global cl_ctx, cl_queue
-   if cl_queue is None:
-     try:
-       # if you have an m2 mac
-       cl_ctx = cl.create_some_context(answers=[0])
-     except (cl._cl.RuntimeError, TypeError):
-       cl_ctx = cl.create_some_context(interactive=False)
-     cl_queue = cl.CommandQueue(cl_ctx)
-
- # ************ Main Classes ************
- # ********** Tensor, Function **********
- # _____________ _______ ____ ____
- # /_ __/ ____/ | / / ___// __ \/ __ \
- # / / / __/ / |/ /\__ \/ / / / /_/ /
- # / / / /___/ /| /___/ / /_/ / _, _/
- # /_/ /_____/_/ |_//____/\____/_/ |_|
+ T = TypeVar('T', bound='Tensor')

  class Tensor:
-   did_float_warning = False
-   def __init__(self, data, gpu=False):
-     if isinstance(data, list):
-       data = np.array(data, dtype=np.float32)
-     elif GPU and isinstance(data, cl._cl.Buffer):
-       self.gpu = True
-     elif not isinstance(data, np.ndarray):
-       raise TypeError(f"Error constructing tensor with {data}")
+   did_float_warning = False
+   ops = {}
+   ops_gpu = {}
+
+   def __init__(self, data: Union[List, np.ndarray, Any], gpu: bool = False):
+     if isinstance(data, list): data = np.array(data, dtype=np.float32)
+     elif is_buffer(data): self.gpu = True
+     elif not isinstance(data, np.ndarray): raise TypeError(f"Error constructing tensor with {data}")
+     if isinstance(data, np.ndarray):
+       if data.dtype != np.float32 and not Tensor.did_float_warning:
+         if os.getenv("WARNING") == "1": print(f"warning, {data.shape} isn't float32. float64 needed for numerical jacobian")
+         if not os.getenv("DEBUG") == "1": Tensor.did_float_warning = True
+       self.gpu = False
+     self.data = data
+     self.grad: Optional[Tensor] = None
+     self._ctx = None
+     if gpu: self.gpu_()
+
+   def __repr__(self) -> str: return f"Tensor data: {self.data}, gradients: {self.grad.data if self.grad else None}"
+   def assign(self, x: T) -> None: self.data = x.data
+
+   @property
+   def shape(self) -> Tuple[int, ...]:
+     if self.gpu:
+       device = get_device()
+       if device is not None and hasattr(device, 'buffer_metadata'):
+         buffer_id = id(self.data)
+         if buffer_id in device.buffer_metadata: return device.buffer_metadata[buffer_id]['shape']
+       try:
+         data = download_tensor(self)
+         return data.shape
+       except Exception as e:
+         print(f"Warning: Failed to get shape from GPU tensor: {e}")
+         return (1,)
+     return self.data.shape
+
+   @property
+   def size(self, dim=None) -> Union[int, Tuple[int, ...]]:
+     if dim is not None: return self.shape[dim]
+     return int(np.prod(self.shape))
+
+   @property
+   def ndim(self) -> int: return len(self.shape)
+
+   @property
+   def transpose(self) -> T:
+     if isinstance(self.data, np.ndarray): return Tensor(self.data.T, gpu=self.gpu)
+     else:
+       cpu_tensor = self.to_cpu()
+       return Tensor(cpu_tensor.data.T, gpu=self.gpu)
+
+   @property
+   def dtype(self) -> np.dtype:
+     if self.gpu:
+       device = get_device()
+       if device is not None and hasattr(device, 'buffer_metadata'):
+         buffer_id = id(self.data)
+         if buffer_id in device.buffer_metadata: return device.buffer_metadata[buffer_id]['dtype']
+       return np.float32
+     return self.data.dtype
+
+   @property
+   def is_gpu(self) -> bool: return self.gpu
+
+   @staticmethod
+   def zeros(*shape: int) -> T: return Tensor(np.zeros(shape, dtype=np.float32))
+
+   @staticmethod
+   def ones(*shape: int) -> T: return Tensor(np.ones(shape, dtype=np.float32))
+
+   @staticmethod
+   def randn(*shape: int) -> T: return Tensor(np.random.randn(*shape).astype(np.float32))
+
+   @staticmethod
+   def eye(dim: int) -> T: return Tensor(np.eye(dim).astype(np.float32))
+
+   @staticmethod
+   def arange(start: Union[int, float], stop: Optional[Union[int, float]] = None, step: Union[int, float] = 1) -> T:
+     if stop is None:
+       stop = start
+       start = 0
+     return Tensor(np.arange(start, stop, step, dtype=np.float32))
+
+   def flatten(self) -> T: return Tensor(self.data.reshape(-1), gpu=self.gpu)
+   def detach(self) -> T: return Tensor(self.data.copy(), gpu=self.gpu)
+   def view(self, *shape: int) -> T: return Tensor(self.data.reshape(shape), gpu=self.gpu)
+   def to_float(self) -> T: return Tensor(self.data.astype(np.float32), gpu=self.gpu)
+   def to_int(self) -> T: return Tensor(self.data.astype(np.int32), gpu=self.gpu)
+   def to_bool(self) -> T: return Tensor(self.data.astype(bool), gpu=self.gpu)
+
+   def unsqueeze(self, dim: int) -> T:
+     shape = list(self.shape)
+     if dim < 0: dim = len(shape) + 1 + dim
+     shape.insert(dim, 1)
+     return Tensor(self.data.reshape(shape), gpu=self.gpu)
+
+   def squeeze(self, dim: Optional[int] = None) -> T:
+     if dim is None: return Tensor(self.data.squeeze(), gpu=self.gpu)
+     shape = list(self.shape)
+     if dim < 0: dim = len(shape) + dim
+     if 0 <= dim < len(shape) and shape[dim] == 1: shape.pop(dim)
+     return Tensor(self.data.reshape(shape), gpu=self.gpu)
+
+   def backward(self, allow_fill: bool = True) -> None:
+     if self._ctx is None: return
+     if self.grad is None and allow_fill:
+       assert self.shape == (1,)
+       self.grad = Tensor(np.ones(self.shape, dtype=self.dtype), gpu=self.gpu)
+     assert self.grad is not None
+     grads = self._ctx.backward(self._ctx, self.grad.data)
+     if len(self._ctx.parents) == 1: grads = [grads]
+     for t, g in zip(self._ctx.parents, grads):
+       if g is None: continue
+       t_shape = t.shape
+       if is_buffer(g):
+         device = get_device()
+         if device is not None and hasattr(device, 'buffer_metadata'):
+           buffer_id = id(g)
+           if buffer_id in device.buffer_metadata: g_shape = device.buffer_metadata[buffer_id]['shape']
+           else:
+             try:
+               g_cpu = download_tensor(g)
+               g_shape = g_cpu.shape
+             except:
+               print(f"Warning: Could not determine shape of gradient in {self._ctx}")
+               g_shape = t_shape
+       else: g_shape = g.shape
+       if g_shape != t_shape:
+         print(f"grad shape must match tensor shape in {self._ctx}, {g_shape} != {t_shape}")
+         assert False
+       t.grad = Tensor(g)
+       t.backward(allow_fill=False)
+
+   def mean(self) -> T:
+     div = Tensor(np.array([1 / self.size], dtype=np.float32), gpu=self.gpu)
+     return self.sum().mul(div)
+
+   def sqrt(self) -> T:
+     root = Tensor(np.zeros(self.shape, dtype=np.float32) + 0.5, gpu=self.gpu)
+     return self.pow(root)
+
+   def div(self, y: T) -> T:
+     root = Tensor(np.zeros(self.shape, dtype=np.float32) - 1, gpu=self.gpu)
+     return self.mul(y.pow(root))

-     if isinstance(data, np.ndarray):
-       if data.dtype != np.float32 and not Tensor.did_float_warning:
-         # TODO: set env flag to print all warnings, float64 needed for numerical jacobian
-         print(f"warning, {data.shape} isn't float32")
-         if not os.getenv("DEBUG") == "1":
-           Tensor.did_float_warning = True
-       self.gpu = False
-
-     self.data = data
-     self.grad = None
-
-     if gpu:
-       self.gpu_()
-
-     # internal variables used for autograd graph construction
-     self._ctx = None # these are where the backward gradient computation are saved
-
-   def __repr__(self):
-     return f"Tensor data: {self.data}, gradients: {self.grad.data if self.grad else None}"
-
-   def assign(self, x):
-     self.data = x.data
-
-   @property
-   def shape(self):
-     return self.data.shape
-
-   @staticmethod
-   def zeros(*shape):
-     return Tensor(np.zeros(shape, dtype=np.float32))
-
-   @staticmethod
-   def ones(*shape):
-     return Tensor(np.ones(shape, dtype=np.float32))
-
-   @staticmethod
-   def randn(*shape):
-     return Tensor(np.random.randn(*shape).astype(np.float32))
-
-   @staticmethod
-   def eye(dim):
-     return Tensor(np.eye(dim).astype(np.float32))
-
-   def backward(self, allow_fill=True):
-     if self._ctx is None:
-       return
-
-     if self.grad is None and allow_fill:
-       # allow_fill gives backprop a starting point, fills in the first grad with one is its None
-       assert self.data.shape == (1,) # numpy returns tuples as shapes
-       self.grad = Tensor(np.ones(self.data.shape, dtype=self.data.dtype), gpu=self.gpu)
-
-     assert self.grad is not None
-
-     # THIS IS WHERE AUTO GRAD IS DONE
-     grads = self._ctx.backward(self._ctx, self.grad.data) # get gradients respective to what op happened
-     if len(self._ctx.parents) == 1:
-       grads = [grads]
-     for t, g in zip(self._ctx.parents, grads):
-       if g is None:
-         continue
-       if g.shape != t.data.shape:
-         print(f"grad shape must match tensor shape in {self._ctx}, {g.shape} != {t.data.shape}")
-         assert False
-       t.grad = Tensor(g) # access actual gradients using grad.data
-       t.backward(allow_fill=False)
-
-   # ****** cpu/gpu ******
+   def to_cpu(self) -> T:
+     if not self.gpu: return cast(T, self)
+     data = download_tensor(self)
+     ret = Tensor(data)
+     if self.grad: ret.grad = self.grad.to_cpu()
+     return ret

-   def to_cpu(self):
-     if self.gpu:
-       data = np.empty(self.shape, dtype=np.float32)
-       cl.enqueue_copy(cl_queue, data, self.data) # copy data from cpu to gpu (queue, dest, src)
-       ret = Tensor(data)
-       if self.grad:
-         ret.grad = self.grad.to_cpu()
-       return ret
-     else:
-       return self
-
-   def gpu_(self):
-     self.data = self.to_gpu().data
-     self.gpu = True
-
-   def to_gpu(self):
-     if not GPU:
-       raise Exception("no gpu support! install pyopencl")
-     if not self.gpu:
-       init_gpu()
-       assert self.data.dtype == np.float32 # GPU only allows float32
-       # hostbuf is the data buffer on host machine with the data to be copied to the OpenCL buffer
-       data = cl.Buffer(cl_ctx, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=self.data.ravel()) # from pyopencl docs
-       data.shape = self.shape
-       data.dtype = self.data.dtype
-       ret = Tensor(data)
-       if self.grad:
-         ret.grad = self.grad.to_gpu()
-       return ret
-     else:
-       return self
-
-   ops = {} # stores operations that are done on the CPU
-   ops_gpu = {} # stores operations that are done on the GPU
-
-   # ****** basic tensor math ops ******
-
-   def mean(self):
-     div = Tensor(np.array([1 / np.prod(self.shape)], dtype=self.data.dtype), gpu=self.gpu)
-     return self.sum().mul(div)
-
-   def sqrt(self):
-     root = Tensor(np.zeros(self.shape, dtype=self.data.dtype)+0.5, gpu=self.gpu)
-     return self.pow(root)
-
-   def div(self, y):
-     root = Tensor(np.zeros(self.shape, dtype=self.data.dtype)-1, gpu=self.gpu)
-     return self.mul(y.pow(root))
-
- # ________ ___ ______________________ _ __
- # / ____/ / / / | / / ____/_ __/ _/ __ \/ | / /
- # / /_ / / / / |/ / / / / / // / / / |/ /
- # / __/ / /_/ / /| / /___ / / _/ // /_/ / /| /
- # /_/ \____/_/ |_/\____/ /_/ /___/\____/_/ |_/
-
+   def gpu_(self) -> None:
+     if not self.gpu and (device := get_device()) is not None and device.name != "CPU":
+       self.data = upload_tensor(self.data)
+       self.gpu = True
+       if self.grad: self.grad.gpu_()
+
+   def to_gpu(self) -> T:
+     if (device := get_device()) is None or device.name == "CPU": raise Exception("no gpu support! install pyopencl or use a Metal-compatible device")
+     if self.gpu: return cast(T, self)
+     gpu_data = upload_tensor(self.data)
+     ret = Tensor(gpu_data)
+     ret.gpu = True
+     if self.grad: ret.grad = self.grad.to_gpu()
+     return ret
+
  class Function:
-   """
-   An instantiation of the Function class includes the context
-   """
-   def __init__(self, *tensors):
-     self.parents = tensors
-     self.saved_tensors = []
-
-   def save_for_backward(self, *x):
-     self.saved_tensors.extend(x)
-
-   def apply(self, *x, **kwargs):
-     """
-     self : is the tensor with data
-     *x : the input to the method
-     """
-     op = self # self is the operation class
-     ctx = op(*x)
-     params = signature(op.forward).parameters # gets the function params e.g. (ctx, x, y)
-     for p in params.values(): # loops through each param
-       if p.default is not p.empty: # p.default is the param value
-         setattr(ctx, p.name, p.default) # add any func params to ctx
-     for k, v in kwargs.items():
-       setattr(ctx, k, v) # add any kwargs to ctx
-
-     # this performs the actual operation (e.g., addition, multiplication, etc.) on the tensor data
-     ret = Tensor(op.forward(ctx, *[t.data for t in x], **kwargs))
-     ret._ctx = ctx
-     return ret
-
- def register(name, fxn, gpu=False):
-   """
-   mechanism that allows you to chain methods in an intuitive and Pythonic way
-   e.g. x.dot(w).relu(), where w is a tensor
-
-   partialmethod is used to create a new method that has some of the arguments to
-   another method already filled in the apply method of that instance is added
-   """
-   if gpu:
-     Tensor.ops_gpu[name] = fxn
-   else:
-     Tensor.ops[name] = fxn
-
-   def dispatch(self, *x, **kwargs):
-     op_func = (Tensor.ops_gpu if self.gpu else Tensor.ops)[name]
-     op_func.cl_ctx, op_func.cl_queue = cl_ctx, cl_queue
-     return op_func.apply(op_func, self, *x, **kwargs)
-
-   setattr(Tensor, name, dispatch)
-
-   if name in ['add', 'sub', 'mul', 'div']:
-     setattr(Tensor, "__%s__" % name, dispatch)
-     setattr(Tensor, "__i%s__" % name, lambda self,x: self.assign(dispatch(self,x)))
-
- import froog.ops # this registers all the operations
- if GPU:
-   import froog.ops_gpu
+   def __init__(self, *tensors: Tensor) -> None:
+     self.parents = tensors
+     self.saved_tensors: List[Any] = []
+
+   def save_for_backward(self, *x: Any) -> None:
+     self.saved_tensors.extend(x)
+
+   def apply(self, *x: Any, **kwargs: Any) -> Tensor:
+     op = self
+     ctx = op(*x)
+     params = signature(op.forward).parameters
+     for p in params.values():
+       if p.default is not p.empty: setattr(ctx, p.name, p.default)
+     for k, v in kwargs.items(): setattr(ctx, k, v)
+     ret = Tensor(op.forward(ctx, *[t.data for t in x], **kwargs))
+     ret._ctx = ctx
+     return ret
+
+ def register(name: str, fxn: Any, gpu: bool = False) -> None:
+   if gpu:
+     setattr(Tensor, name, lambda self, *x, **kwargs: fxn.apply(fxn, self, *x, **kwargs))
+     Tensor.ops_gpu[name] = fxn
+   else: Tensor.ops[name] = fxn
+
+   def dispatch(self: Tensor, *x: Any, **kwargs: Any) -> Tensor:
+     try:
+       op_func = (Tensor.ops_gpu if self.gpu else Tensor.ops)[name]
+       return op_func.apply(op_func, self, *x, **kwargs)
+     except Exception as e:
+       print(f"Error in {name} operation: {e}")
+       if os.getenv("DEBUG") == "1":
+         print(f" Self: {self}")
+         for i, arg in enumerate(x): print(f" Arg {i}: {arg}")
+         print(f" Kwargs: {kwargs}")
+       raise
+
+   setattr(Tensor, name, dispatch)
+
+   if name in ['add', 'sub', 'mul', 'div']:
+     setattr(Tensor, "__%s__" % name, dispatch)
+     setattr(Tensor, "__i%s__" % name, lambda self, x: self.assign(dispatch(self, x)))
+
+ if (device := get_device()) is not None and device.name != "CPU":
+   if device.__class__.__name__ == "MetalDevice":
+     try: import froog.gpu.metal.ops_metal
+     except ImportError:
+       if os.getenv("DEBUG") == "1": print("Failed to import Metal operations")
+   elif device.__class__.__name__ == "OpenCLDevice":
+     try: import froog.gpu.cl.ops_cl
+     except ImportError:
+       if os.getenv("DEBUG") == "1": print("Failed to import OpenCL operations")
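
The tensor rewrite above replaces the module-level pyopencl globals with the froog.gpu device layer (get_device, upload_tensor, download_tensor, is_buffer) and adds several shape and dtype helpers. A small CPU-only sketch of those helpers, illustrative only; the GPU paths additionally require get_device() to report a Metal or OpenCL device:

from froog.tensor import Tensor

t = Tensor.arange(6)                        # float32 values 0..5, shape (6,)
m = t.view(2, 3)                            # reshape via numpy; returns a new Tensor with no autograd link
print(m.shape, m.ndim, m.dtype)             # (2, 3) 2 float32
print(m.unsqueeze(0).shape)                 # (1, 2, 3)
print(m.unsqueeze(0).squeeze(0).shape)      # back to (2, 3)
print(m.transpose.shape)                    # transpose is a property, returns a (3, 2) Tensor
print(m.flatten().shape, m.detach().shape)  # (6,) and (2, 3)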