lucid-dl 2.9.0__tar.gz → 2.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/PKG-INFO +7 -5
  2. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/README.md +6 -4
  3. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/__init__.py +2 -0
  4. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/_backend/conv.py +23 -4
  5. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/_backend/core.py +104 -52
  6. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/_backend/pool.py +22 -4
  7. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/_func/bfunc.py +45 -45
  8. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/_func/ufunc.py +79 -79
  9. lucid_dl-2.10.0/lucid/_fusion/__init__.py +4 -0
  10. lucid_dl-2.10.0/lucid/_fusion/base.py +120 -0
  11. lucid_dl-2.10.0/lucid/_fusion/func.py +80 -0
  12. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/_tensor/tensor.py +83 -17
  13. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/_util/func.py +62 -63
  14. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/einops/_func.py +10 -10
  15. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/linalg/_func.py +29 -29
  16. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/fused.py +1 -1
  17. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/types.py +27 -1
  18. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid_dl.egg-info/PKG-INFO +7 -5
  19. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid_dl.egg-info/SOURCES.txt +3 -0
  20. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/setup.py +1 -1
  21. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/LICENSE +0 -0
  22. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/_backend/__init__.py +0 -0
  23. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/_backend/metal.py +0 -0
  24. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/_func/__init__.py +0 -0
  25. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/_func/gfunc.py +0 -0
  26. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/_tensor/__init__.py +0 -0
  27. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/_tensor/tensor_ops.py +0 -0
  28. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/_util/__init__.py +0 -0
  29. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/data/__init__.py +0 -0
  30. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/data/_base.py +0 -0
  31. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/data/_util.py +0 -0
  32. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/datasets/__init__.py +0 -0
  33. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/datasets/_base.py +0 -0
  34. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/datasets/cifar.py +0 -0
  35. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/datasets/mnist.py +0 -0
  36. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/einops/__init__.py +0 -0
  37. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/error.py +0 -0
  38. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/linalg/__init__.py +0 -0
  39. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/__init__.py +0 -0
  40. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/__init__.py +0 -0
  41. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/alex.py +0 -0
  42. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/coatnet.py +0 -0
  43. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/convnext.py +0 -0
  44. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/crossvit.py +0 -0
  45. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/cspnet.py +0 -0
  46. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/cvt.py +0 -0
  47. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/dense.py +0 -0
  48. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/efficient.py +0 -0
  49. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/efficientformer.py +0 -0
  50. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/inception.py +0 -0
  51. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/inception_next.py +0 -0
  52. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/inception_res.py +0 -0
  53. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/lenet.py +0 -0
  54. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/maxvit.py +0 -0
  55. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/mobile.py +0 -0
  56. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/pvt.py +0 -0
  57. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/resnest.py +0 -0
  58. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/resnet.py +0 -0
  59. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/resnext.py +0 -0
  60. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/senet.py +0 -0
  61. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/sknet.py +0 -0
  62. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/swin.py +0 -0
  63. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/vgg.py +0 -0
  64. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/vit.py +0 -0
  65. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/xception.py +0 -0
  66. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imgclf/zfnet.py +0 -0
  67. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imggen/__init__.py +0 -0
  68. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imggen/ddpm.py +0 -0
  69. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/imggen/vae.py +0 -0
  70. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/objdet/__init__.py +0 -0
  71. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/objdet/detr.py +0 -0
  72. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/objdet/efficientdet.py +0 -0
  73. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/objdet/fast_rcnn.py +0 -0
  74. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/objdet/faster_rcnn.py +0 -0
  75. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/objdet/rcnn.py +0 -0
  76. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/objdet/util.py +0 -0
  77. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/objdet/yolo/__init__.py +0 -0
  78. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/objdet/yolo/yolo_v1.py +0 -0
  79. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/objdet/yolo/yolo_v2.py +0 -0
  80. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/objdet/yolo/yolo_v3.py +0 -0
  81. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/objdet/yolo/yolo_v4.py +0 -0
  82. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/seq2seq/__init__.py +0 -0
  83. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/seq2seq/transformer.py +0 -0
  84. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/models/util.py +0 -0
  85. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/__init__.py +0 -0
  86. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/functional/__init__.py +0 -0
  87. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/functional/_activation.py +0 -0
  88. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/functional/_attention.py +0 -0
  89. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/functional/_conv.py +0 -0
  90. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/functional/_drop.py +0 -0
  91. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/functional/_linear.py +0 -0
  92. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/functional/_loss.py +0 -0
  93. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/functional/_norm.py +0 -0
  94. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/functional/_pool.py +0 -0
  95. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/functional/_spatial.py +0 -0
  96. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/functional/_util.py +0 -0
  97. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/init/__init__.py +0 -0
  98. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/init/_dist.py +0 -0
  99. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/module.py +0 -0
  100. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/modules/__init__.py +0 -0
  101. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/modules/activation.py +0 -0
  102. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/modules/attention.py +0 -0
  103. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/modules/conv.py +0 -0
  104. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/modules/drop.py +0 -0
  105. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/modules/einops.py +0 -0
  106. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/modules/linear.py +0 -0
  107. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/modules/loss.py +0 -0
  108. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/modules/norm.py +0 -0
  109. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/modules/pool.py +0 -0
  110. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/modules/rnn.py +0 -0
  111. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/modules/sparse.py +0 -0
  112. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/modules/transformer.py +0 -0
  113. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/modules/vision.py +0 -0
  114. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/parameter.py +0 -0
  115. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/nn/util.py +0 -0
  116. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/optim/__init__.py +0 -0
  117. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/optim/_base.py +0 -0
  118. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/optim/ada.py +0 -0
  119. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/optim/adam.py +0 -0
  120. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/optim/lr_scheduler/__init__.py +0 -0
  121. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/optim/lr_scheduler/_base.py +0 -0
  122. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/optim/lr_scheduler/_schedulers.py +0 -0
  123. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/optim/prop.py +0 -0
  124. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/optim/sgd.py +0 -0
  125. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/port.py +0 -0
  126. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/random/__init__.py +0 -0
  127. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/random/_func.py +0 -0
  128. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/transforms/__init__.py +0 -0
  129. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/transforms/_base.py +0 -0
  130. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/transforms/image.py +0 -0
  131. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/visual/__init__.py +0 -0
  132. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/visual/graph.py +0 -0
  133. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/weights/__init__.py +0 -0
  134. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/weights/__init__.pyi +0 -0
  135. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid_dl.egg-info/dependency_links.txt +0 -0
  136. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid_dl.egg-info/requires.txt +0 -0
  137. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid_dl.egg-info/top_level.txt +0 -0
  138. {lucid_dl-2.9.0 → lucid_dl-2.10.0}/setup.cfg +0 -0
{lucid_dl-2.9.0 → lucid_dl-2.10.0}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: lucid-dl
- Version: 2.9.0
+ Version: 2.10.0
  Summary: Lumerico's Comprehensive Interface for Deep Learning
  Home-page: https://github.com/ChanLumerico/lucid
  Author: ChanLumerico
@@ -33,7 +33,7 @@ Dynamic: summary
  ![PyPI - Total Downloads](https://img.shields.io/badge/total%20downloads-34.0k-yellow.svg)
  ![GitHub code size in bytes](https://img.shields.io/github/languages/code-size/ChanLumerico/lucid.svg)
  ![Code Style](https://img.shields.io/badge/code%20style-black-000000.svg)
- ![Lines of Code](https://img.shields.io/badge/lines%20of%20code-26.9k-purple.svg)
+ ![Lines of Code](https://img.shields.io/badge/lines%20of%20code-27.7k-purple.svg)

  **Lucid** is a minimalist deep learning framework built entirely from scratch in Python. It offers a pedagogically rich environment to explore the foundations of modern deep learning systems, including autodiff, neural network modules, and GPU acceleration — all while staying lightweight, readable, and free of complex dependencies.

@@ -50,9 +50,11 @@ Whether you're a student, educator, or an advanced researcher seeking to demysti

  - Now supports [**`Safetensors`**](https://github.com/huggingface/safetensors) for Lucid neural module porting along with the legacy `.lcd` format

- - Added new neural module category `nn.rnn`, including:
-
-   `nn.RNNBase`, `nn.RNN`, `nn.LSTM`, `nn.GRU`, `nn.RNNCell`, `nn.LSTMCell`, `nn.GRUCell`
+ - Introduced **Backward Fusion** for CPU execution:
+   - Automatically fuses selected operation patterns during backpropagation to reduce graph overhead
+   - Supports identity/unary fusion (e.g. `log∘exp`, double negation, and view-like ops such as reshape/squeeze)
+   - Uses heuristic thresholds to avoid fusion overhead on small tensors
+   - Disabled by default on GPU paths to ensure stable performance

  ## 🔧 How to Install

{lucid_dl-2.9.0 → lucid_dl-2.10.0}/README.md
@@ -5,7 +5,7 @@
  ![PyPI - Total Downloads](https://img.shields.io/badge/total%20downloads-34.0k-yellow.svg)
  ![GitHub code size in bytes](https://img.shields.io/github/languages/code-size/ChanLumerico/lucid.svg)
  ![Code Style](https://img.shields.io/badge/code%20style-black-000000.svg)
- ![Lines of Code](https://img.shields.io/badge/lines%20of%20code-26.9k-purple.svg)
+ ![Lines of Code](https://img.shields.io/badge/lines%20of%20code-27.7k-purple.svg)

  **Lucid** is a minimalist deep learning framework built entirely from scratch in Python. It offers a pedagogically rich environment to explore the foundations of modern deep learning systems, including autodiff, neural network modules, and GPU acceleration — all while staying lightweight, readable, and free of complex dependencies.

@@ -22,9 +22,11 @@ Whether you're a student, educator, or an advanced researcher seeking to demysti

  - Now supports [**`Safetensors`**](https://github.com/huggingface/safetensors) for Lucid neural module porting along with the legacy `.lcd` format

- - Added new neural module category `nn.rnn`, including:
-
-   `nn.RNNBase`, `nn.RNN`, `nn.LSTM`, `nn.GRU`, `nn.RNNCell`, `nn.LSTMCell`, `nn.GRUCell`
+ - Introduced **Backward Fusion** for CPU execution:
+   - Automatically fuses selected operation patterns during backpropagation to reduce graph overhead
+   - Supports identity/unary fusion (e.g. `log∘exp`, double negation, and view-like ops such as reshape/squeeze)
+   - Uses heuristic thresholds to avoid fusion overhead on small tensors
+   - Disabled by default on GPU paths to ensure stable performance

  ## 🔧 How to Install

{lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/__init__.py
@@ -50,6 +50,8 @@ import lucid.einops as einops
  import lucid.nn as nn
  import lucid.types as types

+ from lucid._fusion import ENABLE_FUSION
+

  _grad_enabled: bool = True
  _flops_enabled: bool = False
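
The `ENABLE_FUSION` flag imported above gates the backward-fusion pass described in the release notes. To make the idea concrete, here is a toy, self-contained illustration of identity fusion; none of these names are lucid's, and it only shows why a `log∘exp` pair can backpropagate an unchanged gradient:

```python
import math

class Node:
    def __init__(self, value, parents=(), backward=None, kind="leaf"):
        self.value = value
        self.parents = parents
        self.backward = backward  # maps upstream grad -> grads w.r.t. parents
        self.kind = kind

def exp(x):
    out = math.exp(x.value)
    return Node(out, (x,), lambda g: (g * out,), kind="exp")

def log(x):
    return Node(math.log(x.value), (x,), lambda g: (g / x.value,), kind="log")

def fuse_log_exp(node):
    # log(exp(x)) contributes an identity gradient, so both closures can be
    # collapsed into a single pass-through node, analogous to the
    # "identity/unary fusion" the release notes describe.
    if node.kind == "log" and node.parents and node.parents[0].kind == "exp":
        inner = node.parents[0].parents[0]
        return Node(node.value, (inner,), lambda g: (g,), kind="fused_identity")
    return node

x = Node(2.0)
y = fuse_log_exp(log(exp(x)))
assert y.backward(1.0) == (1.0,)  # gradient passes through unchanged
```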
{lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/_backend/conv.py
@@ -8,10 +8,10 @@ import numpy as np

  from lucid._tensor import Tensor
  from lucid._backend.core import (
-     operation,
+     Operation,
      binary_func_op,
      _FuncOpReturnType,
-     _GradFuncType,
+     _GradType,
  )
  from lucid._backend.metal import mx

@@ -451,7 +451,7 @@ def _conv_backward_input(
      return grad_input


- class conv_nd(operation):
+ class conv_nd(Operation):
      def __init__(
          self,
          stride: int | tuple[int, ...] | list[int],
@@ -499,7 +499,7 @@ class conv_nd(operation):
          self.result = Tensor(out)
          return self.result, partial(self.__grad__, a=a, b=b, lib_=mx)

-     def __grad__(self, a: Tensor, b: Tensor, lib_: ModuleType) -> _GradFuncType:
+     def __grad__(self, a: Tensor, b: Tensor, lib_: ModuleType) -> _GradType:
          stride = self._stride
          padding = self._padding
          dilation = self._dilation
@@ -519,6 +519,25 @@ class conv_nd(operation):

      return grad_input, grad_weight

+     def __flops__(self, a: Tensor, b: Tensor) -> int:
+         stride = self._stride
+         padding = self._padding
+         dilation = self._dilation
+         if stride is None or padding is None or dilation is None:
+             stride, padding, dilation = self._normalize(b)
+
+         N = int(a.shape[0])
+         C_out = int(b.shape[0])
+         C_in_g = int(b.shape[1])
+         kernel_size = tuple(int(v) for v in b.shape[2:])
+         out_dims = _conv_out_dims(
+             tuple(int(v) for v in a.shape[2:]), kernel_size, stride, padding, dilation
+         )
+
+         macs_per_out = C_in_g * _prod(kernel_size)
+         out_elems = N * C_out * _prod(tuple(out_dims))
+         return out_elems * macs_per_out
+

  def conv_nd_op(
      stride: int | tuple[int, ...] | list[int],
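
The new `__flops__` hook above charges one multiply-accumulate per kernel element per output element (it counts MACs, not separate mul and add ops). A quick sanity check of that formula in plain Python, with hypothetical shapes and `groups = 1` so that `C_in_g` equals the full input channel count:

```python
from math import prod

# Hypothetical conv: batch 1, 64 -> 128 channels, 3x3 kernel, 56x56 output
# (e.g. stride 1, padding 1 on a 56x56 input).
N, C_in, C_out = 1, 64, 128
kernel_size = (3, 3)
out_dims = (56, 56)

macs_per_out = C_in * prod(kernel_size)  # 64 * 9 = 576
out_elems = N * C_out * prod(out_dims)   # 128 * 3136 = 401_408
print(out_elems * macs_per_out)          # 231_211_008, about 0.23 GMACs
```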
{lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/_backend/core.py
@@ -4,18 +4,24 @@ import functools
  import weakref

  import lucid
- import lucid.types as types
- from lucid.types import _DeviceType, _NumPyArray, _MLXArray, _BuiltinNumeric
+ from lucid.types import (
+     Numeric,
+     _DeviceType,
+     _NumPyArray,
+     _MLXArray,
+     _BuiltinNumeric,
+     _TensorLike,
+ )

- from lucid._tensor import Tensor
  from lucid._backend.metal import is_gpu_op


- _GradFuncType = Callable[[None], Tuple[_NumPyArray | _MLXArray, ...]]
+ _GradType = _NumPyArray | _MLXArray | Tuple[_NumPyArray | _MLXArray, ...]
+ _GradFuncType = Callable[[], _GradType]

- _ReturnGradFuncPair = Tuple[Tensor, _GradFuncType]

- _FuncOpReturnType = Tuple[_ReturnGradFuncPair, ...]
+ _ReturnGradFuncPair = Tuple[_TensorLike, _GradFuncType]
+ _FuncOpReturnType = _ReturnGradFuncPair | Tuple[_ReturnGradFuncPair, ...]


  def func_op(
@@ -24,13 +30,13 @@ def func_op(
      has_gradient: bool = True,
      device: _DeviceType = "cpu",
  ) -> Callable:
-     def decorator(func: Callable[..., _FuncOpReturnType]) -> Callable:
-         @functools.wraps(func)
-         def wrapper(op_self: operation, *args, **kwargs) -> Tuple[Tensor, ...]:
-             tensors: Tuple[Tensor, ...] = tuple()
+     def decorator(forward_func: Callable[..., _FuncOpReturnType]) -> Callable:
+         @functools.wraps(forward_func)
+         def wrapper(op_self: Operation, *args, **kwargs) -> Tuple[_TensorLike, ...]:
+             tensors: Tuple[_TensorLike, ...] = tuple()
              requires_grad = False
              is_free = True
-             dtype_hint: _BuiltinNumeric | types.Numeric | None = None
+             dtype_hint: _BuiltinNumeric | Numeric | None = None

              if n_in is None:
                  tensor_args = args
@@ -42,7 +48,7 @@ def func_op(
                  tensor_args = args[:n_in]

              for arg in tensor_args:
-                 if isinstance(arg, Tensor):
+                 if isinstance(arg, _TensorLike):
                      dtype_hint = arg.dtype
                      break

@@ -64,7 +70,7 @@ def func_op(

              non_tensor_args = args[n_in:] if n_in is not None else ()
              new_args = (*tensors, *non_tensor_args)
-             func_return_pairs = func(op_self, *new_args, **kwargs)
+             func_return_pairs = forward_func(op_self, *new_args, **kwargs)

              tensor_refs = tuple(weakref.ref(t) for t in tensors)

@@ -78,7 +84,7 @@ def func_op(
              if n_ret is None:
                  if not isinstance(func_return_pairs, tuple):
                      raise ValueError(
-                         f"{func.__name__} should return multiple '_ReturnGradFuncPair'."
+                         f"{forward_func.__name__} should return multiple '_ReturnGradFuncPair'."
                      )
                  num_returns = len(func_return_pairs)
              else:
@@ -87,45 +93,27 @@ def func_op(
              if num_returns == 1:
                  func_return_pairs: _FuncOpReturnType = (func_return_pairs,)

-             results: Tuple[Tensor, ...] = tuple()
-             for result, compute_grad in func_return_pairs:
+             results: Tuple[_TensorLike, ...] = tuple()
+             for result, grad_func in func_return_pairs:
                  result.requires_grad = requires_grad and has_gradient and grad_enabled
-                 if track_graph:
-                     result._op = op_self
                  result.to(device)
-                 if is_free:
-                     result.free()
-
+                 result.free() if is_free else ...
                  results += (result,)
+
                  if not track_graph:
                      continue
-
-                 def _backward_op(
-                     *, _func: Callable = compute_grad, _tensor_refs=tensor_refs
-                 ) -> None:
-                     grads = _func()
-                     if n_in == 1 or not isinstance(grads, tuple):
-                         grads = (grads,)
-
-                     live_tensors = tuple(ref() for ref in _tensor_refs)
-                     if any(t is None for t in live_tensors):
-                         return
-
-                     if len(grads) != len(live_tensors):
-                         raise ValueError(
-                             f"Expected {len(live_tensors)} gradients, got {len(grads)}."
-                         )
-
-                     for tensor, grad in zip(live_tensors, grads):
-                         new_grad = lucid._match_grad_shape(
-                             tensor.data, grad, device=device
-                         )
-                         lucid._set_tensor_grad(tensor, new_grad)
+                 result._op = op_self

                  if result.requires_grad or lucid.flops_enabled():
                      result._prev = list(tensors)
-                 result._backward_op = (
-                     _backward_op if result.requires_grad else lambda: None
+                 if not result.requires_grad:
+                     continue
+
+                 result._backward_op = BackwardOperation(
+                     forward_op_ref=weakref.ref(op_self),
+                     grad_func=grad_func,
+                     tensor_refs=tensor_refs,
+                     device=device,
                  )

              if track_graph:
@@ -161,11 +149,11 @@ def poly_func_op(has_gradient: bool = True, device: _DeviceType = "cpu") -> Call
      return func_op(None, 1, has_gradient=has_gradient, device=device)


- class operation(ABC):
+ class Operation(ABC):
      __fallback__: ClassVar[bool] = False

      def __init__(self) -> None:
-         self.result: Tensor | tuple[Tensor, ...] | None = None
+         self.result: _TensorLike | tuple[_TensorLike, ...] | None = None
          self._flops: int | None = None

      def clear(self) -> None:
@@ -177,11 +165,11 @@ class operation(ABC):
      @abstractmethod
      def gpu(self, *args, **kwargs) -> _FuncOpReturnType: ...

-     def __grad__(self, *args, **kwargs) -> _GradFuncType: ...
+     def __grad__(self, *args, **kwargs) -> _GradType: ...

-     def __grad_cpu__(self, *args, **kwargs) -> _GradFuncType: ...
+     def __grad_cpu__(self, *args, **kwargs) -> _GradType: ...

-     def __grad_gpu__(self, *args, **kwargs) -> _GradFuncType: ...
+     def __grad_gpu__(self, *args, **kwargs) -> _GradType: ...

      @property
      def flops(self) -> int:
@@ -196,12 +184,76 @@ class operation(ABC):
      def __flops__(self, *args, **kwargs) -> int:
          return 0

-     def __call__(self, *args, **kwargs) -> Tensor | tuple[Tensor, ...]:
+     def __call__(self, *args, **kwargs) -> _TensorLike | tuple[_TensorLike, ...]:
          if is_gpu_op(*args):
              return self.gpu(*args, **kwargs)
          return self.cpu(*args, **kwargs)


- def fallback(cls: type[operation]) -> type[operation]:
+ def fallback(cls: type[Operation]) -> type[Operation]:
      cls.__fallback__ = True
      return cls
+
+
+ class BackwardOperation:
+     def __init__(
+         self,
+         forward_op_ref: weakref.ref[Operation] | None,
+         grad_func: _GradFuncType | None,
+         tensor_refs: tuple[weakref.ref[_TensorLike]],
+         device: _DeviceType | None = "cpu",
+         custom_closure: Callable[[], None] | None = None,
+     ) -> None:
+         self.forward_op_ref = forward_op_ref
+         self.grad_func = grad_func
+         self.tensor_refs = tensor_refs
+         self.device = device
+
+         self.custom_closure = custom_closure
+         self.num_inputs = len(tensor_refs)
+
+         if self.grad_func is None and self.custom_closure is None:
+             raise ValueError("Either 'grad_func' or 'custom_closure' must be provided.")
+
+     def override_grad_func(self, new_grad_func: _GradFuncType) -> None:
+         if self.custom_closure is not None:
+             return
+         self.grad_func = new_grad_func
+
+     def override_tensor_refs(
+         self, new_tensor_refs: tuple[weakref.ref[_TensorLike]]
+     ) -> None:
+         self.tensor_refs = new_tensor_refs
+         self.num_inputs = len(new_tensor_refs)
+
+     def __call__(self) -> None:
+         if self.custom_closure is not None:
+             self.custom_closure()
+             return
+
+         if self.device is None and self.forward_op_ref is not None:
+             raise RuntimeError(
+                 "Only 'noop' BackwardOperation can be called without device."
+             )
+
+         grads = self.grad_func()
+         if self.num_inputs == 1 or not isinstance(grads, tuple):
+             grads = (grads,)
+
+         live_tensors = tuple(ref() for ref in self.tensor_refs)
+         if any(t is None for t in live_tensors):
+             return
+
+         if len(grads) != len(live_tensors):
+             raise ValueError(
+                 f"Expected {len(live_tensors)} gradients, got {len(grads)}."
+             )
+
+         for tensor, grad in zip(live_tensors, grads):
+             new_grad = lucid._match_grad_shape(tensor.data, grad, device=self.device)
+             lucid._set_tensor_grad(tensor, new_grad)
+
+
+ noop = BackwardOperation(
+     forward_op_ref=None, grad_func=lambda: (), tensor_refs=(), device=None
+ )
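
With this refactor, the backward step becomes a first-class `BackwardOperation` object rather than an anonymous closure, which is what lets the new `lucid._fusion` pass rewrite gradients in place via `override_grad_func` / `override_tensor_refs`. A toy re-creation of that pattern, standalone and not using lucid's real `Tensor`:

```python
import weakref

class Leaf:
    def __init__(self, data):
        self.data = data
        self.grad = 0.0

class ToyBackwardOp:
    """Backward step as an object, mirroring BackwardOperation's shape."""

    def __init__(self, grad_func, tensor_refs):
        self.grad_func = grad_func
        self.tensor_refs = tensor_refs

    def override_grad_func(self, new_grad_func):
        # The hook a fusion pass would use to splice in a cheaper gradient.
        self.grad_func = new_grad_func

    def __call__(self):
        grads = self.grad_func()
        for ref, g in zip(self.tensor_refs, grads):
            t = ref()
            if t is not None:  # the input may already have been freed
                t.grad += g

x = Leaf(3.0)
op = ToyBackwardOp(lambda: (2.0 * x.data,), (weakref.ref(x),))  # d(x**2)/dx
op()                                    # x.grad == 6.0
op.override_grad_func(lambda: (1.0,))   # pretend fusion replaced the closure
op()                                    # x.grad == 7.0
```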
{lucid_dl-2.9.0 → lucid_dl-2.10.0}/lucid/_backend/pool.py
@@ -7,10 +7,10 @@ import numpy as np

  from lucid._tensor import Tensor
  from lucid._backend.core import (
-     operation,
+     Operation,
      unary_func_op,
      _FuncOpReturnType,
-     _GradFuncType,
+     _GradType,
  )
  from lucid._backend.metal import mx
  from lucid.types import _NumPyArray, _MLXArray
@@ -211,7 +211,7 @@ def _pool_backward_max(
      return _crop_padding(grad_input_pad, padding)


- class pool_nd(operation):
+ class pool_nd(Operation):
      def __init__(
          self,
          kernel_size: int | tuple[int, ...] | list[int],
@@ -295,7 +295,7 @@ class pool_nd(operation):
          self.result = Tensor(out)
          return self.result, partial(self.__grad__, lib_=mx)

-     def __grad__(self, lib_: ModuleType) -> _GradFuncType:
+     def __grad__(self, lib_: ModuleType) -> _GradType:
          if (
              self._kernel_size is None
              or self._stride is None
@@ -333,6 +333,24 @@ class pool_nd(operation):
          )
          return grad_input

+     def __flops__(self, a: Tensor) -> int:
+         if self._kernel_size is None or self._out_dims is None:
+             kernel, stride, padding = self._normalize(a)
+             out_dims = _pool_out_dims(a.shape[2:], kernel, stride, padding)
+         else:
+             kernel = self._kernel_size
+             out_dims = self._out_dims
+
+         kernel_elems = _prod(kernel)
+         out_elems = int(a.shape[0]) * int(a.shape[1]) * _prod(out_dims)
+
+         if kernel_elems <= 0 or out_elems <= 0:
+             return 0
+
+         if self.mode == "avg":
+             return out_elems * kernel_elems
+         return out_elems * max(kernel_elems - 1, 0)
+

  def avg_pool_nd_op(
      kernel_size: int | tuple[int, ...] | list[int],
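
The pooling cost model above counts `kernel_elems` multiply-adds per output element for average pooling and `kernel_elems - 1` comparisons per output element for max pooling. Checking both branches on a hypothetical 2x2 pooling, stride 2, over a (N=1, C=64, 112, 112) input:

```python
from math import prod

# 2x2 pool with stride 2 on a 112x112 feature map gives a 56x56 output.
kernel = (2, 2)
out_dims = (56, 56)
out_elems = 1 * 64 * prod(out_dims)         # 200_704 output elements

avg_flops = out_elems * prod(kernel)        # 4 adds per output -> 802_816
max_flops = out_elems * (prod(kernel) - 1)  # 3 comparisons per output -> 602_112
print(avg_flops, max_flops)
```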