py-sadl 1.0.2 (tar.gz)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py_sadl-1.0.2/.gitignore +210 -0
- py_sadl-1.0.2/LICENSE +21 -0
- py_sadl-1.0.2/PKG-INFO +338 -0
- py_sadl-1.0.2/README.md +292 -0
- py_sadl-1.0.2/pyproject.toml +230 -0
- py_sadl-1.0.2/sadl/__init__.py +74 -0
- py_sadl-1.0.2/sadl/backend.py +45 -0
- py_sadl-1.0.2/sadl/disk.py +147 -0
- py_sadl-1.0.2/sadl/function.py +415 -0
- py_sadl-1.0.2/sadl/grad_ops.py +1158 -0
- py_sadl-1.0.2/sadl/ops.py +67 -0
- py_sadl-1.0.2/sadl/optimizer.py +352 -0
- py_sadl-1.0.2/sadl/tensor.py +531 -0
- py_sadl-1.0.2/sadl/utils.py +33 -0
py_sadl-1.0.2/.gitignore
ADDED
@@ -0,0 +1,210 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[codz]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py.cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
#poetry.toml

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
#pdm.lock
#pdm.toml
.pdm-python
.pdm-build/

# pixi
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
#pixi.lock
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
# in the .venv directory. It is recommended not to include this directory in version control.
.pixi

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.envrc
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# Abstra
# Abstra is an AI-powered process automation framework.
# Ignore directories containing user credentials, local state, and settings.
# Learn more at https://abstra.io/docs
.abstra/

# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/

# Ruff stuff:
.ruff_cache/

# PyPI configuration file
.pypirc

# Cursor
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
# refer to https://docs.cursor.com/context/ignore-files
.cursorignore
.cursorindexingignore

# Marimo
marimo/_static/
marimo/_lsp/
__marimo__/

# MacOS
.DS_Store
py_sadl-1.0.2/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2026 Tim Cares

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
py_sadl-1.0.2/PKG-INFO
ADDED
@@ -0,0 +1,338 @@
Metadata-Version: 2.4
Name: py-sadl
Version: 1.0.2
Summary: Simple Autograd Deep Learning: A minimal, readable deep learning framework built on NumPy
Project-URL: Homepage, https://github.com/timcares/sadl
Project-URL: Documentation, https://github.com/timcares/sadl#readme
Project-URL: Repository, https://github.com/timcares/sadl
Project-URL: Issues, https://github.com/timcares/sadl/issues
Author: Tim Cares
License: MIT
License-File: LICENSE
Keywords: autograd,automatic-differentiation,cupy,deep-learning,educational,machine-learning,neural-networks,numpy
Classifier: Development Status :: 4 - Beta
Classifier: Environment :: GPU :: NVIDIA CUDA
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Education
Classifier: Intended Audience :: Science/Research
Classifier: License :: OSI Approved :: MIT License
Classifier: Natural Language :: English
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.13
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Classifier: Topic :: Scientific/Engineering :: Mathematics
Classifier: Typing :: Typed
Requires-Python: >=3.13
Requires-Dist: numpy>=2.4.0
Provides-Extra: dev
Requires-Dist: commitizen>=4.0.0; extra == 'dev'
Requires-Dist: ipykernel>=7.1.0; extra == 'dev'
Requires-Dist: ipywidgets>=8.1.8; extra == 'dev'
Requires-Dist: jupyter>=1.1.1; extra == 'dev'
Requires-Dist: matplotlib>=3.10.8; extra == 'dev'
Requires-Dist: mypy>=1.13.0; extra == 'dev'
Requires-Dist: notebook>=7.5.3; extra == 'dev'
Requires-Dist: pre-commit>=4.0.0; extra == 'dev'
Requires-Dist: pytest-cov>=6.0.0; extra == 'dev'
Requires-Dist: pytest>=8.3.0; extra == 'dev'
Requires-Dist: python-semantic-release>=9.0.0; extra == 'dev'
Requires-Dist: ruff>=0.8.0; extra == 'dev'
Provides-Extra: gpu
Requires-Dist: cupy-cuda12x>=13.0.0; extra == 'gpu'
Provides-Extra: gpu-cuda11
Requires-Dist: cupy-cuda11x>=13.0.0; extra == 'gpu-cuda11'
Description-Content-Type: text/markdown

<p align="center">
  <img src="assets/sadl_icon_light.png" alt="SADL Logo" width="200">
</p>

<h1 align="center">SADL: Simple Autograd Deep Learning</h1>

<p align="center">
  A minimal, readable deep learning framework built on NumPy and CuPy.<br>
  Automatic differentiation, neural network primitives, and optimization in ~2000 lines of Python.
</p>

## Installation

Using [uv](https://docs.astral.sh/uv/) for installation is recommended.

(I had to name the PyPI project `py-sadl` instead of `sadl`, because `sadl` was too similar to an existing project.)

```bash
# Install with uv (recommended)
uv add py-sadl

# With GPU support (CUDA 12.x)
uv add py-sadl --extra gpu

# With GPU support (CUDA 11.x)
uv add py-sadl --extra gpu-cuda11
```

Alternatively, using pip:

```bash
# Install with pip
pip install py-sadl

# With GPU support
pip install "py-sadl[gpu]"
```

## Quick Start

```python
import sadl

# Create tensors
x = sadl.tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)

# Build a model
model = sadl.Mlp([
    sadl.Linear(dim_in=2, dim_out=4),
    sadl.ReLU(),
    sadl.Linear(dim_in=4, dim_out=1),
])

# Forward pass
output = model(x)
loss = output.sum()

# Backward pass and optimization
optimizer = sadl.SGD(list(model.parameters), lr=0.01)
optimizer.backward(loss)
optimizer.step()
optimizer.zero_grad()
```

## Motivation

Modern deep learning frameworks like PyTorch and TensorFlow are powerful but complex. Their codebases span millions of lines, making it difficult to understand how automatic differentiation and neural network training actually work at a fundamental level.

SADL addresses this by providing a complete, functional deep learning framework that remains small enough to read and understand in its entirety. Every component, from tensor operations to backpropagation, is implemented transparently using standard NumPy operations.

The goal is not to replace production frameworks, but to serve as an educational resource and a foundation for experimentation. Researchers and engineers can trace exactly how gradients flow through computations without navigating layers of abstraction.

## Related Projects

SADL joins a family of educational and minimal deep learning frameworks that have made autodiff more accessible:

**[micrograd](https://github.com/karpathy/micrograd)** by Andrej Karpathy is an elegant, minimal autograd engine operating on scalar values. In roughly 150 lines of code, it demonstrates the core concepts of backpropagation with remarkable clarity. micrograd is an excellent starting point for understanding how gradients flow through computations.

**[tinygrad](https://github.com/tinygrad/tinygrad)** by George Hotz takes a different approach, building a fully-featured deep learning framework with a focus on simplicity and hardware portability. tinygrad supports multiple backends and has grown into a serious alternative for running models on diverse hardware.

SADL takes inspiration from both projects while pursuing its own path: building directly on NumPy's ndarray infrastructure. By subclassing `numpy.ndarray` and intercepting operations via `__array_ufunc__` and `__array_function__`, SADL achieves autograd without introducing a new tensor abstraction. This means existing NumPy code works unchanged, and the mental model stays close to the numerical computing patterns that researchers already know.

## Design Principles

### Build on NumPy

SADL extends `numpy.ndarray` directly rather than wrapping arrays in custom containers. This means:

- All NumPy operations work out of the box
- No need to learn a new tensor API
- Seamless interoperability with the scientific Python ecosystem
- GPU support through CuPy with zero code changes
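
For instance, a plain NumPy call on a SADL tensor is intercepted and recorded in the graph. A small illustrative sketch (the exact printed repr of the result may differ):

```python
import numpy as np
import sadl

x = sadl.tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)

# np.exp and np.sum are ordinary NumPy calls; __array_ufunc__ / __array_function__
# route them through SADL, so the result stays a tensor with src and backward_fn set.
y = np.exp(x) * 2.0
loss = np.sum(y)
```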

### Mathematical Functions as First-Class Citizens

Neural network layers are modeled as mathematical functions, matching how they appear in research papers. The `Function` abstract base class enforces a simple contract: implement `__call__` to define the forward pass. This creates a natural bridge between mathematical notation and code.

```python
class Sigmoid(Function):
    def __call__(self, x: Tensor) -> Tensor:
        return 1 / (xp.exp(-x) + 1)
```

### Explicit Over Implicit

SADL favors explicit behavior over magic:

- Gradients must be explicitly enabled with `requires_grad=True`
- Parameters are a distinct type that always tracks gradients
- The computation graph is visible and inspectable
- Device transfers are explicit operations
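
The same explicitness shows up when inspecting a computation. A minimal sketch (the attribute names `src`, `backward_fn`, and `requires_grad` are the ones described under "How Autodiff Works" below; the printed output is illustrative):

```python
import sadl

a = sadl.tensor([1.0, 2.0])                      # gradient tracking is off by default
b = sadl.tensor([3.0, 4.0], requires_grad=True)  # and must be requested explicitly

c = a * b
print(c.requires_grad)  # expected True: a gradient-tracking input makes the result tracked
print(c.src)            # (a, b) -- the graph is just these back-references
print(c.backward_fn)    # the registered backward function for multiply
```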

### Minimal but Complete

The framework includes only what is necessary for training neural networks:

- Tensor with autograd support
- Parameter for learnable weights
- Function base class for layers
- Optimizer base class with SGD implementation
- Serialization for model persistence

Additional layers and optimizers can be built on these primitives without modifying core code.

## How Autodiff Works

SADL implements reverse-mode automatic differentiation (backpropagation) using a dynamic computation graph, similar to PyTorch.

### The Computation Graph

In SADL, **Tensors are the computation graph**. There is no separate graph data structure. Each Tensor stores a `src` attribute pointing to the Tensors it was created from. This forms a back-referencing graph where each node knows its parents, but parents do not know their children:

```
Forward computation:

x ─┐
   ├─► z ─► loss
y ─┘

Graph structure (src references):

  loss
   │
   ▼
   z
  ╱ ╲
 ▼   ▼
 x   y
```

This is intentional. Deep learning frameworks optimize for backward traversal because that is what backpropagation requires. Starting from the loss, we follow `src` references backward through the graph to compute gradients. Forward references (parent to child) are unnecessary and would only consume memory.

### Forward Pass: Building the Graph

When operations are performed on Tensors with `requires_grad=True`, the graph builds itself automatically:

1. The `Tensor` class overrides `__array_ufunc__` and `__array_function__` to intercept NumPy operations
2. Each operation creates a new Tensor that stores:
   - `src`: References to input tensors (the parents in the graph)
   - `backward_fn`: The gradient function for this operation
   - `op_ctx`: Any context needed for gradient computation (axis, masks, etc.)
3. The graph grows dynamically as operations execute

```python
x = sadl.tensor([1.0, 2.0], requires_grad=True)  # leaf, src = ()
y = sadl.tensor([3.0, 4.0], requires_grad=True)  # leaf, src = ()
z = x * y        # z.src = (x, y), z.backward_fn = mul_backward
loss = z.sum()   # loss.src = (z,), loss.backward_fn = sum_backward
```

A more complex example:

```
a = tensor(...)  # leaf
b = tensor(...)  # leaf
c = tensor(...)  # leaf

d = a + b    # d.src = (a, b)
e = d * c    # e.src = (d, c)
f = e.sum()  # f.src = (e,)

Graph (following src backwards from f):

      f
      │
      ▼
      e
     ╱ ╲
    ▼   ▼
    d   c
   ╱ ╲
  ▼   ▼
  a   b
```

### Backward Pass: Computing Gradients

When `optimizer.backward(loss)` is called:

1. **Topological Sort**: The graph is traversed from the loss tensor to find all nodes, ordered so that each node appears after all nodes that depend on it. This uses an iterative stack-based algorithm to avoid recursion limits on deep graphs.
2. **Gradient Propagation**: Starting from the loss (seeded with gradient 1.0), each node's `backward_fn` is called with:
   - The input tensors (`src`)
   - Which inputs need gradients (`compute_grad`)
   - The upstream gradient (`grad_out`)
   - Operation context (`op_ctx`)
3. **Gradient Accumulation**: Gradients flow backward through the graph. When a tensor is used in multiple operations, gradients are summed.
4. **Graph Cleanup**: After backpropagation, the graph structure is cleared to free memory. Parameter gradients are retained for the optimizer step.
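
In pseudocode, the whole procedure looks roughly like the sketch below. This is a simplified illustration, not the code in `optimizer.py`; in particular, how gradients are stored (here a plain dict) and how leaves are detected are assumptions:

```python
def backward(loss):
    # 1. Iterative (stack-based) topological sort following src references.
    order, visited, stack = [], set(), [(loss, False)]
    while stack:
        node, finished = stack.pop()
        if finished:
            order.append(node)
        elif id(node) not in visited:
            visited.add(id(node))
            stack.append((node, True))
            for parent in node.src:
                stack.append((parent, False))
    order.reverse()  # loss first, leaves last

    # 2. Seed the loss with gradient 1.0 and propagate backward.
    grads = {id(loss): 1.0}
    for node in order:
        if node.backward_fn is None:  # leaf tensor, nothing to propagate
            continue
        grad_out = grads.get(id(node))
        if grad_out is None:          # no downstream consumer needed this branch
            continue
        input_grads = node.backward_fn(
            *node.src,
            compute_grad=tuple(t.requires_grad for t in node.src),
            grad_out=grad_out,
            # op_ctx (axis, masks, ...) would also be passed for ops that need it
        )
        # 3. Accumulate: a tensor used in several operations sums its gradients.
        for parent, g in zip(node.src, input_grads):
            if g is not None:
                grads[id(parent)] = grads.get(id(parent), 0) + g
    # 4. Cleanup (omitted): drop src/backward_fn references, keep Parameter grads.
```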

### Gradient Operations Registry

Each supported operation has a corresponding backward function registered in `grad_ops.py` with metadata (op type inspired by [tinygrad](https://github.com/tinygrad/tinygrad)):

```python
@register_grad_op(
    op_type=OpType.ELEMENTWISE,
    op_inputs=OpInputs.BINARY,
    forward_names=("mul", "multiply"),
)
@broadcastable
def mul_backward(*inputs, compute_grad, grad_out):
    x, y = inputs
    grad_x = y * grad_out if compute_grad[0] else None
    grad_y = x * grad_out if compute_grad[1] else None
    return grad_x, grad_y
```

The `@broadcastable` decorator handles gradient reduction when inputs were broadcast during the forward pass.
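
What that reduction amounts to can be shown with a small generic NumPy sketch (this illustrates the idea, not the decorator's actual code):

```python
import numpy as np

def reduce_broadcast(grad, shape):
    """Sum a gradient back down to `shape` after the input was broadcast."""
    # Axes prepended by broadcasting are summed away entirely.
    while grad.ndim > len(shape):
        grad = grad.sum(axis=0)
    # Axes of size 1 that were stretched are summed with keepdims.
    for axis, size in enumerate(shape):
        if size == 1 and grad.shape[axis] != 1:
            grad = grad.sum(axis=axis, keepdims=True)
    return grad

# In x * y with x.shape == (2, 3) and y.shape == (3,), y was broadcast, so its
# gradient x * grad_out has shape (2, 3) and must be reduced back to (3,).
x, grad_out = np.ones((2, 3)), np.ones((2, 3))
assert reduce_broadcast(x * grad_out, (3,)).shape == (3,)
```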

### Supported Operations

Unary: `abs`, `negative`, `sqrt`, `square`, `exp`, `log`, `sin`, `cos`

Binary: `add`, `subtract`, `multiply`, `divide`, `power`, `matmul`, `maximum`, `minimum`

Reductions: `sum`, `mean`, `max`, `min`

## Architecture

```
sadl/
├── __init__.py   # Public API re-exports
├── backend.py    # NumPy/CuPy abstraction
├── disk.py       # Saving and loading data to/from disk
├── tensor.py     # Tensor, Parameter, serialization
├── grad_ops.py   # Gradient operation registry
├── function.py   # Neural network layers
├── optimizer.py  # Optimizer base class, SGD, backpropagation
├── ops.py        # Array creation and device utilities
└── utils.py      # Device transfer utilities
```

### Key Components

**Tensor**: Subclass of `numpy.ndarray` with additional attributes for autograd. Intercepts NumPy operations to build the computation graph.

**Parameter**: Tensor subclass for learnable weights. Always requires gradients and retains them after backward pass for gradient accumulation.

**Function**: Abstract base class for neural network layers. Provides parameter traversal, device management, and train/inference mode switching.

**Optimizer**: Abstract base class that owns the backward pass. Performs topological sort, gradient computation, and graph cleanup.

**GradOp Registry**: Dictionary mapping operation names to backward functions. New operations can be registered with a decorator.
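
Registering a new operation follows the same pattern as the `mul_backward` example above. A hedged sketch for a hypothetical `tanh` gradient (whether an `OpInputs.UNARY` variant exists alongside `BINARY` is an assumption; `register_grad_op`, `OpType`, and `xp` come from `grad_ops.py` and `backend.py`):

```python
@register_grad_op(
    op_type=OpType.ELEMENTWISE,
    op_inputs=OpInputs.UNARY,  # assumed counterpart to OpInputs.BINARY
    forward_names=("tanh",),
)
def tanh_backward(*inputs, compute_grad, grad_out):
    (x,) = inputs
    # d/dx tanh(x) = 1 - tanh(x)^2
    grad_x = (1 - xp.tanh(x) ** 2) * grad_out if compute_grad[0] else None
    return (grad_x,)
```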

## Serialization

SADL uses a custom binary format (`.sadl` files) for efficient tensor storage:

- 4-byte magic header for format validation
- Version byte for forward compatibility
- Compact encoding of dtype, shape, and raw data
- Support for single tensors or ordered dictionaries of tensors
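
The layout can be pictured with a small stand-alone sketch. The magic bytes, field widths, and field order below are illustrative placeholders, not the format actually defined in `disk.py`:

```python
import struct
import numpy as np

MAGIC = b"SADL"  # hypothetical 4-byte magic; the real value lives in disk.py

def save_array(path, arr, version=1):
    with open(path, "wb") as f:
        f.write(MAGIC)                                     # format validation
        f.write(struct.pack("<B", version))                # version byte
        dtype = arr.dtype.str.encode()
        f.write(struct.pack("<B", len(dtype)) + dtype)     # dtype string
        f.write(struct.pack("<B", arr.ndim))               # rank
        f.write(struct.pack(f"<{arr.ndim}q", *arr.shape))  # shape as int64s
        f.write(arr.tobytes())                             # raw data

def load_array(path):
    with open(path, "rb") as f:
        assert f.read(4) == MAGIC, "not a .sadl file"
        _version = struct.unpack("<B", f.read(1))[0]
        (n,) = struct.unpack("<B", f.read(1))
        dtype = np.dtype(f.read(n).decode())
        (ndim,) = struct.unpack("<B", f.read(1))
        shape = struct.unpack(f"<{ndim}q", f.read(8 * ndim))
        return np.frombuffer(f.read(), dtype=dtype).reshape(shape)
```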

## Contributing

See [CONTRIBUTING.md](CONTRIBUTING.md) for development setup, commands, and guidelines.

## Code of Conduct

See [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) for behavior guidelines. The file was created using the [Contributor Covenant](https://www.contributor-covenant.org).

## Future Plans

- Static graph compilation for repeated computations
- Additional layers and components (convolution, batch normalization, attention)
- More optimizers (Adam, AdamW, RMSprop)
- XLA compilation backend for TPU support
- Automatic mixed precision training
- Distributed training primitives

## See Also

- `docs/API_REFERENCE.md`: Complete API documentation