micrograd-cpp-engine 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. micrograd_cpp_engine-0.1.1/PKG-INFO +541 -0
  2. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/pyproject.toml +8 -1
  3. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/__init__.py +1 -1
  4. micrograd_cpp_engine-0.1.0/PKG-INFO +0 -7
  5. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/.claude/settings.local.json +0 -0
  6. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/.github/workflows/publish.yml +0 -0
  7. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/.gitignore +0 -0
  8. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/CMakeLists.txt +0 -0
  9. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/Micrograd.docx +0 -0
  10. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/PUBLISHING.md +0 -0
  11. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/README.md +0 -0
  12. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/docs/architecture.md +0 -0
  13. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/docs/stability_and_fixes.md +0 -0
  14. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/examples/01_adder.py +0 -0
  15. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/examples/02_mlp_xor.py +0 -0
  16. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/examples/03_toy_regression.py +0 -0
  17. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/examples/05_custom_op.py +0 -0
  18. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/examples/06_jit_speedup.py +0 -0
  19. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/examples/08_save_load.py +0 -0
  20. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/active_fn.hpp +0 -0
  21. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/adam.hpp +0 -0
  22. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/autograd.hpp +0 -0
  23. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/device.hpp +0 -0
  24. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/dtype.hpp +0 -0
  25. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/function.hpp +0 -0
  26. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/ir.hpp +0 -0
  27. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/ir_fwd.hpp +0 -0
  28. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/linear.hpp +0 -0
  29. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/loss.hpp +0 -0
  30. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/micrograd.hpp +0 -0
  31. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/module.hpp +0 -0
  32. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/momentum.hpp +0 -0
  33. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/op.hpp +0 -0
  34. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/op_registry.hpp +0 -0
  35. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/optimizer.hpp +0 -0
  36. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/relu.hpp +0 -0
  37. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/sequential.hpp +0 -0
  38. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/serializer.hpp +0 -0
  39. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/sgd.hpp +0 -0
  40. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/shape.hpp +0 -0
  41. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/softmax.hpp +0 -0
  42. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/storage.hpp +0 -0
  43. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/stream.hpp +0 -0
  44. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/include/micrograd/tensor.hpp +0 -0
  45. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/data/__init__.py +0 -0
  46. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/data/toy_datasets.py +0 -0
  47. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/function.py +0 -0
  48. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/losses.py +0 -0
  49. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/nn/__init__.py +0 -0
  50. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/nn/conv.py +0 -0
  51. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/nn/linear.py +0 -0
  52. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/nn/module.py +0 -0
  53. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/nn/relu.py +0 -0
  54. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/nn/sequential.py +0 -0
  55. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/nn/softmax.py +0 -0
  56. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/op_def/__init__.py +0 -0
  57. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/optim/__init__.py +0 -0
  58. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/optim/adam.py +0 -0
  59. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/optim/momentum.py +0 -0
  60. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/optim/sgd.py +0 -0
  61. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/tensor.py +0 -0
  62. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/train/__init__.py +0 -0
  63. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/train/loop.py +0 -0
  64. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/transforms/__init__.py +0 -0
  65. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/transforms/grad.py +0 -0
  66. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/transforms/jit.py +0 -0
  67. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/python/micrograd/transforms/vmap.py +0 -0
  68. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/CMakeLists.txt +0 -0
  69. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/active_fn.cpp +0 -0
  70. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/adam.cpp +0 -0
  71. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/autograd.cpp +0 -0
  72. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/function.cpp +0 -0
  73. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/ir.cpp +0 -0
  74. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/linear.cpp +0 -0
  75. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/loss.cpp +0 -0
  76. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/module.cpp +0 -0
  77. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/momentum.cpp +0 -0
  78. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/op.cpp +0 -0
  79. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/op_kernels.cpp +0 -0
  80. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/op_registry.cpp +0 -0
  81. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/optimizer.cpp +0 -0
  82. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/relu.cpp +0 -0
  83. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/sequential.cpp +0 -0
  84. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/serializer.cpp +0 -0
  85. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/sgd.cpp +0 -0
  86. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/softmax.cpp +0 -0
  87. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/storage.cpp +0 -0
  88. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/stream.cpp +0 -0
  89. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/core/tensor.cpp +0 -0
  90. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/kernels/cpu/binary_kernels.cpp +0 -0
  91. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/kernels/cpu/binary_kernels.hpp +0 -0
  92. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/kernels/cpu/matmul_kernels.cpp +0 -0
  93. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/kernels/cpu/matmul_kernels.hpp +0 -0
  94. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/kernels/cpu/reduce_kernels.cpp +0 -0
  95. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/kernels/cpu/reduce_kernels.hpp +0 -0
  96. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/kernels/cpu/unary_kernels.cpp +0 -0
  97. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/kernels/cpu/unary_kernels.hpp +0 -0
  98. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/python/module.cpp +0 -0
  99. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/python/py_function.cpp +0 -0
  100. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/python/py_loss.cpp +0 -0
  101. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/python/py_module.cpp +0 -0
  102. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/python/py_op_registry.cpp +0 -0
  103. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/python/py_optim.cpp +0 -0
  104. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/python/py_serializer.cpp +0 -0
  105. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/python/py_tensor.cpp +0 -0
  106. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/src/python/py_transforms.cpp +0 -0
  107. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/tests/e2e/test_toy_regression.py +0 -0
  108. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/tests/e2e/test_xor.py +0 -0
  109. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/tests/python/test_autograd_gradcheck.py +0 -0
  110. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/tests/python/test_conv2d.py +0 -0
  111. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/tests/python/test_module.py +0 -0
  112. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/tests/python/test_op_def.py +0 -0
  113. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/tests/python/test_serializer.py +0 -0
  114. {micrograd_cpp_engine-0.1.0 → micrograd_cpp_engine-0.1.1}/tests/python/test_transforms.py +0 -0
@@ -0,0 +1,541 @@
1
+ Metadata-Version: 2.2
2
+ Name: micrograd-cpp-engine
3
+ Version: 0.1.1
4
+ Summary: A small autograd library with a C++ core and pybind11 Python bindings.
5
+ Author: Vivek Chaudhari
6
+ Project-URL: Homepage, https://github.com/jwrhw7tueydwtt7575g/MicroGrad
7
+ Requires-Python: >=3.9
8
+ Requires-Dist: numpy>=1.20
9
+ Description-Content-Type: text/markdown
10
+
11
+ <div align="center">
12
+
13
+ # MicroGrad
14
+
15
+ ### A small, production-grade autograd + deep-learning library
16
+
17
+ **C++ core · pybind11 bindings · CPU today, GPU-ready tomorrow**
18
+
19
+ [![Python](https://img.shields.io/badge/Python-3.9%2B-blue?logo=python&logoColor=white)](https://www.python.org/)
20
+ [![C++](https://img.shields.io/badge/C%2B%2B-17-00599C?logo=c%2B%2B&logoColor=white)](https://isocpp.org/)
21
+ [![License](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
22
+ [![Status](https://img.shields.io/badge/Status-v0.1-yellow)]()
23
+ [![Platform](https://img.shields.io/badge/Platform-Linux%20%7C%20macOS%20%7C%20Windows-lightgrey)]()
24
+
25
+ A from-scratch autograd engine that ships with a C++17 core, a clean Python
26
+ API, and a single design choice that beats the seven common flaws of
27
+ PyTorch-style libraries (see [Why MicroGrad?](#-why-micrograd)).
28
+
29
+ [Install](#-installation) · [Quick start](#-quick-start) · [Examples](#-examples) · [Architecture](#-architecture) · [Roadmap](#-roadmap) · [Publish](PUBLISHING.md)
30
+
31
+ </div>
32
+
33
+ ---
34
+
35
+ ## 📑 Table of Contents
36
+
37
+ - [✨ Features](#-features)
38
+ - [🧠 Why MicroGrad?](#-why-micrograd)
39
+ - [📦 Installation](#-installation)
40
+ - [🚀 Quick start](#-quick-start)
41
+ - [📚 API tour](#-api-tour)
42
+ - [Tensors](#tensors)
43
+ - [Modules](#modules)
44
+ - [Optimizers](#optimizers)
45
+ - [Losses](#losses)
46
+ - [Functional transforms](#functional-transforms)
47
+ - [Custom ops with `@op_def`](#custom-ops-with-op_def)
48
+ - [Save / load](#save--load)
49
+ - [🧪 Examples](#-examples)
50
+ - [🧱 Architecture](#-architecture)
51
+ - [🗂️ Project layout](#-project-layout)
52
+ - [🛠️ Building from source](#-building-from-source)
53
+ - [🧪 Running tests](#-running-tests)
54
+ - [🐞 Debugging tips](#-debugging-tips)
55
+ - [⚖️ Library Scope, Positives & Negatives](#-library-scope-positives--negatives)
56
+ - [🔧 Engine Stability & Quick Fixes](#-engine-stability--quick-fixes)
57
+ - [🗺️ Roadmap](#-roadmap)
58
+ - [🤝 Contributing](#-contributing)
59
+ - [📄 License](#-license)
60
+ - [🙏 Acknowledgements](#-acknowledgements)
61
+
62
+ ---
63
+
64
+ ## ✨ Features
65
+
66
+ | Area | What's in v0.1 |
67
+ | --- | --- |
68
+ | **Core** | C++17 tensor, autograd engine, captured IR graph, single-pass backward with topological release |
69
+ | **Operators** | `+`, `-`, `*`, `/`, `**`, `@`, unary (`neg`, `exp`, `log`, `abs`, `relu`, `sigmoid`, `tanh`), reductions (`sum`, `mean`), `softmax`, `conv2d`, 2-D `matmul` — each with autograd |
70
+ | **Modules** | `Module`, `Linear`, `Sequential`, `ReLU`, `Softmax`, `Conv2d` |
71
+ | **Optimizers** | `SGD` (with momentum), `Momentum`, `Adam` |
72
+ | **Losses** | MSE, cross-entropy, binary cross-entropy |
73
+ | **Functional transforms** | `grad`, `vmap`, `jit` — first-class and composable |
74
+ | **Custom ops** | `@op_def` decorator — no C++ boilerplate |
75
+ | **Save / load** | Versioned JSON graph + content-addressed (FNV-1a) tensor blob store; optimizer checkpoint round-trip |
76
+ | **Tests** | Numerical gradient check for every op (including `conv2d` & `softmax`); e2e regression & XOR training |
77
+
78
+ > **v0.2 (planned):** CUDA backend via unified `Device` + `Stream` abstraction,
79
+ > NCCL-based distributed training, flatbuffer-based serialization.
80
+
81
+ ---
82
+
83
+ ## 🧠 Why MicroGrad?
84
+
85
+ Most autograd libraries suffer from a small set of recurring flaws. MicroGrad
86
+ is built to defeat each one. See [`docs/architecture.md`](docs/architecture.md)
87
+ for the full design.
88
+
89
+ | # | Flaw | MicroGrad's answer |
90
+ | --- | --- | --- |
91
+ | 1 | Eager/Graph duality is a leaky abstraction | **Hybrid autograd:** eager forward records an IR; backward is a single pre-planned pass. One API. |
92
+ | 2 | Backprop memory scales with loop depth | **Topological release:** IR is reversed at build time, intermediates freed as refcounts hit zero. |
93
+ | 3 | Custom op C++ boilerplate | **`@op_def` decorator** lowers a Python forward+grad into a registered op slot. |
94
+ | 5 | Distributed training API fragmentation | `Mesh` + `ShardSpec`; `pmap` lowerer inserts collectives. (v0.2) |
95
+ | 6 | SavedModel format fragility | Versioned graph format + content-addressed blob store. |
96
+ | 7 | No first-class functional transforms | `grad`, `vmap`, `jit` (and the planned `pmap`) are all `Function` → `Function`. |
97
+
98
+ ---
99
+
100
+ ## 📦 Installation
101
+
102
+ ### From PyPI
103
+
104
+ ```bash
105
+ pip install micrograd-cpp-engine # the import name is still `micrograd`
106
+ ```
107
+
108
+ ### From source (editable)
109
+
110
+ ```bash
111
+ git clone https://github.com/jwrhw7tueydwtt7575g/MicroGrad.git
112
+ cd MicroGrad
113
+ pip install -e .
114
+ ```
115
+
116
+ ### Requirements
117
+
118
+ - **Python ≥ 3.9**
119
+ - **C++17 compiler** (MSVC 2019+, gcc 9+, or clang 10+)
120
+ - **CMake ≥ 3.20**
121
+ - **pybind11 ≥ 2.10** (installed automatically)
122
+ - **numpy ≥ 1.20** (installed automatically)
123
+
124
+ > The C++ compiler is required at install time because the build backend
125
+ > (`scikit-build-core`) compiles the `micrograd._C` extension on the user's
126
+ > machine. See [`PUBLISHING.md`](PUBLISHING.md) for build details.
127
+
128
+ ---
129
+
130
+ ## 🚀 Quick start
131
+
132
+ ```python
133
+ import micrograd as mg
134
+ from micrograd import nn, optim, losses
135
+
136
+ # 1. Build a model
137
+ model = nn.Sequential(
138
+ nn.Linear(1, 16),
139
+ nn.ReLU(),
140
+ nn.Linear(16, 1),
141
+ )
142
+
143
+ # 2. Pick an optimizer
144
+ opt = optim.Adam(model.parameters(), lr=1e-2)
145
+
146
+ # 3. Some data
147
+ x = mg.tensor([[0.0], [1.0], [2.0], [3.0]])
148
+ y = mg.tensor([[0.0], [1.0], [0.0], [1.0]])
149
+
150
+ # 4. Train
151
+ for step in range(200):
152
+ opt.zero_grad()
153
+ pred = model(x)
154
+ loss = losses.mse(pred, y)
155
+ loss.backward()
156
+ opt.step()
157
+ if step % 50 == 0 or step == 199:
158
+ print(f"step {step}: loss = {loss.tolist()[0]:.4f}")
159
+ ```
160
+
161
+ Expected output:
162
+
163
+ ```
164
+ step 0: loss = 0.4912
165
+ step 50: loss = 0.1034
166
+ step 100: loss = 0.0087
167
+ step 150: loss = 0.0031
168
+ step 199: loss = 0.0024
169
+ ```
170
+
171
+ ---
172
+
173
+ ## 📚 API tour
174
+
175
+ ### Tensors
176
+
177
+ ```python
178
+ import micrograd as mg
179
+
180
+ a = mg.tensor([1.0, 2.0, 3.0], requires_grad=True)
181
+ b = mg.tensor([[1.0, 2.0], [3.0, 4.0]])
182
+
183
+ # Arithmetic
184
+ c = a * 2 + 1 # operator overloading
185
+ d = b @ b.T # matmul
186
+ e = (c ** 2).sum() # reductions
187
+
188
+ # Backward
189
+ e.backward() # fills a.grad()
190
+ print(a.grad().tolist()) # [12., 20., 28.] (d/da sum((2a + 1)**2) = 4 * (2a + 1))
191
+
192
+ # Context manager: turn off autograd inside a block
193
+ with mg.no_grad():
194
+ y = a * 100 # no graph nodes recorded
195
+ ```
196
+
197
+ `mg.tensor` accepts:
198
+
199
+ - A Python list / nested list
200
+ - A numpy array
201
+ - An existing `mg.Tensor` (returned unchanged)
202
+
203
+ ### Modules
204
+
205
+ ```python
206
+ from micrograd import nn
207
+
208
+ class MyClassifier(nn.Module):
209
+ def __init__(self):
210
+ super().__init__()
211
+ self.fc1 = nn.Linear(784, 128)
212
+ self.fc2 = nn.Linear(128, 10)
213
+ def forward(self, x):
214
+ return self.fc2(self.fc1(x).relu())
215
+
216
+ m = MyClassifier()
217
+ print(len(m.parameters())) # 4 (2 weights + 2 biases)
218
+ m.zero_grad() # wipe all gradients
219
+ ```
220
+
221
+ Pre-built modules:
222
+
223
+ - `nn.Linear(in, out, bias=True)`
224
+ - `nn.Sequential(*layers)`
225
+ - `nn.ReLU()`
226
+ - `nn.Softmax(dim=-1)`
227
+
228
+ ### Optimizers
229
+
230
+ ```python
231
+ from micrograd import optim
232
+
233
+ opt = optim.SGD (model.parameters(), lr=0.01, momentum=0.0, weight_decay=0.0)
234
+ opt = optim.Momentum(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0)
235
+ opt = optim.Adam (model.parameters(), lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-8, weight_decay=0.0)
236
+
237
+ opt.zero_grad() # call before each backward
238
+ loss.backward() # populate gradients
239
+ opt.step() # apply update
240
+ ```
241
+
242
+ ### Losses
243
+
244
+ ```python
245
+ from micrograd import losses
246
+
247
+ loss = losses.mse(pred, target) # mean squared error
248
+ loss = losses.cross_entropy(logits, target) # target is one-hot, same shape as logits
249
+ loss = losses.bce(pred, target) # binary cross-entropy
250
+ ```
251
+
252
+ ### Functional transforms
253
+
254
+ All transforms consume a Python callable and return a wrapped version. They
255
+ **compose**.
256
+
257
+ ```python
258
+ from micrograd import transforms
259
+
260
+ # grad: differentiate a function w.r.t. its inputs
261
+ g = transforms.grad(lambda x: (x ** 2).sum())
262
+ dx = g(mg.tensor([1.0, 2.0, 3.0], requires_grad=True)) # dx = [2., 4., 6.]
263
+
264
+ # vmap: vectorize over a leading batch dim
265
+ batched = transforms.vmap(lambda x: (x * 2).sum())
266
+ out = batched(mg.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])) # shape [2]
267
+
268
+ # jit: cache by argument signature
269
+ @transforms.jit
270
+ def step():
271
+ opt.zero_grad()
272
+ loss = losses.mse(model(x), y)
273
+ loss.backward()
274
+ opt.step()
275
+ return loss
276
+ ```
277
+
278
+ ### Custom ops with `@op_def`
279
+
280
+ Define new ops in pure Python — no C++ boilerplate, no recompile.
281
+
282
+ ```python
283
+ from micrograd import op_def
284
+ from micrograd import tensor
285
+
286
+ @op_def(name="smooth_l1")
287
+ def smooth_l1(pred, target):
288
+ """Huber loss: 0.5*x^2 if |x|<1 else |x| - 0.5."""
289
+ diff = pred - target
290
+ abs_diff = diff.abs()
291
+ out = []
292
+ for v in abs_diff.tolist():
293
+ out.append(0.5 * v * v if v < 1.0 else v - 0.5)
294
+ return tensor(out, abs_diff.shape).mean()
295
+
296
+ loss = smooth_l1(pred, target) # works inside an autograd-traced step
297
+ loss.backward()
298
+ ```
299
+
300
+ ### Save / load
301
+
302
+ ```python
303
+ import os, tempfile
304
+ from micrograd import _C
305
+
306
+ with tempfile.TemporaryDirectory() as d:
307
+ # Save a Function (the captured IR).
308
+ fn = _C.Function()
309
+ _C.save_function(fn, os.path.join(d, "graph.json"))
310
+ fn2 = _C.Function.load(os.path.join(d, "graph.json"))
311
+
312
+ # Save / load an optimizer (lr only in v0.1).
313
+ _C.save_optimizer(opt._opt, os.path.join(d, "opt.json"))
314
+ _C.load_optimizer(opt._opt, os.path.join(d, "opt.json"))
315
+
316
+ # Content-addressed tensor blobs.
317
+ t = mg.tensor([1.0, 2.0, 3.0, 4.0], [2, 2])
318
+ h = _C.save_blob(t, d)
319
+ t2 = _C.load_blob(d, h, [2, 2], "float32")
320
+ ```
321
+
322
+ ---
323
+
324
+ ## 🧪 Examples
325
+
326
+ Runnable scripts in [`examples/`](examples/):
327
+
328
+ | File | What it shows |
329
+ | --- | --- |
330
+ | [`01_adder.py`](examples/01_adder.py) | Minimal autograd: a*b + c, then `.backward()` |
331
+ | [`02_mlp_xor.py`](examples/02_mlp_xor.py) | 2-layer MLP solves XOR |
332
+ | [`03_toy_regression.py`](examples/03_toy_regression.py) | MLP fits `sin(3x)` |
333
+ | [`05_custom_op.py`](examples/05_custom_op.py) | Smooth-L1 loss via `@op_def` |
334
+ | [`06_jit_speedup.py`](examples/06_jit_speedup.py) | `@jit` caches by argument signature |
335
+ | [`08_save_load.py`](examples/08_save_load.py) | Save/load Function + Optimizer + blobs |
336
+
337
+ Run any of them:
338
+
339
+ ```bash
340
+ python examples/02_mlp_xor.py
341
+ ```
342
+
343
+ ---
344
+
345
+ ## 🧱 Architecture
346
+
347
+ ```
348
+ ┌────────────────────────────────────────────────────────────────────┐
349
+ │ Python (micrograd/) │
350
+ │ nn.* · optim.* · losses.* · transforms.* · op_def.* · train.* │
351
+ └──────────────────────────────┬─────────────────────────────────────┘
352
+ │ pybind11
353
+ ┌──────────────────────────────┴─────────────────────────────────────┐
354
+ │ C++ core (src/core/) │
355
+ │ Tensor · Storage · Op · OpRegistry · IR · Function · Autograd │
356
+ │ Module · Linear · Sequential · ReLU · Softmax · Loss │
357
+ │ Optimizer · SGD · Momentum · Adam · Serializer │
358
+ └──────────────────────────────┬─────────────────────────────────────┘
359
+
360
+ ┌──────────────────────────────┴─────────────────────────────────────┐
361
+ │ CPU kernels (src/kernels/cpu/) │
362
+ │ binary · unary · reduce · matmul │
363
+ │ (v0.2: CUDA kernels under the same Op trait) │
364
+ └────────────────────────────────────────────────────────────────────┘
365
+ ```
366
+
367
+ The full design document — including how each of the seven flaws is defeated
368
+ — is in [`docs/architecture.md`](docs/architecture.md).
369
+
370
+ ---
371
+
372
+ ## 🗂️ Project layout
373
+
374
+ ```
375
+ MicroGrad/
376
+ ├── CMakeLists.txt # top-level build
377
+ ├── pyproject.toml # PEP 517 build config (scikit-build-core)
378
+ ├── README.md # ← you are here
379
+ ├── PUBLISHING.md # how to publish to PyPI
380
+ ├── .gitignore
381
+ ├── LICENSE
382
+
383
+ ├── include/micrograd/ # public C++ headers
384
+ ├── src/
385
+ │ ├── core/ # CPU-only C++ core
386
+ │ ├── kernels/cpu/ # CPU kernels
387
+ │ ├── python/ # pybind11 bindings
388
+ │ └── CMakeLists.txt
389
+
390
+ ├── python/micrograd/ # pure-Python package
391
+ │ ├── nn/ optim/ transforms/ op_def/
392
+ │ ├── train/ data/ function.py tensor.py
393
+ │ └── __init__.py
394
+
395
+ ├── examples/ # runnable demos
396
+ ├── tests/ # cpp/ python/ e2e/
397
+ ├── docs/ # architecture.md
398
+ ├── schema/ # flatbuffer schemas (v0.2)
399
+ └── tools/
400
+ ```
401
+
402
+ ---
403
+
404
+ ## 🛠️ Building from source
405
+
406
+ ```bash
407
+ git clone https://github.com/jwrhw7tueydwtt7575g/MicroGrad.git
408
+ cd MicroGrad
409
+ git submodule update --init --recursive # if you add any
410
+ pip install -e ".[test]" # installs pytest, build, twine
411
+ ```
412
+
413
+ If the editable install fails on Windows complaining about CMake or a C++
414
+ compiler, install:
415
+
416
+ - **CMake**: <https://cmake.org/download/> (or `winget install Kitware.CMake`)
417
+ - **MSVC build tools**: "Desktop development with C++" from the Visual Studio
418
+ Build Tools installer.
419
+
420
+ To produce a wheel manually:
421
+
422
+ ```bash
423
+ pip install build
424
+ python -m build
425
+ ls dist/ # micrograd_cpp_engine-0.1.1-*.whl, micrograd_cpp_engine-0.1.1.tar.gz
426
+ ```
427
+
428
+ ---
429
+
430
+ ## 🧪 Running tests
431
+
432
+ ```bash
433
+ # Python unit + e2e tests
434
+ python -m pytest tests/python tests/e2e -v
435
+ ```
436
+
437
+ | Test | What it covers |
438
+ | --- | --- |
439
+ | `tests/python/test_autograd_gradcheck.py` | Numerical gradient check for every op |
440
+ | `tests/python/test_module.py` | Linear, Sequential, Adam step decreases loss |
441
+ | `tests/python/test_transforms.py` | `grad`, `vmap`, `jit` |
442
+ | `tests/python/test_op_def.py` | `@op_def` decorator + custom loss |
443
+ | `tests/python/test_serializer.py` | Function / Optimizer / blob round-trip |
444
+ | `tests/e2e/test_toy_regression.py` | MLP fits `sin(3x)` (loss < 0.05) |
445
+ | `tests/e2e/test_xor.py` | MLP solves XOR (loss < 0.05) |
446
+
447
+ ---
448
+
449
+ ## 🐞 Debugging tips
450
+
451
+ - **"Cannot import `_C`"** — the C++ extension didn't compile. Re-run
452
+ `pip install -e . -v` and read the build log. The most common cause is a
453
+ missing C++ compiler on Windows.
454
+ - **"Shape mismatch"** at op time — your shapes aren't compatible (e.g.
455
+ matmul on non-2D). Use `tensor.shape()` to inspect.
456
+ - **Gradients are zero or look wrong** — you forgot `requires_grad=True` on a
457
+ leaf tensor, or forgot `opt.zero_grad()` between steps, so stale gradients
458
+ from earlier steps contaminate the new ones.
459
+ - **Loss explodes** — try a smaller learning rate, gradient clipping
460
+ (`for p in model.parameters(): ...` — implement as needed in v0.1), or
461
+ switch from `SGD` to `Adam`.
462
+ - **NaNs in outputs** — usually an `exp` or `log` of a large/negative
463
+ number. Add a small epsilon or normalize inputs.
464
+
465
+ ---
466
+
467
+ ## ⚖️ Library Scope, Positives & Negatives
468
+
469
+ ### Library Level & Target Scope
470
+ MicroGrad is designed as a **production-grade micro-framework**. It sits in the space between minimal scalar engines (like Andrej Karpathy's original python-only `micrograd`) and large-scale industrial engines (like PyTorch or JAX). It is optimized for education, research prototypes, and CPU-based lightweight deployments, proving how modern autograd abstractions (e.g. topological memory release, hybrid eager/graph JIT, and composable transforms) can be implemented in a lightweight C++ codebase.
471
+
472
+ ### 🟢 Positives (Pros)
473
+ * **High Efficiency**: Built with a C++17 core engine, vectorized row-major layouts, and an automatic topological refcount-releasing mechanism that frees intermediate graph gradients as soon as they are no longer needed during the backward pass.
474
+ * **First-Class Transforms**: Seamless composition of `grad`, `vmap`, and `jit` functional transforms, enabling vectorized batching and optimized execution without code changes.
475
+ * **Frictionless Custom Ops**: The `@op_def` decorator enables developers to lower custom operations (both forward and backward) into registered autograd slots directly from Python, avoiding C++ compile/boilerplate overhead.
476
+ * **Strict Serialization**: Graph structure is serialized to structured JSON, while weight/data blocks are stored as content-addressed FNV-1a binary blobs.
477
+
478
+ ### 🔴 Negatives (Cons)
479
+ * **CPU Bound**: Currently, all operators default to a single-threaded CPU memory layout and operator dispatch.
480
+ * **Limited Op & Shape Coverage**: Lacks general broadcasting, advanced slicing, masking, and high-dimensional indexing features found in standard numpy/torch.
481
+ * **Signature Constraints on JIT**: JIT compiler requires static shapes and inputs; dynamic batching or control flow requires recompilation.
482
+
483
+ ---
484
+
485
+ ## 🔧 Engine Stability & Quick Fixes
486
+
487
+ During the development and testing of MicroGrad's autograd engine, several critical bugs were resolved to ensure full end-to-end regression stability and prevent state contamination.
488
+
489
+ See [Engine Stability & Quick Fixes](docs/stability_and_fixes.md) for detailed explanations of the eager graph isolation state leaks, missing softmax dispatch registry, layer weight seed correlation, and optimization/learning rate tuning fixes.
490
+
491
+ ---
492
+
493
+ ## 🗺️ Roadmap
494
+
495
+ - [x] v0.1 — CPU core, autograd, modules (including `Conv2d` and `Softmax`), optimizers, losses, transforms, custom ops, save/load (current)
496
+ - [ ] v0.2 — CUDA backend via `Device` + `Stream` abstraction
497
+ - [ ] v0.2 — `MaxPool2d`
498
+ - [ ] v0.2 — flatbuffer-based serialization schema
499
+ - [ ] v0.3 — NCCL distributed (Mesh, ShardSpec, `pmap`)
500
+ - [ ] v0.3 — higher-order autograd (grad of grad)
501
+ - [ ] v0.4 — fp16 / bf16 mixed precision
502
+ - [ ] v0.4 — TensorBoard / wandb logging hooks
503
+
504
+ ---
505
+
506
+ ## 🤝 Contributing
507
+
508
+ Contributions are welcome. Suggested workflow:
509
+
510
+ 1. Fork & branch from `main`.
511
+ 2. Run the tests locally (`python -m pytest tests/python tests/e2e`).
512
+ 3. Add a test for any new op or behavior.
513
+ 4. Keep the C++ surface small — prefer adding Python-side `nn.Module`s
514
+ over new C++ classes.
515
+ 5. Open a PR with a short description and a screenshot/output of the
516
+ example or test.
517
+
518
+ For larger changes (new backends, new transforms), open an issue first
519
+ to discuss the design.
520
+
521
+ ---
522
+
523
+ ## 📄 License
524
+
525
+ Released under the [MIT License](LICENSE).
526
+
527
+ ---
528
+
529
+ ## 🙏 Acknowledgements
530
+
531
+ - The autograd tutorial by [Andrej Karpathy](https://github.com/karpathy/nn-zero-to-hero) — the
532
+ mental model in the original brief comes from there.
533
+ - The `scikit-build-core` project for making the Python ↔ CMake workflow
534
+ painless.
535
+ - The pybind11 maintainers.
536
+
537
+ <div align="center">
538
+
539
+ **[⬆ back to top](#micrograd)**
540
+
541
+ </div>
@@ -4,10 +4,17 @@ build-backend = "scikit_build_core.build"
4
4
 
5
5
  [project]
6
6
  name = "micrograd-cpp-engine"
7
- version = "0.1.0"
7
+ version = "0.1.1"
8
8
  description = "A small autograd library with a C++ core and pybind11 Python bindings."
9
+ readme = "README.md"
9
10
  requires-python = ">=3.9"
10
11
  dependencies = ["numpy>=1.20"]
12
+ authors = [
13
+ {name = "Vivek Chaudhari"}
14
+ ]
15
+
16
+ [project.urls]
17
+ Homepage = "https://github.com/jwrhw7tueydwtt7575g/MicroGrad"
11
18
 
12
19
  [tool.scikit-build]
13
20
  wheel.packages = ["python/micrograd"]
@@ -11,7 +11,7 @@ from .train.loop import train
11
11
 
12
12
  __all__ = ["tensor", "no_grad", "enable_grad", "nn", "optim", "transforms",
13
13
  "losses", "op_def", "Function", "train"]
14
- __version__ = "0.1.0"
14
+ __version__ = "0.1.1"
15
15
 
16
16
  # Register all built-in CPU ops at import time.
17
17
  _C.init_ops()
@@ -1,7 +0,0 @@
1
- Metadata-Version: 2.2
2
- Name: micrograd-cpp-engine
3
- Version: 0.1.0
4
- Summary: A small autograd library with a C++ core and pybind11 Python bindings.
5
- Requires-Python: >=3.9
6
- Requires-Dist: numpy>=1.20
7
-