modelstudio 0.2.0__tar.gz → 0.3.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. {modelstudio-0.2.0/python/modelstudio.egg-info → modelstudio-0.3.0}/PKG-INFO +33 -6
  2. {modelstudio-0.2.0 → modelstudio-0.3.0}/README.md +32 -5
  3. modelstudio-0.3.0/benchmarks/bench_conv.py +40 -0
  4. modelstudio-0.3.0/benchmarks/bench_dropout.py +37 -0
  5. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/CMakeLists.txt +2 -0
  6. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/backends/cpu/cpu_backend.cpp +8 -0
  7. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/backends/cpu/cpu_backend.hpp +4 -0
  8. modelstudio-0.3.0/csrc/backends/cpu/kernels/mul.cpp +43 -0
  9. modelstudio-0.3.0/csrc/backends/cpu/kernels/relu.cpp +40 -0
  10. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/backends/cuda/cuda_backend.cu +10 -0
  11. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/backends/cuda/cuda_backend.hpp +2 -0
  12. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/backends/oneapi/oneapi_backend.cpp +10 -0
  13. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/backends/oneapi/oneapi_backend.hpp +2 -0
  14. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/backends/rocm/rocm_backend.cpp +10 -0
  15. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/backends/rocm/rocm_backend.hpp +2 -0
  16. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/dispatcher/backend.hpp +2 -0
  17. {modelstudio-0.2.0 → modelstudio-0.3.0}/docs/data.md +3 -2
  18. modelstudio-0.3.0/docs/modules.md +24 -0
  19. modelstudio-0.3.0/docs/native-backend-roadmap.md +26 -0
  20. {modelstudio-0.2.0 → modelstudio-0.3.0}/docs/nn.md +19 -2
  21. modelstudio-0.3.0/docs/randomness.md +23 -0
  22. modelstudio-0.3.0/docs/serialization.md +25 -0
  23. {modelstudio-0.2.0 → modelstudio-0.3.0}/docs/tensor-api.md +4 -1
  24. modelstudio-0.3.0/docs/training.md +29 -0
  25. modelstudio-0.3.0/examples/checkpoint_training.py +39 -0
  26. modelstudio-0.3.0/examples/dropout_batchnorm.py +35 -0
  27. modelstudio-0.3.0/examples/train_cnn_toy.py +40 -0
  28. {modelstudio-0.2.0 → modelstudio-0.3.0}/pyproject.toml +1 -1
  29. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/__init__.py +11 -1
  30. modelstudio-0.3.0/python/modelstudio/_version.py +1 -0
  31. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/data/dataloader.py +20 -2
  32. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/nn/__init__.py +13 -2
  33. modelstudio-0.3.0/python/modelstudio/nn/activations.py +29 -0
  34. modelstudio-0.3.0/python/modelstudio/nn/convolution.py +213 -0
  35. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/nn/embedding.py +5 -3
  36. modelstudio-0.3.0/python/modelstudio/nn/init.py +57 -0
  37. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/nn/linear.py +8 -5
  38. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/nn/losses.py +31 -7
  39. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/nn/module.py +114 -45
  40. modelstudio-0.3.0/python/modelstudio/nn/normalization.py +93 -0
  41. modelstudio-0.3.0/python/modelstudio/nn/pooling.py +135 -0
  42. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/nn/transformer.py +4 -4
  43. modelstudio-0.3.0/python/modelstudio/nn/utils.py +33 -0
  44. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/ops/__init__.py +9 -0
  45. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/ops/math.py +28 -1
  46. modelstudio-0.3.0/python/modelstudio/ops/movement.py +214 -0
  47. modelstudio-0.3.0/python/modelstudio/optim/adamw.py +120 -0
  48. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/optim/optimizer.py +14 -0
  49. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/optim/sgd.py +18 -0
  50. modelstudio-0.3.0/python/modelstudio/random.py +20 -0
  51. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/runtime/backend.py +2 -1
  52. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/tensor.py +15 -0
  53. {modelstudio-0.2.0 → modelstudio-0.3.0/python/modelstudio.egg-info}/PKG-INFO +33 -6
  54. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio.egg-info/SOURCES.txt +30 -0
  55. {modelstudio-0.2.0 → modelstudio-0.3.0}/scripts/smoke_test.py +12 -0
  56. modelstudio-0.3.0/tests/test_batchnorm.py +50 -0
  57. modelstudio-0.3.0/tests/test_concat_stack.py +51 -0
  58. modelstudio-0.3.0/tests/test_conv.py +57 -0
  59. modelstudio-0.3.0/tests/test_dataloader_seed.py +44 -0
  60. modelstudio-0.3.0/tests/test_dropout.py +60 -0
  61. modelstudio-0.3.0/tests/test_grad_clip.py +24 -0
  62. modelstudio-0.3.0/tests/test_init.py +43 -0
  63. modelstudio-0.3.0/tests/test_loss_reductions.py +34 -0
  64. modelstudio-0.3.0/tests/test_module_ergonomics.py +85 -0
  65. modelstudio-0.3.0/tests/test_optimizer_state.py +89 -0
  66. modelstudio-0.3.0/tests/test_pooling.py +31 -0
  67. modelstudio-0.3.0/tests/test_random.py +23 -0
  68. modelstudio-0.3.0/tests/test_shape_ops.py +33 -0
  69. {modelstudio-0.2.0 → modelstudio-0.3.0}/tests/test_transformer.py +10 -5
  70. {modelstudio-0.2.0 → modelstudio-0.3.0}/tests/test_version.py +1 -1
  71. modelstudio-0.2.0/python/modelstudio/_version.py +0 -1
  72. modelstudio-0.2.0/python/modelstudio/nn/activations.py +0 -14
  73. modelstudio-0.2.0/python/modelstudio/nn/normalization.py +0 -41
  74. modelstudio-0.2.0/python/modelstudio/ops/movement.py +0 -91
  75. modelstudio-0.2.0/python/modelstudio/optim/adamw.py +0 -55
  76. {modelstudio-0.2.0 → modelstudio-0.3.0}/CMakeLists.txt +0 -0
  77. {modelstudio-0.2.0 → modelstudio-0.3.0}/LICENSE +0 -0
  78. {modelstudio-0.2.0 → modelstudio-0.3.0}/MANIFEST.in +0 -0
  79. {modelstudio-0.2.0 → modelstudio-0.3.0}/benchmarks/bench_attention.py +0 -0
  80. {modelstudio-0.2.0 → modelstudio-0.3.0}/benchmarks/bench_dataloader.py +0 -0
  81. {modelstudio-0.2.0 → modelstudio-0.3.0}/benchmarks/bench_matmul.py +0 -0
  82. {modelstudio-0.2.0 → modelstudio-0.3.0}/benchmarks/bench_mlp.py +0 -0
  83. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/backends/cpu/kernels/add.cpp +0 -0
  84. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/backends/cpu/kernels/matmul.cpp +0 -0
  85. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/backends/cuda/README.md +0 -0
  86. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/backends/cuda/cuda_memory.hpp +0 -0
  87. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/backends/oneapi/README.md +0 -0
  88. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/backends/oneapi/sycl_memory.hpp +0 -0
  89. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/backends/rocm/README.md +0 -0
  90. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/backends/rocm/hip_memory.hpp +0 -0
  91. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/bindings/python_bindings.cpp +0 -0
  92. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/core/device.hpp +0 -0
  93. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/core/dtype.hpp +0 -0
  94. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/core/error.hpp +0 -0
  95. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/core/shape.hpp +0 -0
  96. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/core/storage.hpp +0 -0
  97. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/core/tensor.hpp +0 -0
  98. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/dispatcher/dispatcher.hpp +0 -0
  99. {modelstudio-0.2.0 → modelstudio-0.3.0}/csrc/dispatcher/operator_registry.hpp +0 -0
  100. {modelstudio-0.2.0 → modelstudio-0.3.0}/docs/autograd.md +0 -0
  101. {modelstudio-0.2.0 → modelstudio-0.3.0}/docs/backend-architecture.md +0 -0
  102. {modelstudio-0.2.0 → modelstudio-0.3.0}/docs/releasing.md +0 -0
  103. {modelstudio-0.2.0 → modelstudio-0.3.0}/examples/save_load.py +0 -0
  104. {modelstudio-0.2.0 → modelstudio-0.3.0}/examples/tiny_transformer.py +0 -0
  105. {modelstudio-0.2.0 → modelstudio-0.3.0}/examples/train_classifier.py +0 -0
  106. {modelstudio-0.2.0 → modelstudio-0.3.0}/examples/train_mlp.py +0 -0
  107. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/autograd/__init__.py +0 -0
  108. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/autograd/engine.py +0 -0
  109. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/autograd/function.py +0 -0
  110. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/autograd/grad_mode.py +0 -0
  111. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/compile/__init__.py +0 -0
  112. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/compile/graph_capture.py +0 -0
  113. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/compile/ir.py +0 -0
  114. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/compile/passes.py +0 -0
  115. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/data/__init__.py +0 -0
  116. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/data/dataset.py +0 -0
  117. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/device.py +0 -0
  118. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/dtypes.py +0 -0
  119. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/errors.py +0 -0
  120. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/nn/parameter.py +0 -0
  121. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/ops/creation.py +0 -0
  122. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/ops/linalg.py +0 -0
  123. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/ops/reductions.py +0 -0
  124. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/optim/__init__.py +0 -0
  125. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/py.typed +0 -0
  126. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/runtime/__init__.py +0 -0
  127. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/runtime/dispatcher.py +0 -0
  128. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/serialization.py +0 -0
  129. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/storage.py +0 -0
  130. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/testing/__init__.py +0 -0
  131. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio/testing/gradcheck.py +0 -0
  132. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio.egg-info/dependency_links.txt +0 -0
  133. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio.egg-info/requires.txt +0 -0
  134. {modelstudio-0.2.0 → modelstudio-0.3.0}/python/modelstudio.egg-info/top_level.txt +0 -0
  135. {modelstudio-0.2.0 → modelstudio-0.3.0}/setup.cfg +0 -0
  136. {modelstudio-0.2.0 → modelstudio-0.3.0}/tests/test_attention.py +0 -0
  137. {modelstudio-0.2.0 → modelstudio-0.3.0}/tests/test_autograd.py +0 -0
  138. {modelstudio-0.2.0 → modelstudio-0.3.0}/tests/test_buffers.py +0 -0
  139. {modelstudio-0.2.0 → modelstudio-0.3.0}/tests/test_data.py +0 -0
  140. {modelstudio-0.2.0 → modelstudio-0.3.0}/tests/test_dispatcher.py +0 -0
  141. {modelstudio-0.2.0 → modelstudio-0.3.0}/tests/test_embedding.py +0 -0
  142. {modelstudio-0.2.0 → modelstudio-0.3.0}/tests/test_gradcheck.py +0 -0
  143. {modelstudio-0.2.0 → modelstudio-0.3.0}/tests/test_indexing.py +0 -0
  144. {modelstudio-0.2.0 → modelstudio-0.3.0}/tests/test_losses.py +0 -0
  145. {modelstudio-0.2.0 → modelstudio-0.3.0}/tests/test_nn.py +0 -0
  146. {modelstudio-0.2.0 → modelstudio-0.3.0}/tests/test_norms.py +0 -0
  147. {modelstudio-0.2.0 → modelstudio-0.3.0}/tests/test_ops.py +0 -0
  148. {modelstudio-0.2.0 → modelstudio-0.3.0}/tests/test_optim.py +0 -0
  149. {modelstudio-0.2.0 → modelstudio-0.3.0}/tests/test_reductions_axis.py +0 -0
  150. {modelstudio-0.2.0 → modelstudio-0.3.0}/tests/test_serialization.py +0 -0
  151. {modelstudio-0.2.0 → modelstudio-0.3.0}/tests/test_state_dict.py +0 -0
  152. {modelstudio-0.2.0 → modelstudio-0.3.0}/tests/test_tensor.py +0 -0
  153. {modelstudio-0.2.0 → modelstudio-0.3.0}/tests/test_unary_ops.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: modelstudio
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: An early-stage AI tensor framework with CPU tensors, autograd, and backend extension scaffolding.
5
5
  Author: ModelStudio Contributors
6
6
  License-Expression: MIT
@@ -31,7 +31,7 @@ Dynamic: license-file
31
31
 
32
32
  # ModelStudio
33
33
 
34
- ModelStudio is an early-stage AI tensor framework. Version `0.2.0` provides a
34
+ ModelStudio is an early-stage AI tensor framework. Version `0.3.0` provides a
35
35
  CPU tensor/autograd MVP with neural-network modules, optimizers, serialization,
36
36
  basic data loading, and small LLM-oriented building blocks.
37
37
 
@@ -61,10 +61,12 @@ python -m pip install -e ".[dev]"
61
61
  | Autograd | Reverse-mode for core CPU ops |
62
62
  | Reductions | `sum`, `mean`, `max` with axis and keepdims; `max` is value-only |
63
63
  | Activations | ReLU, GELU, exp, log, tanh, sigmoid, SiLU, softmax, log-softmax |
64
- | Losses | MSE and cross entropy |
65
- | Modules | Parameters, buffers, state dicts, save/load |
66
- | Layers | Linear, Embedding, LayerNorm, RMSNorm, TransformerBlock |
67
- | Data | Dataset, TensorDataset, DataLoader |
64
+ | Losses | MSE and cross entropy with `none`, `mean`, and `sum` reductions |
65
+ | Modules | Parameters, buffers, child traversal, state dicts, save/load |
66
+ | Layers | Linear, Embedding, LayerNorm, RMSNorm, BatchNorm1d, Dropout, Conv1d, Conv2d, pooling, TransformerBlock |
67
+ | Optimizers | SGD and AdamW with state serialization |
68
+ | Data | Dataset, TensorDataset, DataLoader with deterministic seeded shuffle |
69
+ | Randomness | `manual_seed`, RNG-backed `randn`, dropout, and init helpers |
68
70
  | Compiler | Placeholder IR and passes |
69
71
 
70
72
  ## Backend Status
@@ -163,6 +165,21 @@ y = block(x)
163
165
  print(y.shape)
164
166
  ```
165
167
 
168
+ ## 0.3.0 Training Utilities
169
+
170
+ ```python
171
+ ms.manual_seed(123)
172
+ model = nn.Linear(4, 2)
173
+ optimizer = ms.optim.AdamW(model.parameters(), lr=1e-3)
174
+ state = {"model": model.state_dict(), "optimizer": optimizer.state_dict()}
175
+ ms.save(state, "checkpoint.ms")
176
+ ```
177
+
178
+ New CPU-only helpers include `ms.concat`, `ms.stack`, `Tensor.flatten`,
179
+ `Tensor.squeeze`, `Tensor.unsqueeze`, `nn.init`, `nn.Dropout`,
180
+ `nn.BatchNorm1d`, `nn.Conv1d`, `nn.Conv2d`, `nn.AvgPool2d`, `nn.MaxPool2d`,
181
+ and `nn.utils` gradient clipping.
182
+
166
183
  ## Commands
167
184
 
168
185
  ```bash
@@ -172,10 +189,15 @@ python examples/train_mlp.py
172
189
  python examples/train_classifier.py
173
190
  python examples/tiny_transformer.py
174
191
  python examples/save_load.py
192
+ python examples/train_cnn_toy.py
193
+ python examples/dropout_batchnorm.py
194
+ python examples/checkpoint_training.py
175
195
  python benchmarks/bench_matmul.py
176
196
  python benchmarks/bench_mlp.py
177
197
  python benchmarks/bench_attention.py
178
198
  python benchmarks/bench_dataloader.py
199
+ python benchmarks/bench_conv.py
200
+ python benchmarks/bench_dropout.py
179
201
  ```
180
202
 
181
203
  ## Documentation
@@ -183,6 +205,11 @@ python benchmarks/bench_dataloader.py
183
205
  - [Tensor API](docs/tensor-api.md)
184
206
  - [Neural network API](docs/nn.md)
185
207
  - [Data utilities](docs/data.md)
208
+ - [Training](docs/training.md)
209
+ - [Modules](docs/modules.md)
210
+ - [Serialization](docs/serialization.md)
211
+ - [Randomness](docs/randomness.md)
212
+ - [Native backend roadmap](docs/native-backend-roadmap.md)
186
213
  - [Backend architecture](docs/backend-architecture.md)
187
214
  - [Autograd design](docs/autograd.md)
188
215
  - [Releasing](docs/releasing.md)
@@ -1,6 +1,6 @@
1
1
  # ModelStudio
2
2
 
3
- ModelStudio is an early-stage AI tensor framework. Version `0.2.0` provides a
3
+ ModelStudio is an early-stage AI tensor framework. Version `0.3.0` provides a
4
4
  CPU tensor/autograd MVP with neural-network modules, optimizers, serialization,
5
5
  basic data loading, and small LLM-oriented building blocks.
6
6
 
@@ -30,10 +30,12 @@ python -m pip install -e ".[dev]"
30
30
  | Autograd | Reverse-mode for core CPU ops |
31
31
  | Reductions | `sum`, `mean`, `max` with axis and keepdims; `max` is value-only |
32
32
  | Activations | ReLU, GELU, exp, log, tanh, sigmoid, SiLU, softmax, log-softmax |
33
- | Losses | MSE and cross entropy |
34
- | Modules | Parameters, buffers, state dicts, save/load |
35
- | Layers | Linear, Embedding, LayerNorm, RMSNorm, TransformerBlock |
36
- | Data | Dataset, TensorDataset, DataLoader |
33
+ | Losses | MSE and cross entropy with `none`, `mean`, and `sum` reductions |
34
+ | Modules | Parameters, buffers, child traversal, state dicts, save/load |
35
+ | Layers | Linear, Embedding, LayerNorm, RMSNorm, BatchNorm1d, Dropout, Conv1d, Conv2d, pooling, TransformerBlock |
36
+ | Optimizers | SGD and AdamW with state serialization |
37
+ | Data | Dataset, TensorDataset, DataLoader with deterministic seeded shuffle |
38
+ | Randomness | `manual_seed`, RNG-backed `randn`, dropout, and init helpers |
37
39
  | Compiler | Placeholder IR and passes |
38
40
 
39
41
  ## Backend Status
@@ -132,6 +134,21 @@ y = block(x)
132
134
  print(y.shape)
133
135
  ```
134
136
 
137
+ ## 0.3.0 Training Utilities
138
+
139
+ ```python
140
+ ms.manual_seed(123)
141
+ model = nn.Linear(4, 2)
142
+ optimizer = ms.optim.AdamW(model.parameters(), lr=1e-3)
143
+ state = {"model": model.state_dict(), "optimizer": optimizer.state_dict()}
144
+ ms.save(state, "checkpoint.ms")
145
+ ```
146
+
147
+ New CPU-only helpers include `ms.concat`, `ms.stack`, `Tensor.flatten`,
148
+ `Tensor.squeeze`, `Tensor.unsqueeze`, `nn.init`, `nn.Dropout`,
149
+ `nn.BatchNorm1d`, `nn.Conv1d`, `nn.Conv2d`, `nn.AvgPool2d`, `nn.MaxPool2d`,
150
+ and `nn.utils` gradient clipping.
151
+
135
152
  ## Commands
136
153
 
137
154
  ```bash
@@ -141,10 +158,15 @@ python examples/train_mlp.py
141
158
  python examples/train_classifier.py
142
159
  python examples/tiny_transformer.py
143
160
  python examples/save_load.py
161
+ python examples/train_cnn_toy.py
162
+ python examples/dropout_batchnorm.py
163
+ python examples/checkpoint_training.py
144
164
  python benchmarks/bench_matmul.py
145
165
  python benchmarks/bench_mlp.py
146
166
  python benchmarks/bench_attention.py
147
167
  python benchmarks/bench_dataloader.py
168
+ python benchmarks/bench_conv.py
169
+ python benchmarks/bench_dropout.py
148
170
  ```
149
171
 
150
172
  ## Documentation
@@ -152,6 +174,11 @@ python benchmarks/bench_dataloader.py
152
174
  - [Tensor API](docs/tensor-api.md)
153
175
  - [Neural network API](docs/nn.md)
154
176
  - [Data utilities](docs/data.md)
177
+ - [Training](docs/training.md)
178
+ - [Modules](docs/modules.md)
179
+ - [Serialization](docs/serialization.md)
180
+ - [Randomness](docs/randomness.md)
181
+ - [Native backend roadmap](docs/native-backend-roadmap.md)
155
182
  - [Backend architecture](docs/backend-architecture.md)
156
183
  - [Autograd design](docs/autograd.md)
157
184
  - [Releasing](docs/releasing.md)
@@ -0,0 +1,40 @@
1
+ from __future__ import annotations
2
+
3
+ import platform
4
+ import time
5
+
6
+ import modelstudio as ms
7
+ import numpy as np
8
+ from modelstudio import nn
9
+
10
+
11
+ def timeit(fn, iterations: int, warmup: int) -> float:
12
+ for _ in range(warmup):
13
+ fn()
14
+ start = time.perf_counter()
15
+ for _ in range(iterations):
16
+ fn()
17
+ return (time.perf_counter() - start) / iterations
18
+
19
+
20
+ def main() -> None:
21
+ warmup = 3
22
+ iterations = 10
23
+ shape = (8, 3, 16, 16)
24
+ ms.manual_seed(1)
25
+ x = ms.randn(shape)
26
+ conv = nn.Conv2d(3, 8, kernel_size=3, padding=1)
27
+
28
+ print(f"Python: {platform.python_version()}")
29
+ print(f"NumPy: {np.__version__}")
30
+ print(f"ModelStudio: {ms.__version__}")
31
+ print(f"Operation: Conv2d input={shape} out_channels=8 kernel=3 padding=1")
32
+ print(f"Warmup: {warmup}")
33
+ print(f"Iterations: {iterations}")
34
+ print("Backend: CPU only")
35
+ print(f"Conv2d avg: {timeit(lambda: conv(x), iterations, warmup) * 1_000:.3f} ms")
36
+
37
+
38
+ if __name__ == "__main__":
39
+ main()
40
+
@@ -0,0 +1,37 @@
1
+ from __future__ import annotations
2
+
3
+ import platform
4
+ import time
5
+
6
+ import modelstudio as ms
7
+ import numpy as np
8
+
9
+
10
+ def timeit(fn, iterations: int, warmup: int) -> float:
11
+ for _ in range(warmup):
12
+ fn()
13
+ start = time.perf_counter()
14
+ for _ in range(iterations):
15
+ fn()
16
+ return (time.perf_counter() - start) / iterations
17
+
18
+
19
+ def main() -> None:
20
+ warmup = 5
21
+ iterations = 50
22
+ shape = (512, 512)
23
+ x = ms.randn(shape)
24
+
25
+ print(f"Python: {platform.python_version()}")
26
+ print(f"NumPy: {np.__version__}")
27
+ print(f"ModelStudio: {ms.__version__}")
28
+ print(f"Operation: dropout shape={shape} p=0.5")
29
+ print(f"Warmup: {warmup}")
30
+ print(f"Iterations: {iterations}")
31
+ print("Backend: CPU only")
32
+ print(f"Dropout avg: {timeit(lambda: ms.dropout(x, p=0.5), iterations, warmup) * 1_000:.3f} ms")
33
+
34
+
35
+ if __name__ == "__main__":
36
+ main()
37
+
@@ -2,6 +2,8 @@ add_library(modelstudio_native STATIC
2
2
  backends/cpu/cpu_backend.cpp
3
3
  backends/cpu/kernels/add.cpp
4
4
  backends/cpu/kernels/matmul.cpp
5
+ backends/cpu/kernels/mul.cpp
6
+ backends/cpu/kernels/relu.cpp
5
7
  )
6
8
 
7
9
  target_include_directories(modelstudio_native PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
@@ -10,8 +10,16 @@ Tensor CPUBackend::add(const Tensor& lhs, const Tensor& rhs) {
10
10
  return add_kernel(lhs, rhs);
11
11
  }
12
12
 
13
+ Tensor CPUBackend::mul(const Tensor& lhs, const Tensor& rhs) {
14
+ return mul_kernel(lhs, rhs);
15
+ }
16
+
13
17
  Tensor CPUBackend::matmul(const Tensor& lhs, const Tensor& rhs) {
14
18
  return matmul_kernel(lhs, rhs);
15
19
  }
16
20
 
21
+ Tensor CPUBackend::relu(const Tensor& input) {
22
+ return relu_kernel(input);
23
+ }
24
+
17
25
  } // namespace modelstudio::cpu
@@ -5,14 +5,18 @@
5
5
  namespace modelstudio::cpu {
6
6
 
7
7
  Tensor add_kernel(const Tensor& lhs, const Tensor& rhs);
8
+ Tensor mul_kernel(const Tensor& lhs, const Tensor& rhs);
8
9
  Tensor matmul_kernel(const Tensor& lhs, const Tensor& rhs);
10
+ Tensor relu_kernel(const Tensor& input);
9
11
 
10
12
  class CPUBackend final : public Backend {
11
13
  public:
12
14
  std::string_view name() const override { return "cpu"; }
13
15
  Tensor empty(const Shape& shape, DType dtype) override;
14
16
  Tensor add(const Tensor& lhs, const Tensor& rhs) override;
17
+ Tensor mul(const Tensor& lhs, const Tensor& rhs) override;
15
18
  Tensor matmul(const Tensor& lhs, const Tensor& rhs) override;
19
+ Tensor relu(const Tensor& input) override;
16
20
  };
17
21
 
18
22
  } // namespace modelstudio::cpu
@@ -0,0 +1,43 @@
1
+ #include "backends/cpu/cpu_backend.hpp"
2
+
3
+ namespace modelstudio::cpu {
4
+ namespace {
5
+
6
+ template <typename T>
7
+ void mul_typed(const Tensor& lhs, const Tensor& rhs, Tensor& out) {
8
+ const auto* lhs_ptr = static_cast<const T*>(lhs.data());
9
+ const auto* rhs_ptr = static_cast<const T*>(rhs.data());
10
+ auto* out_ptr = static_cast<T*>(out.data());
11
+ for (std::int64_t i = 0; i < lhs.numel(); ++i) {
12
+ out_ptr[i] = lhs_ptr[i] * rhs_ptr[i];
13
+ }
14
+ }
15
+
16
+ } // namespace
17
+
18
+ Tensor mul_kernel(const Tensor& lhs, const Tensor& rhs) {
19
+ if (lhs.shape() != rhs.shape() || lhs.dtype() != rhs.dtype()) {
20
+ throw Error("native CPU mul currently requires identical shape and dtype");
21
+ }
22
+ Tensor out(lhs.shape(), lhs.dtype(), lhs.device());
23
+ switch (lhs.dtype()) {
24
+ case DType::Float32:
25
+ mul_typed<float>(lhs, rhs, out);
26
+ break;
27
+ case DType::Float64:
28
+ mul_typed<double>(lhs, rhs, out);
29
+ break;
30
+ case DType::Int32:
31
+ mul_typed<std::int32_t>(lhs, rhs, out);
32
+ break;
33
+ case DType::Int64:
34
+ mul_typed<std::int64_t>(lhs, rhs, out);
35
+ break;
36
+ case DType::Bool:
37
+ mul_typed<bool>(lhs, rhs, out);
38
+ break;
39
+ }
40
+ return out;
41
+ }
42
+
43
+ } // namespace modelstudio::cpu
@@ -0,0 +1,40 @@
1
+ #include "backends/cpu/cpu_backend.hpp"
2
+
3
+ #include <algorithm>
4
+
5
+ namespace modelstudio::cpu {
6
+ namespace {
7
+
8
+ template <typename T>
9
+ void relu_typed(const Tensor& input, Tensor& out) {
10
+ const auto* input_ptr = static_cast<const T*>(input.data());
11
+ auto* out_ptr = static_cast<T*>(out.data());
12
+ for (std::int64_t i = 0; i < input.numel(); ++i) {
13
+ out_ptr[i] = std::max<T>(input_ptr[i], T{});
14
+ }
15
+ }
16
+
17
+ } // namespace
18
+
19
+ Tensor relu_kernel(const Tensor& input) {
20
+ Tensor out(input.shape(), input.dtype(), input.device());
21
+ switch (input.dtype()) {
22
+ case DType::Float32:
23
+ relu_typed<float>(input, out);
24
+ break;
25
+ case DType::Float64:
26
+ relu_typed<double>(input, out);
27
+ break;
28
+ case DType::Int32:
29
+ relu_typed<std::int32_t>(input, out);
30
+ break;
31
+ case DType::Int64:
32
+ relu_typed<std::int64_t>(input, out);
33
+ break;
34
+ case DType::Bool:
35
+ throw Error("native CPU bool relu is not implemented");
36
+ }
37
+ return out;
38
+ }
39
+
40
+ } // namespace modelstudio::cpu
@@ -14,9 +14,19 @@ Tensor CUDABackend::add(const Tensor&, const Tensor&) {
14
14
  throw Error("CUDA add kernel is not implemented");
15
15
  }
16
16
 
17
+ Tensor CUDABackend::mul(const Tensor&, const Tensor&) {
18
+ // TODO: Add a tested CUDA elementwise multiply kernel.
19
+ throw Error("CUDA mul kernel is not implemented");
20
+ }
21
+
17
22
  Tensor CUDABackend::matmul(const Tensor&, const Tensor&) {
18
23
  // TODO: Add a tested CUDA matmul kernel or cuBLAS integration.
19
24
  throw Error("CUDA matmul kernel is not implemented");
20
25
  }
21
26
 
27
+ Tensor CUDABackend::relu(const Tensor&) {
28
+ // TODO: Add a tested CUDA ReLU kernel.
29
+ throw Error("CUDA relu kernel is not implemented");
30
+ }
31
+
22
32
  } // namespace modelstudio::cuda
@@ -9,7 +9,9 @@ class CUDABackend final : public Backend {
9
9
  std::string_view name() const override { return "cuda"; }
10
10
  Tensor empty(const Shape& shape, DType dtype) override;
11
11
  Tensor add(const Tensor& lhs, const Tensor& rhs) override;
12
+ Tensor mul(const Tensor& lhs, const Tensor& rhs) override;
12
13
  Tensor matmul(const Tensor& lhs, const Tensor& rhs) override;
14
+ Tensor relu(const Tensor& input) override;
13
15
  };
14
16
 
15
17
  } // namespace modelstudio::cuda
@@ -14,9 +14,19 @@ Tensor OneAPIBackend::add(const Tensor&, const Tensor&) {
14
14
  throw Error("oneAPI add kernel is not implemented");
15
15
  }
16
16
 
17
+ Tensor OneAPIBackend::mul(const Tensor&, const Tensor&) {
18
+ // TODO: Add a tested SYCL elementwise multiply kernel.
19
+ throw Error("oneAPI mul kernel is not implemented");
20
+ }
21
+
17
22
  Tensor OneAPIBackend::matmul(const Tensor&, const Tensor&) {
18
23
  // TODO: Add a tested SYCL matmul kernel or oneMKL integration.
19
24
  throw Error("oneAPI matmul kernel is not implemented");
20
25
  }
21
26
 
27
+ Tensor OneAPIBackend::relu(const Tensor&) {
28
+ // TODO: Add a tested SYCL ReLU kernel.
29
+ throw Error("oneAPI relu kernel is not implemented");
30
+ }
31
+
22
32
  } // namespace modelstudio::oneapi
@@ -9,7 +9,9 @@ class OneAPIBackend final : public Backend {
9
9
  std::string_view name() const override { return "oneapi"; }
10
10
  Tensor empty(const Shape& shape, DType dtype) override;
11
11
  Tensor add(const Tensor& lhs, const Tensor& rhs) override;
12
+ Tensor mul(const Tensor& lhs, const Tensor& rhs) override;
12
13
  Tensor matmul(const Tensor& lhs, const Tensor& rhs) override;
14
+ Tensor relu(const Tensor& input) override;
13
15
  };
14
16
 
15
17
  } // namespace modelstudio::oneapi
@@ -14,9 +14,19 @@ Tensor ROCmBackend::add(const Tensor&, const Tensor&) {
14
14
  throw Error("ROCm add kernel is not implemented");
15
15
  }
16
16
 
17
+ Tensor ROCmBackend::mul(const Tensor&, const Tensor&) {
18
+ // TODO: Add a tested HIP elementwise multiply kernel.
19
+ throw Error("ROCm mul kernel is not implemented");
20
+ }
21
+
17
22
  Tensor ROCmBackend::matmul(const Tensor&, const Tensor&) {
18
23
  // TODO: Add a tested HIP matmul kernel or rocBLAS integration.
19
24
  throw Error("ROCm matmul kernel is not implemented");
20
25
  }
21
26
 
27
+ Tensor ROCmBackend::relu(const Tensor&) {
28
+ // TODO: Add a tested HIP ReLU kernel.
29
+ throw Error("ROCm relu kernel is not implemented");
30
+ }
31
+
22
32
  } // namespace modelstudio::rocm
@@ -9,7 +9,9 @@ class ROCmBackend final : public Backend {
9
9
  std::string_view name() const override { return "rocm"; }
10
10
  Tensor empty(const Shape& shape, DType dtype) override;
11
11
  Tensor add(const Tensor& lhs, const Tensor& rhs) override;
12
+ Tensor mul(const Tensor& lhs, const Tensor& rhs) override;
12
13
  Tensor matmul(const Tensor& lhs, const Tensor& rhs) override;
14
+ Tensor relu(const Tensor& input) override;
13
15
  };
14
16
 
15
17
  } // namespace modelstudio::rocm
@@ -12,7 +12,9 @@ class Backend {
12
12
  virtual std::string_view name() const = 0;
13
13
  virtual Tensor empty(const Shape& shape, DType dtype) = 0;
14
14
  virtual Tensor add(const Tensor& lhs, const Tensor& rhs) = 0;
15
+ virtual Tensor mul(const Tensor& lhs, const Tensor& rhs) = 0;
15
16
  virtual Tensor matmul(const Tensor& lhs, const Tensor& rhs) = 0;
17
+ virtual Tensor relu(const Tensor& input) = 0;
16
18
  };
17
19
 
18
20
  } // namespace modelstudio
@@ -20,7 +20,7 @@ All tensors must have the same first dimension. Items are returned as tuples.
20
20
  ## DataLoader
21
21
 
22
22
  ```python
23
- loader = data.DataLoader(dataset, batch_size=32, shuffle=True, drop_last=False)
23
+ loader = data.DataLoader(dataset, batch_size=32, shuffle=True, drop_last=False, seed=123)
24
24
  ```
25
25
 
26
26
  The current `DataLoader` is intentionally simple:
@@ -28,5 +28,6 @@ The current `DataLoader` is intentionally simple:
28
28
  - CPU only
29
29
  - no multiprocessing
30
30
  - no workers
31
- - supports tensors and tuples of tensors
31
+ - deterministic shuffle when `seed` is provided
32
+ - supports tensors, tuples/lists, and simple Python numeric values
32
33
  - batches along axis 0
@@ -0,0 +1,24 @@
1
+ # Modules
2
+
3
+ `nn.Module` is the base class for layers and models.
4
+
5
+ Important APIs:
6
+
7
+ - `parameters()` and `named_parameters()`
8
+ - `register_buffer(name, tensor)`
9
+ - `named_buffers()`
10
+ - `children()`, `modules()`, and `named_modules()`
11
+ - `train(mode=True)` and `eval()`
12
+ - `state_dict()` and `load_state_dict(state)`
13
+
14
+ Assigning a `Parameter` registers it as trainable. Assigning another `Module`
15
+ registers it as a child module. Assigning a plain `Tensor` does not make it a
16
+ parameter; persistent non-trainable tensors should use `register_buffer`.
17
+
18
+ Deleting a parameter, child module, or buffer attribute removes it from the
19
+ corresponding registry.
20
+
21
+ Available CPU layers include Linear, Embedding, LayerNorm, RMSNorm,
22
+ BatchNorm1d, Dropout, Conv1d, Conv2d, AvgPool2d, MaxPool2d, and
23
+ TransformerBlock.
24
+
@@ -0,0 +1,26 @@
1
+ # Native Backend Roadmap
2
+
3
+ The Python runtime still uses NumPy CPU kernels in 0.3.0. The native C++ tree is
4
+ scaffolding for future backend work and is intentionally not wired into Python
5
+ dispatch yet.
6
+
7
+ Current native CPU preparation includes interfaces for:
8
+
9
+ - `add`
10
+ - `mul`
11
+ - `matmul`
12
+ - `relu`
13
+
14
+ The intended path is:
15
+
16
+ 1. Expand native tensor storage and shape/stride metadata.
17
+ 2. Add tested C++ CPU kernels with broadcasting and dtype coverage.
18
+ 3. Bind native CPU kernels through the Python extension.
19
+ 4. Switch the Python CPU backend behind the dispatcher without changing public
20
+ tensor APIs.
21
+ 5. Add real CUDA, ROCm, and oneAPI packages only when kernels compile and run
22
+ under hardware-backed CI.
23
+
24
+ CUDA, ROCm, and oneAPI remain unavailable placeholders. Requesting those device
25
+ types raises `ModelStudioBackendUnavailable`.
26
+
@@ -17,6 +17,9 @@ model = nn.Linear(4, 2)
17
17
  - `named_parameters()`
18
18
  - `register_buffer()`
19
19
  - `named_buffers()`
20
+ - `children()`
21
+ - `modules()`
22
+ - `named_modules()`
20
23
  - `state_dict()`
21
24
  - `load_state_dict()`
22
25
  - `train()` / `eval()`
@@ -33,14 +36,28 @@ Current CPU layers include:
33
36
  - `Embedding`
34
37
  - `LayerNorm`
35
38
  - `RMSNorm`
39
+ - `BatchNorm1d`
40
+ - `Dropout`
41
+ - `Conv1d`
42
+ - `Conv2d`
43
+ - `AvgPool2d`
44
+ - `MaxPool2d`
36
45
  - `TransformerBlock`
37
46
  - `ReLU`
38
47
  - `GELU`
39
48
  - `MSELoss`
40
49
  - `CrossEntropyLoss`
41
50
 
42
- `TransformerBlock` is intentionally minimal and CPU-only. Nonzero dropout raises
43
- a clear error because dropout is not implemented yet.
51
+ `TransformerBlock` is intentionally minimal and CPU-only. Dropout is supported
52
+ through `nn.Dropout` and respects `train()` / `eval()` mode.
53
+
54
+ ## Initialization and Gradient Clipping
55
+
56
+ `nn.init` provides in-place seeded initialization helpers such as
57
+ `xavier_uniform_` and `kaiming_uniform_`.
58
+
59
+ `nn.utils.clip_grad_norm_` and `nn.utils.clip_grad_value_` mutate gradients
60
+ in-place and ignore parameters whose gradients are absent.
44
61
 
45
62
  ## Save and Load
46
63
 
@@ -0,0 +1,23 @@
1
+ # Randomness
2
+
3
+ ModelStudio uses a process-local NumPy `Generator` for framework randomness.
4
+
5
+ ```python
6
+ import modelstudio as ms
7
+
8
+ ms.manual_seed(123)
9
+ x = ms.randn((2, 3))
10
+ y = ms.dropout(ms.ones((2, 3)), p=0.5)
11
+ ```
12
+
13
+ `manual_seed(seed)` resets the ModelStudio RNG and returns the normalized seed.
14
+ It controls:
15
+
16
+ - `ms.randn`
17
+ - dropout masks
18
+ - seeded initialization helpers in `nn.init`
19
+ - DataLoader shuffling when no explicit DataLoader seed is provided
20
+
21
+ `DataLoader(..., seed=123)` uses its own deterministic shuffle sequence for
22
+ each iteration over that loader.
23
+
@@ -0,0 +1,25 @@
1
+ # Serialization
2
+
3
+ `ms.save(obj, path)` and `ms.load(path)` serialize nested Python containers that
4
+ contain ModelStudio tensors. This is suitable for model and optimizer
5
+ checkpoints in the CPU MVP.
6
+
7
+ ```python
8
+ checkpoint = {
9
+ "model": model.state_dict(),
10
+ "optimizer": optimizer.state_dict(),
11
+ }
12
+ ms.save(checkpoint, "checkpoint.ms")
13
+
14
+ loaded = ms.load("checkpoint.ms")
15
+ model.load_state_dict(loaded["model"])
16
+ optimizer.load_state_dict(loaded["optimizer"])
17
+ ```
18
+
19
+ Optimizer state dictionaries include hyperparameters and parameter-count
20
+ metadata. AdamW also stores step counts and first/second moment buffers.
21
+ Loading validates parameter count and moment shapes.
22
+
23
+ The format is currently a Python pickle-based internal format, not a stable
24
+ cross-language exchange format.
25
+
@@ -30,11 +30,14 @@ ms.arange(10)
30
30
  ## Operations
31
31
 
32
32
  ModelStudio supports CPU arithmetic, matrix multiplication, reductions, reshape,
33
- transpose, indexing, and common neural-network activations.
33
+ transpose, indexing, concat/stack, flatten/squeeze/unsqueeze, and common
34
+ neural-network activations.
34
35
 
35
36
  ```python
36
37
  y = x.sum(axis=-1, keepdims=True)
37
38
  z = x.softmax(axis=-1)
39
+ flat = x.flatten(start_dim=1)
40
+ joined = ms.concat([flat, flat], axis=-1)
38
41
  ```
39
42
 
40
43
  Autograd is implemented for the core CPU ops used by the neural-network API.