modelstudio 0.2.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. {modelstudio-0.2.0/python/modelstudio.egg-info → modelstudio-0.4.0}/PKG-INFO +76 -7
  2. {modelstudio-0.2.0 → modelstudio-0.4.0}/README.md +75 -6
  3. modelstudio-0.4.0/benchmarks/bench_conv.py +40 -0
  4. modelstudio-0.4.0/benchmarks/bench_creation.py +38 -0
  5. modelstudio-0.4.0/benchmarks/bench_dropout.py +37 -0
  6. modelstudio-0.4.0/benchmarks/bench_manipulation.py +38 -0
  7. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/CMakeLists.txt +2 -0
  8. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/backends/cpu/cpu_backend.cpp +8 -0
  9. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/backends/cpu/cpu_backend.hpp +4 -0
  10. modelstudio-0.4.0/csrc/backends/cpu/kernels/mul.cpp +43 -0
  11. modelstudio-0.4.0/csrc/backends/cpu/kernels/relu.cpp +40 -0
  12. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/backends/cuda/cuda_backend.cu +10 -0
  13. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/backends/cuda/cuda_backend.hpp +2 -0
  14. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/backends/oneapi/oneapi_backend.cpp +10 -0
  15. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/backends/oneapi/oneapi_backend.hpp +2 -0
  16. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/backends/rocm/rocm_backend.cpp +10 -0
  17. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/backends/rocm/rocm_backend.hpp +2 -0
  18. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/dispatcher/backend.hpp +2 -0
  19. modelstudio-0.4.0/docs/checkpointing.md +30 -0
  20. {modelstudio-0.2.0 → modelstudio-0.4.0}/docs/data.md +12 -2
  21. modelstudio-0.4.0/docs/metrics.md +11 -0
  22. modelstudio-0.4.0/docs/modules.md +24 -0
  23. modelstudio-0.4.0/docs/native-backend-roadmap.md +25 -0
  24. {modelstudio-0.2.0 → modelstudio-0.4.0}/docs/nn.md +22 -2
  25. modelstudio-0.4.0/docs/numpy-interop.md +22 -0
  26. modelstudio-0.4.0/docs/optimizers.md +28 -0
  27. modelstudio-0.4.0/docs/randomness.md +23 -0
  28. modelstudio-0.4.0/docs/serialization.md +25 -0
  29. {modelstudio-0.2.0 → modelstudio-0.4.0}/docs/tensor-api.md +9 -1
  30. modelstudio-0.4.0/docs/tensor-creation.md +23 -0
  31. modelstudio-0.4.0/docs/tensor-manipulation.md +25 -0
  32. modelstudio-0.4.0/docs/training.md +28 -0
  33. modelstudio-0.4.0/examples/checkpoint_resume.py +43 -0
  34. modelstudio-0.4.0/examples/checkpoint_training.py +39 -0
  35. modelstudio-0.4.0/examples/dropout_batchnorm.py +35 -0
  36. modelstudio-0.4.0/examples/metrics_demo.py +20 -0
  37. modelstudio-0.4.0/examples/numpy_interop.py +21 -0
  38. modelstudio-0.4.0/examples/scheduler_training.py +27 -0
  39. modelstudio-0.4.0/examples/train_cnn_toy.py +40 -0
  40. {modelstudio-0.2.0 → modelstudio-0.4.0}/pyproject.toml +1 -1
  41. modelstudio-0.4.0/python/modelstudio/__init__.py +148 -0
  42. modelstudio-0.4.0/python/modelstudio/_version.py +1 -0
  43. modelstudio-0.4.0/python/modelstudio/data/__init__.py +4 -0
  44. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/data/dataloader.py +20 -2
  45. modelstudio-0.4.0/python/modelstudio/data/dataset.py +59 -0
  46. modelstudio-0.4.0/python/modelstudio/interop.py +41 -0
  47. modelstudio-0.4.0/python/modelstudio/metrics/__init__.py +4 -0
  48. modelstudio-0.4.0/python/modelstudio/metrics/classification.py +19 -0
  49. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/nn/__init__.py +16 -2
  50. modelstudio-0.4.0/python/modelstudio/nn/activations.py +57 -0
  51. modelstudio-0.4.0/python/modelstudio/nn/convolution.py +213 -0
  52. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/nn/embedding.py +5 -3
  53. modelstudio-0.4.0/python/modelstudio/nn/init.py +57 -0
  54. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/nn/linear.py +8 -5
  55. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/nn/losses.py +31 -7
  56. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/nn/module.py +114 -45
  57. modelstudio-0.4.0/python/modelstudio/nn/normalization.py +93 -0
  58. modelstudio-0.4.0/python/modelstudio/nn/pooling.py +135 -0
  59. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/nn/transformer.py +4 -4
  60. modelstudio-0.4.0/python/modelstudio/nn/utils.py +33 -0
  61. modelstudio-0.4.0/python/modelstudio/ops/__init__.py +110 -0
  62. modelstudio-0.4.0/python/modelstudio/ops/creation.py +251 -0
  63. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/ops/math.py +96 -1
  64. modelstudio-0.4.0/python/modelstudio/ops/movement.py +354 -0
  65. modelstudio-0.4.0/python/modelstudio/optim/__init__.py +7 -0
  66. modelstudio-0.4.0/python/modelstudio/optim/adamw.py +135 -0
  67. modelstudio-0.4.0/python/modelstudio/optim/lr_scheduler.py +114 -0
  68. modelstudio-0.4.0/python/modelstudio/optim/optimizer.py +98 -0
  69. modelstudio-0.4.0/python/modelstudio/optim/sgd.py +51 -0
  70. modelstudio-0.4.0/python/modelstudio/random.py +20 -0
  71. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/runtime/backend.py +2 -1
  72. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/serialization.py +33 -0
  73. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/tensor.py +87 -1
  74. {modelstudio-0.2.0 → modelstudio-0.4.0/python/modelstudio.egg-info}/PKG-INFO +76 -7
  75. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio.egg-info/SOURCES.txt +57 -0
  76. modelstudio-0.4.0/scripts/smoke_test.py +74 -0
  77. modelstudio-0.4.0/tests/test_activations_more.py +42 -0
  78. modelstudio-0.4.0/tests/test_batchnorm.py +50 -0
  79. modelstudio-0.4.0/tests/test_checkpoint_helpers.py +37 -0
  80. modelstudio-0.4.0/tests/test_concat_stack.py +51 -0
  81. modelstudio-0.4.0/tests/test_conv.py +57 -0
  82. modelstudio-0.4.0/tests/test_creation_more.py +52 -0
  83. modelstudio-0.4.0/tests/test_data_split.py +32 -0
  84. modelstudio-0.4.0/tests/test_dataloader_seed.py +44 -0
  85. modelstudio-0.4.0/tests/test_dropout.py +60 -0
  86. modelstudio-0.4.0/tests/test_dtype_conversion.py +32 -0
  87. modelstudio-0.4.0/tests/test_grad_clip.py +24 -0
  88. modelstudio-0.4.0/tests/test_indexing_assignment.py +45 -0
  89. modelstudio-0.4.0/tests/test_init.py +43 -0
  90. modelstudio-0.4.0/tests/test_loss_reductions.py +34 -0
  91. modelstudio-0.4.0/tests/test_lr_scheduler.py +42 -0
  92. modelstudio-0.4.0/tests/test_manipulation_ops.py +64 -0
  93. modelstudio-0.4.0/tests/test_metrics.py +18 -0
  94. modelstudio-0.4.0/tests/test_module_ergonomics.py +85 -0
  95. modelstudio-0.4.0/tests/test_numpy_interop.py +51 -0
  96. modelstudio-0.4.0/tests/test_optimizer_param_groups.py +48 -0
  97. modelstudio-0.4.0/tests/test_optimizer_state.py +89 -0
  98. modelstudio-0.4.0/tests/test_pooling.py +31 -0
  99. modelstudio-0.4.0/tests/test_random.py +23 -0
  100. modelstudio-0.4.0/tests/test_shape_ops.py +33 -0
  101. {modelstudio-0.2.0 → modelstudio-0.4.0}/tests/test_transformer.py +10 -5
  102. {modelstudio-0.2.0 → modelstudio-0.4.0}/tests/test_version.py +1 -1
  103. modelstudio-0.2.0/python/modelstudio/__init__.py +0 -66
  104. modelstudio-0.2.0/python/modelstudio/_version.py +0 -1
  105. modelstudio-0.2.0/python/modelstudio/data/__init__.py +0 -4
  106. modelstudio-0.2.0/python/modelstudio/data/dataset.py +0 -30
  107. modelstudio-0.2.0/python/modelstudio/nn/activations.py +0 -14
  108. modelstudio-0.2.0/python/modelstudio/nn/normalization.py +0 -41
  109. modelstudio-0.2.0/python/modelstudio/ops/__init__.py +0 -49
  110. modelstudio-0.2.0/python/modelstudio/ops/creation.py +0 -90
  111. modelstudio-0.2.0/python/modelstudio/ops/movement.py +0 -91
  112. modelstudio-0.2.0/python/modelstudio/optim/__init__.py +0 -5
  113. modelstudio-0.2.0/python/modelstudio/optim/adamw.py +0 -55
  114. modelstudio-0.2.0/python/modelstudio/optim/optimizer.py +0 -21
  115. modelstudio-0.2.0/python/modelstudio/optim/sgd.py +0 -21
  116. modelstudio-0.2.0/scripts/smoke_test.py +0 -39
  117. {modelstudio-0.2.0 → modelstudio-0.4.0}/CMakeLists.txt +0 -0
  118. {modelstudio-0.2.0 → modelstudio-0.4.0}/LICENSE +0 -0
  119. {modelstudio-0.2.0 → modelstudio-0.4.0}/MANIFEST.in +0 -0
  120. {modelstudio-0.2.0 → modelstudio-0.4.0}/benchmarks/bench_attention.py +0 -0
  121. {modelstudio-0.2.0 → modelstudio-0.4.0}/benchmarks/bench_dataloader.py +0 -0
  122. {modelstudio-0.2.0 → modelstudio-0.4.0}/benchmarks/bench_matmul.py +0 -0
  123. {modelstudio-0.2.0 → modelstudio-0.4.0}/benchmarks/bench_mlp.py +0 -0
  124. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/backends/cpu/kernels/add.cpp +0 -0
  125. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/backends/cpu/kernels/matmul.cpp +0 -0
  126. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/backends/cuda/README.md +0 -0
  127. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/backends/cuda/cuda_memory.hpp +0 -0
  128. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/backends/oneapi/README.md +0 -0
  129. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/backends/oneapi/sycl_memory.hpp +0 -0
  130. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/backends/rocm/README.md +0 -0
  131. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/backends/rocm/hip_memory.hpp +0 -0
  132. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/bindings/python_bindings.cpp +0 -0
  133. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/core/device.hpp +0 -0
  134. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/core/dtype.hpp +0 -0
  135. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/core/error.hpp +0 -0
  136. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/core/shape.hpp +0 -0
  137. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/core/storage.hpp +0 -0
  138. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/core/tensor.hpp +0 -0
  139. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/dispatcher/dispatcher.hpp +0 -0
  140. {modelstudio-0.2.0 → modelstudio-0.4.0}/csrc/dispatcher/operator_registry.hpp +0 -0
  141. {modelstudio-0.2.0 → modelstudio-0.4.0}/docs/autograd.md +0 -0
  142. {modelstudio-0.2.0 → modelstudio-0.4.0}/docs/backend-architecture.md +0 -0
  143. {modelstudio-0.2.0 → modelstudio-0.4.0}/docs/releasing.md +0 -0
  144. {modelstudio-0.2.0 → modelstudio-0.4.0}/examples/save_load.py +0 -0
  145. {modelstudio-0.2.0 → modelstudio-0.4.0}/examples/tiny_transformer.py +0 -0
  146. {modelstudio-0.2.0 → modelstudio-0.4.0}/examples/train_classifier.py +0 -0
  147. {modelstudio-0.2.0 → modelstudio-0.4.0}/examples/train_mlp.py +0 -0
  148. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/autograd/__init__.py +0 -0
  149. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/autograd/engine.py +0 -0
  150. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/autograd/function.py +0 -0
  151. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/autograd/grad_mode.py +0 -0
  152. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/compile/__init__.py +0 -0
  153. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/compile/graph_capture.py +0 -0
  154. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/compile/ir.py +0 -0
  155. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/compile/passes.py +0 -0
  156. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/device.py +0 -0
  157. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/dtypes.py +0 -0
  158. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/errors.py +0 -0
  159. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/nn/parameter.py +0 -0
  160. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/ops/linalg.py +0 -0
  161. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/ops/reductions.py +0 -0
  162. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/py.typed +0 -0
  163. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/runtime/__init__.py +0 -0
  164. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/runtime/dispatcher.py +0 -0
  165. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/storage.py +0 -0
  166. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/testing/__init__.py +0 -0
  167. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio/testing/gradcheck.py +0 -0
  168. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio.egg-info/dependency_links.txt +0 -0
  169. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio.egg-info/requires.txt +0 -0
  170. {modelstudio-0.2.0 → modelstudio-0.4.0}/python/modelstudio.egg-info/top_level.txt +0 -0
  171. {modelstudio-0.2.0 → modelstudio-0.4.0}/setup.cfg +0 -0
  172. {modelstudio-0.2.0 → modelstudio-0.4.0}/tests/test_attention.py +0 -0
  173. {modelstudio-0.2.0 → modelstudio-0.4.0}/tests/test_autograd.py +0 -0
  174. {modelstudio-0.2.0 → modelstudio-0.4.0}/tests/test_buffers.py +0 -0
  175. {modelstudio-0.2.0 → modelstudio-0.4.0}/tests/test_data.py +0 -0
  176. {modelstudio-0.2.0 → modelstudio-0.4.0}/tests/test_dispatcher.py +0 -0
  177. {modelstudio-0.2.0 → modelstudio-0.4.0}/tests/test_embedding.py +0 -0
  178. {modelstudio-0.2.0 → modelstudio-0.4.0}/tests/test_gradcheck.py +0 -0
  179. {modelstudio-0.2.0 → modelstudio-0.4.0}/tests/test_indexing.py +0 -0
  180. {modelstudio-0.2.0 → modelstudio-0.4.0}/tests/test_losses.py +0 -0
  181. {modelstudio-0.2.0 → modelstudio-0.4.0}/tests/test_nn.py +0 -0
  182. {modelstudio-0.2.0 → modelstudio-0.4.0}/tests/test_norms.py +0 -0
  183. {modelstudio-0.2.0 → modelstudio-0.4.0}/tests/test_ops.py +0 -0
  184. {modelstudio-0.2.0 → modelstudio-0.4.0}/tests/test_optim.py +0 -0
  185. {modelstudio-0.2.0 → modelstudio-0.4.0}/tests/test_reductions_axis.py +0 -0
  186. {modelstudio-0.2.0 → modelstudio-0.4.0}/tests/test_serialization.py +0 -0
  187. {modelstudio-0.2.0 → modelstudio-0.4.0}/tests/test_state_dict.py +0 -0
  188. {modelstudio-0.2.0 → modelstudio-0.4.0}/tests/test_tensor.py +0 -0
  189. {modelstudio-0.2.0 → modelstudio-0.4.0}/tests/test_unary_ops.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: modelstudio
3
- Version: 0.2.0
3
+ Version: 0.4.0
4
4
  Summary: An early-stage AI tensor framework with CPU tensors, autograd, and backend extension scaffolding.
5
5
  Author: ModelStudio Contributors
6
6
  License-Expression: MIT
@@ -31,7 +31,7 @@ Dynamic: license-file
31
31
 
32
32
  # ModelStudio
33
33
 
34
- ModelStudio is an early-stage AI tensor framework. Version `0.2.0` provides a
34
+ ModelStudio is an early-stage AI tensor framework. Version `0.4.0` provides a
35
35
  CPU tensor/autograd MVP with neural-network modules, optimizers, serialization,
36
36
  basic data loading, and small LLM-oriented building blocks.
37
37
 
@@ -60,11 +60,15 @@ python -m pip install -e ".[dev]"
60
60
  | CPU tensors | Working MVP |
61
61
  | Autograd | Reverse-mode for core CPU ops |
62
62
  | Reductions | `sum`, `mean`, `max` with axis and keepdims; `max` is value-only |
63
- | Activations | ReLU, GELU, exp, log, tanh, sigmoid, SiLU, softmax, log-softmax |
64
- | Losses | MSE and cross entropy |
65
- | Modules | Parameters, buffers, state dicts, save/load |
66
- | Layers | Linear, Embedding, LayerNorm, RMSNorm, TransformerBlock |
67
- | Data | Dataset, TensorDataset, DataLoader |
63
+ | Activations | ReLU, GELU, LeakyReLU, ELU, Softplus, exp, log, tanh, sigmoid, SiLU, softmax, log-softmax |
64
+ | Losses | MSE and cross entropy with `none`, `mean`, and `sum` reductions |
65
+ | Modules | Parameters, buffers, child traversal, state dicts, save/load |
66
+ | Layers | Linear, Embedding, LayerNorm, RMSNorm, BatchNorm1d, Dropout, Conv1d, Conv2d, pooling, TransformerBlock |
67
+ | Optimizers | SGD and AdamW with state serialization, parameter groups, and LR schedulers |
68
+ | Data | Dataset, TensorDataset, random_split, DataLoader with deterministic seeded shuffle |
69
+ | Randomness | `manual_seed`, RNG-backed `randn`, dropout, and init helpers |
70
+ | Interop | `asarray`, `from_numpy`, `to_numpy`, and `ms.numpy` |
71
+ | Metrics | accuracy and top-k accuracy |
68
72
  | Compiler | Placeholder IR and passes |
69
73
 
70
74
  ## Backend Status
@@ -163,6 +167,49 @@ y = block(x)
163
167
  print(y.shape)
164
168
  ```
165
169
 
170
+ ## 0.4.0 Training Utilities
171
+
172
+ ```python
173
+ ms.manual_seed(123)
174
+ model = nn.Linear(4, 2)
175
+ optimizer = ms.optim.AdamW(model.parameters(), lr=1e-3)
176
+ state = {"model": model.state_dict(), "optimizer": optimizer.state_dict()}
177
+ ms.save(state, "checkpoint.ms")
178
+ ```
179
+
180
+ New CPU-only helpers include `ms.concat`, `ms.stack`, `Tensor.flatten`,
181
+ `Tensor.squeeze`, `Tensor.unsqueeze`, `nn.init`, `nn.Dropout`,
182
+ `nn.BatchNorm1d`, `nn.Conv1d`, `nn.Conv2d`, `nn.AvgPool2d`, `nn.MaxPool2d`,
183
+ and `nn.utils` gradient clipping.
184
+
185
+ ## NumPy Interop
186
+
187
+ ```python
188
+ x = ms.asarray([[1, 2, 3], [4, 5, 6]], dtype=ms.float32)
189
+ arr = ms.to_numpy(x)
190
+ y = ms.from_numpy(arr)
191
+ ```
192
+
193
+ CPU uses NumPy internally. Normal examples prefer ModelStudio APIs; `ms.numpy`
194
+ is exposed for advanced users who explicitly want NumPy access.
195
+
196
+ ## Schedulers and Metrics
197
+
198
+ ```python
199
+ optimizer = ms.optim.AdamW(model.parameters(), lr=1e-3)
200
+ scheduler = ms.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.5)
201
+ scheduler.step()
202
+
203
+ acc = ms.metrics.accuracy(logits, targets)
204
+ ```
205
+
206
+ ## Checkpointing
207
+
208
+ ```python
209
+ ms.save_checkpoint("checkpoint.ms", model=model, optimizer=optimizer, scheduler=scheduler, extra={"epoch": 1})
210
+ checkpoint = ms.load_checkpoint("checkpoint.ms", model=model, optimizer=optimizer, scheduler=scheduler)
211
+ ```
212
+
166
213
  ## Commands
167
214
 
168
215
  ```bash
@@ -172,10 +219,21 @@ python examples/train_mlp.py
172
219
  python examples/train_classifier.py
173
220
  python examples/tiny_transformer.py
174
221
  python examples/save_load.py
222
+ python examples/train_cnn_toy.py
223
+ python examples/dropout_batchnorm.py
224
+ python examples/checkpoint_training.py
225
+ python examples/numpy_interop.py
226
+ python examples/scheduler_training.py
227
+ python examples/checkpoint_resume.py
228
+ python examples/metrics_demo.py
175
229
  python benchmarks/bench_matmul.py
176
230
  python benchmarks/bench_mlp.py
177
231
  python benchmarks/bench_attention.py
178
232
  python benchmarks/bench_dataloader.py
233
+ python benchmarks/bench_conv.py
234
+ python benchmarks/bench_dropout.py
235
+ python benchmarks/bench_creation.py
236
+ python benchmarks/bench_manipulation.py
179
237
  ```
180
238
 
181
239
  ## Documentation
@@ -183,6 +241,17 @@ python benchmarks/bench_dataloader.py
183
241
  - [Tensor API](docs/tensor-api.md)
184
242
  - [Neural network API](docs/nn.md)
185
243
  - [Data utilities](docs/data.md)
244
+ - [Training](docs/training.md)
245
+ - [Modules](docs/modules.md)
246
+ - [Serialization](docs/serialization.md)
247
+ - [Randomness](docs/randomness.md)
248
+ - [Native backend roadmap](docs/native-backend-roadmap.md)
249
+ - [NumPy interop](docs/numpy-interop.md)
250
+ - [Tensor creation](docs/tensor-creation.md)
251
+ - [Tensor manipulation](docs/tensor-manipulation.md)
252
+ - [Optimizers](docs/optimizers.md)
253
+ - [Checkpointing](docs/checkpointing.md)
254
+ - [Metrics](docs/metrics.md)
186
255
  - [Backend architecture](docs/backend-architecture.md)
187
256
  - [Autograd design](docs/autograd.md)
188
257
  - [Releasing](docs/releasing.md)
@@ -1,6 +1,6 @@
1
1
  # ModelStudio
2
2
 
3
- ModelStudio is an early-stage AI tensor framework. Version `0.2.0` provides a
3
+ ModelStudio is an early-stage AI tensor framework. Version `0.4.0` provides a
4
4
  CPU tensor/autograd MVP with neural-network modules, optimizers, serialization,
5
5
  basic data loading, and small LLM-oriented building blocks.
6
6
 
@@ -29,11 +29,15 @@ python -m pip install -e ".[dev]"
29
29
  | CPU tensors | Working MVP |
30
30
  | Autograd | Reverse-mode for core CPU ops |
31
31
  | Reductions | `sum`, `mean`, `max` with axis and keepdims; `max` is value-only |
32
- | Activations | ReLU, GELU, exp, log, tanh, sigmoid, SiLU, softmax, log-softmax |
33
- | Losses | MSE and cross entropy |
34
- | Modules | Parameters, buffers, state dicts, save/load |
35
- | Layers | Linear, Embedding, LayerNorm, RMSNorm, TransformerBlock |
36
- | Data | Dataset, TensorDataset, DataLoader |
32
+ | Activations | ReLU, GELU, LeakyReLU, ELU, Softplus, exp, log, tanh, sigmoid, SiLU, softmax, log-softmax |
33
+ | Losses | MSE and cross entropy with `none`, `mean`, and `sum` reductions |
34
+ | Modules | Parameters, buffers, child traversal, state dicts, save/load |
35
+ | Layers | Linear, Embedding, LayerNorm, RMSNorm, BatchNorm1d, Dropout, Conv1d, Conv2d, pooling, TransformerBlock |
36
+ | Optimizers | SGD and AdamW with state serialization, parameter groups, and LR schedulers |
37
+ | Data | Dataset, TensorDataset, random_split, DataLoader with deterministic seeded shuffle |
38
+ | Randomness | `manual_seed`, RNG-backed `randn`, dropout, and init helpers |
39
+ | Interop | `asarray`, `from_numpy`, `to_numpy`, and `ms.numpy` |
40
+ | Metrics | accuracy and top-k accuracy |
37
41
  | Compiler | Placeholder IR and passes |
38
42
 
39
43
  ## Backend Status
@@ -132,6 +136,49 @@ y = block(x)
132
136
  print(y.shape)
133
137
  ```
134
138
 
139
+ ## 0.4.0 Training Utilities
140
+
141
+ ```python
142
+ ms.manual_seed(123)
143
+ model = nn.Linear(4, 2)
144
+ optimizer = ms.optim.AdamW(model.parameters(), lr=1e-3)
145
+ state = {"model": model.state_dict(), "optimizer": optimizer.state_dict()}
146
+ ms.save(state, "checkpoint.ms")
147
+ ```
148
+
149
+ New CPU-only helpers include `ms.concat`, `ms.stack`, `Tensor.flatten`,
150
+ `Tensor.squeeze`, `Tensor.unsqueeze`, `nn.init`, `nn.Dropout`,
151
+ `nn.BatchNorm1d`, `nn.Conv1d`, `nn.Conv2d`, `nn.AvgPool2d`, `nn.MaxPool2d`,
152
+ and `nn.utils` gradient clipping.
153
+
154
+ ## NumPy Interop
155
+
156
+ ```python
157
+ x = ms.asarray([[1, 2, 3], [4, 5, 6]], dtype=ms.float32)
158
+ arr = ms.to_numpy(x)
159
+ y = ms.from_numpy(arr)
160
+ ```
161
+
162
+ CPU uses NumPy internally. Normal examples prefer ModelStudio APIs; `ms.numpy`
163
+ is exposed for advanced users who explicitly want NumPy access.
164
+
165
+ ## Schedulers and Metrics
166
+
167
+ ```python
168
+ optimizer = ms.optim.AdamW(model.parameters(), lr=1e-3)
169
+ scheduler = ms.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.5)
170
+ scheduler.step()
171
+
172
+ acc = ms.metrics.accuracy(logits, targets)
173
+ ```
174
+
175
+ ## Checkpointing
176
+
177
+ ```python
178
+ ms.save_checkpoint("checkpoint.ms", model=model, optimizer=optimizer, scheduler=scheduler, extra={"epoch": 1})
179
+ checkpoint = ms.load_checkpoint("checkpoint.ms", model=model, optimizer=optimizer, scheduler=scheduler)
180
+ ```
181
+
135
182
  ## Commands
136
183
 
137
184
  ```bash
@@ -141,10 +188,21 @@ python examples/train_mlp.py
141
188
  python examples/train_classifier.py
142
189
  python examples/tiny_transformer.py
143
190
  python examples/save_load.py
191
+ python examples/train_cnn_toy.py
192
+ python examples/dropout_batchnorm.py
193
+ python examples/checkpoint_training.py
194
+ python examples/numpy_interop.py
195
+ python examples/scheduler_training.py
196
+ python examples/checkpoint_resume.py
197
+ python examples/metrics_demo.py
144
198
  python benchmarks/bench_matmul.py
145
199
  python benchmarks/bench_mlp.py
146
200
  python benchmarks/bench_attention.py
147
201
  python benchmarks/bench_dataloader.py
202
+ python benchmarks/bench_conv.py
203
+ python benchmarks/bench_dropout.py
204
+ python benchmarks/bench_creation.py
205
+ python benchmarks/bench_manipulation.py
148
206
  ```
149
207
 
150
208
  ## Documentation
@@ -152,6 +210,17 @@ python benchmarks/bench_dataloader.py
152
210
  - [Tensor API](docs/tensor-api.md)
153
211
  - [Neural network API](docs/nn.md)
154
212
  - [Data utilities](docs/data.md)
213
+ - [Training](docs/training.md)
214
+ - [Modules](docs/modules.md)
215
+ - [Serialization](docs/serialization.md)
216
+ - [Randomness](docs/randomness.md)
217
+ - [Native backend roadmap](docs/native-backend-roadmap.md)
218
+ - [NumPy interop](docs/numpy-interop.md)
219
+ - [Tensor creation](docs/tensor-creation.md)
220
+ - [Tensor manipulation](docs/tensor-manipulation.md)
221
+ - [Optimizers](docs/optimizers.md)
222
+ - [Checkpointing](docs/checkpointing.md)
223
+ - [Metrics](docs/metrics.md)
155
224
  - [Backend architecture](docs/backend-architecture.md)
156
225
  - [Autograd design](docs/autograd.md)
157
226
  - [Releasing](docs/releasing.md)
@@ -0,0 +1,40 @@
1
+ from __future__ import annotations
2
+
3
+ import platform
4
+ import time
5
+
6
+ import modelstudio as ms
7
+ import numpy as np
8
+ from modelstudio import nn
9
+
10
+
11
+ def timeit(fn, iterations: int, warmup: int) -> float:
12
+ for _ in range(warmup):
13
+ fn()
14
+ start = time.perf_counter()
15
+ for _ in range(iterations):
16
+ fn()
17
+ return (time.perf_counter() - start) / iterations
18
+
19
+
20
+ def main() -> None:
21
+ warmup = 3
22
+ iterations = 10
23
+ shape = (8, 3, 16, 16)
24
+ ms.manual_seed(1)
25
+ x = ms.randn(shape)
26
+ conv = nn.Conv2d(3, 8, kernel_size=3, padding=1)
27
+
28
+ print(f"Python: {platform.python_version()}")
29
+ print(f"NumPy: {np.__version__}")
30
+ print(f"ModelStudio: {ms.__version__}")
31
+ print(f"Operation: Conv2d input={shape} out_channels=8 kernel=3 padding=1")
32
+ print(f"Warmup: {warmup}")
33
+ print(f"Iterations: {iterations}")
34
+ print("Backend: CPU only")
35
+ print(f"Conv2d avg: {timeit(lambda: conv(x), iterations, warmup) * 1_000:.3f} ms")
36
+
37
+
38
+ if __name__ == "__main__":
39
+ main()
40
+
@@ -0,0 +1,38 @@
1
+ from __future__ import annotations
2
+
3
+ import platform
4
+ import time
5
+
6
+ import modelstudio as ms
7
+ import numpy as np
8
+
9
+
10
+ def timeit(fn, iterations: int, warmup: int) -> float:
11
+ for _ in range(warmup):
12
+ fn()
13
+ start = time.perf_counter()
14
+ for _ in range(iterations):
15
+ fn()
16
+ return (time.perf_counter() - start) / iterations
17
+
18
+
19
+ def main() -> None:
20
+ warmup = 5
21
+ iterations = 50
22
+ shape = (512, 512)
23
+
24
+ print(f"Python: {platform.python_version()}")
25
+ print(f"NumPy: {np.__version__}")
26
+ print(f"ModelStudio: {ms.__version__}")
27
+ print(f"Operation: rand/full/eye shape={shape}")
28
+ print(f"Warmup: {warmup}")
29
+ print(f"Iterations: {iterations}")
30
+ print("Backend: CPU only")
31
+ print(f"rand avg: {timeit(lambda: ms.rand(shape), iterations, warmup) * 1_000:.3f} ms")
32
+ print(f"full avg: {timeit(lambda: ms.full(shape, 1.25), iterations, warmup) * 1_000:.3f} ms")
33
+ print(f"eye avg: {timeit(lambda: ms.eye(512), iterations, warmup) * 1_000:.3f} ms")
34
+
35
+
36
+ if __name__ == "__main__":
37
+ main()
38
+
@@ -0,0 +1,37 @@
1
+ from __future__ import annotations
2
+
3
+ import platform
4
+ import time
5
+
6
+ import modelstudio as ms
7
+ import numpy as np
8
+
9
+
10
+ def timeit(fn, iterations: int, warmup: int) -> float:
11
+ for _ in range(warmup):
12
+ fn()
13
+ start = time.perf_counter()
14
+ for _ in range(iterations):
15
+ fn()
16
+ return (time.perf_counter() - start) / iterations
17
+
18
+
19
+ def main() -> None:
20
+ warmup = 5
21
+ iterations = 50
22
+ shape = (512, 512)
23
+ x = ms.randn(shape)
24
+
25
+ print(f"Python: {platform.python_version()}")
26
+ print(f"NumPy: {np.__version__}")
27
+ print(f"ModelStudio: {ms.__version__}")
28
+ print(f"Operation: dropout shape={shape} p=0.5")
29
+ print(f"Warmup: {warmup}")
30
+ print(f"Iterations: {iterations}")
31
+ print("Backend: CPU only")
32
+ print(f"Dropout avg: {timeit(lambda: ms.dropout(x, p=0.5), iterations, warmup) * 1_000:.3f} ms")
33
+
34
+
35
+ if __name__ == "__main__":
36
+ main()
37
+
@@ -0,0 +1,38 @@
1
+ from __future__ import annotations
2
+
3
+ import platform
4
+ import time
5
+
6
+ import modelstudio as ms
7
+ import numpy as np
8
+
9
+
10
+ def timeit(fn, iterations: int, warmup: int) -> float:
11
+ for _ in range(warmup):
12
+ fn()
13
+ start = time.perf_counter()
14
+ for _ in range(iterations):
15
+ fn()
16
+ return (time.perf_counter() - start) / iterations
17
+
18
+
19
+ def main() -> None:
20
+ warmup = 5
21
+ iterations = 50
22
+ x = ms.randn((64, 64, 8))
23
+
24
+ print(f"Python: {platform.python_version()}")
25
+ print(f"NumPy: {np.__version__}")
26
+ print(f"ModelStudio: {ms.__version__}")
27
+ print("Operation: permute/repeat/tile")
28
+ print(f"Warmup: {warmup}")
29
+ print(f"Iterations: {iterations}")
30
+ print("Backend: CPU only")
31
+ print(f"permute avg: {timeit(lambda: ms.permute(x, (2, 0, 1)), iterations, warmup) * 1_000:.3f} ms")
32
+ print(f"repeat avg: {timeit(lambda: ms.repeat(x, 2, axis=0), iterations, warmup) * 1_000:.3f} ms")
33
+ print(f"tile avg: {timeit(lambda: ms.tile(x, (2, 1, 1)), iterations, warmup) * 1_000:.3f} ms")
34
+
35
+
36
+ if __name__ == "__main__":
37
+ main()
38
+
@@ -2,6 +2,8 @@ add_library(modelstudio_native STATIC
2
2
  backends/cpu/cpu_backend.cpp
3
3
  backends/cpu/kernels/add.cpp
4
4
  backends/cpu/kernels/matmul.cpp
5
+ backends/cpu/kernels/mul.cpp
6
+ backends/cpu/kernels/relu.cpp
5
7
  )
6
8
 
7
9
  target_include_directories(modelstudio_native PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
@@ -10,8 +10,16 @@ Tensor CPUBackend::add(const Tensor& lhs, const Tensor& rhs) {
10
10
  return add_kernel(lhs, rhs);
11
11
  }
12
12
 
13
+ Tensor CPUBackend::mul(const Tensor& lhs, const Tensor& rhs) {
14
+ return mul_kernel(lhs, rhs);
15
+ }
16
+
13
17
  Tensor CPUBackend::matmul(const Tensor& lhs, const Tensor& rhs) {
14
18
  return matmul_kernel(lhs, rhs);
15
19
  }
16
20
 
21
+ Tensor CPUBackend::relu(const Tensor& input) {
22
+ return relu_kernel(input);
23
+ }
24
+
17
25
  } // namespace modelstudio::cpu
@@ -5,14 +5,18 @@
5
5
  namespace modelstudio::cpu {
6
6
 
7
7
  Tensor add_kernel(const Tensor& lhs, const Tensor& rhs);
8
+ Tensor mul_kernel(const Tensor& lhs, const Tensor& rhs);
8
9
  Tensor matmul_kernel(const Tensor& lhs, const Tensor& rhs);
10
+ Tensor relu_kernel(const Tensor& input);
9
11
 
10
12
  class CPUBackend final : public Backend {
11
13
  public:
12
14
  std::string_view name() const override { return "cpu"; }
13
15
  Tensor empty(const Shape& shape, DType dtype) override;
14
16
  Tensor add(const Tensor& lhs, const Tensor& rhs) override;
17
+ Tensor mul(const Tensor& lhs, const Tensor& rhs) override;
15
18
  Tensor matmul(const Tensor& lhs, const Tensor& rhs) override;
19
+ Tensor relu(const Tensor& input) override;
16
20
  };
17
21
 
18
22
  } // namespace modelstudio::cpu
@@ -0,0 +1,43 @@
1
+ #include "backends/cpu/cpu_backend.hpp"
2
+
3
+ namespace modelstudio::cpu {
4
+ namespace {
5
+
6
+ template <typename T>
7
+ void mul_typed(const Tensor& lhs, const Tensor& rhs, Tensor& out) {
8
+ const auto* lhs_ptr = static_cast<const T*>(lhs.data());
9
+ const auto* rhs_ptr = static_cast<const T*>(rhs.data());
10
+ auto* out_ptr = static_cast<T*>(out.data());
11
+ for (std::int64_t i = 0; i < lhs.numel(); ++i) {
12
+ out_ptr[i] = lhs_ptr[i] * rhs_ptr[i];
13
+ }
14
+ }
15
+
16
+ } // namespace
17
+
18
+ Tensor mul_kernel(const Tensor& lhs, const Tensor& rhs) {
19
+ if (lhs.shape() != rhs.shape() || lhs.dtype() != rhs.dtype()) {
20
+ throw Error("native CPU mul currently requires identical shape and dtype");
21
+ }
22
+ Tensor out(lhs.shape(), lhs.dtype(), lhs.device());
23
+ switch (lhs.dtype()) {
24
+ case DType::Float32:
25
+ mul_typed<float>(lhs, rhs, out);
26
+ break;
27
+ case DType::Float64:
28
+ mul_typed<double>(lhs, rhs, out);
29
+ break;
30
+ case DType::Int32:
31
+ mul_typed<std::int32_t>(lhs, rhs, out);
32
+ break;
33
+ case DType::Int64:
34
+ mul_typed<std::int64_t>(lhs, rhs, out);
35
+ break;
36
+ case DType::Bool:
37
+ mul_typed<bool>(lhs, rhs, out);
38
+ break;
39
+ }
40
+ return out;
41
+ }
42
+
43
+ } // namespace modelstudio::cpu
@@ -0,0 +1,40 @@
1
+ #include "backends/cpu/cpu_backend.hpp"
2
+
3
+ #include <algorithm>
4
+
5
+ namespace modelstudio::cpu {
6
+ namespace {
7
+
8
+ template <typename T>
9
+ void relu_typed(const Tensor& input, Tensor& out) {
10
+ const auto* input_ptr = static_cast<const T*>(input.data());
11
+ auto* out_ptr = static_cast<T*>(out.data());
12
+ for (std::int64_t i = 0; i < input.numel(); ++i) {
13
+ out_ptr[i] = std::max<T>(input_ptr[i], T{});
14
+ }
15
+ }
16
+
17
+ } // namespace
18
+
19
+ Tensor relu_kernel(const Tensor& input) {
20
+ Tensor out(input.shape(), input.dtype(), input.device());
21
+ switch (input.dtype()) {
22
+ case DType::Float32:
23
+ relu_typed<float>(input, out);
24
+ break;
25
+ case DType::Float64:
26
+ relu_typed<double>(input, out);
27
+ break;
28
+ case DType::Int32:
29
+ relu_typed<std::int32_t>(input, out);
30
+ break;
31
+ case DType::Int64:
32
+ relu_typed<std::int64_t>(input, out);
33
+ break;
34
+ case DType::Bool:
35
+ throw Error("native CPU bool relu is not implemented");
36
+ }
37
+ return out;
38
+ }
39
+
40
+ } // namespace modelstudio::cpu
@@ -14,9 +14,19 @@ Tensor CUDABackend::add(const Tensor&, const Tensor&) {
14
14
  throw Error("CUDA add kernel is not implemented");
15
15
  }
16
16
 
17
+ Tensor CUDABackend::mul(const Tensor&, const Tensor&) {
18
+ // TODO: Add a tested CUDA elementwise multiply kernel.
19
+ throw Error("CUDA mul kernel is not implemented");
20
+ }
21
+
17
22
  Tensor CUDABackend::matmul(const Tensor&, const Tensor&) {
18
23
  // TODO: Add a tested CUDA matmul kernel or cuBLAS integration.
19
24
  throw Error("CUDA matmul kernel is not implemented");
20
25
  }
21
26
 
27
+ Tensor CUDABackend::relu(const Tensor&) {
28
+ // TODO: Add a tested CUDA ReLU kernel.
29
+ throw Error("CUDA relu kernel is not implemented");
30
+ }
31
+
22
32
  } // namespace modelstudio::cuda
@@ -9,7 +9,9 @@ class CUDABackend final : public Backend {
9
9
  std::string_view name() const override { return "cuda"; }
10
10
  Tensor empty(const Shape& shape, DType dtype) override;
11
11
  Tensor add(const Tensor& lhs, const Tensor& rhs) override;
12
+ Tensor mul(const Tensor& lhs, const Tensor& rhs) override;
12
13
  Tensor matmul(const Tensor& lhs, const Tensor& rhs) override;
14
+ Tensor relu(const Tensor& input) override;
13
15
  };
14
16
 
15
17
  } // namespace modelstudio::cuda
@@ -14,9 +14,19 @@ Tensor OneAPIBackend::add(const Tensor&, const Tensor&) {
14
14
  throw Error("oneAPI add kernel is not implemented");
15
15
  }
16
16
 
17
+ Tensor OneAPIBackend::mul(const Tensor&, const Tensor&) {
18
+ // TODO: Add a tested SYCL elementwise multiply kernel.
19
+ throw Error("oneAPI mul kernel is not implemented");
20
+ }
21
+
17
22
  Tensor OneAPIBackend::matmul(const Tensor&, const Tensor&) {
18
23
  // TODO: Add a tested SYCL matmul kernel or oneMKL integration.
19
24
  throw Error("oneAPI matmul kernel is not implemented");
20
25
  }
21
26
 
27
+ Tensor OneAPIBackend::relu(const Tensor&) {
28
+ // TODO: Add a tested SYCL ReLU kernel.
29
+ throw Error("oneAPI relu kernel is not implemented");
30
+ }
31
+
22
32
  } // namespace modelstudio::oneapi
@@ -9,7 +9,9 @@ class OneAPIBackend final : public Backend {
9
9
  std::string_view name() const override { return "oneapi"; }
10
10
  Tensor empty(const Shape& shape, DType dtype) override;
11
11
  Tensor add(const Tensor& lhs, const Tensor& rhs) override;
12
+ Tensor mul(const Tensor& lhs, const Tensor& rhs) override;
12
13
  Tensor matmul(const Tensor& lhs, const Tensor& rhs) override;
14
+ Tensor relu(const Tensor& input) override;
13
15
  };
14
16
 
15
17
  } // namespace modelstudio::oneapi
@@ -14,9 +14,19 @@ Tensor ROCmBackend::add(const Tensor&, const Tensor&) {
14
14
  throw Error("ROCm add kernel is not implemented");
15
15
  }
16
16
 
17
+ Tensor ROCmBackend::mul(const Tensor&, const Tensor&) {
18
+ // TODO: Add a tested HIP elementwise multiply kernel.
19
+ throw Error("ROCm mul kernel is not implemented");
20
+ }
21
+
17
22
  Tensor ROCmBackend::matmul(const Tensor&, const Tensor&) {
18
23
  // TODO: Add a tested HIP matmul kernel or rocBLAS integration.
19
24
  throw Error("ROCm matmul kernel is not implemented");
20
25
  }
21
26
 
27
+ Tensor ROCmBackend::relu(const Tensor&) {
28
+ // TODO: Add a tested HIP ReLU kernel.
29
+ throw Error("ROCm relu kernel is not implemented");
30
+ }
31
+
22
32
  } // namespace modelstudio::rocm
@@ -9,7 +9,9 @@ class ROCmBackend final : public Backend {
9
9
  std::string_view name() const override { return "rocm"; }
10
10
  Tensor empty(const Shape& shape, DType dtype) override;
11
11
  Tensor add(const Tensor& lhs, const Tensor& rhs) override;
12
+ Tensor mul(const Tensor& lhs, const Tensor& rhs) override;
12
13
  Tensor matmul(const Tensor& lhs, const Tensor& rhs) override;
14
+ Tensor relu(const Tensor& input) override;
13
15
  };
14
16
 
15
17
  } // namespace modelstudio::rocm
@@ -12,7 +12,9 @@ class Backend {
12
12
  virtual std::string_view name() const = 0;
13
13
  virtual Tensor empty(const Shape& shape, DType dtype) = 0;
14
14
  virtual Tensor add(const Tensor& lhs, const Tensor& rhs) = 0;
15
+ virtual Tensor mul(const Tensor& lhs, const Tensor& rhs) = 0;
15
16
  virtual Tensor matmul(const Tensor& lhs, const Tensor& rhs) = 0;
17
+ virtual Tensor relu(const Tensor& input) = 0;
16
18
  };
17
19
 
18
20
  } // namespace modelstudio
@@ -0,0 +1,30 @@
1
+ # Checkpointing
2
+
3
+ High-level checkpoint helpers wrap `ms.save` and `ms.load`.
4
+
5
+ ```python
6
+ ms.save_checkpoint(
7
+ "checkpoint.ms",
8
+ model=model,
9
+ optimizer=optimizer,
10
+ scheduler=scheduler,
11
+ extra={"epoch": 3},
12
+ )
13
+
14
+ checkpoint = ms.load_checkpoint(
15
+ "checkpoint.ms",
16
+ model=model,
17
+ optimizer=optimizer,
18
+ scheduler=scheduler,
19
+ )
20
+ ```
21
+
22
+ The checkpoint dictionary contains:
23
+
24
+ - `model`
25
+ - optional `optimizer`
26
+ - optional `scheduler`
27
+ - `extra`
28
+
29
+ The file format is currently ModelStudio's internal pickle-backed format.
30
+