modelstudio 0.4.0__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (240) hide show
  1. {modelstudio-0.4.0 → modelstudio-0.6.0}/CMakeLists.txt +10 -0
  2. modelstudio-0.6.0/PKG-INFO +278 -0
  3. modelstudio-0.6.0/README.md +247 -0
  4. modelstudio-0.6.0/benchmarks/bench_cuda_elementwise.py +54 -0
  5. modelstudio-0.6.0/benchmarks/bench_cuda_matmul.py +52 -0
  6. modelstudio-0.6.0/benchmarks/bench_elementwise.py +43 -0
  7. modelstudio-0.6.0/benchmarks/bench_trace.py +46 -0
  8. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/CMakeLists.txt +10 -2
  9. modelstudio-0.6.0/csrc/backends/cuda/README.md +19 -0
  10. modelstudio-0.6.0/csrc/backends/cuda/cuda_backend.cu +28 -0
  11. modelstudio-0.6.0/csrc/backends/cuda/cuda_context.cu +37 -0
  12. modelstudio-0.6.0/csrc/backends/cuda/cuda_context.hpp +10 -0
  13. modelstudio-0.6.0/csrc/backends/cuda/cuda_kernels.hpp +16 -0
  14. modelstudio-0.6.0/csrc/backends/cuda/cuda_memory.cu +34 -0
  15. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/cuda/cuda_memory.hpp +2 -0
  16. modelstudio-0.6.0/csrc/backends/cuda/cuda_stream.cu +13 -0
  17. modelstudio-0.6.0/csrc/backends/cuda/cuda_stream.hpp +7 -0
  18. modelstudio-0.6.0/csrc/backends/cuda/kernels/elementwise.cu +27 -0
  19. modelstudio-0.6.0/csrc/backends/cuda/kernels/matmul.cu +13 -0
  20. modelstudio-0.6.0/csrc/backends/cuda/kernels/reductions.cu +15 -0
  21. modelstudio-0.6.0/csrc/bindings/cuda_bindings.cpp +12 -0
  22. {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/backend-architecture.md +3 -3
  23. modelstudio-0.6.0/docs/backend-status.md +37 -0
  24. modelstudio-0.6.0/docs/comparison-ops.md +22 -0
  25. modelstudio-0.6.0/docs/cuda.md +53 -0
  26. modelstudio-0.6.0/docs/functional-api.md +21 -0
  27. modelstudio-0.6.0/docs/linalg.md +22 -0
  28. modelstudio-0.6.0/docs/native-backend-roadmap.md +44 -0
  29. modelstudio-0.6.0/docs/random.md +25 -0
  30. modelstudio-0.6.0/docs/serialization.md +34 -0
  31. {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/tensor-api.md +1 -1
  32. {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/tensor-creation.md +1 -2
  33. modelstudio-0.6.0/docs/tracing.md +20 -0
  34. {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/training.md +1 -1
  35. modelstudio-0.6.0/examples/backend_status.py +26 -0
  36. modelstudio-0.6.0/examples/cuda_mlp_demo.py +45 -0
  37. modelstudio-0.6.0/examples/cuda_tensor_demo.py +33 -0
  38. modelstudio-0.6.0/examples/functional_training.py +34 -0
  39. modelstudio-0.6.0/examples/random_linalg_demo.py +19 -0
  40. modelstudio-0.6.0/examples/tracing_demo.py +19 -0
  41. {modelstudio-0.4.0 → modelstudio-0.6.0}/pyproject.toml +1 -1
  42. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/__init__.py +22 -14
  43. modelstudio-0.6.0/python/modelstudio/_version.py +1 -0
  44. modelstudio-0.6.0/python/modelstudio/backends/__init__.py +3 -0
  45. modelstudio-0.6.0/python/modelstudio/backends/cuda.py +84 -0
  46. modelstudio-0.6.0/python/modelstudio/backends/status.py +78 -0
  47. modelstudio-0.6.0/python/modelstudio/compile/graph_capture.py +83 -0
  48. modelstudio-0.6.0/python/modelstudio/compile/ir.py +62 -0
  49. modelstudio-0.6.0/python/modelstudio/cuda/__init__.py +13 -0
  50. modelstudio-0.6.0/python/modelstudio/cuda/device.py +45 -0
  51. modelstudio-0.6.0/python/modelstudio/cuda/memory.py +21 -0
  52. modelstudio-0.6.0/python/modelstudio/cuda/streams.py +12 -0
  53. modelstudio-0.6.0/python/modelstudio/linalg.py +31 -0
  54. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/__init__.py +2 -1
  55. modelstudio-0.6.0/python/modelstudio/nn/functional.py +162 -0
  56. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/module.py +6 -1
  57. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/ops/__init__.py +6 -0
  58. modelstudio-0.6.0/python/modelstudio/ops/comparison.py +76 -0
  59. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/ops/linalg.py +5 -1
  60. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/ops/math.py +10 -2
  61. modelstudio-0.6.0/python/modelstudio/random.py +120 -0
  62. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/serialization.py +40 -12
  63. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/tensor.py +51 -1
  64. modelstudio-0.6.0/python/modelstudio.egg-info/PKG-INFO +278 -0
  65. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio.egg-info/SOURCES.txt +54 -0
  66. {modelstudio-0.4.0 → modelstudio-0.6.0}/scripts/smoke_test.py +33 -6
  67. modelstudio-0.6.0/tests/test_backend_status.py +43 -0
  68. modelstudio-0.6.0/tests/test_clone_copy.py +42 -0
  69. modelstudio-0.6.0/tests/test_comparison_ops.py +54 -0
  70. modelstudio-0.6.0/tests/test_cuda_autograd.py +72 -0
  71. modelstudio-0.6.0/tests/test_cuda_availability.py +77 -0
  72. modelstudio-0.6.0/tests/test_cuda_memory.py +43 -0
  73. modelstudio-0.6.0/tests/test_cuda_nn.py +36 -0
  74. modelstudio-0.6.0/tests/test_cuda_ops.py +93 -0
  75. modelstudio-0.6.0/tests/test_cuda_tensor.py +52 -0
  76. modelstudio-0.6.0/tests/test_functional.py +113 -0
  77. modelstudio-0.6.0/tests/test_linalg.py +35 -0
  78. modelstudio-0.6.0/tests/test_native_cpu_mode.py +34 -0
  79. modelstudio-0.6.0/tests/test_public_exports.py +27 -0
  80. modelstudio-0.6.0/tests/test_random_namespace.py +44 -0
  81. modelstudio-0.6.0/tests/test_scalar_behavior.py +40 -0
  82. modelstudio-0.6.0/tests/test_serialization_hardening.py +76 -0
  83. modelstudio-0.6.0/tests/test_trace.py +59 -0
  84. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_version.py +1 -1
  85. modelstudio-0.4.0/PKG-INFO +0 -265
  86. modelstudio-0.4.0/README.md +0 -234
  87. modelstudio-0.4.0/csrc/backends/cuda/README.md +0 -14
  88. modelstudio-0.4.0/csrc/backends/cuda/cuda_backend.cu +0 -32
  89. modelstudio-0.4.0/docs/native-backend-roadmap.md +0 -25
  90. modelstudio-0.4.0/docs/serialization.md +0 -25
  91. modelstudio-0.4.0/python/modelstudio/_version.py +0 -1
  92. modelstudio-0.4.0/python/modelstudio/compile/graph_capture.py +0 -12
  93. modelstudio-0.4.0/python/modelstudio/compile/ir.py +0 -37
  94. modelstudio-0.4.0/python/modelstudio/random.py +0 -20
  95. modelstudio-0.4.0/python/modelstudio.egg-info/PKG-INFO +0 -265
  96. {modelstudio-0.4.0 → modelstudio-0.6.0}/LICENSE +0 -0
  97. {modelstudio-0.4.0 → modelstudio-0.6.0}/MANIFEST.in +0 -0
  98. {modelstudio-0.4.0 → modelstudio-0.6.0}/benchmarks/bench_attention.py +0 -0
  99. {modelstudio-0.4.0 → modelstudio-0.6.0}/benchmarks/bench_conv.py +0 -0
  100. {modelstudio-0.4.0 → modelstudio-0.6.0}/benchmarks/bench_creation.py +0 -0
  101. {modelstudio-0.4.0 → modelstudio-0.6.0}/benchmarks/bench_dataloader.py +0 -0
  102. {modelstudio-0.4.0 → modelstudio-0.6.0}/benchmarks/bench_dropout.py +0 -0
  103. {modelstudio-0.4.0 → modelstudio-0.6.0}/benchmarks/bench_manipulation.py +0 -0
  104. {modelstudio-0.4.0 → modelstudio-0.6.0}/benchmarks/bench_matmul.py +0 -0
  105. {modelstudio-0.4.0 → modelstudio-0.6.0}/benchmarks/bench_mlp.py +0 -0
  106. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/cpu/cpu_backend.cpp +0 -0
  107. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/cpu/cpu_backend.hpp +0 -0
  108. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/cpu/kernels/add.cpp +0 -0
  109. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/cpu/kernels/matmul.cpp +0 -0
  110. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/cpu/kernels/mul.cpp +0 -0
  111. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/cpu/kernels/relu.cpp +0 -0
  112. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/cuda/cuda_backend.hpp +0 -0
  113. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/oneapi/README.md +0 -0
  114. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/oneapi/oneapi_backend.cpp +0 -0
  115. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/oneapi/oneapi_backend.hpp +0 -0
  116. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/oneapi/sycl_memory.hpp +0 -0
  117. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/rocm/README.md +0 -0
  118. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/rocm/hip_memory.hpp +0 -0
  119. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/rocm/rocm_backend.cpp +0 -0
  120. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/rocm/rocm_backend.hpp +0 -0
  121. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/bindings/python_bindings.cpp +0 -0
  122. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/core/device.hpp +0 -0
  123. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/core/dtype.hpp +0 -0
  124. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/core/error.hpp +0 -0
  125. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/core/shape.hpp +0 -0
  126. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/core/storage.hpp +0 -0
  127. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/core/tensor.hpp +0 -0
  128. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/dispatcher/backend.hpp +0 -0
  129. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/dispatcher/dispatcher.hpp +0 -0
  130. {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/dispatcher/operator_registry.hpp +0 -0
  131. {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/autograd.md +0 -0
  132. {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/checkpointing.md +0 -0
  133. {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/data.md +0 -0
  134. {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/metrics.md +0 -0
  135. {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/modules.md +0 -0
  136. {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/nn.md +0 -0
  137. {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/numpy-interop.md +0 -0
  138. {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/optimizers.md +0 -0
  139. {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/randomness.md +0 -0
  140. {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/releasing.md +0 -0
  141. {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/tensor-manipulation.md +0 -0
  142. {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/checkpoint_resume.py +0 -0
  143. {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/checkpoint_training.py +0 -0
  144. {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/dropout_batchnorm.py +0 -0
  145. {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/metrics_demo.py +0 -0
  146. {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/numpy_interop.py +0 -0
  147. {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/save_load.py +0 -0
  148. {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/scheduler_training.py +0 -0
  149. {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/tiny_transformer.py +0 -0
  150. {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/train_classifier.py +0 -0
  151. {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/train_cnn_toy.py +0 -0
  152. {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/train_mlp.py +0 -0
  153. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/autograd/__init__.py +0 -0
  154. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/autograd/engine.py +0 -0
  155. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/autograd/function.py +0 -0
  156. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/autograd/grad_mode.py +0 -0
  157. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/compile/__init__.py +0 -0
  158. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/compile/passes.py +0 -0
  159. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/data/__init__.py +0 -0
  160. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/data/dataloader.py +0 -0
  161. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/data/dataset.py +0 -0
  162. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/device.py +0 -0
  163. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/dtypes.py +0 -0
  164. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/errors.py +0 -0
  165. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/interop.py +0 -0
  166. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/metrics/__init__.py +0 -0
  167. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/metrics/classification.py +0 -0
  168. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/activations.py +0 -0
  169. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/convolution.py +0 -0
  170. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/embedding.py +0 -0
  171. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/init.py +0 -0
  172. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/linear.py +0 -0
  173. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/losses.py +0 -0
  174. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/normalization.py +0 -0
  175. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/parameter.py +0 -0
  176. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/pooling.py +0 -0
  177. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/transformer.py +0 -0
  178. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/utils.py +0 -0
  179. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/ops/creation.py +0 -0
  180. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/ops/movement.py +0 -0
  181. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/ops/reductions.py +0 -0
  182. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/optim/__init__.py +0 -0
  183. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/optim/adamw.py +0 -0
  184. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/optim/lr_scheduler.py +0 -0
  185. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/optim/optimizer.py +0 -0
  186. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/optim/sgd.py +0 -0
  187. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/py.typed +0 -0
  188. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/runtime/__init__.py +0 -0
  189. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/runtime/backend.py +0 -0
  190. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/runtime/dispatcher.py +0 -0
  191. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/storage.py +0 -0
  192. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/testing/__init__.py +0 -0
  193. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/testing/gradcheck.py +0 -0
  194. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio.egg-info/dependency_links.txt +0 -0
  195. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio.egg-info/requires.txt +0 -0
  196. {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio.egg-info/top_level.txt +0 -0
  197. {modelstudio-0.4.0 → modelstudio-0.6.0}/setup.cfg +0 -0
  198. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_activations_more.py +0 -0
  199. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_attention.py +0 -0
  200. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_autograd.py +0 -0
  201. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_batchnorm.py +0 -0
  202. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_buffers.py +0 -0
  203. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_checkpoint_helpers.py +0 -0
  204. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_concat_stack.py +0 -0
  205. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_conv.py +0 -0
  206. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_creation_more.py +0 -0
  207. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_data.py +0 -0
  208. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_data_split.py +0 -0
  209. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_dataloader_seed.py +0 -0
  210. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_dispatcher.py +0 -0
  211. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_dropout.py +0 -0
  212. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_dtype_conversion.py +0 -0
  213. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_embedding.py +0 -0
  214. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_grad_clip.py +0 -0
  215. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_gradcheck.py +0 -0
  216. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_indexing.py +0 -0
  217. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_indexing_assignment.py +0 -0
  218. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_init.py +0 -0
  219. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_loss_reductions.py +0 -0
  220. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_losses.py +0 -0
  221. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_lr_scheduler.py +0 -0
  222. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_manipulation_ops.py +0 -0
  223. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_metrics.py +0 -0
  224. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_module_ergonomics.py +0 -0
  225. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_nn.py +0 -0
  226. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_norms.py +0 -0
  227. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_numpy_interop.py +0 -0
  228. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_ops.py +0 -0
  229. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_optim.py +0 -0
  230. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_optimizer_param_groups.py +0 -0
  231. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_optimizer_state.py +0 -0
  232. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_pooling.py +0 -0
  233. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_random.py +0 -0
  234. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_reductions_axis.py +0 -0
  235. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_serialization.py +0 -0
  236. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_shape_ops.py +0 -0
  237. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_state_dict.py +0 -0
  238. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_tensor.py +0 -0
  239. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_transformer.py +0 -0
  240. {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_unary_ops.py +0 -0
@@ -5,6 +5,16 @@ option(MODELSTUDIO_ENABLE_CUDA "Build CUDA backend" OFF)
5
5
  option(MODELSTUDIO_ENABLE_ROCM "Build ROCm backend" OFF)
6
6
  option(MODELSTUDIO_ENABLE_ONEAPI "Build oneAPI backend" OFF)
7
7
 
8
+ if(MODELSTUDIO_ENABLE_CUDA)
9
+ include(CheckLanguage)
10
+ check_language(CUDA)
11
+ if(NOT CMAKE_CUDA_COMPILER)
12
+ message(FATAL_ERROR "MODELSTUDIO_ENABLE_CUDA=ON requires an NVIDIA CUDA compiler/toolkit, but none was found.")
13
+ endif()
14
+ enable_language(CUDA)
15
+ find_package(CUDAToolkit REQUIRED)
16
+ endif()
17
+
8
18
  set(CMAKE_CXX_STANDARD 20)
9
19
  set(CMAKE_CXX_STANDARD_REQUIRED ON)
10
20
  set(CMAKE_CXX_EXTENSIONS OFF)
@@ -0,0 +1,278 @@
1
+ Metadata-Version: 2.4
2
+ Name: modelstudio
3
+ Version: 0.6.0
4
+ Summary: An early-stage AI tensor framework with CPU tensors, autograd, and backend extension scaffolding.
5
+ Author: ModelStudio Contributors
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/imattas/modelstudio
8
+ Project-URL: Repository, https://github.com/imattas/modelstudio
9
+ Project-URL: Issues, https://github.com/imattas/modelstudio/issues
10
+ Keywords: ai,autograd,deep-learning,neural-networks,tensor
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Classifier: Typing :: Typed
21
+ Requires-Python: >=3.10
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: numpy>=1.26
25
+ Provides-Extra: dev
26
+ Requires-Dist: pytest>=8; extra == "dev"
27
+ Requires-Dist: ruff>=0.6; extra == "dev"
28
+ Requires-Dist: build>=1.2; extra == "dev"
29
+ Requires-Dist: twine>=5; extra == "dev"
30
+ Dynamic: license-file
31
+
32
+ # ModelStudio
33
+
34
+ ModelStudio is an early-stage AI tensor framework. Version `0.6.0` provides a
35
+ CPU tensor/autograd MVP with neural-network modules, optimizers, serialization,
36
+ data loading, graph tracing metadata, backend status inspection, a public CUDA
37
+ availability namespace, and small LLM-oriented building blocks.
38
+
39
+ It is not a PyTorch or TensorFlow replacement. The default PyPI package is
40
+ CPU-only. CUDA, ROCm, and oneAPI remain explicit scaffolds until real kernels
41
+ are built and tested in hardware-backed environments.
42
+
43
+ ## Installation
44
+
45
+ From PyPI:
46
+
47
+ ```bash
48
+ python -m pip install modelstudio
49
+ ```
50
+
51
+ For development:
52
+
53
+ ```bash
54
+ python -m pip install -e ".[dev]"
55
+ ```
56
+
57
+ ## Feature Table
58
+
59
+ | Area | Status |
60
+ | --- | --- |
61
+ | CPU tensors | Working MVP |
62
+ | Autograd | Reverse-mode for core CPU ops |
63
+ | Reductions | `sum`, `mean`, `max`, `all`, and `any`; `max` is value-only |
64
+ | Comparisons | Elementwise comparisons, `equal`, `isclose`, and `allclose` |
65
+ | Activations | ReLU, GELU, LeakyReLU, ELU, Softplus, exp, log, tanh, sigmoid, SiLU, softmax, log-softmax |
66
+ | Losses | MSE and cross entropy with `none`, `mean`, and `sum` reductions |
67
+ | Functional API | `modelstudio.nn.functional` wrappers for common NN operations |
68
+ | Modules | Parameters, buffers, child traversal, state dicts, save/load |
69
+ | Layers | Linear, Embedding, LayerNorm, RMSNorm, BatchNorm1d, Dropout, Conv1d, Conv2d, pooling, TransformerBlock |
70
+ | Optimizers | SGD and AdamW with state serialization, parameter groups, and LR schedulers |
71
+ | Data | Dataset, TensorDataset, random_split, DataLoader with deterministic seeded shuffle |
72
+ | Randomness | `manual_seed`, `ms.random`, RNG-backed creation, dropout, and init helpers |
73
+ | Linalg | `ms.linalg.matmul`, `norm`, `vector_norm`, and `transpose` |
74
+ | Interop | `asarray`, `from_numpy`, `to_numpy`, and `ms.numpy` |
75
+ | Metrics | accuracy and top-k accuracy |
76
+ | Compiler | Metadata-only tracing plus placeholder IR and passes |
77
+ | CUDA API | Availability, device-count, sync, and memory-status facade; tensor execution is not implemented in the CPU wheel |
78
+
79
+ ## Architecture
80
+
81
+ ```text
82
+ Python frontend
83
+ -> Tensor, nn, optim, autograd, ops
84
+ -> runtime dispatcher
85
+ -> backend interface
86
+ -> NumPy CPU backend today
87
+ -> optional native CPU / CUDA / ROCm / oneAPI extensions later
88
+
89
+ Native scaffold
90
+ -> core metadata
91
+ -> dispatcher interfaces
92
+ -> CPU kernel prototypes
93
+ -> CUDA, ROCm, oneAPI backend directories
94
+ ```
95
+
96
+ ## Backend Status
97
+
98
+ ```python
99
+ import modelstudio as ms
100
+
101
+ print(ms.backends.status())
102
+ print(ms.backends.native_cpu_available())
103
+ ```
104
+
105
+ Expected structure of the `ms.backends.status()` result:
106
+
107
+ ```python
108
+ {
109
+ "cpu": {"available": True, "native": False},
110
+ "cuda": {"available": False, "built": False, "device_count": 0, "reason": "..."},
111
+ "rocm": {"available": False, "reason": "..."},
112
+ "oneapi": {"available": False, "reason": "..."},
113
+ }
114
+ ```
115
+
116
+ The production CPU path is the NumPy backend. `ms.backends.use_native_cpu(True)`
117
+ raises `ModelStudioBackendUnavailable` unless a future optional native extension
118
+ is actually installed. Unsupported accelerator devices fail with
119
+ `ModelStudioBackendUnavailable`.
120
+
121
+ CUDA availability can also be checked through the public namespace:
122
+
123
+ ```python
124
+ print(ms.cuda.is_available())
125
+ print(ms.cuda.device_count())
126
+ print(ms.cuda.memory_summary())
127
+ ```
128
+
129
+ In the CPU-only wheel, explicit CUDA tensor requests raise a clear runtime error
130
+ instead of falling back to CPU.
131
+
132
+ ## Tensor Example
133
+
134
+ ```python
135
+ import modelstudio as ms
136
+
137
+ x = ms.randn((32, 784), requires_grad=True)
138
+ w = ms.randn((784, 10), requires_grad=True)
139
+ loss = (x @ w).mean()
140
+ loss.backward()
141
+ print(w.grad)
142
+ ```
143
+
144
+ ## Functional API
145
+
146
+ ```python
147
+ import modelstudio as ms
148
+ from modelstudio import nn
149
+ from modelstudio.nn import functional as F
150
+
151
+ model = nn.Linear(4, 2)
152
+ x = ms.random.randn((8, 4))
153
+ target = ms.random.randn((8, 2))
154
+ loss = F.mse_loss(F.relu(F.linear(x, model.weight, model.bias)), target)
155
+ ```
156
+
157
+ ## Tracing
158
+
159
+ ```python
160
+ import modelstudio as ms
161
+ from modelstudio.nn import functional as F
162
+
163
+ x = ms.random.randn((4, 3))
164
+ w = ms.random.randn((3, 2))
165
+ graph = ms.trace(lambda a, b: F.relu(a @ b), x, w)
166
+ print(graph)
167
+ ```
168
+
169
+ Tracing captures operation names and tensor metadata. It does not optimize or
170
+ execute graphs yet. `ms.compile(fn)` remains a documented no-op that returns the
171
+ original callable.
172
+
173
+ ## Random And Linalg
174
+
175
+ ```python
176
+ ms.random.seed(123)
177
+ x = ms.random.normal((4, 3), mean=0.0, std=1.0)
178
+ w = ms.random.uniform((3, 2), low=-0.1, high=0.1)
179
+ y = ms.linalg.matmul(x, w)
180
+ print(ms.linalg.norm(y).item())
181
+ ```
182
+
183
+ ## Comparisons
184
+
185
+ ```python
186
+ x = ms.tensor([1.0, 2.0, 3.0])
187
+ y = ms.tensor([1.0, 2.1, 3.0])
188
+ print(ms.isclose(x, y, atol=0.05))
189
+ print(ms.allclose(x, y, atol=0.05))
190
+ print((x > 1.5).any().item())
191
+ ```
192
+
193
+ Comparison and logical outputs are bool tensors and do not track gradients.
194
+
195
+ ## Checkpointing
196
+
197
+ ```python
198
+ model = nn.Linear(4, 2)
199
+ optimizer = ms.optim.AdamW(model.parameters(), lr=1e-3)
200
+ ms.save_checkpoint("checkpoint.ms", model=model, optimizer=optimizer, extra={"epoch": 1})
201
+ checkpoint = ms.load_checkpoint("checkpoint.ms", model=model, optimizer=optimizer, map_location="cpu")
202
+ ```
203
+
204
+ Checkpoint loading validates structure and model state. CPU is the only accepted
205
+ `map_location` in the current release.
206
+
207
+ ## Commands
208
+
209
+ ```bash
210
+ python -m pytest
211
+ python scripts/smoke_test.py
212
+ python examples/train_mlp.py
213
+ python examples/train_classifier.py
214
+ python examples/tiny_transformer.py
215
+ python examples/save_load.py
216
+ python examples/train_cnn_toy.py
217
+ python examples/dropout_batchnorm.py
218
+ python examples/checkpoint_training.py
219
+ python examples/numpy_interop.py
220
+ python examples/scheduler_training.py
221
+ python examples/checkpoint_resume.py
222
+ python examples/metrics_demo.py
223
+ python examples/backend_status.py
224
+ python examples/tracing_demo.py
225
+ python examples/functional_training.py
226
+ python examples/random_linalg_demo.py
227
+ python examples/cuda_tensor_demo.py
228
+ python examples/cuda_mlp_demo.py
229
+ python benchmarks/bench_matmul.py
230
+ python benchmarks/bench_mlp.py
231
+ python benchmarks/bench_attention.py
232
+ python benchmarks/bench_dataloader.py
233
+ python benchmarks/bench_conv.py
234
+ python benchmarks/bench_dropout.py
235
+ python benchmarks/bench_creation.py
236
+ python benchmarks/bench_manipulation.py
237
+ python benchmarks/bench_elementwise.py
238
+ python benchmarks/bench_trace.py
239
+ python benchmarks/bench_cuda_elementwise.py
240
+ python benchmarks/bench_cuda_matmul.py
241
+ ```
242
+
243
+ ## Documentation
244
+
245
+ - [Backend status](docs/backend-status.md)
246
+ - [CUDA status](docs/cuda.md)
247
+ - [Tracing](docs/tracing.md)
248
+ - [Functional API](docs/functional-api.md)
249
+ - [Random namespace](docs/random.md)
250
+ - [Linalg namespace](docs/linalg.md)
251
+ - [Comparison ops](docs/comparison-ops.md)
252
+ - [Tensor API](docs/tensor-api.md)
253
+ - [Neural network API](docs/nn.md)
254
+ - [Data utilities](docs/data.md)
255
+ - [Training](docs/training.md)
256
+ - [Modules](docs/modules.md)
257
+ - [Serialization](docs/serialization.md)
258
+ - [Native backend roadmap](docs/native-backend-roadmap.md)
259
+ - [NumPy interop](docs/numpy-interop.md)
260
+ - [Tensor creation](docs/tensor-creation.md)
261
+ - [Tensor manipulation](docs/tensor-manipulation.md)
262
+ - [Optimizers](docs/optimizers.md)
263
+ - [Checkpointing](docs/checkpointing.md)
264
+ - [Metrics](docs/metrics.md)
265
+ - [Backend architecture](docs/backend-architecture.md)
266
+ - [Autograd design](docs/autograd.md)
267
+ - [Releasing](docs/releasing.md)
268
+ - [Contributing](CONTRIBUTING.md)
269
+
270
+ ## Roadmap
271
+
272
+ - Expand tensor and autograd coverage.
273
+ - Wire optional native CPU kernels only after a safe Python extension exists.
274
+ - Build a real optional CUDA package after tensor storage, kernels, bindings,
275
+ and hardware-backed CI are in place.
276
+ - Add tested ROCm and oneAPI packages after CUDA establishes the accelerator
277
+ backend contract.
278
+ - Improve compiler graph capture, analysis passes, and lowering.
@@ -0,0 +1,247 @@
1
+ # ModelStudio
2
+
3
+ ModelStudio is an early-stage AI tensor framework. Version `0.6.0` provides a
4
+ CPU tensor/autograd MVP with neural-network modules, optimizers, serialization,
5
+ data loading, graph tracing metadata, backend status inspection, a public CUDA
6
+ availability namespace, and small LLM-oriented building blocks.
7
+
8
+ It is not a PyTorch or TensorFlow replacement. The default PyPI package is
9
+ CPU-only. CUDA, ROCm, and oneAPI remain explicit scaffolds until real kernels
10
+ are built and tested in hardware-backed environments.
11
+
12
+ ## Installation
13
+
14
+ From PyPI:
15
+
16
+ ```bash
17
+ python -m pip install modelstudio
18
+ ```
19
+
20
+ For development:
21
+
22
+ ```bash
23
+ python -m pip install -e ".[dev]"
24
+ ```
25
+
26
+ ## Feature Table
27
+
28
+ | Area | Status |
29
+ | --- | --- |
30
+ | CPU tensors | Working MVP |
31
+ | Autograd | Reverse-mode for core CPU ops |
32
+ | Reductions | `sum`, `mean`, `max`, `all`, and `any`; `max` is value-only |
33
+ | Comparisons | Elementwise comparisons, `equal`, `isclose`, and `allclose` |
34
+ | Activations | ReLU, GELU, LeakyReLU, ELU, Softplus, exp, log, tanh, sigmoid, SiLU, softmax, log-softmax |
35
+ | Losses | MSE and cross entropy with `none`, `mean`, and `sum` reductions |
36
+ | Functional API | `modelstudio.nn.functional` wrappers for common NN operations |
37
+ | Modules | Parameters, buffers, child traversal, state dicts, save/load |
38
+ | Layers | Linear, Embedding, LayerNorm, RMSNorm, BatchNorm1d, Dropout, Conv1d, Conv2d, pooling, TransformerBlock |
39
+ | Optimizers | SGD and AdamW with state serialization, parameter groups, and LR schedulers |
40
+ | Data | Dataset, TensorDataset, random_split, DataLoader with deterministic seeded shuffle |
41
+ | Randomness | `manual_seed`, `ms.random`, RNG-backed creation, dropout, and init helpers |
42
+ | Linalg | `ms.linalg.matmul`, `norm`, `vector_norm`, and `transpose` |
43
+ | Interop | `asarray`, `from_numpy`, `to_numpy`, and `ms.numpy` |
44
+ | Metrics | accuracy and top-k accuracy |
45
+ | Compiler | Metadata-only tracing plus placeholder IR and passes |
46
+ | CUDA API | Availability, device-count, sync, and memory-status facade; tensor execution is not implemented in the CPU wheel |
47
+
48
+ ## Architecture
49
+
50
+ ```text
51
+ Python frontend
52
+ -> Tensor, nn, optim, autograd, ops
53
+ -> runtime dispatcher
54
+ -> backend interface
55
+ -> NumPy CPU backend today
56
+ -> optional native CPU / CUDA / ROCm / oneAPI extensions later
57
+
58
+ Native scaffold
59
+ -> core metadata
60
+ -> dispatcher interfaces
61
+ -> CPU kernel prototypes
62
+ -> CUDA, ROCm, oneAPI backend directories
63
+ ```
64
+
65
+ ## Backend Status
66
+
67
+ ```python
68
+ import modelstudio as ms
69
+
70
+ print(ms.backends.status())
71
+ print(ms.backends.native_cpu_available())
72
+ ```
73
+
74
+ Expected structure of the `ms.backends.status()` result:
75
+
76
+ ```python
77
+ {
78
+ "cpu": {"available": True, "native": False},
79
+ "cuda": {"available": False, "built": False, "device_count": 0, "reason": "..."},
80
+ "rocm": {"available": False, "reason": "..."},
81
+ "oneapi": {"available": False, "reason": "..."},
82
+ }
83
+ ```
84
+
85
+ The production CPU path is the NumPy backend. `ms.backends.use_native_cpu(True)`
86
+ raises `ModelStudioBackendUnavailable` unless a future optional native extension
87
+ is actually installed. Unsupported accelerator devices fail with
88
+ `ModelStudioBackendUnavailable`.
89
+
90
+ CUDA availability can also be checked through the public `ms.cuda` namespace:
91
+
92
+ ```python
93
+ print(ms.cuda.is_available())
94
+ print(ms.cuda.device_count())
95
+ print(ms.cuda.memory_summary())
96
+ ```
97
+
98
+ In the CPU-only wheel, explicit CUDA tensor requests raise a clear runtime error
99
+ instead of falling back to CPU.
100
+
101
+ ## Tensor Example
102
+
103
+ ```python
104
+ import modelstudio as ms
105
+
106
+ x = ms.randn((32, 784), requires_grad=True)
107
+ w = ms.randn((784, 10), requires_grad=True)
108
+ loss = (x @ w).mean()
109
+ loss.backward()
110
+ print(w.grad)
111
+ ```
112
+
113
+ ## Functional API
114
+
115
+ ```python
116
+ import modelstudio as ms
117
+ from modelstudio import nn
118
+ from modelstudio.nn import functional as F
119
+
120
+ model = nn.Linear(4, 2)
121
+ x = ms.random.randn((8, 4))
122
+ target = ms.random.randn((8, 2))
123
+ loss = F.mse_loss(F.relu(F.linear(x, model.weight, model.bias)), target)
124
+ ```
125
+
126
+ ## Tracing
127
+
128
+ ```python
129
+ import modelstudio as ms
130
+ from modelstudio.nn import functional as F
131
+
132
+ x = ms.random.randn((4, 3))
133
+ w = ms.random.randn((3, 2))
134
+ graph = ms.trace(lambda a, b: F.relu(a @ b), x, w)
135
+ print(graph)
136
+ ```
137
+
138
+ Tracing captures operation names and tensor metadata. It does not optimize or
139
+ execute graphs yet. `ms.compile(fn)` remains a documented no-op that returns the
140
+ original callable.
141
+
142
+ ## Random And Linalg
143
+
144
+ ```python
145
+ ms.random.seed(123)
146
+ x = ms.random.normal((4, 3), mean=0.0, std=1.0)
147
+ w = ms.random.uniform((3, 2), low=-0.1, high=0.1)
148
+ y = ms.linalg.matmul(x, w)
149
+ print(ms.linalg.norm(y).item())
150
+ ```
151
+
152
+ ## Comparisons
153
+
154
+ ```python
155
+ x = ms.tensor([1.0, 2.0, 3.0])
156
+ y = ms.tensor([1.0, 2.1, 3.0])
157
+ print(ms.isclose(x, y, atol=0.05))
158
+ print(ms.allclose(x, y, atol=0.05))
159
+ print((x > 1.5).any().item())
160
+ ```
161
+
162
+ Comparison and logical outputs are bool tensors and do not track gradients.
163
+
164
+ ## Checkpointing
165
+
166
+ ```python
167
+ model = nn.Linear(4, 2)
168
+ optimizer = ms.optim.AdamW(model.parameters(), lr=1e-3)
169
+ ms.save_checkpoint("checkpoint.ms", model=model, optimizer=optimizer, extra={"epoch": 1})
170
+ checkpoint = ms.load_checkpoint("checkpoint.ms", model=model, optimizer=optimizer, map_location="cpu")
171
+ ```
172
+
173
+ Checkpoint loading validates structure and model state. CPU is the only accepted
174
+ `map_location` in the current release.
175
+
176
+ ## Commands
177
+
178
+ ```bash
179
+ python -m pytest
180
+ python scripts/smoke_test.py
181
+ python examples/train_mlp.py
182
+ python examples/train_classifier.py
183
+ python examples/tiny_transformer.py
184
+ python examples/save_load.py
185
+ python examples/train_cnn_toy.py
186
+ python examples/dropout_batchnorm.py
187
+ python examples/checkpoint_training.py
188
+ python examples/numpy_interop.py
189
+ python examples/scheduler_training.py
190
+ python examples/checkpoint_resume.py
191
+ python examples/metrics_demo.py
192
+ python examples/backend_status.py
193
+ python examples/tracing_demo.py
194
+ python examples/functional_training.py
195
+ python examples/random_linalg_demo.py
196
+ python examples/cuda_tensor_demo.py
197
+ python examples/cuda_mlp_demo.py
198
+ python benchmarks/bench_matmul.py
199
+ python benchmarks/bench_mlp.py
200
+ python benchmarks/bench_attention.py
201
+ python benchmarks/bench_dataloader.py
202
+ python benchmarks/bench_conv.py
203
+ python benchmarks/bench_dropout.py
204
+ python benchmarks/bench_creation.py
205
+ python benchmarks/bench_manipulation.py
206
+ python benchmarks/bench_elementwise.py
207
+ python benchmarks/bench_trace.py
208
+ python benchmarks/bench_cuda_elementwise.py
209
+ python benchmarks/bench_cuda_matmul.py
210
+ ```
211
+
212
+ ## Documentation
213
+
214
+ - [Backend status](docs/backend-status.md)
215
+ - [CUDA status](docs/cuda.md)
216
+ - [Tracing](docs/tracing.md)
217
+ - [Functional API](docs/functional-api.md)
218
+ - [Random namespace](docs/random.md)
219
+ - [Linalg namespace](docs/linalg.md)
220
+ - [Comparison ops](docs/comparison-ops.md)
221
+ - [Tensor API](docs/tensor-api.md)
222
+ - [Neural network API](docs/nn.md)
223
+ - [Data utilities](docs/data.md)
224
+ - [Training](docs/training.md)
225
+ - [Modules](docs/modules.md)
226
+ - [Serialization](docs/serialization.md)
227
+ - [Native backend roadmap](docs/native-backend-roadmap.md)
228
+ - [NumPy interop](docs/numpy-interop.md)
229
+ - [Tensor creation](docs/tensor-creation.md)
230
+ - [Tensor manipulation](docs/tensor-manipulation.md)
231
+ - [Optimizers](docs/optimizers.md)
232
+ - [Checkpointing](docs/checkpointing.md)
233
+ - [Metrics](docs/metrics.md)
234
+ - [Backend architecture](docs/backend-architecture.md)
235
+ - [Autograd design](docs/autograd.md)
236
+ - [Releasing](docs/releasing.md)
237
+ - [Contributing](CONTRIBUTING.md)
238
+
239
+ ## Roadmap
240
+
241
+ - Expand tensor and autograd coverage.
242
+ - Wire optional native CPU kernels only after a safe Python extension exists.
243
+ - Build a real optional CUDA package after tensor storage, kernels, bindings,
244
+ and hardware-backed CI are in place.
245
+ - Add tested ROCm and oneAPI packages after CUDA establishes the accelerator
246
+ backend contract.
247
+ - Improve compiler graph capture, analysis passes, and lowering.
@@ -0,0 +1,54 @@
1
+ from __future__ import annotations
2
+
3
+ import platform
4
+ import time
5
+ from collections.abc import Callable
6
+
7
+ import modelstudio as ms
8
+
9
+
10
+ def _time_ms(fn: Callable[[], object], warmup: int, iterations: int, *, synchronize: bool) -> float:
11
+ for _ in range(warmup):
12
+ fn()
13
+ if synchronize:
14
+ ms.cuda.synchronize()
15
+ start = time.perf_counter()
16
+ for _ in range(iterations):
17
+ fn()
18
+ if synchronize:
19
+ ms.cuda.synchronize()
20
+ return (time.perf_counter() - start) * 1000.0 / iterations
21
+
22
+
23
def main() -> None:
    """Print the benchmark environment, then time CUDA ``add`` and ``relu``.

    When ``ms.cuda.is_available()`` is false, the CUDA memory summary and a
    skip notice are printed and no tensors are created.
    """
    dims = (1024, 1024)
    n_warmup, n_timed = 5, 50

    # Environment header, printed whether or not CUDA is usable.
    print(f"Python: {platform.python_version()}")
    print(f"NumPy: {ms.numpy.__version__}")
    print(f"ModelStudio: {ms.__version__}")
    print(f"CUDA: available={ms.cuda.is_available()} device_count={ms.cuda.device_count()}")
    print(f"Shape: {dims}")
    print(f"Warmup: {n_warmup}")
    print(f"Iterations: {n_timed}")

    if ms.cuda.is_available():
        ms.manual_seed(123)
        lhs = ms.randn(dims, device="cuda")
        rhs = ms.randn(dims, device="cuda")

        def _add() -> object:
            return lhs + rhs

        def _relu() -> object:
            return ms.relu(lhs)

        add_avg = _time_ms(_add, n_warmup, n_timed, synchronize=True)
        relu_avg = _time_ms(_relu, n_warmup, n_timed, synchronize=True)

        print(f"CUDA add avg: {add_avg:.3f} ms")
        print(f"CUDA relu avg: {relu_avg:.3f} ms")
        print(ms.cuda.memory_summary())
    else:
        print(ms.cuda.memory_summary())
        print("Skipping CUDA elementwise benchmark because CUDA tensor execution is not available.")


# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,52 @@
1
+ from __future__ import annotations
2
+
3
+ import platform
4
+ import time
5
+ from collections.abc import Callable
6
+
7
+ import modelstudio as ms
8
+
9
+
10
+ def _time_ms(fn: Callable[[], object], warmup: int, iterations: int, *, synchronize: bool) -> float:
11
+ for _ in range(warmup):
12
+ fn()
13
+ if synchronize:
14
+ ms.cuda.synchronize()
15
+ start = time.perf_counter()
16
+ for _ in range(iterations):
17
+ fn()
18
+ if synchronize:
19
+ ms.cuda.synchronize()
20
+ return (time.perf_counter() - start) * 1000.0 / iterations
21
+
22
+
23
def main() -> None:
    """Print the benchmark environment, then time a CUDA matrix multiply.

    When ``ms.cuda.is_available()`` is false, the CUDA memory summary and a
    skip notice are printed and no tensors are created.
    """
    dims = (512, 512)
    n_warmup, n_timed = 3, 20

    # Environment header, printed whether or not CUDA is usable.
    print(f"Python: {platform.python_version()}")
    print(f"NumPy: {ms.numpy.__version__}")
    print(f"ModelStudio: {ms.__version__}")
    print(f"CUDA: available={ms.cuda.is_available()} device_count={ms.cuda.device_count()}")
    print(f"Shape: {dims} x {dims}")
    print(f"Warmup: {n_warmup}")
    print(f"Iterations: {n_timed}")

    if ms.cuda.is_available():
        ms.manual_seed(123)
        lhs = ms.randn(dims, device="cuda")
        rhs = ms.randn(dims, device="cuda")

        def _matmul() -> object:
            return lhs @ rhs

        matmul_avg = _time_ms(_matmul, n_warmup, n_timed, synchronize=True)

        print(f"CUDA matmul avg: {matmul_avg:.3f} ms")
        print(ms.cuda.memory_summary())
    else:
        print(ms.cuda.memory_summary())
        print("Skipping CUDA matmul benchmark because CUDA tensor execution is not available.")


# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,43 @@
1
+ from __future__ import annotations
2
+
3
+ import platform
4
+ import time
5
+
6
+ import modelstudio as ms
7
+
8
+
9
+ def _time_ms(fn, warmup: int, iterations: int) -> float:
10
+ for _ in range(warmup):
11
+ fn()
12
+ start = time.perf_counter()
13
+ for _ in range(iterations):
14
+ fn()
15
+ return (time.perf_counter() - start) * 1000.0 / iterations
16
+
17
+
18
def main() -> None:
    """Time ``add``, ``relu`` and ``>`` on two seeded random tensors and print a report.

    All three measurements are taken first; the environment details and the
    averages are printed afterwards.
    """
    dims = (1024, 1024)
    n_warmup, n_timed = 5, 50

    ms.random.seed(123)
    lhs = ms.random.randn(dims)
    rhs = ms.random.randn(dims)

    def _add():
        return lhs + rhs

    def _relu():
        return ms.relu(lhs)

    def _compare():
        return lhs > rhs

    add_avg = _time_ms(_add, n_warmup, n_timed)
    relu_avg = _time_ms(_relu, n_warmup, n_timed)
    cmp_avg = _time_ms(_compare, n_warmup, n_timed)

    # Environment details first, then the per-operation averages.
    print(f"Python: {platform.python_version()}")
    print(f"NumPy: {ms.numpy.__version__}")
    print(f"ModelStudio: {ms.__version__}")
    print(f"Shape: {dims}")
    print(f"Warmup: {n_warmup}")
    print(f"Iterations: {n_timed}")
    print(f"Backend: {ms.backends.status()}")
    print(f"add avg: {add_avg:.3f} ms")
    print(f"relu avg: {relu_avg:.3f} ms")
    print(f"compare avg: {cmp_avg:.3f} ms")


# Run the benchmark only when this file is executed as a script.
if __name__ == "__main__":
    main()