modelstudio 0.4.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {modelstudio-0.4.0 → modelstudio-0.6.0}/CMakeLists.txt +10 -0
- modelstudio-0.6.0/PKG-INFO +278 -0
- modelstudio-0.6.0/README.md +247 -0
- modelstudio-0.6.0/benchmarks/bench_cuda_elementwise.py +54 -0
- modelstudio-0.6.0/benchmarks/bench_cuda_matmul.py +52 -0
- modelstudio-0.6.0/benchmarks/bench_elementwise.py +43 -0
- modelstudio-0.6.0/benchmarks/bench_trace.py +46 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/CMakeLists.txt +10 -2
- modelstudio-0.6.0/csrc/backends/cuda/README.md +19 -0
- modelstudio-0.6.0/csrc/backends/cuda/cuda_backend.cu +28 -0
- modelstudio-0.6.0/csrc/backends/cuda/cuda_context.cu +37 -0
- modelstudio-0.6.0/csrc/backends/cuda/cuda_context.hpp +10 -0
- modelstudio-0.6.0/csrc/backends/cuda/cuda_kernels.hpp +16 -0
- modelstudio-0.6.0/csrc/backends/cuda/cuda_memory.cu +34 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/cuda/cuda_memory.hpp +2 -0
- modelstudio-0.6.0/csrc/backends/cuda/cuda_stream.cu +13 -0
- modelstudio-0.6.0/csrc/backends/cuda/cuda_stream.hpp +7 -0
- modelstudio-0.6.0/csrc/backends/cuda/kernels/elementwise.cu +27 -0
- modelstudio-0.6.0/csrc/backends/cuda/kernels/matmul.cu +13 -0
- modelstudio-0.6.0/csrc/backends/cuda/kernels/reductions.cu +15 -0
- modelstudio-0.6.0/csrc/bindings/cuda_bindings.cpp +12 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/backend-architecture.md +3 -3
- modelstudio-0.6.0/docs/backend-status.md +37 -0
- modelstudio-0.6.0/docs/comparison-ops.md +22 -0
- modelstudio-0.6.0/docs/cuda.md +53 -0
- modelstudio-0.6.0/docs/functional-api.md +21 -0
- modelstudio-0.6.0/docs/linalg.md +22 -0
- modelstudio-0.6.0/docs/native-backend-roadmap.md +44 -0
- modelstudio-0.6.0/docs/random.md +25 -0
- modelstudio-0.6.0/docs/serialization.md +34 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/tensor-api.md +1 -1
- {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/tensor-creation.md +1 -2
- modelstudio-0.6.0/docs/tracing.md +20 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/training.md +1 -1
- modelstudio-0.6.0/examples/backend_status.py +26 -0
- modelstudio-0.6.0/examples/cuda_mlp_demo.py +45 -0
- modelstudio-0.6.0/examples/cuda_tensor_demo.py +33 -0
- modelstudio-0.6.0/examples/functional_training.py +34 -0
- modelstudio-0.6.0/examples/random_linalg_demo.py +19 -0
- modelstudio-0.6.0/examples/tracing_demo.py +19 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/pyproject.toml +1 -1
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/__init__.py +22 -14
- modelstudio-0.6.0/python/modelstudio/_version.py +1 -0
- modelstudio-0.6.0/python/modelstudio/backends/__init__.py +3 -0
- modelstudio-0.6.0/python/modelstudio/backends/cuda.py +84 -0
- modelstudio-0.6.0/python/modelstudio/backends/status.py +78 -0
- modelstudio-0.6.0/python/modelstudio/compile/graph_capture.py +83 -0
- modelstudio-0.6.0/python/modelstudio/compile/ir.py +62 -0
- modelstudio-0.6.0/python/modelstudio/cuda/__init__.py +13 -0
- modelstudio-0.6.0/python/modelstudio/cuda/device.py +45 -0
- modelstudio-0.6.0/python/modelstudio/cuda/memory.py +21 -0
- modelstudio-0.6.0/python/modelstudio/cuda/streams.py +12 -0
- modelstudio-0.6.0/python/modelstudio/linalg.py +31 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/__init__.py +2 -1
- modelstudio-0.6.0/python/modelstudio/nn/functional.py +162 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/module.py +6 -1
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/ops/__init__.py +6 -0
- modelstudio-0.6.0/python/modelstudio/ops/comparison.py +76 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/ops/linalg.py +5 -1
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/ops/math.py +10 -2
- modelstudio-0.6.0/python/modelstudio/random.py +120 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/serialization.py +40 -12
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/tensor.py +51 -1
- modelstudio-0.6.0/python/modelstudio.egg-info/PKG-INFO +278 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio.egg-info/SOURCES.txt +54 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/scripts/smoke_test.py +33 -6
- modelstudio-0.6.0/tests/test_backend_status.py +43 -0
- modelstudio-0.6.0/tests/test_clone_copy.py +42 -0
- modelstudio-0.6.0/tests/test_comparison_ops.py +54 -0
- modelstudio-0.6.0/tests/test_cuda_autograd.py +72 -0
- modelstudio-0.6.0/tests/test_cuda_availability.py +77 -0
- modelstudio-0.6.0/tests/test_cuda_memory.py +43 -0
- modelstudio-0.6.0/tests/test_cuda_nn.py +36 -0
- modelstudio-0.6.0/tests/test_cuda_ops.py +93 -0
- modelstudio-0.6.0/tests/test_cuda_tensor.py +52 -0
- modelstudio-0.6.0/tests/test_functional.py +113 -0
- modelstudio-0.6.0/tests/test_linalg.py +35 -0
- modelstudio-0.6.0/tests/test_native_cpu_mode.py +34 -0
- modelstudio-0.6.0/tests/test_public_exports.py +27 -0
- modelstudio-0.6.0/tests/test_random_namespace.py +44 -0
- modelstudio-0.6.0/tests/test_scalar_behavior.py +40 -0
- modelstudio-0.6.0/tests/test_serialization_hardening.py +76 -0
- modelstudio-0.6.0/tests/test_trace.py +59 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_version.py +1 -1
- modelstudio-0.4.0/PKG-INFO +0 -265
- modelstudio-0.4.0/README.md +0 -234
- modelstudio-0.4.0/csrc/backends/cuda/README.md +0 -14
- modelstudio-0.4.0/csrc/backends/cuda/cuda_backend.cu +0 -32
- modelstudio-0.4.0/docs/native-backend-roadmap.md +0 -25
- modelstudio-0.4.0/docs/serialization.md +0 -25
- modelstudio-0.4.0/python/modelstudio/_version.py +0 -1
- modelstudio-0.4.0/python/modelstudio/compile/graph_capture.py +0 -12
- modelstudio-0.4.0/python/modelstudio/compile/ir.py +0 -37
- modelstudio-0.4.0/python/modelstudio/random.py +0 -20
- modelstudio-0.4.0/python/modelstudio.egg-info/PKG-INFO +0 -265
- {modelstudio-0.4.0 → modelstudio-0.6.0}/LICENSE +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/MANIFEST.in +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/benchmarks/bench_attention.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/benchmarks/bench_conv.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/benchmarks/bench_creation.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/benchmarks/bench_dataloader.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/benchmarks/bench_dropout.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/benchmarks/bench_manipulation.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/benchmarks/bench_matmul.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/benchmarks/bench_mlp.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/cpu/cpu_backend.cpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/cpu/cpu_backend.hpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/cpu/kernels/add.cpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/cpu/kernels/matmul.cpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/cpu/kernels/mul.cpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/cpu/kernels/relu.cpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/cuda/cuda_backend.hpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/oneapi/README.md +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/oneapi/oneapi_backend.cpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/oneapi/oneapi_backend.hpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/oneapi/sycl_memory.hpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/rocm/README.md +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/rocm/hip_memory.hpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/rocm/rocm_backend.cpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/backends/rocm/rocm_backend.hpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/bindings/python_bindings.cpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/core/device.hpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/core/dtype.hpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/core/error.hpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/core/shape.hpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/core/storage.hpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/core/tensor.hpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/dispatcher/backend.hpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/dispatcher/dispatcher.hpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/csrc/dispatcher/operator_registry.hpp +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/autograd.md +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/checkpointing.md +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/data.md +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/metrics.md +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/modules.md +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/nn.md +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/numpy-interop.md +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/optimizers.md +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/randomness.md +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/releasing.md +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/docs/tensor-manipulation.md +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/checkpoint_resume.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/checkpoint_training.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/dropout_batchnorm.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/metrics_demo.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/numpy_interop.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/save_load.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/scheduler_training.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/tiny_transformer.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/train_classifier.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/train_cnn_toy.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/examples/train_mlp.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/autograd/__init__.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/autograd/engine.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/autograd/function.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/autograd/grad_mode.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/compile/__init__.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/compile/passes.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/data/__init__.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/data/dataloader.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/data/dataset.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/device.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/dtypes.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/errors.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/interop.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/metrics/__init__.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/metrics/classification.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/activations.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/convolution.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/embedding.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/init.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/linear.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/losses.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/normalization.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/parameter.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/pooling.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/transformer.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/nn/utils.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/ops/creation.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/ops/movement.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/ops/reductions.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/optim/__init__.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/optim/adamw.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/optim/lr_scheduler.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/optim/optimizer.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/optim/sgd.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/py.typed +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/runtime/__init__.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/runtime/backend.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/runtime/dispatcher.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/storage.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/testing/__init__.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio/testing/gradcheck.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio.egg-info/dependency_links.txt +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio.egg-info/requires.txt +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/python/modelstudio.egg-info/top_level.txt +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/setup.cfg +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_activations_more.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_attention.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_autograd.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_batchnorm.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_buffers.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_checkpoint_helpers.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_concat_stack.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_conv.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_creation_more.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_data.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_data_split.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_dataloader_seed.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_dispatcher.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_dropout.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_dtype_conversion.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_embedding.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_grad_clip.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_gradcheck.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_indexing.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_indexing_assignment.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_init.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_loss_reductions.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_losses.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_lr_scheduler.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_manipulation_ops.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_metrics.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_module_ergonomics.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_nn.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_norms.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_numpy_interop.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_ops.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_optim.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_optimizer_param_groups.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_optimizer_state.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_pooling.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_random.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_reductions_axis.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_serialization.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_shape_ops.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_state_dict.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_tensor.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_transformer.py +0 -0
- {modelstudio-0.4.0 → modelstudio-0.6.0}/tests/test_unary_ops.py +0 -0
|
@@ -5,6 +5,16 @@ option(MODELSTUDIO_ENABLE_CUDA "Build CUDA backend" OFF)
|
|
|
5
5
|
option(MODELSTUDIO_ENABLE_ROCM "Build ROCm backend" OFF)
|
|
6
6
|
option(MODELSTUDIO_ENABLE_ONEAPI "Build oneAPI backend" OFF)
|
|
7
7
|
|
|
8
|
+
if(MODELSTUDIO_ENABLE_CUDA)
|
|
9
|
+
include(CheckLanguage)
|
|
10
|
+
check_language(CUDA)
|
|
11
|
+
if(NOT CMAKE_CUDA_COMPILER)
|
|
12
|
+
message(FATAL_ERROR "MODELSTUDIO_ENABLE_CUDA=ON requires an NVIDIA CUDA compiler/toolkit, but none was found.")
|
|
13
|
+
endif()
|
|
14
|
+
enable_language(CUDA)
|
|
15
|
+
find_package(CUDAToolkit REQUIRED)
|
|
16
|
+
endif()
|
|
17
|
+
|
|
8
18
|
set(CMAKE_CXX_STANDARD 20)
|
|
9
19
|
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
|
10
20
|
set(CMAKE_CXX_EXTENSIONS OFF)
|
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: modelstudio
|
|
3
|
+
Version: 0.6.0
|
|
4
|
+
Summary: An early-stage AI tensor framework with CPU tensors, autograd, and backend extension scaffolding.
|
|
5
|
+
Author: ModelStudio Contributors
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/imattas/modelstudio
|
|
8
|
+
Project-URL: Repository, https://github.com/imattas/modelstudio
|
|
9
|
+
Project-URL: Issues, https://github.com/imattas/modelstudio/issues
|
|
10
|
+
Keywords: ai,autograd,deep-learning,neural-networks,tensor
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Classifier: Typing :: Typed
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: numpy>=1.26
|
|
25
|
+
Provides-Extra: dev
|
|
26
|
+
Requires-Dist: pytest>=8; extra == "dev"
|
|
27
|
+
Requires-Dist: ruff>=0.6; extra == "dev"
|
|
28
|
+
Requires-Dist: build>=1.2; extra == "dev"
|
|
29
|
+
Requires-Dist: twine>=5; extra == "dev"
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
# ModelStudio
|
|
33
|
+
|
|
34
|
+
ModelStudio is an early-stage AI tensor framework. Version `0.6.0` provides a
|
|
35
|
+
CPU tensor/autograd MVP with neural-network modules, optimizers, serialization,
|
|
36
|
+
data loading, graph tracing metadata, backend status inspection, a public CUDA
|
|
37
|
+
availability namespace, and small LLM-oriented building blocks.
|
|
38
|
+
|
|
39
|
+
It is not a PyTorch or TensorFlow replacement. The default PyPI package is
|
|
40
|
+
CPU-only. CUDA, ROCm, and oneAPI remain explicit scaffolds until real kernels
|
|
41
|
+
are built and tested in hardware-backed environments.
|
|
42
|
+
|
|
43
|
+
## Installation
|
|
44
|
+
|
|
45
|
+
From PyPI:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
python -m pip install modelstudio
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
For development:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
python -m pip install -e ".[dev]"
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Feature Table
|
|
58
|
+
|
|
59
|
+
| Area | Status |
|
|
60
|
+
| --- | --- |
|
|
61
|
+
| CPU tensors | Working MVP |
|
|
62
|
+
| Autograd | Reverse-mode for core CPU ops |
|
|
63
|
+
| Reductions | `sum`, `mean`, `max`, `all`, and `any`; `max` is value-only |
|
|
64
|
+
| Comparisons | Elementwise comparisons, `equal`, `isclose`, and `allclose` |
|
|
65
|
+
| Activations | ReLU, GELU, LeakyReLU, ELU, Softplus, exp, log, tanh, sigmoid, SiLU, softmax, log-softmax |
|
|
66
|
+
| Losses | MSE and cross entropy with `none`, `mean`, and `sum` reductions |
|
|
67
|
+
| Functional API | `modelstudio.nn.functional` wrappers for common NN operations |
|
|
68
|
+
| Modules | Parameters, buffers, child traversal, state dicts, save/load |
|
|
69
|
+
| Layers | Linear, Embedding, LayerNorm, RMSNorm, BatchNorm1d, Dropout, Conv1d, Conv2d, pooling, TransformerBlock |
|
|
70
|
+
| Optimizers | SGD and AdamW with state serialization, parameter groups, and LR schedulers |
|
|
71
|
+
| Data | Dataset, TensorDataset, random_split, DataLoader with deterministic seeded shuffle |
|
|
72
|
+
| Randomness | `manual_seed`, `ms.random`, RNG-backed creation, dropout, and init helpers |
|
|
73
|
+
| Linalg | `ms.linalg.matmul`, `norm`, `vector_norm`, and `transpose` |
|
|
74
|
+
| Interop | `asarray`, `from_numpy`, `to_numpy`, and `ms.numpy` |
|
|
75
|
+
| Metrics | accuracy and top-k accuracy |
|
|
76
|
+
| Compiler | Metadata-only tracing plus placeholder IR and passes |
|
|
77
|
+
| CUDA API | Availability, device-count, sync, and memory-status facade; tensor execution is not implemented in the CPU wheel |
|
|
78
|
+
|
|
79
|
+
## Architecture
|
|
80
|
+
|
|
81
|
+
```text
|
|
82
|
+
Python frontend
|
|
83
|
+
-> Tensor, nn, optim, autograd, ops
|
|
84
|
+
-> runtime dispatcher
|
|
85
|
+
-> backend interface
|
|
86
|
+
-> NumPy CPU backend today
|
|
87
|
+
-> optional native CPU / CUDA / ROCm / oneAPI extensions later
|
|
88
|
+
|
|
89
|
+
Native scaffold
|
|
90
|
+
-> core metadata
|
|
91
|
+
-> dispatcher interfaces
|
|
92
|
+
-> CPU kernel prototypes
|
|
93
|
+
-> CUDA, ROCm, oneAPI backend directories
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Backend Status
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
import modelstudio as ms
|
|
100
|
+
|
|
101
|
+
print(ms.backends.status())
|
|
102
|
+
print(ms.backends.native_cpu_available())
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Expected structure of the `ms.backends.status()` result:
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
{
|
|
109
|
+
"cpu": {"available": True, "native": False},
|
|
110
|
+
"cuda": {"available": False, "built": False, "device_count": 0, "reason": "..."},
|
|
111
|
+
"rocm": {"available": False, "reason": "..."},
|
|
112
|
+
"oneapi": {"available": False, "reason": "..."},
|
|
113
|
+
}
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
The production CPU path is the NumPy backend. `ms.backends.use_native_cpu(True)`
|
|
117
|
+
raises `ModelStudioBackendUnavailable` unless a future optional native extension
|
|
118
|
+
is actually installed. Unsupported accelerator devices fail with
|
|
119
|
+
`ModelStudioBackendUnavailable`.
|
|
120
|
+
|
|
121
|
+
CUDA availability can also be checked through the public namespace:
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
print(ms.cuda.is_available())
|
|
125
|
+
print(ms.cuda.device_count())
|
|
126
|
+
print(ms.cuda.memory_summary())
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
In the CPU-only wheel, explicit CUDA tensor requests raise a clear runtime error
|
|
130
|
+
instead of falling back to CPU.
|
|
131
|
+
|
|
132
|
+
## Tensor Example
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
import modelstudio as ms
|
|
136
|
+
|
|
137
|
+
x = ms.randn((32, 784), requires_grad=True)
|
|
138
|
+
w = ms.randn((784, 10), requires_grad=True)
|
|
139
|
+
loss = (x @ w).mean()
|
|
140
|
+
loss.backward()
|
|
141
|
+
print(w.grad)
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## Functional API
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
import modelstudio as ms
|
|
148
|
+
from modelstudio import nn
|
|
149
|
+
from modelstudio.nn import functional as F
|
|
150
|
+
|
|
151
|
+
model = nn.Linear(4, 2)
|
|
152
|
+
x = ms.random.randn((8, 4))
|
|
153
|
+
target = ms.random.randn((8, 2))
|
|
154
|
+
loss = F.mse_loss(F.relu(F.linear(x, model.weight, model.bias)), target)
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## Tracing
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
import modelstudio as ms
|
|
161
|
+
from modelstudio.nn import functional as F
|
|
162
|
+
|
|
163
|
+
x = ms.random.randn((4, 3))
|
|
164
|
+
w = ms.random.randn((3, 2))
|
|
165
|
+
graph = ms.trace(lambda a, b: F.relu(a @ b), x, w)
|
|
166
|
+
print(graph)
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
Tracing captures operation names and tensor metadata. It does not optimize or
|
|
170
|
+
execute graphs yet. `ms.compile(fn)` remains a documented no-op that returns the
|
|
171
|
+
original callable.
|
|
172
|
+
|
|
173
|
+
## Random And Linalg
|
|
174
|
+
|
|
175
|
+
```python
|
|
176
|
+
ms.random.seed(123)
|
|
177
|
+
x = ms.random.normal((4, 3), mean=0.0, std=1.0)
|
|
178
|
+
w = ms.random.uniform((3, 2), low=-0.1, high=0.1)
|
|
179
|
+
y = ms.linalg.matmul(x, w)
|
|
180
|
+
print(ms.linalg.norm(y).item())
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
## Comparisons
|
|
184
|
+
|
|
185
|
+
```python
|
|
186
|
+
x = ms.tensor([1.0, 2.0, 3.0])
|
|
187
|
+
y = ms.tensor([1.0, 2.1, 3.0])
|
|
188
|
+
print(ms.isclose(x, y, atol=0.05))
|
|
189
|
+
print(ms.allclose(x, y, atol=0.05))
|
|
190
|
+
print((x > 1.5).any().item())
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
Comparison and logical outputs are bool tensors and do not track gradients.
|
|
194
|
+
|
|
195
|
+
## Checkpointing
|
|
196
|
+
|
|
197
|
+
```python
|
|
198
|
+
model = nn.Linear(4, 2)
|
|
199
|
+
optimizer = ms.optim.AdamW(model.parameters(), lr=1e-3)
|
|
200
|
+
ms.save_checkpoint("checkpoint.ms", model=model, optimizer=optimizer, extra={"epoch": 1})
|
|
201
|
+
checkpoint = ms.load_checkpoint("checkpoint.ms", model=model, optimizer=optimizer, map_location="cpu")
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
Checkpoint loading validates structure and model state. CPU is the only accepted
|
|
205
|
+
`map_location` in the current release.
|
|
206
|
+
|
|
207
|
+
## Commands
|
|
208
|
+
|
|
209
|
+
```bash
|
|
210
|
+
python -m pytest
|
|
211
|
+
python scripts/smoke_test.py
|
|
212
|
+
python examples/train_mlp.py
|
|
213
|
+
python examples/train_classifier.py
|
|
214
|
+
python examples/tiny_transformer.py
|
|
215
|
+
python examples/save_load.py
|
|
216
|
+
python examples/train_cnn_toy.py
|
|
217
|
+
python examples/dropout_batchnorm.py
|
|
218
|
+
python examples/checkpoint_training.py
|
|
219
|
+
python examples/numpy_interop.py
|
|
220
|
+
python examples/scheduler_training.py
|
|
221
|
+
python examples/checkpoint_resume.py
|
|
222
|
+
python examples/metrics_demo.py
|
|
223
|
+
python examples/backend_status.py
|
|
224
|
+
python examples/tracing_demo.py
|
|
225
|
+
python examples/functional_training.py
|
|
226
|
+
python examples/random_linalg_demo.py
|
|
227
|
+
python examples/cuda_tensor_demo.py
|
|
228
|
+
python examples/cuda_mlp_demo.py
|
|
229
|
+
python benchmarks/bench_matmul.py
|
|
230
|
+
python benchmarks/bench_mlp.py
|
|
231
|
+
python benchmarks/bench_attention.py
|
|
232
|
+
python benchmarks/bench_dataloader.py
|
|
233
|
+
python benchmarks/bench_conv.py
|
|
234
|
+
python benchmarks/bench_dropout.py
|
|
235
|
+
python benchmarks/bench_creation.py
|
|
236
|
+
python benchmarks/bench_manipulation.py
|
|
237
|
+
python benchmarks/bench_elementwise.py
|
|
238
|
+
python benchmarks/bench_trace.py
|
|
239
|
+
python benchmarks/bench_cuda_elementwise.py
|
|
240
|
+
python benchmarks/bench_cuda_matmul.py
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
## Documentation
|
|
244
|
+
|
|
245
|
+
- [Backend status](docs/backend-status.md)
|
|
246
|
+
- [CUDA status](docs/cuda.md)
|
|
247
|
+
- [Tracing](docs/tracing.md)
|
|
248
|
+
- [Functional API](docs/functional-api.md)
|
|
249
|
+
- [Random namespace](docs/random.md)
|
|
250
|
+
- [Linalg namespace](docs/linalg.md)
|
|
251
|
+
- [Comparison ops](docs/comparison-ops.md)
|
|
252
|
+
- [Tensor API](docs/tensor-api.md)
|
|
253
|
+
- [Neural network API](docs/nn.md)
|
|
254
|
+
- [Data utilities](docs/data.md)
|
|
255
|
+
- [Training](docs/training.md)
|
|
256
|
+
- [Modules](docs/modules.md)
|
|
257
|
+
- [Serialization](docs/serialization.md)
|
|
258
|
+
- [Native backend roadmap](docs/native-backend-roadmap.md)
|
|
259
|
+
- [NumPy interop](docs/numpy-interop.md)
|
|
260
|
+
- [Tensor creation](docs/tensor-creation.md)
|
|
261
|
+
- [Tensor manipulation](docs/tensor-manipulation.md)
|
|
262
|
+
- [Optimizers](docs/optimizers.md)
|
|
263
|
+
- [Checkpointing](docs/checkpointing.md)
|
|
264
|
+
- [Metrics](docs/metrics.md)
|
|
265
|
+
- [Backend architecture](docs/backend-architecture.md)
|
|
266
|
+
- [Autograd design](docs/autograd.md)
|
|
267
|
+
- [Releasing](docs/releasing.md)
|
|
268
|
+
- [Contributing](CONTRIBUTING.md)
|
|
269
|
+
|
|
270
|
+
## Roadmap
|
|
271
|
+
|
|
272
|
+
- Expand tensor and autograd coverage.
|
|
273
|
+
- Wire optional native CPU kernels only after a safe Python extension exists.
|
|
274
|
+
- Build a real optional CUDA package after tensor storage, kernels, bindings,
|
|
275
|
+
and hardware-backed CI are in place.
|
|
276
|
+
- Add tested ROCm and oneAPI packages after CUDA establishes the accelerator
|
|
277
|
+
backend contract.
|
|
278
|
+
- Improve compiler graph capture, analysis passes, and lowering.
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
# ModelStudio
|
|
2
|
+
|
|
3
|
+
ModelStudio is an early-stage AI tensor framework. Version `0.6.0` provides a
|
|
4
|
+
CPU tensor/autograd MVP with neural-network modules, optimizers, serialization,
|
|
5
|
+
data loading, graph tracing metadata, backend status inspection, a public CUDA
|
|
6
|
+
availability namespace, and small LLM-oriented building blocks.
|
|
7
|
+
|
|
8
|
+
It is not a PyTorch or TensorFlow replacement. The default PyPI package is
|
|
9
|
+
CPU-only. CUDA, ROCm, and oneAPI remain explicit scaffolds until real kernels
|
|
10
|
+
are built and tested in hardware-backed environments.
|
|
11
|
+
|
|
12
|
+
## Installation
|
|
13
|
+
|
|
14
|
+
From PyPI:
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
python -m pip install modelstudio
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
For development:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
python -m pip install -e ".[dev]"
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Feature Table
|
|
27
|
+
|
|
28
|
+
| Area | Status |
|
|
29
|
+
| --- | --- |
|
|
30
|
+
| CPU tensors | Working MVP |
|
|
31
|
+
| Autograd | Reverse-mode for core CPU ops |
|
|
32
|
+
| Reductions | `sum`, `mean`, `max`, `all`, and `any`; `max` is value-only |
|
|
33
|
+
| Comparisons | Elementwise comparisons, `equal`, `isclose`, and `allclose` |
|
|
34
|
+
| Activations | ReLU, GELU, LeakyReLU, ELU, Softplus, exp, log, tanh, sigmoid, SiLU, softmax, log-softmax |
|
|
35
|
+
| Losses | MSE and cross entropy with `none`, `mean`, and `sum` reductions |
|
|
36
|
+
| Functional API | `modelstudio.nn.functional` wrappers for common NN operations |
|
|
37
|
+
| Modules | Parameters, buffers, child traversal, state dicts, save/load |
|
|
38
|
+
| Layers | Linear, Embedding, LayerNorm, RMSNorm, BatchNorm1d, Dropout, Conv1d, Conv2d, pooling, TransformerBlock |
|
|
39
|
+
| Optimizers | SGD and AdamW with state serialization, parameter groups, and LR schedulers |
|
|
40
|
+
| Data | Dataset, TensorDataset, random_split, DataLoader with deterministic seeded shuffle |
|
|
41
|
+
| Randomness | `manual_seed`, `ms.random`, RNG-backed creation, dropout, and init helpers |
|
|
42
|
+
| Linalg | `ms.linalg.matmul`, `norm`, `vector_norm`, and `transpose` |
|
|
43
|
+
| Interop | `asarray`, `from_numpy`, `to_numpy`, and `ms.numpy` |
|
|
44
|
+
| Metrics | accuracy and top-k accuracy |
|
|
45
|
+
| Compiler | Metadata-only tracing plus placeholder IR and passes |
|
|
46
|
+
| CUDA API | Availability, device-count, sync, and memory-status facade; tensor execution is not implemented in the CPU wheel |
|
|
47
|
+
|
|
48
|
+
## Architecture
|
|
49
|
+
|
|
50
|
+
```text
|
|
51
|
+
Python frontend
|
|
52
|
+
-> Tensor, nn, optim, autograd, ops
|
|
53
|
+
-> runtime dispatcher
|
|
54
|
+
-> backend interface
|
|
55
|
+
-> NumPy CPU backend today
|
|
56
|
+
-> optional native CPU / CUDA / ROCm / oneAPI extensions later
|
|
57
|
+
|
|
58
|
+
Native scaffold
|
|
59
|
+
-> core metadata
|
|
60
|
+
-> dispatcher interfaces
|
|
61
|
+
-> CPU kernel prototypes
|
|
62
|
+
-> CUDA, ROCm, oneAPI backend directories
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Backend Status
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
import modelstudio as ms
|
|
69
|
+
|
|
70
|
+
print(ms.backends.status())
|
|
71
|
+
print(ms.backends.native_cpu_available())
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Expected structure of the dictionary returned by `ms.backends.status()`:
|
|
75
|
+
|
|
76
|
+
```python
|
|
77
|
+
{
|
|
78
|
+
"cpu": {"available": True, "native": False},
|
|
79
|
+
"cuda": {"available": False, "built": False, "device_count": 0, "reason": "..."},
|
|
80
|
+
"rocm": {"available": False, "reason": "..."},
|
|
81
|
+
"oneapi": {"available": False, "reason": "..."},
|
|
82
|
+
}
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
The production CPU path is the NumPy backend. `ms.backends.use_native_cpu(True)`
|
|
86
|
+
raises `ModelStudioBackendUnavailable` unless a future optional native extension
|
|
87
|
+
is actually installed. Unsupported accelerator devices fail with
|
|
88
|
+
`ModelStudioBackendUnavailable`.
|
|
89
|
+
|
|
90
|
+
CUDA availability can also be checked through the public namespace:
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
print(ms.cuda.is_available())
|
|
94
|
+
print(ms.cuda.device_count())
|
|
95
|
+
print(ms.cuda.memory_summary())
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
In the CPU-only wheel, explicit CUDA tensor requests raise a clear runtime error
|
|
99
|
+
instead of falling back to CPU.
|
|
100
|
+
|
|
101
|
+
## Tensor Example
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
import modelstudio as ms
|
|
105
|
+
|
|
106
|
+
x = ms.randn((32, 784), requires_grad=True)
|
|
107
|
+
w = ms.randn((784, 10), requires_grad=True)
|
|
108
|
+
loss = (x @ w).mean()
|
|
109
|
+
loss.backward()
|
|
110
|
+
print(w.grad)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## Functional API
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
import modelstudio as ms
|
|
117
|
+
from modelstudio import nn
|
|
118
|
+
from modelstudio.nn import functional as F
|
|
119
|
+
|
|
120
|
+
model = nn.Linear(4, 2)
|
|
121
|
+
x = ms.random.randn((8, 4))
|
|
122
|
+
target = ms.random.randn((8, 2))
|
|
123
|
+
loss = F.mse_loss(F.relu(F.linear(x, model.weight, model.bias)), target)
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Tracing
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
import modelstudio as ms
|
|
130
|
+
from modelstudio.nn import functional as F
|
|
131
|
+
|
|
132
|
+
x = ms.random.randn((4, 3))
|
|
133
|
+
w = ms.random.randn((3, 2))
|
|
134
|
+
graph = ms.trace(lambda a, b: F.relu(a @ b), x, w)
|
|
135
|
+
print(graph)
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Tracing captures operation names and tensor metadata. It does not optimize or
|
|
139
|
+
execute graphs yet. `ms.compile(fn)` remains a documented no-op that returns the
|
|
140
|
+
original callable.
|
|
141
|
+
|
|
142
|
+
## Random And Linalg
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
ms.random.seed(123)
|
|
146
|
+
x = ms.random.normal((4, 3), mean=0.0, std=1.0)
|
|
147
|
+
w = ms.random.uniform((3, 2), low=-0.1, high=0.1)
|
|
148
|
+
y = ms.linalg.matmul(x, w)
|
|
149
|
+
print(ms.linalg.norm(y).item())
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Comparisons
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
x = ms.tensor([1.0, 2.0, 3.0])
|
|
156
|
+
y = ms.tensor([1.0, 2.1, 3.0])
|
|
157
|
+
print(ms.isclose(x, y, atol=0.05))
|
|
158
|
+
print(ms.allclose(x, y, atol=0.05))
|
|
159
|
+
print((x > 1.5).any().item())
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
Comparison and logical outputs are bool tensors and do not track gradients.
|
|
163
|
+
|
|
164
|
+
## Checkpointing
|
|
165
|
+
|
|
166
|
+
```python
|
|
167
|
+
model = nn.Linear(4, 2)
|
|
168
|
+
optimizer = ms.optim.AdamW(model.parameters(), lr=1e-3)
|
|
169
|
+
ms.save_checkpoint("checkpoint.ms", model=model, optimizer=optimizer, extra={"epoch": 1})
|
|
170
|
+
checkpoint = ms.load_checkpoint("checkpoint.ms", model=model, optimizer=optimizer, map_location="cpu")
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
Checkpoint loading validates structure and model state. CPU is the only accepted
|
|
174
|
+
`map_location` in the current release.
|
|
175
|
+
|
|
176
|
+
## Commands
|
|
177
|
+
|
|
178
|
+
```bash
|
|
179
|
+
python -m pytest
|
|
180
|
+
python scripts/smoke_test.py
|
|
181
|
+
python examples/train_mlp.py
|
|
182
|
+
python examples/train_classifier.py
|
|
183
|
+
python examples/tiny_transformer.py
|
|
184
|
+
python examples/save_load.py
|
|
185
|
+
python examples/train_cnn_toy.py
|
|
186
|
+
python examples/dropout_batchnorm.py
|
|
187
|
+
python examples/checkpoint_training.py
|
|
188
|
+
python examples/numpy_interop.py
|
|
189
|
+
python examples/scheduler_training.py
|
|
190
|
+
python examples/checkpoint_resume.py
|
|
191
|
+
python examples/metrics_demo.py
|
|
192
|
+
python examples/backend_status.py
|
|
193
|
+
python examples/tracing_demo.py
|
|
194
|
+
python examples/functional_training.py
|
|
195
|
+
python examples/random_linalg_demo.py
|
|
196
|
+
python examples/cuda_tensor_demo.py
|
|
197
|
+
python examples/cuda_mlp_demo.py
|
|
198
|
+
python benchmarks/bench_matmul.py
|
|
199
|
+
python benchmarks/bench_mlp.py
|
|
200
|
+
python benchmarks/bench_attention.py
|
|
201
|
+
python benchmarks/bench_dataloader.py
|
|
202
|
+
python benchmarks/bench_conv.py
|
|
203
|
+
python benchmarks/bench_dropout.py
|
|
204
|
+
python benchmarks/bench_creation.py
|
|
205
|
+
python benchmarks/bench_manipulation.py
|
|
206
|
+
python benchmarks/bench_elementwise.py
|
|
207
|
+
python benchmarks/bench_trace.py
|
|
208
|
+
python benchmarks/bench_cuda_elementwise.py
|
|
209
|
+
python benchmarks/bench_cuda_matmul.py
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
## Documentation
|
|
213
|
+
|
|
214
|
+
- [Backend status](docs/backend-status.md)
|
|
215
|
+
- [CUDA status](docs/cuda.md)
|
|
216
|
+
- [Tracing](docs/tracing.md)
|
|
217
|
+
- [Functional API](docs/functional-api.md)
|
|
218
|
+
- [Random namespace](docs/random.md)
|
|
219
|
+
- [Linalg namespace](docs/linalg.md)
|
|
220
|
+
- [Comparison ops](docs/comparison-ops.md)
|
|
221
|
+
- [Tensor API](docs/tensor-api.md)
|
|
222
|
+
- [Neural network API](docs/nn.md)
|
|
223
|
+
- [Data utilities](docs/data.md)
|
|
224
|
+
- [Training](docs/training.md)
|
|
225
|
+
- [Modules](docs/modules.md)
|
|
226
|
+
- [Serialization](docs/serialization.md)
|
|
227
|
+
- [Native backend roadmap](docs/native-backend-roadmap.md)
|
|
228
|
+
- [NumPy interop](docs/numpy-interop.md)
|
|
229
|
+
- [Tensor creation](docs/tensor-creation.md)
|
|
230
|
+
- [Tensor manipulation](docs/tensor-manipulation.md)
|
|
231
|
+
- [Optimizers](docs/optimizers.md)
|
|
232
|
+
- [Checkpointing](docs/checkpointing.md)
|
|
233
|
+
- [Metrics](docs/metrics.md)
|
|
234
|
+
- [Backend architecture](docs/backend-architecture.md)
|
|
235
|
+
- [Autograd design](docs/autograd.md)
|
|
236
|
+
- [Releasing](docs/releasing.md)
|
|
237
|
+
- [Contributing](CONTRIBUTING.md)
|
|
238
|
+
|
|
239
|
+
## Roadmap
|
|
240
|
+
|
|
241
|
+
- Expand tensor and autograd coverage.
|
|
242
|
+
- Wire optional native CPU kernels only after a safe Python extension exists.
|
|
243
|
+
- Build a real optional CUDA package after tensor storage, kernels, bindings,
|
|
244
|
+
and hardware-backed CI are in place.
|
|
245
|
+
- Add tested ROCm and oneAPI packages after CUDA establishes the accelerator
|
|
246
|
+
backend contract.
|
|
247
|
+
- Improve compiler graph capture, analysis passes, and lowering.
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import platform
|
|
4
|
+
import time
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
|
|
7
|
+
import modelstudio as ms
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _time_ms(fn: Callable[[], object], warmup: int, iterations: int, *, synchronize: bool) -> float:
|
|
11
|
+
for _ in range(warmup):
|
|
12
|
+
fn()
|
|
13
|
+
if synchronize:
|
|
14
|
+
ms.cuda.synchronize()
|
|
15
|
+
start = time.perf_counter()
|
|
16
|
+
for _ in range(iterations):
|
|
17
|
+
fn()
|
|
18
|
+
if synchronize:
|
|
19
|
+
ms.cuda.synchronize()
|
|
20
|
+
return (time.perf_counter() - start) * 1000.0 / iterations
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def main() -> None:
    """Report the environment, then time CUDA elementwise add and ReLU.

    Prints the Python, NumPy, and ModelStudio versions, CUDA availability,
    and the benchmark configuration. When CUDA is unavailable it prints the
    memory summary and a skip notice instead of running the benchmark.
    """
    dims = (1024, 1024)
    n_warmup = 5
    n_timed = 50

    print(f"Python: {platform.python_version()}")
    print(f"NumPy: {ms.numpy.__version__}")
    print(f"ModelStudio: {ms.__version__}")
    print(f"CUDA: available={ms.cuda.is_available()} device_count={ms.cuda.device_count()}")
    print(f"Shape: {dims}")
    print(f"Warmup: {n_warmup}")
    print(f"Iterations: {n_timed}")

    if ms.cuda.is_available():
        # Fixed seed so both operands are reproducible between runs.
        ms.manual_seed(123)
        lhs = ms.randn(dims, device="cuda")
        rhs = ms.randn(dims, device="cuda")

        def run_add() -> object:
            return lhs + rhs

        def run_relu() -> object:
            return ms.relu(lhs)

        add_avg = _time_ms(run_add, n_warmup, n_timed, synchronize=True)
        relu_avg = _time_ms(run_relu, n_warmup, n_timed, synchronize=True)

        print(f"CUDA add avg: {add_avg:.3f} ms")
        print(f"CUDA relu avg: {relu_avg:.3f} ms")
        print(ms.cuda.memory_summary())
    else:
        print(ms.cuda.memory_summary())
        print("Skipping CUDA elementwise benchmark because CUDA tensor execution is not available.")


# Run the benchmark only when executed as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import platform
|
|
4
|
+
import time
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
|
|
7
|
+
import modelstudio as ms
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _time_ms(fn: Callable[[], object], warmup: int, iterations: int, *, synchronize: bool) -> float:
|
|
11
|
+
for _ in range(warmup):
|
|
12
|
+
fn()
|
|
13
|
+
if synchronize:
|
|
14
|
+
ms.cuda.synchronize()
|
|
15
|
+
start = time.perf_counter()
|
|
16
|
+
for _ in range(iterations):
|
|
17
|
+
fn()
|
|
18
|
+
if synchronize:
|
|
19
|
+
ms.cuda.synchronize()
|
|
20
|
+
return (time.perf_counter() - start) * 1000.0 / iterations
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def main() -> None:
    """Report the environment, then time a CUDA matrix multiplication.

    Prints the Python, NumPy, and ModelStudio versions, CUDA availability,
    and the benchmark configuration. When CUDA is unavailable it prints the
    memory summary and a skip notice instead of running the benchmark.
    """
    dims = (512, 512)
    n_warmup = 3
    n_timed = 20

    print(f"Python: {platform.python_version()}")
    print(f"NumPy: {ms.numpy.__version__}")
    print(f"ModelStudio: {ms.__version__}")
    print(f"CUDA: available={ms.cuda.is_available()} device_count={ms.cuda.device_count()}")
    print(f"Shape: {dims} x {dims}")
    print(f"Warmup: {n_warmup}")
    print(f"Iterations: {n_timed}")

    if ms.cuda.is_available():
        # Fixed seed so both operands are reproducible between runs.
        ms.manual_seed(123)
        left = ms.randn(dims, device="cuda")
        right = ms.randn(dims, device="cuda")

        def run_matmul() -> object:
            return left @ right

        matmul_avg = _time_ms(run_matmul, n_warmup, n_timed, synchronize=True)

        print(f"CUDA matmul avg: {matmul_avg:.3f} ms")
        print(ms.cuda.memory_summary())
    else:
        print(ms.cuda.memory_summary())
        print("Skipping CUDA matmul benchmark because CUDA tensor execution is not available.")


# Run the benchmark only when executed as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import platform
|
|
4
|
+
import time
|
|
5
|
+
|
|
6
|
+
import modelstudio as ms
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _time_ms(fn, warmup: int, iterations: int) -> float:
|
|
10
|
+
for _ in range(warmup):
|
|
11
|
+
fn()
|
|
12
|
+
start = time.perf_counter()
|
|
13
|
+
for _ in range(iterations):
|
|
14
|
+
fn()
|
|
15
|
+
return (time.perf_counter() - start) * 1000.0 / iterations
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def main() -> None:
    """Time elementwise add, ReLU, and comparison, then print a report.

    The three measurements run first on two seeded random tensors; the
    environment details, backend status, and averages are printed afterwards.
    """
    dims = (1024, 1024)
    n_warmup = 5
    n_timed = 50

    # Fixed seed so both operands are reproducible between runs.
    ms.random.seed(123)
    lhs = ms.random.randn(dims)
    rhs = ms.random.randn(dims)

    def run_add() -> object:
        return lhs + rhs

    def run_relu() -> object:
        return ms.relu(lhs)

    def run_compare() -> object:
        return lhs > rhs

    add_avg = _time_ms(run_add, n_warmup, n_timed)
    relu_avg = _time_ms(run_relu, n_warmup, n_timed)
    compare_avg = _time_ms(run_compare, n_warmup, n_timed)

    print(f"Python: {platform.python_version()}")
    print(f"NumPy: {ms.numpy.__version__}")
    print(f"ModelStudio: {ms.__version__}")
    print(f"Shape: {dims}")
    print(f"Warmup: {n_warmup}")
    print(f"Iterations: {n_timed}")
    print(f"Backend: {ms.backends.status()}")
    print(f"add avg: {add_avg:.3f} ms")
    print(f"relu avg: {relu_avg:.3f} ms")
    print(f"compare avg: {compare_avg:.3f} ms")


# Run the benchmark only when executed as a script.
if __name__ == "__main__":
    main()
|