modelstudio 0.6.0__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {modelstudio-0.6.0/python/modelstudio.egg-info → modelstudio-0.7.0}/PKG-INFO +8 -3
- {modelstudio-0.6.0 → modelstudio-0.7.0}/README.md +7 -2
- modelstudio-0.7.0/benchmarks/bench_cuda_autograd.py +66 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/benchmarks/bench_cuda_elementwise.py +1 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/benchmarks/bench_cuda_matmul.py +1 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/cuda/cuda_context.cu +6 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/cuda/cuda_context.hpp +3 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/backend-status.md +14 -1
- modelstudio-0.7.0/docs/cuda.md +127 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/native-backend-roadmap.md +1 -1
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/releasing.md +40 -1
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/training.md +1 -1
- modelstudio-0.7.0/examples/cuda_autograd_demo.py +45 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/pyproject.toml +1 -1
- modelstudio-0.7.0/python/modelstudio/_version.py +1 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/cuda/__init__.py +2 -1
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/cuda/device.py +14 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0/python/modelstudio.egg-info}/PKG-INFO +8 -3
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio.egg-info/SOURCES.txt +7 -0
- modelstudio-0.7.0/scripts/cuda_release_check.py +166 -0
- modelstudio-0.7.0/scripts/cuda_source_build_check.py +218 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_cuda_availability.py +2 -0
- modelstudio-0.7.0/tests/test_cuda_matmul.py +46 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_cuda_memory.py +2 -1
- modelstudio-0.7.0/tests/test_cuda_optim.py +38 -0
- modelstudio-0.7.0/tests/test_cuda_reductions.py +45 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_public_exports.py +2 -2
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_version.py +1 -1
- modelstudio-0.6.0/docs/cuda.md +0 -53
- modelstudio-0.6.0/python/modelstudio/_version.py +0 -1
- {modelstudio-0.6.0 → modelstudio-0.7.0}/CMakeLists.txt +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/LICENSE +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/MANIFEST.in +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/benchmarks/bench_attention.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/benchmarks/bench_conv.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/benchmarks/bench_creation.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/benchmarks/bench_dataloader.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/benchmarks/bench_dropout.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/benchmarks/bench_elementwise.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/benchmarks/bench_manipulation.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/benchmarks/bench_matmul.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/benchmarks/bench_mlp.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/benchmarks/bench_trace.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/CMakeLists.txt +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/cpu/cpu_backend.cpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/cpu/cpu_backend.hpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/cpu/kernels/add.cpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/cpu/kernels/matmul.cpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/cpu/kernels/mul.cpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/cpu/kernels/relu.cpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/cuda/README.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/cuda/cuda_backend.cu +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/cuda/cuda_backend.hpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/cuda/cuda_kernels.hpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/cuda/cuda_memory.cu +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/cuda/cuda_memory.hpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/cuda/cuda_stream.cu +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/cuda/cuda_stream.hpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/cuda/kernels/elementwise.cu +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/cuda/kernels/matmul.cu +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/cuda/kernels/reductions.cu +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/oneapi/README.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/oneapi/oneapi_backend.cpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/oneapi/oneapi_backend.hpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/oneapi/sycl_memory.hpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/rocm/README.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/rocm/hip_memory.hpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/rocm/rocm_backend.cpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/backends/rocm/rocm_backend.hpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/bindings/cuda_bindings.cpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/bindings/python_bindings.cpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/core/device.hpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/core/dtype.hpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/core/error.hpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/core/shape.hpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/core/storage.hpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/core/tensor.hpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/dispatcher/backend.hpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/dispatcher/dispatcher.hpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/csrc/dispatcher/operator_registry.hpp +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/autograd.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/backend-architecture.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/checkpointing.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/comparison-ops.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/data.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/functional-api.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/linalg.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/metrics.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/modules.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/nn.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/numpy-interop.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/optimizers.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/random.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/randomness.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/serialization.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/tensor-api.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/tensor-creation.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/tensor-manipulation.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/docs/tracing.md +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/examples/backend_status.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/examples/checkpoint_resume.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/examples/checkpoint_training.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/examples/cuda_mlp_demo.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/examples/cuda_tensor_demo.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/examples/dropout_batchnorm.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/examples/functional_training.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/examples/metrics_demo.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/examples/numpy_interop.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/examples/random_linalg_demo.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/examples/save_load.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/examples/scheduler_training.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/examples/tiny_transformer.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/examples/tracing_demo.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/examples/train_classifier.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/examples/train_cnn_toy.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/examples/train_mlp.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/__init__.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/autograd/__init__.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/autograd/engine.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/autograd/function.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/autograd/grad_mode.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/backends/__init__.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/backends/cuda.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/backends/status.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/compile/__init__.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/compile/graph_capture.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/compile/ir.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/compile/passes.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/cuda/memory.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/cuda/streams.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/data/__init__.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/data/dataloader.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/data/dataset.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/device.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/dtypes.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/errors.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/interop.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/linalg.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/metrics/__init__.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/metrics/classification.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/nn/__init__.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/nn/activations.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/nn/convolution.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/nn/embedding.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/nn/functional.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/nn/init.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/nn/linear.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/nn/losses.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/nn/module.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/nn/normalization.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/nn/parameter.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/nn/pooling.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/nn/transformer.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/nn/utils.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/ops/__init__.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/ops/comparison.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/ops/creation.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/ops/linalg.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/ops/math.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/ops/movement.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/ops/reductions.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/optim/__init__.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/optim/adamw.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/optim/lr_scheduler.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/optim/optimizer.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/optim/sgd.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/py.typed +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/random.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/runtime/__init__.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/runtime/backend.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/runtime/dispatcher.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/serialization.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/storage.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/tensor.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/testing/__init__.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio/testing/gradcheck.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio.egg-info/dependency_links.txt +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio.egg-info/requires.txt +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/python/modelstudio.egg-info/top_level.txt +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/scripts/smoke_test.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/setup.cfg +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_activations_more.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_attention.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_autograd.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_backend_status.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_batchnorm.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_buffers.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_checkpoint_helpers.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_clone_copy.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_comparison_ops.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_concat_stack.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_conv.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_creation_more.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_cuda_autograd.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_cuda_nn.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_cuda_ops.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_cuda_tensor.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_data.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_data_split.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_dataloader_seed.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_dispatcher.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_dropout.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_dtype_conversion.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_embedding.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_functional.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_grad_clip.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_gradcheck.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_indexing.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_indexing_assignment.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_init.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_linalg.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_loss_reductions.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_losses.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_lr_scheduler.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_manipulation_ops.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_metrics.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_module_ergonomics.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_native_cpu_mode.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_nn.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_norms.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_numpy_interop.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_ops.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_optim.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_optimizer_param_groups.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_optimizer_state.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_pooling.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_random.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_random_namespace.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_reductions_axis.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_scalar_behavior.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_serialization.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_serialization_hardening.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_shape_ops.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_state_dict.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_tensor.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_trace.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_transformer.py +0 -0
- {modelstudio-0.6.0 → modelstudio-0.7.0}/tests/test_unary_ops.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: modelstudio
|
|
3
|
-
Version: 0.6.0
|
|
3
|
+
Version: 0.7.0
|
|
4
4
|
Summary: An early-stage AI tensor framework with CPU tensors, autograd, and backend extension scaffolding.
|
|
5
5
|
Author: ModelStudio Contributors
|
|
6
6
|
License-Expression: MIT
|
|
@@ -31,7 +31,7 @@ Dynamic: license-file
|
|
|
31
31
|
|
|
32
32
|
# ModelStudio
|
|
33
33
|
|
|
34
|
-
ModelStudio is an early-stage AI tensor framework. Version `0.6.0` provides a
|
|
34
|
+
ModelStudio is an early-stage AI tensor framework. Version `0.7.0` provides a
|
|
35
35
|
CPU tensor/autograd MVP with neural-network modules, optimizers, serialization,
|
|
36
36
|
data loading, graph tracing metadata, backend status inspection, a public CUDA
|
|
37
37
|
availability namespace, and small LLM-oriented building blocks.
|
|
@@ -74,7 +74,7 @@ python -m pip install -e ".[dev]"
|
|
|
74
74
|
| Interop | `asarray`, `from_numpy`, `to_numpy`, and `ms.numpy` |
|
|
75
75
|
| Metrics | accuracy and top-k accuracy |
|
|
76
76
|
| Compiler | Metadata-only tracing plus placeholder IR and passes |
|
|
77
|
-
| CUDA API | Availability, device-count, sync,
|
|
77
|
+
| CUDA API | Availability, device-count/name, sync, memory-status facade, and release-machine validation scripts; tensor execution is not implemented in the CPU wheel |
|
|
78
78
|
|
|
79
79
|
## Architecture
|
|
80
80
|
|
|
@@ -123,6 +123,7 @@ CUDA availability can also be checked through the public namespace:
|
|
|
123
123
|
```python
|
|
124
124
|
print(ms.cuda.is_available())
|
|
125
125
|
print(ms.cuda.device_count())
|
|
126
|
+
print(ms.cuda.device_name())
|
|
126
127
|
print(ms.cuda.memory_summary())
|
|
127
128
|
```
|
|
128
129
|
|
|
@@ -226,6 +227,7 @@ python examples/functional_training.py
|
|
|
226
227
|
python examples/random_linalg_demo.py
|
|
227
228
|
python examples/cuda_tensor_demo.py
|
|
228
229
|
python examples/cuda_mlp_demo.py
|
|
230
|
+
python examples/cuda_autograd_demo.py
|
|
229
231
|
python benchmarks/bench_matmul.py
|
|
230
232
|
python benchmarks/bench_mlp.py
|
|
231
233
|
python benchmarks/bench_attention.py
|
|
@@ -238,6 +240,9 @@ python benchmarks/bench_elementwise.py
|
|
|
238
240
|
python benchmarks/bench_trace.py
|
|
239
241
|
python benchmarks/bench_cuda_elementwise.py
|
|
240
242
|
python benchmarks/bench_cuda_matmul.py
|
|
243
|
+
python benchmarks/bench_cuda_autograd.py
|
|
244
|
+
python scripts/cuda_release_check.py
|
|
245
|
+
python scripts/cuda_source_build_check.py
|
|
241
246
|
```
|
|
242
247
|
|
|
243
248
|
## Documentation
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# ModelStudio
|
|
2
2
|
|
|
3
|
-
ModelStudio is an early-stage AI tensor framework. Version `0.6.0` provides a
|
|
3
|
+
ModelStudio is an early-stage AI tensor framework. Version `0.7.0` provides a
|
|
4
4
|
CPU tensor/autograd MVP with neural-network modules, optimizers, serialization,
|
|
5
5
|
data loading, graph tracing metadata, backend status inspection, a public CUDA
|
|
6
6
|
availability namespace, and small LLM-oriented building blocks.
|
|
@@ -43,7 +43,7 @@ python -m pip install -e ".[dev]"
|
|
|
43
43
|
| Interop | `asarray`, `from_numpy`, `to_numpy`, and `ms.numpy` |
|
|
44
44
|
| Metrics | accuracy and top-k accuracy |
|
|
45
45
|
| Compiler | Metadata-only tracing plus placeholder IR and passes |
|
|
46
|
-
| CUDA API | Availability, device-count, sync,
|
|
46
|
+
| CUDA API | Availability, device-count/name, sync, memory-status facade, and release-machine validation scripts; tensor execution is not implemented in the CPU wheel |
|
|
47
47
|
|
|
48
48
|
## Architecture
|
|
49
49
|
|
|
@@ -92,6 +92,7 @@ CUDA availability can also be checked through the public namespace:
|
|
|
92
92
|
```python
|
|
93
93
|
print(ms.cuda.is_available())
|
|
94
94
|
print(ms.cuda.device_count())
|
|
95
|
+
print(ms.cuda.device_name())
|
|
95
96
|
print(ms.cuda.memory_summary())
|
|
96
97
|
```
|
|
97
98
|
|
|
@@ -195,6 +196,7 @@ python examples/functional_training.py
|
|
|
195
196
|
python examples/random_linalg_demo.py
|
|
196
197
|
python examples/cuda_tensor_demo.py
|
|
197
198
|
python examples/cuda_mlp_demo.py
|
|
199
|
+
python examples/cuda_autograd_demo.py
|
|
198
200
|
python benchmarks/bench_matmul.py
|
|
199
201
|
python benchmarks/bench_mlp.py
|
|
200
202
|
python benchmarks/bench_attention.py
|
|
@@ -207,6 +209,9 @@ python benchmarks/bench_elementwise.py
|
|
|
207
209
|
python benchmarks/bench_trace.py
|
|
208
210
|
python benchmarks/bench_cuda_elementwise.py
|
|
209
211
|
python benchmarks/bench_cuda_matmul.py
|
|
212
|
+
python benchmarks/bench_cuda_autograd.py
|
|
213
|
+
python scripts/cuda_release_check.py
|
|
214
|
+
python scripts/cuda_source_build_check.py
|
|
210
215
|
```
|
|
211
216
|
|
|
212
217
|
## Documentation
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import platform
|
|
4
|
+
import time
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
|
|
7
|
+
import modelstudio as ms
|
|
8
|
+
from modelstudio import nn
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _time_ms(fn: Callable[[], object], warmup: int, iterations: int, *, synchronize: bool) -> float:
|
|
12
|
+
for _ in range(warmup):
|
|
13
|
+
fn()
|
|
14
|
+
if synchronize:
|
|
15
|
+
ms.cuda.synchronize()
|
|
16
|
+
start = time.perf_counter()
|
|
17
|
+
for _ in range(iterations):
|
|
18
|
+
fn()
|
|
19
|
+
if synchronize:
|
|
20
|
+
ms.cuda.synchronize()
|
|
21
|
+
return (time.perf_counter() - start) * 1000.0 / iterations
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def main() -> None:
    """Print an environment report, then run the CUDA autograd benchmark.

    If ``ms.cuda.is_available()`` is false, the CUDA memory summary and a skip
    notice are printed and nothing is benchmarked.
    """
    # Fixed benchmark configuration.
    batch, in_features, out_features = 32, 128, 64
    warmup, iterations = 3, 20

    # Environment and configuration report, one line per item.
    print(f"Python: {platform.python_version()}")
    print(f"NumPy: {ms.numpy.__version__}")
    print(f"ModelStudio: {ms.__version__}")
    print(f"CUDA: available={ms.cuda.is_available()} device_count={ms.cuda.device_count()}")
    print(f"GPU: {ms.cuda.device_name() or 'unavailable'}")
    print(f"Shape: batch={batch} in={in_features} out={out_features}")
    print(f"Warmup: {warmup}")
    print(f"Iterations: {iterations}")

    if ms.cuda.is_available():
        _run_autograd_benchmark(batch, in_features, out_features, warmup, iterations)
    else:
        print(ms.cuda.memory_summary())
        print("Skipping CUDA autograd benchmark because CUDA tensor execution is not available.")


def _run_autograd_benchmark(
    batch: int, in_features: int, out_features: int, warmup: int, iterations: int
) -> None:
    """Time forward/backward/update steps of one linear layer on ``"cuda"``."""
    ms.manual_seed(123)
    layer = nn.Linear(in_features, out_features).to("cuda")
    optimizer = ms.optim.SGD(layer.parameters(), lr=1e-3)
    inputs = ms.randn((batch, in_features), device="cuda")
    expected = ms.randn((batch, out_features), device="cuda")

    def _train_step() -> ms.Tensor:
        # Forward pass and loss first, then clear gradients, backpropagate
        # and apply the optimizer update.
        loss = ms.mse_loss(layer(inputs), expected)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        return loss

    autograd_ms = _time_ms(_train_step, warmup, iterations, synchronize=True)

    print(f"CUDA forward/backward/update avg: {autograd_ms:.3f} ms")
    print(ms.cuda.memory_summary())
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
if __name__ == "__main__":
|
|
66
|
+
main()
|
|
@@ -29,6 +29,7 @@ def main() -> None:
|
|
|
29
29
|
print(f"NumPy: {ms.numpy.__version__}")
|
|
30
30
|
print(f"ModelStudio: {ms.__version__}")
|
|
31
31
|
print(f"CUDA: available={ms.cuda.is_available()} device_count={ms.cuda.device_count()}")
|
|
32
|
+
print(f"GPU: {ms.cuda.device_name() or 'unavailable'}")
|
|
32
33
|
print(f"Shape: {shape}")
|
|
33
34
|
print(f"Warmup: {warmup}")
|
|
34
35
|
print(f"Iterations: {iterations}")
|
|
@@ -29,6 +29,7 @@ def main() -> None:
|
|
|
29
29
|
print(f"NumPy: {ms.numpy.__version__}")
|
|
30
30
|
print(f"ModelStudio: {ms.__version__}")
|
|
31
31
|
print(f"CUDA: available={ms.cuda.is_available()} device_count={ms.cuda.device_count()}")
|
|
32
|
+
print(f"GPU: {ms.cuda.device_name() or 'unavailable'}")
|
|
32
33
|
print(f"Shape: {shape} x {shape}")
|
|
33
34
|
print(f"Warmup: {warmup}")
|
|
34
35
|
print(f"Iterations: {iterations}")
|
|
@@ -30,6 +30,12 @@ int current_device() {
|
|
|
30
30
|
return device;
|
|
31
31
|
}
|
|
32
32
|
|
|
33
|
+
std::string device_name(int index) {
|
|
34
|
+
cudaDeviceProp properties{};
|
|
35
|
+
check_cuda(cudaGetDeviceProperties(&properties, index), "cudaGetDeviceProperties");
|
|
36
|
+
return std::string(properties.name);
|
|
37
|
+
}
|
|
38
|
+
|
|
33
39
|
void set_device(int index) {
|
|
34
40
|
check_cuda(cudaSetDevice(index), "cudaSetDevice");
|
|
35
41
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Backend Status
|
|
2
2
|
|
|
3
|
-
ModelStudio 0.6.0 keeps CPU as the only available runtime backend in the
|
|
3
|
+
ModelStudio 0.7.0 keeps CPU as the only available runtime backend in the
|
|
4
4
|
default PyPI package. It also exposes a public CUDA status namespace so users
|
|
5
5
|
can check accelerator availability without importing optional native artifacts.
|
|
6
6
|
|
|
@@ -31,6 +31,19 @@ The CUDA entry includes these fields:
|
|
|
31
31
|
device. `built` only reports whether the optional CUDA extension can be
|
|
32
32
|
imported. In the CPU-only package both are false.
|
|
33
33
|
|
|
34
|
+
The `ms.cuda` namespace provides safe availability probes:
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
ms.cuda.is_available()
|
|
38
|
+
ms.cuda.device_count()
|
|
39
|
+
ms.cuda.device_name()
|
|
40
|
+
ms.cuda.memory_allocated()
|
|
41
|
+
ms.cuda.memory_summary()
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
`device_name()` returns `None` when CUDA is unavailable or when an optional
|
|
45
|
+
backend does not expose a device-name binding.
|
|
46
|
+
|
|
34
47
|
`ms.backends.native_cpu_available()` checks for the optional future native CPU
|
|
35
48
|
extension. `ms.backends.use_native_cpu(True)` raises
|
|
36
49
|
`ModelStudioBackendUnavailable` unless that extension is installed. The NumPy
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# CUDA Status
|
|
2
|
+
|
|
3
|
+
ModelStudio 0.7.0 includes a public CUDA availability namespace and native scaffold,
|
|
4
|
+
but the default `modelstudio` PyPI package remains CPU-only.
|
|
5
|
+
|
|
6
|
+
```python
|
|
7
|
+
import modelstudio as ms
|
|
8
|
+
|
|
9
|
+
print(ms.cuda.is_available())
|
|
10
|
+
print(ms.cuda.device_count())
|
|
11
|
+
print(ms.cuda.device_name())
|
|
12
|
+
print(ms.cuda.current_device())
|
|
13
|
+
print(ms.cuda.memory_summary())
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
In the CPU-only package:
|
|
17
|
+
|
|
18
|
+
- `ms.cuda.is_available()` returns `False`.
|
|
19
|
+
- `ms.cuda.device_count()` returns `0`.
|
|
20
|
+
- `ms.cuda.device_name()` returns `None`.
|
|
21
|
+
- `ms.cuda.memory_allocated()` returns `0`.
|
|
22
|
+
- `ms.cuda.memory_summary()` explains why CUDA is unavailable.
|
|
23
|
+
- `ms.cuda.set_device(...)` and `ms.cuda.synchronize()` raise
|
|
24
|
+
`ModelStudioBackendUnavailable`.
|
|
25
|
+
- Creating tensors with `device="cuda"` raises `ModelStudioBackendUnavailable`.
|
|
26
|
+
|
|
27
|
+
The error is intentional:
|
|
28
|
+
|
|
29
|
+
```text
|
|
30
|
+
CUDA backend is not built. Install modelstudio-cuda or build with MODELSTUDIO_ENABLE_CUDA=ON.
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Native Scaffold
|
|
34
|
+
|
|
35
|
+
The `csrc/backends/cuda` tree includes conditional CMake wiring, CUDA context
|
|
36
|
+
helpers, memory and stream placeholders, and initial kernel entry points for:
|
|
37
|
+
|
|
38
|
+
- elementwise arithmetic
|
|
39
|
+
- reductions
|
|
40
|
+
- matrix multiplication
|
|
41
|
+
|
|
42
|
+
Those files are extension points, not a CUDA execution claim. A future CUDA
|
|
43
|
+
package must add device tensor storage, real kernels, Python bindings, packaging,
|
|
44
|
+
and hardware-backed tests before CUDA tensor execution can be advertised.
|
|
45
|
+
|
|
46
|
+
## Build Option
|
|
47
|
+
|
|
48
|
+
The top-level CMake option is:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
cmake -S . -B build-cuda -DMODELSTUDIO_ENABLE_CUDA=ON
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
If the CUDA compiler or toolkit is missing, configuration fails clearly instead
|
|
55
|
+
of silently producing a CPU-only build that looks CUDA-capable.
|
|
56
|
+
|
|
57
|
+
On Windows source checkouts, the helper below bootstraps missing Python-side
|
|
58
|
+
build tools such as the CMake wheel, runs CMake, and treats skipped CUDA tests
|
|
59
|
+
as a failure:
|
|
60
|
+
|
|
61
|
+
```powershell
|
|
62
|
+
python scripts/cuda_source_build_check.py
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
If this fails with a C++ compiler or generator error, install Visual Studio
|
|
66
|
+
Build Tools with the C++ workload and rerun it from a shell where `nvcc --version`
|
|
67
|
+
works.
|
|
68
|
+
|
|
69
|
+
## Phase 7 Release Gate
|
|
70
|
+
|
|
71
|
+
The next CUDA execution release must be prepared on a machine where all of
|
|
72
|
+
these commands work:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
nvidia-smi
|
|
76
|
+
nvcc --version
|
|
77
|
+
cmake -S . -B build-cuda -DMODELSTUDIO_ENABLE_CUDA=ON
|
|
78
|
+
cmake --build build-cuda --config Release
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Do not bump the package to `0.7.0`, create `v0.7.0`, or publish PyPI artifacts
|
|
82
|
+
unless the CUDA tests run instead of skipping:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
python -m pytest \
|
|
86
|
+
tests/test_cuda_availability.py \
|
|
87
|
+
tests/test_cuda_tensor.py \
|
|
88
|
+
tests/test_cuda_ops.py \
|
|
89
|
+
tests/test_cuda_reductions.py \
|
|
90
|
+
tests/test_cuda_matmul.py \
|
|
91
|
+
tests/test_cuda_autograd.py \
|
|
92
|
+
tests/test_cuda_optim.py \
|
|
93
|
+
tests/test_cuda_nn.py \
|
|
94
|
+
tests/test_cuda_memory.py
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
CPU-only machines may still run the same tests, but CUDA execution tests should
|
|
98
|
+
skip with `CUDA unavailable`. That is useful development feedback, not release
|
|
99
|
+
evidence for CUDA execution.
|
|
100
|
+
|
|
101
|
+
For the same gate in one command from a source checkout:
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
python scripts/cuda_source_build_check.py
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## CUDA Examples And Benchmarks
|
|
108
|
+
|
|
109
|
+
The CUDA scripts are safe to run on CPU-only installs:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
python examples/cuda_tensor_demo.py
|
|
113
|
+
python examples/cuda_mlp_demo.py
|
|
114
|
+
python examples/cuda_autograd_demo.py
|
|
115
|
+
python benchmarks/bench_cuda_elementwise.py
|
|
116
|
+
python benchmarks/bench_cuda_matmul.py
|
|
117
|
+
python benchmarks/bench_cuda_autograd.py
|
|
118
|
+
python scripts/cuda_release_check.py
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
On CPU-only installs they print the unavailable reason and exit with status 0.
|
|
122
|
+
On a CUDA build they should perform real CUDA tensor computation and synchronize
|
|
123
|
+
around timed benchmark regions.
|
|
124
|
+
|
|
125
|
+
`scripts/cuda_release_check.py` is stricter than the demos and benchmarks. It is
|
|
126
|
+
intended for a CUDA release machine and exits non-zero when CUDA is unavailable
|
|
127
|
+
or CUDA tensor execution fails.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Native Backend Roadmap
|
|
2
2
|
|
|
3
|
-
The Python runtime still uses NumPy CPU kernels in 0.6.0. The native C++ tree is
|
|
3
|
+
The Python runtime still uses NumPy CPU kernels in 0.7.0. The native C++ tree is
|
|
4
4
|
scaffolding for future backend work and is intentionally not wired into Python
|
|
5
5
|
dispatch yet.
|
|
6
6
|
|
|
@@ -17,10 +17,49 @@ Run the full local gate:
|
|
|
17
17
|
```bash
|
|
18
18
|
python -m ruff check .
|
|
19
19
|
python -m pytest
|
|
20
|
-
python examples/train_mlp.py
|
|
21
20
|
python scripts/smoke_test.py
|
|
21
|
+
python examples/backend_status.py
|
|
22
|
+
python examples/train_mlp.py
|
|
23
|
+
python examples/cuda_tensor_demo.py
|
|
24
|
+
python examples/cuda_mlp_demo.py
|
|
25
|
+
python examples/cuda_autograd_demo.py
|
|
22
26
|
python benchmarks/bench_matmul.py
|
|
23
27
|
python benchmarks/bench_mlp.py
|
|
28
|
+
python benchmarks/bench_cuda_elementwise.py
|
|
29
|
+
python benchmarks/bench_cuda_matmul.py
|
|
30
|
+
python benchmarks/bench_cuda_autograd.py
|
|
31
|
+
cmake -S . -B build-native
|
|
32
|
+
cmake --build build-native --config Release
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## CUDA Execution Release Gate
|
|
36
|
+
|
|
37
|
+
Do not publish a CUDA execution release from a CPU-only machine. Before bumping
|
|
38
|
+
to a release that claims CUDA execution, verify:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
nvidia-smi
|
|
42
|
+
nvcc --version
|
|
43
|
+
cmake -S . -B build-cuda -DMODELSTUDIO_ENABLE_CUDA=ON
|
|
44
|
+
cmake --build build-cuda --config Release
|
|
45
|
+
python -m pytest tests/test_cuda_availability.py tests/test_cuda_tensor.py tests/test_cuda_ops.py tests/test_cuda_reductions.py tests/test_cuda_matmul.py tests/test_cuda_autograd.py tests/test_cuda_optim.py tests/test_cuda_nn.py tests/test_cuda_memory.py
|
|
46
|
+
python examples/cuda_tensor_demo.py
|
|
47
|
+
python examples/cuda_mlp_demo.py
|
|
48
|
+
python examples/cuda_autograd_demo.py
|
|
49
|
+
python benchmarks/bench_cuda_elementwise.py
|
|
50
|
+
python benchmarks/bench_cuda_matmul.py
|
|
51
|
+
python benchmarks/bench_cuda_autograd.py
|
|
52
|
+
python scripts/cuda_release_check.py
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
On the CUDA release machine those tests must run and pass. If they skip because
|
|
56
|
+
CUDA is unavailable, do not tag or publish the CUDA release.
|
|
57
|
+
|
|
58
|
+
From a source checkout, this one-command gate also bootstraps missing
|
|
59
|
+
Python-side build tools such as the CMake wheel and fails if CUDA tests skip:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
python scripts/cuda_source_build_check.py
|
|
24
63
|
```
|
|
25
64
|
|
|
26
65
|
## Build Distributions
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import modelstudio as ms
|
|
4
|
+
from modelstudio import nn
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def main() -> None:
    """Run the CUDA autograd demo, or report why it is being skipped.

    Always prints the package version, CUDA availability, and device count.
    When ``ms.cuda.is_available()`` is false, the memory summary and a skip
    notice are printed and the function returns. Otherwise it runs a
    GELU/matmul backward pass on tensors moved to ``"cuda"``, prints the loss
    and the gradient devices, performs one ``nn.Linear`` + SGD update against
    a random target, and prints the training loss and the memory summary.
    """
    cuda = ms.cuda
    print(f"ModelStudio: {ms.__version__}")
    print(f"CUDA available: {cuda.is_available()}")
    print(f"CUDA device count: {cuda.device_count()}")

    cuda_ready = cuda.is_available()
    if not cuda_ready:
        print(cuda.memory_summary())
        print("Skipping CUDA autograd demo because CUDA tensor execution is not available.")
        return

    # Part 1: backward pass through gelu(x @ w).mean() on CUDA copies of
    # CPU-created tensors.
    ms.manual_seed(123)
    x_cpu = ms.randn((4, 3), requires_grad=True)
    w_cpu = ms.randn((3, 2), requires_grad=True)
    x = x_cpu.to("cuda")
    w = w_cpu.to("cuda")

    activations = ms.gelu(x @ w)
    loss = activations.mean()
    loss.backward()
    cuda.synchronize()

    print(f"loss={loss.item():.6f}")
    # A gradient may be absent, so report None instead of failing.
    x_grad_device = None if x.grad is None else x.grad.device
    print(f"x.grad.device={x_grad_device}")
    w_grad_device = None if w.grad is None else w.grad.device
    print(f"w.grad.device={w_grad_device}")

    # Part 2: a single optimizer step for a linear layer on the detached input.
    model = nn.Linear(3, 2).to("cuda")
    optimizer = ms.optim.SGD(model.parameters(), lr=1e-2)
    target = ms.randn((4, 2), device="cuda")
    detached_input = x.detach()
    prediction = model(detached_input)
    train_loss = ms.mse_loss(prediction, target)
    optimizer.zero_grad()
    train_loss.backward()
    optimizer.step()
    cuda.synchronize()

    print(f"train_loss={train_loss.item():.6f}")
    print(cuda.memory_summary())


if __name__ == "__main__":
    main()
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "modelstudio"
|
|
7
|
-
version = "0.6.0"
|
|
7
|
+
version = "0.7.0"
|
|
8
8
|
description = "An early-stage AI tensor framework with CPU tensors, autograd, and backend extension scaffolding."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.7.0"
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
from modelstudio.cuda.device import current_device, device_count, is_available, set_device
|
|
1
|
+
from modelstudio.cuda.device import current_device, device_count, device_name, is_available, set_device
|
|
2
2
|
from modelstudio.cuda.memory import memory_allocated, memory_summary
|
|
3
3
|
from modelstudio.cuda.streams import synchronize
|
|
4
4
|
|
|
5
5
|
__all__ = [
|
|
6
6
|
"current_device",
|
|
7
7
|
"device_count",
|
|
8
|
+
"device_name",
|
|
8
9
|
"is_available",
|
|
9
10
|
"memory_allocated",
|
|
10
11
|
"memory_summary",
|
|
@@ -17,6 +17,20 @@ def device_count() -> int:
|
|
|
17
17
|
return cuda_backend.device_count()
|
|
18
18
|
|
|
19
19
|
|
|
20
|
+
def device_name(index: int = 0) -> str | None:
    """Look up the name of the CUDA device at ``index``.

    Args:
        index: Device index; it is coerced with ``int()`` before validation.

    Returns:
        The name as a ``str``, or ``None`` when CUDA is unavailable or when
        ``cuda_backend.call_optional("device_name", ...)`` returns ``None``.

    Raises:
        ModelStudioDeviceError: If the coerced index is negative or not below
            ``device_count()``.
    """
    if is_available():
        device_index = int(index)
        if not 0 <= device_index < device_count():
            # Imported lazily, only on the error path.
            from modelstudio.errors import ModelStudioDeviceError

            raise ModelStudioDeviceError(f"CUDA device index {device_index} is out of range")
        reported = cuda_backend.call_optional("device_name", None, device_index)
        if reported is not None:
            return str(reported)
    return None
|
|
32
|
+
|
|
33
|
+
|
|
20
34
|
def current_device() -> int:
|
|
21
35
|
"""Return the selected CUDA device index.
|
|
22
36
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: modelstudio
|
|
3
|
-
Version: 0.6.0
|
|
3
|
+
Version: 0.7.0
|
|
4
4
|
Summary: An early-stage AI tensor framework with CPU tensors, autograd, and backend extension scaffolding.
|
|
5
5
|
Author: ModelStudio Contributors
|
|
6
6
|
License-Expression: MIT
|
|
@@ -31,7 +31,7 @@ Dynamic: license-file
|
|
|
31
31
|
|
|
32
32
|
# ModelStudio
|
|
33
33
|
|
|
34
|
-
ModelStudio is an early-stage AI tensor framework. Version `0.6.0` provides a
|
|
34
|
+
ModelStudio is an early-stage AI tensor framework. Version `0.7.0` provides a
|
|
35
35
|
CPU tensor/autograd MVP with neural-network modules, optimizers, serialization,
|
|
36
36
|
data loading, graph tracing metadata, backend status inspection, a public CUDA
|
|
37
37
|
availability namespace, and small LLM-oriented building blocks.
|
|
@@ -74,7 +74,7 @@ python -m pip install -e ".[dev]"
|
|
|
74
74
|
| Interop | `asarray`, `from_numpy`, `to_numpy`, and `ms.numpy` |
|
|
75
75
|
| Metrics | accuracy and top-k accuracy |
|
|
76
76
|
| Compiler | Metadata-only tracing plus placeholder IR and passes |
|
|
77
|
-
| CUDA API | Availability, device-count, sync,
|
|
77
|
+
| CUDA API | Availability, device-count/name, sync, memory-status facade, and release-machine validation scripts; tensor execution is not implemented in the CPU wheel |
|
|
78
78
|
|
|
79
79
|
## Architecture
|
|
80
80
|
|
|
@@ -123,6 +123,7 @@ CUDA availability can also be checked through the public namespace:
|
|
|
123
123
|
```python
|
|
124
124
|
print(ms.cuda.is_available())
|
|
125
125
|
print(ms.cuda.device_count())
|
|
126
|
+
print(ms.cuda.device_name())
|
|
126
127
|
print(ms.cuda.memory_summary())
|
|
127
128
|
```
|
|
128
129
|
|
|
@@ -226,6 +227,7 @@ python examples/functional_training.py
|
|
|
226
227
|
python examples/random_linalg_demo.py
|
|
227
228
|
python examples/cuda_tensor_demo.py
|
|
228
229
|
python examples/cuda_mlp_demo.py
|
|
230
|
+
python examples/cuda_autograd_demo.py
|
|
229
231
|
python benchmarks/bench_matmul.py
|
|
230
232
|
python benchmarks/bench_mlp.py
|
|
231
233
|
python benchmarks/bench_attention.py
|
|
@@ -238,6 +240,9 @@ python benchmarks/bench_elementwise.py
|
|
|
238
240
|
python benchmarks/bench_trace.py
|
|
239
241
|
python benchmarks/bench_cuda_elementwise.py
|
|
240
242
|
python benchmarks/bench_cuda_matmul.py
|
|
243
|
+
python benchmarks/bench_cuda_autograd.py
|
|
244
|
+
python scripts/cuda_release_check.py
|
|
245
|
+
python scripts/cuda_source_build_check.py
|
|
241
246
|
```
|
|
242
247
|
|
|
243
248
|
## Documentation
|
|
@@ -6,6 +6,7 @@ pyproject.toml
|
|
|
6
6
|
benchmarks/bench_attention.py
|
|
7
7
|
benchmarks/bench_conv.py
|
|
8
8
|
benchmarks/bench_creation.py
|
|
9
|
+
benchmarks/bench_cuda_autograd.py
|
|
9
10
|
benchmarks/bench_cuda_elementwise.py
|
|
10
11
|
benchmarks/bench_cuda_matmul.py
|
|
11
12
|
benchmarks/bench_dataloader.py
|
|
@@ -81,6 +82,7 @@ docs/training.md
|
|
|
81
82
|
examples/backend_status.py
|
|
82
83
|
examples/checkpoint_resume.py
|
|
83
84
|
examples/checkpoint_training.py
|
|
85
|
+
examples/cuda_autograd_demo.py
|
|
84
86
|
examples/cuda_mlp_demo.py
|
|
85
87
|
examples/cuda_tensor_demo.py
|
|
86
88
|
examples/dropout_batchnorm.py
|
|
@@ -163,6 +165,8 @@ python/modelstudio/runtime/backend.py
|
|
|
163
165
|
python/modelstudio/runtime/dispatcher.py
|
|
164
166
|
python/modelstudio/testing/__init__.py
|
|
165
167
|
python/modelstudio/testing/gradcheck.py
|
|
168
|
+
scripts/cuda_release_check.py
|
|
169
|
+
scripts/cuda_source_build_check.py
|
|
166
170
|
scripts/smoke_test.py
|
|
167
171
|
tests/test_activations_more.py
|
|
168
172
|
tests/test_attention.py
|
|
@@ -178,9 +182,12 @@ tests/test_conv.py
|
|
|
178
182
|
tests/test_creation_more.py
|
|
179
183
|
tests/test_cuda_autograd.py
|
|
180
184
|
tests/test_cuda_availability.py
|
|
185
|
+
tests/test_cuda_matmul.py
|
|
181
186
|
tests/test_cuda_memory.py
|
|
182
187
|
tests/test_cuda_nn.py
|
|
183
188
|
tests/test_cuda_ops.py
|
|
189
|
+
tests/test_cuda_optim.py
|
|
190
|
+
tests/test_cuda_reductions.py
|
|
184
191
|
tests/test_cuda_tensor.py
|
|
185
192
|
tests/test_data.py
|
|
186
193
|
tests/test_data_split.py
|