megatron-core 0.16.0rc0.dev129397__tar.gz → 0.16.0rc0.dev131152__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megatron-core might be problematic.
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/PKG-INFO +1 -1
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/contexts/dynamic_context.py +14 -7
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/engines/dynamic_engine.py +15 -1
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/text_generation_controllers/text_generation_controller.py +2 -1
- megatron_core-0.16.0rc0.dev131152/megatron/core/inference/unified_memory.py +127 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/package_info.py +1 -1
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron_core.egg-info/PKG-INFO +1 -1
- megatron_core-0.16.0rc0.dev129397/megatron/core/inference/unified_memory.py +0 -89
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/MANIFEST.in +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/README.md +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/README.md +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/activations.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/config.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/config_logger.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/bert_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/blended_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/blended_megatron_dataset_builder.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/blended_megatron_dataset_config.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/gpt_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/helpers.cpp +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/helpers.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/indexed_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/masked_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/megatron_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/megatron_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/multimodal_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/object_storage_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/config/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/config/bert_embedders.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/config/config.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/config/gpt_chunk_datasets.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/config/tokenizers.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/db/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/db/build.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/db/dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/db/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/external_libs.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/index/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/index/build.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/index/factory.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/index/index.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/index/indexes/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/index/indexes/faiss_base.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/index/indexes/faiss_par_add.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/index/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/index/validate.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/query/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/query/gpt_chunk_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/query/multi_split_gpt_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/query/query.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/query/retro_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/query/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/retro/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/t5_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/datasets/utils_s3.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/core.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/dict_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/exchange_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/mapping.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/optimizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/serialization.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/state_dict_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/strategies/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/strategies/async_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/strategies/base.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/strategies/cached_metadata_filesystem_reader.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/strategies/checkpointable.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/strategies/common.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/strategies/filesystem_async.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/strategies/fully_parallel.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/strategies/resharding.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/strategies/state_dict_saver.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/strategies/tensorstore.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/strategies/torch.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/strategies/two_stage.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/strategies/zarr.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/tensor_aware_state_dict.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/dist_checkpointing/validation.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/distributed/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/distributed/data_parallel_base.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/distributed/distributed_data_parallel.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/distributed/distributed_data_parallel_config.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/distributed/finalize_model_grads.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/distributed/fsdp/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/distributed/fsdp/src/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/distributed/fsdp/src/megatron_fsdp/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/distributed/fsdp/src/megatron_fsdp/distributed_data_parallel_config.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/distributed/fsdp/src/megatron_fsdp/package_info.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/distributed/param_and_grad_buffer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/distributed/reduce_scatter_with_fp32_accumulation.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/distributed/torch_fully_sharded_data_parallel.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/distributed/torch_fully_sharded_data_parallel_config.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/energy_monitor.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/enums.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/export/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/export/data_type.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/export/export_config.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/export/model_type.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/export/trtllm/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/export/trtllm/engine_builder/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/export/trtllm/engine_builder/trtllm_engine_builder.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/export/trtllm/model_to_trllm_mapping/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/export/trtllm/model_to_trllm_mapping/default_conversion_dict.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/export/trtllm/trt_model_config.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/export/trtllm/trt_model_type.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/export/trtllm/trtllm_helper.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/export/trtllm/trtllm_layers.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/export/trtllm/trtllm_weights_converter/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/export/trtllm/trtllm_weights_converter/distributed_trtllm_model_weights_converter.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/export/trtllm/trtllm_weights_converter/single_device_trtllm_model_weights_converter.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/export/trtllm/trtllm_weights_converter/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/extensions/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/extensions/kitchen.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/extensions/transformer_engine.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/extensions/transformer_engine_spec_provider.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/fp4_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/fp8_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/full_cuda_graph.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/fusions/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/fusions/fused_bias_dropout.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/fusions/fused_bias_geglu.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/fusions/fused_bias_gelu.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/fusions/fused_bias_swiglu.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/fusions/fused_cross_entropy.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/fusions/fused_indices_converter.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/fusions/fused_layer_norm.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/fusions/fused_mla_yarn_rope_apply.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/fusions/fused_pad_routing_map.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/fusions/fused_softmax.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/fusions/fused_weighted_squared_relu.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/hyper_comm_grid.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/async_stream.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/common_inference_params.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/communication_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/contexts/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/contexts/base_context.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/contexts/dynamic_block_allocator.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/contexts/fused_kv_append_kernel.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/contexts/static_context.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/data_parallel_inference_coordinator.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/engines/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/engines/abstract_engine.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/engines/mcore_engine.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/engines/static_engine.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/headers.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/inference_client.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/inference_request.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/model_inference_wrappers/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/model_inference_wrappers/gpt/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/model_inference_wrappers/gpt/gpt_inference_wrapper.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/model_inference_wrappers/inference_wrapper_config.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/model_inference_wrappers/multimodal/vlm_inference_wrapper.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/model_inference_wrappers/t5/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/model_inference_wrappers/t5/t5_inference_wrapper.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/sampling_params.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/scheduler.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/text_generation_controllers/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/text_generation_controllers/encoder_decoder_text_generation_controller.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/text_generation_controllers/simple_text_generation_controller.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/text_generation_controllers/vlm_text_generation_controller.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/text_generation_server/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/text_generation_server/endpoints/common.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/text_generation_server/endpoints/completions.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/text_generation_server/run_mcore_engine.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/text_generation_server/text_generation_server.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/text_generation_server/tokenization.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/inference_params.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/jit.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/model_parallel_config.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/T5/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/T5/t5_model.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/T5/t5_spec.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/backends.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/bert/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/bert/bert_layer_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/bert/bert_lm_head.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/bert/bert_model.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/bert/pooler.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/common/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/common/embeddings/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/common/embeddings/language_model_embedding.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/common/embeddings/relative_pos_embedding.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/common/embeddings/rope_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/common/embeddings/rotary_pos_embedding.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/common/language_module/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/common/language_module/language_module.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/common/model_chunk_schedule_plan.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/common/vision_module/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/common/vision_module/vision_module.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/gpt/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/gpt/fine_grained_callables.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/gpt/gpt_layer_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/gpt/gpt_model.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/gpt/heterogeneous/heterogeneous_layer_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/gpt/moe_module_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/huggingface/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/huggingface/clip_model.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/huggingface/module.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/huggingface/qwen_model.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/mamba/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/mamba/mamba_layer_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/mamba/mamba_model.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/mimo/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/mimo/config/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/mimo/config/base_configs.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/mimo/model/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/mimo/model/base.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/mimo/submodules/audio.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/mimo/submodules/base.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/mimo/submodules/vision.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/multimodal/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/multimodal/context_parallel.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/multimodal/llava_model.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/multimodal/llava_spec.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/retro/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/retro/base_attention.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/retro/config.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/retro/decoder_attention.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/retro/decoder_spec.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/retro/encoder_attention.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/retro/encoder_spec.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/retro/model.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/retro/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/vision/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/vision/clip_vit_model.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/vision/multimodal_projector.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/vision/radio.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/models/vision/vit_layer_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/msc_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/nccl_allocator.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/num_microbatches_calculator.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/optimizer/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/optimizer/clip_grads.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/optimizer/cpu_offloading/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/optimizer/cpu_offloading/hybrid_optimizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/optimizer/distrib_optimizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/optimizer/grad_scaler.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/optimizer/optimizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/optimizer/optimizer_config.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/optimizer_param_scheduler.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/packed_seq_params.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/parallel_state.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/pipeline_parallel/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/pipeline_parallel/bridge_communicator.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/pipeline_parallel/combined_1f1b.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/pipeline_parallel/p2p_communication.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/pipeline_parallel/schedules.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/pipeline_parallel/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/post_training/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/post_training/modelopt/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/post_training/modelopt/gpt/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/post_training/modelopt/gpt/model_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/post_training/modelopt/gpt/state_dict_hooks.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/post_training/modelopt/layers.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/post_training/modelopt/mamba/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/post_training/modelopt/mamba/model_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/process_groups_config.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/quantization/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/quantization/quant_config.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/quantization/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/requirements.txt +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/rerun_state_machine.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/safe_globals.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/ssm/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/ssm/mamba_block.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/ssm/mamba_context_parallel.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/ssm/mamba_hybrid_layer_allocation.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/ssm/mamba_layer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/ssm/mamba_mixer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/ssm/mlp_layer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/ssm/triton_cache_manager.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tensor_parallel/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tensor_parallel/cross_entropy.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tensor_parallel/data.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tensor_parallel/layers.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tensor_parallel/mappings.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tensor_parallel/random.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tensor_parallel/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/timers.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/base_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/megatron_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/text/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/text/libraries/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/text/libraries/abstract_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/text/libraries/bytelevel_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/text/libraries/chat_template.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/text/libraries/huggingface_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/text/libraries/megatron_hf_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/text/libraries/null_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/text/libraries/sentencepiece_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/text/libraries/tiktoken_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/text/models/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/text/models/bert_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/text/models/default_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/text/models/gpt_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/text/models/mamba_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/text/models/retro_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/text/models/t5_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/text/text_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/tokenizers/text/utils/build_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/attention.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/cuda_graphs.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/custom_layers/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/custom_layers/transformer_engine.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/dot_product_attention.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/enums.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/fsdp_dtensor_checkpoint.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/heterogeneous/heterogeneous_config.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/heterogeneous/linear_replacements.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/identity_op.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/mlp.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/module.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/moe/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/moe/experts.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/moe/fused_a2a.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/moe/grouped_gemm_util.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/moe/moe_layer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/moe/moe_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/moe/router.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/moe/shared_experts.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/moe/token_dispatcher.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/moe/upcycling_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/multi_latent_attention.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/multi_token_prediction.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/pipeline_parallel_layer_layout.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/spec_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/torch_layer_norm.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/torch_norm.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/transformer_block.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/transformer_config.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/transformer_layer.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/transformer/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron/core/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron_core.egg-info/SOURCES.txt +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron_core.egg-info/dependency_links.txt +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron_core.egg-info/requires.txt +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/megatron_core.egg-info/top_level.txt +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/pyproject.toml +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/setup.cfg +0 -0
- {megatron_core-0.16.0rc0.dev129397 → megatron_core-0.16.0rc0.dev131152}/setup.py +0 -0

PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: megatron-core
-Version: 0.16.0rc0.dev129397
+Version: 0.16.0rc0.dev131152
 Summary: Megatron Core - a library for efficient and scalable training of transformer based models
 Author-email: NVIDIA <nemo-toolkit@nvidia.com>
 Maintainer-email: NVIDIA <nemo-toolkit@nvidia.com>

megatron/core/inference/contexts/dynamic_context.py

@@ -16,7 +16,10 @@ from megatron.core.inference.inference_request import DynamicInferenceRequest
 from megatron.core.inference.model_inference_wrappers.inference_wrapper_config import (
     InferenceWrapperConfig,
 )
-from megatron.core.inference.unified_memory import
+from megatron.core.inference.unified_memory import (
+    UnifiedMemoryUnsupportedError,
+    create_unified_mempool,
+)
 from megatron.core.inference.utils import tensor_swap
 from megatron.core.models.common.embeddings.rope_utils import apply_rotary_pos_emb
 from megatron.core.package_info import __version__ as mcore_version

@@ -322,16 +325,20 @@ class DynamicInferenceContext(BaseInferenceContext):
         self.params_dtype = params_dtype
         self.num_layers = num_layers
         self.max_sequence_length = max_sequence_length
+
+        # Unified memory.
         self.unified_memory_level = unified_memory_level
         if unified_memory_level > 0:
-
-                warnings.warn(
-                    "Unified memory requested but not available; defaulting to GPU memory."
-                )
-                self.unified_memory_level = 0
-            else:
+            try:
                 self.unified_memory_mempool = create_unified_mempool()
+            except UnifiedMemoryUnsupportedError:
+                if torch.distributed.get_rank() == 0:
+                    warnings.warn(
+                        "Unified memory requested but not available; defaulting to GPU memory."
+                    )
+                self.unified_memory_level = 0

+        # Request and token counts.
         self.total_request_count = 0
         self.active_token_count = 0
         self.paused_request_count = 0
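
The fallback above only decides whether a unified-memory MemPool exists; the call sites that actually allocate through it are elsewhere in dynamic_context.py and are not part of this diff. A minimal sketch of how such a pool is typically consumed, assuming PyTorch's torch.cuda.use_mem_pool context manager is available (the helper name allocate_kv_buffer is illustrative, not a megatron-core API):

import torch

def allocate_kv_buffer(shape, dtype, mempool=None):
    # Route the allocation through the unified-memory pool when one was created;
    # otherwise fall back to the default CUDA caching allocator.
    if mempool is not None:
        with torch.cuda.use_mem_pool(mempool):
            return torch.empty(shape, dtype=dtype, device="cuda")
    return torch.empty(shape, dtype=dtype, device="cuda")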

megatron/core/inference/engines/dynamic_engine.py

@@ -165,6 +165,17 @@ class DynamicInferenceEngine(AbstractEngine):
         context = self.context
         controller = self.controller

+        config = controller.inference_wrapped_model.inference_wrapper_config
+        moe_pad_experts = config.moe_pad_experts_for_cuda_graph_inference
+
+        if moe_pad_experts and context.non_decode_cuda_graphs:
+            context.non_decode_cuda_graphs = False
+            if torch.distributed.get_rank() == 0:
+                warnings.warn(
+                    "MoE models do not support non-decode cuda graphs. "
+                    "Forcing non_decode_cuda_graphs to False."
+                )
+
         time_start = time.time()
         mem_stats_start = torch.cuda.memory_stats()

@@ -174,15 +185,18 @@ class DynamicInferenceEngine(AbstractEngine):
             context.cuda_graph_token_counts,
         )
         for warmup_engine_mode in [WarmupEngineMode.DECODE, WarmupEngineMode.NON_DECODE]:
-            #
+            # Check whether to skip non-decode graphs.
             if (
                 warmup_engine_mode == WarmupEngineMode.NON_DECODE
                 and not context.non_decode_cuda_graphs
             ):
                 continue
+
             tbar = enumerate(context.cuda_graph_token_counts)
             if HAVE_TQDM:
                 tbar = tqdm(tbar, total=len(context.cuda_graph_token_counts))
+
+            # Iterate cuda graph dims.
             for tbar_idx, cuda_graph_token_count in tbar:
                 if (
                     cuda_graph_token_count == 1
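
The dynamic_engine.py change above and the dynamic_context.py change both use the same rank-0-only warning idiom so that multi-GPU runs emit one message rather than one per process. A generic sketch of that idiom (the helper name is illustrative, and the is_available/is_initialized guards are an addition beyond what the diff needs, since the engine runs with torch.distributed already initialized):

import warnings

import torch.distributed as dist

def warn_on_rank_0(message: str) -> None:
    # Emit the warning once, from rank 0, instead of once per rank.
    if not dist.is_available() or not dist.is_initialized() or dist.get_rank() == 0:
        warnings.warn(message)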

megatron/core/inference/text_generation_controllers/text_generation_controller.py

@@ -508,7 +508,8 @@ class TextGenerationController:
             inference_wrapper_config.moe_pad_experts_for_cuda_graph_inference
         )
         if moe_pad_experts_for_cuda_graph_inference:
-
+            assert warmup_engine_mode is not WarmupEngineMode.NON_DECODE
+            if context.is_decode_only():
                 capacity_factor = model_config.num_moe_experts / model_config.moe_router_topk
                 set_decode_expert_padding(unwrapped_model, True, capacity_factor=capacity_factor)
             else:
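
The capacity factor set above makes the per-expert token count shape-static for decode-only cuda graphs: assuming the usual expert-capacity definition capacity = capacity_factor * tokens * topk / num_experts (the exact padding math lives in set_decode_expert_padding, which this diff does not show), a factor of num_moe_experts / moe_router_topk pads every expert to the full decode token count. A small worked example with illustrative numbers:

# Illustrative numbers only; real values come from the model config.
num_moe_experts = 64
moe_router_topk = 8
decode_tokens = 256  # tokens in one cuda-graphed decode step

capacity_factor = num_moe_experts / moe_router_topk                         # 8.0
avg_tokens_per_expert = decode_tokens * moe_router_topk / num_moe_experts   # 32.0
padded_capacity_per_expert = int(capacity_factor * avg_tokens_per_expert)   # 256

# Every expert is padded to the full decode token count, so routed shapes are
# identical regardless of how the router distributes tokens -- a requirement
# for replaying a captured cuda graph.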

megatron_core-0.16.0rc0.dev131152/megatron/core/inference/unified_memory.py (added)

@@ -0,0 +1,127 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+
+import os
+import warnings
+from enum import Enum, auto
+from pathlib import Path
+
+from torch.cuda.memory import CUDAPluggableAllocator
+from torch.utils.cpp_extension import CUDA_HOME, load_inline
+
+from megatron.core.utils import is_torch_min_version
+
+try:
+    if is_torch_min_version("2.8.0"):
+        from torch.cuda.memory import MemPool
+    else:
+        from torch.cuda import MemPool
+    _has_mem_pool = True
+except ImportError:
+    _has_mem_pool = False
+
+
+class CompilationState(Enum):
+    """Enum to distinguish between unified memory (UVM) compilation states."""
+
+    UNATTEMPTED = auto()  # Compilation has not been attempted.
+    FAILURE = auto()  # Compilation attempted, but failed.
+    SUCCESS = auto()  # Compilation attempted, and succeeded.
+
+
+# Compilation vars.
+_compilation_state = CompilationState.UNATTEMPTED
+_alloc = None  # must remain global until process exit.
+_mod = None  # must remain global until process exit.
+
+
+class UnifiedMemoryUnsupportedError(Exception):
+    """Unified memory is not supported on this system."""
+
+    pass
+
+
+def compile_allocator():
+    """Attempt to compile UVM allocator."""
+
+    global _compilation_state, _alloc, _mod
+
+    if _compilation_state != CompilationState.UNATTEMPTED:
+        return
+
+    _mempool_c_src = r"""
+#include <cuda_runtime_api.h>
+#include <cstddef>
+
+#define EXPORT extern "C"
+
+EXPORT void* managed_malloc(size_t size, int device, void* stream) {
+    (void)stream;
+    int cur = -1;
+    cudaGetDevice(&cur);
+    if (device != cur && device >= 0) cudaSetDevice(device);
+
+    // cudaMallocManaged allows for more memory to be allocated than the device memory size.
+    // The cudaMemAttachGlobal flag makes the memory accessible from both host and device.
+    void* ptr = nullptr;
+    cudaError_t err = cudaMallocManaged(&ptr, (size_t)size, cudaMemAttachGlobal);
+    if (err != cudaSuccess) return nullptr;
+
+    if (device >= 0) {
+        // cudaMemAdviseSetPreferredLocation sets the preferred location for the memory.
+        // This is a hint that tries to prevent data from being migrated away from the device.
+        cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetPreferredLocation, device);
+        // cudaMemAdviseSetAccessedBy ensures the memory always lives in the device's page table.
+        // Even if the memory has to be migrated away from the device, it still does not page fault.
+        // The CUDA docs claim that cudaMemAdviseSetPreferredLocation completely overrides this flag,
+        // but there is no harm in adding this flag as well for future-proofing.
+        cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetAccessedBy, device);
+    }
+    return ptr;
+}
+
+EXPORT void managed_free(void* ptr, size_t size, int device, void* stream) {
+    // Memory allocated with cudaMallocManaged should be released with cudaFree.
+    (void)size; (void)device; (void)stream;
+    if (ptr) cudaFree(ptr);
+}
+"""
+
+    # Build the .so upon import; this avoids issues.
+    if _has_mem_pool:
+        _extra_ldflags = ["-lcudart"]
+        if CUDA_HOME:
+            _cuda_lib = os.path.join(CUDA_HOME, "lib64")
+            if os.path.isdir(_cuda_lib):
+                _extra_ldflags = [f"-L{_cuda_lib}", "-lcudart"]
+        try:
+            _mod = load_inline(
+                name="managed_alloc_runtime",
+                cpp_sources=[_mempool_c_src],
+                functions=[],
+                with_cuda=True,
+                extra_ldflags=_extra_ldflags,
+                verbose=False,
+            )
+            _so_path = Path(_mod.__file__).as_posix()
+            _alloc = CUDAPluggableAllocator(_so_path, "managed_malloc", "managed_free").allocator()
+            _compilation_state = CompilationState.SUCCESS
+        except (RuntimeError, ImportError, OSError):
+            warnings.warn("Failed to create unified memory mempool.")
+            _compilation_state = CompilationState.FAILURE
+
+
+def create_unified_mempool() -> MemPool:
+    """Create a unified memory mempool using CUDA managed memory.
+
+    Returns:
+        (MemPool) Unified memory mempool.
+    """
+
+    # Attempt to compile allocator.
+    compile_allocator()
+
+    # Return mempool.
+    if _compilation_state != CompilationState.SUCCESS:
+        raise UnifiedMemoryUnsupportedError()
+    else:
+        return MemPool(allocator=_alloc)
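
A usage sketch of the new module: create_unified_mempool() now raises UnifiedMemoryUnsupportedError instead of asserting, so callers can fall back cleanly (this is exactly what dynamic_context.py does above). The torch.cuda.use_mem_pool call below is an assumption about how the returned MemPool would be consumed; the diff itself only shows the pool being stored on the inference context.

import torch

from megatron.core.inference.unified_memory import (
    UnifiedMemoryUnsupportedError,
    create_unified_mempool,
)

try:
    # The first call compiles the cudaMallocManaged allocator via load_inline.
    pool = create_unified_mempool()
except UnifiedMemoryUnsupportedError:
    pool = None  # Unified memory unavailable; stay on the default caching allocator.

if pool is not None:
    # Tensors allocated inside this context are backed by managed (UVM) memory
    # and may exceed the physical GPU memory size.
    with torch.cuda.use_mem_pool(pool):
        kv_cache = torch.empty((1 << 28,), dtype=torch.float16, device="cuda")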

megatron_core.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: megatron-core
-Version: 0.16.0rc0.dev129397
+Version: 0.16.0rc0.dev131152
 Summary: Megatron Core - a library for efficient and scalable training of transformer based models
 Author-email: NVIDIA <nemo-toolkit@nvidia.com>
 Maintainer-email: NVIDIA <nemo-toolkit@nvidia.com>

megatron_core-0.16.0rc0.dev129397/megatron/core/inference/unified_memory.py (removed)

@@ -1,89 +0,0 @@
-# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
-
-import os
-import warnings
-from pathlib import Path
-
-from torch.cuda.memory import CUDAPluggableAllocator
-from torch.utils.cpp_extension import CUDA_HOME, load_inline
-
-from megatron.core.utils import is_torch_min_version
-
-try:
-    if is_torch_min_version("2.8.0"):
-        from torch.cuda.memory import MemPool
-    else:
-        from torch.cuda import MemPool
-    _has_mem_pool = True
-except ImportError:
-    _has_mem_pool = False
-
-_mempool_c_src = r"""
-#include <cuda_runtime_api.h>
-#include <cstddef>
-
-#define EXPORT extern "C"
-
-EXPORT void* managed_malloc(size_t size, int device, void* stream) {
-    (void)stream;
-    int cur = -1;
-    cudaGetDevice(&cur);
-    if (device != cur && device >= 0) cudaSetDevice(device);
-
-    // cudaMallocManaged allows for more memory to be allocated than the device memory size.
-    // The cudaMemAttachGlobal flag makes the memory accessible from both host and device.
-    void* ptr = nullptr;
-    cudaError_t err = cudaMallocManaged(&ptr, (size_t)size, cudaMemAttachGlobal);
-    if (err != cudaSuccess) return nullptr;
-
-    if (device >= 0) {
-        // cudaMemAdviseSetPreferredLocation sets the preferred location for the memory.
-        // This is a hint that tries to prevent data from being migrated away from the device.
-        cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetPreferredLocation, device);
-        // cudaMemAdviseSetAccessedBy ensures the memory always lives in the device's page table.
-        // Even if the memory has to be migrated away from the device, it still does not page fault.
-        // The CUDA docs claim that cudaMemAdviseSetPreferredLocation completely overrides this flag,
-        // but there is no harm in adding this flag as well for future-proofing.
-        cudaMemAdvise(ptr, (size_t)size, cudaMemAdviseSetAccessedBy, device);
-    }
-    return ptr;
-}
-
-EXPORT void managed_free(void* ptr, size_t size, int device, void* stream) {
-    // Memory allocated with cudaMallocManaged should be released with cudaFree.
-    (void)size; (void)device; (void)stream;
-    if (ptr) cudaFree(ptr);
-}
-"""
-
-# Avoid linting errors.
-has_unified_memory = False
-_alloc = None
-
-# Build the .so upon import; this avoids issues.
-if _has_mem_pool:
-    _extra_ldflags = ["-lcudart"]
-    if CUDA_HOME:
-        _cuda_lib = os.path.join(CUDA_HOME, "lib64")
-        if os.path.isdir(_cuda_lib):
-            _extra_ldflags = [f"-L{_cuda_lib}", "-lcudart"]
-    try:
-        _mod = load_inline(
-            name="managed_alloc_runtime",
-            cpp_sources=[_mempool_c_src],
-            functions=[],
-            with_cuda=True,
-            extra_ldflags=_extra_ldflags,
-            verbose=False,
-        )
-        _so_path = Path(_mod.__file__).as_posix()
-        _alloc = CUDAPluggableAllocator(_so_path, "managed_malloc", "managed_free").allocator()
-        has_unified_memory = True
-    except (RuntimeError, ImportError, OSError):
-        warnings.warn("Failed to create unified memory mempool.")
-
-
-def create_unified_mempool():
-    """Create a unified memory mempool using CUDA managed memory."""
-    assert has_unified_memory
-    return MemPool(allocator=_alloc)