megatron-core 0.16.0rc0.dev126546__tar.gz → 0.16.0rc0.dev126744__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of megatron-core might be problematic.
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/PKG-INFO +14 -7
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/megatron_tokenizer.py +9 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/contexts/dynamic_context.py +80 -1
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/engines/dynamic_engine.py +72 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/package_info.py +1 -1
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/safe_globals.py +2 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/attention.py +14 -3
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/cuda_graphs.py +5 -1
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/dot_product_attention.py +2 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/moe/router.py +2 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/pipeline_parallel_layer_layout.py +5 -2
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron_core.egg-info/PKG-INFO +14 -7
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron_core.egg-info/requires.txt +13 -6
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/pyproject.toml +13 -6
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/MANIFEST.in +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/README.md +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/README.md +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/activations.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/config.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/config_logger.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/bert_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/blended_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/blended_megatron_dataset_builder.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/blended_megatron_dataset_config.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/gpt_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/helpers.cpp +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/helpers.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/indexed_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/masked_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/megatron_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/multimodal_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/object_storage_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/config/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/config/bert_embedders.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/config/config.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/config/gpt_chunk_datasets.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/config/tokenizers.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/db/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/db/build.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/db/dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/db/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/external_libs.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/index/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/index/build.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/index/factory.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/index/index.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/index/indexes/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/index/indexes/faiss_base.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/index/indexes/faiss_par_add.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/index/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/index/validate.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/query/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/query/gpt_chunk_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/query/multi_split_gpt_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/query/query.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/query/retro_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/query/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/retro/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/t5_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/datasets/utils_s3.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/core.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/dict_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/exchange_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/mapping.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/optimizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/serialization.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/state_dict_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/strategies/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/strategies/async_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/strategies/base.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/strategies/cached_metadata_filesystem_reader.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/strategies/checkpointable.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/strategies/common.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/strategies/filesystem_async.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/strategies/fully_parallel.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/strategies/resharding.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/strategies/state_dict_saver.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/strategies/tensorstore.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/strategies/torch.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/strategies/two_stage.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/strategies/zarr.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/tensor_aware_state_dict.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/dist_checkpointing/validation.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/distributed/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/distributed/data_parallel_base.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/distributed/distributed_data_parallel.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/distributed/distributed_data_parallel_config.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/distributed/finalize_model_grads.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/distributed/fsdp/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/distributed/fsdp/src/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/distributed/fsdp/src/megatron_fsdp/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/distributed/fsdp/src/megatron_fsdp/distributed_data_parallel_config.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/distributed/fsdp/src/megatron_fsdp/package_info.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/distributed/param_and_grad_buffer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/distributed/reduce_scatter_with_fp32_accumulation.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/distributed/torch_fully_sharded_data_parallel.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/distributed/torch_fully_sharded_data_parallel_config.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/energy_monitor.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/enums.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/export/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/export/data_type.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/export/export_config.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/export/model_type.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/export/trtllm/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/export/trtllm/engine_builder/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/export/trtllm/engine_builder/trtllm_engine_builder.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/export/trtllm/model_to_trllm_mapping/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/export/trtllm/model_to_trllm_mapping/default_conversion_dict.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/export/trtllm/trt_model_config.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/export/trtllm/trt_model_type.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/export/trtllm/trtllm_helper.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/export/trtllm/trtllm_layers.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/export/trtllm/trtllm_weights_converter/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/export/trtllm/trtllm_weights_converter/distributed_trtllm_model_weights_converter.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/export/trtllm/trtllm_weights_converter/single_device_trtllm_model_weights_converter.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/export/trtllm/trtllm_weights_converter/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/extensions/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/extensions/kitchen.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/extensions/transformer_engine.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/extensions/transformer_engine_spec_provider.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/fp4_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/fp8_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/full_cuda_graph.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/fusions/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/fusions/fused_bias_dropout.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/fusions/fused_bias_geglu.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/fusions/fused_bias_gelu.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/fusions/fused_bias_swiglu.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/fusions/fused_cross_entropy.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/fusions/fused_indices_converter.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/fusions/fused_layer_norm.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/fusions/fused_mla_yarn_rope_apply.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/fusions/fused_pad_routing_map.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/fusions/fused_softmax.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/fusions/fused_weighted_squared_relu.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/hyper_comm_grid.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/async_stream.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/common_inference_params.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/communication_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/contexts/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/contexts/attention_context/metadata_base.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/contexts/attention_context/mha_metadata.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/contexts/base_context.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/contexts/dynamic_block_allocator.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/contexts/fused_kv_append_kernel.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/contexts/static_context.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/data_parallel_inference_coordinator.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/engines/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/engines/abstract_engine.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/engines/mcore_engine.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/engines/static_engine.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/headers.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/inference_client.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/inference_request.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/model_inference_wrappers/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/model_inference_wrappers/gpt/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/model_inference_wrappers/gpt/gpt_inference_wrapper.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/model_inference_wrappers/inference_wrapper_config.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/model_inference_wrappers/multimodal/vlm_inference_wrapper.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/model_inference_wrappers/t5/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/model_inference_wrappers/t5/t5_inference_wrapper.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/sampling_params.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/scheduler.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/text_generation_controllers/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/text_generation_controllers/encoder_decoder_text_generation_controller.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/text_generation_controllers/simple_text_generation_controller.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/text_generation_controllers/text_generation_controller.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/text_generation_controllers/vlm_text_generation_controller.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/text_generation_server/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/text_generation_server/endpoints/common.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/text_generation_server/endpoints/completions.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/text_generation_server/run_mcore_engine.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/text_generation_server/text_generation_server.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/text_generation_server/tokenization.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/unified_memory.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/inference_params.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/jit.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/model_parallel_config.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/T5/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/T5/t5_model.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/T5/t5_spec.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/backends.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/bert/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/bert/bert_layer_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/bert/bert_lm_head.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/bert/bert_model.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/bert/pooler.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/common/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/common/embeddings/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/common/embeddings/language_model_embedding.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/common/embeddings/relative_pos_embedding.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/common/embeddings/rope_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/common/embeddings/rotary_pos_embedding.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/common/language_module/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/common/language_module/language_module.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/common/model_chunk_schedule_plan.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/common/vision_module/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/common/vision_module/vision_module.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/gpt/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/gpt/fine_grained_callables.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/gpt/gpt_layer_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/gpt/gpt_model.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/gpt/heterogeneous/heterogeneous_layer_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/gpt/moe_module_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/huggingface/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/huggingface/clip_model.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/huggingface/module.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/huggingface/qwen_model.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/mamba/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/mamba/mamba_layer_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/mamba/mamba_model.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/mimo/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/mimo/config/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/mimo/config/base_configs.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/mimo/model/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/mimo/model/base.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/mimo/submodules/audio.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/mimo/submodules/base.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/mimo/submodules/vision.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/multimodal/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/multimodal/context_parallel.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/multimodal/llava_model.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/multimodal/llava_spec.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/retro/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/retro/base_attention.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/retro/config.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/retro/decoder_attention.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/retro/decoder_spec.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/retro/encoder_attention.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/retro/encoder_spec.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/retro/model.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/retro/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/vision/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/vision/clip_vit_model.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/vision/multimodal_projector.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/vision/radio.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/models/vision/vit_layer_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/msc_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/nccl_allocator.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/num_microbatches_calculator.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/optimizer/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/optimizer/clip_grads.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/optimizer/cpu_offloading/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/optimizer/cpu_offloading/hybrid_optimizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/optimizer/distrib_optimizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/optimizer/grad_scaler.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/optimizer/optimizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/optimizer/optimizer_config.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/optimizer_param_scheduler.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/packed_seq_params.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/parallel_state.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/pipeline_parallel/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/pipeline_parallel/bridge_communicator.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/pipeline_parallel/combined_1f1b.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/pipeline_parallel/p2p_communication.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/pipeline_parallel/schedules.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/pipeline_parallel/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/post_training/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/post_training/modelopt/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/post_training/modelopt/gpt/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/post_training/modelopt/gpt/model_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/post_training/modelopt/gpt/state_dict_hooks.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/post_training/modelopt/layers.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/post_training/modelopt/mamba/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/post_training/modelopt/mamba/model_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/process_groups_config.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/quantization/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/quantization/quant_config.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/quantization/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/requirements.txt +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/rerun_state_machine.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/ssm/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/ssm/mamba_block.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/ssm/mamba_context_parallel.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/ssm/mamba_hybrid_layer_allocation.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/ssm/mamba_layer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/ssm/mamba_mixer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/ssm/mlp_layer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/ssm/triton_cache_manager.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tensor_parallel/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tensor_parallel/cross_entropy.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tensor_parallel/data.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tensor_parallel/layers.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tensor_parallel/mappings.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tensor_parallel/random.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tensor_parallel/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/timers.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/base_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/megatron_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/text/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/text/libraries/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/text/libraries/abstract_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/text/libraries/bytelevel_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/text/libraries/chat_template.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/text/libraries/huggingface_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/text/libraries/megatron_hf_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/text/libraries/null_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/text/libraries/sentencepiece_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/text/libraries/tiktoken_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/text/models/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/text/models/bert_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/text/models/default_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/text/models/gpt_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/text/models/mamba_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/text/models/retro_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/text/models/t5_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/text/text_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/tokenizers/text/utils/build_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/custom_layers/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/custom_layers/transformer_engine.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/enums.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/fsdp_dtensor_checkpoint.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/heterogeneous/heterogeneous_config.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/heterogeneous/linear_replacements.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/identity_op.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/mlp.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/module.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/moe/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/moe/experts.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/moe/fused_a2a.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/moe/grouped_gemm_util.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/moe/moe_layer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/moe/moe_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/moe/shared_experts.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/moe/token_dispatcher.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/moe/upcycling_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/multi_latent_attention.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/multi_token_prediction.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/spec_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/torch_layer_norm.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/torch_norm.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/transformer_block.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/transformer_config.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/transformer_layer.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/transformer/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron_core.egg-info/SOURCES.txt +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron_core.egg-info/dependency_links.txt +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron_core.egg-info/top_level.txt +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/setup.cfg +0 -0
- {megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/setup.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: megatron-core
-Version: 0.16.0rc0.dev126546
+Version: 0.16.0rc0.dev126744
 Summary: Megatron Core - a library for efficient and scalable training of transformer based models
 Author-email: NVIDIA <nemo-toolkit@nvidia.com>
 Maintainer-email: NVIDIA <nemo-toolkit@nvidia.com>

@@ -41,7 +41,7 @@ Requires-Dist: transformers; extra == "mlm"
 Provides-Extra: dev
 Requires-Dist: nvidia-modelopt[torch]; sys_platform != "darwin" and extra == "dev"
 Requires-Dist: transformer-engine[pytorch]<2.10.0,>=2.9.0a0; extra == "dev"
-Requires-Dist: nvidia-resiliency-ext
+Requires-Dist: nvidia-resiliency-ext; extra == "dev"
 Requires-Dist: tqdm; extra == "dev"
 Requires-Dist: einops~=0.8; extra == "dev"
 Requires-Dist: tensorstore!=0.1.46,!=0.1.72,~=0.1; extra == "dev"

@@ -59,13 +59,20 @@ Requires-Dist: wget; extra == "dev"
 Requires-Dist: onnxscript; extra == "dev"
 Provides-Extra: lts
 Requires-Dist: tqdm; extra == "lts"
-Requires-Dist: einops; extra == "lts"
-Requires-Dist: tensorstore!=0.1.46,!=0.1.72; extra == "lts"
-Requires-Dist: nvtx; extra == "lts"
-Requires-Dist:
-Requires-Dist:
+Requires-Dist: einops~=0.8; extra == "lts"
+Requires-Dist: tensorstore!=0.1.46,!=0.1.72,~=0.1; extra == "lts"
+Requires-Dist: nvtx~=0.2; extra == "lts"
+Requires-Dist: multi-storage-client~=0.27; extra == "lts"
+Requires-Dist: opentelemetry-api~=1.33.1; extra == "lts"
 Requires-Dist: setuptools<80.0.0; extra == "lts"
+Requires-Dist: mamba-ssm~=2.2; extra == "lts"
+Requires-Dist: causal-conv1d~=1.5; extra == "lts"
+Requires-Dist: nv-grouped-gemm~=1.1; extra == "lts"
+Requires-Dist: megatron-energon[av_decode]~=6.0; extra == "lts"
+Requires-Dist: av<16.0.0; extra == "lts"
+Requires-Dist: flashinfer-python; extra == "lts"
 Requires-Dist: wget; extra == "lts"
+Requires-Dist: onnxscript; extra == "lts"
 
 <div align="center">
 
megatron/core/datasets/megatron_tokenizer.py

@@ -1,11 +1,14 @@
 # Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
 import json
+import logging
 from abc import ABC, abstractmethod
 from collections import OrderedDict
 from typing import Any
 
 import numpy
 
+logger = logging.getLogger(__name__)
+
 
 class MegatronLegacyTokenizer(ABC):
     """Abstract class for tokenizer

@@ -20,6 +23,12 @@ class MegatronLegacyTokenizer(ABC):
     """
 
     def __init__(self, *tokenizer_paths: str, **tokenizer_options: Any):
+        # Deprecation warning
+        logger.warning(
+            "You’re using the legacy tokenizer system, which is deprecated "
+            "and will be removed in a future release. Please migrate to the new tokenizer system "
+            "(`megatron.core.tokenizers.MegatronTokenizer`)."
+        )
         self.unique_identifiers = OrderedDict()
         self.unique_identifiers["class"] = type(self).__name__
         self.unique_identifiers["tokenizer_path"] = list(tokenizer_paths)
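Note on the hunk above (not part of the diff): the deprecation notice is emitted through the standard logging module rather than the warnings module, so it can be silenced or redirected with ordinary logging configuration. A minimal sketch, assuming the logger keeps the module path shown in the file list:

    import logging

    # The module-level logger is named after its module path, so raising that
    # logger's level above WARNING suppresses the legacy-tokenizer message.
    logging.getLogger("megatron.core.datasets.megatron_tokenizer").setLevel(logging.ERROR)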
megatron/core/inference/contexts/dynamic_context.py

@@ -4,7 +4,7 @@ import math
 import warnings
 from contextlib import nullcontext
 from enum import Enum
-from typing import List, Optional, Tuple
+from typing import TYPE_CHECKING, List, Optional, Tuple
 
 import torch
 import torch.nn.functional as F

@@ -49,6 +49,17 @@ try:
 except ImportError:
     HAVE_FLASHINFER = False
 
+try:
+    import wandb  # pylint: disable=unused-import
+
+    HAVE_WANDB = True
+except ImportError:
+    HAVE_WANDB = False
+    wandb = None
+
+if TYPE_CHECKING:
+    import wandb as WandbModule
+
 
 class ContextOverflowError(Exception):
     """Base exception for when a new request does not fit.

@@ -226,6 +237,7 @@ class DynamicInferenceContext(BaseInferenceContext):
             levels will be included to control other tensors within the context.
         use_flashinfer_fused_rope (bool): If True, use flashinfer's fused rope implementation.
             If None, defaults to using flash-infer if available.
+        metrics_writer (Optional['WandbModule']): Wandb module for writing metrics.
     """
 
     def __init__(

@@ -251,6 +263,7 @@ class DynamicInferenceContext(BaseInferenceContext):
         use_cuda_graphs_for_non_decode_steps: bool = True,
         use_flashinfer_fused_rope: bool = False,
         unified_memory_level: Optional[int] = 0,
+        metrics_writer: Optional['WandbModule'] = None,
     ):
         super().__init__(materialize_only_last_token_logits=materialize_only_last_token_logits)
 
@@ -260,6 +273,8 @@ class DynamicInferenceContext(BaseInferenceContext):
                 block_size_tokens == 64
             ), "Flash MLA requires a block size of 64. Set --inference-dynamic-batching-block-size 64 to fix this assert"
 
+        self.metrics_writer = metrics_writer
+
         # Per partition num heads and hidden size.
         projection_size = kv_channels * num_attention_heads
         if tensor_model_parallel_size is None:
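A note on the new metrics_writer parameter (the snippet below is an illustrative sketch, not from the diff): it is typed as the wandb module itself, and the engine later duck-types on metrics_writer.__name__ == "wandb" before calling metrics_writer.define_metric, so the intended wiring is to pass the imported module rather than a run object:

    import wandb

    # Pass the wandb module itself; other DynamicInferenceContext arguments are omitted here.
    context_kwargs = dict(metrics_writer=wandb)
    assert context_kwargs["metrics_writer"].__name__ == "wandb"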
@@ -1569,3 +1584,67 @@
 
         # Convert each log prob tensor into a list
         return [lp.tolist() for lp in selected_log_probs_list]
+
+    def get_kvcache_utilization_stats(self) -> dict:
+        """Compute KV cache buffer utilization stats for the current step.
+
+        Returns a dictionary with counts and percentages for both allocated block
+        usage (overall buffer occupancy) and active usage (blocks referenced by
+        currently active requests this step).
+
+        Return:
+            {
+                'total_blocks': int,
+                'allocated_blocks': int,
+                'active_unique_blocks': int,
+                'allocated_utilization': float,
+                'active_utilization': float,
+                'active_request_count': int,
+                'paused_request_count': int,
+                'gtd_block_count': int,
+            }
+        """
+        # Total usable blocks exclude the reserved dummy block.
+        total_blocks = max(self.block_allocator.block_count_total - 1, 1)
+        block_count_avail = int(self.block_allocator.block_count_avail)
+
+        # Overall allocated blocks in the buffer right now.
+        allocated_blocks = (self.block_allocator.block_count_total - 1) - block_count_avail
+        allocated_blocks = int(max(0, allocated_blocks))
+
+        # Active unique blocks referenced by current active requests only.
+        active_start = self.paused_request_count
+        active_end = self.total_request_count
+        if active_end > active_start:
+            active_rows = self.request_to_kv_block_ids[active_start:active_end]
+            # Filter valid block ids (>= 0) and count unique ids.
+            valid_ids = active_rows[active_rows >= 0]
+            if valid_ids.numel() > 0:
+                unique_ids = torch.unique(valid_ids)
+                active_unique_blocks = int(unique_ids.numel())
+            else:
+                active_unique_blocks = 0
+        else:
+            active_unique_blocks = 0
+
+        allocated_utilization = float(allocated_blocks) / float(total_blocks)
+        active_utilization = float(active_unique_blocks) / float(total_blocks)
+
+        # Diagnostic helpers
+        num_non_gtd_blocks = max(0, block_count_avail - int(self.gtd_block_count))
+        total_request_count = int(self.total_request_count)
+        return {
+            'total_blocks': int(total_blocks),
+            'allocated_blocks': int(allocated_blocks),
+            'active_unique_blocks': int(active_unique_blocks),
+            'allocated_utilization': allocated_utilization,
+            'active_utilization': active_utilization,
+            'active_request_count': int(self.get_active_request_count()),
+            'paused_request_count': int(self.paused_request_count),
+            'gtd_block_count': int(self.gtd_block_count),
+            'block_count_avail': int(block_count_avail),
+            'num_non_gtd_blocks': int(num_non_gtd_blocks),
+            'active_token_count': int(self.active_token_count),
+            'total_request_count': int(total_request_count),
+            'max_requests': int(self.max_requests),
+        }
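For reference, a minimal usage sketch of the new method (assuming `context` is an already constructed DynamicInferenceContext; the dictionary keys are the ones returned above):

    # Poll KV cache utilization once per engine step and print a one-line summary.
    stats = context.get_kvcache_utilization_stats()
    print(
        f"KV cache: {stats['allocated_utilization']:.1%} allocated, "
        f"{stats['active_utilization']:.1%} active "
        f"({stats['active_unique_blocks']}/{stats['total_blocks']} blocks; "
        f"{stats['active_request_count']} active / {stats['paused_request_count']} paused requests)"
    )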
megatron/core/inference/engines/dynamic_engine.py

@@ -57,6 +57,14 @@ try:
 except:
     HAVE_MSGPACK = False
 
+try:
+    import wandb
+
+    HAVE_WANDB = True
+except ImportError:
+    HAVE_WANDB = False
+    wandb = None
+
 
 def format_mem_bytes(mem_bytes):
     """Convert a byte count to a human-readable string in tb, gb, mb, kb, or bytes."""
@@ -89,6 +97,8 @@ class DynamicInferenceEngine(AbstractEngine):
         static_sampling (bool): If True, all requests are assumed to have the same
             sampling parameters. This avoids needing to loop through all requests and
             their sampling parameters every generation step, improving latency.
+        inference_logging_step_interval (int): The step interval at which to log
+            inference metrics to wandb. Defaults to 0, which means no logging.
     """
 
     def __init__(

@@ -101,6 +111,7 @@ class DynamicInferenceEngine(AbstractEngine):
         track_paused_request_events: bool = False,
         enable_chunked_prefill: bool = True,
         static_sampling: bool = False,
+        inference_logging_step_interval: int = 0,
     ):
 
         if enable_cuda_graph is not None:
@@ -137,6 +148,32 @@ class DynamicInferenceEngine(AbstractEngine):
         self.enable_chunked_prefill = enable_chunked_prefill
         self.static_sampling = static_sampling
 
+        self.inference_logging_step_interval = inference_logging_step_interval
+        # Configure wandb to use separate step counter for inference metrics (only once)
+        if self.inference_logging_step_interval > 0 and self.context.metrics_writer is not None:
+            logging.info(
+                f"\033[1;93m[INFERENCE]\033[0m "
+                f"\033[1;95mLogging inference metrics to wandb (rank {torch.distributed.get_rank()})\033[0m"
+            )
+            if HAVE_WANDB and self.context.metrics_writer.__name__ == "wandb":
+                # Make all inference/* metrics use inference_step as their x-axis
+                # This allows inference and training to have independent step counters
+                context.metrics_writer.define_metric(
+                    "inference/*", step_metric="inference/inference_step"
+                )
+                # Initialize inference step offset by querying existing run history
+                self.inference_step_offset = 0
+                if wandb.run is not None:
+                    api_run = wandb.Api().run(
+                        f"{wandb.run.entity}/{wandb.run.project}/{wandb.run.id}"
+                    )
+                    max_step = 0
+                    for row in api_run.scan_history(keys=["inference/inference_step"]):
+                        val = row.get("inference/inference_step")
+                        if isinstance(val, (int, float)) and int(val) > max_step:
+                            max_step = int(val)
+                    self.inference_step_offset = int(max_step)
+
         # Initialize the asyncio loop if it has not already been initialized.
         # TODO: Start the engine loop here.
         self._loop = get_asyncio_loop()
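Two wandb features do the heavy lifting in the block above: define_metric routes every inference/* series onto a custom step axis, and the public Api's scan_history recovers the last logged step so the counter survives restarts. A condensed sketch of that pattern on its own, assuming wandb is installed and a run was started elsewhere:

import wandb


def configure_inference_axis() -> int:
    """Give inference/* metrics their own step axis and return the last logged step."""
    # Plot every inference/* series against inference/inference_step instead of wandb's global step.
    wandb.define_metric("inference/*", step_metric="inference/inference_step")

    step_offset = 0
    if wandb.run is not None:
        api_run = wandb.Api().run(f"{wandb.run.entity}/{wandb.run.project}/{wandb.run.id}")
        for row in api_run.scan_history(keys=["inference/inference_step"]):
            val = row.get("inference/inference_step")
            if isinstance(val, (int, float)):
                step_offset = max(step_offset, int(val))
    return step_offset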
@@ -780,6 +817,41 @@ class DynamicInferenceEngine(AbstractEngine):
             self.request_completion_futures[failed_request_id].set_result(failed_request)
         self.failed_request_ids.clear()
 
+        # Log KV cache utilization stats to W&B
+        if (
+            self.inference_logging_step_interval > 0
+            and self.step_count > 0
+            and self.step_count % self.inference_logging_step_interval == 0
+            and self.context.metrics_writer is not None
+        ):
+
+            # Get KV cache utilization stats from dynamic context
+            kv_stats = self.context.get_kvcache_utilization_stats()
+
+            # Prepare metrics dictionary with all stats
+            # Use 'inference/' prefix for all metrics to separate from training metrics
+            metrics = {
+                'inference/inference_step': int(self.inference_step_offset + int(self.step_count)),
+                'inference/step_time_s': float(step_time),
+                'inference/waiting_queue_len': int(len(self.waiting_request_ids)),
+                'inference/total_requests_dict_size': int(len(self.requests)),
+            }
+            # Add KV stats with inference/ prefix
+            # Convert utilization metrics from 0-1 range to 0-100 percentage range for better visualization
+            for key, value in kv_stats.items():
+                if 'utilization' in key:
+                    # Convert to percentage (0-100) and group under kvcache_utilization
+                    metrics[f'inference/{key}'] = float(value * 100.0)
+                else:
+                    metrics[f'inference/{key}'] = value
+
+            if HAVE_WANDB and self.context.metrics_writer.__name__ == "wandb":
+                self.context.metrics_writer.log(metrics, commit=True)
+            else:
+                raise ValueError(
+                    f"Unsupported metrics writer type: {type(self.context.metrics_writer)}"
+                )
+
         # Print context state.
         if verbose:
             context = self.context
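The logging branch above only fires on every inference_logging_step_interval-th step and rescales fractional utilization values to percentages before handing them to the writer. The gating and rescaling can be exercised in isolation; a small sketch with a hypothetical stats dict:

def build_inference_metrics(step_count, interval, kv_stats, step_offset=0):
    """Return the metrics dict that would be logged on this step, or None when skipped."""
    if interval <= 0 or step_count == 0 or step_count % interval != 0:
        return None
    metrics = {"inference/inference_step": step_offset + step_count}
    for key, value in kv_stats.items():
        # Utilization fractions in [0, 1] become percentages for easier dashboard reading.
        metrics[f"inference/{key}"] = value * 100.0 if "utilization" in key else value
    return metrics


print(build_inference_metrics(20, 10, {"active_utilization": 0.25, "total_blocks": 4095}))
# -> {'inference/inference_step': 20, 'inference/active_utilization': 25.0, 'inference/total_blocks': 4095}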
@@ -1,6 +1,7 @@
 # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
 
 from argparse import Namespace
+from io import BytesIO
 from pathlib import PosixPath
 from types import SimpleNamespace
 
@@ -26,6 +27,7 @@ SAFE_GLOBALS = [
     RerunDiagnostic,
     RerunMode,
     RerunState,
+    BytesIO,
 ]
 
 
@@ -48,15 +48,26 @@ except ImportError:
     rearrange = None
 
 try:
-    from
-    from
+    from flash_attn_3.flash_attn_interface import _flash_attn_forward
+    from flash_attn_3.flash_attn_interface import (
         flash_attn_with_kvcache as flash_attn3_with_kvcache,
     )
 
     HAVE_FA3 = True
-except:
+except ImportError as e:
     HAVE_FA3 = False
 
+if not HAVE_FA3:
+    try:
+        from flashattn_hopper.flash_attn_interface import _flash_attn_forward
+        from flashattn_hopper.flash_attn_interface import (
+            flash_attn_with_kvcache as flash_attn3_with_kvcache,
+        )
+
+        HAVE_FA3 = True
+    except ImportError as e:
+        pass
+
 try:
     from flash_mla import flash_mla_with_kvcache, get_mla_metadata
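The change above first tries the flash_attn_3 package name and, only if that import fails, falls back to the older flashattn_hopper name while keeping the same HAVE_FA3 flag and alias. The same behavior can also be written as one nested fallback; a sketch:

HAVE_FA3 = False
flash_attn3_with_kvcache = None

try:
    # Preferred, newer package name.
    from flash_attn_3.flash_attn_interface import flash_attn_with_kvcache as flash_attn3_with_kvcache

    HAVE_FA3 = True
except ImportError:
    try:
        # Legacy package name kept as a fallback.
        from flashattn_hopper.flash_attn_interface import (
            flash_attn_with_kvcache as flash_attn3_with_kvcache,
        )

        HAVE_FA3 = True
    except ImportError:
        pass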
@@ -1182,7 +1182,11 @@ class CudaGraphManager(torch.nn.Module):
 
         if runner is None:
             if _CudagraphGlobalRecord.cudagraph_created:
-                assert False
+                assert False, (
+                    f"`cudagraph_created` is set to True but no matching cudagraph "
+                    f"runners were found. This module has {len(self.cudagraph_runners)} "
+                    f"existing runners. Use `get_mismatch_errors` to debug mismatches."
+                )
             else:
                 runner = _CudaGraphRunner(
                     megatron_module,
@@ -66,6 +66,8 @@ class Router(ABC, MegatronModule):
         """Reset the router parameters."""
         if self.config.perform_initialization:
             self.config.init_method(self.weight)
+            if self.bias is not None:
+                self.config.init_method(self.bias)
         self.weight.data = self.weight.data.to(dtype=self.config.params_dtype)
         setattr(self.weight, 'sequence_parallel', self.config.sequence_parallel)
         if self.bias is not None:
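The fix above ensures the router's bias, when configured, goes through the same init_method as the weight before the weight is cast to params_dtype. A standalone sketch of that reset logic with plain torch parameters (hypothetical helper, not the Router class itself):

import torch


def reset_router_parameters(weight, bias, init_method, params_dtype):
    """Initialize the weight, initialize the bias when present, then cast the weight."""
    init_method(weight)
    if bias is not None:
        init_method(bias)
    weight.data = weight.data.to(dtype=params_dtype)


w = torch.nn.Parameter(torch.empty(8, 16))
b = torch.nn.Parameter(torch.empty(8))
reset_router_parameters(w, b, torch.nn.init.zeros_, torch.bfloat16)
print(w.dtype, b.dtype)  # only the weight is cast; the bias keeps its original dtype here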
@@ -15,8 +15,11 @@ logger = logging.getLogger(__name__)
 class PipelineParallelLayerLayout:
     """Configuration of custom pipeline parallel layer partitioning."""
 
-    def __repr__(self):
-
+    def __repr__(self) -> str:
+        if isinstance(self.input_data, str):
+            return self.input_data
+        else:
+            return str(self.input_data)
 
     def __init__(self, layout: str | list, pipeline_model_parallel_size: int):
         """Initialize PipelineParallelLayerLayout from a list or a str.
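With the change above, repr() of a layout simply echoes the original layout specification: a string form is returned verbatim and a list form is stringified. A minimal standalone sketch of that behavior (hypothetical class, only illustrating __repr__):

class LayoutSpec:
    """Tiny stand-in that mirrors the __repr__ behavior added above."""

    def __init__(self, input_data):
        self.input_data = input_data

    def __repr__(self) -> str:
        # Strings come back verbatim; any other spec (e.g. a nested list) is stringified.
        return self.input_data if isinstance(self.input_data, str) else str(self.input_data)


print(LayoutSpec("embedding|decoder*4|loss"))            # hypothetical layout string
print(LayoutSpec([["embedding"], ["decoder"], ["loss"]]))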
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: megatron-core
-Version: 0.16.0rc0.dev126546
+Version: 0.16.0rc0.dev126744
 Summary: Megatron Core - a library for efficient and scalable training of transformer based models
 Author-email: NVIDIA <nemo-toolkit@nvidia.com>
 Maintainer-email: NVIDIA <nemo-toolkit@nvidia.com>
@@ -41,7 +41,7 @@ Requires-Dist: transformers; extra == "mlm"
 Provides-Extra: dev
 Requires-Dist: nvidia-modelopt[torch]; sys_platform != "darwin" and extra == "dev"
 Requires-Dist: transformer-engine[pytorch]<2.10.0,>=2.9.0a0; extra == "dev"
-Requires-Dist: nvidia-resiliency-ext
+Requires-Dist: nvidia-resiliency-ext; extra == "dev"
 Requires-Dist: tqdm; extra == "dev"
 Requires-Dist: einops~=0.8; extra == "dev"
 Requires-Dist: tensorstore!=0.1.46,!=0.1.72,~=0.1; extra == "dev"
@@ -59,13 +59,20 @@ Requires-Dist: wget; extra == "dev"
 Requires-Dist: onnxscript; extra == "dev"
 Provides-Extra: lts
 Requires-Dist: tqdm; extra == "lts"
-Requires-Dist: einops; extra == "lts"
-Requires-Dist: tensorstore!=0.1.46,!=0.1.72; extra == "lts"
-Requires-Dist: nvtx; extra == "lts"
-Requires-Dist:
-Requires-Dist:
+Requires-Dist: einops~=0.8; extra == "lts"
+Requires-Dist: tensorstore!=0.1.46,!=0.1.72,~=0.1; extra == "lts"
+Requires-Dist: nvtx~=0.2; extra == "lts"
+Requires-Dist: multi-storage-client~=0.27; extra == "lts"
+Requires-Dist: opentelemetry-api~=1.33.1; extra == "lts"
 Requires-Dist: setuptools<80.0.0; extra == "lts"
+Requires-Dist: mamba-ssm~=2.2; extra == "lts"
+Requires-Dist: causal-conv1d~=1.5; extra == "lts"
+Requires-Dist: nv-grouped-gemm~=1.1; extra == "lts"
+Requires-Dist: megatron-energon[av_decode]~=6.0; extra == "lts"
+Requires-Dist: av<16.0.0; extra == "lts"
+Requires-Dist: flashinfer-python; extra == "lts"
 Requires-Dist: wget; extra == "lts"
+Requires-Dist: onnxscript; extra == "lts"
 
 <div align="center">
 
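The Requires-Dist entries above are what installers and tooling read back from the package metadata. One way to confirm the pinned lts extras after installation is via importlib.metadata; a short sketch, assuming megatron-core is installed in the current environment:

from importlib.metadata import metadata

md = metadata("megatron-core")
print(md.get_all("Provides-Extra"))  # e.g. ['mlm', 'dev', 'lts']

# Requirements that only apply when the 'lts' extra is requested.
lts_requirements = [
    req for req in (md.get_all("Requires-Dist") or []) if 'extra == "lts"' in req
]
for req in lts_requirements:
    print(req)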
@@ -4,7 +4,7 @@ packaging>=24.2
 
 [dev]
 transformer-engine[pytorch]<2.10.0,>=2.9.0a0
-nvidia-resiliency-ext
+nvidia-resiliency-ext
 tqdm
 einops~=0.8
 tensorstore!=0.1.46,!=0.1.72,~=0.1
@@ -26,13 +26,20 @@ nvidia-modelopt[torch]
 
 [lts]
 tqdm
-einops
-tensorstore!=0.1.46,!=0.1.72
-nvtx
-
-
+einops~=0.8
+tensorstore!=0.1.46,!=0.1.72,~=0.1
+nvtx~=0.2
+multi-storage-client~=0.27
+opentelemetry-api~=1.33.1
 setuptools<80.0.0
+mamba-ssm~=2.2
+causal-conv1d~=1.5
+nv-grouped-gemm~=1.1
+megatron-energon[av_decode]~=6.0
+av<16.0.0
+flashinfer-python
 wget
+onnxscript
 
 [mlm]
 flask-restful
@@ -69,7 +69,7 @@ mlm = ["flask-restful", "sentencepiece", "tiktoken", "wandb", "transformers"]
 dev = [
     "nvidia-modelopt[torch]; sys_platform != 'darwin'",
     "transformer-engine[pytorch]>=2.9.0a0,<2.10.0",
-    "nvidia-resiliency-ext
+    "nvidia-resiliency-ext",
     "tqdm",
     "einops~=0.8",
     "tensorstore~=0.1,!=0.1.46,!=0.1.72",
@@ -89,13 +89,20 @@ dev = [
 
 lts = [
     "tqdm",
-    "einops",
-    "tensorstore
-    "nvtx",
-    "
-    "
+    "einops~=0.8",
+    "tensorstore~=0.1,!=0.1.46,!=0.1.72",
+    "nvtx~=0.2",
+    "multi-storage-client~=0.27",
+    "opentelemetry-api~=1.33.1",
     "setuptools<80.0.0",
+    "mamba-ssm~=2.2",
+    "causal-conv1d~=1.5",
+    "nv-grouped-gemm~=1.1",
+    "megatron-energon[av_decode]~=6.0",
+    "av<16.0.0",  # At the time, av 16.0.0 is not compatible with Python 3.12
+    "flashinfer-python",
     "wget",
+    "onnxscript",
 ]
 
 [dependency-groups]
{megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/README.md RENAMED (file without changes)
{megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/__init__.py RENAMED (file without changes)
{megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/activations.py RENAMED (file without changes)
{megatron_core-0.16.0rc0.dev126546 → megatron_core-0.16.0rc0.dev126744}/megatron/core/config.py RENAMED (file without changes)