megatron-core 0.14.0rc0__tar.gz → 0.14.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megatron-core might be problematic.
- {megatron_core-0.14.0rc0/megatron_core.egg-info → megatron_core-0.14.0rc1}/PKG-INFO +1 -1
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/indexed_dataset.py +5 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/distributed/distributed_data_parallel_config.py +9 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/extensions/transformer_engine.py +10 -9
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/engines/dynamic_engine.py +61 -32
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/inference_request.py +1 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/text_generation_controllers/text_generation_controller.py +55 -2
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/package_info.py +1 -1
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/tensor_parallel/layers.py +9 -7
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/attention.py +2 -1
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/cuda_graphs.py +5 -1
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/utils.py +3 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1/megatron_core.egg-info}/PKG-INFO +1 -1
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/LICENSE +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/MANIFEST.in +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/README.md +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/README.md +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/config.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/config_logger.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/bert_dataset.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/blended_dataset.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/blended_megatron_dataset_builder.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/blended_megatron_dataset_config.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/gpt_dataset.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/helpers.cpp +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/helpers.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/masked_dataset.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/megatron_dataset.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/megatron_tokenizer.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/multimodal_dataset.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/object_storage_utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/config/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/config/bert_embedders.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/config/config.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/config/gpt_chunk_datasets.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/config/tokenizers.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/db/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/db/build.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/db/dataset.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/db/utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/external_libs.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/index/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/index/build.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/index/factory.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/index/index.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/index/indexes/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/index/indexes/faiss_base.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/index/indexes/faiss_par_add.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/index/utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/index/validate.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/query/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/query/gpt_chunk_dataset.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/query/multi_split_gpt_dataset.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/query/query.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/query/retro_dataset.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/query/utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/retro/utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/t5_dataset.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/utils_object_storage.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/utils_s3.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/core.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/dict_utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/exchange_utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/mapping.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/optimizer.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/serialization.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/state_dict_utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/strategies/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/strategies/async_utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/strategies/base.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/strategies/cached_metadata_filesystem_reader.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/strategies/common.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/strategies/filesystem_async.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/strategies/fully_parallel.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/strategies/resharding.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/strategies/state_dict_saver.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/strategies/tensorstore.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/strategies/torch.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/strategies/two_stage.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/strategies/zarr.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/tensor_aware_state_dict.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/dist_checkpointing/validation.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/distributed/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/distributed/custom_fsdp/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/distributed/custom_fsdp/fully_sharded_data_parallel.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/distributed/custom_fsdp/param_and_grad_buffer.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/distributed/data_parallel_base.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/distributed/distributed_data_parallel.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/distributed/finalize_model_grads.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/distributed/param_and_grad_buffer.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/distributed/torch_fully_sharded_data_parallel.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/distributed/torch_fully_sharded_data_parallel_config.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/energy_monitor.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/enums.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/export/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/export/data_type.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/export/export_config.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/export/model_type.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/export/trtllm/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/export/trtllm/engine_builder/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/export/trtllm/engine_builder/trtllm_engine_builder.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/export/trtllm/model_to_trllm_mapping/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/export/trtllm/model_to_trllm_mapping/default_conversion_dict.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/export/trtllm/trt_model_config.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/export/trtllm/trt_model_type.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/export/trtllm/trtllm_helper.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/export/trtllm/trtllm_layers.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/export/trtllm/trtllm_weights_converter/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/export/trtllm/trtllm_weights_converter/distributed_trtllm_model_weights_converter.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/export/trtllm/trtllm_weights_converter/single_device_trtllm_model_weights_converter.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/export/trtllm/trtllm_weights_converter/utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/extensions/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/extensions/kitchen.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/extensions/transformer_engine_spec_provider.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/fp8_utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/fusions/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/fusions/fused_bias_dropout.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/fusions/fused_bias_geglu.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/fusions/fused_bias_gelu.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/fusions/fused_bias_swiglu.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/fusions/fused_cross_entropy.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/fusions/fused_indices_converter.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/fusions/fused_layer_norm.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/fusions/fused_mla_yarn_rope_apply.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/fusions/fused_pad_routing_map.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/fusions/fused_softmax.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/hyper_comm_grid.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/async_stream.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/common_inference_params.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/communication_utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/contexts/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/contexts/base_context.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/contexts/dynamic_chunk_allocator.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/contexts/dynamic_context.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/contexts/static_context.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/engines/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/engines/abstract_engine.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/engines/mcore_engine.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/engines/static_engine.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/model_inference_wrappers/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/model_inference_wrappers/gpt/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/model_inference_wrappers/gpt/gpt_inference_wrapper.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/model_inference_wrappers/inference_wrapper_config.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/model_inference_wrappers/multimodal/vlm_inference_wrapper.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/model_inference_wrappers/t5/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/model_inference_wrappers/t5/t5_inference_wrapper.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/sampling_params.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/scheduler.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/text_generation_controllers/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/text_generation_controllers/encoder_decoder_text_generation_controller.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/text_generation_controllers/simple_text_generation_controller.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/text_generation_controllers/vlm_text_generation_controller.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference_params.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/jit.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/model_parallel_config.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/T5/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/T5/t5_model.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/T5/t5_spec.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/backends.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/bert/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/bert/bert_layer_specs.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/bert/bert_lm_head.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/bert/bert_model.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/bert/pooler.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/common/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/common/embeddings/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/common/embeddings/language_model_embedding.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/common/embeddings/relative_pos_embedding.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/common/embeddings/rope_utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/common/embeddings/rotary_pos_embedding.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/common/language_module/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/common/language_module/language_module.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/common/vision_module/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/common/vision_module/vision_module.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/gpt/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/gpt/fine_grained_callables.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/gpt/gpt_layer_specs.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/gpt/gpt_model.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/gpt/heterogeneous/heterogeneous_layer_specs.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/gpt/moe_module_specs.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/huggingface/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/huggingface/clip_model.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/huggingface/module.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/huggingface/qwen_model.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/mamba/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/mamba/mamba_layer_specs.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/mamba/mamba_model.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/mimo/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/mimo/config/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/mimo/config/base_configs.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/mimo/model/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/mimo/model/base.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/mimo/submodules/audio.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/mimo/submodules/base.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/mimo/submodules/vision.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/multimodal/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/multimodal/context_parallel.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/multimodal/llava_model.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/multimodal/llava_spec.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/retro/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/retro/base_attention.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/retro/config.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/retro/decoder_attention.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/retro/decoder_spec.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/retro/encoder_attention.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/retro/encoder_spec.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/retro/model.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/retro/utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/vision/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/vision/clip_vit_model.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/vision/multimodal_projector.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/vision/radio.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/models/vision/vit_layer_specs.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/msc_utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/num_microbatches_calculator.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/optimizer/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/optimizer/clip_grads.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/optimizer/cpu_offloading/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/optimizer/cpu_offloading/hybrid_optimizer.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/optimizer/distrib_optimizer.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/optimizer/grad_scaler.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/optimizer/optimizer.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/optimizer/optimizer_config.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/optimizer_param_scheduler.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/packed_seq_params.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/parallel_state.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/pipeline_parallel/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/pipeline_parallel/p2p_communication.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/pipeline_parallel/schedules.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/pipeline_parallel/utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/post_training/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/post_training/modelopt/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/post_training/modelopt/gpt/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/post_training/modelopt/gpt/model_specs.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/post_training/modelopt/gpt/state_dict_hooks.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/post_training/modelopt/layers.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/post_training/modelopt/mamba/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/post_training/modelopt/mamba/model_specs.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/process_groups_config.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/quantization/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/quantization/quant_config.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/quantization/utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/requirements.txt +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/rerun_state_machine.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/ssm/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/ssm/mamba_block.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/ssm/mamba_context_parallel.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/ssm/mamba_hybrid_layer_allocation.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/ssm/mamba_layer.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/ssm/mamba_mixer.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/ssm/mlp_layer.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/ssm/triton_cache_manager.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/tensor_parallel/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/tensor_parallel/cross_entropy.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/tensor_parallel/data.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/tensor_parallel/mappings.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/tensor_parallel/random.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/tensor_parallel/utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/timers.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/custom_layers/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/custom_layers/transformer_engine.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/dot_product_attention.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/enums.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/heterogeneous/heterogeneous_config.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/heterogeneous/linear_replacements.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/identity_op.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/mlp.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/module.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/moe/__init__.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/moe/experts.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/moe/fused_a2a.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/moe/grouped_gemm_util.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/moe/moe_layer.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/moe/moe_utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/moe/router.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/moe/shared_experts.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/moe/token_dispatcher.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/moe/upcycling_utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/multi_latent_attention.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/multi_token_prediction.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/pipeline_parallel_layer_layout.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/spec_utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/torch_layer_norm.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/torch_norm.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/transformer_block.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/transformer_config.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/transformer_layer.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/utils.py +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron_core.egg-info/SOURCES.txt +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron_core.egg-info/dependency_links.txt +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron_core.egg-info/requires.txt +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron_core.egg-info/top_level.txt +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/pyproject.toml +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/setup.cfg +0 -0
- {megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/setup.py +0 -0
{megatron_core-0.14.0rc0/megatron_core.egg-info → megatron_core-0.14.0rc1}/PKG-INFO
RENAMED
```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: megatron-core
-Version: 0.14.0rc0
+Version: 0.14.0rc1
 Summary: Megatron Core - a library for efficient and scalable training of transformer based models
 Author-email: NVIDIA <nemo-toolkit@nvidia.com>
 Maintainer-email: NVIDIA <nemo-toolkit@nvidia.com>
```
{megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/datasets/indexed_dataset.py
RENAMED
```diff
@@ -5,6 +5,7 @@
 
 # Essentially re-written in entirety
 
+import gc
 import logging
 import os
 import shutil
@@ -906,6 +907,10 @@ class IndexedDatasetBuilder(object):
         assert index.sequence_modes is not None, "sequence_modes cannot not be None"
         self.sequence_modes.extend(index.sequence_modes)
 
+        # Free up memory to make space for new indices
+        del index
+        gc.collect()
+
         # Concatenate data
         with self._open(get_bin_path(path_prefix), "rb") as f:
             shutil.copyfileobj(f, self.data_file)
```
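The new `del index` / `gc.collect()` pair releases each merged index's buffers before the corresponding `.bin` file is copied, keeping peak memory proportional to one shard. A minimal sketch of the pattern, with a hypothetical `load_index` loader standing in for the real index class:

```python
import gc

def merge_indices(paths, load_index):
    """Sketch of the merge loop's memory pattern; `load_index` is a
    hypothetical loader returning an object with a `sequence_modes` list."""
    merged_modes = []
    for path in paths:
        index = load_index(path)
        merged_modes.extend(index.sequence_modes)
        # Drop the last reference and force a collection so the large
        # index buffers are released before the next shard is loaded.
        del index
        gc.collect()
    return merged_modes
```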
{megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/distributed/distributed_data_parallel_config.py
RENAMED
```diff
@@ -113,6 +113,15 @@ class DistributedDataParallelConfig:
     """
 
     def __post_init__(self):
+        import os
+
         """Check the validity of the config."""
         if self.reuse_grad_buf_for_mxfp8_param_ag:
             assert self.fp8_param_gather, "Reuse grad buffer only when keeping params in MXFP8."
+
+        if self.nccl_ub:
+            if 'expandable_segments:True' in os.getenv('PYTORCH_CUDA_ALLOC_CONF', '').split(','):
+                raise ValueError(
+                    "PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True is currently not supported "
+                    "with nccl_ub due to compatibility issue with torch.cuda.MemPool API."
+                )
```
{megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/extensions/transformer_engine.py
RENAMED
```diff
@@ -39,6 +39,8 @@ from megatron.core.transformer.enums import AttnMaskType
 from megatron.core.transformer.transformer_config import TransformerConfig
 from megatron.core.transformer.utils import make_sharded_tensors_for_checkpoint
 from megatron.core.utils import (
+    get_pg_rank,
+    get_pg_size,
     get_te_version,
     get_tensor_model_parallel_group_if_none,
     is_te_min_version,
@@ -228,8 +230,7 @@ class TELinear(te.pytorch.Linear):
             assert tp_group is None, "duplicated linear should not have tp_group set"
             tp_size = 1
         else:
-
-            tp_size = tp_group.size()
+            tp_size = get_pg_size(tp_group)
 
         self.expert_parallel = self.config.expert_model_parallel_size > 1
         if is_expert:
@@ -374,8 +375,8 @@ class TELayerNormColumnParallelLinear(te.pytorch.LayerNormLinear):
         self.is_first_microbatch = True
         self.disable_parameter_transpose_cache = self.config.disable_parameter_transpose_cache
         extra_kwargs = _get_extra_te_kwargs(config)
-        self.tp_size = tp_group.size()
-        self.tp_rank = tp_group.rank()
+        self.tp_size = get_pg_size(tp_group)
+        self.tp_rank = get_pg_rank(tp_group)
 
         if self.config.delay_wgrad_compute:
             if is_te_min_version("2.3.0"):
@@ -542,8 +543,8 @@ class TEColumnParallelLinear(TELinear):
         if gather_output:
             raise ValueError("Transformer Engine linear layers do not support gather_output = True")
         tp_group = get_tensor_model_parallel_group_if_none(tp_group, is_expert=is_expert)
-        world_size = tp_group.size()
-        rank = tp_group.rank()
+        world_size = get_pg_size(tp_group)
+        rank = get_pg_rank(tp_group)
 
         super().__init__(
             input_size=input_size,
@@ -657,8 +658,8 @@ class TERowParallelLinear(TELinear):
             tp_group=tp_group,
         )
         if config.use_cpu_initialization:
-            world_size = tp_group.size()
-            rank = tp_group.rank()
+            world_size = get_pg_size(tp_group)
+            rank = get_pg_rank(tp_group)
             input_size_per_partition = divide(input_size, world_size)
             self.master_weight = _initialize_affine_weight_cpu(
                 self.weight,
@@ -1003,7 +1004,7 @@ if HAVE_TE and is_te_min_version("1.9.0.dev0"):
         # The comms between TP and EP group is explicitly handled by MoE token dispatcher.
         # So we disable comms by making TE agnostic of model parallel.
         tp_group = get_tensor_model_parallel_group_if_none(tp_group, is_expert=is_expert)
-        tp_size = tp_group.size()
+        tp_size = get_pg_size(tp_group)
 
         self.explicit_expert_comm = is_expert and (tp_size > 1 or self.expert_parallel)
```
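The theme of this file's changes is replacing direct `tp_group.size()` / `tp_group.rank()` calls with the `get_pg_size` / `get_pg_rank` helpers imported from `megatron.core.utils`. A plausible reading, consistent with the `utils.py` change further down that lets `get_tensor_model_parallel_group_if_none` return `None` when `torch.distributed` is uninitialized, is that the helpers fall back to a world of size 1 when handed no group. A hedged re-implementation sketch (the real helpers live in `megatron.core.utils`; the fallback behavior is an assumption):

```python
import torch.distributed as dist

# Hypothetical re-implementation; the real helpers live in megatron.core.utils.
# The assumed behavior is a safe fallback when the group is None or when
# torch.distributed has not been initialized (single-process runs).
def get_pg_size(group) -> int:
    if group is None or not dist.is_initialized():
        return 1
    return group.size()

def get_pg_rank(group) -> int:
    if group is None or not dist.is_initialized():
        return 0
    return group.rank()
```

Under that reading, constructing these layers in a single, non-distributed process no longer needs a real process group.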
{megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/engines/dynamic_engine.py
RENAMED
```diff
@@ -1,7 +1,6 @@
 # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
 
 import asyncio
-import time
 from collections import deque
 from typing import Dict, List, Optional, Tuple, Union
 
@@ -70,6 +69,8 @@ class DynamicInferenceEngine(AbstractEngine):
         self.request_counter = Counter()
         self.requests: Dict[int, DynamicInferenceRequest] = {}
         self.request_completion_futures: Dict[int, asyncio.Future] = {}
+        self.step_start_event = torch.cuda.Event(enable_timing=True)
+        self.step_end_event = torch.cuda.Event(enable_timing=True)
 
         # Initialize the asyncio loop if it has not already been initialized.
         # TODO: Start the engine loop here.
@@ -176,19 +177,25 @@ class DynamicInferenceEngine(AbstractEngine):
         return self.request_completion_futures[request_id]
 
     def post_process_requests(
-        self,
-
+        self,
+        request_ids: torch.Tensor,
+        finished_request_ids: torch.Tensor,
+        step_time: float,
+        sample: torch.Tensor,
+    ) -> Tuple[List[DynamicInferenceRequest], List[DynamicInferenceRequest]]:
         """
         Handles post-processing for requests after a step.
 
         Args:
             request_ids (torch.Tensor): A list of request_ids
             finished_request_ids (torch.Tensor): A list of finished request ids
+            step_time (float): The latency of the last step
             sample: (torch.Tensor): The newly generated tokens for each request
 
         Returns:
-            A list of completed requests as `DynamicInferenceRequest` objects
+            A list of active requests and completed requests as `DynamicInferenceRequest` objects
         """
+        active_requests: List[DynamicInferenceRequest] = []
         finished_requests: List[DynamicInferenceRequest] = []
         finished_request_ids = set(finished_request_ids.tolist())
         self.finished_request_count += len(finished_request_ids)
@@ -196,6 +203,9 @@ class DynamicInferenceEngine(AbstractEngine):
         for request_id, token in zip(request_ids.tolist(), sample.tolist()):
             request: DynamicInferenceRequest = self.requests[request_id]
             request.generated_tokens.append(token)
+            if request.tpot is None:
+                request.tpot = []
+            request.tpot.append(step_time)
 
             if request_id in finished_request_ids:
                 request.generated_length = len(request.generated_tokens)
@@ -207,50 +217,67 @@ class DynamicInferenceEngine(AbstractEngine):
                     finished_request.generated_tokens
                 )
                 self.request_completion_futures[request_id].set_result(finished_request)
-
-
+            else:
+                active_requests.append(request)
+
+        return active_requests, finished_requests
+
+    def schedule_waiting_requests(self):
+        """Tries to schedule any requests in the waiting pool."""
+        for waiting_request_id in self.waiting_request_ids.copy():
+            waiting_request: DynamicInferenceRequest = self.requests[waiting_request_id]
+            try:
+                self.context.add_request(
+                    waiting_request_id,
+                    waiting_request.prompt_tokens,
+                    waiting_request.sampling_params.num_tokens_to_generate,
+                )
+                self.waiting_request_ids.popleft()
+            except Exception as e:
+                break
 
     async def async_step(
         self, sampling_params: SamplingParams, *, verbose: Optional[bool] = False
-    ) -> Tuple[List[DynamicInferenceRequest], float]:
-        """
-
-
-        Uses `asyncio` for continuous generation which allows this
+    ) -> Tuple[List[DynamicInferenceRequest], List[DynamicInferenceRequest], float]:
+        """
+        Wrapper for controller.generate_output_tokens_dynamic_batch(), to
+        match vLLM API. Uses `asyncio` for continuous generation which allows this
         method to sleep and wake up when new requests are available.
+
+        Args:
+            sampling_params (SamplingParams): The sampling parameters.
+            verbose (bool): Whether to run in verbose mode.
+
+        Returns:
+            A tuple comprised of:
+                1. Requests that ran in the last step and are still active.
+                2. Requests that ran in the last step and have now finished.
+                3. The step time in seconds.
         """
 
         # Generate tokens.
-        t = time.time()
         is_decode_only = self.context.is_decode_only()
+        self.step_start_event.record()
         result = self.controller.generate_output_tokens_dynamic_batch(
             sampling_params, self.termination_id
         )
-
-
-
+        self.step_end_event.record()
+        self.step_end_event.synchronize()
+        step_time = self.step_start_event.elapsed_time(self.step_end_event) / 1e3
 
         if result is not None:
             request_ids, finished_request_ids, sample = result
 
             # TODO: Move this to a background thread?
-            finished_requests = self.post_process_requests(
-                request_ids, finished_request_ids, sample
+            (active_requests, finished_requests) = self.post_process_requests(
+                request_ids, finished_request_ids, step_time, sample
             )
 
-            # Schedule waiting requests
             # TODO: Move this to a background thread?
-            for waiting_request_id in self.waiting_request_ids.copy():
-                waiting_request: DynamicInferenceRequest = self.requests[waiting_request_id]
-                try:
-                    self.context.add_request(
-                        waiting_request_id,
-                        waiting_request.prompt_tokens,
-                        waiting_request.sampling_params.num_tokens_to_generate,
-                    )
-                    self.waiting_request_ids.popleft()
-                except Exception as e:
-                    break
+            self.schedule_waiting_requests()
+        else:
+            active_requests: List[DynamicInferenceRequest] = []
+            finished_requests: List[DynamicInferenceRequest] = []
 
         # Print context state.
         if verbose:
@@ -278,9 +305,11 @@ class DynamicInferenceEngine(AbstractEngine):
             )
         )
 
-        return finished_requests, step_time
+        return active_requests, finished_requests, step_time
 
-    def step(
+    def step(
+        self, sampling_params: SamplingParams, *, verbose: Optional[bool] = False
+    ) -> Tuple[List[DynamicInferenceRequest], List[DynamicInferenceRequest], float]:
         """Synchronous wrapper for `self.async_step`."""
         return self._loop.run_until_complete(
             self.async_step(sampling_params=sampling_params, verbose=verbose)
@@ -297,7 +326,7 @@ class DynamicInferenceEngine(AbstractEngine):
 
         finished_requests_list = []
         while self.has_unfinished_requests():
-            finished_requests, step_time = self.step(sampling_params)
+            active_requests, finished_requests, step_time = self.step(sampling_params)
             finished_requests_list.extend(finished_requests)
 
         return finished_requests_list
```
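The step timer switches from `time.time()` to a pair of reusable `torch.cuda.Event` objects, so `step_time` measures the recorded GPU work rather than host wall clock; `elapsed_time()` returns milliseconds, hence the `/ 1e3`. The pattern in isolation (requires a CUDA device):

```python
import torch

# The event-based timing pattern adopted in async_step.
start = torch.cuda.Event(enable_timing=True)
end = torch.cuda.Event(enable_timing=True)

start.record()
# ... enqueue the GPU work to be timed, e.g. a decode step ...
end.record()
end.synchronize()                          # wait for the recorded work to finish
step_time = start.elapsed_time(end) / 1e3  # elapsed_time() is in ms; convert to s
print(f"{step_time:.4f} s")
```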
{megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/inference_request.py
RENAMED
```diff
@@ -46,6 +46,7 @@ class InferenceRequest:
     prompt_top_n_logprobs: Optional[List[Dict[str, float]]] = None
     generated_top_n_logprobs: Optional[List[Dict[str, float]]] = None
     generated_length: Optional[int] = None
+    tpot: Optional[List[int]] = None
 
     def __post_init__(self):
         if self.sampling_params is None and self.inference_parameters is not None:
```
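The new `tpot` field carries one latency per generated token ("time per output token"; despite the `List[int]` annotation, the controller fills it with float seconds). A hypothetical consumer of the field, assuming it has been populated:

```python
def mean_tpot_ms(request) -> float:
    """Hypothetical helper: mean time-per-output-token in milliseconds,
    assuming `request.tpot` holds float seconds, one entry per token."""
    if not request.tpot:
        return 0.0
    return 1000.0 * sum(request.tpot) / len(request.tpot)
```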
{megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/inference/text_generation_controllers/text_generation_controller.py
RENAMED
```diff
@@ -34,6 +34,8 @@ try:
 
 except ImportError:
     HAVE_TE = False
+    Fp8Padding = None
+    Fp8Unpadding = None
 
 
 class TextGenerationController:
@@ -312,6 +314,7 @@ class TextGenerationController:
         current_context_end_position: int,
         is_generation_done_tensor: torch.Tensor,
         generated_sequence_lengths: torch.Tensor,
+        termination_id: Optional[int] = None,
     ) -> Tuple[torch.Tensor, torch.Tensor]:
         """Checks which prompts have reached an end condition
 
@@ -337,10 +340,12 @@ class TextGenerationController:
             Tuple[torch.Tensor, torch.Tensor]: Returns the boolean
                 is_generation_done_tensor and the generated_sequence_lengths after updating it
         """
+        if termination_id is None:
+            termination_id = self.tokenizer.eod
         latest_samples = updated_prompts_tokens[:, current_context_end_position]
         # Make sure we are checking eod criterion only for prompts that have started generating
         # (i.e) We only look at the generated tokenns and not the input tokens.
-        reached_eod = (latest_samples == self.tokenizer.eod) & generation_started
+        reached_eod = (latest_samples == termination_id) & generation_started
         is_generation_done_tensor = is_generation_done_tensor | reached_eod
         # We increment generated sequence lengths when that prompt has not hit the
         # EOD and generation has started
@@ -543,7 +548,7 @@ class TextGenerationController:
         active_requests: OrderedDict[str, InferenceRequest],
         active_streams: Optional[OrderedDict[str, AsyncStream]] = None,
     ) -> OrderedDict[str, InferenceRequest]:
-        """Utility to generate
+        """Utility to generate all the output tokens and probabilities for the prompts.
 
         This utility generates the output tokens for a static batch. It runs the forward steps till
         all prompts complete generation, updates the status of these requests to completed, adds
@@ -654,6 +659,10 @@ class TextGenerationController:
         # to nearest power of 2
         vocab_size = self.inference_wrapped_model.inference_wrapper_config.padded_vocab_size
 
+        # Check whether early termination is enabled
+        no_early_termination = getattr(sampling_params, "no_early_termination", False)
+        termination_id = -1 if no_early_termination else self.tokenizer.eod
+
         streaming_enabled = active_streams is not None and len(active_streams) > 0
         if streaming_enabled:
             # Start a separate thread for streaming tokens to avoid blocking the
@@ -671,6 +680,11 @@ class TextGenerationController:
             streaming_executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
             stream_tokens = functools.partial(self.stream_tokens, sampling_params)
 
+        for request in active_requests.values():
+            # Initialize to a list to store a latency measurement for each generated token.
+            request.tpot = []
+        timing_events = []
+
         with torch.inference_mode():
             self.inference_wrapped_model.prep_model_for_inference()
 
@@ -694,7 +708,18 @@ class TextGenerationController:
             context_start_position = 0
             context_end_position = min_prompt_length_in_batch
 
+            # The initial iteration of this loop runs the prefill phase up to the shortest
+            # prompt length in the batch. Then every subsequent iterations runs a decode step.
+            # At least one new token will be generated in each iteration. The generated token
+            # will be ignored for requests which have prompt length > the current generated
+            # sequence length. Similarly, the generated token is ignored for requests which
+            # have maximum total sequence length < the current generated sequence length.
             while True:
+                # Add a timing event at the start of each iteration. The token generation
+                # time will be the elapsed time between consective timing events.
+                timing_events.append(torch.cuda.Event(enable_timing=True))
+                timing_events[-1].record()
+
                 # Pick the context window that we need to pass through the network.
                 inference_input_for_context_window: Dict[str, Any] = (
                     self.inference_wrapped_model.get_batch_for_context_window(
@@ -817,6 +842,7 @@ class TextGenerationController:
                         current_context_end_position=context_end_position,
                         is_generation_done_tensor=is_generation_done_tensor,
                         generated_sequence_lengths=generated_sequence_lengths,
+                        termination_id=termination_id,
                     )
                 )
 
@@ -852,6 +878,10 @@ class TextGenerationController:
                 if context_end_position >= max_sequence_length:
                     break
 
+            # Add a final timing event to compute the latency of every loop iteration
+            timing_events.append(torch.cuda.Event(enable_timing=True))
+            timing_events[-1].record()
+
             # Close all streams
             if streaming_enabled:
                 streaming_executor.shutdown()
@@ -870,6 +900,15 @@ class TextGenerationController:
             generated_sequence_lengths > sampling_params.num_tokens_to_generate
         ] = sampling_params.num_tokens_to_generate
 
+        timing_events[-1].synchronize()
+        tpot = torch.tensor(
+            [
+                timing_events[i].elapsed_time(timing_events[i + 1]) / 1e3
+                for i in range(len(timing_events) - 1)
+            ],
+            dtype=torch.float32,
+        )
+
         for idx, request in enumerate(active_requests.values()):
             input_prompt_length = int(prompt_lengths_in_batch[idx])
             # Shorter prompts might have generated more than required tokens. So we trim them down
@@ -885,6 +924,20 @@ class TextGenerationController:
             request.generated_length = required_sequence_length
             request.generated_tokens = required_result_tokens
 
+            # Record the decode latencies for only the generated tokens
+            request_tpot = tpot.clone()
+            # Sum up the latencies of the first prompt tokens if the
+            # request prompt length > minimum prompt length
+            spill_length = input_prompt_length - min_prompt_length_in_batch
+            if spill_length > 0:
+                spill_latency = request_tpot[:spill_length].sum()
+                request_tpot = torch.cat((spill_latency.unsqueeze(0), request_tpot[spill_length:]))
+
+            # Remove the extraneous latencies if the
+            # request sequence length < maximum sequence length
+            request_tpot = request_tpot[:required_sequence_length]
+            request.tpot = request_tpot.tolist()
+
             if output_log_probs is not None:
                 request.prompt_log_probs = output_log_probs[idx, : input_prompt_length - 1].tolist()
                 request.generated_log_probs = output_log_probs[
```
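The per-request `tpot` lists are cut from one shared list of per-iteration latencies: a request whose prompt is `spill_length` tokens longer than the batch minimum spent its first `spill_length` iterations still ingesting prompt tokens, so those latencies are summed into a single first-token value, and latencies past the request's own sequence length are dropped. A self-contained sketch of that trimming (the function name is hypothetical):

```python
import torch

def per_request_tpot(tpot: torch.Tensor, prompt_len: int,
                     min_prompt_len: int, generated_len: int) -> torch.Tensor:
    """Hypothetical standalone version of the trimming applied above.

    `tpot` holds one latency (seconds) per batch-level loop iteration;
    the first iterations still consume prompt tokens for any request
    whose prompt is longer than the batch minimum.
    """
    request_tpot = tpot.clone()
    spill_length = prompt_len - min_prompt_len
    if spill_length > 0:
        # Collapse the iterations that were still ingesting this request's
        # prompt into a single first-token latency.
        spill_latency = request_tpot[:spill_length].sum()
        request_tpot = torch.cat((spill_latency.unsqueeze(0), request_tpot[spill_length:]))
    # Drop iterations that ran past this request's own sequence end.
    return request_tpot[:generated_len]

# Batch latencies [0.1, 0.2, 0.3, 0.4] s; a request 2 tokens longer than the
# shortest prompt that generated 2 tokens:
print(per_request_tpot(torch.tensor([0.1, 0.2, 0.3, 0.4]), 7, 5, 2))
# tensor([0.3000, 0.3000])
```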
{megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/tensor_parallel/layers.py
RENAMED
```diff
@@ -20,6 +20,8 @@ from megatron.core.parallel_state import (
 )
 from megatron.core.utils import (
     divide,
+    get_pg_rank,
+    get_pg_size,
     get_tensor_model_parallel_group_if_none,
     is_torch_min_version,
     make_tp_sharded_tensor_for_checkpoint,
@@ -219,7 +221,7 @@ class VocabParallelEmbedding(torch.nn.Module):
 
         (self.vocab_start_index, self.vocab_end_index) = (
             VocabUtility.vocab_range_from_global_vocab_size(
-                self.num_embeddings, self.tp_group.rank(), self.tp_group.size()
+                self.num_embeddings, get_pg_rank(self.tp_group), get_pg_size(self.tp_group)
             )
         )
         self.num_embeddings_per_partition = self.vocab_end_index - self.vocab_start_index
@@ -241,8 +243,8 @@ class VocabParallelEmbedding(torch.nn.Module):
                 0,
                 init_method,
                 params_dtype=config.params_dtype,
-                rank=self.tp_group.rank(),
-                world_size=self.tp_group.size(),
+                rank=get_pg_rank(self.tp_group),
+                world_size=get_pg_size(self.tp_group),
             )
         else:
             self.weight = Parameter(
@@ -808,8 +810,8 @@ class ColumnParallelLinear(torch.nn.Module):
         self.tp_group = get_tensor_model_parallel_group_if_none(
             self.tp_group, is_expert=self.is_expert
         )
-        world_size = self.tp_group.size()
-        rank = self.tp_group.rank()
+        world_size = get_pg_size(self.tp_group)
+        rank = get_pg_rank(self.tp_group)
         self.explicit_expert_comm = self.is_expert and (world_size > 1 or self.expert_parallel)
         self.output_size_per_partition = divide(output_size, world_size)
 
@@ -1120,8 +1122,8 @@ class RowParallelLinear(torch.nn.Module):
             self.tp_group, is_expert=self.is_expert
         )
 
-        world_size = self.tp_group.size()
-        rank = self.tp_group.rank()
+        world_size = get_pg_size(self.tp_group)
+        rank = get_pg_rank(self.tp_group)
         self.explicit_expert_comm = self.is_expert and (world_size > 1 or self.expert_parallel)
 
         self.input_size_per_partition = divide(input_size, world_size)
```
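`VocabUtility.vocab_range_from_global_vocab_size` (unchanged in this release) takes an explicit rank and world size and returns a contiguous slice of the vocabulary. A sketch mirroring the computation (assuming an even split, which holds for Megatron's padded vocab sizes):

```python
def vocab_range_from_global_vocab_size(global_vocab_size: int, rank: int, world_size: int):
    """Sketch of the contiguous [start, end) vocab slice owned by `rank`
    (mirrors what VocabUtility computes; assumes an even split)."""
    per_partition = global_vocab_size // world_size
    start = rank * per_partition
    return start, start + per_partition

# A 50304-token padded vocab on 8 tensor-parallel ranks gives rank 3:
print(vocab_range_from_global_vocab_size(50304, 3, 8))  # (18864, 25152)
```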
{megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/attention.py
RENAMED
```diff
@@ -28,6 +28,7 @@ from megatron.core.transformer.spec_utils import ModuleSpec, build_module
 from megatron.core.utils import (
     deprecate_inference_params,
     divide,
+    get_pg_size,
     is_fa_min_version,
     nvtx_range_pop,
     nvtx_range_push,
@@ -135,7 +136,7 @@ class Attention(MegatronModule, ABC):
         self.model_comm_pgs = model_comm_pgs
 
         # Per attention head and per partition values
-        world_size = self.model_comm_pgs.tp.size()
+        world_size = get_pg_size(self.model_comm_pgs.tp)
         self.hidden_size_per_attention_head = divide(
             self.query_projection_size, self.config.num_attention_heads
         )
```
{megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/transformer/cuda_graphs.py
RENAMED
```diff
@@ -977,9 +977,13 @@ class CudaGraphManager(torch.nn.Module):
             runner = self.get_cudagraph_runner(megatron_module)
             runner.eval()
             out = runner.record_graph_capture(args, kwargs)
-        elif self.training and torch.is_grad_enabled():
+        elif self.training:
             # Training mode
             runner = self.get_cudagraph_runner(megatron_module)
+            # check if a layer is frozen during training.
+            if not torch.is_grad_enabled():
+                # If the layer is frozen, we need to set the runner to eval mode.
+                runner.eval()
             out = runner.record_graph_capture(args, kwargs)
         else:
             # No cudagraphs were found in training mode with grad disabled, so fallback to
```
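The new branch distinguishes a frozen layer: the module is in training mode, but its forward runs under `torch.no_grad()`, so `torch.is_grad_enabled()` is `False` and the runner is captured in eval mode instead of falling through. The detection in isolation (a sketch of the mechanism only, not the CudaGraphManager API):

```python
import torch

class FrozenAwareLayer(torch.nn.Module):
    """Sketch: a module in training mode whose forward runs under
    torch.no_grad() is treated as frozen and switched to eval mode."""

    def forward(self, x):
        if self.training and not torch.is_grad_enabled():
            # Frozen layer: training-mode module, gradients disabled.
            self.eval()
        return x

layer = FrozenAwareLayer().train()
with torch.no_grad():
    layer(torch.zeros(1))
print(layer.training)  # False: the no-grad forward flipped it to eval mode
```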
{megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1}/megatron/core/utils.py
RENAMED
```diff
@@ -401,6 +401,9 @@ def deprecate_inference_params(inference_context, inference_params):
 def get_tensor_model_parallel_group_if_none(tp_group, is_expert=False, check_initialized=True):
     """Issue a deprecation warning if tp_group is None and return the default tp group."""
     # TODO(zijiey): remove this function later.
+    if not torch.distributed.is_initialized():
+        return None
+
     if tp_group is None:
         if torch.distributed.is_initialized() and torch.distributed.get_rank() == 0:
             warnings.warn(
```
{megatron_core-0.14.0rc0 → megatron_core-0.14.0rc1/megatron_core.egg-info}/PKG-INFO
RENAMED
```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: megatron-core
-Version: 0.14.0rc0
+Version: 0.14.0rc1
 Summary: Megatron Core - a library for efficient and scalable training of transformer based models
 Author-email: NVIDIA <nemo-toolkit@nvidia.com>
 Maintainer-email: NVIDIA <nemo-toolkit@nvidia.com>
```