megatron-core 0.12.0rc2__tar.gz → 0.12.0rc3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megatron-core might be problematic. Click here for more details.
- {megatron_core-0.12.0rc2/megatron_core.egg-info → megatron_core-0.12.0rc3}/PKG-INFO +1 -1
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/custom_fsdp/fully_sharded_data_parallel.py +0 -3
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/contexts/dynamic_context.py +19 -1
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/engines/dynamic_engine.py +8 -2
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/text_generation_controllers/text_generation_controller.py +6 -3
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/package_info.py +1 -1
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/utils.py +25 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3/megatron_core.egg-info}/PKG-INFO +1 -1
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/LICENSE +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/MANIFEST.in +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/README.md +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/README.md +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/config.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/config_logger.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/bert_dataset.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/blended_dataset.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/blended_megatron_dataset_builder.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/blended_megatron_dataset_config.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/gpt_dataset.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/helpers.cpp +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/helpers.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/indexed_dataset.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/masked_dataset.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/megatron_dataset.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/megatron_tokenizer.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/multimodal_dataset.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/config/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/config/bert_embedders.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/config/config.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/config/gpt_chunk_datasets.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/config/tokenizers.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/db/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/db/build.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/db/dataset.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/db/utils.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/external_libs.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/build.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/factory.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/index.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/indexes/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/indexes/faiss_base.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/indexes/faiss_par_add.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/utils.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/validate.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/gpt_chunk_dataset.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/multi_split_gpt_dataset.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/query.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/retro_dataset.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/utils.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/utils.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/t5_dataset.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/utils.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/utils_s3.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/core.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/dict_utils.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/exchange_utils.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/mapping.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/optimizer.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/serialization.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/state_dict_utils.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/async_utils.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/base.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/cached_metadata_filesystem_reader.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/common.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/filesystem_async.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/fully_parallel.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/resharding.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/state_dict_saver.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/tensorstore.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/torch.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/two_stage.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/zarr.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/tensor_aware_state_dict.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/utils.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/validation.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/custom_fsdp/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/custom_fsdp/param_and_grad_buffer.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/data_parallel_base.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/distributed_data_parallel.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/distributed_data_parallel_config.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/finalize_model_grads.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/param_and_grad_buffer.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/torch_fully_sharded_data_parallel.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/enums.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/data_type.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/export_config.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/model_type.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/engine_builder/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/engine_builder/trtllm_engine_builder.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/model_to_trllm_mapping/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/model_to_trllm_mapping/default_conversion_dict.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trt_model_config.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trt_model_type.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trtllm_helper.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trtllm_layers.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trtllm_weights_converter/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trtllm_weights_converter/distributed_trtllm_model_weights_converter.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trtllm_weights_converter/single_device_trtllm_model_weights_converter.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/extensions/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/extensions/transformer_engine.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fp8_utils.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_bias_dropout.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_bias_geglu.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_bias_gelu.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_bias_swiglu.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_cross_entropy.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_layer_norm.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_softmax.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/async_stream.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/common_inference_params.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/communication_utils.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/contexts/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/contexts/base_context.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/contexts/static_context.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/engines/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/engines/abstract_engine.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/engines/mcore_engine.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/engines/static_engine.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/inference_request.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/gpt/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/gpt/gpt_inference_wrapper.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/inference_wrapper_config.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/multimodal/vlm_inference_wrapper.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/t5/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/t5/t5_inference_wrapper.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/modelopt_support/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/modelopt_support/gpt/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/modelopt_support/gpt/model_specs.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/modelopt_support/gpt/state_dict_hooks.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/modelopt_support/mamba/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/modelopt_support/mamba/model_specs.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/sampling_params.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/scheduler.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/text_generation_controllers/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/text_generation_controllers/encoder_decoder_text_generation_controller.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/text_generation_controllers/simple_text_generation_controller.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/text_generation_controllers/vlm_text_generation_controller.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/utils.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference_params.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/jit.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/model_parallel_config.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/T5/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/T5/t5_model.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/T5/t5_spec.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/bert/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/bert/bert_layer_specs.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/bert/bert_lm_head.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/bert/bert_model.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/bert/pooler.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/embeddings/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/embeddings/language_model_embedding.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/embeddings/relative_pos_embedding.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/embeddings/rope_utils.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/embeddings/rotary_pos_embedding.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/language_module/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/language_module/language_module.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/vision_module/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/vision_module/vision_module.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/gpt/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/gpt/gpt_layer_specs.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/gpt/gpt_model.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/gpt/moe_module_specs.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/huggingface/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/huggingface/clip_model.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/huggingface/module.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/huggingface/qwen_model.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/mamba/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/mamba/mamba_layer_specs.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/mamba/mamba_model.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/multimodal/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/multimodal/context_parallel.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/multimodal/llava_model.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/multimodal/llava_spec.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/retro/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/retro/base_attention.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/retro/config.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/retro/decoder_attention.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/retro/decoder_spec.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/retro/encoder_attention.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/retro/encoder_spec.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/retro/model.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/retro/utils.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/vision/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/vision/clip_vit_model.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/vision/multimodal_projector.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/vision/radio.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/vision/vit_layer_specs.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/num_microbatches_calculator.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/optimizer/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/optimizer/clip_grads.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/optimizer/cpu_offloading/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/optimizer/cpu_offloading/hybrid_optimizer.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/optimizer/distrib_optimizer.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/optimizer/grad_scaler.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/optimizer/optimizer.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/optimizer/optimizer_config.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/optimizer_param_scheduler.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/packed_seq_params.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/parallel_state.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/pipeline_parallel/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/pipeline_parallel/p2p_communication.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/pipeline_parallel/schedules.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/post_training/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/post_training/modelopt/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/post_training/modelopt/gpt/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/post_training/modelopt/gpt/model_specs.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/post_training/modelopt/gpt/state_dict_hooks.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/post_training/modelopt/layers.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/post_training/modelopt/mamba/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/post_training/modelopt/mamba/model_specs.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/process_groups_config.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/requirements.txt +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/rerun_state_machine.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/ssm/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/ssm/mamba_block.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/ssm/mamba_config.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/ssm/mamba_hybrid_layer_allocation.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/ssm/mamba_layer.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/ssm/mamba_mixer.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/ssm/mlp_layer.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/ssm/triton_cache_manager.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/tensor_parallel/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/tensor_parallel/cross_entropy.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/tensor_parallel/data.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/tensor_parallel/layers.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/tensor_parallel/mappings.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/tensor_parallel/random.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/tensor_parallel/utils.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/timers.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/attention.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/cuda_graphs.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/custom_layers/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/custom_layers/transformer_engine.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/dot_product_attention.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/enums.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/identity_op.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/mlp.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/module.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/__init__.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/experts.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/fused_a2a.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/grouped_gemm_util.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/legacy_a2a_token_dispatcher.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/moe_layer.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/moe_utils.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/router.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/shared_experts.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/token_dispatcher.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/upcycling_utils.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/multi_latent_attention.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/multi_token_prediction.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/spec_utils.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/torch_layer_norm.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/torch_norm.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/transformer_block.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/transformer_config.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/transformer_layer.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/utils.py +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron_core.egg-info/SOURCES.txt +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron_core.egg-info/dependency_links.txt +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron_core.egg-info/requires.txt +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron_core.egg-info/top_level.txt +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/pyproject.toml +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/requirements/pytorch_24.01/requirements.txt +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/requirements/pytorch_24.07/requirements.txt +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/requirements/pytorch_24.10/requirements.txt +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/requirements/pytorch_25.03/requirements.txt +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/setup.cfg +0 -0
- {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/setup.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: megatron-core
|
|
3
|
-
Version: 0.12.0rc2
|
|
3
|
+
Version: 0.12.0rc3
|
|
4
4
|
Summary: Megatron Core - a library for efficient and scalable training of transformer based models
|
|
5
5
|
Home-page: https://github.com/NVIDIA/Megatron-LM/megatron/core
|
|
6
6
|
Download-URL: https://github.com/NVIDIA/Megatron-LM/releases
|
|
@@ -22,7 +22,6 @@ from megatron.core.distributed.custom_fsdp.param_and_grad_buffer import (
|
|
|
22
22
|
from megatron.core.distributed.data_parallel_base import _BaseDataParallel
|
|
23
23
|
from megatron.core.distributed.distributed_data_parallel_config import DistributedDataParallelConfig
|
|
24
24
|
from megatron.core.fp8_utils import is_float8tensor
|
|
25
|
-
from megatron.core.models.common.embeddings.language_model_embedding import LanguageModelEmbedding
|
|
26
25
|
from megatron.core.transformer.transformer_config import TransformerConfig
|
|
27
26
|
from megatron.core.transformer.transformer_layer import TransformerLayer
|
|
28
27
|
from megatron.core.utils import is_submodule, log_single_rank
|
|
@@ -124,8 +123,6 @@ class FullyShardedDataParallel(_BaseDataParallel):
|
|
|
124
123
|
self.fsdp_unit_modules = fsdp_unit_modules
|
|
125
124
|
else:
|
|
126
125
|
self.fsdp_unit_modules = [TransformerLayer]
|
|
127
|
-
if not getattr(self.module, "share_embeddings_and_output_weights", False):
|
|
128
|
-
self.fsdp_unit_modules.append(LanguageModelEmbedding)
|
|
129
126
|
self.main_weights = True
|
|
130
127
|
self.data_parallel_group = parallel_state.get_data_parallel_group(
|
|
131
128
|
with_context_parallel=True
|
|
@@ -177,6 +177,7 @@ class DynamicInferenceContext(BaseInferenceContext):
|
|
|
177
177
|
(self.max_requests,), 0, dtype=torch.int32, device=torch.cuda.current_device()
|
|
178
178
|
)
|
|
179
179
|
self.request_query_lengths = torch.empty_like(self.request_ids)
|
|
180
|
+
self.request_output_lengths = torch.empty_like(self.request_ids)
|
|
180
181
|
self.request_kv_length_offsets = torch.empty_like(self.request_ids)
|
|
181
182
|
self.request_kv_chunk_counts = torch.empty_like(self.request_ids)
|
|
182
183
|
self.request_last_kv_chunk_id = torch.empty_like(self.request_ids)
|
|
@@ -362,6 +363,10 @@ class DynamicInferenceContext(BaseInferenceContext):
|
|
|
362
363
|
lengths = lengths[self.paused_request_count : self.total_request_count]
|
|
363
364
|
return lengths
|
|
364
365
|
|
|
366
|
+
def get_max_sequence_lengths(self) -> Tensor:
|
|
367
|
+
"""Maximum sequence length for active requests."""
|
|
368
|
+
return self.request_output_lengths[self.paused_request_count : self.total_request_count]
|
|
369
|
+
|
|
365
370
|
def append_key_value_cache(self, layer_number: int, key: Tensor, value: Tensor) -> None:
|
|
366
371
|
"""Append to KV cache.
|
|
367
372
|
|
|
@@ -628,6 +633,7 @@ class DynamicInferenceContext(BaseInferenceContext):
|
|
|
628
633
|
# Reset request indexes.
|
|
629
634
|
self.request_ids.fill_(0)
|
|
630
635
|
self.request_query_lengths.fill_(0)
|
|
636
|
+
self.request_output_lengths.fill_(0)
|
|
631
637
|
self.request_kv_length_offsets.fill_(0)
|
|
632
638
|
self.request_kv_chunk_counts.fill_(0)
|
|
633
639
|
self.request_last_kv_chunk_id.fill_(0)
|
|
@@ -693,7 +699,9 @@ class DynamicInferenceContext(BaseInferenceContext):
|
|
|
693
699
|
|
|
694
700
|
return last_token_logits
|
|
695
701
|
|
|
696
|
-
def add_request(
|
|
702
|
+
def add_request(
|
|
703
|
+
self, request_id: int, tokens: List[int], num_tokens_to_generate: Optional[int] = None
|
|
704
|
+
) -> None:
|
|
697
705
|
"""Add request to context.
|
|
698
706
|
|
|
699
707
|
After a request is added, it will first do one prefill step, followed by
|
|
@@ -731,9 +739,17 @@ class DynamicInferenceContext(BaseInferenceContext):
|
|
|
731
739
|
if new_chunk_ids is None:
|
|
732
740
|
raise ChunkOverflowError()
|
|
733
741
|
|
|
742
|
+
if num_tokens_to_generate is None:
|
|
743
|
+
num_tokens_to_generate = self.max_sequence_length - context_length
|
|
744
|
+
elif context_length + num_tokens_to_generate > self.max_sequence_length:
|
|
745
|
+
raise TokenOverflowError()
|
|
746
|
+
|
|
734
747
|
# Update request state.
|
|
735
748
|
self.request_ids[self.total_request_count] = request_id
|
|
736
749
|
self.request_query_lengths[self.total_request_count] = context_length
|
|
750
|
+
self.request_output_lengths[self.total_request_count] = (
|
|
751
|
+
context_length + num_tokens_to_generate
|
|
752
|
+
)
|
|
737
753
|
self.request_kv_length_offsets[self.total_request_count] = 0
|
|
738
754
|
self.request_kv_memory[self.total_request_count][:num_chunks_needed] = new_chunk_ids
|
|
739
755
|
self.request_kv_chunk_counts[self.total_request_count] = num_chunks_needed
|
|
@@ -861,6 +877,7 @@ class DynamicInferenceContext(BaseInferenceContext):
|
|
|
861
877
|
# Shift active requests left.
|
|
862
878
|
self.request_kv_length_offsets[dst_idxs] = self.request_kv_length_offsets[src_idxs]
|
|
863
879
|
self.request_query_lengths[dst_idxs] = self.request_query_lengths[src_idxs]
|
|
880
|
+
self.request_output_lengths[dst_idxs] = self.request_output_lengths[src_idxs]
|
|
864
881
|
self.request_ids[dst_idxs] = self.request_ids[src_idxs]
|
|
865
882
|
next_tokens[dst_idxs] = next_tokens[src_idxs]
|
|
866
883
|
|
|
@@ -910,6 +927,7 @@ class DynamicInferenceContext(BaseInferenceContext):
|
|
|
910
927
|
|
|
911
928
|
self.request_kv_length_offsets[dst_idxs] = self.request_kv_length_offsets[src_idxs]
|
|
912
929
|
self.request_query_lengths[dst_idxs] = self.request_query_lengths[src_idxs]
|
|
930
|
+
self.request_output_lengths[dst_idxs] = self.request_output_lengths[src_idxs]
|
|
913
931
|
self.request_ids[dst_idxs] = self.request_ids[src_idxs]
|
|
914
932
|
next_tokens[dst_idxs] = next_tokens[src_idxs]
|
|
915
933
|
|
|
@@ -85,12 +85,18 @@ class DynamicInferenceEngine(AbstractEngine):
|
|
|
85
85
|
self.context.reset()
|
|
86
86
|
self.finished_request_count = 0
|
|
87
87
|
|
|
88
|
-
def add_request(
|
|
88
|
+
def add_request(
|
|
89
|
+
self,
|
|
90
|
+
request_id: int,
|
|
91
|
+
prompt: Union[str, List[int], Tensor],
|
|
92
|
+
num_tokens_to_generate: Optional[int] = None,
|
|
93
|
+
) -> None:
|
|
89
94
|
"""Add request to inference context.
|
|
90
95
|
|
|
91
96
|
Args:
|
|
92
97
|
request_id (int): Unique ID of request.
|
|
93
98
|
prompt (Union[str, Tensor]): Prompt as either a text string or token IDs.
|
|
99
|
+
num_tokens_to_generate (Optional[int]): Number of output tokens to generate
|
|
94
100
|
|
|
95
101
|
Return:
|
|
96
102
|
None.
|
|
@@ -120,7 +126,7 @@ class DynamicInferenceEngine(AbstractEngine):
|
|
|
120
126
|
raise Exception("specialize for <%s>." % type(prompt).__name__)
|
|
121
127
|
|
|
122
128
|
# Add request to context.
|
|
123
|
-
return self.context.add_request(request_id, tokens)
|
|
129
|
+
return self.context.add_request(request_id, tokens, num_tokens_to_generate)
|
|
124
130
|
|
|
125
131
|
def step(
|
|
126
132
|
self, sampling_params: SamplingParams, *, verbose: Optional[bool] = False
|
|
@@ -335,10 +335,13 @@ class TextGenerationController:
|
|
|
335
335
|
context.paused_request_count : context.total_request_count
|
|
336
336
|
].long()
|
|
337
337
|
active_sequence_lengths = context.get_active_sequence_lengths()
|
|
338
|
+
active_sequence_lengths += 1 # Account for the token we just generated
|
|
339
|
+
max_sequence_lengths = context.get_max_sequence_lengths()
|
|
338
340
|
|
|
339
|
-
# Request finished if termination_id or length
|
|
340
|
-
|
|
341
|
-
|
|
341
|
+
# Request finished if termination_id or length >= max_sequence_length.
|
|
342
|
+
|
|
343
|
+
active_request_mask = (new_sample != termination_id).byte() & torch.less(
|
|
344
|
+
active_sequence_lengths, max_sequence_lengths
|
|
342
345
|
).byte()
|
|
343
346
|
finished_idxs = (
|
|
344
347
|
torch.nonzero(active_request_mask == 0, as_tuple=True)[0] + context.paused_request_count
|
|
@@ -47,6 +47,7 @@ except Exception:
|
|
|
47
47
|
# This is a WAR for building docs, where torch is not actually imported
|
|
48
48
|
_torch_version = PkgVersion("0.0.0")
|
|
49
49
|
_te_version = None
|
|
50
|
+
_fa_version = None
|
|
50
51
|
|
|
51
52
|
|
|
52
53
|
class ExperimentalNotEnabledError(Exception):
|
|
@@ -279,6 +280,30 @@ def is_torch_min_version(version, check_equality=True):
|
|
|
279
280
|
return get_torch_version() > PkgVersion(version)
|
|
280
281
|
|
|
281
282
|
|
|
283
|
+
def get_fa_version():
|
|
284
|
+
"""Get Flash attention version from __version__; if not available use pip's. Use caching."""
|
|
285
|
+
|
|
286
|
+
def get_fa_version_str():
|
|
287
|
+
import flash_attn as fa
|
|
288
|
+
|
|
289
|
+
if hasattr(fa, '__version__'):
|
|
290
|
+
return str(fa.__version__)
|
|
291
|
+
else:
|
|
292
|
+
return version("flash-attn")
|
|
293
|
+
|
|
294
|
+
global _fa_version
|
|
295
|
+
if _fa_version is None:
|
|
296
|
+
_fa_version = PkgVersion(get_fa_version_str())
|
|
297
|
+
return _fa_version
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def is_fa_min_version(version, check_equality=True):
|
|
301
|
+
"""Check if minimum version of `flash-attn` is installed."""
|
|
302
|
+
if check_equality:
|
|
303
|
+
return get_fa_version() >= PkgVersion(version)
|
|
304
|
+
return get_fa_version() > PkgVersion(version)
|
|
305
|
+
|
|
306
|
+
|
|
282
307
|
def ensure_divisibility(numerator, denominator):
|
|
283
308
|
"""Ensure that numerator is divisible by the denominator."""
|
|
284
309
|
assert numerator % denominator == 0, "{} is not divisible by {}".format(numerator, denominator)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: megatron-core
|
|
3
|
-
Version: 0.12.0rc2
|
|
3
|
+
Version: 0.12.0rc3
|
|
4
4
|
Summary: Megatron Core - a library for efficient and scalable training of transformer based models
|
|
5
5
|
Home-page: https://github.com/NVIDIA/Megatron-LM/megatron/core
|
|
6
6
|
Download-URL: https://github.com/NVIDIA/Megatron-LM/releases
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/blended_dataset.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/indexed_dataset.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/masked_dataset.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/megatron_dataset.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/megatron_tokenizer.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/multimodal_dataset.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/__init__.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/config/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/config/config.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/db/__init__.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/db/build.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/db/dataset.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/db/utils.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/external_libs.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/__init__.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/build.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/factory.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/index.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/utils.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/validate.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/query.py
RENAMED
|
File without changes
|
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/__init__.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/core.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/dict_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/mapping.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/optimizer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/utils.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/validation.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/data_parallel_base.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trt_model_config.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trt_model_type.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trtllm_helper.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trtllm_layers.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/extensions/transformer_engine.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_bias_dropout.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_bias_geglu.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_bias_gelu.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_bias_swiglu.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_cross_entropy.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_layer_norm.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/communication_utils.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/contexts/__init__.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/contexts/base_context.py
RENAMED
|
File without changes
|
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/engines/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/engines/mcore_engine.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/engines/static_engine.py
RENAMED
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/inference_request.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/sampling_params.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|