megatron-core 0.13.0rc3__tar.gz → 0.13.0rc4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megatron-core might be problematic. Click here for more details.
- {megatron_core-0.13.0rc3/megatron_core.egg-info → megatron_core-0.13.0rc4}/PKG-INFO +1 -1
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/T5/t5_spec.py +2 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/bert/bert_layer_specs.py +2 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/common/embeddings/rotary_pos_embedding.py +1 -1
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/gpt/gpt_layer_specs.py +4 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/gpt/heterogeneous/heterogeneous_layer_specs.py +2 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/gpt/moe_module_specs.py +2 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/retro/decoder_spec.py +2 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/retro/encoder_spec.py +2 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/package_info.py +1 -1
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/cuda_graphs.py +1 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/heterogeneous/linear_replacements.py +4 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/moe/experts.py +1 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/moe/moe_layer.py +2 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/moe/moe_utils.py +2 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/multi_token_prediction.py +2 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/transformer_block.py +22 -11
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4/megatron_core.egg-info}/PKG-INFO +1 -1
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/LICENSE +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/MANIFEST.in +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/README.md +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/README.md +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/config.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/config_logger.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/bert_dataset.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/blended_dataset.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/blended_megatron_dataset_builder.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/blended_megatron_dataset_config.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/gpt_dataset.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/helpers.cpp +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/helpers.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/indexed_dataset.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/masked_dataset.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/megatron_dataset.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/megatron_tokenizer.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/multimodal_dataset.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/object_storage_utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/config/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/config/bert_embedders.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/config/config.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/config/gpt_chunk_datasets.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/config/tokenizers.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/db/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/db/build.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/db/dataset.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/db/utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/external_libs.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/index/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/index/build.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/index/factory.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/index/index.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/index/indexes/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/index/indexes/faiss_base.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/index/indexes/faiss_par_add.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/index/utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/index/validate.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/query/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/query/gpt_chunk_dataset.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/query/multi_split_gpt_dataset.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/query/query.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/query/retro_dataset.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/query/utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/t5_dataset.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/utils_object_storage.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/utils_s3.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/core.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/dict_utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/exchange_utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/mapping.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/optimizer.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/serialization.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/state_dict_utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/strategies/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/strategies/async_utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/strategies/base.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/strategies/cached_metadata_filesystem_reader.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/strategies/common.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/strategies/filesystem_async.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/strategies/fully_parallel.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/strategies/resharding.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/strategies/state_dict_saver.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/strategies/tensorstore.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/strategies/torch.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/strategies/two_stage.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/strategies/zarr.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/tensor_aware_state_dict.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/validation.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/distributed/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/distributed/custom_fsdp/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/distributed/custom_fsdp/fully_sharded_data_parallel.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/distributed/custom_fsdp/param_and_grad_buffer.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/distributed/data_parallel_base.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/distributed/distributed_data_parallel.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/distributed/distributed_data_parallel_config.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/distributed/finalize_model_grads.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/distributed/param_and_grad_buffer.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/distributed/torch_fully_sharded_data_parallel.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/distributed/torch_fully_sharded_data_parallel_config.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/energy_monitor.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/enums.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/data_type.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/export_config.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/model_type.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/trtllm/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/trtllm/engine_builder/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/trtllm/engine_builder/trtllm_engine_builder.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/trtllm/model_to_trllm_mapping/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/trtllm/model_to_trllm_mapping/default_conversion_dict.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/trtllm/trt_model_config.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/trtllm/trt_model_type.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/trtllm/trtllm_helper.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/trtllm/trtllm_layers.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/trtllm/trtllm_weights_converter/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/trtllm/trtllm_weights_converter/distributed_trtllm_model_weights_converter.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/trtllm/trtllm_weights_converter/single_device_trtllm_model_weights_converter.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/trtllm/trtllm_weights_converter/utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/extensions/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/extensions/kitchen.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/extensions/transformer_engine.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/extensions/transformer_engine_spec_provider.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/fp8_utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/fusions/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/fusions/fused_bias_dropout.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/fusions/fused_bias_geglu.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/fusions/fused_bias_gelu.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/fusions/fused_bias_swiglu.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/fusions/fused_cross_entropy.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/fusions/fused_indices_converter.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/fusions/fused_layer_norm.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/fusions/fused_mla_yarn_rope_apply.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/fusions/fused_pad_routing_map.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/fusions/fused_softmax.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/async_stream.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/common_inference_params.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/communication_utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/contexts/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/contexts/base_context.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/contexts/dynamic_chunk_allocator.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/contexts/dynamic_context.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/contexts/static_context.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/engines/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/engines/abstract_engine.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/engines/dynamic_engine.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/engines/mcore_engine.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/engines/static_engine.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/inference_request.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/model_inference_wrappers/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/model_inference_wrappers/gpt/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/model_inference_wrappers/gpt/gpt_inference_wrapper.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/model_inference_wrappers/inference_wrapper_config.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/model_inference_wrappers/multimodal/vlm_inference_wrapper.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/model_inference_wrappers/t5/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/model_inference_wrappers/t5/t5_inference_wrapper.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/sampling_params.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/scheduler.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/text_generation_controllers/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/text_generation_controllers/encoder_decoder_text_generation_controller.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/text_generation_controllers/simple_text_generation_controller.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/text_generation_controllers/text_generation_controller.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/text_generation_controllers/vlm_text_generation_controller.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference_params.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/jit.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/model_parallel_config.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/T5/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/T5/t5_model.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/backends.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/bert/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/bert/bert_lm_head.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/bert/bert_model.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/bert/pooler.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/common/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/common/embeddings/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/common/embeddings/language_model_embedding.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/common/embeddings/relative_pos_embedding.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/common/embeddings/rope_utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/common/language_module/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/common/language_module/language_module.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/common/vision_module/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/common/vision_module/vision_module.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/gpt/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/gpt/fine_grained_callables.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/gpt/gpt_model.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/huggingface/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/huggingface/clip_model.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/huggingface/module.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/huggingface/qwen_model.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/mamba/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/mamba/mamba_layer_specs.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/mamba/mamba_model.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/mimo/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/mimo/config/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/mimo/config/base_configs.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/mimo/model/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/mimo/model/base.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/mimo/submodules/audio.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/mimo/submodules/base.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/mimo/submodules/vision.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/multimodal/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/multimodal/context_parallel.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/multimodal/llava_model.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/multimodal/llava_spec.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/retro/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/retro/base_attention.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/retro/config.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/retro/decoder_attention.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/retro/encoder_attention.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/retro/model.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/retro/utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/vision/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/vision/clip_vit_model.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/vision/multimodal_projector.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/vision/radio.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/vision/vit_layer_specs.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/msc_utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/num_microbatches_calculator.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/optimizer/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/optimizer/clip_grads.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/optimizer/cpu_offloading/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/optimizer/cpu_offloading/hybrid_optimizer.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/optimizer/distrib_optimizer.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/optimizer/grad_scaler.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/optimizer/optimizer.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/optimizer/optimizer_config.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/optimizer_param_scheduler.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/packed_seq_params.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/parallel_state.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/pipeline_parallel/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/pipeline_parallel/p2p_communication.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/pipeline_parallel/schedules.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/pipeline_parallel/utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/post_training/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/post_training/modelopt/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/post_training/modelopt/gpt/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/post_training/modelopt/gpt/model_specs.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/post_training/modelopt/gpt/state_dict_hooks.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/post_training/modelopt/layers.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/post_training/modelopt/mamba/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/post_training/modelopt/mamba/model_specs.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/process_groups_config.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/quantization/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/quantization/quant_config.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/quantization/utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/requirements.txt +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/rerun_state_machine.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/ssm/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/ssm/mamba_block.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/ssm/mamba_context_parallel.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/ssm/mamba_hybrid_layer_allocation.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/ssm/mamba_layer.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/ssm/mamba_mixer.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/ssm/mlp_layer.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/ssm/triton_cache_manager.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/tensor_parallel/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/tensor_parallel/cross_entropy.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/tensor_parallel/data.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/tensor_parallel/layers.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/tensor_parallel/mappings.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/tensor_parallel/random.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/tensor_parallel/utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/timers.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/attention.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/custom_layers/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/custom_layers/transformer_engine.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/dot_product_attention.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/enums.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/heterogeneous/heterogeneous_config.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/identity_op.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/mlp.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/module.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/moe/__init__.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/moe/fused_a2a.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/moe/grouped_gemm_util.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/moe/router.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/moe/shared_experts.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/moe/token_dispatcher.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/moe/upcycling_utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/multi_latent_attention.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/pipeline_parallel_layer_layout.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/spec_utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/torch_layer_norm.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/torch_norm.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/transformer_config.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/transformer_layer.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/utils.py +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron_core.egg-info/SOURCES.txt +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron_core.egg-info/dependency_links.txt +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron_core.egg-info/requires.txt +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron_core.egg-info/top_level.txt +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/pyproject.toml +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/setup.cfg +0 -0
- {megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/setup.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: megatron-core
|
|
3
|
-
Version: 0.13.0rc3
|
|
3
|
+
Version: 0.13.0rc4
|
|
4
4
|
Summary: Megatron Core - a library for efficient and scalable training of transformer based models
|
|
5
5
|
Author-email: NVIDIA <nemo-toolkit@nvidia.com>
|
|
6
6
|
Maintainer-email: NVIDIA <nemo-toolkit@nvidia.com>
|
|
@@ -16,6 +16,8 @@ from megatron.core.transformer.transformer_block import TransformerBlockSubmodul
|
|
|
16
16
|
from megatron.core.transformer.transformer_layer import TransformerLayer, TransformerLayerSubmodules
|
|
17
17
|
|
|
18
18
|
try:
|
|
19
|
+
import transformer_engine as te # pylint: disable=unused-import
|
|
20
|
+
|
|
19
21
|
from megatron.core.extensions.transformer_engine import (
|
|
20
22
|
TEColumnParallelLinear,
|
|
21
23
|
TEDotProductAttention,
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/bert/bert_layer_specs.py
RENAMED
|
@@ -12,6 +12,8 @@ from megatron.core.transformer.spec_utils import ModuleSpec
|
|
|
12
12
|
from megatron.core.transformer.transformer_layer import TransformerLayer, TransformerLayerSubmodules
|
|
13
13
|
|
|
14
14
|
try:
|
|
15
|
+
import transformer_engine as te # pylint: disable=unused-import
|
|
16
|
+
|
|
15
17
|
from megatron.core.extensions.transformer_engine import (
|
|
16
18
|
TEDotProductAttention,
|
|
17
19
|
TELayerNormColumnParallelLinear,
|
|
@@ -315,5 +315,5 @@ class MultimodalRotaryEmbedding(nn.Module):
|
|
|
315
315
|
if parallel_state.get_context_parallel_world_size() > 1:
|
|
316
316
|
# slice rotary_pos_emb along sequence dimension and select the parition of the current
|
|
317
317
|
# CP rank
|
|
318
|
-
emb = get_pos_emb_on_this_cp_rank(emb, 0)
|
|
318
|
+
emb = get_pos_emb_on_this_cp_rank(emb, 0, parallel_state.get_context_parallel_group())
|
|
319
319
|
return emb
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/gpt/gpt_layer_specs.py
RENAMED
|
@@ -34,6 +34,8 @@ from megatron.core.transformer.transformer_layer import (
|
|
|
34
34
|
)
|
|
35
35
|
|
|
36
36
|
try:
|
|
37
|
+
import transformer_engine as te # pylint: disable=unused-import
|
|
38
|
+
|
|
37
39
|
from megatron.core.extensions.transformer_engine import TEFusedMLP, TENorm
|
|
38
40
|
from megatron.core.extensions.transformer_engine_spec_provider import TESpecProvider
|
|
39
41
|
|
|
@@ -42,6 +44,8 @@ except ImportError:
|
|
|
42
44
|
HAVE_TE = False
|
|
43
45
|
|
|
44
46
|
try:
|
|
47
|
+
import nvidia_kitchen # pylint: disable=unused-import
|
|
48
|
+
|
|
45
49
|
from megatron.core.extensions.kitchen import KitchenSpecProvider
|
|
46
50
|
|
|
47
51
|
HAVE_KITCHEN = True
|
|
@@ -29,6 +29,8 @@ from megatron.core.transformer.transformer_layer import (
|
|
|
29
29
|
from megatron.core.utils import is_te_min_version
|
|
30
30
|
|
|
31
31
|
try:
|
|
32
|
+
import transformer_engine as te # pylint: disable=unused-import
|
|
33
|
+
|
|
32
34
|
from megatron.core.extensions.transformer_engine import (
|
|
33
35
|
TEDotProductAttention,
|
|
34
36
|
TELayerNormColumnParallelLinear,
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/gpt/moe_module_specs.py
RENAMED
|
@@ -9,6 +9,8 @@ from megatron.core.transformer.moe.shared_experts import SharedExpertMLP
|
|
|
9
9
|
from megatron.core.transformer.spec_utils import ModuleSpec
|
|
10
10
|
|
|
11
11
|
try:
|
|
12
|
+
import transformer_engine as te # pylint: disable=unused-import
|
|
13
|
+
|
|
12
14
|
from megatron.core.extensions.transformer_engine_spec_provider import TESpecProvider
|
|
13
15
|
|
|
14
16
|
HAVE_TE = True
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/models/retro/encoder_spec.py
RENAMED
|
@@ -21,6 +21,8 @@ from megatron.core.transformer.mlp import MLP, MLPSubmodules
|
|
|
21
21
|
from megatron.core.transformer.transformer_block import TransformerBlockSubmodules
|
|
22
22
|
|
|
23
23
|
try:
|
|
24
|
+
import transformer_engine as te # pylint: disable=unused-import
|
|
25
|
+
|
|
24
26
|
from megatron.core.extensions.transformer_engine import (
|
|
25
27
|
TEColumnParallelLinear,
|
|
26
28
|
TEDotProductAttention,
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/cuda_graphs.py
RENAMED
|
@@ -23,6 +23,7 @@ from megatron.core.transformer.transformer_config import TransformerConfig
|
|
|
23
23
|
from megatron.core.utils import is_te_min_version
|
|
24
24
|
|
|
25
25
|
try:
|
|
26
|
+
import transformer_engine as te # pylint: disable=unused-import
|
|
26
27
|
from transformer_engine.pytorch.fp8 import FP8GlobalStateManager, fp8_autocast
|
|
27
28
|
from transformer_engine.pytorch.graph import restore_fp8_tensors, save_fp8_tensors
|
|
28
29
|
from transformer_engine.pytorch.graph import set_capture_end as te_set_capture_end
|
|
@@ -16,6 +16,8 @@ from megatron.core.transformer.transformer_config import TransformerConfig
|
|
|
16
16
|
from megatron.core.utils import divide
|
|
17
17
|
|
|
18
18
|
try:
|
|
19
|
+
import transformer_engine as te # pylint: disable=unused-import
|
|
20
|
+
|
|
19
21
|
from megatron.core.extensions.transformer_engine import TELayerNormColumnParallelLinear
|
|
20
22
|
|
|
21
23
|
HAVE_TE = True
|
|
@@ -67,6 +69,7 @@ if HAVE_TE:
|
|
|
67
69
|
)
|
|
68
70
|
|
|
69
71
|
def forward(self, x, **kwargs):
|
|
72
|
+
"""Forward of TELayerNormColumnParallelLinearGathered"""
|
|
70
73
|
out, bias = super().forward(x)
|
|
71
74
|
assert bias is None, "bias should be None since we set skip_bias_add=False"
|
|
72
75
|
|
|
@@ -100,6 +103,7 @@ class ColumnParallelLinearGathered(ColumnParallelLinear):
|
|
|
100
103
|
runtime_gather_output: bool | None = None,
|
|
101
104
|
**kwargs,
|
|
102
105
|
):
|
|
106
|
+
"""Forward of ColumnParallelLinearGathered"""
|
|
103
107
|
out, bias = super().forward(input_, weight, runtime_gather_output)
|
|
104
108
|
assert bias is None, "bias should be None since we set skip_bias_add=False"
|
|
105
109
|
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/moe/moe_layer.py
RENAMED
|
@@ -21,6 +21,8 @@ from megatron.core.transformer.spec_utils import ModuleSpec, build_module
|
|
|
21
21
|
from megatron.core.transformer.transformer_config import TransformerConfig
|
|
22
22
|
|
|
23
23
|
try:
|
|
24
|
+
import transformer_engine as te # pylint: disable=unused-import
|
|
25
|
+
|
|
24
26
|
from megatron.core.extensions.transformer_engine import te_checkpoint
|
|
25
27
|
|
|
26
28
|
HAVE_TE = True
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/moe/moe_utils.py
RENAMED
|
@@ -10,6 +10,8 @@ from megatron.core.process_groups_config import ModelCommProcessGroups
|
|
|
10
10
|
from megatron.core.tensor_parallel.mappings import gather_from_sequence_parallel_region
|
|
11
11
|
|
|
12
12
|
try:
|
|
13
|
+
import transformer_engine as te # pylint: disable=unused-import
|
|
14
|
+
|
|
13
15
|
from megatron.core.extensions.transformer_engine import (
|
|
14
16
|
fused_permute,
|
|
15
17
|
fused_permute_with_probs,
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/transformer/transformer_block.py
RENAMED
|
@@ -28,27 +28,38 @@ from megatron.core.transformer.utils import sharded_state_dict_default
|
|
|
28
28
|
from megatron.core.utils import WrappedTensor, deprecate_inference_params, make_viewless_tensor
|
|
29
29
|
|
|
30
30
|
try:
|
|
31
|
+
import transformer_engine.pytorch as te # pylint: disable=unused-import
|
|
32
|
+
|
|
33
|
+
HAVE_TE = True
|
|
34
|
+
except ImportError:
|
|
35
|
+
HAVE_TE = False
|
|
36
|
+
|
|
37
|
+
try:
|
|
38
|
+
import apex # pylint: disable=unused-import
|
|
39
|
+
|
|
40
|
+
HAVE_APEX = True
|
|
41
|
+
except ImportError:
|
|
42
|
+
HAVE_APEX = False
|
|
43
|
+
|
|
44
|
+
get_cpu_offload_context = None
|
|
45
|
+
te_checkpoint = None
|
|
46
|
+
|
|
47
|
+
if HAVE_TE:
|
|
31
48
|
from megatron.core.extensions.transformer_engine import (
|
|
32
49
|
TENorm,
|
|
33
50
|
get_cpu_offload_context,
|
|
34
51
|
te_checkpoint,
|
|
35
52
|
)
|
|
36
53
|
|
|
37
|
-
HAVE_TE = True
|
|
38
54
|
LayerNormImpl = TENorm
|
|
39
|
-
except ImportError:
|
|
40
|
-
HAVE_TE = False
|
|
41
|
-
get_cpu_offload_context = None
|
|
42
|
-
|
|
43
|
-
try:
|
|
44
|
-
import apex # pylint: disable=unused-import
|
|
45
55
|
|
|
46
|
-
|
|
56
|
+
elif HAVE_APEX:
|
|
57
|
+
LayerNormImpl = FusedLayerNorm
|
|
47
58
|
|
|
48
|
-
|
|
49
|
-
|
|
59
|
+
else:
|
|
60
|
+
from megatron.core.transformer.torch_norm import WrappedTorchNorm
|
|
50
61
|
|
|
51
|
-
|
|
62
|
+
LayerNormImpl = WrappedTorchNorm
|
|
52
63
|
|
|
53
64
|
|
|
54
65
|
def get_num_layers_to_build(config: TransformerConfig, vp_stage: Optional[int] = None) -> int:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: megatron-core
|
|
3
|
-
Version: 0.13.0rc3
|
|
3
|
+
Version: 0.13.0rc4
|
|
4
4
|
Summary: Megatron Core - a library for efficient and scalable training of transformer based models
|
|
5
5
|
Author-email: NVIDIA <nemo-toolkit@nvidia.com>
|
|
6
6
|
Maintainer-email: NVIDIA <nemo-toolkit@nvidia.com>
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/blended_dataset.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/indexed_dataset.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/masked_dataset.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/megatron_dataset.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/megatron_tokenizer.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/multimodal_dataset.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/object_storage_utils.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/__init__.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/config/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/config/config.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/db/__init__.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/db/build.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/db/dataset.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/db/utils.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/external_libs.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/index/__init__.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/index/build.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/index/factory.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/index/index.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/index/utils.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/index/validate.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/query/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/query/query.py
RENAMED
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/retro/query/utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/datasets/utils_object_storage.py
RENAMED
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/__init__.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/core.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/dict_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/mapping.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/optimizer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/utils.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/dist_checkpointing/validation.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/distributed/data_parallel_base.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/trtllm/trt_model_config.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/trtllm/trt_model_type.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/trtllm/trtllm_helper.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/export/trtllm/trtllm_layers.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/extensions/transformer_engine.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/fusions/fused_bias_dropout.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/fusions/fused_bias_geglu.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/fusions/fused_bias_gelu.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/fusions/fused_bias_swiglu.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/fusions/fused_cross_entropy.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/fusions/fused_indices_converter.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/fusions/fused_layer_norm.py
RENAMED
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/fusions/fused_pad_routing_map.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/communication_utils.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/contexts/__init__.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/contexts/base_context.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/engines/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/engines/mcore_engine.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/engines/static_engine.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc3 → megatron_core-0.13.0rc4}/megatron/core/inference/inference_request.py
RENAMED
|
File without changes
|
|
File without changes
|