megatron-core 0.13.0rc0__tar.gz → 0.13.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megatron-core might be problematic. Click here for more details.
- {megatron_core-0.13.0rc0/megatron_core.egg-info → megatron_core-0.13.0rc1}/PKG-INFO +1 -1
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/extensions/transformer_engine.py +6 -1
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/package_info.py +1 -1
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/pipeline_parallel/schedules.py +2 -1
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1/megatron_core.egg-info}/PKG-INFO +1 -1
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/LICENSE +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/MANIFEST.in +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/README.md +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/README.md +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/config.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/config_logger.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/bert_dataset.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/blended_dataset.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/blended_megatron_dataset_builder.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/blended_megatron_dataset_config.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/gpt_dataset.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/helpers.cpp +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/helpers.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/indexed_dataset.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/masked_dataset.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/megatron_dataset.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/megatron_tokenizer.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/multimodal_dataset.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/object_storage_utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/config/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/config/bert_embedders.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/config/config.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/config/gpt_chunk_datasets.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/config/tokenizers.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/db/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/db/build.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/db/dataset.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/db/utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/external_libs.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/index/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/index/build.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/index/factory.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/index/index.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/index/indexes/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/index/indexes/faiss_base.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/index/indexes/faiss_par_add.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/index/utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/index/validate.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/query/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/query/gpt_chunk_dataset.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/query/multi_split_gpt_dataset.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/query/query.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/query/retro_dataset.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/query/utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/t5_dataset.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/utils_object_storage.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/utils_s3.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/core.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/dict_utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/exchange_utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/mapping.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/optimizer.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/serialization.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/state_dict_utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/strategies/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/strategies/async_utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/strategies/base.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/strategies/cached_metadata_filesystem_reader.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/strategies/common.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/strategies/filesystem_async.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/strategies/fully_parallel.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/strategies/resharding.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/strategies/state_dict_saver.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/strategies/tensorstore.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/strategies/torch.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/strategies/two_stage.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/strategies/zarr.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/tensor_aware_state_dict.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/validation.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/distributed/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/distributed/custom_fsdp/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/distributed/custom_fsdp/fully_sharded_data_parallel.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/distributed/custom_fsdp/param_and_grad_buffer.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/distributed/data_parallel_base.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/distributed/distributed_data_parallel.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/distributed/distributed_data_parallel_config.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/distributed/finalize_model_grads.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/distributed/param_and_grad_buffer.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/distributed/torch_fully_sharded_data_parallel.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/distributed/torch_fully_sharded_data_parallel_config.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/energy_monitor.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/enums.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/data_type.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/export_config.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/model_type.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/trtllm/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/trtllm/engine_builder/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/trtllm/engine_builder/trtllm_engine_builder.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/trtllm/model_to_trllm_mapping/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/trtllm/model_to_trllm_mapping/default_conversion_dict.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/trtllm/trt_model_config.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/trtllm/trt_model_type.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/trtllm/trtllm_helper.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/trtllm/trtllm_layers.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/trtllm/trtllm_weights_converter/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/trtllm/trtllm_weights_converter/distributed_trtllm_model_weights_converter.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/trtllm/trtllm_weights_converter/single_device_trtllm_model_weights_converter.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/trtllm/trtllm_weights_converter/utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/extensions/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/extensions/kitchen.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/extensions/transformer_engine_spec_provider.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/fp8_utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/fusions/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/fusions/fused_bias_dropout.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/fusions/fused_bias_geglu.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/fusions/fused_bias_gelu.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/fusions/fused_bias_swiglu.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/fusions/fused_cross_entropy.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/fusions/fused_indices_converter.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/fusions/fused_layer_norm.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/fusions/fused_mla_yarn_rope_apply.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/fusions/fused_pad_routing_map.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/fusions/fused_softmax.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/async_stream.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/common_inference_params.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/communication_utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/contexts/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/contexts/base_context.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/contexts/dynamic_chunk_allocator.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/contexts/dynamic_context.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/contexts/static_context.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/engines/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/engines/abstract_engine.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/engines/dynamic_engine.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/engines/mcore_engine.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/engines/static_engine.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/inference_request.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/model_inference_wrappers/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/model_inference_wrappers/gpt/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/model_inference_wrappers/gpt/gpt_inference_wrapper.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/model_inference_wrappers/inference_wrapper_config.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/model_inference_wrappers/multimodal/vlm_inference_wrapper.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/model_inference_wrappers/t5/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/model_inference_wrappers/t5/t5_inference_wrapper.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/sampling_params.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/scheduler.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/text_generation_controllers/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/text_generation_controllers/encoder_decoder_text_generation_controller.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/text_generation_controllers/simple_text_generation_controller.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/text_generation_controllers/text_generation_controller.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/text_generation_controllers/vlm_text_generation_controller.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference_params.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/jit.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/model_parallel_config.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/T5/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/T5/t5_model.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/T5/t5_spec.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/backends.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/bert/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/bert/bert_layer_specs.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/bert/bert_lm_head.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/bert/bert_model.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/bert/pooler.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/common/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/common/embeddings/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/common/embeddings/language_model_embedding.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/common/embeddings/relative_pos_embedding.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/common/embeddings/rope_utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/common/embeddings/rotary_pos_embedding.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/common/language_module/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/common/language_module/language_module.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/common/vision_module/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/common/vision_module/vision_module.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/gpt/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/gpt/fine_grained_callables.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/gpt/gpt_layer_specs.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/gpt/gpt_model.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/gpt/heterogeneous/heterogeneous_layer_specs.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/gpt/moe_module_specs.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/huggingface/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/huggingface/clip_model.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/huggingface/module.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/huggingface/qwen_model.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/mamba/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/mamba/mamba_layer_specs.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/mamba/mamba_model.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/mimo/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/mimo/config/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/mimo/config/base_configs.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/mimo/model/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/mimo/model/base.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/mimo/submodules/audio.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/mimo/submodules/base.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/mimo/submodules/vision.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/multimodal/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/multimodal/context_parallel.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/multimodal/llava_model.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/multimodal/llava_spec.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/retro/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/retro/base_attention.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/retro/config.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/retro/decoder_attention.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/retro/decoder_spec.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/retro/encoder_attention.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/retro/encoder_spec.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/retro/model.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/retro/utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/vision/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/vision/clip_vit_model.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/vision/multimodal_projector.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/vision/radio.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/vision/vit_layer_specs.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/msc_utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/num_microbatches_calculator.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/optimizer/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/optimizer/clip_grads.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/optimizer/cpu_offloading/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/optimizer/cpu_offloading/hybrid_optimizer.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/optimizer/distrib_optimizer.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/optimizer/grad_scaler.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/optimizer/optimizer.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/optimizer/optimizer_config.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/optimizer_param_scheduler.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/packed_seq_params.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/parallel_state.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/pipeline_parallel/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/pipeline_parallel/p2p_communication.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/pipeline_parallel/utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/post_training/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/post_training/modelopt/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/post_training/modelopt/gpt/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/post_training/modelopt/gpt/model_specs.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/post_training/modelopt/gpt/state_dict_hooks.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/post_training/modelopt/layers.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/post_training/modelopt/mamba/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/post_training/modelopt/mamba/model_specs.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/process_groups_config.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/quantization/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/quantization/quant_config.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/quantization/utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/requirements.txt +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/rerun_state_machine.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/ssm/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/ssm/mamba_block.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/ssm/mamba_context_parallel.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/ssm/mamba_hybrid_layer_allocation.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/ssm/mamba_layer.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/ssm/mamba_mixer.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/ssm/mlp_layer.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/ssm/triton_cache_manager.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/tensor_parallel/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/tensor_parallel/cross_entropy.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/tensor_parallel/data.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/tensor_parallel/layers.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/tensor_parallel/mappings.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/tensor_parallel/random.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/tensor_parallel/utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/timers.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/attention.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/cuda_graphs.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/custom_layers/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/custom_layers/transformer_engine.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/dot_product_attention.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/enums.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/heterogeneous/heterogeneous_config.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/heterogeneous/linear_replacements.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/identity_op.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/mlp.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/module.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/moe/__init__.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/moe/experts.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/moe/fused_a2a.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/moe/grouped_gemm_util.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/moe/moe_layer.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/moe/moe_utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/moe/router.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/moe/shared_experts.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/moe/token_dispatcher.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/moe/upcycling_utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/multi_latent_attention.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/multi_token_prediction.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/pipeline_parallel_layer_layout.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/spec_utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/torch_layer_norm.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/torch_norm.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/transformer_block.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/transformer_config.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/transformer_layer.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/transformer/utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/utils.py +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron_core.egg-info/SOURCES.txt +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron_core.egg-info/dependency_links.txt +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron_core.egg-info/requires.txt +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron_core.egg-info/top_level.txt +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/pyproject.toml +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/setup.cfg +0 -0
- {megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/setup.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: megatron-core
|
|
3
|
-
Version: 0.13.0rc0
|
|
3
|
+
Version: 0.13.0rc1
|
|
4
4
|
Summary: Megatron Core - a library for efficient and scalable training of transformer based models
|
|
5
5
|
Author-email: NVIDIA <nemo-toolkit@nvidia.com>
|
|
6
6
|
Maintainer-email: NVIDIA <nemo-toolkit@nvidia.com>
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/extensions/transformer_engine.py
RENAMED
|
@@ -1543,7 +1543,12 @@ try:
|
|
|
1543
1543
|
enabled, num_layers, model_layers, activation_offloading, weight_offloading
|
|
1544
1544
|
):
|
|
1545
1545
|
"""Get CPU offload context and sync function."""
|
|
1546
|
-
if is_te_min_version("1.10.0.dev0"):
|
|
1546
|
+
if is_te_min_version("2.5.0"):
|
|
1547
|
+
# Enables the additional double buffering switch for activations during LLM training
|
|
1548
|
+
context, sync_func = _get_cpu_offload_context(
|
|
1549
|
+
enabled, num_layers, model_layers, activation_offloading, weight_offloading, True
|
|
1550
|
+
)
|
|
1551
|
+
elif is_te_min_version("1.10.0.dev0"):
|
|
1547
1552
|
context, sync_func = _get_cpu_offload_context(
|
|
1548
1553
|
enabled, num_layers, model_layers, activation_offloading, weight_offloading
|
|
1549
1554
|
)
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/pipeline_parallel/schedules.py
RENAMED
|
@@ -618,7 +618,8 @@ def get_pp_rank_microbatches(
|
|
|
618
618
|
num_warmup_microbatches += (num_model_chunks - 1) * microbatch_group_size_per_vp_stage
|
|
619
619
|
else:
|
|
620
620
|
# forward_backward_no_pipelining
|
|
621
|
-
|
|
621
|
+
# This path is only used for cuda graph capturing compatibility for the PP=1 case.
|
|
622
|
+
num_warmup_microbatches = 0
|
|
622
623
|
|
|
623
624
|
if num_warmup_microbatches >= total_num_microbatches:
|
|
624
625
|
num_warmup_microbatches = total_num_microbatches
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: megatron-core
|
|
3
|
-
Version: 0.13.0rc0
|
|
3
|
+
Version: 0.13.0rc1
|
|
4
4
|
Summary: Megatron Core - a library for efficient and scalable training of transformer based models
|
|
5
5
|
Author-email: NVIDIA <nemo-toolkit@nvidia.com>
|
|
6
6
|
Maintainer-email: NVIDIA <nemo-toolkit@nvidia.com>
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/blended_dataset.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/indexed_dataset.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/masked_dataset.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/megatron_dataset.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/megatron_tokenizer.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/multimodal_dataset.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/object_storage_utils.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/__init__.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/config/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/config/config.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/db/__init__.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/db/build.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/db/dataset.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/db/utils.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/external_libs.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/index/__init__.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/index/build.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/index/factory.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/index/index.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/index/utils.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/index/validate.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/query/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/query/query.py
RENAMED
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/retro/query/utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/datasets/utils_object_storage.py
RENAMED
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/__init__.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/core.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/dict_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/mapping.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/optimizer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/utils.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/dist_checkpointing/validation.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/distributed/data_parallel_base.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/trtllm/trt_model_config.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/trtllm/trt_model_type.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/trtllm/trtllm_helper.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/export/trtllm/trtllm_layers.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/fusions/fused_bias_dropout.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/fusions/fused_bias_geglu.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/fusions/fused_bias_gelu.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/fusions/fused_bias_swiglu.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/fusions/fused_cross_entropy.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/fusions/fused_indices_converter.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/fusions/fused_layer_norm.py
RENAMED
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/fusions/fused_pad_routing_map.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/communication_utils.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/contexts/__init__.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/contexts/base_context.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/engines/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/engines/mcore_engine.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/engines/static_engine.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/inference_request.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/inference/sampling_params.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/bert/bert_layer_specs.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/bert/bert_lm_head.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/gpt/gpt_layer_specs.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/gpt/moe_module_specs.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/huggingface/__init__.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/huggingface/clip_model.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/huggingface/module.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/huggingface/qwen_model.py
RENAMED
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/mamba/mamba_layer_specs.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/mamba/mamba_model.py
RENAMED
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/mimo/config/__init__.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/mimo/config/base_configs.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/mimo/model/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/mimo/submodules/audio.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/mimo/submodules/base.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/mimo/submodules/vision.py
RENAMED
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/multimodal/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{megatron_core-0.13.0rc0 → megatron_core-0.13.0rc1}/megatron/core/models/multimodal/llava_model.py
RENAMED
|
File without changes
|