megatron-core 0.4.0__tar.gz → 0.10.0__tar.gz
This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Potentially problematic release.
This version of megatron-core might be problematic. Click here for more details.
- {megatron_core-0.4.0 → megatron_core-0.10.0}/LICENSE +14 -33
- megatron_core-0.10.0/MANIFEST.in +3 -0
- megatron_core-0.10.0/PKG-INFO +936 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/README.md +174 -92
- megatron_core-0.10.0/megatron/core/README.md +14 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/__init__.py +16 -0
- megatron_core-0.10.0/megatron/core/config_logger.py +104 -0
- megatron_core-0.10.0/megatron/core/datasets/bert_dataset.py +192 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/datasets/blended_dataset.py +50 -39
- megatron_core-0.10.0/megatron/core/datasets/blended_megatron_dataset_builder.py +528 -0
- megatron_core-0.10.0/megatron/core/datasets/blended_megatron_dataset_config.py +177 -0
- megatron_core-0.10.0/megatron/core/datasets/gpt_dataset.py +810 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/datasets/helpers.cpp +124 -43
- megatron_core-0.10.0/megatron/core/datasets/helpers.py +64 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/datasets/indexed_dataset.py +271 -53
- megatron_core-0.10.0/megatron/core/datasets/masked_dataset.py +425 -0
- megatron_core-0.10.0/megatron/core/datasets/megatron_dataset.py +139 -0
- megatron_core-0.10.0/megatron/core/datasets/megatron_tokenizer.py +154 -0
- megatron_core-0.10.0/megatron/core/datasets/multimodal_dataset.py +62 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/__init__.py +5 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/config/__init__.py +16 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/config/bert_embedders.py +48 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/config/config.py +135 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/config/gpt_chunk_datasets.py +15 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/config/tokenizers.py +15 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/db/__init__.py +9 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/db/build.py +633 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/db/dataset.py +105 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/db/utils.py +367 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/external_libs.py +15 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/index/__init__.py +11 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/index/build.py +313 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/index/factory.py +40 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/index/index.py +133 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/index/indexes/__init__.py +10 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/index/indexes/faiss_base.py +150 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/index/indexes/faiss_par_add.py +208 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/index/utils.py +126 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/index/validate.py +191 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/query/__init__.py +1 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/query/gpt_chunk_dataset.py +109 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/query/multi_split_gpt_dataset.py +107 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/query/query.py +393 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/query/retro_dataset.py +238 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/query/utils.py +35 -0
- megatron_core-0.10.0/megatron/core/datasets/retro/utils.py +349 -0
- megatron_core-0.10.0/megatron/core/datasets/t5_dataset.py +331 -0
- megatron_core-0.10.0/megatron/core/datasets/utils.py +87 -0
- megatron_core-0.10.0/megatron/core/datasets/utils_s3.py +164 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/dist_checkpointing/__init__.py +2 -1
- megatron_core-0.10.0/megatron/core/dist_checkpointing/core.py +77 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/dist_checkpointing/dict_utils.py +56 -27
- megatron_core-0.10.0/megatron/core/dist_checkpointing/exchange_utils.py +519 -0
- megatron_core-0.10.0/megatron/core/dist_checkpointing/mapping.py +723 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/dist_checkpointing/optimizer.py +62 -10
- megatron_core-0.10.0/megatron/core/dist_checkpointing/serialization.py +424 -0
- megatron_core-0.10.0/megatron/core/dist_checkpointing/state_dict_transformation.py +270 -0
- megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/__init__.py +7 -0
- megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/async_utils.py +224 -0
- megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/base.py +227 -0
- megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/common.py +157 -0
- megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/filesystem_async.py +439 -0
- megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/fully_parallel.py +439 -0
- megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/resharding.py +315 -0
- megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/state_dict_saver.py +162 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/dist_checkpointing/strategies/tensorstore.py +15 -18
- megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/torch.py +939 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/dist_checkpointing/strategies/two_stage.py +14 -16
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/dist_checkpointing/strategies/zarr.py +62 -26
- megatron_core-0.10.0/megatron/core/dist_checkpointing/utils.py +219 -0
- megatron_core-0.10.0/megatron/core/dist_checkpointing/validation.py +560 -0
- megatron_core-0.10.0/megatron/core/distributed/__init__.py +8 -0
- megatron_core-0.10.0/megatron/core/distributed/data_parallel_base.py +96 -0
- megatron_core-0.10.0/megatron/core/distributed/distributed_data_parallel.py +478 -0
- megatron_core-0.10.0/megatron/core/distributed/distributed_data_parallel_config.py +49 -0
- megatron_core-0.10.0/megatron/core/distributed/finalize_model_grads.py +284 -0
- megatron_core-0.10.0/megatron/core/distributed/param_and_grad_buffer.py +840 -0
- megatron_core-0.10.0/megatron/core/distributed/torch_fully_sharded_data_parallel.py +115 -0
- megatron_core-0.10.0/megatron/core/export/__init__.py +1 -0
- megatron_core-0.10.0/megatron/core/export/data_type.py +5 -0
- megatron_core-0.10.0/megatron/core/export/export_config.py +19 -0
- megatron_core-0.10.0/megatron/core/export/model_type.py +7 -0
- megatron_core-0.10.0/megatron/core/export/trtllm/__init__.py +1 -0
- megatron_core-0.10.0/megatron/core/export/trtllm/engine_builder/__init__.py +1 -0
- megatron_core-0.10.0/megatron/core/export/trtllm/engine_builder/trtllm_engine_builder.py +154 -0
- megatron_core-0.10.0/megatron/core/export/trtllm/model_to_trllm_mapping/__init__.py +1 -0
- megatron_core-0.10.0/megatron/core/export/trtllm/model_to_trllm_mapping/default_conversion_dict.py +36 -0
- megatron_core-0.10.0/megatron/core/export/trtllm/trt_model_config.py +15 -0
- megatron_core-0.10.0/megatron/core/export/trtllm/trt_model_type.py +13 -0
- megatron_core-0.10.0/megatron/core/export/trtllm/trtllm_helper.py +588 -0
- megatron_core-0.10.0/megatron/core/export/trtllm/trtllm_layers.py +157 -0
- megatron_core-0.10.0/megatron/core/export/trtllm/trtllm_weights_converter/__init__.py +1 -0
- megatron_core-0.10.0/megatron/core/export/trtllm/trtllm_weights_converter/distributed_trtllm_model_weights_converter.py +280 -0
- megatron_core-0.10.0/megatron/core/export/trtllm/trtllm_weights_converter/single_device_trtllm_model_weights_converter.py +471 -0
- megatron_core-0.10.0/megatron/core/extensions/transformer_engine.py +1268 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/fusions/fused_bias_dropout.py +6 -4
- megatron_core-0.10.0/megatron/core/fusions/fused_bias_geglu.py +85 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/fusions/fused_bias_gelu.py +10 -3
- megatron_core-0.10.0/megatron/core/fusions/fused_bias_swiglu.py +89 -0
- megatron_core-0.10.0/megatron/core/fusions/fused_cross_entropy.py +143 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/fusions/fused_layer_norm.py +37 -19
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/fusions/fused_softmax.py +18 -2
- megatron_core-0.10.0/megatron/core/inference/__init__.py +1 -0
- megatron_core-0.10.0/megatron/core/inference/ammo_support/__init__.py +8 -0
- megatron_core-0.10.0/megatron/core/inference/ammo_support/gpt/model_specs.py +2 -0
- megatron_core-0.10.0/megatron/core/inference/ammo_support/gpt/state_dict_hooks.py +5 -0
- megatron_core-0.10.0/megatron/core/inference/common_inference_params.py +29 -0
- megatron_core-0.10.0/megatron/core/inference/communication_utils.py +50 -0
- megatron_core-0.10.0/megatron/core/inference/engines/__init__.py +1 -0
- megatron_core-0.10.0/megatron/core/inference/engines/abstract_engine.py +17 -0
- megatron_core-0.10.0/megatron/core/inference/engines/mcore_engine.py +113 -0
- megatron_core-0.10.0/megatron/core/inference/inference_request.py +39 -0
- megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/__init__.py +1 -0
- megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py +238 -0
- megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/gpt/__init__.py +1 -0
- megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/gpt/gpt_inference_wrapper.py +90 -0
- megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/inference_wrapper_config.py +44 -0
- megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/t5/__init__.py +1 -0
- megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/t5/t5_inference_wrapper.py +215 -0
- megatron_core-0.10.0/megatron/core/inference/modelopt_support/__init__.py +8 -0
- megatron_core-0.10.0/megatron/core/inference/modelopt_support/gpt/__init__.py +1 -0
- megatron_core-0.10.0/megatron/core/inference/modelopt_support/gpt/model_specs.py +63 -0
- megatron_core-0.10.0/megatron/core/inference/modelopt_support/gpt/state_dict_hooks.py +133 -0
- megatron_core-0.10.0/megatron/core/inference/scheduler.py +127 -0
- megatron_core-0.10.0/megatron/core/inference/text_generation_controllers/__init__.py +1 -0
- megatron_core-0.10.0/megatron/core/inference/text_generation_controllers/encoder_decoder_text_generation_controller.py +35 -0
- megatron_core-0.10.0/megatron/core/inference/text_generation_controllers/simple_text_generation_controller.py +402 -0
- megatron_core-0.10.0/megatron/core/inference/utils.py +17 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/inference_params.py +4 -0
- megatron_core-0.10.0/megatron/core/jit.py +10 -0
- megatron_core-0.10.0/megatron/core/model_parallel_config.py +387 -0
- megatron_core-0.10.0/megatron/core/models/T5/__init__.py +2 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/T5/t5_model.py +173 -189
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/T5/t5_spec.py +63 -27
- megatron_core-0.10.0/megatron/core/models/bert/bert_layer_specs.py +116 -0
- megatron_core-0.10.0/megatron/core/models/bert/bert_lm_head.py +50 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/bert/bert_model.py +135 -36
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/bert/pooler.py +1 -0
- megatron_core-0.10.0/megatron/core/models/common/embeddings/__init__.py +5 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/common/embeddings/language_model_embedding.py +25 -45
- megatron_core-0.10.0/megatron/core/models/common/embeddings/rope_utils.py +261 -0
- megatron_core-0.10.0/megatron/core/models/common/embeddings/rotary_pos_embedding.py +213 -0
- megatron_core-0.10.0/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py +179 -0
- megatron_core-0.10.0/megatron/core/models/common/language_module/language_module.py +204 -0
- megatron_core-0.10.0/megatron/core/models/common/vision_module/vision_module.py +17 -0
- megatron_core-0.10.0/megatron/core/models/gpt/__init__.py +2 -0
- megatron_core-0.10.0/megatron/core/models/gpt/gpt_layer_specs.py +357 -0
- megatron_core-0.10.0/megatron/core/models/gpt/gpt_model.py +309 -0
- megatron_core-0.10.0/megatron/core/models/mamba/__init__.py +2 -0
- megatron_core-0.10.0/megatron/core/models/mamba/mamba_layer_specs.py +67 -0
- megatron_core-0.4.0/megatron/core/models/gpt/gpt_model.py → megatron_core-0.10.0/megatron/core/models/mamba/mamba_model.py +74 -87
- megatron_core-0.10.0/megatron/core/models/multimodal/__init__.py +1 -0
- megatron_core-0.10.0/megatron/core/models/multimodal/llava_model.py +923 -0
- megatron_core-0.10.0/megatron/core/models/multimodal/llava_spec.py +87 -0
- megatron_core-0.10.0/megatron/core/models/retro/__init__.py +13 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/retro/base_attention.py +10 -12
- megatron_core-0.10.0/megatron/core/models/retro/config.py +85 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/retro/decoder_attention.py +71 -67
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/retro/decoder_spec.py +66 -33
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/retro/encoder_attention.py +52 -49
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/retro/encoder_spec.py +51 -24
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/retro/model.py +34 -24
- megatron_core-0.10.0/megatron/core/models/retro/utils.py +24 -0
- megatron_core-0.10.0/megatron/core/models/vision/__init__.py +0 -0
- megatron_core-0.10.0/megatron/core/models/vision/clip_vit_model.py +219 -0
- megatron_core-0.10.0/megatron/core/models/vision/multimodal_projector.py +74 -0
- megatron_core-0.10.0/megatron/core/models/vision/vit_layer_specs.py +95 -0
- megatron_core-0.10.0/megatron/core/num_microbatches_calculator.py +508 -0
- megatron_core-0.10.0/megatron/core/optimizer/__init__.py +459 -0
- megatron_core-0.10.0/megatron/core/optimizer/clip_grads.py +220 -0
- megatron_core-0.10.0/megatron/core/optimizer/distrib_optimizer.py +1822 -0
- megatron_core-0.10.0/megatron/core/optimizer/grad_scaler.py +142 -0
- megatron_core-0.10.0/megatron/core/optimizer/optimizer.py +1069 -0
- megatron_core-0.10.0/megatron/core/optimizer/optimizer_config.py +116 -0
- megatron_core-0.10.0/megatron/core/optimizer_param_scheduler.py +297 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/package_info.py +2 -2
- megatron_core-0.10.0/megatron/core/packed_seq_params.py +20 -0
- megatron_core-0.10.0/megatron/core/parallel_state.py +1900 -0
- megatron_core-0.10.0/megatron/core/pipeline_parallel/__init__.py +2 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/pipeline_parallel/p2p_communication.py +129 -68
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/pipeline_parallel/schedules.py +835 -251
- megatron_core-0.10.0/megatron/core/requirements.txt +2 -0
- megatron_core-0.10.0/megatron/core/rerun_state_machine.py +1133 -0
- megatron_core-0.10.0/megatron/core/ssm/__init__.py +0 -0
- megatron_core-0.10.0/megatron/core/ssm/mamba_block.py +336 -0
- megatron_core-0.10.0/megatron/core/ssm/mamba_hybrid_layer_allocation.py +191 -0
- megatron_core-0.10.0/megatron/core/ssm/mamba_layer.py +116 -0
- megatron_core-0.10.0/megatron/core/ssm/mamba_mixer.py +718 -0
- megatron_core-0.10.0/megatron/core/ssm/triton_cache_manager.py +81 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/tensor_parallel/__init__.py +12 -5
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/tensor_parallel/cross_entropy.py +132 -42
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/tensor_parallel/data.py +6 -5
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/tensor_parallel/layers.py +465 -214
- megatron_core-0.10.0/megatron/core/tensor_parallel/mappings.py +576 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/tensor_parallel/random.py +67 -25
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/tensor_parallel/utils.py +34 -34
- megatron_core-0.10.0/megatron/core/timers.py +421 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/transformer/__init__.py +1 -1
- megatron_core-0.10.0/megatron/core/transformer/attention.py +734 -0
- megatron_core-0.10.0/megatron/core/transformer/cuda_graphs.py +313 -0
- megatron_core-0.10.0/megatron/core/transformer/custom_layers/__init__.py +0 -0
- megatron_core-0.10.0/megatron/core/transformer/custom_layers/transformer_engine.py +12 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/transformer/dot_product_attention.py +30 -19
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/transformer/enums.py +2 -0
- megatron_core-0.10.0/megatron/core/transformer/mlp.py +261 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/transformer/module.py +51 -13
- megatron_core-0.10.0/megatron/core/transformer/moe/__init__.py +0 -0
- megatron_core-0.10.0/megatron/core/transformer/moe/experts.py +853 -0
- megatron_core-0.10.0/megatron/core/transformer/moe/grouped_gemm_util.py +22 -0
- megatron_core-0.10.0/megatron/core/transformer/moe/legacy_a2a_token_dispatcher.py +314 -0
- megatron_core-0.10.0/megatron/core/transformer/moe/moe_layer.py +160 -0
- megatron_core-0.10.0/megatron/core/transformer/moe/moe_utils.py +407 -0
- megatron_core-0.10.0/megatron/core/transformer/moe/router.py +305 -0
- megatron_core-0.10.0/megatron/core/transformer/moe/shared_experts.py +244 -0
- megatron_core-0.10.0/megatron/core/transformer/moe/token_dispatcher.py +594 -0
- megatron_core-0.10.0/megatron/core/transformer/moe/upcycling_utils.py +196 -0
- megatron_core-0.10.0/megatron/core/transformer/multi_latent_attention.py +387 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/transformer/spec_utils.py +0 -3
- megatron_core-0.10.0/megatron/core/transformer/torch_layer_norm.py +4 -0
- megatron_core-0.10.0/megatron/core/transformer/torch_norm.py +48 -0
- megatron_core-0.10.0/megatron/core/transformer/transformer_block.py +618 -0
- megatron_core-0.10.0/megatron/core/transformer/transformer_config.py +637 -0
- megatron_core-0.10.0/megatron/core/transformer/transformer_layer.py +397 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/transformer/utils.py +60 -20
- megatron_core-0.10.0/megatron/core/utils.py +1415 -0
- megatron_core-0.10.0/megatron_core.egg-info/PKG-INFO +936 -0
- megatron_core-0.10.0/megatron_core.egg-info/SOURCES.txt +243 -0
- megatron_core-0.10.0/megatron_core.egg-info/requires.txt +16 -0
- megatron_core-0.10.0/pyproject.toml +72 -0
- megatron_core-0.10.0/requirements/pytorch:24.01/requirements.txt +15 -0
- megatron_core-0.10.0/requirements/pytorch:24.07/requirements.txt +14 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/setup.py +19 -25
- megatron_core-0.4.0/PKG-INFO +0 -34
- megatron_core-0.4.0/megatron/core/datasets/blended_megatron_dataset_builder.py +0 -328
- megatron_core-0.4.0/megatron/core/datasets/blended_megatron_dataset_config.py +0 -119
- megatron_core-0.4.0/megatron/core/datasets/gpt_dataset.py +0 -460
- megatron_core-0.4.0/megatron/core/datasets/megatron_dataset.py +0 -135
- megatron_core-0.4.0/megatron/core/datasets/utils.py +0 -60
- megatron_core-0.4.0/megatron/core/dist_checkpointing/core.py +0 -41
- megatron_core-0.4.0/megatron/core/dist_checkpointing/mapping.py +0 -308
- megatron_core-0.4.0/megatron/core/dist_checkpointing/serialization.py +0 -385
- megatron_core-0.4.0/megatron/core/dist_checkpointing/strategies/__init__.py +0 -16
- megatron_core-0.4.0/megatron/core/dist_checkpointing/strategies/base.py +0 -90
- megatron_core-0.4.0/megatron/core/dist_checkpointing/utils.py +0 -44
- megatron_core-0.4.0/megatron/core/distributed/__init__.py +0 -2
- megatron_core-0.4.0/megatron/core/distributed/distributed_data_parallel.py +0 -250
- megatron_core-0.4.0/megatron/core/distributed/finalize_model_grads.py +0 -158
- megatron_core-0.4.0/megatron/core/distributed/grad_buffer.py +0 -410
- megatron_core-0.4.0/megatron/core/model_parallel_config.py +0 -222
- megatron_core-0.4.0/megatron/core/models/T5/__init__.py +0 -1
- megatron_core-0.4.0/megatron/core/models/bert/bert_layer_specs.py +0 -64
- megatron_core-0.4.0/megatron/core/models/bert/bert_lm_head.py +0 -75
- megatron_core-0.4.0/megatron/core/models/common/embeddings/rotary_pos_embedding.py +0 -167
- megatron_core-0.4.0/megatron/core/models/common/language_module/language_module.py +0 -105
- megatron_core-0.4.0/megatron/core/models/gpt/__init__.py +0 -1
- megatron_core-0.4.0/megatron/core/models/gpt/gpt_layer_specs.py +0 -123
- megatron_core-0.4.0/megatron/core/models/retro/__init__.py +0 -5
- megatron_core-0.4.0/megatron/core/models/retro/config.py +0 -43
- megatron_core-0.4.0/megatron/core/parallel_state.py +0 -980
- megatron_core-0.4.0/megatron/core/pipeline_parallel/__init__.py +0 -1
- megatron_core-0.4.0/megatron/core/tensor_parallel/mappings.py +0 -358
- megatron_core-0.4.0/megatron/core/transformer/attention.py +0 -443
- megatron_core-0.4.0/megatron/core/transformer/custom_layers/transformer_engine.py +0 -431
- megatron_core-0.4.0/megatron/core/transformer/mlp.py +0 -184
- megatron_core-0.4.0/megatron/core/transformer/switch_mlp.py +0 -158
- megatron_core-0.4.0/megatron/core/transformer/transformer_block.py +0 -349
- megatron_core-0.4.0/megatron/core/transformer/transformer_config.py +0 -288
- megatron_core-0.4.0/megatron/core/transformer/transformer_layer.py +0 -245
- megatron_core-0.4.0/megatron/core/utils.py +0 -236
- megatron_core-0.4.0/megatron_core.egg-info/PKG-INFO +0 -34
- megatron_core-0.4.0/megatron_core.egg-info/SOURCES.txt +0 -96
- megatron_core-0.4.0/pyproject.toml +0 -24
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/datasets/__init__.py +0 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/enums.py +0 -0
- {megatron_core-0.4.0/megatron/core/fusions → megatron_core-0.10.0/megatron/core/extensions}/__init__.py +0 -0
- {megatron_core-0.4.0/megatron/core/models → megatron_core-0.10.0/megatron/core/fusions}/__init__.py +0 -0
- {megatron_core-0.4.0/megatron/core/models/bert → megatron_core-0.10.0/megatron/core/models}/__init__.py +0 -0
- {megatron_core-0.4.0/megatron/core/models/common → megatron_core-0.10.0/megatron/core/models/bert}/__init__.py +0 -0
- {megatron_core-0.4.0/megatron/core/models/common/embeddings → megatron_core-0.10.0/megatron/core/models/common}/__init__.py +0 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/common/language_module/__init__.py +0 -0
- {megatron_core-0.4.0/megatron/core/transformer/custom_layers → megatron_core-0.10.0/megatron/core/models/common/vision_module}/__init__.py +0 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/transformer/identity_op.py +0 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron_core.egg-info/dependency_links.txt +0 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron_core.egg-info/top_level.txt +0 -0
- {megatron_core-0.4.0 → megatron_core-0.10.0}/setup.cfg +0 -0
|
@@ -29,13 +29,15 @@ The following applies to all files unless otherwise noted:
|
|
|
29
29
|
--
|
|
30
30
|
|
|
31
31
|
This repository also contains code from Hugging Face Inc., Google Research,
|
|
32
|
-
Facebook (from their Fairseq and
|
|
33
|
-
Swin-Transformer project)
|
|
34
|
-
|
|
35
|
-
|
|
32
|
+
Facebook (from their Fairseq, Dino, and ParlAI projects), Microsoft (from their
|
|
33
|
+
Swin-Transformer project), Philip Popien, the Mamba project (Tri Dao and
|
|
34
|
+
Albert Gu), and the Triton language and compiler project (Philippe Tillet and
|
|
35
|
+
OpenAI). Files from these organizations have notices at the top of each file.
|
|
36
|
+
Below are licenses used in those files, as indicated.
|
|
36
37
|
|
|
37
38
|
|
|
38
|
-
|
|
39
|
+
--------------------------------------------------------------------------------
|
|
40
|
+
-- LICENSE FOR Facebook, huggingface, Google Research, LLaVA, and Mamba code --
|
|
39
41
|
|
|
40
42
|
|
|
41
43
|
Apache License
|
|
@@ -240,12 +242,16 @@ licenses used in those files, as indicated.
|
|
|
240
242
|
See the License for the specific language governing permissions and
|
|
241
243
|
limitations under the License.
|
|
242
244
|
|
|
243
|
-
|
|
245
|
+
--------------------------------------------------------------------------------
|
|
246
|
+
LICENSE FOR
|
|
247
|
+
Facebook, Inc. and its affiliates,
|
|
248
|
+
Meta Platforms, Inc. and its affiliates,
|
|
249
|
+
Microsoft Corporation,
|
|
250
|
+
OpenGVLab/InternVL, and
|
|
251
|
+
Triton language and compiler.
|
|
244
252
|
|
|
245
253
|
MIT License
|
|
246
254
|
|
|
247
|
-
Copyright (c) Facebook, Inc. and its affiliates.
|
|
248
|
-
|
|
249
255
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
250
256
|
of this software and associated documentation files (the "Software"), to deal
|
|
251
257
|
in the Software without restriction, including without limitation the rights
|
|
@@ -264,28 +270,3 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
264
270
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
265
271
|
SOFTWARE.
|
|
266
272
|
|
|
267
|
-
------------- LICENSE FOR Mircrosoft Swin transformer code --------------
|
|
268
|
-
|
|
269
|
-
MIT License
|
|
270
|
-
|
|
271
|
-
Copyright (c) Microsoft Corporation.
|
|
272
|
-
|
|
273
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
274
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
275
|
-
in the Software without restriction, including without limitation the rights
|
|
276
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
277
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
278
|
-
furnished to do so, subject to the following conditions:
|
|
279
|
-
|
|
280
|
-
The above copyright notice and this permission notice shall be included in all
|
|
281
|
-
copies or substantial portions of the Software.
|
|
282
|
-
|
|
283
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
284
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
285
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
286
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
287
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
288
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
289
|
-
SOFTWARE
|
|
290
|
-
|
|
291
|
-
|