megatron-core 0.4.0__tar.gz → 0.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megatron-core might be problematic.

Files changed (285)
  1. {megatron_core-0.4.0 → megatron_core-0.10.0}/LICENSE +14 -33
  2. megatron_core-0.10.0/MANIFEST.in +3 -0
  3. megatron_core-0.10.0/PKG-INFO +936 -0
  4. {megatron_core-0.4.0 → megatron_core-0.10.0}/README.md +174 -92
  5. megatron_core-0.10.0/megatron/core/README.md +14 -0
  6. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/__init__.py +16 -0
  7. megatron_core-0.10.0/megatron/core/config_logger.py +104 -0
  8. megatron_core-0.10.0/megatron/core/datasets/bert_dataset.py +192 -0
  9. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/datasets/blended_dataset.py +50 -39
  10. megatron_core-0.10.0/megatron/core/datasets/blended_megatron_dataset_builder.py +528 -0
  11. megatron_core-0.10.0/megatron/core/datasets/blended_megatron_dataset_config.py +177 -0
  12. megatron_core-0.10.0/megatron/core/datasets/gpt_dataset.py +810 -0
  13. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/datasets/helpers.cpp +124 -43
  14. megatron_core-0.10.0/megatron/core/datasets/helpers.py +64 -0
  15. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/datasets/indexed_dataset.py +271 -53
  16. megatron_core-0.10.0/megatron/core/datasets/masked_dataset.py +425 -0
  17. megatron_core-0.10.0/megatron/core/datasets/megatron_dataset.py +139 -0
  18. megatron_core-0.10.0/megatron/core/datasets/megatron_tokenizer.py +154 -0
  19. megatron_core-0.10.0/megatron/core/datasets/multimodal_dataset.py +62 -0
  20. megatron_core-0.10.0/megatron/core/datasets/retro/__init__.py +5 -0
  21. megatron_core-0.10.0/megatron/core/datasets/retro/config/__init__.py +16 -0
  22. megatron_core-0.10.0/megatron/core/datasets/retro/config/bert_embedders.py +48 -0
  23. megatron_core-0.10.0/megatron/core/datasets/retro/config/config.py +135 -0
  24. megatron_core-0.10.0/megatron/core/datasets/retro/config/gpt_chunk_datasets.py +15 -0
  25. megatron_core-0.10.0/megatron/core/datasets/retro/config/tokenizers.py +15 -0
  26. megatron_core-0.10.0/megatron/core/datasets/retro/db/__init__.py +9 -0
  27. megatron_core-0.10.0/megatron/core/datasets/retro/db/build.py +633 -0
  28. megatron_core-0.10.0/megatron/core/datasets/retro/db/dataset.py +105 -0
  29. megatron_core-0.10.0/megatron/core/datasets/retro/db/utils.py +367 -0
  30. megatron_core-0.10.0/megatron/core/datasets/retro/external_libs.py +15 -0
  31. megatron_core-0.10.0/megatron/core/datasets/retro/index/__init__.py +11 -0
  32. megatron_core-0.10.0/megatron/core/datasets/retro/index/build.py +313 -0
  33. megatron_core-0.10.0/megatron/core/datasets/retro/index/factory.py +40 -0
  34. megatron_core-0.10.0/megatron/core/datasets/retro/index/index.py +133 -0
  35. megatron_core-0.10.0/megatron/core/datasets/retro/index/indexes/__init__.py +10 -0
  36. megatron_core-0.10.0/megatron/core/datasets/retro/index/indexes/faiss_base.py +150 -0
  37. megatron_core-0.10.0/megatron/core/datasets/retro/index/indexes/faiss_par_add.py +208 -0
  38. megatron_core-0.10.0/megatron/core/datasets/retro/index/utils.py +126 -0
  39. megatron_core-0.10.0/megatron/core/datasets/retro/index/validate.py +191 -0
  40. megatron_core-0.10.0/megatron/core/datasets/retro/query/__init__.py +1 -0
  41. megatron_core-0.10.0/megatron/core/datasets/retro/query/gpt_chunk_dataset.py +109 -0
  42. megatron_core-0.10.0/megatron/core/datasets/retro/query/multi_split_gpt_dataset.py +107 -0
  43. megatron_core-0.10.0/megatron/core/datasets/retro/query/query.py +393 -0
  44. megatron_core-0.10.0/megatron/core/datasets/retro/query/retro_dataset.py +238 -0
  45. megatron_core-0.10.0/megatron/core/datasets/retro/query/utils.py +35 -0
  46. megatron_core-0.10.0/megatron/core/datasets/retro/utils.py +349 -0
  47. megatron_core-0.10.0/megatron/core/datasets/t5_dataset.py +331 -0
  48. megatron_core-0.10.0/megatron/core/datasets/utils.py +87 -0
  49. megatron_core-0.10.0/megatron/core/datasets/utils_s3.py +164 -0
  50. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/dist_checkpointing/__init__.py +2 -1
  51. megatron_core-0.10.0/megatron/core/dist_checkpointing/core.py +77 -0
  52. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/dist_checkpointing/dict_utils.py +56 -27
  53. megatron_core-0.10.0/megatron/core/dist_checkpointing/exchange_utils.py +519 -0
  54. megatron_core-0.10.0/megatron/core/dist_checkpointing/mapping.py +723 -0
  55. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/dist_checkpointing/optimizer.py +62 -10
  56. megatron_core-0.10.0/megatron/core/dist_checkpointing/serialization.py +424 -0
  57. megatron_core-0.10.0/megatron/core/dist_checkpointing/state_dict_transformation.py +270 -0
  58. megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/__init__.py +7 -0
  59. megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/async_utils.py +224 -0
  60. megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/base.py +227 -0
  61. megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/common.py +157 -0
  62. megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/filesystem_async.py +439 -0
  63. megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/fully_parallel.py +439 -0
  64. megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/resharding.py +315 -0
  65. megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/state_dict_saver.py +162 -0
  66. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/dist_checkpointing/strategies/tensorstore.py +15 -18
  67. megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/torch.py +939 -0
  68. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/dist_checkpointing/strategies/two_stage.py +14 -16
  69. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/dist_checkpointing/strategies/zarr.py +62 -26
  70. megatron_core-0.10.0/megatron/core/dist_checkpointing/utils.py +219 -0
  71. megatron_core-0.10.0/megatron/core/dist_checkpointing/validation.py +560 -0
  72. megatron_core-0.10.0/megatron/core/distributed/__init__.py +8 -0
  73. megatron_core-0.10.0/megatron/core/distributed/data_parallel_base.py +96 -0
  74. megatron_core-0.10.0/megatron/core/distributed/distributed_data_parallel.py +478 -0
  75. megatron_core-0.10.0/megatron/core/distributed/distributed_data_parallel_config.py +49 -0
  76. megatron_core-0.10.0/megatron/core/distributed/finalize_model_grads.py +284 -0
  77. megatron_core-0.10.0/megatron/core/distributed/param_and_grad_buffer.py +840 -0
  78. megatron_core-0.10.0/megatron/core/distributed/torch_fully_sharded_data_parallel.py +115 -0
  79. megatron_core-0.10.0/megatron/core/export/__init__.py +1 -0
  80. megatron_core-0.10.0/megatron/core/export/data_type.py +5 -0
  81. megatron_core-0.10.0/megatron/core/export/export_config.py +19 -0
  82. megatron_core-0.10.0/megatron/core/export/model_type.py +7 -0
  83. megatron_core-0.10.0/megatron/core/export/trtllm/__init__.py +1 -0
  84. megatron_core-0.10.0/megatron/core/export/trtllm/engine_builder/__init__.py +1 -0
  85. megatron_core-0.10.0/megatron/core/export/trtllm/engine_builder/trtllm_engine_builder.py +154 -0
  86. megatron_core-0.10.0/megatron/core/export/trtllm/model_to_trllm_mapping/__init__.py +1 -0
  87. megatron_core-0.10.0/megatron/core/export/trtllm/model_to_trllm_mapping/default_conversion_dict.py +36 -0
  88. megatron_core-0.10.0/megatron/core/export/trtllm/trt_model_config.py +15 -0
  89. megatron_core-0.10.0/megatron/core/export/trtllm/trt_model_type.py +13 -0
  90. megatron_core-0.10.0/megatron/core/export/trtllm/trtllm_helper.py +588 -0
  91. megatron_core-0.10.0/megatron/core/export/trtllm/trtllm_layers.py +157 -0
  92. megatron_core-0.10.0/megatron/core/export/trtllm/trtllm_weights_converter/__init__.py +1 -0
  93. megatron_core-0.10.0/megatron/core/export/trtllm/trtllm_weights_converter/distributed_trtllm_model_weights_converter.py +280 -0
  94. megatron_core-0.10.0/megatron/core/export/trtllm/trtllm_weights_converter/single_device_trtllm_model_weights_converter.py +471 -0
  95. megatron_core-0.10.0/megatron/core/extensions/transformer_engine.py +1268 -0
  96. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/fusions/fused_bias_dropout.py +6 -4
  97. megatron_core-0.10.0/megatron/core/fusions/fused_bias_geglu.py +85 -0
  98. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/fusions/fused_bias_gelu.py +10 -3
  99. megatron_core-0.10.0/megatron/core/fusions/fused_bias_swiglu.py +89 -0
  100. megatron_core-0.10.0/megatron/core/fusions/fused_cross_entropy.py +143 -0
  101. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/fusions/fused_layer_norm.py +37 -19
  102. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/fusions/fused_softmax.py +18 -2
  103. megatron_core-0.10.0/megatron/core/inference/__init__.py +1 -0
  104. megatron_core-0.10.0/megatron/core/inference/ammo_support/__init__.py +8 -0
  105. megatron_core-0.10.0/megatron/core/inference/ammo_support/gpt/model_specs.py +2 -0
  106. megatron_core-0.10.0/megatron/core/inference/ammo_support/gpt/state_dict_hooks.py +5 -0
  107. megatron_core-0.10.0/megatron/core/inference/common_inference_params.py +29 -0
  108. megatron_core-0.10.0/megatron/core/inference/communication_utils.py +50 -0
  109. megatron_core-0.10.0/megatron/core/inference/engines/__init__.py +1 -0
  110. megatron_core-0.10.0/megatron/core/inference/engines/abstract_engine.py +17 -0
  111. megatron_core-0.10.0/megatron/core/inference/engines/mcore_engine.py +113 -0
  112. megatron_core-0.10.0/megatron/core/inference/inference_request.py +39 -0
  113. megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/__init__.py +1 -0
  114. megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py +238 -0
  115. megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/gpt/__init__.py +1 -0
  116. megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/gpt/gpt_inference_wrapper.py +90 -0
  117. megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/inference_wrapper_config.py +44 -0
  118. megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/t5/__init__.py +1 -0
  119. megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/t5/t5_inference_wrapper.py +215 -0
  120. megatron_core-0.10.0/megatron/core/inference/modelopt_support/__init__.py +8 -0
  121. megatron_core-0.10.0/megatron/core/inference/modelopt_support/gpt/__init__.py +1 -0
  122. megatron_core-0.10.0/megatron/core/inference/modelopt_support/gpt/model_specs.py +63 -0
  123. megatron_core-0.10.0/megatron/core/inference/modelopt_support/gpt/state_dict_hooks.py +133 -0
  124. megatron_core-0.10.0/megatron/core/inference/scheduler.py +127 -0
  125. megatron_core-0.10.0/megatron/core/inference/text_generation_controllers/__init__.py +1 -0
  126. megatron_core-0.10.0/megatron/core/inference/text_generation_controllers/encoder_decoder_text_generation_controller.py +35 -0
  127. megatron_core-0.10.0/megatron/core/inference/text_generation_controllers/simple_text_generation_controller.py +402 -0
  128. megatron_core-0.10.0/megatron/core/inference/utils.py +17 -0
  129. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/inference_params.py +4 -0
  130. megatron_core-0.10.0/megatron/core/jit.py +10 -0
  131. megatron_core-0.10.0/megatron/core/model_parallel_config.py +387 -0
  132. megatron_core-0.10.0/megatron/core/models/T5/__init__.py +2 -0
  133. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/T5/t5_model.py +173 -189
  134. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/T5/t5_spec.py +63 -27
  135. megatron_core-0.10.0/megatron/core/models/bert/bert_layer_specs.py +116 -0
  136. megatron_core-0.10.0/megatron/core/models/bert/bert_lm_head.py +50 -0
  137. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/bert/bert_model.py +135 -36
  138. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/bert/pooler.py +1 -0
  139. megatron_core-0.10.0/megatron/core/models/common/embeddings/__init__.py +5 -0
  140. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/common/embeddings/language_model_embedding.py +25 -45
  141. megatron_core-0.10.0/megatron/core/models/common/embeddings/rope_utils.py +261 -0
  142. megatron_core-0.10.0/megatron/core/models/common/embeddings/rotary_pos_embedding.py +213 -0
  143. megatron_core-0.10.0/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py +179 -0
  144. megatron_core-0.10.0/megatron/core/models/common/language_module/language_module.py +204 -0
  145. megatron_core-0.10.0/megatron/core/models/common/vision_module/vision_module.py +17 -0
  146. megatron_core-0.10.0/megatron/core/models/gpt/__init__.py +2 -0
  147. megatron_core-0.10.0/megatron/core/models/gpt/gpt_layer_specs.py +357 -0
  148. megatron_core-0.10.0/megatron/core/models/gpt/gpt_model.py +309 -0
  149. megatron_core-0.10.0/megatron/core/models/mamba/__init__.py +2 -0
  150. megatron_core-0.10.0/megatron/core/models/mamba/mamba_layer_specs.py +67 -0
  151. megatron_core-0.4.0/megatron/core/models/gpt/gpt_model.py → megatron_core-0.10.0/megatron/core/models/mamba/mamba_model.py +74 -87
  152. megatron_core-0.10.0/megatron/core/models/multimodal/__init__.py +1 -0
  153. megatron_core-0.10.0/megatron/core/models/multimodal/llava_model.py +923 -0
  154. megatron_core-0.10.0/megatron/core/models/multimodal/llava_spec.py +87 -0
  155. megatron_core-0.10.0/megatron/core/models/retro/__init__.py +13 -0
  156. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/retro/base_attention.py +10 -12
  157. megatron_core-0.10.0/megatron/core/models/retro/config.py +85 -0
  158. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/retro/decoder_attention.py +71 -67
  159. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/retro/decoder_spec.py +66 -33
  160. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/retro/encoder_attention.py +52 -49
  161. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/retro/encoder_spec.py +51 -24
  162. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/retro/model.py +34 -24
  163. megatron_core-0.10.0/megatron/core/models/retro/utils.py +24 -0
  164. megatron_core-0.10.0/megatron/core/models/vision/__init__.py +0 -0
  165. megatron_core-0.10.0/megatron/core/models/vision/clip_vit_model.py +219 -0
  166. megatron_core-0.10.0/megatron/core/models/vision/multimodal_projector.py +74 -0
  167. megatron_core-0.10.0/megatron/core/models/vision/vit_layer_specs.py +95 -0
  168. megatron_core-0.10.0/megatron/core/num_microbatches_calculator.py +508 -0
  169. megatron_core-0.10.0/megatron/core/optimizer/__init__.py +459 -0
  170. megatron_core-0.10.0/megatron/core/optimizer/clip_grads.py +220 -0
  171. megatron_core-0.10.0/megatron/core/optimizer/distrib_optimizer.py +1822 -0
  172. megatron_core-0.10.0/megatron/core/optimizer/grad_scaler.py +142 -0
  173. megatron_core-0.10.0/megatron/core/optimizer/optimizer.py +1069 -0
  174. megatron_core-0.10.0/megatron/core/optimizer/optimizer_config.py +116 -0
  175. megatron_core-0.10.0/megatron/core/optimizer_param_scheduler.py +297 -0
  176. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/package_info.py +2 -2
  177. megatron_core-0.10.0/megatron/core/packed_seq_params.py +20 -0
  178. megatron_core-0.10.0/megatron/core/parallel_state.py +1900 -0
  179. megatron_core-0.10.0/megatron/core/pipeline_parallel/__init__.py +2 -0
  180. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/pipeline_parallel/p2p_communication.py +129 -68
  181. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/pipeline_parallel/schedules.py +835 -251
  182. megatron_core-0.10.0/megatron/core/requirements.txt +2 -0
  183. megatron_core-0.10.0/megatron/core/rerun_state_machine.py +1133 -0
  184. megatron_core-0.10.0/megatron/core/ssm/__init__.py +0 -0
  185. megatron_core-0.10.0/megatron/core/ssm/mamba_block.py +336 -0
  186. megatron_core-0.10.0/megatron/core/ssm/mamba_hybrid_layer_allocation.py +191 -0
  187. megatron_core-0.10.0/megatron/core/ssm/mamba_layer.py +116 -0
  188. megatron_core-0.10.0/megatron/core/ssm/mamba_mixer.py +718 -0
  189. megatron_core-0.10.0/megatron/core/ssm/triton_cache_manager.py +81 -0
  190. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/tensor_parallel/__init__.py +12 -5
  191. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/tensor_parallel/cross_entropy.py +132 -42
  192. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/tensor_parallel/data.py +6 -5
  193. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/tensor_parallel/layers.py +465 -214
  194. megatron_core-0.10.0/megatron/core/tensor_parallel/mappings.py +576 -0
  195. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/tensor_parallel/random.py +67 -25
  196. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/tensor_parallel/utils.py +34 -34
  197. megatron_core-0.10.0/megatron/core/timers.py +421 -0
  198. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/transformer/__init__.py +1 -1
  199. megatron_core-0.10.0/megatron/core/transformer/attention.py +734 -0
  200. megatron_core-0.10.0/megatron/core/transformer/cuda_graphs.py +313 -0
  201. megatron_core-0.10.0/megatron/core/transformer/custom_layers/__init__.py +0 -0
  202. megatron_core-0.10.0/megatron/core/transformer/custom_layers/transformer_engine.py +12 -0
  203. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/transformer/dot_product_attention.py +30 -19
  204. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/transformer/enums.py +2 -0
  205. megatron_core-0.10.0/megatron/core/transformer/mlp.py +261 -0
  206. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/transformer/module.py +51 -13
  207. megatron_core-0.10.0/megatron/core/transformer/moe/__init__.py +0 -0
  208. megatron_core-0.10.0/megatron/core/transformer/moe/experts.py +853 -0
  209. megatron_core-0.10.0/megatron/core/transformer/moe/grouped_gemm_util.py +22 -0
  210. megatron_core-0.10.0/megatron/core/transformer/moe/legacy_a2a_token_dispatcher.py +314 -0
  211. megatron_core-0.10.0/megatron/core/transformer/moe/moe_layer.py +160 -0
  212. megatron_core-0.10.0/megatron/core/transformer/moe/moe_utils.py +407 -0
  213. megatron_core-0.10.0/megatron/core/transformer/moe/router.py +305 -0
  214. megatron_core-0.10.0/megatron/core/transformer/moe/shared_experts.py +244 -0
  215. megatron_core-0.10.0/megatron/core/transformer/moe/token_dispatcher.py +594 -0
  216. megatron_core-0.10.0/megatron/core/transformer/moe/upcycling_utils.py +196 -0
  217. megatron_core-0.10.0/megatron/core/transformer/multi_latent_attention.py +387 -0
  218. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/transformer/spec_utils.py +0 -3
  219. megatron_core-0.10.0/megatron/core/transformer/torch_layer_norm.py +4 -0
  220. megatron_core-0.10.0/megatron/core/transformer/torch_norm.py +48 -0
  221. megatron_core-0.10.0/megatron/core/transformer/transformer_block.py +618 -0
  222. megatron_core-0.10.0/megatron/core/transformer/transformer_config.py +637 -0
  223. megatron_core-0.10.0/megatron/core/transformer/transformer_layer.py +397 -0
  224. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/transformer/utils.py +60 -20
  225. megatron_core-0.10.0/megatron/core/utils.py +1415 -0
  226. megatron_core-0.10.0/megatron_core.egg-info/PKG-INFO +936 -0
  227. megatron_core-0.10.0/megatron_core.egg-info/SOURCES.txt +243 -0
  228. megatron_core-0.10.0/megatron_core.egg-info/requires.txt +16 -0
  229. megatron_core-0.10.0/pyproject.toml +72 -0
  230. megatron_core-0.10.0/requirements/pytorch:24.01/requirements.txt +15 -0
  231. megatron_core-0.10.0/requirements/pytorch:24.07/requirements.txt +14 -0
  232. {megatron_core-0.4.0 → megatron_core-0.10.0}/setup.py +19 -25
  233. megatron_core-0.4.0/PKG-INFO +0 -34
  234. megatron_core-0.4.0/megatron/core/datasets/blended_megatron_dataset_builder.py +0 -328
  235. megatron_core-0.4.0/megatron/core/datasets/blended_megatron_dataset_config.py +0 -119
  236. megatron_core-0.4.0/megatron/core/datasets/gpt_dataset.py +0 -460
  237. megatron_core-0.4.0/megatron/core/datasets/megatron_dataset.py +0 -135
  238. megatron_core-0.4.0/megatron/core/datasets/utils.py +0 -60
  239. megatron_core-0.4.0/megatron/core/dist_checkpointing/core.py +0 -41
  240. megatron_core-0.4.0/megatron/core/dist_checkpointing/mapping.py +0 -308
  241. megatron_core-0.4.0/megatron/core/dist_checkpointing/serialization.py +0 -385
  242. megatron_core-0.4.0/megatron/core/dist_checkpointing/strategies/__init__.py +0 -16
  243. megatron_core-0.4.0/megatron/core/dist_checkpointing/strategies/base.py +0 -90
  244. megatron_core-0.4.0/megatron/core/dist_checkpointing/utils.py +0 -44
  245. megatron_core-0.4.0/megatron/core/distributed/__init__.py +0 -2
  246. megatron_core-0.4.0/megatron/core/distributed/distributed_data_parallel.py +0 -250
  247. megatron_core-0.4.0/megatron/core/distributed/finalize_model_grads.py +0 -158
  248. megatron_core-0.4.0/megatron/core/distributed/grad_buffer.py +0 -410
  249. megatron_core-0.4.0/megatron/core/model_parallel_config.py +0 -222
  250. megatron_core-0.4.0/megatron/core/models/T5/__init__.py +0 -1
  251. megatron_core-0.4.0/megatron/core/models/bert/bert_layer_specs.py +0 -64
  252. megatron_core-0.4.0/megatron/core/models/bert/bert_lm_head.py +0 -75
  253. megatron_core-0.4.0/megatron/core/models/common/embeddings/rotary_pos_embedding.py +0 -167
  254. megatron_core-0.4.0/megatron/core/models/common/language_module/language_module.py +0 -105
  255. megatron_core-0.4.0/megatron/core/models/gpt/__init__.py +0 -1
  256. megatron_core-0.4.0/megatron/core/models/gpt/gpt_layer_specs.py +0 -123
  257. megatron_core-0.4.0/megatron/core/models/retro/__init__.py +0 -5
  258. megatron_core-0.4.0/megatron/core/models/retro/config.py +0 -43
  259. megatron_core-0.4.0/megatron/core/parallel_state.py +0 -980
  260. megatron_core-0.4.0/megatron/core/pipeline_parallel/__init__.py +0 -1
  261. megatron_core-0.4.0/megatron/core/tensor_parallel/mappings.py +0 -358
  262. megatron_core-0.4.0/megatron/core/transformer/attention.py +0 -443
  263. megatron_core-0.4.0/megatron/core/transformer/custom_layers/transformer_engine.py +0 -431
  264. megatron_core-0.4.0/megatron/core/transformer/mlp.py +0 -184
  265. megatron_core-0.4.0/megatron/core/transformer/switch_mlp.py +0 -158
  266. megatron_core-0.4.0/megatron/core/transformer/transformer_block.py +0 -349
  267. megatron_core-0.4.0/megatron/core/transformer/transformer_config.py +0 -288
  268. megatron_core-0.4.0/megatron/core/transformer/transformer_layer.py +0 -245
  269. megatron_core-0.4.0/megatron/core/utils.py +0 -236
  270. megatron_core-0.4.0/megatron_core.egg-info/PKG-INFO +0 -34
  271. megatron_core-0.4.0/megatron_core.egg-info/SOURCES.txt +0 -96
  272. megatron_core-0.4.0/pyproject.toml +0 -24
  273. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/datasets/__init__.py +0 -0
  274. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/enums.py +0 -0
  275. {megatron_core-0.4.0/megatron/core/fusions → megatron_core-0.10.0/megatron/core/extensions}/__init__.py +0 -0
  276. {megatron_core-0.4.0/megatron/core/models → megatron_core-0.10.0/megatron/core/fusions}/__init__.py +0 -0
  277. {megatron_core-0.4.0/megatron/core/models/bert → megatron_core-0.10.0/megatron/core/models}/__init__.py +0 -0
  278. {megatron_core-0.4.0/megatron/core/models/common → megatron_core-0.10.0/megatron/core/models/bert}/__init__.py +0 -0
  279. {megatron_core-0.4.0/megatron/core/models/common/embeddings → megatron_core-0.10.0/megatron/core/models/common}/__init__.py +0 -0
  280. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/models/common/language_module/__init__.py +0 -0
  281. {megatron_core-0.4.0/megatron/core/transformer/custom_layers → megatron_core-0.10.0/megatron/core/models/common/vision_module}/__init__.py +0 -0
  282. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron/core/transformer/identity_op.py +0 -0
  283. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron_core.egg-info/dependency_links.txt +0 -0
  284. {megatron_core-0.4.0 → megatron_core-0.10.0}/megatron_core.egg-info/top_level.txt +0 -0
  285. {megatron_core-0.4.0 → megatron_core-0.10.0}/setup.cfg +0 -0
@@ -29,13 +29,15 @@ The following applies to all files unless otherwise noted:
29
29
  --
30
30
 
31
31
  This repository also contains code from Hugging Face Inc., Google Research,
32
- Facebook (from their Fairseq and Dino projects), Microsoft(from their
33
- Swin-Transformer project)and Philip Popien. Files from these
34
- organizations have notices at the top of each file. Below are
35
- licenses used in those files, as indicated.
32
+ Facebook (from their Fairseq, Dino, and ParlAI projects), Microsoft (from their
33
+ Swin-Transformer project), Philip Popien, the Mamba project (Tri Dao and
34
+ Albert Gu), and the Triton language and compiler project (Philippe Tillet and
35
+ OpenAI). Files from these organizations have notices at the top of each file.
36
+ Below are licenses used in those files, as indicated.
36
37
 
37
38
 
38
- ------------- LICENSE FOR Facebook, huggingface and Google Research code --------------
39
+ --------------------------------------------------------------------------------
40
+ -- LICENSE FOR Facebook, huggingface, Google Research, LLaVA, and Mamba code --
39
41
 
40
42
 
41
43
  Apache License
@@ -240,12 +242,16 @@ licenses used in those files, as indicated.
240
242
  See the License for the specific language governing permissions and
241
243
  limitations under the License.
242
244
 
243
- ------------- LICENSE FOR Facebook Fairseq code --------------
245
+ --------------------------------------------------------------------------------
246
+ LICENSE FOR
247
+ Facebook, Inc. and its affiliates,
248
+ Meta Platforms, Inc. and its affiliates,
249
+ Microsoft Corporation,
250
+ OpenGVLab/InternVL, and
251
+ Triton language and compiler.
244
252
 
245
253
  MIT License
246
254
 
247
- Copyright (c) Facebook, Inc. and its affiliates.
248
-
249
255
  Permission is hereby granted, free of charge, to any person obtaining a copy
250
256
  of this software and associated documentation files (the "Software"), to deal
251
257
  in the Software without restriction, including without limitation the rights
@@ -264,28 +270,3 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
264
270
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
265
271
  SOFTWARE.
266
272
 
267
- ------------- LICENSE FOR Mircrosoft Swin transformer code --------------
268
-
269
- MIT License
270
-
271
- Copyright (c) Microsoft Corporation.
272
-
273
- Permission is hereby granted, free of charge, to any person obtaining a copy
274
- of this software and associated documentation files (the "Software"), to deal
275
- in the Software without restriction, including without limitation the rights
276
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
277
- copies of the Software, and to permit persons to whom the Software is
278
- furnished to do so, subject to the following conditions:
279
-
280
- The above copyright notice and this permission notice shall be included in all
281
- copies or substantial portions of the Software.
282
-
283
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
284
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
285
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
286
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
287
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
288
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
289
- SOFTWARE
290
-
291
-
@@ -0,0 +1,3 @@
1
+ include megatron/core/requirements.txt
2
+ include megatron/core/README.md
3
+ recursive-include requirements *