megatron-core 0.3.0__tar.gz → 0.10.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megatron-core might be problematic. (The original page links to more details here.)

Files changed (280)
  1. {megatron_core-0.3.0 → megatron_core-0.10.0}/LICENSE +14 -33
  2. megatron_core-0.10.0/MANIFEST.in +3 -0
  3. megatron_core-0.10.0/PKG-INFO +936 -0
  4. megatron_core-0.10.0/README.md +610 -0
  5. megatron_core-0.10.0/megatron/core/README.md +14 -0
  6. megatron_core-0.10.0/megatron/core/__init__.py +34 -0
  7. megatron_core-0.10.0/megatron/core/config_logger.py +104 -0
  8. megatron_core-0.10.0/megatron/core/datasets/bert_dataset.py +192 -0
  9. megatron_core-0.10.0/megatron/core/datasets/blended_dataset.py +201 -0
  10. megatron_core-0.10.0/megatron/core/datasets/blended_megatron_dataset_builder.py +528 -0
  11. megatron_core-0.10.0/megatron/core/datasets/blended_megatron_dataset_config.py +177 -0
  12. megatron_core-0.10.0/megatron/core/datasets/gpt_dataset.py +810 -0
  13. megatron_core-0.10.0/megatron/core/datasets/helpers.cpp +846 -0
  14. megatron_core-0.10.0/megatron/core/datasets/helpers.py +64 -0
  15. megatron_core-0.10.0/megatron/core/datasets/indexed_dataset.py +857 -0
  16. megatron_core-0.10.0/megatron/core/datasets/masked_dataset.py +425 -0
  17. megatron_core-0.10.0/megatron/core/datasets/megatron_dataset.py +139 -0
  18. megatron_core-0.10.0/megatron/core/datasets/megatron_tokenizer.py +154 -0
  19. megatron_core-0.10.0/megatron/core/datasets/multimodal_dataset.py +62 -0
  20. megatron_core-0.10.0/megatron/core/datasets/retro/__init__.py +5 -0
  21. megatron_core-0.10.0/megatron/core/datasets/retro/config/__init__.py +16 -0
  22. megatron_core-0.10.0/megatron/core/datasets/retro/config/bert_embedders.py +48 -0
  23. megatron_core-0.10.0/megatron/core/datasets/retro/config/config.py +135 -0
  24. megatron_core-0.10.0/megatron/core/datasets/retro/config/gpt_chunk_datasets.py +15 -0
  25. megatron_core-0.10.0/megatron/core/datasets/retro/config/tokenizers.py +15 -0
  26. megatron_core-0.10.0/megatron/core/datasets/retro/db/__init__.py +9 -0
  27. megatron_core-0.10.0/megatron/core/datasets/retro/db/build.py +633 -0
  28. megatron_core-0.10.0/megatron/core/datasets/retro/db/dataset.py +105 -0
  29. megatron_core-0.10.0/megatron/core/datasets/retro/db/utils.py +367 -0
  30. megatron_core-0.10.0/megatron/core/datasets/retro/external_libs.py +15 -0
  31. megatron_core-0.10.0/megatron/core/datasets/retro/index/__init__.py +11 -0
  32. megatron_core-0.10.0/megatron/core/datasets/retro/index/build.py +313 -0
  33. megatron_core-0.10.0/megatron/core/datasets/retro/index/factory.py +40 -0
  34. megatron_core-0.10.0/megatron/core/datasets/retro/index/index.py +133 -0
  35. megatron_core-0.10.0/megatron/core/datasets/retro/index/indexes/__init__.py +10 -0
  36. megatron_core-0.10.0/megatron/core/datasets/retro/index/indexes/faiss_base.py +150 -0
  37. megatron_core-0.10.0/megatron/core/datasets/retro/index/indexes/faiss_par_add.py +208 -0
  38. megatron_core-0.10.0/megatron/core/datasets/retro/index/utils.py +126 -0
  39. megatron_core-0.10.0/megatron/core/datasets/retro/index/validate.py +191 -0
  40. megatron_core-0.10.0/megatron/core/datasets/retro/query/__init__.py +1 -0
  41. megatron_core-0.10.0/megatron/core/datasets/retro/query/gpt_chunk_dataset.py +109 -0
  42. megatron_core-0.10.0/megatron/core/datasets/retro/query/multi_split_gpt_dataset.py +107 -0
  43. megatron_core-0.10.0/megatron/core/datasets/retro/query/query.py +393 -0
  44. megatron_core-0.10.0/megatron/core/datasets/retro/query/retro_dataset.py +238 -0
  45. megatron_core-0.10.0/megatron/core/datasets/retro/query/utils.py +35 -0
  46. megatron_core-0.10.0/megatron/core/datasets/retro/utils.py +349 -0
  47. megatron_core-0.10.0/megatron/core/datasets/t5_dataset.py +331 -0
  48. megatron_core-0.10.0/megatron/core/datasets/utils.py +87 -0
  49. megatron_core-0.10.0/megatron/core/datasets/utils_s3.py +164 -0
  50. megatron_core-0.10.0/megatron/core/dist_checkpointing/__init__.py +12 -0
  51. megatron_core-0.10.0/megatron/core/dist_checkpointing/core.py +77 -0
  52. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron/core/dist_checkpointing/dict_utils.py +88 -40
  53. megatron_core-0.10.0/megatron/core/dist_checkpointing/exchange_utils.py +519 -0
  54. megatron_core-0.10.0/megatron/core/dist_checkpointing/mapping.py +723 -0
  55. megatron_core-0.10.0/megatron/core/dist_checkpointing/optimizer.py +142 -0
  56. megatron_core-0.10.0/megatron/core/dist_checkpointing/serialization.py +424 -0
  57. megatron_core-0.10.0/megatron/core/dist_checkpointing/state_dict_transformation.py +270 -0
  58. megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/__init__.py +7 -0
  59. megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/async_utils.py +224 -0
  60. megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/base.py +227 -0
  61. megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/common.py +157 -0
  62. megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/filesystem_async.py +439 -0
  63. megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/fully_parallel.py +439 -0
  64. megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/resharding.py +315 -0
  65. megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/state_dict_saver.py +162 -0
  66. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron/core/dist_checkpointing/strategies/tensorstore.py +36 -18
  67. megatron_core-0.10.0/megatron/core/dist_checkpointing/strategies/torch.py +939 -0
  68. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron/core/dist_checkpointing/strategies/two_stage.py +23 -18
  69. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron/core/dist_checkpointing/strategies/zarr.py +131 -40
  70. megatron_core-0.10.0/megatron/core/dist_checkpointing/utils.py +219 -0
  71. megatron_core-0.10.0/megatron/core/dist_checkpointing/validation.py +560 -0
  72. megatron_core-0.10.0/megatron/core/distributed/__init__.py +8 -0
  73. megatron_core-0.10.0/megatron/core/distributed/data_parallel_base.py +96 -0
  74. megatron_core-0.10.0/megatron/core/distributed/distributed_data_parallel.py +478 -0
  75. megatron_core-0.10.0/megatron/core/distributed/distributed_data_parallel_config.py +49 -0
  76. megatron_core-0.10.0/megatron/core/distributed/finalize_model_grads.py +284 -0
  77. megatron_core-0.10.0/megatron/core/distributed/param_and_grad_buffer.py +840 -0
  78. megatron_core-0.10.0/megatron/core/distributed/torch_fully_sharded_data_parallel.py +115 -0
  79. megatron_core-0.10.0/megatron/core/export/__init__.py +1 -0
  80. megatron_core-0.10.0/megatron/core/export/data_type.py +5 -0
  81. megatron_core-0.10.0/megatron/core/export/export_config.py +19 -0
  82. megatron_core-0.10.0/megatron/core/export/model_type.py +7 -0
  83. megatron_core-0.10.0/megatron/core/export/trtllm/__init__.py +1 -0
  84. megatron_core-0.10.0/megatron/core/export/trtllm/engine_builder/__init__.py +1 -0
  85. megatron_core-0.10.0/megatron/core/export/trtllm/engine_builder/trtllm_engine_builder.py +154 -0
  86. megatron_core-0.10.0/megatron/core/export/trtllm/model_to_trllm_mapping/__init__.py +1 -0
  87. megatron_core-0.10.0/megatron/core/export/trtllm/model_to_trllm_mapping/default_conversion_dict.py +36 -0
  88. megatron_core-0.10.0/megatron/core/export/trtllm/trt_model_config.py +15 -0
  89. megatron_core-0.10.0/megatron/core/export/trtllm/trt_model_type.py +13 -0
  90. megatron_core-0.10.0/megatron/core/export/trtllm/trtllm_helper.py +588 -0
  91. megatron_core-0.10.0/megatron/core/export/trtllm/trtllm_layers.py +157 -0
  92. megatron_core-0.10.0/megatron/core/export/trtllm/trtllm_weights_converter/__init__.py +1 -0
  93. megatron_core-0.10.0/megatron/core/export/trtllm/trtllm_weights_converter/distributed_trtllm_model_weights_converter.py +280 -0
  94. megatron_core-0.10.0/megatron/core/export/trtllm/trtllm_weights_converter/single_device_trtllm_model_weights_converter.py +471 -0
  95. megatron_core-0.10.0/megatron/core/extensions/transformer_engine.py +1268 -0
  96. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron/core/fusions/fused_bias_dropout.py +32 -19
  97. megatron_core-0.10.0/megatron/core/fusions/fused_bias_geglu.py +85 -0
  98. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron/core/fusions/fused_bias_gelu.py +10 -3
  99. megatron_core-0.10.0/megatron/core/fusions/fused_bias_swiglu.py +89 -0
  100. megatron_core-0.10.0/megatron/core/fusions/fused_cross_entropy.py +143 -0
  101. megatron_core-0.10.0/megatron/core/fusions/fused_layer_norm.py +169 -0
  102. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron/core/fusions/fused_softmax.py +18 -2
  103. megatron_core-0.10.0/megatron/core/inference/__init__.py +1 -0
  104. megatron_core-0.10.0/megatron/core/inference/ammo_support/__init__.py +8 -0
  105. megatron_core-0.10.0/megatron/core/inference/ammo_support/gpt/model_specs.py +2 -0
  106. megatron_core-0.10.0/megatron/core/inference/ammo_support/gpt/state_dict_hooks.py +5 -0
  107. megatron_core-0.10.0/megatron/core/inference/common_inference_params.py +29 -0
  108. megatron_core-0.10.0/megatron/core/inference/communication_utils.py +50 -0
  109. megatron_core-0.10.0/megatron/core/inference/engines/__init__.py +1 -0
  110. megatron_core-0.10.0/megatron/core/inference/engines/abstract_engine.py +17 -0
  111. megatron_core-0.10.0/megatron/core/inference/engines/mcore_engine.py +113 -0
  112. megatron_core-0.10.0/megatron/core/inference/inference_request.py +39 -0
  113. megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/__init__.py +1 -0
  114. megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py +238 -0
  115. megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/gpt/__init__.py +1 -0
  116. megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/gpt/gpt_inference_wrapper.py +90 -0
  117. megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/inference_wrapper_config.py +44 -0
  118. megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/t5/__init__.py +1 -0
  119. megatron_core-0.10.0/megatron/core/inference/model_inference_wrappers/t5/t5_inference_wrapper.py +215 -0
  120. megatron_core-0.10.0/megatron/core/inference/modelopt_support/__init__.py +8 -0
  121. megatron_core-0.10.0/megatron/core/inference/modelopt_support/gpt/__init__.py +1 -0
  122. megatron_core-0.10.0/megatron/core/inference/modelopt_support/gpt/model_specs.py +63 -0
  123. megatron_core-0.10.0/megatron/core/inference/modelopt_support/gpt/state_dict_hooks.py +133 -0
  124. megatron_core-0.10.0/megatron/core/inference/scheduler.py +127 -0
  125. megatron_core-0.10.0/megatron/core/inference/text_generation_controllers/__init__.py +1 -0
  126. megatron_core-0.10.0/megatron/core/inference/text_generation_controllers/encoder_decoder_text_generation_controller.py +35 -0
  127. megatron_core-0.10.0/megatron/core/inference/text_generation_controllers/simple_text_generation_controller.py +402 -0
  128. megatron_core-0.10.0/megatron/core/inference/utils.py +17 -0
  129. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron/core/inference_params.py +4 -0
  130. megatron_core-0.10.0/megatron/core/jit.py +10 -0
  131. megatron_core-0.10.0/megatron/core/model_parallel_config.py +387 -0
  132. megatron_core-0.10.0/megatron/core/models/T5/__init__.py +2 -0
  133. megatron_core-0.10.0/megatron/core/models/T5/t5_model.py +450 -0
  134. megatron_core-0.10.0/megatron/core/models/T5/t5_spec.py +248 -0
  135. megatron_core-0.10.0/megatron/core/models/bert/__init__.py +0 -0
  136. megatron_core-0.10.0/megatron/core/models/bert/bert_layer_specs.py +116 -0
  137. megatron_core-0.10.0/megatron/core/models/bert/bert_lm_head.py +50 -0
  138. megatron_core-0.10.0/megatron/core/models/bert/bert_model.py +366 -0
  139. megatron_core-0.10.0/megatron/core/models/bert/pooler.py +52 -0
  140. megatron_core-0.10.0/megatron/core/models/common/__init__.py +0 -0
  141. megatron_core-0.10.0/megatron/core/models/common/embeddings/__init__.py +5 -0
  142. megatron_core-0.10.0/megatron/core/models/common/embeddings/language_model_embedding.py +143 -0
  143. megatron_core-0.10.0/megatron/core/models/common/embeddings/rope_utils.py +261 -0
  144. megatron_core-0.10.0/megatron/core/models/common/embeddings/rotary_pos_embedding.py +213 -0
  145. megatron_core-0.10.0/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py +179 -0
  146. megatron_core-0.10.0/megatron/core/models/common/language_module/__init__.py +0 -0
  147. megatron_core-0.10.0/megatron/core/models/common/language_module/language_module.py +204 -0
  148. megatron_core-0.10.0/megatron/core/models/common/vision_module/__init__.py +0 -0
  149. megatron_core-0.10.0/megatron/core/models/common/vision_module/vision_module.py +17 -0
  150. megatron_core-0.10.0/megatron/core/models/gpt/__init__.py +2 -0
  151. megatron_core-0.10.0/megatron/core/models/gpt/gpt_layer_specs.py +357 -0
  152. megatron_core-0.10.0/megatron/core/models/gpt/gpt_model.py +309 -0
  153. megatron_core-0.10.0/megatron/core/models/mamba/__init__.py +2 -0
  154. megatron_core-0.10.0/megatron/core/models/mamba/mamba_layer_specs.py +67 -0
  155. megatron_core-0.10.0/megatron/core/models/mamba/mamba_model.py +228 -0
  156. megatron_core-0.10.0/megatron/core/models/multimodal/__init__.py +1 -0
  157. megatron_core-0.10.0/megatron/core/models/multimodal/llava_model.py +923 -0
  158. megatron_core-0.10.0/megatron/core/models/multimodal/llava_spec.py +87 -0
  159. megatron_core-0.10.0/megatron/core/models/retro/__init__.py +13 -0
  160. megatron_core-0.10.0/megatron/core/models/retro/base_attention.py +43 -0
  161. megatron_core-0.10.0/megatron/core/models/retro/config.py +85 -0
  162. megatron_core-0.10.0/megatron/core/models/retro/decoder_attention.py +305 -0
  163. megatron_core-0.10.0/megatron/core/models/retro/decoder_spec.py +185 -0
  164. megatron_core-0.10.0/megatron/core/models/retro/encoder_attention.py +226 -0
  165. megatron_core-0.10.0/megatron/core/models/retro/encoder_spec.py +168 -0
  166. megatron_core-0.10.0/megatron/core/models/retro/model.py +99 -0
  167. megatron_core-0.10.0/megatron/core/models/retro/utils.py +24 -0
  168. megatron_core-0.10.0/megatron/core/models/vision/__init__.py +0 -0
  169. megatron_core-0.10.0/megatron/core/models/vision/clip_vit_model.py +219 -0
  170. megatron_core-0.10.0/megatron/core/models/vision/multimodal_projector.py +74 -0
  171. megatron_core-0.10.0/megatron/core/models/vision/vit_layer_specs.py +95 -0
  172. megatron_core-0.10.0/megatron/core/num_microbatches_calculator.py +508 -0
  173. megatron_core-0.10.0/megatron/core/optimizer/__init__.py +459 -0
  174. megatron_core-0.10.0/megatron/core/optimizer/clip_grads.py +220 -0
  175. megatron_core-0.10.0/megatron/core/optimizer/distrib_optimizer.py +1822 -0
  176. megatron_core-0.10.0/megatron/core/optimizer/grad_scaler.py +142 -0
  177. megatron_core-0.10.0/megatron/core/optimizer/optimizer.py +1069 -0
  178. megatron_core-0.10.0/megatron/core/optimizer/optimizer_config.py +116 -0
  179. megatron_core-0.10.0/megatron/core/optimizer_param_scheduler.py +297 -0
  180. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron/core/package_info.py +1 -1
  181. megatron_core-0.10.0/megatron/core/packed_seq_params.py +20 -0
  182. megatron_core-0.10.0/megatron/core/parallel_state.py +1900 -0
  183. megatron_core-0.10.0/megatron/core/pipeline_parallel/__init__.py +2 -0
  184. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron/core/pipeline_parallel/p2p_communication.py +129 -68
  185. megatron_core-0.10.0/megatron/core/pipeline_parallel/schedules.py +1877 -0
  186. megatron_core-0.10.0/megatron/core/requirements.txt +2 -0
  187. megatron_core-0.10.0/megatron/core/rerun_state_machine.py +1133 -0
  188. megatron_core-0.10.0/megatron/core/ssm/__init__.py +0 -0
  189. megatron_core-0.10.0/megatron/core/ssm/mamba_block.py +336 -0
  190. megatron_core-0.10.0/megatron/core/ssm/mamba_hybrid_layer_allocation.py +191 -0
  191. megatron_core-0.10.0/megatron/core/ssm/mamba_layer.py +116 -0
  192. megatron_core-0.10.0/megatron/core/ssm/mamba_mixer.py +718 -0
  193. megatron_core-0.10.0/megatron/core/ssm/triton_cache_manager.py +81 -0
  194. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron/core/tensor_parallel/__init__.py +18 -2
  195. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron/core/tensor_parallel/cross_entropy.py +132 -42
  196. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron/core/tensor_parallel/data.py +6 -5
  197. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron/core/tensor_parallel/layers.py +534 -225
  198. megatron_core-0.10.0/megatron/core/tensor_parallel/mappings.py +576 -0
  199. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron/core/tensor_parallel/random.py +89 -24
  200. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron/core/tensor_parallel/utils.py +34 -34
  201. megatron_core-0.10.0/megatron/core/timers.py +421 -0
  202. megatron_core-0.10.0/megatron/core/transformer/__init__.py +6 -0
  203. megatron_core-0.10.0/megatron/core/transformer/attention.py +734 -0
  204. megatron_core-0.10.0/megatron/core/transformer/cuda_graphs.py +313 -0
  205. megatron_core-0.10.0/megatron/core/transformer/custom_layers/__init__.py +0 -0
  206. megatron_core-0.10.0/megatron/core/transformer/custom_layers/transformer_engine.py +12 -0
  207. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron/core/transformer/dot_product_attention.py +79 -38
  208. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron/core/transformer/enums.py +3 -0
  209. megatron_core-0.10.0/megatron/core/transformer/identity_op.py +28 -0
  210. megatron_core-0.10.0/megatron/core/transformer/mlp.py +261 -0
  211. megatron_core-0.10.0/megatron/core/transformer/module.py +195 -0
  212. megatron_core-0.10.0/megatron/core/transformer/moe/__init__.py +0 -0
  213. megatron_core-0.10.0/megatron/core/transformer/moe/experts.py +853 -0
  214. megatron_core-0.10.0/megatron/core/transformer/moe/grouped_gemm_util.py +22 -0
  215. megatron_core-0.10.0/megatron/core/transformer/moe/legacy_a2a_token_dispatcher.py +314 -0
  216. megatron_core-0.10.0/megatron/core/transformer/moe/moe_layer.py +160 -0
  217. megatron_core-0.10.0/megatron/core/transformer/moe/moe_utils.py +407 -0
  218. megatron_core-0.10.0/megatron/core/transformer/moe/router.py +305 -0
  219. megatron_core-0.10.0/megatron/core/transformer/moe/shared_experts.py +244 -0
  220. megatron_core-0.10.0/megatron/core/transformer/moe/token_dispatcher.py +594 -0
  221. megatron_core-0.10.0/megatron/core/transformer/moe/upcycling_utils.py +196 -0
  222. megatron_core-0.10.0/megatron/core/transformer/multi_latent_attention.py +387 -0
  223. megatron_core-0.10.0/megatron/core/transformer/spec_utils.py +106 -0
  224. megatron_core-0.10.0/megatron/core/transformer/torch_layer_norm.py +4 -0
  225. megatron_core-0.10.0/megatron/core/transformer/torch_norm.py +48 -0
  226. megatron_core-0.10.0/megatron/core/transformer/transformer_block.py +618 -0
  227. megatron_core-0.10.0/megatron/core/transformer/transformer_config.py +637 -0
  228. megatron_core-0.10.0/megatron/core/transformer/transformer_layer.py +397 -0
  229. megatron_core-0.10.0/megatron/core/transformer/utils.py +188 -0
  230. megatron_core-0.10.0/megatron/core/utils.py +1415 -0
  231. megatron_core-0.10.0/megatron_core.egg-info/PKG-INFO +936 -0
  232. megatron_core-0.10.0/megatron_core.egg-info/SOURCES.txt +243 -0
  233. megatron_core-0.10.0/megatron_core.egg-info/requires.txt +16 -0
  234. megatron_core-0.10.0/pyproject.toml +72 -0
  235. megatron_core-0.10.0/requirements/pytorch:24.01/requirements.txt +15 -0
  236. megatron_core-0.10.0/requirements/pytorch:24.07/requirements.txt +14 -0
  237. {megatron_core-0.3.0 → megatron_core-0.10.0}/setup.py +30 -18
  238. megatron_core-0.3.0/PKG-INFO +0 -34
  239. megatron_core-0.3.0/README.md +0 -521
  240. megatron_core-0.3.0/megatron/core/__init__.py +0 -11
  241. megatron_core-0.3.0/megatron/core/dist_checkpointing/__init__.py +0 -5
  242. megatron_core-0.3.0/megatron/core/dist_checkpointing/core.py +0 -41
  243. megatron_core-0.3.0/megatron/core/dist_checkpointing/mapping.py +0 -289
  244. megatron_core-0.3.0/megatron/core/dist_checkpointing/optimizer.py +0 -86
  245. megatron_core-0.3.0/megatron/core/dist_checkpointing/serialization.py +0 -334
  246. megatron_core-0.3.0/megatron/core/dist_checkpointing/strategies/__init__.py +0 -16
  247. megatron_core-0.3.0/megatron/core/dist_checkpointing/strategies/base.py +0 -68
  248. megatron_core-0.3.0/megatron/core/dist_checkpointing/utils.py +0 -44
  249. megatron_core-0.3.0/megatron/core/fusions/fused_layer_norm.py +0 -119
  250. megatron_core-0.3.0/megatron/core/model_parallel_config.py +0 -167
  251. megatron_core-0.3.0/megatron/core/models/common/rotary_pos_embedding.py +0 -56
  252. megatron_core-0.3.0/megatron/core/models/gpt/__init__.py +0 -1
  253. megatron_core-0.3.0/megatron/core/models/gpt/gpt_embedding.py +0 -123
  254. megatron_core-0.3.0/megatron/core/models/gpt/gpt_model.py +0 -308
  255. megatron_core-0.3.0/megatron/core/parallel_state.py +0 -651
  256. megatron_core-0.3.0/megatron/core/pipeline_parallel/__init__.py +0 -1
  257. megatron_core-0.3.0/megatron/core/pipeline_parallel/schedules.py +0 -1254
  258. megatron_core-0.3.0/megatron/core/tensor_parallel/mappings.py +0 -283
  259. megatron_core-0.3.0/megatron/core/transformer/__init__.py +0 -3
  260. megatron_core-0.3.0/megatron/core/transformer/attention.py +0 -368
  261. megatron_core-0.3.0/megatron/core/transformer/custom_layers/transformer_engine.py +0 -249
  262. megatron_core-0.3.0/megatron/core/transformer/identity_op.py +0 -14
  263. megatron_core-0.3.0/megatron/core/transformer/mlp.py +0 -87
  264. megatron_core-0.3.0/megatron/core/transformer/module.py +0 -132
  265. megatron_core-0.3.0/megatron/core/transformer/transformer_block.py +0 -286
  266. megatron_core-0.3.0/megatron/core/transformer/transformer_config.py +0 -273
  267. megatron_core-0.3.0/megatron/core/transformer/transformer_layer.py +0 -270
  268. megatron_core-0.3.0/megatron/core/transformer/utils.py +0 -40
  269. megatron_core-0.3.0/megatron/core/utils.py +0 -207
  270. megatron_core-0.3.0/megatron_core.egg-info/PKG-INFO +0 -34
  271. megatron_core-0.3.0/megatron_core.egg-info/SOURCES.txt +0 -61
  272. megatron_core-0.3.0/pyproject.toml +0 -18
  273. {megatron_core-0.3.0/megatron/core/fusions → megatron_core-0.10.0/megatron/core/datasets}/__init__.py +0 -0
  274. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron/core/enums.py +0 -0
  275. {megatron_core-0.3.0/megatron/core/models → megatron_core-0.10.0/megatron/core/extensions}/__init__.py +0 -0
  276. {megatron_core-0.3.0/megatron/core/models/common → megatron_core-0.10.0/megatron/core/fusions}/__init__.py +0 -0
  277. {megatron_core-0.3.0/megatron/core/transformer/custom_layers → megatron_core-0.10.0/megatron/core/models}/__init__.py +0 -0
  278. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron_core.egg-info/dependency_links.txt +0 -0
  279. {megatron_core-0.3.0 → megatron_core-0.10.0}/megatron_core.egg-info/top_level.txt +0 -0
  280. {megatron_core-0.3.0 → megatron_core-0.10.0}/setup.cfg +0 -0
{megatron_core-0.3.0 → megatron_core-0.10.0}/LICENSE
@@ -29,13 +29,15 @@ The following applies to all files unless otherwise noted:
29
29
  --
30
30
 
31
31
  This repository also contains code from Hugging Face Inc., Google Research,
32
- Facebook (from their Fairseq and Dino projects), Microsoft(from their
33
- Swin-Transformer project)and Philip Popien. Files from these
34
- organizations have notices at the top of each file. Below are
35
- licenses used in those files, as indicated.
32
+ Facebook (from their Fairseq, Dino, and ParlAI projects), Microsoft (from their
33
+ Swin-Transformer project), Philip Popien, the Mamba project (Tri Dao and
34
+ Albert Gu), and the Triton language and compiler project (Philippe Tillet and
35
+ OpenAI). Files from these organizations have notices at the top of each file.
36
+ Below are licenses used in those files, as indicated.
36
37
 
37
38
 
38
- ------------- LICENSE FOR Facebook, huggingface and Google Research code --------------
39
+ --------------------------------------------------------------------------------
40
+ -- LICENSE FOR Facebook, huggingface, Google Research, LLaVA, and Mamba code --
39
41
 
40
42
 
41
43
  Apache License
@@ -240,12 +242,16 @@ licenses used in those files, as indicated.
240
242
  See the License for the specific language governing permissions and
241
243
  limitations under the License.
242
244
 
243
- ------------- LICENSE FOR Facebook Fairseq code --------------
245
+ --------------------------------------------------------------------------------
246
+ LICENSE FOR
247
+ Facebook, Inc. and its affiliates,
248
+ Meta Platforms, Inc. and its affiliates,
249
+ Microsoft Corporation,
250
+ OpenGVLab/InternVL, and
251
+ Triton language and compiler.
244
252
 
245
253
  MIT License
246
254
 
247
- Copyright (c) Facebook, Inc. and its affiliates.
248
-
249
255
  Permission is hereby granted, free of charge, to any person obtaining a copy
250
256
  of this software and associated documentation files (the "Software"), to deal
251
257
  in the Software without restriction, including without limitation the rights
@@ -264,28 +270,3 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
264
270
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
265
271
  SOFTWARE.
266
272
 
267
- ------------- LICENSE FOR Mircrosoft Swin transformer code --------------
268
-
269
- MIT License
270
-
271
- Copyright (c) Microsoft Corporation.
272
-
273
- Permission is hereby granted, free of charge, to any person obtaining a copy
274
- of this software and associated documentation files (the "Software"), to deal
275
- in the Software without restriction, including without limitation the rights
276
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
277
- copies of the Software, and to permit persons to whom the Software is
278
- furnished to do so, subject to the following conditions:
279
-
280
- The above copyright notice and this permission notice shall be included in all
281
- copies or substantial portions of the Software.
282
-
283
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
284
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
285
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
286
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
287
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
288
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
289
- SOFTWARE
290
-
291
-
megatron_core-0.10.0/MANIFEST.in
@@ -0,0 +1,3 @@
1
+ include megatron/core/requirements.txt
2
+ include megatron/core/README.md
3
+ recursive-include requirements *