megatron-core 0.12.0rc2__tar.gz → 0.12.0rc3__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of megatron-core might be problematic. Click here for more details.

Files changed (287)
  1. {megatron_core-0.12.0rc2/megatron_core.egg-info → megatron_core-0.12.0rc3}/PKG-INFO +1 -1
  2. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/custom_fsdp/fully_sharded_data_parallel.py +0 -3
  3. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/contexts/dynamic_context.py +19 -1
  4. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/engines/dynamic_engine.py +8 -2
  5. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/text_generation_controllers/text_generation_controller.py +6 -3
  6. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/package_info.py +1 -1
  7. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/utils.py +25 -0
  8. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3/megatron_core.egg-info}/PKG-INFO +1 -1
  9. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/LICENSE +0 -0
  10. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/MANIFEST.in +0 -0
  11. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/README.md +0 -0
  12. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/README.md +0 -0
  13. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/__init__.py +0 -0
  14. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/config.py +0 -0
  15. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/config_logger.py +0 -0
  16. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/__init__.py +0 -0
  17. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/bert_dataset.py +0 -0
  18. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/blended_dataset.py +0 -0
  19. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/blended_megatron_dataset_builder.py +0 -0
  20. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/blended_megatron_dataset_config.py +0 -0
  21. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/gpt_dataset.py +0 -0
  22. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/helpers.cpp +0 -0
  23. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/helpers.py +0 -0
  24. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/indexed_dataset.py +0 -0
  25. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/masked_dataset.py +0 -0
  26. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/megatron_dataset.py +0 -0
  27. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/megatron_tokenizer.py +0 -0
  28. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/multimodal_dataset.py +0 -0
  29. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/__init__.py +0 -0
  30. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/config/__init__.py +0 -0
  31. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/config/bert_embedders.py +0 -0
  32. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/config/config.py +0 -0
  33. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/config/gpt_chunk_datasets.py +0 -0
  34. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/config/tokenizers.py +0 -0
  35. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/db/__init__.py +0 -0
  36. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/db/build.py +0 -0
  37. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/db/dataset.py +0 -0
  38. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/db/utils.py +0 -0
  39. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/external_libs.py +0 -0
  40. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/__init__.py +0 -0
  41. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/build.py +0 -0
  42. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/factory.py +0 -0
  43. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/index.py +0 -0
  44. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/indexes/__init__.py +0 -0
  45. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/indexes/faiss_base.py +0 -0
  46. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/indexes/faiss_par_add.py +0 -0
  47. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/utils.py +0 -0
  48. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/validate.py +0 -0
  49. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/__init__.py +0 -0
  50. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/gpt_chunk_dataset.py +0 -0
  51. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/multi_split_gpt_dataset.py +0 -0
  52. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/query.py +0 -0
  53. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/retro_dataset.py +0 -0
  54. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/utils.py +0 -0
  55. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/utils.py +0 -0
  56. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/t5_dataset.py +0 -0
  57. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/utils.py +0 -0
  58. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/utils_s3.py +0 -0
  59. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/__init__.py +0 -0
  60. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/core.py +0 -0
  61. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/dict_utils.py +0 -0
  62. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/exchange_utils.py +0 -0
  63. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/mapping.py +0 -0
  64. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/optimizer.py +0 -0
  65. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/serialization.py +0 -0
  66. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/state_dict_utils.py +0 -0
  67. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/__init__.py +0 -0
  68. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/async_utils.py +0 -0
  69. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/base.py +0 -0
  70. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/cached_metadata_filesystem_reader.py +0 -0
  71. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/common.py +0 -0
  72. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/filesystem_async.py +0 -0
  73. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/fully_parallel.py +0 -0
  74. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/resharding.py +0 -0
  75. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/state_dict_saver.py +0 -0
  76. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/tensorstore.py +0 -0
  77. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/torch.py +0 -0
  78. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/two_stage.py +0 -0
  79. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/zarr.py +0 -0
  80. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/tensor_aware_state_dict.py +0 -0
  81. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/utils.py +0 -0
  82. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/validation.py +0 -0
  83. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/__init__.py +0 -0
  84. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/custom_fsdp/__init__.py +0 -0
  85. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/custom_fsdp/param_and_grad_buffer.py +0 -0
  86. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/data_parallel_base.py +0 -0
  87. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/distributed_data_parallel.py +0 -0
  88. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/distributed_data_parallel_config.py +0 -0
  89. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/finalize_model_grads.py +0 -0
  90. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/param_and_grad_buffer.py +0 -0
  91. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/torch_fully_sharded_data_parallel.py +0 -0
  92. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/enums.py +0 -0
  93. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/__init__.py +0 -0
  94. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/data_type.py +0 -0
  95. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/export_config.py +0 -0
  96. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/model_type.py +0 -0
  97. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/__init__.py +0 -0
  98. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/engine_builder/__init__.py +0 -0
  99. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/engine_builder/trtllm_engine_builder.py +0 -0
  100. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/model_to_trllm_mapping/__init__.py +0 -0
  101. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/model_to_trllm_mapping/default_conversion_dict.py +0 -0
  102. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trt_model_config.py +0 -0
  103. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trt_model_type.py +0 -0
  104. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trtllm_helper.py +0 -0
  105. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trtllm_layers.py +0 -0
  106. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trtllm_weights_converter/__init__.py +0 -0
  107. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trtllm_weights_converter/distributed_trtllm_model_weights_converter.py +0 -0
  108. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trtllm_weights_converter/single_device_trtllm_model_weights_converter.py +0 -0
  109. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/extensions/__init__.py +0 -0
  110. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/extensions/transformer_engine.py +0 -0
  111. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fp8_utils.py +0 -0
  112. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/__init__.py +0 -0
  113. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_bias_dropout.py +0 -0
  114. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_bias_geglu.py +0 -0
  115. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_bias_gelu.py +0 -0
  116. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_bias_swiglu.py +0 -0
  117. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_cross_entropy.py +0 -0
  118. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_layer_norm.py +0 -0
  119. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_softmax.py +0 -0
  120. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/__init__.py +0 -0
  121. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/async_stream.py +0 -0
  122. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/common_inference_params.py +0 -0
  123. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/communication_utils.py +0 -0
  124. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/contexts/__init__.py +0 -0
  125. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/contexts/base_context.py +0 -0
  126. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/contexts/static_context.py +0 -0
  127. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/engines/__init__.py +0 -0
  128. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/engines/abstract_engine.py +0 -0
  129. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/engines/mcore_engine.py +0 -0
  130. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/engines/static_engine.py +0 -0
  131. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/inference_request.py +0 -0
  132. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/__init__.py +0 -0
  133. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py +0 -0
  134. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/gpt/__init__.py +0 -0
  135. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/gpt/gpt_inference_wrapper.py +0 -0
  136. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/inference_wrapper_config.py +0 -0
  137. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/multimodal/vlm_inference_wrapper.py +0 -0
  138. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/t5/__init__.py +0 -0
  139. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/t5/t5_inference_wrapper.py +0 -0
  140. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/modelopt_support/__init__.py +0 -0
  141. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/modelopt_support/gpt/__init__.py +0 -0
  142. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/modelopt_support/gpt/model_specs.py +0 -0
  143. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/modelopt_support/gpt/state_dict_hooks.py +0 -0
  144. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/modelopt_support/mamba/__init__.py +0 -0
  145. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/modelopt_support/mamba/model_specs.py +0 -0
  146. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/sampling_params.py +0 -0
  147. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/scheduler.py +0 -0
  148. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/text_generation_controllers/__init__.py +0 -0
  149. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/text_generation_controllers/encoder_decoder_text_generation_controller.py +0 -0
  150. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/text_generation_controllers/simple_text_generation_controller.py +0 -0
  151. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/text_generation_controllers/vlm_text_generation_controller.py +0 -0
  152. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/utils.py +0 -0
  153. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference_params.py +0 -0
  154. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/jit.py +0 -0
  155. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/model_parallel_config.py +0 -0
  156. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/T5/__init__.py +0 -0
  157. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/T5/t5_model.py +0 -0
  158. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/T5/t5_spec.py +0 -0
  159. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/__init__.py +0 -0
  160. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/bert/__init__.py +0 -0
  161. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/bert/bert_layer_specs.py +0 -0
  162. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/bert/bert_lm_head.py +0 -0
  163. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/bert/bert_model.py +0 -0
  164. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/bert/pooler.py +0 -0
  165. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/__init__.py +0 -0
  166. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/embeddings/__init__.py +0 -0
  167. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/embeddings/language_model_embedding.py +0 -0
  168. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/embeddings/relative_pos_embedding.py +0 -0
  169. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/embeddings/rope_utils.py +0 -0
  170. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/embeddings/rotary_pos_embedding.py +0 -0
  171. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py +0 -0
  172. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/language_module/__init__.py +0 -0
  173. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/language_module/language_module.py +0 -0
  174. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/vision_module/__init__.py +0 -0
  175. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/common/vision_module/vision_module.py +0 -0
  176. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/gpt/__init__.py +0 -0
  177. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/gpt/gpt_layer_specs.py +0 -0
  178. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/gpt/gpt_model.py +0 -0
  179. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/gpt/moe_module_specs.py +0 -0
  180. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/huggingface/__init__.py +0 -0
  181. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/huggingface/clip_model.py +0 -0
  182. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/huggingface/module.py +0 -0
  183. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/huggingface/qwen_model.py +0 -0
  184. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/mamba/__init__.py +0 -0
  185. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/mamba/mamba_layer_specs.py +0 -0
  186. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/mamba/mamba_model.py +0 -0
  187. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/multimodal/__init__.py +0 -0
  188. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/multimodal/context_parallel.py +0 -0
  189. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/multimodal/llava_model.py +0 -0
  190. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/multimodal/llava_spec.py +0 -0
  191. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/retro/__init__.py +0 -0
  192. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/retro/base_attention.py +0 -0
  193. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/retro/config.py +0 -0
  194. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/retro/decoder_attention.py +0 -0
  195. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/retro/decoder_spec.py +0 -0
  196. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/retro/encoder_attention.py +0 -0
  197. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/retro/encoder_spec.py +0 -0
  198. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/retro/model.py +0 -0
  199. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/retro/utils.py +0 -0
  200. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/vision/__init__.py +0 -0
  201. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/vision/clip_vit_model.py +0 -0
  202. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/vision/multimodal_projector.py +0 -0
  203. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/vision/radio.py +0 -0
  204. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/vision/vit_layer_specs.py +0 -0
  205. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/num_microbatches_calculator.py +0 -0
  206. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/optimizer/__init__.py +0 -0
  207. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/optimizer/clip_grads.py +0 -0
  208. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/optimizer/cpu_offloading/__init__.py +0 -0
  209. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/optimizer/cpu_offloading/hybrid_optimizer.py +0 -0
  210. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/optimizer/distrib_optimizer.py +0 -0
  211. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/optimizer/grad_scaler.py +0 -0
  212. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/optimizer/optimizer.py +0 -0
  213. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/optimizer/optimizer_config.py +0 -0
  214. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/optimizer_param_scheduler.py +0 -0
  215. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/packed_seq_params.py +0 -0
  216. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/parallel_state.py +0 -0
  217. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/pipeline_parallel/__init__.py +0 -0
  218. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/pipeline_parallel/p2p_communication.py +0 -0
  219. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/pipeline_parallel/schedules.py +0 -0
  220. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/post_training/__init__.py +0 -0
  221. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/post_training/modelopt/__init__.py +0 -0
  222. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/post_training/modelopt/gpt/__init__.py +0 -0
  223. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/post_training/modelopt/gpt/model_specs.py +0 -0
  224. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/post_training/modelopt/gpt/state_dict_hooks.py +0 -0
  225. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/post_training/modelopt/layers.py +0 -0
  226. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/post_training/modelopt/mamba/__init__.py +0 -0
  227. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/post_training/modelopt/mamba/model_specs.py +0 -0
  228. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/process_groups_config.py +0 -0
  229. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/requirements.txt +0 -0
  230. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/rerun_state_machine.py +0 -0
  231. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/ssm/__init__.py +0 -0
  232. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/ssm/mamba_block.py +0 -0
  233. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/ssm/mamba_config.py +0 -0
  234. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/ssm/mamba_hybrid_layer_allocation.py +0 -0
  235. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/ssm/mamba_layer.py +0 -0
  236. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/ssm/mamba_mixer.py +0 -0
  237. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/ssm/mlp_layer.py +0 -0
  238. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/ssm/triton_cache_manager.py +0 -0
  239. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/tensor_parallel/__init__.py +0 -0
  240. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/tensor_parallel/cross_entropy.py +0 -0
  241. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/tensor_parallel/data.py +0 -0
  242. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/tensor_parallel/layers.py +0 -0
  243. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/tensor_parallel/mappings.py +0 -0
  244. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/tensor_parallel/random.py +0 -0
  245. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/tensor_parallel/utils.py +0 -0
  246. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/timers.py +0 -0
  247. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/__init__.py +0 -0
  248. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/attention.py +0 -0
  249. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/cuda_graphs.py +0 -0
  250. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/custom_layers/__init__.py +0 -0
  251. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/custom_layers/transformer_engine.py +0 -0
  252. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/dot_product_attention.py +0 -0
  253. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/enums.py +0 -0
  254. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/identity_op.py +0 -0
  255. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/mlp.py +0 -0
  256. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/module.py +0 -0
  257. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/__init__.py +0 -0
  258. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/experts.py +0 -0
  259. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/fused_a2a.py +0 -0
  260. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/grouped_gemm_util.py +0 -0
  261. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/legacy_a2a_token_dispatcher.py +0 -0
  262. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/moe_layer.py +0 -0
  263. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/moe_utils.py +0 -0
  264. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/router.py +0 -0
  265. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/shared_experts.py +0 -0
  266. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/token_dispatcher.py +0 -0
  267. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/moe/upcycling_utils.py +0 -0
  268. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/multi_latent_attention.py +0 -0
  269. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/multi_token_prediction.py +0 -0
  270. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/spec_utils.py +0 -0
  271. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/torch_layer_norm.py +0 -0
  272. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/torch_norm.py +0 -0
  273. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/transformer_block.py +0 -0
  274. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/transformer_config.py +0 -0
  275. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/transformer_layer.py +0 -0
  276. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/transformer/utils.py +0 -0
  277. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron_core.egg-info/SOURCES.txt +0 -0
  278. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron_core.egg-info/dependency_links.txt +0 -0
  279. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron_core.egg-info/requires.txt +0 -0
  280. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron_core.egg-info/top_level.txt +0 -0
  281. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/pyproject.toml +0 -0
  282. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/requirements/pytorch_24.01/requirements.txt +0 -0
  283. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/requirements/pytorch_24.07/requirements.txt +0 -0
  284. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/requirements/pytorch_24.10/requirements.txt +0 -0
  285. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/requirements/pytorch_25.03/requirements.txt +0 -0
  286. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/setup.cfg +0 -0
  287. {megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: megatron-core
3
- Version: 0.12.0rc2
3
+ Version: 0.12.0rc3
4
4
  Summary: Megatron Core - a library for efficient and scalable training of transformer based models
5
5
  Home-page: https://github.com/NVIDIA/Megatron-LM/megatron/core
6
6
  Download-URL: https://github.com/NVIDIA/Megatron-LM/releases
@@ -22,7 +22,6 @@ from megatron.core.distributed.custom_fsdp.param_and_grad_buffer import (
22
22
  from megatron.core.distributed.data_parallel_base import _BaseDataParallel
23
23
  from megatron.core.distributed.distributed_data_parallel_config import DistributedDataParallelConfig
24
24
  from megatron.core.fp8_utils import is_float8tensor
25
- from megatron.core.models.common.embeddings.language_model_embedding import LanguageModelEmbedding
26
25
  from megatron.core.transformer.transformer_config import TransformerConfig
27
26
  from megatron.core.transformer.transformer_layer import TransformerLayer
28
27
  from megatron.core.utils import is_submodule, log_single_rank
@@ -124,8 +123,6 @@ class FullyShardedDataParallel(_BaseDataParallel):
124
123
  self.fsdp_unit_modules = fsdp_unit_modules
125
124
  else:
126
125
  self.fsdp_unit_modules = [TransformerLayer]
127
- if not getattr(self.module, "share_embeddings_and_output_weights", False):
128
- self.fsdp_unit_modules.append(LanguageModelEmbedding)
129
126
  self.main_weights = True
130
127
  self.data_parallel_group = parallel_state.get_data_parallel_group(
131
128
  with_context_parallel=True
@@ -177,6 +177,7 @@ class DynamicInferenceContext(BaseInferenceContext):
177
177
  (self.max_requests,), 0, dtype=torch.int32, device=torch.cuda.current_device()
178
178
  )
179
179
  self.request_query_lengths = torch.empty_like(self.request_ids)
180
+ self.request_output_lengths = torch.empty_like(self.request_ids)
180
181
  self.request_kv_length_offsets = torch.empty_like(self.request_ids)
181
182
  self.request_kv_chunk_counts = torch.empty_like(self.request_ids)
182
183
  self.request_last_kv_chunk_id = torch.empty_like(self.request_ids)
@@ -362,6 +363,10 @@ class DynamicInferenceContext(BaseInferenceContext):
362
363
  lengths = lengths[self.paused_request_count : self.total_request_count]
363
364
  return lengths
364
365
 
366
+ def get_max_sequence_lengths(self) -> Tensor:
367
+ """Maximum sequence length for active requests."""
368
+ return self.request_output_lengths[self.paused_request_count : self.total_request_count]
369
+
365
370
  def append_key_value_cache(self, layer_number: int, key: Tensor, value: Tensor) -> None:
366
371
  """Append to KV cache.
367
372
 
@@ -628,6 +633,7 @@ class DynamicInferenceContext(BaseInferenceContext):
628
633
  # Reset request indexes.
629
634
  self.request_ids.fill_(0)
630
635
  self.request_query_lengths.fill_(0)
636
+ self.request_output_lengths.fill_(0)
631
637
  self.request_kv_length_offsets.fill_(0)
632
638
  self.request_kv_chunk_counts.fill_(0)
633
639
  self.request_last_kv_chunk_id.fill_(0)
@@ -693,7 +699,9 @@ class DynamicInferenceContext(BaseInferenceContext):
693
699
 
694
700
  return last_token_logits
695
701
 
696
- def add_request(self, request_id: int, tokens: List[int]) -> None:
702
+ def add_request(
703
+ self, request_id: int, tokens: List[int], num_tokens_to_generate: Optional[int] = None
704
+ ) -> None:
697
705
  """Add request to context.
698
706
 
699
707
  After a request is added, it will first do one prefill step, followed by
@@ -731,9 +739,17 @@ class DynamicInferenceContext(BaseInferenceContext):
731
739
  if new_chunk_ids is None:
732
740
  raise ChunkOverflowError()
733
741
 
742
+ if num_tokens_to_generate is None:
743
+ num_tokens_to_generate = self.max_sequence_length - context_length
744
+ elif context_length + num_tokens_to_generate > self.max_sequence_length:
745
+ raise TokenOverflowError()
746
+
734
747
  # Update request state.
735
748
  self.request_ids[self.total_request_count] = request_id
736
749
  self.request_query_lengths[self.total_request_count] = context_length
750
+ self.request_output_lengths[self.total_request_count] = (
751
+ context_length + num_tokens_to_generate
752
+ )
737
753
  self.request_kv_length_offsets[self.total_request_count] = 0
738
754
  self.request_kv_memory[self.total_request_count][:num_chunks_needed] = new_chunk_ids
739
755
  self.request_kv_chunk_counts[self.total_request_count] = num_chunks_needed
@@ -861,6 +877,7 @@ class DynamicInferenceContext(BaseInferenceContext):
861
877
  # Shift active requests left.
862
878
  self.request_kv_length_offsets[dst_idxs] = self.request_kv_length_offsets[src_idxs]
863
879
  self.request_query_lengths[dst_idxs] = self.request_query_lengths[src_idxs]
880
+ self.request_output_lengths[dst_idxs] = self.request_output_lengths[src_idxs]
864
881
  self.request_ids[dst_idxs] = self.request_ids[src_idxs]
865
882
  next_tokens[dst_idxs] = next_tokens[src_idxs]
866
883
 
@@ -910,6 +927,7 @@ class DynamicInferenceContext(BaseInferenceContext):
910
927
 
911
928
  self.request_kv_length_offsets[dst_idxs] = self.request_kv_length_offsets[src_idxs]
912
929
  self.request_query_lengths[dst_idxs] = self.request_query_lengths[src_idxs]
930
+ self.request_output_lengths[dst_idxs] = self.request_output_lengths[src_idxs]
913
931
  self.request_ids[dst_idxs] = self.request_ids[src_idxs]
914
932
  next_tokens[dst_idxs] = next_tokens[src_idxs]
915
933
 
@@ -85,12 +85,18 @@ class DynamicInferenceEngine(AbstractEngine):
85
85
  self.context.reset()
86
86
  self.finished_request_count = 0
87
87
 
88
- def add_request(self, request_id: int, prompt: Union[str, List[int], Tensor]) -> None:
88
+ def add_request(
89
+ self,
90
+ request_id: int,
91
+ prompt: Union[str, List[int], Tensor],
92
+ num_tokens_to_generate: Optional[int] = None,
93
+ ) -> None:
89
94
  """Add request to inference context.
90
95
 
91
96
  Args:
92
97
  request_id (int): Unique ID of request.
93
98
  prompt (Union[str, Tensor]): Prompt as either a text string or token IDs.
99
+ num_tokens_to_generate (Optional[int]): Number of output tokens to generate
94
100
 
95
101
  Return:
96
102
  None.
@@ -120,7 +126,7 @@ class DynamicInferenceEngine(AbstractEngine):
120
126
  raise Exception("specialize for <%s>." % type(prompt).__name__)
121
127
 
122
128
  # Add request to context.
123
- return self.context.add_request(request_id, tokens)
129
+ return self.context.add_request(request_id, tokens, num_tokens_to_generate)
124
130
 
125
131
  def step(
126
132
  self, sampling_params: SamplingParams, *, verbose: Optional[bool] = False
@@ -335,10 +335,13 @@ class TextGenerationController:
335
335
  context.paused_request_count : context.total_request_count
336
336
  ].long()
337
337
  active_sequence_lengths = context.get_active_sequence_lengths()
338
+ active_sequence_lengths += 1 # Account for the token we just generated
339
+ max_sequence_lengths = context.get_max_sequence_lengths()
338
340
 
339
- # Request finished if termination_id or length > max_sequence_length.
340
- active_request_mask = (new_sample != termination_id).byte() & (
341
- active_sequence_lengths < context.max_sequence_length
341
+ # Request finished if termination_id or length >= max_sequence_length.
342
+
343
+ active_request_mask = (new_sample != termination_id).byte() & torch.less(
344
+ active_sequence_lengths, max_sequence_lengths
342
345
  ).byte()
343
346
  finished_idxs = (
344
347
  torch.nonzero(active_request_mask == 0, as_tuple=True)[0] + context.paused_request_count
@@ -4,7 +4,7 @@
4
4
  MAJOR = 0
5
5
  MINOR = 12
6
6
  PATCH = 0
7
- PRE_RELEASE = 'rc2'
7
+ PRE_RELEASE = 'rc3'
8
8
 
9
9
  # Use the following formatting: (major, minor, patch, pre-release)
10
10
  VERSION = (MAJOR, MINOR, PATCH, PRE_RELEASE)
@@ -47,6 +47,7 @@ except Exception:
47
47
  # This is a WAR for building docs, where torch is not actually imported
48
48
  _torch_version = PkgVersion("0.0.0")
49
49
  _te_version = None
50
+ _fa_version = None
50
51
 
51
52
 
52
53
  class ExperimentalNotEnabledError(Exception):
@@ -279,6 +280,30 @@ def is_torch_min_version(version, check_equality=True):
279
280
  return get_torch_version() > PkgVersion(version)
280
281
 
281
282
 
283
+ def get_fa_version():
284
+ """Get Flash attention version from __version__; if not available use pip's. Use caching."""
285
+
286
+ def get_fa_version_str():
287
+ import flash_attn as fa
288
+
289
+ if hasattr(fa, '__version__'):
290
+ return str(fa.__version__)
291
+ else:
292
+ return version("flash-attn")
293
+
294
+ global _fa_version
295
+ if _fa_version is None:
296
+ _fa_version = PkgVersion(get_fa_version_str())
297
+ return _fa_version
298
+
299
+
300
+ def is_fa_min_version(version, check_equality=True):
301
+ """Check if minimum version of `flash-attn` is installed."""
302
+ if check_equality:
303
+ return get_fa_version() >= PkgVersion(version)
304
+ return get_fa_version() > PkgVersion(version)
305
+
306
+
282
307
  def ensure_divisibility(numerator, denominator):
283
308
  """Ensure that numerator is divisible by the denominator."""
284
309
  assert numerator % denominator == 0, "{} is not divisible by {}".format(numerator, denominator)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: megatron-core
3
- Version: 0.12.0rc2
3
+ Version: 0.12.0rc3
4
4
  Summary: Megatron Core - a library for efficient and scalable training of transformer based models
5
5
  Home-page: https://github.com/NVIDIA/Megatron-LM/megatron/core
6
6
  Download-URL: https://github.com/NVIDIA/Megatron-LM/releases