megatron-core 0.16.0rc0.dev108138__tar.gz → 0.16.0rc0.dev109400__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of megatron-core might be problematic. Click here for more details.
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/PKG-INFO +7 -14
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/megatron_tokenizer.py +0 -9
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/fp8_utils.py +0 -49
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/package_info.py +1 -1
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron_core.egg-info/PKG-INFO +7 -14
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron_core.egg-info/requires.txt +6 -13
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/pyproject.toml +6 -13
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/MANIFEST.in +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/README.md +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/README.md +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/activations.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/config.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/config_logger.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/bert_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/blended_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/blended_megatron_dataset_builder.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/blended_megatron_dataset_config.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/gpt_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/helpers.cpp +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/helpers.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/indexed_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/masked_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/megatron_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/multimodal_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/object_storage_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/config/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/config/bert_embedders.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/config/config.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/config/gpt_chunk_datasets.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/config/tokenizers.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/db/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/db/build.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/db/dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/db/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/external_libs.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/index/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/index/build.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/index/factory.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/index/index.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/index/indexes/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/index/indexes/faiss_base.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/index/indexes/faiss_par_add.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/index/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/index/validate.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/query/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/query/gpt_chunk_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/query/multi_split_gpt_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/query/query.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/query/retro_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/query/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/retro/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/t5_dataset.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/datasets/utils_s3.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/core.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/dict_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/exchange_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/mapping.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/optimizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/serialization.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/state_dict_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/strategies/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/strategies/async_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/strategies/base.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/strategies/cached_metadata_filesystem_reader.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/strategies/checkpointable.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/strategies/common.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/strategies/filesystem_async.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/strategies/fully_parallel.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/strategies/resharding.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/strategies/state_dict_saver.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/strategies/tensorstore.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/strategies/torch.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/strategies/two_stage.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/strategies/zarr.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/tensor_aware_state_dict.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/dist_checkpointing/validation.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/distributed/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/distributed/data_parallel_base.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/distributed/distributed_data_parallel.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/distributed/distributed_data_parallel_config.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/distributed/finalize_model_grads.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/distributed/fsdp/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/distributed/fsdp/mcore_fsdp_adapter.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/distributed/fsdp/src/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/distributed/fsdp/src/megatron_fsdp/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/distributed/fsdp/src/megatron_fsdp/distributed_data_parallel_config.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/distributed/fsdp/src/megatron_fsdp/fully_shard.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/distributed/fsdp/src/megatron_fsdp/megatron_fsdp.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/distributed/fsdp/src/megatron_fsdp/package_info.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/distributed/fsdp/src/megatron_fsdp/param_and_grad_buffer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/distributed/fsdp/src/megatron_fsdp/uneven_dtensor.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/distributed/fsdp/src/megatron_fsdp/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/distributed/param_and_grad_buffer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/distributed/reduce_scatter_with_fp32_accumulation.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/distributed/torch_fully_sharded_data_parallel.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/distributed/torch_fully_sharded_data_parallel_config.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/energy_monitor.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/enums.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/export/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/export/data_type.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/export/export_config.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/export/model_type.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/export/trtllm/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/export/trtllm/engine_builder/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/export/trtllm/engine_builder/trtllm_engine_builder.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/export/trtllm/model_to_trllm_mapping/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/export/trtllm/model_to_trllm_mapping/default_conversion_dict.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/export/trtllm/trt_model_config.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/export/trtllm/trt_model_type.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/export/trtllm/trtllm_helper.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/export/trtllm/trtllm_layers.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/export/trtllm/trtllm_weights_converter/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/export/trtllm/trtllm_weights_converter/distributed_trtllm_model_weights_converter.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/export/trtllm/trtllm_weights_converter/single_device_trtllm_model_weights_converter.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/export/trtllm/trtllm_weights_converter/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/extensions/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/extensions/kitchen.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/extensions/transformer_engine.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/extensions/transformer_engine_spec_provider.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/fp4_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/full_cuda_graph.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/fusions/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/fusions/fused_bias_dropout.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/fusions/fused_bias_geglu.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/fusions/fused_bias_gelu.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/fusions/fused_bias_swiglu.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/fusions/fused_cross_entropy.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/fusions/fused_indices_converter.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/fusions/fused_layer_norm.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/fusions/fused_mla_yarn_rope_apply.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/fusions/fused_pad_routing_map.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/fusions/fused_softmax.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/fusions/fused_weighted_squared_relu.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/hyper_comm_grid.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/async_stream.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/common_inference_params.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/communication_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/contexts/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/contexts/attention_context/metadata_base.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/contexts/attention_context/mha_metadata.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/contexts/base_context.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/contexts/dynamic_block_allocator.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/contexts/dynamic_context.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/contexts/fused_kv_append_kernel.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/contexts/static_context.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/data_parallel_inference_coordinator.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/engines/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/engines/abstract_engine.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/engines/dynamic_engine.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/engines/mcore_engine.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/engines/static_engine.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/headers.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/inference_client.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/inference_request.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/model_inference_wrappers/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/model_inference_wrappers/gpt/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/model_inference_wrappers/gpt/gpt_inference_wrapper.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/model_inference_wrappers/inference_wrapper_config.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/model_inference_wrappers/multimodal/vlm_inference_wrapper.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/model_inference_wrappers/t5/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/model_inference_wrappers/t5/t5_inference_wrapper.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/sampling_params.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/scheduler.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/text_generation_controllers/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/text_generation_controllers/encoder_decoder_text_generation_controller.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/text_generation_controllers/simple_text_generation_controller.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/text_generation_controllers/text_generation_controller.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/text_generation_controllers/vlm_text_generation_controller.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/text_generation_server/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/text_generation_server/endpoints/common.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/text_generation_server/endpoints/completions.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/text_generation_server/run_mcore_engine.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/text_generation_server/text_generation_server.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/text_generation_server/tokenization.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/unified_memory.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/inference_params.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/jit.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/model_parallel_config.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/T5/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/T5/t5_model.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/T5/t5_spec.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/backends.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/bert/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/bert/bert_layer_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/bert/bert_lm_head.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/bert/bert_model.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/bert/pooler.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/common/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/common/embeddings/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/common/embeddings/language_model_embedding.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/common/embeddings/relative_pos_embedding.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/common/embeddings/rope_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/common/embeddings/rotary_pos_embedding.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/common/embeddings/yarn_rotary_pos_embedding.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/common/language_module/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/common/language_module/language_module.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/common/model_chunk_schedule_plan.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/common/vision_module/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/common/vision_module/vision_module.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/gpt/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/gpt/fine_grained_callables.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/gpt/gpt_layer_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/gpt/gpt_model.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/gpt/heterogeneous/heterogeneous_layer_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/gpt/moe_module_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/huggingface/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/huggingface/clip_model.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/huggingface/module.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/huggingface/qwen_model.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/mamba/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/mamba/mamba_layer_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/mamba/mamba_model.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/mimo/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/mimo/config/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/mimo/config/base_configs.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/mimo/model/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/mimo/model/base.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/mimo/submodules/audio.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/mimo/submodules/base.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/mimo/submodules/vision.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/multimodal/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/multimodal/context_parallel.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/multimodal/llava_model.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/multimodal/llava_spec.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/retro/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/retro/base_attention.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/retro/config.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/retro/decoder_attention.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/retro/decoder_spec.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/retro/encoder_attention.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/retro/encoder_spec.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/retro/model.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/retro/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/vision/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/vision/clip_vit_model.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/vision/multimodal_projector.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/vision/radio.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/models/vision/vit_layer_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/msc_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/nccl_allocator.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/num_microbatches_calculator.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/optimizer/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/optimizer/clip_grads.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/optimizer/cpu_offloading/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/optimizer/cpu_offloading/hybrid_optimizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/optimizer/distrib_optimizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/optimizer/grad_scaler.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/optimizer/optimizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/optimizer/optimizer_config.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/optimizer_param_scheduler.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/packed_seq_params.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/parallel_state.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/pipeline_parallel/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/pipeline_parallel/bridge_communicator.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/pipeline_parallel/combined_1f1b.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/pipeline_parallel/p2p_communication.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/pipeline_parallel/schedules.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/pipeline_parallel/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/post_training/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/post_training/modelopt/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/post_training/modelopt/gpt/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/post_training/modelopt/gpt/model_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/post_training/modelopt/gpt/state_dict_hooks.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/post_training/modelopt/layers.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/post_training/modelopt/mamba/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/post_training/modelopt/mamba/model_specs.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/process_groups_config.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/quantization/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/quantization/quant_config.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/quantization/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/requirements.txt +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/rerun_state_machine.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/safe_globals.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/ssm/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/ssm/mamba_block.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/ssm/mamba_context_parallel.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/ssm/mamba_hybrid_layer_allocation.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/ssm/mamba_layer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/ssm/mamba_mixer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/ssm/mlp_layer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/ssm/triton_cache_manager.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tensor_parallel/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tensor_parallel/cross_entropy.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tensor_parallel/data.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tensor_parallel/layers.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tensor_parallel/mappings.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tensor_parallel/random.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tensor_parallel/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/timers.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/base_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/megatron_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/text/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/text/libraries/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/text/libraries/abstract_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/text/libraries/bytelevel_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/text/libraries/chat_template.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/text/libraries/huggingface_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/text/libraries/megatron_hf_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/text/libraries/null_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/text/libraries/sentencepiece_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/text/libraries/tiktoken_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/text/models/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/text/models/bert_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/text/models/default_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/text/models/gpt_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/text/models/mamba_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/text/models/retro_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/text/models/t5_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/text/text_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/tokenizers/text/utils/build_tokenizer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/attention.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/cuda_graphs.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/custom_layers/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/custom_layers/transformer_engine.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/dot_product_attention.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/enums.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/fsdp_dtensor_checkpoint.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/heterogeneous/heterogeneous_config.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/heterogeneous/linear_replacements.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/identity_op.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/mlp.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/module.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/moe/__init__.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/moe/experts.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/moe/fused_a2a.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/moe/grouped_gemm_util.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/moe/moe_layer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/moe/moe_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/moe/router.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/moe/shared_experts.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/moe/token_dispatcher.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/moe/upcycling_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/multi_latent_attention.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/multi_token_prediction.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/pipeline_parallel_layer_layout.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/spec_utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/torch_layer_norm.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/torch_norm.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/transformer_block.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/transformer_config.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/transformer_layer.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/transformer/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/utils.py +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron_core.egg-info/SOURCES.txt +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron_core.egg-info/dependency_links.txt +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron_core.egg-info/top_level.txt +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/setup.cfg +0 -0
- {megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/setup.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: megatron-core
|
|
3
|
-
Version: 0.16.0rc0.dev108138
|
|
3
|
+
Version: 0.16.0rc0.dev109400
|
|
4
4
|
Summary: Megatron Core - a library for efficient and scalable training of transformer based models
|
|
5
5
|
Author-email: NVIDIA <nemo-toolkit@nvidia.com>
|
|
6
6
|
Maintainer-email: NVIDIA <nemo-toolkit@nvidia.com>
|
|
@@ -41,7 +41,7 @@ Requires-Dist: transformers; extra == "mlm"
|
|
|
41
41
|
Provides-Extra: dev
|
|
42
42
|
Requires-Dist: nvidia-modelopt[torch]; sys_platform != "darwin" and extra == "dev"
|
|
43
43
|
Requires-Dist: transformer-engine[pytorch]<2.10.0,>=2.9.0a0; extra == "dev"
|
|
44
|
-
Requires-Dist: nvidia-resiliency-ext; extra == "dev"
|
|
44
|
+
Requires-Dist: nvidia-resiliency-ext<0.5.0,>=0.4.0a0; extra == "dev"
|
|
45
45
|
Requires-Dist: tqdm; extra == "dev"
|
|
46
46
|
Requires-Dist: einops~=0.8; extra == "dev"
|
|
47
47
|
Requires-Dist: tensorstore!=0.1.46,!=0.1.72,~=0.1; extra == "dev"
|
|
@@ -59,20 +59,13 @@ Requires-Dist: wget; extra == "dev"
|
|
|
59
59
|
Requires-Dist: onnxscript; extra == "dev"
|
|
60
60
|
Provides-Extra: lts
|
|
61
61
|
Requires-Dist: tqdm; extra == "lts"
|
|
62
|
-
Requires-Dist: einops
|
|
63
|
-
Requires-Dist: tensorstore!=0.1.46,!=0.1.72
|
|
64
|
-
Requires-Dist: nvtx
|
|
65
|
-
Requires-Dist:
|
|
66
|
-
Requires-Dist:
|
|
62
|
+
Requires-Dist: einops; extra == "lts"
|
|
63
|
+
Requires-Dist: tensorstore!=0.1.46,!=0.1.72; extra == "lts"
|
|
64
|
+
Requires-Dist: nvtx; extra == "lts"
|
|
65
|
+
Requires-Dist: transformers; extra == "lts"
|
|
66
|
+
Requires-Dist: zarr; extra == "lts"
|
|
67
67
|
Requires-Dist: setuptools<80.0.0; extra == "lts"
|
|
68
|
-
Requires-Dist: mamba-ssm~=2.2; extra == "lts"
|
|
69
|
-
Requires-Dist: causal-conv1d~=1.5; extra == "lts"
|
|
70
|
-
Requires-Dist: nv-grouped-gemm~=1.1; extra == "lts"
|
|
71
|
-
Requires-Dist: megatron-energon[av_decode]~=6.0; extra == "lts"
|
|
72
|
-
Requires-Dist: av<16.0.0; extra == "lts"
|
|
73
|
-
Requires-Dist: flashinfer-python; extra == "lts"
|
|
74
68
|
Requires-Dist: wget; extra == "lts"
|
|
75
|
-
Requires-Dist: onnxscript; extra == "lts"
|
|
76
69
|
|
|
77
70
|
<div align="center">
|
|
78
71
|
|
|
@@ -1,14 +1,11 @@
|
|
|
1
1
|
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
|
|
2
2
|
import json
|
|
3
|
-
import logging
|
|
4
3
|
from abc import ABC, abstractmethod
|
|
5
4
|
from collections import OrderedDict
|
|
6
5
|
from typing import Any
|
|
7
6
|
|
|
8
7
|
import numpy
|
|
9
8
|
|
|
10
|
-
logger = logging.getLogger(__name__)
|
|
11
|
-
|
|
12
9
|
|
|
13
10
|
class MegatronLegacyTokenizer(ABC):
|
|
14
11
|
"""Abstract class for tokenizer
|
|
@@ -23,12 +20,6 @@ class MegatronLegacyTokenizer(ABC):
|
|
|
23
20
|
"""
|
|
24
21
|
|
|
25
22
|
def __init__(self, *tokenizer_paths: str, **tokenizer_options: Any):
|
|
26
|
-
# Deprecation warning
|
|
27
|
-
logger.warning(
|
|
28
|
-
"You’re using the legacy tokenizer system, which is deprecated "
|
|
29
|
-
"and will be removed in a future release. Please migrate to the new tokenizer system "
|
|
30
|
-
"(`megatron.core.tokenizers.MegatronTokenizer`)."
|
|
31
|
-
)
|
|
32
23
|
self.unique_identifiers = OrderedDict()
|
|
33
24
|
self.unique_identifiers["class"] = type(self).__name__
|
|
34
25
|
self.unique_identifiers["tokenizer_path"] = list(tokenizer_paths)
|
{megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/fp8_utils.py
RENAMED
|
@@ -10,12 +10,6 @@ from typing import List, Optional
|
|
|
10
10
|
import torch
|
|
11
11
|
|
|
12
12
|
from megatron.core.enums import Fp8Recipe
|
|
13
|
-
from megatron.core.tensor_parallel import (
|
|
14
|
-
ColumnParallelLinear,
|
|
15
|
-
RowParallelLinear,
|
|
16
|
-
gather_from_sequence_parallel_region,
|
|
17
|
-
reduce_scatter_to_sequence_parallel_region,
|
|
18
|
-
)
|
|
19
13
|
from megatron.core.transformer.transformer_config import TransformerConfig
|
|
20
14
|
from megatron.core.utils import get_te_version, is_te_min_version
|
|
21
15
|
|
|
@@ -118,27 +112,6 @@ def get_fp8_align_size(fp8_recipe: Fp8Recipe) -> int:
|
|
|
118
112
|
return 16
|
|
119
113
|
|
|
120
114
|
|
|
121
|
-
def is_column_parallel_linear(module):
|
|
122
|
-
"""Returns whether the given module is a ColumnParallelLinear layer."""
|
|
123
|
-
if HAVE_TE and (
|
|
124
|
-
isinstance(module, TEColumnParallelLinear)
|
|
125
|
-
or isinstance(module, TELayerNormColumnParallelLinear)
|
|
126
|
-
):
|
|
127
|
-
return True
|
|
128
|
-
elif isinstance(module, ColumnParallelLinear):
|
|
129
|
-
return True
|
|
130
|
-
return False
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
def is_row_parallel_linear(module):
|
|
134
|
-
"""Returns whether the given module is a RowParallelLinear layer."""
|
|
135
|
-
if HAVE_TE and isinstance(module, TERowParallelLinear):
|
|
136
|
-
return True
|
|
137
|
-
elif isinstance(module, RowParallelLinear):
|
|
138
|
-
return True
|
|
139
|
-
return False
|
|
140
|
-
|
|
141
|
-
|
|
142
115
|
"""
|
|
143
116
|
The code below abstracts the functionalities needed for implementing "--fp8-param-gather" into
|
|
144
117
|
several functions. It provides different implementations for each function based on different
|
|
@@ -614,18 +587,6 @@ if HAVE_TE:
|
|
|
614
587
|
if not FP8GlobalStateManager.is_fp8_enabled():
|
|
615
588
|
return original_forward(input_tensor, *args, **kwargs)
|
|
616
589
|
|
|
617
|
-
# With sequence parallelism we need to all-gather before padding
|
|
618
|
-
# and reduce-scatter after unpadding
|
|
619
|
-
if is_sequence_parallel := getattr(module, "sequence_parallel", False):
|
|
620
|
-
if is_column_parallel_linear(module):
|
|
621
|
-
input_tensor = gather_from_sequence_parallel_region(
|
|
622
|
-
input_tensor, group=module.tp_group
|
|
623
|
-
)
|
|
624
|
-
|
|
625
|
-
# Disable sequence parallelism on the module because we are handling the
|
|
626
|
-
# all-gather and reduce-scatter externally
|
|
627
|
-
module.sequence_parallel = False
|
|
628
|
-
|
|
629
590
|
seq_len, batch_size, hidden_size = input_tensor.shape
|
|
630
591
|
# Reshape to (S, B*H) to pad sequence dimension
|
|
631
592
|
input_2d = input_tensor.reshape(seq_len, -1)
|
|
@@ -651,16 +612,6 @@ if HAVE_TE:
|
|
|
651
612
|
unpadded_output_2d = _unpad_func(output_2d, [seq_len])
|
|
652
613
|
unpadded_output = unpadded_output_2d.reshape(seq_len, batch_size, output_hidden_size)
|
|
653
614
|
|
|
654
|
-
if is_sequence_parallel:
|
|
655
|
-
# Reduce-scatter after unpadding
|
|
656
|
-
if is_row_parallel_linear(module):
|
|
657
|
-
unpadded_output = reduce_scatter_to_sequence_parallel_region(
|
|
658
|
-
unpadded_output, group=module.tp_group
|
|
659
|
-
)
|
|
660
|
-
|
|
661
|
-
# Reset sequence parallelism flag on the module
|
|
662
|
-
module.sequence_parallel = True
|
|
663
|
-
|
|
664
615
|
if other_outputs:
|
|
665
616
|
return (unpadded_output,) + other_outputs
|
|
666
617
|
else:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: megatron-core
|
|
3
|
-
Version: 0.16.0rc0.dev108138
|
|
3
|
+
Version: 0.16.0rc0.dev109400
|
|
4
4
|
Summary: Megatron Core - a library for efficient and scalable training of transformer based models
|
|
5
5
|
Author-email: NVIDIA <nemo-toolkit@nvidia.com>
|
|
6
6
|
Maintainer-email: NVIDIA <nemo-toolkit@nvidia.com>
|
|
@@ -41,7 +41,7 @@ Requires-Dist: transformers; extra == "mlm"
|
|
|
41
41
|
Provides-Extra: dev
|
|
42
42
|
Requires-Dist: nvidia-modelopt[torch]; sys_platform != "darwin" and extra == "dev"
|
|
43
43
|
Requires-Dist: transformer-engine[pytorch]<2.10.0,>=2.9.0a0; extra == "dev"
|
|
44
|
-
Requires-Dist: nvidia-resiliency-ext; extra == "dev"
|
|
44
|
+
Requires-Dist: nvidia-resiliency-ext<0.5.0,>=0.4.0a0; extra == "dev"
|
|
45
45
|
Requires-Dist: tqdm; extra == "dev"
|
|
46
46
|
Requires-Dist: einops~=0.8; extra == "dev"
|
|
47
47
|
Requires-Dist: tensorstore!=0.1.46,!=0.1.72,~=0.1; extra == "dev"
|
|
@@ -59,20 +59,13 @@ Requires-Dist: wget; extra == "dev"
|
|
|
59
59
|
Requires-Dist: onnxscript; extra == "dev"
|
|
60
60
|
Provides-Extra: lts
|
|
61
61
|
Requires-Dist: tqdm; extra == "lts"
|
|
62
|
-
Requires-Dist: einops
|
|
63
|
-
Requires-Dist: tensorstore!=0.1.46,!=0.1.72
|
|
64
|
-
Requires-Dist: nvtx
|
|
65
|
-
Requires-Dist:
|
|
66
|
-
Requires-Dist:
|
|
62
|
+
Requires-Dist: einops; extra == "lts"
|
|
63
|
+
Requires-Dist: tensorstore!=0.1.46,!=0.1.72; extra == "lts"
|
|
64
|
+
Requires-Dist: nvtx; extra == "lts"
|
|
65
|
+
Requires-Dist: transformers; extra == "lts"
|
|
66
|
+
Requires-Dist: zarr; extra == "lts"
|
|
67
67
|
Requires-Dist: setuptools<80.0.0; extra == "lts"
|
|
68
|
-
Requires-Dist: mamba-ssm~=2.2; extra == "lts"
|
|
69
|
-
Requires-Dist: causal-conv1d~=1.5; extra == "lts"
|
|
70
|
-
Requires-Dist: nv-grouped-gemm~=1.1; extra == "lts"
|
|
71
|
-
Requires-Dist: megatron-energon[av_decode]~=6.0; extra == "lts"
|
|
72
|
-
Requires-Dist: av<16.0.0; extra == "lts"
|
|
73
|
-
Requires-Dist: flashinfer-python; extra == "lts"
|
|
74
68
|
Requires-Dist: wget; extra == "lts"
|
|
75
|
-
Requires-Dist: onnxscript; extra == "lts"
|
|
76
69
|
|
|
77
70
|
<div align="center">
|
|
78
71
|
|
|
@@ -4,7 +4,7 @@ packaging>=24.2
|
|
|
4
4
|
|
|
5
5
|
[dev]
|
|
6
6
|
transformer-engine[pytorch]<2.10.0,>=2.9.0a0
|
|
7
|
-
nvidia-resiliency-ext
|
|
7
|
+
nvidia-resiliency-ext<0.5.0,>=0.4.0a0
|
|
8
8
|
tqdm
|
|
9
9
|
einops~=0.8
|
|
10
10
|
tensorstore!=0.1.46,!=0.1.72,~=0.1
|
|
@@ -26,20 +26,13 @@ nvidia-modelopt[torch]
|
|
|
26
26
|
|
|
27
27
|
[lts]
|
|
28
28
|
tqdm
|
|
29
|
-
einops
|
|
30
|
-
tensorstore!=0.1.46,!=0.1.72
|
|
31
|
-
nvtx
|
|
32
|
-
|
|
33
|
-
|
|
29
|
+
einops
|
|
30
|
+
tensorstore!=0.1.46,!=0.1.72
|
|
31
|
+
nvtx
|
|
32
|
+
transformers
|
|
33
|
+
zarr
|
|
34
34
|
setuptools<80.0.0
|
|
35
|
-
mamba-ssm~=2.2
|
|
36
|
-
causal-conv1d~=1.5
|
|
37
|
-
nv-grouped-gemm~=1.1
|
|
38
|
-
megatron-energon[av_decode]~=6.0
|
|
39
|
-
av<16.0.0
|
|
40
|
-
flashinfer-python
|
|
41
35
|
wget
|
|
42
|
-
onnxscript
|
|
43
36
|
|
|
44
37
|
[mlm]
|
|
45
38
|
flask-restful
|
|
@@ -69,7 +69,7 @@ mlm = ["flask-restful", "sentencepiece", "tiktoken", "wandb", "transformers"]
|
|
|
69
69
|
dev = [
|
|
70
70
|
"nvidia-modelopt[torch]; sys_platform != 'darwin'",
|
|
71
71
|
"transformer-engine[pytorch]>=2.9.0a0,<2.10.0",
|
|
72
|
-
"nvidia-resiliency-ext",
|
|
72
|
+
"nvidia-resiliency-ext>=0.4.0a0,<0.5.0",
|
|
73
73
|
"tqdm",
|
|
74
74
|
"einops~=0.8",
|
|
75
75
|
"tensorstore~=0.1,!=0.1.46,!=0.1.72",
|
|
@@ -89,20 +89,13 @@ dev = [
|
|
|
89
89
|
|
|
90
90
|
lts = [
|
|
91
91
|
"tqdm",
|
|
92
|
-
"einops
|
|
93
|
-
"tensorstore
|
|
94
|
-
"nvtx
|
|
95
|
-
"
|
|
96
|
-
"
|
|
92
|
+
"einops",
|
|
93
|
+
"tensorstore!=0.1.46,!=0.1.72",
|
|
94
|
+
"nvtx",
|
|
95
|
+
"transformers",
|
|
96
|
+
"zarr",
|
|
97
97
|
"setuptools<80.0.0",
|
|
98
|
-
"mamba-ssm~=2.2",
|
|
99
|
-
"causal-conv1d~=1.5",
|
|
100
|
-
"nv-grouped-gemm~=1.1",
|
|
101
|
-
"megatron-energon[av_decode]~=6.0",
|
|
102
|
-
"av<16.0.0", # At the time, av 16.0.0 is not compatible with Python 3.12
|
|
103
|
-
"flashinfer-python",
|
|
104
98
|
"wget",
|
|
105
|
-
"onnxscript",
|
|
106
99
|
]
|
|
107
100
|
|
|
108
101
|
[dependency-groups]
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/README.md
RENAMED
|
File without changes
|
{megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/__init__.py
RENAMED
|
File without changes
|
{megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/activations.py
RENAMED
|
File without changes
|
{megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/config.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/enums.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{megatron_core-0.16.0rc0.dev108138 → megatron_core-0.16.0rc0.dev109400}/megatron/core/fp4_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|