PyPI - megatron-core - Versions diffs - 0.12.0rc2__tar.gz → 0.12.0rc3__tar.gz - Mend

megatron-core 0.12.0rc2__tar.gz → 0.12.0rc3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of megatron-core might be problematic. Click here for more details.

Files changed (287) hide show

{megatron_core-0.12.0rc2/megatron_core.egg-info → megatron_core-0.12.0rc3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: megatron-core
-Version: 0.12.0rc2
+Version: 0.12.0rc3
 Summary: Megatron Core - a library for efficient and scalable training of transformer based models
 Home-page: https://github.com/NVIDIA/Megatron-LM/megatron/core
 Download-URL: https://github.com/NVIDIA/Megatron-LM/releases

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/custom_fsdp/fully_sharded_data_parallel.py RENAMED Viewed

@@ -22,7 +22,6 @@ from megatron.core.distributed.custom_fsdp.param_and_grad_buffer import (
 from megatron.core.distributed.data_parallel_base import _BaseDataParallel
 from megatron.core.distributed.distributed_data_parallel_config import DistributedDataParallelConfig
 from megatron.core.fp8_utils import is_float8tensor
-from megatron.core.models.common.embeddings.language_model_embedding import LanguageModelEmbedding
 from megatron.core.transformer.transformer_config import TransformerConfig
 from megatron.core.transformer.transformer_layer import TransformerLayer
 from megatron.core.utils import is_submodule, log_single_rank
@@ -124,8 +123,6 @@ class FullyShardedDataParallel(_BaseDataParallel):
             self.fsdp_unit_modules = fsdp_unit_modules
         else:
             self.fsdp_unit_modules = [TransformerLayer]
-            if not getattr(self.module, "share_embeddings_and_output_weights", False):
-                self.fsdp_unit_modules.append(LanguageModelEmbedding)
         self.main_weights = True
         self.data_parallel_group = parallel_state.get_data_parallel_group(
             with_context_parallel=True

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/contexts/dynamic_context.py RENAMED Viewed

@@ -177,6 +177,7 @@ class DynamicInferenceContext(BaseInferenceContext):
             (self.max_requests,), 0, dtype=torch.int32, device=torch.cuda.current_device()
         )
         self.request_query_lengths = torch.empty_like(self.request_ids)
+        self.request_output_lengths = torch.empty_like(self.request_ids)
         self.request_kv_length_offsets = torch.empty_like(self.request_ids)
         self.request_kv_chunk_counts = torch.empty_like(self.request_ids)
         self.request_last_kv_chunk_id = torch.empty_like(self.request_ids)
@@ -362,6 +363,10 @@ class DynamicInferenceContext(BaseInferenceContext):
         lengths = lengths[self.paused_request_count : self.total_request_count]
         return lengths
+    def get_max_sequence_lengths(self) -> Tensor:
+        """Maximum sequence length for active requests."""
+        return self.request_output_lengths[self.paused_request_count : self.total_request_count]
     def append_key_value_cache(self, layer_number: int, key: Tensor, value: Tensor) -> None:
         """Append to KV cache.
@@ -628,6 +633,7 @@ class DynamicInferenceContext(BaseInferenceContext):
         # Reset request indexes.
         self.request_ids.fill_(0)
         self.request_query_lengths.fill_(0)
+        self.request_output_lengths.fill_(0)
         self.request_kv_length_offsets.fill_(0)
         self.request_kv_chunk_counts.fill_(0)
         self.request_last_kv_chunk_id.fill_(0)
@@ -693,7 +699,9 @@ class DynamicInferenceContext(BaseInferenceContext):
         return last_token_logits
-    def add_request(self, request_id: int, tokens: List[int]) -> None:
+    def add_request(
+        self, request_id: int, tokens: List[int], num_tokens_to_generate: Optional[int] = None
+    ) -> None:
         """Add request to context.
         After a request is added, it will first do one prefill step, followed by
@@ -731,9 +739,17 @@ class DynamicInferenceContext(BaseInferenceContext):
         if new_chunk_ids is None:
             raise ChunkOverflowError()
+        if num_tokens_to_generate is None:
+            num_tokens_to_generate = self.max_sequence_length - context_length
+        elif context_length + num_tokens_to_generate > self.max_sequence_length:
+            raise TokenOverflowError()
         # Update request state.
         self.request_ids[self.total_request_count] = request_id
         self.request_query_lengths[self.total_request_count] = context_length
+        self.request_output_lengths[self.total_request_count] = (
+            context_length + num_tokens_to_generate
+        )
         self.request_kv_length_offsets[self.total_request_count] = 0
         self.request_kv_memory[self.total_request_count][:num_chunks_needed] = new_chunk_ids
         self.request_kv_chunk_counts[self.total_request_count] = num_chunks_needed
@@ -861,6 +877,7 @@ class DynamicInferenceContext(BaseInferenceContext):
             # Shift active requests left.
             self.request_kv_length_offsets[dst_idxs] = self.request_kv_length_offsets[src_idxs]
             self.request_query_lengths[dst_idxs] = self.request_query_lengths[src_idxs]
+            self.request_output_lengths[dst_idxs] = self.request_output_lengths[src_idxs]
             self.request_ids[dst_idxs] = self.request_ids[src_idxs]
             next_tokens[dst_idxs] = next_tokens[src_idxs]
@@ -910,6 +927,7 @@ class DynamicInferenceContext(BaseInferenceContext):
                 self.request_kv_length_offsets[dst_idxs] = self.request_kv_length_offsets[src_idxs]
                 self.request_query_lengths[dst_idxs] = self.request_query_lengths[src_idxs]
+                self.request_output_lengths[dst_idxs] = self.request_output_lengths[src_idxs]
                 self.request_ids[dst_idxs] = self.request_ids[src_idxs]
                 next_tokens[dst_idxs] = next_tokens[src_idxs]

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/engines/dynamic_engine.py RENAMED Viewed

@@ -85,12 +85,18 @@ class DynamicInferenceEngine(AbstractEngine):
         self.context.reset()
         self.finished_request_count = 0
-    def add_request(self, request_id: int, prompt: Union[str, List[int], Tensor]) -> None:
+    def add_request(
+        self,
+        request_id: int,
+        prompt: Union[str, List[int], Tensor],
+        num_tokens_to_generate: Optional[int] = None,
+    ) -> None:
         """Add request to inference context.
         Args:
             request_id (int): Unique ID of request.
             prompt (Union[str, Tensor]): Prompt as either a text string or token IDs.
+            num_tokens_to_generate (Optional[int]): Number of output tokens to generate
         Return:
             None.
@@ -120,7 +126,7 @@ class DynamicInferenceEngine(AbstractEngine):
             raise Exception("specialize for <%s>." % type(prompt).__name__)
         # Add request to context.
-        return self.context.add_request(request_id, tokens)
+        return self.context.add_request(request_id, tokens, num_tokens_to_generate)
     def step(
         self, sampling_params: SamplingParams, *, verbose: Optional[bool] = False

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/text_generation_controllers/text_generation_controller.py RENAMED Viewed

@@ -335,10 +335,13 @@ class TextGenerationController:
             context.paused_request_count : context.total_request_count
         ].long()
         active_sequence_lengths = context.get_active_sequence_lengths()
+        active_sequence_lengths += 1  # Account for the token we just generated
+        max_sequence_lengths = context.get_max_sequence_lengths()
-        # Request finished if termination_id or length > max_sequence_length.
-        active_request_mask = (new_sample != termination_id).byte() & (
-            active_sequence_lengths < context.max_sequence_length
+        # Request finished if termination_id or length >= max_sequence_length.
+        active_request_mask = (new_sample != termination_id).byte() & torch.less(
+            active_sequence_lengths, max_sequence_lengths
         ).byte()
         finished_idxs = (
             torch.nonzero(active_request_mask == 0, as_tuple=True)[0] + context.paused_request_count

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/package_info.py RENAMED Viewed

@@ -4,7 +4,7 @@
 MAJOR = 0
 MINOR = 12
 PATCH = 0
-PRE_RELEASE = 'rc2'
+PRE_RELEASE = 'rc3'
 # Use the following formatting: (major, minor, patch, pre-release)
 VERSION = (MAJOR, MINOR, PATCH, PRE_RELEASE)

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/utils.py RENAMED Viewed

@@ -47,6 +47,7 @@ except Exception:
     # This is a WAR for building docs, where torch is not actually imported
     _torch_version = PkgVersion("0.0.0")
 _te_version = None
+_fa_version = None
 class ExperimentalNotEnabledError(Exception):
@@ -279,6 +280,30 @@ def is_torch_min_version(version, check_equality=True):
     return get_torch_version() > PkgVersion(version)
+def get_fa_version():
+    """Get Flash attention version from __version__; if not available use pip's. Use caching."""
+    def get_fa_version_str():
+        import flash_attn as fa
+        if hasattr(fa, '__version__'):
+            return str(fa.__version__)
+        else:
+            return version("flash-attn")
+    global _fa_version
+    if _fa_version is None:
+        _fa_version = PkgVersion(get_fa_version_str())
+    return _fa_version
+def is_fa_min_version(version, check_equality=True):
+    """Check if minimum version of `flash-attn` is installed."""
+    if check_equality:
+        return get_fa_version() >= PkgVersion(version)
+    return get_fa_version() > PkgVersion(version)
 def ensure_divisibility(numerator, denominator):
     """Ensure that numerator is divisible by the denominator."""
     assert numerator % denominator == 0, "{} is not divisible by {}".format(numerator, denominator)

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3/megatron_core.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: megatron-core
-Version: 0.12.0rc2
+Version: 0.12.0rc3
 Summary: Megatron Core - a library for efficient and scalable training of transformer based models
 Home-page: https://github.com/NVIDIA/Megatron-LM/megatron/core
 Download-URL: https://github.com/NVIDIA/Megatron-LM/releases

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/LICENSE RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/MANIFEST.in RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/README.md RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/README.md RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/config.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/config_logger.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/bert_dataset.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/blended_dataset.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/blended_megatron_dataset_builder.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/blended_megatron_dataset_config.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/gpt_dataset.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/helpers.cpp RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/helpers.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/indexed_dataset.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/masked_dataset.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/megatron_dataset.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/megatron_tokenizer.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/multimodal_dataset.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/config/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/config/bert_embedders.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/config/config.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/config/gpt_chunk_datasets.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/config/tokenizers.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/db/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/db/build.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/db/dataset.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/db/utils.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/external_libs.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/build.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/factory.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/index.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/indexes/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/indexes/faiss_base.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/indexes/faiss_par_add.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/utils.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/index/validate.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/gpt_chunk_dataset.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/multi_split_gpt_dataset.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/query.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/retro_dataset.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/query/utils.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/retro/utils.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/t5_dataset.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/utils.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/datasets/utils_s3.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/core.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/dict_utils.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/exchange_utils.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/mapping.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/optimizer.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/serialization.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/state_dict_utils.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/async_utils.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/base.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/cached_metadata_filesystem_reader.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/common.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/filesystem_async.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/fully_parallel.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/resharding.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/state_dict_saver.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/tensorstore.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/torch.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/two_stage.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/strategies/zarr.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/tensor_aware_state_dict.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/utils.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/dist_checkpointing/validation.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/custom_fsdp/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/custom_fsdp/param_and_grad_buffer.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/data_parallel_base.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/distributed_data_parallel.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/distributed_data_parallel_config.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/finalize_model_grads.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/param_and_grad_buffer.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/distributed/torch_fully_sharded_data_parallel.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/enums.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/data_type.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/export_config.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/model_type.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/engine_builder/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/engine_builder/trtllm_engine_builder.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/model_to_trllm_mapping/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/model_to_trllm_mapping/default_conversion_dict.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trt_model_config.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trt_model_type.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trtllm_helper.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trtllm_layers.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trtllm_weights_converter/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trtllm_weights_converter/distributed_trtllm_model_weights_converter.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/export/trtllm/trtllm_weights_converter/single_device_trtllm_model_weights_converter.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/extensions/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/extensions/transformer_engine.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fp8_utils.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_bias_dropout.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_bias_geglu.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_bias_gelu.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_bias_swiglu.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_cross_entropy.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_layer_norm.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/fusions/fused_softmax.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/async_stream.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/common_inference_params.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/communication_utils.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/contexts/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/contexts/base_context.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/contexts/static_context.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/engines/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/engines/abstract_engine.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/engines/mcore_engine.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/engines/static_engine.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/inference_request.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/abstract_model_inference_wrapper.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/gpt/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/gpt/gpt_inference_wrapper.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/inference_wrapper_config.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/multimodal/vlm_inference_wrapper.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/t5/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/model_inference_wrappers/t5/t5_inference_wrapper.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/modelopt_support/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/modelopt_support/gpt/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/modelopt_support/gpt/model_specs.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/modelopt_support/gpt/state_dict_hooks.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/modelopt_support/mamba/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/modelopt_support/mamba/model_specs.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/sampling_params.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/scheduler.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/text_generation_controllers/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/text_generation_controllers/encoder_decoder_text_generation_controller.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/text_generation_controllers/simple_text_generation_controller.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/text_generation_controllers/vlm_text_generation_controller.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference/utils.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/inference_params.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/jit.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/model_parallel_config.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/T5/__init__.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/T5/t5_model.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/T5/t5_spec.py RENAMED Viewed

File without changes

{megatron_core-0.12.0rc2 → megatron_core-0.12.0rc3}/megatron/core/models/__init__.py RENAMED Viewed

File without changes

megatron-core 0.12.0rc2__tar.gz → 0.12.0rc3__tar.gz

Potentially problematic release.

megatron-core 0.12.0rc2__tar.gz → 0.12.0rc3__tar.gz