PyPI - optimum-rbln - Versions diffs - 0.8.3rc0__py3-none-any.whl → 0.8.4a0__py3-none-any.whl - Mend

optimum-rbln 0.8.3rc0__py3-none-any.whl → 0.8.4a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of optimum-rbln might be problematic. Click here for more details.

Files changed (14)

optimum/rbln/__version__.py CHANGED Viewed

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
-__version__ = version = '0.8.3rc0'
-__version_tuple__ = version_tuple = (0, 8, 3, 'rc0')
+__version__ = version = '0.8.4a0'
+__version_tuple__ = version_tuple = (0, 8, 4, 'a0')
 __commit_id__ = commit_id = None

optimum/rbln/configuration_utils.py CHANGED Viewed

@@ -476,6 +476,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
     non_save_attributes = [
         "_frozen",
         "_runtime_options",
+        "torch_dtype",
         "npu",
         "tensor_parallel_size",
         "create_runtimes",
@@ -566,6 +567,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         tensor_parallel_size: Optional[int] = None,
         timeout: Optional[int] = None,
         optimum_rbln_version: Optional[str] = None,
+        _torch_dtype: Optional[str] = None,
         _compile_cfgs: List[RBLNCompileConfig] = [],
         **kwargs: Any,
     ):
@@ -583,6 +585,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
             tensor_parallel_size (Optional[int]): Size for tensor parallelism to distribute the model across devices.
             timeout (Optional[int]): The timeout for the runtime in seconds. If it isn't provided, it will be set to 60 by default.
             optimum_rbln_version (Optional[str]): The optimum-rbln version used for this configuration.
+            _torch_dtype (Optional[str]): The data type to use for the model.
             _compile_cfgs (List[RBLNCompileConfig]): List of compilation configurations for the model.
             **kwargs: Additional keyword arguments.
@@ -610,6 +613,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         self.npu = npu
         self.tensor_parallel_size = tensor_parallel_size
+        self._torch_dtype = _torch_dtype or "float32"
         self.optimum_rbln_version = optimum_rbln_version
         if self.optimum_rbln_version is None:
             self.optimum_rbln_version = __version__
@@ -639,6 +643,17 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
             raise ValueError(f"Unexpected arguments: {kwargs.keys()}")
+    @property
+    def torch_dtype(self):
+        return getattr(torch, self._torch_dtype)
+    @torch_dtype.setter
+    def torch_dtype(self, torch_dtype: Union[str, torch.dtype]):
+        if isinstance(torch_dtype, torch.dtype):
+            torch_dtype = RBLNCompileConfig.normalize_dtype(torch_dtype)
+        self._torch_dtype = torch_dtype
     @property
     def rbln_model_cls_name(self) -> str:
         return self.__class__.__name__[:-6]

optimum/rbln/modeling.py CHANGED Viewed

@@ -19,7 +19,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, get_args, ge
 import rebel
 import torch
 from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
-from transformers import AutoConfig, PretrainedConfig
+from transformers import PretrainedConfig
 from transformers.modeling_outputs import BaseModelOutput
 from .configuration_utils import DEFAULT_COMPILED_MODEL_NAME, RBLNModelConfig
@@ -119,9 +119,6 @@ class RBLNModel(RBLNBaseModel):
         # Save configs
         if config is None:
             config = model.config
-            # remote_config
-            if hasattr(config, "auto_map") and "AutoConfig" in config.auto_map:
-                config = AutoConfig.from_pretrained(config._name_or_path, **kwargs)
         if hasattr(model, "can_generate") and model.can_generate():
             import json

optimum/rbln/modeling_base.py CHANGED Viewed

@@ -34,7 +34,7 @@ from .utils.submodule import SubModulesMixin
 if TYPE_CHECKING:
-    from transformers import PreTrainedModel
+    from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PreTrainedModel
 logger = get_logger(__name__)
@@ -53,6 +53,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
     config_class = AutoConfig
     config_name = "config.json"
     hf_library_name = "transformers"
+    _supports_non_fp32 = False
     def __init__(
         self,
@@ -91,7 +92,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         self.device = torch.device("cpu")
         self.training = False
-        self.dtype = torch.float32
+        self.dtype = rbln_config.torch_dtype
         # FIXME :: model_save_dir is not used after initialized. (This can be used when save/load)
         # This attribute is needed to keep one reference on the temporary directory, since garbage collecting it
@@ -400,8 +401,21 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         return compiled_model
     @classmethod
-    def update_rbln_config(cls, **others) -> RBLNModelConfig:
-        rbln_config = cls._update_rbln_config(**others)
+    def update_rbln_config(
+        cls,
+        preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
+        model: "PreTrainedModel",
+        model_config: "PretrainedConfig",
+        rbln_config: RBLNModelConfig,
+    ) -> RBLNModelConfig:
+        rbln_config.torch_dtype = model.dtype
+        if not cls._supports_non_fp32 and rbln_config.torch_dtype != torch.float32:
+            raise NotImplementedError(
+                f"Currently, {cls.__name__} does not support non-fp32 dtype. Please use float32 dtype."
+            )
+        rbln_config = cls._update_rbln_config(
+            preprocessors=preprocessors, model=model, model_config=model_config, rbln_config=rbln_config
+        )
         rbln_config.freeze()
         if rbln_config.rbln_model_cls_name != cls.__name__:
             raise NameError(
@@ -444,12 +458,12 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         # This method mimics the interface of torch.nn.Module.parameters()
         # specifically for code that uses `next(model.parameters())` to infer
-        # the device or dtype. It yields a single dummy tensor on CPU with float32 dtype.
+        # the device or dtype. It yields a single dummy tensor on CPU with model dtype.
         # Warning:
         #     This does NOT yield the actual model parameters used by the RBLN runtime.
         #     Code relying on iterating through all model parameters will not work as expected.
-        yield torch.tensor([1.0], dtype=torch.float32, device=torch.device("cpu"))
+        yield torch.tensor([1.0], dtype=self.dtype, device=torch.device("cpu"))
     def __call__(self, *args, **kwargs):
         return self.forward(*args, **kwargs)

optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py CHANGED Viewed

@@ -1066,7 +1066,7 @@ class RotaryEmbedding(nn.Module):
             rope_type = "default"
         inv_freq, attention_scaling = ROPE_INIT_FUNCTIONS[rope_type](config, max_seq_len_cached)
-        cache_position = torch.arange(0, max_seq_len_cached, dtype=torch.float32)
+        cache_position = torch.arange(0, max_seq_len_cached)
         cache_position_expanded = cache_position[:, None]
         if rope_type == "dynamic":
@@ -1085,8 +1085,8 @@ class RotaryEmbedding(nn.Module):
     def forward(self, x, seq_len):
         return (
-            self._cos_cached[:seq_len].to(dtype=x.dtype),
-            self._sin_cached[:seq_len].to(dtype=x.dtype),
+            self._cos_cached[:seq_len].to(dtype=torch.float32),
+            self._sin_cached[:seq_len].to(dtype=torch.float32),
         )
@@ -1116,8 +1116,11 @@ def rotate_half(x):
 def apply_rotary_pos_emb(q, k, cos, sin):
     """Applies Rotary Position Embedding to the query and key tensors."""
+    dtype = q.dtype
     q_embed = (q * cos) + (rotate_half(q) * sin)
     k_embed = (k * cos) + (rotate_half(k) * sin)
+    q_embed = q_embed.to(dtype)
+    k_embed = k_embed.to(dtype)
     return q_embed, k_embed

optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py CHANGED Viewed

@@ -317,7 +317,13 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):
         # Initialize attention mask for chunked processing
         chunked_attention_mask = (
-            torch.zeros(1, 1, self.rbln_config.prefill_chunk_size, self.rbln_config.max_seq_len, dtype=torch.float32)
+            torch.zeros(
+                1,
+                1,
+                self.rbln_config.prefill_chunk_size,
+                self.rbln_config.max_seq_len,
+                dtype=self.rbln_config.torch_dtype,
+            )
             if self.rbln_config.use_attention_mask
             else None
         )

optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py CHANGED Viewed

@@ -19,7 +19,7 @@ from typing import TYPE_CHECKING, Any, Callable, List, Optional, Tuple, Union
 import rebel
 import torch
 from rebel.compile_context import CompileContext
-from transformers import AutoConfig, AutoModel, AutoModelForCausalLM, PretrainedConfig, PreTrainedModel
+from transformers import AutoModel, AutoModelForCausalLM, PretrainedConfig, PreTrainedModel
 from transformers.modeling_outputs import BaseModelOutputWithPast
 from transformers.modeling_utils import no_init_weights
@@ -33,7 +33,7 @@ from ...modeling_attention_utils import (
     validate_sliding_window,
 )
 from ...modeling_outputs import RBLNDecoderOnlyOutput
-from ...utils.rbln_quantization import prepare_model_for_quantization
+from ...utils.rbln_quantization import get_quantized_model
 from .configuration_decoderonly import RBLNDecoderOnlyModelConfig, RBLNDecoderOnlyModelForCausalLMConfig
 from .decoderonly_architecture import DecoderOnlyWrapper
 from .decoderonly_runtime_utils import RBLNPageTableManager, RBLNRuntimeModel
@@ -72,6 +72,7 @@ class RBLNDecoderOnlyModel(RBLNModel, RBLNDecoderOnlyFlashAttentionMixin):
     auto_model_class = AutoModel
     _decoder_wrapper_cls = DecoderOnlyWrapper
     _use_rotary_emb = True
+    _supports_non_fp32 = True
     def __post_init__(self, **kwargs):
         if self.rbln_config.use_inputs_embeds:
@@ -86,10 +87,8 @@ class RBLNDecoderOnlyModel(RBLNModel, RBLNDecoderOnlyFlashAttentionMixin):
     def setup_runtime(self):
         # Initialize resources to be used across Runtime instances (prefill and decode phases)
         page_table_manager = RBLNPageTableManager(self.rbln_config)
-        dec_attn_mask = torch.zeros(
-            self.rbln_config.batch_size, 1, 1, self.rbln_config.max_seq_len, dtype=torch.float32
-        )
-        out_buffers = [torch.empty(self.prefill_output_size, dtype=torch.float32, device="cpu")]
+        dec_attn_mask = torch.zeros(self.rbln_config.batch_size, 1, 1, self.rbln_config.max_seq_len, dtype=self.dtype)
+        out_buffers = [torch.empty(self.prefill_output_size, dtype=self.dtype)]
         common_kwargs = {
             "main_input_name": "inputs_embeds" if self.rbln_config.use_inputs_embeds else "input_ids",
@@ -143,35 +142,17 @@ class RBLNDecoderOnlyModel(RBLNModel, RBLNDecoderOnlyFlashAttentionMixin):
     ):
         kwargs = cls.update_kwargs(kwargs)
-        if config is None:
-            config = AutoConfig.from_pretrained(
-                model_id,
-                use_auth_token=use_auth_token,
-                revision=revision,
-                force_download=force_download,
-                cache_dir=cache_dir,
-                trust_remote_code=trust_remote_code,
-                **kwargs,
-            )
-            if config.torch_dtype == torch.bfloat16:
-                # FIXME: bfloat16 is not supported by rebel-compiler
-                config.torch_dtype = torch.float32
-        with no_init_weights():
-            model = cls.auto_model_class.from_config(config)
-        model = prepare_model_for_quantization(
-            model,
+        return get_quantized_model(
+            cls.auto_model_class,
             model_id,
-            kwargs.get("num_hidden_layers"),
             use_auth_token=use_auth_token,
             revision=revision,
             cache_dir=cache_dir,
             force_download=force_download,
             local_files_only=local_files_only,
             rbln_quantization=rbln_config.quantization,
+            **kwargs,
         )
-        return model
     def __getattr__(self, __name: str) -> Any:
         # Special method to delegate attribute access to the original Huggingface LM class.
@@ -365,7 +346,7 @@ class RBLNDecoderOnlyModel(RBLNModel, RBLNDecoderOnlyFlashAttentionMixin):
         input_info = []
         if rbln_config.use_inputs_embeds:
-            input_info.append(("inputs_embeds", [batch_size, query_length, hidden_size], "float32"))
+            input_info.append(("inputs_embeds", [batch_size, query_length, hidden_size], rbln_config.torch_dtype))
         else:
             input_info.append(("input_ids", [batch_size, query_length], "int64"))
@@ -384,16 +365,16 @@ class RBLNDecoderOnlyModel(RBLNModel, RBLNDecoderOnlyFlashAttentionMixin):
         if rbln_config.use_attention_mask:
             if rbln_config.use_position_ids:
-                input_info.append(("attention_mask", [batch_size, rbln_config.max_seq_len], "float32"))
+                input_info.append(("attention_mask", [batch_size, rbln_config.max_seq_len], rbln_config.torch_dtype))
             else:
                 input_info.append(
-                    ("attention_mask", [batch_size, 1, query_length, rbln_config.max_seq_len], "float32")
+                    ("attention_mask", [batch_size, 1, query_length, rbln_config.max_seq_len], rbln_config.torch_dtype)
                 )
         if rbln_config.use_position_ids:
             input_info.append(("position_ids", [batch_size, query_length], "int32"))
-        kvcache_dtype = "float32"
+        kvcache_dtype = rbln_config.torch_dtype
         if rbln_config.quantization and rbln_config.quantization.kv_caches == "fp8":
             kvcache_dtype = "float8_e4m3fn"

optimum/rbln/transformers/models/gemma3/modeling_gemma3.py CHANGED Viewed

@@ -345,6 +345,7 @@ class RBLNGemma3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
     """
     _decoder_wrapper_cls = Gemma3ForCausalLMWrapper
+    _supports_non_fp32 = False
     def setup_runtime(self):
         # Initialize shared resources to be used across Runtime instances (prefill and decode phases)

optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py CHANGED Viewed

@@ -372,6 +372,8 @@ class RBLNQwen2_5_VLForConditionalGeneration(RBLNDecoderOnlyModelForCausalLM):
         ```
     """
+    _supports_non_fp32 = False
     auto_model_class = AutoModelForVision2Seq
     _rbln_submodules = [
         {"name": "visual"},

optimum/rbln/transformers/utils/rbln_quantization.py CHANGED Viewed

@@ -14,18 +14,23 @@
 import glob
 import os
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Type, Union
 import torch
 from huggingface_hub import hf_hub_download, list_repo_files
 from safetensors.torch import load_file
 from torch.nn import Linear, Parameter
 from torch.nn import functional as F
+from transformers import AutoConfig
+from transformers.modeling_utils import get_state_dict_dtype, no_init_weights
 from ...configuration_utils import RBLNSerializableConfigProtocol
 from ...utils.logging import get_logger
+if TYPE_CHECKING:
+    from transformers.models.auto.modeling_auto import _BaseAutoModelClass
 logger = get_logger()
@@ -138,22 +143,31 @@ class QuantizedLayerFactory:
         return create_fp8linear(layer, self.quantization_config)
-def prepare_model_for_quantization(
-    model: torch.nn.Module,
+def get_quantized_model(
+    hf_auto_model_class: Type["_BaseAutoModelClass"],
     model_id: str,
-    n_layer: Optional[int] = None,
     use_auth_token: Optional[Union[bool, str]] = None,
     revision: Optional[str] = None,
     cache_dir: Optional[str] = None,
     force_download: bool = False,
     local_files_only: bool = False,
     rbln_quantization: Optional[RBLNQuantizationConfig] = None,
-) -> torch.nn.Module:
+    **kwargs,
+):
     """
-    Prepare the model for quantization by updating specified linear layers to quantized (qlinear) layers.
+    Get a quantized model from a model class and model id.
     """
+    # torch_dtype should not be passed to AutoConfig.from_pretrained
+    # since it doesn't support 'auto'
+    torch_dtype = kwargs.pop("torch_dtype", None)
+    if torch_dtype is not None:
+        logger.warning(
+            "torch_dtype is not supported for quantized models. "
+            "It will be ignored and the dtype of the model will be determined by the weights."
+        )
+        torch_dtype = None
-    # 1. Load weight files
+    # get paths of safetensors files in the model repo
     safetensor_files = load_weight_files(
         model_id,
         use_auth_token=use_auth_token,
@@ -163,17 +177,31 @@ def prepare_model_for_quantization(
         local_files_only=local_files_only,
     )
-    # 2. Update linear layers based on the quantization config
-    update_layers_to_quantize(model, rbln_quantization)
+    # load safetensors files into memory
+    safetensors = [load_file(safetensor_file) for safetensor_file in safetensor_files]
+    # get the dtype of the model from the first safetensor file
+    torch_dtype = get_state_dict_dtype(safetensors[0])
-    # 3. Load weights into model parameters
-    load_weights_from_files(
-        model,
-        safetensor_files,
-        n_layer,
-        rbln_quantization=rbln_quantization,
+    config = AutoConfig.from_pretrained(
+        model_id,
+        use_auth_token=use_auth_token,
+        revision=revision,
+        cache_dir=cache_dir,
+        force_download=force_download,
+        local_files_only=local_files_only,
+        **kwargs,
     )
+    with no_init_weights():
+        model = hf_auto_model_class.from_config(config, torch_dtype=torch_dtype)
+    # Quantize the model
+    update_layers_to_quantize(model, rbln_quantization)
+    # Load weights into the model
+    load_weights_from_files(model, safetensors, rbln_quantization)
     return model
@@ -372,32 +400,26 @@ def canonicalize_checkpoint_items(
 def load_weights_from_files(
     model: torch.nn.Module,
-    safetensor_files: list[str],
-    n_layer: Optional[int] = None,
+    safetensors: List[Dict[str, torch.Tensor]],
     rbln_quantization: Optional[RBLNQuantizationConfig] = None,
 ):
     """
-    Load safetensor file data directly into the model from provided safetensor files,
-    filtering by layer if n_layer is provided.
+    Load safetensor file data directly into the model from provided safetensor files.
     """
     model_params = dict(model.named_parameters(recurse=True))
     model_buffers = dict(model.named_buffers(recurse=True))
-    target_layers = list(range(n_layer)) if n_layer is not None else None
     unloaded_keys = []
     loaded_input_scale = False
     loaded_kv_scale = False
     loaded_weight_scale = False
-    for safetensor_file in safetensor_files:
-        file_data = load_file(safetensor_file)
+    for safetensor in safetensors:
         # Normalize all (key, tensor) pairs to the internal schema
         normalized_items = canonicalize_checkpoint_items(
             model=model,
-            items=file_data.items(),
+            items=safetensor.items(),
             rbln_quantization=rbln_quantization,
         )
@@ -410,12 +432,6 @@ def load_weights_from_files(
             if key.endswith("k_scale") or key.endswith("v_scale"):
                 loaded_kv_scale = True
-            # Filter by layer index if requested
-            if target_layers is not None:
-                parts = key.split(".")
-                if len(parts) > 2 and parts[2].isdigit() and (int(parts[2]) not in target_layers):
-                    continue
             # Copy into parameters or buffers
             if key in model_params:
                 # Ensure dtype compatibility

{optimum_rbln-0.8.3rc0.dist-info → optimum_rbln-0.8.4a0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: optimum-rbln
-Version: 0.8.3rc0
+Version: 0.8.4a0
 Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
 Project-URL: Homepage, https://rebellions.ai
 Project-URL: Documentation, https://docs.rbln.ai

{optimum_rbln-0.8.3rc0.dist-info → optimum_rbln-0.8.4a0.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,8 @@
 optimum/rbln/__init__.py,sha256=32ouGKDGus9k5_kD27CxP8jIQOw66zpDTfS0xs1XlfE,18298
-optimum/rbln/__version__.py,sha256=boIaJ8T6HCT9Qh8wBU3n-6ZyjtAKYgztQh0WMaN7BxM,714
-optimum/rbln/configuration_utils.py,sha256=fE3HlZblxukKSdS-4VofjuyCAiqwPMX8bqXpOiTZp4g,33926
-optimum/rbln/modeling.py,sha256=jMiJy9PGjZpXpAmRTFD5fTuj8xEbLCUmncIxGD6XWLk,14338
-optimum/rbln/modeling_base.py,sha256=txBab-zVXcjqnF2gZJBzhrp5ruA3vwt3hjls0Q2S_0w,25492
+optimum/rbln/__version__.py,sha256=YNGYpHnDhFwKFL4ZTx3BIJGtmgon0Pv2G2E10GhWRaY,712
+optimum/rbln/configuration_utils.py,sha256=KtbDM7HnFGiO0PsuvkrCE3R9NF6OJVmV_fyQcQNrmUk,34469
+optimum/rbln/modeling.py,sha256=cAIPWEw5DGzUWeqjCbocRhU6OO3jyhVGW60AmBLh1Nw,14134
+optimum/rbln/modeling_base.py,sha256=kQsBfUoDncNgR5P8_BvyzY6H_4YEXOBzN20lFmOZV_g,26190
 optimum/rbln/diffusers/__init__.py,sha256=1tgU_xWA42BmInqu9bBz_5R_E9TGhhK3mI06YlaiTLg,7232
 optimum/rbln/diffusers/modeling_diffusers.py,sha256=TAuMb7PSMjNwK7mh5ItE_CtAEgYeZKI27XkFFmxjHlQ,19902
 optimum/rbln/diffusers/configurations/__init__.py,sha256=vMRnPY4s-Uju43xP038D2EA18X_mhy2YfsZVpSU-VoA,1322
@@ -105,10 +105,10 @@ optimum/rbln/transformers/models/colpali/configuration_colpali.py,sha256=eDWPVlo
 optimum/rbln/transformers/models/colpali/modeling_colpali.py,sha256=v9rPLmNx-BQZhDFhKnr2kmARElTtKdFZCgFIU4m-HPw,15703
 optimum/rbln/transformers/models/decoderonly/__init__.py,sha256=w3VZOIBYaHXVdnuhK4y0zWAj0IAv7_5LGTJYaz9oYmI,1056
 optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py,sha256=H2i9Iefy-q5X-0BLWQ-CrxK8ZoT3p9t0lt_3r4TFSCY,15182
-optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=cGkhGc8XlseMWqDCrt13z0Itn9b0emZ2PjHI-1TP0wI,42685
-optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py,sha256=9acEQxGRzd21YkzxRchkhqxqpX7emQHZigFg60BIulc,19902
+optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=L5LArhjN36fTdiwrUABgn3cnS7hh4SVCF4FMHBbiLZU,42760
+optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py,sha256=v3mfIlQImQkYYr-rPn7rQR3GYdVUhALRttEduLI7H9c,20012
 optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py,sha256=4D89IF0yQju_Dp_vLJN_dBkpe2U_LMWaUciYx57D-0M,3379
-optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=hu2eJr0CpLHnRPSLhyBhyyC6DfosKmPu7lPjapcBCkE,33061
+optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=dAHV9NgdpXHyTJGT0lieXOB3Pzi_NPlR4rqmRtmAWzM,32412
 optimum/rbln/transformers/models/depth_anything/__init__.py,sha256=xvPSIriMJWyNeVYoVB1Z7YqB4kkHOIkaHq7loNps-dk,756
 optimum/rbln/transformers/models/depth_anything/configuration_depth_anything.py,sha256=JujBVEUa_zZDXNPr1y-B_PhK5SgFFcY8Ib4EoGjjtmE,989
 optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py,sha256=tTmsVaW9Wb2WD3nKRLwp7swn3hbMvgwUEJwwVIfNYEc,1008
@@ -130,7 +130,7 @@ optimum/rbln/transformers/models/gemma3/__init__.py,sha256=6rugk3615SEt4lh7gduo_
 optimum/rbln/transformers/models/gemma3/configuration_gemma3.py,sha256=rKjKJhyaIM7YoiLR-q8GAZKIQNzDzcb5X7qf_FJE72M,3398
 optimum/rbln/transformers/models/gemma3/gemma3_architecture.py,sha256=fpLDAXCe5paWVsfc0tL59JkRQMRF-WNgIzOIb_QpSLU,6191
 optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py,sha256=vYQ9sjRlkfamxZca_hVMQI0ylKeExsV02gOWaYVMjyg,9640
-optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=zraPjowA8ni9Lb0NrmsiUai2XdOjgYOOpVnIU1n2jGA,24208
+optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=TxbgkvW2Nv0VGdXNXnN_Beas6E_1D9NAH8f09Fo8t0E,24239
 optimum/rbln/transformers/models/gpt2/__init__.py,sha256=SsawHMStE3wYRtqkH5EvdTFkCdX0LLmp-QSKFhEBrHo,740
 optimum/rbln/transformers/models/gpt2/configuration_gpt2.py,sha256=iGdHfzG7plekZcIz-Z5U8lRE4SB8gbJJNcFQJ9l8Myg,1533
 optimum/rbln/transformers/models/gpt2/gpt2_architecture.py,sha256=MyAWReXmyuHnDpW5HI_TI7psyJZxLujZ9KT5XnNm7nA,2802
@@ -182,7 +182,7 @@ optimum/rbln/transformers/models/qwen2/modeling_qwen2.py,sha256=VOboPJF1rvvSVWkH
 optimum/rbln/transformers/models/qwen2/qwen2_architecture.py,sha256=XlNAMYAcDLohnSAhIFGKOPuCB5XLgzYs5ABWdeQSaZs,720
 optimum/rbln/transformers/models/qwen2_5_vl/__init__.py,sha256=rAW3DKQUzGL6EMwa5r1iLu94yhpiZpk6zfoD7TtYXrc,865
 optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py,sha256=1yyMFxh1SKsKR7rOjuotPvpSneN2_4a89bYfNk42370,4735
-optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py,sha256=BfpALuavpdCqe5RuHaNZNo2IDlLjE4SwsoPAlaictgc,26607
+optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py,sha256=hRvA37sPFC9xH1FqnFbtHS9rQOPwAvLYg4zl4oEyK-w,26639
 optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py,sha256=i_UUWhKoFjJ5CCpgeWicqABM23TxMEKPQ354LoZ6iUU,7445
 optimum/rbln/transformers/models/qwen3/__init__.py,sha256=tI4KwvXpD35dUUaa8aLUXpWoU9gJGcmKXeywOlH14ZE,746
 optimum/rbln/transformers/models/qwen3/configuration_qwen3.py,sha256=BFRPggnH4VlsXlOa19C6KAID-bPgQ8ooQ29dvogh5zk,2102
@@ -227,7 +227,7 @@ optimum/rbln/transformers/models/xlm_roberta/__init__.py,sha256=O3o2KzJ8Li3QhB7G
 optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py,sha256=wHRpGTXL9khYqSkKL1IgA7__6_lt9QpOz9tHumjK7fo,1260
 optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py,sha256=EZd3flRUEE38DYtdqEnG70LV7fHhkamRZV51xrVyjYI,1093
 optimum/rbln/transformers/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-optimum/rbln/transformers/utils/rbln_quantization.py,sha256=ARngdvRmeVoOphUU3Md9kT6zS5HDrYdEFYljJwaAaio,21020
+optimum/rbln/transformers/utils/rbln_quantization.py,sha256=pORshQUgTInNaibUtd0HL-T8bKW5wuulZs2q0Oshppc,21659
 optimum/rbln/utils/__init__.py,sha256=ieDBT2VFTt2E0M4v_POLBpuGW9LxSydpb_DuPd6PQqc,712
 optimum/rbln/utils/decorator_utils.py,sha256=xu-TrsNi33SRC2a7DBsyoo6-pEQxWKZPZSmM9QlDe2Y,3745
 optimum/rbln/utils/depreacate_utils.py,sha256=uKxl3ENUCNaZXPnaDQvNxrH8hUIWdBWfZH6BM7ZV__4,385
@@ -238,7 +238,7 @@ optimum/rbln/utils/model_utils.py,sha256=4k5879Kh75m3x_vS4-qOGfqsOiAvc2kdNFFfvsF
 optimum/rbln/utils/runtime_utils.py,sha256=R6uXDbeJP03-FWdd4vthNe2D4aCra5n12E3WB1ifiGM,7933
 optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
 optimum/rbln/utils/submodule.py,sha256=60NGLFvnhjP1DJg1opdb-FVQDsthcLCwWjW_1WQaasU,5280
-optimum_rbln-0.8.3rc0.dist-info/METADATA,sha256=ls15qV7a7bVTpkphb6aHteuBfil7u1xOzkUuysoRPZg,5300
-optimum_rbln-0.8.3rc0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-optimum_rbln-0.8.3rc0.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
-optimum_rbln-0.8.3rc0.dist-info/RECORD,,
+optimum_rbln-0.8.4a0.dist-info/METADATA,sha256=QqrF_vPDFZO-DiTK0p328Y54qXyk1wApO86SAISpNcc,5299
+optimum_rbln-0.8.4a0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+optimum_rbln-0.8.4a0.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+optimum_rbln-0.8.4a0.dist-info/RECORD,,

{optimum_rbln-0.8.3rc0.dist-info → optimum_rbln-0.8.4a0.dist-info}/WHEEL RENAMED Viewed

File without changes

{optimum_rbln-0.8.3rc0.dist-info → optimum_rbln-0.8.4a0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

optimum-rbln 0.8.3rc0__py3-none-any.whl → 0.8.4a0__py3-none-any.whl

Potentially problematic release.

optimum-rbln 0.8.3rc0__py3-none-any.whl → 0.8.4a0__py3-none-any.whl