optimum-rbln 0.8.3rc0__py3-none-any.whl → 0.8.4a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
     commit_id: COMMIT_ID
     __commit_id__: COMMIT_ID

-__version__ = version = '0.8.3rc0'
-__version_tuple__ = version_tuple = (0, 8, 3, 'rc0')
+__version__ = version = '0.8.4a1'
+__version_tuple__ = version_tuple = (0, 8, 4, 'a1')

 __commit_id__ = commit_id = None
@@ -248,9 +248,6 @@ class RBLNAutoConfig:
             if key[5:] not in RUNTIME_KEYWORDS and key[5:] not in cls.submodules
         }

-        if len(rbln_kwargs) > 0:
-            raise ValueError(f"Cannot set the following arguments: {list(rbln_kwargs.keys())}")
-
         # Process submodule's rbln_config
         for submodule in cls.submodules:
             if submodule not in config_file:
@@ -265,6 +262,16 @@ class RBLNAutoConfig:

         config_file.update(rbln_runtime_kwargs)

+        rbln_config = cls(**config_file)
+
+        if len(rbln_kwargs) > 0:
+            for key, value in rbln_kwargs.items():
+                if getattr(rbln_config, key) != value:
+                    raise ValueError(
+                        f"Cannot set the following arguments: {list(rbln_kwargs.keys())} "
+                        f"Since the value is already set to {getattr(rbln_config, key)}"
+                    )
+
         if return_unused_kwargs:
             return cls(**config_file), kwargs
         else:
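Net effect of the two RBLNAutoConfig hunks above: unconsumed `rbln_*` kwargs are no longer rejected up front; the config is materialized first and an override is rejected only when it disagrees with the stored value. A minimal sketch of this validate-after-construct pattern, with a hypothetical config class (not the package's API):

    class StoredConfig:
        def __init__(self, batch_size: int = 1):
            self.batch_size = batch_size

    def load(config_file: dict, **rbln_kwargs) -> StoredConfig:
        config = StoredConfig(**config_file)
        # Mirror of the diff's check: an override that matches the persisted
        # value passes silently; a conflicting one raises.
        for key, value in rbln_kwargs.items():
            if getattr(config, key) != value:
                raise ValueError(f"Cannot set {key!r}: already set to {getattr(config, key)}")
        return config

    load({"batch_size": 4}, batch_size=4)    # ok: agrees with the stored value
    # load({"batch_size": 4}, batch_size=8)  # would raise ValueError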
@@ -476,6 +483,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
    non_save_attributes = [
        "_frozen",
        "_runtime_options",
+       "torch_dtype",
        "npu",
        "tensor_parallel_size",
        "create_runtimes",
@@ -566,6 +574,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         tensor_parallel_size: Optional[int] = None,
         timeout: Optional[int] = None,
         optimum_rbln_version: Optional[str] = None,
+        _torch_dtype: Optional[str] = None,
         _compile_cfgs: List[RBLNCompileConfig] = [],
         **kwargs: Any,
     ):
@@ -583,6 +592,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
            tensor_parallel_size (Optional[int]): Size for tensor parallelism to distribute the model across devices.
            timeout (Optional[int]): The timeout for the runtime in seconds. If it isn't provided, it will be set to 60 by default.
            optimum_rbln_version (Optional[str]): The optimum-rbln version used for this configuration.
+           _torch_dtype (Optional[str]): The data type to use for the model.
            _compile_cfgs (List[RBLNCompileConfig]): List of compilation configurations for the model.
            **kwargs: Additional keyword arguments.

@@ -610,6 +620,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
         self.npu = npu
         self.tensor_parallel_size = tensor_parallel_size

+        self._torch_dtype = _torch_dtype or "float32"
         self.optimum_rbln_version = optimum_rbln_version
         if self.optimum_rbln_version is None:
             self.optimum_rbln_version = __version__
@@ -639,6 +650,17 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):

            raise ValueError(f"Unexpected arguments: {kwargs.keys()}")

+    @property
+    def torch_dtype(self):
+        return getattr(torch, self._torch_dtype)
+
+    @torch_dtype.setter
+    def torch_dtype(self, torch_dtype: Union[str, torch.dtype]):
+        if isinstance(torch_dtype, torch.dtype):
+            torch_dtype = RBLNCompileConfig.normalize_dtype(torch_dtype)
+
+        self._torch_dtype = torch_dtype
+
     @property
     def rbln_model_cls_name(self) -> str:
         return self.__class__.__name__[:-6]
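The new `torch_dtype` accessor keeps the dtype as a plain string in `_torch_dtype` (which is why serialization sees only the string and `torch_dtype` itself joins `non_save_attributes`), materializing a real `torch.dtype` on read. A standalone sketch of the same string-backed property pattern; the class and the `removeprefix` normalization here are illustrative, not the package's `RBLNCompileConfig.normalize_dtype`:

    import torch

    class DtypeBackedConfig:
        # Stores the dtype as a string so the config stays JSON-serializable.
        def __init__(self, _torch_dtype: str = "float32"):
            self._torch_dtype = _torch_dtype

        @property
        def torch_dtype(self) -> torch.dtype:
            # Resolve the stored name (e.g. "float16") to torch.float16.
            return getattr(torch, self._torch_dtype)

        @torch_dtype.setter
        def torch_dtype(self, value):
            if isinstance(value, torch.dtype):
                # str(torch.float16) == "torch.float16"; keep only "float16".
                value = str(value).removeprefix("torch.")
            self._torch_dtype = value

    cfg = DtypeBackedConfig()
    cfg.torch_dtype = torch.float16
    assert cfg.torch_dtype is torch.float16
    assert cfg._torch_dtype == "float16"  # what gets serialized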
@@ -130,7 +130,7 @@ class RBLNDiffusionMixin:
         cls,
         model_id: str,
         *,
-        export: bool = False,
+        export: bool = None,
         model_save_dir: Optional[PathLike] = None,
         rbln_config: Dict[str, Any] = {},
         lora_ids: Optional[Union[str, List[str]]] = None,
@@ -181,6 +181,20 @@ class RBLNDiffusionMixin:
         """
         rbln_config, kwargs = cls.get_rbln_config_class().initialize_from_kwargs(rbln_config, **kwargs)

+        if export is None:
+            export = any(
+                not RBLNModel._is_compiled(
+                    model_id,
+                    token=kwargs.get("token"),
+                    revision=kwargs.get("revision"),
+                    force_download=kwargs.get("force_download", False),
+                    cache_dir=kwargs.get("cache_dir"),
+                    subfolder=submodule_name,
+                    local_files_only=kwargs.get("local_files_only", False),
+                )
+                for submodule_name in cls._submodules
+            )
+
         if export:
             # keep submodules if user passed any of them.
             passed_submodules = {
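With this change a diffusers pipeline id compiles only when at least one submodule lacks compiled artifacts; otherwise it loads directly. A hedged usage sketch (the pipeline class follows optimum-rbln's RBLN* naming, the model id is a placeholder, and `save_pretrained` support is assumed to match HF pipelines):

    from optimum.rbln import RBLNStableDiffusionPipeline

    # Plain HF checkpoint: no compiled submodules found, so export resolves
    # to True and every submodule is compiled.
    pipe = RBLNStableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
    pipe.save_pretrained("sd15-rbln")

    # Directory with compiled artifacts: export resolves to False, no recompile.
    pipe = RBLNStableDiffusionPipeline.from_pretrained("sd15-rbln")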
@@ -14,7 +14,8 @@


 import importlib
-from typing import Type
+from pathlib import Path
+from typing import Type, Union

 from diffusers.models.controlnets import ControlNetUnionModel
 from diffusers.pipelines.auto_pipeline import (
@@ -42,7 +43,13 @@ class RBLNAutoPipelineBase:
     _model_mapping_names = None

     @classmethod
-    def get_rbln_cls(cls, pretrained_model_name_or_path, export=True, **kwargs):
+    def get_rbln_cls(cls, pretrained_model_name_or_path: Union[str, Path], export: bool = None, **kwargs):
+        if isinstance(pretrained_model_name_or_path, Path):
+            pretrained_model_name_or_path = pretrained_model_name_or_path.as_posix()
+
+        if export is None:
+            export = not cls._is_compiled_pipeline(pretrained_model_name_or_path, **kwargs)
+
         if export:
             hf_model_class = cls.infer_hf_model_class(pretrained_model_name_or_path, **kwargs)
             rbln_class_name = convert_hf_to_rbln_model_name(hf_model_class.__name__)
@@ -66,7 +73,7 @@ class RBLNAutoPipelineBase:
         return rbln_cls

     @classmethod
-    def get_rbln_model_cls_name(cls, pretrained_model_name_or_path, **kwargs):
+    def get_rbln_model_cls_name(cls, pretrained_model_name_or_path: Union[str, Path], **kwargs):
         """
         Retrieve the path to the compiled model directory for a given RBLN model.

@@ -86,10 +93,36 @@ class RBLNAutoPipelineBase:

         return model_index_config["_class_name"]

+    @classmethod
+    def _is_compiled_pipeline(
+        cls,
+        pretrained_model_name_or_path: Union[str, Path],
+        cache_dir=None,
+        force_download=False,
+        proxies=None,
+        token=None,
+        local_files_only=False,
+        revision=None,
+        **kwargs,
+    ):
+        config: dict = cls.load_config(
+            pretrained_model_name_or_path,
+            cache_dir=cache_dir,
+            force_download=force_download,
+            proxies=proxies,
+            token=token,
+            local_files_only=local_files_only,
+            revision=revision,
+        )
+        for value in config.values():
+            if isinstance(value, list) and len(value) > 0 and value[0] == "optimum.rbln":
+                return True
+        return False
+
     @classmethod
     def infer_hf_model_class(
         cls,
-        pretrained_model_or_path,
+        pretrained_model_or_path: Union[str, Path],
         cache_dir=None,
         force_download=False,
         proxies=None,
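`_is_compiled_pipeline` keys off the diffusers `model_index.json` convention, in which each component maps to a `[library, class_name]` pair. A sketch of the shape it scans for (the component entries below are illustrative):

    # model_index.json of a compiled pipeline references "optimum.rbln":
    model_index = {
        "_class_name": "RBLNStableDiffusionPipeline",
        "unet": ["optimum.rbln", "RBLNUNet2DConditionModel"],
        "scheduler": ["diffusers", "PNDMScheduler"],
    }

    def is_compiled_pipeline(config: dict) -> bool:
        # Same test as the diff: any [library, class] pair whose library
        # is "optimum.rbln" marks the pipeline as already compiled.
        return any(
            isinstance(v, list) and len(v) > 0 and v[0] == "optimum.rbln"
            for v in config.values()
        )

    assert is_compiled_pipeline(model_index)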
optimum/rbln/modeling.py CHANGED
@@ -19,7 +19,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, get_args, ge
 import rebel
 import torch
 from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
-from transformers import AutoConfig, PretrainedConfig
+from transformers import PretrainedConfig
 from transformers.modeling_outputs import BaseModelOutput

 from .configuration_utils import DEFAULT_COMPILED_MODEL_NAME, RBLNModelConfig
@@ -119,9 +119,6 @@ class RBLNModel(RBLNBaseModel):
         # Save configs
         if config is None:
             config = model.config
-            # remote_config
-            if hasattr(config, "auto_map") and "AutoConfig" in config.auto_map:
-                config = AutoConfig.from_pretrained(config._name_or_path, **kwargs)

         if hasattr(model, "can_generate") and model.can_generate():
             import json
@@ -34,7 +34,7 @@ from .utils.submodule import SubModulesMixin


 if TYPE_CHECKING:
-    from transformers import PreTrainedModel
+    from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PreTrainedModel

 logger = get_logger(__name__)

@@ -53,6 +53,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
     config_class = AutoConfig
     config_name = "config.json"
     hf_library_name = "transformers"
+    _supports_non_fp32 = False

     def __init__(
         self,
@@ -91,7 +92,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):

         self.device = torch.device("cpu")
         self.training = False
-        self.dtype = torch.float32
+        self.dtype = rbln_config.torch_dtype

         # FIXME :: model_save_dir is not used after initialized. (This can be used when save/load)
         # This attribute is needed to keep one reference on the temporary directory, since garbage collecting it
@@ -342,11 +343,37 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         rbln_config, kwargs = config_cls.initialize_from_kwargs(rbln_config, **kwargs)
         return rbln_config, kwargs

+    @classmethod
+    def _is_compiled(
+        cls,
+        model_id: Union[str, Path],
+        token: Optional[Union[bool, str]] = None,
+        revision: Optional[str] = None,
+        force_download: bool = False,
+        cache_dir: Optional[str] = None,
+        subfolder: str = "",
+        local_files_only: bool = False,
+    ) -> bool:
+        # Check if the model is already compiled.
+        try:
+            cls._load_compiled_model_dir(
+                model_id=model_id,
+                token=token,
+                revision=revision,
+                force_download=force_download,
+                cache_dir=cache_dir,
+                subfolder=subfolder,
+                local_files_only=local_files_only,
+            )
+            return True
+        except (FileNotFoundError, KeyError):
+            return False
+
     @classmethod
     def from_pretrained(
         cls: Type["RBLNBaseModel"],
         model_id: Union[str, Path],
-        export: bool = False,
+        export: bool = None,
         rbln_config: Optional[Union[Dict, RBLNModelConfig]] = None,
         **kwargs: Any,
     ) -> "RBLNBaseModel":
@@ -356,7 +383,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):

         Args:
             model_id: The model id of the pre-trained model to be loaded. It can be downloaded from the HuggingFace model hub or a local path, or a model id of a compiled model using the RBLN Compiler.
-            export: A boolean flag to indicate whether the model should be compiled.
+            export: A boolean flag to indicate whether the model should be compiled. If None, it will be determined based on the existence of the compiled model files in the model_id.
             rbln_config: Configuration for RBLN model compilation and runtime. This can be provided as a dictionary or an instance of the model's configuration class (e.g., `RBLNLlamaForCausalLMConfig` for Llama models).
                 For detailed configuration options, see the specific model's configuration class documentation.

@@ -368,6 +395,18 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):

         if isinstance(model_id, Path):
             model_id = model_id.as_posix()
+
+        if export is None:
+            export = not cls._is_compiled(
+                model_id=model_id,
+                token=kwargs.get("token"),
+                revision=kwargs.get("revision"),
+                force_download=kwargs.get("force_download", False),
+                cache_dir=kwargs.get("cache_dir"),
+                subfolder=kwargs.get("subfolder", ""),
+                local_files_only=kwargs.get("local_files_only", False),
+            )
+
         from_pretrained_method = cls._export if export else cls._from_pretrained
         return from_pretrained_method(model_id=model_id, **kwargs, rbln_config=rbln_config)

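With `export=None` as the new default, `from_pretrained` probes for compiled artifacts (via `_is_compiled`, which simply attempts `_load_compiled_model_dir`) and compiles only when none are found. A hedged usage sketch; the class name matches optimum-rbln's naming, the ids are placeholders, and the `rbln_config` fields are assumed from the config class shown in this diff:

    from optimum.rbln import RBLNLlamaForCausalLM

    # First run: plain HF checkpoint, no compiled artifacts -> export resolves
    # to True, the model is compiled, and the result can be saved for reuse.
    model = RBLNLlamaForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-hf",
        rbln_config={"tensor_parallel_size": 4},  # assumed config field
    )
    model.save_pretrained("llama-2-7b-rbln")

    # Later runs: compiled artifacts are found -> export resolves to False
    # and the saved runtime loads without recompiling.
    model = RBLNLlamaForCausalLM.from_pretrained("llama-2-7b-rbln")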
@@ -400,8 +439,21 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         return compiled_model

     @classmethod
-    def update_rbln_config(cls, **others) -> RBLNModelConfig:
-        rbln_config = cls._update_rbln_config(**others)
+    def update_rbln_config(
+        cls,
+        preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
+        model: "PreTrainedModel",
+        model_config: "PretrainedConfig",
+        rbln_config: RBLNModelConfig,
+    ) -> RBLNModelConfig:
+        rbln_config.torch_dtype = model.dtype
+        if not cls._supports_non_fp32 and rbln_config.torch_dtype != torch.float32:
+            raise NotImplementedError(
+                f"Currently, {cls.__name__} does not support non-fp32 dtype. Please use float32 dtype."
+            )
+        rbln_config = cls._update_rbln_config(
+            preprocessors=preprocessors, model=model, model_config=model_config, rbln_config=rbln_config
+        )
         rbln_config.freeze()
         if rbln_config.rbln_model_cls_name != cls.__name__:
             raise NameError(
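The dtype gate added here is opt-in per subclass: the base class pins `_supports_non_fp32 = False`, the decoder-only family flips it to `True` further down this diff, and Gemma3/Qwen2.5-VL explicitly pin it back to `False`. A minimal sketch of the gate pattern with hypothetical class names:

    import torch

    class Base:
        _supports_non_fp32 = False  # conservative default

        @classmethod
        def check_dtype(cls, dtype: torch.dtype) -> None:
            if not cls._supports_non_fp32 and dtype != torch.float32:
                raise NotImplementedError(f"{cls.__name__} supports only float32.")

    class HalfPrecisionModel(Base):
        _supports_non_fp32 = True  # subclass opts in

    Base.check_dtype(torch.float32)                # passes
    HalfPrecisionModel.check_dtype(torch.float16)  # passes
    # Base.check_dtype(torch.float16)              # would raise NotImplementedError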
@@ -444,12 +496,12 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):

        # This method mimics the interface of torch.nn.Module.parameters()
        # specifically for code that uses `next(model.parameters())` to infer
-       # the device or dtype. It yields a single dummy tensor on CPU with float32 dtype.
+       # the device or dtype. It yields a single dummy tensor on CPU with model dtype.

        # Warning:
        # This does NOT yield the actual model parameters used by the RBLN runtime.
        # Code relying on iterating through all model parameters will not work as expected.
-       yield torch.tensor([1.0], dtype=torch.float32, device=torch.device("cpu"))
+       yield torch.tensor([1.0], dtype=self.dtype, device=torch.device("cpu"))

     def __call__(self, *args, **kwargs):
         return self.forward(*args, **kwargs)
@@ -14,9 +14,10 @@
 import importlib
 import inspect
 import warnings
-from typing import Type
+from pathlib import Path
+from typing import Any, Type, Union

-from transformers import AutoConfig, PretrainedConfig
+from transformers import AutoConfig, PretrainedConfig, PreTrainedModel
 from transformers.dynamic_module_utils import get_class_from_dynamic_module
 from transformers.models.auto.auto_factory import _get_model_class

@@ -43,10 +44,10 @@ class _BaseAutoModelClass:
     @classmethod
     def get_rbln_cls(
         cls,
-        pretrained_model_name_or_path,
-        *args,
-        export=True,
-        **kwargs,
+        pretrained_model_name_or_path: Union[str, Path],
+        *args: Any,
+        export: bool = None,
+        **kwargs: Any,
     ):
         """
         Determine the appropriate RBLN model class based on the given model ID and configuration.
@@ -59,6 +60,20 @@ class _BaseAutoModelClass:
         Returns:
             RBLNBaseModel: The corresponding RBLN model class.
         """
+        if isinstance(pretrained_model_name_or_path, Path):
+            pretrained_model_name_or_path = pretrained_model_name_or_path.as_posix()
+
+        if export is None:
+            export = not RBLNBaseModel._is_compiled(
+                model_id=pretrained_model_name_or_path,
+                token=kwargs.get("token"),
+                revision=kwargs.get("revision"),
+                force_download=kwargs.get("force_download", False),
+                cache_dir=kwargs.get("cache_dir"),
+                subfolder=kwargs.get("subfolder", ""),
+                local_files_only=kwargs.get("local_files_only", False),
+            )
+
         if export:
             hf_model_class = cls.infer_hf_model_class(pretrained_model_name_or_path, **kwargs)
             rbln_class_name = convert_hf_to_rbln_model_name(hf_model_class.__name__)
@@ -85,9 +100,9 @@ class _BaseAutoModelClass:
     @classmethod
     def infer_hf_model_class(
         cls,
-        pretrained_model_name_or_path,
-        *args,
-        **kwargs,
+        pretrained_model_name_or_path: Union[str, Path],
+        *args: Any,
+        **kwargs: Any,
     ):
         """
         Infer the HuggingFace model class based on the configuration or model name.
@@ -140,7 +155,7 @@ class _BaseAutoModelClass:
         return model_class

     @classmethod
-    def get_rbln_model_cls_name(cls, pretrained_model_name_or_path, **kwargs):
+    def get_rbln_model_cls_name(cls, pretrained_model_name_or_path: Union[str, Path], **kwargs):
         """
         Retrieve the path to the compiled model directory for a given RBLN model.

@@ -163,17 +178,17 @@ class _BaseAutoModelClass:
         return rbln_config.rbln_model_cls_name

     @classmethod
-    def from_pretrained(cls, model_id, *args, **kwargs):
+    def from_pretrained(cls, model_id: Union[str, Path], *args, **kwargs):
         rbln_cls = cls.get_rbln_cls(model_id, *args, **kwargs)
         return rbln_cls.from_pretrained(model_id, *args, **kwargs)

     @classmethod
-    def from_model(cls, model, *args, **kwargs):
+    def from_model(cls, model: PreTrainedModel, *args, **kwargs):
         rbln_cls = get_rbln_model_cls(f"RBLN{model.__class__.__name__}")
         return rbln_cls.from_model(model, *args, **kwargs)

     @staticmethod
-    def register(rbln_cls: Type[RBLNBaseModel], exist_ok=False):
+    def register(rbln_cls: Type[RBLNBaseModel], exist_ok: bool = False):
         """
         Register a new RBLN model class.

@@ -1066,7 +1066,7 @@ class RotaryEmbedding(nn.Module):
             rope_type = "default"

         inv_freq, attention_scaling = ROPE_INIT_FUNCTIONS[rope_type](config, max_seq_len_cached)
-        cache_position = torch.arange(0, max_seq_len_cached, dtype=torch.float32)
+        cache_position = torch.arange(0, max_seq_len_cached)
         cache_position_expanded = cache_position[:, None]

         if rope_type == "dynamic":
@@ -1085,8 +1085,8 @@ class RotaryEmbedding(nn.Module):

     def forward(self, x, seq_len):
         return (
-            self._cos_cached[:seq_len].to(dtype=x.dtype),
-            self._sin_cached[:seq_len].to(dtype=x.dtype),
+            self._cos_cached[:seq_len].to(dtype=torch.float32),
+            self._sin_cached[:seq_len].to(dtype=torch.float32),
         )


@@ -1116,8 +1116,11 @@ def rotate_half(x):

 def apply_rotary_pos_emb(q, k, cos, sin):
     """Applies Rotary Position Embedding to the query and key tensors."""
+    dtype = q.dtype
     q_embed = (q * cos) + (rotate_half(q) * sin)
     k_embed = (k * cos) + (rotate_half(k) * sin)
+    q_embed = q_embed.to(dtype)
+    k_embed = k_embed.to(dtype)
     return q_embed, k_embed


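Together, the three RoPE hunks keep the cos/sin tables in float32 and perform the rotation in full precision even when activations are half precision, casting q/k back afterwards. A self-contained sketch of that mixed-precision pattern (shapes are illustrative):

    import torch

    def rotate_half(x):
        x1, x2 = x.chunk(2, dim=-1)
        return torch.cat((-x2, x1), dim=-1)

    def apply_rotary_fp32(q, k, cos, sin):
        # fp16 * fp32 promotes to fp32, so the rotation runs in full precision.
        dtype = q.dtype
        q_embed = (q * cos) + (rotate_half(q) * sin)
        k_embed = (k * cos) + (rotate_half(k) * sin)
        # Cast back so downstream attention sees the original activation dtype.
        return q_embed.to(dtype), k_embed.to(dtype)

    q = torch.randn(1, 8, 16, 64, dtype=torch.float16)
    k = torch.randn(1, 8, 16, 64, dtype=torch.float16)
    cos = torch.randn(16, 64, dtype=torch.float32)
    sin = torch.randn(16, 64, dtype=torch.float32)
    q_out, k_out = apply_rotary_fp32(q, k, cos, sin)
    assert q_out.dtype == torch.float16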
@@ -317,7 +317,13 @@ class RBLNRuntimeModel(RBLNPytorchRuntime):

         # Initialize attention mask for chunked processing
         chunked_attention_mask = (
-            torch.zeros(1, 1, self.rbln_config.prefill_chunk_size, self.rbln_config.max_seq_len, dtype=torch.float32)
+            torch.zeros(
+                1,
+                1,
+                self.rbln_config.prefill_chunk_size,
+                self.rbln_config.max_seq_len,
+                dtype=self.rbln_config.torch_dtype,
+            )
             if self.rbln_config.use_attention_mask
             else None
         )
@@ -19,7 +19,7 @@ from typing import TYPE_CHECKING, Any, Callable, List, Optional, Tuple, Union
 import rebel
 import torch
 from rebel.compile_context import CompileContext
-from transformers import AutoConfig, AutoModel, AutoModelForCausalLM, PretrainedConfig, PreTrainedModel
+from transformers import AutoModel, AutoModelForCausalLM, PretrainedConfig, PreTrainedModel
 from transformers.modeling_outputs import BaseModelOutputWithPast
 from transformers.modeling_utils import no_init_weights

@@ -33,7 +33,7 @@ from ...modeling_attention_utils import (
     validate_sliding_window,
 )
 from ...modeling_outputs import RBLNDecoderOnlyOutput
-from ...utils.rbln_quantization import prepare_model_for_quantization
+from ...utils.rbln_quantization import get_quantized_model
 from .configuration_decoderonly import RBLNDecoderOnlyModelConfig, RBLNDecoderOnlyModelForCausalLMConfig
 from .decoderonly_architecture import DecoderOnlyWrapper
 from .decoderonly_runtime_utils import RBLNPageTableManager, RBLNRuntimeModel
@@ -72,6 +72,7 @@ class RBLNDecoderOnlyModel(RBLNModel, RBLNDecoderOnlyFlashAttentionMixin):
     auto_model_class = AutoModel
     _decoder_wrapper_cls = DecoderOnlyWrapper
     _use_rotary_emb = True
+    _supports_non_fp32 = True

     def __post_init__(self, **kwargs):
         if self.rbln_config.use_inputs_embeds:
@@ -86,10 +87,8 @@ class RBLNDecoderOnlyModel(RBLNModel, RBLNDecoderOnlyFlashAttentionMixin):
     def setup_runtime(self):
         # Initialize resources to be used across Runtime instances (prefill and decode phases)
         page_table_manager = RBLNPageTableManager(self.rbln_config)
-        dec_attn_mask = torch.zeros(
-            self.rbln_config.batch_size, 1, 1, self.rbln_config.max_seq_len, dtype=torch.float32
-        )
-        out_buffers = [torch.empty(self.prefill_output_size, dtype=torch.float32, device="cpu")]
+        dec_attn_mask = torch.zeros(self.rbln_config.batch_size, 1, 1, self.rbln_config.max_seq_len, dtype=self.dtype)
+        out_buffers = [torch.empty(self.prefill_output_size, dtype=self.dtype)]

         common_kwargs = {
             "main_input_name": "inputs_embeds" if self.rbln_config.use_inputs_embeds else "input_ids",
@@ -143,35 +142,17 @@ class RBLNDecoderOnlyModel(RBLNModel, RBLNDecoderOnlyFlashAttentionMixin):
     ):
         kwargs = cls.update_kwargs(kwargs)

-        if config is None:
-            config = AutoConfig.from_pretrained(
-                model_id,
-                use_auth_token=use_auth_token,
-                revision=revision,
-                force_download=force_download,
-                cache_dir=cache_dir,
-                trust_remote_code=trust_remote_code,
-                **kwargs,
-            )
-        if config.torch_dtype == torch.bfloat16:
-            # FIXME: bfloat16 is not supported by rebel-compiler
-            config.torch_dtype = torch.float32
-
-        with no_init_weights():
-            model = cls.auto_model_class.from_config(config)
-
-        model = prepare_model_for_quantization(
-            model,
+        return get_quantized_model(
+            cls.auto_model_class,
             model_id,
-            kwargs.get("num_hidden_layers"),
             use_auth_token=use_auth_token,
             revision=revision,
             cache_dir=cache_dir,
             force_download=force_download,
             local_files_only=local_files_only,
             rbln_quantization=rbln_config.quantization,
+            **kwargs,
         )
-        return model

     def __getattr__(self, __name: str) -> Any:
         # Special method to delegate attribute access to the original Huggingface LM class.
@@ -365,7 +346,7 @@ class RBLNDecoderOnlyModel(RBLNModel, RBLNDecoderOnlyFlashAttentionMixin):

         input_info = []
         if rbln_config.use_inputs_embeds:
-            input_info.append(("inputs_embeds", [batch_size, query_length, hidden_size], "float32"))
+            input_info.append(("inputs_embeds", [batch_size, query_length, hidden_size], rbln_config.torch_dtype))
         else:
             input_info.append(("input_ids", [batch_size, query_length], "int64"))

@@ -384,16 +365,16 @@ class RBLNDecoderOnlyModel(RBLNModel, RBLNDecoderOnlyFlashAttentionMixin):

         if rbln_config.use_attention_mask:
             if rbln_config.use_position_ids:
-                input_info.append(("attention_mask", [batch_size, rbln_config.max_seq_len], "float32"))
+                input_info.append(("attention_mask", [batch_size, rbln_config.max_seq_len], rbln_config.torch_dtype))
             else:
                 input_info.append(
-                    ("attention_mask", [batch_size, 1, query_length, rbln_config.max_seq_len], "float32")
+                    ("attention_mask", [batch_size, 1, query_length, rbln_config.max_seq_len], rbln_config.torch_dtype)
                 )

         if rbln_config.use_position_ids:
             input_info.append(("position_ids", [batch_size, query_length], "int32"))

-        kvcache_dtype = "float32"
+        kvcache_dtype = rbln_config.torch_dtype
         if rbln_config.quantization and rbln_config.quantization.kv_caches == "fp8":
             kvcache_dtype = "float8_e4m3fn"

@@ -345,6 +345,7 @@ class RBLNGemma3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
     """

     _decoder_wrapper_cls = Gemma3ForCausalLMWrapper
+    _supports_non_fp32 = False

     def setup_runtime(self):
         # Initialize shared resources to be used across Runtime instances (prefill and decode phases)
@@ -357,10 +357,16 @@ class _GroundingDinoMultiscaleDeformableAttention(torch.nn.Module):
         batch_size, num_queries, _ = hidden_states.shape
         batch_size, sequence_length, _ = encoder_hidden_states.shape
         # Ignore copy
-        if (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() != sequence_length:
-            raise ValueError(
-                "Make sure to align the spatial shapes with the sequence length of the encoder hidden states"
+        if torch.compiler.is_exporting():
+            torch._check(
+                (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum().item() == sequence_length,
+                "Make sure to align the spatial shapes with the sequence length of the encoder hidden states",
             )
+        else:
+            if (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() != sequence_length:
+                raise ValueError(
+                    "Make sure to align the spatial shapes with the sequence length of the encoder hidden states"
+                )

         value = self.value_proj(encoder_hidden_states)
         if attention_mask is not None:
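`torch.compiler.is_exporting()` (available in recent PyTorch releases) lets the module keep its eager ValueError while giving torch.export a `torch._check` guard instead of a data-dependent branch. A standalone sketch of the pattern, assuming a current PyTorch; note that `torch._check` documents its message as a zero-argument callable:

    import torch

    def validate_lengths(spatial_shapes: torch.Tensor, sequence_length: int) -> None:
        total = (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum()
        if torch.compiler.is_exporting():
            # Under export, .item() yields an unbacked symbolic int and
            # torch._check records the equality as a guard, not a branch.
            torch._check(
                total.item() == sequence_length,
                lambda: "spatial shapes do not match the encoder sequence length",
            )
        else:
            if total != sequence_length:
                raise ValueError("spatial shapes do not match the encoder sequence length")

    validate_lengths(torch.tensor([[2, 3], [4, 5]]), 26)  # 2*3 + 4*5 = 26, passes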
@@ -372,6 +372,8 @@ class RBLNQwen2_5_VLForConditionalGeneration(RBLNDecoderOnlyModelForCausalLM):
     ```
     """

+    _supports_non_fp32 = False
+
     auto_model_class = AutoModelForVision2Seq
     _rbln_submodules = [
         {"name": "visual"},
@@ -162,7 +162,13 @@ class TimeSeriesTransformersDecoder(nn.Module):
         attention_mask = _prepare_4d_causal_attention_mask(attention_mask, input_shape, inputs_embeds, cache_position)

         hidden_states = self.value_embedding(inputs_embeds)
-        embed_pos = self.embed_positions.weight[cache_position + self.config.context_length]
+        embed_idx = cache_position + self.config.context_length
+        if torch.compiler.is_exporting():
+            embed_idx = embed_idx.item()
+            torch._check_is_size(embed_idx)
+            torch._check(embed_idx >= 0)
+            torch._check(embed_idx < len(self.embed_positions.weight))
+        embed_pos = self.embed_positions.weight[embed_idx]
         hidden_states = self.layernorm_embedding(hidden_states + embed_pos)

         # iterate decoder_layer
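Indexing an embedding table with a data-dependent scalar is the canonical unbacked-SymInt problem for torch.export: without bounds the exporter cannot prove the index is in range. `torch._check_is_size` plus the explicit lower and upper `torch._check` bounds supply that proof. A reduced sketch under the same assumption of a recent PyTorch:

    import torch

    table = torch.nn.Embedding(512, 32)

    def lookup(position: torch.Tensor, offset: int) -> torch.Tensor:
        idx = position + offset
        if torch.compiler.is_exporting():
            idx = idx.item()           # unbacked SymInt during export
            torch._check_is_size(idx)  # mark as a non-negative size-like int
            torch._check(idx >= 0)
            torch._check(idx < table.weight.shape[0])  # bound for the indexing guard
        return table.weight[idx]

    print(lookup(torch.tensor(3), 100).shape)  # torch.Size([32])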
@@ -14,18 +14,23 @@

 import glob
 import os
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Type, Union

 import torch
 from huggingface_hub import hf_hub_download, list_repo_files
 from safetensors.torch import load_file
 from torch.nn import Linear, Parameter
 from torch.nn import functional as F
+from transformers import AutoConfig
+from transformers.modeling_utils import get_state_dict_dtype, no_init_weights

 from ...configuration_utils import RBLNSerializableConfigProtocol
 from ...utils.logging import get_logger


+if TYPE_CHECKING:
+    from transformers.models.auto.modeling_auto import _BaseAutoModelClass
+
 logger = get_logger()

@@ -138,22 +143,31 @@ class QuantizedLayerFactory:
         return create_fp8linear(layer, self.quantization_config)


-def prepare_model_for_quantization(
-    model: torch.nn.Module,
+def get_quantized_model(
+    hf_auto_model_class: Type["_BaseAutoModelClass"],
     model_id: str,
-    n_layer: Optional[int] = None,
     use_auth_token: Optional[Union[bool, str]] = None,
     revision: Optional[str] = None,
     cache_dir: Optional[str] = None,
     force_download: bool = False,
     local_files_only: bool = False,
     rbln_quantization: Optional[RBLNQuantizationConfig] = None,
-) -> torch.nn.Module:
+    **kwargs,
+):
     """
-    Prepare the model for quantization by updating specified linear layers to quantized (qlinear) layers.
+    Get a quantized model from a model class and model id.
     """
+    # torch_dtype should not be passed to AutoConfig.from_pretrained
+    # since it doesn't support 'auto'
+    torch_dtype = kwargs.pop("torch_dtype", None)
+    if torch_dtype is not None:
+        logger.warning(
+            "torch_dtype is not supported for quantized models. "
+            "It will be ignored and the dtype of the model will be determined by the weights."
+        )
+        torch_dtype = None

-    # 1. Load weight files
+    # get paths of safetensors files in the model repo
     safetensor_files = load_weight_files(
         model_id,
         use_auth_token=use_auth_token,
@@ -163,17 +177,31 @@ def prepare_model_for_quantization(
         local_files_only=local_files_only,
     )

-    # 2. Update linear layers based on the quantization config
-    update_layers_to_quantize(model, rbln_quantization)
+    # load safetensors files into memory
+    safetensors = [load_file(safetensor_file) for safetensor_file in safetensor_files]
+
+    # get the dtype of the model from the first safetensor file
+    torch_dtype = get_state_dict_dtype(safetensors[0])

-    # 3. Load weights into model parameters
-    load_weights_from_files(
-        model,
-        safetensor_files,
-        n_layer,
-        rbln_quantization=rbln_quantization,
+    config = AutoConfig.from_pretrained(
+        model_id,
+        use_auth_token=use_auth_token,
+        revision=revision,
+        cache_dir=cache_dir,
+        force_download=force_download,
+        local_files_only=local_files_only,
+        **kwargs,
     )

+    with no_init_weights():
+        model = hf_auto_model_class.from_config(config, torch_dtype=torch_dtype)
+
+    # Quantize the model
+    update_layers_to_quantize(model, rbln_quantization)
+
+    # Load weights into the model
+    load_weights_from_files(model, safetensors, rbln_quantization)
+
     return model

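The rewritten loader derives the model's precision from the checkpoint itself: shards are read into memory, `get_state_dict_dtype` inspects the first shard, the skeleton is built under `no_init_weights`, quantized layers replace the float linears, and only then are weights copied in. A small demonstration of the dtype-inference step, using `get_state_dict_dtype` as imported in this diff from `transformers.modeling_utils`:

    import torch
    from transformers.modeling_utils import get_state_dict_dtype

    # Returns the dtype of the first floating-point tensor in the state dict,
    # which is how the loader infers the checkpoint's precision.
    state_dict = {
        "wte.weight": torch.randn(10, 4, dtype=torch.float16),
        "position_ids": torch.arange(10),  # integer tensors are skipped
    }
    assert get_state_dict_dtype(state_dict) == torch.float16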
@@ -372,32 +400,26 @@ def canonicalize_checkpoint_items(

 def load_weights_from_files(
     model: torch.nn.Module,
-    safetensor_files: list[str],
-    n_layer: Optional[int] = None,
+    safetensors: List[Dict[str, torch.Tensor]],
     rbln_quantization: Optional[RBLNQuantizationConfig] = None,
 ):
     """
-    Load safetensor file data directly into the model from provided safetensor files,
-    filtering by layer if n_layer is provided.
+    Load safetensor file data directly into the model from provided safetensor files.
     """

     model_params = dict(model.named_parameters(recurse=True))
     model_buffers = dict(model.named_buffers(recurse=True))

-    target_layers = list(range(n_layer)) if n_layer is not None else None
-
     unloaded_keys = []
     loaded_input_scale = False
     loaded_kv_scale = False
     loaded_weight_scale = False

-    for safetensor_file in safetensor_files:
-        file_data = load_file(safetensor_file)
-
+    for safetensor in safetensors:
         # Normalize all (key, tensor) pairs to the internal schema
         normalized_items = canonicalize_checkpoint_items(
             model=model,
-            items=file_data.items(),
+            items=safetensor.items(),
             rbln_quantization=rbln_quantization,
         )

@@ -410,12 +432,6 @@ def load_weights_from_files(
            if key.endswith("k_scale") or key.endswith("v_scale"):
                loaded_kv_scale = True

-           # Filter by layer index if requested
-           if target_layers is not None:
-               parts = key.split(".")
-               if len(parts) > 2 and parts[2].isdigit() and (int(parts[2]) not in target_layers):
-                   continue
-
            # Copy into parameters or buffers
            if key in model_params:
                # Ensure dtype compatibility
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: optimum-rbln
-Version: 0.8.3rc0
+Version: 0.8.4a1
 Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
 Project-URL: Homepage, https://rebellions.ai
 Project-URL: Documentation, https://docs.rbln.ai
@@ -1,10 +1,10 @@
 optimum/rbln/__init__.py,sha256=32ouGKDGus9k5_kD27CxP8jIQOw66zpDTfS0xs1XlfE,18298
-optimum/rbln/__version__.py,sha256=boIaJ8T6HCT9Qh8wBU3n-6ZyjtAKYgztQh0WMaN7BxM,714
-optimum/rbln/configuration_utils.py,sha256=fE3HlZblxukKSdS-4VofjuyCAiqwPMX8bqXpOiTZp4g,33926
-optimum/rbln/modeling.py,sha256=jMiJy9PGjZpXpAmRTFD5fTuj8xEbLCUmncIxGD6XWLk,14338
-optimum/rbln/modeling_base.py,sha256=txBab-zVXcjqnF2gZJBzhrp5ruA3vwt3hjls0Q2S_0w,25492
+optimum/rbln/__version__.py,sha256=Xldcu_i01nl8cPxjp-cO8CxxNYyVzFEpw4QQPEW-cj4,712
+optimum/rbln/configuration_utils.py,sha256=WNubd8EJIrdBkLOGT2UJJorgNL3lzhjg3a4bihAIptY,34761
+optimum/rbln/modeling.py,sha256=cAIPWEw5DGzUWeqjCbocRhU6OO3jyhVGW60AmBLh1Nw,14134
+optimum/rbln/modeling_base.py,sha256=97ju0uHJXB7PaorKaspf-FbLfsaHy0HwRVLJqtVscXA,27574
 optimum/rbln/diffusers/__init__.py,sha256=1tgU_xWA42BmInqu9bBz_5R_E9TGhhK3mI06YlaiTLg,7232
-optimum/rbln/diffusers/modeling_diffusers.py,sha256=TAuMb7PSMjNwK7mh5ItE_CtAEgYeZKI27XkFFmxjHlQ,19902
+optimum/rbln/diffusers/modeling_diffusers.py,sha256=3bzL0ZH7XyS8nGMWRSMIGjl9H3H2fhiZgmPaIF50mwg,20464
 optimum/rbln/diffusers/configurations/__init__.py,sha256=vMRnPY4s-Uju43xP038D2EA18X_mhy2YfsZVpSU-VoA,1322
 optimum/rbln/diffusers/configurations/models/__init__.py,sha256=7q95gtgDzCeIBogGw8SLQoHT4Wch7vpLJVF2UQovuoo,567
 optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py,sha256=ADS4SGZbwY6fy3SVNhgo3Zg4KxzAAGq5_zsJ97Dezh4,3201
@@ -36,7 +36,7 @@ optimum/rbln/diffusers/models/transformers/transformer_sd3.py,sha256=yF7sS0Qvawo
 optimum/rbln/diffusers/models/unets/__init__.py,sha256=MaICuK9CWjgzejXy8y2NDrphuEq1rkzanF8u45k6O5I,655
 optimum/rbln/diffusers/models/unets/unet_2d_condition.py,sha256=v3WS9EGKROE_QClXrxC7rmRko1BspAvAbeIfh83LK88,15832
 optimum/rbln/diffusers/pipelines/__init__.py,sha256=r8mu21102cKXdkG1II9tpfpUS6wuyren2oK9y_MptZY,3703
-optimum/rbln/diffusers/pipelines/auto_pipeline.py,sha256=zFDXbO9Iv0LO7maefV82dmi5Ta6L9oZxY09QFVX6F_Q,9511
+optimum/rbln/diffusers/pipelines/auto_pipeline.py,sha256=DaDWla59LhKGv7h8sdnJrwYaxvcwnO3-qFc47NHvx20,10644
 optimum/rbln/diffusers/pipelines/controlnet/__init__.py,sha256=n1Ef22TSeax-kENi_d8K6wGGHSNEo9QkUeygELHgcao,983
 optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py,sha256=3S9dogIHW8Bqg5kIlCudhCQG-4g3FcdOPEWhBOf7CJA,4059
 optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py,sha256=G96bh4D9Cu-w4F9gZBQF6wNzhJQv9kvI34ZFsuEDjSw,35714
@@ -83,7 +83,7 @@ optimum/rbln/transformers/models/audio_spectrogram_transformer/__init__.py,sha25
 optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py,sha256=z7LJiVJPmnlCM3mcyhPJP8AufSrxO_dsPeJ51onq-Nc,833
 optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py,sha256=FIKEVWpIt6-JQX9B_rAfCrAPqdUHtR2i8D_X2k7639E,1498
 optimum/rbln/transformers/models/auto/__init__.py,sha256=tdYqXkg9xBGNr4fZjH7_O3qRVbHvpEVjrJ6wtNUMMJM,1150
-optimum/rbln/transformers/models/auto/auto_factory.py,sha256=1CA52xV2dS1Uzumcgqe4zobdpoi-Xt2oNjP3uLFtm08,8020
+optimum/rbln/transformers/models/auto/auto_factory.py,sha256=9oaynN5f6aL6BTgDu5xF3b-5lz9eFuzLOdfVaZwIwvc,8834
 optimum/rbln/transformers/models/auto/modeling_auto.py,sha256=SMsWnD8f7VhKmh7h_S2voksEWlNccfF4fQ7AmwLYq6U,4790
 optimum/rbln/transformers/models/bart/__init__.py,sha256=fVo-gZEmJ0yxkIxEX6ciuRAGgXNyuvaXE2s88bhbjAE,830
 optimum/rbln/transformers/models/bart/bart_architecture.py,sha256=mAepjL0paPMK180vGTTCxXQ-hVZ1DD6JR-GvVNGJLqY,6268
@@ -105,10 +105,10 @@ optimum/rbln/transformers/models/colpali/configuration_colpali.py,sha256=eDWPVlo
 optimum/rbln/transformers/models/colpali/modeling_colpali.py,sha256=v9rPLmNx-BQZhDFhKnr2kmARElTtKdFZCgFIU4m-HPw,15703
 optimum/rbln/transformers/models/decoderonly/__init__.py,sha256=w3VZOIBYaHXVdnuhK4y0zWAj0IAv7_5LGTJYaz9oYmI,1056
 optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py,sha256=H2i9Iefy-q5X-0BLWQ-CrxK8ZoT3p9t0lt_3r4TFSCY,15182
-optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=cGkhGc8XlseMWqDCrt13z0Itn9b0emZ2PjHI-1TP0wI,42685
-optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py,sha256=9acEQxGRzd21YkzxRchkhqxqpX7emQHZigFg60BIulc,19902
+optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py,sha256=L5LArhjN36fTdiwrUABgn3cnS7hh4SVCF4FMHBbiLZU,42760
+optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py,sha256=v3mfIlQImQkYYr-rPn7rQR3GYdVUhALRttEduLI7H9c,20012
 optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py,sha256=4D89IF0yQju_Dp_vLJN_dBkpe2U_LMWaUciYx57D-0M,3379
-optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=hu2eJr0CpLHnRPSLhyBhyyC6DfosKmPu7lPjapcBCkE,33061
+optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py,sha256=dAHV9NgdpXHyTJGT0lieXOB3Pzi_NPlR4rqmRtmAWzM,32412
 optimum/rbln/transformers/models/depth_anything/__init__.py,sha256=xvPSIriMJWyNeVYoVB1Z7YqB4kkHOIkaHq7loNps-dk,756
 optimum/rbln/transformers/models/depth_anything/configuration_depth_anything.py,sha256=JujBVEUa_zZDXNPr1y-B_PhK5SgFFcY8Ib4EoGjjtmE,989
 optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py,sha256=tTmsVaW9Wb2WD3nKRLwp7swn3hbMvgwUEJwwVIfNYEc,1008
@@ -130,14 +130,14 @@ optimum/rbln/transformers/models/gemma3/__init__.py,sha256=6rugk3615SEt4lh7gduo_
 optimum/rbln/transformers/models/gemma3/configuration_gemma3.py,sha256=rKjKJhyaIM7YoiLR-q8GAZKIQNzDzcb5X7qf_FJE72M,3398
 optimum/rbln/transformers/models/gemma3/gemma3_architecture.py,sha256=fpLDAXCe5paWVsfc0tL59JkRQMRF-WNgIzOIb_QpSLU,6191
 optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py,sha256=vYQ9sjRlkfamxZca_hVMQI0ylKeExsV02gOWaYVMjyg,9640
-optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=zraPjowA8ni9Lb0NrmsiUai2XdOjgYOOpVnIU1n2jGA,24208
+optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=TxbgkvW2Nv0VGdXNXnN_Beas6E_1D9NAH8f09Fo8t0E,24239
 optimum/rbln/transformers/models/gpt2/__init__.py,sha256=SsawHMStE3wYRtqkH5EvdTFkCdX0LLmp-QSKFhEBrHo,740
 optimum/rbln/transformers/models/gpt2/configuration_gpt2.py,sha256=iGdHfzG7plekZcIz-Z5U8lRE4SB8gbJJNcFQJ9l8Myg,1533
 optimum/rbln/transformers/models/gpt2/gpt2_architecture.py,sha256=MyAWReXmyuHnDpW5HI_TI7psyJZxLujZ9KT5XnNm7nA,2802
 optimum/rbln/transformers/models/gpt2/modeling_gpt2.py,sha256=DhF6hU3oCYGbZ7UijKCsRfTx-VCkTqqqNwqqMSrjqRE,2230
 optimum/rbln/transformers/models/grounding_dino/__init__.py,sha256=DE7DipZGvrKC6b1T77k4I4X3G70ss8mlr-PrZCaohto,307
 optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py,sha256=b6aeAlAMf0aOoTKAqe5nnBfontu_H3zvIHgOiCNMJ1I,3127
-optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py,sha256=A_YBgvPVHwwKgsGLL0z4MyTKb6Hb6r3y6sU3oVIrKiU,22779
+optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py,sha256=E6HReXGwvSV7YDeetSBuds1rAVSzEeL0AGHYgBOQW6o,23097
 optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py,sha256=bXAOs2QH4sy2UFoFLUSM6u1_VHouUT5COERLQX20F6Y,46897
 optimum/rbln/transformers/models/idefics3/__init__.py,sha256=ulxE7HEfXsNJhd25J9Fvi6vggo9aZH9sLKJjWB6LlzQ,814
 optimum/rbln/transformers/models/idefics3/configuration_idefics3.py,sha256=8BhPLkfE1_ZU0eSm2iTbWQOnVe1q0g99srYHWZM6VJ4,2373
@@ -182,7 +182,7 @@ optimum/rbln/transformers/models/qwen2/modeling_qwen2.py,sha256=VOboPJF1rvvSVWkH
 optimum/rbln/transformers/models/qwen2/qwen2_architecture.py,sha256=XlNAMYAcDLohnSAhIFGKOPuCB5XLgzYs5ABWdeQSaZs,720
 optimum/rbln/transformers/models/qwen2_5_vl/__init__.py,sha256=rAW3DKQUzGL6EMwa5r1iLu94yhpiZpk6zfoD7TtYXrc,865
 optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py,sha256=1yyMFxh1SKsKR7rOjuotPvpSneN2_4a89bYfNk42370,4735
-optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py,sha256=BfpALuavpdCqe5RuHaNZNo2IDlLjE4SwsoPAlaictgc,26607
+optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py,sha256=hRvA37sPFC9xH1FqnFbtHS9rQOPwAvLYg4zl4oEyK-w,26639
 optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py,sha256=i_UUWhKoFjJ5CCpgeWicqABM23TxMEKPQ354LoZ6iUU,7445
 optimum/rbln/transformers/models/qwen3/__init__.py,sha256=tI4KwvXpD35dUUaa8aLUXpWoU9gJGcmKXeywOlH14ZE,746
 optimum/rbln/transformers/models/qwen3/configuration_qwen3.py,sha256=BFRPggnH4VlsXlOa19C6KAID-bPgQ8ooQ29dvogh5zk,2102
@@ -211,7 +211,7 @@ optimum/rbln/transformers/models/t5/t5_architecture.py,sha256=DlJNrGk35NTBhcp76P
 optimum/rbln/transformers/models/time_series_transformer/__init__.py,sha256=xJaFWQawlwtv4H5tVFcY1pxLYzjHtMAlLq6nXysdkN8,1243
 optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py,sha256=MO-T4pcsea4EOmYeeg0tosUH6w76azqIPyV8Em8CMqw,1621
 optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py,sha256=8orxM-LbShCt2jC8Uyx43cSxWN1CGxamS58pKPjvzxs,17167
-optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py,sha256=XJDjQGbWXUq4ZimNojlcbm3mTDpxUMCl6tkFSzfYFl4,13769
+optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py,sha256=hAZXyXxzSDJMdkI883eefzpjz2L9KTVTRBeOVU8e92k,14038
 optimum/rbln/transformers/models/vit/__init__.py,sha256=CrrkHehfCe3U-_rUS00aMBY7Tncdeh43sNUgVI9Dt_g,807
 optimum/rbln/transformers/models/vit/configuration_vit.py,sha256=x98CxKR1cpKAG7Eh43uuPeGeGn4gS3HcKLPoDL3SWJo,994
 optimum/rbln/transformers/models/vit/modeling_vit.py,sha256=Q8xvX2oG2dC2RYM4ocaS0H70a2q_vQ9DZK2mCdyvxa0,1058
@@ -227,7 +227,7 @@ optimum/rbln/transformers/models/xlm_roberta/__init__.py,sha256=O3o2KzJ8Li3QhB7G
 optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py,sha256=wHRpGTXL9khYqSkKL1IgA7__6_lt9QpOz9tHumjK7fo,1260
 optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py,sha256=EZd3flRUEE38DYtdqEnG70LV7fHhkamRZV51xrVyjYI,1093
 optimum/rbln/transformers/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-optimum/rbln/transformers/utils/rbln_quantization.py,sha256=ARngdvRmeVoOphUU3Md9kT6zS5HDrYdEFYljJwaAaio,21020
+optimum/rbln/transformers/utils/rbln_quantization.py,sha256=pORshQUgTInNaibUtd0HL-T8bKW5wuulZs2q0Oshppc,21659
 optimum/rbln/utils/__init__.py,sha256=ieDBT2VFTt2E0M4v_POLBpuGW9LxSydpb_DuPd6PQqc,712
 optimum/rbln/utils/decorator_utils.py,sha256=xu-TrsNi33SRC2a7DBsyoo6-pEQxWKZPZSmM9QlDe2Y,3745
 optimum/rbln/utils/depreacate_utils.py,sha256=uKxl3ENUCNaZXPnaDQvNxrH8hUIWdBWfZH6BM7ZV__4,385
@@ -238,7 +238,7 @@ optimum/rbln/utils/model_utils.py,sha256=4k5879Kh75m3x_vS4-qOGfqsOiAvc2kdNFFfvsF
 optimum/rbln/utils/runtime_utils.py,sha256=R6uXDbeJP03-FWdd4vthNe2D4aCra5n12E3WB1ifiGM,7933
 optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
 optimum/rbln/utils/submodule.py,sha256=60NGLFvnhjP1DJg1opdb-FVQDsthcLCwWjW_1WQaasU,5280
-optimum_rbln-0.8.3rc0.dist-info/METADATA,sha256=ls15qV7a7bVTpkphb6aHteuBfil7u1xOzkUuysoRPZg,5300
-optimum_rbln-0.8.3rc0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-optimum_rbln-0.8.3rc0.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
-optimum_rbln-0.8.3rc0.dist-info/RECORD,,
+optimum_rbln-0.8.4a1.dist-info/METADATA,sha256=cs0rmwPfLMefC6PHPHGw7XYrZIQVGPP3ax09PhmeUB8,5299
+optimum_rbln-0.8.4a1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+optimum_rbln-0.8.4a1.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+optimum_rbln-0.8.4a1.dist-info/RECORD,,