optimum-rbln 0.8.2a4__py3-none-any.whl → 0.9.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__init__.py +108 -9
- optimum/rbln/__version__.py +16 -3
- optimum/rbln/cli.py +660 -0
- optimum/rbln/configuration_utils.py +156 -43
- optimum/rbln/diffusers/__init__.py +19 -0
- optimum/rbln/diffusers/configurations/__init__.py +3 -0
- optimum/rbln/diffusers/configurations/models/__init__.py +2 -0
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +3 -3
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +1 -1
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_temporal_decoder.py +67 -0
- optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +3 -3
- optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +4 -4
- optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +9 -4
- optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +9 -4
- optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +3 -3
- optimum/rbln/diffusers/configurations/models/configuration_unet_spatio_temporal_condition.py +59 -0
- optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +3 -3
- optimum/rbln/diffusers/configurations/pipelines/__init__.py +3 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +35 -19
- optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +14 -11
- optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +30 -20
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +13 -9
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +17 -13
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +17 -10
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_video_diffusion.py +114 -0
- optimum/rbln/diffusers/modeling_diffusers.py +30 -14
- optimum/rbln/diffusers/models/__init__.py +4 -0
- optimum/rbln/diffusers/models/autoencoders/__init__.py +1 -0
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +31 -3
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +31 -6
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +275 -0
- optimum/rbln/diffusers/models/autoencoders/vae.py +27 -8
- optimum/rbln/diffusers/models/autoencoders/vq_model.py +31 -3
- optimum/rbln/diffusers/models/controlnet.py +16 -1
- optimum/rbln/diffusers/models/transformers/prior_transformer.py +17 -3
- optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +25 -2
- optimum/rbln/diffusers/models/transformers/transformer_sd3.py +23 -2
- optimum/rbln/diffusers/models/unets/__init__.py +1 -0
- optimum/rbln/diffusers/models/unets/unet_2d_condition.py +23 -4
- optimum/rbln/diffusers/models/unets/unet_spatio_temporal_condition.py +201 -0
- optimum/rbln/diffusers/pipelines/__init__.py +15 -5
- optimum/rbln/diffusers/pipelines/auto_pipeline.py +307 -0
- optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +20 -0
- optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +19 -16
- optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +14 -18
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +31 -1
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +31 -1
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +1 -6
- optimum/rbln/diffusers/pipelines/stable_video_diffusion/__init__.py +15 -0
- optimum/rbln/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +46 -0
- optimum/rbln/modeling.py +48 -21
- optimum/rbln/modeling_base.py +99 -22
- optimum/rbln/ops/attn.py +158 -0
- optimum/rbln/ops/flash_attn.py +166 -0
- optimum/rbln/ops/kv_cache_update.py +5 -0
- optimum/rbln/ops/linear.py +7 -0
- optimum/rbln/transformers/__init__.py +92 -0
- optimum/rbln/transformers/configuration_generic.py +7 -32
- optimum/rbln/transformers/modeling_attention_utils.py +385 -0
- optimum/rbln/transformers/modeling_generic.py +48 -65
- optimum/rbln/transformers/modeling_outputs.py +37 -0
- optimum/rbln/transformers/models/__init__.py +91 -30
- optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +28 -2
- optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +68 -5
- optimum/rbln/transformers/models/auto/__init__.py +2 -0
- optimum/rbln/transformers/models/auto/auto_factory.py +92 -17
- optimum/rbln/transformers/models/auto/modeling_auto.py +45 -0
- optimum/rbln/transformers/models/bart/bart_architecture.py +1 -3
- optimum/rbln/transformers/models/bart/configuration_bart.py +2 -0
- optimum/rbln/transformers/models/bart/modeling_bart.py +23 -2
- optimum/rbln/transformers/models/bert/bert_architecture.py +16 -0
- optimum/rbln/transformers/models/bert/modeling_bert.py +93 -4
- optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +42 -11
- optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +135 -44
- optimum/rbln/transformers/models/clip/configuration_clip.py +10 -7
- optimum/rbln/transformers/models/clip/modeling_clip.py +67 -6
- optimum/rbln/transformers/models/colpali/colpali_architecture.py +3 -6
- optimum/rbln/transformers/models/colpali/configuration_colpali.py +37 -21
- optimum/rbln/transformers/models/colpali/modeling_colpali.py +82 -104
- optimum/rbln/transformers/models/colqwen2/__init__.py +2 -0
- optimum/rbln/transformers/models/colqwen2/colqwen2_architecture.py +233 -0
- optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py +74 -0
- optimum/rbln/transformers/models/colqwen2/modeling_colqwen2.py +446 -0
- optimum/rbln/transformers/models/decoderonly/__init__.py +3 -2
- optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +114 -37
- optimum/rbln/transformers/models/decoderonly/configuration_lora.py +411 -0
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +318 -309
- optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +508 -0
- optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +119 -0
- optimum/rbln/transformers/models/decoderonly/lora_architecture.py +204 -0
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +485 -905
- optimum/rbln/transformers/models/depth_anything/__init__.py +16 -0
- optimum/rbln/transformers/models/depth_anything/configuration_depth_anything.py +24 -0
- optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py +42 -0
- optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +24 -0
- optimum/rbln/transformers/models/dpt/modeling_dpt.py +17 -0
- optimum/rbln/transformers/models/exaone/modeling_exaone.py +42 -4
- optimum/rbln/transformers/models/gemma/__init__.py +2 -2
- optimum/rbln/transformers/models/gemma/configuration_gemma.py +9 -1
- optimum/rbln/transformers/models/gemma/gemma_architecture.py +1 -4
- optimum/rbln/transformers/models/gemma/modeling_gemma.py +22 -1
- optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +49 -13
- optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +12 -2
- optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py +245 -0
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +201 -351
- optimum/rbln/transformers/models/gpt2/__init__.py +2 -2
- optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +31 -3
- optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +10 -8
- optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +18 -1
- optimum/rbln/transformers/models/grounding_dino/__init__.py +10 -0
- optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +92 -0
- optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +599 -0
- optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +1048 -0
- optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +35 -7
- optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +29 -32
- optimum/rbln/transformers/models/llama/__init__.py +2 -2
- optimum/rbln/transformers/models/llama/configuration_llama.py +9 -1
- optimum/rbln/transformers/models/llama/modeling_llama.py +22 -1
- optimum/rbln/transformers/models/llava/__init__.py +16 -0
- optimum/rbln/transformers/models/llava/configuration_llava.py +72 -0
- optimum/rbln/transformers/models/llava/modeling_llava.py +490 -0
- optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +15 -17
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +234 -376
- optimum/rbln/transformers/models/midm/midm_architecture.py +4 -1
- optimum/rbln/transformers/models/midm/modeling_midm.py +42 -4
- optimum/rbln/transformers/models/mistral/__init__.py +2 -2
- optimum/rbln/transformers/models/mistral/configuration_mistral.py +9 -1
- optimum/rbln/transformers/models/mistral/mistral_architecture.py +1 -1
- optimum/rbln/transformers/models/mistral/modeling_mistral.py +26 -3
- optimum/rbln/transformers/models/opt/__init__.py +2 -2
- optimum/rbln/transformers/models/opt/configuration_opt.py +8 -1
- optimum/rbln/transformers/models/opt/modeling_opt.py +29 -17
- optimum/rbln/transformers/models/opt/opt_architecture.py +4 -4
- optimum/rbln/transformers/models/pegasus/__init__.py +17 -0
- optimum/rbln/transformers/models/pegasus/configuration_pegasus.py +38 -0
- optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +71 -0
- optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +161 -0
- optimum/rbln/transformers/models/phi/__init__.py +2 -2
- optimum/rbln/transformers/models/phi/configuration_phi.py +9 -1
- optimum/rbln/transformers/models/phi/modeling_phi.py +10 -1
- optimum/rbln/transformers/models/phi/phi_architecture.py +11 -7
- optimum/rbln/transformers/models/pixtral/__init__.py +16 -0
- optimum/rbln/transformers/models/pixtral/configuration_pixtral.py +43 -0
- optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +322 -0
- optimum/rbln/transformers/models/pixtral/pixtral_architecture.py +73 -0
- optimum/rbln/transformers/models/qwen2/__init__.py +2 -2
- optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +9 -1
- optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +27 -1
- optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +21 -6
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +15 -22
- optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +28 -7
- optimum/rbln/transformers/models/qwen2_vl/__init__.py +19 -0
- optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +88 -0
- optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +513 -0
- optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +165 -0
- optimum/rbln/transformers/models/qwen3/configuration_qwen3.py +2 -2
- optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +86 -330
- optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +1 -245
- optimum/rbln/transformers/models/resnet/configuration_resnet.py +17 -0
- optimum/rbln/transformers/models/resnet/modeling_resnet.py +73 -0
- optimum/rbln/transformers/models/roberta/modeling_roberta.py +33 -0
- optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +21 -16
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +58 -13
- optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +2 -2
- optimum/rbln/transformers/models/siglip/__init__.py +2 -6
- optimum/rbln/transformers/models/siglip/configuration_siglip.py +1 -1
- optimum/rbln/transformers/models/siglip/modeling_siglip.py +21 -16
- optimum/rbln/transformers/models/swin/__init__.py +16 -0
- optimum/rbln/transformers/models/swin/configuration_swin.py +42 -0
- optimum/rbln/transformers/models/swin/modeling_swin.py +354 -0
- optimum/rbln/transformers/models/t5/configuration_t5.py +2 -0
- optimum/rbln/transformers/models/t5/modeling_t5.py +2 -2
- optimum/rbln/transformers/models/t5/t5_architecture.py +8 -1
- optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +3 -3
- optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +20 -16
- optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +7 -1
- optimum/rbln/transformers/models/vit/modeling_vit.py +19 -0
- optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +15 -3
- optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +61 -8
- optimum/rbln/transformers/models/whisper/configuration_whisper.py +12 -13
- optimum/rbln/transformers/models/whisper/generation_whisper.py +62 -6
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +30 -5
- optimum/rbln/transformers/models/xlm_roberta/__init__.py +2 -8
- optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +43 -0
- optimum/rbln/transformers/utils/rbln_quantization.py +400 -75
- optimum/rbln/transformers/utils/rbln_runtime_wrapper.py +79 -0
- optimum/rbln/utils/deprecation.py +213 -0
- optimum/rbln/utils/hub.py +14 -3
- optimum/rbln/utils/runtime_utils.py +60 -18
- optimum/rbln/utils/submodule.py +31 -9
- {optimum_rbln-0.8.2a4.dist-info → optimum_rbln-0.9.3.dist-info}/METADATA +8 -7
- optimum_rbln-0.9.3.dist-info/RECORD +264 -0
- {optimum_rbln-0.8.2a4.dist-info → optimum_rbln-0.9.3.dist-info}/WHEEL +1 -1
- optimum_rbln-0.9.3.dist-info/entry_points.txt +2 -0
- optimum_rbln-0.8.2a4.dist-info/RECORD +0 -215
- {optimum_rbln-0.8.2a4.dist-info → optimum_rbln-0.9.3.dist-info}/licenses/LICENSE +0 -0
@@ -33,9 +33,9 @@ if is_cosmos_guardrail_available():
     from cosmos_guardrail import CosmosSafetyChecker
     from cosmos_guardrail.cosmos_guardrail import (
         COSMOS_GUARDRAIL_CHECKPOINT,
-        Aegis,
         Blocklist,
         GuardrailRunner,
+        LlamaGuard3,
         ModelConfig,
         RetinaFaceFilter,
         SafetyClassifier,
@@ -55,7 +55,7 @@ else:

     COSMOS_GUARDRAIL_CHECKPOINT = None

-    class
+    class LlamaGuard3(FailToImportCosmosGuardrail): ...

     class Blocklist(FailToImportCosmosGuardrail): ...

@@ -312,33 +312,31 @@ class RBLNVideoContentSafetyFilter(VideoContentSafetyFilter):
         self.encoder.save_pretrained(checkpoint_id)


-class
+class RBLNLlamaGuard3(LlamaGuard3):
     def __init__(
         self,
         checkpoint_id: str = COSMOS_GUARDRAIL_CHECKPOINT,
-        base_model_id: str = "meta-llama/
-        aegis_adapter: str = "nvidia/Aegis-AI-Content-Safety-LlamaGuard-Defensive-1.0",
+        base_model_id: str = "meta-llama/Llama-Guard-3-8B",
         rbln_config: Optional[RBLNCosmosSafetyCheckerConfig] = None,
     ) -> None:
         if is_compiled_dir(checkpoint_id):
             torch.nn.Module.__init__(self)
-            cache_dir = pathlib.Path(checkpoint_id) / "
+            cache_dir = pathlib.Path(checkpoint_id) / "llamaguard3"
             self.tokenizer = AutoTokenizer.from_pretrained(cache_dir)
-            self.model = RBLNAutoModelForCausalLM.from_pretrained(cache_dir, rbln_config=rbln_config.
+            self.model = RBLNAutoModelForCausalLM.from_pretrained(cache_dir, rbln_config=rbln_config.llamaguard3)

         else:
-            super().__init__(checkpoint_id, base_model_id
-            model = self.model
+            super().__init__(checkpoint_id, base_model_id)
+            model = self.model
             del self.model
-
-            self.model = RBLNAutoModelForCausalLM.from_model(model, rbln_config=rbln_config.aegis)
+            self.model = RBLNAutoModelForCausalLM.from_model(model, rbln_config=rbln_config.llamaguard3)

         self.rbln_config = rbln_config
         self.dtype = torch.bfloat16
         self.device = torch.device("cpu")

     def save_pretrained(self, checkpoint_id: str):
-        cache_dir = pathlib.Path(checkpoint_id) / "
+        cache_dir = pathlib.Path(checkpoint_id) / "llamaguard3"
         self.model.save_pretrained(cache_dir)
         self.tokenizer.save_pretrained(cache_dir)

@@ -351,8 +349,7 @@ class RBLNCosmosSafetyChecker(CosmosSafetyChecker):
     def __init__(
         self,
         checkpoint_id: str = COSMOS_GUARDRAIL_CHECKPOINT,
-
-        aegis_adapter_id: str = "nvidia/Aegis-AI-Content-Safety-LlamaGuard-Defensive-1.0",
+        llamaguard_model_id: str = "meta-llama/Llama-Guard-3-8B",
         rbln_config: Optional[RBLNCosmosSafetyCheckerConfig] = None,
     ) -> None:
         torch.nn.Module.__init__(self)
@@ -369,10 +366,9 @@ class RBLNCosmosSafetyChecker(CosmosSafetyChecker):
         self.text_guardrail = GuardrailRunner(
             safety_models=[
                 Blocklist(COSMOS_GUARDRAIL_CHECKPOINT),  # Changed since it cannot be saved
-
+                RBLNLlamaGuard3(
                     checkpoint_id=checkpoint_id,
-                    base_model_id=
-                    aegis_adapter=aegis_adapter_id,
+                    base_model_id=llamaguard_model_id,
                     rbln_config=rbln_config,
                 ),
             ]
@@ -387,7 +383,7 @@ class RBLNCosmosSafetyChecker(CosmosSafetyChecker):

     def save_pretrained(self, save_dir: str):
         for text_safety_models in self.text_guardrail.safety_models:
-            if isinstance(text_safety_models,
+            if isinstance(text_safety_models, RBLNLlamaGuard3):
                 text_safety_models.save_pretrained(save_dir)

         for video_safety_models in self.video_guardrail.safety_models:
@@ -87,8 +87,38 @@ class RBLNCosmosTextToWorldPipeline(RBLNDiffusionMixin, CosmosTextToWorldPipelin
         export: bool = False,
         safety_checker: Optional[RBLNCosmosSafetyChecker] = None,
         rbln_config: Dict[str, Any] = {},
-        **kwargs:
+        **kwargs: Any,
     ):
+        """
+        Load a pretrained diffusion pipeline from a model checkpoint, with optional compilation for RBLN NPUs.
+
+        This method has two distinct operating modes:
+        - When `export=True`: Takes a PyTorch-based diffusion model, compiles it for RBLN NPUs, and loads the compiled model
+        - When `export=False`: Loads an already compiled RBLN model from `model_id` without recompilation
+
+        It supports various diffusion pipelines including Stable Diffusion, Kandinsky, ControlNet, and other diffusers-based models.
+
+        Args:
+            model_id (`str`):
+                The model ID or path to the pretrained model to load. Can be either:
+
+                - A model ID from the HuggingFace Hub
+                - A local path to a saved model directory
+            export:
+                If True, takes a PyTorch model from `model_id` and compiles it for RBLN NPU execution.
+                If False, loads an already compiled RBLN model from `model_id` without recompilation.
+            safety_checker:
+                Optional custom safety checker to use instead of the default one. Only used when `export=True`.
+            rbln_config:
+                Configuration options for RBLN compilation. Can include settings for specific submodules
+                such as `text_encoder`, `unet`, and `vae`. Configuration can be tailored to the specific
+                pipeline being compiled.
+            kwargs:
+                Additional arguments to pass to the underlying diffusion pipeline constructor or the
+                RBLN compilation process. These may include parameters specific to individual submodules
+                or the particular diffusion pipeline being used.
+        """
+
         rbln_config, kwargs = cls.get_rbln_config_class().initialize_from_kwargs(rbln_config, **kwargs)
         if safety_checker is None and export:
             safety_checker = RBLNCosmosSafetyChecker(rbln_config=rbln_config.safety_checker)
@@ -87,8 +87,38 @@ class RBLNCosmosVideoToWorldPipeline(RBLNDiffusionMixin, CosmosVideoToWorldPipel
         export: bool = False,
         safety_checker: Optional[RBLNCosmosSafetyChecker] = None,
         rbln_config: Dict[str, Any] = {},
-        **kwargs:
+        **kwargs: Any,
     ):
+        """
+        Load a pretrained diffusion pipeline from a model checkpoint, with optional compilation for RBLN NPUs.
+
+        This method has two distinct operating modes:
+        - When `export=True`: Takes a PyTorch-based diffusion model, compiles it for RBLN NPUs, and loads the compiled model
+        - When `export=False`: Loads an already compiled RBLN model from `model_id` without recompilation
+
+        It supports various diffusion pipelines including Stable Diffusion, Kandinsky, ControlNet, and other diffusers-based models.
+
+        Args:
+            model_id (`str`):
+                The model ID or path to the pretrained model to load. Can be either:
+
+                - A model ID from the HuggingFace Hub
+                - A local path to a saved model directory
+            export:
+                If True, takes a PyTorch model from `model_id` and compiles it for RBLN NPU execution.
+                If False, loads an already compiled RBLN model from `model_id` without recompilation.
+            safety_checker:
+                Optional custom safety checker to use instead of the default one. Only used when `export=True`.
+            rbln_config:
+                Configuration options for RBLN compilation. Can include settings for specific submodules
+                such as `text_encoder`, `unet`, and `vae`. Configuration can be tailored to the specific
+                pipeline being compiled.
+            kwargs:
+                Additional arguments to pass to the underlying diffusion pipeline constructor or the
+                RBLN compilation process. These may include parameters specific to individual submodules
+                or the particular diffusion pipeline being used.
+        """
+
         rbln_config, kwargs = cls.get_rbln_config_class().initialize_from_kwargs(rbln_config, **kwargs)
         if safety_checker is None and export:
             safety_checker = RBLNCosmosSafetyChecker(rbln_config=rbln_config.safety_checker)
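The docstring added to both Cosmos pipelines above describes the two `from_pretrained` modes. A minimal usage sketch of that contract follows; the checkpoint ID and output directory are placeholders, and it assumes the pipeline exposes the usual `save_pretrained` like other RBLN models:

```python
from optimum.rbln import RBLNCosmosTextToWorldPipeline

# export=True: compile the PyTorch pipeline for RBLN NPUs, then persist the compiled artifacts.
pipe = RBLNCosmosTextToWorldPipeline.from_pretrained(
    "nvidia/Cosmos-1.0-Diffusion-7B-Text2World",  # hypothetical checkpoint ID
    export=True,
)
pipe.save_pretrained("cosmos-text2world-rbln")

# export=False: reload the already-compiled directory without recompiling.
pipe = RBLNCosmosTextToWorldPipeline.from_pretrained("cosmos-text2world-rbln", export=False)
```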
@@ -22,12 +22,7 @@ from diffusers import (
     UNet2DConditionModel,
     VQModel,
 )
-from transformers import
-    CLIPImageProcessor,
-    CLIPTextModelWithProjection,
-    CLIPTokenizer,
-    CLIPVisionModelWithProjection,
-)
+from transformers import CLIPImageProcessor, CLIPTextModelWithProjection, CLIPTokenizer, CLIPVisionModelWithProjection

 from ...configurations import RBLNKandinskyV22CombinedPipelineConfig
 from ...modeling_diffusers import RBLNDiffusionMixin
@@ -0,0 +1,15 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .pipeline_stable_video_diffusion import RBLNStableVideoDiffusionPipeline
@@ -0,0 +1,46 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from diffusers import StableVideoDiffusionPipeline
+
+from ....utils.logging import get_logger
+from ...configurations import RBLNStableVideoDiffusionPipelineConfig
+from ...modeling_diffusers import RBLNDiffusionMixin
+
+
+logger = get_logger(__name__)
+
+
+class RBLNStableVideoDiffusionPipeline(RBLNDiffusionMixin, StableVideoDiffusionPipeline):
+    """
+    RBLN-accelerated implementation of Stable Video Diffusion pipeline for image-to-video generation.
+
+    This pipeline compiles Stable Video Diffusion models to run efficiently on RBLN NPUs, enabling high-performance
+    inference for generating videos from images with optimized memory usage and throughput.
+    """
+
+    original_class = StableVideoDiffusionPipeline
+    _rbln_config_class = RBLNStableVideoDiffusionPipelineConfig
+    _submodules = ["image_encoder", "unet", "vae"]
+
+    def handle_additional_kwargs(self, **kwargs):
+        compiled_num_frames = self.unet.rbln_config.num_frames
+        if compiled_num_frames is not None:
+            kwargs["num_frames"] = compiled_num_frames
+
+        compiled_decode_chunk_size = self.vae.rbln_config.decode_chunk_size
+        if compiled_decode_chunk_size is not None:
+            kwargs["decode_chunk_size"] = compiled_decode_chunk_size
+        return kwargs
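`handle_additional_kwargs` above pins `num_frames` and `decode_chunk_size` to the values the pipeline was compiled with, overriding whatever is passed at call time. A short sketch of how this new pipeline might be used; the checkpoint ID is a placeholder and the nested `rbln_config` keys are assumed from the attributes read in `handle_additional_kwargs`:

```python
from diffusers.utils import load_image
from optimum.rbln import RBLNStableVideoDiffusionPipeline

pipe = RBLNStableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid",  # hypothetical checkpoint ID
    export=True,
    rbln_config={"unet": {"num_frames": 14}, "vae": {"decode_chunk_size": 2}},  # assumed config keys
)

image = load_image("input.png")
# The compiled num_frames / decode_chunk_size take precedence over call-time kwargs.
frames = pipe(image).frames[0]
```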
optimum/rbln/modeling.py CHANGED
@@ -19,7 +19,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, get_args, ge
 import rebel
 import torch
 from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
-from transformers import
+from transformers import PretrainedConfig
 from transformers.modeling_outputs import BaseModelOutput

 from .configuration_utils import DEFAULT_COMPILED_MODEL_NAME, RBLNModelConfig
@@ -54,13 +54,16 @@ class RBLNModel(RBLNBaseModel):
         pass

     @classmethod
-    def
+    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module:
         # Wrap the model if needed.
         return model

     @classmethod
     def get_compiled_model(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig):
-
+        if rbln_config._allow_no_compile_cfgs:
+            return {}
+
+        model = cls._wrap_model_if_needed(model, rbln_config)
         rbln_compile_config = rbln_config.compile_cfgs[0]
         compiled_model = cls.compile(
             model,
@@ -70,6 +73,22 @@ class RBLNModel(RBLNBaseModel):
         )
         return compiled_model

+    @classmethod
+    def _update_rbln_config(
+        cls,
+        preprocessors: Optional[Any],
+        model: Optional["PreTrainedModel"] = None,
+        model_config: Optional["PretrainedConfig"] = None,
+        rbln_config: Optional[RBLNModelConfig] = None,
+    ) -> RBLNModelConfig:
+        # Default implementation: return config as-is
+        # Subclasses should override to set compile_cfgs if needed
+        return rbln_config
+
+    @classmethod
+    def _reconstruct_model_if_needed(cls, model: "PreTrainedModel"):
+        return model
+
     @classmethod
     def from_model(
         cls,
@@ -78,18 +97,20 @@ class RBLNModel(RBLNBaseModel):
         rbln_config: Optional[Union[RBLNModelConfig, Dict]] = None,
         model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
         subfolder: str = "",
-        **kwargs:
+        **kwargs: Any,
     ) -> "RBLNModel":
         """
         Converts and compiles a pre-trained HuggingFace library model into a RBLN model.
         This method performs the actual model conversion and compilation process.

         Args:
-            model: The PyTorch model to be compiled.
-
+            model (PreTrainedModel): The PyTorch model to be compiled.
+                The object must be an instance of the HuggingFace transformers PreTrainedModel class.
+            config (Optional[PretrainedConfig]): The configuration object associated with the model.
+            rbln_config (Optional[Union[RBLNModelConfig, Dict]]): Configuration for RBLN model compilation and runtime.
+                This can be provided as a dictionary or an instance of the model's configuration class (e.g., `RBLNLlamaForCausalLMConfig` for Llama models).
                 For detailed configuration options, see the specific model's configuration class documentation.
-
-            kwargs: Additional keyword arguments. Arguments with the prefix 'rbln_' are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.
+            kwargs: Additional keyword arguments. Arguments with the prefix `rbln_` are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.

         The method performs the following steps:

@@ -99,8 +120,10 @@ class RBLNModel(RBLNBaseModel):
         4. Saves the compiled model and configurations

         Returns:
-            A RBLN model instance ready for inference on RBLN NPU devices.
+            (RBLNModel): A RBLN model instance ready for inference on RBLN NPU devices.
         """
+
+        model = cls._reconstruct_model_if_needed(model)
         preprocessors = kwargs.pop("preprocessors", [])
         rbln_config, kwargs = cls.prepare_rbln_config(rbln_config=rbln_config, **kwargs)

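As the updated `from_model` docstring notes, this path compiles an already-instantiated PyTorch model rather than loading from a checkpoint. A minimal sketch, with the checkpoint ID as a placeholder and the `tensor_parallel_size` key assumed from the compile config fields seen elsewhere in this diff:

```python
from transformers import AutoModelForCausalLM
from optimum.rbln import RBLNLlamaForCausalLM

# Load (or modify) the PyTorch model yourself, then hand it to from_model for compilation.
torch_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")  # hypothetical ID
rbln_model = RBLNLlamaForCausalLM.from_model(
    torch_model,
    rbln_config={"tensor_parallel_size": 4},  # assumed config key
)
rbln_model.save_pretrained("llama-2-7b-rbln")
```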
@@ -119,9 +142,6 @@ class RBLNModel(RBLNBaseModel):
         # Save configs
         if config is None:
             config = model.config
-            # remote_config
-            if hasattr(config, "auto_map") and "AutoConfig" in config.auto_map:
-                config = AutoConfig.from_pretrained(config._name_or_path, **kwargs)

         if hasattr(model, "can_generate") and model.can_generate():
             import json
@@ -147,6 +167,7 @@ class RBLNModel(RBLNBaseModel):
                 model=model,
                 model_save_dir=save_dir,
                 rbln_config=rbln_config,
+                preprocessors=preprocessors,
                 **kwargs,
             )
         else:
@@ -209,6 +230,7 @@ class RBLNModel(RBLNBaseModel):
         **kwargs,
     ) -> "PreTrainedModel":
         kwargs = cls.update_kwargs(kwargs)
+
         return cls.get_hf_class().from_pretrained(
             model_id,
             subfolder=subfolder,
@@ -227,6 +249,9 @@ class RBLNModel(RBLNBaseModel):
         compiled_models: List[rebel.RBLNCompiledModel],
         rbln_config: RBLNModelConfig,
     ) -> List[rebel.Runtime]:
+        if len(rbln_config.compile_cfgs) == 0:
+            return []
+
         if DEFAULT_COMPILED_MODEL_NAME not in rbln_config.device_map:
             cls._raise_missing_compiled_file_error([DEFAULT_COMPILED_MODEL_NAME])

@@ -241,31 +266,33 @@ class RBLNModel(RBLNBaseModel):
             for compiled_model in compiled_models
         ]

-    def forward(self, *args: Any, return_dict: Optional[bool] = None, **kwargs:
+    def forward(self, *args: Any, return_dict: Optional[bool] = None, **kwargs: Any) -> Any:
         """
-        Defines the forward pass of
+        Defines the forward pass of `RBLNModel`. The interface mirrors HuggingFace conventions so it can act as a drop-in
+        replacement in many cases.

-        This method executes the compiled RBLN model on RBLN NPU devices while
-
-
+        This method executes the compiled RBLN model on RBLN NPU devices while remaining fully compatible with Hugging Face
+        Transformers and Diffusers APIs. In practice, `RBLNModel` can replace models built on `torch.nn.Module` — including
+        `transformers.PreTrainedModel` implementations and Diffusers components based on `diffusers.ModelMixin` — enabling
+        seamless integration into existing workflows.

         Args:
-
+            args: Variable length argument list containing model inputs. The format matches the original
                 HuggingFace model's forward method signature (e.g., input_ids, attention_mask for
                 transformers models, or sample, timestep for diffusers models).
             return_dict:
                 Whether to return outputs as a dictionary-like object or as a tuple. When `None`:
                 - For transformers models: Uses `self.config.use_return_dict` (typically `True`)
                 - For diffusers models: Defaults to `True`
-
+            kwargs: Arbitrary keyword arguments containing additional model inputs and parameters,
                 matching the original HuggingFace model's interface.

         Returns:
             Model outputs in the same format as the original HuggingFace model.

-
+            If `return_dict=True`, Returns a dictionary-like object (e.g., BaseModelOutput,
                 CausalLMOutput) with named fields such as `logits`, `hidden_states`, etc.
-
+            If `return_dict=False`, Returns a tuple containing the raw model outputs.

         Note:
             - This method maintains the exact same interface as the original HuggingFace model's forward method
optimum/rbln/modeling_base.py CHANGED
@@ -34,7 +34,7 @@ from .utils.submodule import SubModulesMixin


 if TYPE_CHECKING:
-    from transformers import PreTrainedModel
+    from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PreTrainedModel

 logger = get_logger(__name__)

@@ -53,6 +53,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
     config_class = AutoConfig
     config_name = "config.json"
     hf_library_name = "transformers"
+    _supports_non_fp32 = False

     def __init__(
         self,
@@ -70,7 +71,6 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         self.rbln_config = rbln_config
         if not rbln_config.is_frozen():
             raise RuntimeError("`rbln_config` must be frozen. Please call `rbln_config.freeze()` first.")
-
         self.compiled_models = rbln_compiled_models

         # Registers the RBLN classes into the transformers AutoModel classes to avoid warnings when creating
@@ -91,7 +91,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):

         self.device = torch.device("cpu")
         self.training = False
-        self.dtype =
+        self.dtype = rbln_config.torch_dtype

         # FIXME :: model_save_dir is not used after initialized. (This can be used when save/load)
         # This attribute is needed to keep one reference on the temporary directory, since garbage collecting it
@@ -314,7 +314,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
             rbln_config,
             model_save_dir=model_save_dir,
             subfolder=subfolder,
-            rbln_compiled_models=
+            rbln_compiled_models=rbln_compiled_models,
             rbln_submodules=rbln_submodules,
             **kwargs,
         )
@@ -342,32 +342,72 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         rbln_config, kwargs = config_cls.initialize_from_kwargs(rbln_config, **kwargs)
         return rbln_config, kwargs

+    @classmethod
+    def _is_compiled(
+        cls,
+        model_id: Union[str, Path],
+        token: Optional[Union[bool, str]] = None,
+        revision: Optional[str] = None,
+        force_download: bool = False,
+        cache_dir: Optional[str] = None,
+        subfolder: str = "",
+        local_files_only: bool = False,
+    ) -> bool:
+        # Check if the model is already compiled.
+        try:
+            cls._load_compiled_model_dir(
+                model_id=model_id,
+                token=token,
+                revision=revision,
+                force_download=force_download,
+                cache_dir=cache_dir,
+                subfolder=subfolder,
+                local_files_only=local_files_only,
+            )
+            return True
+        except (FileNotFoundError, KeyError):
+            return False
+
     @classmethod
     def from_pretrained(
         cls: Type["RBLNBaseModel"],
         model_id: Union[str, Path],
-        export: bool =
+        export: Optional[bool] = None,
         rbln_config: Optional[Union[Dict, RBLNModelConfig]] = None,
-        **kwargs:
+        **kwargs: Any,
     ) -> "RBLNBaseModel":
         """
         The `from_pretrained()` function is utilized in its standard form as in the HuggingFace transformers library.
         User can use this function to load a pre-trained model from the HuggingFace library and convert it to a RBLN model to be run on RBLN NPUs.

         Args:
-            model_id: The model id of the pre-trained model to be loaded.
-
-
+            model_id (Union[str, Path]): The model id of the pre-trained model to be loaded.
+                It can be downloaded from the HuggingFace model hub or a local path, or a model id of a compiled model using the RBLN Compiler.
+            export (Optional[bool]): A boolean flag to indicate whether the model should be compiled.
+                If None, it will be determined based on the existence of the compiled model files in the model_id.
+            rbln_config (Optional[Union[Dict, RBLNModelConfig]]): Configuration for RBLN model compilation and runtime.
+                This can be provided as a dictionary or an instance of the model's configuration class (e.g., `RBLNLlamaForCausalLMConfig` for Llama models).
                 For detailed configuration options, see the specific model's configuration class documentation.
-
-            kwargs: Additional keyword arguments. Arguments with the prefix 'rbln_' are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.
+            kwargs: Additional keyword arguments. Arguments with the prefix `rbln_` are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.

         Returns:
-            A RBLN model instance ready for inference on RBLN NPU devices.
+            (RBLNModel): A RBLN model instance ready for inference on RBLN NPU devices.
         """

         if isinstance(model_id, Path):
             model_id = model_id.as_posix()
+
+        if export is None:
+            export = not cls._is_compiled(
+                model_id=model_id,
+                token=kwargs.get("token"),
+                revision=kwargs.get("revision"),
+                force_download=kwargs.get("force_download", False),
+                cache_dir=kwargs.get("cache_dir"),
+                subfolder=kwargs.get("subfolder", ""),
+                local_files_only=kwargs.get("local_files_only", False),
+            )
+
         from_pretrained_method = cls._export if export else cls._from_pretrained
         return from_pretrained_method(model_id=model_id, **kwargs, rbln_config=rbln_config)

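With `export` now defaulting to `None`, `from_pretrained` decides for itself whether `model_id` already points at compiled artifacts. A small sketch of the resulting behavior, assuming `RBLNBertModel` as the model class and a placeholder checkpoint ID:

```python
from optimum.rbln import RBLNBertModel

# Plain HuggingFace checkpoint: no compiled files are found, so export is inferred as True.
model = RBLNBertModel.from_pretrained("bert-base-uncased")  # hypothetical ID
model.save_pretrained("bert-base-rbln")

# Directory produced by save_pretrained: compiled files are found, so export is inferred as False.
model = RBLNBertModel.from_pretrained("bert-base-rbln")
```

Passing `export=True` or `export=False` explicitly still bypasses the detection.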
@@ -392,7 +432,6 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         compiled_model = rebel.compile_from_torch(
             model,
             input_info=rbln_compile_config.input_info,
-            fusion=rbln_compile_config.fusion,
             npu=rbln_compile_config.npu,
             tensor_parallel_size=rbln_compile_config.tensor_parallel_size,
             **kwargs,
@@ -400,8 +439,21 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         return compiled_model

     @classmethod
-    def update_rbln_config(
-
+    def update_rbln_config(
+        cls,
+        preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
+        model: "PreTrainedModel",
+        model_config: "PretrainedConfig",
+        rbln_config: RBLNModelConfig,
+    ) -> RBLNModelConfig:
+        rbln_config.torch_dtype = model.dtype
+        if not cls._supports_non_fp32 and rbln_config.torch_dtype != torch.float32:
+            raise NotImplementedError(
+                f"Currently, {cls.__name__} does not support non-fp32 dtype. Please use float32 dtype."
+            )
+        rbln_config = cls._update_rbln_config(
+            preprocessors=preprocessors, model=model, model_config=model_config, rbln_config=rbln_config
+        )
         rbln_config.freeze()
         if rbln_config.rbln_model_cls_name != cls.__name__:
             raise NameError(
@@ -444,12 +496,12 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):

         # This method mimics the interface of torch.nn.Module.parameters()
         # specifically for code that uses `next(model.parameters())` to infer
-        # the device or dtype. It yields a single dummy tensor on CPU with
+        # the device or dtype. It yields a single dummy tensor on CPU with model dtype.

         # Warning:
         # This does NOT yield the actual model parameters used by the RBLN runtime.
         # Code relying on iterating through all model parameters will not work as expected.
-        yield torch.tensor([1.0], dtype=
+        yield torch.tensor([1.0], dtype=self.dtype, device=torch.device("cpu"))

     def __call__(self, *args, **kwargs):
         return self.forward(*args, **kwargs)
@@ -484,9 +536,9 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         [`~optimum.rbln.modeling_base.RBLNBaseModel.from_pretrained`] class method.

         Args:
-            save_directory (
+            save_directory (Union[str, Path]):
                 Directory where to save the model file.
-            push_to_hub (
+            push_to_hub (bool):
                 Whether or not to push your model to the HuggingFace model hub after saving it.

         """
@@ -523,10 +575,35 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
             # First copy everything to a temporary directory
             shutil.copytree(real_save_dir, tmp_dir)

-            # If everything succeeded,
+            # If everything succeeded, move files to target directory
             if os.path.exists(save_directory_path):
-
-
+                # Merge files from tmp_dir into existing directory
+                def _merge_dir(src_root: str, dst_root: str):
+                    for name in os.listdir(src_root):
+                        src_item = os.path.join(src_root, name)
+                        dst_item = os.path.join(dst_root, name)
+
+                        if os.path.islink(src_item) or os.path.isfile(src_item):
+                            os.makedirs(os.path.dirname(dst_item), exist_ok=True)
+                            if os.path.isdir(dst_item) and not os.path.islink(dst_item):
+                                shutil.rmtree(dst_item)
+                            os.replace(src_item, dst_item)
+                        elif os.path.isdir(src_item):
+                            if os.path.islink(dst_item) or os.path.isfile(dst_item):
+                                os.remove(dst_item)
+                            os.makedirs(dst_item, exist_ok=True)
+                            _merge_dir(src_item, dst_item)
+                        else:
+                            # Fallback for special file types
+                            os.replace(src_item, dst_item)
+
+                _merge_dir(tmp_dir, str(save_directory_path))
+
+                # Remove the temporary directory tree after merge
+                shutil.rmtree(tmp_dir)
+            else:
+                # If target doesn't exist, just rename tmp_dir to target
+                os.rename(tmp_dir, save_directory_path)

         except Exception as e:
             # Clean up the temporary directory if anything fails