PyPI - optimum-rbln - Versions diffs - 0.8.2a0__py3-none-any.whl → 0.9.3__py3-none-any.whl - Mend

optimum-rbln 0.8.2a0__py3-none-any.whl → 0.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (197) hide show

optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py CHANGED Viewed

@@ -33,9 +33,9 @@ if is_cosmos_guardrail_available():
     from cosmos_guardrail import CosmosSafetyChecker
     from cosmos_guardrail.cosmos_guardrail import (
         COSMOS_GUARDRAIL_CHECKPOINT,
-        Aegis,
         Blocklist,
         GuardrailRunner,
+        LlamaGuard3,
         ModelConfig,
         RetinaFaceFilter,
         SafetyClassifier,
@@ -55,7 +55,7 @@ else:
     COSMOS_GUARDRAIL_CHECKPOINT = None
-    class Aegis(FailToImportCosmosGuardrail): ...
+    class LlamaGuard3(FailToImportCosmosGuardrail): ...
     class Blocklist(FailToImportCosmosGuardrail): ...
@@ -127,25 +127,13 @@ class RBLNSigLIPEncoder(SigLIPEncoder):
             # We don't use RBLNSiglipModel, but we need to override get_image_features to return pooler_output
             self.model = RBLNSiglipVisionModel.from_pretrained(
-                self.checkpoint_dir,
-                rbln_device=rbln_config.siglip_encoder.device,
-                rbln_create_runtimes=rbln_config.siglip_encoder.create_runtimes,
-                rbln_activate_profiler=rbln_config.siglip_encoder.activate_profiler,
-                rbln_optimize_host_memory=rbln_config.siglip_encoder.optimize_host_memory,
+                self.checkpoint_dir, rbln_config=rbln_config.siglip_encoder
             )
         else:
             super().__init__(model_name, checkpoint_id)
             model = self.model
             del self.model
-            self.model = RBLNSiglipVisionModel.from_model(
-                model,
-                rbln_device=rbln_config.siglip_encoder.device,
-                rbln_image_size=rbln_config.siglip_encoder.image_size,
-                rbln_npu=rbln_config.siglip_encoder.npu,
-                rbln_create_runtimes=rbln_config.siglip_encoder.create_runtimes,
-                rbln_activate_profiler=rbln_config.siglip_encoder.activate_profiler,
-                rbln_optimize_host_memory=rbln_config.siglip_encoder.optimize_host_memory,
-            )
+            self.model = RBLNSiglipVisionModel.from_model(model, rbln_config=rbln_config.siglip_encoder)
         self.rbln_config = rbln_config
         # Override get_image_features to return pooler_output
@@ -324,47 +312,31 @@ class RBLNVideoContentSafetyFilter(VideoContentSafetyFilter):
         self.encoder.save_pretrained(checkpoint_id)
-class RBLNAegis(Aegis):
+class RBLNLlamaGuard3(LlamaGuard3):
     def __init__(
         self,
         checkpoint_id: str = COSMOS_GUARDRAIL_CHECKPOINT,
-        base_model_id: str = "meta-llama/LlamaGuard-7b",
-        aegis_adapter: str = "nvidia/Aegis-AI-Content-Safety-LlamaGuard-Defensive-1.0",
+        base_model_id: str = "meta-llama/Llama-Guard-3-8B",
         rbln_config: Optional[RBLNCosmosSafetyCheckerConfig] = None,
     ) -> None:
         if is_compiled_dir(checkpoint_id):
             torch.nn.Module.__init__(self)
-            cache_dir = pathlib.Path(checkpoint_id) / "aegis"
+            cache_dir = pathlib.Path(checkpoint_id) / "llamaguard3"
             self.tokenizer = AutoTokenizer.from_pretrained(cache_dir)
-            self.model = RBLNAutoModelForCausalLM.from_pretrained(
-                cache_dir,
-                rbln_device=rbln_config.aegis.device,
-                rbln_create_runtimes=rbln_config.aegis.create_runtimes,
-                rbln_activate_profiler=rbln_config.aegis.activate_profiler,
-                rbln_optimize_host_memory=rbln_config.aegis.optimize_host_memory,
-            )
+            self.model = RBLNAutoModelForCausalLM.from_pretrained(cache_dir, rbln_config=rbln_config.llamaguard3)
         else:
-            super().__init__(checkpoint_id, base_model_id, aegis_adapter)
-            model = self.model.merge_and_unload()  # peft merge
+            super().__init__(checkpoint_id, base_model_id)
+            model = self.model
             del self.model
-            self.model = RBLNAutoModelForCausalLM.from_model(
-                model,
-                rbln_tensor_parallel_size=4,
-                rbln_device=rbln_config.aegis.device,
-                rbln_create_runtimes=rbln_config.aegis.create_runtimes,
-                rbln_npu=rbln_config.aegis.npu,
-                rbln_activate_profiler=rbln_config.aegis.activate_profiler,
-                rbln_optimize_host_memory=rbln_config.aegis.optimize_host_memory,
-            )
+            self.model = RBLNAutoModelForCausalLM.from_model(model, rbln_config=rbln_config.llamaguard3)
         self.rbln_config = rbln_config
         self.dtype = torch.bfloat16
         self.device = torch.device("cpu")
     def save_pretrained(self, checkpoint_id: str):
-        cache_dir = pathlib.Path(checkpoint_id) / "aegis"
+        cache_dir = pathlib.Path(checkpoint_id) / "llamaguard3"
         self.model.save_pretrained(cache_dir)
         self.tokenizer.save_pretrained(cache_dir)
@@ -377,8 +349,7 @@ class RBLNCosmosSafetyChecker(CosmosSafetyChecker):
     def __init__(
         self,
         checkpoint_id: str = COSMOS_GUARDRAIL_CHECKPOINT,
-        aegis_model_id: str = "meta-llama/LlamaGuard-7b",
-        aegis_adapter_id: str = "nvidia/Aegis-AI-Content-Safety-LlamaGuard-Defensive-1.0",
+        llamaguard_model_id: str = "meta-llama/Llama-Guard-3-8B",
         rbln_config: Optional[RBLNCosmosSafetyCheckerConfig] = None,
     ) -> None:
         torch.nn.Module.__init__(self)
@@ -395,10 +366,9 @@ class RBLNCosmosSafetyChecker(CosmosSafetyChecker):
         self.text_guardrail = GuardrailRunner(
             safety_models=[
                 Blocklist(COSMOS_GUARDRAIL_CHECKPOINT),  # Changed since it cannot be saved
-                RBLNAegis(
+                RBLNLlamaGuard3(
                     checkpoint_id=checkpoint_id,
-                    base_model_id=aegis_model_id,
-                    aegis_adapter=aegis_adapter_id,
+                    base_model_id=llamaguard_model_id,
                     rbln_config=rbln_config,
                 ),
             ]
@@ -413,7 +383,7 @@ class RBLNCosmosSafetyChecker(CosmosSafetyChecker):
     def save_pretrained(self, save_dir: str):
         for text_safety_models in self.text_guardrail.safety_models:
-            if isinstance(text_safety_models, RBLNAegis):
+            if isinstance(text_safety_models, RBLNLlamaGuard3):
                 text_safety_models.save_pretrained(save_dir)
         for video_safety_models in self.video_guardrail.safety_models:

optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py CHANGED Viewed

@@ -87,8 +87,38 @@ class RBLNCosmosTextToWorldPipeline(RBLNDiffusionMixin, CosmosTextToWorldPipelin
         export: bool = False,
         safety_checker: Optional[RBLNCosmosSafetyChecker] = None,
         rbln_config: Dict[str, Any] = {},
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
+        """
+        Load a pretrained diffusion pipeline from a model checkpoint, with optional compilation for RBLN NPUs.
+        This method has two distinct operating modes:
+            - When `export=True`: Takes a PyTorch-based diffusion model, compiles it for RBLN NPUs, and loads the compiled model
+            - When `export=False`: Loads an already compiled RBLN model from `model_id` without recompilation
+        It supports various diffusion pipelines including Stable Diffusion, Kandinsky, ControlNet, and other diffusers-based models.
+        Args:
+            model_id (`str`):
+                The model ID or path to the pretrained model to load. Can be either:
+                - A model ID from the HuggingFace Hub
+                - A local path to a saved model directory
+            export:
+                If True, takes a PyTorch model from `model_id` and compiles it for RBLN NPU execution.
+                If False, loads an already compiled RBLN model from `model_id` without recompilation.
+            safety_checker:
+                Optional custom safety checker to use instead of the default one. Only used when `export=True`.
+            rbln_config:
+                Configuration options for RBLN compilation. Can include settings for specific submodules
+                such as `text_encoder`, `unet`, and `vae`. Configuration can be tailored to the specific
+                pipeline being compiled.
+            kwargs:
+                Additional arguments to pass to the underlying diffusion pipeline constructor or the
+                RBLN compilation process. These may include parameters specific to individual submodules
+                or the particular diffusion pipeline being used.
+        """
         rbln_config, kwargs = cls.get_rbln_config_class().initialize_from_kwargs(rbln_config, **kwargs)
         if safety_checker is None and export:
             safety_checker = RBLNCosmosSafetyChecker(rbln_config=rbln_config.safety_checker)

optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py CHANGED Viewed

@@ -87,8 +87,38 @@ class RBLNCosmosVideoToWorldPipeline(RBLNDiffusionMixin, CosmosVideoToWorldPipel
         export: bool = False,
         safety_checker: Optional[RBLNCosmosSafetyChecker] = None,
         rbln_config: Dict[str, Any] = {},
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
+        """
+        Load a pretrained diffusion pipeline from a model checkpoint, with optional compilation for RBLN NPUs.
+        This method has two distinct operating modes:
+            - When `export=True`: Takes a PyTorch-based diffusion model, compiles it for RBLN NPUs, and loads the compiled model
+            - When `export=False`: Loads an already compiled RBLN model from `model_id` without recompilation
+        It supports various diffusion pipelines including Stable Diffusion, Kandinsky, ControlNet, and other diffusers-based models.
+        Args:
+            model_id (`str`):
+                The model ID or path to the pretrained model to load. Can be either:
+                - A model ID from the HuggingFace Hub
+                - A local path to a saved model directory
+            export:
+                If True, takes a PyTorch model from `model_id` and compiles it for RBLN NPU execution.
+                If False, loads an already compiled RBLN model from `model_id` without recompilation.
+            safety_checker:
+                Optional custom safety checker to use instead of the default one. Only used when `export=True`.
+            rbln_config:
+                Configuration options for RBLN compilation. Can include settings for specific submodules
+                such as `text_encoder`, `unet`, and `vae`. Configuration can be tailored to the specific
+                pipeline being compiled.
+            kwargs:
+                Additional arguments to pass to the underlying diffusion pipeline constructor or the
+                RBLN compilation process. These may include parameters specific to individual submodules
+                or the particular diffusion pipeline being used.
+        """
         rbln_config, kwargs = cls.get_rbln_config_class().initialize_from_kwargs(rbln_config, **kwargs)
         if safety_checker is None and export:
             safety_checker = RBLNCosmosSafetyChecker(rbln_config=rbln_config.safety_checker)

optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py CHANGED Viewed

@@ -22,12 +22,7 @@ from diffusers import (
     UNet2DConditionModel,
     VQModel,
 )
-from transformers import (
-    CLIPImageProcessor,
-    CLIPTextModelWithProjection,
-    CLIPTokenizer,
-    CLIPVisionModelWithProjection,
-)
+from transformers import CLIPImageProcessor, CLIPTextModelWithProjection, CLIPTokenizer, CLIPVisionModelWithProjection
 from ...configurations import RBLNKandinskyV22CombinedPipelineConfig
 from ...modeling_diffusers import RBLNDiffusionMixin

optimum/rbln/diffusers/pipelines/stable_video_diffusion/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .pipeline_stable_video_diffusion import RBLNStableVideoDiffusionPipeline

optimum/rbln/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py ADDED Viewed

@@ -0,0 +1,46 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from diffusers import StableVideoDiffusionPipeline
+from ....utils.logging import get_logger
+from ...configurations import RBLNStableVideoDiffusionPipelineConfig
+from ...modeling_diffusers import RBLNDiffusionMixin
+logger = get_logger(__name__)
+class RBLNStableVideoDiffusionPipeline(RBLNDiffusionMixin, StableVideoDiffusionPipeline):
+    """
+    RBLN-accelerated implementation of Stable Video Diffusion pipeline for image-to-video generation.
+    This pipeline compiles Stable Video Diffusion models to run efficiently on RBLN NPUs, enabling high-performance
+    inference for generating videos from images with optimized memory usage and throughput.
+    """
+    original_class = StableVideoDiffusionPipeline
+    _rbln_config_class = RBLNStableVideoDiffusionPipelineConfig
+    _submodules = ["image_encoder", "unet", "vae"]
+    def handle_additional_kwargs(self, **kwargs):
+        compiled_num_frames = self.unet.rbln_config.num_frames
+        if compiled_num_frames is not None:
+            kwargs["num_frames"] = compiled_num_frames
+        compiled_decode_chunk_size = self.vae.rbln_config.decode_chunk_size
+        if compiled_decode_chunk_size is not None:
+            kwargs["decode_chunk_size"] = compiled_decode_chunk_size
+        return kwargs

optimum/rbln/modeling.py CHANGED Viewed

@@ -19,7 +19,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, get_args, ge
 import rebel
 import torch
 from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
-from transformers import AutoConfig, PretrainedConfig
+from transformers import PretrainedConfig
 from transformers.modeling_outputs import BaseModelOutput
 from .configuration_utils import DEFAULT_COMPILED_MODEL_NAME, RBLNModelConfig
@@ -35,8 +35,6 @@ logger = get_logger(__name__)
 class RBLNModel(RBLNBaseModel):
-    _output_class = None
     @classmethod
     def update_kwargs(cls, kwargs):
         # Update user-given kwargs to get proper pytorch model.
@@ -56,13 +54,16 @@ class RBLNModel(RBLNBaseModel):
         pass
     @classmethod
-    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module:
+    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module:
         # Wrap the model if needed.
         return model
     @classmethod
     def get_compiled_model(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig):
-        model = cls.wrap_model_if_needed(model, rbln_config)
+        if rbln_config._allow_no_compile_cfgs:
+            return {}
+        model = cls._wrap_model_if_needed(model, rbln_config)
         rbln_compile_config = rbln_config.compile_cfgs[0]
         compiled_model = cls.compile(
             model,
@@ -72,6 +73,22 @@ class RBLNModel(RBLNBaseModel):
         )
         return compiled_model
+    @classmethod
+    def _update_rbln_config(
+        cls,
+        preprocessors: Optional[Any],
+        model: Optional["PreTrainedModel"] = None,
+        model_config: Optional["PretrainedConfig"] = None,
+        rbln_config: Optional[RBLNModelConfig] = None,
+    ) -> RBLNModelConfig:
+        # Default implementation: return config as-is
+        # Subclasses should override to set compile_cfgs if needed
+        return rbln_config
+    @classmethod
+    def _reconstruct_model_if_needed(cls, model: "PreTrainedModel"):
+        return model
     @classmethod
     def from_model(
         cls,
@@ -80,18 +97,20 @@ class RBLNModel(RBLNBaseModel):
         rbln_config: Optional[Union[RBLNModelConfig, Dict]] = None,
         model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
         subfolder: str = "",
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ) -> "RBLNModel":
         """
         Converts and compiles a pre-trained HuggingFace library model into a RBLN model.
         This method performs the actual model conversion and compilation process.
         Args:
-            model: The PyTorch model to be compiled. The object must be an instance of the HuggingFace transformers PreTrainedModel class.
-            rbln_config: Configuration for RBLN model compilation and runtime. This can be provided as a dictionary or an instance of the model's configuration class (e.g., `RBLNLlamaForCausalLMConfig` for Llama models).
+            model (PreTrainedModel): The PyTorch model to be compiled.
+                The object must be an instance of the HuggingFace transformers PreTrainedModel class.
+            config (Optional[PretrainedConfig]): The configuration object associated with the model.
+            rbln_config (Optional[Union[RBLNModelConfig, Dict]]): Configuration for RBLN model compilation and runtime.
+                This can be provided as a dictionary or an instance of the model's configuration class (e.g., `RBLNLlamaForCausalLMConfig` for Llama models).
                 For detailed configuration options, see the specific model's configuration class documentation.
-            kwargs: Additional keyword arguments. Arguments with the prefix 'rbln_' are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.
+            kwargs: Additional keyword arguments. Arguments with the prefix `rbln_` are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.
         The method performs the following steps:
@@ -101,8 +120,10 @@ class RBLNModel(RBLNBaseModel):
         4. Saves the compiled model and configurations
         Returns:
-            A RBLN model instance ready for inference on RBLN NPU devices.
+            (RBLNModel): A RBLN model instance ready for inference on RBLN NPU devices.
         """
+        model = cls._reconstruct_model_if_needed(model)
         preprocessors = kwargs.pop("preprocessors", [])
         rbln_config, kwargs = cls.prepare_rbln_config(rbln_config=rbln_config, **kwargs)
@@ -121,9 +142,6 @@ class RBLNModel(RBLNBaseModel):
         # Save configs
         if config is None:
             config = model.config
-            # remote_config
-            if hasattr(config, "auto_map") and "AutoConfig" in config.auto_map:
-                config = AutoConfig.from_pretrained(config._name_or_path, **kwargs)
         if hasattr(model, "can_generate") and model.can_generate():
             import json
@@ -149,6 +167,7 @@ class RBLNModel(RBLNBaseModel):
                 model=model,
                 model_save_dir=save_dir,
                 rbln_config=rbln_config,
+                preprocessors=preprocessors,
                 **kwargs,
             )
         else:
@@ -211,6 +230,7 @@ class RBLNModel(RBLNBaseModel):
         **kwargs,
     ) -> "PreTrainedModel":
         kwargs = cls.update_kwargs(kwargs)
         return cls.get_hf_class().from_pretrained(
             model_id,
             subfolder=subfolder,
@@ -229,6 +249,9 @@ class RBLNModel(RBLNBaseModel):
         compiled_models: List[rebel.RBLNCompiledModel],
         rbln_config: RBLNModelConfig,
     ) -> List[rebel.Runtime]:
+        if len(rbln_config.compile_cfgs) == 0:
+            return []
         if DEFAULT_COMPILED_MODEL_NAME not in rbln_config.device_map:
             cls._raise_missing_compiled_file_error([DEFAULT_COMPILED_MODEL_NAME])
@@ -238,35 +261,38 @@ class RBLNModel(RBLNBaseModel):
                 tensor_type="pt",
                 device=rbln_config.device_map[DEFAULT_COMPILED_MODEL_NAME],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             )
             for compiled_model in compiled_models
         ]
-    def forward(self, *args: Any, return_dict: Optional[bool] = None, **kwargs: Dict[str, Any]) -> Any:
+    def forward(self, *args: Any, return_dict: Optional[bool] = None, **kwargs: Any) -> Any:
         """
-        Defines the forward pass of the RBLN model, providing a drop-in replacement for HuggingFace PreTrainedModel.
+        Defines the forward pass of `RBLNModel`. The interface mirrors HuggingFace conventions so it can act as a drop-in
+        replacement in many cases.
-        This method executes the compiled RBLN model on RBLN NPU devices while maintaining full compatibility
-        with HuggingFace transformers and diffusers APIs. The RBLNModel can be used as a direct substitute
-        for any HuggingFace nn.Module/PreTrainedModel, enabling seamless integration into existing workflows.
+        This method executes the compiled RBLN model on RBLN NPU devices while remaining fully compatible with Hugging Face
+        Transformers and Diffusers APIs. In practice, `RBLNModel` can replace models built on `torch.nn.Module` — including
+        `transformers.PreTrainedModel` implementations and Diffusers components based on `diffusers.ModelMixin` — enabling
+        seamless integration into existing workflows.
         Args:
-            *args: Variable length argument list containing model inputs. The format matches the original
+            args: Variable length argument list containing model inputs. The format matches the original
                 HuggingFace model's forward method signature (e.g., input_ids, attention_mask for
                 transformers models, or sample, timestep for diffusers models).
             return_dict:
                 Whether to return outputs as a dictionary-like object or as a tuple. When `None`:
                 - For transformers models: Uses `self.config.use_return_dict` (typically `True`)
                 - For diffusers models: Defaults to `True`
-            **kwargs: Arbitrary keyword arguments containing additional model inputs and parameters,
+            kwargs: Arbitrary keyword arguments containing additional model inputs and parameters,
                 matching the original HuggingFace model's interface.
         Returns:
             Model outputs in the same format as the original HuggingFace model.
-            - If `return_dict=True`: Returns a dictionary-like object (e.g., BaseModelOutput,
+            If `return_dict=True`, Returns a dictionary-like object (e.g., BaseModelOutput,
                 CausalLMOutput) with named fields such as `logits`, `hidden_states`, etc.
-            - If `return_dict=False`: Returns a tuple containing the raw model outputs.
+            If `return_dict=False`, Returns a tuple containing the raw model outputs.
         Note:
             - This method maintains the exact same interface as the original HuggingFace model's forward method
@@ -288,7 +314,7 @@ class RBLNModel(RBLNBaseModel):
     @classmethod
     def get_hf_output_class(cls):
         # Dynamically gets the output class from the corresponding HuggingFace model class.
-        if cls._output_class:
+        if "_output_class" in cls.__dict__ and cls._output_class is not None:
             return cls._output_class
         hf_class = cls.get_hf_class()

optimum-rbln 0.8.2a0__py3-none-any.whl → 0.9.3__py3-none-any.whl

optimum-rbln 0.8.2a0__py3-none-any.whl → 0.9.3__py3-none-any.whl