optimum-rbln 0.8.2rc0__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of optimum-rbln might be problematic; see the registry's advisory page for details.

Files changed (105) hide show
  1. optimum/rbln/__init__.py +32 -9
  2. optimum/rbln/__version__.py +16 -3
  3. optimum/rbln/configuration_utils.py +20 -4
  4. optimum/rbln/diffusers/__init__.py +7 -0
  5. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +2 -2
  6. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +2 -2
  7. optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +2 -2
  8. optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +2 -2
  9. optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +2 -2
  10. optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +2 -2
  11. optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +2 -2
  12. optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +2 -2
  13. optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +3 -3
  14. optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +2 -2
  15. optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +4 -4
  16. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +2 -2
  17. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +2 -2
  18. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +2 -2
  19. optimum/rbln/diffusers/modeling_diffusers.py +1 -1
  20. optimum/rbln/diffusers/models/__init__.py +3 -13
  21. optimum/rbln/diffusers/pipelines/__init__.py +11 -5
  22. optimum/rbln/diffusers/pipelines/auto_pipeline.py +237 -0
  23. optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +11 -6
  24. optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +14 -18
  25. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +1 -1
  26. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +1 -1
  27. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +1 -6
  28. optimum/rbln/modeling.py +3 -2
  29. optimum/rbln/modeling_base.py +29 -4
  30. optimum/rbln/ops/attn.py +158 -0
  31. optimum/rbln/ops/flash_attn.py +166 -0
  32. optimum/rbln/transformers/__init__.py +24 -0
  33. optimum/rbln/transformers/configuration_generic.py +6 -4
  34. optimum/rbln/transformers/modeling_generic.py +13 -8
  35. optimum/rbln/transformers/modeling_outputs.py +37 -0
  36. optimum/rbln/transformers/models/__init__.py +31 -16
  37. optimum/rbln/transformers/models/auto/__init__.py +2 -0
  38. optimum/rbln/transformers/models/auto/modeling_auto.py +14 -0
  39. optimum/rbln/transformers/models/bart/bart_architecture.py +1 -3
  40. optimum/rbln/transformers/models/bart/configuration_bart.py +2 -0
  41. optimum/rbln/transformers/models/bert/bert_architecture.py +16 -0
  42. optimum/rbln/transformers/models/bert/modeling_bert.py +8 -4
  43. optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +2 -2
  44. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +7 -6
  45. optimum/rbln/transformers/models/clip/configuration_clip.py +3 -3
  46. optimum/rbln/transformers/models/colpali/colpali_architecture.py +1 -4
  47. optimum/rbln/transformers/models/colpali/configuration_colpali.py +2 -2
  48. optimum/rbln/transformers/models/colpali/modeling_colpali.py +2 -10
  49. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +43 -174
  50. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +101 -91
  51. optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +450 -0
  52. optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +88 -0
  53. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +296 -986
  54. optimum/rbln/transformers/models/depth_anything/__init__.py +16 -0
  55. optimum/rbln/transformers/models/depth_anything/configuration_depth_anything.py +24 -0
  56. optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py +25 -0
  57. optimum/rbln/transformers/models/gemma/gemma_architecture.py +1 -4
  58. optimum/rbln/transformers/models/gemma/modeling_gemma.py +9 -0
  59. optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +3 -3
  60. optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py +217 -0
  61. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +25 -251
  62. optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +2 -0
  63. optimum/rbln/transformers/models/grounding_dino/__init__.py +10 -0
  64. optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +86 -0
  65. optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +507 -0
  66. optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +1032 -0
  67. optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +2 -2
  68. optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +3 -9
  69. optimum/rbln/transformers/models/llama/modeling_llama.py +12 -3
  70. optimum/rbln/transformers/models/llava/configuration_llava.py +2 -2
  71. optimum/rbln/transformers/models/llava/modeling_llava.py +53 -14
  72. optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +2 -2
  73. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +6 -16
  74. optimum/rbln/transformers/models/opt/modeling_opt.py +2 -30
  75. optimum/rbln/transformers/models/pegasus/configuration_pegasus.py +4 -0
  76. optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +2 -0
  77. optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +1 -3
  78. optimum/rbln/transformers/models/pixtral/configuration_pixtral.py +2 -2
  79. optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +1 -4
  80. optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +3 -3
  81. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +6 -15
  82. optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +4 -7
  83. optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +77 -3
  84. optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +1 -4
  85. optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +19 -2
  86. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +20 -1
  87. optimum/rbln/transformers/models/siglip/__init__.py +2 -6
  88. optimum/rbln/transformers/models/siglip/modeling_siglip.py +2 -2
  89. optimum/rbln/transformers/models/swin/__init__.py +16 -0
  90. optimum/rbln/transformers/models/swin/configuration_swin.py +42 -0
  91. optimum/rbln/transformers/models/swin/modeling_swin.py +341 -0
  92. optimum/rbln/transformers/models/t5/configuration_t5.py +2 -0
  93. optimum/rbln/transformers/models/t5/t5_architecture.py +8 -1
  94. optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +2 -2
  95. optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -14
  96. optimum/rbln/transformers/models/whisper/configuration_whisper.py +10 -2
  97. optimum/rbln/transformers/models/whisper/modeling_whisper.py +20 -1
  98. optimum/rbln/transformers/models/xlm_roberta/__init__.py +2 -8
  99. optimum/rbln/transformers/utils/rbln_quantization.py +365 -65
  100. optimum/rbln/utils/runtime_utils.py +3 -3
  101. optimum/rbln/utils/submodule.py +10 -4
  102. {optimum_rbln-0.8.2rc0.dist-info → optimum_rbln-0.8.3.dist-info}/METADATA +1 -1
  103. {optimum_rbln-0.8.2rc0.dist-info → optimum_rbln-0.8.3.dist-info}/RECORD +105 -89
  104. {optimum_rbln-0.8.2rc0.dist-info → optimum_rbln-0.8.3.dist-info}/WHEEL +0 -0
  105. {optimum_rbln-0.8.2rc0.dist-info → optimum_rbln-0.8.3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,237 @@
1
+ # Copyright 2025 Rebellions Inc. All rights reserved.
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import importlib
17
+ from typing import Type
18
+
19
+ from diffusers.models.controlnets import ControlNetUnionModel
20
+ from diffusers.pipelines.auto_pipeline import (
21
+ AUTO_IMAGE2IMAGE_PIPELINES_MAPPING,
22
+ AUTO_INPAINT_PIPELINES_MAPPING,
23
+ AUTO_TEXT2IMAGE_PIPELINES_MAPPING,
24
+ AutoPipelineForImage2Image,
25
+ AutoPipelineForInpainting,
26
+ AutoPipelineForText2Image,
27
+ _get_task_class,
28
+ )
29
+ from huggingface_hub.utils import validate_hf_hub_args
30
+
31
+ from optimum.rbln.modeling_base import RBLNBaseModel
32
+ from optimum.rbln.utils.model_utils import (
33
+ MODEL_MAPPING,
34
+ convert_hf_to_rbln_model_name,
35
+ convert_rbln_to_hf_model_name,
36
+ get_rbln_model_cls,
37
+ )
38
+
39
+
40
+ class RBLNAutoPipelineBase:
41
+ _model_mapping = None
42
+ _model_mapping_names = None
43
+
44
+ @classmethod
45
+ def get_rbln_cls(cls, pretrained_model_name_or_path, export=True, **kwargs):
46
+ if export:
47
+ hf_model_class = cls.infer_hf_model_class(pretrained_model_name_or_path, **kwargs)
48
+ rbln_class_name = convert_hf_to_rbln_model_name(hf_model_class.__name__)
49
+ else:
50
+ rbln_class_name = cls.get_rbln_model_cls_name(pretrained_model_name_or_path, **kwargs)
51
+ if convert_rbln_to_hf_model_name(rbln_class_name) not in cls._model_mapping_names.values():
52
+ raise ValueError(
53
+ f"The architecture '{rbln_class_name}' is not supported by the `{cls.__name__}.from_pretrained()` method. "
54
+ "Please use the `from_pretrained()` method of the appropriate class to load this model, "
55
+ f"or directly use '{rbln_class_name}.from_pretrained()`."
56
+ )
57
+
58
+ try:
59
+ rbln_cls = get_rbln_model_cls(rbln_class_name)
60
+ except AttributeError as e:
61
+ raise AttributeError(
62
+ f"Class '{rbln_class_name}' not found in 'optimum.rbln' module for model ID '{pretrained_model_name_or_path}'. "
63
+ "Ensure that the class name is correctly mapped and available in the 'optimum.rbln' module."
64
+ ) from e
65
+
66
+ return rbln_cls
67
+
68
+ @classmethod
69
+ def get_rbln_model_cls_name(cls, pretrained_model_name_or_path, **kwargs):
70
+ """
71
+ Retrieve the RBLN model class name recorded in the model index config.
72
+
73
+ Args:
74
+ pretrained_model_name_or_path (str): Identifier of the model.
75
+
76
+ Returns:
77
+ str: The `_class_name` value from the model's index configuration.
78
+ """
79
+ model_index_config = cls.load_config(pretrained_model_name_or_path)
80
+
81
+ if "_class_name" not in model_index_config:
82
+ raise ValueError(
83
+ "The `_class_name` field is missing from model_index_config. This is unexpected and should be reported as an issue. "
84
+ "Please use the `from_pretrained()` method of the appropriate class to load this model."
85
+ )
86
+
87
+ return model_index_config["_class_name"]
88
+
89
+ @classmethod
90
+ def infer_hf_model_class(
91
+ cls,
92
+ pretrained_model_or_path,
93
+ cache_dir=None,
94
+ force_download=False,
95
+ proxies=None,
96
+ token=None,
97
+ local_files_only=False,
98
+ revision=None,
99
+ **kwargs,
100
+ ):
101
+ config = cls.load_config(
102
+ pretrained_model_or_path,
103
+ cache_dir=cache_dir,
104
+ force_download=force_download,
105
+ proxies=proxies,
106
+ token=token,
107
+ local_files_only=local_files_only,
108
+ revision=revision,
109
+ )
110
+ pipeline_key_name = cls.get_pipeline_key_name(config, **kwargs)
111
+
112
+ pipeline_cls = _get_task_class(cls._model_mapping, pipeline_key_name)
113
+
114
+ return pipeline_cls
115
+
116
+ @classmethod
117
+ def get_pipeline_key_name(cls, config, **kwargs):
118
+ orig_class_name = config["_class_name"]
119
+ if "ControlPipeline" in orig_class_name:
120
+ to_replace = "ControlPipeline"
121
+ else:
122
+ to_replace = "Pipeline"
123
+
124
+ if "controlnet" in kwargs:
125
+ if isinstance(kwargs["controlnet"], ControlNetUnionModel):
126
+ orig_class_name = config["_class_name"].replace(to_replace, "ControlNetUnionPipeline")
127
+ else:
128
+ orig_class_name = config["_class_name"].replace(to_replace, "ControlNetPipeline")
129
+ if "enable_pag" in kwargs:
130
+ enable_pag = kwargs.pop("enable_pag")
131
+ if enable_pag:
132
+ orig_class_name = orig_class_name.replace(to_replace, "PAGPipeline")
133
+
134
+ return orig_class_name
135
+
136
+ @classmethod
137
+ @validate_hf_hub_args
138
+ def from_pretrained(cls, model_id, **kwargs):
139
+ rbln_cls = cls.get_rbln_cls(model_id, **kwargs)
140
+ return rbln_cls.from_pretrained(model_id, **kwargs)
141
+
142
+ @classmethod
143
+ def from_model(cls, model, **kwargs):
144
+ rbln_cls = get_rbln_model_cls(f"RBLN{model.__class__.__name__}")
145
+ return rbln_cls.from_model(model, **kwargs)
146
+
147
+ @staticmethod
148
+ def register(rbln_cls: Type[RBLNBaseModel], exist_ok=False):
149
+ """
150
+ Register a new RBLN model class.
151
+
152
+ Args:
153
+ rbln_cls (Type[RBLNBaseModel]): The RBLN model class to register.
154
+ exist_ok (bool): Whether to allow registering an already registered model.
155
+ """
156
+ if not issubclass(rbln_cls, RBLNBaseModel):
157
+ raise ValueError("`rbln_cls` must be a subclass of RBLNBaseModel.")
158
+
159
+ native_cls = getattr(importlib.import_module("optimum.rbln"), rbln_cls.__name__, None)
160
+ if rbln_cls.__name__ in MODEL_MAPPING or native_cls is not None:
161
+ if not exist_ok:
162
+ raise ValueError(f"Model for {rbln_cls.__name__} already registered.")
163
+
164
+ MODEL_MAPPING[rbln_cls.__name__] = rbln_cls
165
+
166
+
167
+ class RBLNAutoPipelineForText2Image(RBLNAutoPipelineBase, AutoPipelineForText2Image):
168
+ _model_mapping = AUTO_TEXT2IMAGE_PIPELINES_MAPPING
169
+ _model_mapping_names = {x[0]: x[1].__name__ for x in AUTO_TEXT2IMAGE_PIPELINES_MAPPING.items()}
170
+
171
+
172
+ class RBLNAutoPipelineForImage2Image(RBLNAutoPipelineBase, AutoPipelineForImage2Image):
173
+ _model_mapping = AUTO_IMAGE2IMAGE_PIPELINES_MAPPING
174
+ _model_mapping_names = {x[0]: x[1].__name__ for x in AUTO_IMAGE2IMAGE_PIPELINES_MAPPING.items()}
175
+
176
+ @classmethod
177
+ def get_pipeline_key_name(cls, config, **kwargs):
178
+ orig_class_name = config["_class_name"]
179
+ # the `orig_class_name` can be:
180
+ # - `*Pipeline` (for regular text-to-image checkpoint)
181
+ # - `*ControlPipeline` (for Flux tools specific checkpoint)
182
+ # - `*Img2ImgPipeline` (for refiner checkpoint)
183
+ if "Img2Img" in orig_class_name:
184
+ to_replace = "Img2ImgPipeline"
185
+ elif "ControlPipeline" in orig_class_name:
186
+ to_replace = "ControlPipeline"
187
+ else:
188
+ to_replace = "Pipeline"
189
+
190
+ if "controlnet" in kwargs:
191
+ if isinstance(kwargs["controlnet"], ControlNetUnionModel):
192
+ orig_class_name = orig_class_name.replace(to_replace, "ControlNetUnion" + to_replace)
193
+ else:
194
+ orig_class_name = orig_class_name.replace(to_replace, "ControlNet" + to_replace)
195
+ if "enable_pag" in kwargs:
196
+ enable_pag = kwargs.pop("enable_pag")
197
+ if enable_pag:
198
+ orig_class_name = orig_class_name.replace(to_replace, "PAG" + to_replace)
199
+
200
+ if to_replace == "ControlPipeline":
201
+ orig_class_name = orig_class_name.replace(to_replace, "ControlImg2ImgPipeline")
202
+
203
+ return orig_class_name
204
+
205
+
206
+ class RBLNAutoPipelineForInpainting(RBLNAutoPipelineBase, AutoPipelineForInpainting):
207
+ _model_mapping = AUTO_INPAINT_PIPELINES_MAPPING
208
+ _model_mapping_names = {x[0]: x[1].__name__ for x in AUTO_INPAINT_PIPELINES_MAPPING.items()}
209
+
210
+ @classmethod
211
+ def get_pipeline_key_name(cls, config, **kwargs):
212
+ orig_class_name = config["_class_name"]
213
+
214
+ # The `orig_class_name` can be:
215
+ # - `*InpaintPipeline` (for inpaint-specific checkpoint)
216
+ # - `*ControlPipeline` (for Flux tools specific checkpoint)
217
+ # - `*Pipeline` (for regular text-to-image checkpoint)
218
+ if "Inpaint" in orig_class_name:
219
+ to_replace = "InpaintPipeline"
220
+ elif "ControlPipeline" in orig_class_name:
221
+ to_replace = "ControlPipeline"
222
+ else:
223
+ to_replace = "Pipeline"
224
+
225
+ if "controlnet" in kwargs:
226
+ if isinstance(kwargs["controlnet"], ControlNetUnionModel):
227
+ orig_class_name = orig_class_name.replace(to_replace, "ControlNetUnion" + to_replace)
228
+ else:
229
+ orig_class_name = orig_class_name.replace(to_replace, "ControlNet" + to_replace)
230
+ if "enable_pag" in kwargs:
231
+ enable_pag = kwargs.pop("enable_pag")
232
+ if enable_pag:
233
+ orig_class_name = orig_class_name.replace(to_replace, "PAG" + to_replace)
234
+ if to_replace == "ControlPipeline":
235
+ orig_class_name = orig_class_name.replace(to_replace, "ControlInpaintPipeline")
236
+
237
+ return orig_class_name
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Any, Dict, Optional, Tuple
15
+ from typing import Any, Optional, Tuple
16
16
 
17
17
  from ....configuration_utils import RBLNAutoConfig, RBLNModelConfig
18
18
  from ....transformers import RBLNLlamaForCausalLMConfig, RBLNSiglipVisionModelConfig
@@ -56,11 +56,11 @@ class RBLNCosmosSafetyCheckerConfig(RBLNModelConfig):
56
56
  Configuration class for RBLN Cosmos Safety Checker.
57
57
  """
58
58
 
59
- submodules = ["aegis", "video_safety_model", "face_blur_filter", "siglip_encoder"]
59
+ submodules = ["llamaguard3", "video_safety_model", "face_blur_filter", "siglip_encoder"]
60
60
 
61
61
  def __init__(
62
62
  self,
63
- aegis: Optional[RBLNModelConfig] = None,
63
+ llamaguard3: Optional[RBLNModelConfig] = None,
64
64
  video_safety_model: Optional[RBLNModelConfig] = None,
65
65
  face_blur_filter: Optional[RBLNModelConfig] = None,
66
66
  siglip_encoder: Optional[RBLNSiglipVisionModelConfig] = None,
@@ -69,19 +69,24 @@ class RBLNCosmosSafetyCheckerConfig(RBLNModelConfig):
69
69
  image_size: Optional[Tuple[int, int]] = None,
70
70
  height: Optional[int] = None,
71
71
  width: Optional[int] = None,
72
- **kwargs: Dict[str, Any],
72
+ max_seq_len: Optional[int] = None,
73
+ **kwargs: Any,
73
74
  ):
74
75
  super().__init__(**kwargs)
75
76
  if height is not None and width is not None:
76
77
  image_size = (height, width)
77
78
 
79
+ if max_seq_len is None:
80
+ max_seq_len = 512
81
+
78
82
  tensor_parallel_size = kwargs.get("tensor_parallel_size")
79
83
 
80
- self.aegis = self.init_submodule_config(
84
+ self.llamaguard3 = self.init_submodule_config(
81
85
  RBLNLlamaForCausalLMConfig,
82
- aegis,
86
+ llamaguard3,
83
87
  batch_size=batch_size,
84
88
  tensor_parallel_size=tensor_parallel_size,
89
+ max_seq_len=max_seq_len,
85
90
  )
86
91
 
87
92
  self.siglip_encoder = self.init_submodule_config(
@@ -33,9 +33,9 @@ if is_cosmos_guardrail_available():
33
33
  from cosmos_guardrail import CosmosSafetyChecker
34
34
  from cosmos_guardrail.cosmos_guardrail import (
35
35
  COSMOS_GUARDRAIL_CHECKPOINT,
36
- Aegis,
37
36
  Blocklist,
38
37
  GuardrailRunner,
38
+ LlamaGuard3,
39
39
  ModelConfig,
40
40
  RetinaFaceFilter,
41
41
  SafetyClassifier,
@@ -55,7 +55,7 @@ else:
55
55
 
56
56
  COSMOS_GUARDRAIL_CHECKPOINT = None
57
57
 
58
- class Aegis(FailToImportCosmosGuardrail): ...
58
+ class LlamaGuard3(FailToImportCosmosGuardrail): ...
59
59
 
60
60
  class Blocklist(FailToImportCosmosGuardrail): ...
61
61
 
@@ -312,33 +312,31 @@ class RBLNVideoContentSafetyFilter(VideoContentSafetyFilter):
312
312
  self.encoder.save_pretrained(checkpoint_id)
313
313
 
314
314
 
315
- class RBLNAegis(Aegis):
315
+ class RBLNLlamaGuard3(LlamaGuard3):
316
316
  def __init__(
317
317
  self,
318
318
  checkpoint_id: str = COSMOS_GUARDRAIL_CHECKPOINT,
319
- base_model_id: str = "meta-llama/LlamaGuard-7b",
320
- aegis_adapter: str = "nvidia/Aegis-AI-Content-Safety-LlamaGuard-Defensive-1.0",
319
+ base_model_id: str = "meta-llama/Llama-Guard-3-8B",
321
320
  rbln_config: Optional[RBLNCosmosSafetyCheckerConfig] = None,
322
321
  ) -> None:
323
322
  if is_compiled_dir(checkpoint_id):
324
323
  torch.nn.Module.__init__(self)
325
- cache_dir = pathlib.Path(checkpoint_id) / "aegis"
324
+ cache_dir = pathlib.Path(checkpoint_id) / "llamaguard3"
326
325
  self.tokenizer = AutoTokenizer.from_pretrained(cache_dir)
327
- self.model = RBLNAutoModelForCausalLM.from_pretrained(cache_dir, rbln_config=rbln_config.aegis)
326
+ self.model = RBLNAutoModelForCausalLM.from_pretrained(cache_dir, rbln_config=rbln_config.llamaguard3)
328
327
 
329
328
  else:
330
- super().__init__(checkpoint_id, base_model_id, aegis_adapter)
331
- model = self.model.merge_and_unload() # peft merge
329
+ super().__init__(checkpoint_id, base_model_id)
330
+ model = self.model
332
331
  del self.model
333
-
334
- self.model = RBLNAutoModelForCausalLM.from_model(model, rbln_config=rbln_config.aegis)
332
+ self.model = RBLNAutoModelForCausalLM.from_model(model, rbln_config=rbln_config.llamaguard3)
335
333
 
336
334
  self.rbln_config = rbln_config
337
335
  self.dtype = torch.bfloat16
338
336
  self.device = torch.device("cpu")
339
337
 
340
338
  def save_pretrained(self, checkpoint_id: str):
341
- cache_dir = pathlib.Path(checkpoint_id) / "aegis"
339
+ cache_dir = pathlib.Path(checkpoint_id) / "llamaguard3"
342
340
  self.model.save_pretrained(cache_dir)
343
341
  self.tokenizer.save_pretrained(cache_dir)
344
342
 
@@ -351,8 +349,7 @@ class RBLNCosmosSafetyChecker(CosmosSafetyChecker):
351
349
  def __init__(
352
350
  self,
353
351
  checkpoint_id: str = COSMOS_GUARDRAIL_CHECKPOINT,
354
- aegis_model_id: str = "meta-llama/LlamaGuard-7b",
355
- aegis_adapter_id: str = "nvidia/Aegis-AI-Content-Safety-LlamaGuard-Defensive-1.0",
352
+ llamaguard_model_id: str = "meta-llama/Llama-Guard-3-8B",
356
353
  rbln_config: Optional[RBLNCosmosSafetyCheckerConfig] = None,
357
354
  ) -> None:
358
355
  torch.nn.Module.__init__(self)
@@ -369,10 +366,9 @@ class RBLNCosmosSafetyChecker(CosmosSafetyChecker):
369
366
  self.text_guardrail = GuardrailRunner(
370
367
  safety_models=[
371
368
  Blocklist(COSMOS_GUARDRAIL_CHECKPOINT), # Changed since it cannot be saved
372
- RBLNAegis(
369
+ RBLNLlamaGuard3(
373
370
  checkpoint_id=checkpoint_id,
374
- base_model_id=aegis_model_id,
375
- aegis_adapter=aegis_adapter_id,
371
+ base_model_id=llamaguard_model_id,
376
372
  rbln_config=rbln_config,
377
373
  ),
378
374
  ]
@@ -387,7 +383,7 @@ class RBLNCosmosSafetyChecker(CosmosSafetyChecker):
387
383
 
388
384
  def save_pretrained(self, save_dir: str):
389
385
  for text_safety_models in self.text_guardrail.safety_models:
390
- if isinstance(text_safety_models, RBLNAegis):
386
+ if isinstance(text_safety_models, RBLNLlamaGuard3):
391
387
  text_safety_models.save_pretrained(save_dir)
392
388
 
393
389
  for video_safety_models in self.video_guardrail.safety_models:
@@ -87,7 +87,7 @@ class RBLNCosmosTextToWorldPipeline(RBLNDiffusionMixin, CosmosTextToWorldPipelin
87
87
  export: bool = False,
88
88
  safety_checker: Optional[RBLNCosmosSafetyChecker] = None,
89
89
  rbln_config: Dict[str, Any] = {},
90
- **kwargs: Dict[str, Any],
90
+ **kwargs: Any,
91
91
  ):
92
92
  rbln_config, kwargs = cls.get_rbln_config_class().initialize_from_kwargs(rbln_config, **kwargs)
93
93
  if safety_checker is None and export:
@@ -87,7 +87,7 @@ class RBLNCosmosVideoToWorldPipeline(RBLNDiffusionMixin, CosmosVideoToWorldPipel
87
87
  export: bool = False,
88
88
  safety_checker: Optional[RBLNCosmosSafetyChecker] = None,
89
89
  rbln_config: Dict[str, Any] = {},
90
- **kwargs: Dict[str, Any],
90
+ **kwargs: Any,
91
91
  ):
92
92
  rbln_config, kwargs = cls.get_rbln_config_class().initialize_from_kwargs(rbln_config, **kwargs)
93
93
  if safety_checker is None and export:
@@ -22,12 +22,7 @@ from diffusers import (
22
22
  UNet2DConditionModel,
23
23
  VQModel,
24
24
  )
25
- from transformers import (
26
- CLIPImageProcessor,
27
- CLIPTextModelWithProjection,
28
- CLIPTokenizer,
29
- CLIPVisionModelWithProjection,
30
- )
25
+ from transformers import CLIPImageProcessor, CLIPTextModelWithProjection, CLIPTokenizer, CLIPVisionModelWithProjection
31
26
 
32
27
  from ...configurations import RBLNKandinskyV22CombinedPipelineConfig
33
28
  from ...modeling_diffusers import RBLNDiffusionMixin
optimum/rbln/modeling.py CHANGED
@@ -78,7 +78,7 @@ class RBLNModel(RBLNBaseModel):
78
78
  rbln_config: Optional[Union[RBLNModelConfig, Dict]] = None,
79
79
  model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
80
80
  subfolder: str = "",
81
- **kwargs: Dict[str, Any],
81
+ **kwargs: Any,
82
82
  ) -> "RBLNModel":
83
83
  """
84
84
  Converts and compiles a pre-trained HuggingFace library model into a RBLN model.
@@ -147,6 +147,7 @@ class RBLNModel(RBLNBaseModel):
147
147
  model=model,
148
148
  model_save_dir=save_dir,
149
149
  rbln_config=rbln_config,
150
+ preprocessors=preprocessors,
150
151
  **kwargs,
151
152
  )
152
153
  else:
@@ -241,7 +242,7 @@ class RBLNModel(RBLNBaseModel):
241
242
  for compiled_model in compiled_models
242
243
  ]
243
244
 
244
- def forward(self, *args: Any, return_dict: Optional[bool] = None, **kwargs: Dict[str, Any]) -> Any:
245
+ def forward(self, *args: Any, return_dict: Optional[bool] = None, **kwargs: Any) -> Any:
245
246
  """
246
247
  Defines the forward pass of the RBLN model, providing a drop-in replacement for HuggingFace PreTrainedModel.
247
248
 
@@ -348,7 +348,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
348
348
  model_id: Union[str, Path],
349
349
  export: bool = False,
350
350
  rbln_config: Optional[Union[Dict, RBLNModelConfig]] = None,
351
- **kwargs: Dict[str, Any],
351
+ **kwargs: Any,
352
352
  ) -> "RBLNBaseModel":
353
353
  """
354
354
  The `from_pretrained()` function is utilized in its standard form as in the HuggingFace transformers library.
@@ -523,10 +523,35 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
523
523
  # First copy everything to a temporary directory
524
524
  shutil.copytree(real_save_dir, tmp_dir)
525
525
 
526
- # If everything succeeded, atomically replace the target directory
526
+ # If everything succeeded, move files to target directory
527
527
  if os.path.exists(save_directory_path):
528
- shutil.rmtree(save_directory_path)
529
- os.rename(tmp_dir, save_directory_path)
528
+ # Merge files from tmp_dir into existing directory
529
+ def _merge_dir(src_root: str, dst_root: str):
530
+ for name in os.listdir(src_root):
531
+ src_item = os.path.join(src_root, name)
532
+ dst_item = os.path.join(dst_root, name)
533
+
534
+ if os.path.islink(src_item) or os.path.isfile(src_item):
535
+ os.makedirs(os.path.dirname(dst_item), exist_ok=True)
536
+ if os.path.isdir(dst_item) and not os.path.islink(dst_item):
537
+ shutil.rmtree(dst_item)
538
+ os.replace(src_item, dst_item)
539
+ elif os.path.isdir(src_item):
540
+ if os.path.islink(dst_item) or os.path.isfile(dst_item):
541
+ os.remove(dst_item)
542
+ os.makedirs(dst_item, exist_ok=True)
543
+ _merge_dir(src_item, dst_item)
544
+ else:
545
+ # Fallback for special file types
546
+ os.replace(src_item, dst_item)
547
+
548
+ _merge_dir(tmp_dir, str(save_directory_path))
549
+
550
+ # Remove the temporary directory tree after merge
551
+ shutil.rmtree(tmp_dir)
552
+ else:
553
+ # If target doesn't exist, just rename tmp_dir to target
554
+ os.rename(tmp_dir, save_directory_path)
530
555
 
531
556
  except Exception as e:
532
557
  # Clean up the temporary directory if anything fails
optimum/rbln/ops/attn.py CHANGED
@@ -53,6 +53,45 @@ def paged_attn_decode_fake(
53
53
  return torch.empty_like(q)
54
54
 
55
55
 
56
+ @torch.library.custom_op(
57
+ "rbln_custom_ops::paged_attn_decode_kv_fp8",
58
+ mutates_args=(["kcache", "vcache"]),
59
+ )
60
+ def paged_attn_decode_kv_fp8(
61
+ q: Tensor,
62
+ k: Tensor,
63
+ v: Tensor,
64
+ mask: Tensor,
65
+ kcache: Tensor,
66
+ vcache: Tensor,
67
+ seq: Tensor,
68
+ scale: Tensor,
69
+ block_table: Tensor,
70
+ block_size: int,
71
+ k_scale: Tensor,
72
+ v_scale: Tensor,
73
+ ) -> Tensor:
74
+ return torch.empty_like(q)
75
+
76
+
77
+ @paged_attn_decode_kv_fp8.register_fake
78
+ def paged_attn_decode_kv_fp8_fake(
79
+ q: Tensor,
80
+ k: Tensor,
81
+ v: Tensor,
82
+ mask: Tensor,
83
+ kcache: Tensor,
84
+ vcache: Tensor,
85
+ seq: Tensor,
86
+ scale: Tensor,
87
+ block_table: Tensor,
88
+ block_size: int,
89
+ k_scale: Tensor,
90
+ v_scale: Tensor,
91
+ ) -> Tensor:
92
+ return torch.empty_like(q)
93
+
94
+
56
95
  @torch.library.custom_op(
57
96
  "rbln_custom_ops::paged_attn_prefill",
58
97
  mutates_args=(["kcache", "vcache"]),
@@ -112,6 +151,45 @@ def paged_attn_prefill_fake(
112
151
  return torch.empty_like(q)
113
152
 
114
153
 
154
+ @torch.library.custom_op(
155
+ "rbln_custom_ops::paged_attn_prefill_kv_fp8",
156
+ mutates_args=(["kcache", "vcache"]),
157
+ )
158
+ def paged_attn_prefill_kv_fp8(
159
+ q: Tensor,
160
+ k: Tensor,
161
+ v: Tensor,
162
+ mask: Tensor,
163
+ kcache: Tensor,
164
+ vcache: Tensor,
165
+ seq: Tensor,
166
+ scale: Tensor,
167
+ block_table: Tensor,
168
+ block_size: int,
169
+ k_scale: Tensor,
170
+ v_scale: Tensor,
171
+ ) -> Tensor:
172
+ return torch.empty_like(q)
173
+
174
+
175
+ @paged_attn_prefill_kv_fp8.register_fake
176
+ def paged_attn_prefill_kv_fp8_fake(
177
+ q: Tensor,
178
+ k: Tensor,
179
+ v: Tensor,
180
+ mask: Tensor,
181
+ kcache: Tensor,
182
+ vcache: Tensor,
183
+ seq: Tensor,
184
+ scale: Tensor,
185
+ block_table: Tensor,
186
+ block_size: int,
187
+ k_scale: Tensor,
188
+ v_scale: Tensor,
189
+ ) -> Tensor:
190
+ return torch.empty_like(q)
191
+
192
+
115
193
  @torch.library.custom_op(
116
194
  "rbln_custom_ops::paged_causal_attn_decode",
117
195
  mutates_args=(["kcache", "vcache"]),
@@ -236,6 +314,86 @@ def paged_causal_attn_prefill_fake(
236
314
  return torch.empty_like(q)
237
315
 
238
316
 
317
+ @torch.library.custom_op(
318
+ "rbln_custom_ops::paged_causal_attn_decode_kv_fp8",
319
+ mutates_args=(["kcache", "vcache"]),
320
+ )
321
+ def paged_causal_attn_decode_kv_fp8(
322
+ q: Tensor,
323
+ k: Tensor,
324
+ v: Tensor,
325
+ kcache: Tensor,
326
+ vcache: Tensor,
327
+ seq: Tensor,
328
+ scale: Tensor,
329
+ block_table: Tensor,
330
+ block_size: int,
331
+ k_scale: Tensor,
332
+ v_scale: Tensor,
333
+ mask: Optional[Tensor] = None,
334
+ ) -> Tensor:
335
+ return torch.empty_like(q)
336
+
337
+
338
+ @paged_causal_attn_decode_kv_fp8.register_fake
339
+ def paged_causal_attn_decode_kv_fp8_fake(
340
+ q: Tensor,
341
+ k: Tensor,
342
+ v: Tensor,
343
+ kcache: Tensor,
344
+ vcache: Tensor,
345
+ seq: Tensor,
346
+ scale: Tensor,
347
+ block_table: Tensor,
348
+ block_size: int,
349
+ k_scale: Tensor,
350
+ v_scale: Tensor,
351
+ mask: Optional[Tensor] = None,
352
+ ) -> Tensor:
353
+ return torch.empty_like(q)
354
+
355
+
356
+ @torch.library.custom_op(
357
+ "rbln_custom_ops::paged_causal_attn_prefill_kv_fp8",
358
+ mutates_args=(["kcache", "vcache"]),
359
+ )
360
+ def paged_causal_attn_prefill_kv_fp8(
361
+ q: Tensor,
362
+ k: Tensor,
363
+ v: Tensor,
364
+ kcache: Tensor,
365
+ vcache: Tensor,
366
+ seq: Tensor,
367
+ scale: Tensor,
368
+ block_table: Tensor,
369
+ block_size: int,
370
+ is_bidirectional: bool,
371
+ k_scale: Tensor,
372
+ v_scale: Tensor,
373
+ mask: Optional[Tensor] = None,
374
+ ) -> Tensor:
375
+ return torch.empty_like(q)
376
+
377
+
378
+ @paged_causal_attn_prefill_kv_fp8.register_fake
379
+ def paged_causal_attn_prefill_kv_fp8_fake(
380
+ q: Tensor,
381
+ k: Tensor,
382
+ v: Tensor,
383
+ kcache: Tensor,
384
+ vcache: Tensor,
385
+ seq: Tensor,
386
+ scale: Tensor,
387
+ block_table: Tensor,
388
+ block_size: int,
389
+ is_bidirectional: bool,
390
+ k_scale: Tensor,
391
+ v_scale: Tensor,
392
+ mask: Optional[Tensor] = None,
393
+ ) -> Tensor:
394
+ return torch.empty_like(q)
395
+
396
+
239
397
  @torch.library.custom_op(
240
398
  "rbln_custom_ops::paged_add_softmax_attn_decode",
241
399
  mutates_args=(["kcache", "vcache"]),