optimum-rbln 0.7.5rc1-py3-none-any.whl → 0.7.5rc2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
optimum/rbln/__version__.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '0.7.5rc1'
- __version_tuple__ = version_tuple = (0, 7, 5, 'rc1')
+ __version__ = version = '0.7.5rc2'
+ __version_tuple__ = version_tuple = (0, 7, 5, 'rc2')
optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py CHANGED
@@ -33,6 +33,8 @@ class _RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
          batch_size: Optional[int] = None,
          img_height: Optional[int] = None,
          img_width: Optional[int] = None,
+         height: Optional[int] = None,
+         width: Optional[int] = None,
          sample_size: Optional[Tuple[int, int]] = None,
          image_size: Optional[Tuple[int, int]] = None,
          guidance_scale: Optional[float] = None,
@@ -51,6 +53,8 @@ class _RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
              batch_size (Optional[int]): Batch size for inference, applied to all submodules.
              img_height (Optional[int]): Height of the generated images.
              img_width (Optional[int]): Width of the generated images.
+             height (Optional[int]): Height of the generated images.
+             width (Optional[int]): Width of the generated images.
              sample_size (Optional[Tuple[int, int]]): Spatial dimensions for the UNet model.
              image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
                  Cannot be used together with img_height/img_width.
@@ -65,11 +69,29 @@ class _RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
              accommodate classifier-free guidance.
          """
          super().__init__(**kwargs)
-         if image_size is not None and (img_height is not None or img_width is not None):
-             raise ValueError("image_size and img_height/img_width cannot both be provided")

-         if img_height is not None and img_width is not None:
+         # Initial check for image_size conflict remains as is
+         if image_size is not None and (
+             img_height is not None or img_width is not None or height is not None or width is not None
+         ):
+             raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+         # Prioritize height/width (HF-aligned)
+         if height is not None and width is not None:
+             if img_height is not None or img_width is not None:
+                 # Raise error if both sets of arguments are provided
+                 raise ValueError(
+                     "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                     "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                 )
+             image_size = (height, width)
+         elif (height is not None and width is None) or (height is None and width is not None):
+             raise ValueError("Both height and width must be provided together if used")
+         # Fallback to img_height/img_width for backward compatibility
+         elif img_height is not None and img_width is not None:
              image_size = (img_height, img_width)
+         elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+             raise ValueError("Both img_height and img_width must be provided together if used")

          self.text_encoder = self.init_submodule_config(RBLNCLIPTextModelConfig, text_encoder, batch_size=batch_size)
          self.unet = self.init_submodule_config(
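
The resolution rules introduced by these hunks (and repeated verbatim in each pipeline config below) come down to three points: image_size is mutually exclusive with every scalar dimension argument, the HF-aligned height/width pair takes priority over the legacy img_height/img_width pair, and either pair must be passed as a whole. A minimal standalone sketch of that precedence, assuming nothing beyond the standard library (resolve_image_size is an illustrative name, not part of optimum-rbln):

from typing import Optional, Tuple

def resolve_image_size(
    image_size: Optional[Tuple[int, int]] = None,
    img_height: Optional[int] = None,
    img_width: Optional[int] = None,
    height: Optional[int] = None,
    width: Optional[int] = None,
) -> Optional[Tuple[int, int]]:
    # image_size conflicts with every scalar dimension argument.
    if image_size is not None and any(v is not None for v in (img_height, img_width, height, width)):
        raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
    # The HF-aligned height/width pair wins, and may not be mixed with the legacy pair.
    if height is not None and width is not None:
        if img_height is not None or img_width is not None:
            raise ValueError("Cannot provide both 'height'/'width' and 'img_height'/'img_width'")
        return (height, width)
    if (height is None) != (width is None):
        raise ValueError("Both height and width must be provided together if used")
    # Legacy fallback, kept for backward compatibility.
    if img_height is not None and img_width is not None:
        return (img_height, img_width)
    if (img_height is None) != (img_width is None):
        raise ValueError("Both img_height and img_width must be provided together if used")
    return image_size

assert resolve_image_size(height=768, width=512) == (768, 512)
assert resolve_image_size(img_height=768, img_width=512) == (768, 512)  # legacy path still accepted
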
@@ -139,6 +161,8 @@ class _RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
          batch_size: Optional[int] = None,
          img_height: Optional[int] = None,
          img_width: Optional[int] = None,
+         height: Optional[int] = None,
+         width: Optional[int] = None,
          sample_size: Optional[Tuple[int, int]] = None,
          image_size: Optional[Tuple[int, int]] = None,
          guidance_scale: Optional[float] = None,
@@ -159,6 +183,8 @@ class _RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
              batch_size (Optional[int]): Batch size for inference, applied to all submodules.
              img_height (Optional[int]): Height of the generated images.
              img_width (Optional[int]): Width of the generated images.
+             height (Optional[int]): Height of the generated images.
+             width (Optional[int]): Width of the generated images.
              sample_size (Optional[Tuple[int, int]]): Spatial dimensions for the UNet model.
              image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
                  Cannot be used together with img_height/img_width.
@@ -173,11 +199,29 @@ class _RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
              accommodate classifier-free guidance.
          """
          super().__init__(**kwargs)
-         if image_size is not None and (img_height is not None or img_width is not None):
-             raise ValueError("image_size and img_height/img_width cannot both be provided")

-         if img_height is not None and img_width is not None:
+         # Initial check for image_size conflict remains as is
+         if image_size is not None and (
+             img_height is not None or img_width is not None or height is not None or width is not None
+         ):
+             raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+         # Prioritize height/width (HF-aligned)
+         if height is not None and width is not None:
+             if img_height is not None or img_width is not None:
+                 # Raise error if both sets of arguments are provided
+                 raise ValueError(
+                     "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                     "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                 )
+             image_size = (height, width)
+         elif (height is not None and width is None) or (height is None and width is not None):
+             raise ValueError("Both height and width must be provided together if used")
+         # Fallback to img_height/img_width for backward compatibility
+         elif img_height is not None and img_width is not None:
              image_size = (img_height, img_width)
+         elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+             raise ValueError("Both img_height and img_width must be provided together if used")

          self.text_encoder = self.init_submodule_config(RBLNCLIPTextModelConfig, text_encoder, batch_size=batch_size)
          self.text_encoder_2 = self.init_submodule_config(
optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py CHANGED
@@ -35,6 +35,8 @@ class _RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
          image_size: Optional[Tuple[int, int]] = None,
          img_height: Optional[int] = None,
          img_width: Optional[int] = None,
+         height: Optional[int] = None,
+         width: Optional[int] = None,
          **kwargs,
      ):
          """
@@ -50,6 +52,8 @@ class _RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
                  Cannot be used together with img_height/img_width.
              img_height (Optional[int]): Height of the generated images.
              img_width (Optional[int]): Width of the generated images.
+             height (Optional[int]): Height of the generated images.
+             width (Optional[int]): Width of the generated images.
              **kwargs: Additional arguments passed to the parent RBLNModelConfig.

          Raises:
@@ -60,11 +64,29 @@ class _RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
              accommodate classifier-free guidance.
          """
          super().__init__(**kwargs)
-         if image_size is not None and (img_height is not None or img_width is not None):
-             raise ValueError("image_size and img_height/img_width cannot both be provided")

-         if img_height is not None and img_width is not None:
+         # Initial check for image_size conflict remains as is
+         if image_size is not None and (
+             img_height is not None or img_width is not None or height is not None or width is not None
+         ):
+             raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+         # Prioritize height/width (HF-aligned)
+         if height is not None and width is not None:
+             if img_height is not None or img_width is not None:
+                 # Raise error if both sets of arguments are provided
+                 raise ValueError(
+                     "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                     "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                 )
+             image_size = (height, width)
+         elif (height is not None and width is None) or (height is None and width is not None):
+             raise ValueError("Both height and width must be provided together if used")
+         # Fallback to img_height/img_width for backward compatibility
+         elif img_height is not None and img_width is not None:
              image_size = (img_height, img_width)
+         elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+             raise ValueError("Both img_height and img_width must be provided together if used")

          self.unet = self.init_submodule_config(RBLNUNet2DConditionModelConfig, unet, sample_size=sample_size)
          self.movq = self.init_submodule_config(
@@ -186,6 +208,8 @@ class _RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
          batch_size: Optional[int] = None,
          img_height: Optional[int] = None,
          img_width: Optional[int] = None,
+         height: Optional[int] = None,
+         width: Optional[int] = None,
          guidance_scale: Optional[float] = None,
          prior_prior: Optional[RBLNPriorTransformerConfig] = None,
          prior_image_encoder: Optional[RBLNCLIPVisionModelWithProjectionConfig] = None,
@@ -212,6 +236,8 @@ class _RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
              batch_size (Optional[int]): Batch size for inference, applied to all submodules.
              img_height (Optional[int]): Height of the generated images.
              img_width (Optional[int]): Width of the generated images.
+             height (Optional[int]): Height of the generated images.
+             width (Optional[int]): Width of the generated images.
              guidance_scale (Optional[float]): Scale for classifier-free guidance.
              prior_prior (Optional[RBLNPriorTransformerConfig]): Direct configuration for the prior transformer.
                  Used if prior_pipe is not provided.
@@ -226,6 +252,30 @@ class _RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
              **kwargs: Additional arguments passed to the parent RBLNModelConfig.
          """
          super().__init__(**kwargs)
+
+         # Initial check for image_size conflict remains as is
+         if image_size is not None and (
+             img_height is not None or img_width is not None or height is not None or width is not None
+         ):
+             raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+         # Prioritize height/width (HF-aligned)
+         if height is not None and width is not None:
+             if img_height is not None or img_width is not None:
+                 # Raise error if both sets of arguments are provided
+                 raise ValueError(
+                     "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                     "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                 )
+             image_size = (height, width)
+         elif (height is not None and width is None) or (height is None and width is not None):
+             raise ValueError("Both height and width must be provided together if used")
+         # Fallback to img_height/img_width for backward compatibility
+         elif img_height is not None and img_width is not None:
+             image_size = (img_height, img_width)
+         elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+             raise ValueError("Both img_height and img_width must be provided together if used")
+
          self.prior_pipe = self.init_submodule_config(
              RBLNKandinskyV22PriorPipelineConfig,
              prior_pipe,
@@ -243,8 +293,6 @@ class _RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
              batch_size=batch_size,
              sample_size=sample_size,
              image_size=image_size,
-             img_height=img_height,
-             img_width=img_width,
              guidance_scale=guidance_scale,
          )

optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py CHANGED
@@ -32,6 +32,8 @@ class _RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
          batch_size: Optional[int] = None,
          img_height: Optional[int] = None,
          img_width: Optional[int] = None,
+         height: Optional[int] = None,
+         width: Optional[int] = None,
          sample_size: Optional[Tuple[int, int]] = None,
          image_size: Optional[Tuple[int, int]] = None,
          guidance_scale: Optional[float] = None,
@@ -48,6 +50,8 @@ class _RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
              batch_size (Optional[int]): Batch size for inference, applied to all submodules.
              img_height (Optional[int]): Height of the generated images.
              img_width (Optional[int]): Width of the generated images.
+             height (Optional[int]): Height of the generated images.
+             width (Optional[int]): Width of the generated images.
              sample_size (Optional[Tuple[int, int]]): Spatial dimensions for the UNet model.
              image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
                  Cannot be used together with img_height/img_width.
@@ -62,11 +66,29 @@ class _RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
              accommodate classifier-free guidance.
          """
          super().__init__(**kwargs)
-         if image_size is not None and (img_height is not None or img_width is not None):
-             raise ValueError("image_size and img_height/img_width cannot both be provided")

-         if img_height is not None and img_width is not None:
+         # Initial check for image_size conflict remains as is
+         if image_size is not None and (
+             img_height is not None or img_width is not None or height is not None or width is not None
+         ):
+             raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+         # Prioritize height/width (HF-aligned)
+         if height is not None and width is not None:
+             if img_height is not None or img_width is not None:
+                 # Raise error if both sets of arguments are provided
+                 raise ValueError(
+                     "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                     "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                 )
+             image_size = (height, width)
+         elif (height is not None and width is None) or (height is None and width is not None):
+             raise ValueError("Both height and width must be provided together if used")
+         # Fallback to img_height/img_width for backward compatibility
+         elif img_height is not None and img_width is not None:
              image_size = (img_height, img_width)
+         elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+             raise ValueError("Both img_height and img_width must be provided together if used")

          self.text_encoder = self.init_submodule_config(RBLNCLIPTextModelConfig, text_encoder, batch_size=batch_size)
          self.unet = self.init_submodule_config(
optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py CHANGED
@@ -37,6 +37,8 @@ class _RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
          batch_size: Optional[int] = None,
          img_height: Optional[int] = None,
          img_width: Optional[int] = None,
+         height: Optional[int] = None,
+         width: Optional[int] = None,
          guidance_scale: Optional[float] = None,
          **kwargs,
      ):
@@ -59,6 +61,8 @@ class _RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
              batch_size (Optional[int]): Batch size for inference, applied to all submodules.
              img_height (Optional[int]): Height of the generated images.
              img_width (Optional[int]): Width of the generated images.
+             height (Optional[int]): Height of the generated images.
+             width (Optional[int]): Width of the generated images.
              guidance_scale (Optional[float]): Scale for classifier-free guidance.
              **kwargs: Additional arguments passed to the parent RBLNModelConfig.

@@ -70,11 +74,29 @@ class _RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
              accommodate classifier-free guidance.
          """
          super().__init__(**kwargs)
-         if image_size is not None and (img_height is not None or img_width is not None):
-             raise ValueError("image_size and img_height/img_width cannot both be provided")

-         if img_height is not None and img_width is not None:
+         # Initial check for image_size conflict remains as is
+         if image_size is not None and (
+             img_height is not None or img_width is not None or height is not None or width is not None
+         ):
+             raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+         # Prioritize height/width (HF-aligned)
+         if height is not None and width is not None:
+             if img_height is not None or img_width is not None:
+                 # Raise error if both sets of arguments are provided
+                 raise ValueError(
+                     "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                     "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                 )
+             image_size = (height, width)
+         elif (height is not None and width is None) or (height is None and width is not None):
+             raise ValueError("Both height and width must be provided together if used")
+         # Fallback to img_height/img_width for backward compatibility
+         elif img_height is not None and img_width is not None:
              image_size = (img_height, img_width)
+         elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+             raise ValueError("Both img_height and img_width must be provided together if used")

          max_seq_len = max_seq_len or 256

optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py CHANGED
@@ -33,6 +33,8 @@ class _RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
          batch_size: Optional[int] = None,
          img_height: Optional[int] = None,
          img_width: Optional[int] = None,
+         height: Optional[int] = None,
+         width: Optional[int] = None,
          sample_size: Optional[Tuple[int, int]] = None,
          image_size: Optional[Tuple[int, int]] = None,
          guidance_scale: Optional[float] = None,
@@ -51,6 +53,8 @@ class _RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
              batch_size (Optional[int]): Batch size for inference, applied to all submodules.
              img_height (Optional[int]): Height of the generated images.
              img_width (Optional[int]): Width of the generated images.
+             height (Optional[int]): Height of the generated images.
+             width (Optional[int]): Width of the generated images.
              sample_size (Optional[Tuple[int, int]]): Spatial dimensions for the UNet model.
              image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
                  Cannot be used together with img_height/img_width.
@@ -65,11 +69,29 @@ class _RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
              accommodate classifier-free guidance.
          """
          super().__init__(**kwargs)
-         if image_size is not None and (img_height is not None or img_width is not None):
-             raise ValueError("image_size and img_height/img_width cannot both be provided")

-         if img_height is not None and img_width is not None:
+         # Initial check for image_size conflict remains as is
+         if image_size is not None and (
+             img_height is not None or img_width is not None or height is not None or width is not None
+         ):
+             raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")
+
+         # Prioritize height/width (HF-aligned)
+         if height is not None and width is not None:
+             if img_height is not None or img_width is not None:
+                 # Raise error if both sets of arguments are provided
+                 raise ValueError(
+                     "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
+                     "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
+                 )
+             image_size = (height, width)
+         elif (height is not None and width is None) or (height is None and width is not None):
+             raise ValueError("Both height and width must be provided together if used")
+         # Fallback to img_height/img_width for backward compatibility
+         elif img_height is not None and img_width is not None:
              image_size = (img_height, img_width)
+         elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
+             raise ValueError("Both img_height and img_width must be provided together if used")

          self.text_encoder = self.init_submodule_config(RBLNCLIPTextModelConfig, text_encoder, batch_size=batch_size)
          self.text_encoder_2 = self.init_submodule_config(
optimum/rbln/modeling.py CHANGED
@@ -282,7 +282,7 @@ class RBLNModel(RBLNBaseModel):
          Prepare model output based on return_dict flag.
          This method can be overridden by subclasses to provide task-specific output handling.
          """
-         tuple_output = (output,) if not isinstance(output, (tuple, list)) else output
+         tuple_output = (output,) if not isinstance(output, (tuple, list)) else tuple(output)
          if not return_dict:
              return tuple_output
          else:
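
The one-character change above closes a small contract gap: when a runtime returned a list, the old expression passed the list through unchanged, so non-return_dict callers that unpack or compare against tuples could see a list instead. A quick illustration of the before/after behavior:

output = [1, 2]
old = (output,) if not isinstance(output, (tuple, list)) else output
new = (output,) if not isinstance(output, (tuple, list)) else tuple(output)
assert old == [1, 2]  # a list used to leak through
assert new == (1, 2)  # now always a tuple
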
optimum/rbln/transformers/models/blip_2/configuration_blip_2.py CHANGED
@@ -18,29 +18,12 @@ from ....configuration_utils import RBLNModelConfig


  class RBLNBlip2VisionModelConfig(RBLNModelConfig):
-     def __init__(
-         self,
-         batch_size: Optional[int] = None,
-         **kwargs,
-     ):
-         """
-         Args:
-             batch_size (Optional[int]): The batch size for inference. Defaults to 1.
-             **kwargs: Additional arguments passed to the parent RBLNModelConfig.
-
-         Raises:
-             ValueError: If batch_size is not a positive integer.
-         """
-         super().__init__(**kwargs)
-         self.batch_size = batch_size or 1
-         if not isinstance(self.batch_size, int) or self.batch_size < 0:
-             raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+     pass


  class RBLNBlip2QFormerModelConfig(RBLNModelConfig):
      def __init__(
          self,
-         batch_size: Optional[int] = None,
          num_query_tokens: Optional[int] = None,
          image_text_hidden_size: Optional[int] = None,
          **kwargs,
@@ -54,10 +37,6 @@ class RBLNBlip2QFormerModelConfig(RBLNModelConfig):
              ValueError: If batch_size is not a positive integer.
          """
          super().__init__(**kwargs)
-         self.batch_size = batch_size or 1
-         if not isinstance(self.batch_size, int) or self.batch_size < 0:
-             raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
-
          self.num_query_tokens = num_query_tokens
          self.image_text_hidden_size = image_text_hidden_size

@@ -88,6 +67,6 @@ class RBLNBlip2ForConditionalGenerationConfig(RBLNModelConfig):
          if not isinstance(self.batch_size, int) or self.batch_size < 0:
              raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")

-         self.vision_model = self.init_submodule_config(RBLNBlip2VisionModelConfig, vision_model, batch_size=batch_size)
+         self.vision_model = self.init_submodule_config(RBLNBlip2VisionModelConfig, vision_model)
          self.language_model = language_model
-         self.qformer = self.init_submodule_config(RBLNBlip2QFormerModelConfig, qformer, batch_size=batch_size)
+         self.qformer = self.init_submodule_config(RBLNBlip2QFormerModelConfig, qformer)
optimum/rbln/transformers/models/blip_2/modeling_blip_2.py CHANGED
@@ -42,6 +42,28 @@ if TYPE_CHECKING:
      )


+ class LoopProjector:
+     def __init__(self, language_projection) -> None:
+         self.language_projection = language_projection
+
+     def forward(self, *args, **kwargs):
+         query_output = args[0]
+
+         batch_size = query_output.shape[0]
+         outputs = []
+         for i in range(batch_size):
+             outputs.append(self.language_projection(query_output[i : i + 1]))
+
+         outputs = torch.cat(outputs, dim=0)
+         return outputs
+
+     def __call__(self, *args: Any, **kwds: Any) -> Any:
+         return self.forward(*args, **kwds)
+
+     def __repr__(self) -> str:
+         return repr(self.language_projection)
+
+
  class RBLNBlip2VisionModel(RBLNModel):
      def get_input_embeddings(self):
          return self.embeddings
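
LoopProjector exists because the projection is now compiled with a fixed batch size of 1 (see the input-shape hunks below): it replays the compiled module once per sample and concatenates the results, so callers can still pass arbitrary batch sizes, as vLLM continuous batching does. A minimal sketch of the pattern, assuming the LoopProjector class from the hunk above is in scope and using a plain torch.nn.Linear as a stand-in for the compiled self.model[0]:

import torch

projection = torch.nn.Linear(8, 4)       # stand-in for the compiled projection module
loop_projection = LoopProjector(projection)

query_output = torch.randn(3, 32, 8)     # batch of 3, even though the module expects batch 1
out = loop_projection(query_output)      # runs the module 3 times, one sample at a time
assert out.shape == (3, 32, 4)
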
@@ -71,7 +93,8 @@ class RBLNBlip2VisionModel(RBLNModel):
              (
                  "pixel_values",
                  [
-                     rbln_config.batch_size,
+                     # support for vllm CB (prefill)
+                     1,
                      model_config.num_channels,
                      model_config.image_size,
                      model_config.image_size,
@@ -86,27 +109,30 @@ class RBLNBlip2VisionModel(RBLNModel):

      def forward(
          self,
-         pixel_values: Optional[torch.FloatTensor] = None,
+         pixel_values,
          output_attentions: Optional[bool] = None,
          output_hidden_states: Optional[bool] = None,
          return_dict: Optional[bool] = None,
          interpolate_pos_encoding: bool = False,
      ) -> Union[Tuple, BaseModelOutputWithPooling]:
-         output = super().forward(pixel_values, return_dict=return_dict)
-         return output
+         batch_size = pixel_values.shape[0]
+         outputs = []
+         for i in range(batch_size):
+             outputs.append(self.model[0](pixel_values[i : i + 1]))
+
+         last_hidden_state = [output[0] for output in outputs]
+         pooler_output = [output[1] for output in outputs]
+
+         last_hidden_state = torch.cat(last_hidden_state, dim=0)
+         pooler_output = torch.cat(pooler_output, dim=0)

-     def _prepare_output(self, output, return_dict):
-         """
-         Prepare model output based on return_dict flag.
-         This method can be overridden by subclasses to provide task-specific output handling.
-         """
          if not return_dict:
-             return (output,) if not isinstance(output, (tuple, list)) else output
-         else:
-             return BaseModelOutputWithPooling(
-                 last_hidden_state=output[0],
-                 pooler_output=output[1],
-             )
+             return (last_hidden_state, pooler_output)
+
+         return BaseModelOutputWithPooling(
+             last_hidden_state=last_hidden_state,
+             pooler_output=pooler_output,
+         )


  class RBLNBlip2QFormerModel(RBLNModel):
@@ -158,7 +184,7 @@ class RBLNBlip2QFormerModel(RBLNModel):
              (
                  "query_embeds",
                  [
-                     rbln_config.batch_size,
+                     1,
                      rbln_config.num_query_tokens,
                      model_config.hidden_size,
                  ],
@@ -167,7 +193,7 @@ class RBLNBlip2QFormerModel(RBLNModel):
              (
                  "encoder_hidden_states",
                  [
-                     rbln_config.batch_size,
+                     1,
                      # image_text_hidden_size + cls token
                      rbln_config.image_text_hidden_size + 1,
                      model_config.encoder_hidden_size,
@@ -177,7 +203,7 @@ class RBLNBlip2QFormerModel(RBLNModel):
              (
                  "encoder_attention_mask",
                  # image_text_hidden_size + cls token
-                 [rbln_config.batch_size, rbln_config.image_text_hidden_size + 1],
+                 [1, rbln_config.image_text_hidden_size + 1],
                  "int64",
              ),
          ]
@@ -200,21 +226,28 @@ class RBLNBlip2QFormerModel(RBLNModel):
          output_hidden_states: Optional[bool] = None,
          return_dict: Optional[bool] = None,
      ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
-         output = super().forward(query_embeds, encoder_hidden_states, encoder_attention_mask, return_dict=return_dict)
-         return output
+         batch_size = query_embeds.shape[0]
+         outputs = []
+         for i in range(batch_size):
+             outputs.append(
+                 self.model[0](
+                     query_embeds[i : i + 1], encoder_hidden_states[i : i + 1], encoder_attention_mask[i : i + 1]
+                 )
+             )
+
+         sequence_output = [output[0] for output in outputs]
+         pooled_output = [output[1] for output in outputs]
+
+         sequence_output = torch.cat(sequence_output, dim=0)
+         pooled_output = torch.cat(pooled_output, dim=0)

-     def _prepare_output(self, output, return_dict):
-         """
-         Prepare model output based on return_dict flag.
-         This method can be overridden by subclasses to provide task-specific output handling.
-         """
          if not return_dict:
-             return (output,) if not isinstance(output, (tuple, list)) else output
-         else:
-             return BaseModelOutputWithPoolingAndCrossAttentions(
-                 last_hidden_state=output[0],
-                 pooler_output=output[1],
-             )
+             return (sequence_output, pooled_output)
+
+         return BaseModelOutputWithPoolingAndCrossAttentions(
+             last_hidden_state=sequence_output,
+             pooler_output=pooled_output,
+         )


  class RBLNBlip2ForConditionalGeneration(RBLNModel):
@@ -254,7 +287,7 @@ class RBLNBlip2ForConditionalGeneration(RBLNModel):
          self.vision_model = self.rbln_submodules[0]
          self.language_model = self.rbln_submodules[2]
          self.qformer = self.rbln_submodules[1]
-         self.language_projection = self.model[0]
+         self.language_projection = LoopProjector(self.model[0])

          artifacts = torch.load(self.model_save_dir / self.subfolder / "query_tokens.pth", weights_only=False)
          self.query_tokens = artifacts["query_tokens"]
@@ -284,7 +317,7 @@ class RBLNBlip2ForConditionalGeneration(RBLNModel):
              (
                  "query_output",
                  [
-                     rbln_config.batch_size,
+                     1,
                      model_config.num_query_tokens,
                      model_config.qformer_config.hidden_size,
                  ],
@@ -296,3 +329,61 @@ class RBLNBlip2ForConditionalGeneration(RBLNModel):
          rbln_config.set_compile_cfgs([rbln_compile_config])

          return rbln_config
+
+     def _preprocess_prefill(
+         self,
+         pixel_values: torch.FloatTensor,
+         input_ids: torch.FloatTensor,
+         attention_mask: Optional[torch.LongTensor] = None,
+         return_dict: Optional[bool] = None,
+         interpolate_pos_encoding: bool = False,
+         **kwargs,
+     ):
+         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+         vision_outputs = self.vision_model(
+             pixel_values=pixel_values,
+             return_dict=return_dict,
+             interpolate_pos_encoding=interpolate_pos_encoding,
+         )
+         image_embeds = vision_outputs[0]
+
+         image_attention_mask = torch.ones(image_embeds.size()[:-1], dtype=torch.long, device=image_embeds.device)
+
+         query_tokens = self.query_tokens.expand(image_embeds.shape[0], -1, -1)
+
+         query_outputs = self.qformer(
+             query_embeds=query_tokens,
+             encoder_hidden_states=image_embeds,
+             encoder_attention_mask=image_attention_mask,
+             return_dict=return_dict,
+         )
+         query_output = query_outputs[0]
+
+         if query_output.dtype != image_embeds.dtype:
+             query_output = query_output.to(image_embeds.dtype)
+
+         language_model_inputs = self.language_projection(query_output)
+         language_model_attention_mask = torch.ones(
+             language_model_inputs.size()[:-1], dtype=torch.long, device=language_model_inputs.device
+         )
+         inputs_embeds = self.language_model.get_input_embeddings()(input_ids)
+         if attention_mask is None:
+             attention_mask = torch.ones_like(input_ids)
+
+         if getattr(self.config, "image_token_index", None) is not None:
+             special_image_mask = (input_ids == self.config.image_token_index).unsqueeze(-1).expand_as(inputs_embeds)
+             language_model_inputs = language_model_inputs.to(inputs_embeds.device, inputs_embeds.dtype)
+             inputs_embeds = inputs_embeds.masked_scatter(special_image_mask, language_model_inputs)
+         else:
+             logger.warning_once(
+                 "Expanding inputs for image tokens in BLIP-2 should be done in processing. "
+                 "Please follow instruction here (https://gist.github.com/zucchini-nlp/e9f20b054fa322f84ac9311d9ab67042) to update your BLIP-2 model. "
+                 "Using processors without these attributes in the config is deprecated and will throw an error in v4.50."
+             )
+             inputs_embeds = torch.cat([language_model_inputs, inputs_embeds.to(language_model_inputs.device)], dim=1)
+             attention_mask = torch.cat(
+                 [language_model_attention_mask, attention_mask.to(language_model_attention_mask.device)], dim=1
+             )
+
+         return inputs_embeds
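
The image_token_index branch of _preprocess_prefill follows the upstream transformers BLIP-2 path: positions holding the image token are overwritten, in order, with the projected query embeddings via masked_scatter. A toy illustration with made-up token ids and 2-dim embeddings:

import torch

image_token_index = 50000                           # illustrative id
input_ids = torch.tensor([[50000, 50000, 11, 22]])  # two image slots, two text tokens
inputs_embeds = torch.zeros(1, 4, 2)                # text embeddings (zeros for visibility)
language_model_inputs = torch.ones(1, 2, 2)         # one projected embedding per image slot

mask = (input_ids == image_token_index).unsqueeze(-1).expand_as(inputs_embeds)
merged = inputs_embeds.masked_scatter(mask, language_model_inputs)
assert merged[0, 0].tolist() == [1.0, 1.0]  # image slots filled in order
assert merged[0, 3].tolist() == [0.0, 0.0]  # text positions untouched
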
optimum/rbln/transformers/models/gemma3/modeling_gemma3.py CHANGED
@@ -858,6 +858,11 @@ class RBLNGemma3ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
              max_seq_len=rbln_config.max_seq_len,
          )

+         if rbln_config.attn_impl == "eager":
+             raise ValueError(
+                 "Eager attention is not supported for Gemma3. Please use flash attention by setting `rbln_attn_impl='flash_attn'`. Stay tuned for future updates!"
+             )
+
          validate_attention_method(
              attn_impl=rbln_config.attn_impl,
              kvcache_partition_len=rbln_config.kvcache_partition_len,
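
With this guard in place, compiling Gemma3 with eager attention fails fast. A hedged usage sketch: the rbln_attn_impl knob is named in the error message above, while the import path, from_pretrained call shape, and model id are assumptions based on optimum-rbln conventions rather than anything shown in this diff:

from optimum.rbln import RBLNGemma3ForCausalLM  # import path assumed

model = RBLNGemma3ForCausalLM.from_pretrained(
    "google/gemma-3-4b-it",        # illustrative model id
    export=True,                   # compile for RBLN on load (assumed convention)
    rbln_attn_impl="flash_attn",   # "eager" now raises a ValueError
)
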
optimum_rbln-0.7.5rc2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: optimum-rbln
- Version: 0.7.5rc1
+ Version: 0.7.5rc2
  Summary: Optimum RBLN is the interface between the HuggingFace Transformers and Diffusers libraries and RBLN accelerators. It provides a set of tools enabling easy model loading and inference on single and multiple rbln device settings for different downstream tasks.
  Project-URL: Homepage, https://rebellions.ai
  Project-URL: Documentation, https://docs.rbln.ai
optimum_rbln-0.7.5rc2.dist-info/RECORD CHANGED
@@ -1,7 +1,7 @@
  optimum/rbln/__init__.py,sha256=oAnsJSMrPYwBGEttUt3CMXTIESVNe15ftTWRTShwhZI,14386
- optimum/rbln/__version__.py,sha256=6UGbTME6xZZ1ojJoRpul_clf4TsgGIZHt3214_8maxM,521
+ optimum/rbln/__version__.py,sha256=fpFaQLT4vGQYujVJTSb1WZo3X-GKEGeYInrc_bpJrpQ,521
  optimum/rbln/configuration_utils.py,sha256=gvAjRFEGw5rnSoH0IoyuLrE4fkxtk3DN1pikqrN_Rpk,31277
- optimum/rbln/modeling.py,sha256=CWYpOLQOu1RUQrHvoX3FoidiP2XltDzC9gWIzznUTFo,11455
+ optimum/rbln/modeling.py,sha256=BpydF-bLBF60NnRMbtZwn5odOUjU4Awu9azqGeSufTI,11462
  optimum/rbln/modeling_base.py,sha256=HQgscr5jpUEtuXU1ACJHSLIntX-kq6Ef0SQ_W2-rp5A,25341
  optimum/rbln/diffusers/__init__.py,sha256=XL6oKPHbPCV6IVCw3fu0-M9mD2KO_x6unx5kJdAtpVY,6180
  optimum/rbln/diffusers/modeling_diffusers.py,sha256=bPyP5RMbOFLb2DfEAuLVp7hTuQWJvWid7El72wGmFrY,19535
@@ -14,11 +14,11 @@ optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py,sh
  optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py,sha256=c-1xAFgA1st8djLXkLeXtctcFp1MqZZYZp3Phqn1Wxo,3366
  optimum/rbln/diffusers/configurations/models/configuration_vq_model.py,sha256=su4Ceok4Wx4m1hKp5YEM3zigrlTmj3NIs9X7aAKOeWg,2980
  optimum/rbln/diffusers/configurations/pipelines/__init__.py,sha256=HJlu5lRZJWy4nYjBw3-ed93Pfb5QQmUbCJZKDW1bGH4,1160
- optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py,sha256=G9e2fxVmQ2JD-0iWrRe4jjCTtj9ZysI8oM_dWohtMO8,11044
- optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py,sha256=u26JzBCgGnc581up4y3XXyFX5lqAsKGF0IyDRGdYPp8,12746
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py,sha256=cE5BHI2sy3PCz0kfhQic611feGwavb7wtpx04MPR6us,4992
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py,sha256=54NTvVur7RADGgjGwO33s76dgKQ4zVNvmFl68rQFapw,6370
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py,sha256=H0hqsqpAfqb9gBIK5KsfUf9gX9cTnggK9Nt2aqfzeIM,5528
+ optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py,sha256=L5WeSCpYUdcTG4wBrMZIQIEUZV2Jxegdr53n8oSf6II,13748
+ optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py,sha256=JkdeFVU4RNiCJaSQUOJ3gWfKAyRhDRxMqEd68NJIij0,15675
+ optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py,sha256=C4PKcN6SZYmzTwyaIxXKXtltzUOWs7p1cwzudUwodY0,6344
+ optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py,sha256=lfyjjHKYHZ470tDAeNKRL2tJf_TpECzSGEGm5iqoZBo,7722
+ optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py,sha256=SHA1IV-oqdRpFM_ZLJ8GTmrrSdPXAUvLVtx5I3VQN7U,6880
  optimum/rbln/diffusers/models/__init__.py,sha256=mkCvJyH1KcwrsUvYSq_bVC79oOfyqtBSFDyPS1_48wA,1478
  optimum/rbln/diffusers/models/controlnet.py,sha256=kzDbUckGlioor8t0kKBvwi-hzNaG15XluEzTa7xZs1Q,10292
  optimum/rbln/diffusers/models/autoencoders/__init__.py,sha256=dg17ZTUsiqTcbIaEE4fqew9uRbao0diQ21PXvRKIqKg,679
@@ -79,8 +79,8 @@ optimum/rbln/transformers/models/bert/__init__.py,sha256=86FuGRBLw315_Roa9D5OUx6
  optimum/rbln/transformers/models/bert/configuration_bert.py,sha256=NIlBRn-zrnNirkEfJ4Uv2TZRIBL761PLJ9-cZaPyzpg,1017
  optimum/rbln/transformers/models/bert/modeling_bert.py,sha256=XxsRhBhexZ2w3mRCvKl73pIyGdqcFR1RrOKG7h4EAyk,1223
  optimum/rbln/transformers/models/blip_2/__init__.py,sha256=L01gPXcUCa8Vg-bcng20vZvBIN_jlqCzwUSFuq0QOag,855
- optimum/rbln/transformers/models/blip_2/configuration_blip_2.py,sha256=Dh_gbeF46Tg3DKK4lq9DRblweI3B7XZHE2PlxO8qStU,3662
- optimum/rbln/transformers/models/blip_2/modeling_blip_2.py,sha256=4-EWHRau363-YoZQcTfLXqm97IsAs3-Uya2L1IVGfxE,10830
+ optimum/rbln/transformers/models/blip_2/configuration_blip_2.py,sha256=bAr3tlW2upxdBiihR7wUJGRxpdtelxt9BAkL9UXLJGE,2746
+ optimum/rbln/transformers/models/blip_2/modeling_blip_2.py,sha256=8pjFNXYM2phJQBoTWw08hK_wi7APjjhFTJfBZ3cx_Xo,14301
  optimum/rbln/transformers/models/clip/__init__.py,sha256=TLeXDqcFK6M6v9x7Xr64kBbqGu3hFHM7p754dQ8UVQc,938
  optimum/rbln/transformers/models/clip/configuration_clip.py,sha256=wgfZeVvcVdSzrN9tcnt7DKJQ0NLR0REvW7MyUXyv2Bg,2976
  optimum/rbln/transformers/models/clip/modeling_clip.py,sha256=UslcDN6otyQ_psou7F_YcdK5vCImEtgIdcbwmexSfOM,7256
@@ -102,7 +102,7 @@ optimum/rbln/transformers/models/gemma/modeling_gemma.py,sha256=-U3w3cEOv3ps1S8a
  optimum/rbln/transformers/models/gemma3/__init__.py,sha256=6rugk3615SEt4lh7gduo_J9VyGiSReuEIvL0Uno0eaI,790
  optimum/rbln/transformers/models/gemma3/configuration_gemma3.py,sha256=nndcYVwDYkOige_qO4td-YwLNtUz6aLiSQDIfPdGG9A,2840
  optimum/rbln/transformers/models/gemma3/gemma3_architecture.py,sha256=Uer27wG06hgV1WNf92x1ZeUpl4Q0zskfCqzlLhtgtNU,17348
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=OUGsML-H6FOKldld7KRW9l0nRoT4DojWBDl8ZHpfXVA,44982
+ optimum/rbln/transformers/models/gemma3/modeling_gemma3.py,sha256=mx3upghkboeyRGYxwPfA1fzRNPWj5MzX8bIy0kszHWY,45235
  optimum/rbln/transformers/models/gpt2/__init__.py,sha256=socBMIBZSiLbrVN12rQ4nL9gFeT0axMgz6SWaCaD4Ac,704
  optimum/rbln/transformers/models/gpt2/configuration_gpt2.py,sha256=vKvJD8P9Li4W9wdVoQcqMEr1MwEXojPBnF2NE85VXAo,772
  optimum/rbln/transformers/models/gpt2/gpt2_architecture.py,sha256=kf5jk7Djv9XXX3Q83oTosiMpt9g44TF_gCT-vMiWDJk,3097
@@ -178,7 +178,7 @@ optimum/rbln/utils/model_utils.py,sha256=V2kFpUe2aqVzLwbpztD8JOVFQqRHncvIWwJbgnU
  optimum/rbln/utils/runtime_utils.py,sha256=LoKNK3AQNV_BSScstIZWjICkJf265MnUgy360BOocVI,5454
  optimum/rbln/utils/save_utils.py,sha256=hG5uOtYmecSXZuGTvCXsTM-SiyZpr5q3InUGCCq_jzQ,3619
  optimum/rbln/utils/submodule.py,sha256=ZfI7e3YzbjbbBW4Yjfucj8NygEsukfIkaJi3PtwHrhc,5105
- optimum_rbln-0.7.5rc1.dist-info/METADATA,sha256=RUPCGW8cEzu6extEsTB9xYDgOb8hAqgEKG0tG3K5feA,5300
- optimum_rbln-0.7.5rc1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- optimum_rbln-0.7.5rc1.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
- optimum_rbln-0.7.5rc1.dist-info/RECORD,,
+ optimum_rbln-0.7.5rc2.dist-info/METADATA,sha256=WltiXJxC_INRxeoxhT4xyvLOO8FM6cqJOqbpnTQ6xhg,5300
+ optimum_rbln-0.7.5rc2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ optimum_rbln-0.7.5rc2.dist-info/licenses/LICENSE,sha256=QwcOLU5TJoTeUhuIXzhdCEEDDvorGiC6-3YTOl4TecE,11356
+ optimum_rbln-0.7.5rc2.dist-info/RECORD,,