optimum-rbln 0.9.3rc0__py3-none-any.whl → 0.9.5a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. optimum/rbln/__init__.py +48 -0
  2. optimum/rbln/__version__.py +2 -2
  3. optimum/rbln/configuration_utils.py +50 -21
  4. optimum/rbln/diffusers/__init__.py +12 -0
  5. optimum/rbln/diffusers/configurations/__init__.py +3 -0
  6. optimum/rbln/diffusers/configurations/models/__init__.py +2 -0
  7. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_temporal_decoder.py +67 -0
  8. optimum/rbln/diffusers/configurations/models/configuration_unet_spatio_temporal_condition.py +59 -0
  9. optimum/rbln/diffusers/configurations/pipelines/__init__.py +3 -0
  10. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_video_diffusion.py +114 -0
  11. optimum/rbln/diffusers/modeling_diffusers.py +1 -1
  12. optimum/rbln/diffusers/models/__init__.py +17 -3
  13. optimum/rbln/diffusers/models/autoencoders/__init__.py +1 -0
  14. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +3 -3
  15. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +275 -0
  16. optimum/rbln/diffusers/models/autoencoders/vae.py +27 -8
  17. optimum/rbln/diffusers/models/controlnet.py +17 -2
  18. optimum/rbln/diffusers/models/transformers/prior_transformer.py +16 -2
  19. optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +16 -1
  20. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +14 -1
  21. optimum/rbln/diffusers/models/unets/__init__.py +1 -0
  22. optimum/rbln/diffusers/models/unets/unet_2d_condition.py +18 -2
  23. optimum/rbln/diffusers/models/unets/unet_spatio_temporal_condition.py +201 -0
  24. optimum/rbln/diffusers/pipelines/__init__.py +4 -0
  25. optimum/rbln/diffusers/pipelines/auto_pipeline.py +2 -2
  26. optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +20 -0
  27. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +13 -4
  28. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +13 -4
  29. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +13 -4
  30. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -4
  31. optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +1 -1
  32. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +1 -1
  33. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +1 -2
  34. optimum/rbln/diffusers/pipelines/stable_video_diffusion/__init__.py +15 -0
  35. optimum/rbln/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +46 -0
  36. optimum/rbln/modeling.py +20 -45
  37. optimum/rbln/modeling_base.py +18 -14
  38. optimum/rbln/ops/__init__.py +1 -0
  39. optimum/rbln/ops/attn.py +10 -0
  40. optimum/rbln/ops/flash_attn.py +8 -0
  41. optimum/rbln/ops/moe.py +180 -0
  42. optimum/rbln/ops/sliding_window_attn.py +9 -0
  43. optimum/rbln/transformers/__init__.py +36 -0
  44. optimum/rbln/transformers/configuration_generic.py +0 -27
  45. optimum/rbln/transformers/modeling_attention_utils.py +156 -127
  46. optimum/rbln/transformers/modeling_generic.py +2 -61
  47. optimum/rbln/transformers/modeling_outputs.py +26 -0
  48. optimum/rbln/transformers/modeling_rope_utils.py +78 -42
  49. optimum/rbln/transformers/models/__init__.py +28 -0
  50. optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +28 -2
  51. optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +68 -5
  52. optimum/rbln/transformers/models/auto/auto_factory.py +1 -0
  53. optimum/rbln/transformers/models/bart/bart_architecture.py +24 -24
  54. optimum/rbln/transformers/models/bart/modeling_bart.py +23 -2
  55. optimum/rbln/transformers/models/bert/modeling_bert.py +86 -1
  56. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +42 -15
  57. optimum/rbln/transformers/models/clip/modeling_clip.py +40 -2
  58. optimum/rbln/transformers/models/colpali/colpali_architecture.py +14 -20
  59. optimum/rbln/transformers/models/colpali/configuration_colpali.py +12 -17
  60. optimum/rbln/transformers/models/colpali/modeling_colpali.py +66 -221
  61. optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py +38 -23
  62. optimum/rbln/transformers/models/colqwen2/modeling_colqwen2.py +107 -371
  63. optimum/rbln/transformers/models/decoderonly/__init__.py +2 -0
  64. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +128 -17
  65. optimum/rbln/transformers/models/decoderonly/configuration_lora.py +2 -2
  66. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +211 -89
  67. optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +205 -64
  68. optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +17 -9
  69. optimum/rbln/transformers/models/decoderonly/lora_architecture.py +1 -1
  70. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +194 -132
  71. optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py +17 -0
  72. optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +24 -0
  73. optimum/rbln/transformers/models/dpt/modeling_dpt.py +17 -0
  74. optimum/rbln/transformers/models/exaone/exaone_architecture.py +0 -36
  75. optimum/rbln/transformers/models/gemma/gemma_architecture.py +1 -1
  76. optimum/rbln/transformers/models/gemma2/__init__.py +16 -0
  77. optimum/rbln/transformers/models/gemma2/configuration_gemma2.py +45 -0
  78. optimum/rbln/transformers/models/gemma2/gemma2_architecture.py +83 -0
  79. optimum/rbln/transformers/models/gemma2/modeling_gemma2.py +101 -0
  80. optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +23 -19
  81. optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py +42 -70
  82. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +46 -31
  83. optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +8 -34
  84. optimum/rbln/transformers/models/gpt_oss/__init__.py +16 -0
  85. optimum/rbln/transformers/models/gpt_oss/configuration_gpt_oss.py +41 -0
  86. optimum/rbln/transformers/models/gpt_oss/gpt_oss_architecture.py +122 -0
  87. optimum/rbln/transformers/models/gpt_oss/modeling_gpt_oss.py +165 -0
  88. optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +8 -5
  89. optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +7 -5
  90. optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +24 -9
  91. optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +3 -5
  92. optimum/rbln/transformers/models/llava/modeling_llava.py +37 -26
  93. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +3 -5
  94. optimum/rbln/transformers/models/midm/midm_architecture.py +29 -22
  95. optimum/rbln/transformers/models/mistral/modeling_mistral.py +0 -22
  96. optimum/rbln/transformers/models/opt/modeling_opt.py +2 -2
  97. optimum/rbln/transformers/models/opt/opt_architecture.py +1 -44
  98. optimum/rbln/transformers/models/paligemma/__init__.py +16 -0
  99. optimum/rbln/transformers/models/paligemma/configuration_paligemma.py +129 -0
  100. optimum/rbln/transformers/models/paligemma/modeling_paligemma.py +564 -0
  101. optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +1 -1
  102. optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +24 -24
  103. optimum/rbln/transformers/models/phi/phi_architecture.py +13 -21
  104. optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +13 -1
  105. optimum/rbln/transformers/models/pixtral/pixtral_architecture.py +2 -2
  106. optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +0 -28
  107. optimum/rbln/transformers/models/qwen2_5_vl/__init__.py +6 -1
  108. optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +11 -1
  109. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +278 -130
  110. optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +43 -39
  111. optimum/rbln/transformers/models/qwen2_moe/__init__.py +16 -0
  112. optimum/rbln/transformers/models/qwen2_moe/configuration_qwen2_moe.py +38 -0
  113. optimum/rbln/transformers/models/qwen2_moe/modeling_qwen2_moe.py +68 -0
  114. optimum/rbln/transformers/models/qwen2_moe/qwen2_moe_architecture.py +94 -0
  115. optimum/rbln/transformers/models/qwen2_vl/__init__.py +6 -1
  116. optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +11 -1
  117. optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +268 -111
  118. optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +27 -35
  119. optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +0 -20
  120. optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +7 -7
  121. optimum/rbln/transformers/models/qwen3_moe/__init__.py +16 -0
  122. optimum/rbln/transformers/models/qwen3_moe/configuration_qwen3_moe.py +38 -0
  123. optimum/rbln/transformers/models/qwen3_moe/modeling_qwen3_moe.py +68 -0
  124. optimum/rbln/transformers/models/qwen3_moe/qwen3_moe_architecture.py +100 -0
  125. optimum/rbln/transformers/models/resnet/configuration_resnet.py +17 -0
  126. optimum/rbln/transformers/models/resnet/modeling_resnet.py +73 -0
  127. optimum/rbln/transformers/models/roberta/modeling_roberta.py +33 -0
  128. optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +2 -4
  129. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +36 -12
  130. optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +14 -12
  131. optimum/rbln/transformers/models/siglip/modeling_siglip.py +21 -19
  132. optimum/rbln/transformers/models/swin/configuration_swin.py +1 -6
  133. optimum/rbln/transformers/models/swin/modeling_swin.py +17 -4
  134. optimum/rbln/transformers/models/t5/modeling_t5.py +2 -2
  135. optimum/rbln/transformers/models/t5/t5_architecture.py +16 -17
  136. optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +25 -10
  137. optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +0 -3
  138. optimum/rbln/transformers/models/vit/modeling_vit.py +19 -0
  139. optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +15 -3
  140. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +60 -8
  141. optimum/rbln/transformers/models/whisper/generation_whisper.py +48 -14
  142. optimum/rbln/transformers/models/whisper/modeling_whisper.py +2 -2
  143. optimum/rbln/transformers/models/whisper/whisper_architecture.py +0 -3
  144. optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +53 -0
  145. optimum/rbln/transformers/utils/rbln_quantization.py +29 -12
  146. optimum/rbln/utils/deprecation.py +213 -0
  147. optimum/rbln/utils/hub.py +14 -3
  148. optimum/rbln/utils/import_utils.py +23 -2
  149. optimum/rbln/utils/runtime_utils.py +42 -6
  150. optimum/rbln/utils/submodule.py +27 -1
  151. {optimum_rbln-0.9.3rc0.dist-info → optimum_rbln-0.9.5a4.dist-info}/METADATA +6 -6
  152. {optimum_rbln-0.9.3rc0.dist-info → optimum_rbln-0.9.5a4.dist-info}/RECORD +155 -129
  153. {optimum_rbln-0.9.3rc0.dist-info → optimum_rbln-0.9.5a4.dist-info}/WHEEL +1 -1
  154. optimum/rbln/transformers/models/colqwen2/colqwen2_architecture.py +0 -233
  155. optimum/rbln/utils/depreacate_utils.py +0 -16
  156. {optimum_rbln-0.9.3rc0.dist-info → optimum_rbln-0.9.5a4.dist-info}/entry_points.txt +0 -0
  157. {optimum_rbln-0.9.3rc0.dist-info → optimum_rbln-0.9.5a4.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/transformers/models/bert/modeling_bert.py

@@ -12,7 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Optional, Tuple, Union
+
 import torch
+from transformers.modeling_outputs import (
+    BaseModelOutputWithPoolingAndCrossAttentions,
+    MaskedLMOutput,
+    QuestionAnsweringModelOutput,
+)
 
 from ...modeling_generic import (
     RBLNModelForMaskedLM,
@@ -35,9 +42,45 @@ class RBLNBertModel(RBLNTransformerEncoderForFeatureExtraction):
     rbln_model_input_names = ["input_ids", "attention_mask"]
 
     @classmethod
-    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNBertModelConfig) -> torch.nn.Module:
+    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNBertModelConfig) -> torch.nn.Module:
         return BertModelWrapper(model, rbln_config)
 
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Union[BaseModelOutputWithPoolingAndCrossAttentions, Tuple]:
+        """
+        Forward pass for the RBLN-optimized BERT model for feature extraction tasks.
+
+        Args:
+            input_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Indices of input sequence tokens in the vocabulary.
+            attention_mask (torch.Tensor of shape (batch_size, sequence_length), optional): Mask to avoid performing attention on padding token indices.
+            token_type_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Segment token indices to indicate first and second portions of the inputs.
+            position_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Indices of positions of each input sequence tokens in the position embeddings.
+
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a BaseModelOutputWithPoolingAndCrossAttentions object.
+        """
+
+        input_map = {
+            "input_ids": input_ids,
+            "attention_mask": attention_mask,
+            "token_type_ids": token_type_ids,
+            "position_ids": position_ids,
+        }
+
+        model_input_names = getattr(self.rbln_config, "model_input_names", None)
+        if model_input_names is None:
+            model_input_names = self.rbln_model_input_names
+
+        ordered_inputs = [input_map[name] for name in model_input_names if name in input_map]
+
+        return super().forward(*ordered_inputs, **kwargs)
+
 
 class RBLNBertForMaskedLM(RBLNModelForMaskedLM):
     """
@@ -50,6 +93,27 @@ class RBLNBertForMaskedLM(RBLNModelForMaskedLM):
 
     rbln_model_input_names = ["input_ids", "attention_mask", "token_type_ids"]
 
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Union[MaskedLMOutput, Tuple]:
+        """
+        Forward pass for the RBLN-optimized BERT model for masked language modeling tasks.
+
+        Args:
+            input_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Indices of input sequence tokens in the vocabulary.
+            attention_mask (torch.Tensor of shape (batch_size, sequence_length), optional): Mask to avoid performing attention on padding token indices.
+            token_type_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Segment token indices to indicate first and second portions of the inputs.
+
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a MaskedLMOutput object.
+        """
+
+        return super().forward(input_ids, attention_mask, token_type_ids, **kwargs)
+
 
 class RBLNBertForQuestionAnswering(RBLNModelForQuestionAnswering):
     """
@@ -61,3 +125,24 @@ class RBLNBertForQuestionAnswering(RBLNModelForQuestionAnswering):
     """
 
     rbln_model_input_names = ["input_ids", "attention_mask", "token_type_ids"]
+
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Union[QuestionAnsweringModelOutput, Tuple]:
+        """
+        Forward pass for the RBLN-optimized BERT model for question answering tasks.
+
+        Args:
+            input_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Indices of input sequence tokens in the vocabulary.
+            attention_mask (torch.Tensor of shape (batch_size, sequence_length), optional): Mask to avoid performing attention on padding token indices.
+            token_type_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Segment token indices to indicate first and second portions of the inputs.
+
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a QuestionAnsweringModelOutput object.
+        """
+
+        return super().forward(input_ids, attention_mask, token_type_ids, **kwargs)
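
The forward overrides above only document and reorder the keyword inputs before dispatching to the compiled runtime, so existing calling code is unaffected. A minimal usage sketch, assuming the usual optimum-rbln from_pretrained(..., export=True) compile-and-run flow; the checkpoint name and call pattern are illustrative and not taken from this diff:

# Hypothetical usage sketch (not part of the diff): compile a BERT checkpoint
# for the RBLN NPU, then call forward with the documented keyword arguments.
from transformers import AutoTokenizer
from optimum.rbln import RBLNBertForMaskedLM

model_id = "bert-base-uncased"  # any BERT checkpoint; illustrative only
tokenizer = AutoTokenizer.from_pretrained(model_id)

# export=True compiles the PyTorch model for the NPU (assumed optimum-rbln convention).
model = RBLNBertForMaskedLM.from_pretrained(model_id, export=True)

inputs = tokenizer("The capital of France is [MASK].", return_tensors="pt")
outputs = model(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    token_type_ids=inputs["token_type_ids"],
)

# Decode the highest-scoring token at the [MASK] position.
mask_pos = (inputs["input_ids"] == tokenizer.mask_token_id).nonzero()[0, 1]
print(tokenizer.decode([outputs.logits[0, mask_pos].argmax(-1).item()]))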
optimum/rbln/transformers/models/blip_2/modeling_blip_2.py

@@ -14,7 +14,7 @@
 
 import inspect
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Callable, List, Optional, Tuple, Union
 
 import torch
 from transformers import (
@@ -71,7 +71,7 @@ class RBLNBlip2VisionModel(RBLNModel):
         return self.embeddings
 
     @classmethod
-    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module:
+    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module:
         class Blip2VisionModelWrapper(torch.nn.Module):
             def __init__(self, model: "Blip2VisionModel") -> None:
                 super().__init__()
@@ -111,11 +111,20 @@ class RBLNBlip2VisionModel(RBLNModel):
     def forward(
         self,
         pixel_values: torch.FloatTensor,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
         interpolate_pos_encoding: bool = False,
+        return_dict: Optional[bool] = None,
     ) -> Union[Tuple, BaseModelOutputWithPooling]:
+        """
+        Forward pass for the RBLN-optimized Blip2VisionModel model.
+
+        Args:
+            pixel_values (torch.FloatTensor of shape (batch_size, num_channels, height, width)): The tensors corresponding to the input images.
+            interpolate_pos_encoding (bool, optional): Whether to interpolate the positional encoding of the image embeddings. Defaults to False.
+            return_dict (bool, optional): Whether to return a ModelOutput instead of a plain tuple.
+
+        Returns:
+            BaseModelOutputWithPooling or tuple(torch.FloatTensor): The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a BaseModelOutputWithPooling object.
+        """
         batch_size = pixel_values.shape[0]
         outputs = []
         for i in range(batch_size):
@@ -151,7 +160,7 @@ class RBLNBlip2QFormerModel(RBLNModel):
         return self.embeddings.word_embeddings
 
     @classmethod
-    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module:
+    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module:
         class Blip2QFormerModelWrapper(torch.nn.Module):
             def __init__(self, model: "Blip2QFormerModel"):
                 super().__init__()
@@ -231,17 +240,22 @@ class RBLNBlip2QFormerModel(RBLNModel):
     def forward(
         self,
         query_embeds: torch.FloatTensor,
-        query_length: Optional[int] = None,
-        attention_mask: Optional[torch.FloatTensor] = None,
-        head_mask: Optional[torch.FloatTensor] = None,
         encoder_hidden_states: Optional[torch.FloatTensor] = None,
         encoder_attention_mask: Optional[torch.FloatTensor] = None,
-        past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
-        use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
     ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
+        """
+        The forward pass for the RBLN-optimized Blip2QFormerModel model.
+
+        Args:
+            query_embeds (torch.FloatTensor): Hidden states to be used in the attention computation.
+            encoder_hidden_states (torch.FloatTensor, optional): Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if the model is configured as a decoder.
+            encoder_attention_mask (torch.FloatTensor, optional): Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in the cross-attention if the model is configured as a decoder.
+            return_dict (bool, optional): Whether to return a ModelOutput instead of a plain tuple.
+
+        Returns:
+            BaseModelOutputWithPoolingAndCrossAttentions or tuple(torch.FloatTensor): The model outputs. If `return_dict=False` is passed, returns a tuple of tensors. Otherwise, returns a `BaseModelOutputWithPoolingAndCrossAttentions` object.
+        """
         batch_size = query_embeds.shape[0]
         outputs = []
         for i in range(batch_size):
@@ -349,7 +363,7 @@ class RBLNBlip2ForConditionalGeneration(RBLNModel, RBLNDecoderOnlyGenerationMixi
         return self.language_model.get_input_embeddings()
 
     @classmethod
-    def wrap_model_if_needed(cls, model, rbln_config):
+    def _wrap_model_if_needed(cls, model, rbln_config):
        return model.language_projection
 
     @classmethod
@@ -444,7 +458,20 @@ class RBLNBlip2ForConditionalGeneration(RBLNModel, RBLNDecoderOnlyGenerationMixi
         inputs_embeds: Optional[torch.FloatTensor] = None,
         interpolate_pos_encoding: bool = False,
         **generate_kwargs,
-    ) -> torch.LongTensor:
+    ) -> List[torch.LongTensor]:
+        """
+        The generate function is utilized in its standard form as in the HuggingFace transformers library. User can use this function to generate text from the model.
+        Check the [HuggingFace transformers documentation](https://huggingface.co/docs/transformers/v4.57.1/en/model_doc/blip-2#transformers.Blip2ForConditionalGeneration.generate) for more details.
+
+        Args:
+            pixel_values (torch.FloatTensor): Input images to be processed.
+            input_ids (torch.LongTensor, optional): The sequence used as a prompt for the generation.
+            attention_mask (torch.LongTensor, optional): Mask to avoid performing attention on padding token indices
+            inputs_embeds (torch.FloatTensor, optional): Embedded representation of the inputs. Should be float, not int tokens.
+            interpolate_pos_encoding (bool, optional, defaults to False) — Whether to interpolate the positional encoding of the image embeddings.
+        Returns:
+            A list of strings of length batch_size * num_captions.
+        """
         batch_size = pixel_values.shape[0]
         image_embeds = self.vision_model(
             pixel_values,
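
The generate() docstring added above describes the standard transformers-style entry point for BLIP-2 captioning on the compiled model. A minimal usage sketch, assuming the usual optimum-rbln from_pretrained(..., export=True) convention; the checkpoint and image URL are illustrative, not taken from this diff:

# Hypothetical usage sketch (not part of the diff): image captioning with the
# RBLN BLIP-2 wrapper via generate().
import requests
from PIL import Image
from transformers import Blip2Processor
from optimum.rbln import RBLNBlip2ForConditionalGeneration

model_id = "Salesforce/blip2-opt-2.7b"  # illustrative checkpoint
processor = Blip2Processor.from_pretrained(model_id)
# export=True compiles the vision tower, Q-Former, and language model for the NPU
# (assumed optimum-rbln convention).
model = RBLNBlip2ForConditionalGeneration.from_pretrained(model_id, export=True)

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
inputs = processor(images=image, return_tensors="pt")

generated_ids = model.generate(**inputs, max_new_tokens=20)
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip())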
optimum/rbln/transformers/models/clip/modeling_clip.py

@@ -54,7 +54,7 @@ class RBLNCLIPTextModel(RBLNModel):
     _tp_support = False
 
     @classmethod
-    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNCLIPTextModelConfig) -> torch.nn.Module:
+    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNCLIPTextModelConfig) -> torch.nn.Module:
         return _TextEncoder(model).eval()
 
     @classmethod
@@ -92,6 +92,9 @@ class RBLNCLIPTextModel(RBLNModel):
         Args:
             input_ids (torch.LongTensor): The input ids to the model.
             return_dict (Optional[bool]): Whether to return a dictionary of outputs.
+
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a CLIPTextModelOutput object.
         """
 
         # To ignore using attention_mask, we override forward method.
@@ -157,7 +160,7 @@ class RBLNCLIPVisionModel(RBLNModel):
     _tp_support = False
 
     @classmethod
-    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNCLIPVisionModelConfig) -> torch.nn.Module:
+    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNCLIPVisionModelConfig) -> torch.nn.Module:
         wrapper_cfg = {
             "interpolate_pos_encoding": rbln_config.interpolate_pos_encoding,
             "output_hidden_states": rbln_config.output_hidden_states,
@@ -230,6 +233,9 @@ class RBLNCLIPVisionModel(RBLNModel):
             output_attentions (Optional[bool]): Whether to return attentions.
             output_hidden_states (Optional[bool]): Whether to return hidden states.
             interpolate_pos_encoding (bool): Whether to interpolate position encoding.
+
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a BaseModelOutputWithPooling object.
         """
 
         if len(kwargs) > 0 and any(value is not None for value in kwargs.values()):
@@ -307,6 +313,38 @@ class RBLNCLIPVisionModelWithProjection(RBLNCLIPVisionModel):
     multimodal embedding alignment tasks.
     """
 
+    def forward(
+        self,
+        pixel_values: torch.FloatTensor,
+        return_dict: bool = True,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        interpolate_pos_encoding: bool = False,
+        **kwargs,
+    ) -> Union[Tuple, CLIPVisionModelOutput]:
+        """
+        Forward pass for the RBLN-optimized CLIP vision encoder model with projection.
+
+        Args:
+            pixel_values (torch.Tensor): The pixel values to the model.
+            return_dict (bool): Whether to return a dictionary of outputs.
+            output_attentions (Optional[bool]): Whether to return attentions.
+            output_hidden_states (Optional[bool]): Whether to return hidden states.
+            interpolate_pos_encoding (bool): Whether to interpolate position encoding.
+
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a CLIPVisionModelOutput object.
+        """
+
+        return super().forward(
+            pixel_values=pixel_values,
+            return_dict=return_dict,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            interpolate_pos_encoding=interpolate_pos_encoding,
+            **kwargs,
+        )
+
     def _prepare_output(self, output, return_dict):
         # Prepare model output based on return_dict flag.
         # This method can be overridden by subclasses to provide task-specific output handling.
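
RBLNCLIPVisionModelWithProjection now exposes an explicit forward signature that simply forwards to the parent vision model, with output handling delegated to _prepare_output. A minimal usage sketch for pulling projected image embeddings, assuming the usual optimum-rbln from_pretrained(..., export=True) convention; checkpoint, image URL, and any extra compile-time options are illustrative, not taken from this diff:

# Hypothetical usage sketch (not part of the diff): projected CLIP image embeddings.
import requests
from PIL import Image
from transformers import CLIPImageProcessor
from optimum.rbln import RBLNCLIPVisionModelWithProjection

model_id = "openai/clip-vit-base-patch32"  # illustrative checkpoint
processor = CLIPImageProcessor.from_pretrained(model_id)
# export=True compiles the vision encoder for the NPU; additional compile-time
# options (e.g. image size) may be required depending on the checkpoint.
model = RBLNCLIPVisionModelWithProjection.from_pretrained(model_id, export=True)

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
pixel_values = processor(images=image, return_tensors="pt").pixel_values

outputs = model(pixel_values=pixel_values, return_dict=True)
print(outputs.image_embeds.shape)  # projected image embedding, e.g. (1, 512)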
optimum/rbln/transformers/models/colpali/colpali_architecture.py

@@ -77,11 +77,11 @@ class ColPaliModel(nn.Module):
         self, model, layers: List["ColPaliLayer"], output_hidden_states: bool = False, max_seq_len: int = 2048
     ):
         super().__init__()
-        self._original_mod = model
         self.layers = nn.ModuleList(layers)
         self.output_hidden_states = output_hidden_states
-        self.norm = self._original_mod.norm
-        self.hidden_size = self._original_mod.config.hidden_size
+        self.config = model.config
+        self.norm = model.norm
+        self.hidden_size = self.config.hidden_size
         self.max_seq_len = max_seq_len
 
     def forward(
@@ -118,7 +118,6 @@ class ColPaliModel(nn.Module):
 class ColPaliLayer(nn.Module):
     def __init__(self, layer, self_attn: "ColPaliAttention"):
         super().__init__()
-        self._original_mod = layer
         self.self_attn = self_attn
         self.mlp = layer.mlp
         self.input_layernorm = layer.input_layernorm
@@ -155,27 +154,22 @@
 class ColPaliAttention(nn.Module):
     def __init__(self, self_attn):
         super().__init__()
-        self._original_mod = self_attn
-        self.num_heads = getattr(self._original_mod, "num_heads", None) or getattr(
-            self._original_mod.config, "num_attention_heads"
-        )
-        self.head_dim = self._original_mod.head_dim
+        self.config = self_attn.config
+        self.num_heads = getattr(self_attn, "num_heads", None) or self_attn.config.num_attention_heads
+        self.head_dim = self_attn.head_dim
         self.scaling = self.head_dim**-0.5
 
-        if hasattr(self._original_mod, "num_key_value_heads"):
-            self.num_key_value_heads = self._original_mod.num_key_value_heads
-        elif hasattr(self._original_mod, "config") and hasattr(self._original_mod.config, "num_key_value_heads"):
-            self.num_key_value_heads = self._original_mod.config.num_key_value_heads
+        if hasattr(self_attn, "num_key_value_heads"):
+            self.num_key_value_heads = self_attn.num_key_value_heads
+        elif hasattr(self_attn, "config") and hasattr(self_attn.config, "num_key_value_heads"):
+            self.num_key_value_heads = self_attn.config.num_key_value_heads
         else:
             self.num_key_value_heads = self.num_heads
 
-        self.__post_init__()
-
-    def __post_init__(self):
-        self.q_proj = self._original_mod.q_proj
-        self.k_proj = self._original_mod.k_proj
-        self.v_proj = self._original_mod.v_proj
-        self.o_proj = self._original_mod.o_proj
+        self.q_proj = self_attn.q_proj
+        self.k_proj = self_attn.k_proj
+        self.v_proj = self_attn.v_proj
+        self.o_proj = self_attn.o_proj
 
     def projection(self, hidden_states) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
         query_states = self.q_proj(hidden_states)
optimum/rbln/transformers/models/colpali/configuration_colpali.py

@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, List, Optional, Union
+from typing import Any, Optional
 
 from ....configuration_utils import RBLNModelConfig
 from ....utils.logging import get_logger
@@ -33,7 +33,9 @@ class RBLNColPaliForRetrievalConfig(RBLNModelConfig):
 
     # Create a configuration object
     config = RBLNColPaliForRetrievalConfig(
-        max_seq_lens=1152,
+        vlm={
+            "language_model": {"prefill_chunk_size": 8192},
+        }
         output_hidden_states=False,
         tensor_parallel_size=4
     )
@@ -47,24 +49,21 @@ class RBLNColPaliForRetrievalConfig(RBLNModelConfig):
     ```
     """
 
-    submodules = ["vision_tower"]
+    _allow_no_compile_cfgs = True
+    submodules = ["vlm"]
 
     def __init__(
         self,
         batch_size: Optional[int] = None,
-        max_seq_lens: Union[int, List[int]] = None,
+        vlm: Optional[RBLNModelConfig] = None,
         output_hidden_states: Optional[bool] = None,
-        vision_tower: Optional[RBLNModelConfig] = None,
         **kwargs: Any,
     ):
         """
        Args:
            batch_size (Optional[int]): The batch size for the model.
-            vision_tower (Optional[RBLNModelConfig]): Configuration for the vision encoder component.
-            max_seq_lens (Union[int, List[int]]): The maximum sequence lengths for the language model.
-                This can be multiple values, and the model will be compiled for each max_seq_len, allowing selection of the most appropriate max_seq_len at inference time.
-            output_hidden_states (Optional[bool]): Whether to output the hidden states of the language model.
-            vision_tower (Optional[RBLNModelConfig]): Configuration for the vision encoder component.
+            vlm (Optional[RBLNModelConfig]): Configuration for the VLM component.
+            output_hidden_states (Optional[bool]): Whether to output the hidden states of the decoder. Defaults to False.
            kwargs: Additional arguments passed to the parent RBLNModelConfig.
        Raises:
            ValueError: If batch_size is not a positive integer.
@@ -74,11 +73,7 @@ class RBLNColPaliForRetrievalConfig(RBLNModelConfig):
         if not isinstance(self.batch_size, int) or self.batch_size < 0:
             raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
 
-        if self.batch_size != 1:
-            logger.warning("Ignore batch_size for ColPali vision tower. It will be set to 1.")
-
-        self.vision_tower = self.initialize_submodule_config(
-            submodule_config=vision_tower, batch_size=1, force_kwargs=True
+        self.output_hidden_states = output_hidden_states or False
+        self.vlm = self.initialize_submodule_config(
+            submodule_config=vlm, batch_size=batch_size, output_hidden_states=output_hidden_states
         )
-        self.max_seq_lens = max_seq_lens
-        self.output_hidden_states = output_hidden_states
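
The hunks above replace the top-level max_seq_lens and vision_tower options of RBLNColPaliForRetrievalConfig with a single nested vlm submodule config, which now also carries batch_size and output_hidden_states down to the wrapped VLM. A minimal usage sketch mirroring the docstring example in the diff; the model class name is inferred from the config class, and the checkpoint and rbln_config keyword follow the usual optimum-rbln convention rather than anything stated in this diff:

# Hypothetical usage sketch (not part of the diff): the new nested `vlm` config.
from optimum.rbln import RBLNColPaliForRetrieval, RBLNColPaliForRetrievalConfig

rbln_config = RBLNColPaliForRetrievalConfig(
    batch_size=1,
    vlm={"language_model": {"prefill_chunk_size": 8192}},
    output_hidden_states=False,
    tensor_parallel_size=4,
)

# export=True compiles the checkpoint with the config above
# (assumed optimum-rbln convention).
model = RBLNColPaliForRetrieval.from_pretrained(
    "vidore/colpali-v1.3-hf",  # illustrative checkpoint
    export=True,
    rbln_config=rbln_config,
)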