optimum-rbln 0.9.3rc0__py3-none-any.whl → 0.9.4a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. optimum/rbln/__init__.py +12 -0
  2. optimum/rbln/__version__.py +2 -2
  3. optimum/rbln/configuration_utils.py +16 -6
  4. optimum/rbln/diffusers/__init__.py +12 -0
  5. optimum/rbln/diffusers/configurations/__init__.py +3 -0
  6. optimum/rbln/diffusers/configurations/models/__init__.py +2 -0
  7. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_temporal_decoder.py +67 -0
  8. optimum/rbln/diffusers/configurations/models/configuration_unet_spatio_temporal_condition.py +59 -0
  9. optimum/rbln/diffusers/configurations/pipelines/__init__.py +3 -0
  10. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_video_diffusion.py +114 -0
  11. optimum/rbln/diffusers/modeling_diffusers.py +1 -1
  12. optimum/rbln/diffusers/models/__init__.py +17 -3
  13. optimum/rbln/diffusers/models/autoencoders/__init__.py +1 -0
  14. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +3 -3
  15. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +275 -0
  16. optimum/rbln/diffusers/models/autoencoders/vae.py +27 -8
  17. optimum/rbln/diffusers/models/controlnet.py +17 -2
  18. optimum/rbln/diffusers/models/transformers/prior_transformer.py +16 -2
  19. optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +16 -1
  20. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +14 -1
  21. optimum/rbln/diffusers/models/unets/__init__.py +1 -0
  22. optimum/rbln/diffusers/models/unets/unet_2d_condition.py +18 -2
  23. optimum/rbln/diffusers/models/unets/unet_spatio_temporal_condition.py +201 -0
  24. optimum/rbln/diffusers/pipelines/__init__.py +4 -0
  25. optimum/rbln/diffusers/pipelines/auto_pipeline.py +2 -2
  26. optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +20 -0
  27. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +13 -4
  28. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +13 -4
  29. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +13 -4
  30. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -4
  31. optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +1 -1
  32. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +1 -1
  33. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +1 -2
  34. optimum/rbln/diffusers/pipelines/stable_video_diffusion/__init__.py +15 -0
  35. optimum/rbln/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +46 -0
  36. optimum/rbln/modeling.py +20 -45
  37. optimum/rbln/modeling_base.py +12 -8
  38. optimum/rbln/transformers/configuration_generic.py +0 -27
  39. optimum/rbln/transformers/modeling_attention_utils.py +242 -109
  40. optimum/rbln/transformers/modeling_generic.py +2 -61
  41. optimum/rbln/transformers/modeling_outputs.py +1 -0
  42. optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +28 -2
  43. optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +68 -5
  44. optimum/rbln/transformers/models/auto/auto_factory.py +1 -0
  45. optimum/rbln/transformers/models/bart/modeling_bart.py +23 -2
  46. optimum/rbln/transformers/models/bert/modeling_bert.py +86 -1
  47. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +42 -15
  48. optimum/rbln/transformers/models/clip/modeling_clip.py +40 -2
  49. optimum/rbln/transformers/models/colpali/colpali_architecture.py +2 -2
  50. optimum/rbln/transformers/models/colpali/modeling_colpali.py +6 -45
  51. optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py +0 -2
  52. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +10 -1
  53. optimum/rbln/transformers/models/decoderonly/configuration_lora.py +1 -1
  54. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +92 -43
  55. optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +207 -64
  56. optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +17 -9
  57. optimum/rbln/transformers/models/decoderonly/lora_architecture.py +1 -1
  58. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +140 -46
  59. optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py +17 -0
  60. optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +24 -0
  61. optimum/rbln/transformers/models/dpt/modeling_dpt.py +17 -0
  62. optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +7 -1
  63. optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py +42 -70
  64. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +46 -31
  65. optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +1 -1
  66. optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +24 -9
  67. optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +3 -5
  68. optimum/rbln/transformers/models/llava/modeling_llava.py +37 -25
  69. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +3 -5
  70. optimum/rbln/transformers/models/mistral/modeling_mistral.py +0 -22
  71. optimum/rbln/transformers/models/opt/modeling_opt.py +2 -2
  72. optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +1 -1
  73. optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +13 -1
  74. optimum/rbln/transformers/models/pixtral/pixtral_architecture.py +2 -2
  75. optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +0 -28
  76. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +8 -9
  77. optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +6 -7
  78. optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +1 -1
  79. optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +0 -20
  80. optimum/rbln/transformers/models/resnet/configuration_resnet.py +17 -0
  81. optimum/rbln/transformers/models/resnet/modeling_resnet.py +73 -0
  82. optimum/rbln/transformers/models/roberta/modeling_roberta.py +33 -0
  83. optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +2 -4
  84. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +36 -12
  85. optimum/rbln/transformers/models/siglip/modeling_siglip.py +17 -1
  86. optimum/rbln/transformers/models/swin/modeling_swin.py +17 -4
  87. optimum/rbln/transformers/models/t5/modeling_t5.py +2 -2
  88. optimum/rbln/transformers/models/t5/t5_architecture.py +1 -1
  89. optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +25 -10
  90. optimum/rbln/transformers/models/vit/modeling_vit.py +19 -0
  91. optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +15 -3
  92. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +60 -8
  93. optimum/rbln/transformers/models/whisper/generation_whisper.py +48 -14
  94. optimum/rbln/transformers/models/whisper/modeling_whisper.py +2 -2
  95. optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +53 -0
  96. optimum/rbln/transformers/utils/rbln_quantization.py +9 -0
  97. optimum/rbln/utils/deprecation.py +213 -0
  98. optimum/rbln/utils/hub.py +14 -3
  99. optimum/rbln/utils/import_utils.py +7 -1
  100. optimum/rbln/utils/runtime_utils.py +32 -0
  101. optimum/rbln/utils/submodule.py +3 -1
  102. {optimum_rbln-0.9.3rc0.dist-info → optimum_rbln-0.9.4a2.dist-info}/METADATA +2 -2
  103. {optimum_rbln-0.9.3rc0.dist-info → optimum_rbln-0.9.4a2.dist-info}/RECORD +106 -99
  104. {optimum_rbln-0.9.3rc0.dist-info → optimum_rbln-0.9.4a2.dist-info}/WHEEL +1 -1
  105. optimum/rbln/utils/depreacate_utils.py +0 -16
  106. {optimum_rbln-0.9.3rc0.dist-info → optimum_rbln-0.9.4a2.dist-info}/entry_points.txt +0 -0
  107. {optimum_rbln-0.9.3rc0.dist-info → optimum_rbln-0.9.4a2.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py
@@ -12,17 +12,80 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from ...modeling_generic import RBLNModelForAudioClassification
+from typing import TYPE_CHECKING, Optional

+import torch
+from transformers import AutoModelForAudioClassification
+from transformers.modeling_outputs import SequenceClassifierOutput

-class RBLNASTForAudioClassification(RBLNModelForAudioClassification):
+from ....configuration_utils import RBLNCompileConfig
+from ....modeling import RBLNModel
+from .configuration_audio_spectrogram_transformer import RBLNASTForAudioClassificationConfig
+
+
+if TYPE_CHECKING:
+    from transformers import AutoFeatureExtractor, PretrainedConfig, PreTrainedModel
+
+
+class RBLNASTForAudioClassification(RBLNModel):
     """
     Audio Spectrogram Transformer model with an audio classification head on top (a linear layer on top of the pooled output) e.g. for datasets like AudioSet, Speech Commands v2.
-    This model inherits from [`RBLNModelForAudioClassification`]. Check the superclass documentation for the generic methods the library implements for all its models.
+    This model inherits from [RBLNModelForAudioClassification]. Check the superclass documentation for the generic methods the library implements for all its models.

-    A class to convert and run pre-trained transformer-based `ASTForAudioClassification` models on RBLN devices.
-    It implements the methods to convert a pre-trained transformers `ASTForAudioClassification` model into a RBLN transformer model by:
+    A class to convert and run pre-trained transformer-based ASTForAudioClassification models on RBLN devices.
+    It implements the methods to convert a pre-trained transformers ASTForAudioClassification model into a RBLN transformer model by:

     - transferring the checkpoint weights of the original into an optimized RBLN graph,
     - compiling the resulting graph using the RBLN Compiler.
     """
+
+    auto_model_class = AutoModelForAudioClassification
+
+    @classmethod
+    def _update_rbln_config(
+        cls,
+        preprocessors: "AutoFeatureExtractor" = None,
+        model: Optional["PreTrainedModel"] = None,
+        model_config: "PretrainedConfig" = None,
+        rbln_config: Optional[RBLNASTForAudioClassificationConfig] = None,
+    ) -> RBLNASTForAudioClassificationConfig:
+        num_mel_bins = getattr(model_config, "num_mel_bins", None)
+
+        if rbln_config.max_length is None:
+            rbln_config.max_length = getattr(model_config, "max_length", None)
+            for feature_extractor in preprocessors:
+                if hasattr(feature_extractor, "max_length"):
+                    rbln_config.max_length = feature_extractor.max_length
+                    break
+
+        if rbln_config.max_length is None:
+            raise ValueError("max_length should be specified!")
+
+        input_info = [
+            (
+                "input_values",
+                [rbln_config.batch_size, rbln_config.max_length, num_mel_bins],
+                "float32",
+            ),
+        ]
+
+        rbln_config.set_compile_cfgs([RBLNCompileConfig(input_info=input_info)])
+        return rbln_config
+
+    def forward(self, input_values: torch.Tensor, **kwargs) -> SequenceClassifierOutput:
+        """
+        Forward pass for the RBLN-optimized Audio Spectrogram Transformer model for audio classification.
+
+        Args:
+            input_values (torch.FloatTensor of shape (batch_size, max_length, num_mel_bins)):
+                Float values mel features extracted from the raw audio waveform. Raw audio waveform can be obtained by
+                loading a .flac or .wav audio file into an array of type list[float], a numpy.ndarray or a torch.Tensor, *e.g.* via
+                the torchcodec library (pip install torchcodec) or the soundfile library (pip install soundfile).
+                To prepare the array into input_features, the [AutoFeatureExtractor] should be used for extracting the
+                mel features, padding and conversion into a tensor of type torch.FloatTensor.
+
+        Returns:
+            Returns a SequenceClassifierOutput object.
+        """
+
+        return super().forward(input_values, **kwargs)
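For orientation, a minimal usage sketch for the reworked class above. It assumes the optimum-style from_pretrained(..., export=True) entry point and the rbln_-prefixed compile kwargs referenced elsewhere in this diff; the checkpoint name and rbln_batch_size kwarg are illustrative assumptions, not taken from this release.

# Hedged sketch: compile and run RBLNASTForAudioClassification on an RBLN device.
import torch
from transformers import AutoFeatureExtractor
from optimum.rbln import RBLNASTForAudioClassification

model_id = "MIT/ast-finetuned-audioset-10-10-0.4593"  # example checkpoint
feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)

# max_length falls back to the feature extractor's max_length when not set,
# per _update_rbln_config above.
model = RBLNASTForAudioClassification.from_pretrained(model_id, export=True, rbln_batch_size=1)

waveform = torch.randn(16000).numpy()  # one second of dummy 16 kHz audio
inputs = feature_extractor(waveform, sampling_rate=16000, return_tensors="pt")
logits = model(inputs.input_values).logits
print(logits.argmax(-1))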
optimum/rbln/transformers/models/auto/auto_factory.py
@@ -150,6 +150,7 @@ class _BaseAutoModelClass:
                 f"from the checkpoint, leading to potential unintended behavior. If this is not intentional, consider calling the "
                 f"`from_pretrained()` method directly from the `RBLN{config.architectures[0]}` class instead.",
                 UserWarning,
+                stacklevel=2,
             )

         return model_class
optimum/rbln/transformers/models/bart/modeling_bart.py
@@ -13,9 +13,11 @@
 # limitations under the License.

 import inspect
-from typing import Any, Callable
+from typing import Any, Callable, Optional, Tuple, Union

+import torch
 from transformers import BartForConditionalGeneration, PreTrainedModel
+from transformers.modeling_outputs import Seq2SeqModelOutput

 from ....utils.logging import get_logger
 from ...modeling_generic import RBLNTransformerEncoderForFeatureExtraction
@@ -35,6 +37,25 @@ class RBLNBartModel(RBLNTransformerEncoderForFeatureExtraction):
     on RBLN devices, optimized for feature extraction use cases.
     """

+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Union[Tuple, Seq2SeqModelOutput]:
+        """
+        Forward pass for the RBLN-optimized BART model for feature extraction tasks.
+
+        Args:
+            input_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Indices of input sequence tokens in the vocabulary.
+            attention_mask (torch.Tensor of shape (batch_size, sequence_length), optional): Mask to avoid performing attention on padding token indices.
+
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a Seq2SeqModelOutput object.
+        """
+
+        return super().forward(input_ids, attention_mask, **kwargs)
+

 class RBLNBartForConditionalGeneration(RBLNModelForSeq2SeqLM):
     """
@@ -48,7 +69,7 @@ class RBLNBartForConditionalGeneration(RBLNModelForSeq2SeqLM):
     support_causal_attn = True

     @classmethod
-    def wrap_model_if_needed(self, model: PreTrainedModel, rbln_config: RBLNBartForConditionalGenerationConfig):
+    def _wrap_model_if_needed(self, model: PreTrainedModel, rbln_config: RBLNBartForConditionalGenerationConfig):
         return BartWrapper(
             model, enc_max_seq_len=rbln_config.enc_max_seq_len, use_attention_mask=rbln_config.use_attention_mask
         )
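The wrap_model_if_needed to _wrap_model_if_needed rename above recurs throughout this release (see the BERT, BLIP-2, CLIP, and ColPali hunks below). A hedged sketch of what that implies for downstream code that overrides the hook; MyBartWrapper and MyRBLNBart are hypothetical names used only for illustration, and only the hook name itself comes from this diff.

# Hedged sketch: subclasses that customized the old public hook must track the rename.
import torch
from optimum.rbln import RBLNBartForConditionalGeneration


class MyBartWrapper(torch.nn.Module):
    # Hypothetical wrapper around the module that gets traced and compiled.
    def __init__(self, wrapped: torch.nn.Module):
        super().__init__()
        self.wrapped = wrapped

    def forward(self, *args, **kwargs):
        return self.wrapped(*args, **kwargs)


class MyRBLNBart(RBLNBartForConditionalGeneration):
    @classmethod
    def _wrap_model_if_needed(cls, model, rbln_config):  # was wrap_model_if_needed in 0.9.3rc0
        return MyBartWrapper(super()._wrap_model_if_needed(model, rbln_config))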
optimum/rbln/transformers/models/bert/modeling_bert.py
@@ -12,7 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from typing import Optional, Tuple, Union
+
 import torch
+from transformers.modeling_outputs import (
+    BaseModelOutputWithPoolingAndCrossAttentions,
+    MaskedLMOutput,
+    QuestionAnsweringModelOutput,
+)

 from ...modeling_generic import (
     RBLNModelForMaskedLM,
@@ -35,9 +42,45 @@ class RBLNBertModel(RBLNTransformerEncoderForFeatureExtraction):
     rbln_model_input_names = ["input_ids", "attention_mask"]

     @classmethod
-    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNBertModelConfig) -> torch.nn.Module:
+    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNBertModelConfig) -> torch.nn.Module:
         return BertModelWrapper(model, rbln_config)

+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Union[BaseModelOutputWithPoolingAndCrossAttentions, Tuple]:
+        """
+        Forward pass for the RBLN-optimized BERT model for feature extraction tasks.
+
+        Args:
+            input_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Indices of input sequence tokens in the vocabulary.
+            attention_mask (torch.Tensor of shape (batch_size, sequence_length), optional): Mask to avoid performing attention on padding token indices.
+            token_type_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Segment token indices to indicate first and second portions of the inputs.
+            position_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Indices of positions of each input sequence tokens in the position embeddings.
+
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a BaseModelOutputWithPoolingAndCrossAttentions object.
+        """
+
+        input_map = {
+            "input_ids": input_ids,
+            "attention_mask": attention_mask,
+            "token_type_ids": token_type_ids,
+            "position_ids": position_ids,
+        }
+
+        model_input_names = getattr(self.rbln_config, "model_input_names", None)
+        if model_input_names is None:
+            model_input_names = self.rbln_model_input_names
+
+        ordered_inputs = [input_map[name] for name in model_input_names if name in input_map]
+
+        return super().forward(*ordered_inputs, **kwargs)
+

 class RBLNBertForMaskedLM(RBLNModelForMaskedLM):
     """
@@ -50,6 +93,27 @@ class RBLNBertForMaskedLM(RBLNModelForMaskedLM):

     rbln_model_input_names = ["input_ids", "attention_mask", "token_type_ids"]

+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Union[MaskedLMOutput, Tuple]:
+        """
+        Forward pass for the RBLN-optimized BERT model for masked language modeling tasks.
+
+        Args:
+            input_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Indices of input sequence tokens in the vocabulary.
+            attention_mask (torch.Tensor of shape (batch_size, sequence_length), optional): Mask to avoid performing attention on padding token indices.
+            token_type_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Segment token indices to indicate first and second portions of the inputs.
+
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a MaskedLMOutput object.
+        """
+
+        return super().forward(input_ids, attention_mask, token_type_ids, **kwargs)
+

 class RBLNBertForQuestionAnswering(RBLNModelForQuestionAnswering):
     """
@@ -61,3 +125,24 @@ class RBLNBertForQuestionAnswering(RBLNModelForQuestionAnswering):
     """

     rbln_model_input_names = ["input_ids", "attention_mask", "token_type_ids"]
+
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Union[QuestionAnsweringModelOutput, Tuple]:
+        """
+        Forward pass for the RBLN-optimized BERT model for question answering tasks.
+
+        Args:
+            input_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Indices of input sequence tokens in the vocabulary.
+            attention_mask (torch.Tensor of shape (batch_size, sequence_length), optional): Mask to avoid performing attention on padding token indices.
+            token_type_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Segment token indices to indicate first and second portions of the inputs.
+
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a QuestionAnsweringModelOutput object.
+        """
+
+        return super().forward(input_ids, attention_mask, token_type_ids, **kwargs)
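The new RBLNBertModel.forward above maps keyword inputs back to the positional order the compiled graph expects, using input_map and rbln_model_input_names. A self-contained sketch of that ordering logic, with dummy tensors whose shapes are illustrative only:

# Hedged sketch of the input-ordering logic added to RBLNBertModel.forward:
# named kwargs are reduced to the positional order used at compile time.
import torch

rbln_model_input_names = ["input_ids", "attention_mask"]  # class default shown in the diff

input_map = {
    "input_ids": torch.ones(1, 128, dtype=torch.long),
    "attention_mask": torch.ones(1, 128, dtype=torch.long),
    "token_type_ids": None,  # not in the compiled input list, so it is dropped
    "position_ids": None,
}

ordered_inputs = [input_map[name] for name in rbln_model_input_names if name in input_map]
print([t.shape for t in ordered_inputs])  # two (1, 128) tensors, in compile order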
optimum/rbln/transformers/models/blip_2/modeling_blip_2.py
@@ -14,7 +14,7 @@

 import inspect
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Callable, List, Optional, Tuple, Union

 import torch
 from transformers import (
@@ -71,7 +71,7 @@ class RBLNBlip2VisionModel(RBLNModel):
         return self.embeddings

     @classmethod
-    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module:
+    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module:
         class Blip2VisionModelWrapper(torch.nn.Module):
             def __init__(self, model: "Blip2VisionModel") -> None:
                 super().__init__()
@@ -111,11 +111,20 @@ class RBLNBlip2VisionModel(RBLNModel):
     def forward(
         self,
         pixel_values: torch.FloatTensor,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
         interpolate_pos_encoding: bool = False,
+        return_dict: Optional[bool] = None,
     ) -> Union[Tuple, BaseModelOutputWithPooling]:
+        """
+        Forward pass for the RBLN-optimized Blip2VisionModel model.
+
+        Args:
+            pixel_values (torch.FloatTensor of shape (batch_size, num_channels, height, width)): The tensors corresponding to the input images.
+            interpolate_pos_encoding (bool, optional): Whether to interpolate the positional encoding of the image embeddings. Defaults to False.
+            return_dict (bool, optional): Whether to return a ModelOutput instead of a plain tuple.
+
+        Returns:
+            BaseModelOutputWithPooling or tuple(torch.FloatTensor): The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a BaseModelOutputWithPooling object.
+        """
         batch_size = pixel_values.shape[0]
         outputs = []
         for i in range(batch_size):
@@ -151,7 +160,7 @@ class RBLNBlip2QFormerModel(RBLNModel):
         return self.embeddings.word_embeddings

     @classmethod
-    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module:
+    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module:
         class Blip2QFormerModelWrapper(torch.nn.Module):
             def __init__(self, model: "Blip2QFormerModel"):
                 super().__init__()
@@ -231,17 +240,22 @@ class RBLNBlip2QFormerModel(RBLNModel):
     def forward(
         self,
         query_embeds: torch.FloatTensor,
-        query_length: Optional[int] = None,
-        attention_mask: Optional[torch.FloatTensor] = None,
-        head_mask: Optional[torch.FloatTensor] = None,
         encoder_hidden_states: Optional[torch.FloatTensor] = None,
         encoder_attention_mask: Optional[torch.FloatTensor] = None,
-        past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
-        use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
     ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
+        """
+        The forward pass for the RBLN-optimized Blip2QFormerModel model.
+
+        Args:
+            query_embeds (torch.FloatTensor): Hidden states to be used in the attention computation.
+            encoder_hidden_states (torch.FloatTensor, optional): Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if the model is configured as a decoder.
+            encoder_attention_mask (torch.FloatTensor, optional): Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in the cross-attention if the model is configured as a decoder.
+            return_dict (bool, optional): Whether to return a ModelOutput instead of a plain tuple.
+
+        Returns:
+            BaseModelOutputWithPoolingAndCrossAttentions or tuple(torch.FloatTensor): The model outputs. If `return_dict=False` is passed, returns a tuple of tensors. Otherwise, returns a `BaseModelOutputWithPoolingAndCrossAttentions` object.
+        """
         batch_size = query_embeds.shape[0]
         outputs = []
         for i in range(batch_size):
@@ -349,7 +363,7 @@ class RBLNBlip2ForConditionalGeneration(RBLNModel, RBLNDecoderOnlyGenerationMixi
         return self.language_model.get_input_embeddings()

     @classmethod
-    def wrap_model_if_needed(cls, model, rbln_config):
+    def _wrap_model_if_needed(cls, model, rbln_config):
         return model.language_projection

     @classmethod
@@ -444,7 +458,20 @@ class RBLNBlip2ForConditionalGeneration(RBLNModel, RBLNDecoderOnlyGenerationMixi
         inputs_embeds: Optional[torch.FloatTensor] = None,
         interpolate_pos_encoding: bool = False,
         **generate_kwargs,
-    ) -> torch.LongTensor:
+    ) -> List[torch.LongTensor]:
+        """
+        The generate function is utilized in its standard form as in the HuggingFace transformers library. User can use this function to generate text from the model.
+        Check the [HuggingFace transformers documentation](https://huggingface.co/docs/transformers/v4.57.1/en/model_doc/blip-2#transformers.Blip2ForConditionalGeneration.generate) for more details.
+
+        Args:
+            pixel_values (torch.FloatTensor): Input images to be processed.
+            input_ids (torch.LongTensor, optional): The sequence used as a prompt for the generation.
+            attention_mask (torch.LongTensor, optional): Mask to avoid performing attention on padding token indices
+            inputs_embeds (torch.FloatTensor, optional): Embedded representation of the inputs. Should be float, not int tokens.
+            interpolate_pos_encoding (bool, optional, defaults to False) — Whether to interpolate the positional encoding of the image embeddings.
+        Returns:
+            A list of strings of length batch_size * num_captions.
+        """
         batch_size = pixel_values.shape[0]
         image_embeds = self.vision_model(
             pixel_values,
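A hedged sketch of the generate path documented above, which keeps the standard transformers signature. The checkpoint name, processor usage, and export=True kwarg are illustrative assumptions rather than part of this diff.

# Hedged sketch: image captioning with RBLNBlip2ForConditionalGeneration.
from PIL import Image
from transformers import Blip2Processor
from optimum.rbln import RBLNBlip2ForConditionalGeneration

model_id = "Salesforce/blip2-opt-2.7b"  # example checkpoint
processor = Blip2Processor.from_pretrained(model_id)
model = RBLNBlip2ForConditionalGeneration.from_pretrained(model_id, export=True)

image = Image.open("example.jpg")  # any local image
inputs = processor(images=image, text="Question: what is in the photo? Answer:", return_tensors="pt")

# generate() is used in its standard transformers form, per the docstring above.
generated_ids = model.generate(**inputs, max_new_tokens=20)
print(processor.batch_decode(generated_ids, skip_special_tokens=True))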
optimum/rbln/transformers/models/clip/modeling_clip.py
@@ -54,7 +54,7 @@ class RBLNCLIPTextModel(RBLNModel):
     _tp_support = False

     @classmethod
-    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNCLIPTextModelConfig) -> torch.nn.Module:
+    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNCLIPTextModelConfig) -> torch.nn.Module:
         return _TextEncoder(model).eval()

     @classmethod
@@ -92,6 +92,9 @@ class RBLNCLIPTextModel(RBLNModel):
         Args:
             input_ids (torch.LongTensor): The input ids to the model.
             return_dict (Optional[bool]): Whether to return a dictionary of outputs.
+
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a CLIPTextModelOutput object.
         """

         # To ignore using attention_mask, we override forward method.
@@ -157,7 +160,7 @@ class RBLNCLIPVisionModel(RBLNModel):
     _tp_support = False

     @classmethod
-    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNCLIPVisionModelConfig) -> torch.nn.Module:
+    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNCLIPVisionModelConfig) -> torch.nn.Module:
         wrapper_cfg = {
             "interpolate_pos_encoding": rbln_config.interpolate_pos_encoding,
             "output_hidden_states": rbln_config.output_hidden_states,
@@ -230,6 +233,9 @@ class RBLNCLIPVisionModel(RBLNModel):
             output_attentions (Optional[bool]): Whether to return attentions.
             output_hidden_states (Optional[bool]): Whether to return hidden states.
             interpolate_pos_encoding (bool): Whether to interpolate position encoding.
+
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a BaseModelOutputWithPooling object.
         """

         if len(kwargs) > 0 and any(value is not None for value in kwargs.values()):
@@ -307,6 +313,38 @@ class RBLNCLIPVisionModelWithProjection(RBLNCLIPVisionModel):
     multimodal embedding alignment tasks.
     """

+    def forward(
+        self,
+        pixel_values: torch.FloatTensor,
+        return_dict: bool = True,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        interpolate_pos_encoding: bool = False,
+        **kwargs,
+    ) -> Union[Tuple, CLIPVisionModelOutput]:
+        """
+        Forward pass for the RBLN-optimized CLIP vision encoder model with projection.
+
+        Args:
+            pixel_values (torch.Tensor): The pixel values to the model.
+            return_dict (bool): Whether to return a dictionary of outputs.
+            output_attentions (Optional[bool]): Whether to return attentions.
+            output_hidden_states (Optional[bool]): Whether to return hidden states.
+            interpolate_pos_encoding (bool): Whether to interpolate position encoding.
+
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a CLIPVisionModelOutput object.
+        """
+
+        return super().forward(
+            pixel_values=pixel_values,
+            return_dict=return_dict,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            interpolate_pos_encoding=interpolate_pos_encoding,
+            **kwargs,
+        )
+
     def _prepare_output(self, output, return_dict):
         # Prepare model output based on return_dict flag.
         # This method can be overridden by subclasses to provide task-specific output handling.
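A hedged sketch of calling the explicit forward added to RBLNCLIPVisionModelWithProjection above. The checkpoint name, the export=True kwarg, and the input tensor shape are illustrative assumptions; the compiled image size depends on the model's configuration.

# Hedged sketch: projected image embeddings from the RBLN CLIP vision encoder.
import torch
from optimum.rbln import RBLNCLIPVisionModelWithProjection

model = RBLNCLIPVisionModelWithProjection.from_pretrained(
    "openai/clip-vit-base-patch32",  # example checkpoint
    export=True,
)

pixel_values = torch.randn(1, 3, 224, 224)  # shape must match the compiled image size
out = model(pixel_values, return_dict=True)
print(out.image_embeds.shape)  # CLIPVisionModelOutput carries the projected embeddings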
optimum/rbln/transformers/models/colpali/colpali_architecture.py
@@ -156,8 +156,8 @@ class ColPaliAttention(nn.Module):
     def __init__(self, self_attn):
         super().__init__()
         self._original_mod = self_attn
-        self.num_heads = getattr(self._original_mod, "num_heads", None) or getattr(
-            self._original_mod.config, "num_attention_heads"
+        self.num_heads = (
+            getattr(self._original_mod, "num_heads", None) or self._original_mod.config.num_attention_heads
         )
         self.head_dim = self._original_mod.head_dim
         self.scaling = self.head_dim**-0.5
optimum/rbln/transformers/models/colpali/modeling_colpali.py
@@ -14,8 +14,7 @@

 import bisect
 from pathlib import Path
-from tempfile import TemporaryDirectory
-from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Optional, Tuple, Union

 import torch
 from transformers import PretrainedConfig, PreTrainedModel
@@ -182,7 +181,7 @@ class RBLNColPaliForRetrieval(RBLNModel):
         return multi_modal_projector

     @classmethod
-    def wrap_model_if_needed(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig):
+    def _wrap_model_if_needed(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig):
         return RBLNColPaliForRetrievalWrapper(
             causal_lm=model.vlm,
             embedding_proj_layer=model.embedding_proj_layer,
@@ -236,49 +235,11 @@ class RBLNColPaliForRetrieval(RBLNModel):
         return rbln_config

     @classmethod
-    def from_model(
-        cls,
-        model: "PreTrainedModel",
-        config: Optional[PretrainedConfig] = None,
-        rbln_config: Optional[Union[RBLNModelConfig, Dict]] = None,
-        model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
-        subfolder: str = "",
-        **kwargs: Any,
-    ) -> "RBLNModel":
-        """
-        Converts and compiles a pre-trained HuggingFace library model into a RBLN model.
-        This method performs the actual model conversion and compilation process.
-
-        Args:
-            model (PreTrainedModel): The PyTorch model to be compiled.
-                The object must be an instance of the HuggingFace transformers PreTrainedModel class.
-            config (Optional[PretrainedConfig]): The configuration object associated with the model.
-            rbln_config (Optional[Union[RBLNModelConfig, Dict]]): Configuration for RBLN model compilation and runtime.
-                This can be provided as a dictionary or an instance of the model's configuration class (e.g., `RBLNLlamaForCausalLMConfig` for Llama models).
-                For detailed configuration options, see the specific model's configuration class documentation.
-            kwargs: Additional keyword arguments. Arguments with the prefix `rbln_` are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.
-
-        The method performs the following steps:
-
-        1. Compiles the PyTorch model into an optimized RBLN graph
-        2. Configures the model for the specified NPU device
-        3. Creates the necessary runtime objects if requested
-        4. Saves the compiled model and configurations
-
-        Returns:
-            (RBLNModel): A RBLN model instance ready for inference on RBLN NPU devices.
-        """
-        if not hasattr(model, "vision_tower"):
+    def _reconstruct_model_if_needed(cls, model: "PreTrainedModel"):
+        if hasattr(model, "vlm"):
             model.vision_tower = model.vlm.vision_tower
             del model.vlm.model.vision_tower
-        model = super().from_model(model, config, rbln_config, model_save_dir, subfolder, **kwargs)
-        return model
-
-    @classmethod
-    def get_pytorch_model(cls, *args, **kwargs):
-        model = super().get_pytorch_model(*args, **kwargs)
-        model.vision_tower = model.vlm.vision_tower
-        del model.vlm.model.vision_tower
+            return model
         return model

     def get_image_features(self, pixel_values: torch.Tensor):
@@ -371,7 +332,7 @@ class RBLNColPaliForRetrieval(RBLNModel):
         ]
         outputs.append(torch.empty(size=language_model_out_size, dtype=torch.float32, device="cpu"))
         if self.rbln_config.output_hidden_states:
-            for i in range(self.config.vlm_config.text_config.num_hidden_layers + 1):
+            for _ in range(self.config.vlm_config.text_config.num_hidden_layers + 1):
                 outputs.append(torch.empty(size=language_model_hidden_states_size, dtype=torch.float32, device="cpu"))

         # Embedding_proj_layer is fused on the bottom of the language model.
optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py
@@ -58,7 +58,6 @@ class RBLNColQwen2ForRetrievalConfig(RBLNDecoderOnlyModelConfig):
         visual: Optional[RBLNModelConfig] = None,
         batch_size: Optional[int] = None,
         use_inputs_embeds: bool = True,
-        output_hidden_states: Optional[bool] = False,
         **kwargs,
     ):
         super().__init__(use_inputs_embeds=use_inputs_embeds, **kwargs)
@@ -71,4 +70,3 @@ class RBLNColQwen2ForRetrievalConfig(RBLNDecoderOnlyModelConfig):
             raise ValueError("batch_size is not supported for RBLNColQwen2ForRetrievalConfig")

         self.visual = visual
-        self.output_hidden_states = output_hidden_states
optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py
@@ -58,6 +58,7 @@ class RBLNDecoderOnlyModelConfig(RBLNModelConfig):
         sliding_window_layers: Optional[List[int]] = None,
         phases: Optional[List[PhaseType]] = None,
         logits_to_keep: Optional[int] = None,
+        output_hidden_states: Optional[bool] = None,
         **kwargs,
     ):
         """
@@ -112,6 +113,7 @@ class RBLNDecoderOnlyModelConfig(RBLNModelConfig):
                 ["prefill", "decode"] if DecoderOnlyModelForCausalLM is used.
             logits_to_keep (Optional[int]): The number of logits to keep for the decoder. If set to 0, the decoder will keep all logits.
                 Defaults to 0 if DecoderOnlyModel is used, 1 if DecoderOnlyModelForCausalLM is used.
+            output_hidden_states (Optional[bool]): Whether to output the hidden states of the decoder. Defaults to False.
             kwargs: Additional arguments passed to the parent RBLNModelConfig.

         Raises:
@@ -232,6 +234,8 @@ class RBLNDecoderOnlyModelConfig(RBLNModelConfig):
         if self.logits_to_keep is not None and self.logits_to_keep > 1:
             raise NotImplementedError("`logits_to_keep` > 1 is currently not supported for RBLN models.")

+        self.output_hidden_states = output_hidden_states or False
+
         self.decoder_batch_sizes = None
         if "decode" in self.phases:
             self.decoder_batch_sizes = decoder_batch_sizes
@@ -274,13 +278,18 @@ class RBLNDecoderOnlyModelConfig(RBLNModelConfig):

     @property
     def use_lora(self):
-        """Check if LoRA is enabled for this configuration."""
         return self.lora_config is not None

     @property
     def can_generate(self) -> bool:
         return "decode" in self.phases

+    @property
+    def nbits_per_param(self) -> int:
+        if self.quantization:
+            return self.quantization.nbits_per_param
+        return 16
+

 class RBLNDecoderOnlyModelForCausalLMConfig(RBLNDecoderOnlyModelConfig):
     """
optimum/rbln/transformers/models/decoderonly/configuration_lora.py
@@ -183,7 +183,7 @@ class RBLNLoRAAdapterConfig(RBLNSerializableConfigProtocol):
                 f"Failed to download LoRA adapter '{path.as_posix()}' from HuggingFace Hub. "
                 f"Please check if the model ID is correct or provide a valid local path. "
                 f"Error: {e}"
-            )
+            ) from e

     def _load_adapter_config(self) -> Dict[str, Any]:
         """