PyPI - optimum-rbln - Versions diffs - 0.8.2a0__py3-none-any.whl → 0.9.3__py3-none-any.whl - Mend

optimum-rbln 0.8.2a0__py3-none-any.whl → 0.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (197) hide show

optimum/rbln/transformers/models/swin/modeling_swin.py ADDED Viewed

@@ -0,0 +1,354 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import types
+from typing import TYPE_CHECKING, Optional, Tuple, Union
+import torch
+import torch.nn.functional as F
+from transformers import SwinConfig
+from transformers.models.swin.modeling_swin import BackboneOutput
+from ....configuration_utils import RBLNCompileConfig, RBLNModelConfig
+from ....modeling import RBLNModel
+from ....utils.logging import get_logger
+from .configuration_swin import RBLNSwinBackboneConfig
+logger = get_logger(__name__)
+if TYPE_CHECKING:
+    from transformers import (
+        AutoFeatureExtractor,
+        AutoProcessor,
+        AutoTokenizer,
+        PreTrainedModel,
+        SwinBackbone,
+        SwinEncoder,
+    )
+def window_partition(input_feature, window_size):
+    """
+    Partitions the given input into windows.
+    Args:
+        input_feature: Tensor whose shape unpacks as (batch_size, height, width, num_channels).
+        window_size: Side length of each square window. Height and width are floor-divided by it, so the
+            reshape below only succeeds when both are exact multiples of window_size.
+    Returns:
+        Tensor of shape (-1, window_size, window_size, num_channels), i.e. one entry per window.
+    """
+    batch_size, height, width, num_channels = input_feature.shape
+    # Split the height and width axes into (number of windows, position inside the window).
+    input_feature = input_feature.view(
+        batch_size, height // window_size, window_size, width // window_size, window_size, num_channels
+    )
+    # Bring the two window-count axes together, then fold them (and the batch axis) into the leading dimension.
+    windows = input_feature.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, num_channels)
+    return windows
+def get_attn_mask(self, height, width, dtype, device):
+    """
+    Builds the attention mask used for shifted-window attention (SW-MSA).
+    Takes `self` as its first parameter because it reads `self.shift_size` and `self.window_size`.
+    Args:
+        height: Height of the temporary region-id image.
+        width: Width of the temporary region-id image.
+        dtype: dtype used for the region-id image (and therefore for the returned mask).
+        device: Device on which the region-id image is allocated.
+    Returns:
+        None when `self.shift_size` is not greater than 0. Otherwise a tensor of shape
+        (num_windows, window_size * window_size, window_size * window_size) that is 0.0 where two
+        positions of a window carry the same region id and -100.0 where they differ.
+    """
+    if self.shift_size > 0:
+        # calculate attention mask for SW-MSA
+        img_mask = torch.zeros((1, height, width, 1), dtype=dtype, device=device)
+        # Each axis is cut into three bands: everything before the last `window_size` entries,
+        # the entries between -window_size and -shift_size, and the last `shift_size` entries.
+        height_slices = (
+            slice(0, -self.window_size),
+            slice(-self.window_size, -self.shift_size),
+            slice(-self.shift_size, None),
+        )
+        width_slices = (
+            slice(0, -self.window_size),
+            slice(-self.window_size, -self.shift_size),
+            slice(-self.shift_size, None),
+        )
+        # The region counter is a tensor (default dtype/device, not `dtype`/`device`) and is incremented in place.
+        # NOTE(review): presumably a tensor instead of a Python int for the benefit of tracing/compilation — confirm.
+        count = torch.zeros(1)
+        # Label each of the 3 x 3 band combinations with its own id (0 through 8).
+        for height_slice in height_slices:
+            for width_slice in width_slices:
+                img_mask[:, height_slice, width_slice, :] = count
+                count += 1
+        mask_windows = window_partition(img_mask, self.window_size)
+        # Flatten each window to a vector of region ids.
+        mask_windows = mask_windows.view(-1, self.window_size * self.window_size)
+        # Pairwise difference of region ids inside every window: zero means "same region".
+        attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
+        attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
+    else:
+        attn_mask = None
+    return attn_mask
+class _SwinEncoder(torch.nn.Module):
+    """
+    Thin re-implementation of the encoder loop that reuses the `layers` of an existing encoder
+    and always returns a plain tuple.
+    """
+    def __init__(self, model: "SwinEncoder"):
+        super().__init__()
+        # Only the stage modules are borrowed from the wrapped encoder.
+        self.layers = model.layers
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        input_dimensions: Tuple[int, int],
+        head_mask: Optional[torch.FloatTensor] = None,
+        output_attentions: Optional[bool] = False,
+        output_hidden_states: Optional[bool] = False,
+        output_hidden_states_before_downsampling: Optional[bool] = False,
+        always_partition: Optional[bool] = False,
+        return_dict: Optional[bool] = True,
+    ):
+        """
+        Runs every stage in `self.layers` and collects the requested intermediate outputs.
+        Args:
+            hidden_states: Input tensor; unpacked as (batch_size, sequence, hidden_size) when hidden states are collected.
+            input_dimensions: (height, width) used to reshape `hidden_states`; replaced after each stage by the
+                last two entries of that stage's reported output dimensions.
+            head_mask: Optional indexable mask; `head_mask[i]` is passed to stage `i`.
+            output_attentions: Whether to collect the attention outputs returned by each stage.
+            output_hidden_states: Whether to collect hidden states (flat and reshaped to b c h w).
+            output_hidden_states_before_downsampling: When collecting hidden states, take each stage's
+                pre-downsampling output instead of its final output.
+            always_partition: Forwarded unchanged to every stage.
+            return_dict: Accepted for signature compatibility but not read; a tuple is always returned.
+        Returns:
+            Tuple of the non-None items among (hidden_states, all_hidden_states, all_self_attentions,
+            all_reshaped_hidden_states), in that order.
+        """
+        all_hidden_states = () if output_hidden_states else None
+        all_reshaped_hidden_states = () if output_hidden_states else None
+        all_self_attentions = () if output_attentions else None
+        # The input embedding itself is recorded as the first hidden state.
+        if output_hidden_states:
+            batch_size, _, hidden_size = hidden_states.shape
+            # rearrange b (h w) c -> b c h w
+            reshaped_hidden_state = hidden_states.view(batch_size, *input_dimensions, hidden_size)
+            reshaped_hidden_state = reshaped_hidden_state.permute(0, 3, 1, 2)
+            all_hidden_states += (hidden_states,)
+            all_reshaped_hidden_states += (reshaped_hidden_state,)
+        for i, layer_module in enumerate(self.layers):
+            layer_head_mask = head_mask[i] if head_mask is not None else None
+            layer_outputs = layer_module(
+                hidden_states, input_dimensions, layer_head_mask, output_attentions, always_partition
+            )
+            # Stage output layout as consumed here: [0] output, [1] output before downsampling,
+            # [2] output dimensions, [3:] attention outputs.
+            hidden_states = layer_outputs[0]
+            hidden_states_before_downsampling = layer_outputs[1]
+            output_dimensions = layer_outputs[2]
+            # The last two entries become the (height, width) fed to the next stage.
+            input_dimensions = (output_dimensions[-2], output_dimensions[-1])
+            if output_hidden_states and output_hidden_states_before_downsampling:
+                batch_size, _, hidden_size = hidden_states_before_downsampling.shape
+                # rearrange b (h w) c -> b c h w
+                # here we use the original (not downsampled) height and width
+                reshaped_hidden_state = hidden_states_before_downsampling.view(
+                    batch_size, *(output_dimensions[0], output_dimensions[1]), hidden_size
+                )
+                reshaped_hidden_state = reshaped_hidden_state.permute(0, 3, 1, 2)
+                all_hidden_states += (hidden_states_before_downsampling,)
+                all_reshaped_hidden_states += (reshaped_hidden_state,)
+            elif output_hidden_states and not output_hidden_states_before_downsampling:
+                batch_size, _, hidden_size = hidden_states.shape
+                # rearrange b (h w) c -> b c h w
+                reshaped_hidden_state = hidden_states.view(batch_size, *input_dimensions, hidden_size)
+                reshaped_hidden_state = reshaped_hidden_state.permute(0, 3, 1, 2)
+                all_hidden_states += (hidden_states,)
+                all_reshaped_hidden_states += (reshaped_hidden_state,)
+            if output_attentions:
+                all_self_attentions += layer_outputs[3:]
+        # Entries that were not requested are dropped, so positions in the tuple depend on the flags.
+        return tuple(
+            v
+            for v in [hidden_states, all_hidden_states, all_self_attentions, all_reshaped_hidden_states]
+            if v is not None
+        )
+class _SwinBackbone(torch.nn.Module):
+    """
+    Wrapper around a Swin backbone whose forward takes only `pixel_values` and returns a plain tuple.
+    The `output_hidden_states` / `output_attentions` choices are fixed at construction time.
+    """
+    def __init__(self, model: "SwinBackbone", output_hidden_states: bool, output_attentions: bool):
+        super().__init__()
+        self.model = model
+        # Sub-modules and metadata are pulled out of the wrapped backbone for direct use in forward.
+        self.embeddings = model.embeddings
+        self.encoder = model.encoder
+        self.stage_names = model.stage_names
+        self.out_features = model.out_features
+        self.hidden_states_norms = model.hidden_states_norms
+        self.output_hidden_states = output_hidden_states
+        self.output_attentions = output_attentions
+    def forward(
+        self,
+        pixel_values: torch.Tensor,
+    ):
+        """
+        Computes the backbone feature maps for `pixel_values`.
+        Args:
+            pixel_values: Input tensor passed straight to `self.embeddings`.
+        Returns:
+            Tuple whose first item is the tuple of normalized feature maps (one per stage listed in
+            `self.out_features`), followed by the hidden states if `self.output_hidden_states` is set and
+            by the attentions if `self.output_attentions` is set.
+        """
+        embedding_output, input_dimensions = self.embeddings(pixel_values)
+        # A fresh _SwinEncoder wrapper is built around the existing encoder on every call.
+        # Hidden states are always requested because the feature maps are derived from them.
+        outputs = _SwinEncoder(self.encoder)(
+            embedding_output,
+            input_dimensions,
+            head_mask=None,
+            output_attentions=self.output_attentions,
+            output_hidden_states=True,
+            output_hidden_states_before_downsampling=True,
+            always_partition=True,
+            return_dict=False,
+        )
+        # With hidden states requested, the last tuple entry is the reshaped (b c h w) hidden states.
+        hidden_states = outputs[-1]
+        feature_maps = ()
+        for stage, hidden_state in zip(self.stage_names, hidden_states):
+            if stage in self.out_features:
+                batch_size, num_channels, height, width = hidden_state.shape
+                # b c h w -> b (h w) c so the per-stage norm is applied with channels last.
+                hidden_state = hidden_state.permute(0, 2, 3, 1).contiguous()
+                hidden_state = hidden_state.view(batch_size, height * width, num_channels)
+                hidden_state = self.hidden_states_norms[stage](hidden_state)
+                # b (h w) c -> b c h w
+                hidden_state = hidden_state.view(batch_size, height, width, num_channels)
+                hidden_state = hidden_state.permute(0, 3, 1, 2).contiguous()
+                feature_maps += (hidden_state,)
+        output = (feature_maps,)
+        # outputs[1] is the flat hidden-state tuple; outputs[2] is the attention tuple, which is only
+        # present at that index when attentions were requested.
+        if self.output_hidden_states:
+            output += (outputs[1],)
+        if self.output_attentions:
+            output += (outputs[2],)
+        return output
+class RBLNSwinBackbone(RBLNModel):
+    """
+    RBLN model class for the Swin backbone.
+    Its forward pads inputs up to the configured image size, runs `self.model[0]`, and unpacks the flat
+    result into feature maps, hidden states and attentions.
+    """
+    @classmethod
+    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNSwinBackboneConfig) -> torch.nn.Module:
+        """
+        Patches the model's attention-mask builder and wraps it for compilation.
+        Args:
+            model: Backbone exposing `encoder.layers`, each with a `blocks` collection.
+            rbln_config: Supplies `output_hidden_states` and `output_attentions` for the wrapper.
+        Returns:
+            A `_SwinBackbone` wrapper around `model`, switched to eval mode.
+        """
+        # Replace every block's get_attn_mask with the module-level implementation, bound as a method.
+        for layer in model.encoder.layers:
+            for block in layer.blocks:
+                block.get_attn_mask = types.MethodType(get_attn_mask, block)
+        wrapper_cfg = {
+            "output_hidden_states": rbln_config.output_hidden_states,
+            "output_attentions": rbln_config.output_attentions,
+        }
+        return _SwinBackbone(model, **wrapper_cfg).eval()
+    @classmethod
+    def _update_submodule_config(
+        cls,
+        model: "PreTrainedModel",
+        rbln_config: RBLNModelConfig,
+        preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
+    ):
+        """
+        Fills `rbln_config.image_size` from a preprocessor's `image_processor.size` when it is unset.
+        Args:
+            model: Not read by this method.
+            rbln_config: Config updated in place and returned.
+            preprocessors: Iterable of preprocessors to inspect.
+        Returns:
+            The same `rbln_config` object.
+        """
+        # NOTE(review): `preprocessors` is annotated Optional but is iterated without a None check.
+        for processor in preprocessors:
+            if rbln_config.image_size is None and hasattr(processor, "image_processor"):
+                if "height" in processor.image_processor.size and "width" in processor.image_processor.size:
+                    rbln_config.image_size = (
+                        processor.image_processor.size["height"],
+                        processor.image_processor.size["width"],
+                    )
+                elif (
+                    "longest_edge" in processor.image_processor.size
+                    and "shortest_edge" in processor.image_processor.size
+                ):
+                    # When both edges are given, the longest edge is used as a single scalar size.
+                    rbln_config.image_size = processor.image_processor.size["longest_edge"]
+                elif "shortest_edge" in processor.image_processor.size:
+                    rbln_config.image_size = processor.image_processor.size["shortest_edge"]
+                # Stop at the first preprocessor that has an image_processor, even if no key matched.
+                break
+        return rbln_config
+    @classmethod
+    def _update_rbln_config(
+        cls,
+        preprocessors: Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"],
+        model: Optional["PreTrainedModel"] = None,
+        model_config: "SwinConfig" = None,
+        rbln_config: Optional[RBLNSwinBackboneConfig] = None,
+    ) -> RBLNSwinBackboneConfig:
+        """
+        Resolves the image size if needed and registers the compile configuration.
+        Args:
+            preprocessors: Iterable of preprocessors; only consulted when `rbln_config.image_size` is None.
+            model: Not read by this method.
+            model_config: Not read by this method.
+            rbln_config: Config updated in place and returned.
+        Returns:
+            The same `rbln_config`, with a single compile config describing the `pixel_values` input.
+        """
+        if rbln_config.image_size is None:
+            for processor in preprocessors:
+                if hasattr(processor, "size"):
+                    if all(required_key in processor.size.keys() for required_key in ["height", "width"]):
+                        rbln_config.image_size = (processor.size["height"], processor.size["width"])
+                    # Only the first preprocessor with a `size` attribute is considered.
+                    break
+        # Static input signature: (batch_size, 3, image_height, image_width) float32; the channel count is fixed at 3.
+        input_info = [
+            (
+                "pixel_values",
+                [
+                    rbln_config.batch_size,
+                    3,
+                    rbln_config.image_height,
+                    rbln_config.image_width,
+                ],
+                "float32",
+            ),
+        ]
+        rbln_config.set_compile_cfgs([RBLNCompileConfig(input_info=input_info)])
+        return rbln_config
+    def forward(
+        self,
+        pixel_values: Optional[torch.FloatTensor] = None,
+        return_dict: bool = True,
+        output_attentions: bool = None,
+        output_hidden_states: bool = None,
+        **kwargs,
+    ) -> Union[Tuple, BackboneOutput]:
+        """
+        Forward pass for the RBLN-optimized Swin backbone model.
+        Args:
+            pixel_values (torch.FloatTensor of shape (batch_size, num_channels, image_size, image_size), optional): The tensors corresponding to the input images. Pixel values can be obtained using ViTImageProcessor. See ViTImageProcessor.call() for details (processor_class uses ViTImageProcessor for processing images).
+            return_dict (bool, optional): Whether or not to return a ModelOutput instead of a plain tuple.
+            output_attentions (bool, optional): Whether or not to return the attentions tensors of all attention layers. See attentions under returned tensors for more detail.
+            output_hidden_states (bool, optional): Whether or not to return the hidden states of all layers. See hidden_states under returned tensors for more detail.
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a BackboneOutput object.
+        Raises:
+            ValueError: If output_attentions or output_hidden_states differs from the value stored in rbln_config, or if the input height/width exceeds the configured image size.
+        """
+        # NOTE(review): the docstring above refers to ViTImageProcessor although this is the Swin backbone — confirm.
+        # Extra keyword arguments are ignored; a warning is logged if any of them is not None.
+        if len(kwargs) > 0 and any(value is not None for value in kwargs.values()):
+            logger.warning(
+                f"Currently, optimum-rbln does not support kwargs {kwargs.keys()} for {self.__class__.__name__}."
+            )
+        # Unspecified flags fall back to the compiled configuration; any other value is rejected below.
+        output_attentions = output_attentions if output_attentions is not None else self.rbln_config.output_attentions
+        output_hidden_states = (
+            output_hidden_states if output_hidden_states is not None else self.rbln_config.output_hidden_states
+        )
+        if output_attentions != self.rbln_config.output_attentions:
+            raise ValueError(
+                f"Variable output_attentions {output_attentions} is not equal to rbln_config.output_attentions {self.rbln_config.output_attentions} "
+                f"Please compile again with the correct argument."
+            )
+        if output_hidden_states != self.rbln_config.output_hidden_states:
+            raise ValueError(
+                f"Variable output_hidden_states {output_hidden_states} is not equal to rbln_config.output_hidden_states {self.rbln_config.output_hidden_states} "
+                f"Please compile again with the correct argument."
+            )
+        # NOTE(review): pixel_values defaults to None, but `.shape` is read unconditionally here.
+        _, _, original_h, original_w = pixel_values.shape
+        if original_h > self.rbln_config.image_height or original_w > self.rbln_config.image_width:
+            raise ValueError(
+                f"Input image size ({original_h}x{original_w}) exceeds the configured maximum size"
+                f" ({self.rbln_config.image_height}x{self.rbln_config.image_width})."
+            )
+        # Smaller inputs are padded on the right (width) and bottom (height) up to the configured size.
+        pad_h = self.rbln_config.image_height - original_h
+        pad_w = self.rbln_config.image_width - original_w
+        padded_pixel_values = F.pad(pixel_values, (0, pad_w, 0, pad_h))
+        # Assumes the call returns a flat, mutable list (it is consumed with pop(0)) ordered as
+        # feature maps, then hidden states, then attentions — TODO confirm against the runtime.
+        output = self.model[0](padded_pixel_values)
+        feature_maps = ()
+        for i in range(len(self.config.out_features)):
+            feature_maps += (output.pop(0),)
+        if self.rbln_config.output_hidden_states:
+            # One hidden state is taken per entry in config.stage_names.
+            hidden_states = ()
+            for i in range(len(self.config.stage_names)):
+                hidden_states += (output.pop(0),)
+        else:
+            hidden_states = None
+        if self.rbln_config.output_attentions:
+            # One attention output is taken per entry in config.depths.
+            attentions = ()
+            for i in range(len(self.config.depths)):
+                attentions += (output.pop(0),)
+        else:
+            attentions = None
+        if not return_dict:
+            # Items that were not produced are omitted from the tuple rather than returned as None.
+            return tuple(item for item in (feature_maps, hidden_states, attentions) if item is not None)
+        else:
+            return BackboneOutput(
+                feature_maps=feature_maps,
+                hidden_states=hidden_states,
+                attentions=attentions,
+            )

optimum/rbln/transformers/models/t5/configuration_t5.py CHANGED Viewed

@@ -32,3 +32,5 @@ class RBLNT5ForConditionalGenerationConfig(RBLNModelForSeq2SeqLMConfig):
     This configuration class stores the configuration parameters specific to
     RBLN-optimized T5 models for conditional text generation tasks.
     """
+    support_paged_attention = False

optimum/rbln/transformers/models/t5/modeling_t5.py CHANGED Viewed

@@ -68,7 +68,7 @@ class RBLNT5EncoderModel(RBLNTransformerEncoderForFeatureExtraction):
     output_class = BaseModelOutputWithPastAndCrossAttentions
     @classmethod
-    def wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: RBLNT5EncoderModelConfig):
+    def _wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: RBLNT5EncoderModelConfig):
         return T5EncoderWrapper(model)
     @classmethod
@@ -113,7 +113,7 @@ class RBLNT5ForConditionalGeneration(RBLNModelForSeq2SeqLM):
     support_causal_attn = False
     @classmethod
-    def wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: RBLNT5ForConditionalGenerationConfig):
+    def _wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: RBLNT5ForConditionalGenerationConfig):
         return T5Wrapper(
             model, enc_max_seq_len=rbln_config.enc_max_seq_len, dec_max_seq_len=rbln_config.dec_max_seq_len
         )

optimum/rbln/transformers/models/t5/t5_architecture.py CHANGED Viewed

@@ -126,7 +126,14 @@ class T5Decoder(Seq2SeqDecoder):
         b_size = attention_mask.shape[0]
         batch_decoder_position_bias = []
         for i in range(b_size):
-            batch_position_bias = self._dec_position_bias[:, :, cache_position[i][0]].unsqueeze(2)
+            if torch.compiler.is_exporting():
+                cache_pos = cache_position[i][0].item()
+                torch._check_is_size(cache_pos)
+                torch._check(cache_pos >= 0)
+                torch._check(cache_pos < self._dec_position_bias.shape[2])
+            else:
+                cache_pos = cache_position[i][0]
+            batch_position_bias = torch.select(self._dec_position_bias, dim=2, index=cache_pos).unsqueeze(2)
             batch_decoder_position_bias.append(batch_position_bias)
         position_bias = torch.cat(batch_decoder_position_bias, dim=0)

optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from typing import Any, Dict, Optional
+from typing import Any, Optional
 from ....configuration_utils import RBLNModelConfig
@@ -17,7 +17,7 @@ class RBLNTimeSeriesTransformerForPredictionConfig(RBLNModelConfig):
         enc_max_seq_len: Optional[int] = None,
         dec_max_seq_len: Optional[int] = None,
         num_parallel_samples: Optional[int] = None,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ):
         """
         Args:
@@ -25,7 +25,7 @@ class RBLNTimeSeriesTransformerForPredictionConfig(RBLNModelConfig):
             enc_max_seq_len (Optional[int]): Maximum sequence length for the encoder.
             dec_max_seq_len (Optional[int]): Maximum sequence length for the decoder.
             num_parallel_samples (Optional[int]): Number of samples to generate in parallel during prediction.
-            **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
         Raises:
             ValueError: If batch_size is not a positive integer.

optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py CHANGED Viewed

@@ -23,24 +23,20 @@
 import inspect
 import logging
-from dataclasses import dataclass
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Callable, List, Optional, Union
 import rebel
 import torch
 from rebel.compile_context import CompileContext
-from transformers import (
-    PretrainedConfig,
-    TimeSeriesTransformerForPrediction,
-    TimeSeriesTransformerModel,
-)
-from transformers.modeling_outputs import ModelOutput, SampleTSPredictionOutput, Seq2SeqTSModelOutput
+from transformers import PretrainedConfig, TimeSeriesTransformerForPrediction, TimeSeriesTransformerModel
+from transformers.modeling_outputs import SampleTSPredictionOutput, Seq2SeqTSModelOutput
 from transformers.modeling_utils import no_init_weights
 from ....configuration_utils import RBLNCompileConfig
 from ....modeling import RBLNModel
 from ....utils.runtime_utils import RBLNPytorchRuntime
+from ...modeling_outputs import RBLNSeq2SeqTSDecoderOutput
 from .configuration_time_series_transformer import RBLNTimeSeriesTransformerForPredictionConfig
 from .time_series_transformers_architecture import TimeSeriesTransformersWrapper
@@ -113,12 +109,6 @@ class RBLNRuntimeDecoder(RBLNPytorchRuntime):
         )
-@dataclass
-class RBLNSeq2SeqTSDecoderOutput(ModelOutput):
-    last_hidden_states: torch.FloatTensor = None
-    params: Tuple[torch.FloatTensor] = None
 class RBLNTimeSeriesTransformerForPrediction(RBLNModel):
     """
     The Time Series Transformer Model with a distribution head on top for time-series forecasting. e.g., for datasets like M4, NN5, or other time series forecasting benchmarks.
@@ -163,7 +153,7 @@ class RBLNTimeSeriesTransformerForPrediction(RBLNModel):
             return redirect(val)
     @classmethod
-    def wrap_model_if_needed(
+    def _wrap_model_if_needed(
         self, model: "PreTrainedModel", rbln_config: RBLNTimeSeriesTransformerForPredictionConfig
     ):
         return TimeSeriesTransformersWrapper(model, rbln_config.num_parallel_samples)
@@ -171,7 +161,7 @@ class RBLNTimeSeriesTransformerForPrediction(RBLNModel):
     @classmethod
     @torch.inference_mode()
     def get_compiled_model(cls, model, rbln_config: RBLNTimeSeriesTransformerForPredictionConfig):
-        wrapped_model = cls.wrap_model_if_needed(model, rbln_config)
+        wrapped_model = cls._wrap_model_if_needed(model, rbln_config)
         enc_compile_config = rbln_config.compile_cfgs[0]
         dec_compile_config = rbln_config.compile_cfgs[1]
@@ -331,12 +321,14 @@ class RBLNTimeSeriesTransformerForPrediction(RBLNModel):
                 tensor_type="pt",
                 device=rbln_config.device_map["encoder"],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             ),
             rebel.Runtime(
                 compiled_models[1],
                 tensor_type="pt",
                 device=rbln_config.device_map["decoder"],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             ),
         ]
@@ -361,6 +353,20 @@ class RBLNTimeSeriesTransformerForPrediction(RBLNModel):
         static_real_features: Optional[torch.Tensor] = None,
         **kwargs,
     ) -> SampleTSPredictionOutput:
+        """
+        Generate pass for the RBLN-optimized Time Series Transformer model for time series forecasting.
+        Args:
+            past_values (torch.FloatTensor of shape (batch_size, sequence_length) or (batch_size, sequence_length, input_size)): Past values of the time series, that serve as context in order to predict the future.
+            past_time_features (torch.FloatTensor of shape (batch_size, sequence_length, num_features)): Required time features, which the model internally will add to past_values.
+            future_time_features (torch.FloatTensor of shape (batch_size, prediction_length, num_features)): Required time features for the prediction window, which the model internally will add to future_values.
+            past_observed_mask (torch.BoolTensor of shape (batch_size, sequence_length) or (batch_size, sequence_length, input_size), optional): Boolean mask to indicate which past_values were observed and which were missing.
+            static_categorical_features (torch.LongTensor of shape (batch_size, number of static categorical features), optional): Optional static categorical features for which the model will learn an embedding, which it will add to the values of the time series.
+            static_real_features (torch.FloatTensor of shape (batch_size, number of static real features), optional): Optional static real features which the model will add to the values of the time series.
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a SampleTSPredictionOutput object.
+        """
         self.validate_batch_size(**{k: v for k, v in locals().items() if isinstance(v, torch.Tensor)})
         outputs = self.encoder(

optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py CHANGED Viewed

@@ -162,7 +162,13 @@ class TimeSeriesTransformersDecoder(nn.Module):
         attention_mask = _prepare_4d_causal_attention_mask(attention_mask, input_shape, inputs_embeds, cache_position)
         hidden_states = self.value_embedding(inputs_embeds)
-        embed_pos = self.embed_positions.weight[cache_position + self.config.context_length]
+        embed_idx = cache_position + self.config.context_length
+        if torch.compiler.is_exporting():
+            embed_idx = embed_idx.item()
+            torch._check_is_size(embed_idx)
+            torch._check(embed_idx >= 0)
+            torch._check(embed_idx < len(self.embed_positions.weight))
+        embed_pos = self.embed_positions.weight[embed_idx]
         hidden_states = self.layernorm_embedding(hidden_states + embed_pos)
         # iterate decoder_layer

optimum/rbln/transformers/models/vit/modeling_vit.py CHANGED Viewed

@@ -12,6 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import Tuple, Union
+import torch
+from transformers.modeling_outputs import ImageClassifierOutput
 from ...modeling_generic import RBLNModelForImageClassification
@@ -23,3 +28,17 @@ class RBLNViTForImageClassification(RBLNModelForImageClassification):
     on RBLN devices, supporting image classification with transformer-based architectures
     that process images as sequences of patches.
     """
+    def forward(self, pixel_values: torch.Tensor, **kwargs) -> Union[ImageClassifierOutput, Tuple]:
+        """
+        Forward pass for the RBLN-optimized Vision Transformer model for image classification.
+        Args:
+            pixel_values (torch.FloatTensor of shape (batch_size, channels, height, width)):
+                The tensors corresponding to the input images.
+            kwargs: Forwarded unchanged to the parent class's forward.
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns an ImageClassifierOutput object.
+        """
+        # Pure delegation: no logic is added on top of the parent implementation.
+        return super().forward(pixel_values, **kwargs)

optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py CHANGED Viewed

@@ -12,10 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from ...configuration_generic import RBLNModelForMaskedLMConfig
+from typing import Any, Optional
+from ....configuration_utils import RBLNModelConfig
-class RBLNWav2Vec2ForCTCConfig(RBLNModelForMaskedLMConfig):
+class RBLNWav2Vec2ForCTCConfig(RBLNModelConfig):
     """
     Configuration class for RBLNWav2Vec2ForCTC.
@@ -23,4 +25,14 @@ class RBLNWav2Vec2ForCTCConfig(RBLNModelForMaskedLMConfig):
     RBLN-optimized Wav2Vec2 models for Connectionist Temporal Classification (CTC) tasks.
     """
-    rbln_model_input_names = ["input_values"]
+    def __init__(
+        self,
+        max_seq_len: Optional[int] = None,
+        batch_size: Optional[int] = None,
+        **kwargs: Any,
+    ):
+        """
+        Args:
+            max_seq_len (Optional[int]): Stored on the config as given; no validation is applied here.
+            batch_size (Optional[int]): Batch size; any falsy value (None or 0) is replaced by 1.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
+        Raises:
+            ValueError: If batch_size is not an int or is negative.
+        """
+        super().__init__(**kwargs)
+        self.max_seq_len = max_seq_len
+        self.batch_size = batch_size or 1
+        # NOTE(review): the message says "positive" while the check is `< 0`; 0 never reaches this point
+        # because `or 1` above already maps it to 1.
+        if not isinstance(self.batch_size, int) or self.batch_size < 0:
+            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")

optimum-rbln 0.8.2a0__py3-none-any.whl → 0.9.3__py3-none-any.whl

optimum-rbln 0.8.2a0__py3-none-any.whl → 0.9.3__py3-none-any.whl