PyPI - optimum-rbln - Versions diffs - 0.8.2a0__py3-none-any.whl → 0.9.3__py3-none-any.whl - Mend

optimum-rbln 0.8.2a0__py3-none-any.whl → 0.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (197) hide show

optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py CHANGED Viewed

@@ -20,7 +20,9 @@ import rebel
 import torch
 from rebel.compile_context import CompileContext
 from transformers import AutoModelForSeq2SeqLM, PretrainedConfig, PreTrainedModel
-from transformers.modeling_outputs import BaseModelOutput, Seq2SeqLMOutput
+from transformers.generation.configuration_utils import GenerationConfig
+from transformers.generation.utils import GenerationMixin
+from transformers.modeling_outputs import BaseModelOutput, ModelOutput, Seq2SeqLMOutput
 from ....configuration_utils import RBLNCompileConfig
 from ....modeling import RBLNModel
@@ -32,13 +34,13 @@ from .configuration_seq2seq import RBLNModelForSeq2SeqLMConfig
 logger = get_logger(__name__)
 if TYPE_CHECKING:
-    from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, GenerationConfig, PretrainedConfig
+    from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PretrainedConfig
 class RBLNRuntimeEncoder(RBLNPytorchRuntime):
     mandatory_members = ["main_input_name"]
-    def forward(self, *args: List[torch.Tensor], **kwargs: Dict[str, torch.Tensor]):
+    def forward(self, *args: List[torch.Tensor], **kwargs: torch.Tensor):
         output = super().forward(*args, **kwargs)
         return BaseModelOutput(last_hidden_state=output)
@@ -83,7 +85,7 @@ class RBLNRuntimeDecoder(RBLNPytorchRuntime):
                 decoding_step = cache_position[b_idx].item()
                 if not (0 <= decoding_step < self.dec_max_seq_len):
                     raise ValueError(
-                        f"Decoding step {decoding_step} out of bounds for attention mask with shape {self.dec_attn_mask.shape}."
+                        f"Decoding step {decoding_step} out of bounds for decoder_max_seq_len ({self.dec_max_seq_len})."
                     )
                 decoder_attention_mask[b_idx, : decoding_step + 1] = 1
@@ -101,7 +103,7 @@ class RBLNRuntimeDecoder(RBLNPytorchRuntime):
         return Seq2SeqLMOutput(logits=lm_logits)
-class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
+class RBLNModelForSeq2SeqLM(RBLNModel, GenerationMixin, ABC):
     """
     This is a generic model class that will be instantiated as one of the model classes of the library (with a sequence-to-sequence language modeling head) when created with the from_pretrained() class method.
     This model inherits from [`RBLNModel`]. Check the superclass documentation for the generic methods the library implements for all its models.
@@ -117,6 +119,7 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
     main_input_name = "input_ids"
     auto_model_class = AutoModelForSeq2SeqLM
     support_causal_attn = None
+    _is_stateful = False
     def __post_init__(self, **kwargs):
         batch_size = self.rbln_config.batch_size
@@ -138,7 +141,7 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
     @classmethod
     @torch.inference_mode()
     def get_compiled_model(cls, model: PreTrainedModel, rbln_config: RBLNModelForSeq2SeqLMConfig):
-        wrapped_model = cls.wrap_model_if_needed(model, rbln_config)
+        wrapped_model = cls._wrap_model_if_needed(model, rbln_config)
         enc_compile_config = rbln_config.compile_cfgs[0]
         dec_compile_config = rbln_config.compile_cfgs[1]
@@ -181,6 +184,21 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
         return {"encoder": compiled_encoder, "decoder": compiled_decoder}
+    @classmethod
+    def _update_paged_attention_config(cls, model_config: PretrainedConfig, rbln_config: RBLNModelForSeq2SeqLMConfig):
+        rbln_config.kvcache_num_blocks = rbln_config.kvcache_num_blocks or rbln_config.batch_size
+        rbln_config.kvcache_block_size = rbln_config.kvcache_block_size or rbln_config.dec_max_seq_len
+        if rbln_config.kvcache_num_blocks != rbln_config.batch_size:
+            raise NotImplementedError(
+                f"kvcache_num_blocks ({rbln_config.kvcache_num_blocks}) must be equal to batch_size ({rbln_config.batch_size}) as flash attention is not supported yet."
+            )
+        if rbln_config.kvcache_block_size != rbln_config.dec_max_seq_len:
+            raise NotImplementedError(
+                f"kvcache_block_size ({rbln_config.kvcache_block_size}) must be equal to dec_max_seq_len ({rbln_config.dec_max_seq_len}) as flash attention is not supported yet."
+            )
     @classmethod
     def _update_rbln_config(
         cls,
@@ -204,12 +222,6 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
             model_config, "max_position_embeddings", None
         )
-        pad_token_id = getattr(model_config, "pad_token_id", None)
-        pad_token_id = pad_token_id or getattr(model_config, "bos_token_id", None)
-        pad_token_id = pad_token_id or getattr(model_config, "eos_token_id", None)
-        pad_token_id = pad_token_id or -1
-        rbln_config.pad_token_id = pad_token_id
         if rbln_config.enc_max_seq_len is None:
             enc_max_seq_len = max_position_embeddings
             for tokenizer in preprocessors:
@@ -238,6 +250,9 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
         if max_position_embeddings is not None and rbln_config.dec_max_seq_len > max_position_embeddings:
             raise ValueError("`dec_max_seq_len` should be less or equal than max_position_embeddings!")
+        if rbln_config.support_paged_attention:
+            cls._update_paged_attention_config(model_config, rbln_config)
         # model input info
         enc_input_info = [
             ("input_ids", [1, rbln_config.enc_max_seq_len], "int64"),
@@ -310,6 +325,7 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
         dec_compile_config = RBLNCompileConfig(compiled_model_name="decoder", input_info=dec_input_info)
         rbln_config.set_compile_cfgs([enc_compile_config, dec_compile_config])
         return rbln_config
     @classmethod
@@ -327,12 +343,14 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
                 tensor_type="pt",
                 device=rbln_config.device_map["encoder"],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             ),
             rebel.Runtime(
                 compiled_models[1],
                 tensor_type="pt",
                 device=rbln_config.device_map["decoder"],
                 activate_profiler=rbln_config.activate_profiler,
+                timeout=rbln_config.timeout,
             ),
         ]
@@ -409,7 +427,7 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
         inputs_tensor = torch.nn.functional.pad(
             inputs_tensor,
             (0, self.rbln_config.enc_max_seq_len - input_len),
-            value=self.rbln_config.pad_token_id,
+            value=self.config.pad_token_id,
         )
         model_kwargs["attention_mask"] = torch.nn.functional.pad(
             model_kwargs["attention_mask"], (0, self.rbln_config.enc_max_seq_len - input_len)
@@ -428,3 +446,32 @@ class RBLNModelForSeq2SeqLM(RBLNModel, ABC):
             model_kwargs["encoder_outputs"] = encoder(**encoder_kwargs, block_tables=block_tables)
         return model_kwargs
+    def generate(
+        self,
+        input_ids: torch.LongTensor,
+        attention_mask: Optional[torch.LongTensor] = None,
+        generation_config: Optional[GenerationConfig] = None,
+        **kwargs,
+    ) -> Union[ModelOutput, torch.LongTensor]:
+        """
+        The generate function is utilized in its standard form as in the HuggingFace transformers library. User can use this function to generate text from the model.
+        Check the [HuggingFace transformers documentation](https://huggingface.co/docs/transformers/v4.57.1/en/main_classes/text_generation#transformers.GenerationMixin.generate) for more details.
+        Args:
+            input_ids (torch.LongTensor): The input ids to the model.
+            attention_mask (torch.LongTensor, optional): The attention mask to the model.
+            generation_config (GenerationConfig, optional): The generation configuration to be used as base parametrization for the generation call. **kwargs passed to generate matching the attributes of generation_config will override them.
+                If generation_config is not provided, the default will be used, which had the following loading priority: 1) from the generation_config.json model file, if it exists; 2) from the model configuration.
+                Please note that unspecified parameters will inherit [GenerationConfig](https://huggingface.co/docs/transformers/v4.57.1/en/main_classes/text_generation#transformers.GenerationConfig)’s default values.
+            kwargs (dict[str, Any], optional): Additional arguments passed to the generate function. See the HuggingFace transformers documentation for more details.
+        Returns:
+            Generates sequences of token ids for models with a language modeling head.
+        """
+        if generation_config is not None:
+            kwargs["generation_config"] = generation_config
+        if attention_mask is not None:
+            kwargs["attention_mask"] = attention_mask
+        return super().generate(input_ids, **kwargs)

optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py CHANGED Viewed

@@ -31,7 +31,7 @@ class Seq2SeqWrapper:
     Args:
         model (nn.Module): The Seq2Seq model to wrap.
         enc_max_seq_len (int): Maximum sequence length for the encoder's position embeddings and cache sizes.
-        **kwargs: Additional arguments to pass to the decoder wrapper.
+        kwargs: Additional arguments to pass to the decoder wrapper.
     """
     def __init__(self, model: nn.Module, enc_max_seq_len: int, **kwargs):
@@ -125,7 +125,7 @@ class Seq2SeqDecoderWrapper(nn.Module):
     Args:
         model (nn.Module): The Seq2Seq model containing the decoder.
-        **kwargs: Additional arguments for decoder configuration.
+        kwargs: Additional arguments for decoder configuration.
     """
     def __init__(self, model: nn.Module, use_attention_mask: bool = True, **kwargs):

optimum/rbln/transformers/models/siglip/__init__.py CHANGED Viewed

@@ -12,9 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .configuration_siglip import (
-    RBLNSiglipVisionModelConfig,
-)
-from .modeling_siglip import (
-    RBLNSiglipVisionModel,
-)
+from .configuration_siglip import RBLNSiglipVisionModelConfig
+from .modeling_siglip import RBLNSiglipVisionModel

optimum/rbln/transformers/models/siglip/configuration_siglip.py CHANGED Viewed

@@ -42,7 +42,7 @@ class RBLNSiglipVisionModelConfig(RBLNModelConfig):
             interpolate_pos_encoding (Optional[bool]): Whether to interpolate the position encoding.
             output_hidden_states: (Optional[bool]): Whether to return hidden states.
             output_attentions: (Optional[bool]): Whether to return attentions.
-            **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
         Raises:
             ValueError: If batch_size is not a positive integer.

optimum/rbln/transformers/models/siglip/modeling_siglip.py CHANGED Viewed

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Optional, Tuple, Union
 import torch
 from transformers import SiglipVisionConfig, SiglipVisionModel
@@ -29,8 +29,6 @@ logger = get_logger(__name__)
 if TYPE_CHECKING:
     from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PreTrainedModel
-    from ....diffusers.modeling_diffusers import RBLNDiffusionMixin, RBLNDiffusionMixinConfig
 class _SiglipVisionModel(torch.nn.Module):
     def __init__(
@@ -65,8 +63,12 @@ class RBLNSiglipVisionModel(RBLNModel):
     on RBLN devices, supporting image encoding for multimodal vision-language tasks.
     """
+    _tp_support = False
     @classmethod
-    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNSiglipVisionModelConfig) -> torch.nn.Module:
+    def _wrap_model_if_needed(
+        cls, model: torch.nn.Module, rbln_config: RBLNSiglipVisionModelConfig
+    ) -> torch.nn.Module:
         wrapper_cfg = {
             "interpolate_pos_encoding": rbln_config.interpolate_pos_encoding,
             "output_hidden_states": rbln_config.output_hidden_states,
@@ -74,12 +76,6 @@ class RBLNSiglipVisionModel(RBLNModel):
         }
         return _SiglipVisionModel(model, **wrapper_cfg).eval()
-    @classmethod
-    def update_rbln_config_using_pipe(
-        cls, pipe: "RBLNDiffusionMixin", rbln_config: "RBLNDiffusionMixinConfig", submodule_name: str
-    ) -> "RBLNDiffusionMixinConfig":
-        return rbln_config
     @classmethod
     def _update_rbln_config(
         cls,
@@ -126,12 +122,21 @@ class RBLNSiglipVisionModel(RBLNModel):
         output_attentions: bool = None,
         output_hidden_states: bool = None,
         interpolate_pos_encoding: bool = False,
-        **kwargs: Dict[str, Any],
+        **kwargs: Any,
     ) -> Union[Tuple, BaseModelOutputWithPooling]:
-        if len(kwargs) > 0 and any(value is not None for value in kwargs.values()):
-            logger.warning(
-                f"Currently, optimum-rbln does not support kwargs {kwargs.keys()} for {self.__class__.__name__}."
-            )
+        """
+        Forward pass for the RBLN-optimized SigLIP vision model.
+        Args:
+            pixel_values (torch.FloatTensor of shape (batch_size, num_channels, image_size, image_size), optional): The tensors corresponding to the input images. Pixel values can be obtained using ViTImageProcessor. See ViTImageProcessor.call() for details (processor_class uses ViTImageProcessor for processing images).
+            return_dict (bool, optional): Whether or not to return a ModelOutput instead of a plain tuple.
+            output_attentions (bool, optional): Whether or not to return the attentions tensors of all attention layers. See attentions under returned tensors for more detail.
+            output_hidden_states (bool, optional): Whether or not to return the hidden states of all layers. See hidden_states under returned tensors for more detail.
+            interpolate_pos_encoding (bool, defaults to False): Whether to interpolate the pre-trained position encodings.
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a BaseModelOutputWithPooling object.
+        """
         output_attentions = output_attentions if output_attentions is not None else self.rbln_config.output_attentions
         output_hidden_states = (
@@ -156,7 +161,7 @@ class RBLNSiglipVisionModel(RBLNModel):
                 f"Please compile again with the correct argument."
             )
-        output = super().forward(pixel_values, return_dict=return_dict)
+        output = super().forward(pixel_values, return_dict=return_dict, **kwargs)
         return output
     def _prepare_output(self, output, return_dict):

optimum/rbln/transformers/models/swin/__init__.py ADDED Viewed

@@ -0,0 +1,16 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .configuration_swin import RBLNSwinBackboneConfig
+from .modeling_swin import RBLNSwinBackbone

optimum/rbln/transformers/models/swin/configuration_swin.py ADDED Viewed

@@ -0,0 +1,42 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Optional, Tuple, Union
+from ...configuration_generic import RBLNModelForImageClassificationConfig
+class RBLNSwinBackboneConfig(RBLNModelForImageClassificationConfig):
+    def __init__(
+        self,
+        image_size: Optional[Union[int, Tuple[int, int]]] = None,
+        batch_size: Optional[int] = None,
+        output_hidden_states: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
+        **kwargs: Any,
+    ):
+        """
+        Args:
+            batch_size (Optional[int]): The batch size for text processing. Defaults to 1.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
+        Raises:
+            ValueError: If batch_size is not a positive integer.
+        """
+        super().__init__(**kwargs)
+        self.batch_size = batch_size or 1
+        if not isinstance(self.batch_size, int) or self.batch_size < 0:
+            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+        self.image_size = image_size
+        self.output_hidden_states = output_hidden_states
+        self.output_attentions = output_attentions

optimum-rbln 0.8.2a0__py3-none-any.whl → 0.9.3__py3-none-any.whl

optimum-rbln 0.8.2a0__py3-none-any.whl → 0.9.3__py3-none-any.whl