PyPI - optimum-rbln - Versions diffs - 0.9.3rc0__py3-none-any.whl → 0.9.5a4__py3-none-any.whl - Mend

optimum-rbln 0.9.3rc0__py3-none-any.whl → 0.9.5a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (157) hide show

optimum/rbln/transformers/models/phi/phi_architecture.py CHANGED Viewed

@@ -20,7 +20,6 @@ from transformers import PhiForCausalLM
 from ..decoderonly.decoderonly_architecture import (
     DecoderOnlyAttention,
     DecoderOnlyLayer,
-    DecoderOnlyModel,
     DecoderOnlyWrapper,
     apply_rotary_pos_emb_partial,
 )
@@ -37,9 +36,6 @@ class PhiWrapper(DecoderOnlyWrapper):
     def get_rbln_layer_class(self):
         return PhiLayer
-    def get_rbln_model_class(self):
-        return PhiModel
     def get_model_layer(self, model: Union["PhiForCausalLM", "PhiModel"]):
         return model.model if self.is_causal_lm else model
@@ -48,13 +44,15 @@ class PhiWrapper(DecoderOnlyWrapper):
 class PhiAttention(DecoderOnlyAttention):
-    def __post_init__(self):
-        self.q_proj = self._original_mod.q_proj
-        self.k_proj = self._original_mod.k_proj
-        self.v_proj = self._original_mod.v_proj
-        self.o_proj = self._original_mod.dense
-        self.qk_layernorm = self._original_mod.qk_layernorm
-        self.rotary_ndims = self._original_mod.rotary_ndims
+    def __post_init__(self, self_attn):
+        self.q_proj = self_attn.q_proj
+        self.k_proj = self_attn.k_proj
+        self.v_proj = self_attn.v_proj
+        self.o_proj = self_attn.dense
+        self.qk_layernorm = self_attn.qk_layernorm
+        self.rotary_ndims = self_attn.rotary_ndims
+        self.q_layernorm = getattr(self_attn, "q_layernorm", None)
+        self.k_layernorm = getattr(self_attn, "k_layernorm", None)
     def projection(self, hidden_states, lora_int_id) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
         if lora_int_id is not None:
@@ -65,8 +63,8 @@ class PhiAttention(DecoderOnlyAttention):
         value_states = self.v_proj(hidden_states)
         if self.qk_layernorm:
-            query_states = self._original_mod.q_layernorm(query_states)
-            key_states = self._original_mod.k_layernorm(key_states)
+            query_states = self.q_layernorm(query_states)
+            key_states = self.k_layernorm(key_states)
         return query_states, key_states, value_states
@@ -75,8 +73,7 @@ class PhiAttention(DecoderOnlyAttention):
 class PhiLayer(DecoderOnlyLayer):
-    def get_post_attention_layernorm(self):
-        raise NotImplementedError
+    _POST_ATTN_LAYERNORM = None
     def forward(
         self,
@@ -103,13 +100,8 @@ class PhiLayer(DecoderOnlyLayer):
             block_tables=block_tables,
         )
-        feed_forward_hidden_states = self._original_mod.mlp(hidden_states)
+        feed_forward_hidden_states = self.mlp(hidden_states)
         hidden_states = attn_output + feed_forward_hidden_states + residual
         return hidden_states
-class PhiModel(DecoderOnlyModel):
-    def get_last_layernorm(self):
-        return self._original_mod.final_layernorm

optimum/rbln/transformers/models/pixtral/modeling_pixtral.py CHANGED Viewed

@@ -229,7 +229,7 @@ class RBLNPixtralVisionModel(RBLNModel):
         torch.save(save_dict, save_dir_path / subfolder / "torch_artifacts.pth")
     @classmethod
-    def wrap_model_if_needed(
+    def _wrap_model_if_needed(
         cls, model: torch.nn.Module, rbln_config: RBLNPixtralVisionModelConfig
     ) -> torch.nn.Module:
         wrapper_cfg = {
@@ -293,6 +293,18 @@ class RBLNPixtralVisionModel(RBLNModel):
         return_dict: bool = True,
         **kwargs,
     ) -> Union[Tuple, BaseModelOutput]:
+        """
+        Forward pass for the RBLN-optimized Pixtral vision model.
+        Args:
+            pixel_values (torch.Tensor of shape (batch_size, num_channels, image_size, image_size)) — The tensors corresponding to the input images. Pixel values can be obtained using PixtralImageProcessor. See PixtralImageProcessor.call() for details (PixtralProcessor uses PixtralImageProcessor for processing images).
+            image_sizes (torch.Tensor of shape (batch_size, 2), optional) — The sizes of the images in the batch, being (height, width) for each image.
+            output_hidden_states (bool, optional) — Whether or not to return the hidden states of all layers. See hidden_states under returned tensors for more detail.
+            return_dict (bool, optional) — Whether or not to return a ModelOutput instead of a plain tuple.
+        Returns:
+            BaseModelOutput or tuple(torch.FloatTensor)
+        """
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.rbln_config.output_hidden_states
         )

optimum/rbln/transformers/models/pixtral/pixtral_architecture.py CHANGED Viewed

@@ -24,8 +24,8 @@ class PixtralAttention(nn.Module):
     def __init__(self, self_attention):
         super().__init__()
         self.original_model = self_attention
-        self.num_heads = getattr(self.original_model, "num_heads", None) or getattr(
-            self.original_model.config, "num_attention_heads"
+        self.num_heads = (
+            getattr(self.original_model, "num_heads", None) or self.original_model.config.num_attention_heads
         )
         self.head_dim = self.original_model.head_dim
         self.scaling = self.head_dim**-0.5

optimum/rbln/transformers/models/qwen2/modeling_qwen2.py CHANGED Viewed

@@ -12,13 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from transformers import PretrainedConfig
 from ....utils import logging
 from ...models.decoderonly import (
     RBLNDecoderOnlyModel,
     RBLNDecoderOnlyModelForCausalLM,
-    RBLNDecoderOnlyModelForCausalLMConfig,
 )
 from .qwen2_architecture import QWEN2Wrapper
@@ -87,19 +85,6 @@ class RBLNQwen2ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
     _decoder_wrapper_cls = QWEN2Wrapper
-    @classmethod
-    def _update_sliding_window_config(
-        cls, model_config: PretrainedConfig, rbln_config: RBLNDecoderOnlyModelForCausalLMConfig
-    ):
-        # https://github.com/huggingface/transformers/issues/35896
-        # There seems to be a bug in transformers(v4.52.4). Therefore, similar to when attn_implementation is eager,
-        # we set all layers to use sliding window in this version. This should be updated once the bug is fixed.
-        rbln_config.cache_impl = "sliding_window"
-        rbln_config.sliding_window = model_config.sliding_window
-        rbln_config.sliding_window_layers = list(range(model_config.num_hidden_layers))
-        return rbln_config
 class RBLNQwen2Model(RBLNDecoderOnlyModel):
     """
@@ -108,16 +93,3 @@ class RBLNQwen2Model(RBLNDecoderOnlyModel):
     """
     _decoder_wrapper_cls = QWEN2Wrapper
-    @classmethod
-    def _update_sliding_window_config(
-        cls, model_config: PretrainedConfig, rbln_config: RBLNDecoderOnlyModelForCausalLMConfig
-    ):
-        # https://github.com/huggingface/transformers/issues/35896
-        # There seems to be a bug in transformers(v4.52.4). Therefore, similar to when attn_implementation is eager,
-        # we set all layers to use sliding window in this version. This should be updated once the bug is fixed.
-        rbln_config.cache_impl = "sliding_window"
-        rbln_config.sliding_window = model_config.sliding_window
-        rbln_config.sliding_window_layers = list(range(model_config.num_hidden_layers))
-        return rbln_config

optimum/rbln/transformers/models/qwen2_5_vl/__init__.py CHANGED Viewed

@@ -15,5 +15,10 @@
 from .configuration_qwen2_5_vl import (
     RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
     RBLNQwen2_5_VLForConditionalGenerationConfig,
+    RBLNQwen2_5_VLModelConfig,
+)
+from .modeling_qwen2_5_vl import (
+    RBLNQwen2_5_VisionTransformerPretrainedModel,
+    RBLNQwen2_5_VLForConditionalGeneration,
+    RBLNQwen2_5_VLModel,
 )
-from .modeling_qwen2_5_vl import RBLNQwen2_5_VisionTransformerPretrainedModel, RBLNQwen2_5_VLForConditionalGeneration

optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py CHANGED Viewed

@@ -15,7 +15,7 @@
 from typing import Any, List, Optional, Union
 from ....configuration_utils import RBLNModelConfig
-from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausalLMConfig
+from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelConfig, RBLNDecoderOnlyModelForCausalLMConfig
 class RBLNQwen2_5_VLForConditionalGenerationConfig(RBLNDecoderOnlyModelForCausalLMConfig):
@@ -56,6 +56,16 @@ class RBLNQwen2_5_VLForConditionalGenerationConfig(RBLNDecoderOnlyModelForCausal
         self.visual = visual
+class RBLNQwen2_5_VLModelConfig(RBLNDecoderOnlyModelConfig):
+    """
+    Configuration class for RBLNQwen2_5_VLModel.
+    """
+    def __init__(self, visual: Optional[RBLNModelConfig] = None, **kwargs: Any):
+        super().__init__(**kwargs)
+        self.visual = self.initialize_submodule_config(submodule_config=visual)
 class RBLNQwen2_5_VisionTransformerPretrainedModelConfig(RBLNModelConfig):
     """
     Configuration class for RBLNQwen2_5_VisionTransformerPretrainedModel.

optimum-rbln 0.9.3rc0__py3-none-any.whl → 0.9.5a4__py3-none-any.whl

optimum-rbln 0.9.3rc0__py3-none-any.whl → 0.9.5a4__py3-none-any.whl