optimum-rbln 0.1.9__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__init__.py +47 -9
- optimum/rbln/__version__.py +1 -1
- optimum/rbln/diffusers/models/autoencoder_kl.py +36 -31
- optimum/rbln/diffusers/models/controlnet.py +53 -43
- optimum/rbln/diffusers/models/unet_2d_condition.py +40 -31
- optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +4 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +28 -23
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +28 -23
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +28 -37
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +30 -39
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +24 -14
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +24 -15
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +26 -17
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +26 -17
- optimum/rbln/modeling_alias.py +6 -11
- optimum/rbln/modeling_base.py +467 -261
- optimum/rbln/modeling_config.py +199 -73
- optimum/rbln/transformers/__init__.py +43 -1
- optimum/rbln/transformers/models/__init__.py +23 -1
- optimum/rbln/transformers/models/auto/__init__.py +14 -0
- optimum/rbln/transformers/models/auto/auto_factory.py +84 -0
- optimum/rbln/transformers/models/auto/modeling_auto.py +95 -0
- optimum/rbln/transformers/models/bart/__init__.py +1 -0
- optimum/rbln/transformers/models/bart/bart_architecture.py +203 -58
- optimum/rbln/transformers/models/bart/modeling_bart.py +125 -0
- optimum/rbln/transformers/models/bert/__init__.py +24 -0
- optimum/rbln/transformers/models/bert/modeling_bert.py +101 -0
- optimum/rbln/transformers/models/clip/__init__.py +1 -1
- optimum/rbln/transformers/models/clip/modeling_clip.py +127 -26
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +28 -4
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +409 -150
- optimum/rbln/transformers/models/dpt/modeling_dpt.py +21 -8
- optimum/rbln/transformers/models/exaone/__init__.py +32 -0
- optimum/rbln/transformers/models/exaone/exaone_architecture.py +72 -0
- optimum/rbln/transformers/models/exaone/hf_hub_cached/configuration_exaone.py +181 -0
- optimum/rbln/transformers/models/exaone/hf_hub_cached/modeling_exaone.py +1725 -0
- optimum/rbln/transformers/models/exaone/modeling_exaone.py +78 -0
- optimum/rbln/transformers/models/gemma/modeling_gemma.py +1 -1
- optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +4 -1
- optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +1 -1
- optimum/rbln/transformers/models/llama/modeling_llama.py +1 -1
- optimum/rbln/transformers/models/llava_next/__init__.py +24 -0
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +662 -0
- optimum/rbln/transformers/models/midm/midm_architecture.py +5 -1
- optimum/rbln/transformers/models/midm/modeling_midm.py +6 -1
- optimum/rbln/transformers/models/mistral/modeling_mistral.py +1 -1
- optimum/rbln/transformers/models/phi/__init__.py +24 -0
- optimum/rbln/transformers/models/phi/modeling_phi.py +69 -0
- optimum/rbln/transformers/models/phi/phi_architecture.py +406 -0
- optimum/rbln/transformers/models/qwen2/__init__.py +24 -0
- optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +67 -0
- optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +29 -0
- optimum/rbln/transformers/models/seq2seq/__init__.py +24 -0
- optimum/rbln/{modeling_seq2seq.py → transformers/models/seq2seq/modeling_seq2seq.py} +198 -168
- optimum/rbln/transformers/models/t5/__init__.py +1 -0
- optimum/rbln/transformers/models/t5/modeling_t5.py +55 -0
- optimum/rbln/transformers/models/t5/t5_architecture.py +122 -47
- optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +17 -12
- optimum/rbln/transformers/models/whisper/generation_whisper.py +68 -0
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +172 -111
- optimum/rbln/transformers/models/whisper/whisper_architecture.py +44 -17
- optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +18 -16
- optimum/rbln/transformers/utils/rbln_quantization.py +48 -60
- optimum/rbln/utils/import_utils.py +50 -1
- optimum/rbln/utils/logging.py +82 -0
- optimum/rbln/utils/runtime_utils.py +33 -0
- optimum/rbln/utils/timer_utils.py +43 -0
- {optimum_rbln-0.1.9.dist-info → optimum_rbln-0.1.12.dist-info}/METADATA +9 -7
- optimum_rbln-0.1.12.dist-info/RECORD +103 -0
- {optimum_rbln-0.1.9.dist-info → optimum_rbln-0.1.12.dist-info}/WHEEL +1 -1
- optimum_rbln-0.1.12.dist-info/entry_points.txt +4 -0
- optimum_rbln-0.1.9.dist-info/RECORD +0 -78
- {optimum_rbln-0.1.9.dist-info → optimum_rbln-0.1.12.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/transformers/models/bart/bart_architecture.py

```diff
@@ -47,6 +47,12 @@ from transformers.utils import logging
 logger = logging.get_logger(__name__)
 
 
+class BartWrapper:
+    def __init__(self, model):
+        self.encoder = BartEncoderWrapper(model)
+        self.decoder = BartDecoderWrapper(model)
+
+
 class _BartAttention(BartAttention):
     def forward(
         self,
@@ -54,6 +60,7 @@ class _BartAttention(BartAttention):
         past_key_value: Tuple[torch.Tensor],
         attention_mask: torch.Tensor,
         cache_position: torch.Tensor,
+        batch_index: torch.Tensor,
         key_value_states: Optional[torch.Tensor] = None,
     ) -> Tuple[torch.Tensor, Tuple[torch.Tensor]]:
         bsz, tgt_len, _ = hidden_states.size()
@@ -72,28 +79,83 @@ class _BartAttention(BartAttention):
         else:
             key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
             value_states = self._shape(self.v_proj(hidden_states), -1, bsz)
-            key_states = past_key_value[0].slice_scatter(
-                key_states, dim=2, start=cache_position, end=cache_position + 1
-            )
-            value_states = past_key_value[1].slice_scatter(
-                value_states, dim=2, start=cache_position, end=cache_position + 1
-            )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if cache_position.dim() > 0:
+            proj_shape = (bsz, self.num_heads, -1, self.head_dim)
+            query_states = self._shape(query_states, tgt_len, bsz).view(*proj_shape)
+            key_states = key_states.reshape(*proj_shape)
+            value_states = value_states.reshape(*proj_shape)
+
+            all_key_states = []
+            all_value_states = []
+            all_attn_output = []
+            for b in range(bsz):
+                batch_query_states = query_states[b].unsqueeze(0).unsqueeze(2)
+                batch_attention_mask = attention_mask[b].unsqueeze(0).unsqueeze(2)
+                batch_key_states = key_states[b].unsqueeze(0).unsqueeze(2)
+                batch_value_states = value_states[b].unsqueeze(0).unsqueeze(2)
+                if not is_cross_attention:
+                    batch_key_states = (
+                        past_key_value[0][b]
+                        .unsqueeze(0)
+                        .unsqueeze(2)
+                        .slice_scatter(
+                            batch_key_states, dim=-2, start=cache_position[b][0], end=cache_position[b][0] + 1
+                        )
+                    )
+                    batch_value_states = (
+                        past_key_value[1][b]
+                        .unsqueeze(0)
+                        .unsqueeze(2)
+                        .slice_scatter(
+                            batch_value_states, dim=-2, start=cache_position[b][0], end=cache_position[b][0] + 1
+                        )
+                    )
+                attn_weights = torch.matmul(batch_query_states, batch_key_states.transpose(3, 4))
+                attn_weights = attn_weights + batch_attention_mask
+                attn_weights = nn.functional.softmax(attn_weights, dim=-1)
+
+                attn_output = torch.matmul(attn_weights, batch_value_states)
+                attn_output = attn_output.view(1, self.num_heads, tgt_len, self.head_dim)
+                attn_output = attn_output.transpose(1, 2)
+                attn_output = attn_output.reshape(1, tgt_len, self.embed_dim)
+                all_key_states.append(batch_key_states)
+                all_value_states.append(batch_value_states)
+                all_attn_output.append(attn_output)
+            key_states = torch.cat(all_key_states, dim=0).squeeze(2)
+            value_states = torch.cat(all_value_states, dim=0).squeeze(2)
+            attn_output = torch.cat(all_attn_output, dim=0)
+
+        else:
+            if batch_index is None or batch_index == -1:
+                batch_index = 0
+
+            if not is_cross_attention:
+                key_states = past_key_value[0].slice_scatter(
+                    key_states, dim=2, start=cache_position, end=cache_position + 1
+                )
+                value_states = past_key_value[1].slice_scatter(
+                    value_states, dim=2, start=cache_position, end=cache_position + 1
+                )
+
+            proj_shape = (bsz * self.num_heads, -1, self.head_dim)
+            query_states = self._shape(query_states, tgt_len, bsz).view(*proj_shape)
+            key_states = key_states.reshape(*proj_shape)
+            value_states = value_states.reshape(*proj_shape)
+
+            src_len = key_states.size(1)
+            attn_weights = torch.bmm(query_states, key_states.transpose(1, 2))
+            attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attention_mask
+            attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
+            attn_weights = nn.functional.softmax(attn_weights, dim=-1)
+
+            attn_output = torch.bmm(attn_weights, value_states)
+            attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim)
+            attn_output = attn_output.transpose(1, 2)
+            key_states = key_states.unsqueeze(0)
+            value_states = value_states.unsqueeze(0)
+            attn_output = attn_output.reshape(bsz, tgt_len, self.embed_dim)
+
         attn_output = self.out_proj(attn_output)
 
         present_key_value = (key_states, value_states)
@@ -108,6 +170,7 @@ class _BartSdpaAttention(BartSdpaAttention):
         past_key_value: Tuple[torch.Tensor],
         attention_mask: torch.Tensor,
         cache_position: torch.Tensor,
+        batch_index: torch.Tensor,
         key_value_states: Optional[torch.Tensor] = None,
     ) -> Tuple[torch.Tensor, Tuple[torch.Tensor]]:
         bsz, tgt_len, _ = hidden_states.size()
@@ -126,23 +189,71 @@ class _BartSdpaAttention(BartSdpaAttention):
         else:
             key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
             value_states = self._shape(self.v_proj(hidden_states), -1, bsz)
-            key_states = past_key_value[0].slice_scatter(
-                key_states, dim=2, start=cache_position, end=cache_position + 1
-            )
-            value_states = past_key_value[1].slice_scatter(
-                value_states, dim=2, start=cache_position, end=cache_position + 1
-            )
 
         query_states = self._shape(query_states, tgt_len, bsz)
 
-
-
-
-
-
-
-
-
+        if (batch_index is None or batch_index == -1) and bsz > 1:
+            all_key_states = []
+            all_value_states = []
+            all_attn_output = []
+
+            for b in range(bsz):
+                batch_query_states = query_states[b].unsqueeze(0)
+                batch_attention_mask = attention_mask[b].unsqueeze(0)
+                batch_key_states = key_states[b].unsqueeze(0)
+                batch_value_states = value_states[b].unsqueeze(0)
+
+                if not is_cross_attention:
+                    batch_key_states = (
+                        past_key_value[0][b]
+                        .unsqueeze(0)
+                        .slice_scatter(
+                            batch_key_states, dim=-2, start=cache_position[b][0], end=cache_position[b][0] + 1
+                        )
+                    )
+                    batch_value_states = (
+                        past_key_value[1][b]
+                        .unsqueeze(0)
+                        .slice_scatter(
+                            batch_value_states, dim=-2, start=cache_position[b][0], end=cache_position[b][0] + 1
+                        )
+                    )
+
+                attn_output = torch.nn.functional.scaled_dot_product_attention(
+                    batch_query_states, batch_key_states, batch_value_states, attn_mask=batch_attention_mask
+                )
+                attn_output = attn_output.transpose(1, 2)
+                attn_output = attn_output.reshape(1, tgt_len, self.embed_dim)
+                all_key_states.append(batch_key_states)
+                all_value_states.append(batch_value_states)
+                all_attn_output.append(attn_output)
+
+            key_states = torch.cat(all_key_states, dim=0)
+            value_states = torch.cat(all_value_states, dim=0)
+            attn_output = torch.cat(all_attn_output, dim=0)
+
+        else:
+            if batch_index is None or batch_index == -1:
+                batch_index = 0
+
+            if not is_cross_attention:
+                key_states = past_key_value[0].slice_scatter(
+                    key_states, dim=2, start=cache_position, end=cache_position + 1
+                )
+                value_states = past_key_value[1].slice_scatter(
+                    value_states, dim=2, start=cache_position, end=cache_position + 1
+                )
+
+            # need 4d shape (input tensors) for scaled_dot_product_attention
+            attn_output = torch.nn.functional.scaled_dot_product_attention(
+                query_states,
+                key_states,
+                value_states,
+                attn_mask=attention_mask,
+            )
+            attn_output = attn_output.transpose(1, 2)
+            attn_output = attn_output.reshape(bsz, tgt_len, self.embed_dim)
+
         attn_output = self.out_proj(attn_output)
 
         present_key_value = (key_states, value_states)
@@ -162,6 +273,7 @@ class _BartDecoderLayer(BartDecoderLayer):
         encoder_hidden_states: torch.Tensor,
         past_key_value: Tuple[torch.Tensor],
         cache_position: torch.Tensor,
+        batch_ids: torch.Tensor,
         attn_impl: str = "eager",
     ) -> Tuple[torch.Tensor, Tuple[torch.Tensor]]:
         # Self Attention Block
@@ -174,6 +286,7 @@ class _BartDecoderLayer(BartDecoderLayer):
             past_key_value=self_attn_past_key_value,
             attention_mask=attention_mask,
             cache_position=cache_position,
+            batch_index=batch_ids,
         )
         hidden_states = residual + hidden_states
         hidden_states = self.self_attn_layer_norm(hidden_states)
@@ -189,6 +302,7 @@ class _BartDecoderLayer(BartDecoderLayer):
             past_key_value=cross_attn_past_key_value,
             attention_mask=encoder_attention_mask,
             cache_position=cache_position,
+            batch_index=batch_ids,
         )
         hidden_states = residual + hidden_states
         hidden_states = self.encoder_attn_layer_norm(hidden_states)
@@ -213,14 +327,32 @@ class _BartDecoder(BartDecoder):
         encoder_hidden_states: torch.Tensor,
         past_key_values: torch.Tensor,
         cache_position: torch.Tensor,
+        batch_ids: torch.Tensor,
         attn_impl: str = "eager",
     ):
         # embedding
-
-
+        if hasattr(self, "embed_scale"):
+            inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
+        else:
+            inputs_embeds = self.embed_tokens(input_ids)
+
+        if cache_position.dim() == 0:
+            positions_idx = cache_position + self.embed_positions.offset
+            positions = self.embed_positions.weight[positions_idx]
+            hidden_states = inputs_embeds + positions
+        else:
+            hidden_all = []
+            # compiler pattern base dependency -> take + add
+            for i in range(input_ids.shape[0]):
+                # cache position [N,1]
+                positions_idx = cache_position[i]
+                # offset is set 2 in bart embedding
+                position_weight = self.embed_positions.weight[2:]
+                position = position_weight[positions_idx]
+                batch_hidden = position + inputs_embeds[i]
+                hidden_all.append(batch_hidden)
+            hidden_states = torch.stack(hidden_all, dim=0)
 
-        inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
-        hidden_states = inputs_embeds + positions
         hidden_states = self.layernorm_embedding(hidden_states)
 
         # prepare attn_mask
@@ -230,14 +362,14 @@ class _BartDecoder(BartDecoder):
                 attention_mask, input_shape, inputs_embeds, cache_position
             )
             encoder_attention_mask = _prepare_4d_attention_mask_for_sdpa(
-                encoder_attention_mask,
+                encoder_attention_mask, torch.float32, tgt_len=input_shape[-1]
             )
         else:
             attention_mask = _prepare_4d_causal_attention_mask(
                 attention_mask, input_shape, inputs_embeds, cache_position
             )
             encoder_attention_mask = _prepare_4d_attention_mask(
-                encoder_attention_mask,
+                encoder_attention_mask, torch.float32, tgt_len=input_shape[-1]
             )
 
         # iterate decoder_layer
@@ -252,6 +384,7 @@ class _BartDecoder(BartDecoder):
                 encoder_attention_mask=encoder_attention_mask,
                 past_key_value=past_key_value,
                 cache_position=cache_position,
+                batch_ids=batch_ids,
                 attn_impl=attn_impl,
             )
             hidden_states = layer_outputs[0]
@@ -277,9 +410,14 @@ class BartDecoderWrapper(torch.nn.Module):
         attention_mask: torch.Tensor,
         encoder_attention_mask: torch.Tensor,
         cache_position: torch.Tensor,
+        batch_position: torch.Tensor,
         self_kv_cache: torch.Tensor,
         cross_kv_cache: torch.Tensor,
     ) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor]]:
+        if input_ids.shape[1] == 1:
+            rbln_batch_position = None
+        else:
+            rbln_batch_position = batch_position
         # prepare past_key_values
         kv_cache = ()
         for i in range(0, self.num_layers * 2, 2):
@@ -291,7 +429,6 @@ class BartDecoderWrapper(torch.nn.Module):
                     cross_kv_cache[i + 1],
                 ),
             )
-
         # decode
         decoder_outputs = _BartDecoder.forward(
             self.decoder,
@@ -302,6 +439,7 @@ class BartDecoderWrapper(torch.nn.Module):
             past_key_values=kv_cache,
             encoder_hidden_states=torch.tensor([1]),
             attn_impl=self.config._attn_implementation,
+            batch_ids=rbln_batch_position,
         )
         sequence_output = decoder_outputs[0]
         lm_logits = self.lm_head(sequence_output)
@@ -314,7 +452,8 @@ class BartDecoderWrapper(torch.nn.Module):
             self_kv_cache.append(past_key_values[i][1])
         self_kv_cache = torch.stack(self_kv_cache, dim=0)
 
-        return
+        # return batch_position to keep it as a variable within the graph
+        return lm_logits, self_kv_cache, batch_position
 
 
 class BartEncoderWrapper(torch.nn.Module):
@@ -330,10 +469,13 @@ class BartEncoderWrapper(torch.nn.Module):
         self.num_heads = self.config.decoder_attention_heads
         self.d_kv = self.config.d_model // self.num_heads
 
-    def forward(
-
-
-
+    def forward(
+        self,
+        input_ids: torch.LongTensor,
+        attention_mask: torch.LongTensor,
+        cross_key_value: torch.Tensor = None,
+        batch_idx: torch.Tensor = None,
+    ) -> Tuple[torch.Tensor]:
         # 1. run encoder
         encoder_outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
         last_hidden_states = encoder_outputs[0]
@@ -341,32 +483,35 @@ class BartEncoderWrapper(torch.nn.Module):
         # 2. run dummy decoder to get pre-calculated cross-key_values for generation
         dummy_past_key_value = []
        for _ in range(self.num_layers):
-            pkv_self_attn_key = torch.zeros(
-            pkv_self_attn_value = torch.zeros(
-            pkv_cross_attn_key = torch.zeros(
-            pkv_cross_attn_value = torch.zeros(
+            pkv_self_attn_key = torch.zeros(1, self.num_heads, self.decoder_max_length, self.d_kv)
+            pkv_self_attn_value = torch.zeros(1, self.num_heads, self.decoder_max_length, self.d_kv)
+            pkv_cross_attn_key = torch.zeros(1, self.num_heads, self.encoder_max_length, self.d_kv)
+            pkv_cross_attn_value = torch.zeros(1, self.num_heads, self.encoder_max_length, self.d_kv)
             layer_pkv = (pkv_self_attn_key, pkv_self_attn_value, pkv_cross_attn_key, pkv_cross_attn_value)
             dummy_past_key_value.append(layer_pkv)
 
-        decoder_attention_mask = torch.zeros(
+        decoder_attention_mask = torch.zeros(1, self.decoder_max_length, dtype=torch.float32)
        decoder_attention_mask[:, :1] = 1
 
         decoder_outputs = _BartDecoder.forward(
             self.decoder,
-            input_ids=torch.zeros((
+            input_ids=torch.zeros((1, 1), dtype=torch.int64),
             attention_mask=decoder_attention_mask,
             encoder_attention_mask=attention_mask,
             cache_position=torch.tensor(0, dtype=torch.int32),
             encoder_hidden_states=last_hidden_states,
             past_key_values=dummy_past_key_value,
+            batch_ids=torch.tensor(0, dtype=torch.int32),
             attn_impl=self.config._attn_implementation,
         )
         first_past_kv = decoder_outputs[1]
 
-        # 3. return cross_key_values to recurrence port. fyi (enc_ir.outputs[0] -> dec_ir.inputs[5])
         encoder_kv = []
-        for
-        encoder_kv.append(
-
+        for i in range(self.model.config.decoder_layers):
+            encoder_kv.append(first_past_kv[i][2].unsqueeze(0))
+            encoder_kv.append(first_past_kv[i][3].unsqueeze(0))
+        encoder_kv = torch.cat(encoder_kv, dim=0)
+
+        cross_key_value = cross_key_value.slice_scatter(encoder_kv, dim=1, start=batch_idx, end=batch_idx + 1)
 
-        return
+        return cross_key_value
```
optimum/rbln/transformers/models/bart/modeling_bart.py (new file)

```diff
@@ -0,0 +1,125 @@
+# Copyright 2024 Rebellions Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Portions of this software are licensed under the Apache License,
+# Version 2.0. See the NOTICE file distributed with this work for
+# additional information regarding copyright ownership.
+
+# All other portions of this software, including proprietary code,
+# are the intellectual property of Rebellions Inc. and may not be
+# copied, modified, or distributed without prior written permission
+# from Rebellions Inc.
+
+import inspect
+from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Union
+
+from transformers import BartConfig, BartForConditionalGeneration, BartModel, PretrainedConfig
+
+from ....modeling_base import RBLNModel
+from ....modeling_config import RBLNCompileConfig, RBLNConfig
+from ....utils.logging import get_logger
+from ...models.seq2seq import RBLNModelForSeq2SeqLM
+from .bart_architecture import BartWrapper
+
+
+logger = get_logger()
+
+
+if TYPE_CHECKING:
+    from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PreTrainedModel
+
+
+class RBLNBartModel(RBLNModel):
+    original_model_class = BartModel
+    original_config_class = BartConfig
+
+    @classmethod
+    def _get_rbln_config(
+        cls,
+        preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
+        model_config: Optional["PretrainedConfig"] = None,
+        rbln_kwargs: Dict[str, Any] = {},
+    ) -> RBLNConfig:
+        rbln_max_seq_len = rbln_kwargs.get("max_seq_len", None)
+        rbln_batch_size = rbln_kwargs.get("batch_size", None)
+        rbln_model_input_names = rbln_kwargs.get("model_input_names", None)
+
+        max_position_embeddings = getattr(model_config, "max_position_embeddings", None)
+
+        if rbln_max_seq_len is None:
+            rbln_max_seq_len = max_position_embeddings
+            if rbln_max_seq_len is None:
+                for tokenizer in preprocessors:
+                    if hasattr(tokenizer, "model_max_length"):
+                        rbln_max_seq_len = tokenizer.model_max_length
+                        break
+                if rbln_max_seq_len is None:
+                    raise ValueError("`rbln_max_seq_len` should be specified!")
+
+        if max_position_embeddings is not None and rbln_max_seq_len > max_position_embeddings:
+            raise ValueError("`rbln_max_seq_len` should be less or equal than max_position_embeddings!")
+
+        if rbln_model_input_names is None:
+            for tokenizer in preprocessors:
+                if hasattr(tokenizer, "model_input_names"):
+                    rbln_model_input_names = tokenizer.model_input_names
+                    # BartModel's forward() does not take token_type_ids as input.
+                    # (Added because some of the tokenizers includes 'token_type_ids')
+                    if "token_type_ids" in rbln_model_input_names:
+                        rbln_model_input_names.remove("token_type_ids")
+                    break
+            if rbln_model_input_names is None and hasattr(cls, "rbln_model_input_names"):
+                rbln_model_input_names = cls.rbln_model_input_names
+            elif rbln_model_input_names is None and hasattr(cls, "rbln_model_input_names") is False:
+                input_names_order = inspect.signature(cls.original_model_class.forward).parameters.keys()
+                raise ValueError(
+                    "Specify the model input names obtained by the tokenizer via `rbln_model_input_names`, "
+                    f"and be sure to make the order of the inputs same as BartModel forward() arguments like ({list(input_names_order)})"
+                )
+
+        if rbln_batch_size is None:
+            rbln_batch_size = 1
+
+        input_info = [
+            (model_input_name, [rbln_batch_size, rbln_max_seq_len], "int64")
+            for model_input_name in rbln_model_input_names
+        ]
+
+        rbln_compile_config = RBLNCompileConfig(input_info=input_info)
+
+        rbln_config = RBLNConfig(
+            rbln_cls=cls.__name__,
+            compile_cfgs=[rbln_compile_config],
+            rbln_kwargs=rbln_kwargs,
+        )
+
+        rbln_config.model_cfg.update({"max_seq_len": rbln_max_seq_len})
+        return rbln_config
+
+
+class RBLNBartForConditionalGeneration(RBLNModelForSeq2SeqLM):
+    @classmethod
+    def wrap_model_if_needed(self, model: "PreTrainedModel", rbln_config: "RBLNConfig"):
+        return BartWrapper(model)
+
+    def __getattr__(self, __name: str) -> Any:
+        def redirect(func):
+            return lambda *pargs, **kwargs: func(self, *pargs, **kwargs)
+
+        val = getattr(BartForConditionalGeneration, __name)
+
+        if isinstance(val, Callable) and "self" in set(inspect.signature(val).parameters):
+            return redirect(val)
+
+        return val
```
optimum/rbln/transformers/models/bert/__init__.py (new file)

```diff
@@ -0,0 +1,24 @@
+# Copyright 2024 Rebellions Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Portions of this software are licensed under the Apache License,
+# Version 2.0. See the NOTICE file distributed with this work for
+# additional information regarding copyright ownership.
+
+# All other portions of this software, including proprietary code,
+# are the intellectual property of Rebellions Inc. and may not be
+# copied, modified, or distributed without prior written permission
+# from Rebellions Inc.
+
+from .modeling_bert import RBLNBertModel
```
optimum/rbln/transformers/models/bert/modeling_bert.py (new file)

```diff
@@ -0,0 +1,101 @@
+# Copyright 2024 Rebellions Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Portions of this software are licensed under the Apache License,
+# Version 2.0. See the NOTICE file distributed with this work for
+# additional information regarding copyright ownership.
+
+# All other portions of this software, including proprietary code,
+# are the intellectual property of Rebellions Inc. and may not be
+# copied, modified, or distributed without prior written permission
+# from Rebellions Inc.
+
+import inspect
+import logging
+from typing import TYPE_CHECKING, Any, Dict, Optional, Union
+
+from transformers import BertConfig, BertModel, PretrainedConfig
+
+from ....modeling_base import RBLNModel
+from ....modeling_config import RBLNCompileConfig, RBLNConfig
+
+
+logger = logging.getLogger(__name__)
+
+if TYPE_CHECKING:
+    from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer
+
+
+class RBLNBertModel(RBLNModel):
+    original_model_class = BertModel
+    original_config_class = BertConfig
+
+    @classmethod
+    def _get_rbln_config(
+        cls,
+        preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
+        model_config: Optional["PretrainedConfig"] = None,
+        rbln_kwargs: Dict[str, Any] = {},
+    ) -> RBLNConfig:
+        rbln_max_seq_len = rbln_kwargs.get("max_seq_len", None)
+        rbln_batch_size = rbln_kwargs.get("batch_size", None)
+        rbln_model_input_names = rbln_kwargs.get("model_input_names", None)
+
+        max_position_embeddings = getattr(model_config, "max_position_embeddings", None)
+
+        if rbln_max_seq_len is None:
+            rbln_max_seq_len = max_position_embeddings
+            if rbln_max_seq_len is None:
+                for tokenizer in preprocessors:
+                    if hasattr(tokenizer, "model_max_length"):
+                        rbln_max_seq_len = tokenizer.model_max_length
+                        break
+                if rbln_max_seq_len is None:
+                    raise ValueError("`rbln_max_seq_len` should be specified!")
+
+        if max_position_embeddings is not None and rbln_max_seq_len > max_position_embeddings:
+            raise ValueError("`rbln_max_seq_len` should be less or equal than max_position_embeddings!")
+
+        if rbln_model_input_names is None:
+            for tokenizer in preprocessors:
+                if hasattr(tokenizer, "model_input_names"):
+                    rbln_model_input_names = tokenizer.model_input_names
+                    break
+            if rbln_model_input_names is None and hasattr(cls, "rbln_model_input_names"):
+                rbln_model_input_names = cls.rbln_model_input_names
+            elif rbln_model_input_names is None and hasattr(cls, "rbln_model_input_names") is False:
+                input_names_order = inspect.signature(cls.original_model_class.forward).parameters.keys()
+                raise ValueError(
+                    "Specify the model input names obtained by the tokenizer via `rbln_model_input_names`, "
+                    f"and be sure to make the order of the inputs same as BertModel forward() arguments like ({list(input_names_order)})"
+                )
+
+        if rbln_batch_size is None:
+            rbln_batch_size = 1
+
+        input_info = [
+            (model_input_name, [rbln_batch_size, rbln_max_seq_len], "int64")
+            for model_input_name in rbln_model_input_names
+        ]
+
+        rbln_compile_config = RBLNCompileConfig(input_info=input_info)
+
+        rbln_config = RBLNConfig(
+            rbln_cls=cls.__name__,
+            compile_cfgs=[rbln_compile_config],
+            rbln_kwargs=rbln_kwargs,
+        )
+
+        rbln_config.model_cfg.update({"max_seq_len": rbln_max_seq_len})
+        return rbln_config
```
optimum/rbln/transformers/models/clip/__init__.py

```diff
@@ -21,4 +21,4 @@
 # copied, modified, or distributed without prior written permission
 # from Rebellions Inc.
 
-from .modeling_clip import RBLNCLIPTextModel, RBLNCLIPTextModelWithProjection
+from .modeling_clip import RBLNCLIPTextModel, RBLNCLIPTextModelWithProjection, RBLNCLIPVisionModel
```