PyPI - optimum-rbln - Versions diffs - 0.1.8__py3-none-any.whl → 0.1.11__py3-none-any.whl - Mend

optimum-rbln 0.1.8__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

optimum/rbln/transformers/models/bart/modeling_bart.py ADDED Viewed

@@ -0,0 +1,106 @@
+# Copyright 2024 Rebellions Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Portions of this software are licensed under the Apache License,
+# Version 2.0. See the NOTICE file distributed with this work for
+# additional information regarding copyright ownership.
+# All other portions of this software, including proprietary code,
+# are the intellectual property of Rebellions Inc. and may not be
+# copied, modified, or distributed without prior written permission
+# from Rebellions Inc.
+import inspect
+import logging
+from typing import TYPE_CHECKING, Any, Dict, Optional, Union
+from transformers import AutoModel, BartConfig, BartModel, PretrainedConfig
+from ....modeling_base import RBLNModel
+from ....modeling_config import RBLNCompileConfig, RBLNConfig
+logger = logging.getLogger(__name__)
+if TYPE_CHECKING:
+    from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer
+class RBLNBartModel(RBLNModel):
+    """RBLN model class for ``BartModel`` (feature extraction).
+    Only the compile-time configuration is customised in this class.
+    """
+    auto_model_class = AutoModel  # feature extraction
+    original_model_class = BartModel
+    original_config_class = BartConfig
+    @classmethod
+    def _get_rbln_config(
+        cls,
+        preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
+        model_config: Optional["PretrainedConfig"] = None,
+        rbln_kwargs: Optional[Dict[str, Any]] = None,
+    ) -> RBLNConfig:
+        """Build the ``RBLNConfig`` describing the static inputs to compile for.
+        Args:
+            preprocessors: Iterable of preprocessors inspected for ``model_max_length``
+                and ``model_input_names``. ``None`` is treated as "nothing to inspect".
+            model_config: Model configuration; ``max_position_embeddings`` is read
+                from it when the attribute exists.
+            rbln_kwargs: Optional overrides. The keys read here are ``max_seq_len``,
+                ``batch_size`` (falls back to 1) and ``model_input_names``.
+        Returns:
+            An ``RBLNConfig`` holding a single compile config with one
+            ``(name, [batch_size, max_seq_len], "int64")`` entry per model input,
+            and ``max_seq_len`` stored in ``model_cfg``.
+        Raises:
+            ValueError: If no sequence length can be determined, if it exceeds
+                ``max_position_embeddings``, or if the model input names cannot
+                be determined.
+        """
+        # Use a fresh dict per call instead of a shared mutable default argument.
+        rbln_kwargs = {} if rbln_kwargs is None else rbln_kwargs
+        # `preprocessors` is Optional: without this guard a None value would raise
+        # TypeError in the loops below instead of the descriptive ValueError.
+        preprocessors = () if preprocessors is None else preprocessors
+        rbln_max_seq_len = rbln_kwargs.get("max_seq_len", None)
+        rbln_batch_size = rbln_kwargs.get("batch_size", None)
+        rbln_model_input_names = rbln_kwargs.get("model_input_names", None)
+        max_position_embeddings = getattr(model_config, "max_position_embeddings", None)
+        if rbln_max_seq_len is None:
+            # Fallback order: model config first, then the first preprocessor
+            # exposing `model_max_length`.
+            rbln_max_seq_len = max_position_embeddings
+            if rbln_max_seq_len is None:
+                for tokenizer in preprocessors:
+                    if hasattr(tokenizer, "model_max_length"):
+                        rbln_max_seq_len = tokenizer.model_max_length
+                        break
+                if rbln_max_seq_len is None:
+                    raise ValueError("`rbln_max_seq_len` should be specified!")
+        if max_position_embeddings is not None and rbln_max_seq_len > max_position_embeddings:
+            raise ValueError("`rbln_max_seq_len` should be less or equal than max_position_embeddings!")
+        if rbln_model_input_names is None:
+            for tokenizer in preprocessors:
+                if hasattr(tokenizer, "model_input_names"):
+                    # BartModel's forward() does not take token_type_ids as input, but some
+                    # tokenizers list it. Build a filtered copy rather than calling
+                    # `.remove()` so the tokenizer's own list is left untouched.
+                    rbln_model_input_names = [
+                        input_name
+                        for input_name in tokenizer.model_input_names
+                        if input_name != "token_type_ids"
+                    ]
+                    break
+            if rbln_model_input_names is None:
+                if hasattr(cls, "rbln_model_input_names"):
+                    rbln_model_input_names = cls.rbln_model_input_names
+                else:
+                    input_names_order = inspect.signature(cls.original_model_class.forward).parameters.keys()
+                    raise ValueError(
+                        "Specify the model input names obtained by the tokenizer via `rbln_model_input_names`, "
+                        f"and be sure to make the order of the inputs same as BartModel forward() arguments like ({list(input_names_order)})"
+                    )
+        if rbln_batch_size is None:
+            rbln_batch_size = 1
+        input_info = [
+            (model_input_name, [rbln_batch_size, rbln_max_seq_len], "int64")
+            for model_input_name in rbln_model_input_names
+        ]
+        rbln_compile_config = RBLNCompileConfig(input_info=input_info)
+        rbln_config = RBLNConfig(
+            rbln_cls=cls.__name__,
+            compile_cfgs=[rbln_compile_config],
+            rbln_kwargs=rbln_kwargs,
+        )
+        rbln_config.model_cfg.update({"max_seq_len": rbln_max_seq_len})
+        return rbln_config

optimum/rbln/transformers/models/bert/__init__.py ADDED Viewed

@@ -0,0 +1,24 @@
+# Copyright 2024 Rebellions Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Portions of this software are licensed under the Apache License,
+# Version 2.0. See the NOTICE file distributed with this work for
+# additional information regarding copyright ownership.
+# All other portions of this software, including proprietary code,
+# are the intellectual property of Rebellions Inc. and may not be
+# copied, modified, or distributed without prior written permission
+# from Rebellions Inc.
+from .modeling_bert import RBLNBertModel

optimum/rbln/transformers/models/bert/modeling_bert.py ADDED Viewed

@@ -0,0 +1,102 @@
+# Copyright 2024 Rebellions Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Portions of this software are licensed under the Apache License,
+# Version 2.0. See the NOTICE file distributed with this work for
+# additional information regarding copyright ownership.
+# All other portions of this software, including proprietary code,
+# are the intellectual property of Rebellions Inc. and may not be
+# copied, modified, or distributed without prior written permission
+# from Rebellions Inc.
+import inspect
+import logging
+from typing import TYPE_CHECKING, Any, Dict, Optional, Union
+from transformers import AutoModel, BertConfig, BertModel, PretrainedConfig
+from ....modeling_base import RBLNModel
+from ....modeling_config import RBLNCompileConfig, RBLNConfig
+logger = logging.getLogger(__name__)
+if TYPE_CHECKING:
+    from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer
+class RBLNBertModel(RBLNModel):
+    """RBLN model class for ``BertModel`` (feature extraction).
+    Only the compile-time configuration is customised in this class.
+    """
+    auto_model_class = AutoModel  # feature extraction
+    original_model_class = BertModel
+    original_config_class = BertConfig
+    @classmethod
+    def _get_rbln_config(
+        cls,
+        preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
+        model_config: Optional["PretrainedConfig"] = None,
+        rbln_kwargs: Optional[Dict[str, Any]] = None,
+    ) -> RBLNConfig:
+        """Build the ``RBLNConfig`` describing the static inputs to compile for.
+        Args:
+            preprocessors: Iterable of preprocessors inspected for ``model_max_length``
+                and ``model_input_names``. ``None`` is treated as "nothing to inspect".
+            model_config: Model configuration; ``max_position_embeddings`` is read
+                from it when the attribute exists.
+            rbln_kwargs: Optional overrides. The keys read here are ``max_seq_len``,
+                ``batch_size`` (falls back to 1) and ``model_input_names``.
+        Returns:
+            An ``RBLNConfig`` holding a single compile config with one
+            ``(name, [batch_size, max_seq_len], "int64")`` entry per model input,
+            and ``max_seq_len`` stored in ``model_cfg``.
+        Raises:
+            ValueError: If no sequence length can be determined, if it exceeds
+                ``max_position_embeddings``, or if the model input names cannot
+                be determined.
+        """
+        # Use a fresh dict per call instead of a shared mutable default argument.
+        rbln_kwargs = {} if rbln_kwargs is None else rbln_kwargs
+        # `preprocessors` is Optional: without this guard a None value would raise
+        # TypeError in the loops below instead of the descriptive ValueError.
+        preprocessors = () if preprocessors is None else preprocessors
+        rbln_max_seq_len = rbln_kwargs.get("max_seq_len", None)
+        rbln_batch_size = rbln_kwargs.get("batch_size", None)
+        rbln_model_input_names = rbln_kwargs.get("model_input_names", None)
+        max_position_embeddings = getattr(model_config, "max_position_embeddings", None)
+        if rbln_max_seq_len is None:
+            # Fallback order: model config first, then the first preprocessor
+            # exposing `model_max_length`.
+            rbln_max_seq_len = max_position_embeddings
+            if rbln_max_seq_len is None:
+                for tokenizer in preprocessors:
+                    if hasattr(tokenizer, "model_max_length"):
+                        rbln_max_seq_len = tokenizer.model_max_length
+                        break
+                if rbln_max_seq_len is None:
+                    raise ValueError("`rbln_max_seq_len` should be specified!")
+        if max_position_embeddings is not None and rbln_max_seq_len > max_position_embeddings:
+            raise ValueError("`rbln_max_seq_len` should be less or equal than max_position_embeddings!")
+        if rbln_model_input_names is None:
+            # Take the input names from the first preprocessor that exposes them.
+            for tokenizer in preprocessors:
+                if hasattr(tokenizer, "model_input_names"):
+                    rbln_model_input_names = tokenizer.model_input_names
+                    break
+            if rbln_model_input_names is None:
+                if hasattr(cls, "rbln_model_input_names"):
+                    rbln_model_input_names = cls.rbln_model_input_names
+                else:
+                    input_names_order = inspect.signature(cls.original_model_class.forward).parameters.keys()
+                    raise ValueError(
+                        "Specify the model input names obtained by the tokenizer via `rbln_model_input_names`, "
+                        f"and be sure to make the order of the inputs same as BertModel forward() arguments like ({list(input_names_order)})"
+                    )
+        if rbln_batch_size is None:
+            rbln_batch_size = 1
+        input_info = [
+            (model_input_name, [rbln_batch_size, rbln_max_seq_len], "int64")
+            for model_input_name in rbln_model_input_names
+        ]
+        rbln_compile_config = RBLNCompileConfig(input_info=input_info)
+        rbln_config = RBLNConfig(
+            rbln_cls=cls.__name__,
+            compile_cfgs=[rbln_compile_config],
+            rbln_kwargs=rbln_kwargs,
+        )
+        rbln_config.model_cfg.update({"max_seq_len": rbln_max_seq_len})
+        return rbln_config

optimum/rbln/transformers/models/clip/__init__.py CHANGED Viewed

@@ -21,4 +21,4 @@
 # copied, modified, or distributed without prior written permission
 # from Rebellions Inc.
-from .modeling_clip import RBLNCLIPTextModel, RBLNCLIPTextModelWithProjection
+from .modeling_clip import RBLNCLIPTextModel, RBLNCLIPTextModelWithProjection, RBLNCLIPVisionModel

optimum/rbln/transformers/models/clip/modeling_clip.py CHANGED Viewed

@@ -22,14 +22,23 @@
 # from Rebellions Inc.
 import logging
-from typing import TYPE_CHECKING, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
 import torch
-from transformers import AutoConfig, AutoModel, CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProjection
+from transformers import (
+    AutoConfig,
+    AutoModel,
+    CLIPTextConfig,
+    CLIPTextModel,
+    CLIPTextModelWithProjection,
+    CLIPVisionConfig,
+    CLIPVisionModel,
+)
+from transformers.modeling_outputs import BaseModelOutputWithPooling
 from transformers.models.clip.modeling_clip import CLIPTextModelOutput
 from ....modeling_base import RBLNModel
-from ....modeling_config import RBLNConfig, RBLNRuntimeConfig
+from ....modeling_config import RBLNCompileConfig, RBLNConfig
 logger = logging.getLogger(__name__)
@@ -41,12 +50,10 @@ if TYPE_CHECKING:
 class _TextEncoder(torch.nn.Module):
     def __init__(self, enc: "CLIPTextModel"):
         super().__init__()
-        enc.config.return_dict = False
-        enc.config.output_hidden_states = True
         self.enc = enc
     def forward(self, inp):
-        enc_out = self.enc(inp)
+        enc_out = self.enc(inp, output_hidden_states=True, return_dict=False)
         return enc_out
@@ -55,9 +62,6 @@ class RBLNCLIPTextModel(RBLNModel):
     original_model_class = CLIPTextModel
     original_config_class = CLIPTextConfig
-    def __post_init__(self, **kwargs):
-        self.dtype = torch.float32
     @classmethod
     def from_pretrained(cls, *args, **kwargs):
         configtmp = AutoConfig.from_pretrained
@@ -70,7 +74,7 @@ class RBLNCLIPTextModel(RBLNModel):
         return rt
     @classmethod
-    def wrap_model_if_needed(cls, model: torch.nn.Module) -> torch.nn.Module:
+    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNConfig) -> torch.nn.Module:
         return _TextEncoder(model).eval()
     @classmethod
@@ -78,28 +82,32 @@ class RBLNCLIPTextModel(RBLNModel):
         cls,
         preprocessors: Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"],
         model_config: "CLIPTextConfig",
+        rbln_kwargs: Dict[str, Any] = {},
         rbln_batch_size: Optional[int] = None,
-        rbln_img_width: Optional[int] = None,
-        rbln_img_height: Optional[int] = None,
     ) -> RBLNConfig:
-        model_config.return_dict = False
+        rbln_batch_size = rbln_kwargs.get("batch_size", None)
         if rbln_batch_size is None:
             rbln_batch_size = 1
-        rbln_runtime_config = RBLNRuntimeConfig(
-            input_info=[
-                (
-                    "input_ids",
-                    [
-                        rbln_batch_size,
-                        model_config.max_position_embeddings,
-                    ],
-                    "int64",
-                ),
-            ],
-        )
+        model_config.return_dict = False
-        rbln_config = RBLNConfig.from_rbln_runtime_configs([rbln_runtime_config])
+        input_info = [
+            (
+                "input_ids",
+                [
+                    rbln_batch_size,
+                    model_config.max_position_embeddings,
+                ],
+                "int64",
+            ),
+        ]
+        rbln_compile_config = RBLNCompileConfig(input_info=input_info)
+        rbln_config = RBLNConfig(
+            rbln_cls=cls.__name__,
+            compile_cfgs=[rbln_compile_config],
+            rbln_kwargs=rbln_kwargs,
+        )
         return rbln_config
     def forward(self, input_ids: "torch.Tensor", **kwargs):
@@ -113,3 +121,97 @@ class RBLNCLIPTextModel(RBLNModel):
 class RBLNCLIPTextModelWithProjection(RBLNCLIPTextModel):
     original_model_class = CLIPTextModelWithProjection
+class _VisionEncoder(torch.nn.Module):
+    """Module wrapper that always calls the wrapped encoder with
+    ``output_hidden_states=True`` and ``return_dict=False``."""
+    def __init__(self, enc: CLIPVisionModel):
+        super().__init__()
+        self.enc = enc
+    def forward(self, inp):
+        # Forward the single positional input and hand back the encoder's result unchanged.
+        return self.enc(inp, output_hidden_states=True, return_dict=False)
+class RBLNCLIPVisionModel(RBLNModel):
+    """RBLN model class for ``CLIPVisionModel``."""
+    original_model_class = CLIPVisionModel
+    original_config_class = CLIPVisionConfig
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        """Load via ``RBLNModel.from_pretrained`` with the CLIP vision loaders swapped in.
+        ``AutoConfig.from_pretrained`` and ``AutoModel.from_pretrained`` are temporarily
+        replaced by the loaders of ``original_config_class`` / ``original_model_class``.
+        The previous loaders are restored in a ``finally`` block so that a failed load
+        does not leave the global ``Auto*`` classes patched.
+        """
+        configtmp = AutoConfig.from_pretrained
+        modeltmp = AutoModel.from_pretrained
+        AutoConfig.from_pretrained = cls.original_config_class.from_pretrained
+        AutoModel.from_pretrained = cls.original_model_class.from_pretrained
+        try:
+            return super().from_pretrained(*args, **kwargs)
+        finally:
+            AutoConfig.from_pretrained = configtmp
+            AutoModel.from_pretrained = modeltmp
+    @classmethod
+    def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNConfig) -> torch.nn.Module:
+        """Wrap ``model`` in ``_VisionEncoder`` and switch it to eval mode."""
+        return _VisionEncoder(model).eval()
+    @classmethod
+    def _get_rbln_config(
+        cls,
+        preprocessors: Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"],
+        model_config: "CLIPVisionConfig",
+        rbln_kwargs: Optional[Dict[str, Any]] = None,
+    ) -> RBLNConfig:
+        """Build the ``RBLNConfig`` describing the static ``pixel_values`` input.
+        Args:
+            preprocessors: Not used by this implementation.
+            model_config: Model configuration; ``image_size`` is read from it when
+                ``rbln_kwargs`` does not provide one.
+            rbln_kwargs: Optional overrides. The keys read here are ``batch_size``
+                (falls back to 1) and ``image_size`` (an int or a two-item sequence).
+        Returns:
+            An ``RBLNConfig`` with a single ``float32`` ``pixel_values`` input of shape
+            ``[batch_size, 3, image_size[0], image_size[1]]``; ``batch_size`` and
+            ``image_size`` are also stored in ``model_cfg``.
+        Raises:
+            ValueError: If no image size is given and the config does not define one.
+        """
+        # Use a fresh dict per call instead of a shared mutable default argument.
+        rbln_kwargs = {} if rbln_kwargs is None else rbln_kwargs
+        # An explicit `batch_size=None` must fall back to 1 as well, otherwise
+        # None would end up inside the compiled input shape.
+        rbln_batch_size = rbln_kwargs.get("batch_size", None)
+        if rbln_batch_size is None:
+            rbln_batch_size = 1
+        rbln_image_size = rbln_kwargs.get("image_size", None)
+        if rbln_image_size is None:
+            rbln_image_size = getattr(model_config, "image_size", None)
+        # A single int is used for both spatial dimensions.
+        if isinstance(rbln_image_size, int):
+            rbln_image_size = (rbln_image_size, rbln_image_size)
+        if rbln_image_size is None:
+            raise ValueError("`rbln_image_size` should be specified!")
+        rbln_compile_config = RBLNCompileConfig(
+            input_info=[
+                (
+                    "pixel_values",
+                    [
+                        rbln_batch_size,
+                        3,
+                        rbln_image_size[0],
+                        rbln_image_size[1],
+                    ],
+                    "float32",
+                )
+            ]
+        )
+        rbln_config = RBLNConfig(
+            rbln_cls=cls.__name__,
+            compile_cfgs=[rbln_compile_config],
+            rbln_kwargs=rbln_kwargs,
+        )
+        rbln_config.model_cfg.update(
+            {
+                "batch_size": rbln_batch_size,
+                "image_size": rbln_image_size,
+            }
+        )
+        return rbln_config
+    def forward(
+        self,
+        pixel_values: Optional[torch.FloatTensor] = None,
+        **kwargs,
+    ) -> Union[Tuple, BaseModelOutputWithPooling]:
+        """Run the parent ``forward`` on ``pixel_values`` and wrap the result.
+        Extra keyword arguments are not forwarded; a warning is logged when any of
+        them is truthy.
+        """
+        if len(kwargs) > 0 and any(kwargs.values()):
+            logger.warning(f"Currently, optimum-rbln does not support kwargs {kwargs.keys()} for {self.__class__}.")
+        output = super().forward(pixel_values)
+        # Element 0 is the last hidden state, element 1 the pooled output and the
+        # remaining elements are exposed as `hidden_states`.
+        return BaseModelOutputWithPooling(
+            last_hidden_state=output[0],
+            pooler_output=output[1],
+            hidden_states=output[2:],
+        )

optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py CHANGED Viewed

@@ -49,18 +49,19 @@ class DecoderOnlyWrapper(torch.nn.Module):
             self.config.max_position_embeddings if max_seq_len > self.config.max_position_embeddings else max_seq_len
         )
         self.max_seq_len = max_seq_len
+        self.rope_scaling = getattr(self.config, "rope_scaling", None)
         self.rotary_emb = self._init_rope()
     def _init_rope(self):
-        if self.config.rope_scaling is None:
+        if self.rope_scaling is None:
             rotary_emb = RotaryEmbedding(
                 self.head_dim,
                 max_position_embeddings=self.max_position_embeddings,
                 base=self.config.rope_theta,
             )
         else:
-            scaling_type = self.config.rope_scaling["type"]
-            scaling_factor = self.config.rope_scaling["factor"]
+            scaling_type = self.rope_scaling["type"]
+            scaling_factor = self.rope_scaling["factor"]
             if scaling_type == "linear":
                 rotary_emb = LinearScalingRotaryEmbedding(
                     self.head_dim,
@@ -92,17 +93,29 @@ class DecoderOnlyWrapper(torch.nn.Module):
     def forward(
         self,
-        input_ids,
+        input_ids_or_inputs_embeds,
         attention_mask,
         cache_position,
         batch_position,
+        query_idx,
         *past_key_values,
     ):
-        if input_ids.shape[1] == 1:
+        if input_ids_or_inputs_embeds.shape[1] == 1:
             rbln_batch_position = None
         else:
             rbln_batch_position = batch_position
+        if input_ids_or_inputs_embeds.ndim == 2:
+            # input_ids
+            input_ids = input_ids_or_inputs_embeds
+            inputs_embeds = None
+        elif input_ids_or_inputs_embeds.ndim == 3:
+            # inputs_embeds
+            input_ids = None
+            inputs_embeds = input_ids_or_inputs_embeds
+        else:
+            raise NotImplementedError(f"Unknown ndim of input : {input_ids_or_inputs_embeds.ndim}")
         # Formatting list of past_kv to DynamicCache class.
         past_key_values = RebelDynamicCache.from_input_format(
             cache_position,
@@ -114,6 +127,7 @@ class DecoderOnlyWrapper(torch.nn.Module):
         outputs = forward_dict["wrapper"](
             self.model,
             input_ids=input_ids,
+            inputs_embeds=inputs_embeds,
             attention_mask=attention_mask,
             position_ids=cache_position,
             past_key_values=past_key_values,
@@ -123,11 +137,14 @@ class DecoderOnlyWrapper(torch.nn.Module):
         )
         hidden_states = outputs[0]
+        if batch_position >= 0:
+            hidden_states = hidden_states[:, query_idx].unsqueeze(1)
         logits = self.lm_head(hidden_states)
         output = (logits,) + outputs[1:]
-        return output, batch_position
+        return output, batch_position + query_idx
 class DecoderOnlyAttention:
@@ -322,8 +339,16 @@ class DecoderOnlyModel:
         forward_dict: Optional[Dict[str, classmethod]] = None,
         rotary_pos_emb=None,
     ) -> BaseModelOutputWithPast:
+        # retrieve input_ids and inputs_embeds
+        if (input_ids is None) ^ (inputs_embeds is not None):
+            raise ValueError(
+                "You cannot specify both input_ids and inputs_embeds at the same time, and must specify either one"
+            )
         # embed positions
-        inputs_embeds = self.embed_tokens(input_ids)
+        if inputs_embeds is None:
+            inputs_embeds = self.embed_tokens(input_ids)
         hidden_states = inputs_embeds
         attention_mask = (1 - attention_mask) * torch.finfo(torch.float16).min

optimum-rbln 0.1.8__py3-none-any.whl → 0.1.11__py3-none-any.whl

optimum-rbln 0.1.8__py3-none-any.whl → 0.1.11__py3-none-any.whl