PyPI - optimum-rbln - Versions diffs - 0.1.4__py3-none-any.whl → 0.1.8__py3-none-any.whl - Mend

optimum-rbln 0.1.4__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

optimum/rbln/modeling_base.py CHANGED Viewed

@@ -22,10 +22,12 @@
 # from Rebellions Inc.
 import logging
+import os
+import shutil
 from abc import ABC, abstractmethod
 from pathlib import Path
 from tempfile import TemporaryDirectory
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union
 import rebel
 import torch
@@ -37,7 +39,9 @@ from transformers import (
     AutoModel,
     AutoModelForAudioClassification,
     AutoModelForImageClassification,
+    AutoModelForMaskedLM,
     AutoModelForQuestionAnswering,
+    AutoModelForSequenceClassification,
     GenerationConfig,
     PretrainedConfig,
 )
@@ -50,16 +54,7 @@ from .utils.save_utils import maybe_load_preprocessors, maybe_save_preprocessors
 logger = logging.getLogger(__name__)
 if TYPE_CHECKING:
-    from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PretrainedConfig
-def listify(var: Any):
-    if isinstance(var, list):
-        return var
-    elif var is not None:
-        return [var]
-    else:
-        return None
+    from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer, PreTrainedModel
 class RBLNBaseModel(OptimizedModel, ABC):
@@ -103,23 +98,22 @@ class RBLNBaseModel(OptimizedModel, ABC):
     def __init__(
         self,
-        models: List[rebel.RBLNCompiledModel],
+        models: List[rebel.Runtime],
         config: "PretrainedConfig",
+        rbln_config: RBLNConfig,
         preprocessors: Optional[List],
-        rbln_config: Optional[RBLNConfig],
-        rbln_device: Optional[List[int]] = None,
-        rbln_device_map: Optional[Dict[str, int]] = None,
-        rbln_create_runtimes: Optional[bool] = None,
         model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
+        subfolder: str = "",
+        rbln_compiled_models: Optional[rebel.RBLNCompiledModel] = None,
         **kwargs,
     ):
         super().__init__(models, config)
         if not isinstance(self.config, PretrainedConfig):  # if diffusers config
             self.config = PretrainedConfig(**self.config)
-        self.models = listify(self.model)
+        self.rbln_config = rbln_config
         self.preprocessors = [] if preprocessors is None else preprocessors
+        self.compiled_models = rbln_compiled_models
         # Registers the RBLNBaseModelForXXX classes into the transformers AutoModel classes to avoid warnings when creating
         # a pipeline https://github.com/huggingface/transformers/blob/3d3204c025b6b5de013e07dd364208e28b4d9589/src/transformers/pipelines/base.py#L940
@@ -127,18 +121,6 @@ class RBLNBaseModel(OptimizedModel, ABC):
         if hasattr(self.auto_model_class, "register"):
             self.auto_model_class.register(AutoConfig, self.__class__)
-        self.rbln_config = rbln_config
-        self.compiled_models: List[rebel.RBLNCompiledModel] = models
-        if rbln_device_map is None:
-            self.rbln_device_map = {}
-            device_val = 0 if rbln_device is None else rbln_device
-            for key in self.rbln_config:
-                self.rbln_device_map[key] = device_val
-        else:
-            self.rbln_device_map = rbln_device_map
         # copied from tranformers PreTrainedModel __init__
         self.generation_config = GenerationConfig.from_model_config(config) if self.can_generate() else None
         if self.generation_config is not None:
@@ -146,15 +128,9 @@ class RBLNBaseModel(OptimizedModel, ABC):
         self.device = torch.device("cpu")
-        if rbln_create_runtimes is None:
-            rbln_create_runtimes = rebel.npu_is_available()
-        # create runtimes only if `rbln_create_runtimes` is enabled
-        self.runtimes = self._create_runtimes(self.rbln_device_map) if rbln_create_runtimes else UnavailableRuntime()
         # FIXME :: model_save_dir is not used after initialized. (This can be used when save/load)
         # This attribute is needed to keep one reference on the temporary directory, since garbage collecting it
-        # would end-up removing the directory containing the underlying ONNX model.
+        # would end-up removing the directory containing the underlying RBLN model.
         self._model_save_dir_tempdirectory_instance = None
         if isinstance(model_save_dir, TemporaryDirectory):
             self._model_save_dir_tempdirectory_instance = model_save_dir
@@ -163,6 +139,7 @@ class RBLNBaseModel(OptimizedModel, ABC):
             self.model_save_dir = Path(model_save_dir)
         else:
             self.model_save_dir = model_save_dir
+        self.subfolder = subfolder
         self.__post_init__(**kwargs)
@@ -178,11 +155,14 @@ class RBLNBaseModel(OptimizedModel, ABC):
             save_directory (`Union[str, Path]`):
                 Directory where to save the model file.
         """
-        for compiled_model, compiled_model_name in zip(self.compiled_models, self.rbln_config):
-            dst_path = Path(save_directory) / f"{compiled_model_name}.rbln"
-            compiled_model.save(dst_path)
-        self.rbln_config.save(save_directory)
+        real_save_dir = self.model_save_dir / self.subfolder
+        if os.path.exists(real_save_dir) and os.path.isdir(real_save_dir):
+            shutil.copytree(real_save_dir, save_directory, dirs_exist_ok=True)
+            self.config.save_pretrained(save_directory)
+            if self.generation_config is not None:
+                self.generation_config.save_pretrained(save_directory)
+        else:
+            raise FileNotFoundError(f"Saving compiled model failed.({real_save_dir}).")
     @classmethod
     def _from_pretrained(
@@ -196,6 +176,14 @@ class RBLNBaseModel(OptimizedModel, ABC):
         subfolder: str = "",
         local_files_only: bool = False,
         model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
+        # Runtime - related kwargs
+        rbln_device: Optional[List[int]] = None,
+        rbln_device_map: Optional[Dict[str, int]] = None,
+        rbln_create_runtimes: Optional[bool] = None,
+        # passed from compile function
+        rbln_config: Optional[RBLNConfig] = None,
+        rbln_compiled_models: Optional[List[rebel.RBLNCompiledModel]] = None,
+        rbln_optimize_host_memory: Optional[bool] = None,
         **kwargs,
     ) -> "RBLNBaseModel":
         model_path = Path(model_id)
@@ -228,12 +216,15 @@ class RBLNBaseModel(OptimizedModel, ABC):
             )
         if model_path.is_dir():
-            rbln_config = RBLNConfig.load(str(model_path))
-            models = [
-                rebel.RBLNCompiledModel(model_path / f"{compiled_model_name}.rbln")
-                for compiled_model_name in rbln_config
-            ]
-            new_model_save_dir = model_path
+            if rbln_compiled_models is None:
+                rbln_config = RBLNConfig.load(str(model_path))
+                rbln_compiled_models = [
+                    rebel.RBLNCompiledModel(model_path / f"{compiled_model_name}.rbln")
+                    for compiled_model_name in rbln_config
+                ]
+                new_model_save_dir = model_path
+            else:
+                pass
         else:
             rbln_config_filename = rbln_config_filenames[0]
@@ -248,7 +239,7 @@ class RBLNBaseModel(OptimizedModel, ABC):
                 local_files_only=local_files_only,
             )
             rbln_config = RBLNConfig.load(Path(rbln_config_cache_path).parent)
-            models = []
+            rbln_compiled_models = []
             for compiled_model_name in rbln_config:
                 model_cache_path = hf_hub_download(
                     repo_id=model_id,
@@ -260,7 +251,7 @@ class RBLNBaseModel(OptimizedModel, ABC):
                     force_download=force_download,
                     local_files_only=local_files_only,
                 )
-                models.append(rebel.RBLNCompiledModel(model_cache_path))
+                rbln_compiled_models.append(rebel.RBLNCompiledModel(model_cache_path))
             new_model_save_dir = Path(rbln_config_cache_path).parent
         preprocessors = maybe_load_preprocessors(model_id, subfolder=subfolder)
@@ -268,17 +259,40 @@ class RBLNBaseModel(OptimizedModel, ABC):
         if model_save_dir is None:
             model_save_dir = new_model_save_dir
+        # Create runtimes
+        if rbln_create_runtimes is None:
+            rbln_create_runtimes = rebel.npu_is_available()
+        if rbln_device_map is None:
+            rbln_device_map = {}
+            device_val = 0 if rbln_device is None else rbln_device
+            for key in rbln_config:
+                rbln_device_map[key] = device_val
+        else:
+            rbln_device_map = rbln_device_map
+        # create runtimes only if `rbln_create_runtimes` is enabled
+        models = (
+            cls._create_runtimes(rbln_compiled_models, rbln_device_map)
+            if rbln_create_runtimes
+            else UnavailableRuntime()
+        )
+        if rbln_optimize_host_memory is None:
+            rbln_optimize_host_memory = True
         return cls(
             models,
             config,
+            rbln_config,
             preprocessors,
-            rbln_config=rbln_config,
             model_save_dir=model_save_dir,
+            subfolder=subfolder,
+            rbln_compiled_models=None if rbln_optimize_host_memory else rbln_compiled_models,
             **kwargs,
         )
     def __repr__(self):
-        return repr(self.runtimes)
+        return repr(self.model)
     @classmethod
     def compile(cls, model, rbln_runtime_config: Optional[RBLNRuntimeConfig] = None):
@@ -338,7 +352,15 @@ class RBLNBaseModel(OptimizedModel, ABC):
     def pop_rbln_kwargs_from_kwargs(kwargs: dict):
         keys = list(kwargs.keys())
         rbln_constructor_kwargs = {
-            key: kwargs.pop(key) for key in keys if key in ["rbln_device", "rbln_device_map", "rbln_create_runtimes"]
+            key: kwargs.pop(key)
+            for key in keys
+            if key
+            in [
+                "rbln_device",
+                "rbln_device_map",
+                "rbln_create_runtimes",
+                "rbln_optimize_host_memory",
+            ]
         }
         keys = list(kwargs.keys())
@@ -375,9 +397,12 @@ class RBLNBaseModel(OptimizedModel, ABC):
     def forward(self, *args: List[torch.Tensor], **kwargs: Dict[str, torch.Tensor]):
         pass
+    @classmethod
     @abstractmethod
-    def _create_runtimes(self, rbln_device_map: Dict[str, int]) -> List[rebel.Runtime]:
-        # self.compiled_models -> self.runtimes
+    def _create_runtimes(
+        cls, compiled_models: List[rebel.RBLNCompiledModel], rbln_device_map: Dict[str, int]
+    ) -> List[rebel.Runtime]:
+        # compiled_models -> runtimes
         pass
     @classmethod
@@ -417,14 +442,26 @@ class RBLNModel(RBLNBaseModel):
         ```
     """
-    model_type = "rbln_model"
-    auto_model_class = AutoModel  # feature extraction
+    @classmethod
+    def update_kwargs(cls, kwargs):
+        """
+        Update user-given kwargs to get proper pytorch model.
+        For example, `torchscript`=True should be set because torch.jit
+        does not support `transformers` output instances as module output;
+        """
+        kwargs.update(
+            {
+                "torchscript": True,
+                "return_dict": False,
+            }
+        )
+        return kwargs
     @classmethod
-    def _export(
+    def get_pytorch_model(
         cls,
-        model_id: Union[str, Path],
-        config: "PretrainedConfig",
+        model_id: str,
         use_auth_token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
         force_download: bool = False,
@@ -432,16 +469,62 @@ class RBLNModel(RBLNBaseModel):
         subfolder: str = "",
         local_files_only: bool = False,
         trust_remote_code: bool = False,
-        model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
+        rbln_config_kwargs: Optional[Dict[str, Any]] = None,
+        rbln_constructor_kwargs: Optional[Dict[str, Any]] = None,
         **kwargs,
-    ) -> "RBLNModel":
-        """
-        Exports a vanilla Transformers model into a rbln-compiled Module.
-        """
+    ) -> "PreTrainedModel":
         task = kwargs.pop("task", None)
         if task is None:
             task = TasksManager.infer_task_from_model(cls.auto_model_class)
+        kwargs = cls.update_kwargs(kwargs)
+        model = TasksManager.get_model_from_task(
+            task=task,
+            model_name_or_path=model_id,
+            subfolder=subfolder,
+            revision=revision,
+            framework="pt",
+            cache_dir=cache_dir,
+            use_auth_token=use_auth_token,
+            local_files_only=local_files_only,
+            force_download=force_download,
+            trust_remote_code=trust_remote_code,
+            **kwargs,
+        )
+        return model
+    @classmethod
+    def get_compiled_model(cls, model: "PreTrainedModel", rbln_config: RBLNConfig):
+        model = cls.wrap_model_if_needed(model)
+        rbln_runtime_configs = list(rbln_config.values())
+        if len(rbln_runtime_configs) != 1:
+            raise ValueError
+        rbln_runtime_config = rbln_runtime_configs[0]
+        if len(rbln_runtime_config) != 1:
+            raise ValueError
+        rbln_runtime_config = rbln_runtime_config[0]
+        compiled_model = cls.compile(model, rbln_runtime_config=rbln_runtime_config)
+        return compiled_model
+    @classmethod
+    @torch.no_grad()
+    def _export(
+        cls,
+        model_id: str,
+        config: "PretrainedConfig",
+        use_auth_token: Optional[Union[bool, str]] = None,
+        revision: Optional[str] = None,
+        force_download: bool = False,
+        cache_dir: Optional[str] = None,
+        subfolder: str = "",
+        local_files_only: bool = False,
+        trust_remote_code: bool = False,
+        model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
+        **kwargs,
+    ) -> "RBLNModel":
         if model_save_dir is None:
             save_dir = TemporaryDirectory()
             save_dir_path = Path(save_dir.name)
@@ -453,35 +536,24 @@ class RBLNModel(RBLNBaseModel):
                 save_dir_path = Path(model_save_dir)
                 save_dir_path.mkdir(exist_ok=True)
-        kwargs.update(
-            {
-                "torchscript": True,
-                "return_dict": False,
-            }
-        )
         rbln_config_kwargs, rbln_constructor_kwargs = cls.pop_rbln_kwargs_from_kwargs(kwargs)
-        model = TasksManager.get_model_from_task(
-            task=task,
-            model_name_or_path=model_id,
+        model: "PreTrainedModel" = cls.get_pytorch_model(
+            model_id=model_id,
             subfolder=subfolder,
             revision=revision,
-            framework="pt",
             cache_dir=cache_dir,
             use_auth_token=use_auth_token,
             local_files_only=local_files_only,
             force_download=force_download,
             trust_remote_code=trust_remote_code,
+            rbln_config_kwargs=rbln_config_kwargs,
+            rbln_constructor_kwargs=rbln_constructor_kwargs,
             **kwargs,
         )
-        # TODO : do we need this?
-        if isinstance(model, torch.nn.Module):
-            model.eval()
-        if config is None:
-            config = model.config
+        # FIXME :: optimum passes AutoConfig.
+        config = model.config
         if not isinstance(config, PretrainedConfig):  # diffusers config
             config = PretrainedConfig(**config)
@@ -492,20 +564,22 @@ class RBLNModel(RBLNBaseModel):
         # Get compilation arguments
         if (rbln_config := rbln_config_kwargs.pop("rbln_config", None)) is None:
             rbln_config = cls.get_rbln_config(preprocessors=preprocessors, model_config=config, **rbln_config_kwargs)
+        compiled_model = cls.get_compiled_model(model, rbln_config=rbln_config)
-        rbln_runtime_configs = list(rbln_config.values())
-        if len(rbln_runtime_configs) != 1:
-            raise ValueError
-        rbln_runtime_config = rbln_runtime_configs[0]
-        if len(rbln_runtime_config) != 1:
-            raise ValueError
-        rbln_runtime_config = rbln_runtime_config[0]
+        # Save compiled models
+        (save_dir_path / subfolder).mkdir(exist_ok=True)
+        if isinstance(compiled_model, Iterable):
+            # compiled_model is an Iterable instance
+            for single_compiled_model, compiled_model_name in zip(compiled_model, rbln_config):
+                single_compiled_model.save(save_dir_path / subfolder / f"{compiled_model_name}.rbln")
+            compiled_models = compiled_model
-        model = cls.wrap_model_if_needed(model)
-        compiled_model = cls.compile(model, rbln_runtime_config=rbln_runtime_config)
-        compiled_model.save(save_dir_path / subfolder / f"{rbln_runtime_config.compiled_model_name}.rbln")
+        else:
+            compiled_model.save(save_dir_path / subfolder / f"{DEFAULT_COMPILED_MODEL_NAME}.rbln")
+            compiled_models = [compiled_model]
         rbln_config.save(save_dir_path / subfolder)
+        # Instantiate
         return cls._from_pretrained(
             model_id=save_dir_path,
             config=config,
@@ -516,23 +590,23 @@ class RBLNModel(RBLNBaseModel):
             cache_dir=cache_dir,
             subfolder=subfolder,
             local_files_only=local_files_only,
+            rbln_config=rbln_config,
+            rbln_compiled_models=compiled_models,
             **rbln_constructor_kwargs,
             **kwargs,
         )
-    def _create_runtimes(self, rbln_device_map: Dict[str, int]) -> List[rebel.Runtime]:
+    @classmethod
+    def _create_runtimes(
+        cls, compiled_models: List[rebel.RBLNCompiledModel], rbln_device_map: Dict[str, int]
+    ) -> List[rebel.Runtime]:
         device = rbln_device_map[DEFAULT_COMPILED_MODEL_NAME]
-        return [
-            compiled_model.create_runtime(tensor_type="pt", device=device) for compiled_model in self.compiled_models
-        ]
+        return [compiled_model.create_runtime(tensor_type="pt", device=device) for compiled_model in compiled_models]
     def forward(self, *args: List[torch.Tensor], **kwargs: Dict[str, torch.Tensor]):
-        output = self.runtimes[0](*args, **kwargs)
+        output = self.model[0](*args, **kwargs)
         return output
-    def __repr__(self):
-        return repr(self.runtimes[0])
 class RBLNModelForQuestionAnswering(RBLNModel):
     model_type = "rbln_model"
@@ -676,3 +750,111 @@ class RBLNModelForAudioClassification(RBLNModel):
         )
         return rbln_config
+class RBLNModelForSequenceClassification(RBLNModel):
+    """
+    This is a generic model class that will be instantiated as one of the model classes of the library (with a sequence classification head) when created with the from_pretrained() class method
+    This model inherits from [`RBLNModel`]. Check the superclass documentation for the generic methods the library implements for all its models.
+    A class to convert and run pre-trained transformers based SequenceClassification models on RBLN devices.
+    It implements the methods to convert a pre-trained transformers SequenceClassification model into a RBLN transformer model by:
+    - transferring the checkpoint weights of the original into an optimized RBLN graph,
+    - compiling the resulting graph using the RBLN compiler.
+    Currently, this model class supports the 'XLMRoberta' and 'Roberta' model from the transformers library. Future updates may include support for additional model types.
+    """
+    model_type = "rbln_model"
+    auto_model_class = AutoModelForSequenceClassification
+    @classmethod
+    def _get_rbln_config(
+        cls,
+        preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
+        model_config: Optional["PretrainedConfig"] = None,
+        rbln_max_seq_len: Optional[int] = None,
+        rbln_model_input_names: Optional[List[str]] = None,
+        rbln_batch_size: Optional[int] = None,
+    ) -> RBLNConfig:
+        max_position_embeddings = getattr(model_config, "n_positions", None) or getattr(
+            model_config, "max_position_embeddings", None
+        )
+        if rbln_max_seq_len is None:
+            rbln_max_seq_len = max_position_embeddings
+            if rbln_max_seq_len is None:
+                for tokenizer in preprocessors:
+                    if hasattr(tokenizer, "model_max_length"):
+                        rbln_max_seq_len = tokenizer.model_max_length
+                        break
+                if rbln_max_seq_len is None:
+                    raise ValueError("`rbln_max_seq_len` should be specified!")
+        if max_position_embeddings is not None and rbln_max_seq_len > max_position_embeddings:
+            raise ValueError("`rbln_enc_max_seq_len` should be less or equal than max_position_embeddings!")
+        if rbln_model_input_names is None:
+            # These are BERT's inputs
+            rbln_model_input_names = ["input_ids", "attention_mask"]
+        if rbln_batch_size is None:
+            rbln_batch_size = 1
+        input_info = [
+            (model_input_name, [rbln_batch_size, rbln_max_seq_len], "int64")
+            for model_input_name in rbln_model_input_names
+        ]
+        rbln_runtime_config = RBLNRuntimeConfig(input_info=input_info)
+        rbln_runtime_config.batch_size = rbln_batch_size
+        meta = {"rbln_max_seq_len": rbln_max_seq_len}
+        return RBLNConfig.from_rbln_runtime_configs([rbln_runtime_config], _rbln_meta=meta)
+class RBLNModelForMaskedLM(RBLNModel):
+    model_type = "rbln_model"
+    auto_model_class = AutoModelForMaskedLM
+    @classmethod
+    def _get_rbln_config(
+        cls,
+        preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
+        model_config: Optional["PretrainedConfig"] = None,
+        rbln_max_seq_len: Optional[int] = None,
+        rbln_model_input_names: Optional[List[str]] = None,
+        rbln_batch_size: Optional[int] = None,
+    ) -> RBLNConfig:
+        max_position_embeddings = getattr(model_config, "n_positions", None) or getattr(
+            model_config, "max_position_embeddings", None
+        )
+        if rbln_max_seq_len is None:
+            rbln_max_seq_len = max_position_embeddings
+            if rbln_max_seq_len is None:
+                for tokenizer in preprocessors:
+                    if hasattr(tokenizer, "model_max_length"):
+                        rbln_max_seq_len = tokenizer.model_max_length
+                        break
+                if rbln_max_seq_len is None:
+                    raise ValueError("`rbln_max_seq_len` should be specified!")
+        if max_position_embeddings is not None and rbln_max_seq_len > max_position_embeddings:
+            raise ValueError("`rbln_enc_max_seq_len` should be less or equal than max_position_embeddings!")
+        if rbln_model_input_names is None:
+            # These are BERT's inputs
+            rbln_model_input_names = ["input_ids", "attention_mask"]
+        if rbln_batch_size is None:
+            rbln_batch_size = 1
+        input_info = [
+            (model_input_name, [rbln_batch_size, rbln_max_seq_len], "int64")
+            for model_input_name in rbln_model_input_names
+        ]
+        rbln_runtime_config = RBLNRuntimeConfig(input_info=input_info)
+        rbln_runtime_config.batch_size = rbln_batch_size
+        meta = {"rbln_max_seq_len": rbln_max_seq_len}
+        return RBLNConfig.from_rbln_runtime_configs([rbln_runtime_config], _rbln_meta=meta)

optimum-rbln 0.1.4__py3-none-any.whl → 0.1.8__py3-none-any.whl

optimum-rbln 0.1.4__py3-none-any.whl → 0.1.8__py3-none-any.whl