optimum-rbln 0.1.13__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as published to a supported registry. It is provided for informational purposes only.
Files changed (103)
  1. optimum/rbln/__init__.py +41 -38
  2. optimum/rbln/__version__.py +16 -1
  3. optimum/rbln/diffusers/__init__.py +26 -2
  4. optimum/rbln/{modeling_diffusers.py → diffusers/modeling_diffusers.py} +97 -126
  5. optimum/rbln/diffusers/models/__init__.py +36 -3
  6. optimum/rbln/{transformers/generation → diffusers/models/autoencoders}/__init__.py +1 -2
  7. optimum/rbln/diffusers/models/{autoencoder_kl.py → autoencoders/autoencoder_kl.py} +73 -61
  8. optimum/rbln/diffusers/models/autoencoders/vae.py +83 -0
  9. optimum/rbln/diffusers/models/controlnet.py +54 -14
  10. optimum/rbln/diffusers/models/transformers/__init__.py +24 -0
  11. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +203 -0
  12. optimum/rbln/diffusers/models/unets/__init__.py +24 -0
  13. optimum/rbln/diffusers/models/{unet_2d_condition.py → unets/unet_2d_condition.py} +82 -22
  14. optimum/rbln/diffusers/pipelines/__init__.py +23 -2
  15. optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +13 -33
  16. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +17 -2
  17. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +18 -2
  18. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +18 -2
  19. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +18 -2
  20. optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +1 -0
  21. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +2 -2
  22. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +1 -13
  23. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +31 -0
  24. optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +26 -0
  25. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +31 -0
  26. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +31 -0
  27. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +31 -0
  28. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +24 -0
  29. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +15 -8
  30. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +15 -8
  31. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +31 -0
  32. optimum/rbln/modeling.py +238 -0
  33. optimum/rbln/modeling_base.py +186 -760
  34. optimum/rbln/modeling_config.py +31 -7
  35. optimum/rbln/ops/__init__.py +26 -0
  36. optimum/rbln/ops/attn.py +221 -0
  37. optimum/rbln/ops/flash_attn.py +70 -0
  38. optimum/rbln/ops/kv_cache_update.py +69 -0
  39. optimum/rbln/transformers/__init__.py +20 -2
  40. optimum/rbln/{modeling_alias.py → transformers/modeling_alias.py} +5 -1
  41. optimum/rbln/transformers/modeling_generic.py +385 -0
  42. optimum/rbln/transformers/models/auto/__init__.py +23 -0
  43. optimum/rbln/transformers/models/auto/auto_factory.py +117 -23
  44. optimum/rbln/transformers/models/auto/modeling_auto.py +36 -12
  45. optimum/rbln/transformers/models/bart/__init__.py +0 -1
  46. optimum/rbln/transformers/models/bart/bart_architecture.py +107 -464
  47. optimum/rbln/transformers/models/bart/modeling_bart.py +10 -9
  48. optimum/rbln/transformers/models/bert/modeling_bert.py +3 -6
  49. optimum/rbln/transformers/models/clip/modeling_clip.py +8 -25
  50. optimum/rbln/transformers/models/decoderonly/__init__.py +0 -10
  51. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +775 -514
  52. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +128 -260
  53. optimum/rbln/transformers/models/dpt/modeling_dpt.py +1 -1
  54. optimum/rbln/transformers/models/exaone/exaone_architecture.py +60 -45
  55. optimum/rbln/transformers/models/exaone/modeling_exaone.py +4 -2
  56. optimum/rbln/transformers/models/gemma/gemma_architecture.py +33 -104
  57. optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +50 -238
  58. optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +3 -2
  59. optimum/rbln/transformers/models/llama/llama_architecture.py +0 -1
  60. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +3 -75
  61. optimum/rbln/transformers/models/midm/midm_architecture.py +84 -238
  62. optimum/rbln/transformers/models/midm/modeling_midm.py +5 -6
  63. optimum/rbln/transformers/models/mistral/mistral_architecture.py +0 -1
  64. optimum/rbln/transformers/models/phi/phi_architecture.py +60 -261
  65. optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +0 -1
  66. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +58 -103
  67. optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +498 -0
  68. optimum/rbln/transformers/models/t5/__init__.py +0 -1
  69. optimum/rbln/transformers/models/t5/modeling_t5.py +106 -5
  70. optimum/rbln/transformers/models/t5/t5_architecture.py +106 -448
  71. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +1 -1
  72. optimum/rbln/transformers/models/whisper/generation_whisper.py +42 -0
  73. optimum/rbln/transformers/models/whisper/modeling_whisper.py +78 -55
  74. optimum/rbln/transformers/models/whisper/whisper_architecture.py +219 -312
  75. optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +3 -35
  76. optimum/rbln/transformers/utils/rbln_quantization.py +120 -4
  77. optimum/rbln/utils/decorator_utils.py +51 -11
  78. optimum/rbln/utils/hub.py +131 -0
  79. optimum/rbln/utils/import_utils.py +22 -1
  80. optimum/rbln/utils/logging.py +37 -0
  81. optimum/rbln/utils/model_utils.py +52 -0
  82. optimum/rbln/utils/runtime_utils.py +10 -4
  83. optimum/rbln/utils/save_utils.py +17 -0
  84. optimum/rbln/utils/submodule.py +137 -0
  85. optimum_rbln-0.2.0.dist-info/METADATA +117 -0
  86. optimum_rbln-0.2.0.dist-info/RECORD +114 -0
  87. {optimum_rbln-0.1.13.dist-info → optimum_rbln-0.2.0.dist-info}/WHEEL +1 -1
  88. optimum_rbln-0.2.0.dist-info/licenses/LICENSE +288 -0
  89. optimum/rbln/transformers/cache_utils.py +0 -107
  90. optimum/rbln/transformers/generation/streamers.py +0 -139
  91. optimum/rbln/transformers/generation/utils.py +0 -397
  92. optimum/rbln/transformers/models/exaone/hf_hub_cached/configuration_exaone.py +0 -181
  93. optimum/rbln/transformers/models/exaone/hf_hub_cached/modeling_exaone.py +0 -1725
  94. optimum/rbln/transformers/models/midm/hf_hub_cached/configuration_midm.py +0 -22
  95. optimum/rbln/transformers/models/midm/hf_hub_cached/midm_bitext_tokenization.py +0 -304
  96. optimum/rbln/transformers/models/midm/hf_hub_cached/modeling_midm.py +0 -1469
  97. optimum/rbln/transformers/models/midm/hf_hub_cached/rotary_position_embedding.py +0 -98
  98. optimum/rbln/utils/context.py +0 -58
  99. optimum/rbln/utils/timer_utils.py +0 -43
  100. optimum_rbln-0.1.13.dist-info/METADATA +0 -120
  101. optimum_rbln-0.1.13.dist-info/RECORD +0 -107
  102. optimum_rbln-0.1.13.dist-info/entry_points.txt +0 -4
  103. optimum_rbln-0.1.13.dist-info/licenses/LICENSE +0 -201
@@ -22,7 +22,6 @@
22
22
  # from Rebellions Inc.
23
23
 
24
24
  import importlib
25
- import inspect
26
25
  import logging
27
26
  import os
28
27
  import shutil
@@ -33,143 +32,31 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
33
32
 
34
33
  import rebel
35
34
  import torch
36
- import transformers
37
- from huggingface_hub import HfApi, HfFolder, hf_hub_download
38
- from optimum.exporters import TasksManager
39
- from optimum.modeling_base import OptimizedModel
40
35
  from transformers import (
41
36
  AutoConfig,
42
37
  AutoModel,
43
- AutoModelForAudioClassification,
44
- AutoModelForImageClassification,
45
- AutoModelForMaskedLM,
46
- AutoModelForQuestionAnswering,
47
- AutoModelForSequenceClassification,
48
38
  GenerationConfig,
49
39
  PretrainedConfig,
50
40
  )
51
41
 
52
- from .modeling_config import DEFAULT_COMPILED_MODEL_NAME, RBLNCompileConfig, RBLNConfig, use_rbln_config
42
+ from .modeling_config import RBLNCompileConfig, RBLNConfig, use_rbln_config
43
+ from .utils.hub import PushToHubMixin, pull_compiled_model_from_hub, validate_files
53
44
  from .utils.runtime_utils import UnavailableRuntime
54
45
  from .utils.save_utils import maybe_load_preprocessors
46
+ from .utils.submodule import SubModulesMixin
55
47
 
56
48
 
57
49
  if TYPE_CHECKING:
58
- from transformers import (
59
- AutoFeatureExtractor,
60
- AutoProcessor,
61
- AutoTokenizer,
62
- PreTrainedModel,
63
- )
50
+ from transformers import PreTrainedModel
64
51
 
65
52
  logger = logging.getLogger(__name__)
66
53
 
67
54
 
68
- class SubModulesMixin:
69
- """
70
- _rbln_submodules = [
71
- {"name": "vision_tower"},
72
- {"name": "language_model"},
73
- ]
74
- """
75
-
76
- _rbln_submodules: List[Dict[str, Any]] = []
77
-
78
- def __init__(
79
- self,
80
- *,
81
- rbln_submodules: List["RBLNBaseModel"] = [],
82
- **kwargs,
83
- ) -> None:
84
- for submodule_meta, submodule in zip(self._rbln_submodules, rbln_submodules):
85
- setattr(self, submodule_meta["name"], submodule)
86
-
87
- @classmethod
88
- def _export_submodules_from_model(
89
- cls,
90
- model: "PreTrainedModel",
91
- model_save_dir: str,
92
- rbln_kwargs: Dict[str, Any],
93
- **kwargs,
94
- ) -> List["RBLNBaseModel"]:
95
- rbln_submodules = []
96
- for submodule in cls._rbln_submodules:
97
- submodule_name = submodule["name"]
98
- torch_submodule: "PreTrainedModel" = getattr(model, submodule["name"])
99
- cls_name = torch_submodule.__class__.__name__
100
- submodule_cls: "RBLNModel" = getattr(importlib.import_module("optimum.rbln"), f"RBLN{cls_name}")
101
-
102
- if submodule_name in rbln_kwargs:
103
- kwargs["rbln_config"] = rbln_kwargs[submodule_name]
104
-
105
- rbln_submodule = submodule_cls.from_model(
106
- model=torch_submodule,
107
- subfolder=submodule_name,
108
- model_save_dir=model_save_dir,
109
- **kwargs,
110
- )
111
-
112
- rbln_submodules.append(rbln_submodule)
113
-
114
- return rbln_submodules
115
-
116
- @classmethod
117
- def _load_submodules_from_compiled_models(
118
- cls,
119
- model_save_dir: str,
120
- rbln_kwargs: Dict[str, Any],
121
- **kwargs,
122
- ):
123
- rbln_submodules = []
124
- for submodule in cls._rbln_submodules:
125
- submodule_name = submodule["name"]
126
-
127
- if submodule_name in rbln_kwargs:
128
- kwargs["rbln_config"] = rbln_kwargs[submodule_name]
129
-
130
- # Get cls name for call the constructor of the rbln class
131
- submodule_rbln_config = RBLNConfig.load(Path(model_save_dir) / submodule_name)
132
- submodule_cls_name = submodule_rbln_config.meta["cls"]
133
- submodule_cls: "RBLNBaseModel" = getattr(importlib.import_module("optimum.rbln"), submodule_cls_name)
134
-
135
- config = OptimizedModel._load_config(Path(model_save_dir) / submodule_name)
136
- rbln_submodule = submodule_cls._from_pretrained(
137
- model_id=model_save_dir,
138
- config=config,
139
- subfolder=submodule_name,
140
- **kwargs,
141
- )
142
- rbln_submodules.append(rbln_submodule)
143
- return rbln_submodules
144
-
145
- @classmethod
146
- def _load_submodules(
147
- cls,
148
- model_save_dir,
149
- rbln_kwargs,
150
- model=None,
151
- **kwargs,
152
- ):
153
- # Two ways :
154
- # 1. Compile from pytorch object
155
- # 2. Load from compiled file
156
- if model is not None:
157
- return cls._export_submodules_from_model(
158
- model=model,
159
- model_save_dir=model_save_dir,
160
- rbln_kwargs=rbln_kwargs,
161
- **kwargs,
162
- )
163
-
164
- else:
165
- return cls._load_submodules_from_compiled_models(
166
- model_save_dir=model_save_dir,
167
- rbln_kwargs=rbln_kwargs,
168
- **kwargs,
169
- )
55
+ class PreTrainedModel(ABC): # noqa: F811
56
+ pass
170
57
 
171
58
 
172
- class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
59
+ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
173
60
  """
174
61
  An abstract base class for compiling, loading, and saving neural network models from the huggingface
175
62
  transformers and diffusers libraries to run on RBLN NPU devices.
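The SubModulesMixin deleted from this file is not gone: it reappears as optimum/rbln/utils/submodule.py (+137 in the file list above) and is now the first base of RBLNBaseModel. Its contract is the one described in the removed docstring: a composite model lists its submodules by name and the mixin compiles and re-attaches them. A minimal sketch of a subclass using that contract; the class name and layout are illustrative, only `_rbln_submodules` and the attribute names come from the removed docstring:

    # Hypothetical subclass: "vision_tower" / "language_model" are the example
    # entries from the removed SubModulesMixin docstring above.
    from optimum.rbln import RBLNModel

    class RBLNMyVisionLanguageModel(RBLNModel):
        # Each entry names a torch submodule that is compiled separately and
        # re-attached under the same attribute name on the RBLN wrapper.
        _rbln_submodules = [
            {"name": "vision_tower"},
            {"name": "language_model"},
        ]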
@@ -205,30 +92,29 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
205
92
  """
206
93
 
207
94
  model_type = "rbln_model"
208
- auto_model_class = AutoModel # feature extraction
95
+ auto_model_class = AutoModel
96
+ config_class = AutoConfig
209
97
  config_name = "config.json"
98
+ hf_library_name = "transformers"
99
+ _hf_class = None
210
100
 
211
101
  def __init__(
212
102
  self,
213
103
  models: List[rebel.Runtime],
214
104
  config: "PretrainedConfig",
215
105
  rbln_config: RBLNConfig,
216
- preprocessors: Optional[List],
217
106
  model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
218
107
  subfolder: str = "",
219
108
  rbln_compiled_models: Optional[rebel.RBLNCompiledModel] = None,
220
109
  rbln_submodules: List["RBLNBaseModel"] = [],
221
110
  **kwargs,
222
111
  ):
223
- super().__init__(models, config)
224
- if not isinstance(self.config, PretrainedConfig): # if diffusers config
225
- self.config = PretrainedConfig(**self.config)
226
-
112
+ self.model = models
113
+ self.config = config
227
114
  self.rbln_config = rbln_config
228
- self.preprocessors = [] if preprocessors is None else preprocessors
229
115
  self.compiled_models = rbln_compiled_models
230
116
 
231
- # Registers the RBLNBaseModelForXXX classes into the transformers AutoModel classes to avoid warnings when creating
117
+ # Registers the RBLN classes into the transformers AutoModel classes to avoid warnings when creating
232
118
  # a pipeline https://github.com/huggingface/transformers/blob/3d3204c025b6b5de013e07dd364208e28b4d9589/src/transformers/pipelines/base.py#L940
233
119
  AutoConfig.register(self.model_type, AutoConfig)
234
120
  if hasattr(self.auto_model_class, "register"):
@@ -247,6 +133,7 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
247
133
 
248
134
  self.device = torch.device("cpu")
249
135
  self.training = False
136
+ self.dtype = torch.float32
250
137
 
251
138
  # FIXME :: model_save_dir is not used after initialized. (This can be used when save/load)
252
139
  # This attribute is needed to keep one reference on the temporary directory, since garbage collecting it
@@ -264,34 +151,6 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
264
151
  self.rbln_submodules = rbln_submodules
265
152
  self.__post_init__(**kwargs)
266
153
 
267
- def _save_pretrained(self, save_directory: Union[str, Path]):
268
- """
269
- Saves a model and its configuration file to a directory, so that it can be re-loaded using the
270
- [`~optimum.rbln.modeling_base.RBLNBaseModel.from_pretrained`] class method.
271
-
272
- Args:
273
- save_directory (`Union[str, Path]`):
274
- Directory where to save the model file.
275
- """
276
- real_save_dir = self.model_save_dir / self.subfolder
277
- save_directory_path = Path(save_directory)
278
- if os.path.exists(real_save_dir) and os.path.isdir(real_save_dir):
279
- if save_directory_path.absolute() == real_save_dir.absolute():
280
- raise FileExistsError(
281
- f"Cannot save model to '{save_directory}'. "
282
- f"This directory already exists and contains the model files."
283
- )
284
- shutil.copytree(real_save_dir, save_directory, dirs_exist_ok=True)
285
- self.config.save_pretrained(save_directory)
286
- if self.generation_config is not None:
287
- self.generation_config.save_pretrained(save_directory)
288
- else:
289
- raise FileNotFoundError(
290
- f"Unable to save the model. The model directory '{real_save_dir}' does not exist or is not accessible. "
291
- f"Cannot save to the specified destination '{save_directory}'. "
292
- f"Please ensure the model directory exists and you have the necessary permissions to access it."
293
- )
294
-
295
154
  @classmethod
296
155
  def _load_compiled_model_dir(
297
156
  cls,
@@ -302,50 +161,18 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
302
161
  cache_dir: Optional[str] = None,
303
162
  subfolder: str = "",
304
163
  local_files_only: bool = False,
305
- ):
306
- # Find compiled model
307
- # And prepare or download cache folder from HF Hub if needed.
164
+ ) -> str:
165
+ """Load the directory containing the compiled model files."""
308
166
  model_path = Path(model_id)
167
+
309
168
  if model_path.is_dir():
310
169
  model_path = model_path / subfolder
311
170
  rbln_files = list(model_path.glob("*.rbln"))
312
171
  rbln_config_filenames = list(model_path.glob("rbln_config.json"))
172
+ validate_files(rbln_files, rbln_config_filenames, f"directory {model_path}")
313
173
  else:
314
- if isinstance(use_auth_token, bool):
315
- token = HfFolder().get_token()
316
- else:
317
- token = use_auth_token
318
- repo_files = list(
319
- map(
320
- Path,
321
- HfApi().list_repo_files(model_id, revision=revision, token=token),
322
- )
323
- )
324
-
325
- pattern = "*.rbln" if subfolder == "" else f"{subfolder}/*.rbln"
326
- rbln_files = [p for p in repo_files if p.match(pattern)]
327
-
328
- pattern = "rbln_config.json" if subfolder == "" else f"{subfolder}/rbln_config.json"
329
- rbln_config_filenames = [p for p in repo_files if p.match(pattern)]
330
-
331
- if len(rbln_files) == 0:
332
- raise FileNotFoundError(f"Could not find any rbln model file in {model_path}")
333
-
334
- if len(rbln_config_filenames) == 0:
335
- raise FileNotFoundError(f"Could not find `rbln_config.json` file in {model_path}")
336
-
337
- if len(rbln_config_filenames) > 1:
338
- raise FileExistsError(
339
- f"Multiple rbln_config.json are not expected. but {len(rbln_config_filenames)} are found."
340
- )
341
-
342
- if model_path.is_dir():
343
- model_path = str(model_path)
344
- else:
345
- rbln_config_filename = rbln_config_filenames[0]
346
- rbln_config_cache_path = hf_hub_download(
347
- repo_id=model_id,
348
- filename=str(rbln_config_filename),
174
+ model_path = pull_compiled_model_from_hub(
175
+ model_id=model_id,
349
176
  subfolder=subfolder,
350
177
  use_auth_token=use_auth_token,
351
178
  revision=revision,
@@ -353,9 +180,8 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
353
180
  force_download=force_download,
354
181
  local_files_only=local_files_only,
355
182
  )
356
- model_path = Path(rbln_config_cache_path).parent
357
183
 
358
- return model_path
184
+ return str(model_path)
359
185
 
360
186
  @classmethod
361
187
  def _load_compiled_models(cls, model_path: str):
@@ -368,13 +194,14 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
368
194
  def _from_pretrained(
369
195
  cls,
370
196
  model_id: Union[str, Path],
371
- config: "PretrainedConfig",
197
+ config: "PretrainedConfig" = None,
372
198
  use_auth_token: Optional[Union[bool, str]] = None,
373
199
  revision: Optional[str] = None,
374
200
  force_download: bool = False,
375
201
  cache_dir: Optional[str] = None,
376
202
  subfolder: str = "",
377
203
  local_files_only: bool = False,
204
+ trust_remote_code: bool = False,
378
205
  model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
379
206
  # passed from compile function
380
207
  rbln_config: Optional[RBLNConfig] = None,
@@ -401,6 +228,43 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
401
228
  rbln_config = RBLNConfig.load(model_path_subfolder)
402
229
  rbln_config.update_runtime_cfg(rbln_kwargs)
403
230
 
231
+ if rbln_config.meta["cls"] != cls.__name__:
232
+ raise NameError(
233
+ f"Cannot load the model. The model was originally compiled using "
234
+ f"{rbln_config.meta['cls']}, but you are trying to load it with {cls.__name__}."
235
+ "Please use the same model class that was used during compilation."
236
+ )
237
+
238
+ if config is None:
239
+ if cls.hf_library_name == "transformers":
240
+ config = AutoConfig.from_pretrained(
241
+ model_path_subfolder,
242
+ cache_dir=cache_dir,
243
+ force_download=force_download,
244
+ revision=revision,
245
+ token=use_auth_token,
246
+ trust_remote_code=trust_remote_code,
247
+ )
248
+ elif cls.hf_library_name == "diffusers":
249
+ # import here to prevent diffusers dependency
250
+ # TODO(jongho): Remove diffusers dependency if use transformers only.
251
+ from diffusers.configuration_utils import ConfigMixin
252
+
253
+ class DummyConfigMixin(ConfigMixin):
254
+ # Just to load config, We need to specify `config_name`
255
+ config_name = "config.json"
256
+
257
+ config = DummyConfigMixin.load_config(
258
+ model_id,
259
+ cache_dir=cache_dir,
260
+ force_download=force_download,
261
+ local_files_only=local_files_only,
262
+ revision=revision,
263
+ token=use_auth_token,
264
+ subfolder=subfolder,
265
+ )
266
+ config = PretrainedConfig(**config)
267
+
404
268
  rbln_compiled_models = cls._load_compiled_models(model_path_subfolder)
405
269
 
406
270
  if len(cls._rbln_submodules) > 0:
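Two behaviours are new in this hunk: `config` may now be omitted when loading (it is re-read with AutoConfig for transformers models, or through a throwaway ConfigMixin for diffusers models), and the loader refuses to open artifacts that were compiled by a different RBLN class. A hedged illustration of the guard; the directory name is made up, and RBLNLlamaForCausalLM / RBLNT5ForConditionalGeneration are used only as examples of two unrelated RBLN classes:

    # Illustrative: "llama-rbln" was compiled with RBLNLlamaForCausalLM, so its
    # rbln_config.json records meta["cls"] == "RBLNLlamaForCausalLM".
    from optimum.rbln import RBLNLlamaForCausalLM, RBLNT5ForConditionalGeneration

    RBLNLlamaForCausalLM.from_pretrained("llama-rbln")            # loads normally
    RBLNT5ForConditionalGeneration.from_pretrained("llama-rbln")  # raises NameError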
@@ -440,8 +304,6 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
440
304
  ):
441
305
  if isinstance(model_save_dir, str):
442
306
  model_save_dir = Path(model_save_dir)
443
- preprocessors = maybe_load_preprocessors(model_save_dir.name, subfolder=subfolder)
444
-
445
307
  # FIXME:: Should we convert it?
446
308
  compiled_model_names = [cfg.compiled_model_name for cfg in rbln_config.compile_cfgs]
447
309
  rbln_compiled_models = [rbln_compiled_models[cm_name] for cm_name in compiled_model_names]
@@ -449,7 +311,7 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
449
311
  # create runtimes only if `rbln_create_runtimes` is enabled
450
312
  try:
451
313
  models = (
452
- cls._create_runtimes(rbln_compiled_models, rbln_config.device_map)
314
+ cls._create_runtimes(rbln_compiled_models, rbln_config.device_map, rbln_config.activate_profiler)
453
315
  if rbln_config.create_runtimes
454
316
  else UnavailableRuntime()
455
317
  )
@@ -464,7 +326,6 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
464
326
  models,
465
327
  config,
466
328
  rbln_config,
467
- preprocessors,
468
329
  model_save_dir=model_save_dir,
469
330
  subfolder=subfolder,
470
331
  rbln_compiled_models=(None if rbln_config.optimize_host_memory else rbln_compiled_models),
@@ -472,17 +333,53 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
472
333
  **kwargs,
473
334
  )
474
335
 
475
- def __repr__(self):
476
- return repr(self.model) + repr(self.rbln_submodules)
336
+ @classmethod
337
+ @use_rbln_config
338
+ def _export(
339
+ cls,
340
+ model_id: Union[str, Path],
341
+ rbln_config: Optional[Dict[str, Any]] = None,
342
+ **kwargs,
343
+ ) -> "RBLNBaseModel":
344
+ subfolder = kwargs.get("subfolder", "")
345
+ model_save_dir = kwargs.pop("model_save_dir", None)
346
+
347
+ rbln_kwargs = rbln_config
348
+ model: "PreTrainedModel" = cls.get_pytorch_model(
349
+ model_id=model_id,
350
+ rbln_kwargs=rbln_kwargs,
351
+ **kwargs,
352
+ )
353
+ preprocessors = maybe_load_preprocessors(model_id, subfolder=subfolder)
354
+ return cls.from_model(
355
+ model,
356
+ rbln_config=rbln_config,
357
+ preprocessors=preprocessors,
358
+ model_save_dir=model_save_dir,
359
+ **kwargs,
360
+ )
477
361
 
478
362
  @classmethod
479
- def compile(cls, model, rbln_compile_config: Optional[RBLNCompileConfig] = None):
363
+ def from_pretrained(
364
+ cls,
365
+ model_id: Union[str, Path],
366
+ export: bool = False,
367
+ **kwargs,
368
+ ) -> "RBLNBaseModel":
369
+ if isinstance(model_id, Path):
370
+ model_id = model_id.as_posix()
371
+ from_pretrained_method = cls._export if export else cls._from_pretrained
372
+ return from_pretrained_method(model_id=model_id, **kwargs)
373
+
374
+ @classmethod
375
+ def compile(cls, model, rbln_compile_config: Optional[RBLNCompileConfig] = None, **kwargs):
480
376
  compiled_model = rebel.compile_from_torch(
481
377
  model,
482
378
  input_info=rbln_compile_config.input_info,
483
379
  fusion=rbln_compile_config.fusion,
484
380
  npu=rbln_compile_config.npu,
485
381
  tensor_parallel_size=rbln_compile_config.tensor_parallel_size,
382
+ **kwargs,
486
383
  )
487
384
  return compiled_model
488
385
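With OptimizedModel out of the inheritance chain, `from_pretrained` is now defined here: `export=True` routes to `_export` (fetch the PyTorch checkpoint, compile, then reload through `_from_pretrained`), while the default `export=False` loads an already compiled directory or Hub repository. A sketch of both paths, mirroring the usage example in the RBLNModel docstring; the checkpoint id and the `rbln_npu` value are placeholders:

    from optimum.rbln import RBLNLlamaForCausalLM

    # Compile from a Hugging Face checkpoint (export=True -> _export -> from_model).
    model = RBLNLlamaForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-hf",   # illustrative checkpoint id
        export=True,
        rbln_npu="npu_name",          # rbln_* kwargs are collected by use_rbln_config
    )
    model.save_pretrained("llama-rbln")

    # Load the compiled artifacts later (export=False -> _from_pretrained).
    model = RBLNLlamaForCausalLM.from_pretrained("llama-rbln")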
 
@@ -500,6 +397,24 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
500
397
  rbln_config = cls._get_rbln_config(**others, rbln_kwargs=rbln_kwargs)
501
398
  return rbln_config
502
399
 
400
+ @classmethod
401
+ @property
402
+ def hf_class(cls):
403
+ """
404
+ Lazily loads and caches the corresponding Hugging Face model class.
405
+ Removes 'RBLN' prefix from the class name to get the original class name
406
+ (e.g., RBLNLlamaForCausalLM -> LlamaForCausalLM) and imports it from
407
+ the transformers/diffusers module.
408
+
409
+ Returns:
410
+ type: The original Hugging Face model class
411
+ """
412
+ if cls._hf_class is None:
413
+ hf_cls_name = cls.__name__[4:]
414
+ library = importlib.import_module(cls.hf_library_name)
415
+ cls._hf_class = getattr(library, hf_cls_name, None)
416
+ return cls._hf_class
417
+
503
418
  def can_generate(self):
504
419
  return False
505
420
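The new `hf_class` property gives every RBLN wrapper a lazy, cached handle on its original Hugging Face class by stripping the `RBLN` prefix and importing from `hf_library_name` (`transformers` or `diffusers`). A one-line illustration of the mapping named in the docstring; note that the classmethod-plus-property stacking it relies on is only honoured on Python versions up to 3.12:

    from optimum.rbln import RBLNLlamaForCausalLM

    # RBLNLlamaForCausalLM -> transformers.LlamaForCausalLM, cached in _hf_class.
    assert RBLNLlamaForCausalLM.hf_class.__name__ == "LlamaForCausalLM"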
 
@@ -509,37 +424,88 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
509
424
  def __call__(self, *args, **kwargs):
510
425
  return self.forward(*args, **kwargs)
511
426
 
427
+ def __repr__(self):
428
+ return repr(self.model) + repr(self.rbln_submodules)
429
+
512
430
  def __post_init__(self, **kwargs):
513
- self.dtype = torch.float32
431
+ pass
514
432
 
515
- @classmethod
516
- def _from_transformers(cls, *args, **kwargs) -> "RBLNBaseModel":
433
+ def save_pretrained(
434
+ self,
435
+ save_directory: Union[str, Path],
436
+ push_to_hub: bool = False,
437
+ **kwargs,
438
+ ):
517
439
  """
518
- Exports a vanilla Transformers model into a rbln-compiled Module.
519
- This will be deprecated after optimum 2.0
440
+ Saves a model and its configuration file to a directory, so that it can be re-loaded using the
441
+ [`~optimum.rbln.modeling_base.RBLNBaseModel.from_pretrained`] class method.
442
+
443
+ Args:
444
+ save_directory (`Union[str, Path]`):
445
+ Directory where to save the model file.
446
+ push_to_hub (`bool`, *optional*, defaults to `False`):
447
+ Whether or not to push your model to the Hugging Face model hub after saving it.
448
+
520
449
  """
521
- return cls._export(*args, **kwargs)
450
+ if os.path.isfile(save_directory):
451
+ logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
452
+ return
522
453
 
523
- @classmethod
524
- def wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNConfig) -> torch.nn.Module:
525
- # Wrap the model if needed.
526
- return model
454
+ os.makedirs(save_directory, exist_ok=True)
455
+
456
+ real_save_dir = self.model_save_dir / self.subfolder
457
+ save_directory_path = Path(save_directory)
458
+ if os.path.exists(real_save_dir) and os.path.isdir(real_save_dir):
459
+ if save_directory_path.absolute() == real_save_dir.absolute():
460
+ raise FileExistsError(
461
+ f"Cannot save model to '{save_directory}'. "
462
+ f"This directory already exists and contains the model files."
463
+ )
464
+ shutil.copytree(real_save_dir, save_directory, dirs_exist_ok=True)
465
+ self.config.save_pretrained(save_directory)
466
+ if self.generation_config is not None:
467
+ self.generation_config.save_pretrained(save_directory)
468
+ else:
469
+ raise FileNotFoundError(
470
+ f"Unable to save the model. The model directory '{real_save_dir}' does not exist or is not accessible. "
471
+ f"Cannot save to the specified destination '{save_directory}'. "
472
+ f"Please ensure the model directory exists and you have the necessary permissions to access it."
473
+ )
474
+
475
+ if push_to_hub:
476
+ return super().push_to_hub(save_directory, **kwargs)
477
+
478
+ @staticmethod
479
+ def _raise_missing_compiled_file_error(missing_files: List[str]):
480
+ """Raises a KeyError with a message indicating missing compiled model files."""
481
+
482
+ if len(missing_files) == 1:
483
+ message = f"The rbln model folder is missing the required '{missing_files[0]}.rbln' file. "
484
+ else:
485
+ files_str = ", ".join([f"'{f}.rbln'" for f in missing_files])
486
+ message = (
487
+ "The rbln model folder is missing required files. "
488
+ f"Ensure that {files_str} files are present in the folder. "
489
+ )
490
+ message += (
491
+ "These files are necessary for loading the rbln model. "
492
+ "If these files are missing, please recompile the model using the latest optimum-rbln "
493
+ "and ensure the compilation completes successfully."
494
+ )
495
+ raise KeyError(message)
527
496
 
528
497
  @classmethod
529
498
  @abstractmethod
530
499
  def _get_rbln_config(cls, **rbln_config_kwargs) -> RBLNConfig:
531
500
  pass
532
501
 
533
- @abstractmethod
534
- def forward(self, *args: List[torch.Tensor], **kwargs: Dict[str, torch.Tensor]):
535
- pass
536
-
537
502
  @classmethod
538
503
  @abstractmethod
539
504
  def _create_runtimes(
540
505
  cls,
541
506
  compiled_models: List[rebel.RBLNCompiledModel],
542
507
  rbln_device_map: Dict[str, int],
508
+ activate_profiler: Optional[bool] = None,
543
509
  ) -> List[rebel.Runtime]:
544
510
  # compiled_models -> runtimes
545
511
  pass
@@ -562,546 +528,6 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
562
528
  ):
563
529
  pass
564
530
 
565
- @classmethod
566
- @use_rbln_config
567
- def _export(
568
- cls,
569
- model_id: Union[str, Path],
570
- config: "PretrainedConfig", # FIXME : optimum passes config, but we ignore it.
571
- rbln_config: Optional[Dict[str, Any]] = None,
572
- **kwargs,
573
- ) -> "RBLNModel":
574
- subfolder = kwargs.get("subfolder", "")
575
- model_save_dir = kwargs.pop("model_save_dir", None)
576
-
577
- rbln_kwargs = rbln_config
578
- model: "PreTrainedModel" = cls.get_pytorch_model(
579
- model_id=model_id,
580
- rbln_kwargs=rbln_kwargs,
581
- **kwargs,
582
- )
583
- preprocessors = maybe_load_preprocessors(model_id, subfolder=subfolder)
584
- return cls.from_model(
585
- model,
586
- rbln_config=rbln_config,
587
- preprocessors=preprocessors,
588
- model_save_dir=model_save_dir,
589
- **kwargs,
590
- )
591
-
592
-
593
- class RBLNModel(RBLNBaseModel):
594
- """
595
- A class that inherits from RBLNBaseModel for models consisting of a single `torch.nn.Module`.
596
-
597
- This class supports all the functionality of RBLNBaseModel, including loading and saving models using
598
- the `from_pretrained` and `save_pretrained` methods, compiling PyTorch models for execution on RBLN NPU
599
- devices.
600
-
601
- Example:
602
- ```python
603
- model = RBLNModel.from_pretrained("model_id", export=True, rbln_npu="npu_name")
604
- outputs = model(**inputs)
605
- ```
606
- """
607
-
608
- @classmethod
609
- def update_kwargs(cls, kwargs):
610
- """
611
- Update user-given kwargs to get proper pytorch model.
612
-
613
- For example, `torchscript`=True should be set because torch.jit
614
- does not support `transformers` output instances as module output;
615
- """
616
- kwargs.update(
617
- {
618
- "torchscript": True,
619
- "return_dict": False,
620
- }
621
- )
622
- return kwargs
623
-
624
- @classmethod
625
- def get_pytorch_model(
626
- cls,
627
- model_id: str,
628
- use_auth_token: Optional[Union[bool, str]] = None,
629
- revision: Optional[str] = None,
630
- force_download: bool = False,
631
- cache_dir: Optional[str] = None,
632
- subfolder: str = "",
633
- local_files_only: bool = False,
634
- trust_remote_code: bool = False,
635
- # Some rbln-kwargs should be applied before loading torch module (i.e. quantized llm)
636
- rbln_kwargs: Optional[Dict[str, Any]] = None,
637
- **kwargs,
638
- ) -> "PreTrainedModel":
639
- task = kwargs.pop("task", None)
640
- if task is None:
641
- task = TasksManager.infer_task_from_model(cls.auto_model_class)
642
-
643
- kwargs = cls.update_kwargs(kwargs)
644
-
645
- model = TasksManager.get_model_from_task(
646
- task=task,
647
- model_name_or_path=model_id,
648
- subfolder=subfolder,
649
- revision=revision,
650
- framework="pt",
651
- cache_dir=cache_dir,
652
- use_auth_token=use_auth_token,
653
- local_files_only=local_files_only,
654
- force_download=force_download,
655
- trust_remote_code=trust_remote_code,
656
- **kwargs,
657
- )
658
-
659
- return model
660
-
661
- @classmethod
662
- def save_torch_artifacts(
663
- cls,
664
- model: "PreTrainedModel",
665
- save_dir_path: Path,
666
- subfolder: str,
667
- rbln_config: RBLNConfig,
668
- ):
669
- """
670
- If you are unavoidably running on a CPU rather than an RBLN device,
671
- store the torch tensor, weight, etc. in this function.
672
- """
673
-
674
- @classmethod
675
- def get_compiled_model(cls, model: "PreTrainedModel", rbln_config: RBLNConfig):
676
- model = cls.wrap_model_if_needed(model, rbln_config)
677
- rbln_compile_config = rbln_config.compile_cfgs[0]
678
- compiled_model = cls.compile(model, rbln_compile_config=rbln_compile_config)
679
- return compiled_model
680
-
681
- @classmethod
682
- @use_rbln_config
683
- def from_model(
684
- cls,
685
- model: "PreTrainedModel",
686
- rbln_config: Dict[str, Any] = {},
687
- model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
688
- subfolder: str = "",
689
- **kwargs,
690
- ):
691
- preprocessors = kwargs.pop("preprocessors", [])
692
- rbln_kwargs = rbln_config
693
-
694
- # Directory to save compile artifacts(.rbln) and original configs
695
- if model_save_dir is None:
696
- save_dir = TemporaryDirectory()
697
- save_dir_path = Path(save_dir.name)
698
- else:
699
- save_dir = model_save_dir
700
- if isinstance(save_dir, TemporaryDirectory):
701
- save_dir_path = Path(model_save_dir.name)
702
- else:
703
- save_dir_path = Path(model_save_dir)
704
- save_dir_path.mkdir(exist_ok=True)
705
-
706
- # (Optional) Save preprocessors (tokenizer, image preprocessors, etc)
707
- for preprocessor in preprocessors:
708
- preprocessor.save_pretrained(save_dir_path)
709
-
710
- # Save configs
711
- # FIXME :: optimum passes AutoConfig. But here we ignore it.
712
- config = model.config
713
- if hasattr(model, "can_generate") and model.can_generate():
714
- generation_config = model.generation_config
715
- generation_config.save_pretrained(save_dir_path / subfolder)
716
- if not isinstance(config, PretrainedConfig): # diffusers config
717
- config = PretrainedConfig(**config)
718
- config.save_pretrained(save_dir_path / subfolder)
719
-
720
- # Get compilation arguments (e.g. input_info)
721
- rbln_config: RBLNConfig = cls.get_rbln_config(
722
- preprocessors=preprocessors, model_config=config, rbln_kwargs=rbln_kwargs
723
- )
724
- # rbln_config.update_runtime_cfg(rbln_kwargs) # This is done in get_rbln_config
725
-
726
- compiled_model: Union[rebel.RBLNCompiledModel, Dict[str, rebel.RBLNCompiledModel]] = cls.get_compiled_model(
727
- model, rbln_config=rbln_config
728
- )
729
-
730
- # Save compiled models (.rbln)
731
- (save_dir_path / subfolder).mkdir(exist_ok=True)
732
- if not isinstance(compiled_model, dict):
733
- compiled_models = {DEFAULT_COMPILED_MODEL_NAME: compiled_model}
734
- else:
735
- compiled_models = compiled_model
736
- for compiled_model_name, cm in compiled_models.items():
737
- cm.save(save_dir_path / subfolder / f"{compiled_model_name}.rbln")
738
- rbln_config.save(save_dir_path / subfolder)
739
-
740
- # Save torch artifacts (e.g. embedding matrix if needed.)
741
- cls.save_torch_artifacts(model, save_dir_path=save_dir_path, subfolder=subfolder, rbln_config=rbln_config)
742
-
743
- # Load submodules
744
- if len(cls._rbln_submodules) > 0:
745
- rbln_submodules = cls._load_submodules(
746
- model=model,
747
- model_save_dir=save_dir,
748
- rbln_kwargs=rbln_kwargs,
749
- **kwargs,
750
- )
751
- else:
752
- rbln_submodules = []
753
-
754
- # Instantiate
755
- return cls._from_pretrained(
756
- model_id=save_dir_path,
757
- config=config,
758
- model_save_dir=save_dir,
759
- subfolder=subfolder,
760
- rbln_config=rbln_config,
761
- rbln_compiled_models=compiled_models,
762
- rbln_submodules=rbln_submodules,
763
- **kwargs,
764
- )
765
-
766
- @classmethod
767
- def _create_runtimes(
768
- cls,
769
- compiled_models: List[rebel.RBLNCompiledModel],
770
- rbln_device_map: Dict[str, int],
771
- ) -> List[rebel.Runtime]:
772
- device = rbln_device_map[DEFAULT_COMPILED_MODEL_NAME]
773
- return [compiled_model.create_runtime(tensor_type="pt", device=device) for compiled_model in compiled_models]
774
-
531
+ @abstractmethod
775
532
  def forward(self, *args: List[torch.Tensor], **kwargs: Dict[str, torch.Tensor]):
776
- output = self.model[0](*args, **kwargs)
777
- return output
778
-
779
-
780
- class RBLNModelForQuestionAnswering(RBLNModel):
781
- auto_model_class = AutoModelForQuestionAnswering
782
- rbln_model_input_names = ["input_ids", "attention_mask", "token_type_ids"]
783
-
784
- @classmethod
785
- def _get_rbln_config(
786
- cls,
787
- preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
788
- model_config: Optional["PretrainedConfig"] = None,
789
- rbln_kwargs: Dict[str, Any] = {},
790
- ) -> RBLNConfig:
791
- rbln_max_seq_len = rbln_kwargs.get("max_seq_len", None)
792
- rbln_batch_size = rbln_kwargs.get("batch_size", None)
793
- rbln_model_input_names = rbln_kwargs.get("model_input_names", None)
794
-
795
- if rbln_max_seq_len is None:
796
- for tokenizer in preprocessors:
797
- if hasattr(tokenizer, "model_max_length"):
798
- rbln_max_seq_len = tokenizer.model_max_length
799
- break
800
- if rbln_max_seq_len is None:
801
- raise ValueError("`rbln_max_seq_len` should be specified!")
802
-
803
- if rbln_batch_size is None:
804
- rbln_batch_size = 1
805
-
806
- if rbln_model_input_names is None:
807
- for tokenizer in preprocessors:
808
- if hasattr(tokenizer, "model_input_names"):
809
- rbln_model_input_names = tokenizer.model_input_names
810
- break
811
- if rbln_model_input_names is None and hasattr(cls, "rbln_model_input_names"):
812
- rbln_model_input_names = cls.rbln_model_input_names
813
- elif rbln_model_input_names is None and hasattr(cls, "rbln_model_input_names") is False:
814
- original_model_class = getattr(transformers, model_config.architectures[0])
815
- input_names_order = inspect.signature(original_model_class.forward).parameters.keys()
816
- raise ValueError(
817
- "Specify the model input names obtained by the tokenizer via `rbln_model_input_names`, "
818
- f"and be sure to make the order of the inputs same as QuestionAnswering forward() arguments like ({list(input_names_order)})"
819
- )
820
-
821
- input_info = [
822
- (model_input_name, [rbln_batch_size, rbln_max_seq_len], "int64")
823
- for model_input_name in rbln_model_input_names
824
- ]
825
-
826
- rbln_compile_config = RBLNCompileConfig(input_info=input_info)
827
- rbln_config = RBLNConfig(
828
- rbln_cls=cls.__name__,
829
- compile_cfgs=[rbln_compile_config],
830
- rbln_kwargs=rbln_kwargs,
831
- )
832
- rbln_config.model_cfg.update({"max_seq_len": rbln_max_seq_len})
833
- return rbln_config
834
-
835
-
836
- class RBLNModelForImageClassification(RBLNModel):
837
- """
838
- This is a generic model class that will be instantiated as one of the model classes of the library (with a image classification head) when created with the from_pretrained() class method
839
- """
840
-
841
- auto_model_class = AutoModelForImageClassification
842
-
843
- @classmethod
844
- def _get_rbln_config(
845
- cls,
846
- preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
847
- model_config: Optional["PretrainedConfig"] = None,
848
- rbln_kwargs: Dict[str, Any] = {},
849
- ) -> RBLNConfig:
850
- rbln_image_size = rbln_kwargs.get("image_size", None)
851
- rbln_batch_size = rbln_kwargs.get("batch_size", None)
852
-
853
- if rbln_image_size is None:
854
- for processor in preprocessors:
855
- if hasattr(processor, "size"):
856
- if all(required_key in processor.size.keys() for required_key in ["height", "width"]):
857
- rbln_image_size = (processor.size["height"], processor.size["width"])
858
- elif "shortest_edge" in processor.size.keys():
859
- rbln_image_size = (processor.size["shortest_edge"], processor.size["shortest_edge"])
860
- elif "longest_edge" in processor.size.keys():
861
- rbln_image_size = (processor.size["longest_edge"], processor.size["longest_edge"])
862
- break
863
-
864
- if rbln_image_size is None:
865
- rbln_image_size = model_config.image_size
866
-
867
- if rbln_image_size is None:
868
- raise ValueError("`rbln_image_size` should be specified!")
869
-
870
- if rbln_batch_size is None:
871
- rbln_batch_size = 1
872
-
873
- if isinstance(rbln_image_size, int):
874
- rbln_image_height, rbln_image_width = rbln_image_size, rbln_image_size
875
- elif isinstance(rbln_image_size, (list, tuple)):
876
- rbln_image_height, rbln_image_width = rbln_image_size[0], rbln_image_size[1]
877
- elif isinstance(rbln_image_size, dict):
878
- rbln_image_height, rbln_image_width = rbln_image_size["height"], rbln_image_size["width"]
879
- else:
880
- raise ValueError(
881
- "`rbln_image_size` should be `int` (ex. 224), `tuple` (ex. 224, 224), `dict` (ex. {'height': 224, 'width': 224}) format"
882
- )
883
-
884
- input_info = [
885
- (
886
- "pixel_values",
887
- [rbln_batch_size, 3, rbln_image_height, rbln_image_width],
888
- "float32",
889
- )
890
- ]
891
-
892
- rbln_compile_config = RBLNCompileConfig(input_info=input_info)
893
- return RBLNConfig(rbln_cls=cls.__name__, compile_cfgs=[rbln_compile_config], rbln_kwargs=rbln_kwargs)
894
-
895
-
896
- class RBLNModelForAudioClassification(RBLNModel):
897
- """
898
- This is a generic model class that will be instantiated as one of the model classes of the library (with a audio classification head) when created with the from_pretrained() class method
899
- This model inherits from [`RBLNModel`]. Check the superclass documentation for the generic methods the library implements for all its models.
900
-
901
- A class to convert and run pre-trained transformers based AudioClassification models on RBLN devices.
902
- It implements the methods to convert a pre-trained transformers AudioClassification model into a RBLN transformer model by:
903
- - transferring the checkpoint weights of the original into an optimized RBLN graph,
904
- - compiling the resulting graph using the RBLN compiler.
905
-
906
- Currently, this model class only supports the 'AST' model from the transformers library. Future updates may include support for additional model types.
907
- """
908
-
909
- auto_model_class = AutoModelForAudioClassification
910
-
911
- @classmethod
912
- def _get_rbln_config(
913
- cls,
914
- preprocessors: "AutoFeatureExtractor",
915
- model_config: "PretrainedConfig",
916
- rbln_kwargs: Dict[str, Any] = {},
917
- ) -> RBLNConfig:
918
- rbln_batch_size = rbln_kwargs.get("batch_size", None)
919
- rbln_max_length = rbln_kwargs.get("max_length", None)
920
- rbln_num_mel_bins = rbln_kwargs.get("num_mel_bins", None)
921
-
922
- if rbln_batch_size is None:
923
- rbln_batch_size = 1
924
-
925
- if rbln_num_mel_bins is None:
926
- rbln_num_mel_bins = getattr(model_config, "num_mel_bins", None)
927
- if rbln_num_mel_bins is None:
928
- for feature_extractor in preprocessors:
929
- if hasattr(feature_extractor, "num_mel_bins"):
930
- rbln_num_mel_bins = feature_extractor.num_mel_bins
931
- break
932
-
933
- if rbln_num_mel_bins is None:
934
- raise ValueError("`rbln_num_mel_bins` should be specified!")
935
-
936
- if rbln_max_length is None:
937
- rbln_max_length = getattr(model_config, "max_length", None)
938
- for feature_extractor in preprocessors:
939
- if hasattr(feature_extractor, "max_length"):
940
- rbln_max_length = feature_extractor.max_length
941
- break
942
-
943
- if rbln_max_length is None:
944
- raise ValueError("`rbln_max_length` should be specified!")
945
-
946
- input_info = [
947
- (
948
- "input_values",
949
- [rbln_batch_size, rbln_max_length, rbln_num_mel_bins],
950
- "float32",
951
- ),
952
- ]
953
-
954
- rbln_compile_config = RBLNCompileConfig(input_info=input_info)
955
- rbln_config = RBLNConfig(
956
- rbln_cls=cls.__name__,
957
- compile_cfgs=[rbln_compile_config],
958
- rbln_kwargs=rbln_kwargs,
959
- )
960
- rbln_config.model_cfg.update(
961
- {
962
- "batch_size": rbln_batch_size,
963
- "max_length": rbln_max_length,
964
- "num_mel_bins": rbln_num_mel_bins,
965
- }
966
- )
967
- return rbln_config
968
-
969
-
970
- class RBLNModelForSequenceClassification(RBLNModel):
971
- """
972
- This is a generic model class that will be instantiated as one of the model classes of the library (with a sequence classification head) when created with the from_pretrained() class method
973
- This model inherits from [`RBLNModel`]. Check the superclass documentation for the generic methods the library implements for all its models.
974
-
975
- A class to convert and run pre-trained transformers based SequenceClassification models on RBLN devices.
976
- It implements the methods to convert a pre-trained transformers SequenceClassification model into a RBLN transformer model by:
977
- - transferring the checkpoint weights of the original into an optimized RBLN graph,
978
- - compiling the resulting graph using the RBLN compiler.
979
-
980
- Currently, this model class supports the 'XLMRoberta' and 'Roberta' model from the transformers library. Future updates may include support for additional model types.
981
- """
982
-
983
- auto_model_class = AutoModelForSequenceClassification
984
-
985
- @classmethod
986
- def _get_rbln_config(
987
- cls,
988
- preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
989
- model_config: Optional["PretrainedConfig"] = None,
990
- rbln_kwargs: Dict[str, Any] = {},
991
- ) -> RBLNConfig:
992
- rbln_max_seq_len = rbln_kwargs.get("max_seq_len", None)
993
- rbln_model_input_names = rbln_kwargs.get("model_input_names", None)
994
- rbln_batch_size = rbln_kwargs.get("batch_size", None)
995
-
996
- max_position_embeddings = getattr(model_config, "n_positions", None) or getattr(
997
- model_config, "max_position_embeddings", None
998
- )
999
-
1000
- if rbln_max_seq_len is None:
1001
- rbln_max_seq_len = max_position_embeddings
1002
- if rbln_max_seq_len is None:
1003
- for tokenizer in preprocessors:
1004
- if hasattr(tokenizer, "model_max_length"):
1005
- rbln_max_seq_len = tokenizer.model_max_length
1006
- break
1007
- if rbln_max_seq_len is None:
1008
- raise ValueError("`rbln_max_seq_len` should be specified!")
1009
-
1010
- if max_position_embeddings is not None and rbln_max_seq_len > max_position_embeddings:
1011
- raise ValueError("`rbln_enc_max_seq_len` should be less or equal than max_position_embeddings!")
1012
-
1013
- if rbln_model_input_names is None:
1014
- for tokenizer in preprocessors:
1015
- if hasattr(tokenizer, "model_input_names"):
1016
- rbln_model_input_names = tokenizer.model_input_names
1017
- break
1018
- if rbln_model_input_names is None and hasattr(cls, "rbln_model_input_names"):
1019
- rbln_model_input_names = cls.rbln_model_input_names
1020
- elif rbln_model_input_names is None and hasattr(cls, "rbln_model_input_names") is False:
1021
- original_model_class = getattr(transformers, model_config.architectures[0])
1022
- input_names_order = inspect.signature(original_model_class.forward).parameters.keys()
1023
- raise ValueError(
1024
- "Specify the model input names obtained by the tokenizer via `rbln_model_input_names`, "
1025
- f"and be sure to make the order of the inputs same as SequenceClassification forward() arguments like ({list(input_names_order)})"
1026
- )
1027
-
1028
- if rbln_batch_size is None:
1029
- rbln_batch_size = 1
1030
-
1031
- input_info = [
1032
- (model_input_name, [rbln_batch_size, rbln_max_seq_len], "int64")
1033
- for model_input_name in rbln_model_input_names
1034
- ]
1035
-
1036
- rbln_compile_config = RBLNCompileConfig(input_info=input_info)
1037
- rbln_config = RBLNConfig(
1038
- rbln_cls=cls.__name__,
1039
- compile_cfgs=[rbln_compile_config],
1040
- rbln_kwargs=rbln_kwargs,
1041
- )
1042
- rbln_config.model_cfg.update({"max_seq_len": rbln_max_seq_len})
1043
- return rbln_config
1044
-
1045
-
1046
- class RBLNModelForMaskedLM(RBLNModel):
1047
- auto_model_class = AutoModelForMaskedLM
1048
-
1049
- @classmethod
1050
- def _get_rbln_config(
1051
- cls,
1052
- preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
1053
- model_config: Optional["PretrainedConfig"] = None,
1054
- rbln_kwargs: Dict[str, Any] = {},
1055
- ) -> RBLNConfig:
1056
- rbln_max_seq_len = rbln_kwargs.get("max_seq_len", None)
1057
- rbln_model_input_names = rbln_kwargs.get("model_input_names", None)
1058
- rbln_batch_size = rbln_kwargs.get("batch_size", None)
1059
-
1060
- max_position_embeddings = getattr(model_config, "n_positions", None) or getattr(
1061
- model_config, "max_position_embeddings", None
1062
- )
1063
-
1064
- if rbln_max_seq_len is None:
1065
- rbln_max_seq_len = max_position_embeddings
1066
- if rbln_max_seq_len is None:
1067
- for tokenizer in preprocessors:
1068
- if hasattr(tokenizer, "model_max_length"):
1069
- rbln_max_seq_len = tokenizer.model_max_length
1070
- break
1071
- if rbln_max_seq_len is None:
1072
- raise ValueError("`rbln_max_seq_len` should be specified!")
1073
-
1074
- if max_position_embeddings is not None and rbln_max_seq_len > max_position_embeddings:
1075
- raise ValueError("`rbln_enc_max_seq_len` should be less or equal than max_position_embeddings!")
1076
-
1077
- if rbln_model_input_names is None:
1078
- for tokenizer in preprocessors:
1079
- if hasattr(tokenizer, "model_input_names"):
1080
- rbln_model_input_names = tokenizer.model_input_names
1081
- break
1082
- if rbln_model_input_names is None and hasattr(cls, "rbln_model_input_names"):
1083
- rbln_model_input_names = cls.rbln_model_input_names
1084
- elif rbln_model_input_names is None and hasattr(cls, "rbln_model_input_names") is False:
1085
- original_model_class = getattr(transformers, model_config.architectures[0])
1086
- input_names_order = inspect.signature(original_model_class.forward).parameters.keys()
1087
- raise ValueError(
1088
- "Specify the model input names obtained by the tokenizer via `rbln_model_input_names`, "
1089
- f"and be sure to make the order of the inputs same as MaskedLM forward() arguments like ({list(input_names_order)})"
1090
- )
1091
-
1092
- if rbln_batch_size is None:
1093
- rbln_batch_size = 1
1094
-
1095
- input_info = [
1096
- (model_input_name, [rbln_batch_size, rbln_max_seq_len], "int64")
1097
- for model_input_name in rbln_model_input_names
1098
- ]
1099
-
1100
- rbln_compile_config = RBLNCompileConfig(input_info=input_info)
1101
- rbln_config = RBLNConfig(
1102
- rbln_cls=cls.__name__,
1103
- compile_cfgs=[rbln_compile_config],
1104
- rbln_kwargs=rbln_kwargs,
1105
- )
1106
- rbln_config.model_cfg.update({"max_seq_len": rbln_max_seq_len})
1107
- return rbln_config
533
+ pass
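The 546 lines removed in the hunk above (the old `_export`, `RBLNModel`, and the generic question-answering, image/audio/sequence-classification and masked-LM heads) are relocations rather than deletions: the file list shows them re-landing in optimum/rbln/modeling.py (+238) and optimum/rbln/transformers/modeling_generic.py (+385). A hedged compatibility check, assuming the 0.2.0 package root still re-exports the same public names it did in 0.1.13:

    # Assumption: these names remain importable from the package root in 0.2.0;
    # only their defining modules have moved.
    from optimum.rbln import (
        RBLNModel,
        RBLNModelForQuestionAnswering,
        RBLNModelForSequenceClassification,
    )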