optimum-rbln 0.1.13__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__init__.py +41 -38
- optimum/rbln/__version__.py +16 -1
- optimum/rbln/diffusers/__init__.py +26 -2
- optimum/rbln/{modeling_diffusers.py → diffusers/modeling_diffusers.py} +97 -126
- optimum/rbln/diffusers/models/__init__.py +36 -3
- optimum/rbln/{transformers/generation → diffusers/models/autoencoders}/__init__.py +1 -2
- optimum/rbln/diffusers/models/{autoencoder_kl.py → autoencoders/autoencoder_kl.py} +73 -61
- optimum/rbln/diffusers/models/autoencoders/vae.py +83 -0
- optimum/rbln/diffusers/models/controlnet.py +54 -14
- optimum/rbln/diffusers/models/transformers/__init__.py +24 -0
- optimum/rbln/diffusers/models/transformers/transformer_sd3.py +203 -0
- optimum/rbln/diffusers/models/unets/__init__.py +24 -0
- optimum/rbln/diffusers/models/{unet_2d_condition.py → unets/unet_2d_condition.py} +82 -22
- optimum/rbln/diffusers/pipelines/__init__.py +23 -2
- optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +13 -33
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +17 -2
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +18 -2
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +18 -2
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +18 -2
- optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +1 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +2 -2
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +1 -13
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +26 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +24 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +15 -8
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +15 -8
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +31 -0
- optimum/rbln/modeling.py +238 -0
- optimum/rbln/modeling_base.py +186 -760
- optimum/rbln/modeling_config.py +31 -7
- optimum/rbln/ops/__init__.py +26 -0
- optimum/rbln/ops/attn.py +221 -0
- optimum/rbln/ops/flash_attn.py +70 -0
- optimum/rbln/ops/kv_cache_update.py +69 -0
- optimum/rbln/transformers/__init__.py +20 -2
- optimum/rbln/{modeling_alias.py → transformers/modeling_alias.py} +5 -1
- optimum/rbln/transformers/modeling_generic.py +385 -0
- optimum/rbln/transformers/models/auto/__init__.py +23 -0
- optimum/rbln/transformers/models/auto/auto_factory.py +117 -23
- optimum/rbln/transformers/models/auto/modeling_auto.py +36 -12
- optimum/rbln/transformers/models/bart/__init__.py +0 -1
- optimum/rbln/transformers/models/bart/bart_architecture.py +107 -464
- optimum/rbln/transformers/models/bart/modeling_bart.py +10 -9
- optimum/rbln/transformers/models/bert/modeling_bert.py +3 -6
- optimum/rbln/transformers/models/clip/modeling_clip.py +8 -25
- optimum/rbln/transformers/models/decoderonly/__init__.py +0 -10
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +775 -514
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +128 -260
- optimum/rbln/transformers/models/dpt/modeling_dpt.py +1 -1
- optimum/rbln/transformers/models/exaone/exaone_architecture.py +60 -45
- optimum/rbln/transformers/models/exaone/modeling_exaone.py +4 -2
- optimum/rbln/transformers/models/gemma/gemma_architecture.py +33 -104
- optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +50 -238
- optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +3 -2
- optimum/rbln/transformers/models/llama/llama_architecture.py +0 -1
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +3 -75
- optimum/rbln/transformers/models/midm/midm_architecture.py +84 -238
- optimum/rbln/transformers/models/midm/modeling_midm.py +5 -6
- optimum/rbln/transformers/models/mistral/mistral_architecture.py +0 -1
- optimum/rbln/transformers/models/phi/phi_architecture.py +60 -261
- optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +0 -1
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +58 -103
- optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +498 -0
- optimum/rbln/transformers/models/t5/__init__.py +0 -1
- optimum/rbln/transformers/models/t5/modeling_t5.py +106 -5
- optimum/rbln/transformers/models/t5/t5_architecture.py +106 -448
- optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +1 -1
- optimum/rbln/transformers/models/whisper/generation_whisper.py +42 -0
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +78 -55
- optimum/rbln/transformers/models/whisper/whisper_architecture.py +219 -312
- optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +3 -35
- optimum/rbln/transformers/utils/rbln_quantization.py +120 -4
- optimum/rbln/utils/decorator_utils.py +51 -11
- optimum/rbln/utils/hub.py +131 -0
- optimum/rbln/utils/import_utils.py +22 -1
- optimum/rbln/utils/logging.py +37 -0
- optimum/rbln/utils/model_utils.py +52 -0
- optimum/rbln/utils/runtime_utils.py +10 -4
- optimum/rbln/utils/save_utils.py +17 -0
- optimum/rbln/utils/submodule.py +137 -0
- optimum_rbln-0.2.0.dist-info/METADATA +117 -0
- optimum_rbln-0.2.0.dist-info/RECORD +114 -0
- {optimum_rbln-0.1.13.dist-info → optimum_rbln-0.2.0.dist-info}/WHEEL +1 -1
- optimum_rbln-0.2.0.dist-info/licenses/LICENSE +288 -0
- optimum/rbln/transformers/cache_utils.py +0 -107
- optimum/rbln/transformers/generation/streamers.py +0 -139
- optimum/rbln/transformers/generation/utils.py +0 -397
- optimum/rbln/transformers/models/exaone/hf_hub_cached/configuration_exaone.py +0 -181
- optimum/rbln/transformers/models/exaone/hf_hub_cached/modeling_exaone.py +0 -1725
- optimum/rbln/transformers/models/midm/hf_hub_cached/configuration_midm.py +0 -22
- optimum/rbln/transformers/models/midm/hf_hub_cached/midm_bitext_tokenization.py +0 -304
- optimum/rbln/transformers/models/midm/hf_hub_cached/modeling_midm.py +0 -1469
- optimum/rbln/transformers/models/midm/hf_hub_cached/rotary_position_embedding.py +0 -98
- optimum/rbln/utils/context.py +0 -58
- optimum/rbln/utils/timer_utils.py +0 -43
- optimum_rbln-0.1.13.dist-info/METADATA +0 -120
- optimum_rbln-0.1.13.dist-info/RECORD +0 -107
- optimum_rbln-0.1.13.dist-info/entry_points.txt +0 -4
- optimum_rbln-0.1.13.dist-info/licenses/LICENSE +0 -201
optimum/rbln/modeling_base.py
CHANGED
@@ -22,7 +22,6 @@
|
|
22
22
|
# from Rebellions Inc.
|
23
23
|
|
24
24
|
import importlib
|
25
|
-
import inspect
|
26
25
|
import logging
|
27
26
|
import os
|
28
27
|
import shutil
|
@@ -33,143 +32,31 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
|
|
33
32
|
|
34
33
|
import rebel
|
35
34
|
import torch
|
36
|
-
import transformers
|
37
|
-
from huggingface_hub import HfApi, HfFolder, hf_hub_download
|
38
|
-
from optimum.exporters import TasksManager
|
39
|
-
from optimum.modeling_base import OptimizedModel
|
40
35
|
from transformers import (
|
41
36
|
AutoConfig,
|
42
37
|
AutoModel,
|
43
|
-
AutoModelForAudioClassification,
|
44
|
-
AutoModelForImageClassification,
|
45
|
-
AutoModelForMaskedLM,
|
46
|
-
AutoModelForQuestionAnswering,
|
47
|
-
AutoModelForSequenceClassification,
|
48
38
|
GenerationConfig,
|
49
39
|
PretrainedConfig,
|
50
40
|
)
|
51
41
|
|
52
|
-
from .modeling_config import
|
42
|
+
from .modeling_config import RBLNCompileConfig, RBLNConfig, use_rbln_config
|
43
|
+
from .utils.hub import PushToHubMixin, pull_compiled_model_from_hub, validate_files
|
53
44
|
from .utils.runtime_utils import UnavailableRuntime
|
54
45
|
from .utils.save_utils import maybe_load_preprocessors
|
46
|
+
from .utils.submodule import SubModulesMixin
|
55
47
|
|
56
48
|
|
57
49
|
if TYPE_CHECKING:
|
58
|
-
from transformers import
|
59
|
-
AutoFeatureExtractor,
|
60
|
-
AutoProcessor,
|
61
|
-
AutoTokenizer,
|
62
|
-
PreTrainedModel,
|
63
|
-
)
|
50
|
+
from transformers import PreTrainedModel
|
64
51
|
|
65
52
|
logger = logging.getLogger(__name__)
|
66
53
|
|
67
54
|
|
68
|
-
class
|
69
|
-
|
70
|
-
_rbln_submodules = [
|
71
|
-
{"name": "vision_tower"},
|
72
|
-
{"name": "language_model"},
|
73
|
-
]
|
74
|
-
"""
|
75
|
-
|
76
|
-
_rbln_submodules: List[Dict[str, Any]] = []
|
77
|
-
|
78
|
-
def __init__(
|
79
|
-
self,
|
80
|
-
*,
|
81
|
-
rbln_submodules: List["RBLNBaseModel"] = [],
|
82
|
-
**kwargs,
|
83
|
-
) -> None:
|
84
|
-
for submodule_meta, submodule in zip(self._rbln_submodules, rbln_submodules):
|
85
|
-
setattr(self, submodule_meta["name"], submodule)
|
86
|
-
|
87
|
-
@classmethod
|
88
|
-
def _export_submodules_from_model(
|
89
|
-
cls,
|
90
|
-
model: "PreTrainedModel",
|
91
|
-
model_save_dir: str,
|
92
|
-
rbln_kwargs: Dict[str, Any],
|
93
|
-
**kwargs,
|
94
|
-
) -> List["RBLNBaseModel"]:
|
95
|
-
rbln_submodules = []
|
96
|
-
for submodule in cls._rbln_submodules:
|
97
|
-
submodule_name = submodule["name"]
|
98
|
-
torch_submodule: "PreTrainedModel" = getattr(model, submodule["name"])
|
99
|
-
cls_name = torch_submodule.__class__.__name__
|
100
|
-
submodule_cls: "RBLNModel" = getattr(importlib.import_module("optimum.rbln"), f"RBLN{cls_name}")
|
101
|
-
|
102
|
-
if submodule_name in rbln_kwargs:
|
103
|
-
kwargs["rbln_config"] = rbln_kwargs[submodule_name]
|
104
|
-
|
105
|
-
rbln_submodule = submodule_cls.from_model(
|
106
|
-
model=torch_submodule,
|
107
|
-
subfolder=submodule_name,
|
108
|
-
model_save_dir=model_save_dir,
|
109
|
-
**kwargs,
|
110
|
-
)
|
111
|
-
|
112
|
-
rbln_submodules.append(rbln_submodule)
|
113
|
-
|
114
|
-
return rbln_submodules
|
115
|
-
|
116
|
-
@classmethod
|
117
|
-
def _load_submodules_from_compiled_models(
|
118
|
-
cls,
|
119
|
-
model_save_dir: str,
|
120
|
-
rbln_kwargs: Dict[str, Any],
|
121
|
-
**kwargs,
|
122
|
-
):
|
123
|
-
rbln_submodules = []
|
124
|
-
for submodule in cls._rbln_submodules:
|
125
|
-
submodule_name = submodule["name"]
|
126
|
-
|
127
|
-
if submodule_name in rbln_kwargs:
|
128
|
-
kwargs["rbln_config"] = rbln_kwargs[submodule_name]
|
129
|
-
|
130
|
-
# Get cls name for call the constructor of the rbln class
|
131
|
-
submodule_rbln_config = RBLNConfig.load(Path(model_save_dir) / submodule_name)
|
132
|
-
submodule_cls_name = submodule_rbln_config.meta["cls"]
|
133
|
-
submodule_cls: "RBLNBaseModel" = getattr(importlib.import_module("optimum.rbln"), submodule_cls_name)
|
134
|
-
|
135
|
-
config = OptimizedModel._load_config(Path(model_save_dir) / submodule_name)
|
136
|
-
rbln_submodule = submodule_cls._from_pretrained(
|
137
|
-
model_id=model_save_dir,
|
138
|
-
config=config,
|
139
|
-
subfolder=submodule_name,
|
140
|
-
**kwargs,
|
141
|
-
)
|
142
|
-
rbln_submodules.append(rbln_submodule)
|
143
|
-
return rbln_submodules
|
144
|
-
|
145
|
-
@classmethod
|
146
|
-
def _load_submodules(
|
147
|
-
cls,
|
148
|
-
model_save_dir,
|
149
|
-
rbln_kwargs,
|
150
|
-
model=None,
|
151
|
-
**kwargs,
|
152
|
-
):
|
153
|
-
# Two ways :
|
154
|
-
# 1. Compile from pytorch object
|
155
|
-
# 2. Load from compiled file
|
156
|
-
if model is not None:
|
157
|
-
return cls._export_submodules_from_model(
|
158
|
-
model=model,
|
159
|
-
model_save_dir=model_save_dir,
|
160
|
-
rbln_kwargs=rbln_kwargs,
|
161
|
-
**kwargs,
|
162
|
-
)
|
163
|
-
|
164
|
-
else:
|
165
|
-
return cls._load_submodules_from_compiled_models(
|
166
|
-
model_save_dir=model_save_dir,
|
167
|
-
rbln_kwargs=rbln_kwargs,
|
168
|
-
**kwargs,
|
169
|
-
)
|
55
|
+
class PreTrainedModel(ABC): # noqa: F811
|
56
|
+
pass
|
170
57
|
|
171
58
|
|
172
|
-
class RBLNBaseModel(
|
59
|
+
class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
|
173
60
|
"""
|
174
61
|
An abstract base class for compiling, loading, and saving neural network models from the huggingface
|
175
62
|
transformers and diffusers libraries to run on RBLN NPU devices.
|
@@ -205,30 +92,29 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
|
|
205
92
|
"""
|
206
93
|
|
207
94
|
model_type = "rbln_model"
|
208
|
-
auto_model_class = AutoModel
|
95
|
+
auto_model_class = AutoModel
|
96
|
+
config_class = AutoConfig
|
209
97
|
config_name = "config.json"
|
98
|
+
hf_library_name = "transformers"
|
99
|
+
_hf_class = None
|
210
100
|
|
211
101
|
def __init__(
|
212
102
|
self,
|
213
103
|
models: List[rebel.Runtime],
|
214
104
|
config: "PretrainedConfig",
|
215
105
|
rbln_config: RBLNConfig,
|
216
|
-
preprocessors: Optional[List],
|
217
106
|
model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
|
218
107
|
subfolder: str = "",
|
219
108
|
rbln_compiled_models: Optional[rebel.RBLNCompiledModel] = None,
|
220
109
|
rbln_submodules: List["RBLNBaseModel"] = [],
|
221
110
|
**kwargs,
|
222
111
|
):
|
223
|
-
|
224
|
-
|
225
|
-
self.config = PretrainedConfig(**self.config)
|
226
|
-
|
112
|
+
self.model = models
|
113
|
+
self.config = config
|
227
114
|
self.rbln_config = rbln_config
|
228
|
-
self.preprocessors = [] if preprocessors is None else preprocessors
|
229
115
|
self.compiled_models = rbln_compiled_models
|
230
116
|
|
231
|
-
# Registers the
|
117
|
+
# Registers the RBLN classes into the transformers AutoModel classes to avoid warnings when creating
|
232
118
|
# a pipeline https://github.com/huggingface/transformers/blob/3d3204c025b6b5de013e07dd364208e28b4d9589/src/transformers/pipelines/base.py#L940
|
233
119
|
AutoConfig.register(self.model_type, AutoConfig)
|
234
120
|
if hasattr(self.auto_model_class, "register"):
|
@@ -247,6 +133,7 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
|
|
247
133
|
|
248
134
|
self.device = torch.device("cpu")
|
249
135
|
self.training = False
|
136
|
+
self.dtype = torch.float32
|
250
137
|
|
251
138
|
# FIXME :: model_save_dir is not used after initialized. (This can be used when save/load)
|
252
139
|
# This attribute is needed to keep one reference on the temporary directory, since garbage collecting it
|
@@ -264,34 +151,6 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
|
|
264
151
|
self.rbln_submodules = rbln_submodules
|
265
152
|
self.__post_init__(**kwargs)
|
266
153
|
|
267
|
-
def _save_pretrained(self, save_directory: Union[str, Path]):
|
268
|
-
"""
|
269
|
-
Saves a model and its configuration file to a directory, so that it can be re-loaded using the
|
270
|
-
[`~optimum.rbln.modeling_base.RBLNBaseModel.from_pretrained`] class method.
|
271
|
-
|
272
|
-
Args:
|
273
|
-
save_directory (`Union[str, Path]`):
|
274
|
-
Directory where to save the model file.
|
275
|
-
"""
|
276
|
-
real_save_dir = self.model_save_dir / self.subfolder
|
277
|
-
save_directory_path = Path(save_directory)
|
278
|
-
if os.path.exists(real_save_dir) and os.path.isdir(real_save_dir):
|
279
|
-
if save_directory_path.absolute() == real_save_dir.absolute():
|
280
|
-
raise FileExistsError(
|
281
|
-
f"Cannot save model to '{save_directory}'. "
|
282
|
-
f"This directory already exists and contains the model files."
|
283
|
-
)
|
284
|
-
shutil.copytree(real_save_dir, save_directory, dirs_exist_ok=True)
|
285
|
-
self.config.save_pretrained(save_directory)
|
286
|
-
if self.generation_config is not None:
|
287
|
-
self.generation_config.save_pretrained(save_directory)
|
288
|
-
else:
|
289
|
-
raise FileNotFoundError(
|
290
|
-
f"Unable to save the model. The model directory '{real_save_dir}' does not exist or is not accessible. "
|
291
|
-
f"Cannot save to the specified destination '{save_directory}'. "
|
292
|
-
f"Please ensure the model directory exists and you have the necessary permissions to access it."
|
293
|
-
)
|
294
|
-
|
295
154
|
@classmethod
|
296
155
|
def _load_compiled_model_dir(
|
297
156
|
cls,
|
@@ -302,50 +161,18 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
|
|
302
161
|
cache_dir: Optional[str] = None,
|
303
162
|
subfolder: str = "",
|
304
163
|
local_files_only: bool = False,
|
305
|
-
):
|
306
|
-
|
307
|
-
# And prepare or download cache folder from HF Hub if needed.
|
164
|
+
) -> str:
|
165
|
+
"""Load the directory containing the compiled model files."""
|
308
166
|
model_path = Path(model_id)
|
167
|
+
|
309
168
|
if model_path.is_dir():
|
310
169
|
model_path = model_path / subfolder
|
311
170
|
rbln_files = list(model_path.glob("*.rbln"))
|
312
171
|
rbln_config_filenames = list(model_path.glob("rbln_config.json"))
|
172
|
+
validate_files(rbln_files, rbln_config_filenames, f"directory {model_path}")
|
313
173
|
else:
|
314
|
-
|
315
|
-
|
316
|
-
else:
|
317
|
-
token = use_auth_token
|
318
|
-
repo_files = list(
|
319
|
-
map(
|
320
|
-
Path,
|
321
|
-
HfApi().list_repo_files(model_id, revision=revision, token=token),
|
322
|
-
)
|
323
|
-
)
|
324
|
-
|
325
|
-
pattern = "*.rbln" if subfolder == "" else f"{subfolder}/*.rbln"
|
326
|
-
rbln_files = [p for p in repo_files if p.match(pattern)]
|
327
|
-
|
328
|
-
pattern = "rbln_config.json" if subfolder == "" else f"{subfolder}/rbln_config.json"
|
329
|
-
rbln_config_filenames = [p for p in repo_files if p.match(pattern)]
|
330
|
-
|
331
|
-
if len(rbln_files) == 0:
|
332
|
-
raise FileNotFoundError(f"Could not find any rbln model file in {model_path}")
|
333
|
-
|
334
|
-
if len(rbln_config_filenames) == 0:
|
335
|
-
raise FileNotFoundError(f"Could not find `rbln_config.json` file in {model_path}")
|
336
|
-
|
337
|
-
if len(rbln_config_filenames) > 1:
|
338
|
-
raise FileExistsError(
|
339
|
-
f"Multiple rbln_config.json are not expected. but {len(rbln_config_filenames)} are found."
|
340
|
-
)
|
341
|
-
|
342
|
-
if model_path.is_dir():
|
343
|
-
model_path = str(model_path)
|
344
|
-
else:
|
345
|
-
rbln_config_filename = rbln_config_filenames[0]
|
346
|
-
rbln_config_cache_path = hf_hub_download(
|
347
|
-
repo_id=model_id,
|
348
|
-
filename=str(rbln_config_filename),
|
174
|
+
model_path = pull_compiled_model_from_hub(
|
175
|
+
model_id=model_id,
|
349
176
|
subfolder=subfolder,
|
350
177
|
use_auth_token=use_auth_token,
|
351
178
|
revision=revision,
|
@@ -353,9 +180,8 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
|
|
353
180
|
force_download=force_download,
|
354
181
|
local_files_only=local_files_only,
|
355
182
|
)
|
356
|
-
model_path = Path(rbln_config_cache_path).parent
|
357
183
|
|
358
|
-
return model_path
|
184
|
+
return str(model_path)
|
359
185
|
|
360
186
|
@classmethod
|
361
187
|
def _load_compiled_models(cls, model_path: str):
|
@@ -368,13 +194,14 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
|
|
368
194
|
def _from_pretrained(
|
369
195
|
cls,
|
370
196
|
model_id: Union[str, Path],
|
371
|
-
config: "PretrainedConfig",
|
197
|
+
config: "PretrainedConfig" = None,
|
372
198
|
use_auth_token: Optional[Union[bool, str]] = None,
|
373
199
|
revision: Optional[str] = None,
|
374
200
|
force_download: bool = False,
|
375
201
|
cache_dir: Optional[str] = None,
|
376
202
|
subfolder: str = "",
|
377
203
|
local_files_only: bool = False,
|
204
|
+
trust_remote_code: bool = False,
|
378
205
|
model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
|
379
206
|
# passed from compile function
|
380
207
|
rbln_config: Optional[RBLNConfig] = None,
|
@@ -401,6 +228,43 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
|
|
401
228
|
rbln_config = RBLNConfig.load(model_path_subfolder)
|
402
229
|
rbln_config.update_runtime_cfg(rbln_kwargs)
|
403
230
|
|
231
|
+
if rbln_config.meta["cls"] != cls.__name__:
|
232
|
+
raise NameError(
|
233
|
+
f"Cannot load the model. The model was originally compiled using "
|
234
|
+
f"{rbln_config.meta['cls']}, but you are trying to load it with {cls.__name__}."
|
235
|
+
"Please use the same model class that was used during compilation."
|
236
|
+
)
|
237
|
+
|
238
|
+
if config is None:
|
239
|
+
if cls.hf_library_name == "transformers":
|
240
|
+
config = AutoConfig.from_pretrained(
|
241
|
+
model_path_subfolder,
|
242
|
+
cache_dir=cache_dir,
|
243
|
+
force_download=force_download,
|
244
|
+
revision=revision,
|
245
|
+
token=use_auth_token,
|
246
|
+
trust_remote_code=trust_remote_code,
|
247
|
+
)
|
248
|
+
elif cls.hf_library_name == "diffusers":
|
249
|
+
# import here to prevent diffusers dependency
|
250
|
+
# TODO(jongho): Remove diffusers dependency if use transformers only.
|
251
|
+
from diffusers.configuration_utils import ConfigMixin
|
252
|
+
|
253
|
+
class DummyConfigMixin(ConfigMixin):
|
254
|
+
# Just to load config, We need to specify `config_name`
|
255
|
+
config_name = "config.json"
|
256
|
+
|
257
|
+
config = DummyConfigMixin.load_config(
|
258
|
+
model_id,
|
259
|
+
cache_dir=cache_dir,
|
260
|
+
force_download=force_download,
|
261
|
+
local_files_only=local_files_only,
|
262
|
+
revision=revision,
|
263
|
+
token=use_auth_token,
|
264
|
+
subfolder=subfolder,
|
265
|
+
)
|
266
|
+
config = PretrainedConfig(**config)
|
267
|
+
|
404
268
|
rbln_compiled_models = cls._load_compiled_models(model_path_subfolder)
|
405
269
|
|
406
270
|
if len(cls._rbln_submodules) > 0:
|
@@ -440,8 +304,6 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
|
|
440
304
|
):
|
441
305
|
if isinstance(model_save_dir, str):
|
442
306
|
model_save_dir = Path(model_save_dir)
|
443
|
-
preprocessors = maybe_load_preprocessors(model_save_dir.name, subfolder=subfolder)
|
444
|
-
|
445
307
|
# FIXME:: Should we convert it?
|
446
308
|
compiled_model_names = [cfg.compiled_model_name for cfg in rbln_config.compile_cfgs]
|
447
309
|
rbln_compiled_models = [rbln_compiled_models[cm_name] for cm_name in compiled_model_names]
|
@@ -449,7 +311,7 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
|
|
449
311
|
# create runtimes only if `rbln_create_runtimes` is enabled
|
450
312
|
try:
|
451
313
|
models = (
|
452
|
-
cls._create_runtimes(rbln_compiled_models, rbln_config.device_map)
|
314
|
+
cls._create_runtimes(rbln_compiled_models, rbln_config.device_map, rbln_config.activate_profiler)
|
453
315
|
if rbln_config.create_runtimes
|
454
316
|
else UnavailableRuntime()
|
455
317
|
)
|
@@ -464,7 +326,6 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
|
|
464
326
|
models,
|
465
327
|
config,
|
466
328
|
rbln_config,
|
467
|
-
preprocessors,
|
468
329
|
model_save_dir=model_save_dir,
|
469
330
|
subfolder=subfolder,
|
470
331
|
rbln_compiled_models=(None if rbln_config.optimize_host_memory else rbln_compiled_models),
|
@@ -472,17 +333,53 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
|
|
472
333
|
**kwargs,
|
473
334
|
)
|
474
335
|
|
475
|
-
|
476
|
-
|
336
|
+
@classmethod
|
337
|
+
@use_rbln_config
|
338
|
+
def _export(
|
339
|
+
cls,
|
340
|
+
model_id: Union[str, Path],
|
341
|
+
rbln_config: Optional[Dict[str, Any]] = None,
|
342
|
+
**kwargs,
|
343
|
+
) -> "RBLNBaseModel":
|
344
|
+
subfolder = kwargs.get("subfolder", "")
|
345
|
+
model_save_dir = kwargs.pop("model_save_dir", None)
|
346
|
+
|
347
|
+
rbln_kwargs = rbln_config
|
348
|
+
model: "PreTrainedModel" = cls.get_pytorch_model(
|
349
|
+
model_id=model_id,
|
350
|
+
rbln_kwargs=rbln_kwargs,
|
351
|
+
**kwargs,
|
352
|
+
)
|
353
|
+
preprocessors = maybe_load_preprocessors(model_id, subfolder=subfolder)
|
354
|
+
return cls.from_model(
|
355
|
+
model,
|
356
|
+
rbln_config=rbln_config,
|
357
|
+
preprocessors=preprocessors,
|
358
|
+
model_save_dir=model_save_dir,
|
359
|
+
**kwargs,
|
360
|
+
)
|
477
361
|
|
478
362
|
@classmethod
|
479
|
-
def
|
363
|
+
def from_pretrained(
|
364
|
+
cls,
|
365
|
+
model_id: Union[str, Path],
|
366
|
+
export: bool = False,
|
367
|
+
**kwargs,
|
368
|
+
) -> "RBLNBaseModel":
|
369
|
+
if isinstance(model_id, Path):
|
370
|
+
model_id = model_id.as_posix()
|
371
|
+
from_pretrained_method = cls._export if export else cls._from_pretrained
|
372
|
+
return from_pretrained_method(model_id=model_id, **kwargs)
|
373
|
+
|
374
|
+
@classmethod
|
375
|
+
def compile(cls, model, rbln_compile_config: Optional[RBLNCompileConfig] = None, **kwargs):
|
480
376
|
compiled_model = rebel.compile_from_torch(
|
481
377
|
model,
|
482
378
|
input_info=rbln_compile_config.input_info,
|
483
379
|
fusion=rbln_compile_config.fusion,
|
484
380
|
npu=rbln_compile_config.npu,
|
485
381
|
tensor_parallel_size=rbln_compile_config.tensor_parallel_size,
|
382
|
+
**kwargs,
|
486
383
|
)
|
487
384
|
return compiled_model
|
488
385
|
|
@@ -500,6 +397,24 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
|
|
500
397
|
rbln_config = cls._get_rbln_config(**others, rbln_kwargs=rbln_kwargs)
|
501
398
|
return rbln_config
|
502
399
|
|
400
|
+
@classmethod
|
401
|
+
@property
|
402
|
+
def hf_class(cls):
|
403
|
+
"""
|
404
|
+
Lazily loads and caches the corresponding Hugging Face model class.
|
405
|
+
Removes 'RBLN' prefix from the class name to get the original class name
|
406
|
+
(e.g., RBLNLlamaForCausalLM -> LlamaForCausalLM) and imports it from
|
407
|
+
the transformers/diffusers module.
|
408
|
+
|
409
|
+
Returns:
|
410
|
+
type: The original Hugging Face model class
|
411
|
+
"""
|
412
|
+
if cls._hf_class is None:
|
413
|
+
hf_cls_name = cls.__name__[4:]
|
414
|
+
library = importlib.import_module(cls.hf_library_name)
|
415
|
+
cls._hf_class = getattr(library, hf_cls_name, None)
|
416
|
+
return cls._hf_class
|
417
|
+
|
503
418
|
def can_generate(self):
|
504
419
|
return False
|
505
420
|
|
@@ -509,37 +424,88 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
|
|
509
424
|
def __call__(self, *args, **kwargs):
|
510
425
|
return self.forward(*args, **kwargs)
|
511
426
|
|
427
|
+
def __repr__(self):
|
428
|
+
return repr(self.model) + repr(self.rbln_submodules)
|
429
|
+
|
512
430
|
def __post_init__(self, **kwargs):
|
513
|
-
|
431
|
+
pass
|
514
432
|
|
515
|
-
|
516
|
-
|
433
|
+
def save_pretrained(
|
434
|
+
self,
|
435
|
+
save_directory: Union[str, Path],
|
436
|
+
push_to_hub: bool = False,
|
437
|
+
**kwargs,
|
438
|
+
):
|
517
439
|
"""
|
518
|
-
|
519
|
-
|
440
|
+
Saves a model and its configuration file to a directory, so that it can be re-loaded using the
|
441
|
+
[`~optimum.rbln.modeling_base.RBLNBaseModel.from_pretrained`] class method.
|
442
|
+
|
443
|
+
Args:
|
444
|
+
save_directory (`Union[str, Path]`):
|
445
|
+
Directory where to save the model file.
|
446
|
+
push_to_hub (`bool`, *optional*, defaults to `False`):
|
447
|
+
Whether or not to push your model to the Hugging Face model hub after saving it.
|
448
|
+
|
520
449
|
"""
|
521
|
-
|
450
|
+
if os.path.isfile(save_directory):
|
451
|
+
logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
|
452
|
+
return
|
522
453
|
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
454
|
+
os.makedirs(save_directory, exist_ok=True)
|
455
|
+
|
456
|
+
real_save_dir = self.model_save_dir / self.subfolder
|
457
|
+
save_directory_path = Path(save_directory)
|
458
|
+
if os.path.exists(real_save_dir) and os.path.isdir(real_save_dir):
|
459
|
+
if save_directory_path.absolute() == real_save_dir.absolute():
|
460
|
+
raise FileExistsError(
|
461
|
+
f"Cannot save model to '{save_directory}'. "
|
462
|
+
f"This directory already exists and contains the model files."
|
463
|
+
)
|
464
|
+
shutil.copytree(real_save_dir, save_directory, dirs_exist_ok=True)
|
465
|
+
self.config.save_pretrained(save_directory)
|
466
|
+
if self.generation_config is not None:
|
467
|
+
self.generation_config.save_pretrained(save_directory)
|
468
|
+
else:
|
469
|
+
raise FileNotFoundError(
|
470
|
+
f"Unable to save the model. The model directory '{real_save_dir}' does not exist or is not accessible. "
|
471
|
+
f"Cannot save to the specified destination '{save_directory}'. "
|
472
|
+
f"Please ensure the model directory exists and you have the necessary permissions to access it."
|
473
|
+
)
|
474
|
+
|
475
|
+
if push_to_hub:
|
476
|
+
return super().push_to_hub(save_directory, **kwargs)
|
477
|
+
|
478
|
+
@staticmethod
|
479
|
+
def _raise_missing_compiled_file_error(missing_files: List[str]):
|
480
|
+
"""Raises a KeyError with a message indicating missing compiled model files."""
|
481
|
+
|
482
|
+
if len(missing_files) == 1:
|
483
|
+
message = f"The rbln model folder is missing the required '{missing_files[0]}.rbln' file. "
|
484
|
+
else:
|
485
|
+
files_str = ", ".join([f"'{f}.rbln'" for f in missing_files])
|
486
|
+
message = (
|
487
|
+
"The rbln model folder is missing required files. "
|
488
|
+
f"Ensure that {files_str} files are present in the folder. "
|
489
|
+
)
|
490
|
+
message += (
|
491
|
+
"These files are necessary for loading the rbln model. "
|
492
|
+
"If these files are missing, please recompile the model using the latest optimum-rbln "
|
493
|
+
"and ensure the compilation completes successfully."
|
494
|
+
)
|
495
|
+
raise KeyError(message)
|
527
496
|
|
528
497
|
@classmethod
|
529
498
|
@abstractmethod
|
530
499
|
def _get_rbln_config(cls, **rbln_config_kwargs) -> RBLNConfig:
|
531
500
|
pass
|
532
501
|
|
533
|
-
@abstractmethod
|
534
|
-
def forward(self, *args: List[torch.Tensor], **kwargs: Dict[str, torch.Tensor]):
|
535
|
-
pass
|
536
|
-
|
537
502
|
@classmethod
|
538
503
|
@abstractmethod
|
539
504
|
def _create_runtimes(
|
540
505
|
cls,
|
541
506
|
compiled_models: List[rebel.RBLNCompiledModel],
|
542
507
|
rbln_device_map: Dict[str, int],
|
508
|
+
activate_profiler: Optional[bool] = None,
|
543
509
|
) -> List[rebel.Runtime]:
|
544
510
|
# compiled_models -> runtimes
|
545
511
|
pass
|
@@ -562,546 +528,6 @@ class RBLNBaseModel(OptimizedModel, ABC, SubModulesMixin):
|
|
562
528
|
):
|
563
529
|
pass
|
564
530
|
|
565
|
-
@
|
566
|
-
@use_rbln_config
|
567
|
-
def _export(
|
568
|
-
cls,
|
569
|
-
model_id: Union[str, Path],
|
570
|
-
config: "PretrainedConfig", # FIXME : optimum passes config, but we ignore it.
|
571
|
-
rbln_config: Optional[Dict[str, Any]] = None,
|
572
|
-
**kwargs,
|
573
|
-
) -> "RBLNModel":
|
574
|
-
subfolder = kwargs.get("subfolder", "")
|
575
|
-
model_save_dir = kwargs.pop("model_save_dir", None)
|
576
|
-
|
577
|
-
rbln_kwargs = rbln_config
|
578
|
-
model: "PreTrainedModel" = cls.get_pytorch_model(
|
579
|
-
model_id=model_id,
|
580
|
-
rbln_kwargs=rbln_kwargs,
|
581
|
-
**kwargs,
|
582
|
-
)
|
583
|
-
preprocessors = maybe_load_preprocessors(model_id, subfolder=subfolder)
|
584
|
-
return cls.from_model(
|
585
|
-
model,
|
586
|
-
rbln_config=rbln_config,
|
587
|
-
preprocessors=preprocessors,
|
588
|
-
model_save_dir=model_save_dir,
|
589
|
-
**kwargs,
|
590
|
-
)
|
591
|
-
|
592
|
-
|
593
|
-
class RBLNModel(RBLNBaseModel):
|
594
|
-
"""
|
595
|
-
A class that inherits from RBLNBaseModel for models consisting of a single `torch.nn.Module`.
|
596
|
-
|
597
|
-
This class supports all the functionality of RBLNBaseModel, including loading and saving models using
|
598
|
-
the `from_pretrained` and `save_pretrained` methods, compiling PyTorch models for execution on RBLN NPU
|
599
|
-
devices.
|
600
|
-
|
601
|
-
Example:
|
602
|
-
```python
|
603
|
-
model = RBLNModel.from_pretrained("model_id", export=True, rbln_npu="npu_name")
|
604
|
-
outputs = model(**inputs)
|
605
|
-
```
|
606
|
-
"""
|
607
|
-
|
608
|
-
@classmethod
|
609
|
-
def update_kwargs(cls, kwargs):
|
610
|
-
"""
|
611
|
-
Update user-given kwargs to get proper pytorch model.
|
612
|
-
|
613
|
-
For example, `torchscript`=True should be set because torch.jit
|
614
|
-
does not support `transformers` output instances as module output;
|
615
|
-
"""
|
616
|
-
kwargs.update(
|
617
|
-
{
|
618
|
-
"torchscript": True,
|
619
|
-
"return_dict": False,
|
620
|
-
}
|
621
|
-
)
|
622
|
-
return kwargs
|
623
|
-
|
624
|
-
@classmethod
|
625
|
-
def get_pytorch_model(
|
626
|
-
cls,
|
627
|
-
model_id: str,
|
628
|
-
use_auth_token: Optional[Union[bool, str]] = None,
|
629
|
-
revision: Optional[str] = None,
|
630
|
-
force_download: bool = False,
|
631
|
-
cache_dir: Optional[str] = None,
|
632
|
-
subfolder: str = "",
|
633
|
-
local_files_only: bool = False,
|
634
|
-
trust_remote_code: bool = False,
|
635
|
-
# Some rbln-kwargs should be applied before loading torch module (i.e. quantized llm)
|
636
|
-
rbln_kwargs: Optional[Dict[str, Any]] = None,
|
637
|
-
**kwargs,
|
638
|
-
) -> "PreTrainedModel":
|
639
|
-
task = kwargs.pop("task", None)
|
640
|
-
if task is None:
|
641
|
-
task = TasksManager.infer_task_from_model(cls.auto_model_class)
|
642
|
-
|
643
|
-
kwargs = cls.update_kwargs(kwargs)
|
644
|
-
|
645
|
-
model = TasksManager.get_model_from_task(
|
646
|
-
task=task,
|
647
|
-
model_name_or_path=model_id,
|
648
|
-
subfolder=subfolder,
|
649
|
-
revision=revision,
|
650
|
-
framework="pt",
|
651
|
-
cache_dir=cache_dir,
|
652
|
-
use_auth_token=use_auth_token,
|
653
|
-
local_files_only=local_files_only,
|
654
|
-
force_download=force_download,
|
655
|
-
trust_remote_code=trust_remote_code,
|
656
|
-
**kwargs,
|
657
|
-
)
|
658
|
-
|
659
|
-
return model
|
660
|
-
|
661
|
-
@classmethod
|
662
|
-
def save_torch_artifacts(
|
663
|
-
cls,
|
664
|
-
model: "PreTrainedModel",
|
665
|
-
save_dir_path: Path,
|
666
|
-
subfolder: str,
|
667
|
-
rbln_config: RBLNConfig,
|
668
|
-
):
|
669
|
-
"""
|
670
|
-
If you are unavoidably running on a CPU rather than an RBLN device,
|
671
|
-
store the torch tensor, weight, etc. in this function.
|
672
|
-
"""
|
673
|
-
|
674
|
-
@classmethod
|
675
|
-
def get_compiled_model(cls, model: "PreTrainedModel", rbln_config: RBLNConfig):
|
676
|
-
model = cls.wrap_model_if_needed(model, rbln_config)
|
677
|
-
rbln_compile_config = rbln_config.compile_cfgs[0]
|
678
|
-
compiled_model = cls.compile(model, rbln_compile_config=rbln_compile_config)
|
679
|
-
return compiled_model
|
680
|
-
|
681
|
-
@classmethod
|
682
|
-
@use_rbln_config
|
683
|
-
def from_model(
|
684
|
-
cls,
|
685
|
-
model: "PreTrainedModel",
|
686
|
-
rbln_config: Dict[str, Any] = {},
|
687
|
-
model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
|
688
|
-
subfolder: str = "",
|
689
|
-
**kwargs,
|
690
|
-
):
|
691
|
-
preprocessors = kwargs.pop("preprocessors", [])
|
692
|
-
rbln_kwargs = rbln_config
|
693
|
-
|
694
|
-
# Directory to save compile artifacts(.rbln) and original configs
|
695
|
-
if model_save_dir is None:
|
696
|
-
save_dir = TemporaryDirectory()
|
697
|
-
save_dir_path = Path(save_dir.name)
|
698
|
-
else:
|
699
|
-
save_dir = model_save_dir
|
700
|
-
if isinstance(save_dir, TemporaryDirectory):
|
701
|
-
save_dir_path = Path(model_save_dir.name)
|
702
|
-
else:
|
703
|
-
save_dir_path = Path(model_save_dir)
|
704
|
-
save_dir_path.mkdir(exist_ok=True)
|
705
|
-
|
706
|
-
# (Optional) Save preprocessors (tokenizer, image preprocessors, etc)
|
707
|
-
for preprocessor in preprocessors:
|
708
|
-
preprocessor.save_pretrained(save_dir_path)
|
709
|
-
|
710
|
-
# Save configs
|
711
|
-
# FIXME :: optimum passes AutoConfig. But here we ignore it.
|
712
|
-
config = model.config
|
713
|
-
if hasattr(model, "can_generate") and model.can_generate():
|
714
|
-
generation_config = model.generation_config
|
715
|
-
generation_config.save_pretrained(save_dir_path / subfolder)
|
716
|
-
if not isinstance(config, PretrainedConfig): # diffusers config
|
717
|
-
config = PretrainedConfig(**config)
|
718
|
-
config.save_pretrained(save_dir_path / subfolder)
|
719
|
-
|
720
|
-
# Get compilation arguments (e.g. input_info)
|
721
|
-
rbln_config: RBLNConfig = cls.get_rbln_config(
|
722
|
-
preprocessors=preprocessors, model_config=config, rbln_kwargs=rbln_kwargs
|
723
|
-
)
|
724
|
-
# rbln_config.update_runtime_cfg(rbln_kwargs) # This is done in get_rbln_config
|
725
|
-
|
726
|
-
compiled_model: Union[rebel.RBLNCompiledModel, Dict[str, rebel.RBLNCompiledModel]] = cls.get_compiled_model(
|
727
|
-
model, rbln_config=rbln_config
|
728
|
-
)
|
729
|
-
|
730
|
-
# Save compiled models (.rbln)
|
731
|
-
(save_dir_path / subfolder).mkdir(exist_ok=True)
|
732
|
-
if not isinstance(compiled_model, dict):
|
733
|
-
compiled_models = {DEFAULT_COMPILED_MODEL_NAME: compiled_model}
|
734
|
-
else:
|
735
|
-
compiled_models = compiled_model
|
736
|
-
for compiled_model_name, cm in compiled_models.items():
|
737
|
-
cm.save(save_dir_path / subfolder / f"{compiled_model_name}.rbln")
|
738
|
-
rbln_config.save(save_dir_path / subfolder)
|
739
|
-
|
740
|
-
# Save torch artifacts (e.g. embedding matrix if needed.)
|
741
|
-
cls.save_torch_artifacts(model, save_dir_path=save_dir_path, subfolder=subfolder, rbln_config=rbln_config)
|
742
|
-
|
743
|
-
# Load submodules
|
744
|
-
if len(cls._rbln_submodules) > 0:
|
745
|
-
rbln_submodules = cls._load_submodules(
|
746
|
-
model=model,
|
747
|
-
model_save_dir=save_dir,
|
748
|
-
rbln_kwargs=rbln_kwargs,
|
749
|
-
**kwargs,
|
750
|
-
)
|
751
|
-
else:
|
752
|
-
rbln_submodules = []
|
753
|
-
|
754
|
-
# Instantiate
|
755
|
-
return cls._from_pretrained(
|
756
|
-
model_id=save_dir_path,
|
757
|
-
config=config,
|
758
|
-
model_save_dir=save_dir,
|
759
|
-
subfolder=subfolder,
|
760
|
-
rbln_config=rbln_config,
|
761
|
-
rbln_compiled_models=compiled_models,
|
762
|
-
rbln_submodules=rbln_submodules,
|
763
|
-
**kwargs,
|
764
|
-
)
|
765
|
-
|
766
|
-
@classmethod
|
767
|
-
def _create_runtimes(
|
768
|
-
cls,
|
769
|
-
compiled_models: List[rebel.RBLNCompiledModel],
|
770
|
-
rbln_device_map: Dict[str, int],
|
771
|
-
) -> List[rebel.Runtime]:
|
772
|
-
device = rbln_device_map[DEFAULT_COMPILED_MODEL_NAME]
|
773
|
-
return [compiled_model.create_runtime(tensor_type="pt", device=device) for compiled_model in compiled_models]
|
774
|
-
|
531
|
+
@abstractmethod
|
775
532
|
def forward(self, *args: List[torch.Tensor], **kwargs: Dict[str, torch.Tensor]):
|
776
|
-
|
777
|
-
return output
|
778
|
-
|
779
|
-
|
780
|
-
class RBLNModelForQuestionAnswering(RBLNModel):
|
781
|
-
auto_model_class = AutoModelForQuestionAnswering
|
782
|
-
rbln_model_input_names = ["input_ids", "attention_mask", "token_type_ids"]
|
783
|
-
|
784
|
-
@classmethod
|
785
|
-
def _get_rbln_config(
|
786
|
-
cls,
|
787
|
-
preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
|
788
|
-
model_config: Optional["PretrainedConfig"] = None,
|
789
|
-
rbln_kwargs: Dict[str, Any] = {},
|
790
|
-
) -> RBLNConfig:
|
791
|
-
rbln_max_seq_len = rbln_kwargs.get("max_seq_len", None)
|
792
|
-
rbln_batch_size = rbln_kwargs.get("batch_size", None)
|
793
|
-
rbln_model_input_names = rbln_kwargs.get("model_input_names", None)
|
794
|
-
|
795
|
-
if rbln_max_seq_len is None:
|
796
|
-
for tokenizer in preprocessors:
|
797
|
-
if hasattr(tokenizer, "model_max_length"):
|
798
|
-
rbln_max_seq_len = tokenizer.model_max_length
|
799
|
-
break
|
800
|
-
if rbln_max_seq_len is None:
|
801
|
-
raise ValueError("`rbln_max_seq_len` should be specified!")
|
802
|
-
|
803
|
-
if rbln_batch_size is None:
|
804
|
-
rbln_batch_size = 1
|
805
|
-
|
806
|
-
if rbln_model_input_names is None:
|
807
|
-
for tokenizer in preprocessors:
|
808
|
-
if hasattr(tokenizer, "model_input_names"):
|
809
|
-
rbln_model_input_names = tokenizer.model_input_names
|
810
|
-
break
|
811
|
-
if rbln_model_input_names is None and hasattr(cls, "rbln_model_input_names"):
|
812
|
-
rbln_model_input_names = cls.rbln_model_input_names
|
813
|
-
elif rbln_model_input_names is None and hasattr(cls, "rbln_model_input_names") is False:
|
814
|
-
original_model_class = getattr(transformers, model_config.architectures[0])
|
815
|
-
input_names_order = inspect.signature(original_model_class.forward).parameters.keys()
|
816
|
-
raise ValueError(
|
817
|
-
"Specify the model input names obtained by the tokenizer via `rbln_model_input_names`, "
|
818
|
-
f"and be sure to make the order of the inputs same as QuestionAnswering forward() arguments like ({list(input_names_order)})"
|
819
|
-
)
|
820
|
-
|
821
|
-
input_info = [
|
822
|
-
(model_input_name, [rbln_batch_size, rbln_max_seq_len], "int64")
|
823
|
-
for model_input_name in rbln_model_input_names
|
824
|
-
]
|
825
|
-
|
826
|
-
rbln_compile_config = RBLNCompileConfig(input_info=input_info)
|
827
|
-
rbln_config = RBLNConfig(
|
828
|
-
rbln_cls=cls.__name__,
|
829
|
-
compile_cfgs=[rbln_compile_config],
|
830
|
-
rbln_kwargs=rbln_kwargs,
|
831
|
-
)
|
832
|
-
rbln_config.model_cfg.update({"max_seq_len": rbln_max_seq_len})
|
833
|
-
return rbln_config
|
834
|
-
|
835
|
-
|
836
|
-
class RBLNModelForImageClassification(RBLNModel):
|
837
|
-
"""
|
838
|
-
This is a generic model class that will be instantiated as one of the model classes of the library (with a image classification head) when created with the from_pretrained() class method
|
839
|
-
"""
|
840
|
-
|
841
|
-
auto_model_class = AutoModelForImageClassification
|
842
|
-
|
843
|
-
@classmethod
|
844
|
-
def _get_rbln_config(
|
845
|
-
cls,
|
846
|
-
preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
|
847
|
-
model_config: Optional["PretrainedConfig"] = None,
|
848
|
-
rbln_kwargs: Dict[str, Any] = {},
|
849
|
-
) -> RBLNConfig:
|
850
|
-
rbln_image_size = rbln_kwargs.get("image_size", None)
|
851
|
-
rbln_batch_size = rbln_kwargs.get("batch_size", None)
|
852
|
-
|
853
|
-
if rbln_image_size is None:
|
854
|
-
for processor in preprocessors:
|
855
|
-
if hasattr(processor, "size"):
|
856
|
-
if all(required_key in processor.size.keys() for required_key in ["height", "width"]):
|
857
|
-
rbln_image_size = (processor.size["height"], processor.size["width"])
|
858
|
-
elif "shortest_edge" in processor.size.keys():
|
859
|
-
rbln_image_size = (processor.size["shortest_edge"], processor.size["shortest_edge"])
|
860
|
-
elif "longest_edge" in processor.size.keys():
|
861
|
-
rbln_image_size = (processor.size["longest_edge"], processor.size["longest_edge"])
|
862
|
-
break
|
863
|
-
|
864
|
-
if rbln_image_size is None:
|
865
|
-
rbln_image_size = model_config.image_size
|
866
|
-
|
867
|
-
if rbln_image_size is None:
|
868
|
-
raise ValueError("`rbln_image_size` should be specified!")
|
869
|
-
|
870
|
-
if rbln_batch_size is None:
|
871
|
-
rbln_batch_size = 1
|
872
|
-
|
873
|
-
if isinstance(rbln_image_size, int):
|
874
|
-
rbln_image_height, rbln_image_width = rbln_image_size, rbln_image_size
|
875
|
-
elif isinstance(rbln_image_size, (list, tuple)):
|
876
|
-
rbln_image_height, rbln_image_width = rbln_image_size[0], rbln_image_size[1]
|
877
|
-
elif isinstance(rbln_image_size, dict):
|
878
|
-
rbln_image_height, rbln_image_width = rbln_image_size["height"], rbln_image_size["width"]
|
879
|
-
else:
|
880
|
-
raise ValueError(
|
881
|
-
"`rbln_image_size` should be `int` (ex. 224), `tuple` (ex. 224, 224), `dict` (ex. {'height': 224, 'width': 224}) format"
|
882
|
-
)
|
883
|
-
|
884
|
-
input_info = [
|
885
|
-
(
|
886
|
-
"pixel_values",
|
887
|
-
[rbln_batch_size, 3, rbln_image_height, rbln_image_width],
|
888
|
-
"float32",
|
889
|
-
)
|
890
|
-
]
|
891
|
-
|
892
|
-
rbln_compile_config = RBLNCompileConfig(input_info=input_info)
|
893
|
-
return RBLNConfig(rbln_cls=cls.__name__, compile_cfgs=[rbln_compile_config], rbln_kwargs=rbln_kwargs)
|
894
|
-
|
895
|
-
|
896
|
-
class RBLNModelForAudioClassification(RBLNModel):
|
897
|
-
"""
|
898
|
-
This is a generic model class that will be instantiated as one of the model classes of the library (with a audio classification head) when created with the from_pretrained() class method
|
899
|
-
This model inherits from [`RBLNModel`]. Check the superclass documentation for the generic methods the library implements for all its models.
|
900
|
-
|
901
|
-
A class to convert and run pre-trained transformers based AudioClassification models on RBLN devices.
|
902
|
-
It implements the methods to convert a pre-trained transformers AudioClassification model into a RBLN transformer model by:
|
903
|
-
- transferring the checkpoint weights of the original into an optimized RBLN graph,
|
904
|
-
- compiling the resulting graph using the RBLN compiler.
|
905
|
-
|
906
|
-
Currently, this model class only supports the 'AST' model from the transformers library. Future updates may include support for additional model types.
|
907
|
-
"""
|
908
|
-
|
909
|
-
auto_model_class = AutoModelForAudioClassification
|
910
|
-
|
911
|
-
@classmethod
|
912
|
-
def _get_rbln_config(
|
913
|
-
cls,
|
914
|
-
preprocessors: "AutoFeatureExtractor",
|
915
|
-
model_config: "PretrainedConfig",
|
916
|
-
rbln_kwargs: Dict[str, Any] = {},
|
917
|
-
) -> RBLNConfig:
|
918
|
-
rbln_batch_size = rbln_kwargs.get("batch_size", None)
|
919
|
-
rbln_max_length = rbln_kwargs.get("max_length", None)
|
920
|
-
rbln_num_mel_bins = rbln_kwargs.get("num_mel_bins", None)
|
921
|
-
|
922
|
-
if rbln_batch_size is None:
|
923
|
-
rbln_batch_size = 1
|
924
|
-
|
925
|
-
if rbln_num_mel_bins is None:
|
926
|
-
rbln_num_mel_bins = getattr(model_config, "num_mel_bins", None)
|
927
|
-
if rbln_num_mel_bins is None:
|
928
|
-
for feature_extractor in preprocessors:
|
929
|
-
if hasattr(feature_extractor, "num_mel_bins"):
|
930
|
-
rbln_num_mel_bins = feature_extractor.num_mel_bins
|
931
|
-
break
|
932
|
-
|
933
|
-
if rbln_num_mel_bins is None:
|
934
|
-
raise ValueError("`rbln_num_mel_bins` should be specified!")
|
935
|
-
|
936
|
-
if rbln_max_length is None:
|
937
|
-
rbln_max_length = getattr(model_config, "max_length", None)
|
938
|
-
for feature_extractor in preprocessors:
|
939
|
-
if hasattr(feature_extractor, "max_length"):
|
940
|
-
rbln_max_length = feature_extractor.max_length
|
941
|
-
break
|
942
|
-
|
943
|
-
if rbln_max_length is None:
|
944
|
-
raise ValueError("`rbln_max_length` should be specified!")
|
945
|
-
|
946
|
-
input_info = [
|
947
|
-
(
|
948
|
-
"input_values",
|
949
|
-
[rbln_batch_size, rbln_max_length, rbln_num_mel_bins],
|
950
|
-
"float32",
|
951
|
-
),
|
952
|
-
]
|
953
|
-
|
954
|
-
rbln_compile_config = RBLNCompileConfig(input_info=input_info)
|
955
|
-
rbln_config = RBLNConfig(
|
956
|
-
rbln_cls=cls.__name__,
|
957
|
-
compile_cfgs=[rbln_compile_config],
|
958
|
-
rbln_kwargs=rbln_kwargs,
|
959
|
-
)
|
960
|
-
rbln_config.model_cfg.update(
|
961
|
-
{
|
962
|
-
"batch_size": rbln_batch_size,
|
963
|
-
"max_length": rbln_max_length,
|
964
|
-
"num_mel_bins": rbln_num_mel_bins,
|
965
|
-
}
|
966
|
-
)
|
967
|
-
return rbln_config
|
968
|
-
|
969
|
-
|
970
|
-
class RBLNModelForSequenceClassification(RBLNModel):
|
971
|
-
"""
|
972
|
-
This is a generic model class that will be instantiated as one of the model classes of the library (with a sequence classification head) when created with the from_pretrained() class method
|
973
|
-
This model inherits from [`RBLNModel`]. Check the superclass documentation for the generic methods the library implements for all its models.
|
974
|
-
|
975
|
-
A class to convert and run pre-trained transformers based SequenceClassification models on RBLN devices.
|
976
|
-
It implements the methods to convert a pre-trained transformers SequenceClassification model into a RBLN transformer model by:
|
977
|
-
- transferring the checkpoint weights of the original into an optimized RBLN graph,
|
978
|
-
- compiling the resulting graph using the RBLN compiler.
|
979
|
-
|
980
|
-
Currently, this model class supports the 'XLMRoberta' and 'Roberta' model from the transformers library. Future updates may include support for additional model types.
|
981
|
-
"""
|
982
|
-
|
983
|
-
auto_model_class = AutoModelForSequenceClassification
|
984
|
-
|
985
|
-
@classmethod
|
986
|
-
def _get_rbln_config(
|
987
|
-
cls,
|
988
|
-
preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
|
989
|
-
model_config: Optional["PretrainedConfig"] = None,
|
990
|
-
rbln_kwargs: Dict[str, Any] = {},
|
991
|
-
) -> RBLNConfig:
|
992
|
-
rbln_max_seq_len = rbln_kwargs.get("max_seq_len", None)
|
993
|
-
rbln_model_input_names = rbln_kwargs.get("model_input_names", None)
|
994
|
-
rbln_batch_size = rbln_kwargs.get("batch_size", None)
|
995
|
-
|
996
|
-
max_position_embeddings = getattr(model_config, "n_positions", None) or getattr(
|
997
|
-
model_config, "max_position_embeddings", None
|
998
|
-
)
|
999
|
-
|
1000
|
-
if rbln_max_seq_len is None:
|
1001
|
-
rbln_max_seq_len = max_position_embeddings
|
1002
|
-
if rbln_max_seq_len is None:
|
1003
|
-
for tokenizer in preprocessors:
|
1004
|
-
if hasattr(tokenizer, "model_max_length"):
|
1005
|
-
rbln_max_seq_len = tokenizer.model_max_length
|
1006
|
-
break
|
1007
|
-
if rbln_max_seq_len is None:
|
1008
|
-
raise ValueError("`rbln_max_seq_len` should be specified!")
|
1009
|
-
|
1010
|
-
if max_position_embeddings is not None and rbln_max_seq_len > max_position_embeddings:
|
1011
|
-
raise ValueError("`rbln_enc_max_seq_len` should be less or equal than max_position_embeddings!")
|
1012
|
-
|
1013
|
-
if rbln_model_input_names is None:
|
1014
|
-
for tokenizer in preprocessors:
|
1015
|
-
if hasattr(tokenizer, "model_input_names"):
|
1016
|
-
rbln_model_input_names = tokenizer.model_input_names
|
1017
|
-
break
|
1018
|
-
if rbln_model_input_names is None and hasattr(cls, "rbln_model_input_names"):
|
1019
|
-
rbln_model_input_names = cls.rbln_model_input_names
|
1020
|
-
elif rbln_model_input_names is None and hasattr(cls, "rbln_model_input_names") is False:
|
1021
|
-
original_model_class = getattr(transformers, model_config.architectures[0])
|
1022
|
-
input_names_order = inspect.signature(original_model_class.forward).parameters.keys()
|
1023
|
-
raise ValueError(
|
1024
|
-
"Specify the model input names obtained by the tokenizer via `rbln_model_input_names`, "
|
1025
|
-
f"and be sure to make the order of the inputs same as SequenceClassification forward() arguments like ({list(input_names_order)})"
|
1026
|
-
)
|
1027
|
-
|
1028
|
-
if rbln_batch_size is None:
|
1029
|
-
rbln_batch_size = 1
|
1030
|
-
|
1031
|
-
input_info = [
|
1032
|
-
(model_input_name, [rbln_batch_size, rbln_max_seq_len], "int64")
|
1033
|
-
for model_input_name in rbln_model_input_names
|
1034
|
-
]
|
1035
|
-
|
1036
|
-
rbln_compile_config = RBLNCompileConfig(input_info=input_info)
|
1037
|
-
rbln_config = RBLNConfig(
|
1038
|
-
rbln_cls=cls.__name__,
|
1039
|
-
compile_cfgs=[rbln_compile_config],
|
1040
|
-
rbln_kwargs=rbln_kwargs,
|
1041
|
-
)
|
1042
|
-
rbln_config.model_cfg.update({"max_seq_len": rbln_max_seq_len})
|
1043
|
-
return rbln_config
|
1044
|
-
|
1045
|
-
|
1046
|
-
class RBLNModelForMaskedLM(RBLNModel):
|
1047
|
-
auto_model_class = AutoModelForMaskedLM
|
1048
|
-
|
1049
|
-
@classmethod
|
1050
|
-
def _get_rbln_config(
|
1051
|
-
cls,
|
1052
|
-
preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]],
|
1053
|
-
model_config: Optional["PretrainedConfig"] = None,
|
1054
|
-
rbln_kwargs: Dict[str, Any] = {},
|
1055
|
-
) -> RBLNConfig:
|
1056
|
-
rbln_max_seq_len = rbln_kwargs.get("max_seq_len", None)
|
1057
|
-
rbln_model_input_names = rbln_kwargs.get("model_input_names", None)
|
1058
|
-
rbln_batch_size = rbln_kwargs.get("batch_size", None)
|
1059
|
-
|
1060
|
-
max_position_embeddings = getattr(model_config, "n_positions", None) or getattr(
|
1061
|
-
model_config, "max_position_embeddings", None
|
1062
|
-
)
|
1063
|
-
|
1064
|
-
if rbln_max_seq_len is None:
|
1065
|
-
rbln_max_seq_len = max_position_embeddings
|
1066
|
-
if rbln_max_seq_len is None:
|
1067
|
-
for tokenizer in preprocessors:
|
1068
|
-
if hasattr(tokenizer, "model_max_length"):
|
1069
|
-
rbln_max_seq_len = tokenizer.model_max_length
|
1070
|
-
break
|
1071
|
-
if rbln_max_seq_len is None:
|
1072
|
-
raise ValueError("`rbln_max_seq_len` should be specified!")
|
1073
|
-
|
1074
|
-
if max_position_embeddings is not None and rbln_max_seq_len > max_position_embeddings:
|
1075
|
-
raise ValueError("`rbln_enc_max_seq_len` should be less or equal than max_position_embeddings!")
|
1076
|
-
|
1077
|
-
if rbln_model_input_names is None:
|
1078
|
-
for tokenizer in preprocessors:
|
1079
|
-
if hasattr(tokenizer, "model_input_names"):
|
1080
|
-
rbln_model_input_names = tokenizer.model_input_names
|
1081
|
-
break
|
1082
|
-
if rbln_model_input_names is None and hasattr(cls, "rbln_model_input_names"):
|
1083
|
-
rbln_model_input_names = cls.rbln_model_input_names
|
1084
|
-
elif rbln_model_input_names is None and hasattr(cls, "rbln_model_input_names") is False:
|
1085
|
-
original_model_class = getattr(transformers, model_config.architectures[0])
|
1086
|
-
input_names_order = inspect.signature(original_model_class.forward).parameters.keys()
|
1087
|
-
raise ValueError(
|
1088
|
-
"Specify the model input names obtained by the tokenizer via `rbln_model_input_names`, "
|
1089
|
-
f"and be sure to make the order of the inputs same as MaskedLM forward() arguments like ({list(input_names_order)})"
|
1090
|
-
)
|
1091
|
-
|
1092
|
-
if rbln_batch_size is None:
|
1093
|
-
rbln_batch_size = 1
|
1094
|
-
|
1095
|
-
input_info = [
|
1096
|
-
(model_input_name, [rbln_batch_size, rbln_max_seq_len], "int64")
|
1097
|
-
for model_input_name in rbln_model_input_names
|
1098
|
-
]
|
1099
|
-
|
1100
|
-
rbln_compile_config = RBLNCompileConfig(input_info=input_info)
|
1101
|
-
rbln_config = RBLNConfig(
|
1102
|
-
rbln_cls=cls.__name__,
|
1103
|
-
compile_cfgs=[rbln_compile_config],
|
1104
|
-
rbln_kwargs=rbln_kwargs,
|
1105
|
-
)
|
1106
|
-
rbln_config.model_cfg.update({"max_seq_len": rbln_max_seq_len})
|
1107
|
-
return rbln_config
|
533
|
+
pass
|