optimum-rbln 0.8.4a5__py3-none-any.whl → 0.8.4a7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/configuration_utils.py +41 -3
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +1 -1
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +3 -3
- optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +1 -1
- optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +2 -2
- optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +7 -2
- optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +7 -2
- optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +1 -1
- optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +1 -1
- optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +2 -2
- optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +1 -1
- optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +3 -3
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +1 -1
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +1 -1
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +1 -1
- optimum/rbln/diffusers/modeling_diffusers.py +7 -3
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +31 -3
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +28 -3
- optimum/rbln/diffusers/models/autoencoders/vq_model.py +31 -3
- optimum/rbln/diffusers/models/transformers/prior_transformer.py +1 -1
- optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +9 -1
- optimum/rbln/diffusers/models/transformers/transformer_sd3.py +9 -1
- optimum/rbln/diffusers/models/unets/unet_2d_condition.py +6 -3
- optimum/rbln/diffusers/pipelines/auto_pipeline.py +45 -8
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +1 -1
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +1 -1
- optimum/rbln/modeling.py +17 -13
- optimum/rbln/modeling_base.py +11 -9
- optimum/rbln/transformers/configuration_generic.py +3 -3
- optimum/rbln/transformers/modeling_generic.py +1 -0
- optimum/rbln/transformers/models/auto/auto_factory.py +67 -7
- optimum/rbln/transformers/models/auto/modeling_auto.py +31 -0
- optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +5 -6
- optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +1 -1
- optimum/rbln/transformers/models/clip/configuration_clip.py +7 -4
- optimum/rbln/transformers/models/clip/modeling_clip.py +23 -4
- optimum/rbln/transformers/models/colpali/configuration_colpali.py +2 -2
- optimum/rbln/transformers/models/colpali/modeling_colpali.py +38 -6
- optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +1 -1
- optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +23 -0
- optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +17 -2
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +7 -8
- optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +12 -6
- optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +6 -2
- optimum/rbln/transformers/models/llava/configuration_llava.py +6 -2
- optimum/rbln/transformers/models/llava/modeling_llava.py +1 -0
- optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +2 -2
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +57 -78
- optimum/rbln/transformers/models/pixtral/configuration_pixtral.py +1 -1
- optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +18 -3
- optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +2 -2
- optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +1 -1
- optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +2 -2
- optimum/rbln/transformers/models/siglip/configuration_siglip.py +1 -1
- optimum/rbln/transformers/models/swin/configuration_swin.py +1 -1
- optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +1 -1
- optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +1 -0
- optimum/rbln/transformers/models/whisper/configuration_whisper.py +1 -1
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +1 -0
- {optimum_rbln-0.8.4a5.dist-info → optimum_rbln-0.8.4a7.dist-info}/METADATA +1 -1
- {optimum_rbln-0.8.4a5.dist-info → optimum_rbln-0.8.4a7.dist-info}/RECORD +64 -64
- {optimum_rbln-0.8.4a5.dist-info → optimum_rbln-0.8.4a7.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.8.4a5.dist-info → optimum_rbln-0.8.4a7.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/diffusers/pipelines/auto_pipeline.py
CHANGED

@@ -15,7 +15,7 @@
 
 import importlib
 from pathlib import Path
-from typing import Type, Union
+from typing import Any, Dict, Type, Union
 
 from diffusers.models.controlnets import ControlNetUnionModel
 from diffusers.pipelines.auto_pipeline import (
@@ -29,6 +29,7 @@ from diffusers.pipelines.auto_pipeline import (
 )
 from huggingface_hub.utils import validate_hf_hub_args
 
+from optimum.rbln.configuration_utils import RBLNModelConfig
 from optimum.rbln.modeling_base import RBLNBaseModel
 from optimum.rbln.utils.model_utils import (
     MODEL_MAPPING,
@@ -168,14 +169,44 @@ class RBLNAutoPipelineBase:
 
     @classmethod
     @validate_hf_hub_args
-    def from_pretrained(
-
-
+    def from_pretrained(
+        cls,
+        model_id: Union[str, Path],
+        *,
+        export: bool = None,
+        rbln_config: Union[Dict[str, Any], RBLNModelConfig] = {},
+        **kwargs: Any,
+    ):
+        """
+        Load an RBLN-accelerated Diffusers pipeline from a pretrained checkpoint or a compiled RBLN artifact.
 
-
-
-
-
+        This method determines the concrete `RBLN*` model class that corresponds to the
+        underlying Diffusers pipeline architecture and dispatches to that class's
+        `from_pretrained()` implementation. If a compiled RBLN folder is detected at `model_id`
+        (or `export=False` is explicitly passed), it loads the compiled artifacts; otherwise it
+        compiles from the original Diffusers checkpoint.
+
+        Args:
+            model_id:
+                HF repo id or local path. For compiled models, this should point to a directory
+                (optionally under `subfolder`) that contains `*.rbln` files and `rbln_config.json`.
+            export:
+                Force compilation from a Diffusers checkpoint. When `None`, this is inferred by
+                checking whether compiled artifacts exist at `model_id`.
+            rbln_config:
+                RBLN compilation/runtime configuration. May be provided as a dictionary or as an
+                instance of the specific model's config class (e.g., `RBLNFluxPipelineConfig`).
+            kwargs: Additional keyword arguments.
+                - Arguments prefixed with `rbln_` are forwarded to the RBLN config.
+                - Remaining arguments are forwarded to the Diffusers loader.
+
+        Returns:
+            RBLNBaseModel: An instantiated RBLN model wrapping the Diffusers pipeline, ready for
+                inference on RBLN NPUs.
+
+        """
+        rbln_cls = cls.get_rbln_cls(model_id, export=export, **kwargs)
+        return rbln_cls.from_pretrained(model_id, export=export, rbln_config=rbln_config, **kwargs)
 
     @staticmethod
     def register(rbln_cls: Type[RBLNBaseModel], exist_ok=False):
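The new signature makes the auto-pipeline entry point explicit. A minimal usage sketch, assuming the class is re-exported from the `optimum.rbln` package root, that `save_pretrained` mirrors the base-model API, and that `batch_size` is a valid config key for the resolved pipeline (the checkpoint id is illustrative):

from optimum.rbln import RBLNAutoPipelineForText2Image

# Compile from a Diffusers checkpoint and persist the compiled artifacts.
pipe = RBLNAutoPipelineForText2Image.from_pretrained(
    "runwayml/stable-diffusion-v1-5",   # illustrative checkpoint id
    export=True,                        # force compilation
    rbln_config={"batch_size": 1},      # dict form; a config instance also works
)
pipe.save_pretrained("./sd15-rbln")

# Reload later: export is inferred from the *.rbln files and rbln_config.json.
pipe = RBLNAutoPipelineForText2Image.from_pretrained("./sd15-rbln")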
@@ -198,11 +229,15 @@
 
 
 class RBLNAutoPipelineForText2Image(RBLNAutoPipelineBase, AutoPipelineForText2Image):
+    """Text2Image AutoPipeline for RBLN NPUs."""
+
     _model_mapping = AUTO_TEXT2IMAGE_PIPELINES_MAPPING
     _model_mapping_names = {x[0]: x[1].__name__ for x in AUTO_TEXT2IMAGE_PIPELINES_MAPPING.items()}
 
 
 class RBLNAutoPipelineForImage2Image(RBLNAutoPipelineBase, AutoPipelineForImage2Image):
+    """Image2Image AutoPipeline for RBLN NPUs."""
+
     _model_mapping = AUTO_IMAGE2IMAGE_PIPELINES_MAPPING
     _model_mapping_names = {x[0]: x[1].__name__ for x in AUTO_IMAGE2IMAGE_PIPELINES_MAPPING.items()}
 
@@ -237,6 +272,8 @@ class RBLNAutoPipelineForImage2Image(RBLNAutoPipelineBase, AutoPipelineForImage2
 
 
 class RBLNAutoPipelineForInpainting(RBLNAutoPipelineBase, AutoPipelineForInpainting):
+    """Inpainting AutoPipeline for RBLN NPUs."""
+
     _model_mapping = AUTO_INPAINT_PIPELINES_MAPPING
     _model_mapping_names = {x[0]: x[1].__name__ for x in AUTO_INPAINT_PIPELINES_MAPPING.items()}
 
optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py
CHANGED

@@ -113,7 +113,7 @@ class RBLNCosmosTextToWorldPipeline(RBLNDiffusionMixin, CosmosTextToWorldPipelin
             Configuration options for RBLN compilation. Can include settings for specific submodules
             such as `text_encoder`, `unet`, and `vae`. Configuration can be tailored to the specific
             pipeline being compiled.
-
+        kwargs:
             Additional arguments to pass to the underlying diffusion pipeline constructor or the
             RBLN compilation process. These may include parameters specific to individual submodules
             or the particular diffusion pipeline being used.

optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py
CHANGED

@@ -113,7 +113,7 @@ class RBLNCosmosVideoToWorldPipeline(RBLNDiffusionMixin, CosmosVideoToWorldPipel
             Configuration options for RBLN compilation. Can include settings for specific submodules
             such as `text_encoder`, `unet`, and `vae`. Configuration can be tailored to the specific
             pipeline being compiled.
-
+        kwargs:
             Additional arguments to pass to the underlying diffusion pipeline constructor or the
             RBLN compilation process. These may include parameters specific to individual submodules
             or the particular diffusion pipeline being used.
optimum/rbln/modeling.py
CHANGED
@@ -85,11 +85,13 @@ class RBLNModel(RBLNBaseModel):
         This method performs the actual model conversion and compilation process.
 
         Args:
-            model: The PyTorch model to be compiled.
-
+            model (PreTrainedModel): The PyTorch model to be compiled.
+                The object must be an instance of the HuggingFace transformers PreTrainedModel class.
+            config (Optional[PretrainedConfig]): The configuration object associated with the model.
+            rbln_config (Optional[Union[RBLNModelConfig, Dict]]): Configuration for RBLN model compilation and runtime.
+                This can be provided as a dictionary or an instance of the model's configuration class (e.g., `RBLNLlamaForCausalLMConfig` for Llama models).
                 For detailed configuration options, see the specific model's configuration class documentation.
-
-            kwargs: Additional keyword arguments. Arguments with the prefix 'rbln_' are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.
+            kwargs: Additional keyword arguments. Arguments with the prefix `rbln_` are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.
 
         The method performs the following steps:
 
@@ -99,7 +101,7 @@ class RBLNModel(RBLNBaseModel):
         4. Saves the compiled model and configurations
 
         Returns:
-            A RBLN model instance ready for inference on RBLN NPU devices.
+            (RBLNModel): A RBLN model instance ready for inference on RBLN NPU devices.
         """
         preprocessors = kwargs.pop("preprocessors", [])
         rbln_config, kwargs = cls.prepare_rbln_config(rbln_config=rbln_config, **kwargs)
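The docstring above states that `rbln_`-prefixed kwargs are routed into `rbln_config`. A sketch of the two equivalent call styles, assuming a supported Llama checkpoint and illustrative config keys:

from optimum.rbln import RBLNLlamaForCausalLM

# Dict-style configuration.
model = RBLNLlamaForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",   # illustrative checkpoint id
    export=True,
    rbln_config={"batch_size": 1, "max_seq_len": 4096},
)

# Equivalent prefixed-kwarg style; rbln_* arguments are folded into rbln_config.
model = RBLNLlamaForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    export=True,
    rbln_batch_size=1,
    rbln_max_seq_len=4096,
)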
@@ -241,29 +243,31 @@ class RBLNModel(RBLNBaseModel):
 
     def forward(self, *args: Any, return_dict: Optional[bool] = None, **kwargs: Any) -> Any:
         """
-        Defines the forward pass of
+        Defines the forward pass of `RBLNModel`. The interface mirrors HuggingFace conventions so it can act as a drop-in
+        replacement in many cases.
 
-        This method executes the compiled RBLN model on RBLN NPU devices while
-
-
+        This method executes the compiled RBLN model on RBLN NPU devices while remaining fully compatible with Hugging Face
+        Transformers and Diffusers APIs. In practice, `RBLNModel` can replace models built on `torch.nn.Module` — including
+        `transformers.PreTrainedModel` implementations and Diffusers components based on `diffusers.ModelMixin` — enabling
+        seamless integration into existing workflows.
 
         Args:
-
+            args: Variable length argument list containing model inputs. The format matches the original
                 HuggingFace model's forward method signature (e.g., input_ids, attention_mask for
                 transformers models, or sample, timestep for diffusers models).
             return_dict:
                 Whether to return outputs as a dictionary-like object or as a tuple. When `None`:
                 - For transformers models: Uses `self.config.use_return_dict` (typically `True`)
                 - For diffusers models: Defaults to `True`
-
+            kwargs: Arbitrary keyword arguments containing additional model inputs and parameters,
                 matching the original HuggingFace model's interface.
 
         Returns:
             Model outputs in the same format as the original HuggingFace model.
 
-
+            If `return_dict=True`, returns a dictionary-like object (e.g., BaseModelOutput,
                 CausalLMOutput) with named fields such as `logits`, `hidden_states`, etc.
-
+            If `return_dict=False`, returns a tuple containing the raw model outputs.
 
         Note:
             - This method maintains the exact same interface as the original HuggingFace model's forward method
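Because `forward` keeps the original HuggingFace signature, a compiled model is called exactly like the source model. A sketch, assuming `model` is a compiled RBLN text encoder that accepts `input_ids` and `attention_mask`:

import torch

input_ids = torch.ones(1, 8, dtype=torch.long)
attention_mask = torch.ones(1, 8, dtype=torch.long)

# Dictionary-like output with named fields.
out = model(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)
hidden = out.last_hidden_state

# Tuple output with the same tensors in positional order.
outputs = model(input_ids=input_ids, attention_mask=attention_mask, return_dict=False)
hidden = outputs[0]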
optimum/rbln/modeling_base.py
CHANGED
@@ -373,7 +373,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
     def from_pretrained(
         cls: Type["RBLNBaseModel"],
         model_id: Union[str, Path],
-        export: bool = None,
+        export: Optional[bool] = None,
         rbln_config: Optional[Union[Dict, RBLNModelConfig]] = None,
         **kwargs: Any,
     ) -> "RBLNBaseModel":
@@ -382,15 +382,17 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         User can use this function to load a pre-trained model from the HuggingFace library and convert it to a RBLN model to be run on RBLN NPUs.
 
         Args:
-            model_id: The model id of the pre-trained model to be loaded.
-
-
+            model_id (Union[str, Path]): The model id of the pre-trained model to be loaded.
+                It can be downloaded from the HuggingFace model hub or a local path, or a model id of a compiled model using the RBLN Compiler.
+            export (Optional[bool]): A boolean flag to indicate whether the model should be compiled.
+                If None, it will be determined based on the existence of the compiled model files in the model_id.
+            rbln_config (Optional[Union[Dict, RBLNModelConfig]]): Configuration for RBLN model compilation and runtime.
+                This can be provided as a dictionary or an instance of the model's configuration class (e.g., `RBLNLlamaForCausalLMConfig` for Llama models).
                 For detailed configuration options, see the specific model's configuration class documentation.
-
-            kwargs: Additional keyword arguments. Arguments with the prefix 'rbln_' are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.
+            kwargs: Additional keyword arguments. Arguments with the prefix `rbln_` are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.
 
         Returns:
-            A RBLN model instance ready for inference on RBLN NPU devices.
+            (RBLNModel): A RBLN model instance ready for inference on RBLN NPU devices.
         """
 
         if isinstance(model_id, Path):
@@ -536,9 +538,9 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
         [`~optimum.rbln.modeling_base.RBLNBaseModel.from_pretrained`] class method.
 
         Args:
-            save_directory (
+            save_directory (Union[str, Path]):
                 Directory where to save the model file.
-            push_to_hub (
+            push_to_hub (bool):
                 Whether or not to push your model to the HuggingFace model hub after saving it.
 
         """
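Together with `from_pretrained`, `save_pretrained` enables a compile-once, load-many workflow. A sketch, assuming `RBLNBertModel` is exported at the package root and that `rbln_max_seq_len` is a valid key for encoder models (ids illustrative):

from optimum.rbln import RBLNBertModel

# Compile once; this writes *.rbln artifacts plus rbln_config.json.
model = RBLNBertModel.from_pretrained("bert-base-uncased", export=True, rbln_max_seq_len=128)
model.save_pretrained("./bert-rbln", push_to_hub=False)

# Later sessions load the compiled directory directly, skipping recompilation.
model = RBLNBertModel.from_pretrained("./bert-rbln")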
optimum/rbln/transformers/configuration_generic.py
CHANGED

@@ -34,7 +34,7 @@ class RBLNTransformerEncoderConfig(RBLNModelConfig):
             batch_size (Optional[int]): The batch size for inference. Defaults to 1.
             model_input_names (Optional[List[str]]): Names of the input tensors for the model.
                 Defaults to class-specific rbln_model_input_names if not provided.
-
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
         Raises:
             ValueError: If batch_size is not a positive integer.
@@ -61,7 +61,7 @@ class RBLNImageModelConfig(RBLNModelConfig):
             image_size (Optional[Union[int, Tuple[int, int]]]): The size of input images.
                 Can be an integer for square images or a tuple (height, width).
             batch_size (Optional[int]): The batch size for inference. Defaults to 1.
-
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
         Raises:
             ValueError: If batch_size is not a positive integer.
@@ -133,7 +133,7 @@ class RBLNModelForAudioClassificationConfig(RBLNModelConfig):
             batch_size (Optional[int]): The batch size for inference. Defaults to 1.
             max_length (Optional[int]): Maximum length of the audio input in time dimension.
             num_mel_bins (Optional[int]): Number of Mel frequency bins for audio processing.
-
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
         Raises:
             ValueError: If batch_size is not a positive integer.
optimum/rbln/transformers/modeling_generic.py
CHANGED

@@ -253,6 +253,7 @@ class RBLNModelForAudioClassification(RBLNModel):
 
     A class to convert and run pre-trained transformers based AudioClassification models on RBLN devices.
     It implements the methods to convert a pre-trained transformers AudioClassification model into a RBLN transformer model by:
+
     - transferring the checkpoint weights of the original into an optimized RBLN graph,
     - compiling the resulting graph using the RBLN compiler.
 
optimum/rbln/transformers/models/auto/auto_factory.py
CHANGED

@@ -15,13 +15,13 @@ import importlib
 import inspect
 import warnings
 from pathlib import Path
-from typing import Any, Type, Union
+from typing import Any, Dict, Optional, Type, Union
 
 from transformers import AutoConfig, PretrainedConfig, PreTrainedModel
 from transformers.dynamic_module_utils import get_class_from_dynamic_module
 from transformers.models.auto.auto_factory import _get_model_class
 
-from optimum.rbln.configuration_utils import RBLNAutoConfig
+from optimum.rbln.configuration_utils import RBLNAutoConfig, RBLNModelConfig
 from optimum.rbln.modeling_base import RBLNBaseModel
 from optimum.rbln.utils.model_utils import (
     MODEL_MAPPING,
@@ -178,14 +178,74 @@ class _BaseAutoModelClass:
         return rbln_config.rbln_model_cls_name
 
     @classmethod
-    def from_pretrained(
-
-
+    def from_pretrained(
+        cls,
+        model_id: Union[str, Path],
+        export: bool = None,
+        rbln_config: Optional[Union[Dict, RBLNModelConfig]] = None,
+        **kwargs,
+    ):
+        """
+        Load an RBLN-accelerated model from a pretrained checkpoint or a compiled RBLN artifact.
+
+        This convenience method determines the concrete `RBLN*` model class that matches the
+        underlying HuggingFace architecture and dispatches to that class's
+        `from_pretrained()` implementation. Depending on whether a compiled RBLN folder is
+        detected (or if `export=True` is passed), it will either:
+
+        - Compile from a HuggingFace checkpoint to an RBLN model
+        - Or load an already-compiled RBLN model directory/repository
+
+        Args:
+            model_id:
+                HF repo id or local path. For compiled models, this should point to a directory
+                (optionally under `subfolder`) that contains `*.rbln` files and `rbln_config.json`.
+            export:
+                Force compilation from a HuggingFace checkpoint. When `None`, this is inferred by
+                checking whether compiled artifacts exist at `model_id`.
+            rbln_config:
+                RBLN compilation/runtime configuration. May be provided as a dictionary or as an
+                instance of the specific model's config class (e.g., `RBLNLlamaForCausalLMConfig`).
+            kwargs: Additional keyword arguments.
+                - Arguments prefixed with `rbln_` are forwarded to the RBLN config.
+                - Remaining arguments are forwarded to the HuggingFace loader (e.g., `revision`,
+                  `token`, `trust_remote_code`, `cache_dir`, `subfolder`, `local_files_only`).
+
+        Returns:
+            An instantiated RBLN model ready for inference on RBLN NPUs.
+        """
+        rbln_cls = cls.get_rbln_cls(model_id, export=export, **kwargs)
+        return rbln_cls.from_pretrained(model_id, export=export, rbln_config=rbln_config, **kwargs)
 
     @classmethod
-    def from_model(
+    def from_model(
+        cls,
+        model: PreTrainedModel,
+        config: Optional[PretrainedConfig] = None,
+        rbln_config: Optional[Union[RBLNModelConfig, Dict]] = None,
+        **kwargs: Any,
+    ) -> RBLNBaseModel:
+        """
+        Convert and compile an in-memory HuggingFace model into an RBLN model.
+
+        This method resolves the appropriate concrete `RBLN*` class from the input model's class
+        name (e.g., `LlamaForCausalLM` -> `RBLNLlamaForCausalLM`) and then delegates to that
+        class's `from_model()` implementation.
+
+        Args:
+            model: A HuggingFace model instance to convert.
+            config: The configuration object associated with the model.
+            rbln_config:
+                RBLN compilation/runtime configuration. May be provided as a dictionary or as an
+                instance of the specific model's config class.
+            kwargs: Additional keyword arguments.
+                - Arguments prefixed with `rbln_` are forwarded to the RBLN config.
+
+        Returns:
+            An instantiated RBLN model ready for inference on RBLN NPUs.
+        """
         rbln_cls = get_rbln_model_cls(f"RBLN{model.__class__.__name__}")
-        return rbln_cls.from_model(model,
+        return rbln_cls.from_model(model, config=config, rbln_config=rbln_config, **kwargs)
 
     @staticmethod
     def register(rbln_cls: Type[RBLNBaseModel], exist_ok: bool = False):
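A sketch of both auto-class entry points, assuming a GPT-2 checkpoint supported by optimum-rbln (model id and config keys illustrative):

from transformers import AutoModelForCausalLM
from optimum.rbln import RBLNAutoModelForCausalLM

# from_pretrained: resolves GPT2LMHeadModel -> RBLNGPT2LMHeadModel, then compiles.
model = RBLNAutoModelForCausalLM.from_pretrained("gpt2", export=True, rbln_max_seq_len=1024)

# from_model: convert an already-instantiated HuggingFace model instead of a model id.
hf_model = AutoModelForCausalLM.from_pretrained("gpt2")
model = RBLNAutoModelForCausalLM.from_model(hf_model, rbln_config={"max_seq_len": 1024})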
optimum/rbln/transformers/models/auto/modeling_auto.py
CHANGED

@@ -57,75 +57,106 @@ MODEL_FOR_CAUSAL_LM_MAPPING_NAMES.update(
 
 
 class RBLNAutoModel(_BaseAutoModelClass):
+    """Automatically detect all supported transformers models."""
+
     _model_mapping = MODEL_MAPPING
     _model_mapping_names = MODEL_MAPPING_NAMES
 
 
 class RBLNAutoModelForCTC(_BaseAutoModelClass):
+    """Automatically detect Connectionist Temporal Classification (CTC) head Models."""
+
     _model_mapping = MODEL_FOR_CTC_MAPPING
     _model_mapping_names = MODEL_FOR_CTC_MAPPING_NAMES
 
 
 class RBLNAutoModelForCausalLM(_BaseAutoModelClass):
+    """Automatically detect Causal Language Models."""
+
+    """"""
     _model_mapping = MODEL_FOR_CAUSAL_LM_MAPPING
     _model_mapping_names = MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
 
 
 class RBLNAutoModelForSeq2SeqLM(_BaseAutoModelClass):
+    """Automatically detect Sequence to Sequence Language Models."""
+
     _model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
     _model_mapping_names = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES
 
 
 class RBLNAutoModelForSpeechSeq2Seq(_BaseAutoModelClass):
+    """Automatically detect Sequence to Sequence Generation Models."""
+
     _model_mapping = MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING
     _model_mapping_names = MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES
 
 
 class RBLNAutoModelForDepthEstimation(_BaseAutoModelClass):
+    """Automatically detect Depth Estimation Models."""
+
     _model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING
     _model_mapping_names = MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES
 
 
 class RBLNAutoModelForSequenceClassification(_BaseAutoModelClass):
+    """Automatically detect Sequence Classification Models."""
+
     _model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
     _model_mapping_names = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES
 
 
 class RBLNAutoModelForVision2Seq(_BaseAutoModelClass):
+    """Automatically detect Vision to Sequence Generation Models."""
+
     _model_mapping = MODEL_FOR_VISION_2_SEQ_MAPPING
     _model_mapping_names = MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES
 
 
 class RBLNAutoModelForImageTextToText(_BaseAutoModelClass):
+    """Automatically detect Image and Text to Text Generation Models."""
+
     _model_mapping = MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING
     _model_mapping_names = MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES
 
 
 class RBLNAutoModelForMaskedLM(_BaseAutoModelClass):
+    """Automatically detect Masked Language Models."""
+
     _model_mapping = MODEL_FOR_MASKED_LM_MAPPING
     _model_mapping_names = MODEL_FOR_MASKED_LM_MAPPING_NAMES
 
 
 class RBLNAutoModelForAudioClassification(_BaseAutoModelClass):
+    """Automatically detect Audio Classification Models."""
+
     _model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
     _model_mapping_names = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES
 
 
 class RBLNAutoModelForImageClassification(_BaseAutoModelClass):
+    """Automatically detect Image Classification Models."""
+
     _model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
     _model_mapping_names = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES
 
 
 class RBLNAutoModelForQuestionAnswering(_BaseAutoModelClass):
+    """Automatically detect Question Answering Models."""
+
     _model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING
     _model_mapping_names = MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES
 
 
 class RBLNAutoModelForTextEncoding(_BaseAutoModelClass):
+    """Automatically detect Text Encoding Models."""
+
     _model_mapping = MODEL_FOR_TEXT_ENCODING_MAPPING
     _model_mapping_names = MODEL_FOR_TEXT_ENCODING_MAPPING_NAMES
 
 
 class RBLNAutoModelForZeroShotObjectDetection(_BaseAutoModelClass):
+    """Automatically detect Zero Shot Object Detection Models."""
+
     _model_mapping = MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING
     _model_mapping_names = MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES
optimum/rbln/transformers/models/blip_2/configuration_blip_2.py
CHANGED

@@ -42,11 +42,9 @@ class RBLNBlip2QFormerModelConfig(RBLNModelConfig):
     ):
         """
         Args:
-
-
-
-        Raises:
-            ValueError: If batch_size is not a positive integer.
+            num_query_tokens (Optional[int]): The number of query tokens passed through the Transformer.
+            image_text_hidden_size (Optional[int]): Dimensionality of the hidden state of the image-text fusion layer.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
         """
         super().__init__(**kwargs)
         self.num_query_tokens = num_query_tokens
@@ -68,8 +66,9 @@ class RBLNBlip2ForConditionalGenerationConfig(RBLNModelConfig):
         Args:
             batch_size (Optional[int]): The batch size for inference. Defaults to 1.
             vision_model (Optional[RBLNModelConfig]): Configuration for the vision encoder component.
+            qformer (Optional[RBLNModelConfig]): Configuration for the RBLN-optimized BLIP-2 Q-Former model.
             language_model (Optional[RBLNModelConfig]): Configuration for the language model component.
-
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
         Raises:
             ValueError: If batch_size is not a positive integer.
optimum/rbln/transformers/models/blip_2/modeling_blip_2.py
CHANGED

@@ -112,7 +112,7 @@ class RBLNBlip2VisionModel(RBLNModel):
 
     def forward(
         self,
-        pixel_values,
+        pixel_values: torch.FloatTensor,
         output_attentions: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
optimum/rbln/transformers/models/clip/configuration_clip.py
CHANGED

@@ -22,10 +22,10 @@ class RBLNCLIPTextModelConfig(RBLNModelConfig):
         """
         Args:
             batch_size (Optional[int]): The batch size for text processing. Defaults to 1.
-
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
         Raises:
-            ValueError: If batch_size is not a positive integer.
+            ValueError: If `batch_size` is not a positive integer.
         """
         super().__init__(**kwargs)
         self.batch_size = batch_size or 1
@@ -57,10 +57,13 @@ class RBLNCLIPVisionModelConfig(RBLNModelConfig):
             batch_size (Optional[int]): The batch size for image processing. Defaults to 1.
             image_size (Optional[int]): The size of input images. Can be an integer for square images,
                 a tuple/list (height, width), or a dictionary with 'height' and 'width' keys.
-
+            interpolate_pos_encoding (Optional[bool]): Whether or not to interpolate pre-trained position encodings. Defaults to `False`.
+            output_hidden_states (Optional[bool]): Whether or not to return the hidden states of all layers.
+            output_attentions (Optional[bool]): Whether or not to return the attention tensors of all attention layers.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
         Raises:
-            ValueError: If batch_size is not a positive integer.
+            ValueError: If `batch_size` is not a positive integer.
         """
         super().__init__(**kwargs)
         self.batch_size = batch_size or 1
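The constructor arguments documented above can also be passed as a config instance rather than a dict. A sketch, assuming both classes are exported at the package root (checkpoint id illustrative):

from optimum.rbln import RBLNCLIPVisionModel, RBLNCLIPVisionModelConfig

cfg = RBLNCLIPVisionModelConfig(
    batch_size=1,
    image_size=224,             # square input; a (height, width) tuple also works
    output_hidden_states=True,  # request hidden states of all layers at compile time
)
model = RBLNCLIPVisionModel.from_pretrained(
    "openai/clip-vit-base-patch32",  # illustrative checkpoint id
    export=True,
    rbln_config=cfg,
)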
optimum/rbln/transformers/models/clip/modeling_clip.py
CHANGED

@@ -83,7 +83,15 @@ class RBLNCLIPTextModel(RBLNModel):
         rbln_config.set_compile_cfgs([RBLNCompileConfig(input_info=input_info)])
         return rbln_config
 
-    def forward(self, input_ids: torch.LongTensor, return_dict: bool = None, **kwargs) -> torch.FloatTensor:
+    def forward(self, input_ids: torch.LongTensor, return_dict: Optional[bool] = None, **kwargs) -> torch.FloatTensor:
+        """
+        Forward pass for the RBLN-optimized CLIP text encoder model.
+
+        Args:
+            input_ids (torch.LongTensor): The input ids to the model.
+            return_dict (Optional[bool]): Whether to return a dictionary of outputs.
+        """
+
         # To ignore using attention_mask, we override forward method.
         output = super().forward(input_ids, return_dict=return_dict)
         return output
@@ -202,13 +210,24 @@ class RBLNCLIPVisionModel(RBLNModel):
 
     def forward(
         self,
-        pixel_values:
+        pixel_values: torch.FloatTensor,
         return_dict: bool = True,
-        output_attentions: bool = None,
-        output_hidden_states: bool = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
         interpolate_pos_encoding: bool = False,
         **kwargs,
     ) -> Union[Tuple, BaseModelOutputWithPooling]:
+        """
+        Forward pass for the RBLN-optimized CLIP vision encoder model.
+
+        Args:
+            pixel_values (torch.Tensor): The pixel values to the model.
+            return_dict (bool): Whether to return a dictionary of outputs.
+            output_attentions (Optional[bool]): Whether to return attentions.
+            output_hidden_states (Optional[bool]): Whether to return hidden states.
+            interpolate_pos_encoding (bool): Whether to interpolate position encoding.
+        """
+
         if len(kwargs) > 0 and any(value is not None for value in kwargs.values()):
             logger.warning(
                 f"Currently, optimum-rbln does not support kwargs {kwargs.keys()} for {self.__class__.__name__}."
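Calling the compiled vision encoder then mirrors the HuggingFace original. A sketch, assuming `model` is the `RBLNCLIPVisionModel` compiled above:

import torch

pixel_values = torch.randn(1, 3, 224, 224)      # (batch, channels, height, width)
out = model(pixel_values, return_dict=True)     # BaseModelOutputWithPooling-style object
pooled = out.pooler_output
last_hidden = out.last_hidden_state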
optimum/rbln/transformers/models/colpali/configuration_colpali.py
CHANGED

@@ -54,11 +54,11 @@ class RBLNColPaliForRetrievalConfig(RBLNModelConfig):
     ):
         """
         Args:
-            vision_tower (Optional[RBLNModelConfig]): Configuration for the vision encoder component.
             max_seq_lens (Union[int, List[int]]): The maximum sequence lengths for the language model.
                 This can be multiple values, and the model will be compiled for each max_seq_len, allowing selection of the most appropriate max_seq_len at inference time.
             output_hidden_states (Optional[bool]): Whether to output the hidden states of the language model.
-
+            vision_tower (Optional[RBLNModelConfig]): Configuration for the vision encoder component.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
         Raises:
             ValueError: If batch_size is not a positive integer.
         """