optimum-rbln 0.8.4a6__py3-none-any.whl → 0.8.4a8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66)
  1. optimum/rbln/__version__.py +2 -2
  2. optimum/rbln/configuration_utils.py +41 -3
  3. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +1 -1
  4. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +3 -3
  5. optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +1 -1
  6. optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +2 -2
  7. optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +7 -2
  8. optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +7 -2
  9. optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +1 -1
  10. optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +1 -1
  11. optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +2 -2
  12. optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +1 -1
  13. optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +3 -3
  14. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +1 -1
  15. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +1 -1
  16. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +1 -1
  17. optimum/rbln/diffusers/modeling_diffusers.py +7 -3
  18. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +31 -3
  19. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +28 -3
  20. optimum/rbln/diffusers/models/autoencoders/vq_model.py +31 -3
  21. optimum/rbln/diffusers/models/transformers/prior_transformer.py +1 -1
  22. optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +9 -1
  23. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +9 -1
  24. optimum/rbln/diffusers/models/unets/unet_2d_condition.py +6 -3
  25. optimum/rbln/diffusers/pipelines/auto_pipeline.py +45 -8
  26. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +1 -1
  27. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +1 -1
  28. optimum/rbln/modeling.py +17 -13
  29. optimum/rbln/modeling_base.py +11 -9
  30. optimum/rbln/transformers/configuration_generic.py +3 -3
  31. optimum/rbln/transformers/modeling_generic.py +1 -0
  32. optimum/rbln/transformers/models/auto/auto_factory.py +67 -7
  33. optimum/rbln/transformers/models/auto/modeling_auto.py +31 -0
  34. optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +5 -6
  35. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +1 -1
  36. optimum/rbln/transformers/models/clip/configuration_clip.py +7 -4
  37. optimum/rbln/transformers/models/clip/modeling_clip.py +23 -4
  38. optimum/rbln/transformers/models/colpali/configuration_colpali.py +2 -2
  39. optimum/rbln/transformers/models/colpali/modeling_colpali.py +38 -6
  40. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +1 -1
  41. optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +23 -0
  42. optimum/rbln/transformers/models/exaone/modeling_exaone.py +42 -4
  43. optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +17 -2
  44. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +7 -8
  45. optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +12 -6
  46. optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +6 -2
  47. optimum/rbln/transformers/models/llava/configuration_llava.py +6 -2
  48. optimum/rbln/transformers/models/llava/modeling_llava.py +1 -0
  49. optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +2 -2
  50. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +57 -78
  51. optimum/rbln/transformers/models/midm/modeling_midm.py +42 -4
  52. optimum/rbln/transformers/models/pixtral/configuration_pixtral.py +1 -1
  53. optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +18 -3
  54. optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +2 -2
  55. optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +1 -1
  56. optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +2 -2
  57. optimum/rbln/transformers/models/siglip/configuration_siglip.py +1 -1
  58. optimum/rbln/transformers/models/swin/configuration_swin.py +1 -1
  59. optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +1 -1
  60. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +1 -0
  61. optimum/rbln/transformers/models/whisper/configuration_whisper.py +1 -1
  62. optimum/rbln/transformers/models/whisper/modeling_whisper.py +1 -0
  63. {optimum_rbln-0.8.4a6.dist-info → optimum_rbln-0.8.4a8.dist-info}/METADATA +1 -1
  64. {optimum_rbln-0.8.4a6.dist-info → optimum_rbln-0.8.4a8.dist-info}/RECORD +66 -66
  65. {optimum_rbln-0.8.4a6.dist-info → optimum_rbln-0.8.4a8.dist-info}/WHEEL +0 -0
  66. {optimum_rbln-0.8.4a6.dist-info → optimum_rbln-0.8.4a8.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/diffusers/pipelines/auto_pipeline.py CHANGED
@@ -15,7 +15,7 @@
 
  import importlib
  from pathlib import Path
- from typing import Type, Union
+ from typing import Any, Dict, Type, Union
 
  from diffusers.models.controlnets import ControlNetUnionModel
  from diffusers.pipelines.auto_pipeline import (
@@ -29,6 +29,7 @@ from diffusers.pipelines.auto_pipeline import (
  )
  from huggingface_hub.utils import validate_hf_hub_args
 
+ from optimum.rbln.configuration_utils import RBLNModelConfig
  from optimum.rbln.modeling_base import RBLNBaseModel
  from optimum.rbln.utils.model_utils import (
      MODEL_MAPPING,
@@ -168,14 +169,44 @@ class RBLNAutoPipelineBase:
 
      @classmethod
      @validate_hf_hub_args
-     def from_pretrained(cls, model_id, **kwargs):
-         rbln_cls = cls.get_rbln_cls(model_id, **kwargs)
-         return rbln_cls.from_pretrained(model_id, **kwargs)
+     def from_pretrained(
+         cls,
+         model_id: Union[str, Path],
+         *,
+         export: bool = None,
+         rbln_config: Union[Dict[str, Any], RBLNModelConfig] = {},
+         **kwargs: Any,
+     ):
+         """
+         Load an RBLN-accelerated Diffusers pipeline from a pretrained checkpoint or a compiled RBLN artifact.
 
-     @classmethod
-     def from_model(cls, model, **kwargs):
-         rbln_cls = get_rbln_model_cls(f"RBLN{model.__class__.__name__}")
-         return rbln_cls.from_model(model, **kwargs)
+         This method determines the concrete `RBLN*` model class that corresponds to the
+         underlying Diffusers pipeline architecture and dispatches to that class's
+         `from_pretrained()` implementation. If a compiled RBLN folder is detected at `model_id`
+         (or `export=False` is explicitly passed), it loads the compiled artifacts; otherwise it
+         compiles from the original Diffusers checkpoint.
+
+         Args:
+             model_id:
+                 HF repo id or local path. For compiled models, this should point to a directory
+                 (optionally under `subfolder`) that contains `*.rbln` files and `rbln_config.json`.
+             export:
+                 Force compilation from a Diffusers checkpoint. When `None`, this is inferred by
+                 checking whether compiled artifacts exist at `model_id`.
+             rbln_config:
+                 RBLN compilation/runtime configuration. May be provided as a dictionary or as an
+                 instance of the specific model's config class (e.g., `RBLNFluxPipelineConfig`).
+             kwargs: Additional keyword arguments.
+                 - Arguments prefixed with `rbln_` are forwarded to the RBLN config.
+                 - Remaining arguments are forwarded to the Diffusers loader.
+
+         Returns:
+             RBLNBaseModel: An instantiated RBLN model wrapping the Diffusers pipeline, ready for
+                 inference on RBLN NPUs.
+         """
+         rbln_cls = cls.get_rbln_cls(model_id, export=export, **kwargs)
+         return rbln_cls.from_pretrained(model_id, export=export, rbln_config=rbln_config, **kwargs)
 
      @staticmethod
      def register(rbln_cls: Type[RBLNBaseModel], exist_ok=False):
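For orientation, here is a minimal usage sketch of the new AutoPipeline entry point. The checkpoint id, save path, and config key are illustrative, and the top-level import is assumed rather than taken from this diff:

```python
from optimum.rbln import RBLNAutoPipelineForText2Image  # assumed top-level export

# First run: compile the Diffusers checkpoint for the NPU.
pipe = RBLNAutoPipelineForText2Image.from_pretrained(
    "runwayml/stable-diffusion-v1-5",  # illustrative checkpoint id
    export=True,
    rbln_config={"batch_size": 1},  # illustrative config key
)
pipe.save_pretrained("sd15-rbln")

# Later runs: point at the compiled folder; export=None would detect it as well.
pipe = RBLNAutoPipelineForText2Image.from_pretrained("sd15-rbln", export=False)
```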
@@ -198,11 +229,15 @@ class RBLNAutoPipelineBase:
 
 
  class RBLNAutoPipelineForText2Image(RBLNAutoPipelineBase, AutoPipelineForText2Image):
+     """Text2Image AutoPipeline for RBLN NPUs."""
+
      _model_mapping = AUTO_TEXT2IMAGE_PIPELINES_MAPPING
      _model_mapping_names = {x[0]: x[1].__name__ for x in AUTO_TEXT2IMAGE_PIPELINES_MAPPING.items()}
 
 
  class RBLNAutoPipelineForImage2Image(RBLNAutoPipelineBase, AutoPipelineForImage2Image):
+     """Image2Image AutoPipeline for RBLN NPUs."""
+
      _model_mapping = AUTO_IMAGE2IMAGE_PIPELINES_MAPPING
      _model_mapping_names = {x[0]: x[1].__name__ for x in AUTO_IMAGE2IMAGE_PIPELINES_MAPPING.items()}
 
@@ -237,6 +272,8 @@ class RBLNAutoPipelineForImage2Image(RBLNAutoPipelineBase, AutoPipelineForImage2Image):
 
 
  class RBLNAutoPipelineForInpainting(RBLNAutoPipelineBase, AutoPipelineForInpainting):
+     """Inpainting AutoPipeline for RBLN NPUs."""
+
      _model_mapping = AUTO_INPAINT_PIPELINES_MAPPING
      _model_mapping_names = {x[0]: x[1].__name__ for x in AUTO_INPAINT_PIPELINES_MAPPING.items()}
 
optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py CHANGED
@@ -113,7 +113,7 @@ class RBLNCosmosTextToWorldPipeline(RBLNDiffusionMixin, CosmosTextToWorldPipeline):
              Configuration options for RBLN compilation. Can include settings for specific submodules
              such as `text_encoder`, `unet`, and `vae`. Configuration can be tailored to the specific
              pipeline being compiled.
-         **kwargs:
+         kwargs:
              Additional arguments to pass to the underlying diffusion pipeline constructor or the
              RBLN compilation process. These may include parameters specific to individual submodules
              or the particular diffusion pipeline being used.
optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py CHANGED
@@ -113,7 +113,7 @@ class RBLNCosmosVideoToWorldPipeline(RBLNDiffusionMixin, CosmosVideoToWorldPipeline):
              Configuration options for RBLN compilation. Can include settings for specific submodules
              such as `text_encoder`, `unet`, and `vae`. Configuration can be tailored to the specific
              pipeline being compiled.
-         **kwargs:
+         kwargs:
              Additional arguments to pass to the underlying diffusion pipeline constructor or the
              RBLN compilation process. These may include parameters specific to individual submodules
              or the particular diffusion pipeline being used.
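Both Cosmos docstrings above describe submodule-scoped settings. A hedged sketch of what such a nested config could look like; the top-level export, checkpoint id, nested-dict shape, and config keys are all assumptions, not taken from the diff:

```python
from optimum.rbln import RBLNCosmosTextToWorldPipeline  # assumed top-level export

pipe = RBLNCosmosTextToWorldPipeline.from_pretrained(
    "nvidia/Cosmos-1.0-Diffusion-7B-Text2World",  # illustrative checkpoint id
    export=True,
    rbln_config={
        # Submodule-scoped options, per the docstring above; exact accepted
        # keys come from each submodule's own RBLN config class.
        "text_encoder": {"batch_size": 1},
        "vae": {"batch_size": 1},
    },
)
```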
optimum/rbln/modeling.py CHANGED
@@ -85,11 +85,13 @@ class RBLNModel(RBLNBaseModel):
          This method performs the actual model conversion and compilation process.
 
          Args:
-             model: The PyTorch model to be compiled. The object must be an instance of the HuggingFace transformers PreTrainedModel class.
-             rbln_config: Configuration for RBLN model compilation and runtime. This can be provided as a dictionary or an instance of the model's configuration class (e.g., `RBLNLlamaForCausalLMConfig` for Llama models).
+             model (PreTrainedModel): The PyTorch model to be compiled.
+                 The object must be an instance of the HuggingFace transformers PreTrainedModel class.
+             config (Optional[PretrainedConfig]): The configuration object associated with the model.
+             rbln_config (Optional[Union[RBLNModelConfig, Dict]]): Configuration for RBLN model compilation and runtime.
+                 This can be provided as a dictionary or an instance of the model's configuration class (e.g., `RBLNLlamaForCausalLMConfig` for Llama models).
                  For detailed configuration options, see the specific model's configuration class documentation.
-
-             kwargs: Additional keyword arguments. Arguments with the prefix 'rbln_' are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.
+             kwargs: Additional keyword arguments. Arguments with the prefix `rbln_` are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.
 
          The method performs the following steps:
@@ -99,7 +101,7 @@
          4. Saves the compiled model and configurations
 
          Returns:
-             A RBLN model instance ready for inference on RBLN NPU devices.
+             (RBLNModel): An RBLN model instance ready for inference on RBLN NPU devices.
          """
          preprocessors = kwargs.pop("preprocessors", [])
          rbln_config, kwargs = cls.prepare_rbln_config(rbln_config=rbln_config, **kwargs)
@@ -241,29 +243,31 @@ class RBLNModel(RBLNBaseModel):
 
      def forward(self, *args: Any, return_dict: Optional[bool] = None, **kwargs: Any) -> Any:
          """
-         Defines the forward pass of the RBLN model, providing a drop-in replacement for HuggingFace PreTrainedModel.
+         Defines the forward pass of `RBLNModel`. The interface mirrors HuggingFace conventions so it can act as a drop-in
+         replacement in many cases.
 
-         This method executes the compiled RBLN model on RBLN NPU devices while maintaining full compatibility
-         with HuggingFace transformers and diffusers APIs. The RBLNModel can be used as a direct substitute
-         for any HuggingFace nn.Module/PreTrainedModel, enabling seamless integration into existing workflows.
+         This method executes the compiled RBLN model on RBLN NPU devices while remaining fully compatible with Hugging Face
+         Transformers and Diffusers APIs. In practice, `RBLNModel` can replace models built on `torch.nn.Module`, including
+         `transformers.PreTrainedModel` implementations and Diffusers components based on `diffusers.ModelMixin`, enabling
+         seamless integration into existing workflows.
 
          Args:
-             *args: Variable length argument list containing model inputs. The format matches the original
+             args: Variable length argument list containing model inputs. The format matches the original
                  HuggingFace model's forward method signature (e.g., input_ids, attention_mask for
                  transformers models, or sample, timestep for diffusers models).
              return_dict:
                  Whether to return outputs as a dictionary-like object or as a tuple. When `None`:
                  - For transformers models: Uses `self.config.use_return_dict` (typically `True`)
                  - For diffusers models: Defaults to `True`
-             **kwargs: Arbitrary keyword arguments containing additional model inputs and parameters,
+             kwargs: Arbitrary keyword arguments containing additional model inputs and parameters,
                  matching the original HuggingFace model's interface.
 
          Returns:
              Model outputs in the same format as the original HuggingFace model.
 
-             - If `return_dict=True`: Returns a dictionary-like object (e.g., BaseModelOutput,
+             If `return_dict=True`, returns a dictionary-like object (e.g., BaseModelOutput,
                  CausalLMOutput) with named fields such as `logits`, `hidden_states`, etc.
-             - If `return_dict=False`: Returns a tuple containing the raw model outputs.
+             If `return_dict=False`, returns a tuple containing the raw model outputs.
 
          Note:
              - This method maintains the exact same interface as the original HuggingFace model's forward method
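A small sketch of the `return_dict` behavior documented above; the model path and dummy input shapes are illustrative, and the top-level auto-class import is assumed:

```python
import torch

from optimum.rbln import RBLNAutoModelForSequenceClassification  # assumed top-level export

model = RBLNAutoModelForSequenceClassification.from_pretrained("my-rbln-model")  # illustrative path
input_ids = torch.ones(1, 8, dtype=torch.long)       # dummy token ids
attention_mask = torch.ones(1, 8, dtype=torch.long)  # dummy mask

# return_dict=True: a dict-like output object with named fields.
out = model(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)
print(out.logits.shape)

# return_dict=False: a plain tuple of raw outputs.
logits = model(input_ids=input_ids, attention_mask=attention_mask, return_dict=False)[0]
```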
optimum/rbln/modeling_base.py CHANGED
@@ -373,7 +373,7 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
      def from_pretrained(
          cls: Type["RBLNBaseModel"],
          model_id: Union[str, Path],
-         export: bool = None,
+         export: Optional[bool] = None,
          rbln_config: Optional[Union[Dict, RBLNModelConfig]] = None,
          **kwargs: Any,
      ) -> "RBLNBaseModel":
@@ -382,15 +382,17 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
          User can use this function to load a pre-trained model from the HuggingFace library and convert it to a RBLN model to be run on RBLN NPUs.
 
          Args:
-             model_id: The model id of the pre-trained model to be loaded. It can be downloaded from the HuggingFace model hub or a local path, or a model id of a compiled model using the RBLN Compiler.
-             export: A boolean flag to indicate whether the model should be compiled. If None, it will be determined based on the existence of the compiled model files in the model_id.
-             rbln_config: Configuration for RBLN model compilation and runtime. This can be provided as a dictionary or an instance of the model's configuration class (e.g., `RBLNLlamaForCausalLMConfig` for Llama models).
+             model_id (Union[str, Path]): The model id of the pre-trained model to be loaded.
+                 It can be downloaded from the HuggingFace model hub or a local path, or a model id of a compiled model using the RBLN Compiler.
+             export (Optional[bool]): A boolean flag to indicate whether the model should be compiled.
+                 If None, it will be determined based on the existence of the compiled model files in the model_id.
+             rbln_config (Optional[Union[Dict, RBLNModelConfig]]): Configuration for RBLN model compilation and runtime.
+                 This can be provided as a dictionary or an instance of the model's configuration class (e.g., `RBLNLlamaForCausalLMConfig` for Llama models).
                  For detailed configuration options, see the specific model's configuration class documentation.
-
-             kwargs: Additional keyword arguments. Arguments with the prefix 'rbln_' are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.
+             kwargs: Additional keyword arguments. Arguments with the prefix `rbln_` are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.
 
          Returns:
-             A RBLN model instance ready for inference on RBLN NPU devices.
+             (RBLNModel): An RBLN model instance ready for inference on RBLN NPU devices.
          """
 
          if isinstance(model_id, Path):
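The docstring above describes two equivalent ways to pass RBLN options. A minimal sketch; the checkpoint id and config keys (`max_seq_len` in particular) are illustrative assumptions:

```python
from optimum.rbln import RBLNLlamaForCausalLM  # class family named in the docstring

# Option 1: an explicit rbln_config dictionary.
model = RBLNLlamaForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",  # illustrative checkpoint id
    export=True,
    rbln_config={"batch_size": 1, "max_seq_len": 4096},  # illustrative keys
)

# Option 2: `rbln_`-prefixed keyword arguments, stripped and forwarded to the config.
model = RBLNLlamaForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    export=True,
    rbln_batch_size=1,
    rbln_max_seq_len=4096,
)
```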
@@ -536,9 +538,9 @@ class RBLNBaseModel(SubModulesMixin, PushToHubMixin, PreTrainedModel):
          [`~optimum.rbln.modeling_base.RBLNBaseModel.from_pretrained`] class method.
 
          Args:
-             save_directory (`Union[str, Path]`):
+             save_directory (Union[str, Path]):
                  Directory where to save the model file.
-             push_to_hub (`bool`, *optional*, defaults to `False`):
+             push_to_hub (bool):
                  Whether or not to push your model to the HuggingFace model hub after saving it.
 
          """
optimum/rbln/transformers/configuration_generic.py CHANGED
@@ -34,7 +34,7 @@ class RBLNTransformerEncoderConfig(RBLNModelConfig):
              batch_size (Optional[int]): The batch size for inference. Defaults to 1.
              model_input_names (Optional[List[str]]): Names of the input tensors for the model.
                  Defaults to class-specific rbln_model_input_names if not provided.
-             **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+             kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
          Raises:
              ValueError: If batch_size is not a positive integer.
@@ -61,7 +61,7 @@ class RBLNImageModelConfig(RBLNModelConfig):
              image_size (Optional[Union[int, Tuple[int, int]]]): The size of input images.
                  Can be an integer for square images or a tuple (height, width).
              batch_size (Optional[int]): The batch size for inference. Defaults to 1.
-             **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+             kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
          Raises:
              ValueError: If batch_size is not a positive integer.
@@ -133,7 +133,7 @@ class RBLNModelForAudioClassificationConfig(RBLNModelConfig):
              batch_size (Optional[int]): The batch size for inference. Defaults to 1.
              max_length (Optional[int]): Maximum length of the audio input in time dimension.
              num_mel_bins (Optional[int]): Number of Mel frequency bins for audio processing.
-             **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+             kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
          Raises:
              ValueError: If batch_size is not a positive integer.
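A minimal sketch of constructing one of these generic configs directly, importing from the module path shown in this diff; whether the generic class is meant to be instantiated directly, rather than through a model-specific subclass, is an assumption:

```python
from optimum.rbln.transformers.configuration_generic import RBLNImageModelConfig

# image_size may be an int (square) or a (height, width) tuple, per the Args above;
# batch_size defaults to 1 when omitted.
cfg = RBLNImageModelConfig(image_size=(224, 224), batch_size=2)
print(cfg.batch_size, cfg.image_size)
```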
optimum/rbln/transformers/modeling_generic.py CHANGED
@@ -253,6 +253,7 @@ class RBLNModelForAudioClassification(RBLNModel):
 
      A class to convert and run pre-trained transformers based AudioClassification models on RBLN devices.
      It implements the methods to convert a pre-trained transformers AudioClassification model into a RBLN transformer model by:
+
      - transferring the checkpoint weights of the original into an optimized RBLN graph,
      - compiling the resulting graph using the RBLN compiler.
 
optimum/rbln/transformers/models/auto/auto_factory.py CHANGED
@@ -15,13 +15,13 @@ import importlib
  import inspect
  import warnings
  from pathlib import Path
- from typing import Any, Type, Union
+ from typing import Any, Dict, Optional, Type, Union
 
  from transformers import AutoConfig, PretrainedConfig, PreTrainedModel
  from transformers.dynamic_module_utils import get_class_from_dynamic_module
  from transformers.models.auto.auto_factory import _get_model_class
 
- from optimum.rbln.configuration_utils import RBLNAutoConfig
+ from optimum.rbln.configuration_utils import RBLNAutoConfig, RBLNModelConfig
  from optimum.rbln.modeling_base import RBLNBaseModel
  from optimum.rbln.utils.model_utils import (
      MODEL_MAPPING,
@@ -178,14 +178,74 @@ class _BaseAutoModelClass:
          return rbln_config.rbln_model_cls_name
 
      @classmethod
-     def from_pretrained(cls, model_id: Union[str, Path], *args, **kwargs):
-         rbln_cls = cls.get_rbln_cls(model_id, *args, **kwargs)
-         return rbln_cls.from_pretrained(model_id, *args, **kwargs)
+     def from_pretrained(
+         cls,
+         model_id: Union[str, Path],
+         export: Optional[bool] = None,
+         rbln_config: Optional[Union[Dict, RBLNModelConfig]] = None,
+         **kwargs,
+     ):
+         """
+         Load an RBLN-accelerated model from a pretrained checkpoint or a compiled RBLN artifact.
+
+         This convenience method determines the concrete `RBLN*` model class that matches the
+         underlying HuggingFace architecture and dispatches to that class's
+         `from_pretrained()` implementation. Depending on whether a compiled RBLN folder is
+         detected (or if `export=True` is passed), it will either:
+
+         - Compile from a HuggingFace checkpoint to an RBLN model
+         - Or load an already-compiled RBLN model directory/repository
+
+         Args:
+             model_id:
+                 HF repo id or local path. For compiled models, this should point to a directory
+                 (optionally under `subfolder`) that contains `*.rbln` files and `rbln_config.json`.
+             export:
+                 Force compilation from a HuggingFace checkpoint. When `None`, this is inferred by
+                 checking whether compiled artifacts exist at `model_id`.
+             rbln_config:
+                 RBLN compilation/runtime configuration. May be provided as a dictionary or as an
+                 instance of the specific model's config class (e.g., `RBLNLlamaForCausalLMConfig`).
+             kwargs: Additional keyword arguments.
+                 - Arguments prefixed with `rbln_` are forwarded to the RBLN config.
+                 - Remaining arguments are forwarded to the HuggingFace loader (e.g., `revision`,
+                   `token`, `trust_remote_code`, `cache_dir`, `subfolder`, `local_files_only`).
+
+         Returns:
+             An instantiated RBLN model ready for inference on RBLN NPUs.
+         """
+         rbln_cls = cls.get_rbln_cls(model_id, export=export, **kwargs)
+         return rbln_cls.from_pretrained(model_id, export=export, rbln_config=rbln_config, **kwargs)
 
      @classmethod
-     def from_model(cls, model: PreTrainedModel, *args, **kwargs):
+     def from_model(
+         cls,
+         model: PreTrainedModel,
+         config: Optional[PretrainedConfig] = None,
+         rbln_config: Optional[Union[RBLNModelConfig, Dict]] = None,
+         **kwargs: Any,
+     ) -> RBLNBaseModel:
+         """
+         Convert and compile an in-memory HuggingFace model into an RBLN model.
+
+         This method resolves the appropriate concrete `RBLN*` class from the input model's class
+         name (e.g., `LlamaForCausalLM` -> `RBLNLlamaForCausalLM`) and then delegates to that
+         class's `from_model()` implementation.
+
+         Args:
+             model: A HuggingFace model instance to convert.
+             config: The configuration object associated with the model.
+             rbln_config:
+                 RBLN compilation/runtime configuration. May be provided as a dictionary or as an
+                 instance of the specific model's config class.
+             kwargs: Additional keyword arguments.
+                 - Arguments prefixed with `rbln_` are forwarded to the RBLN config.
+
+         Returns:
+             An instantiated RBLN model ready for inference on RBLN NPUs.
+         """
          rbln_cls = get_rbln_model_cls(f"RBLN{model.__class__.__name__}")
-         return rbln_cls.from_model(model, *args, **kwargs)
+         return rbln_cls.from_model(model, config=config, rbln_config=rbln_config, **kwargs)
 
      @staticmethod
      def register(rbln_cls: Type[RBLNBaseModel], exist_ok: bool = False):
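The two auto-factory entry points above, sketched together. The checkpoint id and config key are illustrative, and the top-level optimum.rbln import is assumed:

```python
from transformers import AutoModelForCausalLM

from optimum.rbln import RBLNAutoModelForCausalLM  # assumed top-level export

# Path A: resolve the RBLN class from the checkpoint's architecture and compile.
model = RBLNAutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",  # illustrative checkpoint id
    export=True,
    rbln_config={"batch_size": 1},  # illustrative config key
)

# Path B: convert an in-memory HuggingFace model.
# The class is resolved by name: LlamaForCausalLM -> RBLNLlamaForCausalLM.
hf_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
model = RBLNAutoModelForCausalLM.from_model(hf_model, rbln_config={"batch_size": 1})
```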
optimum/rbln/transformers/models/auto/modeling_auto.py CHANGED
@@ -57,75 +57,106 @@ MODEL_FOR_CAUSAL_LM_MAPPING_NAMES.update(
 
 
  class RBLNAutoModel(_BaseAutoModelClass):
+     """Automatically detect all supported transformers models."""
+
      _model_mapping = MODEL_MAPPING
      _model_mapping_names = MODEL_MAPPING_NAMES
 
 
  class RBLNAutoModelForCTC(_BaseAutoModelClass):
+     """Automatically detect Connectionist Temporal Classification (CTC) head Models."""
+
      _model_mapping = MODEL_FOR_CTC_MAPPING
      _model_mapping_names = MODEL_FOR_CTC_MAPPING_NAMES
 
 
  class RBLNAutoModelForCausalLM(_BaseAutoModelClass):
+     """Automatically detect Causal Language Models."""
+
      _model_mapping = MODEL_FOR_CAUSAL_LM_MAPPING
      _model_mapping_names = MODEL_FOR_CAUSAL_LM_MAPPING_NAMES
 
 
  class RBLNAutoModelForSeq2SeqLM(_BaseAutoModelClass):
+     """Automatically detect Sequence to Sequence Language Models."""
+
      _model_mapping = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING
      _model_mapping_names = MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES
 
 
  class RBLNAutoModelForSpeechSeq2Seq(_BaseAutoModelClass):
+     """Automatically detect Speech Sequence to Sequence Models."""
+
      _model_mapping = MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING
      _model_mapping_names = MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES
 
 
  class RBLNAutoModelForDepthEstimation(_BaseAutoModelClass):
+     """Automatically detect Depth Estimation Models."""
+
      _model_mapping = MODEL_FOR_DEPTH_ESTIMATION_MAPPING
      _model_mapping_names = MODEL_FOR_DEPTH_ESTIMATION_MAPPING_NAMES
 
 
  class RBLNAutoModelForSequenceClassification(_BaseAutoModelClass):
+     """Automatically detect Sequence Classification Models."""
+
      _model_mapping = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING
      _model_mapping_names = MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES
 
 
  class RBLNAutoModelForVision2Seq(_BaseAutoModelClass):
+     """Automatically detect Vision to Sequence Generation Models."""
+
      _model_mapping = MODEL_FOR_VISION_2_SEQ_MAPPING
      _model_mapping_names = MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES
 
 
  class RBLNAutoModelForImageTextToText(_BaseAutoModelClass):
+     """Automatically detect Image and Text to Text Generation Models."""
+
      _model_mapping = MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING
      _model_mapping_names = MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES
 
 
  class RBLNAutoModelForMaskedLM(_BaseAutoModelClass):
+     """Automatically detect Masked Language Models."""
+
      _model_mapping = MODEL_FOR_MASKED_LM_MAPPING
      _model_mapping_names = MODEL_FOR_MASKED_LM_MAPPING_NAMES
 
 
  class RBLNAutoModelForAudioClassification(_BaseAutoModelClass):
+     """Automatically detect Audio Classification Models."""
+
      _model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
      _model_mapping_names = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING_NAMES
 
 
  class RBLNAutoModelForImageClassification(_BaseAutoModelClass):
+     """Automatically detect Image Classification Models."""
+
      _model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
      _model_mapping_names = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES
 
 
  class RBLNAutoModelForQuestionAnswering(_BaseAutoModelClass):
+     """Automatically detect Question Answering Models."""
+
      _model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING
      _model_mapping_names = MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES
 
 
  class RBLNAutoModelForTextEncoding(_BaseAutoModelClass):
+     """Automatically detect Text Encoding Models."""
+
      _model_mapping = MODEL_FOR_TEXT_ENCODING_MAPPING
      _model_mapping_names = MODEL_FOR_TEXT_ENCODING_MAPPING_NAMES
 
 
  class RBLNAutoModelForZeroShotObjectDetection(_BaseAutoModelClass):
+     """Automatically detect Zero Shot Object Detection Models."""
+
      _model_mapping = MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING
      _model_mapping_names = MODEL_FOR_ZERO_SHOT_OBJECT_DETECTION_MAPPING_NAMES
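The `register()` hook shown in both auto factories pairs with these classes. A hedged sketch of registering a custom implementation; the custom class is hypothetical, and the top-level import is assumed:

```python
from optimum.rbln import RBLNAutoModelForCausalLM  # assumed top-level export
from optimum.rbln.modeling import RBLNModel  # module path shown in this diff


class RBLNMyArchForCausalLM(RBLNModel):  # hypothetical custom RBLN class
    pass


# After registration, the auto class can resolve checkpoints of this architecture.
RBLNAutoModelForCausalLM.register(RBLNMyArchForCausalLM, exist_ok=False)
```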
optimum/rbln/transformers/models/blip_2/configuration_blip_2.py CHANGED
@@ -42,11 +42,9 @@ class RBLNBlip2QFormerModelConfig(RBLNModelConfig):
      ):
          """
          Args:
-             batch_size (Optional[int]): The batch size for inference. Defaults to 1.
-             **kwargs: Additional arguments passed to the parent RBLNModelConfig.
-
-         Raises:
-             ValueError: If batch_size is not a positive integer.
+             num_query_tokens (Optional[int]): The number of query tokens passed through the Transformer.
+             image_text_hidden_size (Optional[int]): Dimensionality of the hidden state of the image-text fusion layer.
+             kwargs: Additional arguments passed to the parent RBLNModelConfig.
          """
          super().__init__(**kwargs)
          self.num_query_tokens = num_query_tokens
@@ -68,8 +66,9 @@ class RBLNBlip2ForConditionalGenerationConfig(RBLNModelConfig):
          Args:
              batch_size (Optional[int]): The batch size for inference. Defaults to 1.
              vision_model (Optional[RBLNModelConfig]): Configuration for the vision encoder component.
+             qformer (Optional[RBLNModelConfig]): Configuration for the RBLN-optimized BLIP-2 Q-Former model.
              language_model (Optional[RBLNModelConfig]): Configuration for the language model component.
-             **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+             kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
          Raises:
              ValueError: If batch_size is not a positive integer.
optimum/rbln/transformers/models/blip_2/modeling_blip_2.py CHANGED
@@ -112,7 +112,7 @@ class RBLNBlip2VisionModel(RBLNModel):
 
      def forward(
          self,
-         pixel_values,
+         pixel_values: torch.FloatTensor,
          output_attentions: Optional[bool] = None,
          output_hidden_states: Optional[bool] = None,
          return_dict: Optional[bool] = None,
optimum/rbln/transformers/models/clip/configuration_clip.py CHANGED
@@ -22,10 +22,10 @@ class RBLNCLIPTextModelConfig(RBLNModelConfig):
          """
          Args:
              batch_size (Optional[int]): The batch size for text processing. Defaults to 1.
-             **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+             kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
          Raises:
-             ValueError: If batch_size is not a positive integer.
+             ValueError: If `batch_size` is not a positive integer.
          """
          super().__init__(**kwargs)
          self.batch_size = batch_size or 1
@@ -57,10 +57,13 @@ class RBLNCLIPVisionModelConfig(RBLNModelConfig):
              batch_size (Optional[int]): The batch size for image processing. Defaults to 1.
              image_size (Optional[int]): The size of input images. Can be an integer for square images,
                  a tuple/list (height, width), or a dictionary with 'height' and 'width' keys.
-             **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+             interpolate_pos_encoding (Optional[bool]): Whether or not to interpolate pre-trained position encodings. Defaults to `False`.
+             output_hidden_states (Optional[bool]): Whether or not to return the hidden states of all layers.
+             output_attentions (Optional[bool]): Whether or not to return the attentions tensors of all attention layers.
+             kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
          Raises:
-             ValueError: If batch_size is not a positive integer.
+             ValueError: If `batch_size` is not a positive integer.
          """
          super().__init__(**kwargs)
          self.batch_size = batch_size or 1
optimum/rbln/transformers/models/clip/modeling_clip.py CHANGED
@@ -83,7 +83,15 @@ class RBLNCLIPTextModel(RBLNModel):
          rbln_config.set_compile_cfgs([RBLNCompileConfig(input_info=input_info)])
          return rbln_config
 
-     def forward(self, input_ids: torch.LongTensor, return_dict: bool = None, **kwargs) -> torch.FloatTensor:
+     def forward(self, input_ids: torch.LongTensor, return_dict: Optional[bool] = None, **kwargs) -> torch.FloatTensor:
+         """
+         Forward pass for the RBLN-optimized CLIP text encoder model.
+
+         Args:
+             input_ids (torch.LongTensor): The input ids to the model.
+             return_dict (Optional[bool]): Whether to return a dictionary of outputs.
+         """
+
          # To ignore using attention_mask, we override forward method.
          output = super().forward(input_ids, return_dict=return_dict)
          return output
@@ -202,13 +210,24 @@ class RBLNCLIPVisionModel(RBLNModel):
 
      def forward(
          self,
-         pixel_values: Optional[torch.FloatTensor] = None,
+         pixel_values: torch.FloatTensor,
          return_dict: bool = True,
-         output_attentions: bool = None,
-         output_hidden_states: bool = None,
+         output_attentions: Optional[bool] = None,
+         output_hidden_states: Optional[bool] = None,
          interpolate_pos_encoding: bool = False,
          **kwargs,
      ) -> Union[Tuple, BaseModelOutputWithPooling]:
+         """
+         Forward pass for the RBLN-optimized CLIP vision encoder model.
+
+         Args:
+             pixel_values (torch.FloatTensor): The pixel values to the model.
+             return_dict (bool): Whether to return a dictionary of outputs.
+             output_attentions (Optional[bool]): Whether to return attentions.
+             output_hidden_states (Optional[bool]): Whether to return hidden states.
+             interpolate_pos_encoding (bool): Whether to interpolate position encoding.
+         """
+
          if len(kwargs) > 0 and any(value is not None for value in kwargs.values()):
              logger.warning(
                  f"Currently, optimum-rbln does not support kwargs {kwargs.keys()} for {self.__class__.__name__}."
optimum/rbln/transformers/models/colpali/configuration_colpali.py CHANGED
@@ -54,11 +54,11 @@ class RBLNColPaliForRetrievalConfig(RBLNModelConfig):
      ):
          """
          Args:
-             vision_tower (Optional[RBLNModelConfig]): Configuration for the vision encoder component.
              max_seq_lens (Union[int, List[int]]): The maximum sequence lengths for the language model.
                  This can be multiple values, and the model will be compiled for each max_seq_len, allowing selection of the most appropriate max_seq_len at inference time.
              output_hidden_states (Optional[bool]): Whether to output the hidden states of the language model.
-             **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+             vision_tower (Optional[RBLNModelConfig]): Configuration for the vision encoder component.
+             kwargs: Additional arguments passed to the parent RBLNModelConfig.
          Raises:
              ValueError: If batch_size is not a positive integer.
          """