optimum-rbln 0.8.0.post2__py3-none-any.whl → 0.8.1a1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their respective registries. It is provided for informational purposes only.
Files changed (127)
  1. optimum/rbln/__init__.py +2 -0
  2. optimum/rbln/__version__.py +2 -2
  3. optimum/rbln/configuration_utils.py +45 -33
  4. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +9 -2
  5. optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +4 -2
  6. optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +9 -2
  7. optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +4 -2
  8. optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +9 -2
  9. optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +9 -2
  10. optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +33 -9
  11. optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +30 -12
  12. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +22 -6
  13. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +16 -6
  14. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +16 -6
  15. optimum/rbln/diffusers/modeling_diffusers.py +16 -26
  16. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +11 -0
  17. optimum/rbln/diffusers/models/autoencoders/vae.py +1 -8
  18. optimum/rbln/diffusers/models/autoencoders/vq_model.py +11 -0
  19. optimum/rbln/diffusers/models/controlnet.py +13 -7
  20. optimum/rbln/diffusers/models/transformers/prior_transformer.py +10 -0
  21. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +2 -0
  22. optimum/rbln/diffusers/models/unets/unet_2d_condition.py +7 -0
  23. optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +1 -4
  24. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -0
  25. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -0
  26. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +7 -0
  27. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +7 -0
  28. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +7 -0
  29. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +48 -27
  30. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +7 -0
  31. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +7 -0
  32. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -0
  33. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +7 -0
  34. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +7 -0
  35. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +7 -0
  36. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -0
  37. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -0
  38. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -0
  39. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +7 -0
  40. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -0
  41. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +7 -0
  42. optimum/rbln/modeling.py +33 -35
  43. optimum/rbln/modeling_base.py +45 -107
  44. optimum/rbln/transformers/__init__.py +39 -47
  45. optimum/rbln/transformers/configuration_generic.py +16 -13
  46. optimum/rbln/transformers/modeling_generic.py +18 -19
  47. optimum/rbln/transformers/modeling_rope_utils.py +1 -1
  48. optimum/rbln/transformers/models/__init__.py +46 -4
  49. optimum/rbln/transformers/models/audio_spectrogram_transformer/__init__.py +17 -0
  50. optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +21 -0
  51. optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +28 -0
  52. optimum/rbln/transformers/models/auto/auto_factory.py +30 -12
  53. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +35 -4
  54. optimum/rbln/transformers/models/clip/configuration_clip.py +3 -3
  55. optimum/rbln/transformers/models/clip/modeling_clip.py +11 -12
  56. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +111 -14
  57. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +102 -35
  58. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +231 -175
  59. optimum/rbln/transformers/models/distilbert/__init__.py +19 -0
  60. optimum/rbln/transformers/models/distilbert/configuration_distilbert.py +19 -0
  61. optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +19 -0
  62. optimum/rbln/transformers/models/exaone/configuration_exaone.py +24 -1
  63. optimum/rbln/transformers/models/exaone/exaone_architecture.py +5 -1
  64. optimum/rbln/transformers/models/exaone/modeling_exaone.py +51 -5
  65. optimum/rbln/transformers/models/gemma/configuration_gemma.py +24 -1
  66. optimum/rbln/transformers/models/gemma/gemma_architecture.py +5 -1
  67. optimum/rbln/transformers/models/gemma/modeling_gemma.py +49 -0
  68. optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +3 -3
  69. optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +18 -250
  70. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +87 -236
  71. optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +4 -1
  72. optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +6 -1
  73. optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +12 -2
  74. optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +41 -4
  75. optimum/rbln/transformers/models/llama/configuration_llama.py +24 -1
  76. optimum/rbln/transformers/models/llama/modeling_llama.py +49 -0
  77. optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +2 -2
  78. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +33 -4
  79. optimum/rbln/transformers/models/midm/configuration_midm.py +24 -1
  80. optimum/rbln/transformers/models/midm/midm_architecture.py +6 -1
  81. optimum/rbln/transformers/models/midm/modeling_midm.py +51 -5
  82. optimum/rbln/transformers/models/mistral/configuration_mistral.py +24 -1
  83. optimum/rbln/transformers/models/mistral/modeling_mistral.py +62 -4
  84. optimum/rbln/transformers/models/opt/configuration_opt.py +4 -1
  85. optimum/rbln/transformers/models/opt/modeling_opt.py +10 -0
  86. optimum/rbln/transformers/models/opt/opt_architecture.py +7 -1
  87. optimum/rbln/transformers/models/phi/configuration_phi.py +24 -1
  88. optimum/rbln/transformers/models/phi/modeling_phi.py +49 -0
  89. optimum/rbln/transformers/models/phi/phi_architecture.py +1 -1
  90. optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +24 -1
  91. optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +67 -4
  92. optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +15 -3
  93. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +46 -25
  94. optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +4 -2
  95. optimum/rbln/transformers/models/resnet/__init__.py +23 -0
  96. optimum/rbln/transformers/models/resnet/configuration_resnet.py +20 -0
  97. optimum/rbln/transformers/models/resnet/modeling_resnet.py +22 -0
  98. optimum/rbln/transformers/models/roberta/__init__.py +24 -0
  99. optimum/rbln/transformers/{configuration_alias.py → models/roberta/configuration_roberta.py} +4 -30
  100. optimum/rbln/transformers/{modeling_alias.py → models/roberta/modeling_roberta.py} +2 -32
  101. optimum/rbln/transformers/models/seq2seq/__init__.py +1 -1
  102. optimum/rbln/transformers/models/seq2seq/{configuration_seq2seq2.py → configuration_seq2seq.py} +2 -2
  103. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +1 -1
  104. optimum/rbln/transformers/models/siglip/configuration_siglip.py +3 -0
  105. optimum/rbln/transformers/models/siglip/modeling_siglip.py +62 -21
  106. optimum/rbln/transformers/models/t5/modeling_t5.py +46 -4
  107. optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/__init__.py +1 -1
  108. optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/configuration_time_series_transformer.py +2 -2
  109. optimum/rbln/transformers/models/{time_series_transformers/modeling_time_series_transformers.py → time_series_transformer/modeling_time_series_transformer.py} +14 -9
  110. optimum/rbln/transformers/models/vit/__init__.py +19 -0
  111. optimum/rbln/transformers/models/vit/configuration_vit.py +19 -0
  112. optimum/rbln/transformers/models/vit/modeling_vit.py +19 -0
  113. optimum/rbln/transformers/models/wav2vec2/__init__.py +1 -1
  114. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +1 -1
  115. optimum/rbln/transformers/models/whisper/configuration_whisper.py +3 -1
  116. optimum/rbln/transformers/models/whisper/modeling_whisper.py +35 -15
  117. optimum/rbln/transformers/models/xlm_roberta/__init__.py +16 -2
  118. optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py +15 -2
  119. optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +12 -3
  120. optimum/rbln/utils/model_utils.py +20 -0
  121. optimum/rbln/utils/submodule.py +6 -8
  122. {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1a1.dist-info}/METADATA +1 -1
  123. {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1a1.dist-info}/RECORD +127 -114
  124. /optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/time_series_transformers_architecture.py +0 -0
  125. /optimum/rbln/transformers/models/wav2vec2/{configuration_wav2vec.py → configuration_wav2vec2.py} +0 -0
  126. {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1a1.dist-info}/WHEEL +0 -0
  127. {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1a1.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/transformers/configuration_generic.py

@@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 from ..configuration_utils import RBLNModelConfig
 
 
-class _RBLNTransformerEncoderConfig(RBLNModelConfig):
+class RBLNTransformerEncoderConfig(RBLNModelConfig):
     rbln_model_input_names: Optional[List[str]] = None
 
     def __init__(
@@ -25,7 +25,7 @@ class _RBLNTransformerEncoderConfig(RBLNModelConfig):
         max_seq_len: Optional[int] = None,
         batch_size: Optional[int] = None,
         model_input_names: Optional[List[str]] = None,
-        **kwargs,
+        **kwargs: Dict[str, Any],
     ):
         """
         Args:
@@ -47,9 +47,12 @@ class _RBLNTransformerEncoderConfig(RBLNModelConfig):
         self.model_input_names = model_input_names or self.rbln_model_input_names
 
 
-class _RBLNImageModelConfig(RBLNModelConfig):
+class RBLNImageModelConfig(RBLNModelConfig):
     def __init__(
-        self, image_size: Optional[Union[int, Tuple[int, int]]] = None, batch_size: Optional[int] = None, **kwargs
+        self,
+        image_size: Optional[Union[int, Tuple[int, int]]] = None,
+        batch_size: Optional[int] = None,
+        **kwargs: Dict[str, Any],
     ):
         """
         Args:
@@ -86,32 +89,32 @@ class _RBLNImageModelConfig(RBLNModelConfig):
         return self.image_size["height"]
 
 
-class RBLNModelForQuestionAnsweringConfig(_RBLNTransformerEncoderConfig):
+class RBLNModelForQuestionAnsweringConfig(RBLNTransformerEncoderConfig):
     pass
 
 
-class RBLNModelForSequenceClassificationConfig(_RBLNTransformerEncoderConfig):
+class RBLNModelForSequenceClassificationConfig(RBLNTransformerEncoderConfig):
     pass
 
 
-class RBLNModelForMaskedLMConfig(_RBLNTransformerEncoderConfig):
+class RBLNModelForMaskedLMConfig(RBLNTransformerEncoderConfig):
     pass
 
 
-class RBLNModelForTextEncodingConfig(_RBLNTransformerEncoderConfig):
+class RBLNModelForTextEncodingConfig(RBLNTransformerEncoderConfig):
     pass
 
 
 # FIXME : Appropriate name ?
-class RBLNTransformerEncoderForFeatureExtractionConfig(_RBLNTransformerEncoderConfig):
+class RBLNTransformerEncoderForFeatureExtractionConfig(RBLNTransformerEncoderConfig):
     pass
 
 
-class RBLNModelForImageClassificationConfig(_RBLNImageModelConfig):
+class RBLNModelForImageClassificationConfig(RBLNImageModelConfig):
     pass
 
 
-class RBLNModelForDepthEstimationConfig(_RBLNImageModelConfig):
+class RBLNModelForDepthEstimationConfig(RBLNImageModelConfig):
     pass
 
 
@@ -121,7 +124,7 @@ class RBLNModelForAudioClassificationConfig(RBLNModelConfig):
         batch_size: Optional[int] = None,
         max_length: Optional[int] = None,
        num_mel_bins: Optional[int] = None,
-        **kwargs,
+        **kwargs: Dict[str, Any],
    ):
        """
        Args:
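The previously underscore-private generic config bases are now public API. A minimal sketch of constructing one directly, assuming only what the hunk above shows (module path, class name, and the `__init__` parameters); the values are arbitrary examples:

```python
# Illustrative sketch: class and module path as shown in this diff.
from optimum.rbln.transformers.configuration_generic import RBLNTransformerEncoderConfig

cfg = RBLNTransformerEncoderConfig(max_seq_len=512, batch_size=1)
```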
optimum/rbln/transformers/modeling_generic.py

@@ -43,9 +43,9 @@ from ..configuration_utils import RBLNCompileConfig
 from ..modeling import RBLNModel
 from ..utils.logging import get_logger
 from .configuration_generic import (
+    RBLNImageModelConfig,
     RBLNModelForAudioClassificationConfig,
-    _RBLNImageModelConfig,
-    _RBLNTransformerEncoderConfig,
+    RBLNTransformerEncoderConfig,
 )
 
 
@@ -55,7 +55,7 @@ if TYPE_CHECKING:
 logger = get_logger()
 
 
-class _RBLNTransformerEncoder(RBLNModel):
+class RBLNTransformerEncoder(RBLNModel):
     auto_model_class = AutoModel
     rbln_model_input_names = ["input_ids", "attention_mask", "token_type_ids"]
     rbln_dtype = "int64"
@@ -66,8 +66,8 @@ class _RBLNTransformerEncoder(RBLNModel):
         preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]] = None,
         model: Optional["PreTrainedModel"] = None,
         model_config: Optional["PretrainedConfig"] = None,
-        rbln_config: Optional[_RBLNTransformerEncoderConfig] = None,
-    ) -> _RBLNTransformerEncoderConfig:
+        rbln_config: Optional[RBLNTransformerEncoderConfig] = None,
+    ) -> RBLNTransformerEncoderConfig:
         return cls.update_rbln_config_for_transformers_encoder(
             preprocessors=preprocessors,
             model=model,
@@ -81,8 +81,8 @@ class _RBLNTransformerEncoder(RBLNModel):
         preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]] = None,
         model: Optional["PreTrainedModel"] = None,
         model_config: Optional["PretrainedConfig"] = None,
-        rbln_config: Optional[_RBLNTransformerEncoderConfig] = None,
-    ) -> _RBLNTransformerEncoderConfig:
+        rbln_config: Optional[RBLNTransformerEncoderConfig] = None,
+    ) -> RBLNTransformerEncoderConfig:
         max_position_embeddings = getattr(model_config, "n_positions", None) or getattr(
             model_config, "max_position_embeddings", None
         )
@@ -150,8 +150,8 @@ class _RBLNImageModel(RBLNModel):
         preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]] = None,
         model: Optional["PreTrainedModel"] = None,
         model_config: Optional["PretrainedConfig"] = None,
-        rbln_config: Optional[_RBLNImageModelConfig] = None,
-    ) -> _RBLNImageModelConfig:
+        rbln_config: Optional[RBLNImageModelConfig] = None,
+    ) -> RBLNImageModelConfig:
         return cls.update_rbln_config_for_image_model(
             preprocessors=preprocessors,
             model=model,
@@ -165,8 +165,8 @@ class _RBLNImageModel(RBLNModel):
         preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]] = None,
         model: Optional["PreTrainedModel"] = None,
         model_config: Optional["PretrainedConfig"] = None,
-        rbln_config: Optional[_RBLNImageModelConfig] = None,
-    ) -> _RBLNImageModelConfig:
+        rbln_config: Optional[RBLNImageModelConfig] = None,
+    ) -> RBLNImageModelConfig:
         if rbln_config.image_size is None:
             for processor in preprocessors:
                 if hasattr(processor, "size"):
@@ -196,15 +196,14 @@ class _RBLNImageModel(RBLNModel):
         return rbln_config
 
 
-class RBLNModelForQuestionAnswering(_RBLNTransformerEncoder):
+class RBLNModelForQuestionAnswering(RBLNTransformerEncoder):
     auto_model_class = AutoModelForQuestionAnswering
     rbln_model_input_names = ["input_ids", "attention_mask", "token_type_ids"]
     output_class = QuestionAnsweringModelOutput
 
     def _prepare_output(self, output, return_dict):
-        """
-        Prepare QuestionAnswering specific output format.
-        """
+        # Prepare QuestionAnswering specific output format.
+
         start_logits, end_logits = output
 
         if not return_dict:
@@ -213,22 +212,22 @@ class RBLNModelForQuestionAnswering(_RBLNTransformerEncoder):
         return QuestionAnsweringModelOutput(start_logits=start_logits, end_logits=end_logits)
 
 
-class RBLNModelForSequenceClassification(_RBLNTransformerEncoder):
+class RBLNModelForSequenceClassification(RBLNTransformerEncoder):
     auto_model_class = AutoModelForSequenceClassification
     rbln_model_input_names = ["input_ids", "attention_mask"]
 
 
-class RBLNModelForMaskedLM(_RBLNTransformerEncoder):
+class RBLNModelForMaskedLM(RBLNTransformerEncoder):
     auto_model_class = AutoModelForMaskedLM
     rbln_model_input_names = ["input_ids", "attention_mask"]
 
 
-class RBLNModelForTextEncoding(_RBLNTransformerEncoder):
+class RBLNModelForTextEncoding(RBLNTransformerEncoder):
     auto_model_class = AutoModelForTextEncoding
     rbln_model_input_names = ["input_ids", "attention_mask"]
 
 
-class RBLNTransformerEncoderForFeatureExtraction(_RBLNTransformerEncoder):
+class RBLNTransformerEncoderForFeatureExtraction(RBLNTransformerEncoder):
     # TODO: RBLNModel is also for feature extraction.
     auto_model_class = AutoModel
     rbln_model_input_names = ["input_ids", "attention_mask"]
optimum/rbln/transformers/modeling_rope_utils.py

@@ -51,7 +51,7 @@ def _compute_default_rope_parameters(
 
     base = config.rope_theta
     partial_rotary_factor = config.partial_rotary_factor if hasattr(config, "partial_rotary_factor") else 1.0
-    head_dim = getattr(config, "head_dim", config.hidden_size // config.num_attention_heads)
+    head_dim = getattr(config, "head_dim", None) or config.hidden_size // config.num_attention_heads
     dim = int(head_dim * partial_rotary_factor)
 
     attention_factor = 1.0  # Unused in this type of RoPE
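The `head_dim` change matters when a config defines the attribute explicitly as `None`: `getattr` with a default only falls back when the attribute is *absent*, so the old code could propagate `None` into `int(head_dim * partial_rotary_factor)` and crash. A standalone illustration of the difference:

```python
from types import SimpleNamespace

# Some HF configs define head_dim explicitly as None rather than omitting it.
config = SimpleNamespace(hidden_size=4096, num_attention_heads=32, head_dim=None)

old = getattr(config, "head_dim", config.hidden_size // config.num_attention_heads)
new = getattr(config, "head_dim", None) or config.hidden_size // config.num_attention_heads

print(old)  # None -- the fallback never fires because the attribute exists
print(new)  # 128  -- `or` also covers the explicit-None case
```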
optimum/rbln/transformers/models/__init__.py

@@ -18,6 +18,10 @@ from transformers.utils import _LazyModule
 
 
 _import_structure = {
+    "audio_spectrogram_transformer": [
+        "RBLNASTForAudioClassification",
+        "RBLNASTForAudioClassificationConfig",
+    ],
     "auto": [
         "RBLNAutoModel",
         "RBLNAutoModelForAudioClassification",
@@ -65,6 +69,10 @@ _import_structure = {
         "RBLNCLIPVisionModelWithProjection",
         "RBLNCLIPVisionModelWithProjectionConfig",
     ],
+    "distilbert": [
+        "RBLNDistilBertForQuestionAnswering",
+        "RBLNDistilBertForQuestionAnsweringConfig",
+    ],
     "qwen2_5_vl": [
         "RBLNQwen2_5_VisionTransformerPretrainedModel",
         "RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
@@ -101,11 +109,18 @@ _import_structure = {
     "mistral": ["RBLNMistralForCausalLM", "RBLNMistralForCausalLMConfig"],
     "phi": ["RBLNPhiForCausalLM", "RBLNPhiForCausalLMConfig"],
     "qwen2": ["RBLNQwen2ForCausalLM", "RBLNQwen2ForCausalLMConfig"],
+    "resnet": ["RBLNResNetForImageClassification", "RBLNResNetForImageClassificationConfig"],
+    "roberta": [
+        "RBLNRobertaForMaskedLM",
+        "RBLNRobertaForMaskedLMConfig",
+        "RBLNRobertaForSequenceClassification",
+        "RBLNRobertaForSequenceClassificationConfig",
+    ],
     "siglip": [
         "RBLNSiglipVisionModel",
         "RBLNSiglipVisionModelConfig",
     ],
-    "time_series_transformers": [
+    "time_series_transformer": [
         "RBLNTimeSeriesTransformerForPrediction",
         "RBLNTimeSeriesTransformerForPredictionConfig",
     ],
@@ -115,12 +130,22 @@ _import_structure = {
         "RBLNT5EncoderModelConfig",
         "RBLNT5ForConditionalGenerationConfig",
     ],
+    "vit": ["RBLNViTForImageClassification", "RBLNViTForImageClassificationConfig"],
     "wav2vec2": ["RBLNWav2Vec2ForCTC", "RBLNWav2Vec2ForCTCConfig"],
     "whisper": ["RBLNWhisperForConditionalGeneration", "RBLNWhisperForConditionalGenerationConfig"],
-    "xlm_roberta": ["RBLNXLMRobertaModel", "RBLNXLMRobertaModelConfig"],
+    "xlm_roberta": [
+        "RBLNXLMRobertaModel",
+        "RBLNXLMRobertaModelConfig",
+        "RBLNXLMRobertaForSequenceClassification",
+        "RBLNXLMRobertaForSequenceClassificationConfig",
+    ],
 }
 
 if TYPE_CHECKING:
+    from .audio_spectrogram_transformer import (
+        RBLNASTForAudioClassification,
+        RBLNASTForAudioClassificationConfig,
+    )
     from .auto import (
         RBLNAutoModel,
         RBLNAutoModelForAudioClassification,
@@ -172,6 +197,10 @@ if TYPE_CHECKING:
         RBLNDecoderOnlyModelForCausalLM,
         RBLNDecoderOnlyModelForCausalLMConfig,
     )
+    from .distilbert import (
+        RBLNDistilBertForQuestionAnswering,
+        RBLNDistilBertForQuestionAnsweringConfig,
+    )
     from .dpt import (
         RBLNDPTForDepthEstimation,
         RBLNDPTForDepthEstimationConfig,
@@ -204,6 +233,13 @@ if TYPE_CHECKING:
         RBLNQwen2_5_VLForConditionalGeneration,
         RBLNQwen2_5_VLForConditionalGenerationConfig,
     )
+    from .resnet import RBLNResNetForImageClassification, RBLNResNetForImageClassificationConfig
+    from .roberta import (
+        RBLNRobertaForMaskedLM,
+        RBLNRobertaForMaskedLMConfig,
+        RBLNRobertaForSequenceClassification,
+        RBLNRobertaForSequenceClassificationConfig,
+    )
     from .siglip import RBLNSiglipVisionModel, RBLNSiglipVisionModelConfig
     from .t5 import (
         RBLNT5EncoderModel,
@@ -211,13 +247,19 @@ if TYPE_CHECKING:
         RBLNT5ForConditionalGeneration,
         RBLNT5ForConditionalGenerationConfig,
     )
-    from .time_series_transformers import (
+    from .time_series_transformer import (
         RBLNTimeSeriesTransformerForPrediction,
         RBLNTimeSeriesTransformerForPredictionConfig,
     )
+    from .vit import RBLNViTForImageClassification, RBLNViTForImageClassificationConfig
     from .wav2vec2 import RBLNWav2Vec2ForCTC, RBLNWav2Vec2ForCTCConfig
     from .whisper import RBLNWhisperForConditionalGeneration, RBLNWhisperForConditionalGenerationConfig
-    from .xlm_roberta import RBLNXLMRobertaModel, RBLNXLMRobertaModelConfig
+    from .xlm_roberta import (
+        RBLNXLMRobertaForSequenceClassification,
+        RBLNXLMRobertaForSequenceClassificationConfig,
+        RBLNXLMRobertaModel,
+        RBLNXLMRobertaModelConfig,
+    )
 
 else:
     import sys
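For context, `_import_structure` feeds transformers' `_LazyModule` (imported at the top of this file), so each submodule is only imported on first attribute access. The hunk cuts off just before the module's tail; assuming this file follows the standard transformers lazy-module pattern, it continues:

```python
else:
    import sys

    # Replace this module with a lazy proxy that resolves names
    # from _import_structure on first access.
    sys.modules[__name__] = _LazyModule(
        __name__, globals()["__file__"], _import_structure, module_spec=__spec__
    )
```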
optimum/rbln/transformers/models/audio_spectrogram_transformer/__init__.py

@@ -0,0 +1,17 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from .configuration_audio_spectrogram_transformer import RBLNASTForAudioClassificationConfig
+from .modeling_audio_spectrogram_transformer import RBLNASTForAudioClassification
optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py

@@ -0,0 +1,21 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ...configuration_generic import RBLNModelForAudioClassificationConfig
+
+
+class RBLNASTForAudioClassificationConfig(RBLNModelForAudioClassificationConfig):
+    """
+    Configuration class for RBLNASTForAudioClassification.
+    """
optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py

@@ -0,0 +1,28 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ...modeling_generic import RBLNModelForAudioClassification
+
+
+class RBLNASTForAudioClassification(RBLNModelForAudioClassification):
+    """
+    Audio Spectrogram Transformer model with an audio classification head on top (a linear layer on top of the pooled output) e.g. for datasets like AudioSet, Speech Commands v2.
+    This model inherits from [`RBLNModelForAudioClassification`]. Check the superclass documentation for the generic methods the library implements for all its models.
+
+    A class to convert and run pre-trained transformer-based `ASTForAudioClassification` models on RBLN devices.
+    It implements the methods to convert a pre-trained transformers `ASTForAudioClassification` model into a RBLN transformer model by:
+
+    - transferring the checkpoint weights of the original into an optimized RBLN graph,
+    - compiling the resulting graph using the RBLN Compiler.
+    """
optimum/rbln/transformers/models/auto/auto_factory.py

@@ -11,10 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import importlib
 import inspect
 import warnings
+from typing import Type
 
 from transformers import AutoConfig, PretrainedConfig
 from transformers.dynamic_module_utils import get_class_from_dynamic_module
@@ -22,7 +22,12 @@ from transformers.models.auto.auto_factory import _get_model_class
 
 from optimum.rbln.configuration_utils import RBLNAutoConfig
 from optimum.rbln.modeling_base import RBLNBaseModel
-from optimum.rbln.utils.model_utils import convert_hf_to_rbln_model_name, convert_rbln_to_hf_model_name
+from optimum.rbln.utils.model_utils import (
+    MODEL_MAPPING,
+    convert_hf_to_rbln_model_name,
+    convert_rbln_to_hf_model_name,
+    get_rbln_model_cls,
+)
 
 
 class _BaseAutoModelClass:
@@ -58,7 +63,7 @@ class _BaseAutoModelClass:
             hf_model_class = cls.infer_hf_model_class(pretrained_model_name_or_path, **kwargs)
             rbln_class_name = convert_hf_to_rbln_model_name(hf_model_class.__name__)
         else:
-            rbln_class_name = cls.get_rbln_model_class_name(pretrained_model_name_or_path, **kwargs)
+            rbln_class_name = cls.get_rbln_model_cls_name(pretrained_model_name_or_path, **kwargs)
 
         if convert_rbln_to_hf_model_name(rbln_class_name) not in cls._model_mapping_names.values():
             raise ValueError(
@@ -68,8 +73,7 @@ class _BaseAutoModelClass:
             )
 
         try:
-            module = importlib.import_module("optimum.rbln")
-            rbln_cls = getattr(module, rbln_class_name)
+            rbln_cls = get_rbln_model_cls(rbln_class_name)
         except AttributeError as e:
             raise AttributeError(
                 f"Class '{rbln_class_name}' not found in 'optimum.rbln' module for model ID '{pretrained_model_name_or_path}'. "
@@ -136,7 +140,7 @@ class _BaseAutoModelClass:
         return model_class
 
     @classmethod
-    def get_rbln_model_class_name(cls, pretrained_model_name_or_path, **kwargs):
+    def get_rbln_model_cls_name(cls, pretrained_model_name_or_path, **kwargs):
         """
         Retrieve the path to the compiled model directory for a given RBLN model.
 
@@ -159,11 +163,25 @@ class _BaseAutoModelClass:
         return rbln_config.rbln_model_cls_name
 
     @classmethod
-    def from_pretrained(
-        cls,
-        model_id,
-        *args,
-        **kwargs,
-    ):
+    def from_pretrained(cls, model_id, *args, **kwargs):
         rbln_cls = cls.get_rbln_cls(model_id, *args, **kwargs)
         return rbln_cls.from_pretrained(model_id, *args, **kwargs)
+
+    @staticmethod
+    def register(rbln_cls: Type[RBLNBaseModel], exist_ok=False):
+        """
+        Register a new RBLN model class.
+
+        Args:
+            rbln_cls (Type[RBLNBaseModel]): The RBLN model class to register.
+            exist_ok (bool): Whether to allow registering an already registered model.
+        """
+        if not issubclass(rbln_cls, RBLNBaseModel):
+            raise ValueError("`rbln_cls` must be a subclass of RBLNBaseModel.")
+
+        native_cls = getattr(importlib.import_module("optimum.rbln"), rbln_cls.__name__, None)
+        if rbln_cls.__name__ in MODEL_MAPPING or native_cls is not None:
+            if not exist_ok:
+                raise ValueError(f"Model for {rbln_cls.__name__} already registered.")
+
+        MODEL_MAPPING[rbln_cls.__name__] = rbln_cls
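The new static `register` hook lets user-defined model classes participate in the auto-class lookup via `MODEL_MAPPING`. A sketch of how it might be called, assuming `RBLNAutoModel` (one of the auto classes built on `_BaseAutoModelClass`) exposes this method; `MyRBLNCustomModel` is hypothetical:

```python
from optimum.rbln import RBLNAutoModel
from optimum.rbln.modeling_base import RBLNBaseModel


class MyRBLNCustomModel(RBLNBaseModel):  # hypothetical user-defined model class
    ...


RBLNAutoModel.register(MyRBLNCustomModel)  # added to MODEL_MAPPING
RBLNAutoModel.register(MyRBLNCustomModel, exist_ok=True)  # re-registering is allowed
```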
optimum/rbln/transformers/models/blip_2/modeling_blip_2.py

@@ -251,6 +251,38 @@ class RBLNBlip2QFormerModel(RBLNModel):
 
 
 class RBLNBlip2ForConditionalGeneration(RBLNModel):
+    """
+    RBLNBlip2ForConditionalGeneration is a multi-modal model that integrates vision and language processing capabilities,
+    optimized for RBLN NPUs. It is designed for conditional generation tasks that involve both image and text inputs.
+
+    This model inherits from [`RBLNModel`]. Check the superclass documentation for the generic methods the library implements for all its models.
+
+    Important Note:
+        This model includes a Large Language Model (LLM) as a submodule. For optimal performance, it is highly recommended to use
+        tensor parallelism for the language model. This can be achieved by using the `rbln_config` parameter in the
+        `from_pretrained` method. Refer to the `from_pretrained` documentation and the RBLNBlip2ForConditionalGeneration class for details.
+
+    Examples:
+        ```python
+        from optimum.rbln import RBLNBlip2ForConditionalGeneration
+
+        model = RBLNBlip2ForConditionalGeneration.from_pretrained(
+            "Salesforce/blip2-opt-2.7b",
+            export=True,
+            rbln_config={
+                "language_model": {
+                    "batch_size": 1,
+                    "max_seq_len": 2048,
+                    "tensor_parallel_size": 1,
+                    "use_inputs_embeds": True,
+                },
+            },
+        )
+
+        model.save_pretrained("compiled-blip2-opt-2.7b")
+        ```
+    """
+
     auto_model_class = AutoModelForVisualQuestionAnswering
     _rbln_submodules = [{"name": "vision_model"}, {"name": "qformer"}, {"name": "language_model"}]
 
@@ -275,10 +307,9 @@ class RBLNBlip2ForConditionalGeneration(RBLNModel):
         subfolder: str,
         rbln_config: RBLNModelConfig,
     ):
-        """
-        If you are unavoidably running on a CPU rather than an RBLN device,
-        store the torch tensor, weight, etc. in this function.
-        """
+        # If you are unavoidably running on a CPU rather than an RBLN device,
+        # store the torch tensor, weight, etc. in this function.
+
         save_dict = {}
         save_dict["query_tokens"] = model.query_tokens
         torch.save(save_dict, save_dir_path / subfolder / "query_tokens.pth")
optimum/rbln/transformers/models/clip/configuration_clip.py

@@ -12,13 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Optional
+from typing import Any, Dict, Optional
 
 from ....configuration_utils import RBLNModelConfig
 
 
 class RBLNCLIPTextModelConfig(RBLNModelConfig):
-    def __init__(self, batch_size: Optional[int] = None, **kwargs):
+    def __init__(self, batch_size: Optional[int] = None, **kwargs: Dict[str, Any]):
         """
         Args:
             batch_size (Optional[int]): The batch size for text processing. Defaults to 1.
@@ -38,7 +38,7 @@ class RBLNCLIPTextModelWithProjectionConfig(RBLNCLIPTextModelConfig):
 
 
 class RBLNCLIPVisionModelConfig(RBLNModelConfig):
-    def __init__(self, batch_size: Optional[int] = None, image_size: Optional[int] = None, **kwargs):
+    def __init__(self, batch_size: Optional[int] = None, image_size: Optional[int] = None, **kwargs: Dict[str, Any]):
         """
         Args:
             batch_size (Optional[int]): The batch size for image processing. Defaults to 1.
optimum/rbln/transformers/models/clip/modeling_clip.py

@@ -49,7 +49,7 @@ class RBLNCLIPTextModel(RBLNModel):
 
     @classmethod
     def update_rbln_config_using_pipe(
-        cls, pipe: "RBLNDiffusionMixin", rbln_config: "RBLNDiffusionMixinConfig", submodule_config: str
+        cls, pipe: "RBLNDiffusionMixin", rbln_config: "RBLNDiffusionMixinConfig", submodule_name: str
     ) -> "RBLNDiffusionMixinConfig":
         return rbln_config
 
@@ -81,10 +81,9 @@ class RBLNCLIPTextModel(RBLNModel):
         return output
 
     def _prepare_output(self, output, return_dict):
-        """
-        Prepare model output based on return_dict flag.
-        This method can be overridden by subclasses to provide task-specific output handling.
-        """
+        # Prepare model output based on return_dict flag.
+        # This method can be overridden by subclasses to provide task-specific output handling.
+
         if not return_dict:
             return (output,) if not isinstance(output, (tuple, list)) else output
         else:
@@ -161,17 +160,17 @@ class RBLNCLIPVisionModel(RBLNModel):
         return_dict: bool = None,
         **kwargs,
     ) -> Union[Tuple, CLIPVisionModelOutput]:
-        if len(kwargs) > 0 and any(kwargs.values()):
-            logger.warning(f"Currently, optimum-rbln does not support kwargs {kwargs.keys()} for {self.__class__}.")
-
+        if len(kwargs) > 0 and any(value is not None for value in kwargs.values()):
+            logger.warning(
+                f"Currently, optimum-rbln does not support kwargs {kwargs.keys()} for {self.__class__.__name__}."
+            )
         output = super().forward(pixel_values, return_dict=return_dict)
         return output
 
     def _prepare_output(self, output, return_dict):
-        """
-        Prepare model output based on return_dict flag.
-        This method can be overridden by subclasses to provide task-specific output handling.
-        """
+        # Prepare model output based on return_dict flag.
+        # This method can be overridden by subclasses to provide task-specific output handling.
+
         if not return_dict:
             return (output,) if not isinstance(output, (tuple, list)) else output
         else:
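The reworked warning predicate changes which explicitly passed kwargs trigger it: the old truthiness check stayed silent for falsy values such as `output_attentions=False`, while the new check warns for anything that is not `None`. A standalone illustration:

```python
kwargs = {"output_attentions": False}  # explicitly passed, but falsy

print(any(kwargs.values()))                         # False -> old check stays silent
print(any(v is not None for v in kwargs.values()))  # True  -> new check warns
```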