optimum-rbln 0.8.1a0__py3-none-any.whl → 0.8.1a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__init__.py +2 -0
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/configuration_utils.py +53 -33
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +9 -2
- optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +4 -2
- optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +9 -2
- optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +4 -2
- optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +9 -2
- optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +9 -2
- optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +33 -9
- optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +30 -12
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +22 -6
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +16 -6
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +16 -6
- optimum/rbln/diffusers/modeling_diffusers.py +16 -26
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +11 -0
- optimum/rbln/diffusers/models/autoencoders/vae.py +1 -8
- optimum/rbln/diffusers/models/autoencoders/vq_model.py +11 -0
- optimum/rbln/diffusers/models/controlnet.py +13 -7
- optimum/rbln/diffusers/models/transformers/prior_transformer.py +10 -0
- optimum/rbln/diffusers/models/transformers/transformer_sd3.py +2 -0
- optimum/rbln/diffusers/models/unets/unet_2d_condition.py +7 -0
- optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +1 -4
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +7 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +7 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +7 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +48 -27
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +7 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +7 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +7 -0
- optimum/rbln/modeling.py +33 -35
- optimum/rbln/modeling_base.py +45 -107
- optimum/rbln/transformers/__init__.py +39 -47
- optimum/rbln/transformers/configuration_generic.py +16 -13
- optimum/rbln/transformers/modeling_generic.py +18 -19
- optimum/rbln/transformers/modeling_rope_utils.py +5 -2
- optimum/rbln/transformers/models/__init__.py +46 -4
- optimum/rbln/transformers/models/audio_spectrogram_transformer/__init__.py +17 -0
- optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +21 -0
- optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +28 -0
- optimum/rbln/transformers/models/auto/auto_factory.py +35 -12
- optimum/rbln/transformers/models/bart/bart_architecture.py +14 -1
- optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +35 -4
- optimum/rbln/transformers/models/clip/configuration_clip.py +3 -3
- optimum/rbln/transformers/models/clip/modeling_clip.py +11 -12
- optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +111 -14
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +102 -35
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +229 -175
- optimum/rbln/transformers/models/distilbert/__init__.py +19 -0
- optimum/rbln/transformers/models/distilbert/configuration_distilbert.py +19 -0
- optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +19 -0
- optimum/rbln/transformers/models/exaone/configuration_exaone.py +24 -1
- optimum/rbln/transformers/models/exaone/exaone_architecture.py +5 -1
- optimum/rbln/transformers/models/exaone/modeling_exaone.py +66 -5
- optimum/rbln/transformers/models/gemma/configuration_gemma.py +24 -1
- optimum/rbln/transformers/models/gemma/gemma_architecture.py +5 -1
- optimum/rbln/transformers/models/gemma/modeling_gemma.py +49 -0
- optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +3 -3
- optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +18 -250
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +106 -236
- optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +4 -1
- optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +6 -1
- optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +12 -2
- optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +41 -4
- optimum/rbln/transformers/models/llama/configuration_llama.py +24 -1
- optimum/rbln/transformers/models/llama/modeling_llama.py +49 -0
- optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +2 -2
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +32 -4
- optimum/rbln/transformers/models/midm/configuration_midm.py +24 -1
- optimum/rbln/transformers/models/midm/midm_architecture.py +6 -1
- optimum/rbln/transformers/models/midm/modeling_midm.py +66 -5
- optimum/rbln/transformers/models/mistral/configuration_mistral.py +24 -1
- optimum/rbln/transformers/models/mistral/modeling_mistral.py +62 -4
- optimum/rbln/transformers/models/opt/configuration_opt.py +4 -1
- optimum/rbln/transformers/models/opt/modeling_opt.py +10 -0
- optimum/rbln/transformers/models/opt/opt_architecture.py +7 -1
- optimum/rbln/transformers/models/phi/configuration_phi.py +24 -1
- optimum/rbln/transformers/models/phi/modeling_phi.py +49 -0
- optimum/rbln/transformers/models/phi/phi_architecture.py +1 -1
- optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +24 -1
- optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +67 -4
- optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +15 -3
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +58 -27
- optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +47 -2
- optimum/rbln/transformers/models/resnet/__init__.py +23 -0
- optimum/rbln/transformers/models/resnet/configuration_resnet.py +20 -0
- optimum/rbln/transformers/models/resnet/modeling_resnet.py +22 -0
- optimum/rbln/transformers/models/roberta/__init__.py +24 -0
- optimum/rbln/transformers/{configuration_alias.py → models/roberta/configuration_roberta.py} +4 -30
- optimum/rbln/transformers/{modeling_alias.py → models/roberta/modeling_roberta.py} +2 -32
- optimum/rbln/transformers/models/seq2seq/__init__.py +1 -1
- optimum/rbln/transformers/models/seq2seq/{configuration_seq2seq2.py → configuration_seq2seq.py} +2 -2
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +1 -1
- optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +41 -3
- optimum/rbln/transformers/models/siglip/configuration_siglip.py +3 -0
- optimum/rbln/transformers/models/siglip/modeling_siglip.py +62 -21
- optimum/rbln/transformers/models/t5/modeling_t5.py +46 -4
- optimum/rbln/transformers/models/t5/t5_architecture.py +5 -1
- optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/__init__.py +1 -1
- optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/configuration_time_series_transformer.py +2 -2
- optimum/rbln/transformers/models/{time_series_transformers/modeling_time_series_transformers.py → time_series_transformer/modeling_time_series_transformer.py} +14 -9
- optimum/rbln/transformers/models/vit/__init__.py +19 -0
- optimum/rbln/transformers/models/vit/configuration_vit.py +19 -0
- optimum/rbln/transformers/models/vit/modeling_vit.py +19 -0
- optimum/rbln/transformers/models/wav2vec2/__init__.py +1 -1
- optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +1 -1
- optimum/rbln/transformers/models/whisper/configuration_whisper.py +3 -1
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +35 -15
- optimum/rbln/transformers/models/xlm_roberta/__init__.py +16 -2
- optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py +15 -2
- optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +12 -3
- optimum/rbln/utils/model_utils.py +20 -0
- optimum/rbln/utils/submodule.py +6 -8
- {optimum_rbln-0.8.1a0.dist-info → optimum_rbln-0.8.1a2.dist-info}/METADATA +2 -2
- {optimum_rbln-0.8.1a0.dist-info → optimum_rbln-0.8.1a2.dist-info}/RECORD +130 -117
- /optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/time_series_transformers_architecture.py +0 -0
- /optimum/rbln/transformers/models/wav2vec2/{configuration_wav2vec.py → configuration_wav2vec2.py} +0 -0
- {optimum_rbln-0.8.1a0.dist-info → optimum_rbln-0.8.1a2.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.8.1a0.dist-info → optimum_rbln-0.8.1a2.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/transformers/configuration_generic.py:

```diff
@@ -12,12 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 from ..configuration_utils import RBLNModelConfig
 
 
-class _RBLNTransformerEncoderConfig(RBLNModelConfig):
+class RBLNTransformerEncoderConfig(RBLNModelConfig):
     rbln_model_input_names: Optional[List[str]] = None
 
     def __init__(
@@ -25,7 +25,7 @@ class _RBLNTransformerEncoderConfig(RBLNModelConfig):
         max_seq_len: Optional[int] = None,
         batch_size: Optional[int] = None,
         model_input_names: Optional[List[str]] = None,
-        **kwargs,
+        **kwargs: Dict[str, Any],
     ):
         """
         Args:
@@ -47,9 +47,12 @@ class _RBLNTransformerEncoderConfig(RBLNModelConfig):
         self.model_input_names = model_input_names or self.rbln_model_input_names
 
 
-class _RBLNImageModelConfig(RBLNModelConfig):
+class RBLNImageModelConfig(RBLNModelConfig):
     def __init__(
-        self,
+        self,
+        image_size: Optional[Union[int, Tuple[int, int]]] = None,
+        batch_size: Optional[int] = None,
+        **kwargs: Dict[str, Any],
     ):
         """
         Args:
@@ -86,32 +89,32 @@ class _RBLNImageModelConfig(RBLNModelConfig):
         return self.image_size["height"]
 
 
-class RBLNModelForQuestionAnsweringConfig(_RBLNTransformerEncoderConfig):
+class RBLNModelForQuestionAnsweringConfig(RBLNTransformerEncoderConfig):
     pass
 
 
-class RBLNModelForSequenceClassificationConfig(_RBLNTransformerEncoderConfig):
+class RBLNModelForSequenceClassificationConfig(RBLNTransformerEncoderConfig):
     pass
 
 
-class RBLNModelForMaskedLMConfig(_RBLNTransformerEncoderConfig):
+class RBLNModelForMaskedLMConfig(RBLNTransformerEncoderConfig):
     pass
 
 
-class RBLNModelForTextEncodingConfig(_RBLNTransformerEncoderConfig):
+class RBLNModelForTextEncodingConfig(RBLNTransformerEncoderConfig):
     pass
 
 
 # FIXME : Appropriate name ?
-class RBLNTransformerEncoderForFeatureExtractionConfig(_RBLNTransformerEncoderConfig):
+class RBLNTransformerEncoderForFeatureExtractionConfig(RBLNTransformerEncoderConfig):
     pass
 
 
-class RBLNModelForImageClassificationConfig(_RBLNImageModelConfig):
+class RBLNModelForImageClassificationConfig(RBLNImageModelConfig):
     pass
 
 
-class RBLNModelForDepthEstimationConfig(_RBLNImageModelConfig):
+class RBLNModelForDepthEstimationConfig(RBLNImageModelConfig):
     pass
 
 
@@ -121,7 +124,7 @@ class RBLNModelForAudioClassificationConfig(RBLNModelConfig):
         batch_size: Optional[int] = None,
         max_length: Optional[int] = None,
         num_mel_bins: Optional[int] = None,
-        **kwargs,
+        **kwargs: Dict[str, Any],
     ):
         """
         Args:
```
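The config classes renamed above are plain keyword-argument containers. A minimal usage sketch (assuming `optimum.rbln` re-exports these classes at the package root, as the import tables later in this diff suggest; the keyword names come from the `__init__` signatures shown above):

```python
# Minimal sketch, assuming the generic config classes are re-exported from optimum.rbln.
from optimum.rbln import RBLNModelForSequenceClassificationConfig

rbln_config = RBLNModelForSequenceClassificationConfig(
    batch_size=1,        # static batch size compiled into the RBLN graph
    max_seq_len=512,     # static sequence length
    model_input_names=["input_ids", "attention_mask"],
)
```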
optimum/rbln/transformers/modeling_generic.py:

```diff
@@ -43,9 +43,9 @@ from ..configuration_utils import RBLNCompileConfig
 from ..modeling import RBLNModel
 from ..utils.logging import get_logger
 from .configuration_generic import (
+    RBLNImageModelConfig,
     RBLNModelForAudioClassificationConfig,
-    _RBLNImageModelConfig,
-    _RBLNTransformerEncoderConfig,
+    RBLNTransformerEncoderConfig,
 )
 
 
@@ -55,7 +55,7 @@ if TYPE_CHECKING:
 logger = get_logger()
 
 
-class _RBLNTransformerEncoder(RBLNModel):
+class RBLNTransformerEncoder(RBLNModel):
     auto_model_class = AutoModel
     rbln_model_input_names = ["input_ids", "attention_mask", "token_type_ids"]
     rbln_dtype = "int64"
@@ -66,8 +66,8 @@ class _RBLNTransformerEncoder(RBLNModel):
         preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]] = None,
         model: Optional["PreTrainedModel"] = None,
         model_config: Optional["PretrainedConfig"] = None,
-        rbln_config: Optional[_RBLNTransformerEncoderConfig] = None,
-    ) -> _RBLNTransformerEncoderConfig:
+        rbln_config: Optional[RBLNTransformerEncoderConfig] = None,
+    ) -> RBLNTransformerEncoderConfig:
         return cls.update_rbln_config_for_transformers_encoder(
             preprocessors=preprocessors,
             model=model,
@@ -81,8 +81,8 @@ class _RBLNTransformerEncoder(RBLNModel):
         preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]] = None,
         model: Optional["PreTrainedModel"] = None,
         model_config: Optional["PretrainedConfig"] = None,
-        rbln_config: Optional[_RBLNTransformerEncoderConfig] = None,
-    ) -> _RBLNTransformerEncoderConfig:
+        rbln_config: Optional[RBLNTransformerEncoderConfig] = None,
+    ) -> RBLNTransformerEncoderConfig:
         max_position_embeddings = getattr(model_config, "n_positions", None) or getattr(
             model_config, "max_position_embeddings", None
         )
@@ -150,8 +150,8 @@ class _RBLNImageModel(RBLNModel):
         preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]] = None,
         model: Optional["PreTrainedModel"] = None,
         model_config: Optional["PretrainedConfig"] = None,
-        rbln_config: Optional[_RBLNImageModelConfig] = None,
-    ) -> _RBLNImageModelConfig:
+        rbln_config: Optional[RBLNImageModelConfig] = None,
+    ) -> RBLNImageModelConfig:
         return cls.update_rbln_config_for_image_model(
             preprocessors=preprocessors,
             model=model,
@@ -165,8 +165,8 @@ class _RBLNImageModel(RBLNModel):
         preprocessors: Optional[Union["AutoFeatureExtractor", "AutoProcessor", "AutoTokenizer"]] = None,
         model: Optional["PreTrainedModel"] = None,
         model_config: Optional["PretrainedConfig"] = None,
-        rbln_config: Optional[_RBLNImageModelConfig] = None,
-    ) -> _RBLNImageModelConfig:
+        rbln_config: Optional[RBLNImageModelConfig] = None,
+    ) -> RBLNImageModelConfig:
         if rbln_config.image_size is None:
             for processor in preprocessors:
                 if hasattr(processor, "size"):
@@ -196,15 +196,14 @@ class _RBLNImageModel(RBLNModel):
         return rbln_config
 
 
-class RBLNModelForQuestionAnswering(_RBLNTransformerEncoder):
+class RBLNModelForQuestionAnswering(RBLNTransformerEncoder):
     auto_model_class = AutoModelForQuestionAnswering
     rbln_model_input_names = ["input_ids", "attention_mask", "token_type_ids"]
     output_class = QuestionAnsweringModelOutput
 
     def _prepare_output(self, output, return_dict):
-        """
-        Prepare QuestionAnswering specific output format.
-        """
+        # Prepare QuestionAnswering specific output format.
+
         start_logits, end_logits = output
 
         if not return_dict:
@@ -213,22 +212,22 @@ class RBLNModelForQuestionAnswering(_RBLNTransformerEncoder):
         return QuestionAnsweringModelOutput(start_logits=start_logits, end_logits=end_logits)
 
 
-class RBLNModelForSequenceClassification(_RBLNTransformerEncoder):
+class RBLNModelForSequenceClassification(RBLNTransformerEncoder):
     auto_model_class = AutoModelForSequenceClassification
     rbln_model_input_names = ["input_ids", "attention_mask"]
 
 
-class RBLNModelForMaskedLM(_RBLNTransformerEncoder):
+class RBLNModelForMaskedLM(RBLNTransformerEncoder):
     auto_model_class = AutoModelForMaskedLM
     rbln_model_input_names = ["input_ids", "attention_mask"]
 
 
-class RBLNModelForTextEncoding(_RBLNTransformerEncoder):
+class RBLNModelForTextEncoding(RBLNTransformerEncoder):
     auto_model_class = AutoModelForTextEncoding
     rbln_model_input_names = ["input_ids", "attention_mask"]
 
 
-class RBLNTransformerEncoderForFeatureExtraction(_RBLNTransformerEncoder):
+class RBLNTransformerEncoderForFeatureExtraction(RBLNTransformerEncoder):
     # TODO: RBLNModel is also for feature extraction.
     auto_model_class = AutoModel
     rbln_model_input_names = ["input_ids", "attention_mask"]
```
optimum/rbln/transformers/modeling_rope_utils.py:

```diff
@@ -48,10 +48,13 @@ def _compute_default_rope_parameters(
         Tuple of (`torch.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
         post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).
     """
-
     base = config.rope_theta
     partial_rotary_factor = config.partial_rotary_factor if hasattr(config, "partial_rotary_factor") else 1.0
-    head_dim =
+    head_dim = (
+        config.head_dim
+        if hasattr(config, "head_dim") and config.head_dim is not None
+        else config.hidden_size // config.num_attention_heads
+    )
     dim = int(head_dim * partial_rotary_factor)
 
     attention_factor = 1.0  # Unused in this type of RoPE
```
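The parenthesized expression replaces a single-line assignment (truncated in this view) and now also covers configs that define `head_dim` but leave it set to `None`. A small self-contained check of the same fallback logic, using `SimpleNamespace` as a stand-in for a `PretrainedConfig`:

```python
from types import SimpleNamespace

def resolve_head_dim(config):
    # Same fallback as above: use config.head_dim only when it exists and is not None.
    return (
        config.head_dim
        if hasattr(config, "head_dim") and config.head_dim is not None
        else config.hidden_size // config.num_attention_heads
    )

cfg = SimpleNamespace(hidden_size=4096, num_attention_heads=32, head_dim=None)
assert resolve_head_dim(cfg) == 128  # head_dim=None falls back to hidden_size // num_heads
```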
optimum/rbln/transformers/__init__.py:

```diff
@@ -18,6 +18,10 @@ from transformers.utils import _LazyModule
 
 
 _import_structure = {
+    "audio_spectrogram_transformer": [
+        "RBLNASTForAudioClassification",
+        "RBLNASTForAudioClassificationConfig",
+    ],
     "auto": [
         "RBLNAutoModel",
         "RBLNAutoModelForAudioClassification",
@@ -65,6 +69,10 @@ _import_structure = {
         "RBLNCLIPVisionModelWithProjection",
         "RBLNCLIPVisionModelWithProjectionConfig",
     ],
+    "distilbert": [
+        "RBLNDistilBertForQuestionAnswering",
+        "RBLNDistilBertForQuestionAnsweringConfig",
+    ],
     "qwen2_5_vl": [
         "RBLNQwen2_5_VisionTransformerPretrainedModel",
         "RBLNQwen2_5_VisionTransformerPretrainedModelConfig",
@@ -101,11 +109,18 @@ _import_structure = {
     "mistral": ["RBLNMistralForCausalLM", "RBLNMistralForCausalLMConfig"],
     "phi": ["RBLNPhiForCausalLM", "RBLNPhiForCausalLMConfig"],
     "qwen2": ["RBLNQwen2ForCausalLM", "RBLNQwen2ForCausalLMConfig"],
+    "resnet": ["RBLNResNetForImageClassification", "RBLNResNetForImageClassificationConfig"],
+    "roberta": [
+        "RBLNRobertaForMaskedLM",
+        "RBLNRobertaForMaskedLMConfig",
+        "RBLNRobertaForSequenceClassification",
+        "RBLNRobertaForSequenceClassificationConfig",
+    ],
     "siglip": [
         "RBLNSiglipVisionModel",
         "RBLNSiglipVisionModelConfig",
     ],
-    "time_series_transformers": [
+    "time_series_transformer": [
         "RBLNTimeSeriesTransformerForPrediction",
         "RBLNTimeSeriesTransformerForPredictionConfig",
     ],
@@ -115,12 +130,22 @@ _import_structure = {
         "RBLNT5EncoderModelConfig",
         "RBLNT5ForConditionalGenerationConfig",
     ],
+    "vit": ["RBLNViTForImageClassification", "RBLNViTForImageClassificationConfig"],
     "wav2vec2": ["RBLNWav2Vec2ForCTC", "RBLNWav2Vec2ForCTCConfig"],
     "whisper": ["RBLNWhisperForConditionalGeneration", "RBLNWhisperForConditionalGenerationConfig"],
-    "xlm_roberta": [
+    "xlm_roberta": [
+        "RBLNXLMRobertaModel",
+        "RBLNXLMRobertaModelConfig",
+        "RBLNXLMRobertaForSequenceClassification",
+        "RBLNXLMRobertaForSequenceClassificationConfig",
+    ],
 }
 
 if TYPE_CHECKING:
+    from .audio_spectrogram_transformer import (
+        RBLNASTForAudioClassification,
+        RBLNASTForAudioClassificationConfig,
+    )
     from .auto import (
         RBLNAutoModel,
         RBLNAutoModelForAudioClassification,
@@ -172,6 +197,10 @@ if TYPE_CHECKING:
         RBLNDecoderOnlyModelForCausalLM,
         RBLNDecoderOnlyModelForCausalLMConfig,
     )
+    from .distilbert import (
+        RBLNDistilBertForQuestionAnswering,
+        RBLNDistilBertForQuestionAnsweringConfig,
+    )
     from .dpt import (
         RBLNDPTForDepthEstimation,
         RBLNDPTForDepthEstimationConfig,
@@ -204,6 +233,13 @@ if TYPE_CHECKING:
         RBLNQwen2_5_VLForConditionalGeneration,
         RBLNQwen2_5_VLForConditionalGenerationConfig,
     )
+    from .resnet import RBLNResNetForImageClassification, RBLNResNetForImageClassificationConfig
+    from .roberta import (
+        RBLNRobertaForMaskedLM,
+        RBLNRobertaForMaskedLMConfig,
+        RBLNRobertaForSequenceClassification,
+        RBLNRobertaForSequenceClassificationConfig,
+    )
     from .siglip import RBLNSiglipVisionModel, RBLNSiglipVisionModelConfig
     from .t5 import (
         RBLNT5EncoderModel,
@@ -211,13 +247,19 @@ if TYPE_CHECKING:
         RBLNT5ForConditionalGeneration,
         RBLNT5ForConditionalGenerationConfig,
     )
-    from .time_series_transformers import (
+    from .time_series_transformer import (
         RBLNTimeSeriesTransformerForPrediction,
         RBLNTimeSeriesTransformerForPredictionConfig,
     )
+    from .vit import RBLNViTForImageClassification, RBLNViTForImageClassificationConfig
     from .wav2vec2 import RBLNWav2Vec2ForCTC, RBLNWav2Vec2ForCTCConfig
     from .whisper import RBLNWhisperForConditionalGeneration, RBLNWhisperForConditionalGenerationConfig
-    from .xlm_roberta import
+    from .xlm_roberta import (
+        RBLNXLMRobertaForSequenceClassification,
+        RBLNXLMRobertaForSequenceClassificationConfig,
+        RBLNXLMRobertaModel,
+        RBLNXLMRobertaModelConfig,
+    )
 
 else:
     import sys
```
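All of these registry entries feed transformers' `_LazyModule` helper: names listed in `_import_structure` are imported only on first attribute access, while the `TYPE_CHECKING` branch keeps static analysis working. A stripped-down sketch of the pattern this `__init__.py` follows (one entry shown; only meaningful inside a package `__init__`):

```python
import sys
from typing import TYPE_CHECKING

from transformers.utils import _LazyModule

_import_structure = {
    "vit": ["RBLNViTForImageClassification", "RBLNViTForImageClassificationConfig"],
}

if TYPE_CHECKING:
    from .vit import RBLNViTForImageClassification, RBLNViTForImageClassificationConfig
else:
    # Replace this module with a lazy proxy; submodules load on first access.
    sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure)
```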
optimum/rbln/transformers/models/audio_spectrogram_transformer/__init__.py (new file):

```diff
@@ -0,0 +1,17 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from .configuration_audio_spectrogram_transformer import RBLNASTForAudioClassificationConfig
+from .modeling_audio_spectrogram_transformer import RBLNASTForAudioClassification
```
optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py (new file):

```diff
@@ -0,0 +1,21 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ...configuration_generic import RBLNModelForAudioClassificationConfig
+
+
+class RBLNASTForAudioClassificationConfig(RBLNModelForAudioClassificationConfig):
+    """
+    Configuration class for RBLNASTForAudioClassification.
+    """
```
optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py (new file):

```diff
@@ -0,0 +1,28 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ...modeling_generic import RBLNModelForAudioClassification
+
+
+class RBLNASTForAudioClassification(RBLNModelForAudioClassification):
+    """
+    Audio Spectrogram Transformer model with an audio classification head on top (a linear layer on top of the pooled output) e.g. for datasets like AudioSet, Speech Commands v2.
+    This model inherits from [`RBLNModelForAudioClassification`]. Check the superclass documentation for the generic methods the library implements for all its models.
+
+    A class to convert and run pre-trained transformer-based `ASTForAudioClassification` models on RBLN devices.
+    It implements the methods to convert a pre-trained transformers `ASTForAudioClassification` model into a RBLN transformer model by:
+
+    - transferring the checkpoint weights of the original into an optimized RBLN graph,
+    - compiling the resulting graph using the RBLN Compiler.
+    """
```
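As a usage sketch for the new class (the checkpoint ID below is illustrative, not taken from this diff; the `from_pretrained(..., export=True)` pattern matches the BLIP-2 example that appears later in this diff):

```python
from optimum.rbln import RBLNASTForAudioClassification

# Compile a Hugging Face AST checkpoint for the RBLN NPU, then save the artifact.
model = RBLNASTForAudioClassification.from_pretrained(
    "MIT/ast-finetuned-audioset-10-10-0.4593",  # illustrative checkpoint ID
    export=True,
)
model.save_pretrained("compiled-ast-audio-classification")
```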
optimum/rbln/transformers/models/auto/auto_factory.py:

```diff
@@ -11,10 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 import importlib
 import inspect
 import warnings
+from typing import Type
 
 from transformers import AutoConfig, PretrainedConfig
 from transformers.dynamic_module_utils import get_class_from_dynamic_module
@@ -22,7 +22,12 @@ from transformers.models.auto.auto_factory import _get_model_class
 
 from optimum.rbln.configuration_utils import RBLNAutoConfig
 from optimum.rbln.modeling_base import RBLNBaseModel
-from optimum.rbln.utils.model_utils import convert_hf_to_rbln_model_name, convert_rbln_to_hf_model_name
+from optimum.rbln.utils.model_utils import (
+    MODEL_MAPPING,
+    convert_hf_to_rbln_model_name,
+    convert_rbln_to_hf_model_name,
+    get_rbln_model_cls,
+)
 
 
 class _BaseAutoModelClass:
@@ -58,7 +63,7 @@ class _BaseAutoModelClass:
             hf_model_class = cls.infer_hf_model_class(pretrained_model_name_or_path, **kwargs)
             rbln_class_name = convert_hf_to_rbln_model_name(hf_model_class.__name__)
         else:
-            rbln_class_name = cls.
+            rbln_class_name = cls.get_rbln_model_cls_name(pretrained_model_name_or_path, **kwargs)
 
         if convert_rbln_to_hf_model_name(rbln_class_name) not in cls._model_mapping_names.values():
             raise ValueError(
@@ -68,8 +73,7 @@ class _BaseAutoModelClass:
             )
 
         try:
-            module = importlib.import_module("optimum.rbln")
-            rbln_cls = getattr(module, rbln_class_name)
+            rbln_cls = get_rbln_model_cls(rbln_class_name)
         except AttributeError as e:
             raise AttributeError(
                 f"Class '{rbln_class_name}' not found in 'optimum.rbln' module for model ID '{pretrained_model_name_or_path}'. "
@@ -136,7 +140,7 @@ class _BaseAutoModelClass:
         return model_class
 
     @classmethod
-    def
+    def get_rbln_model_cls_name(cls, pretrained_model_name_or_path, **kwargs):
         """
         Retrieve the path to the compiled model directory for a given RBLN model.
 
@@ -159,11 +163,30 @@ class _BaseAutoModelClass:
         return rbln_config.rbln_model_cls_name
 
     @classmethod
-    def from_pretrained(
-        cls,
-        model_id,
-        *args,
-        **kwargs,
-    ):
+    def from_pretrained(cls, model_id, *args, **kwargs):
         rbln_cls = cls.get_rbln_cls(model_id, *args, **kwargs)
         return rbln_cls.from_pretrained(model_id, *args, **kwargs)
+
+    @classmethod
+    def from_model(cls, model, *args, **kwargs):
+        rbln_cls = get_rbln_model_cls(f"RBLN{model.__class__.__name__}")
+        return rbln_cls.from_model(model, *args, **kwargs)
+
+    @staticmethod
+    def register(rbln_cls: Type[RBLNBaseModel], exist_ok=False):
+        """
+        Register a new RBLN model class.
+
+        Args:
+            rbln_cls (Type[RBLNBaseModel]): The RBLN model class to register.
+            exist_ok (bool): Whether to allow registering an already registered model.
+        """
+        if not issubclass(rbln_cls, RBLNBaseModel):
+            raise ValueError("`rbln_cls` must be a subclass of RBLNBaseModel.")
+
+        native_cls = getattr(importlib.import_module("optimum.rbln"), rbln_cls.__name__, None)
+        if rbln_cls.__name__ in MODEL_MAPPING or native_cls is not None:
+            if not exist_ok:
+                raise ValueError(f"Model for {rbln_cls.__name__} already registered.")
+
+        MODEL_MAPPING[rbln_cls.__name__] = rbln_cls
```
optimum/rbln/transformers/models/bart/bart_architecture.py:

```diff
@@ -22,6 +22,7 @@ from transformers.modeling_attn_mask_utils import (
 from transformers.utils import logging
 
 from ..seq2seq.seq2seq_architecture import (
+    Seq2SeqCrossAttention,
     Seq2SeqDecoder,
     Seq2SeqDecoderLayer,
     Seq2SeqDecoderWrapper,
@@ -45,7 +46,8 @@ class BartDecoderWrapper(Seq2SeqDecoderWrapper):
         new_layers = []
         for layer in model.get_decoder().layers:
             self_attn = BartSelfAttention(layer.self_attn, use_attention_mask=self.use_attention_mask)
-            new_layers.append(BartDecoderLayer(layer, self_attn))
+            cross_attn = BartCrossAttention(layer.encoder_attn)
+            new_layers.append(BartDecoderLayer(layer, self_attn, cross_attn))
 
         decoder_model = BartDecoder(model.get_decoder(), new_layers)
         new_model = BartForConditionalGeneration(model, decoder_model)
@@ -153,3 +155,14 @@ class BartSelfAttention(Seq2SeqSelfAttention):
         key_states = self.k_proj(hidden_states)
         value_states = self.v_proj(hidden_states)
         return query_states, key_states, value_states
+
+
+class BartCrossAttention(Seq2SeqCrossAttention):
+    def __post_init__(self):
+        self.q_proj = self._original_mod.q_proj
+        self.k_proj = self._original_mod.k_proj
+        self.v_proj = self._original_mod.v_proj
+        self.out_proj = self._original_mod.out_proj
+        self.num_heads = self._original_mod.num_heads
+        self.head_dim = self._original_mod.embed_dim // self._original_mod.num_heads
+        self.embed_dim = self._original_mod.embed_dim
```
optimum/rbln/transformers/models/blip_2/modeling_blip_2.py:

```diff
@@ -251,6 +251,38 @@ class RBLNBlip2QFormerModel(RBLNModel):
 
 
 class RBLNBlip2ForConditionalGeneration(RBLNModel):
+    """
+    RBLNBlip2ForConditionalGeneration is a multi-modal model that integrates vision and language processing capabilities,
+    optimized for RBLN NPUs. It is designed for conditional generation tasks that involve both image and text inputs.
+
+    This model inherits from [`RBLNModel`]. Check the superclass documentation for the generic methods the library implements for all its models.
+
+    Important Note:
+        This model includes a Large Language Model (LLM) as a submodule. For optimal performance, it is highly recommended to use
+        tensor parallelism for the language model. This can be achieved by using the `rbln_config` parameter in the
+        `from_pretrained` method. Refer to the `from_pretrained` documentation and the RBLNBlip2ForConditionalGeneration class for details.
+
+    Examples:
+        ```python
+        from optimum.rbln import RBLNBlip2ForConditionalGeneration
+
+        model = RBLNBlip2ForConditionalGeneration.from_pretrained(
+            "Salesforce/blip2-opt-2.7b",
+            export=True,
+            rbln_config={
+                "language_model": {
+                    "batch_size": 1,
+                    "max_seq_len": 2048,
+                    "tensor_parallel_size": 1,
+                    "use_inputs_embeds": True,
+                },
+            },
+        )
+
+        model.save_pretrained("compiled-blip2-opt-2.7b")
+        ```
+    """
+
     auto_model_class = AutoModelForVisualQuestionAnswering
     _rbln_submodules = [{"name": "vision_model"}, {"name": "qformer"}, {"name": "language_model"}]
 
@@ -275,10 +307,9 @@ class RBLNBlip2ForConditionalGeneration(RBLNModel):
         subfolder: str,
         rbln_config: RBLNModelConfig,
     ):
-        """
-        If you are unavoidably running on a CPU rather than an RBLN device,
-        store the torch tensor, weight, etc. in this function.
-        """
+        # If you are unavoidably running on a CPU rather than an RBLN device,
+        # store the torch tensor, weight, etc. in this function.
+
         save_dict = {}
         save_dict["query_tokens"] = model.query_tokens
         torch.save(save_dict, save_dir_path / subfolder / "query_tokens.pth")
```
optimum/rbln/transformers/models/clip/configuration_clip.py:

```diff
@@ -12,13 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Optional
+from typing import Any, Dict, Optional
 
 from ....configuration_utils import RBLNModelConfig
 
 
 class RBLNCLIPTextModelConfig(RBLNModelConfig):
-    def __init__(self, batch_size: Optional[int] = None, **kwargs):
+    def __init__(self, batch_size: Optional[int] = None, **kwargs: Dict[str, Any]):
         """
         Args:
             batch_size (Optional[int]): The batch size for text processing. Defaults to 1.
@@ -38,7 +38,7 @@ class RBLNCLIPTextModelWithProjectionConfig(RBLNCLIPTextModelConfig):
 
 
 class RBLNCLIPVisionModelConfig(RBLNModelConfig):
-    def __init__(self, batch_size: Optional[int] = None, image_size: Optional[int] = None, **kwargs):
+    def __init__(self, batch_size: Optional[int] = None, image_size: Optional[int] = None, **kwargs: Dict[str, Any]):
         """
         Args:
             batch_size (Optional[int]): The batch size for image processing. Defaults to 1.
```
optimum/rbln/transformers/models/clip/modeling_clip.py:

```diff
@@ -49,7 +49,7 @@ class RBLNCLIPTextModel(RBLNModel):
 
     @classmethod
     def update_rbln_config_using_pipe(
-        cls, pipe: "RBLNDiffusionMixin", rbln_config: "RBLNDiffusionMixinConfig",
+        cls, pipe: "RBLNDiffusionMixin", rbln_config: "RBLNDiffusionMixinConfig", submodule_name: str
     ) -> "RBLNDiffusionMixinConfig":
         return rbln_config
 
@@ -81,10 +81,9 @@ class RBLNCLIPTextModel(RBLNModel):
         return output
 
     def _prepare_output(self, output, return_dict):
-        """
-        Prepare model output based on return_dict flag.
-        This method can be overridden by subclasses to provide task-specific output handling.
-        """
+        # Prepare model output based on return_dict flag.
+        # This method can be overridden by subclasses to provide task-specific output handling.
+
         if not return_dict:
             return (output,) if not isinstance(output, (tuple, list)) else output
         else:
@@ -161,17 +160,17 @@ class RBLNCLIPVisionModel(RBLNModel):
         return_dict: bool = None,
         **kwargs,
     ) -> Union[Tuple, CLIPVisionModelOutput]:
-        if len(kwargs) > 0 and any(kwargs.values()):
-            logger.warning(
-                f"Currently, optimum-rbln does not support kwargs {kwargs.keys()} for {self.__class__.__name__}.")
+        if len(kwargs) > 0 and any(value is not None for value in kwargs.values()):
+            logger.warning(
+                f"Currently, optimum-rbln does not support kwargs {kwargs.keys()} for {self.__class__.__name__}."
+            )
         output = super().forward(pixel_values, return_dict=return_dict)
         return output
 
     def _prepare_output(self, output, return_dict):
-        """
-        Prepare model output based on return_dict flag.
-        This method can be overridden by subclasses to provide task-specific output handling.
-        """
+        # Prepare model output based on return_dict flag.
+        # This method can be overridden by subclasses to provide task-specific output handling.
+
         if not return_dict:
             return (output,) if not isinstance(output, (tuple, list)) else output
         else:
```