optimum-rbln 0.9.3rc0__py3-none-any.whl → 0.9.4a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__init__.py +12 -0
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/configuration_utils.py +16 -6
- optimum/rbln/diffusers/__init__.py +12 -0
- optimum/rbln/diffusers/configurations/__init__.py +3 -0
- optimum/rbln/diffusers/configurations/models/__init__.py +2 -0
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_temporal_decoder.py +67 -0
- optimum/rbln/diffusers/configurations/models/configuration_unet_spatio_temporal_condition.py +59 -0
- optimum/rbln/diffusers/configurations/pipelines/__init__.py +3 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_video_diffusion.py +114 -0
- optimum/rbln/diffusers/modeling_diffusers.py +1 -1
- optimum/rbln/diffusers/models/__init__.py +17 -3
- optimum/rbln/diffusers/models/autoencoders/__init__.py +1 -0
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +3 -3
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +275 -0
- optimum/rbln/diffusers/models/autoencoders/vae.py +27 -8
- optimum/rbln/diffusers/models/controlnet.py +17 -2
- optimum/rbln/diffusers/models/transformers/prior_transformer.py +16 -2
- optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +16 -1
- optimum/rbln/diffusers/models/transformers/transformer_sd3.py +14 -1
- optimum/rbln/diffusers/models/unets/__init__.py +1 -0
- optimum/rbln/diffusers/models/unets/unet_2d_condition.py +18 -2
- optimum/rbln/diffusers/models/unets/unet_spatio_temporal_condition.py +201 -0
- optimum/rbln/diffusers/pipelines/__init__.py +4 -0
- optimum/rbln/diffusers/pipelines/auto_pipeline.py +2 -2
- optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +20 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +13 -4
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +13 -4
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +13 -4
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -4
- optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +1 -1
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +1 -1
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +1 -2
- optimum/rbln/diffusers/pipelines/stable_video_diffusion/__init__.py +15 -0
- optimum/rbln/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +46 -0
- optimum/rbln/modeling.py +20 -45
- optimum/rbln/modeling_base.py +12 -8
- optimum/rbln/transformers/configuration_generic.py +0 -27
- optimum/rbln/transformers/modeling_attention_utils.py +242 -109
- optimum/rbln/transformers/modeling_generic.py +2 -61
- optimum/rbln/transformers/modeling_outputs.py +1 -0
- optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +28 -2
- optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +68 -5
- optimum/rbln/transformers/models/auto/auto_factory.py +1 -0
- optimum/rbln/transformers/models/bart/modeling_bart.py +23 -2
- optimum/rbln/transformers/models/bert/modeling_bert.py +86 -1
- optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +42 -15
- optimum/rbln/transformers/models/clip/modeling_clip.py +40 -2
- optimum/rbln/transformers/models/colpali/colpali_architecture.py +2 -2
- optimum/rbln/transformers/models/colpali/modeling_colpali.py +6 -45
- optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py +0 -2
- optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +10 -1
- optimum/rbln/transformers/models/decoderonly/configuration_lora.py +1 -1
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +92 -43
- optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +207 -64
- optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +17 -9
- optimum/rbln/transformers/models/decoderonly/lora_architecture.py +1 -1
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +140 -46
- optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py +17 -0
- optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +24 -0
- optimum/rbln/transformers/models/dpt/modeling_dpt.py +17 -0
- optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +7 -1
- optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py +42 -70
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +46 -31
- optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +1 -1
- optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +24 -9
- optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +3 -5
- optimum/rbln/transformers/models/llava/modeling_llava.py +37 -25
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +3 -5
- optimum/rbln/transformers/models/mistral/modeling_mistral.py +0 -22
- optimum/rbln/transformers/models/opt/modeling_opt.py +2 -2
- optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +1 -1
- optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +13 -1
- optimum/rbln/transformers/models/pixtral/pixtral_architecture.py +2 -2
- optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +0 -28
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +8 -9
- optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +6 -7
- optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +1 -1
- optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +0 -20
- optimum/rbln/transformers/models/resnet/configuration_resnet.py +17 -0
- optimum/rbln/transformers/models/resnet/modeling_resnet.py +73 -0
- optimum/rbln/transformers/models/roberta/modeling_roberta.py +33 -0
- optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +2 -4
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +36 -12
- optimum/rbln/transformers/models/siglip/modeling_siglip.py +17 -1
- optimum/rbln/transformers/models/swin/modeling_swin.py +17 -4
- optimum/rbln/transformers/models/t5/modeling_t5.py +2 -2
- optimum/rbln/transformers/models/t5/t5_architecture.py +1 -1
- optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +25 -10
- optimum/rbln/transformers/models/vit/modeling_vit.py +19 -0
- optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +15 -3
- optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +60 -8
- optimum/rbln/transformers/models/whisper/generation_whisper.py +48 -14
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +2 -2
- optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +53 -0
- optimum/rbln/transformers/utils/rbln_quantization.py +9 -0
- optimum/rbln/utils/deprecation.py +213 -0
- optimum/rbln/utils/hub.py +14 -3
- optimum/rbln/utils/import_utils.py +7 -1
- optimum/rbln/utils/runtime_utils.py +32 -0
- optimum/rbln/utils/submodule.py +3 -1
- {optimum_rbln-0.9.3rc0.dist-info → optimum_rbln-0.9.4a2.dist-info}/METADATA +2 -2
- {optimum_rbln-0.9.3rc0.dist-info → optimum_rbln-0.9.4a2.dist-info}/RECORD +106 -99
- {optimum_rbln-0.9.3rc0.dist-info → optimum_rbln-0.9.4a2.dist-info}/WHEEL +1 -1
- optimum/rbln/utils/depreacate_utils.py +0 -16
- {optimum_rbln-0.9.3rc0.dist-info → optimum_rbln-0.9.4a2.dist-info}/entry_points.txt +0 -0
- {optimum_rbln-0.9.3rc0.dist-info → optimum_rbln-0.9.4a2.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py

```diff
@@ -12,17 +12,80 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from
+from typing import TYPE_CHECKING, Optional
 
+import torch
+from transformers import AutoModelForAudioClassification
+from transformers.modeling_outputs import SequenceClassifierOutput
 
-
+from ....configuration_utils import RBLNCompileConfig
+from ....modeling import RBLNModel
+from .configuration_audio_spectrogram_transformer import RBLNASTForAudioClassificationConfig
+
+
+if TYPE_CHECKING:
+    from transformers import AutoFeatureExtractor, PretrainedConfig, PreTrainedModel
+
+
+class RBLNASTForAudioClassification(RBLNModel):
     """
     Audio Spectrogram Transformer model with an audio classification head on top (a linear layer on top of the pooled output) e.g. for datasets like AudioSet, Speech Commands v2.
-    This model inherits from [
+    This model inherits from [RBLNModelForAudioClassification]. Check the superclass documentation for the generic methods the library implements for all its models.
 
-    A class to convert and run pre-trained transformer-based
-    It implements the methods to convert a pre-trained transformers
+    A class to convert and run pre-trained transformer-based ASTForAudioClassification models on RBLN devices.
+    It implements the methods to convert a pre-trained transformers ASTForAudioClassification model into a RBLN transformer model by:
 
     - transferring the checkpoint weights of the original into an optimized RBLN graph,
     - compiling the resulting graph using the RBLN Compiler.
     """
+
+    auto_model_class = AutoModelForAudioClassification
+
+    @classmethod
+    def _update_rbln_config(
+        cls,
+        preprocessors: "AutoFeatureExtractor" = None,
+        model: Optional["PreTrainedModel"] = None,
+        model_config: "PretrainedConfig" = None,
+        rbln_config: Optional[RBLNASTForAudioClassificationConfig] = None,
+    ) -> RBLNASTForAudioClassificationConfig:
+        num_mel_bins = getattr(model_config, "num_mel_bins", None)
+
+        if rbln_config.max_length is None:
+            rbln_config.max_length = getattr(model_config, "max_length", None)
+            for feature_extractor in preprocessors:
+                if hasattr(feature_extractor, "max_length"):
+                    rbln_config.max_length = feature_extractor.max_length
+                    break
+
+        if rbln_config.max_length is None:
+            raise ValueError("max_length should be specified!")
+
+        input_info = [
+            (
+                "input_values",
+                [rbln_config.batch_size, rbln_config.max_length, num_mel_bins],
+                "float32",
+            ),
+        ]
+
+        rbln_config.set_compile_cfgs([RBLNCompileConfig(input_info=input_info)])
+        return rbln_config
+
+    def forward(self, input_values: torch.Tensor, **kwargs) -> SequenceClassifierOutput:
+        """
+        Forward pass for the RBLN-optimized Audio Spectrogram Transformer model for audio classification.
+
+        Args:
+            input_values (torch.FloatTensor of shape (batch_size, max_length, num_mel_bins)):
+                Float values mel features extracted from the raw audio waveform. Raw audio waveform can be obtained by
+                loading a .flac or .wav audio file into an array of type list[float], a numpy.ndarray or a torch.Tensor, *e.g.* via
+                the torchcodec library (pip install torchcodec) or the soundfile library (pip install soundfile).
+                To prepare the array into input_features, the [AutoFeatureExtractor] should be used for extracting the
+                mel features, padding and conversion into a tensor of type torch.FloatTensor.
+
+        Returns:
+            Returns a SequenceClassifierOutput object.
+        """
+
+        return super().forward(input_values, **kwargs)
```
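The hunk above adds an explicit compile-time config hook and a documented `forward` to the AST classification model. A minimal usage sketch follows; the checkpoint ID, the `export=True` flag, and the `rbln_batch_size` kwarg are assumptions based on the usual optimum-rbln `from_pretrained` flow and are not part of this diff, and running it requires an RBLN NPU host.

```python
# Hypothetical usage sketch; model ID and rbln_* kwargs are assumptions, not taken from this diff.
import torch
from transformers import AutoFeatureExtractor
from optimum.rbln import RBLNASTForAudioClassification

model_id = "MIT/ast-finetuned-audioset-10-10-0.4593"
feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)

# Compile for the RBLN NPU; max_length falls back to the feature extractor's value
# when it is not given explicitly (see _update_rbln_config above).
model = RBLNASTForAudioClassification.from_pretrained(
    model_id,
    export=True,
    rbln_batch_size=1,
)

waveform = torch.randn(16000)  # one second of dummy 16 kHz audio
inputs = feature_extractor(waveform.numpy(), sampling_rate=16000, return_tensors="pt")
outputs = model(inputs.input_values)  # SequenceClassifierOutput
print(outputs.logits.shape)
```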
optimum/rbln/transformers/models/auto/auto_factory.py

```diff
@@ -150,6 +150,7 @@ class _BaseAutoModelClass:
                 f"from the checkpoint, leading to potential unintended behavior. If this is not intentional, consider calling the "
                 f"`from_pretrained()` method directly from the `RBLN{config.architectures[0]}` class instead.",
                 UserWarning,
+                stacklevel=2,
             )
 
         return model_class
```
optimum/rbln/transformers/models/bart/modeling_bart.py

```diff
@@ -13,9 +13,11 @@
 # limitations under the License.
 
 import inspect
-from typing import Any, Callable
+from typing import Any, Callable, Optional, Tuple, Union
 
+import torch
 from transformers import BartForConditionalGeneration, PreTrainedModel
+from transformers.modeling_outputs import Seq2SeqModelOutput
 
 from ....utils.logging import get_logger
 from ...modeling_generic import RBLNTransformerEncoderForFeatureExtraction
@@ -35,6 +37,25 @@ class RBLNBartModel(RBLNTransformerEncoderForFeatureExtraction):
     on RBLN devices, optimized for feature extraction use cases.
     """
 
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Union[Tuple, Seq2SeqModelOutput]:
+        """
+        Forward pass for the RBLN-optimized BART model for feature extraction tasks.
+
+        Args:
+            input_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Indices of input sequence tokens in the vocabulary.
+            attention_mask (torch.Tensor of shape (batch_size, sequence_length), optional): Mask to avoid performing attention on padding token indices.
+
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a Seq2SeqModelOutput object.
+        """
+
+        return super().forward(input_ids, attention_mask, **kwargs)
+
 
 class RBLNBartForConditionalGeneration(RBLNModelForSeq2SeqLM):
     """
@@ -48,7 +69,7 @@ class RBLNBartForConditionalGeneration(RBLNModelForSeq2SeqLM):
     support_causal_attn = True
 
     @classmethod
-    def
+    def _wrap_model_if_needed(self, model: PreTrainedModel, rbln_config: RBLNBartForConditionalGenerationConfig):
         return BartWrapper(
             model, enc_max_seq_len=rbln_config.enc_max_seq_len, use_attention_mask=rbln_config.use_attention_mask
         )
```
optimum/rbln/transformers/models/bert/modeling_bert.py

```diff
@@ -12,7 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Optional, Tuple, Union
+
 import torch
+from transformers.modeling_outputs import (
+    BaseModelOutputWithPoolingAndCrossAttentions,
+    MaskedLMOutput,
+    QuestionAnsweringModelOutput,
+)
 
 from ...modeling_generic import (
     RBLNModelForMaskedLM,
@@ -35,9 +42,45 @@ class RBLNBertModel(RBLNTransformerEncoderForFeatureExtraction):
     rbln_model_input_names = ["input_ids", "attention_mask"]
 
     @classmethod
-    def
+    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNBertModelConfig) -> torch.nn.Module:
         return BertModelWrapper(model, rbln_config)
 
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Union[BaseModelOutputWithPoolingAndCrossAttentions, Tuple]:
+        """
+        Forward pass for the RBLN-optimized BERT model for feature extraction tasks.
+
+        Args:
+            input_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Indices of input sequence tokens in the vocabulary.
+            attention_mask (torch.Tensor of shape (batch_size, sequence_length), optional): Mask to avoid performing attention on padding token indices.
+            token_type_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Segment token indices to indicate first and second portions of the inputs.
+            position_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Indices of positions of each input sequence tokens in the position embeddings.
+
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a BaseModelOutputWithPoolingAndCrossAttentions object.
+        """
+
+        input_map = {
+            "input_ids": input_ids,
+            "attention_mask": attention_mask,
+            "token_type_ids": token_type_ids,
+            "position_ids": position_ids,
+        }
+
+        model_input_names = getattr(self.rbln_config, "model_input_names", None)
+        if model_input_names is None:
+            model_input_names = self.rbln_model_input_names
+
+        ordered_inputs = [input_map[name] for name in model_input_names if name in input_map]
+
+        return super().forward(*ordered_inputs, **kwargs)
+
 
 class RBLNBertForMaskedLM(RBLNModelForMaskedLM):
     """
@@ -50,6 +93,27 @@ class RBLNBertForMaskedLM(RBLNModelForMaskedLM):
 
     rbln_model_input_names = ["input_ids", "attention_mask", "token_type_ids"]
 
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Union[MaskedLMOutput, Tuple]:
+        """
+        Forward pass for the RBLN-optimized BERT model for masked language modeling tasks.
+
+        Args:
+            input_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Indices of input sequence tokens in the vocabulary.
+            attention_mask (torch.Tensor of shape (batch_size, sequence_length), optional): Mask to avoid performing attention on padding token indices.
+            token_type_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Segment token indices to indicate first and second portions of the inputs.
+
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a MaskedLMOutput object.
+        """
+
+        return super().forward(input_ids, attention_mask, token_type_ids, **kwargs)
+
 
 class RBLNBertForQuestionAnswering(RBLNModelForQuestionAnswering):
     """
@@ -61,3 +125,24 @@ class RBLNBertForQuestionAnswering(RBLNModelForQuestionAnswering):
     """
 
     rbln_model_input_names = ["input_ids", "attention_mask", "token_type_ids"]
+
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        **kwargs,
+    ) -> Union[QuestionAnsweringModelOutput, Tuple]:
+        """
+        Forward pass for the RBLN-optimized BERT model for question answering tasks.
+
+        Args:
+            input_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Indices of input sequence tokens in the vocabulary.
+            attention_mask (torch.Tensor of shape (batch_size, sequence_length), optional): Mask to avoid performing attention on padding token indices.
+            token_type_ids (torch.Tensor of shape (batch_size, sequence_length), optional): Segment token indices to indicate first and second portions of the inputs.
+
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a QuestionAnsweringModelOutput object.
+        """
+
+        return super().forward(input_ids, attention_mask, token_type_ids, **kwargs)
```
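The new BERT `forward` accepts HuggingFace-style keyword arguments and reorders them into the positional layout the compiled RBLN graph was traced with, using `model_input_names` from the config (falling back to the class-level `rbln_model_input_names`). A minimal sketch of that ordering logic in isolation, with made-up example tensors, may make the behavior clearer:

```python
# Standalone illustration of the input-ordering idea from the diff above; not library code.
from typing import Dict, List, Optional

import torch


def order_inputs(
    input_map: Dict[str, Optional[torch.Tensor]],
    model_input_names: List[str],
) -> List[torch.Tensor]:
    # Keep only the inputs the compiled model expects, in exactly that order.
    return [input_map[name] for name in model_input_names if name in input_map]


inputs = {
    "input_ids": torch.ones(1, 8, dtype=torch.long),
    "attention_mask": torch.ones(1, 8, dtype=torch.long),
    "token_type_ids": None,
    "position_ids": None,
}

# A model compiled with ["input_ids", "attention_mask"] ignores the other keyword
# arguments even if the caller passes them.
ordered = order_inputs(inputs, ["input_ids", "attention_mask"])
assert len(ordered) == 2
```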
optimum/rbln/transformers/models/blip_2/modeling_blip_2.py

```diff
@@ -14,7 +14,7 @@
 
 import inspect
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Callable, List, Optional, Tuple, Union
 
 import torch
 from transformers import (
@@ -71,7 +71,7 @@ class RBLNBlip2VisionModel(RBLNModel):
         return self.embeddings
 
     @classmethod
-    def
+    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module:
         class Blip2VisionModelWrapper(torch.nn.Module):
             def __init__(self, model: "Blip2VisionModel") -> None:
                 super().__init__()
@@ -111,11 +111,20 @@ class RBLNBlip2VisionModel(RBLNModel):
     def forward(
         self,
         pixel_values: torch.FloatTensor,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
         interpolate_pos_encoding: bool = False,
+        return_dict: Optional[bool] = None,
     ) -> Union[Tuple, BaseModelOutputWithPooling]:
+        """
+        Forward pass for the RBLN-optimized Blip2VisionModel model.
+
+        Args:
+            pixel_values (torch.FloatTensor of shape (batch_size, num_channels, height, width)): The tensors corresponding to the input images.
+            interpolate_pos_encoding (bool, optional): Whether to interpolate the positional encoding of the image embeddings. Defaults to False.
+            return_dict (bool, optional): Whether to return a ModelOutput instead of a plain tuple.
+
+        Returns:
+            BaseModelOutputWithPooling or tuple(torch.FloatTensor): The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a BaseModelOutputWithPooling object.
+        """
         batch_size = pixel_values.shape[0]
         outputs = []
         for i in range(batch_size):
@@ -151,7 +160,7 @@ class RBLNBlip2QFormerModel(RBLNModel):
         return self.embeddings.word_embeddings
 
     @classmethod
-    def
+    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNModelConfig) -> torch.nn.Module:
         class Blip2QFormerModelWrapper(torch.nn.Module):
             def __init__(self, model: "Blip2QFormerModel"):
                 super().__init__()
@@ -231,17 +240,22 @@ class RBLNBlip2QFormerModel(RBLNModel):
     def forward(
         self,
         query_embeds: torch.FloatTensor,
-        query_length: Optional[int] = None,
-        attention_mask: Optional[torch.FloatTensor] = None,
-        head_mask: Optional[torch.FloatTensor] = None,
         encoder_hidden_states: Optional[torch.FloatTensor] = None,
         encoder_attention_mask: Optional[torch.FloatTensor] = None,
-        past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
-        use_cache: Optional[bool] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
     ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
+        """
+        The forward pass for the RBLN-optimized Blip2QFormerModel model.
+
+        Args:
+            query_embeds (torch.FloatTensor): Hidden states to be used in the attention computation.
+            encoder_hidden_states (torch.FloatTensor, optional): Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if the model is configured as a decoder.
+            encoder_attention_mask (torch.FloatTensor, optional): Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in the cross-attention if the model is configured as a decoder.
+            return_dict (bool, optional): Whether to return a ModelOutput instead of a plain tuple.
+
+        Returns:
+            BaseModelOutputWithPoolingAndCrossAttentions or tuple(torch.FloatTensor): The model outputs. If `return_dict=False` is passed, returns a tuple of tensors. Otherwise, returns a `BaseModelOutputWithPoolingAndCrossAttentions` object.
+        """
         batch_size = query_embeds.shape[0]
         outputs = []
         for i in range(batch_size):
@@ -349,7 +363,7 @@ class RBLNBlip2ForConditionalGeneration(RBLNModel, RBLNDecoderOnlyGenerationMixin):
         return self.language_model.get_input_embeddings()
 
     @classmethod
-    def
+    def _wrap_model_if_needed(cls, model, rbln_config):
         return model.language_projection
 
     @classmethod
@@ -444,7 +458,20 @@ class RBLNBlip2ForConditionalGeneration(RBLNModel, RBLNDecoderOnlyGenerationMixin):
         inputs_embeds: Optional[torch.FloatTensor] = None,
         interpolate_pos_encoding: bool = False,
         **generate_kwargs,
-    ) -> torch.LongTensor:
+    ) -> List[torch.LongTensor]:
+        """
+        The generate function is utilized in its standard form as in the HuggingFace transformers library. User can use this function to generate text from the model.
+        Check the [HuggingFace transformers documentation](https://huggingface.co/docs/transformers/v4.57.1/en/model_doc/blip-2#transformers.Blip2ForConditionalGeneration.generate) for more details.
+
+        Args:
+            pixel_values (torch.FloatTensor): Input images to be processed.
+            input_ids (torch.LongTensor, optional): The sequence used as a prompt for the generation.
+            attention_mask (torch.LongTensor, optional): Mask to avoid performing attention on padding token indices
+            inputs_embeds (torch.FloatTensor, optional): Embedded representation of the inputs. Should be float, not int tokens.
+            interpolate_pos_encoding (bool, optional, defaults to False): Whether to interpolate the positional encoding of the image embeddings.
+        Returns:
+            A list of strings of length batch_size * num_captions.
+        """
         batch_size = pixel_values.shape[0]
         image_embeds = self.vision_model(
             pixel_values,
```
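The BLIP-2 hunks above document `generate` and trim the vision/Q-Former forward signatures to the inputs the compiled graphs actually consume. A hedged usage sketch of the documented `generate` path; the checkpoint ID and the `export`/`rbln_batch_size` kwargs are assumptions rather than values from this diff:

```python
# Hypothetical usage sketch; checkpoint ID and rbln_* kwargs are assumptions, not part of this diff.
import requests
from PIL import Image
from transformers import AutoProcessor
from optimum.rbln import RBLNBlip2ForConditionalGeneration

model_id = "Salesforce/blip2-opt-2.7b"
processor = AutoProcessor.from_pretrained(model_id)
model = RBLNBlip2ForConditionalGeneration.from_pretrained(model_id, export=True, rbln_batch_size=1)

url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
inputs = processor(images=image, return_tensors="pt")

# generate() now documents its arguments and its list-of-token-id-tensors return type.
generated_ids = model.generate(**inputs, max_new_tokens=20)
print(processor.batch_decode(generated_ids, skip_special_tokens=True))
```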
optimum/rbln/transformers/models/clip/modeling_clip.py

```diff
@@ -54,7 +54,7 @@ class RBLNCLIPTextModel(RBLNModel):
     _tp_support = False
 
     @classmethod
-    def
+    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNCLIPTextModelConfig) -> torch.nn.Module:
         return _TextEncoder(model).eval()
 
     @classmethod
@@ -92,6 +92,9 @@ class RBLNCLIPTextModel(RBLNModel):
         Args:
             input_ids (torch.LongTensor): The input ids to the model.
             return_dict (Optional[bool]): Whether to return a dictionary of outputs.
+
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a CLIPTextModelOutput object.
         """
 
         # To ignore using attention_mask, we override forward method.
@@ -157,7 +160,7 @@ class RBLNCLIPVisionModel(RBLNModel):
     _tp_support = False
 
     @classmethod
-    def
+    def _wrap_model_if_needed(cls, model: torch.nn.Module, rbln_config: RBLNCLIPVisionModelConfig) -> torch.nn.Module:
         wrapper_cfg = {
             "interpolate_pos_encoding": rbln_config.interpolate_pos_encoding,
             "output_hidden_states": rbln_config.output_hidden_states,
@@ -230,6 +233,9 @@ class RBLNCLIPVisionModel(RBLNModel):
             output_attentions (Optional[bool]): Whether to return attentions.
             output_hidden_states (Optional[bool]): Whether to return hidden states.
             interpolate_pos_encoding (bool): Whether to interpolate position encoding.
+
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a BaseModelOutputWithPooling object.
         """
 
         if len(kwargs) > 0 and any(value is not None for value in kwargs.values()):
@@ -307,6 +313,38 @@ class RBLNCLIPVisionModelWithProjection(RBLNCLIPVisionModel):
     multimodal embedding alignment tasks.
     """
 
+    def forward(
+        self,
+        pixel_values: torch.FloatTensor,
+        return_dict: bool = True,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        interpolate_pos_encoding: bool = False,
+        **kwargs,
+    ) -> Union[Tuple, CLIPVisionModelOutput]:
+        """
+        Forward pass for the RBLN-optimized CLIP vision encoder model with projection.
+
+        Args:
+            pixel_values (torch.Tensor): The pixel values to the model.
+            return_dict (bool): Whether to return a dictionary of outputs.
+            output_attentions (Optional[bool]): Whether to return attentions.
+            output_hidden_states (Optional[bool]): Whether to return hidden states.
+            interpolate_pos_encoding (bool): Whether to interpolate position encoding.
+
+        Returns:
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a CLIPVisionModelOutput object.
+        """
+
+        return super().forward(
+            pixel_values=pixel_values,
+            return_dict=return_dict,
+            output_attentions=output_attentions,
+            output_hidden_states=output_hidden_states,
+            interpolate_pos_encoding=interpolate_pos_encoding,
+            **kwargs,
+        )
+
     def _prepare_output(self, output, return_dict):
         # Prepare model output based on return_dict flag.
         # This method can be overridden by subclasses to provide task-specific output handling.
```
optimum/rbln/transformers/models/colpali/colpali_architecture.py

```diff
@@ -156,8 +156,8 @@ class ColPaliAttention(nn.Module):
     def __init__(self, self_attn):
         super().__init__()
         self._original_mod = self_attn
-        self.num_heads =
-        self._original_mod
+        self.num_heads = (
+            getattr(self._original_mod, "num_heads", None) or self._original_mod.config.num_attention_heads
         )
         self.head_dim = self._original_mod.head_dim
         self.scaling = self.head_dim**-0.5
```
optimum/rbln/transformers/models/colpali/modeling_colpali.py

```diff
@@ -14,8 +14,7 @@
 
 import bisect
 from pathlib import Path
-from
-from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Optional, Tuple, Union
 
 import torch
 from transformers import PretrainedConfig, PreTrainedModel
@@ -182,7 +181,7 @@ class RBLNColPaliForRetrieval(RBLNModel):
         return multi_modal_projector
 
     @classmethod
-    def
+    def _wrap_model_if_needed(cls, model: "PreTrainedModel", rbln_config: RBLNModelConfig):
         return RBLNColPaliForRetrievalWrapper(
             causal_lm=model.vlm,
             embedding_proj_layer=model.embedding_proj_layer,
@@ -236,49 +235,11 @@ class RBLNColPaliForRetrieval(RBLNModel):
         return rbln_config
 
     @classmethod
-    def
-
-        model: "PreTrainedModel",
-        config: Optional[PretrainedConfig] = None,
-        rbln_config: Optional[Union[RBLNModelConfig, Dict]] = None,
-        model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
-        subfolder: str = "",
-        **kwargs: Any,
-    ) -> "RBLNModel":
-        """
-        Converts and compiles a pre-trained HuggingFace library model into a RBLN model.
-        This method performs the actual model conversion and compilation process.
-
-        Args:
-            model (PreTrainedModel): The PyTorch model to be compiled.
-                The object must be an instance of the HuggingFace transformers PreTrainedModel class.
-            config (Optional[PretrainedConfig]): The configuration object associated with the model.
-            rbln_config (Optional[Union[RBLNModelConfig, Dict]]): Configuration for RBLN model compilation and runtime.
-                This can be provided as a dictionary or an instance of the model's configuration class (e.g., `RBLNLlamaForCausalLMConfig` for Llama models).
-                For detailed configuration options, see the specific model's configuration class documentation.
-            kwargs: Additional keyword arguments. Arguments with the prefix `rbln_` are passed to rbln_config, while the remaining arguments are passed to the HuggingFace library.
-
-        The method performs the following steps:
-
-        1. Compiles the PyTorch model into an optimized RBLN graph
-        2. Configures the model for the specified NPU device
-        3. Creates the necessary runtime objects if requested
-        4. Saves the compiled model and configurations
-
-        Returns:
-            (RBLNModel): A RBLN model instance ready for inference on RBLN NPU devices.
-        """
-        if not hasattr(model, "vision_tower"):
+    def _reconstruct_model_if_needed(cls, model: "PreTrainedModel"):
+        if hasattr(model, "vlm"):
             model.vision_tower = model.vlm.vision_tower
             del model.vlm.model.vision_tower
-
-        return model
-
-    @classmethod
-    def get_pytorch_model(cls, *args, **kwargs):
-        model = super().get_pytorch_model(*args, **kwargs)
-        model.vision_tower = model.vlm.vision_tower
-        del model.vlm.model.vision_tower
+            return model
         return model
 
     def get_image_features(self, pixel_values: torch.Tensor):
@@ -371,7 +332,7 @@ class RBLNColPaliForRetrieval(RBLNModel):
             ]
             outputs.append(torch.empty(size=language_model_out_size, dtype=torch.float32, device="cpu"))
             if self.rbln_config.output_hidden_states:
-                for
+                for _ in range(self.config.vlm_config.text_config.num_hidden_layers + 1):
                     outputs.append(torch.empty(size=language_model_hidden_states_size, dtype=torch.float32, device="cpu"))
 
         # Embedding_proj_layer is fused on the bottom of the language model.
```
optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py

```diff
@@ -58,7 +58,6 @@ class RBLNColQwen2ForRetrievalConfig(RBLNDecoderOnlyModelConfig):
         visual: Optional[RBLNModelConfig] = None,
         batch_size: Optional[int] = None,
         use_inputs_embeds: bool = True,
-        output_hidden_states: Optional[bool] = False,
         **kwargs,
     ):
         super().__init__(use_inputs_embeds=use_inputs_embeds, **kwargs)
@@ -71,4 +70,3 @@ class RBLNColQwen2ForRetrievalConfig(RBLNDecoderOnlyModelConfig):
             raise ValueError("batch_size is not supported for RBLNColQwen2ForRetrievalConfig")
 
         self.visual = visual
-        self.output_hidden_states = output_hidden_states
```
optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py

```diff
@@ -58,6 +58,7 @@ class RBLNDecoderOnlyModelConfig(RBLNModelConfig):
         sliding_window_layers: Optional[List[int]] = None,
         phases: Optional[List[PhaseType]] = None,
         logits_to_keep: Optional[int] = None,
+        output_hidden_states: Optional[bool] = None,
         **kwargs,
     ):
         """
@@ -112,6 +113,7 @@ class RBLNDecoderOnlyModelConfig(RBLNModelConfig):
                 ["prefill", "decode"] if DecoderOnlyModelForCausalLM is used.
             logits_to_keep (Optional[int]): The number of logits to keep for the decoder. If set to 0, the decoder will keep all logits.
                 Defaults to 0 if DecoderOnlyModel is used, 1 if DecoderOnlyModelForCausalLM is used.
+            output_hidden_states (Optional[bool]): Whether to output the hidden states of the decoder. Defaults to False.
             kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
         Raises:
@@ -232,6 +234,8 @@ class RBLNDecoderOnlyModelConfig(RBLNModelConfig):
         if self.logits_to_keep is not None and self.logits_to_keep > 1:
             raise NotImplementedError("`logits_to_keep` > 1 is currently not supported for RBLN models.")
 
+        self.output_hidden_states = output_hidden_states or False
+
         self.decoder_batch_sizes = None
         if "decode" in self.phases:
             self.decoder_batch_sizes = decoder_batch_sizes
@@ -274,13 +278,18 @@ class RBLNDecoderOnlyModelConfig(RBLNModelConfig):
 
     @property
     def use_lora(self):
-        """Check if LoRA is enabled for this configuration."""
         return self.lora_config is not None
 
     @property
     def can_generate(self) -> bool:
         return "decode" in self.phases
 
+    @property
+    def nbits_per_param(self) -> int:
+        if self.quantization:
+            return self.quantization.nbits_per_param
+        return 16
+
 
 class RBLNDecoderOnlyModelForCausalLMConfig(RBLNDecoderOnlyModelConfig):
     """
```
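With this change, `output_hidden_states` becomes a first-class field of `RBLNDecoderOnlyModelConfig` rather than living only on the ColQwen2 config. A hedged configuration sketch using a concrete subclass such as `RBLNLlamaForCausalLMConfig` (the diff itself cites it as an example config class); the checkpoint ID and the other kwargs are assumptions, not values from this diff:

```python
# Hypothetical configuration sketch; kwargs other than output_hidden_states are assumptions.
from optimum.rbln import RBLNLlamaForCausalLM, RBLNLlamaForCausalLMConfig

rbln_config = RBLNLlamaForCausalLMConfig(
    batch_size=1,
    max_seq_len=4096,
    tensor_parallel_size=4,
    output_hidden_states=True,  # coerced to False when left as None (see __init__ above)
)

model = RBLNLlamaForCausalLM.from_pretrained(
    "meta-llama/Llama-3.1-8B-Instruct",
    export=True,
    rbln_config=rbln_config,
)
```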
optimum/rbln/transformers/models/decoderonly/configuration_lora.py

```diff
@@ -183,7 +183,7 @@ class RBLNLoRAAdapterConfig(RBLNSerializableConfigProtocol):
                 f"Failed to download LoRA adapter '{path.as_posix()}' from HuggingFace Hub. "
                 f"Please check if the model ID is correct or provide a valid local path. "
                 f"Error: {e}"
-            )
+            ) from e
 
     def _load_adapter_config(self) -> Dict[str, Any]:
         """
```