optimum-rbln 0.9.3rc0__py3-none-any.whl → 0.9.5a4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- optimum/rbln/__init__.py +48 -0
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/configuration_utils.py +50 -21
- optimum/rbln/diffusers/__init__.py +12 -0
- optimum/rbln/diffusers/configurations/__init__.py +3 -0
- optimum/rbln/diffusers/configurations/models/__init__.py +2 -0
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_temporal_decoder.py +67 -0
- optimum/rbln/diffusers/configurations/models/configuration_unet_spatio_temporal_condition.py +59 -0
- optimum/rbln/diffusers/configurations/pipelines/__init__.py +3 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_video_diffusion.py +114 -0
- optimum/rbln/diffusers/modeling_diffusers.py +1 -1
- optimum/rbln/diffusers/models/__init__.py +17 -3
- optimum/rbln/diffusers/models/autoencoders/__init__.py +1 -0
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +3 -3
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +275 -0
- optimum/rbln/diffusers/models/autoencoders/vae.py +27 -8
- optimum/rbln/diffusers/models/controlnet.py +17 -2
- optimum/rbln/diffusers/models/transformers/prior_transformer.py +16 -2
- optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +16 -1
- optimum/rbln/diffusers/models/transformers/transformer_sd3.py +14 -1
- optimum/rbln/diffusers/models/unets/__init__.py +1 -0
- optimum/rbln/diffusers/models/unets/unet_2d_condition.py +18 -2
- optimum/rbln/diffusers/models/unets/unet_spatio_temporal_condition.py +201 -0
- optimum/rbln/diffusers/pipelines/__init__.py +4 -0
- optimum/rbln/diffusers/pipelines/auto_pipeline.py +2 -2
- optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +20 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +13 -4
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +13 -4
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +13 -4
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -4
- optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +1 -1
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +1 -1
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +1 -2
- optimum/rbln/diffusers/pipelines/stable_video_diffusion/__init__.py +15 -0
- optimum/rbln/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +46 -0
- optimum/rbln/modeling.py +20 -45
- optimum/rbln/modeling_base.py +18 -14
- optimum/rbln/ops/__init__.py +1 -0
- optimum/rbln/ops/attn.py +10 -0
- optimum/rbln/ops/flash_attn.py +8 -0
- optimum/rbln/ops/moe.py +180 -0
- optimum/rbln/ops/sliding_window_attn.py +9 -0
- optimum/rbln/transformers/__init__.py +36 -0
- optimum/rbln/transformers/configuration_generic.py +0 -27
- optimum/rbln/transformers/modeling_attention_utils.py +156 -127
- optimum/rbln/transformers/modeling_generic.py +2 -61
- optimum/rbln/transformers/modeling_outputs.py +26 -0
- optimum/rbln/transformers/modeling_rope_utils.py +78 -42
- optimum/rbln/transformers/models/__init__.py +28 -0
- optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +28 -2
- optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +68 -5
- optimum/rbln/transformers/models/auto/auto_factory.py +1 -0
- optimum/rbln/transformers/models/bart/bart_architecture.py +24 -24
- optimum/rbln/transformers/models/bart/modeling_bart.py +23 -2
- optimum/rbln/transformers/models/bert/modeling_bert.py +86 -1
- optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +42 -15
- optimum/rbln/transformers/models/clip/modeling_clip.py +40 -2
- optimum/rbln/transformers/models/colpali/colpali_architecture.py +14 -20
- optimum/rbln/transformers/models/colpali/configuration_colpali.py +12 -17
- optimum/rbln/transformers/models/colpali/modeling_colpali.py +66 -221
- optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py +38 -23
- optimum/rbln/transformers/models/colqwen2/modeling_colqwen2.py +107 -371
- optimum/rbln/transformers/models/decoderonly/__init__.py +2 -0
- optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +128 -17
- optimum/rbln/transformers/models/decoderonly/configuration_lora.py +2 -2
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +211 -89
- optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +205 -64
- optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +17 -9
- optimum/rbln/transformers/models/decoderonly/lora_architecture.py +1 -1
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +194 -132
- optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py +17 -0
- optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +24 -0
- optimum/rbln/transformers/models/dpt/modeling_dpt.py +17 -0
- optimum/rbln/transformers/models/exaone/exaone_architecture.py +0 -36
- optimum/rbln/transformers/models/gemma/gemma_architecture.py +1 -1
- optimum/rbln/transformers/models/gemma2/__init__.py +16 -0
- optimum/rbln/transformers/models/gemma2/configuration_gemma2.py +45 -0
- optimum/rbln/transformers/models/gemma2/gemma2_architecture.py +83 -0
- optimum/rbln/transformers/models/gemma2/modeling_gemma2.py +101 -0
- optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +23 -19
- optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py +42 -70
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +46 -31
- optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +8 -34
- optimum/rbln/transformers/models/gpt_oss/__init__.py +16 -0
- optimum/rbln/transformers/models/gpt_oss/configuration_gpt_oss.py +41 -0
- optimum/rbln/transformers/models/gpt_oss/gpt_oss_architecture.py +122 -0
- optimum/rbln/transformers/models/gpt_oss/modeling_gpt_oss.py +165 -0
- optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +8 -5
- optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +7 -5
- optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +24 -9
- optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +3 -5
- optimum/rbln/transformers/models/llava/modeling_llava.py +37 -26
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +3 -5
- optimum/rbln/transformers/models/midm/midm_architecture.py +29 -22
- optimum/rbln/transformers/models/mistral/modeling_mistral.py +0 -22
- optimum/rbln/transformers/models/opt/modeling_opt.py +2 -2
- optimum/rbln/transformers/models/opt/opt_architecture.py +1 -44
- optimum/rbln/transformers/models/paligemma/__init__.py +16 -0
- optimum/rbln/transformers/models/paligemma/configuration_paligemma.py +129 -0
- optimum/rbln/transformers/models/paligemma/modeling_paligemma.py +564 -0
- optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +1 -1
- optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +24 -24
- optimum/rbln/transformers/models/phi/phi_architecture.py +13 -21
- optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +13 -1
- optimum/rbln/transformers/models/pixtral/pixtral_architecture.py +2 -2
- optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +0 -28
- optimum/rbln/transformers/models/qwen2_5_vl/__init__.py +6 -1
- optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +11 -1
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +278 -130
- optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +43 -39
- optimum/rbln/transformers/models/qwen2_moe/__init__.py +16 -0
- optimum/rbln/transformers/models/qwen2_moe/configuration_qwen2_moe.py +38 -0
- optimum/rbln/transformers/models/qwen2_moe/modeling_qwen2_moe.py +68 -0
- optimum/rbln/transformers/models/qwen2_moe/qwen2_moe_architecture.py +94 -0
- optimum/rbln/transformers/models/qwen2_vl/__init__.py +6 -1
- optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +11 -1
- optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +268 -111
- optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +27 -35
- optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +0 -20
- optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +7 -7
- optimum/rbln/transformers/models/qwen3_moe/__init__.py +16 -0
- optimum/rbln/transformers/models/qwen3_moe/configuration_qwen3_moe.py +38 -0
- optimum/rbln/transformers/models/qwen3_moe/modeling_qwen3_moe.py +68 -0
- optimum/rbln/transformers/models/qwen3_moe/qwen3_moe_architecture.py +100 -0
- optimum/rbln/transformers/models/resnet/configuration_resnet.py +17 -0
- optimum/rbln/transformers/models/resnet/modeling_resnet.py +73 -0
- optimum/rbln/transformers/models/roberta/modeling_roberta.py +33 -0
- optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +2 -4
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +36 -12
- optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +14 -12
- optimum/rbln/transformers/models/siglip/modeling_siglip.py +21 -19
- optimum/rbln/transformers/models/swin/configuration_swin.py +1 -6
- optimum/rbln/transformers/models/swin/modeling_swin.py +17 -4
- optimum/rbln/transformers/models/t5/modeling_t5.py +2 -2
- optimum/rbln/transformers/models/t5/t5_architecture.py +16 -17
- optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +25 -10
- optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +0 -3
- optimum/rbln/transformers/models/vit/modeling_vit.py +19 -0
- optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +15 -3
- optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +60 -8
- optimum/rbln/transformers/models/whisper/generation_whisper.py +48 -14
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +2 -2
- optimum/rbln/transformers/models/whisper/whisper_architecture.py +0 -3
- optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +53 -0
- optimum/rbln/transformers/utils/rbln_quantization.py +29 -12
- optimum/rbln/utils/deprecation.py +213 -0
- optimum/rbln/utils/hub.py +14 -3
- optimum/rbln/utils/import_utils.py +23 -2
- optimum/rbln/utils/runtime_utils.py +42 -6
- optimum/rbln/utils/submodule.py +27 -1
- {optimum_rbln-0.9.3rc0.dist-info → optimum_rbln-0.9.5a4.dist-info}/METADATA +6 -6
- {optimum_rbln-0.9.3rc0.dist-info → optimum_rbln-0.9.5a4.dist-info}/RECORD +155 -129
- {optimum_rbln-0.9.3rc0.dist-info → optimum_rbln-0.9.5a4.dist-info}/WHEEL +1 -1
- optimum/rbln/transformers/models/colqwen2/colqwen2_architecture.py +0 -233
- optimum/rbln/utils/depreacate_utils.py +0 -16
- {optimum_rbln-0.9.3rc0.dist-info → optimum_rbln-0.9.5a4.dist-info}/entry_points.txt +0 -0
- {optimum_rbln-0.9.3rc0.dist-info → optimum_rbln-0.9.5a4.dist-info}/licenses/LICENSE +0 -0
|
@@ -20,7 +20,6 @@ from transformers import PhiForCausalLM
|
|
|
20
20
|
from ..decoderonly.decoderonly_architecture import (
|
|
21
21
|
DecoderOnlyAttention,
|
|
22
22
|
DecoderOnlyLayer,
|
|
23
|
-
DecoderOnlyModel,
|
|
24
23
|
DecoderOnlyWrapper,
|
|
25
24
|
apply_rotary_pos_emb_partial,
|
|
26
25
|
)
|
|
@@ -37,9 +36,6 @@ class PhiWrapper(DecoderOnlyWrapper):
|
|
|
37
36
|
def get_rbln_layer_class(self):
|
|
38
37
|
return PhiLayer
|
|
39
38
|
|
|
40
|
-
def get_rbln_model_class(self):
|
|
41
|
-
return PhiModel
|
|
42
|
-
|
|
43
39
|
def get_model_layer(self, model: Union["PhiForCausalLM", "PhiModel"]):
|
|
44
40
|
return model.model if self.is_causal_lm else model
|
|
45
41
|
|
|
@@ -48,13 +44,15 @@ class PhiWrapper(DecoderOnlyWrapper):
|
|
|
48
44
|
|
|
49
45
|
|
|
50
46
|
class PhiAttention(DecoderOnlyAttention):
|
|
51
|
-
def __post_init__(self):
|
|
52
|
-
self.q_proj =
|
|
53
|
-
self.k_proj =
|
|
54
|
-
self.v_proj =
|
|
55
|
-
self.o_proj =
|
|
56
|
-
self.qk_layernorm =
|
|
57
|
-
self.rotary_ndims =
|
|
47
|
+
def __post_init__(self, self_attn):
|
|
48
|
+
self.q_proj = self_attn.q_proj
|
|
49
|
+
self.k_proj = self_attn.k_proj
|
|
50
|
+
self.v_proj = self_attn.v_proj
|
|
51
|
+
self.o_proj = self_attn.dense
|
|
52
|
+
self.qk_layernorm = self_attn.qk_layernorm
|
|
53
|
+
self.rotary_ndims = self_attn.rotary_ndims
|
|
54
|
+
self.q_layernorm = getattr(self_attn, "q_layernorm", None)
|
|
55
|
+
self.k_layernorm = getattr(self_attn, "k_layernorm", None)
|
|
58
56
|
|
|
59
57
|
def projection(self, hidden_states, lora_int_id) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
|
|
60
58
|
if lora_int_id is not None:
|
|
@@ -65,8 +63,8 @@ class PhiAttention(DecoderOnlyAttention):
|
|
|
65
63
|
value_states = self.v_proj(hidden_states)
|
|
66
64
|
|
|
67
65
|
if self.qk_layernorm:
|
|
68
|
-
query_states = self.
|
|
69
|
-
key_states = self.
|
|
66
|
+
query_states = self.q_layernorm(query_states)
|
|
67
|
+
key_states = self.k_layernorm(key_states)
|
|
70
68
|
|
|
71
69
|
return query_states, key_states, value_states
|
|
72
70
|
|
|
@@ -75,8 +73,7 @@ class PhiAttention(DecoderOnlyAttention):
|
|
|
75
73
|
|
|
76
74
|
|
|
77
75
|
class PhiLayer(DecoderOnlyLayer):
|
|
78
|
-
|
|
79
|
-
raise NotImplementedError
|
|
76
|
+
_POST_ATTN_LAYERNORM = None
|
|
80
77
|
|
|
81
78
|
def forward(
|
|
82
79
|
self,
|
|
@@ -103,13 +100,8 @@ class PhiLayer(DecoderOnlyLayer):
|
|
|
103
100
|
block_tables=block_tables,
|
|
104
101
|
)
|
|
105
102
|
|
|
106
|
-
feed_forward_hidden_states = self.
|
|
103
|
+
feed_forward_hidden_states = self.mlp(hidden_states)
|
|
107
104
|
|
|
108
105
|
hidden_states = attn_output + feed_forward_hidden_states + residual
|
|
109
106
|
|
|
110
107
|
return hidden_states
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
class PhiModel(DecoderOnlyModel):
|
|
114
|
-
def get_last_layernorm(self):
|
|
115
|
-
return self._original_mod.final_layernorm
|
|
@@ -229,7 +229,7 @@ class RBLNPixtralVisionModel(RBLNModel):
|
|
|
229
229
|
torch.save(save_dict, save_dir_path / subfolder / "torch_artifacts.pth")
|
|
230
230
|
|
|
231
231
|
@classmethod
|
|
232
|
-
def
|
|
232
|
+
def _wrap_model_if_needed(
|
|
233
233
|
cls, model: torch.nn.Module, rbln_config: RBLNPixtralVisionModelConfig
|
|
234
234
|
) -> torch.nn.Module:
|
|
235
235
|
wrapper_cfg = {
|
|
@@ -293,6 +293,18 @@ class RBLNPixtralVisionModel(RBLNModel):
|
|
|
293
293
|
return_dict: bool = True,
|
|
294
294
|
**kwargs,
|
|
295
295
|
) -> Union[Tuple, BaseModelOutput]:
|
|
296
|
+
"""
|
|
297
|
+
Forward pass for the RBLN-optimized Pixtral vision model.
|
|
298
|
+
|
|
299
|
+
Args:
|
|
300
|
+
pixel_values (torch.Tensor of shape (batch_size, num_channels, image_size, image_size)) — The tensors corresponding to the input images. Pixel values can be obtained using PixtralImageProcessor. See PixtralImageProcessor.call() for details (PixtralProcessor uses PixtralImageProcessor for processing images).
|
|
301
|
+
image_sizes (torch.Tensor of shape (batch_size, 2), optional) — The sizes of the images in the batch, being (height, width) for each image.
|
|
302
|
+
output_hidden_states (bool, optional) — Whether or not to return the hidden states of all layers. See hidden_states under returned tensors for more detail.
|
|
303
|
+
return_dict (bool, optional) — Whether or not to return a ModelOutput instead of a plain tuple.
|
|
304
|
+
|
|
305
|
+
Returns:
|
|
306
|
+
BaseModelOutput or tuple(torch.FloatTensor)
|
|
307
|
+
"""
|
|
296
308
|
output_hidden_states = (
|
|
297
309
|
output_hidden_states if output_hidden_states is not None else self.rbln_config.output_hidden_states
|
|
298
310
|
)
|
|
@@ -24,8 +24,8 @@ class PixtralAttention(nn.Module):
|
|
|
24
24
|
def __init__(self, self_attention):
|
|
25
25
|
super().__init__()
|
|
26
26
|
self.original_model = self_attention
|
|
27
|
-
self.num_heads =
|
|
28
|
-
self.original_model
|
|
27
|
+
self.num_heads = (
|
|
28
|
+
getattr(self.original_model, "num_heads", None) or self.original_model.config.num_attention_heads
|
|
29
29
|
)
|
|
30
30
|
self.head_dim = self.original_model.head_dim
|
|
31
31
|
self.scaling = self.head_dim**-0.5
|
|
@@ -12,13 +12,11 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from transformers import PretrainedConfig
|
|
16
15
|
|
|
17
16
|
from ....utils import logging
|
|
18
17
|
from ...models.decoderonly import (
|
|
19
18
|
RBLNDecoderOnlyModel,
|
|
20
19
|
RBLNDecoderOnlyModelForCausalLM,
|
|
21
|
-
RBLNDecoderOnlyModelForCausalLMConfig,
|
|
22
20
|
)
|
|
23
21
|
from .qwen2_architecture import QWEN2Wrapper
|
|
24
22
|
|
|
@@ -87,19 +85,6 @@ class RBLNQwen2ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
|
|
|
87
85
|
|
|
88
86
|
_decoder_wrapper_cls = QWEN2Wrapper
|
|
89
87
|
|
|
90
|
-
@classmethod
|
|
91
|
-
def _update_sliding_window_config(
|
|
92
|
-
cls, model_config: PretrainedConfig, rbln_config: RBLNDecoderOnlyModelForCausalLMConfig
|
|
93
|
-
):
|
|
94
|
-
# https://github.com/huggingface/transformers/issues/35896
|
|
95
|
-
# There seems to be a bug in transformers(v4.52.4). Therefore, similar to when attn_implementation is eager,
|
|
96
|
-
# we set all layers to use sliding window in this version. This should be updated once the bug is fixed.
|
|
97
|
-
|
|
98
|
-
rbln_config.cache_impl = "sliding_window"
|
|
99
|
-
rbln_config.sliding_window = model_config.sliding_window
|
|
100
|
-
rbln_config.sliding_window_layers = list(range(model_config.num_hidden_layers))
|
|
101
|
-
return rbln_config
|
|
102
|
-
|
|
103
88
|
|
|
104
89
|
class RBLNQwen2Model(RBLNDecoderOnlyModel):
|
|
105
90
|
"""
|
|
@@ -108,16 +93,3 @@ class RBLNQwen2Model(RBLNDecoderOnlyModel):
|
|
|
108
93
|
"""
|
|
109
94
|
|
|
110
95
|
_decoder_wrapper_cls = QWEN2Wrapper
|
|
111
|
-
|
|
112
|
-
@classmethod
|
|
113
|
-
def _update_sliding_window_config(
|
|
114
|
-
cls, model_config: PretrainedConfig, rbln_config: RBLNDecoderOnlyModelForCausalLMConfig
|
|
115
|
-
):
|
|
116
|
-
# https://github.com/huggingface/transformers/issues/35896
|
|
117
|
-
# There seems to be a bug in transformers(v4.52.4). Therefore, similar to when attn_implementation is eager,
|
|
118
|
-
# we set all layers to use sliding window in this version. This should be updated once the bug is fixed.
|
|
119
|
-
|
|
120
|
-
rbln_config.cache_impl = "sliding_window"
|
|
121
|
-
rbln_config.sliding_window = model_config.sliding_window
|
|
122
|
-
rbln_config.sliding_window_layers = list(range(model_config.num_hidden_layers))
|
|
123
|
-
return rbln_config
|
|
@@ -15,5 +15,10 @@
|
|
|
15
15
|
from .configuration_qwen2_5_vl import (
|
|
16
16
|
RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
|
|
17
17
|
RBLNQwen2_5_VLForConditionalGenerationConfig,
|
|
18
|
+
RBLNQwen2_5_VLModelConfig,
|
|
19
|
+
)
|
|
20
|
+
from .modeling_qwen2_5_vl import (
|
|
21
|
+
RBLNQwen2_5_VisionTransformerPretrainedModel,
|
|
22
|
+
RBLNQwen2_5_VLForConditionalGeneration,
|
|
23
|
+
RBLNQwen2_5_VLModel,
|
|
18
24
|
)
|
|
19
|
-
from .modeling_qwen2_5_vl import RBLNQwen2_5_VisionTransformerPretrainedModel, RBLNQwen2_5_VLForConditionalGeneration
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
from typing import Any, List, Optional, Union
|
|
16
16
|
|
|
17
17
|
from ....configuration_utils import RBLNModelConfig
|
|
18
|
-
from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausalLMConfig
|
|
18
|
+
from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelConfig, RBLNDecoderOnlyModelForCausalLMConfig
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class RBLNQwen2_5_VLForConditionalGenerationConfig(RBLNDecoderOnlyModelForCausalLMConfig):
|
|
@@ -56,6 +56,16 @@ class RBLNQwen2_5_VLForConditionalGenerationConfig(RBLNDecoderOnlyModelForCausal
|
|
|
56
56
|
self.visual = visual
|
|
57
57
|
|
|
58
58
|
|
|
59
|
+
class RBLNQwen2_5_VLModelConfig(RBLNDecoderOnlyModelConfig):
|
|
60
|
+
"""
|
|
61
|
+
Configuration class for RBLNQwen2_5_VLModel.
|
|
62
|
+
"""
|
|
63
|
+
|
|
64
|
+
def __init__(self, visual: Optional[RBLNModelConfig] = None, **kwargs: Any):
|
|
65
|
+
super().__init__(**kwargs)
|
|
66
|
+
self.visual = self.initialize_submodule_config(submodule_config=visual)
|
|
67
|
+
|
|
68
|
+
|
|
59
69
|
class RBLNQwen2_5_VisionTransformerPretrainedModelConfig(RBLNModelConfig):
|
|
60
70
|
"""
|
|
61
71
|
Configuration class for RBLNQwen2_5_VisionTransformerPretrainedModel.
|