optimum-rbln 0.9.4a2__py3-none-any.whl → 0.10.0.post1__py3-none-any.whl
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- optimum/rbln/__init__.py +44 -0
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/configuration_utils.py +230 -67
- optimum/rbln/diffusers/models/controlnet.py +2 -2
- optimum/rbln/diffusers/models/transformers/prior_transformer.py +2 -2
- optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +2 -2
- optimum/rbln/diffusers/models/transformers/transformer_sd3.py +2 -2
- optimum/rbln/diffusers/pipelines/auto_pipeline.py +2 -3
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +3 -12
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +2 -4
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +1 -3
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +1 -3
- optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +2 -2
- optimum/rbln/modeling_base.py +11 -10
- optimum/rbln/ops/__init__.py +1 -0
- optimum/rbln/ops/attn.py +10 -0
- optimum/rbln/ops/flash_attn.py +8 -0
- optimum/rbln/ops/moe.py +180 -0
- optimum/rbln/ops/sliding_window_attn.py +9 -0
- optimum/rbln/transformers/__init__.py +44 -0
- optimum/rbln/transformers/modeling_attention_utils.py +124 -222
- optimum/rbln/transformers/modeling_outputs.py +25 -0
- optimum/rbln/transformers/modeling_rope_utils.py +78 -42
- optimum/rbln/transformers/models/__init__.py +38 -0
- optimum/rbln/transformers/models/auto/auto_factory.py +3 -3
- optimum/rbln/transformers/models/bart/bart_architecture.py +24 -24
- optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +7 -2
- optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +1 -1
- optimum/rbln/transformers/models/colpali/colpali_architecture.py +14 -20
- optimum/rbln/transformers/models/colpali/configuration_colpali.py +12 -17
- optimum/rbln/transformers/models/colpali/modeling_colpali.py +66 -182
- optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py +40 -23
- optimum/rbln/transformers/models/colqwen2/modeling_colqwen2.py +107 -371
- optimum/rbln/transformers/models/decoderonly/__init__.py +2 -0
- optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +144 -17
- optimum/rbln/transformers/models/decoderonly/configuration_lora.py +1 -1
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +122 -48
- optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +5 -7
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +120 -128
- optimum/rbln/transformers/models/detr/__init__.py +23 -0
- optimum/rbln/transformers/models/detr/configuration_detr.py +38 -0
- optimum/rbln/transformers/models/detr/modeling_detr.py +53 -0
- optimum/rbln/transformers/models/exaone/exaone_architecture.py +0 -36
- optimum/rbln/transformers/models/gemma/gemma_architecture.py +1 -1
- optimum/rbln/transformers/models/gemma2/__init__.py +16 -0
- optimum/rbln/transformers/models/gemma2/configuration_gemma2.py +45 -0
- optimum/rbln/transformers/models/gemma2/gemma2_architecture.py +83 -0
- optimum/rbln/transformers/models/gemma2/modeling_gemma2.py +101 -0
- optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +2 -7
- optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +16 -18
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +5 -177
- optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +8 -34
- optimum/rbln/transformers/models/gpt_oss/__init__.py +16 -0
- optimum/rbln/transformers/models/gpt_oss/configuration_gpt_oss.py +42 -0
- optimum/rbln/transformers/models/gpt_oss/gpt_oss_architecture.py +122 -0
- optimum/rbln/transformers/models/gpt_oss/modeling_gpt_oss.py +168 -0
- optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +8 -5
- optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +6 -4
- optimum/rbln/transformers/models/llava/modeling_llava.py +0 -1
- optimum/rbln/transformers/models/midm/midm_architecture.py +29 -22
- optimum/rbln/transformers/models/mixtral/__init__.py +16 -0
- optimum/rbln/transformers/models/mixtral/configuration_mixtral.py +38 -0
- optimum/rbln/transformers/models/mixtral/mixtral_architecture.py +76 -0
- optimum/rbln/transformers/models/mixtral/modeling_mixtral.py +68 -0
- optimum/rbln/transformers/models/opt/opt_architecture.py +1 -44
- optimum/rbln/transformers/models/paligemma/__init__.py +16 -0
- optimum/rbln/transformers/models/paligemma/configuration_paligemma.py +129 -0
- optimum/rbln/transformers/models/paligemma/modeling_paligemma.py +564 -0
- optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +24 -24
- optimum/rbln/transformers/models/phi/phi_architecture.py +13 -21
- optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +9 -5
- optimum/rbln/transformers/models/qwen2_5_vl/__init__.py +6 -1
- optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +13 -1
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +271 -122
- optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +43 -39
- optimum/rbln/transformers/models/qwen2_moe/__init__.py +16 -0
- optimum/rbln/transformers/models/qwen2_moe/configuration_qwen2_moe.py +38 -0
- optimum/rbln/transformers/models/qwen2_moe/modeling_qwen2_moe.py +68 -0
- optimum/rbln/transformers/models/qwen2_moe/qwen2_moe_architecture.py +94 -0
- optimum/rbln/transformers/models/qwen2_vl/__init__.py +6 -1
- optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +13 -1
- optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +263 -105
- optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +26 -34
- optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +7 -7
- optimum/rbln/transformers/models/qwen3_moe/__init__.py +16 -0
- optimum/rbln/transformers/models/qwen3_moe/configuration_qwen3_moe.py +38 -0
- optimum/rbln/transformers/models/qwen3_moe/modeling_qwen3_moe.py +68 -0
- optimum/rbln/transformers/models/qwen3_moe/qwen3_moe_architecture.py +100 -0
- optimum/rbln/transformers/models/resnet/configuration_resnet.py +10 -4
- optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +14 -12
- optimum/rbln/transformers/models/siglip/modeling_siglip.py +4 -18
- optimum/rbln/transformers/models/swin/configuration_swin.py +1 -6
- optimum/rbln/transformers/models/t5/t5_architecture.py +15 -16
- optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +0 -3
- optimum/rbln/transformers/models/whisper/generation_whisper.py +8 -8
- optimum/rbln/transformers/models/whisper/whisper_architecture.py +0 -3
- optimum/rbln/transformers/utils/rbln_quantization.py +20 -12
- optimum/rbln/utils/deprecation.py +78 -1
- optimum/rbln/utils/hub.py +93 -2
- optimum/rbln/utils/import_utils.py +16 -1
- optimum/rbln/utils/runtime_utils.py +12 -8
- optimum/rbln/utils/submodule.py +24 -0
- {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.10.0.post1.dist-info}/METADATA +6 -6
- {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.10.0.post1.dist-info}/RECORD +107 -81
- optimum/rbln/transformers/models/colqwen2/colqwen2_architecture.py +0 -233
- {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.10.0.post1.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.10.0.post1.dist-info}/entry_points.txt +0 -0
- {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.10.0.post1.dist-info}/licenses/LICENSE +0 -0
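The newly added model families (DETR, Gemma2, GPT-OSS, Mixtral, PaliGemma, Qwen2-MoE, Qwen3-MoE) each follow the existing per-model layout of a configuration, architecture, and modeling module. A minimal loading sketch for one of them, assuming the Gemma2 support is exported as `RBLNGemma2ForCausalLM` and follows the usual optimum-rbln `from_pretrained(..., export=True, rbln_*)` convention; the checkpoint id and compile options below are illustrative and not taken from this diff:

```python
from optimum.rbln import RBLNGemma2ForCausalLM  # assumed export name

# Compile a Hugging Face checkpoint for RBLN NPUs and save the compiled artifacts.
model = RBLNGemma2ForCausalLM.from_pretrained(
    "google/gemma-2-2b-it",   # illustrative checkpoint id
    export=True,              # compile from the original (non-RBLN) checkpoint
    rbln_batch_size=1,        # illustrative compile-time options
    rbln_max_seq_len=8192,
)
model.save_pretrained("gemma-2-2b-it-rbln")
```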
--- a/optimum/rbln/transformers/models/phi/phi_architecture.py
+++ b/optimum/rbln/transformers/models/phi/phi_architecture.py
@@ -20,7 +20,6 @@ from transformers import PhiForCausalLM
 from ..decoderonly.decoderonly_architecture import (
     DecoderOnlyAttention,
     DecoderOnlyLayer,
-    DecoderOnlyModel,
     DecoderOnlyWrapper,
     apply_rotary_pos_emb_partial,
 )
@@ -37,9 +36,6 @@ class PhiWrapper(DecoderOnlyWrapper):
     def get_rbln_layer_class(self):
         return PhiLayer
 
-    def get_rbln_model_class(self):
-        return PhiModel
-
     def get_model_layer(self, model: Union["PhiForCausalLM", "PhiModel"]):
         return model.model if self.is_causal_lm else model
 
@@ -48,13 +44,15 @@ class PhiWrapper(DecoderOnlyWrapper):
 
 
 class PhiAttention(DecoderOnlyAttention):
-    def __post_init__(self):
-        self.q_proj =
-        self.k_proj =
-        self.v_proj =
-        self.o_proj =
-        self.qk_layernorm =
-        self.rotary_ndims =
+    def __post_init__(self, self_attn):
+        self.q_proj = self_attn.q_proj
+        self.k_proj = self_attn.k_proj
+        self.v_proj = self_attn.v_proj
+        self.o_proj = self_attn.dense
+        self.qk_layernorm = self_attn.qk_layernorm
+        self.rotary_ndims = self_attn.rotary_ndims
+        self.q_layernorm = getattr(self_attn, "q_layernorm", None)
+        self.k_layernorm = getattr(self_attn, "k_layernorm", None)
 
     def projection(self, hidden_states, lora_int_id) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
         if lora_int_id is not None:
@@ -65,8 +63,8 @@ class PhiAttention(DecoderOnlyAttention):
         value_states = self.v_proj(hidden_states)
 
         if self.qk_layernorm:
-            query_states = self.
-            key_states = self.
+            query_states = self.q_layernorm(query_states)
+            key_states = self.k_layernorm(key_states)
 
         return query_states, key_states, value_states
 
@@ -75,8 +73,7 @@ class PhiAttention(DecoderOnlyAttention):
 
 
 class PhiLayer(DecoderOnlyLayer):
-
-        raise NotImplementedError
+    _POST_ATTN_LAYERNORM = None
 
     def forward(
         self,
@@ -103,13 +100,8 @@ class PhiLayer(DecoderOnlyLayer):
             block_tables=block_tables,
         )
 
-        feed_forward_hidden_states = self.
+        feed_forward_hidden_states = self.mlp(hidden_states)
 
         hidden_states = attn_output + feed_forward_hidden_states + residual
 
         return hidden_states
-
-
-class PhiModel(DecoderOnlyModel):
-    def get_last_layernorm(self):
-        return self._original_mod.final_layernorm
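Two patterns stand out in this hunk: `__post_init__` now receives the original Hugging Face attention module (`self_attn`) and re-binds its projections explicitly (Phi names its output projection `dense`), and the overridden method that raised `NotImplementedError` is replaced by a class attribute `_POST_ATTN_LAYERNORM = None`, which fits Phi's parallel residual where attention and MLP outputs are added to the same residual stream. A small sketch of how such a class attribute could be consumed by the base layer; the base-class side shown here is an assumption for illustration, not code from this diff:

```python
class DecoderOnlyLayerSketch:
    # Name of the post-attention layernorm attribute on the HF layer; None means "no such norm".
    _POST_ATTN_LAYERNORM = "post_attention_layernorm"  # assumed default

    def get_post_attention_layernorm(self, hf_layer):
        if self._POST_ATTN_LAYERNORM is None:
            return None
        return getattr(hf_layer, self._POST_ATTN_LAYERNORM)


class PhiLayerSketch(DecoderOnlyLayerSketch):
    # Phi uses a single input layernorm and a parallel residual
    # (hidden = attn_output + mlp_output + residual), so there is nothing to fetch here.
    _POST_ATTN_LAYERNORM = None
```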
--- a/optimum/rbln/transformers/models/pixtral/modeling_pixtral.py
+++ b/optimum/rbln/transformers/models/pixtral/modeling_pixtral.py
@@ -297,13 +297,17 @@ class RBLNPixtralVisionModel(RBLNModel):
         Forward pass for the RBLN-optimized Pixtral vision model.
 
         Args:
-            pixel_values
-
-
-
+            pixel_values: Input images as a tensor of shape (batch_size, num_channels, image_size, image_size).
+                Pixel values can be obtained using PixtralImageProcessor. See PixtralImageProcessor.__call__()
+                for details (PixtralProcessor uses PixtralImageProcessor for processing images).
+            image_sizes: The sizes of the images in the batch as a tensor of shape (batch_size, 2),
+                being (height, width) for each image. Optional.
+            output_hidden_states: Whether or not to return the hidden states of all layers. Optional.
+                See hidden_states under returned tensors for more detail.
+            return_dict: Whether or not to return a ModelOutput instead of a plain tuple. Optional.
 
         Returns:
-
+            The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a BaseModelOutput object.
         """
         output_hidden_states = (
             output_hidden_states if output_hidden_states is not None else self.rbln_config.output_hidden_states
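The expanded docstring pins down the forward signature of `RBLNPixtralVisionModel`. A usage sketch against that signature, assuming the class is exported from `optimum.rbln` and compiled via the usual `from_pretrained(..., export=True)` flow; the checkpoint id, tensor shapes, and dummy inputs are illustrative only:

```python
import torch
from optimum.rbln import RBLNPixtralVisionModel  # assumed top-level export

# Compile the vision tower from a HF checkpoint (id is illustrative).
vision_model = RBLNPixtralVisionModel.from_pretrained("mistral-community/pixtral-12b", export=True)

pixel_values = torch.randn(1, 3, 512, 512)   # (batch_size, num_channels, H, W); real values come from PixtralImageProcessor
image_sizes = torch.tensor([[512, 512]])     # optional (height, width) per image

outputs = vision_model(
    pixel_values=pixel_values,
    image_sizes=image_sizes,
    output_hidden_states=True,   # also return per-layer hidden states
    return_dict=True,            # BaseModelOutput instead of a plain tuple
)
```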
--- a/optimum/rbln/transformers/models/qwen2_5_vl/__init__.py
+++ b/optimum/rbln/transformers/models/qwen2_5_vl/__init__.py
@@ -15,5 +15,10 @@
 from .configuration_qwen2_5_vl import (
     RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
     RBLNQwen2_5_VLForConditionalGenerationConfig,
+    RBLNQwen2_5_VLModelConfig,
+)
+from .modeling_qwen2_5_vl import (
+    RBLNQwen2_5_VisionTransformerPretrainedModel,
+    RBLNQwen2_5_VLForConditionalGeneration,
+    RBLNQwen2_5_VLModel,
 )
-from .modeling_qwen2_5_vl import RBLNQwen2_5_VisionTransformerPretrainedModel, RBLNQwen2_5_VLForConditionalGeneration
--- a/optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py
+++ b/optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py
@@ -15,7 +15,7 @@
 from typing import Any, List, Optional, Union
 
 from ....configuration_utils import RBLNModelConfig
-from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausalLMConfig
+from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelConfig, RBLNDecoderOnlyModelForCausalLMConfig
 
 
 class RBLNQwen2_5_VLForConditionalGenerationConfig(RBLNDecoderOnlyModelForCausalLMConfig):
@@ -56,6 +56,18 @@ class RBLNQwen2_5_VLForConditionalGenerationConfig(RBLNDecoderOnlyModelForCausal
         self.visual = visual
 
 
+class RBLNQwen2_5_VLModelConfig(RBLNDecoderOnlyModelConfig):
+    """
+    Configuration class for RBLNQwen2_5_VLModel.
+    """
+
+    submodules = ["visual"]
+
+    def __init__(self, visual: Optional[RBLNModelConfig] = None, **kwargs: Any):
+        super().__init__(**kwargs)
+        self.visual = self.initialize_submodule_config(submodule_config=visual)
+
+
 class RBLNQwen2_5_VisionTransformerPretrainedModelConfig(RBLNModelConfig):
     """
     Configuration class for RBLNQwen2_5_VisionTransformerPretrainedModel.