optimum_rbln-0.9.4a2-py3-none-any.whl → optimum_rbln-0.9.5a4-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
- optimum/rbln/__init__.py +36 -0
- optimum/rbln/__version__.py +2 -2
- optimum/rbln/configuration_utils.py +35 -16
- optimum/rbln/modeling_base.py +6 -6
- optimum/rbln/ops/__init__.py +1 -0
- optimum/rbln/ops/attn.py +10 -0
- optimum/rbln/ops/flash_attn.py +8 -0
- optimum/rbln/ops/moe.py +180 -0
- optimum/rbln/ops/sliding_window_attn.py +9 -0
- optimum/rbln/transformers/__init__.py +36 -0
- optimum/rbln/transformers/modeling_attention_utils.py +118 -222
- optimum/rbln/transformers/modeling_outputs.py +25 -0
- optimum/rbln/transformers/modeling_rope_utils.py +78 -42
- optimum/rbln/transformers/models/__init__.py +28 -0
- optimum/rbln/transformers/models/bart/bart_architecture.py +24 -24
- optimum/rbln/transformers/models/colpali/colpali_architecture.py +14 -20
- optimum/rbln/transformers/models/colpali/configuration_colpali.py +12 -17
- optimum/rbln/transformers/models/colpali/modeling_colpali.py +66 -182
- optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py +38 -21
- optimum/rbln/transformers/models/colqwen2/modeling_colqwen2.py +107 -371
- optimum/rbln/transformers/models/decoderonly/__init__.py +2 -0
- optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +118 -16
- optimum/rbln/transformers/models/decoderonly/configuration_lora.py +1 -1
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +121 -48
- optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +5 -7
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +75 -107
- optimum/rbln/transformers/models/exaone/exaone_architecture.py +0 -36
- optimum/rbln/transformers/models/gemma/gemma_architecture.py +1 -1
- optimum/rbln/transformers/models/gemma2/__init__.py +16 -0
- optimum/rbln/transformers/models/gemma2/configuration_gemma2.py +45 -0
- optimum/rbln/transformers/models/gemma2/gemma2_architecture.py +83 -0
- optimum/rbln/transformers/models/gemma2/modeling_gemma2.py +101 -0
- optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +16 -18
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +1 -1
- optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +8 -34
- optimum/rbln/transformers/models/gpt_oss/__init__.py +16 -0
- optimum/rbln/transformers/models/gpt_oss/configuration_gpt_oss.py +41 -0
- optimum/rbln/transformers/models/gpt_oss/gpt_oss_architecture.py +122 -0
- optimum/rbln/transformers/models/gpt_oss/modeling_gpt_oss.py +165 -0
- optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +8 -5
- optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +6 -4
- optimum/rbln/transformers/models/llava/modeling_llava.py +0 -1
- optimum/rbln/transformers/models/midm/midm_architecture.py +29 -22
- optimum/rbln/transformers/models/opt/opt_architecture.py +1 -44
- optimum/rbln/transformers/models/paligemma/__init__.py +16 -0
- optimum/rbln/transformers/models/paligemma/configuration_paligemma.py +129 -0
- optimum/rbln/transformers/models/paligemma/modeling_paligemma.py +564 -0
- optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +24 -24
- optimum/rbln/transformers/models/phi/phi_architecture.py +13 -21
- optimum/rbln/transformers/models/qwen2_5_vl/__init__.py +6 -1
- optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +11 -1
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +271 -122
- optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +43 -39
- optimum/rbln/transformers/models/qwen2_moe/__init__.py +16 -0
- optimum/rbln/transformers/models/qwen2_moe/configuration_qwen2_moe.py +38 -0
- optimum/rbln/transformers/models/qwen2_moe/modeling_qwen2_moe.py +68 -0
- optimum/rbln/transformers/models/qwen2_moe/qwen2_moe_architecture.py +94 -0
- optimum/rbln/transformers/models/qwen2_vl/__init__.py +6 -1
- optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +11 -1
- optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +263 -105
- optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +26 -34
- optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +7 -7
- optimum/rbln/transformers/models/qwen3_moe/__init__.py +16 -0
- optimum/rbln/transformers/models/qwen3_moe/configuration_qwen3_moe.py +38 -0
- optimum/rbln/transformers/models/qwen3_moe/modeling_qwen3_moe.py +68 -0
- optimum/rbln/transformers/models/qwen3_moe/qwen3_moe_architecture.py +100 -0
- optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +14 -12
- optimum/rbln/transformers/models/siglip/modeling_siglip.py +4 -18
- optimum/rbln/transformers/models/swin/configuration_swin.py +1 -6
- optimum/rbln/transformers/models/t5/t5_architecture.py +15 -16
- optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +0 -3
- optimum/rbln/transformers/models/whisper/whisper_architecture.py +0 -3
- optimum/rbln/transformers/utils/rbln_quantization.py +20 -12
- optimum/rbln/utils/import_utils.py +16 -1
- optimum/rbln/utils/runtime_utils.py +10 -6
- optimum/rbln/utils/submodule.py +24 -0
- {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.9.5a4.dist-info}/METADATA +6 -6
- {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.9.5a4.dist-info}/RECORD +81 -62
- optimum/rbln/transformers/models/colqwen2/colqwen2_architecture.py +0 -233
- {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.9.5a4.dist-info}/WHEEL +0 -0
- {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.9.5a4.dist-info}/entry_points.txt +0 -0
- {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.9.5a4.dist-info}/licenses/LICENSE +0 -0
```diff
--- a/optimum/rbln/transformers/models/opt/opt_architecture.py
+++ b/optimum/rbln/transformers/models/opt/opt_architecture.py
@@ -14,14 +14,7 @@
 
 from typing import TYPE_CHECKING
 
-import torch.nn as nn
-
-from ...models.decoderonly.decoderonly_architecture import (
-    DecoderOnlyAttention,
-    DecoderOnlyLayer,
-    DecoderOnlyModel,
-    DecoderOnlyWrapper,
-)
+from ...models.decoderonly.decoderonly_architecture import DecoderOnlyWrapper
 
 
 if TYPE_CHECKING:
@@ -31,44 +24,8 @@ if TYPE_CHECKING:
 class OPTWrapper(DecoderOnlyWrapper):
     _use_learned_pos_emb = True
 
-    def get_rbln_attn_class(self):
-        return OPTAttention
-
-    def get_rbln_layer_class(self):
-        return OPTDecoderLayer
-
-    def get_rbln_model_class(self):
-        return OPTModel
-
     def get_model_layer(self, model: "OPTForCausalLM"):
         return model.model.decoder if self.is_causal_lm else model.decoder
 
     def get_decoder_layers(self, model: "OPTForCausalLM"):
         return model.model.decoder.layers if self.is_causal_lm else model.decoder.layers
-
-
-class OPTAttention(DecoderOnlyAttention):
-    def __post_init__(self):
-        self.k_proj = self._original_mod.k_proj
-        self.v_proj = self._original_mod.v_proj
-        self.q_proj = self._original_mod.q_proj
-        self.o_proj = self._original_mod.out_proj
-
-
-class OPTModel(DecoderOnlyModel):
-    def get_embedding(self) -> nn.Embedding:
-        return self._original_mod.embed_tokens
-
-    def get_pos_embedding(self):
-        return self._original_mod.embed_positions
-
-    def get_last_layernorm(self) -> nn.LayerNorm:
-        return self._original_mod.final_layer_norm
-
-
-class OPTDecoderLayer(DecoderOnlyLayer):
-    def get_pre_attention_layernorm(self) -> nn.LayerNorm:
-        return self._original_mod.self_attn_layer_norm
-
-    def get_post_attention_layernorm(self) -> nn.LayerNorm:
-        return self._original_mod.final_layer_norm
```
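The two hunks above retire the OPT-specific adapter classes, whose only job was to map OPT's attribute names (`out_proj`, `embed_positions`, `self_attn_layer_norm`, and so on) onto the generic decoder-only interface; only the two layer-lookup hooks remain on `OPTWrapper`. A minimal sketch of how a base class can absorb such adapters by resolving names by convention; the alias table and `resolve_submodule` helper below are hypothetical illustrations, not optimum-rbln's actual `DecoderOnlyWrapper` internals:

```python
# Hypothetical sketch of attribute resolution that would make per-model
# adapters like the deleted OPTAttention unnecessary. NOT the real code.
import torch.nn as nn

# Alias table: canonical name -> attribute names seen across HF model families.
_ALIASES = {
    "o_proj": ("o_proj", "out_proj", "dense"),
    "final_layernorm": ("final_layer_norm", "norm", "ln_f"),
}


def resolve_submodule(module: nn.Module, canonical: str) -> nn.Module:
    """Return the first attribute of `module` matching a canonical name."""
    for name in _ALIASES.get(canonical, (canonical,)):
        candidate = getattr(module, name, None)
        if isinstance(candidate, nn.Module):
            return candidate
    raise AttributeError(f"{type(module).__name__} has no submodule for {canonical!r}")
```

Under that assumption, a family like OPT needs no subclasses at all unless its module layout deviates from every known alias, which is consistent with the net change here: 45 lines of adapter code replaced by a one-line import.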
```diff
--- /dev/null
+++ b/optimum/rbln/transformers/models/paligemma/__init__.py
@@ -0,0 +1,16 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .configuration_paligemma import RBLNPaliGemmaForConditionalGenerationConfig, RBLNPaliGemmaModelConfig
+from .modeling_paligemma import RBLNPaliGemmaForConditionalGeneration, RBLNPaliGemmaModel
```
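The new `__init__.py` re-exports the PaliGemma classes so they are reachable through the usual optimum-rbln entry points. A hedged usage sketch follows; the checkpoint id and the `rbln_batch_size` keyword are illustrative assumptions, not taken from this diff:

```python
# Illustrative only: compiling a PaliGemma checkpoint for RBLN NPUs,
# following the from_pretrained(export=True) pattern used by optimum-rbln.
from optimum.rbln import RBLNPaliGemmaForConditionalGeneration

model = RBLNPaliGemmaForConditionalGeneration.from_pretrained(
    "google/paligemma-3b-mix-224",  # example checkpoint id, not from the diff
    export=True,                    # compile the PyTorch weights for the NPU
    rbln_batch_size=1,              # assumption: forwarded into the RBLN config
)
model.save_pretrained("paligemma-rbln")  # persist the compiled artifacts
```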
```diff
--- /dev/null
+++ b/optimum/rbln/transformers/models/paligemma/configuration_paligemma.py
@@ -0,0 +1,129 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Optional
+
+from ....configuration_utils import RBLNModelConfig
+from ....utils.logging import get_logger
+
+
+logger = get_logger(__name__)
+
+
+class RBLNPaliGemmaForConditionalGenerationConfig(RBLNModelConfig):
+    """
+    Configuration class for RBLNPaliGemmaForConditionalGenerationConfig.
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized PaliGemma models for multimodal conditional generation tasks
+    that combine vision and language processing capabilities.
+    """
+
+    submodules = ["vision_tower", "language_model"]
+    _allow_no_compile_cfgs = True
+
+    def __init__(
+        self,
+        batch_size: Optional[int] = None,
+        vision_tower: Optional[RBLNModelConfig] = None,
+        language_model: Optional[RBLNModelConfig] = None,
+        output_hidden_states: Optional[bool] = None,
+        **kwargs: Any,
+    ):
+        """
+        Args:
+            batch_size (Optional[int]): The batch size for inference. Defaults to 1.
+            vision_tower (Optional[RBLNModelConfig]): Configuration for the vision encoder component.
+                This can include settings specific to the vision encoder, such as input resolution or other vision-related parameters.
+                If not provided, default settings will be used.
+            language_model (Optional[RBLNModelConfig]): Configuration for the language model component.
+                This can include settings specific to the language model, such as tensor parallelism or other text-related parameters.
+                If not provided, default settings will be used.
+            output_hidden_states (Optional[bool]): Whether to output the hidden states of the decoder. Defaults to False.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
+        Raises:
+            ValueError: If `batch_size` is not a positive integer.
+        """
+        super().__init__(**kwargs)
+        self.batch_size = batch_size or 1
+        if not isinstance(self.batch_size, int) or self.batch_size < 0:
+            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+
+        if self.batch_size != 1:
+            logger.warning("Ignore batch_size for PaliGemma vision tower. It will be set to 1.")
+
+        self.output_hidden_states = output_hidden_states or False
+
+        self.vision_tower = self.initialize_submodule_config(
+            submodule_config=vision_tower,
+            batch_size=1,  # vision_tower batch_size is always 1 in PaliGemma
+            force_kwargs=True,
+        )
+        self.language_model = self.initialize_submodule_config(
+            submodule_config=language_model,
+            batch_size=batch_size,
+            use_position_ids=True,
+            use_attention_mask=True,
+            use_inputs_embeds=True,
+        )
+
+
+class RBLNPaliGemmaModelConfig(RBLNModelConfig):
+    submodules = ["vision_tower", "language_model"]
+    _allow_no_compile_cfgs = True
+
+    def __init__(
+        self,
+        batch_size: Optional[int] = None,
+        vision_tower: Optional[RBLNModelConfig] = None,
+        language_model: Optional[RBLNModelConfig] = None,
+        output_hidden_states: Optional[bool] = None,
+        **kwargs: Any,
+    ):
+        """
+        Args:
+            batch_size (Optional[int]): The batch size for inference. Defaults to 1.
+            vision_tower (Optional[RBLNModelConfig]): Configuration for the vision encoder component.
+                This can include settings specific to the vision encoder, such as input resolution or other vision-related parameters.
+                If not provided, default settings will be used.
+            language_model (Optional[RBLNModelConfig]): Configuration for the language model component.
+                This can include settings specific to the language model, such as tensor parallelism or other text-related parameters.
+                If not provided, default settings will be used.
+            output_hidden_states (Optional[bool]): Whether to output the hidden states of the decoder. Defaults to False.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
+        Raises:
+            ValueError: If `batch_size` is not a positive integer.
+        """
+        super().__init__(**kwargs)
+        self.batch_size = batch_size or 1
+        if not isinstance(self.batch_size, int) or self.batch_size < 0:
+            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+
+        if self.batch_size != 1:
+            logger.warning("Ignore batch_size for PaliGemma vision tower. It will be set to 1.")
+
+        self.output_hidden_states = output_hidden_states or False
+        self.vision_tower = self.initialize_submodule_config(
+            submodule_config=vision_tower,
+            batch_size=1,  # vision_tower batch_size is always 1 in PaliGemma
+            force_kwargs=True,
+        )
+
+        self.language_model = self.initialize_submodule_config(
+            submodule_config=language_model,
+            batch_size=batch_size,
+            use_position_ids=True,
+            use_attention_mask=True,
+            use_inputs_embeds=True,
+            output_hidden_states=output_hidden_states,
+        )
```
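Both config classes coerce a missing `batch_size` to 1, reject negative values, pin the vision tower's batch size to 1 (with `force_kwargs=True`), and pass the top-level batch size through to the language model. A small sketch of that behavior, assuming `initialize_submodule_config` materializes `batch_size` on the resulting submodule configs:

```python
from optimum.rbln import RBLNPaliGemmaForConditionalGenerationConfig

# batch_size=2 triggers the warning shown in the diff; the vision tower stays at 1.
cfg = RBLNPaliGemmaForConditionalGenerationConfig(batch_size=2)

# Assumption: the submodule configs expose the attributes set above.
assert cfg.vision_tower.batch_size == 1    # always pinned for the vision tower
assert cfg.language_model.batch_size == 2  # top-level batch size applies to the LM
assert cfg.output_hidden_states is False   # defaults to False when not given
```

Note that `batch_size=0` never reaches the validation: `batch_size or 1` coerces it to 1 first, so only negative values actually raise the `ValueError`.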