optimum_rbln-0.9.4a2-py3-none-any.whl → optimum_rbln-0.9.5a4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. optimum/rbln/__init__.py +36 -0
  2. optimum/rbln/__version__.py +2 -2
  3. optimum/rbln/configuration_utils.py +35 -16
  4. optimum/rbln/modeling_base.py +6 -6
  5. optimum/rbln/ops/__init__.py +1 -0
  6. optimum/rbln/ops/attn.py +10 -0
  7. optimum/rbln/ops/flash_attn.py +8 -0
  8. optimum/rbln/ops/moe.py +180 -0
  9. optimum/rbln/ops/sliding_window_attn.py +9 -0
  10. optimum/rbln/transformers/__init__.py +36 -0
  11. optimum/rbln/transformers/modeling_attention_utils.py +118 -222
  12. optimum/rbln/transformers/modeling_outputs.py +25 -0
  13. optimum/rbln/transformers/modeling_rope_utils.py +78 -42
  14. optimum/rbln/transformers/models/__init__.py +28 -0
  15. optimum/rbln/transformers/models/bart/bart_architecture.py +24 -24
  16. optimum/rbln/transformers/models/colpali/colpali_architecture.py +14 -20
  17. optimum/rbln/transformers/models/colpali/configuration_colpali.py +12 -17
  18. optimum/rbln/transformers/models/colpali/modeling_colpali.py +66 -182
  19. optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py +38 -21
  20. optimum/rbln/transformers/models/colqwen2/modeling_colqwen2.py +107 -371
  21. optimum/rbln/transformers/models/decoderonly/__init__.py +2 -0
  22. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +118 -16
  23. optimum/rbln/transformers/models/decoderonly/configuration_lora.py +1 -1
  24. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +121 -48
  25. optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +5 -7
  26. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +75 -107
  27. optimum/rbln/transformers/models/exaone/exaone_architecture.py +0 -36
  28. optimum/rbln/transformers/models/gemma/gemma_architecture.py +1 -1
  29. optimum/rbln/transformers/models/gemma2/__init__.py +16 -0
  30. optimum/rbln/transformers/models/gemma2/configuration_gemma2.py +45 -0
  31. optimum/rbln/transformers/models/gemma2/gemma2_architecture.py +83 -0
  32. optimum/rbln/transformers/models/gemma2/modeling_gemma2.py +101 -0
  33. optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +16 -18
  34. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +1 -1
  35. optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +8 -34
  36. optimum/rbln/transformers/models/gpt_oss/__init__.py +16 -0
  37. optimum/rbln/transformers/models/gpt_oss/configuration_gpt_oss.py +41 -0
  38. optimum/rbln/transformers/models/gpt_oss/gpt_oss_architecture.py +122 -0
  39. optimum/rbln/transformers/models/gpt_oss/modeling_gpt_oss.py +165 -0
  40. optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +8 -5
  41. optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +6 -4
  42. optimum/rbln/transformers/models/llava/modeling_llava.py +0 -1
  43. optimum/rbln/transformers/models/midm/midm_architecture.py +29 -22
  44. optimum/rbln/transformers/models/opt/opt_architecture.py +1 -44
  45. optimum/rbln/transformers/models/paligemma/__init__.py +16 -0
  46. optimum/rbln/transformers/models/paligemma/configuration_paligemma.py +129 -0
  47. optimum/rbln/transformers/models/paligemma/modeling_paligemma.py +564 -0
  48. optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +24 -24
  49. optimum/rbln/transformers/models/phi/phi_architecture.py +13 -21
  50. optimum/rbln/transformers/models/qwen2_5_vl/__init__.py +6 -1
  51. optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +11 -1
  52. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +271 -122
  53. optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +43 -39
  54. optimum/rbln/transformers/models/qwen2_moe/__init__.py +16 -0
  55. optimum/rbln/transformers/models/qwen2_moe/configuration_qwen2_moe.py +38 -0
  56. optimum/rbln/transformers/models/qwen2_moe/modeling_qwen2_moe.py +68 -0
  57. optimum/rbln/transformers/models/qwen2_moe/qwen2_moe_architecture.py +94 -0
  58. optimum/rbln/transformers/models/qwen2_vl/__init__.py +6 -1
  59. optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +11 -1
  60. optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +263 -105
  61. optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +26 -34
  62. optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +7 -7
  63. optimum/rbln/transformers/models/qwen3_moe/__init__.py +16 -0
  64. optimum/rbln/transformers/models/qwen3_moe/configuration_qwen3_moe.py +38 -0
  65. optimum/rbln/transformers/models/qwen3_moe/modeling_qwen3_moe.py +68 -0
  66. optimum/rbln/transformers/models/qwen3_moe/qwen3_moe_architecture.py +100 -0
  67. optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +14 -12
  68. optimum/rbln/transformers/models/siglip/modeling_siglip.py +4 -18
  69. optimum/rbln/transformers/models/swin/configuration_swin.py +1 -6
  70. optimum/rbln/transformers/models/t5/t5_architecture.py +15 -16
  71. optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +0 -3
  72. optimum/rbln/transformers/models/whisper/whisper_architecture.py +0 -3
  73. optimum/rbln/transformers/utils/rbln_quantization.py +20 -12
  74. optimum/rbln/utils/import_utils.py +16 -1
  75. optimum/rbln/utils/runtime_utils.py +10 -6
  76. optimum/rbln/utils/submodule.py +24 -0
  77. {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.9.5a4.dist-info}/METADATA +6 -6
  78. {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.9.5a4.dist-info}/RECORD +81 -62
  79. optimum/rbln/transformers/models/colqwen2/colqwen2_architecture.py +0 -233
  80. {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.9.5a4.dist-info}/WHEEL +0 -0
  81. {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.9.5a4.dist-info}/entry_points.txt +0 -0
  82. {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.9.5a4.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/transformers/models/opt/opt_architecture.py
@@ -14,14 +14,7 @@
 
 from typing import TYPE_CHECKING
 
-import torch.nn as nn
-
-from ...models.decoderonly.decoderonly_architecture import (
-    DecoderOnlyAttention,
-    DecoderOnlyLayer,
-    DecoderOnlyModel,
-    DecoderOnlyWrapper,
-)
+from ...models.decoderonly.decoderonly_architecture import DecoderOnlyWrapper
 
 
 if TYPE_CHECKING:
@@ -31,44 +24,8 @@ if TYPE_CHECKING:
 class OPTWrapper(DecoderOnlyWrapper):
     _use_learned_pos_emb = True
 
-    def get_rbln_attn_class(self):
-        return OPTAttention
-
-    def get_rbln_layer_class(self):
-        return OPTDecoderLayer
-
-    def get_rbln_model_class(self):
-        return OPTModel
-
     def get_model_layer(self, model: "OPTForCausalLM"):
         return model.model.decoder if self.is_causal_lm else model.decoder
 
     def get_decoder_layers(self, model: "OPTForCausalLM"):
         return model.model.decoder.layers if self.is_causal_lm else model.decoder.layers
-
-
-class OPTAttention(DecoderOnlyAttention):
-    def __post_init__(self):
-        self.k_proj = self._original_mod.k_proj
-        self.v_proj = self._original_mod.v_proj
-        self.q_proj = self._original_mod.q_proj
-        self.o_proj = self._original_mod.out_proj
-
-
-class OPTModel(DecoderOnlyModel):
-    def get_embedding(self) -> nn.Embedding:
-        return self._original_mod.embed_tokens
-
-    def get_pos_embedding(self):
-        return self._original_mod.embed_positions
-
-    def get_last_layernorm(self) -> nn.LayerNorm:
-        return self._original_mod.final_layer_norm
-
-
-class OPTDecoderLayer(DecoderOnlyLayer):
-    def get_pre_attention_layernorm(self) -> nn.LayerNorm:
-        return self._original_mod.self_attn_layer_norm
-
-    def get_post_attention_layernorm(self) -> nn.LayerNorm:
-        return self._original_mod.final_layer_norm
optimum/rbln/transformers/models/paligemma/__init__.py (new file)
@@ -0,0 +1,16 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .configuration_paligemma import RBLNPaliGemmaForConditionalGenerationConfig, RBLNPaliGemmaModelConfig
+from .modeling_paligemma import RBLNPaliGemmaForConditionalGeneration, RBLNPaliGemmaModel
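The two import lines above are the entire public surface of the new paligemma subpackage. A minimal import sketch, illustrative only and mirroring the __init__.py shown above (the classes may also be re-exported higher up the package tree, which this diff lists but does not show):

```python
# Illustrative only: importing the names re-exported by the new paligemma __init__.py.
from optimum.rbln.transformers.models.paligemma import (
    RBLNPaliGemmaForConditionalGeneration,
    RBLNPaliGemmaForConditionalGenerationConfig,
    RBLNPaliGemmaModel,
    RBLNPaliGemmaModelConfig,
)
```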
optimum/rbln/transformers/models/paligemma/configuration_paligemma.py (new file)
@@ -0,0 +1,129 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Optional
+
+from ....configuration_utils import RBLNModelConfig
+from ....utils.logging import get_logger
+
+
+logger = get_logger(__name__)
+
+
+class RBLNPaliGemmaForConditionalGenerationConfig(RBLNModelConfig):
+    """
+    Configuration class for RBLNPaliGemmaForConditionalGenerationConfig.
+    This configuration class stores the configuration parameters specific to
+    RBLN-optimized PaliGemma models for multimodal conditional generation tasks
+    that combine vision and language processing capabilities.
+    """
+
+    submodules = ["vision_tower", "language_model"]
+    _allow_no_compile_cfgs = True
+
+    def __init__(
+        self,
+        batch_size: Optional[int] = None,
+        vision_tower: Optional[RBLNModelConfig] = None,
+        language_model: Optional[RBLNModelConfig] = None,
+        output_hidden_states: Optional[bool] = None,
+        **kwargs: Any,
+    ):
+        """
+        Args:
+            batch_size (Optional[int]): The batch size for inference. Defaults to 1.
+            vision_tower (Optional[RBLNModelConfig]): Configuration for the vision encoder component.
+                This can include settings specific to the vision encoder, such as input resolution or other vision-related parameters.
+                If not provided, default settings will be used.
+            language_model (Optional[RBLNModelConfig]): Configuration for the language model component.
+                This can include settings specific to the language model, such as tensor parallelism or other text-related parameters.
+                If not provided, default settings will be used.
+            output_hidden_states (Optional[bool]): Whether to output the hidden states of the decoder. Defaults to False.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
+        Raises:
+            ValueError: If `batch_size` is not a positive integer.
+        """
+        super().__init__(**kwargs)
+        self.batch_size = batch_size or 1
+        if not isinstance(self.batch_size, int) or self.batch_size < 0:
+            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+
+        if self.batch_size != 1:
+            logger.warning("Ignore batch_size for PaliGemma vision tower. It will be set to 1.")
+
+        self.output_hidden_states = output_hidden_states or False
+
+        self.vision_tower = self.initialize_submodule_config(
+            submodule_config=vision_tower,
+            batch_size=1,  # vision_tower batch_size is always 1 in PaliGemma
+            force_kwargs=True,
+        )
+        self.language_model = self.initialize_submodule_config(
+            submodule_config=language_model,
+            batch_size=batch_size,
+            use_position_ids=True,
+            use_attention_mask=True,
+            use_inputs_embeds=True,
+        )
+
+
+class RBLNPaliGemmaModelConfig(RBLNModelConfig):
+    submodules = ["vision_tower", "language_model"]
+    _allow_no_compile_cfgs = True
+
+    def __init__(
+        self,
+        batch_size: Optional[int] = None,
+        vision_tower: Optional[RBLNModelConfig] = None,
+        language_model: Optional[RBLNModelConfig] = None,
+        output_hidden_states: Optional[bool] = None,
+        **kwargs: Any,
+    ):
+        """
+        Args:
+            batch_size (Optional[int]): The batch size for inference. Defaults to 1.
+            vision_tower (Optional[RBLNModelConfig]): Configuration for the vision encoder component.
+                This can include settings specific to the vision encoder, such as input resolution or other vision-related parameters.
+                If not provided, default settings will be used.
+            language_model (Optional[RBLNModelConfig]): Configuration for the language model component.
+                This can include settings specific to the language model, such as tensor parallelism or other text-related parameters.
+                If not provided, default settings will be used.
+            output_hidden_states (Optional[bool]): Whether to output the hidden states of the decoder. Defaults to False.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
+        Raises:
+            ValueError: If `batch_size` is not a positive integer.
+        """
+        super().__init__(**kwargs)
+        self.batch_size = batch_size or 1
+        if not isinstance(self.batch_size, int) or self.batch_size < 0:
+            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+
+        if self.batch_size != 1:
+            logger.warning("Ignore batch_size for PaliGemma vision tower. It will be set to 1.")
+
+        self.output_hidden_states = output_hidden_states or False
+        self.vision_tower = self.initialize_submodule_config(
+            submodule_config=vision_tower,
+            batch_size=1,  # vision_tower batch_size is always 1 in PaliGemma
+            force_kwargs=True,
+        )
+
+        self.language_model = self.initialize_submodule_config(
+            submodule_config=language_model,
+            batch_size=batch_size,
+            use_position_ids=True,
+            use_attention_mask=True,
+            use_inputs_embeds=True,
+            output_hidden_states=output_hidden_states,
+        )
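The constructor signatures above define the new PaliGemma configuration surface. A minimal usage sketch based only on those signatures (how the resulting config is passed into the model's compile/load workflow is not shown in this diff and may differ):

```python
# Minimal sketch, assuming only what the signatures above show; the surrounding
# compile/load workflow is not part of this diff.
from optimum.rbln.transformers.models.paligemma import RBLNPaliGemmaForConditionalGenerationConfig

config = RBLNPaliGemmaForConditionalGenerationConfig(
    batch_size=1,                # must be a positive integer; defaults to 1
    output_hidden_states=False,  # defaults to False
    # vision_tower / language_model accept RBLNModelConfig instances; when omitted,
    # defaults are derived, and the vision tower is always compiled with batch_size=1.
)
```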