optimum-rbln 0.9.4a2__py3-none-any.whl → 0.10.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108):
  1. optimum/rbln/__init__.py +44 -0
  2. optimum/rbln/__version__.py +2 -2
  3. optimum/rbln/configuration_utils.py +230 -67
  4. optimum/rbln/diffusers/models/controlnet.py +2 -2
  5. optimum/rbln/diffusers/models/transformers/prior_transformer.py +2 -2
  6. optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +2 -2
  7. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +2 -2
  8. optimum/rbln/diffusers/pipelines/auto_pipeline.py +2 -3
  9. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +3 -12
  10. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +2 -4
  11. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +1 -3
  12. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +1 -3
  13. optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +2 -2
  14. optimum/rbln/modeling_base.py +11 -10
  15. optimum/rbln/ops/__init__.py +1 -0
  16. optimum/rbln/ops/attn.py +10 -0
  17. optimum/rbln/ops/flash_attn.py +8 -0
  18. optimum/rbln/ops/moe.py +180 -0
  19. optimum/rbln/ops/sliding_window_attn.py +9 -0
  20. optimum/rbln/transformers/__init__.py +44 -0
  21. optimum/rbln/transformers/modeling_attention_utils.py +124 -222
  22. optimum/rbln/transformers/modeling_outputs.py +25 -0
  23. optimum/rbln/transformers/modeling_rope_utils.py +78 -42
  24. optimum/rbln/transformers/models/__init__.py +38 -0
  25. optimum/rbln/transformers/models/auto/auto_factory.py +3 -3
  26. optimum/rbln/transformers/models/bart/bart_architecture.py +24 -24
  27. optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +7 -2
  28. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +1 -1
  29. optimum/rbln/transformers/models/colpali/colpali_architecture.py +14 -20
  30. optimum/rbln/transformers/models/colpali/configuration_colpali.py +12 -17
  31. optimum/rbln/transformers/models/colpali/modeling_colpali.py +66 -182
  32. optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py +40 -23
  33. optimum/rbln/transformers/models/colqwen2/modeling_colqwen2.py +107 -371
  34. optimum/rbln/transformers/models/decoderonly/__init__.py +2 -0
  35. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +144 -17
  36. optimum/rbln/transformers/models/decoderonly/configuration_lora.py +1 -1
  37. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +122 -48
  38. optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +5 -7
  39. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +120 -128
  40. optimum/rbln/transformers/models/detr/__init__.py +23 -0
  41. optimum/rbln/transformers/models/detr/configuration_detr.py +38 -0
  42. optimum/rbln/transformers/models/detr/modeling_detr.py +53 -0
  43. optimum/rbln/transformers/models/exaone/exaone_architecture.py +0 -36
  44. optimum/rbln/transformers/models/gemma/gemma_architecture.py +1 -1
  45. optimum/rbln/transformers/models/gemma2/__init__.py +16 -0
  46. optimum/rbln/transformers/models/gemma2/configuration_gemma2.py +45 -0
  47. optimum/rbln/transformers/models/gemma2/gemma2_architecture.py +83 -0
  48. optimum/rbln/transformers/models/gemma2/modeling_gemma2.py +101 -0
  49. optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +2 -7
  50. optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +16 -18
  51. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +5 -177
  52. optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +8 -34
  53. optimum/rbln/transformers/models/gpt_oss/__init__.py +16 -0
  54. optimum/rbln/transformers/models/gpt_oss/configuration_gpt_oss.py +42 -0
  55. optimum/rbln/transformers/models/gpt_oss/gpt_oss_architecture.py +122 -0
  56. optimum/rbln/transformers/models/gpt_oss/modeling_gpt_oss.py +168 -0
  57. optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +8 -5
  58. optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +6 -4
  59. optimum/rbln/transformers/models/llava/modeling_llava.py +0 -1
  60. optimum/rbln/transformers/models/midm/midm_architecture.py +29 -22
  61. optimum/rbln/transformers/models/mixtral/__init__.py +16 -0
  62. optimum/rbln/transformers/models/mixtral/configuration_mixtral.py +38 -0
  63. optimum/rbln/transformers/models/mixtral/mixtral_architecture.py +76 -0
  64. optimum/rbln/transformers/models/mixtral/modeling_mixtral.py +68 -0
  65. optimum/rbln/transformers/models/opt/opt_architecture.py +1 -44
  66. optimum/rbln/transformers/models/paligemma/__init__.py +16 -0
  67. optimum/rbln/transformers/models/paligemma/configuration_paligemma.py +129 -0
  68. optimum/rbln/transformers/models/paligemma/modeling_paligemma.py +564 -0
  69. optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +24 -24
  70. optimum/rbln/transformers/models/phi/phi_architecture.py +13 -21
  71. optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +9 -5
  72. optimum/rbln/transformers/models/qwen2_5_vl/__init__.py +6 -1
  73. optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +13 -1
  74. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +271 -122
  75. optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +43 -39
  76. optimum/rbln/transformers/models/qwen2_moe/__init__.py +16 -0
  77. optimum/rbln/transformers/models/qwen2_moe/configuration_qwen2_moe.py +38 -0
  78. optimum/rbln/transformers/models/qwen2_moe/modeling_qwen2_moe.py +68 -0
  79. optimum/rbln/transformers/models/qwen2_moe/qwen2_moe_architecture.py +94 -0
  80. optimum/rbln/transformers/models/qwen2_vl/__init__.py +6 -1
  81. optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +13 -1
  82. optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +263 -105
  83. optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +26 -34
  84. optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +7 -7
  85. optimum/rbln/transformers/models/qwen3_moe/__init__.py +16 -0
  86. optimum/rbln/transformers/models/qwen3_moe/configuration_qwen3_moe.py +38 -0
  87. optimum/rbln/transformers/models/qwen3_moe/modeling_qwen3_moe.py +68 -0
  88. optimum/rbln/transformers/models/qwen3_moe/qwen3_moe_architecture.py +100 -0
  89. optimum/rbln/transformers/models/resnet/configuration_resnet.py +10 -4
  90. optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +14 -12
  91. optimum/rbln/transformers/models/siglip/modeling_siglip.py +4 -18
  92. optimum/rbln/transformers/models/swin/configuration_swin.py +1 -6
  93. optimum/rbln/transformers/models/t5/t5_architecture.py +15 -16
  94. optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +0 -3
  95. optimum/rbln/transformers/models/whisper/generation_whisper.py +8 -8
  96. optimum/rbln/transformers/models/whisper/whisper_architecture.py +0 -3
  97. optimum/rbln/transformers/utils/rbln_quantization.py +20 -12
  98. optimum/rbln/utils/deprecation.py +78 -1
  99. optimum/rbln/utils/hub.py +93 -2
  100. optimum/rbln/utils/import_utils.py +16 -1
  101. optimum/rbln/utils/runtime_utils.py +12 -8
  102. optimum/rbln/utils/submodule.py +24 -0
  103. {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.10.0.post1.dist-info}/METADATA +6 -6
  104. {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.10.0.post1.dist-info}/RECORD +107 -81
  105. optimum/rbln/transformers/models/colqwen2/colqwen2_architecture.py +0 -233
  106. {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.10.0.post1.dist-info}/WHEEL +0 -0
  107. {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.10.0.post1.dist-info}/entry_points.txt +0 -0
  108. {optimum_rbln-0.9.4a2.dist-info → optimum_rbln-0.10.0.post1.dist-info}/licenses/LICENSE +0 -0
@@ -20,7 +20,6 @@ from transformers import PhiForCausalLM
20
20
  from ..decoderonly.decoderonly_architecture import (
21
21
  DecoderOnlyAttention,
22
22
  DecoderOnlyLayer,
23
- DecoderOnlyModel,
24
23
  DecoderOnlyWrapper,
25
24
  apply_rotary_pos_emb_partial,
26
25
  )
@@ -37,9 +36,6 @@ class PhiWrapper(DecoderOnlyWrapper):
37
36
  def get_rbln_layer_class(self):
38
37
  return PhiLayer
39
38
 
40
- def get_rbln_model_class(self):
41
- return PhiModel
42
-
43
39
  def get_model_layer(self, model: Union["PhiForCausalLM", "PhiModel"]):
44
40
  return model.model if self.is_causal_lm else model
45
41
 
@@ -48,13 +44,15 @@ class PhiWrapper(DecoderOnlyWrapper):
48
44
 
49
45
 
50
46
  class PhiAttention(DecoderOnlyAttention):
51
- def __post_init__(self):
52
- self.q_proj = self._original_mod.q_proj
53
- self.k_proj = self._original_mod.k_proj
54
- self.v_proj = self._original_mod.v_proj
55
- self.o_proj = self._original_mod.dense
56
- self.qk_layernorm = self._original_mod.qk_layernorm
57
- self.rotary_ndims = self._original_mod.rotary_ndims
47
+ def __post_init__(self, self_attn):
48
+ self.q_proj = self_attn.q_proj
49
+ self.k_proj = self_attn.k_proj
50
+ self.v_proj = self_attn.v_proj
51
+ self.o_proj = self_attn.dense
52
+ self.qk_layernorm = self_attn.qk_layernorm
53
+ self.rotary_ndims = self_attn.rotary_ndims
54
+ self.q_layernorm = getattr(self_attn, "q_layernorm", None)
55
+ self.k_layernorm = getattr(self_attn, "k_layernorm", None)
58
56
 
59
57
  def projection(self, hidden_states, lora_int_id) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
60
58
  if lora_int_id is not None:
@@ -65,8 +63,8 @@ class PhiAttention(DecoderOnlyAttention):
65
63
  value_states = self.v_proj(hidden_states)
66
64
 
67
65
  if self.qk_layernorm:
68
- query_states = self._original_mod.q_layernorm(query_states)
69
- key_states = self._original_mod.k_layernorm(key_states)
66
+ query_states = self.q_layernorm(query_states)
67
+ key_states = self.k_layernorm(key_states)
70
68
 
71
69
  return query_states, key_states, value_states
72
70
 
@@ -75,8 +73,7 @@ class PhiAttention(DecoderOnlyAttention):
75
73
 
76
74
 
77
75
  class PhiLayer(DecoderOnlyLayer):
78
- def get_post_attention_layernorm(self):
79
- raise NotImplementedError
76
+ _POST_ATTN_LAYERNORM = None
80
77
 
81
78
  def forward(
82
79
  self,
@@ -103,13 +100,8 @@ class PhiLayer(DecoderOnlyLayer):
103
100
  block_tables=block_tables,
104
101
  )
105
102
 
106
- feed_forward_hidden_states = self._original_mod.mlp(hidden_states)
103
+ feed_forward_hidden_states = self.mlp(hidden_states)
107
104
 
108
105
  hidden_states = attn_output + feed_forward_hidden_states + residual
109
106
 
110
107
  return hidden_states
111
-
112
-
113
- class PhiModel(DecoderOnlyModel):
114
- def get_last_layernorm(self):
115
- return self._original_mod.final_layernorm
@@ -297,13 +297,17 @@ class RBLNPixtralVisionModel(RBLNModel):
297
297
  Forward pass for the RBLN-optimized Pixtral vision model.
298
298
 
299
299
  Args:
300
- pixel_values (torch.Tensor of shape (batch_size, num_channels, image_size, image_size)) — The tensors corresponding to the input images. Pixel values can be obtained using PixtralImageProcessor. See PixtralImageProcessor.call() for details (PixtralProcessor uses PixtralImageProcessor for processing images).
301
- image_sizes (torch.Tensor of shape (batch_size, 2), optional) The sizes of the images in the batch, being (height, width) for each image.
302
- output_hidden_states (bool, optional) Whether or not to return the hidden states of all layers. See hidden_states under returned tensors for more detail.
303
- return_dict (bool, optional) Whether or not to return a ModelOutput instead of a plain tuple.
300
+ pixel_values: Input images as a tensor of shape (batch_size, num_channels, image_size, image_size).
301
+ Pixel values can be obtained using PixtralImageProcessor. See PixtralImageProcessor.__call__()
302
+ for details (PixtralProcessor uses PixtralImageProcessor for processing images).
303
+ image_sizes: The sizes of the images in the batch as a tensor of shape (batch_size, 2),
304
+ being (height, width) for each image. Optional.
305
+ output_hidden_states: Whether or not to return the hidden states of all layers. Optional.
306
+ See hidden_states under returned tensors for more detail.
307
+ return_dict: Whether or not to return a ModelOutput instead of a plain tuple. Optional.
304
308
 
305
309
  Returns:
306
- BaseModelOutput or tuple(torch.FloatTensor)
310
+ The model outputs. If return_dict=False is passed, returns a tuple of tensors. Otherwise, returns a BaseModelOutput object.
307
311
  """
308
312
  output_hidden_states = (
309
313
  output_hidden_states if output_hidden_states is not None else self.rbln_config.output_hidden_states
@@ -15,5 +15,10 @@
15
15
  from .configuration_qwen2_5_vl import (
16
16
  RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
17
17
  RBLNQwen2_5_VLForConditionalGenerationConfig,
18
+ RBLNQwen2_5_VLModelConfig,
19
+ )
20
+ from .modeling_qwen2_5_vl import (
21
+ RBLNQwen2_5_VisionTransformerPretrainedModel,
22
+ RBLNQwen2_5_VLForConditionalGeneration,
23
+ RBLNQwen2_5_VLModel,
18
24
  )
19
- from .modeling_qwen2_5_vl import RBLNQwen2_5_VisionTransformerPretrainedModel, RBLNQwen2_5_VLForConditionalGeneration
@@ -15,7 +15,7 @@
15
15
  from typing import Any, List, Optional, Union
16
16
 
17
17
  from ....configuration_utils import RBLNModelConfig
18
- from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausalLMConfig
18
+ from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelConfig, RBLNDecoderOnlyModelForCausalLMConfig
19
19
 
20
20
 
21
21
  class RBLNQwen2_5_VLForConditionalGenerationConfig(RBLNDecoderOnlyModelForCausalLMConfig):
@@ -56,6 +56,18 @@ class RBLNQwen2_5_VLForConditionalGenerationConfig(RBLNDecoderOnlyModelForCausal
56
56
  self.visual = visual
57
57
 
58
58
 
59
+ class RBLNQwen2_5_VLModelConfig(RBLNDecoderOnlyModelConfig):
60
+ """
61
+ Configuration class for RBLNQwen2_5_VLModel.
62
+ """
63
+
64
+ submodules = ["visual"]
65
+
66
+ def __init__(self, visual: Optional[RBLNModelConfig] = None, **kwargs: Any):
67
+ super().__init__(**kwargs)
68
+ self.visual = self.initialize_submodule_config(submodule_config=visual)
69
+
70
+
59
71
  class RBLNQwen2_5_VisionTransformerPretrainedModelConfig(RBLNModelConfig):
60
72
  """
61
73
  Configuration class for RBLNQwen2_5_VisionTransformerPretrainedModel.