optimum-rbln 0.8.1a0__py3-none-any.whl → 0.8.1a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130)
  1. optimum/rbln/__init__.py +2 -0
  2. optimum/rbln/__version__.py +2 -2
  3. optimum/rbln/configuration_utils.py +53 -33
  4. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +9 -2
  5. optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +4 -2
  6. optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +9 -2
  7. optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +4 -2
  8. optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +9 -2
  9. optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +9 -2
  10. optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +33 -9
  11. optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +30 -12
  12. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +22 -6
  13. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +16 -6
  14. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +16 -6
  15. optimum/rbln/diffusers/modeling_diffusers.py +16 -26
  16. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +11 -0
  17. optimum/rbln/diffusers/models/autoencoders/vae.py +1 -8
  18. optimum/rbln/diffusers/models/autoencoders/vq_model.py +11 -0
  19. optimum/rbln/diffusers/models/controlnet.py +13 -7
  20. optimum/rbln/diffusers/models/transformers/prior_transformer.py +10 -0
  21. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +2 -0
  22. optimum/rbln/diffusers/models/unets/unet_2d_condition.py +7 -0
  23. optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +1 -4
  24. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -0
  25. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -0
  26. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +7 -0
  27. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +7 -0
  28. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +7 -0
  29. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +48 -27
  30. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +7 -0
  31. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +7 -0
  32. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -0
  33. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +7 -0
  34. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +7 -0
  35. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +7 -0
  36. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -0
  37. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -0
  38. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -0
  39. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +7 -0
  40. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -0
  41. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +7 -0
  42. optimum/rbln/modeling.py +33 -35
  43. optimum/rbln/modeling_base.py +45 -107
  44. optimum/rbln/transformers/__init__.py +39 -47
  45. optimum/rbln/transformers/configuration_generic.py +16 -13
  46. optimum/rbln/transformers/modeling_generic.py +18 -19
  47. optimum/rbln/transformers/modeling_rope_utils.py +5 -2
  48. optimum/rbln/transformers/models/__init__.py +46 -4
  49. optimum/rbln/transformers/models/audio_spectrogram_transformer/__init__.py +17 -0
  50. optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +21 -0
  51. optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +28 -0
  52. optimum/rbln/transformers/models/auto/auto_factory.py +35 -12
  53. optimum/rbln/transformers/models/bart/bart_architecture.py +14 -1
  54. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +35 -4
  55. optimum/rbln/transformers/models/clip/configuration_clip.py +3 -3
  56. optimum/rbln/transformers/models/clip/modeling_clip.py +11 -12
  57. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +111 -14
  58. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +102 -35
  59. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +229 -175
  60. optimum/rbln/transformers/models/distilbert/__init__.py +19 -0
  61. optimum/rbln/transformers/models/distilbert/configuration_distilbert.py +19 -0
  62. optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +19 -0
  63. optimum/rbln/transformers/models/exaone/configuration_exaone.py +24 -1
  64. optimum/rbln/transformers/models/exaone/exaone_architecture.py +5 -1
  65. optimum/rbln/transformers/models/exaone/modeling_exaone.py +66 -5
  66. optimum/rbln/transformers/models/gemma/configuration_gemma.py +24 -1
  67. optimum/rbln/transformers/models/gemma/gemma_architecture.py +5 -1
  68. optimum/rbln/transformers/models/gemma/modeling_gemma.py +49 -0
  69. optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +3 -3
  70. optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +18 -250
  71. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +106 -236
  72. optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +4 -1
  73. optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +6 -1
  74. optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +12 -2
  75. optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +41 -4
  76. optimum/rbln/transformers/models/llama/configuration_llama.py +24 -1
  77. optimum/rbln/transformers/models/llama/modeling_llama.py +49 -0
  78. optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +2 -2
  79. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +32 -4
  80. optimum/rbln/transformers/models/midm/configuration_midm.py +24 -1
  81. optimum/rbln/transformers/models/midm/midm_architecture.py +6 -1
  82. optimum/rbln/transformers/models/midm/modeling_midm.py +66 -5
  83. optimum/rbln/transformers/models/mistral/configuration_mistral.py +24 -1
  84. optimum/rbln/transformers/models/mistral/modeling_mistral.py +62 -4
  85. optimum/rbln/transformers/models/opt/configuration_opt.py +4 -1
  86. optimum/rbln/transformers/models/opt/modeling_opt.py +10 -0
  87. optimum/rbln/transformers/models/opt/opt_architecture.py +7 -1
  88. optimum/rbln/transformers/models/phi/configuration_phi.py +24 -1
  89. optimum/rbln/transformers/models/phi/modeling_phi.py +49 -0
  90. optimum/rbln/transformers/models/phi/phi_architecture.py +1 -1
  91. optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +24 -1
  92. optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +67 -4
  93. optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +15 -3
  94. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +58 -27
  95. optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +47 -2
  96. optimum/rbln/transformers/models/resnet/__init__.py +23 -0
  97. optimum/rbln/transformers/models/resnet/configuration_resnet.py +20 -0
  98. optimum/rbln/transformers/models/resnet/modeling_resnet.py +22 -0
  99. optimum/rbln/transformers/models/roberta/__init__.py +24 -0
  100. optimum/rbln/transformers/{configuration_alias.py → models/roberta/configuration_roberta.py} +4 -30
  101. optimum/rbln/transformers/{modeling_alias.py → models/roberta/modeling_roberta.py} +2 -32
  102. optimum/rbln/transformers/models/seq2seq/__init__.py +1 -1
  103. optimum/rbln/transformers/models/seq2seq/{configuration_seq2seq2.py → configuration_seq2seq.py} +2 -2
  104. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +1 -1
  105. optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +41 -3
  106. optimum/rbln/transformers/models/siglip/configuration_siglip.py +3 -0
  107. optimum/rbln/transformers/models/siglip/modeling_siglip.py +62 -21
  108. optimum/rbln/transformers/models/t5/modeling_t5.py +46 -4
  109. optimum/rbln/transformers/models/t5/t5_architecture.py +5 -1
  110. optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/__init__.py +1 -1
  111. optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/configuration_time_series_transformer.py +2 -2
  112. optimum/rbln/transformers/models/{time_series_transformers/modeling_time_series_transformers.py → time_series_transformer/modeling_time_series_transformer.py} +14 -9
  113. optimum/rbln/transformers/models/vit/__init__.py +19 -0
  114. optimum/rbln/transformers/models/vit/configuration_vit.py +19 -0
  115. optimum/rbln/transformers/models/vit/modeling_vit.py +19 -0
  116. optimum/rbln/transformers/models/wav2vec2/__init__.py +1 -1
  117. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +1 -1
  118. optimum/rbln/transformers/models/whisper/configuration_whisper.py +3 -1
  119. optimum/rbln/transformers/models/whisper/modeling_whisper.py +35 -15
  120. optimum/rbln/transformers/models/xlm_roberta/__init__.py +16 -2
  121. optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py +15 -2
  122. optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +12 -3
  123. optimum/rbln/utils/model_utils.py +20 -0
  124. optimum/rbln/utils/submodule.py +6 -8
  125. {optimum_rbln-0.8.1a0.dist-info → optimum_rbln-0.8.1a2.dist-info}/METADATA +2 -2
  126. {optimum_rbln-0.8.1a0.dist-info → optimum_rbln-0.8.1a2.dist-info}/RECORD +130 -117
  127. /optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/time_series_transformers_architecture.py +0 -0
  128. /optimum/rbln/transformers/models/wav2vec2/{configuration_wav2vec.py → configuration_wav2vec2.py} +0 -0
  129. {optimum_rbln-0.8.1a0.dist-info → optimum_rbln-0.8.1a2.dist-info}/WHEEL +0 -0
  130. {optimum_rbln-0.8.1a0.dist-info → optimum_rbln-0.8.1a2.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/transformers/models/qwen2/modeling_qwen2.py

@@ -12,8 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from transformers import PretrainedConfig
+
 from ....utils import logging
-from ...models.decoderonly import RBLNDecoderOnlyModelForCausalLM
+from ...models.decoderonly import RBLNDecoderOnlyModelForCausalLM, RBLNDecoderOnlyModelForCausalLMConfig
 from .qwen2_architecture import QWEN2Wrapper
 
 
@@ -22,13 +24,74 @@ logger = logging.get_logger(__name__)
 
 class RBLNQwen2ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
     """
-    The Llama Model transformer with a language modeling head (linear layer) on top.
+    The Qwen2 Model transformer with a language modeling head (linear layer) on top.
     This model inherits from [`RBLNDecoderOnlyModelForCausalLM`]. Check the superclass documentation for the generic methods the library implements for all its models.
 
-    A class to convert and run pre-trained transformers based LlamaForCausalLM model on RBLN devices.
-    It implements the methods to convert a pre-trained transformers LlamaForCausalLM model into a RBLN transformer model by:
+    A class to convert and run pre-trained transformers based Qwen2ForCausalLM model on RBLN devices.
+    It implements the methods to convert a pre-trained transformers Qwen2ForCausalLM model into a RBLN transformer model by:
     - transferring the checkpoint weights of the original into an optimized RBLN graph,
     - compiling the resulting graph using the RBLN compiler.
+
+    **Configuration:**
+    This model uses [`RBLNQwen2ForCausalLMConfig`] for configuration. When calling methods like `from_pretrained` or `from_model`,
+    the `rbln_config` parameter should be an instance of [`RBLNQwen2ForCausalLMConfig`] or a dictionary conforming to its structure.
+
+    See the [`RBLNQwen2ForCausalLMConfig`] class for all available configuration options.
+
+    Examples:
+        ```python
+        from optimum.rbln import RBLNQwen2ForCausalLM
+
+        # Simple usage using rbln_* arguments
+        # `max_seq_len` is automatically inferred from the model config
+        model = RBLNQwen2ForCausalLM.from_pretrained(
+            "Qwen/Qwen2-7B-Instruct",
+            export=True,
+            rbln_batch_size=1,
+            rbln_tensor_parallel_size=4,
+        )
+
+
+        # Using a config dictionary
+        rbln_config = {
+            "batch_size": 1,
+            "max_seq_len": 4096,
+            "tensor_parallel_size": 4,
+        }
+        model = RBLNQwen2ForCausalLM.from_pretrained(
+            "Qwen/Qwen2-7B-Instruct",
+            export=True,
+            rbln_config=rbln_config
+        )
+
+
+        # Using a RBLNQwen2ForCausalLMConfig instance (recommended for type checking)
+        from optimum.rbln import RBLNQwen2ForCausalLMConfig
+
+        config = RBLNQwen2ForCausalLMConfig(
+            batch_size=1,
+            max_seq_len=4096,
+            tensor_parallel_size=4
+        )
+        model = RBLNQwen2ForCausalLM.from_pretrained(
+            "Qwen/Qwen2-7B-Instruct",
+            export=True,
+            rbln_config=config
+        )
+        ```
     """
 
     _decoder_wrapper_cls = QWEN2Wrapper
+
+    @classmethod
+    def _update_sliding_window_config(
+        cls, model_config: PretrainedConfig, rbln_config: RBLNDecoderOnlyModelForCausalLMConfig
+    ):
+        # https://github.com/huggingface/transformers/issues/35896
+        # There seems to be a bug in transformers(v4.52.4). Therefore, similar to when attn_implementation is eager,
+        # we set all layers to use sliding window in this version. This should be updated once the bug is fixed.
+
+        rbln_config.cache_impl = "sliding_window"
+        rbln_config.sliding_window = model_config.sliding_window
+        rbln_config.sliding_window_layers = list(range(model_config.num_hidden_layers))
+        return rbln_config
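The new `_update_sliding_window_config` hook routes every Qwen2 layer through the sliding-window cache path as a workaround for the linked transformers issue. A minimal sketch of what the hook computes, assuming a stock `Qwen2Config` and a `SimpleNamespace` stand-in for the RBLN config object (the real one is `RBLNDecoderOnlyModelForCausalLMConfig`):

```python
from types import SimpleNamespace

from transformers import Qwen2Config

model_config = Qwen2Config()     # defaults include sliding_window=4096
rbln_config = SimpleNamespace()  # stand-in for RBLNDecoderOnlyModelForCausalLMConfig

# The same three assignments the hook performs:
rbln_config.cache_impl = "sliding_window"
rbln_config.sliding_window = model_config.sliding_window
rbln_config.sliding_window_layers = list(range(model_config.num_hidden_layers))

# Every layer index is marked sliding-window, mirroring the eager-attention behavior.
print(rbln_config.sliding_window, len(rbln_config.sliding_window_layers))
```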
optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import List, Optional, Union
+from typing import Any, Dict, List, Optional, Union
 
 from ....configuration_utils import RBLNModelConfig
 from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausalLMConfig
@@ -25,7 +25,7 @@ class RBLNQwen2_5_VLForConditionalGenerationConfig(RBLNDecoderOnlyModelForCausal
         self,
         visual: Optional[RBLNModelConfig] = None,
         use_inputs_embeds: bool = True,
-        **kwargs,
+        **kwargs: Dict[str, Any],
     ):
         super().__init__(use_inputs_embeds=use_inputs_embeds, **kwargs)
         if not self.use_inputs_embeds:
@@ -37,7 +37,7 @@ class RBLNQwen2_5_VLForConditionalGenerationConfig(RBLNDecoderOnlyModelForCausal
 
 
 class RBLNQwen2_5_VisionTransformerPretrainedModelConfig(RBLNModelConfig):
-    def __init__(self, max_seq_lens: Union[int, List[int]] = None, **kwargs):
+    def __init__(self, max_seq_lens: Union[int, List[int]] = None, **kwargs: Dict[str, Any]):
         """
         Args:
             max_seq_lens (Optional[Union[int, List[int]]]): Maximum sequence lengths for Vision
@@ -54,6 +54,18 @@ class RBLNQwen2_5_VisionTransformerPretrainedModelConfig(RBLNModelConfig):
 
         Raises:
             ValueError: If batch_size is not a positive integer.
+
+        Max Seq Lens:
+            Since `Qwen2_5_VLForConditionalGeneration` performs inference on a per-image or per-frame basis,
+            `max_seq_lens` should be set based on the maximum expected resolution of the input images or video frames,
+            according to the following guidelines:
+
+            1. **Minimum Value**: `max_seq_lens` must be greater than or equal to the number of patches generated from the input image.
+               For example, a 224x224 image with a patch size of 14 results in (224 / 14) * (224 / 14) = 256 patches.
+               Therefore, `max_seq_lens` must be at least 256.
+            2. **Alignment Requirement**: `max_seq_lens` must be a multiple of `(window_size / patch_size)^2` due to the requirements
+               of the window-based attention mechanism. For instance, if `window_size` is 112 and `patch_size` is 14, then
+               `(112 / 14)^2 = 64`, meaning valid values for `max_seq_lens` include 64, 128, 192, 256, etc.
         """
         super().__init__(**kwargs)
 
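The two rules in the new docstring can be checked mechanically. A small sketch that computes the smallest valid `max_seq_lens` for a given image resolution; the helper `min_max_seq_len` is illustrative, not part of `optimum.rbln`:

```python
import math

def min_max_seq_len(height: int, width: int, patch_size: int = 14, window_size: int = 112) -> int:
    """Smallest valid max_seq_lens for one image, per the two rules above."""
    num_patches = (height // patch_size) * (width // patch_size)  # rule 1: at least this many
    alignment = (window_size // patch_size) ** 2                  # rule 2: multiple of this
    return math.ceil(num_patches / alignment) * alignment

print(min_max_seq_len(224, 224))  # 256 patches, already a multiple of 64 -> 256
print(min_max_seq_len(280, 280))  # 400 patches, rounded up to the next multiple of 64 -> 448
```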
optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py

@@ -28,6 +28,7 @@ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import (
     Qwen2_5_VisionPatchEmbed,
     Qwen2_5_VisionRotaryEmbedding,
     Qwen2_5_VisionTransformerPretrainedModel,
+    Qwen2_5_VLModel,
     Qwen2_5_VLRotaryEmbedding,
 )
 
@@ -37,6 +38,7 @@ from ....utils.logging import get_logger
 from ..decoderonly.modeling_decoderonly import RBLNDecoderOnlyModelForCausalLM, RBLNDecoderOnlyOutput
 from .configuration_qwen2_5_vl import (
     RBLNQwen2_5_VisionTransformerPretrainedModelConfig,
+    RBLNQwen2_5_VLForConditionalGenerationConfig,
 )
 from .qwen2_5_vl_architecture import Qwen2_5_VisionTransformerWrapper, Qwen2_5_VL_LanguageModelWrapper
 
@@ -338,6 +340,40 @@ class RBLNQwen2_5_VisionTransformerPretrainedModel(RBLNModel):
 
 
 class RBLNQwen2_5_VLForConditionalGeneration(RBLNDecoderOnlyModelForCausalLM):
+    """
+    RBLNQwen2_5_VLForConditionalGeneration is a multi-modal model that integrates vision and language processing capabilities,
+    optimized for RBLN NPUs. It is designed for conditional generation tasks that involve both image and text inputs.
+
+    This model inherits from [`RBLNDecoderOnlyModelForCausalLM`]. Check the superclass documentation for the generic methods the library implements for all its models.
+
+    Important Note:
+        This model includes a Large Language Model (LLM). For optimal performance, it is highly recommended to use
+        tensor parallelism for the language model. This can be achieved by using the `rbln_config` parameter in the
+        `from_pretrained` method. Refer to the `from_pretrained` documentation and the RBLNQwen2_5_VLForConditionalGenerationConfig class for details.
+
+    Examples:
+        ```python
+        from optimum.rbln import RBLNQwen2_5_VLForConditionalGeneration
+
+        model = RBLNQwen2_5_VLForConditionalGeneration.from_pretrained(
+            "Qwen/Qwen2.5-VL-7B-Instruct",
+            export=True,
+            rbln_config={
+                "visual": {
+                    "max_seq_lens": 6400,
+                    "device": 0,
+                },
+                "tensor_parallel_size": 8,
+                "kvcache_partition_len": 16_384,
+                "max_seq_len": 114_688,
+                "device": [0, 1, 2, 3, 4, 5, 6, 7],
+            },
+        )
+
+        model.save_pretrained("compiled-qwen2.5-vl-7b-instruct")
+        ```
+    """
+
     auto_model_class = AutoModelForVision2Seq
     _rbln_submodules = [
         {"name": "visual"},
@@ -355,6 +391,14 @@ class RBLNQwen2_5_VLForConditionalGeneration(RBLNDecoderOnlyModelForCausalLM):
     def can_generate(self):
         return True
 
+    @classmethod
+    def get_pytorch_model(cls, *args, **kwargs):
+        model = super().get_pytorch_model(*args, **kwargs)
+        model.model.lm_head = model.lm_head
+        model.lm_head = None
+        del model.lm_head
+        return model
+
     @classmethod
     def update_kwargs(cls, kwargs):
         kwargs.update(
@@ -369,33 +413,19 @@ class RBLNQwen2_5_VLForConditionalGeneration(RBLNDecoderOnlyModelForCausalLM):
         cls,
         batch_size: int,
         query_length: int,
-        use_inputs_embeds: bool,
-        use_attention_mask: bool,
-        use_position_ids: bool,
-        max_seq_len: int,
-        kvcache_block_size: int,
-        kvcache_num_blocks: int,
-        num_key_value_heads: int,
-        num_hidden_layers: int,
-        hidden_size: int,
-        head_dim: int,
+        rbln_config: RBLNQwen2_5_VLForConditionalGenerationConfig,
+        model_config: PretrainedConfig,
     ):
-        input_info = super().get_input_info(
-            batch_size,
-            query_length,
-            use_inputs_embeds,
-            use_attention_mask,
-            use_position_ids,
-            max_seq_len,
-            kvcache_block_size,
-            kvcache_num_blocks,
-            num_key_value_heads,
-            num_hidden_layers,
-            hidden_size,
-            head_dim,
-        )
+        input_info = super().get_input_info(batch_size, query_length, rbln_config, model_config)
         pos_idx = 3
-        input_info.insert(pos_idx, ("position_emb", [2, batch_size, 1, query_length, head_dim], "float32"))
+        input_info.insert(
+            pos_idx,
+            (
+                "position_emb",
+                [2, batch_size, 1, query_length, model_config.hidden_size // model_config.num_attention_heads],
+                "float32",
+            ),
+        )
 
         return input_info
 
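With this refactor, `head_dim` is derived inline as `model_config.hidden_size // model_config.num_attention_heads` instead of being threaded through the signature. A quick sketch of the resulting `position_emb` shape, using illustrative values in the range of Qwen2.5-VL-7B's text config (assumed, not read from the diff):

```python
# Illustrative values (assumptions, roughly matching Qwen2.5-VL-7B's text backbone):
hidden_size = 3584
num_attention_heads = 28
head_dim = hidden_size // num_attention_heads  # 128

batch_size, query_length = 1, 4096
# Shape of the extra "position_emb" input inserted at index 3 of input_info:
position_emb_shape = [2, batch_size, 1, query_length, head_dim]
print(position_emb_shape)  # [2, 1, 1, 4096, 128]
```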
@@ -510,7 +540,8 @@ class RBLNQwen2_5_VLForConditionalGeneration(RBLNDecoderOnlyModelForCausalLM):
             vision_tokens = input_id[0][vision_start_indices + 1]
             image_nums = (vision_tokens == image_token_id).sum()
             video_nums = (vision_tokens == video_token_id).sum()
-            position_ids, rope_deltas = self.get_rope_index(
+            position_ids, rope_deltas = Qwen2_5_VLModel.get_rope_index(
+                self,
                 input_id,
                 image_grid_thw[image_idx : image_idx + image_nums] if image_grid_thw is not None else None,
                 video_grid_thw[video_idx : video_idx + video_nums] if video_grid_thw is not None else None,
@@ -595,7 +626,7 @@ class RBLNQwen2_5_VLForConditionalGeneration(RBLNDecoderOnlyModelForCausalLM):
                 )
                 logits.append(output.logits)
             logits = torch.cat(logits, dim=0)
-        # Decoder
+            # Decoder
         else:
             inputs_embeds, position_embed = self._preprocess_decoder(input_ids, cache_position)
             output = self.decoder(
optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py

@@ -3,8 +3,14 @@ from typing import Tuple
 
 import torch
 import torch.nn as nn
+from transformers import PreTrainedModel
 
 from ..decoderonly.decoderonly_architecture import (
+    DecoderOnlyAttention,
+    DecoderOnlyFlashAttention,
+    DecoderOnlyForCausalLM,
+    DecoderOnlyLayer,
+    DecoderOnlyModel,
     DecoderOnlyWrapper,
     apply_rotary_pos_emb,
 )
@@ -162,7 +168,8 @@ class Qwen2_5_VL_LanguageModelWrapper(DecoderOnlyWrapper):
         input_ids = None if self.use_inputs_embeds else args.pop(0)
         inputs_embeds = args.pop(0) if self.use_inputs_embeds else None
         cache_position = args.pop(0)
-        block_tables = args.pop(0)
+        global_block_tables = args.pop(0)
+        local_block_tables = None
         position_embeds = args.pop(0)
         query_position = args.pop(0) if self.phase == "prefill" else None
         position_ids = None
@@ -188,10 +195,48 @@ class Qwen2_5_VL_LanguageModelWrapper(DecoderOnlyWrapper):
             input_ids,
             inputs_embeds,
             cache_position,
-            block_tables,
+            global_block_tables,
+            local_block_tables,
             query_position,
             attention_mask,
             position_ids,
             past_key_values,
             position_embeds,
         )
+
+    def convert_to_rbln_causal_lm(self, causal_lm: PreTrainedModel, max_seq_len: int):
+        new_layers = []
+
+        for layer in causal_lm.model.language_model.layers:
+            if self.attn_impl == "eager":
+                new_self_attn = DecoderOnlyAttention(
+                    layer.self_attn,
+                    self.use_attention_mask,
+                    self.use_position_ids,
+                    kvcache_block_size=self.kvcache_block_size,
+                )
+            elif self.attn_impl == "flash_attn":
+                new_self_attn = DecoderOnlyFlashAttention(
+                    layer.self_attn,
+                    kvcache_partition_len=self.kvcache_partition_len,
+                    kvcache_block_size=self.kvcache_block_size,
+                    use_attention_mask=self.use_attention_mask,
+                    use_position_ids=self.use_position_ids,
+                )
+            else:
+                raise NotImplementedError(f"Unknown attn : {self.attn_impl}")
+
+            new_layer = DecoderOnlyLayer(layer, new_self_attn)
+            new_layers.append(new_layer)
+
+        new_model = DecoderOnlyModel(
+            causal_lm.model.language_model,
+            new_layers,
+            partition_len=self.kvcache_partition_len,
+            max_seq_len=max_seq_len,
+            kvcache_block_size=self.kvcache_block_size,
+            use_learned_pos_emb=self.use_learned_pos_emb,
+            sliding_window_layers=self.sliding_window_layers,
+        )
+        new_causal_lm = DecoderOnlyForCausalLM(causal_lm.model, new_model)
+        return new_causal_lm
optimum/rbln/transformers/models/resnet/__init__.py (new file)

@@ -0,0 +1,23 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from .configuration_resnet import RBLNResNetForImageClassificationConfig
+from .modeling_resnet import RBLNResNetForImageClassification
+
+
+__all__ = [
+    "RBLNResNetForImageClassificationConfig",
+    "RBLNResNetForImageClassification",
+]
optimum/rbln/transformers/models/resnet/configuration_resnet.py (new file)

@@ -0,0 +1,20 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from ...configuration_generic import RBLNModelForImageClassificationConfig
+
+
+class RBLNResNetForImageClassificationConfig(RBLNModelForImageClassificationConfig):
+    ""
optimum/rbln/transformers/models/resnet/modeling_resnet.py (new file)

@@ -0,0 +1,22 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from ...modeling_generic import RBLNModelForImageClassification
+
+
+class RBLNResNetForImageClassification(RBLNModelForImageClassification):
+    """
+    ResNet model for image classification tasks on RBLN NPU.
+    """
optimum/rbln/transformers/models/roberta/__init__.py (new file)

@@ -0,0 +1,24 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .configuration_roberta import RBLNRobertaForMaskedLMConfig, RBLNRobertaForSequenceClassificationConfig
+from .modeling_roberta import RBLNRobertaForMaskedLM, RBLNRobertaForSequenceClassification
+
+
+__all__ = [
+    "RBLNRobertaForMaskedLMConfig",
+    "RBLNRobertaForSequenceClassificationConfig",
+    "RBLNRobertaForMaskedLM",
+    "RBLNRobertaForSequenceClassification",
+]
optimum/rbln/transformers/configuration_alias.py → optimum/rbln/transformers/models/roberta/configuration_roberta.py (renamed)

@@ -12,38 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .configuration_generic import (
-    RBLNModelForAudioClassificationConfig,
-    RBLNModelForImageClassificationConfig,
-    RBLNModelForMaskedLMConfig,
-    RBLNModelForQuestionAnsweringConfig,
-    RBLNModelForSequenceClassificationConfig,
-)
-
-
-class RBLNASTForAudioClassificationConfig(RBLNModelForAudioClassificationConfig):
-    pass
-
-
-class RBLNDistilBertForQuestionAnsweringConfig(RBLNModelForQuestionAnsweringConfig):
-    pass
-
-
-class RBLNResNetForImageClassificationConfig(RBLNModelForImageClassificationConfig):
-    pass
-
-
-class RBLNXLMRobertaForSequenceClassificationConfig(RBLNModelForSequenceClassificationConfig):
-    pass
-
-
-class RBLNRobertaForSequenceClassificationConfig(RBLNModelForSequenceClassificationConfig):
-    pass
+from ...configuration_generic import RBLNModelForMaskedLMConfig, RBLNModelForSequenceClassificationConfig
 
 
 class RBLNRobertaForMaskedLMConfig(RBLNModelForMaskedLMConfig):
-    pass
+    ""
 
 
-class RBLNViTForImageClassificationConfig(RBLNModelForImageClassificationConfig):
-    pass
+class RBLNRobertaForSequenceClassificationConfig(RBLNModelForSequenceClassificationConfig):
+    ""
optimum/rbln/transformers/modeling_alias.py → optimum/rbln/transformers/models/roberta/modeling_roberta.py (renamed)

@@ -12,42 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from ..utils.logging import get_logger
-from .modeling_generic import (
-    RBLNModelForAudioClassification,
-    RBLNModelForImageClassification,
-    RBLNModelForMaskedLM,
-    RBLNModelForQuestionAnswering,
-    RBLNModelForSequenceClassification,
-)
+from ...modeling_generic import RBLNModelForMaskedLM, RBLNModelForSequenceClassification
 
 
-logger = get_logger()
-
-
-class RBLNASTForAudioClassification(RBLNModelForAudioClassification):
-    pass
-
-
-class RBLNDistilBertForQuestionAnswering(RBLNModelForQuestionAnswering):
-    rbln_model_input_names = ["input_ids", "attention_mask"]
-
-
-class RBLNResNetForImageClassification(RBLNModelForImageClassification):
-    pass
-
-
-class RBLNXLMRobertaForSequenceClassification(RBLNModelForSequenceClassification):
+class RBLNRobertaForMaskedLM(RBLNModelForMaskedLM):
     rbln_model_input_names = ["input_ids", "attention_mask"]
 
 
 class RBLNRobertaForSequenceClassification(RBLNModelForSequenceClassification):
     rbln_model_input_names = ["input_ids", "attention_mask"]
-
-
-class RBLNRobertaForMaskedLM(RBLNModelForMaskedLM):
-    rbln_model_input_names = ["input_ids", "attention_mask"]
-
-
-class RBLNViTForImageClassification(RBLNModelForImageClassification):
-    pass
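After this move the RoBERTa classes live in their own subpackage rather than in the catch-all `modeling_alias.py`. The import paths, assuming the top-level re-exports implied by the `__init__.py` changes in the file list:

```python
# Top-level import (assumption: still re-exported from optimum.rbln):
from optimum.rbln import RBLNRobertaForMaskedLM, RBLNRobertaForSequenceClassification

# New canonical module location after the move:
from optimum.rbln.transformers.models.roberta import RBLNRobertaForMaskedLM
```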
optimum/rbln/transformers/models/seq2seq/__init__.py

@@ -12,5 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .configuration_seq2seq2 import RBLNModelForSeq2SeqLMConfig
+from .configuration_seq2seq import RBLNModelForSeq2SeqLMConfig
 from .modeling_seq2seq import RBLNModelForSeq2SeqLM
optimum/rbln/transformers/models/seq2seq/configuration_seq2seq2.py → configuration_seq2seq.py (renamed)

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Optional
+from typing import Any, Dict, Optional
 
 import rebel
 
@@ -31,7 +31,7 @@ class RBLNModelForSeq2SeqLMConfig(RBLNModelConfig):
         dec_max_seq_len: Optional[int] = None,
         use_attention_mask: Optional[bool] = None,
         pad_token_id: Optional[int] = None,
-        **kwargs,
+        **kwargs: Dict[str, Any],
    ):
         """
         Args:
optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py

@@ -26,7 +26,7 @@ from ....configuration_utils import RBLNCompileConfig
 from ....modeling import RBLNModel
 from ....utils.logging import get_logger
 from ....utils.runtime_utils import RBLNPytorchRuntime
-from .configuration_seq2seq2 import RBLNModelForSeq2SeqLMConfig
+from .configuration_seq2seq import RBLNModelForSeq2SeqLMConfig
 
 
 logger = get_logger(__name__)
optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py

@@ -148,7 +148,8 @@ class Seq2SeqDecoderWrapper(nn.Module):
         new_layers = []
         for layer in model.get_decoder().layers:
             self_attn = Seq2SeqSelfAttention(layer.self_attn)
-            new_layers.append(Seq2SeqDecoderLayer(layer, self_attn))
+            cross_attn = Seq2SeqCrossAttention(layer.encoder_attn)
+            new_layers.append(Seq2SeqDecoderLayer(layer, self_attn, cross_attn))
 
         decoder_model = Seq2SeqDecoder(model.get_decoder(), new_layers)
         new_model = Seq2SeqForConditionalGeneration(model, decoder_model)
@@ -341,10 +342,11 @@ class Seq2SeqDecoderLayer(torch.nn.Module):
         self_attn (Seq2SeqSelfAttention): Modified self-attention layer optimized for RBLN
     """
 
-    def __init__(self, decoder_layer, self_attn):
+    def __init__(self, decoder_layer, self_attn, cross_attn):
         super().__init__()
         self._original_mod = decoder_layer
         self.self_attn = self_attn
+        self.cross_attn = cross_attn
         self.__post_init__()
 
     def __post_init__(self, **kwargs):
@@ -402,7 +404,8 @@ class Seq2SeqDecoderLayer(torch.nn.Module):
         # Cross-Attention Block
         residual = hidden_states
         hidden_states = self.pre_cross_attn_layer_norm(hidden_states)
-        cross_attn_output = self.encoder_attn(
+
+        cross_attn_output = self.cross_attn(
             hidden_states=hidden_states,
             past_key_value=cross_past_key_value,
             attention_mask=encoder_attention_mask,
@@ -487,3 +490,38 @@ class Seq2SeqSelfAttention(nn.Module):
         attn_output = self.out_proj(attn_output)
 
         return attn_output
+
+
+class Seq2SeqCrossAttention(nn.Module):
+    def __init__(self, attn, **kwargs):
+        super().__init__()
+        self._original_mod = attn
+        self.__post_init__(**kwargs)
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        key_value_states: torch.Tensor = None,
+        past_key_value: Optional[object] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+    ):
+        bsz, tgt_len, _ = hidden_states.size()
+        query_states = self.q_proj(hidden_states).view(bsz, -1, self.num_heads, self.head_dim).transpose(1, 2)
+
+        is_cross_attention = key_value_states is not None
+        if is_cross_attention:
+            key_states = past_key_value[0]
+            value_states = past_key_value[1]
+
+        attn_output = torch.nn.functional.scaled_dot_product_attention(
+            query_states,
+            key_states,
+            value_states,
+            attn_mask=attention_mask,
+        )
+
+        attn_output = attn_output.transpose(1, 2).contiguous()
+        attn_output = attn_output.view(bsz, tgt_len, self.embed_dim)
+        attn_output = self.out_proj(attn_output)
+
+        return attn_output, None, past_key_value
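The design point of `Seq2SeqCrossAttention` is that the encoder-side key/value projections are computed once and passed in as `past_key_value`, so each decode step only projects queries. A self-contained sketch of that pattern in plain PyTorch (illustrative sizes, not the package's API):

```python
import torch
import torch.nn.functional as F

bsz, tgt_len, src_len, num_heads, head_dim = 1, 1, 24, 8, 64
embed_dim = num_heads * head_dim

# K/V are projected once from the encoder output and cached ...
cross_past_key_value = (
    torch.randn(bsz, num_heads, src_len, head_dim),  # key_states
    torch.randn(bsz, num_heads, src_len, head_dim),  # value_states
)

# ... so each decoding step only projects queries from the decoder hidden states.
hidden_states = torch.randn(bsz, tgt_len, embed_dim)
q_proj = torch.nn.Linear(embed_dim, embed_dim)
query_states = q_proj(hidden_states).view(bsz, -1, num_heads, head_dim).transpose(1, 2)

attn_output = F.scaled_dot_product_attention(query_states, *cross_past_key_value)
print(attn_output.shape)  # torch.Size([1, 8, 1, 64])
```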
optimum/rbln/transformers/models/siglip/configuration_siglip.py

@@ -24,6 +24,7 @@ class RBLNSiglipVisionModelConfig(RBLNModelConfig):
         image_size: Optional[int] = None,
         interpolate_pos_encoding: Optional[bool] = None,
         output_hidden_states: Optional[bool] = None,
+        output_attentions: Optional[bool] = None,
         **kwargs,
     ):
         """
@@ -33,6 +34,7 @@ class RBLNSiglipVisionModelConfig(RBLNModelConfig):
                 a tuple/list (height, width), or a dictionary with 'height' and 'width' keys.
             interpolate_pos_encoding (Optional[bool]): Whether to interpolate the position encoding.
             output_hidden_states: (Optional[bool]): Whether to return hidden states.
+            output_attentions: (Optional[bool]): Whether to return attentions.
             **kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
         Raises:
@@ -46,6 +48,7 @@ class RBLNSiglipVisionModelConfig(RBLNModelConfig):
         self.image_size = image_size
         self.interpolate_pos_encoding = interpolate_pos_encoding or False
         self.output_hidden_states = output_hidden_states
+        self.output_attentions = output_attentions
 
     @property
     def image_width(self):
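Enabling the new flag from user code might look like the following sketch (assuming `RBLNSiglipVisionModelConfig` is importable from `optimum.rbln`, matching the export pattern used elsewhere in this diff):

```python
from optimum.rbln import RBLNSiglipVisionModelConfig

config = RBLNSiglipVisionModelConfig(
    image_size=384,
    output_hidden_states=True,
    output_attentions=True,  # new in 0.8.1a2
)
```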