optimum-rbln 0.8.0.post2__py3-none-any.whl → 0.8.1a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. optimum/rbln/__init__.py +2 -0
  2. optimum/rbln/__version__.py +2 -2
  3. optimum/rbln/configuration_utils.py +45 -33
  4. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +9 -2
  5. optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +4 -2
  6. optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +9 -2
  7. optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +4 -2
  8. optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +9 -2
  9. optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +9 -2
  10. optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +33 -9
  11. optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +30 -12
  12. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +22 -6
  13. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +16 -6
  14. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +16 -6
  15. optimum/rbln/diffusers/modeling_diffusers.py +16 -26
  16. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +11 -0
  17. optimum/rbln/diffusers/models/autoencoders/vae.py +1 -8
  18. optimum/rbln/diffusers/models/autoencoders/vq_model.py +11 -0
  19. optimum/rbln/diffusers/models/controlnet.py +13 -7
  20. optimum/rbln/diffusers/models/transformers/prior_transformer.py +10 -0
  21. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +2 -0
  22. optimum/rbln/diffusers/models/unets/unet_2d_condition.py +7 -0
  23. optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +1 -4
  24. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -0
  25. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -0
  26. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +7 -0
  27. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +7 -0
  28. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +7 -0
  29. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +48 -27
  30. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +7 -0
  31. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +7 -0
  32. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -0
  33. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +7 -0
  34. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +7 -0
  35. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +7 -0
  36. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -0
  37. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -0
  38. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -0
  39. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +7 -0
  40. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -0
  41. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +7 -0
  42. optimum/rbln/modeling.py +33 -35
  43. optimum/rbln/modeling_base.py +45 -107
  44. optimum/rbln/transformers/__init__.py +39 -47
  45. optimum/rbln/transformers/configuration_generic.py +16 -13
  46. optimum/rbln/transformers/modeling_generic.py +18 -19
  47. optimum/rbln/transformers/modeling_rope_utils.py +1 -1
  48. optimum/rbln/transformers/models/__init__.py +46 -4
  49. optimum/rbln/transformers/models/audio_spectrogram_transformer/__init__.py +17 -0
  50. optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +21 -0
  51. optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +28 -0
  52. optimum/rbln/transformers/models/auto/auto_factory.py +30 -12
  53. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +35 -4
  54. optimum/rbln/transformers/models/clip/configuration_clip.py +3 -3
  55. optimum/rbln/transformers/models/clip/modeling_clip.py +11 -12
  56. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +111 -14
  57. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +102 -35
  58. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +231 -175
  59. optimum/rbln/transformers/models/distilbert/__init__.py +19 -0
  60. optimum/rbln/transformers/models/distilbert/configuration_distilbert.py +19 -0
  61. optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +19 -0
  62. optimum/rbln/transformers/models/exaone/configuration_exaone.py +24 -1
  63. optimum/rbln/transformers/models/exaone/exaone_architecture.py +5 -1
  64. optimum/rbln/transformers/models/exaone/modeling_exaone.py +51 -5
  65. optimum/rbln/transformers/models/gemma/configuration_gemma.py +24 -1
  66. optimum/rbln/transformers/models/gemma/gemma_architecture.py +5 -1
  67. optimum/rbln/transformers/models/gemma/modeling_gemma.py +49 -0
  68. optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +3 -3
  69. optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +18 -250
  70. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +87 -236
  71. optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +4 -1
  72. optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +6 -1
  73. optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +12 -2
  74. optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +41 -4
  75. optimum/rbln/transformers/models/llama/configuration_llama.py +24 -1
  76. optimum/rbln/transformers/models/llama/modeling_llama.py +49 -0
  77. optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +2 -2
  78. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +33 -4
  79. optimum/rbln/transformers/models/midm/configuration_midm.py +24 -1
  80. optimum/rbln/transformers/models/midm/midm_architecture.py +6 -1
  81. optimum/rbln/transformers/models/midm/modeling_midm.py +51 -5
  82. optimum/rbln/transformers/models/mistral/configuration_mistral.py +24 -1
  83. optimum/rbln/transformers/models/mistral/modeling_mistral.py +62 -4
  84. optimum/rbln/transformers/models/opt/configuration_opt.py +4 -1
  85. optimum/rbln/transformers/models/opt/modeling_opt.py +10 -0
  86. optimum/rbln/transformers/models/opt/opt_architecture.py +7 -1
  87. optimum/rbln/transformers/models/phi/configuration_phi.py +24 -1
  88. optimum/rbln/transformers/models/phi/modeling_phi.py +49 -0
  89. optimum/rbln/transformers/models/phi/phi_architecture.py +1 -1
  90. optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +24 -1
  91. optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +67 -4
  92. optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +15 -3
  93. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +46 -25
  94. optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +4 -2
  95. optimum/rbln/transformers/models/resnet/__init__.py +23 -0
  96. optimum/rbln/transformers/models/resnet/configuration_resnet.py +20 -0
  97. optimum/rbln/transformers/models/resnet/modeling_resnet.py +22 -0
  98. optimum/rbln/transformers/models/roberta/__init__.py +24 -0
  99. optimum/rbln/transformers/{configuration_alias.py → models/roberta/configuration_roberta.py} +4 -30
  100. optimum/rbln/transformers/{modeling_alias.py → models/roberta/modeling_roberta.py} +2 -32
  101. optimum/rbln/transformers/models/seq2seq/__init__.py +1 -1
  102. optimum/rbln/transformers/models/seq2seq/{configuration_seq2seq2.py → configuration_seq2seq.py} +2 -2
  103. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +1 -1
  104. optimum/rbln/transformers/models/siglip/configuration_siglip.py +3 -0
  105. optimum/rbln/transformers/models/siglip/modeling_siglip.py +62 -21
  106. optimum/rbln/transformers/models/t5/modeling_t5.py +46 -4
  107. optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/__init__.py +1 -1
  108. optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/configuration_time_series_transformer.py +2 -2
  109. optimum/rbln/transformers/models/{time_series_transformers/modeling_time_series_transformers.py → time_series_transformer/modeling_time_series_transformer.py} +14 -9
  110. optimum/rbln/transformers/models/vit/__init__.py +19 -0
  111. optimum/rbln/transformers/models/vit/configuration_vit.py +19 -0
  112. optimum/rbln/transformers/models/vit/modeling_vit.py +19 -0
  113. optimum/rbln/transformers/models/wav2vec2/__init__.py +1 -1
  114. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +1 -1
  115. optimum/rbln/transformers/models/whisper/configuration_whisper.py +3 -1
  116. optimum/rbln/transformers/models/whisper/modeling_whisper.py +35 -15
  117. optimum/rbln/transformers/models/xlm_roberta/__init__.py +16 -2
  118. optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py +15 -2
  119. optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +12 -3
  120. optimum/rbln/utils/model_utils.py +20 -0
  121. optimum/rbln/utils/submodule.py +6 -8
  122. {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1a1.dist-info}/METADATA +1 -1
  123. {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1a1.dist-info}/RECORD +127 -114
  124. /optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/time_series_transformers_architecture.py +0 -0
  125. /optimum/rbln/transformers/models/wav2vec2/{configuration_wav2vec.py → configuration_wav2vec2.py} +0 -0
  126. {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1a1.dist-info}/WHEEL +0 -0
  127. {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1a1.dist-info}/licenses/LICENSE +0 -0
@@ -27,8 +27,57 @@ class RBLNLlamaForCausalLM(RBLNDecoderOnlyModelForCausalLM):
27
27
 
28
28
  A class to convert and run pre-trained transformers based LlamaForCausalLM model on RBLN devices.
29
29
  It implements the methods to convert a pre-trained transformers LlamaForCausalLM model into a RBLN transformer model by:
30
+
30
31
  - transferring the checkpoint weights of the original into an optimized RBLN graph,
31
32
  - compiling the resulting graph using the RBLN compiler.
33
+
34
+ **Configuration:**
35
+ This model uses [`RBLNLlamaForCausalLMConfig`] for configuration. When calling methods like `from_pretrained` or `from_model`,
36
+ the `rbln_config` parameter should be an instance of [`RBLNLlamaForCausalLMConfig`] or a dictionary conforming to its structure.
37
+
38
+ See the [`RBLNLlamaForCausalLMConfig`] class for all available configuration options.
39
+
40
+ Examples:
41
+ ```python
42
+ from optimum.rbln import RBLNLlamaForCausalLM
43
+
44
+ # Simple usage using rbln_* arguments
45
+ # `max_seq_len` is automatically inferred from the model config
46
+ model = RBLNLlamaForCausalLM.from_pretrained(
47
+ "meta-llama/Llama-2-7b-hf",
48
+ export=True,
49
+ rbln_batch_size=1,
50
+ rbln_tensor_parallel_size=4,
51
+ )
52
+
53
+
54
+ # Using a config dictionary
55
+ rbln_config = {
56
+ "batch_size": 1,
57
+ "max_seq_len": 4096,
58
+ "tensor_parallel_size": 4,
59
+ }
60
+ model = RBLNLlamaForCausalLM.from_pretrained(
61
+ "meta-llama/Llama-2-7b-hf",
62
+ export=True,
63
+ rbln_config=rbln_config
64
+ )
65
+
66
+
67
+ # Using a RBLNLlamaForCausalLMConfig instance (recommended for type checking)
68
+ from optimum.rbln import RBLNLlamaForCausalLMConfig
69
+
70
+ config = RBLNLlamaForCausalLMConfig(
71
+ batch_size=1,
72
+ max_seq_len=4096,
73
+ tensor_parallel_size=4
74
+ )
75
+ model = RBLNLlamaForCausalLM.from_pretrained(
76
+ "meta-llama/Llama-2-7b-hf",
77
+ export=True,
78
+ rbln_config=config
79
+ )
80
+ ```
32
81
  """
33
82
 
34
83
  _decoder_wrapper_cls = LlamaWrapper
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Optional
15
+ from typing import Any, Dict, Optional
16
16
 
17
17
  from ....configuration_utils import RBLNModelConfig
18
18
 
@@ -25,7 +25,7 @@ class RBLNLlavaNextForConditionalGenerationConfig(RBLNModelConfig):
25
25
  batch_size: Optional[int] = None,
26
26
  vision_tower: Optional[RBLNModelConfig] = None,
27
27
  language_model: Optional[RBLNModelConfig] = None,
28
- **kwargs,
28
+ **kwargs: Dict[str, Any],
29
29
  ):
30
30
  """
31
31
  Args:
@@ -109,6 +109,36 @@ class LoopProjector:
109
109
 
110
110
 
111
111
  class RBLNLlavaNextForConditionalGeneration(RBLNModel):
112
+ """
113
+ RBLNLlavaNextForConditionalGeneration is a multi-modal model that combines vision and language processing capabilities,
114
+ optimized for RBLN NPUs. It is designed for conditional generation tasks that involve both image and text inputs.
115
+
116
+ This model inherits from [`RBLNModel`]. Check the superclass documentation for the generic methods the library implements for all its models.
117
+
118
+ Important Note:
119
+ This model includes a Large Language Model (LLM) as a submodule. For optimal performance, it is highly recommended to use
120
+ tensor parallelism for the language model. This can be achieved by using the `rbln_config` parameter in the
121
+ `from_pretrained` method. Refer to the `from_pretrained` documentation and the RBLNLlavaNextForConditionalGenerationConfig class for details.
122
+
123
+ Examples:
124
+ ```python
125
+ from optimum.rbln import RBLNLlavaNextForConditionalGeneration
126
+
127
+ model = RBLNLlavaNextForConditionalGeneration.from_pretrained(
128
+ "llava-hf/llava-v1.6-mistral-7b-hf",
129
+ export=True,
130
+ rbln_config={
131
+ "language_model": {
132
+ "tensor_parallel_size": 4,
133
+ "use_inputs_embeds": True, # In Llava-Next, language model must use inputs_embeds as input.
134
+ },
135
+ },
136
+ )
137
+
138
+ model.save_pretrained("compiled-llava-next-mistral-7b-hf")
139
+ ```
140
+ """
141
+
112
142
  auto_model_class = AutoModelForVision2Seq
113
143
  _rbln_submodules = [
114
144
  {"name": "vision_tower"},
@@ -136,10 +166,9 @@ class RBLNLlavaNextForConditionalGeneration(RBLNModel):
136
166
  subfolder: str,
137
167
  rbln_config: RBLNModelConfig,
138
168
  ):
139
- """
140
- If you are unavoidably running on a CPU rather than an RBLN device,
141
- store the torch tensor, weight, etc. in this function.
142
- """
169
+ # If you are unavoidably running on a CPU rather than an RBLN device,
170
+ # store the torch tensor, weight, etc. in this function.
171
+
143
172
  save_dict = {}
144
173
  save_dict["image_newline"] = model.image_newline
145
174
  torch.save(save_dict, save_dir_path / subfolder / "torch_artifacts.pth")
@@ -16,4 +16,27 @@ from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausa
16
16
 
17
17
 
18
18
  class RBLNMidmLMHeadModelConfig(RBLNDecoderOnlyModelForCausalLMConfig):
19
- pass
19
+ """
20
+ Configuration class for MIDM models.
21
+
22
+ This class is an alias of RBLNDecoderOnlyModelForCausalLMConfig.
23
+
24
+ Example usage:
25
+ ```python
26
+ from optimum.rbln import RBLNMidmLMHeadModel, RBLNMidmLMHeadModelConfig
27
+
28
+ # Create a configuration object
29
+ config = RBLNMidmLMHeadModelConfig(
30
+ batch_size=1,
31
+ max_seq_len=4096,
32
+ tensor_parallel_size=4
33
+ )
34
+
35
+ # Use the configuration with from_pretrained
36
+ model = RBLNMidmLMHeadModel.from_pretrained(
37
+ "KT-AI/midm-bitext-S-7B-inst-v1",
38
+ export=True,
39
+ rbln_config=config
40
+ )
41
+ ```
42
+ """
@@ -68,7 +68,12 @@ class MidmLMHeadModelWrapper(DecoderOnlyWrapper):
68
68
  )
69
69
  new_layer = MidmLayer(layer, new_self_attn)
70
70
  new_layers.append(new_layer)
71
- new_model = MidmModel(causal_lm.transformer, new_layers, max_seq_len=max_seq_len)
71
+ new_model = MidmModel(
72
+ causal_lm.transformer,
73
+ new_layers,
74
+ max_seq_len=max_seq_len,
75
+ sliding_window_layers=self.sliding_window_layers,
76
+ )
72
77
  new_causal_lm = DecoderOnlyForCausalLM(causal_lm, new_model)
73
78
  return new_causal_lm
74
79
 
@@ -24,16 +24,62 @@ logger = logging.get_logger(__name__)
24
24
 
25
25
  class RBLNMidmLMHeadModel(RBLNDecoderOnlyModelForCausalLM):
26
26
  """
27
- The Midm Model transformer with a language modeling head on top (linear layer with weights tied to the input
28
- embeddings).
27
+ The MIDM Model transformer with a language modeling head (linear layer) on top.
28
+ This model inherits from [`RBLNDecoderOnlyModelForCausalLM`]. Check the superclass documentation for the generic methods the library implements for all its models.
29
29
 
30
- This model inherits from [`RBLNDecoderOnlyModelForCausalLM`]. Check the superclass documentation for the generic methods the
31
- library implements for all its model.
30
+ A class to convert and run pre-trained transformers based MidmLMHeadModel model on RBLN devices.
31
+ It implements the methods to convert a pre-trained transformers MidmLMHeadModel model into a RBLN transformer model by:
32
32
 
33
- It implements the methods to convert a pre-trained transformers Midm model into a RBLN transformer model by:
34
33
  - transferring the checkpoint weights of the original into an optimized RBLN graph,
35
34
  - compiling the resulting graph using the RBLN compiler.
36
35
 
36
+ **Configuration:**
37
+ This model uses [`RBLNMidmLMHeadModelConfig`] for configuration. When calling methods like `from_pretrained` or `from_model`,
38
+ the `rbln_config` parameter should be an instance of [`RBLNMidmLMHeadModelConfig`] or a dictionary conforming to its structure.
39
+
40
+ See the [`RBLNMidmLMHeadModelConfig`] class for all available configuration options.
41
+
42
+ Examples:
43
+ ```python
44
+ from optimum.rbln import RBLNMidmLMHeadModel
45
+
46
+ # Simple usage using rbln_* arguments
47
+ # `max_seq_len` is automatically inferred from the model config
48
+ model = RBLNMidmLMHeadModel.from_pretrained(
49
+ "KT-AI/midm-bitext-S-7B-inst-v1",
50
+ export=True,
51
+ rbln_batch_size=1,
52
+ rbln_tensor_parallel_size=4,
53
+ )
54
+
55
+
56
+ # Using a config dictionary
57
+ rbln_config = {
58
+ "batch_size": 1,
59
+ "max_seq_len": 4096,
60
+ "tensor_parallel_size": 4,
61
+ }
62
+ model = RBLNMidmLMHeadModel.from_pretrained(
63
+ "KT-AI/midm-bitext-S-7B-inst-v1",
64
+ export=True,
65
+ rbln_config=rbln_config
66
+ )
67
+
68
+
69
+ # Using a RBLNMidmLMHeadModelConfig instance (recommended for type checking)
70
+ from optimum.rbln import RBLNMidmLMHeadModelConfig
71
+
72
+ config = RBLNMidmLMHeadModelConfig(
73
+ batch_size=1,
74
+ max_seq_len=4096,
75
+ tensor_parallel_size=4
76
+ )
77
+ model = RBLNMidmLMHeadModel.from_pretrained(
78
+ "KT-AI/midm-bitext-S-7B-inst-v1",
79
+ export=True,
80
+ rbln_config=config
81
+ )
82
+ ```
37
83
  """
38
84
 
39
85
  _decoder_wrapper_cls = MidmLMHeadModelWrapper
@@ -16,4 +16,27 @@ from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausa
16
16
 
17
17
 
18
18
  class RBLNMistralForCausalLMConfig(RBLNDecoderOnlyModelForCausalLMConfig):
19
- pass
19
+ """
20
+ Configuration class for RBLN Mistral models.
21
+
22
+ This class is an alias of RBLNDecoderOnlyModelForCausalLMConfig.
23
+
24
+ Example usage:
25
+ ```python
26
+ from optimum.rbln import RBLNMistralForCausalLM, RBLNMistralForCausalLMConfig
27
+
28
+ # Create a configuration object
29
+ config = RBLNMistralForCausalLMConfig(
30
+ batch_size=1,
31
+ max_seq_len=4096,
32
+ tensor_parallel_size=4
33
+ )
34
+
35
+ # Use the configuration with from_pretrained
36
+ model = RBLNMistralForCausalLM.from_pretrained(
37
+ "mistralai/Mistral-7B-v0.1",
38
+ export=True,
39
+ rbln_config=config
40
+ )
41
+ ```
42
+ """
@@ -12,8 +12,10 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from transformers import PretrainedConfig
16
+
15
17
  from ....utils import logging
16
- from ...models.decoderonly import RBLNDecoderOnlyModelForCausalLM
18
+ from ...models.decoderonly import RBLNDecoderOnlyModelForCausalLM, RBLNDecoderOnlyModelForCausalLMConfig
17
19
  from .mistral_architecture import MistralForCausalLMWrapper
18
20
 
19
21
 
@@ -22,13 +24,69 @@ logger = logging.get_logger(__name__)
22
24
 
23
25
  class RBLNMistralForCausalLM(RBLNDecoderOnlyModelForCausalLM):
24
26
  """
25
- The Llama Model transformer with a language modeling head (linear layer) on top.
27
+ The Mistral Model transformer with a language modeling head (linear layer) on top.
26
28
  This model inherits from [`RBLNDecoderOnlyModelForCausalLM`]. Check the superclass documentation for the generic methods the library implements for all its models.
27
29
 
28
- A class to convert and run pre-trained transformers based LlamaForCausalLM model on RBLN devices.
29
- It implements the methods to convert a pre-trained transformers LlamaForCausalLM model into a RBLN transformer model by:
30
+ A class to convert and run pre-trained transformers based MistralForCausalLM model on RBLN devices.
31
+ It implements the methods to convert a pre-trained transformers MistralForCausalLM model into a RBLN transformer model by:
30
32
  - transferring the checkpoint weights of the original into an optimized RBLN graph,
31
33
  - compiling the resulting graph using the RBLN compiler.
34
+
35
+ **Configuration:**
36
+ This model uses [`RBLNMistralForCausalLMConfig`] for configuration. When calling methods like `from_pretrained` or `from_model`,
37
+ the `rbln_config` parameter should be an instance of [`RBLNMistralForCausalLMConfig`] or a dictionary conforming to its structure.
38
+
39
+ See the [`RBLNMistralForCausalLMConfig`] class for all available configuration options.
40
+
41
+ Examples:
42
+ ```python
43
+ from optimum.rbln import RBLNMistralForCausalLM
44
+
45
+ # Simple usage using rbln_* arguments
46
+ # `max_seq_len` is automatically inferred from the model config
47
+ model = RBLNMistralForCausalLM.from_pretrained(
48
+ "mistralai/Mistral-7B-v0.1",
49
+ export=True,
50
+ rbln_batch_size=1,
51
+ rbln_tensor_parallel_size=4,
52
+ )
53
+
54
+ # Using a config dictionary
55
+ rbln_config = {
56
+ "batch_size": 1,
57
+ "max_seq_len": 4096,
58
+ "tensor_parallel_size": 4,
59
+ }
60
+ model = RBLNMistralForCausalLM.from_pretrained(
61
+ "mistralai/Mistral-7B-v0.1",
62
+ export=True,
63
+ rbln_config=rbln_config
64
+ )
65
+
66
+ # Using a RBLNMistralForCausalLMConfig instance (recommended for type checking)
67
+ from optimum.rbln import RBLNMistralForCausalLMConfig
68
+
69
+ config = RBLNMistralForCausalLMConfig(
70
+ batch_size=1,
71
+ max_seq_len=4096,
72
+ tensor_parallel_size=4
73
+ )
74
+ model = RBLNMistralForCausalLM.from_pretrained(
75
+ "mistralai/Mistral-7B-v0.1",
76
+ export=True,
77
+ rbln_config=config
78
+ )
79
+ ```
32
80
  """
33
81
 
34
82
  _decoder_wrapper_cls = MistralForCausalLMWrapper
83
+
84
+ @classmethod
85
+ def _update_sliding_window_config(
86
+ cls, model_config: PretrainedConfig, rbln_config: RBLNDecoderOnlyModelForCausalLMConfig
87
+ ):
88
+ rbln_config.cache_impl = "sliding_window"
89
+ rbln_config.sliding_window = model_config.sliding_window
90
+ rbln_config.sliding_window_layers = list(range(model_config.num_hidden_layers))
91
+
92
+ return rbln_config
@@ -16,4 +16,7 @@ from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausa
16
16
 
17
17
 
18
18
  class RBLNOPTForCausalLMConfig(RBLNDecoderOnlyModelForCausalLMConfig):
19
- pass
19
+ """
20
+ Configuration class for OPT causal language model.
21
+ Inherits from RBLNDecoderOnlyModelForCausalLMConfig with no additional parameters.
22
+ """
@@ -45,8 +45,15 @@ class RBLNOPTForCausalLM(RBLNDecoderOnlyModelForCausalLM):
45
45
 
46
46
  A class to convert and run pre-trained transformers based OPTForCausalLM model on RBLN devices.
47
47
  It implements the methods to convert a pre-trained transformers OPTForCausalLM model into a RBLN transformer model by:
48
+
48
49
  - transferring the checkpoint weights of the original into an optimized RBLN graph,
49
50
  - compiling the resulting graph using the RBLN compiler.
51
+
52
+ **Configuration:**
53
+ This model uses [`RBLNOPTForCausalLMConfig`] for configuration. When calling methods like `from_pretrained` or `from_model`,
54
+ the `rbln_config` parameter should be an instance of [`RBLNOPTForCausalLMConfig`] or a dictionary conforming to its structure.
55
+
56
+ See the [`RBLNOPTForCausalLMConfig`] class for all available configuration options.
50
57
  """
51
58
 
52
59
  _decoder_wrapper_cls = OPTWrapper
@@ -72,6 +79,9 @@ class RBLNOPTForCausalLM(RBLNDecoderOnlyModelForCausalLM):
72
79
  "use_attention_mask": rbln_config.use_attention_mask,
73
80
  "use_position_ids": rbln_config.use_position_ids,
74
81
  "use_inputs_embeds": rbln_config.use_inputs_embeds,
82
+ "cache_impl": rbln_config.cache_impl,
83
+ "sliding_window": rbln_config.sliding_window,
84
+ "sliding_window_layers": rbln_config.sliding_window_layers,
75
85
  }
76
86
 
77
87
  for i in range(len(model.model.decoder.layers)):
@@ -45,7 +45,13 @@ class OPTWrapper(DecoderOnlyWrapper):
45
45
  )
46
46
  new_layer = OPTDecoderLayer(layer, new_self_attn)
47
47
  new_layers.append(new_layer)
48
- new_model = OPTModel(causal_lm.model.decoder, new_layers, max_seq_len=max_seq_len, use_learned_pos_emb=True)
48
+ new_model = OPTModel(
49
+ causal_lm.model.decoder,
50
+ new_layers,
51
+ max_seq_len=max_seq_len,
52
+ use_learned_pos_emb=True,
53
+ sliding_window_layers=self.sliding_window_layers,
54
+ )
49
55
  new_causal_lm = DecoderOnlyForCausalLM(causal_lm, new_model)
50
56
  return new_causal_lm
51
57
 
@@ -16,4 +16,27 @@ from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausa
16
16
 
17
17
 
18
18
  class RBLNPhiForCausalLMConfig(RBLNDecoderOnlyModelForCausalLMConfig):
19
- pass
19
+ """
20
+ Configuration class for RBLN Phi models.
21
+
22
+ This class is an alias of RBLNDecoderOnlyModelForCausalLMConfig.
23
+
24
+ Example usage:
25
+ ```python
26
+ from optimum.rbln import RBLNPhiForCausalLM, RBLNPhiForCausalLMConfig
27
+
28
+ # Create a configuration object
29
+ config = RBLNPhiForCausalLMConfig(
30
+ batch_size=1,
31
+ max_seq_len=4096,
32
+ tensor_parallel_size=4
33
+ )
34
+
35
+ # Use the configuration with from_pretrained
36
+ model = RBLNPhiForCausalLM.from_pretrained(
37
+ "microsoft/phi-2",
38
+ export=True,
39
+ rbln_config=config
40
+ )
41
+ ```
42
+ """
@@ -27,8 +27,57 @@ class RBLNPhiForCausalLM(RBLNDecoderOnlyModelForCausalLM):
27
27
 
28
28
  A class to convert and run pre-trained transformers based PhiForCausalLM model on RBLN devices.
29
29
  It implements the methods to convert a pre-trained transformers PhiForCausalLM model into a RBLN transformer model by:
30
+
30
31
  - transferring the checkpoint weights of the original into an optimized RBLN graph,
31
32
  - compiling the resulting graph using the RBLN compiler.
33
+
34
+ **Configuration:**
35
+ This model uses [`RBLNPhiForCausalLMConfig`] for configuration. When calling methods like `from_pretrained` or `from_model`,
36
+ the `rbln_config` parameter should be an instance of [`RBLNPhiForCausalLMConfig`] or a dictionary conforming to its structure.
37
+
38
+ See the [`RBLNPhiForCausalLMConfig`] class for all available configuration options.
39
+
40
+ Examples:
41
+ ```python
42
+ from optimum.rbln import RBLNPhiForCausalLM
43
+
44
+ # Simple usage using rbln_* arguments
45
+ # `max_seq_len` is automatically inferred from the model config
46
+ model = RBLNPhiForCausalLM.from_pretrained(
47
+ "microsoft/phi-2",
48
+ export=True,
49
+ rbln_batch_size=1,
50
+ rbln_tensor_parallel_size=4,
51
+ )
52
+
53
+
54
+ # Using a config dictionary
55
+ rbln_config = {
56
+ "batch_size": 1,
57
+ "max_seq_len": 4096,
58
+ "tensor_parallel_size": 4,
59
+ }
60
+ model = RBLNPhiForCausalLM.from_pretrained(
61
+ "microsoft/phi-2",
62
+ export=True,
63
+ rbln_config=rbln_config
64
+ )
65
+
66
+
67
+ # Using a RBLNPhiForCausalLMConfig instance (recommended for type checking)
68
+ from optimum.rbln import RBLNPhiForCausalLMConfig
69
+
70
+ config = RBLNPhiForCausalLMConfig(
71
+ batch_size=1,
72
+ max_seq_len=4096,
73
+ tensor_parallel_size=4
74
+ )
75
+ model = RBLNPhiForCausalLM.from_pretrained(
76
+ "microsoft/phi-2",
77
+ export=True,
78
+ rbln_config=config
79
+ )
80
+ ```
32
81
  """
33
82
 
34
83
  _decoder_wrapper_cls = PhiWrapper
@@ -48,7 +48,7 @@ class PhiWrapper(DecoderOnlyWrapper):
48
48
  raise NotImplementedError(f"Unknwon attn : {self.attn_impl}")
49
49
  new_layer = PhiLayer(layer, new_self_attn)
50
50
  new_layers.append(new_layer)
51
- new_model = PhiModel(causal_lm.model, new_layers)
51
+ new_model = PhiModel(causal_lm.model, new_layers, sliding_window_layers=self.sliding_window_layers)
52
52
  new_causal_lm = DecoderOnlyForCausalLM(causal_lm, new_model)
53
53
  return new_causal_lm
54
54
 
@@ -16,4 +16,27 @@ from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausa
16
16
 
17
17
 
18
18
  class RBLNQwen2ForCausalLMConfig(RBLNDecoderOnlyModelForCausalLMConfig):
19
- pass
19
+ """
20
+ Configuration class for RBLN Qwen2 models.
21
+
22
+ This class is an alias of RBLNDecoderOnlyModelForCausalLMConfig.
23
+
24
+ Example usage:
25
+ ```python
26
+ from optimum.rbln import RBLNQwen2ForCausalLM, RBLNQwen2ForCausalLMConfig
27
+
28
+ # Create a configuration object
29
+ config = RBLNQwen2ForCausalLMConfig(
30
+ batch_size=1,
31
+ max_seq_len=4096,
32
+ tensor_parallel_size=4
33
+ )
34
+
35
+ # Use the configuration with from_pretrained
36
+ model = RBLNQwen2ForCausalLM.from_pretrained(
37
+ "Qwen/Qwen2-7B",
38
+ export=True,
39
+ rbln_config=config
40
+ )
41
+ ```
42
+ """
@@ -12,8 +12,10 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from transformers import PretrainedConfig
16
+
15
17
  from ....utils import logging
16
- from ...models.decoderonly import RBLNDecoderOnlyModelForCausalLM
18
+ from ...models.decoderonly import RBLNDecoderOnlyModelForCausalLM, RBLNDecoderOnlyModelForCausalLMConfig
17
19
  from .qwen2_architecture import QWEN2Wrapper
18
20
 
19
21
 
@@ -22,13 +24,74 @@ logger = logging.get_logger(__name__)
22
24
 
23
25
  class RBLNQwen2ForCausalLM(RBLNDecoderOnlyModelForCausalLM):
24
26
  """
25
- The Llama Model transformer with a language modeling head (linear layer) on top.
27
+ The Qwen2 Model transformer with a language modeling head (linear layer) on top.
26
28
  This model inherits from [`RBLNDecoderOnlyModelForCausalLM`]. Check the superclass documentation for the generic methods the library implements for all its models.
27
29
 
28
- A class to convert and run pre-trained transformers based LlamaForCausalLM model on RBLN devices.
29
- It implements the methods to convert a pre-trained transformers LlamaForCausalLM model into a RBLN transformer model by:
30
+ A class to convert and run a pre-trained transformers-based Qwen2ForCausalLM model on RBLN devices.
31
+ It implements the methods to convert a pre-trained transformers Qwen2ForCausalLM model into an RBLN transformer model by:
30
32
  - transferring the checkpoint weights of the original into an optimized RBLN graph,
31
33
  - compiling the resulting graph using the RBLN compiler.
34
+
35
+ **Configuration:**
36
+ This model uses [`RBLNQwen2ForCausalLMConfig`] for configuration. When calling methods like `from_pretrained` or `from_model`,
37
+ the `rbln_config` parameter should be an instance of [`RBLNQwen2ForCausalLMConfig`] or a dictionary conforming to its structure.
38
+
39
+ See the [`RBLNQwen2ForCausalLMConfig`] class for all available configuration options.
40
+
41
+ Examples:
42
+ ```python
43
+ from optimum.rbln import RBLNQwen2ForCausalLM
44
+
45
+ # Simple usage using rbln_* arguments
46
+ # `max_seq_len` is automatically inferred from the model config
47
+ model = RBLNQwen2ForCausalLM.from_pretrained(
48
+ "Qwen/Qwen2-7B-Instruct",
49
+ export=True,
50
+ rbln_batch_size=1,
51
+ rbln_tensor_parallel_size=4,
52
+ )
53
+
54
+
55
+ # Using a config dictionary
56
+ rbln_config = {
57
+ "batch_size": 1,
58
+ "max_seq_len": 4096,
59
+ "tensor_parallel_size": 4,
60
+ }
61
+ model = RBLNQwen2ForCausalLM.from_pretrained(
62
+ "Qwen/Qwen2-7B-Instruct",
63
+ export=True,
64
+ rbln_config=rbln_config
65
+ )
66
+
67
+
68
+ # Using an RBLNQwen2ForCausalLMConfig instance (recommended for type checking)
69
+ from optimum.rbln import RBLNQwen2ForCausalLMConfig
70
+
71
+ config = RBLNQwen2ForCausalLMConfig(
72
+ batch_size=1,
73
+ max_seq_len=4096,
74
+ tensor_parallel_size=4
75
+ )
76
+ model = RBLNQwen2ForCausalLM.from_pretrained(
77
+ "Qwen/Qwen2-7B-Instruct",
78
+ export=True,
79
+ rbln_config=config
80
+ )
81
+ ```
32
82
  """
33
83
 
34
84
  _decoder_wrapper_cls = QWEN2Wrapper
85
+
86
+ @classmethod
87
+ def _update_sliding_window_config(
88
+ cls, model_config: PretrainedConfig, rbln_config: RBLNDecoderOnlyModelForCausalLMConfig
89
+ ):
90
+ # https://github.com/huggingface/transformers/issues/35896
91
+ # There seems to be a bug in transformers (v4.52.4). Therefore, as is done when attn_implementation is eager,
92
+ # we set all layers to use sliding window in this version. This should be updated once the bug is fixed.
93
+
94
+ rbln_config.cache_impl = "sliding_window"
95
+ rbln_config.sliding_window = model_config.sliding_window
96
+ rbln_config.sliding_window_layers = list(range(model_config.num_hidden_layers))
97
+ return rbln_config
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import List, Optional, Union
15
+ from typing import Any, Dict, List, Optional, Union
16
16
 
17
17
  from ....configuration_utils import RBLNModelConfig
18
18
  from ..decoderonly.configuration_decoderonly import RBLNDecoderOnlyModelForCausalLMConfig
@@ -25,7 +25,7 @@ class RBLNQwen2_5_VLForConditionalGenerationConfig(RBLNDecoderOnlyModelForCausal
25
25
  self,
26
26
  visual: Optional[RBLNModelConfig] = None,
27
27
  use_inputs_embeds: bool = True,
28
- **kwargs,
28
+ **kwargs: Dict[str, Any],
29
29
  ):
30
30
  super().__init__(use_inputs_embeds=use_inputs_embeds, **kwargs)
31
31
  if not self.use_inputs_embeds:
@@ -37,7 +37,7 @@ class RBLNQwen2_5_VLForConditionalGenerationConfig(RBLNDecoderOnlyModelForCausal
37
37
 
38
38
 
39
39
  class RBLNQwen2_5_VisionTransformerPretrainedModelConfig(RBLNModelConfig):
40
- def __init__(self, max_seq_lens: Union[int, List[int]] = None, **kwargs):
40
+ def __init__(self, max_seq_lens: Union[int, List[int]] = None, **kwargs: Dict[str, Any]):
41
41
  """
42
42
  Args:
43
43
  max_seq_lens (Optional[Union[int, List[int]]]): Maximum sequence lengths for Vision
@@ -54,6 +54,18 @@ class RBLNQwen2_5_VisionTransformerPretrainedModelConfig(RBLNModelConfig):
54
54
 
55
55
  Raises:
56
56
  ValueError: If batch_size is not a positive integer.
57
+
58
+ Max Seq Lens:
59
+ Since `Qwen2_5_VLForConditionalGeneration` performs inference on a per-image or per-frame basis,
60
+ `max_seq_lens` should be set based on the maximum expected resolution of the input images or video frames,
61
+ according to the following guidelines:
62
+
63
+ 1. **Minimum Value**: `max_seq_lens` must be greater than or equal to the number of patches generated from the input image.
64
+ For example, a 224x224 image with a patch size of 14 results in (224 / 14) * (224 / 14) = 256 patches.
65
+ Therefore, `max_seq_lens` must be at least 256.
66
+ 2. **Alignment Requirement**: `max_seq_lens` must be a multiple of `(window_size / patch_size)^2` due to the requirements
67
+ of the window-based attention mechanism. For instance, if `window_size` is 112 and `patch_size` is 14, then
68
+ `(112 / 14)^2 = 64`, meaning valid values for `max_seq_lens` include 64, 128, 192, 256, etc.
57
69
  """
58
70
  super().__init__(**kwargs)
59
71