optimum-rbln 0.8.2a4__py3-none-any.whl → 0.9.3rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167)
  1. optimum/rbln/__init__.py +96 -9
  2. optimum/rbln/__version__.py +16 -3
  3. optimum/rbln/cli.py +660 -0
  4. optimum/rbln/configuration_utils.py +153 -42
  5. optimum/rbln/diffusers/__init__.py +7 -0
  6. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +3 -3
  7. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +1 -1
  8. optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +3 -3
  9. optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +4 -4
  10. optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +9 -4
  11. optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +9 -4
  12. optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +3 -3
  13. optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +3 -3
  14. optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +35 -19
  15. optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +14 -11
  16. optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +30 -20
  17. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +13 -9
  18. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +17 -13
  19. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +17 -10
  20. optimum/rbln/diffusers/modeling_diffusers.py +30 -14
  21. optimum/rbln/diffusers/models/__init__.py +3 -13
  22. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +31 -3
  23. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +28 -3
  24. optimum/rbln/diffusers/models/autoencoders/vq_model.py +31 -3
  25. optimum/rbln/diffusers/models/transformers/prior_transformer.py +1 -1
  26. optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +9 -1
  27. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +9 -1
  28. optimum/rbln/diffusers/models/unets/unet_2d_condition.py +6 -3
  29. optimum/rbln/diffusers/pipelines/__init__.py +11 -5
  30. optimum/rbln/diffusers/pipelines/auto_pipeline.py +307 -0
  31. optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +19 -16
  32. optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +14 -18
  33. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +31 -1
  34. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +31 -1
  35. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +1 -6
  36. optimum/rbln/modeling.py +71 -19
  37. optimum/rbln/modeling_base.py +99 -21
  38. optimum/rbln/ops/attn.py +158 -0
  39. optimum/rbln/ops/flash_attn.py +166 -0
  40. optimum/rbln/ops/kv_cache_update.py +5 -0
  41. optimum/rbln/ops/linear.py +7 -0
  42. optimum/rbln/transformers/__init__.py +92 -0
  43. optimum/rbln/transformers/configuration_generic.py +9 -7
  44. optimum/rbln/transformers/modeling_attention_utils.py +252 -0
  45. optimum/rbln/transformers/modeling_generic.py +51 -9
  46. optimum/rbln/transformers/modeling_outputs.py +37 -0
  47. optimum/rbln/transformers/models/__init__.py +91 -30
  48. optimum/rbln/transformers/models/auto/__init__.py +2 -0
  49. optimum/rbln/transformers/models/auto/auto_factory.py +92 -17
  50. optimum/rbln/transformers/models/auto/modeling_auto.py +45 -0
  51. optimum/rbln/transformers/models/bart/bart_architecture.py +1 -3
  52. optimum/rbln/transformers/models/bart/configuration_bart.py +2 -0
  53. optimum/rbln/transformers/models/bert/bert_architecture.py +16 -0
  54. optimum/rbln/transformers/models/bert/modeling_bert.py +8 -4
  55. optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +42 -11
  56. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +94 -30
  57. optimum/rbln/transformers/models/clip/configuration_clip.py +10 -7
  58. optimum/rbln/transformers/models/clip/modeling_clip.py +27 -4
  59. optimum/rbln/transformers/models/colpali/colpali_architecture.py +3 -6
  60. optimum/rbln/transformers/models/colpali/configuration_colpali.py +37 -21
  61. optimum/rbln/transformers/models/colpali/modeling_colpali.py +113 -96
  62. optimum/rbln/transformers/models/colqwen2/__init__.py +2 -0
  63. optimum/rbln/transformers/models/colqwen2/colqwen2_architecture.py +233 -0
  64. optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py +74 -0
  65. optimum/rbln/transformers/models/colqwen2/modeling_colqwen2.py +446 -0
  66. optimum/rbln/transformers/models/decoderonly/__init__.py +3 -2
  67. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +109 -37
  68. optimum/rbln/transformers/models/decoderonly/configuration_lora.py +411 -0
  69. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +318 -309
  70. optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +504 -0
  71. optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +111 -0
  72. optimum/rbln/transformers/models/decoderonly/lora_architecture.py +204 -0
  73. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +453 -897
  74. optimum/rbln/transformers/models/depth_anything/__init__.py +16 -0
  75. optimum/rbln/transformers/models/depth_anything/configuration_depth_anything.py +24 -0
  76. optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py +25 -0
  77. optimum/rbln/transformers/models/exaone/modeling_exaone.py +42 -4
  78. optimum/rbln/transformers/models/gemma/__init__.py +2 -2
  79. optimum/rbln/transformers/models/gemma/configuration_gemma.py +9 -1
  80. optimum/rbln/transformers/models/gemma/gemma_architecture.py +1 -4
  81. optimum/rbln/transformers/models/gemma/modeling_gemma.py +22 -1
  82. optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +49 -13
  83. optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +12 -2
  84. optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py +245 -0
  85. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +201 -349
  86. optimum/rbln/transformers/models/gpt2/__init__.py +2 -2
  87. optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +31 -3
  88. optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +10 -8
  89. optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +18 -1
  90. optimum/rbln/transformers/models/grounding_dino/__init__.py +10 -0
  91. optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +92 -0
  92. optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +599 -0
  93. optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +1032 -0
  94. optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +35 -7
  95. optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +26 -27
  96. optimum/rbln/transformers/models/llama/__init__.py +2 -2
  97. optimum/rbln/transformers/models/llama/configuration_llama.py +9 -1
  98. optimum/rbln/transformers/models/llama/modeling_llama.py +22 -1
  99. optimum/rbln/transformers/models/llava/__init__.py +16 -0
  100. optimum/rbln/transformers/models/llava/configuration_llava.py +72 -0
  101. optimum/rbln/transformers/models/llava/modeling_llava.py +478 -0
  102. optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +15 -17
  103. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +235 -375
  104. optimum/rbln/transformers/models/midm/midm_architecture.py +4 -1
  105. optimum/rbln/transformers/models/midm/modeling_midm.py +42 -4
  106. optimum/rbln/transformers/models/mistral/__init__.py +2 -2
  107. optimum/rbln/transformers/models/mistral/configuration_mistral.py +9 -1
  108. optimum/rbln/transformers/models/mistral/mistral_architecture.py +1 -1
  109. optimum/rbln/transformers/models/mistral/modeling_mistral.py +26 -3
  110. optimum/rbln/transformers/models/opt/__init__.py +2 -2
  111. optimum/rbln/transformers/models/opt/configuration_opt.py +8 -1
  112. optimum/rbln/transformers/models/opt/modeling_opt.py +28 -16
  113. optimum/rbln/transformers/models/opt/opt_architecture.py +4 -4
  114. optimum/rbln/transformers/models/pegasus/__init__.py +17 -0
  115. optimum/rbln/transformers/models/pegasus/configuration_pegasus.py +38 -0
  116. optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +71 -0
  117. optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +161 -0
  118. optimum/rbln/transformers/models/phi/__init__.py +2 -2
  119. optimum/rbln/transformers/models/phi/configuration_phi.py +9 -1
  120. optimum/rbln/transformers/models/phi/modeling_phi.py +10 -1
  121. optimum/rbln/transformers/models/phi/phi_architecture.py +11 -7
  122. optimum/rbln/transformers/models/pixtral/__init__.py +16 -0
  123. optimum/rbln/transformers/models/pixtral/configuration_pixtral.py +43 -0
  124. optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +310 -0
  125. optimum/rbln/transformers/models/pixtral/pixtral_architecture.py +73 -0
  126. optimum/rbln/transformers/models/qwen2/__init__.py +2 -2
  127. optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +9 -1
  128. optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +27 -1
  129. optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +21 -6
  130. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +15 -21
  131. optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +28 -7
  132. optimum/rbln/transformers/models/qwen2_vl/__init__.py +19 -0
  133. optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +88 -0
  134. optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +514 -0
  135. optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +165 -0
  136. optimum/rbln/transformers/models/qwen3/configuration_qwen3.py +2 -2
  137. optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +86 -330
  138. optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +1 -245
  139. optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +20 -13
  140. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +24 -3
  141. optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +2 -2
  142. optimum/rbln/transformers/models/siglip/__init__.py +2 -6
  143. optimum/rbln/transformers/models/siglip/configuration_siglip.py +1 -1
  144. optimum/rbln/transformers/models/siglip/modeling_siglip.py +5 -16
  145. optimum/rbln/transformers/models/swin/__init__.py +16 -0
  146. optimum/rbln/transformers/models/swin/configuration_swin.py +42 -0
  147. optimum/rbln/transformers/models/swin/modeling_swin.py +341 -0
  148. optimum/rbln/transformers/models/t5/configuration_t5.py +2 -0
  149. optimum/rbln/transformers/models/t5/t5_architecture.py +8 -1
  150. optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +3 -3
  151. optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +4 -14
  152. optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +7 -1
  153. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +1 -0
  154. optimum/rbln/transformers/models/whisper/configuration_whisper.py +12 -13
  155. optimum/rbln/transformers/models/whisper/generation_whisper.py +28 -6
  156. optimum/rbln/transformers/models/whisper/modeling_whisper.py +28 -3
  157. optimum/rbln/transformers/models/xlm_roberta/__init__.py +2 -8
  158. optimum/rbln/transformers/utils/rbln_quantization.py +391 -75
  159. optimum/rbln/transformers/utils/rbln_runtime_wrapper.py +79 -0
  160. optimum/rbln/utils/depreacate_utils.py +16 -0
  161. optimum/rbln/utils/runtime_utils.py +28 -18
  162. optimum/rbln/utils/submodule.py +31 -9
  163. {optimum_rbln-0.8.2a4.dist-info → optimum_rbln-0.9.3rc0.dist-info}/METADATA +8 -7
  164. {optimum_rbln-0.8.2a4.dist-info → optimum_rbln-0.9.3rc0.dist-info}/RECORD +167 -125
  165. optimum_rbln-0.9.3rc0.dist-info/entry_points.txt +2 -0
  166. {optimum_rbln-0.8.2a4.dist-info → optimum_rbln-0.9.3rc0.dist-info}/WHEEL +0 -0
  167. {optimum_rbln-0.8.2a4.dist-info → optimum_rbln-0.9.3rc0.dist-info}/licenses/LICENSE +0 -0
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Any, Dict, Optional, Tuple
15
+ from typing import Any, Optional, Tuple
16
16
 
17
17
  from ....configuration_utils import RBLNModelConfig
18
18
  from ....transformers import RBLNCLIPTextModelConfig, RBLNCLIPTextModelWithProjectionConfig
@@ -38,7 +38,7 @@ class RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
38
38
  sample_size: Optional[Tuple[int, int]] = None,
39
39
  image_size: Optional[Tuple[int, int]] = None,
40
40
  guidance_scale: Optional[float] = None,
41
- **kwargs: Dict[str, Any],
41
+ **kwargs: Any,
42
42
  ):
43
43
  """
44
44
  Args:
@@ -59,7 +59,7 @@ class RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
59
59
  image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
60
60
  Cannot be used together with img_height/img_width.
61
61
  guidance_scale (Optional[float]): Scale for classifier-free guidance.
62
- **kwargs: Additional arguments passed to the parent RBLNModelConfig.
62
+ kwargs: Additional arguments passed to the parent RBLNModelConfig.
63
63
 
64
64
  Raises:
65
65
  ValueError: If both image_size and img_height/img_width are provided.
@@ -93,20 +93,27 @@ class RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
93
93
  elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
94
94
  raise ValueError("Both img_height and img_width must be provided together if used")
95
95
 
96
- self.text_encoder = self.init_submodule_config(RBLNCLIPTextModelConfig, text_encoder, batch_size=batch_size)
97
- self.unet = self.init_submodule_config(
98
- RBLNUNet2DConditionModelConfig,
96
+ self.text_encoder = self.initialize_submodule_config(
97
+ text_encoder,
98
+ cls_name="RBLNCLIPTextModelConfig",
99
+ batch_size=batch_size,
100
+ )
101
+ self.unet = self.initialize_submodule_config(
99
102
  unet,
103
+ cls_name="RBLNUNet2DConditionModelConfig",
100
104
  sample_size=sample_size,
101
105
  )
102
- self.vae = self.init_submodule_config(
103
- RBLNAutoencoderKLConfig,
106
+ self.vae = self.initialize_submodule_config(
104
107
  vae,
108
+ cls_name="RBLNAutoencoderKLConfig",
105
109
  batch_size=batch_size,
106
110
  uses_encoder=self.__class__._vae_uses_encoder,
107
111
  sample_size=image_size, # image size is equal to sample size in vae
108
112
  )
109
- self.controlnet = self.init_submodule_config(RBLNControlNetModelConfig, controlnet)
113
+ self.controlnet = self.initialize_submodule_config(
114
+ controlnet,
115
+ cls_name="RBLNControlNetModelConfig",
116
+ )
110
117
 
111
118
  # Get default guidance scale from original class to set UNet and ControlNet batch size
112
119
  if guidance_scale is None:
@@ -178,7 +185,7 @@ class RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
178
185
  sample_size: Optional[Tuple[int, int]] = None,
179
186
  image_size: Optional[Tuple[int, int]] = None,
180
187
  guidance_scale: Optional[float] = None,
181
- **kwargs: Dict[str, Any],
188
+ **kwargs: Any,
182
189
  ):
183
190
  """
184
191
  Args:
@@ -201,7 +208,7 @@ class RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
201
208
  image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
202
209
  Cannot be used together with img_height/img_width.
203
210
  guidance_scale (Optional[float]): Scale for classifier-free guidance.
204
- **kwargs: Additional arguments passed to the parent RBLNModelConfig.
211
+ kwargs: Additional arguments passed to the parent RBLNModelConfig.
205
212
 
206
213
  Raises:
207
214
  ValueError: If both image_size and img_height/img_width are provided.
@@ -235,23 +242,32 @@ class RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
235
242
  elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
236
243
  raise ValueError("Both img_height and img_width must be provided together if used")
237
244
 
238
- self.text_encoder = self.init_submodule_config(RBLNCLIPTextModelConfig, text_encoder, batch_size=batch_size)
239
- self.text_encoder_2 = self.init_submodule_config(
240
- RBLNCLIPTextModelWithProjectionConfig, text_encoder_2, batch_size=batch_size
245
+ self.text_encoder = self.initialize_submodule_config(
246
+ text_encoder,
247
+ cls_name="RBLNCLIPTextModelConfig",
248
+ batch_size=batch_size,
241
249
  )
242
- self.unet = self.init_submodule_config(
243
- RBLNUNet2DConditionModelConfig,
250
+ self.text_encoder_2 = self.initialize_submodule_config(
251
+ text_encoder_2,
252
+ cls_name="RBLNCLIPTextModelWithProjectionConfig",
253
+ batch_size=batch_size,
254
+ )
255
+ self.unet = self.initialize_submodule_config(
244
256
  unet,
257
+ cls_name="RBLNUNet2DConditionModelConfig",
245
258
  sample_size=sample_size,
246
259
  )
247
- self.vae = self.init_submodule_config(
248
- RBLNAutoencoderKLConfig,
260
+ self.vae = self.initialize_submodule_config(
249
261
  vae,
262
+ cls_name="RBLNAutoencoderKLConfig",
250
263
  batch_size=batch_size,
251
264
  uses_encoder=self.__class__._vae_uses_encoder,
252
265
  sample_size=image_size, # image size is equal to sample size in vae
253
266
  )
254
- self.controlnet = self.init_submodule_config(RBLNControlNetModelConfig, controlnet)
267
+ self.controlnet = self.initialize_submodule_config(
268
+ controlnet,
269
+ cls_name="RBLNControlNetModelConfig",
270
+ )
255
271
 
256
272
  # Get default guidance scale from original class to set UNet and ControlNet batch size
257
273
  guidance_scale = (
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Any, Dict, Optional
15
+ from typing import Any, Optional
16
16
 
17
17
  from ....configuration_utils import RBLNModelConfig
18
18
  from ....transformers import RBLNT5EncoderModelConfig
@@ -41,7 +41,7 @@ class RBLNCosmosPipelineBaseConfig(RBLNModelConfig):
41
41
  num_frames: Optional[int] = None,
42
42
  fps: Optional[int] = None,
43
43
  max_seq_len: Optional[int] = None,
44
- **kwargs: Dict[str, Any],
44
+ **kwargs: Any,
45
45
  ):
46
46
  """
47
47
  Args:
@@ -59,16 +59,19 @@ class RBLNCosmosPipelineBaseConfig(RBLNModelConfig):
59
59
  num_frames (Optional[int]): The number of frames in the generated video.
60
60
  fps (Optional[int]): The frames per second of the generated video.
61
61
  max_seq_len (Optional[int]): Maximum sequence length supported by the model.
62
- **kwargs: Additional arguments passed to the parent RBLNModelConfig.
62
+ kwargs: Additional arguments passed to the parent RBLNModelConfig.
63
63
  """
64
64
  super().__init__(**kwargs)
65
65
 
66
- self.text_encoder = self.init_submodule_config(
67
- RBLNT5EncoderModelConfig, text_encoder, batch_size=batch_size, max_seq_len=max_seq_len
66
+ self.text_encoder = self.initialize_submodule_config(
67
+ text_encoder,
68
+ cls_name="RBLNT5EncoderModelConfig",
69
+ batch_size=batch_size,
70
+ max_seq_len=max_seq_len,
68
71
  )
69
- self.transformer = self.init_submodule_config(
70
- RBLNCosmosTransformer3DModelConfig,
72
+ self.transformer = self.initialize_submodule_config(
71
73
  transformer,
74
+ cls_name="RBLNCosmosTransformer3DModelConfig",
72
75
  batch_size=batch_size,
73
76
  max_seq_len=max_seq_len,
74
77
  height=height,
@@ -76,18 +79,18 @@ class RBLNCosmosPipelineBaseConfig(RBLNModelConfig):
76
79
  num_frames=num_frames,
77
80
  fps=fps,
78
81
  )
79
- self.vae = self.init_submodule_config(
80
- RBLNAutoencoderKLCosmosConfig,
82
+ self.vae = self.initialize_submodule_config(
81
83
  vae,
84
+ cls_name="RBLNAutoencoderKLCosmosConfig",
82
85
  batch_size=batch_size,
83
86
  uses_encoder=self.__class__._vae_uses_encoder,
84
87
  height=height,
85
88
  width=width,
86
89
  num_frames=num_frames,
87
90
  )
88
- self.safety_checker = self.init_submodule_config(
89
- RBLNCosmosSafetyCheckerConfig,
91
+ self.safety_checker = self.initialize_submodule_config(
90
92
  safety_checker,
93
+ cls_name="RBLNCosmosSafetyCheckerConfig",
91
94
  batch_size=batch_size,
92
95
  height=height,
93
96
  width=width,
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Any, Dict, Optional, Tuple
15
+ from typing import Any, Optional, Tuple
16
16
 
17
17
  from ....configuration_utils import RBLNModelConfig
18
18
  from ....transformers import RBLNCLIPTextModelWithProjectionConfig, RBLNCLIPVisionModelWithProjectionConfig
@@ -37,7 +37,7 @@ class RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
37
37
  img_width: Optional[int] = None,
38
38
  height: Optional[int] = None,
39
39
  width: Optional[int] = None,
40
- **kwargs: Dict[str, Any],
40
+ **kwargs: Any,
41
41
  ):
42
42
  """
43
43
  Args:
@@ -54,7 +54,7 @@ class RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
54
54
  img_width (Optional[int]): Width of the generated images.
55
55
  height (Optional[int]): Height of the generated images.
56
56
  width (Optional[int]): Width of the generated images.
57
- **kwargs: Additional arguments passed to the parent RBLNModelConfig.
57
+ kwargs: Additional arguments passed to the parent RBLNModelConfig.
58
58
 
59
59
  Raises:
60
60
  ValueError: If both image_size and img_height/img_width are provided.
@@ -88,10 +88,14 @@ class RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
88
88
  elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
89
89
  raise ValueError("Both img_height and img_width must be provided together if used")
90
90
 
91
- self.unet = self.init_submodule_config(RBLNUNet2DConditionModelConfig, unet, sample_size=sample_size)
92
- self.movq = self.init_submodule_config(
93
- RBLNVQModelConfig,
91
+ self.unet = self.initialize_submodule_config(
92
+ unet,
93
+ cls_name="RBLNUNet2DConditionModelConfig",
94
+ sample_size=sample_size,
95
+ )
96
+ self.movq = self.initialize_submodule_config(
94
97
  movq,
98
+ cls_name="RBLNVQModelConfig",
95
99
  batch_size=batch_size,
96
100
  sample_size=image_size, # image size is equal to sample size in vae
97
101
  uses_encoder=self._movq_uses_encoder,
@@ -148,7 +152,7 @@ class RBLNKandinskyV22PriorPipelineConfig(RBLNModelConfig):
148
152
  *,
149
153
  batch_size: Optional[int] = None,
150
154
  guidance_scale: Optional[float] = None,
151
- **kwargs: Dict[str, Any],
155
+ **kwargs: Any,
152
156
  ):
153
157
  """
154
158
  Initialize a configuration for Kandinsky 2.2 prior pipeline optimized for RBLN NPU.
@@ -166,21 +170,27 @@ class RBLNKandinskyV22PriorPipelineConfig(RBLNModelConfig):
166
170
  Initialized as RBLNPriorTransformerConfig if not provided.
167
171
  batch_size (Optional[int]): Batch size for inference, applied to all submodules.
168
172
  guidance_scale (Optional[float]): Scale for classifier-free guidance.
169
- **kwargs: Additional arguments passed to the parent RBLNModelConfig.
173
+ kwargs: Additional arguments passed to the parent RBLNModelConfig.
170
174
 
171
175
  Note:
172
176
  When guidance_scale > 1.0, the prior batch size is automatically doubled to
173
177
  accommodate classifier-free guidance.
174
178
  """
175
179
  super().__init__(**kwargs)
176
- self.text_encoder = self.init_submodule_config(
177
- RBLNCLIPTextModelWithProjectionConfig, text_encoder, batch_size=batch_size
180
+ self.text_encoder = self.initialize_submodule_config(
181
+ text_encoder,
182
+ cls_name="RBLNCLIPTextModelWithProjectionConfig",
183
+ batch_size=batch_size,
178
184
  )
179
- self.image_encoder = self.init_submodule_config(
180
- RBLNCLIPVisionModelWithProjectionConfig, image_encoder, batch_size=batch_size
185
+ self.image_encoder = self.initialize_submodule_config(
186
+ image_encoder,
187
+ cls_name="RBLNCLIPVisionModelWithProjectionConfig",
188
+ batch_size=batch_size,
189
+ )
190
+ self.prior = self.initialize_submodule_config(
191
+ prior,
192
+ cls_name="RBLNPriorTransformerConfig",
181
193
  )
182
-
183
- self.prior = self.init_submodule_config(RBLNPriorTransformerConfig, prior)
184
194
 
185
195
  # Get default guidance scale from original class to set UNet batch size
186
196
  if guidance_scale is None:
@@ -226,7 +236,7 @@ class RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
226
236
  prior_text_encoder: Optional[RBLNCLIPTextModelWithProjectionConfig] = None,
227
237
  unet: Optional[RBLNUNet2DConditionModelConfig] = None,
228
238
  movq: Optional[RBLNVQModelConfig] = None,
229
- **kwargs: Dict[str, Any],
239
+ **kwargs: Any,
230
240
  ):
231
241
  """
232
242
  Initialize a configuration for combined Kandinsky 2.2 pipelines optimized for RBLN NPU.
@@ -259,7 +269,7 @@ class RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
259
269
  Used if decoder_pipe is not provided.
260
270
  movq (Optional[RBLNVQModelConfig]): Direct configuration for the MoVQ (VQ-GAN) model.
261
271
  Used if decoder_pipe is not provided.
262
- **kwargs: Additional arguments passed to the parent RBLNModelConfig.
272
+ kwargs: Additional arguments passed to the parent RBLNModelConfig.
263
273
  """
264
274
  super().__init__(**kwargs)
265
275
 
@@ -286,18 +296,18 @@ class RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
286
296
  elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
287
297
  raise ValueError("Both img_height and img_width must be provided together if used")
288
298
 
289
- self.prior_pipe = self.init_submodule_config(
290
- RBLNKandinskyV22PriorPipelineConfig,
299
+ self.prior_pipe = self.initialize_submodule_config(
291
300
  prior_pipe,
301
+ cls_name="RBLNKandinskyV22PriorPipelineConfig",
292
302
  prior=prior_prior,
293
303
  image_encoder=prior_image_encoder,
294
304
  text_encoder=prior_text_encoder,
295
305
  batch_size=batch_size,
296
306
  guidance_scale=guidance_scale,
297
307
  )
298
- self.decoder_pipe = self.init_submodule_config(
299
- self._decoder_pipe_cls,
308
+ self.decoder_pipe = self.initialize_submodule_config(
300
309
  decoder_pipe,
310
+ cls_name=self._decoder_pipe_cls.__name__,
301
311
  unet=unet,
302
312
  movq=movq,
303
313
  batch_size=batch_size,
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Any, Dict, Optional, Tuple
15
+ from typing import Any, Optional, Tuple
16
16
 
17
17
  from ....configuration_utils import RBLNModelConfig
18
18
  from ....transformers import RBLNCLIPTextModelConfig
@@ -37,7 +37,7 @@ class RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
37
37
  sample_size: Optional[Tuple[int, int]] = None,
38
38
  image_size: Optional[Tuple[int, int]] = None,
39
39
  guidance_scale: Optional[float] = None,
40
- **kwargs: Dict[str, Any],
40
+ **kwargs: Any,
41
41
  ):
42
42
  """
43
43
  Args:
@@ -56,7 +56,7 @@ class RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
56
56
  image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
57
57
  Cannot be used together with img_height/img_width.
58
58
  guidance_scale (Optional[float]): Scale for classifier-free guidance.
59
- **kwargs: Additional arguments passed to the parent RBLNModelConfig.
59
+ kwargs: Additional arguments passed to the parent RBLNModelConfig.
60
60
 
61
61
  Raises:
62
62
  ValueError: If both image_size and img_height/img_width are provided.
@@ -90,18 +90,22 @@ class RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
90
90
  elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
91
91
  raise ValueError("Both img_height and img_width must be provided together if used")
92
92
 
93
- self.text_encoder = self.init_submodule_config(RBLNCLIPTextModelConfig, text_encoder, batch_size=batch_size)
94
- self.unet = self.init_submodule_config(
95
- RBLNUNet2DConditionModelConfig,
93
+ self.text_encoder = self.initialize_submodule_config(
94
+ text_encoder,
95
+ cls_name="RBLNCLIPTextModelConfig",
96
+ batch_size=batch_size,
97
+ )
98
+ self.unet = self.initialize_submodule_config(
96
99
  unet,
100
+ cls_name="RBLNUNet2DConditionModelConfig",
97
101
  sample_size=sample_size,
98
102
  )
99
- self.vae = self.init_submodule_config(
100
- RBLNAutoencoderKLConfig,
103
+ self.vae = self.initialize_submodule_config(
101
104
  vae,
105
+ cls_name="RBLNAutoencoderKLConfig",
102
106
  batch_size=batch_size,
103
107
  uses_encoder=self.__class__._vae_uses_encoder,
104
- sample_size=image_size, # image size is equal to sample size in vae
108
+ sample_size=image_size,
105
109
  )
106
110
 
107
111
  # Get default guidance scale from original class to set UNet batch size
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Any, Dict, Optional, Tuple
15
+ from typing import Any, Optional, Tuple
16
16
 
17
17
  from ....configuration_utils import RBLNModelConfig
18
18
  from ....transformers import RBLNCLIPTextModelWithProjectionConfig, RBLNT5EncoderModelConfig
@@ -40,7 +40,7 @@ class RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
40
40
  height: Optional[int] = None,
41
41
  width: Optional[int] = None,
42
42
  guidance_scale: Optional[float] = None,
43
- **kwargs: Dict[str, Any],
43
+ **kwargs: Any,
44
44
  ):
45
45
  """
46
46
  Args:
@@ -64,7 +64,7 @@ class RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
64
64
  height (Optional[int]): Height of the generated images.
65
65
  width (Optional[int]): Width of the generated images.
66
66
  guidance_scale (Optional[float]): Scale for classifier-free guidance.
67
- **kwargs: Additional arguments passed to the parent RBLNModelConfig.
67
+ kwargs: Additional arguments passed to the parent RBLNModelConfig.
68
68
 
69
69
  Raises:
70
70
  ValueError: If both image_size and img_height/img_width are provided.
@@ -100,27 +100,31 @@ class RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
100
100
 
101
101
  max_seq_len = max_seq_len or 256
102
102
 
103
- self.text_encoder = self.init_submodule_config(
104
- RBLNCLIPTextModelWithProjectionConfig, text_encoder, batch_size=batch_size
103
+ self.text_encoder = self.initialize_submodule_config(
104
+ text_encoder,
105
+ cls_name="RBLNCLIPTextModelWithProjectionConfig",
106
+ batch_size=batch_size,
105
107
  )
106
- self.text_encoder_2 = self.init_submodule_config(
107
- RBLNCLIPTextModelWithProjectionConfig, text_encoder_2, batch_size=batch_size
108
+ self.text_encoder_2 = self.initialize_submodule_config(
109
+ text_encoder_2,
110
+ cls_name="RBLNCLIPTextModelWithProjectionConfig",
111
+ batch_size=batch_size,
108
112
  )
109
- self.text_encoder_3 = self.init_submodule_config(
110
- RBLNT5EncoderModelConfig,
113
+ self.text_encoder_3 = self.initialize_submodule_config(
111
114
  text_encoder_3,
115
+ cls_name="RBLNT5EncoderModelConfig",
112
116
  batch_size=batch_size,
113
117
  max_seq_len=max_seq_len,
114
118
  model_input_names=["input_ids"],
115
119
  )
116
- self.transformer = self.init_submodule_config(
117
- RBLNSD3Transformer2DModelConfig,
120
+ self.transformer = self.initialize_submodule_config(
118
121
  transformer,
122
+ cls_name="RBLNSD3Transformer2DModelConfig",
119
123
  sample_size=sample_size,
120
124
  )
121
- self.vae = self.init_submodule_config(
122
- RBLNAutoencoderKLConfig,
125
+ self.vae = self.initialize_submodule_config(
123
126
  vae,
127
+ cls_name="RBLNAutoencoderKLConfig",
124
128
  batch_size=batch_size,
125
129
  uses_encoder=self.__class__._vae_uses_encoder,
126
130
  sample_size=image_size,
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Any, Dict, Optional, Tuple
15
+ from typing import Any, Optional, Tuple
16
16
 
17
17
  from ....configuration_utils import RBLNModelConfig
18
18
  from ....transformers import RBLNCLIPTextModelConfig, RBLNCLIPTextModelWithProjectionConfig
@@ -38,7 +38,7 @@ class RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
38
38
  sample_size: Optional[Tuple[int, int]] = None,
39
39
  image_size: Optional[Tuple[int, int]] = None,
40
40
  guidance_scale: Optional[float] = None,
41
- **kwargs: Dict[str, Any],
41
+ **kwargs: Any,
42
42
  ):
43
43
  """
44
44
  Args:
@@ -59,7 +59,7 @@ class RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
59
59
  image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
60
60
  Cannot be used together with img_height/img_width.
61
61
  guidance_scale (Optional[float]): Scale for classifier-free guidance.
62
- **kwargs: Additional arguments passed to the parent RBLNModelConfig.
62
+ kwargs: Additional arguments passed to the parent RBLNModelConfig.
63
63
 
64
64
  Raises:
65
65
  ValueError: If both image_size and img_height/img_width are provided.
@@ -93,18 +93,25 @@ class RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
93
93
  elif (img_height is not None and img_width is None) or (img_height is None and img_width is not None):
94
94
  raise ValueError("Both img_height and img_width must be provided together if used")
95
95
 
96
- self.text_encoder = self.init_submodule_config(RBLNCLIPTextModelConfig, text_encoder, batch_size=batch_size)
97
- self.text_encoder_2 = self.init_submodule_config(
98
- RBLNCLIPTextModelWithProjectionConfig, text_encoder_2, batch_size=batch_size
96
+ self.text_encoder = self.initialize_submodule_config(
97
+ text_encoder,
98
+ cls_name="RBLNCLIPTextModelConfig",
99
+ batch_size=batch_size,
100
+ )
101
+ self.text_encoder_2 = self.initialize_submodule_config(
102
+ text_encoder_2,
103
+ cls_name="RBLNCLIPTextModelWithProjectionConfig",
104
+ batch_size=batch_size,
99
105
  )
100
- self.unet = self.init_submodule_config(
101
- RBLNUNet2DConditionModelConfig,
106
+
107
+ self.unet = self.initialize_submodule_config(
102
108
  unet,
109
+ cls_name="RBLNUNet2DConditionModelConfig",
103
110
  sample_size=sample_size,
104
111
  )
105
- self.vae = self.init_submodule_config(
106
- RBLNAutoencoderKLConfig,
112
+ self.vae = self.initialize_submodule_config(
107
113
  vae,
114
+ cls_name="RBLNAutoencoderKLConfig",
108
115
  batch_size=batch_size,
109
116
  uses_encoder=self.__class__._vae_uses_encoder,
110
117
  sample_size=image_size, # image size is equal to sample size in vae
@@ -33,6 +33,10 @@ if TYPE_CHECKING:
33
33
 
34
34
 
35
35
  class RBLNDiffusionMixinConfig(RBLNModelConfig):
36
+ """
37
+ Configuration class for RBLN diffusion pipelines.
38
+ """
39
+
36
40
  pass
37
41
 
38
42
 
@@ -54,8 +58,8 @@ class RBLNDiffusionMixin:
54
58
  ```
55
59
 
56
60
  Class Variables:
57
- _submodules: List of submodule names that should be compiled (typically ["text_encoder", "unet", "vae"])
58
- _optional_submodules: List of submodule names compiled without inheriting RBLNModel (typically ["safety_checker"])
61
+ - `_submodules`: List of submodule names that should be compiled (typically ["text_encoder", "unet", "vae"])
62
+ - `_optional_submodules`: List of submodule names compiled without inheriting RBLNModel (typically ["safety_checker"])
59
63
 
60
64
  Methods:
61
65
  from_pretrained: Creates and optionally compiles a model from a pretrained checkpoint
@@ -130,20 +134,20 @@ class RBLNDiffusionMixin:
130
134
  cls,
131
135
  model_id: str,
132
136
  *,
133
- export: bool = False,
137
+ export: bool = None,
134
138
  model_save_dir: Optional[PathLike] = None,
135
139
  rbln_config: Dict[str, Any] = {},
136
140
  lora_ids: Optional[Union[str, List[str]]] = None,
137
141
  lora_weights_names: Optional[Union[str, List[str]]] = None,
138
142
  lora_scales: Optional[Union[float, List[float]]] = None,
139
- **kwargs: Dict[str, Any],
143
+ **kwargs: Any,
140
144
  ) -> "RBLNDiffusionMixin":
141
145
  """
142
146
  Load a pretrained diffusion pipeline from a model checkpoint, with optional compilation for RBLN NPUs.
143
147
 
144
148
  This method has two distinct operating modes:
145
- - When `export=True`: Takes a PyTorch-based diffusion model, compiles it for RBLN NPUs, and loads the compiled model
146
- - When `export=False`: Loads an already compiled RBLN model from `model_id` without recompilation
149
+ - When `export=True`: Takes a PyTorch-based diffusion model, compiles it for RBLN NPUs, and loads the compiled model
150
+ - When `export=False`: Loads an already compiled RBLN model from `model_id` without recompilation
147
151
 
148
152
  It supports various diffusion pipelines including Stable Diffusion, Kandinsky, ControlNet, and other diffusers-based models.
149
153
 
@@ -170,7 +174,7 @@ class RBLNDiffusionMixin:
170
174
  Names of specific LoRA weight files to load, corresponding to lora_ids. Only used when `export=True`.
171
175
  lora_scales:
172
176
  Scaling factor(s) to apply to the LoRA adapter(s). Only used when `export=True`.
173
- **kwargs:
177
+ kwargs:
174
178
  Additional arguments to pass to the underlying diffusion pipeline constructor or the
175
179
  RBLN compilation process. These may include parameters specific to individual submodules
176
180
  or the particular diffusion pipeline being used.
@@ -181,6 +185,20 @@ class RBLNDiffusionMixin:
181
185
  """
182
186
  rbln_config, kwargs = cls.get_rbln_config_class().initialize_from_kwargs(rbln_config, **kwargs)
183
187
 
188
+ if export is None:
189
+ export = any(
190
+ not RBLNModel._is_compiled(
191
+ model_id,
192
+ token=kwargs.get("token"),
193
+ revision=kwargs.get("revision"),
194
+ force_download=kwargs.get("force_download", False),
195
+ cache_dir=kwargs.get("cache_dir"),
196
+ subfolder=submodule_name,
197
+ local_files_only=kwargs.get("local_files_only", False),
198
+ )
199
+ for submodule_name in cls._submodules
200
+ )
201
+
184
202
  if export:
185
203
  # keep submodules if user passed any of them.
186
204
  passed_submodules = {
@@ -226,7 +244,6 @@ class RBLNDiffusionMixin:
226
244
  device=rbln_config.device,
227
245
  device_map=rbln_config.device_map,
228
246
  create_runtimes=rbln_config.create_runtimes,
229
- optimize_host_mem=rbln_config.optimize_host_memory,
230
247
  activate_profiler=rbln_config.activate_profiler,
231
248
  timeout=rbln_config.timeout,
232
249
  ):
@@ -394,12 +411,11 @@ class RBLNDiffusionMixin:
394
411
  # overwrite to replace incorrect config
395
412
  model.save_config(model_save_dir)
396
413
 
397
- if rbln_config.optimize_host_memory is False:
398
- # Keep compiled_model objs to further analysis. -> TODO: remove soon...
399
- model.compiled_models = []
400
- for name in cls._submodules:
401
- submodule = getattr(model, name)
402
- model.compiled_models.extend(submodule.compiled_models)
414
+ # Keep compiled_model objs to further analysis. -> TODO: remove soon...
415
+ model.compiled_models = []
416
+ for name in cls._submodules:
417
+ submodule = getattr(model, name)
418
+ model.compiled_models.extend(submodule.compiled_models)
403
419
 
404
420
  return model
405
421
 
@@ -35,20 +35,10 @@ _import_structure = {
35
35
  }
36
36
 
37
37
  if TYPE_CHECKING:
38
- from .autoencoders import (
39
- RBLNAutoencoderKL,
40
- RBLNAutoencoderKLCosmos,
41
- RBLNVQModel,
42
- )
38
+ from .autoencoders import RBLNAutoencoderKL, RBLNAutoencoderKLCosmos, RBLNVQModel
43
39
  from .controlnet import RBLNControlNetModel
44
- from .transformers import (
45
- RBLNCosmosTransformer3DModel,
46
- RBLNPriorTransformer,
47
- RBLNSD3Transformer2DModel,
48
- )
49
- from .unets import (
50
- RBLNUNet2DConditionModel,
51
- )
40
+ from .transformers import RBLNCosmosTransformer3DModel, RBLNPriorTransformer, RBLNSD3Transformer2DModel
41
+ from .unets import RBLNUNet2DConditionModel
52
42
  else:
53
43
  import sys
54
44