optimum-rbln 0.8.0.post2__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. optimum/rbln/__init__.py +24 -0
  2. optimum/rbln/__version__.py +2 -2
  3. optimum/rbln/configuration_utils.py +45 -33
  4. optimum/rbln/diffusers/__init__.py +21 -1
  5. optimum/rbln/diffusers/configurations/__init__.py +4 -0
  6. optimum/rbln/diffusers/configurations/models/__init__.py +2 -0
  7. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +9 -2
  8. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +84 -0
  9. optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +4 -2
  10. optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +9 -2
  11. optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +70 -0
  12. optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +4 -2
  13. optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +9 -2
  14. optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +9 -2
  15. optimum/rbln/diffusers/configurations/pipelines/__init__.py +1 -0
  16. optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +29 -9
  17. optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +114 -0
  18. optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +28 -12
  19. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +18 -6
  20. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +13 -6
  21. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +12 -6
  22. optimum/rbln/diffusers/modeling_diffusers.py +72 -65
  23. optimum/rbln/diffusers/models/__init__.py +4 -0
  24. optimum/rbln/diffusers/models/autoencoders/__init__.py +1 -0
  25. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +17 -1
  26. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +219 -0
  27. optimum/rbln/diffusers/models/autoencoders/vae.py +45 -8
  28. optimum/rbln/diffusers/models/autoencoders/vq_model.py +17 -1
  29. optimum/rbln/diffusers/models/controlnet.py +14 -8
  30. optimum/rbln/diffusers/models/transformers/__init__.py +1 -0
  31. optimum/rbln/diffusers/models/transformers/prior_transformer.py +10 -0
  32. optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +321 -0
  33. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +2 -0
  34. optimum/rbln/diffusers/models/unets/unet_2d_condition.py +11 -1
  35. optimum/rbln/diffusers/pipelines/__init__.py +10 -0
  36. optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +1 -4
  37. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -0
  38. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -0
  39. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +7 -0
  40. optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +7 -0
  41. optimum/rbln/diffusers/pipelines/cosmos/__init__.py +17 -0
  42. optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +102 -0
  43. optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +455 -0
  44. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +98 -0
  45. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +98 -0
  46. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +7 -0
  47. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +48 -27
  48. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +7 -0
  49. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +7 -0
  50. optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -0
  51. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +7 -0
  52. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +7 -0
  53. optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +7 -0
  54. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -0
  55. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -0
  56. optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -0
  57. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +7 -0
  58. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -0
  59. optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +7 -0
  60. optimum/rbln/modeling.py +71 -37
  61. optimum/rbln/modeling_base.py +63 -109
  62. optimum/rbln/transformers/__init__.py +41 -47
  63. optimum/rbln/transformers/configuration_generic.py +16 -13
  64. optimum/rbln/transformers/modeling_generic.py +21 -22
  65. optimum/rbln/transformers/modeling_rope_utils.py +5 -2
  66. optimum/rbln/transformers/models/__init__.py +54 -4
  67. optimum/rbln/transformers/models/{wav2vec2/configuration_wav2vec.py → audio_spectrogram_transformer/__init__.py} +2 -4
  68. optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +21 -0
  69. optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +28 -0
  70. optimum/rbln/transformers/models/auto/auto_factory.py +35 -12
  71. optimum/rbln/transformers/models/bart/bart_architecture.py +14 -1
  72. optimum/rbln/transformers/models/bart/configuration_bart.py +12 -2
  73. optimum/rbln/transformers/models/bart/modeling_bart.py +16 -7
  74. optimum/rbln/transformers/models/bert/configuration_bert.py +18 -3
  75. optimum/rbln/transformers/models/bert/modeling_bert.py +24 -0
  76. optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +15 -3
  77. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +50 -4
  78. optimum/rbln/transformers/models/clip/configuration_clip.py +15 -5
  79. optimum/rbln/transformers/models/clip/modeling_clip.py +38 -13
  80. optimum/rbln/transformers/models/colpali/__init__.py +2 -0
  81. optimum/rbln/transformers/models/colpali/colpali_architecture.py +221 -0
  82. optimum/rbln/transformers/models/colpali/configuration_colpali.py +68 -0
  83. optimum/rbln/transformers/models/colpali/modeling_colpali.py +383 -0
  84. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +111 -14
  85. optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +102 -35
  86. optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +253 -195
  87. optimum/rbln/transformers/models/distilbert/__init__.py +19 -0
  88. optimum/rbln/transformers/models/distilbert/configuration_distilbert.py +24 -0
  89. optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +27 -0
  90. optimum/rbln/transformers/models/dpt/configuration_dpt.py +6 -1
  91. optimum/rbln/transformers/models/dpt/modeling_dpt.py +6 -1
  92. optimum/rbln/transformers/models/exaone/configuration_exaone.py +24 -1
  93. optimum/rbln/transformers/models/exaone/exaone_architecture.py +5 -1
  94. optimum/rbln/transformers/models/exaone/modeling_exaone.py +66 -5
  95. optimum/rbln/transformers/models/gemma/configuration_gemma.py +24 -1
  96. optimum/rbln/transformers/models/gemma/gemma_architecture.py +5 -1
  97. optimum/rbln/transformers/models/gemma/modeling_gemma.py +49 -0
  98. optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +3 -3
  99. optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +18 -250
  100. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +89 -244
  101. optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +4 -1
  102. optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +6 -1
  103. optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +12 -2
  104. optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +41 -4
  105. optimum/rbln/transformers/models/llama/configuration_llama.py +24 -1
  106. optimum/rbln/transformers/models/llama/modeling_llama.py +49 -0
  107. optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +10 -2
  108. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +32 -4
  109. optimum/rbln/transformers/models/midm/configuration_midm.py +24 -1
  110. optimum/rbln/transformers/models/midm/midm_architecture.py +6 -1
  111. optimum/rbln/transformers/models/midm/modeling_midm.py +66 -5
  112. optimum/rbln/transformers/models/mistral/configuration_mistral.py +24 -1
  113. optimum/rbln/transformers/models/mistral/modeling_mistral.py +62 -4
  114. optimum/rbln/transformers/models/opt/configuration_opt.py +4 -1
  115. optimum/rbln/transformers/models/opt/modeling_opt.py +10 -0
  116. optimum/rbln/transformers/models/opt/opt_architecture.py +7 -1
  117. optimum/rbln/transformers/models/phi/configuration_phi.py +24 -1
  118. optimum/rbln/transformers/models/phi/modeling_phi.py +49 -0
  119. optimum/rbln/transformers/models/phi/phi_architecture.py +1 -1
  120. optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +24 -1
  121. optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +67 -4
  122. optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +31 -3
  123. optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +54 -25
  124. optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +6 -4
  125. optimum/rbln/transformers/models/resnet/__init__.py +23 -0
  126. optimum/rbln/transformers/models/resnet/configuration_resnet.py +25 -0
  127. optimum/rbln/transformers/models/resnet/modeling_resnet.py +26 -0
  128. optimum/rbln/transformers/models/roberta/__init__.py +24 -0
  129. optimum/rbln/transformers/{configuration_alias.py → models/roberta/configuration_roberta.py} +12 -28
  130. optimum/rbln/transformers/{modeling_alias.py → models/roberta/modeling_roberta.py} +14 -28
  131. optimum/rbln/transformers/models/seq2seq/__init__.py +1 -1
  132. optimum/rbln/transformers/models/seq2seq/{configuration_seq2seq2.py → configuration_seq2seq.py} +2 -2
  133. optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +7 -3
  134. optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +41 -3
  135. optimum/rbln/transformers/models/siglip/configuration_siglip.py +10 -0
  136. optimum/rbln/transformers/models/siglip/modeling_siglip.py +69 -21
  137. optimum/rbln/transformers/models/t5/configuration_t5.py +12 -2
  138. optimum/rbln/transformers/models/t5/modeling_t5.py +56 -8
  139. optimum/rbln/transformers/models/t5/t5_architecture.py +5 -1
  140. optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/__init__.py +1 -1
  141. optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/configuration_time_series_transformer.py +9 -2
  142. optimum/rbln/transformers/models/{time_series_transformers/modeling_time_series_transformers.py → time_series_transformer/modeling_time_series_transformer.py} +20 -11
  143. optimum/rbln/transformers/models/vit/__init__.py +19 -0
  144. optimum/rbln/transformers/models/vit/configuration_vit.py +24 -0
  145. optimum/rbln/transformers/models/vit/modeling_vit.py +25 -0
  146. optimum/rbln/transformers/models/wav2vec2/__init__.py +1 -1
  147. optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +26 -0
  148. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +1 -1
  149. optimum/rbln/transformers/models/whisper/configuration_whisper.py +10 -1
  150. optimum/rbln/transformers/models/whisper/modeling_whisper.py +41 -17
  151. optimum/rbln/transformers/models/xlm_roberta/__init__.py +16 -2
  152. optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py +15 -2
  153. optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +12 -3
  154. optimum/rbln/utils/model_utils.py +20 -0
  155. optimum/rbln/utils/runtime_utils.py +49 -1
  156. optimum/rbln/utils/submodule.py +6 -8
  157. {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1.dist-info}/METADATA +6 -6
  158. optimum_rbln-0.8.1.dist-info/RECORD +211 -0
  159. optimum_rbln-0.8.0.post2.dist-info/RECORD +0 -184
  160. optimum/rbln/transformers/models/{time_series_transformers → time_series_transformer}/time_series_transformers_architecture.py +0 -0
  161. {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1.dist-info}/WHEEL +0 -0
  162. {optimum_rbln-0.8.0.post2.dist-info → optimum_rbln-0.8.1.dist-info}/licenses/LICENSE +0 -0
@@ -12,12 +12,19 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Optional, Tuple
15
+ from typing import Any, Dict, Optional, Tuple
16
16
 
17
17
  from ....configuration_utils import RBLNModelConfig
18
18
 
19
19
 
20
20
  class RBLNVQModelConfig(RBLNModelConfig):
21
+ """
22
+ Configuration class for RBLN VQModel models, used in Kandinsky.
23
+
24
+ This class inherits from RBLNModelConfig and provides specific configuration options
25
+ for VQModel, which acts similarly to a VAE but uses vector quantization.
26
+ """
27
+
21
28
  def __init__(
22
29
  self,
23
30
  batch_size: Optional[int] = None,
@@ -26,7 +33,7 @@ class RBLNVQModelConfig(RBLNModelConfig):
26
33
  vqmodel_scale_factor: Optional[float] = None, # TODO: rename to scaling_factor
27
34
  in_channels: Optional[int] = None,
28
35
  latent_channels: Optional[int] = None,
29
- **kwargs,
36
+ **kwargs: Dict[str, Any],
30
37
  ):
31
38
  """
32
39
  Args:
@@ -4,6 +4,7 @@ from .configuration_controlnet import (
4
4
  RBLNStableDiffusionXLControlNetImg2ImgPipelineConfig,
5
5
  RBLNStableDiffusionXLControlNetPipelineConfig,
6
6
  )
7
+ from .configuration_cosmos import RBLNCosmosTextToWorldPipelineConfig, RBLNCosmosVideoToWorldPipelineConfig
7
8
  from .configuration_kandinsky2_2 import (
8
9
  RBLNKandinskyV22CombinedPipelineConfig,
9
10
  RBLNKandinskyV22Img2ImgCombinedPipelineConfig,
@@ -12,14 +12,14 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Optional, Tuple
15
+ from typing import Any, Dict, Optional, Tuple
16
16
 
17
17
  from ....configuration_utils import RBLNModelConfig
18
18
  from ....transformers import RBLNCLIPTextModelConfig, RBLNCLIPTextModelWithProjectionConfig
19
19
  from ..models import RBLNAutoencoderKLConfig, RBLNControlNetModelConfig, RBLNUNet2DConditionModelConfig
20
20
 
21
21
 
22
- class _RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
22
+ class RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
23
23
  submodules = ["text_encoder", "unet", "vae", "controlnet"]
24
24
  _vae_uses_encoder = False
25
25
 
@@ -38,7 +38,7 @@ class _RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
38
38
  sample_size: Optional[Tuple[int, int]] = None,
39
39
  image_size: Optional[Tuple[int, int]] = None,
40
40
  guidance_scale: Optional[float] = None,
41
- **kwargs,
41
+ **kwargs: Dict[str, Any],
42
42
  ):
43
43
  """
44
44
  Args:
@@ -138,15 +138,27 @@ class _RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
138
138
  return self.vae.sample_size
139
139
 
140
140
 
141
- class RBLNStableDiffusionControlNetPipelineConfig(_RBLNStableDiffusionControlNetPipelineBaseConfig):
141
+ class RBLNStableDiffusionControlNetPipelineConfig(RBLNStableDiffusionControlNetPipelineBaseConfig):
142
+ """
143
+ Configuration for Stable Diffusion ControlNet pipeline.
144
+ """
145
+
142
146
  _vae_uses_encoder = False
143
147
 
144
148
 
145
- class RBLNStableDiffusionControlNetImg2ImgPipelineConfig(_RBLNStableDiffusionControlNetPipelineBaseConfig):
149
+ class RBLNStableDiffusionControlNetImg2ImgPipelineConfig(RBLNStableDiffusionControlNetPipelineBaseConfig):
150
+ """
151
+ Configuration for Stable Diffusion ControlNet image-to-image pipeline.
152
+ """
153
+
146
154
  _vae_uses_encoder = True
147
155
 
148
156
 
149
- class _RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
157
+ class RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
158
+ """
159
+ Base configuration for Stable Diffusion XL ControlNet pipelines.
160
+ """
161
+
150
162
  submodules = ["text_encoder", "text_encoder_2", "unet", "vae", "controlnet"]
151
163
  _vae_uses_encoder = False
152
164
 
@@ -166,7 +178,7 @@ class _RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
166
178
  sample_size: Optional[Tuple[int, int]] = None,
167
179
  image_size: Optional[Tuple[int, int]] = None,
168
180
  guidance_scale: Optional[float] = None,
169
- **kwargs,
181
+ **kwargs: Dict[str, Any],
170
182
  ):
171
183
  """
172
184
  Args:
@@ -272,9 +284,17 @@ class _RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
272
284
  return self.vae.sample_size
273
285
 
274
286
 
275
- class RBLNStableDiffusionXLControlNetPipelineConfig(_RBLNStableDiffusionXLControlNetPipelineBaseConfig):
287
+ class RBLNStableDiffusionXLControlNetPipelineConfig(RBLNStableDiffusionXLControlNetPipelineBaseConfig):
288
+ """
289
+ Configuration for Stable Diffusion XL ControlNet pipeline.
290
+ """
291
+
276
292
  _vae_uses_encoder = False
277
293
 
278
294
 
279
- class RBLNStableDiffusionXLControlNetImg2ImgPipelineConfig(_RBLNStableDiffusionXLControlNetPipelineBaseConfig):
295
+ class RBLNStableDiffusionXLControlNetImg2ImgPipelineConfig(RBLNStableDiffusionXLControlNetPipelineBaseConfig):
296
+ """
297
+ Configuration for Stable Diffusion XL ControlNet image-to-image pipeline.
298
+ """
299
+
280
300
  _vae_uses_encoder = True
@@ -0,0 +1,114 @@
1
+ # Copyright 2025 Rebellions Inc. All rights reserved.
2
+
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at:
6
+
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Any, Dict, Optional
16
+
17
+ from ....configuration_utils import RBLNModelConfig
18
+ from ....transformers import RBLNT5EncoderModelConfig
19
+ from ....utils.logging import get_logger
20
+ from ...pipelines.cosmos.cosmos_guardrail import RBLNCosmosSafetyCheckerConfig
21
+ from ..models import RBLNAutoencoderKLCosmosConfig, RBLNCosmosTransformer3DModelConfig
22
+
23
+
24
+ logger = get_logger(__name__)
25
+
26
+
27
+ class RBLNCosmosPipelineBaseConfig(RBLNModelConfig):
28
+ submodules = ["text_encoder", "transformer", "vae", "safety_checker"]
29
+ _vae_uses_encoder = False
30
+
31
+ def __init__(
32
+ self,
33
+ text_encoder: Optional[RBLNT5EncoderModelConfig] = None,
34
+ transformer: Optional[RBLNCosmosTransformer3DModelConfig] = None,
35
+ vae: Optional[RBLNAutoencoderKLCosmosConfig] = None,
36
+ safety_checker: Optional[RBLNCosmosSafetyCheckerConfig] = None,
37
+ *,
38
+ batch_size: Optional[int] = None,
39
+ height: Optional[int] = None,
40
+ width: Optional[int] = None,
41
+ num_frames: Optional[int] = None,
42
+ fps: Optional[int] = None,
43
+ max_seq_len: Optional[int] = None,
44
+ **kwargs: Dict[str, Any],
45
+ ):
46
+ """
47
+ Args:
48
+ text_encoder (Optional[RBLNT5EncoderModelConfig]): Configuration for the text encoder component.
49
+ Initialized as RBLNT5EncoderModelConfig if not provided.
50
+ transformer (Optional[RBLNCosmosTransformer3DModelConfig]): Configuration for the Transformer model component.
51
+ Initialized as RBLNCosmosTransformer3DModelConfig if not provided.
52
+ vae (Optional[RBLNAutoencoderKLCosmosConfig]): Configuration for the VAE model component.
53
+ Initialized as RBLNAutoencoderKLCosmosConfig if not provided.
54
+ safety_checker (Optional[RBLNCosmosSafetyCheckerConfig]): Configuration for the safety checker component.
55
+ Initialized as RBLNCosmosSafetyCheckerConfig if not provided.
56
+ batch_size (Optional[int]): Batch size for inference, applied to all submodules.
57
+ height (Optional[int]): Height of the generated videos.
58
+ width (Optional[int]): Width of the generated videos.
59
+ num_frames (Optional[int]): The number of frames in the generated video.
60
+ fps (Optional[int]): The frames per second of the generated video.
61
+ max_seq_len (Optional[int]): Maximum sequence length supported by the model.
62
+ **kwargs: Additional arguments passed to the parent RBLNModelConfig.
63
+ """
64
+ super().__init__(**kwargs)
65
+
66
+ self.text_encoder = self.init_submodule_config(
67
+ RBLNT5EncoderModelConfig, text_encoder, batch_size=batch_size, max_seq_len=max_seq_len
68
+ )
69
+ self.transformer = self.init_submodule_config(
70
+ RBLNCosmosTransformer3DModelConfig,
71
+ transformer,
72
+ batch_size=batch_size,
73
+ max_seq_len=max_seq_len,
74
+ height=height,
75
+ width=width,
76
+ num_frames=num_frames,
77
+ fps=fps,
78
+ )
79
+ self.vae = self.init_submodule_config(
80
+ RBLNAutoencoderKLCosmosConfig,
81
+ vae,
82
+ batch_size=batch_size,
83
+ uses_encoder=self.__class__._vae_uses_encoder,
84
+ height=height,
85
+ width=width,
86
+ num_frames=num_frames,
87
+ )
88
+ self.safety_checker = self.init_submodule_config(
89
+ RBLNCosmosSafetyCheckerConfig,
90
+ safety_checker,
91
+ batch_size=batch_size,
92
+ height=height,
93
+ width=width,
94
+ )
95
+
96
+ @property
97
+ def batch_size(self):
98
+ return self.vae.batch_size
99
+
100
+ @property
101
+ def max_seq_len(self):
102
+ return self.text_encoder.max_seq_len
103
+
104
+
105
+ class RBLNCosmosTextToWorldPipelineConfig(RBLNCosmosPipelineBaseConfig):
106
+ """Config for Cosmos Text2World Pipeline"""
107
+
108
+ _vae_uses_encoder = False
109
+
110
+
111
+ class RBLNCosmosVideoToWorldPipelineConfig(RBLNCosmosPipelineBaseConfig):
112
+ """Config for Cosmos Video2World Pipeline"""
113
+
114
+ _vae_uses_encoder = True
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Optional, Tuple
15
+ from typing import Any, Dict, Optional, Tuple
16
16
 
17
17
  from ....configuration_utils import RBLNModelConfig
18
18
  from ....transformers import RBLNCLIPTextModelWithProjectionConfig, RBLNCLIPVisionModelWithProjectionConfig
@@ -20,7 +20,7 @@ from ..models import RBLNUNet2DConditionModelConfig, RBLNVQModelConfig
20
20
  from ..models.configuration_prior_transformer import RBLNPriorTransformerConfig
21
21
 
22
22
 
23
- class _RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
23
+ class RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
24
24
  submodules = ["unet", "movq"]
25
25
  _movq_uses_encoder = False
26
26
 
@@ -37,7 +37,7 @@ class _RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
37
37
  img_width: Optional[int] = None,
38
38
  height: Optional[int] = None,
39
39
  width: Optional[int] = None,
40
- **kwargs,
40
+ **kwargs: Dict[str, Any],
41
41
  ):
42
42
  """
43
43
  Args:
@@ -117,19 +117,27 @@ class _RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
117
117
  return self.movq.sample_size
118
118
 
119
119
 
120
- class RBLNKandinskyV22PipelineConfig(_RBLNKandinskyV22PipelineBaseConfig):
120
+ class RBLNKandinskyV22PipelineConfig(RBLNKandinskyV22PipelineBaseConfig):
121
+ """Configuration class for the Kandinsky V2.2 text-to-image decoder pipeline."""
122
+
121
123
  _movq_uses_encoder = False
122
124
 
123
125
 
124
- class RBLNKandinskyV22Img2ImgPipelineConfig(_RBLNKandinskyV22PipelineBaseConfig):
126
+ class RBLNKandinskyV22Img2ImgPipelineConfig(RBLNKandinskyV22PipelineBaseConfig):
127
+ """Configuration class for the Kandinsky V2.2 image-to-image decoder pipeline."""
128
+
125
129
  _movq_uses_encoder = True
126
130
 
127
131
 
128
- class RBLNKandinskyV22InpaintPipelineConfig(_RBLNKandinskyV22PipelineBaseConfig):
132
+ class RBLNKandinskyV22InpaintPipelineConfig(RBLNKandinskyV22PipelineBaseConfig):
133
+ """Configuration class for the Kandinsky V2.2 inpainting decoder pipeline."""
134
+
129
135
  _movq_uses_encoder = True
130
136
 
131
137
 
132
138
  class RBLNKandinskyV22PriorPipelineConfig(RBLNModelConfig):
139
+ """Configuration class for the Kandinsky V2.2 Prior pipeline."""
140
+
133
141
  submodules = ["text_encoder", "image_encoder", "prior"]
134
142
 
135
143
  def __init__(
@@ -140,7 +148,7 @@ class RBLNKandinskyV22PriorPipelineConfig(RBLNModelConfig):
140
148
  *,
141
149
  batch_size: Optional[int] = None,
142
150
  guidance_scale: Optional[float] = None,
143
- **kwargs,
151
+ **kwargs: Dict[str, Any],
144
152
  ):
145
153
  """
146
154
  Initialize a configuration for Kandinsky 2.2 prior pipeline optimized for RBLN NPU.
@@ -194,7 +202,9 @@ class RBLNKandinskyV22PriorPipelineConfig(RBLNModelConfig):
194
202
  return self.image_encoder.image_size
195
203
 
196
204
 
197
- class _RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
205
+ class RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
206
+ """Base configuration class for Kandinsky V2.2 combined pipelines."""
207
+
198
208
  submodules = ["prior_pipe", "decoder_pipe"]
199
209
  _decoder_pipe_cls = RBLNKandinskyV22PipelineConfig
200
210
 
@@ -216,7 +226,7 @@ class _RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
216
226
  prior_text_encoder: Optional[RBLNCLIPTextModelWithProjectionConfig] = None,
217
227
  unet: Optional[RBLNUNet2DConditionModelConfig] = None,
218
228
  movq: Optional[RBLNVQModelConfig] = None,
219
- **kwargs,
229
+ **kwargs: Dict[str, Any],
220
230
  ):
221
231
  """
222
232
  Initialize a configuration for combined Kandinsky 2.2 pipelines optimized for RBLN NPU.
@@ -325,13 +335,19 @@ class _RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
325
335
  return self.decoder_pipe.movq
326
336
 
327
337
 
328
- class RBLNKandinskyV22CombinedPipelineConfig(_RBLNKandinskyV22CombinedPipelineBaseConfig):
338
+ class RBLNKandinskyV22CombinedPipelineConfig(RBLNKandinskyV22CombinedPipelineBaseConfig):
339
+ """Configuration class for the Kandinsky V2.2 combined text-to-image pipeline."""
340
+
329
341
  _decoder_pipe_cls = RBLNKandinskyV22PipelineConfig
330
342
 
331
343
 
332
- class RBLNKandinskyV22InpaintCombinedPipelineConfig(_RBLNKandinskyV22CombinedPipelineBaseConfig):
344
+ class RBLNKandinskyV22InpaintCombinedPipelineConfig(RBLNKandinskyV22CombinedPipelineBaseConfig):
345
+ """Configuration class for the Kandinsky V2.2 combined inpainting pipeline."""
346
+
333
347
  _decoder_pipe_cls = RBLNKandinskyV22InpaintPipelineConfig
334
348
 
335
349
 
336
- class RBLNKandinskyV22Img2ImgCombinedPipelineConfig(_RBLNKandinskyV22CombinedPipelineBaseConfig):
350
+ class RBLNKandinskyV22Img2ImgCombinedPipelineConfig(RBLNKandinskyV22CombinedPipelineBaseConfig):
351
+ """Configuration class for the Kandinsky V2.2 combined image-to-image pipeline."""
352
+
337
353
  _decoder_pipe_cls = RBLNKandinskyV22Img2ImgPipelineConfig
@@ -12,14 +12,14 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Optional, Tuple
15
+ from typing import Any, Dict, Optional, Tuple
16
16
 
17
17
  from ....configuration_utils import RBLNModelConfig
18
18
  from ....transformers import RBLNCLIPTextModelConfig
19
19
  from ..models import RBLNAutoencoderKLConfig, RBLNUNet2DConditionModelConfig
20
20
 
21
21
 
22
- class _RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
22
+ class RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
23
23
  submodules = ["text_encoder", "unet", "vae"]
24
24
  _vae_uses_encoder = False
25
25
 
@@ -37,7 +37,7 @@ class _RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
37
37
  sample_size: Optional[Tuple[int, int]] = None,
38
38
  image_size: Optional[Tuple[int, int]] = None,
39
39
  guidance_scale: Optional[float] = None,
40
- **kwargs,
40
+ **kwargs: Dict[str, Any],
41
41
  ):
42
42
  """
43
43
  Args:
@@ -128,13 +128,25 @@ class _RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
128
128
  return self.vae.sample_size
129
129
 
130
130
 
131
- class RBLNStableDiffusionPipelineConfig(_RBLNStableDiffusionPipelineBaseConfig):
131
+ class RBLNStableDiffusionPipelineConfig(RBLNStableDiffusionPipelineBaseConfig):
132
+ """
133
+ Configuration for Stable Diffusion pipeline.
134
+ """
135
+
132
136
  _vae_uses_encoder = False
133
137
 
134
138
 
135
- class RBLNStableDiffusionImg2ImgPipelineConfig(_RBLNStableDiffusionPipelineBaseConfig):
139
+ class RBLNStableDiffusionImg2ImgPipelineConfig(RBLNStableDiffusionPipelineBaseConfig):
140
+ """
141
+ Configuration for Stable Diffusion image-to-image pipeline.
142
+ """
143
+
136
144
  _vae_uses_encoder = True
137
145
 
138
146
 
139
- class RBLNStableDiffusionInpaintPipelineConfig(_RBLNStableDiffusionPipelineBaseConfig):
147
+ class RBLNStableDiffusionInpaintPipelineConfig(RBLNStableDiffusionPipelineBaseConfig):
148
+ """
149
+ Configuration for Stable Diffusion inpainting pipeline.
150
+ """
151
+
140
152
  _vae_uses_encoder = True
@@ -12,14 +12,14 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Optional, Tuple
15
+ from typing import Any, Dict, Optional, Tuple
16
16
 
17
17
  from ....configuration_utils import RBLNModelConfig
18
18
  from ....transformers import RBLNCLIPTextModelWithProjectionConfig, RBLNT5EncoderModelConfig
19
19
  from ..models import RBLNAutoencoderKLConfig, RBLNSD3Transformer2DModelConfig
20
20
 
21
21
 
22
- class _RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
22
+ class RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
23
23
  submodules = ["transformer", "text_encoder", "text_encoder_2", "text_encoder_3", "vae"]
24
24
  _vae_uses_encoder = False
25
25
 
@@ -40,7 +40,7 @@ class _RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
40
40
  height: Optional[int] = None,
41
41
  width: Optional[int] = None,
42
42
  guidance_scale: Optional[float] = None,
43
- **kwargs,
43
+ **kwargs: Dict[str, Any],
44
44
  ):
45
45
  """
46
46
  Args:
@@ -111,6 +111,7 @@ class _RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
111
111
  text_encoder_3,
112
112
  batch_size=batch_size,
113
113
  max_seq_len=max_seq_len,
114
+ model_input_names=["input_ids"],
114
115
  )
115
116
  self.transformer = self.init_submodule_config(
116
117
  RBLNSD3Transformer2DModelConfig,
@@ -153,13 +154,19 @@ class _RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
153
154
  return self.vae.sample_size
154
155
 
155
156
 
156
- class RBLNStableDiffusion3PipelineConfig(_RBLNStableDiffusion3PipelineBaseConfig):
157
+ class RBLNStableDiffusion3PipelineConfig(RBLNStableDiffusion3PipelineBaseConfig):
158
+ """Config for SD3 Text2Img Pipeline"""
159
+
157
160
  _vae_uses_encoder = False
158
161
 
159
162
 
160
- class RBLNStableDiffusion3Img2ImgPipelineConfig(_RBLNStableDiffusion3PipelineBaseConfig):
163
+ class RBLNStableDiffusion3Img2ImgPipelineConfig(RBLNStableDiffusion3PipelineBaseConfig):
164
+ """Config for SD3 Img2Img Pipeline"""
165
+
161
166
  _vae_uses_encoder = True
162
167
 
163
168
 
164
- class RBLNStableDiffusion3InpaintPipelineConfig(_RBLNStableDiffusion3PipelineBaseConfig):
169
+ class RBLNStableDiffusion3InpaintPipelineConfig(RBLNStableDiffusion3PipelineBaseConfig):
170
+ """Config for SD3 Inpainting Pipeline"""
171
+
165
172
  _vae_uses_encoder = True
@@ -12,14 +12,14 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Optional, Tuple
15
+ from typing import Any, Dict, Optional, Tuple
16
16
 
17
17
  from ....configuration_utils import RBLNModelConfig
18
18
  from ....transformers import RBLNCLIPTextModelConfig, RBLNCLIPTextModelWithProjectionConfig
19
19
  from ..models import RBLNAutoencoderKLConfig, RBLNUNet2DConditionModelConfig
20
20
 
21
21
 
22
- class _RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
22
+ class RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
23
23
  submodules = ["text_encoder", "text_encoder_2", "unet", "vae"]
24
24
  _vae_uses_encoder = False
25
25
 
@@ -38,7 +38,7 @@ class _RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
38
38
  sample_size: Optional[Tuple[int, int]] = None,
39
39
  image_size: Optional[Tuple[int, int]] = None,
40
40
  guidance_scale: Optional[float] = None,
41
- **kwargs,
41
+ **kwargs: Dict[str, Any],
42
42
  ):
43
43
  """
44
44
  Args:
@@ -134,13 +134,19 @@ class _RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
134
134
  return self.vae.sample_size
135
135
 
136
136
 
137
- class RBLNStableDiffusionXLPipelineConfig(_RBLNStableDiffusionXLPipelineBaseConfig):
137
+ class RBLNStableDiffusionXLPipelineConfig(RBLNStableDiffusionXLPipelineBaseConfig):
138
+ """Config for SDXL Text2Img Pipeline"""
139
+
138
140
  _vae_uses_encoder = False
139
141
 
140
142
 
141
- class RBLNStableDiffusionXLImg2ImgPipelineConfig(_RBLNStableDiffusionXLPipelineBaseConfig):
143
+ class RBLNStableDiffusionXLImg2ImgPipelineConfig(RBLNStableDiffusionXLPipelineBaseConfig):
144
+ """Config for SDXL Img2Img Pipeline"""
145
+
142
146
  _vae_uses_encoder = True
143
147
 
144
148
 
145
- class RBLNStableDiffusionXLInpaintPipelineConfig(_RBLNStableDiffusionXLPipelineBaseConfig):
149
+ class RBLNStableDiffusionXLInpaintPipelineConfig(RBLNStableDiffusionXLPipelineBaseConfig):
150
+ """Config for SDXL Inpainting Pipeline"""
151
+
146
152
  _vae_uses_encoder = True