optimum-rbln 0.8.4a6__py3-none-any.whl → 0.8.4a8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (66)
  1. optimum/rbln/__version__.py +2 -2
  2. optimum/rbln/configuration_utils.py +41 -3
  3. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +1 -1
  4. optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +3 -3
  5. optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +1 -1
  6. optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +2 -2
  7. optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +7 -2
  8. optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +7 -2
  9. optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +1 -1
  10. optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +1 -1
  11. optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +2 -2
  12. optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +1 -1
  13. optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +3 -3
  14. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +1 -1
  15. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +1 -1
  16. optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +1 -1
  17. optimum/rbln/diffusers/modeling_diffusers.py +7 -3
  18. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +31 -3
  19. optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +28 -3
  20. optimum/rbln/diffusers/models/autoencoders/vq_model.py +31 -3
  21. optimum/rbln/diffusers/models/transformers/prior_transformer.py +1 -1
  22. optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +9 -1
  23. optimum/rbln/diffusers/models/transformers/transformer_sd3.py +9 -1
  24. optimum/rbln/diffusers/models/unets/unet_2d_condition.py +6 -3
  25. optimum/rbln/diffusers/pipelines/auto_pipeline.py +45 -8
  26. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +1 -1
  27. optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +1 -1
  28. optimum/rbln/modeling.py +17 -13
  29. optimum/rbln/modeling_base.py +11 -9
  30. optimum/rbln/transformers/configuration_generic.py +3 -3
  31. optimum/rbln/transformers/modeling_generic.py +1 -0
  32. optimum/rbln/transformers/models/auto/auto_factory.py +67 -7
  33. optimum/rbln/transformers/models/auto/modeling_auto.py +31 -0
  34. optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +5 -6
  35. optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +1 -1
  36. optimum/rbln/transformers/models/clip/configuration_clip.py +7 -4
  37. optimum/rbln/transformers/models/clip/modeling_clip.py +23 -4
  38. optimum/rbln/transformers/models/colpali/configuration_colpali.py +2 -2
  39. optimum/rbln/transformers/models/colpali/modeling_colpali.py +38 -6
  40. optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +1 -1
  41. optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +23 -0
  42. optimum/rbln/transformers/models/exaone/modeling_exaone.py +42 -4
  43. optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +17 -2
  44. optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +7 -8
  45. optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +12 -6
  46. optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +6 -2
  47. optimum/rbln/transformers/models/llava/configuration_llava.py +6 -2
  48. optimum/rbln/transformers/models/llava/modeling_llava.py +1 -0
  49. optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +2 -2
  50. optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +57 -78
  51. optimum/rbln/transformers/models/midm/modeling_midm.py +42 -4
  52. optimum/rbln/transformers/models/pixtral/configuration_pixtral.py +1 -1
  53. optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +18 -3
  54. optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +2 -2
  55. optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +1 -1
  56. optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +2 -2
  57. optimum/rbln/transformers/models/siglip/configuration_siglip.py +1 -1
  58. optimum/rbln/transformers/models/swin/configuration_swin.py +1 -1
  59. optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +1 -1
  60. optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +1 -0
  61. optimum/rbln/transformers/models/whisper/configuration_whisper.py +1 -1
  62. optimum/rbln/transformers/models/whisper/modeling_whisper.py +1 -0
  63. {optimum_rbln-0.8.4a6.dist-info → optimum_rbln-0.8.4a8.dist-info}/METADATA +1 -1
  64. {optimum_rbln-0.8.4a6.dist-info → optimum_rbln-0.8.4a8.dist-info}/RECORD +66 -66
  65. {optimum_rbln-0.8.4a6.dist-info → optimum_rbln-0.8.4a8.dist-info}/WHEEL +0 -0
  66. {optimum_rbln-0.8.4a6.dist-info → optimum_rbln-0.8.4a8.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/__version__.py
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
 commit_id: COMMIT_ID
 __commit_id__: COMMIT_ID
 
-__version__ = version = '0.8.4a6'
-__version_tuple__ = version_tuple = (0, 8, 4, 'a6')
+__version__ = version = '0.8.4a8'
+__version_tuple__ = version_tuple = (0, 8, 4, 'a8')
 
 __commit_id__ = commit_id = None
optimum/rbln/configuration_utils.py
@@ -185,6 +185,15 @@ def load_config(path: str) -> Tuple[Type["RBLNModelConfig"], Dict[str, Any]]:
 
 
 class RBLNAutoConfig:
+    """
+    Resolver and factory for RBLN model configurations.
+
+    This class selects the concrete `RBLNModelConfig` subclass, validates the
+    provided data, and returns a frozen configuration object that serves as the
+    single source of truth during export and load. It does not define the schema
+    or control model behavior.
+    """
+
     def __new__(cls, **kwargs):
         cls_name = kwargs.get("cls_name")
         if cls_name is None:
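Given the `__new__` shown above, instantiating `RBLNAutoConfig` returns an instance of the class named by `cls_name`. A minimal sketch; the keyword values are illustrative, not defaults:

    from optimum.rbln.configuration_utils import RBLNAutoConfig

    # Dispatches to the concrete config class named by `cls_name`;
    # all remaining kwargs are forwarded to that class's initializer.
    cfg = RBLNAutoConfig(cls_name="RBLNLlamaForCausalLMConfig", tensor_parallel_size=4)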
@@ -194,6 +203,33 @@ class RBLNAutoConfig:
 
     @staticmethod
     def load_from_dict(config_dict: Dict[str, Any]) -> "RBLNModelConfig":
+        """
+        Build a `RBLNModelConfig` from a plain dictionary.
+
+        The dictionary must contain `cls_name`, which identifies the concrete
+        configuration class to instantiate. All other keys are forwarded to the
+        target class initializer. This method does not mutate `config_dict`.
+
+        Args:
+            config_dict: Mapping typically created by `json.load` or `yaml.safe_load`.
+                For example, the parsed contents of `rbln_config.json`.
+
+        Returns:
+            RBLNModelConfig: A configuration instance. The specific subclass is
+                selected by `config_dict["cls_name"]`.
+
+        Raises:
+            ValueError: If `cls_name` is missing.
+            Exception: Any error raised by the target config class during init.
+
+        Examples:
+            >>> data = {
+            ...     "cls_name": "RBLNLlamaForCausalLMConfig",
+            ...     "create_runtimes": False,
+            ...     "tensor_parallel_size": 4
+            ... }
+            >>> cfg = RBLNAutoConfig.load_from_dict(data)
+        """
         cls_name = config_dict.get("cls_name")
         if cls_name is None:
             raise ValueError("`cls_name` is required.")
@@ -206,7 +242,8 @@ class RBLNAutoConfig:
         Register a new configuration for this class.
 
         Args:
-            config ([`RBLNModelConfig`]): The config to register.
+            config (RBLNModelConfig): The config to register.
+            exist_ok (bool): Whether to allow registering an already registered model.
         """
         if not issubclass(config, RBLNModelConfig):
            raise ValueError("`config` must be a subclass of RBLNModelConfig.")
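Registration per the updated docstring would look roughly like this; `RBLNMyModelConfig` is a hypothetical subclass used only for illustration:

    from optimum.rbln.configuration_utils import RBLNAutoConfig, RBLNModelConfig

    class RBLNMyModelConfig(RBLNModelConfig):  # hypothetical subclass
        pass

    # exist_ok=True permits re-registering a name that is already known
    RBLNAutoConfig.register(RBLNMyModelConfig, exist_ok=True)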
@@ -282,6 +319,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
    """Base configuration class for RBLN models that handles compilation settings, runtime options, and submodules.
 
    This class provides functionality for:
+
    1. Managing compilation configurations for RBLN devices
    2. Configuring runtime behavior such as device placement
    3. Handling nested configuration objects for complex model architectures
@@ -594,7 +632,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
            optimum_rbln_version (Optional[str]): The optimum-rbln version used for this configuration.
            _torch_dtype (Optional[str]): The data type to use for the model.
            _compile_cfgs (List[RBLNCompileConfig]): List of compilation configurations for the model.
-           **kwargs: Additional keyword arguments.
+           kwargs: Additional keyword arguments.
 
        Raises:
            ValueError: If unexpected keyword arguments are provided.
@@ -761,7 +799,7 @@ class RBLNModelConfig(RBLNSerializableConfigProtocol):
 
        Args:
            path (str): Path to the RBLNModelConfig file or directory containing the config file.
-           **kwargs: Additional keyword arguments to override configuration values.
+           kwargs: Additional keyword arguments to override configuration values.
                Keys starting with 'rbln_' will have the prefix removed and be used
                to update the configuration.
 
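The `rbln_` prefix convention documented above implies an override flow along these lines; a sketch, assuming the enclosing method is the `load` classmethod (its name sits outside this hunk) and that `./compiled_model` is a hypothetical directory holding an exported model and its `rbln_config.json`:

    from optimum.rbln.configuration_utils import RBLNModelConfig

    # `rbln_create_runtimes` is stripped of its prefix and overrides the
    # `create_runtimes` value stored in ./compiled_model/rbln_config.json.
    cfg = RBLNModelConfig.load("./compiled_model", rbln_create_runtimes=False)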
optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py
@@ -46,7 +46,7 @@ class RBLNAutoencoderKLConfig(RBLNModelConfig):
                Determines how much smaller the latent representations are compared to the original images.
            in_channels (Optional[int]): Number of input channels for the model.
            latent_channels (Optional[int]): Number of channels in the latent space.
-           **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+           kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
        Raises:
            ValueError: If batch_size is not a positive integer.
optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Optional
+from typing import Any, Dict, Optional
 
 from ....configuration_utils import RBLNModelConfig
 from ....utils.logging import get_logger
@@ -35,7 +35,7 @@ class RBLNAutoencoderKLCosmosConfig(RBLNModelConfig):
         vae_scale_factor_temporal: Optional[int] = None,
         vae_scale_factor_spatial: Optional[int] = None,
         use_slicing: Optional[bool] = None,
-        **kwargs: Any,
+        **kwargs: Dict[str, Any],
     ):
         """
         Args:
@@ -52,7 +52,7 @@ class RBLNAutoencoderKLCosmosConfig(RBLNModelConfig):
                Determines how much smaller the latent representations are compared to the original videos.
            use_slicing (Optional[bool]): Enable sliced VAE encoding and decoding.
                If True, the VAE will split the input tensor in slices to compute encoding or decoding in several steps.
-           **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+           kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
        Raises:
            ValueError: If batch_size is not a positive integer.
optimum/rbln/diffusers/configurations/models/configuration_controlnet.py
@@ -42,7 +42,7 @@ class RBLNControlNetModelConfig(RBLNModelConfig):
                of the VAE input/output images.
            text_model_hidden_size (Optional[int]): Hidden size of the text encoder model used
                for conditioning.
-           **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+           kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
        Raises:
            ValueError: If batch_size is not a positive integer.
optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py
@@ -22,7 +22,7 @@ class RBLNPriorTransformerConfig(RBLNModelConfig):
    Configuration class for RBLN Prior Transformer models.
 
    This class inherits from RBLNModelConfig and provides specific configuration options
-   for Prior Transformer models used in diffusion models like Kandinsky V2.2.
+   for Transformer models used in diffusion models like Kandinsky V2.2.
    """
 
    subclass_non_save_attributes = ["_batch_size_is_specified"]
@@ -39,7 +39,7 @@ class RBLNPriorTransformerConfig(RBLNModelConfig):
            batch_size (Optional[int]): The batch size for inference. Defaults to 1.
            embedding_dim (Optional[int]): Dimension of the embedding vectors in the model.
            num_embeddings (Optional[int]): Number of discrete embeddings in the codebook.
-           **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+           kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
        Raises:
            ValueError: If batch_size is not a positive integer.
optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py
@@ -18,7 +18,12 @@ from ....configuration_utils import RBLNModelConfig
 
 
 class RBLNCosmosTransformer3DModelConfig(RBLNModelConfig):
-    """Configuration class for RBLN Cosmos Transformer models."""
+    """
+    Configuration class for RBLN Cosmos Transformer models.
+
+    This class inherits from RBLNModelConfig and provides specific configuration options
+    for Transformer models used in diffusion models like Cosmos.
+    """
 
     def __init__(
         self,
@@ -47,7 +52,7 @@ class RBLNCosmosTransformer3DModelConfig(RBLNModelConfig):
            num_channels_latents (Optional[int]): The number of channels in latent space.
            latent_height (Optional[int]): The height in pixels in latent space.
            latent_width (Optional[int]): The width in pixels in latent space.
-           **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+           kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
        Raises:
            ValueError: If batch_size is not a positive integer.
optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py
@@ -18,7 +18,12 @@ from ....configuration_utils import RBLNModelConfig
 
 
 class RBLNSD3Transformer2DModelConfig(RBLNModelConfig):
-    """Configuration class for RBLN Stable Diffusion 3 Transformer models."""
+    """
+    Configuration class for RBLN Stable Diffusion 3 Transformer models.
+
+    This class inherits from RBLNModelConfig and provides specific configuration options
+    for Transformer models used in diffusion models like Stable Diffusion 3.
+    """
 
     subclass_non_save_attributes = ["_batch_size_is_specified"]
 
@@ -36,7 +41,7 @@ class RBLNSD3Transformer2DModelConfig(RBLNModelConfig):
                of the generated samples. If an integer is provided, it's used for both height and width.
            prompt_embed_length (Optional[int]): The length of the embedded prompt vectors that
                will be used to condition the transformer model.
-           **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+           kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
        Raises:
            ValueError: If batch_size is not a positive integer.
optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py
@@ -52,7 +52,7 @@ class RBLNUNet2DConditionModelConfig(RBLNModelConfig):
            in_features (Optional[int]): Number of input features for the model.
            text_model_hidden_size (Optional[int]): Hidden size of the text encoder model.
            image_model_hidden_size (Optional[int]): Hidden size of the image encoder model.
-           **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+           kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
        Raises:
            ValueError: If batch_size is not a positive integer.
optimum/rbln/diffusers/configurations/models/configuration_vq_model.py
@@ -46,7 +46,7 @@ class RBLNVQModelConfig(RBLNModelConfig):
                Determines the downsampling ratio between original images and latent representations.
            in_channels (Optional[int]): Number of input channels for the model.
            latent_channels (Optional[int]): Number of channels in the latent space.
-           **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+           kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
        Raises:
            ValueError: If batch_size is not a positive integer.
optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py
@@ -59,7 +59,7 @@ class RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
            image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
                Cannot be used together with img_height/img_width.
            guidance_scale (Optional[float]): Scale for classifier-free guidance.
-           **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+           kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
        Raises:
            ValueError: If both image_size and img_height/img_width are provided.
@@ -201,7 +201,7 @@ class RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
            image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
                Cannot be used together with img_height/img_width.
            guidance_scale (Optional[float]): Scale for classifier-free guidance.
-           **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+           kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
        Raises:
            ValueError: If both image_size and img_height/img_width are provided.
optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py
@@ -59,7 +59,7 @@ class RBLNCosmosPipelineBaseConfig(RBLNModelConfig):
            num_frames (Optional[int]): The number of frames in the generated video.
            fps (Optional[int]): The frames per second of the generated video.
            max_seq_len (Optional[int]): Maximum sequence length supported by the model.
-           **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+           kwargs: Additional arguments passed to the parent RBLNModelConfig.
        """
        super().__init__(**kwargs)
 
optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py
@@ -54,7 +54,7 @@ class RBLNKandinskyV22PipelineBaseConfig(RBLNModelConfig):
            img_width (Optional[int]): Width of the generated images.
            height (Optional[int]): Height of the generated images.
            width (Optional[int]): Width of the generated images.
-           **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+           kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
        Raises:
            ValueError: If both image_size and img_height/img_width are provided.
@@ -166,7 +166,7 @@ class RBLNKandinskyV22PriorPipelineConfig(RBLNModelConfig):
                Initialized as RBLNPriorTransformerConfig if not provided.
            batch_size (Optional[int]): Batch size for inference, applied to all submodules.
            guidance_scale (Optional[float]): Scale for classifier-free guidance.
-           **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+           kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
        Note:
            When guidance_scale > 1.0, the prior batch size is automatically doubled to
@@ -259,7 +259,7 @@ class RBLNKandinskyV22CombinedPipelineBaseConfig(RBLNModelConfig):
                Used if decoder_pipe is not provided.
            movq (Optional[RBLNVQModelConfig]): Direct configuration for the MoVQ (VQ-GAN) model.
                Used if decoder_pipe is not provided.
-           **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+           kwargs: Additional arguments passed to the parent RBLNModelConfig.
        """
        super().__init__(**kwargs)
 
optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py
@@ -56,7 +56,7 @@ class RBLNStableDiffusionPipelineBaseConfig(RBLNModelConfig):
            image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
                Cannot be used together with img_height/img_width.
            guidance_scale (Optional[float]): Scale for classifier-free guidance.
-           **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+           kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
        Raises:
            ValueError: If both image_size and img_height/img_width are provided.
optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py
@@ -64,7 +64,7 @@ class RBLNStableDiffusion3PipelineBaseConfig(RBLNModelConfig):
            height (Optional[int]): Height of the generated images.
            width (Optional[int]): Width of the generated images.
            guidance_scale (Optional[float]): Scale for classifier-free guidance.
-           **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+           kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
        Raises:
            ValueError: If both image_size and img_height/img_width are provided.
optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py
@@ -59,7 +59,7 @@ class RBLNStableDiffusionXLPipelineBaseConfig(RBLNModelConfig):
            image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
                Cannot be used together with img_height/img_width.
            guidance_scale (Optional[float]): Scale for classifier-free guidance.
-           **kwargs: Additional arguments passed to the parent RBLNModelConfig.
+           kwargs: Additional arguments passed to the parent RBLNModelConfig.
 
        Raises:
            ValueError: If both image_size and img_height/img_width are provided.
optimum/rbln/diffusers/modeling_diffusers.py
@@ -33,6 +33,10 @@ if TYPE_CHECKING:
 
 
 class RBLNDiffusionMixinConfig(RBLNModelConfig):
+    """
+    Configuration class for RBLN diffusion pipelines.
+    """
+
     pass
 
 
@@ -54,8 +58,8 @@ class RBLNDiffusionMixin:
    ```
 
    Class Variables:
-       _submodules: List of submodule names that should be compiled (typically ["text_encoder", "unet", "vae"])
-       _optional_submodules: List of submodule names compiled without inheriting RBLNModel (typically ["safety_checker"])
+       - `_submodules`: List of submodule names that should be compiled (typically ["text_encoder", "unet", "vae"])
+       - `_optional_submodules`: List of submodule names compiled without inheriting RBLNModel (typically ["safety_checker"])
 
    Methods:
        from_pretrained: Creates and optionally compiles a model from a pretrained checkpoint
@@ -170,7 +174,7 @@ class RBLNDiffusionMixin:
                Names of specific LoRA weight files to load, corresponding to lora_ids. Only used when `export=True`.
            lora_scales:
                Scaling factor(s) to apply to the LoRA adapter(s). Only used when `export=True`.
-           **kwargs:
+           kwargs:
                Additional arguments to pass to the underlying diffusion pipeline constructor or the
                RBLN compilation process. These may include parameters specific to individual submodules
                or the particular diffusion pipeline being used.
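For context, the export-time LoRA arguments documented above would be used roughly like this; the checkpoint and adapter identifiers are illustrative, not prescribed by the package:

    from optimum.rbln import RBLNStableDiffusionPipeline

    # lora_ids / lora_weights_names / lora_scales are honored only when
    # export=True, i.e. while compiling from the original checkpoint.
    # The model and adapter IDs below are illustrative.
    pipe = RBLNStableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        export=True,
        lora_ids="latent-consistency/lcm-lora-sdv1-5",
        lora_weights_names="pytorch_lora_weights.safetensors",
        lora_scales=1.0,
    )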
optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import TYPE_CHECKING, Dict, List, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union
 
 import rebel
 import torch
@@ -214,13 +214,41 @@ class RBLNAutoencoderKL(RBLNModel):
             for compiled_model, device_val in zip(compiled_models, device_vals)
         ]
 
-    def encode(self, x: torch.FloatTensor, return_dict: bool = True, **kwargs) -> torch.FloatTensor:
+    def encode(
+        self, x: torch.FloatTensor, return_dict: bool = True, **kwargs: Dict[str, Any]
+    ) -> Union[torch.FloatTensor, AutoencoderKLOutput]:
+        """
+        Encode an input image into a latent representation.
+
+        Args:
+            x: The input image to encode.
+            return_dict:
+                Whether to return output as a dictionary. Defaults to True.
+            kwargs: Additional arguments to pass to the encoder.
+
+        Returns:
+            The latent representation or AutoencoderKLOutput if return_dict=True
+        """
         posterior = self.encoder.encode(x)
         if not return_dict:
             return (posterior,)
         return AutoencoderKLOutput(latent_dist=posterior)
 
-    def decode(self, z: torch.FloatTensor, return_dict: bool = True, **kwargs) -> torch.FloatTensor:
+    def decode(
+        self, z: torch.FloatTensor, return_dict: bool = True, **kwargs: Dict[str, Any]
+    ) -> Union[torch.FloatTensor, DecoderOutput]:
+        """
+        Decode a latent representation into an image.
+
+        Args:
+            z: The latent representation to decode.
+            return_dict:
+                Whether to return output as a dictionary. Defaults to True.
+            kwargs: Additional arguments to pass to the decoder.
+
+        Returns:
+            The decoded image or DecoderOutput if return_dict=True
+        """
         dec = self.decoder.decode(z)
         if not return_dict:
             return (dec,)
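A usage sketch of the `encode`/`decode` pair annotated above, assuming `vae` is an RBLNAutoencoderKL loaded elsewhere and the tensor shape is illustrative:

    import torch

    # `vae` is assumed to be an RBLNAutoencoderKL loaded elsewhere;
    # the shape below is illustrative.
    image = torch.randn(1, 3, 512, 512)

    # return_dict=True (the default) wraps results in diffusers output classes
    posterior = vae.encode(image).latent_dist     # AutoencoderKLOutput.latent_dist
    latents = posterior.sample()
    reconstruction = vae.decode(latents).sample   # DecoderOutput.sample

    # return_dict=False yields plain tuples instead
    (posterior,) = vae.encode(image, return_dict=False)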
optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import TYPE_CHECKING, Dict, List, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Union
 
 import rebel
 import torch
@@ -205,13 +205,38 @@ class RBLNAutoencoderKLCosmos(RBLNModel):
             for compiled_model, device_val in zip(compiled_models, device_vals)
         ]
 
-    def encode(self, x: torch.FloatTensor, return_dict: bool = True, **kwargs) -> torch.FloatTensor:
+    def encode(
+        self, x: torch.FloatTensor, return_dict: bool = True, **kwargs: Dict[str, Any]
+    ) -> Union[torch.FloatTensor, AutoencoderKLOutput]:
+        """
+        Encode an input video into a latent representation.
+
+        Args:
+            x: The input video to encode.
+            return_dict:
+                Whether to return output as a dictionary. Defaults to True.
+            kwargs: Additional arguments to pass to the encoder.
+
+        Returns:
+            The latent representation or AutoencoderKLOutput if return_dict=True
+        """
         posterior = self.encoder.encode(x)
         if not return_dict:
             return (posterior,)
         return AutoencoderKLOutput(latent_dist=posterior)
 
-    def decode(self, z: torch.FloatTensor, return_dict: bool = True) -> torch.FloatTensor:
+    def decode(self, z: torch.FloatTensor, return_dict: bool = True) -> Union[torch.FloatTensor, DecoderOutput]:
+        """
+        Decode a latent representation into a video.
+
+        Args:
+            z: The latent representation to decode.
+            return_dict:
+                Whether to return output as a dictionary. Defaults to True.
+
+        Returns:
+            The decoded video or DecoderOutput if return_dict=True
+        """
         decoded = self.decoder.decode(z)
 
         if not return_dict:
optimum/rbln/diffusers/models/autoencoders/vq_model.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import TYPE_CHECKING, List, Union
+from typing import TYPE_CHECKING, Any, List, Union
 
 import rebel
 import torch
@@ -170,13 +170,41 @@ class RBLNVQModel(RBLNModel):
             for compiled_model, device_val in zip(compiled_models, device_vals)
         ]
 
-    def encode(self, x: torch.FloatTensor, return_dict: bool = True, **kwargs) -> torch.FloatTensor:
+    def encode(
+        self, x: torch.FloatTensor, return_dict: bool = True, **kwargs: Any
+    ) -> Union[torch.FloatTensor, VQEncoderOutput]:
+        """
+        Encode an input image into a quantized latent representation.
+
+        Args:
+            x: The input image to encode.
+            return_dict:
+                Whether to return output as a dictionary. Defaults to True.
+            kwargs: Additional arguments to pass to the encoder/quantizer.
+
+        Returns:
+            The quantized latent representation or a specific output object.
+        """
         posterior = self.encoder.encode(x)
         if not return_dict:
             return (posterior,)
         return VQEncoderOutput(latents=posterior)
 
-    def decode(self, h: torch.FloatTensor, return_dict: bool = True, **kwargs) -> torch.FloatTensor:
+    def decode(
+        self, h: torch.FloatTensor, return_dict: bool = True, **kwargs: Any
+    ) -> Union[torch.FloatTensor, DecoderOutput]:
+        """
+        Decode a quantized latent representation back into an image.
+
+        Args:
+            h: The quantized latent representation to decode.
+            return_dict:
+                Whether to return output as a dictionary. Defaults to True.
+            kwargs: Additional arguments to pass to the decoder.
+
+        Returns:
+            The decoded image or a DecoderOutput object.
+        """
         dec, commit_loss = self.decoder.decode(h, **kwargs)
         if not return_dict:
             return (dec, commit_loss)
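The VQ variant differs slightly: `encode` returns a VQEncoderOutput with a `latents` field, and `decode` with return_dict=False yields both the reconstruction and the commitment loss. A sketch, with `vq` an RBLNVQModel loaded elsewhere and an illustrative shape:

    import torch

    # `vq` is assumed to be an RBLNVQModel loaded elsewhere; shape is illustrative.
    image = torch.randn(1, 3, 256, 256)

    latents = vq.encode(image).latents            # VQEncoderOutput.latents
    dec, commit_loss = vq.decode(latents, return_dict=False)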
optimum/rbln/diffusers/models/transformers/prior_transformer.py
@@ -59,7 +59,7 @@ class RBLNPriorTransformer(RBLNModel):
    """
    RBLN implementation of PriorTransformer for diffusion models like Kandinsky V2.2.
 
-   The Prior Transformer takes text and/or image embeddings from encoders (like CLIP) and
+   The PriorTransformer takes text and/or image embeddings from encoders (like CLIP) and
    maps them to a shared latent space that guides the diffusion process to generate the desired image.
 
    This class inherits from [`RBLNModel`]. Check the superclass documentation for the generic methods
optimum/rbln/diffusers/models/transformers/transformer_cosmos.py
@@ -94,7 +94,15 @@ class CosmosTransformer3DModelWrapper(torch.nn.Module):
 
 
 class RBLNCosmosTransformer3DModel(RBLNModel):
-    """RBLN wrapper for the Cosmos Transformer model."""
+    """
+    RBLN implementation of CosmosTransformer3DModel for diffusion models like Cosmos.
+
+    The CosmosTransformer3DModel takes text and/or image embeddings from encoders (like CLIP) and
+    maps them to a shared latent space that guides the diffusion process to generate the desired image.
+
+    This class inherits from [`RBLNModel`]. Check the superclass documentation for the generic methods
+    the library implements for all its models.
+    """
 
     hf_library_name = "diffusers"
     auto_model_class = CosmosTransformer3DModel
optimum/rbln/diffusers/models/transformers/transformer_sd3.py
@@ -59,7 +59,15 @@ class SD3Transformer2DModelWrapper(torch.nn.Module):
 
 
 class RBLNSD3Transformer2DModel(RBLNModel):
-    """RBLN wrapper for the Stable Diffusion 3 MMDiT Transformer model."""
+    """
+    RBLN implementation of SD3Transformer2DModel for diffusion models like Stable Diffusion 3.
+
+    The SD3Transformer2DModel takes text and/or image embeddings from encoders (like CLIP) and
+    maps them to a shared latent space that guides the diffusion process to generate the desired image.
+
+    This class inherits from [`RBLNModel`]. Check the superclass documentation for the generic methods
+    the library implements for all its models.
+    """
 
     hf_library_name = "diffusers"
     auto_model_class = SD3Transformer2DModel
optimum/rbln/diffusers/models/unets/unet_2d_condition.py
@@ -141,10 +141,13 @@ class _UNet_Kandinsky(torch.nn.Module):
 
 class RBLNUNet2DConditionModel(RBLNModel):
     """
-    Configuration class for RBLN UNet2DCondition models.
+    RBLN implementation of UNet2DConditionModel for diffusion models.
 
-    This class inherits from RBLNModelConfig and provides specific configuration options
-    for UNet2DCondition models used in diffusion-based image generation.
+    This model is used to accelerate UNet2DCondition models from diffusers library on RBLN NPUs.
+    It is a key component in diffusion-based image generation models like Stable Diffusion.
+
+    This class inherits from [`RBLNModel`]. Check the superclass documentation for the generic methods
+    the library implements for all its models.
     """
 
     hf_library_name = "diffusers"