PyPI - optimum-rbln - Versions diffs - 0.9.3.post1__py3-none-any.whl - Mend

optimum-rbln 0.9.3.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of optimum-rbln might be problematic. Click here for more details.

Files changed (264) hide show

optimum/rbln/diffusers/__init__.py ADDED Viewed

@@ -0,0 +1,198 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import TYPE_CHECKING
+from diffusers.pipelines.pipeline_utils import ALL_IMPORTABLE_CLASSES, LOADABLE_CLASSES
+from transformers.utils import _LazyModule
+# Import-time side effect: register the "optimum.rbln" library in diffusers' LOADABLE_CLASSES
+# table. Each entry maps a class name to a pair of method names (presumably the save / load
+# methods diffusers calls for components of that class -- confirm against diffusers).
+LOADABLE_CLASSES["optimum.rbln"] = {
+    "RBLNBaseModel": ["save_pretrained", "from_pretrained"],
+    "RBLNCosmosSafetyChecker": ["save_pretrained", "from_pretrained"],
+}
+# Merge the same entries into the flat ALL_IMPORTABLE_CLASSES mapping.
+ALL_IMPORTABLE_CLASSES.update(LOADABLE_CLASSES["optimum.rbln"])
+# Maps each submodule of this package to the public names it provides. This table is passed to
+# _LazyModule at the bottom of the file and must stay in sync with the TYPE_CHECKING imports.
+_import_structure = {
+    "configurations": [
+        "RBLNAutoencoderKLConfig",
+        "RBLNAutoencoderKLCosmosConfig",
+        "RBLNControlNetModelConfig",
+        "RBLNCosmosTextToWorldPipelineConfig",
+        "RBLNCosmosVideoToWorldPipelineConfig",
+        "RBLNCosmosTransformer3DModelConfig",
+        "RBLNKandinskyV22CombinedPipelineConfig",
+        "RBLNKandinskyV22Img2ImgCombinedPipelineConfig",
+        "RBLNKandinskyV22Img2ImgPipelineConfig",
+        "RBLNKandinskyV22InpaintCombinedPipelineConfig",
+        "RBLNKandinskyV22InpaintPipelineConfig",
+        "RBLNKandinskyV22PipelineConfig",
+        "RBLNKandinskyV22PriorPipelineConfig",
+        "RBLNPriorTransformerConfig",
+        "RBLNStableDiffusionControlNetPipelineConfig",
+        "RBLNStableDiffusionControlNetImg2ImgPipelineConfig",
+        "RBLNStableDiffusionImg2ImgPipelineConfig",
+        "RBLNStableDiffusionInpaintPipelineConfig",
+        "RBLNStableDiffusionPipelineConfig",
+        "RBLNStableDiffusionXLControlNetPipelineConfig",
+        "RBLNStableDiffusionXLControlNetImg2ImgPipelineConfig",
+        "RBLNStableDiffusionXLImg2ImgPipelineConfig",
+        "RBLNStableDiffusionXLInpaintPipelineConfig",
+        "RBLNStableDiffusionXLPipelineConfig",
+        "RBLNStableDiffusion3PipelineConfig",
+        "RBLNStableDiffusion3Img2ImgPipelineConfig",
+        "RBLNStableDiffusion3InpaintPipelineConfig",
+        "RBLNSD3Transformer2DModelConfig",
+        "RBLNUNet2DConditionModelConfig",
+        "RBLNVQModelConfig",
+        "RBLNUNetSpatioTemporalConditionModelConfig",
+        "RBLNStableVideoDiffusionPipelineConfig",
+        "RBLNAutoencoderKLTemporalDecoderConfig",
+    ],
+    "pipelines": [
+        "RBLNAutoPipelineForImage2Image",
+        "RBLNAutoPipelineForInpainting",
+        "RBLNAutoPipelineForText2Image",
+        "RBLNCosmosTextToWorldPipeline",
+        "RBLNCosmosVideoToWorldPipeline",
+        "RBLNCosmosSafetyChecker",
+        "RBLNKandinskyV22CombinedPipeline",
+        "RBLNKandinskyV22Img2ImgCombinedPipeline",
+        "RBLNKandinskyV22InpaintCombinedPipeline",
+        "RBLNKandinskyV22InpaintPipeline",
+        "RBLNKandinskyV22Img2ImgPipeline",
+        "RBLNKandinskyV22PriorPipeline",
+        "RBLNKandinskyV22Pipeline",
+        "RBLNStableDiffusionPipeline",
+        "RBLNStableDiffusionXLPipeline",
+        "RBLNStableDiffusionImg2ImgPipeline",
+        "RBLNStableDiffusionInpaintPipeline",
+        "RBLNStableDiffusionControlNetImg2ImgPipeline",
+        "RBLNMultiControlNetModel",
+        "RBLNStableDiffusionXLImg2ImgPipeline",
+        "RBLNStableDiffusionXLInpaintPipeline",
+        "RBLNStableDiffusionControlNetPipeline",
+        "RBLNStableDiffusionXLControlNetPipeline",
+        "RBLNStableDiffusionXLControlNetImg2ImgPipeline",
+        "RBLNStableDiffusion3Pipeline",
+        "RBLNStableDiffusion3Img2ImgPipeline",
+        "RBLNStableDiffusion3InpaintPipeline",
+        "RBLNStableVideoDiffusionPipeline",
+    ],
+    "models": [
+        "RBLNAutoencoderKL",
+        "RBLNAutoencoderKLCosmos",
+        "RBLNUNet2DConditionModel",
+        "RBLNUNetSpatioTemporalConditionModel",
+        "RBLNControlNetModel",
+        "RBLNCosmosTransformer3DModel",
+        "RBLNSD3Transformer2DModel",
+        "RBLNAutoencoderKLTemporalDecoder",
+        "RBLNPriorTransformer",
+        "RBLNVQModel",
+    ],
+    "modeling_diffusers": [
+        "RBLNDiffusionMixin",
+    ],
+}
+# Static type checkers take the eager-import branch so every public name resolves;
+# at runtime the else-branch installs a lazy module instead.
+if TYPE_CHECKING:
+    from .configurations import (
+        RBLNAutoencoderKLConfig,
+        RBLNAutoencoderKLCosmosConfig,
+        RBLNAutoencoderKLTemporalDecoderConfig,
+        RBLNControlNetModelConfig,
+        RBLNCosmosTextToWorldPipelineConfig,
+        RBLNCosmosTransformer3DModelConfig,
+        RBLNCosmosVideoToWorldPipelineConfig,
+        RBLNKandinskyV22CombinedPipelineConfig,
+        RBLNKandinskyV22Img2ImgCombinedPipelineConfig,
+        RBLNKandinskyV22Img2ImgPipelineConfig,
+        RBLNKandinskyV22InpaintCombinedPipelineConfig,
+        RBLNKandinskyV22InpaintPipelineConfig,
+        RBLNKandinskyV22PipelineConfig,
+        RBLNKandinskyV22PriorPipelineConfig,
+        RBLNPriorTransformerConfig,
+        RBLNSD3Transformer2DModelConfig,
+        RBLNStableDiffusion3Img2ImgPipelineConfig,
+        RBLNStableDiffusion3InpaintPipelineConfig,
+        RBLNStableDiffusion3PipelineConfig,
+        RBLNStableDiffusionControlNetImg2ImgPipelineConfig,
+        RBLNStableDiffusionControlNetPipelineConfig,
+        RBLNStableDiffusionImg2ImgPipelineConfig,
+        RBLNStableDiffusionInpaintPipelineConfig,
+        RBLNStableDiffusionPipelineConfig,
+        RBLNStableDiffusionXLControlNetImg2ImgPipelineConfig,
+        RBLNStableDiffusionXLControlNetPipelineConfig,
+        RBLNStableDiffusionXLImg2ImgPipelineConfig,
+        RBLNStableDiffusionXLInpaintPipelineConfig,
+        RBLNStableDiffusionXLPipelineConfig,
+        RBLNStableVideoDiffusionPipelineConfig,
+        RBLNUNet2DConditionModelConfig,
+        RBLNUNetSpatioTemporalConditionModelConfig,
+        RBLNVQModelConfig,
+    )
+    from .modeling_diffusers import RBLNDiffusionMixin
+    from .models import (
+        RBLNAutoencoderKL,
+        RBLNAutoencoderKLCosmos,
+        RBLNAutoencoderKLTemporalDecoder,
+        RBLNControlNetModel,
+        RBLNCosmosTransformer3DModel,
+        RBLNPriorTransformer,
+        RBLNSD3Transformer2DModel,
+        RBLNUNet2DConditionModel,
+        RBLNUNetSpatioTemporalConditionModel,
+        RBLNVQModel,
+    )
+    from .pipelines import (
+        RBLNAutoPipelineForImage2Image,
+        RBLNAutoPipelineForInpainting,
+        RBLNAutoPipelineForText2Image,
+        RBLNCosmosSafetyChecker,
+        RBLNCosmosTextToWorldPipeline,
+        RBLNCosmosVideoToWorldPipeline,
+        RBLNKandinskyV22CombinedPipeline,
+        RBLNKandinskyV22Img2ImgCombinedPipeline,
+        RBLNKandinskyV22Img2ImgPipeline,
+        RBLNKandinskyV22InpaintCombinedPipeline,
+        RBLNKandinskyV22InpaintPipeline,
+        RBLNKandinskyV22Pipeline,
+        RBLNKandinskyV22PriorPipeline,
+        RBLNMultiControlNetModel,
+        RBLNStableDiffusion3Img2ImgPipeline,
+        RBLNStableDiffusion3InpaintPipeline,
+        RBLNStableDiffusion3Pipeline,
+        RBLNStableDiffusionControlNetImg2ImgPipeline,
+        RBLNStableDiffusionControlNetPipeline,
+        RBLNStableDiffusionImg2ImgPipeline,
+        RBLNStableDiffusionInpaintPipeline,
+        RBLNStableDiffusionPipeline,
+        RBLNStableDiffusionXLControlNetImg2ImgPipeline,
+        RBLNStableDiffusionXLControlNetPipeline,
+        RBLNStableDiffusionXLImg2ImgPipeline,
+        RBLNStableDiffusionXLInpaintPipeline,
+        RBLNStableDiffusionXLPipeline,
+        RBLNStableVideoDiffusionPipeline,
+    )
+else:
+    import sys
+    # Replace this module object in sys.modules with a _LazyModule built from
+    # _import_structure (presumably deferring submodule imports until a name is
+    # first accessed -- see transformers' _LazyModule).
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )

optimum/rbln/diffusers/configurations/__init__.py ADDED Viewed

@@ -0,0 +1,37 @@
+from .models import (
+    RBLNAutoencoderKLConfig,
+    RBLNAutoencoderKLCosmosConfig,
+    RBLNAutoencoderKLTemporalDecoderConfig,
+    RBLNControlNetModelConfig,
+    RBLNCosmosTransformer3DModelConfig,
+    RBLNPriorTransformerConfig,
+    RBLNSD3Transformer2DModelConfig,
+    RBLNUNet2DConditionModelConfig,
+    RBLNUNetSpatioTemporalConditionModelConfig,
+    RBLNVQModelConfig,
+)
+from .pipelines import (
+    RBLNCosmosTextToWorldPipelineConfig,
+    RBLNCosmosVideoToWorldPipelineConfig,
+    RBLNKandinskyV22CombinedPipelineConfig,
+    RBLNKandinskyV22Img2ImgCombinedPipelineConfig,
+    RBLNKandinskyV22Img2ImgPipelineConfig,
+    RBLNKandinskyV22InpaintCombinedPipelineConfig,
+    RBLNKandinskyV22InpaintPipelineConfig,
+    RBLNKandinskyV22PipelineConfig,
+    RBLNKandinskyV22PriorPipelineConfig,
+    RBLNStableDiffusion3Img2ImgPipelineConfig,
+    RBLNStableDiffusion3InpaintPipelineConfig,
+    RBLNStableDiffusion3PipelineConfig,
+    RBLNStableDiffusionControlNetImg2ImgPipelineConfig,
+    RBLNStableDiffusionControlNetPipelineConfig,
+    RBLNStableDiffusionImg2ImgPipelineConfig,
+    RBLNStableDiffusionInpaintPipelineConfig,
+    RBLNStableDiffusionPipelineConfig,
+    RBLNStableDiffusionXLControlNetImg2ImgPipelineConfig,
+    RBLNStableDiffusionXLControlNetPipelineConfig,
+    RBLNStableDiffusionXLImg2ImgPipelineConfig,
+    RBLNStableDiffusionXLInpaintPipelineConfig,
+    RBLNStableDiffusionXLPipelineConfig,
+    RBLNStableVideoDiffusionPipelineConfig,
+)

optimum/rbln/diffusers/configurations/models/__init__.py ADDED Viewed

@@ -0,0 +1,10 @@
+from .configuration_autoencoder_kl import RBLNAutoencoderKLConfig
+from .configuration_autoencoder_kl_cosmos import RBLNAutoencoderKLCosmosConfig
+from .configuration_autoencoder_kl_temporal_decoder import RBLNAutoencoderKLTemporalDecoderConfig
+from .configuration_controlnet import RBLNControlNetModelConfig
+from .configuration_prior_transformer import RBLNPriorTransformerConfig
+from .configuration_transformer_cosmos import RBLNCosmosTransformer3DModelConfig
+from .configuration_transformer_sd3 import RBLNSD3Transformer2DModelConfig
+from .configuration_unet_2d_condition import RBLNUNet2DConditionModelConfig
+from .configuration_unet_spatio_temporal_condition import RBLNUNetSpatioTemporalConditionModelConfig
+from .configuration_vq_model import RBLNVQModelConfig

optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py ADDED Viewed

@@ -0,0 +1,73 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Optional, Tuple
+from ....configuration_utils import RBLNModelConfig
+class RBLNAutoencoderKLConfig(RBLNModelConfig):
+    """
+    Configuration class for RBLN Variational Autoencoder (VAE) models.
+    This class inherits from RBLNModelConfig and provides specific configuration options
+    for VAE models used in diffusion-based image generation.
+    """
+    def __init__(
+        self,
+        batch_size: Optional[int] = None,
+        sample_size: Optional[Tuple[int, int]] = None,
+        uses_encoder: Optional[bool] = None,
+        vae_scale_factor: Optional[float] = None,  # TODO: rename to scaling_factor
+        in_channels: Optional[int] = None,
+        latent_channels: Optional[int] = None,
+        **kwargs: Any,
+    ):
+        """
+        Args:
+            batch_size (Optional[int]): The batch size for inference. Defaults to 1.
+            sample_size (Optional[Tuple[int, int]]): The spatial dimensions (height, width) of the input/output images.
+                If an integer is provided, it's used for both height and width.
+            uses_encoder (Optional[bool]): Whether to include the encoder part of the VAE in the model.
+                When False, only the decoder is used (for latent-to-image conversion).
+            vae_scale_factor (Optional[float]): The scaling factor between pixel space and latent space.
+                Determines how much smaller the latent representations are compared to the original images.
+            in_channels (Optional[int]): Number of input channels for the model.
+            latent_channels (Optional[int]): Number of channels in the latent space.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
+        Raises:
+            ValueError: If batch_size is not a positive integer.
+        """
+        super().__init__(**kwargs)
+        self.batch_size = batch_size or 1
+        if not isinstance(self.batch_size, int) or self.batch_size < 0:
+            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+        self.uses_encoder = uses_encoder
+        self.vae_scale_factor = vae_scale_factor
+        self.in_channels = in_channels
+        self.latent_channels = latent_channels
+        self.sample_size = sample_size
+        if isinstance(sample_size, int):
+            self.sample_size = (sample_size, sample_size)
+    @property
+    def image_size(self):
+        return self.sample_size
+    @property
+    def latent_sample_size(self):
+        return (self.image_size[0] // self.vae_scale_factor, self.image_size[1] // self.vae_scale_factor)

optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py ADDED Viewed

@@ -0,0 +1,84 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Dict, Optional
+from ....configuration_utils import RBLNModelConfig
+from ....utils.logging import get_logger
+logger = get_logger(__name__)
+class RBLNAutoencoderKLCosmosConfig(RBLNModelConfig):
+    """Configuration class for RBLN Cosmos Variational Autoencoder (VAE) models."""
+    def __init__(
+        self,
+        batch_size: Optional[int] = None,
+        uses_encoder: Optional[bool] = None,
+        num_frames: Optional[int] = None,
+        height: Optional[int] = None,
+        width: Optional[int] = None,
+        num_channels_latents: Optional[int] = None,
+        vae_scale_factor_temporal: Optional[int] = None,
+        vae_scale_factor_spatial: Optional[int] = None,
+        use_slicing: Optional[bool] = None,
+        **kwargs: Dict[str, Any],
+    ):
+        """
+        Args:
+            batch_size (Optional[int]): The batch size for inference. Defaults to 1.
+            uses_encoder (Optional[bool]): Whether to include the encoder part of the VAE in the model.
+                When False, only the decoder is used (for latent-to-video conversion).
+            num_frames (Optional[int]): The number of frames in the generated video. Defaults to 121.
+            height (Optional[int]): The height in pixels of the generated video. Defaults to 704.
+            width (Optional[int]): The width in pixels of the generated video. Defaults to 1280.
+            num_channels_latents (Optional[int]): The number of channels in latent space.
+            vae_scale_factor_temporal (Optional[int]): The scaling factor between time space and latent space.
+                Determines how much shorter the latent representations are compared to the original videos.
+            vae_scale_factor_spatial (Optional[int]): The scaling factor between pixel space and latent space.
+                Determines how much smaller the latent representations are compared to the original videos.
+            use_slicing (Optional[bool]): Enable sliced VAE encoding and decoding.
+                If True, the VAE will split the input tensor in slices to compute encoding or decoding in several steps.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
+        Raises:
+            ValueError: If batch_size is not a positive integer.
+        """
+        super().__init__(**kwargs)
+        # Since the Cosmos VAE Decoder already requires approximately 7.9 GiB of memory,
+        # Optimum-rbln cannot execute this model on RBLN-CA12 when the batch size > 1.
+        # However, the Cosmos VAE Decoder propose batch slicing when the batch size is greater than 1,
+        # Optimum-rbln utilize this method by compiling with batch_size=1 to enable batch slicing.
+        self.batch_size = batch_size or 1
+        if not isinstance(self.batch_size, int) or self.batch_size < 0:
+            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+        elif self.batch_size > 1:
+            logger.warning("The batch size of Cosmos VAE Decoder will be explicitly 1 for memory efficiency.")
+            self.batch_size = 1
+        self.uses_encoder = uses_encoder
+        self.num_frames = num_frames or 121
+        self.height = height or 704
+        self.width = width or 1280
+        self.num_channels_latents = num_channels_latents
+        self.vae_scale_factor_temporal = vae_scale_factor_temporal
+        self.vae_scale_factor_spatial = vae_scale_factor_spatial
+        self.use_slicing = use_slicing or False
+    @property
+    def image_size(self):
+        return (self.height, self.width)

optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_temporal_decoder.py ADDED Viewed

@@ -0,0 +1,67 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Optional, Tuple
+from ....configuration_utils import RBLNModelConfig
+class RBLNAutoencoderKLTemporalDecoderConfig(RBLNModelConfig):
+    def __init__(
+        self,
+        batch_size: Optional[int] = None,
+        sample_size: Optional[Tuple[int, int]] = None,
+        uses_encoder: Optional[bool] = None,
+        num_frames: Optional[int] = None,
+        decode_chunk_size: Optional[int] = None,
+        vae_scale_factor: Optional[float] = None,
+        **kwargs: Any,
+    ):
+        """
+        Args:
+            batch_size (Optional[int]): The batch size for inference. Defaults to 1.
+            sample_size (Optional[Tuple[int, int]]): The spatial dimensions (height, width) of the input/output images.
+                If an integer is provided, it's used for both height and width.
+            uses_encoder (Optional[bool]): Whether to include the encoder part of the VAE in the model.
+                When False, only the decoder is used (for latent-to-image conversion).
+            num_frames (Optional[int]): The number of frames in the generated video.
+            decode_chunk_size (Optional[int]): The number of frames to decode at once during VAE decoding.
+                Useful for managing memory usage during video generation.
+            vae_scale_factor (Optional[float]): The scaling factor between pixel space and latent space.
+                Determines how much smaller the latent representations are compared to the original images.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
+        Raises:
+            ValueError: If batch_size is not a positive integer.
+        """
+        super().__init__(**kwargs)
+        self.batch_size = batch_size or 1
+        if not isinstance(self.batch_size, int) or self.batch_size < 0:
+            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+        self.uses_encoder = uses_encoder
+        self.num_frames = num_frames
+        self.decode_chunk_size = decode_chunk_size
+        self.vae_scale_factor = vae_scale_factor
+        self.sample_size = sample_size
+        if isinstance(sample_size, int):
+            self.sample_size = (sample_size, sample_size)
+    @property
+    def image_size(self):
+        return self.sample_size
+    @property
+    def latent_sample_size(self):
+        return (self.image_size[0] // self.vae_scale_factor, self.image_size[1] // self.vae_scale_factor)

optimum/rbln/diffusers/configurations/models/configuration_controlnet.py ADDED Viewed

@@ -0,0 +1,64 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Optional, Tuple
+from ....configuration_utils import RBLNModelConfig
+class RBLNControlNetModelConfig(RBLNModelConfig):
+    """Configuration class for RBLN ControlNet models."""
+    subclass_non_save_attributes = ["_batch_size_is_specified"]
+    def __init__(
+        self,
+        batch_size: Optional[int] = None,
+        max_seq_len: Optional[int] = None,
+        unet_sample_size: Optional[Tuple[int, int]] = None,
+        vae_sample_size: Optional[Tuple[int, int]] = None,
+        text_model_hidden_size: Optional[int] = None,
+        **kwargs: Any,
+    ):
+        """
+        Args:
+            batch_size (Optional[int]): The batch size for inference. Defaults to 1.
+            max_seq_len (Optional[int]): Maximum sequence length for text inputs when used
+                with cross-attention.
+            unet_sample_size (Optional[Tuple[int, int]]): The spatial dimensions (height, width)
+                of the UNet output samples.
+            vae_sample_size (Optional[Tuple[int, int]]): The spatial dimensions (height, width)
+                of the VAE input/output images.
+            text_model_hidden_size (Optional[int]): Hidden size of the text encoder model used
+                for conditioning.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
+        Raises:
+            ValueError: If batch_size is not a positive integer.
+        """
+        super().__init__(**kwargs)
+        self._batch_size_is_specified = batch_size is not None
+        self.batch_size = batch_size or 1
+        if not isinstance(self.batch_size, int) or self.batch_size < 0:
+            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+        self.max_seq_len = max_seq_len
+        self.unet_sample_size = unet_sample_size
+        self.vae_sample_size = vae_sample_size
+        self.text_model_hidden_size = text_model_hidden_size
+    @property
+    def batch_size_is_specified(self):
+        return self._batch_size_is_specified

optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py ADDED Viewed

@@ -0,0 +1,59 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Optional
+from ....configuration_utils import RBLNModelConfig
+class RBLNPriorTransformerConfig(RBLNModelConfig):
+    """
+    Configuration class for RBLN Prior Transformer models.
+    This class inherits from RBLNModelConfig and provides specific configuration options
+    for Transformer models used in diffusion models like Kandinsky V2.2.
+    """
+    subclass_non_save_attributes = ["_batch_size_is_specified"]
+    def __init__(
+        self,
+        batch_size: Optional[int] = None,
+        embedding_dim: Optional[int] = None,
+        num_embeddings: Optional[int] = None,
+        **kwargs: Any,
+    ):
+        """
+        Args:
+            batch_size (Optional[int]): The batch size for inference. Defaults to 1.
+            embedding_dim (Optional[int]): Dimension of the embedding vectors in the model.
+            num_embeddings (Optional[int]): Number of discrete embeddings in the codebook.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
+        Raises:
+            ValueError: If batch_size is not a positive integer.
+        """
+        super().__init__(**kwargs)
+        self._batch_size_is_specified = batch_size is not None
+        self.batch_size = batch_size or 1
+        if not isinstance(self.batch_size, int) or self.batch_size < 0:
+            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
+        self.embedding_dim = embedding_dim
+        self.num_embeddings = num_embeddings
+    @property
+    def batch_size_is_specified(self):
+        return self._batch_size_is_specified

optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py ADDED Viewed

@@ -0,0 +1,78 @@
+# Copyright 2025 Rebellions Inc. All rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from typing import Any, Optional
+from ....configuration_utils import RBLNModelConfig
+class RBLNCosmosTransformer3DModelConfig(RBLNModelConfig):
+    """
+    Configuration class for RBLN Cosmos Transformer models.
+    This class inherits from RBLNModelConfig and provides specific configuration options
+    for Transformer models used in diffusion models like Cosmos.
+    """
+    def __init__(
+        self,
+        batch_size: Optional[int] = None,
+        num_frames: Optional[int] = None,
+        height: Optional[int] = None,
+        width: Optional[int] = None,
+        fps: Optional[int] = None,
+        max_seq_len: Optional[int] = None,
+        embedding_dim: Optional[int] = None,
+        num_channels_latents: Optional[int] = None,
+        num_latent_frames: Optional[int] = None,
+        latent_height: Optional[int] = None,
+        latent_width: Optional[int] = None,
+        **kwargs: Any,
+    ):
+        """
+        Args:
+            batch_size (Optional[int]): The batch size for inference. Defaults to 1.
+            num_frames (Optional[int]): The number of frames in the generated video. Defaults to 121.
+            height (Optional[int]): The height in pixels of the generated video. Defaults to 704.
+            width (Optional[int]): The width in pixels of the generated video. Defaults to 1280.
+            fps (Optional[int]): The frames per second of the generated video.  Defaults to 30.
+            max_seq_len (Optional[int]): Maximum sequence length of prompt embeds.
+            embedding_dim (Optional[int]): Embedding vector dimension of prompt embeds.
+            num_channels_latents (Optional[int]): The number of channels in latent space.
+            latent_height (Optional[int]): The height in pixels in latent space.
+            latent_width (Optional[int]): The width in pixels in latent space.
+            kwargs: Additional arguments passed to the parent RBLNModelConfig.
+        Raises:
+            ValueError: If batch_size is not a positive integer.
+        """
+        if kwargs.get("timeout") is None:
+            kwargs["timeout"] = 80
+        super().__init__(**kwargs)
+        self.batch_size = batch_size or 1
+        self.num_frames = num_frames or 121
+        self.height = height or 704
+        self.width = width or 1280
+        self.fps = fps or 30
+        self.max_seq_len = max_seq_len
+        self.num_channels_latents = num_channels_latents
+        self.num_latent_frames = num_latent_frames
+        self.latent_height = latent_height
+        self.latent_width = latent_width
+        self.embedding_dim = embedding_dim
+        if not isinstance(self.batch_size, int) or self.batch_size < 0:
+            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")