optimum-rbln 0.9.3.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of optimum-rbln has been flagged as potentially problematic; consult the package registry's advisory page for details.
- optimum/rbln/__init__.py +505 -0
- optimum/rbln/__version__.py +34 -0
- optimum/rbln/cli.py +660 -0
- optimum/rbln/configuration_utils.py +968 -0
- optimum/rbln/diffusers/__init__.py +198 -0
- optimum/rbln/diffusers/configurations/__init__.py +37 -0
- optimum/rbln/diffusers/configurations/models/__init__.py +10 -0
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl.py +73 -0
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_cosmos.py +84 -0
- optimum/rbln/diffusers/configurations/models/configuration_autoencoder_kl_temporal_decoder.py +67 -0
- optimum/rbln/diffusers/configurations/models/configuration_controlnet.py +64 -0
- optimum/rbln/diffusers/configurations/models/configuration_prior_transformer.py +59 -0
- optimum/rbln/diffusers/configurations/models/configuration_transformer_cosmos.py +78 -0
- optimum/rbln/diffusers/configurations/models/configuration_transformer_sd3.py +63 -0
- optimum/rbln/diffusers/configurations/models/configuration_unet_2d_condition.py +81 -0
- optimum/rbln/diffusers/configurations/models/configuration_unet_spatio_temporal_condition.py +59 -0
- optimum/rbln/diffusers/configurations/models/configuration_vq_model.py +74 -0
- optimum/rbln/diffusers/configurations/pipelines/__init__.py +34 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_controlnet.py +316 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_cosmos.py +117 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_kandinsky2_2.py +363 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion.py +156 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_3.py +176 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_diffusion_xl.py +159 -0
- optimum/rbln/diffusers/configurations/pipelines/configuration_stable_video_diffusion.py +114 -0
- optimum/rbln/diffusers/modeling_diffusers.py +451 -0
- optimum/rbln/diffusers/models/__init__.py +64 -0
- optimum/rbln/diffusers/models/autoencoders/__init__.py +18 -0
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl.py +255 -0
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_cosmos.py +245 -0
- optimum/rbln/diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +275 -0
- optimum/rbln/diffusers/models/autoencoders/vae.py +178 -0
- optimum/rbln/diffusers/models/autoencoders/vq_model.py +211 -0
- optimum/rbln/diffusers/models/controlnet.py +281 -0
- optimum/rbln/diffusers/models/transformers/__init__.py +17 -0
- optimum/rbln/diffusers/models/transformers/prior_transformer.py +160 -0
- optimum/rbln/diffusers/models/transformers/transformer_cosmos.py +344 -0
- optimum/rbln/diffusers/models/transformers/transformer_sd3.py +191 -0
- optimum/rbln/diffusers/models/unets/__init__.py +16 -0
- optimum/rbln/diffusers/models/unets/unet_2d_condition.py +408 -0
- optimum/rbln/diffusers/models/unets/unet_spatio_temporal_condition.py +201 -0
- optimum/rbln/diffusers/pipelines/__init__.py +113 -0
- optimum/rbln/diffusers/pipelines/auto_pipeline.py +307 -0
- optimum/rbln/diffusers/pipelines/controlnet/__init__.py +19 -0
- optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +139 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +669 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +640 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +825 -0
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +837 -0
- optimum/rbln/diffusers/pipelines/cosmos/__init__.py +17 -0
- optimum/rbln/diffusers/pipelines/cosmos/configuration_cosmos_guardrail.py +113 -0
- optimum/rbln/diffusers/pipelines/cosmos/cosmos_guardrail.py +425 -0
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +128 -0
- optimum/rbln/diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +128 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/__init__.py +23 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +34 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +207 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +34 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpaint.py +34 -0
- optimum/rbln/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +17 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +32 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +17 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +17 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +31 -0
- optimum/rbln/diffusers/pipelines/stable_video_diffusion/__init__.py +15 -0
- optimum/rbln/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +46 -0
- optimum/rbln/modeling.py +364 -0
- optimum/rbln/modeling_base.py +637 -0
- optimum/rbln/ops/__init__.py +19 -0
- optimum/rbln/ops/attn.py +455 -0
- optimum/rbln/ops/flash_attn.py +350 -0
- optimum/rbln/ops/kv_cache_update.py +29 -0
- optimum/rbln/ops/linear.py +32 -0
- optimum/rbln/ops/sliding_window_attn.py +111 -0
- optimum/rbln/transformers/__init__.py +340 -0
- optimum/rbln/transformers/configuration_generic.py +120 -0
- optimum/rbln/transformers/modeling_attention_utils.py +385 -0
- optimum/rbln/transformers/modeling_generic.py +280 -0
- optimum/rbln/transformers/modeling_outputs.py +37 -0
- optimum/rbln/transformers/modeling_rope_utils.py +314 -0
- optimum/rbln/transformers/models/__init__.py +343 -0
- optimum/rbln/transformers/models/audio_spectrogram_transformer/__init__.py +17 -0
- optimum/rbln/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py +47 -0
- optimum/rbln/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py +91 -0
- optimum/rbln/transformers/models/auto/__init__.py +31 -0
- optimum/rbln/transformers/models/auto/auto_factory.py +267 -0
- optimum/rbln/transformers/models/auto/modeling_auto.py +162 -0
- optimum/rbln/transformers/models/bart/__init__.py +17 -0
- optimum/rbln/transformers/models/bart/bart_architecture.py +163 -0
- optimum/rbln/transformers/models/bart/configuration_bart.py +36 -0
- optimum/rbln/transformers/models/bart/modeling_bart.py +86 -0
- optimum/rbln/transformers/models/bert/__init__.py +16 -0
- optimum/rbln/transformers/models/bert/bert_architecture.py +16 -0
- optimum/rbln/transformers/models/bert/configuration_bert.py +46 -0
- optimum/rbln/transformers/models/bert/modeling_bert.py +148 -0
- optimum/rbln/transformers/models/blip_2/__init__.py +20 -0
- optimum/rbln/transformers/models/blip_2/configuration_blip_2.py +115 -0
- optimum/rbln/transformers/models/blip_2/modeling_blip_2.py +526 -0
- optimum/rbln/transformers/models/clip/__init__.py +26 -0
- optimum/rbln/transformers/models/clip/configuration_clip.py +103 -0
- optimum/rbln/transformers/models/clip/modeling_clip.py +384 -0
- optimum/rbln/transformers/models/colpali/__init__.py +2 -0
- optimum/rbln/transformers/models/colpali/colpali_architecture.py +218 -0
- optimum/rbln/transformers/models/colpali/configuration_colpali.py +84 -0
- optimum/rbln/transformers/models/colpali/modeling_colpali.py +361 -0
- optimum/rbln/transformers/models/colqwen2/__init__.py +2 -0
- optimum/rbln/transformers/models/colqwen2/colqwen2_architecture.py +233 -0
- optimum/rbln/transformers/models/colqwen2/configuration_colqwen2.py +74 -0
- optimum/rbln/transformers/models/colqwen2/modeling_colqwen2.py +446 -0
- optimum/rbln/transformers/models/decoderonly/__init__.py +27 -0
- optimum/rbln/transformers/models/decoderonly/configuration_decoderonly.py +300 -0
- optimum/rbln/transformers/models/decoderonly/configuration_lora.py +411 -0
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +1224 -0
- optimum/rbln/transformers/models/decoderonly/decoderonly_runtime_utils.py +508 -0
- optimum/rbln/transformers/models/decoderonly/generation_decoderonly.py +119 -0
- optimum/rbln/transformers/models/decoderonly/lora_architecture.py +204 -0
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +823 -0
- optimum/rbln/transformers/models/depth_anything/__init__.py +16 -0
- optimum/rbln/transformers/models/depth_anything/configuration_depth_anything.py +24 -0
- optimum/rbln/transformers/models/depth_anything/modeling_depth_anything.py +42 -0
- optimum/rbln/transformers/models/distilbert/__init__.py +19 -0
- optimum/rbln/transformers/models/distilbert/configuration_distilbert.py +24 -0
- optimum/rbln/transformers/models/distilbert/modeling_distilbert.py +51 -0
- optimum/rbln/transformers/models/dpt/__init__.py +16 -0
- optimum/rbln/transformers/models/dpt/configuration_dpt.py +24 -0
- optimum/rbln/transformers/models/dpt/modeling_dpt.py +42 -0
- optimum/rbln/transformers/models/exaone/__init__.py +24 -0
- optimum/rbln/transformers/models/exaone/configuration_exaone.py +42 -0
- optimum/rbln/transformers/models/exaone/exaone_architecture.py +77 -0
- optimum/rbln/transformers/models/exaone/modeling_exaone.py +145 -0
- optimum/rbln/transformers/models/gemma/__init__.py +16 -0
- optimum/rbln/transformers/models/gemma/configuration_gemma.py +50 -0
- optimum/rbln/transformers/models/gemma/gemma_architecture.py +27 -0
- optimum/rbln/transformers/models/gemma/modeling_gemma.py +104 -0
- optimum/rbln/transformers/models/gemma3/__init__.py +16 -0
- optimum/rbln/transformers/models/gemma3/configuration_gemma3.py +109 -0
- optimum/rbln/transformers/models/gemma3/gemma3_architecture.py +170 -0
- optimum/rbln/transformers/models/gemma3/gemma3_runtime_utils.py +245 -0
- optimum/rbln/transformers/models/gemma3/modeling_gemma3.py +611 -0
- optimum/rbln/transformers/models/gpt2/__init__.py +16 -0
- optimum/rbln/transformers/models/gpt2/configuration_gpt2.py +50 -0
- optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +93 -0
- optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +55 -0
- optimum/rbln/transformers/models/grounding_dino/__init__.py +10 -0
- optimum/rbln/transformers/models/grounding_dino/configuration_grounding_dino.py +92 -0
- optimum/rbln/transformers/models/grounding_dino/grounding_dino_architecture.py +599 -0
- optimum/rbln/transformers/models/grounding_dino/modeling_grounding_dino.py +1048 -0
- optimum/rbln/transformers/models/idefics3/__init__.py +16 -0
- optimum/rbln/transformers/models/idefics3/configuration_idefics3.py +89 -0
- optimum/rbln/transformers/models/idefics3/modeling_idefics3.py +497 -0
- optimum/rbln/transformers/models/llama/__init__.py +16 -0
- optimum/rbln/transformers/models/llama/configuration_llama.py +50 -0
- optimum/rbln/transformers/models/llama/llama_architecture.py +19 -0
- optimum/rbln/transformers/models/llama/modeling_llama.py +104 -0
- optimum/rbln/transformers/models/llava/__init__.py +16 -0
- optimum/rbln/transformers/models/llava/configuration_llava.py +72 -0
- optimum/rbln/transformers/models/llava/modeling_llava.py +490 -0
- optimum/rbln/transformers/models/llava_next/__init__.py +16 -0
- optimum/rbln/transformers/models/llava_next/configuration_llava_next.py +69 -0
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +493 -0
- optimum/rbln/transformers/models/midm/__init__.py +24 -0
- optimum/rbln/transformers/models/midm/configuration_midm.py +42 -0
- optimum/rbln/transformers/models/midm/midm_architecture.py +144 -0
- optimum/rbln/transformers/models/midm/modeling_midm.py +144 -0
- optimum/rbln/transformers/models/mistral/__init__.py +16 -0
- optimum/rbln/transformers/models/mistral/configuration_mistral.py +50 -0
- optimum/rbln/transformers/models/mistral/mistral_architecture.py +19 -0
- optimum/rbln/transformers/models/mistral/modeling_mistral.py +115 -0
- optimum/rbln/transformers/models/opt/__init__.py +16 -0
- optimum/rbln/transformers/models/opt/configuration_opt.py +29 -0
- optimum/rbln/transformers/models/opt/modeling_opt.py +102 -0
- optimum/rbln/transformers/models/opt/opt_architecture.py +74 -0
- optimum/rbln/transformers/models/pegasus/__init__.py +17 -0
- optimum/rbln/transformers/models/pegasus/configuration_pegasus.py +38 -0
- optimum/rbln/transformers/models/pegasus/modeling_pegasus.py +71 -0
- optimum/rbln/transformers/models/pegasus/pegasus_architecture.py +161 -0
- optimum/rbln/transformers/models/phi/__init__.py +16 -0
- optimum/rbln/transformers/models/phi/configuration_phi.py +50 -0
- optimum/rbln/transformers/models/phi/modeling_phi.py +92 -0
- optimum/rbln/transformers/models/phi/phi_architecture.py +115 -0
- optimum/rbln/transformers/models/pixtral/__init__.py +16 -0
- optimum/rbln/transformers/models/pixtral/configuration_pixtral.py +43 -0
- optimum/rbln/transformers/models/pixtral/modeling_pixtral.py +322 -0
- optimum/rbln/transformers/models/pixtral/pixtral_architecture.py +73 -0
- optimum/rbln/transformers/models/qwen2/__init__.py +16 -0
- optimum/rbln/transformers/models/qwen2/configuration_qwen2.py +50 -0
- optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +123 -0
- optimum/rbln/transformers/models/qwen2/qwen2_architecture.py +19 -0
- optimum/rbln/transformers/models/qwen2_5_vl/__init__.py +19 -0
- optimum/rbln/transformers/models/qwen2_5_vl/configuration_qwen2_5_vl.py +111 -0
- optimum/rbln/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py +636 -0
- optimum/rbln/transformers/models/qwen2_5_vl/qwen2_5_vl_architecture.py +220 -0
- optimum/rbln/transformers/models/qwen2_vl/__init__.py +19 -0
- optimum/rbln/transformers/models/qwen2_vl/configuration_qwen2_vl.py +88 -0
- optimum/rbln/transformers/models/qwen2_vl/modeling_qwen2_vl.py +513 -0
- optimum/rbln/transformers/models/qwen2_vl/qwen2_vl_architecture.py +165 -0
- optimum/rbln/transformers/models/qwen3/__init__.py +16 -0
- optimum/rbln/transformers/models/qwen3/configuration_qwen3.py +71 -0
- optimum/rbln/transformers/models/qwen3/modeling_qwen3.py +133 -0
- optimum/rbln/transformers/models/qwen3/qwen3_architecture.py +31 -0
- optimum/rbln/transformers/models/resnet/__init__.py +23 -0
- optimum/rbln/transformers/models/resnet/configuration_resnet.py +42 -0
- optimum/rbln/transformers/models/resnet/modeling_resnet.py +99 -0
- optimum/rbln/transformers/models/roberta/__init__.py +24 -0
- optimum/rbln/transformers/models/roberta/configuration_roberta.py +33 -0
- optimum/rbln/transformers/models/roberta/modeling_roberta.py +72 -0
- optimum/rbln/transformers/models/seq2seq/__init__.py +16 -0
- optimum/rbln/transformers/models/seq2seq/configuration_seq2seq.py +71 -0
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +477 -0
- optimum/rbln/transformers/models/seq2seq/seq2seq_architecture.py +527 -0
- optimum/rbln/transformers/models/siglip/__init__.py +16 -0
- optimum/rbln/transformers/models/siglip/configuration_siglip.py +76 -0
- optimum/rbln/transformers/models/siglip/modeling_siglip.py +199 -0
- optimum/rbln/transformers/models/swin/__init__.py +16 -0
- optimum/rbln/transformers/models/swin/configuration_swin.py +42 -0
- optimum/rbln/transformers/models/swin/modeling_swin.py +354 -0
- optimum/rbln/transformers/models/t5/__init__.py +17 -0
- optimum/rbln/transformers/models/t5/configuration_t5.py +36 -0
- optimum/rbln/transformers/models/t5/modeling_t5.py +130 -0
- optimum/rbln/transformers/models/t5/t5_architecture.py +264 -0
- optimum/rbln/transformers/models/time_series_transformer/__init__.py +26 -0
- optimum/rbln/transformers/models/time_series_transformer/configuration_time_series_transformer.py +41 -0
- optimum/rbln/transformers/models/time_series_transformer/modeling_time_series_transformer.py +435 -0
- optimum/rbln/transformers/models/time_series_transformer/time_series_transformers_architecture.py +337 -0
- optimum/rbln/transformers/models/vit/__init__.py +19 -0
- optimum/rbln/transformers/models/vit/configuration_vit.py +24 -0
- optimum/rbln/transformers/models/vit/modeling_vit.py +44 -0
- optimum/rbln/transformers/models/wav2vec2/__init__.py +16 -0
- optimum/rbln/transformers/models/wav2vec2/configuration_wav2vec2.py +38 -0
- optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +104 -0
- optimum/rbln/transformers/models/whisper/__init__.py +17 -0
- optimum/rbln/transformers/models/whisper/configuration_whisper.py +72 -0
- optimum/rbln/transformers/models/whisper/generation_whisper.py +159 -0
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +475 -0
- optimum/rbln/transformers/models/whisper/whisper_architecture.py +349 -0
- optimum/rbln/transformers/models/xlm_roberta/__init__.py +24 -0
- optimum/rbln/transformers/models/xlm_roberta/configuration_xlm_roberta.py +32 -0
- optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +82 -0
- optimum/rbln/transformers/utils/__init__.py +0 -0
- optimum/rbln/transformers/utils/rbln_quantization.py +589 -0
- optimum/rbln/transformers/utils/rbln_runtime_wrapper.py +79 -0
- optimum/rbln/utils/__init__.py +16 -0
- optimum/rbln/utils/decorator_utils.py +86 -0
- optimum/rbln/utils/deprecation.py +213 -0
- optimum/rbln/utils/hub.py +94 -0
- optimum/rbln/utils/import_utils.py +170 -0
- optimum/rbln/utils/logging.py +110 -0
- optimum/rbln/utils/model_utils.py +63 -0
- optimum/rbln/utils/runtime_utils.py +249 -0
- optimum/rbln/utils/save_utils.py +102 -0
- optimum/rbln/utils/submodule.py +152 -0
- optimum_rbln-0.9.3.post1.dist-info/METADATA +124 -0
- optimum_rbln-0.9.3.post1.dist-info/RECORD +264 -0
- optimum_rbln-0.9.3.post1.dist-info/WHEEL +4 -0
- optimum_rbln-0.9.3.post1.dist-info/entry_points.txt +2 -0
- optimum_rbln-0.9.3.post1.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# Copyright 2025 Rebellions Inc. All rights reserved.
|
|
2
|
+
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at:
|
|
6
|
+
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Any, Optional, Tuple, Union
|
|
16
|
+
|
|
17
|
+
from ....configuration_utils import RBLNModelConfig
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class RBLNSD3Transformer2DModelConfig(RBLNModelConfig):
    """
    RBLN compilation configuration for the Stable Diffusion 3 transformer.

    Extends RBLNModelConfig with the options needed to compile the SD3
    Transformer2D model: inference batch size, output spatial size, and the
    length of the conditioning prompt embeddings.
    """

    # Tracks whether the caller explicitly passed a batch size; excluded from serialization.
    subclass_non_save_attributes = ["_batch_size_is_specified"]

    def __init__(
        self,
        batch_size: Optional[int] = None,
        sample_size: Optional[Union[int, Tuple[int, int]]] = None,
        prompt_embed_length: Optional[int] = None,
        **kwargs: Any,
    ):
        """
        Args:
            batch_size (Optional[int]): Inference batch size. Defaults to 1 when omitted.
            sample_size (Optional[Union[int, Tuple[int, int]]]): Spatial size (height, width)
                of generated samples; a single int is applied to both dimensions.
            prompt_embed_length (Optional[int]): Sequence length of the prompt embeddings
                used to condition the transformer.
            kwargs: Forwarded to the parent RBLNModelConfig.

        Raises:
            ValueError: If batch_size is not a positive integer.
        """
        super().__init__(**kwargs)
        # Record explicit-vs-default before the fallback below erases the distinction.
        self._batch_size_is_specified = batch_size is not None

        # NOTE(review): a falsy batch_size (e.g. 0) is silently coerced to 1 here,
        # so the validation below only rejects negatives and non-ints.
        self.batch_size = batch_size if batch_size else 1
        if not (isinstance(self.batch_size, int) and self.batch_size >= 0):
            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")

        self.prompt_embed_length = prompt_embed_length
        # Normalize a scalar size to a square (height, width) pair.
        if isinstance(sample_size, int):
            self.sample_size = (sample_size, sample_size)
        else:
            self.sample_size = sample_size

    @property
    def batch_size_is_specified(self) -> bool:
        # True only when the caller supplied batch_size explicitly.
        return self._batch_size_is_specified
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# Copyright 2025 Rebellions Inc. All rights reserved.
|
|
2
|
+
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at:
|
|
6
|
+
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Any, Optional, Tuple
|
|
16
|
+
|
|
17
|
+
from ....configuration_utils import RBLNModelConfig
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class RBLNUNet2DConditionModelConfig(RBLNModelConfig):
    """
    RBLN compilation configuration for UNet2DCondition models.

    Extends RBLNModelConfig with the options needed to compile the conditional
    UNet used by diffusion image-generation pipelines: batch size, spatial
    sample size, channel/feature dimensions, and encoder hidden sizes.
    """

    # Tracks whether the caller explicitly passed a batch size; excluded from serialization.
    subclass_non_save_attributes = ["_batch_size_is_specified"]

    def __init__(
        self,
        batch_size: Optional[int] = None,
        sample_size: Optional[Tuple[int, int]] = None,
        in_channels: Optional[int] = None,
        cross_attention_dim: Optional[int] = None,
        use_additional_residuals: Optional[bool] = None,
        max_seq_len: Optional[int] = None,
        in_features: Optional[int] = None,
        text_model_hidden_size: Optional[int] = None,
        image_model_hidden_size: Optional[int] = None,
        **kwargs: Any,
    ):
        """
        Args:
            batch_size (Optional[int]): Inference batch size. Defaults to 1 when omitted.
            sample_size (Optional[Tuple[int, int]]): Spatial size (height, width) of the
                generated samples; a single int is applied to both dimensions.
            in_channels (Optional[int]): Number of input channels for the UNet.
            cross_attention_dim (Optional[int]): Dimension of the cross-attention features.
            use_additional_residuals (Optional[bool]): Whether additional residual
                connections (e.g. ControlNet residuals) are fed into the model.
            max_seq_len (Optional[int]): Maximum text-sequence length for cross-attention.
            in_features (Optional[int]): Number of input features for the model.
            text_model_hidden_size (Optional[int]): Hidden size of the text encoder.
            image_model_hidden_size (Optional[int]): Hidden size of the image encoder.
            kwargs: Forwarded to the parent RBLNModelConfig.

        Raises:
            ValueError: If batch_size is not a positive integer.
        """
        super().__init__(**kwargs)
        # Record explicit-vs-default before the fallback below erases the distinction.
        self._batch_size_is_specified = batch_size is not None

        # NOTE(review): a falsy batch_size (e.g. 0) is silently coerced to 1 here,
        # so the validation below only rejects negatives and non-ints.
        self.batch_size = batch_size if batch_size else 1
        if not (isinstance(self.batch_size, int) and self.batch_size >= 0):
            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")

        self.in_channels = in_channels
        self.cross_attention_dim = cross_attention_dim
        self.use_additional_residuals = use_additional_residuals
        self.max_seq_len = max_seq_len
        self.in_features = in_features
        self.text_model_hidden_size = text_model_hidden_size
        self.image_model_hidden_size = image_model_hidden_size

        # Normalize a scalar size to a square (height, width) pair.
        self.sample_size = (sample_size, sample_size) if isinstance(sample_size, int) else sample_size

    @property
    def batch_size_is_specified(self) -> bool:
        # True only when the caller supplied batch_size explicitly.
        return self._batch_size_is_specified
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# Copyright 2025 Rebellions Inc. All rights reserved.
|
|
2
|
+
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at:
|
|
6
|
+
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Any, Optional, Tuple
|
|
16
|
+
|
|
17
|
+
from ....configuration_utils import RBLNModelConfig
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class RBLNUNetSpatioTemporalConditionModelConfig(RBLNModelConfig):
    """
    RBLN compilation configuration for UNetSpatioTemporalCondition models.

    Extends RBLNModelConfig with the options needed to compile the
    spatio-temporal UNet used by video diffusion pipelines: batch size,
    spatial sample size, input feature width, and frame count.
    """

    # Tracks whether the caller explicitly passed a batch size; excluded from serialization.
    subclass_non_save_attributes = ["_batch_size_is_specified"]

    def __init__(
        self,
        batch_size: Optional[int] = None,
        sample_size: Optional[Tuple[int, int]] = None,
        in_features: Optional[int] = None,
        num_frames: Optional[int] = None,
        **kwargs: Any,
    ):
        """
        Args:
            batch_size (Optional[int]): Inference batch size. Defaults to 1 when omitted.
            sample_size (Optional[Tuple[int, int]]): Spatial size (height, width) of the
                generated samples; a single int is applied to both dimensions.
            in_features (Optional[int]): Number of input features for the model.
            num_frames (Optional[int]): Number of frames in the generated video.
            kwargs: Forwarded to the parent RBLNModelConfig.

        Raises:
            ValueError: If batch_size is not a positive integer.
        """
        super().__init__(**kwargs)
        # Record explicit-vs-default before the fallback below erases the distinction.
        self._batch_size_is_specified = batch_size is not None

        # NOTE(review): a falsy batch_size (e.g. 0) is silently coerced to 1 here,
        # so the validation below only rejects negatives and non-ints.
        self.batch_size = batch_size if batch_size else 1
        if not (isinstance(self.batch_size, int) and self.batch_size >= 0):
            raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")

        self.in_features = in_features
        self.num_frames = num_frames

        # Normalize a scalar size to a square (height, width) pair.
        self.sample_size = (sample_size, sample_size) if isinstance(sample_size, int) else sample_size

    @property
    def batch_size_is_specified(self) -> bool:
        # True only when the caller supplied batch_size explicitly.
        return self._batch_size_is_specified
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Copyright 2025 Rebellions Inc. All rights reserved.
|
|
2
|
+
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at:
|
|
6
|
+
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Any, Optional, Tuple
|
|
16
|
+
|
|
17
|
+
from ....configuration_utils import RBLNModelConfig
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class RBLNVQModelConfig(RBLNModelConfig):
|
|
21
|
+
"""
|
|
22
|
+
Configuration class for RBLN VQModel models, used in Kandinsky.
|
|
23
|
+
|
|
24
|
+
This class inherits from RBLNModelConfig and provides specific configuration options
|
|
25
|
+
for VQModel, which acts similarly to a VAE but uses vector quantization.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
def __init__(
|
|
29
|
+
self,
|
|
30
|
+
batch_size: Optional[int] = None,
|
|
31
|
+
sample_size: Optional[Tuple[int, int]] = None,
|
|
32
|
+
uses_encoder: Optional[bool] = None,
|
|
33
|
+
vqmodel_scale_factor: Optional[float] = None, # TODO: rename to scaling_factor
|
|
34
|
+
in_channels: Optional[int] = None,
|
|
35
|
+
latent_channels: Optional[int] = None,
|
|
36
|
+
**kwargs: Any,
|
|
37
|
+
):
|
|
38
|
+
"""
|
|
39
|
+
Args:
|
|
40
|
+
batch_size (Optional[int]): The batch size for inference. Defaults to 1.
|
|
41
|
+
sample_size (Optional[Tuple[int, int]]): The spatial dimensions (height, width) of the input/output images.
|
|
42
|
+
If an integer is provided, it's used for both height and width.
|
|
43
|
+
uses_encoder (Optional[bool]): Whether to include the encoder part of the VAE in the model.
|
|
44
|
+
When False, only the decoder is used (for latent-to-image conversion).
|
|
45
|
+
vqmodel_scale_factor (Optional[float]): The scaling factor between pixel space and latent space.
|
|
46
|
+
Determines the downsampling ratio between original images and latent representations.
|
|
47
|
+
in_channels (Optional[int]): Number of input channels for the model.
|
|
48
|
+
latent_channels (Optional[int]): Number of channels in the latent space.
|
|
49
|
+
kwargs: Additional arguments passed to the parent RBLNModelConfig.
|
|
50
|
+
|
|
51
|
+
Raises:
|
|
52
|
+
ValueError: If batch_size is not a positive integer.
|
|
53
|
+
"""
|
|
54
|
+
super().__init__(**kwargs)
|
|
55
|
+
self.batch_size = batch_size or 1
|
|
56
|
+
if not isinstance(self.batch_size, int) or self.batch_size < 0:
|
|
57
|
+
raise ValueError(f"batch_size must be a positive integer, got {self.batch_size}")
|
|
58
|
+
|
|
59
|
+
self.uses_encoder = uses_encoder
|
|
60
|
+
self.sample_size = sample_size
|
|
61
|
+
if isinstance(self.sample_size, int):
|
|
62
|
+
self.sample_size = (self.sample_size, self.sample_size)
|
|
63
|
+
|
|
64
|
+
self.vqmodel_scale_factor = vqmodel_scale_factor
|
|
65
|
+
self.in_channels = in_channels
|
|
66
|
+
self.latent_channels = latent_channels
|
|
67
|
+
|
|
68
|
+
@property
def image_size(self):
    """Alias for ``sample_size``: the (height, width) of input/output images."""
    return self.sample_size
|
|
71
|
+
|
|
72
|
+
@property
def latent_sample_size(self):
    """Spatial size of the latent grid: image size floor-divided by the VQ scale factor."""
    # NOTE(review): vqmodel_scale_factor is typed Optional[float]; floor division by a
    # float produces float dimensions (e.g. 64.0) — confirm downstream accepts that.
    height, width = self.image_size
    factor = self.vqmodel_scale_factor
    return (height // factor, width // factor)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
from .configuration_controlnet import (
|
|
2
|
+
RBLNStableDiffusionControlNetImg2ImgPipelineConfig,
|
|
3
|
+
RBLNStableDiffusionControlNetPipelineConfig,
|
|
4
|
+
RBLNStableDiffusionXLControlNetImg2ImgPipelineConfig,
|
|
5
|
+
RBLNStableDiffusionXLControlNetPipelineConfig,
|
|
6
|
+
)
|
|
7
|
+
from .configuration_cosmos import RBLNCosmosTextToWorldPipelineConfig, RBLNCosmosVideoToWorldPipelineConfig
|
|
8
|
+
from .configuration_kandinsky2_2 import (
|
|
9
|
+
RBLNKandinskyV22CombinedPipelineConfig,
|
|
10
|
+
RBLNKandinskyV22Img2ImgCombinedPipelineConfig,
|
|
11
|
+
RBLNKandinskyV22Img2ImgPipelineConfig,
|
|
12
|
+
RBLNKandinskyV22InpaintCombinedPipelineConfig,
|
|
13
|
+
RBLNKandinskyV22InpaintPipelineConfig,
|
|
14
|
+
RBLNKandinskyV22PipelineConfig,
|
|
15
|
+
RBLNKandinskyV22PriorPipelineConfig,
|
|
16
|
+
)
|
|
17
|
+
from .configuration_stable_diffusion import (
|
|
18
|
+
RBLNStableDiffusionImg2ImgPipelineConfig,
|
|
19
|
+
RBLNStableDiffusionInpaintPipelineConfig,
|
|
20
|
+
RBLNStableDiffusionPipelineConfig,
|
|
21
|
+
)
|
|
22
|
+
from .configuration_stable_diffusion_3 import (
|
|
23
|
+
RBLNStableDiffusion3Img2ImgPipelineConfig,
|
|
24
|
+
RBLNStableDiffusion3InpaintPipelineConfig,
|
|
25
|
+
RBLNStableDiffusion3PipelineConfig,
|
|
26
|
+
)
|
|
27
|
+
from .configuration_stable_diffusion_xl import (
|
|
28
|
+
RBLNStableDiffusionXLImg2ImgPipelineConfig,
|
|
29
|
+
RBLNStableDiffusionXLInpaintPipelineConfig,
|
|
30
|
+
RBLNStableDiffusionXLPipelineConfig,
|
|
31
|
+
)
|
|
32
|
+
from .configuration_stable_video_diffusion import (
|
|
33
|
+
RBLNStableVideoDiffusionPipelineConfig,
|
|
34
|
+
)
|
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
# Copyright 2025 Rebellions Inc. All rights reserved.
|
|
2
|
+
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at:
|
|
6
|
+
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Any, Optional, Tuple
|
|
16
|
+
|
|
17
|
+
from ....configuration_utils import RBLNModelConfig
|
|
18
|
+
from ....transformers import RBLNCLIPTextModelConfig, RBLNCLIPTextModelWithProjectionConfig
|
|
19
|
+
from ..models import RBLNAutoencoderKLConfig, RBLNControlNetModelConfig, RBLNUNet2DConditionModelConfig
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class RBLNStableDiffusionControlNetPipelineBaseConfig(RBLNModelConfig):
    """
    Base configuration for Stable Diffusion ControlNet pipelines.
    """

    submodules = ["text_encoder", "unet", "vae", "controlnet"]
    # Subclasses set this to True when the pipeline needs the VAE encoder (img2img).
    _vae_uses_encoder = False

    def __init__(
        self,
        text_encoder: Optional[RBLNCLIPTextModelConfig] = None,
        unet: Optional[RBLNUNet2DConditionModelConfig] = None,
        vae: Optional[RBLNAutoencoderKLConfig] = None,
        controlnet: Optional[RBLNControlNetModelConfig] = None,
        *,
        batch_size: Optional[int] = None,
        img_height: Optional[int] = None,
        img_width: Optional[int] = None,
        height: Optional[int] = None,
        width: Optional[int] = None,
        sample_size: Optional[Tuple[int, int]] = None,
        image_size: Optional[Tuple[int, int]] = None,
        guidance_scale: Optional[float] = None,
        **kwargs: Any,
    ):
        """
        Args:
            text_encoder (Optional[RBLNCLIPTextModelConfig]): Configuration for the text encoder component.
                Initialized as RBLNCLIPTextModelConfig if not provided.
            unet (Optional[RBLNUNet2DConditionModelConfig]): Configuration for the UNet model component.
                Initialized as RBLNUNet2DConditionModelConfig if not provided.
            vae (Optional[RBLNAutoencoderKLConfig]): Configuration for the VAE model component.
                Initialized as RBLNAutoencoderKLConfig if not provided.
            controlnet (Optional[RBLNControlNetModelConfig]): Configuration for the ControlNet model component.
                Initialized as RBLNControlNetModelConfig if not provided.
            batch_size (Optional[int]): Batch size for inference, applied to all submodules.
            img_height (Optional[int]): Height of the generated images (legacy alias for ``height``).
            img_width (Optional[int]): Width of the generated images (legacy alias for ``width``).
            height (Optional[int]): Height of the generated images.
            width (Optional[int]): Width of the generated images.
            sample_size (Optional[Tuple[int, int]]): Spatial dimensions for the UNet model.
            image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
                Cannot be used together with height/width or img_height/img_width.
            guidance_scale (Optional[float]): Scale for classifier-free guidance.
            kwargs: Additional arguments passed to the parent RBLNModelConfig.

        Raises:
            ValueError: If image_size is combined with any height/width argument, if both
                naming styles (height/width and img_height/img_width) are given, or if
                only one value of a height/width pair is provided.

        Note:
            When guidance_scale > 1.0, the UNet and ControlNet batch sizes are
            automatically doubled to accommodate classifier-free guidance.
        """
        super().__init__(**kwargs)

        # image_size is mutually exclusive with every height/width-style argument.
        if image_size is not None and (
            img_height is not None or img_width is not None or height is not None or width is not None
        ):
            raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")

        # Prioritize height/width (HF-aligned); img_height/img_width kept for backward compatibility.
        if height is not None and width is not None:
            if img_height is not None or img_width is not None:
                raise ValueError(
                    "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
                    "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
                )
            image_size = (height, width)
        elif (height is not None) != (width is not None):
            raise ValueError("Both height and width must be provided together if used")
        elif img_height is not None and img_width is not None:
            image_size = (img_height, img_width)
        elif (img_height is not None) != (img_width is not None):
            raise ValueError("Both img_height and img_width must be provided together if used")

        self.text_encoder = self.initialize_submodule_config(
            text_encoder,
            cls_name="RBLNCLIPTextModelConfig",
            batch_size=batch_size,
        )
        self.unet = self.initialize_submodule_config(
            unet,
            cls_name="RBLNUNet2DConditionModelConfig",
            sample_size=sample_size,
        )
        self.vae = self.initialize_submodule_config(
            vae,
            cls_name="RBLNAutoencoderKLConfig",
            batch_size=batch_size,
            uses_encoder=self.__class__._vae_uses_encoder,
            sample_size=image_size,  # image size is equal to sample size in vae
        )
        self.controlnet = self.initialize_submodule_config(
            controlnet,
            cls_name="RBLNControlNetModelConfig",
        )

        # Fall back to the original pipeline class's default guidance scale so the
        # diffusion batch can be sized for classifier-free guidance.
        if guidance_scale is None:
            guidance_scale = self.get_default_values_for_original_cls("__call__", ["guidance_scale"])["guidance_scale"]

        if guidance_scale is not None:
            # CFG runs conditional + unconditional passes together, doubling the batch.
            multiplier = 2 if guidance_scale > 1.0 else 1
            if not self.unet.batch_size_is_specified:
                self.unet.batch_size = self.text_encoder.batch_size * multiplier
            if not self.controlnet.batch_size_is_specified:
                self.controlnet.batch_size = self.text_encoder.batch_size * multiplier

    @property
    def batch_size(self):
        # Pipeline-level batch size mirrors the VAE's.
        return self.vae.batch_size

    @property
    def sample_size(self):
        # UNet (latent) sample size.
        return self.unet.sample_size

    @property
    def image_size(self):
        # VAE sample size equals the pixel-space image size.
        return self.vae.sample_size
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
class RBLNStableDiffusionControlNetPipelineConfig(RBLNStableDiffusionControlNetPipelineBaseConfig):
    """Configuration for the text-to-image Stable Diffusion ControlNet pipeline."""

    # Text-to-image never encodes pixels, so the VAE encoder is not compiled.
    _vae_uses_encoder = False
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class RBLNStableDiffusionControlNetImg2ImgPipelineConfig(RBLNStableDiffusionControlNetPipelineBaseConfig):
    """Configuration for the image-to-image Stable Diffusion ControlNet pipeline."""

    # Img2img encodes the input image into latents, so the VAE encoder is compiled too.
    _vae_uses_encoder = True
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class RBLNStableDiffusionXLControlNetPipelineBaseConfig(RBLNModelConfig):
    """
    Base configuration for Stable Diffusion XL ControlNet pipelines.
    """

    submodules = ["text_encoder", "text_encoder_2", "unet", "vae", "controlnet"]
    # Subclasses set this to True when the pipeline needs the VAE encoder (img2img).
    _vae_uses_encoder = False

    def __init__(
        self,
        text_encoder: Optional[RBLNCLIPTextModelConfig] = None,
        text_encoder_2: Optional[RBLNCLIPTextModelWithProjectionConfig] = None,
        unet: Optional[RBLNUNet2DConditionModelConfig] = None,
        vae: Optional[RBLNAutoencoderKLConfig] = None,
        controlnet: Optional[RBLNControlNetModelConfig] = None,
        *,
        batch_size: Optional[int] = None,
        img_height: Optional[int] = None,
        img_width: Optional[int] = None,
        height: Optional[int] = None,
        width: Optional[int] = None,
        sample_size: Optional[Tuple[int, int]] = None,
        image_size: Optional[Tuple[int, int]] = None,
        guidance_scale: Optional[float] = None,
        **kwargs: Any,
    ):
        """
        Args:
            text_encoder (Optional[RBLNCLIPTextModelConfig]): Configuration for the primary text encoder.
                Initialized as RBLNCLIPTextModelConfig if not provided.
            text_encoder_2 (Optional[RBLNCLIPTextModelWithProjectionConfig]): Configuration for the secondary text encoder.
                Initialized as RBLNCLIPTextModelWithProjectionConfig if not provided.
            unet (Optional[RBLNUNet2DConditionModelConfig]): Configuration for the UNet model component.
                Initialized as RBLNUNet2DConditionModelConfig if not provided.
            vae (Optional[RBLNAutoencoderKLConfig]): Configuration for the VAE model component.
                Initialized as RBLNAutoencoderKLConfig if not provided.
            controlnet (Optional[RBLNControlNetModelConfig]): Configuration for the ControlNet model component.
                Initialized as RBLNControlNetModelConfig if not provided.
            batch_size (Optional[int]): Batch size for inference, applied to all submodules.
            img_height (Optional[int]): Height of the generated images (legacy alias for ``height``).
            img_width (Optional[int]): Width of the generated images (legacy alias for ``width``).
            height (Optional[int]): Height of the generated images.
            width (Optional[int]): Width of the generated images.
            sample_size (Optional[Tuple[int, int]]): Spatial dimensions for the UNet model.
            image_size (Optional[Tuple[int, int]]): Alternative way to specify image dimensions.
                Cannot be used together with height/width or img_height/img_width.
            guidance_scale (Optional[float]): Scale for classifier-free guidance.
            kwargs: Additional arguments passed to the parent RBLNModelConfig.

        Raises:
            ValueError: If image_size is combined with any height/width argument, if both
                naming styles (height/width and img_height/img_width) are given, or if
                only one value of a height/width pair is provided.

        Note:
            When guidance_scale > 1.0, the UNet and ControlNet batch sizes are
            automatically doubled to accommodate classifier-free guidance.
        """
        super().__init__(**kwargs)

        # image_size is mutually exclusive with every height/width-style argument.
        if image_size is not None and (
            img_height is not None or img_width is not None or height is not None or width is not None
        ):
            raise ValueError("image_size cannot be provided alongside img_height/img_width or height/width")

        # Prioritize height/width (HF-aligned); img_height/img_width kept for backward compatibility.
        if height is not None and width is not None:
            if img_height is not None or img_width is not None:
                raise ValueError(
                    "Cannot provide both 'height'/'width' and 'img_height'/'img_width' simultaneously. "
                    "Please use one set of arguments for image dimensions, preferring 'height'/'width'."
                )
            image_size = (height, width)
        elif (height is not None) != (width is not None):
            raise ValueError("Both height and width must be provided together if used")
        elif img_height is not None and img_width is not None:
            image_size = (img_height, img_width)
        elif (img_height is not None) != (img_width is not None):
            raise ValueError("Both img_height and img_width must be provided together if used")

        self.text_encoder = self.initialize_submodule_config(
            text_encoder,
            cls_name="RBLNCLIPTextModelConfig",
            batch_size=batch_size,
        )
        self.text_encoder_2 = self.initialize_submodule_config(
            text_encoder_2,
            cls_name="RBLNCLIPTextModelWithProjectionConfig",
            batch_size=batch_size,
        )
        self.unet = self.initialize_submodule_config(
            unet,
            cls_name="RBLNUNet2DConditionModelConfig",
            sample_size=sample_size,
        )
        self.vae = self.initialize_submodule_config(
            vae,
            cls_name="RBLNAutoencoderKLConfig",
            batch_size=batch_size,
            uses_encoder=self.__class__._vae_uses_encoder,
            sample_size=image_size,  # image size is equal to sample size in vae
        )
        self.controlnet = self.initialize_submodule_config(
            controlnet,
            cls_name="RBLNControlNetModelConfig",
        )

        # BUGFIX: the previous `guidance_scale or default` discarded an explicit
        # guidance_scale=0.0 (falsy), and comparing a possibly-None default with
        # `> 1.0` could raise TypeError. Use an explicit None check and guard,
        # matching RBLNStableDiffusionControlNetPipelineBaseConfig.
        if guidance_scale is None:
            guidance_scale = self.get_default_values_for_original_cls("__call__", ["guidance_scale"])["guidance_scale"]

        if guidance_scale is not None:
            # CFG runs conditional + unconditional passes together, doubling the batch.
            multiplier = 2 if guidance_scale > 1.0 else 1
            if not self.unet.batch_size_is_specified:
                self.unet.batch_size = self.text_encoder.batch_size * multiplier
            if not self.controlnet.batch_size_is_specified:
                self.controlnet.batch_size = self.text_encoder.batch_size * multiplier

    @property
    def batch_size(self):
        # Pipeline-level batch size mirrors the VAE's.
        return self.vae.batch_size

    @property
    def sample_size(self):
        # UNet (latent) sample size.
        return self.unet.sample_size

    @property
    def image_size(self):
        # VAE sample size equals the pixel-space image size.
        return self.vae.sample_size
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
class RBLNStableDiffusionXLControlNetPipelineConfig(RBLNStableDiffusionXLControlNetPipelineBaseConfig):
    """Configuration for the text-to-image Stable Diffusion XL ControlNet pipeline."""

    # Text-to-image never encodes pixels, so the VAE encoder is not compiled.
    _vae_uses_encoder = False
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
class RBLNStableDiffusionXLControlNetImg2ImgPipelineConfig(RBLNStableDiffusionXLControlNetPipelineBaseConfig):
    """Configuration for the image-to-image Stable Diffusion XL ControlNet pipeline."""

    # Img2img encodes the input image into latents, so the VAE encoder is compiled too.
    _vae_uses_encoder = True
|