optimum-rbln 0.1.12__py3-none-any.whl → 0.1.15__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in the supported public registries. It is provided for informational purposes only.
- optimum/rbln/__init__.py +27 -13
- optimum/rbln/__version__.py +16 -1
- optimum/rbln/diffusers/__init__.py +22 -2
- optimum/rbln/diffusers/models/__init__.py +34 -3
- optimum/rbln/{transformers/generation → diffusers/models/autoencoders}/__init__.py +1 -2
- optimum/rbln/diffusers/models/{autoencoder_kl.py → autoencoders/autoencoder_kl.py} +66 -111
- optimum/rbln/diffusers/models/autoencoders/vae.py +84 -0
- optimum/rbln/diffusers/models/controlnet.py +85 -65
- optimum/rbln/diffusers/models/transformers/__init__.py +24 -0
- optimum/rbln/diffusers/models/transformers/transformer_sd3.py +203 -0
- optimum/rbln/diffusers/models/unets/__init__.py +24 -0
- optimum/rbln/diffusers/models/{unet_2d_condition.py → unets/unet_2d_condition.py} +129 -163
- optimum/rbln/diffusers/pipelines/__init__.py +60 -12
- optimum/rbln/diffusers/pipelines/controlnet/multicontrolnet.py +11 -25
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet.py +9 -185
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +9 -190
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +9 -191
- optimum/rbln/diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +9 -192
- optimum/rbln/diffusers/pipelines/stable_diffusion/__init__.py +1 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +4 -110
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +4 -118
- optimum/rbln/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +32 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/__init__.py +26 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +32 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +32 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +32 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/__init__.py +1 -0
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +18 -128
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -131
- optimum/rbln/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +32 -0
- optimum/rbln/modeling.py +572 -0
- optimum/rbln/modeling_alias.py +1 -1
- optimum/rbln/modeling_base.py +176 -763
- optimum/rbln/modeling_diffusers.py +329 -0
- optimum/rbln/transformers/__init__.py +2 -2
- optimum/rbln/transformers/cache_utils.py +5 -9
- optimum/rbln/transformers/modeling_rope_utils.py +283 -0
- optimum/rbln/transformers/models/__init__.py +80 -31
- optimum/rbln/transformers/models/auto/auto_factory.py +117 -23
- optimum/rbln/transformers/models/auto/modeling_auto.py +37 -12
- optimum/rbln/transformers/models/bart/modeling_bart.py +3 -6
- optimum/rbln/transformers/models/bert/modeling_bert.py +3 -6
- optimum/rbln/transformers/models/clip/modeling_clip.py +8 -34
- optimum/rbln/transformers/models/decoderonly/__init__.py +0 -5
- optimum/rbln/transformers/models/decoderonly/decoderonly_architecture.py +779 -361
- optimum/rbln/transformers/models/decoderonly/modeling_decoderonly.py +83 -142
- optimum/rbln/transformers/models/dpt/modeling_dpt.py +1 -1
- optimum/rbln/transformers/models/exaone/exaone_architecture.py +64 -39
- optimum/rbln/transformers/models/exaone/modeling_exaone.py +6 -29
- optimum/rbln/transformers/models/gemma/gemma_architecture.py +31 -92
- optimum/rbln/transformers/models/gemma/modeling_gemma.py +4 -28
- optimum/rbln/transformers/models/gpt2/gpt2_architecture.py +50 -238
- optimum/rbln/transformers/models/gpt2/modeling_gpt2.py +6 -31
- optimum/rbln/transformers/models/llama/modeling_llama.py +4 -28
- optimum/rbln/transformers/models/llava_next/modeling_llava_next.py +29 -83
- optimum/rbln/transformers/models/midm/midm_architecture.py +88 -253
- optimum/rbln/transformers/models/midm/modeling_midm.py +8 -33
- optimum/rbln/transformers/models/mistral/modeling_mistral.py +4 -29
- optimum/rbln/transformers/models/phi/modeling_phi.py +5 -31
- optimum/rbln/transformers/models/phi/phi_architecture.py +61 -345
- optimum/rbln/transformers/models/qwen2/modeling_qwen2.py +5 -29
- optimum/rbln/transformers/models/seq2seq/modeling_seq2seq.py +1 -46
- optimum/rbln/transformers/models/t5/__init__.py +1 -1
- optimum/rbln/transformers/models/t5/modeling_t5.py +157 -6
- optimum/rbln/transformers/models/wav2vec2/modeling_wav2vec2.py +1 -1
- optimum/rbln/transformers/models/whisper/modeling_whisper.py +2 -2
- optimum/rbln/transformers/models/xlm_roberta/modeling_xlm_roberta.py +3 -35
- optimum/rbln/transformers/utils/rbln_quantization.py +128 -5
- optimum/rbln/utils/decorator_utils.py +59 -0
- optimum/rbln/utils/hub.py +131 -0
- optimum/rbln/utils/import_utils.py +21 -0
- optimum/rbln/utils/model_utils.py +53 -0
- optimum/rbln/utils/runtime_utils.py +5 -5
- optimum/rbln/utils/submodule.py +114 -0
- optimum/rbln/utils/timer_utils.py +2 -2
- optimum_rbln-0.1.15.dist-info/METADATA +106 -0
- optimum_rbln-0.1.15.dist-info/RECORD +110 -0
- {optimum_rbln-0.1.12.dist-info → optimum_rbln-0.1.15.dist-info}/WHEEL +1 -1
- optimum/rbln/transformers/generation/streamers.py +0 -139
- optimum/rbln/transformers/generation/utils.py +0 -397
- optimum/rbln/transformers/models/exaone/hf_hub_cached/configuration_exaone.py +0 -181
- optimum/rbln/transformers/models/exaone/hf_hub_cached/modeling_exaone.py +0 -1725
- optimum/rbln/transformers/models/midm/hf_hub_cached/configuration_midm.py +0 -22
- optimum/rbln/transformers/models/midm/hf_hub_cached/midm_bitext_tokenization.py +0 -304
- optimum/rbln/transformers/models/midm/hf_hub_cached/modeling_midm.py +0 -1469
- optimum/rbln/transformers/models/midm/hf_hub_cached/rotary_position_embedding.py +0 -98
- optimum_rbln-0.1.12.dist-info/METADATA +0 -119
- optimum_rbln-0.1.12.dist-info/RECORD +0 -103
- optimum_rbln-0.1.12.dist-info/entry_points.txt +0 -4
- {optimum_rbln-0.1.12.dist-info → optimum_rbln-0.1.15.dist-info}/licenses/LICENSE +0 -0
optimum/rbln/__init__.py
CHANGED
@@ -38,9 +38,9 @@ _import_structure = {
         "RBLNXLMRobertaForSequenceClassification",
         "RBLNRobertaForSequenceClassification",
         "RBLNRobertaForMaskedLM",
-        "RBLNViTForImageClassification"
+        "RBLNViTForImageClassification",
     ],
-    "
+    "modeling": [
         "RBLNBaseModel",
         "RBLNModel",
         "RBLNModelForQuestionAnswering",
@@ -50,7 +50,6 @@ _import_structure = {
         "RBLNModelForMaskedLM",
     ],
     "transformers": [
-        "BatchTextIteratorStreamer",
         "RBLNAutoModel",
         "RBLNAutoModelForAudioClassification",
         "RBLNAutoModelForCausalLM",
@@ -76,6 +75,7 @@ _import_structure = {
         "RBLNQwen2ForCausalLM",
         "RBLNWav2Vec2ForCTC",
         "RBLNLlamaForCausalLM",
+        "RBLNT5EncoderModel",
         "RBLNT5ForConditionalGeneration",
         "RBLNPhiForCausalLM",
         "RBLNLlavaNextForConditionalGeneration",
@@ -91,14 +91,21 @@ _import_structure = {
         "RBLNUNet2DConditionModel",
         "RBLNControlNetModel",
         "RBLNStableDiffusionImg2ImgPipeline",
+        "RBLNStableDiffusionInpaintPipeline",
         "RBLNStableDiffusionControlNetImg2ImgPipeline",
         "RBLNMultiControlNetModel",
         "RBLNStableDiffusionXLImg2ImgPipeline",
+        "RBLNStableDiffusionXLInpaintPipeline",
         "RBLNStableDiffusionControlNetPipeline",
         "RBLNStableDiffusionXLControlNetPipeline",
         "RBLNStableDiffusionXLControlNetImg2ImgPipeline",
+        "RBLNSD3Transformer2DModel",
+        "RBLNStableDiffusion3Img2ImgPipeline",
+        "RBLNStableDiffusion3InpaintPipeline",
+        "RBLNStableDiffusion3Pipeline",
     ],
     "modeling_config": ["RBLNCompileConfig", "RBLNConfig"],
+    "modeling_diffusers": ["RBLNDiffusionMixin"],
 }

 if TYPE_CHECKING:
@@ -106,16 +113,31 @@ if TYPE_CHECKING:
         RBLNAutoencoderKL,
         RBLNControlNetModel,
         RBLNMultiControlNetModel,
+        RBLNSD3Transformer2DModel,
+        RBLNStableDiffusion3Img2ImgPipeline,
+        RBLNStableDiffusion3InpaintPipeline,
+        RBLNStableDiffusion3Pipeline,
         RBLNStableDiffusionControlNetImg2ImgPipeline,
         RBLNStableDiffusionControlNetPipeline,
         RBLNStableDiffusionImg2ImgPipeline,
+        RBLNStableDiffusionInpaintPipeline,
         RBLNStableDiffusionPipeline,
         RBLNStableDiffusionXLControlNetImg2ImgPipeline,
         RBLNStableDiffusionXLControlNetPipeline,
         RBLNStableDiffusionXLImg2ImgPipeline,
+        RBLNStableDiffusionXLInpaintPipeline,
         RBLNStableDiffusionXLPipeline,
         RBLNUNet2DConditionModel,
     )
+    from .modeling import (
+        RBLNBaseModel,
+        RBLNModel,
+        RBLNModelForAudioClassification,
+        RBLNModelForImageClassification,
+        RBLNModelForMaskedLM,
+        RBLNModelForQuestionAnswering,
+        RBLNModelForSequenceClassification,
+    )
     from .modeling_alias import (
         RBLNASTForAudioClassification,
         RBLNBertForQuestionAnswering,
@@ -126,18 +148,9 @@ if TYPE_CHECKING:
         RBLNViTForImageClassification,
         RBLNXLMRobertaForSequenceClassification,
     )
-    from .modeling_base import (
-        RBLNBaseModel,
-        RBLNModel,
-        RBLNModelForAudioClassification,
-        RBLNModelForImageClassification,
-        RBLNModelForMaskedLM,
-        RBLNModelForQuestionAnswering,
-        RBLNModelForSequenceClassification,
-    )
     from .modeling_config import RBLNCompileConfig, RBLNConfig
+    from .modeling_diffusers import RBLNDiffusionMixin
     from .transformers import (
-        BatchTextIteratorStreamer,
         RBLNAutoModel,
         RBLNAutoModelForAudioClassification,
         RBLNAutoModelForCausalLM,
@@ -166,6 +179,7 @@ if TYPE_CHECKING:
         RBLNMistralForCausalLM,
         RBLNPhiForCausalLM,
         RBLNQwen2ForCausalLM,
+        RBLNT5EncoderModel,
         RBLNT5ForConditionalGeneration,
         RBLNWav2Vec2ForCTC,
         RBLNWhisperForConditionalGeneration,
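For reference, a minimal import sketch of what the reworked top-level exports imply for downstream code (illustrative only; it assumes optimum-rbln 0.1.15 is installed and that the `_import_structure` above resolves these names lazily from the package root):

```python
# Illustrative import sketch based on the _import_structure changes above.
from optimum.rbln import (
    RBLNStableDiffusionInpaintPipeline,  # new inpaint pipeline export
    RBLNStableDiffusion3Pipeline,        # new Stable Diffusion 3 export
    RBLNT5EncoderModel,                  # new T5 encoder export
    RBLNBaseModel,                       # now provided by optimum.rbln.modeling (was modeling_base)
)
```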
optimum/rbln/__version__.py
CHANGED
@@ -1 +1,16 @@
-
+# file generated by setuptools_scm
+# don't change, don't track in version control
+TYPE_CHECKING = False
+if TYPE_CHECKING:
+    from typing import Tuple, Union
+    VERSION_TUPLE = Tuple[Union[int, str], ...]
+else:
+    VERSION_TUPLE = object
+
+version: str
+__version__: str
+__version_tuple__: VERSION_TUPLE
+version_tuple: VERSION_TUPLE
+
+__version__ = version = '0.1.15'
+__version_tuple__ = version_tuple = (0, 1, 15)
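The version module is now generated by setuptools_scm. A quick sketch of reading it programmatically (assuming the 0.1.15 wheel is installed):

```python
# Reads the generated version metadata shown in the diff above.
from optimum.rbln.__version__ import __version__, version_tuple

print(__version__)    # '0.1.15'
print(version_tuple)  # (0, 1, 15)
```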
optimum/rbln/diffusers/__init__.py
CHANGED
@@ -36,27 +36,47 @@ _import_structure = {
         "RBLNStableDiffusionPipeline",
         "RBLNStableDiffusionXLPipeline",
         "RBLNStableDiffusionImg2ImgPipeline",
+        "RBLNStableDiffusionInpaintPipeline",
         "RBLNStableDiffusionControlNetImg2ImgPipeline",
         "RBLNMultiControlNetModel",
         "RBLNStableDiffusionXLImg2ImgPipeline",
+        "RBLNStableDiffusionXLInpaintPipeline",
         "RBLNStableDiffusionControlNetPipeline",
         "RBLNStableDiffusionXLControlNetPipeline",
         "RBLNStableDiffusionXLControlNetImg2ImgPipeline",
+        "RBLNStableDiffusion3Pipeline",
+        "RBLNStableDiffusion3Img2ImgPipeline",
+        "RBLNStableDiffusion3InpaintPipeline",
+    ],
+    "models": [
+        "RBLNAutoencoderKL",
+        "RBLNUNet2DConditionModel",
+        "RBLNControlNetModel",
+        "RBLNSD3Transformer2DModel",
     ],
-    "models": ["RBLNAutoencoderKL", "RBLNUNet2DConditionModel", "RBLNControlNetModel"],
 }

 if TYPE_CHECKING:
-    from .models import
+    from .models import (
+        RBLNAutoencoderKL,
+        RBLNControlNetModel,
+        RBLNSD3Transformer2DModel,
+        RBLNUNet2DConditionModel,
+    )
     from .pipelines import (
         RBLNMultiControlNetModel,
+        RBLNStableDiffusion3Img2ImgPipeline,
+        RBLNStableDiffusion3InpaintPipeline,
+        RBLNStableDiffusion3Pipeline,
         RBLNStableDiffusionControlNetImg2ImgPipeline,
         RBLNStableDiffusionControlNetPipeline,
         RBLNStableDiffusionImg2ImgPipeline,
+        RBLNStableDiffusionInpaintPipeline,
         RBLNStableDiffusionPipeline,
         RBLNStableDiffusionXLControlNetImg2ImgPipeline,
         RBLNStableDiffusionXLControlNetPipeline,
         RBLNStableDiffusionXLImg2ImgPipeline,
+        RBLNStableDiffusionXLInpaintPipeline,
         RBLNStableDiffusionXLPipeline,
     )
 else:
optimum/rbln/diffusers/models/__init__.py
CHANGED
@@ -20,7 +20,38 @@
 # are the intellectual property of Rebellions Inc. and may not be
 # copied, modified, or distributed without prior written permission
 # from Rebellions Inc.
+from typing import TYPE_CHECKING

-from .
-
-
+from transformers.utils import _LazyModule
+
+
+_import_structure = {
+    "autoencoders": [
+        "RBLNAutoencoderKL",
+    ],
+    "unets": [
+        "RBLNUNet2DConditionModel",
+    ],
+    "controlnet": ["RBLNControlNetModel"],
+    "transformers": ["RBLNSD3Transformer2DModel"],
+}
+if TYPE_CHECKING:
+    from .autoencoders import (
+        RBLNAutoencoderKL,
+    )
+    from .controlnet import RBLNControlNetModel
+    from .transformers import (
+        RBLNSD3Transformer2DModel,
+    )
+    from .unets import (
+        RBLNUNet2DConditionModel,
+    )
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
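The `else` branch swaps the module object in `sys.modules` for transformers' `_LazyModule`, so submodules such as `autoencoders` and `unets` are only imported when one of their exported names is first accessed. A rough, standalone sketch of that pattern (illustrative only; this is not the `_LazyModule` implementation):

```python
# Simplified lazy-module sketch: attribute access triggers the real submodule import.
import importlib
import types


class LazyModule(types.ModuleType):
    def __init__(self, name, import_structure):
        super().__init__(name)
        # Map each exported name to the submodule that defines it.
        self._name_to_module = {
            attr: submodule
            for submodule, attrs in import_structure.items()
            for attr in attrs
        }

    def __getattr__(self, attr):
        if attr not in self._name_to_module:
            raise AttributeError(f"module {self.__name__!r} has no attribute {attr!r}")
        module = importlib.import_module(f"{self.__name__}.{self._name_to_module[attr]}")
        value = getattr(module, attr)
        setattr(self, attr, value)  # cache so later lookups skip __getattr__
        return value
```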
optimum/rbln/diffusers/models/{autoencoder_kl.py → autoencoders/autoencoder_kl.py}
CHANGED
@@ -22,20 +22,18 @@
 # from Rebellions Inc.

 import logging
-from
-from typing import TYPE_CHECKING, Any, Dict, List, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Union

 import rebel
 import torch  # noqa: I001
 from diffusers import AutoencoderKL
-from diffusers.models.autoencoders.vae import DiagonalGaussianDistribution
 from diffusers.models.modeling_outputs import AutoencoderKLOutput
-from
-from transformers import AutoConfig, AutoModel, PretrainedConfig
+from transformers import PretrainedConfig

-from
-from
-from
+from ....modeling import RBLNModel
+from ....modeling_config import DEFAULT_COMPILED_MODEL_NAME, RBLNCompileConfig, RBLNConfig
+from ....modeling_diffusers import RBLNDiffusionMixin
+from .vae import RBLNRuntimeVAEDecoder, RBLNRuntimeVAEEncoder, _VAEDecoder, _VAEEncoder


 if TYPE_CHECKING:
@@ -45,31 +43,22 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)


-class RBLNRuntimeVAEEncoder(RBLNPytorchRuntime):
-    def encode(self, x: torch.FloatTensor, **kwargs) -> torch.FloatTensor:
-        moments = self.forward(x.contiguous())
-        posterior = DiagonalGaussianDistribution(moments)
-        return AutoencoderKLOutput(latent_dist=posterior)
-
-
-class RBLNRuntimeVAEDecoder(RBLNPytorchRuntime):
-    def decode(self, z: torch.FloatTensor, **kwargs) -> torch.FloatTensor:
-        return (self.forward(z),)
-
-
 class RBLNAutoencoderKL(RBLNModel):
+    auto_model_class = AutoencoderKL
     config_name = "config.json"
+    hf_library_name = "diffusers"

     def __post_init__(self, **kwargs):
         super().__post_init__(**kwargs)

-        self.
-        if self.rbln_use_encode:
+        if self.rbln_config.model_cfg.get("img2img_pipeline") or self.rbln_config.model_cfg.get("inpaint_pipeline"):
             self.encoder = RBLNRuntimeVAEEncoder(runtime=self.model[0], main_input_name="x")
             self.decoder = RBLNRuntimeVAEDecoder(runtime=self.model[1], main_input_name="z")
         else:
             self.decoder = RBLNRuntimeVAEDecoder(runtime=self.model[0], main_input_name="z")

+        self.image_size = self.rbln_config.model_cfg["sample_size"]
+
     @classmethod
     def get_compiled_model(cls, model, rbln_config: RBLNConfig):
         def compile_img2img():
@@ -91,39 +80,40 @@ class RBLNAutoencoderKL(RBLNModel):

             return dec_compiled_model

-        if rbln_config.model_cfg.get("
+        if rbln_config.model_cfg.get("img2img_pipeline") or rbln_config.model_cfg.get("inpaint_pipeline"):
             return compile_img2img()
         else:
             return compile_text2img()

     @classmethod
-    def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def get_vae_sample_size(cls, pipe: RBLNDiffusionMixin, rbln_config: Dict[str, Any]) -> Union[int, Tuple[int, int]]:
+        image_size = (rbln_config.get("img_height"), rbln_config.get("img_width"))
+        if (image_size[0] is None) != (image_size[1] is None):
+            raise ValueError("Both image height and image width must be given or not given")
+        elif image_size[0] is None and image_size[1] is None:
+            if rbln_config["img2img_pipeline"]:
+                sample_size = pipe.vae.config.sample_size
+            elif rbln_config["inpaint_pipeline"]:
+                sample_size = pipe.unet.config.sample_size * pipe.vae_scale_factor
+            else:
+                # In case of text2img, sample size of vae decoder is determined by unet.
+                unet_sample_size = pipe.unet.config.sample_size
+                if isinstance(unet_sample_size, int):
+                    sample_size = unet_sample_size * pipe.vae_scale_factor
+                else:
+                    sample_size = (
+                        unet_sample_size[0] * pipe.vae_scale_factor,
+                        unet_sample_size[1] * pipe.vae_scale_factor,
+                    )
         else:
-
+            sample_size = (image_size[0], image_size[1])

-
-
-
-
-
-        return
+        return sample_size
+
+    @classmethod
+    def update_rbln_config_using_pipe(cls, pipe: RBLNDiffusionMixin, rbln_config: Dict[str, Any]) -> Dict[str, Any]:
+        rbln_config.update({"sample_size": cls.get_vae_sample_size(pipe, rbln_config)})
+        return rbln_config

     @classmethod
     def _get_rbln_config(
@@ -132,34 +122,43 @@
         model_config: "PretrainedConfig",
         rbln_kwargs: Dict[str, Any] = {},
     ) -> RBLNConfig:
-
-
-
-
-        rbln_use_encode = rbln_kwargs.get("use_encode", None)
-        rbln_vae_scale_factor = rbln_kwargs.get("vae_scale_factor", None)
+        rbln_batch_size = rbln_kwargs.get("batch_size")
+        sample_size = rbln_kwargs.get("sample_size")
+        is_img2img = rbln_kwargs.get("img2img_pipeline")
+        is_inpaint = rbln_kwargs.get("inpaint_pipeline")

         if rbln_batch_size is None:
             rbln_batch_size = 1

-
+        if sample_size is None:
+            sample_size = model_config.sample_size
+
+        if isinstance(sample_size, int):
+            sample_size = (sample_size, sample_size)
+
+        rbln_kwargs["sample_size"] = sample_size
+
+        if hasattr(model_config, "block_out_channels"):
+            vae_scale_factor = 2 ** (len(model_config.block_out_channels) - 1)
+        else:
+            # vae image processor default value 8 (int)
+            vae_scale_factor = 8

-
-
-        model_cfg["img_height"] = rbln_img_height
+        dec_shape = (sample_size[0] // vae_scale_factor, sample_size[1] // vae_scale_factor)
+        enc_shape = (sample_size[0], sample_size[1])

+        if is_img2img or is_inpaint:
             vae_enc_input_info = [
-            (
+                (
+                    "x",
+                    [rbln_batch_size, model_config.in_channels, enc_shape[0], enc_shape[1]],
+                    "float32",
+                )
             ]
             vae_dec_input_info = [
                 (
                     "z",
-                    [
-                        rbln_batch_size,
-                        model_config.latent_channels,
-                        rbln_img_height // rbln_vae_scale_factor,
-                        rbln_img_width // rbln_vae_scale_factor,
-                    ],
+                    [rbln_batch_size, model_config.latent_channels, dec_shape[0], dec_shape[1]],
                     "float32",
                 )
             ]
@@ -173,33 +172,22 @@
             compile_cfgs=compile_cfgs,
             rbln_kwargs=rbln_kwargs,
         )
-        rbln_config.model_cfg.update(model_cfg)
         return rbln_config

-        if rbln_unet_sample_size is None:
-            rbln_unet_sample_size = 64
-
-        model_cfg["unet_sample_size"] = rbln_unet_sample_size
         vae_config = RBLNCompileConfig(
             input_info=[
                 (
                     "z",
-                    [
-                        rbln_batch_size,
-                        model_config.latent_channels,
-                        rbln_unet_sample_size,
-                        rbln_unet_sample_size,
-                    ],
+                    [rbln_batch_size, model_config.latent_channels, dec_shape[0], dec_shape[1]],
                     "float32",
                 )
-            ]
+            ]
         )
         rbln_config = RBLNConfig(
             rbln_cls=cls.__name__,
             compile_cfgs=[vae_config],
             rbln_kwargs=rbln_kwargs,
         )
-        rbln_config.model_cfg.update(model_cfg)
         return rbln_config

     @classmethod
@@ -222,36 +210,3 @@

     def decode(self, z: torch.FloatTensor, **kwargs) -> torch.FloatTensor:
         return self.decoder.decode(z)
-
-
-class _VAEDecoder(torch.nn.Module):
-    def __init__(self, vae: "AutoencoderKL"):
-        super().__init__()
-        self.vae = vae
-
-    def forward(self, z):
-        vae_out = self.vae.decode(z, return_dict=False)
-        return vae_out
-
-
-class _VAEEncoder(torch.nn.Module):
-    def __init__(self, vae: "AutoencoderKL"):
-        super().__init__()
-        self.vae = vae
-
-    def encode(self, x: torch.FloatTensor, return_dict: bool = True):
-        if self.use_tiling and (x.shape[-1] > self.tile_sample_min_size or x.shape[-2] > self.tile_sample_min_size):
-            return self.tiled_encode(x, return_dict=return_dict)
-
-        if self.use_slicing and x.shape[0] > 1:
-            encoded_slices = [self.encoder(x_slice) for x_slice in x.split(1)]
-            h = torch.cat(encoded_slices)
-        else:
-            h = self.encoder(x)
-
-        moments = self.quant_conv(h)
-        return moments
-
-    def forward(self, x):
-        vae_out = _VAEEncoder.encode(self.vae, x, return_dict=False)
-        return vae_out
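A worked example of the static-shape arithmetic introduced above (the numbers are typical SD 1.5 defaults, used purely for illustration):

```python
# Mirrors the vae_scale_factor / enc_shape / dec_shape arithmetic in _get_rbln_config.
block_out_channels = (128, 256, 512, 512)             # typical AutoencoderKL config
vae_scale_factor = 2 ** (len(block_out_channels) - 1)
assert vae_scale_factor == 8

sample_size = (512, 512)                               # (img_height, img_width)
enc_shape = (sample_size[0], sample_size[1])           # pixel-space "x" input
dec_shape = (sample_size[0] // vae_scale_factor,       # latent-space "z" input
             sample_size[1] // vae_scale_factor)

assert enc_shape == (512, 512)
assert dec_shape == (64, 64)
```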
optimum/rbln/diffusers/models/autoencoders/vae.py
ADDED
@@ -0,0 +1,84 @@
+# Copyright 2024 Rebellions Inc.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Portions of this software are licensed under the Apache License,
+# Version 2.0. See the NOTICE file distributed with this work for
+# additional information regarding copyright ownership.
+
+# All other portions of this software, including proprietary code,
+# are the intellectual property of Rebellions Inc. and may not be
+# copied, modified, or distributed without prior written permission
+# from Rebellions Inc.
+
+
+import logging
+from typing import TYPE_CHECKING
+
+import torch  # noqa: I001
+from diffusers import AutoencoderKL
+from diffusers.models.autoencoders.vae import DiagonalGaussianDistribution
+from diffusers.models.modeling_outputs import AutoencoderKLOutput
+
+from ....utils.runtime_utils import RBLNPytorchRuntime
+
+
+if TYPE_CHECKING:
+    import torch
+
+logger = logging.getLogger(__name__)
+
+
+class RBLNRuntimeVAEEncoder(RBLNPytorchRuntime):
+    def encode(self, x: torch.FloatTensor, **kwargs) -> torch.FloatTensor:
+        moments = self.forward(x.contiguous())
+        posterior = DiagonalGaussianDistribution(moments)
+        return AutoencoderKLOutput(latent_dist=posterior)
+
+
+class RBLNRuntimeVAEDecoder(RBLNPytorchRuntime):
+    def decode(self, z: torch.FloatTensor, **kwargs) -> torch.FloatTensor:
+        return (self.forward(z),)
+
+
+class _VAEDecoder(torch.nn.Module):
+    def __init__(self, vae: "AutoencoderKL"):
+        super().__init__()
+        self.vae = vae
+
+    def forward(self, z):
+        vae_out = self.vae.decode(z, return_dict=False)
+        return vae_out
+
+
+class _VAEEncoder(torch.nn.Module):
+    def __init__(self, vae: "AutoencoderKL"):
+        super().__init__()
+        self.vae = vae
+
+    def encode(self, x: torch.FloatTensor, return_dict: bool = True):
+        if self.use_tiling and (x.shape[-1] > self.tile_sample_min_size or x.shape[-2] > self.tile_sample_min_size):
+            return self.tiled_encode(x, return_dict=return_dict)
+
+        if self.use_slicing and x.shape[0] > 1:
+            encoded_slices = [self.encoder(x_slice) for x_slice in x.split(1)]
+            h = torch.cat(encoded_slices)
+        else:
+            h = self.encoder(x)
+        if self.quant_conv is not None:
+            h = self.quant_conv(h)
+        return h
+
+    def forward(self, x):
+        vae_out = _VAEEncoder.encode(self.vae, x, return_dict=False)
+        return vae_out
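A hypothetical usage sketch of the relocated runtime wrappers (it assumes `vae` is an RBLNAutoencoderKL compiled with the encoder enabled, i.e. for an img2img or inpaint pipeline, and that the input shapes match those fixed at compile time):

```python
import torch

# vae = ...  # hypothetical: an already-loaded, compiled RBLNAutoencoderKL instance

x = torch.randn(1, 3, 512, 512)   # pixel-space input, the compiled "x" shape
out = vae.encode(x)               # AutoencoderKLOutput, built by RBLNRuntimeVAEEncoder.encode
z = out.latent_dist.sample()      # DiagonalGaussianDistribution -> latent tensor
decoded = vae.decode(z)           # tuple, mirroring RBLNRuntimeVAEDecoder.decode
```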