diffusers 0.17.1__py3-none-any.whl → 0.18.2__py3-none-any.whl

Files changed (120)
  1. diffusers/__init__.py +26 -1
  2. diffusers/configuration_utils.py +34 -29
  3. diffusers/dependency_versions_table.py +4 -0
  4. diffusers/image_processor.py +125 -12
  5. diffusers/loaders.py +169 -203
  6. diffusers/models/attention.py +24 -1
  7. diffusers/models/attention_flax.py +10 -5
  8. diffusers/models/attention_processor.py +3 -0
  9. diffusers/models/autoencoder_kl.py +114 -33
  10. diffusers/models/controlnet.py +131 -14
  11. diffusers/models/controlnet_flax.py +37 -26
  12. diffusers/models/cross_attention.py +17 -17
  13. diffusers/models/embeddings.py +67 -0
  14. diffusers/models/modeling_flax_utils.py +64 -56
  15. diffusers/models/modeling_utils.py +193 -104
  16. diffusers/models/prior_transformer.py +207 -37
  17. diffusers/models/resnet.py +26 -26
  18. diffusers/models/transformer_2d.py +36 -41
  19. diffusers/models/transformer_temporal.py +24 -21
  20. diffusers/models/unet_1d.py +31 -25
  21. diffusers/models/unet_2d.py +43 -30
  22. diffusers/models/unet_2d_blocks.py +210 -89
  23. diffusers/models/unet_2d_blocks_flax.py +12 -12
  24. diffusers/models/unet_2d_condition.py +172 -64
  25. diffusers/models/unet_2d_condition_flax.py +38 -24
  26. diffusers/models/unet_3d_blocks.py +34 -31
  27. diffusers/models/unet_3d_condition.py +101 -34
  28. diffusers/models/vae.py +5 -5
  29. diffusers/models/vae_flax.py +37 -34
  30. diffusers/models/vq_model.py +23 -14
  31. diffusers/pipelines/__init__.py +24 -1
  32. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +1 -1
  33. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -3
  34. diffusers/pipelines/consistency_models/__init__.py +1 -0
  35. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +337 -0
  36. diffusers/pipelines/controlnet/multicontrolnet.py +120 -1
  37. diffusers/pipelines/controlnet/pipeline_controlnet.py +59 -17
  38. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +60 -15
  39. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +60 -17
  40. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  41. diffusers/pipelines/kandinsky/__init__.py +1 -1
  42. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +4 -6
  43. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +1 -0
  44. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +1 -0
  45. diffusers/pipelines/kandinsky2_2/__init__.py +7 -0
  46. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +317 -0
  47. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +372 -0
  48. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +434 -0
  49. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +398 -0
  50. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +531 -0
  51. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +541 -0
  52. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +605 -0
  53. diffusers/pipelines/pipeline_flax_utils.py +2 -2
  54. diffusers/pipelines/pipeline_utils.py +124 -146
  55. diffusers/pipelines/shap_e/__init__.py +27 -0
  56. diffusers/pipelines/shap_e/camera.py +147 -0
  57. diffusers/pipelines/shap_e/pipeline_shap_e.py +390 -0
  58. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +349 -0
  59. diffusers/pipelines/shap_e/renderer.py +709 -0
  60. diffusers/pipelines/stable_diffusion/__init__.py +2 -0
  61. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +261 -66
  62. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +3 -3
  63. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -3
  64. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +4 -2
  65. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +6 -6
  66. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +1 -1
  67. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
  68. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +719 -0
  69. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +1 -1
  70. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +832 -0
  71. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +17 -7
  72. diffusers/pipelines/stable_diffusion_xl/__init__.py +26 -0
  73. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +823 -0
  74. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +896 -0
  75. diffusers/pipelines/stable_diffusion_xl/watermark.py +31 -0
  76. diffusers/pipelines/text_to_video_synthesis/__init__.py +2 -1
  77. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +5 -1
  78. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +771 -0
  79. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +92 -6
  80. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +3 -3
  81. diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +209 -91
  82. diffusers/schedulers/__init__.py +3 -0
  83. diffusers/schedulers/scheduling_consistency_models.py +380 -0
  84. diffusers/schedulers/scheduling_ddim.py +28 -6
  85. diffusers/schedulers/scheduling_ddim_inverse.py +19 -4
  86. diffusers/schedulers/scheduling_ddim_parallel.py +642 -0
  87. diffusers/schedulers/scheduling_ddpm.py +53 -7
  88. diffusers/schedulers/scheduling_ddpm_parallel.py +604 -0
  89. diffusers/schedulers/scheduling_deis_multistep.py +66 -11
  90. diffusers/schedulers/scheduling_dpmsolver_multistep.py +55 -13
  91. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +19 -4
  92. diffusers/schedulers/scheduling_dpmsolver_sde.py +73 -11
  93. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +23 -7
  94. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +58 -9
  95. diffusers/schedulers/scheduling_euler_discrete.py +58 -8
  96. diffusers/schedulers/scheduling_heun_discrete.py +89 -14
  97. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +73 -11
  98. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +73 -11
  99. diffusers/schedulers/scheduling_lms_discrete.py +57 -8
  100. diffusers/schedulers/scheduling_pndm.py +46 -10
  101. diffusers/schedulers/scheduling_repaint.py +19 -4
  102. diffusers/schedulers/scheduling_sde_ve.py +5 -1
  103. diffusers/schedulers/scheduling_unclip.py +43 -4
  104. diffusers/schedulers/scheduling_unipc_multistep.py +48 -7
  105. diffusers/training_utils.py +1 -1
  106. diffusers/utils/__init__.py +2 -1
  107. diffusers/utils/dummy_pt_objects.py +60 -0
  108. diffusers/utils/dummy_torch_and_transformers_and_invisible_watermark_objects.py +32 -0
  109. diffusers/utils/dummy_torch_and_transformers_objects.py +180 -0
  110. diffusers/utils/hub_utils.py +1 -1
  111. diffusers/utils/import_utils.py +20 -3
  112. diffusers/utils/logging.py +15 -18
  113. diffusers/utils/outputs.py +3 -3
  114. diffusers/utils/testing_utils.py +15 -0
  115. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/METADATA +4 -2
  116. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/RECORD +120 -94
  117. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/WHEEL +1 -1
  118. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/LICENSE +0 -0
  119. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/entry_points.txt +0 -0
  120. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/top_level.txt +0 -0
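The bulk of the new surface area in 0.18.x is new pipelines (consistency models, Kandinsky 2.2, Shap-E, Stable Diffusion XL, LDM3D, ParaDiGMS) and their schedulers, exported from the top-level diffusers namespace. A minimal, hedged sketch of trying two of them; the checkpoint ids are illustrative, and the SDXL pipelines additionally require the invisible-watermark package:

# Sketch only: the Hub checkpoint ids below are illustrative, and weight downloads
# plus GPU placement are assumed to be available.
import torch
from diffusers import ConsistencyModelPipeline, StableDiffusionXLPipeline

# New in 0.18.x: one-step / few-step consistency-model sampling.
cm_pipe = ConsistencyModelPipeline.from_pretrained("openai/diffusers-cd_imagenet64_l2")
cm_image = cm_pipe(num_inference_steps=1).images[0]

# New in 0.18.x: Stable Diffusion XL text-to-image (needs `invisible-watermark`).
sdxl_pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-0.9", torch_dtype=torch.float16
).to("cuda")
sdxl_image = sdxl_pipe("an astronaut riding a horse, oil painting").images[0]

The per-file hunks that follow cover the ControlNet img2img/inpaint pipelines, the Flax ControlNet pipeline, and the Kandinsky pipelines.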
diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
@@ -14,7 +14,6 @@
 
 
 import inspect
-import os
 import warnings
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
@@ -518,6 +517,8 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
         prompt_embeds=None,
         negative_prompt_embeds=None,
         controlnet_conditioning_scale=1.0,
+        control_guidance_start=0.0,
+        control_guidance_end=1.0,
     ):
         if (callback_steps is None) or (
             callback_steps is not None and (not isinstance(callback_steps, int) or callback_steps <= 0)
@@ -586,7 +587,7 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
                 raise ValueError("A single batch of multiple conditionings are supported at the moment.")
             elif len(image) != len(self.controlnet.nets):
                 raise ValueError(
-                    "For multiple controlnets: `image` must have the same length as the number of controlnets."
+                    f"For multiple controlnets: `image` must have the same length as the number of controlnets, but got {len(image)} images and {len(self.controlnet.nets)} ControlNets."
                 )
 
             for image_ in image:
@@ -620,6 +621,27 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
         else:
             assert False
 
+        if len(control_guidance_start) != len(control_guidance_end):
+            raise ValueError(
+                f"`control_guidance_start` has {len(control_guidance_start)} elements, but `control_guidance_end` has {len(control_guidance_end)} elements. Make sure to provide the same number of elements to each list."
+            )
+
+        if isinstance(self.controlnet, MultiControlNetModel):
+            if len(control_guidance_start) != len(self.controlnet.nets):
+                raise ValueError(
+                    f"`control_guidance_start`: {control_guidance_start} has {len(control_guidance_start)} elements but there are {len(self.controlnet.nets)} controlnets available. Make sure to provide {len(self.controlnet.nets)}."
+                )
+
+        for start, end in zip(control_guidance_start, control_guidance_end):
+            if start >= end:
+                raise ValueError(
+                    f"control guidance start: {start} cannot be larger or equal to control guidance end: {end}."
+                )
+            if start < 0.0:
+                raise ValueError(f"control guidance start: {start} can't be smaller than 0.")
+            if end > 1.0:
+                raise ValueError(f"control guidance end: {end} can't be larger than 1.0.")
+
     # Copied from diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.check_image
     def check_image(self, image, prompt, prompt_embeds):
         image_is_pil = isinstance(image, PIL.Image.Image)
@@ -757,18 +779,6 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
 
         return latents
 
-    # override DiffusionPipeline
-    def save_pretrained(
-        self,
-        save_directory: Union[str, os.PathLike],
-        safe_serialization: bool = False,
-        variant: Optional[str] = None,
-    ):
-        if isinstance(self.controlnet, ControlNetModel):
-            super().save_pretrained(save_directory, safe_serialization, variant)
-        else:
-            raise NotImplementedError("Currently, the `save_pretrained()` is not implemented for Multi-ControlNet.")
-
     @torch.no_grad()
     @replace_example_docstring(EXAMPLE_DOC_STRING)
     def __call__(
@@ -809,6 +819,8 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         controlnet_conditioning_scale: Union[float, List[float]] = 0.8,
         guess_mode: bool = False,
+        control_guidance_start: Union[float, List[float]] = 0.0,
+        control_guidance_end: Union[float, List[float]] = 1.0,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
@@ -889,6 +901,10 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
             guess_mode (`bool`, *optional*, defaults to `False`):
                 In this mode, the ControlNet encoder will try best to recognize the content of the input image even if
                 you remove all prompts. The `guidance_scale` between 3.0 and 5.0 is recommended.
+            control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
+                The percentage of total steps at which the controlnet starts applying.
+            control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
+                The percentage of total steps at which the controlnet stops applying.
 
         Examples:
 
@@ -899,6 +915,19 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
             list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
             (nsfw) content, according to the `safety_checker`.
         """
+        controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
+
+        # align format for control guidance
+        if not isinstance(control_guidance_start, list) and isinstance(control_guidance_end, list):
+            control_guidance_start = len(control_guidance_end) * [control_guidance_start]
+        elif not isinstance(control_guidance_end, list) and isinstance(control_guidance_start, list):
+            control_guidance_end = len(control_guidance_start) * [control_guidance_end]
+        elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
+            mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
+            control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
+                control_guidance_end
+            ]
+
         # 1. Check inputs. Raise error if not correct
         self.check_inputs(
             prompt,
@@ -908,6 +937,8 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
             prompt_embeds,
             negative_prompt_embeds,
             controlnet_conditioning_scale,
+            control_guidance_start,
+            control_guidance_end,
         )
 
         # 2. Define call parameters
@@ -1007,6 +1038,15 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
         # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
         extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
 
+        # 7.1 Create tensor stating which controlnets to keep
+        controlnet_keep = []
+        for i in range(len(timesteps)):
+            keeps = [
+                1.0 - float(i / len(timesteps) < s or (i + 1) / len(timesteps) > e)
+                for s, e in zip(control_guidance_start, control_guidance_end)
+            ]
+            controlnet_keep.append(keeps[0] if len(keeps) == 1 else keeps)
+
         # 8. Denoising loop
         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
         with self.progress_bar(total=num_inference_steps) as progress_bar:
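The `controlnet_keep` list built in step 7.1 above is what turns `control_guidance_start` / `control_guidance_end` into a per-step mask. A standalone sketch of the same expression with assumed values (ten steps, one ControlNet active only between 20% and 80% of the schedule):

# Illustration of the keep-mask expression from the hunk above; the step count and
# guidance window are assumed values, not taken from the pipeline.
num_steps = 10
control_guidance_start, control_guidance_end = 0.2, 0.8

controlnet_keep = [
    1.0 - float(i / num_steps < control_guidance_start or (i + 1) / num_steps > control_guidance_end)
    for i in range(num_steps)
]
print(controlnet_keep)
# [0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0]

A 0.0 entry later multiplies the conditioning scale down to zero for that step, so the ControlNet residuals are effectively disabled outside the window.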
@@ -1025,12 +1065,17 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, TextualInversi
                     control_model_input = latent_model_input
                     controlnet_prompt_embeds = prompt_embeds
 
+                if isinstance(controlnet_keep[i], list):
+                    cond_scale = [c * s for c, s in zip(controlnet_conditioning_scale, controlnet_keep[i])]
+                else:
+                    cond_scale = controlnet_conditioning_scale * controlnet_keep[i]
+
                 down_block_res_samples, mid_block_res_sample = self.controlnet(
                     control_model_input,
                     t,
                     encoder_hidden_states=controlnet_prompt_embeds,
                     controlnet_cond=control_image,
-                    conditioning_scale=controlnet_conditioning_scale,
+                    conditioning_scale=cond_scale,
                     guess_mode=guess_mode,
                     return_dict=False,
                 )
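Putting the img2img hunks together, `control_guidance_start` / `control_guidance_end` become ordinary `__call__` arguments. A hedged usage sketch, with placeholder checkpoint ids and input images, that restricts ControlNet guidance to the first half of the schedule:

# Sketch only: model ids and image URLs are placeholders; the new arguments are the
# control_guidance_start / control_guidance_end floats added in this release.
import torch
from diffusers import ControlNetModel, StableDiffusionControlNetImg2ImgPipeline
from diffusers.utils import load_image

controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
).to("cuda")

init_image = load_image("https://example.com/init.png")    # placeholder URL
canny_image = load_image("https://example.com/canny.png")  # placeholder URL

image = pipe(
    "a futuristic city at dusk",
    image=init_image,
    control_image=canny_image,
    control_guidance_start=0.0,  # ControlNet active from the first step ...
    control_guidance_end=0.5,    # ... and switched off after 50% of the steps
).images[0]

The inpaint pipeline below gains the identical arguments and keep-mask logic.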
diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
@@ -15,7 +15,6 @@
 # This model implementation is heavily inspired by https://github.com/haofanwang/ControlNet-for-Diffusers/
 
 import inspect
-import os
 import warnings
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
@@ -647,6 +646,8 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
         prompt_embeds=None,
         negative_prompt_embeds=None,
         controlnet_conditioning_scale=1.0,
+        control_guidance_start=0.0,
+        control_guidance_end=1.0,
     ):
         if height % 8 != 0 or width % 8 != 0:
             raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
@@ -718,7 +719,7 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
                 raise ValueError("A single batch of multiple conditionings are supported at the moment.")
             elif len(image) != len(self.controlnet.nets):
                 raise ValueError(
-                    "For multiple controlnets: `image` must have the same length as the number of controlnets."
+                    f"For multiple controlnets: `image` must have the same length as the number of controlnets, but got {len(image)} images and {len(self.controlnet.nets)} ControlNets."
                 )
 
             for image_ in image:
@@ -752,6 +753,27 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
         else:
             assert False
 
+        if len(control_guidance_start) != len(control_guidance_end):
+            raise ValueError(
+                f"`control_guidance_start` has {len(control_guidance_start)} elements, but `control_guidance_end` has {len(control_guidance_end)} elements. Make sure to provide the same number of elements to each list."
+            )
+
+        if isinstance(self.controlnet, MultiControlNetModel):
+            if len(control_guidance_start) != len(self.controlnet.nets):
+                raise ValueError(
+                    f"`control_guidance_start`: {control_guidance_start} has {len(control_guidance_start)} elements but there are {len(self.controlnet.nets)} controlnets available. Make sure to provide {len(self.controlnet.nets)}."
+                )
+
+        for start, end in zip(control_guidance_start, control_guidance_end):
+            if start >= end:
+                raise ValueError(
+                    f"control guidance start: {start} cannot be larger or equal to control guidance end: {end}."
+                )
+            if start < 0.0:
+                raise ValueError(f"control guidance start: {start} can't be smaller than 0.")
+            if end > 1.0:
+                raise ValueError(f"control guidance end: {end} can't be larger than 1.0.")
+
     # Copied from diffusers.pipelines.controlnet.pipeline_controlnet.StableDiffusionControlNetPipeline.check_image
     def check_image(self, image, prompt, prompt_embeds):
         image_is_pil = isinstance(image, PIL.Image.Image)
@@ -957,18 +979,6 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
 
         return image_latents
 
-    # override DiffusionPipeline
-    def save_pretrained(
-        self,
-        save_directory: Union[str, os.PathLike],
-        safe_serialization: bool = False,
-        variant: Optional[str] = None,
-    ):
-        if isinstance(self.controlnet, ControlNetModel):
-            super().save_pretrained(save_directory, safe_serialization, variant)
-        else:
-            raise NotImplementedError("Currently, the `save_pretrained()` is not implemented for Multi-ControlNet.")
-
     @torch.no_grad()
     @replace_example_docstring(EXAMPLE_DOC_STRING)
     def __call__(
@@ -1003,6 +1013,8 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         controlnet_conditioning_scale: Union[float, List[float]] = 0.5,
         guess_mode: bool = False,
+        control_guidance_start: Union[float, List[float]] = 0.0,
+        control_guidance_end: Union[float, List[float]] = 1.0,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
@@ -1086,6 +1098,10 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
             guess_mode (`bool`, *optional*, defaults to `False`):
                 In this mode, the ControlNet encoder will try best to recognize the content of the input image even if
                 you remove all prompts. The `guidance_scale` between 3.0 and 5.0 is recommended.
+            control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
+                The percentage of total steps at which the controlnet starts applying.
+            control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
+                The percentage of total steps at which the controlnet stops applying.
 
         Examples:
 
@@ -1096,9 +1112,22 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
             list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work"
             (nsfw) content, according to the `safety_checker`.
         """
+        controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
+
         # 0. Default height and width to unet
         height, width = self._default_height_width(height, width, image)
 
+        # align format for control guidance
+        if not isinstance(control_guidance_start, list) and isinstance(control_guidance_end, list):
+            control_guidance_start = len(control_guidance_end) * [control_guidance_start]
+        elif not isinstance(control_guidance_end, list) and isinstance(control_guidance_start, list):
+            control_guidance_end = len(control_guidance_start) * [control_guidance_end]
+        elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
+            mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetModel) else 1
+            control_guidance_start, control_guidance_end = mult * [control_guidance_start], mult * [
+                control_guidance_end
+            ]
+
         # 1. Check inputs. Raise error if not correct
         self.check_inputs(
             prompt,
@@ -1110,6 +1139,8 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
             prompt_embeds,
             negative_prompt_embeds,
             controlnet_conditioning_scale,
+            control_guidance_start,
+            control_guidance_end,
         )
 
         # 2. Define call parameters
@@ -1126,8 +1157,6 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
         # corresponds to doing no classifier free guidance.
         do_classifier_free_guidance = guidance_scale > 1.0
 
-        controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
-
         if isinstance(controlnet, MultiControlNetModel) and isinstance(controlnet_conditioning_scale, float):
             controlnet_conditioning_scale = [controlnet_conditioning_scale] * len(controlnet.nets)
 
@@ -1244,6 +1273,15 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
         # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
         extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
 
+        # 7.1 Create tensor stating which controlnets to keep
+        controlnet_keep = []
+        for i in range(len(timesteps)):
+            keeps = [
+                1.0 - float(i / len(timesteps) < s or (i + 1) / len(timesteps) > e)
+                for s, e in zip(control_guidance_start, control_guidance_end)
+            ]
+            controlnet_keep.append(keeps[0] if len(keeps) == 1 else keeps)
+
         # 8. Denoising loop
         num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
         with self.progress_bar(total=num_inference_steps) as progress_bar:
@@ -1262,12 +1300,17 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, TextualInversi
                     control_model_input = latent_model_input
                     controlnet_prompt_embeds = prompt_embeds
 
+                if isinstance(controlnet_keep[i], list):
+                    cond_scale = [c * s for c, s in zip(controlnet_conditioning_scale, controlnet_keep[i])]
+                else:
+                    cond_scale = controlnet_conditioning_scale * controlnet_keep[i]
+
                 down_block_res_samples, mid_block_res_sample = self.controlnet(
                     control_model_input,
                     t,
                     encoder_hidden_states=controlnet_prompt_embeds,
                     controlnet_cond=control_image,
-                    conditioning_scale=controlnet_conditioning_scale,
+                    conditioning_scale=cond_scale,
                     guess_mode=guess_mode,
                     return_dict=False,
                 )
diffusers/pipelines/controlnet/pipeline_flax_controlnet.py
@@ -464,7 +464,7 @@ class FlaxStableDiffusionControlNetPipeline(FlaxDiffusionPipeline):
 
             images_uint8_casted = np.asarray(images_uint8_casted).reshape(num_devices * batch_size, height, width, 3)
             images_uint8_casted, has_nsfw_concept = self._run_safety_checker(images_uint8_casted, safety_params, jit)
-            images = np.asarray(images)
+            images = np.array(images)
 
             # block images
             if any(has_nsfw_concept):
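The one-line Flax change above swaps `np.asarray` for `np.array` before the NSFW blanking step, presumably because `np.asarray` avoids copying and can hand back a read-only view, whereas `np.array` always produces a fresh writable copy that the in-place blanking can modify. A generic NumPy illustration of that difference (not diffusers-specific):

import numpy as np

src = np.zeros((2, 2))
src.setflags(write=False)   # stand-in for a non-writable device buffer

view = np.asarray(src)      # no copy: same object, inherits the read-only flag
copy = np.array(src)        # always copies: result is writable

copy[0, 0] = 1.0            # fine
# view[0, 0] = 1.0          # would raise ValueError: assignment destination is read-only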
diffusers/pipelines/kandinsky/__init__.py
@@ -15,5 +15,5 @@ else:
     from .pipeline_kandinsky import KandinskyPipeline
     from .pipeline_kandinsky_img2img import KandinskyImg2ImgPipeline
     from .pipeline_kandinsky_inpaint import KandinskyInpaintPipeline
-    from .pipeline_kandinsky_prior import KandinskyPriorPipeline
+    from .pipeline_kandinsky_prior import KandinskyPriorPipeline, KandinskyPriorPipelineOutput
     from .text_encoder import MultilingualCLIP
diffusers/pipelines/kandinsky/pipeline_kandinsky.py
@@ -22,7 +22,7 @@ from transformers import (
 from ...models import UNet2DConditionModel, VQModel
 from ...pipelines import DiffusionPipeline
 from ...pipelines.pipeline_utils import ImagePipelineOutput
-from ...schedulers import DDIMScheduler
+from ...schedulers import DDIMScheduler, DDPMScheduler
 from ...utils import (
     is_accelerate_available,
     is_accelerate_version,
@@ -88,7 +88,7 @@ class KandinskyPipeline(DiffusionPipeline):
             Frozen text-encoder.
         tokenizer ([`XLMRobertaTokenizer`]):
             Tokenizer of class
-        scheduler ([`DDIMScheduler`]):
+        scheduler (Union[`DDIMScheduler`,`DDPMScheduler`]):
             A scheduler to be used in combination with `unet` to generate image latents.
         unet ([`UNet2DConditionModel`]):
             Conditional U-Net architecture to denoise the image embedding.
@@ -101,7 +101,7 @@ class KandinskyPipeline(DiffusionPipeline):
         text_encoder: MultilingualCLIP,
         tokenizer: XLMRobertaTokenizer,
         unet: UNet2DConditionModel,
-        scheduler: DDIMScheduler,
+        scheduler: Union[DDIMScheduler, DDPMScheduler],
         movq: VQModel,
     ):
         super().__init__()
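With the constructor now typed `Union[DDIMScheduler, DDPMScheduler]`, the Kandinsky text-to-image pipeline can be re-configured to sample with DDPM. A hedged sketch using the usual scheduler-swap pattern; the Hub id is assumed:

# Sketch: swap the decoder's scheduler for DDPM, which this release now accepts.
from diffusers import DDPMScheduler, KandinskyPipeline

pipe = KandinskyPipeline.from_pretrained("kandinsky-community/kandinsky-2-1")
pipe.scheduler = DDPMScheduler.from_config(pipe.scheduler.config)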
@@ -115,6 +115,7 @@ class KandinskyPipeline(DiffusionPipeline):
         )
         self.movq_scale_factor = 2 ** (len(self.movq.config.block_out_channels) - 1)
 
+    # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
     def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
         if latents is None:
             latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
@@ -439,9 +440,6 @@ class KandinskyPipeline(DiffusionPipeline):
                 noise_pred,
                 t,
                 latents,
-                # YiYi notes: only reason this pipeline can't work with unclip scheduler is that can't pass down this argument
-                # need to use DDPM scheduler instead
-                # prev_timestep=prev_timestep,
                 generator=generator,
             ).prev_sample
         # post-processing
diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py
@@ -275,6 +275,7 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
         )
         self.movq_scale_factor = 2 ** (len(self.movq.config.block_out_channels) - 1)
 
+    # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
     def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
         if latents is None:
             latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py
@@ -274,6 +274,7 @@ class KandinskyPriorPipeline(DiffusionPipeline):
 
         return KandinskyPriorPipelineOutput(image_embeds=image_emb, negative_image_embeds=zero_image_emb)
 
+    # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline.prepare_latents
     def prepare_latents(self, shape, dtype, device, generator, latents, scheduler):
         if latents is None:
             latents = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
diffusers/pipelines/kandinsky2_2/__init__.py (new file)
@@ -0,0 +1,7 @@
+from .pipeline_kandinsky2_2 import KandinskyV22Pipeline
+from .pipeline_kandinsky2_2_controlnet import KandinskyV22ControlnetPipeline
+from .pipeline_kandinsky2_2_controlnet_img2img import KandinskyV22ControlnetImg2ImgPipeline
+from .pipeline_kandinsky2_2_img2img import KandinskyV22Img2ImgPipeline
+from .pipeline_kandinsky2_2_inpainting import KandinskyV22InpaintPipeline
+from .pipeline_kandinsky2_2_prior import KandinskyV22PriorPipeline
+from .pipeline_kandinsky2_2_prior_emb2emb import KandinskyV22PriorEmb2EmbPipeline
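The new kandinsky2_2 package is a two-stage system: a prior pipeline maps text to CLIP image embeddings, and a decoder pipeline turns those embeddings into pixels. A hedged end-to-end sketch, with assumed community Hub ids:

# Sketch of the prior -> decoder flow exposed by the new kandinsky2_2 package.
# The checkpoint ids are assumed community weights; a CUDA device is assumed.
import torch
from diffusers import KandinskyV22Pipeline, KandinskyV22PriorPipeline

prior = KandinskyV22PriorPipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
).to("cuda")
decoder = KandinskyV22Pipeline.from_pretrained(
    "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
).to("cuda")

prompt = "a portrait of a red fox, watercolor"
image_embeds, negative_image_embeds = prior(prompt, guidance_scale=1.0).to_tuple()

image = decoder(
    image_embeds=image_embeds,
    negative_image_embeds=negative_image_embeds,
    height=512,
    width=512,
    num_inference_steps=50,
).images[0]
image.save("fox.png")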