diffusers 0.28.2__py3-none-any.whl → 0.29.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +15 -1
- diffusers/commands/env.py +1 -5
- diffusers/dependency_versions_table.py +1 -1
- diffusers/image_processor.py +2 -1
- diffusers/loaders/__init__.py +2 -2
- diffusers/loaders/lora.py +406 -140
- diffusers/loaders/lora_conversion_utils.py +7 -1
- diffusers/loaders/single_file.py +13 -1
- diffusers/loaders/single_file_model.py +15 -8
- diffusers/loaders/single_file_utils.py +267 -17
- diffusers/loaders/unet.py +307 -272
- diffusers/models/__init__.py +7 -3
- diffusers/models/attention.py +125 -1
- diffusers/models/attention_processor.py +169 -1
- diffusers/models/autoencoders/__init__.py +1 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +1 -1
- diffusers/models/autoencoders/autoencoder_kl.py +17 -6
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -2
- diffusers/models/autoencoders/consistency_decoder_vae.py +9 -9
- diffusers/models/autoencoders/vq_model.py +182 -0
- diffusers/models/controlnet_sd3.py +418 -0
- diffusers/models/controlnet_xs.py +6 -6
- diffusers/models/embeddings.py +112 -84
- diffusers/models/model_loading_utils.py +55 -0
- diffusers/models/modeling_utils.py +138 -20
- diffusers/models/normalization.py +11 -6
- diffusers/models/transformers/__init__.py +1 -0
- diffusers/models/transformers/dual_transformer_2d.py +5 -4
- diffusers/models/transformers/hunyuan_transformer_2d.py +149 -2
- diffusers/models/transformers/prior_transformer.py +5 -5
- diffusers/models/transformers/transformer_2d.py +2 -2
- diffusers/models/transformers/transformer_sd3.py +353 -0
- diffusers/models/transformers/transformer_temporal.py +12 -10
- diffusers/models/unets/unet_1d.py +3 -3
- diffusers/models/unets/unet_2d.py +3 -3
- diffusers/models/unets/unet_2d_condition.py +4 -15
- diffusers/models/unets/unet_3d_condition.py +5 -17
- diffusers/models/unets/unet_i2vgen_xl.py +4 -4
- diffusers/models/unets/unet_motion_model.py +4 -4
- diffusers/models/unets/unet_spatio_temporal_condition.py +3 -3
- diffusers/models/vq_model.py +8 -165
- diffusers/pipelines/__init__.py +11 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +4 -3
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +4 -3
- diffusers/pipelines/auto_pipeline.py +8 -0
- diffusers/pipelines/controlnet/pipeline_controlnet.py +4 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +4 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +4 -3
- diffusers/pipelines/controlnet_sd3/__init__.py +53 -0
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +1062 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +4 -3
- diffusers/pipelines/deepfloyd_if/watermark.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +4 -3
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +4 -3
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +4 -3
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +4 -3
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +4 -3
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +24 -5
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +4 -3
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +4 -3
- diffusers/pipelines/marigold/marigold_image_processing.py +35 -20
- diffusers/pipelines/pia/pipeline_pia.py +4 -3
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +1 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +1 -1
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +17 -17
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +7 -6
- diffusers/pipelines/stable_diffusion_3/__init__.py +52 -0
- diffusers/pipelines/stable_diffusion_3/pipeline_output.py +21 -0
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +904 -0
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +941 -0
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +4 -3
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +10 -11
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +4 -3
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +4 -3
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +4 -3
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +4 -3
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +4 -3
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +4 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +4 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +4 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +4 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +4 -3
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +4 -3
- diffusers/schedulers/__init__.py +2 -0
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +2 -3
- diffusers/schedulers/scheduling_edm_euler.py +2 -4
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +287 -0
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/training_utils.py +4 -4
- diffusers/utils/__init__.py +3 -0
- diffusers/utils/constants.py +2 -0
- diffusers/utils/dummy_pt_objects.py +60 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +45 -0
- diffusers/utils/dynamic_modules_utils.py +15 -13
- diffusers/utils/hub_utils.py +106 -0
- diffusers/utils/import_utils.py +0 -1
- diffusers/utils/logging.py +3 -1
- diffusers/utils/state_dict_utils.py +2 -0
- {diffusers-0.28.2.dist-info → diffusers-0.29.1.dist-info}/METADATA +3 -3
- {diffusers-0.28.2.dist-info → diffusers-0.29.1.dist-info}/RECORD +112 -112
- {diffusers-0.28.2.dist-info → diffusers-0.29.1.dist-info}/WHEEL +1 -1
- diffusers/models/dual_transformer_2d.py +0 -20
- diffusers/models/prior_transformer.py +0 -12
- diffusers/models/t5_film_transformer.py +0 -70
- diffusers/models/transformer_2d.py +0 -25
- diffusers/models/transformer_temporal.py +0 -34
- diffusers/models/unet_1d.py +0 -26
- diffusers/models/unet_1d_blocks.py +0 -203
- diffusers/models/unet_2d.py +0 -27
- diffusers/models/unet_2d_blocks.py +0 -375
- diffusers/models/unet_2d_condition.py +0 -25
- {diffusers-0.28.2.dist-info → diffusers-0.29.1.dist-info}/LICENSE +0 -0
- {diffusers-0.28.2.dist-info → diffusers-0.29.1.dist-info}/entry_points.txt +0 -0
- {diffusers-0.28.2.dist-info → diffusers-0.29.1.dist-info}/top_level.txt +0 -0
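
The headline of this release is Stable Diffusion 3 support: a new SD3Transformer2DModel (transformer_sd3.py) and SD3ControlNetModel (controlnet_sd3.py), the StableDiffusion3Pipeline / StableDiffusion3Img2ImgPipeline / StableDiffusion3ControlNetPipeline pipelines, and the FlowMatchEulerDiscreteScheduler. For orientation only, a minimal text-to-image call against the new pipeline looks like the sketch below; the checkpoint id is an assumption, not something this diff pins down.

    # Minimal SD3 text-to-image sketch against diffusers 0.29.x.
    import torch
    from diffusers import StableDiffusion3Pipeline  # exported at the top level in 0.29.x

    pipe = StableDiffusion3Pipeline.from_pretrained(
        "stabilityai/stable-diffusion-3-medium-diffusers",  # assumed checkpoint id
        torch_dtype=torch.float16,
    )
    pipe.to("cuda")

    # SD3 pipelines are wired to the new FlowMatchEulerDiscreteScheduler,
    # so no scheduler swap is needed for basic generation.
    image = pipe("a photo of a corgi wearing a spacesuit", num_inference_steps=28).images[0]
    image.save("corgi.png")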
diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py
CHANGED
@@ -390,9 +390,10 @@ class StableDiffusionControlNetXSPipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)

         return prompt_embeds, negative_prompt_embeds
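
The hunk above is the first instance of a change that repeats across most of the pipeline files below: unscale_lora_layers is now only called when the pipeline actually has a text encoder, so pipelines constructed with text_encoder=None no longer fail at the end of prompt encoding. A minimal sketch of the failure mode, with hypothetical stand-in names:

    # Hypothetical stand-ins to illustrate the guard; not diffusers code.
    class EncoderlessPipeline:
        text_encoder = None  # e.g. a pipeline loaded with text_encoder=None

    def unscale_lora_layers(model, scale):
        model.modules()  # raises AttributeError when model is None

    pipe = EncoderlessPipeline()

    # 0.28.x shape of the code: unconditional call -> AttributeError
    # unscale_lora_layers(pipe.text_encoder, 1.0)

    # 0.29.x shape of the code: guarded, encoder-less pipelines pass through
    if pipe.text_encoder is not None:
        unscale_lora_layers(pipe.text_encoder, 1.0)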
diffusers/pipelines/deepfloyd_if/watermark.py
CHANGED
@@ -17,7 +17,7 @@ class IFWatermarker(ModelMixin, ConfigMixin):
         self.watermark_image_as_pil = None

     def apply_watermark(self, images: List[PIL.Image.Image], sample_size=None):
-        #
+        # Copied from https://github.com/deep-floyd/IF/blob/b77482e36ca2031cb94dbca1001fc1e6400bf4ab/deepfloyd_if/modules/base.py#L287

         h = images[0].height
         w = images[0].width
diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py
CHANGED
@@ -456,9 +456,10 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)

         return prompt_embeds, negative_prompt_embeds
diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py
CHANGED
@@ -426,9 +426,10 @@ class StableDiffusionInpaintPipelineLegacy(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)

         return prompt_embeds, negative_prompt_embeds
diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py
CHANGED
@@ -364,9 +364,10 @@ class StableDiffusionModelEditingPipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)

         return prompt_embeds, negative_prompt_embeds
diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py
CHANGED
@@ -355,9 +355,10 @@ class StableDiffusionParadigmsPipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)

         return prompt_embeds, negative_prompt_embeds
diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py
CHANGED
@@ -578,9 +578,10 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)

         return prompt_embeds, negative_prompt_embeds
diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py
CHANGED
@@ -52,7 +52,9 @@ EXAMPLE_DOC_STRING = """
         >>> import torch
         >>> from diffusers import HunyuanDiTPipeline

-        >>> pipe = HunyuanDiTPipeline.from_pretrained("Tencent-Hunyuan/HunyuanDiT-Diffusers", torch_dtype=torch.float16)
+        >>> pipe = HunyuanDiTPipeline.from_pretrained(
+        ...     "Tencent-Hunyuan/HunyuanDiT-Diffusers", torch_dtype=torch.float16
+        ... )
         >>> pipe.to("cuda")

         >>> # You may also use English prompt as HunyuanDiT supports both English and Chinese

@@ -226,16 +228,22 @@ class HunyuanDiTPipeline(DiffusionPipeline):
                 " checker. If you do not want to use the safety checker, you can pass `'safety_checker=None'` instead."
             )

-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = (
+            2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
+        )
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.register_to_config(requires_safety_checker=requires_safety_checker)
-        self.default_sample_size = self.transformer.config.sample_size
+        self.default_sample_size = (
+            self.transformer.config.sample_size
+            if hasattr(self, "transformer") and self.transformer is not None
+            else 128
+        )

     def encode_prompt(
         self,
         prompt: str,
-        device: torch.device,
-        dtype: torch.dtype,
+        device: torch.device = None,
+        dtype: torch.dtype = None,
         num_images_per_prompt: int = 1,
         do_classifier_free_guidance: bool = True,
         negative_prompt: Optional[str] = None,

@@ -279,6 +287,17 @@ class HunyuanDiTPipeline(DiffusionPipeline):
             text_encoder_index (`int`, *optional*):
                 Index of the text encoder to use. `0` for clip and `1` for T5.
         """
+        if dtype is None:
+            if self.text_encoder_2 is not None:
+                dtype = self.text_encoder_2.dtype
+            elif self.transformer is not None:
+                dtype = self.transformer.dtype
+            else:
+                dtype = None
+
+        if device is None:
+            device = self._execution_device
+
         tokenizers = [self.tokenizer, self.tokenizer_2]
         text_encoders = [self.text_encoder, self.text_encoder_2]
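
Both HunyuanDiT constructor attributes now tolerate missing components, and encode_prompt derives device and dtype itself when the caller omits them. With a standard SD-style VAE whose block_out_channels has four entries, the computed scale factor equals the hard-coded fallback, as the arithmetic below checks (the config values are the usual SD VAE defaults, assumed here for illustration):

    # Worked check of the vae_scale_factor fallback.
    block_out_channels = [128, 256, 512, 512]  # assumed standard SD VAE config
    vae_scale_factor = 2 ** (len(block_out_channels) - 1)
    assert vae_scale_factor == 8  # matches the `else 8` branch when vae is None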
diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py
CHANGED
@@ -405,9 +405,10 @@ class LatentConsistencyModelImg2ImgPipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)

         return prompt_embeds, negative_prompt_embeds
diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py
CHANGED
@@ -389,9 +389,10 @@ class LatentConsistencyModelPipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)

         return prompt_embeds, negative_prompt_embeds
diffusers/pipelines/marigold/marigold_image_processing.py
CHANGED
@@ -245,9 +245,9 @@ class MarigoldImageProcessor(ConfigMixin):
     ) -> Union[np.ndarray, torch.Tensor]:
         """
         Converts a monochrome image into an RGB image by applying the specified colormap. This function mimics the
-        behavior of matplotlib.colormaps, but allows the user to use the most discriminative color
-        without having to install or import matplotlib. For all other cases, the function will attempt to use
-        native implementation.
+        behavior of matplotlib.colormaps, but allows the user to use the most discriminative color maps ("Spectral",
+        "binary") without having to install or import matplotlib. For all other cases, the function will attempt to use
+        the native implementation.

         Args:
             image: 2D tensor of values between 0 and 1, either as np.ndarray or torch.Tensor.

@@ -255,7 +255,7 @@ class MarigoldImageProcessor(ConfigMixin):
             bytes: Whether to return the output as uint8 or floating point image.
             _force_method:
                 Can be used to specify whether to use the native implementation (`"matplotlib"`), the efficient custom
-                implementation of the
+                implementation of the select color maps (`"custom"`), or rely on autodetection (`None`, default).

         Returns:
             An RGB-colorized tensor corresponding to the input image.

@@ -265,6 +265,26 @@ class MarigoldImageProcessor(ConfigMixin):
         if _force_method not in (None, "matplotlib", "custom"):
             raise ValueError("_force_method must be either `None`, `'matplotlib'` or `'custom'`.")

+        supported_cmaps = {
+            "binary": [
+                (1.0, 1.0, 1.0),
+                (0.0, 0.0, 0.0),
+            ],
+            "Spectral": [  # Taken from matplotlib/_cm.py
+                (0.61960784313725492, 0.003921568627450980, 0.25882352941176473),  # 0.0 -> [0]
+                (0.83529411764705885, 0.24313725490196078, 0.30980392156862746),
+                (0.95686274509803926, 0.42745098039215684, 0.2627450980392157),
+                (0.99215686274509807, 0.68235294117647061, 0.38039215686274508),
+                (0.99607843137254903, 0.8784313725490196, 0.54509803921568623),
+                (1.0, 1.0, 0.74901960784313726),
+                (0.90196078431372551, 0.96078431372549022, 0.59607843137254901),
+                (0.6705882352941176, 0.8666666666666667, 0.64313725490196083),
+                (0.4, 0.76078431372549016, 0.6470588235294118),
+                (0.19607843137254902, 0.53333333333333333, 0.74117647058823533),
+                (0.36862745098039218, 0.30980392156862746, 0.63529411764705879),  # 1.0 -> [K-1]
+            ],
+        }
+
         def method_matplotlib(image, cmap, bytes=False):
             if is_matplotlib_available():
                 import matplotlib

@@ -298,24 +318,19 @@ class MarigoldImageProcessor(ConfigMixin):
             else:
                 image = image.float()

-            if cmap != "Spectral":
-                raise ValueError("Only 'Spectral' color map is available without installing matplotlib.")
+            is_cmap_reversed = cmap.endswith("_r")
+            if is_cmap_reversed:
+                cmap = cmap[:-2]

-            _Spectral_data = (  # Taken from matplotlib/_cm.py
-                (0.61960784313725492, 0.003921568627450980, 0.25882352941176473),  # 0.0 -> [0]
-                (0.83529411764705885, 0.24313725490196078, 0.30980392156862746),
-                (0.95686274509803926, 0.42745098039215684, 0.2627450980392157),
-                (0.99215686274509807, 0.68235294117647061, 0.38039215686274508),
-                (0.99607843137254903, 0.8784313725490196, 0.54509803921568623),
-                (1.0, 1.0, 0.74901960784313726),
-                (0.90196078431372551, 0.96078431372549022, 0.59607843137254901),
-                (0.6705882352941176, 0.8666666666666667, 0.64313725490196083),
-                (0.4, 0.76078431372549016, 0.6470588235294118),
-                (0.19607843137254902, 0.53333333333333333, 0.74117647058823533),
-                (0.36862745098039218, 0.30980392156862746, 0.63529411764705879),  # 1.0 -> [K-1]
-            )
+            if cmap not in supported_cmaps:
+                raise ValueError(
+                    f"Only {list(supported_cmaps.keys())} color maps are available without installing matplotlib."
+                )

-            cmap = torch.tensor(_Spectral_data, dtype=torch.float, device=image.device)  # [K,3]
+            cmap = supported_cmaps[cmap]
+            if is_cmap_reversed:
+                cmap = cmap[::-1]
+            cmap = torch.tensor(cmap, dtype=torch.float, device=image.device)  # [K,3]
             K = cmap.shape[0]

             pos = image.clamp(min=0, max=1) * (K - 1)
diffusers/pipelines/pia/pipeline_pia.py
CHANGED
@@ -375,9 +375,10 @@ class PIAPipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)

         return prompt_embeds, negative_prompt_embeds
diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py
CHANGED
@@ -394,7 +394,7 @@ class PixArtAlphaPipeline(DiffusionPipeline):

         # get unconditional embeddings for classifier free guidance
         if do_classifier_free_guidance and negative_prompt_embeds is None:
-            uncond_tokens = [negative_prompt] * batch_size
+            uncond_tokens = [negative_prompt] * batch_size if isinstance(negative_prompt, str) else negative_prompt
             uncond_tokens = self._text_preprocessing(uncond_tokens, clean_caption=clean_caption)
             max_length = prompt_embeds.shape[1]
             uncond_input = self.tokenizer(
diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py
CHANGED
@@ -320,7 +320,7 @@ class PixArtSigmaPipeline(DiffusionPipeline):

         # get unconditional embeddings for classifier free guidance
         if do_classifier_free_guidance and negative_prompt_embeds is None:
-            uncond_tokens = [negative_prompt] * batch_size
+            uncond_tokens = [negative_prompt] * batch_size if isinstance(negative_prompt, str) else negative_prompt
             uncond_tokens = self._text_preprocessing(uncond_tokens, clean_caption=clean_caption)
             max_length = prompt_embeds.shape[1]
             uncond_input = self.tokenizer(
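
The identical one-line fix in both PixArt pipelines repairs batched negative prompts; previously a list was wrapped again into a list of lists before tokenization:

    # Before 0.29.x: a per-image negative prompt list got double-wrapped.
    negative_prompt = ["blurry", "low quality"]  # one per image in the batch
    batch_size = 2

    old = [negative_prompt] * batch_size  # [[...], [...]] -- nested, wrong
    new = [negative_prompt] * batch_size if isinstance(negative_prompt, str) else negative_prompt
    assert new == ["blurry", "low quality"]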
diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py
CHANGED
@@ -376,6 +376,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):

         # 2. Define call parameters
         batch_size = 1 if isinstance(prompt, str) else len(prompt)
+        device = self._execution_device

         if editing_prompt:
             enable_edit_guidance = True

@@ -405,7 +406,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                 f" {self.tokenizer.model_max_length} tokens: {removed_text}"
             )
             text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length]
-            text_embeddings = self.text_encoder(text_input_ids.to(self.device))[0]
+            text_embeddings = self.text_encoder(text_input_ids.to(device))[0]

             # duplicate text embeddings for each generation per prompt, using mps friendly method
             bs_embed, seq_len, _ = text_embeddings.shape

@@ -433,9 +434,9 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                 f" {self.tokenizer.model_max_length} tokens: {removed_text}"
             )
             edit_concepts_input_ids = edit_concepts_input_ids[:, : self.tokenizer.model_max_length]
-            edit_concepts = self.text_encoder(edit_concepts_input_ids.to(self.device))[0]
+            edit_concepts = self.text_encoder(edit_concepts_input_ids.to(device))[0]
         else:
-            edit_concepts = editing_prompt_embeddings.to(self.device).repeat(batch_size, 1, 1)
+            edit_concepts = editing_prompt_embeddings.to(device).repeat(batch_size, 1, 1)

         # duplicate text embeddings for each generation per prompt, using mps friendly method
         bs_embed_edit, seq_len_edit, _ = edit_concepts.shape

@@ -476,7 +477,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                 truncation=True,
                 return_tensors="pt",
             )
-            uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0]
+            uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(device))[0]

             # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
             seq_len = uncond_embeddings.shape[1]

@@ -493,7 +494,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
         # get the initial random noise unless the user supplied it

         # 4. Prepare timesteps
-        self.scheduler.set_timesteps(num_inference_steps, device=self.device)
+        self.scheduler.set_timesteps(num_inference_steps, device=device)
         timesteps = self.scheduler.timesteps

         # 5. Prepare latent variables

@@ -504,7 +505,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
             height,
             width,
             text_embeddings.dtype,
-            self.device,
+            device,
             generator,
             latents,
         )

@@ -562,12 +563,12 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                 if enable_edit_guidance:
                     concept_weights = torch.zeros(
                         (len(noise_pred_edit_concepts), noise_guidance.shape[0]),
-                        device=self.device,
+                        device=device,
                         dtype=noise_guidance.dtype,
                     )
                     noise_guidance_edit = torch.zeros(
                         (len(noise_pred_edit_concepts), *noise_guidance.shape),
-                        device=self.device,
+                        device=device,
                         dtype=noise_guidance.dtype,
                     )
                     # noise_guidance_edit = torch.zeros_like(noise_guidance)

@@ -644,21 +645,19 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):

                         # noise_guidance_edit = noise_guidance_edit + noise_guidance_edit_tmp

-                    warmup_inds = torch.tensor(warmup_inds).to(self.device)
+                    warmup_inds = torch.tensor(warmup_inds).to(device)
                     if len(noise_pred_edit_concepts) > warmup_inds.shape[0] > 0:
                         concept_weights = concept_weights.to("cpu")  # Offload to cpu
                         noise_guidance_edit = noise_guidance_edit.to("cpu")

-                        concept_weights_tmp = torch.index_select(concept_weights.to(self.device), 0, warmup_inds)
+                        concept_weights_tmp = torch.index_select(concept_weights.to(device), 0, warmup_inds)
                         concept_weights_tmp = torch.where(
                             concept_weights_tmp < 0, torch.zeros_like(concept_weights_tmp), concept_weights_tmp
                         )
                         concept_weights_tmp = concept_weights_tmp / concept_weights_tmp.sum(dim=0)
                         # concept_weights_tmp = torch.nan_to_num(concept_weights_tmp)

-                        noise_guidance_edit_tmp = torch.index_select(
-                            noise_guidance_edit.to(self.device), 0, warmup_inds
-                        )
+                        noise_guidance_edit_tmp = torch.index_select(noise_guidance_edit.to(device), 0, warmup_inds)
                         noise_guidance_edit_tmp = torch.einsum(
                             "cb,cbijk->bijk", concept_weights_tmp, noise_guidance_edit_tmp
                         )

@@ -669,8 +668,8 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):

                         del noise_guidance_edit_tmp
                         del concept_weights_tmp
-                        concept_weights = concept_weights.to(self.device)
-                        noise_guidance_edit = noise_guidance_edit.to(self.device)
+                        concept_weights = concept_weights.to(device)
+                        noise_guidance_edit = noise_guidance_edit.to(device)

                     concept_weights = torch.where(
                         concept_weights < 0, torch.zeros_like(concept_weights), concept_weights

@@ -679,6 +678,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                     concept_weights = torch.nan_to_num(concept_weights)

                     noise_guidance_edit = torch.einsum("cb,cbijk->bijk", concept_weights, noise_guidance_edit)
+                    noise_guidance_edit = noise_guidance_edit.to(edit_momentum.device)

                     noise_guidance_edit = noise_guidance_edit + edit_momentum_scale * edit_momentum

@@ -689,7 +689,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                     self.sem_guidance[i] = noise_guidance_edit.detach().cpu()

                 if sem_guidance is not None:
-                    edit_guidance = sem_guidance[i].to(self.device)
+                    edit_guidance = sem_guidance[i].to(device)
                     noise_guidance = noise_guidance + edit_guidance

                 noise_pred = noise_pred_uncond + noise_guidance

@@ -705,7 +705,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
         # 8. Post-processing
         if not output_type == "latent":
            image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
-            image, has_nsfw_concept = self.run_safety_checker(image, self.device, text_embeddings.dtype)
+            image, has_nsfw_concept = self.run_safety_checker(image, device, text_embeddings.dtype)
        else:
            image = latents
            has_nsfw_concept = None
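
All of the semantic-guidance hunks make the same substitution: self.device becomes a device variable captured once from self._execution_device, which resolves to the device the forward pass will actually run on even when hooks keep modules offloaded to CPU. That makes the pipeline usable with model offloading, roughly as sketched here (the checkpoint id is an assumption):

    import torch
    from diffusers import SemanticStableDiffusionPipeline

    pipe = SemanticStableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",  # assumed base checkpoint
        torch_dtype=torch.float16,
    )
    pipe.enable_model_cpu_offload()  # modules stay on CPU until needed

    # Under offload, naive self.device would report "cpu"; _execution_device
    # follows the offload hooks, so intermediate tensors land on the GPU.
    out = pipe("a castle on a hill", editing_prompt=["oil painting"], num_inference_steps=20)
    out.images[0].save("castle.png")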
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
CHANGED
@@ -474,9 +474,10 @@ class StableDiffusionPipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)

         return prompt_embeds, negative_prompt_embeds
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py
CHANGED
@@ -357,9 +357,10 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)

         return prompt_embeds, negative_prompt_embeds

@@ -545,7 +546,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader

         if depth_map is None:
             pixel_values = self.feature_extractor(images=image, return_tensors="pt").pixel_values
-            pixel_values = pixel_values.to(device=device)
+            pixel_values = pixel_values.to(device=device, dtype=dtype)
             # The DPT-Hybrid model uses batch-norm layers which are not compatible with fp16.
             # So we use `torch.autocast` here for half precision inference.
             if torch.backends.mps.is_available():
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
CHANGED
@@ -517,9 +517,10 @@ class StableDiffusionImg2ImgPipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)

         return prompt_embeds, negative_prompt_embeds
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
CHANGED
@@ -589,9 +589,10 @@ class StableDiffusionInpaintPipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)

         return prompt_embeds, negative_prompt_embeds
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
CHANGED
@@ -377,9 +377,10 @@ class StableDiffusionUpscalePipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)

         return prompt_embeds, negative_prompt_embeds
diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py
CHANGED
@@ -458,9 +458,10 @@ class StableUnCLIPPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInver
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)

         return prompt_embeds, negative_prompt_embeds
diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py
CHANGED
@@ -51,8 +51,8 @@ EXAMPLE_DOC_STRING = """
         >>> from diffusers import StableUnCLIPImg2ImgPipeline

         >>> pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
-        ...     "
-        ... )
+        ...     "stabilityai/stable-diffusion-2-1-unclip-small", torch_dtype=torch.float16
+        ... )
         >>> pipe = pipe.to("cuda")

         >>> url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg"

@@ -63,7 +63,7 @@ EXAMPLE_DOC_STRING = """

         >>> prompt = "A fantasy landscape, trending on artstation"

-        >>> images = pipe(
+        >>> images = pipe(init_image, prompt).images
         >>> images[0].save("fantasy_landscape.png")
         ```
         """

@@ -422,9 +422,10 @@ class StableUnCLIPImg2ImgPipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)

-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)

         return prompt_embeds, negative_prompt_embeds
diffusers/pipelines/stable_diffusion_3/__init__.py
ADDED
@@ -0,0 +1,52 @@
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_flax_available,
+    is_torch_available,
+    is_transformers_available,
+)
+
+
+_dummy_objects = {}
+_additional_imports = {}
+_import_structure = {"pipeline_output": ["StableDiffusion3PipelineOutput"]}
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["pipeline_stable_diffusion_3"] = ["StableDiffusion3Pipeline"]
+    _import_structure["pipeline_stable_diffusion_3_img2img"] = ["StableDiffusion3Img2ImgPipeline"]
+
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *  # noqa F403
+    else:
+        from .pipeline_stable_diffusion_3 import StableDiffusion3Pipeline
+        from .pipeline_stable_diffusion_3_img2img import StableDiffusion3Img2ImgPipeline
+
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)
+    for name, value in _additional_imports.items():
+        setattr(sys.modules[__name__], name, value)
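
The new subpackage follows the _LazyModule convention used across diffusers, so the SD3 pipelines resolve from either import path and the submodule is only materialized on first attribute access:

    # Top-level exports, added to diffusers/__init__.py in this release:
    from diffusers import StableDiffusion3Img2ImgPipeline, StableDiffusion3Pipeline

    # Equivalent, through the subpackage registered above:
    from diffusers.pipelines.stable_diffusion_3 import (
        StableDiffusion3Img2ImgPipeline,
        StableDiffusion3Pipeline,
    )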