diffusers-0.29.2-py3-none-any.whl → diffusers-0.30.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +94 -3
- diffusers/commands/env.py +1 -5
- diffusers/configuration_utils.py +4 -9
- diffusers/dependency_versions_table.py +2 -2
- diffusers/image_processor.py +1 -2
- diffusers/loaders/__init__.py +17 -2
- diffusers/loaders/ip_adapter.py +10 -7
- diffusers/loaders/lora_base.py +752 -0
- diffusers/loaders/lora_pipeline.py +2222 -0
- diffusers/loaders/peft.py +213 -5
- diffusers/loaders/single_file.py +1 -12
- diffusers/loaders/single_file_model.py +31 -10
- diffusers/loaders/single_file_utils.py +262 -2
- diffusers/loaders/textual_inversion.py +1 -6
- diffusers/loaders/unet.py +23 -208
- diffusers/models/__init__.py +20 -0
- diffusers/models/activations.py +22 -0
- diffusers/models/attention.py +386 -7
- diffusers/models/attention_processor.py +1795 -629
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_kl.py +14 -3
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +1035 -0
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +1 -1
- diffusers/models/autoencoders/autoencoder_oobleck.py +464 -0
- diffusers/models/autoencoders/autoencoder_tiny.py +1 -0
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vq_model.py +4 -4
- diffusers/models/controlnet.py +2 -3
- diffusers/models/controlnet_hunyuan.py +401 -0
- diffusers/models/controlnet_sd3.py +11 -11
- diffusers/models/controlnet_sparsectrl.py +789 -0
- diffusers/models/controlnet_xs.py +40 -10
- diffusers/models/downsampling.py +68 -0
- diffusers/models/embeddings.py +319 -36
- diffusers/models/model_loading_utils.py +1 -3
- diffusers/models/modeling_flax_utils.py +1 -6
- diffusers/models/modeling_utils.py +4 -16
- diffusers/models/normalization.py +203 -12
- diffusers/models/transformers/__init__.py +6 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +527 -0
- diffusers/models/transformers/cogvideox_transformer_3d.py +345 -0
- diffusers/models/transformers/hunyuan_transformer_2d.py +19 -15
- diffusers/models/transformers/latte_transformer_3d.py +327 -0
- diffusers/models/transformers/lumina_nextdit2d.py +340 -0
- diffusers/models/transformers/pixart_transformer_2d.py +102 -1
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/stable_audio_transformer.py +458 -0
- diffusers/models/transformers/transformer_flux.py +455 -0
- diffusers/models/transformers/transformer_sd3.py +18 -4
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_condition.py +8 -1
- diffusers/models/unets/unet_3d_blocks.py +51 -920
- diffusers/models/unets/unet_3d_condition.py +4 -1
- diffusers/models/unets/unet_i2vgen_xl.py +4 -1
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +1330 -84
- diffusers/models/unets/unet_spatio_temporal_condition.py +1 -1
- diffusers/models/unets/unet_stable_cascade.py +1 -3
- diffusers/models/unets/uvit_2d.py +1 -1
- diffusers/models/upsampling.py +64 -0
- diffusers/models/vq_model.py +8 -4
- diffusers/optimization.py +1 -1
- diffusers/pipelines/__init__.py +100 -3
- diffusers/pipelines/animatediff/__init__.py +4 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +50 -40
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +1076 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +17 -27
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +1008 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +51 -38
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +1 -0
- diffusers/pipelines/aura_flow/__init__.py +48 -0
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +591 -0
- diffusers/pipelines/auto_pipeline.py +97 -19
- diffusers/pipelines/cogvideo/__init__.py +48 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +687 -0
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +24 -30
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +31 -30
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +24 -153
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +19 -28
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +18 -28
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +29 -32
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -2
- diffusers/pipelines/controlnet_hunyuandit/__init__.py +48 -0
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +1042 -0
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +35 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +10 -6
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +0 -4
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +2 -2
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +2 -2
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -6
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +6 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +6 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +10 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +3 -3
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +1 -1
- diffusers/pipelines/flux/__init__.py +47 -0
- diffusers/pipelines/flux/pipeline_flux.py +749 -0
- diffusers/pipelines/flux/pipeline_output.py +21 -0
- diffusers/pipelines/free_init_utils.py +2 -0
- diffusers/pipelines/free_noise_utils.py +236 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +2 -2
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +2 -2
- diffusers/pipelines/kolors/__init__.py +54 -0
- diffusers/pipelines/kolors/pipeline_kolors.py +1070 -0
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +1247 -0
- diffusers/pipelines/kolors/pipeline_output.py +21 -0
- diffusers/pipelines/kolors/text_encoder.py +889 -0
- diffusers/pipelines/kolors/tokenizer.py +334 -0
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +30 -29
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +23 -29
- diffusers/pipelines/latte/__init__.py +48 -0
- diffusers/pipelines/latte/pipeline_latte.py +881 -0
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +4 -4
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +0 -4
- diffusers/pipelines/lumina/__init__.py +48 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +897 -0
- diffusers/pipelines/pag/__init__.py +67 -0
- diffusers/pipelines/pag/pag_utils.py +237 -0
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1329 -0
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +1612 -0
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +953 -0
- diffusers/pipelines/pag/pipeline_pag_kolors.py +1136 -0
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +872 -0
- diffusers/pipelines/pag/pipeline_pag_sd.py +1050 -0
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +985 -0
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +862 -0
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +1333 -0
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +1529 -0
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +1753 -0
- diffusers/pipelines/pia/pipeline_pia.py +30 -37
- diffusers/pipelines/pipeline_flax_utils.py +4 -9
- diffusers/pipelines/pipeline_loading_utils.py +0 -3
- diffusers/pipelines/pipeline_utils.py +2 -14
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +0 -1
- diffusers/pipelines/stable_audio/__init__.py +50 -0
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +158 -0
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +745 -0
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +2 -0
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +23 -29
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +15 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +30 -29
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +23 -152
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +8 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +11 -11
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +8 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +6 -6
- diffusers/pipelines/stable_diffusion_3/__init__.py +2 -0
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +34 -3
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +33 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +1201 -0
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +3 -3
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +6 -6
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +5 -5
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +5 -5
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +6 -6
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +0 -4
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +23 -29
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +27 -29
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +3 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +17 -27
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +26 -29
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +17 -145
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +0 -4
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +6 -6
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -28
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +8 -6
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +8 -6
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +6 -4
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +0 -4
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +3 -3
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +5 -4
- diffusers/schedulers/__init__.py +8 -0
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +572 -0
- diffusers/schedulers/scheduling_ddim.py +1 -1
- diffusers/schedulers/scheduling_ddim_cogvideox.py +449 -0
- diffusers/schedulers/scheduling_ddpm.py +1 -1
- diffusers/schedulers/scheduling_ddpm_parallel.py +1 -1
- diffusers/schedulers/scheduling_deis_multistep.py +2 -2
- diffusers/schedulers/scheduling_dpm_cogvideox.py +489 -0
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +1 -1
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +1 -1
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +64 -19
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +2 -2
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +63 -39
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +321 -0
- diffusers/schedulers/scheduling_ipndm.py +1 -1
- diffusers/schedulers/scheduling_unipc_multistep.py +1 -1
- diffusers/schedulers/scheduling_utils.py +1 -3
- diffusers/schedulers/scheduling_utils_flax.py +1 -3
- diffusers/training_utils.py +99 -14
- diffusers/utils/__init__.py +2 -2
- diffusers/utils/dummy_pt_objects.py +210 -0
- diffusers/utils/dummy_torch_and_torchsde_objects.py +15 -0
- diffusers/utils/dummy_torch_and_transformers_and_sentencepiece_objects.py +47 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +315 -0
- diffusers/utils/dynamic_modules_utils.py +1 -11
- diffusers/utils/export_utils.py +1 -4
- diffusers/utils/hub_utils.py +45 -42
- diffusers/utils/import_utils.py +19 -16
- diffusers/utils/loading_utils.py +76 -3
- diffusers/utils/testing_utils.py +11 -8
- {diffusers-0.29.2.dist-info → diffusers-0.30.0.dist-info}/METADATA +73 -83
- {diffusers-0.29.2.dist-info → diffusers-0.30.0.dist-info}/RECORD +217 -164
- {diffusers-0.29.2.dist-info → diffusers-0.30.0.dist-info}/WHEEL +1 -1
- diffusers/loaders/autoencoder.py +0 -146
- diffusers/loaders/controlnet.py +0 -136
- diffusers/loaders/lora.py +0 -1728
- {diffusers-0.29.2.dist-info → diffusers-0.30.0.dist-info}/LICENSE +0 -0
- {diffusers-0.29.2.dist-info → diffusers-0.30.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.29.2.dist-info → diffusers-0.30.0.dist-info}/top_level.txt +0 -0
diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py
CHANGED
@@ -30,9 +30,12 @@ from ...models.controlnet_sd3 import SD3ControlNetModel, SD3MultiControlNetModel
 from ...models.transformers import SD3Transformer2DModel
 from ...schedulers import FlowMatchEulerDiscreteScheduler
 from ...utils import (
+    USE_PEFT_BACKEND,
     is_torch_xla_available,
     logging,
     replace_example_docstring,
+    scale_lora_layers,
+    unscale_lora_layers,
 )
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
@@ -346,6 +349,7 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
         negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
         clip_skip: Optional[int] = None,
         max_sequence_length: int = 256,
+        lora_scale: Optional[float] = None,
     ):
         r"""
 
@@ -391,9 +395,22 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
             clip_skip (`int`, *optional*):
                 Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
                 the output of the pre-final layer will be used for computing the prompt embeddings.
+            lora_scale (`float`, *optional*):
+                A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
         """
         device = device or self._execution_device
 
+        # set lora scale so that monkey patched LoRA
+        # function of text encoder can correctly access it
+        if lora_scale is not None and isinstance(self, SD3LoraLoaderMixin):
+            self._lora_scale = lora_scale
+
+            # dynamically adjust the LoRA scale
+            if self.text_encoder is not None and USE_PEFT_BACKEND:
+                scale_lora_layers(self.text_encoder, lora_scale)
+            if self.text_encoder_2 is not None and USE_PEFT_BACKEND:
+                scale_lora_layers(self.text_encoder_2, lora_scale)
+
         prompt = [prompt] if isinstance(prompt, str) else prompt
         if prompt is not None:
             batch_size = len(prompt)
@@ -496,6 +513,16 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
                 [negative_pooled_prompt_embed, negative_pooled_prompt_2_embed], dim=-1
             )
 
+        if self.text_encoder is not None:
+            if isinstance(self, SD3LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)
+
+        if self.text_encoder_2 is not None:
+            if isinstance(self, SD3LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder_2, lora_scale)
+
         return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
 
     def check_inputs(
@@ -513,6 +540,7 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
         pooled_prompt_embeds=None,
         negative_pooled_prompt_embeds=None,
         callback_on_step_end_tensor_inputs=None,
+        max_sequence_length=None,
     ):
         if height % 8 != 0 or width % 8 != 0:
             raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
@@ -584,6 +612,9 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
                 "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
             )
 
+        if max_sequence_length is not None and max_sequence_length > 512:
+            raise ValueError(f"`max_sequence_length` cannot be greater than 512 but is {max_sequence_length}")
+
     # Copied from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3.StableDiffusion3Pipeline.prepare_latents
     def prepare_latents(
         self,
@@ -710,6 +741,7 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
         clip_skip: Optional[int] = None,
         callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
         callback_on_step_end_tensor_inputs: List[str] = ["latents"],
+        max_sequence_length: int = 256,
     ):
         r"""
         Function invoked when calling the pipeline for generation.
@@ -811,6 +843,7 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
                 `._callback_tensor_inputs` attribute of your pipeline class.
+            max_sequence_length (`int` defaults to 256): Maximum sequence length to use with the `prompt`.
 
         Examples:
 
@@ -850,6 +883,7 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
             pooled_prompt_embeds=pooled_prompt_embeds,
             negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
            callback_on_step_end_tensor_inputs=callback_on_step_end_tensor_inputs,
+            max_sequence_length=max_sequence_length,
         )
 
         self._guidance_scale = guidance_scale
@@ -888,6 +922,7 @@ class StableDiffusion3ControlNetPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
             device=device,
             clip_skip=self.clip_skip,
             num_images_per_prompt=num_images_per_prompt,
+            max_sequence_length=max_sequence_length,
         )
 
         if self.do_classifier_free_guidance:
diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py
CHANGED
@@ -23,7 +23,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
 from ...callbacks import MultiPipelineCallbacks, PipelineCallback
 from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
+from ...loaders import FromSingleFileMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, ControlNetXSAdapter, UNet2DConditionModel, UNetControlNetXSModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
@@ -90,7 +90,11 @@ EXAMPLE_DOC_STRING = """
 
 
 class StableDiffusionControlNetXSPipeline(
-    DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+    DiffusionPipeline,
+    StableDiffusionMixin,
+    TextualInversionLoaderMixin,
+    StableDiffusionLoraLoaderMixin,
+    FromSingleFileMixin,
 ):
     r"""
     Pipeline for text-to-image generation using Stable Diffusion with ControlNet-XS guidance.
@@ -100,8 +104,8 @@ class StableDiffusionControlNetXSPipeline(
 
     The pipeline also inherits the following loading methods:
         - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
-        - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
-        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+        - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
         - [`loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
 
     Args:
@@ -258,7 +262,7 @@ class StableDiffusionControlNetXSPipeline(
         """
         # set lora scale so that monkey patched LoRA
         # function of text encoder can correctly access it
-        if lora_scale is not None and isinstance(self, LoraLoaderMixin):
+        if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
            self._lora_scale = lora_scale
 
         # dynamically adjust the LoRA scale
@@ -391,7 +395,7 @@ class StableDiffusionControlNetXSPipeline(
         negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
         if self.text_encoder is not None:
-            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+            if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
                 # Retrieve the original scale by scaling back the LoRA layers
                 unscale_lora_layers(self.text_encoder, lora_scale)
 
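With these mixins in place, the standard loader methods become available on the ControlNet-XS pipeline. A sketch under the assumption of hypothetical checkpoint paths and repo ids (only the class and method names come from this diff):

    import torch
    from diffusers import ControlNetXSAdapter, StableDiffusionControlNetXSPipeline
    from diffusers.utils import load_image

    # Hypothetical repo ids and local paths, for illustration only.
    controlnet = ControlNetXSAdapter.from_pretrained("some-org/controlnet-xs-canny", torch_dtype=torch.float16)
    pipe = StableDiffusionControlNetXSPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
    ).to("cuda")

    # Loader methods newly inherited from the mixins:
    pipe.load_lora_weights("path/to/lora.safetensors")    # StableDiffusionLoraLoaderMixin
    pipe.load_textual_inversion("path/to/embedding.bin")  # TextualInversionLoaderMixin

    canny = load_image("https://example.com/canny.png")   # placeholder control image
    image = pipe("an astronaut riding a horse, line art", image=canny).images[0]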
diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py
CHANGED
@@ -34,8 +34,6 @@ from ...loaders import FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, Te
 from ...models import AutoencoderKL, ControlNetXSAdapter, UNet2DConditionModel, UNetControlNetXSModel
 from ...models.attention_processor import (
     AttnProcessor2_0,
-    LoRAAttnProcessor2_0,
-    LoRAXFormersAttnProcessor,
     XFormersAttnProcessor,
 )
 from ...models.lora import adjust_lora_scale_text_encoder
@@ -678,8 +676,6 @@ class StableDiffusionXLControlNetXSPipeline(
             (
                 AttnProcessor2_0,
                 XFormersAttnProcessor,
-                LoRAXFormersAttnProcessor,
-                LoRAAttnProcessor2_0,
             ),
         )
         # if xformers or torch_2_0 is used attention block does not need
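Only removals here: the deprecated `LoRAAttnProcessor2_0` and `LoRAXFormersAttnProcessor` classes leave both the import list and the `isinstance` check in `upcast_vae`, since LoRA now runs through the PEFT backend rather than dedicated attention processors. A small sketch of the equivalent trimmed test (the helper name is illustrative):

    from diffusers.models.attention_processor import AttnProcessor2_0, XFormersAttnProcessor

    def uses_fused_attention(vae) -> bool:
        # Mirrors the trimmed isinstance tuple above: only the two standard
        # processors need to be recognized once the LoRA variants are removed.
        processor = vae.decoder.mid_block.attentions[0].processor
        return isinstance(processor, (AttnProcessor2_0, XFormersAttnProcessor))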
diffusers/pipelines/deepfloyd_if/pipeline_if.py
CHANGED
@@ -7,7 +7,7 @@ from typing import Any, Callable, Dict, List, Optional, Union
 import torch
 from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer
 
-from ...loaders import LoraLoaderMixin
+from ...loaders import StableDiffusionLoraLoaderMixin
 from ...models import UNet2DConditionModel
 from ...schedulers import DDPMScheduler
 from ...utils import (
@@ -84,7 +84,7 @@ EXAMPLE_DOC_STRING = """
 """
 
 
-class IFPipeline(DiffusionPipeline, LoraLoaderMixin):
+class IFPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
     tokenizer: T5Tokenizer
     text_encoder: T5EncoderModel
 
diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py
CHANGED
@@ -9,7 +9,7 @@ import PIL.Image
 import torch
 from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer
 
-from ...loaders import LoraLoaderMixin
+from ...loaders import StableDiffusionLoraLoaderMixin
 from ...models import UNet2DConditionModel
 from ...schedulers import DDPMScheduler
 from ...utils import (
@@ -108,7 +108,7 @@ EXAMPLE_DOC_STRING = """
 """
 
 
-class IFImg2ImgPipeline(DiffusionPipeline, LoraLoaderMixin):
+class IFImg2ImgPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
     tokenizer: T5Tokenizer
     text_encoder: T5EncoderModel
 
diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py
CHANGED
@@ -10,7 +10,7 @@ import torch
 import torch.nn.functional as F
 from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer
 
-from ...loaders import LoraLoaderMixin
+from ...loaders import StableDiffusionLoraLoaderMixin
 from ...models import UNet2DConditionModel
 from ...schedulers import DDPMScheduler
 from ...utils import (
@@ -111,7 +111,7 @@ EXAMPLE_DOC_STRING = """
 """
 
 
-class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
+class IFImg2ImgSuperResolutionPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
     tokenizer: T5Tokenizer
     text_encoder: T5EncoderModel
 
diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py
CHANGED
@@ -9,7 +9,7 @@ import PIL.Image
 import torch
 from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer
 
-from ...loaders import LoraLoaderMixin
+from ...loaders import StableDiffusionLoraLoaderMixin
 from ...models import UNet2DConditionModel
 from ...schedulers import DDPMScheduler
 from ...utils import (
@@ -111,7 +111,7 @@ EXAMPLE_DOC_STRING = """
 """
 
 
-class IFInpaintingPipeline(DiffusionPipeline, LoraLoaderMixin):
+class IFInpaintingPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
     tokenizer: T5Tokenizer
     text_encoder: T5EncoderModel
 
diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py
CHANGED
@@ -10,7 +10,7 @@ import torch
 import torch.nn.functional as F
 from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer
 
-from ...loaders import LoraLoaderMixin
+from ...loaders import StableDiffusionLoraLoaderMixin
 from ...models import UNet2DConditionModel
 from ...schedulers import DDPMScheduler
 from ...utils import (
@@ -113,7 +113,7 @@ EXAMPLE_DOC_STRING = """
 """
 
 
-class IFInpaintingSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
+class IFInpaintingSuperResolutionPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
     tokenizer: T5Tokenizer
     text_encoder: T5EncoderModel
 
diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py
CHANGED
@@ -10,7 +10,7 @@ import torch
 import torch.nn.functional as F
 from transformers import CLIPImageProcessor, T5EncoderModel, T5Tokenizer
 
-from ...loaders import LoraLoaderMixin
+from ...loaders import StableDiffusionLoraLoaderMixin
 from ...models import UNet2DConditionModel
 from ...schedulers import DDPMScheduler
 from ...utils import (
@@ -69,7 +69,7 @@ EXAMPLE_DOC_STRING = """
 """
 
 
-class IFSuperResolutionPipeline(DiffusionPipeline, LoraLoaderMixin):
+class IFSuperResolutionPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
     tokenizer: T5Tokenizer
     text_encoder: T5EncoderModel
 
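All six DeepFloyd IF pipelines receive the same mechanical change: `LoraLoaderMixin` becomes `StableDiffusionLoraLoaderMixin` in both the import and the class bases. The loader API itself is unchanged; a sketch (the checkpoint and LoRA paths are illustrative):

    from diffusers import IFPipeline
    from diffusers.loaders import StableDiffusionLoraLoaderMixin

    pipe = IFPipeline.from_pretrained("DeepFloyd/IF-I-XL-v1.0")  # illustrative checkpoint

    # The pipeline still exposes load_lora_weights/save_lora_weights,
    # now provided by the renamed mixin.
    assert isinstance(pipe, StableDiffusionLoraLoaderMixin)
    pipe.load_lora_weights("path/to/if_lora.safetensors")  # hypothetical LoRA file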
diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py
CHANGED
@@ -21,7 +21,12 @@ from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection, XLMR
 
 from ....configuration_utils import FrozenDict
 from ....image_processor import PipelineImageInput, VaeImageProcessor
-from ....loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
+from ....loaders import (
+    FromSingleFileMixin,
+    IPAdapterMixin,
+    StableDiffusionLoraLoaderMixin,
+    TextualInversionLoaderMixin,
+)
 from ....models import AutoencoderKL, ImageProjection, UNet2DConditionModel
 from ....models.lora import adjust_lora_scale_text_encoder
 from ....schedulers import KarrasDiffusionSchedulers
@@ -137,7 +142,7 @@ class AltDiffusionPipeline(
     DiffusionPipeline,
     StableDiffusionMixin,
     TextualInversionLoaderMixin,
-    LoraLoaderMixin,
+    StableDiffusionLoraLoaderMixin,
     IPAdapterMixin,
     FromSingleFileMixin,
 ):
@@ -149,8 +154,8 @@ class AltDiffusionPipeline(
 
     The pipeline also inherits the following loading methods:
         - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
-        - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
-        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+        - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
         - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
         - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
 
@@ -346,7 +351,7 @@ class AltDiffusionPipeline(
         """
         # set lora scale so that monkey patched LoRA
         # function of text encoder can correctly access it
-        if lora_scale is not None and isinstance(self, LoraLoaderMixin):
+        if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
             self._lora_scale = lora_scale
 
         # dynamically adjust the LoRA scale
@@ -478,7 +483,7 @@ class AltDiffusionPipeline(
         negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
         negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+        if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder, lora_scale)
 
diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py
CHANGED
@@ -23,7 +23,12 @@ from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection, XLMR
 
 from ....configuration_utils import FrozenDict
 from ....image_processor import PipelineImageInput, VaeImageProcessor
-from ....loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
+from ....loaders import (
+    FromSingleFileMixin,
+    IPAdapterMixin,
+    StableDiffusionLoraLoaderMixin,
+    TextualInversionLoaderMixin,
+)
 from ....models import AutoencoderKL, ImageProjection, UNet2DConditionModel
 from ....models.lora import adjust_lora_scale_text_encoder
 from ....schedulers import KarrasDiffusionSchedulers
@@ -178,7 +183,7 @@ class AltDiffusionImg2ImgPipeline(
     StableDiffusionMixin,
     TextualInversionLoaderMixin,
     IPAdapterMixin,
-    LoraLoaderMixin,
+    StableDiffusionLoraLoaderMixin,
     FromSingleFileMixin,
 ):
     r"""
@@ -189,8 +194,8 @@ class AltDiffusionImg2ImgPipeline(
 
     The pipeline also inherits the following loading methods:
         - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
-        - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
-        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+        - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
         - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
         - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
 
@@ -386,7 +391,7 @@ class AltDiffusionImg2ImgPipeline(
         """
         # set lora scale so that monkey patched LoRA
         # function of text encoder can correctly access it
-        if lora_scale is not None and isinstance(self, LoraLoaderMixin):
+        if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
             self._lora_scale = lora_scale
 
         # dynamically adjust the LoRA scale
@@ -518,7 +523,7 @@ class AltDiffusionImg2ImgPipeline(
         negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
         negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+        if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
             # Retrieve the original scale by scaling back the LoRA layers
             unscale_lora_layers(self.text_encoder, lora_scale)
 
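The runtime pattern behind these renamed checks is unchanged: scale the text encoder's LoRA layers before encoding, unscale them afterwards. A standalone sketch of that pattern, using the same helpers the diff imports (the encoder and inputs here are stand-ins, not part of the diff):

    import torch
    from diffusers.utils import USE_PEFT_BACKEND, scale_lora_layers, unscale_lora_layers

    def encode_with_lora_scale(text_encoder, input_ids, lora_scale=None):
        # Same shape as the pipelines' encode_prompt: temporarily scale the PEFT
        # LoRA layers, run the encoder, then restore the original scale.
        if lora_scale is not None and USE_PEFT_BACKEND:
            scale_lora_layers(text_encoder, lora_scale)
        try:
            with torch.no_grad():
                prompt_embeds = text_encoder(input_ids)[0]
        finally:
            if lora_scale is not None and USE_PEFT_BACKEND:
                # Retrieve the original scale by scaling back the LoRA layers
                unscale_lora_layers(text_encoder, lora_scale)
        return prompt_embeds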
diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py
CHANGED
@@ -23,7 +23,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
 from ....configuration_utils import FrozenDict
 from ....image_processor import PipelineImageInput, VaeImageProcessor
-from ....loaders import LoraLoaderMixin, TextualInversionLoaderMixin
+from ....loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ....models import AutoencoderKL, UNet2DConditionModel
 from ....models.lora import adjust_lora_scale_text_encoder
 from ....schedulers import DDIMScheduler
@@ -136,7 +136,7 @@ def compute_noise(scheduler, prev_latents, latents, timestep, noise_pred, eta):
     return noise
 
 
-class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin):
+class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin):
     r"""
     Pipeline for text-guided image to image generation using Stable Diffusion.
 
@@ -145,8 +145,8 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
 
     The pipeline also inherits the following loading methods:
         - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
-        - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
-        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+        - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
 
     Args:
         vae ([`AutoencoderKL`]):
@@ -324,7 +324,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
         """
         # set lora scale so that monkey patched LoRA
         # function of text encoder can correctly access it
-        if lora_scale is not None and isinstance(self, LoraLoaderMixin):
+        if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
             self._lora_scale = lora_scale
 
         # dynamically adjust the LoRA scale
@@ -457,7 +457,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
         negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
         if self.text_encoder is not None:
-            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+            if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
                 # Retrieve the original scale by scaling back the LoRA layers
                 unscale_lora_layers(self.text_encoder, lora_scale)
 
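For downstream code, the practical impact of this rename is on imports and `isinstance` checks. 0.30.0 appears to keep `LoraLoaderMixin` importable as a deprecated shim (an assumption worth verifying against the release notes), but type checks should move to the new name, since the renamed mixin is what the pipelines now actually inherit:

    from diffusers import StableDiffusionPipeline
    from diffusers.loaders import StableDiffusionLoraLoaderMixin

    pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")  # illustrative

    # Checks written against the old LoraLoaderMixin name should be updated:
    assert isinstance(pipe, StableDiffusionLoraLoaderMixin)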
diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py
CHANGED
@@ -23,7 +23,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
 from ....configuration_utils import FrozenDict
 from ....image_processor import VaeImageProcessor
-from ....loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
+from ....loaders import FromSingleFileMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ....models import AutoencoderKL, UNet2DConditionModel
 from ....models.lora import adjust_lora_scale_text_encoder
 from ....schedulers import KarrasDiffusionSchedulers
@@ -79,7 +79,7 @@ def preprocess_mask(mask, batch_size, scale_factor=8):
 
 
 class StableDiffusionInpaintPipelineLegacy(
-    DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+    DiffusionPipeline, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin, FromSingleFileMixin
 ):
     r"""
     Pipeline for text-guided image inpainting using Stable Diffusion. *This is an experimental feature*.
@@ -89,11 +89,11 @@ class StableDiffusionInpaintPipelineLegacy(
 
     In addition the pipeline inherits the following loading methods:
         - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`]
-        - *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`]
+        - *LoRA*: [`loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`]
        - *Ckpt*: [`loaders.FromSingleFileMixin.from_single_file`]
 
     as well as the following saving methods:
-        - *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`]
+        - *LoRA*: [`loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`]
 
     Args:
         vae ([`AutoencoderKL`]):
@@ -294,7 +294,7 @@ class StableDiffusionInpaintPipelineLegacy(
         """
         # set lora scale so that monkey patched LoRA
         # function of text encoder can correctly access it
-        if lora_scale is not None and isinstance(self, LoraLoaderMixin):
+        if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
             self._lora_scale = lora_scale
 
         # dynamically adjust the LoRA scale
@@ -427,7 +427,7 @@ class StableDiffusionInpaintPipelineLegacy(
         negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
         if self.text_encoder is not None:
-            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+            if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
                 # Retrieve the original scale by scaling back the LoRA layers
                 unscale_lora_layers(self.text_encoder, lora_scale)
 
diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py
CHANGED
@@ -16,10 +16,10 @@ import inspect
 from typing import Any, Callable, Dict, List, Optional, Union
 
 import torch
-from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
+from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
 from ....image_processor import VaeImageProcessor
-from ....loaders import LoraLoaderMixin, TextualInversionLoaderMixin
+from ....loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ....models import AutoencoderKL, UNet2DConditionModel
 from ....models.lora import adjust_lora_scale_text_encoder
 from ....schedulers import PNDMScheduler
@@ -37,7 +37,7 @@ AUGS_CONST = ["A photo of ", "An image of ", "A picture of "]
 
 
 class StableDiffusionModelEditingPipeline(
-    DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin
+    DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin
 ):
     r"""
     Pipeline for text-to-image model editing.
@@ -47,8 +47,8 @@ class StableDiffusionModelEditingPipeline(
 
     The pipeline also inherits the following loading methods:
         - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
-        - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
-        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+        - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
 
     Args:
         vae ([`AutoencoderKL`]):
@@ -66,8 +66,8 @@ class StableDiffusionModelEditingPipeline(
             Classification module that estimates whether generated images could be considered offensive or harmful.
             Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details
             about a model's potential harms.
-        feature_extractor ([`~transformers.CLIPFeatureExtractor`]):
-            A `CLIPFeatureExtractor` to extract features from generated images; used as inputs to the `safety_checker`.
+        feature_extractor ([`~transformers.CLIPImageProcessor`]):
+            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
         with_to_k ([`bool`]):
             Whether to edit the key projection matrices along with the value projection matrices.
         with_augs ([`list`]):
@@ -86,7 +86,7 @@ class StableDiffusionModelEditingPipeline(
         unet: UNet2DConditionModel,
         scheduler: SchedulerMixin,
         safety_checker: StableDiffusionSafetyChecker,
-        feature_extractor: CLIPFeatureExtractor,
+        feature_extractor: CLIPImageProcessor,
         requires_safety_checker: bool = True,
         with_to_k: bool = True,
         with_augs: list = AUGS_CONST,
@@ -232,7 +232,7 @@ class StableDiffusionModelEditingPipeline(
         """
         # set lora scale so that monkey patched LoRA
         # function of text encoder can correctly access it
-        if lora_scale is not None and isinstance(self, LoraLoaderMixin):
+        if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
             self._lora_scale = lora_scale
 
         # dynamically adjust the LoRA scale
@@ -365,7 +365,7 @@ class StableDiffusionModelEditingPipeline(
         negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
         if self.text_encoder is not None:
-            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+            if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
                 # Retrieve the original scale by scaling back the LoRA layers
                 unscale_lora_layers(self.text_encoder, lora_scale)
 
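Beyond the mixin rename, this file also swaps `CLIPFeatureExtractor` for `CLIPImageProcessor` in the import, the docstring, and the `feature_extractor` annotation. `CLIPFeatureExtractor` has long been a deprecated alias in transformers, and both classes load the same preprocessor config, so existing checkpoints should be unaffected; a minimal sketch:

    from transformers import CLIPImageProcessor

    # CLIPImageProcessor replaces the deprecated CLIPFeatureExtractor alias;
    # the serialized preprocessor config is identical.
    feature_extractor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")  # illustrative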
diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py
CHANGED
@@ -19,7 +19,7 @@ import torch
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
 from ....image_processor import VaeImageProcessor
-from ....loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
+from ....loaders import FromSingleFileMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ....models import AutoencoderKL, UNet2DConditionModel
 from ....models.lora import adjust_lora_scale_text_encoder
 from ....schedulers import KarrasDiffusionSchedulers
@@ -63,7 +63,11 @@ EXAMPLE_DOC_STRING = """
 
 
 class StableDiffusionParadigmsPipeline(
-    DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+    DiffusionPipeline,
+    StableDiffusionMixin,
+    TextualInversionLoaderMixin,
+    StableDiffusionLoraLoaderMixin,
+    FromSingleFileMixin,
 ):
     r"""
     Pipeline for text-to-image generation using a parallelized version of Stable Diffusion.
@@ -73,8 +77,8 @@ class StableDiffusionParadigmsPipeline(
 
     The pipeline also inherits the following loading methods:
         - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
-        - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
-        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+        - [`~loaders.StableDiffusionLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.StableDiffusionLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
         - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
 
     Args:
@@ -223,7 +227,7 @@ class StableDiffusionParadigmsPipeline(
         """
         # set lora scale so that monkey patched LoRA
         # function of text encoder can correctly access it
-        if lora_scale is not None and isinstance(self, LoraLoaderMixin):
+        if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
             self._lora_scale = lora_scale
 
         # dynamically adjust the LoRA scale
@@ -356,7 +360,7 @@ class StableDiffusionParadigmsPipeline(
         negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
         if self.text_encoder is not None:
-            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+            if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
                 # Retrieve the original scale by scaling back the LoRA layers
                 unscale_lora_layers(self.text_encoder, lora_scale)
 
diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py
CHANGED
@@ -29,7 +29,7 @@ from transformers import (
 )
 
 from ....image_processor import PipelineImageInput, VaeImageProcessor
-from ....loaders import LoraLoaderMixin, TextualInversionLoaderMixin
+from ....loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ....models import AutoencoderKL, UNet2DConditionModel
 from ....models.attention_processor import Attention
 from ....models.lora import adjust_lora_scale_text_encoder
@@ -446,7 +446,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin
         """
         # set lora scale so that monkey patched LoRA
         # function of text encoder can correctly access it
-        if lora_scale is not None and isinstance(self, LoraLoaderMixin):
+        if lora_scale is not None and isinstance(self, StableDiffusionLoraLoaderMixin):
             self._lora_scale = lora_scale
 
         # dynamically adjust the LoRA scale
@@ -579,7 +579,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin
         negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
         if self.text_encoder is not None:
-            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+            if isinstance(self, StableDiffusionLoraLoaderMixin) and USE_PEFT_BACKEND:
                 # Retrieve the original scale by scaling back the LoRA layers
                 unscale_lora_layers(self.text_encoder, lora_scale)
 
diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py
CHANGED
@@ -837,7 +837,7 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
 
         def fn_recursive_add_processors(name: str, module: torch.nn.Module, processors: Dict[str, AttentionProcessor]):
             if hasattr(module, "get_processor"):
-                processors[f"{name}.processor"] = module.get_processor(return_deprecated_lora=True)
+                processors[f"{name}.processor"] = module.get_processor()
 
             for sub_name, child in module.named_children():
                 fn_recursive_add_processors(f"{name}.{sub_name}", child, processors)