diffusers 0.28.2__py3-none-any.whl → 0.29.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +9 -1
- diffusers/commands/env.py +1 -5
- diffusers/dependency_versions_table.py +1 -1
- diffusers/image_processor.py +2 -1
- diffusers/loaders/__init__.py +2 -2
- diffusers/loaders/lora.py +406 -140
- diffusers/loaders/lora_conversion_utils.py +7 -1
- diffusers/loaders/single_file.py +1 -1
- diffusers/loaders/single_file_model.py +5 -0
- diffusers/loaders/single_file_utils.py +242 -2
- diffusers/loaders/unet.py +307 -272
- diffusers/models/__init__.py +5 -3
- diffusers/models/attention.py +125 -1
- diffusers/models/attention_processor.py +169 -1
- diffusers/models/autoencoders/__init__.py +1 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +1 -1
- diffusers/models/autoencoders/autoencoder_kl.py +17 -6
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -2
- diffusers/models/autoencoders/consistency_decoder_vae.py +9 -9
- diffusers/models/autoencoders/vq_model.py +182 -0
- diffusers/models/controlnet_xs.py +6 -6
- diffusers/models/embeddings.py +112 -84
- diffusers/models/model_loading_utils.py +55 -0
- diffusers/models/modeling_utils.py +128 -17
- diffusers/models/normalization.py +11 -6
- diffusers/models/transformers/__init__.py +1 -0
- diffusers/models/transformers/dual_transformer_2d.py +5 -4
- diffusers/models/transformers/hunyuan_transformer_2d.py +149 -2
- diffusers/models/transformers/prior_transformer.py +5 -5
- diffusers/models/transformers/transformer_2d.py +2 -2
- diffusers/models/transformers/transformer_sd3.py +344 -0
- diffusers/models/transformers/transformer_temporal.py +12 -10
- diffusers/models/unets/unet_1d.py +3 -3
- diffusers/models/unets/unet_2d.py +3 -3
- diffusers/models/unets/unet_2d_condition.py +4 -15
- diffusers/models/unets/unet_3d_condition.py +5 -17
- diffusers/models/unets/unet_i2vgen_xl.py +4 -4
- diffusers/models/unets/unet_motion_model.py +4 -4
- diffusers/models/unets/unet_spatio_temporal_condition.py +3 -3
- diffusers/models/vq_model.py +8 -165
- diffusers/pipelines/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +4 -3
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +4 -3
- diffusers/pipelines/controlnet/pipeline_controlnet.py +4 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +4 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +4 -3
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +4 -3
- diffusers/pipelines/deepfloyd_if/watermark.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +4 -3
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +4 -3
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +4 -3
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +4 -3
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +4 -3
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +24 -5
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +4 -3
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +4 -3
- diffusers/pipelines/marigold/marigold_image_processing.py +35 -20
- diffusers/pipelines/pia/pipeline_pia.py +4 -3
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +1 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +1 -1
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +17 -17
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +7 -6
- diffusers/pipelines/stable_diffusion_3/__init__.py +52 -0
- diffusers/pipelines/stable_diffusion_3/pipeline_output.py +21 -0
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +886 -0
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +923 -0
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +4 -3
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +10 -11
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +4 -3
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +4 -3
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +4 -3
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +4 -3
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +4 -3
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +4 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +4 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +4 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +4 -3
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +4 -3
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +4 -3
- diffusers/schedulers/__init__.py +2 -0
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +2 -3
- diffusers/schedulers/scheduling_edm_euler.py +2 -4
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +287 -0
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/training_utils.py +4 -4
- diffusers/utils/__init__.py +3 -0
- diffusers/utils/constants.py +2 -0
- diffusers/utils/dummy_pt_objects.py +30 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +30 -0
- diffusers/utils/dynamic_modules_utils.py +15 -13
- diffusers/utils/hub_utils.py +106 -0
- diffusers/utils/import_utils.py +0 -1
- diffusers/utils/logging.py +3 -1
- diffusers/utils/state_dict_utils.py +2 -0
- {diffusers-0.28.2.dist-info → diffusers-0.29.0.dist-info}/METADATA +45 -45
- {diffusers-0.28.2.dist-info → diffusers-0.29.0.dist-info}/RECORD +108 -111
- {diffusers-0.28.2.dist-info → diffusers-0.29.0.dist-info}/WHEEL +1 -1
- diffusers/models/dual_transformer_2d.py +0 -20
- diffusers/models/prior_transformer.py +0 -12
- diffusers/models/t5_film_transformer.py +0 -70
- diffusers/models/transformer_2d.py +0 -25
- diffusers/models/transformer_temporal.py +0 -34
- diffusers/models/unet_1d.py +0 -26
- diffusers/models/unet_1d_blocks.py +0 -203
- diffusers/models/unet_2d.py +0 -27
- diffusers/models/unet_2d_blocks.py +0 -375
- diffusers/models/unet_2d_condition.py +0 -25
- {diffusers-0.28.2.dist-info → diffusers-0.29.0.dist-info}/LICENSE +0 -0
- {diffusers-0.28.2.dist-info → diffusers-0.29.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.28.2.dist-info → diffusers-0.29.0.dist-info}/top_level.txt +0 -0
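Note: the headline addition in 0.29.0 is Stable Diffusion 3 (new `stable_diffusion_3` pipelines, `transformer_sd3.py`, and `scheduling_flow_match_euler_discrete.py` above). A minimal usage sketch, assuming the publicly documented `stabilityai/stable-diffusion-3-medium-diffusers` checkpoint (gated; requires accepting the license on the Hugging Face Hub) and a CUDA GPU:

# Sketch: trying the SD3 pipeline added in this release.
import torch
from diffusers import StableDiffusion3Pipeline

pipe = StableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16
)
pipe = pipe.to("cuda")

image = pipe(
    "A cat holding a sign that says hello world",
    num_inference_steps=28,
    guidance_scale=7.0,
).images[0]
image.save("sd3.png")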
diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py
@@ -376,6 +376,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
 
         # 2. Define call parameters
         batch_size = 1 if isinstance(prompt, str) else len(prompt)
+        device = self._execution_device
 
         if editing_prompt:
             enable_edit_guidance = True
@@ -405,7 +406,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                     f" {self.tokenizer.model_max_length} tokens: {removed_text}"
                 )
                 text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length]
-            text_embeddings = self.text_encoder(text_input_ids.to(self.device))[0]
+            text_embeddings = self.text_encoder(text_input_ids.to(device))[0]
 
             # duplicate text embeddings for each generation per prompt, using mps friendly method
             bs_embed, seq_len, _ = text_embeddings.shape
@@ -433,9 +434,9 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                     f" {self.tokenizer.model_max_length} tokens: {removed_text}"
                 )
                 edit_concepts_input_ids = edit_concepts_input_ids[:, : self.tokenizer.model_max_length]
-            edit_concepts = self.text_encoder(edit_concepts_input_ids.to(self.device))[0]
+            edit_concepts = self.text_encoder(edit_concepts_input_ids.to(device))[0]
         else:
-            edit_concepts = editing_prompt_embeddings.to(self.device).repeat(batch_size, 1, 1)
+            edit_concepts = editing_prompt_embeddings.to(device).repeat(batch_size, 1, 1)
 
         # duplicate text embeddings for each generation per prompt, using mps friendly method
         bs_embed_edit, seq_len_edit, _ = edit_concepts.shape
@@ -476,7 +477,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                 truncation=True,
                 return_tensors="pt",
             )
-            uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0]
+            uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(device))[0]
 
             # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
             seq_len = uncond_embeddings.shape[1]
@@ -493,7 +494,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
         # get the initial random noise unless the user supplied it
 
         # 4. Prepare timesteps
-        self.scheduler.set_timesteps(num_inference_steps, device=self.device)
+        self.scheduler.set_timesteps(num_inference_steps, device=device)
         timesteps = self.scheduler.timesteps
 
         # 5. Prepare latent variables
@@ -504,7 +505,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
             height,
             width,
             text_embeddings.dtype,
-            self.device,
+            device,
             generator,
             latents,
         )
@@ -562,12 +563,12 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                 if enable_edit_guidance:
                     concept_weights = torch.zeros(
                         (len(noise_pred_edit_concepts), noise_guidance.shape[0]),
-                        device=self.device,
+                        device=device,
                         dtype=noise_guidance.dtype,
                     )
                     noise_guidance_edit = torch.zeros(
                         (len(noise_pred_edit_concepts), *noise_guidance.shape),
-                        device=self.device,
+                        device=device,
                         dtype=noise_guidance.dtype,
                     )
                     # noise_guidance_edit = torch.zeros_like(noise_guidance)
@@ -644,21 +645,19 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
 
                         # noise_guidance_edit = noise_guidance_edit + noise_guidance_edit_tmp
 
-                    warmup_inds = torch.tensor(warmup_inds).to(self.device)
+                    warmup_inds = torch.tensor(warmup_inds).to(device)
                     if len(noise_pred_edit_concepts) > warmup_inds.shape[0] > 0:
                         concept_weights = concept_weights.to("cpu")  # Offload to cpu
                         noise_guidance_edit = noise_guidance_edit.to("cpu")
 
-                        concept_weights_tmp = torch.index_select(concept_weights.to(self.device), 0, warmup_inds)
+                        concept_weights_tmp = torch.index_select(concept_weights.to(device), 0, warmup_inds)
                         concept_weights_tmp = torch.where(
                             concept_weights_tmp < 0, torch.zeros_like(concept_weights_tmp), concept_weights_tmp
                         )
                         concept_weights_tmp = concept_weights_tmp / concept_weights_tmp.sum(dim=0)
                         # concept_weights_tmp = torch.nan_to_num(concept_weights_tmp)
 
-                        noise_guidance_edit_tmp = torch.index_select(
-                            noise_guidance_edit.to(self.device), 0, warmup_inds
-                        )
+                        noise_guidance_edit_tmp = torch.index_select(noise_guidance_edit.to(device), 0, warmup_inds)
                         noise_guidance_edit_tmp = torch.einsum(
                             "cb,cbijk->bijk", concept_weights_tmp, noise_guidance_edit_tmp
                         )
@@ -669,8 +668,8 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
 
                         del noise_guidance_edit_tmp
                         del concept_weights_tmp
-                        concept_weights = concept_weights.to(self.device)
-                        noise_guidance_edit = noise_guidance_edit.to(self.device)
+                        concept_weights = concept_weights.to(device)
+                        noise_guidance_edit = noise_guidance_edit.to(device)
 
                     concept_weights = torch.where(
                         concept_weights < 0, torch.zeros_like(concept_weights), concept_weights
@@ -679,6 +678,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                     concept_weights = torch.nan_to_num(concept_weights)
 
                     noise_guidance_edit = torch.einsum("cb,cbijk->bijk", concept_weights, noise_guidance_edit)
+                    noise_guidance_edit = noise_guidance_edit.to(edit_momentum.device)
 
                     noise_guidance_edit = noise_guidance_edit + edit_momentum_scale * edit_momentum
 
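Note: the single added line above guards against a device mismatch. After the CPU-offload branch earlier in the loop, `noise_guidance_edit` may sit on a different device than `edit_momentum`. A toy illustration (tensor shapes made up):

# Toy illustration of the device alignment the new line performs.
import torch

edit_momentum = torch.zeros(2, 4, 64, 64)        # lives on one device (here CPU)
noise_guidance_edit = torch.randn(2, 4, 64, 64)  # may have been moved to the GPU above
noise_guidance_edit = noise_guidance_edit.to(edit_momentum.device)

# The momentum update can no longer fail with a cross-device error:
noise_guidance_edit = noise_guidance_edit + 0.1 * edit_momentum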
@@ -689,7 +689,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                     self.sem_guidance[i] = noise_guidance_edit.detach().cpu()
 
                 if sem_guidance is not None:
-                    edit_guidance = sem_guidance[i].to(self.device)
+                    edit_guidance = sem_guidance[i].to(device)
                     noise_guidance = noise_guidance + edit_guidance
 
                 noise_pred = noise_pred_uncond + noise_guidance
@@ -705,7 +705,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
         # 8. Post-processing
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
-            image, has_nsfw_concept = self.run_safety_checker(image, self.device, text_embeddings.dtype)
+            image, has_nsfw_concept = self.run_safety_checker(image, device, text_embeddings.dtype)
         else:
             image = latents
             has_nsfw_concept = None
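Note: all seventeen changed lines in this file are one refactor: resolve `device = self._execution_device` once and use it everywhere instead of `self.device`. The distinction matters once offloading hooks are installed; roughly (a sketch, and the printed values depend on your accelerate setup):

# Sketch: why `_execution_device` is the right device to create tensors on.
import torch
from diffusers import SemanticStableDiffusionPipeline

pipe = SemanticStableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()  # weights stay on CPU until each module runs

# `pipe.device` reflects where module parameters currently sit (typically cpu
# under offloading), while `pipe._execution_device` is where the next forward
# pass will actually run (e.g. cuda).
print(pipe.device, pipe._execution_device)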
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -474,9 +474,10 @@ class StableDiffusionPipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)
 
         return prompt_embeds, negative_prompt_embeds
 
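Note: this hunk, repeated verbatim in the pipelines below, fixes `encode_prompt` for pipelines constructed with `text_encoder=None`: the LoRA unscale step must be skipped when there is no text encoder to unscale. A simplified sketch of the scale/unscale bracket it guards (hypothetical `encode` helper, not the full `encode_prompt`):

# Simplified sketch of the scale/unscale pairing inside encode_prompt().
from diffusers.utils import USE_PEFT_BACKEND, scale_lora_layers, unscale_lora_layers

def encode(pipe, text_input_ids=None, prompt_embeds=None, lora_scale=None):
    scaled = lora_scale is not None and pipe.text_encoder is not None and USE_PEFT_BACKEND
    if scaled:
        scale_lora_layers(pipe.text_encoder, lora_scale)  # temporarily apply lora_scale

    if prompt_embeds is None:  # with text_encoder=None, callers pass prompt_embeds
        prompt_embeds = pipe.text_encoder(text_input_ids)[0]

    if scaled:  # the 0.29.0 guard: nothing to unscale when text_encoder is None
        unscale_lora_layers(pipe.text_encoder, lora_scale)
    return prompt_embeds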
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py
@@ -357,9 +357,10 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)
 
         return prompt_embeds, negative_prompt_embeds
 
@@ -545,7 +546,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin
 
         if depth_map is None:
             pixel_values = self.feature_extractor(images=image, return_tensors="pt").pixel_values
-            pixel_values = pixel_values.to(device=device)
+            pixel_values = pixel_values.to(device=device, dtype=dtype)
             # The DPT-Hybrid model uses batch-norm layers which are not compatible with fp16.
             # So we use `torch.autocast` here for half precision inference.
             if torch.backends.mps.is_available():
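Note: the second hunk casts the depth estimator's inputs to the pipeline dtype, not just its device: feature extractors emit float32 tensors, which mismatch the model weights under fp16 inference. A standalone illustration with a stand-in tensor:

# Stand-in for self.feature_extractor(...): always produces float32.
import torch

pixel_values = torch.rand(1, 3, 384, 384)   # float32, as returned by the extractor
device, dtype = "cpu", torch.float16        # pipeline device/dtype under fp16

before = pixel_values.to(device=device)              # old: still float32
after = pixel_values.to(device=device, dtype=dtype)  # new: matches model weights
print(before.dtype, after.dtype)                     # torch.float32 torch.float16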
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
@@ -517,9 +517,10 @@ class StableDiffusionImg2ImgPipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)
 
         return prompt_embeds, negative_prompt_embeds
 
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
@@ -589,9 +589,10 @@ class StableDiffusionInpaintPipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)
 
         return prompt_embeds, negative_prompt_embeds
 
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
@@ -377,9 +377,10 @@ class StableDiffusionUpscalePipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)
 
         return prompt_embeds, negative_prompt_embeds
 
diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py
@@ -458,9 +458,10 @@ class StableUnCLIPPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)
 
         return prompt_embeds, negative_prompt_embeds
 
diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py
@@ -51,8 +51,8 @@ EXAMPLE_DOC_STRING = """
        >>> from diffusers import StableUnCLIPImg2ImgPipeline
 
        >>> pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
-        ...     "
-        ... )
+        ...     "stabilityai/stable-diffusion-2-1-unclip-small", torch_dtype=torch.float16
+        ... )
        >>> pipe = pipe.to("cuda")
 
        >>> url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg"
@@ -63,7 +63,7 @@ EXAMPLE_DOC_STRING = """
 
        >>> prompt = "A fantasy landscape, trending on artstation"
 
-        >>> images = pipe(
+        >>> images = pipe(init_image, prompt).images
        >>> images[0].save("fantasy_landscape.png")
        ```
        """
@@ -422,9 +422,10 @@ class StableUnCLIPImg2ImgPipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)
 
         return prompt_embeds, negative_prompt_embeds
 
diffusers/pipelines/stable_diffusion_3/__init__.py (new file)
@@ -0,0 +1,52 @@
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_flax_available,
+    is_torch_available,
+    is_transformers_available,
+)
+
+
+_dummy_objects = {}
+_additional_imports = {}
+_import_structure = {"pipeline_output": ["StableDiffusion3PipelineOutput"]}
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["pipeline_stable_diffusion_3"] = ["StableDiffusion3Pipeline"]
+    _import_structure["pipeline_stable_diffusion_3_img2img"] = ["StableDiffusion3Img2ImgPipeline"]
+
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *  # noqa F403
+    else:
+        from .pipeline_stable_diffusion_3 import StableDiffusion3Pipeline
+        from .pipeline_stable_diffusion_3_img2img import StableDiffusion3Img2ImgPipeline
+
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)
+    for name, value in _additional_imports.items():
+        setattr(sys.modules[__name__], name, value)
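Note: this module follows diffusers' standard lazy-import scaffold: importing the subpackage only registers `_import_structure`; heavy submodules load on first attribute access, and dummy placeholders stand in when torch or transformers are missing. Behaviorally:

# Importing the subpackage is cheap; the pipeline module is loaded lazily.
import diffusers.pipelines.stable_diffusion_3 as sd3

print(type(sd3).__name__)            # _LazyModule
cls = sd3.StableDiffusion3Pipeline   # this attribute access triggers the real import
print(cls.__name__)                  # StableDiffusion3Pipeline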
diffusers/pipelines/stable_diffusion_3/pipeline_output.py (new file)
@@ -0,0 +1,21 @@
+from dataclasses import dataclass
+from typing import List, Union
+
+import numpy as np
+import PIL.Image
+
+from ...utils import BaseOutput
+
+
+@dataclass
+class StableDiffusion3PipelineOutput(BaseOutput):
+    """
+    Output class for Stable Diffusion pipelines.
+
+    Args:
+        images (`List[PIL.Image.Image]` or `np.ndarray`)
+            List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
+            num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline.
+    """
+
+    images: Union[List[PIL.Image.Image], np.ndarray]
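Note: like every `BaseOutput` subclass, the new output type supports both attribute and tuple-style access, so `pipe(...).images` and `pipe(...)[0]` refer to the same field. A quick standalone check:

import numpy as np
from diffusers.pipelines.stable_diffusion_3 import StableDiffusion3PipelineOutput

out = StableDiffusion3PipelineOutput(images=np.zeros((1, 64, 64, 3)))
print(out.images.shape)  # attribute access -> (1, 64, 64, 3)
print(out[0].shape)      # BaseOutput also supports index access to its fields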