diffusers 0.28.2__py3-none-any.whl → 0.29.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. diffusers/__init__.py +9 -1
  2. diffusers/commands/env.py +1 -5
  3. diffusers/dependency_versions_table.py +1 -1
  4. diffusers/image_processor.py +2 -1
  5. diffusers/loaders/__init__.py +2 -2
  6. diffusers/loaders/lora.py +406 -140
  7. diffusers/loaders/lora_conversion_utils.py +7 -1
  8. diffusers/loaders/single_file.py +1 -1
  9. diffusers/loaders/single_file_model.py +5 -0
  10. diffusers/loaders/single_file_utils.py +242 -2
  11. diffusers/loaders/unet.py +307 -272
  12. diffusers/models/__init__.py +5 -3
  13. diffusers/models/attention.py +125 -1
  14. diffusers/models/attention_processor.py +169 -1
  15. diffusers/models/autoencoders/__init__.py +1 -0
  16. diffusers/models/autoencoders/autoencoder_asym_kl.py +1 -1
  17. diffusers/models/autoencoders/autoencoder_kl.py +17 -6
  18. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -2
  19. diffusers/models/autoencoders/consistency_decoder_vae.py +9 -9
  20. diffusers/models/autoencoders/vq_model.py +182 -0
  21. diffusers/models/controlnet_xs.py +6 -6
  22. diffusers/models/embeddings.py +112 -84
  23. diffusers/models/model_loading_utils.py +55 -0
  24. diffusers/models/modeling_utils.py +128 -17
  25. diffusers/models/normalization.py +11 -6
  26. diffusers/models/transformers/__init__.py +1 -0
  27. diffusers/models/transformers/dual_transformer_2d.py +5 -4
  28. diffusers/models/transformers/hunyuan_transformer_2d.py +149 -2
  29. diffusers/models/transformers/prior_transformer.py +5 -5
  30. diffusers/models/transformers/transformer_2d.py +2 -2
  31. diffusers/models/transformers/transformer_sd3.py +344 -0
  32. diffusers/models/transformers/transformer_temporal.py +12 -10
  33. diffusers/models/unets/unet_1d.py +3 -3
  34. diffusers/models/unets/unet_2d.py +3 -3
  35. diffusers/models/unets/unet_2d_condition.py +4 -15
  36. diffusers/models/unets/unet_3d_condition.py +5 -17
  37. diffusers/models/unets/unet_i2vgen_xl.py +4 -4
  38. diffusers/models/unets/unet_motion_model.py +4 -4
  39. diffusers/models/unets/unet_spatio_temporal_condition.py +3 -3
  40. diffusers/models/vq_model.py +8 -165
  41. diffusers/pipelines/__init__.py +2 -0
  42. diffusers/pipelines/animatediff/pipeline_animatediff.py +4 -3
  43. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +4 -3
  44. diffusers/pipelines/controlnet/pipeline_controlnet.py +4 -3
  45. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +4 -3
  46. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +4 -3
  47. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +4 -3
  48. diffusers/pipelines/deepfloyd_if/watermark.py +1 -1
  49. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +4 -3
  50. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +4 -3
  51. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +4 -3
  52. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +4 -3
  53. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +4 -3
  54. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +24 -5
  55. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +4 -3
  56. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +4 -3
  57. diffusers/pipelines/marigold/marigold_image_processing.py +35 -20
  58. diffusers/pipelines/pia/pipeline_pia.py +4 -3
  59. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +1 -1
  60. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +1 -1
  61. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +17 -17
  62. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +4 -3
  63. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -4
  64. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +4 -3
  65. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +4 -3
  66. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +4 -3
  67. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +4 -3
  68. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +7 -6
  69. diffusers/pipelines/stable_diffusion_3/__init__.py +52 -0
  70. diffusers/pipelines/stable_diffusion_3/pipeline_output.py +21 -0
  71. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +886 -0
  72. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +923 -0
  73. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +4 -3
  74. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +10 -11
  75. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +4 -3
  76. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +4 -3
  77. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +4 -3
  78. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +4 -3
  79. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +4 -3
  80. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +4 -3
  81. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +4 -3
  82. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +4 -3
  83. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +4 -3
  84. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +4 -3
  85. diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
  86. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +4 -3
  87. diffusers/schedulers/__init__.py +2 -0
  88. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
  89. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +2 -3
  90. diffusers/schedulers/scheduling_edm_euler.py +2 -4
  91. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +287 -0
  92. diffusers/schedulers/scheduling_lms_discrete.py +2 -2
  93. diffusers/training_utils.py +4 -4
  94. diffusers/utils/__init__.py +3 -0
  95. diffusers/utils/constants.py +2 -0
  96. diffusers/utils/dummy_pt_objects.py +30 -0
  97. diffusers/utils/dummy_torch_and_transformers_objects.py +30 -0
  98. diffusers/utils/dynamic_modules_utils.py +15 -13
  99. diffusers/utils/hub_utils.py +106 -0
  100. diffusers/utils/import_utils.py +0 -1
  101. diffusers/utils/logging.py +3 -1
  102. diffusers/utils/state_dict_utils.py +2 -0
  103. {diffusers-0.28.2.dist-info → diffusers-0.29.0.dist-info}/METADATA +45 -45
  104. {diffusers-0.28.2.dist-info → diffusers-0.29.0.dist-info}/RECORD +108 -111
  105. {diffusers-0.28.2.dist-info → diffusers-0.29.0.dist-info}/WHEEL +1 -1
  106. diffusers/models/dual_transformer_2d.py +0 -20
  107. diffusers/models/prior_transformer.py +0 -12
  108. diffusers/models/t5_film_transformer.py +0 -70
  109. diffusers/models/transformer_2d.py +0 -25
  110. diffusers/models/transformer_temporal.py +0 -34
  111. diffusers/models/unet_1d.py +0 -26
  112. diffusers/models/unet_1d_blocks.py +0 -203
  113. diffusers/models/unet_2d.py +0 -27
  114. diffusers/models/unet_2d_blocks.py +0 -375
  115. diffusers/models/unet_2d_condition.py +0 -25
  116. {diffusers-0.28.2.dist-info → diffusers-0.29.0.dist-info}/LICENSE +0 -0
  117. {diffusers-0.28.2.dist-info → diffusers-0.29.0.dist-info}/entry_points.txt +0 -0
  118. {diffusers-0.28.2.dist-info → diffusers-0.29.0.dist-info}/top_level.txt +0 -0
diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py
@@ -376,6 +376,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
 
         # 2. Define call parameters
         batch_size = 1 if isinstance(prompt, str) else len(prompt)
+        device = self._execution_device
 
         if editing_prompt:
             enable_edit_guidance = True
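The hunks that follow replace `self.device` with this cached `device` throughout the pipeline. The two differ once offloading is enabled: `DiffusionPipeline.device` reports where the module weights currently sit, while `_execution_device` resolves the device the offload hooks actually run on. A rough sketch of the distinction, assuming a CUDA device is present and the pipeline supports model offload:

```python
# Hedged sketch; the checkpoint name follows the semantic-guidance docs.
import torch
from diffusers import SemanticStableDiffusionPipeline

pipe = SemanticStableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()  # weights stay on CPU until each module runs

print(pipe.device)             # cpu, since the modules currently sit on CPU
print(pipe._execution_device)  # cuda:0, where the offload hooks run compute
```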
@@ -405,7 +406,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                 f" {self.tokenizer.model_max_length} tokens: {removed_text}"
             )
             text_input_ids = text_input_ids[:, : self.tokenizer.model_max_length]
-        text_embeddings = self.text_encoder(text_input_ids.to(self.device))[0]
+        text_embeddings = self.text_encoder(text_input_ids.to(device))[0]
 
         # duplicate text embeddings for each generation per prompt, using mps friendly method
         bs_embed, seq_len, _ = text_embeddings.shape
@@ -433,9 +434,9 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                         f" {self.tokenizer.model_max_length} tokens: {removed_text}"
                     )
                     edit_concepts_input_ids = edit_concepts_input_ids[:, : self.tokenizer.model_max_length]
-                edit_concepts = self.text_encoder(edit_concepts_input_ids.to(self.device))[0]
+                edit_concepts = self.text_encoder(edit_concepts_input_ids.to(device))[0]
             else:
-                edit_concepts = editing_prompt_embeddings.to(self.device).repeat(batch_size, 1, 1)
+                edit_concepts = editing_prompt_embeddings.to(device).repeat(batch_size, 1, 1)
 
             # duplicate text embeddings for each generation per prompt, using mps friendly method
             bs_embed_edit, seq_len_edit, _ = edit_concepts.shape
@@ -476,7 +477,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                 truncation=True,
                 return_tensors="pt",
             )
-            uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(self.device))[0]
+            uncond_embeddings = self.text_encoder(uncond_input.input_ids.to(device))[0]
 
             # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
             seq_len = uncond_embeddings.shape[1]
@@ -493,7 +494,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
         # get the initial random noise unless the user supplied it
 
         # 4. Prepare timesteps
-        self.scheduler.set_timesteps(num_inference_steps, device=self.device)
+        self.scheduler.set_timesteps(num_inference_steps, device=device)
         timesteps = self.scheduler.timesteps
 
         # 5. Prepare latent variables
@@ -504,7 +505,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
             height,
             width,
             text_embeddings.dtype,
-            self.device,
+            device,
             generator,
             latents,
         )
@@ -562,12 +563,12 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                 if enable_edit_guidance:
                     concept_weights = torch.zeros(
                         (len(noise_pred_edit_concepts), noise_guidance.shape[0]),
-                        device=self.device,
+                        device=device,
                         dtype=noise_guidance.dtype,
                     )
                     noise_guidance_edit = torch.zeros(
                         (len(noise_pred_edit_concepts), *noise_guidance.shape),
-                        device=self.device,
+                        device=device,
                         dtype=noise_guidance.dtype,
                     )
                     # noise_guidance_edit = torch.zeros_like(noise_guidance)
@@ -644,21 +645,19 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
 
                         # noise_guidance_edit = noise_guidance_edit + noise_guidance_edit_tmp
 
-                    warmup_inds = torch.tensor(warmup_inds).to(self.device)
+                    warmup_inds = torch.tensor(warmup_inds).to(device)
                     if len(noise_pred_edit_concepts) > warmup_inds.shape[0] > 0:
                         concept_weights = concept_weights.to("cpu")  # Offload to cpu
                         noise_guidance_edit = noise_guidance_edit.to("cpu")
 
-                        concept_weights_tmp = torch.index_select(concept_weights.to(self.device), 0, warmup_inds)
+                        concept_weights_tmp = torch.index_select(concept_weights.to(device), 0, warmup_inds)
                         concept_weights_tmp = torch.where(
                             concept_weights_tmp < 0, torch.zeros_like(concept_weights_tmp), concept_weights_tmp
                         )
                         concept_weights_tmp = concept_weights_tmp / concept_weights_tmp.sum(dim=0)
                         # concept_weights_tmp = torch.nan_to_num(concept_weights_tmp)
 
-                        noise_guidance_edit_tmp = torch.index_select(
-                            noise_guidance_edit.to(self.device), 0, warmup_inds
-                        )
+                        noise_guidance_edit_tmp = torch.index_select(noise_guidance_edit.to(device), 0, warmup_inds)
                         noise_guidance_edit_tmp = torch.einsum(
                             "cb,cbijk->bijk", concept_weights_tmp, noise_guidance_edit_tmp
                         )
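For reference, the `"cb,cbijk->bijk"` contraction above is a weighted sum over the concept axis: per-concept weights of shape `(concepts, batch)` collapse per-concept guidance tensors of shape `(concepts, batch, C, H, W)` into a single guidance map. A self-contained check:

```python
# Self-contained check of the einsum above; shapes are illustrative.
import torch

c, b, ch, h, w = 3, 2, 4, 8, 8  # concepts, batch, channels, height, width
concept_weights_tmp = torch.rand(c, b)
noise_guidance_edit_tmp = torch.randn(c, b, ch, h, w)

combined = torch.einsum("cb,cbijk->bijk", concept_weights_tmp, noise_guidance_edit_tmp)
assert combined.shape == (b, ch, h, w)

# Equivalent formulation: broadcast the weights and sum over dim 0 (concepts).
manual = (concept_weights_tmp[:, :, None, None, None] * noise_guidance_edit_tmp).sum(dim=0)
assert torch.allclose(combined, manual, atol=1e-6)
```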
@@ -669,8 +668,8 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
 
                         del noise_guidance_edit_tmp
                         del concept_weights_tmp
-                        concept_weights = concept_weights.to(self.device)
-                        noise_guidance_edit = noise_guidance_edit.to(self.device)
+                        concept_weights = concept_weights.to(device)
+                        noise_guidance_edit = noise_guidance_edit.to(device)
 
                     concept_weights = torch.where(
                         concept_weights < 0, torch.zeros_like(concept_weights), concept_weights
@@ -679,6 +678,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                     concept_weights = torch.nan_to_num(concept_weights)
 
                     noise_guidance_edit = torch.einsum("cb,cbijk->bijk", concept_weights, noise_guidance_edit)
+                    noise_guidance_edit = noise_guidance_edit.to(edit_momentum.device)
 
                     noise_guidance_edit = noise_guidance_edit + edit_momentum_scale * edit_momentum
 
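The added `.to(edit_momentum.device)` guards the momentum update: `noise_guidance_edit` may have been offloaded to the CPU a few lines earlier, and mixing it with an accelerator-resident `edit_momentum` would raise a device-mismatch error. A minimal sketch of the failure mode, with illustrative shapes:

```python
# Minimal sketch of the failure this line prevents; shapes are illustrative.
import torch

if torch.cuda.is_available():
    edit_momentum = torch.zeros(2, 4, 64, 64, device="cuda")
    noise_guidance_edit = torch.randn(2, 4, 64, 64)  # left on CPU by the offload path
    edit_momentum_scale = 0.1

    # noise_guidance_edit + edit_momentum_scale * edit_momentum would raise
    # "Expected all tensors to be on the same device"; align devices first:
    noise_guidance_edit = noise_guidance_edit.to(edit_momentum.device)
    noise_guidance_edit = noise_guidance_edit + edit_momentum_scale * edit_momentum
```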
@@ -689,7 +689,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                         self.sem_guidance[i] = noise_guidance_edit.detach().cpu()
 
                 if sem_guidance is not None:
-                    edit_guidance = sem_guidance[i].to(self.device)
+                    edit_guidance = sem_guidance[i].to(device)
                     noise_guidance = noise_guidance + edit_guidance
 
                 noise_pred = noise_pred_uncond + noise_guidance
@@ -705,7 +705,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
         # 8. Post-processing
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
-            image, has_nsfw_concept = self.run_safety_checker(image, self.device, text_embeddings.dtype)
+            image, has_nsfw_concept = self.run_safety_checker(image, device, text_embeddings.dtype)
         else:
             image = latents
             has_nsfw_concept = None
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py
@@ -474,9 +474,10 @@ class StableDiffusionPipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)
 
         return prompt_embeds, negative_prompt_embeds
 
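The same `self.text_encoder is not None` guard is applied below to the depth2img, img2img, inpaint, upscale, and unCLIP pipelines. It covers pipelines constructed without a text encoder and driven entirely by precomputed embeddings; roughly, and with hypothetical embedding files:

```python
# Hedged sketch; the embedding files are hypothetical placeholders.
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", text_encoder=None, tokenizer=None
)

prompt_embeds = torch.load("prompt_embeds.pt")                    # hypothetical
negative_prompt_embeds = torch.load("negative_prompt_embeds.pt")  # hypothetical

# Before 0.29.0, encode_prompt()'s LoRA-unscaling epilogue dereferenced
# self.text_encoder whenever the PEFT backend was active, which fails when
# the component is None.
image = pipe(
    prompt_embeds=prompt_embeds,
    negative_prompt_embeds=negative_prompt_embeds,
).images[0]
```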
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py
@@ -357,9 +357,10 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)
 
         return prompt_embeds, negative_prompt_embeds
 
@@ -545,7 +546,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
 
         if depth_map is None:
             pixel_values = self.feature_extractor(images=image, return_tensors="pt").pixel_values
-            pixel_values = pixel_values.to(device=device)
+            pixel_values = pixel_values.to(device=device, dtype=dtype)
             # The DPT-Hybrid model uses batch-norm layers which are not compatible with fp16.
             # So we use `torch.autocast` here for half precision inference.
             if torch.backends.mps.is_available():
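Image processors return float32 tensors regardless of the pipeline's dtype, so a half-precision depth estimator previously received mismatched fp32 inputs; the added `dtype=dtype` cast fixes that. A sketch of the mismatch, with an illustrative DPT checkpoint:

```python
# Sketch of the dtype mismatch fixed above; the processor checkpoint is illustrative.
import numpy as np
import torch
from PIL import Image
from transformers import DPTImageProcessor

processor = DPTImageProcessor.from_pretrained("Intel/dpt-hybrid-midas")
image = Image.fromarray(np.zeros((384, 384, 3), dtype=np.uint8))

pixel_values = processor(images=image, return_tensors="pt").pixel_values
print(pixel_values.dtype)  # torch.float32, whatever the pipeline dtype is

# The fix adds the dtype to the device move, so a fp16 depth estimator
# receives fp16 inputs:
pixel_values = pixel_values.to(device="cpu", dtype=torch.float16)
print(pixel_values.dtype)  # torch.float16
```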
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py
@@ -517,9 +517,10 @@ class StableDiffusionImg2ImgPipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)
 
         return prompt_embeds, negative_prompt_embeds
 
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py
@@ -589,9 +589,10 @@ class StableDiffusionInpaintPipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)
 
         return prompt_embeds, negative_prompt_embeds
 
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
@@ -377,9 +377,10 @@ class StableDiffusionUpscalePipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)
 
         return prompt_embeds, negative_prompt_embeds
 
diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py
@@ -458,9 +458,10 @@ class StableUnCLIPPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInver
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)
 
         return prompt_embeds, negative_prompt_embeds
 
diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py
@@ -51,8 +51,8 @@ EXAMPLE_DOC_STRING = """
         >>> from diffusers import StableUnCLIPImg2ImgPipeline
 
         >>> pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
-        ...     "fusing/stable-unclip-2-1-l-img2img", torch_dtype=torch.float16
-        ... )  # TODO update model path
+        ...     "stabilityai/stable-diffusion-2-1-unclip-small", torch_dtype=torch.float16
+        ... )
         >>> pipe = pipe.to("cuda")
 
         >>> url = "https://raw.githubusercontent.com/CompVis/stable-diffusion/main/assets/stable-samples/img2img/sketch-mountains-input.jpg"
@@ -63,7 +63,7 @@ EXAMPLE_DOC_STRING = """
 
         >>> prompt = "A fantasy landscape, trending on artstation"
 
-        >>> images = pipe(prompt, init_image).images
+        >>> images = pipe(init_image, prompt).images
         >>> images[0].save("fantasy_landscape.png")
         ```
 """
@@ -422,9 +422,10 @@ class StableUnCLIPImg2ImgPipeline(
             negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
             negative_prompt_embeds = negative_prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
 
-        if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
-            # Retrieve the original scale by scaling back the LoRA layers
-            unscale_lora_layers(self.text_encoder, lora_scale)
+        if self.text_encoder is not None:
+            if isinstance(self, LoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(self.text_encoder, lora_scale)
 
         return prompt_embeds, negative_prompt_embeds
 
diffusers/pipelines/stable_diffusion_3/__init__.py (new file)
@@ -0,0 +1,52 @@
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_flax_available,
+    is_torch_available,
+    is_transformers_available,
+)
+
+
+_dummy_objects = {}
+_additional_imports = {}
+_import_structure = {"pipeline_output": ["StableDiffusion3PipelineOutput"]}
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["pipeline_stable_diffusion_3"] = ["StableDiffusion3Pipeline"]
+    _import_structure["pipeline_stable_diffusion_3_img2img"] = ["StableDiffusion3Img2ImgPipeline"]
+
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *  # noqa F403
+    else:
+        from .pipeline_stable_diffusion_3 import StableDiffusion3Pipeline
+        from .pipeline_stable_diffusion_3_img2img import StableDiffusion3Img2ImgPipeline
+
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)
+    for name, value in _additional_imports.items():
+        setattr(sys.modules[__name__], name, value)
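Once this module is registered, the new pipelines import lazily from the top-level package. A usage sketch in line with the 0.29.0 release notes; the SD3 checkpoint is gated and requires accepted access:

```python
# Usage sketch for the newly registered pipeline.
import torch
from diffusers import StableDiffusion3Pipeline

pipe = StableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers", torch_dtype=torch.float16
)
pipe = pipe.to("cuda")

image = pipe(
    "A cat holding a sign that says hello world",
    num_inference_steps=28,
    guidance_scale=7.0,
).images[0]
image.save("sd3.png")
```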
diffusers/pipelines/stable_diffusion_3/pipeline_output.py (new file)
@@ -0,0 +1,21 @@
+from dataclasses import dataclass
+from typing import List, Union
+
+import numpy as np
+import PIL.Image
+
+from ...utils import BaseOutput
+
+
+@dataclass
+class StableDiffusion3PipelineOutput(BaseOutput):
+    """
+    Output class for Stable Diffusion pipelines.
+
+    Args:
+        images (`List[PIL.Image.Image]` or `np.ndarray`)
+            List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
+            num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline.
+    """
+
+    images: Union[List[PIL.Image.Image], np.ndarray]
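Like other `BaseOutput` subclasses, the new output class supports both attribute access and conversion to a plain tuple, which is what pipelines return when called with `return_dict=False`. A minimal sketch with an illustrative array shape:

```python
# Minimal sketch of BaseOutput behavior; the array shape is illustrative.
import numpy as np
from diffusers.pipelines.stable_diffusion_3 import StableDiffusion3PipelineOutput

out = StableDiffusion3PipelineOutput(images=np.zeros((1, 64, 64, 3)))
print(out.images.shape)  # (1, 64, 64, 3)

(images,) = out.to_tuple()  # the same field as a plain tuple, cf. return_dict=False
```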