diffusers 0.17.1__py3-none-any.whl → 0.18.2__py3-none-any.whl

Files changed (120)
  1. diffusers/__init__.py +26 -1
  2. diffusers/configuration_utils.py +34 -29
  3. diffusers/dependency_versions_table.py +4 -0
  4. diffusers/image_processor.py +125 -12
  5. diffusers/loaders.py +169 -203
  6. diffusers/models/attention.py +24 -1
  7. diffusers/models/attention_flax.py +10 -5
  8. diffusers/models/attention_processor.py +3 -0
  9. diffusers/models/autoencoder_kl.py +114 -33
  10. diffusers/models/controlnet.py +131 -14
  11. diffusers/models/controlnet_flax.py +37 -26
  12. diffusers/models/cross_attention.py +17 -17
  13. diffusers/models/embeddings.py +67 -0
  14. diffusers/models/modeling_flax_utils.py +64 -56
  15. diffusers/models/modeling_utils.py +193 -104
  16. diffusers/models/prior_transformer.py +207 -37
  17. diffusers/models/resnet.py +26 -26
  18. diffusers/models/transformer_2d.py +36 -41
  19. diffusers/models/transformer_temporal.py +24 -21
  20. diffusers/models/unet_1d.py +31 -25
  21. diffusers/models/unet_2d.py +43 -30
  22. diffusers/models/unet_2d_blocks.py +210 -89
  23. diffusers/models/unet_2d_blocks_flax.py +12 -12
  24. diffusers/models/unet_2d_condition.py +172 -64
  25. diffusers/models/unet_2d_condition_flax.py +38 -24
  26. diffusers/models/unet_3d_blocks.py +34 -31
  27. diffusers/models/unet_3d_condition.py +101 -34
  28. diffusers/models/vae.py +5 -5
  29. diffusers/models/vae_flax.py +37 -34
  30. diffusers/models/vq_model.py +23 -14
  31. diffusers/pipelines/__init__.py +24 -1
  32. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +1 -1
  33. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -3
  34. diffusers/pipelines/consistency_models/__init__.py +1 -0
  35. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +337 -0
  36. diffusers/pipelines/controlnet/multicontrolnet.py +120 -1
  37. diffusers/pipelines/controlnet/pipeline_controlnet.py +59 -17
  38. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +60 -15
  39. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +60 -17
  40. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  41. diffusers/pipelines/kandinsky/__init__.py +1 -1
  42. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +4 -6
  43. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +1 -0
  44. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +1 -0
  45. diffusers/pipelines/kandinsky2_2/__init__.py +7 -0
  46. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +317 -0
  47. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +372 -0
  48. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +434 -0
  49. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +398 -0
  50. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +531 -0
  51. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +541 -0
  52. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +605 -0
  53. diffusers/pipelines/pipeline_flax_utils.py +2 -2
  54. diffusers/pipelines/pipeline_utils.py +124 -146
  55. diffusers/pipelines/shap_e/__init__.py +27 -0
  56. diffusers/pipelines/shap_e/camera.py +147 -0
  57. diffusers/pipelines/shap_e/pipeline_shap_e.py +390 -0
  58. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +349 -0
  59. diffusers/pipelines/shap_e/renderer.py +709 -0
  60. diffusers/pipelines/stable_diffusion/__init__.py +2 -0
  61. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +261 -66
  62. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +3 -3
  63. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -3
  64. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +4 -2
  65. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +6 -6
  66. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +1 -1
  67. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
  68. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +719 -0
  69. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +1 -1
  70. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +832 -0
  71. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +17 -7
  72. diffusers/pipelines/stable_diffusion_xl/__init__.py +26 -0
  73. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +823 -0
  74. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +896 -0
  75. diffusers/pipelines/stable_diffusion_xl/watermark.py +31 -0
  76. diffusers/pipelines/text_to_video_synthesis/__init__.py +2 -1
  77. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +5 -1
  78. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +771 -0
  79. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +92 -6
  80. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +3 -3
  81. diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +209 -91
  82. diffusers/schedulers/__init__.py +3 -0
  83. diffusers/schedulers/scheduling_consistency_models.py +380 -0
  84. diffusers/schedulers/scheduling_ddim.py +28 -6
  85. diffusers/schedulers/scheduling_ddim_inverse.py +19 -4
  86. diffusers/schedulers/scheduling_ddim_parallel.py +642 -0
  87. diffusers/schedulers/scheduling_ddpm.py +53 -7
  88. diffusers/schedulers/scheduling_ddpm_parallel.py +604 -0
  89. diffusers/schedulers/scheduling_deis_multistep.py +66 -11
  90. diffusers/schedulers/scheduling_dpmsolver_multistep.py +55 -13
  91. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +19 -4
  92. diffusers/schedulers/scheduling_dpmsolver_sde.py +73 -11
  93. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +23 -7
  94. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +58 -9
  95. diffusers/schedulers/scheduling_euler_discrete.py +58 -8
  96. diffusers/schedulers/scheduling_heun_discrete.py +89 -14
  97. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +73 -11
  98. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +73 -11
  99. diffusers/schedulers/scheduling_lms_discrete.py +57 -8
  100. diffusers/schedulers/scheduling_pndm.py +46 -10
  101. diffusers/schedulers/scheduling_repaint.py +19 -4
  102. diffusers/schedulers/scheduling_sde_ve.py +5 -1
  103. diffusers/schedulers/scheduling_unclip.py +43 -4
  104. diffusers/schedulers/scheduling_unipc_multistep.py +48 -7
  105. diffusers/training_utils.py +1 -1
  106. diffusers/utils/__init__.py +2 -1
  107. diffusers/utils/dummy_pt_objects.py +60 -0
  108. diffusers/utils/dummy_torch_and_transformers_and_invisible_watermark_objects.py +32 -0
  109. diffusers/utils/dummy_torch_and_transformers_objects.py +180 -0
  110. diffusers/utils/hub_utils.py +1 -1
  111. diffusers/utils/import_utils.py +20 -3
  112. diffusers/utils/logging.py +15 -18
  113. diffusers/utils/outputs.py +3 -3
  114. diffusers/utils/testing_utils.py +15 -0
  115. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/METADATA +4 -2
  116. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/RECORD +120 -94
  117. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/WHEEL +1 -1
  118. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/LICENSE +0 -0
  119. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/entry_points.txt +0 -0
  120. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/top_level.txt +0 -0
diffusers/models/vae.py CHANGED
@@ -30,7 +30,7 @@ class DecoderOutput(BaseOutput):
 
     Args:
         sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-            Decoded output sample of the model. Output of the last layer of the model.
+            The decoded output sample from the last layer of the model.
     """
 
     sample: torch.FloatTensor
@@ -79,7 +79,7 @@ class Encoder(nn.Module):
                 downsample_padding=0,
                 resnet_act_fn=act_fn,
                 resnet_groups=norm_num_groups,
-                attn_num_head_channels=None,
+                attention_head_dim=output_channel,
                 temb_channels=None,
             )
             self.down_blocks.append(down_block)
@@ -91,7 +91,7 @@ class Encoder(nn.Module):
             resnet_act_fn=act_fn,
             output_scale_factor=1,
             resnet_time_scale_shift="default",
-            attn_num_head_channels=None,
+            attention_head_dim=block_out_channels[-1],
             resnet_groups=norm_num_groups,
             temb_channels=None,
         )
@@ -184,7 +184,7 @@ class Decoder(nn.Module):
             resnet_act_fn=act_fn,
             output_scale_factor=1,
             resnet_time_scale_shift="default" if norm_type == "group" else norm_type,
-            attn_num_head_channels=None,
+            attention_head_dim=block_out_channels[-1],
             resnet_groups=norm_num_groups,
             temb_channels=temb_channels,
         )
@@ -208,7 +208,7 @@ class Decoder(nn.Module):
                 resnet_eps=1e-6,
                 resnet_act_fn=act_fn,
                 resnet_groups=norm_num_groups,
-                attn_num_head_channels=None,
+                attention_head_dim=output_channel,
                 temb_channels=temb_channels,
                 resnet_time_scale_shift=norm_type,
             )
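The vae.py hunks are a keyword rename inside the encoder/decoder blocks (`attn_num_head_channels` becomes `attention_head_dim`); the public VAE interface is unchanged. A minimal sketch of `AutoencoderKL` usage that is unaffected by the rename, with an illustrative checkpoint id:

# Sketch only: the block-level rename is internal, so encoding and decoding with
# AutoencoderKL work exactly as before. The model id below is illustrative.
import torch
from diffusers import AutoencoderKL

vae = AutoencoderKL.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="vae")
image = torch.randn(1, 3, 256, 256)                # dummy input batch
latents = vae.encode(image).latent_dist.sample()   # AutoencoderKLOutput -> latent_dist
reconstruction = vae.decode(latents).sample        # DecoderOutput -> sample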
diffusers/models/vae_flax.py CHANGED
@@ -36,9 +36,9 @@ class FlaxDecoderOutput(BaseOutput):
 
     Args:
         sample (`jnp.ndarray` of shape `(batch_size, num_channels, height, width)`):
-            Decoded output sample of the model. Output of the last layer of the model.
-        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
-            Parameters `dtype`
+            The decoded output sample from the last layer of the model.
+        dtype (`jnp.dtype`, *optional*, defaults to `jnp.float32`):
+            The `dtype` of the parameters.
     """
 
     sample: jnp.ndarray
@@ -396,7 +396,7 @@ class FlaxUNetMidBlock2D(nn.Module):
             Number of Resnet layer block
         resnet_groups (:obj:`int`, *optional*, defaults to `32`):
             The number of groups to use for the Resnet and Attention block group norm
-        attn_num_head_channels (:obj:`int`, *optional*, defaults to `1`):
+        num_attention_heads (:obj:`int`, *optional*, defaults to `1`):
             Number of attention heads for each attention block
         dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
             Parameters `dtype`
@@ -405,7 +405,7 @@ class FlaxUNetMidBlock2D(nn.Module):
     dropout: float = 0.0
     num_layers: int = 1
     resnet_groups: int = 32
-    attn_num_head_channels: int = 1
+    num_attention_heads: int = 1
     dtype: jnp.dtype = jnp.float32
 
     def setup(self):
@@ -427,7 +427,7 @@ class FlaxUNetMidBlock2D(nn.Module):
         for _ in range(self.num_layers):
             attn_block = FlaxAttentionBlock(
                 channels=self.in_channels,
-                num_head_channels=self.attn_num_head_channels,
+                num_head_channels=self.num_attention_heads,
                 num_groups=resnet_groups,
                 dtype=self.dtype,
             )
@@ -532,7 +532,7 @@ class FlaxEncoder(nn.Module):
         self.mid_block = FlaxUNetMidBlock2D(
             in_channels=block_out_channels[-1],
             resnet_groups=self.norm_num_groups,
-            attn_num_head_channels=None,
+            num_attention_heads=None,
             dtype=self.dtype,
         )
 
@@ -625,7 +625,7 @@ class FlaxDecoder(nn.Module):
         self.mid_block = FlaxUNetMidBlock2D(
             in_channels=block_out_channels[-1],
             resnet_groups=self.norm_num_groups,
-            attn_num_head_channels=None,
+            num_attention_heads=None,
             dtype=self.dtype,
         )
 
@@ -720,40 +720,43 @@ class FlaxDiagonalGaussianDistribution(object):
 @flax_register_to_config
 class FlaxAutoencoderKL(nn.Module, FlaxModelMixin, ConfigMixin):
     r"""
-    Flax Implementation of Variational Autoencoder (VAE) model with KL loss from the paper Auto-Encoding Variational
-    Bayes by Diederik P. Kingma and Max Welling.
+    Flax implementation of a VAE model with KL loss for decoding latent representations.
+
+    This model inherits from [`FlaxModelMixin`]. Check the superclass documentation for it's generic methods
+    implemented for all models (such as downloading or saving).
 
     This model is a Flax Linen [flax.linen.Module](https://flax.readthedocs.io/en/latest/flax.linen.html#module)
-    subclass. Use it as a regular Flax linen Module and refer to the Flax documentation for all matter related to
+    subclass. Use it as a regular Flax Linen module and refer to the Flax documentation for all matter related to its
     general usage and behavior.
 
-    Finally, this model supports inherent JAX features such as:
+    Inherent JAX features such as the following are supported:
+
     - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit)
     - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation)
     - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap)
     - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap)
 
     Parameters:
-        in_channels (:obj:`int`, *optional*, defaults to 3):
-            Input channels
-        out_channels (:obj:`int`, *optional*, defaults to 3):
-            Output channels
-        down_block_types (:obj:`Tuple[str]`, *optional*, defaults to `(DownEncoderBlock2D)`):
-            DownEncoder block type
-        up_block_types (:obj:`Tuple[str]`, *optional*, defaults to `(UpDecoderBlock2D)`):
-            UpDecoder block type
-        block_out_channels (:obj:`Tuple[str]`, *optional*, defaults to `(64,)`):
-            Tuple containing the number of output channels for each block
-        layers_per_block (:obj:`int`, *optional*, defaults to `2`):
-            Number of Resnet layer for each block
-        act_fn (:obj:`str`, *optional*, defaults to `silu`):
-            Activation function
-        latent_channels (:obj:`int`, *optional*, defaults to `4`):
-            Latent space channels
-        norm_num_groups (:obj:`int`, *optional*, defaults to `32`):
-            Norm num group
-        sample_size (:obj:`int`, *optional*, defaults to 32):
-            Sample input size
+        in_channels (`int`, *optional*, defaults to 3):
+            Number of channels in the input image.
+        out_channels (`int`, *optional*, defaults to 3):
+            Number of channels in the output.
+        down_block_types (`Tuple[str]`, *optional*, defaults to `(DownEncoderBlock2D)`):
+            Tuple of downsample block types.
+        up_block_types (`Tuple[str]`, *optional*, defaults to `(UpDecoderBlock2D)`):
+            Tuple of upsample block types.
+        block_out_channels (`Tuple[str]`, *optional*, defaults to `(64,)`):
+            Tuple of block output channels.
+        layers_per_block (`int`, *optional*, defaults to `2`):
+            Number of ResNet layer for each block.
+        act_fn (`str`, *optional*, defaults to `silu`):
+            The activation function to use.
+        latent_channels (`int`, *optional*, defaults to `4`):
+            Number of channels in the latent space.
+        norm_num_groups (`int`, *optional*, defaults to `32`):
+            The number of groups for normalization.
+        sample_size (`int`, *optional*, defaults to 32):
+            Sample input size.
         scaling_factor (`float`, *optional*, defaults to 0.18215):
             The component-wise standard deviation of the trained latent space computed using the first batch of the
             training set. This is used to scale the latent space to have unit variance when training the diffusion
@@ -761,8 +764,8 @@ class FlaxAutoencoderKL(nn.Module, FlaxModelMixin, ConfigMixin):
             diffusion model. When decoding, the latents are scaled back to the original scale with the formula: `z = 1
             / scaling_factor * z`. For more details, refer to sections 4.3.2 and D.1 of the [High-Resolution Image
             Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) paper.
-        dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
-            parameters `dtype`
+        dtype (`jnp.dtype`, *optional*, defaults to `jnp.float32`):
+            The `dtype` of the parameters.
     """
     in_channels: int = 3
     out_channels: int = 3
diffusers/models/vq_model.py CHANGED
@@ -18,7 +18,7 @@ import torch
 import torch.nn as nn
 
 from ..configuration_utils import ConfigMixin, register_to_config
-from ..utils import BaseOutput
+from ..utils import BaseOutput, apply_forward_hook
 from .modeling_utils import ModelMixin
 from .vae import Decoder, DecoderOutput, Encoder, VectorQuantizer
 
@@ -30,31 +30,31 @@ class VQEncoderOutput(BaseOutput):
 
     Args:
         latents (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-            Encoded output sample of the model. Output of the last layer of the model.
+            The encoded output sample from the last layer of the model.
     """
 
     latents: torch.FloatTensor
 
 
 class VQModel(ModelMixin, ConfigMixin):
-    r"""VQ-VAE model from the paper Neural Discrete Representation Learning by Aaron van den Oord, Oriol Vinyals and Koray
-    Kavukcuoglu.
+    r"""
+    A VQ-VAE model for decoding latent representations.
 
-    This model inherits from [`ModelMixin`]. Check the superclass documentation for the generic methods the library
-    implements for all the model (such as downloading or saving, etc.)
+    This model inherits from [`ModelMixin`]. Check the superclass documentation for it's generic methods implemented
+    for all models (such as downloading or saving).
 
     Parameters:
         in_channels (int, *optional*, defaults to 3): Number of channels in the input image.
         out_channels (int, *optional*, defaults to 3): Number of channels in the output.
-        down_block_types (`Tuple[str]`, *optional*, defaults to :
-            obj:`("DownEncoderBlock2D",)`): Tuple of downsample block types.
-        up_block_types (`Tuple[str]`, *optional*, defaults to :
-            obj:`("UpDecoderBlock2D",)`): Tuple of upsample block types.
-        block_out_channels (`Tuple[int]`, *optional*, defaults to :
-            obj:`(64,)`): Tuple of block output channels.
+        down_block_types (`Tuple[str]`, *optional*, defaults to `("DownEncoderBlock2D",)`):
+            Tuple of downsample block types.
+        up_block_types (`Tuple[str]`, *optional*, defaults to `("UpDecoderBlock2D",)`):
+            Tuple of upsample block types.
+        block_out_channels (`Tuple[int]`, *optional*, defaults to `(64,)`):
+            Tuple of block output channels.
         act_fn (`str`, *optional*, defaults to `"silu"`): The activation function to use.
         latent_channels (`int`, *optional*, defaults to `3`): Number of channels in the latent space.
-        sample_size (`int`, *optional*, defaults to `32`): TODO
+        sample_size (`int`, *optional*, defaults to `32`): Sample input size.
         num_vq_embeddings (`int`, *optional*, defaults to `256`): Number of codebook vectors in the VQ-VAE.
         vq_embed_dim (`int`, *optional*): Hidden dim of codebook vectors in the VQ-VAE.
         scaling_factor (`float`, *optional*, defaults to `0.18215`):
@@ -116,6 +116,7 @@ class VQModel(ModelMixin, ConfigMixin):
             norm_type=norm_type,
         )
 
+    @apply_forward_hook
     def encode(self, x: torch.FloatTensor, return_dict: bool = True) -> VQEncoderOutput:
         h = self.encoder(x)
         h = self.quant_conv(h)
@@ -125,6 +126,7 @@ class VQModel(ModelMixin, ConfigMixin):
 
         return VQEncoderOutput(latents=h)
 
+    @apply_forward_hook
     def decode(
         self, h: torch.FloatTensor, force_not_quantize: bool = False, return_dict: bool = True
     ) -> Union[DecoderOutput, torch.FloatTensor]:
@@ -143,10 +145,17 @@ class VQModel(ModelMixin, ConfigMixin):
 
     def forward(self, sample: torch.FloatTensor, return_dict: bool = True) -> Union[DecoderOutput, torch.FloatTensor]:
         r"""
+        The [`VQModel`] forward method.
+
         Args:
             sample (`torch.FloatTensor`): Input sample.
             return_dict (`bool`, *optional*, defaults to `True`):
-                Whether or not to return a [`DecoderOutput`] instead of a plain tuple.
+                Whether or not to return a [`models.vq_model.VQEncoderOutput`] instead of a plain tuple.
+
+        Returns:
+            [`~models.vq_model.VQEncoderOutput`] or `tuple`:
+                If return_dict is True, a [`~models.vq_model.VQEncoderOutput`] is returned, otherwise a plain `tuple`
+                is returned.
         """
         x = sample
         h = self.encode(x).latents
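The new `@apply_forward_hook` decorator lets `encode` and `decode` participate in accelerate offloading hooks the same way `forward` does; calling code does not change. A short sketch, assuming the default `VQModel` configuration (shapes are illustrative):

# Sketch: encode/decode usage is unchanged; the decorator only ensures any hooks
# attached to the module also fire for these entry points.
import torch
from diffusers import VQModel

model = VQModel()                          # default (tiny) configuration, illustrative
x = torch.randn(1, 3, 32, 32)
latents = model.encode(x).latents          # VQEncoderOutput.latents
reconstruction = model.decode(latents).sample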
diffusers/pipelines/__init__.py CHANGED
@@ -1,6 +1,7 @@
 from ..utils import (
     OptionalDependencyNotAvailable,
     is_flax_available,
+    is_invisible_watermark_available,
     is_k_diffusion_available,
     is_librosa_available,
     is_note_seq_available,
@@ -16,6 +17,7 @@ try:
 except OptionalDependencyNotAvailable:
     from ..utils.dummy_pt_objects import *  # noqa F403
 else:
+    from .consistency_models import ConsistencyModelPipeline
     from .dance_diffusion import DanceDiffusionPipeline
     from .ddim import DDIMPipeline
     from .ddpm import DDPMPipeline
@@ -63,9 +65,19 @@ else:
         KandinskyPipeline,
         KandinskyPriorPipeline,
     )
+    from .kandinsky2_2 import (
+        KandinskyV22ControlnetImg2ImgPipeline,
+        KandinskyV22ControlnetPipeline,
+        KandinskyV22Img2ImgPipeline,
+        KandinskyV22InpaintPipeline,
+        KandinskyV22Pipeline,
+        KandinskyV22PriorEmb2EmbPipeline,
+        KandinskyV22PriorPipeline,
+    )
     from .latent_diffusion import LDMTextToImagePipeline
     from .paint_by_example import PaintByExamplePipeline
     from .semantic_stable_diffusion import SemanticStableDiffusionPipeline
+    from .shap_e import ShapEImg2ImgPipeline, ShapEPipeline
     from .stable_diffusion import (
         CycleDiffusionPipeline,
         StableDiffusionAttendAndExcitePipeline,
@@ -77,8 +89,10 @@ else:
         StableDiffusionInpaintPipelineLegacy,
         StableDiffusionInstructPix2PixPipeline,
         StableDiffusionLatentUpscalePipeline,
+        StableDiffusionLDM3DPipeline,
         StableDiffusionModelEditingPipeline,
         StableDiffusionPanoramaPipeline,
+        StableDiffusionParadigmsPipeline,
         StableDiffusionPipeline,
         StableDiffusionPix2PixZeroPipeline,
         StableDiffusionSAGPipeline,
@@ -87,7 +101,7 @@ else:
         StableUnCLIPPipeline,
     )
     from .stable_diffusion_safe import StableDiffusionPipelineSafe
-    from .text_to_video_synthesis import TextToVideoSDPipeline, TextToVideoZeroPipeline
+    from .text_to_video_synthesis import TextToVideoSDPipeline, TextToVideoZeroPipeline, VideoToVideoSDPipeline
     from .unclip import UnCLIPImageVariationPipeline, UnCLIPPipeline
     from .unidiffuser import ImageTextPipelineOutput, UniDiffuserModel, UniDiffuserPipeline, UniDiffuserTextDecoder
     from .versatile_diffusion import (
@@ -98,6 +112,15 @@ else:
     )
     from .vq_diffusion import VQDiffusionPipeline
 
+
+try:
+    if not (is_torch_available() and is_transformers_available() and is_invisible_watermark_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ..utils.dummy_torch_and_transformers_and_invisible_watermark_objects import *  # noqa F403
+else:
+    from .stable_diffusion_xl import StableDiffusionXLImg2ImgPipeline, StableDiffusionXLPipeline
+
 try:
     if not is_onnx_available():
         raise OptionalDependencyNotAvailable()
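The Stable Diffusion XL pipelines sit behind an extra optional dependency (invisible-watermark) on top of torch and transformers; with all three installed, the new classes import from the top-level package, otherwise the dummy objects raise an informative error. A hedged sketch, using an illustrative SDXL checkpoint id:

# Sketch: requires torch, transformers, and invisible-watermark to be installed.
# The checkpoint id below is illustrative.
import torch
from diffusers import StableDiffusionXLPipeline

pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-0.9", torch_dtype=torch.float16
)
pipe.to("cuda")
image = pipe(prompt="an astronaut riding a horse on the moon").images[0]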
diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py CHANGED
@@ -77,7 +77,7 @@ class AltDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL
     In addition the pipeline inherits the following loading methods:
         - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`]
         - *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`]
-        - *Ckpt*: [`loaders.FromCkptMixin.from_ckpt`]
+        - *Ckpt*: [`loaders.FromSingleFileMixin.from_single_file`]
 
     as well as the following saving methods:
         - *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`]
diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py CHANGED
@@ -26,7 +26,7 @@ from diffusers.utils import is_accelerate_available, is_accelerate_version
 
 from ...configuration_utils import FrozenDict
 from ...image_processor import VaeImageProcessor
-from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
+from ...loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import PIL_INTERPOLATION, deprecate, logging, randn_tensor, replace_example_docstring
@@ -95,7 +95,9 @@ def preprocess(image):
 
 
 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline with Stable->Alt, CLIPTextModel->RobertaSeriesModelWithTransformation, CLIPTokenizer->XLMRobertaTokenizer, AltDiffusionSafetyChecker->StableDiffusionSafetyChecker
-class AltDiffusionImg2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin):
+class AltDiffusionImg2ImgPipeline(
+    DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+):
     r"""
     Pipeline for text-guided image to image generation using Alt Diffusion.
 
@@ -105,7 +107,7 @@ class AltDiffusionImg2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin
     In addition the pipeline inherits the following loading methods:
         - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`]
        - *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`]
-        - *Ckpt*: [`loaders.FromCkptMixin.from_ckpt`]
+        - *Ckpt*: [`loaders.FromSingleFileMixin.from_single_file`]
 
     as well as the following saving methods:
         - *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`]
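`FromCkptMixin.from_ckpt` is renamed to `FromSingleFileMixin.from_single_file` throughout the pipeline docstrings. A minimal sketch of loading a pipeline from a single checkpoint file with the renamed entry point (the file path is a placeholder):

# Sketch: from_single_file replaces from_ckpt for loading a pipeline from one
# .ckpt/.safetensors file; the path below is illustrative.
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_single_file("./v1-5-pruned-emaonly.safetensors")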
diffusers/pipelines/consistency_models/__init__.py ADDED
@@ -0,0 +1 @@
+from .pipeline_consistency_models import ConsistencyModelPipeline
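The new `consistency_models` subpackage exposes `ConsistencyModelPipeline`, which supports one- and few-step sampling. A short usage sketch, assuming the `openai/diffusers-cd_imagenet64_l2` checkpoint as an illustrative example:

# Sketch: one-step generation with the new consistency model pipeline; the
# checkpoint id is illustrative.
import torch
from diffusers import ConsistencyModelPipeline

pipe = ConsistencyModelPipeline.from_pretrained(
    "openai/diffusers-cd_imagenet64_l2", torch_dtype=torch.float16
)
pipe.to("cuda")
image = pipe(num_inference_steps=1).images[0]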