diffusers 0.17.1__py3-none-any.whl → 0.18.2__py3-none-any.whl
- diffusers/__init__.py +26 -1
- diffusers/configuration_utils.py +34 -29
- diffusers/dependency_versions_table.py +4 -0
- diffusers/image_processor.py +125 -12
- diffusers/loaders.py +169 -203
- diffusers/models/attention.py +24 -1
- diffusers/models/attention_flax.py +10 -5
- diffusers/models/attention_processor.py +3 -0
- diffusers/models/autoencoder_kl.py +114 -33
- diffusers/models/controlnet.py +131 -14
- diffusers/models/controlnet_flax.py +37 -26
- diffusers/models/cross_attention.py +17 -17
- diffusers/models/embeddings.py +67 -0
- diffusers/models/modeling_flax_utils.py +64 -56
- diffusers/models/modeling_utils.py +193 -104
- diffusers/models/prior_transformer.py +207 -37
- diffusers/models/resnet.py +26 -26
- diffusers/models/transformer_2d.py +36 -41
- diffusers/models/transformer_temporal.py +24 -21
- diffusers/models/unet_1d.py +31 -25
- diffusers/models/unet_2d.py +43 -30
- diffusers/models/unet_2d_blocks.py +210 -89
- diffusers/models/unet_2d_blocks_flax.py +12 -12
- diffusers/models/unet_2d_condition.py +172 -64
- diffusers/models/unet_2d_condition_flax.py +38 -24
- diffusers/models/unet_3d_blocks.py +34 -31
- diffusers/models/unet_3d_condition.py +101 -34
- diffusers/models/vae.py +5 -5
- diffusers/models/vae_flax.py +37 -34
- diffusers/models/vq_model.py +23 -14
- diffusers/pipelines/__init__.py +24 -1
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +1 -1
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -3
- diffusers/pipelines/consistency_models/__init__.py +1 -0
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +337 -0
- diffusers/pipelines/controlnet/multicontrolnet.py +120 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +59 -17
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +60 -15
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +60 -17
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/kandinsky/__init__.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +4 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +1 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +1 -0
- diffusers/pipelines/kandinsky2_2/__init__.py +7 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +317 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +372 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +434 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +398 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +531 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +541 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +605 -0
- diffusers/pipelines/pipeline_flax_utils.py +2 -2
- diffusers/pipelines/pipeline_utils.py +124 -146
- diffusers/pipelines/shap_e/__init__.py +27 -0
- diffusers/pipelines/shap_e/camera.py +147 -0
- diffusers/pipelines/shap_e/pipeline_shap_e.py +390 -0
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +349 -0
- diffusers/pipelines/shap_e/renderer.py +709 -0
- diffusers/pipelines/stable_diffusion/__init__.py +2 -0
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +261 -66
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +3 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +4 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +719 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +832 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +17 -7
- diffusers/pipelines/stable_diffusion_xl/__init__.py +26 -0
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +823 -0
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +896 -0
- diffusers/pipelines/stable_diffusion_xl/watermark.py +31 -0
- diffusers/pipelines/text_to_video_synthesis/__init__.py +2 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +5 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +771 -0
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +92 -6
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +3 -3
- diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +209 -91
- diffusers/schedulers/__init__.py +3 -0
- diffusers/schedulers/scheduling_consistency_models.py +380 -0
- diffusers/schedulers/scheduling_ddim.py +28 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +19 -4
- diffusers/schedulers/scheduling_ddim_parallel.py +642 -0
- diffusers/schedulers/scheduling_ddpm.py +53 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +604 -0
- diffusers/schedulers/scheduling_deis_multistep.py +66 -11
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +55 -13
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +19 -4
- diffusers/schedulers/scheduling_dpmsolver_sde.py +73 -11
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +23 -7
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +58 -9
- diffusers/schedulers/scheduling_euler_discrete.py +58 -8
- diffusers/schedulers/scheduling_heun_discrete.py +89 -14
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +73 -11
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +73 -11
- diffusers/schedulers/scheduling_lms_discrete.py +57 -8
- diffusers/schedulers/scheduling_pndm.py +46 -10
- diffusers/schedulers/scheduling_repaint.py +19 -4
- diffusers/schedulers/scheduling_sde_ve.py +5 -1
- diffusers/schedulers/scheduling_unclip.py +43 -4
- diffusers/schedulers/scheduling_unipc_multistep.py +48 -7
- diffusers/training_utils.py +1 -1
- diffusers/utils/__init__.py +2 -1
- diffusers/utils/dummy_pt_objects.py +60 -0
- diffusers/utils/dummy_torch_and_transformers_and_invisible_watermark_objects.py +32 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +180 -0
- diffusers/utils/hub_utils.py +1 -1
- diffusers/utils/import_utils.py +20 -3
- diffusers/utils/logging.py +15 -18
- diffusers/utils/outputs.py +3 -3
- diffusers/utils/testing_utils.py +15 -0
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/METADATA +4 -2
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/RECORD +120 -94
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/WHEEL +1 -1
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/LICENSE +0 -0
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/entry_points.txt +0 -0
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/top_level.txt +0 -0
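
Most of the new files above ship new pipelines (Stable Diffusion XL, Kandinsky 2.2, Shap-E, consistency models, LDM3D, Paradigms, video-to-video). A minimal sketch of trying the headline addition follows; the checkpoint id is an assumption (substitute any SDXL-compatible repo or local path), and `invisible-watermark` must be installed per the new import guard further down.

```python
import torch
from diffusers import StableDiffusionXLPipeline

# Assumed checkpoint id; replace with any SDXL-compatible repo or local path.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-0.9", torch_dtype=torch.float16
)
pipe = pipe.to("cuda")

# Standard text-to-image call; the output exposes PIL images via `.images`.
image = pipe(prompt="a photo of an astronaut riding a horse on mars").images[0]
image.save("sdxl_sample.png")
```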
diffusers/models/vae.py
CHANGED
@@ -30,7 +30,7 @@ class DecoderOutput(BaseOutput):

     Args:
         sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-
+            The decoded output sample from the last layer of the model.
     """

     sample: torch.FloatTensor
@@ -79,7 +79,7 @@ class Encoder(nn.Module):
                 downsample_padding=0,
                 resnet_act_fn=act_fn,
                 resnet_groups=norm_num_groups,
-
+                attention_head_dim=output_channel,
                 temb_channels=None,
             )
             self.down_blocks.append(down_block)
@@ -91,7 +91,7 @@ class Encoder(nn.Module):
             resnet_act_fn=act_fn,
             output_scale_factor=1,
             resnet_time_scale_shift="default",
-
+            attention_head_dim=block_out_channels[-1],
             resnet_groups=norm_num_groups,
             temb_channels=None,
         )
@@ -184,7 +184,7 @@ class Decoder(nn.Module):
             resnet_act_fn=act_fn,
             output_scale_factor=1,
             resnet_time_scale_shift="default" if norm_type == "group" else norm_type,
-
+            attention_head_dim=block_out_channels[-1],
             resnet_groups=norm_num_groups,
             temb_channels=temb_channels,
         )
@@ -208,7 +208,7 @@ class Decoder(nn.Module):
                 resnet_eps=1e-6,
                 resnet_act_fn=act_fn,
                 resnet_groups=norm_num_groups,
-
+                attention_head_dim=output_channel,
                 temb_channels=temb_channels,
                 resnet_time_scale_shift=norm_type,
             )
diffusers/models/vae_flax.py
CHANGED
@@ -36,9 +36,9 @@ class FlaxDecoderOutput(BaseOutput):

     Args:
         sample (`jnp.ndarray` of shape `(batch_size, num_channels, height, width)`):
-
-        dtype (
-
+            The decoded output sample from the last layer of the model.
+        dtype (`jnp.dtype`, *optional*, defaults to `jnp.float32`):
+            The `dtype` of the parameters.
     """

     sample: jnp.ndarray
@@ -396,7 +396,7 @@ class FlaxUNetMidBlock2D(nn.Module):
             Number of Resnet layer block
         resnet_groups (:obj:`int`, *optional*, defaults to `32`):
             The number of groups to use for the Resnet and Attention block group norm
-
+        num_attention_heads (:obj:`int`, *optional*, defaults to `1`):
             Number of attention heads for each attention block
         dtype (:obj:`jnp.dtype`, *optional*, defaults to jnp.float32):
             Parameters `dtype`
@@ -405,7 +405,7 @@ class FlaxUNetMidBlock2D(nn.Module):
     dropout: float = 0.0
     num_layers: int = 1
     resnet_groups: int = 32
-
+    num_attention_heads: int = 1
     dtype: jnp.dtype = jnp.float32

     def setup(self):
@@ -427,7 +427,7 @@ class FlaxUNetMidBlock2D(nn.Module):
         for _ in range(self.num_layers):
             attn_block = FlaxAttentionBlock(
                 channels=self.in_channels,
-                num_head_channels=self.
+                num_head_channels=self.num_attention_heads,
                 num_groups=resnet_groups,
                 dtype=self.dtype,
             )
@@ -532,7 +532,7 @@ class FlaxEncoder(nn.Module):
         self.mid_block = FlaxUNetMidBlock2D(
             in_channels=block_out_channels[-1],
             resnet_groups=self.norm_num_groups,
-
+            num_attention_heads=None,
             dtype=self.dtype,
         )

@@ -625,7 +625,7 @@ class FlaxDecoder(nn.Module):
         self.mid_block = FlaxUNetMidBlock2D(
             in_channels=block_out_channels[-1],
             resnet_groups=self.norm_num_groups,
-
+            num_attention_heads=None,
             dtype=self.dtype,
         )

@@ -720,40 +720,43 @@ class FlaxDiagonalGaussianDistribution(object):
 @flax_register_to_config
 class FlaxAutoencoderKL(nn.Module, FlaxModelMixin, ConfigMixin):
     r"""
-    Flax
-
+    Flax implementation of a VAE model with KL loss for decoding latent representations.
+
+    This model inherits from [`FlaxModelMixin`]. Check the superclass documentation for it's generic methods
+    implemented for all models (such as downloading or saving).

     This model is a Flax Linen [flax.linen.Module](https://flax.readthedocs.io/en/latest/flax.linen.html#module)
-    subclass. Use it as a regular Flax
+    subclass. Use it as a regular Flax Linen module and refer to the Flax documentation for all matter related to its
     general usage and behavior.

-
+    Inherent JAX features such as the following are supported:
+
     - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit)
     - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation)
     - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap)
     - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap)

     Parameters:
-        in_channels (
-
-        out_channels (
-
-        down_block_types (
-
-        up_block_types (
-
-        block_out_channels (
-            Tuple
-        layers_per_block (
-            Number of
-        act_fn (
-
-        latent_channels (
-
-        norm_num_groups (
-
-        sample_size (
-            Sample input size
+        in_channels (`int`, *optional*, defaults to 3):
+            Number of channels in the input image.
+        out_channels (`int`, *optional*, defaults to 3):
+            Number of channels in the output.
+        down_block_types (`Tuple[str]`, *optional*, defaults to `(DownEncoderBlock2D)`):
+            Tuple of downsample block types.
+        up_block_types (`Tuple[str]`, *optional*, defaults to `(UpDecoderBlock2D)`):
+            Tuple of upsample block types.
+        block_out_channels (`Tuple[str]`, *optional*, defaults to `(64,)`):
+            Tuple of block output channels.
+        layers_per_block (`int`, *optional*, defaults to `2`):
+            Number of ResNet layer for each block.
+        act_fn (`str`, *optional*, defaults to `silu`):
+            The activation function to use.
+        latent_channels (`int`, *optional*, defaults to `4`):
+            Number of channels in the latent space.
+        norm_num_groups (`int`, *optional*, defaults to `32`):
+            The number of groups for normalization.
+        sample_size (`int`, *optional*, defaults to 32):
+            Sample input size.
         scaling_factor (`float`, *optional*, defaults to 0.18215):
             The component-wise standard deviation of the trained latent space computed using the first batch of the
             training set. This is used to scale the latent space to have unit variance when training the diffusion
@@ -761,8 +764,8 @@ class FlaxAutoencoderKL(nn.Module, FlaxModelMixin, ConfigMixin):
             diffusion model. When decoding, the latents are scaled back to the original scale with the formula: `z = 1
             / scaling_factor * z`. For more details, refer to sections 4.3.2 and D.1 of the [High-Resolution Image
             Synthesis with Latent Diffusion Models](https://arxiv.org/abs/2112.10752) paper.
-        dtype (
-
+        dtype (`jnp.dtype`, *optional*, defaults to `jnp.float32`):
+            The `dtype` of the parameters.
     """
     in_channels: int = 3
     out_channels: int = 3
diffusers/models/vq_model.py
CHANGED
@@ -18,7 +18,7 @@ import torch
 import torch.nn as nn

 from ..configuration_utils import ConfigMixin, register_to_config
-from ..utils import BaseOutput
+from ..utils import BaseOutput, apply_forward_hook
 from .modeling_utils import ModelMixin
 from .vae import Decoder, DecoderOutput, Encoder, VectorQuantizer

@@ -30,31 +30,31 @@ class VQEncoderOutput(BaseOutput):

     Args:
         latents (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
-
+            The encoded output sample from the last layer of the model.
     """

     latents: torch.FloatTensor


 class VQModel(ModelMixin, ConfigMixin):
-    r"""
-
+    r"""
+    A VQ-VAE model for decoding latent representations.

-    This model inherits from [`ModelMixin`]. Check the superclass documentation for
-
+    This model inherits from [`ModelMixin`]. Check the superclass documentation for it's generic methods implemented
+    for all models (such as downloading or saving).

     Parameters:
         in_channels (int, *optional*, defaults to 3): Number of channels in the input image.
         out_channels (int, *optional*, defaults to 3): Number of channels in the output.
-        down_block_types (`Tuple[str]`, *optional*, defaults to :
-
-        up_block_types (`Tuple[str]`, *optional*, defaults to :
-
-        block_out_channels (`Tuple[int]`, *optional*, defaults to :
-
+        down_block_types (`Tuple[str]`, *optional*, defaults to `("DownEncoderBlock2D",)`):
+            Tuple of downsample block types.
+        up_block_types (`Tuple[str]`, *optional*, defaults to `("UpDecoderBlock2D",)`):
+            Tuple of upsample block types.
+        block_out_channels (`Tuple[int]`, *optional*, defaults to `(64,)`):
+            Tuple of block output channels.
         act_fn (`str`, *optional*, defaults to `"silu"`): The activation function to use.
         latent_channels (`int`, *optional*, defaults to `3`): Number of channels in the latent space.
-        sample_size (`int`, *optional*, defaults to `32`):
+        sample_size (`int`, *optional*, defaults to `32`): Sample input size.
         num_vq_embeddings (`int`, *optional*, defaults to `256`): Number of codebook vectors in the VQ-VAE.
         vq_embed_dim (`int`, *optional*): Hidden dim of codebook vectors in the VQ-VAE.
         scaling_factor (`float`, *optional*, defaults to `0.18215`):
@@ -116,6 +116,7 @@ class VQModel(ModelMixin, ConfigMixin):
             norm_type=norm_type,
         )

+    @apply_forward_hook
     def encode(self, x: torch.FloatTensor, return_dict: bool = True) -> VQEncoderOutput:
         h = self.encoder(x)
         h = self.quant_conv(h)
@@ -125,6 +126,7 @@ class VQModel(ModelMixin, ConfigMixin):

         return VQEncoderOutput(latents=h)

+    @apply_forward_hook
     def decode(
         self, h: torch.FloatTensor, force_not_quantize: bool = False, return_dict: bool = True
     ) -> Union[DecoderOutput, torch.FloatTensor]:
@@ -143,10 +145,17 @@ class VQModel(ModelMixin, ConfigMixin):

     def forward(self, sample: torch.FloatTensor, return_dict: bool = True) -> Union[DecoderOutput, torch.FloatTensor]:
         r"""
+        The [`VQModel`] forward method.
+
         Args:
             sample (`torch.FloatTensor`): Input sample.
             return_dict (`bool`, *optional*, defaults to `True`):
-                Whether or not to return a [`
+                Whether or not to return a [`models.vq_model.VQEncoderOutput`] instead of a plain tuple.
+
+        Returns:
+            [`~models.vq_model.VQEncoderOutput`] or `tuple`:
+                If return_dict is True, a [`~models.vq_model.VQEncoderOutput`] is returned, otherwise a plain `tuple`
+                is returned.
         """
         x = sample
         h = self.encode(x).latents
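
The `@apply_forward_hook` decorators added above make direct `encode`/`decode` calls pass through the same accelerate forward hooks as `forward`, which matters when the model has been offloaded. A rough usage sketch of those entry points; the repo id and subfolder are assumptions (any `VQModel` checkpoint works):

```python
import torch
from diffusers import VQModel

# Assumed repo/subfolder for illustration only.
model = VQModel.from_pretrained("CompVis/ldm-celebahq-256", subfolder="vqvae")
model.eval()

with torch.no_grad():
    image = torch.randn(1, 3, 256, 256)
    latents = model.encode(image).latents          # decorated entry point
    reconstruction = model.decode(latents).sample  # decorated entry point
print(reconstruction.shape)
```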
diffusers/pipelines/__init__.py
CHANGED
@@ -1,6 +1,7 @@
 from ..utils import (
     OptionalDependencyNotAvailable,
     is_flax_available,
+    is_invisible_watermark_available,
     is_k_diffusion_available,
     is_librosa_available,
     is_note_seq_available,
@@ -16,6 +17,7 @@ try:
 except OptionalDependencyNotAvailable:
     from ..utils.dummy_pt_objects import *  # noqa F403
 else:
+    from .consistency_models import ConsistencyModelPipeline
     from .dance_diffusion import DanceDiffusionPipeline
     from .ddim import DDIMPipeline
     from .ddpm import DDPMPipeline
@@ -63,9 +65,19 @@ else:
         KandinskyPipeline,
         KandinskyPriorPipeline,
     )
+    from .kandinsky2_2 import (
+        KandinskyV22ControlnetImg2ImgPipeline,
+        KandinskyV22ControlnetPipeline,
+        KandinskyV22Img2ImgPipeline,
+        KandinskyV22InpaintPipeline,
+        KandinskyV22Pipeline,
+        KandinskyV22PriorEmb2EmbPipeline,
+        KandinskyV22PriorPipeline,
+    )
     from .latent_diffusion import LDMTextToImagePipeline
     from .paint_by_example import PaintByExamplePipeline
     from .semantic_stable_diffusion import SemanticStableDiffusionPipeline
+    from .shap_e import ShapEImg2ImgPipeline, ShapEPipeline
     from .stable_diffusion import (
         CycleDiffusionPipeline,
         StableDiffusionAttendAndExcitePipeline,
@@ -77,8 +89,10 @@ else:
         StableDiffusionInpaintPipelineLegacy,
         StableDiffusionInstructPix2PixPipeline,
         StableDiffusionLatentUpscalePipeline,
+        StableDiffusionLDM3DPipeline,
         StableDiffusionModelEditingPipeline,
         StableDiffusionPanoramaPipeline,
+        StableDiffusionParadigmsPipeline,
         StableDiffusionPipeline,
         StableDiffusionPix2PixZeroPipeline,
         StableDiffusionSAGPipeline,
@@ -87,7 +101,7 @@ else:
         StableUnCLIPPipeline,
     )
     from .stable_diffusion_safe import StableDiffusionPipelineSafe
-    from .text_to_video_synthesis import TextToVideoSDPipeline, TextToVideoZeroPipeline
+    from .text_to_video_synthesis import TextToVideoSDPipeline, TextToVideoZeroPipeline, VideoToVideoSDPipeline
     from .unclip import UnCLIPImageVariationPipeline, UnCLIPPipeline
     from .unidiffuser import ImageTextPipelineOutput, UniDiffuserModel, UniDiffuserPipeline, UniDiffuserTextDecoder
     from .versatile_diffusion import (
@@ -98,6 +112,15 @@ else:
     )
     from .vq_diffusion import VQDiffusionPipeline

+
+try:
+    if not (is_torch_available() and is_transformers_available() and is_invisible_watermark_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ..utils.dummy_torch_and_transformers_and_invisible_watermark_objects import *  # noqa F403
+else:
+    from .stable_diffusion_xl import StableDiffusionXLImg2ImgPipeline, StableDiffusionXLPipeline
+
 try:
     if not is_onnx_available():
         raise OptionalDependencyNotAvailable()
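
The new guard above means the SDXL pipelines are importable only when `torch`, `transformers`, and `invisible-watermark` are all present; otherwise the names resolve to dummy objects that raise when used. A small check one might write in user code, using the availability helper the diff itself imports:

```python
from diffusers.utils import is_invisible_watermark_available

if is_invisible_watermark_available():
    from diffusers import StableDiffusionXLImg2ImgPipeline, StableDiffusionXLPipeline
else:
    # Without the extra dependency, only the dummy placeholder objects are exported.
    print("Install `invisible-watermark` to enable the Stable Diffusion XL pipelines.")
```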
diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py
CHANGED
@@ -77,7 +77,7 @@ class AltDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL
     In addition the pipeline inherits the following loading methods:
         - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`]
         - *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`]
-        - *Ckpt*: [`loaders.
+        - *Ckpt*: [`loaders.FromSingleFileMixin.from_single_file`]

     as well as the following saving methods:
         - *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`]
diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py
CHANGED
@@ -26,7 +26,7 @@ from diffusers.utils import is_accelerate_available, is_accelerate_version

 from ...configuration_utils import FrozenDict
 from ...image_processor import VaeImageProcessor
-from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
+from ...loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import PIL_INTERPOLATION, deprecate, logging, randn_tensor, replace_example_docstring
@@ -95,7 +95,9 @@ def preprocess(image):


 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.StableDiffusionImg2ImgPipeline with Stable->Alt, CLIPTextModel->RobertaSeriesModelWithTransformation, CLIPTokenizer->XLMRobertaTokenizer, AltDiffusionSafetyChecker->StableDiffusionSafetyChecker
-class AltDiffusionImg2ImgPipeline(
+class AltDiffusionImg2ImgPipeline(
+    DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+):
     r"""
     Pipeline for text-guided image to image generation using Alt Diffusion.

@@ -105,7 +107,7 @@ class AltDiffusionImg2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin
     In addition the pipeline inherits the following loading methods:
         - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`]
         - *LoRA*: [`loaders.LoraLoaderMixin.load_lora_weights`]
-        - *Ckpt*: [`loaders.
+        - *Ckpt*: [`loaders.FromSingleFileMixin.from_single_file`]

     as well as the following saving methods:
         - *LoRA*: [`loaders.LoraLoaderMixin.save_lora_weights`]
|
@@ -0,0 +1 @@
|
|
1
|
+
from .pipeline_consistency_models import ConsistencyModelPipeline
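
The newly exported `ConsistencyModelPipeline` supports single-step sampling. A rough sketch under the assumption that a converted consistency-model checkpoint is available (the repo id below is an assumption):

```python
import torch
from diffusers import ConsistencyModelPipeline

# Assumed repo id for a distilled ImageNet-64 consistency model.
pipe = ConsistencyModelPipeline.from_pretrained(
    "openai/diffusers-cd_imagenet64_l2", torch_dtype=torch.float16
).to("cuda")

# Single-step generation; multi-step sampling can be requested via `timesteps`.
image = pipe(num_inference_steps=1).images[0]
image.save("consistency_model_sample.png")
```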
|