diffusers 0.24.0__py3-none-any.whl → 0.25.0__py3-none-any.whl

Files changed (174)
  1. diffusers/__init__.py +11 -1
  2. diffusers/commands/fp16_safetensors.py +10 -11
  3. diffusers/configuration_utils.py +12 -8
  4. diffusers/dependency_versions_table.py +2 -1
  5. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  6. diffusers/image_processor.py +286 -46
  7. diffusers/loaders/ip_adapter.py +11 -9
  8. diffusers/loaders/lora.py +198 -60
  9. diffusers/loaders/single_file.py +24 -18
  10. diffusers/loaders/textual_inversion.py +10 -14
  11. diffusers/loaders/unet.py +130 -37
  12. diffusers/models/__init__.py +18 -12
  13. diffusers/models/activations.py +9 -6
  14. diffusers/models/attention.py +137 -16
  15. diffusers/models/attention_processor.py +133 -46
  16. diffusers/models/autoencoders/__init__.py +5 -0
  17. diffusers/models/{autoencoder_asym_kl.py → autoencoders/autoencoder_asym_kl.py} +4 -4
  18. diffusers/models/{autoencoder_kl.py → autoencoders/autoencoder_kl.py} +45 -6
  19. diffusers/models/{autoencoder_kl_temporal_decoder.py → autoencoders/autoencoder_kl_temporal_decoder.py} +8 -8
  20. diffusers/models/{autoencoder_tiny.py → autoencoders/autoencoder_tiny.py} +4 -4
  21. diffusers/models/{consistency_decoder_vae.py → autoencoders/consistency_decoder_vae.py} +14 -14
  22. diffusers/models/{vae.py → autoencoders/vae.py} +9 -5
  23. diffusers/models/downsampling.py +338 -0
  24. diffusers/models/embeddings.py +112 -29
  25. diffusers/models/modeling_flax_utils.py +12 -7
  26. diffusers/models/modeling_utils.py +10 -10
  27. diffusers/models/normalization.py +108 -2
  28. diffusers/models/resnet.py +15 -699
  29. diffusers/models/transformer_2d.py +2 -2
  30. diffusers/models/unet_2d_condition.py +37 -0
  31. diffusers/models/{unet_kandi3.py → unet_kandinsky3.py} +105 -159
  32. diffusers/models/upsampling.py +454 -0
  33. diffusers/models/uvit_2d.py +471 -0
  34. diffusers/models/vq_model.py +9 -2
  35. diffusers/pipelines/__init__.py +81 -73
  36. diffusers/pipelines/amused/__init__.py +62 -0
  37. diffusers/pipelines/amused/pipeline_amused.py +328 -0
  38. diffusers/pipelines/amused/pipeline_amused_img2img.py +347 -0
  39. diffusers/pipelines/amused/pipeline_amused_inpaint.py +378 -0
  40. diffusers/pipelines/animatediff/pipeline_animatediff.py +38 -10
  41. diffusers/pipelines/auto_pipeline.py +17 -13
  42. diffusers/pipelines/controlnet/pipeline_controlnet.py +27 -10
  43. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +47 -5
  44. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +25 -8
  45. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +4 -6
  46. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +26 -10
  47. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +4 -3
  48. diffusers/pipelines/deprecated/__init__.py +153 -0
  49. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/__init__.py +3 -3
  50. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_alt_diffusion.py +91 -18
  51. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_alt_diffusion_img2img.py +91 -18
  52. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_output.py +1 -1
  53. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/__init__.py +1 -1
  54. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/mel.py +2 -2
  55. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/pipeline_audio_diffusion.py +4 -4
  56. diffusers/pipelines/{latent_diffusion_uncond → deprecated/latent_diffusion_uncond}/__init__.py +1 -1
  57. diffusers/pipelines/{latent_diffusion_uncond → deprecated/latent_diffusion_uncond}/pipeline_latent_diffusion_uncond.py +4 -4
  58. diffusers/pipelines/{pndm → deprecated/pndm}/__init__.py +1 -1
  59. diffusers/pipelines/{pndm → deprecated/pndm}/pipeline_pndm.py +4 -4
  60. diffusers/pipelines/{repaint → deprecated/repaint}/__init__.py +1 -1
  61. diffusers/pipelines/{repaint → deprecated/repaint}/pipeline_repaint.py +5 -5
  62. diffusers/pipelines/{score_sde_ve → deprecated/score_sde_ve}/__init__.py +1 -1
  63. diffusers/pipelines/{score_sde_ve → deprecated/score_sde_ve}/pipeline_score_sde_ve.py +4 -4
  64. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/__init__.py +6 -6
  65. diffusers/pipelines/{spectrogram_diffusion/continous_encoder.py → deprecated/spectrogram_diffusion/continuous_encoder.py} +2 -2
  66. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/midi_utils.py +1 -1
  67. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/notes_encoder.py +2 -2
  68. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/pipeline_spectrogram_diffusion.py +7 -7
  69. diffusers/pipelines/deprecated/stable_diffusion_variants/__init__.py +55 -0
  70. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_cycle_diffusion.py +16 -11
  71. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_onnx_stable_diffusion_inpaint_legacy.py +6 -6
  72. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_inpaint_legacy.py +11 -11
  73. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_model_editing.py +16 -11
  74. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_paradigms.py +10 -10
  75. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_pix2pix_zero.py +13 -13
  76. diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/__init__.py +1 -1
  77. diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/pipeline_stochastic_karras_ve.py +4 -4
  78. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/__init__.py +3 -3
  79. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/modeling_text_unet.py +54 -11
  80. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion.py +4 -4
  81. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_dual_guided.py +6 -6
  82. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_image_variation.py +6 -6
  83. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_text_to_image.py +6 -6
  84. diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/__init__.py +3 -3
  85. diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/pipeline_vq_diffusion.py +5 -5
  86. diffusers/pipelines/kandinsky3/__init__.py +4 -4
  87. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +98 -0
  88. diffusers/pipelines/kandinsky3/{kandinsky3_pipeline.py → pipeline_kandinsky3.py} +172 -35
  89. diffusers/pipelines/kandinsky3/{kandinsky3img2img_pipeline.py → pipeline_kandinsky3_img2img.py} +228 -34
  90. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +46 -5
  91. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +47 -6
  92. diffusers/pipelines/onnx_utils.py +8 -5
  93. diffusers/pipelines/pipeline_flax_utils.py +7 -6
  94. diffusers/pipelines/pipeline_utils.py +30 -29
  95. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +51 -2
  96. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +3 -3
  97. diffusers/pipelines/stable_diffusion/__init__.py +1 -72
  98. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +67 -75
  99. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +92 -8
  100. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +92 -8
  101. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +138 -10
  102. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +57 -7
  103. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +3 -0
  104. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +6 -0
  105. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +5 -0
  106. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -0
  107. diffusers/pipelines/stable_diffusion_attend_and_excite/__init__.py +48 -0
  108. diffusers/pipelines/{stable_diffusion → stable_diffusion_attend_and_excite}/pipeline_stable_diffusion_attend_and_excite.py +5 -2
  109. diffusers/pipelines/stable_diffusion_diffedit/__init__.py +48 -0
  110. diffusers/pipelines/{stable_diffusion → stable_diffusion_diffedit}/pipeline_stable_diffusion_diffedit.py +2 -3
  111. diffusers/pipelines/stable_diffusion_gligen/__init__.py +50 -0
  112. diffusers/pipelines/{stable_diffusion → stable_diffusion_gligen}/pipeline_stable_diffusion_gligen.py +2 -2
  113. diffusers/pipelines/{stable_diffusion → stable_diffusion_gligen}/pipeline_stable_diffusion_gligen_text_image.py +3 -3
  114. diffusers/pipelines/stable_diffusion_k_diffusion/__init__.py +60 -0
  115. diffusers/pipelines/{stable_diffusion → stable_diffusion_k_diffusion}/pipeline_stable_diffusion_k_diffusion.py +6 -1
  116. diffusers/pipelines/stable_diffusion_ldm3d/__init__.py +48 -0
  117. diffusers/pipelines/{stable_diffusion → stable_diffusion_ldm3d}/pipeline_stable_diffusion_ldm3d.py +50 -7
  118. diffusers/pipelines/stable_diffusion_panorama/__init__.py +48 -0
  119. diffusers/pipelines/{stable_diffusion → stable_diffusion_panorama}/pipeline_stable_diffusion_panorama.py +56 -8
  120. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +58 -6
  121. diffusers/pipelines/stable_diffusion_sag/__init__.py +48 -0
  122. diffusers/pipelines/{stable_diffusion → stable_diffusion_sag}/pipeline_stable_diffusion_sag.py +67 -10
  123. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +97 -15
  124. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +98 -14
  125. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +97 -14
  126. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +7 -5
  127. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +12 -9
  128. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +6 -0
  129. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +5 -0
  130. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +5 -0
  131. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +331 -9
  132. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +468 -9
  133. diffusers/pipelines/unclip/pipeline_unclip.py +2 -1
  134. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +1 -0
  135. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  136. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +4 -0
  137. diffusers/schedulers/__init__.py +2 -0
  138. diffusers/schedulers/scheduling_amused.py +162 -0
  139. diffusers/schedulers/scheduling_consistency_models.py +2 -0
  140. diffusers/schedulers/scheduling_ddim_inverse.py +1 -4
  141. diffusers/schedulers/scheduling_ddpm.py +46 -0
  142. diffusers/schedulers/scheduling_ddpm_parallel.py +46 -0
  143. diffusers/schedulers/scheduling_deis_multistep.py +13 -1
  144. diffusers/schedulers/scheduling_dpmsolver_multistep.py +13 -1
  145. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +13 -1
  146. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -0
  147. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -1
  148. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +58 -0
  149. diffusers/schedulers/scheduling_euler_discrete.py +62 -3
  150. diffusers/schedulers/scheduling_heun_discrete.py +2 -0
  151. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -0
  152. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -0
  153. diffusers/schedulers/scheduling_lms_discrete.py +2 -0
  154. diffusers/schedulers/scheduling_unipc_multistep.py +13 -1
  155. diffusers/schedulers/scheduling_utils.py +3 -1
  156. diffusers/schedulers/scheduling_utils_flax.py +3 -1
  157. diffusers/training_utils.py +1 -1
  158. diffusers/utils/__init__.py +0 -2
  159. diffusers/utils/constants.py +2 -5
  160. diffusers/utils/dummy_pt_objects.py +30 -0
  161. diffusers/utils/dummy_torch_and_transformers_objects.py +45 -0
  162. diffusers/utils/dynamic_modules_utils.py +14 -18
  163. diffusers/utils/hub_utils.py +24 -36
  164. diffusers/utils/logging.py +1 -1
  165. diffusers/utils/state_dict_utils.py +8 -0
  166. diffusers/utils/testing_utils.py +199 -1
  167. diffusers/utils/torch_utils.py +3 -3
  168. {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/METADATA +54 -53
  169. {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/RECORD +174 -155
  170. {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/WHEEL +1 -1
  171. {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/entry_points.txt +0 -1
  172. /diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/modeling_roberta_series.py +0 -0
  173. {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/LICENSE +0 -0
  174. {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/top_level.txt +0 -0
diffusers/__init__.py CHANGED
@@ -1,4 +1,4 @@
- __version__ = "0.24.0"
+ __version__ = "0.25.0"

  from typing import TYPE_CHECKING

@@ -94,6 +94,7 @@ else:
  "UNet3DConditionModel",
  "UNetMotionModel",
  "UNetSpatioTemporalConditionModel",
+ "UVit2DModel",
  "VQModel",
  ]
  )
@@ -130,6 +131,7 @@ else:
  )
  _import_structure["schedulers"].extend(
  [
+ "AmusedScheduler",
  "CMStochasticIterativeScheduler",
  "DDIMInverseScheduler",
  "DDIMParallelScheduler",
@@ -201,6 +203,9 @@ else:
  [
  "AltDiffusionImg2ImgPipeline",
  "AltDiffusionPipeline",
+ "AmusedImg2ImgPipeline",
+ "AmusedInpaintPipeline",
+ "AmusedPipeline",
  "AnimateDiffPipeline",
  "AudioLDM2Pipeline",
  "AudioLDM2ProjectionModel",
@@ -468,6 +473,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
  UNet3DConditionModel,
  UNetMotionModel,
  UNetSpatioTemporalConditionModel,
+ UVit2DModel,
  VQModel,
  )
  from .optimization import (
@@ -502,6 +508,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
  ScoreSdeVePipeline,
  )
  from .schedulers import (
+ AmusedScheduler,
  CMStochasticIterativeScheduler,
  DDIMInverseScheduler,
  DDIMParallelScheduler,
@@ -556,6 +563,9 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
  from .pipelines import (
  AltDiffusionImg2ImgPipeline,
  AltDiffusionPipeline,
+ AmusedImg2ImgPipeline,
+ AmusedInpaintPipeline,
+ AmusedPipeline,
  AnimateDiffPipeline,
  AudioLDM2Pipeline,
  AudioLDM2ProjectionModel,
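
Note: the new aMUSEd exports above (AmusedPipeline, AmusedImg2ImgPipeline, AmusedInpaintPipeline, together with UVit2DModel and AmusedScheduler) become importable from the top-level package. A minimal sketch of what this enables; the checkpoint id and the exact call signature are assumptions, not shown in this diff:

# Sketch only: "amused/amused-256" and the prompt-call signature are assumptions.
from diffusers import AmusedPipeline

pipe = AmusedPipeline.from_pretrained("amused/amused-256")
pipe = pipe.to("cuda")  # assumes a CUDA device is available
image = pipe("a photo of a dog").images[0]
image.save("dog.png")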
diffusers/commands/fp16_safetensors.py CHANGED
@@ -19,6 +19,7 @@ Usage example:

  import glob
  import json
+ import warnings
  from argparse import ArgumentParser, Namespace
  from importlib import import_module

@@ -32,12 +33,12 @@ from . import BaseDiffusersCLICommand


  def conversion_command_factory(args: Namespace):
- return FP16SafetensorsCommand(
- args.ckpt_id,
- args.fp16,
- args.use_safetensors,
- args.use_auth_token,
- )
+ if args.use_auth_token:
+ warnings.warn(
+ "The `--use_auth_token` flag is deprecated and will be removed in a future version. Authentication is now"
+ " handled automatically if user is logged in."
+ )
+ return FP16SafetensorsCommand(args.ckpt_id, args.fp16, args.use_safetensors)


  class FP16SafetensorsCommand(BaseDiffusersCLICommand):
@@ -62,7 +63,7 @@ class FP16SafetensorsCommand(BaseDiffusersCLICommand):
  )
  conversion_parser.set_defaults(func=conversion_command_factory)

- def __init__(self, ckpt_id: str, fp16: bool, use_safetensors: bool, use_auth_token: bool):
+ def __init__(self, ckpt_id: str, fp16: bool, use_safetensors: bool):
  self.logger = logging.get_logger("diffusers-cli/fp16_safetensors")
  self.ckpt_id = ckpt_id
  self.local_ckpt_dir = f"/tmp/{ckpt_id}"
@@ -75,8 +76,6 @@ class FP16SafetensorsCommand(BaseDiffusersCLICommand):
  "When `use_safetensors` and `fp16` both are False, then this command is of no use."
  )

- self.use_auth_token = use_auth_token
-
  def run(self):
  if version.parse(huggingface_hub.__version__) < version.parse("0.9.0"):
  raise ImportError(
@@ -87,7 +86,7 @@ class FP16SafetensorsCommand(BaseDiffusersCLICommand):
  from huggingface_hub import create_commit
  from huggingface_hub._commit_api import CommitOperationAdd

- model_index = hf_hub_download(repo_id=self.ckpt_id, filename="model_index.json", token=self.use_auth_token)
+ model_index = hf_hub_download(repo_id=self.ckpt_id, filename="model_index.json")
  with open(model_index, "r") as f:
  pipeline_class_name = json.load(f)["_class_name"]
  pipeline_class = getattr(import_module("diffusers"), pipeline_class_name)
@@ -96,7 +95,7 @@ class FP16SafetensorsCommand(BaseDiffusersCLICommand):
  # Load the appropriate pipeline. We could have use `DiffusionPipeline`
  # here, but just to avoid any rough edge cases.
  pipeline = pipeline_class.from_pretrained(
- self.ckpt_id, torch_dtype=torch.float16 if self.fp16 else torch.float32, use_auth_token=self.use_auth_token
+ self.ckpt_id, torch_dtype=torch.float16 if self.fp16 else torch.float32
  )
  pipeline.save_pretrained(
  self.local_ckpt_dir,
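
Note: with `use_auth_token` removed from both the command factory and the command itself, access to private checkpoints now relies on the cached Hugging Face login. A hedged sketch of the new flow; the checkpoint id is a placeholder:

# Placeholder checkpoint id; the command now takes only ckpt_id, fp16 and use_safetensors.
from huggingface_hub import login
from diffusers.commands.fp16_safetensors import FP16SafetensorsCommand

login()  # one-time authentication, replacing the removed --use_auth_token flag

command = FP16SafetensorsCommand("your-org/your-checkpoint", fp16=True, use_safetensors=True)
command.run()  # downloads the checkpoint, converts it, and pushes the variants back to the Hub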
diffusers/configuration_utils.py CHANGED
@@ -27,12 +27,16 @@ from typing import Any, Dict, Tuple, Union

  import numpy as np
  from huggingface_hub import create_repo, hf_hub_download
- from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError
+ from huggingface_hub.utils import (
+ EntryNotFoundError,
+ RepositoryNotFoundError,
+ RevisionNotFoundError,
+ validate_hf_hub_args,
+ )
  from requests import HTTPError

  from . import __version__
  from .utils import (
- DIFFUSERS_CACHE,
  HUGGINGFACE_CO_RESOLVE_ENDPOINT,
  DummyObject,
  deprecate,
@@ -275,6 +279,7 @@ class ConfigMixin:
  return cls.load_config(*args, **kwargs)

  @classmethod
+ @validate_hf_hub_args
  def load_config(
  cls,
  pretrained_model_name_or_path: Union[str, os.PathLike],
@@ -311,7 +316,7 @@ class ConfigMixin:
  local_files_only (`bool`, *optional*, defaults to `False`):
  Whether to only load local model weights and configuration files or not. If set to `True`, the model
  won't be downloaded from the Hub.
- use_auth_token (`str` or *bool*, *optional*):
+ token (`str` or *bool*, *optional*):
  The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
  `diffusers-cli login` (stored in `~/.huggingface`) is used.
  revision (`str`, *optional*, defaults to `"main"`):
@@ -329,11 +334,11 @@ class ConfigMixin:
  A dictionary of all the parameters stored in a JSON configuration file.

  """
- cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
+ cache_dir = kwargs.pop("cache_dir", None)
  force_download = kwargs.pop("force_download", False)
  resume_download = kwargs.pop("resume_download", False)
  proxies = kwargs.pop("proxies", None)
- use_auth_token = kwargs.pop("use_auth_token", None)
+ token = kwargs.pop("token", None)
  local_files_only = kwargs.pop("local_files_only", False)
  revision = kwargs.pop("revision", None)
  _ = kwargs.pop("mirror", None)
@@ -376,7 +381,7 @@ class ConfigMixin:
  proxies=proxies,
  resume_download=resume_download,
  local_files_only=local_files_only,
- use_auth_token=use_auth_token,
+ token=token,
  user_agent=user_agent,
  subfolder=subfolder,
  revision=revision,
@@ -385,8 +390,7 @@ class ConfigMixin:
  raise EnvironmentError(
  f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier"
  " listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a"
- " token having permission to this repo with `use_auth_token` or log in with `huggingface-cli"
- " login`."
+ " token having permission to this repo with `token` or log in with `huggingface-cli login`."
  )
  except RevisionNotFoundError:
  raise EnvironmentError(
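
Note: the practical effect of this hunk is that `load_config` (and callers built on it) accept `token` instead of `use_auth_token`, with kwargs checked by `validate_hf_hub_args`. A minimal sketch of the updated call; the repo id and subfolder are placeholders:

# Placeholder repo id; `token=` replaces the old `use_auth_token=` keyword.
from diffusers import UNet2DConditionModel

config = UNet2DConditionModel.load_config(
    "your-org/your-private-model",
    subfolder="unet",
    token=True,  # reuse the token stored by `huggingface-cli login`
)
print(config["sample_size"])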
diffusers/dependency_versions_table.py CHANGED
@@ -30,9 +30,10 @@ deps = {
  "pytest-timeout": "pytest-timeout",
  "pytest-xdist": "pytest-xdist",
  "python": "python>=3.8.0",
- "ruff": "ruff>=0.1.5,<=0.2",
+ "ruff": "ruff==0.1.5",
  "safetensors": "safetensors>=0.3.1",
  "sentencepiece": "sentencepiece>=0.1.91,!=0.1.92",
+ "GitPython": "GitPython<3.1.19",
  "scipy": "scipy",
  "onnx": "onnx",
  "regex": "regex!=2019.12.17",
diffusers/experimental/rl/value_guided_sampling.py CHANGED
@@ -113,7 +113,7 @@ class ValueGuidedRLPipeline(DiffusionPipeline):
  prev_x = self.unet(x.permute(0, 2, 1), timesteps).sample.permute(0, 2, 1)

  # TODO: verify deprecation of this kwarg
- x = self.scheduler.step(prev_x, i, x, predict_epsilon=False)["prev_sample"]
+ x = self.scheduler.step(prev_x, i, x)["prev_sample"]

  # apply conditions to the trajectory (set the initial state)
  x = self.reset_x0(x, conditions, self.action_dim)
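
Note: the long-deprecated per-call `predict_epsilon` flag is simply dropped here; the equivalent behaviour is configured on the scheduler itself. A minimal sketch of that configuration (not taken from this diff):

# Sketch: prediction_type="sample" corresponds to the old predict_epsilon=False.
from diffusers import DDPMScheduler

scheduler = DDPMScheduler(prediction_type="sample")
# step() is then called without extra kwargs, e.g.:
# x = scheduler.step(model_output, timestep, sample).prev_sample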
diffusers/image_processor.py CHANGED
@@ -18,7 +18,7 @@ from typing import List, Optional, Tuple, Union
  import numpy as np
  import PIL.Image
  import torch
- from PIL import Image
+ from PIL import Image, ImageFilter, ImageOps

  from .configuration_utils import ConfigMixin, register_to_config
  from .utils import CONFIG_NAME, PIL_INTERPOLATION, deprecate
@@ -88,7 +88,7 @@ class VaeImageProcessor(ConfigMixin):
  self.config.do_convert_rgb = False

  @staticmethod
- def numpy_to_pil(images: np.ndarray) -> PIL.Image.Image:
+ def numpy_to_pil(images: np.ndarray) -> List[PIL.Image.Image]:
  """
  Convert a numpy image or a batch of images to a PIL image.
  """
@@ -166,54 +166,178 @@ class VaeImageProcessor(ConfigMixin):

  return image

- def get_default_height_width(
- self,
- image: Union[PIL.Image.Image, np.ndarray, torch.Tensor],
- height: Optional[int] = None,
- width: Optional[int] = None,
- ) -> Tuple[int, int]:
+ @staticmethod
+ def blur(image: PIL.Image.Image, blur_factor: int = 4) -> PIL.Image.Image:
  """
- This function return the height and width that are downscaled to the next integer multiple of
- `vae_scale_factor`.
+ Blurs an image.
+ """
+ image = image.filter(ImageFilter.GaussianBlur(blur_factor))
+
+ return image
+
+ @staticmethod
+ def get_crop_region(mask_image: PIL.Image.Image, width: int, height: int, pad=0):
+ """
+ Finds a rectangular region that contains all masked ares in an image, and expands region to match the aspect ratio of the original image;
+ for example, if user drew mask in a 128x32 region, and the dimensions for processing are 512x512, the region will be expanded to 128x128.

  Args:
- image(`PIL.Image.Image`, `np.ndarray` or `torch.Tensor`):
- The image input, can be a PIL image, numpy array or pytorch tensor. if it is a numpy array, should have
- shape `[batch, height, width]` or `[batch, height, width, channel]` if it is a pytorch tensor, should
- have shape `[batch, channel, height, width]`.
- height (`int`, *optional*, defaults to `None`):
- The height in preprocessed image. If `None`, will use the height of `image` input.
- width (`int`, *optional*`, defaults to `None`):
- The width in preprocessed. If `None`, will use the width of the `image` input.
+ mask_image (PIL.Image.Image): Mask image.
+ width (int): Width of the image to be processed.
+ height (int): Height of the image to be processed.
+ pad (int, optional): Padding to be added to the crop region. Defaults to 0.
+
+ Returns:
+ tuple: (x1, y1, x2, y2) represent a rectangular region that contains all masked ares in an image and matches the original aspect ratio.
+ """
+
+ mask_image = mask_image.convert("L")
+ mask = np.array(mask_image)
+
+ # 1. find a rectangular region that contains all masked ares in an image
+ h, w = mask.shape
+ crop_left = 0
+ for i in range(w):
+ if not (mask[:, i] == 0).all():
+ break
+ crop_left += 1
+
+ crop_right = 0
+ for i in reversed(range(w)):
+ if not (mask[:, i] == 0).all():
+ break
+ crop_right += 1
+
+ crop_top = 0
+ for i in range(h):
+ if not (mask[i] == 0).all():
+ break
+ crop_top += 1
+
+ crop_bottom = 0
+ for i in reversed(range(h)):
+ if not (mask[i] == 0).all():
+ break
+ crop_bottom += 1
+
+ # 2. add padding to the crop region
+ x1, y1, x2, y2 = (
+ int(max(crop_left - pad, 0)),
+ int(max(crop_top - pad, 0)),
+ int(min(w - crop_right + pad, w)),
+ int(min(h - crop_bottom + pad, h)),
+ )
+
+ # 3. expands crop region to match the aspect ratio of the image to be processed
+ ratio_crop_region = (x2 - x1) / (y2 - y1)
+ ratio_processing = width / height
+
+ if ratio_crop_region > ratio_processing:
+ desired_height = (x2 - x1) / ratio_processing
+ desired_height_diff = int(desired_height - (y2 - y1))
+ y1 -= desired_height_diff // 2
+ y2 += desired_height_diff - desired_height_diff // 2
+ if y2 >= mask_image.height:
+ diff = y2 - mask_image.height
+ y2 -= diff
+ y1 -= diff
+ if y1 < 0:
+ y2 -= y1
+ y1 -= y1
+ if y2 >= mask_image.height:
+ y2 = mask_image.height
+ else:
+ desired_width = (y2 - y1) * ratio_processing
+ desired_width_diff = int(desired_width - (x2 - x1))
+ x1 -= desired_width_diff // 2
+ x2 += desired_width_diff - desired_width_diff // 2
+ if x2 >= mask_image.width:
+ diff = x2 - mask_image.width
+ x2 -= diff
+ x1 -= diff
+ if x1 < 0:
+ x2 -= x1
+ x1 -= x1
+ if x2 >= mask_image.width:
+ x2 = mask_image.width
+
+ return x1, y1, x2, y2
+
+ def _resize_and_fill(
+ self,
+ image: PIL.Image.Image,
+ width: int,
+ height: int,
+ ) -> PIL.Image.Image:
  """
+ Resize the image to fit within the specified width and height, maintaining the aspect ratio, and then center the image within the dimensions, filling empty with data from image.

- if height is None:
- if isinstance(image, PIL.Image.Image):
- height = image.height
- elif isinstance(image, torch.Tensor):
- height = image.shape[2]
- else:
- height = image.shape[1]
+ Args:
+ image: The image to resize.
+ width: The width to resize the image to.
+ height: The height to resize the image to.
+ """
+
+ ratio = width / height
+ src_ratio = image.width / image.height
+
+ src_w = width if ratio < src_ratio else image.width * height // image.height
+ src_h = height if ratio >= src_ratio else image.height * width // image.width
+
+ resized = image.resize((src_w, src_h), resample=PIL_INTERPOLATION["lanczos"])
+ res = Image.new("RGB", (width, height))
+ res.paste(resized, box=(width // 2 - src_w // 2, height // 2 - src_h // 2))
+
+ if ratio < src_ratio:
+ fill_height = height // 2 - src_h // 2
+ if fill_height > 0:
+ res.paste(resized.resize((width, fill_height), box=(0, 0, width, 0)), box=(0, 0))
+ res.paste(
+ resized.resize((width, fill_height), box=(0, resized.height, width, resized.height)),
+ box=(0, fill_height + src_h),
+ )
+ elif ratio > src_ratio:
+ fill_width = width // 2 - src_w // 2
+ if fill_width > 0:
+ res.paste(resized.resize((fill_width, height), box=(0, 0, 0, height)), box=(0, 0))
+ res.paste(
+ resized.resize((fill_width, height), box=(resized.width, 0, resized.width, height)),
+ box=(fill_width + src_w, 0),
+ )
+
+ return res
+
+ def _resize_and_crop(
+ self,
+ image: PIL.Image.Image,
+ width: int,
+ height: int,
+ ) -> PIL.Image.Image:
+ """
+ Resize the image to fit within the specified width and height, maintaining the aspect ratio, and then center the image within the dimensions, cropping the excess.

- if width is None:
- if isinstance(image, PIL.Image.Image):
- width = image.width
- elif isinstance(image, torch.Tensor):
- width = image.shape[3]
- else:
- width = image.shape[2]
+ Args:
+ image: The image to resize.
+ width: The width to resize the image to.
+ height: The height to resize the image to.
+ """
+ ratio = width / height
+ src_ratio = image.width / image.height

- width, height = (
- x - x % self.config.vae_scale_factor for x in (width, height)
- ) # resize to integer multiple of vae_scale_factor
+ src_w = width if ratio > src_ratio else image.width * height // image.height
+ src_h = height if ratio <= src_ratio else image.height * width // image.width

- return height, width
+ resized = image.resize((src_w, src_h), resample=PIL_INTERPOLATION["lanczos"])
+ res = Image.new("RGB", (width, height))
+ res.paste(resized, box=(width // 2 - src_w // 2, height // 2 - src_h // 2))
+ return res

  def resize(
  self,
  image: Union[PIL.Image.Image, np.ndarray, torch.Tensor],
- height: Optional[int] = None,
- width: Optional[int] = None,
+ height: int,
+ width: int,
+ resize_mode: str = "default", # "defalt", "fill", "crop"
  ) -> Union[PIL.Image.Image, np.ndarray, torch.Tensor]:
  """
  Resize image.
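
Note: the new `blur` and `get_crop_region` helpers support an "inpaint only the masked region" workflow: soften the mask, then find the padded masked rectangle expanded to the processing aspect ratio. A minimal sketch; file names are placeholders:

# Placeholder files; sketch of the new mask helpers added above.
from PIL import Image
from diffusers.image_processor import VaeImageProcessor

processor = VaeImageProcessor()
mask_image = Image.open("mask.png").convert("L")

softened_mask = processor.blur(mask_image, blur_factor=4)
x1, y1, x2, y2 = processor.get_crop_region(softened_mask, width=512, height=512, pad=32)
crops_coords = (x1, y1, x2, y2)  # reusable as `crops_coords` in preprocess()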
@@ -221,17 +345,35 @@ class VaeImageProcessor(ConfigMixin):
  Args:
  image (`PIL.Image.Image`, `np.ndarray` or `torch.Tensor`):
  The image input, can be a PIL image, numpy array or pytorch tensor.
- height (`int`, *optional*, defaults to `None`):
+ height (`int`):
  The height to resize to.
- width (`int`, *optional*`, defaults to `None`):
+ width (`int`):
  The width to resize to.
+ resize_mode (`str`, *optional*, defaults to `default`):
+ The resize mode to use, can be one of `default` or `fill`. If `default`, will resize the image to fit
+ within the specified width and height, and it may not maintaining the original aspect ratio.
+ If `fill`, will resize the image to fit within the specified width and height, maintaining the aspect ratio, and then center the image
+ within the dimensions, filling empty with data from image.
+ If `crop`, will resize the image to fit within the specified width and height, maintaining the aspect ratio, and then center the image
+ within the dimensions, cropping the excess.
+ Note that resize_mode `fill` and `crop` are only supported for PIL image input.

  Returns:
  `PIL.Image.Image`, `np.ndarray` or `torch.Tensor`:
  The resized image.
  """
+ if resize_mode != "default" and not isinstance(image, PIL.Image.Image):
+ raise ValueError(f"Only PIL image input is supported for resize_mode {resize_mode}")
  if isinstance(image, PIL.Image.Image):
- image = image.resize((width, height), resample=PIL_INTERPOLATION[self.config.resample])
+ if resize_mode == "default":
+ image = image.resize((width, height), resample=PIL_INTERPOLATION[self.config.resample])
+ elif resize_mode == "fill":
+ image = self._resize_and_fill(image, width, height)
+ elif resize_mode == "crop":
+ image = self._resize_and_crop(image, width, height)
+ else:
+ raise ValueError(f"resize_mode {resize_mode} is not supported")
+
  elif isinstance(image, torch.Tensor):
  image = torch.nn.functional.interpolate(
  image,
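
Note: `resize` now requires explicit dimensions, and the `fill`/`crop` modes (PIL input only) preserve the aspect ratio instead of stretching. A short sketch with a placeholder image:

# Placeholder image; the two new modes only accept PIL input.
from PIL import Image
from diffusers.image_processor import VaeImageProcessor

processor = VaeImageProcessor()
image = Image.open("landscape.png").convert("RGB")

stretched = processor.resize(image, height=512, width=512)                        # default: may distort
letterboxed = processor.resize(image, height=512, width=512, resize_mode="fill")  # keep ratio, pad from image content
center_crop = processor.resize(image, height=512, width=512, resize_mode="crop")  # keep ratio, crop the excess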
@@ -262,14 +404,77 @@ class VaeImageProcessor(ConfigMixin):
  image[image >= 0.5] = 1
  return image

+ def get_default_height_width(
+ self,
+ image: Union[PIL.Image.Image, np.ndarray, torch.Tensor],
+ height: Optional[int] = None,
+ width: Optional[int] = None,
+ ) -> Tuple[int, int]:
+ """
+ This function return the height and width that are downscaled to the next integer multiple of
+ `vae_scale_factor`.
+
+ Args:
+ image(`PIL.Image.Image`, `np.ndarray` or `torch.Tensor`):
+ The image input, can be a PIL image, numpy array or pytorch tensor. if it is a numpy array, should have
+ shape `[batch, height, width]` or `[batch, height, width, channel]` if it is a pytorch tensor, should
+ have shape `[batch, channel, height, width]`.
+ height (`int`, *optional*, defaults to `None`):
+ The height in preprocessed image. If `None`, will use the height of `image` input.
+ width (`int`, *optional*`, defaults to `None`):
+ The width in preprocessed. If `None`, will use the width of the `image` input.
+ """
+
+ if height is None:
+ if isinstance(image, PIL.Image.Image):
+ height = image.height
+ elif isinstance(image, torch.Tensor):
+ height = image.shape[2]
+ else:
+ height = image.shape[1]
+
+ if width is None:
+ if isinstance(image, PIL.Image.Image):
+ width = image.width
+ elif isinstance(image, torch.Tensor):
+ width = image.shape[3]
+ else:
+ width = image.shape[2]
+
+ width, height = (
+ x - x % self.config.vae_scale_factor for x in (width, height)
+ ) # resize to integer multiple of vae_scale_factor
+
+ return height, width
+
  def preprocess(
  self,
- image: Union[torch.FloatTensor, PIL.Image.Image, np.ndarray],
+ image: PipelineImageInput,
  height: Optional[int] = None,
  width: Optional[int] = None,
+ resize_mode: str = "default", # "defalt", "fill", "crop"
+ crops_coords: Optional[Tuple[int, int, int, int]] = None,
  ) -> torch.Tensor:
  """
- Preprocess the image input. Accepted formats are PIL images, NumPy arrays or PyTorch tensors.
+ Preprocess the image input.
+
+ Args:
+ image (`pipeline_image_input`):
+ The image input, accepted formats are PIL images, NumPy arrays, PyTorch tensors; Also accept list of supported formats.
+ height (`int`, *optional*, defaults to `None`):
+ The height in preprocessed image. If `None`, will use the `get_default_height_width()` to get default height.
+ width (`int`, *optional*`, defaults to `None`):
+ The width in preprocessed. If `None`, will use get_default_height_width()` to get the default width.
+ resize_mode (`str`, *optional*, defaults to `default`):
+ The resize mode, can be one of `default` or `fill`. If `default`, will resize the image to fit
+ within the specified width and height, and it may not maintaining the original aspect ratio.
+ If `fill`, will resize the image to fit within the specified width and height, maintaining the aspect ratio, and then center the image
+ within the dimensions, filling empty with data from image.
+ If `crop`, will resize the image to fit within the specified width and height, maintaining the aspect ratio, and then center the image
+ within the dimensions, cropping the excess.
+ Note that resize_mode `fill` and `crop` are only supported for PIL image input.
+ crops_coords (`List[Tuple[int, int, int, int]]`, *optional*, defaults to `None`):
+ The crop coordinates for each image in the batch. If `None`, will not crop the image.
  """
  supported_formats = (PIL.Image.Image, np.ndarray, torch.Tensor)

@@ -299,13 +504,15 @@ class VaeImageProcessor(ConfigMixin):
  )

  if isinstance(image[0], PIL.Image.Image):
+ if crops_coords is not None:
+ image = [i.crop(crops_coords) for i in image]
+ if self.config.do_resize:
+ height, width = self.get_default_height_width(image[0], height, width)
+ image = [self.resize(i, height, width, resize_mode=resize_mode) for i in image]
  if self.config.do_convert_rgb:
  image = [self.convert_to_rgb(i) for i in image]
  elif self.config.do_convert_grayscale:
  image = [self.convert_to_grayscale(i) for i in image]
- if self.config.do_resize:
- height, width = self.get_default_height_width(image[0], height, width)
- image = [self.resize(i, height, width) for i in image]
  image = self.pil_to_numpy(image) # to np
  image = self.numpy_to_pt(image) # to pt

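Note: `preprocess` can now crop PIL inputs to `crops_coords` and resize (honoring `resize_mode`) before any color-space conversion. A rough sketch chaining it with `get_crop_region`; file names are placeholders:

# Placeholder files; crops_coords comes from get_crop_region on the mask.
from PIL import Image
from diffusers.image_processor import VaeImageProcessor

processor = VaeImageProcessor()
init_image = Image.open("photo.png").convert("RGB")
mask_image = Image.open("mask.png").convert("L")

crops_coords = processor.get_crop_region(mask_image, width=512, height=512, pad=32)
image_tensor = processor.preprocess(
    init_image,
    height=512,
    width=512,
    resize_mode="fill",
    crops_coords=crops_coords,
)
print(image_tensor.shape)  # expected: torch.Size([1, 3, 512, 512])

# After an inpainting pipeline produces a PIL result, the new apply_overlay() helper
# (added at the end of this file) can paste it back onto init_image, keeping unmasked
# pixels untouched, e.g.: final = processor.apply_overlay(mask_image, init_image, result_pil)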
@@ -406,6 +613,39 @@ class VaeImageProcessor(ConfigMixin):
  if output_type == "pil":
  return self.numpy_to_pil(image)

+ def apply_overlay(
+ self,
+ mask: PIL.Image.Image,
+ init_image: PIL.Image.Image,
+ image: PIL.Image.Image,
+ crop_coords: Optional[Tuple[int, int, int, int]] = None,
+ ) -> PIL.Image.Image:
+ """
+ overlay the inpaint output to the original image
+ """
+
+ width, height = image.width, image.height
+
+ init_image = self.resize(init_image, width=width, height=height)
+ mask = self.resize(mask, width=width, height=height)
+
+ init_image_masked = PIL.Image.new("RGBa", (width, height))
+ init_image_masked.paste(init_image.convert("RGBA").convert("RGBa"), mask=ImageOps.invert(mask.convert("L")))
+ init_image_masked = init_image_masked.convert("RGBA")
+
+ if crop_coords is not None:
+ x, y, w, h = crop_coords
+ base_image = PIL.Image.new("RGBA", (width, height))
+ image = self.resize(image, height=h, width=w, resize_mode="crop")
+ base_image.paste(image, (x, y))
+ image = base_image.convert("RGB")
+
+ image = image.convert("RGBA")
+ image.alpha_composite(init_image_masked)
+ image = image.convert("RGB")
+
+ return image
+

  class VaeImageProcessorLDM3D(VaeImageProcessor):
  """