diffusers 0.17.1__py3-none-any.whl → 0.18.2__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. diffusers/__init__.py +26 -1
  2. diffusers/configuration_utils.py +34 -29
  3. diffusers/dependency_versions_table.py +4 -0
  4. diffusers/image_processor.py +125 -12
  5. diffusers/loaders.py +169 -203
  6. diffusers/models/attention.py +24 -1
  7. diffusers/models/attention_flax.py +10 -5
  8. diffusers/models/attention_processor.py +3 -0
  9. diffusers/models/autoencoder_kl.py +114 -33
  10. diffusers/models/controlnet.py +131 -14
  11. diffusers/models/controlnet_flax.py +37 -26
  12. diffusers/models/cross_attention.py +17 -17
  13. diffusers/models/embeddings.py +67 -0
  14. diffusers/models/modeling_flax_utils.py +64 -56
  15. diffusers/models/modeling_utils.py +193 -104
  16. diffusers/models/prior_transformer.py +207 -37
  17. diffusers/models/resnet.py +26 -26
  18. diffusers/models/transformer_2d.py +36 -41
  19. diffusers/models/transformer_temporal.py +24 -21
  20. diffusers/models/unet_1d.py +31 -25
  21. diffusers/models/unet_2d.py +43 -30
  22. diffusers/models/unet_2d_blocks.py +210 -89
  23. diffusers/models/unet_2d_blocks_flax.py +12 -12
  24. diffusers/models/unet_2d_condition.py +172 -64
  25. diffusers/models/unet_2d_condition_flax.py +38 -24
  26. diffusers/models/unet_3d_blocks.py +34 -31
  27. diffusers/models/unet_3d_condition.py +101 -34
  28. diffusers/models/vae.py +5 -5
  29. diffusers/models/vae_flax.py +37 -34
  30. diffusers/models/vq_model.py +23 -14
  31. diffusers/pipelines/__init__.py +24 -1
  32. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +1 -1
  33. diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -3
  34. diffusers/pipelines/consistency_models/__init__.py +1 -0
  35. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +337 -0
  36. diffusers/pipelines/controlnet/multicontrolnet.py +120 -1
  37. diffusers/pipelines/controlnet/pipeline_controlnet.py +59 -17
  38. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +60 -15
  39. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +60 -17
  40. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  41. diffusers/pipelines/kandinsky/__init__.py +1 -1
  42. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +4 -6
  43. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +1 -0
  44. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +1 -0
  45. diffusers/pipelines/kandinsky2_2/__init__.py +7 -0
  46. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +317 -0
  47. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +372 -0
  48. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +434 -0
  49. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +398 -0
  50. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +531 -0
  51. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +541 -0
  52. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +605 -0
  53. diffusers/pipelines/pipeline_flax_utils.py +2 -2
  54. diffusers/pipelines/pipeline_utils.py +124 -146
  55. diffusers/pipelines/shap_e/__init__.py +27 -0
  56. diffusers/pipelines/shap_e/camera.py +147 -0
  57. diffusers/pipelines/shap_e/pipeline_shap_e.py +390 -0
  58. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +349 -0
  59. diffusers/pipelines/shap_e/renderer.py +709 -0
  60. diffusers/pipelines/stable_diffusion/__init__.py +2 -0
  61. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +261 -66
  62. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +3 -3
  63. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -3
  64. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +4 -2
  65. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +6 -6
  66. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +1 -1
  67. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
  68. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +719 -0
  69. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +1 -1
  70. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +832 -0
  71. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +17 -7
  72. diffusers/pipelines/stable_diffusion_xl/__init__.py +26 -0
  73. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +823 -0
  74. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +896 -0
  75. diffusers/pipelines/stable_diffusion_xl/watermark.py +31 -0
  76. diffusers/pipelines/text_to_video_synthesis/__init__.py +2 -1
  77. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +5 -1
  78. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +771 -0
  79. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +92 -6
  80. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +3 -3
  81. diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +209 -91
  82. diffusers/schedulers/__init__.py +3 -0
  83. diffusers/schedulers/scheduling_consistency_models.py +380 -0
  84. diffusers/schedulers/scheduling_ddim.py +28 -6
  85. diffusers/schedulers/scheduling_ddim_inverse.py +19 -4
  86. diffusers/schedulers/scheduling_ddim_parallel.py +642 -0
  87. diffusers/schedulers/scheduling_ddpm.py +53 -7
  88. diffusers/schedulers/scheduling_ddpm_parallel.py +604 -0
  89. diffusers/schedulers/scheduling_deis_multistep.py +66 -11
  90. diffusers/schedulers/scheduling_dpmsolver_multistep.py +55 -13
  91. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +19 -4
  92. diffusers/schedulers/scheduling_dpmsolver_sde.py +73 -11
  93. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +23 -7
  94. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +58 -9
  95. diffusers/schedulers/scheduling_euler_discrete.py +58 -8
  96. diffusers/schedulers/scheduling_heun_discrete.py +89 -14
  97. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +73 -11
  98. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +73 -11
  99. diffusers/schedulers/scheduling_lms_discrete.py +57 -8
  100. diffusers/schedulers/scheduling_pndm.py +46 -10
  101. diffusers/schedulers/scheduling_repaint.py +19 -4
  102. diffusers/schedulers/scheduling_sde_ve.py +5 -1
  103. diffusers/schedulers/scheduling_unclip.py +43 -4
  104. diffusers/schedulers/scheduling_unipc_multistep.py +48 -7
  105. diffusers/training_utils.py +1 -1
  106. diffusers/utils/__init__.py +2 -1
  107. diffusers/utils/dummy_pt_objects.py +60 -0
  108. diffusers/utils/dummy_torch_and_transformers_and_invisible_watermark_objects.py +32 -0
  109. diffusers/utils/dummy_torch_and_transformers_objects.py +180 -0
  110. diffusers/utils/hub_utils.py +1 -1
  111. diffusers/utils/import_utils.py +20 -3
  112. diffusers/utils/logging.py +15 -18
  113. diffusers/utils/outputs.py +3 -3
  114. diffusers/utils/testing_utils.py +15 -0
  115. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/METADATA +4 -2
  116. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/RECORD +120 -94
  117. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/WHEEL +1 -1
  118. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/LICENSE +0 -0
  119. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/entry_points.txt +0 -0
  120. {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/top_level.txt +0 -0
diffusers/loaders.py CHANGED
@@ -115,63 +115,50 @@ class UNet2DConditionLoadersMixin:
115
115
 
116
116
  def load_attn_procs(self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], **kwargs):
117
117
  r"""
118
- Load pretrained attention processor layers into `UNet2DConditionModel`. Attention processor layers have to be
118
+ Load pretrained attention processor layers into [`UNet2DConditionModel`]. Attention processor layers have to be
119
119
  defined in
120
120
  [`cross_attention.py`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py)
121
121
  and be a `torch.nn.Module` class.
122
122
 
123
- <Tip warning={true}>
124
-
125
- This function is experimental and might change in the future.
126
-
127
- </Tip>
128
-
129
123
  Parameters:
130
124
  pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`):
131
125
  Can be either:
132
126
 
133
- - A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
134
- Valid model ids should have an organization name, like `google/ddpm-celebahq-256`.
135
- - A path to a *directory* containing model weights saved using [`~ModelMixin.save_config`], e.g.,
136
- `./my_model_directory/`.
127
+ - A string, the model id (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on
128
+ the Hub.
129
+ - A path to a directory (for example `./my_model_directory`) containing the model weights saved
130
+ with [`ModelMixin.save_pretrained`].
137
131
  - A [torch state
138
132
  dict](https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict).
139
133
 
140
134
  cache_dir (`Union[str, os.PathLike]`, *optional*):
141
- Path to a directory in which a downloaded pretrained model configuration should be cached if the
142
- standard cache should not be used.
135
+ Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
136
+ is not used.
143
137
  force_download (`bool`, *optional*, defaults to `False`):
144
138
  Whether or not to force the (re-)download of the model weights and configuration files, overriding the
145
139
  cached versions if they exist.
146
140
  resume_download (`bool`, *optional*, defaults to `False`):
147
- Whether or not to delete incompletely received files. Will attempt to resume the download if such a
148
- file exists.
141
+ Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
142
+ incompletely downloaded files are deleted.
149
143
  proxies (`Dict[str, str]`, *optional*):
150
- A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
144
+ A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
151
145
  'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
152
- local_files_only(`bool`, *optional*, defaults to `False`):
153
- Whether or not to only look at local files (i.e., do not try to download the model).
146
+ local_files_only (`bool`, *optional*, defaults to `False`):
147
+ Whether to only load local model weights and configuration files or not. If set to `True`, the model
148
+ won't be downloaded from the Hub.
154
149
  use_auth_token (`str` or *bool*, *optional*):
155
- The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
156
- when running `diffusers-cli login` (stored in `~/.huggingface`).
150
+ The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
151
+ `diffusers-cli login` (stored in `~/.huggingface`) is used.
157
152
  revision (`str`, *optional*, defaults to `"main"`):
158
- The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
159
- git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
160
- identifier allowed by git.
153
+ The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
154
+ allowed by Git.
161
155
  subfolder (`str`, *optional*, defaults to `""`):
162
- In case the relevant files are located inside a subfolder of the model repo (either remote in
163
- huggingface.co or downloaded locally), you can specify the folder name here.
156
+ The subfolder location of a model file within a larger model repository on the Hub or locally.
164
157
  mirror (`str`, *optional*):
165
- Mirror source to accelerate downloads in China. If you are from China and have an accessibility
166
- problem, you can set this option to resolve it. Note that we do not guarantee the timeliness or safety.
167
- Please refer to the mirror site for more information.
168
-
169
- <Tip>
170
-
171
- It is required to be logged in (`huggingface-cli login`) when you want to use private or [gated
172
- models](https://huggingface.co/docs/hub/models-gated#gated-models).
158
+ Mirror source to resolve accessibility issues if you’re downloading a model in China. We do not
159
+ guarantee the timeliness or safety of the source, and you should refer to the mirror site for more
160
+ information.
173
161
 
174
- </Tip>
175
162
  """
176
163
 
177
164
  cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
@@ -190,7 +177,7 @@ class UNet2DConditionLoadersMixin:
190
177
 
191
178
  if use_safetensors and not is_safetensors_available():
192
179
  raise ValueError(
193
- "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetenstors"
180
+ "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
194
181
  )
195
182
 
196
183
  allow_pickle = False
@@ -349,24 +336,25 @@ class UNet2DConditionLoadersMixin:
349
336
  **kwargs,
350
337
  ):
351
338
  r"""
352
- Save an attention processor to a directory, so that it can be re-loaded using the
339
+ Save an attention processor to a directory so that it can be reloaded using the
353
340
  [`~loaders.UNet2DConditionLoadersMixin.load_attn_procs`] method.
354
341
 
355
342
  Arguments:
356
343
  save_directory (`str` or `os.PathLike`):
357
- Directory to which to save. Will be created if it doesn't exist.
344
+ Directory to save an attention processor to. Will be created if it doesn't exist.
358
345
  is_main_process (`bool`, *optional*, defaults to `True`):
359
- Whether the process calling this is the main process or not. Useful when in distributed training like
360
- TPUs and need to call this function on all processes. In this case, set `is_main_process=True` only on
361
- the main process to avoid race conditions.
346
+ Whether the process calling this is the main process or not. Useful during distributed training when you
347
+ need to call this function on all processes. In this case, set `is_main_process=True` only on the main
348
+ process to avoid race conditions.
362
349
  save_function (`Callable`):
363
- The function to use to save the state dictionary. Useful on distributed training like TPUs when one
364
- need to replace `torch.save` by another method. Can be configured with the environment variable
350
+ The function to use to save the state dictionary. Useful during distributed training when you need to
351
+ replace `torch.save` with another method. Can be configured with the environment variable
365
352
  `DIFFUSERS_SAVE_MODE`.
353
+
366
354
  """
367
355
  weight_name = weight_name or deprecate(
368
356
  "weights_name",
369
- "0.18.0",
357
+ "0.20.0",
370
358
  "`weights_name` is deprecated, please use `weight_name` instead.",
371
359
  take_from=kwargs,
372
360
  )
@@ -418,15 +406,14 @@ class UNet2DConditionLoadersMixin:
418
406
 
419
407
  class TextualInversionLoaderMixin:
420
408
  r"""
421
- Mixin class for loading textual inversion tokens and embeddings to the tokenizer and text encoder.
409
+ Load textual inversion tokens and embeddings to the tokenizer and text encoder.
422
410
  """
423
411
 
424
412
  def maybe_convert_prompt(self, prompt: Union[str, List[str]], tokenizer: "PreTrainedTokenizer"):
425
413
  r"""
426
- Maybe convert a prompt into a "multi vector"-compatible prompt. If the prompt includes a token that corresponds
427
- to a multi-vector textual inversion embedding, this function will process the prompt so that the special token
428
- is replaced with multiple special tokens each corresponding to one of the vectors. If the prompt has no textual
429
- inversion token or a textual inversion token that is a single vector, the input prompt is simply returned.
414
+ Processes prompts that include a special token corresponding to a multi-vector textual inversion embedding to
415
+ be replaced with multiple special tokens each corresponding to one of the vectors. If the prompt has no textual
416
+ inversion token or if the textual inversion token is a single vector, the input prompt is returned.
430
417
 
431
418
  Parameters:
432
419
  prompt (`str` or list of `str`):
@@ -486,78 +473,61 @@ class TextualInversionLoaderMixin:
486
473
  **kwargs,
487
474
  ):
488
475
  r"""
489
- Load textual inversion embeddings into the text encoder of stable diffusion pipelines. Both `diffusers` and
490
- `Automatic1111` formats are supported (see example below).
491
-
492
- <Tip warning={true}>
493
-
494
- This function is experimental and might change in the future.
495
-
496
- </Tip>
476
+ Load textual inversion embeddings into the text encoder of [`StableDiffusionPipeline`] (both 🤗 Diffusers and
477
+ Automatic1111 formats are supported).
497
478
 
498
479
  Parameters:
499
480
  pretrained_model_name_or_path (`str` or `os.PathLike` or `List[str or os.PathLike]` or `Dict` or `List[Dict]`):
500
- Can be either:
481
+ Can be either one of the following or a list of them:
501
482
 
502
- - A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
503
- Valid model ids should have an organization name, like
504
- `"sd-concepts-library/low-poly-hd-logos-icons"`.
505
- - A path to a *directory* containing textual inversion weights, e.g.
506
- `./my_text_inversion_directory/`.
507
- - A path to a *file* containing textual inversion weights, e.g. `./my_text_inversions.pt`.
483
+ - A string, the *model id* (for example `sd-concepts-library/low-poly-hd-logos-icons`) of a
484
+ pretrained model hosted on the Hub.
485
+ - A path to a *directory* (for example `./my_text_inversion_directory/`) containing the textual
486
+ inversion weights.
487
+ - A path to a *file* (for example `./my_text_inversions.pt`) containing textual inversion weights.
508
488
  - A [torch state
509
489
  dict](https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict).
510
490
 
511
- Or a list of those elements.
512
491
  token (`str` or `List[str]`, *optional*):
513
492
  Override the token to use for the textual inversion weights. If `pretrained_model_name_or_path` is a
514
493
  list, then `token` must also be a list of equal length.
515
494
  weight_name (`str`, *optional*):
516
- Name of a custom weight file. This should be used in two cases:
495
+ Name of a custom weight file. This should be used when:
517
496
 
518
- - The saved textual inversion file is in `diffusers` format, but was saved under a specific weight
519
- name, such as `text_inv.bin`.
520
- - The saved textual inversion file is in the "Automatic1111" form.
497
+ - The saved textual inversion file is in 🤗 Diffusers format, but was saved under a specific weight
498
+ name such as `text_inv.bin`.
499
+ - The saved textual inversion file is in the Automatic1111 format.
521
500
  cache_dir (`Union[str, os.PathLike]`, *optional*):
522
- Path to a directory in which a downloaded pretrained model configuration should be cached if the
523
- standard cache should not be used.
501
+ Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
502
+ is not used.
524
503
  force_download (`bool`, *optional*, defaults to `False`):
525
504
  Whether or not to force the (re-)download of the model weights and configuration files, overriding the
526
505
  cached versions if they exist.
527
506
  resume_download (`bool`, *optional*, defaults to `False`):
528
- Whether or not to delete incompletely received files. Will attempt to resume the download if such a
529
- file exists.
507
+ Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
508
+ incompletely downloaded files are deleted.
530
509
  proxies (`Dict[str, str]`, *optional*):
531
- A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
510
+ A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
532
511
  'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
533
- local_files_only(`bool`, *optional*, defaults to `False`):
534
- Whether or not to only look at local files (i.e., do not try to download the model).
512
+ local_files_only (`bool`, *optional*, defaults to `False`):
513
+ Whether to only load local model weights and configuration files or not. If set to `True`, the model
514
+ won't be downloaded from the Hub.
535
515
  use_auth_token (`str` or *bool*, *optional*):
536
- The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
537
- when running `diffusers-cli login` (stored in `~/.huggingface`).
516
+ The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
517
+ `diffusers-cli login` (stored in `~/.huggingface`) is used.
538
518
  revision (`str`, *optional*, defaults to `"main"`):
539
- The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
540
- git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
541
- identifier allowed by git.
519
+ The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
520
+ allowed by Git.
542
521
  subfolder (`str`, *optional*, defaults to `""`):
543
- In case the relevant files are located inside a subfolder of the model repo (either remote in
544
- huggingface.co or downloaded locally), you can specify the folder name here.
545
-
522
+ The subfolder location of a model file within a larger model repository on the Hub or locally.
546
523
  mirror (`str`, *optional*):
547
- Mirror source to accelerate downloads in China. If you are from China and have an accessibility
548
- problem, you can set this option to resolve it. Note that we do not guarantee the timeliness or safety.
549
- Please refer to the mirror site for more information.
550
-
551
- <Tip>
552
-
553
- It is required to be logged in (`huggingface-cli login`) when you want to use private or [gated
554
- models](https://huggingface.co/docs/hub/models-gated#gated-models).
555
-
556
- </Tip>
524
+ Mirror source to resolve accessibility issues if you're downloading a model in China. We do not
525
+ guarantee the timeliness or safety of the source, and you should refer to the mirror site for more
526
+ information.
557
527
 
558
528
  Example:
559
529
 
560
- To load a textual inversion embedding vector in `diffusers` format:
530
+ To load a textual inversion embedding vector in 🤗 Diffusers format:
561
531
 
562
532
  ```py
563
533
  from diffusers import StableDiffusionPipeline
@@ -574,8 +544,9 @@ class TextualInversionLoaderMixin:
574
544
  image.save("cat-backpack.png")
575
545
  ```
576
546
 
577
- To load a textual inversion embedding vector in Automatic1111 format, make sure to first download the vector,
578
- e.g. from [civitAI](https://civitai.com/models/3036?modelVersionId=9857) and then load the vector locally:
547
+ To load a textual inversion embedding vector in Automatic1111 format, make sure to download the vector first
548
+ (for example from [civitAI](https://civitai.com/models/3036?modelVersionId=9857)) and then load the vector
549
+ locally:
579
550
 
580
551
  ```py
581
552
  from diffusers import StableDiffusionPipeline
@@ -618,7 +589,7 @@ class TextualInversionLoaderMixin:
618
589
 
619
590
  if use_safetensors and not is_safetensors_available():
620
591
  raise ValueError(
621
- "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetenstors"
592
+ "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
622
593
  )
623
594
 
624
595
  allow_pickle = False
@@ -766,78 +737,56 @@ class TextualInversionLoaderMixin:
766
737
 
767
738
  class LoraLoaderMixin:
768
739
  r"""
769
- Utility class for handling the loading LoRA layers into UNet (of class [`UNet2DConditionModel`]) and Text Encoder
770
- (of class [`CLIPTextModel`](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel)).
771
-
772
- <Tip warning={true}>
773
-
774
- This function is experimental and might change in the future.
775
-
776
- </Tip>
740
+ Load LoRA layers into [`UNet2DConditionModel`] and
741
+ [`CLIPTextModel`](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel).
777
742
  """
778
743
  text_encoder_name = TEXT_ENCODER_NAME
779
744
  unet_name = UNET_NAME
780
745
 
781
746
  def load_lora_weights(self, pretrained_model_name_or_path_or_dict: Union[str, Dict[str, torch.Tensor]], **kwargs):
782
747
  r"""
783
- Load pretrained attention processor layers (such as LoRA) into [`UNet2DConditionModel`] and
784
- [`CLIPTextModel`](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel)).
785
-
786
- <Tip warning={true}>
787
-
788
- We support loading A1111 formatted LoRA checkpoints in a limited capacity.
789
-
790
- This function is experimental and might change in the future.
791
-
792
- </Tip>
748
+ Load pretrained LoRA attention processor layers into [`UNet2DConditionModel`] and
749
+ [`CLIPTextModel`](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel).
793
750
 
794
751
  Parameters:
795
752
  pretrained_model_name_or_path_or_dict (`str` or `os.PathLike` or `dict`):
796
753
  Can be either:
797
754
 
798
- - A string, the *model id* of a pretrained model hosted inside a model repo on huggingface.co.
799
- Valid model ids should have an organization name, like `google/ddpm-celebahq-256`.
800
- - A path to a *directory* containing model weights saved using [`~ModelMixin.save_config`], e.g.,
801
- `./my_model_directory/`.
755
+ - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on
756
+ the Hub.
757
+ - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved
758
+ with [`ModelMixin.save_pretrained`].
802
759
  - A [torch state
803
760
  dict](https://pytorch.org/tutorials/beginner/saving_loading_models.html#what-is-a-state-dict).
804
761
 
805
762
  cache_dir (`Union[str, os.PathLike]`, *optional*):
806
- Path to a directory in which a downloaded pretrained model configuration should be cached if the
807
- standard cache should not be used.
763
+ Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
764
+ is not used.
808
765
  force_download (`bool`, *optional*, defaults to `False`):
809
766
  Whether or not to force the (re-)download of the model weights and configuration files, overriding the
810
767
  cached versions if they exist.
811
768
  resume_download (`bool`, *optional*, defaults to `False`):
812
- Whether or not to delete incompletely received files. Will attempt to resume the download if such a
813
- file exists.
769
+ Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
770
+ incompletely downloaded files are deleted.
814
771
  proxies (`Dict[str, str]`, *optional*):
815
- A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
772
+ A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
816
773
  'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
817
- local_files_only(`bool`, *optional*, defaults to `False`):
818
- Whether or not to only look at local files (i.e., do not try to download the model).
774
+ local_files_only (`bool`, *optional*, defaults to `False`):
775
+ Whether to only load local model weights and configuration files or not. If set to `True`, the model
776
+ won't be downloaded from the Hub.
819
777
  use_auth_token (`str` or *bool*, *optional*):
820
- The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
821
- when running `diffusers-cli login` (stored in `~/.huggingface`).
778
+ The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
779
+ `diffusers-cli login` (stored in `~/.huggingface`) is used.
822
780
  revision (`str`, *optional*, defaults to `"main"`):
823
- The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
824
- git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
825
- identifier allowed by git.
781
+ The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
782
+ allowed by Git.
826
783
  subfolder (`str`, *optional*, defaults to `""`):
827
- In case the relevant files are located inside a subfolder of the model repo (either remote in
828
- huggingface.co or downloaded locally), you can specify the folder name here.
829
-
784
+ The subfolder location of a model file within a larger model repository on the Hub or locally.
830
785
  mirror (`str`, *optional*):
831
- Mirror source to accelerate downloads in China. If you are from China and have an accessibility
832
- problem, you can set this option to resolve it. Note that we do not guarantee the timeliness or safety.
833
- Please refer to the mirror site for more information.
834
-
835
- <Tip>
836
-
837
- It is required to be logged in (`huggingface-cli login`) when you want to use private or [gated
838
- models](https://huggingface.co/docs/hub/models-gated#gated-models).
786
+ Mirror source to resolve accessibility issues if you're downloading a model in China. We do not
787
+ guarantee the timeliness or safety of the source, and you should refer to the mirror site for more
788
+ information.
839
789
 
840
- </Tip>
841
790
  """
842
791
  # Load the main state dict first which has the LoRA layers for either of
843
792
  # UNet and text encoder or both.
@@ -857,7 +806,7 @@ class LoraLoaderMixin:
857
806
 
858
807
  if use_safetensors and not is_safetensors_available():
859
808
  raise ValueError(
860
- "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetenstors"
809
+ "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
861
810
  )
862
811
 
863
812
  allow_pickle = False
@@ -1062,7 +1011,7 @@ class LoraLoaderMixin:
1062
1011
  proxies (`Dict[str, str]`, *optional*):
1063
1012
  A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
1064
1013
  'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
1065
- local_files_only(`bool`, *optional*, defaults to `False`):
1014
+ local_files_only (`bool`, *optional*, defaults to `False`):
1066
1015
  Whether or not to only look at local files (i.e., do not try to download the model).
1067
1016
  use_auth_token (`str` or *bool*, *optional*):
1068
1017
  The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
@@ -1105,7 +1054,7 @@ class LoraLoaderMixin:
1105
1054
 
1106
1055
  if use_safetensors and not is_safetensors_available():
1107
1056
  raise ValueError(
1108
- "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetenstors"
1057
+ "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
1109
1058
  )
1110
1059
 
1111
1060
  allow_pickle = False
@@ -1210,26 +1159,23 @@ class LoraLoaderMixin:
1210
1159
  safe_serialization: bool = False,
1211
1160
  ):
1212
1161
  r"""
1213
- Save the LoRA parameters corresponding to the UNet and the text encoder.
1162
+ Save the LoRA parameters corresponding to the UNet and text encoder.
1214
1163
 
1215
1164
  Arguments:
1216
1165
  save_directory (`str` or `os.PathLike`):
1217
- Directory to which to save. Will be created if it doesn't exist.
1166
+ Directory to save LoRA parameters to. Will be created if it doesn't exist.
1218
1167
  unet_lora_layers (`Dict[str, torch.nn.Module]` or `Dict[str, torch.Tensor]`):
1219
- State dict of the LoRA layers corresponding to the UNet. Specifying this helps to make the
1220
- serialization process easier and cleaner. Values can be both LoRA torch.nn.Modules layers or torch
1221
- weights.
1168
+ State dict of the LoRA layers corresponding to the UNet.
1222
1169
  text_encoder_lora_layers (`Dict[str, torch.nn.Module]` or `Dict[str, torch.Tensor]`):
1223
- State dict of the LoRA layers corresponding to the `text_encoder`. Since the `text_encoder` comes from
1224
- `transformers`, we cannot rejig it. That is why we have to explicitly pass the text encoder LoRA state
1225
- dict. Values can be both LoRA torch.nn.Modules layers or torch weights.
1170
+ State dict of the LoRA layers corresponding to the `text_encoder`. Must explicitly pass the text
1171
+ encoder LoRA state dict because it comes from 🤗 Transformers.
1226
1172
  is_main_process (`bool`, *optional*, defaults to `True`):
1227
- Whether the process calling this is the main process or not. Useful when in distributed training like
1228
- TPUs and need to call this function on all processes. In this case, set `is_main_process=True` only on
1229
- the main process to avoid race conditions.
1173
+ Whether the process calling this is the main process or not. Useful during distributed training when you
1174
+ need to call this function on all processes. In this case, set `is_main_process=True` only on the main
1175
+ process to avoid race conditions.
1230
1176
  save_function (`Callable`):
1231
- The function to use to save the state dictionary. Useful on distributed training like TPUs when one
1232
- need to replace `torch.save` by another method. Can be configured with the environment variable
1177
+ The function to use to save the state dictionary. Useful during distributed training when you need to
1178
+ replace `torch.save` with another method. Can be configured with the environment variable
1233
1179
  `DIFFUSERS_SAVE_MODE`.
1234
1180
  """
1235
1181
  if os.path.isfile(save_directory):
@@ -1330,74 +1276,90 @@ class LoraLoaderMixin:
1330
1276
  return new_state_dict, network_alpha
1331
1277
 
1332
1278
 
1333
- class FromCkptMixin:
1334
- """This helper class allows to directly load .ckpt stable diffusion file_extension
1335
- into the respective classes."""
1279
+ class FromSingleFileMixin:
1280
+ """
1281
+ Load model weights saved in the `.ckpt` format into a [`DiffusionPipeline`].
1282
+ """
1336
1283
 
1337
1284
  @classmethod
1338
- def from_ckpt(cls, pretrained_model_link_or_path, **kwargs):
1339
- r"""
1340
- Instantiate a PyTorch diffusion pipeline from pre-trained pipeline weights saved in the original .ckpt format.
1285
+ def from_ckpt(cls, *args, **kwargs):
1286
+ deprecation_message = "The function `from_ckpt` is deprecated in favor of `from_single_file` and will be removed in diffusers v.0.21. Please make sure to use `StableDiffusionPipeline.from_single_file(...)` instead."
1287
+ deprecate("from_ckpt", "0.21.0", deprecation_message, standard_warn=False)
1288
+ return cls.from_single_file(*args, **kwargs)
1341
1289
 
1342
- The pipeline is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated).
1290
+ @classmethod
1291
+ def from_single_file(cls, pretrained_model_link_or_path, **kwargs):
1292
+ r"""
1293
+ Instantiate a [`DiffusionPipeline`] from pretrained pipeline weights saved in the `.ckpt` format. The pipeline
1294
+ is set in evaluation mode (`model.eval()`) by default.
1343
1295
 
1344
1296
  Parameters:
1345
1297
  pretrained_model_link_or_path (`str` or `os.PathLike`, *optional*):
1346
1298
  Can be either:
1347
- - A link to the .ckpt file on the Hub. Should be in the format
1348
- `"https://huggingface.co/<repo_id>/blob/main/<path_to_file>"`
1299
+ - A link to the `.ckpt` file (for example
1300
+ `"https://huggingface.co/<repo_id>/blob/main/<path_to_file>.ckpt"`) on the Hub.
1349
1301
  - A path to a *file* containing all pipeline weights.
1350
1302
  torch_dtype (`str` or `torch.dtype`, *optional*):
1351
- Override the default `torch.dtype` and load the model under this dtype. If `"auto"` is passed the dtype
1352
- will be automatically derived from the model's weights.
1303
+ Override the default `torch.dtype` and load the model with another dtype. If `"auto"` is passed, the
1304
+ dtype is automatically derived from the model's weights.
1353
1305
  force_download (`bool`, *optional*, defaults to `False`):
1354
1306
  Whether or not to force the (re-)download of the model weights and configuration files, overriding the
1355
1307
  cached versions if they exist.
1356
1308
  cache_dir (`Union[str, os.PathLike]`, *optional*):
1357
- Path to a directory in which a downloaded pretrained model configuration should be cached if the
1358
- standard cache should not be used.
1309
+ Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
1310
+ is not used.
1359
1311
  resume_download (`bool`, *optional*, defaults to `False`):
1360
- Whether or not to delete incompletely received files. Will attempt to resume the download if such a
1361
- file exists.
1312
+ Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
1313
+ incompletely downloaded files are deleted.
1362
1314
  proxies (`Dict[str, str]`, *optional*):
1363
- A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
1315
+ A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
1364
1316
  'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
1365
1317
  local_files_only (`bool`, *optional*, defaults to `False`):
1366
- Whether or not to only look at local files (i.e., do not try to download the model).
1318
+ Whether to only load local model weights and configuration files or not. If set to True, the model
1319
+ won't be downloaded from the Hub.
1367
1320
  use_auth_token (`str` or *bool*, *optional*):
1368
- The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
1369
- when running `huggingface-cli login` (stored in `~/.huggingface`).
1321
+ The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
1322
+ `diffusers-cli login` (stored in `~/.huggingface`) is used.
1370
1323
  revision (`str`, *optional*, defaults to `"main"`):
1371
- The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
1372
- git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
1373
- identifier allowed by git.
1324
+ The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
1325
+ allowed by Git.
1374
1326
  use_safetensors (`bool`, *optional*, defaults to `None`):
1375
- If set to `None`, the pipeline will load the `safetensors` weights if they're available **and** if the
1376
- `safetensors` library is installed. If set to `True`, the pipeline will forcibly load the models from
1377
- `safetensors` weights. If set to `False` the pipeline will *not* use `safetensors`.
1378
- extract_ema (`bool`, *optional*, defaults to `False`): Only relevant for
1379
- checkpoints that have both EMA and non-EMA weights. Whether to extract the EMA weights or not. Defaults
1380
- to `False`. Pass `True` to extract the EMA weights. EMA weights usually yield higher quality images for
1381
- inference. Non-EMA weights are usually better to continue fine-tuning.
1327
+ If set to `None`, the safetensors weights are downloaded if they're available **and** if the
1328
+ safetensors library is installed. If set to `True`, the model is forcibly loaded from safetensors
1329
+ weights. If set to `False`, safetensors weights are not loaded.
1330
+ extract_ema (`bool`, *optional*, defaults to `False`):
1331
+ Whether to extract the EMA weights or not. Pass `True` to extract the EMA weights which usually yield
1332
+ higher quality images for inference. Non-EMA weights are usually better to continue finetuning.
1382
1333
  upcast_attention (`bool`, *optional*, defaults to `None`):
1383
- Whether the attention computation should always be upcasted. This is necessary when running stable
1334
+ Whether the attention computation should always be upcasted.
1384
1335
  image_size (`int`, *optional*, defaults to 512):
1385
- The image size that the model was trained on. Use 512 for Stable Diffusion v1.X and Stable Diffusion v2
1386
- Base. Use 768 for Stable Diffusion v2.
1336
+ The image size the model was trained on. Use 512 for all Stable Diffusion v1 models and the Stable
1337
+ Diffusion v2 base model. Use 768 for Stable Diffusion v2.
1387
1338
  prediction_type (`str`, *optional*):
1388
- The prediction type that the model was trained on. Use `'epsilon'` for Stable Diffusion v1.X and Stable
1389
- Diffusion v2 Base. Use `'v_prediction'` for Stable Diffusion v2.
1390
- num_in_channels (`int`, *optional*, defaults to None):
1339
+ The prediction type the model was trained on. Use `'epsilon'` for all Stable Diffusion v1 models and
1340
+ the Stable Diffusion v2 base model. Use `'v_prediction'` for Stable Diffusion v2.
1341
+ num_in_channels (`int`, *optional*, defaults to `None`):
1391
1342
  The number of input channels. If `None`, it will be automatically inferred.
1392
- scheduler_type (`str`, *optional*, defaults to 'pndm'):
1343
+ scheduler_type (`str`, *optional*, defaults to `"pndm"`):
1393
1344
  Type of scheduler to use. Should be one of `["pndm", "lms", "heun", "euler", "euler-ancestral", "dpm",
1394
1345
  "ddim"]`.
1395
1346
  load_safety_checker (`bool`, *optional*, defaults to `True`):
1396
- Whether to load the safety checker or not. Defaults to `True`.
1347
+ Whether to load the safety checker or not.
1348
+ text_encoder (`CLIPTextModel`, *optional*, defaults to `None`):
1349
+ An instance of
1350
+ [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel) to use,
1351
+ specifically the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14)
1352
+ variant. If this parameter is `None`, the function will load a new instance of [CLIP] by itself, if
1353
+ needed.
1354
+ tokenizer (`CLIPTokenizer`, *optional*, defaults to `None`):
1355
+ An instance of
1356
+ [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer)
1357
+ to use. If this parameter is `None`, the function will load a new instance of [CLIPTokenizer] by
1358
+ itself, if needed.
1397
1359
  kwargs (remaining dictionary of keyword arguments, *optional*):
1398
- Can be used to overwrite load - and saveable variables - *i.e.* the pipeline components - of the
1399
- specific pipeline class. The overwritten components are then directly passed to the pipelines
1400
- `__init__` method. See example below for more information.
1360
+ Can be used to overwrite load and saveable variables (for example the pipeline components of the
1361
+ specific pipeline class). The overwritten components are directly passed to the pipeline's `__init__`
1362
+ method. See example below for more information.
1401
1363
 
1402
1364
  Examples:
1403
1365
 
@@ -1405,16 +1367,16 @@ class FromCkptMixin:
1405
1367
  >>> from diffusers import StableDiffusionPipeline
1406
1368
 
1407
1369
  >>> # Download pipeline from huggingface.co and cache.
1408
- >>> pipeline = StableDiffusionPipeline.from_ckpt(
1370
+ >>> pipeline = StableDiffusionPipeline.from_single_file(
1409
1371
  ... "https://huggingface.co/WarriorMama777/OrangeMixs/blob/main/Models/AbyssOrangeMix/AbyssOrangeMix.safetensors"
1410
1372
  ... )
1411
1373
 
1412
1374
  >>> # Download pipeline from local file
1413
1375
  >>> # file is downloaded under ./v1-5-pruned-emaonly.ckpt
1414
- >>> pipeline = StableDiffusionPipeline.from_ckpt("./v1-5-pruned-emaonly")
1376
+ >>> pipeline = StableDiffusionPipeline.from_single_file("./v1-5-pruned-emaonly")
1415
1377
 
1416
1378
  >>> # Enable float16 and move to GPU
1417
- >>> pipeline = StableDiffusionPipeline.from_ckpt(
1379
+ >>> pipeline = StableDiffusionPipeline.from_single_file(
1418
1380
  ... "https://huggingface.co/runwayml/stable-diffusion-v1-5/blob/main/v1-5-pruned-emaonly.ckpt",
1419
1381
  ... torch_dtype=torch.float16,
1420
1382
  ... )
@@ -1432,12 +1394,14 @@ class FromCkptMixin:
1432
1394
  use_auth_token = kwargs.pop("use_auth_token", None)
1433
1395
  revision = kwargs.pop("revision", None)
1434
1396
  extract_ema = kwargs.pop("extract_ema", False)
1435
- image_size = kwargs.pop("image_size", 512)
1397
+ image_size = kwargs.pop("image_size", None)
1436
1398
  scheduler_type = kwargs.pop("scheduler_type", "pndm")
1437
1399
  num_in_channels = kwargs.pop("num_in_channels", None)
1438
1400
  upcast_attention = kwargs.pop("upcast_attention", None)
1439
1401
  load_safety_checker = kwargs.pop("load_safety_checker", True)
1440
1402
  prediction_type = kwargs.pop("prediction_type", None)
1403
+ text_encoder = kwargs.pop("text_encoder", None)
1404
+ tokenizer = kwargs.pop("tokenizer", None)
1441
1405
 
1442
1406
  torch_dtype = kwargs.pop("torch_dtype", None)
1443
1407
 
@@ -1518,6 +1482,8 @@ class FromCkptMixin:
1518
1482
  upcast_attention=upcast_attention,
1519
1483
  load_safety_checker=load_safety_checker,
1520
1484
  prediction_type=prediction_type,
1485
+ text_encoder=text_encoder,
1486
+ tokenizer=tokenizer,
1521
1487
  )
1522
1488
 
1523
1489
  if torch_dtype is not None: