diffusers 0.29.0__py3-none-any.whl → 0.29.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +7 -1
- diffusers/loaders/single_file.py +12 -0
- diffusers/loaders/single_file_model.py +10 -8
- diffusers/loaders/single_file_utils.py +33 -23
- diffusers/models/__init__.py +2 -0
- diffusers/models/controlnet_sd3.py +418 -0
- diffusers/models/modeling_utils.py +10 -3
- diffusers/models/transformers/transformer_sd3.py +16 -7
- diffusers/pipelines/__init__.py +9 -0
- diffusers/pipelines/auto_pipeline.py +8 -0
- diffusers/pipelines/controlnet_sd3/__init__.py +53 -0
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +1062 -0
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +23 -5
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +23 -5
- diffusers/utils/dummy_pt_objects.py +30 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +15 -0
- {diffusers-0.29.0.dist-info → diffusers-0.29.1.dist-info}/METADATA +44 -44
- {diffusers-0.29.0.dist-info → diffusers-0.29.1.dist-info}/RECORD +22 -19
- {diffusers-0.29.0.dist-info → diffusers-0.29.1.dist-info}/WHEEL +1 -1
- {diffusers-0.29.0.dist-info → diffusers-0.29.1.dist-info}/LICENSE +0 -0
- {diffusers-0.29.0.dist-info → diffusers-0.29.1.dist-info}/entry_points.txt +0 -0
- {diffusers-0.29.0.dist-info → diffusers-0.29.1.dist-info}/top_level.txt +0 -0
diffusers/models/modeling_utils.py
CHANGED
@@ -462,7 +462,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
             device_map (`str` or `Dict[str, Union[int, str, torch.device]]`, *optional*):
                 A map that specifies where each submodule should go. It doesn't need to be defined for each
                 parameter/buffer name; once a given module name is inside, every submodule of it will be sent to the
-                same device.
+                same device. Defaults to `None`, meaning that the model will be loaded on CPU.
 
                 Set `device_map="auto"` to have 🤗 Accelerate automatically compute the most optimized `device_map`. For
                 more information about each option see [designing a device
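
For context, the documented behavior in a minimal sketch (the checkpoint id and subfolder are illustrative, not taken from this diff):

from diffusers import SD3Transformer2DModel

# device_map=None (the documented default) loads every weight on CPU;
# device_map="auto" lets Accelerate compute a placement across devices.
model = SD3Transformer2DModel.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers",  # illustrative model id
    subfolder="transformer",
    device_map="auto",
)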
@@ -774,7 +774,12 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
         else:  # else let accelerate handle loading and dispatching.
             # Load weights and dispatch according to the device_map
             # by default the device_map is None and the weights are loaded on the CPU
+            force_hook = True
             device_map = _determine_device_map(model, device_map, max_memory, torch_dtype)
+            if device_map is None and is_sharded:
+                # we load the parameters on the cpu
+                device_map = {"": "cpu"}
+                force_hook = False
             try:
                 accelerate.load_checkpoint_and_dispatch(
                     model,
@@ -784,7 +789,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
                     offload_folder=offload_folder,
                     offload_state_dict=offload_state_dict,
                     dtype=torch_dtype,
-                    force_hooks=True,
+                    force_hooks=force_hook,
                     strict=True,
                 )
             except AttributeError as e:
@@ -808,12 +813,14 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
                 model._temp_convert_self_to_deprecated_attention_blocks()
                 accelerate.load_checkpoint_and_dispatch(
                     model,
-                    model_file,
+                    model_file if not is_sharded else sharded_ckpt_cached_folder,
                     device_map,
                     max_memory=max_memory,
                     offload_folder=offload_folder,
                     offload_state_dict=offload_state_dict,
                     dtype=torch_dtype,
+                    force_hook=force_hook,
+                    strict=True,
                 )
                 model._undo_temp_convert_self_to_deprecated_attention_blocks()
             else:
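
Taken together, the three hunks above change how `from_pretrained` dispatches sharded checkpoints: with no user-supplied `device_map`, a sharded checkpoint is now loaded entirely on CPU and Accelerate hooks are no longer forced. A condensed sketch of the new control flow, paraphrased from the hunks (all names as they appear there):

force_hook = True
device_map = _determine_device_map(model, device_map, max_memory, torch_dtype)
if device_map is None and is_sharded:
    # load the parameters on the cpu
    device_map = {"": "cpu"}
    force_hook = False

accelerate.load_checkpoint_and_dispatch(
    model,
    model_file if not is_sharded else sharded_ckpt_cached_folder,
    device_map,
    dtype=torch_dtype,
    force_hooks=force_hook,
    strict=True,
)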
diffusers/models/transformers/transformer_sd3.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright 2024 Stability AI and The HuggingFace Team. All rights reserved.
+# Copyright 2024 Stability AI, The HuggingFace Team and The InstantX Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 
-from typing import Any, Dict, Optional, Union
+from typing import Any, Dict, List, Optional, Union
 
 import torch
 import torch.nn as nn
@@ -245,6 +245,7 @@ class SD3Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin):
         encoder_hidden_states: torch.FloatTensor = None,
         pooled_projections: torch.FloatTensor = None,
         timestep: torch.LongTensor = None,
+        block_controlnet_hidden_states: List = None,
         joint_attention_kwargs: Optional[Dict[str, Any]] = None,
         return_dict: bool = True,
     ) -> Union[torch.FloatTensor, Transformer2DModelOutput]:
@@ -260,6 +261,8 @@ class SD3Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin):
                 from the embeddings of input conditions.
             timestep ( `torch.LongTensor`):
                 Used to indicate denoising step.
+            block_controlnet_hidden_states: (`list` of `torch.Tensor`):
+                A list of tensors that if specified are added to the residuals of transformer blocks.
             joint_attention_kwargs (`dict`, *optional*):
                 A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                 `self.processor` in
@@ -282,9 +285,10 @@ class SD3Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin):
             # weight the lora layers by setting `lora_scale` for each PEFT layer
             scale_lora_layers(self, lora_scale)
         else:
-            logger.warning(
-                "Passing `scale` via `joint_attention_kwargs` when not using the PEFT backend is ineffective."
-            )
+            if joint_attention_kwargs is not None and joint_attention_kwargs.get("scale", None) is not None:
+                logger.warning(
+                    "Passing `scale` via `joint_attention_kwargs` when not using the PEFT backend is ineffective."
+                )
 
         height, width = hidden_states.shape[-2:]
 
@@ -292,7 +296,7 @@ class SD3Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin):
         temb = self.time_text_embed(timestep, pooled_projections)
         encoder_hidden_states = self.context_embedder(encoder_hidden_states)
 
-        for block in self.transformer_blocks:
+        for index_block, block in enumerate(self.transformer_blocks):
             if self.training and self.gradient_checkpointing:
 
                 def create_custom_forward(module, return_dict=None):
@@ -305,7 +309,7 @@ class SD3Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin):
                     return custom_forward
 
                 ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
-                hidden_states = torch.utils.checkpoint.checkpoint(
+                encoder_hidden_states, hidden_states = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(block),
                     hidden_states,
                     encoder_hidden_states,
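
This one-line fix matters because a joint transformer block returns two tensors, and the old code discarded the updated text stream when gradient checkpointing was active. A self-contained sketch of the unpacking pattern, using a toy block rather than the real module:

import torch

def toy_block(hidden_states, encoder_hidden_states, temb):
    # stand-in for JointTransformerBlock.forward, which returns
    # (encoder_hidden_states, hidden_states)
    return encoder_hidden_states + temb, hidden_states + temb

h, e, t = (torch.randn(2, 8, requires_grad=True) for _ in range(3))
e, h = torch.utils.checkpoint.checkpoint(toy_block, h, e, t, use_reentrant=False)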
@@ -318,6 +322,11 @@ class SD3Transformer2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin):
                     hidden_states=hidden_states, encoder_hidden_states=encoder_hidden_states, temb=temb
                 )
 
+            # controlnet residual
+            if block_controlnet_hidden_states is not None and block.context_pre_only is False:
+                interval_control = len(self.transformer_blocks) // len(block_controlnet_hidden_states)
+                hidden_states = hidden_states + block_controlnet_hidden_states[index_block // interval_control]
+
         hidden_states = self.norm_out(hidden_states, temb)
         hidden_states = self.proj_out(hidden_states)
 
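
The residual hookup spreads the controlnet outputs evenly over the transformer blocks via integer division. A worked example with assumed sizes:

num_blocks = 24   # assumed len(self.transformer_blocks)
num_control = 12  # assumed len(block_controlnet_hidden_states)
interval_control = num_blocks // num_control  # -> 2

# blocks 0-1 add controlnet output 0, blocks 2-3 add output 1, and so on
mapping = {i: i // interval_control for i in range(num_blocks)}
assert mapping[0] == 0 and mapping[1] == 0 and mapping[23] == 11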
diffusers/pipelines/__init__.py
CHANGED
@@ -20,6 +20,7 @@ from ..utils import (
 _dummy_objects = {}
 _import_structure = {
     "controlnet": [],
+    "controlnet_sd3": [],
     "controlnet_xs": [],
     "deprecated": [],
     "latent_diffusion": [],
@@ -142,6 +143,11 @@ else:
             "StableDiffusionXLControlNetXSPipeline",
         ]
     )
+    _import_structure["controlnet_sd3"].extend(
+        [
+            "StableDiffusion3ControlNetPipeline",
+        ]
+    )
     _import_structure["deepfloyd_if"] = [
         "IFImg2ImgPipeline",
         "IFImg2ImgSuperResolutionPipeline",
@@ -394,6 +400,9 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         StableDiffusionXLControlNetInpaintPipeline,
         StableDiffusionXLControlNetPipeline,
     )
+    from .controlnet_sd3 import (
+        StableDiffusion3ControlNetPipeline,
+    )
     from .controlnet_xs import (
         StableDiffusionControlNetXSPipeline,
         StableDiffusionXLControlNetXSPipeline,
diffusers/pipelines/auto_pipeline.py
CHANGED
@@ -27,6 +27,7 @@ from .controlnet import (
     StableDiffusionXLControlNetPipeline,
 )
 from .deepfloyd_if import IFImg2ImgPipeline, IFInpaintingPipeline, IFPipeline
+from .hunyuandit import HunyuanDiTPipeline
 from .kandinsky import (
     KandinskyCombinedPipeline,
     KandinskyImg2ImgCombinedPipeline,
@@ -52,6 +53,10 @@ from .stable_diffusion import (
     StableDiffusionInpaintPipeline,
     StableDiffusionPipeline,
 )
+from .stable_diffusion_3 import (
+    StableDiffusion3Img2ImgPipeline,
+    StableDiffusion3Pipeline,
+)
 from .stable_diffusion_xl import (
     StableDiffusionXLImg2ImgPipeline,
     StableDiffusionXLInpaintPipeline,
@@ -64,7 +69,9 @@ AUTO_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict(
     [
         ("stable-diffusion", StableDiffusionPipeline),
         ("stable-diffusion-xl", StableDiffusionXLPipeline),
+        ("stable-diffusion-3", StableDiffusion3Pipeline),
         ("if", IFPipeline),
+        ("hunyuan", HunyuanDiTPipeline),
         ("kandinsky", KandinskyCombinedPipeline),
         ("kandinsky22", KandinskyV22CombinedPipeline),
         ("kandinsky3", Kandinsky3Pipeline),
@@ -82,6 +89,7 @@ AUTO_IMAGE2IMAGE_PIPELINES_MAPPING = OrderedDict(
     [
         ("stable-diffusion", StableDiffusionImg2ImgPipeline),
         ("stable-diffusion-xl", StableDiffusionXLImg2ImgPipeline),
+        ("stable-diffusion-3", StableDiffusion3Img2ImgPipeline),
         ("if", IFImg2ImgPipeline),
         ("kandinsky", KandinskyImg2ImgCombinedPipeline),
         ("kandinsky22", KandinskyV22Img2ImgCombinedPipeline),
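
With these two mapping entries, the auto-pipeline classes can now resolve SD3 checkpoints by name. A hedged usage sketch (the model id is an assumption, not part of this diff):

import torch
from diffusers import AutoPipelineForText2Image

pipe = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers",  # assumed model id
    torch_dtype=torch.float16,
).to("cuda")
image = pipe("an astronaut riding a horse on mars").images[0]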
diffusers/pipelines/controlnet_sd3/__init__.py
ADDED
@@ -0,0 +1,53 @@
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_flax_available,
+    is_torch_available,
+    is_transformers_available,
+)
+
+
+_dummy_objects = {}
+_import_structure = {}
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["pipeline_stable_diffusion_3_controlnet"] = ["StableDiffusion3ControlNetPipeline"]
+
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *
+    else:
+        from .pipeline_stable_diffusion_3_controlnet import StableDiffusion3ControlNetPipeline
+
+    try:
+        if not (is_transformers_available() and is_flax_available()):
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_flax_and_transformers_objects import *  # noqa F403
+
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)
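
Once this module is registered, the new pipeline is importable from the package root. A minimal usage sketch; the two checkpoint ids and the control-image URL are assumptions, not taken from this diff:

import torch
from diffusers import SD3ControlNetModel, StableDiffusion3ControlNetPipeline
from diffusers.utils import load_image

controlnet = SD3ControlNetModel.from_pretrained(
    "InstantX/SD3-Controlnet-Canny", torch_dtype=torch.float16  # assumed repo id
)
pipe = StableDiffusion3ControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers",  # assumed repo id
    controlnet=controlnet,
    torch_dtype=torch.float16,
).to("cuda")

control_image = load_image("https://example.com/canny.png")  # placeholder URL
image = pipe("a photo of a cat", control_image=control_image).images[0]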