diffsynth-engine 0.6.1.dev24__py3-none-any.whl → 0.6.1.dev26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffsynth_engine/__init__.py +6 -2
- diffsynth_engine/configs/__init__.py +10 -6
- diffsynth_engine/configs/pipeline.py +2 -18
- diffsynth_engine/models/basic/attention.py +52 -0
- diffsynth_engine/models/basic/video_sparse_attention.py +4 -1
- diffsynth_engine/pipelines/base.py +30 -2
- diffsynth_engine/pipelines/flux_image.py +2 -2
- diffsynth_engine/pipelines/qwen_image.py +4 -2
- diffsynth_engine/pipelines/wan_s2v.py +1 -1
- diffsynth_engine/pipelines/wan_video.py +8 -4
- diffsynth_engine/utils/flag.py +5 -0
- diffsynth_engine/utils/parallel.py +6 -7
- {diffsynth_engine-0.6.1.dev24.dist-info → diffsynth_engine-0.6.1.dev26.dist-info}/METADATA +1 -1
- {diffsynth_engine-0.6.1.dev24.dist-info → diffsynth_engine-0.6.1.dev26.dist-info}/RECORD +17 -17
- {diffsynth_engine-0.6.1.dev24.dist-info → diffsynth_engine-0.6.1.dev26.dist-info}/WHEEL +0 -0
- {diffsynth_engine-0.6.1.dev24.dist-info → diffsynth_engine-0.6.1.dev26.dist-info}/licenses/LICENSE +0 -0
- {diffsynth_engine-0.6.1.dev24.dist-info → diffsynth_engine-0.6.1.dev26.dist-info}/top_level.txt +0 -0
diffsynth_engine/__init__.py
CHANGED
@@ -12,11 +12,13 @@ from .configs import (
     WanStateDicts,
     QwenImageStateDicts,
     AttnImpl,
+    SpargeAttentionParams,
+    VideoSparseAttentionParams,
+    LoraConfig,
     ControlNetParams,
     ControlType,
     QwenImageControlNetParams,
     QwenImageControlType,
-    LoraConfig,
 )
 from .pipelines import (
     SDImagePipeline,
@@ -59,6 +61,9 @@ __all__ = [
     "WanStateDicts",
     "QwenImageStateDicts",
     "AttnImpl",
+    "SpargeAttentionParams",
+    "VideoSparseAttentionParams",
+    "LoraConfig",
     "ControlNetParams",
     "ControlType",
     "QwenImageControlNetParams",
@@ -79,7 +84,6 @@ __all__ = [
     "FluxIPAdapterRefTool",
     "FluxReplaceByControlTool",
     "FluxReduxRefTool",
-    "LoraConfig",
     "fetch_model",
     "fetch_modelscope_model",
     "register_fetch_modelscope_model",
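With this change the attention-related config names are re-exported from the package root. A minimal import sketch (assuming the wheel is installed; it uses only the names listed in the `__all__` diff above):

from diffsynth_engine import AttnImpl, SpargeAttentionParams, VideoSparseAttentionParams, LoraConfig

# AttnImpl is an Enum; its string values are what the pipelines hand to the attention kernels.
print(AttnImpl.AITER.value)  # "aiter", one of the two backends added in this release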
diffsynth_engine/configs/__init__.py
CHANGED
@@ -17,14 +17,16 @@ from .pipeline import (
     WanStateDicts,
     WanS2VStateDicts,
     QwenImageStateDicts,
-    LoraConfig,
     AttnImpl,
+    SpargeAttentionParams,
+    VideoSparseAttentionParams,
+    LoraConfig,
 )
 from .controlnet import (
     ControlType,
     ControlNetParams,
-    QwenImageControlNetParams,
     QwenImageControlType,
+    QwenImageControlNetParams,
 )

 __all__ = [
@@ -46,10 +48,12 @@ __all__ = [
     "WanStateDicts",
     "WanS2VStateDicts",
     "QwenImageStateDicts",
-    "
-    "
+    "AttnImpl",
+    "SpargeAttentionParams",
+    "VideoSparseAttentionParams",
+    "LoraConfig",
     "ControlType",
     "ControlNetParams",
-    "
-    "
+    "QwenImageControlType",
+    "QwenImageControlNetParams",
 ]
diffsynth_engine/configs/pipeline.py
CHANGED
@@ -5,7 +5,6 @@ from dataclasses import dataclass, field
 from typing import List, Dict, Tuple, Optional

 from diffsynth_engine.configs.controlnet import ControlType
-from diffsynth_engine.models.basic.video_sparse_attention import get_vsa_kwargs


 @dataclass
@@ -27,6 +26,8 @@ class AttnImpl(Enum):
     FA2 = "fa2"  # Flash Attention 2
     FA3 = "fa3"  # Flash Attention 3
     FA3_FP8 = "fa3_fp8"  # Flash Attention 3 with FP8
+    AITER = "aiter"  # Aiter Flash Attention
+    AITER_FP8 = "aiter_fp8"  # Aiter Flash Attention with FP8
     XFORMERS = "xformers"  # XFormers
     SDPA = "sdpa"  # Scaled Dot Product Attention
     SAGE = "sage"  # Sage Attention
@@ -52,23 +53,6 @@ class AttentionConfig:
     dit_attn_impl: AttnImpl = AttnImpl.AUTO
     attn_params: Optional[SpargeAttentionParams | VideoSparseAttentionParams] = None

-    def get_attn_kwargs(self, latents: torch.Tensor, device: str) -> Dict:
-        attn_kwargs = {"attn_impl": self.dit_attn_impl.value}
-        if isinstance(self.attn_params, SpargeAttentionParams):
-            assert self.dit_attn_impl == AttnImpl.SPARGE
-            attn_kwargs.update(
-                {
-                    "smooth_k": self.attn_params.smooth_k,
-                    "simthreshd1": self.attn_params.simthreshd1,
-                    "cdfthreshd": self.attn_params.cdfthreshd,
-                    "pvthreshd": self.attn_params.pvthreshd,
-                }
-            )
-        elif isinstance(self.attn_params, VideoSparseAttentionParams):
-            assert self.dit_attn_impl == AttnImpl.VSA
-            attn_kwargs.update(get_vsa_kwargs(latents.shape[2:], (1, 2, 2), self.attn_params.sparsity, device=device))
-        return attn_kwargs
-

 @dataclass
 class OptimizationConfig:
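The two new enum members map directly onto the implementation strings that the attention dispatchers accept (see the attention.py hunks below); the old `AttentionConfig.get_attn_kwargs` helper moves to `BasePipeline` later in this diff. A minimal, grounded check:

from diffsynth_engine.configs.pipeline import AttnImpl

# The string values match the new "aiter" / "aiter_fp8" branches in attention().
assert AttnImpl.AITER.value == "aiter"
assert AttnImpl.AITER_FP8.value == "aiter_fp8"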
diffsynth_engine/models/basic/attention.py
CHANGED
@@ -13,6 +13,7 @@ from diffsynth_engine.utils.flag import (
     SAGE_ATTN_AVAILABLE,
     SPARGE_ATTN_AVAILABLE,
     VIDEO_SPARSE_ATTN_AVAILABLE,
+    AITER_AVAILABLE,
 )
 from diffsynth_engine.utils.platform import DTYPE_FP8

@@ -93,6 +94,9 @@ if SPARGE_ATTN_AVAILABLE:
         )
         return out.transpose(1, 2)

+if AITER_AVAILABLE:
+    from aiter import flash_attn_func as aiter_flash_attn
+    from aiter import flash_attn_fp8_pertensor_func as aiter_flash_attn_fp8

 if VIDEO_SPARSE_ATTN_AVAILABLE:
     from diffsynth_engine.models.basic.video_sparse_attention import (
@@ -137,6 +141,8 @@ def attention(
         "fa2",
         "fa3",
         "fa3_fp8",
+        "aiter",
+        "aiter_fp8",
         "xformers",
         "sdpa",
         "sage",
@@ -157,6 +163,13 @@ def attention(
             logger.debug(
                 "flash_attn_3 does not support attention mask, will use fallback attention implementation"
             )
+    if AITER_AVAILABLE:
+        if flash_attn3_compatible:
+            return aiter_flash_attn(q, k, v, softmax_scale=scale)
+        else:
+            logger.warning(
+                f"head_dim={q.shape[-1]}, but aiter_flash_attn only supports head dimension at most {FA3_MAX_HEADDIM}, will use fallback attention implementation"
+            )
     if XFORMERS_AVAILABLE:
         return xformers_attn(q, k, v, attn_mask=attn_mask, scale=scale)
     if SDPA_AVAILABLE:
@@ -183,6 +196,22 @@ def attention(
         v = v.to(dtype=DTYPE_FP8)
         out = flash_attn3(q, k, v, softmax_scale=scale)
         return out.to(dtype=origin_dtype)
+    if attn_impl == "aiter" or attn_impl == "aiter_fp8":
+        if not flash_attn3_compatible:
+            raise RuntimeError(
+                f"head_dim={q.shape[-1]}, but aiter_flash_attn only supports head dimension at most {FA3_MAX_HEADDIM}"
+            )
+        if attn_mask is not None:
+            raise RuntimeError("aiter_flash_attn does not support attention mask")
+        if attn_impl == "aiter":
+            return aiter_flash_attn(q, k, v, softmax_scale=scale)
+        else:
+            origin_dtype = q.dtype
+            q = q.to(dtype=DTYPE_FP8)
+            k = k.to(dtype=DTYPE_FP8)
+            v = v.to(dtype=DTYPE_FP8)
+            out = aiter_flash_attn_fp8(q, k, v, softmax_scale=scale)
+            return out.to(dtype=origin_dtype)
     if attn_impl == "fa2":
         return flash_attn2(q, k, v, softmax_scale=scale)
     if attn_impl == "xformers":
@@ -288,6 +317,8 @@ def long_context_attention(
         "fa2",
         "fa3",
         "fa3_fp8",
+        "aiter",
+        "aiter_fp8",
         "sdpa",
         "sage",
         "sparge",
@@ -303,6 +334,13 @@ def long_context_attention(
             logger.warning(
                 f"head_dim={q.shape[-1]}, but flash_attn_3 only supports head dimension at most {FA3_MAX_HEADDIM}, will use fallback attention implementation"
             )
+    if AITER_AVAILABLE:
+        if flash_attn3_compatible:
+            return LongContextAttention(attn_type=AttnType.AITER)(q, k, v, softmax_scale=scale)
+        else:
+            logger.warning(
+                f"head_dim={q.shape[-1]}, but aiter_flash_attn only supports head dimension at most {FA3_MAX_HEADDIM}, will use fallback attention implementation"
+            )
     if SDPA_AVAILABLE:
         return LongContextAttention(attn_type=AttnType.TORCH)(q, k, v, softmax_scale=scale)
     if FLASH_ATTN_2_AVAILABLE:
@@ -323,6 +361,20 @@ def long_context_attention(
         v = v.to(dtype=DTYPE_FP8)
         out = LongContextAttention(attn_type=AttnType.FA3)(q, k, v, softmax_scale=scale)
         return out.to(dtype=origin_dtype)
+    if attn_impl == "aiter" or attn_impl == "aiter_fp8":
+        if not flash_attn3_compatible:
+            raise RuntimeError(
+                f"head_dim={q.shape[-1]}, but aiter_flash_attn only supports head dimension at most {FA3_MAX_HEADDIM}"
+            )
+        if attn_impl == "aiter":
+            return LongContextAttention(attn_type=AttnType.AITER)(q, k, v, softmax_scale=scale)
+
+        origin_dtype = q.dtype
+        q = q.to(dtype=DTYPE_FP8)
+        k = k.to(dtype=DTYPE_FP8)
+        v = v.to(dtype=DTYPE_FP8)
+        out = LongContextAttention(attn_type=AttnType.AITER)(q, k, v, softmax_scale=scale)
+        return out.to(dtype=origin_dtype)
     if attn_impl == "fa2":
         return LongContextAttention(attn_type=AttnType.FA)(q, k, v, softmax_scale=scale)
     if attn_impl == "sdpa":
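A hedged usage sketch of the new dispatch branch. It requires a ROCm build with the optional aiter package installed; the q/k/v layout and the exact keyword signature of `attention()` are assumptions inferred from the hunks above, not guaranteed by this diff:

import torch
from diffsynth_engine.models.basic.attention import attention

# Layout assumed to be (batch, seq_len, num_heads, head_dim), as suggested by the
# transpose(1, 2) in the sparge wrapper above; check the module for the exact contract.
q = torch.randn(1, 1024, 16, 128, dtype=torch.bfloat16, device="cuda")
k = torch.randn(1, 1024, 16, 128, dtype=torch.bfloat16, device="cuda")
v = torch.randn(1, 1024, 16, 128, dtype=torch.bfloat16, device="cuda")

# "aiter" dispatches to aiter.flash_attn_func; "aiter_fp8" casts q/k/v to FP8,
# calls flash_attn_fp8_pertensor_func, then casts the output back.
out = attention(q, k, v, attn_impl="aiter")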
diffsynth_engine/models/basic/video_sparse_attention.py
CHANGED
@@ -2,9 +2,12 @@ import torch
 import math
 import functools

-from
+from diffsynth_engine.utils.flag import VIDEO_SPARSE_ATTN_AVAILABLE
 from diffsynth_engine.utils.parallel import get_sp_ulysses_group, get_sp_ring_world_size

+if VIDEO_SPARSE_ATTN_AVAILABLE:
+    from vsa import video_sparse_attn as vsa_core
+
 VSA_TILE_SIZE = (4, 4, 4)

diffsynth_engine/pipelines/base.py
CHANGED
@@ -5,7 +5,15 @@ from einops import rearrange
 from typing import Dict, List, Tuple, Union, Optional
 from PIL import Image

-from diffsynth_engine.configs import
+from diffsynth_engine.configs import (
+    BaseConfig,
+    BaseStateDicts,
+    LoraConfig,
+    AttnImpl,
+    SpargeAttentionParams,
+    VideoSparseAttentionParams,
+)
+from diffsynth_engine.models.basic.video_sparse_attention import get_vsa_kwargs
 from diffsynth_engine.utils.offload import enable_sequential_cpu_offload, offload_model_to_dict, restore_model_from_dict
 from diffsynth_engine.utils.fp8_linear import enable_fp8_autocast
 from diffsynth_engine.utils.gguf import load_gguf_checkpoint
@@ -33,6 +41,7 @@ class BasePipeline:
         dtype=torch.float16,
     ):
         super().__init__()
+        self.config = None
         self.vae_tiled = vae_tiled
         self.vae_tile_size = vae_tile_size
         self.vae_tile_stride = vae_tile_stride
@@ -48,7 +57,7 @@ class BasePipeline:
         raise NotImplementedError()

     @classmethod
-    def from_state_dict(cls, state_dicts: BaseStateDicts,
+    def from_state_dict(cls, state_dicts: BaseStateDicts, config: BaseConfig) -> "BasePipeline":
         raise NotImplementedError()

     def update_weights(self, state_dicts: BaseStateDicts) -> None:
@@ -260,6 +269,25 @@ class BasePipeline:
         )
         return init_latents, latents, sigmas, timesteps

+    def get_attn_kwargs(self, latents: torch.Tensor) -> Dict:
+        attn_kwargs = {"attn_impl": self.config.dit_attn_impl.value}
+        if isinstance(self.config.attn_params, SpargeAttentionParams):
+            assert self.config.dit_attn_impl == AttnImpl.SPARGE
+            attn_kwargs.update(
+                {
+                    "smooth_k": self.config.attn_params.smooth_k,
+                    "simthreshd1": self.config.attn_params.simthreshd1,
+                    "cdfthreshd": self.config.attn_params.cdfthreshd,
+                    "pvthreshd": self.config.attn_params.pvthreshd,
+                }
+            )
+        elif isinstance(self.config.attn_params, VideoSparseAttentionParams):
+            assert self.config.dit_attn_impl == AttnImpl.VSA
+            attn_kwargs.update(
+                get_vsa_kwargs(latents.shape[2:], (1, 2, 2), self.config.attn_params.sparsity, device=self.device)
+            )
+        return attn_kwargs
+
     def eval(self):
         for model_name in self.model_names:
             model = getattr(self, model_name)
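For reference, a self-contained sketch of the dictionary the relocated `get_attn_kwargs` helper produces for a Sparge-attention config. `DummySpargeParams` is a stand-in with placeholder values, not the library's defaults:

from dataclasses import dataclass

@dataclass
class DummySpargeParams:  # stand-in for diffsynth_engine's SpargeAttentionParams
    smooth_k: bool = True
    simthreshd1: float = 0.6
    cdfthreshd: float = 0.98
    pvthreshd: int = 50

params = DummySpargeParams()
attn_kwargs = {
    "attn_impl": "sparge",
    "smooth_k": params.smooth_k,
    "simthreshd1": params.simthreshd1,
    "cdfthreshd": params.cdfthreshd,
    "pvthreshd": params.pvthreshd,
}
# Each pipeline now builds this dict via self.get_attn_kwargs(latents) and passes it
# into its DiT forward call, as shown in the pipeline hunks below.
print(attn_kwargs)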
diffsynth_engine/pipelines/flux_image.py
CHANGED
@@ -751,7 +751,7 @@ class FluxImagePipeline(BasePipeline):
         latents = latents.to(self.dtype)
         self.load_models_to_device(["dit"])

-        attn_kwargs = self.
+        attn_kwargs = self.get_attn_kwargs(latents)
         noise_pred = self.dit(
             hidden_states=latents,
             timestep=timestep,
@@ -886,7 +886,7 @@ class FluxImagePipeline(BasePipeline):
             empty_cache()
             param.model.to(self.device)

-            attn_kwargs = self.
+            attn_kwargs = self.get_attn_kwargs(latents)
             double_block_output, single_block_output = param.model(
                 hidden_states=latents,
                 control_condition=control_condition,
diffsynth_engine/pipelines/qwen_image.py
CHANGED
@@ -208,7 +208,9 @@ class QwenImagePipeline(BasePipeline):
         )
         if config.load_encoder:
             logger.info(f"loading state dict from {config.encoder_path} ...")
-            encoder_state_dict = cls.load_model_checkpoint(
+            encoder_state_dict = cls.load_model_checkpoint(
+                config.encoder_path, device="cpu", dtype=config.encoder_dtype
+            )

         state_dicts = QwenImageStateDicts(
             model=model_state_dict,
@@ -547,7 +549,7 @@ class QwenImagePipeline(BasePipeline):
         entity_masks: Optional[List[torch.Tensor]] = None,
     ):
         self.load_models_to_device(["dit"])
-        attn_kwargs = self.
+        attn_kwargs = self.get_attn_kwargs(latents)
         noise_pred = self.dit(
             image=latents,
             edit=image_latents,
diffsynth_engine/pipelines/wan_s2v.py
CHANGED
@@ -394,7 +394,7 @@ class WanSpeech2VideoPipeline(WanVideoPipeline):
         void_audio_input: torch.Tensor | None = None,
     ):
         latents = latents.to(dtype=self.config.model_dtype, device=self.device)
-        attn_kwargs = self.
+        attn_kwargs = self.get_attn_kwargs(latents)

         noise_pred = model(
             x=latents,
diffsynth_engine/pipelines/wan_video.py
CHANGED
@@ -144,7 +144,7 @@ class WanVideoPipeline(BasePipeline):
         lora_list: List[Tuple[str, float]],
         fused: bool = True,
         save_original_weight: bool = False,
-        lora_converter: Optional[WanLoRAConverter] = None
+        lora_converter: Optional[WanLoRAConverter] = None,
     ):
         assert self.config.tp_degree is None or self.config.tp_degree == 1, (
             "load LoRA is not allowed when tensor parallel is enabled; "
@@ -156,11 +156,15 @@ class WanVideoPipeline(BasePipeline):
         )
         super().load_loras(lora_list, fused, save_original_weight, lora_converter)

-    def load_loras_low_noise(
+    def load_loras_low_noise(
+        self, lora_list: List[Tuple[str, float]], fused: bool = True, save_original_weight: bool = False
+    ):
         assert self.dit2 is not None, "low noise LoRA can only be applied to Wan2.2"
         self.load_loras(lora_list, fused, save_original_weight, self.low_noise_lora_converter)

-    def load_loras_high_noise(
+    def load_loras_high_noise(
+        self, lora_list: List[Tuple[str, float]], fused: bool = True, save_original_weight: bool = False
+    ):
         assert self.dit2 is not None, "high noise LoRA can only be applied to Wan2.2"
         self.load_loras(lora_list, fused, save_original_weight)

@@ -323,7 +327,7 @@ class WanVideoPipeline(BasePipeline):

     def predict_noise(self, model, latents, image_clip_feature, image_y, timestep, context):
         latents = latents.to(dtype=self.config.model_dtype, device=self.device)
-        attn_kwargs = self.
+        attn_kwargs = self.get_attn_kwargs(latents)

         noise_pred = model(
             x=latents,
diffsynth_engine/utils/flag.py
CHANGED
@@ -31,6 +31,11 @@ if SDPA_AVAILABLE:
 else:
     logger.info("Torch SDPA is not available")

+AITER_AVAILABLE = importlib.util.find_spec("aiter") is not None
+if AITER_AVAILABLE:
+    logger.info("Aiter is available")
+else:
+    logger.info("Aiter is not available")

 # 有损
 SAGE_ATTN_AVAILABLE = importlib.util.find_spec("sageattention") is not None
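The new flag follows the same probe-without-import pattern as the other availability flags in this module; a minimal standalone sketch:

import importlib.util

# find_spec only locates the module; nothing from aiter is imported unless the
# probe succeeds, so the flag is safe to evaluate on machines without ROCm.
AITER_AVAILABLE = importlib.util.find_spec("aiter") is not None
print("aiter available:", AITER_AVAILABLE)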
diffsynth_engine/utils/parallel.py
CHANGED
@@ -19,8 +19,6 @@ from typing import Dict, List, Set, Type, Union, Optional
 from queue import Empty

 import diffsynth_engine.models.basic.attention as attention_ops
-from diffsynth_engine.models import PreTrainedModel
-from diffsynth_engine.pipelines import BasePipeline
 from diffsynth_engine.utils.platform import empty_cache
 from diffsynth_engine.utils import logging

@@ -300,14 +298,15 @@ def _worker_loop(
         world_size=world_size,
     )

-def wrap_for_parallel(module
-    if
-        for model_name in module
-
+def wrap_for_parallel(module):
+    if hasattr(module, "model_names"):
+        for model_name in getattr(module, "model_names"):
+            submodule = getattr(module, model_name)
+            if getattr(submodule, "_supports_parallelization", False):
                 setattr(module, model_name, wrap_for_parallel(submodule))
         return module

-    if not module
+    if not getattr(module, "_supports_parallelization", False):
         return module

     if tp_degree > 1:
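The rewritten `wrap_for_parallel` drops the `PreTrainedModel` and `BasePipeline` imports and relies on duck typing instead. A self-contained sketch of that pattern with illustrative dummy classes (not diffsynth_engine code):

class DummyDiT:
    _supports_parallelization = True

class DummyPipeline:
    model_names = ["dit"]
    def __init__(self):
        self.dit = DummyDiT()

def wrap(module):
    # Pipelines expose `model_names`; recurse into each listed submodule.
    if hasattr(module, "model_names"):
        for name in getattr(module, "model_names"):
            sub = getattr(module, name)
            if getattr(sub, "_supports_parallelization", False):
                setattr(module, name, wrap(sub))
        return module
    # Plain models opt in via `_supports_parallelization`.
    if not getattr(module, "_supports_parallelization", False):
        return module
    return ("wrapped", module)  # stand-in for the real parallel wrapper

pipe = wrap(DummyPipeline())
print(pipe.dit)  # ('wrapped', <DummyDiT object ...>)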
{diffsynth_engine-0.6.1.dev24.dist-info → diffsynth_engine-0.6.1.dev26.dist-info}/RECORD
RENAMED
@@ -1,4 +1,4 @@
-diffsynth_engine/__init__.py,sha256=
+diffsynth_engine/__init__.py,sha256=deLiGEHeQV1Xq7Kd11oRUA28FDegUgXBjlkNwgtVBMw,2290
 diffsynth_engine/algorithm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diffsynth_engine/algorithm/noise_scheduler/__init__.py,sha256=YvcwE2tCNua-OAX9GEPm0EXsINNWH4XvJMNZb-uaZMM,745
 diffsynth_engine/algorithm/noise_scheduler/base_scheduler.py,sha256=3ve4bYxGyfuERynvoNYdFYSk0agdBgXKCeIOS6O6wgI,819
@@ -79,20 +79,20 @@ diffsynth_engine/conf/tokenizers/wan/umt5-xxl/special_tokens_map.json,sha256=e4q
 diffsynth_engine/conf/tokenizers/wan/umt5-xxl/spiece.model,sha256=45CaZ7eAZQs1z1Kax4KtK2sm5tH4SdP7tqhykF9FJFg,4548313
 diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer.json,sha256=bhl7TT29cdoUtOslX0-pHJwfIGiyCi3iRylnyj0iYCs,16837417
 diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer_config.json,sha256=7Zo6iw-qcacKMoR-BDX-A25uES1N9O23u0ipIeNE3AU,61728
-diffsynth_engine/configs/__init__.py,sha256=
+diffsynth_engine/configs/__init__.py,sha256=vSjJToEdq3JX7t81_z4nwNwIdD4bYnFjxnMZH7PXMKo,1309
 diffsynth_engine/configs/controlnet.py,sha256=f3vclyP3lcAjxDGD9C1vevhqqQ7W2LL_c6Wye0uxk3Q,1180
-diffsynth_engine/configs/pipeline.py,sha256=
+diffsynth_engine/configs/pipeline.py,sha256=ADgWJa7bA3Z3Z1JtVLgmt4N3eS1KRp9yHu1QvTBzTm0,13404
 diffsynth_engine/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diffsynth_engine/models/__init__.py,sha256=8Ze7cSE8InetgXWTNb0neVA2Q44K7WlE-h7O-02m2sY,119
 diffsynth_engine/models/base.py,sha256=BA5vgMqfy_cjuL2OtXbrFD-Qg5xQnaumHpj5TabwSy8,2559
 diffsynth_engine/models/basic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-diffsynth_engine/models/basic/attention.py,sha256=
+diffsynth_engine/models/basic/attention.py,sha256=mvgk8LTqFwgtPdBeRv797IZNg9k7--X9wD92Hcr188c,15682
 diffsynth_engine/models/basic/lora.py,sha256=PT-A3pwIuUrW2w3TnNlBPb1KRj70QYiBaoCvLnkR5cs,10652
 diffsynth_engine/models/basic/relative_position_emb.py,sha256=rCXOweZMcayVnNUVvBcYXMdhHS257B_PC8PZSWxvhNQ,2540
 diffsynth_engine/models/basic/timestep.py,sha256=WJODYqkSXEM0wcS42YkkfrGwxWt0e60zMTkDdUBQqBw,2810
 diffsynth_engine/models/basic/transformer_helper.py,sha256=6K7A5bVnN2bOoq6I0IQf7RJBhSZUP4jNf1n7NPGu8zA,5287
 diffsynth_engine/models/basic/unet_helper.py,sha256=4lN6F80Ubm6ip4dkLVmB-Og5-Y25Wduhs9Q8qjyzK6E,9044
-diffsynth_engine/models/basic/video_sparse_attention.py,sha256=
+diffsynth_engine/models/basic/video_sparse_attention.py,sha256=iXA3sHDLWk1ns1lVCNbZdiaDu94kBIsw-9vrCGAll7g,7843
 diffsynth_engine/models/flux/__init__.py,sha256=x0JoxL0CdiiVrY0BjkIrGinud7mcXecLleGO0km91XQ,686
 diffsynth_engine/models/flux/flux_controlnet.py,sha256=NvFKQIx0NldX5uUxdmYwuS2s-xaFRlKotiE6lr3-HRY,8018
 diffsynth_engine/models/flux/flux_dit.py,sha256=7sdV8KFQiHcK-8aqyvXBgC7E_-D9rcgBcnMXUq_AybI,23403
@@ -141,15 +141,15 @@ diffsynth_engine/models/wan/wan_s2v_dit.py,sha256=j63ulcWLY4XGITOKUMGX292LtSEtP-
 diffsynth_engine/models/wan/wan_text_encoder.py,sha256=OERlmwOqthAFPNnnT2sXJ4OjyyRmsRLx7VGp1zlBkLU,11021
 diffsynth_engine/models/wan/wan_vae.py,sha256=dC7MoUFeXRL7SIY0LG1OOUiZW-pp9IbXCghutMxpXr4,38889
 diffsynth_engine/pipelines/__init__.py,sha256=jh-4LSJ0vqlXiT8BgFgRIQxuAr2atEPyHrxXWj-Ud1U,604
-diffsynth_engine/pipelines/base.py,sha256=
-diffsynth_engine/pipelines/flux_image.py,sha256=
+diffsynth_engine/pipelines/base.py,sha256=Yvb2xiHT1Jhx4HDkNPHdXjzhUkM9_65D4zM-GSSOWoU,16133
+diffsynth_engine/pipelines/flux_image.py,sha256=L0ggxpthLD8a5-zdPHu9z668uWBei9YzPb4PFVypDNU,50707
 diffsynth_engine/pipelines/hunyuan3d_shape.py,sha256=TNV0Wr09Dj2bzzlpua9WioCClOj3YiLfE6utI9aWL8A,8164
-diffsynth_engine/pipelines/qwen_image.py,sha256=
+diffsynth_engine/pipelines/qwen_image.py,sha256=n6Nnin8OyC9Mfp8O-3N4GNq12Mws8_hHWv-SwU4-HCc,33054
 diffsynth_engine/pipelines/sd_image.py,sha256=nr-Nhsnomq8CsUqhTM3i2l2zG01YjwXdfRXgr_bC3F0,17891
 diffsynth_engine/pipelines/sdxl_image.py,sha256=v7ZACGPb6EcBunL6e5E9jynSQjE7GQx8etEV-ZLP91g,21704
 diffsynth_engine/pipelines/utils.py,sha256=HZbJHErNJS1DhlwJKvZ9dY7Kh8Zdlsw3zE2e88TYGRY,2277
-diffsynth_engine/pipelines/wan_s2v.py,sha256=
-diffsynth_engine/pipelines/wan_video.py,sha256=
+diffsynth_engine/pipelines/wan_s2v.py,sha256=AUVLhLP5F0gnOV7nqWQUSZbye5ov-m44151B3zWBrAk,29323
+diffsynth_engine/pipelines/wan_video.py,sha256=Hs1iVacfrwi_0X4VNgflVUlJP5vHp0x7CF6wegidP2c,29108
 diffsynth_engine/processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diffsynth_engine/processor/canny_processor.py,sha256=hV30NlblTkEFUAmF_O-LJrNlGVM2SFrqq6okfF8VpOo,602
 diffsynth_engine/processor/depth_processor.py,sha256=dQvs3JsnyMbz4dyI9QoR8oO-mMFBFAgNvgqeCoaU5jk,1532
@@ -171,7 +171,7 @@ diffsynth_engine/utils/cache.py,sha256=Ivef22pCuhEq-4H00gSvkLS8ceVZoGis7OSitYL6g
 diffsynth_engine/utils/constants.py,sha256=sJio3Vy8i0-PWYRnqquYt6ez9k6Tc9JdjCv6pn2BU_4,3551
 diffsynth_engine/utils/download.py,sha256=w9QQjllPfTUEY371UTREU7o_vvdMY-Q2DymDel3ZEZY,6792
 diffsynth_engine/utils/env.py,sha256=k749eYt_qKGq38GocDiXfkhp8nZrowFefNVTZ8R755I,363
-diffsynth_engine/utils/flag.py,sha256=
+diffsynth_engine/utils/flag.py,sha256=v9GcRFYiNMonD9qmDLWdbXONuF-AcQ_KABPFtRZd0Tc,1767
 diffsynth_engine/utils/fp8_linear.py,sha256=k34YFWo2dc3t8aKjHaCW9CbQMOTqXxaDHk8aw8aKif4,3857
 diffsynth_engine/utils/gguf.py,sha256=ZWvw46V4g4uVyAR_oCq-4K5nPdKVrYk3u47uXMgA9lU,14092
 diffsynth_engine/utils/image.py,sha256=PiDButjv0fsRS23kpQgCLZAlBumpzQmNnolfvb5EKQ0,9626
@@ -180,15 +180,15 @@ diffsynth_engine/utils/lock.py,sha256=1Ipgst9eEFfFdViAvD5bxdB6HnHHBcqWYOb__fGaPU
 diffsynth_engine/utils/logging.py,sha256=XB0xTT8PBN6btkOjFtOvjlrOCRVgDGT8PFAp1vmse28,467
 diffsynth_engine/utils/offload.py,sha256=94og79TIkxldwYUgZT3L4OVu1WBlE7gfVPvO2MRhm6c,3551
 diffsynth_engine/utils/onnx.py,sha256=jeWUudJHnESjuiEAHyUZYUZz7dCj34O9aGjHCe8yjWo,1149
-diffsynth_engine/utils/parallel.py,sha256=
+diffsynth_engine/utils/parallel.py,sha256=6T8oCTp-7Gb3qsgNRB2Bp3DF4eyx1FzvS6pFnEJbsek,19789
 diffsynth_engine/utils/platform.py,sha256=nbpG-XHJFRmYY6u_e7IBQ9Q6GyItrIkKf3VKuBPTUpY,627
 diffsynth_engine/utils/prompt.py,sha256=YItMchoVzsG6y-LB4vzzDUWrkhKRVlt1HfVhxZjSxMQ,280
 diffsynth_engine/utils/video.py,sha256=8FCaeqIdUsWMgWI_6SO9SPynsToGcLCQAVYFTc4CDhg,2200
 diffsynth_engine/utils/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diffsynth_engine/utils/memory/linear_regression.py,sha256=oW_EQEw13oPoyUrxiL8A7Ksa5AuJ2ynI2qhCbfAuZbg,3930
 diffsynth_engine/utils/memory/memory_predcit_model.py,sha256=EXprSl_zlVjgfMWNXP-iw83Ot3hyMcgYaRPv-dvyL84,3943
-diffsynth_engine-0.6.1.
-diffsynth_engine-0.6.1.
-diffsynth_engine-0.6.1.
-diffsynth_engine-0.6.1.
-diffsynth_engine-0.6.1.
+diffsynth_engine-0.6.1.dev26.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
+diffsynth_engine-0.6.1.dev26.dist-info/METADATA,sha256=z6sjXpooZoFJJGqqdE_DFtsi2f3aqhjLBbyXPX0RdgE,1164
+diffsynth_engine-0.6.1.dev26.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+diffsynth_engine-0.6.1.dev26.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
+diffsynth_engine-0.6.1.dev26.dist-info/RECORD,,
{diffsynth_engine-0.6.1.dev24.dist-info → diffsynth_engine-0.6.1.dev26.dist-info}/WHEEL
RENAMED
File without changes
{diffsynth_engine-0.6.1.dev24.dist-info → diffsynth_engine-0.6.1.dev26.dist-info}/licenses/LICENSE
RENAMED
File without changes
{diffsynth_engine-0.6.1.dev24.dist-info → diffsynth_engine-0.6.1.dev26.dist-info}/top_level.txt
RENAMED
File without changes