diffsynth-engine 0.6.1.dev14__py3-none-any.whl → 0.6.1.dev25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffsynth_engine/__init__.py +6 -2
- diffsynth_engine/conf/models/wan/dit/wan_dit_keymap.json +41 -0
- diffsynth_engine/configs/__init__.py +10 -6
- diffsynth_engine/configs/pipeline.py +17 -10
- diffsynth_engine/models/base.py +1 -1
- diffsynth_engine/models/basic/attention.py +59 -20
- diffsynth_engine/models/basic/transformer_helper.py +36 -2
- diffsynth_engine/models/basic/video_sparse_attention.py +238 -0
- diffsynth_engine/models/flux/flux_controlnet.py +7 -19
- diffsynth_engine/models/flux/flux_dit.py +27 -38
- diffsynth_engine/models/flux/flux_dit_fbcache.py +9 -7
- diffsynth_engine/models/flux/flux_ipadapter.py +5 -5
- diffsynth_engine/models/qwen_image/qwen2_5_vl.py +5 -0
- diffsynth_engine/models/qwen_image/qwen_image_dit.py +28 -34
- diffsynth_engine/models/qwen_image/qwen_image_dit_fbcache.py +14 -6
- diffsynth_engine/models/wan/wan_audio_encoder.py +0 -1
- diffsynth_engine/models/wan/wan_dit.py +64 -27
- diffsynth_engine/pipelines/base.py +36 -4
- diffsynth_engine/pipelines/flux_image.py +19 -17
- diffsynth_engine/pipelines/qwen_image.py +45 -36
- diffsynth_engine/pipelines/sdxl_image.py +1 -1
- diffsynth_engine/pipelines/utils.py +52 -0
- diffsynth_engine/pipelines/wan_s2v.py +4 -9
- diffsynth_engine/pipelines/wan_video.py +43 -19
- diffsynth_engine/tokenizers/base.py +6 -0
- diffsynth_engine/tokenizers/qwen2.py +12 -4
- diffsynth_engine/utils/constants.py +13 -12
- diffsynth_engine/utils/flag.py +6 -0
- diffsynth_engine/utils/parallel.py +62 -29
- {diffsynth_engine-0.6.1.dev14.dist-info → diffsynth_engine-0.6.1.dev25.dist-info}/METADATA +1 -1
- {diffsynth_engine-0.6.1.dev14.dist-info → diffsynth_engine-0.6.1.dev25.dist-info}/RECORD +45 -43
- /diffsynth_engine/conf/models/wan/dit/{wan2.1-flf2v-14b.json → wan2.1_flf2v_14b.json} +0 -0
- /diffsynth_engine/conf/models/wan/dit/{wan2.1-i2v-14b.json → wan2.1_i2v_14b.json} +0 -0
- /diffsynth_engine/conf/models/wan/dit/{wan2.1-t2v-1.3b.json → wan2.1_t2v_1.3b.json} +0 -0
- /diffsynth_engine/conf/models/wan/dit/{wan2.1-t2v-14b.json → wan2.1_t2v_14b.json} +0 -0
- /diffsynth_engine/conf/models/wan/dit/{wan2.2-i2v-a14b.json → wan2.2_i2v_a14b.json} +0 -0
- /diffsynth_engine/conf/models/wan/dit/{wan2.2-s2v-14b.json → wan2.2_s2v_14b.json} +0 -0
- /diffsynth_engine/conf/models/wan/dit/{wan2.2-t2v-a14b.json → wan2.2_t2v_a14b.json} +0 -0
- /diffsynth_engine/conf/models/wan/dit/{wan2.2-ti2v-5b.json → wan2.2_ti2v_5b.json} +0 -0
- /diffsynth_engine/conf/models/wan/vae/{wan2.1-vae.json → wan2.1_vae.json} +0 -0
- /diffsynth_engine/conf/models/wan/vae/{wan2.2-vae.json → wan2.2_vae.json} +0 -0
- /diffsynth_engine/conf/models/wan/vae/{wan-vae-keymap.json → wan_vae_keymap.json} +0 -0
- {diffsynth_engine-0.6.1.dev14.dist-info → diffsynth_engine-0.6.1.dev25.dist-info}/WHEEL +0 -0
- {diffsynth_engine-0.6.1.dev14.dist-info → diffsynth_engine-0.6.1.dev25.dist-info}/licenses/LICENSE +0 -0
- {diffsynth_engine-0.6.1.dev14.dist-info → diffsynth_engine-0.6.1.dev25.dist-info}/top_level.txt +0 -0
diffsynth_engine/pipelines/wan_s2v.py
CHANGED

@@ -394,6 +394,7 @@ class WanSpeech2VideoPipeline(WanVideoPipeline):
         void_audio_input: torch.Tensor | None = None,
     ):
         latents = latents.to(dtype=self.config.model_dtype, device=self.device)
+        attn_kwargs = self.get_attn_kwargs(latents)

         noise_pred = model(
             x=latents,

@@ -408,6 +409,7 @@ class WanSpeech2VideoPipeline(WanVideoPipeline):
             drop_motion_frames=drop_motion_frames,
             audio_mask=audio_mask,
             void_audio_input=void_audio_input,
+            attn_kwargs=attn_kwargs,
         )
         return noise_pred

@@ -654,19 +656,12 @@ class WanSpeech2VideoPipeline(WanVideoPipeline):
         )

         with LoRAContext():
-            attn_kwargs = {
-                "attn_impl": config.dit_attn_impl.value,
-                "sparge_smooth_k": config.sparge_smooth_k,
-                "sparge_cdfthreshd": config.sparge_cdfthreshd,
-                "sparge_simthreshd1": config.sparge_simthreshd1,
-                "sparge_pvthreshd": config.sparge_pvthreshd,
-            }
             dit = WanS2VDiT.from_state_dict(
                 state_dicts.model,
                 config=model_config,
-                device=init_device,
+                device=("cpu" if config.use_fsdp else init_device),
                 dtype=config.model_dtype,
-                …
+                use_vsa=(config.dit_attn_impl.value == "vsa"),
             )
             if config.use_fp8_linear:
                 enable_fp8_linear(dit)
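The attention settings that were previously frozen into the DiT at construction time are now assembled per denoising call via self.get_attn_kwargs(latents) (added in pipelines/base.py, which is not shown in this diff). A rough, hypothetical reconstruction of what such a helper returns, assuming it simply mirrors the keys removed above; treat it as a sketch, not the shipped implementation:

from typing import Any, Dict

def build_attn_kwargs(config: Any) -> Dict[str, Any]:
    # assumed shape, based on the construction-time dict this release removes
    return {
        "attn_impl": config.dit_attn_impl.value,
        "sparge_smooth_k": config.sparge_smooth_k,
        "sparge_cdfthreshd": config.sparge_cdfthreshd,
        "sparge_simthreshd1": config.sparge_simthreshd1,
        "sparge_pvthreshd": config.sparge_pvthreshd,
    }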
diffsynth_engine/pipelines/wan_video.py
CHANGED

@@ -95,8 +95,14 @@ class WanLoRAConverter(LoRAStateDictConverter):
         return state_dict


+class WanLowNoiseLoRAConverter(WanLoRAConverter):
+    def convert(self, state_dict):
+        return {"dit2": super().convert(state_dict)["dit"]}
+
+
 class WanVideoPipeline(BasePipeline):
     lora_converter = WanLoRAConverter()
+    low_noise_lora_converter = WanLowNoiseLoRAConverter()

     def __init__(
         self,
@@ -133,7 +139,13 @@ class WanVideoPipeline(BasePipeline):
         self.image_encoder = image_encoder
         self.model_names = ["text_encoder", "dit", "dit2", "vae", "image_encoder"]

-    def load_loras(
+    def load_loras(
+        self,
+        lora_list: List[Tuple[str, float]],
+        fused: bool = True,
+        save_original_weight: bool = False,
+        lora_converter: Optional[WanLoRAConverter] = None,
+    ):
         assert self.config.tp_degree is None or self.config.tp_degree == 1, (
             "load LoRA is not allowed when tensor parallel is enabled; "
             "set tp_degree=None or tp_degree=1 during pipeline initialization"
@@ -142,10 +154,24 @@ class WanVideoPipeline(BasePipeline):
             "load fused LoRA is not allowed when fully sharded data parallel is enabled; "
             "either load LoRA with fused=False or set use_fsdp=False during pipeline initialization"
         )
-        super().load_loras(lora_list, fused, save_original_weight)
+        super().load_loras(lora_list, fused, save_original_weight, lora_converter)
+
+    def load_loras_low_noise(
+        self, lora_list: List[Tuple[str, float]], fused: bool = True, save_original_weight: bool = False
+    ):
+        assert self.dit2 is not None, "low noise LoRA can only be applied to Wan2.2"
+        self.load_loras(lora_list, fused, save_original_weight, self.low_noise_lora_converter)
+
+    def load_loras_high_noise(
+        self, lora_list: List[Tuple[str, float]], fused: bool = True, save_original_weight: bool = False
+    ):
+        assert self.dit2 is not None, "high noise LoRA can only be applied to Wan2.2"
+        self.load_loras(lora_list, fused, save_original_weight)

     def unload_loras(self):
         self.dit.unload_loras()
+        if self.dit2 is not None:
+            self.dit2.unload_loras()
         self.text_encoder.unload_loras()

     def get_default_fps(self) -> int:
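A usage sketch of the new low/high-noise LoRA entry points (the constructor call and file paths are placeholders; only the method names come from this diff). On a Wan2.2 pipeline, dit holds the high-noise expert and dit2 the low-noise expert, and the converter added above remaps a low-noise LoRA onto dit2:

# placeholders throughout, illustrative only
pipe = WanVideoPipeline.from_pretrained("path/to/wan2.2-t2v-a14b")          # assumed entry point
pipe.load_loras_high_noise([("high_noise_lora.safetensors", 1.0)])          # fuses into dit
pipe.load_loras_low_noise([("low_noise_lora.safetensors", 1.0)])            # fuses into dit2
pipe.unload_loras()                                                          # now also restores dit2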
@@ -301,6 +327,7 @@ class WanVideoPipeline(BasePipeline):

     def predict_noise(self, model, latents, image_clip_feature, image_y, timestep, context):
         latents = latents.to(dtype=self.config.model_dtype, device=self.device)
+        attn_kwargs = self.get_attn_kwargs(latents)

         noise_pred = model(
             x=latents,

@@ -308,6 +335,7 @@ class WanVideoPipeline(BasePipeline):
             context=context,
             clip_feature=image_clip_feature,
             y=image_y,
+            attn_kwargs=attn_kwargs,
         )
         return noise_pred

@@ -556,19 +584,12 @@ class WanVideoPipeline(BasePipeline):
         dit_state_dict = state_dicts.model

         with LoRAContext():
-            attn_kwargs = {
-                "attn_impl": config.dit_attn_impl.value,
-                "sparge_smooth_k": config.sparge_smooth_k,
-                "sparge_cdfthreshd": config.sparge_cdfthreshd,
-                "sparge_simthreshd1": config.sparge_simthreshd1,
-                "sparge_pvthreshd": config.sparge_pvthreshd,
-            }
             dit = WanDiT.from_state_dict(
                 dit_state_dict,
                 config=dit_config,
-                device=init_device,
+                device=("cpu" if config.use_fsdp else init_device),
                 dtype=config.model_dtype,
-                …
+                use_vsa=(config.dit_attn_impl.value == "vsa"),
             )
             if config.use_fp8_linear:
                 enable_fp8_linear(dit)

@@ -578,9 +599,9 @@ class WanVideoPipeline(BasePipeline):
             dit2 = WanDiT.from_state_dict(
                 dit2_state_dict,
                 config=dit_config,
-                device=init_device,
+                device=("cpu" if config.use_fsdp else init_device),
                 dtype=config.model_dtype,
-                …
+                use_vsa=(config.dit_attn_impl.value == "vsa"),
             )
             if config.use_fp8_linear:
                 enable_fp8_linear(dit2)
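Both DiT experts now initialize on CPU whenever use_fsdp is set. A minimal sketch of why (this reflects FSDP semantics, not code from this wheel): FSDP moves parameters onto device_id while it wraps, so only the local shard ever has to live on the GPU, and loading the full state dict straight to the GPU on every rank would defeat that.

from typing import Callable

import torch
import torch.nn as nn
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP

def load_then_shard(build: Callable[..., nn.Module], device: torch.device, use_fsdp: bool) -> nn.Module:
    # mirrors the device=("cpu" if config.use_fsdp else init_device) change above
    model = build(device=torch.device("cpu") if use_fsdp else device)
    if use_fsdp:
        model = FSDP(model, device_id=device)  # shards are moved to the GPU during wrapping
    return model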
@@ -618,19 +639,22 @@ class WanVideoPipeline(BasePipeline):
     @staticmethod
     def _get_dit_type(model_state_dict: Dict[str, torch.Tensor] | Dict[str, Dict[str, torch.Tensor]]) -> str:
         # determine wan dit type by model params
+        def has_any_key(*xs):
+            return any(x in model_state_dict for x in xs)
+
         dit_type = None
-        if "high_noise_model"
+        if has_any_key("high_noise_model"):
             if model_state_dict["high_noise_model"]["patch_embedding.weight"].shape[1] == 36:
                 dit_type = "wan2.2-i2v-a14b"
             elif model_state_dict["high_noise_model"]["patch_embedding.weight"].shape[1] == 16:
                 dit_type = "wan2.2-t2v-a14b"
         elif model_state_dict["patch_embedding.weight"].shape[1] == 48:
             dit_type = "wan2.2-ti2v-5b"
-        elif "img_emb.emb_pos"
+        elif has_any_key("img_emb.emb_pos", "condition_embedder.image_embedder.pos_embed"):
             dit_type = "wan2.1-flf2v-14b"
-        elif "img_emb.proj.0.weight"
+        elif has_any_key("img_emb.proj.0.weight", "condition_embedder.image_embedder.norm1"):
             dit_type = "wan2.1-i2v-14b"
-        elif "blocks.39.self_attn.norm_q.weight"
+        elif has_any_key("blocks.39.self_attn.norm_q.weight", "blocks.39.attn1.norm_q.weight"):
             dit_type = "wan2.1-t2v-14b"
         else:
             dit_type = "wan2.1-t2v-1.3b"
@@ -645,6 +669,6 @@ class WanVideoPipeline(BasePipeline):
         return vae_type

     def compile(self):
-        self.dit.compile_repeated_blocks(
+        self.dit.compile_repeated_blocks()
         if self.dit2 is not None:
-            self.dit2.compile_repeated_blocks(
+            self.dit2.compile_repeated_blocks()
diffsynth_engine/tokenizers/base.py
CHANGED

@@ -1,10 +1,16 @@
 # Modified from transformers.tokenization_utils_base
 from typing import Dict, List, Union, overload
+from enum import Enum


 TOKENIZER_CONFIG_FILE = "tokenizer_config.json"


+class PaddingStrategy(str, Enum):
+    LONGEST = "longest"
+    MAX_LENGTH = "max_length"
+
+
 class BaseTokenizer:
     SPECIAL_TOKENS_ATTRIBUTES = [
         "bos_token",
diffsynth_engine/tokenizers/qwen2.py
CHANGED

@@ -4,7 +4,7 @@ import torch
 from typing import Dict, List, Union, Optional
 from tokenizers import Tokenizer as TokenizerFast, AddedToken

-from diffsynth_engine.tokenizers.base import BaseTokenizer, TOKENIZER_CONFIG_FILE
+from diffsynth_engine.tokenizers.base import BaseTokenizer, PaddingStrategy, TOKENIZER_CONFIG_FILE


 VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt", "tokenizer_file": "tokenizer.json"}
@@ -165,22 +165,28 @@ class Qwen2TokenizerFast(BaseTokenizer):
         texts: Union[str, List[str]],
         max_length: Optional[int] = None,
         padding_side: Optional[str] = None,
+        padding_strategy: Union[PaddingStrategy, str] = "longest",
         **kwargs,
     ) -> Dict[str, "torch.Tensor"]:
         """
         Tokenize text and prepare for model inputs.

         Args:
-            …
+            texts (`str`, `List[str]`):
                 The sequence or batch of sequences to be encoded.

             max_length (`int`, *optional*):
-                …
+                Maximum length of the encoded sequences.

             padding_side (`str`, *optional*):
                 The side on which the padding should be applied. Should be selected between `"right"` and `"left"`.
                 Defaults to `"right"`.

+            padding_strategy (`PaddingStrategy`, `str`, *optional*):
+                If `"longest"`, will pad the sequences to the longest sequence in the batch.
+                If `"max_length"`, will pad the sequences to the `max_length` argument.
+                Defaults to `"longest"`.
+
         Returns:
             `Dict[str, "torch.Tensor"]`: tensor dict compatible with model_input_names.
         """

@@ -190,7 +196,9 @@ class Qwen2TokenizerFast(BaseTokenizer):

         batch_ids = self.batch_encode(texts)
         ids_lens = [len(ids_) for ids_ in batch_ids]
-        max_length = max_length if max_length is not None else
+        max_length = max_length if max_length is not None else self.model_max_length
+        if padding_strategy == PaddingStrategy.LONGEST:
+            max_length = min(max(ids_lens), max_length)
         padding_side = padding_side if padding_side is not None else self.padding_side

         encoded = torch.zeros(len(texts), max_length, dtype=torch.long)
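A worked example of the new padding_strategy handling (the numbers are illustrative): with batch token lengths [5, 9] and a model_max_length of 128, "longest" pads the batch to 9 while "max_length" keeps the requested or model maximum.

ids_lens = [5, 9]
model_max_length = 128
max_length = model_max_length              # as when no explicit max_length is passed
longest = min(max(ids_lens), max_length)   # padding_strategy == "longest"  -> 9
fixed = max_length                         # padding_strategy == "max_length" -> 128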
diffsynth_engine/utils/constants.py
CHANGED

@@ -27,18 +27,19 @@ SD3_TEXT_ENCODER_CONFIG_FILE = os.path.join(CONF_PATH, "models", "sd3", "sd3_tex
 SDXL_TEXT_ENCODER_CONFIG_FILE = os.path.join(CONF_PATH, "models", "sdxl", "sdxl_text_encoder.json")
 SDXL_UNET_CONFIG_FILE = os.path.join(CONF_PATH, "models", "sdxl", "sdxl_unet.json")

-WAN2_1_DIT_T2V_1_3B_CONFIG_FILE = os.path.join(CONF_PATH, "models", "wan", "dit", "wan2.
-WAN2_1_DIT_T2V_14B_CONFIG_FILE = os.path.join(CONF_PATH, "models", "wan", "dit", "wan2.
-WAN2_1_DIT_I2V_14B_CONFIG_FILE = os.path.join(CONF_PATH, "models", "wan", "dit", "wan2.
-WAN2_1_DIT_FLF2V_14B_CONFIG_FILE = os.path.join(CONF_PATH, "models", "wan", "dit", "wan2.
-WAN2_2_DIT_TI2V_5B_CONFIG_FILE = os.path.join(CONF_PATH, "models", "wan", "dit", "wan2.
-WAN2_2_DIT_T2V_A14B_CONFIG_FILE = os.path.join(CONF_PATH, "models", "wan", "dit", "wan2.
-WAN2_2_DIT_I2V_A14B_CONFIG_FILE = os.path.join(CONF_PATH, "models", "wan", "dit", "wan2.
-WAN2_2_DIT_S2V_14B_CONFIG_FILE = os.path.join(CONF_PATH, "models", "wan", "dit", "wan2.
-…
-…
-…
-…
+WAN2_1_DIT_T2V_1_3B_CONFIG_FILE = os.path.join(CONF_PATH, "models", "wan", "dit", "wan2.1_t2v_1.3b.json")
+WAN2_1_DIT_T2V_14B_CONFIG_FILE = os.path.join(CONF_PATH, "models", "wan", "dit", "wan2.1_t2v_14b.json")
+WAN2_1_DIT_I2V_14B_CONFIG_FILE = os.path.join(CONF_PATH, "models", "wan", "dit", "wan2.1_i2v_14b.json")
+WAN2_1_DIT_FLF2V_14B_CONFIG_FILE = os.path.join(CONF_PATH, "models", "wan", "dit", "wan2.1_flf2v_14b.json")
+WAN2_2_DIT_TI2V_5B_CONFIG_FILE = os.path.join(CONF_PATH, "models", "wan", "dit", "wan2.2_ti2v_5b.json")
+WAN2_2_DIT_T2V_A14B_CONFIG_FILE = os.path.join(CONF_PATH, "models", "wan", "dit", "wan2.2_t2v_a14b.json")
+WAN2_2_DIT_I2V_A14B_CONFIG_FILE = os.path.join(CONF_PATH, "models", "wan", "dit", "wan2.2_i2v_a14b.json")
+WAN2_2_DIT_S2V_14B_CONFIG_FILE = os.path.join(CONF_PATH, "models", "wan", "dit", "wan2.2_s2v_14b.json")
+WAN_DIT_KEYMAP_FILE = os.path.join(CONF_PATH, "models", "wan", "dit", "wan_dit_keymap.json")
+
+WAN2_1_VAE_CONFIG_FILE = os.path.join(CONF_PATH, "models", "wan", "vae", "wan2.1_vae.json")
+WAN2_2_VAE_CONFIG_FILE = os.path.join(CONF_PATH, "models", "wan", "vae", "wan2.2_vae.json")
+WAN_VAE_KEYMAP_FILE = os.path.join(CONF_PATH, "models", "wan", "vae", "wan_vae_keymap.json")

 QWEN_IMAGE_CONFIG_FILE = os.path.join(CONF_PATH, "models", "qwen_image", "qwen2_5_vl_config.json")
 QWEN_IMAGE_VISION_CONFIG_FILE = os.path.join(CONF_PATH, "models", "qwen_image", "qwen2_5_vl_vision_config.json")
diffsynth_engine/utils/flag.py
CHANGED

@@ -44,3 +44,9 @@ if SPARGE_ATTN_AVAILABLE:
     logger.info("Sparge attention is available")
 else:
     logger.info("Sparge attention is not available")
+
+VIDEO_SPARSE_ATTN_AVAILABLE = importlib.util.find_spec("vsa") is not None
+if VIDEO_SPARSE_ATTN_AVAILABLE:
+    logger.info("Video sparse attention is available")
+else:
+    logger.info("Video sparse attention is not available")
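A sketch of how a capability flag like this is typically consumed (the call site below is assumed, not part of this diff): the flag only checks that the optional vsa package is importable, so callers still have to gate the "vsa" attention implementation on it.

from diffsynth_engine.utils.flag import VIDEO_SPARSE_ATTN_AVAILABLE

def validate_attn_impl(requested: str) -> str:
    # hypothetical guard: reject "vsa" when the kernel package is missing
    if requested == "vsa" and not VIDEO_SPARSE_ATTN_AVAILABLE:
        raise RuntimeError("dit_attn_impl='vsa' requires the optional `vsa` package")
    return requested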
diffsynth_engine/utils/parallel.py
CHANGED

@@ -8,19 +8,17 @@ import torch.multiprocessing as mp
 import torch.distributed as dist
 from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
 from torch.distributed.fsdp import ShardingStrategy
-from torch.distributed.fsdp.wrap import
+from torch.distributed.fsdp.wrap import transformer_auto_wrap_policy
 from torch.distributed.device_mesh import DeviceMesh
 from torch.distributed.tensor.parallel.style import ParallelStyle
 from torch.distributed.tensor.parallel._utils import _validate_tp_mesh_dim
 from contextlib import contextmanager
 from datetime import timedelta
 from functools import partial
-from typing import Dict, List, Union, Optional
+from typing import Dict, List, Set, Type, Union, Optional
 from queue import Empty

 import diffsynth_engine.models.basic.attention as attention_ops
-from diffsynth_engine.models import PreTrainedModel
-from diffsynth_engine.pipelines import BasePipeline
 from diffsynth_engine.utils.platform import empty_cache
 from diffsynth_engine.utils import logging

@@ -40,10 +38,14 @@ class ProcessGroupSingleton(Singleton):
     def __init__(self):
         self.CFG_GROUP: Optional[dist.ProcessGroup] = None
         self.SP_GROUP: Optional[dist.ProcessGroup] = None
+        self.SP_ULYSSUES_GROUP: Optional[dist.ProcessGroup] = None
+        self.SP_RING_GROUP: Optional[dist.ProcessGroup] = None
         self.TP_GROUP: Optional[dist.ProcessGroup] = None

         self.CFG_RANKS: List[int] = []
         self.SP_RANKS: List[int] = []
+        self.SP_ULYSSUES_RANKS: List[int] = []
+        self.SP_RING_RANKS: List[int] = []
         self.TP_RANKS: List[int] = []


@@ -82,6 +84,38 @@ def get_sp_ranks():
     return PROCESS_GROUP.SP_RANKS


+def get_sp_ulysses_group():
+    return PROCESS_GROUP.SP_ULYSSUES_GROUP
+
+
+def get_sp_ulysses_world_size():
+    return PROCESS_GROUP.SP_ULYSSUES_GROUP.size() if PROCESS_GROUP.SP_ULYSSUES_GROUP is not None else 1
+
+
+def get_sp_ulysses_rank():
+    return PROCESS_GROUP.SP_ULYSSUES_GROUP.rank() if PROCESS_GROUP.SP_ULYSSUES_GROUP is not None else 0
+
+
+def get_sp_ulysses_ranks():
+    return PROCESS_GROUP.SP_ULYSSUES_RANKS
+
+
+def get_sp_ring_group():
+    return PROCESS_GROUP.SP_RING_GROUP
+
+
+def get_sp_ring_world_size():
+    return PROCESS_GROUP.SP_RING_GROUP.size() if PROCESS_GROUP.SP_RING_GROUP is not None else 1
+
+
+def get_sp_ring_rank():
+    return PROCESS_GROUP.SP_RING_GROUP.rank() if PROCESS_GROUP.SP_RING_GROUP is not None else 0
+
+
+def get_sp_ring_ranks():
+    return PROCESS_GROUP.SP_RING_RANKS
+
+
 def get_tp_group():
     return PROCESS_GROUP.TP_GROUP

@@ -127,23 +161,32 @@ def init_parallel_pgs(
     blocks = [list(range(world_size))]
     cfg_groups, cfg_blocks = make_parallel_groups(blocks, cfg_degree)
     for cfg_ranks in cfg_groups:
-        cfg_group = dist.new_group(cfg_ranks)
         if rank in cfg_ranks:
-            PROCESS_GROUP.CFG_GROUP =
+            PROCESS_GROUP.CFG_GROUP = dist.new_group(cfg_ranks)
             PROCESS_GROUP.CFG_RANKS = cfg_ranks

     sp_groups, sp_blocks = make_parallel_groups(cfg_blocks, sp_degree)
     for sp_ranks in sp_groups:
-        group = dist.new_group(sp_ranks)
         if rank in sp_ranks:
-            PROCESS_GROUP.SP_GROUP =
+            PROCESS_GROUP.SP_GROUP = dist.new_group(sp_ranks)
             PROCESS_GROUP.SP_RANKS = sp_ranks

+    sp_ulysses_groups, sp_ulysses_blocks = make_parallel_groups(cfg_blocks, sp_ulysses_degree)
+    for sp_ulysses_ranks in sp_ulysses_groups:
+        if rank in sp_ulysses_ranks:
+            PROCESS_GROUP.SP_ULYSSUES_GROUP = dist.new_group(sp_ulysses_ranks)
+            PROCESS_GROUP.SP_ULYSSUES_RANKS = sp_ulysses_ranks
+
+    sp_ring_groups, _ = make_parallel_groups(sp_ulysses_blocks, sp_ring_degree)
+    for sp_ring_ranks in sp_ring_groups:
+        if rank in sp_ring_ranks:
+            PROCESS_GROUP.SP_RING_GROUP = dist.new_group(sp_ring_ranks)
+            PROCESS_GROUP.SP_RING_RANKS = sp_ring_ranks
+
     tp_groups, _ = make_parallel_groups(sp_blocks, tp_degree)
     for tp_ranks in tp_groups:
-        group = dist.new_group(tp_ranks)
         if rank in tp_ranks:
-            PROCESS_GROUP.TP_GROUP =
+            PROCESS_GROUP.TP_GROUP = dist.new_group(tp_ranks)
             PROCESS_GROUP.TP_RANKS = tp_ranks

     set_seq_parallel_pg(sp_ulysses_degree, sp_ring_degree, rank, world_size)
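The sequence-parallel group is now factored into an Ulysses dimension and a Ring dimension, each with its own process group, and both are carved out of the same cfg block as the combined SP group. A small sanity check that follows from that layout (an assumption about the intended configuration, since make_parallel_groups itself is not shown in this diff):

def check_sp_factorization(sp_degree: int, sp_ulysses_degree: int, sp_ring_degree: int) -> None:
    # ring groups are built from the ulysses blocks, so the two factors should tile the SP group
    assert sp_ulysses_degree * sp_ring_degree == sp_degree, (
        f"sp_ulysses_degree ({sp_ulysses_degree}) * sp_ring_degree ({sp_ring_degree}) "
        f"must equal sp_degree ({sp_degree})"
    )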
@@ -174,25 +217,14 @@ def to_device(data, device):
 def shard_model(
     module: nn.Module,
     device_id: int | torch.device,
+    wrap_module_cls: Set[Type[nn.Module]],
     sharding_strategy: ShardingStrategy = ShardingStrategy.FULL_SHARD,
-    wrap_module_names: Optional[List[str]] = None,
 ):
-    wrap_module_names = wrap_module_names or []
-
-    def wrap_fn(m):
-        for name in wrap_module_names:
-            submodule = getattr(module, name)
-            if isinstance(submodule, nn.ModuleList) and m in submodule:
-                return True
-            elif not isinstance(submodule, nn.ModuleList) and m is submodule:
-                return True
-        return False
-
     return FSDP(
         module,
         device_id=device_id,
         sharding_strategy=sharding_strategy,
-        auto_wrap_policy=partial(
+        auto_wrap_policy=partial(transformer_auto_wrap_policy, transformer_layer_cls=wrap_module_cls),
     )

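shard_model now wraps by module class instead of matching submodules by attribute name. A minimal, self-contained sketch of the same pattern (the Block class is a stand-in; in the pipeline the classes come from each model's get_fsdp_module_cls()):

from functools import partial

import torch
import torch.nn as nn
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
from torch.distributed.fsdp.wrap import transformer_auto_wrap_policy

class Block(nn.Module):  # stand-in for a repeated DiT block
    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(8, 8)

def wrap(model: nn.Module, device: torch.device) -> FSDP:
    # every Block instance becomes its own FSDP unit, regardless of the attribute it hangs off
    policy = partial(transformer_auto_wrap_policy, transformer_layer_cls={Block})
    return FSDP(model, device_id=device, auto_wrap_policy=policy)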
@@ -266,14 +298,15 @@ def _worker_loop(
         world_size=world_size,
     )

-    def wrap_for_parallel(module
-        if
-            for model_name in module
-                …
+    def wrap_for_parallel(module):
+        if hasattr(module, "model_names"):
+            for model_name in getattr(module, "model_names"):
+                submodule = getattr(module, model_name)
+                if getattr(submodule, "_supports_parallelization", False):
                     setattr(module, model_name, wrap_for_parallel(submodule))
             return module

-        if not module
+        if not getattr(module, "_supports_parallelization", False):
             return module

         if tp_degree > 1:

@@ -283,7 +316,7 @@ def _worker_loop(
                 parallelize_plan=module.get_tp_plan(),
             )
         elif use_fsdp:
-            module = shard_model(module, device_id=device,
+            module = shard_model(module, device_id=device, wrap_module_cls=module.get_fsdp_module_cls())
         return module

     module = None
diffsynth_engine-0.6.1.dev25.dist-info/RECORD
CHANGED

@@ -1,4 +1,4 @@
-diffsynth_engine/__init__.py,sha256=
+diffsynth_engine/__init__.py,sha256=deLiGEHeQV1Xq7Kd11oRUA28FDegUgXBjlkNwgtVBMw,2290
 diffsynth_engine/algorithm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diffsynth_engine/algorithm/noise_scheduler/__init__.py,sha256=YvcwE2tCNua-OAX9GEPm0EXsINNWH4XvJMNZb-uaZMM,745
 diffsynth_engine/algorithm/noise_scheduler/base_scheduler.py,sha256=3ve4bYxGyfuERynvoNYdFYSk0agdBgXKCeIOS6O6wgI,819
@@ -40,17 +40,18 @@ diffsynth_engine/conf/models/sd3/sd3_dit.json,sha256=RyJeCKjd4UPRf2Qbicd8Oxlioxg
 diffsynth_engine/conf/models/sd3/sd3_text_encoder.json,sha256=1yXwzKbbIIVg1QPhQJxjdwvbFkA1mJ6NR6dw2vrN-1A,91415
 diffsynth_engine/conf/models/sdxl/sdxl_text_encoder.json,sha256=cBN3mIm4BjJYbSpL2gz4yeb1aP0BvGt9na4hmuafyJo,35642
 diffsynth_engine/conf/models/sdxl/sdxl_unet.json,sha256=9f9ca1qYQALaDkA5KTCfVP9mKFvhM2xFP5e042Ryppw,129779
-diffsynth_engine/conf/models/wan/dit/wan2.
-diffsynth_engine/conf/models/wan/dit/wan2.
-diffsynth_engine/conf/models/wan/dit/wan2.
-diffsynth_engine/conf/models/wan/dit/wan2.
-diffsynth_engine/conf/models/wan/dit/wan2.
-diffsynth_engine/conf/models/wan/dit/wan2.
-diffsynth_engine/conf/models/wan/dit/wan2.
-diffsynth_engine/conf/models/wan/dit/wan2.
-diffsynth_engine/conf/models/wan/
-diffsynth_engine/conf/models/wan/vae/wan2.
-diffsynth_engine/conf/models/wan/vae/wan2.
+diffsynth_engine/conf/models/wan/dit/wan2.1_flf2v_14b.json,sha256=s7yoVErSiuSlGwwqfrvhvmzz6MD4oAqBKg7iZfL1vX8,313
+diffsynth_engine/conf/models/wan/dit/wan2.1_i2v_14b.json,sha256=BkDV80TkA-_vTRR_1AWpGIzwlgtuKbh-gezW2Q20dlQ,269
+diffsynth_engine/conf/models/wan/dit/wan2.1_t2v_1.3b.json,sha256=M_h55-mMhpgXUuY85sBK6-_f4fg3bfCa6T7n1CyMP3s,209
+diffsynth_engine/conf/models/wan/dit/wan2.1_t2v_14b.json,sha256=7i2Hq8BRH4kDVYBKcIBt8m3vCl_HGZZPFY5fmFw4xgs,210
+diffsynth_engine/conf/models/wan/dit/wan2.2_i2v_a14b.json,sha256=7OmPEfreIu8Ex6NDr1IW69zmKRp21hZkmg_9yg6sUg8,322
+diffsynth_engine/conf/models/wan/dit/wan2.2_s2v_14b.json,sha256=r5L_BT2RHlFpztzXzoOVBK-WzNelYHEo3yDs8GKtWlk,284
+diffsynth_engine/conf/models/wan/dit/wan2.2_t2v_a14b.json,sha256=MqxjGwq8VqD-1RwbPocbkKx0JzsMgwn18hfVK7M0d4k,312
+diffsynth_engine/conf/models/wan/dit/wan2.2_ti2v_5b.json,sha256=tO7nymyqQgBIgxlswITnIc_MsRr1RRPhZbbhJ-1gHow,257
+diffsynth_engine/conf/models/wan/dit/wan_dit_keymap.json,sha256=hfGytOIRkdYFgOR9RB714X_XoYC3kSh9SGNHrPFuvQE,1903
+diffsynth_engine/conf/models/wan/vae/wan2.1_vae.json,sha256=eVLTSRqbXm3JD8QDkLbM6vFfCdynlS-8QxqCfi4BzrI,815
+diffsynth_engine/conf/models/wan/vae/wan2.2_vae.json,sha256=pdnYEEZ_GcZHM_iH1y5ASdf_qZUGCOuDEaFmjdg9RKY,1860
+diffsynth_engine/conf/models/wan/vae/wan_vae_keymap.json,sha256=u9MJ3yRL45kdqRVoBnYbHkmuUmOseUFtwte-_9ZvdHc,25224
 diffsynth_engine/conf/tokenizers/flux/tokenizer_1/merges.txt,sha256=n9aR98gDkhDg_O0VhlRmxlgg0JtjmIsBdL_iXeKZBRo,524619
 diffsynth_engine/conf/tokenizers/flux/tokenizer_1/special_tokens_map.json,sha256=LNs7gzGmDJL8HlWhPp_WH9IpPFpRJ1_czNYreABSUw4,588
 diffsynth_engine/conf/tokenizers/flux/tokenizer_1/tokenizer_config.json,sha256=a9zunMzioWyitMDF7QC0LFDqIl9EcqjEweljopAsKIE,705
@@ -78,24 +79,25 @@ diffsynth_engine/conf/tokenizers/wan/umt5-xxl/special_tokens_map.json,sha256=e4q
 diffsynth_engine/conf/tokenizers/wan/umt5-xxl/spiece.model,sha256=45CaZ7eAZQs1z1Kax4KtK2sm5tH4SdP7tqhykF9FJFg,4548313
 diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer.json,sha256=bhl7TT29cdoUtOslX0-pHJwfIGiyCi3iRylnyj0iYCs,16837417
 diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer_config.json,sha256=7Zo6iw-qcacKMoR-BDX-A25uES1N9O23u0ipIeNE3AU,61728
-diffsynth_engine/configs/__init__.py,sha256=
+diffsynth_engine/configs/__init__.py,sha256=vSjJToEdq3JX7t81_z4nwNwIdD4bYnFjxnMZH7PXMKo,1309
 diffsynth_engine/configs/controlnet.py,sha256=f3vclyP3lcAjxDGD9C1vevhqqQ7W2LL_c6Wye0uxk3Q,1180
-diffsynth_engine/configs/pipeline.py,sha256=
+diffsynth_engine/configs/pipeline.py,sha256=2tCcW3qndx5GdzYNvpbAsR6ZGnzY8q7EzJjWDIATBr0,13297
 diffsynth_engine/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diffsynth_engine/models/__init__.py,sha256=8Ze7cSE8InetgXWTNb0neVA2Q44K7WlE-h7O-02m2sY,119
-diffsynth_engine/models/base.py,sha256=
+diffsynth_engine/models/base.py,sha256=BA5vgMqfy_cjuL2OtXbrFD-Qg5xQnaumHpj5TabwSy8,2559
 diffsynth_engine/models/basic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-diffsynth_engine/models/basic/attention.py,sha256=
+diffsynth_engine/models/basic/attention.py,sha256=iFxpvXdaEJZHddTTRuKL1grKb6beU53y-VuRPX8FpFw,13127
 diffsynth_engine/models/basic/lora.py,sha256=PT-A3pwIuUrW2w3TnNlBPb1KRj70QYiBaoCvLnkR5cs,10652
 diffsynth_engine/models/basic/relative_position_emb.py,sha256=rCXOweZMcayVnNUVvBcYXMdhHS257B_PC8PZSWxvhNQ,2540
 diffsynth_engine/models/basic/timestep.py,sha256=WJODYqkSXEM0wcS42YkkfrGwxWt0e60zMTkDdUBQqBw,2810
-diffsynth_engine/models/basic/transformer_helper.py,sha256=
+diffsynth_engine/models/basic/transformer_helper.py,sha256=6K7A5bVnN2bOoq6I0IQf7RJBhSZUP4jNf1n7NPGu8zA,5287
 diffsynth_engine/models/basic/unet_helper.py,sha256=4lN6F80Ubm6ip4dkLVmB-Og5-Y25Wduhs9Q8qjyzK6E,9044
+diffsynth_engine/models/basic/video_sparse_attention.py,sha256=iXA3sHDLWk1ns1lVCNbZdiaDu94kBIsw-9vrCGAll7g,7843
 diffsynth_engine/models/flux/__init__.py,sha256=x0JoxL0CdiiVrY0BjkIrGinud7mcXecLleGO0km91XQ,686
-diffsynth_engine/models/flux/flux_controlnet.py,sha256=
-diffsynth_engine/models/flux/flux_dit.py,sha256=
-diffsynth_engine/models/flux/flux_dit_fbcache.py,sha256=
-diffsynth_engine/models/flux/flux_ipadapter.py,sha256=
+diffsynth_engine/models/flux/flux_controlnet.py,sha256=NvFKQIx0NldX5uUxdmYwuS2s-xaFRlKotiE6lr3-HRY,8018
+diffsynth_engine/models/flux/flux_dit.py,sha256=7sdV8KFQiHcK-8aqyvXBgC7E_-D9rcgBcnMXUq_AybI,23403
+diffsynth_engine/models/flux/flux_dit_fbcache.py,sha256=0TpEJQ2gxIilM4FVy4ZNA-lmajnb7-ueXuElagYzWgw,8324
+diffsynth_engine/models/flux/flux_ipadapter.py,sha256=8YBprBH-V5TRs0bdzQOeGIwAGdSUcpk5GD_oDZZlfZg,6985
 diffsynth_engine/models/flux/flux_redux.py,sha256=-X-eu4NGEu-4PmK9hM-u4nRrWqvW6a4FJ884G8KEE3A,2353
 diffsynth_engine/models/flux/flux_text_encoder.py,sha256=ruqFICZXQfZtHUasXHw5FK6uOi-IJdFjC-_GM0m5bco,3629
 diffsynth_engine/models/flux/flux_vae.py,sha256=lsiqqsl6eT_ry3vGkrqgLi5EVrFFXYm8wUkezspowD8,3705
@@ -107,9 +109,9 @@ diffsynth_engine/models/hunyuan3d/moe.py,sha256=FAuUqgrB2ZFb0uGBhI-Afv850HmzDFP5
 diffsynth_engine/models/hunyuan3d/surface_extractor.py,sha256=b15mb1N4PYwAvDk1Gude8qlccRKrSg461xT59RjMEQk,4167
 diffsynth_engine/models/hunyuan3d/volume_decoder.py,sha256=sgflj1a8sIerqGSalBAVQOlyiIihkLOLXYysNbulCoQ,2355
 diffsynth_engine/models/qwen_image/__init__.py,sha256=X5pig621WEsDZ6L7HVkmYspV53-GDfs_la1ncaq_NFw,417
-diffsynth_engine/models/qwen_image/qwen2_5_vl.py,sha256=
-diffsynth_engine/models/qwen_image/qwen_image_dit.py,sha256=
-diffsynth_engine/models/qwen_image/qwen_image_dit_fbcache.py,sha256=
+diffsynth_engine/models/qwen_image/qwen2_5_vl.py,sha256=Eu-r-c42t_q74Qpwz21ToCGHpvSi7VND4B1EI0e-ePA,57748
+diffsynth_engine/models/qwen_image/qwen_image_dit.py,sha256=iJ-FinDyXa982Uao1is37bxUttyPu0Eldyd7qPJO_XQ,22582
+diffsynth_engine/models/qwen_image/qwen_image_dit_fbcache.py,sha256=LIv9X_BohKk5rcEzyl3ATLwd8MSoFX43wjkArQ68nq8,4828
 diffsynth_engine/models/qwen_image/qwen_image_vae.py,sha256=eO7f4YqiYXfw7NncBNFTu-xEvdJ5uKY-SnfP15QY0tE,38443
 diffsynth_engine/models/sd/__init__.py,sha256=hjoKRnwoXOLD0wude-w7I6wK5ak7ACMbnbkPuBB2oU0,380
 diffsynth_engine/models/sd/sd_controlnet.py,sha256=kMGfIdriXhC7reT6iO2Z0rPICXEkXpytjeBQcR_sjT8,50577
@@ -132,29 +134,29 @@ diffsynth_engine/models/text_encoder/t5.py,sha256=8JXVzqJLMrtn7qC_XANK4u76vEGavd
 diffsynth_engine/models/vae/__init__.py,sha256=TFSIXZ-UyRaZbEr5KUXm1d4koS5gbgsCi7Soh6jDV0Y,140
 diffsynth_engine/models/vae/vae.py,sha256=1Hz5Yb6f8V-psC0qothfzg8EZBPVPpg9KGlSMDm2-kA,15809
 diffsynth_engine/models/wan/__init__.py,sha256=eYwZ2Upo2mTjaAcBWuSft1m4mLnqE47bz2V_u-WtkwQ,246
-diffsynth_engine/models/wan/wan_audio_encoder.py,sha256=
-diffsynth_engine/models/wan/wan_dit.py,sha256=
+diffsynth_engine/models/wan/wan_audio_encoder.py,sha256=i8mVu5lhVlTnzVTDcSv7qGC6HjB3MuS9hFVkUrw9458,13629
+diffsynth_engine/models/wan/wan_dit.py,sha256=MEt9eWy6djWT1dtlFEHP9Yevat4-M_LSzWRauNSIHck,21599
 diffsynth_engine/models/wan/wan_image_encoder.py,sha256=VE7crdTxOFN2UCMN2cQlvHB9BilSbKOBQYgnXgl4E2Y,14313
 diffsynth_engine/models/wan/wan_s2v_dit.py,sha256=j63ulcWLY4XGITOKUMGX292LtSEtP-n8BTvqb98YExU,23615
 diffsynth_engine/models/wan/wan_text_encoder.py,sha256=OERlmwOqthAFPNnnT2sXJ4OjyyRmsRLx7VGp1zlBkLU,11021
 diffsynth_engine/models/wan/wan_vae.py,sha256=dC7MoUFeXRL7SIY0LG1OOUiZW-pp9IbXCghutMxpXr4,38889
 diffsynth_engine/pipelines/__init__.py,sha256=jh-4LSJ0vqlXiT8BgFgRIQxuAr2atEPyHrxXWj-Ud1U,604
-diffsynth_engine/pipelines/base.py,sha256=
-diffsynth_engine/pipelines/flux_image.py,sha256=
+diffsynth_engine/pipelines/base.py,sha256=Yvb2xiHT1Jhx4HDkNPHdXjzhUkM9_65D4zM-GSSOWoU,16133
+diffsynth_engine/pipelines/flux_image.py,sha256=L0ggxpthLD8a5-zdPHu9z668uWBei9YzPb4PFVypDNU,50707
 diffsynth_engine/pipelines/hunyuan3d_shape.py,sha256=TNV0Wr09Dj2bzzlpua9WioCClOj3YiLfE6utI9aWL8A,8164
-diffsynth_engine/pipelines/qwen_image.py,sha256=
+diffsynth_engine/pipelines/qwen_image.py,sha256=n6Nnin8OyC9Mfp8O-3N4GNq12Mws8_hHWv-SwU4-HCc,33054
 diffsynth_engine/pipelines/sd_image.py,sha256=nr-Nhsnomq8CsUqhTM3i2l2zG01YjwXdfRXgr_bC3F0,17891
-diffsynth_engine/pipelines/sdxl_image.py,sha256=
-diffsynth_engine/pipelines/utils.py,sha256=
-diffsynth_engine/pipelines/wan_s2v.py,sha256=
-diffsynth_engine/pipelines/wan_video.py,sha256=
+diffsynth_engine/pipelines/sdxl_image.py,sha256=v7ZACGPb6EcBunL6e5E9jynSQjE7GQx8etEV-ZLP91g,21704
+diffsynth_engine/pipelines/utils.py,sha256=HZbJHErNJS1DhlwJKvZ9dY7Kh8Zdlsw3zE2e88TYGRY,2277
+diffsynth_engine/pipelines/wan_s2v.py,sha256=AUVLhLP5F0gnOV7nqWQUSZbye5ov-m44151B3zWBrAk,29323
+diffsynth_engine/pipelines/wan_video.py,sha256=Hs1iVacfrwi_0X4VNgflVUlJP5vHp0x7CF6wegidP2c,29108
 diffsynth_engine/processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diffsynth_engine/processor/canny_processor.py,sha256=hV30NlblTkEFUAmF_O-LJrNlGVM2SFrqq6okfF8VpOo,602
 diffsynth_engine/processor/depth_processor.py,sha256=dQvs3JsnyMbz4dyI9QoR8oO-mMFBFAgNvgqeCoaU5jk,1532
 diffsynth_engine/tokenizers/__init__.py,sha256=KxTna7UrkptrBU1j3zBYOi_8mVEWlcSMGZwK2ahuHNw,456
-diffsynth_engine/tokenizers/base.py,sha256=
+diffsynth_engine/tokenizers/base.py,sha256=skDQZXEYElc51nDi3b0vhtgm4PZQpQOIDz7fPSAYTHI,5261
 diffsynth_engine/tokenizers/clip.py,sha256=6yggDSRGZc34CKflO1DwTIisggv53aITe_h-YnsERzc,10695
-diffsynth_engine/tokenizers/qwen2.py,sha256=
+diffsynth_engine/tokenizers/qwen2.py,sha256=siel195SbXOD7XVnJVKMkOtW8Vl1vXOiyWVXXToVKW0,9696
 diffsynth_engine/tokenizers/qwen2_vl_image_processor.py,sha256=7IBOn2m4AbL-URVrSrFY0k88r4_gkK_nuTQRAxorBes,6239
 diffsynth_engine/tokenizers/qwen2_vl_processor.py,sha256=Zyu8_5ETCjACQ8BX6jvVRWj37nZqJgtI0hesSUGm4-g,4145
 diffsynth_engine/tokenizers/t5.py,sha256=brhRFkXaTzE29hl_wDdcjQ3MCoL0pQslwHIRbMX_bNo,7442
@@ -166,10 +168,10 @@ diffsynth_engine/tools/flux_reference_tool.py,sha256=6v0NRZPsDEHFlPruO-ZJTB4rYWx
 diffsynth_engine/tools/flux_replace_tool.py,sha256=AOyEGxHsaNwpTS2VChAieIfECgMxlKsRw0lWPm1k9C0,4627
 diffsynth_engine/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diffsynth_engine/utils/cache.py,sha256=Ivef22pCuhEq-4H00gSvkLS8ceVZoGis7OSitYL6gH4,2101
-diffsynth_engine/utils/constants.py,sha256=
+diffsynth_engine/utils/constants.py,sha256=sJio3Vy8i0-PWYRnqquYt6ez9k6Tc9JdjCv6pn2BU_4,3551
 diffsynth_engine/utils/download.py,sha256=w9QQjllPfTUEY371UTREU7o_vvdMY-Q2DymDel3ZEZY,6792
 diffsynth_engine/utils/env.py,sha256=k749eYt_qKGq38GocDiXfkhp8nZrowFefNVTZ8R755I,363
-diffsynth_engine/utils/flag.py,sha256=
+diffsynth_engine/utils/flag.py,sha256=wODDbMMLTGOl7yoLMZDKGyqXSYANPaDQdZGXOJryGeI,1597
 diffsynth_engine/utils/fp8_linear.py,sha256=k34YFWo2dc3t8aKjHaCW9CbQMOTqXxaDHk8aw8aKif4,3857
 diffsynth_engine/utils/gguf.py,sha256=ZWvw46V4g4uVyAR_oCq-4K5nPdKVrYk3u47uXMgA9lU,14092
 diffsynth_engine/utils/image.py,sha256=PiDButjv0fsRS23kpQgCLZAlBumpzQmNnolfvb5EKQ0,9626
@@ -178,15 +180,15 @@ diffsynth_engine/utils/lock.py,sha256=1Ipgst9eEFfFdViAvD5bxdB6HnHHBcqWYOb__fGaPU
 diffsynth_engine/utils/logging.py,sha256=XB0xTT8PBN6btkOjFtOvjlrOCRVgDGT8PFAp1vmse28,467
 diffsynth_engine/utils/offload.py,sha256=94og79TIkxldwYUgZT3L4OVu1WBlE7gfVPvO2MRhm6c,3551
 diffsynth_engine/utils/onnx.py,sha256=jeWUudJHnESjuiEAHyUZYUZz7dCj34O9aGjHCe8yjWo,1149
-diffsynth_engine/utils/parallel.py,sha256=
+diffsynth_engine/utils/parallel.py,sha256=6T8oCTp-7Gb3qsgNRB2Bp3DF4eyx1FzvS6pFnEJbsek,19789
 diffsynth_engine/utils/platform.py,sha256=nbpG-XHJFRmYY6u_e7IBQ9Q6GyItrIkKf3VKuBPTUpY,627
 diffsynth_engine/utils/prompt.py,sha256=YItMchoVzsG6y-LB4vzzDUWrkhKRVlt1HfVhxZjSxMQ,280
 diffsynth_engine/utils/video.py,sha256=8FCaeqIdUsWMgWI_6SO9SPynsToGcLCQAVYFTc4CDhg,2200
 diffsynth_engine/utils/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diffsynth_engine/utils/memory/linear_regression.py,sha256=oW_EQEw13oPoyUrxiL8A7Ksa5AuJ2ynI2qhCbfAuZbg,3930
 diffsynth_engine/utils/memory/memory_predcit_model.py,sha256=EXprSl_zlVjgfMWNXP-iw83Ot3hyMcgYaRPv-dvyL84,3943
-diffsynth_engine-0.6.1.
-diffsynth_engine-0.6.1.
-diffsynth_engine-0.6.1.
-diffsynth_engine-0.6.1.
-diffsynth_engine-0.6.1.
+diffsynth_engine-0.6.1.dev25.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
+diffsynth_engine-0.6.1.dev25.dist-info/METADATA,sha256=hbm3Xm8GajphVodptdo1vPnvB098xLQk8B1ORFoUQ8k,1164
+diffsynth_engine-0.6.1.dev25.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+diffsynth_engine-0.6.1.dev25.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
+diffsynth_engine-0.6.1.dev25.dist-info/RECORD,,
File without changes
|