diffsynth-engine 0.5.1.dev2__py3-none-any.whl → 0.5.1.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffsynth_engine/__init__.py +2 -0
- diffsynth_engine/conf/models/wan/dit/wan2.2-s2v-14b.json +13 -0
- diffsynth_engine/configs/__init__.py +4 -0
- diffsynth_engine/configs/pipeline.py +36 -0
- diffsynth_engine/models/basic/attention.py +7 -4
- diffsynth_engine/models/wan/wan_audio_encoder.py +306 -0
- diffsynth_engine/models/wan/wan_dit.py +6 -2
- diffsynth_engine/models/wan/wan_s2v_dit.py +567 -0
- diffsynth_engine/pipelines/__init__.py +2 -0
- diffsynth_engine/pipelines/wan_s2v.py +685 -0
- diffsynth_engine/utils/constants.py +1 -0
- diffsynth_engine/utils/image.py +7 -0
- diffsynth_engine/utils/video.py +26 -0
- {diffsynth_engine-0.5.1.dev2.dist-info → diffsynth_engine-0.5.1.dev4.dist-info}/METADATA +3 -1
- {diffsynth_engine-0.5.1.dev2.dist-info → diffsynth_engine-0.5.1.dev4.dist-info}/RECORD +18 -14
- {diffsynth_engine-0.5.1.dev2.dist-info → diffsynth_engine-0.5.1.dev4.dist-info}/WHEEL +0 -0
- {diffsynth_engine-0.5.1.dev2.dist-info → diffsynth_engine-0.5.1.dev4.dist-info}/licenses/LICENSE +0 -0
- {diffsynth_engine-0.5.1.dev2.dist-info → diffsynth_engine-0.5.1.dev4.dist-info}/top_level.txt +0 -0
diffsynth_engine/utils/image.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import torch
|
|
2
|
+
from torchvision import transforms
|
|
2
3
|
import numpy as np
|
|
3
4
|
import math
|
|
4
5
|
from PIL import Image
|
|
@@ -32,6 +33,12 @@ def tensor_to_image(t: torch.Tensor, denormalize: bool = True) -> Image.Image:
|
|
|
32
33
|
return Image.fromarray(t, mode=mode)
|
|
33
34
|
|
|
34
35
|
|
|
36
|
+
def resize_and_center_crop(image, height: int, width: int):
|
|
37
|
+
resize_operation = transforms.Resize(min(height, width))
|
|
38
|
+
crop_operation = transforms.CenterCrop((height, width))
|
|
39
|
+
return transforms.Compose([resize_operation, crop_operation])(image)
|
|
40
|
+
|
|
41
|
+
|
|
35
42
|
class ChannelDimension(Enum):
|
|
36
43
|
FIRST = "channels_first"
|
|
37
44
|
LAST = "channels_last"
|
diffsynth_engine/utils/video.py
CHANGED
|
@@ -3,6 +3,7 @@ import imageio.v3 as iio
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
from PIL import Image
|
|
5
5
|
from typing import List
|
|
6
|
+
from moviepy import ImageSequenceClip, AudioFileClip, VideoClip
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
class VideoReader:
|
|
@@ -38,3 +39,28 @@ def save_video(frames, save_path, fps=15):
|
|
|
38
39
|
# 使用 imageio 写入 .webm 文件
|
|
39
40
|
with iio.imopen(save_path, "w", plugin="FFMPEG") as writer:
|
|
40
41
|
writer.write(frames, fps=fps, codec=codec)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def read_n_frames(frames: List[Image.Image], original_fps: int, n_frames: int, target_fps: int = 16) -> List[Image.Image]:
|
|
45
|
+
num_frames = len(frames)
|
|
46
|
+
interval = max(1, round(original_fps / target_fps))
|
|
47
|
+
sampled_frames: List[Image.Image] = []
|
|
48
|
+
for i in range(n_frames):
|
|
49
|
+
frame_idx = i * interval
|
|
50
|
+
if frame_idx >= num_frames:
|
|
51
|
+
break
|
|
52
|
+
sampled_frames.append(frames[frame_idx])
|
|
53
|
+
return sampled_frames
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def save_video_with_audio(frames: List[Image.Image], audio_path: str, target_video_path: str, fps: int = 16):
|
|
57
|
+
# combine all frames
|
|
58
|
+
video = [np.array(frame) for frame in frames] # shape: t* (b*h, w, c)
|
|
59
|
+
video_clip = ImageSequenceClip(video, fps=fps)
|
|
60
|
+
audio_clip = AudioFileClip(audio_path)
|
|
61
|
+
if audio_clip.duration > video_clip.duration:
|
|
62
|
+
audio_clip: AudioFileClip = audio_clip.subclipped(0, video_clip.duration) # clip audio
|
|
63
|
+
else:
|
|
64
|
+
video_clip: VideoClip = video_clip.subclipped(0, audio_clip.duration)
|
|
65
|
+
video_with_audio: VideoClip = video_clip.with_audio(audio_clip)
|
|
66
|
+
video_with_audio.write_videofile(target_video_path, codec="libx264")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: diffsynth_engine
|
|
3
|
-
Version: 0.5.1.dev2
|
|
3
|
+
Version: 0.5.1.dev4
|
|
4
4
|
Author: MuseAI x ModelScope
|
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
|
6
6
|
Classifier: Operating System :: OS Independent
|
|
@@ -25,6 +25,8 @@ Requires-Dist: imageio[ffmpeg]
|
|
|
25
25
|
Requires-Dist: yunchang; sys_platform == "linux"
|
|
26
26
|
Requires-Dist: onnxruntime
|
|
27
27
|
Requires-Dist: opencv-python
|
|
28
|
+
Requires-Dist: moviepy
|
|
29
|
+
Requires-Dist: librosa
|
|
28
30
|
Requires-Dist: scikit-image
|
|
29
31
|
Requires-Dist: trimesh
|
|
30
32
|
Provides-Extra: dev
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
diffsynth_engine/__init__.py,sha256=
|
|
1
|
+
diffsynth_engine/__init__.py,sha256=Pn2kE8Y_Z-PCqqhhsIbUwLQIAf5sQti-ozitPr-WUHg,1986
|
|
2
2
|
diffsynth_engine/algorithm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
diffsynth_engine/algorithm/noise_scheduler/__init__.py,sha256=YvcwE2tCNua-OAX9GEPm0EXsINNWH4XvJMNZb-uaZMM,745
|
|
4
4
|
diffsynth_engine/algorithm/noise_scheduler/base_scheduler.py,sha256=WICrLEh7b2TdZMMEN14NqiYydj7dxXT6RolXymKiMk8,188
|
|
@@ -45,6 +45,7 @@ diffsynth_engine/conf/models/wan/dit/wan2.1-i2v-14b.json,sha256=BkDV80TkA-_vTRR_
|
|
|
45
45
|
diffsynth_engine/conf/models/wan/dit/wan2.1-t2v-1.3b.json,sha256=M_h55-mMhpgXUuY85sBK6-_f4fg3bfCa6T7n1CyMP3s,209
|
|
46
46
|
diffsynth_engine/conf/models/wan/dit/wan2.1-t2v-14b.json,sha256=7i2Hq8BRH4kDVYBKcIBt8m3vCl_HGZZPFY5fmFw4xgs,210
|
|
47
47
|
diffsynth_engine/conf/models/wan/dit/wan2.2-i2v-a14b.json,sha256=7OmPEfreIu8Ex6NDr1IW69zmKRp21hZkmg_9yg6sUg8,322
|
|
48
|
+
diffsynth_engine/conf/models/wan/dit/wan2.2-s2v-14b.json,sha256=r5L_BT2RHlFpztzXzoOVBK-WzNelYHEo3yDs8GKtWlk,284
|
|
48
49
|
diffsynth_engine/conf/models/wan/dit/wan2.2-t2v-a14b.json,sha256=MqxjGwq8VqD-1RwbPocbkKx0JzsMgwn18hfVK7M0d4k,312
|
|
49
50
|
diffsynth_engine/conf/models/wan/dit/wan2.2-ti2v-5b.json,sha256=tO7nymyqQgBIgxlswITnIc_MsRr1RRPhZbbhJ-1gHow,257
|
|
50
51
|
diffsynth_engine/conf/models/wan/vae/wan-vae-keymap.json,sha256=u9MJ3yRL45kdqRVoBnYbHkmuUmOseUFtwte-_9ZvdHc,25224
|
|
@@ -77,14 +78,14 @@ diffsynth_engine/conf/tokenizers/wan/umt5-xxl/special_tokens_map.json,sha256=e4q
|
|
|
77
78
|
diffsynth_engine/conf/tokenizers/wan/umt5-xxl/spiece.model,sha256=45CaZ7eAZQs1z1Kax4KtK2sm5tH4SdP7tqhykF9FJFg,4548313
|
|
78
79
|
diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer.json,sha256=bhl7TT29cdoUtOslX0-pHJwfIGiyCi3iRylnyj0iYCs,16837417
|
|
79
80
|
diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer_config.json,sha256=7Zo6iw-qcacKMoR-BDX-A25uES1N9O23u0ipIeNE3AU,61728
|
|
80
|
-
diffsynth_engine/configs/__init__.py,sha256=
|
|
81
|
+
diffsynth_engine/configs/__init__.py,sha256=bEpjrHs5OVy4TAdXI1X9aDAbZOhLecT6U9R2xaR2siA,992
|
|
81
82
|
diffsynth_engine/configs/controlnet.py,sha256=EpUkCdRNk2G5uo56syaOzPFdR9g0sDHRXckagmMsgaQ,948
|
|
82
|
-
diffsynth_engine/configs/pipeline.py,sha256=
|
|
83
|
+
diffsynth_engine/configs/pipeline.py,sha256=Wh3fD9xsnpjbPos-KLJxlCjIxVHms8oqadkS03H3yuI,12721
|
|
83
84
|
diffsynth_engine/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
84
85
|
diffsynth_engine/models/__init__.py,sha256=8Ze7cSE8InetgXWTNb0neVA2Q44K7WlE-h7O-02m2sY,119
|
|
85
86
|
diffsynth_engine/models/base.py,sha256=PlBDpimIYnysjbvlIwsCDc0uyIAjdiTlwuOUFGdt-bk,2556
|
|
86
87
|
diffsynth_engine/models/basic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
87
|
-
diffsynth_engine/models/basic/attention.py,sha256=
|
|
88
|
+
diffsynth_engine/models/basic/attention.py,sha256=Or57PjHGVOjjzSPUuH2nbuf5r_VtrVQYRiL1v5zT7T0,10646
|
|
88
89
|
diffsynth_engine/models/basic/lora.py,sha256=PT-A3pwIuUrW2w3TnNlBPb1KRj70QYiBaoCvLnkR5cs,10652
|
|
89
90
|
diffsynth_engine/models/basic/relative_position_emb.py,sha256=rCXOweZMcayVnNUVvBcYXMdhHS257B_PC8PZSWxvhNQ,2540
|
|
90
91
|
diffsynth_engine/models/basic/timestep.py,sha256=WJODYqkSXEM0wcS42YkkfrGwxWt0e60zMTkDdUBQqBw,2810
|
|
@@ -131,11 +132,13 @@ diffsynth_engine/models/text_encoder/t5.py,sha256=8JXVzqJLMrtn7qC_XANK4u76vEGavd
|
|
|
131
132
|
diffsynth_engine/models/vae/__init__.py,sha256=TFSIXZ-UyRaZbEr5KUXm1d4koS5gbgsCi7Soh6jDV0Y,140
|
|
132
133
|
diffsynth_engine/models/vae/vae.py,sha256=q2haNS0N0Y6tWv4X7ocBcruE8TXQkHDSFoPzXHv7YUo,15791
|
|
133
134
|
diffsynth_engine/models/wan/__init__.py,sha256=eYwZ2Upo2mTjaAcBWuSft1m4mLnqE47bz2V_u-WtkwQ,246
|
|
134
|
-
diffsynth_engine/models/wan/
|
|
135
|
+
diffsynth_engine/models/wan/wan_audio_encoder.py,sha256=W7sUo3Fse33MNsZTYiYYEe0Y_YZUPpPZex0VNz8OQk8,13647
|
|
136
|
+
diffsynth_engine/models/wan/wan_dit.py,sha256=ocCAYl3xP0UdB9dhz9R1pRfBBESEHxDUu116hORe39U,20080
|
|
135
137
|
diffsynth_engine/models/wan/wan_image_encoder.py,sha256=VE7crdTxOFN2UCMN2cQlvHB9BilSbKOBQYgnXgl4E2Y,14313
|
|
138
|
+
diffsynth_engine/models/wan/wan_s2v_dit.py,sha256=sOJsSs1snI-ZGPJS8utstmgj0wcYwloyZ0C14hQHFkg,23597
|
|
136
139
|
diffsynth_engine/models/wan/wan_text_encoder.py,sha256=Zv0q7Rc0Uq3SM0i1PokdmaKK5vhhJogpXxpq1tYHrtU,10768
|
|
137
140
|
diffsynth_engine/models/wan/wan_vae.py,sha256=ogXrVlwmzXR4iLxjSCkBPtYW8KWebnvvd2UtPZeoziY,38853
|
|
138
|
-
diffsynth_engine/pipelines/__init__.py,sha256=
|
|
141
|
+
diffsynth_engine/pipelines/__init__.py,sha256=jh-4LSJ0vqlXiT8BgFgRIQxuAr2atEPyHrxXWj-Ud1U,604
|
|
139
142
|
diffsynth_engine/pipelines/base.py,sha256=RTkVwWaWXr5ujqn5-UBHvdPddYwr-uvChj9-fmoXrms,13729
|
|
140
143
|
diffsynth_engine/pipelines/flux_image.py,sha256=a-MaHuguV7Z6LJukC_Tvp7d9_2dnrAaJZ4MZH_sKsKo,49116
|
|
141
144
|
diffsynth_engine/pipelines/hunyuan3d_shape.py,sha256=fwNKET54KjCiWDpW2S1Fk-p3nfJreZ-RH7p46VLawEQ,7911
|
|
@@ -143,6 +146,7 @@ diffsynth_engine/pipelines/qwen_image.py,sha256=EAYoq1QkdOSie_yVZG9enxJJRcncwVFP
|
|
|
143
146
|
diffsynth_engine/pipelines/sd_image.py,sha256=nr-Nhsnomq8CsUqhTM3i2l2zG01YjwXdfRXgr_bC3F0,17891
|
|
144
147
|
diffsynth_engine/pipelines/sdxl_image.py,sha256=FaihRd9Rt_qtqup2xEbHViVIFwFZVyvekYW4lCodNKY,21692
|
|
145
148
|
diffsynth_engine/pipelines/utils.py,sha256=lk7sFGEk-fGjgadLpwwppHKG-yZ0RC-4ZmHW7pRRe8A,473
|
|
149
|
+
diffsynth_engine/pipelines/wan_s2v.py,sha256=Ekrd93rjoqUMcas7a6u9cA3Lon_21h6ESteNFcFFPas,29378
|
|
146
150
|
diffsynth_engine/pipelines/wan_video.py,sha256=PZZcjmI0RfKWAy_6835qUeGZi7nuLqLOAEng86EqaxA,28127
|
|
147
151
|
diffsynth_engine/processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
148
152
|
diffsynth_engine/processor/canny_processor.py,sha256=hV30NlblTkEFUAmF_O-LJrNlGVM2SFrqq6okfF8VpOo,602
|
|
@@ -162,13 +166,13 @@ diffsynth_engine/tools/flux_reference_tool.py,sha256=6v0NRZPsDEHFlPruO-ZJTB4rYWx
|
|
|
162
166
|
diffsynth_engine/tools/flux_replace_tool.py,sha256=AOyEGxHsaNwpTS2VChAieIfECgMxlKsRw0lWPm1k9C0,4627
|
|
163
167
|
diffsynth_engine/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
164
168
|
diffsynth_engine/utils/cache.py,sha256=Ivef22pCuhEq-4H00gSvkLS8ceVZoGis7OSitYL6gH4,2101
|
|
165
|
-
diffsynth_engine/utils/constants.py,sha256=
|
|
169
|
+
diffsynth_engine/utils/constants.py,sha256=3D1MIuD6tqJDTu5dXWVyUz3KvDx6eVZOSCdIta4v83o,3458
|
|
166
170
|
diffsynth_engine/utils/download.py,sha256=8O56zQr2taY4BnCBEKVk3YGorTqTTCqTblf7tI9otXA,6721
|
|
167
171
|
diffsynth_engine/utils/env.py,sha256=43x-kBjt5zI2cwZ9G4BOeTbedi2k6TuBzHGOBeFbFvU,280
|
|
168
172
|
diffsynth_engine/utils/flag.py,sha256=6zQLnoEaU69pBEyhavCgydQfP0khw5ppCU7sue4yRqg,1370
|
|
169
173
|
diffsynth_engine/utils/fp8_linear.py,sha256=k34YFWo2dc3t8aKjHaCW9CbQMOTqXxaDHk8aw8aKif4,3857
|
|
170
174
|
diffsynth_engine/utils/gguf.py,sha256=ZWvw46V4g4uVyAR_oCq-4K5nPdKVrYk3u47uXMgA9lU,14092
|
|
171
|
-
diffsynth_engine/utils/image.py,sha256=
|
|
175
|
+
diffsynth_engine/utils/image.py,sha256=PiDButjv0fsRS23kpQgCLZAlBumpzQmNnolfvb5EKQ0,9626
|
|
172
176
|
diffsynth_engine/utils/loader.py,sha256=Z5v1WNDWFY0OrVubB70j5VU3zeaAfEK_j8c1KrGI4yM,1240
|
|
173
177
|
diffsynth_engine/utils/lock.py,sha256=1Ipgst9eEFfFdViAvD5bxdB6HnHHBcqWYOb__fGaPUI,1601
|
|
174
178
|
diffsynth_engine/utils/logging.py,sha256=XB0xTT8PBN6btkOjFtOvjlrOCRVgDGT8PFAp1vmse28,467
|
|
@@ -177,12 +181,12 @@ diffsynth_engine/utils/onnx.py,sha256=jeWUudJHnESjuiEAHyUZYUZz7dCj34O9aGjHCe8yjW
|
|
|
177
181
|
diffsynth_engine/utils/parallel.py,sha256=vCgbCPSOv6bXF0nGKfYPs95sQQs33FsWJ05PhGmFqLM,18653
|
|
178
182
|
diffsynth_engine/utils/platform.py,sha256=nbpG-XHJFRmYY6u_e7IBQ9Q6GyItrIkKf3VKuBPTUpY,627
|
|
179
183
|
diffsynth_engine/utils/prompt.py,sha256=YItMchoVzsG6y-LB4vzzDUWrkhKRVlt1HfVhxZjSxMQ,280
|
|
180
|
-
diffsynth_engine/utils/video.py,sha256=
|
|
184
|
+
diffsynth_engine/utils/video.py,sha256=GoMyc2as4_VqfWX4pjQyAWh9QObsFMov42zADVZNaJw,2194
|
|
181
185
|
diffsynth_engine/utils/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
182
186
|
diffsynth_engine/utils/memory/linear_regression.py,sha256=oW_EQEw13oPoyUrxiL8A7Ksa5AuJ2ynI2qhCbfAuZbg,3930
|
|
183
187
|
diffsynth_engine/utils/memory/memory_predcit_model.py,sha256=EXprSl_zlVjgfMWNXP-iw83Ot3hyMcgYaRPv-dvyL84,3943
|
|
184
|
-
diffsynth_engine-0.5.1.
|
|
185
|
-
diffsynth_engine-0.5.1.
|
|
186
|
-
diffsynth_engine-0.5.1.
|
|
187
|
-
diffsynth_engine-0.5.1.
|
|
188
|
-
diffsynth_engine-0.5.1.
|
|
188
|
+
diffsynth_engine-0.5.1.dev4.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
|
|
189
|
+
diffsynth_engine-0.5.1.dev4.dist-info/METADATA,sha256=yVR8JT3eIkasT_Dm9AOwenKN_SZdeGCrBaBy195JaVA,1163
|
|
190
|
+
diffsynth_engine-0.5.1.dev4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
191
|
+
diffsynth_engine-0.5.1.dev4.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
|
|
192
|
+
diffsynth_engine-0.5.1.dev4.dist-info/RECORD,,
|
|
File without changes
|
{diffsynth_engine-0.5.1.dev2.dist-info → diffsynth_engine-0.5.1.dev4.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{diffsynth_engine-0.5.1.dev2.dist-info → diffsynth_engine-0.5.1.dev4.dist-info}/top_level.txt
RENAMED
|
File without changes
|