diffsynth-engine 0.5.1.dev2__py3-none-any.whl → 0.5.1.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  import torch
2
+ from torchvision import transforms
2
3
  import numpy as np
3
4
  import math
4
5
  from PIL import Image
@@ -32,6 +33,12 @@ def tensor_to_image(t: torch.Tensor, denormalize: bool = True) -> Image.Image:
32
33
  return Image.fromarray(t, mode=mode)
33
34
 
34
35
 
36
def resize_and_center_crop(image, height: int, width: int):
    """Resize ``image`` so it covers ``(height, width)``, then center-crop to that size.

    The aspect ratio is preserved by the resize. The scale is chosen so that
    both resized sides are at least as large as the target, which means the
    center crop never has to pad the result with zeros.

    Args:
        image: Input accepted by torchvision transforms (PIL image or tensor).
        height: Output height in pixels.
        width: Output width in pixels.

    Returns:
        The resized and center-cropped image, of size ``(height, width)``.
    """
    # get_image_size returns [width, height].
    src_width, src_height = transforms.functional.get_image_size(image)

    # Pick the side that needs the larger scale factor; compare with integer
    # cross-multiplication so the decision is exact.
    if height * src_width >= width * src_height:
        # Height is the binding constraint: match it, let width overshoot.
        resized_size = (height, max(width, int(height * src_width / src_height)))
    else:
        # Width is the binding constraint: match it, let height overshoot.
        resized_size = (max(height, int(width * src_height / src_width)), width)

    resize_operation = transforms.Resize(resized_size)
    crop_operation = transforms.CenterCrop((height, width))
    return transforms.Compose([resize_operation, crop_operation])(image)
40
+
41
+
35
42
  class ChannelDimension(Enum):
36
43
  FIRST = "channels_first"
37
44
  LAST = "channels_last"
@@ -3,6 +3,7 @@ import imageio.v3 as iio
3
3
  import numpy as np
4
4
  from PIL import Image
5
5
  from typing import List
6
+ from moviepy import ImageSequenceClip, AudioFileClip, VideoClip
6
7
 
7
8
 
8
9
  class VideoReader:
@@ -38,3 +39,28 @@ def save_video(frames, save_path, fps=15):
38
39
  # 使用 imageio 写入 .webm 文件
39
40
  with iio.imopen(save_path, "w", plugin="FFMPEG") as writer:
40
41
  writer.write(frames, fps=fps, codec=codec)
42
+
43
+
44
def read_n_frames(frames: List[Image.Image], original_fps: int, n_frames: int, target_fps: int = 16) -> List[Image.Image]:
    """Subsample ``frames`` from ``original_fps`` towards ``target_fps``.

    Every ``round(original_fps / target_fps)``-th frame is kept (at least every
    frame), starting from the first one, until ``n_frames`` frames have been
    collected or the input runs out.

    Args:
        frames: Source frames in playback order.
        original_fps: Frame rate of ``frames``.
        n_frames: Maximum number of frames to return.
        target_fps: Desired frame rate of the returned sequence.

    Returns:
        A new list with at most ``n_frames`` sampled frames.
    """
    total = len(frames)
    step = max(1, round(original_fps / target_fps))
    # Stop at whichever comes first: the requested count or the end of input.
    stop = min(n_frames * step, total)
    return [frames[idx] for idx in range(0, stop, step)]
54
+
55
+
56
def save_video_with_audio(frames: List[Image.Image], audio_path: str, target_video_path: str, fps: int = 16):
    """Encode ``frames`` as an H.264 video with the audio track from ``audio_path``.

    The longer of the two streams is trimmed to the duration of the shorter
    one before they are combined.

    Args:
        frames: Video frames in playback order.
        audio_path: Path of the audio file to attach.
        target_video_path: Path of the video file to write.
        fps: Frame rate of the output video.
    """
    # One array per frame, in playback order.
    video = [np.array(frame) for frame in frames]
    video_clip = ImageSequenceClip(video, fps=fps)
    audio_clip = AudioFileClip(audio_path)
    try:
        if audio_clip.duration > video_clip.duration:
            trimmed_audio = audio_clip.subclipped(0, video_clip.duration)
            trimmed_video = video_clip
        else:
            trimmed_audio = audio_clip
            trimmed_video = video_clip.subclipped(0, audio_clip.duration)
        video_with_audio = trimmed_video.with_audio(trimmed_audio)
        video_with_audio.write_videofile(target_video_path, codec="libx264")
    finally:
        # Release the audio reader even if encoding fails; close the source
        # clips rather than the trimmed ones derived from them.
        audio_clip.close()
        video_clip.close()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diffsynth_engine
3
- Version: 0.5.1.dev2
3
+ Version: 0.5.1.dev4
4
4
  Author: MuseAI x ModelScope
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Operating System :: OS Independent
@@ -25,6 +25,8 @@ Requires-Dist: imageio[ffmpeg]
25
25
  Requires-Dist: yunchang; sys_platform == "linux"
26
26
  Requires-Dist: onnxruntime
27
27
  Requires-Dist: opencv-python
28
+ Requires-Dist: moviepy
29
+ Requires-Dist: librosa
28
30
  Requires-Dist: scikit-image
29
31
  Requires-Dist: trimesh
30
32
  Provides-Extra: dev
@@ -1,4 +1,4 @@
1
- diffsynth_engine/__init__.py,sha256=2HxhhjHDK4DdGgTRCqV0jLZFR5GlgrnCHMnUbh8GFLg,1914
1
+ diffsynth_engine/__init__.py,sha256=Pn2kE8Y_Z-PCqqhhsIbUwLQIAf5sQti-ozitPr-WUHg,1986
2
2
  diffsynth_engine/algorithm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  diffsynth_engine/algorithm/noise_scheduler/__init__.py,sha256=YvcwE2tCNua-OAX9GEPm0EXsINNWH4XvJMNZb-uaZMM,745
4
4
  diffsynth_engine/algorithm/noise_scheduler/base_scheduler.py,sha256=WICrLEh7b2TdZMMEN14NqiYydj7dxXT6RolXymKiMk8,188
@@ -45,6 +45,7 @@ diffsynth_engine/conf/models/wan/dit/wan2.1-i2v-14b.json,sha256=BkDV80TkA-_vTRR_
45
45
  diffsynth_engine/conf/models/wan/dit/wan2.1-t2v-1.3b.json,sha256=M_h55-mMhpgXUuY85sBK6-_f4fg3bfCa6T7n1CyMP3s,209
46
46
  diffsynth_engine/conf/models/wan/dit/wan2.1-t2v-14b.json,sha256=7i2Hq8BRH4kDVYBKcIBt8m3vCl_HGZZPFY5fmFw4xgs,210
47
47
  diffsynth_engine/conf/models/wan/dit/wan2.2-i2v-a14b.json,sha256=7OmPEfreIu8Ex6NDr1IW69zmKRp21hZkmg_9yg6sUg8,322
48
+ diffsynth_engine/conf/models/wan/dit/wan2.2-s2v-14b.json,sha256=r5L_BT2RHlFpztzXzoOVBK-WzNelYHEo3yDs8GKtWlk,284
48
49
  diffsynth_engine/conf/models/wan/dit/wan2.2-t2v-a14b.json,sha256=MqxjGwq8VqD-1RwbPocbkKx0JzsMgwn18hfVK7M0d4k,312
49
50
  diffsynth_engine/conf/models/wan/dit/wan2.2-ti2v-5b.json,sha256=tO7nymyqQgBIgxlswITnIc_MsRr1RRPhZbbhJ-1gHow,257
50
51
  diffsynth_engine/conf/models/wan/vae/wan-vae-keymap.json,sha256=u9MJ3yRL45kdqRVoBnYbHkmuUmOseUFtwte-_9ZvdHc,25224
@@ -77,14 +78,14 @@ diffsynth_engine/conf/tokenizers/wan/umt5-xxl/special_tokens_map.json,sha256=e4q
77
78
  diffsynth_engine/conf/tokenizers/wan/umt5-xxl/spiece.model,sha256=45CaZ7eAZQs1z1Kax4KtK2sm5tH4SdP7tqhykF9FJFg,4548313
78
79
  diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer.json,sha256=bhl7TT29cdoUtOslX0-pHJwfIGiyCi3iRylnyj0iYCs,16837417
79
80
  diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer_config.json,sha256=7Zo6iw-qcacKMoR-BDX-A25uES1N9O23u0ipIeNE3AU,61728
80
- diffsynth_engine/configs/__init__.py,sha256=jKVFQGlcnOgD0ABIM40n69HpxgMe-QYYhqz8j9-u_eA,874
81
+ diffsynth_engine/configs/__init__.py,sha256=bEpjrHs5OVy4TAdXI1X9aDAbZOhLecT6U9R2xaR2siA,992
81
82
  diffsynth_engine/configs/controlnet.py,sha256=EpUkCdRNk2G5uo56syaOzPFdR9g0sDHRXckagmMsgaQ,948
82
- diffsynth_engine/configs/pipeline.py,sha256=jdaJicZh-bJDZPaY1ZVxWXXlWvoIXagPsmB6J8eaaEc,11537
83
+ diffsynth_engine/configs/pipeline.py,sha256=Wh3fD9xsnpjbPos-KLJxlCjIxVHms8oqadkS03H3yuI,12721
83
84
  diffsynth_engine/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
84
85
  diffsynth_engine/models/__init__.py,sha256=8Ze7cSE8InetgXWTNb0neVA2Q44K7WlE-h7O-02m2sY,119
85
86
  diffsynth_engine/models/base.py,sha256=PlBDpimIYnysjbvlIwsCDc0uyIAjdiTlwuOUFGdt-bk,2556
86
87
  diffsynth_engine/models/basic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
87
- diffsynth_engine/models/basic/attention.py,sha256=H-LKiVD09GFj6hIwiIb5wiDS1HVCslN_TXgR7FrkUX4,10343
88
+ diffsynth_engine/models/basic/attention.py,sha256=Or57PjHGVOjjzSPUuH2nbuf5r_VtrVQYRiL1v5zT7T0,10646
88
89
  diffsynth_engine/models/basic/lora.py,sha256=PT-A3pwIuUrW2w3TnNlBPb1KRj70QYiBaoCvLnkR5cs,10652
89
90
  diffsynth_engine/models/basic/relative_position_emb.py,sha256=rCXOweZMcayVnNUVvBcYXMdhHS257B_PC8PZSWxvhNQ,2540
90
91
  diffsynth_engine/models/basic/timestep.py,sha256=WJODYqkSXEM0wcS42YkkfrGwxWt0e60zMTkDdUBQqBw,2810
@@ -131,11 +132,13 @@ diffsynth_engine/models/text_encoder/t5.py,sha256=8JXVzqJLMrtn7qC_XANK4u76vEGavd
131
132
  diffsynth_engine/models/vae/__init__.py,sha256=TFSIXZ-UyRaZbEr5KUXm1d4koS5gbgsCi7Soh6jDV0Y,140
132
133
  diffsynth_engine/models/vae/vae.py,sha256=q2haNS0N0Y6tWv4X7ocBcruE8TXQkHDSFoPzXHv7YUo,15791
133
134
  diffsynth_engine/models/wan/__init__.py,sha256=eYwZ2Upo2mTjaAcBWuSft1m4mLnqE47bz2V_u-WtkwQ,246
134
- diffsynth_engine/models/wan/wan_dit.py,sha256=Ix8xf7neQKMmBvlJe_-SAMdGPUbndEWeahHQ7VRynGc,19867
135
+ diffsynth_engine/models/wan/wan_audio_encoder.py,sha256=W7sUo3Fse33MNsZTYiYYEe0Y_YZUPpPZex0VNz8OQk8,13647
136
+ diffsynth_engine/models/wan/wan_dit.py,sha256=ocCAYl3xP0UdB9dhz9R1pRfBBESEHxDUu116hORe39U,20080
135
137
  diffsynth_engine/models/wan/wan_image_encoder.py,sha256=VE7crdTxOFN2UCMN2cQlvHB9BilSbKOBQYgnXgl4E2Y,14313
138
+ diffsynth_engine/models/wan/wan_s2v_dit.py,sha256=sOJsSs1snI-ZGPJS8utstmgj0wcYwloyZ0C14hQHFkg,23597
136
139
  diffsynth_engine/models/wan/wan_text_encoder.py,sha256=Zv0q7Rc0Uq3SM0i1PokdmaKK5vhhJogpXxpq1tYHrtU,10768
137
140
  diffsynth_engine/models/wan/wan_vae.py,sha256=ogXrVlwmzXR4iLxjSCkBPtYW8KWebnvvd2UtPZeoziY,38853
138
- diffsynth_engine/pipelines/__init__.py,sha256=bEdXa533rXgjySO0aiLlfLkVmxoP6Yy_I4kj3WkpxnI,528
141
+ diffsynth_engine/pipelines/__init__.py,sha256=jh-4LSJ0vqlXiT8BgFgRIQxuAr2atEPyHrxXWj-Ud1U,604
139
142
  diffsynth_engine/pipelines/base.py,sha256=RTkVwWaWXr5ujqn5-UBHvdPddYwr-uvChj9-fmoXrms,13729
140
143
  diffsynth_engine/pipelines/flux_image.py,sha256=a-MaHuguV7Z6LJukC_Tvp7d9_2dnrAaJZ4MZH_sKsKo,49116
141
144
  diffsynth_engine/pipelines/hunyuan3d_shape.py,sha256=fwNKET54KjCiWDpW2S1Fk-p3nfJreZ-RH7p46VLawEQ,7911
@@ -143,6 +146,7 @@ diffsynth_engine/pipelines/qwen_image.py,sha256=EAYoq1QkdOSie_yVZG9enxJJRcncwVFP
143
146
  diffsynth_engine/pipelines/sd_image.py,sha256=nr-Nhsnomq8CsUqhTM3i2l2zG01YjwXdfRXgr_bC3F0,17891
144
147
  diffsynth_engine/pipelines/sdxl_image.py,sha256=FaihRd9Rt_qtqup2xEbHViVIFwFZVyvekYW4lCodNKY,21692
145
148
  diffsynth_engine/pipelines/utils.py,sha256=lk7sFGEk-fGjgadLpwwppHKG-yZ0RC-4ZmHW7pRRe8A,473
149
+ diffsynth_engine/pipelines/wan_s2v.py,sha256=Ekrd93rjoqUMcas7a6u9cA3Lon_21h6ESteNFcFFPas,29378
146
150
  diffsynth_engine/pipelines/wan_video.py,sha256=PZZcjmI0RfKWAy_6835qUeGZi7nuLqLOAEng86EqaxA,28127
147
151
  diffsynth_engine/processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
148
152
  diffsynth_engine/processor/canny_processor.py,sha256=hV30NlblTkEFUAmF_O-LJrNlGVM2SFrqq6okfF8VpOo,602
@@ -162,13 +166,13 @@ diffsynth_engine/tools/flux_reference_tool.py,sha256=6v0NRZPsDEHFlPruO-ZJTB4rYWx
162
166
  diffsynth_engine/tools/flux_replace_tool.py,sha256=AOyEGxHsaNwpTS2VChAieIfECgMxlKsRw0lWPm1k9C0,4627
163
167
  diffsynth_engine/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
164
168
  diffsynth_engine/utils/cache.py,sha256=Ivef22pCuhEq-4H00gSvkLS8ceVZoGis7OSitYL6gH4,2101
165
- diffsynth_engine/utils/constants.py,sha256=csKPtXA9YFoRPnhUBmnw3zf1KlTV963a43Nu4j9TqOE,3354
169
+ diffsynth_engine/utils/constants.py,sha256=3D1MIuD6tqJDTu5dXWVyUz3KvDx6eVZOSCdIta4v83o,3458
166
170
  diffsynth_engine/utils/download.py,sha256=8O56zQr2taY4BnCBEKVk3YGorTqTTCqTblf7tI9otXA,6721
167
171
  diffsynth_engine/utils/env.py,sha256=43x-kBjt5zI2cwZ9G4BOeTbedi2k6TuBzHGOBeFbFvU,280
168
172
  diffsynth_engine/utils/flag.py,sha256=6zQLnoEaU69pBEyhavCgydQfP0khw5ppCU7sue4yRqg,1370
169
173
  diffsynth_engine/utils/fp8_linear.py,sha256=k34YFWo2dc3t8aKjHaCW9CbQMOTqXxaDHk8aw8aKif4,3857
170
174
  diffsynth_engine/utils/gguf.py,sha256=ZWvw46V4g4uVyAR_oCq-4K5nPdKVrYk3u47uXMgA9lU,14092
171
- diffsynth_engine/utils/image.py,sha256=xZ_bEU-DdoSwMPG7jpP1daAro2tsy9ddeXKbXqXaeC0,9335
175
+ diffsynth_engine/utils/image.py,sha256=PiDButjv0fsRS23kpQgCLZAlBumpzQmNnolfvb5EKQ0,9626
172
176
  diffsynth_engine/utils/loader.py,sha256=Z5v1WNDWFY0OrVubB70j5VU3zeaAfEK_j8c1KrGI4yM,1240
173
177
  diffsynth_engine/utils/lock.py,sha256=1Ipgst9eEFfFdViAvD5bxdB6HnHHBcqWYOb__fGaPUI,1601
174
178
  diffsynth_engine/utils/logging.py,sha256=XB0xTT8PBN6btkOjFtOvjlrOCRVgDGT8PFAp1vmse28,467
@@ -177,12 +181,12 @@ diffsynth_engine/utils/onnx.py,sha256=jeWUudJHnESjuiEAHyUZYUZz7dCj34O9aGjHCe8yjW
177
181
  diffsynth_engine/utils/parallel.py,sha256=vCgbCPSOv6bXF0nGKfYPs95sQQs33FsWJ05PhGmFqLM,18653
178
182
  diffsynth_engine/utils/platform.py,sha256=nbpG-XHJFRmYY6u_e7IBQ9Q6GyItrIkKf3VKuBPTUpY,627
179
183
  diffsynth_engine/utils/prompt.py,sha256=YItMchoVzsG6y-LB4vzzDUWrkhKRVlt1HfVhxZjSxMQ,280
180
- diffsynth_engine/utils/video.py,sha256=Ne0rd2lb59UT1q5EotpjlY7OT8F9oTCFDyo1ST77uoQ,1004
184
+ diffsynth_engine/utils/video.py,sha256=GoMyc2as4_VqfWX4pjQyAWh9QObsFMov42zADVZNaJw,2194
181
185
  diffsynth_engine/utils/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
182
186
  diffsynth_engine/utils/memory/linear_regression.py,sha256=oW_EQEw13oPoyUrxiL8A7Ksa5AuJ2ynI2qhCbfAuZbg,3930
183
187
  diffsynth_engine/utils/memory/memory_predcit_model.py,sha256=EXprSl_zlVjgfMWNXP-iw83Ot3hyMcgYaRPv-dvyL84,3943
184
- diffsynth_engine-0.5.1.dev2.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
185
- diffsynth_engine-0.5.1.dev2.dist-info/METADATA,sha256=MB7Kzzpm_b8MLnSKUJH5uzV4gFTc8pgEOpAziSHA70E,1117
186
- diffsynth_engine-0.5.1.dev2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
187
- diffsynth_engine-0.5.1.dev2.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
188
- diffsynth_engine-0.5.1.dev2.dist-info/RECORD,,
188
+ diffsynth_engine-0.5.1.dev4.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
189
+ diffsynth_engine-0.5.1.dev4.dist-info/METADATA,sha256=yVR8JT3eIkasT_Dm9AOwenKN_SZdeGCrBaBy195JaVA,1163
190
+ diffsynth_engine-0.5.1.dev4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
191
+ diffsynth_engine-0.5.1.dev4.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
192
+ diffsynth_engine-0.5.1.dev4.dist-info/RECORD,,