diffsynth-engine 0.6.1.dev34__py3-none-any.whl → 0.6.1.dev35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,377 @@
1
+ import torch
2
+ import torch.distributed as dist
3
+ import math
4
+ import json
5
+ from typing import Callable, List, Dict, Tuple, Optional, Union
6
+ from tqdm import tqdm
7
+
8
+ from diffsynth_engine.configs import (
9
+ ZImagePipelineConfig,
10
+ ZImageStateDicts,
11
+ )
12
+ from diffsynth_engine.models.basic.lora import LoRAContext
13
+
14
+ from diffsynth_engine.models.z_image import (
15
+ ZImageDiT,
16
+ Qwen3Model,
17
+ Qwen3Config,
18
+ )
19
+ from diffsynth_engine.tokenizers.qwen2 import Qwen2TokenizerFast
20
+ from diffsynth_engine.utils.constants import (
21
+ Z_IMAGE_TEXT_ENCODER_CONFIG_FILE,
22
+ Z_IMAGE_TOKENIZER_CONF_PATH,
23
+ )
24
+ from diffsynth_engine.models.flux import FluxVAEDecoder
25
+ from diffsynth_engine.pipelines import BasePipeline, LoRAStateDictConverter
26
+ from diffsynth_engine.pipelines.utils import calculate_shift
27
+ from diffsynth_engine.algorithm.noise_scheduler import RecifitedFlowScheduler
28
+ from diffsynth_engine.algorithm.sampler import FlowMatchEulerSampler
29
+ from diffsynth_engine.utils.parallel import ParallelWrapper
30
+ from diffsynth_engine.utils import logging
31
+ from diffsynth_engine.utils.fp8_linear import enable_fp8_linear
32
+ from diffsynth_engine.utils.download import fetch_model
33
+
34
+ logger = logging.get_logger(__name__)
35
+
36
+
37
class ZImageLoRAConverter(LoRAStateDictConverter):
    """Regroups a flat LoRA state dict into per-module down/up/alpha entries for the DiT."""

    def _from_diffusers(self, lora_state_dict: Dict[str, torch.Tensor]) -> Dict[str, Dict[str, torch.Tensor]]:
        """Collect every ``lora_A``/``lora_B`` pair under its target module name.

        Args:
            lora_state_dict: Flat mapping whose keys contain ``lora_A.weight`` /
                ``lora_B.weight`` (and optionally ``alpha``) suffixes.

        Returns:
            ``{"dit": {module_name: {"down", "up", "alpha"}}}``; ``alpha`` is
            ``None`` when the state dict carries no alpha entry for the module.

        Raises:
            KeyError: If a ``lora_A.weight`` key has no matching ``lora_B.weight`` key.
        """
        converted = {}
        for name, down_weight in lora_state_dict.items():
            # Only the "down" projection keys drive the conversion; the matching
            # "up" and "alpha" entries are looked up from them.
            if "lora_A.weight" not in name:
                continue

            module_name = name.replace(".lora_A.weight", "").replace("transformer.", "")
            # The engine-side module is addressed as "attn.to_out" (no ".0" suffix).
            module_name = module_name.replace("attn.to_out.0", "attn.to_out")

            converted[module_name] = {
                "down": down_weight,
                "up": lora_state_dict[name.replace("lora_A.weight", "lora_B.weight")],
                "alpha": lora_state_dict.get(name.replace("lora_A.weight", "alpha")),
            }
        return {"dit": converted}

    def convert(self, lora_state_dict: Dict[str, torch.Tensor]) -> Dict[str, Dict[str, torch.Tensor]]:
        """Convert ``lora_state_dict``; delegates to :meth:`_from_diffusers` unconditionally."""
        return self._from_diffusers(lora_state_dict)
57
+
58
+
59
+ class ZImagePipeline(BasePipeline):
60
+ lora_converter = ZImageLoRAConverter()
61
+
62
def __init__(
    self,
    config: ZImagePipelineConfig,
    tokenizer: Qwen2TokenizerFast,
    text_encoder: Qwen3Model,
    dit: ZImageDiT,
    vae_decoder: FluxVAEDecoder,
):
    """Wire the already-constructed components into a pipeline.

    Args:
        config: Pipeline configuration; its VAE tiling, device and model dtype
            settings are forwarded to the base pipeline.
        tokenizer: Tokenizer used by ``encode_prompt``.
        text_encoder: Text encoder model.
        dit: Diffusion transformer.
        vae_decoder: VAE decoder.
    """
    base_options = dict(
        vae_tiled=config.vae_tiled,
        vae_tile_size=config.vae_tile_size,
        vae_tile_stride=config.vae_tile_stride,
        device=config.device,
        dtype=config.model_dtype,
    )
    super().__init__(**base_options)
    self.config = config

    # Sampling machinery
    self.noise_scheduler = RecifitedFlowScheduler(shift=3.0, use_dynamic_shifting=True)
    self.sampler = FlowMatchEulerSampler()
    self.tokenizer = tokenizer

    # Model components (assignment order kept as in the original)
    self.text_encoder = text_encoder
    self.dit = dit
    self.vae_decoder = vae_decoder

    # Attribute names of the model components listed above.
    self.model_names = ["text_encoder", "dit", "vae_decoder"]
89
+
90
@classmethod
def from_pretrained(cls, model_path_or_config: str | ZImagePipelineConfig) -> "ZImagePipeline":
    """Load DiT, VAE and text-encoder checkpoints and build a pipeline from them.

    Args:
        model_path_or_config: Either a model path (wrapped into a fresh
            ``ZImagePipelineConfig``) or a ready configuration object. When a
            config is passed, its ``vae_path`` / ``encoder_path`` are filled in
            in place if they are ``None``.

    Returns:
        Whatever ``cls.from_state_dict`` returns for the loaded state dicts.
    """
    config = (
        ZImagePipelineConfig(model_path=model_path_or_config)
        if isinstance(model_path_or_config, str)
        else model_path_or_config
    )

    logger.info(f"Loading state dict from {config.model_path} ...")
    dit_weights = cls.load_model_checkpoint(
        config.model_path, device="cpu", dtype=config.model_dtype, convert_dtype=False
    )

    # Resolve the VAE checkpoint through fetch_model when none is configured.
    if config.vae_path is None:
        config.vae_path = fetch_model(config.model_path, path="vae/diffusion_pytorch_model.safetensors")
    logger.info(f"Loading VAE from {config.vae_path} ...")
    vae_weights = cls.load_model_checkpoint(config.vae_path, device="cpu", dtype=config.vae_dtype)

    # Same fallback for the text encoder.
    if config.encoder_path is None:
        config.encoder_path = fetch_model(config.model_path, path="text_encoder")
    logger.info(f"Loading Text Encoder from {config.encoder_path} ...")
    encoder_weights = cls.load_model_checkpoint(config.encoder_path, device="cpu", dtype=config.encoder_dtype)

    bundle = ZImageStateDicts(model=dit_weights, vae=vae_weights, encoder=encoder_weights)
    return cls.from_state_dict(bundle, config)
121
+
122
@classmethod
def from_state_dict(cls, state_dicts: ZImageStateDicts, config: ZImagePipelineConfig) -> "ZImagePipeline":
    """Build the pipeline, wrapped in a ``ParallelWrapper`` when ``config.parallelism > 1``.

    Args:
        state_dicts: Loaded weights for the DiT, VAE and text encoder.
        config: Pipeline configuration.

    Returns:
        The pipeline itself, or the ``ParallelWrapper`` that loaded it.
    """
    if not (config.parallelism > 1):
        return cls._from_state_dict(state_dicts, config)

    wrapper = ParallelWrapper(
        cfg_degree=config.cfg_degree,
        sp_ulysses_degree=config.sp_ulysses_degree,
        sp_ring_degree=config.sp_ring_degree,
        tp_degree=config.tp_degree,
        use_fsdp=config.use_fsdp,
    )
    # The wrapper is handed the factory and its arguments rather than a built pipeline.
    wrapper.load_module(cls._from_state_dict, state_dicts=state_dicts, config=config)
    return wrapper
136
+
137
@classmethod
def _from_state_dict(cls, state_dicts: ZImageStateDicts, config: ZImagePipelineConfig) -> "ZImagePipeline":
    """Instantiate every component from ``state_dicts`` and assemble the pipeline.

    Args:
        state_dicts: Provides ``encoder``, ``vae`` and ``model`` state dicts.
        config: Pipeline configuration (devices, dtypes, offload / fp8 / compile flags).

    Returns:
        The assembled pipeline, switched to eval mode, with offload, fp8
        autocast and compilation applied according to ``config``.
    """
    # With an offload mode configured, components are created on CPU instead of config.device.
    init_device = "cpu" if config.offload_mode is not None else config.device
    with open(Z_IMAGE_TEXT_ENCODER_CONFIG_FILE, "r", encoding="utf-8") as f:
        qwen3_config = Qwen3Config(**json.load(f))
    text_encoder = Qwen3Model.from_state_dict(
        state_dicts.encoder, config=qwen3_config, device=init_device, dtype=config.encoder_dtype
    )
    tokenizer = Qwen2TokenizerFast.from_pretrained(Z_IMAGE_TOKENIZER_CONF_PATH)
    vae_decoder = FluxVAEDecoder.from_state_dict(state_dicts.vae, device=init_device, dtype=config.vae_dtype)

    # NOTE(review): indentation is lost in this view; the fp8 step is assumed to sit
    # inside the LoRAContext block — confirm against the packaged file.
    with LoRAContext():
        # Under FSDP the DiT is always created on CPU, regardless of init_device.
        dit = ZImageDiT.from_state_dict(
            state_dicts.model,
            device=("cpu" if config.use_fsdp else init_device),
            dtype=config.model_dtype,
        )
        if config.use_fp8_linear:
            enable_fp8_linear(dit)

    pipe = cls(
        config=config,
        tokenizer=tokenizer,
        text_encoder=text_encoder,
        dit=dit,
        vae_decoder=vae_decoder,
    )
    pipe.eval()

    if config.offload_mode is not None:
        pipe.enable_cpu_offload(config.offload_mode, config.offload_to_disk)

    # fp8 weights: the pipeline dtype is switched to bfloat16, which is also
    # passed as the compute dtype for fp8 autocast on the DiT.
    if config.model_dtype == torch.float8_e4m3fn:
        pipe.dtype = torch.bfloat16
        pipe.enable_fp8_autocast(
            model_names=["dit"], compute_dtype=pipe.dtype, use_fp8_linear=config.use_fp8_linear
        )

    if config.use_torch_compile:
        pipe.compile()

    return pipe
179
+
180
def update_weights(self, state_dicts: ZImageStateDicts) -> None:
    """Replace the weights of the DiT, text encoder and VAE decoder.

    Args:
        state_dicts: Provides the ``model``, ``encoder`` and ``vae`` state dicts.
            The text-encoder weights live under ``encoder`` — the same field the
            pipeline is constructed from — not ``text_encoder``.
    """
    device = self.config.device
    self.update_component(self.dit, state_dicts.model, device, self.config.model_dtype)
    # Fixed: previously read ``state_dicts.text_encoder``, a field that is never
    # populated elsewhere in this module (the state dicts are built with ``encoder=``).
    self.update_component(self.text_encoder, state_dicts.encoder, device, self.config.encoder_dtype)
    self.update_component(self.vae_decoder, state_dicts.vae, device, self.config.vae_dtype)
186
+
187
def compile(self):
    """Compile the DiT's repeated blocks when the DiT offers ``compile_repeated_blocks``; otherwise do nothing."""
    if not hasattr(self.dit, "compile_repeated_blocks"):
        return
    self.dit.compile_repeated_blocks()
190
+
191
def load_loras(self, lora_list: List[Tuple[str, float]], fused: bool = True, save_original_weight: bool = False):
    """Validate the parallel configuration, then defer to the base pipeline's ``load_loras``.

    Args:
        lora_list: ``(str, float)`` pairs, passed through to the base implementation.
        fused: Passed through; must be ``False`` when ``config.use_fsdp`` is set.
        save_original_weight: Passed through to the base implementation.

    Raises:
        AssertionError: If tensor parallelism is enabled (``tp_degree`` is neither
            ``None`` nor 1), or if a fused load is requested while FSDP is enabled.
    """
    tp_degree = self.config.tp_degree
    tensor_parallel_off = tp_degree is None or tp_degree == 1
    assert tensor_parallel_off, (
        "load LoRA is not allowed when tensor parallel is enabled; "
        "set tp_degree=None or tp_degree=1 during pipeline initialization"
    )

    fused_under_fsdp = self.config.use_fsdp and fused
    assert not fused_under_fsdp, (
        "load fused LoRA is not allowed when fully sharded data parallel is enabled; "
        "either load LoRA with fused=False or set use_fsdp=False during pipeline initialization"
    )

    super().load_loras(lora_list, fused, save_original_weight)
201
+
202
def unload_loras(self):
    """Unload LoRA weights from the DiT (when it supports it) and restore the scheduler config."""
    if hasattr(self.dit, "unload_loras"):
        self.dit.unload_loras()
    # NOTE(review): indentation is lost in this view; the scheduler restore is assumed
    # to run unconditionally (outside the hasattr guard) — confirm against the packaged file.
    self.noise_scheduler.restore_config()
206
+
207
def apply_scheduler_config(self, scheduler_config: Dict):
    """Forward ``scheduler_config`` to the noise scheduler's ``update_config``."""
    scheduler = self.noise_scheduler
    scheduler.update_config(scheduler_config)
209
+
210
def prepare_latents(
    self,
    latents: torch.Tensor,
    num_inference_steps: int,
    mu: float,
):
    """Build the sigma/timestep schedule and move everything to the pipeline device and dtype.

    Args:
        latents: Initial latents.
        num_inference_steps: Number of steps requested from the noise scheduler.
        mu: Shift value forwarded to the scheduler as ``mu``.

    Returns:
        ``(latents, sigmas, timesteps)``, each converted to ``self.device`` / ``self.dtype``.
    """
    sigmas, timesteps = self.noise_scheduler.schedule(num_inference_steps, mu=mu, sigma_min=0, sigma_max=1.0)

    placement = {"device": self.device, "dtype": self.dtype}
    sigmas, timesteps, latents = (tensor.to(**placement) for tensor in (sigmas, timesteps, latents))
    return latents, sigmas, timesteps
223
+
224
def encode_prompt(
    self,
    prompt: str,
    max_sequence_length: int = 512,
):
    """Encode ``prompt`` into a list of unpadded text embeddings.

    Args:
        prompt: Prompt text, or ``None``.
        max_sequence_length: Forwarded to the tokenizer as ``max_length``.

    Returns:
        ``None`` when ``prompt`` is ``None``; otherwise a list with one tensor per
        batch row, holding the second-to-last hidden state at the positions where
        the attention mask is set.
    """
    if prompt is None:
        return None

    # The prompt is wrapped as a single user turn followed by an open assistant turn.
    chat_text = "<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n".format(prompt)
    tokens = self.tokenizer(
        [chat_text],
        max_length=max_sequence_length,
        padding_strategy="max_length",
    )
    token_ids = tokens["input_ids"].to(self.device)
    keep_mask = tokens["attention_mask"].to(self.device).bool()

    encoded = self.text_encoder(
        input_ids=token_ids,
        attention_mask=keep_mask,
        output_hidden_states=True,
    )
    penultimate = encoded["hidden_states"][-2]

    # Boolean indexing drops the padded positions of each row.
    return [row[row_mask] for row, row_mask in zip(penultimate, keep_mask)]
253
+
254
def predict_noise_with_cfg(
    self,
    latents: torch.Tensor,
    timestep: torch.Tensor,
    prompt_emb: List[torch.Tensor],
    negative_prompt_emb: List[torch.Tensor],
    cfg_scale: float = 5.0,
    cfg_truncation: float = 1.0,
    cfg_normalization: float = 0.0,  # 0.0 means disabled
    batch_cfg: bool = False,
):
    """Run the DiT with optional classifier-free guidance and return the negated prediction.

    Args:
        latents: Current latents; the batch size is taken from ``latents.shape[0]``.
        timestep: Current timestep; it is mapped to ``(1000 - t) / 1000`` before
            being handed to the DiT.
        prompt_emb: Positive prompt embeddings, one tensor per sample.
        negative_prompt_emb: Negative prompt embeddings, or ``None`` to skip guidance.
        cfg_scale: Guidance scale; guidance is skipped when it is not positive.
        cfg_truncation: When ``<= 1.0``, guidance is switched off once the mapped
            timestep exceeds this value.
        cfg_normalization: When positive, the guided prediction's norm is capped at
            ``cfg_normalization`` times the norm of the positive prediction.
        batch_cfg: Run positive and negative branches in a single DiT call.

    Returns:
        The combined prediction of the first sample, negated, with dimension 1
        squeezed and a leading batch dimension added.
    """
    t = timestep.expand(latents.shape[0])
    t = (1000 - t) / 1000
    progress = t[0].item()

    current_cfg_scale = cfg_scale
    if cfg_truncation <= 1.0 and progress > cfg_truncation:
        current_cfg_scale = 0.0

    do_cfg = current_cfg_scale > 0 and negative_prompt_emb is not None

    if not do_cfg:
        comb_pred = self.predict_noise(latents, t, prompt_emb)[0]
    else:
        if not batch_cfg:
            # Fixed: take the first sample here too, exactly as the no-CFG and
            # batched branches do. Previously the whole per-sample container
            # returned by predict_noise was fed into the guidance arithmetic.
            positive_noise_pred = self.predict_noise(latents, t, prompt_emb)[0]
            negative_noise_pred = self.predict_noise(latents, t, negative_prompt_emb)[0]
        else:
            latents_input = torch.cat([latents, latents], dim=0)
            t = torch.cat([t, t], dim=0)
            # List concatenation: positive embeddings first, then negative.
            prompt_input = prompt_emb + negative_prompt_emb

            noise_pred = self.predict_noise(latents_input, t, prompt_input)

            positive_noise_pred, negative_noise_pred = noise_pred[0], noise_pred[1]

        comb_pred = positive_noise_pred + current_cfg_scale * (positive_noise_pred - negative_noise_pred)

        if cfg_normalization is not None and cfg_normalization > 0:
            cond_norm = torch.linalg.vector_norm(positive_noise_pred)
            new_norm = torch.linalg.vector_norm(comb_pred)
            max_allowed_norm = cond_norm * cfg_normalization
            # Avoid dividing by a (near-)zero norm.
            new_norm = torch.where(new_norm < 1e-6, torch.ones_like(new_norm), new_norm)
            scale_factor = max_allowed_norm / new_norm
            # Only ever shrink the guided prediction, never amplify it.
            scale_factor = torch.clamp(scale_factor, max=1.0)
            comb_pred = comb_pred * scale_factor

    comb_pred = -comb_pred.squeeze(1).unsqueeze(0)
    return comb_pred
303
+
304
def predict_noise(
    self,
    latents: torch.Tensor,
    timestep: torch.Tensor,
    prompt_emb: List[torch.Tensor],
):
    """Call the DiT on per-sample latents.

    Args:
        latents: Batched latents; they are split along dim 0 and each sample
            gets a singleton dimension inserted at position 1.
        timestep: Passed to the DiT as ``timestep``.
        prompt_emb: Passed to the DiT as ``cap_feats``.

    Returns:
        The DiT output, unmodified.
    """
    self.load_models_to_device(["dit"])

    per_sample_latents = [sample.unsqueeze(1) for sample in latents.unbind(dim=0)]
    return self.dit(
        image=per_sample_latents,
        timestep=timestep,
        cap_feats=prompt_emb,
    )
320
+
321
@torch.no_grad()
def __call__(
    self,
    prompt: Union[str, List[str]],
    negative_prompt: Optional[Union[str, List[str]]] = None,
    height: int = 1024,
    width: int = 1024,
    num_inference_steps: int = 50,
    cfg_scale: float = 5.0,
    cfg_normalization: bool = False,
    cfg_truncation: float = 1.0,
    seed: Optional[int] = None,
    progress_callback: Optional[Callable] = None,
):
    """Generate an image for ``prompt``.

    Args:
        prompt: Positive prompt.
        negative_prompt: Negative prompt; when ``None`` no guidance is applied.
        height: Image height; validated to be a multiple of 16.
        width: Image width; validated to be a multiple of 16.
        num_inference_steps: Number of denoising steps.
        cfg_scale: Guidance scale forwarded to ``predict_noise_with_cfg``.
        cfg_normalization: Forwarded to ``predict_noise_with_cfg``.
        cfg_truncation: Forwarded to ``predict_noise_with_cfg``.
        seed: Seed for the initial noise.
        progress_callback: Called after every step as
            ``progress_callback(step_index, total_steps, "DENOISING")``.

    Returns:
        The result of ``vae_output_to_image`` on the decoded latents.
    """
    self.validate_image_size(height, width, multiple_of=16)

    self.load_models_to_device(["text_encoder"])
    prompt_embeds, negative_prompt_embeds = self.encode_prompt(prompt), self.encode_prompt(negative_prompt)
    self.model_lifecycle_finish(["text_encoder"])

    # Noise is drawn on CPU and then moved to the pipeline device.
    noise = self.generate_noise((1, 16, height // 8, width // 8), seed=seed, device="cpu", dtype=self.dtype).to(
        device=self.device
    )
    image_seq_len = math.ceil(height // 16) * math.ceil(width // 16)

    mu = calculate_shift(image_seq_len, base_seq_len=256, max_seq_len=4096, base_shift=0.5, max_shift=1.15)

    latents, sigmas, timesteps = self.prepare_latents(noise, num_inference_steps, mu)

    self.sampler.initialize(sigmas=sigmas)

    self.load_models_to_device(["dit"])
    # In a distributed run only rank 0 shows the progress bar.
    hide_progress = dist.is_initialized() and dist.get_rank() != 0

    for i, timestep in enumerate(tqdm(timesteps, disable=hide_progress)):
        timestep = timestep.unsqueeze(0).to(dtype=self.dtype)
        noise_pred = self.predict_noise_with_cfg(
            latents=latents,
            timestep=timestep,
            prompt_emb=prompt_embeds,
            negative_prompt_emb=negative_prompt_embeds,
            # Fixed: the caller's guidance scale was accepted but never forwarded,
            # so sampling always used predict_noise_with_cfg's default of 5.0.
            cfg_scale=cfg_scale,
            batch_cfg=self.config.batch_cfg,
            cfg_truncation=cfg_truncation,
            cfg_normalization=cfg_normalization,
        )
        latents = self.sampler.step(latents, noise_pred, i)
        if progress_callback is not None:
            progress_callback(i, len(timesteps), "DENOISING")

    self.model_lifecycle_finish(["dit"])

    self.load_models_to_device(["vae_decoder"])
    vae_output = self.decode_image(latents)
    image = self.vae_output_to_image(vae_output)
    # Offload all models
    self.load_models_to_device([])
    return image
@@ -14,6 +14,7 @@ SDXL_TOKENIZER_2_CONF_PATH = os.path.join(CONF_PATH, "tokenizers", "sdxl", "toke
14
14
  WAN_TOKENIZER_CONF_PATH = os.path.join(CONF_PATH, "tokenizers", "wan", "umt5-xxl")
15
15
  QWEN_IMAGE_TOKENIZER_CONF_PATH = os.path.join(CONF_PATH, "tokenizers", "qwen_image", "tokenizer")
16
16
  QWEN_IMAGE_PROCESSOR_CONFIG_FILE = os.path.join(CONF_PATH, "tokenizers", "qwen_image", "qwen2_vl_image_processor.json")
17
+ Z_IMAGE_TOKENIZER_CONF_PATH = os.path.join(CONF_PATH, "tokenizers", "z_image", "tokenizer")
17
18
 
18
19
  # models
19
20
  VAE_CONFIG_FILE = os.path.join(CONF_PATH, "models", "components", "vae.json")
@@ -46,6 +47,8 @@ QWEN_IMAGE_VISION_CONFIG_FILE = os.path.join(CONF_PATH, "models", "qwen_image",
46
47
  QWEN_IMAGE_VAE_CONFIG_FILE = os.path.join(CONF_PATH, "models", "qwen_image", "qwen_image_vae.json")
47
48
  QWEN_IMAGE_VAE_KEYMAP_FILE = os.path.join(CONF_PATH, "models", "qwen_image", "qwen_image_vae_keymap.json")
48
49
 
50
+ Z_IMAGE_TEXT_ENCODER_CONFIG_FILE = os.path.join(CONF_PATH, "models", "z_image", "qwen3_config.json")
51
+
49
52
  # data size
50
53
  KB = 1024
51
54
  MB = 1024 * KB
@@ -20,7 +20,7 @@ class Singleton:
20
20
 
21
21
  class ProcessGroupSingleton(Singleton):
22
22
  def __init__(self):
23
- if not hasattr(self, 'initialized'):
23
+ if not hasattr(self, "initialized"):
24
24
  self.CFG_GROUP: Optional[dist.ProcessGroup] = None
25
25
  self.SP_GROUP: Optional[dist.ProcessGroup] = None
26
26
  self.SP_ULYSSUES_GROUP: Optional[dist.ProcessGroup] = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diffsynth_engine
3
- Version: 0.6.1.dev34
3
+ Version: 0.6.1.dev35
4
4
  Author: MuseAI x ModelScope
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Operating System :: OS Independent
@@ -1,4 +1,4 @@
1
- diffsynth_engine/__init__.py,sha256=deLiGEHeQV1Xq7Kd11oRUA28FDegUgXBjlkNwgtVBMw,2290
1
+ diffsynth_engine/__init__.py,sha256=hN0jYaikjhpqHB4Mg-e53h-7ck1DsiY4FBti8K9lN2k,2390
2
2
  diffsynth_engine/algorithm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  diffsynth_engine/algorithm/noise_scheduler/__init__.py,sha256=YvcwE2tCNua-OAX9GEPm0EXsINNWH4XvJMNZb-uaZMM,745
4
4
  diffsynth_engine/algorithm/noise_scheduler/base_scheduler.py,sha256=3ve4bYxGyfuERynvoNYdFYSk0agdBgXKCeIOS6O6wgI,819
@@ -52,6 +52,7 @@ diffsynth_engine/conf/models/wan/dit/wan_dit_keymap.json,sha256=hfGytOIRkdYFgOR9
52
52
  diffsynth_engine/conf/models/wan/vae/wan2.1_vae.json,sha256=eVLTSRqbXm3JD8QDkLbM6vFfCdynlS-8QxqCfi4BzrI,815
53
53
  diffsynth_engine/conf/models/wan/vae/wan2.2_vae.json,sha256=pdnYEEZ_GcZHM_iH1y5ASdf_qZUGCOuDEaFmjdg9RKY,1860
54
54
  diffsynth_engine/conf/models/wan/vae/wan_vae_keymap.json,sha256=u9MJ3yRL45kdqRVoBnYbHkmuUmOseUFtwte-_9ZvdHc,25224
55
+ diffsynth_engine/conf/models/z_image/qwen3_config.json,sha256=i6AG90_s-q6zkocqYPSkgOfsmGAVPS4bdp7IH5oUf4o,726
55
56
  diffsynth_engine/conf/tokenizers/flux/tokenizer_1/merges.txt,sha256=n9aR98gDkhDg_O0VhlRmxlgg0JtjmIsBdL_iXeKZBRo,524619
56
57
  diffsynth_engine/conf/tokenizers/flux/tokenizer_1/special_tokens_map.json,sha256=LNs7gzGmDJL8HlWhPp_WH9IpPFpRJ1_czNYreABSUw4,588
57
58
  diffsynth_engine/conf/tokenizers/flux/tokenizer_1/tokenizer_config.json,sha256=a9zunMzioWyitMDF7QC0LFDqIl9EcqjEweljopAsKIE,705
@@ -79,9 +80,13 @@ diffsynth_engine/conf/tokenizers/wan/umt5-xxl/special_tokens_map.json,sha256=e4q
79
80
  diffsynth_engine/conf/tokenizers/wan/umt5-xxl/spiece.model,sha256=45CaZ7eAZQs1z1Kax4KtK2sm5tH4SdP7tqhykF9FJFg,4548313
80
81
  diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer.json,sha256=bhl7TT29cdoUtOslX0-pHJwfIGiyCi3iRylnyj0iYCs,16837417
81
82
  diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer_config.json,sha256=7Zo6iw-qcacKMoR-BDX-A25uES1N9O23u0ipIeNE3AU,61728
82
- diffsynth_engine/configs/__init__.py,sha256=vSjJToEdq3JX7t81_z4nwNwIdD4bYnFjxnMZH7PXMKo,1309
83
+ diffsynth_engine/conf/tokenizers/z_image/tokenizer/merges.txt,sha256=iDHk8aBERxNA98CoPXvXEwaluGfpX9hw900MUwipBNU,1671853
84
+ diffsynth_engine/conf/tokenizers/z_image/tokenizer/tokenizer.json,sha256=rrEzB6cazY_oGGHZStVKtonfdzMYgJ7tPL55S0SS2uQ,11422654
85
+ diffsynth_engine/conf/tokenizers/z_image/tokenizer/tokenizer_config.json,sha256=1dCfB7SMMIbFCLMNHJEUvRGJFFt06YKiZTUMkjrNgQE,9732
86
+ diffsynth_engine/conf/tokenizers/z_image/tokenizer/vocab.json,sha256=yhDX6fs-0YV13R4neiV5wW0QjjLydDloSvoOELFECRA,2776833
87
+ diffsynth_engine/configs/__init__.py,sha256=biluGSEw78PPwO7XFlms16iuWXDiM0Eg_qsOMMTY0NQ,1409
83
88
  diffsynth_engine/configs/controlnet.py,sha256=f3vclyP3lcAjxDGD9C1vevhqqQ7W2LL_c6Wye0uxk3Q,1180
84
- diffsynth_engine/configs/pipeline.py,sha256=SLaxFd9mKuJgromrkXpJrsNGAGzMl51Twomc4Qo83Wc,13759
89
+ diffsynth_engine/configs/pipeline.py,sha256=0WmKz_mykmJkRCGwv9DjuN8s27LppkD_Ier4VtovZSg,15307
85
90
  diffsynth_engine/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
86
91
  diffsynth_engine/models/__init__.py,sha256=8Ze7cSE8InetgXWTNb0neVA2Q44K7WlE-h7O-02m2sY,119
87
92
  diffsynth_engine/models/base.py,sha256=svao__9WH8VNcyXz5o5dzywYXDcGV0YV9IfkLzDKews,2558
@@ -142,7 +147,10 @@ diffsynth_engine/models/wan/wan_image_encoder.py,sha256=Vdd39lv_QvOsmPxihZWZZbpP
142
147
  diffsynth_engine/models/wan/wan_s2v_dit.py,sha256=j63ulcWLY4XGITOKUMGX292LtSEtP-n8BTvqb98YExU,23615
143
148
  diffsynth_engine/models/wan/wan_text_encoder.py,sha256=ePeOifbTI_o650mckzugyWPuHn5vhM-uFMcDVCijxPM,11394
144
149
  diffsynth_engine/models/wan/wan_vae.py,sha256=dC7MoUFeXRL7SIY0LG1OOUiZW-pp9IbXCghutMxpXr4,38889
145
- diffsynth_engine/pipelines/__init__.py,sha256=jh-4LSJ0vqlXiT8BgFgRIQxuAr2atEPyHrxXWj-Ud1U,604
150
+ diffsynth_engine/models/z_image/__init__.py,sha256=d1ztBNgM8GR2_uGwlxOE1Jf5URTq1g-WnmJH7nrMoaY,160
151
+ diffsynth_engine/models/z_image/qwen3.py,sha256=PmT6m46Fc7KZXNzG7ig23Mzj6QfHnMmrpX_MM0UuuYg,4580
152
+ diffsynth_engine/models/z_image/z_image_dit.py,sha256=kGtYzmfzk_FDe7KWfXpJagN7k7ROXl5J01IhRRs-Bsk,23806
153
+ diffsynth_engine/pipelines/__init__.py,sha256=xQUtz2cVmcEInazvT1dqv2HdPiJKmywWTIPfbK5dZXI,662
146
154
  diffsynth_engine/pipelines/base.py,sha256=ShRiX5MY6bUkRKfuGrA1aalAqeHyeZxhzT87Mwc30b4,17231
147
155
  diffsynth_engine/pipelines/flux_image.py,sha256=L0ggxpthLD8a5-zdPHu9z668uWBei9YzPb4PFVypDNU,50707
148
156
  diffsynth_engine/pipelines/hunyuan3d_shape.py,sha256=TNV0Wr09Dj2bzzlpua9WioCClOj3YiLfE6utI9aWL8A,8164
@@ -152,6 +160,7 @@ diffsynth_engine/pipelines/sdxl_image.py,sha256=v7ZACGPb6EcBunL6e5E9jynSQjE7GQx8
152
160
  diffsynth_engine/pipelines/utils.py,sha256=HZbJHErNJS1DhlwJKvZ9dY7Kh8Zdlsw3zE2e88TYGRY,2277
153
161
  diffsynth_engine/pipelines/wan_s2v.py,sha256=QHlCLMqlmnp55iYm2mzg4qCq4jceRAP3Zt5Mubz3mAM,29384
154
162
  diffsynth_engine/pipelines/wan_video.py,sha256=9xjSvQ4mlVEDdaL6QuUURj4iyxhJ2xABBphQjkfzK8s,31323
163
+ diffsynth_engine/pipelines/z_image.py,sha256=gSBhKV7TBL9xvCUrABdZA0kNqQzPuawmEv8OcI6KTcs,14756
155
164
  diffsynth_engine/processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
156
165
  diffsynth_engine/processor/canny_processor.py,sha256=hV30NlblTkEFUAmF_O-LJrNlGVM2SFrqq6okfF8VpOo,602
157
166
  diffsynth_engine/processor/depth_processor.py,sha256=dQvs3JsnyMbz4dyI9QoR8oO-mMFBFAgNvgqeCoaU5jk,1532
@@ -170,7 +179,7 @@ diffsynth_engine/tools/flux_reference_tool.py,sha256=6v0NRZPsDEHFlPruO-ZJTB4rYWx
170
179
  diffsynth_engine/tools/flux_replace_tool.py,sha256=AOyEGxHsaNwpTS2VChAieIfECgMxlKsRw0lWPm1k9C0,4627
171
180
  diffsynth_engine/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
172
181
  diffsynth_engine/utils/cache.py,sha256=Ivef22pCuhEq-4H00gSvkLS8ceVZoGis7OSitYL6gH4,2101
173
- diffsynth_engine/utils/constants.py,sha256=sJio3Vy8i0-PWYRnqquYt6ez9k6Tc9JdjCv6pn2BU_4,3551
182
+ diffsynth_engine/utils/constants.py,sha256=x0-bsPRplW-KkRpLVajuC9Yv6f3QbdHgSr3XZ-eBCsQ,3745
174
183
  diffsynth_engine/utils/download.py,sha256=w9QQjllPfTUEY371UTREU7o_vvdMY-Q2DymDel3ZEZY,6792
175
184
  diffsynth_engine/utils/env.py,sha256=k749eYt_qKGq38GocDiXfkhp8nZrowFefNVTZ8R755I,363
176
185
  diffsynth_engine/utils/flag.py,sha256=KSzjnzRe7sleNCJm8IpbJQbmBY4KNV2kDrijxi27Jek,2928
@@ -184,14 +193,14 @@ diffsynth_engine/utils/offload.py,sha256=94og79TIkxldwYUgZT3L4OVu1WBlE7gfVPvO2MR
184
193
  diffsynth_engine/utils/onnx.py,sha256=jeWUudJHnESjuiEAHyUZYUZz7dCj34O9aGjHCe8yjWo,1149
185
194
  diffsynth_engine/utils/parallel.py,sha256=OBGsAK-3ncArRyMU1lea7tbYgxSdCucQvXheL3Ssl5M,17653
186
195
  diffsynth_engine/utils/platform.py,sha256=nbpG-XHJFRmYY6u_e7IBQ9Q6GyItrIkKf3VKuBPTUpY,627
187
- diffsynth_engine/utils/process_group.py,sha256=P-X04a--Zb4M4kjc3DddmusrxCKqv8wiDGhXG4Al-rE,3783
196
+ diffsynth_engine/utils/process_group.py,sha256=I9uiqoVq-Hlu694GnrvbVi7nfVJBsgCCDo3p2kjU3yo,3783
188
197
  diffsynth_engine/utils/prompt.py,sha256=YItMchoVzsG6y-LB4vzzDUWrkhKRVlt1HfVhxZjSxMQ,280
189
198
  diffsynth_engine/utils/video.py,sha256=8FCaeqIdUsWMgWI_6SO9SPynsToGcLCQAVYFTc4CDhg,2200
190
199
  diffsynth_engine/utils/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
191
200
  diffsynth_engine/utils/memory/linear_regression.py,sha256=oW_EQEw13oPoyUrxiL8A7Ksa5AuJ2ynI2qhCbfAuZbg,3930
192
201
  diffsynth_engine/utils/memory/memory_predcit_model.py,sha256=EXprSl_zlVjgfMWNXP-iw83Ot3hyMcgYaRPv-dvyL84,3943
193
- diffsynth_engine-0.6.1.dev34.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
194
- diffsynth_engine-0.6.1.dev34.dist-info/METADATA,sha256=Uu-yhnydrVudp5RdK0ifk9-q4J_18zulQge4fNs24Z0,1164
195
- diffsynth_engine-0.6.1.dev34.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
196
- diffsynth_engine-0.6.1.dev34.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
197
- diffsynth_engine-0.6.1.dev34.dist-info/RECORD,,
202
+ diffsynth_engine-0.6.1.dev35.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
203
+ diffsynth_engine-0.6.1.dev35.dist-info/METADATA,sha256=mwDiBscVZRY6rz7Mbmv4qxhlFNSFoACIu4xl0YA9lVE,1164
204
+ diffsynth_engine-0.6.1.dev35.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
205
+ diffsynth_engine-0.6.1.dev35.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
206
+ diffsynth_engine-0.6.1.dev35.dist-info/RECORD,,