diffsynth-engine 0.7.1.dev1__py3-none-any.whl → 0.7.1.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/diffsynth_engine/algorithm/noise_scheduler/base_scheduler.py
+++ b/diffsynth_engine/algorithm/noise_scheduler/base_scheduler.py
@@ -19,7 +19,10 @@ class BaseScheduler:
     def update_config(self, config_dict):
         for config_name, new_value in config_dict.items():
             if hasattr(self, config_name):
-                setattr(self, config_name, new_value)
+                actual_value = new_value
+                if isinstance(actual_value, str) and actual_value.lower() == "none":
+                    actual_value = None
+                setattr(self, config_name, actual_value)

     def restore_config(self):
         for config_name, config_value in self._stored_config.items():
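The change above normalizes the string "none" (case-insensitive) to Python None before the value is stored on the scheduler. A minimal sketch of the resulting behavior, assuming a scheduler subclass with an illustrative shift_terminal attribute; the subclass, its constructor, and the attribute name are hypothetical and not part of this diff:

    # Hypothetical subclass for illustration; BaseScheduler's real constructor may differ.
    class DemoScheduler(BaseScheduler):
        def __init__(self):
            self.shift_terminal = 0.02
            self._stored_config = {"shift_terminal": 0.02}

    sched = DemoScheduler()
    sched.update_config({"shift_terminal": "none"})  # e.g. a string coming from a config file or UI
    assert sched.shift_terminal is None              # previously the literal string "none" was stored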
--- a/diffsynth_engine/models/z_image/qwen3.py
+++ b/diffsynth_engine/models/z_image/qwen3.py
@@ -59,8 +59,11 @@ class Qwen3Model(PreTrainedModel):
         device: str = "cuda:0",
         dtype: torch.dtype = torch.bfloat16,
     ):
-        model = cls(config=config, device="meta", dtype=dtype)
+        with torch.device("meta"):
+            model = cls(config=config, device="meta", dtype=dtype)
         model.requires_grad_(False)
+
+        model.rotary_emb = Qwen3RotaryEmbedding(config=config, device=device)
         model.load_state_dict(state_dict, assign=True)
         model.to(device=device, dtype=dtype, non_blocking=True)
         return model
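Both this hunk and the ZImageDiT hunk below wrap construction in torch.device("meta"), so parameters are created with shapes only and no storage is allocated; the real weights are then attached via load_state_dict(..., assign=True) and moved to the target device. The rotary embedding is re-instantiated on the real device, presumably because its buffers are not part of the checkpoint. A generic sketch of the meta-device pattern (standard PyTorch, not code from this package):

    import torch
    import torch.nn as nn

    # Build the module on the meta device: parameter shapes exist, but no memory is allocated.
    with torch.device("meta"):
        model = nn.Linear(4, 4)

    # assign=True swaps the meta tensors for the real ones instead of copying into them.
    state_dict = {"weight": torch.randn(4, 4), "bias": torch.zeros(4)}
    model.load_state_dict(state_dict, assign=True)
    model.to(device="cpu", dtype=torch.float32, non_blocking=True)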
--- a/diffsynth_engine/models/z_image/z_image_dit.py
+++ b/diffsynth_engine/models/z_image/z_image_dit.py
@@ -584,7 +584,8 @@ class ZImageDiT(PreTrainedModel):
         dtype: torch.dtype,
         **kwargs,
     ):
-        model = cls(device="meta", dtype=dtype, **kwargs)
+        with torch.device("meta"):
+            model = cls(device="meta", dtype=dtype, **kwargs)
         model = model.requires_grad_(False)
         model.load_state_dict(state_dict, assign=True)
         model.to(device=device, dtype=dtype, non_blocking=True)
--- /dev/null
+++ b/diffsynth_engine/tools/qwen_image_upscaler_tool.py
@@ -0,0 +1,340 @@
+import torch
+import torch.nn as nn
+import math
+import numpy as np
+from typing import Literal, Optional, Dict
+from copy import deepcopy
+from PIL import Image
+from einops import rearrange, repeat
+from contextlib import contextmanager
+
+from diffsynth_engine.configs import QwenImagePipelineConfig
+from diffsynth_engine.pipelines.qwen_image import QwenImagePipeline
+from diffsynth_engine.models.qwen_image import QwenImageVAE
+from diffsynth_engine.models.basic.lora import LoRALinear
+from diffsynth_engine.models.qwen_image.qwen_image_dit import QwenImageTransformerBlock
+from diffsynth_engine.utils import logging
+from diffsynth_engine.utils.loader import load_file
+from diffsynth_engine.utils.download import fetch_model
+from diffsynth_engine.utils.image import adain_color_fix, wavelet_color_fix
+
+logger = logging.get_logger(__name__)
+
+
+@contextmanager
+def odtsr_forward():
+    """
+    Context manager for ODTSR forward pass optimization.
+
+    Replaces two methods:
+    1. LoRALinear.forward - to support batch CFG with dual outputs
+    2. QwenImageTransformerBlock._modulate - optimized version without repeat_interleave
+    """
+    original_lora_forward = LoRALinear.forward
+    original_modulate = QwenImageTransformerBlock._modulate
+
+    def lora_batch_cfg_forward(self, x):
+        y = nn.Linear.forward(self, x)
+        if len(self._lora_dict) < 1:
+            return y
+        if x.ndim == 2:
+            y2 = y.clone()
+            for name, lora in self._lora_dict.items():
+                y2 += lora(x)
+            return torch.stack([y, y2], dim=1)
+        else:
+            L2 = x.shape[1]
+            L = L2 // 2
+            x2 = x[:, L:, :]
+            for name, lora in self._lora_dict.items():
+                y[:, L:] += lora(x2)
+            return y
+
+    def optimized_modulate(self, x, mod_params, index=None):
+        if mod_params.ndim == 2:
+            shift, scale, gate = mod_params.chunk(3, dim=-1)
+            return x * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1), gate.unsqueeze(1)
+        else:
+            B, L2, C = x.shape
+            L = L2 // 2
+            shift, scale, gate = mod_params.chunk(3, dim=-1)  # Each: [B, 2, dim]
+
+            result = torch.empty_like(x)
+            gate_result = torch.empty(B, L2, gate.shape[-1], dtype=x.dtype, device=x.device)
+
+            result[:, :L] = x[:, :L] * (1 + scale[:, 0:1]) + shift[:, 0:1]
+            gate_result[:, :L] = gate[:, 0:1].expand(-1, L, -1)
+
+            result[:, L:] = x[:, L:] * (1 + scale[:, 1:2]) + shift[:, 1:2]
+            gate_result[:, L:] = gate[:, 1:2].expand(-1, L, -1)
+
+            return result, gate_result
+
+    LoRALinear.forward = lora_batch_cfg_forward
+    QwenImageTransformerBlock._modulate = optimized_modulate
+
+    try:
+        yield
+    finally:
+        LoRALinear.forward = original_lora_forward
+        QwenImageTransformerBlock._modulate = original_modulate
+
+
+class QwenImageUpscalerTool:
+    """
+    Tool for ODTSR (One-step Diffusion Transformer Super Resolution) image upscaling.
+    https://huggingface.co/double8fun/ODTSR
+    """
+
+    def __init__(
+        self,
+        pipeline: QwenImagePipeline,
+        odtsr_weight_path: Optional[str] = None,
+    ):
+        self.pipe = pipeline
+        self.device = self.pipe.device
+        self.dtype = self.pipe.dtype
+
+        # to avoid "small grid" artifacts in generated images
+        self._convert_dit_part_linear_weight()
+
+        if not odtsr_weight_path:
+            odtsr_weight_path = fetch_model("muse/ODTSR", revision="master", path="weight.safetensors")
+        odtsr_state_dict = load_file(odtsr_weight_path)
+        lora_state_dict = self._convert_odtsr_lora(odtsr_state_dict)
+        lora_state_dict_list = [(lora_state_dict, 1.0, odtsr_weight_path)]
+        self.pipe._load_lora_state_dicts(lora_state_dict_list, fused=False, save_original_weight=False)
+
+        self.new_vae = deepcopy(self.pipe.vae)
+        self._load_vae_encoder_weights(odtsr_state_dict)
+
+        sigmas = torch.linspace(1.0, 0.0, 1000 + 1)[:-1]
+        mu = 0.8
+        shift_terminal = 0.02
+        sigmas = math.exp(mu) / (math.exp(mu) + (1 / sigmas - 1))
+        one_minus_sigmas = 1 - sigmas
+        scale_factor = one_minus_sigmas[-1] / (1 - shift_terminal)
+        self.sigmas = 1 - (one_minus_sigmas / scale_factor)
+        self.sigmas = self.sigmas.to(device=self.device)
+        self.timesteps = self.sigmas * self.pipe.noise_scheduler.num_train_timesteps
+        self.timesteps = self.timesteps.to(device=self.device)
+        self.start_timestep = 750
+        self.fixed_timestep = self.timesteps[self.start_timestep].to(device=self.device)
+        self.one_step_sigma = self.sigmas[self.start_timestep].to(device=self.device)
+
+        self.prompt = "High Contrast, hyper detailed photo, 2k UHD"
+        self.prompt_emb, self.prompt_emb_mask = self.pipe.encode_prompt(self.prompt, 1, 4096)
+
+    @classmethod
+    def from_pretrained(
+        cls,
+        qwen_model_path: str,
+        odtsr_weight_path: Optional[str] = None,
+        device: str = "cuda",
+        dtype: torch.dtype = torch.bfloat16,
+    ):
+        config = QwenImagePipelineConfig(
+            model_path=qwen_model_path,
+            model_dtype=dtype,
+            device=device,
+            load_encoder=True,
+        )
+        pipe = QwenImagePipeline.from_pretrained(config)
+        return cls(pipe, odtsr_weight_path)
+
+    def _convert_dit_part_linear_weight(self):
+        """
+        Perform dtype conversion on weights of specific Linear layers in the DIT model.
+
+        This is an important trick: for Linear layers NOT in the patterns list, convert their weights
+        to float8_e4m3fn first, then convert back to the original dtype (typically bfloat16). This operation
+        matches the weight processing method used during training to avoid "small grid" artifacts in generated images.
+
+        Layers in the patterns list (such as LoRA-related layers) are skipped and their original weights remain unchanged.
+        """
+        patterns = [
+            "img_in",
+            "img_mod.1",
+            "attn.to_q",
+            "attn.to_k",
+            "attn.to_v",
+            "to_out",
+            "img_mlp.net.0.proj",
+            "img_mlp.net.2",
+        ]
+
+        def _convert_weight(parent: nn.Module, name_prefix: str = ""):
+            for name, module in list(parent.named_children()):
+                full_name = f"{name_prefix}{name}"
+                if isinstance(module, torch.nn.Linear):
+                    if not any(p in full_name for p in patterns):
+                        origin_dtype = module.weight.data.dtype
+                        module.weight.data = module.weight.data.to(torch.float8_e4m3fn)
+                        module.weight.data = module.weight.data.to(origin_dtype)
+                        if module.bias is not None:
+                            module.bias.data = module.bias.data.to(torch.float8_e4m3fn)
+                            module.bias.data = module.bias.data.to(origin_dtype)
+                else:
+                    _convert_weight(module, name_prefix=full_name + ".")
+
+        _convert_weight(self.pipe.dit)
+
+    def _convert_odtsr_lora(self, odtsr_state_dict: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
+        state_dict = {}
+        for key, param in odtsr_state_dict.items():
+            if "lora_A2" in key:
+                lora_b_key = key.replace("lora_A2", "lora_B2")
+                lora_b_param = odtsr_state_dict[lora_b_key]
+
+                lora_a_key = key.replace("lora_A2", "lora_A").replace("pipe.dit.", "")
+                lora_b_key = lora_b_key.replace("lora_B2", "lora_B").replace("pipe.dit.", "")
+                state_dict[lora_a_key] = param
+                state_dict[lora_b_key] = lora_b_param
+
+        return state_dict
+
+    def _load_vae_encoder_weights(self, state_dict: Dict[str, torch.Tensor]):
+        try:
+            vae_state_dict = {}
+            for k, v in state_dict.items():
+                if 'pipe.new_vae.' in k:
+                    new_key = k.replace('pipe.new_vae.', '')
+                    vae_state_dict[new_key] = v
+            if vae_state_dict:
+                self.new_vae.load_state_dict(vae_state_dict, strict=False)
+                logger.info(f"Loaded {len(vae_state_dict)} trained VAE encoder parameters")
+            else:
+                logger.warning(f"No 'pipe.new_vae.' weights found, using original VAE")
+        except Exception as e:
+            logger.error(f"Failed to load VAE encoder weights: {e}")
+            raise e
+
+
+    def add_noise(self, sample: torch.Tensor, noise: torch.Tensor, timestep: torch.Tensor) -> torch.Tensor:
+        timestep_id = torch.argmin((self.timesteps - timestep).abs())
+        sigma = self.sigmas[timestep_id]
+        sample = (1 - sigma) * sample + sigma * noise
+        return sample
+
+    def preprocess_image(self, image: Image.Image) -> torch.Tensor:
+        image = torch.Tensor(np.array(image, dtype=np.float32))
+        image = image.to(dtype=self.dtype, device=self.device)
+        image = image * (2 / 255) - 1
+        image = repeat(image, f"H W C -> B C H W", **({"B": 1}))
+        return image
+
+    def _prepare_condition_latents(self, image: Image.Image, vae: QwenImageVAE, vae_tiled: bool) -> torch.Tensor:
+        image_tensor = self.preprocess_image(image).to(dtype=self.pipe.config.vae_dtype)
+        image_tensor = image_tensor.unsqueeze(2)
+
+        latents = vae.encode(
+            image_tensor,
+            device=self.device,
+            tiled=vae_tiled,
+            tile_size=self.pipe.vae_tile_size,
+            tile_stride=self.pipe.vae_tile_stride,
+        )
+        latents = latents.squeeze(2).to(device=self.device, dtype=self.dtype)
+        return latents
+
+    def _single_step_denoise(
+        self,
+        latents: torch.Tensor,
+        image_latents: torch.Tensor,
+        noise: torch.Tensor,
+        prompt_emb: torch.Tensor,
+        prompt_emb_mask: torch.Tensor,
+        fidelity: float,
+    ) -> torch.Tensor:
+        fidelity_timestep_id = int(self.start_timestep + fidelity * (1000 - self.start_timestep) + 0.5)
+        if fidelity_timestep_id != 1000:
+            fidelity_timestep = self.timesteps[fidelity_timestep_id].to(device=self.device)
+            image_latents = self.add_noise(image_latents, noise, fidelity_timestep)
+
+        latents = self.add_noise(latents, noise, self.fixed_timestep)
+
+        with odtsr_forward():
+            noise_pred = self.pipe.predict_noise_with_cfg(
+                latents=latents,
+                image_latents=[image_latents],
+                timestep=self.fixed_timestep.unsqueeze(0),
+                prompt_emb=prompt_emb,
+                prompt_emb_mask=prompt_emb_mask,
+                negative_prompt_emb=None,
+                negative_prompt_emb_mask=None,
+                context_latents=None,
+                entity_prompt_embs=None,
+                entity_prompt_emb_masks=None,
+                negative_entity_prompt_embs=None,
+                negative_entity_prompt_emb_masks=None,
+                entity_masks=None,
+                cfg_scale=1.0,
+                batch_cfg=self.pipe.config.batch_cfg,
+            )
+
+        denoised = latents + (0 - self.one_step_sigma) * noise_pred
+        return denoised
+
+    @torch.no_grad()
+    def __call__(
+        self,
+        image: Image.Image,
+        scale: int = 2,
+        prompt: str = "High Contrast, hyper detailed photo, 2k UHD",
+        fidelity: float = 1.0,
+        align_method: Literal["none", "adain", "wavelet"] = "none",
+    ) -> Image.Image:
+        width, height = image.size
+        target_width, target_height = width * scale, height * scale
+        target_width_round = target_width // 16 * 16
+        target_height_round = target_height // 16 * 16
+        logger.info(f"Upscaling image from {width}x{height} to {target_width}x{target_height}")
+        vae_tiled = (target_width_round * target_height_round > 2048 * 2048)
+
+        resized_image = image.resize((target_width_round, target_height_round), Image.BICUBIC)
+
+        condition_latents = self._prepare_condition_latents(resized_image, self.pipe.vae, vae_tiled)
+        latents = self._prepare_condition_latents(resized_image, self.new_vae, vae_tiled)
+
+        noise = self.pipe.generate_noise(
+            (1, 16, target_height_round // 8, target_width_round // 8),
+            seed=42,
+            device=self.device,
+            dtype=self.dtype
+        )
+
+        prompt_emb, prompt_emb_mask = self.prompt_emb, self.prompt_emb_mask
+        if prompt != self.prompt:
+            prompt_emb, prompt_emb_mask = self.pipe.encode_prompt(prompt, 1, 4096)
+
+        denoised_latents = self._single_step_denoise(
+            latents=latents,
+            noise=noise,
+            image_latents=condition_latents,
+            prompt_emb=prompt_emb,
+            prompt_emb_mask=prompt_emb_mask,
+            fidelity=fidelity,
+        )
+
+        # Decode
+        denoised_latents = rearrange(denoised_latents, "B C H W -> B C 1 H W")
+        vae_output = rearrange(
+            self.pipe.vae.decode(
+                denoised_latents.to(self.pipe.vae.model.encoder.conv1.weight.dtype),
+                device=self.pipe.vae.model.encoder.conv1.weight.device,
+                tiled=vae_tiled,
+                tile_size=self.pipe.vae_tile_size,
+                tile_stride=self.pipe.vae_tile_stride,
+            )[0],
+            "C B H W -> B C H W",
+        )
+        result_image = self.pipe.vae_output_to_image(vae_output)
+        self.pipe.model_lifecycle_finish(["vae"])
+
+        if align_method == "adain":
+            result_image = adain_color_fix(target=result_image, source=resized_image)
+        elif align_method == "wavelet":
+            result_image = wavelet_color_fix(target=result_image, source=resized_image)
+
+        result_image = result_image.resize((target_width, target_height), Image.BICUBIC)
+        return result_image
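Based on the signatures in the new file, usage would look roughly like the following sketch. The checkpoint path and file names are placeholders, and the module import path is inferred from the RECORD entry for this release; when no ODTSR weight path is given, the LoRA weights are fetched automatically via fetch_model.

    from PIL import Image
    from diffsynth_engine.tools.qwen_image_upscaler_tool import QwenImageUpscalerTool

    # "path/to/qwen-image" is a placeholder for a local Qwen-Image checkpoint.
    upscaler = QwenImageUpscalerTool.from_pretrained("path/to/qwen-image", device="cuda")

    lowres = Image.open("input.png").convert("RGB")
    # One-step 2x super resolution; wavelet color alignment against the bicubic reference.
    result = upscaler(lowres, scale=2, fidelity=1.0, align_method="wavelet")
    result.save("output_2x.png")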
--- a/diffsynth_engine/utils/image.py
+++ b/diffsynth_engine/utils/image.py
@@ -1,10 +1,13 @@
 import torch
 from torchvision import transforms
+from torchvision.transforms import ToTensor, ToPILImage
 import numpy as np
 import math
 from PIL import Image
 from enum import Enum
 from typing import List, Tuple, Optional
+from torch import Tensor
+from torch.nn import functional as F

 from diffsynth_engine.utils import logging

@@ -243,3 +246,84 @@ def _need_rescale_pil_conversion(image: np.ndarray) -> bool:
             f"got [{image.min()}, {image.max()}] which cannot be converted to uint8."
         )
     return do_rescale
+
+
+# --------------------------------------------------------------------------------
+# Color Alignment Functions
+# Based on Li Yi's implementation: https://github.com/pkuliyi2015/sd-webui-stablesr
+# --------------------------------------------------------------------------------
+def calc_mean_std(feat: Tensor, eps=1e-5):
+    size = feat.size()
+    assert len(size) == 4, 'The input feature should be 4D tensor.'
+    b, c = size[:2]
+    feat_var = feat.reshape(b, c, -1).var(dim=2) + eps
+    feat_std = feat_var.sqrt().reshape(b, c, 1, 1)
+    feat_mean = feat.reshape(b, c, -1).mean(dim=2).reshape(b, c, 1, 1)
+    return feat_mean, feat_std
+
+
+def adaptive_instance_normalization(content_feat: Tensor, style_feat: Tensor):
+    size = content_feat.size()
+    style_mean, style_std = calc_mean_std(style_feat)
+    content_mean, content_std = calc_mean_std(content_feat)
+    normalized_feat = (content_feat - content_mean.expand(size)) / content_std.expand(size)
+    return normalized_feat * style_std.expand(size) + style_mean.expand(size)
+
+
+def wavelet_blur(image: Tensor, radius: int):
+    kernel_vals = [
+        [0.0625, 0.125, 0.0625],
+        [0.125, 0.25, 0.125],
+        [0.0625, 0.125, 0.0625],
+    ]
+    kernel = torch.tensor(kernel_vals, dtype=image.dtype, device=image.device)
+    kernel = kernel[None, None]
+    kernel = kernel.repeat(3, 1, 1, 1)
+    image = F.pad(image, (radius, radius, radius, radius), mode='replicate')
+    output = F.conv2d(image, kernel, groups=3, dilation=radius)
+    return output
+
+
+def wavelet_decomposition(image: Tensor, levels=5):
+    high_freq = torch.zeros_like(image)
+    for i in range(levels):
+        radius = 2 ** i
+        low_freq = wavelet_blur(image, radius)
+        high_freq += (image - low_freq)
+        image = low_freq
+
+    return high_freq, low_freq
+
+
+def wavelet_reconstruction(content_feat: Tensor, style_feat: Tensor):
+    content_high_freq, content_low_freq = wavelet_decomposition(content_feat)
+    del content_low_freq
+    style_high_freq, style_low_freq = wavelet_decomposition(style_feat)
+    del style_high_freq
+    return content_high_freq + style_low_freq
+
+
+def adain_color_fix(target: Image.Image, source: Image.Image) -> Image.Image:
+    to_tensor = ToTensor()
+    target_tensor = to_tensor(target).unsqueeze(0)
+    source_tensor = to_tensor(source).unsqueeze(0)
+
+    result_tensor = adaptive_instance_normalization(target_tensor, source_tensor)
+
+    to_image = ToPILImage()
+    result_image = to_image(result_tensor.squeeze(0).clamp_(0.0, 1.0))
+
+    return result_image
+
+
+def wavelet_color_fix(target: Image.Image, source: Image.Image) -> Image.Image:
+    to_tensor = ToTensor()
+    target_tensor = to_tensor(target).unsqueeze(0)
+    source_tensor = to_tensor(source).unsqueeze(0)
+
+    result_tensor = wavelet_reconstruction(target_tensor, source_tensor)
+
+    to_image = ToPILImage()
+    result_image = to_image(result_tensor.squeeze(0).clamp_(0.0, 1.0))
+
+    return result_image
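The two new color-alignment helpers take PIL images and return a PIL image: adain_color_fix matches per-channel mean and standard deviation of the target to the source, while wavelet_color_fix keeps the target's high-frequency detail and swaps in the source's low-frequency color. A small usage sketch; the file names are placeholders, and the wavelet variant expects both images to share the same size:

    from PIL import Image
    from diffsynth_engine.utils.image import adain_color_fix, wavelet_color_fix

    target = Image.open("upscaled.png").convert("RGB")    # image whose colors drifted
    source = Image.open("reference.png").convert("RGB")   # color reference

    fixed_adain = adain_color_fix(target=target, source=source)
    fixed_wavelet = wavelet_color_fix(target=target, source=source)
    fixed_wavelet.save("upscaled_colorfixed.png")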
--- a/diffsynth_engine-0.7.1.dev1.dist-info/METADATA
+++ b/diffsynth_engine-0.7.1.dev3.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diffsynth_engine
-Version: 0.7.1.dev1
+Version: 0.7.1.dev3
 Author: MuseAI x ModelScope
 Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent
--- a/diffsynth_engine-0.7.1.dev1.dist-info/RECORD
+++ b/diffsynth_engine-0.7.1.dev3.dist-info/RECORD
@@ -1,7 +1,7 @@
 diffsynth_engine/__init__.py,sha256=lzUI6r47i2CCUiSIwi1IK502TL89ZG7h1yNwmM1eFvI,2588
 diffsynth_engine/algorithm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diffsynth_engine/algorithm/noise_scheduler/__init__.py,sha256=YvcwE2tCNua-OAX9GEPm0EXsINNWH4XvJMNZb-uaZMM,745
-diffsynth_engine/algorithm/noise_scheduler/base_scheduler.py,sha256=3ve4bYxGyfuERynvoNYdFYSk0agdBgXKCeIOS6O6wgI,819
+diffsynth_engine/algorithm/noise_scheduler/base_scheduler.py,sha256=WflR4KGZhbbsoTnEQhpPNR2FfJhTQqdU27A8tBN58P8,988
 diffsynth_engine/algorithm/noise_scheduler/flow_match/__init__.py,sha256=ivBtxk1P_ERGxptqzYCnsguwL9aScJ5hpAgF7xgtR2I,213
 diffsynth_engine/algorithm/noise_scheduler/flow_match/flow_beta.py,sha256=atw0CPS3TnitILpy78T6-YdDQMcBvTEHJloZzjtWqvM,1161
 diffsynth_engine/algorithm/noise_scheduler/flow_match/flow_ddim.py,sha256=cX_18RlvWtiEIcSCUflGipJdooNo9BZUHTQEm8Ltnfg,1108
@@ -152,9 +152,9 @@ diffsynth_engine/models/wan/wan_s2v_dit.py,sha256=j63ulcWLY4XGITOKUMGX292LtSEtP-
 diffsynth_engine/models/wan/wan_text_encoder.py,sha256=ePeOifbTI_o650mckzugyWPuHn5vhM-uFMcDVCijxPM,11394
 diffsynth_engine/models/wan/wan_vae.py,sha256=dC7MoUFeXRL7SIY0LG1OOUiZW-pp9IbXCghutMxpXr4,38889
 diffsynth_engine/models/z_image/__init__.py,sha256=7sQvTYf984sK6ke3Wr-_Pt3Qkqw_s540wPswn4nThkY,305
-diffsynth_engine/models/z_image/qwen3.py,sha256=PmT6m46Fc7KZXNzG7ig23Mzj6QfHnMmrpX_MM0UuuYg,4580
+diffsynth_engine/models/z_image/qwen3.py,sha256=QIS0ToOMa6p8-KsRla0s6hR9CUlLud8joo88mkdchwI,4698
 diffsynth_engine/models/z_image/siglip.py,sha256=PjB6ECXXJKgEpU9gF5Fyyt8twjKNA5_jCAG_8qQkoc8,2661
-diffsynth_engine/models/z_image/z_image_dit.py,sha256=kGtYzmfzk_FDe7KWfXpJagN7k7ROXl5J01IhRRs-Bsk,23806
+diffsynth_engine/models/z_image/z_image_dit.py,sha256=la_fcNRbLBuP7FS26ixS0hkcEo-NLmCLhvVeECEYSB0,23845
 diffsynth_engine/models/z_image/z_image_dit_omni_base.py,sha256=cfdUFTwGFYRiyBhB_4ptn0lAvYuLAulF6zf0ABqlAzs,44854
 diffsynth_engine/pipelines/__init__.py,sha256=pcqwWR-K3jOMYiko82gSKjixYPTrfWCpZAa6qrPmdFg,880
 diffsynth_engine/pipelines/base.py,sha256=h6xOqT1LMFGrJYoTD68_VoHcfRX04je8KUE_y3BUZfM,17279
@@ -186,6 +186,7 @@ diffsynth_engine/tools/flux_inpainting_tool.py,sha256=qHsYKUG20A19ujRdocpIPC4a_H
 diffsynth_engine/tools/flux_outpainting_tool.py,sha256=ff4qUj2mMYW6GMts7ifnJG7Rth55pfuggopRCyAXwJ8,3894
 diffsynth_engine/tools/flux_reference_tool.py,sha256=6v0NRZPsDEHFlPruO-ZJTB4rYWxKVAlmnYEeandD3r8,4723
 diffsynth_engine/tools/flux_replace_tool.py,sha256=AOyEGxHsaNwpTS2VChAieIfECgMxlKsRw0lWPm1k9C0,4627
+diffsynth_engine/tools/qwen_image_upscaler_tool.py,sha256=TFtITz113zoqsdRibVuLtWF8JEhGTqzyV2ZGHJuuYKw,13876
 diffsynth_engine/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diffsynth_engine/utils/cache.py,sha256=Ivef22pCuhEq-4H00gSvkLS8ceVZoGis7OSitYL6gH4,2101
 diffsynth_engine/utils/constants.py,sha256=Tsn3EAByfZra-nGcx0NEcP9nWTPKaDGdatosE3BuPGE,3846
@@ -194,7 +195,7 @@ diffsynth_engine/utils/env.py,sha256=k749eYt_qKGq38GocDiXfkhp8nZrowFefNVTZ8R755I
 diffsynth_engine/utils/flag.py,sha256=Ubm7FF0vHG197bmJGEplp4XauBlUaQVv-zr-w6VyEIM,2493
 diffsynth_engine/utils/fp8_linear.py,sha256=k34YFWo2dc3t8aKjHaCW9CbQMOTqXxaDHk8aw8aKif4,3857
 diffsynth_engine/utils/gguf.py,sha256=ZWvw46V4g4uVyAR_oCq-4K5nPdKVrYk3u47uXMgA9lU,14092
-diffsynth_engine/utils/image.py,sha256=PiDButjv0fsRS23kpQgCLZAlBumpzQmNnolfvb5EKQ0,9626
+diffsynth_engine/utils/image.py,sha256=jqx-UKfdc2YRBtHoL-RP2M8yce_0h2rTIJgf6mux-aU,12695
 diffsynth_engine/utils/loader.py,sha256=usIr2nUMgPxEdtEND6kboaST3ZUVr0PVWwm2sK-HXe8,1871
 diffsynth_engine/utils/lock.py,sha256=1Ipgst9eEFfFdViAvD5bxdB6HnHHBcqWYOb__fGaPUI,1601
 diffsynth_engine/utils/logging.py,sha256=XB0xTT8PBN6btkOjFtOvjlrOCRVgDGT8PFAp1vmse28,467
@@ -208,8 +209,8 @@ diffsynth_engine/utils/video.py,sha256=8FCaeqIdUsWMgWI_6SO9SPynsToGcLCQAVYFTc4CD
 diffsynth_engine/utils/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 diffsynth_engine/utils/memory/linear_regression.py,sha256=oW_EQEw13oPoyUrxiL8A7Ksa5AuJ2ynI2qhCbfAuZbg,3930
 diffsynth_engine/utils/memory/memory_predcit_model.py,sha256=EXprSl_zlVjgfMWNXP-iw83Ot3hyMcgYaRPv-dvyL84,3943
-diffsynth_engine-0.7.1.dev1.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
-diffsynth_engine-0.7.1.dev1.dist-info/METADATA,sha256=TtCBrrPGk2MynQ4JRTsk6bTACJ-ZNYNXLzpU8YmKF1k,1163
-diffsynth_engine-0.7.1.dev1.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
-diffsynth_engine-0.7.1.dev1.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
-diffsynth_engine-0.7.1.dev1.dist-info/RECORD,,
+diffsynth_engine-0.7.1.dev3.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
+diffsynth_engine-0.7.1.dev3.dist-info/METADATA,sha256=GdfffMwz8CD9vSlEKGlzjwp_fO19sYw0ulei0vx6rQY,1163
+diffsynth_engine-0.7.1.dev3.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+diffsynth_engine-0.7.1.dev3.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
+diffsynth_engine-0.7.1.dev3.dist-info/RECORD,,
--- a/diffsynth_engine-0.7.1.dev1.dist-info/WHEEL
+++ b/diffsynth_engine-0.7.1.dev3.dist-info/WHEEL
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.10.1)
+Generator: setuptools (80.10.2)
 Root-Is-Purelib: true
 Tag: py3-none-any
