diffsynth-engine 0.6.1.dev36__py3-none-any.whl → 0.6.1.dev38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -301,11 +301,11 @@ class HunyuanPipelineConfig(BaseConfig):
301
301
  @dataclass
302
302
  class ZImagePipelineConfig(AttentionConfig, OptimizationConfig, ParallelConfig, BaseConfig):
303
303
  model_path: str | os.PathLike | List[str | os.PathLike]
304
- model_dtype: torch.dtype = torch.float16
304
+ model_dtype: torch.dtype = torch.bfloat16
305
305
  vae_path: Optional[str | os.PathLike | List[str | os.PathLike]] = None
306
- vae_dtype: torch.dtype = torch.float16
306
+ vae_dtype: torch.dtype = torch.bfloat16
307
307
  encoder_path: Optional[str | os.PathLike | List[str | os.PathLike]] = None
308
- encoder_dtype: torch.dtype = torch.float16
308
+ encoder_dtype: torch.dtype = torch.bfloat16
309
309
 
310
310
  @classmethod
311
311
  def basic_config(
@@ -40,20 +40,54 @@ class ZImageLoRAConverter(LoRAStateDictConverter):
40
40
  for key, param in lora_state_dict.items():
41
41
  if "lora_A.weight" in key:
42
42
  lora_b_key = key.replace("lora_A.weight", "lora_B.weight")
43
- target_key = key.replace(".lora_A.weight", "").replace("transformer.", "")
43
+ target_key = key.replace(".lora_A.weight", "").replace("diffusion_model.", "")
44
44
 
45
- if "attn.to_out.0" in target_key:
46
- target_key = target_key.replace("attn.to_out.0", "attn.to_out")
45
+ if "attention.to_out.0" in target_key:
46
+ target_key = target_key.replace("attention.to_out.0", "attention.to_out")
47
+ if "adaLN_modulation.0" in target_key:
48
+ target_key = target_key.replace("adaLN_modulation.0", "adaLN_modulation")
49
+
50
+ up = lora_state_dict[lora_b_key]
51
+ rank = up.shape[1]
47
52
 
48
53
  dit_dict[target_key] = {
49
54
  "down": param,
50
- "up": lora_state_dict[lora_b_key],
51
- "alpha": lora_state_dict.get(key.replace("lora_A.weight", "alpha"), None),
55
+ "up": up,
56
+ "rank": rank,
57
+ "alpha": lora_state_dict.get(key.replace("lora_A.weight", "alpha"), rank),
52
58
  }
59
+
53
60
  return {"dit": dit_dict}
54
61
 
62
+ def _from_diffsynth(self, lora_state_dict: Dict[str, torch.Tensor]) -> Dict[str, Dict[str, torch.Tensor]]:
63
+ dit_dict = {}
64
+ for key, param in lora_state_dict.items():
65
+ if "lora_A.default.weight" in key:
66
+ lora_b_key = key.replace("lora_A.default.weight", "lora_B.default.weight")
67
+ target_key = key.replace(".lora_A.default.weight", "")
68
+
69
+ if "attention.to_out.0" in target_key:
70
+ target_key = target_key.replace("attention.to_out.0", "attention.to_out")
71
+
72
+ up = lora_state_dict[lora_b_key]
73
+ rank = up.shape[1]
74
+
75
+ dit_dict[target_key] = {
76
+ "down": param,
77
+ "up": up,
78
+ "rank": rank,
79
+ "alpha": lora_state_dict.get(key.replace("lora_A.default.weight", "alpha"), rank),
80
+ }
81
+
82
+ return {"dit": dit_dict}
83
+
84
+
55
85
  def convert(self, lora_state_dict: Dict[str, torch.Tensor]) -> Dict[str, Dict[str, torch.Tensor]]:
56
- return self._from_diffusers(lora_state_dict)
86
+ key = list(lora_state_dict.keys())[0]
87
+ if key.startswith("diffusion_model."):
88
+ return self._from_diffusers(lora_state_dict)
89
+ else:
90
+ return self._from_diffsynth(lora_state_dict)
57
91
 
58
92
 
59
93
  class ZImagePipeline(BasePipeline):
@@ -180,7 +214,7 @@ class ZImagePipeline(BasePipeline):
180
214
  def update_weights(self, state_dicts: ZImageStateDicts) -> None:
181
215
  self.update_component(self.dit, state_dicts.model, self.config.device, self.config.model_dtype)
182
216
  self.update_component(
183
- self.text_encoder, state_dicts.text_encoder, self.config.device, self.config.encoder_dtype
217
+ self.text_encoder, state_dicts.encoder, self.config.device, self.config.encoder_dtype
184
218
  )
185
219
  self.update_component(self.vae_decoder, state_dicts.vae, self.config.device, self.config.vae_dtype)
186
220
 
@@ -276,8 +310,8 @@ class ZImagePipeline(BasePipeline):
276
310
  comb_pred = self.predict_noise(latents, t, prompt_emb)[0]
277
311
  else:
278
312
  if not batch_cfg:
279
- positive_noise_pred = self.predict_noise(latents, t, prompt_emb)
280
- negative_noise_pred = self.predict_noise(latents, t, negative_prompt_emb)
313
+ positive_noise_pred = self.predict_noise(latents, t, prompt_emb)[0]
314
+ negative_noise_pred = self.predict_noise(latents, t, negative_prompt_emb)[0]
281
315
  else:
282
316
  latents_input = torch.cat([latents, latents], dim=0)
283
317
  t = torch.cat([t, t], dim=0)
@@ -360,6 +394,7 @@ class ZImagePipeline(BasePipeline):
360
394
  prompt_emb=prompt_embeds,
361
395
  negative_prompt_emb=negative_prompt_embeds,
362
396
  batch_cfg=self.config.batch_cfg,
397
+ cfg_scale=cfg_scale,
363
398
  cfg_truncation=cfg_truncation,
364
399
  cfg_normalization=cfg_normalization,
365
400
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diffsynth_engine
3
- Version: 0.6.1.dev36
3
+ Version: 0.6.1.dev38
4
4
  Author: MuseAI x ModelScope
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Operating System :: OS Independent
@@ -86,7 +86,7 @@ diffsynth_engine/conf/tokenizers/z_image/tokenizer/tokenizer_config.json,sha256=
86
86
  diffsynth_engine/conf/tokenizers/z_image/tokenizer/vocab.json,sha256=yhDX6fs-0YV13R4neiV5wW0QjjLydDloSvoOELFECRA,2776833
87
87
  diffsynth_engine/configs/__init__.py,sha256=biluGSEw78PPwO7XFlms16iuWXDiM0Eg_qsOMMTY0NQ,1409
88
88
  diffsynth_engine/configs/controlnet.py,sha256=f3vclyP3lcAjxDGD9C1vevhqqQ7W2LL_c6Wye0uxk3Q,1180
89
- diffsynth_engine/configs/pipeline.py,sha256=0WmKz_mykmJkRCGwv9DjuN8s27LppkD_Ier4VtovZSg,15307
89
+ diffsynth_engine/configs/pipeline.py,sha256=RqhPAZOCpIMkFk-OsfiNYlqpqM-7B52ny0Zcr9Ix7wY,15310
90
90
  diffsynth_engine/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
91
91
  diffsynth_engine/models/__init__.py,sha256=8Ze7cSE8InetgXWTNb0neVA2Q44K7WlE-h7O-02m2sY,119
92
92
  diffsynth_engine/models/base.py,sha256=svao__9WH8VNcyXz5o5dzywYXDcGV0YV9IfkLzDKews,2558
@@ -160,7 +160,7 @@ diffsynth_engine/pipelines/sdxl_image.py,sha256=v7ZACGPb6EcBunL6e5E9jynSQjE7GQx8
160
160
  diffsynth_engine/pipelines/utils.py,sha256=HZbJHErNJS1DhlwJKvZ9dY7Kh8Zdlsw3zE2e88TYGRY,2277
161
161
  diffsynth_engine/pipelines/wan_s2v.py,sha256=QHlCLMqlmnp55iYm2mzg4qCq4jceRAP3Zt5Mubz3mAM,29384
162
162
  diffsynth_engine/pipelines/wan_video.py,sha256=9xjSvQ4mlVEDdaL6QuUURj4iyxhJ2xABBphQjkfzK8s,31323
163
- diffsynth_engine/pipelines/z_image.py,sha256=gSBhKV7TBL9xvCUrABdZA0kNqQzPuawmEv8OcI6KTcs,14756
163
+ diffsynth_engine/pipelines/z_image.py,sha256=VvqjxsKRsmP2tfWg9nDlcQu5oEzIRFa2wtuArzjQAlk,16151
164
164
  diffsynth_engine/processor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
165
165
  diffsynth_engine/processor/canny_processor.py,sha256=hV30NlblTkEFUAmF_O-LJrNlGVM2SFrqq6okfF8VpOo,602
166
166
  diffsynth_engine/processor/depth_processor.py,sha256=dQvs3JsnyMbz4dyI9QoR8oO-mMFBFAgNvgqeCoaU5jk,1532
@@ -199,8 +199,8 @@ diffsynth_engine/utils/video.py,sha256=8FCaeqIdUsWMgWI_6SO9SPynsToGcLCQAVYFTc4CD
199
199
  diffsynth_engine/utils/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
200
200
  diffsynth_engine/utils/memory/linear_regression.py,sha256=oW_EQEw13oPoyUrxiL8A7Ksa5AuJ2ynI2qhCbfAuZbg,3930
201
201
  diffsynth_engine/utils/memory/memory_predcit_model.py,sha256=EXprSl_zlVjgfMWNXP-iw83Ot3hyMcgYaRPv-dvyL84,3943
202
- diffsynth_engine-0.6.1.dev36.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
203
- diffsynth_engine-0.6.1.dev36.dist-info/METADATA,sha256=ShMi7F-NEzgV7Tse-BFIzIa5N7t8FLVpAHA1FkDWp14,1164
204
- diffsynth_engine-0.6.1.dev36.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
205
- diffsynth_engine-0.6.1.dev36.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
206
- diffsynth_engine-0.6.1.dev36.dist-info/RECORD,,
202
+ diffsynth_engine-0.6.1.dev38.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
203
+ diffsynth_engine-0.6.1.dev38.dist-info/METADATA,sha256=0fI0prUJox3z_sDzvhl-wh6wlCCYCA7N-naxpobysL0,1164
204
+ diffsynth_engine-0.6.1.dev38.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
205
+ diffsynth_engine-0.6.1.dev38.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
206
+ diffsynth_engine-0.6.1.dev38.dist-info/RECORD,,