diffsynth-engine 0.6.1.dev21__py3-none-any.whl → 0.6.1.dev22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -242,6 +242,8 @@ class QwenImagePipelineConfig(AttentionConfig, OptimizationConfig, ParallelConfi
242
242
  vae_tile_size: Tuple[int, int] = (34, 34)
243
243
  vae_tile_stride: Tuple[int, int] = (18, 16)
244
244
 
245
+ load_encoder: bool = True
246
+
245
247
  @classmethod
246
248
  def basic_config(
247
249
  cls,
@@ -186,6 +186,7 @@ class QwenImagePipeline(BasePipeline):
186
186
  logger.info(f"loading state dict from {config.vae_path} ...")
187
187
  vae_state_dict = cls.load_model_checkpoint(config.vae_path, device="cpu", dtype=config.vae_dtype)
188
188
 
189
+ encoder_state_dict = None
189
190
  if config.encoder_path is None:
190
191
  config.encoder_path = fetch_model(
191
192
  "MusePublic/Qwen-image",
@@ -197,8 +198,9 @@ class QwenImagePipeline(BasePipeline):
197
198
  "text_encoder/model-00004-of-00004.safetensors",
198
199
  ],
199
200
  )
200
- logger.info(f"loading state dict from {config.encoder_path} ...")
201
- encoder_state_dict = cls.load_model_checkpoint(config.encoder_path, device="cpu", dtype=config.encoder_dtype)
201
+ if config.load_encoder:
202
+ logger.info(f"loading state dict from {config.encoder_path} ...")
203
+ encoder_state_dict = cls.load_model_checkpoint(config.encoder_path, device="cpu", dtype=config.encoder_dtype)
202
204
 
203
205
  state_dicts = QwenImageStateDicts(
204
206
  model=model_state_dict,
@@ -225,22 +227,25 @@ class QwenImagePipeline(BasePipeline):
225
227
  @classmethod
226
228
  def _from_state_dict(cls, state_dicts: QwenImageStateDicts, config: QwenImagePipelineConfig) -> "QwenImagePipeline":
227
229
  init_device = "cpu" if config.offload_mode is not None else config.device
228
- tokenizer = Qwen2TokenizerFast.from_pretrained(QWEN_IMAGE_TOKENIZER_CONF_PATH)
229
- processor = Qwen2VLProcessor.from_pretrained(
230
- tokenizer_config_path=QWEN_IMAGE_TOKENIZER_CONF_PATH,
231
- image_processor_config_path=QWEN_IMAGE_PROCESSOR_CONFIG_FILE,
232
- )
233
- with open(QWEN_IMAGE_VISION_CONFIG_FILE, "r", encoding="utf-8") as f:
234
- vision_config = Qwen2_5_VLVisionConfig(**json.load(f))
235
- with open(QWEN_IMAGE_CONFIG_FILE, "r", encoding="utf-8") as f:
236
- text_config = Qwen2_5_VLConfig(**json.load(f))
237
- encoder = Qwen2_5_VLForConditionalGeneration.from_state_dict(
238
- state_dicts.encoder,
239
- vision_config=vision_config,
240
- config=text_config,
241
- device=("cpu" if config.use_fsdp else init_device),
242
- dtype=config.encoder_dtype,
243
- )
230
+ tokenizer, processor, encoder = None, None, None
231
+ if config.load_encoder:
232
+ tokenizer = Qwen2TokenizerFast.from_pretrained(QWEN_IMAGE_TOKENIZER_CONF_PATH)
233
+ processor = Qwen2VLProcessor.from_pretrained(
234
+ tokenizer_config_path=QWEN_IMAGE_TOKENIZER_CONF_PATH,
235
+ image_processor_config_path=QWEN_IMAGE_PROCESSOR_CONFIG_FILE,
236
+ )
237
+ with open(QWEN_IMAGE_VISION_CONFIG_FILE, "r", encoding="utf-8") as f:
238
+ vision_config = Qwen2_5_VLVisionConfig(**json.load(f))
239
+ with open(QWEN_IMAGE_CONFIG_FILE, "r", encoding="utf-8") as f:
240
+ text_config = Qwen2_5_VLConfig(**json.load(f))
241
+ encoder = Qwen2_5_VLForConditionalGeneration.from_state_dict(
242
+ state_dicts.encoder,
243
+ vision_config=vision_config,
244
+ config=text_config,
245
+ device=("cpu" if config.use_fsdp else init_device),
246
+ dtype=config.encoder_dtype,
247
+ )
248
+
244
249
  with open(QWEN_IMAGE_VAE_CONFIG_FILE, "r", encoding="utf-8") as f:
245
250
  vae_config = json.load(f)
246
251
  vae = QwenImageVAE.from_state_dict(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diffsynth_engine
3
- Version: 0.6.1.dev21
3
+ Version: 0.6.1.dev22
4
4
  Author: MuseAI x ModelScope
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Operating System :: OS Independent
@@ -80,7 +80,7 @@ diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer.json,sha256=bhl7TT29cdoU
80
80
  diffsynth_engine/conf/tokenizers/wan/umt5-xxl/tokenizer_config.json,sha256=7Zo6iw-qcacKMoR-BDX-A25uES1N9O23u0ipIeNE3AU,61728
81
81
  diffsynth_engine/configs/__init__.py,sha256=f6Y-j_ZQs7bM4Lr7Mh9CXFEBrSNLc9k5GJyJqjLAGiY,1187
82
82
  diffsynth_engine/configs/controlnet.py,sha256=f3vclyP3lcAjxDGD9C1vevhqqQ7W2LL_c6Wye0uxk3Q,1180
83
- diffsynth_engine/configs/pipeline.py,sha256=u4P0JnzSsvS_tfbTYyUARdT88k7TEGRYNqjaAPZlY40,13223
83
+ diffsynth_engine/configs/pipeline.py,sha256=FwHIvj2VdxtmiHxOUYoAzs5YVBprxobhV9AJ2CFrV4c,13254
84
84
  diffsynth_engine/kernels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
85
85
  diffsynth_engine/models/__init__.py,sha256=8Ze7cSE8InetgXWTNb0neVA2Q44K7WlE-h7O-02m2sY,119
86
86
  diffsynth_engine/models/base.py,sha256=BA5vgMqfy_cjuL2OtXbrFD-Qg5xQnaumHpj5TabwSy8,2559
@@ -142,7 +142,7 @@ diffsynth_engine/pipelines/__init__.py,sha256=jh-4LSJ0vqlXiT8BgFgRIQxuAr2atEPyHr
142
142
  diffsynth_engine/pipelines/base.py,sha256=BWW7LW0E2qwu8G-6bP3nmeO7VCQxC8srOo8tE4aKA4o,14993
143
143
  diffsynth_engine/pipelines/flux_image.py,sha256=vJKvnYmeeQVX2O1Zjtm4NLrltBp66VSZ-KjAUqJ8zJ8,50936
144
144
  diffsynth_engine/pipelines/hunyuan3d_shape.py,sha256=TNV0Wr09Dj2bzzlpua9WioCClOj3YiLfE6utI9aWL8A,8164
145
- diffsynth_engine/pipelines/qwen_image.py,sha256=jt4rg-U5qWsFD0kUeDwKzgIiTAC80Cj8aq1YQOR1_-k,33052
145
+ diffsynth_engine/pipelines/qwen_image.py,sha256=rksB8tiAEp9TIcLLca269dNFQRPIDxffThKRMuR06A0,33280
146
146
  diffsynth_engine/pipelines/sd_image.py,sha256=nr-Nhsnomq8CsUqhTM3i2l2zG01YjwXdfRXgr_bC3F0,17891
147
147
  diffsynth_engine/pipelines/sdxl_image.py,sha256=v7ZACGPb6EcBunL6e5E9jynSQjE7GQx8etEV-ZLP91g,21704
148
148
  diffsynth_engine/pipelines/utils.py,sha256=lk7sFGEk-fGjgadLpwwppHKG-yZ0RC-4ZmHW7pRRe8A,473
@@ -185,8 +185,8 @@ diffsynth_engine/utils/video.py,sha256=8FCaeqIdUsWMgWI_6SO9SPynsToGcLCQAVYFTc4CD
185
185
  diffsynth_engine/utils/memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
186
186
  diffsynth_engine/utils/memory/linear_regression.py,sha256=oW_EQEw13oPoyUrxiL8A7Ksa5AuJ2ynI2qhCbfAuZbg,3930
187
187
  diffsynth_engine/utils/memory/memory_predcit_model.py,sha256=EXprSl_zlVjgfMWNXP-iw83Ot3hyMcgYaRPv-dvyL84,3943
188
- diffsynth_engine-0.6.1.dev21.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
189
- diffsynth_engine-0.6.1.dev21.dist-info/METADATA,sha256=tdKUjrwahEQ72SA-YSPu8LsaswLKJuDrjEZI_6nYySM,1164
190
- diffsynth_engine-0.6.1.dev21.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
191
- diffsynth_engine-0.6.1.dev21.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
192
- diffsynth_engine-0.6.1.dev21.dist-info/RECORD,,
188
+ diffsynth_engine-0.6.1.dev22.dist-info/licenses/LICENSE,sha256=x7aBqQuVI0IYnftgoTPI_A0I_rjdjPPQkjnU6N2nikM,11346
189
+ diffsynth_engine-0.6.1.dev22.dist-info/METADATA,sha256=K5yUarSjYpbjDvqrG1i7rKrY2r1ILPkcuNupcMTDsvY,1164
190
+ diffsynth_engine-0.6.1.dev22.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
191
+ diffsynth_engine-0.6.1.dev22.dist-info/top_level.txt,sha256=6zgbiIzEHLbhgDKRyX0uBJOV3F6VnGGBRIQvSiYYn6w,17
192
+ diffsynth_engine-0.6.1.dev22.dist-info/RECORD,,