PyPI - diffsynth-engine - Versions diffs - 0.2.1__tar.gz → 0.2.2__tar.gz - Mend

diffsynth-engine 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (228) hide show

{diffsynth_engine-0.2.1 → diffsynth_engine-0.2.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diffsynth_engine
-Version: 0.2.1
+Version: 0.2.2
 Author: MuseAI x ModelScope
 Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent

{diffsynth_engine-0.2.1 → diffsynth_engine-0.2.2}/diffsynth_engine/models/basic/transformer_helper.py RENAMED Viewed

@@ -65,17 +65,29 @@ class RoPEEmbedding(nn.Module):
 class RMSNorm(nn.Module):
-    def __init__(self, dim, eps, device: str, dtype: torch.dtype):
+    def __init__(
+        self,
+        dim,
+        eps=1e-5,
+        elementwise_affine=True,
+        device: str = "cuda:0",
+        dtype: torch.dtype = torch.bfloat16,
+    ):
         super().__init__()
-        self.weight = nn.Parameter(torch.ones((dim,), device=device, dtype=dtype))
         self.eps = eps
-    def forward(self, hidden_states):
-        input_dtype = hidden_states.dtype
-        variance = hidden_states.to(torch.float32).square().mean(-1, keepdim=True)
-        hidden_states = hidden_states * torch.rsqrt(variance + self.eps)
-        hidden_states = hidden_states.to(input_dtype) * self.weight
-        return hidden_states
+        self.dim = dim
+        self.elementwise_affine = elementwise_affine
+        if elementwise_affine:
+            self.weight = nn.Parameter(torch.ones(dim, device=device, dtype=dtype))
+    def norm(self, x):
+        return x * torch.rsqrt(x.pow(2).mean(dim=-1, keepdim=True) + self.eps)
+    def forward(self, x):
+        norm_result = self.norm(x.float()).to(x.dtype)
+        if self.elementwise_affine:
+            return norm_result * self.weight
+        return norm_result
 class NewGELUActivation(nn.Module):

{diffsynth_engine-0.2.1 → diffsynth_engine-0.2.2}/diffsynth_engine/models/components/vae.py RENAMED Viewed

@@ -67,6 +67,7 @@ class VAEAttentionBlock(nn.Module):
         num_layers=1,
         norm_num_groups=32,
         eps=1e-5,
+        attn_impl: str = "auto",
         device: str = "cuda:0",
         dtype: torch.dtype = torch.float32,
     ):
@@ -86,6 +87,7 @@ class VAEAttentionBlock(nn.Module):
                     bias_q=True,
                     bias_kv=True,
                     bias_out=True,
+                    attn_impl=attn_impl,
                     device=device,
                     dtype=dtype,
                 )
@@ -119,6 +121,7 @@ class VAEDecoder(PreTrainedModel):
         scaling_factor: float = 0.18215,
         shift_factor: float = 0,
         use_post_quant_conv: bool = True,
+        attn_impl: str = "auto",
         device: str = "cuda:0",
         dtype: torch.dtype = torch.float32,
     ):
@@ -137,7 +140,7 @@ class VAEDecoder(PreTrainedModel):
             [
                 # UNetMidBlock2D
                 ResnetBlock(512, 512, eps=1e-6, device=device, dtype=dtype),
-                VAEAttentionBlock(1, 512, 512, 1, eps=1e-6, device=device, dtype=dtype),
+                VAEAttentionBlock(1, 512, 512, 1, eps=1e-6, device=device, dtype=dtype, attn_impl=attn_impl),
                 ResnetBlock(512, 512, eps=1e-6, device=device, dtype=dtype),
                 # UpDecoderBlock2D
                 ResnetBlock(512, 512, eps=1e-6, device=device, dtype=dtype),
@@ -202,6 +205,7 @@ class VAEDecoder(PreTrainedModel):
         scaling_factor: float = 0.18215,
         shift_factor: float = 0,
         use_post_quant_conv: bool = True,
+        attn_impl: str = "auto",
     ):
         with no_init_weights():
             model = torch.nn.utils.skip_init(
@@ -210,6 +214,7 @@ class VAEDecoder(PreTrainedModel):
                 scaling_factor=scaling_factor,
                 shift_factor=shift_factor,
                 use_post_quant_conv=use_post_quant_conv,
+                attn_impl=attn_impl,
                 device=device,
                 dtype=dtype,
             )
@@ -230,6 +235,7 @@ class VAEEncoder(PreTrainedModel):
         scaling_factor: float = 0.18215,
         shift_factor: float = 0,
         use_quant_conv: bool = True,
+        attn_impl: str = "auto",
         device: str = "cuda:0",
         dtype: torch.dtype = torch.float32,
     ):
@@ -263,7 +269,7 @@ class VAEEncoder(PreTrainedModel):
                 ResnetBlock(512, 512, eps=1e-6, device=device, dtype=dtype),
                 # UNetMidBlock2D
                 ResnetBlock(512, 512, eps=1e-6, device=device, dtype=dtype),
-                VAEAttentionBlock(1, 512, 512, 1, eps=1e-6, device=device, dtype=dtype),
+                VAEAttentionBlock(1, 512, 512, 1, eps=1e-6, device=device, dtype=dtype, attn_impl=attn_impl),
                 ResnetBlock(512, 512, eps=1e-6, device=device, dtype=dtype),
             ]
         )
@@ -309,6 +315,7 @@ class VAEEncoder(PreTrainedModel):
         scaling_factor: float = 0.18215,
         shift_factor: float = 0,
         use_quant_conv: bool = True,
+        attn_impl: str = "auto",
     ):
         with no_init_weights():
             model = torch.nn.utils.skip_init(
@@ -317,6 +324,7 @@ class VAEEncoder(PreTrainedModel):
                 scaling_factor=scaling_factor,
                 shift_factor=shift_factor,
                 use_quant_conv=use_quant_conv,
+                attn_impl=attn_impl,
                 device=device,
                 dtype=dtype,
             )
@@ -338,6 +346,7 @@ class VAE(PreTrainedModel):
         shift_factor: float = 0,
         use_quant_conv: bool = True,
         use_post_quant_conv: bool = True,
+        attn_impl: str = "auto",
         device: str = "cuda:0",
         dtype: torch.dtype = torch.float32,
     ):
@@ -347,6 +356,7 @@ class VAE(PreTrainedModel):
             scaling_factor=scaling_factor,
             shift_factor=shift_factor,
             use_quant_conv=use_quant_conv,
+            attn_impl=attn_impl,
             device=device,
             dtype=dtype,
         )
@@ -355,6 +365,7 @@ class VAE(PreTrainedModel):
             scaling_factor=scaling_factor,
             shift_factor=shift_factor,
             use_post_quant_conv=use_post_quant_conv,
+            attn_impl=attn_impl,
             device=device,
             dtype=dtype,
         )
@@ -376,6 +387,7 @@ class VAE(PreTrainedModel):
         shift_factor: float = 0,
         use_quant_conv: bool = True,
         use_post_quant_conv: bool = True,
+        attn_impl: str = "auto",
     ):
         with no_init_weights():
             model = torch.nn.utils.skip_init(
@@ -385,6 +397,7 @@ class VAE(PreTrainedModel):
                 shift_factor=shift_factor,
                 use_quant_conv=use_quant_conv,
                 use_post_quant_conv=use_post_quant_conv,
+                attn_impl=attn_impl,
                 device=device,
                 dtype=dtype,
             )

{diffsynth_engine-0.2.1 → diffsynth_engine-0.2.2}/diffsynth_engine/models/flux/flux_dit.py RENAMED Viewed

@@ -227,7 +227,7 @@ class FluxJointTransformerBlock(nn.Module):
             nn.Linear(dim * 4, dim, device=device, dtype=dtype),
         )
-    def forward(self, hidden_states_a, hidden_states_b, temb, image_rotary_emb):
+    def forward(self, hidden_states_a, hidden_states_b, temb, image_rotary_emb, image_emb):
         norm_hidden_states_a, gate_msa_a, shift_mlp_a, scale_mlp_a, gate_mlp_a = self.norm1_a(hidden_states_a, emb=temb)
         norm_hidden_states_b, gate_msa_b, shift_mlp_b, scale_mlp_b, gate_mlp_b = self.norm1_b(hidden_states_b, emb=temb)
@@ -293,7 +293,7 @@ class FluxSingleTransformerBlock(nn.Module):
         hidden_states = hidden_states.to(q.dtype)
         return hidden_states
-    def forward(self, hidden_states_a, hidden_states_b, temb, image_rotary_emb):
+    def forward(self, hidden_states_a, hidden_states_b, temb, image_rotary_emb, image_emb):
         residual = hidden_states_a
         norm_hidden_states, gate = self.norm(hidden_states_a, emb=temb)
         hidden_states_a = self.to_qkv_mlp(norm_hidden_states)
@@ -386,6 +386,7 @@ class FluxDiT(PreTrainedModel):
         timestep,
         prompt_emb,
         pooled_prompt_emb,
+        image_emb,
         guidance,
         text_ids,
         image_ids=None,
@@ -421,10 +422,13 @@ class FluxDiT(PreTrainedModel):
                         prompt_emb,
                         conditioning,
                         image_rotary_emb,
+                        image_emb,
                         use_reentrant=False,
                     )
                 else:
-                    hidden_states, prompt_emb = block(hidden_states, prompt_emb, conditioning, image_rotary_emb)
+                    hidden_states, prompt_emb = block(
+                        hidden_states, prompt_emb, conditioning, image_rotary_emb, image_emb
+                    )
                 if controlnet_double_block_output is not None:
                     interval_control = len(self.blocks) / len(controlnet_double_block_output)
                     interval_control = int(np.ceil(interval_control))
@@ -439,10 +443,13 @@ class FluxDiT(PreTrainedModel):
                         prompt_emb,
                         conditioning,
                         image_rotary_emb,
+                        image_emb,
                         use_reentrant=False,
                     )
                 else:
-                    hidden_states, prompt_emb = block(hidden_states, prompt_emb, conditioning, image_rotary_emb)
+                    hidden_states, prompt_emb = block(
+                        hidden_states, prompt_emb, conditioning, image_rotary_emb, image_emb
+                    )
                 if controlnet_single_block_output is not None:
                     interval_control = len(self.single_blocks) / len(controlnet_double_block_output)
                     interval_control = int(np.ceil(interval_control))

{diffsynth_engine-0.2.1 → diffsynth_engine-0.2.2}/diffsynth_engine/models/sd/sd_vae.py RENAMED Viewed

@@ -6,33 +6,44 @@ from diffsynth_engine.models.utils import no_init_weights
 class SDVAEEncoder(VAEEncoder):
-    def __init__(self, device: str = "cuda:0", dtype: torch.dtype = torch.float32):
+    def __init__(self, attn_impl: str = "auto", device: str = "cuda:0", dtype: torch.dtype = torch.float32):
         super().__init__(
-            latent_channels=4, scaling_factor=0.18215, shift_factor=0, use_quant_conv=True, device=device, dtype=dtype
+            latent_channels=4,
+            scaling_factor=0.18215,
+            shift_factor=0,
+            use_quant_conv=True,
+            attn_impl=attn_impl,
+            device=device,
+            dtype=dtype,
         )
     @classmethod
-    def from_state_dict(cls, state_dict: Dict[str, torch.Tensor], device: str, dtype: torch.dtype):
+    def from_state_dict(
+        cls, state_dict: Dict[str, torch.Tensor], device: str, dtype: torch.dtype, attn_impl: str = "auto"
+    ):
         with no_init_weights():
-            model = torch.nn.utils.skip_init(cls, device=device, dtype=dtype)
+            model = torch.nn.utils.skip_init(cls, device=device, dtype=dtype, attn_impl=attn_impl)
         model.load_state_dict(state_dict)
         return model
 class SDVAEDecoder(VAEDecoder):
-    def __init__(self, device: str = "cuda:0", dtype: torch.dtype = torch.float32):
+    def __init__(self, attn_impl: str = "auto", device: str = "cuda:0", dtype: torch.dtype = torch.float32):
         super().__init__(
             latent_channels=4,
             scaling_factor=0.18215,
             shift_factor=0,
             use_post_quant_conv=True,
+            attn_impl=attn_impl,
             device=device,
             dtype=dtype,
         )
     @classmethod
-    def from_state_dict(cls, state_dict: Dict[str, torch.Tensor], device: str, dtype: torch.dtype):
+    def from_state_dict(
+        cls, state_dict: Dict[str, torch.Tensor], device: str, dtype: torch.dtype, attn_impl: str = "auto"
+    ):
         with no_init_weights():
-            model = torch.nn.utils.skip_init(cls, device=device, dtype=dtype)
+            model = torch.nn.utils.skip_init(cls, device=device, dtype=dtype, attn_impl=attn_impl)
         model.load_state_dict(state_dict)
         return model

{diffsynth_engine-0.2.1 → diffsynth_engine-0.2.2}/diffsynth_engine/models/sdxl/sdxl_vae.py RENAMED Viewed

@@ -6,33 +6,44 @@ from diffsynth_engine.models.utils import no_init_weights
 class SDXLVAEEncoder(VAEEncoder):
-    def __init__(self, device: str = "cuda:0", dtype: torch.dtype = torch.float32):
+    def __init__(self, attn_impl: str = "auto", device: str = "cuda:0", dtype: torch.dtype = torch.float32):
         super().__init__(
-            latent_channels=4, scaling_factor=0.13025, shift_factor=0, use_quant_conv=True, device=device, dtype=dtype
+            latent_channels=4,
+            scaling_factor=0.13025,
+            shift_factor=0,
+            use_quant_conv=True,
+            attn_impl=attn_impl,
+            device=device,
+            dtype=dtype,
         )
     @classmethod
-    def from_state_dict(cls, state_dict: Dict[str, torch.Tensor], device: str, dtype: torch.dtype):
+    def from_state_dict(
+        cls, state_dict: Dict[str, torch.Tensor], device: str, dtype: torch.dtype, attn_impl: str = "auto"
+    ):
         with no_init_weights():
-            model = torch.nn.utils.skip_init(cls, device=device, dtype=dtype)
+            model = torch.nn.utils.skip_init(cls, device=device, dtype=dtype, attn_impl=attn_impl)
         model.load_state_dict(state_dict)
         return model
 class SDXLVAEDecoder(VAEDecoder):
-    def __init__(self, device: str = "cuda:0", dtype: torch.dtype = torch.float32):
+    def __init__(self, attn_impl: str = "auto", device: str = "cuda:0", dtype: torch.dtype = torch.float32):
         super().__init__(
             latent_channels=4,
             scaling_factor=0.13025,
             shift_factor=0,
             use_post_quant_conv=True,
+            attn_impl=attn_impl,
             device=device,
             dtype=dtype,
         )
     @classmethod
-    def from_state_dict(cls, state_dict: Dict[str, torch.Tensor], device: str, dtype: torch.dtype):
+    def from_state_dict(
+        cls, state_dict: Dict[str, torch.Tensor], device: str, dtype: torch.dtype, attn_impl: str = "auto"
+    ):
         with no_init_weights():
-            model = torch.nn.utils.skip_init(cls, device=device, dtype=dtype)
+            model = torch.nn.utils.skip_init(cls, device=device, dtype=dtype, attn_impl=attn_impl)
         model.load_state_dict(state_dict)
         return model

{diffsynth_engine-0.2.1 → diffsynth_engine-0.2.2}/diffsynth_engine/models/wan/wan_dit.py RENAMED Viewed

@@ -8,6 +8,7 @@ from einops import rearrange
 from diffsynth_engine.models.base import StateDictConverter, PreTrainedModel
 from diffsynth_engine.models.basic.attention import attention, long_context_attention
+from diffsynth_engine.models.basic.transformer_helper import RMSNorm
 from diffsynth_engine.models.utils import no_init_weights
 from diffsynth_engine.utils.constants import (
     WAN_DIT_1_3B_T2V_CONFIG_FILE,
@@ -57,26 +58,6 @@ def rope_apply(x, freqs):
     return x_out.to(x.dtype).flatten(3)
-class RMSNorm(nn.Module):
-    def __init__(
-        self,
-        dim,
-        eps=1e-5,
-        device: str = "cuda:0",
-        dtype: torch.dtype = torch.bfloat16,
-    ):
-        super().__init__()
-        self.eps = eps
-        self.dim = dim
-        self.weight = nn.Parameter(torch.ones(dim, device=device, dtype=dtype))
-    def norm(self, x):
-        return x * torch.rsqrt(x.pow(2).mean(dim=-1, keepdim=True) + self.eps)
-    def forward(self, x):
-        return self.norm(x.float()).to(x.dtype) * self.weight
 class SelfAttention(nn.Module):
     def __init__(
         self,

{diffsynth_engine-0.2.1 → diffsynth_engine-0.2.2}/diffsynth_engine/pipelines/flux_image.py RENAMED Viewed

@@ -366,6 +366,7 @@ class FluxImagePipeline(BasePipeline):
         negative_prompt_emb: torch.Tensor,
         positive_add_text_embeds: torch.Tensor,
         negative_add_text_embeds: torch.Tensor,
+        image_emb: torch.Tensor | None,
         image_ids: torch.Tensor,
         text_ids: torch.Tensor,
         cfg_scale: float,
@@ -382,6 +383,7 @@ class FluxImagePipeline(BasePipeline):
                 timestep,
                 positive_prompt_emb,
                 positive_add_text_embeds,
+                image_emb,
                 image_ids,
                 text_ids,
                 guidance,
@@ -396,6 +398,7 @@ class FluxImagePipeline(BasePipeline):
                 timestep,
                 positive_prompt_emb,
                 positive_add_text_embeds,
+                image_emb,
                 image_ids,
                 text_ids,
                 guidance,
@@ -408,6 +411,7 @@ class FluxImagePipeline(BasePipeline):
                 timestep,
                 negative_prompt_emb,
                 negative_add_text_embeds,
+                image_emb,
                 image_ids,
                 text_ids,
                 guidance,
@@ -428,6 +432,7 @@ class FluxImagePipeline(BasePipeline):
                 timestep,
                 prompt_emb,
                 add_text_embeds,
+                image_emb,
                 image_ids,
                 text_ids,
                 guidance,
@@ -444,6 +449,7 @@ class FluxImagePipeline(BasePipeline):
         timestep: torch.Tensor,
         prompt_emb: torch.Tensor,
         add_text_embeds: torch.Tensor,
+        image_emb: torch.Tensor | None,
         image_ids: torch.Tensor,
         text_ids: torch.Tensor,
         guidance: float,
@@ -468,6 +474,7 @@ class FluxImagePipeline(BasePipeline):
             timestep=timestep,
             prompt_emb=prompt_emb,
             pooled_prompt_emb=add_text_embeds,
+            image_emb=image_emb,
             guidance=guidance,
             text_ids=text_ids,
             image_ids=image_ids,
@@ -579,14 +586,24 @@ class FluxImagePipeline(BasePipeline):
     def enable_fp8_linear(self):
         enable_fp8_linear(self.dit)
+    def load_ip_adapter(self, ip_adapter):
+        self.ip_adapter = ip_adapter
+        self.ip_adapter.inject(self.dit)
+    def unload_ip_adapter(self):
+        if self.ip_adapter is not None:
+            self.ip_adapter.remove(self.dit)
+            self.ip_adapter = None
     @torch.no_grad()
     def __call__(
         self,
         prompt: str,
         negative_prompt: str = "",
-        cfg_scale: float = 1.0,
+        ref_image: Image.Image | None = None,  # use for ip-adapter, instance-id
+        cfg_scale: float = 1.0,  # 官方的flux模型不支持cfg调整
         clip_skip: int = 2,
-        input_image: Image.Image | None = None,
+        input_image: Image.Image | None = None,  # use for img2img
         denoising_strength: float = 1.0,
         height: int = 1024,
         width: int = 1024,
@@ -624,6 +641,11 @@ class FluxImagePipeline(BasePipeline):
         # ControlNet
         controlnet_params = self.prepare_controlnet_params(controlnet_params, h=height, w=width)
+        # image_emb
+        image_emb = (
+            self.ip_adapter.encode_image(ref_image) if self.ip_adapter is not None and ref_image is not None else None
+        )
         # Denoise
         self.load_models_to_device(["dit"])
         for i, timestep in enumerate(tqdm(timesteps)):
@@ -635,6 +657,7 @@ class FluxImagePipeline(BasePipeline):
                 negative_prompt_emb=negative_prompt_emb,
                 positive_add_text_embeds=positive_add_text_embeds,
                 negative_add_text_embeds=negative_add_text_embeds,
+                image_emb=image_emb,
                 image_ids=image_ids,
                 text_ids=text_ids,
                 cfg_scale=cfg_scale,

{diffsynth_engine-0.2.1 → diffsynth_engine-0.2.2}/diffsynth_engine/pipelines/sd_image.py RENAMED Viewed

@@ -217,8 +217,12 @@ class SDImagePipeline(BasePipeline):
                 clip_state_dict, device=init_device, dtype=model_config.clip_dtype
             )
             unet = SDUNet.from_state_dict(unet_state_dict, device=init_device, dtype=model_config.unet_dtype)
-        vae_decoder = SDVAEDecoder.from_state_dict(vae_state_dict, device=init_device, dtype=model_config.vae_dtype)
-        vae_encoder = SDVAEEncoder.from_state_dict(vae_state_dict, device=init_device, dtype=model_config.vae_dtype)
+        vae_decoder = SDVAEDecoder.from_state_dict(
+            vae_state_dict, device=init_device, dtype=model_config.vae_dtype, attn_impl="sdpa"
+        )
+        vae_encoder = SDVAEEncoder.from_state_dict(
+            vae_state_dict, device=init_device, dtype=model_config.vae_dtype, attn_impl="sdpa"
+        )
         pipe = cls(
             tokenizer=tokenizer,

{diffsynth_engine-0.2.1 → diffsynth_engine-0.2.2}/diffsynth_engine/pipelines/sdxl_image.py RENAMED Viewed

@@ -203,8 +203,12 @@ class SDXLImagePipeline(BasePipeline):
                 clip_g_state_dict, device=init_device, dtype=model_config.clip_g_dtype
             )
             unet = SDXLUNet.from_state_dict(unet_state_dict, device=init_device, dtype=model_config.unet_dtype)
-        vae_decoder = SDXLVAEDecoder.from_state_dict(vae_state_dict, device=init_device, dtype=model_config.vae_dtype)
-        vae_encoder = SDXLVAEEncoder.from_state_dict(vae_state_dict, device=init_device, dtype=model_config.vae_dtype)
+        vae_decoder = SDXLVAEDecoder.from_state_dict(
+            vae_state_dict, device=init_device, dtype=model_config.vae_dtype, attn_impl="sdpa"
+        )
+        vae_encoder = SDXLVAEEncoder.from_state_dict(
+            vae_state_dict, device=init_device, dtype=model_config.vae_dtype, attn_impl="sdpa"
+        )
         pipe = cls(
             tokenizer=tokenizer,
@@ -387,6 +391,11 @@ class SDXLImagePipeline(BasePipeline):
         self.load_models_to_device(["unet"])
         for i, timestep in enumerate(tqdm(timesteps)):
             timestep = timestep.unsqueeze(0).to(dtype=self.dtype)
+            positive_prompt_emb = positive_prompt_emb.to(self.dtype)
+            negative_prompt_emb = negative_prompt_emb.to(self.dtype)
+            positive_add_text_embeds = positive_add_text_embeds.to(self.dtype)
+            negative_add_text_embeds = negative_add_text_embeds.to(self.dtype)
+            add_time_id = add_time_id.to(self.dtype)
             # Classifier-free guidance
             noise_pred = self.predict_noise_with_cfg(
                 latents=latents,

{diffsynth_engine-0.2.1 → diffsynth_engine-0.2.2}/diffsynth_engine/utils/download.py RENAMED Viewed

@@ -26,9 +26,10 @@ def fetch_model(
     path: Optional[str] = None,
     access_token: Optional[str] = None,
     source: str = "modelscope",
+    fetch_safetensors: bool = True,
 ) -> str:
     if source == "modelscope":
-        return fetch_modelscope_model(model_uri, revision, path, access_token)
+        return fetch_modelscope_model(model_uri, revision, path, access_token, fetch_safetensors)
     if source == "civitai":
         return fetch_civitai_model(model_uri)
     raise ValueError(f'source should be one of {MODEL_SOURCES} but got "{source}"')
@@ -39,6 +40,7 @@ def fetch_modelscope_model(
     revision: Optional[str] = None,
     path: Optional[str] = None,
     access_token: Optional[str] = None,
+    fetch_safetensors: bool = True,
 ) -> str:
     lock_file_name = f"modelscope.{model_id.replace('/', '--')}.{revision if revision else '__version'}.lock"
     lock_file_path = os.path.join(DIFFSYNTH_FILELOCK_DIR, lock_file_name)
@@ -55,7 +57,7 @@ def fetch_modelscope_model(
     else:
         path = dirpath
-    if os.path.isdir(path):
+    if os.path.isdir(path) and fetch_safetensors:
         return _fetch_safetensors(path)
     return path

{diffsynth_engine-0.2.1 → diffsynth_engine-0.2.2}/diffsynth_engine.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: diffsynth_engine
-Version: 0.2.1
+Version: 0.2.2
 Author: MuseAI x ModelScope
 Classifier: Programming Language :: Python :: 3
 Classifier: Operating System :: OS Independent