PyPI - diffsynth - Versions diffs - 1.0.0__py3-none-any.whl - Mend

diffsynth 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (120) hide show

diffsynth/__init__.py +6 -0
diffsynth/configs/__init__.py +0 -0
diffsynth/configs/model_config.py +243 -0
diffsynth/controlnets/__init__.py +2 -0
diffsynth/controlnets/controlnet_unit.py +53 -0
diffsynth/controlnets/processors.py +51 -0
diffsynth/data/__init__.py +1 -0
diffsynth/data/simple_text_image.py +35 -0
diffsynth/data/video.py +148 -0
diffsynth/extensions/ESRGAN/__init__.py +118 -0
diffsynth/extensions/FastBlend/__init__.py +63 -0
diffsynth/extensions/FastBlend/api.py +397 -0
diffsynth/extensions/FastBlend/cupy_kernels.py +119 -0
diffsynth/extensions/FastBlend/data.py +146 -0
diffsynth/extensions/FastBlend/patch_match.py +298 -0
diffsynth/extensions/FastBlend/runners/__init__.py +4 -0
diffsynth/extensions/FastBlend/runners/accurate.py +35 -0
diffsynth/extensions/FastBlend/runners/balanced.py +46 -0
diffsynth/extensions/FastBlend/runners/fast.py +141 -0
diffsynth/extensions/FastBlend/runners/interpolation.py +121 -0
diffsynth/extensions/RIFE/__init__.py +242 -0
diffsynth/extensions/__init__.py +0 -0
diffsynth/models/__init__.py +1 -0
diffsynth/models/attention.py +89 -0
diffsynth/models/downloader.py +66 -0
diffsynth/models/hunyuan_dit.py +451 -0
diffsynth/models/hunyuan_dit_text_encoder.py +163 -0
diffsynth/models/kolors_text_encoder.py +1363 -0
diffsynth/models/lora.py +195 -0
diffsynth/models/model_manager.py +536 -0
diffsynth/models/sd3_dit.py +798 -0
diffsynth/models/sd3_text_encoder.py +1107 -0
diffsynth/models/sd3_vae_decoder.py +81 -0
diffsynth/models/sd3_vae_encoder.py +95 -0
diffsynth/models/sd_controlnet.py +588 -0
diffsynth/models/sd_ipadapter.py +57 -0
diffsynth/models/sd_motion.py +199 -0
diffsynth/models/sd_text_encoder.py +321 -0
diffsynth/models/sd_unet.py +1108 -0
diffsynth/models/sd_vae_decoder.py +336 -0
diffsynth/models/sd_vae_encoder.py +282 -0
diffsynth/models/sdxl_ipadapter.py +122 -0
diffsynth/models/sdxl_motion.py +104 -0
diffsynth/models/sdxl_text_encoder.py +759 -0
diffsynth/models/sdxl_unet.py +1899 -0
diffsynth/models/sdxl_vae_decoder.py +24 -0
diffsynth/models/sdxl_vae_encoder.py +24 -0
diffsynth/models/svd_image_encoder.py +505 -0
diffsynth/models/svd_unet.py +2004 -0
diffsynth/models/svd_vae_decoder.py +578 -0
diffsynth/models/svd_vae_encoder.py +139 -0
diffsynth/models/tiler.py +106 -0
diffsynth/pipelines/__init__.py +9 -0
diffsynth/pipelines/base.py +34 -0
diffsynth/pipelines/dancer.py +178 -0
diffsynth/pipelines/hunyuan_image.py +274 -0
diffsynth/pipelines/pipeline_runner.py +105 -0
diffsynth/pipelines/sd3_image.py +132 -0
diffsynth/pipelines/sd_image.py +173 -0
diffsynth/pipelines/sd_video.py +266 -0
diffsynth/pipelines/sdxl_image.py +191 -0
diffsynth/pipelines/sdxl_video.py +223 -0
diffsynth/pipelines/svd_video.py +297 -0
diffsynth/processors/FastBlend.py +142 -0
diffsynth/processors/PILEditor.py +28 -0
diffsynth/processors/RIFE.py +77 -0
diffsynth/processors/__init__.py +0 -0
diffsynth/processors/base.py +6 -0
diffsynth/processors/sequencial_processor.py +41 -0
diffsynth/prompters/__init__.py +6 -0
diffsynth/prompters/base_prompter.py +57 -0
diffsynth/prompters/hunyuan_dit_prompter.py +69 -0
diffsynth/prompters/kolors_prompter.py +353 -0
diffsynth/prompters/prompt_refiners.py +77 -0
diffsynth/prompters/sd3_prompter.py +92 -0
diffsynth/prompters/sd_prompter.py +73 -0
diffsynth/prompters/sdxl_prompter.py +61 -0
diffsynth/schedulers/__init__.py +3 -0
diffsynth/schedulers/continuous_ode.py +59 -0
diffsynth/schedulers/ddim.py +79 -0
diffsynth/schedulers/flow_match.py +51 -0
diffsynth/tokenizer_configs/__init__.py +0 -0
diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/special_tokens_map.json +7 -0
diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/tokenizer_config.json +16 -0
diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/vocab.txt +47020 -0
diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/vocab_org.txt +21128 -0
diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/config.json +28 -0
diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/special_tokens_map.json +1 -0
diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/spiece.model +0 -0
diffsynth/tokenizer_configs/hunyuan_dit/tokenizer_t5/tokenizer_config.json +1 -0
diffsynth/tokenizer_configs/kolors/tokenizer/tokenizer.model +0 -0
diffsynth/tokenizer_configs/kolors/tokenizer/tokenizer_config.json +12 -0
diffsynth/tokenizer_configs/kolors/tokenizer/vocab.txt +0 -0
diffsynth/tokenizer_configs/stable_diffusion/tokenizer/merges.txt +48895 -0
diffsynth/tokenizer_configs/stable_diffusion/tokenizer/special_tokens_map.json +24 -0
diffsynth/tokenizer_configs/stable_diffusion/tokenizer/tokenizer_config.json +34 -0
diffsynth/tokenizer_configs/stable_diffusion/tokenizer/vocab.json +49410 -0
diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_1/merges.txt +48895 -0
diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_1/special_tokens_map.json +30 -0
diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_1/tokenizer_config.json +30 -0
diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_1/vocab.json +49410 -0
diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_2/merges.txt +48895 -0
diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_2/special_tokens_map.json +30 -0
diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_2/tokenizer_config.json +38 -0
diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_2/vocab.json +49410 -0
diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_3/special_tokens_map.json +125 -0
diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_3/spiece.model +0 -0
diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_3/tokenizer.json +129428 -0
diffsynth/tokenizer_configs/stable_diffusion_3/tokenizer_3/tokenizer_config.json +940 -0
diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/merges.txt +40213 -0
diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/special_tokens_map.json +24 -0
diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/tokenizer_config.json +38 -0
diffsynth/tokenizer_configs/stable_diffusion_xl/tokenizer_2/vocab.json +49411 -0
diffsynth/trainers/__init__.py +0 -0
diffsynth/trainers/text_to_image.py +253 -0
diffsynth-1.0.0.dist-info/LICENSE +201 -0
diffsynth-1.0.0.dist-info/METADATA +23 -0
diffsynth-1.0.0.dist-info/RECORD +120 -0
diffsynth-1.0.0.dist-info/WHEEL +5 -0
diffsynth-1.0.0.dist-info/top_level.txt +1 -0

diffsynth/prompters/sdxl_prompter.py ADDED Viewed

@@ -0,0 +1,61 @@
+from .base_prompter import BasePrompter, tokenize_long_prompt
+from ..models.model_manager import ModelManager
+from ..models import SDXLTextEncoder, SDXLTextEncoder2
+from transformers import CLIPTokenizer
+import torch, os
+class SDXLPrompter(BasePrompter):
+    def __init__(
+        self,
+        tokenizer_path=None,
+        tokenizer_2_path=None
+    ):
+        if tokenizer_path is None:
+            base_path = os.path.dirname(os.path.dirname(__file__))
+            tokenizer_path = os.path.join(base_path, "tokenizer_configs/stable_diffusion/tokenizer")
+        if tokenizer_2_path is None:
+            base_path = os.path.dirname(os.path.dirname(__file__))
+            tokenizer_2_path = os.path.join(base_path, "tokenizer_configs/stable_diffusion_xl/tokenizer_2")
+        super().__init__()
+        self.tokenizer = CLIPTokenizer.from_pretrained(tokenizer_path)
+        self.tokenizer_2 = CLIPTokenizer.from_pretrained(tokenizer_2_path)
+        self.text_encoder: SDXLTextEncoder = None
+        self.text_encoder_2: SDXLTextEncoder2 = None
+    def fetch_models(self, text_encoder: SDXLTextEncoder = None, text_encoder_2: SDXLTextEncoder2 = None):
+        self.text_encoder = text_encoder
+        self.text_encoder_2 = text_encoder_2
+    def encode_prompt(
+        self,
+        prompt,
+        clip_skip=1,
+        clip_skip_2=2,
+        positive=True,
+        device="cuda"
+    ):
+        prompt = self.process_prompt(prompt, positive=positive)
+        # 1
+        input_ids = tokenize_long_prompt(self.tokenizer, prompt).to(device)
+        prompt_emb_1 = self.text_encoder(input_ids, clip_skip=clip_skip)
+        # 2
+        input_ids_2 = tokenize_long_prompt(self.tokenizer_2, prompt).to(device)
+        add_text_embeds, prompt_emb_2 = self.text_encoder_2(input_ids_2, clip_skip=clip_skip_2)
+        # Merge
+        if prompt_emb_1.shape[0] != prompt_emb_2.shape[0]:
+            max_batch_size = min(prompt_emb_1.shape[0], prompt_emb_2.shape[0])
+            prompt_emb_1 = prompt_emb_1[: max_batch_size]
+            prompt_emb_2 = prompt_emb_2[: max_batch_size]
+        prompt_emb = torch.concatenate([prompt_emb_1, prompt_emb_2], dim=-1)
+        # For very long prompt, we only use the first 77 tokens to compute `add_text_embeds`.
+        add_text_embeds = add_text_embeds[0:1]
+        prompt_emb = prompt_emb.reshape((1, prompt_emb.shape[0]*prompt_emb.shape[1], -1))
+        return add_text_embeds, prompt_emb

diffsynth/schedulers/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .ddim import EnhancedDDIMScheduler
+from .continuous_ode import ContinuousODEScheduler
+from .flow_match import FlowMatchScheduler

diffsynth/schedulers/continuous_ode.py ADDED Viewed

@@ -0,0 +1,59 @@
+import torch
+class ContinuousODEScheduler():
+    def __init__(self, num_inference_steps=100, sigma_max=700.0, sigma_min=0.002, rho=7.0):
+        self.sigma_max = sigma_max
+        self.sigma_min = sigma_min
+        self.rho = rho
+        self.set_timesteps(num_inference_steps)
+    def set_timesteps(self, num_inference_steps=100, denoising_strength=1.0):
+        ramp = torch.linspace(1-denoising_strength, 1, num_inference_steps)
+        min_inv_rho = torch.pow(torch.tensor((self.sigma_min,)), (1 / self.rho))
+        max_inv_rho = torch.pow(torch.tensor((self.sigma_max,)), (1 / self.rho))
+        self.sigmas = torch.pow(max_inv_rho + ramp * (min_inv_rho - max_inv_rho), self.rho)
+        self.timesteps = torch.log(self.sigmas) * 0.25
+    def step(self, model_output, timestep, sample, to_final=False):
+        timestep_id = torch.argmin((self.timesteps - timestep).abs())
+        sigma = self.sigmas[timestep_id]
+        sample *= (sigma*sigma + 1).sqrt()
+        estimated_sample = -sigma / (sigma*sigma + 1).sqrt() * model_output + 1 / (sigma*sigma + 1) * sample
+        if to_final or timestep_id + 1 >= len(self.timesteps):
+            prev_sample = estimated_sample
+        else:
+            sigma_ = self.sigmas[timestep_id + 1]
+            derivative = 1 / sigma * (sample - estimated_sample)
+            prev_sample = sample + derivative * (sigma_ - sigma)
+            prev_sample /= (sigma_*sigma_ + 1).sqrt()
+        return prev_sample
+    def return_to_timestep(self, timestep, sample, sample_stablized):
+        # This scheduler doesn't support this function.
+        pass
+    def add_noise(self, original_samples, noise, timestep):
+        timestep_id = torch.argmin((self.timesteps - timestep).abs())
+        sigma = self.sigmas[timestep_id]
+        sample = (original_samples + noise * sigma) / (sigma*sigma + 1).sqrt()
+        return sample
+    def training_target(self, sample, noise, timestep):
+        timestep_id = torch.argmin((self.timesteps - timestep).abs())
+        sigma = self.sigmas[timestep_id]
+        target = (-(sigma*sigma + 1).sqrt() / sigma + 1 / (sigma*sigma + 1).sqrt() / sigma) * sample + 1 / (sigma*sigma + 1).sqrt() * noise
+        return target
+    def training_weight(self, timestep):
+        timestep_id = torch.argmin((self.timesteps - timestep).abs())
+        sigma = self.sigmas[timestep_id]
+        weight = (1 + sigma*sigma).sqrt() / sigma
+        return weight

diffsynth/schedulers/ddim.py ADDED Viewed

@@ -0,0 +1,79 @@
+import torch, math
+class EnhancedDDIMScheduler():
+    def __init__(self, num_train_timesteps=1000, beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", prediction_type="epsilon"):
+        self.num_train_timesteps = num_train_timesteps
+        if beta_schedule == "scaled_linear":
+            betas = torch.square(torch.linspace(math.sqrt(beta_start), math.sqrt(beta_end), num_train_timesteps, dtype=torch.float32))
+        elif beta_schedule == "linear":
+            betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32)
+        else:
+            raise NotImplementedError(f"{beta_schedule} is not implemented")
+        self.alphas_cumprod = torch.cumprod(1.0 - betas, dim=0).tolist()
+        self.set_timesteps(10)
+        self.prediction_type = prediction_type
+    def set_timesteps(self, num_inference_steps, denoising_strength=1.0):
+        # The timesteps are aligned to 999...0, which is different from other implementations,
+        # but I think this implementation is more reasonable in theory.
+        max_timestep = max(round(self.num_train_timesteps * denoising_strength) - 1, 0)
+        num_inference_steps = min(num_inference_steps, max_timestep + 1)
+        if num_inference_steps == 1:
+            self.timesteps = torch.Tensor([max_timestep])
+        else:
+            step_length = max_timestep / (num_inference_steps - 1)
+            self.timesteps = torch.Tensor([round(max_timestep - i*step_length) for i in range(num_inference_steps)])
+    def denoise(self, model_output, sample, alpha_prod_t, alpha_prod_t_prev):
+        if self.prediction_type == "epsilon":
+            weight_e = math.sqrt(1 - alpha_prod_t_prev) - math.sqrt(alpha_prod_t_prev * (1 - alpha_prod_t) / alpha_prod_t)
+            weight_x = math.sqrt(alpha_prod_t_prev / alpha_prod_t)
+            prev_sample = sample * weight_x + model_output * weight_e
+        elif self.prediction_type == "v_prediction":
+            weight_e = -math.sqrt(alpha_prod_t_prev * (1 - alpha_prod_t)) + math.sqrt(alpha_prod_t * (1 - alpha_prod_t_prev))
+            weight_x = math.sqrt(alpha_prod_t * alpha_prod_t_prev) + math.sqrt((1 - alpha_prod_t) * (1 - alpha_prod_t_prev))
+            prev_sample = sample * weight_x + model_output * weight_e
+        else:
+            raise NotImplementedError(f"{self.prediction_type} is not implemented")
+        return prev_sample
+    def step(self, model_output, timestep, sample, to_final=False):
+        alpha_prod_t = self.alphas_cumprod[int(timestep.flatten().tolist()[0])]
+        if isinstance(timestep, torch.Tensor):
+            timestep = timestep.cpu()
+        timestep_id = torch.argmin((self.timesteps - timestep).abs())
+        if to_final or timestep_id + 1 >= len(self.timesteps):
+            alpha_prod_t_prev = 1.0
+        else:
+            timestep_prev = int(self.timesteps[timestep_id + 1])
+            alpha_prod_t_prev = self.alphas_cumprod[timestep_prev]
+        return self.denoise(model_output, sample, alpha_prod_t, alpha_prod_t_prev)
+    def return_to_timestep(self, timestep, sample, sample_stablized):
+        alpha_prod_t = self.alphas_cumprod[int(timestep.flatten().tolist()[0])]
+        noise_pred = (sample - math.sqrt(alpha_prod_t) * sample_stablized) / math.sqrt(1 - alpha_prod_t)
+        return noise_pred
+    def add_noise(self, original_samples, noise, timestep):
+        sqrt_alpha_prod = math.sqrt(self.alphas_cumprod[int(timestep.flatten().tolist()[0])])
+        sqrt_one_minus_alpha_prod = math.sqrt(1 - self.alphas_cumprod[int(timestep.flatten().tolist()[0])])
+        noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise
+        return noisy_samples
+    def training_target(self, sample, noise, timestep):
+        if self.prediction_type == "epsilon":
+            return noise
+        else:
+            sqrt_alpha_prod = math.sqrt(self.alphas_cumprod[int(timestep.flatten().tolist()[0])])
+            sqrt_one_minus_alpha_prod = math.sqrt(1 - self.alphas_cumprod[int(timestep.flatten().tolist()[0])])
+            target = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample
+            return target

diffsynth/schedulers/flow_match.py ADDED Viewed

@@ -0,0 +1,51 @@
+import torch
+class FlowMatchScheduler():
+    def __init__(self, num_inference_steps=100, num_train_timesteps=1000, shift=3.0, sigma_max=1.0, sigma_min=0.003/1.002):
+        self.num_train_timesteps = num_train_timesteps
+        self.shift = shift
+        self.sigma_max = sigma_max
+        self.sigma_min = sigma_min
+        self.set_timesteps(num_inference_steps)
+    def set_timesteps(self, num_inference_steps=100, denoising_strength=1.0):
+        sigma_start = self.sigma_min + (self.sigma_max - self.sigma_min) * denoising_strength
+        self.sigmas = torch.linspace(sigma_start, self.sigma_min, num_inference_steps)
+        self.sigmas = self.shift * self.sigmas / (1 + (self.shift - 1) * self.sigmas)
+        self.timesteps = self.sigmas * self.num_train_timesteps
+    def step(self, model_output, timestep, sample, to_final=False):
+        if isinstance(timestep, torch.Tensor):
+            timestep = timestep.cpu()
+        timestep_id = torch.argmin((self.timesteps - timestep).abs())
+        sigma = self.sigmas[timestep_id]
+        if to_final or timestep_id + 1 >= len(self.timesteps):
+            sigma_ = 0
+        else:
+            sigma_ = self.sigmas[timestep_id + 1]
+        prev_sample = sample + model_output * (sigma_ - sigma)
+        return prev_sample
+    def return_to_timestep(self, timestep, sample, sample_stablized):
+        # This scheduler doesn't support this function.
+        pass
+    def add_noise(self, original_samples, noise, timestep):
+        if isinstance(timestep, torch.Tensor):
+            timestep = timestep.cpu()
+        timestep_id = torch.argmin((self.timesteps - timestep).abs())
+        sigma = self.sigmas[timestep_id]
+        sample = (1 - sigma) * original_samples + sigma * noise
+        return sample
+    def training_target(self, sample, noise, timestep):
+        target = noise - sample
+        return target

diffsynth/tokenizer_configs/__init__.py ADDED Viewed

File without changes

diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/special_tokens_map.json ADDED Viewed

@@ -0,0 +1,7 @@
+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

diffsynth/tokenizer_configs/hunyuan_dit/tokenizer/tokenizer_config.json ADDED Viewed

@@ -0,0 +1,16 @@
+{
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "name_or_path": "hfl/chinese-roberta-wwm-ext",
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "special_tokens_map_file": "/home/chenweifeng/.cache/huggingface/hub/models--hfl--chinese-roberta-wwm-ext/snapshots/5c58d0b8ec1d9014354d691c538661bf00bfdb44/special_tokens_map.json",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]",
+  "model_max_length": 77
+}