hcpdiff 0.9.0__py3-none-any.whl → 2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208)
  1. hcpdiff/__init__.py +4 -4
  2. hcpdiff/ckpt_manager/__init__.py +4 -5
  3. hcpdiff/ckpt_manager/ckpt.py +24 -0
  4. hcpdiff/ckpt_manager/format/__init__.py +4 -0
  5. hcpdiff/ckpt_manager/format/diffusers.py +59 -0
  6. hcpdiff/ckpt_manager/format/emb.py +21 -0
  7. hcpdiff/ckpt_manager/format/lora_webui.py +244 -0
  8. hcpdiff/ckpt_manager/format/sd_single.py +41 -0
  9. hcpdiff/ckpt_manager/loader.py +64 -0
  10. hcpdiff/data/__init__.py +4 -28
  11. hcpdiff/data/cache/__init__.py +1 -0
  12. hcpdiff/data/cache/vae.py +102 -0
  13. hcpdiff/data/dataset.py +20 -0
  14. hcpdiff/data/handler/__init__.py +3 -0
  15. hcpdiff/data/handler/controlnet.py +18 -0
  16. hcpdiff/data/handler/diffusion.py +80 -0
  17. hcpdiff/data/handler/text.py +111 -0
  18. hcpdiff/data/source/__init__.py +1 -2
  19. hcpdiff/data/source/folder_class.py +12 -29
  20. hcpdiff/data/source/text2img.py +36 -74
  21. hcpdiff/data/source/text2img_cond.py +9 -15
  22. hcpdiff/diffusion/__init__.py +0 -0
  23. hcpdiff/diffusion/noise/__init__.py +2 -0
  24. hcpdiff/diffusion/noise/pyramid_noise.py +42 -0
  25. hcpdiff/diffusion/noise/zero_terminal.py +39 -0
  26. hcpdiff/diffusion/sampler/__init__.py +5 -0
  27. hcpdiff/diffusion/sampler/base.py +72 -0
  28. hcpdiff/diffusion/sampler/ddpm.py +20 -0
  29. hcpdiff/diffusion/sampler/diffusers.py +66 -0
  30. hcpdiff/diffusion/sampler/edm.py +22 -0
  31. hcpdiff/diffusion/sampler/sigma_scheduler/__init__.py +3 -0
  32. hcpdiff/diffusion/sampler/sigma_scheduler/base.py +14 -0
  33. hcpdiff/diffusion/sampler/sigma_scheduler/ddpm.py +197 -0
  34. hcpdiff/diffusion/sampler/sigma_scheduler/edm.py +48 -0
  35. hcpdiff/easy/__init__.py +2 -0
  36. hcpdiff/easy/cfg/__init__.py +3 -0
  37. hcpdiff/easy/cfg/sd15_train.py +201 -0
  38. hcpdiff/easy/cfg/sdxl_train.py +140 -0
  39. hcpdiff/easy/cfg/t2i.py +177 -0
  40. hcpdiff/easy/model/__init__.py +2 -0
  41. hcpdiff/easy/model/cnet.py +31 -0
  42. hcpdiff/easy/model/loader.py +79 -0
  43. hcpdiff/easy/sampler.py +46 -0
  44. hcpdiff/evaluate/__init__.py +1 -0
  45. hcpdiff/evaluate/previewer.py +60 -0
  46. hcpdiff/loss/__init__.py +4 -1
  47. hcpdiff/loss/base.py +41 -0
  48. hcpdiff/loss/gw.py +35 -0
  49. hcpdiff/loss/ssim.py +37 -0
  50. hcpdiff/loss/vlb.py +79 -0
  51. hcpdiff/loss/weighting.py +66 -0
  52. hcpdiff/models/__init__.py +2 -2
  53. hcpdiff/models/cfg_context.py +17 -14
  54. hcpdiff/models/compose/compose_hook.py +44 -23
  55. hcpdiff/models/compose/compose_tokenizer.py +21 -8
  56. hcpdiff/models/compose/sdxl_composer.py +4 -4
  57. hcpdiff/models/container.py +1 -1
  58. hcpdiff/models/controlnet.py +16 -16
  59. hcpdiff/models/lora_base_patch.py +14 -25
  60. hcpdiff/models/lora_layers.py +3 -9
  61. hcpdiff/models/lora_layers_patch.py +14 -24
  62. hcpdiff/models/text_emb_ex.py +84 -6
  63. hcpdiff/models/textencoder_ex.py +54 -18
  64. hcpdiff/models/wrapper/__init__.py +3 -0
  65. hcpdiff/models/wrapper/pixart.py +19 -0
  66. hcpdiff/models/wrapper/sd.py +218 -0
  67. hcpdiff/models/wrapper/utils.py +20 -0
  68. hcpdiff/parser/__init__.py +1 -0
  69. hcpdiff/parser/embpt.py +32 -0
  70. hcpdiff/tools/convert_caption_txt2json.py +1 -1
  71. hcpdiff/tools/dataset_generator.py +94 -0
  72. hcpdiff/tools/download_hf_model.py +24 -0
  73. hcpdiff/tools/embedding_convert.py +6 -2
  74. hcpdiff/tools/init_proj.py +3 -21
  75. hcpdiff/tools/lora_convert.py +19 -15
  76. hcpdiff/tools/save_model.py +12 -0
  77. hcpdiff/tools/sd2diffusers.py +1 -1
  78. hcpdiff/train_colo.py +1 -1
  79. hcpdiff/train_deepspeed.py +1 -1
  80. hcpdiff/trainer_ac.py +79 -0
  81. hcpdiff/trainer_ac_single.py +31 -0
  82. hcpdiff/utils/__init__.py +0 -2
  83. hcpdiff/utils/inpaint_pipe.py +790 -0
  84. hcpdiff/utils/net_utils.py +29 -6
  85. hcpdiff/utils/pipe_hook.py +46 -33
  86. hcpdiff/utils/utils.py +21 -4
  87. hcpdiff/workflow/__init__.py +15 -10
  88. hcpdiff/workflow/daam/__init__.py +1 -0
  89. hcpdiff/workflow/daam/act.py +66 -0
  90. hcpdiff/workflow/daam/hook.py +109 -0
  91. hcpdiff/workflow/diffusion.py +128 -136
  92. hcpdiff/workflow/fast.py +31 -0
  93. hcpdiff/workflow/flow.py +67 -0
  94. hcpdiff/workflow/io.py +36 -68
  95. hcpdiff/workflow/model.py +46 -43
  96. hcpdiff/workflow/text.py +84 -52
  97. hcpdiff/workflow/utils.py +32 -12
  98. hcpdiff/workflow/vae.py +37 -38
  99. hcpdiff-2.1.dist-info/METADATA +285 -0
  100. hcpdiff-2.1.dist-info/RECORD +114 -0
  101. {hcpdiff-0.9.0.dist-info → hcpdiff-2.1.dist-info}/WHEEL +1 -1
  102. hcpdiff-2.1.dist-info/entry_points.txt +5 -0
  103. hcpdiff/ckpt_manager/base.py +0 -16
  104. hcpdiff/ckpt_manager/ckpt_diffusers.py +0 -45
  105. hcpdiff/ckpt_manager/ckpt_pkl.py +0 -138
  106. hcpdiff/ckpt_manager/ckpt_safetensor.py +0 -60
  107. hcpdiff/ckpt_manager/ckpt_webui.py +0 -54
  108. hcpdiff/data/bucket.py +0 -358
  109. hcpdiff/data/caption_loader.py +0 -80
  110. hcpdiff/data/cond_dataset.py +0 -40
  111. hcpdiff/data/crop_info_dataset.py +0 -40
  112. hcpdiff/data/data_processor.py +0 -33
  113. hcpdiff/data/pair_dataset.py +0 -146
  114. hcpdiff/data/sampler.py +0 -54
  115. hcpdiff/data/source/base.py +0 -30
  116. hcpdiff/data/utils.py +0 -80
  117. hcpdiff/infer_workflow.py +0 -57
  118. hcpdiff/loggers/__init__.py +0 -13
  119. hcpdiff/loggers/base_logger.py +0 -76
  120. hcpdiff/loggers/cli_logger.py +0 -40
  121. hcpdiff/loggers/preview/__init__.py +0 -1
  122. hcpdiff/loggers/preview/image_previewer.py +0 -149
  123. hcpdiff/loggers/tensorboard_logger.py +0 -30
  124. hcpdiff/loggers/wandb_logger.py +0 -31
  125. hcpdiff/loggers/webui_logger.py +0 -9
  126. hcpdiff/loss/min_snr_loss.py +0 -52
  127. hcpdiff/models/layers.py +0 -81
  128. hcpdiff/models/plugin.py +0 -348
  129. hcpdiff/models/wrapper.py +0 -75
  130. hcpdiff/noise/__init__.py +0 -3
  131. hcpdiff/noise/noise_base.py +0 -16
  132. hcpdiff/noise/pyramid_noise.py +0 -50
  133. hcpdiff/noise/zero_terminal.py +0 -44
  134. hcpdiff/train_ac.py +0 -565
  135. hcpdiff/train_ac_single.py +0 -39
  136. hcpdiff/utils/caption_tools.py +0 -105
  137. hcpdiff/utils/cfg_net_tools.py +0 -321
  138. hcpdiff/utils/cfg_resolvers.py +0 -16
  139. hcpdiff/utils/ema.py +0 -52
  140. hcpdiff/utils/img_size_tool.py +0 -248
  141. hcpdiff/vis/__init__.py +0 -3
  142. hcpdiff/vis/base_interface.py +0 -12
  143. hcpdiff/vis/disk_interface.py +0 -48
  144. hcpdiff/vis/webui_interface.py +0 -17
  145. hcpdiff/visualizer.py +0 -258
  146. hcpdiff/visualizer_reloadable.py +0 -237
  147. hcpdiff/workflow/base.py +0 -59
  148. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/anime/text2img_anime.yaml +0 -21
  149. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/anime/text2img_anime_lora.yaml +0 -58
  150. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/change_vae.yaml +0 -6
  151. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/euler_a.yaml +0 -8
  152. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/img2img.yaml +0 -10
  153. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/img2img_controlnet.yaml +0 -19
  154. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/inpaint.yaml +0 -11
  155. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/load_lora.yaml +0 -26
  156. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/load_unet_part.yaml +0 -18
  157. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/offload_2GB.yaml +0 -6
  158. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/save_model.yaml +0 -44
  159. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/text2img.yaml +0 -53
  160. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/text2img_DA++.yaml +0 -34
  161. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/text2img_sdxl.yaml +0 -9
  162. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/plugins/plugin_controlnet.yaml +0 -17
  163. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/te_struct.txt +0 -193
  164. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/dataset/base_dataset.yaml +0 -29
  165. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/dataset/regularization_dataset.yaml +0 -31
  166. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/CustomDiffusion.yaml +0 -74
  167. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/DreamArtist++.yaml +0 -135
  168. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/DreamArtist.yaml +0 -45
  169. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/DreamBooth.yaml +0 -62
  170. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/FT_sdxl.yaml +0 -33
  171. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/Lion_optimizer.yaml +0 -17
  172. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/TextualInversion.yaml +0 -41
  173. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/add_logger_tensorboard_wandb.yaml +0 -15
  174. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/controlnet.yaml +0 -53
  175. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/ema.yaml +0 -10
  176. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/fine-tuning.yaml +0 -53
  177. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/locon.yaml +0 -24
  178. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/lora_anime_character.yaml +0 -77
  179. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/lora_conventional.yaml +0 -56
  180. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/lora_sdxl.yaml +0 -41
  181. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/min_snr.yaml +0 -7
  182. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/preview_in_training.yaml +0 -6
  183. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples_noob/DreamBooth.yaml +0 -70
  184. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples_noob/TextualInversion.yaml +0 -45
  185. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples_noob/fine-tuning.yaml +0 -45
  186. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples_noob/lora.yaml +0 -63
  187. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/train_base.yaml +0 -81
  188. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/tuning_base.yaml +0 -42
  189. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/unet_struct.txt +0 -932
  190. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/workflow/highres_fix_latent.yaml +0 -86
  191. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/workflow/highres_fix_pixel.yaml +0 -99
  192. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/workflow/text2img.yaml +0 -57
  193. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/workflow/text2img_lora.yaml +0 -70
  194. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/zero2.json +0 -32
  195. hcpdiff-0.9.0.data/data/hcpdiff/cfgs/zero3.json +0 -39
  196. hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/caption.txt +0 -1
  197. hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/name.txt +0 -1
  198. hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/name_2pt_caption.txt +0 -1
  199. hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/name_caption.txt +0 -1
  200. hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/object.txt +0 -27
  201. hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/object_caption.txt +0 -27
  202. hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/style.txt +0 -19
  203. hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/style_caption.txt +0 -19
  204. hcpdiff-0.9.0.dist-info/METADATA +0 -199
  205. hcpdiff-0.9.0.dist-info/RECORD +0 -155
  206. hcpdiff-0.9.0.dist-info/entry_points.txt +0 -2
  207. {hcpdiff-0.9.0.dist-info → hcpdiff-2.1.dist-info/licenses}/LICENSE +0 -0
  208. {hcpdiff-0.9.0.dist-info → hcpdiff-2.1.dist-info}/top_level.txt +0 -0
hcpdiff/utils/net_utils.py CHANGED
@@ -6,11 +6,19 @@ import torch
  from diffusers.optimization import SchedulerType, TYPE_TO_SCHEDULER_FUNCTION, Optimizer
  from torch import nn
  from torch.optim import lr_scheduler
- from transformers import PretrainedConfig, AutoTokenizer
+ from transformers import PretrainedConfig, AutoTokenizer, T5EncoderModel, CLIPTextModel
  from functools import partial
+ from huggingface_hub import hf_hub_download
+ import json
  
  dtype_dict = {'fp32':torch.float32, 'amp':torch.float32, 'fp16':torch.float16, 'bf16':torch.bfloat16}
  
+ try:
+     dtype_dict['fp8_e4m3'] = torch.float8_e4m3fn
+     dtype_dict['fp8_e5m2'] = torch.float8_e5m2
+ except:
+     pass
+ 
  def get_scheduler(cfg, optimizer):
      if cfg is None:
          return None
@@ -90,7 +98,7 @@ def auto_tokenizer_cls(pretrained_model_name_or_path: str, revision: str = None)
              revision=revision, use_fast=False,
          )
          return SDXLTokenizer
-     except OSError:
+     except:
          # not sdxl, only one tokenizer
          return AutoTokenizer
  
@@ -102,8 +110,10 @@ def auto_text_encoder_cls(pretrained_model_name_or_path: str, revision: str = None)
              subfolder="text_encoder_2",
              revision=revision,
          )
+         if text_encoder_config.architectures is None:
+             raise ValueError()
          return SDXLTextEncoder
-     except OSError:
+     except:
          text_encoder_config = PretrainedConfig.from_pretrained(
              pretrained_model_name_or_path,
              subfolder="text_encoder",
@@ -112,16 +122,26 @@ def auto_text_encoder_cls(pretrained_model_name_or_path: str, revision: str = None)
          model_class = text_encoder_config.architectures[0]
  
          if model_class == "CLIPTextModel":
-             from transformers import CLIPTextModel
- 
              return CLIPTextModel
          elif model_class == "RobertaSeriesModelWithTransformation":
              from diffusers.pipelines.alt_diffusion.modeling_roberta_series import RobertaSeriesModelWithTransformation
  
              return RobertaSeriesModelWithTransformation
+         elif model_class == "T5EncoderModel":
+             return T5EncoderModel
          else:
              raise ValueError(f"{model_class} is not supported.")
  
+ def get_pipe_name(path: str):
+     if os.path.isdir(path):
+         json_file = os.path.join(path, "model_index.json")
+     else:
+         json_file = hf_hub_download(path, "model_index.json")
+     with open(json_file, "r", encoding="utf-8") as reader:
+         text = reader.read()
+         data = json.loads(text)
+     return data['_class_name']
+ 
  def auto_tokenizer(pretrained_model_name_or_path: str, revision: str = None, **kwargs):
      return auto_tokenizer_cls(pretrained_model_name_or_path, revision).from_pretrained(pretrained_model_name_or_path, revision=revision, **kwargs)
  
@@ -225,4 +245,7 @@ def split_module_name(layer_name):
      return parent_name, host_name
  
  def get_dtype(dtype):
-     return dtype_dict.get(dtype, torch.float32)
+     if isinstance(dtype, torch.dtype):
+         return dtype
+     else:
+         return dtype_dict.get(dtype, torch.float32)
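Taken together, the net_utils.py changes register optional fp8 dtypes and let get_dtype accept either a registry key or a torch.dtype object. A minimal standalone sketch of the resulting behaviour (re-declared here for illustration, not imported from the package):

    import torch

    dtype_dict = {'fp32': torch.float32, 'amp': torch.float32, 'fp16': torch.float16, 'bf16': torch.bfloat16}
    try:
        # float8 types only exist on newer torch builds
        dtype_dict['fp8_e4m3'] = torch.float8_e4m3fn
        dtype_dict['fp8_e5m2'] = torch.float8_e5m2
    except AttributeError:
        pass

    def get_dtype(dtype):
        # pass torch.dtype objects through, look strings up with a float32 fallback
        if isinstance(dtype, torch.dtype):
            return dtype
        return dtype_dict.get(dtype, torch.float32)

    assert get_dtype('bf16') is torch.bfloat16
    assert get_dtype(torch.float16) is torch.float16
    assert get_dtype('unknown') is torch.float32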
hcpdiff/utils/pipe_hook.py CHANGED
@@ -2,10 +2,10 @@ from typing import Union, List, Optional, Callable, Dict, Any
  
  import PIL
  import torch
- from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionInpaintPipelineLegacy
+ from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, PixArtTransformer2DModel
  from diffusers.image_processor import VaeImageProcessor
- from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput
- from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_inpaint_legacy import preprocess_mask, preprocess_image
+ from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
+ from .inpaint_pipe import preprocess_mask, preprocess_image, StableDiffusionInpaintPipelineLegacy
  from einops import repeat
  
  class HookPipe_T2I(StableDiffusionPipeline):
@@ -17,25 +17,17 @@ class HookPipe_T2I(StableDiffusionPipeline):
      def device(self) -> torch.device:
          return torch.device('cuda')
  
-     def proc_prompt(self, device, num_images_per_prompt, prompt_embeds = None, negative_prompt_embeds = None):
-         batch_size = prompt_embeds.shape[0]
-         prompt_embeds = prompt_embeds.to(dtype=self.text_encoder.dtype, device=device)
+     def proc_prompt(self, device, num_inference_steps, prompt_embeds = None, negative_prompt_embeds = None) -> List[torch.Tensor]:
+         if not isinstance(prompt_embeds, list): # to emb for each step
+             prompt_embeds = [prompt_embeds]*num_inference_steps
+         if not isinstance(negative_prompt_embeds, list): # to emb for each step
+             negative_prompt_embeds = [negative_prompt_embeds]*num_inference_steps
  
-         bs_embed, seq_len, _ = prompt_embeds.shape
-         # duplicate text embeddings for each generation per prompt, using mps friendly method
-         prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
-         prompt_embeds = prompt_embeds.view(bs_embed*num_images_per_prompt, seq_len, -1)
+         prompt_embeds = [p.to(dtype=self.text_encoder.dtype, device=device) for p in prompt_embeds]
+         negative_prompt_embeds = [p.to(dtype=self.text_encoder.dtype, device=device) for p in negative_prompt_embeds]
  
-         # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
-         seq_len = negative_prompt_embeds.shape[1]
- 
-         negative_prompt_embeds = negative_prompt_embeds.to(dtype=self.text_encoder.dtype, device=device)
- 
-         negative_prompt_embeds = negative_prompt_embeds.repeat(1, num_images_per_prompt, 1)
-         negative_prompt_embeds = negative_prompt_embeds.view(batch_size*num_images_per_prompt, seq_len, -1)
- 
-         prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
-         return prompt_embeds
+         prompt_embeds = [torch.cat([emb_neg, emb_pos]) for emb_pos, emb_neg in zip(prompt_embeds, negative_prompt_embeds)]
+         return prompt_embeds # List[emb_step_i]*num_inference_steps
  
      @torch.no_grad()
      def __call__(
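The rewritten proc_prompt no longer duplicates embeddings per image; it normalizes its inputs into one CFG-ready embedding per inference step, accepting either a single tensor or a per-step list. A rough sketch of the same broadcasting logic in isolation (dtype/device casting omitted, tensor shapes are illustrative):

    import torch

    def proc_prompt_sketch(prompt_embeds, negative_prompt_embeds, num_inference_steps):
        # broadcast a single tensor to one embedding per denoising step
        if not isinstance(prompt_embeds, list):
            prompt_embeds = [prompt_embeds] * num_inference_steps
        if not isinstance(negative_prompt_embeds, list):
            negative_prompt_embeds = [negative_prompt_embeds] * num_inference_steps
        # concatenate [negative, positive] per step for classifier-free guidance
        return [torch.cat([neg, pos]) for pos, neg in zip(prompt_embeds, negative_prompt_embeds)]

    pos, neg = torch.randn(1, 77, 768), torch.randn(1, 77, 768)
    per_step = proc_prompt_sketch(pos, neg, num_inference_steps=3)
    assert len(per_step) == 3 and per_step[0].shape == (2, 77, 768)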
@@ -46,7 +38,6 @@ class HookPipe_T2I(StableDiffusionPipeline):
          num_inference_steps: int = 50,
          guidance_scale: float = 7.5,
          negative_prompt: Optional[Union[str, List[str]]] = None,
-         num_images_per_prompt: Optional[int] = 1,
          eta: float = 0.0,
          generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
          latents: Optional[torch.FloatTensor] = None,
@@ -74,6 +65,8 @@
              batch_size = 1
          elif prompt is not None and isinstance(prompt, list):
              batch_size = len(prompt)
+         elif isinstance(prompt_embeds, list):
+             batch_size = prompt_embeds[0].shape[0]
          else:
              batch_size = prompt_embeds.shape[0]
  
@@ -84,7 +77,7 @@
          do_classifier_free_guidance = guidance_scale>1.0
  
          # 3. Encode input prompt
-         prompt_embeds = self.proc_prompt(device, num_images_per_prompt,
+         prompt_embeds = self.proc_prompt(device, num_inference_steps,
                                           prompt_embeds=prompt_embeds, negative_prompt_embeds=negative_prompt_embeds)
  
          # 4. Prepare timesteps
@@ -95,11 +88,11 @@
          # 5. Prepare latent variables
          num_channels_latents = self.unet.config.in_channels
          latents = self.prepare_latents(
-             batch_size*num_images_per_prompt,
+             batch_size,
              num_channels_latents,
              height,
              width,
-             prompt_embeds.dtype,
+             prompt_embeds[0].dtype,
              device,
              generator,
              latents,
@@ -114,7 +107,7 @@
                  crop_info = torch.tensor([height, width, 0, 0, height, width], dtype=torch.float)
              else:
                  crop_info = torch.tensor([height, width, *crop_coord], dtype=torch.float)
-             crop_info = crop_info.to(device).repeat(batch_size*num_images_per_prompt, 1)
+             crop_info = crop_info.to(device).repeat(batch_size, 1)
              pooled_output = pooled_output.to(device)
  
              if do_classifier_free_guidance:
@@ -129,12 +122,20 @@
                  latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
  
                  if pooled_output is None:
-                     noise_pred = self.unet(latent_model_input, t, prompt_embeds, encoder_attention_mask=encoder_attention_mask,
-                                            cross_attention_kwargs=cross_attention_kwargs, ).sample
+                     if isinstance(self.unet, PixArtTransformer2DModel):
+                         added_cond_kwargs = {"resolution": None, "aspect_ratio": None}
+                         noise_pred = self.unet(latent_model_input, timestep=t.repeat(latent_model_input.shape[0]), encoder_hidden_states=prompt_embeds[i],
+                                                encoder_attention_mask=encoder_attention_mask,
+                                                cross_attention_kwargs=cross_attention_kwargs, added_cond_kwargs=added_cond_kwargs).sample
+                     else:
+                         noise_pred = self.unet(latent_model_input, timestep=t, encoder_hidden_states=prompt_embeds[i],
+                                                encoder_attention_mask=encoder_attention_mask,
+                                                cross_attention_kwargs=cross_attention_kwargs).sample
                  else:
                      added_cond_kwargs = {"text_embeds":pooled_output, "time_ids":crop_info}
                      # predict the noise residual
-                     noise_pred = self.unet(latent_model_input, t, prompt_embeds, encoder_attention_mask=encoder_attention_mask,
+                     noise_pred = self.unet(latent_model_input, timestep=t, encoder_hidden_states=prompt_embeds[i],
+                                            encoder_attention_mask=encoder_attention_mask,
                                             cross_attention_kwargs=cross_attention_kwargs, added_cond_kwargs=added_cond_kwargs).sample
  
                  # perform guidance
@@ -142,6 +143,10 @@
                      noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
                      noise_pred = noise_pred_uncond+guidance_scale*(noise_pred_text-noise_pred_uncond)
  
+                 # learned sigma
+                 if self.unet.config.out_channels // 2 == num_channels_latents:
+                     noise_pred = noise_pred.chunk(2, dim=1)[0]
+ 
                  # x_t -> x_0
                  alpha_prod_t = alphas_cumprod[t.long()]
                  beta_prod_t = 1-alpha_prod_t
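The learned-sigma guard above covers denoisers whose output stacks a predicted variance on top of the noise estimate (the PixArt transformer handled earlier is one such case): only the first half of the output channels is the epsilon prediction. A toy illustration of the channel split, assuming 4 latent channels:

    import torch

    num_channels_latents = 4
    model_output = torch.randn(1, 8, 64, 64)  # out_channels == 2 * latent channels

    if model_output.shape[1] // 2 == num_channels_latents:
        noise_pred, predicted_variance = model_output.chunk(2, dim=1)

    print(noise_pred.shape)  # torch.Size([1, 4, 64, 64])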
@@ -155,7 +160,8 @@
                  if i == len(timesteps)-1 or ((i+1)>num_warmup_steps and (i+1)%self.scheduler.order == 0):
                      progress_bar.update()
                      if callback is not None and i%callback_steps == 0:
-                         if callback(i, t, num_inference_steps, latents_x0):
+                         latents = callback(i, t, num_inference_steps, latents_x0, latents)
+                         if latents is None:
                              return None
  
          latents = latents.to(dtype=self.vae.dtype)
@@ -277,8 +283,13 @@ class HookPipe_I2I(StableDiffusionImg2ImgPipeline):
  
                  # predict the noise residual
                  if pooled_output is None:
-                     noise_pred = self.unet(latent_model_input, t, prompt_embeds, encoder_attention_mask=encoder_attention_mask,
-                                            cross_attention_kwargs=cross_attention_kwargs, ).sample
+                     if isinstance(self.unet, PixArtTransformer2DModel):
+                         added_cond_kwargs = {"resolution": None, "aspect_ratio": None}
+                         noise_pred = self.unet(latent_model_input, t, prompt_embeds, encoder_attention_mask=encoder_attention_mask,
+                                                cross_attention_kwargs=cross_attention_kwargs, added_cond_kwargs=added_cond_kwargs).sample
+                     else:
+                         noise_pred = self.unet(latent_model_input, t, prompt_embeds, encoder_attention_mask=encoder_attention_mask,
+                                                cross_attention_kwargs=cross_attention_kwargs, ).sample
                  else:
                      added_cond_kwargs = {"text_embeds":pooled_output, "time_ids":crop_info}
                      # predict the noise residual
@@ -302,7 +313,8 @@ class HookPipe_I2I(StableDiffusionImg2ImgPipeline):
                  if i == len(timesteps)-1 or ((i+1)>num_warmup_steps and (i+1)%self.scheduler.order == 0):
                      progress_bar.update()
                      if callback is not None and i%callback_steps == 0:
-                         if callback(i, t, num_inference_steps, latents_x0):
+                         latents = callback(i, t, num_inference_steps, latents_x0, latents)
+                         if latents is None:
                              return None
  
          latents = latents.to(dtype=self.vae.dtype)
@@ -450,7 +462,8 @@ class HookPipe_Inpaint(StableDiffusionInpaintPipelineLegacy):
                  if i == len(timesteps)-1 or ((i+1)>num_warmup_steps and (i+1)%self.scheduler.order == 0):
                      progress_bar.update()
                      if callback is not None and i%callback_steps == 0:
-                         if callback(i, t, num_inference_steps, latents_x0):
+                         latents = callback(i, t, num_inference_steps, latents_x0, latents)
+                         if latents is None:
                              return None
  
                  # use original latents corresponding to unmasked portions of the image
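All three hooked pipelines now expect the step callback to return the (possibly modified) latents rather than a truthy abort flag; returning None stops generation and makes the pipeline return None. A sketch of a callback matching that contract (the function name and cancellation flag are illustrative):

    cancel_requested = False  # would be toggled by the caller, e.g. from a UI

    def preview_callback(i, t, num_inference_steps, latents_x0, latents):
        # latents_x0 is the predicted clean sample, latents the current noisy state
        if cancel_requested:
            return None       # aborts the denoising loop
        return latents        # return latents (optionally edited) to continue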
hcpdiff/utils/utils.py CHANGED
@@ -56,8 +56,8 @@ def remove_config_undefined(cfg):
  def load_config(path, remove_undefined=True):
      cfg = OmegaConf.load(path)
      if '_base_' in cfg:
-         for base in cfg['_base_']:
-             cfg = OmegaConf.merge(load_config(base, remove_undefined=False), cfg)
+         base_cfgs = [load_config(base, remove_undefined=False) for base in cfg['_base_']]
+         cfg = OmegaConf.merge(*base_cfgs, cfg)
          del cfg['_base_']
      if remove_undefined:
          cfg = remove_config_undefined(cfg)
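With this change every entry of _base_ is loaded first and merged in a single call, so later bases override earlier ones and the child config overrides them all. A small sketch of that precedence with plain OmegaConf objects (keys are hypothetical):

    from omegaconf import OmegaConf

    base_a = OmegaConf.create({'lr': 1e-4, 'batch_size': 8})
    base_b = OmegaConf.create({'lr': 5e-5})
    child = OmegaConf.create({'batch_size': 16})

    # same precedence as the new load_config: base_a < base_b < child
    merged = OmegaConf.merge(base_a, base_b, child)
    assert merged.lr == 5e-5 and merged.batch_size == 16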
@@ -85,7 +85,7 @@ def get_cfg_range(cfg_text:str):
  def to_validate_file(name):
      rstr = r"[\/\\\:\*\?\"\<\>\|]"  # '/ \ : * ? " < > |'
      new_title = re.sub(rstr, "_", name)  # replace with underscores
-     return new_title[:50]
+     return new_title[:200]
  
  def make_mask(start, end, length):
      mask=torch.zeros(length)
@@ -159,4 +159,21 @@ def pad_attn_bias(x, attn_bias, block_size=8):
      # pad along the k dimension
      x_padded = F.pad(x, (0, 0, 0, padding_l, 0, 0), mode='constant', value=0)
      attn_bias_padded = F.pad(attn_bias, (0, padding_l, 0, 0), mode='constant', value=0)
-     return x_padded, attn_bias_padded
+     return x_padded, attn_bias_padded
+ 
+ def linear_interp(t, x):
+     '''
+     t_l ---------t_h
+         ^x
+     '''
+     if (x>=len(t)).any():
+         x = x.clamp(max=len(t)-1e-6)
+     x0 = x.floor().long()
+     x1 = x0 + 1
+ 
+     y0 = t[x0]
+     y1 = t[x1]
+ 
+     xd = (x - x0.float())
+ 
+     return y0 * (1 - xd) + y1 * xd
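The new linear_interp helper performs fractional indexing into a 1-D tensor by interpolating linearly between neighbouring entries (useful for sigma/timestep lookups). A small worked example, re-declaring the helper so it runs standalone:

    import torch

    def linear_interp(t, x):
        # clamp so that x0 + 1 stays a valid index
        if (x >= len(t)).any():
            x = x.clamp(max=len(t) - 1e-6)
        x0 = x.floor().long()
        x1 = x0 + 1
        y0, y1 = t[x0], t[x1]
        xd = x - x0.float()
        return y0 * (1 - xd) + y1 * xd

    t = torch.tensor([0.0, 10.0, 20.0])
    print(linear_interp(t, torch.tensor([0.5, 1.25])))  # tensor([ 5.0000, 12.5000])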
hcpdiff/workflow/__init__.py CHANGED
@@ -1,15 +1,20 @@
- from .base import BasicAction, MemoryMixin, from_memory, ExecAction, LoopAction
- from .diffusion import InputFeederAction, PrepareDiffusionAction, MakeLatentAction, NoisePredAction, SampleAction, DiffusionStepAction, \
-     X0PredAction, SeedAction, MakeTimestepsAction
+ from .diffusion import InputFeederAction, MakeLatentAction, DenoiseAction, SampleAction, DiffusionStepAction, \
+     X0PredAction, SeedAction, MakeTimestepsAction, PrepareDiffusionAction, time_iter
  from .text import TextEncodeAction, TextHookAction, AttnMultTextEncodeAction
  from .vae import EncodeAction, DecodeAction
- from .io import LoadModelsAction, SaveImageAction, BuildModelLoaderAction, LoadPartAction, LoadLoraAction, LoadPluginAction
- from .utils import LatentResizeAction, ImageResizeAction
- from .model import VaeOptimizeAction, BuildOffloadAction, XformersEnableAction, StartTextEncode, StartDiffusion, EndTextEncode, EndDiffusion
+ from .io import BuildModelsAction, SaveImageAction, LoadImageAction
+ from .utils import LatentResizeAction, ImageResizeAction, FeedtoCNetAction
+ from .model import VaeOptimizeAction, BuildOffloadAction, XformersEnableAction
+ #from .flow import FilePromptAction
+ 
+ try:
+     from .fast import SFastCompileAction
+ except:
+     print('stable fast not installed.')
  
  from omegaconf import OmegaConf
  
- OmegaConf.register_new_resolver("hcp.from_memory", lambda mem_name: OmegaConf.create({
-     '_target_': 'hcpdiff.workflow.from_memory',
-     'mem_name': mem_name,
- }))
+ OmegaConf.register_new_resolver("hcp.from_memory", lambda mem_name:OmegaConf.create({
+     '_target_':'hcpdiff.workflow.from_memory',
+     'mem_name':mem_name,
+ }))
hcpdiff/workflow/daam/__init__.py ADDED
@@ -0,0 +1 @@
+ from .act import CaptureCrossAttnAction, SaveWordAttnAction
hcpdiff/workflow/daam/act.py ADDED
@@ -0,0 +1,66 @@
+ import os
+ from io import BytesIO
+ 
+ import numpy as np
+ from PIL import Image
+ from hcpdiff.utils import to_validate_file
+ from rainbowneko.utils import types_support
+ from matplotlib import pyplot as plt
+ from rainbowneko.infer import BasicAction, Actions
+ 
+ from .hook import DiffusionHeatMapHooker
+ 
+ class CaptureCrossAttnAction(Actions):
+     def forward(self, prompt, denoiser, tokenizer, vae, **states):
+         bs = len(prompt)
+         N_head = 8
+         with DiffusionHeatMapHooker(denoiser, tokenizer, vae_scale_factor=vae.vae_scale_factor) as tc:
+             states = super().forward(**states)
+             heat_maps = [tc.compute_global_heat_map(prompt=prompt[i], head_idxs=range(N_head*i, N_head*(i+1))) for i in range(bs)]
+ 
+         return {**states, 'cross_attn_heat_maps':heat_maps}
+ 
+ class SaveWordAttnAction(BasicAction):
+ 
+     def __init__(self, save_root: str, N_col: int = 4, image_type: str = 'png', quality: int = 95, key_map_in=None, key_map_out=None):
+         super().__init__(key_map_in, key_map_out)
+         self.save_root = save_root
+         self.image_type = image_type
+         self.quality = quality
+         self.N_col = N_col
+ 
+         os.makedirs(save_root, exist_ok=True)
+ 
+     def draw_attn(self, tokenizer, prompt, image, global_heat_map):
+         prompt=tokenizer.bos_token+prompt+tokenizer.eos_token
+         tokens = [token.replace("</w>", "") for token in tokenizer.tokenize(prompt)]
+ 
+         d_len = self.N_col
+         plt.rcParams['figure.dpi'] = 300
+         plt.rcParams.update({'font.size':12})
+         h = int(np.ceil(len(tokens)/d_len))
+         fig, ax = plt.subplots(h, d_len, figsize=(2*d_len, 2*h))
+         for ax_ in ax.flatten():
+             ax_.set_xticks([])
+             ax_.set_yticks([])
+         for i, token in enumerate(tokens):
+             heat_map = global_heat_map.compute_word_heat_map(token, word_idx=i)
+             if h==1:
+                 heat_map.plot_overlay(image, ax=ax[i%d_len])
+             else:
+                 heat_map.plot_overlay(image, ax=ax[i//d_len, i%d_len])
+         # plt.tight_layout()
+ 
+         buf = BytesIO()
+         plt.savefig(buf, format='png')
+         buf.seek(0)
+         return Image.open(buf)
+ 
+     def forward(self, tokenizer, images, prompt, seeds, cross_attn_heat_maps, **states):
+         num_img_exist = max([0]+[int(x.split('-', 1)[0]) for x in os.listdir(self.save_root) if x.rsplit('.', 1)[-1] in types_support])
+ 
+         for bid, (p, img) in enumerate(zip(prompt, images)):
+             img_path = os.path.join(self.save_root, f"{num_img_exist}-{seeds[bid]}-cross_attn-{to_validate_file(prompt[0])}.{self.image_type}")
+             img = self.draw_attn(tokenizer, p, img, cross_attn_heat_maps[bid])
+             img.save(img_path, quality=self.quality)
+             num_img_exist += 1
hcpdiff/workflow/daam/hook.py ADDED
@@ -0,0 +1,109 @@
+ from daam import AggregateHooker, RawHeatMapCollection, UNetCrossAttentionLocator, GlobalHeatMap
+ from daam.trace import UNetCrossAttentionHooker
+ from typing import List
+ from diffusers import UNet2DConditionModel
+ from PIL import Image
+ import numpy as np
+ import torch
+ import torch.nn.functional as F
+ 
+ def auto_autocast(*args, **kwargs):
+     if not torch.cuda.is_available():
+         kwargs['enabled'] = False
+ 
+     return torch.cuda.amp.autocast(*args, **kwargs)
+ 
+ class DiffusionHeatMapHooker(AggregateHooker):
+     def __init__(
+             self,
+             unet: UNet2DConditionModel,
+             tokenizer,
+             vae_scale_factor: int,
+             low_memory: bool = False,
+             load_heads: bool = False,
+             save_heads: bool = False,
+             data_dir: str = None
+     ):
+         self.all_heat_maps = RawHeatMapCollection()
+         h = (unet.config.sample_size * vae_scale_factor)
+         self.latent_hw = 4096 if h == 512 or h == 1024 else 9216  # 64x64 or 96x96 depending on if it's 2.0-v or 2.0
+         locate_middle = load_heads or save_heads
+         self.locator = UNetCrossAttentionLocator(restrict={0} if low_memory else None, locate_middle_block=locate_middle)
+         self.last_prompt: str = ''
+         self.last_image: Image.Image = None
+         self.time_idx = 0
+         self._gen_idx = 0
+ 
+         self.tokenizer = tokenizer
+ 
+         modules = [
+             UNetCrossAttentionHooker(
+                 x,
+                 self,
+                 layer_idx=idx,
+                 latent_hw=self.latent_hw,
+                 load_heads=load_heads,
+                 save_heads=save_heads,
+                 data_dir=data_dir
+             ) for idx, x in enumerate(self.locator.locate(unet))
+         ]
+ 
+         super().__init__(modules)
+ 
+     def time_callback(self, *args, **kwargs):
+         self.time_idx += 1
+ 
+     @property
+     def layer_names(self):
+         return self.locator.layer_names
+ 
+     def compute_global_heat_map(self, prompt=None, factors=None, head_idxs: List[int]=None, layer_idx=None, normalize=False):
+         # type: (str, List[float], int, int, bool) -> GlobalHeatMap
+         """
+         Compute the global heat map for the given prompt, aggregating across time (inference steps) and space (different
+         spatial transformer block heat maps).
+ 
+         Args:
+             prompt: The prompt to compute the heat map for. If none, uses the last prompt that was used for generation.
+             factors: Restrict the application to heat maps with spatial factors in this set. If `None`, use all sizes.
+             head_idx: Restrict the application to heat maps with this head index. If `None`, use all heads.
+             layer_idx: Restrict the application to heat maps with this layer index. If `None`, use all layers.
+ 
+         Returns:
+             A heat map object for computing word-level heat maps.
+         """
+         heat_maps = self.all_heat_maps
+ 
+         if prompt is None:
+             prompt = self.last_prompt
+ 
+         if factors is None:
+             factors = {0, 1, 2, 4, 8, 16, 32, 64}
+         else:
+             factors = set(factors)
+ 
+         all_merges = []
+         x = int(np.sqrt(self.latent_hw))
+ 
+         with auto_autocast(dtype=torch.float32):
+             for (factor, layer, head), heat_map in heat_maps:
+                 if (head_idxs is None or head in head_idxs) and (layer_idx is None or layer_idx == layer):
+                     heat_map = heat_map.unsqueeze(1)/25  # [L,1,H,W]
+                     # The clamping fixes undershoot.
+                     all_merges.append(F.interpolate(heat_map, size=(x, x), mode='bicubic').clamp_(min=0))
+ 
+         try:
+             maps = torch.stack(all_merges, dim=0)  # [B*head, L, 1, H, W]
+         except RuntimeError:
+             if head_idxs is not None or layer_idx is not None:
+                 raise RuntimeError('No heat maps found for the given parameters.')
+             else:
+                 raise RuntimeError('No heat maps found. Did you forget to call `with trace(...)` during generation?')
+ 
+         maps = maps.mean(0)[:, 0]  # [L,H,W]
+         #maps = maps[:len(self.tokenizer.tokenize(prompt)) + 2]  # 1 for SOS and 1 for padding
+ 
+         if normalize:
+             maps = maps / (maps[1:-1].sum(0, keepdim=True) + 1e-6)  # drop out [SOS] and [PAD] for proper probabilities
+ 
+         return GlobalHeatMap(self.tokenizer, prompt, maps)
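For reference, the aggregation in compute_global_heat_map amounts to upsampling every collected per-head map to the latent grid, averaging them, and optionally renormalizing over tokens. A condensed sketch of that math on dummy tensors (shapes and the /25 scaling follow the code above; the maps themselves are random placeholders):

    import torch
    import torch.nn.functional as F

    latent_hw = 4096                 # 64x64 latent grid for 512px models
    x = int(latent_hw ** 0.5)        # 64

    # pretend three per-head maps were collected at different spatial factors: [L, H, W]
    raw_maps = [torch.rand(77, 16, 16), torch.rand(77, 32, 32), torch.rand(77, 8, 8)]

    merged = [F.interpolate(m.unsqueeze(1) / 25, size=(x, x), mode='bicubic').clamp_(min=0)
              for m in raw_maps]                            # each becomes [L, 1, 64, 64]
    maps = torch.stack(merged, dim=0).mean(0)[:, 0]         # average heads/layers -> [L, 64, 64]
    maps = maps / (maps[1:-1].sum(0, keepdim=True) + 1e-6)  # normalize, dropping the SOS/EOS rows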