hcpdiff 0.9.0__py3-none-any.whl → 2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hcpdiff/__init__.py +4 -4
- hcpdiff/ckpt_manager/__init__.py +4 -5
- hcpdiff/ckpt_manager/ckpt.py +24 -0
- hcpdiff/ckpt_manager/format/__init__.py +4 -0
- hcpdiff/ckpt_manager/format/diffusers.py +59 -0
- hcpdiff/ckpt_manager/format/emb.py +21 -0
- hcpdiff/ckpt_manager/format/lora_webui.py +244 -0
- hcpdiff/ckpt_manager/format/sd_single.py +41 -0
- hcpdiff/ckpt_manager/loader.py +64 -0
- hcpdiff/data/__init__.py +4 -28
- hcpdiff/data/cache/__init__.py +1 -0
- hcpdiff/data/cache/vae.py +102 -0
- hcpdiff/data/dataset.py +20 -0
- hcpdiff/data/handler/__init__.py +3 -0
- hcpdiff/data/handler/controlnet.py +18 -0
- hcpdiff/data/handler/diffusion.py +80 -0
- hcpdiff/data/handler/text.py +111 -0
- hcpdiff/data/source/__init__.py +1 -2
- hcpdiff/data/source/folder_class.py +12 -29
- hcpdiff/data/source/text2img.py +36 -74
- hcpdiff/data/source/text2img_cond.py +9 -15
- hcpdiff/diffusion/__init__.py +0 -0
- hcpdiff/diffusion/noise/__init__.py +2 -0
- hcpdiff/diffusion/noise/pyramid_noise.py +42 -0
- hcpdiff/diffusion/noise/zero_terminal.py +39 -0
- hcpdiff/diffusion/sampler/__init__.py +5 -0
- hcpdiff/diffusion/sampler/base.py +72 -0
- hcpdiff/diffusion/sampler/ddpm.py +20 -0
- hcpdiff/diffusion/sampler/diffusers.py +66 -0
- hcpdiff/diffusion/sampler/edm.py +22 -0
- hcpdiff/diffusion/sampler/sigma_scheduler/__init__.py +3 -0
- hcpdiff/diffusion/sampler/sigma_scheduler/base.py +14 -0
- hcpdiff/diffusion/sampler/sigma_scheduler/ddpm.py +197 -0
- hcpdiff/diffusion/sampler/sigma_scheduler/edm.py +48 -0
- hcpdiff/easy/__init__.py +2 -0
- hcpdiff/easy/cfg/__init__.py +3 -0
- hcpdiff/easy/cfg/sd15_train.py +201 -0
- hcpdiff/easy/cfg/sdxl_train.py +140 -0
- hcpdiff/easy/cfg/t2i.py +177 -0
- hcpdiff/easy/model/__init__.py +2 -0
- hcpdiff/easy/model/cnet.py +31 -0
- hcpdiff/easy/model/loader.py +79 -0
- hcpdiff/easy/sampler.py +46 -0
- hcpdiff/evaluate/__init__.py +1 -0
- hcpdiff/evaluate/previewer.py +60 -0
- hcpdiff/loss/__init__.py +4 -1
- hcpdiff/loss/base.py +41 -0
- hcpdiff/loss/gw.py +35 -0
- hcpdiff/loss/ssim.py +37 -0
- hcpdiff/loss/vlb.py +79 -0
- hcpdiff/loss/weighting.py +66 -0
- hcpdiff/models/__init__.py +2 -2
- hcpdiff/models/cfg_context.py +17 -14
- hcpdiff/models/compose/compose_hook.py +44 -23
- hcpdiff/models/compose/compose_tokenizer.py +21 -8
- hcpdiff/models/compose/sdxl_composer.py +4 -4
- hcpdiff/models/container.py +1 -1
- hcpdiff/models/controlnet.py +16 -16
- hcpdiff/models/lora_base_patch.py +14 -25
- hcpdiff/models/lora_layers.py +3 -9
- hcpdiff/models/lora_layers_patch.py +14 -24
- hcpdiff/models/text_emb_ex.py +84 -6
- hcpdiff/models/textencoder_ex.py +54 -18
- hcpdiff/models/wrapper/__init__.py +3 -0
- hcpdiff/models/wrapper/pixart.py +19 -0
- hcpdiff/models/wrapper/sd.py +218 -0
- hcpdiff/models/wrapper/utils.py +20 -0
- hcpdiff/parser/__init__.py +1 -0
- hcpdiff/parser/embpt.py +32 -0
- hcpdiff/tools/convert_caption_txt2json.py +1 -1
- hcpdiff/tools/dataset_generator.py +94 -0
- hcpdiff/tools/download_hf_model.py +24 -0
- hcpdiff/tools/embedding_convert.py +6 -2
- hcpdiff/tools/init_proj.py +3 -21
- hcpdiff/tools/lora_convert.py +19 -15
- hcpdiff/tools/save_model.py +12 -0
- hcpdiff/tools/sd2diffusers.py +1 -1
- hcpdiff/train_colo.py +1 -1
- hcpdiff/train_deepspeed.py +1 -1
- hcpdiff/trainer_ac.py +79 -0
- hcpdiff/trainer_ac_single.py +31 -0
- hcpdiff/utils/__init__.py +0 -2
- hcpdiff/utils/inpaint_pipe.py +790 -0
- hcpdiff/utils/net_utils.py +29 -6
- hcpdiff/utils/pipe_hook.py +46 -33
- hcpdiff/utils/utils.py +21 -4
- hcpdiff/workflow/__init__.py +15 -10
- hcpdiff/workflow/daam/__init__.py +1 -0
- hcpdiff/workflow/daam/act.py +66 -0
- hcpdiff/workflow/daam/hook.py +109 -0
- hcpdiff/workflow/diffusion.py +128 -136
- hcpdiff/workflow/fast.py +31 -0
- hcpdiff/workflow/flow.py +67 -0
- hcpdiff/workflow/io.py +36 -68
- hcpdiff/workflow/model.py +46 -43
- hcpdiff/workflow/text.py +84 -52
- hcpdiff/workflow/utils.py +32 -12
- hcpdiff/workflow/vae.py +37 -38
- hcpdiff-2.1.dist-info/METADATA +285 -0
- hcpdiff-2.1.dist-info/RECORD +114 -0
- {hcpdiff-0.9.0.dist-info → hcpdiff-2.1.dist-info}/WHEEL +1 -1
- hcpdiff-2.1.dist-info/entry_points.txt +5 -0
- hcpdiff/ckpt_manager/base.py +0 -16
- hcpdiff/ckpt_manager/ckpt_diffusers.py +0 -45
- hcpdiff/ckpt_manager/ckpt_pkl.py +0 -138
- hcpdiff/ckpt_manager/ckpt_safetensor.py +0 -60
- hcpdiff/ckpt_manager/ckpt_webui.py +0 -54
- hcpdiff/data/bucket.py +0 -358
- hcpdiff/data/caption_loader.py +0 -80
- hcpdiff/data/cond_dataset.py +0 -40
- hcpdiff/data/crop_info_dataset.py +0 -40
- hcpdiff/data/data_processor.py +0 -33
- hcpdiff/data/pair_dataset.py +0 -146
- hcpdiff/data/sampler.py +0 -54
- hcpdiff/data/source/base.py +0 -30
- hcpdiff/data/utils.py +0 -80
- hcpdiff/infer_workflow.py +0 -57
- hcpdiff/loggers/__init__.py +0 -13
- hcpdiff/loggers/base_logger.py +0 -76
- hcpdiff/loggers/cli_logger.py +0 -40
- hcpdiff/loggers/preview/__init__.py +0 -1
- hcpdiff/loggers/preview/image_previewer.py +0 -149
- hcpdiff/loggers/tensorboard_logger.py +0 -30
- hcpdiff/loggers/wandb_logger.py +0 -31
- hcpdiff/loggers/webui_logger.py +0 -9
- hcpdiff/loss/min_snr_loss.py +0 -52
- hcpdiff/models/layers.py +0 -81
- hcpdiff/models/plugin.py +0 -348
- hcpdiff/models/wrapper.py +0 -75
- hcpdiff/noise/__init__.py +0 -3
- hcpdiff/noise/noise_base.py +0 -16
- hcpdiff/noise/pyramid_noise.py +0 -50
- hcpdiff/noise/zero_terminal.py +0 -44
- hcpdiff/train_ac.py +0 -565
- hcpdiff/train_ac_single.py +0 -39
- hcpdiff/utils/caption_tools.py +0 -105
- hcpdiff/utils/cfg_net_tools.py +0 -321
- hcpdiff/utils/cfg_resolvers.py +0 -16
- hcpdiff/utils/ema.py +0 -52
- hcpdiff/utils/img_size_tool.py +0 -248
- hcpdiff/vis/__init__.py +0 -3
- hcpdiff/vis/base_interface.py +0 -12
- hcpdiff/vis/disk_interface.py +0 -48
- hcpdiff/vis/webui_interface.py +0 -17
- hcpdiff/visualizer.py +0 -258
- hcpdiff/visualizer_reloadable.py +0 -237
- hcpdiff/workflow/base.py +0 -59
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/anime/text2img_anime.yaml +0 -21
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/anime/text2img_anime_lora.yaml +0 -58
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/change_vae.yaml +0 -6
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/euler_a.yaml +0 -8
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/img2img.yaml +0 -10
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/img2img_controlnet.yaml +0 -19
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/inpaint.yaml +0 -11
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/load_lora.yaml +0 -26
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/load_unet_part.yaml +0 -18
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/offload_2GB.yaml +0 -6
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/save_model.yaml +0 -44
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/text2img.yaml +0 -53
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/text2img_DA++.yaml +0 -34
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/infer/text2img_sdxl.yaml +0 -9
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/plugins/plugin_controlnet.yaml +0 -17
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/te_struct.txt +0 -193
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/dataset/base_dataset.yaml +0 -29
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/dataset/regularization_dataset.yaml +0 -31
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/CustomDiffusion.yaml +0 -74
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/DreamArtist++.yaml +0 -135
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/DreamArtist.yaml +0 -45
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/DreamBooth.yaml +0 -62
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/FT_sdxl.yaml +0 -33
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/Lion_optimizer.yaml +0 -17
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/TextualInversion.yaml +0 -41
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/add_logger_tensorboard_wandb.yaml +0 -15
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/controlnet.yaml +0 -53
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/ema.yaml +0 -10
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/fine-tuning.yaml +0 -53
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/locon.yaml +0 -24
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/lora_anime_character.yaml +0 -77
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/lora_conventional.yaml +0 -56
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/lora_sdxl.yaml +0 -41
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/min_snr.yaml +0 -7
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples/preview_in_training.yaml +0 -6
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples_noob/DreamBooth.yaml +0 -70
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples_noob/TextualInversion.yaml +0 -45
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples_noob/fine-tuning.yaml +0 -45
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/examples_noob/lora.yaml +0 -63
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/train_base.yaml +0 -81
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/train/tuning_base.yaml +0 -42
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/unet_struct.txt +0 -932
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/workflow/highres_fix_latent.yaml +0 -86
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/workflow/highres_fix_pixel.yaml +0 -99
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/workflow/text2img.yaml +0 -57
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/workflow/text2img_lora.yaml +0 -70
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/zero2.json +0 -32
- hcpdiff-0.9.0.data/data/hcpdiff/cfgs/zero3.json +0 -39
- hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/caption.txt +0 -1
- hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/name.txt +0 -1
- hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/name_2pt_caption.txt +0 -1
- hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/name_caption.txt +0 -1
- hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/object.txt +0 -27
- hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/object_caption.txt +0 -27
- hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/style.txt +0 -19
- hcpdiff-0.9.0.data/data/hcpdiff/prompt_tuning_template/style_caption.txt +0 -19
- hcpdiff-0.9.0.dist-info/METADATA +0 -199
- hcpdiff-0.9.0.dist-info/RECORD +0 -155
- hcpdiff-0.9.0.dist-info/entry_points.txt +0 -2
- {hcpdiff-0.9.0.dist-info → hcpdiff-2.1.dist-info/licenses}/LICENSE +0 -0
- {hcpdiff-0.9.0.dist-info → hcpdiff-2.1.dist-info}/top_level.txt +0 -0
hcpdiff/workflow/model.py
CHANGED
```diff
@@ -1,67 +1,70 @@
+import torch
 from accelerate import infer_auto_device_map, dispatch_model
 from diffusers.utils.import_utils import is_xformers_available
+from rainbowneko.infer import BasicAction

-from hcpdiff.utils.net_utils import get_dtype
+from hcpdiff.utils.net_utils import get_dtype
+from hcpdiff.utils.net_utils import to_cpu
 from hcpdiff.utils.utils import size_to_int, int_to_size
-from .base import BasicAction, from_memory_context, MemoryMixin

-class VaeOptimizeAction(BasicAction…
-…
-…
-        super().__init__()
+class VaeOptimizeAction(BasicAction):
+    def __init__(self, slicing=True, tiling=False, key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
         self.slicing = slicing
         self.tiling = tiling
-        self.vae = vae
-
-    def forward(self, memory, **states):
-        vae = self.vae or memory.vae

+    def forward(self, vae, **states):
         if self.tiling:
             vae.enable_tiling()
         if self.slicing:
             vae.enable_slicing()
-        return states

-class BuildOffloadAction(BasicAction…
-…
-…
-        super().__init__()
+class BuildOffloadAction(BasicAction):
+    def __init__(self, max_VRAM: str, max_RAM: str, vae_cpu=False, key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
         self.max_VRAM = max_VRAM
         self.max_RAM = max_RAM
+        self.vae_cpu = vae_cpu

-    def forward(self,…
+    def forward(self, vae, denoiser, dtype: str, **states):
+        # denoiser offload
         torch_dtype = get_dtype(dtype)
         vram = size_to_int(self.max_VRAM)
-        device_map = infer_auto_device_map(…
-…
+        device_map = infer_auto_device_map(denoiser, max_memory={0:int_to_size(vram >> 1), "cpu":self.max_RAM}, dtype=torch_dtype)
+        denoiser = dispatch_model(denoiser, device_map)

-        device_map = infer_auto_device_map(…
-…
-…
+        device_map = infer_auto_device_map(vae, max_memory={0:int_to_size(vram >> 5), "cpu":self.max_RAM}, dtype=torch_dtype)
+        vae = dispatch_model(vae, device_map)
+        # VAE offload
+        vram = size_to_int(self.max_VRAM)
+        if not self.vae_cpu:
+            device_map = infer_auto_device_map(vae, max_memory={0:int_to_size(vram >> 5), "cpu":self.max_RAM}, dtype=torch.float32)
+            vae = dispatch_model(vae, device_map)
+        else:
+            to_cpu(vae)
+            vae_decode_raw = vae.decode

-…
-…
-…
-…
-        # self.te_hook.enable_xformers()
-        return states
+            def vae_decode_offload(latents, return_dict=True, decode_raw=vae.decode):
+                vae.to(dtype=torch.float32)
+                res = decode_raw(latents.cpu().to(dtype=torch.float32), return_dict=return_dict)
+                return res

-…
-    def forward(self, memory, **states):
-        to_cuda(memory.text_encoder)
-        return states
+            vae.decode = vae_decode_offload

-…
-    def forward(self, memory, **states):
-        to_cpu(memory.text_encoder)
-        return states
+            vae_encode_raw = vae.encode

-…
-…
-…
-…
+            def vae_encode_offload(x, return_dict=True, encode_raw=vae.encode):
+                vae.to(dtype=torch.float32)
+                res = encode_raw(x.cpu().to(dtype=torch.float32), return_dict=return_dict)
+                return res

-…
-…
-…
-        return…
+            vae.encode = vae_encode_offload
+            return {'denoiser':denoiser, 'vae':vae, 'vae_decode_raw':vae_decode_raw, 'vae_encode_raw':vae_encode_raw}
+
+        return {'denoiser':denoiser, 'vae':vae}
+
+class XformersEnableAction(BasicAction):
+    def forward(self, denoiser, **states):
+        if is_xformers_available():
+            denoiser.enable_xformers_memory_efficient_attention()
+        # self.te_hook.enable_xformers()
```
(Removed lines shown as `…` were truncated by the diff viewer and are not recoverable.)
hcpdiff/workflow/text.py
CHANGED
```diff
@@ -1,80 +1,112 @@
 from typing import List, Union

 import torch
-from torch.cuda.amp import autocast
-
 from hcpdiff.models import TokenizerHook
 from hcpdiff.models.compose import ComposeTEEXHook, ComposeEmbPTHook
-from .…
+from hcpdiff.utils import pad_attn_bias
 from hcpdiff.utils.net_utils import get_dtype, to_cpu, to_cuda
+from rainbowneko.infer import BasicAction
+from torch.cuda.amp import autocast

-class TextHookAction(BasicAction…
-…
-…
-        super().__init__()
-        self.TE = TE
-        self.tokenizer = tokenizer
+class TextHookAction(BasicAction):
+    def __init__(self, emb_dir: str = None, N_repeats: int = 1, layer_skip: int = 0, TE_final_norm: bool = True,
+                 use_attention_mask=False, key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)

         self.emb_dir = emb_dir
         self.N_repeats = N_repeats
         self.layer_skip = layer_skip
         self.TE_final_norm = TE_final_norm
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
+        self.use_attention_mask = use_attention_mask
+
+    def forward(self, TE, tokenizer, in_preview=False, te_hook:ComposeTEEXHook=None, emb_hook=None, **states):
+        if in_preview and emb_hook is not None:
+            emb_hook.N_repeats = self.N_repeats
+        else:
+            emb_hook, _ = ComposeEmbPTHook.hook_from_dir(self.emb_dir, tokenizer, TE, N_repeats=self.N_repeats)
+        tokenizer.N_repeats = self.N_repeats
+
+        if in_preview:
+            te_hook.N_repeats = self.N_repeats
+            te_hook.clip_skip = self.layer_skip
+            te_hook.clip_final_norm = self.TE_final_norm
+            te_hook.use_attention_mask = self.use_attention_mask
+        else:
+            te_hook = ComposeTEEXHook.hook(TE, tokenizer, N_repeats=self.N_repeats,
+                clip_skip=self.layer_skip, clip_final_norm=self.TE_final_norm, use_attention_mask=self.use_attention_mask)
+        token_ex = TokenizerHook(tokenizer)
+        return {'te_hook':te_hook, 'emb_hook':emb_hook, 'token_ex':token_ex}
+
+class TextEncodeAction(BasicAction):
+    def __init__(self, prompt: Union[List, str], negative_prompt: Union[List, str], bs: int = None, key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
         if isinstance(prompt, str) and bs is not None:
             prompt = [prompt]*bs
             negative_prompt = [negative_prompt]*bs

         self.prompt = prompt
         self.negative_prompt = negative_prompt
+        self.bs = bs

-…
+    def forward(self, te_hook, TE, dtype: str, device, amp=None, gen_step=None, prompt_all=None, negative_prompt_all=None, model_offload=False,
+                **states):
+        prompt_all = prompt_all or self.prompt
+        negative_prompt_all = negative_prompt_all or self.negative_prompt

-…
-…
-…
-…
-…
-…
+        if gen_step is not None:
+            idx = (gen_step*self.bs)%len(prompt_all)
+            prompt = prompt_all[idx:idx+self.bs]
+            negative_prompt = negative_prompt_all[idx:idx+self.bs]
+        else:
+            prompt = prompt_all
+            negative_prompt = negative_prompt_all
+
+        if model_offload:
+            to_cuda(TE)
+
+        with autocast(enabled=amp is not None, dtype=get_dtype(amp)):
+            emb, pooled_output, attention_mask = te_hook.encode_prompt_to_emb(negative_prompt+prompt)
+            if attention_mask is not None:
+                emb, attention_mask = pad_attn_bias(emb, attention_mask)
+
+        if model_offload:
+            to_cpu(TE)
+
+        if not isinstance(te_hook, ComposeTEEXHook):
+            pooled_output = None
+        return {'prompt':prompt, 'negative_prompt':negative_prompt, 'prompt_embeds':emb, 'encoder_attention_mask':attention_mask,
+                'pooled_output':pooled_output}

 class AttnMultTextEncodeAction(TextEncodeAction):
-…
-    def …
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
-…
+
+    def forward(self, te_hook, token_ex, TE, dtype: str, device, amp=None, gen_step=None, prompt_all=None, negative_prompt_all=None,
+                model_offload=False, **states):
+        prompt_all = prompt_all if prompt_all is not None else self.prompt
+        negative_prompt_all = negative_prompt_all if negative_prompt_all is not None else self.negative_prompt
+
+        if gen_step is not None:
+            idx = (gen_step*self.bs)%len(prompt_all)
+            prompt = prompt_all[idx:idx+self.bs]
+            negative_prompt = negative_prompt_all[idx:idx+self.bs]
+        else:
+            prompt = prompt_all
+            negative_prompt = negative_prompt_all
+
+        if model_offload:
+            to_cuda(TE)
+
+        mult_p, clean_text_p = token_ex.parse_attn_mult(prompt)
+        mult_n, clean_text_n = token_ex.parse_attn_mult(negative_prompt)
+        with autocast(enabled=amp is not None, dtype=get_dtype(amp)):
             emb, pooled_output, attention_mask = te_hook.encode_prompt_to_emb(clean_text_n+clean_text_p)
-…
+            if attention_mask is not None:
+                emb, attention_mask = pad_attn_bias(emb, attention_mask)
             emb_n, emb_p = emb.chunk(2)
             emb_p = te_hook.mult_attn(emb_p, mult_p)
             emb_n = te_hook.mult_attn(emb_n, mult_n)

-        if …
-            to_cpu(…
+        if model_offload:
+            to_cpu(TE)

-        return {…
-            '…
+        return {'prompt':list(clean_text_p), 'negative_prompt':list(clean_text_n), 'prompt_embeds':torch.cat([emb_n, emb_p], dim=0),
+                'encoder_attention_mask':attention_mask, 'pooled_output':pooled_output}
```
(Removed lines shown as `…` were truncated by the diff viewer and are not recoverable.)
hcpdiff/workflow/utils.py
CHANGED
```diff
@@ -1,13 +1,14 @@
-import …
+from typing import List, Union

-…
-from torch import nn
+import torch
 from PIL import Image
-from …
+from hcpdiff.data.handler import ControlNetHandler
+from rainbowneko.infer import BasicAction
+from torch import nn

 class LatentResizeAction(BasicAction):
-…
-…
+    def __init__(self, width=1024, height=1024, mode='bicubic', antialias=True, key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
         self.size = (height//8, width//8)
         self.mode = mode
         self.antialias = antialias
@@ -16,18 +17,37 @@ class LatentResizeAction(BasicAction):
         latents_dtype = latents.dtype
         latents = nn.functional.interpolate(latents.to(dtype=torch.float32), size=self.size, mode=self.mode)
         latents = latents.to(dtype=latents_dtype)
-        return {…
+        return {'latents':latents}

 class ImageResizeAction(BasicAction):
     # resample name to Image.xxx
     mode_map = {'nearest':Image.NEAREST, 'bilinear':Image.BILINEAR, 'bicubic':Image.BICUBIC, 'lanczos':Image.LANCZOS, 'box':Image.BOX,
-                'hamming':Image.HAMMING, 'antialias':Image.…
+                'hamming':Image.HAMMING, 'antialias':Image.LANCZOS}

-…
-…
+    def __init__(self, width=1024, height=1024, mode='bicubic', key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
         self.size = (width, height)
         self.mode = self.mode_map[mode]

-    def forward(self, images:List[Image.Image], **states):
+    def forward(self, images: List[Image.Image], **states):
         images = [image.resize(self.size, resample=self.mode) for image in images]
-        return {…
+        return {'images':images}
+
+class FeedtoCNetAction(BasicAction):
+    def __init__(self, width=None, height=None, key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
+        self.size = (width, height)
+        self.cnet_handler = ControlNetHandler()
+
+    def forward(self, images: Union[List[Image.Image], Image.Image], device='cuda', dtype=None, bs=None, latents=None, **states):
+        if bs is None:
+            if 'prompt' in states:
+                bs = len(states['prompt'])
+
+        if latents is not None:
+            width, height = latents.shape[3]*8, latents.shape[2]*8
+        else:
+            width, height = self.size
+
+        images = self.cnet_handler.handle(images).to(device, dtype=dtype).expand(bs*2, 3, width, height)
+        return {'ex_inputs':{'cond':images}}
```
(Removed lines shown as `…` were truncated by the diff viewer and are not recoverable.)
hcpdiff/workflow/vae.py
CHANGED
```diff
@@ -1,33 +1,32 @@
-from .base import BasicAction, from_memory_context
-from diffusers import AutoencoderKL
-from diffusers.image_processor import VaeImageProcessor
-from typing import Dict, Any
 import torch
+from diffusers.image_processor import VaeImageProcessor
 from hcpdiff.utils import to_cuda, to_cpu
 from hcpdiff.utils.net_utils import get_dtype
+from rainbowneko.infer import BasicAction

 class EncodeAction(BasicAction):
-…
-…
-…
-        self.vae = vae
-        self.vae_scale_factor = 2**(len(self.vae.config.block_out_channels)-1)
-        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) if image_processor is None else image_processor
-        self.offload = offload
+    def __init__(self, image_processor=None, key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
+        self.image_processor = image_processor

-    def forward(self, images, dtype:str, device, generator, bs=None, **states):
+    def forward(self, vae, images, dtype: str, device, generator, bs=None, model_offload=False, **states):
         if bs is None:
             if 'prompt' in states:
                 bs = len(states['prompt'])
+        vae_scale_factor = 2**(len(vae.config.block_out_channels)-1)
+        if self.image_processor is None:
+            self.image_processor = VaeImageProcessor(vae_scale_factor=vae_scale_factor)

         image = self.image_processor.preprocess(images)
-…
+        if bs is not None and image.shape[0] != bs:
+            image = image.repeat(bs//image.shape[0], 1, 1, 1)
+        image = image.to(device=device, dtype=vae.dtype)

         if image.shape[1] == 4:
             init_latents = image
         else:
-            if …
-                to_cuda(…
+            if model_offload:
+                to_cuda(vae)
             if isinstance(generator, list) and len(generator) != bs:
                 raise ValueError(
                     f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -36,38 +35,38 @@ class EncodeAction(BasicAction):

             elif isinstance(generator, list):
                 init_latents = [
-…
+                    vae.encode(image[i: i+1]).latent_dist.sample(generator[i]) for i in range(bs)
                 ]
                 init_latents = torch.cat(init_latents, dim=0)
             else:
-                init_latents = …
+                init_latents = vae.encode(image).latent_dist.sample(generator)

-        init_latents = …
-        if …
-            to_cpu(…
-        return {…
+        init_latents = vae.config.scaling_factor*init_latents.to(dtype=get_dtype(dtype))
+        if model_offload:
+            to_cpu(vae)
+        return {'latents':init_latents}

 class DecodeAction(BasicAction):
-…
-…
-        super().__init__()
-        self.vae = vae
-        self.offload = offload
+    def __init__(self, image_processor=None, output_type='pil', key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)

-        self.…
-        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) if image_processor is None else image_processor
+        self.image_processor = image_processor
         self.output_type = output_type
-        self.decode_key = decode_key

-    def forward(self, **states):
-…
-        if self.…
-…
-…
-…
-…
-…
+    def forward(self, vae, denoiser, latents, model_offload=False, **states):
+        vae_scale_factor = 2**(len(vae.config.block_out_channels)-1)
+        if self.image_processor is None:
+            self.image_processor = VaeImageProcessor(vae_scale_factor=vae_scale_factor)
+
+        if model_offload:
+            to_cpu(denoiser)
+            torch.cuda.synchronize()
+            to_cuda(vae)
+        latents = latents.to(dtype=vae.dtype)
+        image = vae.decode(latents/vae.config.scaling_factor, return_dict=False)[0]
+        if model_offload:
+            to_cpu(vae)

         do_denormalize = [True]*image.shape[0]
         image = self.image_processor.postprocess(image, output_type=self.output_type, do_denormalize=do_denormalize)
-        return {…
+        return {'images':image}
```
(Removed lines shown as `…` were truncated by the diff viewer and are not recoverable.)