hcpdiff 0.9.1__py3-none-any.whl → 2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hcpdiff/__init__.py +4 -4
- hcpdiff/ckpt_manager/__init__.py +4 -5
- hcpdiff/ckpt_manager/ckpt.py +24 -0
- hcpdiff/ckpt_manager/format/__init__.py +4 -0
- hcpdiff/ckpt_manager/format/diffusers.py +59 -0
- hcpdiff/ckpt_manager/format/emb.py +21 -0
- hcpdiff/ckpt_manager/format/lora_webui.py +252 -0
- hcpdiff/ckpt_manager/format/sd_single.py +41 -0
- hcpdiff/ckpt_manager/loader.py +64 -0
- hcpdiff/data/__init__.py +4 -28
- hcpdiff/data/cache/__init__.py +1 -0
- hcpdiff/data/cache/vae.py +102 -0
- hcpdiff/data/dataset.py +20 -0
- hcpdiff/data/handler/__init__.py +3 -0
- hcpdiff/data/handler/controlnet.py +18 -0
- hcpdiff/data/handler/diffusion.py +90 -0
- hcpdiff/data/handler/text.py +111 -0
- hcpdiff/data/source/__init__.py +3 -3
- hcpdiff/data/source/folder_class.py +12 -29
- hcpdiff/data/source/text.py +40 -0
- hcpdiff/data/source/text2img.py +36 -74
- hcpdiff/data/source/text2img_cond.py +9 -15
- hcpdiff/diffusion/__init__.py +0 -0
- hcpdiff/diffusion/noise/__init__.py +2 -0
- hcpdiff/diffusion/noise/pyramid_noise.py +42 -0
- hcpdiff/diffusion/noise/zero_terminal.py +39 -0
- hcpdiff/diffusion/sampler/__init__.py +5 -0
- hcpdiff/diffusion/sampler/base.py +72 -0
- hcpdiff/diffusion/sampler/ddpm.py +20 -0
- hcpdiff/diffusion/sampler/diffusers.py +66 -0
- hcpdiff/diffusion/sampler/edm.py +22 -0
- hcpdiff/diffusion/sampler/sigma_scheduler/__init__.py +3 -0
- hcpdiff/diffusion/sampler/sigma_scheduler/base.py +14 -0
- hcpdiff/diffusion/sampler/sigma_scheduler/ddpm.py +197 -0
- hcpdiff/diffusion/sampler/sigma_scheduler/edm.py +48 -0
- hcpdiff/easy/__init__.py +2 -0
- hcpdiff/easy/cfg/__init__.py +3 -0
- hcpdiff/easy/cfg/sd15_train.py +207 -0
- hcpdiff/easy/cfg/sdxl_train.py +147 -0
- hcpdiff/easy/cfg/t2i.py +228 -0
- hcpdiff/easy/model/__init__.py +2 -0
- hcpdiff/easy/model/cnet.py +31 -0
- hcpdiff/easy/model/loader.py +79 -0
- hcpdiff/easy/sampler.py +46 -0
- hcpdiff/evaluate/__init__.py +1 -0
- hcpdiff/evaluate/previewer.py +60 -0
- hcpdiff/loss/__init__.py +4 -1
- hcpdiff/loss/base.py +41 -0
- hcpdiff/loss/gw.py +35 -0
- hcpdiff/loss/ssim.py +37 -0
- hcpdiff/loss/vlb.py +79 -0
- hcpdiff/loss/weighting.py +66 -0
- hcpdiff/models/__init__.py +2 -2
- hcpdiff/models/cfg_context.py +17 -14
- hcpdiff/models/compose/compose_hook.py +44 -23
- hcpdiff/models/compose/compose_tokenizer.py +21 -8
- hcpdiff/models/compose/sdxl_composer.py +4 -4
- hcpdiff/models/controlnet.py +16 -16
- hcpdiff/models/lora_base_patch.py +14 -25
- hcpdiff/models/lora_layers.py +3 -9
- hcpdiff/models/lora_layers_patch.py +14 -24
- hcpdiff/models/text_emb_ex.py +84 -6
- hcpdiff/models/textencoder_ex.py +54 -18
- hcpdiff/models/wrapper/__init__.py +3 -0
- hcpdiff/models/wrapper/pixart.py +19 -0
- hcpdiff/models/wrapper/sd.py +218 -0
- hcpdiff/models/wrapper/utils.py +20 -0
- hcpdiff/parser/__init__.py +1 -0
- hcpdiff/parser/embpt.py +32 -0
- hcpdiff/tools/convert_caption_txt2json.py +1 -1
- hcpdiff/tools/dataset_generator.py +94 -0
- hcpdiff/tools/download_hf_model.py +24 -0
- hcpdiff/tools/init_proj.py +3 -21
- hcpdiff/tools/lora_convert.py +18 -17
- hcpdiff/tools/save_model.py +12 -0
- hcpdiff/tools/sd2diffusers.py +1 -1
- hcpdiff/train_colo.py +1 -1
- hcpdiff/train_deepspeed.py +1 -1
- hcpdiff/trainer_ac.py +79 -0
- hcpdiff/trainer_ac_single.py +31 -0
- hcpdiff/utils/__init__.py +0 -2
- hcpdiff/utils/inpaint_pipe.py +7 -2
- hcpdiff/utils/net_utils.py +29 -6
- hcpdiff/utils/pipe_hook.py +24 -7
- hcpdiff/utils/utils.py +21 -4
- hcpdiff/workflow/__init__.py +15 -10
- hcpdiff/workflow/daam/__init__.py +1 -0
- hcpdiff/workflow/daam/act.py +66 -0
- hcpdiff/workflow/daam/hook.py +109 -0
- hcpdiff/workflow/diffusion.py +118 -128
- hcpdiff/workflow/fast.py +31 -0
- hcpdiff/workflow/flow.py +67 -0
- hcpdiff/workflow/io.py +36 -130
- hcpdiff/workflow/model.py +46 -43
- hcpdiff/workflow/text.py +60 -47
- hcpdiff/workflow/utils.py +32 -12
- hcpdiff/workflow/vae.py +37 -38
- hcpdiff-2.2.dist-info/METADATA +299 -0
- hcpdiff-2.2.dist-info/RECORD +115 -0
- {hcpdiff-0.9.1.dist-info → hcpdiff-2.2.dist-info}/WHEEL +1 -1
- hcpdiff-2.2.dist-info/entry_points.txt +5 -0
- hcpdiff/ckpt_manager/base.py +0 -16
- hcpdiff/ckpt_manager/ckpt_diffusers.py +0 -45
- hcpdiff/ckpt_manager/ckpt_pkl.py +0 -138
- hcpdiff/ckpt_manager/ckpt_safetensor.py +0 -64
- hcpdiff/ckpt_manager/ckpt_webui.py +0 -54
- hcpdiff/data/bucket.py +0 -358
- hcpdiff/data/caption_loader.py +0 -80
- hcpdiff/data/cond_dataset.py +0 -40
- hcpdiff/data/crop_info_dataset.py +0 -40
- hcpdiff/data/data_processor.py +0 -33
- hcpdiff/data/pair_dataset.py +0 -146
- hcpdiff/data/sampler.py +0 -54
- hcpdiff/data/source/base.py +0 -30
- hcpdiff/data/utils.py +0 -80
- hcpdiff/deprecated/__init__.py +0 -1
- hcpdiff/deprecated/cfg_converter.py +0 -81
- hcpdiff/deprecated/lora_convert.py +0 -31
- hcpdiff/infer_workflow.py +0 -57
- hcpdiff/loggers/__init__.py +0 -13
- hcpdiff/loggers/base_logger.py +0 -76
- hcpdiff/loggers/cli_logger.py +0 -40
- hcpdiff/loggers/preview/__init__.py +0 -1
- hcpdiff/loggers/preview/image_previewer.py +0 -149
- hcpdiff/loggers/tensorboard_logger.py +0 -30
- hcpdiff/loggers/wandb_logger.py +0 -31
- hcpdiff/loggers/webui_logger.py +0 -9
- hcpdiff/loss/min_snr_loss.py +0 -52
- hcpdiff/models/layers.py +0 -81
- hcpdiff/models/plugin.py +0 -348
- hcpdiff/models/wrapper.py +0 -75
- hcpdiff/noise/__init__.py +0 -3
- hcpdiff/noise/noise_base.py +0 -16
- hcpdiff/noise/pyramid_noise.py +0 -50
- hcpdiff/noise/zero_terminal.py +0 -44
- hcpdiff/train_ac.py +0 -566
- hcpdiff/train_ac_single.py +0 -39
- hcpdiff/utils/caption_tools.py +0 -105
- hcpdiff/utils/cfg_net_tools.py +0 -321
- hcpdiff/utils/cfg_resolvers.py +0 -16
- hcpdiff/utils/ema.py +0 -52
- hcpdiff/utils/img_size_tool.py +0 -248
- hcpdiff/vis/__init__.py +0 -3
- hcpdiff/vis/base_interface.py +0 -12
- hcpdiff/vis/disk_interface.py +0 -48
- hcpdiff/vis/webui_interface.py +0 -17
- hcpdiff/viser_fast.py +0 -138
- hcpdiff/visualizer.py +0 -265
- hcpdiff/visualizer_reloadable.py +0 -237
- hcpdiff/workflow/base.py +0 -59
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/anime/text2img_anime.yaml +0 -21
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/anime/text2img_anime_lora.yaml +0 -58
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/change_vae.yaml +0 -6
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/euler_a.yaml +0 -8
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/img2img.yaml +0 -10
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/img2img_controlnet.yaml +0 -19
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/inpaint.yaml +0 -11
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/load_lora.yaml +0 -26
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/load_unet_part.yaml +0 -18
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/offload_2GB.yaml +0 -6
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/save_model.yaml +0 -44
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/text2img.yaml +0 -53
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/text2img_DA++.yaml +0 -34
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/text2img_sdxl.yaml +0 -9
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/plugins/plugin_controlnet.yaml +0 -17
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/te_struct.txt +0 -193
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/dataset/base_dataset.yaml +0 -29
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/dataset/regularization_dataset.yaml +0 -31
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/CustomDiffusion.yaml +0 -74
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/DreamArtist++.yaml +0 -135
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/DreamArtist.yaml +0 -45
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/DreamBooth.yaml +0 -62
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/FT_sdxl.yaml +0 -33
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/Lion_optimizer.yaml +0 -17
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/TextualInversion.yaml +0 -41
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/add_logger_tensorboard_wandb.yaml +0 -15
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/controlnet.yaml +0 -53
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/ema.yaml +0 -10
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/fine-tuning.yaml +0 -53
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/locon.yaml +0 -24
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/lora_anime_character.yaml +0 -77
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/lora_conventional.yaml +0 -56
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/lora_sdxl.yaml +0 -41
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/min_snr.yaml +0 -7
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/preview_in_training.yaml +0 -6
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples_noob/DreamBooth.yaml +0 -70
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples_noob/TextualInversion.yaml +0 -45
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples_noob/fine-tuning.yaml +0 -45
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples_noob/lora.yaml +0 -63
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/train_base.yaml +0 -81
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/tuning_base.yaml +0 -42
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/unet_struct.txt +0 -932
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/workflow/highres_fix_latent.yaml +0 -86
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/workflow/highres_fix_pixel.yaml +0 -99
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/workflow/text2img.yaml +0 -59
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/workflow/text2img_lora.yaml +0 -70
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/zero2.json +0 -32
- hcpdiff-0.9.1.data/data/hcpdiff/cfgs/zero3.json +0 -39
- hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/caption.txt +0 -1
- hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/name.txt +0 -1
- hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/name_2pt_caption.txt +0 -1
- hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/name_caption.txt +0 -1
- hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/object.txt +0 -27
- hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/object_caption.txt +0 -27
- hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/style.txt +0 -19
- hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/style_caption.txt +0 -19
- hcpdiff-0.9.1.dist-info/METADATA +0 -199
- hcpdiff-0.9.1.dist-info/RECORD +0 -160
- hcpdiff-0.9.1.dist-info/entry_points.txt +0 -2
- {hcpdiff-0.9.1.dist-info → hcpdiff-2.2.dist-info/licenses}/LICENSE +0 -0
- {hcpdiff-0.9.1.dist-info → hcpdiff-2.2.dist-info}/top_level.txt +0 -0
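The per-file diffs below follow one recurring pattern: workflow actions drop the old `MemoryMixin`/`from_memory_context` machinery that mutated a shared `memory` object, and instead subclass `rainbowneko.infer.BasicAction`, accept `key_map_in`/`key_map_out` remapping arguments, receive their inputs as named `forward` parameters, and return a plain dict that is merged back into the workflow state. A minimal sketch of that convention (the action name and the `latents` key are illustrative, not part of the package):

```python
# Sketch of the 2.2 action convention seen throughout the diffs below.
# ScaleLatentsAction and the 'latents' state key are hypothetical examples.
from rainbowneko.infer import BasicAction

class ScaleLatentsAction(BasicAction):
    def __init__(self, scale: float = 1.0, key_map_in=None, key_map_out=None):
        # key_map_in/key_map_out rename states on the way in and out
        super().__init__(key_map_in, key_map_out)
        self.scale = scale

    def forward(self, latents, **states):
        # inputs arrive as named parameters pulled from the state dict;
        # the returned dict is merged back into the workflow states
        return {'latents': latents * self.scale}
```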
hcpdiff/workflow/io.py
CHANGED
```diff
@@ -1,150 +1,56 @@
 import os
-from …
-import …
+from functools import partial
+from typing import List, Union
 
-
-
-from hcpdiff.utils import auto_text_encoder, auto_tokenizer, to_validate_file
-from hcpdiff.utils.cfg_net_tools import HCPModelLoader, make_plugin
-from hcpdiff.utils.img_size_tool import types_support
+import torch
+from hcpdiff.utils import to_validate_file
 from hcpdiff.utils.net_utils import get_dtype
-from .…
-
-
-
-
-
+from rainbowneko.ckpt_manager import NekoLoader
+from rainbowneko.infer import BasicAction
+from rainbowneko.infer import LoadImageAction as Neko_LoadImageAction
+from rainbowneko.utils.img_size_tool import types_support
+
+class BuildModelsAction(BasicAction):
+    def __init__(self, model_loader: partial[NekoLoader.load], dtype: str=torch.float32, device='cuda', key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
+        self.model_loader = model_loader
         self.dtype = get_dtype(dtype)
+        self.device = device
 
-
-
-
-
-
+    def forward(self, in_preview=False, model=None, **states):
+        if in_preview:
+            model = self.model_loader(dtype=self.dtype, device=self.device, denoiser=model.denoiser, TE=model.TE, vae=model.vae)
+        else:
+            model = self.model_loader(dtype=self.dtype, device=self.device)
 
-
-
-
-
-        memory.vae = self.vae or AutoencoderKL.from_pretrained(self.pretrained_model, subfolder="vae", torch_dtype=self.dtype)
-        memory.scheduler = self.scheduler or PNDMScheduler.from_pretrained(self.pretrained_model, subfolder="scheduler", torch_dtype=self.dtype)
+        if isinstance(model, dict):
+            return model
+        else:
+            return {'model':model}
 
-
+class LoadImageAction(Neko_LoadImageAction):
+    def __init__(self, image_paths: Union[str, List[str]], image_transforms=None, key_map_in=None, key_map_out=('input.x -> images',)):
+        super().__init__(image_paths, image_transforms, key_map_in, key_map_out)
 
 class SaveImageAction(BasicAction):
-
-
+    def __init__(self, save_root: str, image_type: str = 'png', quality: int = 95, save_cfg=True, key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
         self.save_root = save_root
         self.image_type = image_type
         self.quality = quality
+        self.save_cfg = save_cfg
 
         os.makedirs(save_root, exist_ok=True)
 
-    def forward(self, images, prompt, negative_prompt, seeds=None, **states):
-
+    def forward(self, images, prompt, negative_prompt, seeds, cfgs=None, parser=None, preview_root=None, preview_step=None, **states):
+        save_root = preview_root or self.save_root
+        num_img_exist = max([0]+[int(x.split('-', 1)[0]) for x in os.listdir(save_root) if x.rsplit('.', 1)[-1] in types_support])+1
 
         for bid, (p, pn, img) in enumerate(zip(prompt, negative_prompt, images)):
-            img_path = os.path.join(…
+            img_path = os.path.join(save_root, f"{preview_step or num_img_exist}-{seeds[bid]}-{to_validate_file(prompt[0])}.{self.image_type}")
             img.save(img_path, quality=self.quality)
             num_img_exist += 1
 
-
-
-
-    def forward(self, memory, **states):
-        memory.model_loader_unet = HCPModelLoader(memory.unet)
-        memory.model_loader_TE = HCPModelLoader(memory.text_encoder)
-        return states
-
-class LoadPartAction(BasicAction, MemoryMixin):
-    @from_memory_context
-    def __init__(self, model: str, cfg):
-        self.model = model
-        self.cfg = cfg
-
-    def forward(self, memory, **states):
-        model_loader = memory[f"model_loader_{self.model}"]
-        model_loader.load_part(self.cfg)
-        return states
-
-class LoadLoraAction(BasicAction, MemoryMixin):
-    @from_memory_context
-    def __init__(self, model: str, cfg):
-        self.model = model
-        self.cfg = cfg
-
-    def forward(self, memory, **states):
-        model_loader = memory[f"model_loader_{self.model}"]
-        lora_group = model_loader.load_lora(self.cfg)
-        if 'lora_dict' not in memory:
-            memory.lora_dict = {}
-        if path in memory.lora_dict:
-            warnings.warn(f"Lora {path} already loaded, and will be replaced!")
-            memory.lora_dict[path].remove()
-        memory.lora_dict[path] = lora_group
-        return states
-
-class BuildPluginAction(BasicAction, MemoryMixin):
-    @from_memory_context
-    def __init__(self, model: str, cfg):
-        self.model = model
-        self.cfg = cfg
-
-    def forward(self, memory, **states):
-        if isinstance(self.cfg_merge.plugin_cfg, str):
-            plugin_cfg = load_config(self.cfg_merge.plugin_cfg)
-            plugin_cfg = {'plugin_unet':hydra.utils.instantiate(plugin_cfg['plugin_unet']),
-                          'plugin_TE':hydra.utils.instantiate(plugin_cfg['plugin_TE'])}
-        else:
-            plugin_cfg = self.cfg_merge.plugin_cfg
-        all_plugin_group_unet = make_plugin(memory.unet, plugin_cfg['plugin_unet'])
-        all_plugin_group_TE = make_plugin(memory.text_encoder, plugin_cfg['plugin_TE'])
-
-        if 'plugin_dict' not in memory:
-            memory.plugin_dict = {}
-
-        for name, plugin_group in all_plugin_group_unet.items():
-            memory.plugin_dict[name] = plugin_group
-        for name, plugin_group in all_plugin_group_TE.items():
-            memory.plugin_dict[name] = plugin_group
-
-        return states
-
-class LoadPluginAction(BasicAction, MemoryMixin):
-    @from_memory_context
-    def __init__(self, model: str, cfg):
-        self.model = model
-        self.cfg = cfg
-
-    def forward(self, memory, **states):
-        model_loader = memory[f"model_loader_{self.model}"]
-        model_loader.load_plugin(self.cfg)
-        return states
-
-class RemoveLoraAction(BasicAction, MemoryMixin):
-    @from_memory_context
-    def __init__(self, path_list: List[str]):
-        self.path_list = path_list
-
-    def forward(self, memory, **states):
-        for path in self.path_list:
-            if path in memory.lora_dict:
-                memory.lora_dict[path].remove()
-                del memory.lora_dict[path]
-            else:
-                warnings.warn(f"Lora {path} not loaded!")
-        return states
-
-class RemovePluginAction(BasicAction, MemoryMixin):
-    @from_memory_context
-    def __init__(self, name_list: List[str]):
-        self.name_list = name_list
-
-    def forward(self, memory, **states):
-        for name in self.name_list:
-            if name in memory.plugin_dict:
-                memory.plugin_dict[name].remove()
-                del memory.plugin_dict[name]
-            else:
-                warnings.warn(f"Plugin {name} not loaded!")
-        return states
+            if self.save_cfg:
+                cfgs.seed = seeds[bid]
+                parser.save_configs(cfgs, os.path.join(save_root, f"{preview_step or num_img_exist}-{seeds[bid]}-info"))
```
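Note on the io.py change: `SaveImageAction` now names files `{index}-{seed}-{sanitized prompt}.{image_type}` (the index resumes from whatever is already in `save_root`), and with `save_cfg=True` it writes a `...-info` config snapshot beside each image. A toy illustration of the naming scheme (all values are examples):

```python
# Toy illustration of the SaveImageAction filename scheme; values are examples.
num_img_exist, seed, prompt, image_type = 3, 42, 'a cat', 'png'
img_name = f"{num_img_exist}-{seed}-{prompt}.{image_type}"  # -> '3-42-a cat.png'
# the real code first runs the prompt through to_validate_file() to make it filename-safe
```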
hcpdiff/workflow/model.py
CHANGED
```diff
@@ -1,67 +1,70 @@
+import torch
 from accelerate import infer_auto_device_map, dispatch_model
 from diffusers.utils.import_utils import is_xformers_available
+from rainbowneko.infer import BasicAction
 
-from hcpdiff.utils.net_utils import get_dtype
+from hcpdiff.utils.net_utils import get_dtype
+from hcpdiff.utils.net_utils import to_cpu
 from hcpdiff.utils.utils import size_to_int, int_to_size
-from .base import BasicAction, from_memory_context, MemoryMixin
 
-class VaeOptimizeAction(BasicAction…
-
-
-        super().__init__()
+class VaeOptimizeAction(BasicAction):
+    def __init__(self, slicing=True, tiling=False, key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
         self.slicing = slicing
         self.tiling = tiling
-        self.vae = vae
-
-    def forward(self, memory, **states):
-        vae = self.vae or memory.vae
 
+    def forward(self, vae, **states):
         if self.tiling:
             vae.enable_tiling()
         if self.slicing:
             vae.enable_slicing()
-        return states
 
-class BuildOffloadAction(BasicAction…
-
-
-        super().__init__()
+class BuildOffloadAction(BasicAction):
+    def __init__(self, max_VRAM: str, max_RAM: str, vae_cpu=False, key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
         self.max_VRAM = max_VRAM
         self.max_RAM = max_RAM
+        self.vae_cpu = vae_cpu
 
-    def forward(self, …
+    def forward(self, vae, denoiser, dtype: str, **states):
+        # denoiser offload
         torch_dtype = get_dtype(dtype)
         vram = size_to_int(self.max_VRAM)
-        device_map = infer_auto_device_map(…
-
+        device_map = infer_auto_device_map(denoiser, max_memory={0:int_to_size(vram >> 1), "cpu":self.max_RAM}, dtype=torch_dtype)
+        denoiser = dispatch_model(denoiser, device_map)
 
-        device_map = infer_auto_device_map(…
-
-
+        device_map = infer_auto_device_map(vae, max_memory={0:int_to_size(vram >> 5), "cpu":self.max_RAM}, dtype=torch_dtype)
+        vae = dispatch_model(vae, device_map)
+        # VAE offload
+        vram = size_to_int(self.max_VRAM)
+        if not self.vae_cpu:
+            device_map = infer_auto_device_map(vae, max_memory={0:int_to_size(vram >> 5), "cpu":self.max_RAM}, dtype=torch.float32)
+            vae = dispatch_model(vae, device_map)
+        else:
+            to_cpu(vae)
+            vae_decode_raw = vae.decode
 
-
-
-
-
-        # self.te_hook.enable_xformers()
-        return states
+            def vae_decode_offload(latents, return_dict=True, decode_raw=vae.decode):
+                vae.to(dtype=torch.float32)
+                res = decode_raw(latents.cpu().to(dtype=torch.float32), return_dict=return_dict)
+                return res
 
-
-    def forward(self, memory, **states):
-        to_cuda(memory.text_encoder)
-        return states
+            vae.decode = vae_decode_offload
 
-
-    def forward(self, memory, **states):
-        to_cpu(memory.text_encoder)
-        return states
+            vae_encode_raw = vae.encode
 
-
-
-
-
+            def vae_encode_offload(x, return_dict=True, encode_raw=vae.encode):
+                vae.to(dtype=torch.float32)
+                res = encode_raw(x.cpu().to(dtype=torch.float32), return_dict=return_dict)
+                return res
 
-
-
-
-        return …
+            vae.encode = vae_encode_offload
+            return {'denoiser':denoiser, 'vae':vae, 'vae_decode_raw':vae_decode_raw, 'vae_encode_raw':vae_encode_raw}
+
+        return {'denoiser':denoiser, 'vae':vae}
+
+class XformersEnableAction(BasicAction):
+    def forward(self, denoiser, **states):
+        if is_xformers_available():
+            denoiser.enable_xformers_memory_efficient_attention()
+        # self.te_hook.enable_xformers()
```
hcpdiff/workflow/text.py
CHANGED
```diff
@@ -3,78 +3,91 @@ from typing import List, Union
 import torch
 from hcpdiff.models import TokenizerHook
 from hcpdiff.models.compose import ComposeTEEXHook, ComposeEmbPTHook
+from hcpdiff.utils import pad_attn_bias
 from hcpdiff.utils.net_utils import get_dtype, to_cpu, to_cuda
+from rainbowneko.infer import BasicAction
 from torch.cuda.amp import autocast
 
-
-
-
-
-    def __init__(self, TE=None, tokenizer=None, emb_dir: str = 'embs/', N_repeats: int = 1, layer_skip: int = 0, TE_final_norm: bool = True):
-        super().__init__()
-        self.TE = TE
-        self.tokenizer = tokenizer
+class TextHookAction(BasicAction):
+    def __init__(self, emb_dir: str = None, N_repeats: int = 1, layer_skip: int = 0, TE_final_norm: bool = True,
+                 use_attention_mask=False, key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
 
         self.emb_dir = emb_dir
         self.N_repeats = N_repeats
         self.layer_skip = layer_skip
         self.TE_final_norm = TE_final_norm
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        self.use_attention_mask = use_attention_mask
+
+    def forward(self, TE, tokenizer, in_preview=False, te_hook:ComposeTEEXHook=None, emb_hook=None, **states):
+        if in_preview and emb_hook is not None:
+            emb_hook.N_repeats = self.N_repeats
+        else:
+            emb_hook, _ = ComposeEmbPTHook.hook_from_dir(self.emb_dir, tokenizer, TE, N_repeats=self.N_repeats)
+        tokenizer.N_repeats = self.N_repeats
+
+        if in_preview:
+            te_hook.N_repeats = self.N_repeats
+            te_hook.clip_skip = self.layer_skip
+            te_hook.clip_final_norm = self.TE_final_norm
+            te_hook.use_attention_mask = self.use_attention_mask
+        else:
+            te_hook = ComposeTEEXHook.hook(TE, tokenizer, N_repeats=self.N_repeats,
+                                           clip_skip=self.layer_skip, clip_final_norm=self.TE_final_norm, use_attention_mask=self.use_attention_mask)
+        token_ex = TokenizerHook(tokenizer)
+        return {'te_hook':te_hook, 'emb_hook':emb_hook, 'token_ex':token_ex}
+
+class TextEncodeAction(BasicAction):
+    def __init__(self, prompt: Union[List, str], negative_prompt: Union[List, str], bs: int = None, key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
         if isinstance(prompt, str) and bs is not None:
             prompt = [prompt]*bs
             negative_prompt = [negative_prompt]*bs
 
         self.prompt = prompt
         self.negative_prompt = negative_prompt
+        self.bs = bs
 
-
+    def forward(self, te_hook, TE, dtype: str, device, amp=None, prompt=None, negative_prompt=None, model_offload=False, **states):
+        prompt = prompt or self.prompt
+        negative_prompt = negative_prompt or self.negative_prompt
+
+        if model_offload:
+            to_cuda(TE)
 
-    def forward(self, memory, dtype: str, device, amp=None, **states):
-        te_hook = self.te_hook or memory.te_hook
         with autocast(enabled=amp is not None, dtype=get_dtype(amp)):
-            emb, pooled_output = te_hook.encode_prompt_to_emb(…
-
-
-            'device':device, 'dtype':dtype}
+            emb, pooled_output, attention_mask = te_hook.encode_prompt_to_emb(negative_prompt+prompt)
+            if attention_mask is not None:
+                emb, attention_mask = pad_attn_bias(emb, attention_mask)
 
-
-
-    def __init__(self, prompt: Union[List, str], negative_prompt: Union[List, str], bs: int = None, te_hook=None, token_ex=None):
-        super().__init__(prompt, negative_prompt, bs, te_hook)
-        self.token_ex = token_ex
+        if model_offload:
+            to_cpu(TE)
 
-
-
-
+        if not isinstance(te_hook, ComposeTEEXHook):
+            pooled_output = None
+        return {'prompt':prompt, 'negative_prompt':negative_prompt, 'prompt_embeds':emb, 'encoder_attention_mask':attention_mask,
+                'pooled_output':pooled_output}
+
+class AttnMultTextEncodeAction(TextEncodeAction):
+    def forward(self, te_hook, token_ex, TE, dtype: str, device, amp=None, prompt=None, negative_prompt=None, model_offload=False, **states):
+        prompt = prompt or self.prompt
+        negative_prompt = negative_prompt or self.negative_prompt
 
-
-
-        to_cuda(memory.text_encoder)
+        if model_offload:
+            to_cuda(TE)
 
-        mult_p, clean_text_p = token_ex.parse_attn_mult(…
-        mult_n, clean_text_n = token_ex.parse_attn_mult(…
+        mult_p, clean_text_p = token_ex.parse_attn_mult(prompt)
+        mult_n, clean_text_n = token_ex.parse_attn_mult(negative_prompt)
         with autocast(enabled=amp is not None, dtype=get_dtype(amp)):
             emb, pooled_output, attention_mask = te_hook.encode_prompt_to_emb(clean_text_n+clean_text_p)
+            if attention_mask is not None:
+                emb, attention_mask = pad_attn_bias(emb, attention_mask)
         emb_n, emb_p = emb.chunk(2)
         emb_p = te_hook.mult_attn(emb_p, mult_p)
         emb_n = te_hook.mult_attn(emb_n, mult_n)
 
-        if …
-            to_cpu(…
+        if model_offload:
+            to_cpu(TE)
 
-        return {…
-            '…
+        return {'prompt':list(clean_text_p), 'negative_prompt':list(clean_text_n), 'prompt_embeds':torch.cat([emb_n, emb_p], dim=0),
+                'encoder_attention_mask':attention_mask, 'pooled_output':pooled_output}
```
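Both encode actions use the classifier-free-guidance batching convention visible above: negative and positive prompts are concatenated (`negative_prompt+prompt`, or the cleaned `clean_text_n+clean_text_p`), encoded in one text-encoder pass, then split back with `chunk(2)` so `emb_n` and `emb_p` stay index-aligned. A tensor-level sketch of that split (shapes are illustrative):

```python
# Sketch of the CFG batching/split used by TextEncodeAction; shapes are examples.
import torch

bs, seq, dim = 2, 77, 768
emb = torch.randn(2 * bs, seq, dim)  # encoder output for negative_prompt + prompt
emb_n, emb_p = emb.chunk(2)          # first half negative, second half positive
assert emb_n.shape == emb_p.shape == (bs, seq, dim)
```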
hcpdiff/workflow/utils.py
CHANGED
```diff
@@ -1,13 +1,14 @@
-import …
+from typing import List, Union
 
-
-from torch import nn
+import torch
 from PIL import Image
-from …
+from hcpdiff.data.handler import ControlNetHandler
+from rainbowneko.infer import BasicAction
+from torch import nn
 
 class LatentResizeAction(BasicAction):
-
-
+    def __init__(self, width=1024, height=1024, mode='bicubic', antialias=True, key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
         self.size = (height//8, width//8)
         self.mode = mode
         self.antialias = antialias
@@ -16,18 +17,37 @@ class LatentResizeAction(BasicAction):
         latents_dtype = latents.dtype
         latents = nn.functional.interpolate(latents.to(dtype=torch.float32), size=self.size, mode=self.mode)
         latents = latents.to(dtype=latents_dtype)
-        return {…
+        return {'latents':latents}
 
 class ImageResizeAction(BasicAction):
     # resample name to Image.xxx
     mode_map = {'nearest':Image.NEAREST, 'bilinear':Image.BILINEAR, 'bicubic':Image.BICUBIC, 'lanczos':Image.LANCZOS, 'box':Image.BOX,
-                'hamming':Image.HAMMING, 'antialias':Image.…
+                'hamming':Image.HAMMING, 'antialias':Image.LANCZOS}
 
-
-
+    def __init__(self, width=1024, height=1024, mode='bicubic', key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
         self.size = (width, height)
         self.mode = self.mode_map[mode]
 
-    def forward(self, images:List[Image.Image], **states):
+    def forward(self, images: List[Image.Image], **states):
         images = [image.resize(self.size, resample=self.mode) for image in images]
-        return {…
+        return {'images':images}
+
+class FeedtoCNetAction(BasicAction):
+    def __init__(self, width=None, height=None, key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
+        self.size = (width, height)
+        self.cnet_handler = ControlNetHandler()
+
+    def forward(self, images: Union[List[Image.Image], Image.Image], device='cuda', dtype=None, bs=None, latents=None, **states):
+        if bs is None:
+            if 'prompt' in states:
+                bs = len(states['prompt'])
+
+        if latents is not None:
+            width, height = latents.shape[3]*8, latents.shape[2]*8
+        else:
+            width, height = self.size
+
+        images = self.cnet_handler.handle(images).to(device, dtype=dtype).expand(bs*2, 3, width, height)
+        return {'ex_inputs':{'cond':images}}
```
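`FeedtoCNetAction` recovers the pixel resolution from the latent batch via the SD VAE's fixed 8x spatial downscale, then expands the ControlNet condition to `bs*2` images so it covers both the negative and positive halves of the CFG batch. The size recovery in isolation (the latent shape is an example):

```python
# Sketch: pixel size from an SD latent batch (8x VAE downscale); shape is an example.
import torch

latents = torch.randn(2, 4, 64, 96)  # (bs, channels, height/8, width/8)
width, height = latents.shape[3] * 8, latents.shape[2] * 8
assert (width, height) == (768, 512)
```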
hcpdiff/workflow/vae.py
CHANGED
```diff
@@ -1,33 +1,32 @@
-from .base import BasicAction, from_memory_context
-from diffusers import AutoencoderKL
-from diffusers.image_processor import VaeImageProcessor
-from typing import Dict, Any
 import torch
+from diffusers.image_processor import VaeImageProcessor
 from hcpdiff.utils import to_cuda, to_cpu
 from hcpdiff.utils.net_utils import get_dtype
+from rainbowneko.infer import BasicAction
 
 class EncodeAction(BasicAction):
-
-
-
-        self.vae = vae
-        self.vae_scale_factor = 2**(len(self.vae.config.block_out_channels)-1)
-        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) if image_processor is None else image_processor
-        self.offload = offload
+    def __init__(self, image_processor=None, key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
+        self.image_processor = image_processor
 
-    def forward(self, images, dtype:str, device, generator, bs=None, **states):
+    def forward(self, vae, images, dtype: str, device, generator, bs=None, model_offload=False, **states):
         if bs is None:
             if 'prompt' in states:
                 bs = len(states['prompt'])
+        vae_scale_factor = 2**(len(vae.config.block_out_channels)-1)
+        if self.image_processor is None:
+            self.image_processor = VaeImageProcessor(vae_scale_factor=vae_scale_factor)
 
         image = self.image_processor.preprocess(images)
-
+        if bs is not None and image.shape[0] != bs:
+            image = image.repeat(bs//image.shape[0], 1, 1, 1)
+        image = image.to(device=device, dtype=vae.dtype)
 
         if image.shape[1] == 4:
             init_latents = image
         else:
-            if …
-                to_cuda(…
+            if model_offload:
+                to_cuda(vae)
             if isinstance(generator, list) and len(generator) != bs:
                 raise ValueError(
                     f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
@@ -36,38 +35,38 @@ class EncodeAction(BasicAction):
 
             elif isinstance(generator, list):
                 init_latents = [
-                    …
+                    vae.encode(image[i: i+1]).latent_dist.sample(generator[i]) for i in range(bs)
                 ]
                 init_latents = torch.cat(init_latents, dim=0)
             else:
-                init_latents = …
+                init_latents = vae.encode(image).latent_dist.sample(generator)
 
-        init_latents = …
-        if …
-            to_cpu(…
-        return {…
+        init_latents = vae.config.scaling_factor*init_latents.to(dtype=get_dtype(dtype))
+        if model_offload:
+            to_cpu(vae)
+        return {'latents':init_latents}
 
 class DecodeAction(BasicAction):
-
-
-        super().__init__()
-        self.vae = vae
-        self.offload = offload
+    def __init__(self, image_processor=None, output_type='pil', key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
 
-        self.…
-        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor) if image_processor is None else image_processor
+        self.image_processor = image_processor
         self.output_type = output_type
-        self.decode_key = decode_key
 
-    def forward(self, **states):
-
-        if self.…
-
-
-
-
-
+    def forward(self, vae, denoiser, latents, model_offload=False, **states):
+        vae_scale_factor = 2**(len(vae.config.block_out_channels)-1)
+        if self.image_processor is None:
+            self.image_processor = VaeImageProcessor(vae_scale_factor=vae_scale_factor)
+
+        if model_offload:
+            to_cpu(denoiser)
+            torch.cuda.synchronize()
+            to_cuda(vae)
+        latents = latents.to(dtype=vae.dtype)
+        image = vae.decode(latents/vae.config.scaling_factor, return_dict=False)[0]
+        if model_offload:
+            to_cpu(vae)
 
         do_denormalize = [True]*image.shape[0]
         image = self.image_processor.postprocess(image, output_type=self.output_type, do_denormalize=do_denormalize)
-        return {…
+        return {'images':image}
```
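`EncodeAction` multiplies sampled latents by `vae.config.scaling_factor` and `DecodeAction` divides it back out, the usual diffusers convention for keeping latents near unit variance. A hedged round-trip sketch with a diffusers `AutoencoderKL` (the checkpoint id is an example; any SD1.x-style VAE behaves the same):

```python
# Sketch of the scaling_factor round trip used by EncodeAction/DecodeAction.
# The model id is an example checkpoint, not something hcpdiff pins.
import torch
from diffusers import AutoencoderKL

vae = AutoencoderKL.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="vae")
image = torch.randn(1, 3, 512, 512)  # stand-in for a preprocessed image in [-1, 1]
with torch.no_grad():
    latents = vae.encode(image).latent_dist.sample() * vae.config.scaling_factor
    decoded = vae.decode(latents / vae.config.scaling_factor, return_dict=False)[0]
print(latents.shape, decoded.shape)  # torch.Size([1, 4, 64, 64]) torch.Size([1, 3, 512, 512])
```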