PyPI - hcpdiff - Versions diffs - 0.9.1__py3-none-any.whl → 2.2__py3-none-any.whl - Mend

hcpdiff: 0.9.1 (py3-none-any.whl) → 2.2 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (211) hide show

hcpdiff/__init__.py +4 -4
hcpdiff/ckpt_manager/__init__.py +4 -5
hcpdiff/ckpt_manager/ckpt.py +24 -0
hcpdiff/ckpt_manager/format/__init__.py +4 -0
hcpdiff/ckpt_manager/format/diffusers.py +59 -0
hcpdiff/ckpt_manager/format/emb.py +21 -0
hcpdiff/ckpt_manager/format/lora_webui.py +252 -0
hcpdiff/ckpt_manager/format/sd_single.py +41 -0
hcpdiff/ckpt_manager/loader.py +64 -0
hcpdiff/data/__init__.py +4 -28
hcpdiff/data/cache/__init__.py +1 -0
hcpdiff/data/cache/vae.py +102 -0
hcpdiff/data/dataset.py +20 -0
hcpdiff/data/handler/__init__.py +3 -0
hcpdiff/data/handler/controlnet.py +18 -0
hcpdiff/data/handler/diffusion.py +90 -0
hcpdiff/data/handler/text.py +111 -0
hcpdiff/data/source/__init__.py +3 -3
hcpdiff/data/source/folder_class.py +12 -29
hcpdiff/data/source/text.py +40 -0
hcpdiff/data/source/text2img.py +36 -74
hcpdiff/data/source/text2img_cond.py +9 -15
hcpdiff/diffusion/__init__.py +0 -0
hcpdiff/diffusion/noise/__init__.py +2 -0
hcpdiff/diffusion/noise/pyramid_noise.py +42 -0
hcpdiff/diffusion/noise/zero_terminal.py +39 -0
hcpdiff/diffusion/sampler/__init__.py +5 -0
hcpdiff/diffusion/sampler/base.py +72 -0
hcpdiff/diffusion/sampler/ddpm.py +20 -0
hcpdiff/diffusion/sampler/diffusers.py +66 -0
hcpdiff/diffusion/sampler/edm.py +22 -0
hcpdiff/diffusion/sampler/sigma_scheduler/__init__.py +3 -0
hcpdiff/diffusion/sampler/sigma_scheduler/base.py +14 -0
hcpdiff/diffusion/sampler/sigma_scheduler/ddpm.py +197 -0
hcpdiff/diffusion/sampler/sigma_scheduler/edm.py +48 -0
hcpdiff/easy/__init__.py +2 -0
hcpdiff/easy/cfg/__init__.py +3 -0
hcpdiff/easy/cfg/sd15_train.py +207 -0
hcpdiff/easy/cfg/sdxl_train.py +147 -0
hcpdiff/easy/cfg/t2i.py +228 -0
hcpdiff/easy/model/__init__.py +2 -0
hcpdiff/easy/model/cnet.py +31 -0
hcpdiff/easy/model/loader.py +79 -0
hcpdiff/easy/sampler.py +46 -0
hcpdiff/evaluate/__init__.py +1 -0
hcpdiff/evaluate/previewer.py +60 -0
hcpdiff/loss/__init__.py +4 -1
hcpdiff/loss/base.py +41 -0
hcpdiff/loss/gw.py +35 -0
hcpdiff/loss/ssim.py +37 -0
hcpdiff/loss/vlb.py +79 -0
hcpdiff/loss/weighting.py +66 -0
hcpdiff/models/__init__.py +2 -2
hcpdiff/models/cfg_context.py +17 -14
hcpdiff/models/compose/compose_hook.py +44 -23
hcpdiff/models/compose/compose_tokenizer.py +21 -8
hcpdiff/models/compose/sdxl_composer.py +4 -4
hcpdiff/models/controlnet.py +16 -16
hcpdiff/models/lora_base_patch.py +14 -25
hcpdiff/models/lora_layers.py +3 -9
hcpdiff/models/lora_layers_patch.py +14 -24
hcpdiff/models/text_emb_ex.py +84 -6
hcpdiff/models/textencoder_ex.py +54 -18
hcpdiff/models/wrapper/__init__.py +3 -0
hcpdiff/models/wrapper/pixart.py +19 -0
hcpdiff/models/wrapper/sd.py +218 -0
hcpdiff/models/wrapper/utils.py +20 -0
hcpdiff/parser/__init__.py +1 -0
hcpdiff/parser/embpt.py +32 -0
hcpdiff/tools/convert_caption_txt2json.py +1 -1
hcpdiff/tools/dataset_generator.py +94 -0
hcpdiff/tools/download_hf_model.py +24 -0
hcpdiff/tools/init_proj.py +3 -21
hcpdiff/tools/lora_convert.py +18 -17
hcpdiff/tools/save_model.py +12 -0
hcpdiff/tools/sd2diffusers.py +1 -1
hcpdiff/train_colo.py +1 -1
hcpdiff/train_deepspeed.py +1 -1
hcpdiff/trainer_ac.py +79 -0
hcpdiff/trainer_ac_single.py +31 -0
hcpdiff/utils/__init__.py +0 -2
hcpdiff/utils/inpaint_pipe.py +7 -2
hcpdiff/utils/net_utils.py +29 -6
hcpdiff/utils/pipe_hook.py +24 -7
hcpdiff/utils/utils.py +21 -4
hcpdiff/workflow/__init__.py +15 -10
hcpdiff/workflow/daam/__init__.py +1 -0
hcpdiff/workflow/daam/act.py +66 -0
hcpdiff/workflow/daam/hook.py +109 -0
hcpdiff/workflow/diffusion.py +118 -128
hcpdiff/workflow/fast.py +31 -0
hcpdiff/workflow/flow.py +67 -0
hcpdiff/workflow/io.py +36 -130
hcpdiff/workflow/model.py +46 -43
hcpdiff/workflow/text.py +60 -47
hcpdiff/workflow/utils.py +32 -12
hcpdiff/workflow/vae.py +37 -38
hcpdiff-2.2.dist-info/METADATA +299 -0
hcpdiff-2.2.dist-info/RECORD +115 -0
{hcpdiff-0.9.1.dist-info → hcpdiff-2.2.dist-info}/WHEEL +1 -1
hcpdiff-2.2.dist-info/entry_points.txt +5 -0
hcpdiff/ckpt_manager/base.py +0 -16
hcpdiff/ckpt_manager/ckpt_diffusers.py +0 -45
hcpdiff/ckpt_manager/ckpt_pkl.py +0 -138
hcpdiff/ckpt_manager/ckpt_safetensor.py +0 -64
hcpdiff/ckpt_manager/ckpt_webui.py +0 -54
hcpdiff/data/bucket.py +0 -358
hcpdiff/data/caption_loader.py +0 -80
hcpdiff/data/cond_dataset.py +0 -40
hcpdiff/data/crop_info_dataset.py +0 -40
hcpdiff/data/data_processor.py +0 -33
hcpdiff/data/pair_dataset.py +0 -146
hcpdiff/data/sampler.py +0 -54
hcpdiff/data/source/base.py +0 -30
hcpdiff/data/utils.py +0 -80
hcpdiff/deprecated/__init__.py +0 -1
hcpdiff/deprecated/cfg_converter.py +0 -81
hcpdiff/deprecated/lora_convert.py +0 -31
hcpdiff/infer_workflow.py +0 -57
hcpdiff/loggers/__init__.py +0 -13
hcpdiff/loggers/base_logger.py +0 -76
hcpdiff/loggers/cli_logger.py +0 -40
hcpdiff/loggers/preview/__init__.py +0 -1
hcpdiff/loggers/preview/image_previewer.py +0 -149
hcpdiff/loggers/tensorboard_logger.py +0 -30
hcpdiff/loggers/wandb_logger.py +0 -31
hcpdiff/loggers/webui_logger.py +0 -9
hcpdiff/loss/min_snr_loss.py +0 -52
hcpdiff/models/layers.py +0 -81
hcpdiff/models/plugin.py +0 -348
hcpdiff/models/wrapper.py +0 -75
hcpdiff/noise/__init__.py +0 -3
hcpdiff/noise/noise_base.py +0 -16
hcpdiff/noise/pyramid_noise.py +0 -50
hcpdiff/noise/zero_terminal.py +0 -44
hcpdiff/train_ac.py +0 -566
hcpdiff/train_ac_single.py +0 -39
hcpdiff/utils/caption_tools.py +0 -105
hcpdiff/utils/cfg_net_tools.py +0 -321
hcpdiff/utils/cfg_resolvers.py +0 -16
hcpdiff/utils/ema.py +0 -52
hcpdiff/utils/img_size_tool.py +0 -248
hcpdiff/vis/__init__.py +0 -3
hcpdiff/vis/base_interface.py +0 -12
hcpdiff/vis/disk_interface.py +0 -48
hcpdiff/vis/webui_interface.py +0 -17
hcpdiff/viser_fast.py +0 -138
hcpdiff/visualizer.py +0 -265
hcpdiff/visualizer_reloadable.py +0 -237
hcpdiff/workflow/base.py +0 -59
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/anime/text2img_anime.yaml +0 -21
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/anime/text2img_anime_lora.yaml +0 -58
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/change_vae.yaml +0 -6
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/euler_a.yaml +0 -8
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/img2img.yaml +0 -10
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/img2img_controlnet.yaml +0 -19
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/inpaint.yaml +0 -11
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/load_lora.yaml +0 -26
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/load_unet_part.yaml +0 -18
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/offload_2GB.yaml +0 -6
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/save_model.yaml +0 -44
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/text2img.yaml +0 -53
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/text2img_DA++.yaml +0 -34
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/infer/text2img_sdxl.yaml +0 -9
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/plugins/plugin_controlnet.yaml +0 -17
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/te_struct.txt +0 -193
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/dataset/base_dataset.yaml +0 -29
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/dataset/regularization_dataset.yaml +0 -31
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/CustomDiffusion.yaml +0 -74
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/DreamArtist++.yaml +0 -135
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/DreamArtist.yaml +0 -45
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/DreamBooth.yaml +0 -62
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/FT_sdxl.yaml +0 -33
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/Lion_optimizer.yaml +0 -17
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/TextualInversion.yaml +0 -41
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/add_logger_tensorboard_wandb.yaml +0 -15
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/controlnet.yaml +0 -53
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/ema.yaml +0 -10
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/fine-tuning.yaml +0 -53
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/locon.yaml +0 -24
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/lora_anime_character.yaml +0 -77
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/lora_conventional.yaml +0 -56
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/lora_sdxl.yaml +0 -41
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/min_snr.yaml +0 -7
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples/preview_in_training.yaml +0 -6
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples_noob/DreamBooth.yaml +0 -70
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples_noob/TextualInversion.yaml +0 -45
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples_noob/fine-tuning.yaml +0 -45
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/examples_noob/lora.yaml +0 -63
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/train_base.yaml +0 -81
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/train/tuning_base.yaml +0 -42
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/unet_struct.txt +0 -932
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/workflow/highres_fix_latent.yaml +0 -86
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/workflow/highres_fix_pixel.yaml +0 -99
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/workflow/text2img.yaml +0 -59
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/workflow/text2img_lora.yaml +0 -70
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/zero2.json +0 -32
hcpdiff-0.9.1.data/data/hcpdiff/cfgs/zero3.json +0 -39
hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/caption.txt +0 -1
hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/name.txt +0 -1
hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/name_2pt_caption.txt +0 -1
hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/name_caption.txt +0 -1
hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/object.txt +0 -27
hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/object_caption.txt +0 -27
hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/style.txt +0 -19
hcpdiff-0.9.1.data/data/hcpdiff/prompt_tuning_template/style_caption.txt +0 -19
hcpdiff-0.9.1.dist-info/METADATA +0 -199
hcpdiff-0.9.1.dist-info/RECORD +0 -160
hcpdiff-0.9.1.dist-info/entry_points.txt +0 -2
{hcpdiff-0.9.1.dist-info → hcpdiff-2.2.dist-info/licenses}/LICENSE +0 -0
{hcpdiff-0.9.1.dist-info → hcpdiff-2.2.dist-info}/top_level.txt +0 -0

hcpdiff/data/handler/controlnet.py ADDED Viewed

@@ -0,0 +1,18 @@
+import torchvision.transforms as T
+from PIL import Image
+from rainbowneko.data import DataHandler, HandlerChain, LoadImageHandler, ImageHandler
+class ControlNetHandler(DataHandler):
+    """Run a ControlNet condition image through a load -> bucket -> tensor chain.
+    The default key maps are 'cond -> image' on input and 'image -> cond' on output.
+    """
+    def __init__(self, key_map_in=('cond -> image',), key_map_out=('image -> cond',), bucket=None):
+        super().__init__(key_map_in, key_map_out)
+        # Stages are created in pipeline order.
+        load_stage = LoadImageHandler()
+        # Without a bucket, a plain DataHandler occupies the bucket slot.
+        bucket_stage = bucket.handler if bucket else DataHandler()
+        tensor_stage = ImageHandler(transform=T.ToTensor())
+        self.handlers = HandlerChain(load=load_stage, bucket=bucket_stage, image=tensor_stage)
+    def handle(self, image:Image.Image):
+        """Pass ``image`` through the handler chain and return the chain's result."""
+        payload = {'image': image}
+        return self.handlers(payload)

hcpdiff/data/handler/diffusion.py ADDED Viewed

@@ -0,0 +1,90 @@
+from typing import Union, Dict, Any
+import numpy as np
+import torch
+import torchvision.transforms as T
+from PIL import Image
+from rainbowneko.data import DataHandler, HandlerChain, LoadImageHandler, ImageHandler
+from .text import TemplateFillHandler, TagDropoutHandler, TagEraseHandler, TagShuffleHandler, TokenizeHandler
+class LossMapHandler(DataHandler):
+    """Load a loss-map image and remap its values into per-pixel loss weights.
+    The map is loaded in mode 'L', passed through the bucket handler, shrunk by
+    ``vae_scale`` on both axes and converted to a tensor. Values are then remapped
+    piecewise: ``v <= 0.5`` becomes ``v*2`` and ``v > 0.5`` becomes ``(v-0.5)*4+1``.
+    """
+    def __init__(self, bucket, vae_scale=8, key_map_in=('loss_map -> image', 'image_size -> image_size'),
+                 key_map_out=('image -> loss_map', 'coord -> coord')):
+        super().__init__(key_map_in, key_map_out)
+        self.vae_scale = vae_scale
+        self.handlers = HandlerChain(
+            load=LoadImageHandler(mode='L'),
+            bucket=bucket.handler,
+            image=ImageHandler(transform=T.Compose([
+                # Downscale the map by vae_scale before tensor conversion.
+                lambda x:x.resize((x.size[0]//self.vae_scale, x.size[1]//self.vae_scale), Image.BILINEAR),
+                T.ToTensor()
+            ]), )
+        )
+    def handle(self, image: Union[Image.Image, str], image_size: np.ndarray[int]):
+        """Process the loss map and return the chain output with the remapped ``image``."""
+        data = self.handlers(dict(image=image, image_size=image_size))
+        image = data['image']
+        # Compute both masks BEFORE modifying the tensor: scaling the low half first would
+        # push values in (0.25, 0.5] above 0.5 and the second rule would remap them again.
+        low = image<=0.5
+        high = image>0.5
+        image[low] *= 2
+        image[high] = (image[high]-0.5)*4+1
+        # NOTE(review): the previous code called the chain again with dict(**data, image=image),
+        # which raises TypeError (duplicate 'image' keyword). Returning the processed data
+        # directly is the assumed intent - confirm.
+        return {**data, 'image': image}
+class DiffusionImageHandler(DataHandler):
+    """Load a training image, apply the bucket handler and normalize it with mean/std 0.5.
+    Inputs that are already ``torch.Tensor`` (cached latents) bypass the chain.
+    """
+    def __init__(self, bucket, key_map_in=('image -> image', 'image_size -> image_size'), key_map_out=('image -> image', 'coord -> coord')):
+        super().__init__(key_map_in, key_map_out)
+        load_stage = LoadImageHandler()
+        bucket_stage = bucket.handler
+        normalize = T.Compose([T.ToTensor(), T.Normalize([0.5], [0.5])])
+        self.handlers = HandlerChain(load=load_stage, bucket=bucket_stage, image=ImageHandler(transform=normalize))
+    def handle(self, image: Image.Image, image_size: np.ndarray[int]):
+        """Return the sample unchanged for tensors, otherwise the chain's output."""
+        sample = dict(image=image, image_size=image_size)
+        if isinstance(image, torch.Tensor):  # cached latents
+            return sample
+        return self.handlers(sample)
+class DiffusionTextHandler(DataHandler):
+    """Chain of prompt handlers: optional dropout, erase and shuffle, then template fill and optional tokenize.
+    ``dropout`` and ``erase`` are forwarded as probabilities; ``shuffle`` only acts as a
+    switch (any value > 0 enables shuffling). ``word_names`` defaults to an empty mapping.
+    """
+    def __init__(self, encoder_attention_mask=False, erase=0.0, dropout=0.0, shuffle=0.0, word_names=None, tokenize=True,
+                 key_map_in=('prompt -> prompt', ), key_map_out=('prompt -> prompt', )):
+        super().__init__(key_map_in, key_map_out)
+        # Use a fresh dict per instance instead of a shared mutable default argument.
+        if word_names is None:
+            word_names = {}
+        text_handlers = {}
+        if dropout>0:
+            text_handlers['dropout'] = TagDropoutHandler(p=dropout)
+        if erase>0:
+            text_handlers['erase'] = TagEraseHandler(p=erase)
+        if shuffle>0:
+            text_handlers['shuffle'] = TagShuffleHandler()
+        text_handlers['fill'] = TemplateFillHandler(word_names)
+        if tokenize:
+            text_handlers['tokenize'] = TokenizeHandler(encoder_attention_mask)
+        self.handlers = HandlerChain(**text_handlers)
+    def handle(self, prompt: Union[str, Dict[str, str]]):
+        """Run ``prompt`` through the configured handler chain."""
+        return self.handlers(dict(prompt=prompt))
+class StableDiffusionHandler(DataHandler):
+    """Combine image and text processing for one sample.
+    Wraps a ``DiffusionImageHandler`` and a ``DiffusionTextHandler`` and merges their outputs.
+    ``word_names`` defaults to an empty mapping.
+    """
+    def __init__(self, bucket, encoder_attention_mask=False, key_map_in=('image -> image', 'image_size -> image_size', 'prompt -> prompt'),
+                 key_map_out=('image -> image', 'coord -> coord', 'prompt -> prompt'),
+                 erase=0.0, dropout=0.0, shuffle=0.0, word_names=None, tokenize=True):
+        super().__init__(key_map_in, key_map_out)
+        # Use a fresh dict per instance instead of a shared mutable default argument.
+        if word_names is None:
+            word_names = {}
+        self.image_handlers = DiffusionImageHandler(bucket)
+        self.text_handlers = DiffusionTextHandler(encoder_attention_mask=encoder_attention_mask, erase=erase, dropout=dropout, shuffle=shuffle,
+                                                  word_names=word_names, tokenize=tokenize)
+    def handle(self, image: Image.Image, image_size: np.ndarray[int], prompt: str):
+        """Process image and prompt separately and merge both result dicts."""
+        return dict(**self.image_handlers(dict(image=image, image_size=image_size)), **self.text_handlers(dict(prompt=prompt)))
+    def __call__(self, data) -> Dict[str, Any]:
+        """Map input keys, run ``handle``, map output keys and return a copy of ``data`` updated with them."""
+        data_proc = self.handle(**self.key_mapper_in.map_data(data)[1])
+        out_data = self.key_mapper_out.map_data(data_proc)[1]
+        # Copy so the caller's dict is not modified.
+        data = dict(**data)
+        data.update(out_data)
+        return data

hcpdiff/data/handler/text.py ADDED Viewed

@@ -0,0 +1,111 @@
+import random
+from typing import Dict, Union, List
+import numpy as np
+from string import Formatter
+from rainbowneko.data import DataHandler
+from rainbowneko._share import register_model_callback
+class TagShuffleHandler(DataHandler):
+    """Randomly reorder the comma-separated tags of a prompt.
+    Accepts either a plain string or a dict whose 'caption' entry holds the tags;
+    a dict prompt is updated in place. A ``None`` caption is left untouched.
+    """
+    def __init__(self, key_map_in=('prompt -> prompt',), key_map_out=('prompt -> prompt',)):
+        super().__init__(key_map_in, key_map_out)
+    @staticmethod
+    def _shuffle_tags(text: str) -> str:
+        # Split on ',', shuffle in place, and join back with ','.
+        tags = text.split(',')
+        random.shuffle(tags)
+        return ','.join(tags)
+    def handle(self, prompt: Union[Dict[str, str], str]):
+        """Return ``{'prompt': prompt}`` with the tag order shuffled."""
+        if isinstance(prompt, str):
+            prompt = self._shuffle_tags(prompt)
+        elif prompt['caption'] is not None:
+            # Sources may emit caption=None for unlabeled samples; nothing to shuffle then.
+            prompt['caption'] = self._shuffle_tags(prompt['caption'])
+        return {'prompt':prompt}
+    def __repr__(self):
+        return 'TagShuffleHandler()'
+class TagDropoutHandler(DataHandler):
+    """Drop each comma-separated tag of a prompt independently with probability ``p``.
+    Accepts either a plain string or a dict whose 'caption' entry holds the tags;
+    a dict prompt is updated in place. A ``None`` caption is left untouched.
+    """
+    def __init__(self, p=0.1, key_map_in=('prompt -> prompt',), key_map_out=('prompt -> prompt',)):
+        super().__init__(key_map_in, key_map_out)
+        self.p = p
+    def _drop_tags(self, text: str) -> str:
+        # The tags must be an ndarray: a plain list cannot be indexed with a boolean mask.
+        tags = np.array(text.split(','))
+        keep = np.random.random(len(tags))>self.p
+        return ','.join(tags[keep])
+    def handle(self, prompt: Union[Dict[str, str], str]):
+        """Return ``{'prompt': prompt}`` with a random subset of tags removed."""
+        if isinstance(prompt, str):
+            prompt = self._drop_tags(prompt)
+        elif prompt['caption'] is not None:
+            # Sources may emit caption=None for unlabeled samples; nothing to drop then.
+            prompt['caption'] = self._drop_tags(prompt['caption'])
+        return {'prompt':prompt}
+    def __repr__(self):
+        return f'TagDropoutHandler(p={self.p})'
+class TagEraseHandler(DataHandler):
+    """Blank out the whole prompt (or a dict prompt's 'caption') with probability ``p``."""
+    def __init__(self, p=0.1, key_map_in=('prompt -> prompt',), key_map_out=('prompt -> prompt',)):
+        super().__init__(key_map_in, key_map_out)
+        self.p = p
+    def handle(self, prompt):
+        """Return ``{'prompt': prompt}``, erased when the random draw falls below ``p``."""
+        # Exactly one random draw per call, whatever the prompt type.
+        erase = random.random()<self.p
+        if not erase:
+            return {'prompt':prompt}
+        if isinstance(prompt, str):
+            return {'prompt':''}
+        # Dict prompts are updated in place.
+        prompt['caption'] = ''
+        return {'prompt':prompt}
+    def __repr__(self):
+        return f'TagEraseHandler(p={self.p})'
+class TemplateFillHandler(DataHandler):
+    """Fill the ``{placeholders}`` of a prompt template from ``word_names`` and the sample caption."""
+    def __init__(self, word_names: Dict[str, str], key_map_in=('prompt -> prompt',), key_map_out=('prompt -> prompt',)):
+        super().__init__(key_map_in, key_map_out)
+        self.word_names = word_names
+    def handle(self, prompt):
+        """Format ``prompt['template']`` and return ``{'prompt': text}``; ``prompt`` must hold 'template' and 'caption'."""
+        template = prompt['template']
+        caption = prompt['caption']
+        # Collect every placeholder name that appears in the template.
+        wanted = set()
+        for _, field, _, _ in Formatter().parse(template):
+            if field is not None:
+                wanted.add(field)
+        values = {name: word for name, word in self.word_names.items() if name in wanted}
+        # A non-empty configured 'caption' word takes precedence over the sample's caption.
+        if 'caption' in wanted and caption is not None:
+            values['caption'] = values.get('caption', None) or caption
+        # Placeholders that were not provided, or whose value is falsy (e.g. None), become ''.
+        filled = {name: (values.get(name) or '') for name in wanted}
+        return {'prompt':template.format(**filled)}
+    def __repr__(self):
+        return f'TemplateFill(\nword_names={self.word_names}\n)'
+class TokenizeHandler(DataHandler):
+    """Tokenize a prompt string with the tokenizer taken from the model wrapper.
+    The tokenizer is not available at construction time; it is assigned by
+    ``acquire_tokenizer``, which is registered through ``register_model_callback``.
+    """
+    def __init__(self, encoder_attention_mask=False, key_map_in=('prompt -> prompt',), key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
+        self.encoder_attention_mask = encoder_attention_mask
+        register_model_callback(self.acquire_tokenizer)
+    def acquire_tokenizer(self, model_wrapper):
+        """Callback: store ``model_wrapper.tokenizer`` for later use by ``handle``."""
+        self.tokenizer = model_wrapper.tokenizer
+    def handle(self, prompt):
+        """Return a dict with token ids under 'prompt', plus 'attn_mask' / 'position_ids' when available."""
+        # Pad/truncate to the tokenizer's max length times its N_repeats attribute.
+        token_info = self.tokenizer(prompt, truncation=True, padding="max_length", return_tensors="pt",
+                                max_length=self.tokenizer.model_max_length*self.tokenizer.N_repeats)
+        tokens = token_info.input_ids.squeeze()
+        data = {'prompt':tokens}
+        if self.encoder_attention_mask and 'attention_mask' in token_info:
+            data['attn_mask'] = token_info.attention_mask.squeeze()
+        if 'position_ids' in token_info:
+            data['position_ids'] = token_info.position_ids.squeeze()
+        return data
+    def __repr__(self):
+        # The tokenizer attribute only exists after the model callback has run;
+        # fall back to None so repr() never raises AttributeError.
+        tokenizer = getattr(self, 'tokenizer', None)
+        return f'TokenizeHandler(\nencoder_attention_mask={self.encoder_attention_mask}, tokenizer={tokenizer}\n)'

hcpdiff/data/source/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from .base import DataSource, ComposeDataSource
-from .text2img import Text2ImageSource, Text2ImageAttMapSource
+from .text2img import Text2ImageSource, Text2ImageLossMapSource
 from .text2img_cond import Text2ImageCondSource
-from .folder_class import T2IFolderClassSource
+from .folder_class import T2IFolderClassSource
+from .text import TextSource

hcpdiff/data/source/folder_class.py CHANGED Viewed

@@ -1,40 +1,23 @@
-import os
-from typing import List, Tuple, Union
-from hcpdiff.utils.utils import get_file_name, get_file_ext
-from hcpdiff.utils.img_size_tool import types_support
-from .text2img import Text2ImageAttMapSource
-from hcpdiff.data.caption_loader import BaseCaptionLoader, auto_caption_loader
 from copy import copy
+from typing import Union
-class T2IFolderClassSource(Text2ImageAttMapSource):
+from rainbowneko.data.label_loader import BaseLabelLoader, auto_label_loader
-    def get_image_list(self) -> List[Tuple[str, "T2IFolderClassSource"]]:
-        sub_folders = [os.path.join(self.img_root, x) for x in os.listdir(self.img_root)]
-        class_imgs = []
-        for class_folder in sub_folders:
-            class_name = os.path.basename(class_folder)
-            imgs = [(os.path.join(class_folder, x), self) for x in os.listdir(class_folder) if get_file_ext(x) in types_support]
-            class_imgs.extend(imgs*self.repeat[class_name])
-        return class_imgs
+from .text2img import Text2ImageLossMapSource
-    def load_captions(self, caption_file: Union[str, BaseCaptionLoader]):
-        if caption_file is None:
+class T2IFolderClassSource(Text2ImageLossMapSource):
+    """Text-to-image source whose labels live in one sub-folder per class."""
+    def _load_label_data(self, label_file: Union[str, BaseLabelLoader]):
+        ''' Load labels as {class_name/image.ext: label}.
+        Returns {} for None, loads every entry under the loader's path for a str,
+        and calls ``label_file.load()`` for anything else.
+        '''
+        if label_file is None:
             return {}
-        elif isinstance(caption_file, str):
+        elif isinstance(label_file, str):
             captions = {}
-            caption_loader = auto_caption_loader(caption_file)
-            for class_name in os.listdir(caption_loader.path):
-                class_folder = os.path.join(caption_loader.path, class_name)
+            caption_loader = auto_label_loader(label_file)
+            for class_folder in caption_loader.path.iterdir():
                 caption_loader_class = copy(caption_loader)
                 caption_loader_class.path = class_folder
-                captions_class = {f'{class_name}/{name}':caption for name, caption in caption_loader_class.load().item()}
+                # load() is used as a dict elsewhere; iterate with .items() (a dict has no .item()).
+                captions_class = {f'{class_folder.name}/{name}':caption for name, caption in caption_loader_class.load().items()}
                 captions.update(captions_class)
             return captions
         else:
-            return caption_file.load()
-    def get_image_name(self, path: str) -> str:
-        img_root, img_name = os.path.split(path)
-        img_name = img_name.rsplit('.')[0]
-        img_root, class_name = os.path.split(img_root)
-        return f'{class_name}/{img_name}'
+            return label_file.load()

hcpdiff/data/source/text.py ADDED Viewed

@@ -0,0 +1,40 @@
+from rainbowneko.data import UnLabelSource, DataSource
+from rainbowneko.data.label_loader import BaseLabelLoader, auto_label_loader
+from typing import Union, Dict, Any
+import random
+class TextSource(DataSource):
+    """Text-only data source: each item carries an id and a prompt dict with a template and caption."""
+    def __init__(self, label_file, prompt_template=None, repeat=1, **kwargs):
+        super().__init__(repeat=repeat)
+        self.label_file = label_file
+        labels = self._load_label_data(label_file)
+        self.label_dict = labels
+        self.img_ids = self._load_img_ids(labels)
+        self.prompt_template = self.load_template(prompt_template)
+    def _load_img_ids(self, label_dict):
+        """Return the label keys repeated ``self.repeat`` times."""
+        keys = [*label_dict.keys()]
+        return keys * self.repeat
+    def _load_label_data(self, label_file: Union[str, BaseLabelLoader]):
+        """Return the label data: {} for None, otherwise the result of the loader's ``load()``."""
+        if label_file is None:
+            return {}
+        # A string is resolved to a loader first; anything else is used as the loader itself.
+        loader = auto_label_loader(label_file) if isinstance(label_file, str) else label_file
+        return loader.load()
+    def load_template(self, template_file):
+        """Return the template lines of ``template_file``, or ['{caption}'] when no file is given."""
+        if template_file is None:
+            return ['{caption}']
+        with open(template_file, 'r', encoding='utf-8') as f:
+            content = f.read()
+        return content.strip().split('\n')
+    def __getitem__(self, index) -> Dict[str, Any]:
+        """Return the item at ``index`` with a randomly chosen template."""
+        img_name = self.img_ids[index]
+        template = random.choice(self.prompt_template)
+        prompt = {'template':template, 'caption':self.label_dict[img_name]}
+        return {'id':img_name, 'prompt':prompt}

hcpdiff/data/source/text2img.py CHANGED Viewed

@@ -1,13 +1,11 @@
-from .base import DataSource
-from hcpdiff.data.caption_loader import BaseCaptionLoader, auto_caption_loader
-from typing import Union, Any
 import os
-from hcpdiff.utils.utils import get_file_name, get_file_ext
-from hcpdiff.utils.img_size_tool import types_support
-from typing import Dict, List, Tuple
-from PIL import Image
-import numpy as np
 import random
+from pathlib import Path
+from typing import Any
+from typing import Dict
+from rainbowneko.data import ImageLabelSource
+from rainbowneko.utils.utils import is_image_file
 from torchvision.transforms import transforms
 default_image_transforms = transforms.Compose([
@@ -15,77 +13,41 @@ default_image_transforms = transforms.Compose([
     transforms.Normalize([0.5], [0.5])
 ])
-class Text2ImageSource(DataSource):
-    def __init__(self, img_root, caption_file, prompt_template, text_transforms, image_transforms=default_image_transforms,
-                 bg_color=(255,255,255), repeat=1, **kwargs):
-        super(Text2ImageSource, self).__init__(img_root, repeat=repeat)
+class Text2ImageSource(ImageLabelSource):
+    """Image/label source that yields an image path plus a prompt dict (template and caption)."""
+    def __init__(self, img_root, label_file, prompt_template, repeat=1, **kwargs):
+        # Extra **kwargs are accepted but not forwarded to the parent class.
+        super().__init__(img_root, label_file, repeat=repeat)
-        self.caption_dict = self.load_captions(caption_file)
         self.prompt_template = self.load_template(prompt_template)
-        self.image_transforms = image_transforms
-        self.text_transforms = text_transforms
-        self.bg_color = tuple(bg_color)
-    def load_captions(self, caption_file: Union[str, BaseCaptionLoader]):
-        if caption_file is None:
-            return {}
-        elif isinstance(caption_file, str):
-            return auto_caption_loader(caption_file).load()
-        else:
-            return caption_file.load()
     def load_template(self, template_file):
+        # Read the template file (UTF-8) and return its stripped content split into lines.
         with open(template_file, 'r', encoding='utf-8') as f:
             return f.read().strip().split('\n')
-    def get_image_list(self) -> List[Tuple[str, DataSource]]:
-        imgs = [(os.path.join(self.img_root, x), self) for x in os.listdir(self.img_root) if get_file_ext(x) in types_support]
-        return imgs*self.repeat
-    def procees_image(self, image):
-        return self.image_transforms(image)
-    def process_text(self, text_dict):
-        return self.text_transforms(text_dict)
-    def load_image(self, path) -> Dict[str, Any]:
-        image = Image.open(path)
-        if image.mode == 'RGBA':
-            x, y = image.size
-            canvas = Image.new('RGBA', image.size, self.bg_color)
-            canvas.paste(image, (0, 0, x, y), image)
-            image = canvas
-        return {'image': image.convert("RGB")}
-    def load_caption(self, img_name) -> str:
-        caption_ist = self.caption_dict.get(img_name, None)
-        prompt_template = random.choice(self.prompt_template)
-        prompt_ist = self.process_text({'prompt':prompt_template, 'caption':caption_ist})['prompt']
-        return prompt_ist
-class Text2ImageAttMapSource(Text2ImageSource):
-    def __init__(self, img_root, caption_file, prompt_template, text_transforms, image_transforms=default_image_transforms, att_mask=None,
-                 bg_color=(255, 255, 255), repeat=1, **kwargs):
-        super().__init__(img_root, caption_file, prompt_template, image_transforms=image_transforms, text_transforms=text_transforms,
-                         bg_color=bg_color, repeat=repeat)
-        if att_mask is None:
-            self.att_mask = {}
+    def __getitem__(self, index) -> Dict[str, Any]:
+        # Returns the image path (not a loaded image) together with the prompt parts.
+        img_name = self.img_ids[index]
+        path = self.img_root / img_name
+        return {
+            'id':img_name,
+            'image':path,
+            'prompt':{
+                'template':random.choice(self.prompt_template),
+                # The caption is None when the image has no entry in label_dict.
+                'caption':self.label_dict.get(img_name, None),
+            }
+        }
+class Text2ImageLossMapSource(Text2ImageSource):
+    """Text-to-image source that also attaches a loss-map path to every item.
+    ``loss_map`` is an optional directory of image files; they are matched to
+    images by file stem. Items without a matching map get ``loss_map=None``.
+    """
+    def __init__(self, img_root, caption_file, prompt_template, loss_map=None, repeat=1, **kwargs):
+        super().__init__(img_root, caption_file, prompt_template, repeat=repeat)
+        if loss_map is None:
+            self.loss_map = {}
         else:
-            self.att_mask = {get_file_name(file):os.path.join(att_mask, file)
-                for file in os.listdir(att_mask) if get_file_ext(file) in types_support}
-    def get_att_mask(self, img_name):
-        if img_name not in self.att_mask:
-            return None
-        att_mask = Image.open(self.att_mask[img_name]).convert("L")
-        np_mask = np.array(att_mask).astype(float)
-        np_mask[np_mask<=127+0.1] = (np_mask[np_mask<=127+0.1]/127.)
-        np_mask[np_mask>127] = ((np_mask[np_mask>127]-127)/128.)*4+1
-        return np_mask
-    def load_image(self, path) -> Dict[str, Any]:
-        img_root, img_name = os.path.split(path)
-        image_dict = super().load_image(path)
-        image_dict['att_mask'] = self.get_att_mask(get_file_name(img_name))
-        return image_dict
+            loss_map = Path(loss_map)
+            # iterdir() already yields paths that include the directory; joining them with
+            # loss_map again would duplicate the directory for relative paths.
+            self.loss_map = {file.stem:file for file in loss_map.iterdir() if is_image_file(file)}
+    def __getitem__(self, index) -> Dict[str, Any]:
+        data = super().__getitem__(index)
+        img_name = self.img_ids[index]
+        # Use .get so images without a loss map (or loss_map=None) yield None instead of KeyError.
+        data['loss_map'] = self.loss_map.get(Path(img_name).stem, None)
+        return data

hcpdiff/data/source/text2img_cond.py CHANGED Viewed

@@ -1,22 +1,16 @@
 import os
 from typing import Dict, Any
-from PIL import Image
-from torchvision import transforms
+from .text2img import Text2ImageSource
-from .text2img import Text2ImageAttMapSource, default_image_transforms
-class Text2ImageCondSource(Text2ImageAttMapSource):
-    def __init__(self, img_root, caption_file, prompt_template, text_transforms, image_transforms=default_image_transforms,
-                bg_color=(255, 255, 255), repeat=1, cond_dir=None, **kwargs):
-        super().__init__(img_root, caption_file, prompt_template, image_transforms=image_transforms, text_transforms=text_transforms,
-                         bg_color=bg_color, repeat=repeat)
-        self.cond_transform = transforms.ToTensor()
+class Text2ImageCondSource(Text2ImageSource):
+    """Text-to-image source that adds the path of a condition image under 'cond'."""
+    def __init__(self, img_root, caption_file, prompt_template, repeat=1, cond_dir=None, **kwargs):
+        super().__init__(img_root, caption_file, prompt_template, repeat=repeat)
         self.cond_dir = cond_dir
-    def load_image(self, path) -> Dict[str, Any]:
-        img_root, img_name = os.path.split(path)
-        image_dict = super().load_image(path)
+    def __getitem__(self, index) -> Dict[str, Any]:
+        data = super().__getitem__(index)
+        img_name = self.img_ids[index]
+        # The condition image is looked up in cond_dir under the same name as the image.
+        # NOTE(review): cond_dir defaults to None, which os.path.join does not accept.
         cond_path = os.path.join(self.cond_dir, img_name)
-        image_dict['cond'] = Image.open(cond_path).convert("RGB")
-        return image_dict
+        # Only the path is stored here; the image itself is not opened.
+        data['cond'] = cond_path
+        return data

hcpdiff/diffusion/__init__.py ADDED Viewed

File without changes

hcpdiff/diffusion/noise/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ from .pyramid_noise import PyramidNoiseSampler
2	+ from .zero_terminal import ZeroTerminalSampler

hcpdiff/diffusion/noise/pyramid_noise.py ADDED Viewed

@@ -0,0 +1,42 @@
+import random
+import torch
+from torch.nn import functional as F
+from hcpdiff.diffusion.sampler import BaseSampler
+class PyramidNoiseSampler:
+    """Noise generator that sums Gaussian noise drawn at several resolutions.
+
+    ``level`` bounds the number of resolutions, ``discount`` is the base of the
+    per-level weight, ``step_size`` is the minimum shrink ratio between levels and
+    ``resize_mode`` is the interpolation mode used to upsample coarse noise.
+    """
+    def __init__(self, level: int = 6, discount: float = 0.4, step_size: float = 2., resize_mode: str = 'bilinear'):
+        self.level, self.discount = level, discount
+        self.step_size, self.resize_mode = step_size, resize_mode
+    def make_nosie(self, shape, device='cuda', dtype=torch.float32):
+        """Return noise of ``shape`` (unpacked as four dims: b, c, h, w).
+
+        Full-resolution Gaussian noise is drawn first. For each level ``i`` from 1
+        to ``level - 1`` a smaller Gaussian map is drawn, upsampled to ``(h, w)``
+        and added with weight ``discount**i``; the loop stops early once either
+        coarse dimension reaches 1. The sum is divided by its standard deviation
+        taken over all elements.
+        """
+        noise = torch.randn(shape, device=device, dtype=dtype)
+        with torch.no_grad():
+            batch, channels, height, width = noise.shape
+            for octave in range(1, self.level):
+                # Shrink ratio is random in [step_size, step_size + 2).
+                ratio = random.random()*2+self.step_size
+                shrink = ratio**octave
+                small_w = max(1, int(width/shrink))
+                small_h = max(1, int(height/shrink))
+                # Drawn without an explicit device, then moved to match ``noise``.
+                coarse = torch.randn(batch, channels, small_h, small_w).to(noise)
+                upsampled = F.interpolate(coarse, size=(height, width), scale_factor=None, mode=self.resize_mode)
+                noise += upsampled*(self.discount**octave)
+                if 1 in (small_w, small_h):
+                    break
+            noise = noise/noise.std()
+        return noise
+    @classmethod
+    def patch(cls, base_sampler: BaseSampler, level: int = 6, discount: float = 0.4, step_size: float = 2., resize_mode: str = 'bilinear'):
+        """Replace ``base_sampler.make_nosie`` with a pyramid-noise version and return the sampler."""
+        base_sampler.make_nosie = cls(level, discount, step_size, resize_mode).make_nosie
+        return base_sampler
+if __name__ == '__main__':
+    # Manual visual check: patch a sampler with pyramid noise and display one sample.
+    from hcpdiff.diffusion.sampler import EDM_DDPMSampler, DDPMContinuousSigmaScheduler
+    from matplotlib import pyplot as plt
+    # NOTE(review): EDM_DDPMSampler is not among the names the sampler package's
+    # __init__ imports explicitly (BaseSampler, DDPMSampler, EDMSampler,
+    # DiffusersSampler); confirm it is still exported before relying on this demo.
+    scheduler = DDPMContinuousSigmaScheduler()
+    patched = PyramidNoiseSampler.patch(EDM_DDPMSampler(scheduler))
+    sample = patched.make_nosie((1, 3, 512, 512), device='cpu')
+    # Move channels last so imshow receives an (H, W, C) array.
+    image_hwc = sample[0].permute(1, 2, 0)
+    plt.figure()
+    plt.imshow(image_hwc)
+    plt.show()

hcpdiff/diffusion/noise/zero_terminal.py ADDED Viewed

@@ -0,0 +1,39 @@
+import torch
+from ..sampler.sigma_scheduler import DDPMDiscreteSigmaScheduler
+class ZeroTerminalSampler:
+    """Patch a DDPM-scheduled sampler so its noise schedule ends at (near) zero SNR."""
+    @classmethod
+    def patch(cls, base_sampler):
+        """Rescale ``base_sampler.sigma_scheduler`` in place and return the sampler.
+
+        Both ``alphas_cumprod`` and ``sigmas`` on the scheduler are replaced with
+        values derived from the rescaled schedule.
+
+        Raises:
+            AssertionError: if the sampler's scheduler is not a
+                ``DDPMDiscreteSigmaScheduler``.
+        """
+        assert isinstance(base_sampler.sigma_scheduler, DDPMDiscreteSigmaScheduler), "ZeroTerminalScheduler only works with DDPM SigmaScheduler"
+        scheduler = base_sampler.sigma_scheduler
+        alphas_cumprod = cls.rescale_zero_terminal_snr(scheduler.alphas_cumprod)
+        scheduler.alphas_cumprod = alphas_cumprod
+        # Derive sigmas from the *rescaled* schedule. rescale_zero_terminal_snr does
+        # not modify its input, so using the original tensor here would leave the
+        # sigma table unchanged.
+        scheduler.sigmas = ((1-alphas_cumprod)/alphas_cumprod).sqrt()
+        # Returned for parity with PyramidNoiseSampler.patch, so calls can be chained.
+        return base_sampler
+    @staticmethod
+    def rescale_zero_terminal_snr(alphas_cumprod, thr=1e-4):
+        """
+        Rescales the cumulative alphas to have (near) zero terminal SNR. Based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)
+        Args:
+            alphas_cumprod (`torch.FloatTensor`): cumulative product of alphas; not modified.
+            thr (`float`): value written to the last entry of sqrt(alphas_cumprod)
+                instead of exactly zero, so the derived sigma stays finite.
+        Returns:
+            `torch.FloatTensor`: rescaled alphas_cumprod whose last entry is ``thr**2``
+        """
+        alphas_bar_sqrt = alphas_cumprod.sqrt()
+        # Store old values.
+        alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()
+        alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone()
+        # Shift so the last timestep is zero.
+        alphas_bar_sqrt -= alphas_bar_sqrt_T
+        # Scale so the first timestep is back to the old value.
+        alphas_bar_sqrt *= alphas_bar_sqrt_0/(alphas_bar_sqrt_0-alphas_bar_sqrt_T)
+        alphas_bar_sqrt[-1] = thr # avoid nan sigma
+        # Square to go back from sqrt(alphas_cumprod) to alphas_cumprod.
+        alphas_bar = alphas_bar_sqrt**2
+        return alphas_bar

hcpdiff/diffusion/sampler/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from .sigma_scheduler import *
+from .base import BaseSampler
+from .ddpm import DDPMSampler
+from .edm import EDMSampler
+from .diffusers import DiffusersSampler

hcpdiff/diffusion/sampler/base.py ADDED Viewed

@@ -0,0 +1,72 @@
+from typing import Tuple
+import torch
+from .sigma_scheduler import SigmaScheduler
+from diffusers import DDPMScheduler
+class BaseSampler:
+    """Base class tying a sigma scheduler to noise creation and prediction conversions.
+
+    The ``c_in`` / ``c_out`` / ``c_skip`` coefficients are all the constant 1 here and
+    ``denoise`` is left unimplemented.
+    """
+    def __init__(self, sigma_scheduler: SigmaScheduler, generator: torch.Generator = None):
+        self.generator = generator
+        self.sigma_scheduler = sigma_scheduler
+    def c_in(self, sigma):
+        """Input coefficient; constant 1 in the base class."""
+        return 1
+    def c_out(self, sigma):
+        """Coefficient applied to the noise term; constant 1 in the base class."""
+        return 1
+    def c_skip(self, sigma):
+        """Coefficient applied to the noisy sample; constant 1 in the base class."""
+        return 1
+    @property
+    def num_timesteps(self):
+        """The scheduler's ``num_timesteps``, or 1000. when it defines none."""
+        fallback = 1000.
+        return getattr(self.sigma_scheduler, 'num_timesteps', fallback)
+    def get_timesteps(self, N_steps, device='cuda'):
+        """Return ``N_steps`` evenly spaced values from 0 to ``num_timesteps`` inclusive."""
+        return torch.linspace(start=0, end=self.num_timesteps, steps=N_steps, device=device)
+    def make_nosie(self, shape, device='cuda', dtype=torch.float32):
+        """Draw standard normal noise of ``shape`` using ``self.generator``."""
+        return torch.randn(shape, generator=self.generator, device=device, dtype=dtype)
+    def init_noise(self, shape, device='cuda', dtype=torch.float32):
+        """Return fresh noise scaled by the scheduler's ``sigma_max``."""
+        sigma_max = self.sigma_scheduler.sigma_max
+        unit_noise = self.make_nosie(shape, device, dtype)
+        return unit_noise*sigma_max
+    def add_noise(self, x, sigma) -> Tuple[torch.Tensor, torch.Tensor]:
+        """Return ``(noisy_x, noise)``, both cast back to ``x.dtype``.
+
+        ``noisy_x`` is ``(x - c_out(sigma) * noise) / c_skip(sigma)``, computed with
+        ``x`` converted to float32.
+        """
+        eps = self.make_nosie(x.shape, device=x.device)
+        x_fp32 = x.to(dtype=torch.float32)
+        scaled_eps = self.c_out(sigma)*eps
+        x_noisy = (x_fp32-scaled_eps)/self.c_skip(sigma)
+        out_dtype = x.dtype
+        return x_noisy.to(dtype=out_dtype), eps.to(dtype=out_dtype)
+    def add_noise_rand_t(self, x):
+        """Noise ``x`` at scheduler-sampled sigmas, one per batch element.
+
+        Returns ``(noisy_x, noise, sigma, timesteps)`` where ``sigma`` has shape
+        ``(bs, 1, 1, 1)`` and ``timesteps`` is the scheduler's value multiplied by
+        ``num_timesteps - 1``.
+        """
+        batch_size = x.shape[0]
+        # The scheduler's timesteps are expected in [0, 1] (per the original comment).
+        sigma, t_unit = self.sigma_scheduler.sample_sigma(shape=(batch_size,))
+        sigma = sigma.view(-1, 1, 1, 1).to(x.device)
+        t_unit = t_unit.to(x.device)
+        noisy_x, noise = self.add_noise(x, sigma)
+        # Stretch the unit-range timesteps onto [0, num_timesteps - 1].
+        timesteps = t_unit*(self.num_timesteps-1)
+        return noisy_x, noise, sigma, timesteps
+    def denoise(self, x, sigma, eps=None, generator=None):
+        """Not implemented in the base class."""
+        raise NotImplementedError
+    def eps_to_x0(self, eps, x_t, sigma):
+        """Return ``c_skip(sigma) * x_t + c_out(sigma) * eps``."""
+        skip_term = self.c_skip(sigma)*x_t
+        return skip_term+self.c_out(sigma)*eps
+    def velocity_to_eps(self, v_pred, x_t, sigma):
+        """Convert a velocity prediction to eps, with ``alpha = 1 / (sigma**2 + 1)``.
+
+        ``sigma`` must be a tensor (``.sqrt()`` is called on values derived from it).
+        """
+        alpha = 1/(sigma**2+1)
+        root_alpha = alpha.sqrt()
+        root_one_minus_alpha = (1-alpha).sqrt()
+        scaled_x_t = x_t*root_alpha
+        return root_alpha*v_pred + root_one_minus_alpha*scaled_x_t
+    def eps_to_velocity(self, eps, x_t, sigma):
+        """Convert an eps prediction to velocity, with ``alpha = 1 / (sigma**2 + 1)``."""
+        alpha = 1/(sigma**2+1)
+        root_alpha = alpha.sqrt()
+        root_one_minus_alpha = (1-alpha).sqrt()
+        return eps/root_alpha - root_one_minus_alpha*x_t
+    def velocity_to_x0(self, v_pred, x_t, sigma):
+        """Convert a velocity prediction to x0, with ``alpha = 1 / (sigma**2 + 1)``."""
+        alpha = 1/(sigma**2+1)
+        root_one_minus_alpha = (1-alpha).sqrt()
+        return alpha*x_t - root_one_minus_alpha*v_pred

hcpdiff 0.9.1__py3-none-any.whl → 2.2__py3-none-any.whl

hcpdiff 0.9.1py3-none-any.whl → 2.2py3-none-any.whl