hcpdiff 2.3.1__py3-none-any.whl → 2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hcpdiff/ckpt_manager/__init__.py +1 -1
- hcpdiff/ckpt_manager/format/__init__.py +2 -2
- hcpdiff/ckpt_manager/format/diffusers.py +19 -4
- hcpdiff/ckpt_manager/format/emb.py +8 -3
- hcpdiff/ckpt_manager/format/lora_webui.py +1 -1
- hcpdiff/ckpt_manager/format/sd_single.py +28 -5
- hcpdiff/data/cache/vae.py +10 -2
- hcpdiff/data/handler/text.py +15 -14
- hcpdiff/diffusion/sampler/__init__.py +2 -1
- hcpdiff/diffusion/sampler/base.py +17 -6
- hcpdiff/diffusion/sampler/diffusers.py +4 -3
- hcpdiff/diffusion/sampler/sigma_scheduler/base.py +5 -14
- hcpdiff/diffusion/sampler/sigma_scheduler/ddpm.py +7 -6
- hcpdiff/diffusion/sampler/sigma_scheduler/edm.py +4 -4
- hcpdiff/diffusion/sampler/sigma_scheduler/flow.py +3 -3
- hcpdiff/diffusion/sampler/timer/__init__.py +2 -0
- hcpdiff/diffusion/sampler/timer/base.py +26 -0
- hcpdiff/diffusion/sampler/timer/shift.py +49 -0
- hcpdiff/easy/__init__.py +2 -1
- hcpdiff/easy/cfg/sd15_train.py +1 -3
- hcpdiff/easy/model/__init__.py +1 -1
- hcpdiff/easy/model/loader.py +33 -11
- hcpdiff/easy/sampler.py +8 -1
- hcpdiff/loss/__init__.py +4 -3
- hcpdiff/loss/charbonnier.py +17 -0
- hcpdiff/loss/vlb.py +2 -2
- hcpdiff/loss/weighting.py +29 -11
- hcpdiff/models/__init__.py +1 -1
- hcpdiff/models/cfg_context.py +5 -3
- hcpdiff/models/compose/__init__.py +2 -1
- hcpdiff/models/compose/compose_hook.py +69 -67
- hcpdiff/models/compose/compose_textencoder.py +59 -45
- hcpdiff/models/compose/compose_tokenizer.py +48 -11
- hcpdiff/models/compose/flux.py +75 -0
- hcpdiff/models/compose/sdxl.py +86 -0
- hcpdiff/models/text_emb_ex.py +13 -9
- hcpdiff/models/textencoder_ex.py +8 -38
- hcpdiff/models/wrapper/__init__.py +2 -1
- hcpdiff/models/wrapper/flux.py +75 -0
- hcpdiff/models/wrapper/pixart.py +13 -1
- hcpdiff/models/wrapper/sd.py +17 -8
- hcpdiff/parser/embpt.py +7 -7
- hcpdiff/utils/net_utils.py +22 -12
- hcpdiff/workflow/__init__.py +1 -1
- hcpdiff/workflow/diffusion.py +145 -18
- hcpdiff/workflow/text.py +49 -18
- hcpdiff/workflow/vae.py +10 -2
- {hcpdiff-2.3.1.dist-info → hcpdiff-2.4.dist-info}/METADATA +1 -1
- {hcpdiff-2.3.1.dist-info → hcpdiff-2.4.dist-info}/RECORD +53 -49
- hcpdiff/models/compose/sdxl_composer.py +0 -39
- hcpdiff/utils/inpaint_pipe.py +0 -790
- hcpdiff/utils/pipe_hook.py +0 -656
- {hcpdiff-2.3.1.dist-info → hcpdiff-2.4.dist-info}/WHEEL +0 -0
- {hcpdiff-2.3.1.dist-info → hcpdiff-2.4.dist-info}/entry_points.txt +0 -0
- {hcpdiff-2.3.1.dist-info → hcpdiff-2.4.dist-info}/licenses/LICENSE +0 -0
- {hcpdiff-2.3.1.dist-info → hcpdiff-2.4.dist-info}/top_level.txt +0 -0
hcpdiff/workflow/diffusion.py
CHANGED
@@ -8,6 +8,9 @@ from hcpdiff.utils import prepare_seed
|
|
8
8
|
from hcpdiff.utils.net_utils import get_dtype, to_cuda
|
9
9
|
from rainbowneko.infer import BasicAction, Actions
|
10
10
|
from torch.cuda.amp import autocast
|
11
|
+
from einops import rearrange, repeat
|
12
|
+
from hcpdiff.models.compose import SDXLTextEncoder
|
13
|
+
from diffusers import FluxTransformer2DModel, PixArtTransformer2DModel
|
11
14
|
|
12
15
|
try:
|
13
16
|
from diffusers.utils import randn_tensor
|
@@ -91,14 +94,15 @@ class MakeTimestepsAction(BasicAction):
|
|
91
94
|
return {'timesteps':timesteps}
|
92
95
|
|
93
96
|
class MakeLatentAction(BasicAction):
|
94
|
-
def __init__(self, N_ch=4, height=None, width=None, key_map_in=None, key_map_out=None):
|
97
|
+
def __init__(self, N_ch=4, height=None, width=None, patch_size=1, key_map_in=None, key_map_out=None):
|
95
98
|
super().__init__(key_map_in, key_map_out)
|
96
99
|
self.N_ch = N_ch
|
97
100
|
self.height = height
|
98
101
|
self.width = width
|
102
|
+
self.patch_size = patch_size
|
99
103
|
|
100
104
|
def forward(self, noise_sampler:BaseSampler, vae, generator, device, dtype, bs=None, latents=None, start_timestep=None,
|
101
|
-
|
105
|
+
pooler_output=None, crop_coord=None, **states):
|
102
106
|
if bs is None:
|
103
107
|
if 'prompt' in states:
|
104
108
|
bs = len(states['prompt'])
|
@@ -121,34 +125,38 @@ class MakeLatentAction(BasicAction):
|
|
121
125
|
# scale the initial noise by the standard deviation required by the noise_sampler
|
122
126
|
noise_sampler.generator = generator
|
123
127
|
latents = noise_sampler.init_noise(shape, device=device, dtype=get_dtype(dtype))
|
128
|
+
if self.patch_size>1:
|
129
|
+
latents = rearrange(latents, "b c (h ph) (w pw) -> b (c ph pw) h w", ph=self.patch_size, pw=self.patch_size)
|
124
130
|
else:
|
125
131
|
# image to image
|
126
132
|
latents = latents.to(device)
|
133
|
+
if self.patch_size>1:
|
134
|
+
latents = rearrange(latents, "b c (h ph) (w pw) -> b (c ph pw) h w", ph=self.patch_size, pw=self.patch_size)
|
127
135
|
latents, noise = noise_sampler.add_noise(latents, start_timestep)
|
128
136
|
|
129
|
-
output = {'latents':latents}
|
137
|
+
output = {'latents':latents, 'latent_w':shape[3], 'latent_h':shape[2], 'patch_size':self.patch_size}
|
130
138
|
|
131
139
|
# SDXL inputs
|
132
|
-
if
|
140
|
+
if pooler_output is not None:
|
133
141
|
width, height = shape[3]*vae_scale_factor, shape[2]*vae_scale_factor
|
134
142
|
if crop_coord is None:
|
135
143
|
crop_info = torch.tensor([height, width, 0, 0, height, width], dtype=torch.float)
|
136
144
|
else:
|
137
145
|
crop_info = torch.tensor([height, width, *crop_coord], dtype=torch.float)
|
138
146
|
crop_info = crop_info.to(device).repeat(bs, 1)
|
139
|
-
output['
|
147
|
+
output['pooler_output'] = pooler_output.to(device)
|
140
148
|
|
141
149
|
if 'negative_prompt' in states:
|
142
150
|
output['crop_info'] = torch.cat([crop_info, crop_info], dim=0)
|
143
151
|
|
144
152
|
return output
|
145
153
|
|
146
|
-
class
|
154
|
+
class SD15DenoiseAction(BasicAction):
|
147
155
|
def __init__(self, guidance_scale: float = 7.0, key_map_in=None, key_map_out=None):
|
148
156
|
super().__init__(key_map_in, key_map_out)
|
149
157
|
self.guidance_scale = guidance_scale
|
150
158
|
|
151
|
-
def forward(self, denoiser, noise_sampler: BaseSampler, t, latents, prompt_embeds,
|
159
|
+
def forward(self, denoiser, noise_sampler: BaseSampler, t, latents, prompt_embeds, encoder_attention_mask=None,
|
152
160
|
cross_attention_kwargs=None, dtype='fp32', amp=None, model_offload=False, **states):
|
153
161
|
|
154
162
|
if model_offload:
|
@@ -159,19 +167,123 @@ class DenoiseAction(BasicAction):
|
|
159
167
|
latent_model_input = noise_sampler.sigma_scheduler.c_in(t)*latent_model_input
|
160
168
|
t_in = noise_sampler.sigma_scheduler.c_noise(t)
|
161
169
|
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
+
noise_pred = denoiser(latent_model_input, t_in, prompt_embeds, encoder_attention_mask=encoder_attention_mask,
|
171
|
+
cross_attention_kwargs=cross_attention_kwargs, ).sample
|
172
|
+
# perform guidance
|
173
|
+
if self.guidance_scale>1:
|
174
|
+
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
|
175
|
+
noise_pred = noise_pred_uncond+self.guidance_scale*(noise_pred_text-noise_pred_uncond)
|
176
|
+
|
177
|
+
return {'noise_pred':noise_pred}
|
178
|
+
|
179
|
+
class SDXLDenoiseAction(BasicAction):
    """Predict the noise residual for one step, passing extra conditioning to the denoiser.

    ``pooler_output`` and ``crop_info`` are handed to the denoiser through
    ``added_cond_kwargs`` as ``text_embeds`` and ``time_ids``. When
    ``guidance_scale`` is greater than 1 the latents are duplicated before the
    call and the two halves of the prediction are blended afterwards.
    """

    def __init__(self, guidance_scale: float = 7.0, key_map_in=None, key_map_out=None):
        super().__init__(key_map_in, key_map_out)
        self.guidance_scale = guidance_scale

    def forward(self, denoiser, noise_sampler: BaseSampler, t, latents, prompt_embeds, pooler_output=None, encoder_attention_mask=None,
                crop_info=None, cross_attention_kwargs=None, dtype='fp32', amp=None, model_offload=False, **states):
        """Return ``{'noise_pred': ...}`` for the current step.

        ``dtype`` and ``**states`` are accepted but not used here.
        """
        guided = self.guidance_scale>1

        if model_offload:
            to_cuda(denoiser)  # to_cpu in VAE

        with autocast(enabled=amp is not None, dtype=get_dtype(amp)):
            scheduler = noise_sampler.sigma_scheduler

            # With guidance the batch is doubled; the first half of the output is
            # treated as the unconditional prediction and the second as the text one.
            model_input = torch.cat([latents, latents]) if guided else latents
            model_input = scheduler.c_in(t)*model_input
            t_in = scheduler.c_noise(t)

            extra_cond = {"text_embeds":pooler_output, "time_ids":crop_info}

            # predict the noise residual
            prediction = denoiser(
                model_input, t_in, prompt_embeds,
                encoder_attention_mask=encoder_attention_mask,
                cross_attention_kwargs=cross_attention_kwargs,
                added_cond_kwargs=extra_cond,
            )
            noise_pred = prediction.sample

            # perform guidance
            if guided:
                uncond_part, text_part = noise_pred.chunk(2)
                noise_pred = uncond_part+self.guidance_scale*(text_part-uncond_part)

        return {'noise_pred':noise_pred}
|
206
|
+
|
207
|
+
class PixartDenoiseAction(BasicAction):
    """Predict the noise residual for one step with a transformer denoiser called as
    ``denoiser(latents, prompt_embeds, timestep, ...)``.

    After optional guidance blending, the prediction is split in two along
    dim 1 and only the first half is returned.
    """

    def __init__(self, guidance_scale: float = 7.0, key_map_in=None, key_map_out=None):
        super().__init__(key_map_in, key_map_out)
        self.guidance_scale = guidance_scale

    def forward(self, denoiser, noise_sampler: BaseSampler, t, latents, prompt_embeds, encoder_attention_mask=None,
                cross_attention_kwargs=None, dtype='fp32', amp=None, model_offload=False, **states):
        """Return ``{'noise_pred': ...}`` for the current step.

        ``dtype`` and ``**states`` are accepted but not used here.
        """
        guided = self.guidance_scale>1

        if model_offload:
            to_cuda(denoiser)  # to_cpu in VAE

        with autocast(enabled=amp is not None, dtype=get_dtype(amp)):
            scheduler = noise_sampler.sigma_scheduler

            model_input = torch.cat([latents, latents]) if guided else latents
            model_input = scheduler.c_in(t)*model_input
            t_in = scheduler.c_noise(t)

            # A 0-dim timestep is expanded to one entry per sample in the batch.
            if t_in.dim() == 0:
                t_in = t_in.unsqueeze(0).expand(model_input.shape[0])

            prediction = denoiser(
                model_input, prompt_embeds, t_in,
                encoder_attention_mask=encoder_attention_mask,
                cross_attention_kwargs=cross_attention_kwargs,
            )
            noise_pred = prediction.sample

            # perform guidance
            if guided:
                uncond_part, text_part = noise_pred.chunk(2)
                noise_pred = uncond_part+self.guidance_scale*(text_part-uncond_part)

            # remove vars from DiT
            noise_pred, _discarded = noise_pred.chunk(2, dim=1)

        return {'noise_pred':noise_pred}
|
237
|
+
|
238
|
+
class FluxDenoiseAction(BasicAction):
    """Predict the noise residual for one step with a Flux-style sequence denoiser.

    The latents (``b c h w``) are flattened to a token sequence (``b (h w) c``),
    paired with position ids, passed through the denoiser and reshaped back.

    Two guidance modes are supported:

    * ``true_cfg=True``: the denoiser receives a guidance value of ``1.0`` and,
      when ``guidance_scale > 1``, the batch is doubled and the two halves of
      the prediction are blended (classifier-free guidance).
    * ``true_cfg=False``: the batch is not doubled and ``guidance_scale`` is
      passed to the denoiser directly.
    """

    def __init__(self, guidance_scale: float = 7.0, true_cfg=False, key_map_in=None, key_map_out=None):
        super().__init__(key_map_in, key_map_out)
        self.guidance_scale = guidance_scale
        self.true_cfg = true_cfg

    @staticmethod
    def _make_img_ids(latent_h, latent_w, bs, device):
        """Build ``(bs, latent_h*latent_w, 3)`` position ids: channel 0 is zero, 1 the row, 2 the column."""
        img_ids = torch.zeros(latent_h, latent_w, 3, device=device)
        img_ids[..., 1] = img_ids[..., 1]+torch.arange(latent_h, device=device)[:, None]
        img_ids[..., 2] = img_ids[..., 2]+torch.arange(latent_w, device=device)[None, :]
        return repeat(img_ids, "h w c -> b (h w) c", b=bs)

    def forward(self, denoiser, noise_sampler: BaseSampler, t, latents, prompt_embeds, pooler_output=None, encoder_attention_mask=None,
                latent_w=None, latent_h=None, dtype='fp32', amp=None, model_offload=False, **states):
        """Return ``{'noise_pred': ...}`` reshaped to ``b c latent_h latent_w``.

        ``latent_h`` / ``latent_w`` fall back to the spatial size of ``latents``
        when not supplied. ``encoder_attention_mask``, ``dtype`` and ``**states``
        are accepted but not used here.
        """
        if model_offload:
            to_cuda(denoiser)  # to_cpu in VAE

        # Without an explicit grid size, use the latent's own spatial dimensions
        # (the only size for which the flatten/unflatten below round-trips).
        if latent_h is None:
            latent_h = latents.shape[2]
        if latent_w is None:
            latent_w = latents.shape[3]

        # Batch doubling / blending only happens in true-CFG mode with scale > 1.
        use_cfg = bool(self.true_cfg) and self.guidance_scale>1
        # Create the id tensors next to the latents so a GPU denoiser never receives CPU ids.
        device = latents.device

        with autocast(enabled=amp is not None, dtype=get_dtype(amp)):
            latent_model_input = torch.cat([latents]*2) if use_cfg else latents
            latent_model_input = noise_sampler.sigma_scheduler.c_in(t)*latent_model_input
            t_in = noise_sampler.sigma_scheduler.c_noise(t)
            latent_model_input = rearrange(latent_model_input, "b c h w -> b (h w) c")

            img_ids = self._make_img_ids(latent_h, latent_w, latent_model_input.shape[0], device)

            if self.true_cfg:
                # text ids follow the prompt batch; guidance is done outside the model
                txt_bs, guidance = prompt_embeds.shape[0], 1.0
            else:
                # text ids follow the latent batch; guidance value is given to the model
                txt_bs, guidance = latent_model_input.shape[0], self.guidance_scale
            txt_ids = torch.zeros(txt_bs, prompt_embeds.shape[1], 3, device=device)

            # predict the noise residual
            # NOTE(review): arguments are positional (latents, t, guidance, pooled, prompt_embeds,
            # txt_ids, img_ids) -- confirm this order matches the denoiser's forward signature.
            noise_pred = denoiser(latent_model_input, t_in, guidance, pooler_output, prompt_embeds, txt_ids, img_ids).sample

            # perform guidance
            if use_cfg:
                noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
                noise_pred = noise_pred_uncond+self.guidance_scale*(noise_pred_text-noise_pred_uncond)

            noise_pred = rearrange(noise_pred, "b (h w) c -> b c h w", h=latent_h, w=latent_w)

        return {'noise_pred':noise_pred}
|
177
289
|
|
@@ -182,12 +294,27 @@ class SampleAction(BasicAction):
|
|
182
294
|
return {'latents':latents}
|
183
295
|
|
184
296
|
class DiffusionStepAction(BasicAction):
    """Run one diffusion step: predict the noise, then let ``SampleAction`` update the latents.

    ``denoise_action`` selects how the noise is predicted:

    * a ``BasicAction`` instance is used as-is;
    * any other callable is treated as a factory and called with ``guidance_scale``;
    * anything else (e.g. the default ``'auto'``) defers the choice to the first
      ``forward`` call, where it is picked from the types of ``denoiser`` and ``TE``.
    """

    def __init__(self, guidance_scale: float = 7.0, denoise_action:str|BasicAction='auto', true_cfg=False, key_map_in=None, key_map_out=None):
        super().__init__(key_map_in, key_map_out)
        if isinstance(denoise_action, BasicAction):
            # An already-built action must not be re-invoked as a factory.
            self.act_noise_pred = denoise_action
        elif callable(denoise_action):
            self.act_noise_pred = denoise_action(guidance_scale)
        else:
            self.act_noise_pred = None
        self.true_cfg = true_cfg
        self.guidance_scale = guidance_scale
        self.act_sample = SampleAction()

    def _select_denoise_action(self, denoiser, TE):
        """Pick the denoise action matching the model; the check order matters (Flux, SDXL, PixArt, else SD1.5)."""
        if isinstance(denoiser, FluxTransformer2DModel):
            return FluxDenoiseAction(guidance_scale=self.guidance_scale, true_cfg=self.true_cfg)
        if isinstance(TE, SDXLTextEncoder):
            return SDXLDenoiseAction(guidance_scale=self.guidance_scale)
        if isinstance(denoiser, PixArtTransformer2DModel):
            return PixartDenoiseAction(guidance_scale=self.guidance_scale)
        return SD15DenoiseAction(guidance_scale=self.guidance_scale)

    def forward(self, denoiser, noise_sampler, TE=None, **states):
        """Predict the noise and sample; returns whatever ``SampleAction`` returns.

        ``TE`` is only inspected for auto-detection and is not forwarded to the
        denoise action. It is optional, so workflows that do not provide a text
        encoder still run (they never match the SDXL case).
        """
        if self.act_noise_pred is None:
            # Resolved once and cached for subsequent steps.
            self.act_noise_pred = self._select_denoise_action(denoiser, TE)

        states = self.act_noise_pred(denoiser=denoiser, noise_sampler=noise_sampler, **states)
        states = self.act_sample(**states)
        return states
|
hcpdiff/workflow/text.py
CHANGED
@@ -2,7 +2,7 @@ from typing import List, Union
|
|
2
2
|
|
3
3
|
import torch
|
4
4
|
from hcpdiff.models import TokenizerHook
|
5
|
-
from hcpdiff.models.compose import ComposeTEEXHook, ComposeEmbPTHook
|
5
|
+
from hcpdiff.models.compose import ComposeTEEXHook, ComposeEmbPTHook, ComposeTokenizer
|
6
6
|
from hcpdiff.utils import pad_attn_bias
|
7
7
|
from hcpdiff.utils.net_utils import get_dtype, to_cpu, to_cuda
|
8
8
|
from rainbowneko.infer import BasicAction
|
@@ -42,13 +42,30 @@ class TextEncodeAction(BasicAction):
|
|
42
42
|
super().__init__(key_map_in, key_map_out)
|
43
43
|
if isinstance(prompt, str) and bs is not None:
|
44
44
|
prompt = [prompt]*bs
|
45
|
-
negative_prompt
|
45
|
+
if negative_prompt is not None:
|
46
|
+
negative_prompt = [negative_prompt]*bs
|
46
47
|
|
47
48
|
self.prompt = prompt
|
48
49
|
self.negative_prompt = negative_prompt
|
49
50
|
self.bs = bs
|
50
51
|
|
51
|
-
def
|
52
|
+
def encode_prompt_to_emb(self, tokenizer, TE, te_hook, prompt, device):
    """Tokenize ``prompt`` and run it through the text encoder ``TE``.

    Returns ``(prompt_embeds, pooler_output, attention_mask)``. The attention
    mask is taken from the tokenizer output only when
    ``te_hook.use_attention_mask`` is truthy; otherwise it is ``None`` and
    ``None`` is also what the encoder receives.
    """
    tokens = ComposeTokenizer.tokenize_ex(
        tokenizer, prompt,
        truncation=True, padding="max_length", return_tensors="pt", device=device,
    )

    attention_mask = tokens.get('attention_mask', None) if te_hook.use_attention_mask else None
    # Written back so the encoder call below gets the (possibly disabled) mask.
    tokens['attention_mask'] = attention_mask

    # input_ids goes first positionally (for TEEXHook); the remaining entries are keyword arguments.
    ids = tokens.pop('input_ids')
    prompt_embeds, pooler_output = TE(ids, **tokens, output_hidden_states=True)
    return prompt_embeds, pooler_output, attention_mask
|
67
|
+
|
68
|
+
def forward(self, te_hook, tokenizer, TE, dtype: str, device, amp=None, prompt=None, negative_prompt=None, model_offload=False, **states):
|
52
69
|
prompt = prompt or self.prompt
|
53
70
|
negative_prompt = negative_prompt or self.negative_prompt
|
54
71
|
|
@@ -56,7 +73,10 @@ class TextEncodeAction(BasicAction):
|
|
56
73
|
to_cuda(TE)
|
57
74
|
|
58
75
|
with autocast(enabled=amp is not None, dtype=get_dtype(amp)):
|
59
|
-
|
76
|
+
if negative_prompt is None:
|
77
|
+
emb, pooler_output, attention_mask = self.encode_prompt_to_emb(tokenizer, TE, te_hook, prompt, device)
|
78
|
+
else:
|
79
|
+
emb, pooler_output, attention_mask = self.encode_prompt_to_emb(tokenizer, TE, te_hook, negative_prompt+prompt, device)
|
60
80
|
if attention_mask is not None:
|
61
81
|
emb, attention_mask = pad_attn_bias(emb, attention_mask)
|
62
82
|
|
@@ -64,12 +84,12 @@ class TextEncodeAction(BasicAction):
|
|
64
84
|
to_cpu(TE)
|
65
85
|
|
66
86
|
if not isinstance(te_hook, ComposeTEEXHook):
|
67
|
-
|
87
|
+
pooler_output = None
|
68
88
|
return {'prompt':prompt, 'negative_prompt':negative_prompt, 'prompt_embeds':emb, 'encoder_attention_mask':attention_mask,
|
69
|
-
'
|
89
|
+
'pooler_output':pooler_output}
|
70
90
|
|
71
91
|
class AttnMultTextEncodeAction(TextEncodeAction):
|
72
|
-
def forward(self, te_hook, token_ex, TE, dtype: str, device, amp=None, prompt=None, negative_prompt=None, model_offload=False, **states):
|
92
|
+
def forward(self, te_hook, tokenizer, token_ex, TE, dtype: str, device, amp=None, prompt=None, negative_prompt=None, model_offload=False, **states):
|
73
93
|
prompt = prompt or self.prompt
|
74
94
|
negative_prompt = negative_prompt or self.negative_prompt
|
75
95
|
|
@@ -81,18 +101,29 @@ class AttnMultTextEncodeAction(TextEncodeAction):
|
|
81
101
|
if model_offload:
|
82
102
|
to_cuda(TE)
|
83
103
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
104
|
+
if negative_prompt is None:
|
105
|
+
mult_p, clean_text_p = token_ex.parse_attn_mult(prompt)
|
106
|
+
with autocast(enabled=amp is not None, dtype=get_dtype(amp)):
|
107
|
+
emb, pooler_output, attention_mask = self.encode_prompt_to_emb(tokenizer, TE, te_hook, clean_text_p, device)
|
108
|
+
if attention_mask is not None:
|
109
|
+
emb, attention_mask = pad_attn_bias(emb, attention_mask)
|
110
|
+
emb = te_hook.mult_attn(emb, mult_p)
|
111
|
+
else:
|
112
|
+
mult_p, clean_text_p = token_ex.parse_attn_mult(prompt)
|
113
|
+
mult_n, clean_text_n = token_ex.parse_attn_mult(negative_prompt)
|
114
|
+
with autocast(enabled=amp is not None, dtype=get_dtype(amp)):
|
115
|
+
emb, pooler_output, attention_mask = self.encode_prompt_to_emb(tokenizer, TE, te_hook, clean_text_n+clean_text_p, device)
|
116
|
+
if attention_mask is not None:
|
117
|
+
emb, attention_mask = pad_attn_bias(emb, attention_mask)
|
118
|
+
emb_n, emb_p = emb.chunk(2)
|
119
|
+
emb_p = te_hook.mult_attn(emb_p, mult_p)
|
120
|
+
emb_n = te_hook.mult_attn(emb_n, mult_n)
|
93
121
|
|
94
122
|
if model_offload:
|
95
123
|
to_cpu(TE)
|
96
124
|
|
97
|
-
|
98
|
-
'encoder_attention_mask':attention_mask, '
|
125
|
+
if negative_prompt is None:
|
126
|
+
return {'prompt':list(clean_text_p), 'prompt_embeds':emb, 'encoder_attention_mask':attention_mask, 'pooler_output':pooler_output}
|
127
|
+
else:
|
128
|
+
return {'prompt':list(clean_text_p), 'negative_prompt':list(clean_text_n), 'prompt_embeds':torch.cat([emb_n, emb_p], dim=0),
|
129
|
+
'encoder_attention_mask':attention_mask, 'pooler_output':pooler_output}
|
hcpdiff/workflow/vae.py
CHANGED
@@ -41,7 +41,11 @@ class EncodeAction(BasicAction):
|
|
41
41
|
else:
|
42
42
|
init_latents = vae.encode(image).latent_dist.sample(generator)
|
43
43
|
|
44
|
-
init_latents =
|
44
|
+
init_latents = init_latents.to(dtype=get_dtype(dtype))
|
45
|
+
# The walrus must be parenthesized: `:=` binds more loosely than `is not None`,
# so without the parentheses shift_factor would be the boolean result of the
# comparison (True == 1) and not the VAE's configured shift.
if (shift_factor := getattr(vae.config, 'shift_factor', None)) is not None:
    init_latents = (init_latents-shift_factor)*vae.config.scaling_factor
else:
    init_latents = init_latents*vae.config.scaling_factor
|
45
49
|
if model_offload:
|
46
50
|
to_cpu(vae)
|
47
51
|
return {'latents':init_latents}
|
@@ -63,7 +67,11 @@ class DecodeAction(BasicAction):
|
|
63
67
|
torch.cuda.synchronize()
|
64
68
|
to_cuda(vae)
|
65
69
|
latents = latents.to(dtype=vae.dtype)
|
66
|
-
|
70
|
+
# The walrus must be parenthesized: `:=` binds more loosely than `is not None`,
# so without the parentheses shift_factor would be the boolean result of the
# comparison (True == 1) and not the VAE's configured shift.
if (shift_factor := getattr(vae.config, 'shift_factor', None)) is not None:
    latents = latents/vae.config.scaling_factor + shift_factor
else:
    latents = latents/vae.config.scaling_factor
|
74
|
+
image = vae.decode(latents, return_dict=False)[0]
|
67
75
|
if model_offload:
|
68
76
|
to_cpu(vae)
|
69
77
|
|
@@ -3,22 +3,22 @@ hcpdiff/train_colo.py,sha256=EsuNSzLBvGTZWU_LEk0JpP-F5eNW0lwkawIRAX38jmE,9250
|
|
3
3
|
hcpdiff/trainer_ac.py,sha256=-owV-3_bvPxuQsZS2WaajBDh58HpftRtnx0GJkswqaY,2787
|
4
4
|
hcpdiff/trainer_ac_single.py,sha256=zyZVrutLUbIJYW1HzUnQ_RnmIcDhbC7M_CT833PJH5w,993
|
5
5
|
hcpdiff/trainer_deepspeed.py,sha256=7lGsiAstWuIlmhRMwWTcJCkoxzUaakVxBngKDnJdSJk,1947
|
6
|
-
hcpdiff/ckpt_manager/__init__.py,sha256=
|
6
|
+
hcpdiff/ckpt_manager/__init__.py,sha256=OCotG2Q4q3n4ZpN6UfY-j-Lg4VOAvtLnb0xygkkqZm8,280
|
7
7
|
hcpdiff/ckpt_manager/ckpt.py,sha256=2A093lT03M1ZsJIMWl376V165eh0TZwOgiGrz3LM73Q,1248
|
8
8
|
hcpdiff/ckpt_manager/loader.py,sha256=6iZDUj-Vfc5T9eGdWfFMQw4n1GqyLqaLBolgAtgqPq8,3640
|
9
|
-
hcpdiff/ckpt_manager/format/__init__.py,sha256=
|
10
|
-
hcpdiff/ckpt_manager/format/diffusers.py,sha256=
|
11
|
-
hcpdiff/ckpt_manager/format/emb.py,sha256=
|
12
|
-
hcpdiff/ckpt_manager/format/lora_webui.py,sha256=
|
13
|
-
hcpdiff/ckpt_manager/format/sd_single.py,sha256=
|
9
|
+
hcpdiff/ckpt_manager/format/__init__.py,sha256=9d9NOY1XRDGlebOiQpOnpQw14uPHZyL3-6Cvkc8hYZY,277
|
10
|
+
hcpdiff/ckpt_manager/format/diffusers.py,sha256=VKXI2i4whrIaHMOFMzA8UqB5ytXOv6WMgEyU9EkDW5Y,5008
|
11
|
+
hcpdiff/ckpt_manager/format/emb.py,sha256=jC-PR47y-TLn4xMkrujlXluGiW-tcUIhQjKZ_G33gUc,899
|
12
|
+
hcpdiff/ckpt_manager/format/lora_webui.py,sha256=9pBesgzifInW8YnzdzmEbyrClyHt-zmkzla7kK4YrBg,10015
|
13
|
+
hcpdiff/ckpt_manager/format/sd_single.py,sha256=gqN0NqrsxaYMAspdl48KEWtzGz_yxbxxfC6LJapbHfs,3598
|
14
14
|
hcpdiff/data/__init__.py,sha256=ZFKtanOoMo3G3eKUJPhysnHXnr8BNARERkcMB6B897U,292
|
15
15
|
hcpdiff/data/dataset.py,sha256=1k4GldW13eVyqK_9hrQniqr3_XYAapnWF7iXl_1GXGg,877
|
16
16
|
hcpdiff/data/cache/__init__.py,sha256=ToCmokYH6DghlSwm7HJFirPRIWJ0LkgzqVOYlgoAkQw,25
|
17
|
-
hcpdiff/data/cache/vae.py,sha256=
|
17
|
+
hcpdiff/data/cache/vae.py,sha256=_Vkx4AXB17hgs5Rgy9NbytMbcNu0pHMFH5mreeNjKxk,4572
|
18
18
|
hcpdiff/data/handler/__init__.py,sha256=G8ZTQF91ilkTRmUoWdmAissTSZ7fvNUpm_hBYmXKTtk,258
|
19
19
|
hcpdiff/data/handler/controlnet.py,sha256=bRDMD9BP8-VaG5VrxzvcFKfkqeTbChNfrJSZ3vXbQgY,658
|
20
20
|
hcpdiff/data/handler/diffusion.py,sha256=S-_7o5Z1tm6LmRZVZs21rbJC7iUoq0tHOsSjKK6geVk,4156
|
21
|
-
hcpdiff/data/handler/text.py,sha256=
|
21
|
+
hcpdiff/data/handler/text.py,sha256=I_dqFf1gKwRsNdBCv8dvAR16Do8HwOgwVLt4iH8n0bo,4331
|
22
22
|
hcpdiff/data/source/__init__.py,sha256=265M8qfWNUE4SKX0pdXhLYjCnCuae5YE4bfZpO-ydXc,187
|
23
23
|
hcpdiff/data/source/folder_class.py,sha256=bs4qPMTzwcnT6ZFlT3tpi9sclsRF9a2MBA1pQD-9EYs,961
|
24
24
|
hcpdiff/data/source/text.py,sha256=VgI5Ouq986Yy1jwD2fZ9iBlsRciPCeARZmOPEZIcaQY,1468
|
@@ -28,57 +28,63 @@ hcpdiff/diffusion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
|
|
28
28
|
hcpdiff/diffusion/noise/__init__.py,sha256=D83EZ6bnc6Ucu4AZwE6rpmXCtwYfHHumeVq97brbnIE,47
|
29
29
|
hcpdiff/diffusion/noise/pyramid_noise.py,sha256=KbpyMT1BHNIaAa7g5eECDkTttOMoMWVFmbP-ekBsuEY,1693
|
30
30
|
hcpdiff/diffusion/sampler/VP.py,sha256=r0Q_RROEIeNNw93XrOD5htW78rfuoSxy1WBQEoQL83s,958
|
31
|
-
hcpdiff/diffusion/sampler/__init__.py,sha256=
|
32
|
-
hcpdiff/diffusion/sampler/base.py,sha256=
|
33
|
-
hcpdiff/diffusion/sampler/diffusers.py,sha256
|
31
|
+
hcpdiff/diffusion/sampler/__init__.py,sha256=8UmSOxcFzdYnO9hkMzWx4V09ysx5RKDnf2sVYlZEM9w,156
|
32
|
+
hcpdiff/diffusion/sampler/base.py,sha256=HhscR81Qq_m4yfnhGEDPkvxb6tJozlbT8YuTxku0kZQ,5891
|
33
|
+
hcpdiff/diffusion/sampler/diffusers.py,sha256=-A2vbKM-CjbE4CS77jroeNhaSm8LqNsWdvYUPoDZzTM,2640
|
34
34
|
hcpdiff/diffusion/sampler/sigma_scheduler/__init__.py,sha256=eiSmMBkXI_LfxnNrXj5XptcF0dGcPas--vWvqhFGlv8,273
|
35
|
-
hcpdiff/diffusion/sampler/sigma_scheduler/base.py,sha256=
|
36
|
-
hcpdiff/diffusion/sampler/sigma_scheduler/ddpm.py,sha256=
|
37
|
-
hcpdiff/diffusion/sampler/sigma_scheduler/edm.py,sha256=
|
38
|
-
hcpdiff/diffusion/sampler/sigma_scheduler/flow.py,sha256=
|
35
|
+
hcpdiff/diffusion/sampler/sigma_scheduler/base.py,sha256=_cAfak2M3CMdH4EK6AYKPbKA3ccRTDyNJw9NjfXBEOM,2210
|
36
|
+
hcpdiff/diffusion/sampler/sigma_scheduler/ddpm.py,sha256=PBh2LFkRF5mrmedHvEHh8vATnIMU2gIoi3IlUEheaLY,13045
|
37
|
+
hcpdiff/diffusion/sampler/sigma_scheduler/edm.py,sha256=cQuNj8jk4r3TDEN86i_tLYHypO3QFUPpVZMaAGVr3dA,4131
|
38
|
+
hcpdiff/diffusion/sampler/sigma_scheduler/flow.py,sha256=1edTaTiM02ogV1lAbnI4e2gc3QUaNgl8RWwYQm6vf2s,2023
|
39
39
|
hcpdiff/diffusion/sampler/sigma_scheduler/zero_terminal.py,sha256=CCqQLkGo4omkxzFovYdZQzdZVwIxK3PiOitZFww8MHs,859
|
40
|
-
hcpdiff/
|
41
|
-
hcpdiff/
|
40
|
+
hcpdiff/diffusion/sampler/timer/__init__.py,sha256=QNmmF6dwMWQM16mRYXDq0FY7h5RiQ_tnM76PMnZnH4E,108
|
41
|
+
hcpdiff/diffusion/sampler/timer/base.py,sha256=CgJ4aqG9jzHrlRzqQRJUOq95Cait_mXYICCIHA_OO64,872
|
42
|
+
hcpdiff/diffusion/sampler/timer/shift.py,sha256=v2nHj6Z5bfzzVH76ypfEgK1EDDf0Lp9lfJSRCadSuwU,1735
|
43
|
+
hcpdiff/easy/__init__.py,sha256=JT-dbN4e3iG3zHT2p9_TaesTPDwvDuj9PNFaPEhjxjU,208
|
44
|
+
hcpdiff/easy/sampler.py,sha256=5O01VRz-bYJfzBIqRsD3vxE3AbVwh3zKzjjFXzRX9-E,1438
|
42
45
|
hcpdiff/easy/cfg/__init__.py,sha256=SxHMWG6T2CXhX3dP0xizSMd9vFWPaZQDc4Gj4CF__yQ,253
|
43
|
-
hcpdiff/easy/cfg/sd15_train.py,sha256=
|
46
|
+
hcpdiff/easy/cfg/sd15_train.py,sha256=L9bNWM87T1DIZpWetwK0gwPIWL39JoNOovDCxb7cDiw,6967
|
44
47
|
hcpdiff/easy/cfg/sdxl_train.py,sha256=rVLLKVMKB_PHuum3dKQcBqL0uR8QhzmdRllM-pYnbK4,4534
|
45
48
|
hcpdiff/easy/cfg/t2i.py,sha256=SnjFjZAKd9orjJr3RW5_N2_EIlW2Ree7JMvdNUAR9gc,9507
|
46
|
-
hcpdiff/easy/model/__init__.py,sha256=
|
49
|
+
hcpdiff/easy/model/__init__.py,sha256=UukTqyidx-W2n2eiG4mUQBa0Sziv7gYmuBPx1twvE90,170
|
47
50
|
hcpdiff/easy/model/cnet.py,sha256=m0NTH9V1kLzb5GybwBrSNT0KvTcRpPfGkzUeMz9jZZQ,1084
|
48
|
-
hcpdiff/easy/model/loader.py,sha256=
|
51
|
+
hcpdiff/easy/model/loader.py,sha256=nm06hPqtQuIwpa6GnygG1CBgmywhmB5mcACRcDPa51U,4355
|
49
52
|
hcpdiff/evaluate/__init__.py,sha256=qWxV0D8Ho5uBj2YbaC_QFDnT49PSKPfh44m4ivkNbMM,108
|
50
53
|
hcpdiff/evaluate/evaluator.py,sha256=9BZQBeC-N7p-ICx6Giw9v-2Tb9volMTDmeDfhj0nXJ0,2940
|
51
54
|
hcpdiff/evaluate/previewer.py,sha256=-vE0YXVfos70CQMo9ZInw7xu3d88DlTfVLs4BzzkxfM,3140
|
52
55
|
hcpdiff/evaluate/metrics/__init__.py,sha256=vE0nSvBtDBu9SomANvWcm2UHX56PhCYwhgrcmm_mKyo,39
|
53
56
|
hcpdiff/evaluate/metrics/clip_score.py,sha256=rQgweu5QcqW3fPI3EXcNbrH2QCcSAekE3lpYk45P2M4,900
|
54
|
-
hcpdiff/loss/__init__.py,sha256=
|
57
|
+
hcpdiff/loss/__init__.py,sha256=wlWpg4a2ev7JXsv52MwJmWCduvq011IoSTmABJ_XM2M,230
|
55
58
|
hcpdiff/loss/base.py,sha256=Vvpm-KZGH4n-gYIlnVAtPl1B799c7v0dJXJ5BBh3yO0,1112
|
59
|
+
hcpdiff/loss/charbonnier.py,sha256=Qvjj9bznJEBn_NFPbPsVf9jzawOcfVvien54bMS2UGc,461
|
56
60
|
hcpdiff/loss/gw.py,sha256=0yi1kozuII3xZA6FnjOhINtvScWt1MyBZLBtMKmgojM,1224
|
57
61
|
hcpdiff/loss/ssim.py,sha256=YofadvBkc6sklxBUx1p3ADw5OHOZPK3kaHz8FH5a6m4,1281
|
58
|
-
hcpdiff/loss/vlb.py,sha256=
|
59
|
-
hcpdiff/loss/weighting.py,sha256=
|
60
|
-
hcpdiff/models/__init__.py,sha256=
|
61
|
-
hcpdiff/models/cfg_context.py,sha256=
|
62
|
+
hcpdiff/loss/vlb.py,sha256=NqkhzGM3g_67nmpg18I0W_KC8X6YvKSZyHfH9C14OCg,3256
|
63
|
+
hcpdiff/loss/weighting.py,sha256=qJvp895qwT6H6_x1IhxkHWMYPjqM5peiyciB634LREI,3668
|
64
|
+
hcpdiff/models/__init__.py,sha256=__LnS75jcEmJ0Y_b5N6zsDI9CMxByBNKNuPrcUXRRwo,485
|
65
|
+
hcpdiff/models/cfg_context.py,sha256=FePMdvzDfH4Xo6aYVqR2UTzU7t_MhAKhnSrGWlua1Ts,1553
|
62
66
|
hcpdiff/models/container.py,sha256=z3p5TmQhxdzXSIfofz55_bmEhSsgUJsy1o9EcDs8Oeo,696
|
63
67
|
hcpdiff/models/controlnet.py,sha256=VIkUzJCVpCqqQOtRSLQPfbcDy9CsXutxLeZB6PdZfA0,7809
|
64
68
|
hcpdiff/models/lora_base.py,sha256=LGwBD9KP6qf4pgTx24i5-JLo4rDBQ6jFfterQKBjTbE,6758
|
65
69
|
hcpdiff/models/lora_base_patch.py,sha256=Tdb_b3TN_K-04nlUvcfBh6flPcbL9M4iP7jOVyb1jXQ,7271
|
66
70
|
hcpdiff/models/lora_layers.py,sha256=O9W_Ue71lHj7Y_GbpioF4Hc3h2-z_zOqck93VYUra6s,7777
|
67
71
|
hcpdiff/models/lora_layers_patch.py,sha256=GYFYsJD2VSLZfdnLma9CmQEHz09HROFJcc4wc_gs9f0,8198
|
68
|
-
hcpdiff/models/text_emb_ex.py,sha256=
|
69
|
-
hcpdiff/models/textencoder_ex.py,sha256=
|
72
|
+
hcpdiff/models/text_emb_ex.py,sha256=HQAwXf-3CXH27ehOjCNRMk26Sp7hbj4rU8ewWP_GWa8,8116
|
73
|
+
hcpdiff/models/textencoder_ex.py,sha256=vQNFDmmAfXmF9cGbqMKYQhoYrrdwq39PVb0mubEkTLs,6926
|
70
74
|
hcpdiff/models/tokenizer_ex.py,sha256=zKUn4BY7b3yXwK9PWkZtQKJPyKYwUc07E-hwB9NQybs,2446
|
71
|
-
hcpdiff/models/compose/__init__.py,sha256=
|
72
|
-
hcpdiff/models/compose/compose_hook.py,sha256=
|
73
|
-
hcpdiff/models/compose/compose_textencoder.py,sha256=
|
74
|
-
hcpdiff/models/compose/compose_tokenizer.py,sha256=
|
75
|
-
hcpdiff/models/compose/
|
76
|
-
hcpdiff/models/
|
77
|
-
hcpdiff/models/wrapper/
|
78
|
-
hcpdiff/models/wrapper/
|
75
|
+
hcpdiff/models/compose/__init__.py,sha256=c2Zsk8ge6T5iOOl-8HRKtDoSpdQ3fS88mDiYZC3VpHE,257
|
76
|
+
hcpdiff/models/compose/compose_hook.py,sha256=rU6PIhyEVjnDQlw5NOB5QdydNlSLe60CnilUn_NpxH4,6352
|
77
|
+
hcpdiff/models/compose/compose_textencoder.py,sha256=7rPplxzs9xXxYqpHhTp-qX30nAXtgb6ZpClFLDVouvk,7409
|
78
|
+
hcpdiff/models/compose/compose_tokenizer.py,sha256=AogUrqwmztAX21oyVfW8j5fcPPiGDZMT1_9wPQcxBMc,4572
|
79
|
+
hcpdiff/models/compose/flux.py,sha256=Fcg-zzpsWbNdOB0VsJAoicMK62l2uCWugFoq_uIxtzY,3632
|
80
|
+
hcpdiff/models/compose/sdxl.py,sha256=cgR5BplUSPPud_nG0dH6LT3SoWu9ypLVXLMgzcnvyaI,4522
|
81
|
+
hcpdiff/models/wrapper/__init__.py,sha256=ZFQ4CqJvSA_saKmI8eKgqvT6pIKtG52gonE4ZzLOkok,165
|
82
|
+
hcpdiff/models/wrapper/flux.py,sha256=Zqm-Qnz-Jrtwd1h5sPfctVIp6cnSQViFvGnSw0UOn6E,4567
|
83
|
+
hcpdiff/models/wrapper/pixart.py,sha256=zsqA3soCdGyTxAO3u9NiQdqO8rUPsrtXiAh8E-ziMd4,1861
|
84
|
+
hcpdiff/models/wrapper/sd.py,sha256=O49ziLrHeGVaVEXGcgZ4zkRNSnARiLeZWmPOW8ZzIU0,12080
|
79
85
|
hcpdiff/models/wrapper/utils.py,sha256=NyebMoAPnrgcTHbiIocSD-eGdGdD-V1G_TQuWsRWufw,665
|
80
86
|
hcpdiff/parser/__init__.py,sha256=-2dDZ2Ii4zoGQqDTme94q4PpJbBiV6HS5BsDASz4Xbo,33
|
81
|
-
hcpdiff/parser/embpt.py,sha256
|
87
|
+
hcpdiff/parser/embpt.py,sha256=-AP2wn8FkxryzzMAOERAt9vdd8ZEWD1tNUYVPAghaEM,1419
|
82
88
|
hcpdiff/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
83
89
|
hcpdiff/tools/convert_caption_txt2json.py,sha256=tbBgIphJWvXUoXjtwsnLX2w9IZEY3jTgxbTvUMgukbM,945
|
84
90
|
hcpdiff/tools/convert_old_lora.py,sha256=yIP9RGcyQbwT2NNAZtTLgBXs6XJOHRvoHQep0SdqDho,453
|
@@ -94,26 +100,24 @@ hcpdiff/tools/save_model.py,sha256=gbfYi_EfEBZEUcDjle6MDHA19sQWY0zA8_y_LMzHQ7M,4
|
|
94
100
|
hcpdiff/tools/sd2diffusers.py,sha256=vB6OnBLw60sJkdpVZcYEPtKAZW1h8ErbSGSRq0uAiIk,16855
|
95
101
|
hcpdiff/utils/__init__.py,sha256=28K9Ui0uur-vHuUdlSyIBYijgu2b7rGOPXN2ogJu1z8,82
|
96
102
|
hcpdiff/utils/colo_utils.py,sha256=JyLUvVnISa48CnryNLrgVxMo-jxu2UhBq70eYPrkjuI,837
|
97
|
-
hcpdiff/utils/
|
98
|
-
hcpdiff/utils/net_utils.py,sha256=gdwLYDNKV2t3SP0jBIO3d0HtY6E7jRaf_rmPT8gKZZE,9762
|
99
|
-
hcpdiff/utils/pipe_hook.py,sha256=-UDX3FtZGl-bxSk13gdbPXc1OvtbCcpk_fvKxLQo3Ag,31987
|
103
|
+
hcpdiff/utils/net_utils.py,sha256=dL3Q9I8X7xAcssQomWAvapwmTXCffWQ80vcAVUCHOt4,10211
|
100
104
|
hcpdiff/utils/torch_utils.py,sha256=gBZCcDKZc0NGDQx6QeHuQePoZ82kQRhaL7oEdZIYGvU,573
|
101
105
|
hcpdiff/utils/utils.py,sha256=hZnZP1IETgVpScxES0yIuRfc34TnzvAqmgOTK_56ssw,4976
|
102
|
-
hcpdiff/workflow/__init__.py,sha256=
|
103
|
-
hcpdiff/workflow/diffusion.py,sha256=
|
106
|
+
hcpdiff/workflow/__init__.py,sha256=Ve_ZZZVKEplR5SDRq0yRlrT_DHSxRtpESnQSgCZ1qR0,948
|
107
|
+
hcpdiff/workflow/diffusion.py,sha256=oEnnc8tnITXXko2Fb6ahDg7FbuKa8lZr33JWecCznPM,16535
|
104
108
|
hcpdiff/workflow/fast.py,sha256=kZt7bKrvpFInSn7GzbkTkpoCSM0Z6IbDjgaDvcbFYf8,1024
|
105
109
|
hcpdiff/workflow/flow.py,sha256=FFbFFOAXT4c31L5bHBEB_qeVGuBQDLYhq8kTD1chGNo,2548
|
106
110
|
hcpdiff/workflow/io.py,sha256=4oiE_PS3sOVYT8M6PDwvT5h9XzoKDMQR0n_4-Ktttys,3284
|
107
111
|
hcpdiff/workflow/model.py,sha256=1gj5yOTefYTnGXVR6JPAfxIwuB69YwN6E-BontRcuyQ,2913
|
108
|
-
hcpdiff/workflow/text.py,sha256=
|
112
|
+
hcpdiff/workflow/text.py,sha256=vz8zAA6qcYOBfvak6kM2m2KaLjiaaSPgVWhrPQtko0I,6402
|
109
113
|
hcpdiff/workflow/utils.py,sha256=xojaMG4lHsymslc8df5uiVXmmBVWpn_Phqka8qzJEWw,2226
|
110
|
-
hcpdiff/workflow/vae.py,sha256=
|
114
|
+
hcpdiff/workflow/vae.py,sha256=LOvGFm6xt1abGOh8UezloXD1pMhYkv7S0vQUE4HojGo,3548
|
111
115
|
hcpdiff/workflow/daam/__init__.py,sha256=ySIDaxloN-D3qM7OuVaG1BR3D-CibDoXYpoTgw0zUhU,59
|
112
116
|
hcpdiff/workflow/daam/act.py,sha256=tHbsFWTYYU4bvcZOo1Bpi_z6ofpJatRYccl4vvf8wIA,2756
|
113
117
|
hcpdiff/workflow/daam/hook.py,sha256=z9f9mBjKW21xuUZ-iQxQ0HbWOBXtZrisFB0VNMq6d0U,4383
|
114
|
-
hcpdiff-2.
|
115
|
-
hcpdiff-2.
|
116
|
-
hcpdiff-2.
|
117
|
-
hcpdiff-2.
|
118
|
-
hcpdiff-2.
|
119
|
-
hcpdiff-2.
|
118
|
+
hcpdiff-2.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
119
|
+
hcpdiff-2.4.dist-info/METADATA,sha256=AM8nFWpSpgB0EWNhp40nh7a2G_qHS0We8sKj_ihct7w,10321
|
120
|
+
hcpdiff-2.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
121
|
+
hcpdiff-2.4.dist-info/entry_points.txt,sha256=_4VRsEsEWOhHfzBDu9bx8Wh_S8Wi4ZTHpI0n6rU0J-I,258
|
122
|
+
hcpdiff-2.4.dist-info/top_level.txt,sha256=shyf78x-HVgykYpsmY22mKG0xIc7Qk30fDMdavdYWQ8,8
|
123
|
+
hcpdiff-2.4.dist-info/RECORD,,
|
@@ -1,39 +0,0 @@
|
|
1
|
-
from .compose_textencoder import ComposeTextEncoder
|
2
|
-
from .compose_tokenizer import ComposeTokenizer
|
3
|
-
from transformers import CLIPTextModel, AutoTokenizer, CLIPTextModelWithProjection
|
4
|
-
from typing import Optional, Union, Tuple
|
5
|
-
import torch
|
6
|
-
from transformers.modeling_outputs import BaseModelOutputWithPooling
|
7
|
-
|
8
|
-
class CLIPTextModelWithProjection_Align(CLIPTextModelWithProjection):
|
9
|
-
# fxxk the transformers!
|
10
|
-
def forward(
|
11
|
-
self,
|
12
|
-
input_ids: Optional[torch.Tensor] = None,
|
13
|
-
attention_mask: Optional[torch.Tensor] = None,
|
14
|
-
position_ids: Optional[torch.Tensor] = None,
|
15
|
-
output_attentions: Optional[bool] = None,
|
16
|
-
output_hidden_states: Optional[bool] = None,
|
17
|
-
return_dict: Optional[bool] = None,
|
18
|
-
) -> Union[Tuple, BaseModelOutputWithPooling]:
|
19
|
-
text_outputs = super().forward(input_ids, attention_mask, position_ids, output_attentions, output_hidden_states, return_dict)
|
20
|
-
return BaseModelOutputWithPooling(
|
21
|
-
last_hidden_state=text_outputs.last_hidden_state,
|
22
|
-
pooler_output=text_outputs.text_embeds,
|
23
|
-
hidden_states=text_outputs.hidden_states,
|
24
|
-
attentions=text_outputs.attentions,
|
25
|
-
)
|
26
|
-
|
27
|
-
class SDXLTextEncoder(ComposeTextEncoder):
|
28
|
-
@classmethod
|
29
|
-
def from_pretrained(cls, pretrained_model_name_or_path: str, *args, subfolder=None, revision:str=None, **kwargs):
|
30
|
-
clip_L = CLIPTextModel.from_pretrained(pretrained_model_name_or_path, subfolder='text_encoder', **kwargs)
|
31
|
-
clip_bigG = CLIPTextModelWithProjection_Align.from_pretrained(pretrained_model_name_or_path, subfolder='text_encoder_2', **kwargs)
|
32
|
-
return cls([('clip_L', clip_L), ('clip_bigG', clip_bigG)])
|
33
|
-
|
34
|
-
class SDXLTokenizer(ComposeTokenizer):
|
35
|
-
@classmethod
|
36
|
-
def from_pretrained(cls, pretrained_model_name_or_path: str, *args, subfolder=None, revision:str=None, **kwargs):
|
37
|
-
clip_L = AutoTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder='tokenizer', **kwargs)
|
38
|
-
clip_bigG = AutoTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder='tokenizer_2', **kwargs)
|
39
|
-
return cls([('clip_L', clip_L), ('clip_bigG', clip_bigG)])
|