hcpdiff 2.3.1__py3-none-any.whl → 2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. hcpdiff/ckpt_manager/__init__.py +1 -1
  2. hcpdiff/ckpt_manager/format/__init__.py +2 -2
  3. hcpdiff/ckpt_manager/format/diffusers.py +19 -4
  4. hcpdiff/ckpt_manager/format/emb.py +8 -3
  5. hcpdiff/ckpt_manager/format/lora_webui.py +1 -1
  6. hcpdiff/ckpt_manager/format/sd_single.py +28 -5
  7. hcpdiff/data/cache/vae.py +10 -2
  8. hcpdiff/data/handler/text.py +15 -14
  9. hcpdiff/diffusion/sampler/__init__.py +2 -1
  10. hcpdiff/diffusion/sampler/base.py +17 -6
  11. hcpdiff/diffusion/sampler/diffusers.py +4 -3
  12. hcpdiff/diffusion/sampler/sigma_scheduler/base.py +5 -14
  13. hcpdiff/diffusion/sampler/sigma_scheduler/ddpm.py +7 -6
  14. hcpdiff/diffusion/sampler/sigma_scheduler/edm.py +4 -4
  15. hcpdiff/diffusion/sampler/sigma_scheduler/flow.py +3 -3
  16. hcpdiff/diffusion/sampler/timer/__init__.py +2 -0
  17. hcpdiff/diffusion/sampler/timer/base.py +26 -0
  18. hcpdiff/diffusion/sampler/timer/shift.py +49 -0
  19. hcpdiff/easy/__init__.py +2 -1
  20. hcpdiff/easy/cfg/sd15_train.py +1 -3
  21. hcpdiff/easy/model/__init__.py +1 -1
  22. hcpdiff/easy/model/loader.py +33 -11
  23. hcpdiff/easy/sampler.py +8 -1
  24. hcpdiff/loss/__init__.py +4 -3
  25. hcpdiff/loss/charbonnier.py +17 -0
  26. hcpdiff/loss/vlb.py +2 -2
  27. hcpdiff/loss/weighting.py +29 -11
  28. hcpdiff/models/__init__.py +1 -1
  29. hcpdiff/models/cfg_context.py +5 -3
  30. hcpdiff/models/compose/__init__.py +2 -1
  31. hcpdiff/models/compose/compose_hook.py +69 -67
  32. hcpdiff/models/compose/compose_textencoder.py +59 -45
  33. hcpdiff/models/compose/compose_tokenizer.py +48 -11
  34. hcpdiff/models/compose/flux.py +75 -0
  35. hcpdiff/models/compose/sdxl.py +86 -0
  36. hcpdiff/models/text_emb_ex.py +13 -9
  37. hcpdiff/models/textencoder_ex.py +8 -38
  38. hcpdiff/models/wrapper/__init__.py +2 -1
  39. hcpdiff/models/wrapper/flux.py +75 -0
  40. hcpdiff/models/wrapper/pixart.py +13 -1
  41. hcpdiff/models/wrapper/sd.py +17 -8
  42. hcpdiff/parser/embpt.py +7 -7
  43. hcpdiff/utils/net_utils.py +22 -12
  44. hcpdiff/workflow/__init__.py +1 -1
  45. hcpdiff/workflow/diffusion.py +145 -18
  46. hcpdiff/workflow/text.py +49 -18
  47. hcpdiff/workflow/vae.py +10 -2
  48. {hcpdiff-2.3.1.dist-info → hcpdiff-2.4.dist-info}/METADATA +1 -1
  49. {hcpdiff-2.3.1.dist-info → hcpdiff-2.4.dist-info}/RECORD +53 -49
  50. hcpdiff/models/compose/sdxl_composer.py +0 -39
  51. hcpdiff/utils/inpaint_pipe.py +0 -790
  52. hcpdiff/utils/pipe_hook.py +0 -656
  53. {hcpdiff-2.3.1.dist-info → hcpdiff-2.4.dist-info}/WHEEL +0 -0
  54. {hcpdiff-2.3.1.dist-info → hcpdiff-2.4.dist-info}/entry_points.txt +0 -0
  55. {hcpdiff-2.3.1.dist-info → hcpdiff-2.4.dist-info}/licenses/LICENSE +0 -0
  56. {hcpdiff-2.3.1.dist-info → hcpdiff-2.4.dist-info}/top_level.txt +0 -0
hcpdiff/workflow/diffusion.py CHANGED
@@ -8,6 +8,9 @@ from hcpdiff.utils import prepare_seed
 from hcpdiff.utils.net_utils import get_dtype, to_cuda
 from rainbowneko.infer import BasicAction, Actions
 from torch.cuda.amp import autocast
+from einops import rearrange, repeat
+from hcpdiff.models.compose import SDXLTextEncoder
+from diffusers import FluxTransformer2DModel, PixArtTransformer2DModel
 
 try:
     from diffusers.utils import randn_tensor
@@ -91,14 +94,15 @@ class MakeTimestepsAction(BasicAction):
         return {'timesteps':timesteps}
 
 class MakeLatentAction(BasicAction):
-    def __init__(self, N_ch=4, height=None, width=None, key_map_in=None, key_map_out=None):
+    def __init__(self, N_ch=4, height=None, width=None, patch_size=1, key_map_in=None, key_map_out=None):
         super().__init__(key_map_in, key_map_out)
         self.N_ch = N_ch
         self.height = height
         self.width = width
+        self.patch_size = patch_size
 
     def forward(self, noise_sampler:BaseSampler, vae, generator, device, dtype, bs=None, latents=None, start_timestep=None,
-                pooled_output=None, crop_coord=None, **states):
+                pooler_output=None, crop_coord=None, **states):
         if bs is None:
             if 'prompt' in states:
                 bs = len(states['prompt'])
@@ -121,34 +125,38 @@ class MakeLatentAction(BasicAction):
             # scale the initial noise by the standard deviation required by the noise_sampler
             noise_sampler.generator = generator
             latents = noise_sampler.init_noise(shape, device=device, dtype=get_dtype(dtype))
+            if self.patch_size>1:
+                latents = rearrange(latents, "b c (h ph) (w pw) -> b (c ph pw) h w", ph=self.patch_size, pw=self.patch_size)
         else:
             # image to image
             latents = latents.to(device)
+            if self.patch_size>1:
+                latents = rearrange(latents, "b c (h ph) (w pw) -> b (c ph pw) h w", ph=self.patch_size, pw=self.patch_size)
             latents, noise = noise_sampler.add_noise(latents, start_timestep)
 
-        output = {'latents':latents}
+        output = {'latents':latents, 'latent_w':shape[3], 'latent_h':shape[2], 'patch_size':self.patch_size}
 
         # SDXL inputs
-        if pooled_output is not None:
+        if pooler_output is not None:
             width, height = shape[3]*vae_scale_factor, shape[2]*vae_scale_factor
             if crop_coord is None:
                 crop_info = torch.tensor([height, width, 0, 0, height, width], dtype=torch.float)
             else:
                 crop_info = torch.tensor([height, width, *crop_coord], dtype=torch.float)
             crop_info = crop_info.to(device).repeat(bs, 1)
-            output['text_embeds'] = pooled_output[-1].to(device)
+            output['pooler_output'] = pooler_output.to(device)
 
             if 'negative_prompt' in states:
                 output['crop_info'] = torch.cat([crop_info, crop_info], dim=0)
 
         return output
 
-class DenoiseAction(BasicAction):
+class SD15DenoiseAction(BasicAction):
     def __init__(self, guidance_scale: float = 7.0, key_map_in=None, key_map_out=None):
         super().__init__(key_map_in, key_map_out)
         self.guidance_scale = guidance_scale
 
-    def forward(self, denoiser, noise_sampler: BaseSampler, t, latents, prompt_embeds, text_embeds=None, encoder_attention_mask=None, crop_info=None,
+    def forward(self, denoiser, noise_sampler: BaseSampler, t, latents, prompt_embeds, encoder_attention_mask=None,
                 cross_attention_kwargs=None, dtype='fp32', amp=None, model_offload=False, **states):
 
         if model_offload:
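The new `patch_size` option packs each `ph×pw` block of latent pixels into the channel dimension before denoising, the layout patch-based DiT backbones expect. A minimal sketch of the shape change (tensor sizes chosen only for illustration):

```python
import torch
from einops import rearrange

# a hypothetical 16-channel, 64x64 VAE latent
latents = torch.randn(1, 16, 64, 64)

# pack 2x2 spatial patches into channels, as MakeLatentAction now does
packed = rearrange(latents, "b c (h ph) (w pw) -> b (c ph pw) h w", ph=2, pw=2)
print(packed.shape)  # torch.Size([1, 64, 32, 32])
```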
@@ -159,19 +167,123 @@ class DenoiseAction(BasicAction):
             latent_model_input = noise_sampler.sigma_scheduler.c_in(t)*latent_model_input
             t_in = noise_sampler.sigma_scheduler.c_noise(t)
 
-            if text_embeds is None:
-                noise_pred = denoiser(latent_model_input, t_in, prompt_embeds, encoder_attention_mask=encoder_attention_mask,
-                                      cross_attention_kwargs=cross_attention_kwargs, ).sample
-            else:
-                added_cond_kwargs = {"text_embeds":text_embeds, "time_ids":crop_info}
-                # predict the noise residual
-                noise_pred = denoiser(latent_model_input, t_in, prompt_embeds, encoder_attention_mask=encoder_attention_mask,
-                                      cross_attention_kwargs=cross_attention_kwargs, added_cond_kwargs=added_cond_kwargs).sample
+            noise_pred = denoiser(latent_model_input, t_in, prompt_embeds, encoder_attention_mask=encoder_attention_mask,
+                                  cross_attention_kwargs=cross_attention_kwargs, ).sample
+        # perform guidance
+        if self.guidance_scale>1:
+            noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+            noise_pred = noise_pred_uncond+self.guidance_scale*(noise_pred_text-noise_pred_uncond)
+
+        return {'noise_pred':noise_pred}
+
+class SDXLDenoiseAction(BasicAction):
+    def __init__(self, guidance_scale: float = 7.0, key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
+        self.guidance_scale = guidance_scale
+
+    def forward(self, denoiser, noise_sampler: BaseSampler, t, latents, prompt_embeds, pooler_output=None, encoder_attention_mask=None,
+                crop_info=None, cross_attention_kwargs=None, dtype='fp32', amp=None, model_offload=False, **states):
+
+        if model_offload:
+            to_cuda(denoiser)  # to_cpu in VAE
+
+        with autocast(enabled=amp is not None, dtype=get_dtype(amp)):
+            latent_model_input = torch.cat([latents]*2) if self.guidance_scale>1 else latents
+            latent_model_input = noise_sampler.sigma_scheduler.c_in(t)*latent_model_input
+            t_in = noise_sampler.sigma_scheduler.c_noise(t)
+
+            added_cond_kwargs = {"text_embeds":pooler_output, "time_ids":crop_info}
+            # predict the noise residual
+            noise_pred = denoiser(latent_model_input, t_in, prompt_embeds, encoder_attention_mask=encoder_attention_mask,
+                                  cross_attention_kwargs=cross_attention_kwargs, added_cond_kwargs=added_cond_kwargs).sample
+
+        # perform guidance
+        if self.guidance_scale>1:
+            noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+            noise_pred = noise_pred_uncond+self.guidance_scale*(noise_pred_text-noise_pred_uncond)
+
+        return {'noise_pred':noise_pred}
+
+class PixartDenoiseAction(BasicAction):
+    def __init__(self, guidance_scale: float = 7.0, key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
+        self.guidance_scale = guidance_scale
+
+    def forward(self, denoiser, noise_sampler: BaseSampler, t, latents, prompt_embeds, encoder_attention_mask=None,
+                cross_attention_kwargs=None, dtype='fp32', amp=None, model_offload=False, **states):
+
+        if model_offload:
+            to_cuda(denoiser)  # to_cpu in VAE
+
+        with autocast(enabled=amp is not None, dtype=get_dtype(amp)):
+            latent_model_input = torch.cat([latents]*2) if self.guidance_scale>1 else latents
+            latent_model_input = noise_sampler.sigma_scheduler.c_in(t)*latent_model_input
+            t_in = noise_sampler.sigma_scheduler.c_noise(t)
 
+            if t_in.dim() == 0:
+                t_in = t_in.unsqueeze(0).expand(latent_model_input.shape[0])
+
+            noise_pred = denoiser(latent_model_input, prompt_embeds, t_in, encoder_attention_mask=encoder_attention_mask,
+                                  cross_attention_kwargs=cross_attention_kwargs, ).sample
         # perform guidance
         if self.guidance_scale>1:
             noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
             noise_pred = noise_pred_uncond+self.guidance_scale*(noise_pred_text-noise_pred_uncond)
+
+        # remove vars from DiT
+        noise_pred, _ = noise_pred.chunk(2, dim=1)
+
+        return {'noise_pred':noise_pred}
+
+class FluxDenoiseAction(BasicAction):
+    def __init__(self, guidance_scale: float = 7.0, true_cfg=False, key_map_in=None, key_map_out=None):
+        super().__init__(key_map_in, key_map_out)
+        self.guidance_scale = guidance_scale
+        self.true_cfg = true_cfg
+
+    def forward(self, denoiser, noise_sampler: BaseSampler, t, latents, prompt_embeds, pooler_output=None, encoder_attention_mask=None,
+                latent_w=None, latent_h=None, dtype='fp32', amp=None, model_offload=False, **states):
+
+        if model_offload:
+            to_cuda(denoiser)  # to_cpu in VAE
+
+        with autocast(enabled=amp is not None, dtype=get_dtype(amp)):
+            if self.true_cfg:
+                latent_model_input = torch.cat([latents]*2) if self.guidance_scale>1 else latents
+                latent_model_input = noise_sampler.sigma_scheduler.c_in(t)*latent_model_input
+                t_in = noise_sampler.sigma_scheduler.c_noise(t)
+                latent_model_input = rearrange(latent_model_input, "b c h w -> b (h w) c")
+
+                img_ids = torch.zeros(latent_h, latent_w, 3)
+                img_ids[..., 1] = img_ids[..., 1]+torch.arange(latent_h)[:, None]
+                img_ids[..., 2] = img_ids[..., 2]+torch.arange(latent_w)[None, :]
+                img_ids = repeat(img_ids, "h w c -> b (h w) c", b=latent_model_input.shape[0])
+
+                txt_ids = torch.zeros(prompt_embeds.shape[0], prompt_embeds.shape[1], 3)
+
+                # predict the noise residual
+                noise_pred = denoiser(latent_model_input, t_in, 1.0, pooler_output, prompt_embeds, txt_ids, img_ids).sample
+
+                # perform guidance
+                if self.guidance_scale>1:
+                    noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
+                    noise_pred = noise_pred_uncond+self.guidance_scale*(noise_pred_text-noise_pred_uncond)
+            else:
+                latent_model_input = latents
+                latent_model_input = noise_sampler.sigma_scheduler.c_in(t)*latent_model_input
+                t_in = noise_sampler.sigma_scheduler.c_noise(t)
+                latent_model_input = rearrange(latent_model_input, "b c h w -> b (h w) c")
+
+                img_ids = torch.zeros(latent_h, latent_w, 3)
+                img_ids[..., 1] = img_ids[..., 1]+torch.arange(latent_h)[:, None]
+                img_ids[..., 2] = img_ids[..., 2]+torch.arange(latent_w)[None, :]
+                img_ids = repeat(img_ids, "h w c -> b (h w) c", b=latent_model_input.shape[0])
+
+                txt_ids = torch.zeros(latent_model_input.shape[0], prompt_embeds.shape[1], 3)
+
+                # predict the noise residual
+                noise_pred = denoiser(latent_model_input, t_in, self.guidance_scale, pooler_output, prompt_embeds, txt_ids, img_ids).sample
+            noise_pred = rearrange(noise_pred, "b (h w) c -> b c h w", h=latent_h, w=latent_w)
 
         return {'noise_pred':noise_pred}
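`FluxDenoiseAction` flattens the packed latent into a token sequence and attaches per-token position ids: each image token carries `(0, row, col)` while text tokens get all-zero ids. A small illustration of the id layout (the 2×3 grid is arbitrary):

```python
import torch
from einops import repeat

latent_h, latent_w = 2, 3
img_ids = torch.zeros(latent_h, latent_w, 3)
img_ids[..., 1] += torch.arange(latent_h)[:, None]  # row index
img_ids[..., 2] += torch.arange(latent_w)[None, :]  # column index
img_ids = repeat(img_ids, "h w c -> b (h w) c", b=1)
# img_ids[0] is [[0,0,0], [0,0,1], [0,0,2], [0,1,0], [0,1,1], [0,1,2]]
```

With `true_cfg=False` the action runs a single forward pass and hands `self.guidance_scale` to the model (Flux-style embedded guidance) instead of doubling the batch for classifier-free guidance.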
 
@@ -182,12 +294,27 @@ class SampleAction(BasicAction):
         return {'latents':latents}
 
 class DiffusionStepAction(BasicAction):
-    def __init__(self, guidance_scale: float = 7.0, key_map_in=None, key_map_out=None):
+    def __init__(self, guidance_scale: float = 7.0, denoise_action:str|BasicAction='auto', true_cfg=False, key_map_in=None, key_map_out=None):
         super().__init__(key_map_in, key_map_out)
-        self.act_noise_pred = DenoiseAction(guidance_scale)
+        if callable(denoise_action):
+            self.act_noise_pred = denoise_action(guidance_scale)
+        else:
+            self.act_noise_pred = None
+        self.true_cfg = true_cfg
+        self.guidance_scale = guidance_scale
         self.act_sample = SampleAction()
 
-    def forward(self, denoiser, noise_sampler, **states):
+    def forward(self, denoiser, noise_sampler, TE, **states):
+        if self.act_noise_pred is None:
+            if isinstance(denoiser, FluxTransformer2DModel):
+                self.act_noise_pred = FluxDenoiseAction(guidance_scale=self.guidance_scale, true_cfg=self.true_cfg)
+            elif isinstance(TE, SDXLTextEncoder):
+                self.act_noise_pred = SDXLDenoiseAction(guidance_scale=self.guidance_scale)
+            elif isinstance(denoiser, PixArtTransformer2DModel):
+                self.act_noise_pred = PixartDenoiseAction(guidance_scale=self.guidance_scale)
+            else:
+                self.act_noise_pred = SD15DenoiseAction(guidance_scale=self.guidance_scale)
+
         states = self.act_noise_pred(denoiser=denoiser, noise_sampler=noise_sampler, **states)
         states = self.act_sample(**states)
         return states
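`DiffusionStepAction` now resolves its denoise action lazily: with `denoise_action='auto'` (the default) the first `forward` picks the action class from the denoiser and text-encoder types, and any callable pins it explicitly. A hypothetical construction (workflow wiring elided):

```python
# 'auto': FluxTransformer2DModel -> FluxDenoiseAction, SDXLTextEncoder -> SDXLDenoiseAction,
# PixArtTransformer2DModel -> PixartDenoiseAction, anything else -> SD15DenoiseAction
step = DiffusionStepAction(guidance_scale=7.0)

# or pin the action: any callable accepting guidance_scale works
step_sdxl = DiffusionStepAction(guidance_scale=5.0, denoise_action=SDXLDenoiseAction)
```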
hcpdiff/workflow/text.py CHANGED
@@ -2,7 +2,7 @@ from typing import List, Union
 
 import torch
 from hcpdiff.models import TokenizerHook
-from hcpdiff.models.compose import ComposeTEEXHook, ComposeEmbPTHook
+from hcpdiff.models.compose import ComposeTEEXHook, ComposeEmbPTHook, ComposeTokenizer
 from hcpdiff.utils import pad_attn_bias
 from hcpdiff.utils.net_utils import get_dtype, to_cpu, to_cuda
 from rainbowneko.infer import BasicAction
@@ -42,13 +42,30 @@ class TextEncodeAction(BasicAction):
         super().__init__(key_map_in, key_map_out)
         if isinstance(prompt, str) and bs is not None:
             prompt = [prompt]*bs
-            negative_prompt = [negative_prompt]*bs
+            if negative_prompt is not None:
+                negative_prompt = [negative_prompt]*bs
 
         self.prompt = prompt
         self.negative_prompt = negative_prompt
         self.bs = bs
 
-    def forward(self, te_hook, TE, dtype: str, device, amp=None, prompt=None, negative_prompt=None, model_offload=False, **states):
+    def encode_prompt_to_emb(self, tokenizer, TE, te_hook, prompt, device):
+        token_info = ComposeTokenizer.tokenize_ex(tokenizer, prompt, truncation=True, padding="max_length",
+                                                  return_tensors="pt", device=device)
+        if te_hook.use_attention_mask:
+            attention_mask = token_info.get('attention_mask', None)
+        else:
+            attention_mask = None
+        token_info['attention_mask'] = attention_mask
+        input_ids = token_info.pop('input_ids')  # for TEEXHook
+        prompt_embeds, pooler_output = TE(
+            input_ids,
+            **token_info,
+            output_hidden_states=True,
+        )
+        return prompt_embeds, pooler_output, attention_mask
+
+    def forward(self, te_hook, tokenizer, TE, dtype: str, device, amp=None, prompt=None, negative_prompt=None, model_offload=False, **states):
         prompt = prompt or self.prompt
         negative_prompt = negative_prompt or self.negative_prompt
 
@@ -56,7 +73,10 @@ class TextEncodeAction(BasicAction):
             to_cuda(TE)
 
         with autocast(enabled=amp is not None, dtype=get_dtype(amp)):
-            emb, pooled_output, attention_mask = te_hook.encode_prompt_to_emb(negative_prompt+prompt)
+            if negative_prompt is None:
+                emb, pooler_output, attention_mask = self.encode_prompt_to_emb(tokenizer, TE, te_hook, prompt, device)
+            else:
+                emb, pooler_output, attention_mask = self.encode_prompt_to_emb(tokenizer, TE, te_hook, negative_prompt+prompt, device)
         if attention_mask is not None:
             emb, attention_mask = pad_attn_bias(emb, attention_mask)
 
@@ -64,12 +84,12 @@ class TextEncodeAction(BasicAction):
             to_cpu(TE)
 
         if not isinstance(te_hook, ComposeTEEXHook):
-            pooled_output = None
+            pooler_output = None
         return {'prompt':prompt, 'negative_prompt':negative_prompt, 'prompt_embeds':emb, 'encoder_attention_mask':attention_mask,
-                'pooled_output':pooled_output}
+                'pooler_output':pooler_output}
 
 class AttnMultTextEncodeAction(TextEncodeAction):
-    def forward(self, te_hook, token_ex, TE, dtype: str, device, amp=None, prompt=None, negative_prompt=None, model_offload=False, **states):
+    def forward(self, te_hook, tokenizer, token_ex, TE, dtype: str, device, amp=None, prompt=None, negative_prompt=None, model_offload=False, **states):
         prompt = prompt or self.prompt
         negative_prompt = negative_prompt or self.negative_prompt
 
@@ -81,18 +101,29 @@ class AttnMultTextEncodeAction(TextEncodeAction):
         if model_offload:
             to_cuda(TE)
 
-        mult_p, clean_text_p = token_ex.parse_attn_mult(prompt)
-        mult_n, clean_text_n = token_ex.parse_attn_mult(negative_prompt)
-        with autocast(enabled=amp is not None, dtype=get_dtype(amp)):
-            emb, pooled_output, attention_mask = te_hook.encode_prompt_to_emb(clean_text_n+clean_text_p)
-            if attention_mask is not None:
-                emb, attention_mask = pad_attn_bias(emb, attention_mask)
-        emb_n, emb_p = emb.chunk(2)
-        emb_p = te_hook.mult_attn(emb_p, mult_p)
-        emb_n = te_hook.mult_attn(emb_n, mult_n)
+        if negative_prompt is None:
+            mult_p, clean_text_p = token_ex.parse_attn_mult(prompt)
+            with autocast(enabled=amp is not None, dtype=get_dtype(amp)):
+                emb, pooler_output, attention_mask = self.encode_prompt_to_emb(tokenizer, TE, te_hook, clean_text_p, device)
+                if attention_mask is not None:
+                    emb, attention_mask = pad_attn_bias(emb, attention_mask)
+            emb = te_hook.mult_attn(emb, mult_p)
+        else:
+            mult_p, clean_text_p = token_ex.parse_attn_mult(prompt)
+            mult_n, clean_text_n = token_ex.parse_attn_mult(negative_prompt)
+            with autocast(enabled=amp is not None, dtype=get_dtype(amp)):
+                emb, pooler_output, attention_mask = self.encode_prompt_to_emb(tokenizer, TE, te_hook, clean_text_n+clean_text_p, device)
+                if attention_mask is not None:
+                    emb, attention_mask = pad_attn_bias(emb, attention_mask)
+            emb_n, emb_p = emb.chunk(2)
+            emb_p = te_hook.mult_attn(emb_p, mult_p)
+            emb_n = te_hook.mult_attn(emb_n, mult_n)
 
         if model_offload:
             to_cpu(TE)
 
-        return {'prompt':list(clean_text_p), 'negative_prompt':list(clean_text_n), 'prompt_embeds':torch.cat([emb_n, emb_p], dim=0),
-                'encoder_attention_mask':attention_mask, 'pooled_output':pooled_output}
+        if negative_prompt is None:
+            return {'prompt':list(clean_text_p), 'prompt_embeds':emb, 'encoder_attention_mask':attention_mask, 'pooler_output':pooler_output}
+        else:
+            return {'prompt':list(clean_text_p), 'negative_prompt':list(clean_text_n), 'prompt_embeds':torch.cat([emb_n, emb_p], dim=0),
+                    'encoder_attention_mask':attention_mask, 'pooler_output':pooler_output}
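Both encode actions keep the classifier-free-guidance batch ordered negatives-first: embeddings are produced from `negative_prompt+prompt` and the denoise actions split them back with `chunk(2)` into `(uncond, text)`. A toy check of that convention, with stand-in tensors:

```python
import torch

emb_n = torch.zeros(1, 77, 768)  # stand-in negative embedding
emb_p = torch.ones(1, 77, 768)   # stand-in positive embedding
prompt_embeds = torch.cat([emb_n, emb_p], dim=0)  # negatives first

uncond, text = prompt_embeds.chunk(2)
assert torch.equal(uncond, emb_n) and torch.equal(text, emb_p)
```

When `negative_prompt` is None the actions now skip the doubled batch entirely, which is what the guidance-free paths (e.g. Flux with `true_cfg=False`) rely on.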
hcpdiff/workflow/vae.py CHANGED
@@ -41,7 +41,11 @@ class EncodeAction(BasicAction):
         else:
             init_latents = vae.encode(image).latent_dist.sample(generator)
 
-        init_latents = vae.config.scaling_factor*init_latents.to(dtype=get_dtype(dtype))
+        init_latents = init_latents.to(dtype=get_dtype(dtype))
+        if shift_factor := getattr(vae.config, 'shift_factor', None) is not None:
+            init_latents = (init_latents-shift_factor)*vae.config.scaling_factor
+        else:
+            init_latents = init_latents*vae.config.scaling_factor
         if model_offload:
             to_cpu(vae)
         return {'latents':init_latents}
@@ -63,7 +67,11 @@ class DecodeAction(BasicAction):
             torch.cuda.synchronize()
             to_cuda(vae)
         latents = latents.to(dtype=vae.dtype)
-        image = vae.decode(latents/vae.config.scaling_factor, return_dict=False)[0]
+        if shift_factor := getattr(vae.config, 'shift_factor', None) is not None:
+            latents = latents/vae.config.scaling_factor + shift_factor
+        else:
+            latents = latents/vae.config.scaling_factor
+        image = vae.decode(latents, return_dict=False)[0]
         if model_offload:
             to_cpu(vae)
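One caveat worth noting in both hunks above: in Python, `x := a is not None` parses as `x := (a is not None)`, so `shift_factor` is bound to the boolean test result rather than the config value, and for VAEs that define `shift_factor` the arithmetic then uses `True` (i.e. 1.0) instead of the configured shift. The intended SD3/Flux-style handling needs parentheses; a minimal sketch of the encode-side scaling, assuming the usual diffusers VAE config fields:

```python
import torch

def scale_latents(init_latents: torch.Tensor, vae_config) -> torch.Tensor:
    # parentheses keep the assignment separate from the `is not None` test
    if (shift_factor := getattr(vae_config, 'shift_factor', None)) is not None:
        return (init_latents - shift_factor) * vae_config.scaling_factor
    return init_latents * vae_config.scaling_factor
```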
 
{hcpdiff-2.3.1.dist-info → hcpdiff-2.4.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hcpdiff
-Version: 2.3.1
+Version: 2.4
 Summary: A universal Diffusion toolbox
 Home-page: https://github.com/IrisRainbowNeko/HCP-Diffusion
 Author: Ziyi Dong
{hcpdiff-2.3.1.dist-info → hcpdiff-2.4.dist-info}/RECORD RENAMED
@@ -3,22 +3,22 @@ hcpdiff/train_colo.py,sha256=EsuNSzLBvGTZWU_LEk0JpP-F5eNW0lwkawIRAX38jmE,9250
 hcpdiff/trainer_ac.py,sha256=-owV-3_bvPxuQsZS2WaajBDh58HpftRtnx0GJkswqaY,2787
 hcpdiff/trainer_ac_single.py,sha256=zyZVrutLUbIJYW1HzUnQ_RnmIcDhbC7M_CT833PJH5w,993
 hcpdiff/trainer_deepspeed.py,sha256=7lGsiAstWuIlmhRMwWTcJCkoxzUaakVxBngKDnJdSJk,1947
-hcpdiff/ckpt_manager/__init__.py,sha256=r_sgjZWCLtdJrRkqqU6aPdfubXSYfPh2Z_Vf_XpZXXs,240
+hcpdiff/ckpt_manager/__init__.py,sha256=OCotG2Q4q3n4ZpN6UfY-j-Lg4VOAvtLnb0xygkkqZm8,280
 hcpdiff/ckpt_manager/ckpt.py,sha256=2A093lT03M1ZsJIMWl376V165eh0TZwOgiGrz3LM73Q,1248
 hcpdiff/ckpt_manager/loader.py,sha256=6iZDUj-Vfc5T9eGdWfFMQw4n1GqyLqaLBolgAtgqPq8,3640
-hcpdiff/ckpt_manager/format/__init__.py,sha256=a3cdKkOTDgdVbDQwSC4mlxOigjX2hBvRb5_X7E3TQWs,237
-hcpdiff/ckpt_manager/format/diffusers.py,sha256=qhGbrKAaeLyjFzY-Lj4sL1THHFNrta41JGGMoXT-bCE,3761
-hcpdiff/ckpt_manager/format/emb.py,sha256=FrqfTfJ8H7f0Zw17NTWCP2AJtpsJI5oXR5IAd4NekhU,680
-hcpdiff/ckpt_manager/format/lora_webui.py,sha256=4y_T9RdmFTxWzsXd8guNjCiukmyILa5j4MPrhVIL4Qk,10017
-hcpdiff/ckpt_manager/format/sd_single.py,sha256=4DZLAl1RNC_nPxuW-lmrBlIMFUhpSTa7HGHgu7Yx8qk,2322
+hcpdiff/ckpt_manager/format/__init__.py,sha256=9d9NOY1XRDGlebOiQpOnpQw14uPHZyL3-6Cvkc8hYZY,277
+hcpdiff/ckpt_manager/format/diffusers.py,sha256=VKXI2i4whrIaHMOFMzA8UqB5ytXOv6WMgEyU9EkDW5Y,5008
+hcpdiff/ckpt_manager/format/emb.py,sha256=jC-PR47y-TLn4xMkrujlXluGiW-tcUIhQjKZ_G33gUc,899
+hcpdiff/ckpt_manager/format/lora_webui.py,sha256=9pBesgzifInW8YnzdzmEbyrClyHt-zmkzla7kK4YrBg,10015
+hcpdiff/ckpt_manager/format/sd_single.py,sha256=gqN0NqrsxaYMAspdl48KEWtzGz_yxbxxfC6LJapbHfs,3598
 hcpdiff/data/__init__.py,sha256=ZFKtanOoMo3G3eKUJPhysnHXnr8BNARERkcMB6B897U,292
 hcpdiff/data/dataset.py,sha256=1k4GldW13eVyqK_9hrQniqr3_XYAapnWF7iXl_1GXGg,877
 hcpdiff/data/cache/__init__.py,sha256=ToCmokYH6DghlSwm7HJFirPRIWJ0LkgzqVOYlgoAkQw,25
-hcpdiff/data/cache/vae.py,sha256=gB89zs4CdNlvukDXhVYU9QZrY6VTFUWfzjeF2psNQ50,4070
+hcpdiff/data/cache/vae.py,sha256=_Vkx4AXB17hgs5Rgy9NbytMbcNu0pHMFH5mreeNjKxk,4572
 hcpdiff/data/handler/__init__.py,sha256=G8ZTQF91ilkTRmUoWdmAissTSZ7fvNUpm_hBYmXKTtk,258
 hcpdiff/data/handler/controlnet.py,sha256=bRDMD9BP8-VaG5VrxzvcFKfkqeTbChNfrJSZ3vXbQgY,658
 hcpdiff/data/handler/diffusion.py,sha256=S-_7o5Z1tm6LmRZVZs21rbJC7iUoq0tHOsSjKK6geVk,4156
-hcpdiff/data/handler/text.py,sha256=gOzqB2oEkEUbiuy0kZWduo0c-w4Buu60KI6q6Nyl3aM,4208
+hcpdiff/data/handler/text.py,sha256=I_dqFf1gKwRsNdBCv8dvAR16Do8HwOgwVLt4iH8n0bo,4331
 hcpdiff/data/source/__init__.py,sha256=265M8qfWNUE4SKX0pdXhLYjCnCuae5YE4bfZpO-ydXc,187
 hcpdiff/data/source/folder_class.py,sha256=bs4qPMTzwcnT6ZFlT3tpi9sclsRF9a2MBA1pQD-9EYs,961
 hcpdiff/data/source/text.py,sha256=VgI5Ouq986Yy1jwD2fZ9iBlsRciPCeARZmOPEZIcaQY,1468
@@ -28,57 +28,63 @@ hcpdiff/diffusion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
 hcpdiff/diffusion/noise/__init__.py,sha256=D83EZ6bnc6Ucu4AZwE6rpmXCtwYfHHumeVq97brbnIE,47
 hcpdiff/diffusion/noise/pyramid_noise.py,sha256=KbpyMT1BHNIaAa7g5eECDkTttOMoMWVFmbP-ekBsuEY,1693
 hcpdiff/diffusion/sampler/VP.py,sha256=r0Q_RROEIeNNw93XrOD5htW78rfuoSxy1WBQEoQL83s,958
-hcpdiff/diffusion/sampler/__init__.py,sha256=Lrwg1us8qo943T7mdIXFDRXfKvnLhrzwmi6DrIKIiUA,135
-hcpdiff/diffusion/sampler/base.py,sha256=UbE_AmtvLg-Hr2bkYz8PvNWB63tvtacUvCIDm_W6opA,5484
-hcpdiff/diffusion/sampler/diffusers.py,sha256=wIMs8n3kdci1On0FUCV0si324ZE9zeRw_CxaHP8rdcs,2586
+hcpdiff/diffusion/sampler/__init__.py,sha256=8UmSOxcFzdYnO9hkMzWx4V09ysx5RKDnf2sVYlZEM9w,156
+hcpdiff/diffusion/sampler/base.py,sha256=HhscR81Qq_m4yfnhGEDPkvxb6tJozlbT8YuTxku0kZQ,5891
+hcpdiff/diffusion/sampler/diffusers.py,sha256=-A2vbKM-CjbE4CS77jroeNhaSm8LqNsWdvYUPoDZzTM,2640
 hcpdiff/diffusion/sampler/sigma_scheduler/__init__.py,sha256=eiSmMBkXI_LfxnNrXj5XptcF0dGcPas--vWvqhFGlv8,273
-hcpdiff/diffusion/sampler/sigma_scheduler/base.py,sha256=UT4tbjFf80KYfU08y0hJf8h_Cl80a5MUhK5FsKLsqbY,2521
-hcpdiff/diffusion/sampler/sigma_scheduler/ddpm.py,sha256=SA8lXT6lucJot_rpJ84Wz-_uc5dXfb2QPoQgJHSKOj4,12999
-hcpdiff/diffusion/sampler/sigma_scheduler/edm.py,sha256=m1YlIyn61zfDjxLMcHvWs0nzULbHgXeB7WGKmTiaGSU,4127
-hcpdiff/diffusion/sampler/sigma_scheduler/flow.py,sha256=FtWpesUtSmFuiIGkrrVhYJweB7INZiw0atC64tc0Nk4,2020
+hcpdiff/diffusion/sampler/sigma_scheduler/base.py,sha256=_cAfak2M3CMdH4EK6AYKPbKA3ccRTDyNJw9NjfXBEOM,2210
+hcpdiff/diffusion/sampler/sigma_scheduler/ddpm.py,sha256=PBh2LFkRF5mrmedHvEHh8vATnIMU2gIoi3IlUEheaLY,13045
+hcpdiff/diffusion/sampler/sigma_scheduler/edm.py,sha256=cQuNj8jk4r3TDEN86i_tLYHypO3QFUPpVZMaAGVr3dA,4131
+hcpdiff/diffusion/sampler/sigma_scheduler/flow.py,sha256=1edTaTiM02ogV1lAbnI4e2gc3QUaNgl8RWwYQm6vf2s,2023
 hcpdiff/diffusion/sampler/sigma_scheduler/zero_terminal.py,sha256=CCqQLkGo4omkxzFovYdZQzdZVwIxK3PiOitZFww8MHs,859
-hcpdiff/easy/__init__.py,sha256=-emoyCOZlLCu3KNMI8L4qapUEtEYFSoiGU6-rKv1at4,149
-hcpdiff/easy/sampler.py,sha256=dQSBkeGh71O0DAmZLhTHTbk1bY7XzyUCeW1oJO14A4I,1250
+hcpdiff/diffusion/sampler/timer/__init__.py,sha256=QNmmF6dwMWQM16mRYXDq0FY7h5RiQ_tnM76PMnZnH4E,108
+hcpdiff/diffusion/sampler/timer/base.py,sha256=CgJ4aqG9jzHrlRzqQRJUOq95Cait_mXYICCIHA_OO64,872
+hcpdiff/diffusion/sampler/timer/shift.py,sha256=v2nHj6Z5bfzzVH76ypfEgK1EDDf0Lp9lfJSRCadSuwU,1735
+hcpdiff/easy/__init__.py,sha256=JT-dbN4e3iG3zHT2p9_TaesTPDwvDuj9PNFaPEhjxjU,208
+hcpdiff/easy/sampler.py,sha256=5O01VRz-bYJfzBIqRsD3vxE3AbVwh3zKzjjFXzRX9-E,1438
 hcpdiff/easy/cfg/__init__.py,sha256=SxHMWG6T2CXhX3dP0xizSMd9vFWPaZQDc4Gj4CF__yQ,253
-hcpdiff/easy/cfg/sd15_train.py,sha256=NtgsQLg1sd5JFmHU4nqMPOrvP7zmwo2x0MCspjVNQEY,7000
+hcpdiff/easy/cfg/sd15_train.py,sha256=L9bNWM87T1DIZpWetwK0gwPIWL39JoNOovDCxb7cDiw,6967
 hcpdiff/easy/cfg/sdxl_train.py,sha256=rVLLKVMKB_PHuum3dKQcBqL0uR8QhzmdRllM-pYnbK4,4534
 hcpdiff/easy/cfg/t2i.py,sha256=SnjFjZAKd9orjJr3RW5_N2_EIlW2Ree7JMvdNUAR9gc,9507
-hcpdiff/easy/model/__init__.py,sha256=CA-7r3R2Jgweekk1XNByFYttLolbWyUV2bCnXygcD8w,133
+hcpdiff/easy/model/__init__.py,sha256=UukTqyidx-W2n2eiG4mUQBa0Sziv7gYmuBPx1twvE90,170
 hcpdiff/easy/model/cnet.py,sha256=m0NTH9V1kLzb5GybwBrSNT0KvTcRpPfGkzUeMz9jZZQ,1084
-hcpdiff/easy/model/loader.py,sha256=Tdx-lhQEYf2NYjVM1A5B8x6ZZpJKcXUkFIPIbr7h7XM,3456
+hcpdiff/easy/model/loader.py,sha256=nm06hPqtQuIwpa6GnygG1CBgmywhmB5mcACRcDPa51U,4355
 hcpdiff/evaluate/__init__.py,sha256=qWxV0D8Ho5uBj2YbaC_QFDnT49PSKPfh44m4ivkNbMM,108
 hcpdiff/evaluate/evaluator.py,sha256=9BZQBeC-N7p-ICx6Giw9v-2Tb9volMTDmeDfhj0nXJ0,2940
 hcpdiff/evaluate/previewer.py,sha256=-vE0YXVfos70CQMo9ZInw7xu3d88DlTfVLs4BzzkxfM,3140
 hcpdiff/evaluate/metrics/__init__.py,sha256=vE0nSvBtDBu9SomANvWcm2UHX56PhCYwhgrcmm_mKyo,39
 hcpdiff/evaluate/metrics/clip_score.py,sha256=rQgweu5QcqW3fPI3EXcNbrH2QCcSAekE3lpYk45P2M4,900
-hcpdiff/loss/__init__.py,sha256=2dwPczSiv3rB5fzOeYbl5ZHpMU-qXOQlXeOiXdxcxwM,173
+hcpdiff/loss/__init__.py,sha256=wlWpg4a2ev7JXsv52MwJmWCduvq011IoSTmABJ_XM2M,230
 hcpdiff/loss/base.py,sha256=Vvpm-KZGH4n-gYIlnVAtPl1B799c7v0dJXJ5BBh3yO0,1112
+hcpdiff/loss/charbonnier.py,sha256=Qvjj9bznJEBn_NFPbPsVf9jzawOcfVvien54bMS2UGc,461
 hcpdiff/loss/gw.py,sha256=0yi1kozuII3xZA6FnjOhINtvScWt1MyBZLBtMKmgojM,1224
 hcpdiff/loss/ssim.py,sha256=YofadvBkc6sklxBUx1p3ADw5OHOZPK3kaHz8FH5a6m4,1281
-hcpdiff/loss/vlb.py,sha256=s78iBnXUiDWfGf7mYmhUnHqxqea5gSByKOoqBrX6bzU,3222
-hcpdiff/loss/weighting.py,sha256=9qzMnvCb6b5qx0p08GDSlkxmYEqQcNt79XdRBvfHmiI,2914
-hcpdiff/models/__init__.py,sha256=eQS7DPiGLiE1MFRkZj_17IY3IsfDUVcYpcOmhHb5B9o,472
-hcpdiff/models/cfg_context.py,sha256=e2B3K1KwJhzbD6xdJUOyNtl_XgQ0296XI3FHw3gvZF4,1502
+hcpdiff/loss/vlb.py,sha256=NqkhzGM3g_67nmpg18I0W_KC8X6YvKSZyHfH9C14OCg,3256
+hcpdiff/loss/weighting.py,sha256=qJvp895qwT6H6_x1IhxkHWMYPjqM5peiyciB634LREI,3668
+hcpdiff/models/__init__.py,sha256=__LnS75jcEmJ0Y_b5N6zsDI9CMxByBNKNuPrcUXRRwo,485
+hcpdiff/models/cfg_context.py,sha256=FePMdvzDfH4Xo6aYVqR2UTzU7t_MhAKhnSrGWlua1Ts,1553
 hcpdiff/models/container.py,sha256=z3p5TmQhxdzXSIfofz55_bmEhSsgUJsy1o9EcDs8Oeo,696
 hcpdiff/models/controlnet.py,sha256=VIkUzJCVpCqqQOtRSLQPfbcDy9CsXutxLeZB6PdZfA0,7809
 hcpdiff/models/lora_base.py,sha256=LGwBD9KP6qf4pgTx24i5-JLo4rDBQ6jFfterQKBjTbE,6758
 hcpdiff/models/lora_base_patch.py,sha256=Tdb_b3TN_K-04nlUvcfBh6flPcbL9M4iP7jOVyb1jXQ,7271
 hcpdiff/models/lora_layers.py,sha256=O9W_Ue71lHj7Y_GbpioF4Hc3h2-z_zOqck93VYUra6s,7777
 hcpdiff/models/lora_layers_patch.py,sha256=GYFYsJD2VSLZfdnLma9CmQEHz09HROFJcc4wc_gs9f0,8198
-hcpdiff/models/text_emb_ex.py,sha256=O0XZqid01OrB0dHY7hCiBvdU2026SvZ38yfQaF2TWrs,8018
-hcpdiff/models/textencoder_ex.py,sha256=JrTQ30Avx8tPbdr-Q6K5BvEWCEdsu8Z7eSOzMqpUuzg,8270
+hcpdiff/models/text_emb_ex.py,sha256=HQAwXf-3CXH27ehOjCNRMk26Sp7hbj4rU8ewWP_GWa8,8116
+hcpdiff/models/textencoder_ex.py,sha256=vQNFDmmAfXmF9cGbqMKYQhoYrrdwq39PVb0mubEkTLs,6926
 hcpdiff/models/tokenizer_ex.py,sha256=zKUn4BY7b3yXwK9PWkZtQKJPyKYwUc07E-hwB9NQybs,2446
-hcpdiff/models/compose/__init__.py,sha256=lTNFTGg5csqvUuys22RqgjmWlk_7Okw6ZTsnTi1pqCg,217
-hcpdiff/models/compose/compose_hook.py,sha256=FfDSfn5FuLFGM80HMUwiUopy1P4xDbvKSBDuA6QK2So,6112
-hcpdiff/models/compose/compose_textencoder.py,sha256=tiFoStKOIEH9YzsZQrLki4gra18kMy3wSzSUrVQG1sk,6607
-hcpdiff/models/compose/compose_tokenizer.py,sha256=g3l0pOFv6p7Iigxm6Pqt_iTUXBlO1_SWAQOt0m54IoE,3033
-hcpdiff/models/compose/sdxl_composer.py,sha256=NtMGaFGZTfKsPJSVi2yT-UM6K1WKWtk99XxVmTcKlk8,2164
-hcpdiff/models/wrapper/__init__.py,sha256=HbGQmFnfccr-dtvZKjEv-pmR4cCnF4fwGLKS3tuG_OY,135
-hcpdiff/models/wrapper/pixart.py,sha256=nRUvHSHn4TYg_smC0xpeW-GtUgXss-MuaVPTHpMozDE,1147
-hcpdiff/models/wrapper/sd.py,sha256=EywmVU2QzR74M_4eH_uXVW8HJNauyjwcZPU7rRAQ7eI,11666
+hcpdiff/models/compose/__init__.py,sha256=c2Zsk8ge6T5iOOl-8HRKtDoSpdQ3fS88mDiYZC3VpHE,257
+hcpdiff/models/compose/compose_hook.py,sha256=rU6PIhyEVjnDQlw5NOB5QdydNlSLe60CnilUn_NpxH4,6352
+hcpdiff/models/compose/compose_textencoder.py,sha256=7rPplxzs9xXxYqpHhTp-qX30nAXtgb6ZpClFLDVouvk,7409
+hcpdiff/models/compose/compose_tokenizer.py,sha256=AogUrqwmztAX21oyVfW8j5fcPPiGDZMT1_9wPQcxBMc,4572
+hcpdiff/models/compose/flux.py,sha256=Fcg-zzpsWbNdOB0VsJAoicMK62l2uCWugFoq_uIxtzY,3632
+hcpdiff/models/compose/sdxl.py,sha256=cgR5BplUSPPud_nG0dH6LT3SoWu9ypLVXLMgzcnvyaI,4522
+hcpdiff/models/wrapper/__init__.py,sha256=ZFQ4CqJvSA_saKmI8eKgqvT6pIKtG52gonE4ZzLOkok,165
+hcpdiff/models/wrapper/flux.py,sha256=Zqm-Qnz-Jrtwd1h5sPfctVIp6cnSQViFvGnSw0UOn6E,4567
+hcpdiff/models/wrapper/pixart.py,sha256=zsqA3soCdGyTxAO3u9NiQdqO8rUPsrtXiAh8E-ziMd4,1861
+hcpdiff/models/wrapper/sd.py,sha256=O49ziLrHeGVaVEXGcgZ4zkRNSnARiLeZWmPOW8ZzIU0,12080
 hcpdiff/models/wrapper/utils.py,sha256=NyebMoAPnrgcTHbiIocSD-eGdGdD-V1G_TQuWsRWufw,665
 hcpdiff/parser/__init__.py,sha256=-2dDZ2Ii4zoGQqDTme94q4PpJbBiV6HS5BsDASz4Xbo,33
-hcpdiff/parser/embpt.py,sha256=LgwZ0f0tLn3DrTo5ZpSCsZcA5330UpiW_sK96yEPmOM,1307
+hcpdiff/parser/embpt.py,sha256=-AP2wn8FkxryzzMAOERAt9vdd8ZEWD1tNUYVPAghaEM,1419
 hcpdiff/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hcpdiff/tools/convert_caption_txt2json.py,sha256=tbBgIphJWvXUoXjtwsnLX2w9IZEY3jTgxbTvUMgukbM,945
 hcpdiff/tools/convert_old_lora.py,sha256=yIP9RGcyQbwT2NNAZtTLgBXs6XJOHRvoHQep0SdqDho,453
@@ -94,26 +100,24 @@ hcpdiff/tools/save_model.py,sha256=gbfYi_EfEBZEUcDjle6MDHA19sQWY0zA8_y_LMzHQ7M,4
 hcpdiff/tools/sd2diffusers.py,sha256=vB6OnBLw60sJkdpVZcYEPtKAZW1h8ErbSGSRq0uAiIk,16855
 hcpdiff/utils/__init__.py,sha256=28K9Ui0uur-vHuUdlSyIBYijgu2b7rGOPXN2ogJu1z8,82
 hcpdiff/utils/colo_utils.py,sha256=JyLUvVnISa48CnryNLrgVxMo-jxu2UhBq70eYPrkjuI,837
-hcpdiff/utils/inpaint_pipe.py,sha256=CRy1MUlPmHifCAbZnKOP0qbLp2grn7ZbVeaB2qIA4ig,42862
-hcpdiff/utils/net_utils.py,sha256=gdwLYDNKV2t3SP0jBIO3d0HtY6E7jRaf_rmPT8gKZZE,9762
-hcpdiff/utils/pipe_hook.py,sha256=-UDX3FtZGl-bxSk13gdbPXc1OvtbCcpk_fvKxLQo3Ag,31987
+hcpdiff/utils/net_utils.py,sha256=dL3Q9I8X7xAcssQomWAvapwmTXCffWQ80vcAVUCHOt4,10211
 hcpdiff/utils/torch_utils.py,sha256=gBZCcDKZc0NGDQx6QeHuQePoZ82kQRhaL7oEdZIYGvU,573
 hcpdiff/utils/utils.py,sha256=hZnZP1IETgVpScxES0yIuRfc34TnzvAqmgOTK_56ssw,4976
-hcpdiff/workflow/__init__.py,sha256=i5s7QXo6wK9607KL0KTW4suE1c-HGJ5_EgnCdVLl3WM,885
-hcpdiff/workflow/diffusion.py,sha256=hKefBrVP6-025MhdrKOQMUhHxLaGqjpUKhR6WahYwh0,9549
+hcpdiff/workflow/__init__.py,sha256=Ve_ZZZVKEplR5SDRq0yRlrT_DHSxRtpESnQSgCZ1qR0,948
+hcpdiff/workflow/diffusion.py,sha256=oEnnc8tnITXXko2Fb6ahDg7FbuKa8lZr33JWecCznPM,16535
 hcpdiff/workflow/fast.py,sha256=kZt7bKrvpFInSn7GzbkTkpoCSM0Z6IbDjgaDvcbFYf8,1024
 hcpdiff/workflow/flow.py,sha256=FFbFFOAXT4c31L5bHBEB_qeVGuBQDLYhq8kTD1chGNo,2548
 hcpdiff/workflow/io.py,sha256=4oiE_PS3sOVYT8M6PDwvT5h9XzoKDMQR0n_4-Ktttys,3284
 hcpdiff/workflow/model.py,sha256=1gj5yOTefYTnGXVR6JPAfxIwuB69YwN6E-BontRcuyQ,2913
-hcpdiff/workflow/text.py,sha256=XQvN4zzK7VaGxy4FDgSDeWh2jjk7UZU24moeRKAWXRE,4608
+hcpdiff/workflow/text.py,sha256=vz8zAA6qcYOBfvak6kM2m2KaLjiaaSPgVWhrPQtko0I,6402
 hcpdiff/workflow/utils.py,sha256=xojaMG4lHsymslc8df5uiVXmmBVWpn_Phqka8qzJEWw,2226
-hcpdiff/workflow/vae.py,sha256=cingDPkIOc4qGpOwwhXJK4EQbGoIxO583pm6gGov5t8,3118
+hcpdiff/workflow/vae.py,sha256=LOvGFm6xt1abGOh8UezloXD1pMhYkv7S0vQUE4HojGo,3548
 hcpdiff/workflow/daam/__init__.py,sha256=ySIDaxloN-D3qM7OuVaG1BR3D-CibDoXYpoTgw0zUhU,59
 hcpdiff/workflow/daam/act.py,sha256=tHbsFWTYYU4bvcZOo1Bpi_z6ofpJatRYccl4vvf8wIA,2756
 hcpdiff/workflow/daam/hook.py,sha256=z9f9mBjKW21xuUZ-iQxQ0HbWOBXtZrisFB0VNMq6d0U,4383
-hcpdiff-2.3.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-hcpdiff-2.3.1.dist-info/METADATA,sha256=zaJHhKQiezDTvyv-IIoRHf4VCv0z2gU9fq0sVi9XhTg,10323
-hcpdiff-2.3.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-hcpdiff-2.3.1.dist-info/entry_points.txt,sha256=_4VRsEsEWOhHfzBDu9bx8Wh_S8Wi4ZTHpI0n6rU0J-I,258
-hcpdiff-2.3.1.dist-info/top_level.txt,sha256=shyf78x-HVgykYpsmY22mKG0xIc7Qk30fDMdavdYWQ8,8
-hcpdiff-2.3.1.dist-info/RECORD,,
+hcpdiff-2.4.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+hcpdiff-2.4.dist-info/METADATA,sha256=AM8nFWpSpgB0EWNhp40nh7a2G_qHS0We8sKj_ihct7w,10321
+hcpdiff-2.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+hcpdiff-2.4.dist-info/entry_points.txt,sha256=_4VRsEsEWOhHfzBDu9bx8Wh_S8Wi4ZTHpI0n6rU0J-I,258
+hcpdiff-2.4.dist-info/top_level.txt,sha256=shyf78x-HVgykYpsmY22mKG0xIc7Qk30fDMdavdYWQ8,8
+hcpdiff-2.4.dist-info/RECORD,,
hcpdiff/models/compose/sdxl_composer.py DELETED
@@ -1,39 +0,0 @@
-from .compose_textencoder import ComposeTextEncoder
-from .compose_tokenizer import ComposeTokenizer
-from transformers import CLIPTextModel, AutoTokenizer, CLIPTextModelWithProjection
-from typing import Optional, Union, Tuple
-import torch
-from transformers.modeling_outputs import BaseModelOutputWithPooling
-
-class CLIPTextModelWithProjection_Align(CLIPTextModelWithProjection):
-    # fxxk the transformers!
-    def forward(
-            self,
-            input_ids: Optional[torch.Tensor] = None,
-            attention_mask: Optional[torch.Tensor] = None,
-            position_ids: Optional[torch.Tensor] = None,
-            output_attentions: Optional[bool] = None,
-            output_hidden_states: Optional[bool] = None,
-            return_dict: Optional[bool] = None,
-    ) -> Union[Tuple, BaseModelOutputWithPooling]:
-        text_outputs = super().forward(input_ids, attention_mask, position_ids, output_attentions, output_hidden_states, return_dict)
-        return BaseModelOutputWithPooling(
-            last_hidden_state=text_outputs.last_hidden_state,
-            pooler_output=text_outputs.text_embeds,
-            hidden_states=text_outputs.hidden_states,
-            attentions=text_outputs.attentions,
-        )
-
-class SDXLTextEncoder(ComposeTextEncoder):
-    @classmethod
-    def from_pretrained(cls, pretrained_model_name_or_path: str, *args, subfolder=None, revision:str=None, **kwargs):
-        clip_L = CLIPTextModel.from_pretrained(pretrained_model_name_or_path, subfolder='text_encoder', **kwargs)
-        clip_bigG = CLIPTextModelWithProjection_Align.from_pretrained(pretrained_model_name_or_path, subfolder='text_encoder_2', **kwargs)
-        return cls([('clip_L', clip_L), ('clip_bigG', clip_bigG)])
-
-class SDXLTokenizer(ComposeTokenizer):
-    @classmethod
-    def from_pretrained(cls, pretrained_model_name_or_path: str, *args, subfolder=None, revision:str=None, **kwargs):
-        clip_L = AutoTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder='tokenizer', **kwargs)
-        clip_bigG = AutoTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder='tokenizer_2', **kwargs)
-        return cls([('clip_L', clip_L), ('clip_bigG', clip_bigG)])