InvokeAI 6.10.0__py3-none-any.whl → 6.10.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invokeai/app/invocations/flux_denoise.py +1 -15
- invokeai/app/invocations/metadata_linked.py +0 -47
- invokeai/app/invocations/z_image_denoise.py +84 -244
- invokeai/app/services/config/config_default.py +1 -3
- invokeai/app/services/model_manager/model_manager_default.py +0 -7
- invokeai/backend/flux/denoise.py +11 -196
- invokeai/backend/model_manager/configs/lora.py +0 -36
- invokeai/backend/model_manager/load/model_cache/model_cache.py +2 -104
- invokeai/backend/model_manager/load/model_loaders/cogview4.py +1 -2
- invokeai/backend/model_manager/load/model_loaders/flux.py +6 -13
- invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py +2 -4
- invokeai/backend/model_manager/load/model_loaders/onnx.py +0 -1
- invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +1 -2
- invokeai/backend/model_manager/load/model_loaders/z_image.py +3 -37
- invokeai/backend/model_manager/starter_models.py +4 -13
- invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py +5 -39
- invokeai/backend/quantization/gguf/ggml_tensor.py +4 -15
- invokeai/backend/z_image/extensions/regional_prompting_extension.py +12 -10
- invokeai/frontend/web/dist/assets/App-CYhlZO3Q.js +161 -0
- invokeai/frontend/web/dist/assets/{browser-ponyfill-4xPFTMT3.js → browser-ponyfill-DHZxq1nk.js} +1 -1
- invokeai/frontend/web/dist/assets/{index-vCDSQboA.js → index-dgSJAY--.js} +51 -51
- invokeai/frontend/web/dist/index.html +1 -1
- invokeai/frontend/web/dist/locales/en.json +5 -11
- invokeai/version/invokeai_version.py +1 -1
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/METADATA +2 -2
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/RECORD +32 -39
- invokeai/app/invocations/pbr_maps.py +0 -59
- invokeai/backend/flux/schedulers.py +0 -62
- invokeai/backend/image_util/pbr_maps/architecture/block.py +0 -367
- invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py +0 -70
- invokeai/backend/image_util/pbr_maps/pbr_maps.py +0 -141
- invokeai/backend/image_util/pbr_maps/utils/image_ops.py +0 -93
- invokeai/frontend/web/dist/assets/App-BBELGD-n.js +0 -161
- invokeai/frontend/web/dist/locales/en-GB.json +0 -1
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/WHEEL +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/entry_points.txt +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/top_level.txt +0 -0
invokeai/app/invocations/flux_denoise.py

@@ -47,7 +47,6 @@ from invokeai.backend.flux.sampling_utils import (
     pack,
     unpack,
 )
-from invokeai.backend.flux.schedulers import FLUX_SCHEDULER_LABELS, FLUX_SCHEDULER_MAP, FLUX_SCHEDULER_NAME_VALUES
 from invokeai.backend.flux.text_conditioning import FluxReduxConditioning, FluxTextConditioning
 from invokeai.backend.model_manager.taxonomy import BaseModelType, FluxVariantType, ModelFormat, ModelType
 from invokeai.backend.patches.layer_patcher import LayerPatcher
@@ -64,7 +63,7 @@ from invokeai.backend.util.devices import TorchDevice
     title="FLUX Denoise",
     tags=["image", "flux"],
     category="image",
-    version="4.
+    version="4.1.0",
 )
 class FluxDenoiseInvocation(BaseInvocation):
     """Run denoising process with a FLUX transformer model."""
@@ -133,12 +132,6 @@ class FluxDenoiseInvocation(BaseInvocation):
     num_steps: int = InputField(
         default=4, description="Number of diffusion steps. Recommended values are schnell: 4, dev: 50."
     )
-    scheduler: FLUX_SCHEDULER_NAME_VALUES = InputField(
-        default="euler",
-        description="Scheduler (sampler) for the denoising process. 'euler' is fast and standard. "
-        "'heun' is 2nd-order (better quality, 2x slower). 'lcm' is optimized for few steps.",
-        ui_choice_labels=FLUX_SCHEDULER_LABELS,
-    )
     guidance: float = InputField(
         default=4.0,
         description="The guidance strength. Higher values adhere more strictly to the prompt, and will produce less diverse images. FLUX dev only, ignored for schnell.",
@@ -249,12 +242,6 @@
             shift=not is_schnell,
         )

-        # Create scheduler if not using default euler
-        scheduler = None
-        if self.scheduler in FLUX_SCHEDULER_MAP:
-            scheduler_class = FLUX_SCHEDULER_MAP[self.scheduler]
-            scheduler = scheduler_class(num_train_timesteps=1000)
-
         # Clip the timesteps schedule based on denoising_start and denoising_end.
         timesteps = clip_timestep_schedule_fractional(timesteps, self.denoising_start, self.denoising_end)

@@ -439,7 +426,6 @@
             img_cond=img_cond,
             img_cond_seq=img_cond_seq,
             img_cond_seq_ids=img_cond_seq_ids,
-            scheduler=scheduler,
         )

         x = unpack(x.float(), self.height, self.width)
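These hunks drop the scheduler selection present in 6.10.0: the `scheduler` input field is removed and the denoise call no longer receives a scheduler object. The removed code dispatched on a name-to-class map from the (also deleted) `invokeai/backend/flux/schedulers.py`, which is not shown in this diff. A minimal sketch of that dispatch pattern follows; the concrete map entries using diffusers classes are assumptions, not the actual contents of `FLUX_SCHEDULER_MAP`:

```python
# Minimal sketch of the name-to-class dispatch implied by the removed code. The real
# FLUX_SCHEDULER_MAP lived in invokeai/backend/flux/schedulers.py (also deleted in this
# diff) and its exact entries are not shown, so the mapping below is hypothetical.
from diffusers import FlowMatchHeunDiscreteScheduler, LCMScheduler

FLUX_SCHEDULER_MAP = {
    # "euler" is the default path in the removed code and maps to no scheduler object.
    "heun": FlowMatchHeunDiscreteScheduler,  # assumed entry
    "lcm": LCMScheduler,                     # assumed entry
}

def build_scheduler(name: str):
    """Return a scheduler instance for non-default names, or None for the Euler path."""
    scheduler_class = FLUX_SCHEDULER_MAP.get(name)
    return scheduler_class(num_train_timesteps=1000) if scheduler_class else None
```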
invokeai/app/invocations/metadata_linked.py

@@ -52,7 +52,6 @@ from invokeai.app.invocations.primitives import (
 )
 from invokeai.app.invocations.scheduler import SchedulerOutput
 from invokeai.app.invocations.t2i_adapter import T2IAdapterField, T2IAdapterInvocation
-from invokeai.app.invocations.z_image_denoise import ZImageDenoiseInvocation
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelType, SubModelType
 from invokeai.backend.stable_diffusion.schedulers.schedulers import SCHEDULER_NAME_VALUES
@@ -730,52 +729,6 @@ class FluxDenoiseLatentsMetaInvocation(FluxDenoiseInvocation, WithMetadata):
         return LatentsMetaOutput(**params, metadata=MetadataField.model_validate(md))


-@invocation(
-    "z_image_denoise_meta",
-    title=f"{ZImageDenoiseInvocation.UIConfig.title} + Metadata",
-    tags=["z-image", "latents", "denoise", "txt2img", "t2i", "t2l", "img2img", "i2i", "l2l"],
-    category="latents",
-    version="1.0.0",
-)
-class ZImageDenoiseMetaInvocation(ZImageDenoiseInvocation, WithMetadata):
-    """Run denoising process with a Z-Image transformer model + metadata."""
-
-    def invoke(self, context: InvocationContext) -> LatentsMetaOutput:
-        def _loras_to_json(obj: Union[Any, list[Any]]):
-            if not isinstance(obj, list):
-                obj = [obj]
-
-            output: list[dict[str, Any]] = []
-            for item in obj:
-                output.append(
-                    LoRAMetadataField(
-                        model=item.lora,
-                        weight=item.weight,
-                    ).model_dump(exclude_none=True, exclude={"id", "type", "is_intermediate", "use_cache"})
-                )
-            return output
-
-        obj = super().invoke(context)
-
-        md: Dict[str, Any] = {} if self.metadata is None else self.metadata.root
-        md.update({"width": obj.width})
-        md.update({"height": obj.height})
-        md.update({"steps": self.steps})
-        md.update({"guidance": self.guidance_scale})
-        md.update({"denoising_start": self.denoising_start})
-        md.update({"denoising_end": self.denoising_end})
-        md.update({"scheduler": self.scheduler})
-        md.update({"model": self.transformer.transformer})
-        md.update({"seed": self.seed})
-        if len(self.transformer.loras) > 0:
-            md.update({"loras": _loras_to_json(self.transformer.loras)})
-
-        params = obj.__dict__.copy()
-        del params["type"]
-
-        return LatentsMetaOutput(**params, metadata=MetadataField.model_validate(md))
-
-
 @invocation(
     "metadata_to_vae",
     title="Metadata To VAE",
invokeai/app/invocations/z_image_denoise.py

@@ -1,4 +1,3 @@
-import inspect
 import math
 from contextlib import ExitStack
 from typing import Callable, Iterator, Optional, Tuple
@@ -6,7 +5,6 @@ from typing import Callable, Iterator, Optional, Tuple
 import einops
 import torch
 import torchvision.transforms as tv_transforms
-from diffusers.schedulers.scheduling_utils import SchedulerMixin
 from PIL import Image
 from torchvision.transforms.functional import resize as tv_resize
 from tqdm import tqdm
@@ -26,7 +24,6 @@ from invokeai.app.invocations.primitives import LatentsOutput
 from invokeai.app.invocations.z_image_control import ZImageControlField
 from invokeai.app.invocations.z_image_image_to_latents import ZImageImageToLatentsInvocation
 from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.flux.schedulers import ZIMAGE_SCHEDULER_LABELS, ZIMAGE_SCHEDULER_MAP, ZIMAGE_SCHEDULER_NAME_VALUES
 from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelFormat
 from invokeai.backend.patches.layer_patcher import LayerPatcher
 from invokeai.backend.patches.lora_conversions.z_image_lora_constants import Z_IMAGE_LORA_TRANSFORMER_PREFIX
@@ -50,7 +47,7 @@ from invokeai.backend.z_image.z_image_transformer_patch import patch_transformer
     title="Denoise - Z-Image",
     tags=["image", "z-image"],
     category="image",
-    version="1.
+    version="1.2.0",
     classification=Classification.Prototype,
 )
 class ZImageDenoiseInvocation(BaseInvocation):
@@ -69,7 +66,6 @@ class ZImageDenoiseInvocation(BaseInvocation):
     )
     denoising_start: float = InputField(default=0.0, ge=0, le=1, description=FieldDescriptions.denoising_start)
     denoising_end: float = InputField(default=1.0, ge=0, le=1, description=FieldDescriptions.denoising_end)
-    add_noise: bool = InputField(default=True, description="Add noise based on denoising start.")
     transformer: TransformerField = InputField(
         description=FieldDescriptions.z_image_model, input=Input.Connection, title="Transformer"
     )
@@ -104,13 +100,6 @@
         description=FieldDescriptions.vae + " Required for control conditioning.",
         input=Input.Connection,
     )
-    # Scheduler selection for the denoising process
-    scheduler: ZIMAGE_SCHEDULER_NAME_VALUES = InputField(
-        default="euler",
-        description="Scheduler (sampler) for the denoising process. Euler is the default and recommended for "
-        "Z-Image-Turbo. Heun is 2nd-order (better quality, 2x slower). LCM is optimized for few steps.",
-        ui_choice_labels=ZIMAGE_SCHEDULER_LABELS,
-    )

     @torch.no_grad()
     def invoke(self, context: InvocationContext) -> LatentsOutput:
@@ -348,12 +337,8 @@

         # Prepare input latent image
         if init_latents is not None:
-            … (two removed lines not captured in the source view)
-                s_0 = sigmas[0]
-                latents = s_0 * noise + (1.0 - s_0) * init_latents
-            else:
-                latents = init_latents
+            s_0 = sigmas[0]
+            latents = s_0 * noise + (1.0 - s_0) * init_latents
         else:
             if self.denoising_start > 1e-5:
                 raise ValueError("denoising_start should be 0 when initial latents are not provided.")
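The hunk above keeps the flow-matching image-to-image noising step but drops the conditional around it (the removed `add_noise` flag): the initial latents are always blended with noise at the first sigma of the clipped schedule. A minimal sketch of that blend, with illustrative shapes and an assumed sigma schedule rather than values taken from the invocation:

```python
import torch

def noise_init_latents(init_latents: torch.Tensor, noise: torch.Tensor, sigmas: list[float]) -> torch.Tensor:
    # s_0 is the first sigma of the (possibly clipped) schedule. With denoising_start=0,
    # s_0 is ~1.0 and the result is essentially pure noise; a later start keeps more of
    # the init image.
    s_0 = sigmas[0]
    return s_0 * noise + (1.0 - s_0) * init_latents

# Illustrative usage (shapes and sigma values are placeholders):
init_latents = torch.zeros(1, 16, 64, 64)
noise = torch.randn(1, 16, 64, 64)
latents = noise_init_latents(init_latents, noise, sigmas=[0.6, 0.4, 0.2, 0.0])
```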
@@ -376,32 +361,15 @@
         )

         step_callback = self._build_step_callback(context)
-        … (removed scheduler-setup lines not captured in the source view)
-                shift=1.0,
-            )
-            # Set timesteps - LCM should use num_inference_steps (it has its own sigma schedule),
-            # while other schedulers can use custom sigmas if supported
-            is_lcm = self.scheduler == "lcm"
-            set_timesteps_sig = inspect.signature(scheduler.set_timesteps)
-            if not is_lcm and "sigmas" in set_timesteps_sig.parameters:
-                # Convert sigmas list to tensor for scheduler
-                scheduler.set_timesteps(sigmas=sigmas, device=device)
-            else:
-                # LCM or scheduler doesn't support custom sigmas - use num_inference_steps
-                scheduler.set_timesteps(num_inference_steps=total_steps, device=device)
-
-            # For Heun scheduler, the number of actual steps may differ
-            num_scheduler_steps = len(scheduler.timesteps)
-        else:
-            num_scheduler_steps = total_steps
+        step_callback(
+            PipelineIntermediateState(
+                step=0,
+                order=1,
+                total_steps=total_steps,
+                timestep=int(sigmas[0] * 1000),
+                latents=latents,
+            ),
+        )

         with ExitStack() as exit_stack:
             # Get transformer config to determine if it's quantized
@@ -535,219 +503,91 @@ class ZImageDenoiseInvocation(BaseInvocation):
                 )
             )

-            # Denoising loop
-            … (removed scheduler-based loop lines not captured in the source view)
-                    apply_control = control_extension is not None and control_extension.should_apply(
-                        user_step, total_steps
+            # Denoising loop
+            for step_idx in tqdm(range(total_steps)):
+                sigma_curr = sigmas[step_idx]
+                sigma_prev = sigmas[step_idx + 1]
+
+                # Timestep tensor for Z-Image model
+                # The model expects t=0 at start (noise) and t=1 at end (clean)
+                # Sigma goes from 1 (noise) to 0 (clean), so model_t = 1 - sigma
+                model_t = 1.0 - sigma_curr
+                timestep = torch.tensor([model_t], device=device, dtype=inference_dtype).expand(latents.shape[0])
+
+                # Run transformer for positive prediction
+                # Z-Image transformer expects: x as list of [C, 1, H, W] tensors, t, cap_feats as list
+                # Prepare latent input: [B, C, H, W] -> [B, C, 1, H, W] -> list of [C, 1, H, W]
+                latent_model_input = latents.to(transformer.dtype)
+                latent_model_input = latent_model_input.unsqueeze(2)  # Add frame dimension
+                latent_model_input_list = list(latent_model_input.unbind(dim=0))
+
+                # Determine if control should be applied at this step
+                apply_control = control_extension is not None and control_extension.should_apply(step_idx, total_steps)
+
+                # Run forward pass - use custom forward with control if extension is active
+                if apply_control:
+                    model_out_list, _ = z_image_forward_with_control(
+                        transformer=transformer,
+                        x=latent_model_input_list,
+                        t=timestep,
+                        cap_feats=[pos_prompt_embeds],
+                        control_extension=control_extension,
                     )
-                        … (removed lines not captured in the source view)
-                        x=latent_model_input_list,
-                        t=timestep,
-                        cap_feats=[pos_prompt_embeds],
-                        control_extension=control_extension,
-                    )
-                else:
-                    model_output = transformer(
-                        x=latent_model_input_list,
-                        t=timestep,
-                        cap_feats=[pos_prompt_embeds],
-                    )
-                    model_out_list = model_output[0]
-
-                noise_pred_cond = torch.stack([t.float() for t in model_out_list], dim=0)
-                noise_pred_cond = noise_pred_cond.squeeze(2)
-                noise_pred_cond = -noise_pred_cond  # Z-Image uses v-prediction with negation
-
-                # Apply CFG if enabled
-                if do_classifier_free_guidance and neg_prompt_embeds is not None:
-                    if apply_control:
-                        model_out_list_uncond, _ = z_image_forward_with_control(
-                            transformer=transformer,
-                            x=latent_model_input_list,
-                            t=timestep,
-                            cap_feats=[neg_prompt_embeds],
-                            control_extension=control_extension,
-                        )
-                    else:
-                        model_output_uncond = transformer(
-                            x=latent_model_input_list,
-                            t=timestep,
-                            cap_feats=[neg_prompt_embeds],
-                        )
-                        model_out_list_uncond = model_output_uncond[0]
-
-                    noise_pred_uncond = torch.stack([t.float() for t in model_out_list_uncond], dim=0)
-                    noise_pred_uncond = noise_pred_uncond.squeeze(2)
-                    noise_pred_uncond = -noise_pred_uncond
-                    noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_cond - noise_pred_uncond)
-                else:
-                    noise_pred = noise_pred_cond
-
-                # Use scheduler.step() for the update
-                step_output = scheduler.step(model_output=noise_pred, timestep=sched_timestep, sample=latents)
-                latents = step_output.prev_sample
-
-                # Get sigma_prev for inpainting (next sigma value)
-                if step_index + 1 < len(scheduler.sigmas):
-                    sigma_prev = scheduler.sigmas[step_index + 1].item()
-                else:
-                    sigma_prev = 0.0
-
-                if inpaint_extension is not None:
-                    latents = inpaint_extension.merge_intermediate_latents_with_init_latents(latents, sigma_prev)
-
-                # For Heun, only increment user step after second-order step completes
-                if is_heun:
-                    if not in_first_order:
-                        user_step += 1
-                        # Only call step_callback if we haven't exceeded total_steps
-                        if user_step <= total_steps:
-                            pbar.update(1)
-                            step_callback(
-                                PipelineIntermediateState(
-                                    step=user_step,
-                                    order=2,
-                                    total_steps=total_steps,
-                                    timestep=int(sigma_curr * 1000),
-                                    latents=latents,
-                                ),
-                            )
-                else:
-                    # For LCM and other first-order schedulers
-                    user_step += 1
-                    # Only call step_callback if we haven't exceeded total_steps
-                    # (LCM scheduler may have more internal steps than user-facing steps)
-                    if user_step <= total_steps:
-                        pbar.update(1)
-                        step_callback(
-                            PipelineIntermediateState(
-                                step=user_step,
-                                order=1,
-                                total_steps=total_steps,
-                                timestep=int(sigma_curr * 1000),
-                                latents=latents,
-                            ),
-                        )
-            pbar.close()
-        else:
-            # Original Euler implementation (default, optimized for Z-Image)
-            for step_idx in tqdm(range(total_steps)):
-                sigma_curr = sigmas[step_idx]
-                sigma_prev = sigmas[step_idx + 1]
-
-                # Timestep tensor for Z-Image model
-                # The model expects t=0 at start (noise) and t=1 at end (clean)
-                # Sigma goes from 1 (noise) to 0 (clean), so model_t = 1 - sigma
-                model_t = 1.0 - sigma_curr
-                timestep = torch.tensor([model_t], device=device, dtype=inference_dtype).expand(latents.shape[0])
-
-                # Run transformer for positive prediction
-                # Z-Image transformer expects: x as list of [C, 1, H, W] tensors, t, cap_feats as list
-                # Prepare latent input: [B, C, H, W] -> [B, C, 1, H, W] -> list of [C, 1, H, W]
-                latent_model_input = latents.to(transformer.dtype)
-                latent_model_input = latent_model_input.unsqueeze(2)  # Add frame dimension
-                latent_model_input_list = list(latent_model_input.unbind(dim=0))
-
-                # Determine if control should be applied at this step
-                apply_control = control_extension is not None and control_extension.should_apply(
-                    step_idx, total_steps
+                else:
+                    model_output = transformer(
+                        x=latent_model_input_list,
+                        t=timestep,
+                        cap_feats=[pos_prompt_embeds],
                     )
+                    model_out_list = model_output[0]  # Extract list of tensors from tuple
+
+                noise_pred_cond = torch.stack([t.float() for t in model_out_list], dim=0)
+                noise_pred_cond = noise_pred_cond.squeeze(2)  # Remove frame dimension
+                noise_pred_cond = -noise_pred_cond  # Z-Image uses v-prediction with negation

-                … (removed line not captured in the source view)
+                # Apply CFG if enabled
+                if do_classifier_free_guidance and neg_prompt_embeds is not None:
                     if apply_control:
-                        … (removed line not captured in the source view)
+                        model_out_list_uncond, _ = z_image_forward_with_control(
                             transformer=transformer,
                             x=latent_model_input_list,
                             t=timestep,
-                            cap_feats=[
+                            cap_feats=[neg_prompt_embeds],
                             control_extension=control_extension,
                         )
                     else:
-                        … (removed line not captured in the source view)
+                        model_output_uncond = transformer(
                             x=latent_model_input_list,
                             t=timestep,
-                            cap_feats=[
+                            cap_feats=[neg_prompt_embeds],
                         )
-                        … (removed lines not captured in the source view)
-                    noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_cond - noise_pred_uncond)
-                else:
-                    noise_pred = noise_pred_cond
-
-                # Euler step
-                latents_dtype = latents.dtype
-                latents = latents.to(dtype=torch.float32)
-                latents = latents + (sigma_prev - sigma_curr) * noise_pred
-                latents = latents.to(dtype=latents_dtype)
-
-                if inpaint_extension is not None:
-                    latents = inpaint_extension.merge_intermediate_latents_with_init_latents(latents, sigma_prev)
-
-                step_callback(
-                    PipelineIntermediateState(
-                        step=step_idx + 1,
-                        order=1,
-                        total_steps=total_steps,
-                        timestep=int(sigma_curr * 1000),
-                        latents=latents,
-                    ),
-                )
+                        model_out_list_uncond = model_output_uncond[0]  # Extract list of tensors from tuple
+
+                    noise_pred_uncond = torch.stack([t.float() for t in model_out_list_uncond], dim=0)
+                    noise_pred_uncond = noise_pred_uncond.squeeze(2)
+                    noise_pred_uncond = -noise_pred_uncond
+                    noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_cond - noise_pred_uncond)
+                else:
+                    noise_pred = noise_pred_cond
+
+                # Euler step
+                latents_dtype = latents.dtype
+                latents = latents.to(dtype=torch.float32)
+                latents = latents + (sigma_prev - sigma_curr) * noise_pred
+                latents = latents.to(dtype=latents_dtype)
+
+                if inpaint_extension is not None:
+                    latents = inpaint_extension.merge_intermediate_latents_with_init_latents(latents, sigma_prev)
+
+                step_callback(
+                    PipelineIntermediateState(
+                        step=step_idx + 1,
+                        order=1,
+                        total_steps=total_steps,
+                        timestep=int(sigma_curr * 1000),
+                        latents=latents,
+                    ),
+                )

         return latents

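Net effect of this hunk: rc1 drops the scheduler-object code path (Heun/LCM via diffusers `scheduler.step()`) and keeps only the first-order Euler flow-matching update with optional classifier-free guidance. A condensed, standalone sketch of that retained loop, using a hypothetical `model` callable as a stand-in for the Z-Image transformer call (the real code passes lists of tensors and negates the model output, as shown above):

```python
# Condensed sketch of the loop rc1 keeps: a first-order Euler update on the
# flow-matching ODE with optional classifier-free guidance.
import torch

def euler_denoise(model, latents, sigmas, cond, uncond=None, guidance_scale=1.0):
    for i in range(len(sigmas) - 1):
        sigma_curr, sigma_prev = sigmas[i], sigmas[i + 1]
        # Z-Image timesteps run opposite to sigma: t = 1 - sigma.
        t = torch.full((latents.shape[0],), 1.0 - sigma_curr, device=latents.device)

        noise_pred = model(latents, t, cond)
        if uncond is not None and guidance_scale > 1.0:
            noise_pred_uncond = model(latents, t, uncond)
            # Classifier-free guidance: extrapolate away from the unconditional prediction.
            noise_pred = noise_pred_uncond + guidance_scale * (noise_pred - noise_pred_uncond)

        # Euler step: sigma decreases toward 0, so (sigma_prev - sigma_curr) is negative.
        latents = latents + (sigma_prev - sigma_curr) * noise_pred
    return latents
```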
invokeai/app/services/config/config_default.py

@@ -85,7 +85,6 @@ class InvokeAIAppConfig(BaseSettings):
         max_cache_ram_gb: The maximum amount of CPU RAM to use for model caching in GB. If unset, the limit will be configured based on the available RAM. In most cases, it is recommended to leave this unset.
         max_cache_vram_gb: The amount of VRAM to use for model caching in GB. If unset, the limit will be configured based on the available VRAM and the device_working_mem_gb. In most cases, it is recommended to leave this unset.
         log_memory_usage: If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour.
-        model_cache_keep_alive_min: How long to keep models in cache after last use, in minutes. A value of 0 (the default) means models are kept in cache indefinitely. If no model generations occur within the timeout period, the model cache is cleared using the same logic as the 'Clear Model Cache' button.
         device_working_mem_gb: The amount of working memory to keep available on the compute device (in GB). Has no effect if running on CPU. If you are experiencing OOM errors, try increasing this value.
         enable_partial_loading: Enable partial loading of models. This enables models to run with reduced VRAM requirements (at the cost of slower speed) by streaming the model from RAM to VRAM as its used. In some edge cases, partial loading can cause models to run more slowly if they were previously being fully loaded into VRAM.
         keep_ram_copy_of_weights: Whether to keep a full RAM copy of a model's weights when the model is loaded in VRAM. Keeping a RAM copy increases average RAM usage, but speeds up model switching and LoRA patching (assuming there is sufficient RAM). Set this to False if RAM pressure is consistently high.
@@ -166,10 +165,9 @@ class InvokeAIAppConfig(BaseSettings):
     max_cache_ram_gb: Optional[float] = Field(default=None, gt=0, description="The maximum amount of CPU RAM to use for model caching in GB. If unset, the limit will be configured based on the available RAM. In most cases, it is recommended to leave this unset.")
     max_cache_vram_gb: Optional[float] = Field(default=None, ge=0, description="The amount of VRAM to use for model caching in GB. If unset, the limit will be configured based on the available VRAM and the device_working_mem_gb. In most cases, it is recommended to leave this unset.")
     log_memory_usage: bool = Field(default=False, description="If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour.")
-    model_cache_keep_alive_min: float = Field(default=0, ge=0, description="How long to keep models in cache after last use, in minutes. A value of 0 (the default) means models are kept in cache indefinitely. If no model generations occur within the timeout period, the model cache is cleared using the same logic as the 'Clear Model Cache' button.")
     device_working_mem_gb: float = Field(default=3, description="The amount of working memory to keep available on the compute device (in GB). Has no effect if running on CPU. If you are experiencing OOM errors, try increasing this value.")
     enable_partial_loading: bool = Field(default=False, description="Enable partial loading of models. This enables models to run with reduced VRAM requirements (at the cost of slower speed) by streaming the model from RAM to VRAM as its used. In some edge cases, partial loading can cause models to run more slowly if they were previously being fully loaded into VRAM.")
-    keep_ram_copy_of_weights: bool = Field(default=True,
+    keep_ram_copy_of_weights: bool = Field(default=True, description="Whether to keep a full RAM copy of a model's weights when the model is loaded in VRAM. Keeping a RAM copy increases average RAM usage, but speeds up model switching and LoRA patching (assuming there is sufficient RAM). Set this to False if RAM pressure is consistently high.")
     # Deprecated CACHE configs
     ram: Optional[float] = Field(default=None, gt=0, description="DEPRECATED: This setting is no longer used. It has been replaced by `max_cache_ram_gb`, but most users will not need to use this config since automatic cache size limits should work well in most cases. This config setting will be removed once the new model cache behavior is stable.")
     vram: Optional[float] = Field(default=None, ge=0, description="DEPRECATED: This setting is no longer used. It has been replaced by `max_cache_vram_gb`, but most users will not need to use this config since automatic cache size limits should work well in most cases. This config setting will be removed once the new model cache behavior is stable.")
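With `model_cache_keep_alive_min` gone in rc1, the cache-related settings that remain are the ones shown above. A hedged illustration of those fields, instantiating the settings class directly (in practice these values are normally set in `invokeai.yaml` rather than constructed in code):

```python
from invokeai.app.services.config.config_default import InvokeAIAppConfig

# Field names and defaults come from the hunk above; the values here are
# illustrative, not recommendations.
config = InvokeAIAppConfig(
    max_cache_ram_gb=None,          # unset: limit derived from available RAM
    max_cache_vram_gb=None,         # unset: limit derived from available VRAM and device_working_mem_gb
    device_working_mem_gb=3,        # raise this if you hit OOM errors on the GPU
    enable_partial_loading=False,   # stream weights from RAM to VRAM when True (less VRAM, slower)
    keep_ram_copy_of_weights=True,  # set False if RAM pressure is consistently high
)
```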
invokeai/app/services/model_manager/model_manager_default.py

@@ -60,10 +60,6 @@ class ModelManagerService(ModelManagerServiceBase):
             service.start(invoker)

     def stop(self, invoker: Invoker) -> None:
-        # Shutdown the model cache to cancel any pending timers
-        if hasattr(self._load, "ram_cache"):
-            self._load.ram_cache.shutdown()
-
         for service in [self._store, self._install, self._load]:
             if hasattr(service, "stop"):
                 service.stop(invoker)
@@ -92,10 +88,7 @@ class ModelManagerService(ModelManagerServiceBase):
             max_ram_cache_size_gb=app_config.max_cache_ram_gb,
             max_vram_cache_size_gb=app_config.max_cache_vram_gb,
             execution_device=execution_device or TorchDevice.choose_torch_device(),
-            storage_device="cpu",
-            log_memory_usage=app_config.log_memory_usage,
             logger=logger,
-            keep_alive_minutes=app_config.model_cache_keep_alive_min,
         )
         loader = ModelLoadService(
             app_config=app_config,