InvokeAI 6.10.0__py3-none-any.whl → 6.10.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. invokeai/app/invocations/flux_denoise.py +1 -15
  2. invokeai/app/invocations/metadata_linked.py +0 -47
  3. invokeai/app/invocations/z_image_denoise.py +84 -244
  4. invokeai/app/services/config/config_default.py +1 -3
  5. invokeai/app/services/model_manager/model_manager_default.py +0 -7
  6. invokeai/backend/flux/denoise.py +11 -196
  7. invokeai/backend/model_manager/configs/lora.py +0 -36
  8. invokeai/backend/model_manager/load/model_cache/model_cache.py +2 -104
  9. invokeai/backend/model_manager/load/model_loaders/cogview4.py +1 -2
  10. invokeai/backend/model_manager/load/model_loaders/flux.py +6 -13
  11. invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py +2 -4
  12. invokeai/backend/model_manager/load/model_loaders/onnx.py +0 -1
  13. invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +1 -2
  14. invokeai/backend/model_manager/load/model_loaders/z_image.py +3 -37
  15. invokeai/backend/model_manager/starter_models.py +4 -13
  16. invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py +5 -39
  17. invokeai/backend/quantization/gguf/ggml_tensor.py +4 -15
  18. invokeai/backend/z_image/extensions/regional_prompting_extension.py +12 -10
  19. invokeai/frontend/web/dist/assets/App-CYhlZO3Q.js +161 -0
  20. invokeai/frontend/web/dist/assets/{browser-ponyfill-4xPFTMT3.js → browser-ponyfill-DHZxq1nk.js} +1 -1
  21. invokeai/frontend/web/dist/assets/{index-vCDSQboA.js → index-dgSJAY--.js} +51 -51
  22. invokeai/frontend/web/dist/index.html +1 -1
  23. invokeai/frontend/web/dist/locales/en.json +5 -11
  24. invokeai/version/invokeai_version.py +1 -1
  25. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/METADATA +2 -2
  26. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/RECORD +32 -39
  27. invokeai/app/invocations/pbr_maps.py +0 -59
  28. invokeai/backend/flux/schedulers.py +0 -62
  29. invokeai/backend/image_util/pbr_maps/architecture/block.py +0 -367
  30. invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py +0 -70
  31. invokeai/backend/image_util/pbr_maps/pbr_maps.py +0 -141
  32. invokeai/backend/image_util/pbr_maps/utils/image_ops.py +0 -93
  33. invokeai/frontend/web/dist/assets/App-BBELGD-n.js +0 -161
  34. invokeai/frontend/web/dist/locales/en-GB.json +0 -1
  35. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/WHEEL +0 -0
  36. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/entry_points.txt +0 -0
  37. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE +0 -0
  38. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
  39. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
  40. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/top_level.txt +0 -0

invokeai/app/invocations/flux_denoise.py

@@ -47,7 +47,6 @@ from invokeai.backend.flux.sampling_utils import (
     pack,
     unpack,
 )
-from invokeai.backend.flux.schedulers import FLUX_SCHEDULER_LABELS, FLUX_SCHEDULER_MAP, FLUX_SCHEDULER_NAME_VALUES
 from invokeai.backend.flux.text_conditioning import FluxReduxConditioning, FluxTextConditioning
 from invokeai.backend.model_manager.taxonomy import BaseModelType, FluxVariantType, ModelFormat, ModelType
 from invokeai.backend.patches.layer_patcher import LayerPatcher
@@ -64,7 +63,7 @@ from invokeai.backend.util.devices import TorchDevice
     title="FLUX Denoise",
     tags=["image", "flux"],
     category="image",
-    version="4.2.0",
+    version="4.1.0",
 )
 class FluxDenoiseInvocation(BaseInvocation):
     """Run denoising process with a FLUX transformer model."""
@@ -133,12 +132,6 @@ class FluxDenoiseInvocation(BaseInvocation):
     num_steps: int = InputField(
         default=4, description="Number of diffusion steps. Recommended values are schnell: 4, dev: 50."
     )
-    scheduler: FLUX_SCHEDULER_NAME_VALUES = InputField(
-        default="euler",
-        description="Scheduler (sampler) for the denoising process. 'euler' is fast and standard. "
-        "'heun' is 2nd-order (better quality, 2x slower). 'lcm' is optimized for few steps.",
-        ui_choice_labels=FLUX_SCHEDULER_LABELS,
-    )
     guidance: float = InputField(
         default=4.0,
         description="The guidance strength. Higher values adhere more strictly to the prompt, and will produce less diverse images. FLUX dev only, ignored for schnell.",
@@ -249,12 +242,6 @@ class FluxDenoiseInvocation(BaseInvocation):
             shift=not is_schnell,
         )

-        # Create scheduler if not using default euler
-        scheduler = None
-        if self.scheduler in FLUX_SCHEDULER_MAP:
-            scheduler_class = FLUX_SCHEDULER_MAP[self.scheduler]
-            scheduler = scheduler_class(num_train_timesteps=1000)
-
         # Clip the timesteps schedule based on denoising_start and denoising_end.
         timesteps = clip_timestep_schedule_fractional(timesteps, self.denoising_start, self.denoising_end)

@@ -439,7 +426,6 @@ class FluxDenoiseInvocation(BaseInvocation):
             img_cond=img_cond,
             img_cond_seq=img_cond_seq,
             img_cond_seq_ids=img_cond_seq_ids,
-            scheduler=scheduler,
         )

         x = unpack(x.float(), self.height, self.width)
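
The removed field and the map lookup above both referenced the invokeai.backend.flux.schedulers module (also dropped in this diff, see file 28). A rough sketch of the kind of mapping such a module would expose follows; the concrete diffusers classes and labels here are illustrative assumptions, not the module's actual contents:

# Hypothetical reconstruction of the scheduler mapping referenced by the removed code.
# The names "euler", "heun", and "lcm" come from the field description in the diff; the
# diffusers classes chosen here are assumptions for illustration only.
from typing import Literal

from diffusers import FlowMatchHeunDiscreteScheduler, LCMScheduler

FLUX_SCHEDULER_NAME_VALUES = Literal["euler", "heun", "lcm"]

# "euler" is handled by the built-in sampling loop, so only non-default schedulers need a
# diffusers class (consistent with the `if self.scheduler in FLUX_SCHEDULER_MAP` check above).
FLUX_SCHEDULER_MAP = {
    "heun": FlowMatchHeunDiscreteScheduler,
    "lcm": LCMScheduler,
}

FLUX_SCHEDULER_LABELS = {
    "euler": "Euler (default)",
    "heun": "Heun (2nd order, slower)",
    "lcm": "LCM (few steps)",
}

With a mapping like this, the removed `FLUX_SCHEDULER_MAP[self.scheduler]` lookup only instantiates a diffusers scheduler when a non-default sampler is selected.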

invokeai/app/invocations/metadata_linked.py

@@ -52,7 +52,6 @@ from invokeai.app.invocations.primitives import (
 )
 from invokeai.app.invocations.scheduler import SchedulerOutput
 from invokeai.app.invocations.t2i_adapter import T2IAdapterField, T2IAdapterInvocation
-from invokeai.app.invocations.z_image_denoise import ZImageDenoiseInvocation
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelType, SubModelType
 from invokeai.backend.stable_diffusion.schedulers.schedulers import SCHEDULER_NAME_VALUES
@@ -730,52 +729,6 @@ class FluxDenoiseLatentsMetaInvocation(FluxDenoiseInvocation, WithMetadata):
         return LatentsMetaOutput(**params, metadata=MetadataField.model_validate(md))


-@invocation(
-    "z_image_denoise_meta",
-    title=f"{ZImageDenoiseInvocation.UIConfig.title} + Metadata",
-    tags=["z-image", "latents", "denoise", "txt2img", "t2i", "t2l", "img2img", "i2i", "l2l"],
-    category="latents",
-    version="1.0.0",
-)
-class ZImageDenoiseMetaInvocation(ZImageDenoiseInvocation, WithMetadata):
-    """Run denoising process with a Z-Image transformer model + metadata."""
-
-    def invoke(self, context: InvocationContext) -> LatentsMetaOutput:
-        def _loras_to_json(obj: Union[Any, list[Any]]):
-            if not isinstance(obj, list):
-                obj = [obj]
-
-            output: list[dict[str, Any]] = []
-            for item in obj:
-                output.append(
-                    LoRAMetadataField(
-                        model=item.lora,
-                        weight=item.weight,
-                    ).model_dump(exclude_none=True, exclude={"id", "type", "is_intermediate", "use_cache"})
-                )
-            return output
-
-        obj = super().invoke(context)
-
-        md: Dict[str, Any] = {} if self.metadata is None else self.metadata.root
-        md.update({"width": obj.width})
-        md.update({"height": obj.height})
-        md.update({"steps": self.steps})
-        md.update({"guidance": self.guidance_scale})
-        md.update({"denoising_start": self.denoising_start})
-        md.update({"denoising_end": self.denoising_end})
-        md.update({"scheduler": self.scheduler})
-        md.update({"model": self.transformer.transformer})
-        md.update({"seed": self.seed})
-        if len(self.transformer.loras) > 0:
-            md.update({"loras": _loras_to_json(self.transformer.loras)})
-
-        params = obj.__dict__.copy()
-        del params["type"]
-
-        return LatentsMetaOutput(**params, metadata=MetadataField.model_validate(md))
-
-
 @invocation(
     "metadata_to_vae",
     title="Metadata To VAE",

invokeai/app/invocations/z_image_denoise.py

@@ -1,4 +1,3 @@
-import inspect
 import math
 from contextlib import ExitStack
 from typing import Callable, Iterator, Optional, Tuple
@@ -6,7 +5,6 @@ from typing import Callable, Iterator, Optional, Tuple
 import einops
 import torch
 import torchvision.transforms as tv_transforms
-from diffusers.schedulers.scheduling_utils import SchedulerMixin
 from PIL import Image
 from torchvision.transforms.functional import resize as tv_resize
 from tqdm import tqdm
@@ -26,7 +24,6 @@ from invokeai.app.invocations.primitives import LatentsOutput
 from invokeai.app.invocations.z_image_control import ZImageControlField
 from invokeai.app.invocations.z_image_image_to_latents import ZImageImageToLatentsInvocation
 from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.flux.schedulers import ZIMAGE_SCHEDULER_LABELS, ZIMAGE_SCHEDULER_MAP, ZIMAGE_SCHEDULER_NAME_VALUES
 from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelFormat
 from invokeai.backend.patches.layer_patcher import LayerPatcher
 from invokeai.backend.patches.lora_conversions.z_image_lora_constants import Z_IMAGE_LORA_TRANSFORMER_PREFIX
@@ -50,7 +47,7 @@ from invokeai.backend.z_image.z_image_transformer_patch import patch_transformer
     title="Denoise - Z-Image",
     tags=["image", "z-image"],
     category="image",
-    version="1.4.0",
+    version="1.2.0",
     classification=Classification.Prototype,
 )
 class ZImageDenoiseInvocation(BaseInvocation):
@@ -69,7 +66,6 @@ class ZImageDenoiseInvocation(BaseInvocation):
     )
     denoising_start: float = InputField(default=0.0, ge=0, le=1, description=FieldDescriptions.denoising_start)
     denoising_end: float = InputField(default=1.0, ge=0, le=1, description=FieldDescriptions.denoising_end)
-    add_noise: bool = InputField(default=True, description="Add noise based on denoising start.")
     transformer: TransformerField = InputField(
         description=FieldDescriptions.z_image_model, input=Input.Connection, title="Transformer"
     )
@@ -104,13 +100,6 @@ class ZImageDenoiseInvocation(BaseInvocation):
         description=FieldDescriptions.vae + " Required for control conditioning.",
         input=Input.Connection,
     )
-    # Scheduler selection for the denoising process
-    scheduler: ZIMAGE_SCHEDULER_NAME_VALUES = InputField(
-        default="euler",
-        description="Scheduler (sampler) for the denoising process. Euler is the default and recommended for "
-        "Z-Image-Turbo. Heun is 2nd-order (better quality, 2x slower). LCM is optimized for few steps.",
-        ui_choice_labels=ZIMAGE_SCHEDULER_LABELS,
-    )

     @torch.no_grad()
     def invoke(self, context: InvocationContext) -> LatentsOutput:
@@ -348,12 +337,8 @@ class ZImageDenoiseInvocation(BaseInvocation):

         # Prepare input latent image
         if init_latents is not None:
-            if self.add_noise:
-                # Noise the init_latents by the appropriate amount for the first timestep.
-                s_0 = sigmas[0]
-                latents = s_0 * noise + (1.0 - s_0) * init_latents
-            else:
-                latents = init_latents
+            s_0 = sigmas[0]
+            latents = s_0 * noise + (1.0 - s_0) * init_latents
         else:
             if self.denoising_start > 1e-5:
                 raise ValueError("denoising_start should be 0 when initial latents are not provided.")
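
Both sides compute the same flow-matching initialization once noise is added: the starting latent is a linear interpolation between fresh noise and the encoded image, weighted by the first sigma. A tiny illustration with made-up shapes and values:

import torch

# Made-up values purely to illustrate the retained img2img initialization above:
# at the first sigma s_0, the starting latent is a linear blend of noise and the init latents.
s_0 = 0.75                                  # sigma at denoising_start (1.0 = pure noise, 0.0 = keep the image)
noise = torch.randn(1, 16, 32, 32)          # fresh Gaussian noise, same shape as the latents
init_latents = torch.randn(1, 16, 32, 32)   # latents encoded from the input image
latents = s_0 * noise + (1.0 - s_0) * init_latents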
@@ -376,32 +361,15 @@ class ZImageDenoiseInvocation(BaseInvocation):
         )

         step_callback = self._build_step_callback(context)
-
-        # Initialize the diffusers scheduler if not using built-in Euler
-        scheduler: SchedulerMixin | None = None
-        use_scheduler = self.scheduler != "euler"
-
-        if use_scheduler:
-            scheduler_class = ZIMAGE_SCHEDULER_MAP[self.scheduler]
-            scheduler = scheduler_class(
-                num_train_timesteps=1000,
-                shift=1.0,
-            )
-            # Set timesteps - LCM should use num_inference_steps (it has its own sigma schedule),
-            # while other schedulers can use custom sigmas if supported
-            is_lcm = self.scheduler == "lcm"
-            set_timesteps_sig = inspect.signature(scheduler.set_timesteps)
-            if not is_lcm and "sigmas" in set_timesteps_sig.parameters:
-                # Convert sigmas list to tensor for scheduler
-                scheduler.set_timesteps(sigmas=sigmas, device=device)
-            else:
-                # LCM or scheduler doesn't support custom sigmas - use num_inference_steps
-                scheduler.set_timesteps(num_inference_steps=total_steps, device=device)
-
-            # For Heun scheduler, the number of actual steps may differ
-            num_scheduler_steps = len(scheduler.timesteps)
-        else:
-            num_scheduler_steps = total_steps
+        step_callback(
+            PipelineIntermediateState(
+                step=0,
+                order=1,
+                total_steps=total_steps,
+                timestep=int(sigmas[0] * 1000),
+                latents=latents,
+            ),
+        )

         with ExitStack() as exit_stack:
             # Get transformer config to determine if it's quantized
@@ -535,219 +503,91 @@
                 )
             )

-            # Denoising loop - supports both built-in Euler and diffusers schedulers
-            # Track user-facing step for progress (accounts for Heun's double steps)
-            user_step = 0
-
-            if use_scheduler and scheduler is not None:
-                # Use diffusers scheduler for stepping
-                # Use tqdm with total_steps (user-facing steps) not num_scheduler_steps (internal steps)
-                # This ensures progress bar shows 1/8, 2/8, etc. even when scheduler uses more internal steps
-                pbar = tqdm(total=total_steps, desc="Denoising")
-                for step_index in range(num_scheduler_steps):
-                    sched_timestep = scheduler.timesteps[step_index]
-                    # Convert scheduler timestep (0-1000) to normalized sigma (0-1)
-                    sigma_curr = sched_timestep.item() / scheduler.config.num_train_timesteps
-
-                    # For Heun scheduler, track if we're in first or second order step
-                    is_heun = hasattr(scheduler, "state_in_first_order")
-                    in_first_order = scheduler.state_in_first_order if is_heun else True
-
-                    # Timestep tensor for Z-Image model
-                    # The model expects t=0 at start (noise) and t=1 at end (clean)
-                    model_t = 1.0 - sigma_curr
-                    timestep = torch.tensor([model_t], device=device, dtype=inference_dtype).expand(latents.shape[0])
-
-                    # Run transformer for positive prediction
-                    latent_model_input = latents.to(transformer.dtype)
-                    latent_model_input = latent_model_input.unsqueeze(2)  # Add frame dimension
-                    latent_model_input_list = list(latent_model_input.unbind(dim=0))
-
-                    # Determine if control should be applied at this step
-                    apply_control = control_extension is not None and control_extension.should_apply(
-                        user_step, total_steps
+            # Denoising loop
+            for step_idx in tqdm(range(total_steps)):
+                sigma_curr = sigmas[step_idx]
+                sigma_prev = sigmas[step_idx + 1]
+
+                # Timestep tensor for Z-Image model
+                # The model expects t=0 at start (noise) and t=1 at end (clean)
+                # Sigma goes from 1 (noise) to 0 (clean), so model_t = 1 - sigma
+                model_t = 1.0 - sigma_curr
+                timestep = torch.tensor([model_t], device=device, dtype=inference_dtype).expand(latents.shape[0])
+
+                # Run transformer for positive prediction
+                # Z-Image transformer expects: x as list of [C, 1, H, W] tensors, t, cap_feats as list
+                # Prepare latent input: [B, C, H, W] -> [B, C, 1, H, W] -> list of [C, 1, H, W]
+                latent_model_input = latents.to(transformer.dtype)
+                latent_model_input = latent_model_input.unsqueeze(2)  # Add frame dimension
+                latent_model_input_list = list(latent_model_input.unbind(dim=0))
+
+                # Determine if control should be applied at this step
+                apply_control = control_extension is not None and control_extension.should_apply(step_idx, total_steps)
+
+                # Run forward pass - use custom forward with control if extension is active
+                if apply_control:
+                    model_out_list, _ = z_image_forward_with_control(
+                        transformer=transformer,
+                        x=latent_model_input_list,
+                        t=timestep,
+                        cap_feats=[pos_prompt_embeds],
+                        control_extension=control_extension,
                     )
-
-                    # Run forward pass
-                    if apply_control:
-                        model_out_list, _ = z_image_forward_with_control(
-                            transformer=transformer,
-                            x=latent_model_input_list,
-                            t=timestep,
-                            cap_feats=[pos_prompt_embeds],
-                            control_extension=control_extension,
-                        )
-                    else:
-                        model_output = transformer(
-                            x=latent_model_input_list,
-                            t=timestep,
-                            cap_feats=[pos_prompt_embeds],
-                        )
-                        model_out_list = model_output[0]
-
-                    noise_pred_cond = torch.stack([t.float() for t in model_out_list], dim=0)
-                    noise_pred_cond = noise_pred_cond.squeeze(2)
-                    noise_pred_cond = -noise_pred_cond  # Z-Image uses v-prediction with negation
-
-                    # Apply CFG if enabled
-                    if do_classifier_free_guidance and neg_prompt_embeds is not None:
-                        if apply_control:
-                            model_out_list_uncond, _ = z_image_forward_with_control(
-                                transformer=transformer,
-                                x=latent_model_input_list,
-                                t=timestep,
-                                cap_feats=[neg_prompt_embeds],
-                                control_extension=control_extension,
-                            )
-                        else:
-                            model_output_uncond = transformer(
-                                x=latent_model_input_list,
-                                t=timestep,
-                                cap_feats=[neg_prompt_embeds],
-                            )
-                            model_out_list_uncond = model_output_uncond[0]
-
-                        noise_pred_uncond = torch.stack([t.float() for t in model_out_list_uncond], dim=0)
-                        noise_pred_uncond = noise_pred_uncond.squeeze(2)
-                        noise_pred_uncond = -noise_pred_uncond
-                        noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_cond - noise_pred_uncond)
-                    else:
-                        noise_pred = noise_pred_cond
-
-                    # Use scheduler.step() for the update
-                    step_output = scheduler.step(model_output=noise_pred, timestep=sched_timestep, sample=latents)
-                    latents = step_output.prev_sample
-
-                    # Get sigma_prev for inpainting (next sigma value)
-                    if step_index + 1 < len(scheduler.sigmas):
-                        sigma_prev = scheduler.sigmas[step_index + 1].item()
-                    else:
-                        sigma_prev = 0.0
-
-                    if inpaint_extension is not None:
-                        latents = inpaint_extension.merge_intermediate_latents_with_init_latents(latents, sigma_prev)
-
-                    # For Heun, only increment user step after second-order step completes
-                    if is_heun:
-                        if not in_first_order:
-                            user_step += 1
-                            # Only call step_callback if we haven't exceeded total_steps
-                            if user_step <= total_steps:
-                                pbar.update(1)
-                                step_callback(
-                                    PipelineIntermediateState(
-                                        step=user_step,
-                                        order=2,
-                                        total_steps=total_steps,
-                                        timestep=int(sigma_curr * 1000),
-                                        latents=latents,
-                                    ),
-                                )
-                    else:
-                        # For LCM and other first-order schedulers
-                        user_step += 1
-                        # Only call step_callback if we haven't exceeded total_steps
-                        # (LCM scheduler may have more internal steps than user-facing steps)
-                        if user_step <= total_steps:
-                            pbar.update(1)
-                            step_callback(
-                                PipelineIntermediateState(
-                                    step=user_step,
-                                    order=1,
-                                    total_steps=total_steps,
-                                    timestep=int(sigma_curr * 1000),
-                                    latents=latents,
-                                ),
-                            )
-                pbar.close()
-            else:
-                # Original Euler implementation (default, optimized for Z-Image)
-                for step_idx in tqdm(range(total_steps)):
-                    sigma_curr = sigmas[step_idx]
-                    sigma_prev = sigmas[step_idx + 1]
-
-                    # Timestep tensor for Z-Image model
-                    # The model expects t=0 at start (noise) and t=1 at end (clean)
-                    # Sigma goes from 1 (noise) to 0 (clean), so model_t = 1 - sigma
-                    model_t = 1.0 - sigma_curr
-                    timestep = torch.tensor([model_t], device=device, dtype=inference_dtype).expand(latents.shape[0])
-
-                    # Run transformer for positive prediction
-                    # Z-Image transformer expects: x as list of [C, 1, H, W] tensors, t, cap_feats as list
-                    # Prepare latent input: [B, C, H, W] -> [B, C, 1, H, W] -> list of [C, 1, H, W]
-                    latent_model_input = latents.to(transformer.dtype)
-                    latent_model_input = latent_model_input.unsqueeze(2)  # Add frame dimension
-                    latent_model_input_list = list(latent_model_input.unbind(dim=0))
-
-                    # Determine if control should be applied at this step
-                    apply_control = control_extension is not None and control_extension.should_apply(
-                        step_idx, total_steps
+                else:
+                    model_output = transformer(
+                        x=latent_model_input_list,
+                        t=timestep,
+                        cap_feats=[pos_prompt_embeds],
                     )
+                    model_out_list = model_output[0]  # Extract list of tensors from tuple
+
+                noise_pred_cond = torch.stack([t.float() for t in model_out_list], dim=0)
+                noise_pred_cond = noise_pred_cond.squeeze(2)  # Remove frame dimension
+                noise_pred_cond = -noise_pred_cond  # Z-Image uses v-prediction with negation

-                    # Run forward pass - use custom forward with control if extension is active
+                # Apply CFG if enabled
+                if do_classifier_free_guidance and neg_prompt_embeds is not None:
                     if apply_control:
-                        model_out_list, _ = z_image_forward_with_control(
+                        model_out_list_uncond, _ = z_image_forward_with_control(
                             transformer=transformer,
                             x=latent_model_input_list,
                             t=timestep,
-                            cap_feats=[pos_prompt_embeds],
+                            cap_feats=[neg_prompt_embeds],
                             control_extension=control_extension,
                         )
                     else:
-                        model_output = transformer(
+                        model_output_uncond = transformer(
                             x=latent_model_input_list,
                             t=timestep,
-                            cap_feats=[pos_prompt_embeds],
+                            cap_feats=[neg_prompt_embeds],
                         )
-                        model_out_list = model_output[0]  # Extract list of tensors from tuple
-
-                    noise_pred_cond = torch.stack([t.float() for t in model_out_list], dim=0)
-                    noise_pred_cond = noise_pred_cond.squeeze(2)  # Remove frame dimension
-                    noise_pred_cond = -noise_pred_cond  # Z-Image uses v-prediction with negation
-
-                    # Apply CFG if enabled
-                    if do_classifier_free_guidance and neg_prompt_embeds is not None:
-                        if apply_control:
-                            model_out_list_uncond, _ = z_image_forward_with_control(
-                                transformer=transformer,
-                                x=latent_model_input_list,
-                                t=timestep,
-                                cap_feats=[neg_prompt_embeds],
-                                control_extension=control_extension,
-                            )
-                        else:
-                            model_output_uncond = transformer(
-                                x=latent_model_input_list,
-                                t=timestep,
-                                cap_feats=[neg_prompt_embeds],
-                            )
-                            model_out_list_uncond = model_output_uncond[0]  # Extract list of tensors from tuple
-
-                        noise_pred_uncond = torch.stack([t.float() for t in model_out_list_uncond], dim=0)
-                        noise_pred_uncond = noise_pred_uncond.squeeze(2)
-                        noise_pred_uncond = -noise_pred_uncond
-                        noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_cond - noise_pred_uncond)
-                    else:
-                        noise_pred = noise_pred_cond
-
-                    # Euler step
-                    latents_dtype = latents.dtype
-                    latents = latents.to(dtype=torch.float32)
-                    latents = latents + (sigma_prev - sigma_curr) * noise_pred
-                    latents = latents.to(dtype=latents_dtype)
-
-                    if inpaint_extension is not None:
-                        latents = inpaint_extension.merge_intermediate_latents_with_init_latents(latents, sigma_prev)
-
-                    step_callback(
-                        PipelineIntermediateState(
-                            step=step_idx + 1,
-                            order=1,
-                            total_steps=total_steps,
-                            timestep=int(sigma_curr * 1000),
-                            latents=latents,
-                        ),
-                    )
+                        model_out_list_uncond = model_output_uncond[0]  # Extract list of tensors from tuple
+
+                    noise_pred_uncond = torch.stack([t.float() for t in model_out_list_uncond], dim=0)
+                    noise_pred_uncond = noise_pred_uncond.squeeze(2)
+                    noise_pred_uncond = -noise_pred_uncond
+                    noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_cond - noise_pred_uncond)
+                else:
+                    noise_pred = noise_pred_cond
+
+                # Euler step
+                latents_dtype = latents.dtype
+                latents = latents.to(dtype=torch.float32)
+                latents = latents + (sigma_prev - sigma_curr) * noise_pred
+                latents = latents.to(dtype=latents_dtype)
+
+                if inpaint_extension is not None:
+                    latents = inpaint_extension.merge_intermediate_latents_with_init_latents(latents, sigma_prev)
+
+                step_callback(
+                    PipelineIntermediateState(
+                        step=step_idx + 1,
+                        order=1,
+                        total_steps=total_steps,
+                        timestep=int(sigma_curr * 1000),
+                        latents=latents,
+                    ),
+                )

         return latents

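
Stripped of CFG, control conditioning, inpainting, and progress callbacks, the loop kept on the right-hand side is a plain first-order Euler integration over the sigma schedule. A self-contained sketch of that update, where velocity_fn stands in for the negated transformer output (not InvokeAI's actual API):

import torch

def euler_denoise_sketch(latents: torch.Tensor, sigmas: list[float], velocity_fn) -> torch.Tensor:
    """Minimal sketch of the Euler update performed by the retained denoising loop.

    `velocity_fn` is a stand-in for the (CFG-combined, negated) Z-Image transformer output;
    the real loop also handles control conditioning, inpainting, and step callbacks.
    """
    for step_idx in range(len(sigmas) - 1):
        sigma_curr = sigmas[step_idx]
        sigma_prev = sigmas[step_idx + 1]
        # Z-Image timestep convention: t = 1 - sigma (t=0 is noise, t=1 is clean).
        model_t = 1.0 - sigma_curr
        noise_pred = velocity_fn(latents, model_t)
        # First-order Euler step from sigma_curr toward sigma_prev.
        latents = latents + (sigma_prev - sigma_curr) * noise_pred
    return latents

# Usage with a dummy velocity function and a 4-step schedule (values are illustrative):
sigmas = [1.0, 0.75, 0.5, 0.25, 0.0]
x = torch.randn(1, 16, 32, 32)
x = euler_denoise_sketch(x, sigmas, lambda lat, t: torch.zeros_like(lat))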

invokeai/app/services/config/config_default.py

@@ -85,7 +85,6 @@ class InvokeAIAppConfig(BaseSettings):
         max_cache_ram_gb: The maximum amount of CPU RAM to use for model caching in GB. If unset, the limit will be configured based on the available RAM. In most cases, it is recommended to leave this unset.
         max_cache_vram_gb: The amount of VRAM to use for model caching in GB. If unset, the limit will be configured based on the available VRAM and the device_working_mem_gb. In most cases, it is recommended to leave this unset.
         log_memory_usage: If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour.
-        model_cache_keep_alive_min: How long to keep models in cache after last use, in minutes. A value of 0 (the default) means models are kept in cache indefinitely. If no model generations occur within the timeout period, the model cache is cleared using the same logic as the 'Clear Model Cache' button.
         device_working_mem_gb: The amount of working memory to keep available on the compute device (in GB). Has no effect if running on CPU. If you are experiencing OOM errors, try increasing this value.
         enable_partial_loading: Enable partial loading of models. This enables models to run with reduced VRAM requirements (at the cost of slower speed) by streaming the model from RAM to VRAM as its used. In some edge cases, partial loading can cause models to run more slowly if they were previously being fully loaded into VRAM.
         keep_ram_copy_of_weights: Whether to keep a full RAM copy of a model's weights when the model is loaded in VRAM. Keeping a RAM copy increases average RAM usage, but speeds up model switching and LoRA patching (assuming there is sufficient RAM). Set this to False if RAM pressure is consistently high.
@@ -166,10 +165,9 @@ class InvokeAIAppConfig(BaseSettings):
     max_cache_ram_gb: Optional[float] = Field(default=None, gt=0, description="The maximum amount of CPU RAM to use for model caching in GB. If unset, the limit will be configured based on the available RAM. In most cases, it is recommended to leave this unset.")
     max_cache_vram_gb: Optional[float] = Field(default=None, ge=0, description="The amount of VRAM to use for model caching in GB. If unset, the limit will be configured based on the available VRAM and the device_working_mem_gb. In most cases, it is recommended to leave this unset.")
     log_memory_usage: bool = Field(default=False, description="If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour.")
-    model_cache_keep_alive_min: float = Field(default=0, ge=0, description="How long to keep models in cache after last use, in minutes. A value of 0 (the default) means models are kept in cache indefinitely. If no model generations occur within the timeout period, the model cache is cleared using the same logic as the 'Clear Model Cache' button.")
     device_working_mem_gb: float = Field(default=3, description="The amount of working memory to keep available on the compute device (in GB). Has no effect if running on CPU. If you are experiencing OOM errors, try increasing this value.")
     enable_partial_loading: bool = Field(default=False, description="Enable partial loading of models. This enables models to run with reduced VRAM requirements (at the cost of slower speed) by streaming the model from RAM to VRAM as its used. In some edge cases, partial loading can cause models to run more slowly if they were previously being fully loaded into VRAM.")
-    keep_ram_copy_of_weights: bool = Field(default=True, description="Whether to keep a full RAM copy of a model's weights when the model is loaded in VRAM. Keeping a RAM copy increases average RAM usage, but speeds up model switching and LoRA patching (assuming there is sufficient RAM). Set this to False if RAM pressure is consistently high.")
+    keep_ram_copy_of_weights: bool = Field(default=True, description="Whether to keep a full RAM copy of a model's weights when the model is loaded in VRAM. Keeping a RAM copy increases average RAM usage, but speeds up model switching and LoRA patching (assuming there is sufficient RAM). Set this to False if RAM pressure is consistently high.")
     # Deprecated CACHE configs
     ram: Optional[float] = Field(default=None, gt=0, description="DEPRECATED: This setting is no longer used. It has been replaced by `max_cache_ram_gb`, but most users will not need to use this config since automatic cache size limits should work well in most cases. This config setting will be removed once the new model cache behavior is stable.")
     vram: Optional[float] = Field(default=None, ge=0, description="DEPRECATED: This setting is no longer used. It has been replaced by `max_cache_vram_gb`, but most users will not need to use this config since automatic cache size limits should work well in most cases. This config setting will be removed once the new model cache behavior is stable.")
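
The cache settings that remain after this change are ordinary fields on InvokeAIAppConfig and are normally supplied via invokeai.yaml. A minimal sketch of overriding a few of them programmatically (the values are arbitrary examples):

from invokeai.app.services.config.config_default import InvokeAIAppConfig

# Arbitrary example values; in a real install these come from invokeai.yaml.
config = InvokeAIAppConfig(
    max_cache_ram_gb=24.0,           # cap CPU RAM used for model caching
    enable_partial_loading=True,     # stream weights RAM -> VRAM to lower VRAM requirements
    keep_ram_copy_of_weights=False,  # trade slower model switching for lower RAM pressure
)
print(config.device_working_mem_gb)  # defaults to 3 GB unless overridden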

invokeai/app/services/model_manager/model_manager_default.py

@@ -60,10 +60,6 @@ class ModelManagerService(ModelManagerServiceBase):
             service.start(invoker)

     def stop(self, invoker: Invoker) -> None:
-        # Shutdown the model cache to cancel any pending timers
-        if hasattr(self._load, "ram_cache"):
-            self._load.ram_cache.shutdown()
-
         for service in [self._store, self._install, self._load]:
             if hasattr(service, "stop"):
                 service.stop(invoker)
@@ -92,10 +88,7 @@ class ModelManagerService(ModelManagerServiceBase):
             max_ram_cache_size_gb=app_config.max_cache_ram_gb,
             max_vram_cache_size_gb=app_config.max_cache_vram_gb,
             execution_device=execution_device or TorchDevice.choose_torch_device(),
-            storage_device="cpu",
-            log_memory_usage=app_config.log_memory_usage,
             logger=logger,
-            keep_alive_minutes=app_config.model_cache_keep_alive_min,
         )
         loader = ModelLoadService(
             app_config=app_config,