invokeai-6.10.0rc2-py3-none-any.whl → invokeai-6.11.0rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. invokeai/app/api/routers/model_manager.py +43 -1
  2. invokeai/app/invocations/fields.py +1 -1
  3. invokeai/app/invocations/flux2_denoise.py +499 -0
  4. invokeai/app/invocations/flux2_klein_model_loader.py +222 -0
  5. invokeai/app/invocations/flux2_klein_text_encoder.py +222 -0
  6. invokeai/app/invocations/flux2_vae_decode.py +106 -0
  7. invokeai/app/invocations/flux2_vae_encode.py +88 -0
  8. invokeai/app/invocations/flux_denoise.py +50 -3
  9. invokeai/app/invocations/flux_lora_loader.py +1 -1
  10. invokeai/app/invocations/ideal_size.py +6 -1
  11. invokeai/app/invocations/metadata.py +4 -0
  12. invokeai/app/invocations/metadata_linked.py +47 -0
  13. invokeai/app/invocations/model.py +1 -0
  14. invokeai/app/invocations/z_image_denoise.py +8 -3
  15. invokeai/app/invocations/z_image_image_to_latents.py +9 -1
  16. invokeai/app/invocations/z_image_latents_to_image.py +9 -1
  17. invokeai/app/invocations/z_image_seed_variance_enhancer.py +110 -0
  18. invokeai/app/services/config/config_default.py +3 -1
  19. invokeai/app/services/invocation_stats/invocation_stats_common.py +6 -6
  20. invokeai/app/services/invocation_stats/invocation_stats_default.py +9 -4
  21. invokeai/app/services/model_manager/model_manager_default.py +7 -0
  22. invokeai/app/services/model_records/model_records_base.py +4 -2
  23. invokeai/app/services/shared/invocation_context.py +15 -0
  24. invokeai/app/services/shared/sqlite/sqlite_util.py +2 -0
  25. invokeai/app/services/shared/sqlite_migrator/migrations/migration_25.py +61 -0
  26. invokeai/app/util/step_callback.py +42 -0
  27. invokeai/backend/flux/denoise.py +239 -204
  28. invokeai/backend/flux/dype/__init__.py +18 -0
  29. invokeai/backend/flux/dype/base.py +226 -0
  30. invokeai/backend/flux/dype/embed.py +116 -0
  31. invokeai/backend/flux/dype/presets.py +141 -0
  32. invokeai/backend/flux/dype/rope.py +110 -0
  33. invokeai/backend/flux/extensions/dype_extension.py +91 -0
  34. invokeai/backend/flux/util.py +35 -1
  35. invokeai/backend/flux2/__init__.py +4 -0
  36. invokeai/backend/flux2/denoise.py +261 -0
  37. invokeai/backend/flux2/ref_image_extension.py +294 -0
  38. invokeai/backend/flux2/sampling_utils.py +209 -0
  39. invokeai/backend/model_manager/configs/factory.py +19 -1
  40. invokeai/backend/model_manager/configs/main.py +395 -3
  41. invokeai/backend/model_manager/configs/qwen3_encoder.py +116 -7
  42. invokeai/backend/model_manager/configs/vae.py +104 -2
  43. invokeai/backend/model_manager/load/load_default.py +0 -1
  44. invokeai/backend/model_manager/load/model_cache/model_cache.py +107 -2
  45. invokeai/backend/model_manager/load/model_loaders/flux.py +1007 -2
  46. invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +0 -1
  47. invokeai/backend/model_manager/load/model_loaders/z_image.py +121 -28
  48. invokeai/backend/model_manager/starter_models.py +128 -0
  49. invokeai/backend/model_manager/taxonomy.py +31 -4
  50. invokeai/backend/model_manager/util/select_hf_files.py +3 -2
  51. invokeai/backend/util/vae_working_memory.py +0 -2
  52. invokeai/frontend/web/dist/assets/App-ClpIJstk.js +161 -0
  53. invokeai/frontend/web/dist/assets/{browser-ponyfill-BP0RxJ4G.js → browser-ponyfill-Cw07u5G1.js} +1 -1
  54. invokeai/frontend/web/dist/assets/{index-B44qKjrs.js → index-DSKM8iGj.js} +69 -69
  55. invokeai/frontend/web/dist/index.html +1 -1
  56. invokeai/frontend/web/dist/locales/en.json +58 -5
  57. invokeai/frontend/web/dist/locales/it.json +2 -1
  58. invokeai/version/invokeai_version.py +1 -1
  59. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/METADATA +7 -1
  60. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/RECORD +66 -49
  61. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/WHEEL +1 -1
  62. invokeai/frontend/web/dist/assets/App-DllqPQ3j.js +0 -161
  63. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/entry_points.txt +0 -0
  64. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE +0 -0
  65. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
  66. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
  67. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/top_level.txt +0 -0
invokeai/app/invocations/flux_denoise.py
@@ -32,6 +32,8 @@ from invokeai.app.services.shared.invocation_context import InvocationContext
  from invokeai.backend.flux.controlnet.instantx_controlnet_flux import InstantXControlNetFlux
  from invokeai.backend.flux.controlnet.xlabs_controlnet_flux import XLabsControlNetFlux
  from invokeai.backend.flux.denoise import denoise
+ from invokeai.backend.flux.dype.presets import DyPEPreset, get_dype_config_from_preset
+ from invokeai.backend.flux.extensions.dype_extension import DyPEExtension
  from invokeai.backend.flux.extensions.instantx_controlnet_extension import InstantXControlNetExtension
  from invokeai.backend.flux.extensions.kontext_extension import KontextExtension
  from invokeai.backend.flux.extensions.regional_prompting_extension import RegionalPromptingExtension
@@ -64,7 +66,7 @@ from invokeai.backend.util.devices import TorchDevice
      title="FLUX Denoise",
      tags=["image", "flux"],
      category="image",
-     version="4.2.0",
+     version="4.3.0",
  )
  class FluxDenoiseInvocation(BaseInvocation):
      """Run denoising process with a FLUX transformer model."""
@@ -166,6 +168,24 @@ class FluxDenoiseInvocation(BaseInvocation):
          input=Input.Connection,
      )

+     # DyPE (Dynamic Position Extrapolation) for high-resolution generation
+     dype_preset: DyPEPreset = InputField(
+         default=DyPEPreset.OFF,
+         description="DyPE preset for high-resolution generation. 'auto' enables automatically for resolutions > 1536px. '4k' uses optimized settings for 4K output.",
+     )
+     dype_scale: Optional[float] = InputField(
+         default=None,
+         ge=0.0,
+         le=8.0,
+         description="DyPE magnitude (λs). Higher values = stronger extrapolation. Only used when dype_preset is not 'off'.",
+     )
+     dype_exponent: Optional[float] = InputField(
+         default=None,
+         ge=0.0,
+         le=1000.0,
+         description="DyPE decay speed (λt). Controls transition from low to high frequency detail. Only used when dype_preset is not 'off'.",
+     )
+
      @torch.no_grad()
      def invoke(self, context: InvocationContext) -> LatentsOutput:
          latents = self._run_diffusion(context)
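Note: the three dype_* fields above feed the helpers imported at the top of this file. A minimal sketch of how a preset might resolve into a config and extension, mirroring the wiring shown further down in this diff (illustrative only; the example resolution values are arbitrary, and the behaviour of returning None when DyPE stays disabled is inferred from the call site below):

    from invokeai.backend.flux.dype.presets import DyPEPreset, get_dype_config_from_preset
    from invokeai.backend.flux.extensions.dype_extension import DyPEExtension

    # Resolve the node's preset (plus optional manual overrides) into a concrete DyPE config.
    config = get_dype_config_from_preset(
        preset=DyPEPreset.OFF,   # OFF is the node default; other presets enable DyPE for large outputs
        width=3840,
        height=2160,
        custom_scale=None,       # optional dype_scale override
        custom_exponent=None,    # optional dype_exponent override
    )
    if config is not None:       # None appears to mean DyPE remains disabled
        extension = DyPEExtension(config=config, target_height=2160, target_width=3840)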
@@ -239,8 +259,14 @@
          )

          transformer_config = context.models.get_config(self.transformer.transformer)
-         assert transformer_config.base is BaseModelType.Flux and transformer_config.type is ModelType.Main
-         is_schnell = transformer_config.variant is FluxVariantType.Schnell
+         assert (
+             transformer_config.base in (BaseModelType.Flux, BaseModelType.Flux2)
+             and transformer_config.type is ModelType.Main
+         )
+         # Schnell is only for FLUX.1, FLUX.2 Klein behaves like Dev (with guidance)
+         is_schnell = (
+             transformer_config.base is BaseModelType.Flux and transformer_config.variant is FluxVariantType.Schnell
+         )

          # Calculate the timestep schedule.
          timesteps = get_schedule(
@@ -422,6 +448,26 @@
              kontext_extension.ensure_batch_size(x.shape[0])
              img_cond_seq, img_cond_seq_ids = kontext_extension.kontext_latents, kontext_extension.kontext_ids

+         # Prepare DyPE extension for high-resolution generation
+         dype_extension: DyPEExtension | None = None
+         dype_config = get_dype_config_from_preset(
+             preset=self.dype_preset,
+             width=self.width,
+             height=self.height,
+             custom_scale=self.dype_scale,
+             custom_exponent=self.dype_exponent,
+         )
+         if dype_config is not None:
+             dype_extension = DyPEExtension(
+                 config=dype_config,
+                 target_height=self.height,
+                 target_width=self.width,
+             )
+             context.logger.info(
+                 f"DyPE enabled: {self.width}x{self.height}, preset={self.dype_preset.value}, "
+                 f"scale={dype_config.dype_scale:.2f}, method={dype_config.method}"
+             )
+
          x = denoise(
              model=transformer,
              img=x,
@@ -439,6 +485,7 @@
              img_cond=img_cond,
              img_cond_seq=img_cond_seq,
              img_cond_seq_ids=img_cond_seq_ids,
+             dype_extension=dype_extension,
              scheduler=scheduler,
          )

invokeai/app/invocations/flux_lora_loader.py
@@ -162,7 +162,7 @@ class FLUXLoRACollectionLoader(BaseInvocation):
              if not context.models.exists(lora.lora.key):
                  raise Exception(f"Unknown lora: {lora.lora.key}!")

-             assert lora.lora.base is BaseModelType.Flux
+             assert lora.lora.base in (BaseModelType.Flux, BaseModelType.Flux2)
              added_loras.append(lora.lora.key)

invokeai/app/invocations/ideal_size.py
@@ -46,7 +46,12 @@ class IdealSizeInvocation(BaseInvocation):
              dimension = 512
          elif unet_config.base == BaseModelType.StableDiffusion2:
              dimension = 768
-         elif unet_config.base in (BaseModelType.StableDiffusionXL, BaseModelType.Flux, BaseModelType.StableDiffusion3):
+         elif unet_config.base in (
+             BaseModelType.StableDiffusionXL,
+             BaseModelType.Flux,
+             BaseModelType.Flux2,
+             BaseModelType.StableDiffusion3,
+         ):
              dimension = 1024
          else:
              raise ValueError(f"Unsupported model type: {unet_config.base}")
invokeai/app/invocations/metadata.py
@@ -150,6 +150,10 @@ GENERATION_MODES = Literal[
      "flux_img2img",
      "flux_inpaint",
      "flux_outpaint",
+     "flux2_txt2img",
+     "flux2_img2img",
+     "flux2_inpaint",
+     "flux2_outpaint",
      "sd3_txt2img",
      "sd3_img2img",
      "sd3_inpaint",
invokeai/app/invocations/metadata_linked.py
@@ -52,6 +52,7 @@ from invokeai.app.invocations.primitives import (
  )
  from invokeai.app.invocations.scheduler import SchedulerOutput
  from invokeai.app.invocations.t2i_adapter import T2IAdapterField, T2IAdapterInvocation
+ from invokeai.app.invocations.z_image_denoise import ZImageDenoiseInvocation
  from invokeai.app.services.shared.invocation_context import InvocationContext
  from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelType, SubModelType
  from invokeai.backend.stable_diffusion.schedulers.schedulers import SCHEDULER_NAME_VALUES
@@ -729,6 +730,52 @@ class FluxDenoiseLatentsMetaInvocation(FluxDenoiseInvocation, WithMetadata):
          return LatentsMetaOutput(**params, metadata=MetadataField.model_validate(md))


+ @invocation(
+     "z_image_denoise_meta",
+     title=f"{ZImageDenoiseInvocation.UIConfig.title} + Metadata",
+     tags=["z-image", "latents", "denoise", "txt2img", "t2i", "t2l", "img2img", "i2i", "l2l"],
+     category="latents",
+     version="1.0.0",
+ )
+ class ZImageDenoiseMetaInvocation(ZImageDenoiseInvocation, WithMetadata):
+     """Run denoising process with a Z-Image transformer model + metadata."""
+
+     def invoke(self, context: InvocationContext) -> LatentsMetaOutput:
+         def _loras_to_json(obj: Union[Any, list[Any]]):
+             if not isinstance(obj, list):
+                 obj = [obj]
+
+             output: list[dict[str, Any]] = []
+             for item in obj:
+                 output.append(
+                     LoRAMetadataField(
+                         model=item.lora,
+                         weight=item.weight,
+                     ).model_dump(exclude_none=True, exclude={"id", "type", "is_intermediate", "use_cache"})
+                 )
+             return output
+
+         obj = super().invoke(context)
+
+         md: Dict[str, Any] = {} if self.metadata is None else self.metadata.root
+         md.update({"width": obj.width})
+         md.update({"height": obj.height})
+         md.update({"steps": self.steps})
+         md.update({"guidance": self.guidance_scale})
+         md.update({"denoising_start": self.denoising_start})
+         md.update({"denoising_end": self.denoising_end})
+         md.update({"scheduler": self.scheduler})
+         md.update({"model": self.transformer.transformer})
+         md.update({"seed": self.seed})
+         if len(self.transformer.loras) > 0:
+             md.update({"loras": _loras_to_json(self.transformer.loras)})
+
+         params = obj.__dict__.copy()
+         del params["type"]
+
+         return LatentsMetaOutput(**params, metadata=MetadataField.model_validate(md))
+
+
  @invocation(
      "metadata_to_vae",
      title="Metadata To VAE",
invokeai/app/invocations/model.py
@@ -510,6 +510,7 @@ class VAELoaderInvocation(BaseInvocation):
              BaseModelType.StableDiffusionXL,
              BaseModelType.StableDiffusion3,
              BaseModelType.Flux,
+             BaseModelType.Flux2,
          ],
          ui_model_type=ModelType.VAE,
      )
invokeai/app/invocations/z_image_denoise.py
@@ -50,7 +50,7 @@ from invokeai.backend.z_image.z_image_transformer_patch import patch_transformer
      title="Denoise - Z-Image",
      tags=["image", "z-image"],
      category="image",
-     version="1.3.0",
+     version="1.4.0",
      classification=Classification.Prototype,
  )
  class ZImageDenoiseInvocation(BaseInvocation):
@@ -69,6 +69,7 @@ class ZImageDenoiseInvocation(BaseInvocation):
      )
      denoising_start: float = InputField(default=0.0, ge=0, le=1, description=FieldDescriptions.denoising_start)
      denoising_end: float = InputField(default=1.0, ge=0, le=1, description=FieldDescriptions.denoising_end)
+     add_noise: bool = InputField(default=True, description="Add noise based on denoising start.")
      transformer: TransformerField = InputField(
          description=FieldDescriptions.z_image_model, input=Input.Connection, title="Transformer"
      )
@@ -347,8 +348,12 @@ class ZImageDenoiseInvocation(BaseInvocation):

          # Prepare input latent image
          if init_latents is not None:
-             s_0 = sigmas[0]
-             latents = s_0 * noise + (1.0 - s_0) * init_latents
+             if self.add_noise:
+                 # Noise the init_latents by the appropriate amount for the first timestep.
+                 s_0 = sigmas[0]
+                 latents = s_0 * noise + (1.0 - s_0) * init_latents
+             else:
+                 latents = init_latents
          else:
              if self.denoising_start > 1e-5:
                  raise ValueError("denoising_start should be 0 when initial latents are not provided.")
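Note: with add_noise enabled (the default), the branch above performs the usual rectified-flow blend of noise and init latents at the first sigma; with it disabled, the incoming latents are passed through unchanged, presumably for workflows where they are already noised. A standalone restatement for clarity (tensor shapes are placeholders, not the real Z-Image latent shape):

    import torch

    sigma_0 = 0.7                              # stands in for sigmas[0]
    init_latents = torch.randn(1, 16, 64, 64)  # placeholder latents
    noise = torch.randn_like(init_latents)

    add_noise = True
    latents = sigma_0 * noise + (1.0 - sigma_0) * init_latents if add_noise else init_latents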
invokeai/app/invocations/z_image_image_to_latents.py
@@ -20,6 +20,7 @@ from invokeai.backend.flux.modules.autoencoder import AutoEncoder as FluxAutoEnc
  from invokeai.backend.model_manager.load.load_base import LoadedModel
  from invokeai.backend.stable_diffusion.diffusers_pipeline import image_resized_to_grid_as_tensor
  from invokeai.backend.util.devices import TorchDevice
+ from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_flux

  # Z-Image can use either the Diffusers AutoencoderKL or the FLUX AutoEncoder
  ZImageVAE = Union[AutoencoderKL, FluxAutoEncoder]
@@ -47,7 +48,14 @@ class ZImageImageToLatentsInvocation(BaseInvocation, WithMetadata, WithBoard):
                  "Ensure you are using a compatible VAE model."
              )

-         with vae_info.model_on_device() as (_, vae):
+         # Estimate working memory needed for VAE encode
+         estimated_working_memory = estimate_vae_working_memory_flux(
+             operation="encode",
+             image_tensor=image_tensor,
+             vae=vae_info.model,
+         )
+
+         with vae_info.model_on_device(working_mem_bytes=estimated_working_memory) as (_, vae):
              if not isinstance(vae, (AutoencoderKL, FluxAutoEncoder)):
                  raise TypeError(
                      f"Expected AutoencoderKL or FluxAutoEncoder, got {type(vae).__name__}. "
invokeai/app/invocations/z_image_latents_to_image.py
@@ -21,6 +21,7 @@ from invokeai.app.services.shared.invocation_context import InvocationContext
  from invokeai.backend.flux.modules.autoencoder import AutoEncoder as FluxAutoEncoder
  from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
  from invokeai.backend.util.devices import TorchDevice
+ from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_flux

  # Z-Image can use either the Diffusers AutoencoderKL or the FLUX AutoEncoder
  ZImageVAE = Union[AutoencoderKL, FluxAutoEncoder]
@@ -53,12 +54,19 @@ class ZImageLatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):

          is_flux_vae = isinstance(vae_info.model, FluxAutoEncoder)

+         # Estimate working memory needed for VAE decode
+         estimated_working_memory = estimate_vae_working_memory_flux(
+             operation="decode",
+             image_tensor=latents,
+             vae=vae_info.model,
+         )
+
          # FLUX VAE doesn't support seamless, so only apply for AutoencoderKL
          seamless_context = (
              nullcontext() if is_flux_vae else SeamlessExt.static_patch_model(vae_info.model, self.vae.seamless_axes)
          )

-         with seamless_context, vae_info.model_on_device() as (_, vae):
+         with seamless_context, vae_info.model_on_device(working_mem_bytes=estimated_working_memory) as (_, vae):
              context.util.signal_progress("Running VAE")
              if not isinstance(vae, (AutoencoderKL, FluxAutoEncoder)):
                  raise TypeError(
invokeai/app/invocations/z_image_seed_variance_enhancer.py
@@ -0,0 +1,110 @@
+ import torch
+
+ from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
+ from invokeai.app.invocations.fields import (
+     FieldDescriptions,
+     Input,
+     InputField,
+     ZImageConditioningField,
+ )
+ from invokeai.app.invocations.primitives import ZImageConditioningOutput
+ from invokeai.app.services.shared.invocation_context import InvocationContext
+ from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
+     ConditioningFieldData,
+     ZImageConditioningInfo,
+ )
+
+
+ @invocation(
+     "z_image_seed_variance_enhancer",
+     title="Seed Variance Enhancer - Z-Image",
+     tags=["conditioning", "z-image", "variance", "seed"],
+     category="conditioning",
+     version="1.0.0",
+     classification=Classification.Prototype,
+ )
+ class ZImageSeedVarianceEnhancerInvocation(BaseInvocation):
+     """Adds seed-based noise to Z-Image conditioning to increase variance between seeds.
+
+     Z-Image-Turbo can produce relatively similar images with different seeds,
+     making it harder to explore variations of a prompt. This node implements
+     reproducible, seed-based noise injection into text embeddings to increase
+     visual variation while maintaining reproducibility.
+
+     The noise strength is auto-calibrated relative to the embedding's standard
+     deviation, ensuring consistent results across different prompts.
+     """
+
+     conditioning: ZImageConditioningField = InputField(
+         description=FieldDescriptions.cond,
+         input=Input.Connection,
+         title="Conditioning",
+     )
+     seed: int = InputField(
+         default=0,
+         ge=0,
+         description="Seed for reproducible noise generation. Different seeds produce different noise patterns.",
+     )
+     strength: float = InputField(
+         default=0.1,
+         ge=0.0,
+         le=2.0,
+         description="Noise strength as multiplier of embedding std. 0=off, 0.1=subtle, 0.5=strong.",
+     )
+     randomize_percent: float = InputField(
+         default=50.0,
+         ge=1.0,
+         le=100.0,
+         description="Percentage of embedding values to add noise to (1-100). Lower values create more selective noise patterns.",
+     )
+
+     @torch.no_grad()
+     def invoke(self, context: InvocationContext) -> ZImageConditioningOutput:
+         # Load conditioning data
+         cond_data = context.conditioning.load(self.conditioning.conditioning_name)
+         assert len(cond_data.conditionings) == 1, "Expected exactly one conditioning tensor"
+         z_image_conditioning = cond_data.conditionings[0]
+         assert isinstance(z_image_conditioning, ZImageConditioningInfo), "Expected ZImageConditioningInfo"
+
+         # Early return if strength is zero (no modification needed)
+         if self.strength == 0:
+             return ZImageConditioningOutput(conditioning=self.conditioning)
+
+         # Clone embeddings to avoid modifying the original
+         prompt_embeds = z_image_conditioning.prompt_embeds.clone()
+
+         # Calculate actual noise strength based on embedding statistics
+         # This auto-calibration ensures consistent results across different prompts
+         embed_std = torch.std(prompt_embeds).item()
+         actual_strength = self.strength * embed_std
+
+         # Generate deterministic noise using the seed
+         generator = torch.Generator(device=prompt_embeds.device)
+         generator.manual_seed(self.seed)
+         noise = torch.rand(
+             prompt_embeds.shape, generator=generator, device=prompt_embeds.device, dtype=prompt_embeds.dtype
+         )
+         noise = noise * 2 - 1  # Scale to [-1, 1)
+         noise = noise * actual_strength
+
+         # Create selective mask for noise application
+         generator.manual_seed(self.seed + 1)
+         noise_mask = torch.bernoulli(
+             torch.ones_like(prompt_embeds) * (self.randomize_percent / 100.0),
+             generator=generator,
+         ).bool()
+
+         # Apply noise only to masked positions
+         prompt_embeds = prompt_embeds + (noise * noise_mask)
+
+         # Save modified conditioning
+         new_conditioning = ZImageConditioningInfo(prompt_embeds=prompt_embeds)
+         conditioning_data = ConditioningFieldData(conditionings=[new_conditioning])
+         conditioning_name = context.conditioning.save(conditioning_data)
+
+         return ZImageConditioningOutput(
+             conditioning=ZImageConditioningField(
+                 conditioning_name=conditioning_name,
+                 mask=self.conditioning.mask,
+             )
+         )
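Note: the heart of the enhancer above is a short sequence of tensor operations. The sketch below restates it as a standalone function for clarity (illustrative only; the embedding shape is a placeholder, not the real Z-Image conditioning shape):

    import torch

    def perturb_embeddings(embeds: torch.Tensor, seed: int, strength: float = 0.1, randomize_percent: float = 50.0) -> torch.Tensor:
        # Auto-calibrate the noise scale to the embedding's own standard deviation.
        actual_strength = strength * torch.std(embeds).item()

        # Seeded uniform noise in [-1, 1), scaled by the calibrated strength.
        gen = torch.Generator(device=embeds.device)
        gen.manual_seed(seed)
        noise = (torch.rand(embeds.shape, generator=gen, device=embeds.device, dtype=embeds.dtype) * 2 - 1) * actual_strength

        # Perturb only a random subset of positions, drawn with a second seeded pass.
        gen.manual_seed(seed + 1)
        mask = torch.bernoulli(torch.ones_like(embeds) * (randomize_percent / 100.0), generator=gen).bool()
        return embeds + noise * mask

    perturbed = perturb_embeddings(torch.randn(1, 77, 2048), seed=42)  # placeholder embedding tensor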
invokeai/app/services/config/config_default.py
@@ -85,6 +85,7 @@ class InvokeAIAppConfig(BaseSettings):
          max_cache_ram_gb: The maximum amount of CPU RAM to use for model caching in GB. If unset, the limit will be configured based on the available RAM. In most cases, it is recommended to leave this unset.
          max_cache_vram_gb: The amount of VRAM to use for model caching in GB. If unset, the limit will be configured based on the available VRAM and the device_working_mem_gb. In most cases, it is recommended to leave this unset.
          log_memory_usage: If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour.
+         model_cache_keep_alive_min: How long to keep models in cache after last use, in minutes. A value of 0 (the default) means models are kept in cache indefinitely. If no model generations occur within the timeout period, the model cache is cleared using the same logic as the 'Clear Model Cache' button.
          device_working_mem_gb: The amount of working memory to keep available on the compute device (in GB). Has no effect if running on CPU. If you are experiencing OOM errors, try increasing this value.
          enable_partial_loading: Enable partial loading of models. This enables models to run with reduced VRAM requirements (at the cost of slower speed) by streaming the model from RAM to VRAM as its used. In some edge cases, partial loading can cause models to run more slowly if they were previously being fully loaded into VRAM.
          keep_ram_copy_of_weights: Whether to keep a full RAM copy of a model's weights when the model is loaded in VRAM. Keeping a RAM copy increases average RAM usage, but speeds up model switching and LoRA patching (assuming there is sufficient RAM). Set this to False if RAM pressure is consistently high.
@@ -165,9 +166,10 @@ class InvokeAIAppConfig(BaseSettings):
      max_cache_ram_gb: Optional[float] = Field(default=None, gt=0, description="The maximum amount of CPU RAM to use for model caching in GB. If unset, the limit will be configured based on the available RAM. In most cases, it is recommended to leave this unset.")
      max_cache_vram_gb: Optional[float] = Field(default=None, ge=0, description="The amount of VRAM to use for model caching in GB. If unset, the limit will be configured based on the available VRAM and the device_working_mem_gb. In most cases, it is recommended to leave this unset.")
      log_memory_usage: bool = Field(default=False, description="If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour.")
+     model_cache_keep_alive_min: float = Field(default=0, ge=0, description="How long to keep models in cache after last use, in minutes. A value of 0 (the default) means models are kept in cache indefinitely. If no model generations occur within the timeout period, the model cache is cleared using the same logic as the 'Clear Model Cache' button.")
      device_working_mem_gb: float = Field(default=3, description="The amount of working memory to keep available on the compute device (in GB). Has no effect if running on CPU. If you are experiencing OOM errors, try increasing this value.")
      enable_partial_loading: bool = Field(default=False, description="Enable partial loading of models. This enables models to run with reduced VRAM requirements (at the cost of slower speed) by streaming the model from RAM to VRAM as its used. In some edge cases, partial loading can cause models to run more slowly if they were previously being fully loaded into VRAM.")
-     keep_ram_copy_of_weights: bool = Field(default=True, description="Whether to keep a full RAM copy of a model's weights when the model is loaded in VRAM. Keeping a RAM copy increases average RAM usage, but speeds up model switching and LoRA patching (assuming there is sufficient RAM). Set this to False if RAM pressure is consistently high.")
+     keep_ram_copy_of_weights: bool = Field(default=True, description="Whether to keep a full RAM copy of a model's weights when the model is loaded in VRAM. Keeping a RAM copy increases average RAM usage, but speeds up model switching and LoRA patching (assuming there is sufficient RAM). Set this to False if RAM pressure is consistently high.")

      # Deprecated CACHE configs
      ram: Optional[float] = Field(default=None, gt=0, description="DEPRECATED: This setting is no longer used. It has been replaced by `max_cache_ram_gb`, but most users will not need to use this config since automatic cache size limits should work well in most cases. This config setting will be removed once the new model cache behavior is stable.")
      vram: Optional[float] = Field(default=None, ge=0, description="DEPRECATED: This setting is no longer used. It has been replaced by `max_cache_vram_gb`, but most users will not need to use this config since automatic cache size limits should work well in most cases. This config setting will be removed once the new model cache behavior is stable.")
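Note: a minimal sketch of exercising the new setting programmatically (illustrative; most deployments would set model_cache_keep_alive_min in their InvokeAI configuration file rather than constructing the settings object directly):

    from invokeai.app.services.config import InvokeAIAppConfig

    # Clear idle models from the cache 30 minutes after their last use;
    # the default of 0 keeps them cached indefinitely.
    config = InvokeAIAppConfig(model_cache_keep_alive_min=30)
    print(config.model_cache_keep_alive_min)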
invokeai/app/services/invocation_stats/invocation_stats_common.py
@@ -14,7 +14,7 @@ class NodeExecutionStatsSummary:
      node_type: str
      num_calls: int
      time_used_seconds: float
-     peak_vram_gb: float
+     delta_vram_gb: float


  @dataclass
@@ -58,10 +58,10 @@ class InvocationStatsSummary:
      def __str__(self) -> str:
          _str = ""
          _str = f"Graph stats: {self.graph_stats.graph_execution_state_id}\n"
-         _str += f"{'Node':>30} {'Calls':>7} {'Seconds':>9} {'VRAM Used':>10}\n"
+         _str += f"{'Node':>30} {'Calls':>7} {'Seconds':>9} {'VRAM Change':+>10}\n"

          for summary in self.node_stats:
-             _str += f"{summary.node_type:>30} {summary.num_calls:>7} {summary.time_used_seconds:>8.3f}s {summary.peak_vram_gb:>9.3f}G\n"
+             _str += f"{summary.node_type:>30} {summary.num_calls:>7} {summary.time_used_seconds:>8.3f}s {summary.delta_vram_gb:+10.3f}G\n"

          _str += f"TOTAL GRAPH EXECUTION TIME: {self.graph_stats.execution_time_seconds:7.3f}s\n"

@@ -100,7 +100,7 @@ class NodeExecutionStats:
      start_ram_gb: float  # GB
      end_ram_gb: float  # GB

-     peak_vram_gb: float  # GB
+     delta_vram_gb: float  # GB

      def total_time(self) -> float:
          return self.end_time - self.start_time
@@ -174,9 +174,9 @@ class GraphExecutionStats:
          for node_type, node_type_stats_list in node_stats_by_type.items():
              num_calls = len(node_type_stats_list)
              time_used = sum([n.total_time() for n in node_type_stats_list])
-             peak_vram = max([n.peak_vram_gb for n in node_type_stats_list])
+             delta_vram = max([n.delta_vram_gb for n in node_type_stats_list])
              summary = NodeExecutionStatsSummary(
-                 node_type=node_type, num_calls=num_calls, time_used_seconds=time_used, peak_vram_gb=peak_vram
+                 node_type=node_type, num_calls=num_calls, time_used_seconds=time_used, delta_vram_gb=delta_vram
              )
              summaries.append(summary)

invokeai/app/services/invocation_stats/invocation_stats_default.py
@@ -52,8 +52,9 @@ class InvocationStatsService(InvocationStatsServiceBase):
          # Record state before the invocation.
          start_time = time.time()
          start_ram = psutil.Process().memory_info().rss
-         if torch.cuda.is_available():
-             torch.cuda.reset_peak_memory_stats()
+
+         # Remember current VRAM usage
+         vram_in_use = torch.cuda.memory_allocated() if torch.cuda.is_available() else 0.0

          assert services.model_manager.load is not None
          services.model_manager.load.ram_cache.stats = self._cache_stats[graph_execution_state_id]
@@ -62,14 +63,16 @@ class InvocationStatsService(InvocationStatsServiceBase):
              # Let the invocation run.
              yield None
          finally:
-             # Record state after the invocation.
+             # Record delta VRAM
+             delta_vram_gb = ((torch.cuda.memory_allocated() - vram_in_use) / GB) if torch.cuda.is_available() else 0.0
+
              node_stats = NodeExecutionStats(
                  invocation_type=invocation.get_type(),
                  start_time=start_time,
                  end_time=time.time(),
                  start_ram_gb=start_ram / GB,
                  end_ram_gb=psutil.Process().memory_info().rss / GB,
-                 peak_vram_gb=torch.cuda.max_memory_allocated() / GB if torch.cuda.is_available() else 0.0,
+                 delta_vram_gb=delta_vram_gb,
              )
              self._stats[graph_execution_state_id].add_node_execution_stats(node_stats)

@@ -81,6 +84,8 @@ class InvocationStatsService(InvocationStatsServiceBase):
          graph_stats_summary = self._get_graph_summary(graph_execution_state_id)
          node_stats_summaries = self._get_node_summaries(graph_execution_state_id)
          model_cache_stats_summary = self._get_model_cache_summary(graph_execution_state_id)
+         # Note: We use memory_allocated() here (not memory_reserved()) because we want to show
+         # the current actively-used VRAM, not the total reserved memory including PyTorch's cache.
          vram_usage_gb = torch.cuda.memory_allocated() / GB if torch.cuda.is_available() else None

          return InvocationStatsSummary(
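Note: per-node VRAM is now reported as the change in allocated memory across the node rather than the process-wide peak. A standalone sketch of the same measurement pattern (illustrative; GB is assumed to be the 2**30 constant used by the stats service):

    import torch

    GB = 2**30  # assumed to match the service's GB constant

    def run_and_measure_delta_vram_gb(fn) -> float:
        # Snapshot allocated VRAM, run the workload, and report the delta in GB.
        before = torch.cuda.memory_allocated() if torch.cuda.is_available() else 0
        fn()
        after = torch.cuda.memory_allocated() if torch.cuda.is_available() else 0
        return (after - before) / GB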
invokeai/app/services/model_manager/model_manager_default.py
@@ -60,6 +60,10 @@ class ModelManagerService(ModelManagerServiceBase):
                  service.start(invoker)

      def stop(self, invoker: Invoker) -> None:
+         # Shutdown the model cache to cancel any pending timers
+         if hasattr(self._load, "ram_cache"):
+             self._load.ram_cache.shutdown()
+
          for service in [self._store, self._install, self._load]:
              if hasattr(service, "stop"):
                  service.stop(invoker)
@@ -88,7 +92,10 @@ class ModelManagerService(ModelManagerServiceBase):
              max_ram_cache_size_gb=app_config.max_cache_ram_gb,
              max_vram_cache_size_gb=app_config.max_cache_vram_gb,
              execution_device=execution_device or TorchDevice.choose_torch_device(),
+             storage_device="cpu",
+             log_memory_usage=app_config.log_memory_usage,
              logger=logger,
+             keep_alive_minutes=app_config.model_cache_keep_alive_min,
          )
          loader = ModelLoadService(
              app_config=app_config,
invokeai/app/services/model_records/model_records_base.py
@@ -19,11 +19,13 @@ from invokeai.backend.model_manager.configs.main import MainModelDefaultSettings
  from invokeai.backend.model_manager.taxonomy import (
      BaseModelType,
      ClipVariantType,
+     Flux2VariantType,
      FluxVariantType,
      ModelFormat,
      ModelSourceType,
      ModelType,
      ModelVariantType,
+     Qwen3VariantType,
      SchedulerPredictionType,
  )

@@ -89,8 +91,8 @@ class ModelRecordChanges(BaseModelExcludeNull):

      # Checkpoint-specific changes
      # TODO(MM2): Should we expose these? Feels footgun-y...
-     variant: Optional[ModelVariantType | ClipVariantType | FluxVariantType] = Field(
-         description="The variant of the model.", default=None
+     variant: Optional[ModelVariantType | ClipVariantType | FluxVariantType | Flux2VariantType | Qwen3VariantType] = (
+         Field(description="The variant of the model.", default=None)
      )
      prediction_type: Optional[SchedulerPredictionType] = Field(
          description="The prediction type of the model.", default=None
invokeai/app/services/shared/invocation_context.py
@@ -630,6 +630,21 @@ class UtilInterface(InvocationContextInterface):
              is_canceled=self.is_canceled,
          )

+     def flux2_step_callback(self, intermediate_state: PipelineIntermediateState) -> None:
+         """
+         The step callback for FLUX.2 Klein models (32-channel VAE).
+
+         Args:
+             intermediate_state: The intermediate state of the diffusion pipeline.
+         """
+
+         diffusion_step_callback(
+             signal_progress=self.signal_progress,
+             intermediate_state=intermediate_state,
+             base_model=BaseModelType.Flux2,
+             is_canceled=self.is_canceled,
+         )
+
      def signal_progress(
          self,
          message: str,
invokeai/app/services/shared/sqlite/sqlite_util.py
@@ -27,6 +27,7 @@ from invokeai.app.services.shared.sqlite_migrator.migrations.migration_21 import
  from invokeai.app.services.shared.sqlite_migrator.migrations.migration_22 import build_migration_22
  from invokeai.app.services.shared.sqlite_migrator.migrations.migration_23 import build_migration_23
  from invokeai.app.services.shared.sqlite_migrator.migrations.migration_24 import build_migration_24
+ from invokeai.app.services.shared.sqlite_migrator.migrations.migration_25 import build_migration_25
  from invokeai.app.services.shared.sqlite_migrator.sqlite_migrator_impl import SqliteMigrator


@@ -71,6 +72,7 @@ def init_db(config: InvokeAIAppConfig, logger: Logger, image_files: ImageFileSto
      migrator.register_migration(build_migration_22(app_config=config, logger=logger))
      migrator.register_migration(build_migration_23(app_config=config, logger=logger))
      migrator.register_migration(build_migration_24(app_config=config, logger=logger))
+     migrator.register_migration(build_migration_25(app_config=config, logger=logger))
      migrator.run_migrations()

      return db
invokeai/app/services/shared/sqlite_migrator/migrations/migration_25.py
@@ -0,0 +1,61 @@
+ import json
+ import sqlite3
+ from logging import Logger
+ from typing import Any
+
+ from invokeai.app.services.config import InvokeAIAppConfig
+ from invokeai.app.services.shared.sqlite_migrator.sqlite_migrator_common import Migration
+ from invokeai.backend.model_manager.taxonomy import ModelType, Qwen3VariantType
+
+
+ class Migration25Callback:
+     def __init__(self, app_config: InvokeAIAppConfig, logger: Logger) -> None:
+         self._app_config = app_config
+         self._logger = logger
+
+     def __call__(self, cursor: sqlite3.Cursor) -> None:
+         cursor.execute("SELECT id, config FROM models;")
+         rows = cursor.fetchall()
+
+         migrated_count = 0
+
+         for model_id, config_json in rows:
+             try:
+                 config_dict: dict[str, Any] = json.loads(config_json)
+
+                 if config_dict.get("type") != ModelType.Qwen3Encoder.value:
+                     continue
+
+                 if "variant" in config_dict:
+                     continue
+
+                 config_dict["variant"] = Qwen3VariantType.Qwen3_4B.value
+
+                 cursor.execute(
+                     "UPDATE models SET config = ? WHERE id = ?;",
+                     (json.dumps(config_dict), model_id),
+                 )
+                 migrated_count += 1
+
+             except json.JSONDecodeError as e:
+                 self._logger.error("Invalid config JSON for model %s: %s", model_id, e)
+                 raise
+
+         if migrated_count > 0:
+             self._logger.info(f"Migration complete: {migrated_count} Qwen3 encoder configs updated with variant field")
+         else:
+             self._logger.info("Migration complete: no Qwen3 encoder configs needed migration")
+
+
+ def build_migration_25(app_config: InvokeAIAppConfig, logger: Logger) -> Migration:
+     """Builds the migration object for migrating from version 24 to version 25.
+
+     This migration adds the variant field to existing Qwen3 encoder models.
+     Models installed before the variant field was added will default to Qwen3_4B (for Z-Image compatibility).
+     """
+
+     return Migration(
+         from_version=24,
+         to_version=25,
+         callback=Migration25Callback(app_config=app_config, logger=logger),
+     )