invokeai-6.10.0rc2-py3-none-any.whl → invokeai-6.11.0rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. invokeai/app/api/routers/model_manager.py +43 -1
  2. invokeai/app/invocations/fields.py +1 -1
  3. invokeai/app/invocations/flux2_denoise.py +499 -0
  4. invokeai/app/invocations/flux2_klein_model_loader.py +222 -0
  5. invokeai/app/invocations/flux2_klein_text_encoder.py +222 -0
  6. invokeai/app/invocations/flux2_vae_decode.py +106 -0
  7. invokeai/app/invocations/flux2_vae_encode.py +88 -0
  8. invokeai/app/invocations/flux_denoise.py +50 -3
  9. invokeai/app/invocations/flux_lora_loader.py +1 -1
  10. invokeai/app/invocations/ideal_size.py +6 -1
  11. invokeai/app/invocations/metadata.py +4 -0
  12. invokeai/app/invocations/metadata_linked.py +47 -0
  13. invokeai/app/invocations/model.py +1 -0
  14. invokeai/app/invocations/z_image_denoise.py +8 -3
  15. invokeai/app/invocations/z_image_image_to_latents.py +9 -1
  16. invokeai/app/invocations/z_image_latents_to_image.py +9 -1
  17. invokeai/app/invocations/z_image_seed_variance_enhancer.py +110 -0
  18. invokeai/app/services/config/config_default.py +3 -1
  19. invokeai/app/services/invocation_stats/invocation_stats_common.py +6 -6
  20. invokeai/app/services/invocation_stats/invocation_stats_default.py +9 -4
  21. invokeai/app/services/model_manager/model_manager_default.py +7 -0
  22. invokeai/app/services/model_records/model_records_base.py +4 -2
  23. invokeai/app/services/shared/invocation_context.py +15 -0
  24. invokeai/app/services/shared/sqlite/sqlite_util.py +2 -0
  25. invokeai/app/services/shared/sqlite_migrator/migrations/migration_25.py +61 -0
  26. invokeai/app/util/step_callback.py +42 -0
  27. invokeai/backend/flux/denoise.py +239 -204
  28. invokeai/backend/flux/dype/__init__.py +18 -0
  29. invokeai/backend/flux/dype/base.py +226 -0
  30. invokeai/backend/flux/dype/embed.py +116 -0
  31. invokeai/backend/flux/dype/presets.py +141 -0
  32. invokeai/backend/flux/dype/rope.py +110 -0
  33. invokeai/backend/flux/extensions/dype_extension.py +91 -0
  34. invokeai/backend/flux/util.py +35 -1
  35. invokeai/backend/flux2/__init__.py +4 -0
  36. invokeai/backend/flux2/denoise.py +261 -0
  37. invokeai/backend/flux2/ref_image_extension.py +294 -0
  38. invokeai/backend/flux2/sampling_utils.py +209 -0
  39. invokeai/backend/model_manager/configs/factory.py +19 -1
  40. invokeai/backend/model_manager/configs/main.py +395 -3
  41. invokeai/backend/model_manager/configs/qwen3_encoder.py +116 -7
  42. invokeai/backend/model_manager/configs/vae.py +104 -2
  43. invokeai/backend/model_manager/load/load_default.py +0 -1
  44. invokeai/backend/model_manager/load/model_cache/model_cache.py +107 -2
  45. invokeai/backend/model_manager/load/model_loaders/flux.py +1007 -2
  46. invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +0 -1
  47. invokeai/backend/model_manager/load/model_loaders/z_image.py +121 -28
  48. invokeai/backend/model_manager/starter_models.py +128 -0
  49. invokeai/backend/model_manager/taxonomy.py +31 -4
  50. invokeai/backend/model_manager/util/select_hf_files.py +3 -2
  51. invokeai/backend/util/vae_working_memory.py +0 -2
  52. invokeai/frontend/web/dist/assets/App-ClpIJstk.js +161 -0
  53. invokeai/frontend/web/dist/assets/{browser-ponyfill-BP0RxJ4G.js → browser-ponyfill-Cw07u5G1.js} +1 -1
  54. invokeai/frontend/web/dist/assets/{index-B44qKjrs.js → index-DSKM8iGj.js} +69 -69
  55. invokeai/frontend/web/dist/index.html +1 -1
  56. invokeai/frontend/web/dist/locales/en.json +58 -5
  57. invokeai/frontend/web/dist/locales/it.json +2 -1
  58. invokeai/version/invokeai_version.py +1 -1
  59. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/METADATA +7 -1
  60. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/RECORD +66 -49
  61. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/WHEEL +1 -1
  62. invokeai/frontend/web/dist/assets/App-DllqPQ3j.js +0 -161
  63. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/entry_points.txt +0 -0
  64. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE +0 -0
  65. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
  66. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
  67. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/top_level.txt +0 -0
invokeai/app/invocations/flux_denoise.py
@@ -32,6 +32,8 @@ from invokeai.app.services.shared.invocation_context import InvocationContext
  from invokeai.backend.flux.controlnet.instantx_controlnet_flux import InstantXControlNetFlux
  from invokeai.backend.flux.controlnet.xlabs_controlnet_flux import XLabsControlNetFlux
  from invokeai.backend.flux.denoise import denoise
+ from invokeai.backend.flux.dype.presets import DyPEPreset, get_dype_config_from_preset
+ from invokeai.backend.flux.extensions.dype_extension import DyPEExtension
  from invokeai.backend.flux.extensions.instantx_controlnet_extension import InstantXControlNetExtension
  from invokeai.backend.flux.extensions.kontext_extension import KontextExtension
  from invokeai.backend.flux.extensions.regional_prompting_extension import RegionalPromptingExtension
@@ -64,7 +66,7 @@ from invokeai.backend.util.devices import TorchDevice
      title="FLUX Denoise",
      tags=["image", "flux"],
      category="image",
-     version="4.2.0",
+     version="4.3.0",
  )
  class FluxDenoiseInvocation(BaseInvocation):
      """Run denoising process with a FLUX transformer model."""
@@ -166,6 +168,24 @@ class FluxDenoiseInvocation(BaseInvocation):
          input=Input.Connection,
      )

+     # DyPE (Dynamic Position Extrapolation) for high-resolution generation
+     dype_preset: DyPEPreset = InputField(
+         default=DyPEPreset.OFF,
+         description="DyPE preset for high-resolution generation. 'auto' enables automatically for resolutions > 1536px. '4k' uses optimized settings for 4K output.",
+     )
+     dype_scale: Optional[float] = InputField(
+         default=None,
+         ge=0.0,
+         le=8.0,
+         description="DyPE magnitude (λs). Higher values = stronger extrapolation. Only used when dype_preset is not 'off'.",
+     )
+     dype_exponent: Optional[float] = InputField(
+         default=None,
+         ge=0.0,
+         le=1000.0,
+         description="DyPE decay speed (λt). Controls transition from low to high frequency detail. Only used when dype_preset is not 'off'.",
+     )
+
      @torch.no_grad()
      def invoke(self, context: InvocationContext) -> LatentsOutput:
          latents = self._run_diffusion(context)
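Note: the three dype_* fields above feed the helpers imported at the top of this file. A minimal sketch of how a preset might resolve into a config and extension, mirroring the wiring shown further down in this diff (illustrative only; the example resolution values are arbitrary, and the behaviour of returning None when DyPE stays disabled is inferred from the call site below):

    from invokeai.backend.flux.dype.presets import DyPEPreset, get_dype_config_from_preset
    from invokeai.backend.flux.extensions.dype_extension import DyPEExtension

    # Resolve the node's preset (plus optional manual overrides) into a concrete DyPE config.
    config = get_dype_config_from_preset(
        preset=DyPEPreset.OFF,   # OFF is the node default; other presets enable DyPE for large outputs
        width=3840,
        height=2160,
        custom_scale=None,       # optional dype_scale override
        custom_exponent=None,    # optional dype_exponent override
    )
    if config is not None:       # None appears to mean DyPE remains disabled
        extension = DyPEExtension(config=config, target_height=2160, target_width=3840)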
@@ -239,8 +259,14 @@
          )

          transformer_config = context.models.get_config(self.transformer.transformer)
-         assert transformer_config.base is BaseModelType.Flux and transformer_config.type is ModelType.Main
-         is_schnell = transformer_config.variant is FluxVariantType.Schnell
+         assert (
+             transformer_config.base in (BaseModelType.Flux, BaseModelType.Flux2)
+             and transformer_config.type is ModelType.Main
+         )
+         # Schnell is only for FLUX.1, FLUX.2 Klein behaves like Dev (with guidance)
+         is_schnell = (
+             transformer_config.base is BaseModelType.Flux and transformer_config.variant is FluxVariantType.Schnell
+         )

          # Calculate the timestep schedule.
          timesteps = get_schedule(
@@ -422,6 +448,26 @@
              kontext_extension.ensure_batch_size(x.shape[0])
              img_cond_seq, img_cond_seq_ids = kontext_extension.kontext_latents, kontext_extension.kontext_ids

+         # Prepare DyPE extension for high-resolution generation
+         dype_extension: DyPEExtension | None = None
+         dype_config = get_dype_config_from_preset(
+             preset=self.dype_preset,
+             width=self.width,
+             height=self.height,
+             custom_scale=self.dype_scale,
+             custom_exponent=self.dype_exponent,
+         )
+         if dype_config is not None:
+             dype_extension = DyPEExtension(
+                 config=dype_config,
+                 target_height=self.height,
+                 target_width=self.width,
+             )
+             context.logger.info(
+                 f"DyPE enabled: {self.width}x{self.height}, preset={self.dype_preset.value}, "
+                 f"scale={dype_config.dype_scale:.2f}, method={dype_config.method}"
+             )
+
          x = denoise(
              model=transformer,
              img=x,
@@ -439,6 +485,7 @@
              img_cond=img_cond,
              img_cond_seq=img_cond_seq,
              img_cond_seq_ids=img_cond_seq_ids,
+             dype_extension=dype_extension,
              scheduler=scheduler,
          )

invokeai/app/invocations/flux_lora_loader.py
@@ -162,7 +162,7 @@ class FLUXLoRACollectionLoader(BaseInvocation):
              if not context.models.exists(lora.lora.key):
                  raise Exception(f"Unknown lora: {lora.lora.key}!")

-             assert lora.lora.base is BaseModelType.Flux
+             assert lora.lora.base in (BaseModelType.Flux, BaseModelType.Flux2)
              added_loras.append(lora.lora.key)

invokeai/app/invocations/ideal_size.py
@@ -46,7 +46,12 @@ class IdealSizeInvocation(BaseInvocation):
              dimension = 512
          elif unet_config.base == BaseModelType.StableDiffusion2:
              dimension = 768
-         elif unet_config.base in (BaseModelType.StableDiffusionXL, BaseModelType.Flux, BaseModelType.StableDiffusion3):
+         elif unet_config.base in (
+             BaseModelType.StableDiffusionXL,
+             BaseModelType.Flux,
+             BaseModelType.Flux2,
+             BaseModelType.StableDiffusion3,
+         ):
              dimension = 1024
          else:
              raise ValueError(f"Unsupported model type: {unet_config.base}")
invokeai/app/invocations/metadata.py
@@ -150,6 +150,10 @@ GENERATION_MODES = Literal[
      "flux_img2img",
      "flux_inpaint",
      "flux_outpaint",
+     "flux2_txt2img",
+     "flux2_img2img",
+     "flux2_inpaint",
+     "flux2_outpaint",
      "sd3_txt2img",
      "sd3_img2img",
      "sd3_inpaint",
invokeai/app/invocations/metadata_linked.py
@@ -52,6 +52,7 @@ from invokeai.app.invocations.primitives import (
  )
  from invokeai.app.invocations.scheduler import SchedulerOutput
  from invokeai.app.invocations.t2i_adapter import T2IAdapterField, T2IAdapterInvocation
+ from invokeai.app.invocations.z_image_denoise import ZImageDenoiseInvocation
  from invokeai.app.services.shared.invocation_context import InvocationContext
  from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelType, SubModelType
  from invokeai.backend.stable_diffusion.schedulers.schedulers import SCHEDULER_NAME_VALUES
@@ -729,6 +730,52 @@ class FluxDenoiseLatentsMetaInvocation(FluxDenoiseInvocation, WithMetadata):
          return LatentsMetaOutput(**params, metadata=MetadataField.model_validate(md))


+ @invocation(
+     "z_image_denoise_meta",
+     title=f"{ZImageDenoiseInvocation.UIConfig.title} + Metadata",
+     tags=["z-image", "latents", "denoise", "txt2img", "t2i", "t2l", "img2img", "i2i", "l2l"],
+     category="latents",
+     version="1.0.0",
+ )
+ class ZImageDenoiseMetaInvocation(ZImageDenoiseInvocation, WithMetadata):
+     """Run denoising process with a Z-Image transformer model + metadata."""
+
+     def invoke(self, context: InvocationContext) -> LatentsMetaOutput:
+         def _loras_to_json(obj: Union[Any, list[Any]]):
+             if not isinstance(obj, list):
+                 obj = [obj]
+
+             output: list[dict[str, Any]] = []
+             for item in obj:
+                 output.append(
+                     LoRAMetadataField(
+                         model=item.lora,
+                         weight=item.weight,
+                     ).model_dump(exclude_none=True, exclude={"id", "type", "is_intermediate", "use_cache"})
+                 )
+             return output
+
+         obj = super().invoke(context)
+
+         md: Dict[str, Any] = {} if self.metadata is None else self.metadata.root
+         md.update({"width": obj.width})
+         md.update({"height": obj.height})
+         md.update({"steps": self.steps})
+         md.update({"guidance": self.guidance_scale})
+         md.update({"denoising_start": self.denoising_start})
+         md.update({"denoising_end": self.denoising_end})
+         md.update({"scheduler": self.scheduler})
+         md.update({"model": self.transformer.transformer})
+         md.update({"seed": self.seed})
+         if len(self.transformer.loras) > 0:
+             md.update({"loras": _loras_to_json(self.transformer.loras)})
+
+         params = obj.__dict__.copy()
+         del params["type"]
+
+         return LatentsMetaOutput(**params, metadata=MetadataField.model_validate(md))
+
+
  @invocation(
      "metadata_to_vae",
      title="Metadata To VAE",
invokeai/app/invocations/model.py
@@ -510,6 +510,7 @@ class VAELoaderInvocation(BaseInvocation):
              BaseModelType.StableDiffusionXL,
              BaseModelType.StableDiffusion3,
              BaseModelType.Flux,
+             BaseModelType.Flux2,
          ],
          ui_model_type=ModelType.VAE,
      )
invokeai/app/invocations/z_image_denoise.py
@@ -50,7 +50,7 @@ from invokeai.backend.z_image.z_image_transformer_patch import patch_transformer
      title="Denoise - Z-Image",
      tags=["image", "z-image"],
      category="image",
-     version="1.3.0",
+     version="1.4.0",
      classification=Classification.Prototype,
  )
  class ZImageDenoiseInvocation(BaseInvocation):
@@ -69,6 +69,7 @@ class ZImageDenoiseInvocation(BaseInvocation):
      )
      denoising_start: float = InputField(default=0.0, ge=0, le=1, description=FieldDescriptions.denoising_start)
      denoising_end: float = InputField(default=1.0, ge=0, le=1, description=FieldDescriptions.denoising_end)
+     add_noise: bool = InputField(default=True, description="Add noise based on denoising start.")
      transformer: TransformerField = InputField(
          description=FieldDescriptions.z_image_model, input=Input.Connection, title="Transformer"
      )
@@ -347,8 +348,12 @@ class ZImageDenoiseInvocation(BaseInvocation):

          # Prepare input latent image
          if init_latents is not None:
-             s_0 = sigmas[0]
-             latents = s_0 * noise + (1.0 - s_0) * init_latents
+             if self.add_noise:
+                 # Noise the init_latents by the appropriate amount for the first timestep.
+                 s_0 = sigmas[0]
+                 latents = s_0 * noise + (1.0 - s_0) * init_latents
+             else:
+                 latents = init_latents
          else:
              if self.denoising_start > 1e-5:
                  raise ValueError("denoising_start should be 0 when initial latents are not provided.")
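Note: with add_noise enabled (the default), the branch above performs the usual rectified-flow blend of noise and init latents at the first sigma; with it disabled, the incoming latents are passed through unchanged, presumably for workflows where they are already noised. A standalone restatement for clarity (tensor shapes are placeholders, not the real Z-Image latent shape):

    import torch

    sigma_0 = 0.7                              # stands in for sigmas[0]
    init_latents = torch.randn(1, 16, 64, 64)  # placeholder latents
    noise = torch.randn_like(init_latents)

    add_noise = True
    latents = sigma_0 * noise + (1.0 - sigma_0) * init_latents if add_noise else init_latents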
invokeai/app/invocations/z_image_image_to_latents.py
@@ -20,6 +20,7 @@ from invokeai.backend.flux.modules.autoencoder import AutoEncoder as FluxAutoEnc
  from invokeai.backend.model_manager.load.load_base import LoadedModel
  from invokeai.backend.stable_diffusion.diffusers_pipeline import image_resized_to_grid_as_tensor
  from invokeai.backend.util.devices import TorchDevice
+ from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_flux

  # Z-Image can use either the Diffusers AutoencoderKL or the FLUX AutoEncoder
  ZImageVAE = Union[AutoencoderKL, FluxAutoEncoder]
@@ -47,7 +48,14 @@ class ZImageImageToLatentsInvocation(BaseInvocation, WithMetadata, WithBoard):
                  "Ensure you are using a compatible VAE model."
              )

-         with vae_info.model_on_device() as (_, vae):
+         # Estimate working memory needed for VAE encode
+         estimated_working_memory = estimate_vae_working_memory_flux(
+             operation="encode",
+             image_tensor=image_tensor,
+             vae=vae_info.model,
+         )
+
+         with vae_info.model_on_device(working_mem_bytes=estimated_working_memory) as (_, vae):
              if not isinstance(vae, (AutoencoderKL, FluxAutoEncoder)):
                  raise TypeError(
                      f"Expected AutoencoderKL or FluxAutoEncoder, got {type(vae).__name__}. "
invokeai/app/invocations/z_image_latents_to_image.py
@@ -21,6 +21,7 @@ from invokeai.app.services.shared.invocation_context import InvocationContext
  from invokeai.backend.flux.modules.autoencoder import AutoEncoder as FluxAutoEncoder
  from invokeai.backend.stable_diffusion.extensions.seamless import SeamlessExt
  from invokeai.backend.util.devices import TorchDevice
+ from invokeai.backend.util.vae_working_memory import estimate_vae_working_memory_flux

  # Z-Image can use either the Diffusers AutoencoderKL or the FLUX AutoEncoder
  ZImageVAE = Union[AutoencoderKL, FluxAutoEncoder]
@@ -53,12 +54,19 @@ class ZImageLatentsToImageInvocation(BaseInvocation, WithMetadata, WithBoard):

          is_flux_vae = isinstance(vae_info.model, FluxAutoEncoder)

+         # Estimate working memory needed for VAE decode
+         estimated_working_memory = estimate_vae_working_memory_flux(
+             operation="decode",
+             image_tensor=latents,
+             vae=vae_info.model,
+         )
+
          # FLUX VAE doesn't support seamless, so only apply for AutoencoderKL
          seamless_context = (
              nullcontext() if is_flux_vae else SeamlessExt.static_patch_model(vae_info.model, self.vae.seamless_axes)
          )

-         with seamless_context, vae_info.model_on_device() as (_, vae):
+         with seamless_context, vae_info.model_on_device(working_mem_bytes=estimated_working_memory) as (_, vae):
              context.util.signal_progress("Running VAE")
              if not isinstance(vae, (AutoencoderKL, FluxAutoEncoder)):
                  raise TypeError(
invokeai/app/invocations/z_image_seed_variance_enhancer.py
@@ -0,0 +1,110 @@
+ import torch
+
+ from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
+ from invokeai.app.invocations.fields import (
+     FieldDescriptions,
+     Input,
+     InputField,
+     ZImageConditioningField,
+ )
+ from invokeai.app.invocations.primitives import ZImageConditioningOutput
+ from invokeai.app.services.shared.invocation_context import InvocationContext
+ from invokeai.backend.stable_diffusion.diffusion.conditioning_data import (
+     ConditioningFieldData,
+     ZImageConditioningInfo,
+ )
+
+
+ @invocation(
+     "z_image_seed_variance_enhancer",
+     title="Seed Variance Enhancer - Z-Image",
+     tags=["conditioning", "z-image", "variance", "seed"],
+     category="conditioning",
+     version="1.0.0",
+     classification=Classification.Prototype,
+ )
+ class ZImageSeedVarianceEnhancerInvocation(BaseInvocation):
+     """Adds seed-based noise to Z-Image conditioning to increase variance between seeds.
+
+     Z-Image-Turbo can produce relatively similar images with different seeds,
+     making it harder to explore variations of a prompt. This node implements
+     reproducible, seed-based noise injection into text embeddings to increase
+     visual variation while maintaining reproducibility.
+
+     The noise strength is auto-calibrated relative to the embedding's standard
+     deviation, ensuring consistent results across different prompts.
+     """
+
+     conditioning: ZImageConditioningField = InputField(
+         description=FieldDescriptions.cond,
+         input=Input.Connection,
+         title="Conditioning",
+     )
+     seed: int = InputField(
+         default=0,
+         ge=0,
+         description="Seed for reproducible noise generation. Different seeds produce different noise patterns.",
+     )
+     strength: float = InputField(
+         default=0.1,
+         ge=0.0,
+         le=2.0,
+         description="Noise strength as multiplier of embedding std. 0=off, 0.1=subtle, 0.5=strong.",
+     )
+     randomize_percent: float = InputField(
+         default=50.0,
+         ge=1.0,
+         le=100.0,
+         description="Percentage of embedding values to add noise to (1-100). Lower values create more selective noise patterns.",
+     )
+
+     @torch.no_grad()
+     def invoke(self, context: InvocationContext) -> ZImageConditioningOutput:
+         # Load conditioning data
+         cond_data = context.conditioning.load(self.conditioning.conditioning_name)
+         assert len(cond_data.conditionings) == 1, "Expected exactly one conditioning tensor"
+         z_image_conditioning = cond_data.conditionings[0]
+         assert isinstance(z_image_conditioning, ZImageConditioningInfo), "Expected ZImageConditioningInfo"
+
+         # Early return if strength is zero (no modification needed)
+         if self.strength == 0:
+             return ZImageConditioningOutput(conditioning=self.conditioning)
+
+         # Clone embeddings to avoid modifying the original
+         prompt_embeds = z_image_conditioning.prompt_embeds.clone()
+
+         # Calculate actual noise strength based on embedding statistics
+         # This auto-calibration ensures consistent results across different prompts
+         embed_std = torch.std(prompt_embeds).item()
+         actual_strength = self.strength * embed_std
+
+         # Generate deterministic noise using the seed
+         generator = torch.Generator(device=prompt_embeds.device)
+         generator.manual_seed(self.seed)
+         noise = torch.rand(
+             prompt_embeds.shape, generator=generator, device=prompt_embeds.device, dtype=prompt_embeds.dtype
+         )
+         noise = noise * 2 - 1  # Scale to [-1, 1)
+         noise = noise * actual_strength
+
+         # Create selective mask for noise application
+         generator.manual_seed(self.seed + 1)
+         noise_mask = torch.bernoulli(
+             torch.ones_like(prompt_embeds) * (self.randomize_percent / 100.0),
+             generator=generator,
+         ).bool()
+
+         # Apply noise only to masked positions
+         prompt_embeds = prompt_embeds + (noise * noise_mask)
+
+         # Save modified conditioning
+         new_conditioning = ZImageConditioningInfo(prompt_embeds=prompt_embeds)
+         conditioning_data = ConditioningFieldData(conditionings=[new_conditioning])
+         conditioning_name = context.conditioning.save(conditioning_data)
+
+         return ZImageConditioningOutput(
+             conditioning=ZImageConditioningField(
+                 conditioning_name=conditioning_name,
+                 mask=self.conditioning.mask,
+             )
+         )
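Note: the heart of the enhancer above is a short sequence of tensor operations. The sketch below restates it as a standalone function for clarity (illustrative only; the embedding shape is a placeholder, not the real Z-Image conditioning shape):

    import torch

    def perturb_embeddings(embeds: torch.Tensor, seed: int, strength: float = 0.1, randomize_percent: float = 50.0) -> torch.Tensor:
        # Auto-calibrate the noise scale to the embedding's own standard deviation.
        actual_strength = strength * torch.std(embeds).item()

        # Seeded uniform noise in [-1, 1), scaled by the calibrated strength.
        gen = torch.Generator(device=embeds.device)
        gen.manual_seed(seed)
        noise = (torch.rand(embeds.shape, generator=gen, device=embeds.device, dtype=embeds.dtype) * 2 - 1) * actual_strength

        # Perturb only a random subset of positions, drawn with a second seeded pass.
        gen.manual_seed(seed + 1)
        mask = torch.bernoulli(torch.ones_like(embeds) * (randomize_percent / 100.0), generator=gen).bool()
        return embeds + noise * mask

    perturbed = perturb_embeddings(torch.randn(1, 77, 2048), seed=42)  # placeholder embedding tensor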
invokeai/app/services/config/config_default.py
@@ -85,6 +85,7 @@ class InvokeAIAppConfig(BaseSettings):
          max_cache_ram_gb: The maximum amount of CPU RAM to use for model caching in GB. If unset, the limit will be configured based on the available RAM. In most cases, it is recommended to leave this unset.
          max_cache_vram_gb: The amount of VRAM to use for model caching in GB. If unset, the limit will be configured based on the available VRAM and the device_working_mem_gb. In most cases, it is recommended to leave this unset.
          log_memory_usage: If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour.
+         model_cache_keep_alive_min: How long to keep models in cache after last use, in minutes. A value of 0 (the default) means models are kept in cache indefinitely. If no model generations occur within the timeout period, the model cache is cleared using the same logic as the 'Clear Model Cache' button.
          device_working_mem_gb: The amount of working memory to keep available on the compute device (in GB). Has no effect if running on CPU. If you are experiencing OOM errors, try increasing this value.
          enable_partial_loading: Enable partial loading of models. This enables models to run with reduced VRAM requirements (at the cost of slower speed) by streaming the model from RAM to VRAM as its used. In some edge cases, partial loading can cause models to run more slowly if they were previously being fully loaded into VRAM.
          keep_ram_copy_of_weights: Whether to keep a full RAM copy of a model's weights when the model is loaded in VRAM. Keeping a RAM copy increases average RAM usage, but speeds up model switching and LoRA patching (assuming there is sufficient RAM). Set this to False if RAM pressure is consistently high.
@@ -165,9 +166,10 @@ class InvokeAIAppConfig(BaseSettings):
      max_cache_ram_gb: Optional[float] = Field(default=None, gt=0, description="The maximum amount of CPU RAM to use for model caching in GB. If unset, the limit will be configured based on the available RAM. In most cases, it is recommended to leave this unset.")
      max_cache_vram_gb: Optional[float] = Field(default=None, ge=0, description="The amount of VRAM to use for model caching in GB. If unset, the limit will be configured based on the available VRAM and the device_working_mem_gb. In most cases, it is recommended to leave this unset.")
      log_memory_usage: bool = Field(default=False, description="If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour.")
+     model_cache_keep_alive_min: float = Field(default=0, ge=0, description="How long to keep models in cache after last use, in minutes. A value of 0 (the default) means models are kept in cache indefinitely. If no model generations occur within the timeout period, the model cache is cleared using the same logic as the 'Clear Model Cache' button.")
      device_working_mem_gb: float = Field(default=3, description="The amount of working memory to keep available on the compute device (in GB). Has no effect if running on CPU. If you are experiencing OOM errors, try increasing this value.")
      enable_partial_loading: bool = Field(default=False, description="Enable partial loading of models. This enables models to run with reduced VRAM requirements (at the cost of slower speed) by streaming the model from RAM to VRAM as its used. In some edge cases, partial loading can cause models to run more slowly if they were previously being fully loaded into VRAM.")
-     keep_ram_copy_of_weights: bool = Field(default=True, description="Whether to keep a full RAM copy of a model's weights when the model is loaded in VRAM. Keeping a RAM copy increases average RAM usage, but speeds up model switching and LoRA patching (assuming there is sufficient RAM). Set this to False if RAM pressure is consistently high.")
+     keep_ram_copy_of_weights: bool = Field(default=True, description="Whether to keep a full RAM copy of a model's weights when the model is loaded in VRAM. Keeping a RAM copy increases average RAM usage, but speeds up model switching and LoRA patching (assuming there is sufficient RAM). Set this to False if RAM pressure is consistently high.")

      # Deprecated CACHE configs
      ram: Optional[float] = Field(default=None, gt=0, description="DEPRECATED: This setting is no longer used. It has been replaced by `max_cache_ram_gb`, but most users will not need to use this config since automatic cache size limits should work well in most cases. This config setting will be removed once the new model cache behavior is stable.")
      vram: Optional[float] = Field(default=None, ge=0, description="DEPRECATED: This setting is no longer used. It has been replaced by `max_cache_vram_gb`, but most users will not need to use this config since automatic cache size limits should work well in most cases. This config setting will be removed once the new model cache behavior is stable.")
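Note: a minimal sketch of exercising the new setting programmatically (illustrative; most deployments would set model_cache_keep_alive_min in their InvokeAI configuration file rather than constructing the settings object directly):

    from invokeai.app.services.config import InvokeAIAppConfig

    # Clear idle models from the cache 30 minutes after their last use;
    # the default of 0 keeps them cached indefinitely.
    config = InvokeAIAppConfig(model_cache_keep_alive_min=30)
    print(config.model_cache_keep_alive_min)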
invokeai/app/services/invocation_stats/invocation_stats_common.py
@@ -14,7 +14,7 @@ class NodeExecutionStatsSummary:
      node_type: str
      num_calls: int
      time_used_seconds: float
-     peak_vram_gb: float
+     delta_vram_gb: float


  @dataclass
@@ -58,10 +58,10 @@ class InvocationStatsSummary:
      def __str__(self) -> str:
          _str = ""
          _str = f"Graph stats: {self.graph_stats.graph_execution_state_id}\n"
-         _str += f"{'Node':>30} {'Calls':>7} {'Seconds':>9} {'VRAM Used':>10}\n"
+         _str += f"{'Node':>30} {'Calls':>7} {'Seconds':>9} {'VRAM Change':+>10}\n"

          for summary in self.node_stats:
-             _str += f"{summary.node_type:>30} {summary.num_calls:>7} {summary.time_used_seconds:>8.3f}s {summary.peak_vram_gb:>9.3f}G\n"
+             _str += f"{summary.node_type:>30} {summary.num_calls:>7} {summary.time_used_seconds:>8.3f}s {summary.delta_vram_gb:+10.3f}G\n"

          _str += f"TOTAL GRAPH EXECUTION TIME: {self.graph_stats.execution_time_seconds:7.3f}s\n"

@@ -100,7 +100,7 @@ class NodeExecutionStats:
      start_ram_gb: float  # GB
      end_ram_gb: float  # GB

-     peak_vram_gb: float  # GB
+     delta_vram_gb: float  # GB

      def total_time(self) -> float:
          return self.end_time - self.start_time
@@ -174,9 +174,9 @@ class GraphExecutionStats:
          for node_type, node_type_stats_list in node_stats_by_type.items():
              num_calls = len(node_type_stats_list)
              time_used = sum([n.total_time() for n in node_type_stats_list])
-             peak_vram = max([n.peak_vram_gb for n in node_type_stats_list])
+             delta_vram = max([n.delta_vram_gb for n in node_type_stats_list])
              summary = NodeExecutionStatsSummary(
-                 node_type=node_type, num_calls=num_calls, time_used_seconds=time_used, peak_vram_gb=peak_vram
+                 node_type=node_type, num_calls=num_calls, time_used_seconds=time_used, delta_vram_gb=delta_vram
              )
              summaries.append(summary)

invokeai/app/services/invocation_stats/invocation_stats_default.py
@@ -52,8 +52,9 @@ class InvocationStatsService(InvocationStatsServiceBase):
          # Record state before the invocation.
          start_time = time.time()
          start_ram = psutil.Process().memory_info().rss
-         if torch.cuda.is_available():
-             torch.cuda.reset_peak_memory_stats()
+
+         # Remember current VRAM usage
+         vram_in_use = torch.cuda.memory_allocated() if torch.cuda.is_available() else 0.0

          assert services.model_manager.load is not None
          services.model_manager.load.ram_cache.stats = self._cache_stats[graph_execution_state_id]
@@ -62,14 +63,16 @@ class InvocationStatsService(InvocationStatsServiceBase):
              # Let the invocation run.
              yield None
          finally:
-             # Record state after the invocation.
+             # Record delta VRAM
+             delta_vram_gb = ((torch.cuda.memory_allocated() - vram_in_use) / GB) if torch.cuda.is_available() else 0.0
+
              node_stats = NodeExecutionStats(
                  invocation_type=invocation.get_type(),
                  start_time=start_time,
                  end_time=time.time(),
                  start_ram_gb=start_ram / GB,
                  end_ram_gb=psutil.Process().memory_info().rss / GB,
-                 peak_vram_gb=torch.cuda.max_memory_allocated() / GB if torch.cuda.is_available() else 0.0,
+                 delta_vram_gb=delta_vram_gb,
              )
              self._stats[graph_execution_state_id].add_node_execution_stats(node_stats)

@@ -81,6 +84,8 @@ class InvocationStatsService(InvocationStatsServiceBase):
          graph_stats_summary = self._get_graph_summary(graph_execution_state_id)
          node_stats_summaries = self._get_node_summaries(graph_execution_state_id)
          model_cache_stats_summary = self._get_model_cache_summary(graph_execution_state_id)
+         # Note: We use memory_allocated() here (not memory_reserved()) because we want to show
+         # the current actively-used VRAM, not the total reserved memory including PyTorch's cache.
          vram_usage_gb = torch.cuda.memory_allocated() / GB if torch.cuda.is_available() else None

          return InvocationStatsSummary(
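Note: per-node VRAM is now reported as the change in allocated memory across the node rather than the process-wide peak. A standalone sketch of the same measurement pattern (illustrative; GB is assumed to be the 2**30 constant used by the stats service):

    import torch

    GB = 2**30  # assumed to match the service's GB constant

    def run_and_measure_delta_vram_gb(fn) -> float:
        # Snapshot allocated VRAM, run the workload, and report the delta in GB.
        before = torch.cuda.memory_allocated() if torch.cuda.is_available() else 0
        fn()
        after = torch.cuda.memory_allocated() if torch.cuda.is_available() else 0
        return (after - before) / GB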
invokeai/app/services/model_manager/model_manager_default.py
@@ -60,6 +60,10 @@ class ModelManagerService(ModelManagerServiceBase):
                  service.start(invoker)

      def stop(self, invoker: Invoker) -> None:
+         # Shutdown the model cache to cancel any pending timers
+         if hasattr(self._load, "ram_cache"):
+             self._load.ram_cache.shutdown()
+
          for service in [self._store, self._install, self._load]:
              if hasattr(service, "stop"):
                  service.stop(invoker)
@@ -88,7 +92,10 @@ class ModelManagerService(ModelManagerServiceBase):
              max_ram_cache_size_gb=app_config.max_cache_ram_gb,
              max_vram_cache_size_gb=app_config.max_cache_vram_gb,
              execution_device=execution_device or TorchDevice.choose_torch_device(),
+             storage_device="cpu",
+             log_memory_usage=app_config.log_memory_usage,
              logger=logger,
+             keep_alive_minutes=app_config.model_cache_keep_alive_min,
          )
          loader = ModelLoadService(
              app_config=app_config,
invokeai/app/services/model_records/model_records_base.py
@@ -19,11 +19,13 @@ from invokeai.backend.model_manager.configs.main import MainModelDefaultSettings
  from invokeai.backend.model_manager.taxonomy import (
      BaseModelType,
      ClipVariantType,
+     Flux2VariantType,
      FluxVariantType,
      ModelFormat,
      ModelSourceType,
      ModelType,
      ModelVariantType,
+     Qwen3VariantType,
      SchedulerPredictionType,
  )

@@ -89,8 +91,8 @@ class ModelRecordChanges(BaseModelExcludeNull):

      # Checkpoint-specific changes
      # TODO(MM2): Should we expose these? Feels footgun-y...
-     variant: Optional[ModelVariantType | ClipVariantType | FluxVariantType] = Field(
-         description="The variant of the model.", default=None
+     variant: Optional[ModelVariantType | ClipVariantType | FluxVariantType | Flux2VariantType | Qwen3VariantType] = (
+         Field(description="The variant of the model.", default=None)
      )
      prediction_type: Optional[SchedulerPredictionType] = Field(
          description="The prediction type of the model.", default=None
invokeai/app/services/shared/invocation_context.py
@@ -630,6 +630,21 @@ class UtilInterface(InvocationContextInterface):
              is_canceled=self.is_canceled,
          )

+     def flux2_step_callback(self, intermediate_state: PipelineIntermediateState) -> None:
+         """
+         The step callback for FLUX.2 Klein models (32-channel VAE).
+
+         Args:
+             intermediate_state: The intermediate state of the diffusion pipeline.
+         """
+
+         diffusion_step_callback(
+             signal_progress=self.signal_progress,
+             intermediate_state=intermediate_state,
+             base_model=BaseModelType.Flux2,
+             is_canceled=self.is_canceled,
+         )
+
      def signal_progress(
          self,
          message: str,
invokeai/app/services/shared/sqlite/sqlite_util.py
@@ -27,6 +27,7 @@ from invokeai.app.services.shared.sqlite_migrator.migrations.migration_21 import
  from invokeai.app.services.shared.sqlite_migrator.migrations.migration_22 import build_migration_22
  from invokeai.app.services.shared.sqlite_migrator.migrations.migration_23 import build_migration_23
  from invokeai.app.services.shared.sqlite_migrator.migrations.migration_24 import build_migration_24
+ from invokeai.app.services.shared.sqlite_migrator.migrations.migration_25 import build_migration_25
  from invokeai.app.services.shared.sqlite_migrator.sqlite_migrator_impl import SqliteMigrator


@@ -71,6 +72,7 @@ def init_db(config: InvokeAIAppConfig, logger: Logger, image_files: ImageFileSto
      migrator.register_migration(build_migration_22(app_config=config, logger=logger))
      migrator.register_migration(build_migration_23(app_config=config, logger=logger))
      migrator.register_migration(build_migration_24(app_config=config, logger=logger))
+     migrator.register_migration(build_migration_25(app_config=config, logger=logger))
      migrator.run_migrations()

      return db
invokeai/app/services/shared/sqlite_migrator/migrations/migration_25.py
@@ -0,0 +1,61 @@
+ import json
+ import sqlite3
+ from logging import Logger
+ from typing import Any
+
+ from invokeai.app.services.config import InvokeAIAppConfig
+ from invokeai.app.services.shared.sqlite_migrator.sqlite_migrator_common import Migration
+ from invokeai.backend.model_manager.taxonomy import ModelType, Qwen3VariantType
+
+
+ class Migration25Callback:
+     def __init__(self, app_config: InvokeAIAppConfig, logger: Logger) -> None:
+         self._app_config = app_config
+         self._logger = logger
+
+     def __call__(self, cursor: sqlite3.Cursor) -> None:
+         cursor.execute("SELECT id, config FROM models;")
+         rows = cursor.fetchall()
+
+         migrated_count = 0
+
+         for model_id, config_json in rows:
+             try:
+                 config_dict: dict[str, Any] = json.loads(config_json)
+
+                 if config_dict.get("type") != ModelType.Qwen3Encoder.value:
+                     continue
+
+                 if "variant" in config_dict:
+                     continue
+
+                 config_dict["variant"] = Qwen3VariantType.Qwen3_4B.value
+
+                 cursor.execute(
+                     "UPDATE models SET config = ? WHERE id = ?;",
+                     (json.dumps(config_dict), model_id),
+                 )
+                 migrated_count += 1
+
+             except json.JSONDecodeError as e:
+                 self._logger.error("Invalid config JSON for model %s: %s", model_id, e)
+                 raise
+
+         if migrated_count > 0:
+             self._logger.info(f"Migration complete: {migrated_count} Qwen3 encoder configs updated with variant field")
+         else:
+             self._logger.info("Migration complete: no Qwen3 encoder configs needed migration")
+
+
+ def build_migration_25(app_config: InvokeAIAppConfig, logger: Logger) -> Migration:
+     """Builds the migration object for migrating from version 24 to version 25.
+
+     This migration adds the variant field to existing Qwen3 encoder models.
+     Models installed before the variant field was added will default to Qwen3_4B (for Z-Image compatibility).
+     """
+
+     return Migration(
+         from_version=24,
+         to_version=25,
+         callback=Migration25Callback(app_config=app_config, logger=logger),
+     )