InvokeAI 6.10.0__py3-none-any.whl → 6.10.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. invokeai/app/invocations/flux_denoise.py +1 -15
  2. invokeai/app/invocations/metadata_linked.py +0 -47
  3. invokeai/app/invocations/z_image_denoise.py +84 -244
  4. invokeai/app/services/config/config_default.py +1 -3
  5. invokeai/app/services/model_manager/model_manager_default.py +0 -7
  6. invokeai/backend/flux/denoise.py +11 -196
  7. invokeai/backend/model_manager/configs/lora.py +0 -36
  8. invokeai/backend/model_manager/load/model_cache/model_cache.py +2 -104
  9. invokeai/backend/model_manager/load/model_loaders/cogview4.py +1 -2
  10. invokeai/backend/model_manager/load/model_loaders/flux.py +6 -13
  11. invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py +2 -4
  12. invokeai/backend/model_manager/load/model_loaders/onnx.py +0 -1
  13. invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +1 -2
  14. invokeai/backend/model_manager/load/model_loaders/z_image.py +3 -37
  15. invokeai/backend/model_manager/starter_models.py +4 -13
  16. invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py +5 -39
  17. invokeai/backend/quantization/gguf/ggml_tensor.py +4 -15
  18. invokeai/backend/z_image/extensions/regional_prompting_extension.py +12 -10
  19. invokeai/frontend/web/dist/assets/App-CYhlZO3Q.js +161 -0
  20. invokeai/frontend/web/dist/assets/{browser-ponyfill-4xPFTMT3.js → browser-ponyfill-DHZxq1nk.js} +1 -1
  21. invokeai/frontend/web/dist/assets/{index-vCDSQboA.js → index-dgSJAY--.js} +51 -51
  22. invokeai/frontend/web/dist/index.html +1 -1
  23. invokeai/frontend/web/dist/locales/en.json +5 -11
  24. invokeai/version/invokeai_version.py +1 -1
  25. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/METADATA +2 -2
  26. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/RECORD +32 -39
  27. invokeai/app/invocations/pbr_maps.py +0 -59
  28. invokeai/backend/flux/schedulers.py +0 -62
  29. invokeai/backend/image_util/pbr_maps/architecture/block.py +0 -367
  30. invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py +0 -70
  31. invokeai/backend/image_util/pbr_maps/pbr_maps.py +0 -141
  32. invokeai/backend/image_util/pbr_maps/utils/image_ops.py +0 -93
  33. invokeai/frontend/web/dist/assets/App-BBELGD-n.js +0 -161
  34. invokeai/frontend/web/dist/locales/en-GB.json +0 -1
  35. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/WHEEL +0 -0
  36. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/entry_points.txt +0 -0
  37. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE +0 -0
  38. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
  39. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
  40. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/top_level.txt +0 -0

invokeai/app/invocations/flux_denoise.py

@@ -47,7 +47,6 @@ from invokeai.backend.flux.sampling_utils import (
     pack,
     unpack,
 )
-from invokeai.backend.flux.schedulers import FLUX_SCHEDULER_LABELS, FLUX_SCHEDULER_MAP, FLUX_SCHEDULER_NAME_VALUES
 from invokeai.backend.flux.text_conditioning import FluxReduxConditioning, FluxTextConditioning
 from invokeai.backend.model_manager.taxonomy import BaseModelType, FluxVariantType, ModelFormat, ModelType
 from invokeai.backend.patches.layer_patcher import LayerPatcher
@@ -64,7 +63,7 @@ from invokeai.backend.util.devices import TorchDevice
     title="FLUX Denoise",
     tags=["image", "flux"],
     category="image",
-    version="4.2.0",
+    version="4.1.0",
 )
 class FluxDenoiseInvocation(BaseInvocation):
     """Run denoising process with a FLUX transformer model."""
@@ -133,12 +132,6 @@ class FluxDenoiseInvocation(BaseInvocation):
     num_steps: int = InputField(
         default=4, description="Number of diffusion steps. Recommended values are schnell: 4, dev: 50."
     )
-    scheduler: FLUX_SCHEDULER_NAME_VALUES = InputField(
-        default="euler",
-        description="Scheduler (sampler) for the denoising process. 'euler' is fast and standard. "
-        "'heun' is 2nd-order (better quality, 2x slower). 'lcm' is optimized for few steps.",
-        ui_choice_labels=FLUX_SCHEDULER_LABELS,
-    )
     guidance: float = InputField(
         default=4.0,
         description="The guidance strength. Higher values adhere more strictly to the prompt, and will produce less diverse images. FLUX dev only, ignored for schnell.",
@@ -249,12 +242,6 @@ class FluxDenoiseInvocation(BaseInvocation):
             shift=not is_schnell,
         )

-        # Create scheduler if not using default euler
-        scheduler = None
-        if self.scheduler in FLUX_SCHEDULER_MAP:
-            scheduler_class = FLUX_SCHEDULER_MAP[self.scheduler]
-            scheduler = scheduler_class(num_train_timesteps=1000)
-
         # Clip the timesteps schedule based on denoising_start and denoising_end.
         timesteps = clip_timestep_schedule_fractional(timesteps, self.denoising_start, self.denoising_end)

@@ -439,7 +426,6 @@ class FluxDenoiseInvocation(BaseInvocation):
             img_cond=img_cond,
             img_cond_seq=img_cond_seq,
             img_cond_seq_ids=img_cond_seq_ids,
-            scheduler=scheduler,
         )

         x = unpack(x.float(), self.height, self.width)
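
The removed field and the map lookup above both referenced the invokeai.backend.flux.schedulers module (also dropped in this diff, see file 28). A rough sketch of the kind of mapping such a module would expose follows; the concrete diffusers classes and labels here are illustrative assumptions, not the module's actual contents:

# Hypothetical reconstruction of the scheduler mapping referenced by the removed code.
# The names "euler", "heun", and "lcm" come from the field description in the diff; the
# diffusers classes chosen here are assumptions for illustration only.
from typing import Literal

from diffusers import FlowMatchHeunDiscreteScheduler, LCMScheduler

FLUX_SCHEDULER_NAME_VALUES = Literal["euler", "heun", "lcm"]

# "euler" is handled by the built-in sampling loop, so only non-default schedulers need a
# diffusers class (consistent with the `if self.scheduler in FLUX_SCHEDULER_MAP` check above).
FLUX_SCHEDULER_MAP = {
    "heun": FlowMatchHeunDiscreteScheduler,
    "lcm": LCMScheduler,
}

FLUX_SCHEDULER_LABELS = {
    "euler": "Euler (default)",
    "heun": "Heun (2nd order, slower)",
    "lcm": "LCM (few steps)",
}

With a mapping like this, the removed `FLUX_SCHEDULER_MAP[self.scheduler]` lookup only instantiates a diffusers scheduler when a non-default sampler is selected.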

invokeai/app/invocations/metadata_linked.py

@@ -52,7 +52,6 @@ from invokeai.app.invocations.primitives import (
 )
 from invokeai.app.invocations.scheduler import SchedulerOutput
 from invokeai.app.invocations.t2i_adapter import T2IAdapterField, T2IAdapterInvocation
-from invokeai.app.invocations.z_image_denoise import ZImageDenoiseInvocation
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelType, SubModelType
 from invokeai.backend.stable_diffusion.schedulers.schedulers import SCHEDULER_NAME_VALUES
@@ -730,52 +729,6 @@ class FluxDenoiseLatentsMetaInvocation(FluxDenoiseInvocation, WithMetadata):
         return LatentsMetaOutput(**params, metadata=MetadataField.model_validate(md))


-@invocation(
-    "z_image_denoise_meta",
-    title=f"{ZImageDenoiseInvocation.UIConfig.title} + Metadata",
-    tags=["z-image", "latents", "denoise", "txt2img", "t2i", "t2l", "img2img", "i2i", "l2l"],
-    category="latents",
-    version="1.0.0",
-)
-class ZImageDenoiseMetaInvocation(ZImageDenoiseInvocation, WithMetadata):
-    """Run denoising process with a Z-Image transformer model + metadata."""
-
-    def invoke(self, context: InvocationContext) -> LatentsMetaOutput:
-        def _loras_to_json(obj: Union[Any, list[Any]]):
-            if not isinstance(obj, list):
-                obj = [obj]
-
-            output: list[dict[str, Any]] = []
-            for item in obj:
-                output.append(
-                    LoRAMetadataField(
-                        model=item.lora,
-                        weight=item.weight,
-                    ).model_dump(exclude_none=True, exclude={"id", "type", "is_intermediate", "use_cache"})
-                )
-            return output
-
-        obj = super().invoke(context)
-
-        md: Dict[str, Any] = {} if self.metadata is None else self.metadata.root
-        md.update({"width": obj.width})
-        md.update({"height": obj.height})
-        md.update({"steps": self.steps})
-        md.update({"guidance": self.guidance_scale})
-        md.update({"denoising_start": self.denoising_start})
-        md.update({"denoising_end": self.denoising_end})
-        md.update({"scheduler": self.scheduler})
-        md.update({"model": self.transformer.transformer})
-        md.update({"seed": self.seed})
-        if len(self.transformer.loras) > 0:
-            md.update({"loras": _loras_to_json(self.transformer.loras)})
-
-        params = obj.__dict__.copy()
-        del params["type"]
-
-        return LatentsMetaOutput(**params, metadata=MetadataField.model_validate(md))
-
-
 @invocation(
     "metadata_to_vae",
     title="Metadata To VAE",

invokeai/app/invocations/z_image_denoise.py

@@ -1,4 +1,3 @@
-import inspect
 import math
 from contextlib import ExitStack
 from typing import Callable, Iterator, Optional, Tuple
@@ -6,7 +5,6 @@ from typing import Callable, Iterator, Optional, Tuple
 import einops
 import torch
 import torchvision.transforms as tv_transforms
-from diffusers.schedulers.scheduling_utils import SchedulerMixin
 from PIL import Image
 from torchvision.transforms.functional import resize as tv_resize
 from tqdm import tqdm
@@ -26,7 +24,6 @@ from invokeai.app.invocations.primitives import LatentsOutput
 from invokeai.app.invocations.z_image_control import ZImageControlField
 from invokeai.app.invocations.z_image_image_to_latents import ZImageImageToLatentsInvocation
 from invokeai.app.services.shared.invocation_context import InvocationContext
-from invokeai.backend.flux.schedulers import ZIMAGE_SCHEDULER_LABELS, ZIMAGE_SCHEDULER_MAP, ZIMAGE_SCHEDULER_NAME_VALUES
 from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelFormat
 from invokeai.backend.patches.layer_patcher import LayerPatcher
 from invokeai.backend.patches.lora_conversions.z_image_lora_constants import Z_IMAGE_LORA_TRANSFORMER_PREFIX
@@ -50,7 +47,7 @@ from invokeai.backend.z_image.z_image_transformer_patch import patch_transformer
     title="Denoise - Z-Image",
     tags=["image", "z-image"],
     category="image",
-    version="1.4.0",
+    version="1.2.0",
     classification=Classification.Prototype,
 )
 class ZImageDenoiseInvocation(BaseInvocation):
@@ -69,7 +66,6 @@ class ZImageDenoiseInvocation(BaseInvocation):
     )
     denoising_start: float = InputField(default=0.0, ge=0, le=1, description=FieldDescriptions.denoising_start)
     denoising_end: float = InputField(default=1.0, ge=0, le=1, description=FieldDescriptions.denoising_end)
-    add_noise: bool = InputField(default=True, description="Add noise based on denoising start.")
     transformer: TransformerField = InputField(
         description=FieldDescriptions.z_image_model, input=Input.Connection, title="Transformer"
     )
@@ -104,13 +100,6 @@ class ZImageDenoiseInvocation(BaseInvocation):
         description=FieldDescriptions.vae + " Required for control conditioning.",
         input=Input.Connection,
     )
-    # Scheduler selection for the denoising process
-    scheduler: ZIMAGE_SCHEDULER_NAME_VALUES = InputField(
-        default="euler",
-        description="Scheduler (sampler) for the denoising process. Euler is the default and recommended for "
-        "Z-Image-Turbo. Heun is 2nd-order (better quality, 2x slower). LCM is optimized for few steps.",
-        ui_choice_labels=ZIMAGE_SCHEDULER_LABELS,
-    )

     @torch.no_grad()
     def invoke(self, context: InvocationContext) -> LatentsOutput:
@@ -348,12 +337,8 @@ class ZImageDenoiseInvocation(BaseInvocation):

         # Prepare input latent image
         if init_latents is not None:
-            if self.add_noise:
-                # Noise the init_latents by the appropriate amount for the first timestep.
-                s_0 = sigmas[0]
-                latents = s_0 * noise + (1.0 - s_0) * init_latents
-            else:
-                latents = init_latents
+            s_0 = sigmas[0]
+            latents = s_0 * noise + (1.0 - s_0) * init_latents
         else:
             if self.denoising_start > 1e-5:
                 raise ValueError("denoising_start should be 0 when initial latents are not provided.")
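
Both sides compute the same flow-matching initialization once noise is added: the starting latent is a linear interpolation between fresh noise and the encoded image, weighted by the first sigma. A tiny illustration with made-up shapes and values:

import torch

# Made-up values purely to illustrate the retained img2img initialization above:
# at the first sigma s_0, the starting latent is a linear blend of noise and the init latents.
s_0 = 0.75                                  # sigma at denoising_start (1.0 = pure noise, 0.0 = keep the image)
noise = torch.randn(1, 16, 32, 32)          # fresh Gaussian noise, same shape as the latents
init_latents = torch.randn(1, 16, 32, 32)   # latents encoded from the input image
latents = s_0 * noise + (1.0 - s_0) * init_latents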
@@ -376,32 +361,15 @@ class ZImageDenoiseInvocation(BaseInvocation):
         )

         step_callback = self._build_step_callback(context)
-
-        # Initialize the diffusers scheduler if not using built-in Euler
-        scheduler: SchedulerMixin | None = None
-        use_scheduler = self.scheduler != "euler"
-
-        if use_scheduler:
-            scheduler_class = ZIMAGE_SCHEDULER_MAP[self.scheduler]
-            scheduler = scheduler_class(
-                num_train_timesteps=1000,
-                shift=1.0,
-            )
-            # Set timesteps - LCM should use num_inference_steps (it has its own sigma schedule),
-            # while other schedulers can use custom sigmas if supported
-            is_lcm = self.scheduler == "lcm"
-            set_timesteps_sig = inspect.signature(scheduler.set_timesteps)
-            if not is_lcm and "sigmas" in set_timesteps_sig.parameters:
-                # Convert sigmas list to tensor for scheduler
-                scheduler.set_timesteps(sigmas=sigmas, device=device)
-            else:
-                # LCM or scheduler doesn't support custom sigmas - use num_inference_steps
-                scheduler.set_timesteps(num_inference_steps=total_steps, device=device)
-
-            # For Heun scheduler, the number of actual steps may differ
-            num_scheduler_steps = len(scheduler.timesteps)
-        else:
-            num_scheduler_steps = total_steps
+        step_callback(
+            PipelineIntermediateState(
+                step=0,
+                order=1,
+                total_steps=total_steps,
+                timestep=int(sigmas[0] * 1000),
+                latents=latents,
+            ),
+        )

         with ExitStack() as exit_stack:
             # Get transformer config to determine if it's quantized
@@ -535,219 +503,91 @@
                 )
             )

-            # Denoising loop - supports both built-in Euler and diffusers schedulers
-            # Track user-facing step for progress (accounts for Heun's double steps)
-            user_step = 0
-
-            if use_scheduler and scheduler is not None:
-                # Use diffusers scheduler for stepping
-                # Use tqdm with total_steps (user-facing steps) not num_scheduler_steps (internal steps)
-                # This ensures progress bar shows 1/8, 2/8, etc. even when scheduler uses more internal steps
-                pbar = tqdm(total=total_steps, desc="Denoising")
-                for step_index in range(num_scheduler_steps):
-                    sched_timestep = scheduler.timesteps[step_index]
-                    # Convert scheduler timestep (0-1000) to normalized sigma (0-1)
-                    sigma_curr = sched_timestep.item() / scheduler.config.num_train_timesteps
-
-                    # For Heun scheduler, track if we're in first or second order step
-                    is_heun = hasattr(scheduler, "state_in_first_order")
-                    in_first_order = scheduler.state_in_first_order if is_heun else True
-
-                    # Timestep tensor for Z-Image model
-                    # The model expects t=0 at start (noise) and t=1 at end (clean)
-                    model_t = 1.0 - sigma_curr
-                    timestep = torch.tensor([model_t], device=device, dtype=inference_dtype).expand(latents.shape[0])
-
-                    # Run transformer for positive prediction
-                    latent_model_input = latents.to(transformer.dtype)
-                    latent_model_input = latent_model_input.unsqueeze(2)  # Add frame dimension
-                    latent_model_input_list = list(latent_model_input.unbind(dim=0))
-
-                    # Determine if control should be applied at this step
-                    apply_control = control_extension is not None and control_extension.should_apply(
-                        user_step, total_steps
+            # Denoising loop
+            for step_idx in tqdm(range(total_steps)):
+                sigma_curr = sigmas[step_idx]
+                sigma_prev = sigmas[step_idx + 1]
+
+                # Timestep tensor for Z-Image model
+                # The model expects t=0 at start (noise) and t=1 at end (clean)
+                # Sigma goes from 1 (noise) to 0 (clean), so model_t = 1 - sigma
+                model_t = 1.0 - sigma_curr
+                timestep = torch.tensor([model_t], device=device, dtype=inference_dtype).expand(latents.shape[0])
+
+                # Run transformer for positive prediction
+                # Z-Image transformer expects: x as list of [C, 1, H, W] tensors, t, cap_feats as list
+                # Prepare latent input: [B, C, H, W] -> [B, C, 1, H, W] -> list of [C, 1, H, W]
+                latent_model_input = latents.to(transformer.dtype)
+                latent_model_input = latent_model_input.unsqueeze(2)  # Add frame dimension
+                latent_model_input_list = list(latent_model_input.unbind(dim=0))
+
+                # Determine if control should be applied at this step
+                apply_control = control_extension is not None and control_extension.should_apply(step_idx, total_steps)
+
+                # Run forward pass - use custom forward with control if extension is active
+                if apply_control:
+                    model_out_list, _ = z_image_forward_with_control(
+                        transformer=transformer,
+                        x=latent_model_input_list,
+                        t=timestep,
+                        cap_feats=[pos_prompt_embeds],
+                        control_extension=control_extension,
                     )
-
-                    # Run forward pass
-                    if apply_control:
-                        model_out_list, _ = z_image_forward_with_control(
-                            transformer=transformer,
-                            x=latent_model_input_list,
-                            t=timestep,
-                            cap_feats=[pos_prompt_embeds],
-                            control_extension=control_extension,
-                        )
-                    else:
-                        model_output = transformer(
-                            x=latent_model_input_list,
-                            t=timestep,
-                            cap_feats=[pos_prompt_embeds],
-                        )
-                        model_out_list = model_output[0]
-
-                    noise_pred_cond = torch.stack([t.float() for t in model_out_list], dim=0)
-                    noise_pred_cond = noise_pred_cond.squeeze(2)
-                    noise_pred_cond = -noise_pred_cond  # Z-Image uses v-prediction with negation
-
-                    # Apply CFG if enabled
-                    if do_classifier_free_guidance and neg_prompt_embeds is not None:
-                        if apply_control:
-                            model_out_list_uncond, _ = z_image_forward_with_control(
-                                transformer=transformer,
-                                x=latent_model_input_list,
-                                t=timestep,
-                                cap_feats=[neg_prompt_embeds],
-                                control_extension=control_extension,
-                            )
-                        else:
-                            model_output_uncond = transformer(
-                                x=latent_model_input_list,
-                                t=timestep,
-                                cap_feats=[neg_prompt_embeds],
-                            )
-                            model_out_list_uncond = model_output_uncond[0]
-
-                        noise_pred_uncond = torch.stack([t.float() for t in model_out_list_uncond], dim=0)
-                        noise_pred_uncond = noise_pred_uncond.squeeze(2)
-                        noise_pred_uncond = -noise_pred_uncond
-                        noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_cond - noise_pred_uncond)
-                    else:
-                        noise_pred = noise_pred_cond
-
-                    # Use scheduler.step() for the update
-                    step_output = scheduler.step(model_output=noise_pred, timestep=sched_timestep, sample=latents)
-                    latents = step_output.prev_sample
-
-                    # Get sigma_prev for inpainting (next sigma value)
-                    if step_index + 1 < len(scheduler.sigmas):
-                        sigma_prev = scheduler.sigmas[step_index + 1].item()
-                    else:
-                        sigma_prev = 0.0
-
-                    if inpaint_extension is not None:
-                        latents = inpaint_extension.merge_intermediate_latents_with_init_latents(latents, sigma_prev)
-
-                    # For Heun, only increment user step after second-order step completes
-                    if is_heun:
-                        if not in_first_order:
-                            user_step += 1
-                            # Only call step_callback if we haven't exceeded total_steps
-                            if user_step <= total_steps:
-                                pbar.update(1)
-                                step_callback(
-                                    PipelineIntermediateState(
-                                        step=user_step,
-                                        order=2,
-                                        total_steps=total_steps,
-                                        timestep=int(sigma_curr * 1000),
-                                        latents=latents,
-                                    ),
-                                )
-                    else:
-                        # For LCM and other first-order schedulers
-                        user_step += 1
-                        # Only call step_callback if we haven't exceeded total_steps
-                        # (LCM scheduler may have more internal steps than user-facing steps)
-                        if user_step <= total_steps:
-                            pbar.update(1)
-                            step_callback(
-                                PipelineIntermediateState(
-                                    step=user_step,
-                                    order=1,
-                                    total_steps=total_steps,
-                                    timestep=int(sigma_curr * 1000),
-                                    latents=latents,
-                                ),
-                            )
-                pbar.close()
-            else:
-                # Original Euler implementation (default, optimized for Z-Image)
-                for step_idx in tqdm(range(total_steps)):
-                    sigma_curr = sigmas[step_idx]
-                    sigma_prev = sigmas[step_idx + 1]
-
-                    # Timestep tensor for Z-Image model
-                    # The model expects t=0 at start (noise) and t=1 at end (clean)
-                    # Sigma goes from 1 (noise) to 0 (clean), so model_t = 1 - sigma
-                    model_t = 1.0 - sigma_curr
-                    timestep = torch.tensor([model_t], device=device, dtype=inference_dtype).expand(latents.shape[0])
-
-                    # Run transformer for positive prediction
-                    # Z-Image transformer expects: x as list of [C, 1, H, W] tensors, t, cap_feats as list
-                    # Prepare latent input: [B, C, H, W] -> [B, C, 1, H, W] -> list of [C, 1, H, W]
-                    latent_model_input = latents.to(transformer.dtype)
-                    latent_model_input = latent_model_input.unsqueeze(2)  # Add frame dimension
-                    latent_model_input_list = list(latent_model_input.unbind(dim=0))
-
-                    # Determine if control should be applied at this step
-                    apply_control = control_extension is not None and control_extension.should_apply(
-                        step_idx, total_steps
+                else:
+                    model_output = transformer(
+                        x=latent_model_input_list,
+                        t=timestep,
+                        cap_feats=[pos_prompt_embeds],
                     )
+                    model_out_list = model_output[0]  # Extract list of tensors from tuple
+
+                noise_pred_cond = torch.stack([t.float() for t in model_out_list], dim=0)
+                noise_pred_cond = noise_pred_cond.squeeze(2)  # Remove frame dimension
+                noise_pred_cond = -noise_pred_cond  # Z-Image uses v-prediction with negation

-                    # Run forward pass - use custom forward with control if extension is active
+                # Apply CFG if enabled
+                if do_classifier_free_guidance and neg_prompt_embeds is not None:
                     if apply_control:
-                        model_out_list, _ = z_image_forward_with_control(
+                        model_out_list_uncond, _ = z_image_forward_with_control(
                             transformer=transformer,
                             x=latent_model_input_list,
                             t=timestep,
-                            cap_feats=[pos_prompt_embeds],
+                            cap_feats=[neg_prompt_embeds],
                             control_extension=control_extension,
                         )
                     else:
-                        model_output = transformer(
+                        model_output_uncond = transformer(
                             x=latent_model_input_list,
                             t=timestep,
-                            cap_feats=[pos_prompt_embeds],
+                            cap_feats=[neg_prompt_embeds],
                         )
-                        model_out_list = model_output[0]  # Extract list of tensors from tuple
-
-                    noise_pred_cond = torch.stack([t.float() for t in model_out_list], dim=0)
-                    noise_pred_cond = noise_pred_cond.squeeze(2)  # Remove frame dimension
-                    noise_pred_cond = -noise_pred_cond  # Z-Image uses v-prediction with negation
-
-                    # Apply CFG if enabled
-                    if do_classifier_free_guidance and neg_prompt_embeds is not None:
-                        if apply_control:
-                            model_out_list_uncond, _ = z_image_forward_with_control(
-                                transformer=transformer,
-                                x=latent_model_input_list,
-                                t=timestep,
-                                cap_feats=[neg_prompt_embeds],
-                                control_extension=control_extension,
-                            )
-                        else:
-                            model_output_uncond = transformer(
-                                x=latent_model_input_list,
-                                t=timestep,
-                                cap_feats=[neg_prompt_embeds],
-                            )
-                            model_out_list_uncond = model_output_uncond[0]  # Extract list of tensors from tuple
-
-                        noise_pred_uncond = torch.stack([t.float() for t in model_out_list_uncond], dim=0)
-                        noise_pred_uncond = noise_pred_uncond.squeeze(2)
-                        noise_pred_uncond = -noise_pred_uncond
-                        noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_cond - noise_pred_uncond)
-                    else:
-                        noise_pred = noise_pred_cond
-
-                    # Euler step
-                    latents_dtype = latents.dtype
-                    latents = latents.to(dtype=torch.float32)
-                    latents = latents + (sigma_prev - sigma_curr) * noise_pred
-                    latents = latents.to(dtype=latents_dtype)
-
-                    if inpaint_extension is not None:
-                        latents = inpaint_extension.merge_intermediate_latents_with_init_latents(latents, sigma_prev)
-
-                    step_callback(
-                        PipelineIntermediateState(
-                            step=step_idx + 1,
-                            order=1,
-                            total_steps=total_steps,
-                            timestep=int(sigma_curr * 1000),
-                            latents=latents,
-                        ),
-                    )
+                        model_out_list_uncond = model_output_uncond[0]  # Extract list of tensors from tuple
+
+                    noise_pred_uncond = torch.stack([t.float() for t in model_out_list_uncond], dim=0)
+                    noise_pred_uncond = noise_pred_uncond.squeeze(2)
+                    noise_pred_uncond = -noise_pred_uncond
+                    noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_cond - noise_pred_uncond)
+                else:
+                    noise_pred = noise_pred_cond
+
+                # Euler step
+                latents_dtype = latents.dtype
+                latents = latents.to(dtype=torch.float32)
+                latents = latents + (sigma_prev - sigma_curr) * noise_pred
+                latents = latents.to(dtype=latents_dtype)
+
+                if inpaint_extension is not None:
+                    latents = inpaint_extension.merge_intermediate_latents_with_init_latents(latents, sigma_prev)
+
+                step_callback(
+                    PipelineIntermediateState(
+                        step=step_idx + 1,
+                        order=1,
+                        total_steps=total_steps,
+                        timestep=int(sigma_curr * 1000),
+                        latents=latents,
+                    ),
+                )

         return latents

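
Stripped of CFG, control conditioning, inpainting, and progress callbacks, the loop kept on the right-hand side is a plain first-order Euler integration over the sigma schedule. A self-contained sketch of that update, where velocity_fn stands in for the negated transformer output (not InvokeAI's actual API):

import torch

def euler_denoise_sketch(latents: torch.Tensor, sigmas: list[float], velocity_fn) -> torch.Tensor:
    """Minimal sketch of the Euler update performed by the retained denoising loop.

    `velocity_fn` is a stand-in for the (CFG-combined, negated) Z-Image transformer output;
    the real loop also handles control conditioning, inpainting, and step callbacks.
    """
    for step_idx in range(len(sigmas) - 1):
        sigma_curr = sigmas[step_idx]
        sigma_prev = sigmas[step_idx + 1]
        # Z-Image timestep convention: t = 1 - sigma (t=0 is noise, t=1 is clean).
        model_t = 1.0 - sigma_curr
        noise_pred = velocity_fn(latents, model_t)
        # First-order Euler step from sigma_curr toward sigma_prev.
        latents = latents + (sigma_prev - sigma_curr) * noise_pred
    return latents

# Usage with a dummy velocity function and a 4-step schedule (values are illustrative):
sigmas = [1.0, 0.75, 0.5, 0.25, 0.0]
x = torch.randn(1, 16, 32, 32)
x = euler_denoise_sketch(x, sigmas, lambda lat, t: torch.zeros_like(lat))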

invokeai/app/services/config/config_default.py

@@ -85,7 +85,6 @@ class InvokeAIAppConfig(BaseSettings):
         max_cache_ram_gb: The maximum amount of CPU RAM to use for model caching in GB. If unset, the limit will be configured based on the available RAM. In most cases, it is recommended to leave this unset.
         max_cache_vram_gb: The amount of VRAM to use for model caching in GB. If unset, the limit will be configured based on the available VRAM and the device_working_mem_gb. In most cases, it is recommended to leave this unset.
         log_memory_usage: If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour.
-        model_cache_keep_alive_min: How long to keep models in cache after last use, in minutes. A value of 0 (the default) means models are kept in cache indefinitely. If no model generations occur within the timeout period, the model cache is cleared using the same logic as the 'Clear Model Cache' button.
         device_working_mem_gb: The amount of working memory to keep available on the compute device (in GB). Has no effect if running on CPU. If you are experiencing OOM errors, try increasing this value.
         enable_partial_loading: Enable partial loading of models. This enables models to run with reduced VRAM requirements (at the cost of slower speed) by streaming the model from RAM to VRAM as its used. In some edge cases, partial loading can cause models to run more slowly if they were previously being fully loaded into VRAM.
         keep_ram_copy_of_weights: Whether to keep a full RAM copy of a model's weights when the model is loaded in VRAM. Keeping a RAM copy increases average RAM usage, but speeds up model switching and LoRA patching (assuming there is sufficient RAM). Set this to False if RAM pressure is consistently high.
@@ -166,10 +165,9 @@ class InvokeAIAppConfig(BaseSettings):
     max_cache_ram_gb: Optional[float] = Field(default=None, gt=0, description="The maximum amount of CPU RAM to use for model caching in GB. If unset, the limit will be configured based on the available RAM. In most cases, it is recommended to leave this unset.")
     max_cache_vram_gb: Optional[float] = Field(default=None, ge=0, description="The amount of VRAM to use for model caching in GB. If unset, the limit will be configured based on the available VRAM and the device_working_mem_gb. In most cases, it is recommended to leave this unset.")
     log_memory_usage: bool = Field(default=False, description="If True, a memory snapshot will be captured before and after every model cache operation, and the result will be logged (at debug level). There is a time cost to capturing the memory snapshots, so it is recommended to only enable this feature if you are actively inspecting the model cache's behaviour.")
-    model_cache_keep_alive_min: float = Field(default=0, ge=0, description="How long to keep models in cache after last use, in minutes. A value of 0 (the default) means models are kept in cache indefinitely. If no model generations occur within the timeout period, the model cache is cleared using the same logic as the 'Clear Model Cache' button.")
     device_working_mem_gb: float = Field(default=3, description="The amount of working memory to keep available on the compute device (in GB). Has no effect if running on CPU. If you are experiencing OOM errors, try increasing this value.")
     enable_partial_loading: bool = Field(default=False, description="Enable partial loading of models. This enables models to run with reduced VRAM requirements (at the cost of slower speed) by streaming the model from RAM to VRAM as its used. In some edge cases, partial loading can cause models to run more slowly if they were previously being fully loaded into VRAM.")
-    keep_ram_copy_of_weights: bool = Field(default=True, description="Whether to keep a full RAM copy of a model's weights when the model is loaded in VRAM. Keeping a RAM copy increases average RAM usage, but speeds up model switching and LoRA patching (assuming there is sufficient RAM). Set this to False if RAM pressure is consistently high.")
+    keep_ram_copy_of_weights: bool = Field(default=True, description="Whether to keep a full RAM copy of a model's weights when the model is loaded in VRAM. Keeping a RAM copy increases average RAM usage, but speeds up model switching and LoRA patching (assuming there is sufficient RAM). Set this to False if RAM pressure is consistently high.")
     # Deprecated CACHE configs
     ram: Optional[float] = Field(default=None, gt=0, description="DEPRECATED: This setting is no longer used. It has been replaced by `max_cache_ram_gb`, but most users will not need to use this config since automatic cache size limits should work well in most cases. This config setting will be removed once the new model cache behavior is stable.")
     vram: Optional[float] = Field(default=None, ge=0, description="DEPRECATED: This setting is no longer used. It has been replaced by `max_cache_vram_gb`, but most users will not need to use this config since automatic cache size limits should work well in most cases. This config setting will be removed once the new model cache behavior is stable.")
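
The cache settings that remain after this change are ordinary fields on InvokeAIAppConfig and are normally supplied via invokeai.yaml. A minimal sketch of overriding a few of them programmatically (the values are arbitrary examples):

from invokeai.app.services.config.config_default import InvokeAIAppConfig

# Arbitrary example values; in a real install these come from invokeai.yaml.
config = InvokeAIAppConfig(
    max_cache_ram_gb=24.0,           # cap CPU RAM used for model caching
    enable_partial_loading=True,     # stream weights RAM -> VRAM to lower VRAM requirements
    keep_ram_copy_of_weights=False,  # trade slower model switching for lower RAM pressure
)
print(config.device_working_mem_gb)  # defaults to 3 GB unless overridden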

invokeai/app/services/model_manager/model_manager_default.py

@@ -60,10 +60,6 @@ class ModelManagerService(ModelManagerServiceBase):
             service.start(invoker)

     def stop(self, invoker: Invoker) -> None:
-        # Shutdown the model cache to cancel any pending timers
-        if hasattr(self._load, "ram_cache"):
-            self._load.ram_cache.shutdown()
-
         for service in [self._store, self._install, self._load]:
             if hasattr(service, "stop"):
                 service.stop(invoker)
@@ -92,10 +88,7 @@ class ModelManagerService(ModelManagerServiceBase):
             max_ram_cache_size_gb=app_config.max_cache_ram_gb,
             max_vram_cache_size_gb=app_config.max_cache_vram_gb,
             execution_device=execution_device or TorchDevice.choose_torch_device(),
-            storage_device="cpu",
-            log_memory_usage=app_config.log_memory_usage,
             logger=logger,
-            keep_alive_minutes=app_config.model_cache_keep_alive_min,
         )
         loader = ModelLoadService(
             app_config=app_config,