PyPI - InvokeAI - Versions diffs - 6.11.0rc1__py3-none-any.whl → 6.11.1__py3-none-any.whl - Mend

InvokeAI 6.11.0rc1__py3-none-any.whl → 6.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

invokeai/backend/flux2/denoise.py CHANGED Viewed

@@ -4,6 +4,7 @@ This module provides the denoising function for FLUX.2 Klein models,
 which use Qwen3 as the text encoder instead of CLIP+T5.
 """
+import inspect
 import math
 from typing import Any, Callable
@@ -87,11 +88,18 @@ def denoise(
         # The scheduler will apply dynamic shifting internally using mu (if enabled in scheduler config)
         sigmas = np.array(timesteps[:-1], dtype=np.float32)  # Exclude final 0.0
-        # Pass mu if provided - it will only be used if scheduler has use_dynamic_shifting=True
-        if mu is not None:
+        # Check if scheduler supports sigmas parameter using inspect.signature
+        # FlowMatchHeunDiscreteScheduler and FlowMatchLCMScheduler don't support sigmas
+        set_timesteps_sig = inspect.signature(scheduler.set_timesteps)
+        supports_sigmas = "sigmas" in set_timesteps_sig.parameters
+        if supports_sigmas and mu is not None:
+            # Pass mu if provided - it will only be used if scheduler has use_dynamic_shifting=True
             scheduler.set_timesteps(sigmas=sigmas.tolist(), mu=mu, device=img.device)
-        else:
+        elif supports_sigmas:
             scheduler.set_timesteps(sigmas=sigmas.tolist(), device=img.device)
+        else:
+            # Scheduler doesn't support sigmas (e.g., Heun, LCM) - use num_inference_steps
+            scheduler.set_timesteps(num_inference_steps=len(sigmas), device=img.device)
         num_scheduler_steps = len(scheduler.timesteps)
         is_heun = hasattr(scheduler, "state_in_first_order")
         user_step = 0
@@ -152,7 +160,15 @@ def denoise(
             # Apply inpainting merge at each step
             if inpaint_extension is not None:
-                img = inpaint_extension.merge_intermediate_latents_with_init_latents(img, t_prev)
+                # Separate the generated latents from the reference conditioning
+                gen_img = img[:, :original_seq_len, :]
+                ref_img = img[:, original_seq_len:, :]
+                # Merge only the generated part
+                gen_img = inpaint_extension.merge_intermediate_latents_with_init_latents(gen_img, t_prev)
+                # Concatenate back together
+                img = torch.cat([gen_img, ref_img], dim=1)
             # For Heun, only increment user step after second-order step completes
             if is_heun:
@@ -239,8 +255,19 @@ def denoise(
             # Apply inpainting merge at each step
             if inpaint_extension is not None:
-                img = inpaint_extension.merge_intermediate_latents_with_init_latents(img, t_prev)
-                preview_img = inpaint_extension.merge_intermediate_latents_with_init_latents(preview_img, 0.0)
+                # Separate the generated latents from the reference conditioning
+                gen_img = img[:, :original_seq_len, :]
+                ref_img = img[:, original_seq_len:, :]
+                # Merge only the generated part
+                gen_img = inpaint_extension.merge_intermediate_latents_with_init_latents(gen_img, t_prev)
+                # Concatenate back together
+                img = torch.cat([gen_img, ref_img], dim=1)
+                # Handling preview images
+                preview_gen = preview_img[:, :original_seq_len, :]
+                preview_gen = inpaint_extension.merge_intermediate_latents_with_init_latents(preview_gen, 0.0)
             # Extract only the generated image portion for preview (exclude reference images)
             callback_latents = preview_img[:, :original_seq_len, :] if img_cond_seq is not None else preview_img

invokeai/backend/flux2/sampling_utils.py CHANGED Viewed

@@ -108,33 +108,27 @@ def unpack_flux2(x: torch.Tensor, height: int, width: int) -> torch.Tensor:
 def compute_empirical_mu(image_seq_len: int, num_steps: int) -> float:
-    """Compute empirical mu for FLUX.2 schedule shifting.
+    """Compute mu for FLUX.2 schedule shifting.
-    This matches the diffusers Flux2Pipeline implementation.
-    The mu value controls how much the schedule is shifted towards higher timesteps.
+    Uses a fixed mu value of 2.02, matching ComfyUI's proven FLUX.2 configuration.
+    The previous implementation (from diffusers' FLUX.1 pipeline) computed mu as a
+    linear function of image_seq_len, which produced excessively high values at
+    high resolutions (e.g., mu=3.23 at 2048x2048). This over-shifted the sigma
+    schedule, compressing almost all values above 0.9 and forcing the model to
+    denoise everything in the final 1-2 steps, causing severe grid/diamond artifacts.
+    ComfyUI uses a fixed shift=2.02 for FLUX.2 Klein at all resolutions and produces
+    artifact-free images even at 2048x2048.
     Args:
-        image_seq_len: Number of image tokens (packed_h * packed_w).
-        num_steps: Number of denoising steps.
+        image_seq_len: Number of image tokens (packed_h * packed_w). Currently unused.
+        num_steps: Number of denoising steps. Currently unused.
     Returns:
-        The empirical mu value.
+        The mu value (fixed at 2.02).
     """
-    a1, b1 = 8.73809524e-05, 1.89833333
-    a2, b2 = 0.00016927, 0.45666666
-    if image_seq_len > 4300:
-        mu = a2 * image_seq_len + b2
-        return float(mu)
-    m_200 = a2 * image_seq_len + b2
-    m_10 = a1 * image_seq_len + b1
-    a = (m_200 - m_10) / 190.0
-    b = m_200 - 200.0 * a
-    mu = a * num_steps + b
-    return float(mu)
+    return 2.02
 def get_schedule_flux2(
@@ -169,11 +163,14 @@ def get_schedule_flux2(
 def generate_img_ids_flux2(h: int, w: int, batch_size: int, device: torch.device) -> torch.Tensor:
-    """Generate tensor of image position ids for FLUX.2.
+    """Generate tensor of image position ids for FLUX.2 with RoPE scaling.
     FLUX.2 uses 4D position coordinates (T, H, W, L) for its rotary position embeddings.
     This is different from FLUX.1 which uses 3D coordinates.
+    RoPE Scaling: For resolutions >1536x1536, position IDs are scaled down using
+    Position Interpolation to prevent RoPE degradation and diamond/grid artifacts.
     IMPORTANT: Position IDs must use int64 (long) dtype like diffusers, not bfloat16.
     Using floating point dtype for position IDs can cause NaN in rotary embeddings.

InvokeAI 6.11.0rc1__py3-none-any.whl → 6.11.1__py3-none-any.whl

InvokeAI 6.11.0rc1__py3-none-any.whl → 6.11.1__py3-none-any.whl