InvokeAI 6.11.0__py3-none-any.whl → 6.11.0rc1__py3-none-any.whl
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- invokeai/app/invocations/flux_denoise.py +5 -18
- invokeai/app/invocations/flux_model_loader.py +5 -2
- invokeai/app/util/step_callback.py +38 -52
- invokeai/backend/flux/dype/__init__.py +1 -14
- invokeai/backend/flux/dype/base.py +6 -40
- invokeai/backend/flux/dype/presets.py +35 -42
- invokeai/backend/flux2/denoise.py +3 -22
- invokeai/frontend/web/dist/assets/App-ClpIJstk.js +161 -0
- invokeai/frontend/web/dist/assets/{browser-ponyfill-u_ZjhQTI.js → browser-ponyfill-Cw07u5G1.js} +1 -1
- invokeai/frontend/web/dist/assets/{index-BB0nHmDe.js → index-DSKM8iGj.js} +64 -64
- invokeai/frontend/web/dist/index.html +1 -1
- invokeai/frontend/web/dist/locales/en.json +1 -21
- invokeai/frontend/web/dist/locales/it.json +16 -135
- invokeai/frontend/web/dist/locales/ru.json +11 -11
- invokeai/version/invokeai_version.py +1 -1
- {invokeai-6.11.0.dist-info → invokeai-6.11.0rc1.dist-info}/METADATA +1 -1
- {invokeai-6.11.0.dist-info → invokeai-6.11.0rc1.dist-info}/RECORD +23 -23
- invokeai/frontend/web/dist/assets/App-D13dX7be.js +0 -161
- {invokeai-6.11.0.dist-info → invokeai-6.11.0rc1.dist-info}/WHEEL +0 -0
- {invokeai-6.11.0.dist-info → invokeai-6.11.0rc1.dist-info}/entry_points.txt +0 -0
- {invokeai-6.11.0.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE +0 -0
- {invokeai-6.11.0.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
- {invokeai-6.11.0.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
- {invokeai-6.11.0.dist-info → invokeai-6.11.0rc1.dist-info}/top_level.txt +0 -0
@@ -32,12 +32,7 @@ from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.backend.flux.controlnet.instantx_controlnet_flux import InstantXControlNetFlux
 from invokeai.backend.flux.controlnet.xlabs_controlnet_flux import XLabsControlNetFlux
 from invokeai.backend.flux.denoise import denoise
-from invokeai.backend.flux.dype.presets import (
-    DYPE_PRESET_LABELS,
-    DYPE_PRESET_OFF,
-    DyPEPreset,
-    get_dype_config_from_preset,
-)
+from invokeai.backend.flux.dype.presets import DyPEPreset, get_dype_config_from_preset
 from invokeai.backend.flux.extensions.dype_extension import DyPEExtension
 from invokeai.backend.flux.extensions.instantx_controlnet_extension import InstantXControlNetExtension
 from invokeai.backend.flux.extensions.kontext_extension import KontextExtension
@@ -71,7 +66,7 @@ from invokeai.backend.util.devices import TorchDevice
     title="FLUX Denoise",
     tags=["image", "flux"],
     category="image",
-    version="4.
+    version="4.3.0",
 )
 class FluxDenoiseInvocation(BaseInvocation):
     """Run denoising process with a FLUX transformer model."""
@@ -175,24 +170,20 @@ class FluxDenoiseInvocation(BaseInvocation):
 
     # DyPE (Dynamic Position Extrapolation) for high-resolution generation
     dype_preset: DyPEPreset = InputField(
-        default=
+        default=DyPEPreset.OFF,
         description="DyPE preset for high-resolution generation. 'auto' enables automatically for resolutions > 1536px. '4k' uses optimized settings for 4K output.",
-        ui_order=100,
-        ui_choice_labels=DYPE_PRESET_LABELS,
     )
     dype_scale: Optional[float] = InputField(
         default=None,
         ge=0.0,
         le=8.0,
         description="DyPE magnitude (λs). Higher values = stronger extrapolation. Only used when dype_preset is not 'off'.",
-        ui_order=101,
     )
     dype_exponent: Optional[float] = InputField(
         default=None,
         ge=0.0,
         le=1000.0,
         description="DyPE decay speed (λt). Controls transition from low to high frequency detail. Only used when dype_preset is not 'off'.",
-        ui_order=102,
     )
 
     @torch.no_grad()
@@ -473,13 +464,9 @@ class FluxDenoiseInvocation(BaseInvocation):
                 target_width=self.width,
             )
             context.logger.info(
-                f"DyPE enabled:
-                f"
-                f"exponent={dype_config.dype_exponent:.2f}, start_sigma={dype_config.dype_start_sigma:.2f}, "
-                f"base_resolution={dype_config.base_resolution}"
+                f"DyPE enabled: {self.width}x{self.height}, preset={self.dype_preset.value}, "
+                f"scale={dype_config.dype_scale:.2f}, method={dype_config.method}"
             )
-        else:
-            context.logger.debug(f"DyPE disabled: resolution={self.width}x{self.height}, preset={self.dype_preset}")
 
         x = denoise(
             model=transformer,
@@ -6,7 +6,7 @@ from invokeai.app.invocations.baseinvocation import (
     invocation,
     invocation_output,
 )
-from invokeai.app.invocations.fields import FieldDescriptions, InputField, OutputField
+from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField
 from invokeai.app.invocations.model import CLIPField, ModelIdentifierField, T5EncoderField, TransformerField, VAEField
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.util.t5_model_identifier import (
@@ -37,25 +37,28 @@ class FluxModelLoaderOutput(BaseInvocationOutput):
     title="Main Model - FLUX",
     tags=["model", "flux"],
     category="model",
-    version="1.0.
+    version="1.0.6",
 )
 class FluxModelLoaderInvocation(BaseInvocation):
     """Loads a flux base model, outputting its submodels."""
 
     model: ModelIdentifierField = InputField(
         description=FieldDescriptions.flux_model,
+        input=Input.Direct,
         ui_model_base=BaseModelType.Flux,
         ui_model_type=ModelType.Main,
     )
 
     t5_encoder_model: ModelIdentifierField = InputField(
         description=FieldDescriptions.t5_encoder,
+        input=Input.Direct,
         title="T5 Encoder",
         ui_model_type=ModelType.T5Encoder,
     )
 
     clip_embed_model: ModelIdentifierField = InputField(
         description=FieldDescriptions.clip_embed_model,
+        input=Input.Direct,
         title="CLIP Embed",
         ui_model_type=ModelType.CLIPEmbed,
     )
@@ -93,60 +93,54 @@ COGVIEW4_LATENT_RGB_FACTORS = [
     [-0.00955853, -0.00980067, -0.00977842],
 ]
 
-# FLUX.2 uses 32 latent channels.
-#
+# FLUX.2 uses 32 latent channels. Since we don't have proper factors yet,
+# we extend FLUX factors with zeros for preview approximation.
 FLUX2_LATENT_RGB_FACTORS = [
     # R G B
-
-    [0.
-    [-0.
-    [0.
-    [0.
-    [0.
-    [
-    [-0.
-    [
-    [-0.
-    [-0.
-    [0.
-    [0.
-    [
-    [0.
-    [-0.
-    [0.
-
-    [
-    [0.
-    [0.
-    [0.
-    [0.
-    [0.
-    [
-    [0.
-    [0.
-    [0.
-    [
-    [
-    [
-    [
+    # First 16 channels (from FLUX)
+    [0.0118, 0.0024, 0.0017],
+    [-0.0074, -0.0108, -0.0003],
+    [0.0056, 0.0291, 0.0768],
+    [0.0342, -0.0681, -0.0427],
+    [-0.0258, 0.0092, 0.0463],
+    [0.0863, 0.0784, 0.0547],
+    [-0.0017, 0.0402, 0.0158],
+    [0.0501, 0.1058, 0.1152],
+    [-0.0209, -0.0218, -0.0329],
+    [-0.0314, 0.0083, 0.0896],
+    [0.0851, 0.0665, -0.0472],
+    [-0.0534, 0.0238, -0.0024],
+    [0.0452, -0.0026, 0.0048],
+    [0.0892, 0.0831, 0.0881],
+    [-0.1117, -0.0304, -0.0789],
+    [0.0027, -0.0479, -0.0043],
+    # Additional 16 channels (zeros as placeholder)
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
 ]
 
-FLUX2_LATENT_RGB_BIAS = [-0.0329, -0.0718, -0.0851]
-
 
 def sample_to_lowres_estimated_image(
-    samples: torch.Tensor,
-    latent_rgb_factors: torch.Tensor,
-    smooth_matrix: Optional[torch.Tensor] = None,
-    latent_rgb_bias: Optional[torch.Tensor] = None,
+    samples: torch.Tensor, latent_rgb_factors: torch.Tensor, smooth_matrix: Optional[torch.Tensor] = None
 ):
     if samples.dim() == 4:
         samples = samples[0]
     latent_image = samples.permute(1, 2, 0) @ latent_rgb_factors
 
-    if latent_rgb_bias is not None:
-        latent_image = latent_image + latent_rgb_bias
-
     if smooth_matrix is not None:
         latent_image = latent_image.unsqueeze(0).permute(3, 0, 1, 2)
         latent_image = torch.nn.functional.conv2d(latent_image, smooth_matrix.reshape((1, 1, 3, 3)), padding=1)
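For context on the preview path above: the step callback projects the latent sample onto RGB by multiplying the [H, W, C]-permuted latents with a [C, 3] factor matrix, so the 16 zero rows added for FLUX.2 simply contribute nothing to the estimate. A minimal sketch with stand-in shapes and random values (not the actual factors; the real display normalization may differ):

    import torch

    # Stand-in data: 32 latent channels on a 64x64 latent grid (illustrative only).
    latents = torch.randn(32, 64, 64)                # [C, H, W], as handed to the callback
    factors = torch.cat([torch.randn(16, 3) * 0.05,  # placeholder for the 16 FLUX rows
                         torch.zeros(16, 3)])        # the 16 zero rows drop out of the product
    rgb = latents.permute(1, 2, 0) @ factors         # [H, W, 3] low-res RGB estimate
    rgb = ((rgb + 1) / 2).clamp(0, 1)                # map roughly into [0, 1] for display
    print(rgb.shape)                                 # torch.Size([64, 64, 3])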
@@ -199,7 +193,6 @@ def diffusion_step_callback(
     sample = intermediate_state.latents
 
     smooth_matrix: list[list[float]] | None = None
-    latent_rgb_bias: list[float] | None = None
     if base_model in [BaseModelType.StableDiffusion1, BaseModelType.StableDiffusion2]:
         latent_rgb_factors = SD1_5_LATENT_RGB_FACTORS
     elif base_model in [BaseModelType.StableDiffusionXL, BaseModelType.StableDiffusionXLRefiner]:
@@ -213,7 +206,6 @@ def diffusion_step_callback(
         latent_rgb_factors = FLUX_LATENT_RGB_FACTORS
     elif base_model == BaseModelType.Flux2:
         latent_rgb_factors = FLUX2_LATENT_RGB_FACTORS
-        latent_rgb_bias = FLUX2_LATENT_RGB_BIAS
     elif base_model == BaseModelType.ZImage:
         # Z-Image uses FLUX-compatible VAE with 16 latent channels
         latent_rgb_factors = FLUX_LATENT_RGB_FACTORS
@@ -224,14 +216,8 @@ def diffusion_step_callback(
     smooth_matrix_torch = (
         torch.tensor(smooth_matrix, dtype=sample.dtype, device=sample.device) if smooth_matrix else None
     )
-    latent_rgb_bias_torch = (
-        torch.tensor(latent_rgb_bias, dtype=sample.dtype, device=sample.device) if latent_rgb_bias else None
-    )
     image = sample_to_lowres_estimated_image(
-        samples=sample,
-        latent_rgb_factors=latent_rgb_factors_torch,
-        smooth_matrix=smooth_matrix_torch,
-        latent_rgb_bias=latent_rgb_bias_torch,
+        samples=sample, latent_rgb_factors=latent_rgb_factors_torch, smooth_matrix=smooth_matrix_torch
     )
 
     width = image.width * 8
@@ -8,24 +8,11 @@ Based on: https://github.com/wildminder/ComfyUI-DyPE
 
 from invokeai.backend.flux.dype.base import DyPEConfig
 from invokeai.backend.flux.dype.embed import DyPEEmbedND
-from invokeai.backend.flux.dype.presets import (
-    DYPE_PRESET_4K,
-    DYPE_PRESET_AUTO,
-    DYPE_PRESET_LABELS,
-    DYPE_PRESET_MANUAL,
-    DYPE_PRESET_OFF,
-    DyPEPreset,
-    get_dype_config_for_resolution,
-)
+from invokeai.backend.flux.dype.presets import DyPEPreset, get_dype_config_for_resolution
 
 __all__ = [
     "DyPEConfig",
     "DyPEEmbedND",
     "DyPEPreset",
-    "DYPE_PRESET_OFF",
-    "DYPE_PRESET_MANUAL",
-    "DYPE_PRESET_AUTO",
-    "DYPE_PRESET_4K",
-    "DYPE_PRESET_LABELS",
     "get_dype_config_for_resolution",
 ]
@@ -99,17 +99,13 @@ def compute_vision_yarn_freqs(
     The NTK-aware approach smoothly interpolates frequencies to cover larger
     position ranges without breaking the attention patterns.
 
-    DyPE (Dynamic Position Extrapolation) modulates the NTK scaling based on
-    the current timestep - stronger extrapolation in early steps (global structure),
-    weaker in late steps (fine details).
-
     Args:
         pos: Position tensor
         dim: Embedding dimension
         theta: RoPE base frequency
         scale_h: Height scaling factor
         scale_w: Width scaling factor
-        current_sigma: Current noise level (
+        current_sigma: Current noise level (reserved for future timestep-aware scaling)
         dype_config: DyPE configuration
 
     Returns:
@@ -128,24 +124,7 @@ def compute_vision_yarn_freqs(
     # This increases the wavelength of position encodings proportionally
     if scale > 1.0:
         ntk_alpha = scale ** (dim / (dim - 2))
-
-        # Apply timestep-dependent DyPE modulation
-        # mscale controls how strongly we apply the NTK extrapolation
-        # Early steps (high sigma): stronger extrapolation for global structure
-        # Late steps (low sigma): weaker extrapolation for fine details
-        mscale = get_timestep_mscale(
-            scale=scale,
-            current_sigma=current_sigma,
-            dype_scale=dype_config.dype_scale,
-            dype_exponent=dype_config.dype_exponent,
-            dype_start_sigma=dype_config.dype_start_sigma,
-        )
-
-        # Modulate NTK alpha by mscale
-        # When mscale > 1: interpolate towards stronger extrapolation
-        # When mscale = 1: use base NTK alpha
-        modulated_alpha = 1.0 + (ntk_alpha - 1.0) * mscale
-        scaled_theta = theta * modulated_alpha
+        scaled_theta = theta * ntk_alpha
     else:
         scaled_theta = theta
 
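The branch kept by this change is plain NTK-aware scaling: the RoPE base frequency is multiplied by alpha = scale ** (dim / (dim - 2)), which lengthens the position-encoding wavelengths so larger canvases stay within the trained position range. A rough worked example (dim=56 is one of FLUX's per-axis RoPE dims; the other numbers are illustrative):

    # Illustrative numbers: rendering at twice the 1024px base resolution.
    dim, theta, scale = 56, 10000.0, 2.0
    ntk_alpha = scale ** (dim / (dim - 2))  # ~2.05 for dim=56, scale=2.0
    scaled_theta = theta * ntk_alpha        # ~20520: longer wavelengths, larger usable range
    print(round(ntk_alpha, 3), round(scaled_theta, 1))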
@@ -172,15 +151,14 @@ def compute_yarn_freqs(
 ) -> tuple[Tensor, Tensor]:
     """Compute RoPE frequencies using YARN/NTK method.
 
-    Uses NTK-aware theta scaling for high-resolution support
-    timestep-dependent DyPE modulation.
+    Uses NTK-aware theta scaling for high-resolution support.
 
     Args:
         pos: Position tensor
         dim: Embedding dimension
         theta: RoPE base frequency
         scale: Uniform scaling factor
-        current_sigma: Current noise level (
+        current_sigma: Current noise level (reserved for future use)
        dype_config: DyPE configuration
 
     Returns:
@@ -191,22 +169,10 @@ def compute_yarn_freqs(
     device = pos.device
     dtype = torch.float64 if device.type != "mps" else torch.float32
 
-    # NTK-aware theta scaling
+    # NTK-aware theta scaling
     if scale > 1.0:
         ntk_alpha = scale ** (dim / (dim - 2))
-
-        # Apply timestep-dependent DyPE modulation
-        mscale = get_timestep_mscale(
-            scale=scale,
-            current_sigma=current_sigma,
-            dype_scale=dype_config.dype_scale,
-            dype_exponent=dype_config.dype_exponent,
-            dype_start_sigma=dype_config.dype_start_sigma,
-        )
-
-        # Modulate NTK alpha by mscale
-        modulated_alpha = 1.0 + (ntk_alpha - 1.0) * mscale
-        scaled_theta = theta * modulated_alpha
+        scaled_theta = theta * ntk_alpha
     else:
         scaled_theta = theta
 
@@ -1,26 +1,17 @@
 """DyPE presets and automatic configuration."""
 
 from dataclasses import dataclass
-from 
+from enum import Enum
 
 from invokeai.backend.flux.dype.base import DyPEConfig
 
-
-DyPEPreset
-
-
-
-
-
-DYPE_PRESET_4K: DyPEPreset = "4k"
-
-# Human-readable labels for the UI
-DYPE_PRESET_LABELS: dict[str, str] = {
-    "off": "Off",
-    "manual": "Manual",
-    "auto": "Auto (>1536px)",
-    "4k": "4K Optimized",
-}
+
+class DyPEPreset(str, Enum):
+    """Predefined DyPE configurations."""
+
+    OFF = "off"  # DyPE disabled
+    AUTO = "auto"  # Automatically enable based on resolution
+    PRESET_4K = "4k"  # Optimized for 3840x2160 / 4096x2160
 
 
 @dataclass
@@ -36,7 +27,7 @@ class DyPEPresetConfig:
 
 # Predefined preset configurations
 DYPE_PRESETS: dict[DyPEPreset, DyPEPresetConfig] = {
-
+    DyPEPreset.PRESET_4K: DyPEPresetConfig(
         base_resolution=1024,
         method="vision_yarn",
         dype_scale=2.0,
@@ -101,39 +92,41 @@ def get_dype_config_from_preset(
         preset: The DyPE preset to use
         width: Target image width
         height: Target image height
-        custom_scale: Optional custom dype_scale (
-        custom_exponent: Optional custom dype_exponent (
+        custom_scale: Optional custom dype_scale (overrides preset)
+        custom_exponent: Optional custom dype_exponent (overrides preset)
 
     Returns:
         DyPEConfig if DyPE should be enabled, None otherwise
     """
-    if preset ==
+    if preset == DyPEPreset.OFF:
+        # Check if custom values are provided even with preset=OFF
+        if custom_scale is not None:
+            return DyPEConfig(
+                enable_dype=True,
+                base_resolution=1024,
+                method="vision_yarn",
+                dype_scale=custom_scale,
+                dype_exponent=custom_exponent if custom_exponent is not None else 2.0,
+                dype_start_sigma=1.0,
+            )
         return None
 
-    if preset ==
-
-        max_dim = max(width, height)
-        scale = max_dim / 1024
-        dynamic_dype_scale = min(2.0 * scale, 8.0)
-        return DyPEConfig(
-            enable_dype=True,
-            base_resolution=1024,
-            method="vision_yarn",
-            dype_scale=custom_scale if custom_scale is not None else dynamic_dype_scale,
-            dype_exponent=custom_exponent if custom_exponent is not None else 2.0,
-            dype_start_sigma=1.0,
-        )
-
-    if preset == DYPE_PRESET_AUTO:
-        # Auto preset - custom values are ignored
-        return get_dype_config_for_resolution(
+    if preset == DyPEPreset.AUTO:
+        config = get_dype_config_for_resolution(
             width=width,
             height=height,
             base_resolution=1024,
             activation_threshold=1536,
         )
-
-
+        # Apply custom overrides if provided
+        if config is not None:
+            if custom_scale is not None:
+                config.dype_scale = custom_scale
+            if custom_exponent is not None:
+                config.dype_exponent = custom_exponent
+        return config
+
+    # Use preset configuration
     preset_config = DYPE_PRESETS.get(preset)
     if preset_config is None:
         return None
@@ -142,7 +135,7 @@ def get_dype_config_from_preset(
         enable_dype=True,
         base_resolution=preset_config.base_resolution,
         method=preset_config.method,
-        dype_scale=preset_config.dype_scale,
-        dype_exponent=preset_config.dype_exponent,
+        dype_scale=custom_scale if custom_scale is not None else preset_config.dype_scale,
+        dype_exponent=custom_exponent if custom_exponent is not None else preset_config.dype_exponent,
         dype_start_sigma=preset_config.dype_start_sigma,
     )
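Taken together with the presets hunk above, the override behavior changes: OFF now still enables DyPE when a custom scale is supplied, AUTO applies custom values on top of the resolution-derived config, and the 4K preset lets custom values win over its defaults. A usage sketch, assuming keyword-style calls against the rc1 code (argument names as in the diff):

    from invokeai.backend.flux.dype.presets import DyPEPreset, get_dype_config_from_preset

    # OFF with no custom values: DyPE stays disabled (returns None).
    assert get_dype_config_from_preset(preset=DyPEPreset.OFF, width=1024, height=1024) is None

    # OFF with a custom scale: DyPE is enabled anyway, using the supplied magnitude.
    cfg = get_dype_config_from_preset(preset=DyPEPreset.OFF, width=2048, height=2048, custom_scale=3.0)

    # 4K preset with a custom exponent: preset defaults apply except where overridden.
    cfg = get_dype_config_from_preset(
        preset=DyPEPreset.PRESET_4K, width=3840, height=2160, custom_exponent=4.0
    )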
@@ -152,15 +152,7 @@ def denoise(
 
         # Apply inpainting merge at each step
         if inpaint_extension is not None:
-
-            gen_img = img[:, :original_seq_len, :]
-            ref_img = img[:, original_seq_len:, :]
-
-            # Merge only the generated part
-            gen_img = inpaint_extension.merge_intermediate_latents_with_init_latents(gen_img, t_prev)
-
-            # Concatenate back together
-            img = torch.cat([gen_img, ref_img], dim=1)
+            img = inpaint_extension.merge_intermediate_latents_with_init_latents(img, t_prev)
 
         # For Heun, only increment user step after second-order step completes
         if is_heun:
@@ -247,19 +239,8 @@ def denoise(
 
         # Apply inpainting merge at each step
        if inpaint_extension is not None:
-
-
-            ref_img = img[:, original_seq_len:, :]
-
-            # Merge only the generated part
-            gen_img = inpaint_extension.merge_intermediate_latents_with_init_latents(gen_img, t_prev)
-
-            # Concatenate back together
-            img = torch.cat([gen_img, ref_img], dim=1)
-
-            # Handling preview images
-            preview_gen = preview_img[:, :original_seq_len, :]
-            preview_gen = inpaint_extension.merge_intermediate_latents_with_init_latents(preview_gen, 0.0)
+            img = inpaint_extension.merge_intermediate_latents_with_init_latents(img, t_prev)
+            preview_img = inpaint_extension.merge_intermediate_latents_with_init_latents(preview_img, 0.0)
 
         # Extract only the generated image portion for preview (exclude reference images)
         callback_latents = preview_img[:, :original_seq_len, :] if img_cond_seq is not None else preview_img