InvokeAI 6.10.0rc2-py3-none-any.whl → 6.11.0rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. invokeai/app/api/routers/model_manager.py +43 -1
  2. invokeai/app/invocations/fields.py +1 -1
  3. invokeai/app/invocations/flux2_denoise.py +499 -0
  4. invokeai/app/invocations/flux2_klein_model_loader.py +222 -0
  5. invokeai/app/invocations/flux2_klein_text_encoder.py +222 -0
  6. invokeai/app/invocations/flux2_vae_decode.py +106 -0
  7. invokeai/app/invocations/flux2_vae_encode.py +88 -0
  8. invokeai/app/invocations/flux_denoise.py +50 -3
  9. invokeai/app/invocations/flux_lora_loader.py +1 -1
  10. invokeai/app/invocations/ideal_size.py +6 -1
  11. invokeai/app/invocations/metadata.py +4 -0
  12. invokeai/app/invocations/metadata_linked.py +47 -0
  13. invokeai/app/invocations/model.py +1 -0
  14. invokeai/app/invocations/z_image_denoise.py +8 -3
  15. invokeai/app/invocations/z_image_image_to_latents.py +9 -1
  16. invokeai/app/invocations/z_image_latents_to_image.py +9 -1
  17. invokeai/app/invocations/z_image_seed_variance_enhancer.py +110 -0
  18. invokeai/app/services/config/config_default.py +3 -1
  19. invokeai/app/services/invocation_stats/invocation_stats_common.py +6 -6
  20. invokeai/app/services/invocation_stats/invocation_stats_default.py +9 -4
  21. invokeai/app/services/model_manager/model_manager_default.py +7 -0
  22. invokeai/app/services/model_records/model_records_base.py +4 -2
  23. invokeai/app/services/shared/invocation_context.py +15 -0
  24. invokeai/app/services/shared/sqlite/sqlite_util.py +2 -0
  25. invokeai/app/services/shared/sqlite_migrator/migrations/migration_25.py +61 -0
  26. invokeai/app/util/step_callback.py +42 -0
  27. invokeai/backend/flux/denoise.py +239 -204
  28. invokeai/backend/flux/dype/__init__.py +18 -0
  29. invokeai/backend/flux/dype/base.py +226 -0
  30. invokeai/backend/flux/dype/embed.py +116 -0
  31. invokeai/backend/flux/dype/presets.py +141 -0
  32. invokeai/backend/flux/dype/rope.py +110 -0
  33. invokeai/backend/flux/extensions/dype_extension.py +91 -0
  34. invokeai/backend/flux/util.py +35 -1
  35. invokeai/backend/flux2/__init__.py +4 -0
  36. invokeai/backend/flux2/denoise.py +261 -0
  37. invokeai/backend/flux2/ref_image_extension.py +294 -0
  38. invokeai/backend/flux2/sampling_utils.py +209 -0
  39. invokeai/backend/model_manager/configs/factory.py +19 -1
  40. invokeai/backend/model_manager/configs/main.py +395 -3
  41. invokeai/backend/model_manager/configs/qwen3_encoder.py +116 -7
  42. invokeai/backend/model_manager/configs/vae.py +104 -2
  43. invokeai/backend/model_manager/load/load_default.py +0 -1
  44. invokeai/backend/model_manager/load/model_cache/model_cache.py +107 -2
  45. invokeai/backend/model_manager/load/model_loaders/flux.py +1007 -2
  46. invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +0 -1
  47. invokeai/backend/model_manager/load/model_loaders/z_image.py +121 -28
  48. invokeai/backend/model_manager/starter_models.py +128 -0
  49. invokeai/backend/model_manager/taxonomy.py +31 -4
  50. invokeai/backend/model_manager/util/select_hf_files.py +3 -2
  51. invokeai/backend/util/vae_working_memory.py +0 -2
  52. invokeai/frontend/web/dist/assets/App-ClpIJstk.js +161 -0
  53. invokeai/frontend/web/dist/assets/{browser-ponyfill-BP0RxJ4G.js → browser-ponyfill-Cw07u5G1.js} +1 -1
  54. invokeai/frontend/web/dist/assets/{index-B44qKjrs.js → index-DSKM8iGj.js} +69 -69
  55. invokeai/frontend/web/dist/index.html +1 -1
  56. invokeai/frontend/web/dist/locales/en.json +58 -5
  57. invokeai/frontend/web/dist/locales/it.json +2 -1
  58. invokeai/version/invokeai_version.py +1 -1
  59. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/METADATA +7 -1
  60. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/RECORD +66 -49
  61. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/WHEEL +1 -1
  62. invokeai/frontend/web/dist/assets/App-DllqPQ3j.js +0 -161
  63. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/entry_points.txt +0 -0
  64. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE +0 -0
  65. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
  66. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
  67. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/top_level.txt +0 -0
invokeai/backend/flux/dype/base.py
@@ -0,0 +1,226 @@
+ """DyPE base configuration and utilities."""
+
+ import math
+ from dataclasses import dataclass
+ from typing import Literal
+
+ import torch
+ from torch import Tensor
+
+
+ @dataclass
+ class DyPEConfig:
+     """Configuration for Dynamic Position Extrapolation."""
+
+     enable_dype: bool = True
+     base_resolution: int = 1024  # Native training resolution
+     method: Literal["vision_yarn", "yarn", "ntk", "base"] = "vision_yarn"
+     dype_scale: float = 2.0  # Magnitude λs (0.0-8.0)
+     dype_exponent: float = 2.0  # Decay speed λt (0.0-1000.0)
+     dype_start_sigma: float = 1.0  # When DyPE decay starts
+
+
+ def get_mscale(scale: float, mscale_factor: float = 1.0) -> float:
+     """Calculate the magnitude scaling factor.
+
+     Args:
+         scale: The resolution scaling factor
+         mscale_factor: Adjustment factor for the scaling
+
+     Returns:
+         The magnitude scaling factor
+     """
+     if scale <= 1.0:
+         return 1.0
+     return mscale_factor * math.log(scale) + 1.0
+
+
+ def get_timestep_mscale(
+     scale: float,
+     current_sigma: float,
+     dype_scale: float,
+     dype_exponent: float,
+     dype_start_sigma: float,
+ ) -> float:
+     """Calculate timestep-dependent magnitude scaling.
+
+     The key insight of DyPE: early steps focus on low frequencies (global structure),
+     late steps on high frequencies (details). This function modulates the scaling
+     based on the current timestep/sigma.
+
+     Args:
+         scale: Resolution scaling factor
+         current_sigma: Current noise level (1.0 = full noise, 0.0 = clean)
+         dype_scale: DyPE magnitude (λs)
+         dype_exponent: DyPE decay speed (λt)
+         dype_start_sigma: Sigma threshold to start decay
+
+     Returns:
+         Timestep-modulated scaling factor
+     """
+     if scale <= 1.0:
+         return 1.0
+
+     # Normalize sigma to [0, 1] range relative to start_sigma
+     if current_sigma >= dype_start_sigma:
+         t_normalized = 1.0
+     else:
+         t_normalized = current_sigma / dype_start_sigma
+
+     # Apply exponential decay: stronger extrapolation early, weaker late
+     # decay = exp(-λt * (1 - t)) where t=1 is early (high sigma), t=0 is late
+     decay = math.exp(-dype_exponent * (1.0 - t_normalized))
+
+     # Base mscale from resolution
+     base_mscale = get_mscale(scale)
+
+     # Interpolate between base_mscale and 1.0 based on decay and dype_scale
+     # When decay=1 (early): use scaled value
+     # When decay=0 (late): use base value
+     scaled_mscale = 1.0 + (base_mscale - 1.0) * dype_scale * decay
+
+     return scaled_mscale
+
+
+ def compute_vision_yarn_freqs(
+     pos: Tensor,
+     dim: int,
+     theta: int,
+     scale_h: float,
+     scale_w: float,
+     current_sigma: float,
+     dype_config: DyPEConfig,
+ ) -> tuple[Tensor, Tensor]:
+     """Compute RoPE frequencies using NTK-aware scaling for high resolutions.
+
+     This method extends FLUX's position encoding to handle resolutions beyond
+     the 1024px training resolution by scaling the base frequency (theta).
+
+     The NTK-aware approach smoothly interpolates frequencies to cover larger
+     position ranges without breaking the attention patterns.
+
+     Args:
+         pos: Position tensor
+         dim: Embedding dimension
+         theta: RoPE base frequency
+         scale_h: Height scaling factor
+         scale_w: Width scaling factor
+         current_sigma: Current noise level (reserved for future timestep-aware scaling)
+         dype_config: DyPE configuration
+
+     Returns:
+         Tuple of (cos, sin) frequency tensors
+     """
+     assert dim % 2 == 0
+
+     # Use the larger scale for the NTK calculation
+     scale = max(scale_h, scale_w)
+
+     device = pos.device
+     dtype = torch.float64 if device.type != "mps" else torch.float32
+
+     # NTK-aware theta scaling: extends position coverage for high-res
+     # Formula: theta_scaled = theta * scale^(dim/(dim-2))
+     # This increases the wavelength of position encodings proportionally
+     if scale > 1.0:
+         ntk_alpha = scale ** (dim / (dim - 2))
+         scaled_theta = theta * ntk_alpha
+     else:
+         scaled_theta = theta
+
+     # Standard RoPE frequency computation
+     freq_seq = torch.arange(0, dim, 2, dtype=dtype, device=device) / dim
+     freqs = 1.0 / (scaled_theta**freq_seq)
+
+     # Compute angles = position * frequency
+     angles = torch.einsum("...n,d->...nd", pos.to(dtype), freqs)
+
+     cos = torch.cos(angles)
+     sin = torch.sin(angles)
+
+     return cos.to(pos.dtype), sin.to(pos.dtype)
+
+
+ def compute_yarn_freqs(
+     pos: Tensor,
+     dim: int,
+     theta: int,
+     scale: float,
+     current_sigma: float,
+     dype_config: DyPEConfig,
+ ) -> tuple[Tensor, Tensor]:
+     """Compute RoPE frequencies using the YARN/NTK method.
+
+     Uses NTK-aware theta scaling for high-resolution support.
+
+     Args:
+         pos: Position tensor
+         dim: Embedding dimension
+         theta: RoPE base frequency
+         scale: Uniform scaling factor
+         current_sigma: Current noise level (reserved for future use)
+         dype_config: DyPE configuration
+
+     Returns:
+         Tuple of (cos, sin) frequency tensors
+     """
+     assert dim % 2 == 0
+
+     device = pos.device
+     dtype = torch.float64 if device.type != "mps" else torch.float32
+
+     # NTK-aware theta scaling
+     if scale > 1.0:
+         ntk_alpha = scale ** (dim / (dim - 2))
+         scaled_theta = theta * ntk_alpha
+     else:
+         scaled_theta = theta
+
+     freq_seq = torch.arange(0, dim, 2, dtype=dtype, device=device) / dim
+     freqs = 1.0 / (scaled_theta**freq_seq)
+
+     angles = torch.einsum("...n,d->...nd", pos.to(dtype), freqs)
+
+     cos = torch.cos(angles)
+     sin = torch.sin(angles)
+
+     return cos.to(pos.dtype), sin.to(pos.dtype)
+
+
+ def compute_ntk_freqs(
+     pos: Tensor,
+     dim: int,
+     theta: int,
+     scale: float,
+ ) -> tuple[Tensor, Tensor]:
+     """Compute RoPE frequencies using the NTK method.
+
+     Neural Tangent Kernel approach - continuous frequency scaling without
+     timestep dependency.
+
+     Args:
+         pos: Position tensor
+         dim: Embedding dimension
+         theta: RoPE base frequency
+         scale: Scaling factor
+
+     Returns:
+         Tuple of (cos, sin) frequency tensors
+     """
+     assert dim % 2 == 0
+
+     device = pos.device
+     dtype = torch.float64 if device.type != "mps" else torch.float32
+
+     # NTK scaling
+     scaled_theta = theta * (scale ** (dim / (dim - 2)))
+
+     freq_seq = torch.arange(0, dim, 2, dtype=dtype, device=device) / dim
+     freqs = 1.0 / (scaled_theta**freq_seq)
+
+     angles = torch.einsum("...n,d->...nd", pos.to(dtype), freqs)
+
+     cos = torch.cos(angles)
+     sin = torch.sin(angles)
+
+     return cos.to(pos.dtype), sin.to(pos.dtype)
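
The interplay of λs and λt above is easier to see with numbers. The following sketch (illustrative, not part of the diff) evaluates get_timestep_mscale for a 4x upscale (a 4096px target on the 1024px base) with the default λs=2.0 and λt=2.0:

    from invokeai.backend.flux.dype.base import get_timestep_mscale

    # scale=4.0 corresponds to a 4096px target on a model trained at 1024px
    for sigma in (1.0, 0.5, 0.0):
        m = get_timestep_mscale(
            scale=4.0, current_sigma=sigma, dype_scale=2.0, dype_exponent=2.0, dype_start_sigma=1.0
        )
        print(f"sigma={sigma:.1f} -> mscale={m:.3f}")

    # sigma=1.0 -> mscale=3.773   (full noise: strongest extrapolation)
    # sigma=0.5 -> mscale=2.020
    # sigma=0.0 -> mscale=1.375   (clean: decay exp(-2) leaves only a small boost)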
invokeai/backend/flux/dype/embed.py
@@ -0,0 +1,116 @@
+ """DyPE-enhanced position embedding module."""
+
+ import torch
+ from torch import Tensor, nn
+
+ from invokeai.backend.flux.dype.base import DyPEConfig
+ from invokeai.backend.flux.dype.rope import rope_dype
+
+
+ class DyPEEmbedND(nn.Module):
+     """N-dimensional position embedding with DyPE support.
+
+     This class replaces the standard EmbedND from FLUX with a DyPE-aware version
+     that dynamically scales position embeddings based on resolution and timestep.
+
+     The key difference from EmbedND:
+     - Maintains step state (current_sigma, target dimensions)
+     - Uses rope_dype() instead of rope() for frequency computation
+     - Applies timestep-dependent scaling for better high-resolution generation
+     """
+
+     def __init__(
+         self,
+         dim: int,
+         theta: int,
+         axes_dim: list[int],
+         dype_config: DyPEConfig,
+     ):
+         """Initialize DyPE position embedder.
+
+         Args:
+             dim: Total embedding dimension (sum of axes_dim)
+             theta: RoPE base frequency
+             axes_dim: Dimension allocation per axis (e.g., [16, 56, 56] for FLUX)
+             dype_config: DyPE configuration
+         """
+         super().__init__()
+         self.dim = dim
+         self.theta = theta
+         self.axes_dim = axes_dim
+         self.dype_config = dype_config
+
+         # Step state - updated before each denoising step
+         self._current_sigma: float = 1.0
+         self._target_height: int = 1024
+         self._target_width: int = 1024
+
+     def set_step_state(self, sigma: float, height: int, width: int) -> None:
+         """Update the step state before each denoising step.
+
+         This method should be called by the DyPE extension before each step
+         to update the current noise level and target dimensions.
+
+         Args:
+             sigma: Current noise level (timestep value, 1.0 = full noise)
+             height: Target image height in pixels
+             width: Target image width in pixels
+         """
+         self._current_sigma = sigma
+         self._target_height = height
+         self._target_width = width
+
+     def forward(self, ids: Tensor) -> Tensor:
+         """Compute position embeddings with DyPE scaling.
+
+         Args:
+             ids: Position indices tensor with shape (batch, seq_len, n_axes).
+                 For FLUX: n_axes=3 (time/channel, height, width)
+
+         Returns:
+             Position embedding tensor with shape (batch, 1, seq_len, dim)
+         """
+         n_axes = ids.shape[-1]
+
+         # Compute RoPE for each axis with DyPE scaling
+         embeddings = []
+         for i in range(n_axes):
+             axis_emb = rope_dype(
+                 pos=ids[..., i],
+                 dim=self.axes_dim[i],
+                 theta=self.theta,
+                 current_sigma=self._current_sigma,
+                 target_height=self._target_height,
+                 target_width=self._target_width,
+                 dype_config=self.dype_config,
+             )
+             embeddings.append(axis_emb)
+
+         # Concatenate embeddings from all axes
+         emb = torch.cat(embeddings, dim=-3)
+
+         return emb.unsqueeze(1)
+
+     @classmethod
+     def from_embednd(
+         cls,
+         embed_nd: nn.Module,
+         dype_config: DyPEConfig,
+     ) -> "DyPEEmbedND":
+         """Create a DyPEEmbedND from an existing EmbedND.
+
+         This is a convenience method for patching an existing FLUX model.
+
+         Args:
+             embed_nd: Original EmbedND module from FLUX
+             dype_config: DyPE configuration
+
+         Returns:
+             New DyPEEmbedND with same parameters
+         """
+         return cls(
+             dim=embed_nd.dim,
+             theta=embed_nd.theta,
+             axes_dim=embed_nd.axes_dim,
+             dype_config=dype_config,
+         )
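
As a rough usage sketch (not part of the diff), the embedder can be exercised standalone; the [16, 56, 56] axis split and theta=10000 are the values FLUX uses, and the output shape follows from the forward() docstring:

    import torch

    from invokeai.backend.flux.dype.base import DyPEConfig
    from invokeai.backend.flux.dype.embed import DyPEEmbedND

    embedder = DyPEEmbedND(dim=128, theta=10_000, axes_dim=[16, 56, 56], dype_config=DyPEConfig())
    embedder.set_step_state(sigma=1.0, height=2048, width=2048)  # 2x the 1024px base

    ids = torch.zeros(1, 4096, 3)  # (batch, seq_len, n_axes) position indices
    pe = embedder(ids)
    print(pe.shape)  # torch.Size([1, 1, 4096, 64, 2, 2]) -- per-axis dims 8+28+28 concatenated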
invokeai/backend/flux/dype/presets.py
@@ -0,0 +1,141 @@
+ """DyPE presets and automatic configuration."""
+
+ from dataclasses import dataclass
+ from enum import Enum
+
+ from invokeai.backend.flux.dype.base import DyPEConfig
+
+
+ class DyPEPreset(str, Enum):
+     """Predefined DyPE configurations."""
+
+     OFF = "off"  # DyPE disabled
+     AUTO = "auto"  # Automatically enable based on resolution
+     PRESET_4K = "4k"  # Optimized for 3840x2160 / 4096x2160
+
+
+ @dataclass
+ class DyPEPresetConfig:
+     """Preset configuration values."""
+
+     base_resolution: int
+     method: str
+     dype_scale: float
+     dype_exponent: float
+     dype_start_sigma: float
+
+
+ # Predefined preset configurations
+ DYPE_PRESETS: dict[DyPEPreset, DyPEPresetConfig] = {
+     DyPEPreset.PRESET_4K: DyPEPresetConfig(
+         base_resolution=1024,
+         method="vision_yarn",
+         dype_scale=2.0,
+         dype_exponent=2.0,
+         dype_start_sigma=1.0,
+     ),
+ }
+
+
+ def get_dype_config_for_resolution(
+     width: int,
+     height: int,
+     base_resolution: int = 1024,
+     activation_threshold: int = 1536,
+ ) -> DyPEConfig | None:
+     """Automatically determine DyPE config based on target resolution.
+
+     FLUX can handle resolutions up to ~1.5x natively without significant artifacts.
+     DyPE is only activated when the resolution exceeds the activation threshold.
+
+     Args:
+         width: Target image width in pixels
+         height: Target image height in pixels
+         base_resolution: Native training resolution of the model (for scale calculation)
+         activation_threshold: Resolution threshold above which DyPE is activated
+
+     Returns:
+         DyPEConfig if DyPE should be enabled, None otherwise
+     """
+     max_dim = max(width, height)
+
+     if max_dim <= activation_threshold:
+         return None  # FLUX can handle this natively
+
+     # Calculate scaling factor based on base_resolution
+     scale = max_dim / base_resolution
+
+     # Dynamic parameters based on scaling
+     # Higher resolution = higher dype_scale, capped at 8.0
+     dynamic_dype_scale = min(2.0 * scale, 8.0)
+
+     return DyPEConfig(
+         enable_dype=True,
+         base_resolution=base_resolution,
+         method="vision_yarn",
+         dype_scale=dynamic_dype_scale,
+         dype_exponent=2.0,
+         dype_start_sigma=1.0,
+     )
+
+
+ def get_dype_config_from_preset(
+     preset: DyPEPreset,
+     width: int,
+     height: int,
+     custom_scale: float | None = None,
+     custom_exponent: float | None = None,
+ ) -> DyPEConfig | None:
+     """Get DyPE configuration from a preset or custom values.
+
+     Args:
+         preset: The DyPE preset to use
+         width: Target image width
+         height: Target image height
+         custom_scale: Optional custom dype_scale (overrides preset)
+         custom_exponent: Optional custom dype_exponent (overrides preset)
+
+     Returns:
+         DyPEConfig if DyPE should be enabled, None otherwise
+     """
+     if preset == DyPEPreset.OFF:
+         # Check if custom values are provided even with preset=OFF
+         if custom_scale is not None:
+             return DyPEConfig(
+                 enable_dype=True,
+                 base_resolution=1024,
+                 method="vision_yarn",
+                 dype_scale=custom_scale,
+                 dype_exponent=custom_exponent if custom_exponent is not None else 2.0,
+                 dype_start_sigma=1.0,
+             )
+         return None
+
+     if preset == DyPEPreset.AUTO:
+         config = get_dype_config_for_resolution(
+             width=width,
+             height=height,
+             base_resolution=1024,
+             activation_threshold=1536,
+         )
+         # Apply custom overrides if provided
+         if config is not None:
+             if custom_scale is not None:
+                 config.dype_scale = custom_scale
+             if custom_exponent is not None:
+                 config.dype_exponent = custom_exponent
+         return config
+
+     # Use preset configuration
+     preset_config = DYPE_PRESETS.get(preset)
+     if preset_config is None:
+         return None
+
+     return DyPEConfig(
+         enable_dype=True,
+         base_resolution=preset_config.base_resolution,
+         method=preset_config.method,
+         dype_scale=custom_scale if custom_scale is not None else preset_config.dype_scale,
+         dype_exponent=custom_exponent if custom_exponent is not None else preset_config.dype_exponent,
+         dype_start_sigma=preset_config.dype_start_sigma,
+     )
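
A quick sketch (not part of the diff) of how the preset logic resolves, following the thresholds and caps defined above:

    from invokeai.backend.flux.dype.presets import DyPEPreset, get_dype_config_from_preset

    # 1024x1024 is under the 1536px activation threshold, so AUTO stays off
    assert get_dype_config_from_preset(DyPEPreset.AUTO, width=1024, height=1024) is None

    # 4096x4096 activates AUTO: dype_scale = min(2.0 * 4096/1024, 8.0) = 8.0
    cfg = get_dype_config_from_preset(DyPEPreset.AUTO, width=4096, height=4096)
    assert cfg is not None and cfg.dype_scale == 8.0 and cfg.method == "vision_yarn"

    # The explicit "4k" preset skips the threshold check and uses its fixed values
    cfg_4k = get_dype_config_from_preset(DyPEPreset.PRESET_4K, width=3840, height=2160)
    assert cfg_4k is not None and cfg_4k.dype_scale == 2.0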
invokeai/backend/flux/dype/rope.py
@@ -0,0 +1,110 @@
+ """DyPE-enhanced RoPE (Rotary Position Embedding) functions."""
+
+ import torch
+ from einops import rearrange
+ from torch import Tensor
+
+ from invokeai.backend.flux.dype.base import (
+     DyPEConfig,
+     compute_ntk_freqs,
+     compute_vision_yarn_freqs,
+     compute_yarn_freqs,
+ )
+
+
+ def rope_dype(
+     pos: Tensor,
+     dim: int,
+     theta: int,
+     current_sigma: float,
+     target_height: int,
+     target_width: int,
+     dype_config: DyPEConfig,
+ ) -> Tensor:
+     """Compute RoPE with Dynamic Position Extrapolation.
+
+     This is the core DyPE function that replaces the standard rope() function.
+     It applies resolution-aware and timestep-aware scaling to position embeddings.
+
+     Args:
+         pos: Position indices tensor
+         dim: Embedding dimension per axis
+         theta: RoPE base frequency (typically 10000)
+         current_sigma: Current noise level (1.0 = full noise, 0.0 = clean)
+         target_height: Target image height in pixels
+         target_width: Target image width in pixels
+         dype_config: DyPE configuration
+
+     Returns:
+         Rotary position embedding tensor with shape suitable for FLUX attention
+     """
+     assert dim % 2 == 0
+
+     # Calculate scaling factors
+     base_res = dype_config.base_resolution
+     scale_h = target_height / base_res
+     scale_w = target_width / base_res
+     scale = max(scale_h, scale_w)
+
+     # If no scaling needed and DyPE disabled, use base method
+     if not dype_config.enable_dype or scale <= 1.0:
+         return _rope_base(pos, dim, theta)
+
+     # Select method and compute frequencies
+     method = dype_config.method
+
+     if method == "vision_yarn":
+         cos, sin = compute_vision_yarn_freqs(
+             pos=pos,
+             dim=dim,
+             theta=theta,
+             scale_h=scale_h,
+             scale_w=scale_w,
+             current_sigma=current_sigma,
+             dype_config=dype_config,
+         )
+     elif method == "yarn":
+         cos, sin = compute_yarn_freqs(
+             pos=pos,
+             dim=dim,
+             theta=theta,
+             scale=scale,
+             current_sigma=current_sigma,
+             dype_config=dype_config,
+         )
+     elif method == "ntk":
+         cos, sin = compute_ntk_freqs(
+             pos=pos,
+             dim=dim,
+             theta=theta,
+             scale=scale,
+         )
+     else:  # "base"
+         return _rope_base(pos, dim, theta)
+
+     # Construct rotation matrix from cos/sin
+     # Output shape: (batch, seq_len, dim/2, 2, 2)
+     out = torch.stack([cos, -sin, sin, cos], dim=-1)
+     out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2)
+
+     return out.to(dtype=pos.dtype, device=pos.device)
+
+
+ def _rope_base(pos: Tensor, dim: int, theta: int) -> Tensor:
+     """Standard RoPE without DyPE scaling.
+
+     This matches the original rope() function from invokeai.backend.flux.math.
+     """
+     assert dim % 2 == 0
+
+     device = pos.device
+     dtype = torch.float64 if device.type != "mps" else torch.float32
+
+     scale = torch.arange(0, dim, 2, dtype=dtype, device=device) / dim
+     omega = 1.0 / (theta**scale)
+
+     out = torch.einsum("...n,d->...nd", pos.to(dtype), omega)
+     out = torch.stack([torch.cos(out), -torch.sin(out), torch.sin(out), torch.cos(out)], dim=-1)
+     out = rearrange(out, "b n d (i j) -> b n d i j", i=2, j=2)
+
+     return out.to(dtype=pos.dtype, device=pos.device)
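
To confirm the dispatch behavior, a small sketch (not part of the diff; _rope_base is module-private and imported here only for comparison): at or below the base resolution rope_dype reduces to standard RoPE, while above it the NTK-scaled theta changes the embedding:

    import torch

    from invokeai.backend.flux.dype.base import DyPEConfig
    from invokeai.backend.flux.dype.rope import _rope_base, rope_dype

    pos = torch.arange(64, dtype=torch.float32)[None, :]  # (batch=1, seq_len=64)
    cfg = DyPEConfig()

    at_base = rope_dype(pos, dim=56, theta=10_000, current_sigma=1.0,
                        target_height=1024, target_width=1024, dype_config=cfg)
    assert torch.equal(at_base, _rope_base(pos, dim=56, theta=10_000))  # scale <= 1.0 fallback

    hi_res = rope_dype(pos, dim=56, theta=10_000, current_sigma=1.0,
                       target_height=4096, target_width=4096, dype_config=cfg)
    assert not torch.equal(hi_res, at_base)  # vision_yarn path with scaled theta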
invokeai/backend/flux/extensions/dype_extension.py
@@ -0,0 +1,91 @@
+ """DyPE extension for FLUX denoising pipeline."""
+
+ from dataclasses import dataclass
+ from typing import TYPE_CHECKING
+
+ from invokeai.backend.flux.dype.base import DyPEConfig
+ from invokeai.backend.flux.dype.embed import DyPEEmbedND
+
+ if TYPE_CHECKING:
+     from invokeai.backend.flux.model import Flux
+
+
+ @dataclass
+ class DyPEExtension:
+     """Extension for Dynamic Position Extrapolation in FLUX models.
+
+     This extension manages the patching of the FLUX model's position embedder
+     and updates the step state during denoising.
+
+     Usage:
+         1. Create extension with config and target dimensions
+         2. Call patch_model() to replace pe_embedder with DyPE version
+         3. Call update_step_state() before each denoising step
+         4. Call restore_model() after denoising to restore original embedder
+     """
+
+     config: DyPEConfig
+     target_height: int
+     target_width: int
+
+     def patch_model(self, model: "Flux") -> tuple[DyPEEmbedND, object]:
+         """Patch the model's position embedder with DyPE version.
+
+         Args:
+             model: The FLUX model to patch
+
+         Returns:
+             Tuple of (new DyPE embedder, original embedder for restoration)
+         """
+         original_embedder = model.pe_embedder
+
+         dype_embedder = DyPEEmbedND.from_embednd(
+             embed_nd=original_embedder,
+             dype_config=self.config,
+         )
+
+         # Set initial state
+         dype_embedder.set_step_state(
+             sigma=1.0,
+             height=self.target_height,
+             width=self.target_width,
+         )
+
+         # Replace the embedder
+         model.pe_embedder = dype_embedder
+
+         return dype_embedder, original_embedder
+
+     def update_step_state(
+         self,
+         embedder: DyPEEmbedND,
+         timestep: float,
+         timestep_index: int,
+         total_steps: int,
+     ) -> None:
+         """Update the step state in the DyPE embedder.
+
+         This should be called before each denoising step to update the
+         current noise level for timestep-dependent scaling.
+
+         Args:
+             embedder: The DyPE embedder to update
+             timestep: Current timestep value (sigma/noise level)
+             timestep_index: Current step index (0-based)
+             total_steps: Total number of denoising steps
+         """
+         embedder.set_step_state(
+             sigma=timestep,
+             height=self.target_height,
+             width=self.target_width,
+         )
+
+     @staticmethod
+     def restore_model(model: "Flux", original_embedder: object) -> None:
+         """Restore the original position embedder.
+
+         Args:
+             model: The FLUX model to restore
+             original_embedder: The original embedder saved from patch_model()
+         """
+         model.pe_embedder = original_embedder
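
Pulling the pieces together, the intended lifecycle from the Usage docstring looks roughly like this (a sketch, not part of the diff; `transformer` and `sigmas` stand in for the loaded Flux model and the denoising schedule):

    from invokeai.backend.flux.dype.presets import DyPEPreset, get_dype_config_from_preset
    from invokeai.backend.flux.extensions.dype_extension import DyPEExtension

    config = get_dype_config_from_preset(DyPEPreset.AUTO, width=4096, height=4096)
    if config is not None:
        ext = DyPEExtension(config=config, target_height=4096, target_width=4096)
        dype_embedder, original = ext.patch_model(transformer)  # swaps pe_embedder
        try:
            for i, sigma in enumerate(sigmas):  # schedule runs from high to low noise
                ext.update_step_state(dype_embedder, timestep=sigma, timestep_index=i, total_steps=len(sigmas))
                # ... FLUX forward pass / denoising step for this sigma ...
        finally:
            DyPEExtension.restore_model(transformer, original)  # always undo the patch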