InvokeAI 6.10.0rc1__py3-none-any.whl → 6.11.0__py3-none-any.whl
This diff compares two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in that registry.
- invokeai/app/api/routers/model_manager.py +43 -1
- invokeai/app/invocations/fields.py +1 -1
- invokeai/app/invocations/flux2_denoise.py +499 -0
- invokeai/app/invocations/flux2_klein_model_loader.py +222 -0
- invokeai/app/invocations/flux2_klein_text_encoder.py +222 -0
- invokeai/app/invocations/flux2_vae_decode.py +106 -0
- invokeai/app/invocations/flux2_vae_encode.py +88 -0
- invokeai/app/invocations/flux_denoise.py +77 -3
- invokeai/app/invocations/flux_lora_loader.py +1 -1
- invokeai/app/invocations/flux_model_loader.py +2 -5
- invokeai/app/invocations/ideal_size.py +6 -1
- invokeai/app/invocations/metadata.py +4 -0
- invokeai/app/invocations/metadata_linked.py +47 -0
- invokeai/app/invocations/model.py +1 -0
- invokeai/app/invocations/pbr_maps.py +59 -0
- invokeai/app/invocations/z_image_denoise.py +244 -84
- invokeai/app/invocations/z_image_image_to_latents.py +9 -1
- invokeai/app/invocations/z_image_latents_to_image.py +9 -1
- invokeai/app/invocations/z_image_seed_variance_enhancer.py +110 -0
- invokeai/app/services/config/config_default.py +3 -1
- invokeai/app/services/invocation_stats/invocation_stats_common.py +6 -6
- invokeai/app/services/invocation_stats/invocation_stats_default.py +9 -4
- invokeai/app/services/model_manager/model_manager_default.py +7 -0
- invokeai/app/services/model_records/model_records_base.py +4 -2
- invokeai/app/services/shared/invocation_context.py +15 -0
- invokeai/app/services/shared/sqlite/sqlite_util.py +2 -0
- invokeai/app/services/shared/sqlite_migrator/migrations/migration_25.py +61 -0
- invokeai/app/util/step_callback.py +58 -2
- invokeai/backend/flux/denoise.py +338 -118
- invokeai/backend/flux/dype/__init__.py +31 -0
- invokeai/backend/flux/dype/base.py +260 -0
- invokeai/backend/flux/dype/embed.py +116 -0
- invokeai/backend/flux/dype/presets.py +148 -0
- invokeai/backend/flux/dype/rope.py +110 -0
- invokeai/backend/flux/extensions/dype_extension.py +91 -0
- invokeai/backend/flux/schedulers.py +62 -0
- invokeai/backend/flux/util.py +35 -1
- invokeai/backend/flux2/__init__.py +4 -0
- invokeai/backend/flux2/denoise.py +280 -0
- invokeai/backend/flux2/ref_image_extension.py +294 -0
- invokeai/backend/flux2/sampling_utils.py +209 -0
- invokeai/backend/image_util/pbr_maps/architecture/block.py +367 -0
- invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py +70 -0
- invokeai/backend/image_util/pbr_maps/pbr_maps.py +141 -0
- invokeai/backend/image_util/pbr_maps/utils/image_ops.py +93 -0
- invokeai/backend/model_manager/configs/factory.py +19 -1
- invokeai/backend/model_manager/configs/lora.py +36 -0
- invokeai/backend/model_manager/configs/main.py +395 -3
- invokeai/backend/model_manager/configs/qwen3_encoder.py +116 -7
- invokeai/backend/model_manager/configs/vae.py +104 -2
- invokeai/backend/model_manager/load/model_cache/model_cache.py +107 -2
- invokeai/backend/model_manager/load/model_loaders/cogview4.py +2 -1
- invokeai/backend/model_manager/load/model_loaders/flux.py +1020 -8
- invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py +4 -2
- invokeai/backend/model_manager/load/model_loaders/onnx.py +1 -0
- invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +2 -1
- invokeai/backend/model_manager/load/model_loaders/z_image.py +158 -31
- invokeai/backend/model_manager/starter_models.py +141 -4
- invokeai/backend/model_manager/taxonomy.py +31 -4
- invokeai/backend/model_manager/util/select_hf_files.py +3 -2
- invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py +39 -5
- invokeai/backend/quantization/gguf/ggml_tensor.py +15 -4
- invokeai/backend/util/vae_working_memory.py +0 -2
- invokeai/backend/z_image/extensions/regional_prompting_extension.py +10 -12
- invokeai/frontend/web/dist/assets/App-D13dX7be.js +161 -0
- invokeai/frontend/web/dist/assets/{browser-ponyfill-DHZxq1nk.js → browser-ponyfill-u_ZjhQTI.js} +1 -1
- invokeai/frontend/web/dist/assets/index-BB0nHmDe.js +530 -0
- invokeai/frontend/web/dist/index.html +1 -1
- invokeai/frontend/web/dist/locales/en-GB.json +1 -0
- invokeai/frontend/web/dist/locales/en.json +85 -6
- invokeai/frontend/web/dist/locales/it.json +135 -15
- invokeai/frontend/web/dist/locales/ru.json +11 -11
- invokeai/version/invokeai_version.py +1 -1
- {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/METADATA +8 -2
- {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/RECORD +81 -57
- {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/WHEEL +1 -1
- invokeai/frontend/web/dist/assets/App-CYhlZO3Q.js +0 -161
- invokeai/frontend/web/dist/assets/index-dgSJAY--.js +0 -530
- {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/entry_points.txt +0 -0
- {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/licenses/LICENSE +0 -0
- {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
- {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
- {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/top_level.txt +0 -0
--- a/invokeai/app/api/routers/model_manager.py
+++ b/invokeai/app/api/routers/model_manager.py
@@ -219,7 +219,16 @@ async def reidentify_model(
         result = ModelConfigFactory.from_model_on_disk(mod)
         if result.config is None:
             raise InvalidModelException("Unable to identify model format")
-
+
+        # Retain user-editable fields from the original config
+        result.config.key = config.key
+        result.config.name = config.name
+        result.config.description = config.description
+        result.config.cover_image = config.cover_image
+        result.config.trigger_phrases = config.trigger_phrases
+        result.config.source = config.source
+        result.config.source_type = config.source_type
+
         new_config = ApiDependencies.invoker.services.model_manager.store.replace_model(config.key, result.config)
         return new_config
     except UnknownModelException as e:
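The hunk above re-derives a model config from disk and then restores the user-editable metadata from the stored record before replacing it, so re-identifying a model no longer discards the user's name, description, cover image, trigger phrases, or source. A minimal sketch of the same pattern, using a hypothetical `Config` dataclass and `USER_EDITABLE_FIELDS` tuple rather than InvokeAI's config classes:

```python
# Hypothetical illustration only - not part of the InvokeAI codebase.
from dataclasses import dataclass, replace

USER_EDITABLE_FIELDS = ("key", "name", "description", "cover_image", "trigger_phrases", "source", "source_type")


@dataclass
class Config:
    key: str = ""
    name: str = ""
    description: str = ""
    cover_image: str | None = None
    trigger_phrases: tuple[str, ...] = ()
    source: str = ""
    source_type: str = ""
    format: str = ""  # example of a field that keeps the freshly identified value


def restore_user_fields(old: Config, new: Config) -> Config:
    """Copy user-editable metadata from the previous record onto the re-identified config."""
    return replace(new, **{f: getattr(old, f) for f in USER_EDITABLE_FIELDS})
```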
@@ -905,15 +914,48 @@ class StarterModelResponse(BaseModel):
 def get_is_installed(
     starter_model: StarterModel | StarterModelWithoutDependencies, installed_models: list[AnyModelConfig]
 ) -> bool:
+    from invokeai.backend.model_manager.taxonomy import ModelType
+
     for model in installed_models:
+        # Check if source matches exactly
         if model.source == starter_model.source:
             return True
+        # Check if name (or previous names), base and type match
         if (
             (model.name == starter_model.name or model.name in starter_model.previous_names)
             and model.base == starter_model.base
             and model.type == starter_model.type
         ):
             return True
+
+    # Special handling for Qwen3Encoder models - check by type and variant
+    # This allows renamed models to still be detected as installed
+    if starter_model.type == ModelType.Qwen3Encoder:
+        from invokeai.backend.model_manager.taxonomy import Qwen3VariantType
+
+        # Determine expected variant from source pattern
+        expected_variant: Qwen3VariantType | None = None
+        if "klein-9B" in starter_model.source or "qwen3_8b" in starter_model.source.lower():
+            expected_variant = Qwen3VariantType.Qwen3_8B
+        elif (
+            "klein-4B" in starter_model.source
+            or "qwen3_4b" in starter_model.source.lower()
+            or "Z-Image" in starter_model.source
+        ):
+            expected_variant = Qwen3VariantType.Qwen3_4B
+
+        if expected_variant is not None:
+            for model in installed_models:
+                if model.type == ModelType.Qwen3Encoder and hasattr(model, "variant"):
+                    model_variant = model.variant
+                    # Handle both enum and string values
+                    if isinstance(model_variant, Qwen3VariantType):
+                        if model_variant == expected_variant:
+                            return True
+                    elif isinstance(model_variant, str):
+                        if model_variant == expected_variant.value:
+                            return True
+
     return False


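`get_is_installed` now falls back to matching Qwen3 encoders by variant, so a renamed install still counts. The variant is inferred from the starter model's source string. A self-contained sketch of that source-pattern rule (the enum and source strings below are local stand-ins, not InvokeAI's `Qwen3VariantType` or real model sources):

```python
# Illustrative sketch mirroring the source-pattern matching added above.
from enum import Enum


class Qwen3Variant(str, Enum):
    QWEN3_8B = "qwen3_8b"
    QWEN3_4B = "qwen3_4b"


def expected_variant_for_source(source: str) -> Qwen3Variant | None:
    """Mirror of the source-pattern rule in get_is_installed above."""
    if "klein-9B" in source or "qwen3_8b" in source.lower():
        return Qwen3Variant.QWEN3_8B
    if "klein-4B" in source or "qwen3_4b" in source.lower() or "Z-Image" in source:
        return Qwen3Variant.QWEN3_4B
    return None


# Example strings that merely contain the matched substrings:
assert expected_variant_for_source("example/flux2-klein-9B-text-encoder") is Qwen3Variant.QWEN3_8B
assert expected_variant_for_source("example/Z-Image-turbo") is Qwen3Variant.QWEN3_4B
assert expected_variant_for_source("example/some-other-model") is None
```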
--- a/invokeai/app/invocations/fields.py
+++ b/invokeai/app/invocations/fields.py
@@ -532,7 +532,7 @@ def migrate_model_ui_type(ui_type: UIType | str, json_schema_extra: dict[str, An
         case UIType.VAEModel:
             ui_model_type = [ModelType.VAE]
         case UIType.FluxVAEModel:
-            ui_model_base = [BaseModelType.Flux]
+            ui_model_base = [BaseModelType.Flux, BaseModelType.Flux2]
             ui_model_type = [ModelType.VAE]
         case UIType.LoRAModel:
             ui_model_type = [ModelType.LoRA]
--- /dev/null
+++ b/invokeai/app/invocations/flux2_denoise.py
@@ -0,0 +1,499 @@
+"""Flux2 Klein Denoise Invocation.
+
+Run denoising process with a FLUX.2 Klein transformer model.
+Uses Qwen3 conditioning instead of CLIP+T5.
+"""
+
+from contextlib import ExitStack
+from typing import Callable, Iterator, Optional, Tuple
+
+import torch
+import torchvision.transforms as tv_transforms
+from torchvision.transforms.functional import resize as tv_resize
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, Classification, invocation
+from invokeai.app.invocations.fields import (
+    DenoiseMaskField,
+    FieldDescriptions,
+    FluxConditioningField,
+    FluxKontextConditioningField,
+    Input,
+    InputField,
+    LatentsField,
+)
+from invokeai.app.invocations.model import TransformerField, VAEField
+from invokeai.app.invocations.primitives import LatentsOutput
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.flux.sampling_utils import clip_timestep_schedule_fractional
+from invokeai.backend.flux.schedulers import FLUX_SCHEDULER_LABELS, FLUX_SCHEDULER_MAP, FLUX_SCHEDULER_NAME_VALUES
+from invokeai.backend.flux2.denoise import denoise
+from invokeai.backend.flux2.ref_image_extension import Flux2RefImageExtension
+from invokeai.backend.flux2.sampling_utils import (
+    compute_empirical_mu,
+    generate_img_ids_flux2,
+    get_noise_flux2,
+    get_schedule_flux2,
+    pack_flux2,
+    unpack_flux2,
+)
+from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelFormat, ModelType
+from invokeai.backend.patches.layer_patcher import LayerPatcher
+from invokeai.backend.patches.lora_conversions.flux_lora_constants import FLUX_LORA_TRANSFORMER_PREFIX
+from invokeai.backend.patches.model_patch_raw import ModelPatchRaw
+from invokeai.backend.rectified_flow.rectified_flow_inpaint_extension import RectifiedFlowInpaintExtension
+from invokeai.backend.stable_diffusion.diffusers_pipeline import PipelineIntermediateState
+from invokeai.backend.stable_diffusion.diffusion.conditioning_data import FLUXConditioningInfo
+from invokeai.backend.util.devices import TorchDevice
+
+
+@invocation(
+    "flux2_denoise",
+    title="FLUX2 Denoise",
+    tags=["image", "flux", "flux2", "klein", "denoise"],
+    category="image",
+    version="1.3.0",
+    classification=Classification.Prototype,
+)
+class Flux2DenoiseInvocation(BaseInvocation):
+    """Run denoising process with a FLUX.2 Klein transformer model.
+
+    This node is designed for FLUX.2 Klein models which use Qwen3 as the text encoder.
+    It does not support ControlNet, IP-Adapters, or regional prompting.
+    """
+
+    latents: Optional[LatentsField] = InputField(
+        default=None,
+        description=FieldDescriptions.latents,
+        input=Input.Connection,
+    )
+    denoise_mask: Optional[DenoiseMaskField] = InputField(
+        default=None,
+        description=FieldDescriptions.denoise_mask,
+        input=Input.Connection,
+    )
+    denoising_start: float = InputField(
+        default=0.0,
+        ge=0,
+        le=1,
+        description=FieldDescriptions.denoising_start,
+    )
+    denoising_end: float = InputField(
+        default=1.0,
+        ge=0,
+        le=1,
+        description=FieldDescriptions.denoising_end,
+    )
+    add_noise: bool = InputField(default=True, description="Add noise based on denoising start.")
+    transformer: TransformerField = InputField(
+        description=FieldDescriptions.flux_model,
+        input=Input.Connection,
+        title="Transformer",
+    )
+    positive_text_conditioning: FluxConditioningField = InputField(
+        description=FieldDescriptions.positive_cond,
+        input=Input.Connection,
+    )
+    negative_text_conditioning: Optional[FluxConditioningField] = InputField(
+        default=None,
+        description="Negative conditioning tensor. Can be None if cfg_scale is 1.0.",
+        input=Input.Connection,
+    )
+    cfg_scale: float = InputField(
+        default=1.0,
+        description=FieldDescriptions.cfg_scale,
+        title="CFG Scale",
+    )
+    width: int = InputField(default=1024, multiple_of=16, description="Width of the generated image.")
+    height: int = InputField(default=1024, multiple_of=16, description="Height of the generated image.")
+    num_steps: int = InputField(
+        default=4,
+        description="Number of diffusion steps. Use 4 for distilled models, 28+ for base models.",
+    )
+    scheduler: FLUX_SCHEDULER_NAME_VALUES = InputField(
+        default="euler",
+        description="Scheduler (sampler) for the denoising process. 'euler' is fast and standard. "
+        "'heun' is 2nd-order (better quality, 2x slower). 'lcm' is optimized for few steps.",
+        ui_choice_labels=FLUX_SCHEDULER_LABELS,
+    )
+    seed: int = InputField(default=0, description="Randomness seed for reproducibility.")
+    vae: VAEField = InputField(
+        description="FLUX.2 VAE model (required for BN statistics).",
+        input=Input.Connection,
+    )
+    kontext_conditioning: FluxKontextConditioningField | list[FluxKontextConditioningField] | None = InputField(
+        default=None,
+        description="FLUX Kontext conditioning (reference images for multi-reference image editing).",
+        input=Input.Connection,
+        title="Reference Images",
+    )
+
+    def _get_bn_stats(self, context: InvocationContext) -> Optional[Tuple[torch.Tensor, torch.Tensor]]:
+        """Extract BN statistics from the FLUX.2 VAE.
+
+        The FLUX.2 VAE uses batch normalization on the patchified 128-channel representation.
+        IMPORTANT: BFL FLUX.2 VAE uses affine=False, so there are NO learnable weight/bias.
+
+        BN formula (affine=False): y = (x - mean) / std
+        Inverse: x = y * std + mean
+
+        Returns:
+            Tuple of (bn_mean, bn_std) tensors of shape (128,), or None if BN layer not found.
+        """
+        with context.models.load(self.vae.vae).model_on_device() as (_, vae):
+            # Ensure VAE is in eval mode to prevent BN stats from being updated
+            vae.eval()
+
+            # Try to find the BN layer - it may be at different locations depending on model format
+            bn_layer = None
+            if hasattr(vae, "bn"):
+                bn_layer = vae.bn
+            elif hasattr(vae, "batch_norm"):
+                bn_layer = vae.batch_norm
+            elif hasattr(vae, "encoder") and hasattr(vae.encoder, "bn"):
+                bn_layer = vae.encoder.bn
+
+            if bn_layer is None:
+                return None
+
+            # Verify running statistics are initialized
+            if bn_layer.running_mean is None or bn_layer.running_var is None:
+                return None
+
+            # Get BN running statistics from VAE
+            bn_mean = bn_layer.running_mean.clone()  # Shape: (128,)
+            bn_var = bn_layer.running_var.clone()  # Shape: (128,)
+            bn_eps = bn_layer.eps if hasattr(bn_layer, "eps") else 1e-4  # BFL uses 1e-4
+            bn_std = torch.sqrt(bn_var + bn_eps)
+
+            return bn_mean, bn_std
+
+    def _bn_normalize(
+        self,
+        x: torch.Tensor,
+        bn_mean: torch.Tensor,
+        bn_std: torch.Tensor,
+    ) -> torch.Tensor:
+        """Apply BN normalization to packed latents.
+
+        BN formula (affine=False): y = (x - mean) / std
+
+        Args:
+            x: Packed latents of shape (B, seq, 128).
+            bn_mean: BN running mean of shape (128,).
+            bn_std: BN running std of shape (128,).
+
+        Returns:
+            Normalized latents of same shape.
+        """
+        # x: (B, seq, 128), params: (128,) -> broadcast over batch and sequence dims
+        bn_mean = bn_mean.to(x.device, x.dtype)
+        bn_std = bn_std.to(x.device, x.dtype)
+        return (x - bn_mean) / bn_std
+
+    def _bn_denormalize(
+        self,
+        x: torch.Tensor,
+        bn_mean: torch.Tensor,
+        bn_std: torch.Tensor,
+    ) -> torch.Tensor:
+        """Apply BN denormalization to packed latents (inverse of normalization).
+
+        Inverse BN (affine=False): x = y * std + mean
+
+        Args:
+            x: Packed latents of shape (B, seq, 128).
+            bn_mean: BN running mean of shape (128,).
+            bn_std: BN running std of shape (128,).
+
+        Returns:
+            Denormalized latents of same shape.
+        """
+        # x: (B, seq, 128), params: (128,) -> broadcast over batch and sequence dims
+        bn_mean = bn_mean.to(x.device, x.dtype)
+        bn_std = bn_std.to(x.device, x.dtype)
+        return x * bn_std + bn_mean
+
+    @torch.no_grad()
+    def invoke(self, context: InvocationContext) -> LatentsOutput:
+        latents = self._run_diffusion(context)
+        latents = latents.detach().to("cpu")
+
+        name = context.tensors.save(tensor=latents)
+        return LatentsOutput.build(latents_name=name, latents=latents, seed=None)
+
+    def _run_diffusion(self, context: InvocationContext) -> torch.Tensor:
+        inference_dtype = torch.bfloat16
+        device = TorchDevice.choose_torch_device()
+
+        # Get BN statistics from VAE for latent denormalization (optional)
+        # BFL FLUX.2 VAE uses affine=False, so only mean/std are needed
+        # Some VAE formats (e.g. diffusers) may not expose BN stats directly
+        bn_stats = self._get_bn_stats(context)
+        bn_mean, bn_std = bn_stats if bn_stats is not None else (None, None)
+
+        # Load the input latents, if provided
+        init_latents = context.tensors.load(self.latents.latents_name) if self.latents else None
+        if init_latents is not None:
+            init_latents = init_latents.to(device=device, dtype=inference_dtype)
+
+        # Prepare input noise (FLUX.2 uses 32 channels)
+        noise = get_noise_flux2(
+            num_samples=1,
+            height=self.height,
+            width=self.width,
+            device=device,
+            dtype=inference_dtype,
+            seed=self.seed,
+        )
+        b, _c, latent_h, latent_w = noise.shape
+        packed_h = latent_h // 2
+        packed_w = latent_w // 2
+
+        # Load the conditioning data
+        pos_cond_data = context.conditioning.load(self.positive_text_conditioning.conditioning_name)
+        assert len(pos_cond_data.conditionings) == 1
+        pos_flux_conditioning = pos_cond_data.conditionings[0]
+        assert isinstance(pos_flux_conditioning, FLUXConditioningInfo)
+        pos_flux_conditioning = pos_flux_conditioning.to(dtype=inference_dtype, device=device)
+
+        # Qwen3 stacked embeddings (stored in t5_embeds field for compatibility)
+        txt = pos_flux_conditioning.t5_embeds
+
+        # Generate text position IDs (4D format for FLUX.2: T, H, W, L)
+        # FLUX.2 uses 4D position coordinates for its rotary position embeddings
+        # IMPORTANT: Position IDs must be int64 (long) dtype
+        # Diffusers uses: T=0, H=0, W=0, L=0..seq_len-1
+        seq_len = txt.shape[1]
+        txt_ids = torch.zeros(1, seq_len, 4, device=device, dtype=torch.long)
+        txt_ids[..., 3] = torch.arange(seq_len, device=device, dtype=torch.long)  # L coordinate varies
+
+        # Load negative conditioning if provided
+        neg_txt = None
+        neg_txt_ids = None
+        if self.negative_text_conditioning is not None:
+            neg_cond_data = context.conditioning.load(self.negative_text_conditioning.conditioning_name)
+            assert len(neg_cond_data.conditionings) == 1
+            neg_flux_conditioning = neg_cond_data.conditionings[0]
+            assert isinstance(neg_flux_conditioning, FLUXConditioningInfo)
+            neg_flux_conditioning = neg_flux_conditioning.to(dtype=inference_dtype, device=device)
+            neg_txt = neg_flux_conditioning.t5_embeds
+            # For text tokens: T=0, H=0, W=0, L=0..seq_len-1 (only L varies per token)
+            neg_seq_len = neg_txt.shape[1]
+            neg_txt_ids = torch.zeros(1, neg_seq_len, 4, device=device, dtype=torch.long)
+            neg_txt_ids[..., 3] = torch.arange(neg_seq_len, device=device, dtype=torch.long)
+
+        # Validate transformer config
+        transformer_config = context.models.get_config(self.transformer.transformer)
+        assert transformer_config.base == BaseModelType.Flux2 and transformer_config.type == ModelType.Main
+
+        # Calculate the timestep schedule using FLUX.2 specific schedule
+        # This matches diffusers' Flux2Pipeline implementation
+        # Note: Schedule shifting is handled by the scheduler via mu parameter
+        image_seq_len = packed_h * packed_w
+        timesteps = get_schedule_flux2(
+            num_steps=self.num_steps,
+            image_seq_len=image_seq_len,
+        )
+        # Compute mu for dynamic schedule shifting (used by FlowMatchEulerDiscreteScheduler)
+        mu = compute_empirical_mu(image_seq_len=image_seq_len, num_steps=self.num_steps)
+
+        # Clip the timesteps schedule based on denoising_start and denoising_end
+        timesteps = clip_timestep_schedule_fractional(timesteps, self.denoising_start, self.denoising_end)
+
+        # Prepare input latent image
+        if init_latents is not None:
+            if self.add_noise:
+                t_0 = timesteps[0]
+                x = t_0 * noise + (1.0 - t_0) * init_latents
+            else:
+                x = init_latents
+        else:
+            if self.denoising_start > 1e-5:
+                raise ValueError("denoising_start should be 0 when initial latents are not provided.")
+            x = noise
+
+        # If len(timesteps) == 1, then short-circuit
+        if len(timesteps) <= 1:
+            return x
+
+        # Generate image position IDs (FLUX.2 uses 4D coordinates)
+        # Position IDs use int64 dtype like diffusers
+        img_ids = generate_img_ids_flux2(h=latent_h, w=latent_w, batch_size=b, device=device)
+
+        # Prepare inpaint mask
+        inpaint_mask = self._prep_inpaint_mask(context, x)
+
+        # Pack all latent tensors
+        init_latents_packed = pack_flux2(init_latents) if init_latents is not None else None
+        inpaint_mask_packed = pack_flux2(inpaint_mask) if inpaint_mask is not None else None
+        noise_packed = pack_flux2(noise)
+        x = pack_flux2(x)
+
+        # Apply BN normalization BEFORE denoising (as per diffusers Flux2KleinPipeline)
+        # BN normalization: y = (x - mean) / std
+        # This transforms latents to normalized space for the transformer
+        # IMPORTANT: Also normalize init_latents and noise for inpainting to maintain consistency
+        if bn_mean is not None and bn_std is not None:
+            x = self._bn_normalize(x, bn_mean, bn_std)
+            if init_latents_packed is not None:
+                init_latents_packed = self._bn_normalize(init_latents_packed, bn_mean, bn_std)
+            noise_packed = self._bn_normalize(noise_packed, bn_mean, bn_std)
+
+        # Verify packed dimensions
+        assert packed_h * packed_w == x.shape[1]
+
+        # Prepare inpaint extension
+        inpaint_extension: Optional[RectifiedFlowInpaintExtension] = None
+        if inpaint_mask_packed is not None:
+            assert init_latents_packed is not None
+            inpaint_extension = RectifiedFlowInpaintExtension(
+                init_latents=init_latents_packed,
+                inpaint_mask=inpaint_mask_packed,
+                noise=noise_packed,
+            )
+
+        # Prepare CFG scale list
+        num_steps = len(timesteps) - 1
+        cfg_scale_list = [self.cfg_scale] * num_steps
+
+        # Check if we're doing inpainting (have a mask or a clipped schedule)
+        is_inpainting = self.denoise_mask is not None or self.denoising_start > 1e-5
+
+        # Create scheduler with FLUX.2 Klein configuration
+        # For inpainting/img2img, use manual Euler stepping to preserve the exact timestep schedule
+        # For txt2img, use the scheduler with dynamic shifting for optimal results
+        scheduler = None
+        if self.scheduler in FLUX_SCHEDULER_MAP and not is_inpainting:
+            # Only use scheduler for txt2img - use manual Euler for inpainting to preserve exact timesteps
+            scheduler_class = FLUX_SCHEDULER_MAP[self.scheduler]
+            scheduler = scheduler_class(
+                num_train_timesteps=1000,
+                shift=3.0,
+                use_dynamic_shifting=True,
+                base_shift=0.5,
+                max_shift=1.15,
+                base_image_seq_len=256,
+                max_image_seq_len=4096,
+                time_shift_type="exponential",
+            )
+
+        # Prepare reference image extension for FLUX.2 Klein built-in editing
+        ref_image_extension = None
+        if self.kontext_conditioning:
+            ref_image_extension = Flux2RefImageExtension(
+                context=context,
+                ref_image_conditioning=self.kontext_conditioning
+                if isinstance(self.kontext_conditioning, list)
+                else [self.kontext_conditioning],
+                vae_field=self.vae,
+                device=device,
+                dtype=inference_dtype,
+                bn_mean=bn_mean,
+                bn_std=bn_std,
+            )
+
+        with ExitStack() as exit_stack:
+            # Load the transformer model
+            (cached_weights, transformer) = exit_stack.enter_context(
+                context.models.load(self.transformer.transformer).model_on_device()
+            )
+            config = transformer_config
+
+            # Determine if the model is quantized
+            if config.format in [ModelFormat.Diffusers]:
+                model_is_quantized = False
+            elif config.format in [
+                ModelFormat.BnbQuantizedLlmInt8b,
+                ModelFormat.BnbQuantizednf4b,
+                ModelFormat.GGUFQuantized,
+            ]:
+                model_is_quantized = True
+            else:
+                model_is_quantized = False
+
+            # Apply LoRA models to the transformer
+            exit_stack.enter_context(
+                LayerPatcher.apply_smart_model_patches(
+                    model=transformer,
+                    patches=self._lora_iterator(context),
+                    prefix=FLUX_LORA_TRANSFORMER_PREFIX,
+                    dtype=inference_dtype,
+                    cached_weights=cached_weights,
+                    force_sidecar_patching=model_is_quantized,
+                )
+            )
+
+            # Prepare reference image conditioning if provided
+            img_cond_seq = None
+            img_cond_seq_ids = None
+            if ref_image_extension is not None:
+                # Ensure batch sizes match
+                ref_image_extension.ensure_batch_size(x.shape[0])
+                img_cond_seq, img_cond_seq_ids = (
+                    ref_image_extension.ref_image_latents,
+                    ref_image_extension.ref_image_ids,
+                )
+
+            x = denoise(
+                model=transformer,
+                img=x,
+                img_ids=img_ids,
+                txt=txt,
+                txt_ids=txt_ids,
+                timesteps=timesteps,
+                step_callback=self._build_step_callback(context),
+                cfg_scale=cfg_scale_list,
+                neg_txt=neg_txt,
+                neg_txt_ids=neg_txt_ids,
+                scheduler=scheduler,
+                mu=mu,
+                inpaint_extension=inpaint_extension,
+                img_cond_seq=img_cond_seq,
+                img_cond_seq_ids=img_cond_seq_ids,
+            )
+
+            # Apply BN denormalization if BN stats are available
+            # The diffusers Flux2KleinPipeline applies: latents = latents * bn_std + bn_mean
+            # This transforms latents from normalized space to VAE's expected input space
+            if bn_mean is not None and bn_std is not None:
+                x = self._bn_denormalize(x, bn_mean, bn_std)
+
+            x = unpack_flux2(x.float(), self.height, self.width)
+            return x
+
+    def _prep_inpaint_mask(self, context: InvocationContext, latents: torch.Tensor) -> Optional[torch.Tensor]:
+        """Prepare the inpaint mask."""
+        if self.denoise_mask is None:
+            return None
+
+        mask = context.tensors.load(self.denoise_mask.mask_name)
+        mask = 1.0 - mask
+
+        _, _, latent_height, latent_width = latents.shape
+        mask = tv_resize(
+            img=mask,
+            size=[latent_height, latent_width],
+            interpolation=tv_transforms.InterpolationMode.BILINEAR,
+            antialias=False,
+        )
+
+        mask = mask.to(device=latents.device, dtype=latents.dtype)
+        return mask.expand_as(latents)
+
+    def _lora_iterator(self, context: InvocationContext) -> Iterator[Tuple[ModelPatchRaw, float]]:
+        """Iterate over LoRA models to apply."""
+        for lora in self.transformer.loras:
+            lora_info = context.models.load(lora.lora)
+            assert isinstance(lora_info.model, ModelPatchRaw)
+            yield (lora_info.model, lora.weight)
+            del lora_info
+
+    def _build_step_callback(self, context: InvocationContext) -> Callable[[PipelineIntermediateState], None]:
+        """Build a callback for step progress updates."""
+
+        def step_callback(state: PipelineIntermediateState) -> None:
+            latents = state.latents.float()
+            state.latents = unpack_flux2(latents, self.height, self.width).squeeze()
+            context.util.flux2_step_callback(state)
+
+        return step_callback