InvokeAI 6.10.0rc1__py3-none-any.whl → 6.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. invokeai/app/api/routers/model_manager.py +43 -1
  2. invokeai/app/invocations/fields.py +1 -1
  3. invokeai/app/invocations/flux2_denoise.py +499 -0
  4. invokeai/app/invocations/flux2_klein_model_loader.py +222 -0
  5. invokeai/app/invocations/flux2_klein_text_encoder.py +222 -0
  6. invokeai/app/invocations/flux2_vae_decode.py +106 -0
  7. invokeai/app/invocations/flux2_vae_encode.py +88 -0
  8. invokeai/app/invocations/flux_denoise.py +77 -3
  9. invokeai/app/invocations/flux_lora_loader.py +1 -1
  10. invokeai/app/invocations/flux_model_loader.py +2 -5
  11. invokeai/app/invocations/ideal_size.py +6 -1
  12. invokeai/app/invocations/metadata.py +4 -0
  13. invokeai/app/invocations/metadata_linked.py +47 -0
  14. invokeai/app/invocations/model.py +1 -0
  15. invokeai/app/invocations/pbr_maps.py +59 -0
  16. invokeai/app/invocations/z_image_denoise.py +244 -84
  17. invokeai/app/invocations/z_image_image_to_latents.py +9 -1
  18. invokeai/app/invocations/z_image_latents_to_image.py +9 -1
  19. invokeai/app/invocations/z_image_seed_variance_enhancer.py +110 -0
  20. invokeai/app/services/config/config_default.py +3 -1
  21. invokeai/app/services/invocation_stats/invocation_stats_common.py +6 -6
  22. invokeai/app/services/invocation_stats/invocation_stats_default.py +9 -4
  23. invokeai/app/services/model_manager/model_manager_default.py +7 -0
  24. invokeai/app/services/model_records/model_records_base.py +4 -2
  25. invokeai/app/services/shared/invocation_context.py +15 -0
  26. invokeai/app/services/shared/sqlite/sqlite_util.py +2 -0
  27. invokeai/app/services/shared/sqlite_migrator/migrations/migration_25.py +61 -0
  28. invokeai/app/util/step_callback.py +58 -2
  29. invokeai/backend/flux/denoise.py +338 -118
  30. invokeai/backend/flux/dype/__init__.py +31 -0
  31. invokeai/backend/flux/dype/base.py +260 -0
  32. invokeai/backend/flux/dype/embed.py +116 -0
  33. invokeai/backend/flux/dype/presets.py +148 -0
  34. invokeai/backend/flux/dype/rope.py +110 -0
  35. invokeai/backend/flux/extensions/dype_extension.py +91 -0
  36. invokeai/backend/flux/schedulers.py +62 -0
  37. invokeai/backend/flux/util.py +35 -1
  38. invokeai/backend/flux2/__init__.py +4 -0
  39. invokeai/backend/flux2/denoise.py +280 -0
  40. invokeai/backend/flux2/ref_image_extension.py +294 -0
  41. invokeai/backend/flux2/sampling_utils.py +209 -0
  42. invokeai/backend/image_util/pbr_maps/architecture/block.py +367 -0
  43. invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py +70 -0
  44. invokeai/backend/image_util/pbr_maps/pbr_maps.py +141 -0
  45. invokeai/backend/image_util/pbr_maps/utils/image_ops.py +93 -0
  46. invokeai/backend/model_manager/configs/factory.py +19 -1
  47. invokeai/backend/model_manager/configs/lora.py +36 -0
  48. invokeai/backend/model_manager/configs/main.py +395 -3
  49. invokeai/backend/model_manager/configs/qwen3_encoder.py +116 -7
  50. invokeai/backend/model_manager/configs/vae.py +104 -2
  51. invokeai/backend/model_manager/load/model_cache/model_cache.py +107 -2
  52. invokeai/backend/model_manager/load/model_loaders/cogview4.py +2 -1
  53. invokeai/backend/model_manager/load/model_loaders/flux.py +1020 -8
  54. invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py +4 -2
  55. invokeai/backend/model_manager/load/model_loaders/onnx.py +1 -0
  56. invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +2 -1
  57. invokeai/backend/model_manager/load/model_loaders/z_image.py +158 -31
  58. invokeai/backend/model_manager/starter_models.py +141 -4
  59. invokeai/backend/model_manager/taxonomy.py +31 -4
  60. invokeai/backend/model_manager/util/select_hf_files.py +3 -2
  61. invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py +39 -5
  62. invokeai/backend/quantization/gguf/ggml_tensor.py +15 -4
  63. invokeai/backend/util/vae_working_memory.py +0 -2
  64. invokeai/backend/z_image/extensions/regional_prompting_extension.py +10 -12
  65. invokeai/frontend/web/dist/assets/App-D13dX7be.js +161 -0
  66. invokeai/frontend/web/dist/assets/{browser-ponyfill-DHZxq1nk.js → browser-ponyfill-u_ZjhQTI.js} +1 -1
  67. invokeai/frontend/web/dist/assets/index-BB0nHmDe.js +530 -0
  68. invokeai/frontend/web/dist/index.html +1 -1
  69. invokeai/frontend/web/dist/locales/en-GB.json +1 -0
  70. invokeai/frontend/web/dist/locales/en.json +85 -6
  71. invokeai/frontend/web/dist/locales/it.json +135 -15
  72. invokeai/frontend/web/dist/locales/ru.json +11 -11
  73. invokeai/version/invokeai_version.py +1 -1
  74. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/METADATA +8 -2
  75. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/RECORD +81 -57
  76. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/WHEEL +1 -1
  77. invokeai/frontend/web/dist/assets/App-CYhlZO3Q.js +0 -161
  78. invokeai/frontend/web/dist/assets/index-dgSJAY--.js +0 -530
  79. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/entry_points.txt +0 -0
  80. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/licenses/LICENSE +0 -0
  81. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
  82. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
  83. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/top_level.txt +0 -0
invokeai/backend/flux/denoise.py
@@ -1,10 +1,13 @@
+ import inspect
  import math
  from typing import Callable

  import torch
+ from diffusers.schedulers.scheduling_utils import SchedulerMixin
  from tqdm import tqdm

  from invokeai.backend.flux.controlnet.controlnet_flux_output import ControlNetFluxOutput, sum_controlnet_flux_outputs
+ from invokeai.backend.flux.extensions.dype_extension import DyPEExtension
  from invokeai.backend.flux.extensions.instantx_controlnet_extension import InstantXControlNetExtension
  from invokeai.backend.flux.extensions.regional_prompting_extension import RegionalPromptingExtension
  from invokeai.backend.flux.extensions.xlabs_controlnet_extension import XLabsControlNetExtension
@@ -35,149 +38,366 @@ def denoise(
  # extra img tokens (sequence-wise) - for Kontext conditioning
  img_cond_seq: torch.Tensor | None = None,
  img_cond_seq_ids: torch.Tensor | None = None,
+ # DyPE extension for high-resolution generation
+ dype_extension: DyPEExtension | None = None,
+ # Optional scheduler for alternative sampling methods
+ scheduler: SchedulerMixin | None = None,
  ):
- # step 0 is the initial state
- total_steps = len(timesteps) - 1
- step_callback(
- PipelineIntermediateState(
- step=0,
- order=1,
- total_steps=total_steps,
- timestep=int(timesteps[0]),
- latents=img,
- ),
- )
+ # Determine if we're using a diffusers scheduler or the built-in Euler method
+ use_scheduler = scheduler is not None
+
+ if use_scheduler:
+ # Initialize scheduler with timesteps
+ # The timesteps list contains values in [0, 1] range (sigmas)
+ # LCM should use num_inference_steps (it has its own sigma schedule),
+ # while other schedulers can use custom sigmas if supported
+ is_lcm = scheduler.__class__.__name__ == "FlowMatchLCMScheduler"
+ set_timesteps_sig = inspect.signature(scheduler.set_timesteps)
+ if not is_lcm and "sigmas" in set_timesteps_sig.parameters:
+ # Scheduler supports custom sigmas - use InvokeAI's time-shifted schedule
+ scheduler.set_timesteps(sigmas=timesteps, device=img.device)
+ else:
+ # LCM or scheduler doesn't support custom sigmas - use num_inference_steps
+ # The schedule will be computed by the scheduler itself
+ num_inference_steps = len(timesteps) - 1
+ scheduler.set_timesteps(num_inference_steps=num_inference_steps, device=img.device)
+
+ # For schedulers like Heun, the number of actual steps may differ
+ # (Heun doubles timesteps internally)
+ num_scheduler_steps = len(scheduler.timesteps)
+ # For user-facing step count, use the original number of denoising steps
+ total_steps = len(timesteps) - 1
+ else:
+ total_steps = len(timesteps) - 1
+ num_scheduler_steps = total_steps
+
  # guidance_vec is ignored for schnell.
  guidance_vec = torch.full((img.shape[0],), guidance, device=img.device, dtype=img.dtype)

  # Store original sequence length for slicing predictions
  original_seq_len = img.shape[1]

- for step_index, (t_curr, t_prev) in tqdm(list(enumerate(zip(timesteps[:-1], timesteps[1:], strict=True)))):
- t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device)
+ # DyPE: Patch model with DyPE-aware position embedder
+ dype_embedder = None
+ original_pe_embedder = None
+ if dype_extension is not None:
+ dype_embedder, original_pe_embedder = dype_extension.patch_model(model)

- # Run ControlNet models.
- controlnet_residuals: list[ControlNetFluxOutput] = []
- for controlnet_extension in controlnet_extensions:
- controlnet_residuals.append(
- controlnet_extension.run_controlnet(
- timestep_index=step_index,
- total_num_timesteps=total_steps,
- img=img,
- img_ids=img_ids,
+ try:
+ # Track the actual step for user-facing progress (accounts for Heun's double steps)
+ user_step = 0
+
+ if use_scheduler:
+ # Use diffusers scheduler for stepping
+ # Use tqdm with total_steps (user-facing steps) not num_scheduler_steps (internal steps)
+ # This ensures progress bar shows 1/8, 2/8, etc. even when scheduler uses more internal steps
+ pbar = tqdm(total=total_steps, desc="Denoising")
+ for step_index in range(num_scheduler_steps):
+ timestep = scheduler.timesteps[step_index]
+ # Convert scheduler timestep (0-1000) to normalized (0-1) for the model
+ t_curr = timestep.item() / scheduler.config.num_train_timesteps
+ t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device)
+
+ # DyPE: Update step state for timestep-dependent scaling
+ if dype_extension is not None and dype_embedder is not None:
+ dype_extension.update_step_state(
+ embedder=dype_embedder,
+ timestep=t_curr,
+ timestep_index=user_step,
+ total_steps=total_steps,
+ )
+
+ # For Heun scheduler, track if we're in first or second order step
+ is_heun = hasattr(scheduler, "state_in_first_order")
+ in_first_order = scheduler.state_in_first_order if is_heun else True
+
+ # Run ControlNet models
+ controlnet_residuals: list[ControlNetFluxOutput] = []
+ for controlnet_extension in controlnet_extensions:
+ controlnet_residuals.append(
+ controlnet_extension.run_controlnet(
+ timestep_index=user_step,
+ total_num_timesteps=total_steps,
+ img=img,
+ img_ids=img_ids,
+ txt=pos_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
+ txt_ids=pos_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
+ y=pos_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
+ timesteps=t_vec,
+ guidance=guidance_vec,
+ )
+ )
+
+ merged_controlnet_residuals = sum_controlnet_flux_outputs(controlnet_residuals)
+
+ # Prepare input for model
+ img_input = img
+ img_input_ids = img_ids
+
+ if img_cond is not None:
+ img_input = torch.cat((img_input, img_cond), dim=-1)
+
+ if img_cond_seq is not None:
+ assert img_cond_seq_ids is not None
+ img_input = torch.cat((img_input, img_cond_seq), dim=1)
+ img_input_ids = torch.cat((img_input_ids, img_cond_seq_ids), dim=1)
+
+ pred = model(
+ img=img_input,
+ img_ids=img_input_ids,
  txt=pos_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
  txt_ids=pos_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
  y=pos_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
  timesteps=t_vec,
  guidance=guidance_vec,
+ timestep_index=user_step,
+ total_num_timesteps=total_steps,
+ controlnet_double_block_residuals=merged_controlnet_residuals.double_block_residuals,
+ controlnet_single_block_residuals=merged_controlnet_residuals.single_block_residuals,
+ ip_adapter_extensions=pos_ip_adapter_extensions,
+ regional_prompting_extension=pos_regional_prompting_extension,
  )
- )

- # Merge the ControlNet residuals from multiple ControlNets.
- # TODO(ryand): We may want to calculate the sum just-in-time to keep peak memory low. Keep in mind, that the
- # controlnet_residuals datastructure is efficient in that it likely contains multiple references to the same
- # tensors. Calculating the sum materializes each tensor into its own instance.
- merged_controlnet_residuals = sum_controlnet_flux_outputs(controlnet_residuals)
+ if img_cond_seq is not None:
+ pred = pred[:, :original_seq_len]

- # Prepare input for model - concatenate fresh each step
- img_input = img
- img_input_ids = img_ids
+ # Get CFG scale for current user step
+ step_cfg_scale = cfg_scale[min(user_step, len(cfg_scale) - 1)]

- # Add channel-wise conditioning (for ControlNet, FLUX Fill, etc.)
- if img_cond is not None:
- img_input = torch.cat((img_input, img_cond), dim=-1)
+ if not math.isclose(step_cfg_scale, 1.0):
+ if neg_regional_prompting_extension is None:
+ raise ValueError("Negative text conditioning is required when cfg_scale is not 1.0.")

- # Add sequence-wise conditioning (for Kontext)
- if img_cond_seq is not None:
- assert img_cond_seq_ids is not None, (
- "You need to provide either both or neither of the sequence conditioning"
- )
- img_input = torch.cat((img_input, img_cond_seq), dim=1)
- img_input_ids = torch.cat((img_input_ids, img_cond_seq_ids), dim=1)
-
- pred = model(
- img=img_input,
- img_ids=img_input_ids,
- txt=pos_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
- txt_ids=pos_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
- y=pos_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
- timesteps=t_vec,
- guidance=guidance_vec,
- timestep_index=step_index,
- total_num_timesteps=total_steps,
- controlnet_double_block_residuals=merged_controlnet_residuals.double_block_residuals,
- controlnet_single_block_residuals=merged_controlnet_residuals.single_block_residuals,
- ip_adapter_extensions=pos_ip_adapter_extensions,
- regional_prompting_extension=pos_regional_prompting_extension,
- )
-
- # Slice prediction to only include the main image tokens
- if img_cond_seq is not None:
- pred = pred[:, :original_seq_len]
-
- step_cfg_scale = cfg_scale[step_index]
-
- # If step_cfg_scale, is 1.0, then we don't need to run the negative prediction.
- if not math.isclose(step_cfg_scale, 1.0):
- # TODO(ryand): Add option to run positive and negative predictions in a single batch for better performance
- # on systems with sufficient VRAM.
-
- if neg_regional_prompting_extension is None:
- raise ValueError("Negative text conditioning is required when cfg_scale is not 1.0.")
-
- # For negative prediction with Kontext, we need to include the reference images
- # to maintain consistency between positive and negative passes. Without this,
- # CFG would create artifacts as the attention mechanism would see different
- # spatial structures in each pass
- neg_img_input = img
- neg_img_input_ids = img_ids
-
- # Add channel-wise conditioning for negative pass if present
+ neg_img_input = img
+ neg_img_input_ids = img_ids
+
+ if img_cond is not None:
+ neg_img_input = torch.cat((neg_img_input, img_cond), dim=-1)
+
+ if img_cond_seq is not None:
+ neg_img_input = torch.cat((neg_img_input, img_cond_seq), dim=1)
+ neg_img_input_ids = torch.cat((neg_img_input_ids, img_cond_seq_ids), dim=1)
+
+ neg_pred = model(
+ img=neg_img_input,
+ img_ids=neg_img_input_ids,
+ txt=neg_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
+ txt_ids=neg_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
+ y=neg_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
+ timesteps=t_vec,
+ guidance=guidance_vec,
+ timestep_index=user_step,
+ total_num_timesteps=total_steps,
+ controlnet_double_block_residuals=None,
+ controlnet_single_block_residuals=None,
+ ip_adapter_extensions=neg_ip_adapter_extensions,
+ regional_prompting_extension=neg_regional_prompting_extension,
+ )
+
+ if img_cond_seq is not None:
+ neg_pred = neg_pred[:, :original_seq_len]
+ pred = neg_pred + step_cfg_scale * (pred - neg_pred)
+
+ # Use scheduler.step() for the update
+ step_output = scheduler.step(model_output=pred, timestep=timestep, sample=img)
+ img = step_output.prev_sample
+
+ # Get t_prev for inpainting (next sigma value)
+ if step_index + 1 < len(scheduler.sigmas):
+ t_prev = scheduler.sigmas[step_index + 1].item()
+ else:
+ t_prev = 0.0
+
+ if inpaint_extension is not None:
+ img = inpaint_extension.merge_intermediate_latents_with_init_latents(img, t_prev)
+
+ # For Heun, only increment user step after second-order step completes
+ if is_heun:
+ if not in_first_order:
+ # Second order step completed
+ user_step += 1
+ # Only call step_callback if we haven't exceeded total_steps
+ if user_step <= total_steps:
+ pbar.update(1)
+ preview_img = img - t_curr * pred
+ if inpaint_extension is not None:
+ preview_img = inpaint_extension.merge_intermediate_latents_with_init_latents(
+ preview_img, 0.0
+ )
+ step_callback(
+ PipelineIntermediateState(
+ step=user_step,
+ order=2,
+ total_steps=total_steps,
+ timestep=int(t_curr * 1000),
+ latents=preview_img,
+ ),
+ )
+ else:
+ # For LCM and other first-order schedulers
+ user_step += 1
+ # Only call step_callback if we haven't exceeded total_steps
+ # (LCM scheduler may have more internal steps than user-facing steps)
+ if user_step <= total_steps:
+ pbar.update(1)
+ preview_img = img - t_curr * pred
+ if inpaint_extension is not None:
+ preview_img = inpaint_extension.merge_intermediate_latents_with_init_latents(
+ preview_img, 0.0
+ )
+ step_callback(
+ PipelineIntermediateState(
+ step=user_step,
+ order=1,
+ total_steps=total_steps,
+ timestep=int(t_curr * 1000),
+ latents=preview_img,
+ ),
+ )
+
+ pbar.close()
+ return img
+
+ # Original Euler implementation (when scheduler is None)
+ for step_index, (t_curr, t_prev) in tqdm(list(enumerate(zip(timesteps[:-1], timesteps[1:], strict=True)))):
+ # DyPE: Update step state for timestep-dependent scaling
+ if dype_extension is not None and dype_embedder is not None:
+ dype_extension.update_step_state(
+ embedder=dype_embedder,
+ timestep=t_curr,
+ timestep_index=step_index,
+ total_steps=total_steps,
+ )
+
+ t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device)
+
+ # Run ControlNet models.
+ controlnet_residuals: list[ControlNetFluxOutput] = []
+ for controlnet_extension in controlnet_extensions:
+ controlnet_residuals.append(
+ controlnet_extension.run_controlnet(
+ timestep_index=step_index,
+ total_num_timesteps=total_steps,
+ img=img,
+ img_ids=img_ids,
+ txt=pos_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
+ txt_ids=pos_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
+ y=pos_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
+ timesteps=t_vec,
+ guidance=guidance_vec,
+ )
+ )
+
+ # Merge the ControlNet residuals from multiple ControlNets.
+ # TODO(ryand): We may want to calculate the sum just-in-time to keep peak memory low. Keep in mind, that the
+ # controlnet_residuals datastructure is efficient in that it likely contains multiple references to the same
+ # tensors. Calculating the sum materializes each tensor into its own instance.
+ merged_controlnet_residuals = sum_controlnet_flux_outputs(controlnet_residuals)
+
+ # Prepare input for model - concatenate fresh each step
+ img_input = img
+ img_input_ids = img_ids
+
+ # Add channel-wise conditioning (for ControlNet, FLUX Fill, etc.)
  if img_cond is not None:
- neg_img_input = torch.cat((neg_img_input, img_cond), dim=-1)
+ img_input = torch.cat((img_input, img_cond), dim=-1)

- # Add sequence-wise conditioning (Kontext) for negative pass
- # This ensures reference images are processed consistently
+ # Add sequence-wise conditioning (for Kontext)
  if img_cond_seq is not None:
- neg_img_input = torch.cat((neg_img_input, img_cond_seq), dim=1)
- neg_img_input_ids = torch.cat((neg_img_input_ids, img_cond_seq_ids), dim=1)
-
- neg_pred = model(
- img=neg_img_input,
- img_ids=neg_img_input_ids,
- txt=neg_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
- txt_ids=neg_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
- y=neg_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
+ assert img_cond_seq_ids is not None, (
+ "You need to provide either both or neither of the sequence conditioning"
+ )
+ img_input = torch.cat((img_input, img_cond_seq), dim=1)
+ img_input_ids = torch.cat((img_input_ids, img_cond_seq_ids), dim=1)
+
+ pred = model(
+ img=img_input,
+ img_ids=img_input_ids,
+ txt=pos_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
+ txt_ids=pos_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
+ y=pos_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
  timesteps=t_vec,
  guidance=guidance_vec,
  timestep_index=step_index,
  total_num_timesteps=total_steps,
- controlnet_double_block_residuals=None,
- controlnet_single_block_residuals=None,
- ip_adapter_extensions=neg_ip_adapter_extensions,
- regional_prompting_extension=neg_regional_prompting_extension,
+ controlnet_double_block_residuals=merged_controlnet_residuals.double_block_residuals,
+ controlnet_single_block_residuals=merged_controlnet_residuals.single_block_residuals,
+ ip_adapter_extensions=pos_ip_adapter_extensions,
+ regional_prompting_extension=pos_regional_prompting_extension,
  )

- # Slice negative prediction to match main image tokens
+ # Slice prediction to only include the main image tokens
  if img_cond_seq is not None:
- neg_pred = neg_pred[:, :original_seq_len]
- pred = neg_pred + step_cfg_scale * (pred - neg_pred)
-
- preview_img = img - t_curr * pred
- img = img + (t_prev - t_curr) * pred
-
- if inpaint_extension is not None:
- img = inpaint_extension.merge_intermediate_latents_with_init_latents(img, t_prev)
- preview_img = inpaint_extension.merge_intermediate_latents_with_init_latents(preview_img, 0.0)
-
- step_callback(
- PipelineIntermediateState(
- step=step_index + 1,
- order=1,
- total_steps=total_steps,
- timestep=int(t_curr),
- latents=preview_img,
- ),
- )
-
- return img
+ pred = pred[:, :original_seq_len]
+
+ step_cfg_scale = cfg_scale[step_index]
+
+ # If step_cfg_scale, is 1.0, then we don't need to run the negative prediction.
+ if not math.isclose(step_cfg_scale, 1.0):
+ # TODO(ryand): Add option to run positive and negative predictions in a single batch for better performance
+ # on systems with sufficient VRAM.
+
+ if neg_regional_prompting_extension is None:
+ raise ValueError("Negative text conditioning is required when cfg_scale is not 1.0.")
+
+ # For negative prediction with Kontext, we need to include the reference images
+ # to maintain consistency between positive and negative passes. Without this,
+ # CFG would create artifacts as the attention mechanism would see different
+ # spatial structures in each pass
+ neg_img_input = img
+ neg_img_input_ids = img_ids
+
+ # Add channel-wise conditioning for negative pass if present
+ if img_cond is not None:
+ neg_img_input = torch.cat((neg_img_input, img_cond), dim=-1)
+
+ # Add sequence-wise conditioning (Kontext) for negative pass
+ # This ensures reference images are processed consistently
+ if img_cond_seq is not None:
+ neg_img_input = torch.cat((neg_img_input, img_cond_seq), dim=1)
+ neg_img_input_ids = torch.cat((neg_img_input_ids, img_cond_seq_ids), dim=1)
+
+ neg_pred = model(
+ img=neg_img_input,
+ img_ids=neg_img_input_ids,
+ txt=neg_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
+ txt_ids=neg_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
+ y=neg_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
+ timesteps=t_vec,
+ guidance=guidance_vec,
+ timestep_index=step_index,
+ total_num_timesteps=total_steps,
+ controlnet_double_block_residuals=None,
+ controlnet_single_block_residuals=None,
+ ip_adapter_extensions=neg_ip_adapter_extensions,
+ regional_prompting_extension=neg_regional_prompting_extension,
+ )
+
+ # Slice negative prediction to match main image tokens
+ if img_cond_seq is not None:
+ neg_pred = neg_pred[:, :original_seq_len]
+ pred = neg_pred + step_cfg_scale * (pred - neg_pred)
+
+ preview_img = img - t_curr * pred
+ img = img + (t_prev - t_curr) * pred
+
+ if inpaint_extension is not None:
+ img = inpaint_extension.merge_intermediate_latents_with_init_latents(img, t_prev)
+ preview_img = inpaint_extension.merge_intermediate_latents_with_init_latents(preview_img, 0.0)
+
+ step_callback(
+ PipelineIntermediateState(
+ step=step_index + 1,
+ order=1,
+ total_steps=total_steps,
+ timestep=int(t_curr),
+ latents=preview_img,
+ ),
+ )
+
+ return img
+
+ finally:
+ # DyPE: Restore original position embedder
+ if original_pe_embedder is not None:
+ DyPEExtension.restore_model(model, original_pe_embedder)
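
Note: the new scheduler parameter accepts any diffusers flow-matching scheduler (SchedulerMixin). The following is a minimal sketch of the capability check performed above, assuming a recent diffusers release that exports FlowMatchEulerDiscreteScheduler and whose set_timesteps accepts a sigmas keyword; the sigma values below are made up for illustration.

    import inspect

    import torch
    from diffusers import FlowMatchEulerDiscreteScheduler

    # Hypothetical sigma schedule in [0, 1], ending at 0 like InvokeAI's time-shifted schedule.
    sigmas = [1.0, 0.75, 0.5, 0.25, 0.0]

    scheduler = FlowMatchEulerDiscreteScheduler()

    # Mirror the check in denoise(): pass the custom sigma schedule when the scheduler
    # supports it, otherwise fall back to a plain step count and let the scheduler
    # build its own schedule.
    if "sigmas" in inspect.signature(scheduler.set_timesteps).parameters:
        scheduler.set_timesteps(sigmas=sigmas, device=torch.device("cpu"))
    else:
        scheduler.set_timesteps(num_inference_steps=len(sigmas) - 1, device=torch.device("cpu"))

    # scheduler.timesteps is in the 0-1000 range; denoise() divides by
    # scheduler.config.num_train_timesteps to recover the normalized t fed to the FLUX model.
    print(scheduler.timesteps)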
invokeai/backend/flux/dype/__init__.py (new file)
@@ -0,0 +1,31 @@
+ """Dynamic Position Extrapolation (DyPE) for FLUX models.
+
+ DyPE enables high-resolution image generation (4K+) with pretrained FLUX models
+ by dynamically scaling RoPE position embeddings during the denoising process.
+
+ Based on: https://github.com/wildminder/ComfyUI-DyPE
+ """
+
+ from invokeai.backend.flux.dype.base import DyPEConfig
+ from invokeai.backend.flux.dype.embed import DyPEEmbedND
+ from invokeai.backend.flux.dype.presets import (
+ DYPE_PRESET_4K,
+ DYPE_PRESET_AUTO,
+ DYPE_PRESET_LABELS,
+ DYPE_PRESET_MANUAL,
+ DYPE_PRESET_OFF,
+ DyPEPreset,
+ get_dype_config_for_resolution,
+ )
+
+ __all__ = [
+ "DyPEConfig",
+ "DyPEEmbedND",
+ "DyPEPreset",
+ "DYPE_PRESET_OFF",
+ "DYPE_PRESET_MANUAL",
+ "DYPE_PRESET_AUTO",
+ "DYPE_PRESET_4K",
+ "DYPE_PRESET_LABELS",
+ "get_dype_config_for_resolution",
+ ]
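
This __init__.py re-exports the DyPE public API from invokeai.backend.flux.dype, so the imports below follow directly from the file above. The call at the end is a hedged sketch only: the signature of get_dype_config_for_resolution is not shown in this diff, and the preset/width/height keyword arguments are assumptions for illustration.

    from invokeai.backend.flux.dype import (
        DYPE_PRESET_AUTO,
        DyPEConfig,
        get_dype_config_for_resolution,
    )

    # Hypothetical call - the argument names are assumed, not taken from this diff;
    # the real signature lives in invokeai/backend/flux/dype/presets.py.
    config = get_dype_config_for_resolution(preset=DYPE_PRESET_AUTO, width=3840, height=2160)
    assert isinstance(config, DyPEConfig)  # assumed return type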