InvokeAI 6.10.0rc2-py3-none-any.whl → 6.11.0rc1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invokeai/app/api/routers/model_manager.py +43 -1
- invokeai/app/invocations/fields.py +1 -1
- invokeai/app/invocations/flux2_denoise.py +499 -0
- invokeai/app/invocations/flux2_klein_model_loader.py +222 -0
- invokeai/app/invocations/flux2_klein_text_encoder.py +222 -0
- invokeai/app/invocations/flux2_vae_decode.py +106 -0
- invokeai/app/invocations/flux2_vae_encode.py +88 -0
- invokeai/app/invocations/flux_denoise.py +50 -3
- invokeai/app/invocations/flux_lora_loader.py +1 -1
- invokeai/app/invocations/ideal_size.py +6 -1
- invokeai/app/invocations/metadata.py +4 -0
- invokeai/app/invocations/metadata_linked.py +47 -0
- invokeai/app/invocations/model.py +1 -0
- invokeai/app/invocations/z_image_denoise.py +8 -3
- invokeai/app/invocations/z_image_image_to_latents.py +9 -1
- invokeai/app/invocations/z_image_latents_to_image.py +9 -1
- invokeai/app/invocations/z_image_seed_variance_enhancer.py +110 -0
- invokeai/app/services/config/config_default.py +3 -1
- invokeai/app/services/invocation_stats/invocation_stats_common.py +6 -6
- invokeai/app/services/invocation_stats/invocation_stats_default.py +9 -4
- invokeai/app/services/model_manager/model_manager_default.py +7 -0
- invokeai/app/services/model_records/model_records_base.py +4 -2
- invokeai/app/services/shared/invocation_context.py +15 -0
- invokeai/app/services/shared/sqlite/sqlite_util.py +2 -0
- invokeai/app/services/shared/sqlite_migrator/migrations/migration_25.py +61 -0
- invokeai/app/util/step_callback.py +42 -0
- invokeai/backend/flux/denoise.py +239 -204
- invokeai/backend/flux/dype/__init__.py +18 -0
- invokeai/backend/flux/dype/base.py +226 -0
- invokeai/backend/flux/dype/embed.py +116 -0
- invokeai/backend/flux/dype/presets.py +141 -0
- invokeai/backend/flux/dype/rope.py +110 -0
- invokeai/backend/flux/extensions/dype_extension.py +91 -0
- invokeai/backend/flux/util.py +35 -1
- invokeai/backend/flux2/__init__.py +4 -0
- invokeai/backend/flux2/denoise.py +261 -0
- invokeai/backend/flux2/ref_image_extension.py +294 -0
- invokeai/backend/flux2/sampling_utils.py +209 -0
- invokeai/backend/model_manager/configs/factory.py +19 -1
- invokeai/backend/model_manager/configs/main.py +395 -3
- invokeai/backend/model_manager/configs/qwen3_encoder.py +116 -7
- invokeai/backend/model_manager/configs/vae.py +104 -2
- invokeai/backend/model_manager/load/load_default.py +0 -1
- invokeai/backend/model_manager/load/model_cache/model_cache.py +107 -2
- invokeai/backend/model_manager/load/model_loaders/flux.py +1007 -2
- invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +0 -1
- invokeai/backend/model_manager/load/model_loaders/z_image.py +121 -28
- invokeai/backend/model_manager/starter_models.py +128 -0
- invokeai/backend/model_manager/taxonomy.py +31 -4
- invokeai/backend/model_manager/util/select_hf_files.py +3 -2
- invokeai/backend/util/vae_working_memory.py +0 -2
- invokeai/frontend/web/dist/assets/App-ClpIJstk.js +161 -0
- invokeai/frontend/web/dist/assets/{browser-ponyfill-BP0RxJ4G.js → browser-ponyfill-Cw07u5G1.js} +1 -1
- invokeai/frontend/web/dist/assets/{index-B44qKjrs.js → index-DSKM8iGj.js} +69 -69
- invokeai/frontend/web/dist/index.html +1 -1
- invokeai/frontend/web/dist/locales/en.json +58 -5
- invokeai/frontend/web/dist/locales/it.json +2 -1
- invokeai/version/invokeai_version.py +1 -1
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/METADATA +7 -1
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/RECORD +66 -49
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/WHEEL +1 -1
- invokeai/frontend/web/dist/assets/App-DllqPQ3j.js +0 -161
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/entry_points.txt +0 -0
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE +0 -0
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/top_level.txt +0 -0
invokeai/app/util/step_callback.py CHANGED

@@ -93,6 +93,46 @@ COGVIEW4_LATENT_RGB_FACTORS = [
     [-0.00955853, -0.00980067, -0.00977842],
 ]

+# FLUX.2 uses 32 latent channels. Since we don't have proper factors yet,
+# we extend FLUX factors with zeros for preview approximation.
+FLUX2_LATENT_RGB_FACTORS = [
+    # R G B
+    # First 16 channels (from FLUX)
+    [0.0118, 0.0024, 0.0017],
+    [-0.0074, -0.0108, -0.0003],
+    [0.0056, 0.0291, 0.0768],
+    [0.0342, -0.0681, -0.0427],
+    [-0.0258, 0.0092, 0.0463],
+    [0.0863, 0.0784, 0.0547],
+    [-0.0017, 0.0402, 0.0158],
+    [0.0501, 0.1058, 0.1152],
+    [-0.0209, -0.0218, -0.0329],
+    [-0.0314, 0.0083, 0.0896],
+    [0.0851, 0.0665, -0.0472],
+    [-0.0534, 0.0238, -0.0024],
+    [0.0452, -0.0026, 0.0048],
+    [0.0892, 0.0831, 0.0881],
+    [-0.1117, -0.0304, -0.0789],
+    [0.0027, -0.0479, -0.0043],
+    # Additional 16 channels (zeros as placeholder)
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+]
+

 def sample_to_lowres_estimated_image(
     samples: torch.Tensor, latent_rgb_factors: torch.Tensor, smooth_matrix: Optional[torch.Tensor] = None

@@ -164,6 +204,8 @@ def diffusion_step_callback(
         latent_rgb_factors = COGVIEW4_LATENT_RGB_FACTORS
     elif base_model == BaseModelType.Flux:
         latent_rgb_factors = FLUX_LATENT_RGB_FACTORS
+    elif base_model == BaseModelType.Flux2:
+        latent_rgb_factors = FLUX2_LATENT_RGB_FACTORS
     elif base_model == BaseModelType.ZImage:
         # Z-Image uses FLUX-compatible VAE with 16 latent channels
         latent_rgb_factors = FLUX_LATENT_RGB_FACTORS
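For context on how these factor tables are consumed, the preview pass essentially projects each latent channel onto RGB with a per-channel weight, so the all-zero placeholder rows simply contribute nothing. The following is a minimal standalone sketch of that projection, not the package's actual sample_to_lowres_estimated_image implementation; the shapes and the stand-in factor values are assumptions for illustration.

import torch

def approximate_rgb_preview(latents: torch.Tensor, rgb_factors: torch.Tensor) -> torch.Tensor:
    """Project latents of shape (C, H, W) onto RGB using per-channel factors of shape (C, 3)."""
    # Weighted sum over the channel axis: each latent channel adds its (R, G, B) contribution.
    rgb = torch.einsum("chw,cr->hwr", latents, rgb_factors)
    # Rescale to [0, 1] purely for display.
    return (rgb - rgb.min()) / (rgb.max() - rgb.min() + 1e-8)

# A 32-channel FLUX.2-style latent at 1/8th the resolution of a 1024x1024 image.
# `factors` stands in for the FLUX2_LATENT_RGB_FACTORS table added above: the last
# 16 rows are zeros, so only the first 16 channels influence the preview.
factors = torch.zeros(32, 3)
factors[:16] = torch.randn(16, 3) * 0.05
latents = torch.randn(32, 128, 128)
print(approximate_rgb_preview(latents, factors).shape)  # torch.Size([128, 128, 3])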
invokeai/backend/flux/denoise.py CHANGED

@@ -7,6 +7,7 @@ from diffusers.schedulers.scheduling_utils import SchedulerMixin
 from tqdm import tqdm

 from invokeai.backend.flux.controlnet.controlnet_flux_output import ControlNetFluxOutput, sum_controlnet_flux_outputs
+from invokeai.backend.flux.extensions.dype_extension import DyPEExtension
 from invokeai.backend.flux.extensions.instantx_controlnet_extension import InstantXControlNetExtension
 from invokeai.backend.flux.extensions.regional_prompting_extension import RegionalPromptingExtension
 from invokeai.backend.flux.extensions.xlabs_controlnet_extension import XLabsControlNetExtension
@@ -37,6 +38,8 @@ def denoise(
     # extra img tokens (sequence-wise) - for Kontext conditioning
     img_cond_seq: torch.Tensor | None = None,
     img_cond_seq_ids: torch.Tensor | None = None,
+    # DyPE extension for high-resolution generation
+    dype_extension: DyPEExtension | None = None,
     # Optional scheduler for alternative sampling methods
     scheduler: SchedulerMixin | None = None,
 ):
@@ -74,30 +77,206 @@ def denoise(
     # Store original sequence length for slicing predictions
     original_seq_len = img.shape[1]

-    #
+    # DyPE: Patch model with DyPE-aware position embedder
+    dype_embedder = None
+    original_pe_embedder = None
+    if dype_extension is not None:
+        dype_embedder, original_pe_embedder = dype_extension.patch_model(model)
+
+    try:
+        # Track the actual step for user-facing progress (accounts for Heun's double steps)
+        user_step = 0
+
+        if use_scheduler:
+            # Use diffusers scheduler for stepping
+            # Use tqdm with total_steps (user-facing steps) not num_scheduler_steps (internal steps)
+            # This ensures progress bar shows 1/8, 2/8, etc. even when scheduler uses more internal steps
+            pbar = tqdm(total=total_steps, desc="Denoising")
+            for step_index in range(num_scheduler_steps):
+                timestep = scheduler.timesteps[step_index]
+                # Convert scheduler timestep (0-1000) to normalized (0-1) for the model
+                t_curr = timestep.item() / scheduler.config.num_train_timesteps
+                t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device)
+
+                # DyPE: Update step state for timestep-dependent scaling
+                if dype_extension is not None and dype_embedder is not None:
+                    dype_extension.update_step_state(
+                        embedder=dype_embedder,
+                        timestep=t_curr,
+                        timestep_index=user_step,
+                        total_steps=total_steps,
+                    )

+                # For Heun scheduler, track if we're in first or second order step
+                is_heun = hasattr(scheduler, "state_in_first_order")
+                in_first_order = scheduler.state_in_first_order if is_heun else True
+
+                # Run ControlNet models
+                controlnet_residuals: list[ControlNetFluxOutput] = []
+                for controlnet_extension in controlnet_extensions:
+                    controlnet_residuals.append(
+                        controlnet_extension.run_controlnet(
+                            timestep_index=user_step,
+                            total_num_timesteps=total_steps,
+                            img=img,
+                            img_ids=img_ids,
+                            txt=pos_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
+                            txt_ids=pos_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
+                            y=pos_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
+                            timesteps=t_vec,
+                            guidance=guidance_vec,
+                        )
+                    )
+
+                merged_controlnet_residuals = sum_controlnet_flux_outputs(controlnet_residuals)
+
+                # Prepare input for model
+                img_input = img
+                img_input_ids = img_ids
+
+                if img_cond is not None:
+                    img_input = torch.cat((img_input, img_cond), dim=-1)
+
+                if img_cond_seq is not None:
+                    assert img_cond_seq_ids is not None
+                    img_input = torch.cat((img_input, img_cond_seq), dim=1)
+                    img_input_ids = torch.cat((img_input_ids, img_cond_seq_ids), dim=1)
+
+                pred = model(
+                    img=img_input,
+                    img_ids=img_input_ids,
+                    txt=pos_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
+                    txt_ids=pos_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
+                    y=pos_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
+                    timesteps=t_vec,
+                    guidance=guidance_vec,
+                    timestep_index=user_step,
+                    total_num_timesteps=total_steps,
+                    controlnet_double_block_residuals=merged_controlnet_residuals.double_block_residuals,
+                    controlnet_single_block_residuals=merged_controlnet_residuals.single_block_residuals,
+                    ip_adapter_extensions=pos_ip_adapter_extensions,
+                    regional_prompting_extension=pos_regional_prompting_extension,
+                )
+
+                if img_cond_seq is not None:
+                    pred = pred[:, :original_seq_len]
+
+                # Get CFG scale for current user step
+                step_cfg_scale = cfg_scale[min(user_step, len(cfg_scale) - 1)]
+
+                if not math.isclose(step_cfg_scale, 1.0):
+                    if neg_regional_prompting_extension is None:
+                        raise ValueError("Negative text conditioning is required when cfg_scale is not 1.0.")
+
+                    neg_img_input = img
+                    neg_img_input_ids = img_ids
+
+                    if img_cond is not None:
+                        neg_img_input = torch.cat((neg_img_input, img_cond), dim=-1)
+
+                    if img_cond_seq is not None:
+                        neg_img_input = torch.cat((neg_img_input, img_cond_seq), dim=1)
+                        neg_img_input_ids = torch.cat((neg_img_input_ids, img_cond_seq_ids), dim=1)
+
+                    neg_pred = model(
+                        img=neg_img_input,
+                        img_ids=neg_img_input_ids,
+                        txt=neg_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
+                        txt_ids=neg_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
+                        y=neg_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
+                        timesteps=t_vec,
+                        guidance=guidance_vec,
+                        timestep_index=user_step,
+                        total_num_timesteps=total_steps,
+                        controlnet_double_block_residuals=None,
+                        controlnet_single_block_residuals=None,
+                        ip_adapter_extensions=neg_ip_adapter_extensions,
+                        regional_prompting_extension=neg_regional_prompting_extension,
+                    )
+
+                    if img_cond_seq is not None:
+                        neg_pred = neg_pred[:, :original_seq_len]
+                    pred = neg_pred + step_cfg_scale * (pred - neg_pred)
+
+                # Use scheduler.step() for the update
+                step_output = scheduler.step(model_output=pred, timestep=timestep, sample=img)
+                img = step_output.prev_sample
+
+                # Get t_prev for inpainting (next sigma value)
+                if step_index + 1 < len(scheduler.sigmas):
+                    t_prev = scheduler.sigmas[step_index + 1].item()
+                else:
+                    t_prev = 0.0
+
+                if inpaint_extension is not None:
+                    img = inpaint_extension.merge_intermediate_latents_with_init_latents(img, t_prev)
+
+                # For Heun, only increment user step after second-order step completes
+                if is_heun:
+                    if not in_first_order:
+                        # Second order step completed
+                        user_step += 1
+                        # Only call step_callback if we haven't exceeded total_steps
+                        if user_step <= total_steps:
+                            pbar.update(1)
+                            preview_img = img - t_curr * pred
+                            if inpaint_extension is not None:
+                                preview_img = inpaint_extension.merge_intermediate_latents_with_init_latents(
+                                    preview_img, 0.0
+                                )
+                            step_callback(
+                                PipelineIntermediateState(
+                                    step=user_step,
+                                    order=2,
+                                    total_steps=total_steps,
+                                    timestep=int(t_curr * 1000),
+                                    latents=preview_img,
+                                ),
+                            )
+                else:
+                    # For LCM and other first-order schedulers
+                    user_step += 1
+                    # Only call step_callback if we haven't exceeded total_steps
+                    # (LCM scheduler may have more internal steps than user-facing steps)
+                    if user_step <= total_steps:
+                        pbar.update(1)
+                        preview_img = img - t_curr * pred
+                        if inpaint_extension is not None:
+                            preview_img = inpaint_extension.merge_intermediate_latents_with_init_latents(
+                                preview_img, 0.0
+                            )
+                        step_callback(
+                            PipelineIntermediateState(
+                                step=user_step,
+                                order=1,
+                                total_steps=total_steps,
+                                timestep=int(t_curr * 1000),
+                                latents=preview_img,
+                            ),
+                        )
+
+            pbar.close()
+            return img
+
+        # Original Euler implementation (when scheduler is None)
+        for step_index, (t_curr, t_prev) in tqdm(list(enumerate(zip(timesteps[:-1], timesteps[1:], strict=True)))):
+            # DyPE: Update step state for timestep-dependent scaling
+            if dype_extension is not None and dype_embedder is not None:
+                dype_extension.update_step_state(
+                    embedder=dype_embedder,
+                    timestep=t_curr,
+                    timestep_index=step_index,
+                    total_steps=total_steps,
+                )

-            is_heun = hasattr(scheduler, "state_in_first_order")
-            in_first_order = scheduler.state_in_first_order if is_heun else True
+            t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device)

-            # Run ControlNet models
+            # Run ControlNet models.
             controlnet_residuals: list[ControlNetFluxOutput] = []
             for controlnet_extension in controlnet_extensions:
                 controlnet_residuals.append(
                     controlnet_extension.run_controlnet(
-                        timestep_index=
+                        timestep_index=step_index,
                         total_num_timesteps=total_steps,
                         img=img,
                         img_ids=img_ids,
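Two pieces of bookkeeping in the new scheduler branch are easy to miss: the diffusers timestep (on a 0-1000 scale) is normalized to the 0-1 value the FLUX transformer expects, and the user-facing step counter only advances once a Heun-style scheduler has finished both of its internal sub-steps. The following is a toy, self-contained sketch of that accounting; the step counts and the alternating first/second-order pattern are assumptions for illustration, whereas the real loop reads this state from the diffusers scheduler object.

# Hypothetical numbers: 8 user-facing steps, a Heun-like scheduler running two
# internal sub-steps per user step (16 internal steps total).
num_train_timesteps = 1000
total_steps = 8
internal_timesteps = [1000 - i * (1000 / 16) for i in range(16)]

user_step = 0
for step_index, timestep in enumerate(internal_timesteps):
    # Same normalization as the diff: scheduler timestep (0-1000) -> 0-1 for the model.
    t_curr = timestep / num_train_timesteps

    # A Heun-style scheduler alternates first- and second-order sub-steps; only the
    # second one completes a user-visible step, so the progress bar still reads 1/8..8/8.
    in_first_order = step_index % 2 == 0
    if not in_first_order:
        user_step += 1
        if user_step <= total_steps:
            print(f"user step {user_step}/{total_steps} at t={t_curr:.3f}")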
@@ -109,17 +288,25 @@ def denoise(
                     )
                 )

+            # Merge the ControlNet residuals from multiple ControlNets.
+            # TODO(ryand): We may want to calculate the sum just-in-time to keep peak memory low. Keep in mind, that the
+            # controlnet_residuals datastructure is efficient in that it likely contains multiple references to the same
+            # tensors. Calculating the sum materializes each tensor into its own instance.
             merged_controlnet_residuals = sum_controlnet_flux_outputs(controlnet_residuals)

-            # Prepare input for model
+            # Prepare input for model - concatenate fresh each step
             img_input = img
             img_input_ids = img_ids

+            # Add channel-wise conditioning (for ControlNet, FLUX Fill, etc.)
             if img_cond is not None:
                 img_input = torch.cat((img_input, img_cond), dim=-1)

+            # Add sequence-wise conditioning (for Kontext)
             if img_cond_seq is not None:
-                assert img_cond_seq_ids is not None
+                assert img_cond_seq_ids is not None, (
+                    "You need to provide either both or neither of the sequence conditioning"
+                )
                 img_input = torch.cat((img_input, img_cond_seq), dim=1)
                 img_input_ids = torch.cat((img_input_ids, img_cond_seq_ids), dim=1)

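The two concatenations above operate on different axes of the packed latent sequence: channel-wise conditioning widens every token, while sequence-wise (Kontext) conditioning appends extra tokens. A toy shape check makes the distinction concrete; the dimensions are illustrative, not the model's real sizes.

import torch

batch, seq_len, channels = 1, 4096, 64          # illustrative packed-latent shape
img = torch.randn(batch, seq_len, channels)

# Channel-wise conditioning (ControlNet, FLUX Fill): widens each token.
img_cond = torch.randn(batch, seq_len, 320)
channel_wise = torch.cat((img, img_cond), dim=-1)
print(channel_wise.shape)   # torch.Size([1, 4096, 384])

# Sequence-wise conditioning (Kontext reference images): appends extra tokens.
img_cond_seq = torch.randn(batch, 1024, channels)
sequence_wise = torch.cat((img, img_cond_seq), dim=1)
print(sequence_wise.shape)  # torch.Size([1, 5120, 64])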
@@ -131,7 +318,7 @@ def denoise(
                 y=pos_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
                 timesteps=t_vec,
                 guidance=guidance_vec,
-                timestep_index=
+                timestep_index=step_index,
                 total_num_timesteps=total_steps,
                 controlnet_double_block_residuals=merged_controlnet_residuals.double_block_residuals,
                 controlnet_single_block_residuals=merged_controlnet_residuals.single_block_residuals,
@@ -139,22 +326,33 @@ def denoise(
                 regional_prompting_extension=pos_regional_prompting_extension,
             )

+            # Slice prediction to only include the main image tokens
             if img_cond_seq is not None:
                 pred = pred[:, :original_seq_len]

-            step_cfg_scale = cfg_scale[min(user_step, len(cfg_scale) - 1)]
+            step_cfg_scale = cfg_scale[step_index]

+            # If step_cfg_scale, is 1.0, then we don't need to run the negative prediction.
             if not math.isclose(step_cfg_scale, 1.0):
+                # TODO(ryand): Add option to run positive and negative predictions in a single batch for better performance
+                # on systems with sufficient VRAM.
+
                 if neg_regional_prompting_extension is None:
                     raise ValueError("Negative text conditioning is required when cfg_scale is not 1.0.")

+                # For negative prediction with Kontext, we need to include the reference images
+                # to maintain consistency between positive and negative passes. Without this,
+                # CFG would create artifacts as the attention mechanism would see different
+                # spatial structures in each pass
                 neg_img_input = img
                 neg_img_input_ids = img_ids

+                # Add channel-wise conditioning for negative pass if present
                 if img_cond is not None:
                     neg_img_input = torch.cat((neg_img_input, img_cond), dim=-1)

+                # Add sequence-wise conditioning (Kontext) for negative pass
+                # This ensures reference images are processed consistently
                 if img_cond_seq is not None:
                     neg_img_input = torch.cat((neg_img_input, img_cond_seq), dim=1)
                     neg_img_input_ids = torch.cat((neg_img_input_ids, img_cond_seq_ids), dim=1)
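The negative pass prepared above feeds the classifier-free-guidance combination applied after the model call (pred = neg_pred + step_cfg_scale * (pred - neg_pred)), with both predictions first sliced back to the main image tokens. This can be sanity-checked in isolation; the tensor sizes and guidance scale below are illustrative only.

import torch

step_cfg_scale = 3.5                      # illustrative guidance scale
original_seq_len = 4096

# Positive and negative predictions include any Kontext reference tokens,
# so both are sliced back to the main image tokens before mixing.
pred = torch.randn(1, 5120, 64)
neg_pred = torch.randn(1, 5120, 64)
pred = pred[:, :original_seq_len]
neg_pred = neg_pred[:, :original_seq_len]

# Classifier-free guidance: move from the unconditional prediction toward the
# conditional one, scaled by step_cfg_scale (a scale of 1.0 reduces to pred itself).
guided = neg_pred + step_cfg_scale * (pred - neg_pred)
print(guided.shape)  # torch.Size([1, 4096, 64])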
@@ -167,7 +365,7 @@ def denoise(
                     y=neg_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
                     timesteps=t_vec,
                     guidance=guidance_vec,
-                    timestep_index=
+                    timestep_index=step_index,
                     total_num_timesteps=total_steps,
                     controlnet_double_block_residuals=None,
                     controlnet_single_block_residuals=None,
@@ -175,194 +373,31 @@ def denoise(
                     regional_prompting_extension=neg_regional_prompting_extension,
                 )

+                # Slice negative prediction to match main image tokens
                 if img_cond_seq is not None:
                     neg_pred = neg_pred[:, :original_seq_len]
                 pred = neg_pred + step_cfg_scale * (pred - neg_pred)

-            img = step_output.prev_sample
-
-            # Get t_prev for inpainting (next sigma value)
-            if step_index + 1 < len(scheduler.sigmas):
-                t_prev = scheduler.sigmas[step_index + 1].item()
-            else:
-                t_prev = 0.0
+            preview_img = img - t_curr * pred
+            img = img + (t_prev - t_curr) * pred

             if inpaint_extension is not None:
                 img = inpaint_extension.merge_intermediate_latents_with_init_latents(img, t_prev)
-                        if inpaint_extension is not None:
-                            preview_img = inpaint_extension.merge_intermediate_latents_with_init_latents(
-                                preview_img, 0.0
-                            )
-                        step_callback(
-                            PipelineIntermediateState(
-                                step=user_step,
-                                order=2,
-                                total_steps=total_steps,
-                                timestep=int(t_curr * 1000),
-                                latents=preview_img,
-                            ),
-                        )
-            else:
-                # For LCM and other first-order schedulers
-                user_step += 1
-                # Only call step_callback if we haven't exceeded total_steps
-                # (LCM scheduler may have more internal steps than user-facing steps)
-                if user_step <= total_steps:
-                    pbar.update(1)
-                    preview_img = img - t_curr * pred
-                    if inpaint_extension is not None:
-                        preview_img = inpaint_extension.merge_intermediate_latents_with_init_latents(preview_img, 0.0)
-                    step_callback(
-                        PipelineIntermediateState(
-                            step=user_step,
-                            order=1,
-                            total_steps=total_steps,
-                            timestep=int(t_curr * 1000),
-                            latents=preview_img,
-                        ),
-                    )
-
-        pbar.close()
-        return img
-
-    # Original Euler implementation (when scheduler is None)
-    for step_index, (t_curr, t_prev) in tqdm(list(enumerate(zip(timesteps[:-1], timesteps[1:], strict=True)))):
-        t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device)
-
-        # Run ControlNet models.
-        controlnet_residuals: list[ControlNetFluxOutput] = []
-        for controlnet_extension in controlnet_extensions:
-            controlnet_residuals.append(
-                controlnet_extension.run_controlnet(
-                    timestep_index=step_index,
-                    total_num_timesteps=total_steps,
-                    img=img,
-                    img_ids=img_ids,
-                    txt=pos_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
-                    txt_ids=pos_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
-                    y=pos_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
-                    timesteps=t_vec,
-                    guidance=guidance_vec,
-                )
+                preview_img = inpaint_extension.merge_intermediate_latents_with_init_latents(preview_img, 0.0)
+
+            step_callback(
+                PipelineIntermediateState(
+                    step=step_index + 1,
+                    order=1,
+                    total_steps=total_steps,
+                    timestep=int(t_curr),
+                    latents=preview_img,
+                ),
             )

-        # TODO(ryand): We may want to calculate the sum just-in-time to keep peak memory low. Keep in mind, that the
-        # controlnet_residuals datastructure is efficient in that it likely contains multiple references to the same
-        # tensors. Calculating the sum materializes each tensor into its own instance.
-        merged_controlnet_residuals = sum_controlnet_flux_outputs(controlnet_residuals)
-
-        # Prepare input for model - concatenate fresh each step
-        img_input = img
-        img_input_ids = img_ids
-
-        # Add channel-wise conditioning (for ControlNet, FLUX Fill, etc.)
-        if img_cond is not None:
-            img_input = torch.cat((img_input, img_cond), dim=-1)
-
-        # Add sequence-wise conditioning (for Kontext)
-        if img_cond_seq is not None:
-            assert img_cond_seq_ids is not None, (
-                "You need to provide either both or neither of the sequence conditioning"
-            )
-            img_input = torch.cat((img_input, img_cond_seq), dim=1)
-            img_input_ids = torch.cat((img_input_ids, img_cond_seq_ids), dim=1)
-
-        pred = model(
-            img=img_input,
-            img_ids=img_input_ids,
-            txt=pos_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
-            txt_ids=pos_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
-            y=pos_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
-            timesteps=t_vec,
-            guidance=guidance_vec,
-            timestep_index=step_index,
-            total_num_timesteps=total_steps,
-            controlnet_double_block_residuals=merged_controlnet_residuals.double_block_residuals,
-            controlnet_single_block_residuals=merged_controlnet_residuals.single_block_residuals,
-            ip_adapter_extensions=pos_ip_adapter_extensions,
-            regional_prompting_extension=pos_regional_prompting_extension,
-        )
-
-        # Slice prediction to only include the main image tokens
-        if img_cond_seq is not None:
-            pred = pred[:, :original_seq_len]
-
-        step_cfg_scale = cfg_scale[step_index]
-
-        # If step_cfg_scale, is 1.0, then we don't need to run the negative prediction.
-        if not math.isclose(step_cfg_scale, 1.0):
-            # TODO(ryand): Add option to run positive and negative predictions in a single batch for better performance
-            # on systems with sufficient VRAM.
-
-            if neg_regional_prompting_extension is None:
-                raise ValueError("Negative text conditioning is required when cfg_scale is not 1.0.")
-
-            # For negative prediction with Kontext, we need to include the reference images
-            # to maintain consistency between positive and negative passes. Without this,
-            # CFG would create artifacts as the attention mechanism would see different
-            # spatial structures in each pass
-            neg_img_input = img
-            neg_img_input_ids = img_ids
-
-            # Add channel-wise conditioning for negative pass if present
-            if img_cond is not None:
-                neg_img_input = torch.cat((neg_img_input, img_cond), dim=-1)
-
-            # Add sequence-wise conditioning (Kontext) for negative pass
-            # This ensures reference images are processed consistently
-            if img_cond_seq is not None:
-                neg_img_input = torch.cat((neg_img_input, img_cond_seq), dim=1)
-                neg_img_input_ids = torch.cat((neg_img_input_ids, img_cond_seq_ids), dim=1)
-
-            neg_pred = model(
-                img=neg_img_input,
-                img_ids=neg_img_input_ids,
-                txt=neg_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
-                txt_ids=neg_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
-                y=neg_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
-                timesteps=t_vec,
-                guidance=guidance_vec,
-                timestep_index=step_index,
-                total_num_timesteps=total_steps,
-                controlnet_double_block_residuals=None,
-                controlnet_single_block_residuals=None,
-                ip_adapter_extensions=neg_ip_adapter_extensions,
-                regional_prompting_extension=neg_regional_prompting_extension,
-            )
+            return img

-        preview_img = img - t_curr * pred
-        img = img + (t_prev - t_curr) * pred
-
-        if inpaint_extension is not None:
-            img = inpaint_extension.merge_intermediate_latents_with_init_latents(img, t_prev)
-            preview_img = inpaint_extension.merge_intermediate_latents_with_init_latents(preview_img, 0.0)
-
-        step_callback(
-            PipelineIntermediateState(
-                step=step_index + 1,
-                order=1,
-                total_steps=total_steps,
-                timestep=int(t_curr),
-                latents=preview_img,
-            ),
-        )
-
-    return img
+    finally:
+        # DyPE: Restore original position embedder
+        if original_pe_embedder is not None:
+            DyPEExtension.restore_model(model, original_pe_embedder)
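The Euler branch retained above follows the rectified-flow convention used throughout the FLUX code: the model output approximates a velocity, so moving from t_curr to t_prev is a single Euler step, and extrapolating all the way to t=0 yields the preview latents shown in the UI. A minimal numeric sketch of just those two lines, with made-up values and shapes:

import torch

t_curr, t_prev = 0.75, 0.50              # two adjacent timesteps from the schedule
img = torch.randn(1, 4096, 64)           # current noisy latents (illustrative shape)
pred = torch.randn(1, 4096, 64)          # model's velocity prediction at t_curr

# Preview: extrapolate straight to t=0 for the progress image.
preview_img = img - t_curr * pred

# Euler step: advance the latents from t_curr to t_prev along the predicted velocity.
img = img + (t_prev - t_curr) * pred
print(preview_img.shape, img.shape)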
invokeai/backend/flux/dype/__init__.py ADDED

@@ -0,0 +1,18 @@
+"""Dynamic Position Extrapolation (DyPE) for FLUX models.
+
+DyPE enables high-resolution image generation (4K+) with pretrained FLUX models
+by dynamically scaling RoPE position embeddings during the denoising process.
+
+Based on: https://github.com/wildminder/ComfyUI-DyPE
+"""
+
+from invokeai.backend.flux.dype.base import DyPEConfig
+from invokeai.backend.flux.dype.embed import DyPEEmbedND
+from invokeai.backend.flux.dype.presets import DyPEPreset, get_dype_config_for_resolution
+
+__all__ = [
+    "DyPEConfig",
+    "DyPEEmbedND",
+    "DyPEPreset",
+    "get_dype_config_for_resolution",
+]