InvokeAI 6.10.0rc1__py3-none-any.whl → 6.10.0rc2__py3-none-any.whl
- invokeai/app/invocations/flux_denoise.py +15 -1
- invokeai/app/invocations/pbr_maps.py +59 -0
- invokeai/app/invocations/z_image_denoise.py +237 -82
- invokeai/backend/flux/denoise.py +196 -11
- invokeai/backend/flux/schedulers.py +62 -0
- invokeai/backend/image_util/pbr_maps/architecture/block.py +367 -0
- invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py +70 -0
- invokeai/backend/image_util/pbr_maps/pbr_maps.py +141 -0
- invokeai/backend/image_util/pbr_maps/utils/image_ops.py +93 -0
- invokeai/backend/model_manager/configs/lora.py +36 -0
- invokeai/backend/model_manager/load/load_default.py +1 -0
- invokeai/backend/model_manager/load/model_loaders/cogview4.py +2 -1
- invokeai/backend/model_manager/load/model_loaders/flux.py +13 -6
- invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py +4 -2
- invokeai/backend/model_manager/load/model_loaders/onnx.py +1 -0
- invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +3 -1
- invokeai/backend/model_manager/load/model_loaders/z_image.py +37 -3
- invokeai/backend/model_manager/starter_models.py +13 -4
- invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py +39 -5
- invokeai/backend/quantization/gguf/ggml_tensor.py +15 -4
- invokeai/backend/z_image/extensions/regional_prompting_extension.py +10 -12
- invokeai/frontend/web/dist/assets/App-DllqPQ3j.js +161 -0
- invokeai/frontend/web/dist/assets/{browser-ponyfill-DHZxq1nk.js → browser-ponyfill-BP0RxJ4G.js} +1 -1
- invokeai/frontend/web/dist/assets/{index-dgSJAY--.js → index-B44qKjrs.js} +51 -51
- invokeai/frontend/web/dist/index.html +1 -1
- invokeai/frontend/web/dist/locales/en-GB.json +1 -0
- invokeai/frontend/web/dist/locales/en.json +11 -5
- invokeai/version/invokeai_version.py +1 -1
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/METADATA +2 -2
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/RECORD +36 -29
- invokeai/frontend/web/dist/assets/App-CYhlZO3Q.js +0 -161
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/WHEEL +0 -0
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/entry_points.txt +0 -0
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/licenses/LICENSE +0 -0
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/top_level.txt +0 -0
```diff
--- a/invokeai/app/invocations/flux_denoise.py
+++ b/invokeai/app/invocations/flux_denoise.py
@@ -47,6 +47,7 @@ from invokeai.backend.flux.sampling_utils import (
     pack,
     unpack,
 )
+from invokeai.backend.flux.schedulers import FLUX_SCHEDULER_LABELS, FLUX_SCHEDULER_MAP, FLUX_SCHEDULER_NAME_VALUES
 from invokeai.backend.flux.text_conditioning import FluxReduxConditioning, FluxTextConditioning
 from invokeai.backend.model_manager.taxonomy import BaseModelType, FluxVariantType, ModelFormat, ModelType
 from invokeai.backend.patches.layer_patcher import LayerPatcher
```
```diff
@@ -63,7 +64,7 @@ from invokeai.backend.util.devices import TorchDevice
     title="FLUX Denoise",
     tags=["image", "flux"],
     category="image",
-    version="4.
+    version="4.2.0",
 )
 class FluxDenoiseInvocation(BaseInvocation):
     """Run denoising process with a FLUX transformer model."""
```
```diff
@@ -132,6 +133,12 @@ class FluxDenoiseInvocation(BaseInvocation):
     num_steps: int = InputField(
         default=4, description="Number of diffusion steps. Recommended values are schnell: 4, dev: 50."
     )
+    scheduler: FLUX_SCHEDULER_NAME_VALUES = InputField(
+        default="euler",
+        description="Scheduler (sampler) for the denoising process. 'euler' is fast and standard. "
+        "'heun' is 2nd-order (better quality, 2x slower). 'lcm' is optimized for few steps.",
+        ui_choice_labels=FLUX_SCHEDULER_LABELS,
+    )
     guidance: float = InputField(
         default=4.0,
         description="The guidance strength. Higher values adhere more strictly to the prompt, and will produce less diverse images. FLUX dev only, ignored for schnell.",
```
```diff
@@ -242,6 +249,12 @@
             shift=not is_schnell,
         )
 
+        # Create scheduler if not using default euler
+        scheduler = None
+        if self.scheduler in FLUX_SCHEDULER_MAP:
+            scheduler_class = FLUX_SCHEDULER_MAP[self.scheduler]
+            scheduler = scheduler_class(num_train_timesteps=1000)
+
         # Clip the timesteps schedule based on denoising_start and denoising_end.
         timesteps = clip_timestep_schedule_fractional(timesteps, self.denoising_start, self.denoising_end)
 
```
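The new `invokeai/backend/flux/schedulers.py` module (+62 lines) is not shown in this diff, but the three names imported above imply its shape: a `Literal` of scheduler names, a dict of UI labels, and a map from name to a diffusers flow-matching scheduler class. A minimal sketch under those assumptions follows; it is not the actual file, and the concrete class registered for "lcm" is not visible here:

```python
# Hypothetical sketch of invokeai/backend/flux/schedulers.py -- shape inferred from usage above.
from typing import Literal

from diffusers import FlowMatchHeunDiscreteScheduler

FLUX_SCHEDULER_NAME_VALUES = Literal["euler", "heun", "lcm"]

FLUX_SCHEDULER_LABELS: dict[str, str] = {
    "euler": "Euler (default)",
    "heun": "Heun (2nd order, 2x slower)",
    "lcm": "LCM (few steps)",
}

# Only non-default names need a diffusers class: FluxDenoiseInvocation checks
# `self.scheduler in FLUX_SCHEDULER_MAP`, so "euler" falls through to the
# built-in sampling loop with scheduler=None.
FLUX_SCHEDULER_MAP: dict[str, type] = {
    "heun": FlowMatchHeunDiscreteScheduler,
    # "lcm": <flow-matching LCM scheduler class; omitted -- not visible in this diff>
}
```

Keeping "euler" out of the map is what lets the default path stay on the hand-rolled sampling loop; the remaining flux_denoise.py hunk below simply threads the optional scheduler through to the backend `denoise()` call.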
```diff
@@ -426,6 +439,7 @@
             img_cond=img_cond,
             img_cond_seq=img_cond_seq,
             img_cond_seq_ids=img_cond_seq_ids,
+            scheduler=scheduler,
         )
 
         x = unpack(x.float(), self.height, self.width)
```
```diff
--- /dev/null
+++ b/invokeai/app/invocations/pbr_maps.py
@@ -0,0 +1,59 @@
+import pathlib
+from typing import Literal
+
+from invokeai.app.invocations.baseinvocation import BaseInvocation, BaseInvocationOutput, invocation, invocation_output
+from invokeai.app.invocations.fields import ImageField, InputField, OutputField, WithBoard, WithMetadata
+from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.image_util.pbr_maps.architecture.pbr_rrdb_net import PBR_RRDB_Net
+from invokeai.backend.image_util.pbr_maps.pbr_maps import NORMAL_MAP_MODEL, OTHER_MAP_MODEL, PBRMapsGenerator
+from invokeai.backend.util.devices import TorchDevice
+
+
+@invocation_output("pbr_maps-output")
+class PBRMapsOutput(BaseInvocationOutput):
+    normal_map: ImageField = OutputField(default=None, description="The generated normal map")
+    roughness_map: ImageField = OutputField(default=None, description="The generated roughness map")
+    displacement_map: ImageField = OutputField(default=None, description="The generated displacement map")
+
+
+@invocation("pbr_maps", title="PBR Maps", tags=["image", "material"], category="image", version="1.0.0")
+class PBRMapsInvocation(BaseInvocation, WithMetadata, WithBoard):
+    """Generate Normal, Displacement and Roughness Map from a given image"""
+
+    image: ImageField = InputField(description="Input image")
+    tile_size: int = InputField(default=512, description="Tile size")
+    border_mode: Literal["none", "seamless", "mirror", "replicate"] = InputField(
+        default="none", description="Border mode to apply to eliminate any artifacts or seams"
+    )
+
+    def invoke(self, context: InvocationContext) -> PBRMapsOutput:
+        image_pil = context.images.get_pil(self.image.image_name, mode="RGB")
+
+        def loader(model_path: pathlib.Path):
+            return PBRMapsGenerator.load_model(model_path, TorchDevice.choose_torch_device())
+
+        torch_device = TorchDevice.choose_torch_device()
+
+        with (
+            context.models.load_remote_model(NORMAL_MAP_MODEL, loader) as normal_map_model,
+            context.models.load_remote_model(OTHER_MAP_MODEL, loader) as other_map_model,
+        ):
+            assert isinstance(normal_map_model, PBR_RRDB_Net)
+            assert isinstance(other_map_model, PBR_RRDB_Net)
+            pbr_pipeline = PBRMapsGenerator(normal_map_model, other_map_model, torch_device)
+            normal_map, roughness_map, displacement_map = pbr_pipeline.generate_maps(
+                image_pil, self.tile_size, self.border_mode
+            )
+
+            normal_map = context.images.save(normal_map)
+            normal_map_field = ImageField(image_name=normal_map.image_name)
+
+            roughness_map = context.images.save(roughness_map)
+            roughness_map_field = ImageField(image_name=roughness_map.image_name)
+
+            displacement_map = context.images.save(displacement_map)
+            displacement_map_field = ImageField(image_name=displacement_map.image_name)
+
+        return PBRMapsOutput(
+            normal_map=normal_map_field, roughness_map=roughness_map_field, displacement_map=displacement_map_field
+        )
```
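The `border_mode` options suggest that the tiled model pass pads the input before slicing, so tile seams don't leave artifacts in the generated maps. The actual implementation lives in the new `image_ops.py` (+93 lines), which is not shown in this diff; the helper below is only a hypothetical illustration of the idea using plain PyTorch padding modes:

```python
# Hypothetical illustration of border_mode padding -- not the actual image_ops.py code.
import torch
import torch.nn.functional as F


def pad_for_tiling(image: torch.Tensor, pad: int, border_mode: str) -> torch.Tensor:
    """Pad a [B, C, H, W] image so tile borders have context.

    'seamless' wraps the image around (for tileable textures), 'mirror' reflects
    it at the edges, and 'replicate' repeats the edge pixels.
    """
    if border_mode == "none":
        return image
    mode = {"seamless": "circular", "mirror": "reflect", "replicate": "replicate"}[border_mode]
    return F.pad(image, (pad, pad, pad, pad), mode=mode)


x = torch.rand(1, 3, 512, 512)
print(pad_for_tiling(x, 32, "mirror").shape)  # torch.Size([1, 3, 576, 576])
```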
```diff
--- a/invokeai/app/invocations/z_image_denoise.py
+++ b/invokeai/app/invocations/z_image_denoise.py
@@ -1,3 +1,4 @@
+import inspect
 import math
 from contextlib import ExitStack
 from typing import Callable, Iterator, Optional, Tuple
```
```diff
@@ -5,6 +6,7 @@ from typing import Callable, Iterator, Optional, Tuple
 import einops
 import torch
 import torchvision.transforms as tv_transforms
+from diffusers.schedulers.scheduling_utils import SchedulerMixin
 from PIL import Image
 from torchvision.transforms.functional import resize as tv_resize
 from tqdm import tqdm
```
```diff
@@ -24,6 +26,7 @@ from invokeai.app.invocations.primitives import LatentsOutput
 from invokeai.app.invocations.z_image_control import ZImageControlField
 from invokeai.app.invocations.z_image_image_to_latents import ZImageImageToLatentsInvocation
 from invokeai.app.services.shared.invocation_context import InvocationContext
+from invokeai.backend.flux.schedulers import ZIMAGE_SCHEDULER_LABELS, ZIMAGE_SCHEDULER_MAP, ZIMAGE_SCHEDULER_NAME_VALUES
 from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelFormat
 from invokeai.backend.patches.layer_patcher import LayerPatcher
 from invokeai.backend.patches.lora_conversions.z_image_lora_constants import Z_IMAGE_LORA_TRANSFORMER_PREFIX
```
```diff
@@ -47,7 +50,7 @@ from invokeai.backend.z_image.z_image_transformer_patch import patch_transformer
     title="Denoise - Z-Image",
     tags=["image", "z-image"],
     category="image",
-    version="1.
+    version="1.3.0",
     classification=Classification.Prototype,
 )
 class ZImageDenoiseInvocation(BaseInvocation):
```
```diff
@@ -100,6 +103,13 @@ class ZImageDenoiseInvocation(BaseInvocation):
         description=FieldDescriptions.vae + " Required for control conditioning.",
         input=Input.Connection,
     )
+    # Scheduler selection for the denoising process
+    scheduler: ZIMAGE_SCHEDULER_NAME_VALUES = InputField(
+        default="euler",
+        description="Scheduler (sampler) for the denoising process. Euler is the default and recommended for "
+        "Z-Image-Turbo. Heun is 2nd-order (better quality, 2x slower). LCM is optimized for few steps.",
+        ui_choice_labels=ZIMAGE_SCHEDULER_LABELS,
+    )
 
     @torch.no_grad()
     def invoke(self, context: InvocationContext) -> LatentsOutput:
```
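The "2x slower" in the field description follows from how Heun samples: each user-facing step runs a first-order (predictor) and a second-order (corrector) model evaluation. Assuming the Z-Image scheduler map registers diffusers' `FlowMatchHeunDiscreteScheduler` for "heun" (the map itself is not shown in this diff), the interleaved internal timestep list can be inspected directly:

```python
# Sketch: Heun's interleaved timesteps double the model evaluations per user step.
from diffusers import FlowMatchHeunDiscreteScheduler

sched = FlowMatchHeunDiscreteScheduler(num_train_timesteps=1000)
sched.set_timesteps(num_inference_steps=8)

# Every timestep except the first is repeated (predictor + corrector stages),
# so 8 user-facing steps become roughly 2 * 8 - 1 = 15 internal steps.
print(len(sched.timesteps))  # 15

# The property the denoising loop below probes via hasattr():
print(sched.state_in_first_order)  # True until step() completes the predictor stage
```

This internal-vs-user-facing step mismatch is exactly what the progress accounting in the new denoising loop has to compensate for.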
```diff
@@ -361,15 +371,32 @@
         )
 
         step_callback = self._build_step_callback(context)
-
-
-
-
-
-
-
-
-
+
+        # Initialize the diffusers scheduler if not using built-in Euler
+        scheduler: SchedulerMixin | None = None
+        use_scheduler = self.scheduler != "euler"
+
+        if use_scheduler:
+            scheduler_class = ZIMAGE_SCHEDULER_MAP[self.scheduler]
+            scheduler = scheduler_class(
+                num_train_timesteps=1000,
+                shift=1.0,
+            )
+            # Set timesteps - LCM should use num_inference_steps (it has its own sigma schedule),
+            # while other schedulers can use custom sigmas if supported
+            is_lcm = self.scheduler == "lcm"
+            set_timesteps_sig = inspect.signature(scheduler.set_timesteps)
+            if not is_lcm and "sigmas" in set_timesteps_sig.parameters:
+                # Convert sigmas list to tensor for scheduler
+                scheduler.set_timesteps(sigmas=sigmas, device=device)
+            else:
+                # LCM or scheduler doesn't support custom sigmas - use num_inference_steps
+                scheduler.set_timesteps(num_inference_steps=total_steps, device=device)
+
+            # For Heun scheduler, the number of actual steps may differ
+            num_scheduler_steps = len(scheduler.timesteps)
+        else:
+            num_scheduler_steps = total_steps
 
         with ExitStack() as exit_stack:
             # Get transformer config to determine if it's quantized
```
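The `inspect.signature` check in the hunk above is a feature-detection pattern: rather than hard-coding which diffusers schedulers accept a custom `sigmas` schedule, the code probes `set_timesteps` at runtime. A standalone sketch of the same pattern, using `FlowMatchEulerDiscreteScheduler` purely as an example class (sigmas passed as a numpy array, which set_timesteps accepts):

```python
import inspect

import numpy as np
from diffusers import FlowMatchEulerDiscreteScheduler

scheduler = FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, shift=1.0)

# Probe set_timesteps for a `sigmas` parameter instead of hard-coding per-class behavior.
if "sigmas" in inspect.signature(scheduler.set_timesteps).parameters:
    # Custom sigma schedule, decreasing toward 0 (the terminal sigma is appended internally).
    scheduler.set_timesteps(sigmas=np.array([1.0, 0.75, 0.5, 0.25], dtype=np.float32))
else:
    scheduler.set_timesteps(num_inference_steps=4)

print(scheduler.timesteps)  # tensor([1000., 750., 500., 250.])
```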
```diff
@@ -503,91 +530,219 @@
                 )
             )
 
-            # Denoising loop
-            for
-
-
-
-            #
-            #
-            #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # Denoising loop - supports both built-in Euler and diffusers schedulers
+            # Track user-facing step for progress (accounts for Heun's double steps)
+            user_step = 0
+
+            if use_scheduler and scheduler is not None:
+                # Use diffusers scheduler for stepping
+                # Use tqdm with total_steps (user-facing steps) not num_scheduler_steps (internal steps)
+                # This ensures progress bar shows 1/8, 2/8, etc. even when scheduler uses more internal steps
+                pbar = tqdm(total=total_steps, desc="Denoising")
+                for step_index in range(num_scheduler_steps):
+                    sched_timestep = scheduler.timesteps[step_index]
+                    # Convert scheduler timestep (0-1000) to normalized sigma (0-1)
+                    sigma_curr = sched_timestep.item() / scheduler.config.num_train_timesteps
+
+                    # For Heun scheduler, track if we're in first or second order step
+                    is_heun = hasattr(scheduler, "state_in_first_order")
+                    in_first_order = scheduler.state_in_first_order if is_heun else True
+
+                    # Timestep tensor for Z-Image model
+                    # The model expects t=0 at start (noise) and t=1 at end (clean)
+                    model_t = 1.0 - sigma_curr
+                    timestep = torch.tensor([model_t], device=device, dtype=inference_dtype).expand(latents.shape[0])
+
+                    # Run transformer for positive prediction
+                    latent_model_input = latents.to(transformer.dtype)
+                    latent_model_input = latent_model_input.unsqueeze(2)  # Add frame dimension
+                    latent_model_input_list = list(latent_model_input.unbind(dim=0))
+
+                    # Determine if control should be applied at this step
+                    apply_control = control_extension is not None and control_extension.should_apply(
+                        user_step, total_steps
                     )
-                else:
-                    model_output = transformer(
-                        x=latent_model_input_list,
-                        t=timestep,
-                        cap_feats=[pos_prompt_embeds],
-                    )
-                    model_out_list = model_output[0]  # Extract list of tensors from tuple
 
-
-
-
+                    # Run forward pass
+                    if apply_control:
+                        model_out_list, _ = z_image_forward_with_control(
+                            transformer=transformer,
+                            x=latent_model_input_list,
+                            t=timestep,
+                            cap_feats=[pos_prompt_embeds],
+                            control_extension=control_extension,
+                        )
+                    else:
+                        model_output = transformer(
+                            x=latent_model_input_list,
+                            t=timestep,
+                            cap_feats=[pos_prompt_embeds],
+                        )
+                        model_out_list = model_output[0]
+
+                    noise_pred_cond = torch.stack([t.float() for t in model_out_list], dim=0)
+                    noise_pred_cond = noise_pred_cond.squeeze(2)
+                    noise_pred_cond = -noise_pred_cond  # Z-Image uses v-prediction with negation
+
+                    # Apply CFG if enabled
+                    if do_classifier_free_guidance and neg_prompt_embeds is not None:
+                        if apply_control:
+                            model_out_list_uncond, _ = z_image_forward_with_control(
+                                transformer=transformer,
+                                x=latent_model_input_list,
+                                t=timestep,
+                                cap_feats=[neg_prompt_embeds],
+                                control_extension=control_extension,
+                            )
+                        else:
+                            model_output_uncond = transformer(
+                                x=latent_model_input_list,
+                                t=timestep,
+                                cap_feats=[neg_prompt_embeds],
+                            )
+                            model_out_list_uncond = model_output_uncond[0]
+
+                        noise_pred_uncond = torch.stack([t.float() for t in model_out_list_uncond], dim=0)
+                        noise_pred_uncond = noise_pred_uncond.squeeze(2)
+                        noise_pred_uncond = -noise_pred_uncond
+                        noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_cond - noise_pred_uncond)
+                    else:
+                        noise_pred = noise_pred_cond
+
+                    # Use scheduler.step() for the update
+                    step_output = scheduler.step(model_output=noise_pred, timestep=sched_timestep, sample=latents)
+                    latents = step_output.prev_sample
+
+                    # Get sigma_prev for inpainting (next sigma value)
+                    if step_index + 1 < len(scheduler.sigmas):
+                        sigma_prev = scheduler.sigmas[step_index + 1].item()
+                    else:
+                        sigma_prev = 0.0
+
+                    if inpaint_extension is not None:
+                        latents = inpaint_extension.merge_intermediate_latents_with_init_latents(latents, sigma_prev)
+
+                    # For Heun, only increment user step after second-order step completes
+                    if is_heun:
+                        if not in_first_order:
+                            user_step += 1
+                            # Only call step_callback if we haven't exceeded total_steps
+                            if user_step <= total_steps:
+                                pbar.update(1)
+                                step_callback(
+                                    PipelineIntermediateState(
+                                        step=user_step,
+                                        order=2,
+                                        total_steps=total_steps,
+                                        timestep=int(sigma_curr * 1000),
+                                        latents=latents,
+                                    ),
+                                )
+                    else:
+                        # For LCM and other first-order schedulers
+                        user_step += 1
+                        # Only call step_callback if we haven't exceeded total_steps
+                        # (LCM scheduler may have more internal steps than user-facing steps)
+                        if user_step <= total_steps:
+                            pbar.update(1)
+                            step_callback(
+                                PipelineIntermediateState(
+                                    step=user_step,
+                                    order=1,
+                                    total_steps=total_steps,
+                                    timestep=int(sigma_curr * 1000),
+                                    latents=latents,
+                                ),
+                            )
+                pbar.close()
+            else:
+                # Original Euler implementation (default, optimized for Z-Image)
+                for step_idx in tqdm(range(total_steps)):
+                    sigma_curr = sigmas[step_idx]
+                    sigma_prev = sigmas[step_idx + 1]
+
+                    # Timestep tensor for Z-Image model
+                    # The model expects t=0 at start (noise) and t=1 at end (clean)
+                    # Sigma goes from 1 (noise) to 0 (clean), so model_t = 1 - sigma
+                    model_t = 1.0 - sigma_curr
+                    timestep = torch.tensor([model_t], device=device, dtype=inference_dtype).expand(latents.shape[0])
+
+                    # Run transformer for positive prediction
+                    # Z-Image transformer expects: x as list of [C, 1, H, W] tensors, t, cap_feats as list
+                    # Prepare latent input: [B, C, H, W] -> [B, C, 1, H, W] -> list of [C, 1, H, W]
+                    latent_model_input = latents.to(transformer.dtype)
+                    latent_model_input = latent_model_input.unsqueeze(2)  # Add frame dimension
+                    latent_model_input_list = list(latent_model_input.unbind(dim=0))
+
+                    # Determine if control should be applied at this step
+                    apply_control = control_extension is not None and control_extension.should_apply(
+                        step_idx, total_steps
+                    )
 
-
-            if do_classifier_free_guidance and neg_prompt_embeds is not None:
+                    # Run forward pass - use custom forward with control if extension is active
                     if apply_control:
-
+                        model_out_list, _ = z_image_forward_with_control(
                             transformer=transformer,
                             x=latent_model_input_list,
                             t=timestep,
-                            cap_feats=[
+                            cap_feats=[pos_prompt_embeds],
                             control_extension=control_extension,
                         )
                     else:
-
+                        model_output = transformer(
                             x=latent_model_input_list,
                             t=timestep,
-                            cap_feats=[
+                            cap_feats=[pos_prompt_embeds],
                         )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                        model_out_list = model_output[0]  # Extract list of tensors from tuple
+
+                    noise_pred_cond = torch.stack([t.float() for t in model_out_list], dim=0)
+                    noise_pred_cond = noise_pred_cond.squeeze(2)  # Remove frame dimension
+                    noise_pred_cond = -noise_pred_cond  # Z-Image uses v-prediction with negation
+
+                    # Apply CFG if enabled
+                    if do_classifier_free_guidance and neg_prompt_embeds is not None:
+                        if apply_control:
+                            model_out_list_uncond, _ = z_image_forward_with_control(
+                                transformer=transformer,
+                                x=latent_model_input_list,
+                                t=timestep,
+                                cap_feats=[neg_prompt_embeds],
+                                control_extension=control_extension,
+                            )
+                        else:
+                            model_output_uncond = transformer(
+                                x=latent_model_input_list,
+                                t=timestep,
+                                cap_feats=[neg_prompt_embeds],
+                            )
+                            model_out_list_uncond = model_output_uncond[0]  # Extract list of tensors from tuple
+
+                        noise_pred_uncond = torch.stack([t.float() for t in model_out_list_uncond], dim=0)
+                        noise_pred_uncond = noise_pred_uncond.squeeze(2)
+                        noise_pred_uncond = -noise_pred_uncond
+                        noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_cond - noise_pred_uncond)
+                    else:
+                        noise_pred = noise_pred_cond
+
+                    # Euler step
+                    latents_dtype = latents.dtype
+                    latents = latents.to(dtype=torch.float32)
+                    latents = latents + (sigma_prev - sigma_curr) * noise_pred
+                    latents = latents.to(dtype=latents_dtype)
+
+                    if inpaint_extension is not None:
+                        latents = inpaint_extension.merge_intermediate_latents_with_init_latents(latents, sigma_prev)
+
+                    step_callback(
+                        PipelineIntermediateState(
+                            step=step_idx + 1,
+                            order=1,
+                            total_steps=total_steps,
+                            timestep=int(sigma_curr * 1000),
+                            latents=latents,
+                        ),
+                    )
 
         return latents
 
```
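Both branches of that loop share the same core arithmetic: classifier-free guidance blends the conditional and unconditional velocity predictions, and the default branch then integrates the rectified-flow ODE with a plain Euler step, `x <- x + (sigma_prev - sigma_curr) * v`. A self-contained sketch with toy tensors (the names here are illustrative, not InvokeAI API):

```python
import torch


def cfg(v_cond: torch.Tensor, v_uncond: torch.Tensor, guidance_scale: float) -> torch.Tensor:
    # Same formula as the loop above; guidance_scale=1.0 reduces to v_cond.
    return v_uncond + guidance_scale * (v_cond - v_uncond)


def euler_step(x: torch.Tensor, v: torch.Tensor, sigma_curr: float, sigma_prev: float) -> torch.Tensor:
    # Sigma decreases from 1 (pure noise) to 0 (clean), so the increment is
    # negative-signed and each step walks the latents toward the clean image.
    return x + (sigma_prev - sigma_curr) * v


# Toy 4-step schedule, matching the loop's use of sigmas[i] and sigmas[i + 1].
sigmas = [1.0, 0.75, 0.5, 0.25, 0.0]
x = torch.randn(1, 16, 32, 32)
for i in range(len(sigmas) - 1):
    v = -torch.randn_like(x)  # stand-in for the negated model prediction
    x = euler_step(x, cfg(v, v, guidance_scale=4.0), sigmas[i], sigmas[i + 1])
print(x.shape)  # torch.Size([1, 16, 32, 32])
```

The diffusers-scheduler branch delegates this update to `scheduler.step()` instead, which is what allows Heun's predictor/corrector pair and LCM's custom sigma schedule to slot in without changing the surrounding loop.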