InvokeAI 6.10.0rc1__py3-none-any.whl → 6.10.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. invokeai/app/invocations/flux_denoise.py +15 -1
  2. invokeai/app/invocations/pbr_maps.py +59 -0
  3. invokeai/app/invocations/z_image_denoise.py +237 -82
  4. invokeai/backend/flux/denoise.py +196 -11
  5. invokeai/backend/flux/schedulers.py +62 -0
  6. invokeai/backend/image_util/pbr_maps/architecture/block.py +367 -0
  7. invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py +70 -0
  8. invokeai/backend/image_util/pbr_maps/pbr_maps.py +141 -0
  9. invokeai/backend/image_util/pbr_maps/utils/image_ops.py +93 -0
  10. invokeai/backend/model_manager/configs/lora.py +36 -0
  11. invokeai/backend/model_manager/load/load_default.py +1 -0
  12. invokeai/backend/model_manager/load/model_loaders/cogview4.py +2 -1
  13. invokeai/backend/model_manager/load/model_loaders/flux.py +13 -6
  14. invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py +4 -2
  15. invokeai/backend/model_manager/load/model_loaders/onnx.py +1 -0
  16. invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +3 -1
  17. invokeai/backend/model_manager/load/model_loaders/z_image.py +37 -3
  18. invokeai/backend/model_manager/starter_models.py +13 -4
  19. invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py +39 -5
  20. invokeai/backend/quantization/gguf/ggml_tensor.py +15 -4
  21. invokeai/backend/z_image/extensions/regional_prompting_extension.py +10 -12
  22. invokeai/frontend/web/dist/assets/App-DllqPQ3j.js +161 -0
  23. invokeai/frontend/web/dist/assets/{browser-ponyfill-DHZxq1nk.js → browser-ponyfill-BP0RxJ4G.js} +1 -1
  24. invokeai/frontend/web/dist/assets/{index-dgSJAY--.js → index-B44qKjrs.js} +51 -51
  25. invokeai/frontend/web/dist/index.html +1 -1
  26. invokeai/frontend/web/dist/locales/en-GB.json +1 -0
  27. invokeai/frontend/web/dist/locales/en.json +11 -5
  28. invokeai/version/invokeai_version.py +1 -1
  29. {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/METADATA +2 -2
  30. {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/RECORD +36 -29
  31. invokeai/frontend/web/dist/assets/App-CYhlZO3Q.js +0 -161
  32. {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/WHEEL +0 -0
  33. {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/entry_points.txt +0 -0
  34. {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/licenses/LICENSE +0 -0
  35. {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
  36. {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
  37. {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/top_level.txt +0 -0
invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py
@@ -0,0 +1,70 @@
+ # Original: https://github.com/joeyballentine/Material-Map-Generator
+ # Adopted and optimized for Invoke AI
+
+ import math
+ from typing import Literal, Optional
+
+ import torch
+ import torch.nn as nn
+
+ import invokeai.backend.image_util.pbr_maps.architecture.block as B
+
+ UPSCALE_MODE = Literal["upconv", "pixelshuffle"]
+
+
+ class PBR_RRDB_Net(nn.Module):
+     def __init__(
+         self,
+         in_nc: int,
+         out_nc: int,
+         nf: int,
+         nb: int,
+         gc: int = 32,
+         upscale: int = 4,
+         norm_type: Optional[B.NORMALIZATION_LAYER_TYPE] = None,
+         act_type: B.ACTIVATION_LAYER_TYPE = "leakyrelu",
+         mode: B.BLOCK_MODE = "CNA",
+         res_scale: int = 1,
+         upsample_mode: UPSCALE_MODE = "upconv",
+     ):
+         super(PBR_RRDB_Net, self).__init__()
+         n_upscale = int(math.log(upscale, 2))
+         if upscale == 3:
+             n_upscale = 1
+
+         fea_conv = B.conv_block(in_nc, nf, kernel_size=3, norm_type=None, act_type=None)
+         rb_blocks = [
+             B.RRDB(
+                 nf,
+                 kernel_size=3,
+                 gc=32,
+                 stride=1,
+                 bias=True,
+                 pad_type="zero",
+                 norm_type=norm_type,
+                 act_type=act_type,
+                 mode="CNA",
+             )
+             for _ in range(nb)
+         ]
+         LR_conv = B.conv_block(nf, nf, kernel_size=3, norm_type=norm_type, act_type=None, mode=mode)
+
+         if upsample_mode == "upconv":
+             upsample_block = B.upconv_block
+         elif upsample_mode == "pixelshuffle":
+             upsample_block = B.pixelshuffle_block
+
+         if upscale == 3:
+             upsampler = upsample_block(nf, nf, 3, act_type=act_type)
+         else:
+             upsampler = [upsample_block(nf, nf, act_type=act_type) for _ in range(n_upscale)]
+
+         HR_conv0 = B.conv_block(nf, nf, kernel_size=3, norm_type=None, act_type=act_type)
+         HR_conv1 = B.conv_block(nf, out_nc, kernel_size=3, norm_type=None, act_type=None)
+
+         self.model = B.sequential(
+             fea_conv, B.ShortcutBlock(B.sequential(*rb_blocks, LR_conv)), *upsampler, HR_conv0, HR_conv1
+         )
+
+     def forward(self, x: torch.Tensor):
+         return self.model(x)
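For orientation, here is a minimal smoke-test sketch of the network added above (not part of the diff). It assumes the `architecture.block` module referenced as `B` provides the helpers used in `__init__`; the constructor arguments mirror the ones `PBRMapsGenerator.load_model` passes in the next file.

# Hedged sketch: instantiate the net as the loader below does and run a dummy tensor
# through it. With upscale=1 no upsampler blocks are added, so the output keeps the
# input resolution.
import torch

from invokeai.backend.image_util.pbr_maps.architecture.pbr_rrdb_net import PBR_RRDB_Net

model = PBR_RRDB_Net(3, 3, 32, 12, gc=32, upscale=1, norm_type=None,
                     act_type="leakyrelu", mode="CNA", res_scale=1, upsample_mode="upconv")
model.eval()
with torch.no_grad():
    out = model(torch.rand(1, 3, 64, 64))  # (batch, channels, H, W)
print(out.shape)  # expected: torch.Size([1, 3, 64, 64])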
invokeai/backend/image_util/pbr_maps/pbr_maps.py
@@ -0,0 +1,141 @@
+ # Original: https://github.com/joeyballentine/Material-Map-Generator
+ # Adopted and optimized for Invoke AI
+
+ import pathlib
+ from typing import Any, Literal
+
+ import cv2
+ import numpy as np
+ import numpy.typing as npt
+ import torch
+ from PIL import Image
+ from safetensors.torch import load_file
+
+ from invokeai.backend.image_util.pbr_maps.architecture.pbr_rrdb_net import PBR_RRDB_Net
+ from invokeai.backend.image_util.pbr_maps.utils.image_ops import crop_seamless, esrgan_launcher_split_merge
+
+ NORMAL_MAP_MODEL = (
+     "https://huggingface.co/InvokeAI/pbr-material-maps/resolve/main/normal_map_generator.safetensors?download=true"
+ )
+ OTHER_MAP_MODEL = (
+     "https://huggingface.co/InvokeAI/pbr-material-maps/resolve/main/franken_map_generator.safetensors?download=true"
+ )
+
+
+ class PBRMapsGenerator:
+     def __init__(self, normal_map_model: PBR_RRDB_Net, other_map_model: PBR_RRDB_Net, device: torch.device) -> None:
+         self.normal_map_model = normal_map_model
+         self.other_map_model = other_map_model
+         self.device = device
+
+     @staticmethod
+     def load_model(model_path: pathlib.Path, device: torch.device) -> PBR_RRDB_Net:
+         state_dict = load_file(model_path.as_posix(), device=device.type)
+
+         model = PBR_RRDB_Net(
+             3,
+             3,
+             32,
+             12,
+             gc=32,
+             upscale=1,
+             norm_type=None,
+             act_type="leakyrelu",
+             mode="CNA",
+             res_scale=1,
+             upsample_mode="upconv",
+         )
+
+         model.load_state_dict(state_dict, strict=False)
+
+         del state_dict
+         if torch.cuda.is_available() and device.type == "cuda":
+             torch.cuda.empty_cache()
+
+         model.eval()
+
+         for _, v in model.named_parameters():
+             v.requires_grad = False
+
+         return model.to(device)
+
+     def process(self, img: npt.NDArray[Any], model: PBR_RRDB_Net):
+         img = img.astype(np.float32) / np.iinfo(img.dtype).max
+         img = img[..., ::-1].copy()
+         tensor_img = torch.tensor(img).permute(2, 0, 1).unsqueeze(0).to(self.device)
+
+         with torch.no_grad():
+             output = model(tensor_img).data.squeeze(0).float().cpu().clamp_(0, 1).numpy()
+         output = output[[2, 1, 0], :, :]
+         output = np.transpose(output, (1, 2, 0))
+         output = (output * 255.0).round()
+         return output
+
+     def _cv2_to_pil(self, image: npt.NDArray[Any]):
+         return Image.fromarray(cv2.cvtColor(image.astype(np.uint8), cv2.COLOR_RGB2BGR))
+
+     def generate_maps(
+         self,
+         image: Image.Image,
+         tile_size: int = 512,
+         border_mode: Literal["none", "seamless", "mirror", "replicate"] = "none",
+     ):
+         """
+         Generate PBR texture maps (normal, roughness, and displacement) from an input image.
+         The image can optionally be padded before inference to control how borders are treated,
+         which can help create seamless or edge-consistent textures.
+
+         Args:
+             image: Source image used to generate the PBR maps.
+             tile_size: Maximum tile size used for tiled inference. If the image is larger than
+                 this size in either dimension, it will be split into tiles for processing and
+                 then merged.
+
+             border_mode: Strategy for padding the image before inference:
+                 - "none": No padding is applied; the image is processed as-is.
+                 - "seamless": Pads the image using wrap-around tiling
+                   (`cv2.BORDER_WRAP`) to help produce seamless textures.
+                 - "mirror": Pads the image by mirroring border pixels
+                   (`cv2.BORDER_REFLECT_101`) to reduce edge artifacts.
+                 - "replicate": Pads the image by replicating the edge pixels outward
+                   (`cv2.BORDER_REPLICATE`).
+
+         Returns:
+             A tuple of three PIL Images:
+                 - normal_map: RGB normal map generated from the input.
+                 - roughness: Single-channel roughness map extracted from the second model output.
+                 - displacement: Single-channel displacement (height) map extracted from the
+                   second model output.
+         """
+
+         models = [self.normal_map_model, self.other_map_model]
+         np_image = np.array(image).astype(np.uint8)
+
+         match border_mode:
+             case "seamless":
+                 np_image = cv2.copyMakeBorder(np_image, 16, 16, 16, 16, cv2.BORDER_WRAP)
+             case "mirror":
+                 np_image = cv2.copyMakeBorder(np_image, 16, 16, 16, 16, cv2.BORDER_REFLECT_101)
+             case "replicate":
+                 np_image = cv2.copyMakeBorder(np_image, 16, 16, 16, 16, cv2.BORDER_REPLICATE)
+             case "none":
+                 pass
+
+         img_height, img_width = np_image.shape[:2]
+
+         # Checking whether to perform tiled inference
+         do_split = img_height > tile_size or img_width > tile_size
+
+         if do_split:
+             rlts = esrgan_launcher_split_merge(np_image, self.process, models, scale_factor=1, tile_size=tile_size)
+         else:
+             rlts = [self.process(np_image, model) for model in models]
+
+         if border_mode != "none":
+             rlts = [crop_seamless(rlt) for rlt in rlts]
+
+         normal_map = self._cv2_to_pil(rlts[0])
+         roughness = self._cv2_to_pil(rlts[1][:, :, 1])
+         displacement = self._cv2_to_pil(rlts[1][:, :, 0])
+
+         return normal_map, roughness, displacement
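A possible end-to-end usage of the generator above (illustrative only, not part of the diff). It assumes the two safetensors checkpoints referenced by NORMAL_MAP_MODEL and OTHER_MAP_MODEL have already been downloaded; the local file names and the input image are placeholders.

# Hedged usage sketch; paths and the input image are placeholders.
import pathlib

import torch
from PIL import Image

from invokeai.backend.image_util.pbr_maps.pbr_maps import PBRMapsGenerator

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
normal_model = PBRMapsGenerator.load_model(pathlib.Path("normal_map_generator.safetensors"), device)
other_model = PBRMapsGenerator.load_model(pathlib.Path("franken_map_generator.safetensors"), device)

generator = PBRMapsGenerator(normal_model, other_model, device)
normal_map, roughness, displacement = generator.generate_maps(
    Image.open("albedo.png").convert("RGB"),
    tile_size=512,
    border_mode="seamless",  # wrap-pad by 16 px so the resulting maps stay tileable
)
normal_map.save("normal.png")
roughness.save("roughness.png")
displacement.save("displacement.png")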
invokeai/backend/image_util/pbr_maps/utils/image_ops.py
@@ -0,0 +1,93 @@
+ # Original: https://github.com/joeyballentine/Material-Map-Generator
+ # Adopted and optimized for Invoke AI
+
+ import math
+ from typing import Any, Callable, List
+
+ import numpy as np
+ import numpy.typing as npt
+
+ from invokeai.backend.image_util.pbr_maps.architecture.pbr_rrdb_net import PBR_RRDB_Net
+
+
+ def crop_seamless(img: npt.NDArray[Any]):
+     img_height, img_width = img.shape[:2]
+     y, x = 16, 16
+     h, w = img_height - 32, img_width - 32
+     img = img[y : y + h, x : x + w]
+     return img
+
+
+ # from https://github.com/ata4/esrgan-launcher/blob/master/upscale.py
+ def esrgan_launcher_split_merge(
+     input_image: npt.NDArray[Any],
+     upscale_function: Callable[[npt.NDArray[Any], PBR_RRDB_Net], npt.NDArray[Any]],
+     models: List[PBR_RRDB_Net],
+     scale_factor: int = 4,
+     tile_size: int = 512,
+     tile_padding: float = 0.125,
+ ):
+     width, height, depth = input_image.shape
+     output_width = width * scale_factor
+     output_height = height * scale_factor
+     output_shape = (output_width, output_height, depth)
+
+     # start with black image
+     output_images = [np.zeros(output_shape, np.uint8) for _ in range(len(models))]
+
+     tile_padding = math.ceil(tile_size * tile_padding)
+     tile_size = math.ceil(tile_size / scale_factor)
+
+     tiles_x = math.ceil(width / tile_size)
+     tiles_y = math.ceil(height / tile_size)
+
+     for y in range(tiles_y):
+         for x in range(tiles_x):
+             # extract tile from input image
+             ofs_x = x * tile_size
+             ofs_y = y * tile_size
+
+             # input tile area on total image
+             input_start_x = ofs_x
+             input_end_x = min(ofs_x + tile_size, width)
+
+             input_start_y = ofs_y
+             input_end_y = min(ofs_y + tile_size, height)
+
+             # input tile area on total image with padding
+             input_start_x_pad = max(input_start_x - tile_padding, 0)
+             input_end_x_pad = min(input_end_x + tile_padding, width)
+
+             input_start_y_pad = max(input_start_y - tile_padding, 0)
+             input_end_y_pad = min(input_end_y + tile_padding, height)
+
+             # input tile dimensions
+             input_tile_width = input_end_x - input_start_x
+             input_tile_height = input_end_y - input_start_y
+
+             input_tile = input_image[input_start_x_pad:input_end_x_pad, input_start_y_pad:input_end_y_pad]
+
+             for idx, model in enumerate(models):
+                 # upscale tile
+                 output_tile = upscale_function(input_tile, model)
+
+                 # output tile area on total image
+                 output_start_x = input_start_x * scale_factor
+                 output_end_x = input_end_x * scale_factor
+
+                 output_start_y = input_start_y * scale_factor
+                 output_end_y = input_end_y * scale_factor
+
+                 # output tile area without padding
+                 output_start_x_tile = (input_start_x - input_start_x_pad) * scale_factor
+                 output_end_x_tile = output_start_x_tile + input_tile_width * scale_factor
+
+                 output_start_y_tile = (input_start_y - input_start_y_pad) * scale_factor
+                 output_end_y_tile = output_start_y_tile + input_tile_height * scale_factor
+
+                 # put tile into output image
+                 output_images[idx][output_start_x:output_end_x, output_start_y:output_end_y] = output_tile[
+                     output_start_x_tile:output_end_x_tile, output_start_y_tile:output_end_y_tile
+                 ]
+
+     return output_images
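To make the tile bookkeeping concrete, the sketch below (illustrative only, not part of the diff) walks through the coordinate math for a 1024x1024 input with the defaults used by generate_maps: scale_factor=1, tile_size=512, tile_padding=0.125, i.e. 64 px of overlapping context per side.

# Illustrative tile-coordinate walk-through matching esrgan_launcher_split_merge above.
import math

width = height = 1024
tile_size = 512
pad = math.ceil(tile_size * 0.125)  # 64 px of overlap fed to the model, cropped away on merge

for y in range(math.ceil(height / tile_size)):
    for x in range(math.ceil(width / tile_size)):
        x0, x1 = x * tile_size, min((x + 1) * tile_size, width)
        x0_pad, x1_pad = max(x0 - pad, 0), min(x1 + pad, width)
        print(f"tile ({x},{y}): core x {x0}:{x1}, padded x {x0_pad}:{x1_pad}")
# -> 4 tiles; interior edges receive 64 px of padded context, image borders receive none.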
invokeai/backend/model_manager/configs/lora.py
@@ -227,6 +227,42 @@ class LoRA_LyCORIS_ZImage_Config(LoRA_LyCORIS_Config_Base, Config_Base):

    base: Literal[BaseModelType.ZImage] = Field(default=BaseModelType.ZImage)

+     @classmethod
+     def _validate_looks_like_lora(cls, mod: ModelOnDisk) -> None:
+         """Z-Image LoRAs have different key patterns than SD/SDXL LoRAs.
+
+         Z-Image LoRAs use keys like:
+         - diffusion_model.layers.X.attention.to_k.lora_down.weight (DoRA format)
+         - diffusion_model.layers.X.attention.to_k.lora_A.weight (PEFT format)
+         - diffusion_model.layers.X.attention.to_k.dora_scale (DoRA scale)
+         """
+         state_dict = mod.load_state_dict()
+
+         # Check for Z-Image specific LoRA patterns
+         has_z_image_lora_keys = state_dict_has_any_keys_starting_with(
+             state_dict,
+             {
+                 "diffusion_model.layers.",  # Z-Image S3-DiT layer pattern
+             },
+         )
+
+         # Also check for LoRA weight suffixes (various formats)
+         has_lora_suffix = state_dict_has_any_keys_ending_with(
+             state_dict,
+             {
+                 "lora_A.weight",
+                 "lora_B.weight",
+                 "lora_down.weight",
+                 "lora_up.weight",
+                 "dora_scale",
+             },
+         )
+
+         if has_z_image_lora_keys and has_lora_suffix:
+             return
+
+         raise NotAMatchError("model does not match Z-Image LoRA heuristics")
+
    @classmethod
    def _get_base_or_raise(cls, mod: ModelOnDisk) -> BaseModelType:
        """Z-Image LoRAs are identified by their diffusion_model.layers structure.
invokeai/backend/model_manager/load/load_default.py
@@ -75,6 +75,7 @@ class ModelLoader(ModelLoaderBase):

        config.path = str(self._get_model_path(config))
        self._ram_cache.make_room(self.get_size_fs(config, Path(config.path), submodel_type))
+         self._logger.info(f"Loading model '{stats_name}' into RAM cache..., config={config}")
        loaded_model = self._load_model(config, submodel_type)

        self._ram_cache.put(
invokeai/backend/model_manager/load/model_loaders/cogview4.py
@@ -45,12 +45,13 @@ class CogView4DiffusersModel(GenericDiffusersLoader):
                model_path,
                torch_dtype=dtype,
                variant=variant,
+                 local_files_only=True,
            )
        except OSError as e:
            if variant and "no file named" in str(
                e
            ):  # try without the variant, just in case user's preferences changed
-                 result = load_class.from_pretrained(model_path, torch_dtype=dtype)
+                 result = load_class.from_pretrained(model_path, torch_dtype=dtype, local_files_only=True)
            else:
                raise e

invokeai/backend/model_manager/load/model_loaders/flux.py
@@ -122,9 +122,9 @@ class CLIPDiffusersLoader(ModelLoader):

        match submodel_type:
            case SubModelType.Tokenizer:
-                 return CLIPTokenizer.from_pretrained(Path(config.path) / "tokenizer")
+                 return CLIPTokenizer.from_pretrained(Path(config.path) / "tokenizer", local_files_only=True)
            case SubModelType.TextEncoder:
-                 return CLIPTextModel.from_pretrained(Path(config.path) / "text_encoder")
+                 return CLIPTextModel.from_pretrained(Path(config.path) / "text_encoder", local_files_only=True)

        raise ValueError(
            f"Only Tokenizer and TextEncoder submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}"
@@ -148,10 +148,12 @@ class BnbQuantizedLlmInt8bCheckpointModel(ModelLoader):
        )
        match submodel_type:
            case SubModelType.Tokenizer2 | SubModelType.Tokenizer3:
-                 return T5TokenizerFast.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512)
+                 return T5TokenizerFast.from_pretrained(
+                     Path(config.path) / "tokenizer_2", max_length=512, local_files_only=True
+                 )
            case SubModelType.TextEncoder2 | SubModelType.TextEncoder3:
                te2_model_path = Path(config.path) / "text_encoder_2"
-                 model_config = AutoConfig.from_pretrained(te2_model_path)
+                 model_config = AutoConfig.from_pretrained(te2_model_path, local_files_only=True)
                with accelerate.init_empty_weights():
                    model = AutoModelForTextEncoding.from_config(model_config)
                model = quantize_model_llm_int8(model, modules_to_not_convert=set())
@@ -192,10 +194,15 @@ class T5EncoderCheckpointModel(ModelLoader):

        match submodel_type:
            case SubModelType.Tokenizer2 | SubModelType.Tokenizer3:
-                 return T5TokenizerFast.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512)
+                 return T5TokenizerFast.from_pretrained(
+                     Path(config.path) / "tokenizer_2", max_length=512, local_files_only=True
+                 )
            case SubModelType.TextEncoder2 | SubModelType.TextEncoder3:
                return T5EncoderModel.from_pretrained(
-                     Path(config.path) / "text_encoder_2", torch_dtype="auto", low_cpu_mem_usage=True
+                     Path(config.path) / "text_encoder_2",
+                     torch_dtype="auto",
+                     low_cpu_mem_usage=True,
+                     local_files_only=True,
                )

        raise ValueError(
invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py
@@ -37,12 +37,14 @@ class GenericDiffusersLoader(ModelLoader):
        repo_variant = config.repo_variant if isinstance(config, Diffusers_Config_Base) else None
        variant = repo_variant.value if repo_variant else None
        try:
-             result: AnyModel = model_class.from_pretrained(model_path, torch_dtype=self._torch_dtype, variant=variant)
+             result: AnyModel = model_class.from_pretrained(
+                 model_path, torch_dtype=self._torch_dtype, variant=variant, local_files_only=True
+             )
        except OSError as e:
            if variant and "no file named" in str(
                e
            ):  # try without the variant, just in case user's preferences changed
-                 result = model_class.from_pretrained(model_path, torch_dtype=self._torch_dtype)
+                 result = model_class.from_pretrained(model_path, torch_dtype=self._torch_dtype, local_files_only=True)
            else:
                raise e
        return result
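Several loaders in this release adopt the same pattern: pass local_files_only=True so diffusers/transformers never reach out to the Hub for models that are already on disk, and retry without the variant if the variant weights are missing. A generic sketch of that pattern (not part of the diff; AutoencoderKL is just a stand-in for whatever class the loader resolves as load_class/model_class):

# Hedged sketch of the local_files_only + variant-fallback pattern used by the loaders above.
import torch
from diffusers import AutoencoderKL  # stand-in model class for illustration

def load_local(model_path: str, dtype: torch.dtype, variant: str | None):
    try:
        return AutoencoderKL.from_pretrained(
            model_path, torch_dtype=dtype, variant=variant, local_files_only=True
        )
    except OSError as e:
        if variant and "no file named" in str(e):
            # The requested variant (e.g. fp16) is not on disk; retry with the default weights.
            return AutoencoderKL.from_pretrained(model_path, torch_dtype=dtype, local_files_only=True)
        raise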
invokeai/backend/model_manager/load/model_loaders/onnx.py
@@ -38,5 +38,6 @@ class OnnyxDiffusersModel(GenericDiffusersLoader):
            model_path,
            torch_dtype=self._torch_dtype,
            variant=variant,
+             local_files_only=True,
        )
        return result
invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
@@ -80,12 +80,13 @@ class StableDiffusionDiffusersModel(GenericDiffusersLoader):
                model_path,
                torch_dtype=self._torch_dtype,
                variant=variant,
+                 local_files_only=True,
            )
        except OSError as e:
            if variant and "no file named" in str(
                e
            ):  # try without the variant, just in case user's preferences changed
-                 result = load_class.from_pretrained(model_path, torch_dtype=self._torch_dtype)
+                 result = load_class.from_pretrained(model_path, torch_dtype=self._torch_dtype, local_files_only=True)
            else:
                raise e

@@ -139,6 +140,7 @@ class StableDiffusionDiffusersModel(GenericDiffusersLoader):
        # Some weights of the model checkpoint were not used when initializing CLIPTextModelWithProjection:
        # ['text_model.embeddings.position_ids']

+         self._logger.info(f"Loading model from single file at {config.path} using {load_class.__name__}")
        with SilenceWarnings():
            pipeline = load_class.from_single_file(config.path, torch_dtype=self._torch_dtype)

invokeai/backend/model_manager/load/model_loaders/z_image.py
@@ -384,15 +384,19 @@ class Qwen3EncoderLoader(ModelLoader):

        match submodel_type:
            case SubModelType.Tokenizer:
-                 return AutoTokenizer.from_pretrained(tokenizer_path)
+                 # Use local_files_only=True to prevent network requests for validation
+                 # The tokenizer files should already exist locally in the model directory
+                 return AutoTokenizer.from_pretrained(tokenizer_path, local_files_only=True)
            case SubModelType.TextEncoder:
                # Determine safe dtype based on target device capabilities
                target_device = TorchDevice.choose_torch_device()
                model_dtype = TorchDevice.choose_bfloat16_safe_dtype(target_device)
+                 # Use local_files_only=True to prevent network requests for validation
                return Qwen3ForCausalLM.from_pretrained(
                    text_encoder_path,
                    torch_dtype=model_dtype,
                    low_cpu_mem_usage=True,
+                     local_files_only=True,
                )

        raise ValueError(
@@ -526,12 +530,27 @@ class Qwen3EncoderCheckpointLoader(ModelLoader):
                return self._load_from_singlefile(config)
            case SubModelType.Tokenizer:
                # For single-file Qwen3, load tokenizer from HuggingFace
-                 return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+                 # Try local cache first to support offline usage after initial download
+                 return self._load_tokenizer_with_offline_fallback()

        raise ValueError(
            f"Only TextEncoder and Tokenizer submodels are supported. Received: {submodel_type.value if submodel_type else 'None'}"
        )

+     def _load_tokenizer_with_offline_fallback(self) -> AnyModel:
+         """Load tokenizer with local_files_only fallback for offline support.
+
+         First tries to load from local cache (offline), falling back to network download
+         if the tokenizer hasn't been cached yet. This ensures offline operation after
+         the initial download.
+         """
+         try:
+             # Try loading from local cache first (supports offline usage)
+             return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE, local_files_only=True)
+         except OSError:
+             # Not in cache yet, download from HuggingFace
+             return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+
    def _load_from_singlefile(
        self,
        config: AnyModelConfig,
@@ -686,12 +705,27 @@ class Qwen3EncoderGGUFLoader(ModelLoader):
                return self._load_from_gguf(config)
            case SubModelType.Tokenizer:
                # For GGUF Qwen3, load tokenizer from HuggingFace
-                 return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+                 # Try local cache first to support offline usage after initial download
+                 return self._load_tokenizer_with_offline_fallback()

        raise ValueError(
            f"Only TextEncoder and Tokenizer submodels are supported. Received: {submodel_type.value if submodel_type else 'None'}"
        )

+     def _load_tokenizer_with_offline_fallback(self) -> AnyModel:
+         """Load tokenizer with local_files_only fallback for offline support.
+
+         First tries to load from local cache (offline), falling back to network download
+         if the tokenizer hasn't been cached yet. This ensures offline operation after
+         the initial download.
+         """
+         try:
+             # Try loading from local cache first (supports offline usage)
+             return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE, local_files_only=True)
+         except OSError:
+             # Not in cache yet, download from HuggingFace
+             return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
+
    def _load_from_gguf(
        self,
        config: AnyModelConfig,
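Both Qwen3 tokenizer loaders above now share the same offline-first helper. A generic standalone sketch of that cache-then-network pattern, using transformers' AutoTokenizer as in the diff:

# Generic sketch of the cache-first tokenizer loading introduced above.
from transformers import AutoTokenizer

def load_tokenizer_offline_first(source: str):
    try:
        # Served from the local Hugging Face cache; no network traffic once cached.
        return AutoTokenizer.from_pretrained(source, local_files_only=True)
    except OSError:
        # First run only: fall back to downloading from the Hub.
        return AutoTokenizer.from_pretrained(source)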
invokeai/backend/model_manager/starter_models.py
@@ -720,20 +720,20 @@ z_image_turbo_quantized = StarterModel(
    name="Z-Image Turbo (quantized)",
    base=BaseModelType.ZImage,
    source="https://huggingface.co/leejet/Z-Image-Turbo-GGUF/resolve/main/z_image_turbo-Q4_K.gguf",
-     description="Z-Image Turbo quantized to GGUF Q4_K format. Requires separate Qwen3 text encoder. ~4GB",
+     description="Z-Image Turbo quantized to GGUF Q4_K format. Requires standalone Qwen3 text encoder and Flux VAE. ~4GB",
    type=ModelType.Main,
    format=ModelFormat.GGUFQuantized,
-     dependencies=[z_image_qwen3_encoder_quantized],
+     dependencies=[z_image_qwen3_encoder_quantized, flux_vae],
)

z_image_turbo_q8 = StarterModel(
    name="Z-Image Turbo (Q8)",
    base=BaseModelType.ZImage,
    source="https://huggingface.co/leejet/Z-Image-Turbo-GGUF/resolve/main/z_image_turbo-Q8_0.gguf",
-     description="Z-Image Turbo quantized to GGUF Q8_0 format. Higher quality, larger size. Requires separate Qwen3 text encoder. ~6.6GB",
+     description="Z-Image Turbo quantized to GGUF Q8_0 format. Higher quality, larger size. Requires standalone Qwen3 text encoder and Flux VAE. ~6.6GB",
    type=ModelType.Main,
    format=ModelFormat.GGUFQuantized,
-     dependencies=[z_image_qwen3_encoder_quantized],
+     dependencies=[z_image_qwen3_encoder_quantized, flux_vae],
)

z_image_controlnet_union = StarterModel(
@@ -890,10 +890,19 @@ flux_bundle: list[StarterModel] = [
    flux_krea_quantized,
]

+ zimage_bundle: list[StarterModel] = [
+     z_image_turbo_quantized,
+     z_image_qwen3_encoder_quantized,
+     z_image_controlnet_union,
+     z_image_controlnet_tile,
+     flux_vae,
+ ]
+
STARTER_BUNDLES: dict[str, StarterModelBundle] = {
    BaseModelType.StableDiffusion1: StarterModelBundle(name="Stable Diffusion 1.5", models=sd1_bundle),
    BaseModelType.StableDiffusionXL: StarterModelBundle(name="SDXL", models=sdxl_bundle),
    BaseModelType.Flux: StarterModelBundle(name="FLUX.1 dev", models=flux_bundle),
+     BaseModelType.ZImage: StarterModelBundle(name="Z-Image Turbo", models=zimage_bundle),
}

assert len(STARTER_MODELS) == len({m.source for m in STARTER_MODELS}), "Duplicate starter models"
invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py
@@ -140,16 +140,50 @@ def _get_lora_layer_values(layer_dict: dict[str, torch.Tensor], alpha: float | N


def _group_by_layer(state_dict: Dict[str, torch.Tensor]) -> dict[str, dict[str, torch.Tensor]]:
-     """Groups the keys in the state dict by layer."""
+     """Groups the keys in the state dict by layer.
+
+     Z-Image LoRAs have keys like:
+     - diffusion_model.layers.17.attention.to_k.alpha
+     - diffusion_model.layers.17.attention.to_k.dora_scale
+     - diffusion_model.layers.17.attention.to_k.lora_down.weight
+     - diffusion_model.layers.17.attention.to_k.lora_up.weight
+
+     We need to group these by the full layer path (e.g., diffusion_model.layers.17.attention.to_k)
+     and extract the suffix (alpha, dora_scale, lora_down.weight, lora_up.weight).
+     """
    layer_dict: dict[str, dict[str, torch.Tensor]] = {}
+
+     # Known suffixes that indicate the end of a layer name
+     known_suffixes = [
+         ".lora_A.weight",
+         ".lora_B.weight",
+         ".lora_down.weight",
+         ".lora_up.weight",
+         ".dora_scale",
+         ".alpha",
+     ]
+
    for key in state_dict:
        if not isinstance(key, str):
            continue
-         # Split the 'lora_A.weight' or 'lora_B.weight' suffix from the layer name.
-         parts = key.rsplit(".", maxsplit=2)
-         layer_name = parts[0]
-         key_name = ".".join(parts[1:])
+
+         # Try to find a known suffix
+         layer_name = None
+         key_name = None
+         for suffix in known_suffixes:
+             if key.endswith(suffix):
+                 layer_name = key[: -len(suffix)]
+                 key_name = suffix[1:]  # Remove leading dot
+                 break
+
+         if layer_name is None:
+             # Fallback to original logic for unknown formats
+             parts = key.rsplit(".", maxsplit=2)
+             layer_name = parts[0]
+             key_name = ".".join(parts[1:])
+
        if layer_name not in layer_dict:
            layer_dict[layer_name] = {}
        layer_dict[layer_name][key_name] = state_dict[key]
+
    return layer_dict
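Why the suffix-aware split matters: with the old rsplit logic, single-component suffixes such as .alpha or .dora_scale pulled one path segment too many into the key name. A small demonstration (illustrative only, not part of the diff), using a key from the docstring above:

# Demonstration of the grouping fix above.
key = "diffusion_model.layers.17.attention.to_k.alpha"

# Old behavior: split on the last two dots, which mis-groups ".alpha".
parts = key.rsplit(".", maxsplit=2)
print(parts[0], "->", ".".join(parts[1:]))
# diffusion_model.layers.17.attention -> to_k.alpha   (wrong layer name)

# New behavior: match a known suffix first, then strip it.
for suffix in (".lora_down.weight", ".lora_up.weight", ".dora_scale", ".alpha"):
    if key.endswith(suffix):
        print(key[: -len(suffix)], "->", suffix[1:])
        # diffusion_model.layers.17.attention.to_k -> alpha   (correct)
        break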