InvokeAI 6.9.0rc3-py3-none-any.whl → 6.10.0rc1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invokeai/app/api/dependencies.py +2 -0
- invokeai/app/api/routers/model_manager.py +91 -2
- invokeai/app/api/routers/workflows.py +9 -0
- invokeai/app/invocations/fields.py +19 -0
- invokeai/app/invocations/image_to_latents.py +23 -5
- invokeai/app/invocations/latents_to_image.py +2 -25
- invokeai/app/invocations/metadata.py +9 -1
- invokeai/app/invocations/model.py +8 -0
- invokeai/app/invocations/primitives.py +12 -0
- invokeai/app/invocations/prompt_template.py +57 -0
- invokeai/app/invocations/z_image_control.py +112 -0
- invokeai/app/invocations/z_image_denoise.py +610 -0
- invokeai/app/invocations/z_image_image_to_latents.py +102 -0
- invokeai/app/invocations/z_image_latents_to_image.py +103 -0
- invokeai/app/invocations/z_image_lora_loader.py +153 -0
- invokeai/app/invocations/z_image_model_loader.py +135 -0
- invokeai/app/invocations/z_image_text_encoder.py +197 -0
- invokeai/app/services/model_install/model_install_common.py +14 -1
- invokeai/app/services/model_install/model_install_default.py +119 -19
- invokeai/app/services/model_records/model_records_base.py +12 -0
- invokeai/app/services/model_records/model_records_sql.py +17 -0
- invokeai/app/services/shared/graph.py +132 -77
- invokeai/app/services/workflow_records/workflow_records_base.py +8 -0
- invokeai/app/services/workflow_records/workflow_records_sqlite.py +42 -0
- invokeai/app/util/step_callback.py +3 -0
- invokeai/backend/model_manager/configs/controlnet.py +47 -1
- invokeai/backend/model_manager/configs/factory.py +26 -1
- invokeai/backend/model_manager/configs/lora.py +43 -1
- invokeai/backend/model_manager/configs/main.py +113 -0
- invokeai/backend/model_manager/configs/qwen3_encoder.py +156 -0
- invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_diffusers_rms_norm.py +40 -0
- invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_layer_norm.py +25 -0
- invokeai/backend/model_manager/load/model_cache/torch_module_autocast/torch_module_autocast.py +11 -2
- invokeai/backend/model_manager/load/model_loaders/lora.py +11 -0
- invokeai/backend/model_manager/load/model_loaders/z_image.py +935 -0
- invokeai/backend/model_manager/load/model_util.py +6 -1
- invokeai/backend/model_manager/metadata/metadata_base.py +12 -5
- invokeai/backend/model_manager/model_on_disk.py +3 -0
- invokeai/backend/model_manager/starter_models.py +70 -0
- invokeai/backend/model_manager/taxonomy.py +5 -0
- invokeai/backend/model_manager/util/select_hf_files.py +23 -8
- invokeai/backend/patches/layer_patcher.py +34 -16
- invokeai/backend/patches/layers/lora_layer_base.py +2 -1
- invokeai/backend/patches/lora_conversions/flux_aitoolkit_lora_conversion_utils.py +17 -2
- invokeai/backend/patches/lora_conversions/flux_xlabs_lora_conversion_utils.py +92 -0
- invokeai/backend/patches/lora_conversions/formats.py +5 -0
- invokeai/backend/patches/lora_conversions/z_image_lora_constants.py +8 -0
- invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py +155 -0
- invokeai/backend/quantization/gguf/ggml_tensor.py +27 -4
- invokeai/backend/quantization/gguf/loaders.py +47 -12
- invokeai/backend/stable_diffusion/diffusion/conditioning_data.py +13 -0
- invokeai/backend/util/devices.py +25 -0
- invokeai/backend/util/hotfixes.py +2 -2
- invokeai/backend/z_image/__init__.py +16 -0
- invokeai/backend/z_image/extensions/__init__.py +1 -0
- invokeai/backend/z_image/extensions/regional_prompting_extension.py +207 -0
- invokeai/backend/z_image/text_conditioning.py +74 -0
- invokeai/backend/z_image/z_image_control_adapter.py +238 -0
- invokeai/backend/z_image/z_image_control_transformer.py +643 -0
- invokeai/backend/z_image/z_image_controlnet_extension.py +531 -0
- invokeai/backend/z_image/z_image_patchify_utils.py +135 -0
- invokeai/backend/z_image/z_image_transformer_patch.py +234 -0
- invokeai/frontend/web/dist/assets/App-CYhlZO3Q.js +161 -0
- invokeai/frontend/web/dist/assets/{browser-ponyfill-CN1j0ARZ.js → browser-ponyfill-DHZxq1nk.js} +1 -1
- invokeai/frontend/web/dist/assets/index-dgSJAY--.js +530 -0
- invokeai/frontend/web/dist/index.html +1 -1
- invokeai/frontend/web/dist/locales/de.json +24 -6
- invokeai/frontend/web/dist/locales/en.json +70 -1
- invokeai/frontend/web/dist/locales/es.json +0 -5
- invokeai/frontend/web/dist/locales/fr.json +0 -6
- invokeai/frontend/web/dist/locales/it.json +17 -64
- invokeai/frontend/web/dist/locales/ja.json +379 -44
- invokeai/frontend/web/dist/locales/ru.json +0 -6
- invokeai/frontend/web/dist/locales/vi.json +7 -54
- invokeai/frontend/web/dist/locales/zh-CN.json +0 -6
- invokeai/version/invokeai_version.py +1 -1
- {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/METADATA +3 -3
- {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/RECORD +84 -60
- invokeai/frontend/web/dist/assets/App-Cn9UyjoV.js +0 -161
- invokeai/frontend/web/dist/assets/index-BDrf9CL-.js +0 -530
- {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/WHEEL +0 -0
- {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/entry_points.txt +0 -0
- {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE +0 -0
- {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
- {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
- {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/top_level.txt +0 -0
invokeai/backend/model_manager/configs/lora.py
CHANGED

```diff
@@ -150,11 +150,16 @@ class LoRA_LyCORIS_Config_Base(LoRA_Config_Base):

     @classmethod
     def _validate_looks_like_lora(cls, mod: ModelOnDisk) -> None:
-        # First rule out ControlLoRA
+        # First rule out ControlLoRA
         flux_format = _get_flux_lora_format(mod)
         if flux_format in [FluxLoRAFormat.Control]:
             raise NotAMatchError("model looks like Control LoRA")

+        # If it's a recognized Flux LoRA format (Kohya, Diffusers, OneTrainer, AIToolkit, XLabs, etc.),
+        # it's valid and we skip the heuristic check
+        if flux_format is not None:
+            return
+
         # Note: Existence of these key prefixes/suffixes does not guarantee that this is a LoRA.
         # Some main models have these keys, likely due to the creator merging in a LoRA.
         has_key_with_lora_prefix = state_dict_has_any_keys_starting_with(
@@ -217,6 +222,37 @@ class LoRA_LyCORIS_FLUX_Config(LoRA_LyCORIS_Config_Base, Config_Base):
     base: Literal[BaseModelType.Flux] = Field(default=BaseModelType.Flux)


+class LoRA_LyCORIS_ZImage_Config(LoRA_LyCORIS_Config_Base, Config_Base):
+    """Model config for Z-Image LoRA models in LyCORIS format."""
+
+    base: Literal[BaseModelType.ZImage] = Field(default=BaseModelType.ZImage)
+
+    @classmethod
+    def _get_base_or_raise(cls, mod: ModelOnDisk) -> BaseModelType:
+        """Z-Image LoRAs are identified by their diffusion_model.layers structure.
+
+        Z-Image uses S3-DiT architecture with layer names like:
+        - diffusion_model.layers.0.attention.to_k.lora_A.weight
+        - diffusion_model.layers.0.feed_forward.w1.lora_A.weight
+        """
+        state_dict = mod.load_state_dict()
+
+        # Check for Z-Image transformer layer patterns
+        # Z-Image uses diffusion_model.layers.X structure (unlike Flux which uses double_blocks/single_blocks)
+        has_z_image_keys = state_dict_has_any_keys_starting_with(
+            state_dict,
+            {
+                "diffusion_model.layers.",  # Z-Image S3-DiT layer pattern
+            },
+        )
+
+        # If it looks like a Z-Image LoRA, return ZImage base
+        if has_z_image_keys:
+            return BaseModelType.ZImage
+
+        raise NotAMatchError("model does not look like a Z-Image LoRA")
+
+
 class ControlAdapter_Config_Base(ABC, BaseModel):
     default_settings: ControlAdapterDefaultSettings | None = Field(None)

@@ -320,3 +356,9 @@ class LoRA_Diffusers_SDXL_Config(LoRA_Diffusers_Config_Base, Config_Base):

 class LoRA_Diffusers_FLUX_Config(LoRA_Diffusers_Config_Base, Config_Base):
     base: Literal[BaseModelType.Flux] = Field(default=BaseModelType.Flux)
+
+
+class LoRA_Diffusers_ZImage_Config(LoRA_Diffusers_Config_Base, Config_Base):
+    """Model config for Z-Image LoRA models in Diffusers format."""
+
+    base: Literal[BaseModelType.ZImage] = Field(default=BaseModelType.ZImage)
```
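The detection in `_get_base_or_raise` above hinges on a single prefix test. Here is a minimal sketch of that check, with an illustrative stand-in for the `state_dict_has_any_keys_starting_with` helper (the real one lives in `identification_utils`) and made-up sample keys:

```python
from typing import Any


def state_dict_has_any_keys_starting_with(state_dict: dict[str | int, Any], prefixes: set[str]) -> bool:
    # True if any string key begins with one of the given prefixes.
    return any(isinstance(k, str) and k.startswith(tuple(prefixes)) for k in state_dict.keys())


# Keys shaped like a Z-Image S3-DiT LoRA match; Flux-style keys do not.
z_image_like = {"diffusion_model.layers.0.attention.to_k.lora_A.weight": None}
flux_like = {"double_blocks.0.img_attn.qkv.lora_A.weight": None}

assert state_dict_has_any_keys_starting_with(z_image_like, {"diffusion_model.layers."})
assert not state_dict_has_any_keys_starting_with(flux_like, {"diffusion_model.layers."})
```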
invokeai/backend/model_manager/configs/main.py
CHANGED

```diff
@@ -60,6 +60,8 @@ class MainModelDefaultSettings(BaseModel):
                 return cls(width=768, height=768)
             case BaseModelType.StableDiffusionXL:
                 return cls(width=1024, height=1024)
+            case BaseModelType.ZImage:
+                return cls(steps=9, cfg_scale=1.0, width=1024, height=1024)
             case _:
                 # TODO(psyche): Do we want defaults for other base types?
                 return None
@@ -111,6 +113,28 @@ def _has_main_keys(state_dict: dict[str | int, Any]) -> bool:
     return False


+def _has_z_image_keys(state_dict: dict[str | int, Any]) -> bool:
+    """Check if state dict contains Z-Image S3-DiT transformer keys."""
+    # Z-Image specific keys that distinguish it from other models
+    z_image_specific_keys = {
+        "cap_embedder",  # Caption embedder - unique to Z-Image
+        "context_refiner",  # Context refiner blocks
+        "cap_pad_token",  # Caption padding token
+    }
+
+    for key in state_dict.keys():
+        if isinstance(key, int):
+            continue
+        # Check for Z-Image specific key prefixes
+        # Handle both direct keys (cap_embedder.0.weight) and
+        # ComfyUI-style keys (model.diffusion_model.cap_embedder.0.weight)
+        key_parts = key.split(".")
+        for part in key_parts:
+            if part in z_image_specific_keys:
+                return True
+    return False
+
+
 class Main_SD_Checkpoint_Config_Base(Checkpoint_Config_Base, Main_Config_Base):
     """Model config for main checkpoint models."""

@@ -657,3 +681,92 @@ class Main_Diffusers_CogView4_Config(Diffusers_Config_Base, Main_Config_Base, Co
             **override_fields,
             repo_variant=repo_variant,
         )
+
+
+class Main_Diffusers_ZImage_Config(Diffusers_Config_Base, Main_Config_Base, Config_Base):
+    """Model config for Z-Image diffusers models (Z-Image-Turbo, Z-Image-Base, Z-Image-Edit)."""
+
+    base: Literal[BaseModelType.ZImage] = Field(BaseModelType.ZImage)
+
+    @classmethod
+    def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
+        raise_if_not_dir(mod)
+
+        raise_for_override_fields(cls, override_fields)
+
+        # This check implies the base type - no further validation needed.
+        raise_for_class_name(
+            common_config_paths(mod.path),
+            {
+                "ZImagePipeline",
+            },
+        )
+
+        repo_variant = override_fields.get("repo_variant") or cls._get_repo_variant_or_raise(mod)
+
+        return cls(
+            **override_fields,
+            repo_variant=repo_variant,
+        )
+
+
+class Main_Checkpoint_ZImage_Config(Checkpoint_Config_Base, Main_Config_Base, Config_Base):
+    """Model config for Z-Image single-file checkpoint models (safetensors, etc)."""
+
+    base: Literal[BaseModelType.ZImage] = Field(default=BaseModelType.ZImage)
+    format: Literal[ModelFormat.Checkpoint] = Field(default=ModelFormat.Checkpoint)
+
+    @classmethod
+    def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
+        raise_if_not_file(mod)
+
+        raise_for_override_fields(cls, override_fields)
+
+        cls._validate_looks_like_z_image_model(mod)
+
+        cls._validate_does_not_look_like_gguf_quantized(mod)
+
+        return cls(**override_fields)
+
+    @classmethod
+    def _validate_looks_like_z_image_model(cls, mod: ModelOnDisk) -> None:
+        has_z_image_keys = _has_z_image_keys(mod.load_state_dict())
+        if not has_z_image_keys:
+            raise NotAMatchError("state dict does not look like a Z-Image model")
+
+    @classmethod
+    def _validate_does_not_look_like_gguf_quantized(cls, mod: ModelOnDisk) -> None:
+        has_ggml_tensors = _has_ggml_tensors(mod.load_state_dict())
+        if has_ggml_tensors:
+            raise NotAMatchError("state dict looks like GGUF quantized")
+
+
+class Main_GGUF_ZImage_Config(Checkpoint_Config_Base, Main_Config_Base, Config_Base):
+    """Model config for GGUF-quantized Z-Image transformer models."""
+
+    base: Literal[BaseModelType.ZImage] = Field(default=BaseModelType.ZImage)
+    format: Literal[ModelFormat.GGUFQuantized] = Field(default=ModelFormat.GGUFQuantized)
+
+    @classmethod
+    def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
+        raise_if_not_file(mod)
+
+        raise_for_override_fields(cls, override_fields)
+
+        cls._validate_looks_like_z_image_model(mod)
+
+        cls._validate_looks_like_gguf_quantized(mod)
+
+        return cls(**override_fields)
+
+    @classmethod
+    def _validate_looks_like_z_image_model(cls, mod: ModelOnDisk) -> None:
+        has_z_image_keys = _has_z_image_keys(mod.load_state_dict())
+        if not has_z_image_keys:
+            raise NotAMatchError("state dict does not look like a Z-Image model")
+
+    @classmethod
+    def _validate_looks_like_gguf_quantized(cls, mod: ModelOnDisk) -> None:
+        has_ggml_tensors = _has_ggml_tensors(mod.load_state_dict())
+        if not has_ggml_tensors:
+            raise NotAMatchError("state dict does not look like GGUF quantized")
```
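For reference, the dotted-part matching in `_has_z_image_keys` can be exercised standalone. The function below mirrors the logic above; the sample keys are illustrative, not taken from a real checkpoint:

```python
from typing import Any

Z_IMAGE_SPECIFIC_KEYS = {"cap_embedder", "context_refiner", "cap_pad_token"}


def has_z_image_keys(state_dict: dict[str | int, Any]) -> bool:
    # Split each key on "." so the marker matches at any depth, which covers
    # both "cap_embedder.0.weight" and "model.diffusion_model.cap_embedder.0.weight".
    for key in state_dict:
        if isinstance(key, int):
            continue
        if any(part in Z_IMAGE_SPECIFIC_KEYS for part in key.split(".")):
            return True
    return False


assert has_z_image_keys({"cap_embedder.0.weight": None})
assert has_z_image_keys({"model.diffusion_model.cap_embedder.0.weight": None})
assert not has_z_image_keys({"model.diffusion_model.input_blocks.0.weight": None})
```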
invokeai/backend/model_manager/configs/qwen3_encoder.py
ADDED

```diff
@@ -0,0 +1,156 @@
+from typing import Any, Literal, Self
+
+from pydantic import Field
+
+from invokeai.backend.model_manager.configs.base import Checkpoint_Config_Base, Config_Base
+from invokeai.backend.model_manager.configs.identification_utils import (
+    NotAMatchError,
+    raise_for_class_name,
+    raise_for_override_fields,
+    raise_if_not_dir,
+    raise_if_not_file,
+)
+from invokeai.backend.model_manager.model_on_disk import ModelOnDisk
+from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelFormat, ModelType
+from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor
+
+
+def _has_qwen3_keys(state_dict: dict[str | int, Any]) -> bool:
+    """Check if state dict contains Qwen3 model keys.
+
+    Supports both:
+    - PyTorch/diffusers format: model.layers.0., model.embed_tokens.weight
+    - GGUF/llama.cpp format: blk.0., token_embd.weight
+    """
+    # PyTorch/diffusers format indicators
+    pytorch_indicators = ["model.layers.0.", "model.embed_tokens.weight"]
+    # GGUF/llama.cpp format indicators
+    gguf_indicators = ["blk.0.", "token_embd.weight"]
+
+    for key in state_dict.keys():
+        if isinstance(key, str):
+            # Check PyTorch format
+            for indicator in pytorch_indicators:
+                if key.startswith(indicator) or key == indicator:
+                    return True
+            # Check GGUF format
+            for indicator in gguf_indicators:
+                if key.startswith(indicator) or key == indicator:
+                    return True
+    return False
+
+
+def _has_ggml_tensors(state_dict: dict[str | int, Any]) -> bool:
+    """Check if state dict contains GGML tensors (GGUF quantized)."""
+    return any(isinstance(v, GGMLTensor) for v in state_dict.values())
+
+
+class Qwen3Encoder_Checkpoint_Config(Checkpoint_Config_Base, Config_Base):
+    """Configuration for single-file Qwen3 Encoder models (safetensors)."""
+
+    base: Literal[BaseModelType.Any] = Field(default=BaseModelType.Any)
+    type: Literal[ModelType.Qwen3Encoder] = Field(default=ModelType.Qwen3Encoder)
+    format: Literal[ModelFormat.Checkpoint] = Field(default=ModelFormat.Checkpoint)
+
+    @classmethod
+    def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
+        raise_if_not_file(mod)
+
+        raise_for_override_fields(cls, override_fields)
+
+        cls._validate_looks_like_qwen3_model(mod)
+
+        cls._validate_does_not_look_like_gguf_quantized(mod)
+
+        return cls(**override_fields)
+
+    @classmethod
+    def _validate_looks_like_qwen3_model(cls, mod: ModelOnDisk) -> None:
+        has_qwen3_keys = _has_qwen3_keys(mod.load_state_dict())
+        if not has_qwen3_keys:
+            raise NotAMatchError("state dict does not look like a Qwen3 model")
+
+    @classmethod
+    def _validate_does_not_look_like_gguf_quantized(cls, mod: ModelOnDisk) -> None:
+        has_ggml = _has_ggml_tensors(mod.load_state_dict())
+        if has_ggml:
+            raise NotAMatchError("state dict looks like GGUF quantized")
+
+
+class Qwen3Encoder_Qwen3Encoder_Config(Config_Base):
+    """Configuration for Qwen3 Encoder models in a diffusers-like format.
+
+    The model weights are expected to be in a folder called text_encoder inside the model directory,
+    compatible with Qwen2VLForConditionalGeneration or similar architectures used by Z-Image.
+    """
+
+    base: Literal[BaseModelType.Any] = Field(default=BaseModelType.Any)
+    type: Literal[ModelType.Qwen3Encoder] = Field(default=ModelType.Qwen3Encoder)
+    format: Literal[ModelFormat.Qwen3Encoder] = Field(default=ModelFormat.Qwen3Encoder)
+
+    @classmethod
+    def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
+        raise_if_not_dir(mod)
+
+        raise_for_override_fields(cls, override_fields)
+
+        # Check for text_encoder config - support both:
+        # 1. Full model structure: model_root/text_encoder/config.json
+        # 2. Standalone text_encoder download: model_root/config.json (when text_encoder subfolder is downloaded separately)
+        config_path_nested = mod.path / "text_encoder" / "config.json"
+        config_path_direct = mod.path / "config.json"
+
+        if config_path_nested.exists():
+            expected_config_path = config_path_nested
+        elif config_path_direct.exists():
+            expected_config_path = config_path_direct
+        else:
+            from invokeai.backend.model_manager.configs.identification_utils import NotAMatchError
+
+            raise NotAMatchError(
+                f"unable to load config file(s): {{PosixPath('{config_path_nested}'): 'file does not exist'}}"
+            )
+
+        # Qwen3 uses Qwen2VLForConditionalGeneration or similar
+        raise_for_class_name(
+            expected_config_path,
+            {
+                "Qwen2VLForConditionalGeneration",
+                "Qwen2ForCausalLM",
+                "Qwen3ForCausalLM",
+            },
+        )
+
+        return cls(**override_fields)
+
+
+class Qwen3Encoder_GGUF_Config(Checkpoint_Config_Base, Config_Base):
+    """Configuration for GGUF-quantized Qwen3 Encoder models."""
+
+    base: Literal[BaseModelType.Any] = Field(default=BaseModelType.Any)
+    type: Literal[ModelType.Qwen3Encoder] = Field(default=ModelType.Qwen3Encoder)
+    format: Literal[ModelFormat.GGUFQuantized] = Field(default=ModelFormat.GGUFQuantized)
+
+    @classmethod
+    def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
+        raise_if_not_file(mod)
+
+        raise_for_override_fields(cls, override_fields)
+
+        cls._validate_looks_like_qwen3_model(mod)
+
+        cls._validate_looks_like_gguf_quantized(mod)
+
+        return cls(**override_fields)
+
+    @classmethod
+    def _validate_looks_like_qwen3_model(cls, mod: ModelOnDisk) -> None:
+        has_qwen3_keys = _has_qwen3_keys(mod.load_state_dict())
+        if not has_qwen3_keys:
+            raise NotAMatchError("state dict does not look like a Qwen3 model")
+
+    @classmethod
+    def _validate_looks_like_gguf_quantized(cls, mod: ModelOnDisk) -> None:
+        has_ggml = _has_ggml_tensors(mod.load_state_dict())
+        if not has_ggml:
+            raise NotAMatchError("state dict does not look like GGUF quantized")
```
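A small sketch of the two naming schemes `_has_qwen3_keys` accepts, using representative tensor names (not dumped from real files):

```python
# PyTorch/diffusers-style vs GGUF/llama.cpp-style tensor names for the same model.
pytorch_style = {"model.embed_tokens.weight": None, "model.layers.0.self_attn.q_proj.weight": None}
gguf_style = {"token_embd.weight": None, "blk.0.attn_q.weight": None}


def looks_like_qwen3(state_dict: dict) -> bool:
    # Same indicators as the config above, combined into one prefix test.
    indicators = ("model.layers.0.", "model.embed_tokens.weight", "blk.0.", "token_embd.weight")
    return any(isinstance(k, str) and k.startswith(indicators) for k in state_dict)


assert looks_like_qwen3(pytorch_style)
assert looks_like_qwen3(gguf_style)
```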
invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_diffusers_rms_norm.py
ADDED

```diff
@@ -0,0 +1,40 @@
+import torch
+from diffusers.models.normalization import RMSNorm as DiffusersRMSNorm
+
+from invokeai.backend.model_manager.load.model_cache.torch_module_autocast.cast_to_device import cast_to_device
+from invokeai.backend.model_manager.load.model_cache.torch_module_autocast.custom_modules.custom_module_mixin import (
+    CustomModuleMixin,
+)
+
+
+class CustomDiffusersRMSNorm(DiffusersRMSNorm, CustomModuleMixin):
+    """Custom wrapper for diffusers RMSNorm that supports device autocasting for partial model loading."""
+
+    def _autocast_forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        weight = cast_to_device(self.weight, hidden_states.device) if self.weight is not None else None
+        bias = cast_to_device(self.bias, hidden_states.device) if self.bias is not None else None
+
+        input_dtype = hidden_states.dtype
+        variance = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True)
+        hidden_states = hidden_states * torch.rsqrt(variance + self.eps)
+
+        if weight is not None:
+            # convert into half-precision if necessary
+            if weight.dtype in [torch.float16, torch.bfloat16]:
+                hidden_states = hidden_states.to(weight.dtype)
+            hidden_states = hidden_states * weight
+            if bias is not None:
+                hidden_states = hidden_states + bias
+        else:
+            hidden_states = hidden_states.to(input_dtype)
+
+        return hidden_states
+
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        if len(self._patches_and_weights) > 0:
+            raise RuntimeError("DiffusersRMSNorm layers do not support patches")
+
+        if self._device_autocasting_enabled:
+            return self._autocast_forward(hidden_states)
+        else:
+            return super().forward(hidden_states)
```
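`_autocast_forward` re-implements the RMSNorm math inline so the freshly cast weights can be used directly. A standalone reference of that math, with unit weights assumed for the sanity check (this is illustrative, not InvokeAI code):

```python
import torch


def rms_norm_ref(x: torch.Tensor, weight: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    # Normalize by the root-mean-square over the last dimension, then scale.
    variance = x.to(torch.float32).pow(2).mean(-1, keepdim=True)
    x = x * torch.rsqrt(variance + eps)
    return (x * weight).to(weight.dtype)


x = torch.randn(2, 4, 8)
w = torch.ones(8)
out = rms_norm_ref(x, w)
# With unit weights, the RMS of every normalized row is ~1.
assert torch.allclose(out.pow(2).mean(-1).sqrt(), torch.ones(2, 4), atol=1e-3)
```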
invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_layer_norm.py
ADDED

```diff
@@ -0,0 +1,25 @@
+import torch
+import torch.nn.functional as F
+
+from invokeai.backend.model_manager.load.model_cache.torch_module_autocast.cast_to_device import cast_to_device
+from invokeai.backend.model_manager.load.model_cache.torch_module_autocast.custom_modules.custom_module_mixin import (
+    CustomModuleMixin,
+)
+
+
+class CustomLayerNorm(torch.nn.LayerNorm, CustomModuleMixin):
+    """Custom wrapper for torch.nn.LayerNorm that supports device autocasting for partial model loading."""
+
+    def _autocast_forward(self, input: torch.Tensor) -> torch.Tensor:
+        weight = cast_to_device(self.weight, input.device) if self.weight is not None else None
+        bias = cast_to_device(self.bias, input.device) if self.bias is not None else None
+        return F.layer_norm(input, self.normalized_shape, weight, bias, self.eps)
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        if len(self._patches_and_weights) > 0:
+            raise RuntimeError("LayerNorm layers do not support patches")
+
+        if self._device_autocasting_enabled:
+            return self._autocast_forward(input)
+        else:
+            return super().forward(input)
```
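Same pattern as the RMSNorm wrapper: under partial model loading the parameters may still live on the CPU, so they are cast to the input's device on every call before the functional op. A minimal sketch using plain `torch`, with `Tensor.to` standing in for `cast_to_device`:

```python
import torch
import torch.nn.functional as F

ln = torch.nn.LayerNorm(8)  # parameters stay where the layer was loaded (CPU here)
device = "cuda" if torch.cuda.is_available() else "cpu"
x = torch.randn(2, 8).to(device)

# Per-call cast of the parameters to the input's device, then the functional op.
out = F.layer_norm(
    x,
    ln.normalized_shape,
    ln.weight.to(x.device),
    ln.bias.to(x.device),
    ln.eps,
)
assert out.device == x.device
```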
invokeai/backend/model_manager/load/model_cache/torch_module_autocast/torch_module_autocast.py
CHANGED

```diff
@@ -1,14 +1,18 @@
 from typing import TypeVar

 import torch
+from diffusers.models.normalization import RMSNorm as DiffusersRMSNorm

-from invokeai.backend.flux.modules.layers import RMSNorm
+from invokeai.backend.flux.modules.layers import RMSNorm as FluxRMSNorm
 from invokeai.backend.model_manager.load.model_cache.torch_module_autocast.custom_modules.custom_conv1d import (
     CustomConv1d,
 )
 from invokeai.backend.model_manager.load.model_cache.torch_module_autocast.custom_modules.custom_conv2d import (
     CustomConv2d,
 )
+from invokeai.backend.model_manager.load.model_cache.torch_module_autocast.custom_modules.custom_diffusers_rms_norm import (
+    CustomDiffusersRMSNorm,
+)
 from invokeai.backend.model_manager.load.model_cache.torch_module_autocast.custom_modules.custom_embedding import (
     CustomEmbedding,
 )
@@ -18,6 +22,9 @@ from invokeai.backend.model_manager.load.model_cache.torch_module_autocast.custo
 from invokeai.backend.model_manager.load.model_cache.torch_module_autocast.custom_modules.custom_group_norm import (
     CustomGroupNorm,
 )
+from invokeai.backend.model_manager.load.model_cache.torch_module_autocast.custom_modules.custom_layer_norm import (
+    CustomLayerNorm,
+)
 from invokeai.backend.model_manager.load.model_cache.torch_module_autocast.custom_modules.custom_linear import (
     CustomLinear,
 )
@@ -31,7 +38,9 @@ AUTOCAST_MODULE_TYPE_MAPPING: dict[type[torch.nn.Module], type[torch.nn.Module]]
     torch.nn.Conv2d: CustomConv2d,
     torch.nn.GroupNorm: CustomGroupNorm,
     torch.nn.Embedding: CustomEmbedding,
-    RMSNorm: CustomFluxRMSNorm,
+    torch.nn.LayerNorm: CustomLayerNorm,
+    FluxRMSNorm: CustomFluxRMSNorm,
+    DiffusersRMSNorm: CustomDiffusersRMSNorm,
 }

 try:
```
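How InvokeAI consumes `AUTOCAST_MODULE_TYPE_MAPPING` is not part of this hunk. One common way to apply such a type-to-type mapping is to reassign `__class__` on exact type matches; this is sketched below with a toy subclass so it runs standalone, and it is only an assumption about the mechanism, not the actual implementation:

```python
import torch


class NoisyLinear(torch.nn.Linear):
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        print("custom forward")
        return super().forward(x)


# Toy stand-in for AUTOCAST_MODULE_TYPE_MAPPING.
MAPPING: dict[type[torch.nn.Module], type[torch.nn.Module]] = {torch.nn.Linear: NoisyLinear}

model = torch.nn.Sequential(torch.nn.Linear(4, 4), torch.nn.ReLU())
for module in model.modules():
    custom = MAPPING.get(type(module))
    if custom is not None:
        # Safe here because the subclass adds no new state, only behavior.
        module.__class__ = custom

_ = model(torch.randn(1, 4))  # prints "custom forward" once
```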
invokeai/backend/model_manager/load/model_loaders/lora.py
CHANGED

```diff
@@ -41,8 +41,13 @@ from invokeai.backend.patches.lora_conversions.flux_onetrainer_lora_conversion_u
     is_state_dict_likely_in_flux_onetrainer_format,
     lora_model_from_flux_onetrainer_state_dict,
 )
+from invokeai.backend.patches.lora_conversions.flux_xlabs_lora_conversion_utils import (
+    is_state_dict_likely_in_flux_xlabs_format,
+    lora_model_from_flux_xlabs_state_dict,
+)
 from invokeai.backend.patches.lora_conversions.sd_lora_conversion_utils import lora_model_from_sd_state_dict
 from invokeai.backend.patches.lora_conversions.sdxl_lora_conversion_utils import convert_sdxl_keys_to_diffusers_format
+from invokeai.backend.patches.lora_conversions.z_image_lora_conversion_utils import lora_model_from_z_image_state_dict


 @ModelLoaderRegistry.register(base=BaseModelType.Flux, type=ModelType.LoRA, format=ModelFormat.OMI)
@@ -117,6 +122,8 @@ class LoRALoader(ModelLoader):
             model = lora_model_from_flux_control_state_dict(state_dict=state_dict)
         elif is_state_dict_likely_in_flux_aitoolkit_format(state_dict=state_dict):
             model = lora_model_from_flux_aitoolkit_state_dict(state_dict=state_dict)
+        elif is_state_dict_likely_in_flux_xlabs_format(state_dict=state_dict):
+            model = lora_model_from_flux_xlabs_state_dict(state_dict=state_dict)
         else:
             raise ValueError("LoRA model is in unsupported FLUX format")
     else:
@@ -124,6 +131,10 @@ class LoRALoader(ModelLoader):
         elif self._model_base in [BaseModelType.StableDiffusion1, BaseModelType.StableDiffusion2]:
             # Currently, we don't apply any conversions for SD1 and SD2 LoRA models.
             model = lora_model_from_sd_state_dict(state_dict=state_dict)
+        elif self._model_base == BaseModelType.ZImage:
+            # Z-Image LoRAs use diffusers PEFT format with transformer and/or Qwen3 encoder layers.
+            # We set alpha=None to use rank as alpha (common default).
+            model = lora_model_from_z_image_state_dict(state_dict=state_dict, alpha=None)
         else:
             raise ValueError(f"Unsupported LoRA base model: {self._model_base}")

```
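The `alpha=None` comment above refers to standard LoRA scaling: the weight delta is typically applied as `(alpha / rank) * (up @ down)`, so using the rank as alpha collapses the scale to 1.0. A toy illustration with made-up tensors:

```python
import torch

rank, n = 4, 8
down = torch.randn(rank, n)  # "lora_A"
up = torch.randn(n, rank)    # "lora_B"


def lora_delta(up: torch.Tensor, down: torch.Tensor, alpha: float | None) -> torch.Tensor:
    # alpha=None falls back to scale 1.0, i.e. alpha == rank.
    scale = (alpha / down.shape[0]) if alpha is not None else 1.0
    return scale * (up @ down)


assert torch.allclose(lora_delta(up, down, None), lora_delta(up, down, float(rank)))
```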