InvokeAI 6.10.0rc2__py3-none-any.whl → 6.11.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invokeai/app/api/routers/model_manager.py +43 -1
- invokeai/app/invocations/fields.py +1 -1
- invokeai/app/invocations/flux2_denoise.py +499 -0
- invokeai/app/invocations/flux2_klein_model_loader.py +222 -0
- invokeai/app/invocations/flux2_klein_text_encoder.py +222 -0
- invokeai/app/invocations/flux2_vae_decode.py +106 -0
- invokeai/app/invocations/flux2_vae_encode.py +88 -0
- invokeai/app/invocations/flux_denoise.py +50 -3
- invokeai/app/invocations/flux_lora_loader.py +1 -1
- invokeai/app/invocations/ideal_size.py +6 -1
- invokeai/app/invocations/metadata.py +4 -0
- invokeai/app/invocations/metadata_linked.py +47 -0
- invokeai/app/invocations/model.py +1 -0
- invokeai/app/invocations/z_image_denoise.py +8 -3
- invokeai/app/invocations/z_image_image_to_latents.py +9 -1
- invokeai/app/invocations/z_image_latents_to_image.py +9 -1
- invokeai/app/invocations/z_image_seed_variance_enhancer.py +110 -0
- invokeai/app/services/config/config_default.py +3 -1
- invokeai/app/services/invocation_stats/invocation_stats_common.py +6 -6
- invokeai/app/services/invocation_stats/invocation_stats_default.py +9 -4
- invokeai/app/services/model_manager/model_manager_default.py +7 -0
- invokeai/app/services/model_records/model_records_base.py +4 -2
- invokeai/app/services/shared/invocation_context.py +15 -0
- invokeai/app/services/shared/sqlite/sqlite_util.py +2 -0
- invokeai/app/services/shared/sqlite_migrator/migrations/migration_25.py +61 -0
- invokeai/app/util/step_callback.py +42 -0
- invokeai/backend/flux/denoise.py +239 -204
- invokeai/backend/flux/dype/__init__.py +18 -0
- invokeai/backend/flux/dype/base.py +226 -0
- invokeai/backend/flux/dype/embed.py +116 -0
- invokeai/backend/flux/dype/presets.py +141 -0
- invokeai/backend/flux/dype/rope.py +110 -0
- invokeai/backend/flux/extensions/dype_extension.py +91 -0
- invokeai/backend/flux/util.py +35 -1
- invokeai/backend/flux2/__init__.py +4 -0
- invokeai/backend/flux2/denoise.py +261 -0
- invokeai/backend/flux2/ref_image_extension.py +294 -0
- invokeai/backend/flux2/sampling_utils.py +209 -0
- invokeai/backend/model_manager/configs/factory.py +19 -1
- invokeai/backend/model_manager/configs/main.py +395 -3
- invokeai/backend/model_manager/configs/qwen3_encoder.py +116 -7
- invokeai/backend/model_manager/configs/vae.py +104 -2
- invokeai/backend/model_manager/load/load_default.py +0 -1
- invokeai/backend/model_manager/load/model_cache/model_cache.py +107 -2
- invokeai/backend/model_manager/load/model_loaders/flux.py +1007 -2
- invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +0 -1
- invokeai/backend/model_manager/load/model_loaders/z_image.py +121 -28
- invokeai/backend/model_manager/starter_models.py +128 -0
- invokeai/backend/model_manager/taxonomy.py +31 -4
- invokeai/backend/model_manager/util/select_hf_files.py +3 -2
- invokeai/backend/util/vae_working_memory.py +0 -2
- invokeai/frontend/web/dist/assets/App-ClpIJstk.js +161 -0
- invokeai/frontend/web/dist/assets/{browser-ponyfill-BP0RxJ4G.js → browser-ponyfill-Cw07u5G1.js} +1 -1
- invokeai/frontend/web/dist/assets/{index-B44qKjrs.js → index-DSKM8iGj.js} +69 -69
- invokeai/frontend/web/dist/index.html +1 -1
- invokeai/frontend/web/dist/locales/en.json +58 -5
- invokeai/frontend/web/dist/locales/it.json +2 -1
- invokeai/version/invokeai_version.py +1 -1
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/METADATA +7 -1
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/RECORD +66 -49
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/WHEEL +1 -1
- invokeai/frontend/web/dist/assets/App-DllqPQ3j.js +0 -161
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/entry_points.txt +0 -0
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE +0 -0
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/top_level.txt +0 -0
--- invokeai/backend/model_manager/configs/main.py (6.10.0rc2)
+++ invokeai/backend/model_manager/configs/main.py (6.11.0rc1)

@@ -23,6 +23,7 @@ from invokeai.backend.model_manager.configs.identification_utils import (
 from invokeai.backend.model_manager.model_on_disk import ModelOnDisk
 from invokeai.backend.model_manager.taxonomy import (
     BaseModelType,
+    Flux2VariantType,
     FluxVariantType,
     ModelFormat,
     ModelType,
@@ -52,7 +53,11 @@ class MainModelDefaultSettings(BaseModel):
     model_config = ConfigDict(extra="forbid")
 
     @classmethod
-    def from_base(cls, base: BaseModelType) -> Self | None:
+    def from_base(
+        cls,
+        base: BaseModelType,
+        variant: Flux2VariantType | FluxVariantType | ModelVariantType | None = None,
+    ) -> Self | None:
         match base:
             case BaseModelType.StableDiffusion1:
                 return cls(width=512, height=512)
@@ -62,6 +67,14 @@ class MainModelDefaultSettings(BaseModel):
                 return cls(width=1024, height=1024)
             case BaseModelType.ZImage:
                 return cls(steps=9, cfg_scale=1.0, width=1024, height=1024)
+            case BaseModelType.Flux2:
+                # Different defaults based on variant
+                if variant == Flux2VariantType.Klein9BBase:
+                    # Undistilled base model needs more steps
+                    return cls(steps=28, cfg_scale=1.0, width=1024, height=1024)
+                else:
+                    # Distilled models (Klein 4B, Klein 9B) use fewer steps
+                    return cls(steps=4, cfg_scale=1.0, width=1024, height=1024)
             case _:
                 # TODO(psyche): Do we want defaults for other base types?
                 return None
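
The variant-aware defaults above can be exercised directly. A minimal sketch, assuming this release is installed and using only names visible in the hunk (from_base may also return None for bases without defaults):

from invokeai.backend.model_manager.configs.main import MainModelDefaultSettings
from invokeai.backend.model_manager.taxonomy import BaseModelType, Flux2VariantType

# Undistilled Klein 9B Base gets the 28-step default...
defaults = MainModelDefaultSettings.from_base(BaseModelType.Flux2, Flux2VariantType.Klein9BBase)
assert defaults is not None and defaults.steps == 28
# ...while omitting the variant falls into the distilled 4-step branch.
defaults = MainModelDefaultSettings.from_base(BaseModelType.Flux2)
assert defaults is not None and defaults.steps == 4
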
@@ -114,7 +127,11 @@ def _has_main_keys(state_dict: dict[str | int, Any]) -> bool:
 
 
 def _has_z_image_keys(state_dict: dict[str | int, Any]) -> bool:
-    """Check if state dict contains Z-Image S3-DiT transformer keys."""
+    """Check if state dict contains Z-Image S3-DiT transformer keys.
+
+    This function returns True only for Z-Image main models, not LoRAs.
+    LoRAs are excluded by checking for LoRA-specific weight suffixes.
+    """
     # Z-Image specific keys that distinguish it from other models
     z_image_specific_keys = {
         "cap_embedder",  # Caption embedder - unique to Z-Image
@@ -122,9 +139,23 @@ def _has_z_image_keys(state_dict: dict[str | int, Any]) -> bool:
         "cap_pad_token",  # Caption padding token
     }
 
+    # LoRA-specific suffixes - if present, this is a LoRA not a main model
+    lora_suffixes = (
+        ".lora_down.weight",
+        ".lora_up.weight",
+        ".lora_A.weight",
+        ".lora_B.weight",
+        ".dora_scale",
+    )
+
     for key in state_dict.keys():
         if isinstance(key, int):
             continue
+
+        # If we find any LoRA-specific keys, this is not a main model
+        if key.endswith(lora_suffixes):
+            return False
+
         # Check for Z-Image specific key prefixes
         # Handle both direct keys (cap_embedder.0.weight) and
         # ComfyUI-style keys (model.diffusion_model.cap_embedder.0.weight)
@@ -132,6 +163,7 @@ def _has_z_image_keys(state_dict: dict[str | int, Any]) -> bool:
         for part in key_parts:
             if part in z_image_specific_keys:
                 return True
+
     return False
 
 
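
The LoRA screen relies on str.endswith accepting a tuple of suffixes, so a single call tests a key against every pattern. A standalone sketch (the keys are hypothetical):

lora_suffixes = (".lora_down.weight", ".lora_up.weight", ".lora_A.weight", ".lora_B.weight", ".dora_scale")

assert "blocks.0.attn.qkv.lora_down.weight".endswith(lora_suffixes)  # LoRA key -> excluded
assert not "cap_embedder.0.weight".endswith(lora_suffixes)           # main-model key -> kept
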
@@ -249,6 +281,108 @@ class Main_Checkpoint_SDXLRefiner_Config(Main_SD_Checkpoint_Config_Base, Config_Base):
     base: Literal[BaseModelType.StableDiffusionXLRefiner] = Field(default=BaseModelType.StableDiffusionXLRefiner)
 
 
+def _is_flux2_model(state_dict: dict[str | int, Any]) -> bool:
+    """Check if state dict is a FLUX.2 model by examining context_embedder dimensions.
+
+    FLUX.2 Klein uses Qwen3 encoder with larger context dimension:
+    - FLUX.1: context_in_dim = 4096 (T5)
+    - FLUX.2 Klein 4B: context_in_dim = 7680 (3×Qwen3-4B hidden size)
+    - FLUX.2 Klein 9B: context_in_dim = 12288 (3×Qwen3-8B hidden size)
+
+    Also checks for FLUX.2-specific 32-channel latent space (in_channels=128 after packing).
+    """
+    # Check context_embedder input dimension (most reliable)
+    # Weight shape: [hidden_size, context_in_dim]
+    for key in {"context_embedder.weight", "model.diffusion_model.context_embedder.weight"}:
+        if key in state_dict:
+            weight = state_dict[key]
+            if hasattr(weight, "shape") and len(weight.shape) >= 2:
+                context_in_dim = weight.shape[1]
+                # FLUX.2 has context_in_dim > 4096 (Qwen3 vs T5)
+                if context_in_dim > 4096:
+                    return True
+
+    # Also check in_channels - FLUX.2 uses 128 (32 latent channels × 4 packing)
+    for key in {"img_in.weight", "model.diffusion_model.img_in.weight"}:
+        if key in state_dict:
+            in_channels = state_dict[key].shape[1]
+            # FLUX.2 uses 128 in_channels (32 latent channels × 4)
+            # FLUX.1 uses 64 in_channels (16 latent channels × 4)
+            if in_channels == 128:
+                return True
+
+    return False
+
+
+def _get_flux2_variant(state_dict: dict[str | int, Any]) -> Flux2VariantType | None:
+    """Determine FLUX.2 variant from state dict.
+
+    Distinguishes between Klein 4B and Klein 9B based on context embedding dimension:
+    - Klein 4B: context_in_dim = 7680 (3 × Qwen3-4B hidden_size 2560)
+    - Klein 9B: context_in_dim = 12288 (3 × Qwen3-8B hidden_size 4096)
+
+    Note: Klein 9B Base (undistilled) also has context_in_dim = 12288 but is rare.
+    We default to Klein9B (distilled) for all 9B models since GGUF models may not
+    include guidance embedding keys needed to distinguish them.
+
+    Supports both BFL format (checkpoint) and diffusers format keys:
+    - BFL format: txt_in.weight (context embedder)
+    - Diffusers format: context_embedder.weight
+    """
+    # Context dimensions for each variant
+    KLEIN_4B_CONTEXT_DIM = 7680  # 3 × 2560
+    KLEIN_9B_CONTEXT_DIM = 12288  # 3 × 4096
+
+    # Check context_embedder to determine variant
+    # Support both BFL format (txt_in.weight) and diffusers format (context_embedder.weight)
+    context_keys = {
+        # Diffusers format
+        "context_embedder.weight",
+        "model.diffusion_model.context_embedder.weight",
+        # BFL format (used by checkpoint/GGUF models)
+        "txt_in.weight",
+        "model.diffusion_model.txt_in.weight",
+    }
+    for key in context_keys:
+        if key in state_dict:
+            weight = state_dict[key]
+            # Handle GGUF quantized tensors which use tensor_shape instead of shape
+            if hasattr(weight, "tensor_shape"):
+                shape = weight.tensor_shape
+            elif hasattr(weight, "shape"):
+                shape = weight.shape
+            else:
+                continue
+            if len(shape) >= 2:
+                context_in_dim = shape[1]
+                # Determine variant based on context dimension
+                if context_in_dim == KLEIN_9B_CONTEXT_DIM:
+                    # Default to Klein9B (distilled) - the official/common 9B model
+                    return Flux2VariantType.Klein9B
+                elif context_in_dim == KLEIN_4B_CONTEXT_DIM:
+                    return Flux2VariantType.Klein4B
+                elif context_in_dim > 4096:
+                    # Unknown FLUX.2 variant, default to 4B
+                    return Flux2VariantType.Klein4B
+
+    # Check in_channels as backup - can only confirm it's FLUX.2, not which variant
+    for key in {"img_in.weight", "model.diffusion_model.img_in.weight"}:
+        if key in state_dict:
+            weight = state_dict[key]
+            # Handle GGUF quantized tensors
+            if hasattr(weight, "tensor_shape"):
+                in_channels = weight.tensor_shape[1]
+            elif hasattr(weight, "shape"):
+                in_channels = weight.shape[1]
+            else:
+                continue
+            if in_channels == 128:
+                # It's FLUX.2 but we can't determine which Klein variant, default to 4B
+                return Flux2VariantType.Klein4B
+
+    return None
+
+
 def _get_flux_variant(state_dict: dict[str | int, Any]) -> FluxVariantType | None:
     # FLUX Model variant types are distinguished by input channels and the presence of certain keys.
 
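
Both helpers only ever read tensor shapes, so the heuristic can be illustrated without loading a checkpoint. A runnable sketch with a stand-in tensor class (real state dicts hold torch or GGML tensors):

from dataclasses import dataclass

@dataclass
class FakeTensor:
    shape: tuple[int, ...]

def looks_like_flux2(state_dict: dict) -> bool:
    # Weight shape is [hidden_size, context_in_dim]; Qwen3-based FLUX.2 exceeds T5's 4096.
    w = state_dict.get("context_embedder.weight")
    return w is not None and len(w.shape) >= 2 and w.shape[1] > 4096

assert not looks_like_flux2({"context_embedder.weight": FakeTensor((3072, 4096))})  # T5 -> FLUX.1
assert looks_like_flux2({"context_embedder.weight": FakeTensor((3072, 12288))})     # 3x Qwen3-8B -> FLUX.2
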
@@ -322,8 +456,9 @@ class Main_Checkpoint_FLUX_Config(Checkpoint_Config_Base, Main_Config_Base, Config_Base):
 
     @classmethod
     def _validate_is_flux(cls, mod: ModelOnDisk) -> None:
+        state_dict = mod.load_state_dict()
         if not state_dict_has_any_keys_exact(
-            mod.load_state_dict(),
+            state_dict,
             {
                 "double_blocks.0.img_attn.norm.key_norm.scale",
                 "model.diffusion_model.double_blocks.0.img_attn.norm.key_norm.scale",
@@ -331,6 +466,10 @@ class Main_Checkpoint_FLUX_Config(Checkpoint_Config_Base, Main_Config_Base, Config_Base):
         ):
             raise NotAMatchError("state dict does not look like a FLUX checkpoint")
 
+        # Exclude FLUX.2 models - they have their own config class
+        if _is_flux2_model(state_dict):
+            raise NotAMatchError("model is a FLUX.2 model, not FLUX.1")
+
     @classmethod
     def _get_variant_or_raise(cls, mod: ModelOnDisk) -> FluxVariantType:
         # FLUX Model variant types are distinguished by input channels and the presence of certain keys.
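
This exclusion matters because config classes are probed one after another and signal "not mine" by raising NotAMatchError; a FLUX.2 Klein checkpoint can also carry double_blocks.* keys, so without the check it could be claimed by the FLUX.1 config first. An illustrative sketch of such a probe loop (the real one lives in configs/factory.py and may differ in detail):

class NotAMatchError(Exception):
    """Raised by a config class to mean: not this type, try the next candidate."""

def classify(mod, candidate_config_classes):
    for config_cls in candidate_config_classes:
        try:
            return config_cls.from_model_on_disk(mod, {})
        except NotAMatchError:
            continue
    return None
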
@@ -364,6 +503,68 @@ class Main_Checkpoint_FLUX_Config(Checkpoint_Config_Base, Main_Config_Base, Config_Base):
             raise NotAMatchError("state dict looks like GGUF quantized")
 
 
+class Main_Checkpoint_Flux2_Config(Checkpoint_Config_Base, Main_Config_Base, Config_Base):
+    """Model config for FLUX.2 checkpoint models (e.g. Klein)."""
+
+    format: Literal[ModelFormat.Checkpoint] = Field(default=ModelFormat.Checkpoint)
+    base: Literal[BaseModelType.Flux2] = Field(default=BaseModelType.Flux2)
+
+    variant: Flux2VariantType = Field()
+
+    @classmethod
+    def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
+        raise_if_not_file(mod)
+
+        raise_for_override_fields(cls, override_fields)
+
+        cls._validate_looks_like_main_model(mod)
+
+        cls._validate_is_flux2(mod)
+
+        cls._validate_does_not_look_like_bnb_quantized(mod)
+
+        cls._validate_does_not_look_like_gguf_quantized(mod)
+
+        variant = override_fields.get("variant") or cls._get_variant_or_raise(mod)
+
+        return cls(**override_fields, variant=variant)
+
+    @classmethod
+    def _validate_is_flux2(cls, mod: ModelOnDisk) -> None:
+        """Validate that this is a FLUX.2 model, not FLUX.1."""
+        state_dict = mod.load_state_dict()
+        if not _is_flux2_model(state_dict):
+            raise NotAMatchError("state dict does not look like a FLUX.2 model")
+
+    @classmethod
+    def _get_variant_or_raise(cls, mod: ModelOnDisk) -> Flux2VariantType:
+        state_dict = mod.load_state_dict()
+        variant = _get_flux2_variant(state_dict)
+
+        if variant is None:
+            raise NotAMatchError("unable to determine FLUX.2 model variant from state dict")
+
+        return variant
+
+    @classmethod
+    def _validate_looks_like_main_model(cls, mod: ModelOnDisk) -> None:
+        has_main_model_keys = _has_main_keys(mod.load_state_dict())
+        if not has_main_model_keys:
+            raise NotAMatchError("state dict does not look like a main model")
+
+    @classmethod
+    def _validate_does_not_look_like_bnb_quantized(cls, mod: ModelOnDisk) -> None:
+        has_bnb_nf4_keys = _has_bnb_nf4_keys(mod.load_state_dict())
+        if has_bnb_nf4_keys:
+            raise NotAMatchError("state dict looks like bnb quantized nf4")
+
+    @classmethod
+    def _validate_does_not_look_like_gguf_quantized(cls, mod: ModelOnDisk):
+        has_ggml_tensors = _has_ggml_tensors(mod.load_state_dict())
+        if has_ggml_tensors:
+            raise NotAMatchError("state dict looks like GGUF quantized")
+
+
 class Main_BnBNF4_FLUX_Config(Checkpoint_Config_Base, Main_Config_Base, Config_Base):
     """Model config for main checkpoint models."""
 
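
Note the `or` fallback in from_model_on_disk: a truthy user override wins, while an absent (or None) override drops through to on-disk detection. A two-line sketch with plain strings standing in for the enum:

detected = "Klein4B"  # stand-in for cls._get_variant_or_raise(mod)
assert ({}.get("variant") or detected) == "Klein4B"                      # no override -> detected value
assert ({"variant": "Klein9B"}.get("variant") or detected) == "Klein9B"  # override wins
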
@@ -431,6 +632,8 @@ class Main_GGUF_FLUX_Config(Checkpoint_Config_Base, Main_Config_Base, Config_Base):
 
         cls._validate_looks_like_gguf_quantized(mod)
 
+        cls._validate_is_not_flux2(mod)
+
         variant = override_fields.get("variant") or cls._get_variant_or_raise(mod)
 
         return cls(**override_fields, variant=variant)
@@ -461,6 +664,195 @@ class Main_GGUF_FLUX_Config(Checkpoint_Config_Base, Main_Config_Base, Config_Base):
         if not has_ggml_tensors:
             raise NotAMatchError("state dict does not look like GGUF quantized")
 
+    @classmethod
+    def _validate_is_not_flux2(cls, mod: ModelOnDisk) -> None:
+        """Validate that this is NOT a FLUX.2 model."""
+        state_dict = mod.load_state_dict()
+        if _is_flux2_model(state_dict):
+            raise NotAMatchError("model is a FLUX.2 model, not FLUX.1")
+
+
+class Main_GGUF_Flux2_Config(Checkpoint_Config_Base, Main_Config_Base, Config_Base):
+    """Model config for GGUF-quantized FLUX.2 checkpoint models (e.g. Klein)."""
+
+    base: Literal[BaseModelType.Flux2] = Field(default=BaseModelType.Flux2)
+    format: Literal[ModelFormat.GGUFQuantized] = Field(default=ModelFormat.GGUFQuantized)
+
+    variant: Flux2VariantType = Field()
+
+    @classmethod
+    def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
+        raise_if_not_file(mod)
+
+        raise_for_override_fields(cls, override_fields)
+
+        cls._validate_looks_like_main_model(mod)
+
+        cls._validate_looks_like_gguf_quantized(mod)
+
+        cls._validate_is_flux2(mod)
+
+        variant = override_fields.get("variant") or cls._get_variant_or_raise(mod)
+
+        return cls(**override_fields, variant=variant)
+
+    @classmethod
+    def _validate_is_flux2(cls, mod: ModelOnDisk) -> None:
+        """Validate that this is a FLUX.2 model, not FLUX.1."""
+        state_dict = mod.load_state_dict()
+        if not _is_flux2_model(state_dict):
+            raise NotAMatchError("state dict does not look like a FLUX.2 model")
+
+    @classmethod
+    def _get_variant_or_raise(cls, mod: ModelOnDisk) -> Flux2VariantType:
+        state_dict = mod.load_state_dict()
+        variant = _get_flux2_variant(state_dict)
+
+        if variant is None:
+            raise NotAMatchError("unable to determine FLUX.2 model variant from state dict")
+
+        return variant
+
+    @classmethod
+    def _validate_looks_like_main_model(cls, mod: ModelOnDisk) -> None:
+        has_main_model_keys = _has_main_keys(mod.load_state_dict())
+        if not has_main_model_keys:
+            raise NotAMatchError("state dict does not look like a main model")
+
+    @classmethod
+    def _validate_looks_like_gguf_quantized(cls, mod: ModelOnDisk) -> None:
+        has_ggml_tensors = _has_ggml_tensors(mod.load_state_dict())
+        if not has_ggml_tensors:
+            raise NotAMatchError("state dict does not look like GGUF quantized")
+
+
+class Main_Diffusers_FLUX_Config(Diffusers_Config_Base, Main_Config_Base, Config_Base):
+    """Model config for FLUX.1 models in diffusers format."""
+
+    base: Literal[BaseModelType.Flux] = Field(BaseModelType.Flux)
+    variant: FluxVariantType = Field()
+
+    @classmethod
+    def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
+        raise_if_not_dir(mod)
+
+        raise_for_override_fields(cls, override_fields)
+
+        # Check for FLUX-specific pipeline or transformer class names
+        raise_for_class_name(
+            common_config_paths(mod.path),
+            {
+                "FluxPipeline",
+                "FluxFillPipeline",
+                "FluxTransformer2DModel",
+            },
+        )
+
+        variant = override_fields.get("variant") or cls._get_variant_or_raise(mod)
+
+        repo_variant = override_fields.get("repo_variant") or cls._get_repo_variant_or_raise(mod)
+
+        return cls(
+            **override_fields,
+            variant=variant,
+            repo_variant=repo_variant,
+        )
+
+    @classmethod
+    def _get_variant_or_raise(cls, mod: ModelOnDisk) -> FluxVariantType:
+        """Determine the FLUX variant from the transformer config.
+
+        FLUX variants are distinguished by:
+        - in_channels: 64 for Dev/Schnell, 384 for DevFill
+        - guidance_embeds: True for Dev, False for Schnell
+        """
+        transformer_config = get_config_dict_or_raise(mod.path / "transformer" / "config.json")
+
+        in_channels = transformer_config.get("in_channels", 64)
+        guidance_embeds = transformer_config.get("guidance_embeds", False)
+
+        # DevFill has 384 input channels
+        if in_channels == 384:
+            return FluxVariantType.DevFill
+
+        # Dev has guidance_embeds=True, Schnell has guidance_embeds=False
+        if guidance_embeds:
+            return FluxVariantType.Dev
+        else:
+            return FluxVariantType.Schnell
+
+
+class Main_Diffusers_Flux2_Config(Diffusers_Config_Base, Main_Config_Base, Config_Base):
+    """Model config for FLUX.2 models in diffusers format (e.g. FLUX.2 Klein)."""
+
+    base: Literal[BaseModelType.Flux2] = Field(BaseModelType.Flux2)
+    variant: Flux2VariantType = Field()
+
+    @classmethod
+    def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
+        raise_if_not_dir(mod)
+
+        raise_for_override_fields(cls, override_fields)
+
+        # Check for FLUX.2-specific pipeline class names
+        raise_for_class_name(
+            common_config_paths(mod.path),
+            {
+                "Flux2KleinPipeline",
+            },
+        )
+
+        variant = override_fields.get("variant") or cls._get_variant_or_raise(mod)
+
+        repo_variant = override_fields.get("repo_variant") or cls._get_repo_variant_or_raise(mod)
+
+        return cls(
+            **override_fields,
+            variant=variant,
+            repo_variant=repo_variant,
+        )
+
+    @classmethod
+    def _get_variant_or_raise(cls, mod: ModelOnDisk) -> Flux2VariantType:
+        """Determine the FLUX.2 variant from the transformer config.
+
+        FLUX.2 Klein uses Qwen3 text encoder with larger joint_attention_dim:
+        - Klein 4B: joint_attention_dim = 7680 (3×Qwen3-4B hidden size)
+        - Klein 9B/9B Base: joint_attention_dim = 12288 (3×Qwen3-8B hidden size)
+
+        To distinguish Klein 9B (distilled) from Klein 9B Base (undistilled),
+        we check guidance_embeds:
+        - Klein 9B (distilled): guidance_embeds = False (guidance is "baked in" during distillation)
+        - Klein 9B Base (undistilled): guidance_embeds = True (needs guidance at inference)
+
+        Note: The official BFL Klein 9B model is the distilled version with guidance_embeds=False.
+        """
+        KLEIN_4B_CONTEXT_DIM = 7680  # 3 × 2560
+        KLEIN_9B_CONTEXT_DIM = 12288  # 3 × 4096
+
+        transformer_config = get_config_dict_or_raise(mod.path / "transformer" / "config.json")
+
+        joint_attention_dim = transformer_config.get("joint_attention_dim", 4096)
+        guidance_embeds = transformer_config.get("guidance_embeds", False)
+
+        # Determine variant based on joint_attention_dim
+        if joint_attention_dim == KLEIN_9B_CONTEXT_DIM:
+            # Check guidance_embeds to distinguish distilled from undistilled
+            # Klein 9B (distilled): guidance_embeds = False (guidance is baked in)
+            # Klein 9B Base (undistilled): guidance_embeds = True (needs guidance)
+            if guidance_embeds:
+                return Flux2VariantType.Klein9BBase
+            else:
+                return Flux2VariantType.Klein9B
+        elif joint_attention_dim == KLEIN_4B_CONTEXT_DIM:
+            return Flux2VariantType.Klein4B
+        elif joint_attention_dim > 4096:
+            # Unknown FLUX.2 variant, default to 4B
+            return Flux2VariantType.Klein4B
+
+        # Default to 4B
+        return Flux2VariantType.Klein4B
+
 
 class Main_SD_Diffusers_Config_Base(Diffusers_Config_Base, Main_Config_Base):
     prediction_type: SchedulerPredictionType = Field()
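
For diffusers-format models the same decision table operates on the plain dict read from transformer/config.json. A runnable sketch, with strings standing in for the Flux2VariantType members:

def flux2_variant(cfg: dict) -> str:
    joint_attention_dim = cfg.get("joint_attention_dim", 4096)
    if joint_attention_dim == 12288:  # 3x Qwen3-8B
        # guidance_embeds=True marks the undistilled Klein 9B Base
        return "Klein9BBase" if cfg.get("guidance_embeds", False) else "Klein9B"
    # 7680 (3x Qwen3-4B), unknown dims above 4096, and the final default all resolve to 4B
    return "Klein4B"

assert flux2_variant({"joint_attention_dim": 12288, "guidance_embeds": True}) == "Klein9BBase"
assert flux2_variant({"joint_attention_dim": 12288}) == "Klein9B"
assert flux2_variant({"joint_attention_dim": 7680}) == "Klein4B"
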
--- invokeai/backend/model_manager/configs/qwen3_encoder.py (6.10.0rc2)
+++ invokeai/backend/model_manager/configs/qwen3_encoder.py (6.11.0rc1)

@@ -1,4 +1,5 @@
-from typing import Any, Literal, Self
+import json
+from typing import Any, Literal, Optional, Self
 
 from pydantic import Field
 
@@ -11,7 +12,7 @@ from invokeai.backend.model_manager.configs.identification_utils import (
     raise_if_not_file,
 )
 from invokeai.backend.model_manager.model_on_disk import ModelOnDisk
-from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelFormat, ModelType
+from invokeai.backend.model_manager.taxonomy import BaseModelType, ModelFormat, ModelType, Qwen3VariantType
 from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor
 
 
@@ -45,12 +46,67 @@ def _has_ggml_tensors(state_dict: dict[str | int, Any]) -> bool:
     return any(isinstance(v, GGMLTensor) for v in state_dict.values())
 
 
+def _get_qwen3_variant_from_state_dict(state_dict: dict[str | int, Any]) -> Optional[Qwen3VariantType]:
+    """Determine Qwen3 variant (4B vs 8B) from state dict based on hidden_size.
+
+    The hidden_size can be determined from the embed_tokens.weight tensor shape:
+    - Qwen3 4B: hidden_size = 2560
+    - Qwen3 8B: hidden_size = 4096
+
+    For GGUF format, the key is 'token_embd.weight'.
+    For PyTorch format, the key is 'model.embed_tokens.weight'.
+    """
+    # Hidden size thresholds
+    QWEN3_4B_HIDDEN_SIZE = 2560
+    QWEN3_8B_HIDDEN_SIZE = 4096
+
+    # Try to find embed_tokens weight
+    embed_key = None
+    for key in state_dict.keys():
+        if isinstance(key, str):
+            if key == "model.embed_tokens.weight" or key == "token_embd.weight":
+                embed_key = key
+                break
+
+    if embed_key is None:
+        return None
+
+    tensor = state_dict[embed_key]
+
+    # Get hidden_size from tensor shape
+    # Shape is [vocab_size, hidden_size]
+    if isinstance(tensor, GGMLTensor):
+        # GGUF tensor
+        if hasattr(tensor, "shape") and len(tensor.shape) >= 2:
+            hidden_size = tensor.shape[1]
+        else:
+            return None
+    elif hasattr(tensor, "shape"):
+        # PyTorch tensor
+        if len(tensor.shape) >= 2:
+            hidden_size = tensor.shape[1]
+        else:
+            return None
+    else:
+        return None
+
+    # Determine variant based on hidden_size
+    if hidden_size == QWEN3_4B_HIDDEN_SIZE:
+        return Qwen3VariantType.Qwen3_4B
+    elif hidden_size == QWEN3_8B_HIDDEN_SIZE:
+        return Qwen3VariantType.Qwen3_8B
+    else:
+        # Unknown size, default to 4B (more common)
+        return Qwen3VariantType.Qwen3_4B
+
+
 class Qwen3Encoder_Checkpoint_Config(Checkpoint_Config_Base, Config_Base):
     """Configuration for single-file Qwen3 Encoder models (safetensors)."""
 
     base: Literal[BaseModelType.Any] = Field(default=BaseModelType.Any)
     type: Literal[ModelType.Qwen3Encoder] = Field(default=ModelType.Qwen3Encoder)
     format: Literal[ModelFormat.Checkpoint] = Field(default=ModelFormat.Checkpoint)
+    variant: Qwen3VariantType = Field(description="Qwen3 model size variant (4B or 8B)")
 
     @classmethod
     def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
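
As with the FLUX.2 probe, only the embedding shape matters here; the first axis (vocabulary size) is ignored. A sketch of the decision, with illustrative shapes and strings standing in for the enum:

def qwen3_variant(embed_shape: tuple[int, ...]) -> str:
    hidden_size = embed_shape[1]  # shape is [vocab_size, hidden_size]
    return "Qwen3_8B" if hidden_size == 4096 else "Qwen3_4B"  # unknown sizes default to 4B

assert qwen3_variant((151936, 2560)) == "Qwen3_4B"  # vocab size is illustrative
assert qwen3_variant((151936, 4096)) == "Qwen3_8B"
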
@@ -62,7 +118,17 @@ class Qwen3Encoder_Checkpoint_Config(Checkpoint_Config_Base, Config_Base):
 
         cls._validate_does_not_look_like_gguf_quantized(mod)
 
-        return cls(**override_fields)
+        # Determine variant from state dict
+        variant = cls._get_variant_or_default(mod)
+
+        return cls(variant=variant, **override_fields)
+
+    @classmethod
+    def _get_variant_or_default(cls, mod: ModelOnDisk) -> Qwen3VariantType:
+        """Get variant from state dict, defaulting to 4B if unknown."""
+        state_dict = mod.load_state_dict()
+        variant = _get_qwen3_variant_from_state_dict(state_dict)
+        return variant if variant is not None else Qwen3VariantType.Qwen3_4B
 
     @classmethod
     def _validate_looks_like_qwen3_model(cls, mod: ModelOnDisk) -> None:
@@ -87,6 +153,7 @@ class Qwen3Encoder_Qwen3Encoder_Config(Config_Base):
     base: Literal[BaseModelType.Any] = Field(default=BaseModelType.Any)
     type: Literal[ModelType.Qwen3Encoder] = Field(default=ModelType.Qwen3Encoder)
     format: Literal[ModelFormat.Qwen3Encoder] = Field(default=ModelFormat.Qwen3Encoder)
+    variant: Qwen3VariantType = Field(description="Qwen3 model size variant (4B or 8B)")
 
     @classmethod
     def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
@@ -94,6 +161,16 @@ class Qwen3Encoder_Qwen3Encoder_Config(Config_Base):
 
         raise_for_override_fields(cls, override_fields)
 
+        # Exclude full pipeline models - these should be matched as main models, not just Qwen3 encoders.
+        # Full pipelines have model_index.json at root (diffusers format) or a transformer subfolder.
+        model_index_path = mod.path / "model_index.json"
+        transformer_path = mod.path / "transformer"
+        if model_index_path.exists() or transformer_path.exists():
+            raise NotAMatchError(
+                "directory looks like a full diffusers pipeline (has model_index.json or transformer folder), "
+                "not a standalone Qwen3 encoder"
+            )
+
         # Check for text_encoder config - support both:
         # 1. Full model structure: model_root/text_encoder/config.json
         # 2. Standalone text_encoder download: model_root/config.json (when text_encoder subfolder is downloaded separately)
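
The pipeline exclusion reduces to two pathlib existence checks; a self-contained sketch:

import tempfile
from pathlib import Path

def looks_like_full_pipeline(root: Path) -> bool:
    # diffusers pipelines keep model_index.json at the root and/or a transformer/ subfolder
    return (root / "model_index.json").exists() or (root / "transformer").exists()

with tempfile.TemporaryDirectory() as d:
    assert not looks_like_full_pipeline(Path(d))
    (Path(d) / "model_index.json").touch()
    assert looks_like_full_pipeline(Path(d))
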
@@ -105,8 +182,6 @@ class Qwen3Encoder_Qwen3Encoder_Config(Config_Base):
         elif config_path_direct.exists():
             expected_config_path = config_path_direct
         else:
-            from invokeai.backend.model_manager.configs.identification_utils import NotAMatchError
-
             raise NotAMatchError(
                 f"unable to load config file(s): {{PosixPath('{config_path_nested}'): 'file does not exist'}}"
             )
@@ -121,7 +196,30 @@ class Qwen3Encoder_Qwen3Encoder_Config(Config_Base):
             },
         )
 
-        return cls(**override_fields)
+        # Determine variant from config.json hidden_size
+        variant = cls._get_variant_from_config(expected_config_path)
+
+        return cls(variant=variant, **override_fields)
+
+    @classmethod
+    def _get_variant_from_config(cls, config_path) -> Qwen3VariantType:
+        """Get variant from config.json based on hidden_size."""
+        QWEN3_4B_HIDDEN_SIZE = 2560
+        QWEN3_8B_HIDDEN_SIZE = 4096
+
+        try:
+            with open(config_path, "r", encoding="utf-8") as f:
+                config = json.load(f)
+            hidden_size = config.get("hidden_size")
+            if hidden_size == QWEN3_8B_HIDDEN_SIZE:
+                return Qwen3VariantType.Qwen3_8B
+            elif hidden_size == QWEN3_4B_HIDDEN_SIZE:
+                return Qwen3VariantType.Qwen3_4B
+            else:
+                # Default to 4B for unknown sizes
+                return Qwen3VariantType.Qwen3_4B
+        except (json.JSONDecodeError, OSError):
+            return Qwen3VariantType.Qwen3_4B
 
 
 class Qwen3Encoder_GGUF_Config(Checkpoint_Config_Base, Config_Base):
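
_get_variant_from_config degrades to the 4B default on any read or parse failure; since FileNotFoundError subclasses OSError, a missing file is covered too. A runnable sketch of that fallback, again with strings standing in for the enum:

import json
import tempfile
from pathlib import Path

def variant_from_config(config_path: Path) -> str:
    try:
        hidden_size = json.loads(config_path.read_text(encoding="utf-8")).get("hidden_size")
        return "Qwen3_8B" if hidden_size == 4096 else "Qwen3_4B"
    except (json.JSONDecodeError, OSError):
        return "Qwen3_4B"

with tempfile.TemporaryDirectory() as d:
    path = Path(d) / "config.json"
    path.write_text(json.dumps({"hidden_size": 4096}), encoding="utf-8")
    assert variant_from_config(path) == "Qwen3_8B"
    assert variant_from_config(Path(d) / "missing.json") == "Qwen3_4B"
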
@@ -130,6 +228,7 @@ class Qwen3Encoder_GGUF_Config(Checkpoint_Config_Base, Config_Base):
     base: Literal[BaseModelType.Any] = Field(default=BaseModelType.Any)
     type: Literal[ModelType.Qwen3Encoder] = Field(default=ModelType.Qwen3Encoder)
     format: Literal[ModelFormat.GGUFQuantized] = Field(default=ModelFormat.GGUFQuantized)
+    variant: Qwen3VariantType = Field(description="Qwen3 model size variant (4B or 8B)")
 
     @classmethod
     def from_model_on_disk(cls, mod: ModelOnDisk, override_fields: dict[str, Any]) -> Self:
@@ -141,7 +240,17 @@ class Qwen3Encoder_GGUF_Config(Checkpoint_Config_Base, Config_Base):
 
         cls._validate_looks_like_gguf_quantized(mod)
 
-        return cls(**override_fields)
+        # Determine variant from state dict
+        variant = cls._get_variant_or_default(mod)
+
+        return cls(variant=variant, **override_fields)
+
+    @classmethod
+    def _get_variant_or_default(cls, mod: ModelOnDisk) -> Qwen3VariantType:
+        """Get variant from state dict, defaulting to 4B if unknown."""
+        state_dict = mod.load_state_dict()
+        variant = _get_qwen3_variant_from_state_dict(state_dict)
+        return variant if variant is not None else Qwen3VariantType.Qwen3_4B
 
     @classmethod
     def _validate_looks_like_qwen3_model(cls, mod: ModelOnDisk) -> None: