InvokeAI 6.10.0rc1__py3-none-any.whl → 6.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. invokeai/app/api/routers/model_manager.py +43 -1
  2. invokeai/app/invocations/fields.py +1 -1
  3. invokeai/app/invocations/flux2_denoise.py +499 -0
  4. invokeai/app/invocations/flux2_klein_model_loader.py +222 -0
  5. invokeai/app/invocations/flux2_klein_text_encoder.py +222 -0
  6. invokeai/app/invocations/flux2_vae_decode.py +106 -0
  7. invokeai/app/invocations/flux2_vae_encode.py +88 -0
  8. invokeai/app/invocations/flux_denoise.py +77 -3
  9. invokeai/app/invocations/flux_lora_loader.py +1 -1
  10. invokeai/app/invocations/flux_model_loader.py +2 -5
  11. invokeai/app/invocations/ideal_size.py +6 -1
  12. invokeai/app/invocations/metadata.py +4 -0
  13. invokeai/app/invocations/metadata_linked.py +47 -0
  14. invokeai/app/invocations/model.py +1 -0
  15. invokeai/app/invocations/pbr_maps.py +59 -0
  16. invokeai/app/invocations/z_image_denoise.py +244 -84
  17. invokeai/app/invocations/z_image_image_to_latents.py +9 -1
  18. invokeai/app/invocations/z_image_latents_to_image.py +9 -1
  19. invokeai/app/invocations/z_image_seed_variance_enhancer.py +110 -0
  20. invokeai/app/services/config/config_default.py +3 -1
  21. invokeai/app/services/invocation_stats/invocation_stats_common.py +6 -6
  22. invokeai/app/services/invocation_stats/invocation_stats_default.py +9 -4
  23. invokeai/app/services/model_manager/model_manager_default.py +7 -0
  24. invokeai/app/services/model_records/model_records_base.py +4 -2
  25. invokeai/app/services/shared/invocation_context.py +15 -0
  26. invokeai/app/services/shared/sqlite/sqlite_util.py +2 -0
  27. invokeai/app/services/shared/sqlite_migrator/migrations/migration_25.py +61 -0
  28. invokeai/app/util/step_callback.py +58 -2
  29. invokeai/backend/flux/denoise.py +338 -118
  30. invokeai/backend/flux/dype/__init__.py +31 -0
  31. invokeai/backend/flux/dype/base.py +260 -0
  32. invokeai/backend/flux/dype/embed.py +116 -0
  33. invokeai/backend/flux/dype/presets.py +148 -0
  34. invokeai/backend/flux/dype/rope.py +110 -0
  35. invokeai/backend/flux/extensions/dype_extension.py +91 -0
  36. invokeai/backend/flux/schedulers.py +62 -0
  37. invokeai/backend/flux/util.py +35 -1
  38. invokeai/backend/flux2/__init__.py +4 -0
  39. invokeai/backend/flux2/denoise.py +280 -0
  40. invokeai/backend/flux2/ref_image_extension.py +294 -0
  41. invokeai/backend/flux2/sampling_utils.py +209 -0
  42. invokeai/backend/image_util/pbr_maps/architecture/block.py +367 -0
  43. invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py +70 -0
  44. invokeai/backend/image_util/pbr_maps/pbr_maps.py +141 -0
  45. invokeai/backend/image_util/pbr_maps/utils/image_ops.py +93 -0
  46. invokeai/backend/model_manager/configs/factory.py +19 -1
  47. invokeai/backend/model_manager/configs/lora.py +36 -0
  48. invokeai/backend/model_manager/configs/main.py +395 -3
  49. invokeai/backend/model_manager/configs/qwen3_encoder.py +116 -7
  50. invokeai/backend/model_manager/configs/vae.py +104 -2
  51. invokeai/backend/model_manager/load/model_cache/model_cache.py +107 -2
  52. invokeai/backend/model_manager/load/model_loaders/cogview4.py +2 -1
  53. invokeai/backend/model_manager/load/model_loaders/flux.py +1020 -8
  54. invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py +4 -2
  55. invokeai/backend/model_manager/load/model_loaders/onnx.py +1 -0
  56. invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +2 -1
  57. invokeai/backend/model_manager/load/model_loaders/z_image.py +158 -31
  58. invokeai/backend/model_manager/starter_models.py +141 -4
  59. invokeai/backend/model_manager/taxonomy.py +31 -4
  60. invokeai/backend/model_manager/util/select_hf_files.py +3 -2
  61. invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py +39 -5
  62. invokeai/backend/quantization/gguf/ggml_tensor.py +15 -4
  63. invokeai/backend/util/vae_working_memory.py +0 -2
  64. invokeai/backend/z_image/extensions/regional_prompting_extension.py +10 -12
  65. invokeai/frontend/web/dist/assets/App-D13dX7be.js +161 -0
  66. invokeai/frontend/web/dist/assets/{browser-ponyfill-DHZxq1nk.js → browser-ponyfill-u_ZjhQTI.js} +1 -1
  67. invokeai/frontend/web/dist/assets/index-BB0nHmDe.js +530 -0
  68. invokeai/frontend/web/dist/index.html +1 -1
  69. invokeai/frontend/web/dist/locales/en-GB.json +1 -0
  70. invokeai/frontend/web/dist/locales/en.json +85 -6
  71. invokeai/frontend/web/dist/locales/it.json +135 -15
  72. invokeai/frontend/web/dist/locales/ru.json +11 -11
  73. invokeai/version/invokeai_version.py +1 -1
  74. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/METADATA +8 -2
  75. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/RECORD +81 -57
  76. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/WHEEL +1 -1
  77. invokeai/frontend/web/dist/assets/App-CYhlZO3Q.js +0 -161
  78. invokeai/frontend/web/dist/assets/index-dgSJAY--.js +0 -530
  79. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/entry_points.txt +0 -0
  80. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/licenses/LICENSE +0 -0
  81. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
  82. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
  83. {invokeai-6.10.0rc1.dist-info → invokeai-6.11.0.dist-info}/top_level.txt +0 -0
@@ -34,7 +34,7 @@ from invokeai.backend.flux.model import Flux
  from invokeai.backend.flux.modules.autoencoder import AutoEncoder
  from invokeai.backend.flux.redux.flux_redux_model import FluxReduxModel
  from invokeai.backend.flux.util import get_flux_ae_params, get_flux_transformers_params
- from invokeai.backend.model_manager.configs.base import Checkpoint_Config_Base
+ from invokeai.backend.model_manager.configs.base import Checkpoint_Config_Base, Diffusers_Config_Base
  from invokeai.backend.model_manager.configs.clip_embed import CLIPEmbed_Diffusers_Config_Base
  from invokeai.backend.model_manager.configs.controlnet import (
  ControlNet_Checkpoint_Config_Base,
@@ -45,13 +45,16 @@ from invokeai.backend.model_manager.configs.flux_redux import FLUXRedux_Checkpoi
  from invokeai.backend.model_manager.configs.ip_adapter import IPAdapter_Checkpoint_Config_Base
  from invokeai.backend.model_manager.configs.main import (
  Main_BnBNF4_FLUX_Config,
+ Main_Checkpoint_Flux2_Config,
  Main_Checkpoint_FLUX_Config,
+ Main_GGUF_Flux2_Config,
  Main_GGUF_FLUX_Config,
  )
  from invokeai.backend.model_manager.configs.t5_encoder import T5Encoder_BnBLLMint8_Config, T5Encoder_T5Encoder_Config
- from invokeai.backend.model_manager.configs.vae import VAE_Checkpoint_Config_Base
+ from invokeai.backend.model_manager.configs.vae import VAE_Checkpoint_Config_Base, VAE_Checkpoint_Flux2_Config
  from invokeai.backend.model_manager.load.load_default import ModelLoader
  from invokeai.backend.model_manager.load.model_loader_registry import ModelLoaderRegistry
+ from invokeai.backend.model_manager.load.model_loaders.generic_diffusers import GenericDiffusersLoader
  from invokeai.backend.model_manager.taxonomy import (
  AnyModel,
  BaseModelType,
@@ -108,6 +111,264 @@ class FluxVAELoader(ModelLoader):
108
111
  return model
109
112
 
110
113
 
114
+ @ModelLoaderRegistry.register(base=BaseModelType.Flux2, type=ModelType.VAE, format=ModelFormat.Diffusers)
115
+ class Flux2VAEDiffusersLoader(ModelLoader):
116
+ """Class to load FLUX.2 VAE models in diffusers format (AutoencoderKLFlux2 with 32 latent channels)."""
117
+
118
+ def _load_model(
119
+ self,
120
+ config: AnyModelConfig,
121
+ submodel_type: Optional[SubModelType] = None,
122
+ ) -> AnyModel:
123
+ from diffusers import AutoencoderKLFlux2
124
+
125
+ model_path = Path(config.path)
126
+
127
+ # VAE is broken in float16, which mps defaults to
128
+ if self._torch_dtype == torch.float16:
129
+ try:
130
+ vae_dtype = torch.tensor([1.0], dtype=torch.bfloat16, device=self._torch_device).dtype
131
+ except TypeError:
132
+ vae_dtype = torch.float32
133
+ else:
134
+ vae_dtype = self._torch_dtype
135
+
136
+ model = AutoencoderKLFlux2.from_pretrained(
137
+ model_path,
138
+ torch_dtype=vae_dtype,
139
+ local_files_only=True,
140
+ )
141
+
142
+ return model
143
+
144
+
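The float16 guard above probes whether the target device can actually allocate bfloat16 tensors and otherwise falls back to float32, since the FLUX VAEs produce broken output in float16 (the default on mps). A minimal standalone sketch of that probe; the helper name pick_vae_dtype is illustrative, not part of the codebase:

    import torch

    def pick_vae_dtype(requested: torch.dtype, device: torch.device) -> torch.dtype:
        """If float16 was requested, probe for bfloat16 support and fall back to float32."""
        if requested != torch.float16:
            return requested
        try:
            # Creating a tiny bfloat16 tensor on the target device raises TypeError
            # on backends that do not support bfloat16.
            return torch.tensor([1.0], dtype=torch.bfloat16, device=device).dtype
        except TypeError:
            return torch.float32

    print(pick_vae_dtype(torch.float16, torch.device("cpu")))  # torch.bfloat16
    print(pick_vae_dtype(torch.float32, torch.device("cpu")))  # torch.float32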
145
+ @ModelLoaderRegistry.register(base=BaseModelType.Flux2, type=ModelType.VAE, format=ModelFormat.Checkpoint)
146
+ class Flux2VAELoader(ModelLoader):
147
+ """Class to load FLUX.2 VAE models (AutoencoderKLFlux2 with 32 latent channels)."""
148
+
149
+ def _load_model(
150
+ self,
151
+ config: AnyModelConfig,
152
+ submodel_type: Optional[SubModelType] = None,
153
+ ) -> AnyModel:
154
+ if not isinstance(config, VAE_Checkpoint_Flux2_Config):
155
+ raise ValueError("Only VAE_Checkpoint_Flux2_Config models are currently supported here.")
156
+
157
+ from diffusers import AutoencoderKLFlux2
158
+
159
+ model_path = Path(config.path)
160
+
161
+ # Load state dict manually since from_single_file may not support AutoencoderKLFlux2 yet
162
+ sd = load_file(model_path)
163
+
164
+ # Convert BFL format to diffusers format if needed
165
+ # BFL format uses: encoder.down., decoder.up., decoder.mid.block_1, decoder.mid.attn_1, decoder.norm_out
166
+ # Diffusers uses: encoder.down_blocks., decoder.up_blocks., decoder.mid_block.resnets., decoder.conv_norm_out
167
+ is_bfl_format = any(
168
+ k.startswith("encoder.down.")
169
+ or k.startswith("decoder.up.")
170
+ or k.startswith("decoder.mid.block_")
171
+ or k.startswith("decoder.mid.attn_")
172
+ or k.startswith("decoder.norm_out")
173
+ or k.startswith("encoder.mid.block_")
174
+ or k.startswith("encoder.mid.attn_")
175
+ or k.startswith("encoder.norm_out")
176
+ for k in sd.keys()
177
+ )
178
+ if is_bfl_format:
179
+ sd = self._convert_flux2_vae_bfl_to_diffusers(sd)
180
+
181
+ # FLUX.2 VAE configuration (32 latent channels)
182
+ # Based on the official FLUX.2 VAE architecture
183
+ # Use default config - AutoencoderKLFlux2 has built-in defaults
184
+ with SilenceWarnings():
185
+ with accelerate.init_empty_weights():
186
+ model = AutoencoderKLFlux2()
187
+
188
+ # Convert to bfloat16 and load
189
+ for k in sd.keys():
190
+ sd[k] = sd[k].to(torch.bfloat16)
191
+
192
+ model.load_state_dict(sd, assign=True)
193
+
194
+ # VAE is broken in float16, which mps defaults to
195
+ if self._torch_dtype == torch.float16:
196
+ try:
197
+ vae_dtype = torch.tensor([1.0], dtype=torch.bfloat16, device=self._torch_device).dtype
198
+ except TypeError:
199
+ vae_dtype = torch.float32
200
+ else:
201
+ vae_dtype = self._torch_dtype
202
+ model.to(vae_dtype)
203
+
204
+ return model
205
+
206
+ def _convert_flux2_vae_bfl_to_diffusers(self, sd: dict) -> dict:
207
+ """Convert FLUX.2 VAE BFL format state dict to diffusers format.
208
+
209
+ Key differences:
210
+ - encoder.down.X.block.Y -> encoder.down_blocks.X.resnets.Y
211
+ - encoder.down.X.downsample.conv -> encoder.down_blocks.X.downsamplers.0.conv
212
+ - encoder.mid.block_1/2 -> encoder.mid_block.resnets.0/1
213
+ - encoder.mid.attn_1.q/k/v -> encoder.mid_block.attentions.0.to_q/k/v
214
+ - encoder.norm_out -> encoder.conv_norm_out
215
+ - encoder.quant_conv -> quant_conv (top-level)
216
+ - decoder.up.X -> decoder.up_blocks.(num_blocks-1-X) (reversed order!)
217
+ - decoder.post_quant_conv -> post_quant_conv (top-level)
218
+ - *.nin_shortcut -> *.conv_shortcut
219
+ """
220
+ import re
221
+
222
+ converted = {}
223
+ num_up_blocks = 4 # Standard VAE has 4 up blocks
224
+
225
+ for old_key, tensor in sd.items():
226
+ new_key = old_key
227
+
228
+ # Encoder down blocks: encoder.down.X.block.Y -> encoder.down_blocks.X.resnets.Y
229
+ match = re.match(r"encoder\.down\.(\d+)\.block\.(\d+)\.(.*)", old_key)
230
+ if match:
231
+ block_idx, resnet_idx, rest = match.groups()
232
+ rest = rest.replace("nin_shortcut", "conv_shortcut")
233
+ new_key = f"encoder.down_blocks.{block_idx}.resnets.{resnet_idx}.{rest}"
234
+ converted[new_key] = tensor
235
+ continue
236
+
237
+ # Encoder downsamplers: encoder.down.X.downsample.conv -> encoder.down_blocks.X.downsamplers.0.conv
238
+ match = re.match(r"encoder\.down\.(\d+)\.downsample\.conv\.(.*)", old_key)
239
+ if match:
240
+ block_idx, rest = match.groups()
241
+ new_key = f"encoder.down_blocks.{block_idx}.downsamplers.0.conv.{rest}"
242
+ converted[new_key] = tensor
243
+ continue
244
+
245
+ # Encoder mid block resnets: encoder.mid.block_1/2 -> encoder.mid_block.resnets.0/1
246
+ match = re.match(r"encoder\.mid\.block_(\d+)\.(.*)", old_key)
247
+ if match:
248
+ block_num, rest = match.groups()
249
+ resnet_idx = int(block_num) - 1 # block_1 -> resnets.0, block_2 -> resnets.1
250
+ new_key = f"encoder.mid_block.resnets.{resnet_idx}.{rest}"
251
+ converted[new_key] = tensor
252
+ continue
253
+
254
+ # Encoder mid block attention: encoder.mid.attn_1.* -> encoder.mid_block.attentions.0.*
255
+ match = re.match(r"encoder\.mid\.attn_1\.(.*)", old_key)
256
+ if match:
257
+ rest = match.group(1)
258
+ # Map attention keys
259
+ # BFL uses Conv2d (shape [out, in, 1, 1]), diffusers uses Linear (shape [out, in])
260
+ # Squeeze the extra dimensions for weight tensors
261
+ if rest.startswith("q."):
262
+ new_key = f"encoder.mid_block.attentions.0.to_q.{rest[2:]}"
263
+ if rest.endswith(".weight") and tensor.dim() == 4:
264
+ tensor = tensor.squeeze(-1).squeeze(-1)
265
+ elif rest.startswith("k."):
266
+ new_key = f"encoder.mid_block.attentions.0.to_k.{rest[2:]}"
267
+ if rest.endswith(".weight") and tensor.dim() == 4:
268
+ tensor = tensor.squeeze(-1).squeeze(-1)
269
+ elif rest.startswith("v."):
270
+ new_key = f"encoder.mid_block.attentions.0.to_v.{rest[2:]}"
271
+ if rest.endswith(".weight") and tensor.dim() == 4:
272
+ tensor = tensor.squeeze(-1).squeeze(-1)
273
+ elif rest.startswith("proj_out."):
274
+ new_key = f"encoder.mid_block.attentions.0.to_out.0.{rest[9:]}"
275
+ if rest.endswith(".weight") and tensor.dim() == 4:
276
+ tensor = tensor.squeeze(-1).squeeze(-1)
277
+ elif rest.startswith("norm."):
278
+ new_key = f"encoder.mid_block.attentions.0.group_norm.{rest[5:]}"
279
+ else:
280
+ new_key = f"encoder.mid_block.attentions.0.{rest}"
281
+ converted[new_key] = tensor
282
+ continue
283
+
284
+ # Encoder norm_out -> conv_norm_out
285
+ if old_key.startswith("encoder.norm_out."):
286
+ new_key = old_key.replace("encoder.norm_out.", "encoder.conv_norm_out.")
287
+ converted[new_key] = tensor
288
+ continue
289
+
290
+ # Encoder quant_conv -> quant_conv (move to top level)
291
+ if old_key.startswith("encoder.quant_conv."):
292
+ new_key = old_key.replace("encoder.quant_conv.", "quant_conv.")
293
+ converted[new_key] = tensor
294
+ continue
295
+
296
+ # Decoder up blocks (reversed order!): decoder.up.X -> decoder.up_blocks.(num_blocks-1-X)
297
+ match = re.match(r"decoder\.up\.(\d+)\.block\.(\d+)\.(.*)", old_key)
298
+ if match:
299
+ block_idx, resnet_idx, rest = match.groups()
300
+ # Reverse the block index
301
+ new_block_idx = num_up_blocks - 1 - int(block_idx)
302
+ rest = rest.replace("nin_shortcut", "conv_shortcut")
303
+ new_key = f"decoder.up_blocks.{new_block_idx}.resnets.{resnet_idx}.{rest}"
304
+ converted[new_key] = tensor
305
+ continue
306
+
307
+ # Decoder upsamplers (reversed order!)
308
+ match = re.match(r"decoder\.up\.(\d+)\.upsample\.conv\.(.*)", old_key)
309
+ if match:
310
+ block_idx, rest = match.groups()
311
+ new_block_idx = num_up_blocks - 1 - int(block_idx)
312
+ new_key = f"decoder.up_blocks.{new_block_idx}.upsamplers.0.conv.{rest}"
313
+ converted[new_key] = tensor
314
+ continue
315
+
316
+ # Decoder mid block resnets: decoder.mid.block_1/2 -> decoder.mid_block.resnets.0/1
317
+ match = re.match(r"decoder\.mid\.block_(\d+)\.(.*)", old_key)
318
+ if match:
319
+ block_num, rest = match.groups()
320
+ resnet_idx = int(block_num) - 1
321
+ new_key = f"decoder.mid_block.resnets.{resnet_idx}.{rest}"
322
+ converted[new_key] = tensor
323
+ continue
324
+
325
+ # Decoder mid block attention: decoder.mid.attn_1.* -> decoder.mid_block.attentions.0.*
326
+ match = re.match(r"decoder\.mid\.attn_1\.(.*)", old_key)
327
+ if match:
328
+ rest = match.group(1)
329
+ # BFL uses Conv2d (shape [out, in, 1, 1]), diffusers uses Linear (shape [out, in])
330
+ # Squeeze the extra dimensions for weight tensors
331
+ if rest.startswith("q."):
332
+ new_key = f"decoder.mid_block.attentions.0.to_q.{rest[2:]}"
333
+ if rest.endswith(".weight") and tensor.dim() == 4:
334
+ tensor = tensor.squeeze(-1).squeeze(-1)
335
+ elif rest.startswith("k."):
336
+ new_key = f"decoder.mid_block.attentions.0.to_k.{rest[2:]}"
337
+ if rest.endswith(".weight") and tensor.dim() == 4:
338
+ tensor = tensor.squeeze(-1).squeeze(-1)
339
+ elif rest.startswith("v."):
340
+ new_key = f"decoder.mid_block.attentions.0.to_v.{rest[2:]}"
341
+ if rest.endswith(".weight") and tensor.dim() == 4:
342
+ tensor = tensor.squeeze(-1).squeeze(-1)
343
+ elif rest.startswith("proj_out."):
344
+ new_key = f"decoder.mid_block.attentions.0.to_out.0.{rest[9:]}"
345
+ if rest.endswith(".weight") and tensor.dim() == 4:
346
+ tensor = tensor.squeeze(-1).squeeze(-1)
347
+ elif rest.startswith("norm."):
348
+ new_key = f"decoder.mid_block.attentions.0.group_norm.{rest[5:]}"
349
+ else:
350
+ new_key = f"decoder.mid_block.attentions.0.{rest}"
351
+ converted[new_key] = tensor
352
+ continue
353
+
354
+ # Decoder norm_out -> conv_norm_out
355
+ if old_key.startswith("decoder.norm_out."):
356
+ new_key = old_key.replace("decoder.norm_out.", "decoder.conv_norm_out.")
357
+ converted[new_key] = tensor
358
+ continue
359
+
360
+ # Decoder post_quant_conv -> post_quant_conv (move to top level)
361
+ if old_key.startswith("decoder.post_quant_conv."):
362
+ new_key = old_key.replace("decoder.post_quant_conv.", "post_quant_conv.")
363
+ converted[new_key] = tensor
364
+ continue
365
+
366
+ # Keep other keys as-is (like encoder.conv_in, decoder.conv_in, decoder.conv_out, bn.*)
367
+ converted[new_key] = tensor
368
+
369
+ return converted
370
+
371
+
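To make the key mapping described in _convert_flux2_vae_bfl_to_diffusers concrete, here is a toy rename function covering just two of the documented patterns (encoder down blocks and the reversed decoder up-block indexing). It is an illustration under the assumption of 4 up blocks, not the shipped converter:

    import re

    NUM_UP_BLOCKS = 4  # assumed standard VAE layout

    def rename_example(key: str) -> str:
        # encoder.down.X.block.Y.* -> encoder.down_blocks.X.resnets.Y.*
        m = re.match(r"encoder\.down\.(\d+)\.block\.(\d+)\.(.*)", key)
        if m:
            blk, res, rest = m.groups()
            return f"encoder.down_blocks.{blk}.resnets.{res}.{rest.replace('nin_shortcut', 'conv_shortcut')}"
        # decoder.up.X.block.Y.* -> decoder.up_blocks.(N-1-X).resnets.Y.*  (reversed order)
        m = re.match(r"decoder\.up\.(\d+)\.block\.(\d+)\.(.*)", key)
        if m:
            blk, res, rest = m.groups()
            return f"decoder.up_blocks.{NUM_UP_BLOCKS - 1 - int(blk)}.resnets.{res}.{rest}"
        return key

    assert rename_example("encoder.down.0.block.1.nin_shortcut.weight") == \
        "encoder.down_blocks.0.resnets.1.conv_shortcut.weight"
    assert rename_example("decoder.up.0.block.0.conv1.weight") == \
        "decoder.up_blocks.3.resnets.0.conv1.weight"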
111
372
  @ModelLoaderRegistry.register(base=BaseModelType.Any, type=ModelType.CLIPEmbed, format=ModelFormat.Diffusers)
112
373
  class CLIPDiffusersLoader(ModelLoader):
113
374
  """Class to load main models."""
@@ -122,9 +383,9 @@ class CLIPDiffusersLoader(ModelLoader):
 
  match submodel_type:
  case SubModelType.Tokenizer:
- return CLIPTokenizer.from_pretrained(Path(config.path) / "tokenizer")
+ return CLIPTokenizer.from_pretrained(Path(config.path) / "tokenizer", local_files_only=True)
  case SubModelType.TextEncoder:
- return CLIPTextModel.from_pretrained(Path(config.path) / "text_encoder")
+ return CLIPTextModel.from_pretrained(Path(config.path) / "text_encoder", local_files_only=True)
 
  raise ValueError(
  f"Only Tokenizer and TextEncoder submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}"
@@ -148,10 +409,12 @@ class BnbQuantizedLlmInt8bCheckpointModel(ModelLoader):
  )
  match submodel_type:
  case SubModelType.Tokenizer2 | SubModelType.Tokenizer3:
- return T5TokenizerFast.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512)
+ return T5TokenizerFast.from_pretrained(
+ Path(config.path) / "tokenizer_2", max_length=512, local_files_only=True
+ )
  case SubModelType.TextEncoder2 | SubModelType.TextEncoder3:
  te2_model_path = Path(config.path) / "text_encoder_2"
- model_config = AutoConfig.from_pretrained(te2_model_path)
+ model_config = AutoConfig.from_pretrained(te2_model_path, local_files_only=True)
  with accelerate.init_empty_weights():
  model = AutoModelForTextEncoding.from_config(model_config)
  model = quantize_model_llm_int8(model, modules_to_not_convert=set())
@@ -192,10 +455,15 @@ class T5EncoderCheckpointModel(ModelLoader):
 
  match submodel_type:
  case SubModelType.Tokenizer2 | SubModelType.Tokenizer3:
- return T5TokenizerFast.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512)
+ return T5TokenizerFast.from_pretrained(
+ Path(config.path) / "tokenizer_2", max_length=512, local_files_only=True
+ )
  case SubModelType.TextEncoder2 | SubModelType.TextEncoder3:
  return T5EncoderModel.from_pretrained(
- Path(config.path) / "text_encoder_2", torch_dtype="auto", low_cpu_mem_usage=True
+ Path(config.path) / "text_encoder_2",
+ torch_dtype="auto",
+ low_cpu_mem_usage=True,
+ local_files_only=True,
  )
 
  raise ValueError(
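The common thread in these tokenizer and text-encoder hunks is the added local_files_only=True, which makes transformers resolve already-installed models strictly from disk instead of falling back to a Hugging Face Hub lookup. A minimal usage sketch of the same pattern; the directory path is a placeholder:

    from pathlib import Path

    from transformers import CLIPTextModel, CLIPTokenizer

    # Substitute a real installed model directory; nothing is downloaded.
    model_dir = Path("/path/to/installed/clip-model")

    tokenizer = CLIPTokenizer.from_pretrained(model_dir / "tokenizer", local_files_only=True)
    text_encoder = CLIPTextModel.from_pretrained(model_dir / "text_encoder", local_files_only=True)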
@@ -333,6 +601,750 @@ class FluxBnbQuantizednf4bCheckpointModel(ModelLoader):
333
601
  return model
334
602
 
335
603
 
604
+ @ModelLoaderRegistry.register(base=BaseModelType.Flux, type=ModelType.Main, format=ModelFormat.Diffusers)
605
+ class FluxDiffusersModel(GenericDiffusersLoader):
606
+ """Class to load FLUX.1 main models in diffusers format."""
607
+
608
+ def _load_model(
609
+ self,
610
+ config: AnyModelConfig,
611
+ submodel_type: Optional[SubModelType] = None,
612
+ ) -> AnyModel:
613
+ if isinstance(config, Checkpoint_Config_Base):
614
+ raise NotImplementedError("CheckpointConfigBase is not implemented for FLUX diffusers models.")
615
+
616
+ if submodel_type is None:
617
+ raise Exception("A submodel type must be provided when loading main pipelines.")
618
+
619
+ model_path = Path(config.path)
620
+ load_class = self.get_hf_load_class(model_path, submodel_type)
621
+ repo_variant = config.repo_variant if isinstance(config, Diffusers_Config_Base) else None
622
+ variant = repo_variant.value if repo_variant else None
623
+ model_path = model_path / submodel_type.value
624
+
625
+ # We force bfloat16 for FLUX models. This is required for correct inference.
626
+ dtype = torch.bfloat16
627
+ try:
628
+ result: AnyModel = load_class.from_pretrained(
629
+ model_path,
630
+ torch_dtype=dtype,
631
+ variant=variant,
632
+ local_files_only=True,
633
+ )
634
+ except OSError as e:
635
+ if variant and "no file named" in str(
636
+ e
637
+ ): # try without the variant, just in case user's preferences changed
638
+ result = load_class.from_pretrained(model_path, torch_dtype=dtype, local_files_only=True)
639
+ else:
640
+ raise e
641
+
642
+ return result
643
+
644
+
645
+ @ModelLoaderRegistry.register(base=BaseModelType.Flux2, type=ModelType.Main, format=ModelFormat.Diffusers)
646
+ class Flux2DiffusersModel(GenericDiffusersLoader):
647
+ """Class to load FLUX.2 main models in diffusers format (e.g. FLUX.2 Klein)."""
648
+
649
+ def _load_model(
650
+ self,
651
+ config: AnyModelConfig,
652
+ submodel_type: Optional[SubModelType] = None,
653
+ ) -> AnyModel:
654
+ if isinstance(config, Checkpoint_Config_Base):
655
+ raise NotImplementedError("CheckpointConfigBase is not implemented for FLUX.2 diffusers models.")
656
+
657
+ if submodel_type is None:
658
+ raise Exception("A submodel type must be provided when loading main pipelines.")
659
+
660
+ model_path = Path(config.path)
661
+ load_class = self.get_hf_load_class(model_path, submodel_type)
662
+ repo_variant = config.repo_variant if isinstance(config, Diffusers_Config_Base) else None
663
+ variant = repo_variant.value if repo_variant else None
664
+ model_path = model_path / submodel_type.value
665
+
666
+ # We force bfloat16 for FLUX.2 models. This is required for correct inference.
667
+ # We use low_cpu_mem_usage=False to avoid meta tensors for weights not in checkpoint.
668
+ # FLUX.2 Klein models may have guidance_embeds=False, so the guidance_embed layers
669
+ # won't be in the checkpoint but the model class still creates them.
670
+ # We use SilenceWarnings to suppress the "guidance_embeds is not expected" warning
671
+ # from diffusers Flux2Transformer2DModel.
672
+ dtype = torch.bfloat16
673
+ with SilenceWarnings():
674
+ try:
675
+ result: AnyModel = load_class.from_pretrained(
676
+ model_path,
677
+ torch_dtype=dtype,
678
+ variant=variant,
679
+ local_files_only=True,
680
+ low_cpu_mem_usage=False,
681
+ )
682
+ except OSError as e:
683
+ if variant and "no file named" in str(
684
+ e
685
+ ): # try without the variant, just in case user's preferences changed
686
+ result = load_class.from_pretrained(
687
+ model_path,
688
+ torch_dtype=dtype,
689
+ local_files_only=True,
690
+ low_cpu_mem_usage=False,
691
+ )
692
+ else:
693
+ raise e
694
+
695
+ # For Klein models without guidance_embeds, zero out the guidance_embedder weights
696
+ # that were randomly initialized by diffusers. This prevents noise from affecting
697
+ # the time embeddings.
698
+ if submodel_type == SubModelType.Transformer and hasattr(result, "time_guidance_embed"):
699
+ # Check if this is a Klein model without guidance (guidance_embeds=False in config)
700
+ transformer_config_path = model_path / "config.json"
701
+ if transformer_config_path.exists():
702
+ import json
703
+
704
+ with open(transformer_config_path, "r") as f:
705
+ transformer_config = json.load(f)
706
+ if not transformer_config.get("guidance_embeds", True):
707
+ # Zero out the guidance embedder weights
708
+ guidance_emb = result.time_guidance_embed.guidance_embedder
709
+ if hasattr(guidance_emb, "linear_1"):
710
+ guidance_emb.linear_1.weight.data.zero_()
711
+ if guidance_emb.linear_1.bias is not None:
712
+ guidance_emb.linear_1.bias.data.zero_()
713
+ if hasattr(guidance_emb, "linear_2"):
714
+ guidance_emb.linear_2.weight.data.zero_()
715
+ if guidance_emb.linear_2.bias is not None:
716
+ guidance_emb.linear_2.bias.data.zero_()
717
+
718
+ return result
719
+
720
+
721
+ @ModelLoaderRegistry.register(base=BaseModelType.Flux2, type=ModelType.Main, format=ModelFormat.Checkpoint)
722
+ class Flux2CheckpointModel(ModelLoader):
723
+ """Class to load FLUX.2 transformer models from single-file checkpoints (safetensors)."""
724
+
725
+ def _load_model(
726
+ self,
727
+ config: AnyModelConfig,
728
+ submodel_type: Optional[SubModelType] = None,
729
+ ) -> AnyModel:
730
+ if not isinstance(config, Checkpoint_Config_Base):
731
+ raise ValueError("Only CheckpointConfigBase models are currently supported here.")
732
+
733
+ match submodel_type:
734
+ case SubModelType.Transformer:
735
+ return self._load_from_singlefile(config)
736
+
737
+ raise ValueError(
738
+ f"Only Transformer submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}"
739
+ )
740
+
741
+ def _load_from_singlefile(
742
+ self,
743
+ config: AnyModelConfig,
744
+ ) -> AnyModel:
745
+ from diffusers import Flux2Transformer2DModel
746
+
747
+ if not isinstance(config, Main_Checkpoint_Flux2_Config):
748
+ raise TypeError(
749
+ f"Expected Main_Checkpoint_Flux2_Config, got {type(config).__name__}. "
750
+ "Model configuration type mismatch."
751
+ )
752
+ model_path = Path(config.path)
753
+
754
+ # Load state dict
755
+ sd = load_file(model_path)
756
+
757
+ # Handle FP8 quantized weights (ComfyUI-style or scaled FP8)
758
+ # These store weights as: layer.weight (FP8) + layer.weight_scale (FP32 scalar)
759
+ sd = self._dequantize_fp8_weights(sd)
760
+
761
+ # Check if keys have ComfyUI-style prefix and strip if needed
762
+ prefix_to_strip = None
763
+ for prefix in ["model.diffusion_model.", "diffusion_model."]:
764
+ if any(k.startswith(prefix) for k in sd.keys() if isinstance(k, str)):
765
+ prefix_to_strip = prefix
766
+ break
767
+
768
+ if prefix_to_strip:
769
+ sd = {
770
+ (k[len(prefix_to_strip) :] if isinstance(k, str) and k.startswith(prefix_to_strip) else k): v
771
+ for k, v in sd.items()
772
+ }
773
+
774
+ # Convert BFL format state dict to diffusers format
775
+ converted_sd = self._convert_flux2_bfl_to_diffusers(sd)
776
+
777
+ # Detect architecture from checkpoint keys
778
+ double_block_indices = [
779
+ int(k.split(".")[1])
780
+ for k in converted_sd.keys()
781
+ if isinstance(k, str) and k.startswith("transformer_blocks.")
782
+ ]
783
+ single_block_indices = [
784
+ int(k.split(".")[1])
785
+ for k in converted_sd.keys()
786
+ if isinstance(k, str) and k.startswith("single_transformer_blocks.")
787
+ ]
788
+
789
+ num_layers = max(double_block_indices) + 1 if double_block_indices else 5
790
+ num_single_layers = max(single_block_indices) + 1 if single_block_indices else 20
791
+
792
+ # Get dimensions from weights
793
+ # context_embedder.weight shape: [hidden_size, joint_attention_dim]
794
+ context_embedder_weight = converted_sd.get("context_embedder.weight")
795
+ if context_embedder_weight is not None:
796
+ hidden_size = context_embedder_weight.shape[0]
797
+ joint_attention_dim = context_embedder_weight.shape[1]
798
+ else:
799
+ # Default to Klein 4B dimensions
800
+ hidden_size = 3072
801
+ joint_attention_dim = 7680
802
+
803
+ x_embedder_weight = converted_sd.get("x_embedder.weight")
804
+ if x_embedder_weight is not None:
805
+ in_channels = x_embedder_weight.shape[1]
806
+ else:
807
+ in_channels = 128
808
+
809
+ # Calculate num_attention_heads from hidden_size
810
+ # Klein 4B: hidden_size=3072, num_attention_heads=24 (3072/128=24)
811
+ # Klein 9B: hidden_size=4096, num_attention_heads=32 (4096/128=32)
812
+ attention_head_dim = 128
813
+ num_attention_heads = hidden_size // attention_head_dim
814
+
815
+ # Klein models don't have guidance embeddings - check if they're in the checkpoint
816
+ has_guidance = "time_guidance_embed.guidance_embedder.linear_1.weight" in converted_sd
817
+
818
+ # Create model with detected configuration
819
+ with SilenceWarnings():
820
+ with accelerate.init_empty_weights():
821
+ model = Flux2Transformer2DModel(
822
+ in_channels=in_channels,
823
+ out_channels=in_channels,
824
+ num_layers=num_layers,
825
+ num_single_layers=num_single_layers,
826
+ attention_head_dim=attention_head_dim,
827
+ num_attention_heads=num_attention_heads,
828
+ joint_attention_dim=joint_attention_dim,
829
+ patch_size=1,
830
+ )
831
+
832
+ # If Klein model without guidance, initialize guidance embedder with zeros
833
+ if not has_guidance:
834
+ # Get the expected dimensions from timestep embedder (they should match)
835
+ timestep_linear1 = converted_sd.get("time_guidance_embed.timestep_embedder.linear_1.weight")
836
+ if timestep_linear1 is not None:
837
+ in_features = timestep_linear1.shape[1]
838
+ out_features = timestep_linear1.shape[0]
839
+ # Initialize guidance embedder with same shape as timestep embedder
840
+ converted_sd["time_guidance_embed.guidance_embedder.linear_1.weight"] = torch.zeros(
841
+ out_features, in_features, dtype=torch.bfloat16
842
+ )
843
+ timestep_linear2 = converted_sd.get("time_guidance_embed.timestep_embedder.linear_2.weight")
844
+ if timestep_linear2 is not None:
845
+ in_features2 = timestep_linear2.shape[1]
846
+ out_features2 = timestep_linear2.shape[0]
847
+ converted_sd["time_guidance_embed.guidance_embedder.linear_2.weight"] = torch.zeros(
848
+ out_features2, in_features2, dtype=torch.bfloat16
849
+ )
850
+
851
+ # Convert to bfloat16 and load
852
+ for k in converted_sd.keys():
853
+ converted_sd[k] = converted_sd[k].to(torch.bfloat16)
854
+
855
+ # Load the state dict - guidance weights were already initialized above if missing
856
+ model.load_state_dict(converted_sd, assign=True)
857
+
858
+ return model
859
+
860
+ def _convert_flux2_bfl_to_diffusers(self, sd: dict) -> dict:
861
+ """Convert FLUX.2 BFL format state dict to diffusers format.
862
+
863
+ Based on diffusers convert_flux2_to_diffusers.py key mappings.
864
+ """
865
+ converted = {}
866
+
867
+ # Basic key renames
868
+ key_renames = {
869
+ "img_in.weight": "x_embedder.weight",
870
+ "txt_in.weight": "context_embedder.weight",
871
+ "time_in.in_layer.weight": "time_guidance_embed.timestep_embedder.linear_1.weight",
872
+ "time_in.out_layer.weight": "time_guidance_embed.timestep_embedder.linear_2.weight",
873
+ "guidance_in.in_layer.weight": "time_guidance_embed.guidance_embedder.linear_1.weight",
874
+ "guidance_in.out_layer.weight": "time_guidance_embed.guidance_embedder.linear_2.weight",
875
+ "double_stream_modulation_img.lin.weight": "double_stream_modulation_img.linear.weight",
876
+ "double_stream_modulation_txt.lin.weight": "double_stream_modulation_txt.linear.weight",
877
+ "single_stream_modulation.lin.weight": "single_stream_modulation.linear.weight",
878
+ "final_layer.linear.weight": "proj_out.weight",
879
+ "final_layer.adaLN_modulation.1.weight": "norm_out.linear.weight",
880
+ }
881
+
882
+ for old_key, tensor in sd.items():
883
+ new_key = old_key
884
+
885
+ # Apply basic renames
886
+ if old_key in key_renames:
887
+ new_key = key_renames[old_key]
888
+ # Apply scale-shift swap for adaLN modulation weights
889
+ # BFL and diffusers use different parameter ordering for AdaLayerNorm
890
+ if old_key == "final_layer.adaLN_modulation.1.weight":
891
+ tensor = self._swap_scale_shift(tensor)
892
+ converted[new_key] = tensor
893
+ continue
894
+
895
+ # Convert double_blocks.X.* to transformer_blocks.X.*
896
+ if old_key.startswith("double_blocks."):
897
+ new_key = self._convert_double_block_key(old_key, tensor, converted)
898
+ if new_key is None:
899
+ continue # Key was handled specially
900
+ # Convert single_blocks.X.* to single_transformer_blocks.X.*
901
+ elif old_key.startswith("single_blocks."):
902
+ new_key = self._convert_single_block_key(old_key, tensor, converted)
903
+ if new_key is None:
904
+ continue # Key was handled specially
905
+
906
+ if new_key != old_key or new_key not in converted:
907
+ converted[new_key] = tensor
908
+
909
+ return converted
910
+
911
+ def _convert_double_block_key(self, key: str, tensor: torch.Tensor, converted: dict) -> str | None:
912
+ """Convert double_blocks key to transformer_blocks format."""
913
+ parts = key.split(".")
914
+ block_idx = parts[1]
915
+ rest = ".".join(parts[2:])
916
+
917
+ prefix = f"transformer_blocks.{block_idx}"
918
+
919
+ # Attention QKV conversion - BFL uses fused qkv, diffusers uses separate
920
+ if "img_attn.qkv.weight" in rest:
921
+ # Split fused QKV into separate Q, K, V
922
+ # Defensive check: ensure tensor has at least 1 dimension and can be split into 3
923
+ if tensor.dim() < 1 or tensor.shape[0] % 3 != 0:
924
+ # Skip malformed tensors (might be metadata or corrupted)
925
+ return key
926
+ q, k, v = tensor.chunk(3, dim=0)
927
+ converted[f"{prefix}.attn.to_q.weight"] = q
928
+ converted[f"{prefix}.attn.to_k.weight"] = k
929
+ converted[f"{prefix}.attn.to_v.weight"] = v
930
+ return None
931
+ elif "txt_attn.qkv.weight" in rest:
932
+ # Defensive check
933
+ if tensor.dim() < 1 or tensor.shape[0] % 3 != 0:
934
+ return key
935
+ q, k, v = tensor.chunk(3, dim=0)
936
+ converted[f"{prefix}.attn.add_q_proj.weight"] = q
937
+ converted[f"{prefix}.attn.add_k_proj.weight"] = k
938
+ converted[f"{prefix}.attn.add_v_proj.weight"] = v
939
+ return None
940
+
941
+ # Attention output projection
942
+ if "img_attn.proj.weight" in rest:
943
+ return f"{prefix}.attn.to_out.0.weight"
944
+ elif "txt_attn.proj.weight" in rest:
945
+ return f"{prefix}.attn.to_add_out.weight"
946
+
947
+ # Attention norms
948
+ if "img_attn.norm.query_norm.scale" in rest:
949
+ return f"{prefix}.attn.norm_q.weight"
950
+ elif "img_attn.norm.key_norm.scale" in rest:
951
+ return f"{prefix}.attn.norm_k.weight"
952
+ elif "txt_attn.norm.query_norm.scale" in rest:
953
+ return f"{prefix}.attn.norm_added_q.weight"
954
+ elif "txt_attn.norm.key_norm.scale" in rest:
955
+ return f"{prefix}.attn.norm_added_k.weight"
956
+
957
+ # MLP layers
958
+ if "img_mlp.0.weight" in rest:
959
+ return f"{prefix}.ff.linear_in.weight"
960
+ elif "img_mlp.2.weight" in rest:
961
+ return f"{prefix}.ff.linear_out.weight"
962
+ elif "txt_mlp.0.weight" in rest:
963
+ return f"{prefix}.ff_context.linear_in.weight"
964
+ elif "txt_mlp.2.weight" in rest:
965
+ return f"{prefix}.ff_context.linear_out.weight"
966
+
967
+ return key
968
+
969
+ def _convert_single_block_key(self, key: str, tensor: torch.Tensor, converted: dict) -> str | None:
970
+ """Convert single_blocks key to single_transformer_blocks format."""
971
+ parts = key.split(".")
972
+ block_idx = parts[1]
973
+ rest = ".".join(parts[2:])
974
+
975
+ prefix = f"single_transformer_blocks.{block_idx}"
976
+
977
+ # linear1 is the fused QKV+MLP projection
978
+ if "linear1.weight" in rest:
979
+ return f"{prefix}.attn.to_qkv_mlp_proj.weight"
980
+ elif "linear2.weight" in rest:
981
+ return f"{prefix}.attn.to_out.weight"
982
+
983
+ # Norms
984
+ if "norm.query_norm.scale" in rest:
985
+ return f"{prefix}.attn.norm_q.weight"
986
+ elif "norm.key_norm.scale" in rest:
987
+ return f"{prefix}.attn.norm_k.weight"
988
+
989
+ return key
990
+
991
+ def _swap_scale_shift(self, weight: torch.Tensor) -> torch.Tensor:
992
+ """Swap scale and shift in AdaLayerNorm weights.
993
+
994
+ BFL and diffusers use different parameter ordering for AdaLayerNorm.
995
+ This function swaps the two halves of the weight tensor.
996
+
997
+ Args:
998
+ weight: Weight tensor of shape (out_features,) or (out_features, in_features)
999
+
1000
+ Returns:
1001
+ Weight tensor with scale and shift swapped.
1002
+ """
1003
+ # Defensive check: ensure tensor can be split
1004
+ if weight.dim() < 1 or weight.shape[0] % 2 != 0:
1005
+ return weight
1006
+ # Split in half along the first dimension and swap
1007
+ shift, scale = weight.chunk(2, dim=0)
1008
+ return torch.cat([scale, shift], dim=0)
1009
+
1010
+ def _dequantize_fp8_weights(self, sd: dict) -> dict:
1011
+ """Dequantize FP8 quantized weights in the state dict.
1012
+
1013
+ ComfyUI and some FLUX.2 models store quantized weights as:
1014
+ - layer.weight: quantized FP8 data
1015
+ - layer.weight_scale: scale factor (FP32 scalar or per-channel)
1016
+
1017
+ Dequantization formula: dequantized = weight.to(float) * weight_scale
1018
+
1019
+ Also handles FP8 tensors stored with float8_e4m3fn dtype by converting to float.
1020
+ """
1021
+ # Check for ComfyUI-style scale factors
1022
+ weight_scale_keys = [k for k in sd.keys() if isinstance(k, str) and k.endswith(".weight_scale")]
1023
+
1024
+ for scale_key in weight_scale_keys:
1025
+ # Get the corresponding weight key
1026
+ weight_key = scale_key.replace(".weight_scale", ".weight")
1027
+ if weight_key in sd:
1028
+ weight = sd[weight_key]
1029
+ scale = sd[scale_key]
1030
+
1031
+ # Dequantize: convert FP8 to float and multiply by scale
1032
+ # Note: Float8 types require .float() instead of .to(torch.float32)
1033
+ weight_float = weight.float()
1034
+ scale = scale.float()
1035
+
1036
+ # Handle block-wise quantization where scale may have different shape
1037
+ if scale.dim() > 0 and scale.shape != weight_float.shape and scale.numel() > 1:
1038
+ for dim in range(len(weight_float.shape)):
1039
+ if dim < len(scale.shape) and scale.shape[dim] != weight_float.shape[dim]:
1040
+ block_size = weight_float.shape[dim] // scale.shape[dim]
1041
+ if block_size > 1:
1042
+ scale = scale.repeat_interleave(block_size, dim=dim)
1043
+
1044
+ sd[weight_key] = weight_float * scale
1045
+
1046
+ # Filter out scale metadata keys and other FP8 metadata
1047
+ keys_to_remove = [
1048
+ k
1049
+ for k in sd.keys()
1050
+ if isinstance(k, str)
1051
+ and (k.endswith(".weight_scale") or k.endswith(".scale_weight") or "comfy_quant" in k or k == "scaled_fp8")
1052
+ ]
1053
+ for k in keys_to_remove:
1054
+ del sd[k]
1055
+
1056
+ # Handle native FP8 tensors (float8_e4m3fn dtype) that aren't already dequantized
1057
+ # Also filter out 0-dimensional tensors (scalars) which are typically metadata
1058
+ keys_to_convert = []
1059
+ keys_to_remove_scalars = []
1060
+ for key in list(sd.keys()):
1061
+ tensor = sd[key]
1062
+ if hasattr(tensor, "dim"):
1063
+ if tensor.dim() == 0:
1064
+ # 0-dimensional tensor (scalar) - likely metadata, remove it
1065
+ keys_to_remove_scalars.append(key)
1066
+ elif hasattr(tensor, "dtype") and "float8" in str(tensor.dtype):
1067
+ # Native FP8 tensor - mark for conversion
1068
+ keys_to_convert.append(key)
1069
+
1070
+ for k in keys_to_remove_scalars:
1071
+ del sd[k]
1072
+
1073
+ for key in keys_to_convert:
1074
+ # Convert FP8 tensor to float32
1075
+ sd[key] = sd[key].float()
1076
+
1077
+ return sd
1078
+
1079
+
1080
+ @ModelLoaderRegistry.register(base=BaseModelType.Flux2, type=ModelType.Main, format=ModelFormat.GGUFQuantized)
1081
+ class Flux2GGUFCheckpointModel(ModelLoader):
1082
+ """Class to load GGUF-quantized FLUX.2 transformer models."""
1083
+
1084
+ def _load_model(
1085
+ self,
1086
+ config: AnyModelConfig,
1087
+ submodel_type: Optional[SubModelType] = None,
1088
+ ) -> AnyModel:
1089
+ if not isinstance(config, Main_GGUF_Flux2_Config):
1090
+ raise ValueError("Only Main_GGUF_Flux2_Config models are currently supported here.")
1091
+
1092
+ match submodel_type:
1093
+ case SubModelType.Transformer:
1094
+ return self._load_from_singlefile(config)
1095
+
1096
+ raise ValueError(
1097
+ f"Only Transformer submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}"
1098
+ )
1099
+
1100
+ def _load_from_singlefile(
1101
+ self,
1102
+ config: Main_GGUF_Flux2_Config,
1103
+ ) -> AnyModel:
1104
+ from diffusers import Flux2Transformer2DModel
1105
+
1106
+ model_path = Path(config.path)
1107
+
1108
+ # Load GGUF state dict
1109
+ sd = gguf_sd_loader(model_path, compute_dtype=torch.bfloat16)
1110
+
1111
+ # Check if keys have ComfyUI-style prefix and strip if needed
1112
+ prefix_to_strip = None
1113
+ for prefix in ["model.diffusion_model.", "diffusion_model."]:
1114
+ if any(k.startswith(prefix) for k in sd.keys() if isinstance(k, str)):
1115
+ prefix_to_strip = prefix
1116
+ break
1117
+
1118
+ if prefix_to_strip:
1119
+ sd = {
1120
+ (k[len(prefix_to_strip) :] if isinstance(k, str) and k.startswith(prefix_to_strip) else k): v
1121
+ for k, v in sd.items()
1122
+ }
1123
+
1124
+ # Convert BFL format state dict to diffusers format
1125
+ converted_sd = self._convert_flux2_bfl_to_diffusers(sd)
1126
+
1127
+ # Detect architecture from checkpoint keys
1128
+ double_block_indices = [
1129
+ int(k.split(".")[1])
1130
+ for k in converted_sd.keys()
1131
+ if isinstance(k, str) and k.startswith("transformer_blocks.")
1132
+ ]
1133
+ single_block_indices = [
1134
+ int(k.split(".")[1])
1135
+ for k in converted_sd.keys()
1136
+ if isinstance(k, str) and k.startswith("single_transformer_blocks.")
1137
+ ]
1138
+
1139
+ num_layers = max(double_block_indices) + 1 if double_block_indices else 5
1140
+ num_single_layers = max(single_block_indices) + 1 if single_block_indices else 20
1141
+
1142
+ # Get dimensions from weights
1143
+ # context_embedder.weight shape: [hidden_size, joint_attention_dim]
1144
+ context_embedder_weight = converted_sd.get("context_embedder.weight")
1145
+ if context_embedder_weight is not None:
1146
+ if hasattr(context_embedder_weight, "tensor_shape"):
1147
+ hidden_size = context_embedder_weight.tensor_shape[0]
1148
+ joint_attention_dim = context_embedder_weight.tensor_shape[1]
1149
+ else:
1150
+ hidden_size = context_embedder_weight.shape[0]
1151
+ joint_attention_dim = context_embedder_weight.shape[1]
1152
+ else:
1153
+ # Default to Klein 4B dimensions
1154
+ hidden_size = 3072
1155
+ joint_attention_dim = 7680
1156
+
1157
+ x_embedder_weight = converted_sd.get("x_embedder.weight")
1158
+ if x_embedder_weight is not None:
1159
+ in_channels = (
1160
+ x_embedder_weight.tensor_shape[1]
1161
+ if hasattr(x_embedder_weight, "tensor_shape")
1162
+ else x_embedder_weight.shape[1]
1163
+ )
1164
+ else:
1165
+ in_channels = 128
1166
+
1167
+ # Calculate num_attention_heads from hidden_size
1168
+ # Klein 4B: hidden_size=3072, num_attention_heads=24 (3072/128=24)
1169
+ # Klein 9B: hidden_size=4096, num_attention_heads=32 (4096/128=32)
1170
+ attention_head_dim = 128
1171
+ num_attention_heads = hidden_size // attention_head_dim
1172
+
1173
+ # Klein models don't have guidance embeddings - check if they're in the checkpoint
1174
+ has_guidance = "time_guidance_embed.guidance_embedder.linear_1.weight" in converted_sd
1175
+
1176
+ # Create model with detected configuration
1177
+ with SilenceWarnings():
1178
+ with accelerate.init_empty_weights():
1179
+ model = Flux2Transformer2DModel(
1180
+ in_channels=in_channels,
1181
+ out_channels=in_channels,
1182
+ num_layers=num_layers,
1183
+ num_single_layers=num_single_layers,
1184
+ attention_head_dim=attention_head_dim,
1185
+ num_attention_heads=num_attention_heads,
1186
+ joint_attention_dim=joint_attention_dim,
1187
+ patch_size=1,
1188
+ )
1189
+
1190
+ # If Klein model without guidance, initialize guidance embedder with zeros
1191
+ if not has_guidance:
1192
+ timestep_linear1 = converted_sd.get("time_guidance_embed.timestep_embedder.linear_1.weight")
1193
+ if timestep_linear1 is not None:
1194
+ in_features = (
1195
+ timestep_linear1.tensor_shape[1]
1196
+ if hasattr(timestep_linear1, "tensor_shape")
1197
+ else timestep_linear1.shape[1]
1198
+ )
1199
+ out_features = (
1200
+ timestep_linear1.tensor_shape[0]
1201
+ if hasattr(timestep_linear1, "tensor_shape")
1202
+ else timestep_linear1.shape[0]
1203
+ )
1204
+ converted_sd["time_guidance_embed.guidance_embedder.linear_1.weight"] = torch.zeros(
1205
+ out_features, in_features, dtype=torch.bfloat16
1206
+ )
1207
+ timestep_linear2 = converted_sd.get("time_guidance_embed.timestep_embedder.linear_2.weight")
1208
+ if timestep_linear2 is not None:
1209
+ in_features2 = (
1210
+ timestep_linear2.tensor_shape[1]
1211
+ if hasattr(timestep_linear2, "tensor_shape")
1212
+ else timestep_linear2.shape[1]
1213
+ )
1214
+ out_features2 = (
1215
+ timestep_linear2.tensor_shape[0]
1216
+ if hasattr(timestep_linear2, "tensor_shape")
1217
+ else timestep_linear2.shape[0]
1218
+ )
1219
+ converted_sd["time_guidance_embed.guidance_embedder.linear_2.weight"] = torch.zeros(
1220
+ out_features2, in_features2, dtype=torch.bfloat16
1221
+ )
1222
+
1223
+ model.load_state_dict(converted_sd, assign=True)
1224
+ return model
1225
+
1226
+ def _convert_flux2_bfl_to_diffusers(self, sd: dict) -> dict:
1227
+ """Convert FLUX.2 BFL format state dict to diffusers format."""
1228
+ converted = {}
1229
+
1230
+ key_renames = {
1231
+ "img_in.weight": "x_embedder.weight",
1232
+ "txt_in.weight": "context_embedder.weight",
1233
+ "time_in.in_layer.weight": "time_guidance_embed.timestep_embedder.linear_1.weight",
1234
+ "time_in.out_layer.weight": "time_guidance_embed.timestep_embedder.linear_2.weight",
1235
+ "guidance_in.in_layer.weight": "time_guidance_embed.guidance_embedder.linear_1.weight",
1236
+ "guidance_in.out_layer.weight": "time_guidance_embed.guidance_embedder.linear_2.weight",
1237
+ "double_stream_modulation_img.lin.weight": "double_stream_modulation_img.linear.weight",
1238
+ "double_stream_modulation_txt.lin.weight": "double_stream_modulation_txt.linear.weight",
1239
+ "single_stream_modulation.lin.weight": "single_stream_modulation.linear.weight",
1240
+ "final_layer.linear.weight": "proj_out.weight",
1241
+ "final_layer.adaLN_modulation.1.weight": "norm_out.linear.weight",
1242
+ }
1243
+
1244
+ for old_key, tensor in sd.items():
1245
+ new_key = old_key
1246
+
1247
+ if old_key in key_renames:
1248
+ new_key = key_renames[old_key]
1249
+ if old_key == "final_layer.adaLN_modulation.1.weight":
1250
+ tensor = self._swap_scale_shift(tensor)
1251
+ converted[new_key] = tensor
1252
+ continue
1253
+
1254
+ if old_key.startswith("double_blocks."):
1255
+ new_key = self._convert_double_block_key(old_key, tensor, converted)
1256
+ if new_key is None:
1257
+ continue
1258
+ elif old_key.startswith("single_blocks."):
1259
+ new_key = self._convert_single_block_key(old_key, tensor, converted)
1260
+ if new_key is None:
1261
+ continue
1262
+
1263
+ if new_key != old_key or new_key not in converted:
1264
+ converted[new_key] = tensor
1265
+
1266
+ return converted
1267
+
1268
+ def _convert_double_block_key(self, key: str, tensor, converted: dict) -> str | None:
1269
+ parts = key.split(".")
1270
+ block_idx = parts[1]
1271
+ rest = ".".join(parts[2:])
1272
+ prefix = f"transformer_blocks.{block_idx}"
1273
+
1274
+ if "img_attn.qkv.weight" in rest:
1275
+ q, k, v = self._chunk_tensor(tensor, 3)
1276
+ converted[f"{prefix}.attn.to_q.weight"] = q
1277
+ converted[f"{prefix}.attn.to_k.weight"] = k
1278
+ converted[f"{prefix}.attn.to_v.weight"] = v
1279
+ return None
1280
+ elif "txt_attn.qkv.weight" in rest:
1281
+ q, k, v = self._chunk_tensor(tensor, 3)
1282
+ converted[f"{prefix}.attn.add_q_proj.weight"] = q
1283
+ converted[f"{prefix}.attn.add_k_proj.weight"] = k
1284
+ converted[f"{prefix}.attn.add_v_proj.weight"] = v
1285
+ return None
1286
+
1287
+ if "img_attn.proj.weight" in rest:
1288
+ return f"{prefix}.attn.to_out.0.weight"
1289
+ elif "txt_attn.proj.weight" in rest:
1290
+ return f"{prefix}.attn.to_add_out.weight"
1291
+
1292
+ if "img_attn.norm.query_norm.scale" in rest:
1293
+ return f"{prefix}.attn.norm_q.weight"
1294
+ elif "img_attn.norm.key_norm.scale" in rest:
1295
+ return f"{prefix}.attn.norm_k.weight"
1296
+ elif "txt_attn.norm.query_norm.scale" in rest:
1297
+ return f"{prefix}.attn.norm_added_q.weight"
1298
+ elif "txt_attn.norm.key_norm.scale" in rest:
1299
+ return f"{prefix}.attn.norm_added_k.weight"
1300
+
1301
+ if "img_mlp.0.weight" in rest:
1302
+ return f"{prefix}.ff.linear_in.weight"
1303
+ elif "img_mlp.2.weight" in rest:
1304
+ return f"{prefix}.ff.linear_out.weight"
1305
+ elif "txt_mlp.0.weight" in rest:
1306
+ return f"{prefix}.ff_context.linear_in.weight"
1307
+ elif "txt_mlp.2.weight" in rest:
1308
+ return f"{prefix}.ff_context.linear_out.weight"
1309
+
1310
+ return key
1311
+
1312
+ def _convert_single_block_key(self, key: str, tensor, converted: dict) -> str | None:
1313
+ parts = key.split(".")
1314
+ block_idx = parts[1]
1315
+ rest = ".".join(parts[2:])
1316
+ prefix = f"single_transformer_blocks.{block_idx}"
1317
+
1318
+ if "linear1.weight" in rest:
1319
+ return f"{prefix}.attn.to_qkv_mlp_proj.weight"
1320
+ elif "linear2.weight" in rest:
1321
+ return f"{prefix}.attn.to_out.weight"
1322
+
1323
+ if "norm.query_norm.scale" in rest:
1324
+ return f"{prefix}.attn.norm_q.weight"
1325
+ elif "norm.key_norm.scale" in rest:
1326
+ return f"{prefix}.attn.norm_k.weight"
1327
+
1328
+ return key
1329
+
1330
+ def _chunk_tensor(self, tensor, chunks: int):
1331
+ """Chunk a tensor, handling both regular tensors and GGUF quantized tensors."""
1332
+ if hasattr(tensor, "get_dequantized_tensor"):
1333
+ # GGUF quantized tensor - dequantize first, then chunk
1334
+ # This loses quantization for the split weights, but is necessary
1335
+ # because diffusers uses separate Q/K/V projections
1336
+ tensor = tensor.get_dequantized_tensor()
1337
+ return tensor.chunk(chunks, dim=0)
1338
+
1339
+ def _swap_scale_shift(self, weight) -> torch.Tensor:
1340
+ """Swap scale and shift in AdaLayerNorm weights."""
1341
+ if hasattr(weight, "get_dequantized_tensor"):
1342
+ # For GGUF, dequantize first
1343
+ weight = weight.get_dequantized_tensor()
1344
+ shift, scale = weight.chunk(2, dim=0)
1345
+ return torch.cat([scale, shift], dim=0)
1346
+
1347
+
336
1348
  @ModelLoaderRegistry.register(base=BaseModelType.Flux, type=ModelType.ControlNet, format=ModelFormat.Checkpoint)
337
1349
  @ModelLoaderRegistry.register(base=BaseModelType.Flux, type=ModelType.ControlNet, format=ModelFormat.Diffusers)
338
1350
  class FluxControlnetModel(ModelLoader):