invokeai-6.10.0rc2-py3-none-any.whl → invokeai-6.11.0rc1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invokeai/app/api/routers/model_manager.py +43 -1
- invokeai/app/invocations/fields.py +1 -1
- invokeai/app/invocations/flux2_denoise.py +499 -0
- invokeai/app/invocations/flux2_klein_model_loader.py +222 -0
- invokeai/app/invocations/flux2_klein_text_encoder.py +222 -0
- invokeai/app/invocations/flux2_vae_decode.py +106 -0
- invokeai/app/invocations/flux2_vae_encode.py +88 -0
- invokeai/app/invocations/flux_denoise.py +50 -3
- invokeai/app/invocations/flux_lora_loader.py +1 -1
- invokeai/app/invocations/ideal_size.py +6 -1
- invokeai/app/invocations/metadata.py +4 -0
- invokeai/app/invocations/metadata_linked.py +47 -0
- invokeai/app/invocations/model.py +1 -0
- invokeai/app/invocations/z_image_denoise.py +8 -3
- invokeai/app/invocations/z_image_image_to_latents.py +9 -1
- invokeai/app/invocations/z_image_latents_to_image.py +9 -1
- invokeai/app/invocations/z_image_seed_variance_enhancer.py +110 -0
- invokeai/app/services/config/config_default.py +3 -1
- invokeai/app/services/invocation_stats/invocation_stats_common.py +6 -6
- invokeai/app/services/invocation_stats/invocation_stats_default.py +9 -4
- invokeai/app/services/model_manager/model_manager_default.py +7 -0
- invokeai/app/services/model_records/model_records_base.py +4 -2
- invokeai/app/services/shared/invocation_context.py +15 -0
- invokeai/app/services/shared/sqlite/sqlite_util.py +2 -0
- invokeai/app/services/shared/sqlite_migrator/migrations/migration_25.py +61 -0
- invokeai/app/util/step_callback.py +42 -0
- invokeai/backend/flux/denoise.py +239 -204
- invokeai/backend/flux/dype/__init__.py +18 -0
- invokeai/backend/flux/dype/base.py +226 -0
- invokeai/backend/flux/dype/embed.py +116 -0
- invokeai/backend/flux/dype/presets.py +141 -0
- invokeai/backend/flux/dype/rope.py +110 -0
- invokeai/backend/flux/extensions/dype_extension.py +91 -0
- invokeai/backend/flux/util.py +35 -1
- invokeai/backend/flux2/__init__.py +4 -0
- invokeai/backend/flux2/denoise.py +261 -0
- invokeai/backend/flux2/ref_image_extension.py +294 -0
- invokeai/backend/flux2/sampling_utils.py +209 -0
- invokeai/backend/model_manager/configs/factory.py +19 -1
- invokeai/backend/model_manager/configs/main.py +395 -3
- invokeai/backend/model_manager/configs/qwen3_encoder.py +116 -7
- invokeai/backend/model_manager/configs/vae.py +104 -2
- invokeai/backend/model_manager/load/load_default.py +0 -1
- invokeai/backend/model_manager/load/model_cache/model_cache.py +107 -2
- invokeai/backend/model_manager/load/model_loaders/flux.py +1007 -2
- invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +0 -1
- invokeai/backend/model_manager/load/model_loaders/z_image.py +121 -28
- invokeai/backend/model_manager/starter_models.py +128 -0
- invokeai/backend/model_manager/taxonomy.py +31 -4
- invokeai/backend/model_manager/util/select_hf_files.py +3 -2
- invokeai/backend/util/vae_working_memory.py +0 -2
- invokeai/frontend/web/dist/assets/App-ClpIJstk.js +161 -0
- invokeai/frontend/web/dist/assets/{browser-ponyfill-BP0RxJ4G.js → browser-ponyfill-Cw07u5G1.js} +1 -1
- invokeai/frontend/web/dist/assets/{index-B44qKjrs.js → index-DSKM8iGj.js} +69 -69
- invokeai/frontend/web/dist/index.html +1 -1
- invokeai/frontend/web/dist/locales/en.json +58 -5
- invokeai/frontend/web/dist/locales/it.json +2 -1
- invokeai/version/invokeai_version.py +1 -1
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/METADATA +7 -1
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/RECORD +66 -49
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/WHEEL +1 -1
- invokeai/frontend/web/dist/assets/App-DllqPQ3j.js +0 -161
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/entry_points.txt +0 -0
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE +0 -0
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
- {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/top_level.txt +0 -0
@@ -140,7 +140,6 @@ class StableDiffusionDiffusersModel(GenericDiffusersLoader):
         # Some weights of the model checkpoint were not used when initializing CLIPTextModelWithProjection:
         # ['text_model.embeddings.position_ids']

-        self._logger.info(f"Loading model from single file at {config.path} using {load_class.__name__}")
         with SilenceWarnings():
             pipeline = load_class.from_single_file(config.path, torch_dtype=self._torch_dtype)

@@ -576,7 +576,54 @@ class Qwen3EncoderCheckpointLoader(ModelLoader):
         # Load the state dict from safetensors file
         sd = load_file(model_path)

-        #
+        # Handle ComfyUI quantized checkpoints
+        # ComfyUI stores quantized weights with accompanying scale factors:
+        # - layer.weight: quantized data (FP8)
+        # - layer.weight_scale: scale factor (FP32 scalar)
+        # Dequantization formula: dequantized = weight.to(dtype) * weight_scale
+        # Reference: https://github.com/Comfy-Org/ComfyUI/blob/master/QUANTIZATION.md
+        original_key_count = len(sd)
+        weight_scale_keys = [k for k in sd.keys() if k.endswith(".weight_scale")]
+        dequantized_count = 0
+
+        for scale_key in weight_scale_keys:
+            # Get the corresponding weight key (remove "_scale" suffix)
+            weight_key = scale_key.replace(".weight_scale", ".weight")
+            if weight_key in sd:
+                weight = sd[weight_key]
+                scale = sd[scale_key]
+                # Dequantize: convert to float and multiply by scale
+                # Handle block-wise quantization (e.g., FP4 with block_size=8)
+                # where scale has shape [weight_dim / block_size, ...]
+                # Note: Float8 types (e.g., float8_e4m3fn) require .float() instead of .to(torch.float32)
+                # as PyTorch doesn't support direct type promotion for Float8 types
+                weight_float = weight.float()
+                scale = scale.float()
+                if scale.shape != weight_float.shape and scale.numel() > 1:
+                    # Block-wise quantization: need to expand scale to match weight shape
+                    # Find which dimension differs and repeat scale along that dimension
+                    for dim in range(len(weight_float.shape)):
+                        if dim < len(scale.shape) and scale.shape[dim] != weight_float.shape[dim]:
+                            block_size = weight_float.shape[dim] // scale.shape[dim]
+                            if block_size > 1:
+                                # Repeat scale along this dimension to match weight shape
+                                scale = scale.repeat_interleave(block_size, dim=dim)
+                sd[weight_key] = weight_float * scale
+                dequantized_count += 1
+
+        if dequantized_count > 0:
+            logger.info(f"Dequantized {dequantized_count} ComfyUI quantized weights")
+
+        # Filter out ComfyUI quantization metadata keys (comfy_quant, weight_scale)
+        # These are no longer needed after dequantization
+        comfy_metadata_keys = [k for k in sd.keys() if "comfy_quant" in k or "weight_scale" in k]
+        for k in comfy_metadata_keys:
+            del sd[k]
+        if comfy_metadata_keys:
+            logger.info(f"Filtered out {len(comfy_metadata_keys)} ComfyUI quantization metadata keys")
+
+        logger.info(f"Loaded state dict with {len(sd)} keys (originally {original_key_count})")
+
         # Count the number of layers by looking at layer keys
         layer_count = 0
         for key in sd.keys():
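The dequantization hunk above is self-contained enough to illustrate in isolation. A minimal sketch of the same logic with made-up tensor shapes (nothing below comes from a real checkpoint; torch.float8_e4m3fn requires a reasonably recent PyTorch):

import torch

# Hypothetical quantized weight plus a block-wise scale (block_size = 8 along dim 0).
weight = torch.randn(64, 32).to(torch.float8_e4m3fn)
scale = torch.full((8, 32), 0.5)

weight_float = weight.float()  # mirror the loader's use of .float() for Float8 storage
scale = scale.float()
if scale.shape != weight_float.shape and scale.numel() > 1:
    for dim in range(len(weight_float.shape)):
        if dim < len(scale.shape) and scale.shape[dim] != weight_float.shape[dim]:
            block_size = weight_float.shape[dim] // scale.shape[dim]  # 64 // 8 = 8
            if block_size > 1:
                scale = scale.repeat_interleave(block_size, dim=dim)

dequantized = weight_float * scale  # FP32 tensor of shape (64, 32)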
@@ -589,34 +636,63 @@ class Qwen3EncoderCheckpointLoader(ModelLoader):
         except ValueError:
             pass

-        # Get
+        # Get vocab size from embed_tokens weight shape
         embed_weight = sd.get("model.embed_tokens.weight")
         if embed_weight is None:
             raise ValueError("Could not find model.embed_tokens.weight in state dict")
-
-            raise ValueError(
-                f"Expected 2D embed_tokens weight tensor, got shape {embed_weight.shape}. "
-                "The model file may be corrupted or incompatible."
-            )
-        hidden_size = embed_weight.shape[1]
+
         vocab_size = embed_weight.shape[0]
+        embed_hidden_size = embed_weight.shape[1]
+
+        # Detect model variant based on embed_tokens hidden size and layer count
+        # FLUX 2 Klein / Z-Image uses Qwen3 configurations from ComfyUI:
+        # Reference: https://github.com/comfyanonymous/ComfyUI/blob/master/comfy/text_encoders/llama.py
+        # - Qwen3-4B: hidden_size=2560, 36 layers, 32 heads, 8 KV heads, intermediate=9728
+        # - Qwen3-8B: hidden_size=4096, 36 layers, 32 heads, 8 KV heads, intermediate=12288
+        if embed_hidden_size == 2560 and layer_count == 36:
+            # Qwen3-4B variant (FLUX 2 Klein / Z-Image)
+            logger.info("Detected Qwen3-4B variant (FLUX 2 Klein / Z-Image)")
+            hidden_size = 2560
+            num_attention_heads = 32
+            num_kv_heads = 8
+            intermediate_size = 9728
+            head_dim = 128
+            max_position_embeddings = 40960
+        elif embed_hidden_size == 4096 and layer_count == 36:
+            # Qwen3-8B variant
+            logger.info("Detected Qwen3-8B variant")
+            hidden_size = 4096
+            num_attention_heads = 32
+            num_kv_heads = 8
+            intermediate_size = 12288
+            head_dim = 128
+            max_position_embeddings = 40960
+        else:
+            # Unknown variant - try to detect from weights
+            logger.warning(
+                f"Unknown Qwen3 variant: embed_hidden_size={embed_hidden_size}, layers={layer_count}. "
+                "Attempting to detect configuration from weights..."
+            )
+            q_proj_weight = sd.get("model.layers.0.self_attn.q_proj.weight")
+            k_proj_weight = sd.get("model.layers.0.self_attn.k_proj.weight")
+            gate_proj_weight = sd.get("model.layers.0.mlp.gate_proj.weight")

-
-
-        k_proj_weight = sd.get("model.layers.0.self_attn.k_proj.weight")
-        gate_proj_weight = sd.get("model.layers.0.mlp.gate_proj.weight")
+            if q_proj_weight is None or k_proj_weight is None or gate_proj_weight is None:
+                raise ValueError("Could not find attention/mlp weights to determine configuration")

-
-
+            hidden_size = embed_hidden_size
+            head_dim = 128
+            num_attention_heads = q_proj_weight.shape[0] // head_dim
+            num_kv_heads = k_proj_weight.shape[0] // head_dim
+            intermediate_size = gate_proj_weight.shape[0]
+            max_position_embeddings = 40960

-
-
-
-
-        num_kv_heads = k_proj_weight.shape[0] // head_dim
-        intermediate_size = gate_proj_weight.shape[0]
+        logger.info(
+            f"Qwen3 config: hidden_size={hidden_size}, layers={layer_count}, "
+            f"heads={num_attention_heads}, kv_heads={num_kv_heads}, intermediate={intermediate_size}"
+        )

-        # Create Qwen3 config
+        # Create Qwen3 config
         qwen_config = Qwen3Config(
             vocab_size=vocab_size,
             hidden_size=hidden_size,
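The if/elif ladder above is effectively a lookup table keyed on (embed hidden size, layer count). A condensed sketch of the same dispatch (the helper name and dict layout are ours, not the loader's):

def detect_qwen3_variant(embed_hidden_size: int, layer_count: int) -> dict | None:
    """Map (embed hidden size, layer count) to a known Qwen3 config, else None."""
    table = {
        (2560, 36): {"hidden_size": 2560, "intermediate_size": 9728},   # Qwen3-4B
        (4096, 36): {"hidden_size": 4096, "intermediate_size": 12288},  # Qwen3-8B
    }
    shared = {"num_attention_heads": 32, "num_kv_heads": 8, "head_dim": 128, "max_position_embeddings": 40960}
    specific = table.get((embed_hidden_size, layer_count))
    return {**shared, **specific} if specific else None

assert detect_qwen3_variant(2560, 36)["intermediate_size"] == 9728
assert detect_qwen3_variant(1280, 24) is None  # unknown: falls through to shape-based detection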
@@ -625,7 +701,7 @@ class Qwen3EncoderCheckpointLoader(ModelLoader):
             num_attention_heads=num_attention_heads,
             num_key_value_heads=num_kv_heads,
             head_dim=head_dim,
-            max_position_embeddings=
+            max_position_embeddings=max_position_embeddings,
             rms_norm_eps=1e-6,
             tie_word_embeddings=True,
             rope_theta=1000000.0,
@@ -771,7 +847,7 @@ class Qwen3EncoderGGUFLoader(ModelLoader):
         except ValueError:
             pass

-        # Get
+        # Get vocab size from embed_tokens weight shape
         embed_weight = sd.get("model.embed_tokens.weight")
         if embed_weight is None:
             raise ValueError("Could not find model.embed_tokens.weight in state dict")
@@ -783,13 +859,23 @@ class Qwen3EncoderGGUFLoader(ModelLoader):
                 f"Expected 2D embed_tokens weight tensor, got shape {embed_shape}. "
                 "The model file may be corrupted or incompatible."
             )
-        hidden_size = embed_shape[1]
         vocab_size = embed_shape[0]

-        # Detect attention configuration from layer
-
-
-
+        # Detect attention configuration from layer weights
+        # IMPORTANT: Use layer 1 (not layer 0) because some models like FLUX 2 Klein have a special
+        # first layer with different dimensions (input projection layer) while the rest of the
+        # transformer layers have a different hidden_size. Using a middle layer ensures we get
+        # the representative hidden_size for the bulk of the model.
+        # Fall back to layer 0 if layer 1 doesn't exist.
+        q_proj_weight = sd.get("model.layers.1.self_attn.q_proj.weight")
+        k_proj_weight = sd.get("model.layers.1.self_attn.k_proj.weight")
+        gate_proj_weight = sd.get("model.layers.1.mlp.gate_proj.weight")
+
+        # Fall back to layer 0 if layer 1 doesn't exist (single-layer model edge case)
+        if q_proj_weight is None:
+            q_proj_weight = sd.get("model.layers.0.self_attn.q_proj.weight")
+            k_proj_weight = sd.get("model.layers.0.self_attn.k_proj.weight")
+            gate_proj_weight = sd.get("model.layers.0.mlp.gate_proj.weight")

         if q_proj_weight is None or k_proj_weight is None or gate_proj_weight is None:
             raise ValueError("Could not find attention/mlp weights in state dict to determine configuration")
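The layer-1-first probing reads naturally as a small fallback loop. A sketch against a plain dict standing in for the GGUF state dict (the helper is ours; the real loader only checks q_proj before falling back, as shown above):

def probe_attention_weights(sd: dict):
    # Prefer layer 1: layer 0 may be a special input-projection layer with
    # non-representative dimensions (e.g., FLUX 2 Klein).
    for layer in (1, 0):
        q = sd.get(f"model.layers.{layer}.self_attn.q_proj.weight")
        if q is not None:
            return (
                q,
                sd.get(f"model.layers.{layer}.self_attn.k_proj.weight"),
                sd.get(f"model.layers.{layer}.mlp.gate_proj.weight"),
            )
    return None, None, None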
@@ -800,7 +886,14 @@ class Qwen3EncoderGGUFLoader(ModelLoader):
         gate_shape = gate_proj_weight.shape if hasattr(gate_proj_weight, "shape") else gate_proj_weight.tensor_shape

         # Calculate dimensions from actual weights
+        # IMPORTANT: Use hidden_size from k_proj input dimension (not q_proj or embed_tokens).
+        # Some models (like FLUX 2 Klein) have unusual architectures where:
+        # - embed_tokens has a larger dimension (e.g., 2560)
+        # - q_proj may have a larger input dimension for query expansion
+        # - k_proj/v_proj have the actual transformer hidden_size (e.g., 1280)
+        # Using k_proj ensures we get the correct internal hidden_size.
         head_dim = 128  # Standard head dimension for Qwen3 models
+        hidden_size = k_shape[1]  # Use k_proj input dim as the hidden_size
         num_attention_heads = q_shape[0] // head_dim
         num_kv_heads = k_shape[0] // head_dim
         intermediate_size = gate_shape[0]
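For concreteness, the shape arithmetic worked through with Qwen3-4B-like numbers (illustrative values consistent with the variant table earlier in this diff):

head_dim = 128
q_shape = (4096, 2560)     # [num_heads * head_dim, hidden_size]
k_shape = (1024, 2560)     # [num_kv_heads * head_dim, hidden_size]
gate_shape = (9728, 2560)  # [intermediate_size, hidden_size]

num_attention_heads = q_shape[0] // head_dim  # 4096 // 128 = 32
num_kv_heads = k_shape[0] // head_dim         # 1024 // 128 = 8
hidden_size = k_shape[1]                      # 2560
intermediate_size = gate_shape[0]             # 9728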
@@ -690,6 +690,115 @@ flux_fill = StarterModel(
 )
 # endregion

+# region FLUX.2 Klein
+flux2_vae = StarterModel(
+    name="FLUX.2 VAE",
+    base=BaseModelType.Flux2,
+    source="black-forest-labs/FLUX.2-klein-4B::vae",
+    description="FLUX.2 VAE (16-channel, same architecture as FLUX.1 VAE). ~335MB",
+    type=ModelType.VAE,
+)
+
+flux2_klein_qwen3_4b_encoder = StarterModel(
+    name="FLUX.2 Klein Qwen3 4B Encoder",
+    base=BaseModelType.Any,
+    source="black-forest-labs/FLUX.2-klein-4B::text_encoder+tokenizer",
+    description="Qwen3 4B text encoder for FLUX.2 Klein 4B (also compatible with Z-Image). ~8GB",
+    type=ModelType.Qwen3Encoder,
+)
+
+flux2_klein_qwen3_8b_encoder = StarterModel(
+    name="FLUX.2 Klein Qwen3 8B Encoder",
+    base=BaseModelType.Any,
+    source="black-forest-labs/FLUX.2-klein-9B::text_encoder+tokenizer",
+    description="Qwen3 8B text encoder for FLUX.2 Klein 9B models. ~16GB",
+    type=ModelType.Qwen3Encoder,
+)
+
+flux2_klein_4b = StarterModel(
+    name="FLUX.2 Klein 4B (Diffusers)",
+    base=BaseModelType.Flux2,
+    source="black-forest-labs/FLUX.2-klein-4B",
+    description="FLUX.2 Klein 4B in Diffusers format - includes transformer, VAE and Qwen3 encoder. ~10GB",
+    type=ModelType.Main,
+)
+
+flux2_klein_4b_single = StarterModel(
+    name="FLUX.2 Klein 4B",
+    base=BaseModelType.Flux2,
+    source="https://huggingface.co/black-forest-labs/FLUX.2-klein-4B/resolve/main/flux-2-klein-4b.safetensors",
+    description="FLUX.2 Klein 4B standalone transformer. Installs with VAE and Qwen3 4B encoder. ~8GB",
+    type=ModelType.Main,
+    dependencies=[flux2_vae, flux2_klein_qwen3_4b_encoder],
+)
+
+flux2_klein_4b_fp8 = StarterModel(
+    name="FLUX.2 Klein 4B (FP8)",
+    base=BaseModelType.Flux2,
+    source="https://huggingface.co/black-forest-labs/FLUX.2-klein-4b-fp8/resolve/main/flux-2-klein-4b-fp8.safetensors",
+    description="FLUX.2 Klein 4B FP8 quantized - smaller and faster. Installs with VAE and Qwen3 4B encoder. ~4GB",
+    type=ModelType.Main,
+    dependencies=[flux2_vae, flux2_klein_qwen3_4b_encoder],
+)
+
+flux2_klein_9b = StarterModel(
+    name="FLUX.2 Klein 9B (Diffusers)",
+    base=BaseModelType.Flux2,
+    source="black-forest-labs/FLUX.2-klein-9B",
+    description="FLUX.2 Klein 9B in Diffusers format - includes transformer, VAE and Qwen3 encoder. ~20GB",
+    type=ModelType.Main,
+)
+
+flux2_klein_9b_fp8 = StarterModel(
+    name="FLUX.2 Klein 9B (FP8)",
+    base=BaseModelType.Flux2,
+    source="https://huggingface.co/black-forest-labs/FLUX.2-klein-9b-fp8/resolve/main/flux-2-klein-9b-fp8.safetensors",
+    description="FLUX.2 Klein 9B FP8 quantized - more efficient than full precision. Installs with VAE and Qwen3 8B encoder. ~9.5GB",
+    type=ModelType.Main,
+    dependencies=[flux2_vae, flux2_klein_qwen3_8b_encoder],
+)
+
+flux2_klein_4b_gguf_q4 = StarterModel(
+    name="FLUX.2 Klein 4B (GGUF Q4)",
+    base=BaseModelType.Flux2,
+    source="https://huggingface.co/unsloth/FLUX.2-klein-4B-GGUF/resolve/main/flux-2-klein-4b-Q4_K_M.gguf",
+    description="FLUX.2 Klein 4B GGUF Q4_K_M quantized - runs on 6-8GB VRAM. Installs with VAE and Qwen3 4B encoder. ~2.6GB",
+    type=ModelType.Main,
+    format=ModelFormat.GGUFQuantized,
+    dependencies=[flux2_vae, flux2_klein_qwen3_4b_encoder],
+)
+
+flux2_klein_4b_gguf_q8 = StarterModel(
+    name="FLUX.2 Klein 4B (GGUF Q8)",
+    base=BaseModelType.Flux2,
+    source="https://huggingface.co/unsloth/FLUX.2-klein-4B-GGUF/resolve/main/flux-2-klein-4b-Q8_0.gguf",
+    description="FLUX.2 Klein 4B GGUF Q8_0 quantized - higher quality than Q4. Installs with VAE and Qwen3 4B encoder. ~4.3GB",
+    type=ModelType.Main,
+    format=ModelFormat.GGUFQuantized,
+    dependencies=[flux2_vae, flux2_klein_qwen3_4b_encoder],
+)
+
+flux2_klein_9b_gguf_q4 = StarterModel(
+    name="FLUX.2 Klein 9B (GGUF Q4)",
+    base=BaseModelType.Flux2,
+    source="https://huggingface.co/unsloth/FLUX.2-klein-9B-GGUF/resolve/main/flux-2-klein-9b-Q4_K_M.gguf",
+    description="FLUX.2 Klein 9B GGUF Q4_K_M quantized - runs on 12GB+ VRAM. Installs with VAE and Qwen3 8B encoder. ~5.8GB",
+    type=ModelType.Main,
+    format=ModelFormat.GGUFQuantized,
+    dependencies=[flux2_vae, flux2_klein_qwen3_8b_encoder],
+)
+
+flux2_klein_9b_gguf_q8 = StarterModel(
+    name="FLUX.2 Klein 9B (GGUF Q8)",
+    base=BaseModelType.Flux2,
+    source="https://huggingface.co/unsloth/FLUX.2-klein-9B-GGUF/resolve/main/flux-2-klein-9b-Q8_0.gguf",
+    description="FLUX.2 Klein 9B GGUF Q8_0 quantized - higher quality than Q4. Installs with VAE and Qwen3 8B encoder. ~10GB",
+    type=ModelType.Main,
+    format=ModelFormat.GGUFQuantized,
+    dependencies=[flux2_vae, flux2_klein_qwen3_8b_encoder],
+)
+# endregion
+
 # region Z-Image
 z_image_qwen3_encoder = StarterModel(
     name="Z-Image Qwen3 Text Encoder",
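Every entry in this region follows the same pattern, so adding another quantization (or a private mirror) is one more block. A hypothetical entry for illustration — the repo URL and names below are invented, and the field set is inferred from the entries above:

flux2_klein_example = StarterModel(
    name="FLUX.2 Klein 4B (Example Quant)",
    base=BaseModelType.Flux2,
    source="https://huggingface.co/example-org/FLUX.2-klein-4B-GGUF/resolve/main/example-q5.gguf",  # invented URL
    description="Illustration only - a GGUF main model that pulls in its VAE and encoder on install.",
    type=ModelType.Main,
    format=ModelFormat.GGUFQuantized,
    dependencies=[flux2_vae, flux2_klein_qwen3_4b_encoder],  # installer fetches these alongside
)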
@@ -826,6 +935,18 @@ STARTER_MODELS: list[StarterModel] = [
     flux_redux,
     llava_onevision,
     flux_fill,
+    flux2_vae,
+    flux2_klein_4b,
+    flux2_klein_4b_single,
+    flux2_klein_4b_fp8,
+    flux2_klein_9b,
+    flux2_klein_9b_fp8,
+    flux2_klein_4b_gguf_q4,
+    flux2_klein_4b_gguf_q8,
+    flux2_klein_9b_gguf_q4,
+    flux2_klein_9b_gguf_q8,
+    flux2_klein_qwen3_4b_encoder,
+    flux2_klein_qwen3_8b_encoder,
     cogview4,
     flux_krea,
     flux_krea_quantized,
@@ -898,10 +1019,17 @@ zimage_bundle: list[StarterModel] = [
     flux_vae,
 ]

+flux2_klein_bundle: list[StarterModel] = [
+    flux2_klein_4b_gguf_q4,
+    flux2_vae,
+    flux2_klein_qwen3_4b_encoder,
+]
+
 STARTER_BUNDLES: dict[str, StarterModelBundle] = {
     BaseModelType.StableDiffusion1: StarterModelBundle(name="Stable Diffusion 1.5", models=sd1_bundle),
     BaseModelType.StableDiffusionXL: StarterModelBundle(name="SDXL", models=sdxl_bundle),
     BaseModelType.Flux: StarterModelBundle(name="FLUX.1 dev", models=flux_bundle),
+    BaseModelType.Flux2: StarterModelBundle(name="FLUX.2 Klein", models=flux2_klein_bundle),
     BaseModelType.ZImage: StarterModelBundle(name="Z-Image Turbo", models=zimage_bundle),
 }

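With the bundle registered, the FLUX.2 starter set resolves like any other. A sketch of a lookup, assuming STARTER_BUNDLES and the StarterModelBundle fields shown above (name, models):

bundle = STARTER_BUNDLES[BaseModelType.Flux2]
print(bundle.name)  # "FLUX.2 Klein"
for model in bundle.models:
    print(f"{model.name}: {model.source}")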
@@ -46,6 +46,8 @@ class BaseModelType(str, Enum):
     """Indicates the model is associated with the Stable Diffusion XL Refiner model architecture."""
     Flux = "flux"
     """Indicates the model is associated with FLUX.1 model architecture, including FLUX Dev, Schnell and Fill."""
+    Flux2 = "flux2"
+    """Indicates the model is associated with FLUX.2 model architecture, including FLUX2 Klein."""
     CogView4 = "cogview4"
     """Indicates the model is associated with CogView 4 model architecture."""
     ZImage = "z-image"
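Because BaseModelType subclasses str, the new member round-trips through its raw value, which is what lets stored configs and API payloads refer to it as plain "flux2". A quick check (in the context of taxonomy.py):

assert BaseModelType("flux2") is BaseModelType.Flux2
assert BaseModelType.Flux2 == "flux2"  # str-enum compares equal to its value
assert BaseModelType.Flux2.value == "flux2"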
@@ -111,11 +113,36 @@ class ModelVariantType(str, Enum):


 class FluxVariantType(str, Enum):
+    """FLUX.1 model variants."""
+
     Schnell = "schnell"
     Dev = "dev"
     DevFill = "dev_fill"


+class Flux2VariantType(str, Enum):
+    """FLUX.2 model variants."""
+
+    Klein4B = "klein_4b"
+    """Flux2 Klein 4B variant using Qwen3 4B text encoder."""
+
+    Klein9B = "klein_9b"
+    """Flux2 Klein 9B variant using Qwen3 8B text encoder (distilled)."""
+
+    Klein9BBase = "klein_9b_base"
+    """Flux2 Klein 9B Base variant - undistilled foundation model using Qwen3 8B text encoder."""
+
+
+class Qwen3VariantType(str, Enum):
+    """Qwen3 text encoder variants based on model size."""
+
+    Qwen3_4B = "qwen3_4b"
+    """Qwen3 4B text encoder (hidden_size=2560). Used by FLUX.2 Klein 4B and Z-Image."""
+
+    Qwen3_8B = "qwen3_8b"
+    """Qwen3 8B text encoder (hidden_size=4096). Used by FLUX.2 Klein 9B."""
+
+
 class ModelFormat(str, Enum):
     """Storage format of model."""

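The docstrings pin each Qwen3 variant to a hidden size, which suggests a direct mapping when classifying a detected encoder. A sketch (the dict is ours, not part of the diff):

QWEN3_VARIANT_BY_HIDDEN_SIZE = {
    2560: Qwen3VariantType.Qwen3_4B,  # FLUX.2 Klein 4B / Z-Image
    4096: Qwen3VariantType.Qwen3_8B,  # FLUX.2 Klein 9B
}

assert QWEN3_VARIANT_BY_HIDDEN_SIZE[2560].value == "qwen3_4b"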
@@ -174,7 +201,7 @@ class FluxLoRAFormat(str, Enum):
     XLabs = "flux.xlabs"


-AnyVariant: TypeAlias = Union[ModelVariantType, ClipVariantType, FluxVariantType]
-variant_type_adapter = TypeAdapter[
-    ModelVariantType | ClipVariantType | FluxVariantType
-)
+AnyVariant: TypeAlias = Union[ModelVariantType, ClipVariantType, FluxVariantType, Flux2VariantType, Qwen3VariantType]
+variant_type_adapter = TypeAdapter[
+    ModelVariantType | ClipVariantType | FluxVariantType | Flux2VariantType | Qwen3VariantType
+](ModelVariantType | ClipVariantType | FluxVariantType | Flux2VariantType | Qwen3VariantType)
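The widened TypeAdapter means raw variant strings from stored model records can validate into the new enum types. A usage sketch, assuming the standard pydantic v2 TypeAdapter behavior of coercing a string onto a matching str-enum member:

variant = variant_type_adapter.validate_python("klein_4b")
assert variant is Flux2VariantType.Klein4B

variant = variant_type_adapter.validate_python("qwen3_8b")
assert variant is Qwen3VariantType.Qwen3_8B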
@@ -60,7 +60,7 @@ def filter_files(

     # Start by filtering on model file extensions, discarding images, docs, etc
     for file in files:
-        if file.name.endswith((".json", ".txt")):
+        if file.name.endswith((".json", ".txt", ".jinja")):  # .jinja for chat templates
             paths.append(file)
         elif file.name.endswith(
             (
@@ -116,7 +116,8 @@ def _filter_by_variant(files: List[Path], variant: ModelRepoVariant) -> Set[Path]:

         # Note: '.model' was added to support:
         # https://huggingface.co/black-forest-labs/FLUX.1-schnell/blob/768d12a373ed5cc9ef9a9dea7504dc09fcc14842/tokenizer_2/spiece.model
-        elif path.suffix in [".json", ".txt", ".model"]:
+        # Note: '.jinja' was added to support chat templates for FLUX.2 Klein models
+        elif path.suffix in [".json", ".txt", ".model", ".jinja"]:
             result.add(path)

         elif variant in [
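Both hunks gate on file suffix, so the .jinja chat templates now survive filtering. A quick self-contained check of the suffix logic (paths are made up):

from pathlib import Path

keep_suffixes = [".json", ".txt", ".model", ".jinja"]
files = [Path("tokenizer/chat_template.jinja"), Path("assets/preview.png")]
kept = [f for f in files if f.suffix in keep_suffixes]
assert kept == [Path("tokenizer/chat_template.jinja")]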
@@ -47,8 +47,6 @@ def estimate_vae_working_memory_sd15_sdxl(
     # If we are running in FP32, then we should account for the likely increase in model size (~250MB).
     working_memory += 250 * 2**20

-    print(f"estimate_vae_working_memory_sd15_sdxl: {int(working_memory)}")
-
     return int(working_memory)
