InvokeAI 6.10.0__py3-none-any.whl → 6.10.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invokeai/app/invocations/flux_denoise.py +1 -15
- invokeai/app/invocations/metadata_linked.py +0 -47
- invokeai/app/invocations/z_image_denoise.py +84 -244
- invokeai/app/services/config/config_default.py +1 -3
- invokeai/app/services/model_manager/model_manager_default.py +0 -7
- invokeai/backend/flux/denoise.py +11 -196
- invokeai/backend/model_manager/configs/lora.py +0 -36
- invokeai/backend/model_manager/load/model_cache/model_cache.py +2 -104
- invokeai/backend/model_manager/load/model_loaders/cogview4.py +1 -2
- invokeai/backend/model_manager/load/model_loaders/flux.py +6 -13
- invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py +2 -4
- invokeai/backend/model_manager/load/model_loaders/onnx.py +0 -1
- invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +1 -2
- invokeai/backend/model_manager/load/model_loaders/z_image.py +3 -37
- invokeai/backend/model_manager/starter_models.py +4 -13
- invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py +5 -39
- invokeai/backend/quantization/gguf/ggml_tensor.py +4 -15
- invokeai/backend/z_image/extensions/regional_prompting_extension.py +12 -10
- invokeai/frontend/web/dist/assets/App-CYhlZO3Q.js +161 -0
- invokeai/frontend/web/dist/assets/{browser-ponyfill-4xPFTMT3.js → browser-ponyfill-DHZxq1nk.js} +1 -1
- invokeai/frontend/web/dist/assets/{index-vCDSQboA.js → index-dgSJAY--.js} +51 -51
- invokeai/frontend/web/dist/index.html +1 -1
- invokeai/frontend/web/dist/locales/en.json +5 -11
- invokeai/version/invokeai_version.py +1 -1
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/METADATA +2 -2
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/RECORD +32 -39
- invokeai/app/invocations/pbr_maps.py +0 -59
- invokeai/backend/flux/schedulers.py +0 -62
- invokeai/backend/image_util/pbr_maps/architecture/block.py +0 -367
- invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py +0 -70
- invokeai/backend/image_util/pbr_maps/pbr_maps.py +0 -141
- invokeai/backend/image_util/pbr_maps/utils/image_ops.py +0 -93
- invokeai/frontend/web/dist/assets/App-BBELGD-n.js +0 -161
- invokeai/frontend/web/dist/locales/en-GB.json +0 -1
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/WHEEL +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/entry_points.txt +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -720,20 +720,20 @@ z_image_turbo_quantized = StarterModel(
|
|
|
720
720
|
name="Z-Image Turbo (quantized)",
|
|
721
721
|
base=BaseModelType.ZImage,
|
|
722
722
|
source="https://huggingface.co/leejet/Z-Image-Turbo-GGUF/resolve/main/z_image_turbo-Q4_K.gguf",
|
|
723
|
-
description="Z-Image Turbo quantized to GGUF Q4_K format. Requires
|
|
723
|
+
description="Z-Image Turbo quantized to GGUF Q4_K format. Requires separate Qwen3 text encoder. ~4GB",
|
|
724
724
|
type=ModelType.Main,
|
|
725
725
|
format=ModelFormat.GGUFQuantized,
|
|
726
|
-
dependencies=[z_image_qwen3_encoder_quantized
|
|
726
|
+
dependencies=[z_image_qwen3_encoder_quantized],
|
|
727
727
|
)
|
|
728
728
|
|
|
729
729
|
z_image_turbo_q8 = StarterModel(
|
|
730
730
|
name="Z-Image Turbo (Q8)",
|
|
731
731
|
base=BaseModelType.ZImage,
|
|
732
732
|
source="https://huggingface.co/leejet/Z-Image-Turbo-GGUF/resolve/main/z_image_turbo-Q8_0.gguf",
|
|
733
|
-
description="Z-Image Turbo quantized to GGUF Q8_0 format. Higher quality, larger size. Requires
|
|
733
|
+
description="Z-Image Turbo quantized to GGUF Q8_0 format. Higher quality, larger size. Requires separate Qwen3 text encoder. ~6.6GB",
|
|
734
734
|
type=ModelType.Main,
|
|
735
735
|
format=ModelFormat.GGUFQuantized,
|
|
736
|
-
dependencies=[z_image_qwen3_encoder_quantized
|
|
736
|
+
dependencies=[z_image_qwen3_encoder_quantized],
|
|
737
737
|
)
|
|
738
738
|
|
|
739
739
|
z_image_controlnet_union = StarterModel(
|
|
@@ -890,19 +890,10 @@ flux_bundle: list[StarterModel] = [
|
|
|
890
890
|
flux_krea_quantized,
|
|
891
891
|
]
|
|
892
892
|
|
|
893
|
-
zimage_bundle: list[StarterModel] = [
|
|
894
|
-
z_image_turbo_quantized,
|
|
895
|
-
z_image_qwen3_encoder_quantized,
|
|
896
|
-
z_image_controlnet_union,
|
|
897
|
-
z_image_controlnet_tile,
|
|
898
|
-
flux_vae,
|
|
899
|
-
]
|
|
900
|
-
|
|
901
893
|
STARTER_BUNDLES: dict[str, StarterModelBundle] = {
|
|
902
894
|
BaseModelType.StableDiffusion1: StarterModelBundle(name="Stable Diffusion 1.5", models=sd1_bundle),
|
|
903
895
|
BaseModelType.StableDiffusionXL: StarterModelBundle(name="SDXL", models=sdxl_bundle),
|
|
904
896
|
BaseModelType.Flux: StarterModelBundle(name="FLUX.1 dev", models=flux_bundle),
|
|
905
|
-
BaseModelType.ZImage: StarterModelBundle(name="Z-Image Turbo", models=zimage_bundle),
|
|
906
897
|
}
|
|
907
898
|
|
|
908
899
|
assert len(STARTER_MODELS) == len({m.source for m in STARTER_MODELS}), "Duplicate starter models"
|
|
@@ -140,50 +140,16 @@ def _get_lora_layer_values(layer_dict: dict[str, torch.Tensor], alpha: float | N
|
|
|
140
140
|
|
|
141
141
|
|
|
142
142
|
def _group_by_layer(state_dict: Dict[str, torch.Tensor]) -> dict[str, dict[str, torch.Tensor]]:
|
|
143
|
-
"""Groups the keys in the state dict by layer.
|
|
144
|
-
|
|
145
|
-
Z-Image LoRAs have keys like:
|
|
146
|
-
- diffusion_model.layers.17.attention.to_k.alpha
|
|
147
|
-
- diffusion_model.layers.17.attention.to_k.dora_scale
|
|
148
|
-
- diffusion_model.layers.17.attention.to_k.lora_down.weight
|
|
149
|
-
- diffusion_model.layers.17.attention.to_k.lora_up.weight
|
|
150
|
-
|
|
151
|
-
We need to group these by the full layer path (e.g., diffusion_model.layers.17.attention.to_k)
|
|
152
|
-
and extract the suffix (alpha, dora_scale, lora_down.weight, lora_up.weight).
|
|
153
|
-
"""
|
|
143
|
+
"""Groups the keys in the state dict by layer."""
|
|
154
144
|
layer_dict: dict[str, dict[str, torch.Tensor]] = {}
|
|
155
|
-
|
|
156
|
-
# Known suffixes that indicate the end of a layer name
|
|
157
|
-
known_suffixes = [
|
|
158
|
-
".lora_A.weight",
|
|
159
|
-
".lora_B.weight",
|
|
160
|
-
".lora_down.weight",
|
|
161
|
-
".lora_up.weight",
|
|
162
|
-
".dora_scale",
|
|
163
|
-
".alpha",
|
|
164
|
-
]
|
|
165
|
-
|
|
166
145
|
for key in state_dict:
|
|
167
146
|
if not isinstance(key, str):
|
|
168
147
|
continue
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
layer_name = None
|
|
172
|
-
key_name = None
|
|
173
|
-
for suffix in known_suffixes:
|
|
174
|
-
if key.endswith(suffix):
|
|
175
|
-
layer_name = key[: -len(suffix)]
|
|
176
|
-
key_name = suffix[1:] # Remove leading dot
|
|
177
|
-
break
|
|
178
|
-
|
|
179
|
-
if layer_name is None:
|
|
180
|
-
# Fallback to original logic for unknown formats
|
|
181
|
-
parts = key.rsplit(".", maxsplit=2)
|
|
182
|
-
layer_name = parts[0]
|
|
183
|
-
key_name = ".".join(parts[1:])
|
|
184
|
-
|
|
148
|
+
# Split the 'lora_A.weight' or 'lora_B.weight' suffix from the layer name.
|
|
149
|
+
parts = key.rsplit(".", maxsplit=2)
|
|
150
|
+
layer_name = parts[0]
|
|
151
|
+
key_name = ".".join(parts[1:])
|
|
185
152
|
if layer_name not in layer_dict:
|
|
186
153
|
layer_dict[layer_name] = {}
|
|
187
154
|
layer_dict[layer_name][key_name] = state_dict[key]
|
|
188
|
-
|
|
189
155
|
return layer_dict
|
|
@@ -17,32 +17,21 @@ def dequantize_and_run(func, args, kwargs):
|
|
|
17
17
|
Also casts other floating point tensors to match the compute_dtype of GGMLTensors
|
|
18
18
|
to avoid dtype mismatches in matrix operations.
|
|
19
19
|
"""
|
|
20
|
-
# Find the compute_dtype
|
|
20
|
+
# Find the compute_dtype from any GGMLTensor in the args
|
|
21
21
|
compute_dtype = None
|
|
22
|
-
target_device = None
|
|
23
22
|
for a in args:
|
|
24
23
|
if hasattr(a, "compute_dtype"):
|
|
25
24
|
compute_dtype = a.compute_dtype
|
|
26
|
-
if isinstance(a, torch.Tensor) and target_device is None:
|
|
27
|
-
target_device = a.device
|
|
28
|
-
if compute_dtype is not None and target_device is not None:
|
|
29
25
|
break
|
|
30
|
-
if compute_dtype is None
|
|
26
|
+
if compute_dtype is None:
|
|
31
27
|
for v in kwargs.values():
|
|
32
|
-
if hasattr(v, "compute_dtype")
|
|
28
|
+
if hasattr(v, "compute_dtype"):
|
|
33
29
|
compute_dtype = v.compute_dtype
|
|
34
|
-
if isinstance(v, torch.Tensor) and target_device is None:
|
|
35
|
-
target_device = v.device
|
|
36
|
-
if compute_dtype is not None and target_device is not None:
|
|
37
30
|
break
|
|
38
31
|
|
|
39
32
|
def process_tensor(t):
|
|
40
33
|
if hasattr(t, "get_dequantized_tensor"):
|
|
41
|
-
|
|
42
|
-
# Ensure the dequantized tensor is on the target device
|
|
43
|
-
if target_device is not None and result.device != target_device:
|
|
44
|
-
result = result.to(target_device)
|
|
45
|
-
return result
|
|
34
|
+
return t.get_dequantized_tensor()
|
|
46
35
|
elif isinstance(t, torch.Tensor) and compute_dtype is not None and t.is_floating_point():
|
|
47
36
|
# Cast other floating point tensors to match the GGUF compute_dtype
|
|
48
37
|
return t.to(compute_dtype)
|
|
@@ -66,16 +66,12 @@ class ZImageRegionalPromptingExtension:
|
|
|
66
66
|
) -> torch.Tensor | None:
|
|
67
67
|
"""Prepare a regional attention mask for Z-Image.
|
|
68
68
|
|
|
69
|
-
|
|
70
|
-
- Image tokens
|
|
69
|
+
The mask controls which tokens can attend to each other:
|
|
70
|
+
- Image tokens within a region attend only to each other
|
|
71
71
|
- Image tokens attend only to their corresponding regional text
|
|
72
72
|
- Text tokens attend only to their corresponding regional image
|
|
73
73
|
- Text tokens attend to themselves
|
|
74
74
|
|
|
75
|
-
The unrestricted image self-attention allows the model to maintain global
|
|
76
|
-
coherence across regions, preventing the generation of separate/disconnected
|
|
77
|
-
images for each region.
|
|
78
|
-
|
|
79
75
|
Z-Image sequence order: [img_tokens, txt_tokens]
|
|
80
76
|
|
|
81
77
|
Args:
|
|
@@ -133,6 +129,12 @@ class ZImageRegionalPromptingExtension:
|
|
|
133
129
|
# 3. txt attends to corresponding regional img
|
|
134
130
|
# Reshape mask to (1, img_seq_len) for broadcasting
|
|
135
131
|
regional_attention_mask[txt_start:txt_end, :img_seq_len] = mask_flat.view(1, img_seq_len)
|
|
132
|
+
|
|
133
|
+
# 4. img self-attention within region
|
|
134
|
+
# mask @ mask.T creates pairwise attention within the masked region
|
|
135
|
+
regional_attention_mask[:img_seq_len, :img_seq_len] += mask_flat.view(img_seq_len, 1) @ mask_flat.view(
|
|
136
|
+
1, img_seq_len
|
|
137
|
+
)
|
|
136
138
|
else:
|
|
137
139
|
# Global prompt: allow attention to/from background regions only
|
|
138
140
|
if background_region_mask is not None:
|
|
@@ -150,10 +152,10 @@ class ZImageRegionalPromptingExtension:
|
|
|
150
152
|
regional_attention_mask[:img_seq_len, txt_start:txt_end] = 1.0
|
|
151
153
|
regional_attention_mask[txt_start:txt_end, :img_seq_len] = 1.0
|
|
152
154
|
|
|
153
|
-
#
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
155
|
+
# Allow background regions to attend to themselves
|
|
156
|
+
if background_region_mask is not None:
|
|
157
|
+
bg_mask = background_region_mask.view(img_seq_len, 1)
|
|
158
|
+
regional_attention_mask[:img_seq_len, :img_seq_len] += bg_mask @ bg_mask.T
|
|
157
159
|
|
|
158
160
|
# Convert to boolean mask
|
|
159
161
|
regional_attention_mask = regional_attention_mask > 0.5
|