InvokeAI 6.10.0__py3-none-any.whl → 6.10.0rc1__py3-none-any.whl

This diff shows the content changes between publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (40)
  1. invokeai/app/invocations/flux_denoise.py +1 -15
  2. invokeai/app/invocations/metadata_linked.py +0 -47
  3. invokeai/app/invocations/z_image_denoise.py +84 -244
  4. invokeai/app/services/config/config_default.py +1 -3
  5. invokeai/app/services/model_manager/model_manager_default.py +0 -7
  6. invokeai/backend/flux/denoise.py +11 -196
  7. invokeai/backend/model_manager/configs/lora.py +0 -36
  8. invokeai/backend/model_manager/load/model_cache/model_cache.py +2 -104
  9. invokeai/backend/model_manager/load/model_loaders/cogview4.py +1 -2
  10. invokeai/backend/model_manager/load/model_loaders/flux.py +6 -13
  11. invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py +2 -4
  12. invokeai/backend/model_manager/load/model_loaders/onnx.py +0 -1
  13. invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +1 -2
  14. invokeai/backend/model_manager/load/model_loaders/z_image.py +3 -37
  15. invokeai/backend/model_manager/starter_models.py +4 -13
  16. invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py +5 -39
  17. invokeai/backend/quantization/gguf/ggml_tensor.py +4 -15
  18. invokeai/backend/z_image/extensions/regional_prompting_extension.py +12 -10
  19. invokeai/frontend/web/dist/assets/App-CYhlZO3Q.js +161 -0
  20. invokeai/frontend/web/dist/assets/{browser-ponyfill-4xPFTMT3.js → browser-ponyfill-DHZxq1nk.js} +1 -1
  21. invokeai/frontend/web/dist/assets/{index-vCDSQboA.js → index-dgSJAY--.js} +51 -51
  22. invokeai/frontend/web/dist/index.html +1 -1
  23. invokeai/frontend/web/dist/locales/en.json +5 -11
  24. invokeai/version/invokeai_version.py +1 -1
  25. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/METADATA +2 -2
  26. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/RECORD +32 -39
  27. invokeai/app/invocations/pbr_maps.py +0 -59
  28. invokeai/backend/flux/schedulers.py +0 -62
  29. invokeai/backend/image_util/pbr_maps/architecture/block.py +0 -367
  30. invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py +0 -70
  31. invokeai/backend/image_util/pbr_maps/pbr_maps.py +0 -141
  32. invokeai/backend/image_util/pbr_maps/utils/image_ops.py +0 -93
  33. invokeai/frontend/web/dist/assets/App-BBELGD-n.js +0 -161
  34. invokeai/frontend/web/dist/locales/en-GB.json +0 -1
  35. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/WHEEL +0 -0
  36. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/entry_points.txt +0 -0
  37. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE +0 -0
  38. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
  39. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
  40. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/top_level.txt +0 -0
@@ -720,20 +720,20 @@ z_image_turbo_quantized = StarterModel(
     name="Z-Image Turbo (quantized)",
     base=BaseModelType.ZImage,
     source="https://huggingface.co/leejet/Z-Image-Turbo-GGUF/resolve/main/z_image_turbo-Q4_K.gguf",
-    description="Z-Image Turbo quantized to GGUF Q4_K format. Requires standalone Qwen3 text encoder and Flux VAE. ~4GB",
+    description="Z-Image Turbo quantized to GGUF Q4_K format. Requires separate Qwen3 text encoder. ~4GB",
     type=ModelType.Main,
     format=ModelFormat.GGUFQuantized,
-    dependencies=[z_image_qwen3_encoder_quantized, flux_vae],
+    dependencies=[z_image_qwen3_encoder_quantized],
 )
 
 z_image_turbo_q8 = StarterModel(
     name="Z-Image Turbo (Q8)",
     base=BaseModelType.ZImage,
     source="https://huggingface.co/leejet/Z-Image-Turbo-GGUF/resolve/main/z_image_turbo-Q8_0.gguf",
-    description="Z-Image Turbo quantized to GGUF Q8_0 format. Higher quality, larger size. Requires standalone Qwen3 text encoder and Flux VAE. ~6.6GB",
+    description="Z-Image Turbo quantized to GGUF Q8_0 format. Higher quality, larger size. Requires separate Qwen3 text encoder. ~6.6GB",
     type=ModelType.Main,
     format=ModelFormat.GGUFQuantized,
-    dependencies=[z_image_qwen3_encoder_quantized, flux_vae],
+    dependencies=[z_image_qwen3_encoder_quantized],
 )
 
 z_image_controlnet_union = StarterModel(
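For reference, the resulting Q4_K entry (reconstructed from the context and added lines of this hunk) is:

    z_image_turbo_quantized = StarterModel(
        name="Z-Image Turbo (quantized)",
        base=BaseModelType.ZImage,
        source="https://huggingface.co/leejet/Z-Image-Turbo-GGUF/resolve/main/z_image_turbo-Q4_K.gguf",
        description="Z-Image Turbo quantized to GGUF Q4_K format. Requires separate Qwen3 text encoder. ~4GB",
        type=ModelType.Main,
        format=ModelFormat.GGUFQuantized,
        dependencies=[z_image_qwen3_encoder_quantized],
    )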
@@ -890,19 +890,10 @@ flux_bundle: list[StarterModel] = [
     flux_krea_quantized,
 ]
 
-zimage_bundle: list[StarterModel] = [
-    z_image_turbo_quantized,
-    z_image_qwen3_encoder_quantized,
-    z_image_controlnet_union,
-    z_image_controlnet_tile,
-    flux_vae,
-]
-
 STARTER_BUNDLES: dict[str, StarterModelBundle] = {
     BaseModelType.StableDiffusion1: StarterModelBundle(name="Stable Diffusion 1.5", models=sd1_bundle),
     BaseModelType.StableDiffusionXL: StarterModelBundle(name="SDXL", models=sdxl_bundle),
     BaseModelType.Flux: StarterModelBundle(name="FLUX.1 dev", models=flux_bundle),
-    BaseModelType.ZImage: StarterModelBundle(name="Z-Image Turbo", models=zimage_bundle),
 }
 
 assert len(STARTER_MODELS) == len({m.source for m in STARTER_MODELS}), "Duplicate starter models"
@@ -140,50 +140,16 @@ def _get_lora_layer_values(layer_dict: dict[str, torch.Tensor], alpha: float | N
 
 
 def _group_by_layer(state_dict: Dict[str, torch.Tensor]) -> dict[str, dict[str, torch.Tensor]]:
-    """Groups the keys in the state dict by layer.
-
-    Z-Image LoRAs have keys like:
-    - diffusion_model.layers.17.attention.to_k.alpha
-    - diffusion_model.layers.17.attention.to_k.dora_scale
-    - diffusion_model.layers.17.attention.to_k.lora_down.weight
-    - diffusion_model.layers.17.attention.to_k.lora_up.weight
-
-    We need to group these by the full layer path (e.g., diffusion_model.layers.17.attention.to_k)
-    and extract the suffix (alpha, dora_scale, lora_down.weight, lora_up.weight).
-    """
+    """Groups the keys in the state dict by layer."""
     layer_dict: dict[str, dict[str, torch.Tensor]] = {}
-
-    # Known suffixes that indicate the end of a layer name
-    known_suffixes = [
-        ".lora_A.weight",
-        ".lora_B.weight",
-        ".lora_down.weight",
-        ".lora_up.weight",
-        ".dora_scale",
-        ".alpha",
-    ]
-
     for key in state_dict:
         if not isinstance(key, str):
             continue
-
-        # Try to find a known suffix
-        layer_name = None
-        key_name = None
-        for suffix in known_suffixes:
-            if key.endswith(suffix):
-                layer_name = key[: -len(suffix)]
-                key_name = suffix[1:]  # Remove leading dot
-                break
-
-        if layer_name is None:
-            # Fallback to original logic for unknown formats
-            parts = key.rsplit(".", maxsplit=2)
-            layer_name = parts[0]
-            key_name = ".".join(parts[1:])
-
+        # Split the 'lora_A.weight' or 'lora_B.weight' suffix from the layer name.
+        parts = key.rsplit(".", maxsplit=2)
+        layer_name = parts[0]
+        key_name = ".".join(parts[1:])
         if layer_name not in layer_dict:
             layer_dict[layer_name] = {}
         layer_dict[layer_name][key_name] = state_dict[key]
-
     return layer_dict
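A standalone sketch of the rc1 grouping, using key names taken from the docstring removed above (the trailing comments show the actual output):

    # How rsplit(".", maxsplit=2) divides a LoRA key into a layer path
    # and a two-part key name.
    keys = [
        "diffusion_model.layers.17.attention.to_k.lora_down.weight",
        "diffusion_model.layers.17.attention.to_k.alpha",
    ]
    for key in keys:
        parts = key.rsplit(".", maxsplit=2)
        layer_name = parts[0]
        key_name = ".".join(parts[1:])
        print(f"{layer_name} -> {key_name}")
    # diffusion_model.layers.17.attention.to_k -> lora_down.weight
    # diffusion_model.layers.17.attention -> to_k.alpha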
@@ -17,32 +17,21 @@ def dequantize_and_run(func, args, kwargs):
     Also casts other floating point tensors to match the compute_dtype of GGMLTensors
     to avoid dtype mismatches in matrix operations.
     """
-    # Find the compute_dtype and target_device from any GGMLTensor in the args
+    # Find the compute_dtype from any GGMLTensor in the args
     compute_dtype = None
-    target_device = None
     for a in args:
         if hasattr(a, "compute_dtype"):
             compute_dtype = a.compute_dtype
-        if isinstance(a, torch.Tensor) and target_device is None:
-            target_device = a.device
-        if compute_dtype is not None and target_device is not None:
             break
-    if compute_dtype is None or target_device is None:
+    if compute_dtype is None:
         for v in kwargs.values():
-            if hasattr(v, "compute_dtype") and compute_dtype is None:
+            if hasattr(v, "compute_dtype"):
                 compute_dtype = v.compute_dtype
-            if isinstance(v, torch.Tensor) and target_device is None:
-                target_device = v.device
-            if compute_dtype is not None and target_device is not None:
                 break
 
     def process_tensor(t):
         if hasattr(t, "get_dequantized_tensor"):
-            result = t.get_dequantized_tensor()
-            # Ensure the dequantized tensor is on the target device
-            if target_device is not None and result.device != target_device:
-                result = result.to(target_device)
-            return result
+            return t.get_dequantized_tensor()
         elif isinstance(t, torch.Tensor) and compute_dtype is not None and t.is_floating_point():
             # Cast other floating point tensors to match the GGUF compute_dtype
             return t.to(compute_dtype)
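A minimal, self-contained sketch of this flow; DummyGGML is a hypothetical stand-in for GGMLTensor's compute_dtype/get_dequantized_tensor interface, not part of the package:

    import torch

    class DummyGGML:
        # Hypothetical stand-in: exposes the two members the hunk relies on.
        def __init__(self, data: torch.Tensor, compute_dtype: torch.dtype):
            self._data = data
            self.compute_dtype = compute_dtype

        def get_dequantized_tensor(self) -> torch.Tensor:
            return self._data.to(self.compute_dtype)

    def process_tensor(t, compute_dtype):
        # Mirrors the branch above: dequantize GGML-like tensors, cast other floats.
        if hasattr(t, "get_dequantized_tensor"):
            return t.get_dequantized_tensor()
        if isinstance(t, torch.Tensor) and compute_dtype is not None and t.is_floating_point():
            return t.to(compute_dtype)
        return t

    w = DummyGGML(torch.randn(4, 4), compute_dtype=torch.bfloat16)
    x = torch.randn(4, 4)  # float32 input; cast to bfloat16 to avoid a dtype mismatch
    y = process_tensor(x, w.compute_dtype) @ process_tensor(w, w.compute_dtype)
    assert y.dtype == torch.bfloat16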
@@ -66,16 +66,12 @@ class ZImageRegionalPromptingExtension:
     ) -> torch.Tensor | None:
         """Prepare a regional attention mask for Z-Image.
 
-        This uses an 'unrestricted' image self-attention approach (similar to FLUX):
-        - Image tokens can attend to ALL other image tokens (unrestricted self-attention)
+        The mask controls which tokens can attend to each other:
+        - Image tokens within a region attend only to each other
         - Image tokens attend only to their corresponding regional text
         - Text tokens attend only to their corresponding regional image
         - Text tokens attend to themselves
 
-        The unrestricted image self-attention allows the model to maintain global
-        coherence across regions, preventing the generation of separate/disconnected
-        images for each region.
-
         Z-Image sequence order: [img_tokens, txt_tokens]
 
         Args:
@@ -133,6 +129,12 @@ class ZImageRegionalPromptingExtension:
             # 3. txt attends to corresponding regional img
             # Reshape mask to (1, img_seq_len) for broadcasting
             regional_attention_mask[txt_start:txt_end, :img_seq_len] = mask_flat.view(1, img_seq_len)
+
+            # 4. img self-attention within region
+            # mask @ mask.T creates pairwise attention within the masked region
+            regional_attention_mask[:img_seq_len, :img_seq_len] += mask_flat.view(img_seq_len, 1) @ mask_flat.view(
+                1, img_seq_len
+            )
         else:
             # Global prompt: allow attention to/from background regions only
             if background_region_mask is not None:
@@ -150,10 +152,10 @@ class ZImageRegionalPromptingExtension:
             regional_attention_mask[:img_seq_len, txt_start:txt_end] = 1.0
             regional_attention_mask[txt_start:txt_end, :img_seq_len] = 1.0
 
-        # 4. Allow unrestricted image self-attention
-        # This is the key difference from the restricted approach - all image tokens
-        # can attend to each other, which helps maintain global coherence across regions
-        regional_attention_mask[:img_seq_len, :img_seq_len] = 1.0
+        # Allow background regions to attend to themselves
+        if background_region_mask is not None:
+            bg_mask = background_region_mask.view(img_seq_len, 1)
+            regional_attention_mask[:img_seq_len, :img_seq_len] += bg_mask @ bg_mask.T
 
         # Convert to boolean mask
         regional_attention_mask = regional_attention_mask > 0.5
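To see why the two += outer products above produce block-wise attention, here is a toy sketch with a 4-token image sequence and a region covering the first two tokens (the sizes are illustrative only; the output comment shows the actual result):

    import torch

    img_seq_len = 4
    mask_flat = torch.tensor([1.0, 1.0, 0.0, 0.0])  # region covers img tokens 0 and 1

    # Outer product of the region mask with itself: 1 exactly where both the
    # query token and the key token fall inside the region.
    region_block = mask_flat.view(img_seq_len, 1) @ mask_flat.view(1, img_seq_len)
    # tensor([[1., 1., 0., 0.],
    #         [1., 1., 0., 0.],
    #         [0., 0., 0., 0.],
    #         [0., 0., 0., 0.]])

    attn = torch.zeros(img_seq_len, img_seq_len)
    attn += region_block    # accumulate per-region blocks
    bool_mask = attn > 0.5  # final boolean mask, as in the hunk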