InvokeAI 6.10.0rc1-py3-none-any.whl → 6.10.0rc2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invokeai/app/invocations/flux_denoise.py +15 -1
- invokeai/app/invocations/pbr_maps.py +59 -0
- invokeai/app/invocations/z_image_denoise.py +237 -82
- invokeai/backend/flux/denoise.py +196 -11
- invokeai/backend/flux/schedulers.py +62 -0
- invokeai/backend/image_util/pbr_maps/architecture/block.py +367 -0
- invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py +70 -0
- invokeai/backend/image_util/pbr_maps/pbr_maps.py +141 -0
- invokeai/backend/image_util/pbr_maps/utils/image_ops.py +93 -0
- invokeai/backend/model_manager/configs/lora.py +36 -0
- invokeai/backend/model_manager/load/load_default.py +1 -0
- invokeai/backend/model_manager/load/model_loaders/cogview4.py +2 -1
- invokeai/backend/model_manager/load/model_loaders/flux.py +13 -6
- invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py +4 -2
- invokeai/backend/model_manager/load/model_loaders/onnx.py +1 -0
- invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +3 -1
- invokeai/backend/model_manager/load/model_loaders/z_image.py +37 -3
- invokeai/backend/model_manager/starter_models.py +13 -4
- invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py +39 -5
- invokeai/backend/quantization/gguf/ggml_tensor.py +15 -4
- invokeai/backend/z_image/extensions/regional_prompting_extension.py +10 -12
- invokeai/frontend/web/dist/assets/App-DllqPQ3j.js +161 -0
- invokeai/frontend/web/dist/assets/{browser-ponyfill-DHZxq1nk.js → browser-ponyfill-BP0RxJ4G.js} +1 -1
- invokeai/frontend/web/dist/assets/{index-dgSJAY--.js → index-B44qKjrs.js} +51 -51
- invokeai/frontend/web/dist/index.html +1 -1
- invokeai/frontend/web/dist/locales/en-GB.json +1 -0
- invokeai/frontend/web/dist/locales/en.json +11 -5
- invokeai/version/invokeai_version.py +1 -1
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/METADATA +2 -2
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/RECORD +36 -29
- invokeai/frontend/web/dist/assets/App-CYhlZO3Q.js +0 -161
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/WHEEL +0 -0
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/entry_points.txt +0 -0
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/licenses/LICENSE +0 -0
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/top_level.txt +0 -0
@@ -17,21 +17,32 @@ def dequantize_and_run(func, args, kwargs):
     Also casts other floating point tensors to match the compute_dtype of GGMLTensors
     to avoid dtype mismatches in matrix operations.
     """
-    # Find the compute_dtype from any GGMLTensor in the args
+    # Find the compute_dtype and target_device from any GGMLTensor in the args
     compute_dtype = None
+    target_device = None
     for a in args:
         if hasattr(a, "compute_dtype"):
             compute_dtype = a.compute_dtype
+        if isinstance(a, torch.Tensor) and target_device is None:
+            target_device = a.device
+        if compute_dtype is not None and target_device is not None:
             break
-    if compute_dtype is None:
+    if compute_dtype is None or target_device is None:
         for v in kwargs.values():
-            if hasattr(v, "compute_dtype"):
+            if hasattr(v, "compute_dtype") and compute_dtype is None:
                 compute_dtype = v.compute_dtype
+            if isinstance(v, torch.Tensor) and target_device is None:
+                target_device = v.device
+            if compute_dtype is not None and target_device is not None:
                 break
 
     def process_tensor(t):
         if hasattr(t, "get_dequantized_tensor"):
-            return t.get_dequantized_tensor()
+            result = t.get_dequantized_tensor()
+            # Ensure the dequantized tensor is on the target device
+            if target_device is not None and result.device != target_device:
+                result = result.to(target_device)
+            return result
         elif isinstance(t, torch.Tensor) and compute_dtype is not None and t.is_floating_point():
             # Cast other floating point tensors to match the GGUF compute_dtype
             return t.to(compute_dtype)
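The hunk above makes the GGUF dispatch path device-aware as well as dtype-aware: rc1 only discovered compute_dtype, so a weight dequantized on one device could be combined with an activation living on another. The sketch below reproduces the pattern as a standalone program; ToyQuantized is a hypothetical stand-in for InvokeAI's GGMLTensor, and the single merged scan over args and kwargs is a simplification of the two-pass search in the real code.

import torch

class ToyQuantized:
    """Hypothetical stand-in for a GGMLTensor-like wrapper (illustration only)."""

    def __init__(self, data: torch.Tensor, compute_dtype: torch.dtype):
        self._data = data  # pretend this is quantized storage
        self.compute_dtype = compute_dtype

    def get_dequantized_tensor(self) -> torch.Tensor:
        # Real GGUF dequantization would decode quantized blocks here.
        return self._data.to(self.compute_dtype)

def dequantize_and_run(func, args, kwargs):
    # Discover compute_dtype and target_device from the mixed inputs,
    # mirroring the rc2 patch.
    compute_dtype = None
    target_device = None
    for a in list(args) + list(kwargs.values()):
        if hasattr(a, "compute_dtype") and compute_dtype is None:
            compute_dtype = a.compute_dtype
        if isinstance(a, torch.Tensor) and target_device is None:
            target_device = a.device
        if compute_dtype is not None and target_device is not None:
            break

    def process(t):
        if hasattr(t, "get_dequantized_tensor"):
            result = t.get_dequantized_tensor()
            # The rc2 fix: keep the dequantized weight on the same device
            # as the other tensors participating in the op.
            if target_device is not None and result.device != target_device:
                result = result.to(target_device)
            return result
        if isinstance(t, torch.Tensor) and compute_dtype is not None and t.is_floating_point():
            return t.to(compute_dtype)  # avoid dtype mismatches in matmuls
        return t

    new_args = [process(a) for a in args]
    new_kwargs = {k: process(v) for k, v in kwargs.items()}
    return func(*new_args, **new_kwargs)

# Example: a float32 activation matmul'd against a "quantized" weight.
x = torch.randn(2, 4)
w = ToyQuantized(torch.randn(4, 3), compute_dtype=torch.float32)
y = dequantize_and_run(torch.matmul, (x, w), {})
print(y.shape)  # torch.Size([2, 3])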
@@ -66,12 +66,16 @@ class ZImageRegionalPromptingExtension:
     ) -> torch.Tensor | None:
         """Prepare a regional attention mask for Z-Image.
 
-
-        - Image tokens
+        This uses an 'unrestricted' image self-attention approach (similar to FLUX):
+        - Image tokens can attend to ALL other image tokens (unrestricted self-attention)
         - Image tokens attend only to their corresponding regional text
         - Text tokens attend only to their corresponding regional image
         - Text tokens attend to themselves
 
+        The unrestricted image self-attention allows the model to maintain global
+        coherence across regions, preventing the generation of separate/disconnected
+        images for each region.
+
         Z-Image sequence order: [img_tokens, txt_tokens]
 
         Args:
@@ -129,12 +133,6 @@ class ZImageRegionalPromptingExtension:
                 # 3. txt attends to corresponding regional img
                 # Reshape mask to (1, img_seq_len) for broadcasting
                 regional_attention_mask[txt_start:txt_end, :img_seq_len] = mask_flat.view(1, img_seq_len)
-
-                # 4. img self-attention within region
-                # mask @ mask.T creates pairwise attention within the masked region
-                regional_attention_mask[:img_seq_len, :img_seq_len] += mask_flat.view(img_seq_len, 1) @ mask_flat.view(
-                    1, img_seq_len
-                )
             else:
                 # Global prompt: allow attention to/from background regions only
                 if background_region_mask is not None:
@@ -152,10 +150,10 @@ class ZImageRegionalPromptingExtension:
                     regional_attention_mask[:img_seq_len, txt_start:txt_end] = 1.0
                     regional_attention_mask[txt_start:txt_end, :img_seq_len] = 1.0
 
-        # Allow
-
-
-
+        # 4. Allow unrestricted image self-attention
+        # This is the key difference from the restricted approach - all image tokens
+        # can attend to each other, which helps maintain global coherence across regions
+        regional_attention_mask[:img_seq_len, :img_seq_len] = 1.0
 
         # Convert to boolean mask
         regional_attention_mask = regional_attention_mask > 0.5