InvokeAI 6.10.0rc1-py3-none-any.whl → 6.10.0rc2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invokeai/app/invocations/flux_denoise.py +15 -1
- invokeai/app/invocations/pbr_maps.py +59 -0
- invokeai/app/invocations/z_image_denoise.py +237 -82
- invokeai/backend/flux/denoise.py +196 -11
- invokeai/backend/flux/schedulers.py +62 -0
- invokeai/backend/image_util/pbr_maps/architecture/block.py +367 -0
- invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py +70 -0
- invokeai/backend/image_util/pbr_maps/pbr_maps.py +141 -0
- invokeai/backend/image_util/pbr_maps/utils/image_ops.py +93 -0
- invokeai/backend/model_manager/configs/lora.py +36 -0
- invokeai/backend/model_manager/load/load_default.py +1 -0
- invokeai/backend/model_manager/load/model_loaders/cogview4.py +2 -1
- invokeai/backend/model_manager/load/model_loaders/flux.py +13 -6
- invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py +4 -2
- invokeai/backend/model_manager/load/model_loaders/onnx.py +1 -0
- invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +3 -1
- invokeai/backend/model_manager/load/model_loaders/z_image.py +37 -3
- invokeai/backend/model_manager/starter_models.py +13 -4
- invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py +39 -5
- invokeai/backend/quantization/gguf/ggml_tensor.py +15 -4
- invokeai/backend/z_image/extensions/regional_prompting_extension.py +10 -12
- invokeai/frontend/web/dist/assets/App-DllqPQ3j.js +161 -0
- invokeai/frontend/web/dist/assets/{browser-ponyfill-DHZxq1nk.js → browser-ponyfill-BP0RxJ4G.js} +1 -1
- invokeai/frontend/web/dist/assets/{index-dgSJAY--.js → index-B44qKjrs.js} +51 -51
- invokeai/frontend/web/dist/index.html +1 -1
- invokeai/frontend/web/dist/locales/en-GB.json +1 -0
- invokeai/frontend/web/dist/locales/en.json +11 -5
- invokeai/version/invokeai_version.py +1 -1
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/METADATA +2 -2
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/RECORD +36 -29
- invokeai/frontend/web/dist/assets/App-CYhlZO3Q.js +0 -161
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/WHEEL +0 -0
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/entry_points.txt +0 -0
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/licenses/LICENSE +0 -0
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
- {invokeai-6.10.0rc1.dist-info → invokeai-6.10.0rc2.dist-info}/top_level.txt +0 -0
@@ -17,21 +17,32 @@ def dequantize_and_run(func, args, kwargs):
     Also casts other floating point tensors to match the compute_dtype of GGMLTensors
     to avoid dtype mismatches in matrix operations.
     """
-    # Find the compute_dtype from any GGMLTensor in the args
+    # Find the compute_dtype and target_device from any GGMLTensor in the args
     compute_dtype = None
+    target_device = None
     for a in args:
         if hasattr(a, "compute_dtype"):
             compute_dtype = a.compute_dtype
+        if isinstance(a, torch.Tensor) and target_device is None:
+            target_device = a.device
+        if compute_dtype is not None and target_device is not None:
             break
-    if compute_dtype is None:
+    if compute_dtype is None or target_device is None:
         for v in kwargs.values():
-            if hasattr(v, "compute_dtype"):
+            if hasattr(v, "compute_dtype") and compute_dtype is None:
                 compute_dtype = v.compute_dtype
+            if isinstance(v, torch.Tensor) and target_device is None:
+                target_device = v.device
+            if compute_dtype is not None and target_device is not None:
                 break
 
     def process_tensor(t):
         if hasattr(t, "get_dequantized_tensor"):
-            return t.get_dequantized_tensor()
+            result = t.get_dequantized_tensor()
+            # Ensure the dequantized tensor is on the target device
+            if target_device is not None and result.device != target_device:
+                result = result.to(target_device)
+            return result
         elif isinstance(t, torch.Tensor) and compute_dtype is not None and t.is_floating_point():
             # Cast other floating point tensors to match the GGUF compute_dtype
             return t.to(compute_dtype)
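The hunk above makes the GGUF dispatch path device-aware as well as dtype-aware: rc1 only discovered compute_dtype, so a weight dequantized on one device could be combined with an activation living on another. The sketch below reproduces the pattern as a standalone program; ToyQuantized is a hypothetical stand-in for InvokeAI's GGMLTensor, and the single merged scan over args and kwargs is a simplification of the two-pass search in the real code.

import torch

class ToyQuantized:
    """Hypothetical stand-in for a GGMLTensor-like wrapper (illustration only)."""

    def __init__(self, data: torch.Tensor, compute_dtype: torch.dtype):
        self._data = data  # pretend this is quantized storage
        self.compute_dtype = compute_dtype

    def get_dequantized_tensor(self) -> torch.Tensor:
        # Real GGUF dequantization would decode quantized blocks here.
        return self._data.to(self.compute_dtype)

def dequantize_and_run(func, args, kwargs):
    # Discover compute_dtype and target_device from the mixed inputs,
    # mirroring the rc2 patch.
    compute_dtype = None
    target_device = None
    for a in list(args) + list(kwargs.values()):
        if hasattr(a, "compute_dtype") and compute_dtype is None:
            compute_dtype = a.compute_dtype
        if isinstance(a, torch.Tensor) and target_device is None:
            target_device = a.device
        if compute_dtype is not None and target_device is not None:
            break

    def process(t):
        if hasattr(t, "get_dequantized_tensor"):
            result = t.get_dequantized_tensor()
            # The rc2 fix: keep the dequantized weight on the same device
            # as the other tensors participating in the op.
            if target_device is not None and result.device != target_device:
                result = result.to(target_device)
            return result
        if isinstance(t, torch.Tensor) and compute_dtype is not None and t.is_floating_point():
            return t.to(compute_dtype)  # avoid dtype mismatches in matmuls
        return t

    new_args = [process(a) for a in args]
    new_kwargs = {k: process(v) for k, v in kwargs.items()}
    return func(*new_args, **new_kwargs)

# Example: a float32 activation matmul'd against a "quantized" weight.
x = torch.randn(2, 4)
w = ToyQuantized(torch.randn(4, 3), compute_dtype=torch.float32)
y = dequantize_and_run(torch.matmul, (x, w), {})
print(y.shape)  # torch.Size([2, 3])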
@@ -66,12 +66,16 @@ class ZImageRegionalPromptingExtension:
     ) -> torch.Tensor | None:
         """Prepare a regional attention mask for Z-Image.
 
-
-        - Image tokens
+        This uses an 'unrestricted' image self-attention approach (similar to FLUX):
+        - Image tokens can attend to ALL other image tokens (unrestricted self-attention)
         - Image tokens attend only to their corresponding regional text
         - Text tokens attend only to their corresponding regional image
         - Text tokens attend to themselves
 
+        The unrestricted image self-attention allows the model to maintain global
+        coherence across regions, preventing the generation of separate/disconnected
+        images for each region.
+
         Z-Image sequence order: [img_tokens, txt_tokens]
 
         Args:
@@ -129,12 +133,6 @@ class ZImageRegionalPromptingExtension:
                 # 3. txt attends to corresponding regional img
                 # Reshape mask to (1, img_seq_len) for broadcasting
                 regional_attention_mask[txt_start:txt_end, :img_seq_len] = mask_flat.view(1, img_seq_len)
-
-                # 4. img self-attention within region
-                # mask @ mask.T creates pairwise attention within the masked region
-                regional_attention_mask[:img_seq_len, :img_seq_len] += mask_flat.view(img_seq_len, 1) @ mask_flat.view(
-                    1, img_seq_len
-                )
             else:
                 # Global prompt: allow attention to/from background regions only
                 if background_region_mask is not None:
@@ -152,10 +150,10 @@ class ZImageRegionalPromptingExtension:
                     regional_attention_mask[:img_seq_len, txt_start:txt_end] = 1.0
                     regional_attention_mask[txt_start:txt_end, :img_seq_len] = 1.0
 
-        # Allow
-
-
-
+        # 4. Allow unrestricted image self-attention
+        # This is the key difference from the restricted approach - all image tokens
+        # can attend to each other, which helps maintain global coherence across regions
+        regional_attention_mask[:img_seq_len, :img_seq_len] = 1.0
 
         # Convert to boolean mask
         regional_attention_mask = regional_attention_mask > 0.5