InvokeAI 6.9.0rc3__py3-none-any.whl → 6.10.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. invokeai/app/api/dependencies.py +2 -0
  2. invokeai/app/api/routers/model_manager.py +91 -2
  3. invokeai/app/api/routers/workflows.py +9 -0
  4. invokeai/app/invocations/fields.py +19 -0
  5. invokeai/app/invocations/image_to_latents.py +23 -5
  6. invokeai/app/invocations/latents_to_image.py +2 -25
  7. invokeai/app/invocations/metadata.py +9 -1
  8. invokeai/app/invocations/model.py +8 -0
  9. invokeai/app/invocations/primitives.py +12 -0
  10. invokeai/app/invocations/prompt_template.py +57 -0
  11. invokeai/app/invocations/z_image_control.py +112 -0
  12. invokeai/app/invocations/z_image_denoise.py +610 -0
  13. invokeai/app/invocations/z_image_image_to_latents.py +102 -0
  14. invokeai/app/invocations/z_image_latents_to_image.py +103 -0
  15. invokeai/app/invocations/z_image_lora_loader.py +153 -0
  16. invokeai/app/invocations/z_image_model_loader.py +135 -0
  17. invokeai/app/invocations/z_image_text_encoder.py +197 -0
  18. invokeai/app/services/model_install/model_install_common.py +14 -1
  19. invokeai/app/services/model_install/model_install_default.py +119 -19
  20. invokeai/app/services/model_records/model_records_base.py +12 -0
  21. invokeai/app/services/model_records/model_records_sql.py +17 -0
  22. invokeai/app/services/shared/graph.py +132 -77
  23. invokeai/app/services/workflow_records/workflow_records_base.py +8 -0
  24. invokeai/app/services/workflow_records/workflow_records_sqlite.py +42 -0
  25. invokeai/app/util/step_callback.py +3 -0
  26. invokeai/backend/model_manager/configs/controlnet.py +47 -1
  27. invokeai/backend/model_manager/configs/factory.py +26 -1
  28. invokeai/backend/model_manager/configs/lora.py +43 -1
  29. invokeai/backend/model_manager/configs/main.py +113 -0
  30. invokeai/backend/model_manager/configs/qwen3_encoder.py +156 -0
  31. invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_diffusers_rms_norm.py +40 -0
  32. invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_layer_norm.py +25 -0
  33. invokeai/backend/model_manager/load/model_cache/torch_module_autocast/torch_module_autocast.py +11 -2
  34. invokeai/backend/model_manager/load/model_loaders/lora.py +11 -0
  35. invokeai/backend/model_manager/load/model_loaders/z_image.py +935 -0
  36. invokeai/backend/model_manager/load/model_util.py +6 -1
  37. invokeai/backend/model_manager/metadata/metadata_base.py +12 -5
  38. invokeai/backend/model_manager/model_on_disk.py +3 -0
  39. invokeai/backend/model_manager/starter_models.py +70 -0
  40. invokeai/backend/model_manager/taxonomy.py +5 -0
  41. invokeai/backend/model_manager/util/select_hf_files.py +23 -8
  42. invokeai/backend/patches/layer_patcher.py +34 -16
  43. invokeai/backend/patches/layers/lora_layer_base.py +2 -1
  44. invokeai/backend/patches/lora_conversions/flux_aitoolkit_lora_conversion_utils.py +17 -2
  45. invokeai/backend/patches/lora_conversions/flux_xlabs_lora_conversion_utils.py +92 -0
  46. invokeai/backend/patches/lora_conversions/formats.py +5 -0
  47. invokeai/backend/patches/lora_conversions/z_image_lora_constants.py +8 -0
  48. invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py +155 -0
  49. invokeai/backend/quantization/gguf/ggml_tensor.py +27 -4
  50. invokeai/backend/quantization/gguf/loaders.py +47 -12
  51. invokeai/backend/stable_diffusion/diffusion/conditioning_data.py +13 -0
  52. invokeai/backend/util/devices.py +25 -0
  53. invokeai/backend/util/hotfixes.py +2 -2
  54. invokeai/backend/z_image/__init__.py +16 -0
  55. invokeai/backend/z_image/extensions/__init__.py +1 -0
  56. invokeai/backend/z_image/extensions/regional_prompting_extension.py +207 -0
  57. invokeai/backend/z_image/text_conditioning.py +74 -0
  58. invokeai/backend/z_image/z_image_control_adapter.py +238 -0
  59. invokeai/backend/z_image/z_image_control_transformer.py +643 -0
  60. invokeai/backend/z_image/z_image_controlnet_extension.py +531 -0
  61. invokeai/backend/z_image/z_image_patchify_utils.py +135 -0
  62. invokeai/backend/z_image/z_image_transformer_patch.py +234 -0
  63. invokeai/frontend/web/dist/assets/App-CYhlZO3Q.js +161 -0
  64. invokeai/frontend/web/dist/assets/{browser-ponyfill-CN1j0ARZ.js → browser-ponyfill-DHZxq1nk.js} +1 -1
  65. invokeai/frontend/web/dist/assets/index-dgSJAY--.js +530 -0
  66. invokeai/frontend/web/dist/index.html +1 -1
  67. invokeai/frontend/web/dist/locales/de.json +24 -6
  68. invokeai/frontend/web/dist/locales/en.json +70 -1
  69. invokeai/frontend/web/dist/locales/es.json +0 -5
  70. invokeai/frontend/web/dist/locales/fr.json +0 -6
  71. invokeai/frontend/web/dist/locales/it.json +17 -64
  72. invokeai/frontend/web/dist/locales/ja.json +379 -44
  73. invokeai/frontend/web/dist/locales/ru.json +0 -6
  74. invokeai/frontend/web/dist/locales/vi.json +7 -54
  75. invokeai/frontend/web/dist/locales/zh-CN.json +0 -6
  76. invokeai/version/invokeai_version.py +1 -1
  77. {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/METADATA +3 -3
  78. {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/RECORD +84 -60
  79. invokeai/frontend/web/dist/assets/App-Cn9UyjoV.js +0 -161
  80. invokeai/frontend/web/dist/assets/index-BDrf9CL-.js +0 -530
  81. {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/WHEEL +0 -0
  82. {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/entry_points.txt +0 -0
  83. {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE +0 -0
  84. {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
  85. {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
  86. {invokeai-6.9.0rc3.dist-info → invokeai-6.10.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,238 @@
1
+ # Adapted from https://github.com/aigc-apps/VideoX-Fun/blob/main/videox_fun/models/z_image_transformer2d_control.py
2
+ # Copyright (c) Alibaba, Inc. and its affiliates.
3
+ # Apache License 2.0
4
+
5
+ """
6
+ Z-Image Control Adapter for InvokeAI.
7
+
8
+ This module provides a standalone control adapter that can be combined with
9
+ a base ZImageTransformer2DModel at runtime. The adapter contains only the
10
+ control-specific layers (control_layers, control_all_x_embedder, control_noise_refiner).
11
+ """
12
+
13
+ from typing import List, Optional
14
+
15
+ import torch
16
+ import torch.nn as nn
17
+ from diffusers.configuration_utils import ConfigMixin, register_to_config
18
+ from diffusers.models.modeling_utils import ModelMixin
19
+ from diffusers.models.transformers.transformer_z_image import (
20
+ SEQ_MULTI_OF,
21
+ ZImageTransformerBlock,
22
+ )
23
+ from torch.nn.utils.rnn import pad_sequence
24
+
25
+
26
+ class ZImageControlTransformerBlock(ZImageTransformerBlock):
27
+ """Control-specific transformer block with skip connections for hint generation."""
28
+
29
+ def __init__(
30
+ self,
31
+ layer_id: int,
32
+ dim: int,
33
+ n_heads: int,
34
+ n_kv_heads: int,
35
+ norm_eps: float,
36
+ qk_norm: bool,
37
+ modulation: bool = True,
38
+ block_id: int = 0,
39
+ ):
40
+ super().__init__(layer_id, dim, n_heads, n_kv_heads, norm_eps, qk_norm, modulation)
41
+ self.block_id = block_id
42
+ if block_id == 0:
43
+ self.before_proj = nn.Linear(dim, dim)
44
+ nn.init.zeros_(self.before_proj.weight)
45
+ nn.init.zeros_(self.before_proj.bias)
46
+ self.after_proj = nn.Linear(dim, dim)
47
+ nn.init.zeros_(self.after_proj.weight)
48
+ nn.init.zeros_(self.after_proj.bias)
49
+
50
+ def forward(
51
+ self,
52
+ c: torch.Tensor,
53
+ x: torch.Tensor,
54
+ attn_mask: torch.Tensor,
55
+ freqs_cis: torch.Tensor,
56
+ adaln_input: Optional[torch.Tensor] = None,
57
+ ) -> torch.Tensor:
58
+ if self.block_id == 0:
59
+ c = self.before_proj(c) + x
60
+ all_c: list[torch.Tensor] = []
61
+ else:
62
+ all_c = list(torch.unbind(c))
63
+ c = all_c.pop(-1)
64
+
65
+ c = super().forward(c, attn_mask=attn_mask, freqs_cis=freqs_cis, adaln_input=adaln_input)
66
+ c_skip = self.after_proj(c)
67
+ all_c += [c_skip, c]
68
+ c = torch.stack(all_c)
69
+ return c
70
+
71
+
72
class ZImageControlAdapter(ModelMixin, ConfigMixin):
    """Standalone Z-Image Control Adapter.

    This adapter contains only the control-specific layers and can be combined
    with a base ZImageTransformer2DModel at runtime. It computes control hints
    that are added to the transformer's hidden states.

    The adapter supports 5 control modes: Canny, HED, Depth, Pose, MLSD.
    Recommended control_context_scale: 0.65-0.80.
    """

    @register_to_config
    def __init__(
        self,
        num_control_blocks: int = 6,  # Number of control layer blocks
        control_in_dim: int = 16,  # Channels of the incoming control latents
        all_patch_size: tuple[int, ...] = (2,),  # Supported spatial patch sizes
        all_f_patch_size: tuple[int, ...] = (1,),  # Supported frame patch sizes (paired with all_patch_size)
        dim: int = 3840,  # Hidden size; must match the base transformer's dim
        n_refiner_layers: int = 2,
        n_heads: int = 30,
        n_kv_heads: int = 30,
        norm_eps: float = 1e-5,
        qk_norm: bool = True,
    ):
        super().__init__()

        self.dim = dim
        self.control_in_dim = control_in_dim
        self.all_patch_size = all_patch_size
        self.all_f_patch_size = all_f_patch_size

        # Control patch embeddings: one linear embedder per (patch, f_patch)
        # pair, keyed "patch-f_patch" to match the lookup in forward().
        all_x_embedder = {}
        for patch_size, f_patch_size in zip(all_patch_size, all_f_patch_size, strict=True):
            x_embedder = nn.Linear(
                f_patch_size * patch_size * patch_size * control_in_dim,
                dim,
                bias=True,
            )
            all_x_embedder[f"{patch_size}-{f_patch_size}"] = x_embedder

        self.control_all_x_embedder = nn.ModuleDict(all_x_embedder)

        # Control noise refiner.
        # NOTE(review): the 1000 + layer_id offset appears to keep refiner
        # layer ids distinct from the base model's blocks — confirm against
        # the upstream VideoX-Fun implementation.
        self.control_noise_refiner = nn.ModuleList(
            [
                ZImageTransformerBlock(
                    1000 + layer_id,
                    dim,
                    n_heads,
                    n_kv_heads,
                    norm_eps,
                    qk_norm,
                    modulation=True,
                )
                for layer_id in range(n_refiner_layers)
            ]
        )

        # Control transformer blocks; block_id == 0 adds the before_proj fuse
        # step (see ZImageControlTransformerBlock).
        self.control_layers = nn.ModuleList(
            [
                ZImageControlTransformerBlock(
                    i,
                    dim,
                    n_heads,
                    n_kv_heads,
                    norm_eps,
                    qk_norm,
                    block_id=i,
                )
                for i in range(num_control_blocks)
            ]
        )

        # Learned padding token substituted at inner-padded patch positions.
        self.x_pad_token = nn.Parameter(torch.empty(dim))
        nn.init.normal_(self.x_pad_token, std=0.02)

    def forward(
        self,
        control_context: List[torch.Tensor],
        unified_hidden_states: torch.Tensor,
        cap_feats: torch.Tensor,
        timestep_emb: torch.Tensor,
        attn_mask: torch.Tensor,
        freqs_cis: torch.Tensor,
        rope_embedder,
        patchify_fn,
        patch_size: int = 2,
        f_patch_size: int = 1,
    ) -> tuple[torch.Tensor, ...]:
        """Compute control hints from control context.

        Args:
            control_context: List of control image latents [C, 1, H, W]
            unified_hidden_states: Combined image+caption embeddings from main path
            cap_feats: Caption feature embeddings
            timestep_emb: Timestep embeddings
            attn_mask: Attention mask
            freqs_cis: RoPE frequencies
            rope_embedder: RoPE embedder from base model (callable: position ids -> frequencies)
            patchify_fn: Patchify function from base model
            patch_size: Spatial patch size
            f_patch_size: Frame patch size

        Returns:
            Tuple of hint tensors to be added at each control layer position
        """
        bsz = len(control_context)
        device = control_context[0].device

        # Patchify control context using base model's patchify.
        # NOTE(review): x_size is unused here; presumably only needed by the
        # base model's unpatchify path.
        (
            control_context_patches,
            x_size,
            x_pos_ids,
            x_inner_pad_mask,
        ) = patchify_fn(control_context, patch_size, f_patch_size, cap_feats.size(1))

        # Embed control context. Per-item sequence lengths must already be
        # padded to a multiple of SEQ_MULTI_OF by patchify_fn.
        x_item_seqlens = [len(_) for _ in control_context_patches]
        assert all(_ % SEQ_MULTI_OF == 0 for _ in x_item_seqlens)
        x_max_item_seqlen = max(x_item_seqlens)

        # Concatenate items so a single linear pass embeds the whole batch.
        control_context_cat = torch.cat(control_context_patches, dim=0)
        control_context_cat = self.control_all_x_embedder[f"{patch_size}-{f_patch_size}"](control_context_cat)

        # Match timestep dtype
        adaln_input = timestep_emb.type_as(control_context_cat)
        # Replace inner-padded positions with the learned pad token.
        control_context_cat[torch.cat(x_inner_pad_mask)] = self.x_pad_token
        control_context_list = list(control_context_cat.split(x_item_seqlens, dim=0))
        x_freqs_cis = list(rope_embedder(torch.cat(x_pos_ids, dim=0)).split(x_item_seqlens, dim=0))

        # Pad per-item sequences to a dense [bsz, max_len, ...] batch and
        # build the matching boolean attention mask (True = real token).
        control_context_padded = pad_sequence(control_context_list, batch_first=True, padding_value=0.0)
        x_freqs_cis = pad_sequence(x_freqs_cis, batch_first=True, padding_value=0.0)
        x_attn_mask = torch.zeros((bsz, x_max_item_seqlen), dtype=torch.bool, device=device)
        for i, seq_len in enumerate(x_item_seqlens):
            x_attn_mask[i, :seq_len] = 1

        # Refine control context
        for layer in self.control_noise_refiner:
            control_context_padded = layer(control_context_padded, x_attn_mask, x_freqs_cis, adaln_input)

        # Unify with caption features: per item, concatenate the un-padded
        # control tokens with the caption tokens, then re-pad the batch so it
        # lines up with unified_hidden_states from the main path.
        cap_item_seqlens = [cap_feats.size(1)] * bsz
        control_context_unified = []
        for i in range(bsz):
            x_len = x_item_seqlens[i]
            cap_len = cap_item_seqlens[i]
            control_context_unified.append(torch.cat([control_context_padded[i][:x_len], cap_feats[i][:cap_len]]))
        control_context_unified = pad_sequence(control_context_unified, batch_first=True, padding_value=0.0)
        c = control_context_unified

        # Process through control layers; each layer stacks its hint onto c
        # (see ZImageControlTransformerBlock.forward).
        for layer in self.control_layers:
            c = layer(
                c,
                x=unified_hidden_states,
                attn_mask=attn_mask,
                freqs_cis=freqs_cis,
                adaln_input=adaln_input,
            )

        # Drop the final entry (the running control state); the remaining
        # stacked tensors are the per-layer hints.
        hints = torch.unbind(c)[:-1]
        return hints