InvokeAI 6.10.0rc2__py3-none-any.whl → 6.11.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. invokeai/app/api/routers/model_manager.py +43 -1
  2. invokeai/app/invocations/fields.py +1 -1
  3. invokeai/app/invocations/flux2_denoise.py +499 -0
  4. invokeai/app/invocations/flux2_klein_model_loader.py +222 -0
  5. invokeai/app/invocations/flux2_klein_text_encoder.py +222 -0
  6. invokeai/app/invocations/flux2_vae_decode.py +106 -0
  7. invokeai/app/invocations/flux2_vae_encode.py +88 -0
  8. invokeai/app/invocations/flux_denoise.py +50 -3
  9. invokeai/app/invocations/flux_lora_loader.py +1 -1
  10. invokeai/app/invocations/ideal_size.py +6 -1
  11. invokeai/app/invocations/metadata.py +4 -0
  12. invokeai/app/invocations/metadata_linked.py +47 -0
  13. invokeai/app/invocations/model.py +1 -0
  14. invokeai/app/invocations/z_image_denoise.py +8 -3
  15. invokeai/app/invocations/z_image_image_to_latents.py +9 -1
  16. invokeai/app/invocations/z_image_latents_to_image.py +9 -1
  17. invokeai/app/invocations/z_image_seed_variance_enhancer.py +110 -0
  18. invokeai/app/services/config/config_default.py +3 -1
  19. invokeai/app/services/invocation_stats/invocation_stats_common.py +6 -6
  20. invokeai/app/services/invocation_stats/invocation_stats_default.py +9 -4
  21. invokeai/app/services/model_manager/model_manager_default.py +7 -0
  22. invokeai/app/services/model_records/model_records_base.py +4 -2
  23. invokeai/app/services/shared/invocation_context.py +15 -0
  24. invokeai/app/services/shared/sqlite/sqlite_util.py +2 -0
  25. invokeai/app/services/shared/sqlite_migrator/migrations/migration_25.py +61 -0
  26. invokeai/app/util/step_callback.py +42 -0
  27. invokeai/backend/flux/denoise.py +239 -204
  28. invokeai/backend/flux/dype/__init__.py +18 -0
  29. invokeai/backend/flux/dype/base.py +226 -0
  30. invokeai/backend/flux/dype/embed.py +116 -0
  31. invokeai/backend/flux/dype/presets.py +141 -0
  32. invokeai/backend/flux/dype/rope.py +110 -0
  33. invokeai/backend/flux/extensions/dype_extension.py +91 -0
  34. invokeai/backend/flux/util.py +35 -1
  35. invokeai/backend/flux2/__init__.py +4 -0
  36. invokeai/backend/flux2/denoise.py +261 -0
  37. invokeai/backend/flux2/ref_image_extension.py +294 -0
  38. invokeai/backend/flux2/sampling_utils.py +209 -0
  39. invokeai/backend/model_manager/configs/factory.py +19 -1
  40. invokeai/backend/model_manager/configs/main.py +395 -3
  41. invokeai/backend/model_manager/configs/qwen3_encoder.py +116 -7
  42. invokeai/backend/model_manager/configs/vae.py +104 -2
  43. invokeai/backend/model_manager/load/load_default.py +0 -1
  44. invokeai/backend/model_manager/load/model_cache/model_cache.py +107 -2
  45. invokeai/backend/model_manager/load/model_loaders/flux.py +1007 -2
  46. invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +0 -1
  47. invokeai/backend/model_manager/load/model_loaders/z_image.py +121 -28
  48. invokeai/backend/model_manager/starter_models.py +128 -0
  49. invokeai/backend/model_manager/taxonomy.py +31 -4
  50. invokeai/backend/model_manager/util/select_hf_files.py +3 -2
  51. invokeai/backend/util/vae_working_memory.py +0 -2
  52. invokeai/frontend/web/dist/assets/App-ClpIJstk.js +161 -0
  53. invokeai/frontend/web/dist/assets/{browser-ponyfill-BP0RxJ4G.js → browser-ponyfill-Cw07u5G1.js} +1 -1
  54. invokeai/frontend/web/dist/assets/{index-B44qKjrs.js → index-DSKM8iGj.js} +69 -69
  55. invokeai/frontend/web/dist/index.html +1 -1
  56. invokeai/frontend/web/dist/locales/en.json +58 -5
  57. invokeai/frontend/web/dist/locales/it.json +2 -1
  58. invokeai/version/invokeai_version.py +1 -1
  59. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/METADATA +7 -1
  60. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/RECORD +66 -49
  61. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/WHEEL +1 -1
  62. invokeai/frontend/web/dist/assets/App-DllqPQ3j.js +0 -161
  63. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/entry_points.txt +0 -0
  64. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE +0 -0
  65. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
  66. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
  67. {invokeai-6.10.0rc2.dist-info → invokeai-6.11.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,209 @@
1
+ """FLUX.2 Klein Sampling Utilities.
2
+
3
+ FLUX.2 Klein uses a 32-channel VAE (AutoencoderKLFlux2) instead of the 16-channel VAE
4
+ used by FLUX.1. This module provides sampling utilities adapted for FLUX.2.
5
+ """
6
+
7
+ import math
8
+
9
+ import torch
10
+ from einops import rearrange
11
+
12
+
13
def get_noise_flux2(
    num_samples: int,
    height: int,
    width: int,
    device: torch.device,
    dtype: torch.dtype,
    seed: int,
) -> torch.Tensor:
    """Generate initial latent noise for FLUX.2 Klein (32 channels).

    FLUX.2's VAE has 32 latent channels (vs 16 for FLUX.1), and the spatial
    dimensions are rounded so the later 2x2 patchify/pack step divides evenly.

    Args:
        num_samples: Batch size.
        height: Target image height in pixels.
        width: Target image width in pixels.
        device: Device the returned tensor should live on.
        dtype: Dtype of the returned tensor.
        seed: Random seed.

    Returns:
        Noise tensor of shape (num_samples, 32, latent_h, latent_w).
    """
    # Always sample on CPU in float16 and cast afterwards: this keeps a given
    # seed reproducible regardless of the target device/dtype.
    source_device = "cpu"
    source_dtype = torch.float16

    # Latent grid is ~height/8 x width/8 (VAE downsampling), rounded up to the
    # nearest even size so the 2x2 packing step works.
    latent_height = 2 * math.ceil(height / 16)
    latent_width = 2 * math.ceil(width / 16)

    generator = torch.Generator(device=source_device).manual_seed(seed)
    noise = torch.randn(
        (num_samples, 32, latent_height, latent_width),  # 32 channels for FLUX.2 (vs 16 for FLUX.1)
        device=source_device,
        dtype=source_dtype,
        generator=generator,
    )
    return noise.to(device=device, dtype=dtype)
56
+
57
+
58
def pack_flux2(x: torch.Tensor) -> torch.Tensor:
    """Pack a latent image into a flat sequence of patch embeddings for FLUX.2.

    Performs patchify + pack in one step:
    1. Patchify: group each 2x2 spatial patch into the channel dim (C*4).
    2. Pack: flatten the spatial grid into a token sequence.

    For 32-channel input: (B, 32, H, W) -> (B, H/2*W/2, 128).

    Equivalent to einops ``rearrange(x, "b c (h ph) (w pw) -> b (h w) (c ph pw)",
    ph=2, pw=2)`` but implemented with native torch ops, removing the einops
    dependency and the per-call pattern-string parsing.

    Args:
        x: Latent tensor of shape (B, C, H, W) with even H and W (C=32 for FLUX.2).

    Returns:
        Packed tensor of shape (B, H/2*W/2, C*4).
    """
    b, c, h, w = x.shape
    # (B, C, H/2, 2, W/2, 2): split each spatial dim into (patch index, offset).
    x = x.view(b, c, h // 2, 2, w // 2, 2)
    # (B, H/2, W/2, C, 2, 2): token dims first, then per-token feature dims.
    x = x.permute(0, 2, 4, 1, 3, 5)
    # Flatten tokens and features; feature index = c*4 + ph*2 + pw.
    return x.reshape(b, (h // 2) * (w // 2), c * 4)
75
+
76
+
77
def unpack_flux2(x: torch.Tensor, height: int, width: int) -> torch.Tensor:
    """Unpack a flat sequence of patch embeddings back to a latent image.

    Reverses pack_flux2:
    1. Unpack: restore the spatial grid from the token sequence.
    2. Unpatchify: restore C channels from C*4 features (C=32 for FLUX.2).

    Equivalent to einops ``rearrange(x, "b (h w) (c ph pw) -> b c (h ph) (w pw)",
    h=packed_h, w=packed_w, ph=2, pw=2)`` but implemented with native torch ops,
    removing the einops dependency.

    Args:
        x: Packed tensor of shape (B, H/2*W/2, C*4), e.g. (B, seq, 128).
        height: Target image height in pixels.
        width: Target image width in pixels.

    Returns:
        Latent tensor of shape (B, C, H, W), e.g. (B, 32, H, W).
    """
    # Latent dims, rounded to even sizes exactly as in get_noise_flux2.
    latent_h = 2 * math.ceil(height / 16)
    latent_w = 2 * math.ceil(width / 16)

    # Token-grid dims (after the 2x2 patchify).
    packed_h = latent_h // 2
    packed_w = latent_w // 2

    b, _seq, features = x.shape
    c = features // 4  # each token carries one 2x2 patch per channel

    # (B, h, w, C, 2, 2): split tokens into grid coords and features into
    # (channel, ph, pw), matching the layout produced by pack_flux2.
    x = x.view(b, packed_h, packed_w, c, 2, 2)
    # (B, C, h, 2, w, 2), then merge (h, ph) and (w, pw) into full spatial dims.
    x = x.permute(0, 3, 1, 4, 2, 5)
    return x.reshape(b, c, packed_h * 2, packed_w * 2)
108
+
109
+
110
def compute_empirical_mu(image_seq_len: int, num_steps: int) -> float:
    """Compute the empirical mu for FLUX.2 schedule shifting.

    Matches the diffusers Flux2Pipeline implementation: mu controls how much
    the sigma schedule is shifted towards higher timesteps.

    Args:
        image_seq_len: Number of image tokens (packed_h * packed_w).
        num_steps: Number of denoising steps.

    Returns:
        The empirical mu value.
    """
    # Linear fits of mu vs. sequence length, taken at 10 and 200 steps.
    slope_10, intercept_10 = 8.73809524e-05, 1.89833333
    slope_200, intercept_200 = 0.00016927, 0.45666666

    mu_at_200 = slope_200 * image_seq_len + intercept_200
    # Long sequences use the 200-step fit directly, regardless of step count.
    if image_seq_len > 4300:
        return float(mu_at_200)

    mu_at_10 = slope_10 * image_seq_len + intercept_10

    # Interpolate linearly in num_steps between the 10- and 200-step anchors.
    step_slope = (mu_at_200 - mu_at_10) / 190.0
    step_intercept = mu_at_200 - 200.0 * step_slope
    return float(step_slope * num_steps + step_intercept)
138
+
139
+
140
def get_schedule_flux2(
    num_steps: int,
    image_seq_len: int,
) -> list[float]:
    """Get the linear timestep schedule for FLUX.2.

    Returns a linear sigma schedule from 1.0 down to 1/num_steps. The actual
    schedule shifting is handled by the FlowMatchEulerDiscreteScheduler via the
    mu parameter with use_dynamic_shifting=True.

    Args:
        num_steps: Number of denoising steps.
        image_seq_len: Number of image tokens (packed_h * packed_w). Currently
            unused, but kept for API compatibility; the scheduler computes the
            shifting internally.

    Returns:
        List of linear sigmas from 1.0 to 1/num_steps, plus a final 0.0.
    """
    import numpy as np

    # Evenly spaced sigmas; dynamic shifting (mu) is applied later by the
    # scheduler, which is why image_seq_len goes unused here.
    schedule = np.linspace(1.0, 1 / num_steps, num_steps).tolist()

    # The scheduler consumes n+1 boundaries for n steps, hence the trailing 0.0.
    return [*schedule, 0.0]
169
+
170
+
171
def generate_img_ids_flux2(h: int, w: int, batch_size: int, device: torch.device) -> torch.Tensor:
    """Generate image position ids for FLUX.2's rotary position embeddings.

    FLUX.2 uses 4D position coordinates (T, H, W, L), unlike FLUX.1's 3D
    coordinates. The T (time) and L (layer) components are always zero here.

    IMPORTANT: Position IDs must use int64 (long) dtype like diffusers, not a
    floating-point dtype, which can produce NaN in the rotary embeddings.

    Args:
        h: Height of image in latent space.
        w: Width of image in latent space.
        batch_size: Batch size.
        device: Device.

    Returns:
        Image position ids tensor of shape (batch_size, h/2*w/2, 4), int64 dtype.
    """
    # Token grid is h/2 x w/2 after the 2x2 packing step.
    packed_h = h // 2
    packed_w = w // 2

    row_idx = torch.arange(packed_h, device=device, dtype=torch.long)
    col_idx = torch.arange(packed_w, device=device, dtype=torch.long)
    grid_r, grid_c = torch.meshgrid(row_idx, col_idx, indexing="ij")
    zero = torch.zeros_like(grid_r)

    # Stack as (T, H, W, L); T and L stay zero.
    ids = torch.stack([zero, grid_r, grid_c, zero], dim=-1)

    # Flatten the grid to a token sequence and broadcast over the batch.
    ids = ids.reshape(1, packed_h * packed_w, 4)
    return ids.expand(batch_size, -1, -1)
@@ -56,6 +56,7 @@ from invokeai.backend.model_manager.configs.lora import (
56
56
  )
57
57
  from invokeai.backend.model_manager.configs.main import (
58
58
  Main_BnBNF4_FLUX_Config,
59
+ Main_Checkpoint_Flux2_Config,
59
60
  Main_Checkpoint_FLUX_Config,
60
61
  Main_Checkpoint_SD1_Config,
61
62
  Main_Checkpoint_SD2_Config,
@@ -63,12 +64,15 @@ from invokeai.backend.model_manager.configs.main import (
63
64
  Main_Checkpoint_SDXLRefiner_Config,
64
65
  Main_Checkpoint_ZImage_Config,
65
66
  Main_Diffusers_CogView4_Config,
67
+ Main_Diffusers_Flux2_Config,
68
+ Main_Diffusers_FLUX_Config,
66
69
  Main_Diffusers_SD1_Config,
67
70
  Main_Diffusers_SD2_Config,
68
71
  Main_Diffusers_SD3_Config,
69
72
  Main_Diffusers_SDXL_Config,
70
73
  Main_Diffusers_SDXLRefiner_Config,
71
74
  Main_Diffusers_ZImage_Config,
75
+ Main_GGUF_Flux2_Config,
72
76
  Main_GGUF_FLUX_Config,
73
77
  Main_GGUF_ZImage_Config,
74
78
  MainModelDefaultSettings,
@@ -95,10 +99,12 @@ from invokeai.backend.model_manager.configs.textual_inversion import (
95
99
  )
96
100
  from invokeai.backend.model_manager.configs.unknown import Unknown_Config
97
101
  from invokeai.backend.model_manager.configs.vae import (
102
+ VAE_Checkpoint_Flux2_Config,
98
103
  VAE_Checkpoint_FLUX_Config,
99
104
  VAE_Checkpoint_SD1_Config,
100
105
  VAE_Checkpoint_SD2_Config,
101
106
  VAE_Checkpoint_SDXL_Config,
107
+ VAE_Diffusers_Flux2_Config,
102
108
  VAE_Diffusers_SD1_Config,
103
109
  VAE_Diffusers_SDXL_Config,
104
110
  )
@@ -148,17 +154,25 @@ AnyModelConfig = Annotated[
148
154
  Annotated[Main_Diffusers_SDXL_Config, Main_Diffusers_SDXL_Config.get_tag()],
149
155
  Annotated[Main_Diffusers_SDXLRefiner_Config, Main_Diffusers_SDXLRefiner_Config.get_tag()],
150
156
  Annotated[Main_Diffusers_SD3_Config, Main_Diffusers_SD3_Config.get_tag()],
157
+ Annotated[Main_Diffusers_FLUX_Config, Main_Diffusers_FLUX_Config.get_tag()],
158
+ Annotated[Main_Diffusers_Flux2_Config, Main_Diffusers_Flux2_Config.get_tag()],
151
159
  Annotated[Main_Diffusers_CogView4_Config, Main_Diffusers_CogView4_Config.get_tag()],
152
160
  Annotated[Main_Diffusers_ZImage_Config, Main_Diffusers_ZImage_Config.get_tag()],
153
161
  # Main (Pipeline) - checkpoint format
162
+ # IMPORTANT: FLUX.2 must be checked BEFORE FLUX.1 because FLUX.2 has specific validation
163
+ # that will reject FLUX.1 models, but FLUX.1 validation may incorrectly match FLUX.2 models
154
164
  Annotated[Main_Checkpoint_SD1_Config, Main_Checkpoint_SD1_Config.get_tag()],
155
165
  Annotated[Main_Checkpoint_SD2_Config, Main_Checkpoint_SD2_Config.get_tag()],
156
166
  Annotated[Main_Checkpoint_SDXL_Config, Main_Checkpoint_SDXL_Config.get_tag()],
157
167
  Annotated[Main_Checkpoint_SDXLRefiner_Config, Main_Checkpoint_SDXLRefiner_Config.get_tag()],
168
+ Annotated[Main_Checkpoint_Flux2_Config, Main_Checkpoint_Flux2_Config.get_tag()],
158
169
  Annotated[Main_Checkpoint_FLUX_Config, Main_Checkpoint_FLUX_Config.get_tag()],
159
170
  Annotated[Main_Checkpoint_ZImage_Config, Main_Checkpoint_ZImage_Config.get_tag()],
160
171
  # Main (Pipeline) - quantized formats
172
+ # IMPORTANT: FLUX.2 must be checked BEFORE FLUX.1 because FLUX.2 has specific validation
173
+ # that will reject FLUX.1 models, but FLUX.1 validation may incorrectly match FLUX.2 models
161
174
  Annotated[Main_BnBNF4_FLUX_Config, Main_BnBNF4_FLUX_Config.get_tag()],
175
+ Annotated[Main_GGUF_Flux2_Config, Main_GGUF_Flux2_Config.get_tag()],
162
176
  Annotated[Main_GGUF_FLUX_Config, Main_GGUF_FLUX_Config.get_tag()],
163
177
  Annotated[Main_GGUF_ZImage_Config, Main_GGUF_ZImage_Config.get_tag()],
164
178
  # VAE - checkpoint format
@@ -166,9 +180,11 @@ AnyModelConfig = Annotated[
166
180
  Annotated[VAE_Checkpoint_SD2_Config, VAE_Checkpoint_SD2_Config.get_tag()],
167
181
  Annotated[VAE_Checkpoint_SDXL_Config, VAE_Checkpoint_SDXL_Config.get_tag()],
168
182
  Annotated[VAE_Checkpoint_FLUX_Config, VAE_Checkpoint_FLUX_Config.get_tag()],
183
+ Annotated[VAE_Checkpoint_Flux2_Config, VAE_Checkpoint_Flux2_Config.get_tag()],
169
184
  # VAE - diffusers format
170
185
  Annotated[VAE_Diffusers_SD1_Config, VAE_Diffusers_SD1_Config.get_tag()],
171
186
  Annotated[VAE_Diffusers_SDXL_Config, VAE_Diffusers_SDXL_Config.get_tag()],
187
+ Annotated[VAE_Diffusers_Flux2_Config, VAE_Diffusers_Flux2_Config.get_tag()],
172
188
  # ControlNet - checkpoint format
173
189
  Annotated[ControlNet_Checkpoint_SD1_Config, ControlNet_Checkpoint_SD1_Config.get_tag()],
174
190
  Annotated[ControlNet_Checkpoint_SD2_Config, ControlNet_Checkpoint_SD2_Config.get_tag()],
@@ -498,7 +514,9 @@ class ModelConfigFactory:
498
514
  # Now do any post-processing needed for specific model types/bases/etc.
499
515
  match config.type:
500
516
  case ModelType.Main:
501
- config.default_settings = MainModelDefaultSettings.from_base(config.base)
517
+ # Pass variant if available (e.g., for Flux2 models)
518
+ variant = getattr(config, "variant", None)
519
+ config.default_settings = MainModelDefaultSettings.from_base(config.base, variant)
502
520
  case ModelType.ControlNet | ModelType.T2IAdapter | ModelType.ControlLoRa:
503
521
  config.default_settings = ControlAdapterDefaultSettings.from_model_name(config.name)
504
522
  case ModelType.LoRA: