InvokeAI 6.11.0__py3-none-any.whl → 6.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -329,15 +329,13 @@ class Flux2DenoiseInvocation(BaseInvocation):
  noise_packed = pack_flux2(noise)
  x = pack_flux2(x)

- # Apply BN normalization BEFORE denoising (as per diffusers Flux2KleinPipeline)
- # BN normalization: y = (x - mean) / std
- # This transforms latents to normalized space for the transformer
- # IMPORTANT: Also normalize init_latents and noise for inpainting to maintain consistency
- if bn_mean is not None and bn_std is not None:
-     x = self._bn_normalize(x, bn_mean, bn_std)
-     if init_latents_packed is not None:
-         init_latents_packed = self._bn_normalize(init_latents_packed, bn_mean, bn_std)
-         noise_packed = self._bn_normalize(noise_packed, bn_mean, bn_std)
+ # BN normalization for txt2img:
+ # - DO NOT normalize random noise (it's already N(0,1) distributed)
+ # - Diffusers only normalizes image latents from VAE (for img2img/kontext)
+ # - Output MUST be denormalized after denoising before VAE decode
+ #
+ # For img2img with init_latents, we should normalize init_latents on the unpacked
+ # shape (B, 128, H/16, W/16) - this is handled by _bn_normalize_unpacked below

  # Verify packed dimensions
  assert packed_h * packed_w == x.shape[1]
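
For reference, the BN latent transform named in the removed comment (y = (x - mean) / std) and its inverse are simple elementwise affine maps. A minimal standalone sketch; the function names and signatures here are illustrative, not InvokeAI's exact `_bn_normalize` helpers:

    import torch

    def bn_normalize(x: torch.Tensor, mean: torch.Tensor, std: torch.Tensor) -> torch.Tensor:
        # Map VAE image latents into the normalized space the transformer expects.
        return (x - mean) / std

    def bn_denormalize(y: torch.Tensor, mean: torch.Tensor, std: torch.Tensor) -> torch.Tensor:
        # Inverse transform; per the new comment, this must run after denoising
        # and before VAE decode.
        return y * std + mean
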
@@ -366,16 +364,24 @@ class Flux2DenoiseInvocation(BaseInvocation):
  if self.scheduler in FLUX_SCHEDULER_MAP and not is_inpainting:
      # Only use scheduler for txt2img - use manual Euler for inpainting to preserve exact timesteps
      scheduler_class = FLUX_SCHEDULER_MAP[self.scheduler]
-     scheduler = scheduler_class(
-         num_train_timesteps=1000,
-         shift=3.0,
-         use_dynamic_shifting=True,
-         base_shift=0.5,
-         max_shift=1.15,
-         base_image_seq_len=256,
-         max_image_seq_len=4096,
-         time_shift_type="exponential",
-     )
+     # FlowMatchHeunDiscreteScheduler only supports num_train_timesteps and shift parameters
+     # FlowMatchEulerDiscreteScheduler and FlowMatchLCMScheduler support dynamic shifting
+     if self.scheduler == "heun":
+         scheduler = scheduler_class(
+             num_train_timesteps=1000,
+             shift=3.0,
+         )
+     else:
+         scheduler = scheduler_class(
+             num_train_timesteps=1000,
+             shift=3.0,
+             use_dynamic_shifting=True,
+             base_shift=0.5,
+             max_shift=1.15,
+             base_image_seq_len=256,
+             max_image_seq_len=4096,
+             time_shift_type="exponential",
+         )

  # Prepare reference image extension for FLUX.2 Klein built-in editing
  ref_image_extension = None
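
For context, FLUX_SCHEDULER_MAP maps a scheduler name to a diffusers flow-match scheduler class. A hypothetical minimal version (the real map lives in InvokeAI's FLUX code and may contain more entries, e.g. an LCM variant; only the "heun" key is confirmed by the diff above):

    from diffusers import FlowMatchEulerDiscreteScheduler, FlowMatchHeunDiscreteScheduler

    FLUX_SCHEDULER_MAP = {
        "euler": FlowMatchEulerDiscreteScheduler,  # supports dynamic shifting
        "heun": FlowMatchHeunDiscreteScheduler,    # only num_train_timesteps and shift
    }
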
@@ -57,20 +57,6 @@ class Flux2VaeDecodeInvocation(BaseInvocation, WithMetadata, WithBoard):
  # Decode using diffusers API
  decoded = vae.decode(latents, return_dict=False)[0]

- # Debug: Log decoded output statistics
- print(
-     f"[FLUX.2 VAE] Decoded output: shape={decoded.shape}, "
-     f"min={decoded.min().item():.4f}, max={decoded.max().item():.4f}, "
-     f"mean={decoded.mean().item():.4f}"
- )
- # Check per-channel statistics to diagnose color issues
- for c in range(min(3, decoded.shape[1])):
-     ch = decoded[0, c]
-     print(
-         f"[FLUX.2 VAE] Channel {c}: min={ch.min().item():.4f}, "
-         f"max={ch.max().item():.4f}, mean={ch.mean().item():.4f}"
-     )
-
  # Convert from [-1, 1] to [0, 1] then to [0, 255] PIL image
  img = (decoded / 2 + 0.5).clamp(0, 1)
  img = rearrange(img[0], "c h w -> h w c")
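
The kept conversion path, sketched end to end for clarity. This is a standalone approximation of the surrounding method, assuming `decoded` is a (B, C, H, W) tensor in [-1, 1] and that `rearrange` comes from einops, as the imports elsewhere in this diff suggest:

    import torch
    from einops import rearrange
    from PIL import Image

    def decoded_to_pil(decoded: torch.Tensor) -> Image.Image:
        img = (decoded / 2 + 0.5).clamp(0, 1)      # [-1, 1] -> [0, 1]
        img = rearrange(img[0], "c h w -> h w c")  # CHW -> HWC, first batch item
        arr = (img.float().cpu().numpy() * 255).round().astype("uint8")
        return Image.fromarray(arr)
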
@@ -71,7 +71,7 @@ from invokeai.backend.util.devices import TorchDevice
  title="FLUX Denoise",
  tags=["image", "flux"],
  category="image",
- version="4.5.0",
+ version="4.5.1",
  )
  class FluxDenoiseInvocation(BaseInvocation):
      """Run denoising process with a FLUX transformer model."""
@@ -176,7 +176,10 @@ class FluxDenoiseInvocation(BaseInvocation):
  # DyPE (Dynamic Position Extrapolation) for high-resolution generation
  dype_preset: DyPEPreset = InputField(
      default=DYPE_PRESET_OFF,
-     description="DyPE preset for high-resolution generation. 'auto' enables automatically for resolutions > 1536px. '4k' uses optimized settings for 4K output.",
+     description=(
+         "DyPE preset for high-resolution generation. 'auto' enables automatically for resolutions > 1536px. "
+         "'area' enables automatically based on image area. '4k' uses optimized settings for 4K output."
+     ),
      ui_order=100,
      ui_choice_labels=DYPE_PRESET_LABELS,
  )
@@ -10,11 +10,13 @@ from invokeai.backend.flux.dype.base import DyPEConfig
  from invokeai.backend.flux.dype.embed import DyPEEmbedND
  from invokeai.backend.flux.dype.presets import (
      DYPE_PRESET_4K,
+     DYPE_PRESET_AREA,
      DYPE_PRESET_AUTO,
      DYPE_PRESET_LABELS,
      DYPE_PRESET_MANUAL,
      DYPE_PRESET_OFF,
      DyPEPreset,
+     get_dype_config_for_area,
      get_dype_config_for_resolution,
  )

@@ -25,7 +27,9 @@ __all__ = [
  "DYPE_PRESET_OFF",
  "DYPE_PRESET_MANUAL",
  "DYPE_PRESET_AUTO",
+ "DYPE_PRESET_AREA",
  "DYPE_PRESET_4K",
  "DYPE_PRESET_LABELS",
+ "get_dype_config_for_area",
  "get_dype_config_for_resolution",
  ]
@@ -1,17 +1,19 @@
  """DyPE presets and automatic configuration."""

+ import math
  from dataclasses import dataclass
  from typing import Literal

  from invokeai.backend.flux.dype.base import DyPEConfig

  # DyPE preset type - using Literal for proper frontend dropdown support
- DyPEPreset = Literal["off", "manual", "auto", "4k"]
+ DyPEPreset = Literal["off", "manual", "auto", "area", "4k"]

  # Constants for preset values
  DYPE_PRESET_OFF: DyPEPreset = "off"
  DYPE_PRESET_MANUAL: DyPEPreset = "manual"
  DYPE_PRESET_AUTO: DyPEPreset = "auto"
+ DYPE_PRESET_AREA: DyPEPreset = "area"
  DYPE_PRESET_4K: DyPEPreset = "4k"

  # Human-readable labels for the UI
@@ -19,6 +21,7 @@ DYPE_PRESET_LABELS: dict[str, str] = {
  "off": "Off",
  "manual": "Manual",
  "auto": "Auto (>1536px)",
+ "area": "Area (auto)",
  "4k": "4K Optimized",
  }

@@ -88,6 +91,50 @@ def get_dype_config_for_resolution(
  )


+ def get_dype_config_for_area(
+     width: int,
+     height: int,
+     base_resolution: int = 1024,
+ ) -> DyPEConfig | None:
+     """Automatically determine DyPE config based on target area.
+
+     Uses sqrt(area/base_area) as an effective side-length ratio.
+     DyPE is enabled only when target area exceeds base area.
+
+     Returns:
+         DyPEConfig if DyPE should be enabled, None otherwise
+     """
+     area = width * height
+     base_area = base_resolution**2
+
+     if area <= base_area:
+         return None
+
+     area_ratio = area / base_area
+     effective_side_ratio = math.sqrt(area_ratio)  # 1.0 at base, 2.0 at 2K (if base is 1K)
+
+     # Strength: 0 at base area, 8 at sat_area, clamped thereafter.
+     sat_area = 2027520  # Determined by experimentation where a vertical line appears
+     sat_side_ratio = math.sqrt(sat_area / base_area)
+     dynamic_dype_scale = 8.0 * (effective_side_ratio - 1.0) / (sat_side_ratio - 1.0)
+     dynamic_dype_scale = max(0.0, min(dynamic_dype_scale, 8.0))
+
+     # Continuous exponent schedule:
+     # r=1 -> 0.5, r=2 -> 1.0, r=4 -> 2.0 (exact), smoothly varying in between.
+     x = math.log2(effective_side_ratio)
+     dype_exponent = 0.25 * (x**2) + 0.25 * x + 0.5
+     dype_exponent = max(0.5, min(dype_exponent, 2.0))
+
+     return DyPEConfig(
+         enable_dype=True,
+         base_resolution=base_resolution,
+         method="vision_yarn",
+         dype_scale=dynamic_dype_scale,
+         dype_exponent=dype_exponent,
+         dype_start_sigma=1.0,
+     )
+
+
  def get_dype_config_from_preset(
      preset: DyPEPreset,
      width: int,
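
As a sanity check on the area schedule above, re-computing its outputs at 2048x2048 with the default 1024 base (plain arithmetic, independent of InvokeAI):

    import math

    width, height, base = 2048, 2048, 1024
    r = math.sqrt((width * height) / base**2)  # effective side ratio = 2.0
    sat_r = math.sqrt(2027520 / base**2)       # ~1.39
    scale = min(max(8.0 * (r - 1.0) / (sat_r - 1.0), 0.0), 8.0)
    x = math.log2(r)                           # 1.0
    exponent = min(max(0.25 * x**2 + 0.25 * x + 0.5, 0.5), 2.0)
    print(scale, exponent)  # 8.0 1.0 -- the scale clamp saturates well before 2K
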
@@ -133,6 +180,14 @@ def get_dype_config_from_preset(
          activation_threshold=1536,
      )

+     if preset == DYPE_PRESET_AREA:
+         # Area-based preset - custom values are ignored
+         return get_dype_config_for_area(
+             width=width,
+             height=height,
+             base_resolution=1024,
+         )
+
      # Use preset configuration (4K etc.) - custom values are ignored
      preset_config = DYPE_PRESETS.get(preset)
      if preset_config is None:
@@ -4,6 +4,7 @@ This module provides the denoising function for FLUX.2 Klein models,
  which use Qwen3 as the text encoder instead of CLIP+T5.
  """

+ import inspect
  import math
  from typing import Any, Callable

@@ -87,11 +88,18 @@ def denoise(
  # The scheduler will apply dynamic shifting internally using mu (if enabled in scheduler config)
  sigmas = np.array(timesteps[:-1], dtype=np.float32)  # Exclude final 0.0

- # Pass mu if provided - it will only be used if scheduler has use_dynamic_shifting=True
- if mu is not None:
+ # Check if scheduler supports sigmas parameter using inspect.signature
+ # FlowMatchHeunDiscreteScheduler and FlowMatchLCMScheduler don't support sigmas
+ set_timesteps_sig = inspect.signature(scheduler.set_timesteps)
+ supports_sigmas = "sigmas" in set_timesteps_sig.parameters
+ if supports_sigmas and mu is not None:
+     # Pass mu if provided - it will only be used if scheduler has use_dynamic_shifting=True
      scheduler.set_timesteps(sigmas=sigmas.tolist(), mu=mu, device=img.device)
- else:
+ elif supports_sigmas:
      scheduler.set_timesteps(sigmas=sigmas.tolist(), device=img.device)
+ else:
+     # Scheduler doesn't support sigmas (e.g., Heun, LCM) - use num_inference_steps
+     scheduler.set_timesteps(num_inference_steps=len(sigmas), device=img.device)
  num_scheduler_steps = len(scheduler.timesteps)
  is_heun = hasattr(scheduler, "state_in_first_order")
  user_step = 0
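
The signature check above generalizes: instead of hard-coding which schedulers accept which arguments, filter the keyword arguments against the callee's signature. A hedged sketch of that pattern (the same idea made generic, not code InvokeAI ships; it would misbehave on callables that accept **kwargs):

    import inspect
    from typing import Any, Callable

    def call_with_supported_kwargs(fn: Callable[..., Any], **kwargs: Any) -> Any:
        # Keep only the kwargs that fn actually declares; silently dropping
        # the rest mirrors the supports_sigmas branching above.
        params = inspect.signature(fn).parameters
        return fn(**{k: v for k, v in kwargs.items() if k in params})
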
@@ -108,33 +108,27 @@ def unpack_flux2(x: torch.Tensor, height: int, width: int) -> torch.Tensor:


  def compute_empirical_mu(image_seq_len: int, num_steps: int) -> float:
-     """Compute empirical mu for FLUX.2 schedule shifting.
+     """Compute mu for FLUX.2 schedule shifting.

-     This matches the diffusers Flux2Pipeline implementation.
-     The mu value controls how much the schedule is shifted towards higher timesteps.
+     Uses a fixed mu value of 2.02, matching ComfyUI's proven FLUX.2 configuration.
+
+     The previous implementation (from diffusers' FLUX.1 pipeline) computed mu as a
+     linear function of image_seq_len, which produced excessively high values at
+     high resolutions (e.g., mu=3.23 at 2048x2048). This over-shifted the sigma
+     schedule, compressing almost all values above 0.9 and forcing the model to
+     denoise everything in the final 1-2 steps, causing severe grid/diamond artifacts.
+
+     ComfyUI uses a fixed shift=2.02 for FLUX.2 Klein at all resolutions and produces
+     artifact-free images even at 2048x2048.

      Args:
-         image_seq_len: Number of image tokens (packed_h * packed_w).
-         num_steps: Number of denoising steps.
+         image_seq_len: Number of image tokens (packed_h * packed_w). Currently unused.
+         num_steps: Number of denoising steps. Currently unused.

      Returns:
-         The empirical mu value.
+         The mu value (fixed at 2.02).
      """
-     a1, b1 = 8.73809524e-05, 1.89833333
-     a2, b2 = 0.00016927, 0.45666666
-
-     if image_seq_len > 4300:
-         mu = a2 * image_seq_len + b2
-         return float(mu)
-
-     m_200 = a2 * image_seq_len + b2
-     m_10 = a1 * image_seq_len + b1
-
-     a = (m_200 - m_10) / 190.0
-     b = m_200 - 200.0 * a
-     mu = a * num_steps + b
-
-     return float(mu)
+     return 2.02


  def get_schedule_flux2(
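
To see why the fixed shift matters: flow-match schedulers with exponential shifting map each timestep t through e^mu / (e^mu + (1/t - 1)). Evaluating that formula at both mu values shows the compression described in the docstring above (a standalone illustration; exact scheduler internals may differ in detail):

    import math

    def time_shift_exponential(mu: float, t: float) -> float:
        # Larger mu pushes shifted sigmas towards 1.0, concentrating the
        # denoising work in the last few steps.
        return math.exp(mu) / (math.exp(mu) + (1.0 / t - 1.0))

    for t in (0.25, 0.50, 0.75):
        print(t, round(time_shift_exponential(2.02, t), 3), round(time_shift_exponential(3.23, t), 3))
    # 0.25 0.715 0.894
    # 0.5 0.883 0.962
    # 0.75 0.958 0.987
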
@@ -169,11 +163,14 @@ def get_schedule_flux2(


  def generate_img_ids_flux2(h: int, w: int, batch_size: int, device: torch.device) -> torch.Tensor:
-     """Generate tensor of image position ids for FLUX.2.
+     """Generate tensor of image position ids for FLUX.2 with RoPE scaling.

      FLUX.2 uses 4D position coordinates (T, H, W, L) for its rotary position embeddings.
      This is different from FLUX.1 which uses 3D coordinates.

+     RoPE Scaling: For resolutions >1536x1536, position IDs are scaled down using
+     Position Interpolation to prevent RoPE degradation and diamond/grid artifacts.
+
      IMPORTANT: Position IDs must use int64 (long) dtype like diffusers, not bfloat16.
      Using floating point dtype for position IDs can cause NaN in rotary embeddings.
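
Position Interpolation itself (Chen et al., 2023) is a one-line idea: compress positions linearly so the largest coordinate stays inside the range seen during training. A generic sketch of the technique, not InvokeAI's exact implementation (note it yields float positions, so given the int64 requirement above, the real code presumably applies the scaling at a different point, e.g. inside the rotary embedding):

    import torch

    def interpolate_positions(pos: torch.Tensor, target_len: int, trained_len: int) -> torch.Tensor:
        # No-op within the trained range; otherwise scale down by trained/target.
        if target_len <= trained_len:
            return pos
        return pos * (trained_len / target_len)
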