InvokeAI: invokeai-6.11.0-py3-none-any.whl → invokeai-6.11.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invokeai/app/invocations/flux2_denoise.py +25 -19
- invokeai/app/invocations/flux2_vae_decode.py +0 -14
- invokeai/app/invocations/flux_denoise.py +5 -2
- invokeai/backend/flux/dype/__init__.py +4 -0
- invokeai/backend/flux/dype/presets.py +56 -1
- invokeai/backend/flux2/denoise.py +11 -3
- invokeai/backend/flux2/sampling_utils.py +19 -22
- invokeai/frontend/web/dist/assets/{App-D13dX7be.js → App-Drro7CYT.js} +39 -39
- invokeai/frontend/web/dist/assets/{browser-ponyfill-u_ZjhQTI.js → browser-ponyfill-B5E9kN5q.js} +1 -1
- invokeai/frontend/web/dist/assets/{index-BB0nHmDe.js → index-Bp-c_7R4.js} +39 -39
- invokeai/frontend/web/dist/index.html +1 -1
- invokeai/frontend/web/dist/locales/ru.json +31 -31
- invokeai/version/invokeai_version.py +1 -1
- {invokeai-6.11.0.dist-info → invokeai-6.11.1.dist-info}/METADATA +1 -1
- {invokeai-6.11.0.dist-info → invokeai-6.11.1.dist-info}/RECORD +21 -21
- {invokeai-6.11.0.dist-info → invokeai-6.11.1.dist-info}/WHEEL +0 -0
- {invokeai-6.11.0.dist-info → invokeai-6.11.1.dist-info}/entry_points.txt +0 -0
- {invokeai-6.11.0.dist-info → invokeai-6.11.1.dist-info}/licenses/LICENSE +0 -0
- {invokeai-6.11.0.dist-info → invokeai-6.11.1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
- {invokeai-6.11.0.dist-info → invokeai-6.11.1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
- {invokeai-6.11.0.dist-info → invokeai-6.11.1.dist-info}/top_level.txt +0 -0
invokeai/app/invocations/flux2_denoise.py

@@ -329,15 +329,13 @@ class Flux2DenoiseInvocation(BaseInvocation):
         noise_packed = pack_flux2(noise)
         x = pack_flux2(x)
 
-        #
-        #
-        #
-        #
-
-
-
-        init_latents_packed = self._bn_normalize(init_latents_packed, bn_mean, bn_std)
-        noise_packed = self._bn_normalize(noise_packed, bn_mean, bn_std)
+        # BN normalization for txt2img:
+        # - DO NOT normalize random noise (it's already N(0,1) distributed)
+        # - Diffusers only normalizes image latents from VAE (for img2img/kontext)
+        # - Output MUST be denormalized after denoising before VAE decode
+        #
+        # For img2img with init_latents, we should normalize init_latents on unpacked
+        # shape (B, 128, H/16, W/16) - this is handled by _bn_normalize_unpacked below
 
         # Verify packed dimensions
         assert packed_h * packed_w == x.shape[1]
|
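The _bn_normalize helper itself is not shown in this diff, so the sketch below is an assumption about its shape: a standard shift/scale toward N(0, 1) using the provided bn_mean/bn_std statistics, plus the inverse transform that, per the comments above, must run on the denoised output before VAE decode.

import torch

def bn_normalize(latents: torch.Tensor, mean: torch.Tensor, std: torch.Tensor) -> torch.Tensor:
    # Applied to VAE image latents only (img2img/kontext);
    # random noise is skipped because it is already N(0, 1).
    return (latents - mean) / std

def bn_denormalize(latents: torch.Tensor, mean: torch.Tensor, std: torch.Tensor) -> torch.Tensor:
    # Inverse transform - must run on the output before VAE decode.
    return latents * std + mean

x = torch.randn(1, 4096, 128)
mean, std = torch.zeros(128), torch.ones(128) * 2.0
assert torch.allclose(bn_denormalize(bn_normalize(x, mean, std), mean, std), x)

The round trip is exact, which is why a missing denormalize step tends to surface as color or contrast problems at decode time rather than as hard errors.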
@@ -366,16 +364,24 @@ class Flux2DenoiseInvocation(BaseInvocation):
         if self.scheduler in FLUX_SCHEDULER_MAP and not is_inpainting:
             # Only use scheduler for txt2img - use manual Euler for inpainting to preserve exact timesteps
             scheduler_class = FLUX_SCHEDULER_MAP[self.scheduler]
-
-
-
-
-
-
-
-
-
-
+            # FlowMatchHeunDiscreteScheduler only supports num_train_timesteps and shift parameters
+            # FlowMatchEulerDiscreteScheduler and FlowMatchLCMScheduler support dynamic shifting
+            if self.scheduler == "heun":
+                scheduler = scheduler_class(
+                    num_train_timesteps=1000,
+                    shift=3.0,
+                )
+            else:
+                scheduler = scheduler_class(
+                    num_train_timesteps=1000,
+                    shift=3.0,
+                    use_dynamic_shifting=True,
+                    base_shift=0.5,
+                    max_shift=1.15,
+                    base_image_seq_len=256,
+                    max_image_seq_len=4096,
+                    time_shift_type="exponential",
+                )
 
         # Prepare reference image extension for FLUX.2 Klein built-in editing
         ref_image_extension = None
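With use_dynamic_shifting=True, the Euler/LCM schedulers ignore the static shift and derive mu from the image sequence length. A sketch of that interpolation as diffusers' FLUX pipelines implement it (calculate_shift is the diffusers helper name, not part of this diff; the defaults mirror the base_shift/max_shift/seq-len values passed above):

def calculate_shift(
    image_seq_len: int,
    base_seq_len: int = 256,
    max_seq_len: int = 4096,
    base_shift: float = 0.5,
    max_shift: float = 1.15,
) -> float:
    # Linear interpolation between (base_seq_len, base_shift)
    # and (max_seq_len, max_shift).
    m = (max_shift - base_shift) / (max_seq_len - base_seq_len)
    b = base_shift - m * base_seq_len
    return image_seq_len * m + b

print(calculate_shift(1024))  # ~0.63 for a 1024-token image

Note that m ≈ 0.00016927 and b ≈ 0.45666666 are exactly the a2/b2 constants deleted from compute_empirical_mu in sampling_utils.py further down.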
invokeai/app/invocations/flux2_vae_decode.py

@@ -57,20 +57,6 @@ class Flux2VaeDecodeInvocation(BaseInvocation, WithMetadata, WithBoard):
         # Decode using diffusers API
         decoded = vae.decode(latents, return_dict=False)[0]
 
-        # Debug: Log decoded output statistics
-        print(
-            f"[FLUX.2 VAE] Decoded output: shape={decoded.shape}, "
-            f"min={decoded.min().item():.4f}, max={decoded.max().item():.4f}, "
-            f"mean={decoded.mean().item():.4f}"
-        )
-        # Check per-channel statistics to diagnose color issues
-        for c in range(min(3, decoded.shape[1])):
-            ch = decoded[0, c]
-            print(
-                f"[FLUX.2 VAE] Channel {c}: min={ch.min().item():.4f}, "
-                f"max={ch.max().item():.4f}, mean={ch.mean().item():.4f}"
-            )
-
         # Convert from [-1, 1] to [0, 1] then to [0, 255] PIL image
         img = (decoded / 2 + 0.5).clamp(0, 1)
         img = rearrange(img[0], "c h w -> h w c")
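The conversion continues past the end of this hunk. A self-contained sketch of the full tensor-to-PIL path, assuming decoded is a (1, 3, H, W) float tensor in [-1, 1]; the final Image.fromarray step is inferred from the comment, not shown in the diff:

import torch
from einops import rearrange
from PIL import Image

decoded = torch.rand(1, 3, 64, 64) * 2 - 1     # stand-in for vae.decode(...) output
img = (decoded / 2 + 0.5).clamp(0, 1)          # [-1, 1] -> [0, 1]
img = rearrange(img[0], "c h w -> h w c")      # CHW -> HWC
pil_image = Image.fromarray((img.cpu().float().numpy() * 255).round().astype("uint8"))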
invokeai/app/invocations/flux_denoise.py

@@ -71,7 +71,7 @@ from invokeai.backend.util.devices import TorchDevice
     title="FLUX Denoise",
     tags=["image", "flux"],
     category="image",
-    version="4.5.0",
+    version="4.5.1",
 )
 class FluxDenoiseInvocation(BaseInvocation):
     """Run denoising process with a FLUX transformer model."""
@@ -176,7 +176,10 @@ class FluxDenoiseInvocation(BaseInvocation):
     # DyPE (Dynamic Position Extrapolation) for high-resolution generation
     dype_preset: DyPEPreset = InputField(
         default=DYPE_PRESET_OFF,
-        description=
+        description=(
+            "DyPE preset for high-resolution generation. 'auto' enables automatically for resolutions > 1536px. "
+            "'area' enables automatically based on image area. '4k' uses optimized settings for 4K output."
+        ),
         ui_order=100,
         ui_choice_labels=DYPE_PRESET_LABELS,
     )
invokeai/backend/flux/dype/__init__.py

@@ -10,11 +10,13 @@ from invokeai.backend.flux.dype.base import DyPEConfig
 from invokeai.backend.flux.dype.embed import DyPEEmbedND
 from invokeai.backend.flux.dype.presets import (
     DYPE_PRESET_4K,
+    DYPE_PRESET_AREA,
     DYPE_PRESET_AUTO,
     DYPE_PRESET_LABELS,
     DYPE_PRESET_MANUAL,
     DYPE_PRESET_OFF,
     DyPEPreset,
+    get_dype_config_for_area,
     get_dype_config_for_resolution,
 )
 
@@ -25,7 +27,9 @@ __all__ = [
     "DYPE_PRESET_OFF",
     "DYPE_PRESET_MANUAL",
     "DYPE_PRESET_AUTO",
+    "DYPE_PRESET_AREA",
     "DYPE_PRESET_4K",
     "DYPE_PRESET_LABELS",
+    "get_dype_config_for_area",
     "get_dype_config_for_resolution",
 ]
invokeai/backend/flux/dype/presets.py

@@ -1,17 +1,19 @@
 """DyPE presets and automatic configuration."""
 
+import math
 from dataclasses import dataclass
 from typing import Literal
 
 from invokeai.backend.flux.dype.base import DyPEConfig
 
 # DyPE preset type - using Literal for proper frontend dropdown support
-DyPEPreset = Literal["off", "manual", "auto", "4k"]
+DyPEPreset = Literal["off", "manual", "auto", "area", "4k"]
 
 # Constants for preset values
 DYPE_PRESET_OFF: DyPEPreset = "off"
 DYPE_PRESET_MANUAL: DyPEPreset = "manual"
 DYPE_PRESET_AUTO: DyPEPreset = "auto"
+DYPE_PRESET_AREA: DyPEPreset = "area"
 DYPE_PRESET_4K: DyPEPreset = "4k"
 
 # Human-readable labels for the UI
@@ -19,6 +21,7 @@ DYPE_PRESET_LABELS: dict[str, str] = {
     "off": "Off",
     "manual": "Manual",
     "auto": "Auto (>1536px)",
+    "area": "Area (auto)",
     "4k": "4K Optimized",
 }
 
@@ -88,6 +91,50 @@ def get_dype_config_for_resolution(
     )
 
 
+def get_dype_config_for_area(
+    width: int,
+    height: int,
+    base_resolution: int = 1024,
+) -> DyPEConfig | None:
+    """Automatically determine DyPE config based on target area.
+
+    Uses sqrt(area/base_area) as an effective side-length ratio.
+    DyPE is enabled only when target area exceeds base area.
+
+    Returns:
+        DyPEConfig if DyPE should be enabled, None otherwise
+    """
+    area = width * height
+    base_area = base_resolution**2
+
+    if area <= base_area:
+        return None
+
+    area_ratio = area / base_area
+    effective_side_ratio = math.sqrt(area_ratio)  # 1.0 at base, 2.0 at 2K (if base is 1K)
+
+    # Strength: 0 at base area, 8 at sat_area, clamped thereafter.
+    sat_area = 2027520  # Determined by experimentation where a vertical line appears
+    sat_side_ratio = math.sqrt(sat_area / base_area)
+    dynamic_dype_scale = 8.0 * (effective_side_ratio - 1.0) / (sat_side_ratio - 1.0)
+    dynamic_dype_scale = max(0.0, min(dynamic_dype_scale, 8.0))
+
+    # Continuous exponent schedule:
+    # r=1 -> 0.5, r=2 -> 1.0, r=4 -> 2.0 (exact), smoothly varying in between.
+    x = math.log2(effective_side_ratio)
+    dype_exponent = 0.25 * (x**2) + 0.25 * x + 0.5
+    dype_exponent = max(0.5, min(dype_exponent, 2.0))
+
+    return DyPEConfig(
+        enable_dype=True,
+        base_resolution=base_resolution,
+        method="vision_yarn",
+        dype_scale=dynamic_dype_scale,
+        dype_exponent=dype_exponent,
+        dype_start_sigma=1.0,
+    )
+
+
 def get_dype_config_from_preset(
     preset: DyPEPreset,
     width: int,
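A worked example of get_dype_config_for_area above at 2048x2048 with the default base_resolution=1024 (values computed by hand from the formulas in the new function):

import math

width = height = 2048
base_resolution = 1024

effective_side_ratio = math.sqrt((width * height) / base_resolution**2)  # 2.0
sat_side_ratio = math.sqrt(2027520 / base_resolution**2)                 # ~1.39

scale = 8.0 * (effective_side_ratio - 1.0) / (sat_side_ratio - 1.0)      # ~20.5 before clamping
scale = max(0.0, min(scale, 8.0))                                        # clamped to 8.0

x = math.log2(effective_side_ratio)                                      # 1.0
exponent = 0.25 * x**2 + 0.25 * x + 0.5                                  # 1.0

print(scale, exponent)  # 8.0 1.0 -> DyPE fully engaged at 2048x2048

The saturation area (sat_area = 2027520, roughly a 1424x1424 image) is reached well before 2048x2048, so dype_scale hits its 8.0 ceiling there while the exponent keeps growing smoothly with resolution.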
@@ -133,6 +180,14 @@ def get_dype_config_from_preset(
         activation_threshold=1536,
     )
 
+    if preset == DYPE_PRESET_AREA:
+        # Area-based preset - custom values are ignored
+        return get_dype_config_for_area(
+            width=width,
+            height=height,
+            base_resolution=1024,
+        )
+
     # Use preset configuration (4K etc.) - custom values are ignored
     preset_config = DYPE_PRESETS.get(preset)
     if preset_config is None:
invokeai/backend/flux2/denoise.py

@@ -4,6 +4,7 @@ This module provides the denoising function for FLUX.2 Klein models,
 which use Qwen3 as the text encoder instead of CLIP+T5.
 """
 
+import inspect
 import math
 from typing import Any, Callable
 
@@ -87,11 +88,18 @@ def denoise(
     # The scheduler will apply dynamic shifting internally using mu (if enabled in scheduler config)
     sigmas = np.array(timesteps[:-1], dtype=np.float32)  # Exclude final 0.0
 
-    #
-
+    # Check if scheduler supports sigmas parameter using inspect.signature
+    # FlowMatchHeunDiscreteScheduler and FlowMatchLCMScheduler don't support sigmas
+    set_timesteps_sig = inspect.signature(scheduler.set_timesteps)
+    supports_sigmas = "sigmas" in set_timesteps_sig.parameters
+    if supports_sigmas and mu is not None:
+        # Pass mu if provided - it will only be used if scheduler has use_dynamic_shifting=True
         scheduler.set_timesteps(sigmas=sigmas.tolist(), mu=mu, device=img.device)
-
+    elif supports_sigmas:
         scheduler.set_timesteps(sigmas=sigmas.tolist(), device=img.device)
+    else:
+        # Scheduler doesn't support sigmas (e.g., Heun, LCM) - use num_inference_steps
+        scheduler.set_timesteps(num_inference_steps=len(sigmas), device=img.device)
     num_scheduler_steps = len(scheduler.timesteps)
     is_heun = hasattr(scheduler, "state_in_first_order")
     user_step = 0
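The capability check is a general pattern worth isolating: branch on whether a callable accepts a keyword by inspecting its signature instead of catching TypeError. A minimal self-contained sketch (the two stand-in functions are hypothetical, mimicking the Euler-style and Heun/LCM-style set_timesteps signatures):

import inspect

def set_timesteps_euler(sigmas=None, mu=None, device=None):      # hypothetical Euler-style
    pass

def set_timesteps_heun(num_inference_steps=None, device=None):   # hypothetical Heun/LCM-style
    pass

for fn in (set_timesteps_euler, set_timesteps_heun):
    supports_sigmas = "sigmas" in inspect.signature(fn).parameters
    print(fn.__name__, supports_sigmas)  # True, then False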
invokeai/backend/flux2/sampling_utils.py

@@ -108,33 +108,27 @@ def unpack_flux2(x: torch.Tensor, height: int, width: int) -> torch.Tensor:
 
 
 def compute_empirical_mu(image_seq_len: int, num_steps: int) -> float:
-    """Compute
+    """Compute mu for FLUX.2 schedule shifting.
 
-
-
+    Uses a fixed mu value of 2.02, matching ComfyUI's proven FLUX.2 configuration.
+
+    The previous implementation (from diffusers' FLUX.1 pipeline) computed mu as a
+    linear function of image_seq_len, which produced excessively high values at
+    high resolutions (e.g., mu=3.23 at 2048x2048). This over-shifted the sigma
+    schedule, compressing almost all values above 0.9 and forcing the model to
+    denoise everything in the final 1-2 steps, causing severe grid/diamond artifacts.
+
+    ComfyUI uses a fixed shift=2.02 for FLUX.2 Klein at all resolutions and produces
+    artifact-free images even at 2048x2048.
 
     Args:
-        image_seq_len: Number of image tokens (packed_h * packed_w).
-        num_steps: Number of denoising steps.
+        image_seq_len: Number of image tokens (packed_h * packed_w). Currently unused.
+        num_steps: Number of denoising steps. Currently unused.
 
     Returns:
-        The
+        The mu value (fixed at 2.02).
     """
-
-    a2, b2 = 0.00016927, 0.45666666
-
-    if image_seq_len > 4300:
-        mu = a2 * image_seq_len + b2
-        return float(mu)
-
-    m_200 = a2 * image_seq_len + b2
-    m_10 = a1 * image_seq_len + b1
-
-    a = (m_200 - m_10) / 190.0
-    b = m_200 - 200.0 * a
-    mu = a * num_steps + b
-
-    return float(mu)
+    return 2.02
 
 
 def get_schedule_flux2(
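To see the over-shifting the new docstring describes, apply the exponential time shift used by flow-matching schedulers at both mu values (the formula sigma' = e^mu / (e^mu + (1/sigma - 1)) is an assumption based on the FLUX.1 reference implementation, not shown in this diff):

import math

def time_shift(mu: float, sigma: float) -> float:
    # Exponential schedule shift: larger mu pushes sigmas toward 1.0.
    return math.exp(mu) / (math.exp(mu) + (1.0 / sigma - 1.0))

for sigma in (0.9, 0.5, 0.1):
    print(f"sigma={sigma}: mu=2.02 -> {time_shift(2.02, sigma):.3f}, mu=3.23 -> {time_shift(3.23, sigma):.3f}")
# sigma=0.9: mu=2.02 -> 0.985, mu=3.23 -> 0.996
# sigma=0.5: mu=2.02 -> 0.883, mu=3.23 -> 0.962
# sigma=0.1: mu=2.02 -> 0.456, mu=3.23 -> 0.737

At mu=3.23, half the noise range (sigma >= 0.5) gets squeezed above 0.96, matching the docstring's account of denoising being deferred to the final steps.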
@@ -169,11 +163,14 @@ def get_schedule_flux2(
 
 
 def generate_img_ids_flux2(h: int, w: int, batch_size: int, device: torch.device) -> torch.Tensor:
-    """Generate tensor of image position ids for FLUX.2.
+    """Generate tensor of image position ids for FLUX.2 with RoPE scaling.
 
     FLUX.2 uses 4D position coordinates (T, H, W, L) for its rotary position embeddings.
     This is different from FLUX.1 which uses 3D coordinates.
 
+    RoPE Scaling: For resolutions >1536x1536, position IDs are scaled down using
+    Position Interpolation to prevent RoPE degradation and diamond/grid artifacts.
+
     IMPORTANT: Position IDs must use int64 (long) dtype like diffusers, not bfloat16.
     Using floating point dtype for position IDs can cause NaN in rotary embeddings.
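The function body is not included in the diff, so the following is only a sketch of the Position Interpolation idea the new docstring describes: compress out-of-range position IDs back into the trained extent while honoring the int64 dtype requirement. The threshold handling and scaling rule here are assumptions, not the verbatim implementation:

import torch

def interpolate_positions(n: int, max_trained: int) -> torch.Tensor:
    # int64, never bfloat16 - float position IDs can produce NaN in RoPE
    ids = torch.arange(n, dtype=torch.int64)
    if n <= max_trained:
        return ids
    # Position Interpolation: rescale [0, n) into the trained range [0, max_trained)
    return (ids.float() * (max_trained / n)).long()

print(interpolate_positions(8, 6))  # tensor([0, 0, 1, 2, 3, 3, 4, 5])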