InvokeAI 6.11.0rc1__py3-none-any.whl → 6.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invokeai/app/invocations/flux2_denoise.py +25 -19
- invokeai/app/invocations/flux2_vae_decode.py +0 -14
- invokeai/app/invocations/flux_denoise.py +22 -6
- invokeai/app/invocations/flux_model_loader.py +2 -5
- invokeai/app/util/step_callback.py +52 -38
- invokeai/backend/flux/dype/__init__.py +18 -1
- invokeai/backend/flux/dype/base.py +40 -6
- invokeai/backend/flux/dype/presets.py +97 -35
- invokeai/backend/flux2/denoise.py +33 -6
- invokeai/backend/flux2/sampling_utils.py +19 -22
- invokeai/frontend/web/dist/assets/App-Drro7CYT.js +161 -0
- invokeai/frontend/web/dist/assets/{browser-ponyfill-Cw07u5G1.js → browser-ponyfill-B5E9kN5q.js} +1 -1
- invokeai/frontend/web/dist/assets/{index-DSKM8iGj.js → index-Bp-c_7R4.js} +64 -64
- invokeai/frontend/web/dist/index.html +1 -1
- invokeai/frontend/web/dist/locales/en.json +21 -1
- invokeai/frontend/web/dist/locales/it.json +135 -16
- invokeai/frontend/web/dist/locales/ru.json +42 -42
- invokeai/version/invokeai_version.py +1 -1
- {invokeai-6.11.0rc1.dist-info → invokeai-6.11.1.dist-info}/METADATA +1 -1
- {invokeai-6.11.0rc1.dist-info → invokeai-6.11.1.dist-info}/RECORD +26 -26
- invokeai/frontend/web/dist/assets/App-ClpIJstk.js +0 -161
- {invokeai-6.11.0rc1.dist-info → invokeai-6.11.1.dist-info}/WHEEL +0 -0
- {invokeai-6.11.0rc1.dist-info → invokeai-6.11.1.dist-info}/entry_points.txt +0 -0
- {invokeai-6.11.0rc1.dist-info → invokeai-6.11.1.dist-info}/licenses/LICENSE +0 -0
- {invokeai-6.11.0rc1.dist-info → invokeai-6.11.1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
- {invokeai-6.11.0rc1.dist-info → invokeai-6.11.1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
- {invokeai-6.11.0rc1.dist-info → invokeai-6.11.1.dist-info}/top_level.txt +0 -0
invokeai/app/invocations/flux2_denoise.py

(Entries below that appear as bare `-`/`+` lines or values cut off mid-token are truncated in the diff view itself; they are reproduced here exactly as rendered.)

```diff
@@ -329,15 +329,13 @@ class Flux2DenoiseInvocation(BaseInvocation):
         noise_packed = pack_flux2(noise)
         x = pack_flux2(x)
 
-        #
-        #
-        #
-        #
-
-
-
-        init_latents_packed = self._bn_normalize(init_latents_packed, bn_mean, bn_std)
-        noise_packed = self._bn_normalize(noise_packed, bn_mean, bn_std)
+        # BN normalization for txt2img:
+        # - DO NOT normalize random noise (it's already N(0,1) distributed)
+        # - Diffusers only normalizes image latents from VAE (for img2img/kontext)
+        # - Output MUST be denormalized after denoising before VAE decode
+        #
+        # For img2img with init_latents, we should normalize init_latents on unpacked
+        # shape (B, 128, H/16, W/16) - this is handled by _bn_normalize_unpacked below
 
         # Verify packed dimensions
         assert packed_h * packed_w == x.shape[1]
```
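The new comments spell out a normalize/denormalize contract around the transformer's latent space. A minimal sketch of that contract, assuming the standard `(x - mean) / std` form — the `_bn_normalize` body is not part of this diff, and the statistics below are placeholders:

```python
import torch

def bn_normalize(latents: torch.Tensor, mean: torch.Tensor, std: torch.Tensor) -> torch.Tensor:
    """Map VAE image latents into the N(0, 1) space the transformer expects."""
    return (latents - mean) / std

def bn_denormalize(latents: torch.Tensor, mean: torch.Tensor, std: torch.Tensor) -> torch.Tensor:
    """Inverse transform; must be applied to the denoised output before VAE decode."""
    return latents * std + mean

# Placeholder per-channel statistics, broadcast over (B, C, H, W).
bn_mean = torch.zeros(1, 128, 1, 1)
bn_std = torch.ones(1, 128, 1, 1)

init_latents = torch.randn(1, 128, 64, 64)                  # unpacked (B, 128, H/16, W/16)
init_latents = bn_normalize(init_latents, bn_mean, bn_std)  # img2img path only
noise = torch.randn_like(init_latents)                      # already N(0, 1): left untouched
```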
```diff
@@ -366,16 +364,24 @@ class Flux2DenoiseInvocation(BaseInvocation):
         if self.scheduler in FLUX_SCHEDULER_MAP and not is_inpainting:
             # Only use scheduler for txt2img - use manual Euler for inpainting to preserve exact timesteps
             scheduler_class = FLUX_SCHEDULER_MAP[self.scheduler]
-
-
-
-
-
-
-
-
-
-
+            # FlowMatchHeunDiscreteScheduler only supports num_train_timesteps and shift parameters
+            # FlowMatchEulerDiscreteScheduler and FlowMatchLCMScheduler support dynamic shifting
+            if self.scheduler == "heun":
+                scheduler = scheduler_class(
+                    num_train_timesteps=1000,
+                    shift=3.0,
+                )
+            else:
+                scheduler = scheduler_class(
+                    num_train_timesteps=1000,
+                    shift=3.0,
+                    use_dynamic_shifting=True,
+                    base_shift=0.5,
+                    max_shift=1.15,
+                    base_image_seq_len=256,
+                    max_image_seq_len=4096,
+                    time_shift_type="exponential",
+                )
 
         # Prepare reference image extension for FLUX.2 Klein built-in editing
         ref_image_extension = None
```
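The branch mirrors diffusers' flow-match scheduler constructors. A condensed sketch of the same selection logic; the constructor arguments are taken from the hunk, but `time_shift_type` only exists in newer diffusers releases, so treat the exact signature as version-dependent:

```python
from diffusers import FlowMatchEulerDiscreteScheduler, FlowMatchHeunDiscreteScheduler

def build_flux2_scheduler(name: str):
    if name == "heun":
        # The Heun flow-match scheduler accepts only the basic parameters.
        return FlowMatchHeunDiscreteScheduler(num_train_timesteps=1000, shift=3.0)
    # Euler/LCM flow-match schedulers additionally support resolution-dependent
    # ("dynamic") shifting of the sigma schedule.
    return FlowMatchEulerDiscreteScheduler(
        num_train_timesteps=1000,
        shift=3.0,
        use_dynamic_shifting=True,
        base_shift=0.5,
        max_shift=1.15,
        base_image_seq_len=256,
        max_image_seq_len=4096,
        time_shift_type="exponential",
    )

scheduler = build_flux2_scheduler("euler")
```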
invokeai/app/invocations/flux2_vae_decode.py

```diff
@@ -57,20 +57,6 @@ class Flux2VaeDecodeInvocation(BaseInvocation, WithMetadata, WithBoard):
         # Decode using diffusers API
         decoded = vae.decode(latents, return_dict=False)[0]
 
-        # Debug: Log decoded output statistics
-        print(
-            f"[FLUX.2 VAE] Decoded output: shape={decoded.shape}, "
-            f"min={decoded.min().item():.4f}, max={decoded.max().item():.4f}, "
-            f"mean={decoded.mean().item():.4f}"
-        )
-        # Check per-channel statistics to diagnose color issues
-        for c in range(min(3, decoded.shape[1])):
-            ch = decoded[0, c]
-            print(
-                f"[FLUX.2 VAE] Channel {c}: min={ch.min().item():.4f}, "
-                f"max={ch.max().item():.4f}, mean={ch.mean().item():.4f}"
-            )
-
         # Convert from [-1, 1] to [0, 1] then to [0, 255] PIL image
         img = (decoded / 2 + 0.5).clamp(0, 1)
         img = rearrange(img[0], "c h w -> h w c")
```
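Only the first two conversion lines survive as context here; a self-contained sketch of the full tensor-to-PIL path they belong to (the uint8 rounding step is an assumption, not shown in this diff):

```python
import torch
from einops import rearrange
from PIL import Image

def decoded_to_pil(decoded: torch.Tensor) -> Image.Image:
    # decoded: (B, 3, H, W) in [-1, 1], as returned by vae.decode(...)
    img = (decoded / 2 + 0.5).clamp(0, 1)      # -> [0, 1]
    img = rearrange(img[0], "c h w -> h w c")  # -> (H, W, 3)
    img = (img * 255).round().to(torch.uint8)  # -> [0, 255]
    return Image.fromarray(img.cpu().numpy())

print(decoded_to_pil(torch.rand(1, 3, 64, 64) * 2 - 1).size)  # (64, 64)
```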
invokeai/app/invocations/flux_denoise.py

```diff
@@ -32,7 +32,12 @@ from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.backend.flux.controlnet.instantx_controlnet_flux import InstantXControlNetFlux
 from invokeai.backend.flux.controlnet.xlabs_controlnet_flux import XLabsControlNetFlux
 from invokeai.backend.flux.denoise import denoise
-from invokeai.backend.flux.dype.presets import
+from invokeai.backend.flux.dype.presets import (
+    DYPE_PRESET_LABELS,
+    DYPE_PRESET_OFF,
+    DyPEPreset,
+    get_dype_config_from_preset,
+)
 from invokeai.backend.flux.extensions.dype_extension import DyPEExtension
 from invokeai.backend.flux.extensions.instantx_controlnet_extension import InstantXControlNetExtension
 from invokeai.backend.flux.extensions.kontext_extension import KontextExtension
```
```diff
@@ -66,7 +71,7 @@ from invokeai.backend.util.devices import TorchDevice
     title="FLUX Denoise",
     tags=["image", "flux"],
     category="image",
-    version="4.
+    version="4.5.1",
 )
 class FluxDenoiseInvocation(BaseInvocation):
     """Run denoising process with a FLUX transformer model."""
```
```diff
@@ -170,20 +175,27 @@ class FluxDenoiseInvocation(BaseInvocation):
 
     # DyPE (Dynamic Position Extrapolation) for high-resolution generation
     dype_preset: DyPEPreset = InputField(
-        default=
-        description=
+        default=DYPE_PRESET_OFF,
+        description=(
+            "DyPE preset for high-resolution generation. 'auto' enables automatically for resolutions > 1536px. "
+            "'area' enables automatically based on image area. '4k' uses optimized settings for 4K output."
+        ),
+        ui_order=100,
+        ui_choice_labels=DYPE_PRESET_LABELS,
     )
     dype_scale: Optional[float] = InputField(
         default=None,
         ge=0.0,
         le=8.0,
         description="DyPE magnitude (λs). Higher values = stronger extrapolation. Only used when dype_preset is not 'off'.",
+        ui_order=101,
     )
     dype_exponent: Optional[float] = InputField(
         default=None,
         ge=0.0,
         le=1000.0,
         description="DyPE decay speed (λt). Controls transition from low to high frequency detail. Only used when dype_preset is not 'off'.",
+        ui_order=102,
     )
 
     @torch.no_grad()
```
```diff
@@ -464,9 +476,13 @@ class FluxDenoiseInvocation(BaseInvocation):
                 target_width=self.width,
             )
             context.logger.info(
-                f"DyPE enabled: {self.width}x{self.height}, preset={self.dype_preset
-                f"
+                f"DyPE enabled: resolution={self.width}x{self.height}, preset={self.dype_preset}, "
+                f"method={dype_config.method}, scale={dype_config.dype_scale:.2f}, "
+                f"exponent={dype_config.dype_exponent:.2f}, start_sigma={dype_config.dype_start_sigma:.2f}, "
+                f"base_resolution={dype_config.base_resolution}"
             )
+        else:
+            context.logger.debug(f"DyPE disabled: resolution={self.width}x{self.height}, preset={self.dype_preset}")
 
         x = denoise(
             model=transformer,
```
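Condensing the hunks above into the control flow they implement: resolve the preset into a `DyPEConfig`, then log at info level only when DyPE is active. The call below uses the helper the file now imports; the concrete values are arbitrary:

```python
from invokeai.backend.flux.dype.presets import get_dype_config_from_preset

dype_config = get_dype_config_from_preset(
    preset="area",
    width=2048,
    height=2048,
    custom_scale=None,      # only honored by the 'manual' preset
    custom_exponent=None,
)
if dype_config is not None:
    print(f"DyPE enabled: scale={dype_config.dype_scale:.2f}, exponent={dype_config.dype_exponent:.2f}")
else:
    print("DyPE disabled")
```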
invokeai/app/invocations/flux_model_loader.py

```diff
@@ -6,7 +6,7 @@ from invokeai.app.invocations.baseinvocation import (
     invocation,
     invocation_output,
 )
-from invokeai.app.invocations.fields import FieldDescriptions,
+from invokeai.app.invocations.fields import FieldDescriptions, InputField, OutputField
 from invokeai.app.invocations.model import CLIPField, ModelIdentifierField, T5EncoderField, TransformerField, VAEField
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.util.t5_model_identifier import (
```
```diff
@@ -37,28 +37,25 @@ class FluxModelLoaderOutput(BaseInvocationOutput):
     title="Main Model - FLUX",
     tags=["model", "flux"],
     category="model",
-    version="1.0.
+    version="1.0.7",
 )
 class FluxModelLoaderInvocation(BaseInvocation):
     """Loads a flux base model, outputting its submodels."""
 
     model: ModelIdentifierField = InputField(
         description=FieldDescriptions.flux_model,
-        input=Input.Direct,
         ui_model_base=BaseModelType.Flux,
         ui_model_type=ModelType.Main,
     )
 
     t5_encoder_model: ModelIdentifierField = InputField(
         description=FieldDescriptions.t5_encoder,
-        input=Input.Direct,
         title="T5 Encoder",
         ui_model_type=ModelType.T5Encoder,
     )
 
     clip_embed_model: ModelIdentifierField = InputField(
         description=FieldDescriptions.clip_embed_model,
-        input=Input.Direct,
         title="CLIP Embed",
         ui_model_type=ModelType.CLIPEmbed,
     )
```
invokeai/app/util/step_callback.py

```diff
@@ -93,54 +93,60 @@ COGVIEW4_LATENT_RGB_FACTORS = [
     [-0.00955853, -0.00980067, -0.00977842],
 ]
 
-# FLUX.2 uses 32 latent channels.
-#
+# FLUX.2 uses 32 latent channels.
+# Factors from ComfyUI: https://github.com/Comfy-Org/ComfyUI/blob/main/comfy/latent_formats.py
 FLUX2_LATENT_RGB_FACTORS = [
     #     R        G        B
-
-    [0.
-    [-0.
-    [0.
-    [0.
-    [
-    [0.
-    [-0.
-    [0.
-    [-0.
-    [-0.
-    [0.
-    [
-    [0.
-    [0.
-    [-0.
-    [0.
-
-    [0.
-    [0.
-    [0.
-    [0.
-    [0.
-    [0.
-    [0.
-    [0.
-    [0.
-    [0.
-    [0.
-    [0.
-    [0.
-    [0.
-    [0.0, 0.0, 0.0],
-    [0.0, 0.0, 0.0],
+    [0.0058, 0.0113, 0.0073],
+    [0.0495, 0.0443, 0.0836],
+    [-0.0099, 0.0096, 0.0644],
+    [0.2144, 0.3009, 0.3652],
+    [0.0166, -0.0039, -0.0054],
+    [0.0157, 0.0103, -0.0160],
+    [-0.0398, 0.0902, -0.0235],
+    [-0.0052, 0.0095, 0.0109],
+    [-0.3527, -0.2712, -0.1666],
+    [-0.0301, -0.0356, -0.0180],
+    [-0.0107, 0.0078, 0.0013],
+    [0.0746, 0.0090, -0.0941],
+    [0.0156, 0.0169, 0.0070],
+    [-0.0034, -0.0040, -0.0114],
+    [0.0032, 0.0181, 0.0080],
+    [-0.0939, -0.0008, 0.0186],
+    [0.0018, 0.0043, 0.0104],
+    [0.0284, 0.0056, -0.0127],
+    [-0.0024, -0.0022, -0.0030],
+    [0.1207, -0.0026, 0.0065],
+    [0.0128, 0.0101, 0.0142],
+    [0.0137, -0.0072, -0.0007],
+    [0.0095, 0.0092, -0.0059],
+    [0.0000, -0.0077, -0.0049],
+    [-0.0465, -0.0204, -0.0312],
+    [0.0095, 0.0012, -0.0066],
+    [0.0290, -0.0034, 0.0025],
+    [0.0220, 0.0169, -0.0048],
+    [-0.0332, -0.0457, -0.0468],
+    [-0.0085, 0.0389, 0.0609],
+    [-0.0076, 0.0003, -0.0043],
+    [-0.0111, -0.0460, -0.0614],
 ]
 
+FLUX2_LATENT_RGB_BIAS = [-0.0329, -0.0718, -0.0851]
+
 
 def sample_to_lowres_estimated_image(
-    samples: torch.Tensor,
+    samples: torch.Tensor,
+    latent_rgb_factors: torch.Tensor,
+    smooth_matrix: Optional[torch.Tensor] = None,
+    latent_rgb_bias: Optional[torch.Tensor] = None,
 ):
     if samples.dim() == 4:
         samples = samples[0]
     latent_image = samples.permute(1, 2, 0) @ latent_rgb_factors
 
+    if latent_rgb_bias is not None:
+        latent_image = latent_image + latent_rgb_bias
+
     if smooth_matrix is not None:
         latent_image = latent_image.unsqueeze(0).permute(3, 0, 1, 2)
         latent_image = torch.nn.functional.conv2d(latent_image, smooth_matrix.reshape((1, 1, 3, 3)), padding=1)
```
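The preview path is a per-pixel affine map from latent channels to RGB; the new `FLUX2_LATENT_RGB_BIAS` turns the previous pure matmul into matmul-plus-bias. A toy-shaped illustration (random factors stand in for the real table):

```python
import torch

latents = torch.randn(32, 8, 8)                   # 32-channel FLUX.2 latent, 8x8
factors = torch.randn(32, 3)                      # stand-in for FLUX2_LATENT_RGB_FACTORS
bias = torch.tensor([-0.0329, -0.0718, -0.0851])  # FLUX2_LATENT_RGB_BIAS

rgb = latents.permute(1, 2, 0) @ factors          # (8, 8, 32) @ (32, 3) -> (8, 8, 3)
rgb = rgb + bias                                  # bias broadcasts over H and W
print(rgb.shape)                                  # torch.Size([8, 8, 3])
```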
```diff
@@ -193,6 +199,7 @@ def diffusion_step_callback(
     sample = intermediate_state.latents
 
     smooth_matrix: list[list[float]] | None = None
+    latent_rgb_bias: list[float] | None = None
     if base_model in [BaseModelType.StableDiffusion1, BaseModelType.StableDiffusion2]:
         latent_rgb_factors = SD1_5_LATENT_RGB_FACTORS
     elif base_model in [BaseModelType.StableDiffusionXL, BaseModelType.StableDiffusionXLRefiner]:
```
```diff
@@ -206,6 +213,7 @@ def diffusion_step_callback(
         latent_rgb_factors = FLUX_LATENT_RGB_FACTORS
     elif base_model == BaseModelType.Flux2:
         latent_rgb_factors = FLUX2_LATENT_RGB_FACTORS
+        latent_rgb_bias = FLUX2_LATENT_RGB_BIAS
     elif base_model == BaseModelType.ZImage:
         # Z-Image uses FLUX-compatible VAE with 16 latent channels
         latent_rgb_factors = FLUX_LATENT_RGB_FACTORS
```
```diff
@@ -216,8 +224,14 @@ def diffusion_step_callback(
     smooth_matrix_torch = (
         torch.tensor(smooth_matrix, dtype=sample.dtype, device=sample.device) if smooth_matrix else None
     )
+    latent_rgb_bias_torch = (
+        torch.tensor(latent_rgb_bias, dtype=sample.dtype, device=sample.device) if latent_rgb_bias else None
+    )
     image = sample_to_lowres_estimated_image(
-        samples=sample,
+        samples=sample,
+        latent_rgb_factors=latent_rgb_factors_torch,
+        smooth_matrix=smooth_matrix_torch,
+        latent_rgb_bias=latent_rgb_bias_torch,
     )
 
     width = image.width * 8
```
invokeai/backend/flux/dype/__init__.py

```diff
@@ -8,11 +8,28 @@ Based on: https://github.com/wildminder/ComfyUI-DyPE
 
 from invokeai.backend.flux.dype.base import DyPEConfig
 from invokeai.backend.flux.dype.embed import DyPEEmbedND
-from invokeai.backend.flux.dype.presets import
+from invokeai.backend.flux.dype.presets import (
+    DYPE_PRESET_4K,
+    DYPE_PRESET_AREA,
+    DYPE_PRESET_AUTO,
+    DYPE_PRESET_LABELS,
+    DYPE_PRESET_MANUAL,
+    DYPE_PRESET_OFF,
+    DyPEPreset,
+    get_dype_config_for_area,
+    get_dype_config_for_resolution,
+)
 
 __all__ = [
     "DyPEConfig",
     "DyPEEmbedND",
     "DyPEPreset",
+    "DYPE_PRESET_OFF",
+    "DYPE_PRESET_MANUAL",
+    "DYPE_PRESET_AUTO",
+    "DYPE_PRESET_AREA",
+    "DYPE_PRESET_4K",
+    "DYPE_PRESET_LABELS",
+    "get_dype_config_for_area",
     "get_dype_config_for_resolution",
 ]
```
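With the expanded `__all__`, these helpers are reachable from the package root. A small usage sketch (the resolution is arbitrary; at 1536x1536 the area helper returns a config with its scale clamped at 8.0):

```python
from invokeai.backend.flux.dype import (
    DYPE_PRESET_LABELS,
    get_dype_config_for_area,
)

print(DYPE_PRESET_LABELS["4k"])  # "4K Optimized"
cfg = get_dype_config_for_area(width=1536, height=1536)
print(None if cfg is None else round(cfg.dype_scale, 2))  # 8.0
```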
invokeai/backend/flux/dype/base.py

```diff
@@ -99,13 +99,17 @@ def compute_vision_yarn_freqs(
     The NTK-aware approach smoothly interpolates frequencies to cover larger
     position ranges without breaking the attention patterns.
 
+    DyPE (Dynamic Position Extrapolation) modulates the NTK scaling based on
+    the current timestep - stronger extrapolation in early steps (global structure),
+    weaker in late steps (fine details).
+
     Args:
         pos: Position tensor
         dim: Embedding dimension
         theta: RoPE base frequency
         scale_h: Height scaling factor
         scale_w: Width scaling factor
-        current_sigma: Current noise level (
+        current_sigma: Current noise level (1.0 = full noise, 0.0 = clean)
         dype_config: DyPE configuration
 
     Returns:
```
```diff
@@ -124,7 +128,24 @@ def compute_vision_yarn_freqs(
     # This increases the wavelength of position encodings proportionally
     if scale > 1.0:
         ntk_alpha = scale ** (dim / (dim - 2))
-
+
+        # Apply timestep-dependent DyPE modulation
+        # mscale controls how strongly we apply the NTK extrapolation
+        # Early steps (high sigma): stronger extrapolation for global structure
+        # Late steps (low sigma): weaker extrapolation for fine details
+        mscale = get_timestep_mscale(
+            scale=scale,
+            current_sigma=current_sigma,
+            dype_scale=dype_config.dype_scale,
+            dype_exponent=dype_config.dype_exponent,
+            dype_start_sigma=dype_config.dype_start_sigma,
+        )
+
+        # Modulate NTK alpha by mscale
+        # When mscale > 1: interpolate towards stronger extrapolation
+        # When mscale = 1: use base NTK alpha
+        modulated_alpha = 1.0 + (ntk_alpha - 1.0) * mscale
+        scaled_theta = theta * modulated_alpha
     else:
         scaled_theta = theta
```
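`get_timestep_mscale` is called here but its body is not part of this diff. A hypothetical reading consistent with the comments (strong extrapolation at high sigma, fading as denoising completes); do not treat this as the shipped implementation:

```python
import math

def get_timestep_mscale(scale: float, current_sigma: float, dype_scale: float,
                        dype_exponent: float, dype_start_sigma: float) -> float:
    # Hypothetical sketch only - the real function lives in the dype package.
    # Normalized noise level in [0, 1]: 1.0 at/above start_sigma, 0.0 when clean.
    t = min(max(current_sigma / dype_start_sigma, 0.0), 1.0)
    decay = t ** dype_exponent  # strong early (t near 1), weak late
    max_mscale = 1.0 + dype_scale * math.log(max(scale, 1.0))
    return 1.0 + (max_mscale - 1.0) * decay
```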
```diff
@@ -151,14 +172,15 @@ def compute_yarn_freqs(
 ) -> tuple[Tensor, Tensor]:
     """Compute RoPE frequencies using YARN/NTK method.
 
-    Uses NTK-aware theta scaling for high-resolution support
+    Uses NTK-aware theta scaling for high-resolution support with
+    timestep-dependent DyPE modulation.
 
     Args:
         pos: Position tensor
         dim: Embedding dimension
         theta: RoPE base frequency
         scale: Uniform scaling factor
-        current_sigma: Current noise level (
+        current_sigma: Current noise level (1.0 = full noise, 0.0 = clean)
         dype_config: DyPE configuration
 
     Returns:
```
```diff
@@ -169,10 +191,22 @@ def compute_yarn_freqs(
     device = pos.device
     dtype = torch.float64 if device.type != "mps" else torch.float32
 
-    # NTK-aware theta scaling
+    # NTK-aware theta scaling with DyPE modulation
     if scale > 1.0:
         ntk_alpha = scale ** (dim / (dim - 2))
-
+
+        # Apply timestep-dependent DyPE modulation
+        mscale = get_timestep_mscale(
+            scale=scale,
+            current_sigma=current_sigma,
+            dype_scale=dype_config.dype_scale,
+            dype_exponent=dype_config.dype_exponent,
+            dype_start_sigma=dype_config.dype_start_sigma,
+        )
+
+        # Modulate NTK alpha by mscale
+        modulated_alpha = 1.0 + (ntk_alpha - 1.0) * mscale
+        scaled_theta = theta * modulated_alpha
     else:
         scaled_theta = theta
```
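For concreteness, the modulation formula repeated in both functions interpolates between no extrapolation and the full NTK alpha; a few worked values (dim=64, scale=2 chosen arbitrarily):

```python
scale, dim = 2.0, 64
ntk_alpha = scale ** (dim / (dim - 2))  # 2 ** (64/62) ≈ 2.046

for mscale in (0.0, 0.5, 1.0, 2.0):
    modulated_alpha = 1.0 + (ntk_alpha - 1.0) * mscale
    # mscale=0 -> alpha 1.0 (no extrapolation); mscale=1 -> plain NTK; >1 -> stronger
    print(f"mscale={mscale:.1f} -> modulated_alpha={modulated_alpha:.3f}")
```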
invokeai/backend/flux/dype/presets.py

```diff
@@ -1,17 +1,29 @@
 """DyPE presets and automatic configuration."""
 
+import math
 from dataclasses import dataclass
-from
+from typing import Literal
 
 from invokeai.backend.flux.dype.base import DyPEConfig
 
-
-
-
-
-
-
-
+# DyPE preset type - using Literal for proper frontend dropdown support
+DyPEPreset = Literal["off", "manual", "auto", "area", "4k"]
+
+# Constants for preset values
+DYPE_PRESET_OFF: DyPEPreset = "off"
+DYPE_PRESET_MANUAL: DyPEPreset = "manual"
+DYPE_PRESET_AUTO: DyPEPreset = "auto"
+DYPE_PRESET_AREA: DyPEPreset = "area"
+DYPE_PRESET_4K: DyPEPreset = "4k"
+
+# Human-readable labels for the UI
+DYPE_PRESET_LABELS: dict[str, str] = {
+    "off": "Off",
+    "manual": "Manual",
+    "auto": "Auto (>1536px)",
+    "area": "Area (auto)",
+    "4k": "4K Optimized",
+}
 
 
 @dataclass
```
```diff
@@ -27,7 +39,7 @@ class DyPEPresetConfig:
 
 # Predefined preset configurations
 DYPE_PRESETS: dict[DyPEPreset, DyPEPresetConfig] = {
-
+    DYPE_PRESET_4K: DyPEPresetConfig(
         base_resolution=1024,
         method="vision_yarn",
         dype_scale=2.0,
```
```diff
@@ -79,6 +91,50 @@ def get_dype_config_for_resolution(
     )
 
 
+def get_dype_config_for_area(
+    width: int,
+    height: int,
+    base_resolution: int = 1024,
+) -> DyPEConfig | None:
+    """Automatically determine DyPE config based on target area.
+
+    Uses sqrt(area/base_area) as an effective side-length ratio.
+    DyPE is enabled only when target area exceeds base area.
+
+    Returns:
+        DyPEConfig if DyPE should be enabled, None otherwise
+    """
+    area = width * height
+    base_area = base_resolution**2
+
+    if area <= base_area:
+        return None
+
+    area_ratio = area / base_area
+    effective_side_ratio = math.sqrt(area_ratio)  # 1.0 at base, 2.0 at 2K (if base is 1K)
+
+    # Strength: 0 at base area, 8 at sat_area, clamped thereafter.
+    sat_area = 2027520  # Determined by experimentation where a vertical line appears
+    sat_side_ratio = math.sqrt(sat_area / base_area)
+    dynamic_dype_scale = 8.0 * (effective_side_ratio - 1.0) / (sat_side_ratio - 1.0)
+    dynamic_dype_scale = max(0.0, min(dynamic_dype_scale, 8.0))
+
+    # Continuous exponent schedule:
+    # r=1 -> 0.5, r=2 -> 1.0, r=4 -> 2.0 (exact), smoothly varying in between.
+    x = math.log2(effective_side_ratio)
+    dype_exponent = 0.25 * (x**2) + 0.25 * x + 0.5
+    dype_exponent = max(0.5, min(dype_exponent, 2.0))
+
+    return DyPEConfig(
+        enable_dype=True,
+        base_resolution=base_resolution,
+        method="vision_yarn",
+        dype_scale=dynamic_dype_scale,
+        dype_exponent=dype_exponent,
+        dype_start_sigma=1.0,
+    )
+
+
 def get_dype_config_from_preset(
     preset: DyPEPreset,
     width: int,
```
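Re-running the arithmetic of `get_dype_config_for_area` at a few sizes shows the schedule's shape; the helper below simply repeats the formulas from the hunk:

```python
import math

def area_schedule(width: int, height: int, base_resolution: int = 1024):
    base_area = base_resolution**2
    r = math.sqrt((width * height) / base_area)  # effective side-length ratio
    sat = math.sqrt(2027520 / base_area)         # saturation point from the hunk
    scale = max(0.0, min(8.0 * (r - 1.0) / (sat - 1.0), 8.0))
    x = math.log2(r)
    exponent = max(0.5, min(0.25 * x * x + 0.25 * x + 0.5, 2.0))
    return scale, exponent

for w, h in [(1280, 1280), (2048, 2048)]:
    print((w, h), area_schedule(w, h))
# (1280, 1280) -> scale ≈ 5.12, exponent ≈ 0.61
# (2048, 2048) -> scale clamps at 8.0, exponent = 1.0
# (the real function returns None at or below 1024x1024 instead of scale 0.0)
```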
```diff
@@ -92,41 +148,47 @@ def get_dype_config_from_preset(
         preset: The DyPE preset to use
         width: Target image width
         height: Target image height
-        custom_scale: Optional custom dype_scale (
-        custom_exponent: Optional custom dype_exponent (
+        custom_scale: Optional custom dype_scale (only used with 'manual' preset)
+        custom_exponent: Optional custom dype_exponent (only used with 'manual' preset)
 
     Returns:
         DyPEConfig if DyPE should be enabled, None otherwise
     """
-    if preset ==
-        # Check if custom values are provided even with preset=OFF
-        if custom_scale is not None:
-            return DyPEConfig(
-                enable_dype=True,
-                base_resolution=1024,
-                method="vision_yarn",
-                dype_scale=custom_scale,
-                dype_exponent=custom_exponent if custom_exponent is not None else 2.0,
-                dype_start_sigma=1.0,
-            )
+    if preset == DYPE_PRESET_OFF:
         return None
 
-    if preset ==
-
+    if preset == DYPE_PRESET_MANUAL:
+        # Manual mode - custom values can override defaults
+        max_dim = max(width, height)
+        scale = max_dim / 1024
+        dynamic_dype_scale = min(2.0 * scale, 8.0)
+        return DyPEConfig(
+            enable_dype=True,
+            base_resolution=1024,
+            method="vision_yarn",
+            dype_scale=custom_scale if custom_scale is not None else dynamic_dype_scale,
+            dype_exponent=custom_exponent if custom_exponent is not None else 2.0,
+            dype_start_sigma=1.0,
+        )
+
+    if preset == DYPE_PRESET_AUTO:
+        # Auto preset - custom values are ignored
+        return get_dype_config_for_resolution(
             width=width,
             height=height,
             base_resolution=1024,
             activation_threshold=1536,
         )
-
-
-
-
-
-
-
-
-
+
+    if preset == DYPE_PRESET_AREA:
+        # Area-based preset - custom values are ignored
+        return get_dype_config_for_area(
+            width=width,
+            height=height,
+            base_resolution=1024,
+        )
+
+    # Use preset configuration (4K etc.) - custom values are ignored
     preset_config = DYPE_PRESETS.get(preset)
     if preset_config is None:
         return None
```
```diff
@@ -135,7 +197,7 @@ def get_dype_config_from_preset(
         enable_dype=True,
         base_resolution=preset_config.base_resolution,
         method=preset_config.method,
-        dype_scale=
-        dype_exponent=
+        dype_scale=preset_config.dype_scale,
+        dype_exponent=preset_config.dype_exponent,
         dype_start_sigma=preset_config.dype_start_sigma,
     )
```