InvokeAI 6.11.0rc1__py3-none-any.whl → 6.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. invokeai/app/invocations/flux2_denoise.py +25 -19
  2. invokeai/app/invocations/flux2_vae_decode.py +0 -14
  3. invokeai/app/invocations/flux_denoise.py +22 -6
  4. invokeai/app/invocations/flux_model_loader.py +2 -5
  5. invokeai/app/util/step_callback.py +52 -38
  6. invokeai/backend/flux/dype/__init__.py +18 -1
  7. invokeai/backend/flux/dype/base.py +40 -6
  8. invokeai/backend/flux/dype/presets.py +97 -35
  9. invokeai/backend/flux2/denoise.py +33 -6
  10. invokeai/backend/flux2/sampling_utils.py +19 -22
  11. invokeai/frontend/web/dist/assets/App-Drro7CYT.js +161 -0
  12. invokeai/frontend/web/dist/assets/{browser-ponyfill-Cw07u5G1.js → browser-ponyfill-B5E9kN5q.js} +1 -1
  13. invokeai/frontend/web/dist/assets/{index-DSKM8iGj.js → index-Bp-c_7R4.js} +64 -64
  14. invokeai/frontend/web/dist/index.html +1 -1
  15. invokeai/frontend/web/dist/locales/en.json +21 -1
  16. invokeai/frontend/web/dist/locales/it.json +135 -16
  17. invokeai/frontend/web/dist/locales/ru.json +42 -42
  18. invokeai/version/invokeai_version.py +1 -1
  19. {invokeai-6.11.0rc1.dist-info → invokeai-6.11.1.dist-info}/METADATA +1 -1
  20. {invokeai-6.11.0rc1.dist-info → invokeai-6.11.1.dist-info}/RECORD +26 -26
  21. invokeai/frontend/web/dist/assets/App-ClpIJstk.js +0 -161
  22. {invokeai-6.11.0rc1.dist-info → invokeai-6.11.1.dist-info}/WHEEL +0 -0
  23. {invokeai-6.11.0rc1.dist-info → invokeai-6.11.1.dist-info}/entry_points.txt +0 -0
  24. {invokeai-6.11.0rc1.dist-info → invokeai-6.11.1.dist-info}/licenses/LICENSE +0 -0
  25. {invokeai-6.11.0rc1.dist-info → invokeai-6.11.1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
  26. {invokeai-6.11.0rc1.dist-info → invokeai-6.11.1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
  27. {invokeai-6.11.0rc1.dist-info → invokeai-6.11.1.dist-info}/top_level.txt +0 -0
@@ -329,15 +329,13 @@ class Flux2DenoiseInvocation(BaseInvocation):
  noise_packed = pack_flux2(noise)
  x = pack_flux2(x)

- # Apply BN normalization BEFORE denoising (as per diffusers Flux2KleinPipeline)
- # BN normalization: y = (x - mean) / std
- # This transforms latents to normalized space for the transformer
- # IMPORTANT: Also normalize init_latents and noise for inpainting to maintain consistency
- if bn_mean is not None and bn_std is not None:
- x = self._bn_normalize(x, bn_mean, bn_std)
- if init_latents_packed is not None:
- init_latents_packed = self._bn_normalize(init_latents_packed, bn_mean, bn_std)
- noise_packed = self._bn_normalize(noise_packed, bn_mean, bn_std)
+ # BN normalization for txt2img:
+ # - DO NOT normalize random noise (it's already N(0,1) distributed)
+ # - Diffusers only normalizes image latents from VAE (for img2img/kontext)
+ # - Output MUST be denormalized after denoising before VAE decode
+ #
+ # For img2img with init_latents, we should normalize init_latents on unpacked
+ # shape (B, 128, H/16, W/16) - this is handled by _bn_normalize_unpacked below

  # Verify packed dimensions
  assert packed_h * packed_w == x.shape[1]
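The comments above describe the normalization convention without collecting the math in one place. As a minimal sketch (the helper names are illustrative, not the methods shipped in the invocation), the transform and its inverse are:

```python
import torch


def bn_normalize(latents: torch.Tensor, bn_mean: torch.Tensor, bn_std: torch.Tensor) -> torch.Tensor:
    """Map VAE-space latents into the transformer's normalized space: y = (x - mean) / std."""
    return (latents - bn_mean) / bn_std


def bn_denormalize(latents: torch.Tensor, bn_mean: torch.Tensor, bn_std: torch.Tensor) -> torch.Tensor:
    """Invert the normalization after denoising, before VAE decode: x = y * std + mean."""
    return latents * bn_std + bn_mean
```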
@@ -366,16 +364,24 @@ class Flux2DenoiseInvocation(BaseInvocation):
  if self.scheduler in FLUX_SCHEDULER_MAP and not is_inpainting:
  # Only use scheduler for txt2img - use manual Euler for inpainting to preserve exact timesteps
  scheduler_class = FLUX_SCHEDULER_MAP[self.scheduler]
- scheduler = scheduler_class(
- num_train_timesteps=1000,
- shift=3.0,
- use_dynamic_shifting=True,
- base_shift=0.5,
- max_shift=1.15,
- base_image_seq_len=256,
- max_image_seq_len=4096,
- time_shift_type="exponential",
- )
+ # FlowMatchHeunDiscreteScheduler only supports num_train_timesteps and shift parameters
+ # FlowMatchEulerDiscreteScheduler and FlowMatchLCMScheduler support dynamic shifting
+ if self.scheduler == "heun":
+ scheduler = scheduler_class(
+ num_train_timesteps=1000,
+ shift=3.0,
+ )
+ else:
+ scheduler = scheduler_class(
+ num_train_timesteps=1000,
+ shift=3.0,
+ use_dynamic_shifting=True,
+ base_shift=0.5,
+ max_shift=1.15,
+ base_image_seq_len=256,
+ max_image_seq_len=4096,
+ time_shift_type="exponential",
+ )

  # Prepare reference image extension for FLUX.2 Klein built-in editing
  ref_image_extension = None
@@ -57,20 +57,6 @@ class Flux2VaeDecodeInvocation(BaseInvocation, WithMetadata, WithBoard):
  # Decode using diffusers API
  decoded = vae.decode(latents, return_dict=False)[0]

- # Debug: Log decoded output statistics
- print(
- f"[FLUX.2 VAE] Decoded output: shape={decoded.shape}, "
- f"min={decoded.min().item():.4f}, max={decoded.max().item():.4f}, "
- f"mean={decoded.mean().item():.4f}"
- )
- # Check per-channel statistics to diagnose color issues
- for c in range(min(3, decoded.shape[1])):
- ch = decoded[0, c]
- print(
- f"[FLUX.2 VAE] Channel {c}: min={ch.min().item():.4f}, "
- f"max={ch.max().item():.4f}, mean={ch.mean().item():.4f}"
- )
-
  # Convert from [-1, 1] to [0, 1] then to [0, 255] PIL image
  img = (decoded / 2 + 0.5).clamp(0, 1)
  img = rearrange(img[0], "c h w -> h w c")
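For reference, the conversion kept by this hunk maps the decoder output from [-1, 1] into a PIL image. A self-contained sketch, assuming `decoded` is a (B, 3, H, W) tensor as returned by `vae.decode(...)`:

```python
import numpy as np
import torch
from einops import rearrange
from PIL import Image


def decoded_to_pil(decoded: torch.Tensor) -> Image.Image:
    img = (decoded / 2 + 0.5).clamp(0, 1)      # [-1, 1] -> [0, 1]
    img = rearrange(img[0], "c h w -> h w c")  # CHW -> HWC for PIL
    img_np = (img.float().cpu().numpy() * 255).round().astype(np.uint8)
    return Image.fromarray(img_np)
```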
@@ -32,7 +32,12 @@ from invokeai.app.services.shared.invocation_context import InvocationContext
  from invokeai.backend.flux.controlnet.instantx_controlnet_flux import InstantXControlNetFlux
  from invokeai.backend.flux.controlnet.xlabs_controlnet_flux import XLabsControlNetFlux
  from invokeai.backend.flux.denoise import denoise
- from invokeai.backend.flux.dype.presets import DyPEPreset, get_dype_config_from_preset
+ from invokeai.backend.flux.dype.presets import (
+ DYPE_PRESET_LABELS,
+ DYPE_PRESET_OFF,
+ DyPEPreset,
+ get_dype_config_from_preset,
+ )
  from invokeai.backend.flux.extensions.dype_extension import DyPEExtension
  from invokeai.backend.flux.extensions.instantx_controlnet_extension import InstantXControlNetExtension
  from invokeai.backend.flux.extensions.kontext_extension import KontextExtension
@@ -66,7 +71,7 @@ from invokeai.backend.util.devices import TorchDevice
  title="FLUX Denoise",
  tags=["image", "flux"],
  category="image",
- version="4.3.0",
+ version="4.5.1",
  )
  class FluxDenoiseInvocation(BaseInvocation):
  """Run denoising process with a FLUX transformer model."""
@@ -170,20 +175,27 @@ class FluxDenoiseInvocation(BaseInvocation):

  # DyPE (Dynamic Position Extrapolation) for high-resolution generation
  dype_preset: DyPEPreset = InputField(
- default=DyPEPreset.OFF,
- description="DyPE preset for high-resolution generation. 'auto' enables automatically for resolutions > 1536px. '4k' uses optimized settings for 4K output.",
+ default=DYPE_PRESET_OFF,
+ description=(
+ "DyPE preset for high-resolution generation. 'auto' enables automatically for resolutions > 1536px. "
+ "'area' enables automatically based on image area. '4k' uses optimized settings for 4K output."
+ ),
+ ui_order=100,
+ ui_choice_labels=DYPE_PRESET_LABELS,
  )
  dype_scale: Optional[float] = InputField(
  default=None,
  ge=0.0,
  le=8.0,
  description="DyPE magnitude (λs). Higher values = stronger extrapolation. Only used when dype_preset is not 'off'.",
+ ui_order=101,
  )
  dype_exponent: Optional[float] = InputField(
  default=None,
  ge=0.0,
  le=1000.0,
  description="DyPE decay speed (λt). Controls transition from low to high frequency detail. Only used when dype_preset is not 'off'.",
+ ui_order=102,
  )

  @torch.no_grad()
@@ -464,9 +476,13 @@ class FluxDenoiseInvocation(BaseInvocation):
  target_width=self.width,
  )
  context.logger.info(
- f"DyPE enabled: {self.width}x{self.height}, preset={self.dype_preset.value}, "
- f"scale={dype_config.dype_scale:.2f}, method={dype_config.method}"
+ f"DyPE enabled: resolution={self.width}x{self.height}, preset={self.dype_preset}, "
+ f"method={dype_config.method}, scale={dype_config.dype_scale:.2f}, "
+ f"exponent={dype_config.dype_exponent:.2f}, start_sigma={dype_config.dype_start_sigma:.2f}, "
+ f"base_resolution={dype_config.base_resolution}"
  )
+ else:
+ context.logger.debug(f"DyPE disabled: resolution={self.width}x{self.height}, preset={self.dype_preset}")

  x = denoise(
  model=transformer,
@@ -6,7 +6,7 @@ from invokeai.app.invocations.baseinvocation import (
  invocation,
  invocation_output,
  )
- from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField
+ from invokeai.app.invocations.fields import FieldDescriptions, InputField, OutputField
  from invokeai.app.invocations.model import CLIPField, ModelIdentifierField, T5EncoderField, TransformerField, VAEField
  from invokeai.app.services.shared.invocation_context import InvocationContext
  from invokeai.app.util.t5_model_identifier import (
@@ -37,28 +37,25 @@ class FluxModelLoaderOutput(BaseInvocationOutput):
  title="Main Model - FLUX",
  tags=["model", "flux"],
  category="model",
- version="1.0.6",
+ version="1.0.7",
  )
  class FluxModelLoaderInvocation(BaseInvocation):
  """Loads a flux base model, outputting its submodels."""

  model: ModelIdentifierField = InputField(
  description=FieldDescriptions.flux_model,
- input=Input.Direct,
  ui_model_base=BaseModelType.Flux,
  ui_model_type=ModelType.Main,
  )

  t5_encoder_model: ModelIdentifierField = InputField(
  description=FieldDescriptions.t5_encoder,
- input=Input.Direct,
  title="T5 Encoder",
  ui_model_type=ModelType.T5Encoder,
  )

  clip_embed_model: ModelIdentifierField = InputField(
  description=FieldDescriptions.clip_embed_model,
- input=Input.Direct,
  title="CLIP Embed",
  ui_model_type=ModelType.CLIPEmbed,
  )
@@ -93,54 +93,60 @@ COGVIEW4_LATENT_RGB_FACTORS = [
  [-0.00955853, -0.00980067, -0.00977842],
  ]

- # FLUX.2 uses 32 latent channels. Since we don't have proper factors yet,
- # we extend FLUX factors with zeros for preview approximation.
+ # FLUX.2 uses 32 latent channels.
+ # Factors from ComfyUI: https://github.com/Comfy-Org/ComfyUI/blob/main/comfy/latent_formats.py
  FLUX2_LATENT_RGB_FACTORS = [
  # R G B
- # First 16 channels (from FLUX)
- [0.0118, 0.0024, 0.0017],
- [-0.0074, -0.0108, -0.0003],
- [0.0056, 0.0291, 0.0768],
- [0.0342, -0.0681, -0.0427],
- [-0.0258, 0.0092, 0.0463],
- [0.0863, 0.0784, 0.0547],
- [-0.0017, 0.0402, 0.0158],
- [0.0501, 0.1058, 0.1152],
- [-0.0209, -0.0218, -0.0329],
- [-0.0314, 0.0083, 0.0896],
- [0.0851, 0.0665, -0.0472],
- [-0.0534, 0.0238, -0.0024],
- [0.0452, -0.0026, 0.0048],
- [0.0892, 0.0831, 0.0881],
- [-0.1117, -0.0304, -0.0789],
- [0.0027, -0.0479, -0.0043],
- # Additional 16 channels (zeros as placeholder)
- [0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0],
- [0.0, 0.0, 0.0],
+ [0.0058, 0.0113, 0.0073],
+ [0.0495, 0.0443, 0.0836],
+ [-0.0099, 0.0096, 0.0644],
+ [0.2144, 0.3009, 0.3652],
+ [0.0166, -0.0039, -0.0054],
+ [0.0157, 0.0103, -0.0160],
+ [-0.0398, 0.0902, -0.0235],
+ [-0.0052, 0.0095, 0.0109],
+ [-0.3527, -0.2712, -0.1666],
+ [-0.0301, -0.0356, -0.0180],
+ [-0.0107, 0.0078, 0.0013],
+ [0.0746, 0.0090, -0.0941],
+ [0.0156, 0.0169, 0.0070],
+ [-0.0034, -0.0040, -0.0114],
+ [0.0032, 0.0181, 0.0080],
+ [-0.0939, -0.0008, 0.0186],
+ [0.0018, 0.0043, 0.0104],
+ [0.0284, 0.0056, -0.0127],
+ [-0.0024, -0.0022, -0.0030],
+ [0.1207, -0.0026, 0.0065],
+ [0.0128, 0.0101, 0.0142],
+ [0.0137, -0.0072, -0.0007],
+ [0.0095, 0.0092, -0.0059],
+ [0.0000, -0.0077, -0.0049],
+ [-0.0465, -0.0204, -0.0312],
+ [0.0095, 0.0012, -0.0066],
+ [0.0290, -0.0034, 0.0025],
+ [0.0220, 0.0169, -0.0048],
+ [-0.0332, -0.0457, -0.0468],
+ [-0.0085, 0.0389, 0.0609],
+ [-0.0076, 0.0003, -0.0043],
+ [-0.0111, -0.0460, -0.0614],
  ]

+ FLUX2_LATENT_RGB_BIAS = [-0.0329, -0.0718, -0.0851]
+

  def sample_to_lowres_estimated_image(
- samples: torch.Tensor, latent_rgb_factors: torch.Tensor, smooth_matrix: Optional[torch.Tensor] = None
+ samples: torch.Tensor,
+ latent_rgb_factors: torch.Tensor,
+ smooth_matrix: Optional[torch.Tensor] = None,
+ latent_rgb_bias: Optional[torch.Tensor] = None,
  ):
  if samples.dim() == 4:
  samples = samples[0]
  latent_image = samples.permute(1, 2, 0) @ latent_rgb_factors

+ if latent_rgb_bias is not None:
+ latent_image = latent_image + latent_rgb_bias
+
  if smooth_matrix is not None:
  latent_image = latent_image.unsqueeze(0).permute(3, 0, 1, 2)
  latent_image = torch.nn.functional.conv2d(latent_image, smooth_matrix.reshape((1, 1, 3, 3)), padding=1)
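The updated preview helper is a per-pixel linear projection: each 32-channel FLUX.2 latent vector is multiplied by the (32, 3) factor matrix and shifted by the new per-channel bias. A small shape-checking sketch (the random latent is illustrative only):

```python
import torch

from invokeai.app.util.step_callback import FLUX2_LATENT_RGB_BIAS, FLUX2_LATENT_RGB_FACTORS

latents = torch.randn(32, 64, 64)                 # (C, H, W) FLUX.2 latent sample
factors = torch.tensor(FLUX2_LATENT_RGB_FACTORS)  # (32, 3)
bias = torch.tensor(FLUX2_LATENT_RGB_BIAS)        # (3,)

# Same projection as sample_to_lowres_estimated_image, without smoothing:
rgb = latents.permute(1, 2, 0) @ factors + bias   # (H, W, 3) rough RGB preview
assert rgb.shape == (64, 64, 3)
```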
@@ -193,6 +199,7 @@ def diffusion_step_callback(
  sample = intermediate_state.latents

  smooth_matrix: list[list[float]] | None = None
+ latent_rgb_bias: list[float] | None = None
  if base_model in [BaseModelType.StableDiffusion1, BaseModelType.StableDiffusion2]:
  latent_rgb_factors = SD1_5_LATENT_RGB_FACTORS
  elif base_model in [BaseModelType.StableDiffusionXL, BaseModelType.StableDiffusionXLRefiner]:
@@ -206,6 +213,7 @@ def diffusion_step_callback(
  latent_rgb_factors = FLUX_LATENT_RGB_FACTORS
  elif base_model == BaseModelType.Flux2:
  latent_rgb_factors = FLUX2_LATENT_RGB_FACTORS
+ latent_rgb_bias = FLUX2_LATENT_RGB_BIAS
  elif base_model == BaseModelType.ZImage:
  # Z-Image uses FLUX-compatible VAE with 16 latent channels
  latent_rgb_factors = FLUX_LATENT_RGB_FACTORS
@@ -216,8 +224,14 @@ def diffusion_step_callback(
  smooth_matrix_torch = (
  torch.tensor(smooth_matrix, dtype=sample.dtype, device=sample.device) if smooth_matrix else None
  )
+ latent_rgb_bias_torch = (
+ torch.tensor(latent_rgb_bias, dtype=sample.dtype, device=sample.device) if latent_rgb_bias else None
+ )
  image = sample_to_lowres_estimated_image(
- samples=sample, latent_rgb_factors=latent_rgb_factors_torch, smooth_matrix=smooth_matrix_torch
+ samples=sample,
+ latent_rgb_factors=latent_rgb_factors_torch,
+ smooth_matrix=smooth_matrix_torch,
+ latent_rgb_bias=latent_rgb_bias_torch,
  )

  width = image.width * 8
@@ -8,11 +8,28 @@ Based on: https://github.com/wildminder/ComfyUI-DyPE

  from invokeai.backend.flux.dype.base import DyPEConfig
  from invokeai.backend.flux.dype.embed import DyPEEmbedND
- from invokeai.backend.flux.dype.presets import DyPEPreset, get_dype_config_for_resolution
+ from invokeai.backend.flux.dype.presets import (
+ DYPE_PRESET_4K,
+ DYPE_PRESET_AREA,
+ DYPE_PRESET_AUTO,
+ DYPE_PRESET_LABELS,
+ DYPE_PRESET_MANUAL,
+ DYPE_PRESET_OFF,
+ DyPEPreset,
+ get_dype_config_for_area,
+ get_dype_config_for_resolution,
+ )

  __all__ = [
  "DyPEConfig",
  "DyPEEmbedND",
  "DyPEPreset",
+ "DYPE_PRESET_OFF",
+ "DYPE_PRESET_MANUAL",
+ "DYPE_PRESET_AUTO",
+ "DYPE_PRESET_AREA",
+ "DYPE_PRESET_4K",
+ "DYPE_PRESET_LABELS",
+ "get_dype_config_for_area",
  "get_dype_config_for_resolution",
  ]
@@ -99,13 +99,17 @@ def compute_vision_yarn_freqs(
  The NTK-aware approach smoothly interpolates frequencies to cover larger
  position ranges without breaking the attention patterns.

+ DyPE (Dynamic Position Extrapolation) modulates the NTK scaling based on
+ the current timestep - stronger extrapolation in early steps (global structure),
+ weaker in late steps (fine details).
+
  Args:
  pos: Position tensor
  dim: Embedding dimension
  theta: RoPE base frequency
  scale_h: Height scaling factor
  scale_w: Width scaling factor
- current_sigma: Current noise level (reserved for future timestep-aware scaling)
+ current_sigma: Current noise level (1.0 = full noise, 0.0 = clean)
  dype_config: DyPE configuration

  Returns:
@@ -124,7 +128,24 @@
  # This increases the wavelength of position encodings proportionally
  if scale > 1.0:
  ntk_alpha = scale ** (dim / (dim - 2))
- scaled_theta = theta * ntk_alpha
+
+ # Apply timestep-dependent DyPE modulation
+ # mscale controls how strongly we apply the NTK extrapolation
+ # Early steps (high sigma): stronger extrapolation for global structure
+ # Late steps (low sigma): weaker extrapolation for fine details
+ mscale = get_timestep_mscale(
+ scale=scale,
+ current_sigma=current_sigma,
+ dype_scale=dype_config.dype_scale,
+ dype_exponent=dype_config.dype_exponent,
+ dype_start_sigma=dype_config.dype_start_sigma,
+ )
+
+ # Modulate NTK alpha by mscale
+ # When mscale > 1: interpolate towards stronger extrapolation
+ # When mscale = 1: use base NTK alpha
+ modulated_alpha = 1.0 + (ntk_alpha - 1.0) * mscale
+ scaled_theta = theta * modulated_alpha
  else:
  scaled_theta = theta

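The arithmetic of the new modulation is easy to check by hand. `get_timestep_mscale` itself is not shown in this diff, so the `mscale` values below are simply assumed inputs:

```python
# Illustrative values only: dim and scale are placeholders, mscale stands in for
# whatever get_timestep_mscale returns at a given sigma.
theta = 10_000.0
dim = 64
scale = 2.0                             # target side / base side

ntk_alpha = scale ** (dim / (dim - 2))  # ~2.05 for dim=64, scale=2
for mscale in (0.0, 1.0, 2.0):
    modulated_alpha = 1.0 + (ntk_alpha - 1.0) * mscale
    print(mscale, theta * modulated_alpha)
# mscale=0 leaves theta unscaled (no extrapolation), mscale=1 reproduces the plain
# NTK scaling, and mscale>1 pushes the extrapolation harder (early, high-sigma steps).
```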
@@ -151,14 +172,15 @@
  ) -> tuple[Tensor, Tensor]:
  """Compute RoPE frequencies using YARN/NTK method.

- Uses NTK-aware theta scaling for high-resolution support.
+ Uses NTK-aware theta scaling for high-resolution support with
+ timestep-dependent DyPE modulation.

  Args:
  pos: Position tensor
  dim: Embedding dimension
  theta: RoPE base frequency
  scale: Uniform scaling factor
- current_sigma: Current noise level (reserved for future use)
+ current_sigma: Current noise level (1.0 = full noise, 0.0 = clean)
  dype_config: DyPE configuration

  Returns:
@@ -169,10 +191,22 @@
  device = pos.device
  dtype = torch.float64 if device.type != "mps" else torch.float32

- # NTK-aware theta scaling
+ # NTK-aware theta scaling with DyPE modulation
  if scale > 1.0:
  ntk_alpha = scale ** (dim / (dim - 2))
- scaled_theta = theta * ntk_alpha
+
+ # Apply timestep-dependent DyPE modulation
+ mscale = get_timestep_mscale(
+ scale=scale,
+ current_sigma=current_sigma,
+ dype_scale=dype_config.dype_scale,
+ dype_exponent=dype_config.dype_exponent,
+ dype_start_sigma=dype_config.dype_start_sigma,
+ )
+
+ # Modulate NTK alpha by mscale
+ modulated_alpha = 1.0 + (ntk_alpha - 1.0) * mscale
+ scaled_theta = theta * modulated_alpha
  else:
  scaled_theta = theta

@@ -1,17 +1,29 @@
  """DyPE presets and automatic configuration."""

+ import math
  from dataclasses import dataclass
- from enum import Enum
+ from typing import Literal

  from invokeai.backend.flux.dype.base import DyPEConfig

-
- class DyPEPreset(str, Enum):
- """Predefined DyPE configurations."""
-
- OFF = "off" # DyPE disabled
- AUTO = "auto" # Automatically enable based on resolution
- PRESET_4K = "4k" # Optimized for 3840x2160 / 4096x2160
+ # DyPE preset type - using Literal for proper frontend dropdown support
+ DyPEPreset = Literal["off", "manual", "auto", "area", "4k"]
+
+ # Constants for preset values
+ DYPE_PRESET_OFF: DyPEPreset = "off"
+ DYPE_PRESET_MANUAL: DyPEPreset = "manual"
+ DYPE_PRESET_AUTO: DyPEPreset = "auto"
+ DYPE_PRESET_AREA: DyPEPreset = "area"
+ DYPE_PRESET_4K: DyPEPreset = "4k"
+
+ # Human-readable labels for the UI
+ DYPE_PRESET_LABELS: dict[str, str] = {
+ "off": "Off",
+ "manual": "Manual",
+ "auto": "Auto (>1536px)",
+ "area": "Area (auto)",
+ "4k": "4K Optimized",
+ }


  @dataclass
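Because `DyPEPreset` is now a `Literal` rather than an `Enum`, the set of valid values can still be recovered at runtime via `typing.get_args`. A hypothetical validation helper (not part of this release) might look like:

```python
from typing import get_args

from invokeai.backend.flux.dype.presets import DyPEPreset

VALID_PRESETS = frozenset(get_args(DyPEPreset))  # {"off", "manual", "auto", "area", "4k"}


def parse_preset(value: str) -> DyPEPreset:
    """Hypothetical helper: validate a raw string against the DyPEPreset Literal."""
    if value not in VALID_PRESETS:
        raise ValueError(f"Unknown DyPE preset: {value!r}")
    return value  # type: ignore[return-value]
```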
@@ -27,7 +39,7 @@ class DyPEPresetConfig:

  # Predefined preset configurations
  DYPE_PRESETS: dict[DyPEPreset, DyPEPresetConfig] = {
- DyPEPreset.PRESET_4K: DyPEPresetConfig(
+ DYPE_PRESET_4K: DyPEPresetConfig(
  base_resolution=1024,
  method="vision_yarn",
  dype_scale=2.0,
@@ -79,6 +91,50 @@ def get_dype_config_for_resolution(
  )


+ def get_dype_config_for_area(
+ width: int,
+ height: int,
+ base_resolution: int = 1024,
+ ) -> DyPEConfig | None:
+ """Automatically determine DyPE config based on target area.
+
+ Uses sqrt(area/base_area) as an effective side-length ratio.
+ DyPE is enabled only when target area exceeds base area.
+
+ Returns:
+ DyPEConfig if DyPE should be enabled, None otherwise
+ """
+ area = width * height
+ base_area = base_resolution**2
+
+ if area <= base_area:
+ return None
+
+ area_ratio = area / base_area
+ effective_side_ratio = math.sqrt(area_ratio)  # 1.0 at base, 2.0 at 2K (if base is 1K)
+
+ # Strength: 0 at base area, 8 at sat_area, clamped thereafter.
+ sat_area = 2027520  # Determined by experimentation where a vertical line appears
+ sat_side_ratio = math.sqrt(sat_area / base_area)
+ dynamic_dype_scale = 8.0 * (effective_side_ratio - 1.0) / (sat_side_ratio - 1.0)
+ dynamic_dype_scale = max(0.0, min(dynamic_dype_scale, 8.0))
+
+ # Continuous exponent schedule:
+ # r=1 -> 0.5, r=2 -> 1.0, r=4 -> 2.0 (exact), smoothly varying in between.
+ x = math.log2(effective_side_ratio)
+ dype_exponent = 0.25 * (x**2) + 0.25 * x + 0.5
+ dype_exponent = max(0.5, min(dype_exponent, 2.0))
+
+ return DyPEConfig(
+ enable_dype=True,
+ base_resolution=base_resolution,
+ method="vision_yarn",
+ dype_scale=dynamic_dype_scale,
+ dype_exponent=dype_exponent,
+ dype_start_sigma=1.0,
+ )
+
+
  def get_dype_config_from_preset(
  preset: DyPEPreset,
  width: int,
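A worked example of the area-based schedule above, for a 1280x1280 target at the default base_resolution of 1024 (the numbers follow directly from the formulas in the diff):

```python
import math

width, height, base_resolution = 1280, 1280, 1024
area_ratio = (width * height) / base_resolution**2        # 1.5625
effective_side_ratio = math.sqrt(area_ratio)               # 1.25

sat_side_ratio = math.sqrt(2027520 / base_resolution**2)   # ~1.39
dype_scale = min(8.0 * (effective_side_ratio - 1.0) / (sat_side_ratio - 1.0), 8.0)
x = math.log2(effective_side_ratio)
dype_exponent = min(max(0.25 * x**2 + 0.25 * x + 0.5, 0.5), 2.0)

print(round(dype_scale, 2), round(dype_exponent, 2))       # ~5.12, ~0.61
```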
@@ -92,41 +148,47 @@ def get_dype_config_from_preset(
  preset: The DyPE preset to use
  width: Target image width
  height: Target image height
- custom_scale: Optional custom dype_scale (overrides preset)
- custom_exponent: Optional custom dype_exponent (overrides preset)
+ custom_scale: Optional custom dype_scale (only used with 'manual' preset)
+ custom_exponent: Optional custom dype_exponent (only used with 'manual' preset)

  Returns:
  DyPEConfig if DyPE should be enabled, None otherwise
  """
- if preset == DyPEPreset.OFF:
- # Check if custom values are provided even with preset=OFF
- if custom_scale is not None:
- return DyPEConfig(
- enable_dype=True,
- base_resolution=1024,
- method="vision_yarn",
- dype_scale=custom_scale,
- dype_exponent=custom_exponent if custom_exponent is not None else 2.0,
- dype_start_sigma=1.0,
- )
+ if preset == DYPE_PRESET_OFF:
  return None

- if preset == DyPEPreset.AUTO:
- config = get_dype_config_for_resolution(
+ if preset == DYPE_PRESET_MANUAL:
+ # Manual mode - custom values can override defaults
+ max_dim = max(width, height)
+ scale = max_dim / 1024
+ dynamic_dype_scale = min(2.0 * scale, 8.0)
+ return DyPEConfig(
+ enable_dype=True,
+ base_resolution=1024,
+ method="vision_yarn",
+ dype_scale=custom_scale if custom_scale is not None else dynamic_dype_scale,
+ dype_exponent=custom_exponent if custom_exponent is not None else 2.0,
+ dype_start_sigma=1.0,
+ )
+
+ if preset == DYPE_PRESET_AUTO:
+ # Auto preset - custom values are ignored
+ return get_dype_config_for_resolution(
  width=width,
  height=height,
  base_resolution=1024,
  activation_threshold=1536,
  )
- # Apply custom overrides if provided
- if config is not None:
- if custom_scale is not None:
- config.dype_scale = custom_scale
- if custom_exponent is not None:
- config.dype_exponent = custom_exponent
- return config
-
- # Use preset configuration
+
+ if preset == DYPE_PRESET_AREA:
+ # Area-based preset - custom values are ignored
+ return get_dype_config_for_area(
+ width=width,
+ height=height,
+ base_resolution=1024,
+ )
+
+ # Use preset configuration (4K etc.) - custom values are ignored
  preset_config = DYPE_PRESETS.get(preset)
  if preset_config is None:
  return None
@@ -135,7 +197,7 @@
  enable_dype=True,
  base_resolution=preset_config.base_resolution,
  method=preset_config.method,
- dype_scale=custom_scale if custom_scale is not None else preset_config.dype_scale,
- dype_exponent=custom_exponent if custom_exponent is not None else preset_config.dype_exponent,
+ dype_scale=preset_config.dype_scale,
+ dype_exponent=preset_config.dype_exponent,
  dype_start_sigma=preset_config.dype_start_sigma,
  )
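A usage sketch of the revised dispatch, assuming `custom_scale` and `custom_exponent` remain optional keyword parameters (their defaults are outside this hunk): with the new 'manual' preset the custom values override the dynamic defaults, while 'off' now always returns None even when custom values are supplied.

```python
from invokeai.backend.flux.dype.presets import get_dype_config_from_preset

# 'manual' preset: custom values override the resolution-derived defaults.
config = get_dype_config_from_preset(
    preset="manual",
    width=2048,
    height=2048,
    custom_scale=None,     # falls back to min(2.0 * (2048 / 1024), 8.0) == 4.0
    custom_exponent=None,  # falls back to 2.0
)
assert config is not None and config.dype_scale == 4.0 and config.dype_exponent == 2.0

# 'off' preset: always disabled, even if custom values are passed
# (a behavior change from 6.11.0rc1, which enabled DyPE when custom_scale was set).
assert get_dype_config_from_preset(
    preset="off", width=4096, height=4096, custom_scale=2.0, custom_exponent=1.0
) is None
```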