InvokeAI 6.11.0__py3-none-any.whl → 6.11.0rc1__py3-none-any.whl
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
- invokeai/app/invocations/flux_denoise.py +5 -18
- invokeai/app/invocations/flux_model_loader.py +5 -2
- invokeai/app/util/step_callback.py +38 -52
- invokeai/backend/flux/dype/__init__.py +1 -14
- invokeai/backend/flux/dype/base.py +6 -40
- invokeai/backend/flux/dype/presets.py +35 -42
- invokeai/backend/flux2/denoise.py +3 -22
- invokeai/frontend/web/dist/assets/App-ClpIJstk.js +161 -0
- invokeai/frontend/web/dist/assets/{browser-ponyfill-u_ZjhQTI.js → browser-ponyfill-Cw07u5G1.js} +1 -1
- invokeai/frontend/web/dist/assets/{index-BB0nHmDe.js → index-DSKM8iGj.js} +64 -64
- invokeai/frontend/web/dist/index.html +1 -1
- invokeai/frontend/web/dist/locales/en.json +1 -21
- invokeai/frontend/web/dist/locales/it.json +16 -135
- invokeai/frontend/web/dist/locales/ru.json +11 -11
- invokeai/version/invokeai_version.py +1 -1
- {invokeai-6.11.0.dist-info → invokeai-6.11.0rc1.dist-info}/METADATA +1 -1
- {invokeai-6.11.0.dist-info → invokeai-6.11.0rc1.dist-info}/RECORD +23 -23
- invokeai/frontend/web/dist/assets/App-D13dX7be.js +0 -161
- {invokeai-6.11.0.dist-info → invokeai-6.11.0rc1.dist-info}/WHEEL +0 -0
- {invokeai-6.11.0.dist-info → invokeai-6.11.0rc1.dist-info}/entry_points.txt +0 -0
- {invokeai-6.11.0.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE +0 -0
- {invokeai-6.11.0.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
- {invokeai-6.11.0.dist-info → invokeai-6.11.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
- {invokeai-6.11.0.dist-info → invokeai-6.11.0rc1.dist-info}/top_level.txt +0 -0
@@ -32,12 +32,7 @@ from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.backend.flux.controlnet.instantx_controlnet_flux import InstantXControlNetFlux
 from invokeai.backend.flux.controlnet.xlabs_controlnet_flux import XLabsControlNetFlux
 from invokeai.backend.flux.denoise import denoise
-from invokeai.backend.flux.dype.presets import (
-    DYPE_PRESET_LABELS,
-    DYPE_PRESET_OFF,
-    DyPEPreset,
-    get_dype_config_from_preset,
-)
+from invokeai.backend.flux.dype.presets import DyPEPreset, get_dype_config_from_preset
 from invokeai.backend.flux.extensions.dype_extension import DyPEExtension
 from invokeai.backend.flux.extensions.instantx_controlnet_extension import InstantXControlNetExtension
 from invokeai.backend.flux.extensions.kontext_extension import KontextExtension
@@ -71,7 +66,7 @@ from invokeai.backend.util.devices import TorchDevice
     title="FLUX Denoise",
     tags=["image", "flux"],
     category="image",
-    version="4.
+    version="4.3.0",
 )
 class FluxDenoiseInvocation(BaseInvocation):
     """Run denoising process with a FLUX transformer model."""
@@ -175,24 +170,20 @@ class FluxDenoiseInvocation(BaseInvocation):
 
     # DyPE (Dynamic Position Extrapolation) for high-resolution generation
     dype_preset: DyPEPreset = InputField(
-        default=
+        default=DyPEPreset.OFF,
         description="DyPE preset for high-resolution generation. 'auto' enables automatically for resolutions > 1536px. '4k' uses optimized settings for 4K output.",
-        ui_order=100,
-        ui_choice_labels=DYPE_PRESET_LABELS,
     )
     dype_scale: Optional[float] = InputField(
         default=None,
         ge=0.0,
         le=8.0,
         description="DyPE magnitude (λs). Higher values = stronger extrapolation. Only used when dype_preset is not 'off'.",
-        ui_order=101,
     )
     dype_exponent: Optional[float] = InputField(
         default=None,
         ge=0.0,
         le=1000.0,
         description="DyPE decay speed (λt). Controls transition from low to high frequency detail. Only used when dype_preset is not 'off'.",
-        ui_order=102,
     )
 
     @torch.no_grad()
@@ -473,13 +464,9 @@ class FluxDenoiseInvocation(BaseInvocation):
                 target_width=self.width,
             )
             context.logger.info(
-                f"DyPE enabled:
-                f"
-                f"exponent={dype_config.dype_exponent:.2f}, start_sigma={dype_config.dype_start_sigma:.2f}, "
-                f"base_resolution={dype_config.base_resolution}"
+                f"DyPE enabled: {self.width}x{self.height}, preset={self.dype_preset.value}, "
+                f"scale={dype_config.dype_scale:.2f}, method={dype_config.method}"
             )
-        else:
-            context.logger.debug(f"DyPE disabled: resolution={self.width}x{self.height}, preset={self.dype_preset}")
 
         x = denoise(
             model=transformer,
@@ -6,7 +6,7 @@ from invokeai.app.invocations.baseinvocation import (
     invocation,
     invocation_output,
 )
-from invokeai.app.invocations.fields import FieldDescriptions, InputField, OutputField
+from invokeai.app.invocations.fields import FieldDescriptions, Input, InputField, OutputField
 from invokeai.app.invocations.model import CLIPField, ModelIdentifierField, T5EncoderField, TransformerField, VAEField
 from invokeai.app.services.shared.invocation_context import InvocationContext
 from invokeai.app.util.t5_model_identifier import (
@@ -37,25 +37,28 @@ class FluxModelLoaderOutput(BaseInvocationOutput):
     title="Main Model - FLUX",
     tags=["model", "flux"],
     category="model",
-    version="1.0.
+    version="1.0.6",
 )
 class FluxModelLoaderInvocation(BaseInvocation):
     """Loads a flux base model, outputting its submodels."""
 
     model: ModelIdentifierField = InputField(
         description=FieldDescriptions.flux_model,
+        input=Input.Direct,
         ui_model_base=BaseModelType.Flux,
         ui_model_type=ModelType.Main,
     )
 
     t5_encoder_model: ModelIdentifierField = InputField(
         description=FieldDescriptions.t5_encoder,
+        input=Input.Direct,
         title="T5 Encoder",
         ui_model_type=ModelType.T5Encoder,
     )
 
     clip_embed_model: ModelIdentifierField = InputField(
         description=FieldDescriptions.clip_embed_model,
+        input=Input.Direct,
         title="CLIP Embed",
         ui_model_type=ModelType.CLIPEmbed,
     )
@@ -93,60 +93,54 @@ COGVIEW4_LATENT_RGB_FACTORS = [
     [-0.00955853, -0.00980067, -0.00977842],
 ]
 
-# FLUX.2 uses 32 latent channels.
-#
+# FLUX.2 uses 32 latent channels. Since we don't have proper factors yet,
+# we extend FLUX factors with zeros for preview approximation.
 FLUX2_LATENT_RGB_FACTORS = [
     # R G B
-
-    [0.
-    [-0.
-    [0.
-    [0.
-    [0.
-    [
-    [-0.
-    [
-    [-0.
-    [-0.
-    [0.
-    [0.
-    [
-    [0.
-    [-0.
-    [0.
-
-    [
-    [0.
-    [0.
-    [0.
-    [0.
-    [0.
-    [
-    [0.
-    [0.
-    [0.
-    [
-    [
-    [
-    [
+    # First 16 channels (from FLUX)
+    [0.0118, 0.0024, 0.0017],
+    [-0.0074, -0.0108, -0.0003],
+    [0.0056, 0.0291, 0.0768],
+    [0.0342, -0.0681, -0.0427],
+    [-0.0258, 0.0092, 0.0463],
+    [0.0863, 0.0784, 0.0547],
+    [-0.0017, 0.0402, 0.0158],
+    [0.0501, 0.1058, 0.1152],
+    [-0.0209, -0.0218, -0.0329],
+    [-0.0314, 0.0083, 0.0896],
+    [0.0851, 0.0665, -0.0472],
+    [-0.0534, 0.0238, -0.0024],
+    [0.0452, -0.0026, 0.0048],
+    [0.0892, 0.0831, 0.0881],
+    [-0.1117, -0.0304, -0.0789],
+    [0.0027, -0.0479, -0.0043],
+    # Additional 16 channels (zeros as placeholder)
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
+    [0.0, 0.0, 0.0],
 ]
 
-FLUX2_LATENT_RGB_BIAS = [-0.0329, -0.0718, -0.0851]
-
 
 def sample_to_lowres_estimated_image(
-    samples: torch.Tensor,
-    latent_rgb_factors: torch.Tensor,
-    smooth_matrix: Optional[torch.Tensor] = None,
-    latent_rgb_bias: Optional[torch.Tensor] = None,
+    samples: torch.Tensor, latent_rgb_factors: torch.Tensor, smooth_matrix: Optional[torch.Tensor] = None
 ):
     if samples.dim() == 4:
         samples = samples[0]
     latent_image = samples.permute(1, 2, 0) @ latent_rgb_factors
 
-    if latent_rgb_bias is not None:
-        latent_image = latent_image + latent_rgb_bias
-
     if smooth_matrix is not None:
         latent_image = latent_image.unsqueeze(0).permute(3, 0, 1, 2)
         latent_image = torch.nn.functional.conv2d(latent_image, smooth_matrix.reshape((1, 1, 3, 3)), padding=1)
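For context on the preview path above: the step callback projects the latent sample onto RGB by multiplying the [H, W, C]-permuted latents with a [C, 3] factor matrix, so the 16 zero rows added for FLUX.2 simply contribute nothing to the estimate. A minimal sketch with stand-in shapes and random values (not the actual factors; the real display normalization may differ):

    import torch

    # Stand-in data: 32 latent channels on a 64x64 latent grid (illustrative only).
    latents = torch.randn(32, 64, 64)                # [C, H, W], as handed to the callback
    factors = torch.cat([torch.randn(16, 3) * 0.05,  # placeholder for the 16 FLUX rows
                         torch.zeros(16, 3)])        # the 16 zero rows drop out of the product
    rgb = latents.permute(1, 2, 0) @ factors         # [H, W, 3] low-res RGB estimate
    rgb = ((rgb + 1) / 2).clamp(0, 1)                # map roughly into [0, 1] for display
    print(rgb.shape)                                 # torch.Size([64, 64, 3])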
@@ -199,7 +193,6 @@ def diffusion_step_callback(
     sample = intermediate_state.latents
 
     smooth_matrix: list[list[float]] | None = None
-    latent_rgb_bias: list[float] | None = None
     if base_model in [BaseModelType.StableDiffusion1, BaseModelType.StableDiffusion2]:
         latent_rgb_factors = SD1_5_LATENT_RGB_FACTORS
     elif base_model in [BaseModelType.StableDiffusionXL, BaseModelType.StableDiffusionXLRefiner]:
@@ -213,7 +206,6 @@ def diffusion_step_callback(
         latent_rgb_factors = FLUX_LATENT_RGB_FACTORS
     elif base_model == BaseModelType.Flux2:
         latent_rgb_factors = FLUX2_LATENT_RGB_FACTORS
-        latent_rgb_bias = FLUX2_LATENT_RGB_BIAS
     elif base_model == BaseModelType.ZImage:
         # Z-Image uses FLUX-compatible VAE with 16 latent channels
         latent_rgb_factors = FLUX_LATENT_RGB_FACTORS
@@ -224,14 +216,8 @@ def diffusion_step_callback(
     smooth_matrix_torch = (
         torch.tensor(smooth_matrix, dtype=sample.dtype, device=sample.device) if smooth_matrix else None
     )
-    latent_rgb_bias_torch = (
-        torch.tensor(latent_rgb_bias, dtype=sample.dtype, device=sample.device) if latent_rgb_bias else None
-    )
     image = sample_to_lowres_estimated_image(
-        samples=sample,
-        latent_rgb_factors=latent_rgb_factors_torch,
-        smooth_matrix=smooth_matrix_torch,
-        latent_rgb_bias=latent_rgb_bias_torch,
+        samples=sample, latent_rgb_factors=latent_rgb_factors_torch, smooth_matrix=smooth_matrix_torch
     )
 
     width = image.width * 8
@@ -8,24 +8,11 @@ Based on: https://github.com/wildminder/ComfyUI-DyPE
 
 from invokeai.backend.flux.dype.base import DyPEConfig
 from invokeai.backend.flux.dype.embed import DyPEEmbedND
-from invokeai.backend.flux.dype.presets import (
-    DYPE_PRESET_4K,
-    DYPE_PRESET_AUTO,
-    DYPE_PRESET_LABELS,
-    DYPE_PRESET_MANUAL,
-    DYPE_PRESET_OFF,
-    DyPEPreset,
-    get_dype_config_for_resolution,
-)
+from invokeai.backend.flux.dype.presets import DyPEPreset, get_dype_config_for_resolution
 
 __all__ = [
     "DyPEConfig",
     "DyPEEmbedND",
     "DyPEPreset",
-    "DYPE_PRESET_OFF",
-    "DYPE_PRESET_MANUAL",
-    "DYPE_PRESET_AUTO",
-    "DYPE_PRESET_4K",
-    "DYPE_PRESET_LABELS",
     "get_dype_config_for_resolution",
 ]
@@ -99,17 +99,13 @@ def compute_vision_yarn_freqs(
     The NTK-aware approach smoothly interpolates frequencies to cover larger
     position ranges without breaking the attention patterns.
 
-    DyPE (Dynamic Position Extrapolation) modulates the NTK scaling based on
-    the current timestep - stronger extrapolation in early steps (global structure),
-    weaker in late steps (fine details).
-
     Args:
         pos: Position tensor
         dim: Embedding dimension
         theta: RoPE base frequency
         scale_h: Height scaling factor
         scale_w: Width scaling factor
-        current_sigma: Current noise level (
+        current_sigma: Current noise level (reserved for future timestep-aware scaling)
         dype_config: DyPE configuration
 
     Returns:
@@ -128,24 +124,7 @@ def compute_vision_yarn_freqs(
     # This increases the wavelength of position encodings proportionally
     if scale > 1.0:
         ntk_alpha = scale ** (dim / (dim - 2))
-
-        # Apply timestep-dependent DyPE modulation
-        # mscale controls how strongly we apply the NTK extrapolation
-        # Early steps (high sigma): stronger extrapolation for global structure
-        # Late steps (low sigma): weaker extrapolation for fine details
-        mscale = get_timestep_mscale(
-            scale=scale,
-            current_sigma=current_sigma,
-            dype_scale=dype_config.dype_scale,
-            dype_exponent=dype_config.dype_exponent,
-            dype_start_sigma=dype_config.dype_start_sigma,
-        )
-
-        # Modulate NTK alpha by mscale
-        # When mscale > 1: interpolate towards stronger extrapolation
-        # When mscale = 1: use base NTK alpha
-        modulated_alpha = 1.0 + (ntk_alpha - 1.0) * mscale
-        scaled_theta = theta * modulated_alpha
+        scaled_theta = theta * ntk_alpha
     else:
         scaled_theta = theta
 
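The branch kept by this change is plain NTK-aware scaling: the RoPE base frequency is multiplied by alpha = scale ** (dim / (dim - 2)), which lengthens the position-encoding wavelengths so larger canvases stay within the trained position range. A rough worked example (dim=56 is one of FLUX's per-axis RoPE dims; the other numbers are illustrative):

    # Illustrative numbers: rendering at twice the 1024px base resolution.
    dim, theta, scale = 56, 10000.0, 2.0
    ntk_alpha = scale ** (dim / (dim - 2))  # ~2.05 for dim=56, scale=2.0
    scaled_theta = theta * ntk_alpha        # ~20520: longer wavelengths, larger usable range
    print(round(ntk_alpha, 3), round(scaled_theta, 1))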
@@ -172,15 +151,14 @@ def compute_yarn_freqs(
 ) -> tuple[Tensor, Tensor]:
     """Compute RoPE frequencies using YARN/NTK method.
 
-    Uses NTK-aware theta scaling for high-resolution support
-    timestep-dependent DyPE modulation.
+    Uses NTK-aware theta scaling for high-resolution support.
 
     Args:
         pos: Position tensor
         dim: Embedding dimension
         theta: RoPE base frequency
         scale: Uniform scaling factor
-        current_sigma: Current noise level (
+        current_sigma: Current noise level (reserved for future use)
        dype_config: DyPE configuration
 
     Returns:
@@ -191,22 +169,10 @@ def compute_yarn_freqs(
     device = pos.device
     dtype = torch.float64 if device.type != "mps" else torch.float32
 
-    # NTK-aware theta scaling
+    # NTK-aware theta scaling
     if scale > 1.0:
         ntk_alpha = scale ** (dim / (dim - 2))
-
-        # Apply timestep-dependent DyPE modulation
-        mscale = get_timestep_mscale(
-            scale=scale,
-            current_sigma=current_sigma,
-            dype_scale=dype_config.dype_scale,
-            dype_exponent=dype_config.dype_exponent,
-            dype_start_sigma=dype_config.dype_start_sigma,
-        )
-
-        # Modulate NTK alpha by mscale
-        modulated_alpha = 1.0 + (ntk_alpha - 1.0) * mscale
-        scaled_theta = theta * modulated_alpha
+        scaled_theta = theta * ntk_alpha
     else:
         scaled_theta = theta
 
@@ -1,26 +1,17 @@
 """DyPE presets and automatic configuration."""
 
 from dataclasses import dataclass
-from 
+from enum import Enum
 
 from invokeai.backend.flux.dype.base import DyPEConfig
 
-
-DyPEPreset
-
-
-
-
-
-DYPE_PRESET_4K: DyPEPreset = "4k"
-
-# Human-readable labels for the UI
-DYPE_PRESET_LABELS: dict[str, str] = {
-    "off": "Off",
-    "manual": "Manual",
-    "auto": "Auto (>1536px)",
-    "4k": "4K Optimized",
-}
+
+class DyPEPreset(str, Enum):
+    """Predefined DyPE configurations."""
+
+    OFF = "off"  # DyPE disabled
+    AUTO = "auto"  # Automatically enable based on resolution
+    PRESET_4K = "4k"  # Optimized for 3840x2160 / 4096x2160
 
 
 @dataclass
@@ -36,7 +27,7 @@ class DyPEPresetConfig:
 
 # Predefined preset configurations
 DYPE_PRESETS: dict[DyPEPreset, DyPEPresetConfig] = {
-
+    DyPEPreset.PRESET_4K: DyPEPresetConfig(
         base_resolution=1024,
         method="vision_yarn",
         dype_scale=2.0,
@@ -101,39 +92,41 @@ def get_dype_config_from_preset(
         preset: The DyPE preset to use
         width: Target image width
         height: Target image height
-        custom_scale: Optional custom dype_scale (
-        custom_exponent: Optional custom dype_exponent (
+        custom_scale: Optional custom dype_scale (overrides preset)
+        custom_exponent: Optional custom dype_exponent (overrides preset)
 
     Returns:
         DyPEConfig if DyPE should be enabled, None otherwise
     """
-    if preset ==
+    if preset == DyPEPreset.OFF:
+        # Check if custom values are provided even with preset=OFF
+        if custom_scale is not None:
+            return DyPEConfig(
+                enable_dype=True,
+                base_resolution=1024,
+                method="vision_yarn",
+                dype_scale=custom_scale,
+                dype_exponent=custom_exponent if custom_exponent is not None else 2.0,
+                dype_start_sigma=1.0,
+            )
         return None
 
-    if preset ==
-
-        max_dim = max(width, height)
-        scale = max_dim / 1024
-        dynamic_dype_scale = min(2.0 * scale, 8.0)
-        return DyPEConfig(
-            enable_dype=True,
-            base_resolution=1024,
-            method="vision_yarn",
-            dype_scale=custom_scale if custom_scale is not None else dynamic_dype_scale,
-            dype_exponent=custom_exponent if custom_exponent is not None else 2.0,
-            dype_start_sigma=1.0,
-        )
-
-    if preset == DYPE_PRESET_AUTO:
-        # Auto preset - custom values are ignored
-        return get_dype_config_for_resolution(
+    if preset == DyPEPreset.AUTO:
+        config = get_dype_config_for_resolution(
             width=width,
             height=height,
             base_resolution=1024,
             activation_threshold=1536,
         )
-
-
+        # Apply custom overrides if provided
+        if config is not None:
+            if custom_scale is not None:
+                config.dype_scale = custom_scale
+            if custom_exponent is not None:
+                config.dype_exponent = custom_exponent
+        return config
+
+    # Use preset configuration
     preset_config = DYPE_PRESETS.get(preset)
     if preset_config is None:
         return None
@@ -142,7 +135,7 @@ def get_dype_config_from_preset(
         enable_dype=True,
         base_resolution=preset_config.base_resolution,
         method=preset_config.method,
-        dype_scale=preset_config.dype_scale,
-        dype_exponent=preset_config.dype_exponent,
+        dype_scale=custom_scale if custom_scale is not None else preset_config.dype_scale,
+        dype_exponent=custom_exponent if custom_exponent is not None else preset_config.dype_exponent,
         dype_start_sigma=preset_config.dype_start_sigma,
     )
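Taken together with the presets hunk above, the override behavior changes: OFF now still enables DyPE when a custom scale is supplied, AUTO applies custom values on top of the resolution-derived config, and the 4K preset lets custom values win over its defaults. A usage sketch, assuming keyword-style calls against the rc1 code (argument names as in the diff):

    from invokeai.backend.flux.dype.presets import DyPEPreset, get_dype_config_from_preset

    # OFF with no custom values: DyPE stays disabled (returns None).
    assert get_dype_config_from_preset(preset=DyPEPreset.OFF, width=1024, height=1024) is None

    # OFF with a custom scale: DyPE is enabled anyway, using the supplied magnitude.
    cfg = get_dype_config_from_preset(preset=DyPEPreset.OFF, width=2048, height=2048, custom_scale=3.0)

    # 4K preset with a custom exponent: preset defaults apply except where overridden.
    cfg = get_dype_config_from_preset(
        preset=DyPEPreset.PRESET_4K, width=3840, height=2160, custom_exponent=4.0
    )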
@@ -152,15 +152,7 @@ def denoise(
 
         # Apply inpainting merge at each step
         if inpaint_extension is not None:
-
-            gen_img = img[:, :original_seq_len, :]
-            ref_img = img[:, original_seq_len:, :]
-
-            # Merge only the generated part
-            gen_img = inpaint_extension.merge_intermediate_latents_with_init_latents(gen_img, t_prev)
-
-            # Concatenate back together
-            img = torch.cat([gen_img, ref_img], dim=1)
+            img = inpaint_extension.merge_intermediate_latents_with_init_latents(img, t_prev)
 
         # For Heun, only increment user step after second-order step completes
         if is_heun:
@@ -247,19 +239,8 @@ def denoise(
 
         # Apply inpainting merge at each step
        if inpaint_extension is not None:
-
-
-            ref_img = img[:, original_seq_len:, :]
-
-            # Merge only the generated part
-            gen_img = inpaint_extension.merge_intermediate_latents_with_init_latents(gen_img, t_prev)
-
-            # Concatenate back together
-            img = torch.cat([gen_img, ref_img], dim=1)
-
-            # Handling preview images
-            preview_gen = preview_img[:, :original_seq_len, :]
-            preview_gen = inpaint_extension.merge_intermediate_latents_with_init_latents(preview_gen, 0.0)
+            img = inpaint_extension.merge_intermediate_latents_with_init_latents(img, t_prev)
+            preview_img = inpaint_extension.merge_intermediate_latents_with_init_latents(preview_img, 0.0)
 
         # Extract only the generated image portion for preview (exclude reference images)
         callback_latents = preview_img[:, :original_seq_len, :] if img_cond_seq is not None else preview_img