InvokeAI 6.10.0__py3-none-any.whl → 6.10.0rc1__py3-none-any.whl
This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- invokeai/app/invocations/flux_denoise.py +1 -15
- invokeai/app/invocations/metadata_linked.py +0 -47
- invokeai/app/invocations/z_image_denoise.py +84 -244
- invokeai/app/services/config/config_default.py +1 -3
- invokeai/app/services/model_manager/model_manager_default.py +0 -7
- invokeai/backend/flux/denoise.py +11 -196
- invokeai/backend/model_manager/configs/lora.py +0 -36
- invokeai/backend/model_manager/load/model_cache/model_cache.py +2 -104
- invokeai/backend/model_manager/load/model_loaders/cogview4.py +1 -2
- invokeai/backend/model_manager/load/model_loaders/flux.py +6 -13
- invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py +2 -4
- invokeai/backend/model_manager/load/model_loaders/onnx.py +0 -1
- invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +1 -2
- invokeai/backend/model_manager/load/model_loaders/z_image.py +3 -37
- invokeai/backend/model_manager/starter_models.py +4 -13
- invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py +5 -39
- invokeai/backend/quantization/gguf/ggml_tensor.py +4 -15
- invokeai/backend/z_image/extensions/regional_prompting_extension.py +12 -10
- invokeai/frontend/web/dist/assets/App-CYhlZO3Q.js +161 -0
- invokeai/frontend/web/dist/assets/{browser-ponyfill-4xPFTMT3.js → browser-ponyfill-DHZxq1nk.js} +1 -1
- invokeai/frontend/web/dist/assets/{index-vCDSQboA.js → index-dgSJAY--.js} +51 -51
- invokeai/frontend/web/dist/index.html +1 -1
- invokeai/frontend/web/dist/locales/en.json +5 -11
- invokeai/version/invokeai_version.py +1 -1
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/METADATA +2 -2
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/RECORD +32 -39
- invokeai/app/invocations/pbr_maps.py +0 -59
- invokeai/backend/flux/schedulers.py +0 -62
- invokeai/backend/image_util/pbr_maps/architecture/block.py +0 -367
- invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py +0 -70
- invokeai/backend/image_util/pbr_maps/pbr_maps.py +0 -141
- invokeai/backend/image_util/pbr_maps/utils/image_ops.py +0 -93
- invokeai/frontend/web/dist/assets/App-BBELGD-n.js +0 -161
- invokeai/frontend/web/dist/locales/en-GB.json +0 -1
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/WHEEL +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/entry_points.txt +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/top_level.txt +0 -0
invokeai/backend/flux/denoise.py
CHANGED
@@ -1,9 +1,7 @@
-import inspect
 import math
 from typing import Callable
 
 import torch
-from diffusers.schedulers.scheduling_utils import SchedulerMixin
 from tqdm import tqdm
 
 from invokeai.backend.flux.controlnet.controlnet_flux_output import ControlNetFluxOutput, sum_controlnet_flux_outputs

@@ -37,207 +35,24 @@ def denoise(
     # extra img tokens (sequence-wise) - for Kontext conditioning
     img_cond_seq: torch.Tensor | None = None,
     img_cond_seq_ids: torch.Tensor | None = None,
-    # Optional scheduler for alternative sampling methods
-    scheduler: SchedulerMixin | None = None,
 ):
-    #
-
-
-
-
-
-
-
-
-
-
-            # Scheduler supports custom sigmas - use InvokeAI's time-shifted schedule
-            scheduler.set_timesteps(sigmas=timesteps, device=img.device)
-        else:
-            # LCM or scheduler doesn't support custom sigmas - use num_inference_steps
-            # The schedule will be computed by the scheduler itself
-            num_inference_steps = len(timesteps) - 1
-            scheduler.set_timesteps(num_inference_steps=num_inference_steps, device=img.device)
-
-        # For schedulers like Heun, the number of actual steps may differ
-        # (Heun doubles timesteps internally)
-        num_scheduler_steps = len(scheduler.timesteps)
-        # For user-facing step count, use the original number of denoising steps
-        total_steps = len(timesteps) - 1
-    else:
-        total_steps = len(timesteps) - 1
-        num_scheduler_steps = total_steps
-
+    # step 0 is the initial state
+    total_steps = len(timesteps) - 1
+    step_callback(
+        PipelineIntermediateState(
+            step=0,
+            order=1,
+            total_steps=total_steps,
+            timestep=int(timesteps[0]),
+            latents=img,
+        ),
+    )
     # guidance_vec is ignored for schnell.
     guidance_vec = torch.full((img.shape[0],), guidance, device=img.device, dtype=img.dtype)
 
     # Store original sequence length for slicing predictions
     original_seq_len = img.shape[1]
 
-    # Track the actual step for user-facing progress (accounts for Heun's double steps)
-    user_step = 0
-
-    if use_scheduler:
-        # Use diffusers scheduler for stepping
-        # Use tqdm with total_steps (user-facing steps) not num_scheduler_steps (internal steps)
-        # This ensures progress bar shows 1/8, 2/8, etc. even when scheduler uses more internal steps
-        pbar = tqdm(total=total_steps, desc="Denoising")
-        for step_index in range(num_scheduler_steps):
-            timestep = scheduler.timesteps[step_index]
-            # Convert scheduler timestep (0-1000) to normalized (0-1) for the model
-            t_curr = timestep.item() / scheduler.config.num_train_timesteps
-            t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device)
-
-            # For Heun scheduler, track if we're in first or second order step
-            is_heun = hasattr(scheduler, "state_in_first_order")
-            in_first_order = scheduler.state_in_first_order if is_heun else True
-
-            # Run ControlNet models
-            controlnet_residuals: list[ControlNetFluxOutput] = []
-            for controlnet_extension in controlnet_extensions:
-                controlnet_residuals.append(
-                    controlnet_extension.run_controlnet(
-                        timestep_index=user_step,
-                        total_num_timesteps=total_steps,
-                        img=img,
-                        img_ids=img_ids,
-                        txt=pos_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
-                        txt_ids=pos_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
-                        y=pos_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
-                        timesteps=t_vec,
-                        guidance=guidance_vec,
-                    )
-                )
-
-            merged_controlnet_residuals = sum_controlnet_flux_outputs(controlnet_residuals)
-
-            # Prepare input for model
-            img_input = img
-            img_input_ids = img_ids
-
-            if img_cond is not None:
-                img_input = torch.cat((img_input, img_cond), dim=-1)
-
-            if img_cond_seq is not None:
-                assert img_cond_seq_ids is not None
-                img_input = torch.cat((img_input, img_cond_seq), dim=1)
-                img_input_ids = torch.cat((img_input_ids, img_cond_seq_ids), dim=1)
-
-            pred = model(
-                img=img_input,
-                img_ids=img_input_ids,
-                txt=pos_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
-                txt_ids=pos_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
-                y=pos_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
-                timesteps=t_vec,
-                guidance=guidance_vec,
-                timestep_index=user_step,
-                total_num_timesteps=total_steps,
-                controlnet_double_block_residuals=merged_controlnet_residuals.double_block_residuals,
-                controlnet_single_block_residuals=merged_controlnet_residuals.single_block_residuals,
-                ip_adapter_extensions=pos_ip_adapter_extensions,
-                regional_prompting_extension=pos_regional_prompting_extension,
-            )
-
-            if img_cond_seq is not None:
-                pred = pred[:, :original_seq_len]
-
-            # Get CFG scale for current user step
-            step_cfg_scale = cfg_scale[min(user_step, len(cfg_scale) - 1)]
-
-            if not math.isclose(step_cfg_scale, 1.0):
-                if neg_regional_prompting_extension is None:
-                    raise ValueError("Negative text conditioning is required when cfg_scale is not 1.0.")
-
-                neg_img_input = img
-                neg_img_input_ids = img_ids
-
-                if img_cond is not None:
-                    neg_img_input = torch.cat((neg_img_input, img_cond), dim=-1)
-
-                if img_cond_seq is not None:
-                    neg_img_input = torch.cat((neg_img_input, img_cond_seq), dim=1)
-                    neg_img_input_ids = torch.cat((neg_img_input_ids, img_cond_seq_ids), dim=1)
-
-                neg_pred = model(
-                    img=neg_img_input,
-                    img_ids=neg_img_input_ids,
-                    txt=neg_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
-                    txt_ids=neg_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
-                    y=neg_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
-                    timesteps=t_vec,
-                    guidance=guidance_vec,
-                    timestep_index=user_step,
-                    total_num_timesteps=total_steps,
-                    controlnet_double_block_residuals=None,
-                    controlnet_single_block_residuals=None,
-                    ip_adapter_extensions=neg_ip_adapter_extensions,
-                    regional_prompting_extension=neg_regional_prompting_extension,
-                )
-
-                if img_cond_seq is not None:
-                    neg_pred = neg_pred[:, :original_seq_len]
-                pred = neg_pred + step_cfg_scale * (pred - neg_pred)
-
-            # Use scheduler.step() for the update
-            step_output = scheduler.step(model_output=pred, timestep=timestep, sample=img)
-            img = step_output.prev_sample
-
-            # Get t_prev for inpainting (next sigma value)
-            if step_index + 1 < len(scheduler.sigmas):
-                t_prev = scheduler.sigmas[step_index + 1].item()
-            else:
-                t_prev = 0.0
-
-            if inpaint_extension is not None:
-                img = inpaint_extension.merge_intermediate_latents_with_init_latents(img, t_prev)
-
-            # For Heun, only increment user step after second-order step completes
-            if is_heun:
-                if not in_first_order:
-                    # Second order step completed
-                    user_step += 1
-                    # Only call step_callback if we haven't exceeded total_steps
-                    if user_step <= total_steps:
-                        pbar.update(1)
-                        preview_img = img - t_curr * pred
-                        if inpaint_extension is not None:
-                            preview_img = inpaint_extension.merge_intermediate_latents_with_init_latents(
-                                preview_img, 0.0
-                            )
-                        step_callback(
-                            PipelineIntermediateState(
-                                step=user_step,
-                                order=2,
-                                total_steps=total_steps,
-                                timestep=int(t_curr * 1000),
-                                latents=preview_img,
-                            ),
-                        )
-            else:
-                # For LCM and other first-order schedulers
-                user_step += 1
-                # Only call step_callback if we haven't exceeded total_steps
-                # (LCM scheduler may have more internal steps than user-facing steps)
-                if user_step <= total_steps:
-                    pbar.update(1)
-                    preview_img = img - t_curr * pred
-                    if inpaint_extension is not None:
-                        preview_img = inpaint_extension.merge_intermediate_latents_with_init_latents(preview_img, 0.0)
-                    step_callback(
-                        PipelineIntermediateState(
-                            step=user_step,
-                            order=1,
-                            total_steps=total_steps,
-                            timestep=int(t_curr * 1000),
-                            latents=preview_img,
-                        ),
-                    )
-
-        pbar.close()
-        return img
-
-    # Original Euler implementation (when scheduler is None)
     for step_index, (t_curr, t_prev) in tqdm(list(enumerate(zip(timesteps[:-1], timesteps[1:], strict=True)))):
         t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device)
 
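For orientation: the removed comment labels the surviving code path as the "Original Euler implementation", and the removed preview code computes an x0-style estimate as img - t_curr * pred. A minimal sketch of those two operations, with illustrative helper names that are not part of InvokeAI's API:

import torch

def euler_flow_step(img: torch.Tensor, pred: torch.Tensor, t_curr: float, t_prev: float) -> torch.Tensor:
    # One Euler step of the flow-matching ODE: move the latents from t_curr to t_prev
    # along the velocity predicted by the transformer (a sketch, not the InvokeAI code).
    return img + (t_prev - t_curr) * pred

def preview_latents(img: torch.Tensor, pred: torch.Tensor, t_curr: float) -> torch.Tensor:
    # Rough x0 estimate used for progress previews; mirrors the `img - t_curr * pred`
    # expression visible in the removed scheduler path above.
    return img - t_curr * pred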
invokeai/backend/model_manager/configs/lora.py
CHANGED

@@ -227,42 +227,6 @@ class LoRA_LyCORIS_ZImage_Config(LoRA_LyCORIS_Config_Base, Config_Base):
 
     base: Literal[BaseModelType.ZImage] = Field(default=BaseModelType.ZImage)
 
-    @classmethod
-    def _validate_looks_like_lora(cls, mod: ModelOnDisk) -> None:
-        """Z-Image LoRAs have different key patterns than SD/SDXL LoRAs.
-
-        Z-Image LoRAs use keys like:
-        - diffusion_model.layers.X.attention.to_k.lora_down.weight (DoRA format)
-        - diffusion_model.layers.X.attention.to_k.lora_A.weight (PEFT format)
-        - diffusion_model.layers.X.attention.to_k.dora_scale (DoRA scale)
-        """
-        state_dict = mod.load_state_dict()
-
-        # Check for Z-Image specific LoRA patterns
-        has_z_image_lora_keys = state_dict_has_any_keys_starting_with(
-            state_dict,
-            {
-                "diffusion_model.layers.",  # Z-Image S3-DiT layer pattern
-            },
-        )
-
-        # Also check for LoRA weight suffixes (various formats)
-        has_lora_suffix = state_dict_has_any_keys_ending_with(
-            state_dict,
-            {
-                "lora_A.weight",
-                "lora_B.weight",
-                "lora_down.weight",
-                "lora_up.weight",
-                "dora_scale",
-            },
-        )
-
-        if has_z_image_lora_keys and has_lora_suffix:
-            return
-
-        raise NotAMatchError("model does not match Z-Image LoRA heuristics")
-
     @classmethod
     def _get_base_or_raise(cls, mod: ModelOnDisk) -> BaseModelType:
         """Z-Image LoRAs are identified by their diffusion_model.layers structure.
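The deleted _validate_looks_like_lora matched Z-Image LoRAs purely by key shape. A condensed sketch of that heuristic over a plain state-dict mapping; the constant and function names below are illustrative, not the InvokeAI utilities:

Z_IMAGE_PREFIXES = ("diffusion_model.layers.",)
LORA_SUFFIXES = ("lora_A.weight", "lora_B.weight", "lora_down.weight", "lora_up.weight", "dora_scale")

def looks_like_z_image_lora(state_dict: dict) -> bool:
    # True when at least one key uses the Z-Image S3-DiT layer prefix AND
    # at least one key carries a LoRA/DoRA weight suffix.
    has_prefix = any(key.startswith(Z_IMAGE_PREFIXES) for key in state_dict)
    has_suffix = any(key.endswith(LORA_SUFFIXES) for key in state_dict)
    return has_prefix and has_suffix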
invokeai/backend/model_manager/load/model_cache/model_cache.py
CHANGED

@@ -55,21 +55,6 @@ def synchronized(method: Callable[..., Any]) -> Callable[..., Any]:
     return wrapper
 
 
-def record_activity(method: Callable[..., Any]) -> Callable[..., Any]:
-    """A decorator that records activity after a method completes successfully.
-
-    Note: This decorator should be applied to methods that already hold self._lock.
-    """
-
-    @wraps(method)
-    def wrapper(self, *args, **kwargs):
-        result = method(self, *args, **kwargs)
-        self._record_activity()
-        return result
-
-    return wrapper
-
-
 @dataclass
 class CacheEntrySnapshot:
     cache_key: str

@@ -147,7 +132,6 @@ class ModelCache:
         storage_device: torch.device | str = "cpu",
         log_memory_usage: bool = False,
         logger: Optional[Logger] = None,
-        keep_alive_minutes: float = 0,
     ):
         """Initialize the model RAM cache.
 

@@ -167,7 +151,6 @@ class ModelCache:
         snapshots, so it is recommended to disable this feature unless you are actively inspecting the model cache's
         behaviour.
         :param logger: InvokeAILogger to use (otherwise creates one)
-        :param keep_alive_minutes: How long to keep models in cache after last use (in minutes). 0 means keep indefinitely.
         """
         self._enable_partial_loading = enable_partial_loading
         self._keep_ram_copy_of_weights = keep_ram_copy_of_weights

@@ -199,12 +182,6 @@ class ModelCache:
         self._on_cache_miss_callbacks: set[CacheMissCallback] = set()
         self._on_cache_models_cleared_callbacks: set[CacheModelsClearedCallback] = set()
 
-        # Keep-alive timeout support
-        self._keep_alive_minutes = keep_alive_minutes
-        self._last_activity_time: Optional[float] = None
-        self._timeout_timer: Optional[threading.Timer] = None
-        self._shutdown_event = threading.Event()
-
     def on_cache_hit(self, cb: CacheHitCallback) -> Callable[[], None]:
         self._on_cache_hit_callbacks.add(cb)
 

@@ -213,7 +190,7 @@ class ModelCache:
 
         return unsubscribe
 
-    def on_cache_miss(self, cb:
+    def on_cache_miss(self, cb: CacheHitCallback) -> Callable[[], None]:
         self._on_cache_miss_callbacks.add(cb)
 
         def unsubscribe() -> None:

@@ -241,78 +218,7 @@ class ModelCache:
         """Set the CacheStats object for collecting cache statistics."""
         self._stats = stats
 
-    def _record_activity(self) -> None:
-        """Record model activity and reset the timeout timer if configured.
-
-        Note: This method should only be called when self._lock is already held.
-        """
-        if self._keep_alive_minutes <= 0:
-            return
-
-        self._last_activity_time = time.time()
-
-        # Cancel any existing timer
-        if self._timeout_timer is not None:
-            self._timeout_timer.cancel()
-
-        # Start a new timer
-        timeout_seconds = self._keep_alive_minutes * 60
-        self._timeout_timer = threading.Timer(timeout_seconds, self._on_timeout)
-        # Set as daemon so it doesn't prevent application shutdown
-        self._timeout_timer.daemon = True
-        self._timeout_timer.start()
-        self._logger.debug(f"Model cache activity recorded. Timeout set to {self._keep_alive_minutes} minutes.")
-
     @synchronized
-    @record_activity
-    def _on_timeout(self) -> None:
-        """Called when the keep-alive timeout expires. Clears the model cache."""
-        if self._shutdown_event.is_set():
-            return
-
-        # Double-check if there has been activity since the timer was set
-        # This handles the race condition where activity occurred just before the timer fired
-        if self._last_activity_time is not None and self._keep_alive_minutes > 0:
-            elapsed_minutes = (time.time() - self._last_activity_time) / 60
-            if elapsed_minutes < self._keep_alive_minutes:
-                # Activity occurred, don't clear cache
-                self._logger.debug(
-                    f"Model cache timeout fired but activity detected {elapsed_minutes:.2f} minutes ago. "
-                    f"Skipping cache clear."
-                )
-                return
-
-        # Check if there are any unlocked models that can be cleared
-        unlocked_models = [key for key, entry in self._cached_models.items() if not entry.is_locked]
-
-        if len(unlocked_models) > 0:
-            self._logger.info(
-                f"Model cache keep-alive timeout of {self._keep_alive_minutes} minutes expired. "
-                f"Clearing {len(unlocked_models)} unlocked model(s) from cache."
-            )
-            # Clear the cache by requesting a very large amount of space.
-            # This is the same logic used by the "Clear Model Cache" button.
-            # Using 1000 GB ensures all unlocked models are removed.
-            self._make_room_internal(1000 * GB)
-        elif len(self._cached_models) > 0:
-            # All models are locked, don't log at info level
-            self._logger.debug(
-                f"Model cache timeout fired but all {len(self._cached_models)} model(s) are locked. "
-                f"Skipping cache clear."
-            )
-        else:
-            self._logger.debug("Model cache timeout fired but cache is already empty.")
-
-    @synchronized
-    def shutdown(self) -> None:
-        """Shutdown the model cache, cancelling any pending timers."""
-        self._shutdown_event.set()
-        if self._timeout_timer is not None:
-            self._timeout_timer.cancel()
-            self._timeout_timer = None
-
-    @synchronized
-    @record_activity
     def put(self, key: str, model: AnyModel) -> None:
         """Add a model to the cache."""
         if key in self._cached_models:

@@ -322,7 +228,7 @@ class ModelCache:
             return
 
         size = calc_model_size_by_data(self._logger, model)
-        self.
+        self.make_room(size)
 
         # Inject custom modules into the model.
         if isinstance(model, torch.nn.Module):

@@ -366,7 +272,6 @@ class ModelCache:
         return overview
 
     @synchronized
-    @record_activity
     def get(self, key: str, stats_name: Optional[str] = None) -> CacheRecord:
         """Retrieve a model from the cache.
 

@@ -404,11 +309,9 @@ class ModelCache:
             self._logger.debug(f"Cache hit: {key} (Type: {cache_entry.cached_model.model.__class__.__name__})")
             for cb in self._on_cache_hit_callbacks:
                 cb(model_key=key, cache_snapshot=self._get_cache_snapshot())
-
         return cache_entry
 
     @synchronized
-    @record_activity
     def lock(self, cache_entry: CacheRecord, working_mem_bytes: Optional[int]) -> None:
         """Lock a model for use and move it into VRAM."""
         if cache_entry.key not in self._cached_models:

@@ -445,7 +348,6 @@ class ModelCache:
         self._log_cache_state()
 
     @synchronized
-    @record_activity
     def unlock(self, cache_entry: CacheRecord) -> None:
         """Unlock a model."""
         if cache_entry.key not in self._cached_models:

@@ -789,10 +691,6 @@ class ModelCache:
         external references to the model, there's nothing that the cache can do about it, and those models will not be
         garbage-collected.
         """
-        self._make_room_internal(bytes_needed)
-
-    def _make_room_internal(self, bytes_needed: int) -> None:
-        """Internal implementation of make_room(). Assumes the lock is already held."""
         self._logger.debug(f"Making room for {bytes_needed / MB:.2f}MB of RAM.")
         self._log_cache_state(title="Before dropping models:")
 
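Taken together, the hunks above remove the keep-alive timeout feature from ModelCache (the keep_alive_minutes option, the record_activity decorator, and the timer machinery). For reference, a standalone sketch of the pattern that was removed, condensed from the deleted code; the class name and clear_cache callback are illustrative, not InvokeAI API:

import threading
import time

class KeepAliveTimer:
    """Invokes a clear-cache callback after keep_alive_minutes of inactivity (0 disables the timer)."""

    def __init__(self, keep_alive_minutes: float, clear_cache) -> None:
        self._keep_alive_minutes = keep_alive_minutes
        self._clear_cache = clear_cache
        self._last_activity_time: float | None = None
        self._timer: threading.Timer | None = None

    def record_activity(self) -> None:
        if self._keep_alive_minutes <= 0:
            return
        self._last_activity_time = time.time()
        # Cancel any existing timer and start a fresh one.
        if self._timer is not None:
            self._timer.cancel()
        self._timer = threading.Timer(self._keep_alive_minutes * 60, self._on_timeout)
        self._timer.daemon = True  # don't block interpreter shutdown
        self._timer.start()

    def _on_timeout(self) -> None:
        # Guard against the race where activity happened just before the timer fired.
        if self._last_activity_time is not None:
            elapsed_minutes = (time.time() - self._last_activity_time) / 60
            if elapsed_minutes < self._keep_alive_minutes:
                return
        self._clear_cache()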
invokeai/backend/model_manager/load/model_loaders/cogview4.py
CHANGED

@@ -45,13 +45,12 @@ class CogView4DiffusersModel(GenericDiffusersLoader):
                 model_path,
                 torch_dtype=dtype,
                 variant=variant,
-                local_files_only=True,
             )
         except OSError as e:
             if variant and "no file named" in str(
                 e
             ):  # try without the variant, just in case user's preferences changed
-                result = load_class.from_pretrained(model_path, torch_dtype=dtype
+                result = load_class.from_pretrained(model_path, torch_dtype=dtype)
             else:
                 raise e
 
invokeai/backend/model_manager/load/model_loaders/flux.py
CHANGED

@@ -122,9 +122,9 @@ class CLIPDiffusersLoader(ModelLoader):
 
         match submodel_type:
             case SubModelType.Tokenizer:
-                return CLIPTokenizer.from_pretrained(Path(config.path) / "tokenizer"
+                return CLIPTokenizer.from_pretrained(Path(config.path) / "tokenizer")
             case SubModelType.TextEncoder:
-                return CLIPTextModel.from_pretrained(Path(config.path) / "text_encoder"
+                return CLIPTextModel.from_pretrained(Path(config.path) / "text_encoder")
 
         raise ValueError(
             f"Only Tokenizer and TextEncoder submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}"

@@ -148,12 +148,10 @@ class BnbQuantizedLlmInt8bCheckpointModel(ModelLoader):
         )
         match submodel_type:
             case SubModelType.Tokenizer2 | SubModelType.Tokenizer3:
-                return T5TokenizerFast.from_pretrained(
-                    Path(config.path) / "tokenizer_2", max_length=512, local_files_only=True
-                )
+                return T5TokenizerFast.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512)
             case SubModelType.TextEncoder2 | SubModelType.TextEncoder3:
                 te2_model_path = Path(config.path) / "text_encoder_2"
-                model_config = AutoConfig.from_pretrained(te2_model_path
+                model_config = AutoConfig.from_pretrained(te2_model_path)
                 with accelerate.init_empty_weights():
                     model = AutoModelForTextEncoding.from_config(model_config)
                 model = quantize_model_llm_int8(model, modules_to_not_convert=set())

@@ -194,15 +192,10 @@ class T5EncoderCheckpointModel(ModelLoader):
 
         match submodel_type:
             case SubModelType.Tokenizer2 | SubModelType.Tokenizer3:
-                return T5TokenizerFast.from_pretrained(
-                    Path(config.path) / "tokenizer_2", max_length=512, local_files_only=True
-                )
+                return T5TokenizerFast.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512)
             case SubModelType.TextEncoder2 | SubModelType.TextEncoder3:
                 return T5EncoderModel.from_pretrained(
-                    Path(config.path) / "text_encoder_2",
-                    torch_dtype="auto",
-                    low_cpu_mem_usage=True,
-                    local_files_only=True,
+                    Path(config.path) / "text_encoder_2", torch_dtype="auto", low_cpu_mem_usage=True
                 )
 
         raise ValueError(
invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py
CHANGED

@@ -37,14 +37,12 @@ class GenericDiffusersLoader(ModelLoader):
         repo_variant = config.repo_variant if isinstance(config, Diffusers_Config_Base) else None
         variant = repo_variant.value if repo_variant else None
         try:
-            result: AnyModel = model_class.from_pretrained(
-                model_path, torch_dtype=self._torch_dtype, variant=variant, local_files_only=True
-            )
+            result: AnyModel = model_class.from_pretrained(model_path, torch_dtype=self._torch_dtype, variant=variant)
         except OSError as e:
             if variant and "no file named" in str(
                 e
             ):  # try without the variant, just in case user's preferences changed
-                result = model_class.from_pretrained(model_path, torch_dtype=self._torch_dtype
+                result = model_class.from_pretrained(model_path, torch_dtype=self._torch_dtype)
             else:
                 raise e
         return result
invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
CHANGED

@@ -80,13 +80,12 @@ class StableDiffusionDiffusersModel(GenericDiffusersLoader):
                 model_path,
                 torch_dtype=self._torch_dtype,
                 variant=variant,
-                local_files_only=True,
             )
         except OSError as e:
             if variant and "no file named" in str(
                 e
             ):  # try without the variant, just in case user's preferences changed
-                result = load_class.from_pretrained(model_path, torch_dtype=self._torch_dtype
+                result = load_class.from_pretrained(model_path, torch_dtype=self._torch_dtype)
             else:
                 raise e
 
invokeai/backend/model_manager/load/model_loaders/z_image.py
CHANGED

@@ -384,19 +384,15 @@ class Qwen3EncoderLoader(ModelLoader):
 
         match submodel_type:
             case SubModelType.Tokenizer:
-
-                # The tokenizer files should already exist locally in the model directory
-                return AutoTokenizer.from_pretrained(tokenizer_path, local_files_only=True)
+                return AutoTokenizer.from_pretrained(tokenizer_path)
             case SubModelType.TextEncoder:
                 # Determine safe dtype based on target device capabilities
                 target_device = TorchDevice.choose_torch_device()
                 model_dtype = TorchDevice.choose_bfloat16_safe_dtype(target_device)
-                # Use local_files_only=True to prevent network requests for validation
                 return Qwen3ForCausalLM.from_pretrained(
                     text_encoder_path,
                     torch_dtype=model_dtype,
                     low_cpu_mem_usage=True,
-                    local_files_only=True,
                 )
 
         raise ValueError(

@@ -530,27 +526,12 @@ class Qwen3EncoderCheckpointLoader(ModelLoader):
                 return self._load_from_singlefile(config)
             case SubModelType.Tokenizer:
                 # For single-file Qwen3, load tokenizer from HuggingFace
-
-                return self._load_tokenizer_with_offline_fallback()
+                return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
 
         raise ValueError(
             f"Only TextEncoder and Tokenizer submodels are supported. Received: {submodel_type.value if submodel_type else 'None'}"
         )
 
-    def _load_tokenizer_with_offline_fallback(self) -> AnyModel:
-        """Load tokenizer with local_files_only fallback for offline support.
-
-        First tries to load from local cache (offline), falling back to network download
-        if the tokenizer hasn't been cached yet. This ensures offline operation after
-        the initial download.
-        """
-        try:
-            # Try loading from local cache first (supports offline usage)
-            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE, local_files_only=True)
-        except OSError:
-            # Not in cache yet, download from HuggingFace
-            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
-
     def _load_from_singlefile(
         self,
         config: AnyModelConfig,

@@ -705,27 +686,12 @@ class Qwen3EncoderGGUFLoader(ModelLoader):
                 return self._load_from_gguf(config)
             case SubModelType.Tokenizer:
                 # For GGUF Qwen3, load tokenizer from HuggingFace
-
-                return self._load_tokenizer_with_offline_fallback()
+                return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
 
         raise ValueError(
             f"Only TextEncoder and Tokenizer submodels are supported. Received: {submodel_type.value if submodel_type else 'None'}"
        )
 
-    def _load_tokenizer_with_offline_fallback(self) -> AnyModel:
-        """Load tokenizer with local_files_only fallback for offline support.
-
-        First tries to load from local cache (offline), falling back to network download
-        if the tokenizer hasn't been cached yet. This ensures offline operation after
-        the initial download.
-        """
-        try:
-            # Try loading from local cache first (supports offline usage)
-            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE, local_files_only=True)
-        except OSError:
-            # Not in cache yet, download from HuggingFace
-            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
-
     def _load_from_gguf(
         self,
         config: AnyModelConfig,
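A recurring change in the loader hunks above is dropping local_files_only=True from Hugging Face from_pretrained() calls, so the loaders no longer forbid network lookups themselves. A minimal sketch of how offline behaviour can still be requested per call or process-wide; the model path below is a placeholder, not a path InvokeAI uses:

import os

# Process-wide switch: set before importing transformers/huggingface_hub so it is picked up.
os.environ["HF_HUB_OFFLINE"] = "1"

from transformers import AutoTokenizer

# Per-call switch: raise instead of hitting the network if the files are not already on disk.
# "/path/to/local/model" is a placeholder directory containing tokenizer files.
tokenizer = AutoTokenizer.from_pretrained("/path/to/local/model", local_files_only=True)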
|