InvokeAI 6.10.0__py3-none-any.whl → 6.10.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. invokeai/app/invocations/flux_denoise.py +1 -15
  2. invokeai/app/invocations/metadata_linked.py +0 -47
  3. invokeai/app/invocations/z_image_denoise.py +84 -244
  4. invokeai/app/services/config/config_default.py +1 -3
  5. invokeai/app/services/model_manager/model_manager_default.py +0 -7
  6. invokeai/backend/flux/denoise.py +11 -196
  7. invokeai/backend/model_manager/configs/lora.py +0 -36
  8. invokeai/backend/model_manager/load/model_cache/model_cache.py +2 -104
  9. invokeai/backend/model_manager/load/model_loaders/cogview4.py +1 -2
  10. invokeai/backend/model_manager/load/model_loaders/flux.py +6 -13
  11. invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py +2 -4
  12. invokeai/backend/model_manager/load/model_loaders/onnx.py +0 -1
  13. invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +1 -2
  14. invokeai/backend/model_manager/load/model_loaders/z_image.py +3 -37
  15. invokeai/backend/model_manager/starter_models.py +4 -13
  16. invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py +5 -39
  17. invokeai/backend/quantization/gguf/ggml_tensor.py +4 -15
  18. invokeai/backend/z_image/extensions/regional_prompting_extension.py +12 -10
  19. invokeai/frontend/web/dist/assets/App-CYhlZO3Q.js +161 -0
  20. invokeai/frontend/web/dist/assets/{browser-ponyfill-4xPFTMT3.js → browser-ponyfill-DHZxq1nk.js} +1 -1
  21. invokeai/frontend/web/dist/assets/{index-vCDSQboA.js → index-dgSJAY--.js} +51 -51
  22. invokeai/frontend/web/dist/index.html +1 -1
  23. invokeai/frontend/web/dist/locales/en.json +5 -11
  24. invokeai/version/invokeai_version.py +1 -1
  25. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/METADATA +2 -2
  26. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/RECORD +32 -39
  27. invokeai/app/invocations/pbr_maps.py +0 -59
  28. invokeai/backend/flux/schedulers.py +0 -62
  29. invokeai/backend/image_util/pbr_maps/architecture/block.py +0 -367
  30. invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py +0 -70
  31. invokeai/backend/image_util/pbr_maps/pbr_maps.py +0 -141
  32. invokeai/backend/image_util/pbr_maps/utils/image_ops.py +0 -93
  33. invokeai/frontend/web/dist/assets/App-BBELGD-n.js +0 -161
  34. invokeai/frontend/web/dist/locales/en-GB.json +0 -1
  35. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/WHEEL +0 -0
  36. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/entry_points.txt +0 -0
  37. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE +0 -0
  38. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
  39. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
  40. {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,7 @@
- import inspect
  import math
  from typing import Callable

  import torch
- from diffusers.schedulers.scheduling_utils import SchedulerMixin
  from tqdm import tqdm

  from invokeai.backend.flux.controlnet.controlnet_flux_output import ControlNetFluxOutput, sum_controlnet_flux_outputs
@@ -37,207 +35,24 @@ def denoise(
  # extra img tokens (sequence-wise) - for Kontext conditioning
  img_cond_seq: torch.Tensor | None = None,
  img_cond_seq_ids: torch.Tensor | None = None,
- # Optional scheduler for alternative sampling methods
- scheduler: SchedulerMixin | None = None,
  ):
- # Determine if we're using a diffusers scheduler or the built-in Euler method
- use_scheduler = scheduler is not None
-
- if use_scheduler:
- # Initialize scheduler with timesteps
- # The timesteps list contains values in [0, 1] range (sigmas)
- # LCM should use num_inference_steps (it has its own sigma schedule),
- # while other schedulers can use custom sigmas if supported
- is_lcm = scheduler.__class__.__name__ == "FlowMatchLCMScheduler"
- set_timesteps_sig = inspect.signature(scheduler.set_timesteps)
- if not is_lcm and "sigmas" in set_timesteps_sig.parameters:
- # Scheduler supports custom sigmas - use InvokeAI's time-shifted schedule
- scheduler.set_timesteps(sigmas=timesteps, device=img.device)
- else:
- # LCM or scheduler doesn't support custom sigmas - use num_inference_steps
- # The schedule will be computed by the scheduler itself
- num_inference_steps = len(timesteps) - 1
- scheduler.set_timesteps(num_inference_steps=num_inference_steps, device=img.device)
-
- # For schedulers like Heun, the number of actual steps may differ
- # (Heun doubles timesteps internally)
- num_scheduler_steps = len(scheduler.timesteps)
- # For user-facing step count, use the original number of denoising steps
- total_steps = len(timesteps) - 1
- else:
- total_steps = len(timesteps) - 1
- num_scheduler_steps = total_steps
-
+ # step 0 is the initial state
+ total_steps = len(timesteps) - 1
+ step_callback(
+ PipelineIntermediateState(
+ step=0,
+ order=1,
+ total_steps=total_steps,
+ timestep=int(timesteps[0]),
+ latents=img,
+ ),
+ )
  # guidance_vec is ignored for schnell.
  guidance_vec = torch.full((img.shape[0],), guidance, device=img.device, dtype=img.dtype)

  # Store original sequence length for slicing predictions
  original_seq_len = img.shape[1]

- # Track the actual step for user-facing progress (accounts for Heun's double steps)
- user_step = 0
-
- if use_scheduler:
- # Use diffusers scheduler for stepping
- # Use tqdm with total_steps (user-facing steps) not num_scheduler_steps (internal steps)
- # This ensures progress bar shows 1/8, 2/8, etc. even when scheduler uses more internal steps
- pbar = tqdm(total=total_steps, desc="Denoising")
- for step_index in range(num_scheduler_steps):
- timestep = scheduler.timesteps[step_index]
- # Convert scheduler timestep (0-1000) to normalized (0-1) for the model
- t_curr = timestep.item() / scheduler.config.num_train_timesteps
- t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device)
-
- # For Heun scheduler, track if we're in first or second order step
- is_heun = hasattr(scheduler, "state_in_first_order")
- in_first_order = scheduler.state_in_first_order if is_heun else True
-
- # Run ControlNet models
- controlnet_residuals: list[ControlNetFluxOutput] = []
- for controlnet_extension in controlnet_extensions:
- controlnet_residuals.append(
- controlnet_extension.run_controlnet(
- timestep_index=user_step,
- total_num_timesteps=total_steps,
- img=img,
- img_ids=img_ids,
- txt=pos_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
- txt_ids=pos_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
- y=pos_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
- timesteps=t_vec,
- guidance=guidance_vec,
- )
- )
-
- merged_controlnet_residuals = sum_controlnet_flux_outputs(controlnet_residuals)
-
- # Prepare input for model
- img_input = img
- img_input_ids = img_ids
-
- if img_cond is not None:
- img_input = torch.cat((img_input, img_cond), dim=-1)
-
- if img_cond_seq is not None:
- assert img_cond_seq_ids is not None
- img_input = torch.cat((img_input, img_cond_seq), dim=1)
- img_input_ids = torch.cat((img_input_ids, img_cond_seq_ids), dim=1)
-
- pred = model(
- img=img_input,
- img_ids=img_input_ids,
- txt=pos_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
- txt_ids=pos_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
- y=pos_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
- timesteps=t_vec,
- guidance=guidance_vec,
- timestep_index=user_step,
- total_num_timesteps=total_steps,
- controlnet_double_block_residuals=merged_controlnet_residuals.double_block_residuals,
- controlnet_single_block_residuals=merged_controlnet_residuals.single_block_residuals,
- ip_adapter_extensions=pos_ip_adapter_extensions,
- regional_prompting_extension=pos_regional_prompting_extension,
- )
-
- if img_cond_seq is not None:
- pred = pred[:, :original_seq_len]
-
- # Get CFG scale for current user step
- step_cfg_scale = cfg_scale[min(user_step, len(cfg_scale) - 1)]
-
- if not math.isclose(step_cfg_scale, 1.0):
- if neg_regional_prompting_extension is None:
- raise ValueError("Negative text conditioning is required when cfg_scale is not 1.0.")
-
- neg_img_input = img
- neg_img_input_ids = img_ids
-
- if img_cond is not None:
- neg_img_input = torch.cat((neg_img_input, img_cond), dim=-1)
-
- if img_cond_seq is not None:
- neg_img_input = torch.cat((neg_img_input, img_cond_seq), dim=1)
- neg_img_input_ids = torch.cat((neg_img_input_ids, img_cond_seq_ids), dim=1)
-
- neg_pred = model(
- img=neg_img_input,
- img_ids=neg_img_input_ids,
- txt=neg_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
- txt_ids=neg_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
- y=neg_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
- timesteps=t_vec,
- guidance=guidance_vec,
- timestep_index=user_step,
- total_num_timesteps=total_steps,
- controlnet_double_block_residuals=None,
- controlnet_single_block_residuals=None,
- ip_adapter_extensions=neg_ip_adapter_extensions,
- regional_prompting_extension=neg_regional_prompting_extension,
- )
-
- if img_cond_seq is not None:
- neg_pred = neg_pred[:, :original_seq_len]
- pred = neg_pred + step_cfg_scale * (pred - neg_pred)
-
- # Use scheduler.step() for the update
- step_output = scheduler.step(model_output=pred, timestep=timestep, sample=img)
- img = step_output.prev_sample
-
- # Get t_prev for inpainting (next sigma value)
- if step_index + 1 < len(scheduler.sigmas):
- t_prev = scheduler.sigmas[step_index + 1].item()
- else:
- t_prev = 0.0
-
- if inpaint_extension is not None:
- img = inpaint_extension.merge_intermediate_latents_with_init_latents(img, t_prev)
-
- # For Heun, only increment user step after second-order step completes
- if is_heun:
- if not in_first_order:
- # Second order step completed
- user_step += 1
- # Only call step_callback if we haven't exceeded total_steps
- if user_step <= total_steps:
- pbar.update(1)
- preview_img = img - t_curr * pred
- if inpaint_extension is not None:
- preview_img = inpaint_extension.merge_intermediate_latents_with_init_latents(
- preview_img, 0.0
- )
- step_callback(
- PipelineIntermediateState(
- step=user_step,
- order=2,
- total_steps=total_steps,
- timestep=int(t_curr * 1000),
- latents=preview_img,
- ),
- )
- else:
- # For LCM and other first-order schedulers
- user_step += 1
- # Only call step_callback if we haven't exceeded total_steps
- # (LCM scheduler may have more internal steps than user-facing steps)
- if user_step <= total_steps:
- pbar.update(1)
- preview_img = img - t_curr * pred
- if inpaint_extension is not None:
- preview_img = inpaint_extension.merge_intermediate_latents_with_init_latents(preview_img, 0.0)
- step_callback(
- PipelineIntermediateState(
- step=user_step,
- order=1,
- total_steps=total_steps,
- timestep=int(t_curr * 1000),
- latents=preview_img,
- ),
- )
-
- pbar.close()
- return img
-
- # Original Euler implementation (when scheduler is None)
  for step_index, (t_curr, t_prev) in tqdm(list(enumerate(zip(timesteps[:-1], timesteps[1:], strict=True)))):
  t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device)

@@ -227,42 +227,6 @@ class LoRA_LyCORIS_ZImage_Config(LoRA_LyCORIS_Config_Base, Config_Base):

  base: Literal[BaseModelType.ZImage] = Field(default=BaseModelType.ZImage)

- @classmethod
- def _validate_looks_like_lora(cls, mod: ModelOnDisk) -> None:
- """Z-Image LoRAs have different key patterns than SD/SDXL LoRAs.
-
- Z-Image LoRAs use keys like:
- - diffusion_model.layers.X.attention.to_k.lora_down.weight (DoRA format)
- - diffusion_model.layers.X.attention.to_k.lora_A.weight (PEFT format)
- - diffusion_model.layers.X.attention.to_k.dora_scale (DoRA scale)
- """
- state_dict = mod.load_state_dict()
-
- # Check for Z-Image specific LoRA patterns
- has_z_image_lora_keys = state_dict_has_any_keys_starting_with(
- state_dict,
- {
- "diffusion_model.layers.", # Z-Image S3-DiT layer pattern
- },
- )
-
- # Also check for LoRA weight suffixes (various formats)
- has_lora_suffix = state_dict_has_any_keys_ending_with(
- state_dict,
- {
- "lora_A.weight",
- "lora_B.weight",
- "lora_down.weight",
- "lora_up.weight",
- "dora_scale",
- },
- )
-
- if has_z_image_lora_keys and has_lora_suffix:
- return
-
- raise NotAMatchError("model does not match Z-Image LoRA heuristics")
-
  @classmethod
  def _get_base_or_raise(cls, mod: ModelOnDisk) -> BaseModelType:
  """Z-Image LoRAs are identified by their diffusion_model.layers structure.
@@ -55,21 +55,6 @@ def synchronized(method: Callable[..., Any]) -> Callable[..., Any]:
  return wrapper


- def record_activity(method: Callable[..., Any]) -> Callable[..., Any]:
- """A decorator that records activity after a method completes successfully.
-
- Note: This decorator should be applied to methods that already hold self._lock.
- """
-
- @wraps(method)
- def wrapper(self, *args, **kwargs):
- result = method(self, *args, **kwargs)
- self._record_activity()
- return result
-
- return wrapper
-
-
  @dataclass
  class CacheEntrySnapshot:
  cache_key: str
@@ -147,7 +132,6 @@ class ModelCache:
  storage_device: torch.device | str = "cpu",
  log_memory_usage: bool = False,
  logger: Optional[Logger] = None,
- keep_alive_minutes: float = 0,
  ):
  """Initialize the model RAM cache.

@@ -167,7 +151,6 @@ class ModelCache:
  snapshots, so it is recommended to disable this feature unless you are actively inspecting the model cache's
  behaviour.
  :param logger: InvokeAILogger to use (otherwise creates one)
- :param keep_alive_minutes: How long to keep models in cache after last use (in minutes). 0 means keep indefinitely.
  """
  self._enable_partial_loading = enable_partial_loading
  self._keep_ram_copy_of_weights = keep_ram_copy_of_weights
@@ -199,12 +182,6 @@ class ModelCache:
  self._on_cache_miss_callbacks: set[CacheMissCallback] = set()
  self._on_cache_models_cleared_callbacks: set[CacheModelsClearedCallback] = set()

- # Keep-alive timeout support
- self._keep_alive_minutes = keep_alive_minutes
- self._last_activity_time: Optional[float] = None
- self._timeout_timer: Optional[threading.Timer] = None
- self._shutdown_event = threading.Event()
-
  def on_cache_hit(self, cb: CacheHitCallback) -> Callable[[], None]:
  self._on_cache_hit_callbacks.add(cb)

@@ -213,7 +190,7 @@

  return unsubscribe

- def on_cache_miss(self, cb: CacheMissCallback) -> Callable[[], None]:
+ def on_cache_miss(self, cb: CacheHitCallback) -> Callable[[], None]:
  self._on_cache_miss_callbacks.add(cb)

  def unsubscribe() -> None:
@@ -241,78 +218,7 @@ class ModelCache:
  """Set the CacheStats object for collecting cache statistics."""
  self._stats = stats

- def _record_activity(self) -> None:
- """Record model activity and reset the timeout timer if configured.
-
- Note: This method should only be called when self._lock is already held.
- """
- if self._keep_alive_minutes <= 0:
- return
-
- self._last_activity_time = time.time()
-
- # Cancel any existing timer
- if self._timeout_timer is not None:
- self._timeout_timer.cancel()
-
- # Start a new timer
- timeout_seconds = self._keep_alive_minutes * 60
- self._timeout_timer = threading.Timer(timeout_seconds, self._on_timeout)
- # Set as daemon so it doesn't prevent application shutdown
- self._timeout_timer.daemon = True
- self._timeout_timer.start()
- self._logger.debug(f"Model cache activity recorded. Timeout set to {self._keep_alive_minutes} minutes.")
-
  @synchronized
- @record_activity
- def _on_timeout(self) -> None:
- """Called when the keep-alive timeout expires. Clears the model cache."""
- if self._shutdown_event.is_set():
- return
-
- # Double-check if there has been activity since the timer was set
- # This handles the race condition where activity occurred just before the timer fired
- if self._last_activity_time is not None and self._keep_alive_minutes > 0:
- elapsed_minutes = (time.time() - self._last_activity_time) / 60
- if elapsed_minutes < self._keep_alive_minutes:
- # Activity occurred, don't clear cache
- self._logger.debug(
- f"Model cache timeout fired but activity detected {elapsed_minutes:.2f} minutes ago. "
- f"Skipping cache clear."
- )
- return
-
- # Check if there are any unlocked models that can be cleared
- unlocked_models = [key for key, entry in self._cached_models.items() if not entry.is_locked]
-
- if len(unlocked_models) > 0:
- self._logger.info(
- f"Model cache keep-alive timeout of {self._keep_alive_minutes} minutes expired. "
- f"Clearing {len(unlocked_models)} unlocked model(s) from cache."
- )
- # Clear the cache by requesting a very large amount of space.
- # This is the same logic used by the "Clear Model Cache" button.
- # Using 1000 GB ensures all unlocked models are removed.
- self._make_room_internal(1000 * GB)
- elif len(self._cached_models) > 0:
- # All models are locked, don't log at info level
- self._logger.debug(
- f"Model cache timeout fired but all {len(self._cached_models)} model(s) are locked. "
- f"Skipping cache clear."
- )
- else:
- self._logger.debug("Model cache timeout fired but cache is already empty.")
-
- @synchronized
- def shutdown(self) -> None:
- """Shutdown the model cache, cancelling any pending timers."""
- self._shutdown_event.set()
- if self._timeout_timer is not None:
- self._timeout_timer.cancel()
- self._timeout_timer = None
-
- @synchronized
- @record_activity
  def put(self, key: str, model: AnyModel) -> None:
  """Add a model to the cache."""
  if key in self._cached_models:
@@ -322,7 +228,7 @@ class ModelCache:
  return

  size = calc_model_size_by_data(self._logger, model)
- self._make_room_internal(size)
+ self.make_room(size)

  # Inject custom modules into the model.
  if isinstance(model, torch.nn.Module):
@@ -366,7 +272,6 @@ class ModelCache:
  return overview

  @synchronized
- @record_activity
  def get(self, key: str, stats_name: Optional[str] = None) -> CacheRecord:
  """Retrieve a model from the cache.

@@ -404,11 +309,9 @@ class ModelCache:
  self._logger.debug(f"Cache hit: {key} (Type: {cache_entry.cached_model.model.__class__.__name__})")
  for cb in self._on_cache_hit_callbacks:
  cb(model_key=key, cache_snapshot=self._get_cache_snapshot())
-
  return cache_entry

  @synchronized
- @record_activity
  def lock(self, cache_entry: CacheRecord, working_mem_bytes: Optional[int]) -> None:
  """Lock a model for use and move it into VRAM."""
  if cache_entry.key not in self._cached_models:
@@ -445,7 +348,6 @@ class ModelCache:
  self._log_cache_state()

  @synchronized
- @record_activity
  def unlock(self, cache_entry: CacheRecord) -> None:
  """Unlock a model."""
  if cache_entry.key not in self._cached_models:
@@ -789,10 +691,6 @@ class ModelCache:
  external references to the model, there's nothing that the cache can do about it, and those models will not be
  garbage-collected.
  """
- self._make_room_internal(bytes_needed)
-
- def _make_room_internal(self, bytes_needed: int) -> None:
- """Internal implementation of make_room(). Assumes the lock is already held."""
  self._logger.debug(f"Making room for {bytes_needed / MB:.2f}MB of RAM.")
  self._log_cache_state(title="Before dropping models:")

@@ -45,13 +45,12 @@ class CogView4DiffusersModel(GenericDiffusersLoader):
  model_path,
  torch_dtype=dtype,
  variant=variant,
- local_files_only=True,
  )
  except OSError as e:
  if variant and "no file named" in str(
  e
  ): # try without the variant, just in case user's preferences changed
- result = load_class.from_pretrained(model_path, torch_dtype=dtype, local_files_only=True)
+ result = load_class.from_pretrained(model_path, torch_dtype=dtype)
  else:
  raise e

@@ -122,9 +122,9 @@ class CLIPDiffusersLoader(ModelLoader):

  match submodel_type:
  case SubModelType.Tokenizer:
- return CLIPTokenizer.from_pretrained(Path(config.path) / "tokenizer", local_files_only=True)
+ return CLIPTokenizer.from_pretrained(Path(config.path) / "tokenizer")
  case SubModelType.TextEncoder:
- return CLIPTextModel.from_pretrained(Path(config.path) / "text_encoder", local_files_only=True)
+ return CLIPTextModel.from_pretrained(Path(config.path) / "text_encoder")

  raise ValueError(
  f"Only Tokenizer and TextEncoder submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}"
@@ -148,12 +148,10 @@ class BnbQuantizedLlmInt8bCheckpointModel(ModelLoader):
  )
  match submodel_type:
  case SubModelType.Tokenizer2 | SubModelType.Tokenizer3:
- return T5TokenizerFast.from_pretrained(
- Path(config.path) / "tokenizer_2", max_length=512, local_files_only=True
- )
+ return T5TokenizerFast.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512)
  case SubModelType.TextEncoder2 | SubModelType.TextEncoder3:
  te2_model_path = Path(config.path) / "text_encoder_2"
- model_config = AutoConfig.from_pretrained(te2_model_path, local_files_only=True)
+ model_config = AutoConfig.from_pretrained(te2_model_path)
  with accelerate.init_empty_weights():
  model = AutoModelForTextEncoding.from_config(model_config)
  model = quantize_model_llm_int8(model, modules_to_not_convert=set())
@@ -194,15 +192,10 @@ class T5EncoderCheckpointModel(ModelLoader):

  match submodel_type:
  case SubModelType.Tokenizer2 | SubModelType.Tokenizer3:
- return T5TokenizerFast.from_pretrained(
- Path(config.path) / "tokenizer_2", max_length=512, local_files_only=True
- )
+ return T5TokenizerFast.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512)
  case SubModelType.TextEncoder2 | SubModelType.TextEncoder3:
  return T5EncoderModel.from_pretrained(
- Path(config.path) / "text_encoder_2",
- torch_dtype="auto",
- low_cpu_mem_usage=True,
- local_files_only=True,
+ Path(config.path) / "text_encoder_2", torch_dtype="auto", low_cpu_mem_usage=True
  )

  raise ValueError(
@@ -37,14 +37,12 @@ class GenericDiffusersLoader(ModelLoader):
  repo_variant = config.repo_variant if isinstance(config, Diffusers_Config_Base) else None
  variant = repo_variant.value if repo_variant else None
  try:
- result: AnyModel = model_class.from_pretrained(
- model_path, torch_dtype=self._torch_dtype, variant=variant, local_files_only=True
- )
+ result: AnyModel = model_class.from_pretrained(model_path, torch_dtype=self._torch_dtype, variant=variant)
  except OSError as e:
  if variant and "no file named" in str(
  e
  ): # try without the variant, just in case user's preferences changed
- result = model_class.from_pretrained(model_path, torch_dtype=self._torch_dtype, local_files_only=True)
+ result = model_class.from_pretrained(model_path, torch_dtype=self._torch_dtype)
  else:
  raise e
  return result
@@ -38,6 +38,5 @@ class OnnyxDiffusersModel(GenericDiffusersLoader):
  model_path,
  torch_dtype=self._torch_dtype,
  variant=variant,
- local_files_only=True,
  )
  return result
@@ -80,13 +80,12 @@ class StableDiffusionDiffusersModel(GenericDiffusersLoader):
  model_path,
  torch_dtype=self._torch_dtype,
  variant=variant,
- local_files_only=True,
  )
  except OSError as e:
  if variant and "no file named" in str(
  e
  ): # try without the variant, just in case user's preferences changed
- result = load_class.from_pretrained(model_path, torch_dtype=self._torch_dtype, local_files_only=True)
+ result = load_class.from_pretrained(model_path, torch_dtype=self._torch_dtype)
  else:
  raise e

@@ -384,19 +384,15 @@ class Qwen3EncoderLoader(ModelLoader):

  match submodel_type:
  case SubModelType.Tokenizer:
- # Use local_files_only=True to prevent network requests for validation
- # The tokenizer files should already exist locally in the model directory
- return AutoTokenizer.from_pretrained(tokenizer_path, local_files_only=True)
+ return AutoTokenizer.from_pretrained(tokenizer_path)
  case SubModelType.TextEncoder:
  # Determine safe dtype based on target device capabilities
  target_device = TorchDevice.choose_torch_device()
  model_dtype = TorchDevice.choose_bfloat16_safe_dtype(target_device)
- # Use local_files_only=True to prevent network requests for validation
  return Qwen3ForCausalLM.from_pretrained(
  text_encoder_path,
  torch_dtype=model_dtype,
  low_cpu_mem_usage=True,
- local_files_only=True,
  )

  raise ValueError(
@@ -530,27 +526,12 @@ class Qwen3EncoderCheckpointLoader(ModelLoader):
  return self._load_from_singlefile(config)
  case SubModelType.Tokenizer:
  # For single-file Qwen3, load tokenizer from HuggingFace
- # Try local cache first to support offline usage after initial download
- return self._load_tokenizer_with_offline_fallback()
+ return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)

  raise ValueError(
  f"Only TextEncoder and Tokenizer submodels are supported. Received: {submodel_type.value if submodel_type else 'None'}"
  )

- def _load_tokenizer_with_offline_fallback(self) -> AnyModel:
- """Load tokenizer with local_files_only fallback for offline support.
-
- First tries to load from local cache (offline), falling back to network download
- if the tokenizer hasn't been cached yet. This ensures offline operation after
- the initial download.
- """
- try:
- # Try loading from local cache first (supports offline usage)
- return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE, local_files_only=True)
- except OSError:
- # Not in cache yet, download from HuggingFace
- return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
-
  def _load_from_singlefile(
  self,
  config: AnyModelConfig,
@@ -705,27 +686,12 @@ class Qwen3EncoderGGUFLoader(ModelLoader):
  return self._load_from_gguf(config)
  case SubModelType.Tokenizer:
  # For GGUF Qwen3, load tokenizer from HuggingFace
- # Try local cache first to support offline usage after initial download
- return self._load_tokenizer_with_offline_fallback()
+ return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)

  raise ValueError(
  f"Only TextEncoder and Tokenizer submodels are supported. Received: {submodel_type.value if submodel_type else 'None'}"
  )

- def _load_tokenizer_with_offline_fallback(self) -> AnyModel:
- """Load tokenizer with local_files_only fallback for offline support.
-
- First tries to load from local cache (offline), falling back to network download
- if the tokenizer hasn't been cached yet. This ensures offline operation after
- the initial download.
- """
- try:
- # Try loading from local cache first (supports offline usage)
- return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE, local_files_only=True)
- except OSError:
- # Not in cache yet, download from HuggingFace
- return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
-
  def _load_from_gguf(
  self,
  config: AnyModelConfig,