InvokeAI 6.10.0__py3-none-any.whl → 6.10.0rc1__py3-none-any.whl
This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- invokeai/app/invocations/flux_denoise.py +1 -15
- invokeai/app/invocations/metadata_linked.py +0 -47
- invokeai/app/invocations/z_image_denoise.py +84 -244
- invokeai/app/services/config/config_default.py +1 -3
- invokeai/app/services/model_manager/model_manager_default.py +0 -7
- invokeai/backend/flux/denoise.py +11 -196
- invokeai/backend/model_manager/configs/lora.py +0 -36
- invokeai/backend/model_manager/load/model_cache/model_cache.py +2 -104
- invokeai/backend/model_manager/load/model_loaders/cogview4.py +1 -2
- invokeai/backend/model_manager/load/model_loaders/flux.py +6 -13
- invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py +2 -4
- invokeai/backend/model_manager/load/model_loaders/onnx.py +0 -1
- invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py +1 -2
- invokeai/backend/model_manager/load/model_loaders/z_image.py +3 -37
- invokeai/backend/model_manager/starter_models.py +4 -13
- invokeai/backend/patches/lora_conversions/z_image_lora_conversion_utils.py +5 -39
- invokeai/backend/quantization/gguf/ggml_tensor.py +4 -15
- invokeai/backend/z_image/extensions/regional_prompting_extension.py +12 -10
- invokeai/frontend/web/dist/assets/App-CYhlZO3Q.js +161 -0
- invokeai/frontend/web/dist/assets/{browser-ponyfill-4xPFTMT3.js → browser-ponyfill-DHZxq1nk.js} +1 -1
- invokeai/frontend/web/dist/assets/{index-vCDSQboA.js → index-dgSJAY--.js} +51 -51
- invokeai/frontend/web/dist/index.html +1 -1
- invokeai/frontend/web/dist/locales/en.json +5 -11
- invokeai/version/invokeai_version.py +1 -1
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/METADATA +2 -2
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/RECORD +32 -39
- invokeai/app/invocations/pbr_maps.py +0 -59
- invokeai/backend/flux/schedulers.py +0 -62
- invokeai/backend/image_util/pbr_maps/architecture/block.py +0 -367
- invokeai/backend/image_util/pbr_maps/architecture/pbr_rrdb_net.py +0 -70
- invokeai/backend/image_util/pbr_maps/pbr_maps.py +0 -141
- invokeai/backend/image_util/pbr_maps/utils/image_ops.py +0 -93
- invokeai/frontend/web/dist/assets/App-BBELGD-n.js +0 -161
- invokeai/frontend/web/dist/locales/en-GB.json +0 -1
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/WHEEL +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/entry_points.txt +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SD1+SD2.txt +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/licenses/LICENSE-SDXL.txt +0 -0
- {invokeai-6.10.0.dist-info → invokeai-6.10.0rc1.dist-info}/top_level.txt +0 -0
invokeai/backend/flux/denoise.py
CHANGED
@@ -1,9 +1,7 @@
-import inspect
 import math
 from typing import Callable
 
 import torch
-from diffusers.schedulers.scheduling_utils import SchedulerMixin
 from tqdm import tqdm
 
 from invokeai.backend.flux.controlnet.controlnet_flux_output import ControlNetFluxOutput, sum_controlnet_flux_outputs

@@ -37,207 +35,24 @@ def denoise(
     # extra img tokens (sequence-wise) - for Kontext conditioning
     img_cond_seq: torch.Tensor | None = None,
     img_cond_seq_ids: torch.Tensor | None = None,
-    # Optional scheduler for alternative sampling methods
-    scheduler: SchedulerMixin | None = None,
 ):
-    #
-
-
-
-
-
-
-
-
-
-
-            # Scheduler supports custom sigmas - use InvokeAI's time-shifted schedule
-            scheduler.set_timesteps(sigmas=timesteps, device=img.device)
-        else:
-            # LCM or scheduler doesn't support custom sigmas - use num_inference_steps
-            # The schedule will be computed by the scheduler itself
-            num_inference_steps = len(timesteps) - 1
-            scheduler.set_timesteps(num_inference_steps=num_inference_steps, device=img.device)
-
-        # For schedulers like Heun, the number of actual steps may differ
-        # (Heun doubles timesteps internally)
-        num_scheduler_steps = len(scheduler.timesteps)
-        # For user-facing step count, use the original number of denoising steps
-        total_steps = len(timesteps) - 1
-    else:
-        total_steps = len(timesteps) - 1
-        num_scheduler_steps = total_steps
-
+    # step 0 is the initial state
+    total_steps = len(timesteps) - 1
+    step_callback(
+        PipelineIntermediateState(
+            step=0,
+            order=1,
+            total_steps=total_steps,
+            timestep=int(timesteps[0]),
+            latents=img,
+        ),
+    )
     # guidance_vec is ignored for schnell.
     guidance_vec = torch.full((img.shape[0],), guidance, device=img.device, dtype=img.dtype)
 
     # Store original sequence length for slicing predictions
     original_seq_len = img.shape[1]
 
-    # Track the actual step for user-facing progress (accounts for Heun's double steps)
-    user_step = 0
-
-    if use_scheduler:
-        # Use diffusers scheduler for stepping
-        # Use tqdm with total_steps (user-facing steps) not num_scheduler_steps (internal steps)
-        # This ensures progress bar shows 1/8, 2/8, etc. even when scheduler uses more internal steps
-        pbar = tqdm(total=total_steps, desc="Denoising")
-        for step_index in range(num_scheduler_steps):
-            timestep = scheduler.timesteps[step_index]
-            # Convert scheduler timestep (0-1000) to normalized (0-1) for the model
-            t_curr = timestep.item() / scheduler.config.num_train_timesteps
-            t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device)
-
-            # For Heun scheduler, track if we're in first or second order step
-            is_heun = hasattr(scheduler, "state_in_first_order")
-            in_first_order = scheduler.state_in_first_order if is_heun else True
-
-            # Run ControlNet models
-            controlnet_residuals: list[ControlNetFluxOutput] = []
-            for controlnet_extension in controlnet_extensions:
-                controlnet_residuals.append(
-                    controlnet_extension.run_controlnet(
-                        timestep_index=user_step,
-                        total_num_timesteps=total_steps,
-                        img=img,
-                        img_ids=img_ids,
-                        txt=pos_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
-                        txt_ids=pos_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
-                        y=pos_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
-                        timesteps=t_vec,
-                        guidance=guidance_vec,
-                    )
-                )
-
-            merged_controlnet_residuals = sum_controlnet_flux_outputs(controlnet_residuals)
-
-            # Prepare input for model
-            img_input = img
-            img_input_ids = img_ids
-
-            if img_cond is not None:
-                img_input = torch.cat((img_input, img_cond), dim=-1)
-
-            if img_cond_seq is not None:
-                assert img_cond_seq_ids is not None
-                img_input = torch.cat((img_input, img_cond_seq), dim=1)
-                img_input_ids = torch.cat((img_input_ids, img_cond_seq_ids), dim=1)
-
-            pred = model(
-                img=img_input,
-                img_ids=img_input_ids,
-                txt=pos_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
-                txt_ids=pos_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
-                y=pos_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
-                timesteps=t_vec,
-                guidance=guidance_vec,
-                timestep_index=user_step,
-                total_num_timesteps=total_steps,
-                controlnet_double_block_residuals=merged_controlnet_residuals.double_block_residuals,
-                controlnet_single_block_residuals=merged_controlnet_residuals.single_block_residuals,
-                ip_adapter_extensions=pos_ip_adapter_extensions,
-                regional_prompting_extension=pos_regional_prompting_extension,
-            )
-
-            if img_cond_seq is not None:
-                pred = pred[:, :original_seq_len]
-
-            # Get CFG scale for current user step
-            step_cfg_scale = cfg_scale[min(user_step, len(cfg_scale) - 1)]
-
-            if not math.isclose(step_cfg_scale, 1.0):
-                if neg_regional_prompting_extension is None:
-                    raise ValueError("Negative text conditioning is required when cfg_scale is not 1.0.")
-
-                neg_img_input = img
-                neg_img_input_ids = img_ids
-
-                if img_cond is not None:
-                    neg_img_input = torch.cat((neg_img_input, img_cond), dim=-1)
-
-                if img_cond_seq is not None:
-                    neg_img_input = torch.cat((neg_img_input, img_cond_seq), dim=1)
-                    neg_img_input_ids = torch.cat((neg_img_input_ids, img_cond_seq_ids), dim=1)
-
-                neg_pred = model(
-                    img=neg_img_input,
-                    img_ids=neg_img_input_ids,
-                    txt=neg_regional_prompting_extension.regional_text_conditioning.t5_embeddings,
-                    txt_ids=neg_regional_prompting_extension.regional_text_conditioning.t5_txt_ids,
-                    y=neg_regional_prompting_extension.regional_text_conditioning.clip_embeddings,
-                    timesteps=t_vec,
-                    guidance=guidance_vec,
-                    timestep_index=user_step,
-                    total_num_timesteps=total_steps,
-                    controlnet_double_block_residuals=None,
-                    controlnet_single_block_residuals=None,
-                    ip_adapter_extensions=neg_ip_adapter_extensions,
-                    regional_prompting_extension=neg_regional_prompting_extension,
-                )
-
-                if img_cond_seq is not None:
-                    neg_pred = neg_pred[:, :original_seq_len]
-                pred = neg_pred + step_cfg_scale * (pred - neg_pred)
-
-            # Use scheduler.step() for the update
-            step_output = scheduler.step(model_output=pred, timestep=timestep, sample=img)
-            img = step_output.prev_sample
-
-            # Get t_prev for inpainting (next sigma value)
-            if step_index + 1 < len(scheduler.sigmas):
-                t_prev = scheduler.sigmas[step_index + 1].item()
-            else:
-                t_prev = 0.0
-
-            if inpaint_extension is not None:
-                img = inpaint_extension.merge_intermediate_latents_with_init_latents(img, t_prev)
-
-            # For Heun, only increment user step after second-order step completes
-            if is_heun:
-                if not in_first_order:
-                    # Second order step completed
-                    user_step += 1
-                    # Only call step_callback if we haven't exceeded total_steps
-                    if user_step <= total_steps:
-                        pbar.update(1)
-                        preview_img = img - t_curr * pred
-                        if inpaint_extension is not None:
-                            preview_img = inpaint_extension.merge_intermediate_latents_with_init_latents(
-                                preview_img, 0.0
-                            )
-                        step_callback(
-                            PipelineIntermediateState(
-                                step=user_step,
-                                order=2,
-                                total_steps=total_steps,
-                                timestep=int(t_curr * 1000),
-                                latents=preview_img,
-                            ),
-                        )
-            else:
-                # For LCM and other first-order schedulers
-                user_step += 1
-                # Only call step_callback if we haven't exceeded total_steps
-                # (LCM scheduler may have more internal steps than user-facing steps)
-                if user_step <= total_steps:
-                    pbar.update(1)
-                    preview_img = img - t_curr * pred
-                    if inpaint_extension is not None:
-                        preview_img = inpaint_extension.merge_intermediate_latents_with_init_latents(preview_img, 0.0)
-                    step_callback(
-                        PipelineIntermediateState(
-                            step=user_step,
-                            order=1,
-                            total_steps=total_steps,
-                            timestep=int(t_curr * 1000),
-                            latents=preview_img,
-                        ),
-                    )
-
-        pbar.close()
-        return img
-
-    # Original Euler implementation (when scheduler is None)
     for step_index, (t_curr, t_prev) in tqdm(list(enumerate(zip(timesteps[:-1], timesteps[1:], strict=True)))):
         t_vec = torch.full((img.shape[0],), t_curr, dtype=img.dtype, device=img.device)
 
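For orientation: the removed comment labels the surviving code path as the "Original Euler implementation", and the removed preview code computes an x0-style estimate as img - t_curr * pred. A minimal sketch of those two operations, with illustrative helper names that are not part of InvokeAI's API:

import torch

def euler_flow_step(img: torch.Tensor, pred: torch.Tensor, t_curr: float, t_prev: float) -> torch.Tensor:
    # One Euler step of the flow-matching ODE: move the latents from t_curr to t_prev
    # along the velocity predicted by the transformer (a sketch, not the InvokeAI code).
    return img + (t_prev - t_curr) * pred

def preview_latents(img: torch.Tensor, pred: torch.Tensor, t_curr: float) -> torch.Tensor:
    # Rough x0 estimate used for progress previews; mirrors the `img - t_curr * pred`
    # expression visible in the removed scheduler path above.
    return img - t_curr * pred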
invokeai/backend/model_manager/configs/lora.py
CHANGED

@@ -227,42 +227,6 @@ class LoRA_LyCORIS_ZImage_Config(LoRA_LyCORIS_Config_Base, Config_Base):
 
     base: Literal[BaseModelType.ZImage] = Field(default=BaseModelType.ZImage)
 
-    @classmethod
-    def _validate_looks_like_lora(cls, mod: ModelOnDisk) -> None:
-        """Z-Image LoRAs have different key patterns than SD/SDXL LoRAs.
-
-        Z-Image LoRAs use keys like:
-        - diffusion_model.layers.X.attention.to_k.lora_down.weight (DoRA format)
-        - diffusion_model.layers.X.attention.to_k.lora_A.weight (PEFT format)
-        - diffusion_model.layers.X.attention.to_k.dora_scale (DoRA scale)
-        """
-        state_dict = mod.load_state_dict()
-
-        # Check for Z-Image specific LoRA patterns
-        has_z_image_lora_keys = state_dict_has_any_keys_starting_with(
-            state_dict,
-            {
-                "diffusion_model.layers.",  # Z-Image S3-DiT layer pattern
-            },
-        )
-
-        # Also check for LoRA weight suffixes (various formats)
-        has_lora_suffix = state_dict_has_any_keys_ending_with(
-            state_dict,
-            {
-                "lora_A.weight",
-                "lora_B.weight",
-                "lora_down.weight",
-                "lora_up.weight",
-                "dora_scale",
-            },
-        )
-
-        if has_z_image_lora_keys and has_lora_suffix:
-            return
-
-        raise NotAMatchError("model does not match Z-Image LoRA heuristics")
-
     @classmethod
     def _get_base_or_raise(cls, mod: ModelOnDisk) -> BaseModelType:
         """Z-Image LoRAs are identified by their diffusion_model.layers structure.
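The deleted _validate_looks_like_lora matched Z-Image LoRAs purely by key shape. A condensed sketch of that heuristic over a plain state-dict mapping; the constant and function names below are illustrative, not the InvokeAI utilities:

Z_IMAGE_PREFIXES = ("diffusion_model.layers.",)
LORA_SUFFIXES = ("lora_A.weight", "lora_B.weight", "lora_down.weight", "lora_up.weight", "dora_scale")

def looks_like_z_image_lora(state_dict: dict) -> bool:
    # True when at least one key uses the Z-Image S3-DiT layer prefix AND
    # at least one key carries a LoRA/DoRA weight suffix.
    has_prefix = any(key.startswith(Z_IMAGE_PREFIXES) for key in state_dict)
    has_suffix = any(key.endswith(LORA_SUFFIXES) for key in state_dict)
    return has_prefix and has_suffix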
invokeai/backend/model_manager/load/model_cache/model_cache.py
CHANGED

@@ -55,21 +55,6 @@ def synchronized(method: Callable[..., Any]) -> Callable[..., Any]:
     return wrapper
 
 
-def record_activity(method: Callable[..., Any]) -> Callable[..., Any]:
-    """A decorator that records activity after a method completes successfully.
-
-    Note: This decorator should be applied to methods that already hold self._lock.
-    """
-
-    @wraps(method)
-    def wrapper(self, *args, **kwargs):
-        result = method(self, *args, **kwargs)
-        self._record_activity()
-        return result
-
-    return wrapper
-
-
 @dataclass
 class CacheEntrySnapshot:
     cache_key: str

@@ -147,7 +132,6 @@ class ModelCache:
         storage_device: torch.device | str = "cpu",
         log_memory_usage: bool = False,
         logger: Optional[Logger] = None,
-        keep_alive_minutes: float = 0,
     ):
         """Initialize the model RAM cache.
 

@@ -167,7 +151,6 @@ class ModelCache:
         snapshots, so it is recommended to disable this feature unless you are actively inspecting the model cache's
         behaviour.
         :param logger: InvokeAILogger to use (otherwise creates one)
-        :param keep_alive_minutes: How long to keep models in cache after last use (in minutes). 0 means keep indefinitely.
         """
         self._enable_partial_loading = enable_partial_loading
         self._keep_ram_copy_of_weights = keep_ram_copy_of_weights

@@ -199,12 +182,6 @@ class ModelCache:
         self._on_cache_miss_callbacks: set[CacheMissCallback] = set()
         self._on_cache_models_cleared_callbacks: set[CacheModelsClearedCallback] = set()
 
-        # Keep-alive timeout support
-        self._keep_alive_minutes = keep_alive_minutes
-        self._last_activity_time: Optional[float] = None
-        self._timeout_timer: Optional[threading.Timer] = None
-        self._shutdown_event = threading.Event()
-
     def on_cache_hit(self, cb: CacheHitCallback) -> Callable[[], None]:
         self._on_cache_hit_callbacks.add(cb)
 

@@ -213,7 +190,7 @@ class ModelCache:
 
         return unsubscribe
 
-    def on_cache_miss(self, cb:
+    def on_cache_miss(self, cb: CacheHitCallback) -> Callable[[], None]:
         self._on_cache_miss_callbacks.add(cb)
 
         def unsubscribe() -> None:

@@ -241,78 +218,7 @@ class ModelCache:
         """Set the CacheStats object for collecting cache statistics."""
         self._stats = stats
 
-    def _record_activity(self) -> None:
-        """Record model activity and reset the timeout timer if configured.
-
-        Note: This method should only be called when self._lock is already held.
-        """
-        if self._keep_alive_minutes <= 0:
-            return
-
-        self._last_activity_time = time.time()
-
-        # Cancel any existing timer
-        if self._timeout_timer is not None:
-            self._timeout_timer.cancel()
-
-        # Start a new timer
-        timeout_seconds = self._keep_alive_minutes * 60
-        self._timeout_timer = threading.Timer(timeout_seconds, self._on_timeout)
-        # Set as daemon so it doesn't prevent application shutdown
-        self._timeout_timer.daemon = True
-        self._timeout_timer.start()
-        self._logger.debug(f"Model cache activity recorded. Timeout set to {self._keep_alive_minutes} minutes.")
-
     @synchronized
-    @record_activity
-    def _on_timeout(self) -> None:
-        """Called when the keep-alive timeout expires. Clears the model cache."""
-        if self._shutdown_event.is_set():
-            return
-
-        # Double-check if there has been activity since the timer was set
-        # This handles the race condition where activity occurred just before the timer fired
-        if self._last_activity_time is not None and self._keep_alive_minutes > 0:
-            elapsed_minutes = (time.time() - self._last_activity_time) / 60
-            if elapsed_minutes < self._keep_alive_minutes:
-                # Activity occurred, don't clear cache
-                self._logger.debug(
-                    f"Model cache timeout fired but activity detected {elapsed_minutes:.2f} minutes ago. "
-                    f"Skipping cache clear."
-                )
-                return
-
-        # Check if there are any unlocked models that can be cleared
-        unlocked_models = [key for key, entry in self._cached_models.items() if not entry.is_locked]
-
-        if len(unlocked_models) > 0:
-            self._logger.info(
-                f"Model cache keep-alive timeout of {self._keep_alive_minutes} minutes expired. "
-                f"Clearing {len(unlocked_models)} unlocked model(s) from cache."
-            )
-            # Clear the cache by requesting a very large amount of space.
-            # This is the same logic used by the "Clear Model Cache" button.
-            # Using 1000 GB ensures all unlocked models are removed.
-            self._make_room_internal(1000 * GB)
-        elif len(self._cached_models) > 0:
-            # All models are locked, don't log at info level
-            self._logger.debug(
-                f"Model cache timeout fired but all {len(self._cached_models)} model(s) are locked. "
-                f"Skipping cache clear."
-            )
-        else:
-            self._logger.debug("Model cache timeout fired but cache is already empty.")
-
-    @synchronized
-    def shutdown(self) -> None:
-        """Shutdown the model cache, cancelling any pending timers."""
-        self._shutdown_event.set()
-        if self._timeout_timer is not None:
-            self._timeout_timer.cancel()
-            self._timeout_timer = None
-
-    @synchronized
-    @record_activity
     def put(self, key: str, model: AnyModel) -> None:
         """Add a model to the cache."""
         if key in self._cached_models:

@@ -322,7 +228,7 @@ class ModelCache:
             return
 
         size = calc_model_size_by_data(self._logger, model)
-        self.
+        self.make_room(size)
 
         # Inject custom modules into the model.
         if isinstance(model, torch.nn.Module):

@@ -366,7 +272,6 @@ class ModelCache:
         return overview
 
     @synchronized
-    @record_activity
     def get(self, key: str, stats_name: Optional[str] = None) -> CacheRecord:
         """Retrieve a model from the cache.
 

@@ -404,11 +309,9 @@ class ModelCache:
             self._logger.debug(f"Cache hit: {key} (Type: {cache_entry.cached_model.model.__class__.__name__})")
             for cb in self._on_cache_hit_callbacks:
                 cb(model_key=key, cache_snapshot=self._get_cache_snapshot())
-
         return cache_entry
 
     @synchronized
-    @record_activity
     def lock(self, cache_entry: CacheRecord, working_mem_bytes: Optional[int]) -> None:
         """Lock a model for use and move it into VRAM."""
         if cache_entry.key not in self._cached_models:

@@ -445,7 +348,6 @@ class ModelCache:
         self._log_cache_state()
 
     @synchronized
-    @record_activity
     def unlock(self, cache_entry: CacheRecord) -> None:
         """Unlock a model."""
         if cache_entry.key not in self._cached_models:

@@ -789,10 +691,6 @@ class ModelCache:
         external references to the model, there's nothing that the cache can do about it, and those models will not be
         garbage-collected.
         """
-        self._make_room_internal(bytes_needed)
-
-    def _make_room_internal(self, bytes_needed: int) -> None:
-        """Internal implementation of make_room(). Assumes the lock is already held."""
         self._logger.debug(f"Making room for {bytes_needed / MB:.2f}MB of RAM.")
         self._log_cache_state(title="Before dropping models:")
 
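Taken together, the hunks above remove the keep-alive timeout feature from ModelCache (the keep_alive_minutes option, the record_activity decorator, and the timer machinery). For reference, a standalone sketch of the pattern that was removed, condensed from the deleted code; the class name and clear_cache callback are illustrative, not InvokeAI API:

import threading
import time

class KeepAliveTimer:
    """Invokes a clear-cache callback after keep_alive_minutes of inactivity (0 disables the timer)."""

    def __init__(self, keep_alive_minutes: float, clear_cache) -> None:
        self._keep_alive_minutes = keep_alive_minutes
        self._clear_cache = clear_cache
        self._last_activity_time: float | None = None
        self._timer: threading.Timer | None = None

    def record_activity(self) -> None:
        if self._keep_alive_minutes <= 0:
            return
        self._last_activity_time = time.time()
        # Cancel any existing timer and start a fresh one.
        if self._timer is not None:
            self._timer.cancel()
        self._timer = threading.Timer(self._keep_alive_minutes * 60, self._on_timeout)
        self._timer.daemon = True  # don't block interpreter shutdown
        self._timer.start()

    def _on_timeout(self) -> None:
        # Guard against the race where activity happened just before the timer fired.
        if self._last_activity_time is not None:
            elapsed_minutes = (time.time() - self._last_activity_time) / 60
            if elapsed_minutes < self._keep_alive_minutes:
                return
        self._clear_cache()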
invokeai/backend/model_manager/load/model_loaders/cogview4.py
CHANGED

@@ -45,13 +45,12 @@ class CogView4DiffusersModel(GenericDiffusersLoader):
                 model_path,
                 torch_dtype=dtype,
                 variant=variant,
-                local_files_only=True,
             )
         except OSError as e:
             if variant and "no file named" in str(
                 e
             ):  # try without the variant, just in case user's preferences changed
-                result = load_class.from_pretrained(model_path, torch_dtype=dtype
+                result = load_class.from_pretrained(model_path, torch_dtype=dtype)
             else:
                 raise e
 
invokeai/backend/model_manager/load/model_loaders/flux.py
CHANGED

@@ -122,9 +122,9 @@ class CLIPDiffusersLoader(ModelLoader):
 
         match submodel_type:
             case SubModelType.Tokenizer:
-                return CLIPTokenizer.from_pretrained(Path(config.path) / "tokenizer"
+                return CLIPTokenizer.from_pretrained(Path(config.path) / "tokenizer")
             case SubModelType.TextEncoder:
-                return CLIPTextModel.from_pretrained(Path(config.path) / "text_encoder"
+                return CLIPTextModel.from_pretrained(Path(config.path) / "text_encoder")
 
         raise ValueError(
             f"Only Tokenizer and TextEncoder submodels are currently supported. Received: {submodel_type.value if submodel_type else 'None'}"

@@ -148,12 +148,10 @@ class BnbQuantizedLlmInt8bCheckpointModel(ModelLoader):
         )
         match submodel_type:
             case SubModelType.Tokenizer2 | SubModelType.Tokenizer3:
-                return T5TokenizerFast.from_pretrained(
-                    Path(config.path) / "tokenizer_2", max_length=512, local_files_only=True
-                )
+                return T5TokenizerFast.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512)
             case SubModelType.TextEncoder2 | SubModelType.TextEncoder3:
                 te2_model_path = Path(config.path) / "text_encoder_2"
-                model_config = AutoConfig.from_pretrained(te2_model_path
+                model_config = AutoConfig.from_pretrained(te2_model_path)
                 with accelerate.init_empty_weights():
                     model = AutoModelForTextEncoding.from_config(model_config)
                 model = quantize_model_llm_int8(model, modules_to_not_convert=set())

@@ -194,15 +192,10 @@ class T5EncoderCheckpointModel(ModelLoader):
 
         match submodel_type:
             case SubModelType.Tokenizer2 | SubModelType.Tokenizer3:
-                return T5TokenizerFast.from_pretrained(
-                    Path(config.path) / "tokenizer_2", max_length=512, local_files_only=True
-                )
+                return T5TokenizerFast.from_pretrained(Path(config.path) / "tokenizer_2", max_length=512)
             case SubModelType.TextEncoder2 | SubModelType.TextEncoder3:
                 return T5EncoderModel.from_pretrained(
-                    Path(config.path) / "text_encoder_2",
-                    torch_dtype="auto",
-                    low_cpu_mem_usage=True,
-                    local_files_only=True,
+                    Path(config.path) / "text_encoder_2", torch_dtype="auto", low_cpu_mem_usage=True
                 )
 
         raise ValueError(
invokeai/backend/model_manager/load/model_loaders/generic_diffusers.py
CHANGED

@@ -37,14 +37,12 @@ class GenericDiffusersLoader(ModelLoader):
         repo_variant = config.repo_variant if isinstance(config, Diffusers_Config_Base) else None
         variant = repo_variant.value if repo_variant else None
         try:
-            result: AnyModel = model_class.from_pretrained(
-                model_path, torch_dtype=self._torch_dtype, variant=variant, local_files_only=True
-            )
+            result: AnyModel = model_class.from_pretrained(model_path, torch_dtype=self._torch_dtype, variant=variant)
         except OSError as e:
             if variant and "no file named" in str(
                 e
             ):  # try without the variant, just in case user's preferences changed
-                result = model_class.from_pretrained(model_path, torch_dtype=self._torch_dtype
+                result = model_class.from_pretrained(model_path, torch_dtype=self._torch_dtype)
             else:
                 raise e
         return result
invokeai/backend/model_manager/load/model_loaders/stable_diffusion.py
CHANGED

@@ -80,13 +80,12 @@ class StableDiffusionDiffusersModel(GenericDiffusersLoader):
                 model_path,
                 torch_dtype=self._torch_dtype,
                 variant=variant,
-                local_files_only=True,
             )
         except OSError as e:
             if variant and "no file named" in str(
                 e
             ):  # try without the variant, just in case user's preferences changed
-                result = load_class.from_pretrained(model_path, torch_dtype=self._torch_dtype
+                result = load_class.from_pretrained(model_path, torch_dtype=self._torch_dtype)
             else:
                 raise e
 
invokeai/backend/model_manager/load/model_loaders/z_image.py
CHANGED

@@ -384,19 +384,15 @@ class Qwen3EncoderLoader(ModelLoader):
 
         match submodel_type:
             case SubModelType.Tokenizer:
-
-                # The tokenizer files should already exist locally in the model directory
-                return AutoTokenizer.from_pretrained(tokenizer_path, local_files_only=True)
+                return AutoTokenizer.from_pretrained(tokenizer_path)
             case SubModelType.TextEncoder:
                 # Determine safe dtype based on target device capabilities
                 target_device = TorchDevice.choose_torch_device()
                 model_dtype = TorchDevice.choose_bfloat16_safe_dtype(target_device)
-                # Use local_files_only=True to prevent network requests for validation
                 return Qwen3ForCausalLM.from_pretrained(
                     text_encoder_path,
                     torch_dtype=model_dtype,
                     low_cpu_mem_usage=True,
-                    local_files_only=True,
                 )
 
         raise ValueError(

@@ -530,27 +526,12 @@ class Qwen3EncoderCheckpointLoader(ModelLoader):
                 return self._load_from_singlefile(config)
             case SubModelType.Tokenizer:
                 # For single-file Qwen3, load tokenizer from HuggingFace
-
-                return self._load_tokenizer_with_offline_fallback()
+                return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
 
         raise ValueError(
             f"Only TextEncoder and Tokenizer submodels are supported. Received: {submodel_type.value if submodel_type else 'None'}"
         )
 
-    def _load_tokenizer_with_offline_fallback(self) -> AnyModel:
-        """Load tokenizer with local_files_only fallback for offline support.
-
-        First tries to load from local cache (offline), falling back to network download
-        if the tokenizer hasn't been cached yet. This ensures offline operation after
-        the initial download.
-        """
-        try:
-            # Try loading from local cache first (supports offline usage)
-            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE, local_files_only=True)
-        except OSError:
-            # Not in cache yet, download from HuggingFace
-            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
-
     def _load_from_singlefile(
         self,
         config: AnyModelConfig,

@@ -705,27 +686,12 @@ class Qwen3EncoderGGUFLoader(ModelLoader):
                 return self._load_from_gguf(config)
             case SubModelType.Tokenizer:
                 # For GGUF Qwen3, load tokenizer from HuggingFace
-
-                return self._load_tokenizer_with_offline_fallback()
+                return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
 
         raise ValueError(
             f"Only TextEncoder and Tokenizer submodels are supported. Received: {submodel_type.value if submodel_type else 'None'}"
        )
 
-    def _load_tokenizer_with_offline_fallback(self) -> AnyModel:
-        """Load tokenizer with local_files_only fallback for offline support.
-
-        First tries to load from local cache (offline), falling back to network download
-        if the tokenizer hasn't been cached yet. This ensures offline operation after
-        the initial download.
-        """
-        try:
-            # Try loading from local cache first (supports offline usage)
-            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE, local_files_only=True)
-        except OSError:
-            # Not in cache yet, download from HuggingFace
-            return AutoTokenizer.from_pretrained(self.DEFAULT_TOKENIZER_SOURCE)
-
     def _load_from_gguf(
         self,
         config: AnyModelConfig,
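A recurring change in the loader hunks above is dropping local_files_only=True from Hugging Face from_pretrained() calls, so the loaders no longer forbid network lookups themselves. A minimal sketch of how offline behaviour can still be requested per call or process-wide; the model path below is a placeholder, not a path InvokeAI uses:

import os

# Process-wide switch: set before importing transformers/huggingface_hub so it is picked up.
os.environ["HF_HUB_OFFLINE"] = "1"

from transformers import AutoTokenizer

# Per-call switch: raise instead of hitting the network if the files are not already on disk.
# "/path/to/local/model" is a placeholder directory containing tokenizer files.
tokenizer = AutoTokenizer.from_pretrained("/path/to/local/model", local_files_only=True)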
|