ollamadiffuser 1.2.2__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. ollamadiffuser/__init__.py +1 -1
  2. ollamadiffuser/api/server.py +312 -312
  3. ollamadiffuser/cli/config_commands.py +119 -0
  4. ollamadiffuser/cli/lora_commands.py +169 -0
  5. ollamadiffuser/cli/main.py +85 -1233
  6. ollamadiffuser/cli/model_commands.py +664 -0
  7. ollamadiffuser/cli/recommend_command.py +205 -0
  8. ollamadiffuser/cli/registry_commands.py +197 -0
  9. ollamadiffuser/core/config/model_registry.py +562 -11
  10. ollamadiffuser/core/config/settings.py +24 -2
  11. ollamadiffuser/core/inference/__init__.py +5 -0
  12. ollamadiffuser/core/inference/base.py +182 -0
  13. ollamadiffuser/core/inference/engine.py +204 -1405
  14. ollamadiffuser/core/inference/strategies/__init__.py +1 -0
  15. ollamadiffuser/core/inference/strategies/controlnet_strategy.py +170 -0
  16. ollamadiffuser/core/inference/strategies/flux_strategy.py +136 -0
  17. ollamadiffuser/core/inference/strategies/generic_strategy.py +164 -0
  18. ollamadiffuser/core/inference/strategies/gguf_strategy.py +113 -0
  19. ollamadiffuser/core/inference/strategies/hidream_strategy.py +104 -0
  20. ollamadiffuser/core/inference/strategies/sd15_strategy.py +134 -0
  21. ollamadiffuser/core/inference/strategies/sd3_strategy.py +80 -0
  22. ollamadiffuser/core/inference/strategies/sdxl_strategy.py +131 -0
  23. ollamadiffuser/core/inference/strategies/video_strategy.py +108 -0
  24. ollamadiffuser/mcp/__init__.py +0 -0
  25. ollamadiffuser/mcp/server.py +184 -0
  26. ollamadiffuser/ui/templates/index.html +62 -1
  27. ollamadiffuser/ui/web.py +116 -54
  28. {ollamadiffuser-1.2.2.dist-info → ollamadiffuser-2.0.0.dist-info}/METADATA +337 -108
  29. ollamadiffuser-2.0.0.dist-info/RECORD +61 -0
  30. {ollamadiffuser-1.2.2.dist-info → ollamadiffuser-2.0.0.dist-info}/WHEEL +1 -1
  31. {ollamadiffuser-1.2.2.dist-info → ollamadiffuser-2.0.0.dist-info}/entry_points.txt +1 -0
  32. ollamadiffuser/core/models/registry.py +0 -384
  33. ollamadiffuser/ui/samples/.DS_Store +0 -0
  34. ollamadiffuser-1.2.2.dist-info/RECORD +0 -45
  35. {ollamadiffuser-1.2.2.dist-info → ollamadiffuser-2.0.0.dist-info}/licenses/LICENSE +0 -0
  36. {ollamadiffuser-1.2.2.dist-info → ollamadiffuser-2.0.0.dist-info}/top_level.txt +0 -0
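The single hunk reproduced below is ollamadiffuser/core/inference/engine.py (item 13 above): the 1400-plus line monolithic InferenceEngine is replaced by a thin facade that delegates to per-model strategy classes. Reconstructed from the new signatures visible in that hunk, caller-side usage would look roughly like the sketch below; the ModelConfig constructor arguments and import paths are assumptions inferred from the diff, not documented API.

# Hypothetical usage sketch of the 2.0.0 facade; ModelConfig field names are
# inferred from attributes referenced in the diff (name, path, model_type,
# variant, parameters) and may not match the real constructor.
from ollamadiffuser.core.config.settings import ModelConfig
from ollamadiffuser.core.inference.engine import InferenceEngine

config = ModelConfig(
    name="stable-diffusion-xl",
    path="stabilityai/stable-diffusion-xl-base-1.0",  # Hub IDs are now accepted, not only local paths
    model_type="sdxl",                                # routed to SDXLStrategy by _get_strategy()
    variant="fp16",
    parameters={"num_inference_steps": 28, "guidance_scale": 5.0},
)

engine = InferenceEngine()
if engine.load_model(config):          # returns False instead of raising on failure
    image = engine.generate_image(
        prompt="a watercolor fox in a misty forest",
        width=1024,
        height=1024,
        seed=42,                       # seed is an explicit parameter in 2.0.0
    )
    image.save("fox.png")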
@@ -1,1427 +1,226 @@
- import os
- import logging
- import torch
- import numpy as np
- from diffusers import (
-     StableDiffusionPipeline,
-     StableDiffusionXLPipeline,
-     StableDiffusion3Pipeline,
-     FluxPipeline,
-     StableDiffusionControlNetPipeline,
-     StableDiffusionXLControlNetPipeline,
-     ControlNetModel,
-     AnimateDiffPipeline,
-     MotionAdapter
- )
- # Try to import HiDreamImagePipeline if available
- try:
-     from diffusers import HiDreamImagePipeline
-     HIDREAM_AVAILABLE = True
- except ImportError:
-     HIDREAM_AVAILABLE = False
-     logger = logging.getLogger(__name__)
-     logger.warning("HiDreamImagePipeline not available. Install latest diffusers from source for HiDream support.")
+ """
+ Inference Engine - Facade that delegates to model-specific strategies.
 
- # Import GGUF support
- try:
-     from ..models.gguf_loader import gguf_loader, GGUF_AVAILABLE
-     logger = logging.getLogger(__name__)
-     if GGUF_AVAILABLE:
-         logger.info("GGUF support available for quantized model inference")
-     else:
-         logger.warning("GGUF support not available. Install with: pip install llama-cpp-python gguf")
- except ImportError:
-     GGUF_AVAILABLE = False
-     logger = logging.getLogger(__name__)
-     logger.warning("GGUF loader module not found")
+ This replaces the former 1400+ line god class with a clean strategy pattern.
+ Each model type (SD1.5, SDXL, FLUX, SD3, ControlNet, Video, HiDream, GGUF)
+ has its own strategy class that handles loading and generation.
+ """
+
+ import logging
+ from typing import Any, Dict, Optional, Union
 
  from PIL import Image
- from typing import Optional, Dict, Any, Union
- from pathlib import Path
- from ..config.settings import ModelConfig
- from ..utils.controlnet_preprocessors import controlnet_preprocessor
 
- # Global safety checker disabling
- os.environ["DISABLE_NSFW_CHECKER"] = "1"
- os.environ["DIFFUSERS_DISABLE_SAFETY_CHECKER"] = "1"
+ from ..config.settings import ModelConfig
+ from .base import InferenceStrategy
 
  logger = logging.getLogger(__name__)
+
+
+ def _get_strategy(model_type: str) -> InferenceStrategy:
+     """Create the appropriate strategy for a model type."""
+     if model_type == "sd15":
+         from .strategies.sd15_strategy import SD15Strategy
+         return SD15Strategy()
+     elif model_type == "sdxl":
+         from .strategies.sdxl_strategy import SDXLStrategy
+         return SDXLStrategy()
+     elif model_type == "flux":
+         from .strategies.flux_strategy import FluxStrategy
+         return FluxStrategy()
+     elif model_type == "sd3":
+         from .strategies.sd3_strategy import SD3Strategy
+         return SD3Strategy()
+     elif model_type in ("controlnet_sd15", "controlnet_sdxl"):
+         from .strategies.controlnet_strategy import ControlNetStrategy
+         return ControlNetStrategy()
+     elif model_type == "video":
+         from .strategies.video_strategy import VideoStrategy
+         return VideoStrategy()
+     elif model_type == "hidream":
+         from .strategies.hidream_strategy import HiDreamStrategy
+         return HiDreamStrategy()
+     elif model_type == "gguf":
+         from .strategies.gguf_strategy import GGUFStrategy
+         return GGUFStrategy()
+     elif model_type == "generic":
+         from .strategies.generic_strategy import GenericPipelineStrategy
+         return GenericPipelineStrategy()
+     else:
+         raise ValueError(f"Unsupported model type: {model_type}")
+
+
+ def _detect_device() -> str:
+     """Automatically detect the best available device."""
+     import torch
+
+     if torch.cuda.is_available():
+         device = "cuda"
+         logger.debug(f"CUDA device count: {torch.cuda.device_count()}")
+     elif torch.backends.mps.is_available():
+         device = "mps"
+     else:
+         device = "cpu"
+
+     logger.info(f"Using device: {device}")
+     if device == "cpu":
+         logger.warning("Using CPU - inference will be slower")
+     return device
+
+
  class InferenceEngine:
50
- """Inference engine responsible for actual image generation"""
51
-
72
+ """
73
+ Facade engine that delegates to model-type-specific strategies.
74
+
75
+ This class maintains backward compatibility with the previous monolithic
76
+ InferenceEngine API while cleanly separating concerns into per-model strategies.
77
+ """
78
+
52
79
  def __init__(self):
53
- self.pipeline = None
80
+ self._strategy: Optional[InferenceStrategy] = None
54
81
  self.model_config: Optional[ModelConfig] = None
55
- self.device = None
56
- self.tokenizer = None
57
- self.max_token_limit = 77
58
- self.current_lora = None # Track current LoRA state
59
- self.controlnet = None # Track ControlNet model
60
- self.is_controlnet_pipeline = False # Track if current pipeline is ControlNet
61
-
62
- def _get_device(self) -> str:
63
- """Automatically detect available device"""
64
- # Debug device availability
65
- logger.debug(f"CUDA available: {torch.cuda.is_available()}")
66
- logger.debug(f"MPS available: {torch.backends.mps.is_available()}")
67
-
68
- # Determine device
69
- if torch.cuda.is_available():
70
- device = "cuda"
71
- logger.debug(f"CUDA device count: {torch.cuda.device_count()}")
72
- elif torch.backends.mps.is_available():
73
- device = "mps" # Apple Silicon GPU
74
- else:
75
- device = "cpu"
76
-
77
- logger.info(f"Using device: {device}")
78
- if device == "cpu":
79
- logger.warning("⚠️ Using CPU - this will be slower for large models")
80
-
81
- return device
82
-
83
- def _get_pipeline_class(self, model_type: str):
84
- """Get corresponding pipeline class based on model type"""
85
- pipeline_map = {
86
- "sd15": StableDiffusionPipeline,
87
- "sdxl": StableDiffusionXLPipeline,
88
- "sd3": StableDiffusion3Pipeline,
89
- "flux": FluxPipeline,
90
- "gguf": "gguf_special", # Special marker for GGUF models
91
- "controlnet_sd15": StableDiffusionControlNetPipeline,
92
- "controlnet_sdxl": StableDiffusionXLControlNetPipeline,
93
- "video": AnimateDiffPipeline,
94
- }
95
-
96
- # Add HiDream support if available
97
- if HIDREAM_AVAILABLE:
98
- pipeline_map["hidream"] = HiDreamImagePipeline
99
-
100
- return pipeline_map.get(model_type)
101
-
82
+ self.device: Optional[str] = None
83
+
84
+ # -- Backward-compatible properties --
85
+
86
+ @property
87
+ def pipeline(self):
88
+ """Access the underlying pipeline (backward compat)."""
89
+ return self._strategy.pipeline if self._strategy else None
90
+
91
+ @property
92
+ def current_lora(self):
93
+ return self._strategy.current_lora if self._strategy else None
94
+
95
+ @property
96
+ def is_controlnet_pipeline(self) -> bool:
97
+ return getattr(self._strategy, "is_controlnet_pipeline", False)
98
+
99
+ # -- Core API --
100
+
102
101
  def load_model(self, model_config: ModelConfig) -> bool:
103
- """Load model"""
102
+ """Load a model using the appropriate strategy."""
104
103
  try:
105
- # Validate model configuration
106
- if not model_config:
107
- logger.error("Model configuration is None")
108
- return False
109
-
110
- if not model_config.path:
111
- logger.error(f"Model path is None for model: {model_config.name}")
104
+ if not model_config or not model_config.path:
105
+ logger.error("Invalid model configuration")
112
106
  return False
113
-
114
- model_path = Path(model_config.path)
115
- if not model_path.exists():
116
- logger.error(f"Model path does not exist: {model_config.path}")
117
- return False
118
-
119
- logger.info(f"Loading model from path: {model_config.path}")
120
-
121
- self.device = self._get_device()
122
- logger.info(f"Using device: {self.device}")
123
-
124
- # Get corresponding pipeline class
125
- pipeline_class = self._get_pipeline_class(model_config.model_type)
126
- if not pipeline_class:
127
- logger.error(f"Unsupported model type: {model_config.model_type}")
107
+
108
+ # Only check existence for local paths, not HuggingFace Hub IDs
109
+ # Hub IDs look like "org/model-name", local paths start with / . or ~
110
+ from pathlib import Path
111
+ model_path = model_config.path
112
+ is_local_path = model_path.startswith(("/", ".", "~")) or (len(model_path) > 1 and model_path[1] == ":")
113
+ if is_local_path and not Path(model_path).expanduser().exists():
114
+ logger.error(f"Model path does not exist: {model_path}")
128
115
  return False
129
-
130
- # Handle GGUF models specially
131
- if model_config.model_type == "gguf" or (model_config.variant and "gguf" in model_config.variant.lower()):
132
- if not GGUF_AVAILABLE:
133
- logger.error("GGUF support not available. Install with: pip install llama-cpp-python gguf")
134
- return False
135
-
136
- logger.info(f"Loading GGUF model: {model_config.name} (variant: {model_config.variant})")
137
-
138
- # Use GGUF loader instead of regular pipeline
139
- model_config_dict = {
140
- 'name': model_config.name,
141
- 'path': model_config.path,
142
- 'variant': model_config.variant,
143
- 'model_type': model_config.model_type,
144
- 'parameters': model_config.parameters
145
- }
146
-
147
- if gguf_loader.load_model(model_config_dict):
148
- # Set pipeline to None since we're using GGUF loader
149
- self.pipeline = None
150
- self.model_config = model_config
151
- self.device = self._get_device()
152
- logger.info(f"GGUF model {model_config.name} loaded successfully")
153
- return True
154
- else:
155
- logger.error(f"Failed to load GGUF model: {model_config.name}")
156
- return False
157
-
158
- # Check if this is a ControlNet model
159
- self.is_controlnet_pipeline = model_config.model_type.startswith("controlnet_")
160
-
161
- # Handle ControlNet models
162
- if self.is_controlnet_pipeline:
163
- return self._load_controlnet_model(model_config, pipeline_class, {})
164
-
165
- # Set loading parameters
166
- load_kwargs = {}
167
- if model_config.variant == "fp16":
168
- load_kwargs["torch_dtype"] = torch.float16
169
- load_kwargs["variant"] = "fp16"
170
- elif model_config.variant == "bf16":
171
- load_kwargs["torch_dtype"] = torch.bfloat16
172
-
173
- # Load pipeline
174
- logger.info(f"Loading model: {model_config.name}")
175
-
176
- # Special handling for FLUX models
177
- if model_config.model_type == "flux":
178
- # FLUX models work best with bfloat16, but use float32 on CPU or float16 on MPS
179
- if self.device == "cpu":
180
- load_kwargs["torch_dtype"] = torch.float32
181
- logger.info("Using float32 for FLUX model on CPU")
182
- logger.warning("⚠️ FLUX.1-dev is a 12B parameter model. CPU inference will be very slow!")
183
- logger.warning("⚠️ For better performance, consider using a GPU with at least 12GB VRAM")
184
- else:
185
- load_kwargs["torch_dtype"] = torch.bfloat16
186
- load_kwargs["use_safetensors"] = True
187
- logger.info("Using bfloat16 for FLUX model")
188
-
189
- # Special handling for Video (AnimateDiff) models
190
- elif model_config.model_type == "video":
191
- # AnimateDiff requires motion adapter
192
- logger.info("Loading AnimateDiff (video) model")
193
- motion_adapter_path = getattr(model_config, 'motion_adapter_path', None)
194
- if not motion_adapter_path:
195
- # Use default motion adapter if not specified
196
- motion_adapter_path = "guoyww/animatediff-motion-adapter-v1-5-2"
197
- logger.info(f"Using default motion adapter: {motion_adapter_path}")
198
-
199
- try:
200
- # Load motion adapter
201
- motion_adapter = MotionAdapter.from_pretrained(
202
- motion_adapter_path,
203
- torch_dtype=load_kwargs.get("torch_dtype", torch.float16)
204
- )
205
- load_kwargs["motion_adapter"] = motion_adapter
206
- logger.info(f"Motion adapter loaded from: {motion_adapter_path}")
207
- except Exception as e:
208
- logger.error(f"Failed to load motion adapter: {e}")
209
- return False
210
-
211
- # Disable safety checker for AnimateDiff
212
- load_kwargs["safety_checker"] = None
213
- load_kwargs["requires_safety_checker"] = False
214
- load_kwargs["feature_extractor"] = None
215
- logger.info("Safety checker disabled for AnimateDiff models")
216
-
217
- # Special handling for HiDream models
218
- elif model_config.model_type == "hidream":
219
- if not HIDREAM_AVAILABLE:
220
- logger.error("HiDream models require diffusers to be installed from source. Please install with: pip install git+https://github.com/huggingface/diffusers.git")
221
- return False
222
-
223
- logger.info("Loading HiDream model")
224
- # HiDream models work best with bfloat16
225
- if self.device == "cpu":
226
- load_kwargs["torch_dtype"] = torch.float32
227
- logger.info("Using float32 for HiDream model on CPU")
228
- logger.warning("⚠️ HiDream models are large. CPU inference will be slow!")
229
- else:
230
- load_kwargs["torch_dtype"] = torch.bfloat16
231
- logger.info("Using bfloat16 for HiDream model")
232
-
233
- # Disable safety checker for HiDream models
234
- load_kwargs["safety_checker"] = None
235
- load_kwargs["requires_safety_checker"] = False
236
- load_kwargs["feature_extractor"] = None
237
- logger.info("Safety checker disabled for HiDream models")
238
-
239
- # Disable safety checker for SD 1.5 to prevent false NSFW detections
240
- if model_config.model_type == "sd15" or model_config.model_type == "sdxl":
241
- load_kwargs["safety_checker"] = None
242
- load_kwargs["requires_safety_checker"] = False
243
- load_kwargs["feature_extractor"] = None
244
- # Use float32 for better numerical stability on SD 1.5
245
- if model_config.variant == "fp16" and (self.device == "cpu" or self.device == "mps"):
246
- load_kwargs["torch_dtype"] = torch.float32
247
- load_kwargs.pop("variant", None)
248
- logger.info(f"Using float32 for {self.device} inference to improve stability")
249
- elif self.device == "mps":
250
- # Force float32 on MPS for SD 1.5 to avoid NaN issues
251
- load_kwargs["torch_dtype"] = torch.float32
252
- logger.info("Using float32 for MPS inference to avoid NaN issues with SD 1.5")
253
- logger.info("Safety checker disabled for SD 1.5 to prevent false NSFW detections")
254
-
255
- # Disable safety checker for FLUX models to prevent false NSFW detections
256
- if model_config.model_type == "flux":
257
- load_kwargs["safety_checker"] = None
258
- load_kwargs["requires_safety_checker"] = False
259
- load_kwargs["feature_extractor"] = None
260
- logger.info("Safety checker disabled for FLUX models to prevent false NSFW detections")
261
-
262
- # Load pipeline
263
- self.pipeline = pipeline_class.from_pretrained(
264
- model_config.path,
265
- **load_kwargs
266
- )
267
-
268
- # Move to device with proper error handling
269
- try:
270
- self.pipeline = self.pipeline.to(self.device)
271
- logger.info(f"Pipeline moved to {self.device}")
272
- except Exception as e:
273
- logger.warning(f"Failed to move pipeline to {self.device}: {e}")
274
- if self.device != "cpu":
275
- logger.info("Falling back to CPU")
276
- self.device = "cpu"
277
- self.pipeline = self.pipeline.to("cpu")
278
-
279
- # Enable memory optimizations
280
- if hasattr(self.pipeline, 'enable_attention_slicing'):
281
- self.pipeline.enable_attention_slicing()
282
- logger.info("Enabled attention slicing for memory optimization")
283
-
284
- # Special optimizations for FLUX models
285
- if model_config.model_type == "flux":
286
- if self.device == "cuda":
287
- # CUDA-specific optimizations
288
- if hasattr(self.pipeline, 'enable_model_cpu_offload'):
289
- self.pipeline.enable_model_cpu_offload()
290
- logger.info("Enabled CPU offloading for FLUX model")
291
- elif self.device == "cpu":
292
- # CPU-specific optimizations
293
- logger.info("Applying CPU-specific optimizations for FLUX model")
294
- # Enable memory efficient attention if available
295
- if hasattr(self.pipeline, 'enable_xformers_memory_efficient_attention'):
296
- try:
297
- self.pipeline.enable_xformers_memory_efficient_attention()
298
- logger.info("Enabled xformers memory efficient attention")
299
- except Exception as e:
300
- logger.debug(f"xformers not available: {e}")
301
-
302
- # Set low memory mode
303
- if hasattr(self.pipeline, 'enable_sequential_cpu_offload'):
304
- try:
305
- self.pipeline.enable_sequential_cpu_offload()
306
- logger.info("Enabled sequential CPU offload for memory efficiency")
307
- except Exception as e:
308
- logger.debug(f"Sequential CPU offload not available: {e}")
309
-
310
- # Special optimizations for Video (AnimateDiff) models
311
- elif model_config.model_type == "video":
312
- logger.info("Applying optimizations for AnimateDiff video model")
313
- # Enable VAE slicing for video models to reduce memory usage
314
- if hasattr(self.pipeline, 'enable_vae_slicing'):
315
- self.pipeline.enable_vae_slicing()
316
- logger.info("Enabled VAE slicing for video model")
317
-
318
- # Enable model CPU offload for better memory management
319
- if self.device == "cuda" and hasattr(self.pipeline, 'enable_model_cpu_offload'):
320
- self.pipeline.enable_model_cpu_offload()
321
- logger.info("Enabled model CPU offload for video model")
322
-
323
- # Set scheduler to work well with AnimateDiff
324
- if hasattr(self.pipeline, 'scheduler'):
325
- from diffusers import DDIMScheduler
326
- try:
327
- self.pipeline.scheduler = DDIMScheduler.from_config(
328
- self.pipeline.scheduler.config,
329
- clip_sample=False,
330
- timestep_spacing="linspace",
331
- beta_schedule="linear",
332
- steps_offset=1,
333
- )
334
- logger.info("Configured DDIM scheduler for AnimateDiff")
335
- except Exception as e:
336
- logger.debug(f"Could not configure DDIM scheduler: {e}")
337
-
338
- # Special optimizations for HiDream models
339
- elif model_config.model_type == "hidream":
340
- logger.info("Applying optimizations for HiDream model")
341
- # Enable VAE slicing and tiling for HiDream models
342
- if hasattr(self.pipeline, 'enable_vae_slicing'):
343
- self.pipeline.enable_vae_slicing()
344
- logger.info("Enabled VAE slicing for HiDream model")
345
-
346
- if hasattr(self.pipeline, 'enable_vae_tiling'):
347
- self.pipeline.enable_vae_tiling()
348
- logger.info("Enabled VAE tiling for HiDream model")
349
-
350
- # Enable model CPU offload for better memory management
351
- if self.device == "cuda" and hasattr(self.pipeline, 'enable_model_cpu_offload'):
352
- self.pipeline.enable_model_cpu_offload()
353
- logger.info("Enabled model CPU offload for HiDream model")
354
- elif self.device == "cpu":
355
- # CPU-specific optimizations for HiDream
356
- if hasattr(self.pipeline, 'enable_sequential_cpu_offload'):
357
- try:
358
- self.pipeline.enable_sequential_cpu_offload()
359
- logger.info("Enabled sequential CPU offload for HiDream model")
360
- except Exception as e:
361
- logger.debug(f"Sequential CPU offload not available: {e}")
362
-
363
- # Additional safety checker disabling for SD 1.5 (in case the above didn't work)
364
- if model_config.model_type == "sd15" or model_config.model_type == "sdxl":
365
- if hasattr(self.pipeline, 'safety_checker'):
366
- self.pipeline.safety_checker = None
367
- if hasattr(self.pipeline, 'feature_extractor'):
368
- self.pipeline.feature_extractor = None
369
- if hasattr(self.pipeline, 'requires_safety_checker'):
370
- self.pipeline.requires_safety_checker = False
371
-
372
- # Monkey patch the safety checker call to always return False
373
- def dummy_safety_check(self, images, clip_input):
374
- return images, [False] * len(images)
375
-
376
- # Apply monkey patch if safety checker exists
377
- if hasattr(self.pipeline, '_safety_check'):
378
- self.pipeline._safety_check = dummy_safety_check.__get__(self.pipeline, type(self.pipeline))
379
-
380
- # Also monkey patch the run_safety_checker method if it exists
381
- if hasattr(self.pipeline, 'run_safety_checker'):
382
- def dummy_run_safety_checker(images, device, dtype):
383
- return images, [False] * len(images)
384
- self.pipeline.run_safety_checker = dummy_run_safety_checker
385
-
386
- # Monkey patch the check_inputs method to prevent safety checker validation
387
- if hasattr(self.pipeline, 'check_inputs'):
388
- original_check_inputs = self.pipeline.check_inputs
389
- def patched_check_inputs(*args, **kwargs):
390
- # Call original but ignore safety checker requirements
391
- try:
392
- return original_check_inputs(*args, **kwargs)
393
- except Exception as e:
394
- if "safety_checker" in str(e).lower():
395
- logger.debug(f"Ignoring safety checker validation error: {e}")
396
- return
397
- raise e
398
- self.pipeline.check_inputs = patched_check_inputs
399
-
400
- logger.info("Additional safety checker components disabled with monkey patch")
401
-
402
-
403
- # Load LoRA and other components
404
- if model_config.components and "lora" in model_config.components:
405
- self._load_lora(model_config)
406
-
407
- # Apply optimizations
408
- self._apply_optimizations()
409
-
410
- # Set tokenizer
411
- if hasattr(self.pipeline, 'tokenizer'):
412
- self.tokenizer = self.pipeline.tokenizer
413
-
414
- self.model_config = model_config
415
- logger.info(f"Model {model_config.name} loaded successfully")
416
- return True
417
-
418
- except Exception as e:
419
- logger.error(f"Failed to load model: {e}")
116
+
117
+ # Detect GGUF models by variant
118
+ model_type = model_config.model_type
119
+ if model_config.variant and "gguf" in model_config.variant.lower():
120
+ model_type = "gguf"
121
+
122
+ self.device = _detect_device()
123
+ self._strategy = _get_strategy(model_type)
124
+
125
+ if self._strategy.load(model_config, self.device):
126
+ self.model_config = model_config
127
+ # Update device in case strategy fell back to CPU
128
+ self.device = self._strategy.device
129
+ logger.info(f"Model {model_config.name} loaded successfully via {type(self._strategy).__name__}")
130
+ return True
131
+
132
+ self._strategy = None
420
133
  return False
421
-
422
- def _load_controlnet_model(self, model_config: ModelConfig, pipeline_class, load_kwargs: dict) -> bool:
423
- """Load ControlNet model with base model"""
424
- try:
425
- # Get base model info
426
- base_model_name = getattr(model_config, 'base_model', None)
427
- if not base_model_name:
428
- # Try to extract from model registry
429
- from ..models.manager import model_manager
430
- model_info = model_manager.get_model_info(model_config.name)
431
- if model_info and 'base_model' in model_info:
432
- base_model_name = model_info['base_model']
433
- else:
434
- logger.error(f"No base model specified for ControlNet model: {model_config.name}")
435
- return False
436
-
437
- # Check if base model is installed
438
- from ..models.manager import model_manager
439
- if not model_manager.is_model_installed(base_model_name):
440
- logger.error(f"Base model '{base_model_name}' not installed. Please install it first.")
441
- return False
442
-
443
- # Get base model config
444
- from ..config.settings import settings
445
- base_model_config = settings.models[base_model_name]
446
-
447
- # Set loading parameters based on variant
448
- if model_config.variant == "fp16":
449
- load_kwargs["torch_dtype"] = torch.float16
450
- load_kwargs["variant"] = "fp16"
451
- elif model_config.variant == "bf16":
452
- load_kwargs["torch_dtype"] = torch.bfloat16
453
-
454
- # Handle device-specific optimizations
455
- if self.device == "cpu" or self.device == "mps":
456
- load_kwargs["torch_dtype"] = torch.float32
457
- load_kwargs.pop("variant", None)
458
- logger.info(f"Using float32 for {self.device} inference to improve stability")
459
-
460
- # Disable safety checker
461
- load_kwargs["safety_checker"] = None
462
- load_kwargs["requires_safety_checker"] = False
463
- load_kwargs["feature_extractor"] = None
464
-
465
- # Load ControlNet model
466
- logger.info(f"Loading ControlNet model from: {model_config.path}")
467
- self.controlnet = ControlNetModel.from_pretrained(
468
- model_config.path,
469
- torch_dtype=load_kwargs.get("torch_dtype", torch.float32)
470
- )
471
-
472
- # Load pipeline with ControlNet and base model
473
- logger.info(f"Loading ControlNet pipeline with base model: {base_model_name}")
474
- self.pipeline = pipeline_class.from_pretrained(
475
- base_model_config.path,
476
- controlnet=self.controlnet,
477
- **load_kwargs
478
- )
479
-
480
- # Move to device
481
- try:
482
- self.pipeline = self.pipeline.to(self.device)
483
- self.controlnet = self.controlnet.to(self.device)
484
- logger.info(f"ControlNet pipeline moved to {self.device}")
485
- except Exception as e:
486
- logger.warning(f"Failed to move ControlNet pipeline to {self.device}: {e}")
487
- if self.device != "cpu":
488
- logger.info("Falling back to CPU")
489
- self.device = "cpu"
490
- self.pipeline = self.pipeline.to("cpu")
491
- self.controlnet = self.controlnet.to("cpu")
492
-
493
- # Enable memory optimizations
494
- if hasattr(self.pipeline, 'enable_attention_slicing'):
495
- self.pipeline.enable_attention_slicing()
496
- logger.info("Enabled attention slicing for ControlNet pipeline")
497
-
498
- # Apply additional optimizations
499
- self._apply_optimizations()
500
-
501
- # Set tokenizer
502
- if hasattr(self.pipeline, 'tokenizer'):
503
- self.tokenizer = self.pipeline.tokenizer
504
-
505
- self.model_config = model_config
506
- logger.info(f"ControlNet model {model_config.name} loaded successfully")
507
- return True
508
-
134
+
509
135
  except Exception as e:
510
- logger.error(f"Failed to load ControlNet model: {e}")
136
+ logger.error(f"Failed to load model: {e}")
137
+ self._strategy = None
511
138
  return False
512
-
513
- def _load_lora(self, model_config: ModelConfig):
514
- """Load LoRA weights"""
515
- try:
516
- lora_config = model_config.components["lora"]
517
-
518
- # Check if it's a Hugging Face Hub model
519
- if "repo_id" in lora_config:
520
- # Load from Hugging Face Hub
521
- repo_id = lora_config["repo_id"]
522
- weight_name = lora_config.get("weight_name", "pytorch_lora_weights.safetensors")
523
-
524
- logger.info(f"Loading LoRA from Hugging Face Hub: {repo_id}")
525
- self.pipeline.load_lora_weights(repo_id, weight_name=weight_name)
526
-
527
- # Set LoRA scale if specified
528
- if "scale" in lora_config:
529
- scale = lora_config["scale"]
530
- if hasattr(self.pipeline, 'set_adapters'):
531
- self.pipeline.set_adapters(["default"], adapter_weights=[scale])
532
- logger.info(f"Set LoRA scale to {scale}")
533
-
534
- logger.info(f"LoRA weights loaded successfully from {repo_id}")
535
-
536
- elif "filename" in lora_config:
537
- # Load from local file
538
- components_path = Path(model_config.path) / "components" / "lora"
539
- lora_path = components_path / lora_config["filename"]
540
- if lora_path.exists():
541
- self.pipeline.load_lora_weights(str(components_path), weight_name=lora_config["filename"])
542
- self.pipeline.fuse_lora()
543
- logger.info("LoRA weights loaded successfully from local file")
544
- else:
545
- # Load from directory
546
- components_path = Path(model_config.path) / "components" / "lora"
547
- if components_path.exists():
548
- self.pipeline.load_lora_weights(str(components_path))
549
- self.pipeline.fuse_lora()
550
- logger.info("LoRA weights loaded successfully from directory")
551
-
552
- except Exception as e:
553
- logger.warning(f"Failed to load LoRA weights: {e}")
554
-
555
- def _apply_optimizations(self):
556
- """Apply performance optimizations"""
557
- try:
558
- # Enable torch compile for faster inference
559
- if hasattr(torch, 'compile') and self.device != "mps": # MPS doesn't support torch.compile yet
560
- if hasattr(self.pipeline, 'unet'):
561
- try:
562
- self.pipeline.unet = torch.compile(
563
- self.pipeline.unet,
564
- mode="reduce-overhead",
565
- fullgraph=True
566
- )
567
- logger.info("torch.compile optimization enabled")
568
- except Exception as e:
569
- logger.debug(f"torch.compile failed: {e}")
570
- elif self.device == "mps":
571
- logger.debug("Skipping torch.compile on MPS (not supported yet)")
572
- elif self.device == "cpu":
573
- logger.debug("Skipping torch.compile on CPU for stability")
574
-
575
- except Exception as e:
576
- logger.warning(f"Failed to apply optimization settings: {e}")
577
-
578
- def truncate_prompt(self, prompt: str) -> str:
579
- """Truncate prompt to fit CLIP token limit"""
580
- if not prompt or not self.tokenizer:
581
- return prompt
582
-
583
- # Encode prompt
584
- tokens = self.tokenizer.encode(prompt)
585
-
586
- # Check if truncation is needed
587
- if len(tokens) <= self.max_token_limit:
588
- return prompt
589
-
590
- # Truncate tokens and decode back to text
591
- truncated_tokens = tokens[:self.max_token_limit]
592
- truncated_prompt = self.tokenizer.decode(truncated_tokens)
593
-
594
- logger.warning(f"Prompt truncated: {len(tokens)} -> {len(truncated_tokens)} tokens")
595
- return truncated_prompt
596
-
597
- def generate_image(self,
598
- prompt: str,
599
- negative_prompt: str = "low quality, bad anatomy, worst quality, low resolution",
600
- num_inference_steps: Optional[int] = None,
601
- guidance_scale: Optional[float] = None,
602
- width: int = 1024,
603
- height: int = 1024,
604
- control_image: Optional[Union[Image.Image, str]] = None,
605
- controlnet_conditioning_scale: float = 1.0,
606
- control_guidance_start: float = 0.0,
607
- control_guidance_end: float = 1.0,
608
- **kwargs) -> Image.Image:
609
- """Generate image"""
610
- # Check if we're using a GGUF model
611
- is_gguf_model = (
612
- self.model_config and
613
- (self.model_config.model_type == "gguf" or
614
- (self.model_config.variant and "gguf" in self.model_config.variant.lower()))
139
+
140
+ def generate_image(
141
+ self,
142
+ prompt: str,
143
+ negative_prompt: str = "low quality, bad anatomy, worst quality, low resolution",
144
+ num_inference_steps: Optional[int] = None,
145
+ guidance_scale: Optional[float] = None,
146
+ width: int = 1024,
147
+ height: int = 1024,
148
+ seed: Optional[int] = None,
149
+ control_image: Optional[Union[Image.Image, str]] = None,
150
+ controlnet_conditioning_scale: float = 1.0,
151
+ control_guidance_start: float = 0.0,
152
+ control_guidance_end: float = 1.0,
153
+ image: Optional[Image.Image] = None,
154
+ mask_image: Optional[Image.Image] = None,
155
+ strength: float = 0.75,
156
+ **kwargs,
157
+ ) -> Image.Image:
158
+ """Generate an image using the current strategy."""
159
+ if not self._strategy:
160
+ raise RuntimeError("No model loaded")
161
+
162
+ # Build kwargs to pass through
163
+ gen_kwargs = dict(kwargs)
164
+
165
+ # Pass img2img / inpainting params
166
+ if image is not None:
167
+ gen_kwargs["image"] = image
168
+ if mask_image is not None:
169
+ gen_kwargs["mask_image"] = mask_image
170
+ if image is not None or mask_image is not None:
171
+ gen_kwargs["strength"] = strength
172
+
173
+ # Pass ControlNet params
174
+ if control_image is not None:
175
+ gen_kwargs["control_image"] = control_image
176
+ gen_kwargs["controlnet_conditioning_scale"] = controlnet_conditioning_scale
177
+ gen_kwargs["control_guidance_start"] = control_guidance_start
178
+ gen_kwargs["control_guidance_end"] = control_guidance_end
179
+
180
+ return self._strategy.generate(
181
+ prompt=prompt,
182
+ negative_prompt=negative_prompt,
183
+ num_inference_steps=num_inference_steps,
184
+ guidance_scale=guidance_scale,
185
+ width=width,
186
+ height=height,
187
+ seed=seed,
188
+ **gen_kwargs,
615
189
  )
616
-
617
- if is_gguf_model:
618
- if not GGUF_AVAILABLE:
619
- raise RuntimeError("GGUF support not available")
620
-
621
- if not gguf_loader.is_loaded():
622
- raise RuntimeError("GGUF model not loaded")
623
-
624
- logger.info(f"Generating image using GGUF model: {prompt[:50]}...")
625
-
626
- # Use model default parameters for GGUF
627
- if num_inference_steps is None:
628
- num_inference_steps = self.model_config.parameters.get("num_inference_steps", 20)
629
-
630
- if guidance_scale is None:
631
- guidance_scale = self.model_config.parameters.get("guidance_scale", 7.5)
632
-
633
- # Generate using GGUF loader
634
- generation_kwargs = {
635
- "prompt": prompt,
636
- "negative_prompt": negative_prompt,
637
- "num_inference_steps": num_inference_steps,
638
- "guidance_scale": guidance_scale,
639
- "width": width,
640
- "height": height,
641
- **kwargs
642
- }
643
-
644
- try:
645
- image = gguf_loader.generate_image(**generation_kwargs)
646
- if image is None:
647
- logger.warning("GGUF generation returned None, creating error image")
648
- return self._create_error_image("GGUF generation failed or not yet implemented", prompt)
649
- return image
650
- except Exception as e:
651
- logger.error(f"GGUF generation failed: {e}")
652
- return self._create_error_image(str(e), prompt)
653
-
654
- # Continue with regular pipeline generation for non-GGUF models
655
- if not self.pipeline:
656
- raise RuntimeError("Model not loaded")
657
-
658
- # Handle ControlNet-specific logic
659
- if self.is_controlnet_pipeline:
660
- if control_image is None:
661
- raise ValueError("ControlNet model requires a control image")
662
-
663
- # Process control image
664
- control_image = self._prepare_control_image(control_image, width, height)
665
-
666
- # Use model default parameters
667
- if num_inference_steps is None:
668
- num_inference_steps = self.model_config.parameters.get("num_inference_steps", 28)
669
-
670
- if guidance_scale is None:
671
- guidance_scale = self.model_config.parameters.get("guidance_scale", 3.5)
672
-
673
- # Truncate prompts
674
- truncated_prompt = self.truncate_prompt(prompt)
675
- truncated_negative_prompt = self.truncate_prompt(negative_prompt)
676
-
677
- try:
678
- logger.info(f"Starting image generation: {truncated_prompt[:50]}...")
679
-
680
- # Generation parameters
681
- generation_kwargs = {
682
- "prompt": truncated_prompt,
683
- "negative_prompt": truncated_negative_prompt,
684
- "num_inference_steps": num_inference_steps,
685
- "guidance_scale": guidance_scale,
686
- **kwargs
687
- }
688
-
689
- # Add ControlNet parameters if this is a ControlNet pipeline
690
- if self.is_controlnet_pipeline and control_image is not None:
691
- generation_kwargs.update({
692
- "image": control_image,
693
- "controlnet_conditioning_scale": controlnet_conditioning_scale,
694
- "control_guidance_start": control_guidance_start,
695
- "control_guidance_end": control_guidance_end
696
- })
697
- logger.info(f"ControlNet parameters: conditioning_scale={controlnet_conditioning_scale}, "
698
- f"guidance_start={control_guidance_start}, guidance_end={control_guidance_end}")
699
-
700
- # Add size parameters based on model type
701
- if self.model_config.model_type in ["sdxl", "sd3", "flux", "controlnet_sdxl"]:
702
- generation_kwargs.update({
703
- "width": width,
704
- "height": height
705
- })
706
-
707
- # FLUX models have special parameters
708
- if self.model_config.model_type == "flux":
709
- # Add max_sequence_length for FLUX
710
- max_seq_len = self.model_config.parameters.get("max_sequence_length", 512)
711
- generation_kwargs["max_sequence_length"] = max_seq_len
712
- logger.info(f"Using max_sequence_length={max_seq_len} for FLUX model")
713
-
714
- # Special handling for FLUX.1-schnell (distilled model)
715
- if "schnell" in self.model_config.name.lower():
716
- # FLUX.1-schnell doesn't use guidance
717
- if guidance_scale != 0.0:
718
- logger.info("FLUX.1-schnell detected - setting guidance_scale to 0.0 (distilled model doesn't use guidance)")
719
- generation_kwargs["guidance_scale"] = 0.0
720
-
721
- # Use fewer steps for schnell (it's designed for 1-4 steps)
722
- if num_inference_steps > 4:
723
- logger.info(f"FLUX.1-schnell detected - reducing steps from {num_inference_steps} to 4 for optimal performance")
724
- generation_kwargs["num_inference_steps"] = 4
725
-
726
- logger.info("🚀 Using FLUX.1-schnell - fast distilled model optimized for 4-step generation")
727
-
728
- # Device-specific adjustments for FLUX
729
- if self.device == "cpu":
730
- # Reduce steps for faster CPU inference
731
- if "schnell" not in self.model_config.name.lower() and num_inference_steps > 20:
732
- num_inference_steps = 20
733
- generation_kwargs["num_inference_steps"] = num_inference_steps
734
- logger.info(f"Reduced inference steps to {num_inference_steps} for CPU performance")
735
-
736
- # Lower guidance scale for CPU stability (except for schnell which uses 0.0)
737
- if "schnell" not in self.model_config.name.lower() and guidance_scale > 5.0:
738
- guidance_scale = 5.0
739
- generation_kwargs["guidance_scale"] = guidance_scale
740
- logger.info(f"Reduced guidance scale to {guidance_scale} for CPU stability")
741
-
742
- logger.warning("🐌 CPU inference detected - this may take several minutes per image")
743
- elif self.device == "mps":
744
- # MPS-specific adjustments for stability (except for schnell which uses 0.0)
745
- if "schnell" not in self.model_config.name.lower() and guidance_scale > 7.0:
746
- guidance_scale = 7.0
747
- generation_kwargs["guidance_scale"] = guidance_scale
748
- logger.info(f"Reduced guidance scale to {guidance_scale} for MPS stability")
749
-
750
- logger.info("🍎 MPS inference - should be faster than CPU but slower than CUDA")
751
-
752
- elif self.model_config.model_type in ["sd15", "controlnet_sd15"]:
753
- # SD 1.5 and ControlNet SD 1.5 work best with 512x512, adjust if different sizes requested
754
- if width != 1024 or height != 1024:
755
- generation_kwargs.update({
756
- "width": width,
757
- "height": height
758
- })
759
- else:
760
- # Use optimal size for SD 1.5
761
- generation_kwargs.update({
762
- "width": 512,
763
- "height": 512
764
- })
765
-
766
- # Special handling for Video (AnimateDiff) models
767
- elif self.model_config.model_type == "video":
768
- logger.info("Configuring AnimateDiff video generation parameters")
769
-
770
- # Video-specific parameters
771
- num_frames = kwargs.get("num_frames", 16)
772
- generation_kwargs["num_frames"] = num_frames
773
-
774
- # AnimateDiff works best with specific resolutions
775
- # Use 512x512 for better compatibility with most motion adapters
776
- generation_kwargs.update({
777
- "width": 512,
778
- "height": 512
779
- })
780
- logger.info(f"Using 512x512 resolution for AnimateDiff compatibility")
781
-
782
- # Set optimal parameters for video generation
783
- if guidance_scale > 7.5:
784
- generation_kwargs["guidance_scale"] = 7.5
785
- logger.info("Reduced guidance scale to 7.5 for video stability")
786
-
787
- if num_inference_steps > 25:
788
- generation_kwargs["num_inference_steps"] = 25
789
- logger.info("Reduced inference steps to 25 for video generation")
790
-
791
- logger.info(f"Generating {num_frames} frames for video output")
792
-
793
- # Special handling for HiDream models
794
- elif self.model_config.model_type == "hidream":
795
- logger.info("Configuring HiDream model parameters")
796
-
797
- # HiDream models support high resolution
798
- generation_kwargs.update({
799
- "width": width,
800
- "height": height
801
- })
802
-
803
- # HiDream models have multiple text encoders, handle if provided
804
- if "prompt_2" in kwargs:
805
- generation_kwargs["prompt_2"] = self.truncate_prompt(kwargs["prompt_2"])
806
- if "prompt_3" in kwargs:
807
- generation_kwargs["prompt_3"] = self.truncate_prompt(kwargs["prompt_3"])
808
- if "prompt_4" in kwargs:
809
- generation_kwargs["prompt_4"] = self.truncate_prompt(kwargs["prompt_4"])
810
-
811
- # Set optimal parameters for HiDream
812
- max_seq_len = self.model_config.parameters.get("max_sequence_length", 128)
813
- generation_kwargs["max_sequence_length"] = max_seq_len
814
-
815
- # HiDream models use different guidance scale defaults
816
- if guidance_scale is None or guidance_scale == 3.5:
817
- generation_kwargs["guidance_scale"] = 5.0
818
- logger.info("Using default guidance_scale=5.0 for HiDream model")
819
-
820
- logger.info(f"Using max_sequence_length={max_seq_len} for HiDream model")
821
-
822
- # Generate image
823
- logger.info(f"Generation parameters: steps={num_inference_steps}, guidance={guidance_scale}")
824
-
825
- # Add generator for reproducible results
826
- if self.device == "cpu":
827
- generator = torch.Generator().manual_seed(42)
828
- else:
829
- generator = torch.Generator(device=self.device).manual_seed(42)
830
- generation_kwargs["generator"] = generator
831
-
832
- # For SD 1.5, use a more conservative approach to avoid numerical issues
833
- if self.model_config.model_type == "sd15":
834
- # Lower guidance scale to prevent numerical instability
835
- if generation_kwargs["guidance_scale"] > 7.0:
836
- generation_kwargs["guidance_scale"] = 7.0
837
- logger.info("Reduced guidance scale to 7.0 for stability")
838
-
839
- # Ensure we're using float32 for better numerical stability
840
- if self.device == "mps":
841
- # For Apple Silicon, use specific optimizations
842
- generation_kwargs["guidance_scale"] = min(generation_kwargs["guidance_scale"], 6.0)
843
- logger.info("Applied MPS-specific optimizations")
844
-
845
- # For SD 1.5, use manual pipeline execution to completely bypass safety checker
846
- if self.model_config.model_type == "sd15" and not self.is_controlnet_pipeline:
847
- logger.info("Using manual pipeline execution for SD 1.5 to bypass safety checker")
848
- try:
849
- # Manual pipeline execution with safety checks disabled
850
- with torch.no_grad():
851
- # Encode prompt
852
- text_inputs = self.pipeline.tokenizer(
853
- generation_kwargs["prompt"],
854
- padding="max_length",
855
- max_length=self.pipeline.tokenizer.model_max_length,
856
- truncation=True,
857
- return_tensors="pt",
858
- )
859
- text_embeddings = self.pipeline.text_encoder(text_inputs.input_ids.to(self.device))[0]
860
-
861
- # Encode negative prompt
862
- uncond_inputs = self.pipeline.tokenizer(
863
- generation_kwargs["negative_prompt"],
864
- padding="max_length",
865
- max_length=self.pipeline.tokenizer.model_max_length,
866
- truncation=True,
867
- return_tensors="pt",
868
- )
869
- uncond_embeddings = self.pipeline.text_encoder(uncond_inputs.input_ids.to(self.device))[0]
870
-
871
- # Concatenate embeddings
872
- text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
873
-
874
- # Generate latents
875
- latents = torch.randn(
876
- (1, self.pipeline.unet.config.in_channels,
877
- generation_kwargs["height"] // 8, generation_kwargs["width"] // 8),
878
- generator=generation_kwargs["generator"],
879
- device=self.device,
880
- dtype=text_embeddings.dtype,
881
- )
882
-
883
- logger.debug(f"Initial latents stats - mean: {latents.mean().item():.4f}, std: {latents.std().item():.4f}")
884
- logger.debug(f"Text embeddings stats - mean: {text_embeddings.mean().item():.4f}, std: {text_embeddings.std().item():.4f}")
885
-
886
- # Set scheduler
887
- self.pipeline.scheduler.set_timesteps(generation_kwargs["num_inference_steps"])
888
- latents = latents * self.pipeline.scheduler.init_noise_sigma
889
-
890
- logger.debug(f"Latents after noise scaling - mean: {latents.mean().item():.4f}, std: {latents.std().item():.4f}")
891
- logger.debug(f"Scheduler init_noise_sigma: {self.pipeline.scheduler.init_noise_sigma}")
892
-
893
- # Denoising loop
894
- for i, t in enumerate(self.pipeline.scheduler.timesteps):
895
- latent_model_input = torch.cat([latents] * 2)
896
- latent_model_input = self.pipeline.scheduler.scale_model_input(latent_model_input, t)
897
-
898
- # Check for NaN before UNet
899
- if torch.isnan(latent_model_input).any():
900
- logger.error(f"NaN detected in latent_model_input at step {i}")
901
- break
902
-
903
- noise_pred = self.pipeline.unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample
904
-
905
- # Check for NaN after UNet
906
- if torch.isnan(noise_pred).any():
907
- logger.error(f"NaN detected in noise_pred at step {i}")
908
- break
909
-
910
- noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
911
- noise_pred = noise_pred_uncond + generation_kwargs["guidance_scale"] * (noise_pred_text - noise_pred_uncond)
912
-
913
- # Check for NaN after guidance
914
- if torch.isnan(noise_pred).any():
915
- logger.error(f"NaN detected after guidance at step {i}")
916
- break
917
-
918
- latents = self.pipeline.scheduler.step(noise_pred, t, latents).prev_sample
919
-
920
- # Check for NaN after scheduler step
921
- if torch.isnan(latents).any():
922
- logger.error(f"NaN detected in latents after scheduler step {i}")
923
- break
924
-
925
- if i == 0: # Log first step for debugging
926
- logger.debug(f"Step {i}: latents mean={latents.mean().item():.4f}, std={latents.std().item():.4f}")
927
-
928
- # Decode latents
929
- latents = 1 / self.pipeline.vae.config.scaling_factor * latents
930
-
931
- # Debug latents before VAE decode
932
- logger.debug(f"Latents stats before VAE decode - mean: {latents.mean().item():.4f}, std: {latents.std().item():.4f}")
933
- logger.debug(f"Latents range: [{latents.min().item():.4f}, {latents.max().item():.4f}]")
934
-
935
- with torch.no_grad():
936
- # Ensure latents are on correct device and dtype
937
- latents = latents.to(device=self.device, dtype=self.pipeline.vae.dtype)
938
-
939
- try:
940
- image = self.pipeline.vae.decode(latents).sample
941
- logger.debug(f"VAE decode successful - image shape: {image.shape}")
942
- except Exception as e:
943
- logger.error(f"VAE decode failed: {e}")
944
- # Create a fallback image
945
- image = torch.randn_like(latents).repeat(1, 3, 8, 8) * 0.1 + 0.5
946
- logger.warning("Using fallback random image due to VAE decode failure")
947
-
948
- # Convert to PIL with proper NaN/inf handling
949
- logger.debug(f"Image stats after VAE decode - mean: {image.mean().item():.4f}, std: {image.std().item():.4f}")
950
- logger.debug(f"Image range: [{image.min().item():.4f}, {image.max().item():.4f}]")
951
-
952
- image = (image / 2 + 0.5).clamp(0, 1)
953
-
954
- logger.debug(f"Image stats after normalization - mean: {image.mean().item():.4f}, std: {image.std().item():.4f}")
955
- logger.debug(f"Image range after norm: [{image.min().item():.4f}, {image.max().item():.4f}]")
956
-
957
- # Check for NaN or infinite values before conversion
958
- if torch.isnan(image).any() or torch.isinf(image).any():
959
- logger.warning("NaN or infinite values detected in image tensor, applying selective fixes")
960
- # Only replace NaN/inf values, keep valid pixels intact
961
- nan_mask = torch.isnan(image)
962
- inf_mask = torch.isinf(image)
963
-
964
- # Replace only problematic pixels
965
- image = torch.where(nan_mask, torch.tensor(0.5, device=image.device, dtype=image.dtype), image)
966
- image = torch.where(inf_mask & (image > 0), torch.tensor(1.0, device=image.device, dtype=image.dtype), image)
967
- image = torch.where(inf_mask & (image < 0), torch.tensor(0.0, device=image.device, dtype=image.dtype), image)
968
-
969
- logger.info(f"Fixed {nan_mask.sum().item()} NaN pixels and {inf_mask.sum().item()} infinite pixels")
970
-
971
- # Final clamp to ensure valid range
972
- image = torch.clamp(image, 0, 1)
973
-
974
- image = image.cpu().permute(0, 2, 3, 1).float().numpy()
975
-
976
- # Additional validation before uint8 conversion - only fix problematic pixels
977
- if np.isnan(image).any() or np.isinf(image).any():
978
- logger.warning("NaN/inf values detected in numpy array, applying selective fixes")
979
- nan_count = np.isnan(image).sum()
980
- inf_count = np.isinf(image).sum()
981
-
982
- # Only replace problematic pixels, preserve valid ones
983
- image = np.where(np.isnan(image), 0.5, image)
984
- image = np.where(np.isinf(image) & (image > 0), 1.0, image)
985
- image = np.where(np.isinf(image) & (image < 0), 0.0, image)
986
-
987
- logger.info(f"Fixed {nan_count} NaN and {inf_count} infinite pixels in numpy array")
988
-
989
- # Ensure valid range
990
- image = np.clip(image, 0, 1)
991
-
992
- # Safe conversion to uint8
993
- image = (image * 255).astype(np.uint8)
994
-
995
- from PIL import Image as PILImage
996
- image = PILImage.fromarray(image[0])
997
-
998
- # Create a mock output object
999
- class MockOutput:
1000
- def __init__(self, images):
1001
- self.images = images
1002
- self.nsfw_content_detected = [False] * len(images)
1003
-
1004
- output = MockOutput([image])
1005
-
1006
- except Exception as e:
1007
- logger.error(f"Manual pipeline execution failed: {e}")
1008
- raise e
1009
- else:
1010
- # For FLUX and other models, use regular pipeline execution with safety checker disabled
1011
- logger.info(f"Using regular pipeline execution for {self.model_config.model_type} model")
1012
-
1013
- # Debug: Log device and generation kwargs
1014
- logger.debug(f"Pipeline device: {self.device}")
1015
- logger.debug(f"Generator device: {generation_kwargs['generator'].device if hasattr(generation_kwargs['generator'], 'device') else 'CPU'}")
1016
-
1017
- # Ensure all tensors are on the correct device
1018
- try:
1019
- # For FLUX models, temporarily disable any remaining safety checker components
1020
- if self.model_config.model_type == "flux":
1021
- # Store original safety checker components
1022
- original_safety_checker = getattr(self.pipeline, 'safety_checker', None)
1023
- original_feature_extractor = getattr(self.pipeline, 'feature_extractor', None)
1024
- original_requires_safety_checker = getattr(self.pipeline, 'requires_safety_checker', None)
1025
-
1026
- # Temporarily set to None
1027
- if hasattr(self.pipeline, 'safety_checker'):
1028
- self.pipeline.safety_checker = None
1029
- if hasattr(self.pipeline, 'feature_extractor'):
1030
- self.pipeline.feature_extractor = None
1031
- if hasattr(self.pipeline, 'requires_safety_checker'):
1032
- self.pipeline.requires_safety_checker = False
1033
-
1034
- logger.info("Temporarily disabled safety checker components for FLUX generation")
1035
-
1036
- output = self.pipeline(**generation_kwargs)
1037
-
1038
- # Restore original safety checker components for FLUX (though they should remain None)
1039
- if self.model_config.model_type == "flux":
1040
- if hasattr(self.pipeline, 'safety_checker'):
1041
- self.pipeline.safety_checker = original_safety_checker
1042
- if hasattr(self.pipeline, 'feature_extractor'):
1043
- self.pipeline.feature_extractor = original_feature_extractor
1044
- if hasattr(self.pipeline, 'requires_safety_checker'):
1045
- self.pipeline.requires_safety_checker = original_requires_safety_checker
1046
-
1047
- except RuntimeError as e:
1048
- if "CUDA" in str(e) and self.device == "cpu":
1049
- logger.error(f"CUDA error on CPU device: {e}")
1050
- logger.info("Attempting to fix device mismatch...")
1051
-
1052
- # Remove generator and try again
1053
- generation_kwargs_fixed = generation_kwargs.copy()
1054
- generation_kwargs_fixed.pop("generator", None)
1055
-
1056
- output = self.pipeline(**generation_kwargs_fixed)
1057
- else:
1058
- raise e
1059
-
1060
- # Special handling for FLUX models to bypass any remaining safety checker issues
1061
- if self.model_config.model_type == "flux" and hasattr(output, 'images'):
1062
- # Check if we got a black image and try to regenerate with different approach
- test_image = output.images[0]
- test_array = np.array(test_image)
-
- if np.all(test_array == 0):
- logger.warning("FLUX model returned black image, attempting manual image processing")
-
- # Try to access the raw latents or intermediate results
- if hasattr(output, 'latents') or hasattr(self.pipeline, 'vae'):
- try:
- # Generate a simple test image to verify the pipeline is working
- logger.info("Generating test image with simple prompt")
- simple_kwargs = generation_kwargs.copy()
- simple_kwargs["prompt"] = "a red apple"
- simple_kwargs["negative_prompt"] = ""
-
- # Temporarily disable any image processing that might cause issues
- original_image_processor = getattr(self.pipeline, 'image_processor', None)
- if hasattr(self.pipeline, 'image_processor'):
- # Create a custom image processor that handles NaN values
- class SafeImageProcessor:
- def postprocess(self, image, output_type="pil", do_denormalize=None):
- if isinstance(image, torch.Tensor):
- # Handle NaN and inf values before conversion
- image = torch.nan_to_num(image, nan=0.5, posinf=1.0, neginf=0.0)
- image = torch.clamp(image, 0, 1)
-
- # Convert to numpy
- image = image.cpu().permute(0, 2, 3, 1).float().numpy()
-
- # Additional safety checks
- image = np.nan_to_num(image, nan=0.5, posinf=1.0, neginf=0.0)
- image = np.clip(image, 0, 1)
-
- # Convert to uint8 safely
- image = (image * 255).astype(np.uint8)
-
- # Convert to PIL
- if output_type == "pil":
- from PIL import Image as PILImage
- return [PILImage.fromarray(img) for img in image]
- return image
- return image
-
- self.pipeline.image_processor = SafeImageProcessor()
- logger.info("Applied safe image processor for FLUX model")
-
- # Try generation again with safe image processor
- test_output = self.pipeline(**simple_kwargs)
-
- # Restore original image processor
- if original_image_processor:
- self.pipeline.image_processor = original_image_processor
-
- if hasattr(test_output, 'images') and len(test_output.images) > 0:
- test_result = np.array(test_output.images[0])
- if not np.all(test_result == 0):
- logger.info("Test generation successful, using original output")
- # The issue might be with the specific prompt, return the test image
- output = test_output
- else:
- logger.warning("Test generation also returned black image")
-
- except Exception as e:
- logger.warning(f"Manual image processing failed: {e}")
-
- # Check if output contains nsfw_content_detected
- if hasattr(output, 'nsfw_content_detected') and output.nsfw_content_detected:
- logger.warning("NSFW content detected by pipeline - this should not happen with safety checker disabled")
-
- # Special handling for video models that return multiple frames
- if self.model_config.model_type == "video":
- logger.info(f"Processing video output with {len(output.frames)} frames")
-
- # For now, return the first frame as a single image
- # In the future, this could be extended to return a video file or GIF
- if hasattr(output, 'frames') and len(output.frames) > 0:
- image = output.frames[0]
- logger.info("Extracted first frame from video generation")
- else:
- # Fallback to images if frames not available
- image = output.images[0]
- logger.info("Using first image from video output")
-
- # TODO: Add option to save all frames or create a GIF
- # frames = output.frames if hasattr(output, 'frames') else output.images
- # save_video_frames(frames, prompt)
- else:
- # Standard single image output for other models
- image = output.images[0]
-
- # Debug: Check image properties
- logger.info(f"Generated image size: {image.size}, mode: {image.mode}")
-
- # Validate and fix image data if needed
- image = self._validate_and_fix_image(image)
-
- logger.info("Image generation completed")
- return image
-
- except Exception as e:
- logger.error(f"Image generation failed: {e}")
- # Return error image
- return self._create_error_image(str(e), truncated_prompt)
-
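Note: the block removed above works around black or NaN FLUX outputs by temporarily swapping in a NaN-safe image processor during generation. A minimal standalone sketch of that postprocessing idea follows; the function name safe_postprocess is illustrative and is not part of either package version.

    import numpy as np
    import torch
    from PIL import Image

    def safe_postprocess(image: torch.Tensor) -> list:
        """Convert a (B, C, H, W) tensor in [0, 1] to PIL images, guarding against NaN/Inf."""
        # Replace invalid values before any scaling so they cannot propagate
        image = torch.nan_to_num(image, nan=0.5, posinf=1.0, neginf=0.0)
        image = torch.clamp(image, 0, 1)
        arr = image.cpu().permute(0, 2, 3, 1).float().numpy()
        # Second pass in NumPy as an extra safety net, then scale to uint8
        arr = np.clip(np.nan_to_num(arr, nan=0.5, posinf=1.0, neginf=0.0), 0, 1)
        arr = (arr * 255).astype(np.uint8)
        return [Image.fromarray(a) for a in arr]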
- def _validate_and_fix_image(self, image: Image.Image) -> Image.Image:
- """Validate and fix image data to handle NaN/infinite values"""
- try:
- # Convert PIL image to numpy array
- img_array = np.array(image)
-
- # Check if image is completely black (safety checker replacement)
- if np.all(img_array == 0):
- logger.error("Generated image is completely black - likely safety checker issue")
- if self.model_config.model_type == "flux":
- logger.error("FLUX model safety checker is still active despite our attempts to disable it")
- logger.error("This suggests the safety checker is built into the model weights or pipeline")
- logger.info("Attempting to generate a test pattern instead of black image")
-
- # Create a test pattern to show the system is working
- test_image = np.zeros_like(img_array)
- height, width = test_image.shape[:2]
-
- # Create a simple gradient pattern
- for i in range(height):
- for j in range(width):
- test_image[i, j] = [
- int(255 * i / height), # Red gradient
- int(255 * j / width), # Green gradient
- 128 # Blue constant
- ]
-
- logger.info("Created test gradient pattern to replace black image")
- return Image.fromarray(test_image.astype(np.uint8))
- else:
- logger.error("This suggests the safety checker is still active despite our attempts to disable it")
-
- # Check for NaN or infinite values
- if np.isnan(img_array).any() or np.isinf(img_array).any():
- logger.warning("Invalid values (NaN/inf) detected in generated image, applying fixes")
-
- # Replace NaN and infinite values with valid ranges
- img_array = np.nan_to_num(img_array, nan=0.0, posinf=255.0, neginf=0.0)
-
- # Ensure values are in valid range [0, 255]
- img_array = np.clip(img_array, 0, 255)
-
- # Convert back to PIL Image
- image = Image.fromarray(img_array.astype(np.uint8))
- logger.info("Image data fixed successfully")
-
- # Log image statistics for debugging
- mean_val = np.mean(img_array)
- std_val = np.std(img_array)
- logger.info(f"Image stats - mean: {mean_val:.2f}, std: {std_val:.2f}")
-
- # Additional check for very low variance (mostly black/gray)
- if std_val < 10.0 and mean_val < 50.0:
- logger.warning(f"Image has very low variance (std={std_val:.2f}) and low brightness (mean={mean_val:.2f})")
- logger.warning("This might indicate safety checker interference or generation issues")
- if self.model_config.model_type == "flux":
- logger.info("For FLUX models, try using different prompts or adjusting generation parameters")
-
- return image
-
- except Exception as e:
- logger.warning(f"Failed to validate image data: {e}, returning original image")
- return image
-
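Note: the fallback removed above builds its gradient test pattern with a per-pixel Python loop. A vectorized NumPy sketch of the same pattern is shown below; make_test_pattern is a hypothetical name used only for illustration.

    import numpy as np
    from PIL import Image

    def make_test_pattern(height: int, width: int) -> Image.Image:
        """Build the red/green gradient test pattern without a per-pixel loop."""
        rows = np.linspace(0, 255, height, dtype=np.uint8)   # red ramps top to bottom
        cols = np.linspace(0, 255, width, dtype=np.uint8)    # green ramps left to right
        pattern = np.zeros((height, width, 3), dtype=np.uint8)
        pattern[..., 0] = rows[:, None]                       # broadcast over columns
        pattern[..., 1] = cols[None, :]                       # broadcast over rows
        pattern[..., 2] = 128                                 # constant blue channel
        return Image.fromarray(pattern)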
- def _prepare_control_image(self, control_image: Union[Image.Image, str], width: int, height: int) -> Image.Image:
- """Prepare control image for ControlNet"""
- try:
- # Initialize ControlNet preprocessors if needed
- if not controlnet_preprocessor.is_initialized():
- logger.info("Initializing ControlNet preprocessors for image processing...")
- if not controlnet_preprocessor.initialize():
- logger.error("Failed to initialize ControlNet preprocessors")
- # Continue with basic processing
-
- # Load image if path is provided
- if isinstance(control_image, str):
- control_image = Image.open(control_image).convert('RGB')
- elif not isinstance(control_image, Image.Image):
- raise ValueError("Control image must be PIL Image or file path")
-
- # Ensure image is RGB
- if control_image.mode != 'RGB':
- control_image = control_image.convert('RGB')
-
- # Get ControlNet type from model config
- from ..models.manager import model_manager
- model_info = model_manager.get_model_info(self.model_config.name)
- controlnet_type = model_info.get('controlnet_type', 'canny') if model_info else 'canny'
-
- # Preprocess the control image based on ControlNet type
- logger.info(f"Preprocessing control image for {controlnet_type} ControlNet")
- processed_image = controlnet_preprocessor.preprocess(control_image, controlnet_type)
-
- # Resize to match generation size
- processed_image = controlnet_preprocessor.resize_for_controlnet(processed_image, width, height)
-
- logger.info(f"Control image prepared: {processed_image.size}")
- return processed_image
-
- except Exception as e:
- logger.error(f"Failed to prepare control image: {e}")
- # Return resized original image as fallback
- if isinstance(control_image, str):
- control_image = Image.open(control_image).convert('RGB')
- return controlnet_preprocessor.resize_for_controlnet(control_image, width, height)
-
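Note: the method removed above delegates the actual edge extraction to controlnet_preprocessor.preprocess(), with 'canny' as the default type. The sketch below shows what a typical canny control-image step looks like with OpenCV; the thresholds and the canny_control_image name are illustrative assumptions, not the package's own preprocessor code.

    import cv2
    import numpy as np
    from PIL import Image

    def canny_control_image(image: Image.Image, low: int = 100, high: int = 200) -> Image.Image:
        """Produce a 3-channel Canny edge map suitable as a ControlNet conditioning image."""
        edges = cv2.Canny(np.array(image.convert("RGB")), low, high)   # single-channel edge map
        edges = np.stack([edges] * 3, axis=-1)                         # replicate to RGB
        return Image.fromarray(edges)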
- def _create_error_image(self, error_msg: str, prompt: str) -> Image.Image:
- """Create error message image"""
- from PIL import ImageDraw, ImageFont
-
- # Create white background image
- img = Image.new('RGB', (512, 512), color=(255, 255, 255))
- draw = ImageDraw.Draw(img)
-
- # Draw error information
- try:
- # Try to use system font
- font = ImageFont.load_default()
- except:
- font = None
-
- # Draw text
- draw.text((10, 10), f"Error: {error_msg}", fill=(255, 0, 0), font=font)
- draw.text((10, 30), f"Prompt: {prompt[:50]}...", fill=(0, 0, 0), font=font)
-
- return img
-
+
  def unload(self):
- """Unload model and free GPU memory"""
- # Handle GGUF models
- is_gguf_model = (
- self.model_config and
- (self.model_config.model_type == "gguf" or
- (self.model_config.variant and "gguf" in self.model_config.variant.lower()))
- )
-
- if is_gguf_model:
- if GGUF_AVAILABLE and gguf_loader.is_loaded():
- gguf_loader.unload_model()
- logger.info("GGUF model unloaded")
-
- self.model_config = None
- self.tokenizer = None
- return
-
- # Handle regular diffusion models
- if self.pipeline:
- # Move to CPU to free GPU memory
- self.pipeline = self.pipeline.to("cpu")
-
- # Clear GPU cache
- if torch.cuda.is_available():
- torch.cuda.empty_cache()
-
- # Delete pipeline
- del self.pipeline
- self.pipeline = None
- self.model_config = None
- self.tokenizer = None
-
- logger.info("Model unloaded")
-
+ """Unload the current model and free resources."""
+ if self._strategy:
+ self._strategy.unload()
+ self._strategy = None
+ self.model_config = None
+ self.device = None
+ logger.info("Engine unloaded")
+
  def is_loaded(self) -> bool:
- """Check if model is loaded"""
- # Check GGUF models
- is_gguf_model = (
- self.model_config and
- (self.model_config.model_type == "gguf" or
- (self.model_config.variant and "gguf" in self.model_config.variant.lower()))
- )
-
- if is_gguf_model:
- return GGUF_AVAILABLE and gguf_loader.is_loaded()
-
- # Check regular pipeline models
- return self.pipeline is not None
-
- def load_lora_runtime(self, repo_id: str, weight_name: str = None, scale: float = 1.0):
- """Load LoRA weights at runtime"""
- if not self.pipeline:
- raise RuntimeError("Model not loaded")
-
- try:
- if weight_name:
- logger.info(f"Loading LoRA from {repo_id} with weight {weight_name}")
- self.pipeline.load_lora_weights(repo_id, weight_name=weight_name)
- else:
- logger.info(f"Loading LoRA from {repo_id}")
- self.pipeline.load_lora_weights(repo_id)
-
- # Set LoRA scale
- if hasattr(self.pipeline, 'set_adapters') and scale != 1.0:
- self.pipeline.set_adapters(["default"], adapter_weights=[scale])
- logger.info(f"Set LoRA scale to {scale}")
-
- # Track LoRA state
- self.current_lora = {
- "repo_id": repo_id,
- "weight_name": weight_name,
- "scale": scale,
- "loaded": True
- }
-
- logger.info("LoRA weights loaded successfully at runtime")
- return True
-
- except Exception as e:
- logger.error(f"Failed to load LoRA weights at runtime: {e}")
- return False
-
- def unload_lora(self):
- """Unload LoRA weights"""
- if not self.pipeline:
- return False
-
- try:
- if hasattr(self.pipeline, 'unload_lora_weights'):
- self.pipeline.unload_lora_weights()
- # Clear LoRA state
- self.current_lora = None
- logger.info("LoRA weights unloaded successfully")
- return True
- else:
- logger.warning("Pipeline does not support LoRA unloading")
- return False
- except Exception as e:
- logger.error(f"Failed to unload LoRA weights: {e}")
- return False
+ """Check if a model is loaded."""
+ return self._strategy is not None and self._strategy.is_loaded

  def get_model_info(self) -> Optional[Dict[str, Any]]:
- """Get current loaded model information"""
- if not self.model_config:
+ """Get information about the currently loaded model."""
+ if not self._strategy:
  return None
-
- base_info = {
- "name": self.model_config.name,
- "type": self.model_config.model_type,
- "device": self.device,
- "variant": self.model_config.variant,
- "parameters": self.model_config.parameters
- }
-
- # Check if this is a GGUF model
- is_gguf_model = (
- self.model_config.model_type == "gguf" or
- (self.model_config.variant and "gguf" in self.model_config.variant.lower())
- )
-
- # Add GGUF-specific information
- if is_gguf_model and GGUF_AVAILABLE:
- gguf_info = gguf_loader.get_model_info()
- base_info.update(gguf_info)
- base_info["gguf_available"] = True
- base_info["gguf_loaded"] = gguf_loader.is_loaded()
- base_info["is_gguf"] = True
- else:
- base_info["gguf_available"] = GGUF_AVAILABLE
- base_info["gguf_loaded"] = False
- base_info["is_gguf"] = is_gguf_model
-
- return base_info
+ info = self._strategy.get_info()
+ info["strategy"] = type(self._strategy).__name__
+ return info
+
+ # -- LoRA support --
+
+ def load_lora_runtime(
+ self, repo_id: str, weight_name: str = None, scale: float = 1.0
+ ) -> bool:
+ """Load LoRA weights at runtime."""
+ if not self._strategy:
+ raise RuntimeError("No model loaded")
+ return self._strategy.load_lora_runtime(repo_id, weight_name, scale)
+
+ def unload_lora(self) -> bool:
+ """Unload current LoRA weights."""
+ if not self._strategy:
+ return False
+ return self._strategy.unload_lora()
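Note: each 2.0.0 engine method above delegates to a per-model strategy object. A minimal sketch of the interface those calls imply (is_loaded, unload, get_info, load_lora_runtime, unload_lora) follows; the InferenceStrategy name and its defaults are assumptions for illustration, not the package's actual base class.

    from abc import ABC, abstractmethod
    from typing import Any, Dict

    class InferenceStrategy(ABC):
        """Hypothetical per-model strategy interface implied by the delegating engine methods."""

        @property
        @abstractmethod
        def is_loaded(self) -> bool:
            ...

        @abstractmethod
        def unload(self) -> None:
            ...

        @abstractmethod
        def get_info(self) -> Dict[str, Any]:
            ...

        def load_lora_runtime(self, repo_id: str, weight_name: str = None, scale: float = 1.0) -> bool:
            return False  # strategies without LoRA support can keep the default

        def unload_lora(self) -> bool:
            return False

Assuming an engine instance named engine, usage of the facade would look roughly like: info = engine.get_model_info() (which now includes a "strategy" key naming the active strategy class), engine.load_lora_runtime("some/lora-repo", scale=0.8), and engine.unload() to release the model.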