ollamadiffuser 1.1.6__py3-none-any.whl → 1.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,8 +9,32 @@ from diffusers import (
     FluxPipeline,
     StableDiffusionControlNetPipeline,
     StableDiffusionXLControlNetPipeline,
-    ControlNetModel
+    ControlNetModel,
+    AnimateDiffPipeline,
+    MotionAdapter
 )
+# Try to import HiDreamImagePipeline if available
+try:
+    from diffusers import HiDreamImagePipeline
+    HIDREAM_AVAILABLE = True
+except ImportError:
+    HIDREAM_AVAILABLE = False
+    logger = logging.getLogger(__name__)
+    logger.warning("HiDreamImagePipeline not available. Install latest diffusers from source for HiDream support.")
+
+# Import GGUF support
+try:
+    from ..models.gguf_loader import gguf_loader, GGUF_AVAILABLE
+    logger = logging.getLogger(__name__)
+    if GGUF_AVAILABLE:
+        logger.info("GGUF support available for quantized model inference")
+    else:
+        logger.warning("GGUF support not available. Install with: pip install llama-cpp-python gguf")
+except ImportError:
+    GGUF_AVAILABLE = False
+    logger = logging.getLogger(__name__)
+    logger.warning("GGUF loader module not found")
+
 from PIL import Image
 from typing import Optional, Dict, Any, Union
 from pathlib import Path
@@ -63,9 +87,16 @@ class InferenceEngine:
             "sdxl": StableDiffusionXLPipeline,
             "sd3": StableDiffusion3Pipeline,
             "flux": FluxPipeline,
+            "gguf": "gguf_special",  # Special marker for GGUF models
             "controlnet_sd15": StableDiffusionControlNetPipeline,
             "controlnet_sdxl": StableDiffusionXLControlNetPipeline,
+            "video": AnimateDiffPipeline,
         }
+
+        # Add HiDream support if available
+        if HIDREAM_AVAILABLE:
+            pipeline_map["hidream"] = HiDreamImagePipeline
+
         return pipeline_map.get(model_type)

     def load_model(self, model_config: ModelConfig) -> bool:
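
Note that "gguf_special" is a sentinel rather than a pipeline class: mapping "gguf" to a truthy value lets GGUF configs pass the unsupported-model-type check at the top of load_model() before being routed to gguf_loader (next hunk). The engine only ever calls five methods on gguf_loader; the following is a minimal sketch of the interface this diff assumes, inferred from those call sites rather than copied from the real ..models.gguf_loader module:

    # Sketch of the gguf_loader surface exercised by this diff (an assumption,
    # not the real module's definition).
    from typing import Any, Dict, Optional, Protocol
    from PIL import Image

    class GGUFLoaderProtocol(Protocol):
        def load_model(self, config: Dict[str, Any]) -> bool: ...          # keys: name, path, variant, model_type, parameters
        def is_loaded(self) -> bool: ...
        def generate_image(self, **kwargs) -> Optional[Image.Image]: ...   # may return None on failure
        def unload_model(self) -> None: ...
        def get_model_info(self) -> Dict[str, Any]: ...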
@@ -96,6 +127,34 @@ class InferenceEngine:
             logger.error(f"Unsupported model type: {model_config.model_type}")
             return False

+        # Handle GGUF models specially
+        if model_config.model_type == "gguf" or (model_config.variant and "gguf" in model_config.variant.lower()):
+            if not GGUF_AVAILABLE:
+                logger.error("GGUF support not available. Install with: pip install llama-cpp-python gguf")
+                return False
+
+            logger.info(f"Loading GGUF model: {model_config.name} (variant: {model_config.variant})")
+
+            # Use GGUF loader instead of regular pipeline
+            model_config_dict = {
+                'name': model_config.name,
+                'path': model_config.path,
+                'variant': model_config.variant,
+                'model_type': model_config.model_type,
+                'parameters': model_config.parameters
+            }
+
+            if gguf_loader.load_model(model_config_dict):
+                # Set pipeline to None since we're using GGUF loader
+                self.pipeline = None
+                self.model_config = model_config
+                self.device = self._get_device()
+                logger.info(f"GGUF model {model_config.name} loaded successfully")
+                return True
+            else:
+                logger.error(f"Failed to load GGUF model: {model_config.name}")
+                return False
+
         # Check if this is a ControlNet model
         self.is_controlnet_pipeline = model_config.model_type.startswith("controlnet_")

@@ -127,6 +186,56 @@ class InferenceEngine:
             load_kwargs["use_safetensors"] = True
             logger.info("Using bfloat16 for FLUX model")

+        # Special handling for Video (AnimateDiff) models
+        elif model_config.model_type == "video":
+            # AnimateDiff requires motion adapter
+            logger.info("Loading AnimateDiff (video) model")
+            motion_adapter_path = getattr(model_config, 'motion_adapter_path', None)
+            if not motion_adapter_path:
+                # Use default motion adapter if not specified
+                motion_adapter_path = "guoyww/animatediff-motion-adapter-v1-5-2"
+                logger.info(f"Using default motion adapter: {motion_adapter_path}")
+
+            try:
+                # Load motion adapter
+                motion_adapter = MotionAdapter.from_pretrained(
+                    motion_adapter_path,
+                    torch_dtype=load_kwargs.get("torch_dtype", torch.float16)
+                )
+                load_kwargs["motion_adapter"] = motion_adapter
+                logger.info(f"Motion adapter loaded from: {motion_adapter_path}")
+            except Exception as e:
+                logger.error(f"Failed to load motion adapter: {e}")
+                return False
+
+            # Disable safety checker for AnimateDiff
+            load_kwargs["safety_checker"] = None
+            load_kwargs["requires_safety_checker"] = False
+            load_kwargs["feature_extractor"] = None
+            logger.info("Safety checker disabled for AnimateDiff models")
+
+        # Special handling for HiDream models
+        elif model_config.model_type == "hidream":
+            if not HIDREAM_AVAILABLE:
+                logger.error("HiDream models require diffusers to be installed from source. Please install with: pip install git+https://github.com/huggingface/diffusers.git")
+                return False
+
+            logger.info("Loading HiDream model")
+            # HiDream models work best with bfloat16
+            if self.device == "cpu":
+                load_kwargs["torch_dtype"] = torch.float32
+                logger.info("Using float32 for HiDream model on CPU")
+                logger.warning("⚠️ HiDream models are large. CPU inference will be slow!")
+            else:
+                load_kwargs["torch_dtype"] = torch.bfloat16
+                logger.info("Using bfloat16 for HiDream model")
+
+            # Disable safety checker for HiDream models
+            load_kwargs["safety_checker"] = None
+            load_kwargs["requires_safety_checker"] = False
+            load_kwargs["feature_extractor"] = None
+            logger.info("Safety checker disabled for HiDream models")
+
         # Disable safety checker for SD 1.5 to prevent false NSFW detections
         if model_config.model_type == "sd15" or model_config.model_type == "sdxl":
             load_kwargs["safety_checker"] = None
@@ -198,6 +307,59 @@ class InferenceEngine:
                 except Exception as e:
                     logger.debug(f"Sequential CPU offload not available: {e}")

+        # Special optimizations for Video (AnimateDiff) models
+        elif model_config.model_type == "video":
+            logger.info("Applying optimizations for AnimateDiff video model")
+            # Enable VAE slicing for video models to reduce memory usage
+            if hasattr(self.pipeline, 'enable_vae_slicing'):
+                self.pipeline.enable_vae_slicing()
+                logger.info("Enabled VAE slicing for video model")
+
+            # Enable model CPU offload for better memory management
+            if self.device == "cuda" and hasattr(self.pipeline, 'enable_model_cpu_offload'):
+                self.pipeline.enable_model_cpu_offload()
+                logger.info("Enabled model CPU offload for video model")
+
+            # Set scheduler to work well with AnimateDiff
+            if hasattr(self.pipeline, 'scheduler'):
+                from diffusers import DDIMScheduler
+                try:
+                    self.pipeline.scheduler = DDIMScheduler.from_config(
+                        self.pipeline.scheduler.config,
+                        clip_sample=False,
+                        timestep_spacing="linspace",
+                        beta_schedule="linear",
+                        steps_offset=1,
+                    )
+                    logger.info("Configured DDIM scheduler for AnimateDiff")
+                except Exception as e:
+                    logger.debug(f"Could not configure DDIM scheduler: {e}")
+
+        # Special optimizations for HiDream models
+        elif model_config.model_type == "hidream":
+            logger.info("Applying optimizations for HiDream model")
+            # Enable VAE slicing and tiling for HiDream models
+            if hasattr(self.pipeline, 'enable_vae_slicing'):
+                self.pipeline.enable_vae_slicing()
+                logger.info("Enabled VAE slicing for HiDream model")
+
+            if hasattr(self.pipeline, 'enable_vae_tiling'):
+                self.pipeline.enable_vae_tiling()
+                logger.info("Enabled VAE tiling for HiDream model")
+
+            # Enable model CPU offload for better memory management
+            if self.device == "cuda" and hasattr(self.pipeline, 'enable_model_cpu_offload'):
+                self.pipeline.enable_model_cpu_offload()
+                logger.info("Enabled model CPU offload for HiDream model")
+            elif self.device == "cpu":
+                # CPU-specific optimizations for HiDream
+                if hasattr(self.pipeline, 'enable_sequential_cpu_offload'):
+                    try:
+                        self.pipeline.enable_sequential_cpu_offload()
+                        logger.info("Enabled sequential CPU offload for HiDream model")
+                    except Exception as e:
+                        logger.debug(f"Sequential CPU offload not available: {e}")
+
         # Additional safety checker disabling for SD 1.5 (in case the above didn't work)
         if model_config.model_type == "sd15" or model_config.model_type == "sdxl":
             if hasattr(self.pipeline, 'safety_checker'):
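
Continuing the standalone sketch above: the scheduler settings applied by the engine are the ones AnimateDiff is commonly run with, and the memory helpers are standard diffusers calls, so the equivalent outside the engine would be:

    from diffusers import DDIMScheduler

    pipe.scheduler = DDIMScheduler.from_config(
        pipe.scheduler.config,
        clip_sample=False,
        timestep_spacing="linspace",
        beta_schedule="linear",
        steps_offset=1,
    )
    pipe.enable_vae_slicing()        # decode the frame batch in slices to cut peak VRAM
    pipe.enable_model_cpu_offload()  # keep only the active sub-model on the GPU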
@@ -445,6 +607,51 @@ class InferenceEngine:
                  control_guidance_end: float = 1.0,
                  **kwargs) -> Image.Image:
         """Generate image"""
+        # Check if we're using a GGUF model
+        is_gguf_model = (
+            self.model_config and
+            (self.model_config.model_type == "gguf" or
+             (self.model_config.variant and "gguf" in self.model_config.variant.lower()))
+        )
+
+        if is_gguf_model:
+            if not GGUF_AVAILABLE:
+                raise RuntimeError("GGUF support not available")
+
+            if not gguf_loader.is_loaded():
+                raise RuntimeError("GGUF model not loaded")
+
+            logger.info(f"Generating image using GGUF model: {prompt[:50]}...")
+
+            # Use model default parameters for GGUF
+            if num_inference_steps is None:
+                num_inference_steps = self.model_config.parameters.get("num_inference_steps", 20)
+
+            if guidance_scale is None:
+                guidance_scale = self.model_config.parameters.get("guidance_scale", 7.5)
+
+            # Generate using GGUF loader
+            generation_kwargs = {
+                "prompt": prompt,
+                "negative_prompt": negative_prompt,
+                "num_inference_steps": num_inference_steps,
+                "guidance_scale": guidance_scale,
+                "width": width,
+                "height": height,
+                **kwargs
+            }
+
+            try:
+                image = gguf_loader.generate_image(**generation_kwargs)
+                if image is None:
+                    logger.warning("GGUF generation returned None, creating error image")
+                    return self._create_error_image("GGUF generation failed or not yet implemented", prompt)
+                return image
+            except Exception as e:
+                logger.error(f"GGUF generation failed: {e}")
+                return self._create_error_image(str(e), prompt)
+
+        # Continue with regular pipeline generation for non-GGUF models
         if not self.pipeline:
             raise RuntimeError("Model not loaded")

@@ -556,6 +763,62 @@ class InferenceEngine:
                 "height": 512
             })

+        # Special handling for Video (AnimateDiff) models
+        elif self.model_config.model_type == "video":
+            logger.info("Configuring AnimateDiff video generation parameters")
+
+            # Video-specific parameters
+            num_frames = kwargs.get("num_frames", 16)
+            generation_kwargs["num_frames"] = num_frames
+
+            # AnimateDiff works best with specific resolutions
+            # Use 512x512 for better compatibility with most motion adapters
+            generation_kwargs.update({
+                "width": 512,
+                "height": 512
+            })
+            logger.info(f"Using 512x512 resolution for AnimateDiff compatibility")
+
+            # Set optimal parameters for video generation
+            if guidance_scale > 7.5:
+                generation_kwargs["guidance_scale"] = 7.5
+                logger.info("Reduced guidance scale to 7.5 for video stability")
+
+            if num_inference_steps > 25:
+                generation_kwargs["num_inference_steps"] = 25
+                logger.info("Reduced inference steps to 25 for video generation")
+
+            logger.info(f"Generating {num_frames} frames for video output")
+
+        # Special handling for HiDream models
+        elif self.model_config.model_type == "hidream":
+            logger.info("Configuring HiDream model parameters")
+
+            # HiDream models support high resolution
+            generation_kwargs.update({
+                "width": width,
+                "height": height
+            })
+
+            # HiDream models have multiple text encoders, handle if provided
+            if "prompt_2" in kwargs:
+                generation_kwargs["prompt_2"] = self.truncate_prompt(kwargs["prompt_2"])
+            if "prompt_3" in kwargs:
+                generation_kwargs["prompt_3"] = self.truncate_prompt(kwargs["prompt_3"])
+            if "prompt_4" in kwargs:
+                generation_kwargs["prompt_4"] = self.truncate_prompt(kwargs["prompt_4"])
+
+            # Set optimal parameters for HiDream
+            max_seq_len = self.model_config.parameters.get("max_sequence_length", 128)
+            generation_kwargs["max_sequence_length"] = max_seq_len
+
+            # HiDream models use different guidance scale defaults
+            if guidance_scale is None or guidance_scale == 3.5:
+                generation_kwargs["guidance_scale"] = 5.0
+                logger.info("Using default guidance_scale=5.0 for HiDream model")
+
+            logger.info(f"Using max_sequence_length={max_seq_len} for HiDream model")
+
         # Generate image
         logger.info(f"Generation parameters: steps={num_inference_steps}, guidance={guidance_scale}")

@@ -866,7 +1129,26 @@ class InferenceEngine:
             if hasattr(output, 'nsfw_content_detected') and output.nsfw_content_detected:
                 logger.warning("NSFW content detected by pipeline - this should not happen with safety checker disabled")

-            image = output.images[0]
+            # Special handling for video models that return multiple frames
+            if self.model_config.model_type == "video":
+                logger.info(f"Processing video output with {len(output.frames)} frames")
+
+                # For now, return the first frame as a single image
+                # In the future, this could be extended to return a video file or GIF
+                if hasattr(output, 'frames') and len(output.frames) > 0:
+                    image = output.frames[0]
+                    logger.info("Extracted first frame from video generation")
+                else:
+                    # Fallback to images if frames not available
+                    image = output.images[0]
+                    logger.info("Using first image from video output")
+
+                # TODO: Add option to save all frames or create a GIF
+                # frames = output.frames if hasattr(output, 'frames') else output.images
+                # save_video_frames(frames, prompt)
+            else:
+                # Standard single image output for other models
+                image = output.images[0]

             # Debug: Check image properties
             logger.info(f"Generated image size: {image.size}, mode: {image.mode}")
@@ -1011,6 +1293,23 @@ class InferenceEngine:
 
     def unload(self):
         """Unload model and free GPU memory"""
+        # Handle GGUF models
+        is_gguf_model = (
+            self.model_config and
+            (self.model_config.model_type == "gguf" or
+             (self.model_config.variant and "gguf" in self.model_config.variant.lower()))
+        )
+
+        if is_gguf_model:
+            if GGUF_AVAILABLE and gguf_loader.is_loaded():
+                gguf_loader.unload_model()
+                logger.info("GGUF model unloaded")
+
+            self.model_config = None
+            self.tokenizer = None
+            return
+
+        # Handle regular diffusion models
         if self.pipeline:
             # Move to CPU to free GPU memory
             self.pipeline = self.pipeline.to("cpu")
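
Taken together with the load and generate paths above, the GGUF backend follows a simple load, generate, unload lifecycle; a usage sketch with a hypothetical config dict whose keys mirror model_config_dict from load_model() (the name and path values are illustrative):

    config = {
        "name": "flux.1-dev",                     # hypothetical model name
        "path": "/models/flux1-dev-Q4_0.gguf",    # hypothetical local path
        "variant": "gguf-q4_0",
        "model_type": "gguf",
        "parameters": {"num_inference_steps": 20, "guidance_scale": 7.5},
    }

    if gguf_loader.load_model(config):
        image = gguf_loader.generate_image(
            prompt="a watercolor fox",
            negative_prompt=None,
            num_inference_steps=20,
            guidance_scale=7.5,
            width=512,
            height=512,
        )
        gguf_loader.unload_model()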
@@ -1029,6 +1328,17 @@ class InferenceEngine:
 
     def is_loaded(self) -> bool:
         """Check if model is loaded"""
+        # Check GGUF models
+        is_gguf_model = (
+            self.model_config and
+            (self.model_config.model_type == "gguf" or
+             (self.model_config.variant and "gguf" in self.model_config.variant.lower()))
+        )
+
+        if is_gguf_model:
+            return GGUF_AVAILABLE and gguf_loader.is_loaded()
+
+        # Check regular pipeline models
         return self.pipeline is not None

     def load_lora_runtime(self, repo_id: str, weight_name: str = None, scale: float = 1.0):
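
The same is_gguf_model expression is now duplicated across generate(), unload(), is_loaded(), and get_model_info() (next hunk); a hypothetical helper that would give the check a single home, shown purely as a refactoring sketch and not part of this release:

    def _is_gguf_model(self) -> bool:
        # Hypothetical consolidation of the GGUF check repeated above
        mc = self.model_config
        return bool(mc and (mc.model_type == "gguf" or
                            (mc.variant and "gguf" in mc.variant.lower())))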
@@ -1088,10 +1398,30 @@ class InferenceEngine:
         if not self.model_config:
             return None

-        return {
+        base_info = {
             "name": self.model_config.name,
             "type": self.model_config.model_type,
             "device": self.device,
             "variant": self.model_config.variant,
             "parameters": self.model_config.parameters
-        }
+        }
+
+        # Check if this is a GGUF model
+        is_gguf_model = (
+            self.model_config.model_type == "gguf" or
+            (self.model_config.variant and "gguf" in self.model_config.variant.lower())
+        )
+
+        # Add GGUF-specific information
+        if is_gguf_model and GGUF_AVAILABLE:
+            gguf_info = gguf_loader.get_model_info()
+            base_info.update(gguf_info)
+            base_info["gguf_available"] = True
+            base_info["gguf_loaded"] = gguf_loader.is_loaded()
+            base_info["is_gguf"] = True
+        else:
+            base_info["gguf_available"] = GGUF_AVAILABLE
+            base_info["gguf_loaded"] = False
+            base_info["is_gguf"] = is_gguf_model
+
+        return base_info
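
Callers can branch on the new keys without caring which backend is active; a brief usage sketch, assuming engine is an InferenceEngine instance:

    info = engine.get_model_info()
    if info is None:
        print("No model loaded")
    elif info.get("is_gguf"):
        print("GGUF backend, loaded:", info["gguf_loaded"])
    else:
        print(f"{info['type']} pipeline on {info['device']}")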