ollamadiffuser 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ OllamaDiffuser - Local AI Image Generation with Ollama-style CLI
4
4
  A tool for managing and running Stable Diffusion, FLUX.1, and other AI image generation models locally.
5
5
  """
6
6
 
7
- __version__ = "1.2.0"
7
+ __version__ = "1.2.2"
8
8
  __author__ = "OllamaDiffuser Team"
9
9
  __email__ = "ollamadiffuser@gmail.com"
10
10
  __description__ = "🎨 Local AI Image Generation with Ollama-style CLI for Stable Diffusion, FLUX.1, and LoRA support"
@@ -20,7 +20,9 @@ class GenerateRequest(BaseModel):
20
20
  prompt: str
21
21
  negative_prompt: str = "low quality, bad anatomy, worst quality, low resolution"
22
22
  num_inference_steps: Optional[int] = None
23
+ steps: Optional[int] = None # Alias for num_inference_steps for convenience
23
24
  guidance_scale: Optional[float] = None
25
+ cfg_scale: Optional[float] = None # Alias for guidance_scale for convenience
24
26
  width: int = 1024
25
27
  height: int = 1024
26
28
  control_image_path: Optional[str] = None # Path to control image file
@@ -232,12 +234,18 @@ def create_app() -> FastAPI:
232
234
  # Get current loaded inference engine
233
235
  engine = model_manager.loaded_model
234
236
 
237
+ # Handle parameter aliasing - prioritize shorter names for convenience
238
+ steps = request.steps if request.steps is not None else request.num_inference_steps
239
+ guidance = request.cfg_scale if request.cfg_scale is not None else request.guidance_scale
240
+
235
241
  # Generate image
236
242
  image = engine.generate_image(
237
243
  prompt=request.prompt,
238
244
  negative_prompt=request.negative_prompt,
239
- num_inference_steps=request.num_inference_steps,
240
- guidance_scale=request.guidance_scale,
245
+ num_inference_steps=steps,
246
+ steps=steps, # Pass both for GGUF compatibility
247
+ guidance_scale=guidance,
248
+ cfg_scale=guidance, # Pass both for GGUF compatibility
241
249
  width=request.width,
242
250
  height=request.height,
243
251
  control_image=request.control_image_path,
@@ -46,6 +46,7 @@ class GGUFModelLoader:
46
46
 
47
47
  # Map variant to actual file names
48
48
  variant_mapping = {
49
+ # FLUX.1-dev variants
49
50
  'gguf-q2k': 'flux1-dev-Q2_K.gguf',
50
51
  'gguf-q3ks': 'flux1-dev-Q3_K_S.gguf',
51
52
  'gguf-q4ks': 'flux1-dev-Q4_K_S.gguf',
@@ -57,6 +58,52 @@ class GGUFModelLoader:
57
58
  'gguf-q6k': 'flux1-dev-Q6_K.gguf',
58
59
  'gguf-q8': 'flux1-dev-Q8_0.gguf',
59
60
  'gguf-f16': 'flux1-dev-F16.gguf',
61
+
62
+ # FLUX.1-schnell variants
63
+ 'gguf-schnell': 'flux1-schnell-F16.gguf', # Default to F16
64
+ 'gguf-schnell-q2k': 'flux1-schnell-Q2_K.gguf',
65
+ 'gguf-schnell-q3ks': 'flux1-schnell-Q3_K_S.gguf',
66
+ 'gguf-schnell-q4-0': 'flux1-schnell-Q4_0.gguf',
67
+ 'gguf-schnell-q4-1': 'flux1-schnell-Q4_1.gguf',
68
+ 'gguf-schnell-q4ks': 'flux1-schnell-Q4_K_S.gguf',
69
+ 'gguf-schnell-q5-0': 'flux1-schnell-Q5_0.gguf',
70
+ 'gguf-schnell-q5-1': 'flux1-schnell-Q5_1.gguf',
71
+ 'gguf-schnell-q5ks': 'flux1-schnell-Q5_K_S.gguf',
72
+ 'gguf-schnell-q6k': 'flux1-schnell-Q6_K.gguf',
73
+ 'gguf-schnell-q8': 'flux1-schnell-Q8_0.gguf',
74
+ 'gguf-schnell-f16': 'flux1-schnell-F16.gguf',
75
+
76
+ # Stable Diffusion 3.5 Large variants
77
+ 'gguf-large': 'sd3.5_large-F16.gguf', # Default to F16
78
+ 'gguf-large-q4-0': 'sd3.5_large-Q4_0.gguf',
79
+ 'gguf-large-q4-1': 'sd3.5_large-Q4_1.gguf',
80
+ 'gguf-large-q5-0': 'sd3.5_large-Q5_0.gguf',
81
+ 'gguf-large-q5-1': 'sd3.5_large-Q5_1.gguf',
82
+ 'gguf-large-q8-0': 'sd3.5_large-Q8_0.gguf',
83
+ 'gguf-large-f16': 'sd3.5_large-F16.gguf',
84
+
85
+ # Stable Diffusion 3.5 Large Turbo variants
86
+ 'gguf-large-turbo': 'sd3.5_large_turbo.gguf', # Default to standard format
87
+ 'gguf-large-turbo-q4-0': 'sd3.5_large_turbo-Q4_0.gguf',
88
+ 'gguf-large-turbo-q4-1': 'sd3.5_large_turbo-Q4_1.gguf',
89
+ 'gguf-large-turbo-q5-0': 'sd3.5_large_turbo-Q5_0.gguf',
90
+ 'gguf-large-turbo-q5-1': 'sd3.5_large_turbo-Q5_1.gguf',
91
+ 'gguf-large-turbo-q8-0': 'sd3.5_large_turbo-Q8_0.gguf',
92
+ 'gguf-large-turbo-f16': 'sd3.5_large_turbo-F16.gguf',
93
+
94
+ # Other model variants
95
+ 'gguf-medium': 'sd3.5-medium-F16.gguf',
96
+ 'gguf-sd3-medium': 'sd3-medium-F16.gguf',
97
+ 'gguf-lite': 'flux-lite-8b-F16.gguf',
98
+ 'gguf-distilled': 'flux-dev-de-distill-F16.gguf',
99
+ 'gguf-fill': 'flux-fill-dev-F16.gguf',
100
+ 'gguf-full': 'hidream-i1-full-F16.gguf',
101
+ 'gguf-dev': 'hidream-i1-dev-F16.gguf',
102
+ 'gguf-fast': 'hidream-i1-fast-F16.gguf',
103
+ 'gguf-i2v': 'ltx-video-i2v-F16.gguf',
104
+ 'gguf-2b': 'ltx-video-2b-F16.gguf',
105
+ 'gguf-t2v': 'hunyuan-video-t2v-F16.gguf',
106
+
60
107
  'gguf': 'flux1-dev-Q4_K_S.gguf', # Default to Q4_K_S
61
108
  }
62
109
 
@@ -132,27 +179,59 @@ class GGUFModelLoader:
132
179
  # Download required components
133
180
  components = self.download_required_components(model_path)
134
181
 
135
- # Verify all components are available
136
- missing_components = [name for name, path in components.items() if path is None]
137
- if missing_components:
138
- logger.error(f"Missing required components: {missing_components}")
139
- return False
140
-
141
- # Initialize stable-diffusion.cpp
142
- if not GGUF_AVAILABLE:
143
- logger.error("stable-diffusion-cpp-python not properly installed")
144
- return False
182
+ # Detect model type for appropriate validation
183
+ is_sd35 = any(pattern in model_name.lower() for pattern in ['3.5', 'sd3.5', 'stable-diffusion-3-5'])
184
+
185
+ # Validate components based on model type
186
+ if is_sd35:
187
+ # SD 3.5 models need VAE, CLIP-L, CLIP-G, and T5XXL
188
+ required_components = ['vae', 'clip_l', 'clip_g', 't5xxl']
189
+ missing_components = [name for name in required_components if not components.get(name)]
190
+ if missing_components:
191
+ logger.error(f"Missing required SD 3.5 components: {missing_components}")
192
+ return False
193
+ else:
194
+ # FLUX models need VAE, CLIP-L, and T5XXL (no CLIP-G)
195
+ required_components = ['vae', 'clip_l', 't5xxl']
196
+ missing_components = [name for name in required_components if not components.get(name)]
197
+ if missing_components:
198
+ logger.error(f"Missing required FLUX components: {missing_components}")
199
+ return False
200
+
201
+ # Initialize the stable-diffusion.cpp model
202
+ logger.info("Loading GGUF model with stable-diffusion.cpp...")
203
+
204
+ if is_sd35:
205
+ logger.info("Detected SD 3.5 model - using appropriate configuration")
145
206
 
146
- # Create StableDiffusion instance with correct API for FLUX
147
- # For FLUX models, use diffusion_model_path instead of model_path
148
- self.stable_diffusion = StableDiffusion(
149
- diffusion_model_path=str(gguf_file), # FLUX GGUF models use this parameter
150
- vae_path=str(components['vae']),
151
- clip_l_path=str(components['clip_l']),
152
- t5xxl_path=str(components['t5xxl']),
153
- vae_decode_only=True, # For txt2img only
154
- n_threads=-1 # Auto-detect threads
155
- )
207
+ sd_params = {
208
+ 'diffusion_model_path': str(gguf_file),
209
+ 'n_threads': 4
210
+ }
211
+
212
+ if components['vae']:
213
+ sd_params['vae_path'] = str(components['vae'])
214
+ if components['clip_l']:
215
+ sd_params['clip_l_path'] = str(components['clip_l'])
216
+ if components['clip_g']:
217
+ sd_params['clip_g_path'] = str(components['clip_g'])
218
+ if components['t5xxl']:
219
+ sd_params['t5xxl_path'] = str(components['t5xxl'])
220
+
221
+ logger.info(f"Initializing SD 3.5 model with params: {sd_params}")
222
+ self.stable_diffusion = StableDiffusion(**sd_params)
223
+
224
+ else:
225
+ # FLUX models use different parameter structure
226
+ logger.info("Detected FLUX model - using CLIP-L and T5-XXL configuration")
227
+ self.stable_diffusion = StableDiffusion(
228
+ diffusion_model_path=str(gguf_file),
229
+ vae_path=str(components['vae']),
230
+ clip_l_path=str(components['clip_l']),
231
+ t5xxl_path=str(components['t5xxl']),
232
+ vae_decode_only=True,
233
+ n_threads=-1
234
+ )
156
235
 
157
236
  self.model_path = str(gguf_file)
158
237
  self.model_config = model_config
@@ -179,17 +258,17 @@ class GGUFModelLoader:
179
258
  width = kwargs.get('width', 1024)
180
259
  height = kwargs.get('height', 1024)
181
260
 
182
- # Support both 'steps' and 'num_inference_steps'
183
- steps = kwargs.get('steps') or kwargs.get('num_inference_steps', 20) # Increased for better quality
261
+ # Support both 'steps' and 'num_inference_steps' - ensure not None
262
+ steps = kwargs.get('steps') or kwargs.get('num_inference_steps') or 20
184
263
 
185
- # Support both 'cfg_scale' and 'guidance_scale' - FLUX works best with low CFG
186
- cfg_scale = kwargs.get('cfg_scale') or kwargs.get('guidance_scale', 1.0) # FLUX optimized CFG (reduced from 1.2)
264
+ # Support both 'cfg_scale' and 'guidance_scale' - FLUX works best with low CFG - ensure not None
265
+ cfg_scale = kwargs.get('cfg_scale') or kwargs.get('guidance_scale') or 1.0
187
266
 
188
267
  seed = kwargs.get('seed', 42)
189
268
  negative_prompt = kwargs.get('negative_prompt', "")
190
269
 
191
270
  # Allow custom sampler, with FLUX-optimized default
192
- sampler = kwargs.get('sampler', kwargs.get('sample_method', 'dpmpp2m')) # Better sampler for FLUX (fixed name)
271
+ sampler = kwargs.get('sampler', kwargs.get('sample_method', 'dpmpp2m'))
193
272
 
194
273
  # Validate sampler and provide fallback
195
274
  valid_samplers = ['euler_a', 'euler', 'heun', 'dpm2', 'dpmpp2s_a', 'dpmpp2m', 'dpmpp2mv2', 'ipndm', 'ipndm_v', 'lcm', 'ddim_trailing', 'tcd']
@@ -197,6 +276,13 @@ class GGUFModelLoader:
197
276
  logger.warning(f"Invalid sampler '{sampler}', falling back to 'dpmpp2m'")
198
277
  sampler = 'dpmpp2m'
199
278
 
279
+ # Ensure all values are proper types and not None
280
+ steps = int(steps) if steps is not None else 20
281
+ cfg_scale = float(cfg_scale) if cfg_scale is not None else 1.0
282
+ width = int(width) if width is not None else 1024
283
+ height = int(height) if height is not None else 1024
284
+ seed = int(seed) if seed is not None else 42
285
+
200
286
  logger.info(f"Generating image: {width}x{height}, steps={steps}, cfg={cfg_scale}, sampler={sampler}, negative_prompt={negative_prompt}")
201
287
 
202
288
  # Log model quantization info for quality assessment
@@ -219,7 +305,7 @@ class GGUFModelLoader:
219
305
  cfg_scale=cfg_scale,
220
306
  width=width,
221
307
  height=height,
222
- sample_method=sampler, # Use optimized sampler
308
+ sample_method=sampler,
223
309
  sample_steps=steps,
224
310
  seed=seed
225
311
  )
@@ -313,17 +399,64 @@ class GGUFModelLoader:
313
399
  """
314
400
  # Map variant to specific GGUF file patterns
315
401
  variant_patterns = {
402
+ # FLUX.1-dev variants
316
403
  'gguf-q2k': ['*Q2_K*.gguf'],
317
404
  'gguf-q3ks': ['*Q3_K_S*.gguf'],
405
+ 'gguf-q4ks': ['*Q4_K_S*.gguf'],
318
406
  'gguf-q4-0': ['*Q4_0*.gguf'],
319
407
  'gguf-q4-1': ['*Q4_1*.gguf'],
320
- 'gguf-q4ks': ['*Q4_K_S*.gguf'],
408
+ 'gguf-q5ks': ['*Q5_K_S*.gguf'],
321
409
  'gguf-q5-0': ['*Q5_0*.gguf'],
322
410
  'gguf-q5-1': ['*Q5_1*.gguf'],
323
- 'gguf-q5ks': ['*Q5_K_S*.gguf'],
324
411
  'gguf-q6k': ['*Q6_K*.gguf'],
325
- 'gguf-q8-0': ['*Q8_0*.gguf'],
326
- 'gguf-f16': ['*F16*.gguf']
412
+ 'gguf-q8': ['*Q8_0*.gguf'],
413
+ 'gguf-q8-0': ['*Q8_0*.gguf'], # Keep for backward compatibility
414
+ 'gguf-f16': ['*F16*.gguf'],
415
+
416
+ # FLUX.1-schnell variants
417
+ 'gguf-schnell': ['*flux1-schnell*F16*.gguf'],
418
+ 'gguf-schnell-q2k': ['*flux1-schnell*Q2_K*.gguf'],
419
+ 'gguf-schnell-q3ks': ['*flux1-schnell*Q3_K_S*.gguf'],
420
+ 'gguf-schnell-q4-0': ['*flux1-schnell*Q4_0*.gguf'],
421
+ 'gguf-schnell-q4-1': ['*flux1-schnell*Q4_1*.gguf'],
422
+ 'gguf-schnell-q4ks': ['*flux1-schnell*Q4_K_S*.gguf'],
423
+ 'gguf-schnell-q5-0': ['*flux1-schnell*Q5_0*.gguf'],
424
+ 'gguf-schnell-q5-1': ['*flux1-schnell*Q5_1*.gguf'],
425
+ 'gguf-schnell-q5ks': ['*flux1-schnell*Q5_K_S*.gguf'],
426
+ 'gguf-schnell-q6k': ['*flux1-schnell*Q6_K*.gguf'],
427
+ 'gguf-schnell-q8': ['*flux1-schnell*Q8_0*.gguf'],
428
+ 'gguf-schnell-f16': ['*flux1-schnell*F16*.gguf'],
429
+
430
+ # Stable Diffusion 3.5 Large variants
431
+ 'gguf-large': ['*sd3.5_large-F16*.gguf'],
432
+ 'gguf-large-q4-0': ['*sd3.5_large-Q4_0*.gguf'],
433
+ 'gguf-large-q4-1': ['*sd3.5_large-Q4_1*.gguf'],
434
+ 'gguf-large-q5-0': ['*sd3.5_large-Q5_0*.gguf'],
435
+ 'gguf-large-q5-1': ['*sd3.5_large-Q5_1*.gguf'],
436
+ 'gguf-large-q8-0': ['*sd3.5_large-Q8_0*.gguf'],
437
+ 'gguf-large-f16': ['*sd3.5_large-F16*.gguf'],
438
+
439
+ # Stable Diffusion 3.5 Large Turbo variants
440
+ 'gguf-large-turbo': ['*sd3.5_large_turbo*F16*.gguf'],
441
+ 'gguf-large-turbo-q4-0': ['*sd3.5_large_turbo*Q4_0*.gguf'],
442
+ 'gguf-large-turbo-q4-1': ['*sd3.5_large_turbo*Q4_1*.gguf'],
443
+ 'gguf-large-turbo-q5-0': ['*sd3.5_large_turbo*Q5_0*.gguf'],
444
+ 'gguf-large-turbo-q5-1': ['*sd3.5_large_turbo*Q5_1*.gguf'],
445
+ 'gguf-large-turbo-q8-0': ['*sd3.5_large_turbo*Q8_0*.gguf'],
446
+ 'gguf-large-turbo-f16': ['*sd3.5_large_turbo*F16*.gguf'],
447
+
448
+ # Other model variants
449
+ 'gguf-medium': ['*sd3.5-medium*.gguf'],
450
+ 'gguf-sd3-medium': ['*sd3-medium*.gguf'],
451
+ 'gguf-lite': ['*flux-lite-8b*.gguf'],
452
+ 'gguf-distilled': ['*flux-dev-de-distill*.gguf'],
453
+ 'gguf-fill': ['*flux-fill-dev*.gguf'],
454
+ 'gguf-full': ['*hidream-i1-full*.gguf'],
455
+ 'gguf-dev': ['*hidream-i1-dev*.gguf'],
456
+ 'gguf-fast': ['*hidream-i1-fast*.gguf'],
457
+ 'gguf-i2v': ['*ltx-video-i2v*.gguf', '*hunyuan-video-i2v*.gguf'],
458
+ 'gguf-2b': ['*ltx-video-2b*.gguf'],
459
+ 'gguf-t2v': ['*hunyuan-video-t2v*.gguf'],
327
460
  }
328
461
 
329
462
  # Get the specific GGUF file pattern for this variant
@@ -342,26 +475,125 @@ class GGUFModelLoader:
342
475
  # Include the specific GGUF model file
343
476
  allow_patterns = essential_files + gguf_pattern
344
477
 
345
- # Create ignore patterns - ignore all other GGUF variants
346
- all_gguf_variants = []
347
- for pattern_list in variant_patterns.values():
348
- all_gguf_variants.extend(pattern_list)
478
+ # Create ignore patterns based on variant name (not pattern content)
479
+ # This prevents conflicts between allow and ignore patterns
480
+ ignore_patterns = []
349
481
 
350
- # Remove the current variant from ignore list
351
- ignore_patterns = [p for p in all_gguf_variants if p not in gguf_pattern]
482
+ # Determine model family from variant name
483
+ if variant.startswith('gguf-schnell') or 'schnell' in variant:
484
+ # FLUX.1-schnell variants - ignore other model types
485
+ ignore_patterns = [
486
+ '*flux1-dev*.gguf', # Ignore FLUX.1-dev
487
+ '*sd3.5*.gguf', # Ignore SD 3.5
488
+ '*ltx-video*.gguf', # Ignore video models
489
+ '*hidream*.gguf', # Ignore HiDream
490
+ '*hunyuan*.gguf' # Ignore Hunyuan
491
+ ]
492
+ # Ignore other schnell quantizations except the one we want
493
+ for other_variant, other_patterns in variant_patterns.items():
494
+ if (other_variant.startswith('gguf-schnell') and
495
+ other_variant != variant and
496
+ other_variant != 'gguf'):
497
+ # Only ignore if it doesn't conflict with our allow patterns
498
+ for pattern in other_patterns:
499
+ if pattern not in gguf_pattern:
500
+ ignore_patterns.append(pattern)
501
+
502
+ elif (variant.startswith('gguf-large-turbo') or
503
+ 'large-turbo' in variant or
504
+ variant.startswith('gguf-large') or
505
+ 'sd3.5' in variant or
506
+ 'stable-diffusion-3' in variant):
507
+ # SD 3.5 variants - ignore other model types
508
+ ignore_patterns = [
509
+ '*flux1-dev*.gguf', # Ignore FLUX.1-dev
510
+ '*flux1-schnell*.gguf', # Ignore FLUX.1-schnell
511
+ '*ltx-video*.gguf', # Ignore video models
512
+ '*hidream*.gguf', # Ignore HiDream
513
+ '*hunyuan*.gguf' # Ignore Hunyuan
514
+ ]
515
+ # Ignore other SD 3.5 quantizations except the one we want
516
+ for other_variant, other_patterns in variant_patterns.items():
517
+ if (('large' in other_variant or 'sd3.5' in other_variant or 'stable-diffusion-3' in other_variant) and
518
+ other_variant != variant and
519
+ other_variant != 'gguf'):
520
+ # Only ignore if it doesn't conflict with our allow patterns
521
+ for pattern in other_patterns:
522
+ if pattern not in gguf_pattern:
523
+ ignore_patterns.append(pattern)
524
+
525
+ elif ('video' in variant or
526
+ 'i2v' in variant or
527
+ 't2v' in variant or
528
+ '2b' in variant):
529
+ # Video model variants
530
+ ignore_patterns = [
531
+ '*flux1-dev*.gguf',
532
+ '*flux1-schnell*.gguf',
533
+ '*sd3.5*.gguf'
534
+ ]
535
+
536
+ elif ('hidream' in variant or
537
+ 'full' in variant or
538
+ 'fast' in variant):
539
+ # HiDream variants
540
+ ignore_patterns = [
541
+ '*flux1-dev*.gguf',
542
+ '*flux1-schnell*.gguf',
543
+ '*sd3.5*.gguf',
544
+ '*ltx-video*.gguf',
545
+ '*hunyuan*.gguf'
546
+ ]
547
+
548
+ else:
549
+ # FLUX.1-dev variants (default case) - ignore other model types
550
+ ignore_patterns = [
551
+ '*flux1-schnell*.gguf', # Ignore FLUX.1-schnell
552
+ '*sd3.5*.gguf', # Ignore SD 3.5
553
+ '*ltx-video*.gguf', # Ignore video models
554
+ '*hidream*.gguf', # Ignore HiDream
555
+ '*hunyuan*.gguf' # Ignore Hunyuan
556
+ ]
557
+ # Ignore other FLUX.1-dev quantizations except the one we want
558
+ for other_variant, other_patterns in variant_patterns.items():
559
+ if (not other_variant.startswith('gguf-schnell') and
560
+ not 'large' in other_variant and
561
+ not 'sd3.5' in other_variant and
562
+ not 'video' in other_variant and
563
+ not 'hidream' in other_variant and
564
+ other_variant != variant and
565
+ other_variant != 'gguf'):
566
+ # Only ignore if it doesn't conflict with our allow patterns
567
+ for pattern in other_patterns:
568
+ if pattern not in gguf_pattern:
569
+ ignore_patterns.append(pattern)
352
570
 
353
571
  return {
354
572
  'allow_patterns': allow_patterns,
355
573
  'ignore_patterns': ignore_patterns
356
574
  }
357
575
 
576
+ def _get_model_family(self, pattern: str) -> str:
577
+ """Extract model family from a pattern (e.g., flux1-dev, flux1-schnell, sd3.5-large)"""
578
+ if 'flux1-dev' in pattern:
579
+ return 'flux1-dev'
580
+ elif 'flux1-schnell' in pattern:
581
+ return 'flux1-schnell'
582
+ elif 'sd3.5-large-turbo' in pattern:
583
+ return 'sd3.5-large-turbo'
584
+ elif 'sd3.5-large' in pattern:
585
+ return 'sd3.5-large'
586
+ elif 'sd3.5' in pattern:
587
+ return 'sd3.5'
588
+ else:
589
+ return pattern.split('*')[1].split('*')[0] if '*' in pattern else pattern
590
+
358
591
  def download_required_components(self, model_path: Path) -> Dict[str, Optional[Path]]:
359
592
  """Download or locate required VAE, CLIP-L, and T5XXL components
360
593
 
361
- For FLUX GGUF models, these components need to be downloaded separately:
362
- - VAE: ae.safetensors from black-forest-labs/FLUX.1-dev
363
- - CLIP-L: clip_l.safetensors from comfyanonymous/flux_text_encoders
364
- - T5XXL: t5xxl_fp16.safetensors from comfyanonymous/flux_text_encoders
594
+ For different model types:
595
+ - FLUX GGUF models need: ae.safetensors (VAE), clip_l.safetensors, t5xxl_fp16.safetensors
596
+ - SD 3.5 GGUF models need: vae/diffusion_pytorch_model.safetensors (VAE), clip_l.safetensors, clip_g.safetensors, t5xxl_fp16.safetensors
365
597
  """
366
598
  from ..utils.download_utils import robust_snapshot_download
367
599
  from ..config.settings import settings
@@ -369,66 +601,168 @@ class GGUFModelLoader:
369
601
  components = {
370
602
  'vae': None,
371
603
  'clip_l': None,
604
+ 'clip_g': None, # Needed for SD 3.5 models
372
605
  't5xxl': None
373
606
  }
374
607
 
375
- logger.info("Downloading required FLUX components...")
608
+ # Detect model type based on model path or name
609
+ model_name = model_path.name.lower()
610
+ is_sd35 = any(pattern in model_name for pattern in ['3.5', 'sd3.5', 'stable-diffusion-3-5'])
611
+ is_flux = any(x in model_name for x in ['flux', 'flux1'])
612
+
613
+ logger.info(f"Downloading required components for model type: {'SD3.5' if is_sd35 else 'FLUX' if is_flux else 'Unknown'}")
376
614
 
377
615
  try:
378
- # Download VAE from official FLUX repository
379
- vae_dir = model_path.parent / "flux_vae"
380
- if not (vae_dir / "ae.safetensors").exists():
381
- logger.info("Downloading FLUX VAE...")
382
- robust_snapshot_download(
383
- repo_id="black-forest-labs/FLUX.1-dev",
384
- local_dir=str(vae_dir),
385
- cache_dir=str(settings.cache_dir),
386
- allow_patterns=['ae.safetensors'],
387
- max_retries=3
388
- )
389
-
390
- vae_path = vae_dir / "ae.safetensors"
391
- if vae_path.exists():
392
- components['vae'] = vae_path
393
- logger.info(f"VAE found at: {vae_path}")
394
-
395
- # Download text encoders
396
- text_encoders_dir = model_path.parent / "flux_text_encoders"
397
-
398
- # Download CLIP-L
399
- if not (text_encoders_dir / "clip_l.safetensors").exists():
400
- logger.info("Downloading CLIP-L text encoder...")
401
- robust_snapshot_download(
402
- repo_id="comfyanonymous/flux_text_encoders",
403
- local_dir=str(text_encoders_dir),
404
- cache_dir=str(settings.cache_dir),
405
- allow_patterns=['clip_l.safetensors'],
406
- max_retries=3
407
- )
408
-
409
- clip_l_path = text_encoders_dir / "clip_l.safetensors"
410
- if clip_l_path.exists():
411
- components['clip_l'] = clip_l_path
412
- logger.info(f"CLIP-L found at: {clip_l_path}")
413
-
414
- # Download T5XXL
415
- if not (text_encoders_dir / "t5xxl_fp16.safetensors").exists():
416
- logger.info("Downloading T5XXL text encoder...")
417
- robust_snapshot_download(
418
- repo_id="comfyanonymous/flux_text_encoders",
419
- local_dir=str(text_encoders_dir),
420
- cache_dir=str(settings.cache_dir),
421
- allow_patterns=['t5xxl_fp16.safetensors'],
422
- max_retries=3
423
- )
424
-
425
- t5xxl_path = text_encoders_dir / "t5xxl_fp16.safetensors"
426
- if t5xxl_path.exists():
427
- components['t5xxl'] = t5xxl_path
428
- logger.info(f"T5XXL found at: {t5xxl_path}")
616
+ if is_sd35:
617
+ # SD 3.5 models - use SD 3.5 specific components
618
+ logger.info("Downloading SD 3.5 components...")
619
+
620
+ # Download SD 3.5 VAE
621
+ vae_dir = model_path.parent / "sd35_vae"
622
+ if not (vae_dir / "vae.safetensors").exists():
623
+ logger.info("Downloading SD 3.5 VAE...")
624
+ robust_snapshot_download(
625
+ repo_id="stabilityai/stable-diffusion-3.5-large",
626
+ local_dir=str(vae_dir),
627
+ cache_dir=str(settings.cache_dir),
628
+ allow_patterns=['vae/diffusion_pytorch_model.safetensors'],
629
+ max_retries=3
630
+ )
631
+ # Move to expected location if needed
632
+ vae_source = vae_dir / "vae" / "diffusion_pytorch_model.safetensors"
633
+ vae_target = vae_dir / "vae.safetensors"
634
+ if vae_source.exists() and not vae_target.exists():
635
+ vae_source.rename(vae_target)
636
+
637
+ vae_path = vae_dir / "vae.safetensors"
638
+ if vae_path.exists():
639
+ components['vae'] = vae_path
640
+ logger.info(f"SD 3.5 VAE found at: {vae_path}")
641
+
642
+ # Download SD 3.5 text encoders
643
+ text_encoders_dir = model_path.parent / "sd35_text_encoders"
644
+
645
+ # Download CLIP-L for SD 3.5
646
+ if not (text_encoders_dir / "clip_l.safetensors").exists():
647
+ logger.info("Downloading SD 3.5 CLIP-L text encoder...")
648
+ robust_snapshot_download(
649
+ repo_id="stabilityai/stable-diffusion-3.5-large",
650
+ local_dir=str(text_encoders_dir),
651
+ cache_dir=str(settings.cache_dir),
652
+ allow_patterns=['text_encoders/clip_l.safetensors'],
653
+ max_retries=3
654
+ )
655
+ # Move to expected location if needed
656
+ clip_source = text_encoders_dir / "text_encoders" / "clip_l.safetensors"
657
+ clip_target = text_encoders_dir / "clip_l.safetensors"
658
+ if clip_source.exists() and not clip_target.exists():
659
+ clip_source.rename(clip_target)
660
+
661
+ clip_l_path = text_encoders_dir / "clip_l.safetensors"
662
+ if clip_l_path.exists():
663
+ components['clip_l'] = clip_l_path
664
+ logger.info(f"SD 3.5 CLIP-L found at: {clip_l_path}")
665
+
666
+ # Download CLIP-G for SD 3.5
667
+ if not (text_encoders_dir / "clip_g.safetensors").exists():
668
+ logger.info("Downloading SD 3.5 CLIP-G text encoder...")
669
+ robust_snapshot_download(
670
+ repo_id="stabilityai/stable-diffusion-3.5-large",
671
+ local_dir=str(text_encoders_dir),
672
+ cache_dir=str(settings.cache_dir),
673
+ allow_patterns=['text_encoders/clip_g.safetensors'],
674
+ max_retries=3
675
+ )
676
+ # Move to expected location if needed
677
+ clipg_source = text_encoders_dir / "text_encoders" / "clip_g.safetensors"
678
+ clipg_target = text_encoders_dir / "clip_g.safetensors"
679
+ if clipg_source.exists() and not clipg_target.exists():
680
+ clipg_source.rename(clipg_target)
681
+
682
+ clip_g_path = text_encoders_dir / "clip_g.safetensors"
683
+ if clip_g_path.exists():
684
+ components['clip_g'] = clip_g_path
685
+ logger.info(f"SD 3.5 CLIP-G found at: {clip_g_path}")
686
+
687
+ # Download T5XXL for SD 3.5
688
+ if not (text_encoders_dir / "t5xxl_fp16.safetensors").exists():
689
+ logger.info("Downloading SD 3.5 T5XXL text encoder...")
690
+ robust_snapshot_download(
691
+ repo_id="stabilityai/stable-diffusion-3.5-large",
692
+ local_dir=str(text_encoders_dir),
693
+ cache_dir=str(settings.cache_dir),
694
+ allow_patterns=['text_encoders/t5xxl_fp16.safetensors'],
695
+ max_retries=3
696
+ )
697
+ # Move to expected location if needed
698
+ t5_source = text_encoders_dir / "text_encoders" / "t5xxl_fp16.safetensors"
699
+ t5_target = text_encoders_dir / "t5xxl_fp16.safetensors"
700
+ if t5_source.exists() and not t5_target.exists():
701
+ t5_source.rename(t5_target)
702
+
703
+ t5xxl_path = text_encoders_dir / "t5xxl_fp16.safetensors"
704
+ if t5xxl_path.exists():
705
+ components['t5xxl'] = t5xxl_path
706
+ logger.info(f"SD 3.5 T5XXL found at: {t5xxl_path}")
707
+
708
+ else:
709
+ # FLUX models (default) - use FLUX specific components
710
+ logger.info("Downloading FLUX components...")
711
+
712
+ # Download VAE from official FLUX repository
713
+ vae_dir = model_path.parent / "flux_vae"
714
+ if not (vae_dir / "ae.safetensors").exists():
715
+ logger.info("Downloading FLUX VAE...")
716
+ robust_snapshot_download(
717
+ repo_id="black-forest-labs/FLUX.1-dev",
718
+ local_dir=str(vae_dir),
719
+ cache_dir=str(settings.cache_dir),
720
+ allow_patterns=['ae.safetensors'],
721
+ max_retries=3
722
+ )
723
+
724
+ vae_path = vae_dir / "ae.safetensors"
725
+ if vae_path.exists():
726
+ components['vae'] = vae_path
727
+ logger.info(f"FLUX VAE found at: {vae_path}")
728
+
729
+ # Download text encoders
730
+ text_encoders_dir = model_path.parent / "flux_text_encoders"
731
+
732
+ # Download CLIP-L
733
+ if not (text_encoders_dir / "clip_l.safetensors").exists():
734
+ logger.info("Downloading FLUX CLIP-L text encoder...")
735
+ robust_snapshot_download(
736
+ repo_id="comfyanonymous/flux_text_encoders",
737
+ local_dir=str(text_encoders_dir),
738
+ cache_dir=str(settings.cache_dir),
739
+ allow_patterns=['clip_l.safetensors'],
740
+ max_retries=3
741
+ )
742
+
743
+ clip_l_path = text_encoders_dir / "clip_l.safetensors"
744
+ if clip_l_path.exists():
745
+ components['clip_l'] = clip_l_path
746
+ logger.info(f"FLUX CLIP-L found at: {clip_l_path}")
747
+
748
+ # Download T5XXL
749
+ if not (text_encoders_dir / "t5xxl_fp16.safetensors").exists():
750
+ logger.info("Downloading FLUX T5XXL text encoder...")
751
+ robust_snapshot_download(
752
+ repo_id="comfyanonymous/flux_text_encoders",
753
+ local_dir=str(text_encoders_dir),
754
+ cache_dir=str(settings.cache_dir),
755
+ allow_patterns=['t5xxl_fp16.safetensors'],
756
+ max_retries=3
757
+ )
758
+
759
+ t5xxl_path = text_encoders_dir / "t5xxl_fp16.safetensors"
760
+ if t5xxl_path.exists():
761
+ components['t5xxl'] = t5xxl_path
762
+ logger.info(f"FLUX T5XXL found at: {t5xxl_path}")
429
763
 
430
764
  except Exception as e:
431
- logger.error(f"Failed to download FLUX components: {e}")
765
+ logger.error(f"Failed to download components: {e}")
432
766
 
433
767
  return components
434
768
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ollamadiffuser
3
- Version: 1.2.0
3
+ Version: 1.2.2
4
4
  Summary: 🎨 Local AI Image Generation with Ollama-style CLI for Stable Diffusion, FLUX.1, and LoRA support
5
5
  Home-page: https://github.com/ollamadiffuser/ollamadiffuser
6
6
  Author: OllamaDiffuser Team
@@ -82,6 +82,63 @@ Dynamic: requires-python
82
82
 
83
83
  ---
84
84
 
85
+ ## 🔑 Hugging Face Authentication
86
+
87
+ **Do you need a Hugging Face token?** It depends on which models you want to use!
88
+
89
+ ### 🟢 Models that DON'T require a token:
90
+ - **FLUX.1-schnell** - Apache 2.0 license, ready to use ✅
91
+ - **Stable Diffusion 1.5** - Basic model, no authentication needed ✅
92
+ - **Most ControlNet models** - Generally public access ✅
93
+
94
+ ### 🟡 Models that DO require a token:
95
+ - **FLUX.1-dev** - Requires HF token and license agreement ⚠️
96
+ - **Stable Diffusion 3.5** - Requires HF token and license agreement ⚠️
97
+ - **Some premium LoRAs** - Gated models from Hugging Face ⚠️
98
+
99
+ ### 🚀 Quick Setup
100
+
101
+ **For basic usage** (no token needed):
102
+ ```bash
103
+ # These work immediately without any setup:
104
+ ollamadiffuser pull flux.1-schnell
105
+ ollamadiffuser pull stable-diffusion-1.5
106
+ ```
107
+
108
+ **For advanced models** (token required):
109
+ ```bash
110
+ # 1. Set your token
111
+ export HF_TOKEN=your_token_here
112
+
113
+ # 2. Now you can access gated models
114
+ ollamadiffuser pull flux.1-dev
115
+ ollamadiffuser pull stable-diffusion-3.5-medium
116
+ ```
117
+
118
+ ### 🔧 How to get a Hugging Face token:
119
+
120
+ 1. **Create account**: Visit [huggingface.co](https://huggingface.co) and sign up
121
+ 2. **Generate token**: Go to Settings → Access Tokens → Create new token
122
+ 3. **Accept licenses**: Visit the model pages and accept license agreements:
123
+ - [FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev)
124
+ - [Stable Diffusion 3.5](https://huggingface.co/stabilityai/stable-diffusion-3.5-medium)
125
+ 4. **Set environment variable**:
126
+ ```bash
127
+ # Temporary (current session)
128
+ export HF_TOKEN=your_token_here
129
+
130
+ # Permanent (add to ~/.bashrc or ~/.zshrc)
131
+ echo 'export HF_TOKEN=your_token_here' >> ~/.bashrc
132
+ ```
133
+
134
+ ### 💡 Pro Tips:
135
+ - **Start simple**: Begin with FLUX.1-schnell (no token required, commercial use OK)
136
+ - **Token scope**: Use "read" permissions for downloading models
137
+ - **Privacy**: Your token stays local - never shared with OllamaDiffuser servers
138
+ - **Troubleshooting**: If downloads fail, verify your token and model access permissions
139
+
140
+ ---
141
+
85
142
  ## ✨ Features
86
143
 
87
144
  - **🚀 Fast Startup**: Instant application launch with lazy loading architecture
@@ -423,7 +480,6 @@ with open("control.jpg", "rb") as f:
423
480
 
424
481
  - **[GGUF Models Guide](GGUF_GUIDE.md)**: Complete guide to memory-efficient GGUF models
425
482
  - **[ControlNet Guide](CONTROLNET_GUIDE.md)**: Comprehensive ControlNet usage and examples
426
- - **[Installation Guide](INSTALLATION_GUIDE.md)**: Detailed installation instructions
427
483
  - **[Website Documentation](https://www.ollamadiffuser.com/)**: Complete tutorials and guides
428
484
 
429
485
  ## 🚀 Performance & Hardware
@@ -1,7 +1,7 @@
1
- ollamadiffuser/__init__.py,sha256=DjwIN2FSK0VDarxOtEUAW9LGNRCYHSo1uH_pEgYioJQ,1127
1
+ ollamadiffuser/__init__.py,sha256=QcfqaWxnceRL7tqVm88tAbTRc5wHU4L0XNsIC_MxRpE,1127
2
2
  ollamadiffuser/__main__.py,sha256=tNWMvEHq4ddtKLp7DrhIoOdnFw3F8RNrETC_u5xpkFI,141
3
3
  ollamadiffuser/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- ollamadiffuser/api/server.py,sha256=4-3gT8W1404bxvJ7y9htvKbd2yxrrbtAUvT7shOlJss,17679
4
+ ollamadiffuser/api/server.py,sha256=kc-Up50zmLfSM4f4InYR4Btkl35lyEN6UFcxlvxICSQ,18237
5
5
  ollamadiffuser/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  ollamadiffuser/cli/commands.py,sha256=Pe0vyfGiffwd10QlVxBCTtNnMqHi8nJ3oNn_k8nAi5k,8903
7
7
  ollamadiffuser/cli/main.py,sha256=qj0VKTOjw_gox2dPVtbU-9kCo25TqshyVyqW8qsv4Pk,56081
@@ -12,7 +12,7 @@ ollamadiffuser/core/config/settings.py,sha256=VhI1vLGmOAQ7-XtyHrT5KoMpcGeGt-Mij-
12
12
  ollamadiffuser/core/inference/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  ollamadiffuser/core/inference/engine.py,sha256=-EVcH4NyRVBQ-puvI_Az3KOZJYG9b-ySzsvoBTY3mlY,73962
14
14
  ollamadiffuser/core/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- ollamadiffuser/core/models/gguf_loader.py,sha256=jlmluhq4VawkxHD8ASjB_XrbhTGOisN1-4Lt0Xcc5wg,18153
15
+ ollamadiffuser/core/models/gguf_loader.py,sha256=ocfl3_MDVXC9nSjW8YJdz4kX1Q-Qe2ltu6w4fbqhxVY,35724
16
16
  ollamadiffuser/core/models/manager.py,sha256=rTEAameGih3wPcVG_Y-4k_brBeEqEoBjoI7fjggNtiY,16799
17
17
  ollamadiffuser/core/models/registry.py,sha256=YPx3xcHnCHogyowi9fQ6oXZg7_jz0fM5bDyyg-BgSFY,15125
18
18
  ollamadiffuser/core/utils/__init__.py,sha256=ZdXZWX1hfDnnV6OmRD6UStNljDJIQ892da2CtC-zdDw,31
@@ -37,9 +37,9 @@ ollamadiffuser/ui/samples/scribble/face_sketch.png,sha256=MVVYy_aS48xoS_RnIDzLUa
37
37
  ollamadiffuser/ui/samples/scribble/tree_sketch.png,sha256=3P-NGgW25xRwreDxiBYKcDhd2oHZAwKSkjNVM5oPTWY,3017
38
38
  ollamadiffuser/ui/templates/index.html,sha256=qTQVFxiTbeZ90O-iNqWC_4pYP6yyIs2z6U69VJPqAB4,38176
39
39
  ollamadiffuser/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
- ollamadiffuser-1.2.0.dist-info/licenses/LICENSE,sha256=cnGL9l2P510Uk3TCnv62kot6vAfdSawhOZh7Y-oYoIE,1071
41
- ollamadiffuser-1.2.0.dist-info/METADATA,sha256=h9t99eLyVINJNkrFnreqF8APYdzsqV2Y293lZxownWg,19808
42
- ollamadiffuser-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
- ollamadiffuser-1.2.0.dist-info/entry_points.txt,sha256=tHXXO3N0GSnIobDe_eSOLfHPjjVFjeTg2Fd-APoD6sY,64
44
- ollamadiffuser-1.2.0.dist-info/top_level.txt,sha256=97wOGgTCxDE765Nr_o7B4Kwr_M_jy8fCCeQ81sMKlC4,15
45
- ollamadiffuser-1.2.0.dist-info/RECORD,,
40
+ ollamadiffuser-1.2.2.dist-info/licenses/LICENSE,sha256=cnGL9l2P510Uk3TCnv62kot6vAfdSawhOZh7Y-oYoIE,1071
41
+ ollamadiffuser-1.2.2.dist-info/METADATA,sha256=Z2z38MfQ3SzUrpvb6Yk67eWEfx8kuhvJ8w-aRyCMZcY,21756
42
+ ollamadiffuser-1.2.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
+ ollamadiffuser-1.2.2.dist-info/entry_points.txt,sha256=tHXXO3N0GSnIobDe_eSOLfHPjjVFjeTg2Fd-APoD6sY,64
44
+ ollamadiffuser-1.2.2.dist-info/top_level.txt,sha256=97wOGgTCxDE765Nr_o7B4Kwr_M_jy8fCCeQ81sMKlC4,15
45
+ ollamadiffuser-1.2.2.dist-info/RECORD,,