ollamadiffuser 1.2.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. ollamadiffuser/__init__.py +1 -1
  2. ollamadiffuser/api/server.py +312 -312
  3. ollamadiffuser/cli/config_commands.py +119 -0
  4. ollamadiffuser/cli/lora_commands.py +169 -0
  5. ollamadiffuser/cli/main.py +85 -1233
  6. ollamadiffuser/cli/model_commands.py +664 -0
  7. ollamadiffuser/cli/recommend_command.py +205 -0
  8. ollamadiffuser/cli/registry_commands.py +197 -0
  9. ollamadiffuser/core/config/model_registry.py +562 -11
  10. ollamadiffuser/core/config/settings.py +24 -2
  11. ollamadiffuser/core/inference/__init__.py +5 -0
  12. ollamadiffuser/core/inference/base.py +182 -0
  13. ollamadiffuser/core/inference/engine.py +204 -1405
  14. ollamadiffuser/core/inference/strategies/__init__.py +1 -0
  15. ollamadiffuser/core/inference/strategies/controlnet_strategy.py +170 -0
  16. ollamadiffuser/core/inference/strategies/flux_strategy.py +136 -0
  17. ollamadiffuser/core/inference/strategies/generic_strategy.py +164 -0
  18. ollamadiffuser/core/inference/strategies/gguf_strategy.py +113 -0
  19. ollamadiffuser/core/inference/strategies/hidream_strategy.py +104 -0
  20. ollamadiffuser/core/inference/strategies/sd15_strategy.py +134 -0
  21. ollamadiffuser/core/inference/strategies/sd3_strategy.py +80 -0
  22. ollamadiffuser/core/inference/strategies/sdxl_strategy.py +131 -0
  23. ollamadiffuser/core/inference/strategies/video_strategy.py +108 -0
  24. ollamadiffuser/mcp/__init__.py +0 -0
  25. ollamadiffuser/mcp/server.py +184 -0
  26. ollamadiffuser/ui/templates/index.html +62 -1
  27. ollamadiffuser/ui/web.py +116 -54
  28. {ollamadiffuser-1.2.3.dist-info → ollamadiffuser-2.0.1.dist-info}/METADATA +317 -108
  29. ollamadiffuser-2.0.1.dist-info/RECORD +61 -0
  30. {ollamadiffuser-1.2.3.dist-info → ollamadiffuser-2.0.1.dist-info}/WHEEL +1 -1
  31. {ollamadiffuser-1.2.3.dist-info → ollamadiffuser-2.0.1.dist-info}/entry_points.txt +1 -0
  32. ollamadiffuser/core/models/registry.py +0 -384
  33. ollamadiffuser/ui/samples/.DS_Store +0 -0
  34. ollamadiffuser-1.2.3.dist-info/RECORD +0 -45
  35. {ollamadiffuser-1.2.3.dist-info → ollamadiffuser-2.0.1.dist-info}/licenses/LICENSE +0 -0
  36. {ollamadiffuser-1.2.3.dist-info → ollamadiffuser-2.0.1.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
  Metadata-Version: 2.4
  Name: ollamadiffuser
- Version: 1.2.3
- Summary: 🎨 Local AI Image Generation with Ollama-style CLI for Stable Diffusion, FLUX.1, and LoRA support
+ Version: 2.0.1
+ Summary: Local AI Image Generation with Ollama-style CLI for Stable Diffusion, FLUX, and LoRA support
  Home-page: https://github.com/ollamadiffuser/ollamadiffuser
  Author: OllamaDiffuser Team
  Author-email: OllamaDiffuser Team <ollamadiffuser@gmail.com>
@@ -14,7 +14,7 @@ Project-URL: Documentation, https://www.ollamadiffuser.com/
  Project-URL: Bug Reports, https://github.com/ollamadiffuser/ollamadiffuser/issues
  Project-URL: Feature Requests, https://github.com/ollamadiffuser/ollamadiffuser/issues
  Project-URL: Source Code, https://github.com/ollamadiffuser/ollamadiffuser
- Keywords: diffusion,image-generation,ai,machine-learning,lora,ollama,stable-diffusion,flux,local-ai,controlnet,web-ui,cli
+ Keywords: diffusion,image-generation,ai,machine-learning,lora,ollama,stable-diffusion,flux,local-ai,controlnet,web-ui,cli,img2img,inpainting,mcp,openclaw
  Classifier: Development Status :: 4 - Beta
  Classifier: Intended Audience :: Developers
  Classifier: Intended Audience :: End Users/Desktop
@@ -33,40 +33,60 @@ Classifier: Environment :: Web Environment
  Requires-Python: >=3.10
  Description-Content-Type: text/markdown
  License-File: LICENSE
- Requires-Dist: torch>=2.1.0
- Requires-Dist: diffusers>=0.26.0
- Requires-Dist: transformers>=4.35.0
- Requires-Dist: accelerate>=0.25.0
- Requires-Dist: fastapi>=0.104.0
+ Requires-Dist: torch>=2.4.0
+ Requires-Dist: diffusers>=0.34.0
+ Requires-Dist: transformers>=4.40.0
+ Requires-Dist: accelerate>=1.0.0
+ Requires-Dist: fastapi>=0.110.0
  Requires-Dist: uvicorn>=0.23.0
- Requires-Dist: huggingface-hub>=0.16.0
- Requires-Dist: Pillow>=9.0.0
+ Requires-Dist: huggingface-hub>=0.25.0
+ Requires-Dist: Pillow>=10.0.0
  Requires-Dist: click>=8.0.0
  Requires-Dist: rich>=13.0.0
  Requires-Dist: pydantic>=2.0.0
  Requires-Dist: protobuf>=3.20.0
  Requires-Dist: sentencepiece>=0.1.99
- Requires-Dist: safetensors>=0.3.0
+ Requires-Dist: safetensors>=0.4.0
  Requires-Dist: python-multipart>=0.0.0
  Requires-Dist: psutil>=5.9.0
  Requires-Dist: jinja2>=3.0.0
- Requires-Dist: peft>=0.10.0
- Requires-Dist: numpy>=1.21.0
+ Requires-Dist: peft>=0.17.0
+ Requires-Dist: numpy>=1.26.0
  Requires-Dist: controlnet-aux>=0.0.7
  Requires-Dist: opencv-python>=4.8.0
- Requires-Dist: stable-diffusion-cpp-python>=0.1.0
- Requires-Dist: gguf>=0.1.0
+ Requires-Dist: requests>=2.28.0
+ Requires-Dist: PyYAML>=6.0
+ Provides-Extra: gguf
+ Requires-Dist: stable-diffusion-cpp-python>=0.1.0; extra == "gguf"
+ Requires-Dist: gguf>=0.1.0; extra == "gguf"
+ Provides-Extra: full
+ Requires-Dist: stable-diffusion-cpp-python>=0.1.0; extra == "full"
+ Requires-Dist: gguf>=0.1.0; extra == "full"
+ Requires-Dist: mcp[cli]>=1.0.0; extra == "full"
+ Provides-Extra: mcp
+ Requires-Dist: mcp[cli]>=1.0.0; extra == "mcp"
+ Provides-Extra: openclaw
+ Requires-Dist: mcp[cli]>=1.0.0; extra == "openclaw"
  Provides-Extra: dev
  Requires-Dist: pytest>=7.0.0; extra == "dev"
  Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
+ Requires-Dist: httpx>=0.24.0; extra == "dev"
  Requires-Dist: black>=23.0.0; extra == "dev"
  Requires-Dist: isort>=5.12.0; extra == "dev"
  Requires-Dist: flake8>=6.0.0; extra == "dev"
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
  Dynamic: author
  Dynamic: home-page
  Dynamic: license-file
  Dynamic: requires-python

+ ### Project Status: Active Development
+
+ **Thank you for the incredible support and over 11,000 downloads!**
+
+ `ollamadiffuser` is back in **active development**. v2.0 brings a major architecture overhaul, 21 new models, MCP/OpenClaw integration, and Apple Silicon support. Part of the **[LocalKinAI](https://github.com/LocalKinAI)** ecosystem.
+
  # OllamaDiffuser 🎨

  [![PyPI version](https://badge.fury.io/py/ollamadiffuser.svg)](https://badge.fury.io/py/ollamadiffuser)
@@ -76,95 +96,70 @@ Dynamic: requires-python

  ## Local AI Image Generation with OllamaDiffuser

- **OllamaDiffuser** simplifies local deployment of **Stable Diffusion**, **FLUX.1**, and other AI image generation models. An intuitive **local SD** tool inspired by **Ollama's** simplicity - perfect for **local diffuser** workflows with CLI, web UI, and LoRA support.
+ **OllamaDiffuser** simplifies local deployment of **Stable Diffusion**, **FLUX**, **CogView4**, **Kolors**, **SANA**, **PixArt-Sigma**, and 40+ other AI image generation models. An intuitive **local SD** tool inspired by **Ollama's** simplicity - perfect for **local diffuser** workflows with CLI, web UI, and LoRA support.

  🌐 **Website**: [ollamadiffuser.com](https://www.ollamadiffuser.com/) | 📦 **PyPI**: [pypi.org/project/ollamadiffuser](https://pypi.org/project/ollamadiffuser/)

- ---
-
- ## 🔑 Hugging Face Authentication
-
- **Do you need a Hugging Face token?** It depends on which models you want to use!
+ > **Upgrading from v1.x?** v2.0 is a major rewrite requiring **Python 3.10+**. Run `pip install --upgrade "ollamadiffuser[full]"` and see the [Migration Guide](#-migration-guide) below.

- ### 🟢 Models that DON'T require a token:
- - **FLUX.1-schnell** - Apache 2.0 license, ready to use ✅
- - **Stable Diffusion 1.5** - Basic model, no authentication needed ✅
- - **Most ControlNet models** - Generally public access ✅
-
- ### 🟡 Models that DO require a token:
- - **FLUX.1-dev** - Requires HF token and license agreement ⚠️
- - **Stable Diffusion 3.5** - Requires HF token and license agreement ⚠️
- - **Some premium LoRAs** - Gated models from Hugging Face ⚠️
+ ---

- ### 🚀 Quick Setup
+ ## 🚀 Quick Start (v2.0)

- **For basic usage** (no token needed):
+ **For Mac/PC Users:**
  ```bash
- # These work immediately without any setup:
- ollamadiffuser pull flux.1-schnell
- ollamadiffuser pull stable-diffusion-1.5
+ pip install "ollamadiffuser[full]"
+ ollamadiffuser recommend   # Find which models fit your GPU
  ```

- **For advanced models** (token required):
+ **For OpenClaw/Agent Users:**
  ```bash
- # 1. Set your token
- export HF_TOKEN=your_token_here
+ pip install "ollamadiffuser[mcp]"
+ ollamadiffuser mcp   # Starts the MCP server
+ ```

- # 2. Now you can access gated models
- ollamadiffuser pull flux.1-dev
- ollamadiffuser pull stable-diffusion-3.5-medium
+ **For Low-VRAM / Budget GPU Users:**
+ ```bash
+ pip install "ollamadiffuser[gguf]"
+ ollamadiffuser pull flux.1-dev-gguf-q4ks   # Only 6GB VRAM needed
+ ollamadiffuser run flux.1-dev-gguf-q4ks
  ```

- ### 🔧 How to get a Hugging Face token:
-
- 1. **Create account**: Visit [huggingface.co](https://huggingface.co) and sign up
- 2. **Generate token**: Go to Settings → Access Tokens → Create new token
- 3. **Accept licenses**: Visit the model pages and accept license agreements:
-    - [FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev)
-    - [Stable Diffusion 3.5](https://huggingface.co/stabilityai/stable-diffusion-3.5-medium)
- 4. **Set environment variable**:
-    ```bash
-    # Temporary (current session)
-    export HF_TOKEN=your_token_here
-
-    # Permanent (add to ~/.bashrc or ~/.zshrc)
-    echo 'export HF_TOKEN=your_token_here' >> ~/.bashrc
-    ```
-
- ### 💡 Pro Tips:
- - **Start simple**: Begin with FLUX.1-schnell (no token required, commercial use OK)
- - **Token scope**: Use "read" permissions for downloading models
- - **Privacy**: Your token stays local - never shared with OllamaDiffuser servers
- - **Troubleshooting**: If downloads fail, verify your token and model access permissions
+ Most models work **without any token** -- just install and go. See [Hugging Face Authentication](#-hugging-face-authentication) when you want gated models like FLUX.1-dev or SD 3.5.

  ---

  ## ✨ Features

- - **🚀 Fast Startup**: Instant application launch with lazy loading architecture
+ - **🏗️ Strategy Architecture**: Clean per-model strategy pattern (SD1.5, SDXL, FLUX, SD3, ControlNet, Video, HiDream, GGUF, Generic)
+ - **🌐 40+ Models**: FLUX.2, SD 3.5, SDXL Lightning, CogView4, Kolors, SANA, PixArt-Sigma, and more
+ - **🔌 Generic Pipeline**: Add new diffusers models via registry config alone -- no code changes needed
+ - **🖼️ img2img & Inpainting**: Image-to-image and inpainting support across SD1.5, SDXL, and the API/Web UI
+ - **⚡ Async API**: Non-blocking FastAPI server using `asyncio.to_thread` for GPU operations (see the sketch after this list)
+ - **🎲 Random Seeds**: Reproducible generation with explicit seeds, random by default
  - **🎛️ ControlNet Support**: Precise image generation control with 10+ control types
  - **🔄 LoRA Integration**: Dynamic LoRA loading and management
- - **📦 GGUF Support**: Memory-efficient quantized models (3GB VRAM minimum!)
+ - **🔌 MCP & OpenClaw**: Model Context Protocol server for AI assistant integration (OpenClaw, Claude Code, Cursor)
+ - **🍎 Apple Silicon**: MPS dtype safety, GGUF Metal acceleration, `ollamadiffuser recommend` for hardware-aware model suggestions
+ - **📦 GGUF Support**: Memory-efficient quantized models (3GB VRAM minimum!) with CUDA and Metal acceleration
  - **🌐 Multiple Interfaces**: CLI, Python API, Web UI, and REST API
  - **📦 Model Management**: Easy installation and switching between models
  - **⚡ Performance Optimized**: Memory-efficient with GPU acceleration
- - **🎨 Professional Results**: High-quality image generation with fine-tuned control
-
- ## 🚀 Quick Start
+ - **🧪 Test Suite**: 82 tests across settings, registry, engine, API, MPS, and MCP

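The async pattern named in the features list is worth seeing concretely. A minimal, self-contained sketch of a FastAPI endpoint that offloads a blocking GPU call with `asyncio.to_thread` -- the endpoint shape and the `run_pipeline` helper are illustrative assumptions, not OllamaDiffuser's actual server code:

```python
import asyncio

from fastapi import FastAPI, Response
from pydantic import BaseModel

app = FastAPI()


class GenerateRequest(BaseModel):
    prompt: str
    seed: int | None = None  # random when omitted, fixed for reproducibility


def run_pipeline(req: GenerateRequest) -> bytes:
    """Stand-in for the blocking diffusers call that occupies the GPU."""
    raise NotImplementedError


@app.post("/api/generate")
async def generate(req: GenerateRequest) -> Response:
    # asyncio.to_thread pushes the blocking GPU call onto a worker thread,
    # so the event loop stays free to serve other requests meanwhile.
    png_bytes = await asyncio.to_thread(run_pipeline, req)
    return Response(content=png_bytes, media_type="image/png")
```
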
  ### Option 1: Install from PyPI (Recommended)
  ```bash
  # Install from PyPI
  pip install ollamadiffuser

- # Pull and run a model (4-command setup)
+ # Pull and run a model
  ollamadiffuser pull flux.1-schnell
  ollamadiffuser run flux.1-schnell

- # Generate via API
+ # Generate via API (seed is optional for reproducibility)
  curl -X POST http://localhost:8000/api/generate \
    -H "Content-Type: application/json" \
-   -d '{"prompt": "A beautiful sunset"}' \
+   -d '{"prompt": "A beautiful sunset", "seed": 12345}' \
    --output image.png
  ```
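
The same request from Python, assuming only what the curl example shows (the endpoint returns raw PNG bytes):

```python
import requests

# Same call as the curl example above; the endpoint replies with raw PNG bytes.
resp = requests.post(
    "http://localhost:8000/api/generate",
    json={"prompt": "A beautiful sunset", "seed": 12345},
    timeout=300,  # first generation includes model warm-up
)
resp.raise_for_status()
with open("image.png", "wb") as f:
    f.write(resp.content)
```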
 
@@ -187,7 +182,7 @@ This ensures you get:
  ### GGUF Quick Start (Low VRAM)
  ```bash
  # For systems with limited VRAM (3GB+)
- pip install ollamadiffuser stable-diffusion-cpp-python gguf
+ pip install "ollamadiffuser[gguf]"

  # Download memory-efficient GGUF model
  ollamadiffuser pull flux.1-dev-gguf-q4ks
@@ -196,6 +191,22 @@ ollamadiffuser pull flux.1-dev-gguf-q4ks
  ollamadiffuser run flux.1-dev-gguf-q4ks
  ```

+ ### Apple Silicon Quick Start (Mac Mini / MacBook)
+ ```bash
+ # See which models fit your Mac
+ ollamadiffuser recommend
+
+ # Best lightweight model (0.6B, <6GB)
+ ollamadiffuser pull pixart-sigma
+ ollamadiffuser run pixart-sigma
+
+ # GGUF with Metal acceleration (6GB, great quality)
+ pip install "ollamadiffuser[gguf]"
+ CMAKE_ARGS="-DSD_METAL=ON" pip install stable-diffusion-cpp-python
+ ollamadiffuser pull flux.1-dev-gguf-q4ks
+ ollamadiffuser run flux.1-dev-gguf-q4ks
+ ```
+
  ### Option 2: Development Installation
  ```bash
  # Clone the repository
@@ -245,21 +256,92 @@ curl -X POST http://localhost:8000/api/generate/controlnet \

  ---

- ## 🎯 Supported Models
+ ## 🔑 Hugging Face Authentication
+
+ **Do you need a Hugging Face token?** It depends on which models you want to use!
+
+ **Models that DON'T require a token** -- ready to use right away:
+ - FLUX.1-schnell, Stable Diffusion 1.5, DreamShaper, PixArt-Sigma, SANA 1.5, most ControlNet models
+
+ **Models that DO require a token:**
+ - FLUX.1-dev, Stable Diffusion 3.5, some premium LoRAs
+
+ **Setup** (only needed for gated models):
+ ```bash
+ # 1. Create account at https://huggingface.co and generate an access token
+ # 2. Accept license on the model page (e.g. FLUX.1-dev, SD 3.5)
+ # 3. Set your token
+ export HF_TOKEN=your_token_here
+
+ # 4. Now you can access gated models
+ ollamadiffuser pull flux.1-dev
+ ollamadiffuser pull stable-diffusion-3.5-medium
+ ```
+
+ > **Tips:** Use "read" permissions for the token. Your token stays local -- never shared with OllamaDiffuser servers. Add `export HF_TOKEN=...` to `~/.bashrc` or `~/.zshrc` to make it permanent.
+
+ ---

- Choose from a variety of state-of-the-art image generation models:
+ ## 🎯 Supported Models

- | Model | License | Quality | Speed | Commercial Use | VRAM |
- |-------|---------|---------|-------|----------------|------|
- | **FLUX.1-schnell** | Apache 2.0 | High | **4 steps** (12x faster) | ✅ Commercial OK | 20GB+ |
- | **FLUX.1-dev** | Non-commercial | High | 50 steps | ❌ Non-commercial | 20GB+ |
- | **FLUX.1-dev-gguf** | Non-commercial | High | 4 steps | Non-commercial | **3-16GB** |
- | **Stable Diffusion 3.5** | CreativeML | Medium | 28 steps | ⚠️ Check License | 12GB+ |
- | **Stable Diffusion 1.5** | CreativeML | Fast | Lightweight | ⚠️ Check License | 6GB+ |
+ Choose from 40+ models spanning every major architecture:
+
+ ### Core Models
+
+ | Model | Type | Steps | VRAM | Commercial | License |
+ |-------|------|-------|------|------------|---------|
+ | `flux.1-schnell` | flux | 4 | 16GB+ | ✅ | Apache 2.0 |
+ | `flux.1-dev` | flux | 20 | 20GB+ | ❌ | Non-commercial |
+ | `stable-diffusion-3.5-medium` | sd3 | 28 | 8GB+ | ⚠️ | Stability AI |
+ | `stable-diffusion-3.5-large` | sd3 | 28 | 12GB+ | ⚠️ | Stability AI |
+ | `stable-diffusion-3.5-large-turbo` | sd3 | 4 | 12GB+ | ⚠️ | Stability AI |
+ | `stable-diffusion-xl-base` | sdxl | 50 | 6GB+ | ⚠️ | CreativeML |
+ | `stable-diffusion-1.5` | sd15 | 50 | 4GB+ | ⚠️ | CreativeML |
+
+ ### Next-Generation Models
+
+ | Model | Origin | Params | Steps | VRAM | Commercial | License |
+ |-------|--------|--------|-------|------|------------|---------|
+ | `flux.2-dev` | Black Forest Labs | 32B | 28 | 14GB+ | ❌ | Non-commercial |
+ | `flux.2-klein-4b` | Black Forest Labs | 4B | 28 | 10GB+ | ✅ | Apache 2.0 |
+ | `z-image-turbo` | Alibaba (Tongyi) | 6B | 8 | 10GB+ | ✅ | Apache 2.0 |
+ | `sana-1.5` | NVIDIA | 1.6B | 20 | 8GB+ | ✅ | Apache 2.0 |
+ | `cogview4` | Zhipu AI | 6B | 50 | 12GB+ | ✅ | Apache 2.0 |
+ | `kolors` | Kuaishou | 8.6B | 50 | 8GB+ | ✅ | Kolors License |
+ | `hunyuan-dit` | Tencent | 1.5B | 50 | 6GB+ | ✅ | Tencent Community |
+ | `lumina-2` | Alpha-VLLM | 2B | 30 | 8GB+ | ✅ | Apache 2.0 |
+ | `pixart-sigma` | PixArt | 0.6B | 20 | 6GB+ | ✅ | Open |
+ | `auraflow` | Fal | 6.8B | 50 | 12GB+ | ✅ | Apache 2.0 |
+ | `omnigen` | BAAI | 3.8B | 50 | 12GB+ | ✅ | MIT |
+
+ ### Fast / Turbo Models
+
+ | Model | Steps | VRAM | Notes |
+ |-------|-------|------|-------|
+ | `sdxl-turbo` | 1 | 6GB+ | Single-step distilled SDXL |
+ | `sdxl-lightning-4step` | 4 | 6GB+ | ByteDance, custom scheduler |
+ | `stable-diffusion-3.5-large-turbo` | 4 | 12GB+ | Distilled SD 3.5 Large |
+ | `z-image-turbo` | 8 | 10GB+ | Alibaba 6B turbo |
+
+ ### Community Fine-Tunes
+
+ | Model | Base | Notes |
+ |-------|------|-------|
+ | `realvisxl-v4` | SDXL | Photorealistic, very popular |
+ | `dreamshaper` | SD 1.5 | Versatile artistic model |
+ | `realistic-vision-v6` | SD 1.5 | Portrait specialist |
+
+ ### FLUX Pipeline Variants
+
+ | Model | Pipeline | Use Case |
+ |-------|----------|----------|
+ | `flux.1-fill-dev` | FluxFillPipeline | Inpainting / outpainting |
+ | `flux.1-canny-dev` | FluxControlPipeline | Canny edge control |
+ | `flux.1-depth-dev` | FluxControlPipeline | Depth map control |

  ### 💾 GGUF Models - Reduced Memory Requirements

- **NEW**: GGUF quantized models enable running FLUX.1-dev on budget hardware!
+ GGUF quantized models enable running FLUX.1-dev on budget hardware:

  | GGUF Variant | VRAM | Quality | Best For |
  |--------------|------|---------|----------|
@@ -270,11 +352,6 @@ Choose from a variety of state-of-the-art image generation models:

  📖 **[Complete GGUF Guide](GGUF_GUIDE.md)** - Hardware recommendations, installation, and optimization tips

- ### Why Choose FLUX.1-schnell?
- - **Apache 2.0 license** - Perfect for commercial use
- - **4-step generation** - Lightning fast results
- - **Commercial OK** - Use in your business
-
  ---

  ## 🎛️ ControlNet Features
@@ -335,6 +412,16 @@ ollamadiffuser lora unload
  ollamadiffuser pull stable-diffusion-1.5
  ollamadiffuser run stable-diffusion-1.5

+ # Model registry management
+ ollamadiffuser registry list
+ ollamadiffuser registry list --installed-only
+ ollamadiffuser registry check-gguf
+
+ # Configuration management
+ ollamadiffuser config                                  # show all config
+ ollamadiffuser config set models_dir /mnt/ssd/models   # custom model path
+ ollamadiffuser config set server.port 9000             # change server port
+
  # In another terminal, generate images via API
  curl -X POST http://localhost:8000/api/generate \
    -H "Content-Type: application/json" \
@@ -366,18 +453,75 @@ Features:
  ```bash
  # Start API server
  ollamadiffuser --mode api
-
  ollamadiffuser load stable-diffusion-1.5

- # Generate image
+ # Text-to-image
  curl -X POST http://localhost:8000/api/generate \
    -H "Content-Type: application/json" \
-   -d '{"prompt": "a beautiful landscape", "width": 1024, "height": 1024}'
+   -d '{"prompt": "a beautiful landscape", "width": 1024, "height": 1024, "seed": 42}'
+
+ # Image-to-image
+ curl -X POST http://localhost:8000/api/generate/img2img \
+   -F "prompt=oil painting style" \
+   -F "strength=0.75" \
+   -F "image=@input.png" \
+   --output result.png
+
+ # Inpainting
+ curl -X POST http://localhost:8000/api/generate/inpaint \
+   -F "prompt=a red car" \
+   -F "image=@photo.png" \
+   -F "mask=@mask.png" \
+   --output inpainted.png
+
+ # API docs: http://localhost:8000/docs
+ ```
+
+ ### MCP Server (AI Assistant Integration)
+
+ OllamaDiffuser includes a [Model Context Protocol](https://modelcontextprotocol.io/) server for integration with AI assistants like OpenClaw, Claude Code, and Cursor.

- # API document
- http://localhost:8000/docs
+ ```bash
+ # Install MCP support
+ pip install "ollamadiffuser[mcp]"
+
+ # Start MCP server (stdio transport)
+ ollamadiffuser mcp
  ```

+ **MCP client configuration** (e.g. `claude_desktop_config.json`):
+ ```json
+ {
+   "mcpServers": {
+     "ollamadiffuser": {
+       "command": "ollamadiffuser-mcp"
+     }
+   }
+ }
+ ```
+
+ **Available MCP tools:**
+ - `generate_image` -- Generate images from text prompts (auto-loads model)
+ - `list_models` -- List available and installed models
+ - `load_model` -- Load a model into memory
+ - `get_status` -- Check device, loaded model, and system status
+
+ ### OpenClaw AgentSkill
510
+
511
+ An [OpenClaw](https://github.com/openclaw/openclaw) skill is included at `integrations/openclaw/SKILL.md`. It uses the REST API with `response_format=b64_json` for agent-friendly base64 image responses. Copy the skill directory to your OpenClaw skills folder or publish to ClawHub.
512
+
513
+ ### Base64 JSON API Response
514
+
515
+ For AI agents and messaging platforms, use `response_format=b64_json` to get images as JSON:
516
+
517
+ ```bash
518
+ curl -X POST http://localhost:8000/api/generate \
519
+ -H "Content-Type: application/json" \
520
+ -d '{"prompt": "a sunset over mountains", "response_format": "b64_json"}'
521
+ ```
522
+
523
+ Response: `{"image": "<base64 PNG>", "format": "png", "width": 1024, "height": 1024}`
524
+
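Given that response shape, decoding the image from Python needs only the standard library plus `requests` (a sketch assuming exactly the fields shown above):

```python
import base64

import requests

resp = requests.post(
    "http://localhost:8000/api/generate",
    json={"prompt": "a sunset over mountains", "response_format": "b64_json"},
    timeout=300,
)
resp.raise_for_status()
payload = resp.json()  # {"image": "<base64 PNG>", "format": "png", ...}
with open(f"sunset.{payload['format']}", "wb") as f:
    f.write(base64.b64decode(payload["image"]))
```
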
  ### Python API
  ```python
  from ollamadiffuser.core.models.manager import model_manager
@@ -386,30 +530,59 @@ from ollamadiffuser.core.models.manager import model_manager
  success = model_manager.load_model("stable-diffusion-1.5")
  if success:
      engine = model_manager.loaded_model
-
-     # Generate image
+
+     # Text-to-image (seed is optional; omit for random)
      image = engine.generate_image(
          prompt="a beautiful sunset",
          width=1024,
-         height=1024
+         height=1024,
+         seed=42,
      )
      image.save("output.jpg")
+
+     # Image-to-image
+     from PIL import Image
+     input_img = Image.open("photo.jpg")
+     result = engine.generate_image(
+         prompt="watercolor painting",
+         image=input_img,
+         strength=0.7,
+     )
+     result.save("img2img_output.jpg")
  else:
      print("Failed to load model")
  ```

- ## 📦 Supported Models
+ ## 📦 Model Ecosystem

  ### Base Models
- - **Stable Diffusion 1.5**: Classic, reliable, fast
- - **Stable Diffusion XL**: High-resolution, detailed
- - **Stable Diffusion 3**: Latest architecture
- - **FLUX.1**: State-of-the-art quality
+ - **Stable Diffusion 1.5**: Classic, reliable, fast (img2img + inpainting)
+ - **Stable Diffusion XL**: High-resolution, detailed (img2img + inpainting, scheduler overrides)
+ - **Stable Diffusion 3.5**: Medium, Large, and Large Turbo variants
+ - **FLUX.1**: schnell, dev, Fill, Canny, Depth pipeline variants
+ - **HiDream**: Multi-prompt generation with bfloat16
+ - **AnimateDiff**: Video/animation generation
+
+ ### Next-Generation Models
+ - **FLUX.2**: 32B dev and 4B Klein variants from Black Forest Labs
+ - **Chinese Models**: CogView4 (Zhipu), Kolors (Kuaishou), Hunyuan-DiT (Tencent), Z-Image (Alibaba)
+ - **Efficient Models**: SANA 1.5 (1.6B), PixArt-Sigma (0.6B) -- high quality at low VRAM
+ - **Open Models**: AuraFlow (6.8B, Apache 2.0), OmniGen (3.8B, MIT), Lumina 2.0 (2B, Apache 2.0)
+
+ ### Fast / Turbo Models
+ - **SDXL Turbo**: Single-step inference from Stability AI
+ - **SDXL Lightning**: 4-step with custom scheduler from ByteDance
+ - **Z-Image Turbo**: 8-step turbo from Alibaba
+
+ ### Community Fine-Tunes
+ - **RealVisXL V4**: Photorealistic SDXL, very popular
+ - **DreamShaper**: Versatile artistic SD 1.5 model
+ - **Realistic Vision V6**: Portrait specialist

  ### GGUF Quantized Models
  - **FLUX.1-dev GGUF**: 7 quantization levels (3GB-16GB VRAM)
  - **Memory Efficient**: Run high-quality models on budget hardware
- - **Same API**: Works seamlessly with existing commands
+ - **Optional Install**: `pip install "ollamadiffuser[gguf]"`

  ### ControlNet Models
  - **SD 1.5 ControlNet**: 4 control types (canny, depth, openpose, scribble)
@@ -421,14 +594,32 @@ else:
  - **Dynamic Loading**: Load/unload without model restart
  - **Strength Control**: Adjustable influence (0.1-2.0)

- ## ⚙️ Configuration
+ ## ⚙️ Architecture

- ### Model Configuration
+ ### Strategy Pattern Engine
+ Each model type has a dedicated strategy class handling loading and generation:
+
+ ```
+ InferenceEngine (facade)
+  -> SD15Strategy (512x512, float32 on MPS, img2img, inpainting)
+  -> SDXLStrategy (1024x1024, img2img, inpainting, scheduler overrides)
+  -> FluxStrategy (schnell/dev/Fill/Canny/Depth, dynamic pipeline class)
+  -> SD3Strategy (1024x1024, 28 steps, guidance=3.5)
+  -> ControlNetStrategy (SD15 + SDXL base models)
+  -> VideoStrategy (AnimateDiff, 16 frames)
+  -> HiDreamStrategy (bfloat16, multi-prompt)
+  -> GGUFStrategy (quantized via stable-diffusion-cpp)
+  -> GenericPipelineStrategy (any diffusers pipeline via config)
+ ```
+
+ The `GenericPipelineStrategy` dynamically loads any `diffusers` pipeline class specified in the model registry, so new models can be added with zero code changes.
+
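A condensed illustration of how such a strategy can resolve a pipeline class from registry configuration. The function name and config fields here are illustrative assumptions, not the package's actual code:

```python
import diffusers
import torch


def load_pipeline_from_config(cfg: dict):
    """Resolve and load whatever diffusers pipeline a registry entry names.

    Example cfg (illustrative): {"pipeline_class": "CogView4Pipeline",
                                 "repo_id": "THUDM/CogView4-6B",
                                 "torch_dtype": "bfloat16"}
    """
    # getattr turns a config string into a pipeline class, so supporting a
    # new model becomes a registry edit rather than a code change.
    pipeline_cls = getattr(diffusers, cfg["pipeline_class"])
    dtype = getattr(torch, cfg.get("torch_dtype", "float16"))
    return pipeline_cls.from_pretrained(cfg["repo_id"], torch_dtype=dtype)
```
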
+ ### Configuration
  Models are automatically configured with optimal settings:
  - **Memory Optimization**: Attention slicing, CPU offloading
  - **Device Detection**: Automatic CUDA/MPS/CPU selection
- - **Precision Handling**: FP16/BF16 support for efficiency
- - **Safety Features**: NSFW filter bypass for creative freedom
+ - **Precision Handling**: FP16/BF16 per model type
+ - **Safety Disabled**: Unified `SAFETY_DISABLED_KWARGS` (no monkey-patching)
433
624
  ## 🔧 Advanced Usage
434
625
 
@@ -503,7 +694,7 @@ with open("control.jpg", "rb") as f:
503
694
  ### Minimum Requirements
504
695
  - **RAM**: 8GB system RAM
505
696
  - **Storage**: 10GB free space
506
- - **Python**: 3.8+
697
+ - **Python**: 3.10+
507
698
 
508
699
  ### Recommended Hardware
509
700
 
@@ -512,6 +703,12 @@ with open("control.jpg", "rb") as f:
512
703
  - **RAM**: 16GB+ system RAM
513
704
  - **Storage**: SSD with 50GB+ free space
514
705
 
706
+ #### For Apple Silicon (Mac Mini / MacBook)
707
+ - **16GB unified memory**: PixArt-Sigma, SANA 1.5, DreamShaper, SD 1.5/XL, GGUF q2k-q5ks
708
+ - **24GB+ unified memory**: CogView4, Kolors, Lumina 2.0, GGUF q6k-q8
709
+ - **GGUF with Metal**: Install with `CMAKE_ARGS="-DSD_METAL=ON"` for GPU acceleration
710
+ - Run `ollamadiffuser recommend` to see what fits your hardware
711
+
515
712
  #### For GGUF Models (Memory Efficient)
516
713
  - **GPU**: 3GB+ VRAM (or CPU only)
517
714
  - **RAM**: 8GB+ system RAM (16GB+ for CPU inference)
@@ -519,7 +716,7 @@ with open("control.jpg", "rb") as f:
519
716
 
520
717
  ### Supported Platforms
521
718
  - **CUDA**: NVIDIA GPUs (recommended)
522
- - **MPS**: Apple Silicon (M1/M2/M3)
719
+ - **MPS**: Apple Silicon (M1/M2/M3/M4) -- native support for 30+ models including GGUF
523
720
  - **CPU**: All platforms (slower but functional)
524
721
 
525
722
  ## 🔧 Troubleshooting
@@ -550,7 +747,7 @@ pip install 'ollamadiffuser[full]'
550
747
  #### GGUF Support Issues
551
748
  ```bash
552
749
  # Install GGUF dependencies
553
- pip install stable-diffusion-cpp-python gguf
750
+ pip install "ollamadiffuser[gguf]"
554
751
 
555
752
  # Check GGUF support
556
753
  ollamadiffuser registry check-gguf
@@ -689,9 +886,21 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
689
886
  ## 🙏 Acknowledgments
690
887
 
691
888
  - **Stability AI**: For Stable Diffusion models
692
- - **Black Forest Labs**: For FLUX.1 models
889
+ - **Black Forest Labs**: For FLUX.1 and FLUX.2 models
890
+ - **Alibaba (Tongyi-MAI)**: For Z-Image Turbo
891
+ - **NVIDIA (Efficient-Large-Model)**: For SANA 1.5
892
+ - **Zhipu AI (THUDM)**: For CogView4
893
+ - **Kuaishou (Kwai-Kolors)**: For Kolors
894
+ - **Tencent (Hunyuan)**: For Hunyuan-DiT
895
+ - **Alpha-VLLM**: For Lumina 2.0
896
+ - **PixArt-alpha**: For PixArt-Sigma
897
+ - **Fal**: For AuraFlow
898
+ - **BAAI (Shitao)**: For OmniGen
899
+ - **ByteDance**: For SDXL Lightning
693
900
  - **city96**: For FLUX.1-dev GGUF quantizations
694
901
  - **Hugging Face**: For model hosting and diffusers library
902
+ - **Anthropic**: For Model Context Protocol (MCP)
903
+ - **OpenClaw**: For AI agent ecosystem integration
695
904
  - **ControlNet Team**: For ControlNet architecture
696
905
  - **Community**: For feedback and contributions
697
906