ollamadiffuser 1.2.2__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. ollamadiffuser/__init__.py +1 -1
  2. ollamadiffuser/api/server.py +312 -312
  3. ollamadiffuser/cli/config_commands.py +119 -0
  4. ollamadiffuser/cli/lora_commands.py +169 -0
  5. ollamadiffuser/cli/main.py +85 -1233
  6. ollamadiffuser/cli/model_commands.py +664 -0
  7. ollamadiffuser/cli/recommend_command.py +205 -0
  8. ollamadiffuser/cli/registry_commands.py +197 -0
  9. ollamadiffuser/core/config/model_registry.py +562 -11
  10. ollamadiffuser/core/config/settings.py +24 -2
  11. ollamadiffuser/core/inference/__init__.py +5 -0
  12. ollamadiffuser/core/inference/base.py +182 -0
  13. ollamadiffuser/core/inference/engine.py +204 -1405
  14. ollamadiffuser/core/inference/strategies/__init__.py +1 -0
  15. ollamadiffuser/core/inference/strategies/controlnet_strategy.py +170 -0
  16. ollamadiffuser/core/inference/strategies/flux_strategy.py +136 -0
  17. ollamadiffuser/core/inference/strategies/generic_strategy.py +164 -0
  18. ollamadiffuser/core/inference/strategies/gguf_strategy.py +113 -0
  19. ollamadiffuser/core/inference/strategies/hidream_strategy.py +104 -0
  20. ollamadiffuser/core/inference/strategies/sd15_strategy.py +134 -0
  21. ollamadiffuser/core/inference/strategies/sd3_strategy.py +80 -0
  22. ollamadiffuser/core/inference/strategies/sdxl_strategy.py +131 -0
  23. ollamadiffuser/core/inference/strategies/video_strategy.py +108 -0
  24. ollamadiffuser/mcp/__init__.py +0 -0
  25. ollamadiffuser/mcp/server.py +184 -0
  26. ollamadiffuser/ui/templates/index.html +62 -1
  27. ollamadiffuser/ui/web.py +116 -54
  28. {ollamadiffuser-1.2.2.dist-info → ollamadiffuser-2.0.0.dist-info}/METADATA +337 -108
  29. ollamadiffuser-2.0.0.dist-info/RECORD +61 -0
  30. {ollamadiffuser-1.2.2.dist-info → ollamadiffuser-2.0.0.dist-info}/WHEEL +1 -1
  31. {ollamadiffuser-1.2.2.dist-info → ollamadiffuser-2.0.0.dist-info}/entry_points.txt +1 -0
  32. ollamadiffuser/core/models/registry.py +0 -384
  33. ollamadiffuser/ui/samples/.DS_Store +0 -0
  34. ollamadiffuser-1.2.2.dist-info/RECORD +0 -45
  35. {ollamadiffuser-1.2.2.dist-info → ollamadiffuser-2.0.0.dist-info}/licenses/LICENSE +0 -0
  36. {ollamadiffuser-1.2.2.dist-info → ollamadiffuser-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ollamadiffuser
3
- Version: 1.2.2
4
- Summary: 🎨 Local AI Image Generation with Ollama-style CLI for Stable Diffusion, FLUX.1, and LoRA support
3
+ Version: 2.0.0
4
+ Summary: Local AI Image Generation with Ollama-style CLI for Stable Diffusion, FLUX, and LoRA support
5
5
  Home-page: https://github.com/ollamadiffuser/ollamadiffuser
6
6
  Author: OllamaDiffuser Team
7
7
  Author-email: OllamaDiffuser Team <ollamadiffuser@gmail.com>
@@ -14,7 +14,7 @@ Project-URL: Documentation, https://www.ollamadiffuser.com/
14
14
  Project-URL: Bug Reports, https://github.com/ollamadiffuser/ollamadiffuser/issues
15
15
  Project-URL: Feature Requests, https://github.com/ollamadiffuser/ollamadiffuser/issues
16
16
  Project-URL: Source Code, https://github.com/ollamadiffuser/ollamadiffuser
17
- Keywords: diffusion,image-generation,ai,machine-learning,lora,ollama,stable-diffusion,flux,local-ai,controlnet,web-ui,cli
17
+ Keywords: diffusion,image-generation,ai,machine-learning,lora,ollama,stable-diffusion,flux,local-ai,controlnet,web-ui,cli,img2img,inpainting,mcp,openclaw
18
18
  Classifier: Development Status :: 4 - Beta
19
19
  Classifier: Intended Audience :: Developers
20
20
  Classifier: Intended Audience :: End Users/Desktop
@@ -33,40 +33,64 @@ Classifier: Environment :: Web Environment
33
33
  Requires-Python: >=3.10
34
34
  Description-Content-Type: text/markdown
35
35
  License-File: LICENSE
36
- Requires-Dist: torch>=2.1.0
37
- Requires-Dist: diffusers>=0.26.0
38
- Requires-Dist: transformers>=4.35.0
39
- Requires-Dist: accelerate>=0.25.0
40
- Requires-Dist: fastapi>=0.104.0
36
+ Requires-Dist: torch>=2.4.0
37
+ Requires-Dist: diffusers>=0.34.0
38
+ Requires-Dist: transformers>=4.40.0
39
+ Requires-Dist: accelerate>=1.0.0
40
+ Requires-Dist: fastapi>=0.110.0
41
41
  Requires-Dist: uvicorn>=0.23.0
42
- Requires-Dist: huggingface-hub>=0.16.0
43
- Requires-Dist: Pillow>=9.0.0
42
+ Requires-Dist: huggingface-hub>=0.25.0
43
+ Requires-Dist: Pillow>=10.0.0
44
44
  Requires-Dist: click>=8.0.0
45
45
  Requires-Dist: rich>=13.0.0
46
46
  Requires-Dist: pydantic>=2.0.0
47
47
  Requires-Dist: protobuf>=3.20.0
48
48
  Requires-Dist: sentencepiece>=0.1.99
49
- Requires-Dist: safetensors>=0.3.0
49
+ Requires-Dist: safetensors>=0.4.0
50
50
  Requires-Dist: python-multipart>=0.0.0
51
51
  Requires-Dist: psutil>=5.9.0
52
52
  Requires-Dist: jinja2>=3.0.0
53
- Requires-Dist: peft>=0.10.0
54
- Requires-Dist: numpy>=1.21.0
53
+ Requires-Dist: peft>=0.13.0
54
+ Requires-Dist: numpy>=1.26.0
55
55
  Requires-Dist: controlnet-aux>=0.0.7
56
56
  Requires-Dist: opencv-python>=4.8.0
57
- Requires-Dist: stable-diffusion-cpp-python>=0.1.0
58
- Requires-Dist: gguf>=0.1.0
57
+ Requires-Dist: requests>=2.28.0
58
+ Requires-Dist: PyYAML>=6.0
59
+ Provides-Extra: gguf
60
+ Requires-Dist: stable-diffusion-cpp-python>=0.1.0; extra == "gguf"
61
+ Requires-Dist: gguf>=0.1.0; extra == "gguf"
62
+ Provides-Extra: full
63
+ Requires-Dist: stable-diffusion-cpp-python>=0.1.0; extra == "full"
64
+ Requires-Dist: gguf>=0.1.0; extra == "full"
65
+ Requires-Dist: mcp[cli]>=1.0.0; extra == "full"
66
+ Provides-Extra: mcp
67
+ Requires-Dist: mcp[cli]>=1.0.0; extra == "mcp"
68
+ Provides-Extra: openclaw
69
+ Requires-Dist: mcp[cli]>=1.0.0; extra == "openclaw"
59
70
  Provides-Extra: dev
60
71
  Requires-Dist: pytest>=7.0.0; extra == "dev"
61
72
  Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
73
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
74
+ Requires-Dist: httpx>=0.24.0; extra == "dev"
62
75
  Requires-Dist: black>=23.0.0; extra == "dev"
63
76
  Requires-Dist: isort>=5.12.0; extra == "dev"
64
77
  Requires-Dist: flake8>=6.0.0; extra == "dev"
78
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
65
79
  Dynamic: author
66
80
  Dynamic: home-page
67
81
  Dynamic: license-file
68
82
  Dynamic: requires-python
69
83
 
84
+ ### ⚠️ Project Status: Maintenance Mode
85
+
86
+ **Thank you for the incredible support and over 5,000 downloads!**
87
+
88
+ Please be aware that `ollamadiffuser` is currently in **maintenance mode**. Due to the creator's other professional commitments, active feature development has been paused.
89
+
90
+ The project in its current state is stable and will remain available for use. However, new features will not be added, and non-critical issues may not be addressed in the near future.
91
+
92
+ This project laid the foundation for a more ambitious vision: **[LocalKinAI](https://github.com/LocalKinAI)**. Thank you for being part of the journey.
93
+
70
94
  # OllamaDiffuser 🎨
71
95
 
72
96
  [![PyPI version](https://badge.fury.io/py/ollamadiffuser.svg)](https://badge.fury.io/py/ollamadiffuser)
@@ -76,102 +100,93 @@ Dynamic: requires-python
76
100
 
77
101
  ## Local AI Image Generation with OllamaDiffuser
78
102
 
79
- **OllamaDiffuser** simplifies local deployment of **Stable Diffusion**, **FLUX.1**, and other AI image generation models. An intuitive **local SD** tool inspired by **Ollama's** simplicity - perfect for **local diffuser** workflows with CLI, web UI, and LoRA support.
103
+ **OllamaDiffuser** simplifies local deployment of **Stable Diffusion**, **FLUX**, **CogView4**, **Kolors**, **SANA**, **PixArt-Sigma**, and 40+ other AI image generation models. An intuitive **local SD** tool inspired by **Ollama's** simplicity - perfect for **local diffuser** workflows with CLI, web UI, and LoRA support.
80
104
 
81
105
  🌐 **Website**: [ollamadiffuser.com](https://www.ollamadiffuser.com/) | 📦 **PyPI**: [pypi.org/project/ollamadiffuser](https://pypi.org/project/ollamadiffuser/)
82
106
 
83
- ---
84
-
85
- ## 🔑 Hugging Face Authentication
86
-
87
- **Do you need a Hugging Face token?** It depends on which models you want to use!
88
-
89
- ### 🟢 Models that DON'T require a token:
90
- - **FLUX.1-schnell** - Apache 2.0 license, ready to use ✅
91
- - **Stable Diffusion 1.5** - Basic model, no authentication needed ✅
92
- - **Most ControlNet models** - Generally public access ✅
107
+ > **Upgrading from v1.x?** v2.0 is a major rewrite requiring **Python 3.10+**. Run `pip install --upgrade "ollamadiffuser[full]"` and see the [Migration Guide](#-migration-guide) below.
93
108
 
94
- ### 🟡 Models that DO require a token:
95
- - **FLUX.1-dev** - Requires HF token and license agreement ⚠️
96
- - **Stable Diffusion 3.5** - Requires HF token and license agreement ⚠️
97
- - **Some premium LoRAs** - Gated models from Hugging Face ⚠️
109
+ ---
98
110
 
99
- ### 🚀 Quick Setup
111
+ ## 🚀 Quick Start (v2.0)
100
112
 
101
- **For basic usage** (no token needed):
113
+ **For Mac/PC Users:**
102
114
  ```bash
103
- # These work immediately without any setup:
104
- ollamadiffuser pull flux.1-schnell
105
- ollamadiffuser pull stable-diffusion-1.5
115
+ pip install "ollamadiffuser[full]"
116
+ ollamadiffuser recommend # Find which models fit your hardware
106
117
  ```
107
118
 
108
- **For advanced models** (token required):
119
+ **For OpenClaw/Agent Users:**
109
120
  ```bash
110
- # 1. Set your token
111
- export HF_TOKEN=your_token_here
121
+ pip install "ollamadiffuser[mcp]"
122
+ ollamadiffuser mcp # Starts the MCP server
123
+ ```
112
124
 
113
- # 2. Now you can access gated models
114
- ollamadiffuser pull flux.1-dev
115
- ollamadiffuser pull stable-diffusion-3.5-medium
125
+ **For Low-VRAM / Budget GPU Users:**
126
+ ```bash
127
+ pip install "ollamadiffuser[gguf]"
128
+ ollamadiffuser pull flux.1-dev-gguf-q4ks # Only 6GB VRAM needed
129
+ ollamadiffuser run flux.1-dev-gguf-q4ks
116
130
  ```
117
131
 
118
- ### 🔧 How to get a Hugging Face token:
119
-
120
- 1. **Create account**: Visit [huggingface.co](https://huggingface.co) and sign up
121
- 2. **Generate token**: Go to Settings → Access Tokens → Create new token
122
- 3. **Accept licenses**: Visit the model pages and accept license agreements:
123
- - [FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev)
124
- - [Stable Diffusion 3.5](https://huggingface.co/stabilityai/stable-diffusion-3.5-medium)
125
- 4. **Set environment variable**:
126
- ```bash
127
- # Temporary (current session)
128
- export HF_TOKEN=your_token_here
129
-
130
- # Permanent (add to ~/.bashrc or ~/.zshrc)
131
- echo 'export HF_TOKEN=your_token_here' >> ~/.bashrc
132
- ```
133
-
134
- ### 💡 Pro Tips:
135
- - **Start simple**: Begin with FLUX.1-schnell (no token required, commercial use OK)
136
- - **Token scope**: Use "read" permissions for downloading models
137
- - **Privacy**: Your token stays local - never shared with OllamaDiffuser servers
138
- - **Troubleshooting**: If downloads fail, verify your token and model access permissions
132
+ Most models work **without any token** -- just install and go. See [Hugging Face Authentication](#-hugging-face-authentication) when you want gated models like FLUX.1-dev or SD 3.5.
139
133
 
140
134
  ---
141
135
 
142
136
  ## ✨ Features
143
137
 
144
- - **🚀 Fast Startup**: Instant application launch with lazy loading architecture
138
+ - **🏗️ Strategy Architecture**: Clean per-model strategy pattern (SD1.5, SDXL, FLUX, SD3, ControlNet, Video, HiDream, GGUF, Generic)
139
+ - **🌐 40+ Models**: FLUX.2, SD 3.5, SDXL Lightning, CogView4, Kolors, SANA, PixArt-Sigma, and more
140
+ - **🔌 Generic Pipeline**: Add new diffusers models via registry config alone -- no code changes needed
141
+ - **🖼️ img2img & Inpainting**: Image-to-image and inpainting support across SD1.5, SDXL, and the API/Web UI
142
+ - **⚡ Async API**: Non-blocking FastAPI server using `asyncio.to_thread` for GPU operations
143
+ - **🎲 Random Seeds**: Reproducible generation with explicit seeds, random by default
145
144
  - **🎛️ ControlNet Support**: Precise image generation control with 10+ control types
146
145
  - **🔄 LoRA Integration**: Dynamic LoRA loading and management
147
- - **📦 GGUF Support**: Memory-efficient quantized models (3GB VRAM minimum!)
146
+ - **🔌 MCP & OpenClaw**: Model Context Protocol server for AI assistant integration (OpenClaw, Claude Code, Cursor)
147
+ - **🍎 Apple Silicon**: MPS dtype safety, GGUF Metal acceleration, `ollamadiffuser recommend` for hardware-aware model suggestions
148
+ - **📦 GGUF Support**: Memory-efficient quantized models (3GB VRAM minimum!) with CUDA and Metal acceleration
148
149
  - **🌐 Multiple Interfaces**: CLI, Python API, Web UI, and REST API
149
150
  - **📦 Model Management**: Easy installation and switching between models
150
151
  - **⚡ Performance Optimized**: Memory-efficient with GPU acceleration
151
- - **🎨 Professional Results**: High-quality image generation with fine-tuned control
152
-
153
- ## 🚀 Quick Start
152
+ - **🧪 Test Suite**: 82 tests across settings, registry, engine, API, MPS, and MCP
154
153
 
155
154
  ### Option 1: Install from PyPI (Recommended)
156
155
  ```bash
157
156
  # Install from PyPI
158
157
  pip install ollamadiffuser
159
158
 
160
- # Pull and run a model (4-command setup)
159
+ # Pull and run a model
161
160
  ollamadiffuser pull flux.1-schnell
162
161
  ollamadiffuser run flux.1-schnell
163
162
 
164
- # Generate via API
163
+ # Generate via API (seed is optional for reproducibility)
165
164
  curl -X POST http://localhost:8000/api/generate \
166
165
  -H "Content-Type: application/json" \
167
- -d '{"prompt": "A beautiful sunset"}' \
166
+ -d '{"prompt": "A beautiful sunset", "seed": 12345}' \
168
167
  --output image.png
169
168
  ```
170
169
 
170
+ ### 🔄 Update to Latest Version
171
+
172
+ **Always use the latest version** for the newest features and bug fixes:
173
+
174
+ ```bash
175
+ # Update to latest version
176
+ pip uninstall ollamadiffuser
177
+ pip install --no-cache-dir ollamadiffuser
178
+ ```
179
+
180
+ This ensures you get:
181
+ - 🐛 **Latest bug fixes**
182
+ - ✨ **New features and improvements**
183
+ - 🚀 **Performance optimizations**
184
+ - 🔒 **Security updates**
185
+
171
186
  ### GGUF Quick Start (Low VRAM)
172
187
  ```bash
173
188
  # For systems with limited VRAM (3GB+)
174
- pip install ollamadiffuser stable-diffusion-cpp-python gguf
189
+ pip install "ollamadiffuser[gguf]"
175
190
 
176
191
  # Download memory-efficient GGUF model
177
192
  ollamadiffuser pull flux.1-dev-gguf-q4ks
@@ -180,6 +195,22 @@ ollamadiffuser pull flux.1-dev-gguf-q4ks
180
195
  ollamadiffuser run flux.1-dev-gguf-q4ks
181
196
  ```
182
197
 
198
+ ### Apple Silicon Quick Start (Mac Mini / MacBook)
199
+ ```bash
200
+ # See which models fit your Mac
201
+ ollamadiffuser recommend
202
+
203
+ # Best lightweight model (0.6B, <6GB)
204
+ ollamadiffuser pull pixart-sigma
205
+ ollamadiffuser run pixart-sigma
206
+
207
+ # GGUF with Metal acceleration (6GB, great quality)
208
+ pip install "ollamadiffuser[gguf]"
209
+ CMAKE_ARGS="-DSD_METAL=ON" pip install stable-diffusion-cpp-python
210
+ ollamadiffuser pull flux.1-dev-gguf-q4ks
211
+ ollamadiffuser run flux.1-dev-gguf-q4ks
212
+ ```
213
+
183
214
  ### Option 2: Development Installation
184
215
  ```bash
185
216
  # Clone the repository
@@ -229,21 +260,92 @@ curl -X POST http://localhost:8000/api/generate/controlnet \
229
260
 
230
261
  ---
231
262
 
232
- ## 🎯 Supported Models
263
+ ## 🔑 Hugging Face Authentication
264
+
265
+ **Do you need a Hugging Face token?** It depends on which models you want to use!
233
266
 
234
- Choose from a variety of state-of-the-art image generation models:
267
+ **Models that DON'T require a token** -- ready to use right away:
268
+ - FLUX.1-schnell, Stable Diffusion 1.5, DreamShaper, PixArt-Sigma, SANA 1.5, most ControlNet models
235
269
 
236
- | Model | License | Quality | Speed | Commercial Use | VRAM |
237
- |-------|---------|---------|-------|----------------|------|
238
- | **FLUX.1-schnell** | Apache 2.0 | High | **4 steps** (12x faster) | ✅ Commercial OK | 20GB+ |
239
- | **FLUX.1-dev** | Non-commercial | High | 50 steps | ❌ Non-commercial | 20GB+ |
240
- | **FLUX.1-dev-gguf** | Non-commercial | High | 4 steps | ❌ Non-commercial | **3-16GB** |
241
- | **Stable Diffusion 3.5** | CreativeML | Medium | 28 steps | ⚠️ Check License | 12GB+ |
242
- | **Stable Diffusion 1.5** | CreativeML | Fast | Lightweight | ⚠️ Check License | 6GB+ |
270
+ **Models that DO require a token:**
271
+ - FLUX.1-dev, Stable Diffusion 3.5, some premium LoRAs
272
+
273
+ **Setup** (only needed for gated models):
274
+ ```bash
275
+ # 1. Create account at https://huggingface.co and generate an access token
276
+ # 2. Accept license on the model page (e.g. FLUX.1-dev, SD 3.5)
277
+ # 3. Set your token
278
+ export HF_TOKEN=your_token_here
279
+
280
+ # 4. Now you can access gated models
281
+ ollamadiffuser pull flux.1-dev
282
+ ollamadiffuser pull stable-diffusion-3.5-medium
283
+ ```
284
+
285
+ > **Tips:** Use "read" permissions for the token. Your token stays local -- never shared with OllamaDiffuser servers. Add `export HF_TOKEN=...` to `~/.bashrc` or `~/.zshrc` to make it permanent.
286
+
287
+ ---
288
+
289
+ ## 🎯 Supported Models
290
+
291
+ Choose from 40+ models spanning every major architecture:
292
+
293
+ ### Core Models
294
+
295
+ | Model | Type | Steps | VRAM | Commercial | License |
296
+ |-------|------|-------|------|------------|---------|
297
+ | `flux.1-schnell` | flux | 4 | 16GB+ | ✅ | Apache 2.0 |
298
+ | `flux.1-dev` | flux | 20 | 20GB+ | ❌ | Non-commercial |
299
+ | `stable-diffusion-3.5-medium` | sd3 | 28 | 8GB+ | ⚠️ | Stability AI |
300
+ | `stable-diffusion-3.5-large` | sd3 | 28 | 12GB+ | ⚠️ | Stability AI |
301
+ | `stable-diffusion-3.5-large-turbo` | sd3 | 4 | 12GB+ | ⚠️ | Stability AI |
302
+ | `stable-diffusion-xl-base` | sdxl | 50 | 6GB+ | ⚠️ | CreativeML |
303
+ | `stable-diffusion-1.5` | sd15 | 50 | 4GB+ | ⚠️ | CreativeML |
304
+
305
+ ### Next-Generation Models
306
+
307
+ | Model | Origin | Params | Steps | VRAM | Commercial | License |
308
+ |-------|--------|--------|-------|------|------------|---------|
309
+ | `flux.2-dev` | Black Forest Labs | 32B | 28 | 14GB+ | ❌ | Non-commercial |
310
+ | `flux.2-klein-4b` | Black Forest Labs | 4B | 28 | 10GB+ | ✅ | Apache 2.0 |
311
+ | `z-image-turbo` | Alibaba (Tongyi) | 6B | 8 | 10GB+ | ✅ | Apache 2.0 |
312
+ | `sana-1.5` | NVIDIA | 1.6B | 20 | 8GB+ | ✅ | Apache 2.0 |
313
+ | `cogview4` | Zhipu AI | 6B | 50 | 12GB+ | ✅ | Apache 2.0 |
314
+ | `kolors` | Kuaishou | 8.6B | 50 | 8GB+ | ✅ | Kolors License |
315
+ | `hunyuan-dit` | Tencent | 1.5B | 50 | 6GB+ | ✅ | Tencent Community |
316
+ | `lumina-2` | Alpha-VLLM | 2B | 30 | 8GB+ | ✅ | Apache 2.0 |
317
+ | `pixart-sigma` | PixArt | 0.6B | 20 | 6GB+ | ✅ | Open |
318
+ | `auraflow` | Fal | 6.8B | 50 | 12GB+ | ✅ | Apache 2.0 |
319
+ | `omnigen` | BAAI | 3.8B | 50 | 12GB+ | ✅ | MIT |
320
+
321
+ ### Fast / Turbo Models
322
+
323
+ | Model | Steps | VRAM | Notes |
324
+ |-------|-------|------|-------|
325
+ | `sdxl-turbo` | 1 | 6GB+ | Single-step distilled SDXL |
326
+ | `sdxl-lightning-4step` | 4 | 6GB+ | ByteDance, custom scheduler |
327
+ | `stable-diffusion-3.5-large-turbo` | 4 | 12GB+ | Distilled SD 3.5 Large |
328
+ | `z-image-turbo` | 8 | 10GB+ | Alibaba 6B turbo |
329
+
330
+ ### Community Fine-Tunes
331
+
332
+ | Model | Base | Notes |
333
+ |-------|------|-------|
334
+ | `realvisxl-v4` | SDXL | Photorealistic, very popular |
335
+ | `dreamshaper` | SD 1.5 | Versatile artistic model |
336
+ | `realistic-vision-v6` | SD 1.5 | Portrait specialist |
337
+
338
+ ### FLUX Pipeline Variants
339
+
340
+ | Model | Pipeline | Use Case |
341
+ |-------|----------|----------|
342
+ | `flux.1-fill-dev` | FluxFillPipeline | Inpainting / outpainting |
343
+ | `flux.1-canny-dev` | FluxControlPipeline | Canny edge control |
344
+ | `flux.1-depth-dev` | FluxControlPipeline | Depth map control |
243
345
 
244
346
  ### 💾 GGUF Models - Reduced Memory Requirements
245
347
 
246
- **NEW**: GGUF quantized models enable running FLUX.1-dev on budget hardware!
348
+ GGUF quantized models enable running FLUX.1-dev on budget hardware:
247
349
 
248
350
  | GGUF Variant | VRAM | Quality | Best For |
249
351
  |--------------|------|---------|----------|
@@ -254,11 +356,6 @@ Choose from a variety of state-of-the-art image generation models:
254
356
 
255
357
  📖 **[Complete GGUF Guide](GGUF_GUIDE.md)** - Hardware recommendations, installation, and optimization tips
256
358
 
257
- ### Why Choose FLUX.1-schnell?
258
- - **Apache 2.0 license** - Perfect for commercial use
259
- - **4-step generation** - Lightning fast results
260
- - **Commercial OK** - Use in your business
261
-
262
359
  ---
263
360
 
264
361
  ## 🎛️ ControlNet Features
@@ -319,6 +416,16 @@ ollamadiffuser lora unload
319
416
  ollamadiffuser pull stable-diffusion-1.5
320
417
  ollamadiffuser run stable-diffusion-1.5
321
418
 
419
+ # Model registry management
420
+ ollamadiffuser registry list
421
+ ollamadiffuser registry list --installed-only
422
+ ollamadiffuser registry check-gguf
423
+
424
+ # Configuration management
425
+ ollamadiffuser config # show all config
426
+ ollamadiffuser config set models_dir /mnt/ssd/models # custom model path
427
+ ollamadiffuser config set server.port 9000 # change server port
428
+
322
429
  # In another terminal, generate images via API
323
430
  curl -X POST http://localhost:8000/api/generate \
324
431
  -H "Content-Type: application/json" \
@@ -350,18 +457,75 @@ Features:
350
457
  ```bash
351
458
  # Start API server
352
459
  ollamadiffuser --mode api
353
-
354
460
  ollamadiffuser load stable-diffusion-1.5
355
461
 
356
- # Generate image
462
+ # Text-to-image
357
463
  curl -X POST http://localhost:8000/api/generate \
358
464
  -H "Content-Type: application/json" \
359
- -d '{"prompt": "a beautiful landscape", "width": 1024, "height": 1024}'
465
+ -d '{"prompt": "a beautiful landscape", "width": 1024, "height": 1024, "seed": 42}'
466
+
467
+ # Image-to-image
468
+ curl -X POST http://localhost:8000/api/generate/img2img \
469
+ -F "prompt=oil painting style" \
470
+ -F "strength=0.75" \
471
+ -F "image=@input.png" \
472
+ --output result.png
473
+
474
+ # Inpainting
475
+ curl -X POST http://localhost:8000/api/generate/inpaint \
476
+ -F "prompt=a red car" \
477
+ -F "image=@photo.png" \
478
+ -F "mask=@mask.png" \
479
+ --output inpainted.png
480
+
481
+ # API docs: http://localhost:8000/docs
482
+ ```
360
483
 
361
- # API document
362
- http://localhost:8000/docs
484
+ ### MCP Server (AI Assistant Integration)
485
+
486
+ OllamaDiffuser includes a [Model Context Protocol](https://modelcontextprotocol.io/) server for integration with AI assistants like OpenClaw, Claude Code, and Cursor.
487
+
488
+ ```bash
489
+ # Install MCP support
490
+ pip install "ollamadiffuser[mcp]"
491
+
492
+ # Start MCP server (stdio transport)
493
+ ollamadiffuser mcp
363
494
  ```
364
495
 
496
+ **MCP client configuration** (e.g. `claude_desktop_config.json`):
497
+ ```json
498
+ {
499
+ "mcpServers": {
500
+ "ollamadiffuser": {
501
+ "command": "ollamadiffuser-mcp"
502
+ }
503
+ }
504
+ }
505
+ ```
506
+
507
+ **Available MCP tools:**
508
+ - `generate_image` -- Generate images from text prompts (auto-loads model)
509
+ - `list_models` -- List available and installed models
510
+ - `load_model` -- Load a model into memory
511
+ - `get_status` -- Check device, loaded model, and system status
512
+
513
+ ### OpenClaw AgentSkill
514
+
515
+ An [OpenClaw](https://github.com/openclaw/openclaw) skill is included at `integrations/openclaw/SKILL.md`. It uses the REST API with `response_format=b64_json` for agent-friendly base64 image responses. Copy the skill directory to your OpenClaw skills folder or publish it to ClawHub.
516
+
517
+ ### Base64 JSON API Response
518
+
519
+ For AI agents and messaging platforms, use `response_format=b64_json` to get images as JSON:
520
+
521
+ ```bash
522
+ curl -X POST http://localhost:8000/api/generate \
523
+ -H "Content-Type: application/json" \
524
+ -d '{"prompt": "a sunset over mountains", "response_format": "b64_json"}'
525
+ ```
526
+
527
+ Response: `{"image": "<base64 PNG>", "format": "png", "width": 1024, "height": 1024}`
528
+
365
529
  ### Python API
366
530
  ```python
367
531
  from ollamadiffuser.core.models.manager import model_manager
@@ -370,30 +534,59 @@ from ollamadiffuser.core.models.manager import model_manager
370
534
  success = model_manager.load_model("stable-diffusion-1.5")
371
535
  if success:
372
536
  engine = model_manager.loaded_model
373
-
374
- # Generate image
537
+
538
+ # Text-to-image (seed is optional; omit for random)
375
539
  image = engine.generate_image(
376
540
  prompt="a beautiful sunset",
377
541
  width=1024,
378
- height=1024
542
+ height=1024,
543
+ seed=42,
379
544
  )
380
545
  image.save("output.jpg")
546
+
547
+ # Image-to-image
548
+ from PIL import Image
549
+ input_img = Image.open("photo.jpg")
550
+ result = engine.generate_image(
551
+ prompt="watercolor painting",
552
+ image=input_img,
553
+ strength=0.7,
554
+ )
555
+ result.save("img2img_output.jpg")
381
556
  else:
382
557
  print("Failed to load model")
383
558
  ```
384
559
 
385
- ## 📦 Supported Models
560
+ ## 📦 Model Ecosystem
386
561
 
387
562
  ### Base Models
388
- - **Stable Diffusion 1.5**: Classic, reliable, fast
389
- - **Stable Diffusion XL**: High-resolution, detailed
390
- - **Stable Diffusion 3**: Latest architecture
391
- - **FLUX.1**: State-of-the-art quality
563
+ - **Stable Diffusion 1.5**: Classic, reliable, fast (img2img + inpainting)
564
+ - **Stable Diffusion XL**: High-resolution, detailed (img2img + inpainting, scheduler overrides)
565
+ - **Stable Diffusion 3.5**: Medium, Large, and Large Turbo variants
566
+ - **FLUX.1**: schnell, dev, Fill, Canny, Depth pipeline variants
567
+ - **HiDream**: Multi-prompt generation with bfloat16
568
+ - **AnimateDiff**: Video/animation generation
569
+
570
+ ### Next-Generation Models
571
+ - **FLUX.2**: 32B dev and 4B Klein variants from Black Forest Labs
572
+ - **Chinese Models**: CogView4 (Zhipu), Kolors (Kuaishou), Hunyuan-DiT (Tencent), Z-Image (Alibaba)
573
+ - **Efficient Models**: SANA 1.5 (1.6B), PixArt-Sigma (0.6B) -- high quality at low VRAM
574
+ - **Open Models**: AuraFlow (6.8B, Apache 2.0), OmniGen (3.8B, MIT), Lumina 2.0 (2B, Apache 2.0)
575
+
576
+ ### Fast / Turbo Models
577
+ - **SDXL Turbo**: Single-step inference from Stability AI
578
+ - **SDXL Lightning**: 4-step with custom scheduler from ByteDance
579
+ - **Z-Image Turbo**: 8-step turbo from Alibaba
580
+
581
+ ### Community Fine-Tunes
582
+ - **RealVisXL V4**: Photorealistic SDXL, very popular
583
+ - **DreamShaper**: Versatile artistic SD 1.5 model
584
+ - **Realistic Vision V6**: Portrait specialist
392
585
 
393
586
  ### GGUF Quantized Models
394
587
  - **FLUX.1-dev GGUF**: 7 quantization levels (3GB-16GB VRAM)
395
588
  - **Memory Efficient**: Run high-quality models on budget hardware
396
- - **Same API**: Works seamlessly with existing commands
589
+ - **Optional Install**: `pip install "ollamadiffuser[gguf]"`
397
590
 
398
591
  ### ControlNet Models
399
592
  - **SD 1.5 ControlNet**: 4 control types (canny, depth, openpose, scribble)
@@ -405,14 +598,32 @@ else:
405
598
  - **Dynamic Loading**: Load/unload without model restart
406
599
  - **Strength Control**: Adjustable influence (0.1-2.0)
407
600
 
408
- ## ⚙️ Configuration
601
+ ## ⚙️ Architecture
602
+
603
+ ### Strategy Pattern Engine
604
+ Each model type has a dedicated strategy class handling loading and generation:
409
605
 
410
- ### Model Configuration
606
+ ```
607
+ InferenceEngine (facade)
608
+ -> SD15Strategy (512x512, float32 on MPS, img2img, inpainting)
609
+ -> SDXLStrategy (1024x1024, img2img, inpainting, scheduler overrides)
610
+ -> FluxStrategy (schnell/dev/Fill/Canny/Depth, dynamic pipeline class)
611
+ -> SD3Strategy (1024x1024, 28 steps, guidance=3.5)
612
+ -> ControlNetStrategy (SD15 + SDXL base models)
613
+ -> VideoStrategy (AnimateDiff, 16 frames)
614
+ -> HiDreamStrategy (bfloat16, multi-prompt)
615
+ -> GGUFStrategy (quantized via stable-diffusion-cpp)
616
+ -> GenericPipelineStrategy (any diffusers pipeline via config)
617
+ ```
618
+
619
+ The `GenericPipelineStrategy` dynamically loads any `diffusers` pipeline class specified in the model registry, so new models can be added with zero code changes.
620
+
621
+ ### Configuration
411
622
  Models are automatically configured with optimal settings:
412
623
  - **Memory Optimization**: Attention slicing, CPU offloading
413
624
  - **Device Detection**: Automatic CUDA/MPS/CPU selection
414
- - **Precision Handling**: FP16/BF16 support for efficiency
415
- - **Safety Features**: NSFW filter bypass for creative freedom
625
+ - **Precision Handling**: FP16/BF16 per model type
626
+ - **Safety Disabled**: Unified `SAFETY_DISABLED_KWARGS` (no monkey-patching)
416
627
 
417
628
  ## 🔧 Advanced Usage
418
629
 
@@ -487,7 +698,7 @@ with open("control.jpg", "rb") as f:
487
698
  ### Minimum Requirements
488
699
  - **RAM**: 8GB system RAM
489
700
  - **Storage**: 10GB free space
490
- - **Python**: 3.8+
701
+ - **Python**: 3.10+
491
702
 
492
703
  ### Recommended Hardware
493
704
 
@@ -496,6 +707,12 @@ with open("control.jpg", "rb") as f:
496
707
  - **RAM**: 16GB+ system RAM
497
708
  - **Storage**: SSD with 50GB+ free space
498
709
 
710
+ #### For Apple Silicon (Mac Mini / MacBook)
711
+ - **16GB unified memory**: PixArt-Sigma, SANA 1.5, DreamShaper, SD 1.5/XL, GGUF q2k-q5ks
712
+ - **24GB+ unified memory**: CogView4, Kolors, Lumina 2.0, GGUF q6k-q8
713
+ - **GGUF with Metal**: Install with `CMAKE_ARGS="-DSD_METAL=ON"` for GPU acceleration
714
+ - Run `ollamadiffuser recommend` to see what fits your hardware
715
+
499
716
  #### For GGUF Models (Memory Efficient)
500
717
  - **GPU**: 3GB+ VRAM (or CPU only)
501
718
  - **RAM**: 8GB+ system RAM (16GB+ for CPU inference)
@@ -503,7 +720,7 @@ with open("control.jpg", "rb") as f:
503
720
 
504
721
  ### Supported Platforms
505
722
  - **CUDA**: NVIDIA GPUs (recommended)
506
- - **MPS**: Apple Silicon (M1/M2/M3)
723
+ - **MPS**: Apple Silicon (M1/M2/M3/M4) -- native support for 30+ models including GGUF
507
724
  - **CPU**: All platforms (slower but functional)
508
725
 
509
726
  ## 🔧 Troubleshooting
@@ -534,7 +751,7 @@ pip install 'ollamadiffuser[full]'
534
751
  #### GGUF Support Issues
535
752
  ```bash
536
753
  # Install GGUF dependencies
537
- pip install stable-diffusion-cpp-python gguf
754
+ pip install "ollamadiffuser[gguf]"
538
755
 
539
756
  # Check GGUF support
540
757
  ollamadiffuser registry check-gguf
@@ -673,9 +890,21 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
673
890
  ## 🙏 Acknowledgments
674
891
 
675
892
  - **Stability AI**: For Stable Diffusion models
676
- - **Black Forest Labs**: For FLUX.1 models
893
+ - **Black Forest Labs**: For FLUX.1 and FLUX.2 models
894
+ - **Alibaba (Tongyi-MAI)**: For Z-Image Turbo
895
+ - **NVIDIA (Efficient-Large-Model)**: For SANA 1.5
896
+ - **Zhipu AI (THUDM)**: For CogView4
897
+ - **Kuaishou (Kwai-Kolors)**: For Kolors
898
+ - **Tencent (Hunyuan)**: For Hunyuan-DiT
899
+ - **Alpha-VLLM**: For Lumina 2.0
900
+ - **PixArt-alpha**: For PixArt-Sigma
901
+ - **Fal**: For AuraFlow
902
+ - **BAAI (Shitao)**: For OmniGen
903
+ - **ByteDance**: For SDXL Lightning
677
904
  - **city96**: For FLUX.1-dev GGUF quantizations
678
905
  - **Hugging Face**: For model hosting and diffusers library
906
+ - **Anthropic**: For Model Context Protocol (MCP)
907
+ - **OpenClaw**: For AI agent ecosystem integration
679
908
  - **ControlNet Team**: For ControlNet architecture
680
909
  - **Community**: For feedback and contributions
681
910