ollamadiffuser 1.2.3__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ollamadiffuser/__init__.py +1 -1
- ollamadiffuser/api/server.py +312 -312
- ollamadiffuser/cli/config_commands.py +119 -0
- ollamadiffuser/cli/lora_commands.py +169 -0
- ollamadiffuser/cli/main.py +85 -1233
- ollamadiffuser/cli/model_commands.py +664 -0
- ollamadiffuser/cli/recommend_command.py +205 -0
- ollamadiffuser/cli/registry_commands.py +197 -0
- ollamadiffuser/core/config/model_registry.py +562 -11
- ollamadiffuser/core/config/settings.py +24 -2
- ollamadiffuser/core/inference/__init__.py +5 -0
- ollamadiffuser/core/inference/base.py +182 -0
- ollamadiffuser/core/inference/engine.py +204 -1405
- ollamadiffuser/core/inference/strategies/__init__.py +1 -0
- ollamadiffuser/core/inference/strategies/controlnet_strategy.py +170 -0
- ollamadiffuser/core/inference/strategies/flux_strategy.py +136 -0
- ollamadiffuser/core/inference/strategies/generic_strategy.py +164 -0
- ollamadiffuser/core/inference/strategies/gguf_strategy.py +113 -0
- ollamadiffuser/core/inference/strategies/hidream_strategy.py +104 -0
- ollamadiffuser/core/inference/strategies/sd15_strategy.py +134 -0
- ollamadiffuser/core/inference/strategies/sd3_strategy.py +80 -0
- ollamadiffuser/core/inference/strategies/sdxl_strategy.py +131 -0
- ollamadiffuser/core/inference/strategies/video_strategy.py +108 -0
- ollamadiffuser/mcp/__init__.py +0 -0
- ollamadiffuser/mcp/server.py +184 -0
- ollamadiffuser/ui/templates/index.html +62 -1
- ollamadiffuser/ui/web.py +116 -54
- {ollamadiffuser-1.2.3.dist-info → ollamadiffuser-2.0.1.dist-info}/METADATA +317 -108
- ollamadiffuser-2.0.1.dist-info/RECORD +61 -0
- {ollamadiffuser-1.2.3.dist-info → ollamadiffuser-2.0.1.dist-info}/WHEEL +1 -1
- {ollamadiffuser-1.2.3.dist-info → ollamadiffuser-2.0.1.dist-info}/entry_points.txt +1 -0
- ollamadiffuser/core/models/registry.py +0 -384
- ollamadiffuser/ui/samples/.DS_Store +0 -0
- ollamadiffuser-1.2.3.dist-info/RECORD +0 -45
- {ollamadiffuser-1.2.3.dist-info → ollamadiffuser-2.0.1.dist-info}/licenses/LICENSE +0 -0
- {ollamadiffuser-1.2.3.dist-info → ollamadiffuser-2.0.1.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ollamadiffuser
|
|
3
|
-
Version: 1.2.3
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 2.0.1
|
|
4
|
+
Summary: Local AI Image Generation with Ollama-style CLI for Stable Diffusion, FLUX, and LoRA support
|
|
5
5
|
Home-page: https://github.com/ollamadiffuser/ollamadiffuser
|
|
6
6
|
Author: OllamaDiffuser Team
|
|
7
7
|
Author-email: OllamaDiffuser Team <ollamadiffuser@gmail.com>
|
|
@@ -14,7 +14,7 @@ Project-URL: Documentation, https://www.ollamadiffuser.com/
|
|
|
14
14
|
Project-URL: Bug Reports, https://github.com/ollamadiffuser/ollamadiffuser/issues
|
|
15
15
|
Project-URL: Feature Requests, https://github.com/ollamadiffuser/ollamadiffuser/issues
|
|
16
16
|
Project-URL: Source Code, https://github.com/ollamadiffuser/ollamadiffuser
|
|
17
|
-
Keywords: diffusion,image-generation,ai,machine-learning,lora,ollama,stable-diffusion,flux,local-ai,controlnet,web-ui,cli
|
|
17
|
+
Keywords: diffusion,image-generation,ai,machine-learning,lora,ollama,stable-diffusion,flux,local-ai,controlnet,web-ui,cli,img2img,inpainting,mcp,openclaw
|
|
18
18
|
Classifier: Development Status :: 4 - Beta
|
|
19
19
|
Classifier: Intended Audience :: Developers
|
|
20
20
|
Classifier: Intended Audience :: End Users/Desktop
|
|
@@ -33,40 +33,60 @@ Classifier: Environment :: Web Environment
|
|
|
33
33
|
Requires-Python: >=3.10
|
|
34
34
|
Description-Content-Type: text/markdown
|
|
35
35
|
License-File: LICENSE
|
|
36
|
-
Requires-Dist: torch>=2.
|
|
37
|
-
Requires-Dist: diffusers>=0.
|
|
38
|
-
Requires-Dist: transformers>=4.
|
|
39
|
-
Requires-Dist: accelerate>=0.
|
|
40
|
-
Requires-Dist: fastapi>=0.
|
|
36
|
+
Requires-Dist: torch>=2.4.0
|
|
37
|
+
Requires-Dist: diffusers>=0.34.0
|
|
38
|
+
Requires-Dist: transformers>=4.40.0
|
|
39
|
+
Requires-Dist: accelerate>=1.0.0
|
|
40
|
+
Requires-Dist: fastapi>=0.110.0
|
|
41
41
|
Requires-Dist: uvicorn>=0.23.0
|
|
42
|
-
Requires-Dist: huggingface-hub>=0.
|
|
43
|
-
Requires-Dist: Pillow>=
|
|
42
|
+
Requires-Dist: huggingface-hub>=0.25.0
|
|
43
|
+
Requires-Dist: Pillow>=10.0.0
|
|
44
44
|
Requires-Dist: click>=8.0.0
|
|
45
45
|
Requires-Dist: rich>=13.0.0
|
|
46
46
|
Requires-Dist: pydantic>=2.0.0
|
|
47
47
|
Requires-Dist: protobuf>=3.20.0
|
|
48
48
|
Requires-Dist: sentencepiece>=0.1.99
|
|
49
|
-
Requires-Dist: safetensors>=0.
|
|
49
|
+
Requires-Dist: safetensors>=0.4.0
|
|
50
50
|
Requires-Dist: python-multipart>=0.0.0
|
|
51
51
|
Requires-Dist: psutil>=5.9.0
|
|
52
52
|
Requires-Dist: jinja2>=3.0.0
|
|
53
|
-
Requires-Dist: peft>=0.
|
|
54
|
-
Requires-Dist: numpy>=1.
|
|
53
|
+
Requires-Dist: peft>=0.17.0
|
|
54
|
+
Requires-Dist: numpy>=1.26.0
|
|
55
55
|
Requires-Dist: controlnet-aux>=0.0.7
|
|
56
56
|
Requires-Dist: opencv-python>=4.8.0
|
|
57
|
-
Requires-Dist:
|
|
58
|
-
Requires-Dist:
|
|
57
|
+
Requires-Dist: requests>=2.28.0
|
|
58
|
+
Requires-Dist: PyYAML>=6.0
|
|
59
|
+
Provides-Extra: gguf
|
|
60
|
+
Requires-Dist: stable-diffusion-cpp-python>=0.1.0; extra == "gguf"
|
|
61
|
+
Requires-Dist: gguf>=0.1.0; extra == "gguf"
|
|
62
|
+
Provides-Extra: full
|
|
63
|
+
Requires-Dist: stable-diffusion-cpp-python>=0.1.0; extra == "full"
|
|
64
|
+
Requires-Dist: gguf>=0.1.0; extra == "full"
|
|
65
|
+
Requires-Dist: mcp[cli]>=1.0.0; extra == "full"
|
|
66
|
+
Provides-Extra: mcp
|
|
67
|
+
Requires-Dist: mcp[cli]>=1.0.0; extra == "mcp"
|
|
68
|
+
Provides-Extra: openclaw
|
|
69
|
+
Requires-Dist: mcp[cli]>=1.0.0; extra == "openclaw"
|
|
59
70
|
Provides-Extra: dev
|
|
60
71
|
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
61
72
|
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
73
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
74
|
+
Requires-Dist: httpx>=0.24.0; extra == "dev"
|
|
62
75
|
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
63
76
|
Requires-Dist: isort>=5.12.0; extra == "dev"
|
|
64
77
|
Requires-Dist: flake8>=6.0.0; extra == "dev"
|
|
78
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
65
79
|
Dynamic: author
|
|
66
80
|
Dynamic: home-page
|
|
67
81
|
Dynamic: license-file
|
|
68
82
|
Dynamic: requires-python
|
|
69
83
|
|
|
84
|
+
### Project Status: Active Development
|
|
85
|
+
|
|
86
|
+
**Thank you for the incredible support and over 11,000 downloads!**
|
|
87
|
+
|
|
88
|
+
`ollamadiffuser` is back in **active development**. v2.0 brings a major architecture overhaul, 21 new models, MCP/OpenClaw integration, and Apple Silicon support. Part of the **[LocalKinAI](https://github.com/LocalKinAI)** ecosystem.
|
|
89
|
+
|
|
70
90
|
# OllamaDiffuser 🎨
|
|
71
91
|
|
|
72
92
|
[![PyPI version](https://badge.fury.io/py/ollamadiffuser.svg)](https://badge.fury.io/py/ollamadiffuser)
|
|
@@ -76,95 +96,70 @@ Dynamic: requires-python
|
|
|
76
96
|
|
|
77
97
|
## Local AI Image Generation with OllamaDiffuser
|
|
78
98
|
|
|
79
|
-
**OllamaDiffuser** simplifies local deployment of **Stable Diffusion**, **FLUX
|
|
99
|
+
**OllamaDiffuser** simplifies local deployment of **Stable Diffusion**, **FLUX**, **CogView4**, **Kolors**, **SANA**, **PixArt-Sigma**, and 40+ other AI image generation models. An intuitive **local SD** tool inspired by **Ollama's** simplicity - perfect for **local diffuser** workflows with CLI, web UI, and LoRA support.
|
|
80
100
|
|
|
81
101
|
🌐 **Website**: [ollamadiffuser.com](https://www.ollamadiffuser.com/) | 📦 **PyPI**: [pypi.org/project/ollamadiffuser](https://pypi.org/project/ollamadiffuser/)
|
|
82
102
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
## 🔑 Hugging Face Authentication
|
|
86
|
-
|
|
87
|
-
**Do you need a Hugging Face token?** It depends on which models you want to use!
|
|
103
|
+
> **Upgrading from v1.x?** v2.0 is a major rewrite requiring **Python 3.10+**. Run `pip install --upgrade "ollamadiffuser[full]"` and see the [Migration Guide](#-migration-guide) below.
|
|
88
104
|
|
|
89
|
-
|
|
90
|
-
- **FLUX.1-schnell** - Apache 2.0 license, ready to use ✅
|
|
91
|
-
- **Stable Diffusion 1.5** - Basic model, no authentication needed ✅
|
|
92
|
-
- **Most ControlNet models** - Generally public access ✅
|
|
93
|
-
|
|
94
|
-
### 🟡 Models that DO require a token:
|
|
95
|
-
- **FLUX.1-dev** - Requires HF token and license agreement ⚠️
|
|
96
|
-
- **Stable Diffusion 3.5** - Requires HF token and license agreement ⚠️
|
|
97
|
-
- **Some premium LoRAs** - Gated models from Hugging Face ⚠️
|
|
105
|
+
---
|
|
98
106
|
|
|
99
|
-
|
|
107
|
+
## 🚀 Quick Start (v2.0)
|
|
100
108
|
|
|
101
|
-
**For
|
|
109
|
+
**For Mac/PC Users:**
|
|
102
110
|
```bash
|
|
103
|
-
|
|
104
|
-
ollamadiffuser
|
|
105
|
-
ollamadiffuser pull stable-diffusion-1.5
|
|
111
|
+
pip install "ollamadiffuser[full]"
|
|
112
|
+
ollamadiffuser recommend # Find which models fit your GPU
|
|
106
113
|
```
|
|
107
114
|
|
|
108
|
-
**For
|
|
115
|
+
**For OpenClaw/Agent Users:**
|
|
109
116
|
```bash
|
|
110
|
-
|
|
111
|
-
|
|
117
|
+
pip install "ollamadiffuser[mcp]"
|
|
118
|
+
ollamadiffuser mcp # Starts the MCP server
|
|
119
|
+
```
|
|
112
120
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
121
|
+
**For Low-VRAM / Budget GPU Users:**
|
|
122
|
+
```bash
|
|
123
|
+
pip install "ollamadiffuser[gguf]"
|
|
124
|
+
ollamadiffuser pull flux.1-dev-gguf-q4ks # Only 6GB VRAM needed
|
|
125
|
+
ollamadiffuser run flux.1-dev-gguf-q4ks
|
|
116
126
|
```
|
|
117
127
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
1. **Create account**: Visit [huggingface.co](https://huggingface.co) and sign up
|
|
121
|
-
2. **Generate token**: Go to Settings → Access Tokens → Create new token
|
|
122
|
-
3. **Accept licenses**: Visit the model pages and accept license agreements:
|
|
123
|
-
- [FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev)
|
|
124
|
-
- [Stable Diffusion 3.5](https://huggingface.co/stabilityai/stable-diffusion-3.5-medium)
|
|
125
|
-
4. **Set environment variable**:
|
|
126
|
-
```bash
|
|
127
|
-
# Temporary (current session)
|
|
128
|
-
export HF_TOKEN=your_token_here
|
|
129
|
-
|
|
130
|
-
# Permanent (add to ~/.bashrc or ~/.zshrc)
|
|
131
|
-
echo 'export HF_TOKEN=your_token_here' >> ~/.bashrc
|
|
132
|
-
```
|
|
133
|
-
|
|
134
|
-
### 💡 Pro Tips:
|
|
135
|
-
- **Start simple**: Begin with FLUX.1-schnell (no token required, commercial use OK)
|
|
136
|
-
- **Token scope**: Use "read" permissions for downloading models
|
|
137
|
-
- **Privacy**: Your token stays local - never shared with OllamaDiffuser servers
|
|
138
|
-
- **Troubleshooting**: If downloads fail, verify your token and model access permissions
|
|
128
|
+
Most models work **without any token** -- just install and go. See [Hugging Face Authentication](#-hugging-face-authentication) when you want gated models like FLUX.1-dev or SD 3.5.
|
|
139
129
|
|
|
140
130
|
---
|
|
141
131
|
|
|
142
132
|
## ✨ Features
|
|
143
133
|
|
|
144
|
-
-
|
|
134
|
+
- **🏗️ Strategy Architecture**: Clean per-model strategy pattern (SD1.5, SDXL, FLUX, SD3, ControlNet, Video, HiDream, GGUF, Generic)
|
|
135
|
+
- **🌐 40+ Models**: FLUX.2, SD 3.5, SDXL Lightning, CogView4, Kolors, SANA, PixArt-Sigma, and more
|
|
136
|
+
- **🔌 Generic Pipeline**: Add new diffusers models via registry config alone -- no code changes needed
|
|
137
|
+
- **🖼️ img2img & Inpainting**: Image-to-image and inpainting support across SD1.5, SDXL, and the API/Web UI
|
|
138
|
+
- **⚡ Async API**: Non-blocking FastAPI server using `asyncio.to_thread` for GPU operations
|
|
139
|
+
- **🎲 Random Seeds**: Seeds are random by default; pass an explicit `seed` for reproducible generation
|
|
145
140
|
- **🎛️ ControlNet Support**: Precise image generation control with 10+ control types
|
|
146
141
|
- **🔄 LoRA Integration**: Dynamic LoRA loading and management
|
|
147
|
-
-
|
|
142
|
+
- **🔌 MCP & OpenClaw**: Model Context Protocol server for AI assistant integration (OpenClaw, Claude Code, Cursor)
|
|
143
|
+
- **🍎 Apple Silicon**: MPS dtype safety, GGUF Metal acceleration, `ollamadiffuser recommend` for hardware-aware model suggestions
|
|
144
|
+
- **📦 GGUF Support**: Memory-efficient quantized models (3GB VRAM minimum!) with CUDA and Metal acceleration
|
|
148
145
|
- **🌐 Multiple Interfaces**: CLI, Python API, Web UI, and REST API
|
|
149
146
|
- **📦 Model Management**: Easy installation and switching between models
|
|
150
147
|
- **⚡ Performance Optimized**: Memory-efficient with GPU acceleration
|
|
151
|
-
-
|
|
152
|
-
|
|
153
|
-
## 🚀 Quick Start
|
|
148
|
+
- **🧪 Test Suite**: 82 tests across settings, registry, engine, API, MPS, and MCP
|
|
154
149
|
|
|
155
150
|
### Option 1: Install from PyPI (Recommended)
|
|
156
151
|
```bash
|
|
157
152
|
# Install from PyPI
|
|
158
153
|
pip install ollamadiffuser
|
|
159
154
|
|
|
160
|
-
# Pull and run a model
|
|
155
|
+
# Pull and run a model
|
|
161
156
|
ollamadiffuser pull flux.1-schnell
|
|
162
157
|
ollamadiffuser run flux.1-schnell
|
|
163
158
|
|
|
164
|
-
# Generate via API
|
|
159
|
+
# Generate via API (seed is optional for reproducibility)
|
|
165
160
|
curl -X POST http://localhost:8000/api/generate \
|
|
166
161
|
-H "Content-Type: application/json" \
|
|
167
|
-
-d '{"prompt": "A beautiful sunset"}' \
|
|
162
|
+
-d '{"prompt": "A beautiful sunset", "seed": 12345}' \
|
|
168
163
|
--output image.png
|
|
169
164
|
```
|
|
170
165
|
|
|
@@ -187,7 +182,7 @@ This ensures you get:
|
|
|
187
182
|
### GGUF Quick Start (Low VRAM)
|
|
188
183
|
```bash
|
|
189
184
|
# For systems with limited VRAM (3GB+)
|
|
190
|
-
pip install ollamadiffuser
|
|
185
|
+
pip install "ollamadiffuser[gguf]"
|
|
191
186
|
|
|
192
187
|
# Download memory-efficient GGUF model
|
|
193
188
|
ollamadiffuser pull flux.1-dev-gguf-q4ks
|
|
@@ -196,6 +191,22 @@ ollamadiffuser pull flux.1-dev-gguf-q4ks
|
|
|
196
191
|
ollamadiffuser run flux.1-dev-gguf-q4ks
|
|
197
192
|
```
|
|
198
193
|
|
|
194
|
+
### Apple Silicon Quick Start (Mac Mini / MacBook)
|
|
195
|
+
```bash
|
|
196
|
+
# See which models fit your Mac
|
|
197
|
+
ollamadiffuser recommend
|
|
198
|
+
|
|
199
|
+
# Best lightweight model (0.6B, <6GB)
|
|
200
|
+
ollamadiffuser pull pixart-sigma
|
|
201
|
+
ollamadiffuser run pixart-sigma
|
|
202
|
+
|
|
203
|
+
# GGUF with Metal acceleration (6GB, great quality)
|
|
204
|
+
pip install "ollamadiffuser[gguf]"
|
|
205
|
+
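CMAKE_ARGS="-DSD_METAL=ON" pip install --upgrade --force-reinstall --no-cache-dir stable-diffusion-cpp-python  # force a rebuild with Metal; a plain install is skipped because the [gguf] extra already installed the package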
|
|
206
|
+
ollamadiffuser pull flux.1-dev-gguf-q4ks
|
|
207
|
+
ollamadiffuser run flux.1-dev-gguf-q4ks
|
|
208
|
+
```
|
|
209
|
+
|
|
199
210
|
### Option 2: Development Installation
|
|
200
211
|
```bash
|
|
201
212
|
# Clone the repository
|
|
@@ -245,21 +256,92 @@ curl -X POST http://localhost:8000/api/generate/controlnet \
|
|
|
245
256
|
|
|
246
257
|
---
|
|
247
258
|
|
|
248
|
-
##
|
|
259
|
+
## 🔑 Hugging Face Authentication
|
|
260
|
+
|
|
261
|
+
**Do you need a Hugging Face token?** It depends on which models you want to use!
|
|
262
|
+
|
|
263
|
+
**Models that DON'T require a token** -- ready to use right away:
|
|
264
|
+
- FLUX.1-schnell, Stable Diffusion 1.5, DreamShaper, PixArt-Sigma, SANA 1.5, most ControlNet models
|
|
265
|
+
|
|
266
|
+
**Models that DO require a token:**
|
|
267
|
+
- FLUX.1-dev, Stable Diffusion 3.5, some premium LoRAs
|
|
268
|
+
|
|
269
|
+
**Setup** (only needed for gated models):
|
|
270
|
+
```bash
|
|
271
|
+
# 1. Create account at https://huggingface.co and generate an access token
|
|
272
|
+
# 2. Accept license on the model page (e.g. FLUX.1-dev, SD 3.5)
|
|
273
|
+
# 3. Set your token
|
|
274
|
+
export HF_TOKEN=your_token_here
|
|
275
|
+
|
|
276
|
+
# 4. Now you can access gated models
|
|
277
|
+
ollamadiffuser pull flux.1-dev
|
|
278
|
+
ollamadiffuser pull stable-diffusion-3.5-medium
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
> **Tips:** Use "read" permissions for the token. Your token stays local -- never shared with OllamaDiffuser servers. Add `export HF_TOKEN=...` to `~/.bashrc` or `~/.zshrc` to make it permanent.
|
|
282
|
+
|
|
283
|
+
---
|
|
249
284
|
|
|
250
|
-
|
|
285
|
+
## 🎯 Supported Models
|
|
251
286
|
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
|
257
|
-
|
|
258
|
-
|
|
|
287
|
+
Choose from 40+ models spanning every major architecture:
|
|
288
|
+
|
|
289
|
+
### Core Models
|
|
290
|
+
|
|
291
|
+
| Model | Type | Steps | VRAM | Commercial | License |
|
|
292
|
+
|-------|------|-------|------|------------|---------|
|
|
293
|
+
| `flux.1-schnell` | flux | 4 | 16GB+ | ✅ | Apache 2.0 |
|
|
294
|
+
| `flux.1-dev` | flux | 20 | 20GB+ | ❌ | Non-commercial |
|
|
295
|
+
| `stable-diffusion-3.5-medium` | sd3 | 28 | 8GB+ | ⚠️ | Stability AI |
|
|
296
|
+
| `stable-diffusion-3.5-large` | sd3 | 28 | 12GB+ | ⚠️ | Stability AI |
|
|
297
|
+
| `stable-diffusion-3.5-large-turbo` | sd3 | 4 | 12GB+ | ⚠️ | Stability AI |
|
|
298
|
+
| `stable-diffusion-xl-base` | sdxl | 50 | 6GB+ | ⚠️ | CreativeML |
|
|
299
|
+
| `stable-diffusion-1.5` | sd15 | 50 | 4GB+ | ⚠️ | CreativeML |
|
|
300
|
+
|
|
301
|
+
### Next-Generation Models
|
|
302
|
+
|
|
303
|
+
| Model | Origin | Params | Steps | VRAM | Commercial | License |
|
|
304
|
+
|-------|--------|--------|-------|------|------------|---------|
|
|
305
|
+
| `flux.2-dev` | Black Forest Labs | 32B | 28 | 14GB+ | ❌ | Non-commercial |
|
|
306
|
+
| `flux.2-klein-4b` | Black Forest Labs | 4B | 28 | 10GB+ | ✅ | Apache 2.0 |
|
|
307
|
+
| `z-image-turbo` | Alibaba (Tongyi) | 6B | 8 | 10GB+ | ✅ | Apache 2.0 |
|
|
308
|
+
| `sana-1.5` | NVIDIA | 1.6B | 20 | 8GB+ | ✅ | Apache 2.0 |
|
|
309
|
+
| `cogview4` | Zhipu AI | 6B | 50 | 12GB+ | ✅ | Apache 2.0 |
|
|
310
|
+
| `kolors` | Kuaishou | 8.6B | 50 | 8GB+ | ✅ | Kolors License |
|
|
311
|
+
| `hunyuan-dit` | Tencent | 1.5B | 50 | 6GB+ | ✅ | Tencent Community |
|
|
312
|
+
| `lumina-2` | Alpha-VLLM | 2B | 30 | 8GB+ | ✅ | Apache 2.0 |
|
|
313
|
+
| `pixart-sigma` | PixArt | 0.6B | 20 | 6GB+ | ✅ | Open |
|
|
314
|
+
| `auraflow` | Fal | 6.8B | 50 | 12GB+ | ✅ | Apache 2.0 |
|
|
315
|
+
| `omnigen` | BAAI | 3.8B | 50 | 12GB+ | ✅ | MIT |
|
|
316
|
+
|
|
317
|
+
### Fast / Turbo Models
|
|
318
|
+
|
|
319
|
+
| Model | Steps | VRAM | Notes |
|
|
320
|
+
|-------|-------|------|-------|
|
|
321
|
+
| `sdxl-turbo` | 1 | 6GB+ | Single-step distilled SDXL |
|
|
322
|
+
| `sdxl-lightning-4step` | 4 | 6GB+ | ByteDance, custom scheduler |
|
|
323
|
+
| `stable-diffusion-3.5-large-turbo` | 4 | 12GB+ | Distilled SD 3.5 Large |
|
|
324
|
+
| `z-image-turbo` | 8 | 10GB+ | Alibaba 6B turbo |
|
|
325
|
+
|
|
326
|
+
### Community Fine-Tunes
|
|
327
|
+
|
|
328
|
+
| Model | Base | Notes |
|
|
329
|
+
|-------|------|-------|
|
|
330
|
+
| `realvisxl-v4` | SDXL | Photorealistic, very popular |
|
|
331
|
+
| `dreamshaper` | SD 1.5 | Versatile artistic model |
|
|
332
|
+
| `realistic-vision-v6` | SD 1.5 | Portrait specialist |
|
|
333
|
+
|
|
334
|
+
### FLUX Pipeline Variants
|
|
335
|
+
|
|
336
|
+
| Model | Pipeline | Use Case |
|
|
337
|
+
|-------|----------|----------|
|
|
338
|
+
| `flux.1-fill-dev` | FluxFillPipeline | Inpainting / outpainting |
|
|
339
|
+
| `flux.1-canny-dev` | FluxControlPipeline | Canny edge control |
|
|
340
|
+
| `flux.1-depth-dev` | FluxControlPipeline | Depth map control |
|
|
259
341
|
|
|
260
342
|
### 💾 GGUF Models - Reduced Memory Requirements
|
|
261
343
|
|
|
262
|
-
|
|
344
|
+
GGUF quantized models enable running FLUX.1-dev on budget hardware:
|
|
263
345
|
|
|
264
346
|
| GGUF Variant | VRAM | Quality | Best For |
|
|
265
347
|
|--------------|------|---------|----------|
|
|
@@ -270,11 +352,6 @@ Choose from a variety of state-of-the-art image generation models:
|
|
|
270
352
|
|
|
271
353
|
📖 **[Complete GGUF Guide](GGUF_GUIDE.md)** - Hardware recommendations, installation, and optimization tips
|
|
272
354
|
|
|
273
|
-
### Why Choose FLUX.1-schnell?
|
|
274
|
-
- **Apache 2.0 license** - Perfect for commercial use
|
|
275
|
-
- **4-step generation** - Lightning fast results
|
|
276
|
-
- **Commercial OK** - Use in your business
|
|
277
|
-
|
|
278
355
|
---
|
|
279
356
|
|
|
280
357
|
## 🎛️ ControlNet Features
|
|
@@ -335,6 +412,16 @@ ollamadiffuser lora unload
|
|
|
335
412
|
ollamadiffuser pull stable-diffusion-1.5
|
|
336
413
|
ollamadiffuser run stable-diffusion-1.5
|
|
337
414
|
|
|
415
|
+
# Model registry management
|
|
416
|
+
ollamadiffuser registry list
|
|
417
|
+
ollamadiffuser registry list --installed-only
|
|
418
|
+
ollamadiffuser registry check-gguf
|
|
419
|
+
|
|
420
|
+
# Configuration management
|
|
421
|
+
ollamadiffuser config # show all config
|
|
422
|
+
ollamadiffuser config set models_dir /mnt/ssd/models # custom model path
|
|
423
|
+
ollamadiffuser config set server.port 9000 # change server port
|
|
424
|
+
|
|
338
425
|
# In another terminal, generate images via API
|
|
339
426
|
curl -X POST http://localhost:8000/api/generate \
|
|
340
427
|
-H "Content-Type: application/json" \
|
|
@@ -366,18 +453,75 @@ Features:
|
|
|
366
453
|
```bash
|
|
367
454
|
# Start API server
|
|
368
455
|
ollamadiffuser --mode api
|
|
369
|
-
|
|
370
456
|
ollamadiffuser load stable-diffusion-1.5
|
|
371
457
|
|
|
372
|
-
#
|
|
458
|
+
# Text-to-image
|
|
373
459
|
curl -X POST http://localhost:8000/api/generate \
|
|
374
460
|
-H "Content-Type: application/json" \
|
|
375
|
-
-d '{"prompt": "a beautiful landscape", "width": 1024, "height": 1024}'
|
|
461
|
+
-d '{"prompt": "a beautiful landscape", "width": 1024, "height": 1024, "seed": 42}'
|
|
462
|
+
|
|
463
|
+
# Image-to-image
|
|
464
|
+
curl -X POST http://localhost:8000/api/generate/img2img \
|
|
465
|
+
-F "prompt=oil painting style" \
|
|
466
|
+
-F "strength=0.75" \
|
|
467
|
+
-F "image=@input.png" \
|
|
468
|
+
--output result.png
|
|
469
|
+
|
|
470
|
+
# Inpainting
|
|
471
|
+
curl -X POST http://localhost:8000/api/generate/inpaint \
|
|
472
|
+
-F "prompt=a red car" \
|
|
473
|
+
-F "image=@photo.png" \
|
|
474
|
+
-F "mask=@mask.png" \
|
|
475
|
+
--output inpainted.png
|
|
476
|
+
|
|
477
|
+
# API docs: http://localhost:8000/docs
|
|
478
|
+
```
|
|
479
|
+
|
|
480
|
+
### MCP Server (AI Assistant Integration)
|
|
481
|
+
|
|
482
|
+
OllamaDiffuser includes a [Model Context Protocol](https://modelcontextprotocol.io/) server for integration with AI assistants like OpenClaw, Claude Code, and Cursor.
|
|
376
483
|
|
|
377
|
-
|
|
378
|
-
|
|
484
|
+
```bash
|
|
485
|
+
# Install MCP support
|
|
486
|
+
pip install "ollamadiffuser[mcp]"
|
|
487
|
+
|
|
488
|
+
# Start MCP server (stdio transport)
|
|
489
|
+
ollamadiffuser mcp
|
|
379
490
|
```
|
|
380
491
|
|
|
492
|
+
**MCP client configuration** (e.g. `claude_desktop_config.json`):
|
|
493
|
+
```json
|
|
494
|
+
{
|
|
495
|
+
"mcpServers": {
|
|
496
|
+
"ollamadiffuser": {
|
|
497
|
+
"command": "ollamadiffuser-mcp"
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
}
|
|
501
|
+
```
|
|
502
|
+
|
|
503
|
+
**Available MCP tools:**
|
|
504
|
+
- `generate_image` -- Generate images from text prompts (auto-loads model)
|
|
505
|
+
- `list_models` -- List available and installed models
|
|
506
|
+
- `load_model` -- Load a model into memory
|
|
507
|
+
- `get_status` -- Check device, loaded model, and system status
|
|
508
|
+
|
|
509
|
+
### OpenClaw AgentSkill
|
|
510
|
+
|
|
511
|
+
An [OpenClaw](https://github.com/openclaw/openclaw) skill is included at `integrations/openclaw/SKILL.md`. It uses the REST API with `response_format=b64_json` for agent-friendly base64 image responses. Copy the skill directory to your OpenClaw skills folder or publish to ClawHub.
|
|
512
|
+
|
|
513
|
+
### Base64 JSON API Response
|
|
514
|
+
|
|
515
|
+
For AI agents and messaging platforms, use `response_format=b64_json` to get images as JSON:
|
|
516
|
+
|
|
517
|
+
```bash
|
|
518
|
+
curl -X POST http://localhost:8000/api/generate \
|
|
519
|
+
-H "Content-Type: application/json" \
|
|
520
|
+
-d '{"prompt": "a sunset over mountains", "response_format": "b64_json"}'
|
|
521
|
+
```
|
|
522
|
+
|
|
523
|
+
Response: `{"image": "<base64 PNG>", "format": "png", "width": 1024, "height": 1024}`
|
|
524
|
+
|
|
381
525
|
### Python API
|
|
382
526
|
```python
|
|
383
527
|
from ollamadiffuser.core.models.manager import model_manager
|
|
@@ -386,30 +530,59 @@ from ollamadiffuser.core.models.manager import model_manager
|
|
|
386
530
|
success = model_manager.load_model("stable-diffusion-1.5")
|
|
387
531
|
if success:
|
|
388
532
|
engine = model_manager.loaded_model
|
|
389
|
-
|
|
390
|
-
#
|
|
533
|
+
|
|
534
|
+
# Text-to-image (seed is optional; omit for random)
|
|
391
535
|
image = engine.generate_image(
|
|
392
536
|
prompt="a beautiful sunset",
|
|
393
537
|
width=1024,
|
|
394
|
-
height=1024
|
|
538
|
+
height=1024,
|
|
539
|
+
seed=42,
|
|
395
540
|
)
|
|
396
541
|
image.save("output.jpg")
|
|
542
|
+
|
|
543
|
+
# Image-to-image
|
|
544
|
+
from PIL import Image
|
|
545
|
+
input_img = Image.open("photo.jpg")
|
|
546
|
+
result = engine.generate_image(
|
|
547
|
+
prompt="watercolor painting",
|
|
548
|
+
image=input_img,
|
|
549
|
+
strength=0.7,
|
|
550
|
+
)
|
|
551
|
+
result.save("img2img_output.jpg")
|
|
397
552
|
else:
|
|
398
553
|
print("Failed to load model")
|
|
399
554
|
```
|
|
400
555
|
|
|
401
|
-
## 📦
|
|
556
|
+
## 📦 Model Ecosystem
|
|
402
557
|
|
|
403
558
|
### Base Models
|
|
404
|
-
- **Stable Diffusion 1.5**: Classic, reliable, fast
|
|
405
|
-
- **Stable Diffusion XL**: High-resolution, detailed
|
|
406
|
-
- **Stable Diffusion 3**:
|
|
407
|
-
- **FLUX.1**:
|
|
559
|
+
- **Stable Diffusion 1.5**: Classic, reliable, fast (img2img + inpainting)
|
|
560
|
+
- **Stable Diffusion XL**: High-resolution, detailed (img2img + inpainting, scheduler overrides)
|
|
561
|
+
- **Stable Diffusion 3.5**: Medium, Large, and Large Turbo variants
|
|
562
|
+
- **FLUX.1**: schnell, dev, Fill, Canny, Depth pipeline variants
|
|
563
|
+
- **HiDream**: Multi-prompt generation with bfloat16
|
|
564
|
+
- **AnimateDiff**: Video/animation generation
|
|
565
|
+
|
|
566
|
+
### Next-Generation Models
|
|
567
|
+
- **FLUX.2**: 32B dev and 4B Klein variants from Black Forest Labs
|
|
568
|
+
- **Chinese Models**: CogView4 (Zhipu), Kolors (Kuaishou), Hunyuan-DiT (Tencent), Z-Image (Alibaba)
|
|
569
|
+
- **Efficient Models**: SANA 1.5 (1.6B), PixArt-Sigma (0.6B) -- high quality at low VRAM
|
|
570
|
+
- **Open Models**: AuraFlow (6.8B, Apache 2.0), OmniGen (3.8B, MIT), Lumina 2.0 (2B, Apache 2.0)
|
|
571
|
+
|
|
572
|
+
### Fast / Turbo Models
|
|
573
|
+
- **SDXL Turbo**: Single-step inference from Stability AI
|
|
574
|
+
- **SDXL Lightning**: 4-step with custom scheduler from ByteDance
|
|
575
|
+
- **Z-Image Turbo**: 8-step turbo from Alibaba
|
|
576
|
+
|
|
577
|
+
### Community Fine-Tunes
|
|
578
|
+
- **RealVisXL V4**: Photorealistic SDXL, very popular
|
|
579
|
+
- **DreamShaper**: Versatile artistic SD 1.5 model
|
|
580
|
+
- **Realistic Vision V6**: Portrait specialist
|
|
408
581
|
|
|
409
582
|
### GGUF Quantized Models
|
|
410
583
|
- **FLUX.1-dev GGUF**: 7 quantization levels (3GB-16GB VRAM)
|
|
411
584
|
- **Memory Efficient**: Run high-quality models on budget hardware
|
|
412
|
-
- **
|
|
585
|
+
- **Optional Install**: `pip install "ollamadiffuser[gguf]"`
|
|
413
586
|
|
|
414
587
|
### ControlNet Models
|
|
415
588
|
- **SD 1.5 ControlNet**: 4 control types (canny, depth, openpose, scribble)
|
|
@@ -421,14 +594,32 @@ else:
|
|
|
421
594
|
- **Dynamic Loading**: Load/unload without model restart
|
|
422
595
|
- **Strength Control**: Adjustable influence (0.1-2.0)
|
|
423
596
|
|
|
424
|
-
## ⚙️
|
|
597
|
+
## ⚙️ Architecture
|
|
425
598
|
|
|
426
|
-
###
|
|
599
|
+
### Strategy Pattern Engine
|
|
600
|
+
Each model type has a dedicated strategy class handling loading and generation:
|
|
601
|
+
|
|
602
|
+
```
|
|
603
|
+
InferenceEngine (facade)
|
|
604
|
+
-> SD15Strategy (512x512, float32 on MPS, img2img, inpainting)
|
|
605
|
+
-> SDXLStrategy (1024x1024, img2img, inpainting, scheduler overrides)
|
|
606
|
+
-> FluxStrategy (schnell/dev/Fill/Canny/Depth, dynamic pipeline class)
|
|
607
|
+
-> SD3Strategy (1024x1024, 28 steps, guidance=3.5)
|
|
608
|
+
-> ControlNetStrategy (SD15 + SDXL base models)
|
|
609
|
+
-> VideoStrategy (AnimateDiff, 16 frames)
|
|
610
|
+
-> HiDreamStrategy (bfloat16, multi-prompt)
|
|
611
|
+
-> GGUFStrategy (quantized via stable-diffusion-cpp)
|
|
612
|
+
-> GenericPipelineStrategy (any diffusers pipeline via config)
|
|
613
|
+
```
|
|
614
|
+
|
|
615
|
+
The `GenericPipelineStrategy` dynamically loads any `diffusers` pipeline class specified in the model registry, so new models can be added with zero code changes.
|
|
616
|
+
|
|
617
|
+
### Configuration
|
|
427
618
|
Models are automatically configured with optimal settings:
|
|
428
619
|
- **Memory Optimization**: Attention slicing, CPU offloading
|
|
429
620
|
- **Device Detection**: Automatic CUDA/MPS/CPU selection
|
|
430
|
-
- **Precision Handling**: FP16/BF16
|
|
431
|
-
- **Safety
|
|
621
|
+
- **Precision Handling**: FP16/BF16 per model type
|
|
622
|
+
- **Safety Disabled**: Unified `SAFETY_DISABLED_KWARGS` (no monkey-patching)
|
|
432
623
|
|
|
433
624
|
## 🔧 Advanced Usage
|
|
434
625
|
|
|
@@ -503,7 +694,7 @@ with open("control.jpg", "rb") as f:
|
|
|
503
694
|
### Minimum Requirements
|
|
504
695
|
- **RAM**: 8GB system RAM
|
|
505
696
|
- **Storage**: 10GB free space
|
|
506
|
-
- **Python**: 3.
|
|
697
|
+
- **Python**: 3.10+
|
|
507
698
|
|
|
508
699
|
### Recommended Hardware
|
|
509
700
|
|
|
@@ -512,6 +703,12 @@ with open("control.jpg", "rb") as f:
|
|
|
512
703
|
- **RAM**: 16GB+ system RAM
|
|
513
704
|
- **Storage**: SSD with 50GB+ free space
|
|
514
705
|
|
|
706
|
+
#### For Apple Silicon (Mac Mini / MacBook)
|
|
707
|
+
- **16GB unified memory**: PixArt-Sigma, SANA 1.5, DreamShaper, SD 1.5/XL, GGUF q2k-q5ks
|
|
708
|
+
- **24GB+ unified memory**: CogView4, Kolors, Lumina 2.0, GGUF q6k-q8
|
|
709
|
+
- **GGUF with Metal**: Install `stable-diffusion-cpp-python` with `CMAKE_ARGS="-DSD_METAL=ON"` for GPU acceleration
|
|
710
|
+
- Run `ollamadiffuser recommend` to see what fits your hardware
|
|
711
|
+
|
|
515
712
|
#### For GGUF Models (Memory Efficient)
|
|
516
713
|
- **GPU**: 3GB+ VRAM (or CPU only)
|
|
517
714
|
- **RAM**: 8GB+ system RAM (16GB+ for CPU inference)
|
|
@@ -519,7 +716,7 @@ with open("control.jpg", "rb") as f:
|
|
|
519
716
|
|
|
520
717
|
### Supported Platforms
|
|
521
718
|
- **CUDA**: NVIDIA GPUs (recommended)
|
|
522
|
-
- **MPS**: Apple Silicon (M1/M2/M3)
|
|
719
|
+
- **MPS**: Apple Silicon (M1/M2/M3/M4) -- native support for 30+ models including GGUF
|
|
523
720
|
- **CPU**: All platforms (slower but functional)
|
|
524
721
|
|
|
525
722
|
## 🔧 Troubleshooting
|
|
@@ -550,7 +747,7 @@ pip install 'ollamadiffuser[full]'
|
|
|
550
747
|
#### GGUF Support Issues
|
|
551
748
|
```bash
|
|
552
749
|
# Install GGUF dependencies
|
|
553
|
-
pip install
|
|
750
|
+
pip install "ollamadiffuser[gguf]"
|
|
554
751
|
|
|
555
752
|
# Check GGUF support
|
|
556
753
|
ollamadiffuser registry check-gguf
|
|
@@ -689,9 +886,21 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
|
|
|
689
886
|
## 🙏 Acknowledgments
|
|
690
887
|
|
|
691
888
|
- **Stability AI**: For Stable Diffusion models
|
|
692
|
-
- **Black Forest Labs**: For FLUX.1 models
|
|
889
|
+
- **Black Forest Labs**: For FLUX.1 and FLUX.2 models
|
|
890
|
+
- **Alibaba (Tongyi-MAI)**: For Z-Image Turbo
|
|
891
|
+
- **NVIDIA (Efficient-Large-Model)**: For SANA 1.5
|
|
892
|
+
- **Zhipu AI (THUDM)**: For CogView4
|
|
893
|
+
- **Kuaishou (Kwai-Kolors)**: For Kolors
|
|
894
|
+
- **Tencent (Hunyuan)**: For Hunyuan-DiT
|
|
895
|
+
- **Alpha-VLLM**: For Lumina 2.0
|
|
896
|
+
- **PixArt-alpha**: For PixArt-Sigma
|
|
897
|
+
- **Fal**: For AuraFlow
|
|
898
|
+
- **BAAI (Shitao)**: For OmniGen
|
|
899
|
+
- **ByteDance**: For SDXL Lightning
|
|
693
900
|
- **city96**: For FLUX.1-dev GGUF quantizations
|
|
694
901
|
- **Hugging Face**: For model hosting and diffusers library
|
|
902
|
+
- **Anthropic**: For Model Context Protocol (MCP)
|
|
903
|
+
- **OpenClaw**: For AI agent ecosystem integration
|
|
695
904
|
- **ControlNet Team**: For ControlNet architecture
|
|
696
905
|
- **Community**: For feedback and contributions
|
|
697
906
|
|