ollamadiffuser 1.2.2__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ollamadiffuser/__init__.py +1 -1
- ollamadiffuser/api/server.py +312 -312
- ollamadiffuser/cli/config_commands.py +119 -0
- ollamadiffuser/cli/lora_commands.py +169 -0
- ollamadiffuser/cli/main.py +85 -1233
- ollamadiffuser/cli/model_commands.py +664 -0
- ollamadiffuser/cli/recommend_command.py +205 -0
- ollamadiffuser/cli/registry_commands.py +197 -0
- ollamadiffuser/core/config/model_registry.py +562 -11
- ollamadiffuser/core/config/settings.py +24 -2
- ollamadiffuser/core/inference/__init__.py +5 -0
- ollamadiffuser/core/inference/base.py +182 -0
- ollamadiffuser/core/inference/engine.py +204 -1405
- ollamadiffuser/core/inference/strategies/__init__.py +1 -0
- ollamadiffuser/core/inference/strategies/controlnet_strategy.py +170 -0
- ollamadiffuser/core/inference/strategies/flux_strategy.py +136 -0
- ollamadiffuser/core/inference/strategies/generic_strategy.py +164 -0
- ollamadiffuser/core/inference/strategies/gguf_strategy.py +113 -0
- ollamadiffuser/core/inference/strategies/hidream_strategy.py +104 -0
- ollamadiffuser/core/inference/strategies/sd15_strategy.py +134 -0
- ollamadiffuser/core/inference/strategies/sd3_strategy.py +80 -0
- ollamadiffuser/core/inference/strategies/sdxl_strategy.py +131 -0
- ollamadiffuser/core/inference/strategies/video_strategy.py +108 -0
- ollamadiffuser/mcp/__init__.py +0 -0
- ollamadiffuser/mcp/server.py +184 -0
- ollamadiffuser/ui/templates/index.html +62 -1
- ollamadiffuser/ui/web.py +116 -54
- {ollamadiffuser-1.2.2.dist-info → ollamadiffuser-2.0.0.dist-info}/METADATA +337 -108
- ollamadiffuser-2.0.0.dist-info/RECORD +61 -0
- {ollamadiffuser-1.2.2.dist-info → ollamadiffuser-2.0.0.dist-info}/WHEEL +1 -1
- {ollamadiffuser-1.2.2.dist-info → ollamadiffuser-2.0.0.dist-info}/entry_points.txt +1 -0
- ollamadiffuser/core/models/registry.py +0 -384
- ollamadiffuser/ui/samples/.DS_Store +0 -0
- ollamadiffuser-1.2.2.dist-info/RECORD +0 -45
- {ollamadiffuser-1.2.2.dist-info → ollamadiffuser-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {ollamadiffuser-1.2.2.dist-info → ollamadiffuser-2.0.0.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ollamadiffuser
|
|
3
|
-
Version: 1.2.2
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 2.0.0
|
|
4
|
+
Summary: Local AI Image Generation with Ollama-style CLI for Stable Diffusion, FLUX, and LoRA support
|
|
5
5
|
Home-page: https://github.com/ollamadiffuser/ollamadiffuser
|
|
6
6
|
Author: OllamaDiffuser Team
|
|
7
7
|
Author-email: OllamaDiffuser Team <ollamadiffuser@gmail.com>
|
|
@@ -14,7 +14,7 @@ Project-URL: Documentation, https://www.ollamadiffuser.com/
|
|
|
14
14
|
Project-URL: Bug Reports, https://github.com/ollamadiffuser/ollamadiffuser/issues
|
|
15
15
|
Project-URL: Feature Requests, https://github.com/ollamadiffuser/ollamadiffuser/issues
|
|
16
16
|
Project-URL: Source Code, https://github.com/ollamadiffuser/ollamadiffuser
|
|
17
|
-
Keywords: diffusion,image-generation,ai,machine-learning,lora,ollama,stable-diffusion,flux,local-ai,controlnet,web-ui,cli
|
|
17
|
+
Keywords: diffusion,image-generation,ai,machine-learning,lora,ollama,stable-diffusion,flux,local-ai,controlnet,web-ui,cli,img2img,inpainting,mcp,openclaw
|
|
18
18
|
Classifier: Development Status :: 4 - Beta
|
|
19
19
|
Classifier: Intended Audience :: Developers
|
|
20
20
|
Classifier: Intended Audience :: End Users/Desktop
|
|
@@ -33,40 +33,64 @@ Classifier: Environment :: Web Environment
|
|
|
33
33
|
Requires-Python: >=3.10
|
|
34
34
|
Description-Content-Type: text/markdown
|
|
35
35
|
License-File: LICENSE
|
|
36
|
-
Requires-Dist: torch>=2.
|
|
37
|
-
Requires-Dist: diffusers>=0.
|
|
38
|
-
Requires-Dist: transformers>=4.
|
|
39
|
-
Requires-Dist: accelerate>=0.
|
|
40
|
-
Requires-Dist: fastapi>=0.
|
|
36
|
+
Requires-Dist: torch>=2.4.0
|
|
37
|
+
Requires-Dist: diffusers>=0.34.0
|
|
38
|
+
Requires-Dist: transformers>=4.40.0
|
|
39
|
+
Requires-Dist: accelerate>=1.0.0
|
|
40
|
+
Requires-Dist: fastapi>=0.110.0
|
|
41
41
|
Requires-Dist: uvicorn>=0.23.0
|
|
42
|
-
Requires-Dist: huggingface-hub>=0.
|
|
43
|
-
Requires-Dist: Pillow>=
|
|
42
|
+
Requires-Dist: huggingface-hub>=0.25.0
|
|
43
|
+
Requires-Dist: Pillow>=10.0.0
|
|
44
44
|
Requires-Dist: click>=8.0.0
|
|
45
45
|
Requires-Dist: rich>=13.0.0
|
|
46
46
|
Requires-Dist: pydantic>=2.0.0
|
|
47
47
|
Requires-Dist: protobuf>=3.20.0
|
|
48
48
|
Requires-Dist: sentencepiece>=0.1.99
|
|
49
|
-
Requires-Dist: safetensors>=0.
|
|
49
|
+
Requires-Dist: safetensors>=0.4.0
|
|
50
50
|
Requires-Dist: python-multipart>=0.0.0
|
|
51
51
|
Requires-Dist: psutil>=5.9.0
|
|
52
52
|
Requires-Dist: jinja2>=3.0.0
|
|
53
|
-
Requires-Dist: peft>=0.
|
|
54
|
-
Requires-Dist: numpy>=1.
|
|
53
|
+
Requires-Dist: peft>=0.13.0
|
|
54
|
+
Requires-Dist: numpy>=1.26.0
|
|
55
55
|
Requires-Dist: controlnet-aux>=0.0.7
|
|
56
56
|
Requires-Dist: opencv-python>=4.8.0
|
|
57
|
-
Requires-Dist:
|
|
58
|
-
Requires-Dist:
|
|
57
|
+
Requires-Dist: requests>=2.28.0
|
|
58
|
+
Requires-Dist: PyYAML>=6.0
|
|
59
|
+
Provides-Extra: gguf
|
|
60
|
+
Requires-Dist: stable-diffusion-cpp-python>=0.1.0; extra == "gguf"
|
|
61
|
+
Requires-Dist: gguf>=0.1.0; extra == "gguf"
|
|
62
|
+
Provides-Extra: full
|
|
63
|
+
Requires-Dist: stable-diffusion-cpp-python>=0.1.0; extra == "full"
|
|
64
|
+
Requires-Dist: gguf>=0.1.0; extra == "full"
|
|
65
|
+
Requires-Dist: mcp[cli]>=1.0.0; extra == "full"
|
|
66
|
+
Provides-Extra: mcp
|
|
67
|
+
Requires-Dist: mcp[cli]>=1.0.0; extra == "mcp"
|
|
68
|
+
Provides-Extra: openclaw
|
|
69
|
+
Requires-Dist: mcp[cli]>=1.0.0; extra == "openclaw"
|
|
59
70
|
Provides-Extra: dev
|
|
60
71
|
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
61
72
|
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
73
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
74
|
+
Requires-Dist: httpx>=0.24.0; extra == "dev"
|
|
62
75
|
Requires-Dist: black>=23.0.0; extra == "dev"
|
|
63
76
|
Requires-Dist: isort>=5.12.0; extra == "dev"
|
|
64
77
|
Requires-Dist: flake8>=6.0.0; extra == "dev"
|
|
78
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
65
79
|
Dynamic: author
|
|
66
80
|
Dynamic: home-page
|
|
67
81
|
Dynamic: license-file
|
|
68
82
|
Dynamic: requires-python
|
|
69
83
|
|
|
84
|
+
### ⚠️ Project Status: Maintenance Mode
|
|
85
|
+
|
|
86
|
+
**Thank you for the incredible support and over 5,000 downloads!**
|
|
87
|
+
|
|
88
|
+
Please be aware that `ollamadiffuser` is currently in **maintenance mode**. Due to the creator's other professional commitments, active feature development has been paused.
|
|
89
|
+
|
|
90
|
+
The project in its current state is stable and will remain available for use. However, new features will not be added, and non-critical issues may not be addressed in the near future.
|
|
91
|
+
|
|
92
|
+
This project laid the foundation for a more ambitious vision: **[LocalKinAI](https://github.com/LocalKinAI)**. Thank you for being part of the journey.
|
|
93
|
+
|
|
70
94
|
# OllamaDiffuser 🎨
|
|
71
95
|
|
|
72
96
|
[![PyPI version](https://badge.fury.io/py/ollamadiffuser.svg)](https://badge.fury.io/py/ollamadiffuser)
|
|
@@ -76,102 +100,93 @@ Dynamic: requires-python
|
|
|
76
100
|
|
|
77
101
|
## Local AI Image Generation with OllamaDiffuser
|
|
78
102
|
|
|
79
|
-
**OllamaDiffuser** simplifies local deployment of **Stable Diffusion**, **FLUX
|
|
103
|
+
**OllamaDiffuser** simplifies local deployment of **Stable Diffusion**, **FLUX**, **CogView4**, **Kolors**, **SANA**, **PixArt-Sigma**, and 40+ other AI image generation models. It is an intuitive **local SD** (Stable Diffusion) tool inspired by **Ollama's** simplicity -- ideal for **local diffuser** workflows, with a CLI, a web UI, and LoRA support.
|
|
80
104
|
|
|
81
105
|
🌐 **Website**: [ollamadiffuser.com](https://www.ollamadiffuser.com/) | 📦 **PyPI**: [pypi.org/project/ollamadiffuser](https://pypi.org/project/ollamadiffuser/)
|
|
82
106
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
## 🔑 Hugging Face Authentication
|
|
86
|
-
|
|
87
|
-
**Do you need a Hugging Face token?** It depends on which models you want to use!
|
|
88
|
-
|
|
89
|
-
### 🟢 Models that DON'T require a token:
|
|
90
|
-
- **FLUX.1-schnell** - Apache 2.0 license, ready to use ✅
|
|
91
|
-
- **Stable Diffusion 1.5** - Basic model, no authentication needed ✅
|
|
92
|
-
- **Most ControlNet models** - Generally public access ✅
|
|
107
|
+
> **Upgrading from v1.x?** v2.0 is a major rewrite requiring **Python 3.10+**. Run `pip install --upgrade "ollamadiffuser[full]"` and see the [Migration Guide](#-migration-guide) below.
|
|
93
108
|
|
|
94
|
-
|
|
95
|
-
- **FLUX.1-dev** - Requires HF token and license agreement ⚠️
|
|
96
|
-
- **Stable Diffusion 3.5** - Requires HF token and license agreement ⚠️
|
|
97
|
-
- **Some premium LoRAs** - Gated models from Hugging Face ⚠️
|
|
109
|
+
---
|
|
98
110
|
|
|
99
|
-
|
|
111
|
+
## 🚀 Quick Start (v2.0)
|
|
100
112
|
|
|
101
|
-
**For
|
|
113
|
+
**For Mac/PC Users:**
|
|
102
114
|
```bash
|
|
103
|
-
|
|
104
|
-
ollamadiffuser
|
|
105
|
-
ollamadiffuser pull stable-diffusion-1.5
|
|
115
|
+
pip install "ollamadiffuser[full]"
|
|
116
|
+
ollamadiffuser recommend # Find which models fit your GPU
|
|
106
117
|
```
|
|
107
118
|
|
|
108
|
-
**For
|
|
119
|
+
**For OpenClaw/Agent Users:**
|
|
109
120
|
```bash
|
|
110
|
-
|
|
111
|
-
|
|
121
|
+
pip install "ollamadiffuser[mcp]"
|
|
122
|
+
ollamadiffuser mcp # Starts the MCP server
|
|
123
|
+
```
|
|
112
124
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
125
|
+
**For Low-VRAM / Budget GPU Users:**
|
|
126
|
+
```bash
|
|
127
|
+
pip install "ollamadiffuser[gguf]"
|
|
128
|
+
ollamadiffuser pull flux.1-dev-gguf-q4ks # Only 6GB VRAM needed
|
|
129
|
+
ollamadiffuser run flux.1-dev-gguf-q4ks
|
|
116
130
|
```
|
|
117
131
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
1. **Create account**: Visit [huggingface.co](https://huggingface.co) and sign up
|
|
121
|
-
2. **Generate token**: Go to Settings → Access Tokens → Create new token
|
|
122
|
-
3. **Accept licenses**: Visit the model pages and accept license agreements:
|
|
123
|
-
- [FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev)
|
|
124
|
-
- [Stable Diffusion 3.5](https://huggingface.co/stabilityai/stable-diffusion-3.5-medium)
|
|
125
|
-
4. **Set environment variable**:
|
|
126
|
-
```bash
|
|
127
|
-
# Temporary (current session)
|
|
128
|
-
export HF_TOKEN=your_token_here
|
|
129
|
-
|
|
130
|
-
# Permanent (add to ~/.bashrc or ~/.zshrc)
|
|
131
|
-
echo 'export HF_TOKEN=your_token_here' >> ~/.bashrc
|
|
132
|
-
```
|
|
133
|
-
|
|
134
|
-
### 💡 Pro Tips:
|
|
135
|
-
- **Start simple**: Begin with FLUX.1-schnell (no token required, commercial use OK)
|
|
136
|
-
- **Token scope**: Use "read" permissions for downloading models
|
|
137
|
-
- **Privacy**: Your token stays local - never shared with OllamaDiffuser servers
|
|
138
|
-
- **Troubleshooting**: If downloads fail, verify your token and model access permissions
|
|
132
|
+
Most models work **without any token** -- just install and go. See [Hugging Face Authentication](#-hugging-face-authentication) when you want gated models like FLUX.1-dev or SD 3.5.
|
|
139
133
|
|
|
140
134
|
---
|
|
141
135
|
|
|
142
136
|
## ✨ Features
|
|
143
137
|
|
|
144
|
-
-
|
|
138
|
+
- **🏗️ Strategy Architecture**: Clean per-model strategy pattern (SD1.5, SDXL, FLUX, SD3, ControlNet, Video, HiDream, GGUF, Generic)
|
|
139
|
+
- **🌐 40+ Models**: FLUX.2, SD 3.5, SDXL Lightning, CogView4, Kolors, SANA, PixArt-Sigma, and more
|
|
140
|
+
- **🔌 Generic Pipeline**: Add new diffusers models via registry config alone -- no code changes needed
|
|
141
|
+
- **🖼️ img2img & Inpainting**: Image-to-image and inpainting support across SD1.5, SDXL, and the API/Web UI
|
|
142
|
+
- **⚡ Async API**: Non-blocking FastAPI server using `asyncio.to_thread` for GPU operations
|
|
143
|
+
- **🎲 Random Seeds**: Seeds are random by default; pass an explicit seed for reproducible generation
|
|
145
144
|
- **🎛️ ControlNet Support**: Precise image generation control with 10+ control types
|
|
146
145
|
- **🔄 LoRA Integration**: Dynamic LoRA loading and management
|
|
147
|
-
-
|
|
146
|
+
- **🔌 MCP & OpenClaw**: Model Context Protocol server for AI assistant integration (OpenClaw, Claude Code, Cursor)
|
|
147
|
+
- **🍎 Apple Silicon**: MPS dtype safety, GGUF Metal acceleration, `ollamadiffuser recommend` for hardware-aware model suggestions
|
|
148
|
+
- **📦 GGUF Support**: Memory-efficient quantized models (3GB VRAM minimum!) with CUDA and Metal acceleration
|
|
148
149
|
- **🌐 Multiple Interfaces**: CLI, Python API, Web UI, and REST API
|
|
149
150
|
- **📦 Model Management**: Easy installation and switching between models
|
|
150
151
|
- **⚡ Performance Optimized**: Memory-efficient with GPU acceleration
|
|
151
|
-
-
|
|
152
|
-
|
|
153
|
-
## 🚀 Quick Start
|
|
152
|
+
- **🧪 Test Suite**: 82 tests across settings, registry, engine, API, MPS, and MCP
|
|
154
153
|
|
|
155
154
|
### Option 1: Install from PyPI (Recommended)
|
|
156
155
|
```bash
|
|
157
156
|
# Install from PyPI
|
|
158
157
|
pip install ollamadiffuser
|
|
159
158
|
|
|
160
|
-
# Pull and run a model
|
|
159
|
+
# Pull and run a model
|
|
161
160
|
ollamadiffuser pull flux.1-schnell
|
|
162
161
|
ollamadiffuser run flux.1-schnell
|
|
163
162
|
|
|
164
|
-
# Generate via API
|
|
163
|
+
# Generate via API (seed is optional; set it for reproducible results)
|
|
165
164
|
curl -X POST http://localhost:8000/api/generate \
|
|
166
165
|
-H "Content-Type: application/json" \
|
|
167
|
-
-d '{"prompt": "A beautiful sunset"}' \
|
|
166
|
+
-d '{"prompt": "A beautiful sunset", "seed": 12345}' \
|
|
168
167
|
--output image.png
|
|
169
168
|
```
|
|
170
169
|
|
|
170
|
+
### 🔄 Update to Latest Version
|
|
171
|
+
|
|
172
|
+
**Always use the latest version** for the newest features and bug fixes:
|
|
173
|
+
|
|
174
|
+
```bash
|
|
175
|
+
# Update to latest version
|
|
176
|
+
pip uninstall ollamadiffuser
|
|
177
|
+
pip install --no-cache-dir ollamadiffuser
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
This ensures you get:
|
|
181
|
+
- 🐛 **Latest bug fixes**
|
|
182
|
+
- ✨ **New features and improvements**
|
|
183
|
+
- 🚀 **Performance optimizations**
|
|
184
|
+
- 🔒 **Security updates**
|
|
185
|
+
|
|
171
186
|
### GGUF Quick Start (Low VRAM)
|
|
172
187
|
```bash
|
|
173
188
|
# For systems with limited VRAM (3GB+)
|
|
174
|
-
pip install ollamadiffuser
|
|
189
|
+
pip install "ollamadiffuser[gguf]"
|
|
175
190
|
|
|
176
191
|
# Download memory-efficient GGUF model
|
|
177
192
|
ollamadiffuser pull flux.1-dev-gguf-q4ks
|
|
@@ -180,6 +195,22 @@ ollamadiffuser pull flux.1-dev-gguf-q4ks
|
|
|
180
195
|
ollamadiffuser run flux.1-dev-gguf-q4ks
|
|
181
196
|
```
|
|
182
197
|
|
|
198
|
+
### Apple Silicon Quick Start (Mac Mini / MacBook)
|
|
199
|
+
```bash
|
|
200
|
+
# See which models fit your Mac
|
|
201
|
+
ollamadiffuser recommend
|
|
202
|
+
|
|
203
|
+
# Best lightweight model (0.6B, <6GB)
|
|
204
|
+
ollamadiffuser pull pixart-sigma
|
|
205
|
+
ollamadiffuser run pixart-sigma
|
|
206
|
+
|
|
207
|
+
# GGUF with Metal acceleration (6GB, great quality)
|
|
208
|
+
pip install "ollamadiffuser[gguf]"
|
|
209
|
+
CMAKE_ARGS="-DSD_METAL=ON" pip install --upgrade --force-reinstall --no-cache-dir stable-diffusion-cpp-python
|
|
210
|
+
ollamadiffuser pull flux.1-dev-gguf-q4ks
|
|
211
|
+
ollamadiffuser run flux.1-dev-gguf-q4ks
|
|
212
|
+
```
|
|
213
|
+
|
|
183
214
|
### Option 2: Development Installation
|
|
184
215
|
```bash
|
|
185
216
|
# Clone the repository
|
|
@@ -229,21 +260,92 @@ curl -X POST http://localhost:8000/api/generate/controlnet \
|
|
|
229
260
|
|
|
230
261
|
---
|
|
231
262
|
|
|
232
|
-
##
|
|
263
|
+
## 🔑 Hugging Face Authentication
|
|
264
|
+
|
|
265
|
+
**Do you need a Hugging Face token?** It depends on which models you want to use!
|
|
233
266
|
|
|
234
|
-
|
|
267
|
+
**Models that DON'T require a token** -- ready to use right away:
|
|
268
|
+
- FLUX.1-schnell, Stable Diffusion 1.5, DreamShaper, PixArt-Sigma, SANA 1.5, most ControlNet models
|
|
235
269
|
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
270
|
+
**Models that DO require a token:**
|
|
271
|
+
- FLUX.1-dev, Stable Diffusion 3.5, some premium LoRAs
|
|
272
|
+
|
|
273
|
+
**Setup** (only needed for gated models):
|
|
274
|
+
```bash
|
|
275
|
+
# 1. Create account at https://huggingface.co and generate an access token
|
|
276
|
+
# 2. Accept license on the model page (e.g. FLUX.1-dev, SD 3.5)
|
|
277
|
+
# 3. Set your token
|
|
278
|
+
export HF_TOKEN=your_token_here
|
|
279
|
+
|
|
280
|
+
# 4. Now you can access gated models
|
|
281
|
+
ollamadiffuser pull flux.1-dev
|
|
282
|
+
ollamadiffuser pull stable-diffusion-3.5-medium
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
> **Tips:** Use "read" permissions for the token. Your token stays local -- never shared with OllamaDiffuser servers. Add `export HF_TOKEN=...` to `~/.bashrc` or `~/.zshrc` to make it permanent.
|
|
286
|
+
|
|
287
|
+
---
|
|
288
|
+
|
|
289
|
+
## 🎯 Supported Models
|
|
290
|
+
|
|
291
|
+
Choose from 40+ models spanning every major architecture:
|
|
292
|
+
|
|
293
|
+
### Core Models
|
|
294
|
+
|
|
295
|
+
| Model | Type | Steps | VRAM | Commercial | License |
|
|
296
|
+
|-------|------|-------|------|------------|---------|
|
|
297
|
+
| `flux.1-schnell` | flux | 4 | 16GB+ | ✅ | Apache 2.0 |
|
|
298
|
+
| `flux.1-dev` | flux | 20 | 20GB+ | ❌ | Non-commercial |
|
|
299
|
+
| `stable-diffusion-3.5-medium` | sd3 | 28 | 8GB+ | ⚠️ | Stability AI |
|
|
300
|
+
| `stable-diffusion-3.5-large` | sd3 | 28 | 12GB+ | ⚠️ | Stability AI |
|
|
301
|
+
| `stable-diffusion-3.5-large-turbo` | sd3 | 4 | 12GB+ | ⚠️ | Stability AI |
|
|
302
|
+
| `stable-diffusion-xl-base` | sdxl | 50 | 6GB+ | ⚠️ | CreativeML |
|
|
303
|
+
| `stable-diffusion-1.5` | sd15 | 50 | 4GB+ | ⚠️ | CreativeML |
|
|
304
|
+
|
|
305
|
+
### Next-Generation Models
|
|
306
|
+
|
|
307
|
+
| Model | Origin | Params | Steps | VRAM | Commercial | License |
|
|
308
|
+
|-------|--------|--------|-------|------|------------|---------|
|
|
309
|
+
| `flux.2-dev` | Black Forest Labs | 32B | 28 | 14GB+ | ❌ | Non-commercial |
|
|
310
|
+
| `flux.2-klein-4b` | Black Forest Labs | 4B | 28 | 10GB+ | ✅ | Apache 2.0 |
|
|
311
|
+
| `z-image-turbo` | Alibaba (Tongyi) | 6B | 8 | 10GB+ | ✅ | Apache 2.0 |
|
|
312
|
+
| `sana-1.5` | NVIDIA | 1.6B | 20 | 8GB+ | ✅ | Apache 2.0 |
|
|
313
|
+
| `cogview4` | Zhipu AI | 6B | 50 | 12GB+ | ✅ | Apache 2.0 |
|
|
314
|
+
| `kolors` | Kuaishou | 8.6B | 50 | 8GB+ | ✅ | Kolors License |
|
|
315
|
+
| `hunyuan-dit` | Tencent | 1.5B | 50 | 6GB+ | ✅ | Tencent Community |
|
|
316
|
+
| `lumina-2` | Alpha-VLLM | 2B | 30 | 8GB+ | ✅ | Apache 2.0 |
|
|
317
|
+
| `pixart-sigma` | PixArt | 0.6B | 20 | 6GB+ | ✅ | Open |
|
|
318
|
+
| `auraflow` | Fal | 6.8B | 50 | 12GB+ | ✅ | Apache 2.0 |
|
|
319
|
+
| `omnigen` | BAAI | 3.8B | 50 | 12GB+ | ✅ | MIT |
|
|
320
|
+
|
|
321
|
+
### Fast / Turbo Models
|
|
322
|
+
|
|
323
|
+
| Model | Steps | VRAM | Notes |
|
|
324
|
+
|-------|-------|------|-------|
|
|
325
|
+
| `sdxl-turbo` | 1 | 6GB+ | Single-step distilled SDXL |
|
|
326
|
+
| `sdxl-lightning-4step` | 4 | 6GB+ | ByteDance, custom scheduler |
|
|
327
|
+
| `stable-diffusion-3.5-large-turbo` | 4 | 12GB+ | Distilled SD 3.5 Large |
|
|
328
|
+
| `z-image-turbo` | 8 | 10GB+ | Alibaba 6B turbo |
|
|
329
|
+
|
|
330
|
+
### Community Fine-Tunes
|
|
331
|
+
|
|
332
|
+
| Model | Base | Notes |
|
|
333
|
+
|-------|------|-------|
|
|
334
|
+
| `realvisxl-v4` | SDXL | Photorealistic, very popular |
|
|
335
|
+
| `dreamshaper` | SD 1.5 | Versatile artistic model |
|
|
336
|
+
| `realistic-vision-v6` | SD 1.5 | Portrait specialist |
|
|
337
|
+
|
|
338
|
+
### FLUX Pipeline Variants
|
|
339
|
+
|
|
340
|
+
| Model | Pipeline | Use Case |
|
|
341
|
+
|-------|----------|----------|
|
|
342
|
+
| `flux.1-fill-dev` | FluxFillPipeline | Inpainting / outpainting |
|
|
343
|
+
| `flux.1-canny-dev` | FluxControlPipeline | Canny edge control |
|
|
344
|
+
| `flux.1-depth-dev` | FluxControlPipeline | Depth map control |
|
|
243
345
|
|
|
244
346
|
### 💾 GGUF Models - Reduced Memory Requirements
|
|
245
347
|
|
|
246
|
-
|
|
348
|
+
GGUF quantized models enable running FLUX.1-dev on budget hardware:
|
|
247
349
|
|
|
248
350
|
| GGUF Variant | VRAM | Quality | Best For |
|
|
249
351
|
|--------------|------|---------|----------|
|
|
@@ -254,11 +356,6 @@ Choose from a variety of state-of-the-art image generation models:
|
|
|
254
356
|
|
|
255
357
|
📖 **[Complete GGUF Guide](GGUF_GUIDE.md)** - Hardware recommendations, installation, and optimization tips
|
|
256
358
|
|
|
257
|
-
### Why Choose FLUX.1-schnell?
|
|
258
|
-
- **Apache 2.0 license** - Perfect for commercial use
|
|
259
|
-
- **4-step generation** - Lightning fast results
|
|
260
|
-
- **Commercial OK** - Use in your business
|
|
261
|
-
|
|
262
359
|
---
|
|
263
360
|
|
|
264
361
|
## 🎛️ ControlNet Features
|
|
@@ -319,6 +416,16 @@ ollamadiffuser lora unload
|
|
|
319
416
|
ollamadiffuser pull stable-diffusion-1.5
|
|
320
417
|
ollamadiffuser run stable-diffusion-1.5
|
|
321
418
|
|
|
419
|
+
# Model registry management
|
|
420
|
+
ollamadiffuser registry list
|
|
421
|
+
ollamadiffuser registry list --installed-only
|
|
422
|
+
ollamadiffuser registry check-gguf
|
|
423
|
+
|
|
424
|
+
# Configuration management
|
|
425
|
+
ollamadiffuser config # show all config
|
|
426
|
+
ollamadiffuser config set models_dir /mnt/ssd/models # custom model path
|
|
427
|
+
ollamadiffuser config set server.port 9000 # change server port
|
|
428
|
+
|
|
322
429
|
# In another terminal, generate images via API
|
|
323
430
|
curl -X POST http://localhost:8000/api/generate \
|
|
324
431
|
-H "Content-Type: application/json" \
|
|
@@ -350,18 +457,75 @@ Features:
|
|
|
350
457
|
```bash
|
|
351
458
|
# Start API server
|
|
352
459
|
ollamadiffuser --mode api
|
|
353
|
-
|
|
354
460
|
ollamadiffuser load stable-diffusion-1.5
|
|
355
461
|
|
|
356
|
-
#
|
|
462
|
+
# Text-to-image
|
|
357
463
|
curl -X POST http://localhost:8000/api/generate \
|
|
358
464
|
-H "Content-Type: application/json" \
|
|
359
|
-
-d '{"prompt": "a beautiful landscape", "width": 1024, "height": 1024}'
|
|
465
|
+
-d '{"prompt": "a beautiful landscape", "width": 1024, "height": 1024, "seed": 42}'
|
|
466
|
+
|
|
467
|
+
# Image-to-image
|
|
468
|
+
curl -X POST http://localhost:8000/api/generate/img2img \
|
|
469
|
+
-F "prompt=oil painting style" \
|
|
470
|
+
-F "strength=0.75" \
|
|
471
|
+
-F "image=@input.png" \
|
|
472
|
+
--output result.png
|
|
473
|
+
|
|
474
|
+
# Inpainting
|
|
475
|
+
curl -X POST http://localhost:8000/api/generate/inpaint \
|
|
476
|
+
-F "prompt=a red car" \
|
|
477
|
+
-F "image=@photo.png" \
|
|
478
|
+
-F "mask=@mask.png" \
|
|
479
|
+
--output inpainted.png
|
|
480
|
+
|
|
481
|
+
# API docs: http://localhost:8000/docs
|
|
482
|
+
```
|
|
360
483
|
|
|
361
|
-
|
|
362
|
-
|
|
484
|
+
### MCP Server (AI Assistant Integration)
|
|
485
|
+
|
|
486
|
+
OllamaDiffuser includes a [Model Context Protocol](https://modelcontextprotocol.io/) server for integration with AI assistants like OpenClaw, Claude Code, and Cursor.
|
|
487
|
+
|
|
488
|
+
```bash
|
|
489
|
+
# Install MCP support
|
|
490
|
+
pip install "ollamadiffuser[mcp]"
|
|
491
|
+
|
|
492
|
+
# Start MCP server (stdio transport)
|
|
493
|
+
ollamadiffuser mcp
|
|
363
494
|
```
|
|
364
495
|
|
|
496
|
+
**MCP client configuration** (e.g. `claude_desktop_config.json`):
|
|
497
|
+
```json
|
|
498
|
+
{
|
|
499
|
+
"mcpServers": {
|
|
500
|
+
"ollamadiffuser": {
|
|
501
|
+
"command": "ollamadiffuser-mcp"
|
|
502
|
+
}
|
|
503
|
+
}
|
|
504
|
+
}
|
|
505
|
+
```
|
|
506
|
+
|
|
507
|
+
**Available MCP tools:**
|
|
508
|
+
- `generate_image` -- Generate images from text prompts (auto-loads model)
|
|
509
|
+
- `list_models` -- List available and installed models
|
|
510
|
+
- `load_model` -- Load a model into memory
|
|
511
|
+
- `get_status` -- Check device, loaded model, and system status
|
|
512
|
+
|
|
513
|
+
### OpenClaw AgentSkill
|
|
514
|
+
|
|
515
|
+
An [OpenClaw](https://github.com/openclaw/openclaw) skill is included at `integrations/openclaw/SKILL.md`. It uses the REST API with `response_format=b64_json` for agent-friendly base64 image responses. Copy the skill directory to your OpenClaw skills folder or publish to ClawHub.
|
|
516
|
+
|
|
517
|
+
### Base64 JSON API Response
|
|
518
|
+
|
|
519
|
+
For AI agents and messaging platforms, use `response_format=b64_json` to get images as JSON:
|
|
520
|
+
|
|
521
|
+
```bash
|
|
522
|
+
curl -X POST http://localhost:8000/api/generate \
|
|
523
|
+
-H "Content-Type: application/json" \
|
|
524
|
+
-d '{"prompt": "a sunset over mountains", "response_format": "b64_json"}'
|
|
525
|
+
```
|
|
526
|
+
|
|
527
|
+
Response: `{"image": "<base64 PNG>", "format": "png", "width": 1024, "height": 1024}`
|
|
528
|
+
|
|
365
529
|
### Python API
|
|
366
530
|
```python
|
|
367
531
|
from ollamadiffuser.core.models.manager import model_manager
|
|
@@ -370,30 +534,59 @@ from ollamadiffuser.core.models.manager import model_manager
|
|
|
370
534
|
success = model_manager.load_model("stable-diffusion-1.5")
|
|
371
535
|
if success:
|
|
372
536
|
engine = model_manager.loaded_model
|
|
373
|
-
|
|
374
|
-
#
|
|
537
|
+
|
|
538
|
+
# Text-to-image (seed is optional; omit for random)
|
|
375
539
|
image = engine.generate_image(
|
|
376
540
|
prompt="a beautiful sunset",
|
|
377
541
|
width=1024,
|
|
378
|
-
height=1024
|
|
542
|
+
height=1024,
|
|
543
|
+
seed=42,
|
|
379
544
|
)
|
|
380
545
|
image.save("output.jpg")
|
|
546
|
+
|
|
547
|
+
# Image-to-image
|
|
548
|
+
from PIL import Image
|
|
549
|
+
input_img = Image.open("photo.jpg")
|
|
550
|
+
result = engine.generate_image(
|
|
551
|
+
prompt="watercolor painting",
|
|
552
|
+
image=input_img,
|
|
553
|
+
strength=0.7,
|
|
554
|
+
)
|
|
555
|
+
result.save("img2img_output.jpg")
|
|
381
556
|
else:
|
|
382
557
|
print("Failed to load model")
|
|
383
558
|
```
|
|
384
559
|
|
|
385
|
-
## 📦
|
|
560
|
+
## 📦 Model Ecosystem
|
|
386
561
|
|
|
387
562
|
### Base Models
|
|
388
|
-
- **Stable Diffusion 1.5**: Classic, reliable, fast
|
|
389
|
-
- **Stable Diffusion XL**: High-resolution, detailed
|
|
390
|
-
- **Stable Diffusion 3**:
|
|
391
|
-
- **FLUX.1**:
|
|
563
|
+
- **Stable Diffusion 1.5**: Classic, reliable, fast (img2img + inpainting)
|
|
564
|
+
- **Stable Diffusion XL**: High-resolution, detailed (img2img + inpainting, scheduler overrides)
|
|
565
|
+
- **Stable Diffusion 3.5**: Medium, Large, and Large Turbo variants
|
|
566
|
+
- **FLUX.1**: schnell, dev, Fill, Canny, Depth pipeline variants
|
|
567
|
+
- **HiDream**: Multi-prompt generation with bfloat16
|
|
568
|
+
- **AnimateDiff**: Video/animation generation
|
|
569
|
+
|
|
570
|
+
### Next-Generation Models
|
|
571
|
+
- **FLUX.2**: 32B dev and 4B Klein variants from Black Forest Labs
|
|
572
|
+
- **Chinese Models**: CogView4 (Zhipu), Kolors (Kuaishou), Hunyuan-DiT (Tencent), Z-Image (Alibaba)
|
|
573
|
+
- **Efficient Models**: SANA 1.5 (1.6B), PixArt-Sigma (0.6B) -- high quality at low VRAM
|
|
574
|
+
- **Open Models**: AuraFlow (6.8B, Apache 2.0), OmniGen (3.8B, MIT), Lumina 2.0 (2B, Apache 2.0)
|
|
575
|
+
|
|
576
|
+
### Fast / Turbo Models
|
|
577
|
+
- **SDXL Turbo**: Single-step inference from Stability AI
|
|
578
|
+
- **SDXL Lightning**: 4-step with custom scheduler from ByteDance
|
|
579
|
+
- **Z-Image Turbo**: 8-step turbo from Alibaba
|
|
580
|
+
|
|
581
|
+
### Community Fine-Tunes
|
|
582
|
+
- **RealVisXL V4**: Photorealistic SDXL, very popular
|
|
583
|
+
- **DreamShaper**: Versatile artistic SD 1.5 model
|
|
584
|
+
- **Realistic Vision V6**: Portrait specialist
|
|
392
585
|
|
|
393
586
|
### GGUF Quantized Models
|
|
394
587
|
- **FLUX.1-dev GGUF**: 7 quantization levels (3GB-16GB VRAM)
|
|
395
588
|
- **Memory Efficient**: Run high-quality models on budget hardware
|
|
396
|
-
- **
|
|
589
|
+
- **Optional Install**: `pip install "ollamadiffuser[gguf]"`
|
|
397
590
|
|
|
398
591
|
### ControlNet Models
|
|
399
592
|
- **SD 1.5 ControlNet**: 4 control types (canny, depth, openpose, scribble)
|
|
@@ -405,14 +598,32 @@ else:
|
|
|
405
598
|
- **Dynamic Loading**: Load/unload without model restart
|
|
406
599
|
- **Strength Control**: Adjustable influence (0.1-2.0)
|
|
407
600
|
|
|
408
|
-
## ⚙️
|
|
601
|
+
## ⚙️ Architecture
|
|
602
|
+
|
|
603
|
+
### Strategy Pattern Engine
|
|
604
|
+
Each model type has a dedicated strategy class handling loading and generation:
|
|
409
605
|
|
|
410
|
-
|
|
606
|
+
```
|
|
607
|
+
InferenceEngine (facade)
|
|
608
|
+
-> SD15Strategy (512x512, float32 on MPS, img2img, inpainting)
|
|
609
|
+
-> SDXLStrategy (1024x1024, img2img, inpainting, scheduler overrides)
|
|
610
|
+
-> FluxStrategy (schnell/dev/Fill/Canny/Depth, dynamic pipeline class)
|
|
611
|
+
-> SD3Strategy (1024x1024, 28 steps, guidance=3.5)
|
|
612
|
+
-> ControlNetStrategy (SD15 + SDXL base models)
|
|
613
|
+
-> VideoStrategy (AnimateDiff, 16 frames)
|
|
614
|
+
-> HiDreamStrategy (bfloat16, multi-prompt)
|
|
615
|
+
-> GGUFStrategy (quantized via stable-diffusion-cpp)
|
|
616
|
+
-> GenericPipelineStrategy (any diffusers pipeline via config)
|
|
617
|
+
```
|
|
618
|
+
|
|
619
|
+
The `GenericPipelineStrategy` dynamically loads any `diffusers` pipeline class specified in the model registry, so new models can be added with zero code changes.
|
|
620
|
+
|
|
621
|
+
### Configuration
|
|
411
622
|
Models are automatically configured with optimal settings:
|
|
412
623
|
- **Memory Optimization**: Attention slicing, CPU offloading
|
|
413
624
|
- **Device Detection**: Automatic CUDA/MPS/CPU selection
|
|
414
|
-
- **Precision Handling**: FP16/BF16
|
|
415
|
-
- **Safety
|
|
625
|
+
- **Precision Handling**: FP16/BF16 per model type
|
|
626
|
+
- **Safety Disabled**: Unified `SAFETY_DISABLED_KWARGS` (no monkey-patching)
|
|
416
627
|
|
|
417
628
|
## 🔧 Advanced Usage
|
|
418
629
|
|
|
@@ -487,7 +698,7 @@ with open("control.jpg", "rb") as f:
|
|
|
487
698
|
### Minimum Requirements
|
|
488
699
|
- **RAM**: 8GB system RAM
|
|
489
700
|
- **Storage**: 10GB free space
|
|
490
|
-
- **Python**: 3.
|
|
701
|
+
- **Python**: 3.10+
|
|
491
702
|
|
|
492
703
|
### Recommended Hardware
|
|
493
704
|
|
|
@@ -496,6 +707,12 @@ with open("control.jpg", "rb") as f:
|
|
|
496
707
|
- **RAM**: 16GB+ system RAM
|
|
497
708
|
- **Storage**: SSD with 50GB+ free space
|
|
498
709
|
|
|
710
|
+
#### For Apple Silicon (Mac Mini / MacBook)
|
|
711
|
+
- **16GB unified memory**: PixArt-Sigma, SANA 1.5, DreamShaper, SD 1.5/XL, GGUF q2k-q5ks
|
|
712
|
+
- **24GB+ unified memory**: CogView4, Kolors, Lumina 2.0, GGUF q6k-q8
|
|
713
|
+
- **GGUF with Metal**: Install `stable-diffusion-cpp-python` with `CMAKE_ARGS="-DSD_METAL=ON"` for GPU acceleration
|
|
714
|
+
- Run `ollamadiffuser recommend` to see what fits your hardware
|
|
715
|
+
|
|
499
716
|
#### For GGUF Models (Memory Efficient)
|
|
500
717
|
- **GPU**: 3GB+ VRAM (or CPU only)
|
|
501
718
|
- **RAM**: 8GB+ system RAM (16GB+ for CPU inference)
|
|
@@ -503,7 +720,7 @@ with open("control.jpg", "rb") as f:
|
|
|
503
720
|
|
|
504
721
|
### Supported Platforms
|
|
505
722
|
- **CUDA**: NVIDIA GPUs (recommended)
|
|
506
|
-
- **MPS**: Apple Silicon (M1/M2/M3)
|
|
723
|
+
- **MPS**: Apple Silicon (M1/M2/M3/M4) -- native support for 30+ models including GGUF
|
|
507
724
|
- **CPU**: All platforms (slower but functional)
|
|
508
725
|
|
|
509
726
|
## 🔧 Troubleshooting
|
|
@@ -534,7 +751,7 @@ pip install 'ollamadiffuser[full]'
|
|
|
534
751
|
#### GGUF Support Issues
|
|
535
752
|
```bash
|
|
536
753
|
# Install GGUF dependencies
|
|
537
|
-
pip install
|
|
754
|
+
pip install "ollamadiffuser[gguf]"
|
|
538
755
|
|
|
539
756
|
# Check GGUF support
|
|
540
757
|
ollamadiffuser registry check-gguf
|
|
@@ -673,9 +890,21 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
|
|
|
673
890
|
## 🙏 Acknowledgments
|
|
674
891
|
|
|
675
892
|
- **Stability AI**: For Stable Diffusion models
|
|
676
|
-
- **Black Forest Labs**: For FLUX.1 models
|
|
893
|
+
- **Black Forest Labs**: For FLUX.1 and FLUX.2 models
|
|
894
|
+
- **Alibaba (Tongyi-MAI)**: For Z-Image Turbo
|
|
895
|
+
- **NVIDIA (Efficient-Large-Model)**: For SANA 1.5
|
|
896
|
+
- **Zhipu AI (THUDM)**: For CogView4
|
|
897
|
+
- **Kuaishou (Kwai-Kolors)**: For Kolors
|
|
898
|
+
- **Tencent (Hunyuan)**: For Hunyuan-DiT
|
|
899
|
+
- **Alpha-VLLM**: For Lumina 2.0
|
|
900
|
+
- **PixArt-alpha**: For PixArt-Sigma
|
|
901
|
+
- **Fal**: For AuraFlow
|
|
902
|
+
- **BAAI (Shitao)**: For OmniGen
|
|
903
|
+
- **ByteDance**: For SDXL Lightning
|
|
677
904
|
- **city96**: For FLUX.1-dev GGUF quantizations
|
|
678
905
|
- **Hugging Face**: For model hosting and diffusers library
|
|
906
|
+
- **Anthropic**: For Model Context Protocol (MCP)
|
|
907
|
+
- **OpenClaw**: For AI agent ecosystem integration
|
|
679
908
|
- **ControlNet Team**: For ControlNet architecture
|
|
680
909
|
- **Community**: For feedback and contributions
|
|
681
910
|
|