ollamadiffuser 1.2.3__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ollamadiffuser/__init__.py +1 -1
- ollamadiffuser/api/server.py +312 -312
- ollamadiffuser/cli/config_commands.py +119 -0
- ollamadiffuser/cli/lora_commands.py +169 -0
- ollamadiffuser/cli/main.py +85 -1233
- ollamadiffuser/cli/model_commands.py +664 -0
- ollamadiffuser/cli/recommend_command.py +205 -0
- ollamadiffuser/cli/registry_commands.py +197 -0
- ollamadiffuser/core/config/model_registry.py +562 -11
- ollamadiffuser/core/config/settings.py +24 -2
- ollamadiffuser/core/inference/__init__.py +5 -0
- ollamadiffuser/core/inference/base.py +182 -0
- ollamadiffuser/core/inference/engine.py +204 -1405
- ollamadiffuser/core/inference/strategies/__init__.py +1 -0
- ollamadiffuser/core/inference/strategies/controlnet_strategy.py +170 -0
- ollamadiffuser/core/inference/strategies/flux_strategy.py +136 -0
- ollamadiffuser/core/inference/strategies/generic_strategy.py +164 -0
- ollamadiffuser/core/inference/strategies/gguf_strategy.py +113 -0
- ollamadiffuser/core/inference/strategies/hidream_strategy.py +104 -0
- ollamadiffuser/core/inference/strategies/sd15_strategy.py +134 -0
- ollamadiffuser/core/inference/strategies/sd3_strategy.py +80 -0
- ollamadiffuser/core/inference/strategies/sdxl_strategy.py +131 -0
- ollamadiffuser/core/inference/strategies/video_strategy.py +108 -0
- ollamadiffuser/mcp/__init__.py +0 -0
- ollamadiffuser/mcp/server.py +184 -0
- ollamadiffuser/ui/templates/index.html +62 -1
- ollamadiffuser/ui/web.py +116 -54
- {ollamadiffuser-1.2.3.dist-info → ollamadiffuser-2.0.0.dist-info}/METADATA +321 -108
- ollamadiffuser-2.0.0.dist-info/RECORD +61 -0
- {ollamadiffuser-1.2.3.dist-info → ollamadiffuser-2.0.0.dist-info}/WHEEL +1 -1
- {ollamadiffuser-1.2.3.dist-info → ollamadiffuser-2.0.0.dist-info}/entry_points.txt +1 -0
- ollamadiffuser/core/models/registry.py +0 -384
- ollamadiffuser/ui/samples/.DS_Store +0 -0
- ollamadiffuser-1.2.3.dist-info/RECORD +0 -45
- {ollamadiffuser-1.2.3.dist-info → ollamadiffuser-2.0.0.dist-info}/licenses/LICENSE +0 -0
- {ollamadiffuser-1.2.3.dist-info → ollamadiffuser-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: ollamadiffuser
-Version:
-Summary:
+Version: 2.0.0
+Summary: Local AI Image Generation with Ollama-style CLI for Stable Diffusion, FLUX, and LoRA support
 Home-page: https://github.com/ollamadiffuser/ollamadiffuser
 Author: OllamaDiffuser Team
 Author-email: OllamaDiffuser Team <ollamadiffuser@gmail.com>
@@ -14,7 +14,7 @@ Project-URL: Documentation, https://www.ollamadiffuser.com/
 Project-URL: Bug Reports, https://github.com/ollamadiffuser/ollamadiffuser/issues
 Project-URL: Feature Requests, https://github.com/ollamadiffuser/ollamadiffuser/issues
 Project-URL: Source Code, https://github.com/ollamadiffuser/ollamadiffuser
-Keywords: diffusion,image-generation,ai,machine-learning,lora,ollama,stable-diffusion,flux,local-ai,controlnet,web-ui,cli
+Keywords: diffusion,image-generation,ai,machine-learning,lora,ollama,stable-diffusion,flux,local-ai,controlnet,web-ui,cli,img2img,inpainting,mcp,openclaw
 Classifier: Development Status :: 4 - Beta
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: End Users/Desktop
@@ -33,40 +33,64 @@ Classifier: Environment :: Web Environment
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: torch>=2.
-Requires-Dist: diffusers>=0.
-Requires-Dist: transformers>=4.
-Requires-Dist: accelerate>=0.
-Requires-Dist: fastapi>=0.
+Requires-Dist: torch>=2.4.0
+Requires-Dist: diffusers>=0.34.0
+Requires-Dist: transformers>=4.40.0
+Requires-Dist: accelerate>=1.0.0
+Requires-Dist: fastapi>=0.110.0
 Requires-Dist: uvicorn>=0.23.0
-Requires-Dist: huggingface-hub>=0.
-Requires-Dist: Pillow>=
+Requires-Dist: huggingface-hub>=0.25.0
+Requires-Dist: Pillow>=10.0.0
 Requires-Dist: click>=8.0.0
 Requires-Dist: rich>=13.0.0
 Requires-Dist: pydantic>=2.0.0
 Requires-Dist: protobuf>=3.20.0
 Requires-Dist: sentencepiece>=0.1.99
-Requires-Dist: safetensors>=0.
+Requires-Dist: safetensors>=0.4.0
 Requires-Dist: python-multipart>=0.0.0
 Requires-Dist: psutil>=5.9.0
 Requires-Dist: jinja2>=3.0.0
-Requires-Dist: peft>=0.
-Requires-Dist: numpy>=1.
+Requires-Dist: peft>=0.13.0
+Requires-Dist: numpy>=1.26.0
 Requires-Dist: controlnet-aux>=0.0.7
 Requires-Dist: opencv-python>=4.8.0
-Requires-Dist:
-Requires-Dist:
+Requires-Dist: requests>=2.28.0
+Requires-Dist: PyYAML>=6.0
+Provides-Extra: gguf
+Requires-Dist: stable-diffusion-cpp-python>=0.1.0; extra == "gguf"
+Requires-Dist: gguf>=0.1.0; extra == "gguf"
+Provides-Extra: full
+Requires-Dist: stable-diffusion-cpp-python>=0.1.0; extra == "full"
+Requires-Dist: gguf>=0.1.0; extra == "full"
+Requires-Dist: mcp[cli]>=1.0.0; extra == "full"
+Provides-Extra: mcp
+Requires-Dist: mcp[cli]>=1.0.0; extra == "mcp"
+Provides-Extra: openclaw
+Requires-Dist: mcp[cli]>=1.0.0; extra == "openclaw"
 Provides-Extra: dev
 Requires-Dist: pytest>=7.0.0; extra == "dev"
 Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
+Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
+Requires-Dist: httpx>=0.24.0; extra == "dev"
 Requires-Dist: black>=23.0.0; extra == "dev"
 Requires-Dist: isort>=5.12.0; extra == "dev"
 Requires-Dist: flake8>=6.0.0; extra == "dev"
+Requires-Dist: mypy>=1.0.0; extra == "dev"
 Dynamic: author
 Dynamic: home-page
 Dynamic: license-file
 Dynamic: requires-python

+### ⚠️ Project Status: Maintenance Mode
+
+**Thank you for the incredible support and over 5,000 downloads!**
+
+Please be aware that `ollamadiffuser` is currently in **maintenance mode**. Due to the creator's other professional commitments, active feature development has been paused.
+
+The project in its current state is stable and will remain available for use. However, new features will not be added, and non-critical issues may not be addressed in the near future.
+
+This project laid the foundation for a more ambitious vision: **[LocalKinAI](https://github.com/LocalKinAI)**. Thank you for being part of the journey.
+
 # OllamaDiffuser 🎨

 [](https://badge.fury.io/py/ollamadiffuser)
@@ -76,95 +100,70 @@ Dynamic: requires-python

 ## Local AI Image Generation with OllamaDiffuser

-**OllamaDiffuser** simplifies local deployment of **Stable Diffusion**, **FLUX
+**OllamaDiffuser** simplifies local deployment of **Stable Diffusion**, **FLUX**, **CogView4**, **Kolors**, **SANA**, **PixArt-Sigma**, and 40+ other AI image generation models. An intuitive **local SD** tool inspired by **Ollama's** simplicity - perfect for **local diffuser** workflows with CLI, web UI, and LoRA support.

 🌐 **Website**: [ollamadiffuser.com](https://www.ollamadiffuser.com/) | 📦 **PyPI**: [pypi.org/project/ollamadiffuser](https://pypi.org/project/ollamadiffuser/)

-
-
-## 🔑 Hugging Face Authentication
-
-**Do you need a Hugging Face token?** It depends on which models you want to use!
-
-### 🟢 Models that DON'T require a token:
-- **FLUX.1-schnell** - Apache 2.0 license, ready to use ✅
-- **Stable Diffusion 1.5** - Basic model, no authentication needed ✅
-- **Most ControlNet models** - Generally public access ✅
+> **Upgrading from v1.x?** v2.0 is a major rewrite requiring **Python 3.10+**. Run `pip install --upgrade "ollamadiffuser[full]"` and see the [Migration Guide](#-migration-guide) below.

-
-- **FLUX.1-dev** - Requires HF token and license agreement ⚠️
-- **Stable Diffusion 3.5** - Requires HF token and license agreement ⚠️
-- **Some premium LoRAs** - Gated models from Hugging Face ⚠️
+---

-
+## 🚀 Quick Start (v2.0)

-**For
+**For Mac/PC Users:**
 ```bash
-
-ollamadiffuser
-ollamadiffuser pull stable-diffusion-1.5
+pip install "ollamadiffuser[full]"
+ollamadiffuser recommend  # Find which models fit your GPU
 ```

-**For
+**For OpenClaw/Agent Users:**
 ```bash
-
-
+pip install "ollamadiffuser[mcp]"
+ollamadiffuser mcp  # Starts the MCP server
+```

-
-
-
+**For Low-VRAM / Budget GPU Users:**
+```bash
+pip install "ollamadiffuser[gguf]"
+ollamadiffuser pull flux.1-dev-gguf-q4ks  # Only 6GB VRAM needed
+ollamadiffuser run flux.1-dev-gguf-q4ks
 ```

-
-
-1. **Create account**: Visit [huggingface.co](https://huggingface.co) and sign up
-2. **Generate token**: Go to Settings → Access Tokens → Create new token
-3. **Accept licenses**: Visit the model pages and accept license agreements:
-- [FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev)
-- [Stable Diffusion 3.5](https://huggingface.co/stabilityai/stable-diffusion-3.5-medium)
-4. **Set environment variable**:
-```bash
-# Temporary (current session)
-export HF_TOKEN=your_token_here
-
-# Permanent (add to ~/.bashrc or ~/.zshrc)
-echo 'export HF_TOKEN=your_token_here' >> ~/.bashrc
-```
-
-### 💡 Pro Tips:
-- **Start simple**: Begin with FLUX.1-schnell (no token required, commercial use OK)
-- **Token scope**: Use "read" permissions for downloading models
-- **Privacy**: Your token stays local - never shared with OllamaDiffuser servers
-- **Troubleshooting**: If downloads fail, verify your token and model access permissions
+Most models work **without any token** -- just install and go. See [Hugging Face Authentication](#-hugging-face-authentication) when you want gated models like FLUX.1-dev or SD 3.5.

 ---

 ## ✨ Features

--
+- **🏗️ Strategy Architecture**: Clean per-model strategy pattern (SD1.5, SDXL, FLUX, SD3, ControlNet, Video, HiDream, GGUF, Generic)
+- **🌐 40+ Models**: FLUX.2, SD 3.5, SDXL Lightning, CogView4, Kolors, SANA, PixArt-Sigma, and more
+- **🔌 Generic Pipeline**: Add new diffusers models via registry config alone -- no code changes needed
+- **🖼️ img2img & Inpainting**: Image-to-image and inpainting support across SD1.5, SDXL, and the API/Web UI
+- **⚡ Async API**: Non-blocking FastAPI server using `asyncio.to_thread` for GPU operations
+- **🎲 Random Seeds**: Reproducible generation with explicit seeds, random by default
 - **🎛️ ControlNet Support**: Precise image generation control with 10+ control types
 - **🔄 LoRA Integration**: Dynamic LoRA loading and management
--
+- **🔌 MCP & OpenClaw**: Model Context Protocol server for AI assistant integration (OpenClaw, Claude Code, Cursor)
+- **🍎 Apple Silicon**: MPS dtype safety, GGUF Metal acceleration, `ollamadiffuser recommend` for hardware-aware model suggestions
+- **📦 GGUF Support**: Memory-efficient quantized models (3GB VRAM minimum!) with CUDA and Metal acceleration
 - **🌐 Multiple Interfaces**: CLI, Python API, Web UI, and REST API
 - **📦 Model Management**: Easy installation and switching between models
 - **⚡ Performance Optimized**: Memory-efficient with GPU acceleration
--
-
-## 🚀 Quick Start
+- **🧪 Test Suite**: 82 tests across settings, registry, engine, API, MPS, and MCP

 ### Option 1: Install from PyPI (Recommended)
 ```bash
 # Install from PyPI
 pip install ollamadiffuser

-# Pull and run a model
+# Pull and run a model
 ollamadiffuser pull flux.1-schnell
 ollamadiffuser run flux.1-schnell

-# Generate via API
+# Generate via API (seed is optional for reproducibility)
 curl -X POST http://localhost:8000/api/generate \
   -H "Content-Type: application/json" \
-  -d '{"prompt": "A beautiful sunset"}' \
+  -d '{"prompt": "A beautiful sunset", "seed": 12345}' \
   --output image.png
 ```

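The "Async API" feature listed above refers to offloading blocking GPU work onto a worker thread with `asyncio.to_thread` so the FastAPI event loop stays responsive. A minimal sketch of that pattern (illustrative only; `run_pipeline` is a placeholder, not OllamaDiffuser's actual server code):

```python
import asyncio
import io

from fastapi import FastAPI, Response
from PIL import Image

app = FastAPI()

def run_pipeline(prompt: str) -> bytes:
    # Placeholder for a blocking diffusers call; renders a solid image so the
    # sketch stays runnable without a GPU or a model download.
    img = Image.new("RGB", (512, 512), "navy")
    buf = io.BytesIO()
    img.save(buf, format="PNG")
    return buf.getvalue()

@app.post("/api/generate")
async def generate(payload: dict) -> Response:
    # asyncio.to_thread keeps the event loop free while the GPU-bound call runs.
    png = await asyncio.to_thread(run_pipeline, payload.get("prompt", ""))
    return Response(content=png, media_type="image/png")
```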
@@ -187,7 +186,7 @@ This ensures you get:
 ### GGUF Quick Start (Low VRAM)
 ```bash
 # For systems with limited VRAM (3GB+)
-pip install ollamadiffuser
+pip install "ollamadiffuser[gguf]"

 # Download memory-efficient GGUF model
 ollamadiffuser pull flux.1-dev-gguf-q4ks
@@ -196,6 +195,22 @@ ollamadiffuser pull flux.1-dev-gguf-q4ks
 ollamadiffuser run flux.1-dev-gguf-q4ks
 ```

+### Apple Silicon Quick Start (Mac Mini / MacBook)
+```bash
+# See which models fit your Mac
+ollamadiffuser recommend
+
+# Best lightweight model (0.6B, <6GB)
+ollamadiffuser pull pixart-sigma
+ollamadiffuser run pixart-sigma
+
+# GGUF with Metal acceleration (6GB, great quality)
+pip install "ollamadiffuser[gguf]"
+CMAKE_ARGS="-DSD_METAL=ON" pip install stable-diffusion-cpp-python
+ollamadiffuser pull flux.1-dev-gguf-q4ks
+ollamadiffuser run flux.1-dev-gguf-q4ks
+```
+
 ### Option 2: Development Installation
 ```bash
 # Clone the repository
@@ -245,21 +260,92 @@ curl -X POST http://localhost:8000/api/generate/controlnet \

 ---

-##
+## 🔑 Hugging Face Authentication
+
+**Do you need a Hugging Face token?** It depends on which models you want to use!
+
+**Models that DON'T require a token** -- ready to use right away:
+- FLUX.1-schnell, Stable Diffusion 1.5, DreamShaper, PixArt-Sigma, SANA 1.5, most ControlNet models
+
+**Models that DO require a token:**
+- FLUX.1-dev, Stable Diffusion 3.5, some premium LoRAs
+
+**Setup** (only needed for gated models):
+```bash
+# 1. Create account at https://huggingface.co and generate an access token
+# 2. Accept license on the model page (e.g. FLUX.1-dev, SD 3.5)
+# 3. Set your token
+export HF_TOKEN=your_token_here

-
+# 4. Now you can access gated models
+ollamadiffuser pull flux.1-dev
+ollamadiffuser pull stable-diffusion-3.5-medium
+```
+
+> **Tips:** Use "read" permissions for the token. Your token stays local -- never shared with OllamaDiffuser servers. Add `export HF_TOKEN=...` to `~/.bashrc` or `~/.zshrc` to make it permanent.
+
+---
+
+## 🎯 Supported Models

-
-
-
-
-
-
-
+Choose from 40+ models spanning every major architecture:
+
+### Core Models
+
+| Model | Type | Steps | VRAM | Commercial | License |
+|-------|------|-------|------|------------|---------|
+| `flux.1-schnell` | flux | 4 | 16GB+ | ✅ | Apache 2.0 |
+| `flux.1-dev` | flux | 20 | 20GB+ | ❌ | Non-commercial |
+| `stable-diffusion-3.5-medium` | sd3 | 28 | 8GB+ | ⚠️ | Stability AI |
+| `stable-diffusion-3.5-large` | sd3 | 28 | 12GB+ | ⚠️ | Stability AI |
+| `stable-diffusion-3.5-large-turbo` | sd3 | 4 | 12GB+ | ⚠️ | Stability AI |
+| `stable-diffusion-xl-base` | sdxl | 50 | 6GB+ | ⚠️ | CreativeML |
+| `stable-diffusion-1.5` | sd15 | 50 | 4GB+ | ⚠️ | CreativeML |
+
+### Next-Generation Models
+
+| Model | Origin | Params | Steps | VRAM | Commercial | License |
+|-------|--------|--------|-------|------|------------|---------|
+| `flux.2-dev` | Black Forest Labs | 32B | 28 | 14GB+ | ❌ | Non-commercial |
+| `flux.2-klein-4b` | Black Forest Labs | 4B | 28 | 10GB+ | ✅ | Apache 2.0 |
+| `z-image-turbo` | Alibaba (Tongyi) | 6B | 8 | 10GB+ | ✅ | Apache 2.0 |
+| `sana-1.5` | NVIDIA | 1.6B | 20 | 8GB+ | ✅ | Apache 2.0 |
+| `cogview4` | Zhipu AI | 6B | 50 | 12GB+ | ✅ | Apache 2.0 |
+| `kolors` | Kuaishou | 8.6B | 50 | 8GB+ | ✅ | Kolors License |
+| `hunyuan-dit` | Tencent | 1.5B | 50 | 6GB+ | ✅ | Tencent Community |
+| `lumina-2` | Alpha-VLLM | 2B | 30 | 8GB+ | ✅ | Apache 2.0 |
+| `pixart-sigma` | PixArt | 0.6B | 20 | 6GB+ | ✅ | Open |
+| `auraflow` | Fal | 6.8B | 50 | 12GB+ | ✅ | Apache 2.0 |
+| `omnigen` | BAAI | 3.8B | 50 | 12GB+ | ✅ | MIT |
+
+### Fast / Turbo Models
+
+| Model | Steps | VRAM | Notes |
+|-------|-------|------|-------|
+| `sdxl-turbo` | 1 | 6GB+ | Single-step distilled SDXL |
+| `sdxl-lightning-4step` | 4 | 6GB+ | ByteDance, custom scheduler |
+| `stable-diffusion-3.5-large-turbo` | 4 | 12GB+ | Distilled SD 3.5 Large |
+| `z-image-turbo` | 8 | 10GB+ | Alibaba 6B turbo |
+
+### Community Fine-Tunes
+
+| Model | Base | Notes |
+|-------|------|-------|
+| `realvisxl-v4` | SDXL | Photorealistic, very popular |
+| `dreamshaper` | SD 1.5 | Versatile artistic model |
+| `realistic-vision-v6` | SD 1.5 | Portrait specialist |
+
+### FLUX Pipeline Variants
+
+| Model | Pipeline | Use Case |
+|-------|----------|----------|
+| `flux.1-fill-dev` | FluxFillPipeline | Inpainting / outpainting |
+| `flux.1-canny-dev` | FluxControlPipeline | Canny edge control |
+| `flux.1-depth-dev` | FluxControlPipeline | Depth map control |

 ### 💾 GGUF Models - Reduced Memory Requirements

-
+GGUF quantized models enable running FLUX.1-dev on budget hardware:

 | GGUF Variant | VRAM | Quality | Best For |
 |--------------|------|---------|----------|
@@ -270,11 +356,6 @@ Choose from a variety of state-of-the-art image generation models:

 📖 **[Complete GGUF Guide](GGUF_GUIDE.md)** - Hardware recommendations, installation, and optimization tips

-### Why Choose FLUX.1-schnell?
-- **Apache 2.0 license** - Perfect for commercial use
-- **4-step generation** - Lightning fast results
-- **Commercial OK** - Use in your business
-
 ---

 ## 🎛️ ControlNet Features
@@ -335,6 +416,16 @@ ollamadiffuser lora unload
 ollamadiffuser pull stable-diffusion-1.5
 ollamadiffuser run stable-diffusion-1.5

+# Model registry management
+ollamadiffuser registry list
+ollamadiffuser registry list --installed-only
+ollamadiffuser registry check-gguf
+
+# Configuration management
+ollamadiffuser config                                  # show all config
+ollamadiffuser config set models_dir /mnt/ssd/models   # custom model path
+ollamadiffuser config set server.port 9000             # change server port
+
 # In another terminal, generate images via API
 curl -X POST http://localhost:8000/api/generate \
   -H "Content-Type: application/json" \
@@ -366,18 +457,75 @@ Features:
 ```bash
 # Start API server
 ollamadiffuser --mode api
-
 ollamadiffuser load stable-diffusion-1.5

-#
+# Text-to-image
 curl -X POST http://localhost:8000/api/generate \
   -H "Content-Type: application/json" \
-  -d '{"prompt": "a beautiful landscape", "width": 1024, "height": 1024}'
+  -d '{"prompt": "a beautiful landscape", "width": 1024, "height": 1024, "seed": 42}'
+
+# Image-to-image
+curl -X POST http://localhost:8000/api/generate/img2img \
+  -F "prompt=oil painting style" \
+  -F "strength=0.75" \
+  -F "image=@input.png" \
+  --output result.png
+
+# Inpainting
+curl -X POST http://localhost:8000/api/generate/inpaint \
+  -F "prompt=a red car" \
+  -F "image=@photo.png" \
+  -F "mask=@mask.png" \
+  --output inpainted.png
+
+# API docs: http://localhost:8000/docs
+```
+
+### MCP Server (AI Assistant Integration)
+
+OllamaDiffuser includes a [Model Context Protocol](https://modelcontextprotocol.io/) server for integration with AI assistants like OpenClaw, Claude Code, and Cursor.
+
+```bash
+# Install MCP support
+pip install "ollamadiffuser[mcp]"
+
+# Start MCP server (stdio transport)
+ollamadiffuser mcp
+```

-
-
+**MCP client configuration** (e.g. `claude_desktop_config.json`):
+```json
+{
+  "mcpServers": {
+    "ollamadiffuser": {
+      "command": "ollamadiffuser-mcp"
+    }
+  }
+}
 ```

+**Available MCP tools:**
+- `generate_image` -- Generate images from text prompts (auto-loads model)
+- `list_models` -- List available and installed models
+- `load_model` -- Load a model into memory
+- `get_status` -- Check device, loaded model, and system status
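For reference, these tools can be called from the official `mcp` Python SDK (`mcp>=1.0`, the dependency declared in the metadata above) over stdio. This is an illustrative client sketch; the tool name comes from the list above, while the `prompt` argument is an assumption about the tool's input schema:

```python
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client

async def main() -> None:
    # Launch the server the same way the README does: `ollamadiffuser mcp`.
    params = StdioServerParameters(command="ollamadiffuser", args=["mcp"])
    async with stdio_client(params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            tools = await session.list_tools()
            print([t.name for t in tools.tools])
            result = await session.call_tool("generate_image", {"prompt": "a sunset"})
            print(result)

asyncio.run(main())
```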
+
+### OpenClaw AgentSkill
+
+An [OpenClaw](https://github.com/openclaw/openclaw) skill is included at `integrations/openclaw/SKILL.md`. It uses the REST API with `response_format=b64_json` for agent-friendly base64 image responses. Copy the skill directory to your OpenClaw skills folder or publish to ClawHub.
+
+### Base64 JSON API Response
+
+For AI agents and messaging platforms, use `response_format=b64_json` to get images as JSON:
+
+```bash
+curl -X POST http://localhost:8000/api/generate \
+  -H "Content-Type: application/json" \
+  -d '{"prompt": "a sunset over mountains", "response_format": "b64_json"}'
+```
+
+Response: `{"image": "<base64 PNG>", "format": "png", "width": 1024, "height": 1024}`
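A short Python sketch of consuming that response (the endpoint and field names are taken from the request and response shown above; everything else is illustrative):

```python
import base64
import io

import requests
from PIL import Image

resp = requests.post(
    "http://localhost:8000/api/generate",
    json={"prompt": "a sunset over mountains", "response_format": "b64_json"},
    timeout=600,
)
payload = resp.json()

# Decode the base64 PNG into a PIL image and save it to disk.
image = Image.open(io.BytesIO(base64.b64decode(payload["image"])))
image.save("sunset.png")
print(payload["format"], payload["width"], payload["height"])
```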
+
 ### Python API
 ```python
 from ollamadiffuser.core.models.manager import model_manager
@@ -386,30 +534,59 @@ from ollamadiffuser.core.models.manager import model_manager
 success = model_manager.load_model("stable-diffusion-1.5")
 if success:
     engine = model_manager.loaded_model
-
-    #
+
+    # Text-to-image (seed is optional; omit for random)
     image = engine.generate_image(
         prompt="a beautiful sunset",
         width=1024,
-        height=1024
+        height=1024,
+        seed=42,
     )
     image.save("output.jpg")
+
+    # Image-to-image
+    from PIL import Image
+    input_img = Image.open("photo.jpg")
+    result = engine.generate_image(
+        prompt="watercolor painting",
+        image=input_img,
+        strength=0.7,
+    )
+    result.save("img2img_output.jpg")
 else:
     print("Failed to load model")
 ```

-## 📦
+## 📦 Model Ecosystem

 ### Base Models
-- **Stable Diffusion 1.5**: Classic, reliable, fast
-- **Stable Diffusion XL**: High-resolution, detailed
-- **Stable Diffusion 3**:
-- **FLUX.1**:
+- **Stable Diffusion 1.5**: Classic, reliable, fast (img2img + inpainting)
+- **Stable Diffusion XL**: High-resolution, detailed (img2img + inpainting, scheduler overrides)
+- **Stable Diffusion 3.5**: Medium, Large, and Large Turbo variants
+- **FLUX.1**: schnell, dev, Fill, Canny, Depth pipeline variants
+- **HiDream**: Multi-prompt generation with bfloat16
+- **AnimateDiff**: Video/animation generation
+
+### Next-Generation Models
+- **FLUX.2**: 32B dev and 4B Klein variants from Black Forest Labs
+- **Chinese Models**: CogView4 (Zhipu), Kolors (Kuaishou), Hunyuan-DiT (Tencent), Z-Image (Alibaba)
+- **Efficient Models**: SANA 1.5 (1.6B), PixArt-Sigma (0.6B) -- high quality at low VRAM
+- **Open Models**: AuraFlow (6.8B, Apache 2.0), OmniGen (3.8B, MIT), Lumina 2.0 (2B, Apache 2.0)
+
+### Fast / Turbo Models
+- **SDXL Turbo**: Single-step inference from Stability AI
+- **SDXL Lightning**: 4-step with custom scheduler from ByteDance
+- **Z-Image Turbo**: 8-step turbo from Alibaba
+
+### Community Fine-Tunes
+- **RealVisXL V4**: Photorealistic SDXL, very popular
+- **DreamShaper**: Versatile artistic SD 1.5 model
+- **Realistic Vision V6**: Portrait specialist

 ### GGUF Quantized Models
 - **FLUX.1-dev GGUF**: 7 quantization levels (3GB-16GB VRAM)
 - **Memory Efficient**: Run high-quality models on budget hardware
-- **
+- **Optional Install**: `pip install "ollamadiffuser[gguf]"`

 ### ControlNet Models
 - **SD 1.5 ControlNet**: 4 control types (canny, depth, openpose, scribble)
@@ -421,14 +598,32 @@ else:
 - **Dynamic Loading**: Load/unload without model restart
 - **Strength Control**: Adjustable influence (0.1-2.0)

-## ⚙️
+## ⚙️ Architecture
+
+### Strategy Pattern Engine
+Each model type has a dedicated strategy class handling loading and generation:

-
+```
+InferenceEngine (facade)
+  -> SD15Strategy (512x512, float32 on MPS, img2img, inpainting)
+  -> SDXLStrategy (1024x1024, img2img, inpainting, scheduler overrides)
+  -> FluxStrategy (schnell/dev/Fill/Canny/Depth, dynamic pipeline class)
+  -> SD3Strategy (1024x1024, 28 steps, guidance=3.5)
+  -> ControlNetStrategy (SD15 + SDXL base models)
+  -> VideoStrategy (AnimateDiff, 16 frames)
+  -> HiDreamStrategy (bfloat16, multi-prompt)
+  -> GGUFStrategy (quantized via stable-diffusion-cpp)
+  -> GenericPipelineStrategy (any diffusers pipeline via config)
+```
+
+The `GenericPipelineStrategy` dynamically loads any `diffusers` pipeline class specified in the model registry, so new models can be added with zero code changes.
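As an illustration of that config-driven idea, resolving a `diffusers` pipeline class by name is enough to load a new model. The registry field names below are assumptions for the sketch, not the project's actual schema:

```python
import importlib

import torch

# Hypothetical registry entry: only configuration, no model-specific code.
registry_entry = {
    "pipeline_class": "DiffusionPipeline",   # any class exported by diffusers
    "repo_id": "stabilityai/sdxl-turbo",
    "torch_dtype": "float16",
}

def load_from_registry(entry: dict):
    # Resolve the pipeline class dynamically from the diffusers package.
    diffusers = importlib.import_module("diffusers")
    pipeline_cls = getattr(diffusers, entry["pipeline_class"])
    dtype = getattr(torch, entry["torch_dtype"])
    # from_pretrained works for any diffusers pipeline class, which is what
    # lets a config-only registry entry bring in a new model.
    return pipeline_cls.from_pretrained(entry["repo_id"], torch_dtype=dtype)

pipe = load_from_registry(registry_entry)
```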
+
+### Configuration
 Models are automatically configured with optimal settings:
 - **Memory Optimization**: Attention slicing, CPU offloading
 - **Device Detection**: Automatic CUDA/MPS/CPU selection
-- **Precision Handling**: FP16/BF16
-- **Safety
+- **Precision Handling**: FP16/BF16 per model type
+- **Safety Disabled**: Unified `SAFETY_DISABLED_KWARGS` (no monkey-patching)
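A minimal sketch of the device and precision selection these bullets describe (CUDA, then MPS, then CPU); this is illustrative only, not the project's actual helper:

```python
import torch

def pick_device_and_dtype() -> tuple[str, torch.dtype]:
    if torch.cuda.is_available():
        return "cuda", torch.float16
    if torch.backends.mps.is_available():
        # The strategy notes above keep SD 1.5 in float32 on MPS for dtype safety.
        return "mps", torch.float32
    return "cpu", torch.float32

device, dtype = pick_device_and_dtype()
print(f"Using {device} with {dtype}")
```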

 ## 🔧 Advanced Usage

@@ -503,7 +698,7 @@ with open("control.jpg", "rb") as f:
 ### Minimum Requirements
 - **RAM**: 8GB system RAM
 - **Storage**: 10GB free space
-- **Python**: 3.
+- **Python**: 3.10+

 ### Recommended Hardware

@@ -512,6 +707,12 @@ with open("control.jpg", "rb") as f:
 - **RAM**: 16GB+ system RAM
 - **Storage**: SSD with 50GB+ free space

+#### For Apple Silicon (Mac Mini / MacBook)
+- **16GB unified memory**: PixArt-Sigma, SANA 1.5, DreamShaper, SD 1.5/XL, GGUF q2k-q5ks
+- **24GB+ unified memory**: CogView4, Kolors, Lumina 2.0, GGUF q6k-q8
+- **GGUF with Metal**: Install with `CMAKE_ARGS="-DSD_METAL=ON"` for GPU acceleration
+- Run `ollamadiffuser recommend` to see what fits your hardware
+
 #### For GGUF Models (Memory Efficient)
 - **GPU**: 3GB+ VRAM (or CPU only)
 - **RAM**: 8GB+ system RAM (16GB+ for CPU inference)
@@ -519,7 +720,7 @@ with open("control.jpg", "rb") as f:

 ### Supported Platforms
 - **CUDA**: NVIDIA GPUs (recommended)
-- **MPS**: Apple Silicon (M1/M2/M3)
+- **MPS**: Apple Silicon (M1/M2/M3/M4) -- native support for 30+ models including GGUF
 - **CPU**: All platforms (slower but functional)

 ## 🔧 Troubleshooting
@@ -550,7 +751,7 @@ pip install 'ollamadiffuser[full]'
 #### GGUF Support Issues
 ```bash
 # Install GGUF dependencies
-pip install
+pip install "ollamadiffuser[gguf]"

 # Check GGUF support
 ollamadiffuser registry check-gguf
@@ -689,9 +890,21 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
 ## 🙏 Acknowledgments

 - **Stability AI**: For Stable Diffusion models
-- **Black Forest Labs**: For FLUX.1 models
+- **Black Forest Labs**: For FLUX.1 and FLUX.2 models
+- **Alibaba (Tongyi-MAI)**: For Z-Image Turbo
+- **NVIDIA (Efficient-Large-Model)**: For SANA 1.5
+- **Zhipu AI (THUDM)**: For CogView4
+- **Kuaishou (Kwai-Kolors)**: For Kolors
+- **Tencent (Hunyuan)**: For Hunyuan-DiT
+- **Alpha-VLLM**: For Lumina 2.0
+- **PixArt-alpha**: For PixArt-Sigma
+- **Fal**: For AuraFlow
+- **BAAI (Shitao)**: For OmniGen
+- **ByteDance**: For SDXL Lightning
 - **city96**: For FLUX.1-dev GGUF quantizations
 - **Hugging Face**: For model hosting and diffusers library
+- **Anthropic**: For Model Context Protocol (MCP)
+- **OpenClaw**: For AI agent ecosystem integration
 - **ControlNet Team**: For ControlNet architecture
 - **Community**: For feedback and contributions
