abstractvision 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractvision/__init__.py +18 -3
- abstractvision/__main__.py +8 -0
- abstractvision/artifacts.py +320 -0
- abstractvision/assets/vision_model_capabilities.json +406 -0
- abstractvision/backends/__init__.py +43 -0
- abstractvision/backends/base_backend.py +63 -0
- abstractvision/backends/huggingface_diffusers.py +1503 -0
- abstractvision/backends/openai_compatible.py +325 -0
- abstractvision/backends/stable_diffusion_cpp.py +751 -0
- abstractvision/cli.py +778 -0
- abstractvision/errors.py +19 -0
- abstractvision/integrations/__init__.py +5 -0
- abstractvision/integrations/abstractcore.py +263 -0
- abstractvision/integrations/abstractcore_plugin.py +193 -0
- abstractvision/model_capabilities.py +255 -0
- abstractvision/types.py +95 -0
- abstractvision/vision_manager.py +115 -0
- abstractvision-0.2.1.dist-info/METADATA +243 -0
- abstractvision-0.2.1.dist-info/RECORD +23 -0
- {abstractvision-0.1.0.dist-info → abstractvision-0.2.1.dist-info}/WHEEL +1 -1
- abstractvision-0.2.1.dist-info/entry_points.txt +5 -0
- abstractvision-0.1.0.dist-info/METADATA +0 -65
- abstractvision-0.1.0.dist-info/RECORD +0 -6
- {abstractvision-0.1.0.dist-info → abstractvision-0.2.1.dist-info}/licenses/LICENSE +0 -0
- {abstractvision-0.1.0.dist-info → abstractvision-0.2.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,751 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import shutil
|
|
5
|
+
import subprocess
|
|
6
|
+
import tempfile
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from io import BytesIO
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence
|
|
11
|
+
|
|
12
|
+
from ..errors import CapabilityNotSupportedError, OptionalDependencyMissingError
|
|
13
|
+
from ..types import (
|
|
14
|
+
GeneratedAsset,
|
|
15
|
+
ImageEditRequest,
|
|
16
|
+
ImageGenerationRequest,
|
|
17
|
+
ImageToVideoRequest,
|
|
18
|
+
MultiAngleRequest,
|
|
19
|
+
VideoGenerationRequest,
|
|
20
|
+
VisionBackendCapabilities,
|
|
21
|
+
)
|
|
22
|
+
from .base_backend import VisionBackend
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _sniff_mime_type(data: bytes) -> str:
|
|
26
|
+
b = bytes(data or b"")
|
|
27
|
+
if b.startswith(b"\x89PNG\r\n\x1a\n"):
|
|
28
|
+
return "image/png"
|
|
29
|
+
if b.startswith(b"\xff\xd8\xff"):
|
|
30
|
+
return "image/jpeg"
|
|
31
|
+
return "application/octet-stream"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _sniff_ext(data: bytes) -> str:
    """Return a file extension (with leading dot) matching the sniffed type of *data*.

    PNG maps to ``".png"``, JPEG to ``".jpg"``; everything else gets ``".bin"``.
    """
    extension_by_mime = {
        "image/png": ".png",
        "image/jpeg": ".jpg",
    }
    return extension_by_mime.get(_sniff_mime_type(data), ".bin")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _require_sd_cli(path: str) -> str:
|
|
44
|
+
p = str(path or "").strip()
|
|
45
|
+
if not p:
|
|
46
|
+
raise OptionalDependencyMissingError(
|
|
47
|
+
"stable-diffusion.cpp executable is not configured. "
|
|
48
|
+
"Set sd_cli_path or install `sd-cli` from https://github.com/leejet/stable-diffusion.cpp/releases "
|
|
49
|
+
"(or install `stable-diffusion-cpp-python` to use pip-installable python bindings). "
|
|
50
|
+
"If you intended to run a standard Diffusers model (e.g. 'runwayml/stable-diffusion-v1-5'), use the "
|
|
51
|
+
"Diffusers backend instead."
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
# If the user passed a path-like string, validate it exists; otherwise rely on PATH lookup.
|
|
55
|
+
looks_like_path = os.sep in p or (os.altsep and os.altsep in p) or p.startswith(".")
|
|
56
|
+
if looks_like_path:
|
|
57
|
+
if not Path(p).expanduser().exists():
|
|
58
|
+
raise OptionalDependencyMissingError(
|
|
59
|
+
f"stable-diffusion.cpp executable not found at: {p!r}. "
|
|
60
|
+
"Install from https://github.com/leejet/stable-diffusion.cpp/releases or install `stable-diffusion-cpp-python`, "
|
|
61
|
+
"or update sd_cli_path. "
|
|
62
|
+
"If you intended to run a standard Diffusers model (e.g. 'runwayml/stable-diffusion-v1-5'), use the "
|
|
63
|
+
"Diffusers backend instead."
|
|
64
|
+
)
|
|
65
|
+
return p
|
|
66
|
+
|
|
67
|
+
resolved = shutil.which(p)
|
|
68
|
+
if not resolved:
|
|
69
|
+
raise OptionalDependencyMissingError(
|
|
70
|
+
f"stable-diffusion.cpp executable not found in PATH: {p!r}. "
|
|
71
|
+
"Install from https://github.com/leejet/stable-diffusion.cpp/releases or install `stable-diffusion-cpp-python`, "
|
|
72
|
+
"or set sd_cli_path. "
|
|
73
|
+
"If you intended to run a standard Diffusers model (e.g. 'runwayml/stable-diffusion-v1-5'), use the "
|
|
74
|
+
"Diffusers backend instead."
|
|
75
|
+
)
|
|
76
|
+
return resolved
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _flatten(xs: Iterable[Any]) -> List[str]:
|
|
80
|
+
out: List[str] = []
|
|
81
|
+
for x in xs:
|
|
82
|
+
if x is None:
|
|
83
|
+
continue
|
|
84
|
+
if isinstance(x, (list, tuple)):
|
|
85
|
+
out.extend(_flatten(x))
|
|
86
|
+
continue
|
|
87
|
+
s = str(x).strip()
|
|
88
|
+
if s:
|
|
89
|
+
out.append(s)
|
|
90
|
+
return out
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _extra_to_cli_args(extra: Dict[str, Any]) -> List[str]:
|
|
94
|
+
args: List[str] = []
|
|
95
|
+
for k, v in (extra or {}).items():
|
|
96
|
+
if k is None:
|
|
97
|
+
continue
|
|
98
|
+
key = str(k).strip()
|
|
99
|
+
if not key:
|
|
100
|
+
continue
|
|
101
|
+
if key.startswith("-"):
|
|
102
|
+
# Best-effort: allow advanced users to pass raw flags like "--diffusion-fa".
|
|
103
|
+
flag = key
|
|
104
|
+
else:
|
|
105
|
+
flag = "--" + key.replace("_", "-")
|
|
106
|
+
if v is None:
|
|
107
|
+
continue
|
|
108
|
+
if isinstance(v, bool):
|
|
109
|
+
if v:
|
|
110
|
+
args.append(flag)
|
|
111
|
+
continue
|
|
112
|
+
args.extend([flag, str(v)])
|
|
113
|
+
return args
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _parse_sdcpp_extra_args(extra_args: Sequence[str]) -> tuple[Dict[str, Any], Dict[str, Any]]:
    """Convert CLI-style tokens (from config.extra_args) into python-binding kwargs.

    Only a small subset of sd-cli flags that map cleanly onto
    `stable-diffusion-cpp-python` parameters is understood; every other flag is
    ignored.

    Returns:
        ``(init_kwargs, default_generate_kwargs)``: keyword arguments for the
        model constructor and default keyword arguments for generation.

    Raises:
        ValueError: If a numeric flag carries a value that cannot be converted.
    """

    tokens = [str(tok) for tok in _flatten(extra_args)]
    count = len(tokens)

    # Pass 1: pair each "--flag" with its value. A flag is boolean (True) unless
    # the following token exists and does not itself look like a flag.
    parsed: Dict[str, Any] = {}
    pos = 0
    while pos < count:
        token = tokens[pos]
        pos += 1
        if not token.startswith("--"):
            continue
        name = token[2:].strip().replace("-", "_")
        if not name:
            continue
        value: Any = True
        if pos < count and tokens[pos] and not tokens[pos].startswith("--"):
            value = tokens[pos]
            pos += 1
        parsed[name] = value

    def _to_int(v: Any, flag: str) -> int:
        try:
            return int(v)
        except Exception as e:
            raise ValueError(f"Invalid value for {flag!r}: expected int, got {v!r}") from e

    def _to_float(v: Any, flag: str) -> float:
        try:
            return float(v)
        except Exception as e:
            raise ValueError(f"Invalid value for {flag!r}: expected float, got {v!r}") from e

    init_kwargs: Dict[str, Any] = {}
    default_generate_kwargs: Dict[str, Any] = {}

    # Boolean switches that enable a constructor option when truthy.
    init_switches = {
        "offload_to_cpu": "offload_params_to_cpu",
        "diffusion_fa": "diffusion_flash_attn",
    }
    # flag name -> (destination dict, destination key, converter)
    valued_flags = {
        "flow_shift": (init_kwargs, "flow_shift", lambda v: _to_float(v, "--flow-shift")),
        "sampling_method": (default_generate_kwargs, "sample_method", str),
        "steps": (default_generate_kwargs, "sample_steps", lambda v: _to_int(v, "--steps")),
        "cfg_scale": (default_generate_kwargs, "cfg_scale", lambda v: _to_float(v, "--cfg-scale")),
        "seed": (default_generate_kwargs, "seed", lambda v: _to_int(v, "--seed")),
        "width": (default_generate_kwargs, "width", lambda v: _to_int(v, "--width")),
        "height": (default_generate_kwargs, "height", lambda v: _to_int(v, "--height")),
    }

    # Pass 2: map the recognised flags onto binding parameter names.
    for name, raw in parsed.items():
        if name in init_switches:
            if bool(raw):
                init_kwargs[init_switches[name]] = True
            continue
        spec = valued_flags.get(name)
        if spec is None:
            continue
        destination, dest_key, convert = spec
        destination[dest_key] = convert(raw)

    return init_kwargs, default_generate_kwargs
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _extra_to_python_generate_kwargs(extra: Dict[str, Any]) -> Dict[str, Any]:
|
|
188
|
+
out: Dict[str, Any] = {}
|
|
189
|
+
|
|
190
|
+
for k, v in (extra or {}).items():
|
|
191
|
+
if k is None or v is None:
|
|
192
|
+
continue
|
|
193
|
+
key = str(k).strip()
|
|
194
|
+
if not key:
|
|
195
|
+
continue
|
|
196
|
+
if key.startswith("-"):
|
|
197
|
+
key = key.lstrip("-")
|
|
198
|
+
key = key.replace("-", "_")
|
|
199
|
+
|
|
200
|
+
# Common aliases between sd-cli and stable-diffusion-cpp-python.
|
|
201
|
+
if key == "sampling_method":
|
|
202
|
+
key = "sample_method"
|
|
203
|
+
elif key == "steps":
|
|
204
|
+
key = "sample_steps"
|
|
205
|
+
elif key in {"guidance_scale", "cfg"}:
|
|
206
|
+
key = "cfg_scale"
|
|
207
|
+
|
|
208
|
+
out[key] = v
|
|
209
|
+
|
|
210
|
+
return out
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def _filter_generate_kwargs(model: Any, kwargs: Dict[str, Any]) -> Dict[str, Any]:
|
|
214
|
+
"""Drop keys that stable-diffusion-cpp-python does not accept for generate_image()."""
|
|
215
|
+
|
|
216
|
+
import inspect
|
|
217
|
+
|
|
218
|
+
params = set(inspect.signature(model.generate_image).parameters.keys())
|
|
219
|
+
return {k: v for k, v in kwargs.items() if k in params and v is not None}
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def _try_read_gguf_architecture(path: str) -> Optional[str]:
|
|
223
|
+
try:
|
|
224
|
+
import struct
|
|
225
|
+
|
|
226
|
+
p = Path(path).expanduser()
|
|
227
|
+
if not p.exists():
|
|
228
|
+
return None
|
|
229
|
+
with p.open("rb") as f:
|
|
230
|
+
magic = f.read(4)
|
|
231
|
+
if magic != b"GGUF":
|
|
232
|
+
return None
|
|
233
|
+
_ver = struct.unpack("<I", f.read(4))[0]
|
|
234
|
+
_tensor_count = struct.unpack("<Q", f.read(8))[0]
|
|
235
|
+
kv_count = struct.unpack("<Q", f.read(8))[0]
|
|
236
|
+
|
|
237
|
+
def read_u32() -> int:
|
|
238
|
+
return struct.unpack("<I", f.read(4))[0]
|
|
239
|
+
|
|
240
|
+
def read_u64() -> int:
|
|
241
|
+
return struct.unpack("<Q", f.read(8))[0]
|
|
242
|
+
|
|
243
|
+
def read_str() -> str:
|
|
244
|
+
n = read_u64()
|
|
245
|
+
return f.read(n).decode("utf-8", errors="replace")
|
|
246
|
+
|
|
247
|
+
GGUF_TYPE_STRING = 8
|
|
248
|
+
GGUF_TYPE_ARRAY = 9
|
|
249
|
+
GGUF_TYPE_UINT64 = 10
|
|
250
|
+
GGUF_TYPE_INT64 = 11
|
|
251
|
+
GGUF_TYPE_FLOAT64 = 12
|
|
252
|
+
|
|
253
|
+
def skip_value(t: int) -> None:
|
|
254
|
+
# scalar sizes
|
|
255
|
+
if t in (0, 1, 7):
|
|
256
|
+
f.read(1)
|
|
257
|
+
return
|
|
258
|
+
if t in (2, 3):
|
|
259
|
+
f.read(2)
|
|
260
|
+
return
|
|
261
|
+
if t in (4, 5, 6):
|
|
262
|
+
f.read(4)
|
|
263
|
+
return
|
|
264
|
+
if t in (GGUF_TYPE_UINT64, GGUF_TYPE_INT64, GGUF_TYPE_FLOAT64):
|
|
265
|
+
f.read(8)
|
|
266
|
+
return
|
|
267
|
+
if t == GGUF_TYPE_STRING:
|
|
268
|
+
n = read_u64()
|
|
269
|
+
f.read(n)
|
|
270
|
+
return
|
|
271
|
+
if t == GGUF_TYPE_ARRAY:
|
|
272
|
+
at = read_u32()
|
|
273
|
+
n = read_u64()
|
|
274
|
+
size = {0: 1, 1: 1, 2: 2, 3: 2, 4: 4, 5: 4, 6: 4, 7: 1, 10: 8, 11: 8, 12: 8}.get(at)
|
|
275
|
+
if size is None:
|
|
276
|
+
# fallback: give up cleanly (we only need the architecture key).
|
|
277
|
+
raise ValueError("unsupported gguf array type")
|
|
278
|
+
f.read(int(n) * int(size))
|
|
279
|
+
return
|
|
280
|
+
raise ValueError("unsupported gguf value type")
|
|
281
|
+
|
|
282
|
+
arch: Optional[str] = None
|
|
283
|
+
for _ in range(int(kv_count)):
|
|
284
|
+
key = read_str()
|
|
285
|
+
t = read_u32()
|
|
286
|
+
if key == "general.architecture" and t == GGUF_TYPE_STRING:
|
|
287
|
+
arch = read_str()
|
|
288
|
+
else:
|
|
289
|
+
skip_value(t)
|
|
290
|
+
return arch
|
|
291
|
+
except Exception:
|
|
292
|
+
return None
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
@dataclass(frozen=True)
class StableDiffusionCppBackendConfig:
    """Immutable settings for the stable-diffusion.cpp backends.

    The backend is dependency-light by default (stdlib only) and has two ways
    of running stable-diffusion.cpp (https://github.com/leejet/stable-diffusion.cpp):

    - the external `sd-cli` executable from the project's releases;
    - the optional pip-installable python bindings, `stable-diffusion-cpp-python`.

    `StableDiffusionCppVisionBackend` uses `sd-cli` when it is available and
    otherwise falls back to the python bindings when they are installed.

    Models are given either as one full `model` file or as separate components:
    `diffusion_model` plus optional `vae` / `llm` / `clip_*` / `t5xxl` files.

    For Qwen Image GGUF models, stable-diffusion.cpp expects:

    - diffusion_model (GGUF)
    - vae (safetensors)
    - llm (Qwen2.5-VL text encoder in GGUF)
    """

    # Command name (looked up on PATH) or path of the `sd-cli` executable.
    sd_cli_path: str = "sd-cli"

    # Full-model mode: a single file containing the whole model.
    model: Optional[str] = None

    # Component mode: the diffusion model and its companion files.
    diffusion_model: Optional[str] = None
    vae: Optional[str] = None
    llm: Optional[str] = None
    llm_vision: Optional[str] = None
    clip_l: Optional[str] = None
    clip_g: Optional[str] = None
    t5xxl: Optional[str] = None

    # Additional CLI-style tokens:
    # - CLI mode: forwarded to `sd-cli` (best-effort).
    # - Python mode: only a small subset is mapped onto python-binding defaults
    #   (e.g. --sampling-method, --offload-to-cpu).
    extra_args: Sequence[str] = field(default_factory=tuple)

    # Safety limits for the `sd-cli` subprocess.
    timeout_s: float = 3600.0  # one hour; image generation can be slow on CPU
    cwd: Optional[str] = None
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
class StableDiffusionCppVisionBackend(VisionBackend):
    """Local vision backend that runs stable-diffusion.cpp.

    Supports: text_to_image and image_to_image (including masks when the model supports it).

    The execution mode is chosen lazily on first use and then cached: the
    `sd-cli` executable when it can be resolved, otherwise the
    `stable_diffusion_cpp` python bindings. Progress callbacks are only wired
    up in python-binding mode; CLI mode runs the executable to completion.
    """

    def __init__(self, *, config: StableDiffusionCppBackendConfig):
        self._cfg = config
        self._mode: Optional[str] = None  # "cli" | "python"
        self._sd_cli_resolved: Optional[str] = None
        self._py_sd: Any = None
        self._py_model: Any = None
        self._py_init_kwargs: Optional[Dict[str, Any]] = None
        self._py_default_generate_kwargs: Optional[Dict[str, Any]] = None

    # ------------------------------------------------------------------ lifecycle

    def preload(self) -> None:
        """Eagerly construct the model when running through the python bindings."""
        # Best-effort: CLI mode has nothing to preload (each call spawns a process).
        mode = self._select_mode()
        if mode == "python":
            self._ensure_python_model()

    def unload(self) -> None:
        """Drop the python-binding model so its native memory can be reclaimed."""
        self._py_model = None
        self._py_init_kwargs = None
        self._py_default_generate_kwargs = None
        try:
            import gc

            gc.collect()
        except Exception:
            # Best-effort cleanup only; never fail an unload.
            pass

    def get_capabilities(self) -> VisionBackendCapabilities:
        """Report the tasks this backend implements."""
        return VisionBackendCapabilities(
            supported_tasks=["text_to_image", "image_to_image"],
            supports_mask=True,
        )

    # ------------------------------------------------------------------ mode / model setup

    def _base_cmd(self) -> List[str]:
        """Build the `sd-cli` command prefix: executable, model files and config.extra_args.

        Raises:
            OptionalDependencyMissingError: If the executable cannot be resolved
                or neither `model` nor `diffusion_model` is configured.
        """
        # Reuse the path resolved during mode selection so the binary that is
        # executed is the one that was detected; resolve on demand otherwise.
        sd_cli = self._sd_cli_resolved or _require_sd_cli(self._cfg.sd_cli_path)
        cmd: List[str] = [sd_cli]

        model = str(self._cfg.model or "").strip()
        diffusion_model = str(self._cfg.diffusion_model or "").strip()
        if model:
            cmd.extend(["--model", model])
        elif diffusion_model:
            cmd.extend(["--diffusion-model", diffusion_model])
        else:
            raise OptionalDependencyMissingError(
                "StableDiffusionCppVisionBackend is not configured. "
                "Set `model` (full model) or `diffusion_model` (component mode)."
            )

        component_flags = (
            ("--vae", self._cfg.vae),
            ("--llm", self._cfg.llm),
            ("--llm_vision", self._cfg.llm_vision),
            ("--clip_l", self._cfg.clip_l),
            ("--clip_g", self._cfg.clip_g),
            ("--t5xxl", self._cfg.t5xxl),
        )
        for flag, value in component_flags:
            if value:
                cmd.extend([flag, str(value)])

        cmd.extend(_flatten(self._cfg.extra_args))
        return cmd

    def _select_mode(self) -> str:
        """Pick and cache the execution mode: ``"cli"`` if `sd-cli` resolves, else ``"python"``.

        Raises:
            OptionalDependencyMissingError: If neither the executable nor the
                python bindings are available.
        """
        if self._mode:
            return self._mode

        try:
            self._sd_cli_resolved = _require_sd_cli(self._cfg.sd_cli_path)
            self._mode = "cli"
            return self._mode
        except OptionalDependencyMissingError as cli_error:
            try:
                import stable_diffusion_cpp  # type: ignore
            except Exception as e:
                raise OptionalDependencyMissingError(
                    f"{cli_error} Alternatively, install `stable-diffusion-cpp-python` to use the pip-installable "
                    "stable-diffusion.cpp python bindings."
                ) from e

            self._py_sd = stable_diffusion_cpp
            self._mode = "python"
            return self._mode

    def _ensure_python_model(self) -> Any:
        """Return the python-binding model, constructing and caching it on first use.

        Also (re)derives the constructor kwargs and default generation kwargs
        from config.extra_args whenever the model is built.

        Raises:
            RuntimeError: If the backend is in CLI mode.
            OptionalDependencyMissingError: If no model file is configured.
        """
        if self._py_model is not None:
            return self._py_model

        self._select_mode()
        if self._mode != "python":
            raise RuntimeError("Internal error: python model requested while backend is in CLI mode.")

        init_kwargs, default_generate_kwargs = _parse_sdcpp_extra_args(self._cfg.extra_args)
        self._py_init_kwargs = init_kwargs
        self._py_default_generate_kwargs = default_generate_kwargs

        model = str(self._cfg.model or "").strip()
        diffusion_model = str(self._cfg.diffusion_model or "").strip()
        if not model and not diffusion_model:
            raise OptionalDependencyMissingError(
                "StableDiffusionCppVisionBackend is not configured. "
                "Set `model` (full model) or `diffusion_model` (component mode)."
            )

        # stable-diffusion-cpp-python accepts both full model and component paths.
        self._py_model = self._py_sd.StableDiffusion(  # type: ignore[attr-defined]
            model_path=model,
            diffusion_model_path=diffusion_model,
            vae_path=str(self._cfg.vae or ""),
            llm_path=str(self._cfg.llm or ""),
            llm_vision_path=str(self._cfg.llm_vision or ""),
            clip_l_path=str(self._cfg.clip_l or ""),
            clip_g_path=str(self._cfg.clip_g or ""),
            t5xxl_path=str(self._cfg.t5xxl or ""),
            **(self._py_init_kwargs or {}),
        )
        return self._py_model

    def _validate_qwen_image_components(self) -> None:
        """Fail early when a Qwen Image GGUF diffusion model lacks its `vae` or `llm` file.

        Does nothing unless `diffusion_model` is set and its GGUF metadata
        reports the ``qwen_image`` or ``qwen_image_edit`` architecture.
        """
        diffusion_model = str(self._cfg.diffusion_model or "").strip()
        if not diffusion_model:
            return
        arch = _try_read_gguf_architecture(diffusion_model)
        if arch not in {"qwen_image", "qwen_image_edit"}:
            return
        if not str(self._cfg.vae or "").strip():
            raise OptionalDependencyMissingError("Qwen Image GGUF requires `vae` (e.g. qwen_image_vae.safetensors).")
        if not str(self._cfg.llm or "").strip():
            raise OptionalDependencyMissingError("Qwen Image GGUF requires `llm` (e.g. Qwen2.5-VL-7B-Instruct-*.gguf).")

    def _run(self, cmd: List[str]) -> None:
        """Run `sd-cli` to completion, translating failures into descriptive errors.

        Raises:
            RuntimeError: On timeout or a non-zero exit status (captured output
                is included, truncated to 4000 characters).
            OptionalDependencyMissingError: If the executable cannot be launched.
        """
        try:
            subprocess.run(
                cmd,
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                cwd=str(self._cfg.cwd) if self._cfg.cwd else None,
                timeout=float(self._cfg.timeout_s),
            )
        except subprocess.TimeoutExpired as e:
            raise RuntimeError(f"sd-cli timed out after {self._cfg.timeout_s}s") from e
        except subprocess.CalledProcessError as e:
            out = (e.stdout or b"") + b"\n" + (e.stderr or b"")
            msg = out.decode("utf-8", errors="replace")[:4000]
            raise RuntimeError(f"sd-cli failed (exit={e.returncode}). Output:\n{msg}") from e
        except FileNotFoundError as e:
            raise OptionalDependencyMissingError(
                "stable-diffusion.cpp executable not found. "
                "Install `sd-cli` from https://github.com/leejet/stable-diffusion.cpp/releases "
                "or install `stable-diffusion-cpp-python` for pip-installable python bindings, "
                "or set sd_cli_path to the executable path."
            ) from e

    # ------------------------------------------------------------------ shared helpers

    @staticmethod
    def _adapt_progress_callback(
        progress_callback: Callable[[int, Optional[int]], None],
    ) -> Callable[..., bool]:
        """Wrap a ``(step, total)`` callback for the python bindings.

        The wrapper reports 1-based steps: if the first step it sees is 0, every
        step is shifted up by one. It always returns True and swallows callback
        errors so that progress reporting can never abort a generation.
        """
        zero_based: Optional[bool] = None

        def _pcb(*args: Any, **_kw: Any) -> bool:
            nonlocal zero_based
            try:
                step = int(args[0]) if len(args) >= 1 else 0
                total = int(args[1]) if len(args) >= 2 else None
                if zero_based is None:
                    zero_based = step == 0
                if zero_based:
                    step = step + 1
                progress_callback(step, total)
            except Exception:
                # Deliberate best-effort: a faulty progress hook must not fail the run.
                pass
            return True

        return _pcb

    @staticmethod
    def _cli_request_args(request: Any, *, include_size: bool) -> List[str]:
        """Translate the optional request fields into `sd-cli` flags.

        Width/height are emitted only when *include_size* is true (text-to-image).
        """
        args: List[str] = []
        if request.negative_prompt is not None:
            args.extend(["--negative-prompt", str(request.negative_prompt)])
        if include_size:
            if request.width is not None:
                args.extend(["--width", str(int(request.width))])
            if request.height is not None:
                args.extend(["--height", str(int(request.height))])
        if request.steps is not None:
            args.extend(["--steps", str(int(request.steps))])
        if request.guidance_scale is not None:
            args.extend(["--cfg-scale", str(float(request.guidance_scale))])
        if request.seed is not None:
            args.extend(["--seed", str(int(request.seed))])
        return args

    def _read_cli_output(self, out_path: Path) -> GeneratedAsset:
        """Load the image written by `sd-cli` and wrap it as a GeneratedAsset.

        Must be called while the temporary output directory still exists.

        Raises:
            ValueError: If the output is not a recognised image format.
        """
        data = out_path.read_bytes()
        mime = _sniff_mime_type(data)
        if not mime.startswith("image/"):
            raise ValueError("sd-cli produced an unexpected output format (expected an image).")
        return GeneratedAsset(
            media_type="image",
            data=data,
            mime_type=mime,
            metadata={
                "source": "stable-diffusion.cpp",
                "mode": "cli",
                "sd_cli": str(self._cfg.sd_cli_path),
                "model": self._cfg.model,
                "diffusion_model": self._cfg.diffusion_model,
            },
        )

    def _python_base_kwargs(
        self,
        request: Any,
        progress_callback: Optional[Callable[[int, Optional[int]], None]],
    ) -> Dict[str, Any]:
        """Start the binding kwargs: config defaults, prompts and the progress hook.

        Call only after `_ensure_python_model()`, which populates the defaults.
        """
        kwargs = dict(self._py_default_generate_kwargs or {})
        kwargs["prompt"] = str(request.prompt)
        kwargs["negative_prompt"] = str(request.negative_prompt or "")
        if progress_callback is not None:
            kwargs["progress_callback"] = self._adapt_progress_callback(progress_callback)
        return kwargs

    @staticmethod
    def _apply_python_sampling_overrides(kwargs: Dict[str, Any], request: Any) -> None:
        """Copy steps / guidance scale / seed from *request* into *kwargs* when set."""
        if request.steps is not None:
            kwargs["sample_steps"] = int(request.steps)
        if request.guidance_scale is not None:
            kwargs["cfg_scale"] = float(request.guidance_scale)
        if request.seed is not None:
            kwargs["seed"] = int(request.seed)

    def _run_python_generation(self, model: Any, kwargs: Dict[str, Any], extra: Dict[str, Any]) -> GeneratedAsset:
        """Apply request extras, invoke the bindings and package the first image as PNG.

        Request extras are applied last so they override everything else; keys
        that ``generate_image`` does not accept are dropped.

        Raises:
            RuntimeError: If the bindings return no images.
        """
        kwargs.update(_extra_to_python_generate_kwargs(extra))
        kwargs = _filter_generate_kwargs(model, kwargs)

        images = model.generate_image(**kwargs)
        if not images:
            raise RuntimeError("stable-diffusion.cpp python bindings produced no images.")
        buf = BytesIO()
        images[0].save(buf, format="PNG")
        data = buf.getvalue()
        return GeneratedAsset(
            media_type="image",
            data=data,
            mime_type=_sniff_mime_type(data),
            metadata={
                "source": "stable-diffusion.cpp",
                "mode": "python",
                "python_package": getattr(self._py_sd, "__version__", None),
                "model": self._cfg.model,
                "diffusion_model": self._cfg.diffusion_model,
            },
        )

    # ------------------------------------------------------------------ public API

    def generate_image(self, request: ImageGenerationRequest) -> GeneratedAsset:
        """Generate an image from a text prompt (no progress reporting)."""
        return self.generate_image_with_progress(request, progress_callback=None)

    def generate_image_with_progress(
        self,
        request: ImageGenerationRequest,
        progress_callback: Optional[Callable[[int, Optional[int]], None]] = None,
    ) -> GeneratedAsset:
        """Generate an image from a text prompt.

        Args:
            request: Prompt and optional sampling parameters; ``request.extra``
                is forwarded as additional flags / keyword arguments.
            progress_callback: Optional ``(step, total)`` hook. It is only
                invoked in python-binding mode.

        Returns:
            The generated image (PNG bytes in python mode; whatever image
            `sd-cli` wrote in CLI mode).
        """
        self._validate_qwen_image_components()
        mode = self._select_mode()
        if mode == "cli":
            with tempfile.TemporaryDirectory(prefix="abstractvision-sdcpp-") as td:
                out_path = Path(td) / "output.png"
                cmd = self._base_cmd()
                cmd.extend(["--output", str(out_path)])
                cmd.extend(["--prompt", str(request.prompt)])
                cmd.extend(self._cli_request_args(request, include_size=True))
                cmd.extend(_extra_to_cli_args(request.extra))
                self._run(cmd)
                return self._read_cli_output(out_path)

        model = self._ensure_python_model()
        kwargs = self._python_base_kwargs(request, progress_callback)
        if request.width is not None:
            kwargs["width"] = int(request.width)
        if request.height is not None:
            kwargs["height"] = int(request.height)
        self._apply_python_sampling_overrides(kwargs, request)
        return self._run_python_generation(model, kwargs, request.extra)

    def edit_image(self, request: ImageEditRequest) -> GeneratedAsset:
        """Edit an image according to a prompt (no progress reporting)."""
        return self.edit_image_with_progress(request, progress_callback=None)

    def edit_image_with_progress(
        self,
        request: ImageEditRequest,
        progress_callback: Optional[Callable[[int, Optional[int]], None]] = None,
    ) -> GeneratedAsset:
        """Edit ``request.image`` according to the prompt, optionally restricted by a mask.

        Args:
            request: Source image bytes, prompt, optional mask bytes and
                sampling parameters; ``request.extra`` is forwarded as
                additional flags / keyword arguments.
            progress_callback: Optional ``(step, total)`` hook. It is only
                invoked in python-binding mode.

        Returns:
            The edited image (PNG bytes in python mode; whatever image `sd-cli`
            wrote in CLI mode).
        """
        self._validate_qwen_image_components()
        mode = self._select_mode()
        if mode == "cli":
            with tempfile.TemporaryDirectory(prefix="abstractvision-sdcpp-") as td:
                td_p = Path(td)
                # sd-cli takes its inputs as files, so spill the bytes to disk.
                init_path = td_p / f"init{_sniff_ext(request.image)}"
                init_path.write_bytes(bytes(request.image))

                mask_path: Optional[Path] = None
                if request.mask is not None:
                    mask_path = td_p / f"mask{_sniff_ext(request.mask)}"
                    mask_path.write_bytes(bytes(request.mask))

                out_path = td_p / "output.png"

                cmd = self._base_cmd()
                cmd.extend(["--output", str(out_path)])
                cmd.extend(["--prompt", str(request.prompt)])
                cmd.extend(["--init-img", str(init_path)])
                if mask_path is not None:
                    cmd.extend(["--mask", str(mask_path)])
                cmd.extend(self._cli_request_args(request, include_size=False))
                cmd.extend(_extra_to_cli_args(request.extra))
                self._run(cmd)
                return self._read_cli_output(out_path)

        model = self._ensure_python_model()
        kwargs = self._python_base_kwargs(request, progress_callback)

        from PIL import Image  # pillow is a dependency of stable-diffusion-cpp-python

        kwargs["init_image"] = Image.open(BytesIO(bytes(request.image)))
        if request.mask is not None:
            kwargs["mask_image"] = Image.open(BytesIO(bytes(request.mask)))

        self._apply_python_sampling_overrides(kwargs, request)
        return self._run_python_generation(model, kwargs, request.extra)

    def generate_angles(self, request: MultiAngleRequest) -> list[GeneratedAsset]:
        """Not supported by this backend; always raises CapabilityNotSupportedError."""
        raise CapabilityNotSupportedError("StableDiffusionCppVisionBackend does not implement multi-view generation.")

    def generate_video(self, request: VideoGenerationRequest) -> GeneratedAsset:
        """Not supported by this backend; always raises CapabilityNotSupportedError."""
        raise CapabilityNotSupportedError("StableDiffusionCppVisionBackend does not implement text_to_video (phase 2).")

    def image_to_video(self, request: ImageToVideoRequest) -> GeneratedAsset:
        """Not supported by this backend; always raises CapabilityNotSupportedError."""
        raise CapabilityNotSupportedError("StableDiffusionCppVisionBackend does not implement image_to_video (phase 2).")
|