abstractvision 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,751 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import shutil
5
+ import subprocess
6
+ import tempfile
7
+ from dataclasses import dataclass, field
8
+ from io import BytesIO
9
+ from pathlib import Path
10
+ from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence
11
+
12
+ from ..errors import CapabilityNotSupportedError, OptionalDependencyMissingError
13
+ from ..types import (
14
+ GeneratedAsset,
15
+ ImageEditRequest,
16
+ ImageGenerationRequest,
17
+ ImageToVideoRequest,
18
+ MultiAngleRequest,
19
+ VideoGenerationRequest,
20
+ VisionBackendCapabilities,
21
+ )
22
+ from .base_backend import VisionBackend
23
+
24
+
25
+ def _sniff_mime_type(data: bytes) -> str:
26
+ b = bytes(data or b"")
27
+ if b.startswith(b"\x89PNG\r\n\x1a\n"):
28
+ return "image/png"
29
+ if b.startswith(b"\xff\xd8\xff"):
30
+ return "image/jpeg"
31
+ return "application/octet-stream"
32
+
33
+
34
def _sniff_ext(data: bytes) -> str:
    """Return a file extension (with leading dot) matching the sniffed type.

    PNG maps to ``.png`` and JPEG to ``.jpg``; every other payload gets the
    generic ``.bin`` extension.
    """
    extension_by_mime = {
        "image/png": ".png",
        "image/jpeg": ".jpg",
    }
    return extension_by_mime.get(_sniff_mime_type(data), ".bin")
41
+
42
+
43
+ def _require_sd_cli(path: str) -> str:
44
+ p = str(path or "").strip()
45
+ if not p:
46
+ raise OptionalDependencyMissingError(
47
+ "stable-diffusion.cpp executable is not configured. "
48
+ "Set sd_cli_path or install `sd-cli` from https://github.com/leejet/stable-diffusion.cpp/releases "
49
+ "(or install `stable-diffusion-cpp-python` to use pip-installable python bindings). "
50
+ "If you intended to run a standard Diffusers model (e.g. 'runwayml/stable-diffusion-v1-5'), use the "
51
+ "Diffusers backend instead."
52
+ )
53
+
54
+ # If the user passed a path-like string, validate it exists; otherwise rely on PATH lookup.
55
+ looks_like_path = os.sep in p or (os.altsep and os.altsep in p) or p.startswith(".")
56
+ if looks_like_path:
57
+ if not Path(p).expanduser().exists():
58
+ raise OptionalDependencyMissingError(
59
+ f"stable-diffusion.cpp executable not found at: {p!r}. "
60
+ "Install from https://github.com/leejet/stable-diffusion.cpp/releases or install `stable-diffusion-cpp-python`, "
61
+ "or update sd_cli_path. "
62
+ "If you intended to run a standard Diffusers model (e.g. 'runwayml/stable-diffusion-v1-5'), use the "
63
+ "Diffusers backend instead."
64
+ )
65
+ return p
66
+
67
+ resolved = shutil.which(p)
68
+ if not resolved:
69
+ raise OptionalDependencyMissingError(
70
+ f"stable-diffusion.cpp executable not found in PATH: {p!r}. "
71
+ "Install from https://github.com/leejet/stable-diffusion.cpp/releases or install `stable-diffusion-cpp-python`, "
72
+ "or set sd_cli_path. "
73
+ "If you intended to run a standard Diffusers model (e.g. 'runwayml/stable-diffusion-v1-5'), use the "
74
+ "Diffusers backend instead."
75
+ )
76
+ return resolved
77
+
78
+
79
+ def _flatten(xs: Iterable[Any]) -> List[str]:
80
+ out: List[str] = []
81
+ for x in xs:
82
+ if x is None:
83
+ continue
84
+ if isinstance(x, (list, tuple)):
85
+ out.extend(_flatten(x))
86
+ continue
87
+ s = str(x).strip()
88
+ if s:
89
+ out.append(s)
90
+ return out
91
+
92
+
93
+ def _extra_to_cli_args(extra: Dict[str, Any]) -> List[str]:
94
+ args: List[str] = []
95
+ for k, v in (extra or {}).items():
96
+ if k is None:
97
+ continue
98
+ key = str(k).strip()
99
+ if not key:
100
+ continue
101
+ if key.startswith("-"):
102
+ # Best-effort: allow advanced users to pass raw flags like "--diffusion-fa".
103
+ flag = key
104
+ else:
105
+ flag = "--" + key.replace("_", "-")
106
+ if v is None:
107
+ continue
108
+ if isinstance(v, bool):
109
+ if v:
110
+ args.append(flag)
111
+ continue
112
+ args.extend([flag, str(v)])
113
+ return args
114
+
115
+
116
def _parse_sdcpp_extra_args(extra_args: Sequence[str]) -> tuple[Dict[str, Any], Dict[str, Any]]:
    """Parse CLI-style tokens (from config.extra_args) into python-binding kwargs.

    We intentionally only support a small, stable subset of sd-cli flags that map cleanly to
    `stable-diffusion-cpp-python` parameters. Unknown flags and tokens that do not start
    with ``--`` are ignored.

    Args:
        extra_args: CLI tokens, possibly nested in lists/tuples.

    Returns:
        ``(init_kwargs, default_generate_kwargs)``: keyword arguments for the model
        constructor and default keyword arguments for image generation.

    Raises:
        ValueError: If a flag that needs a value has none (e.g. a trailing ``--steps``)
            or its value cannot be converted to the expected numeric type.
    """

    # _flatten already yields stripped, non-empty strings.
    tokens = _flatten(extra_args)
    flags: Dict[str, Any] = {}
    i = 0
    while i < len(tokens):
        t = tokens[i]
        if not t.startswith("--"):
            i += 1
            continue
        key = t[2:].strip().replace("-", "_")
        if not key:
            i += 1
            continue

        # bool flag by default; if a value follows and doesn't look like a flag, treat as value.
        if i + 1 < len(tokens) and not tokens[i + 1].startswith("--"):
            flags[key] = tokens[i + 1]
            i += 2
        else:
            flags[key] = True
            i += 1

    def _require_value(v: Any, *, flag: str) -> Any:
        # ``True`` is the marker for "flag given without a value". Coercing it would
        # silently turn e.g. a bare ``--steps`` into 1 step.
        if isinstance(v, bool):
            raise ValueError(f"Missing value for {flag!r}")
        return v

    def _as_int(v: Any, *, flag: str) -> int:
        try:
            return int(_require_value(v, flag=flag))
        except (TypeError, ValueError) as e:
            if isinstance(v, bool):
                raise
            raise ValueError(f"Invalid value for {flag!r}: expected int, got {v!r}") from e

    def _as_float(v: Any, *, flag: str) -> float:
        try:
            return float(_require_value(v, flag=flag))
        except (TypeError, ValueError) as e:
            if isinstance(v, bool):
                raise
            raise ValueError(f"Invalid value for {flag!r}: expected float, got {v!r}") from e

    init_kwargs: Dict[str, Any] = {}
    default_generate_kwargs: Dict[str, Any] = {}

    for k, v in flags.items():
        if k == "offload_to_cpu" and bool(v):
            init_kwargs["offload_params_to_cpu"] = True
        elif k == "diffusion_fa" and bool(v):
            init_kwargs["diffusion_flash_attn"] = True
        elif k == "flow_shift":
            init_kwargs["flow_shift"] = _as_float(v, flag="--flow-shift")
        elif k == "sampling_method":
            default_generate_kwargs["sample_method"] = str(_require_value(v, flag="--sampling-method"))
        elif k == "steps":
            default_generate_kwargs["sample_steps"] = _as_int(v, flag="--steps")
        elif k == "cfg_scale":
            default_generate_kwargs["cfg_scale"] = _as_float(v, flag="--cfg-scale")
        elif k == "seed":
            default_generate_kwargs["seed"] = _as_int(v, flag="--seed")
        elif k == "width":
            default_generate_kwargs["width"] = _as_int(v, flag="--width")
        elif k == "height":
            default_generate_kwargs["height"] = _as_int(v, flag="--height")

    return init_kwargs, default_generate_kwargs
185
+
186
+
187
+ def _extra_to_python_generate_kwargs(extra: Dict[str, Any]) -> Dict[str, Any]:
188
+ out: Dict[str, Any] = {}
189
+
190
+ for k, v in (extra or {}).items():
191
+ if k is None or v is None:
192
+ continue
193
+ key = str(k).strip()
194
+ if not key:
195
+ continue
196
+ if key.startswith("-"):
197
+ key = key.lstrip("-")
198
+ key = key.replace("-", "_")
199
+
200
+ # Common aliases between sd-cli and stable-diffusion-cpp-python.
201
+ if key == "sampling_method":
202
+ key = "sample_method"
203
+ elif key == "steps":
204
+ key = "sample_steps"
205
+ elif key in {"guidance_scale", "cfg"}:
206
+ key = "cfg_scale"
207
+
208
+ out[key] = v
209
+
210
+ return out
211
+
212
+
213
+ def _filter_generate_kwargs(model: Any, kwargs: Dict[str, Any]) -> Dict[str, Any]:
214
+ """Drop keys that stable-diffusion-cpp-python does not accept for generate_image()."""
215
+
216
+ import inspect
217
+
218
+ params = set(inspect.signature(model.generate_image).parameters.keys())
219
+ return {k: v for k, v in kwargs.items() if k in params and v is not None}
220
+
221
+
222
+ def _try_read_gguf_architecture(path: str) -> Optional[str]:
223
+ try:
224
+ import struct
225
+
226
+ p = Path(path).expanduser()
227
+ if not p.exists():
228
+ return None
229
+ with p.open("rb") as f:
230
+ magic = f.read(4)
231
+ if magic != b"GGUF":
232
+ return None
233
+ _ver = struct.unpack("<I", f.read(4))[0]
234
+ _tensor_count = struct.unpack("<Q", f.read(8))[0]
235
+ kv_count = struct.unpack("<Q", f.read(8))[0]
236
+
237
+ def read_u32() -> int:
238
+ return struct.unpack("<I", f.read(4))[0]
239
+
240
+ def read_u64() -> int:
241
+ return struct.unpack("<Q", f.read(8))[0]
242
+
243
+ def read_str() -> str:
244
+ n = read_u64()
245
+ return f.read(n).decode("utf-8", errors="replace")
246
+
247
+ GGUF_TYPE_STRING = 8
248
+ GGUF_TYPE_ARRAY = 9
249
+ GGUF_TYPE_UINT64 = 10
250
+ GGUF_TYPE_INT64 = 11
251
+ GGUF_TYPE_FLOAT64 = 12
252
+
253
+ def skip_value(t: int) -> None:
254
+ # scalar sizes
255
+ if t in (0, 1, 7):
256
+ f.read(1)
257
+ return
258
+ if t in (2, 3):
259
+ f.read(2)
260
+ return
261
+ if t in (4, 5, 6):
262
+ f.read(4)
263
+ return
264
+ if t in (GGUF_TYPE_UINT64, GGUF_TYPE_INT64, GGUF_TYPE_FLOAT64):
265
+ f.read(8)
266
+ return
267
+ if t == GGUF_TYPE_STRING:
268
+ n = read_u64()
269
+ f.read(n)
270
+ return
271
+ if t == GGUF_TYPE_ARRAY:
272
+ at = read_u32()
273
+ n = read_u64()
274
+ size = {0: 1, 1: 1, 2: 2, 3: 2, 4: 4, 5: 4, 6: 4, 7: 1, 10: 8, 11: 8, 12: 8}.get(at)
275
+ if size is None:
276
+ # fallback: give up cleanly (we only need the architecture key).
277
+ raise ValueError("unsupported gguf array type")
278
+ f.read(int(n) * int(size))
279
+ return
280
+ raise ValueError("unsupported gguf value type")
281
+
282
+ arch: Optional[str] = None
283
+ for _ in range(int(kv_count)):
284
+ key = read_str()
285
+ t = read_u32()
286
+ if key == "general.architecture" and t == GGUF_TYPE_STRING:
287
+ arch = read_str()
288
+ else:
289
+ skip_value(t)
290
+ return arch
291
+ except Exception:
292
+ return None
293
+
294
+
295
@dataclass(frozen=True)
class StableDiffusionCppBackendConfig:
    """Config for stable-diffusion.cpp backends.

    This backend is dependency-light by default (stdlib only) and can run via:

    - External executable (`sd-cli`) from stable-diffusion.cpp releases
    - Optional python bindings (pip-installable): `stable-diffusion-cpp-python`

    `StableDiffusionCppVisionBackend` auto-selects:
    - `sd-cli` when available
    - otherwise falls back to python bindings when installed

    External executable:
      https://github.com/leejet/stable-diffusion.cpp

    You can either provide a single `model` (full model), or provide components:
    - diffusion_model (+ optional vae / llm / clip / t5xxl ...)

    For Qwen Image GGUF models, stable-diffusion.cpp expects:
    - diffusion_model (GGUF)
    - vae (safetensors)
    - llm (Qwen2.5-VL text encoder in GGUF)

    Instances are immutable (`frozen=True`).
    """

    # Command name (resolved through PATH) or path of the stable-diffusion.cpp executable.
    sd_cli_path: str = "sd-cli"

    # Single-file full model. Takes precedence over `diffusion_model` when the
    # CLI command line is built.
    model: Optional[str] = None

    # Component mode: each value is forwarded as the matching sd-cli option
    # (or `*_path` constructor argument of the python bindings) when set.
    diffusion_model: Optional[str] = None
    vae: Optional[str] = None
    llm: Optional[str] = None
    llm_vision: Optional[str] = None
    clip_l: Optional[str] = None
    clip_g: Optional[str] = None
    t5xxl: Optional[str] = None

    # Extra args:
    # - CLI mode: forwarded to `sd-cli` (best-effort).
    # - Python mode: a small subset is mapped to python-binding defaults (e.g. --sampling-method, --offload-to-cpu).
    extra_args: Sequence[str] = field(default_factory=tuple)

    # Safety
    # Maximum run time of one `sd-cli` invocation, in seconds.
    timeout_s: float = 60.0 * 60.0  # 1h (image generation can be slow on CPU)
    # Working directory for the `sd-cli` subprocess; None inherits the current one.
    cwd: Optional[str] = None
342
+
343
+
344
class StableDiffusionCppVisionBackend(VisionBackend):
    """Local vision backend that runs stable-diffusion.cpp.

    Supports: text_to_image and image_to_image (including masks when the model supports it).

    Two execution modes exist and are selected lazily on first use:

    - ``"cli"``: every request spawns the `sd-cli` executable and reads the image it
      writes into a temporary directory.
    - ``"python"``: used when `sd-cli` cannot be resolved but the
      `stable_diffusion_cpp` module can be imported; the model object is created
      once and reused until `unload()` is called.
    """

    def __init__(self, *, config: StableDiffusionCppBackendConfig):
        self._cfg = config
        self._mode: Optional[str] = None  # "cli" | "python"
        self._sd_cli_resolved: Optional[str] = None
        self._py_sd: Any = None
        self._py_model: Any = None
        self._py_init_kwargs: Optional[Dict[str, Any]] = None
        self._py_default_generate_kwargs: Optional[Dict[str, Any]] = None

    # ------------------------------------------------------------------ lifecycle

    def preload(self) -> None:
        """Eagerly construct the model when running through the python bindings."""
        # Best-effort: in CLI mode there is nothing to preload.
        if self._select_mode() == "python":
            self._ensure_python_model()

    def unload(self) -> None:
        """Drop the python-binding model so its native memory can be reclaimed."""
        self._py_model = None
        self._py_init_kwargs = None
        self._py_default_generate_kwargs = None
        try:
            import gc

            gc.collect()
        except Exception:
            # Best-effort cleanup only; never fail an unload because of it.
            pass

    def get_capabilities(self) -> VisionBackendCapabilities:
        """Report the tasks this backend implements."""
        return VisionBackendCapabilities(
            supported_tasks=["text_to_image", "image_to_image"],
            supports_mask=True,
        )

    # ------------------------------------------------------------ mode / model setup

    def _base_cmd(self) -> List[str]:
        """Build the `sd-cli` command prefix (executable, model files, extra args).

        Raises:
            OptionalDependencyMissingError: If the executable cannot be resolved or
                neither `model` nor `diffusion_model` is configured.
        """
        cmd: List[str] = [_require_sd_cli(self._cfg.sd_cli_path)]

        model = str(self._cfg.model or "").strip()
        diffusion_model = str(self._cfg.diffusion_model or "").strip()
        if model:
            cmd.extend(["--model", model])
        elif diffusion_model:
            cmd.extend(["--diffusion-model", diffusion_model])
        else:
            raise OptionalDependencyMissingError(
                "StableDiffusionCppVisionBackend is not configured. "
                "Set `model` (full model) or `diffusion_model` (component mode)."
            )

        component_flags = (
            ("--vae", self._cfg.vae),
            ("--llm", self._cfg.llm),
            ("--llm_vision", self._cfg.llm_vision),
            ("--clip_l", self._cfg.clip_l),
            ("--clip_g", self._cfg.clip_g),
            ("--t5xxl", self._cfg.t5xxl),
        )
        for flag, value in component_flags:
            if value:
                cmd.extend([flag, str(value)])

        cmd.extend(_flatten(self._cfg.extra_args))
        return cmd

    def _select_mode(self) -> str:
        """Pick (and cache) the execution mode: ``"cli"`` if possible, else ``"python"``.

        Raises:
            OptionalDependencyMissingError: If neither `sd-cli` nor the python
                bindings are available.
        """
        if self._mode:
            return self._mode

        try:
            self._sd_cli_resolved = _require_sd_cli(self._cfg.sd_cli_path)
            self._mode = "cli"
            return self._mode
        except OptionalDependencyMissingError as cli_error:
            try:
                import stable_diffusion_cpp  # type: ignore
            except Exception as e:
                raise OptionalDependencyMissingError(
                    f"{cli_error} Alternatively, install `stable-diffusion-cpp-python` to use the pip-installable "
                    "stable-diffusion.cpp python bindings."
                ) from e

            self._py_sd = stable_diffusion_cpp
            self._mode = "python"
            return self._mode

    def _ensure_python_model(self) -> Any:
        """Return the cached python-binding model, constructing it on first use."""
        if self._py_model is not None:
            return self._py_model

        self._select_mode()
        if self._mode != "python":
            raise RuntimeError("Internal error: python model requested while backend is in CLI mode.")

        init_kwargs, default_generate_kwargs = _parse_sdcpp_extra_args(self._cfg.extra_args)
        self._py_init_kwargs = init_kwargs
        self._py_default_generate_kwargs = default_generate_kwargs

        model = str(self._cfg.model or "").strip()
        diffusion_model = str(self._cfg.diffusion_model or "").strip()
        if not model and not diffusion_model:
            raise OptionalDependencyMissingError(
                "StableDiffusionCppVisionBackend is not configured. "
                "Set `model` (full model) or `diffusion_model` (component mode)."
            )

        # stable-diffusion-cpp-python accepts both full model and component paths.
        self._py_model = self._py_sd.StableDiffusion(  # type: ignore[attr-defined]
            model_path=model,
            diffusion_model_path=diffusion_model,
            vae_path=str(self._cfg.vae or ""),
            llm_path=str(self._cfg.llm or ""),
            llm_vision_path=str(self._cfg.llm_vision or ""),
            clip_l_path=str(self._cfg.clip_l or ""),
            clip_g_path=str(self._cfg.clip_g or ""),
            t5xxl_path=str(self._cfg.t5xxl or ""),
            **(self._py_init_kwargs or {}),
        )
        return self._py_model

    def _validate_qwen_image_components(self) -> None:
        """Fail early when a Qwen Image GGUF is configured without `vae` or `llm`.

        The check only applies when the GGUF metadata of `diffusion_model` reports
        the ``qwen_image`` or ``qwen_image_edit`` architecture.
        """
        diffusion_model = str(self._cfg.diffusion_model or "").strip()
        if not diffusion_model:
            return
        arch = _try_read_gguf_architecture(diffusion_model)
        if arch not in {"qwen_image", "qwen_image_edit"}:
            return
        if not str(self._cfg.vae or "").strip():
            raise OptionalDependencyMissingError("Qwen Image GGUF requires `vae` (e.g. qwen_image_vae.safetensors).")
        if not str(self._cfg.llm or "").strip():
            raise OptionalDependencyMissingError("Qwen Image GGUF requires `llm` (e.g. Qwen2.5-VL-7B-Instruct-*.gguf).")

    # ------------------------------------------------------------------ CLI helpers

    def _run(self, cmd: List[str]) -> None:
        """Run `sd-cli` and translate process failures into backend errors.

        Raises:
            RuntimeError: On timeout or non-zero exit status.
            OptionalDependencyMissingError: If the executable cannot be started.
        """
        try:
            subprocess.run(
                cmd,
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                cwd=str(self._cfg.cwd) if self._cfg.cwd else None,
                timeout=float(self._cfg.timeout_s),
            )
        except subprocess.TimeoutExpired as e:
            raise RuntimeError(f"sd-cli timed out after {self._cfg.timeout_s}s") from e
        except subprocess.CalledProcessError as e:
            out = (e.stdout or b"") + b"\n" + (e.stderr or b"")
            # Keep the tail: the actual error is printed last, after the
            # (often long) model-loading output.
            msg = out.decode("utf-8", errors="replace")[-4000:]
            raise RuntimeError(f"sd-cli failed (exit={e.returncode}). Output:\n{msg}") from e
        except FileNotFoundError as e:
            raise OptionalDependencyMissingError(
                "stable-diffusion.cpp executable not found. "
                "Install `sd-cli` from https://github.com/leejet/stable-diffusion.cpp/releases "
                "or install `stable-diffusion-cpp-python` for pip-installable python bindings, "
                "or set sd_cli_path to the executable path."
            ) from e

    @staticmethod
    def _cli_request_args(request: Any, *, with_size: bool) -> List[str]:
        """Translate the optional request fields into `sd-cli` arguments.

        ``with_size`` controls whether ``request.width`` / ``request.height`` are
        read; it is only enabled for text-to-image requests.
        """
        args: List[str] = []
        if request.negative_prompt is not None:
            args.extend(["--negative-prompt", str(request.negative_prompt)])
        if with_size:
            if request.width is not None:
                args.extend(["--width", str(int(request.width))])
            if request.height is not None:
                args.extend(["--height", str(int(request.height))])
        if request.steps is not None:
            args.extend(["--steps", str(int(request.steps))])
        if request.guidance_scale is not None:
            args.extend(["--cfg-scale", str(float(request.guidance_scale))])
        if request.seed is not None:
            args.extend(["--seed", str(int(request.seed))])
        args.extend(_extra_to_cli_args(request.extra))
        return args

    def _cli_asset(self, out_path: Path) -> GeneratedAsset:
        """Load the image written by `sd-cli` and wrap it as a `GeneratedAsset`.

        Must be called while the temporary output directory still exists.

        Raises:
            ValueError: If the output is not a recognised image format.
        """
        data = out_path.read_bytes()
        mime = _sniff_mime_type(data)
        if not mime.startswith("image/"):
            raise ValueError("sd-cli produced an unexpected output format (expected an image).")
        return GeneratedAsset(
            media_type="image",
            data=data,
            mime_type=mime,
            metadata={
                "source": "stable-diffusion.cpp",
                "mode": "cli",
                "sd_cli": str(self._cfg.sd_cli_path),
                "model": self._cfg.model,
                "diffusion_model": self._cfg.diffusion_model,
            },
        )

    # ------------------------------------------------------- python-binding helpers

    @staticmethod
    def _wrap_progress_callback(
        progress_callback: Callable[[int, Optional[int]], None],
    ) -> Callable[..., bool]:
        """Adapt a ``(step, total)`` callback to the bindings' progress hook.

        The first reported step decides whether the native counter is zero-based;
        if so, every step is shifted by one so callers always see 1-based steps.
        """
        zero_based: Optional[bool] = None

        def _pcb(*args: Any, **_kw: Any) -> bool:
            nonlocal zero_based
            try:
                step = int(args[0]) if len(args) >= 1 else 0
                total = int(args[1]) if len(args) >= 2 else None
                if zero_based is None:
                    zero_based = step == 0
                if zero_based:
                    step = step + 1
                progress_callback(step, total)
            except Exception:
                # A failing progress observer must never abort image generation.
                pass
            return True

        return _pcb

    def _python_base_kwargs(
        self,
        request: Any,
        progress_callback: Optional[Callable[[int, Optional[int]], None]],
    ) -> Dict[str, Any]:
        """Start the generate kwargs: config defaults, prompts and progress hook."""
        kwargs = dict(self._py_default_generate_kwargs or {})
        kwargs["prompt"] = str(request.prompt)
        kwargs["negative_prompt"] = str(request.negative_prompt or "")
        if progress_callback is not None:
            kwargs["progress_callback"] = self._wrap_progress_callback(progress_callback)
        return kwargs

    @staticmethod
    def _apply_sampling_overrides(kwargs: Dict[str, Any], request: Any) -> None:
        """Override steps / cfg scale / seed with the values set on the request."""
        if request.steps is not None:
            kwargs["sample_steps"] = int(request.steps)
        if request.guidance_scale is not None:
            kwargs["cfg_scale"] = float(request.guidance_scale)
        if request.seed is not None:
            kwargs["seed"] = int(request.seed)

    def _python_generate(self, model: Any, kwargs: Dict[str, Any], extra: Dict[str, Any]) -> GeneratedAsset:
        """Run the bindings with ``kwargs`` (+ request extras) and return a PNG asset.

        Raises:
            RuntimeError: If the bindings return no image.
        """
        # Request extras are applied last so they take precedence over everything else.
        kwargs.update(_extra_to_python_generate_kwargs(extra))
        images = model.generate_image(**_filter_generate_kwargs(model, kwargs))
        if not images:
            raise RuntimeError("stable-diffusion.cpp python bindings produced no images.")

        buf = BytesIO()
        images[0].save(buf, format="PNG")
        data = buf.getvalue()
        return GeneratedAsset(
            media_type="image",
            data=data,
            mime_type=_sniff_mime_type(data),
            metadata={
                "source": "stable-diffusion.cpp",
                "mode": "python",
                "python_package": getattr(self._py_sd, "__version__", None),
                "model": self._cfg.model,
                "diffusion_model": self._cfg.diffusion_model,
            },
        )

    # ------------------------------------------------------------------- public API

    def generate_image(self, request: ImageGenerationRequest) -> GeneratedAsset:
        """Generate an image from a text prompt (no progress reporting)."""
        return self.generate_image_with_progress(request, progress_callback=None)

    def generate_image_with_progress(
        self,
        request: ImageGenerationRequest,
        progress_callback: Optional[Callable[[int, Optional[int]], None]] = None,
    ) -> GeneratedAsset:
        """Generate an image from a text prompt.

        ``progress_callback(step, total)`` is only invoked in python-binding mode;
        the CLI path does not report progress.
        """
        self._validate_qwen_image_components()
        if self._select_mode() == "cli":
            with tempfile.TemporaryDirectory(prefix="abstractvision-sdcpp-") as td:
                out_path = Path(td) / "output.png"
                cmd = self._base_cmd()
                cmd.extend(["--output", str(out_path)])
                cmd.extend(["--prompt", str(request.prompt)])
                cmd.extend(self._cli_request_args(request, with_size=True))
                self._run(cmd)
                return self._cli_asset(out_path)

        model = self._ensure_python_model()
        kwargs = self._python_base_kwargs(request, progress_callback)
        if request.width is not None:
            kwargs["width"] = int(request.width)
        if request.height is not None:
            kwargs["height"] = int(request.height)
        self._apply_sampling_overrides(kwargs, request)
        return self._python_generate(model, kwargs, request.extra)

    def edit_image(self, request: ImageEditRequest) -> GeneratedAsset:
        """Edit an image (optionally masked) from a prompt (no progress reporting)."""
        return self.edit_image_with_progress(request, progress_callback=None)

    def edit_image_with_progress(
        self,
        request: ImageEditRequest,
        progress_callback: Optional[Callable[[int, Optional[int]], None]] = None,
    ) -> GeneratedAsset:
        """Edit ``request.image`` (optionally restricted by ``request.mask``).

        ``progress_callback(step, total)`` is only invoked in python-binding mode;
        the CLI path does not report progress.
        """
        self._validate_qwen_image_components()
        if self._select_mode() == "cli":
            with tempfile.TemporaryDirectory(prefix="abstractvision-sdcpp-") as td:
                td_p = Path(td)
                # sd-cli takes file paths, so the input bytes are materialised first.
                init_path = td_p / f"init{_sniff_ext(request.image)}"
                init_path.write_bytes(bytes(request.image))

                mask_path: Optional[Path] = None
                if request.mask is not None:
                    mask_path = td_p / f"mask{_sniff_ext(request.mask)}"
                    mask_path.write_bytes(bytes(request.mask))

                out_path = td_p / "output.png"

                cmd = self._base_cmd()
                cmd.extend(["--output", str(out_path)])
                cmd.extend(["--prompt", str(request.prompt)])
                cmd.extend(["--init-img", str(init_path)])
                if mask_path is not None:
                    cmd.extend(["--mask", str(mask_path)])
                cmd.extend(self._cli_request_args(request, with_size=False))
                self._run(cmd)
                return self._cli_asset(out_path)

        model = self._ensure_python_model()
        kwargs = self._python_base_kwargs(request, progress_callback)

        from PIL import Image  # pillow is a dependency of stable-diffusion-cpp-python

        kwargs["init_image"] = Image.open(BytesIO(bytes(request.image)))
        if request.mask is not None:
            kwargs["mask_image"] = Image.open(BytesIO(bytes(request.mask)))

        self._apply_sampling_overrides(kwargs, request)
        return self._python_generate(model, kwargs, request.extra)

    def generate_angles(self, request: MultiAngleRequest) -> list[GeneratedAsset]:
        """Not supported by this backend."""
        raise CapabilityNotSupportedError("StableDiffusionCppVisionBackend does not implement multi-view generation.")

    def generate_video(self, request: VideoGenerationRequest) -> GeneratedAsset:
        """Not supported by this backend."""
        raise CapabilityNotSupportedError("StableDiffusionCppVisionBackend does not implement text_to_video (phase 2).")

    def image_to_video(self, request: ImageToVideoRequest) -> GeneratedAsset:
        """Not supported by this backend."""
        raise CapabilityNotSupportedError("StableDiffusionCppVisionBackend does not implement image_to_video (phase 2).")