abstractcore-2.9.1-py3-none-any.whl → abstractcore-2.11.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. abstractcore/__init__.py +7 -27
  2. abstractcore/apps/extractor.py +33 -100
  3. abstractcore/apps/intent.py +19 -0
  4. abstractcore/apps/judge.py +20 -1
  5. abstractcore/apps/summarizer.py +20 -1
  6. abstractcore/architectures/detection.py +34 -1
  7. abstractcore/architectures/response_postprocessing.py +313 -0
  8. abstractcore/assets/architecture_formats.json +38 -8
  9. abstractcore/assets/model_capabilities.json +781 -160
  10. abstractcore/compression/__init__.py +1 -2
  11. abstractcore/compression/glyph_processor.py +6 -4
  12. abstractcore/config/main.py +31 -19
  13. abstractcore/config/manager.py +389 -11
  14. abstractcore/config/vision_config.py +5 -5
  15. abstractcore/core/interface.py +151 -3
  16. abstractcore/core/session.py +16 -10
  17. abstractcore/download.py +1 -1
  18. abstractcore/embeddings/manager.py +20 -6
  19. abstractcore/endpoint/__init__.py +2 -0
  20. abstractcore/endpoint/app.py +458 -0
  21. abstractcore/mcp/client.py +3 -1
  22. abstractcore/media/__init__.py +52 -17
  23. abstractcore/media/auto_handler.py +42 -22
  24. abstractcore/media/base.py +44 -1
  25. abstractcore/media/capabilities.py +12 -33
  26. abstractcore/media/enrichment.py +105 -0
  27. abstractcore/media/handlers/anthropic_handler.py +19 -28
  28. abstractcore/media/handlers/local_handler.py +124 -70
  29. abstractcore/media/handlers/openai_handler.py +19 -31
  30. abstractcore/media/processors/__init__.py +4 -2
  31. abstractcore/media/processors/audio_processor.py +57 -0
  32. abstractcore/media/processors/office_processor.py +8 -3
  33. abstractcore/media/processors/pdf_processor.py +46 -3
  34. abstractcore/media/processors/text_processor.py +22 -24
  35. abstractcore/media/processors/video_processor.py +58 -0
  36. abstractcore/media/types.py +97 -4
  37. abstractcore/media/utils/image_scaler.py +20 -2
  38. abstractcore/media/utils/video_frames.py +219 -0
  39. abstractcore/media/vision_fallback.py +136 -22
  40. abstractcore/processing/__init__.py +32 -3
  41. abstractcore/processing/basic_deepsearch.py +15 -10
  42. abstractcore/processing/basic_intent.py +3 -2
  43. abstractcore/processing/basic_judge.py +3 -2
  44. abstractcore/processing/basic_summarizer.py +1 -1
  45. abstractcore/providers/__init__.py +3 -1
  46. abstractcore/providers/anthropic_provider.py +95 -8
  47. abstractcore/providers/base.py +1516 -81
  48. abstractcore/providers/huggingface_provider.py +546 -69
  49. abstractcore/providers/lmstudio_provider.py +35 -923
  50. abstractcore/providers/mlx_provider.py +382 -35
  51. abstractcore/providers/model_capabilities.py +5 -1
  52. abstractcore/providers/ollama_provider.py +99 -15
  53. abstractcore/providers/openai_compatible_provider.py +406 -180
  54. abstractcore/providers/openai_provider.py +188 -44
  55. abstractcore/providers/openrouter_provider.py +76 -0
  56. abstractcore/providers/registry.py +61 -5
  57. abstractcore/providers/streaming.py +138 -33
  58. abstractcore/providers/vllm_provider.py +92 -817
  59. abstractcore/server/app.py +461 -13
  60. abstractcore/server/audio_endpoints.py +139 -0
  61. abstractcore/server/vision_endpoints.py +1319 -0
  62. abstractcore/structured/handler.py +316 -41
  63. abstractcore/tools/common_tools.py +5501 -2012
  64. abstractcore/tools/comms_tools.py +1641 -0
  65. abstractcore/tools/core.py +37 -7
  66. abstractcore/tools/handler.py +4 -9
  67. abstractcore/tools/parser.py +49 -2
  68. abstractcore/tools/tag_rewriter.py +2 -1
  69. abstractcore/tools/telegram_tdlib.py +407 -0
  70. abstractcore/tools/telegram_tools.py +261 -0
  71. abstractcore/utils/cli.py +1085 -72
  72. abstractcore/utils/token_utils.py +2 -0
  73. abstractcore/utils/truncation.py +29 -0
  74. abstractcore/utils/version.py +3 -4
  75. abstractcore/utils/vlm_token_calculator.py +12 -2
  76. abstractcore-2.11.2.dist-info/METADATA +562 -0
  77. abstractcore-2.11.2.dist-info/RECORD +133 -0
  78. {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/WHEEL +1 -1
  79. {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/entry_points.txt +1 -0
  80. abstractcore-2.9.1.dist-info/METADATA +0 -1190
  81. abstractcore-2.9.1.dist-info/RECORD +0 -119
  82. {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/licenses/LICENSE +0 -0
  83. {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1319 @@
+ """
+ OpenAI-compatible vision generation endpoints for AbstractCore Server.
+
+ This module is intentionally dependency-light:
+ - It does not import `abstractvision` unless the endpoints are actually used.
+
+ Design notes:
+ - AbstractCore Server is a gateway; vision generation is delegated to AbstractVision backends.
+ - This router can act as a thin "vision proxy" to any upstream that implements
+   `/images/generations` and `/images/edits`, or run local backends (Diffusers / stable-diffusion.cpp).
+
+ Out-of-the-box behavior:
+ - If `ABSTRACTCORE_VISION_BACKEND` is not set, the router defaults to `auto`.
+ - In `auto` mode, the backend is inferred per-request:
+   - Hugging Face repo ids like `org/model` -> Diffusers backend
+   - Local `.gguf` file paths -> stable-diffusion.cpp backend
+ """
+
+ from __future__ import annotations
+
+ import base64
+ import json
+ import os
+ import platform
+ import shlex
+ import time
+ import threading
+ import uuid
+ from pathlib import Path
+ from typing import Any, Dict, Optional, Tuple
+
+ from fastapi import APIRouter, Body, File, Form, HTTPException, UploadFile
+
+ try:  # Optional dependency (needed only for multipart parsing).
+     import multipart  # type: ignore # noqa: F401
+
+     _HAS_MULTIPART = True
+ except Exception:  # pragma: no cover
+     _HAS_MULTIPART = False
+
+
+ router = APIRouter(tags=["vision"])
+
+ _BACKEND_CACHE_LOCK = threading.Lock()
+ _BACKEND_CACHE: Dict[Tuple[Any, ...], Tuple[Any, threading.Lock, float]] = {}
+
+ _ACTIVE_LOCK = threading.Lock()
+ _ACTIVE_MODEL_ID: Optional[str] = None
+ _ACTIVE_BACKEND_KIND: Optional[str] = None
+ _ACTIVE_BACKEND: Any = None
+ _ACTIVE_CALL_LOCK: Optional[threading.Lock] = None
+ _ACTIVE_LOADED_AT_S: Optional[float] = None
+
+ _JOBS_LOCK = threading.Lock()
+ _JOBS: Dict[str, Dict[str, Any]] = {}
+
+
+ def _jobs_max() -> int:
+     raw = _env("ABSTRACTCORE_VISION_JOBS_MAX", "8") or "8"
+     try:
+         n = int(str(raw).strip())
+     except Exception:
+         n = 8
+     return max(1, min(n, 64))
+
+
+ def _jobs_ttl_s() -> float:
+     raw = _env("ABSTRACTCORE_VISION_JOBS_TTL_S", "600") or "600"
+     try:
+         v = float(str(raw).strip())
+     except Exception:
+         v = 600.0
+     return max(10.0, min(v, 24.0 * 3600.0))
+
+
+ def _new_job_id() -> str:
+     return uuid.uuid4().hex
+
+
+ def _jobs_cleanup_locked(*, now_s: float) -> None:
+     ttl = _jobs_ttl_s()
+     # Drop old completed jobs.
+     for jid, job in list(_JOBS.items()):
+         state = str(job.get("state") or "")
+         if state not in {"succeeded", "failed"}:
+             continue
+         updated = float(job.get("updated_at_s") or 0.0)
+         if updated and (now_s - updated) > ttl:
+             _JOBS.pop(jid, None)
+
+     # Enforce size bound (drop oldest completed first).
+     max_entries = _jobs_max()
+     if len(_JOBS) <= max_entries:
+         return
+     items = sorted(_JOBS.items(), key=lambda kv: float(kv[1].get("updated_at_s") or kv[1].get("created_at_s") or 0.0))
+     for jid, job in items:
+         if len(_JOBS) <= max_entries:
+             break
+         state = str(job.get("state") or "")
+         if state in {"succeeded", "failed"}:
+             _JOBS.pop(jid, None)
+
+     # If still too many (all in-flight), drop the oldest anyway (best-effort).
+     if len(_JOBS) > max_entries:
+         items = sorted(
+             _JOBS.items(),
+             key=lambda kv: float(kv[1].get("created_at_s") or 0.0),
+         )
+         for jid, _job in items[: max(0, len(_JOBS) - max_entries)]:
+             _JOBS.pop(jid, None)
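+
+ # Worked example (hypothetical values): with ABSTRACTCORE_VISION_JOBS_MAX=2 and
+ # jobs {A: succeeded, B: running, C: queued}, the size pass drops A first (only
+ # completed jobs are eligible); if the table is still over the bound because all
+ # jobs are in flight, the oldest by created_at_s is dropped anyway. Dropping an
+ # entry is bookkeeping only; it does not cancel the worker thread.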
+
+
+ def _any_inflight_job_locked() -> bool:
+     return any(str(j.get("state") or "") in {"queued", "running"} for j in _JOBS.values())
+
+
+ def _job_update_progress(job_id: str, *, step: Optional[int], total: Optional[int], message: Optional[str] = None) -> None:
+     with _JOBS_LOCK:
+         job = _JOBS.get(job_id)
+         if job is None:
+             return
+         prog = job.get("progress")
+         if not isinstance(prog, dict):
+             prog = {}
+             job["progress"] = prog
+         if step is not None:
+             prog["step"] = int(step)
+         if total is not None:
+             prog["total_steps"] = int(total)
+         if message is not None:
+             prog["message"] = str(message)
+         job["updated_at_s"] = time.time()
+
+
+ def _job_finish(job_id: str, *, ok: bool, result: Optional[Dict[str, Any]] = None, error: Optional[str] = None) -> None:
+     with _JOBS_LOCK:
+         job = _JOBS.get(job_id)
+         if job is None:
+             return
+         job["state"] = "succeeded" if ok else "failed"
+         job["updated_at_s"] = time.time()
+         if ok:
+             job["result"] = result
+             job.pop("error", None)
+         else:
+             job["error"] = str(error or "Unknown error")
+             job.pop("result", None)
+
+
+ def _get_or_create_cached_backend(key: Tuple[Any, ...], factory):
+     with _BACKEND_CACHE_LOCK:
+         now = time.time()
+         cached = _BACKEND_CACHE.get(key)
+         if cached is not None:
+             backend, call_lock, _ts = cached
+             _BACKEND_CACHE[key] = (backend, call_lock, now)
+             return backend, call_lock
+
+         backend = factory()
+         call_lock = threading.Lock()
+         _BACKEND_CACHE[key] = (backend, call_lock, now)
+
+         max_entries_raw = _env("ABSTRACTCORE_VISION_BACKEND_CACHE_MAX", "4") or "4"
+         try:
+             max_entries = int(str(max_entries_raw).strip())
+         except Exception:
+             max_entries = 4
+         max_entries = max(1, min(int(max_entries), 64))
+
+         if len(_BACKEND_CACHE) > max_entries:
+             # Evict least-recently-used backends (best-effort).
+             items = sorted(_BACKEND_CACHE.items(), key=lambda kv: kv[1][2])
+             for k, _ in items[: max(0, len(_BACKEND_CACHE) - max_entries)]:
+                 if k == key:
+                     continue
+                 _BACKEND_CACHE.pop(k, None)
+
+         return backend, call_lock
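+
+ # Cache behavior sketch: the key is the full backend configuration tuple, so two
+ # requests with the same (kind, model, device, ...) reuse one backend instance and
+ # serialize on its per-backend call lock, while any config change (e.g. a different
+ # model id) creates a new entry and may evict the least-recently-used one.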
+
+
+ def _env(name: str, default: Optional[str] = None) -> Optional[str]:
+     v = os.getenv(name)
+     if v is None:
+         return default
+     s = str(v).strip()
+     return s if s else default
+
+
+ def _env_bool(name: str, default: bool = False) -> bool:
+     v = _env(name)
+     if v is None:
+         return bool(default)
+     return str(v).strip().lower() in {"1", "true", "yes", "on"}
+
+
+ def _active_state() -> Dict[str, Any]:
+     with _ACTIVE_LOCK:
+         return {
+             "model_id": _ACTIVE_MODEL_ID,
+             "backend_kind": _ACTIVE_BACKEND_KIND,
+             "loaded_at_s": _ACTIVE_LOADED_AT_S,
+             "has_backend": _ACTIVE_BACKEND is not None,
+         }
+
+
+ def _get_active_backend() -> Tuple[Optional[str], Optional[str], Any, Optional[threading.Lock]]:
+     with _ACTIVE_LOCK:
+         return _ACTIVE_MODEL_ID, _ACTIVE_BACKEND_KIND, _ACTIVE_BACKEND, _ACTIVE_CALL_LOCK
+
+
+ def _unload_backend_best_effort(backend: Any) -> None:
+     unload = getattr(backend, "unload", None)
+     if callable(unload):
+         unload()
+
+     # Extra best-effort GC to drop references ASAP.
+     try:
+         import gc
+
+         gc.collect()
+     except Exception:
+         pass
+
+
+ def _unload_active_backend() -> None:
+     global _ACTIVE_MODEL_ID, _ACTIVE_BACKEND_KIND, _ACTIVE_BACKEND, _ACTIVE_CALL_LOCK, _ACTIVE_LOADED_AT_S
+     with _ACTIVE_LOCK:
+         backend = _ACTIVE_BACKEND
+         call_lock = _ACTIVE_CALL_LOCK
+         _ACTIVE_MODEL_ID = None
+         _ACTIVE_BACKEND_KIND = None
+         _ACTIVE_BACKEND = None
+         _ACTIVE_CALL_LOCK = None
+         _ACTIVE_LOADED_AT_S = None
+
+     if backend is not None:
+         if call_lock is not None:
+             with call_lock:
+                 _unload_backend_best_effort(backend)
+         else:
+             _unload_backend_best_effort(backend)
+
+
+ def _vision_backend_kind() -> str:
+     raw = _env("ABSTRACTCORE_VISION_BACKEND")
+     if not raw:
+         return "auto"
+     v = str(raw).strip().lower()
+     if v in {"auto", "default"}:
+         return "auto"
+     if v in {"openai", "openai-compatible", "openai_compatible", "proxy", "openai_compatible_proxy"}:
+         return "openai_compatible_proxy"
+     if v in {"diffusers", "hf-diffusers", "huggingface-diffusers"}:
+         return "diffusers"
+     if v in {"sdcpp", "sd-cpp", "stable-diffusion.cpp", "stable-diffusion-cpp", "stable_diffusion_cpp"}:
+         return "sdcpp"
+     return v
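+
+ # Examples of the alias normalization above (inputs are case-insensitive):
+ #   "openai-compatible" / "proxy" -> "openai_compatible_proxy"
+ #   "hf-diffusers"                -> "diffusers"
+ #   "stable-diffusion.cpp"        -> "sdcpp"
+ #   anything else                 -> passed through verbatim (rejected later with a 501)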
+
+
+ def _looks_like_filesystem_path(model: str) -> bool:
+     s = str(model or "").strip()
+     if not s:
+         return False
+     if s.startswith(("~", "./", "../")):
+         return True
+     if s.startswith(("/", "\\")):
+         return True
+     if s.startswith("file:"):
+         return True
+     # Windows drive letters (e.g. C:\path\to\file.gguf)
+     if len(s) >= 3 and s[1:3] == ":\\":
+         return True
+     # Common local weight formats (we care most about gguf for stable-diffusion.cpp).
+     if s.lower().endswith((".gguf", ".safetensors", ".ckpt", ".pt", ".pth", ".bin")):
+         return True
+     return False
+
+
+ def _looks_like_hf_repo_id(model: str) -> bool:
+     s = str(model or "").strip()
+     if not s:
+         return False
+     if _looks_like_filesystem_path(s):
+         return False
+     if "://" in s:
+         return False
+     parts = s.split("/")
+     if len(parts) != 2:
+         return False
+     org, name = parts
+     return bool(org and name)
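+
+ # Classification examples (illustrative strings, not shipped defaults):
+ #   _looks_like_filesystem_path("./weights/sd.gguf")          -> True  (relative path)
+ #   _looks_like_filesystem_path("org/model.safetensors")      -> True  (weight suffix wins)
+ #   _looks_like_hf_repo_id("runwayml/stable-diffusion-v1-5")  -> True
+ #   _looks_like_hf_repo_id("a/b/c")                           -> False (needs exactly one "/")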
+
+
+ def _infer_backend_kind(request_model: Any) -> str:
+     model = str(request_model or "").strip()
+     if model:
+         if _looks_like_filesystem_path(model):
+             return "sdcpp"
+         # Default: treat HF-style ids as Diffusers.
+         if _looks_like_hf_repo_id(model):
+             return "diffusers"
+         # Unknown strings: prefer local generation unless proxy is explicitly configured.
+         if _env("ABSTRACTCORE_VISION_UPSTREAM_BASE_URL"):
+             return "openai_compatible_proxy"
+         return "diffusers"
+
+     # No request model: fall back to env configuration.
+     if _env("ABSTRACTCORE_VISION_MODEL_ID"):
+         return "diffusers"
+     if _env("ABSTRACTCORE_VISION_SDCPP_MODEL") or _env("ABSTRACTCORE_VISION_SDCPP_DIFFUSION_MODEL"):
+         return "sdcpp"
+     if _env("ABSTRACTCORE_VISION_UPSTREAM_BASE_URL"):
+         return "openai_compatible_proxy"
+     return "auto_unconfigured"
+
+
+ def _effective_backend_kind(request_model: Any) -> str:
+     env_kind = _vision_backend_kind()
+     if env_kind == "auto":
+         return _infer_backend_kind(request_model)
+
+     model = str(request_model or "").strip()
+     if model and env_kind == "sdcpp" and _looks_like_hf_repo_id(model):
+         # Common misconfiguration: user set SDCPP backend but selected a Diffusers model id.
+         return "diffusers"
+     if model and env_kind == "diffusers" and _looks_like_filesystem_path(model):
+         # Common misconfiguration: user set Diffusers backend but passed a local gguf path.
+         return "sdcpp"
+
+     return env_kind
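+
+ # Net effect when an explicit env override is set (examples):
+ #   ABSTRACTCORE_VISION_BACKEND=sdcpp     + model="org/name"      -> "diffusers" (corrected)
+ #   ABSTRACTCORE_VISION_BACKEND=diffusers + model="/x/model.gguf" -> "sdcpp" (corrected)
+ #   any other explicit kind is honored as configured.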
+
+
+ def _default_hf_hub_cache_dirs() -> list[Path]:
+     dirs: list[str] = []
+     # Explicit overrides.
+     for k in ("HF_HUB_CACHE", "HF_HUB_CACHE_DIR"):
+         v = _env(k)
+         if v:
+             dirs.append(v)
+
+     # HF_HOME implies <HF_HOME>/hub.
+     hf_home = _env("HF_HOME")
+     if hf_home:
+         dirs.append(str(Path(hf_home).expanduser() / "hub"))
+
+     # Other common env vars used by Transformers/Diffusers. These may or may not be hub-style dirs.
+     for k in ("TRANSFORMERS_CACHE", "DIFFUSERS_CACHE"):
+         v = _env(k)
+         if v:
+             dirs.append(v)
+
+     # Default from huggingface_hub if available.
+     try:
+         from huggingface_hub.constants import HF_HUB_CACHE  # type: ignore
+
+         dirs.append(str(HF_HUB_CACHE))
+     except Exception:
+         # Fallback: common default.
+         dirs.append(str(Path.home() / ".cache" / "huggingface" / "hub"))
+
+     out: list[Path] = []
+     seen: set[str] = set()
+     for d in dirs:
+         p = Path(d).expanduser()
+         key = str(p)
+         if key in seen:
+             continue
+         seen.add(key)
+         if p.is_dir():
+             out.append(p)
+     return out
+
+
+ def _is_hf_model_cached(model_id: str, cache_dirs: list[Path]) -> bool:
+     s = str(model_id or "").strip()
+     if "/" not in s:
+         return False
+     # HF hub cache uses folder names like: models--org--name
+     folder = "models--" + s.replace("/", "--")
+     for base in cache_dirs:
+         snaps = base / folder / "snapshots"
+         try:
+             if snaps.is_dir() and any(snaps.iterdir()):
+                 return True
+         except Exception:
+             continue
+     return False
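+
+ # Example of the layout this probes (generic placeholders, not a specific repo):
+ #   <cache>/models--org--name/snapshots/<revision>/...
+ # A model counts as cached when at least one snapshot directory is present.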
+
+
+ def _default_lmstudio_model_dirs() -> list[Path]:
+     dirs: list[str] = []
+     for k in ("LMSTUDIO_MODELS_DIR", "LMSTUDIO_MODEL_DIR", "LM_STUDIO_MODELS_DIR"):
+         v = _env(k)
+         if v:
+             dirs.append(v)
+
+     sysname = platform.system().lower()
+     home = Path.home()
+     if sysname == "darwin":
+         dirs.append(str(home / "Library" / "Application Support" / "LM Studio" / "models"))
+     elif sysname == "linux":
+         dirs.append(str(home / ".cache" / "lm-studio" / "models"))
+         dirs.append(str(home / ".cache" / "lmstudio" / "models"))
+     elif sysname == "windows":
+         local = os.getenv("LOCALAPPDATA") or ""
+         roaming = os.getenv("APPDATA") or ""
+         if local:
+             dirs.append(str(Path(local) / "LM Studio" / "models"))
+         if roaming:
+             dirs.append(str(Path(roaming) / "LM Studio" / "models"))
+
+     out: list[Path] = []
+     seen: set[str] = set()
+     for d in dirs:
+         p = Path(d).expanduser()
+         key = str(p)
+         if key in seen:
+             continue
+         seen.add(key)
+         if p.is_dir():
+             out.append(p)
+     return out
+
+
+ def _is_lmstudio_model_cached(model_id: str, cache_dirs: list[Path]) -> bool:
+     s = str(model_id or "").strip()
+     if "/" not in s:
+         return False
+     org, name = s.split("/", 1)
+     for base in cache_dirs:
+         p = base / org / name
+         try:
+             if p.is_dir() and any(p.iterdir()):
+                 return True
+         except Exception:
+             continue
+     return False
+
+
+ def _require_upstream_base_url() -> str:
+     base_url = _env("ABSTRACTCORE_VISION_UPSTREAM_BASE_URL")
+     if not base_url:
+         raise HTTPException(
+             status_code=501,
+             detail=(
+                 "Vision image endpoints are not configured. "
+                 "Set ABSTRACTCORE_VISION_UPSTREAM_BASE_URL to an OpenAI-compatible server base URL "
+                 "(e.g. https://api.openai.com/v1 or http://localhost:1234/v1)."
+             ),
+         )
+     return base_url
+
+
+ def _require_diffusers_model_id(request_model: Any) -> str:
+     model_id = str(request_model or _env("ABSTRACTCORE_VISION_MODEL_ID") or "").strip()
+     if not model_id:
+         raise HTTPException(
+             status_code=501,
+             detail=(
+                 "Vision image endpoints are not configured for diffusers mode. "
+                 "Set ABSTRACTCORE_VISION_MODEL_ID (and optionally ABSTRACTCORE_VISION_BACKEND=diffusers), "
+                 "or pass `model` in the request."
+             ),
+         )
+     return model_id
+
+
+ def _require_sdcpp_model_or_diffusion_model(request_model: Any) -> Tuple[Optional[str], Optional[str]]:
+     req = str(request_model or "").strip()
+     if req and not _looks_like_filesystem_path(req):
+         raise HTTPException(
+             status_code=400,
+             detail=(
+                 "stable-diffusion.cpp backend expects a local model path (typically a .gguf file). "
+                 f"Got model={req!r}. If you intended to run a Hugging Face model id (e.g. 'runwayml/stable-diffusion-v1-5'), "
+                 "use the Diffusers backend (or set ABSTRACTCORE_VISION_BACKEND=auto)."
+             ),
+         )
+
+     env_model = str(_env("ABSTRACTCORE_VISION_SDCPP_MODEL") or "").strip()
+     env_diffusion = str(_env("ABSTRACTCORE_VISION_SDCPP_DIFFUSION_MODEL") or "").strip()
+
+     # Request model overrides env defaults.
+     if req:
+         # If the user configured component paths, treat the request as diffusion_model (component mode).
+         component_mode = any(
+             str(_env(k) or "").strip()
+             for k in (
+                 "ABSTRACTCORE_VISION_SDCPP_VAE",
+                 "ABSTRACTCORE_VISION_SDCPP_LLM",
+                 "ABSTRACTCORE_VISION_SDCPP_LLM_VISION",
+                 "ABSTRACTCORE_VISION_SDCPP_CLIP_L",
+                 "ABSTRACTCORE_VISION_SDCPP_CLIP_G",
+                 "ABSTRACTCORE_VISION_SDCPP_T5XXL",
+             )
+         )
+         return (None, req) if component_mode else (req, None)
+
+     if env_model:
+         return env_model, None
+     if env_diffusion:
+         return None, env_diffusion
+
+     raise HTTPException(
+         status_code=501,
+         detail=(
+             "Vision image endpoints are not configured for sdcpp mode. "
+             "Set ABSTRACTCORE_VISION_SDCPP_MODEL (full model) or ABSTRACTCORE_VISION_SDCPP_DIFFUSION_MODEL "
+             "(component mode), or pass a local .gguf path as `model` in the request."
+         ),
+     )
+
+
+ def _import_abstractvision() -> Tuple[Any, ...]:
+     try:
+         from abstractvision.backends import (  # type: ignore
+             HuggingFaceDiffusersBackendConfig,
+             HuggingFaceDiffusersVisionBackend,
+             OpenAICompatibleBackendConfig,
+             OpenAICompatibleVisionBackend,
+             StableDiffusionCppBackendConfig,
+             StableDiffusionCppVisionBackend,
+         )
+         from abstractvision.errors import OptionalDependencyMissingError  # type: ignore
+         from abstractvision.types import ImageEditRequest, ImageGenerationRequest  # type: ignore
+     except Exception as e:  # pragma: no cover
+         import sys
+
+         raise HTTPException(
+             status_code=501,
+             detail=(
+                 "AbstractVision is required for vision generation endpoints. "
+                 "Install it into the same environment running the server (and use `python -m uvicorn ...` "
+                 "to ensure you are using the same interpreter). "
+                 f"(python={sys.executable})"
+             ),
+         ) from e
+     return (
+         OpenAICompatibleBackendConfig,
+         OpenAICompatibleVisionBackend,
+         HuggingFaceDiffusersBackendConfig,
+         HuggingFaceDiffusersVisionBackend,
+         StableDiffusionCppBackendConfig,
+         StableDiffusionCppVisionBackend,
+         OptionalDependencyMissingError,
+         (ImageGenerationRequest, ImageEditRequest),
+     )
+
+
+ def _parse_size(value: Any) -> Tuple[Optional[int], Optional[int]]:
+     if value is None:
+         return None, None
+     s = str(value).strip().lower()
+     if not s:
+         return None, None
+     if "x" not in s:
+         return None, None
+     w_s, h_s = s.split("x", 1)
+     try:
+         return int(w_s), int(h_s)
+     except Exception:
+         return None, None
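+
+ # Parsing examples: "1024x768" -> (1024, 768); "1024" or "1024*768" -> (None, None);
+ # whitespace and case are tolerated ("  512X512  " -> (512, 512)).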
+
+
+ def _coerce_int(v: Any) -> Optional[int]:
+     if v is None:
+         return None
+     if isinstance(v, int):
+         return v
+     try:
+         return int(str(v).strip())
+     except Exception:
+         return None
+
+
+ def _coerce_float(v: Any) -> Optional[float]:
+     if v is None:
+         return None
+     if isinstance(v, float):
+         return v
+     try:
+         return float(str(v).strip())
+     except Exception:
+         return None
+
+
+ def _resolve_backend(request_model: Any):
+     req_model = str(request_model or "").strip()
+     backend_kind = _effective_backend_kind(request_model)
+
+     # Important: return "not configured" errors without requiring optional deps.
+     if backend_kind == "auto_unconfigured":
+         raise HTTPException(
+             status_code=501,
+             detail=(
+                 "Vision image endpoints are not configured. "
+                 "Either pass `model` in the request (recommended), or set one of:\n"
+                 "- ABSTRACTCORE_VISION_MODEL_ID (Diffusers)\n"
+                 "- ABSTRACTCORE_VISION_UPSTREAM_BASE_URL (OpenAI-compatible proxy)\n"
+                 "- ABSTRACTCORE_VISION_SDCPP_MODEL / ABSTRACTCORE_VISION_SDCPP_DIFFUSION_MODEL (stable-diffusion.cpp)"
+             ),
+         )
+
+     # Validate backend-specific configuration before importing AbstractVision.
+     # This keeps error messages stable and avoids optional dependency requirements for unconfigured setups.
+     prevalidated: Dict[str, Any] = {"backend_kind": backend_kind}
+     if backend_kind == "openai_compatible_proxy":
+         base_url = _require_upstream_base_url()
+         model_id = str(request_model or _env("ABSTRACTCORE_VISION_UPSTREAM_MODEL_ID") or "").strip() or None
+         prevalidated.update(
+             {
+                 "base_url": base_url,
+                 "model_id": model_id,
+                 "timeout_s": float(_env("ABSTRACTCORE_VISION_TIMEOUT_S", "300") or "300"),
+                 "image_generations_path": _env("ABSTRACTCORE_VISION_UPSTREAM_IMAGES_GENERATIONS_PATH", "/images/generations")
+                 or "/images/generations",
+                 "image_edits_path": _env("ABSTRACTCORE_VISION_UPSTREAM_IMAGES_EDITS_PATH", "/images/edits") or "/images/edits",
+                 "api_key": _env("ABSTRACTCORE_VISION_UPSTREAM_API_KEY"),
+             }
+         )
+     elif backend_kind == "diffusers":
+         model_id = _require_diffusers_model_id(request_model)
+         allow_download = _env_bool("ABSTRACTCORE_VISION_ALLOW_DOWNLOAD", True)
+         prevalidated.update(
+             {
+                 "model_id": model_id,
+                 "device": _env("ABSTRACTCORE_VISION_DEVICE", "auto") or "auto",
+                 "torch_dtype": _env("ABSTRACTCORE_VISION_TORCH_DTYPE"),
+                 "allow_download": allow_download,
+             }
+         )
+     elif backend_kind == "sdcpp":
+         model_path, diffusion_model_path = _require_sdcpp_model_or_diffusion_model(request_model)
+         extra_args = _env("ABSTRACTCORE_VISION_SDCPP_EXTRA_ARGS")
+         prevalidated.update(
+             {
+                 "sd_cli_path": _env("ABSTRACTCORE_VISION_SDCPP_BIN", "sd-cli") or "sd-cli",
+                 "model_path": model_path,
+                 "diffusion_model_path": diffusion_model_path,
+                 "vae": _env("ABSTRACTCORE_VISION_SDCPP_VAE"),
+                 "llm": _env("ABSTRACTCORE_VISION_SDCPP_LLM"),
+                 "llm_vision": _env("ABSTRACTCORE_VISION_SDCPP_LLM_VISION"),
+                 "clip_l": _env("ABSTRACTCORE_VISION_SDCPP_CLIP_L"),
+                 "clip_g": _env("ABSTRACTCORE_VISION_SDCPP_CLIP_G"),
+                 "t5xxl": _env("ABSTRACTCORE_VISION_SDCPP_T5XXL"),
+                 "extra_args": extra_args,
+                 "timeout_s": float(_env("ABSTRACTCORE_VISION_TIMEOUT_S", "3600") or "3600"),
+             }
+         )
+     else:
+         raise HTTPException(status_code=501, detail=f"Unknown vision backend kind: {backend_kind!r} (set ABSTRACTCORE_VISION_BACKEND)")
+
+     (
+         OpenAICompatibleBackendConfig,
+         OpenAICompatibleVisionBackend,
+         HuggingFaceDiffusersBackendConfig,
+         HuggingFaceDiffusersVisionBackend,
+         StableDiffusionCppBackendConfig,
+         StableDiffusionCppVisionBackend,
+         OptionalDependencyMissingError,
+         req_types,
+     ) = _import_abstractvision()
+     ImageGenerationRequest, ImageEditRequest = req_types
+
+     active_model_id, active_kind, active_backend, active_call_lock = _get_active_backend()
+     if active_backend is not None and active_call_lock is not None and (not req_model or req_model == active_model_id):
+         return active_backend, active_call_lock, OptionalDependencyMissingError, ImageGenerationRequest, ImageEditRequest
+
+     if backend_kind == "openai_compatible_proxy":
+         base_url = prevalidated["base_url"]
+         model_id = prevalidated["model_id"]
+         cfg = OpenAICompatibleBackendConfig(
+             base_url=base_url,
+             api_key=prevalidated["api_key"],
+             model_id=model_id,
+             timeout_s=prevalidated["timeout_s"],
+             image_generations_path=prevalidated["image_generations_path"],
+             image_edits_path=prevalidated["image_edits_path"],
+         )
+         key = (
+             "openai_compatible_proxy",
+             base_url,
+             prevalidated["api_key"],
+             model_id,
+             prevalidated["timeout_s"],
+             prevalidated["image_generations_path"],
+             prevalidated["image_edits_path"],
+         )
+         backend, call_lock = _get_or_create_cached_backend(key, lambda: OpenAICompatibleVisionBackend(config=cfg))
+         return backend, call_lock, OptionalDependencyMissingError, ImageGenerationRequest, ImageEditRequest
+
+     if backend_kind == "diffusers":
+         model_id = prevalidated["model_id"]
+         allow_download = prevalidated["allow_download"]
+         cfg = HuggingFaceDiffusersBackendConfig(
+             model_id=model_id,
+             device=prevalidated["device"],
+             torch_dtype=prevalidated["torch_dtype"],
+             allow_download=allow_download,
+         )
+         key = (
+             "diffusers",
+             model_id,
+             prevalidated["device"],
+             prevalidated["torch_dtype"],
+             allow_download,
+         )
+         backend, call_lock = _get_or_create_cached_backend(key, lambda: HuggingFaceDiffusersVisionBackend(config=cfg))
+         return backend, call_lock, OptionalDependencyMissingError, ImageGenerationRequest, ImageEditRequest
+
+     if backend_kind == "sdcpp":
+         model_path = prevalidated["model_path"]
+         diffusion_model_path = prevalidated["diffusion_model_path"]
+         extra_args = prevalidated["extra_args"]
+         cfg = StableDiffusionCppBackendConfig(
+             sd_cli_path=prevalidated["sd_cli_path"],
+             model=model_path,
+             diffusion_model=diffusion_model_path,
+             vae=prevalidated["vae"],
+             llm=prevalidated["llm"],
+             llm_vision=prevalidated["llm_vision"],
+             clip_l=prevalidated["clip_l"],
+             clip_g=prevalidated["clip_g"],
+             t5xxl=prevalidated["t5xxl"],
+             extra_args=shlex.split(str(extra_args)) if extra_args else (),
+             timeout_s=prevalidated["timeout_s"],
+         )
+         key = (
+             "sdcpp",
+             prevalidated["sd_cli_path"],
+             model_path,
+             diffusion_model_path,
+             prevalidated["vae"],
+             prevalidated["llm"],
+             prevalidated["llm_vision"],
+             prevalidated["clip_l"],
+             prevalidated["clip_g"],
+             prevalidated["t5xxl"],
+             extra_args,
+             prevalidated["timeout_s"],
+         )
+         backend, call_lock = _get_or_create_cached_backend(key, lambda: StableDiffusionCppVisionBackend(config=cfg))
+         return backend, call_lock, OptionalDependencyMissingError, ImageGenerationRequest, ImageEditRequest
+
+     raise HTTPException(status_code=501, detail=f"Unknown vision backend kind: {backend_kind!r} (set ABSTRACTCORE_VISION_BACKEND)")
+
+
+ def _import_registry() -> Any:
+     try:
+         from abstractvision import VisionModelCapabilitiesRegistry  # type: ignore
+     except Exception as e:  # pragma: no cover
+         raise HTTPException(
+             status_code=501,
+             detail="AbstractVision is required for vision model registry endpoints. Install `abstractvision`.",
+         ) from e
+     return VisionModelCapabilitiesRegistry
+
+
+ @router.get("/vision/models")
+ async def list_cached_vision_models() -> Dict[str, Any]:
+     """List vision models from the AbstractVision registry that are present in local caches."""
+     VisionModelCapabilitiesRegistry = _import_registry()
+     reg = VisionModelCapabilitiesRegistry()
+
+     hf_dirs = _default_hf_hub_cache_dirs()
+     lms_dirs = _default_lmstudio_model_dirs()
+
+     models: list[Dict[str, Any]] = []
+     for model_id in reg.list_models():
+         spec = reg.get(model_id)
+         # Only list models relevant to this UI (t2i / i2i).
+         supported_tasks = sorted(spec.tasks.keys())
+         if "text_to_image" not in spec.tasks and "image_to_image" not in spec.tasks:
+             continue
+
+         cached_in: list[str] = []
+         if _is_hf_model_cached(model_id, hf_dirs):
+             cached_in.append("huggingface")
+         if _is_lmstudio_model_cached(model_id, lms_dirs):
+             cached_in.append("lmstudio")
+         if not cached_in:
+             continue
+
+         models.append(
+             {
+                 "id": model_id,
+                 "provider": spec.provider,
+                 "license": spec.license,
+                 "tasks": supported_tasks,
+                 "notes": spec.notes,
+                 "cached_in": cached_in,
+             }
+         )
+
+     models.sort(key=lambda x: str(x.get("id") or ""))
+     return {
+         "models": models,
+         "registry_total": len(reg.list_models()),
+         "cached_total": len(models),
+         "active": _active_state(),
+         "cache_dirs": {
+             "huggingface": [str(p) for p in hf_dirs],
+             "lmstudio": [str(p) for p in lms_dirs],
+         },
+     }
+
+
+ @router.get("/vision/model")
+ async def get_active_vision_model() -> Dict[str, Any]:
+     """Get the currently loaded (in-memory) vision model for this server process."""
+     return {"active": _active_state()}
+
+
+ @router.post("/vision/model/unload")
+ async def unload_active_vision_model() -> Dict[str, Any]:
+     """Unload the currently active in-memory vision model (best-effort)."""
+     _unload_active_backend()
+     return {"ok": True, "active": _active_state()}
+
+
+ @router.post("/vision/model/load")
+ async def load_active_vision_model(payload: Dict[str, Any] = Body(...)) -> Dict[str, Any]:
+     """Unload any active model, then load the requested one into memory (best-effort)."""
+     model_id = str(payload.get("model_id") or payload.get("model") or "").strip()
+     if not model_id:
+         raise HTTPException(status_code=400, detail="Missing required field: model_id")
+
+     VisionModelCapabilitiesRegistry = _import_registry()
+     reg = VisionModelCapabilitiesRegistry()
+     try:
+         _spec = reg.get(model_id)
+     except Exception as e:
+         raise HTTPException(status_code=400, detail=f"Unknown model id: {model_id!r}") from e
+
+     # Always switch single active model (free memory) for the playground UX.
+     _unload_active_backend()
+
+     start = time.time()
+     backend_kind = _infer_backend_kind(model_id)
+     if backend_kind not in {"diffusers", "sdcpp"}:
+         raise HTTPException(
+             status_code=400,
+             detail=f"Model {model_id!r} cannot be loaded into memory (unsupported backend kind: {backend_kind!r}).",
+         )
+
+     (
+         _OpenAICompatibleBackendConfig,
+         _OpenAICompatibleVisionBackend,
+         HuggingFaceDiffusersBackendConfig,
+         HuggingFaceDiffusersVisionBackend,
+         StableDiffusionCppBackendConfig,
+         StableDiffusionCppVisionBackend,
+         _OptionalDependencyMissingError,
+         _req_types,
+     ) = _import_abstractvision()
+
+     backend: Any = None
+     try:
+         if backend_kind == "diffusers":
+             cfg = HuggingFaceDiffusersBackendConfig(
+                 model_id=model_id,
+                 device=_env("ABSTRACTCORE_VISION_DEVICE", "auto") or "auto",
+                 torch_dtype=_env("ABSTRACTCORE_VISION_TORCH_DTYPE"),
+                 allow_download=_env_bool("ABSTRACTCORE_VISION_ALLOW_DOWNLOAD", True),
+             )
+             backend = HuggingFaceDiffusersVisionBackend(config=cfg)
+         else:
+             # stable-diffusion.cpp: treat `model_id` as a local path when used here.
+             model_path, diffusion_model_path = _require_sdcpp_model_or_diffusion_model(model_id)
+             extra_args = _env("ABSTRACTCORE_VISION_SDCPP_EXTRA_ARGS")
+             cfg = StableDiffusionCppBackendConfig(
+                 sd_cli_path=_env("ABSTRACTCORE_VISION_SDCPP_BIN", "sd-cli") or "sd-cli",
+                 model=model_path,
+                 diffusion_model=diffusion_model_path,
+                 vae=_env("ABSTRACTCORE_VISION_SDCPP_VAE"),
+                 llm=_env("ABSTRACTCORE_VISION_SDCPP_LLM"),
+                 llm_vision=_env("ABSTRACTCORE_VISION_SDCPP_LLM_VISION"),
+                 clip_l=_env("ABSTRACTCORE_VISION_SDCPP_CLIP_L"),
+                 clip_g=_env("ABSTRACTCORE_VISION_SDCPP_CLIP_G"),
+                 t5xxl=_env("ABSTRACTCORE_VISION_SDCPP_T5XXL"),
+                 extra_args=shlex.split(str(extra_args)) if extra_args else (),
+                 timeout_s=float(_env("ABSTRACTCORE_VISION_TIMEOUT_S", "3600") or "3600"),
+             )
+             backend = StableDiffusionCppVisionBackend(config=cfg)
+
+         call_lock = threading.Lock()
+         preload = getattr(backend, "preload", None)
+         if callable(preload):
+             with call_lock:
+                 preload()
+
+         global _ACTIVE_MODEL_ID, _ACTIVE_BACKEND_KIND, _ACTIVE_BACKEND, _ACTIVE_CALL_LOCK, _ACTIVE_LOADED_AT_S
+         with _ACTIVE_LOCK:
+             _ACTIVE_MODEL_ID = model_id
+             _ACTIVE_BACKEND_KIND = backend_kind
+             _ACTIVE_BACKEND = backend
+             _ACTIVE_CALL_LOCK = call_lock
+             _ACTIVE_LOADED_AT_S = time.time()
+
+         return {
+             "ok": True,
+             "active": _active_state(),
+             "load_ms": int((time.time() - start) * 1000),
+         }
+     except HTTPException:
+         raise
+     except Exception as e:
+         if backend is not None:
+             _unload_backend_best_effort(backend)
+         raise HTTPException(status_code=500, detail=str(e)) from e
+
+
+ @router.post("/images/generations")
+ async def images_generations(payload: Dict[str, Any] = Body(...)) -> Dict[str, Any]:
+     """
+     OpenAI-compatible image generation endpoint: POST /v1/images/generations
+
+     Notes:
+     - Only `response_format=b64_json` is supported.
+     - In `auto` mode (default), the backend is inferred per-request based on `model`.
+     """
+     prompt = str(payload.get("prompt") or "").strip()
+     if not prompt:
+         raise HTTPException(status_code=400, detail="Missing required field: prompt")
+
+     response_format = str(payload.get("response_format") or "b64_json").strip().lower()
+     if response_format not in {"b64_json"}:
+         raise HTTPException(status_code=400, detail="Only response_format='b64_json' is supported.")
+
+     n = _coerce_int(payload.get("n")) or 1
+     n = max(1, min(int(n), 10))
+
+     width = _coerce_int(payload.get("width"))
+     height = _coerce_int(payload.get("height"))
+     if (width is None or height is None) and payload.get("size") is not None:
+         w2, h2 = _parse_size(payload.get("size"))
+         width = width if width is not None else w2
+         height = height if height is not None else h2
+
+     negative_prompt = payload.get("negative_prompt")
+     steps = _coerce_int(payload.get("steps"))
+     guidance_scale = _coerce_float(payload.get("guidance_scale"))
+     seed = _coerce_int(payload.get("seed"))
+
+     backend, call_lock, OptionalDependencyMissingError, ImageGenerationRequest, _ImageEditRequest = _resolve_backend(
+         payload.get("model")
+     )
+
+     data_items = []
+     for _ in range(n):
+         try:
+             req = ImageGenerationRequest(
+                 prompt=prompt,
+                 negative_prompt=str(negative_prompt) if negative_prompt is not None else None,
+                 width=width,
+                 height=height,
+                 steps=steps,
+                 guidance_scale=guidance_scale,
+                 seed=seed,
+                 extra={
+                     k: v
+                     for k, v in payload.items()
+                     if k
+                     not in {
+                         "prompt",
+                         "model",
+                         "n",
+                         "size",
+                         "response_format",
+                         "width",
+                         "height",
+                         "negative_prompt",
+                         "seed",
+                         "steps",
+                         "guidance_scale",
+                     }
+                 },
+             )
+             with call_lock:
+                 asset = backend.generate_image(req)
+         except OptionalDependencyMissingError as e:
+             raise HTTPException(status_code=501, detail=str(e)) from e
+         except ValueError as e:
+             raise HTTPException(status_code=400, detail=str(e)) from e
+         except HTTPException:
+             raise
+         except Exception as e:
+             raise HTTPException(status_code=500, detail=str(e)) from e
+         b64 = base64.b64encode(bytes(asset.data)).decode("ascii")
+         data_items.append({"b64_json": b64})
+
+     return {"created": int(time.time()), "data": data_items}
+
+
+ @router.post("/vision/jobs/images/generations")
+ async def jobs_images_generations(payload: Dict[str, Any] = Body(...)) -> Dict[str, Any]:
+     """Start an async image generation job with progress polling."""
+     prompt = str(payload.get("prompt") or "").strip()
+     if not prompt:
+         raise HTTPException(status_code=400, detail="Missing required field: prompt")
+
+     response_format = str(payload.get("response_format") or "b64_json").strip().lower()
+     if response_format not in {"b64_json"}:
+         raise HTTPException(status_code=400, detail="Only response_format='b64_json' is supported.")
+
+     n = _coerce_int(payload.get("n")) or 1
+     n = max(1, min(int(n), 10))
+
+     width = _coerce_int(payload.get("width"))
+     height = _coerce_int(payload.get("height"))
+     if (width is None or height is None) and payload.get("size") is not None:
+         w2, h2 = _parse_size(payload.get("size"))
+         width = width if width is not None else w2
+         height = height if height is not None else h2
+
+     negative_prompt = payload.get("negative_prompt")
+     steps = _coerce_int(payload.get("steps"))
+     guidance_scale = _coerce_float(payload.get("guidance_scale"))
+     seed = _coerce_int(payload.get("seed"))
+
+     backend, call_lock, OptionalDependencyMissingError, ImageGenerationRequest, _ImageEditRequest = _resolve_backend(
+         payload.get("model")
+     )
+
+     total_steps = (int(steps) * int(n)) if steps is not None else None
+
+     now_s = time.time()
+     with _JOBS_LOCK:
+         _jobs_cleanup_locked(now_s=now_s)
+         # Be conservative: only allow one in-flight job to reduce memory pressure.
+         if _any_inflight_job_locked():
+             raise HTTPException(status_code=409, detail="Another generation is already running; wait for it to finish.")
+
+         job_id = _new_job_id()
+         _JOBS[job_id] = {
+             "id": job_id,
+             "kind": "images/generations",
+             "state": "queued",
+             "created_at_s": now_s,
+             "updated_at_s": now_s,
+             "progress": {"step": 0, "total_steps": total_steps},
+         }
+
+     def _runner() -> None:
+         try:
+             _job_update_progress(job_id, step=0, total=total_steps, message="running")
+             with _JOBS_LOCK:
+                 job = _JOBS.get(job_id)
+                 if job is not None:
+                     job["state"] = "running"
+                     job["updated_at_s"] = time.time()
+
+             data_items = []
+             for i in range(int(n)):
+                 req = ImageGenerationRequest(
+                     prompt=prompt,
+                     negative_prompt=str(negative_prompt) if negative_prompt is not None else None,
+                     width=width,
+                     height=height,
+                     steps=steps,
+                     guidance_scale=guidance_scale,
+                     seed=seed,
+                     extra={
+                         k: v
+                         for k, v in payload.items()
+                         if k
+                         not in {
+                             "prompt",
+                             "model",
+                             "n",
+                             "size",
+                             "response_format",
+                             "width",
+                             "height",
+                             "negative_prompt",
+                             "seed",
+                             "steps",
+                             "guidance_scale",
+                         }
+                     },
+                 )
+
+                 offset = int(steps) * i if steps is not None else 0
+
+                 def _progress(step_i: int, total_i: Optional[int] = None) -> None:
+                     # `step_i` may be 0- or 1-based depending on the backend; clamp it to
+                     # [0, steps] before adding the per-image offset to the overall counter.
+                     s = int(step_i)
+                     if s < 0:
+                         s = 0
+                     if steps is not None and s > int(steps):
+                         s = int(steps)
+                     overall = offset + s
+                     _job_update_progress(job_id, step=overall, total=total_steps)
+
+                 with call_lock:
+                     fn = getattr(backend, "generate_image_with_progress", None)
+                     if callable(fn):
+                         asset = fn(req, progress_callback=_progress)
+                     else:
+                         asset = backend.generate_image(req)
+                 b64 = base64.b64encode(bytes(asset.data)).decode("ascii")
+                 data_items.append({"b64_json": b64})
+
+             _job_finish(job_id, ok=True, result={"created": int(time.time()), "data": data_items})
+         except OptionalDependencyMissingError as e:
+             _job_finish(job_id, ok=False, error=str(e))
+         except Exception as e:
+             _job_finish(job_id, ok=False, error=str(e))
+
+     threading.Thread(target=_runner, name=f"vision-job-{job_id}", daemon=True).start()
+     return {"job_id": job_id}
+
+
+ @router.get("/vision/jobs/{job_id}")
+ async def get_job(job_id: str, consume: Optional[bool] = False) -> Dict[str, Any]:
+     """Poll a job's status (optionally consume/remove it when completed)."""
+     jid = str(job_id or "").strip()
+     if not jid:
+         raise HTTPException(status_code=400, detail="Missing job_id")
+
+     now_s = time.time()
+     with _JOBS_LOCK:
+         _jobs_cleanup_locked(now_s=now_s)
+         job = _JOBS.get(jid)
+         if job is None:
+             raise HTTPException(status_code=404, detail="Job not found")
+         data = dict(job)
+         state = str(data.get("state") or "")
+         if consume and state in {"succeeded", "failed"}:
+             _JOBS.pop(jid, None)
+
+     # Avoid leaking large results on in-flight polls.
+     if str(data.get("state") or "") not in {"succeeded"}:
+         data.pop("result", None)
+     return data
+
+
+ if _HAS_MULTIPART:
+
+     @router.post("/vision/jobs/images/edits")
+     async def jobs_images_edits(
+         prompt: str = Form(...),
+         image: UploadFile = File(...),
+         mask: Optional[UploadFile] = File(None),
+         model: Optional[str] = Form(None),
+         negative_prompt: Optional[str] = Form(None),
+         seed: Optional[str] = Form(None),
+         steps: Optional[str] = Form(None),
+         guidance_scale: Optional[str] = Form(None),
+         extra_json: Optional[str] = Form(None),
+     ) -> Dict[str, Any]:
+         """Start an async image edit job with progress polling."""
+         prompt_s = str(prompt or "").strip()
+         if not prompt_s:
+             raise HTTPException(status_code=400, detail="Missing required field: prompt")
+
+         image_bytes = await image.read()
+         if not image_bytes:
+             raise HTTPException(status_code=400, detail="Missing required image bytes")
+
+         mask_bytes = await mask.read() if mask is not None else None
+
+         extra: Dict[str, Any] = {}
+         if extra_json:
+             try:
+                 parsed = json.loads(str(extra_json))
+             except Exception as e:
+                 raise HTTPException(status_code=400, detail="extra_json must be a JSON object string") from e
+             if parsed is None:
+                 extra = {}
+             elif isinstance(parsed, dict):
+                 extra = dict(parsed)
+             else:
+                 raise HTTPException(status_code=400, detail="extra_json must be a JSON object string")
+
+         seed_i = _coerce_int(seed)
+         steps_i = _coerce_int(steps)
+         guidance_f = _coerce_float(guidance_scale)
+
+         backend, call_lock, OptionalDependencyMissingError, _ImageGenerationRequest, ImageEditRequest = _resolve_backend(model)
+         total_steps = int(steps_i) if steps_i is not None else None
+
+         now_s = time.time()
+         with _JOBS_LOCK:
+             _jobs_cleanup_locked(now_s=now_s)
+             if _any_inflight_job_locked():
+                 raise HTTPException(status_code=409, detail="Another generation is already running; wait for it to finish.")
+
+             job_id = _new_job_id()
+             _JOBS[job_id] = {
+                 "id": job_id,
+                 "kind": "images/edits",
+                 "state": "queued",
+                 "created_at_s": now_s,
+                 "updated_at_s": now_s,
+                 "progress": {"step": 0, "total_steps": total_steps},
+             }
+
+         def _runner() -> None:
+             try:
+                 _job_update_progress(job_id, step=0, total=total_steps, message="running")
+                 with _JOBS_LOCK:
+                     job = _JOBS.get(job_id)
+                     if job is not None:
+                         job["state"] = "running"
+                         job["updated_at_s"] = time.time()
+
+                 req = ImageEditRequest(
+                     prompt=prompt_s,
+                     image=bytes(image_bytes),
+                     mask=bytes(mask_bytes) if mask_bytes else None,
+                     negative_prompt=str(negative_prompt) if negative_prompt is not None else None,
+                     seed=seed_i,
+                     steps=steps_i,
+                     guidance_scale=guidance_f,
+                     extra=extra,
+                 )
+
+                 def _progress(step_i: int, total_i: Optional[int] = None) -> None:
+                     s = int(step_i)
+                     if s < 0:
+                         s = 0
+                     if total_steps is not None and s > int(total_steps):
+                         s = int(total_steps)
+                     _job_update_progress(job_id, step=s, total=total_steps)
+
+                 with call_lock:
+                     fn = getattr(backend, "edit_image_with_progress", None)
+                     if callable(fn):
+                         asset = fn(req, progress_callback=_progress)
+                     else:
+                         asset = backend.edit_image(req)
+                 b64 = base64.b64encode(bytes(asset.data)).decode("ascii")
+                 _job_finish(job_id, ok=True, result={"created": int(time.time()), "data": [{"b64_json": b64}]})
+             except OptionalDependencyMissingError as e:
+                 _job_finish(job_id, ok=False, error=str(e))
+             except Exception as e:
+                 _job_finish(job_id, ok=False, error=str(e))
+
+         threading.Thread(target=_runner, name=f"vision-job-{job_id}", daemon=True).start()
+         return {"job_id": job_id}
+
+     @router.post("/images/edits")
+     async def images_edits(
+         prompt: str = Form(...),
+         image: UploadFile = File(...),
+         mask: Optional[UploadFile] = File(None),
+         model: Optional[str] = Form(None),
+         negative_prompt: Optional[str] = Form(None),
+         seed: Optional[str] = Form(None),
+         steps: Optional[str] = Form(None),
+         guidance_scale: Optional[str] = Form(None),
+         extra_json: Optional[str] = Form(None),
+     ) -> Dict[str, Any]:
+         """
+         OpenAI-compatible image edit endpoint: POST /v1/images/edits (multipart/form-data)
+
+         Implemented as a thin proxy over AbstractVision's OpenAI-compatible backend.
+         """
+         prompt_s = str(prompt or "").strip()
+         if not prompt_s:
+             raise HTTPException(status_code=400, detail="Missing required field: prompt")
+
+         image_bytes = await image.read()
+         if not image_bytes:
+             raise HTTPException(status_code=400, detail="Missing required image bytes")
+
+         mask_bytes = await mask.read() if mask is not None else None
+
+         extra: Dict[str, Any] = {}
+         if extra_json:
+             try:
+                 parsed = json.loads(str(extra_json))
+             except Exception as e:
+                 raise HTTPException(status_code=400, detail="extra_json must be a JSON object string") from e
+             if parsed is None:
+                 extra = {}
+             elif isinstance(parsed, dict):
+                 extra = dict(parsed)
+             else:
+                 raise HTTPException(status_code=400, detail="extra_json must be a JSON object string")
+
+         backend, call_lock, OptionalDependencyMissingError, _ImageGenerationRequest, ImageEditRequest = _resolve_backend(model)
+
+         try:
+             req = ImageEditRequest(
+                 prompt=prompt_s,
+                 image=bytes(image_bytes),
+                 mask=bytes(mask_bytes) if mask_bytes else None,
+                 negative_prompt=str(negative_prompt) if negative_prompt is not None else None,
+                 seed=_coerce_int(seed),
+                 steps=_coerce_int(steps),
+                 guidance_scale=_coerce_float(guidance_scale),
+                 extra=extra,
+             )
+             with call_lock:
+                 asset = backend.edit_image(req)
+         except OptionalDependencyMissingError as e:
+             raise HTTPException(status_code=501, detail=str(e)) from e
+         except ValueError as e:
+             raise HTTPException(status_code=400, detail=str(e)) from e
+         except HTTPException:
+             raise
+         except Exception as e:
+             raise HTTPException(status_code=500, detail=str(e)) from e
+         b64 = base64.b64encode(bytes(asset.data)).decode("ascii")
+         return {"created": int(time.time()), "data": [{"b64_json": b64}]}
+
+ else:
+
+     @router.post("/images/edits")
+     async def images_edits() -> Dict[str, Any]:
+         raise HTTPException(
+             status_code=501,
+             detail=(
+                 "The /v1/images/edits endpoint requires python-multipart for multipart/form-data parsing. "
+                 "Install it via: pip install \"abstractcore[server]\" (or: pip install python-multipart)."
+             ),
+         )