abstractvision 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractvision/__init__.py +18 -3
- abstractvision/__main__.py +8 -0
- abstractvision/artifacts.py +320 -0
- abstractvision/assets/vision_model_capabilities.json +406 -0
- abstractvision/backends/__init__.py +43 -0
- abstractvision/backends/base_backend.py +63 -0
- abstractvision/backends/huggingface_diffusers.py +1503 -0
- abstractvision/backends/openai_compatible.py +325 -0
- abstractvision/backends/stable_diffusion_cpp.py +751 -0
- abstractvision/cli.py +778 -0
- abstractvision/errors.py +19 -0
- abstractvision/integrations/__init__.py +5 -0
- abstractvision/integrations/abstractcore.py +263 -0
- abstractvision/integrations/abstractcore_plugin.py +193 -0
- abstractvision/model_capabilities.py +255 -0
- abstractvision/types.py +95 -0
- abstractvision/vision_manager.py +115 -0
- abstractvision-0.2.1.dist-info/METADATA +243 -0
- abstractvision-0.2.1.dist-info/RECORD +23 -0
- {abstractvision-0.1.0.dist-info → abstractvision-0.2.1.dist-info}/WHEEL +1 -1
- abstractvision-0.2.1.dist-info/entry_points.txt +5 -0
- abstractvision-0.1.0.dist-info/METADATA +0 -65
- abstractvision-0.1.0.dist-info/RECORD +0 -6
- {abstractvision-0.1.0.dist-info → abstractvision-0.2.1.dist-info}/licenses/LICENSE +0 -0
- {abstractvision-0.1.0.dist-info → abstractvision-0.2.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import base64
|
|
4
|
+
import json
|
|
5
|
+
import uuid
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Any, Dict, Optional, Tuple
|
|
8
|
+
from urllib.request import Request, urlopen
|
|
9
|
+
|
|
10
|
+
from ..errors import CapabilityNotSupportedError
|
|
11
|
+
from ..types import (
|
|
12
|
+
GeneratedAsset,
|
|
13
|
+
ImageEditRequest,
|
|
14
|
+
ImageGenerationRequest,
|
|
15
|
+
ImageToVideoRequest,
|
|
16
|
+
VideoGenerationRequest,
|
|
17
|
+
VisionBackendCapabilities,
|
|
18
|
+
)
|
|
19
|
+
from .base_backend import VisionBackend
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _join_url(base_url: str, path: str) -> str:
|
|
23
|
+
b = str(base_url or "").rstrip("/")
|
|
24
|
+
p = str(path or "").strip()
|
|
25
|
+
if not p:
|
|
26
|
+
return b
|
|
27
|
+
if not p.startswith("/"):
|
|
28
|
+
p = "/" + p
|
|
29
|
+
return b + p
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _sniff_mime_type(content: bytes, fallback: str) -> str:
|
|
33
|
+
b = bytes(content or b"")
|
|
34
|
+
if b.startswith(b"\x89PNG\r\n\x1a\n"):
|
|
35
|
+
return "image/png"
|
|
36
|
+
if b.startswith(b"\xff\xd8\xff"):
|
|
37
|
+
return "image/jpeg"
|
|
38
|
+
if len(b) >= 12 and b[4:8] == b"ftyp":
|
|
39
|
+
return "video/mp4"
|
|
40
|
+
return str(fallback or "application/octet-stream")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _decode_b64(s: str) -> bytes:
|
|
44
|
+
raw = str(s or "").strip()
|
|
45
|
+
raw = "".join(raw.split())
|
|
46
|
+
pad = (-len(raw)) % 4
|
|
47
|
+
if pad:
|
|
48
|
+
raw = raw + ("=" * pad)
|
|
49
|
+
return base64.b64decode(raw, validate=False)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _first_data_item(resp: Dict[str, Any]) -> Dict[str, Any]:
|
|
53
|
+
data = resp.get("data")
|
|
54
|
+
if isinstance(data, list) and data and isinstance(data[0], dict):
|
|
55
|
+
return data[0]
|
|
56
|
+
return {}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _multipart_form(
|
|
60
|
+
*,
|
|
61
|
+
fields: Dict[str, str],
|
|
62
|
+
files: Dict[str, Tuple[str, bytes, str]],
|
|
63
|
+
) -> Tuple[bytes, str]:
|
|
64
|
+
boundary = f"----abstractvision-{uuid.uuid4().hex}"
|
|
65
|
+
parts: list[bytes] = []
|
|
66
|
+
|
|
67
|
+
def _add(b: bytes) -> None:
|
|
68
|
+
parts.append(b)
|
|
69
|
+
|
|
70
|
+
for name, value in fields.items():
|
|
71
|
+
_add(f"--{boundary}\r\n".encode("utf-8"))
|
|
72
|
+
_add(f'Content-Disposition: form-data; name="{name}"\r\n\r\n'.encode("utf-8"))
|
|
73
|
+
_add(str(value).encode("utf-8"))
|
|
74
|
+
_add(b"\r\n")
|
|
75
|
+
|
|
76
|
+
for name, (filename, content, content_type) in files.items():
|
|
77
|
+
_add(f"--{boundary}\r\n".encode("utf-8"))
|
|
78
|
+
_add(
|
|
79
|
+
f'Content-Disposition: form-data; name="{name}"; filename="{filename}"\r\n'.encode(
|
|
80
|
+
"utf-8"
|
|
81
|
+
)
|
|
82
|
+
)
|
|
83
|
+
_add(f"Content-Type: {content_type}\r\n\r\n".encode("utf-8"))
|
|
84
|
+
_add(bytes(content))
|
|
85
|
+
_add(b"\r\n")
|
|
86
|
+
|
|
87
|
+
_add(f"--{boundary}--\r\n".encode("utf-8"))
|
|
88
|
+
body = b"".join(parts)
|
|
89
|
+
return body, boundary
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@dataclass
class OpenAICompatibleBackendConfig:
    """Connection and routing settings for an OpenAI-shaped HTTP backend."""

    # Root URL that endpoint paths are appended to.
    base_url: str
    # Sent as an ``Authorization: Bearer`` header when set.
    api_key: Optional[str] = None
    # Sent as the ``model`` request field when set.
    model_id: Optional[str] = None
    # Timeout in seconds handed to ``urlopen`` for every request.
    timeout_s: float = 300.0

    # Endpoints (OpenAI-shaped HTTP).
    image_generations_path: str = "/images/generations"
    image_edits_path: str = "/images/edits"
    # Video endpoints are disabled while these are left unset.
    text_to_video_path: Optional[str] = None
    image_to_video_path: Optional[str] = None

    # Image-to-video request mode when enabled.
    image_to_video_mode: str = "multipart"  # "multipart" | "json_b64"
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class OpenAICompatibleVisionBackend(VisionBackend):
    """Backend adapter for OpenAI-compatible endpoints (OpenAI-shaped HTTP).

    Notes:
    - Image endpoints are widely implemented (`/images/generations`, `/images/edits`).
    - Video endpoints are not standardized; they are optional and must be configured explicitly.
    - Requests go through ``urllib.request.urlopen``; exceptions it raises are not
      caught here and propagate to the caller.
    """

    # Optional integer request attributes forwarded to the image endpoints.
    _IMAGE_INT_FIELDS = ("seed", "steps")
    # Optional integer request attributes forwarded to the video endpoints.
    _VIDEO_INT_FIELDS = ("width", "height", "fps", "num_frames", "seed", "steps")

    def __init__(self, *, config: OpenAICompatibleBackendConfig):
        """Store the backend configuration; no network access happens here."""
        self._cfg = config

    def get_capabilities(self) -> VisionBackendCapabilities:
        """Report supported tasks; video tasks appear only when their path is configured."""
        tasks = {"text_to_image", "image_to_image"}
        if self._cfg.text_to_video_path:
            tasks.add("text_to_video")
        if self._cfg.image_to_video_path:
            tasks.add("image_to_video")
        return VisionBackendCapabilities(
            supported_tasks=sorted(tasks),
            supports_mask=True,
        )

    def _headers(self, *, content_type: str) -> Dict[str, str]:
        """Build request headers, adding a bearer token when an API key is set."""
        headers = {"Content-Type": str(content_type)}
        if self._cfg.api_key:
            headers["Authorization"] = f"Bearer {self._cfg.api_key}"
        return headers

    def _send(self, *, path: str, body: bytes, content_type: str) -> Dict[str, Any]:
        """POST ``body`` to ``path`` and return the decoded JSON object.

        Raises:
            ValueError: If the response body is JSON but not an object.
        """
        url = _join_url(self._cfg.base_url, path)
        req = Request(url=url, data=body, method="POST", headers=self._headers(content_type=content_type))
        with urlopen(req, timeout=float(self._cfg.timeout_s)) as resp:
            raw = resp.read()
        data = json.loads(raw.decode("utf-8"))
        if not isinstance(data, dict):
            raise ValueError("Invalid response: expected JSON object")
        return data

    def _post_json(self, *, path: str, payload: Dict[str, Any]) -> Dict[str, Any]:
        """POST ``payload`` as JSON and return the decoded JSON object."""
        body = json.dumps(payload).encode("utf-8")
        return self._send(path=path, body=body, content_type="application/json")

    def _post_multipart(self, *, path: str, fields: Dict[str, str], files: Dict[str, Tuple[str, bytes, str]]) -> Dict[str, Any]:
        """POST ``fields`` and ``files`` as multipart form data and return the decoded JSON object."""
        body, boundary = _multipart_form(fields=fields, files=files)
        return self._send(path=path, body=body, content_type=f"multipart/form-data; boundary={boundary}")

    @staticmethod
    def _optional_params(request: Any, int_fields: Tuple[str, ...]) -> Dict[str, Any]:
        """Collect the optional tuning parameters that are set on ``request``.

        Returns ``negative_prompt`` as-is, each attribute named in
        ``int_fields`` coerced with ``int()``, and ``guidance_scale`` coerced
        with ``float()``; attributes whose value is ``None`` are omitted.
        """
        params: Dict[str, Any] = {}
        if request.negative_prompt is not None:
            params["negative_prompt"] = request.negative_prompt
        for name in int_fields:
            value = getattr(request, name)
            if value is not None:
                params[name] = int(value)
        if request.guidance_scale is not None:
            params["guidance_scale"] = float(request.guidance_scale)
        return params

    @staticmethod
    def _form_fields(params: Dict[str, Any], extra: Any) -> Dict[str, str]:
        """Stringify ``params`` and merge ``extra`` for multipart transport.

        ``extra`` entries whose value is ``None`` are skipped; ``extra`` is
        ignored entirely unless it is a dict.
        """
        fields = {key: str(value) for key, value in params.items()}
        if isinstance(extra, dict):
            fields.update({str(k): str(v) for k, v in extra.items() if v is not None})
        return fields

    def _parse_media(self, resp: Dict[str, Any], *, fallback_mime: str) -> GeneratedAsset:
        """Turn the first ``data`` item of ``resp`` into a ``GeneratedAsset``.

        A non-empty ``b64_json`` value is decoded inline. Otherwise a ``url``
        value is downloaded, which lets responses carrying
        ``"b64_json": null`` next to a ``url`` still resolve.

        Raises:
            ValueError: If neither field carries a value, or the URL is not
                an ``http``/``https`` URL.
        """
        item = _first_data_item(resp)

        b64 = str(item.get("b64_json") or "").strip()
        if b64:
            content = _decode_b64(b64)
            mime = _sniff_mime_type(content, fallback_mime)
            media_type = "video" if mime.startswith("video/") else "image"
            return GeneratedAsset(media_type=media_type, data=content, mime_type=mime, metadata={"source": "b64_json"})

        url = item.get("url")
        if isinstance(url, str) and url.strip():
            u = url.strip()
            # The URL comes from the remote server. urlopen would also accept
            # other schemes (e.g. "file:"), so restrict it to HTTP(S).
            if not u.lower().startswith(("http://", "https://")):
                raise ValueError("Invalid response: data[0].url must be an http(s) URL")
            # Best-effort: download bytes.
            req = Request(url=u, method="GET")
            with urlopen(req, timeout=float(self._cfg.timeout_s)) as resp2:
                content = resp2.read()
                ct = resp2.headers.get("Content-Type") or fallback_mime
            mime = _sniff_mime_type(content, str(ct))
            media_type = "video" if mime.startswith("video/") else "image"
            return GeneratedAsset(media_type=media_type, data=content, mime_type=mime, metadata={"source": "url", "url": u})

        raise ValueError("Invalid response: missing data[0].b64_json or data[0].url")

    def generate_image(self, request: ImageGenerationRequest) -> GeneratedAsset:
        """Generate one image from a text prompt via the image-generations endpoint."""
        payload: Dict[str, Any] = {
            "prompt": request.prompt,
            "response_format": "b64_json",
            "n": 1,
        }
        if self._cfg.model_id:
            payload["model"] = self._cfg.model_id
        payload.update(self._optional_params(request, self._IMAGE_INT_FIELDS))
        if request.width is not None and request.height is not None:
            width, height = int(request.width), int(request.height)
            # Sent both as the OpenAI-style "size" string and as separate ints.
            payload["size"] = f"{width}x{height}"
            payload["width"] = width
            payload["height"] = height
        # Extras are applied last so callers can override any field above.
        if isinstance(request.extra, dict) and request.extra:
            payload.update(dict(request.extra))

        resp = self._post_json(path=self._cfg.image_generations_path, payload=payload)
        return self._parse_media(resp, fallback_mime="image/png")

    def edit_image(self, request: ImageEditRequest) -> GeneratedAsset:
        """Edit an image (optionally with a mask) via the image-edits endpoint."""
        # OpenAI-style image edits use multipart form data.
        params: Dict[str, Any] = {"prompt": request.prompt}
        if self._cfg.model_id:
            params["model"] = self._cfg.model_id
        # Best-effort extra fields.
        params.update(self._optional_params(request, self._IMAGE_INT_FIELDS))
        fields = self._form_fields(params, request.extra)

        files: Dict[str, Tuple[str, bytes, str]] = {
            "image": ("image.png", bytes(request.image), "image/png"),
        }
        if request.mask is not None:
            files["mask"] = ("mask.png", bytes(request.mask), "image/png")

        resp = self._post_multipart(path=self._cfg.image_edits_path, fields=fields, files=files)
        return self._parse_media(resp, fallback_mime="image/png")

    def generate_angles(self, request) -> list[GeneratedAsset]:
        """Always raises: multi-view generation is not implemented by this backend."""
        raise CapabilityNotSupportedError("OpenAICompatibleVisionBackend does not implement multi-view generation.")

    def generate_video(self, request: VideoGenerationRequest) -> GeneratedAsset:
        """Generate a video from a text prompt.

        Raises:
            CapabilityNotSupportedError: If ``text_to_video_path`` is not configured.
        """
        if not self._cfg.text_to_video_path:
            raise CapabilityNotSupportedError("text_to_video is not configured for this backend.")
        payload: Dict[str, Any] = {"prompt": request.prompt, "response_format": "b64_json", "n": 1}
        if self._cfg.model_id:
            payload["model"] = self._cfg.model_id
        payload.update(self._optional_params(request, self._VIDEO_INT_FIELDS))
        if isinstance(request.extra, dict) and request.extra:
            payload.update(dict(request.extra))
        resp = self._post_json(path=str(self._cfg.text_to_video_path), payload=payload)
        return self._parse_media(resp, fallback_mime="video/mp4")

    def image_to_video(self, request: ImageToVideoRequest) -> GeneratedAsset:
        """Generate a video from a source image.

        The image is sent base64-encoded in a JSON body when
        ``image_to_video_mode`` is ``"json_b64"``; any other mode value sends
        it as a multipart file upload.

        Raises:
            CapabilityNotSupportedError: If ``image_to_video_path`` is not configured.
        """
        if not self._cfg.image_to_video_path:
            raise CapabilityNotSupportedError("image_to_video is not configured for this backend.")
        path = str(self._cfg.image_to_video_path)

        # Parameters shared by both transport modes.
        params: Dict[str, Any] = {}
        if self._cfg.model_id:
            params["model"] = self._cfg.model_id
        if request.prompt is not None:
            params["prompt"] = request.prompt
        params.update(self._optional_params(request, self._VIDEO_INT_FIELDS))

        if str(self._cfg.image_to_video_mode) == "json_b64":
            payload: Dict[str, Any] = {"image_b64": base64.b64encode(bytes(request.image)).decode("ascii")}
            payload.update(params)
            if isinstance(request.extra, dict) and request.extra:
                payload.update(dict(request.extra))
            resp = self._post_json(path=path, payload=payload)
        else:
            fields = self._form_fields(params, request.extra)
            files = {"image": ("image.png", bytes(request.image), "image/png")}
            resp = self._post_multipart(path=path, fields=fields, files=files)
        return self._parse_media(resp, fallback_mime="video/mp4")
|