abstractvision-0.1.0-py3-none-any.whl → abstractvision-0.2.1-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
@@ -0,0 +1,19 @@
+ class AbstractVisionError(Exception):
+     """Base exception for the abstractvision package."""
+
+
+ class BackendNotConfiguredError(AbstractVisionError):
+     """Raised when a VisionManager method is called without a configured backend."""
+
+
+ class OptionalDependencyMissingError(AbstractVisionError):
+     """Raised when an optional backend dependency is missing."""
+
+
+ class UnknownModelError(AbstractVisionError):
+     """Raised when a model id is not present in the capability registry."""
+
+
+ class CapabilityNotSupportedError(AbstractVisionError):
+     """Raised when a model/backend cannot satisfy a requested generative capability."""
+
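The hierarchy gives callers a single catch-all (AbstractVisionError) plus specific subclasses for configuration, dependency, and capability problems. A minimal usage sketch, not part of the diff: `vm` is a placeholder for a configured VisionManager, and the `abstractvision.errors` import path is inferred from the relative imports later in this diff.

from abstractvision.errors import (
    AbstractVisionError,
    CapabilityNotSupportedError,
    OptionalDependencyMissingError,
)

try:
    ref = vm.generate_image("a watercolor fox")  # vm: configured VisionManager (placeholder)
except OptionalDependencyMissingError as exc:
    print(f"Missing optional backend dependency: {exc}")
except CapabilityNotSupportedError as exc:
    print(f"The selected model cannot do text-to-image: {exc}")
except AbstractVisionError as exc:
    # The base class catches every package-specific failure.
    print(f"Vision call failed: {exc}")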
@@ -0,0 +1,5 @@
+ """Integration modules for external Abstract ecosystem packages.
+
+ These modules are optional and should not be imported at package import time.
+ """
+
@@ -0,0 +1,263 @@
+ from __future__ import annotations
+
+ import base64
+ from typing import Any, Callable, Dict, List, Optional
+
+ from ..artifacts import MediaStore, get_artifact_id, is_artifact_ref
+ from ..errors import AbstractVisionError, OptionalDependencyMissingError
+ from ..model_capabilities import VisionModelCapabilitiesRegistry
+ from ..vision_manager import VisionManager
+
+
+ def _require_abstractcore_tool():
+     try:
+         from abstractcore import tool  # type: ignore
+     except Exception as e:  # pragma: no cover (covered indirectly by import failures)
+         raise OptionalDependencyMissingError(
+             "AbstractCore is required for this integration. Install it via: pip install abstractcore"
+         ) from e
+     return tool
+
+
+ def _decode_base64_bytes(value: str) -> bytes:
+     raw = str(value or "").strip()
+     if not raw:
+         return b""
+     if raw.startswith("data:") and "," in raw:
+         raw = raw.split(",", 1)[1].strip()
+     # Best-effort: tolerate missing padding/newlines.
+     raw = "".join(raw.split())
+     pad = (-len(raw)) % 4
+     if pad:
+         raw = raw + ("=" * pad)
+     return base64.b64decode(raw, validate=False)
+
+
+ def _require_store(vm: VisionManager) -> MediaStore:
+     store = getattr(vm, "store", None)
+     if store is None:
+         raise AbstractVisionError("VisionManager.store is required for tool integration (artifact-ref outputs).")
+     return store
+
+
+ def _resolve_input_bytes(
+     *,
+     store: MediaStore,
+     artifact: Optional[Dict[str, Any]],
+     b64: Optional[str],
+     name: str,
+     required: bool,
+ ) -> Optional[bytes]:
+     if artifact is not None:
+         if not is_artifact_ref(artifact):
+             raise ValueError(f"{name}: expected an artifact ref dict like {{'$artifact': '...'}}")
+         return store.load_bytes(get_artifact_id(artifact))
+     if b64 is not None:
+         out = _decode_base64_bytes(b64)
+         if required and not out:
+             raise ValueError(f"{name}: base64 payload decoded to empty bytes")
+         return out
+     if required:
+         raise ValueError(f"{name}: either {name}_artifact or {name}_b64 is required")
+     return None
+
+
+ def make_vision_tools(
+     *,
+     vision_manager: VisionManager,
+     model_id: str,
+     registry: Optional[VisionModelCapabilitiesRegistry] = None,
+ ) -> List[Callable[..., Any]]:
+     """Create AbstractCore tools for generative vision (artifact-ref outputs).
+
+     Tools are returned as normal Python callables decorated with `@abstractcore.tool`.
+     """
+     tool = _require_abstractcore_tool()
+     reg = registry or VisionModelCapabilitiesRegistry()
+     store = _require_store(vision_manager)
+     model_id = str(model_id or "").strip()
+     if not model_id:
+         raise ValueError("model_id must be a non-empty string")
+
+     @tool(
+         name="vision_text_to_image",
+         description="Generate an image from a text prompt and return an artifact ref.",
+         tags=["vision", "generate", "image"],
+         when_to_use="Use when you need to create a new image from a prompt.",
+     )
+     def vision_text_to_image(
+         prompt: str,
+         negative_prompt: Optional[str] = None,
+         width: Optional[int] = None,
+         height: Optional[int] = None,
+         steps: Optional[int] = 10,
+         guidance_scale: Optional[float] = None,
+         seed: Optional[int] = None,
+     ) -> Dict[str, Any]:
+         reg.require_support(model_id, "text_to_image")
+         out = vision_manager.generate_image(
+             prompt,
+             negative_prompt=negative_prompt,
+             width=width,
+             height=height,
+             steps=steps,
+             guidance_scale=guidance_scale,
+             seed=seed,
+         )
+         if not (isinstance(out, dict) and is_artifact_ref(out)):
+             raise AbstractVisionError("vision_text_to_image expected artifact-ref output; ensure VisionManager.store is set.")
+         return out
+
+     @tool(
+         name="vision_image_to_image",
+         description="Edit/transform an input image using a prompt and return an artifact ref.",
+         tags=["vision", "edit", "image"],
+         when_to_use="Use when you need to modify an existing image (optionally with a mask).",
+     )
+     def vision_image_to_image(
+         prompt: str,
+         image_artifact: Optional[Dict[str, Any]] = None,
+         image_b64: Optional[str] = None,
+         mask_artifact: Optional[Dict[str, Any]] = None,
+         mask_b64: Optional[str] = None,
+         negative_prompt: Optional[str] = None,
+         steps: Optional[int] = 10,
+         guidance_scale: Optional[float] = None,
+         seed: Optional[int] = None,
+     ) -> Dict[str, Any]:
+         reg.require_support(model_id, "image_to_image")
+         image_bytes = _resolve_input_bytes(store=store, artifact=image_artifact, b64=image_b64, name="image", required=True)
+         mask_bytes = _resolve_input_bytes(store=store, artifact=mask_artifact, b64=mask_b64, name="mask", required=False)
+         out = vision_manager.edit_image(
+             prompt,
+             image=image_bytes or b"",
+             mask=mask_bytes,
+             negative_prompt=negative_prompt,
+             steps=steps,
+             guidance_scale=guidance_scale,
+             seed=seed,
+         )
+         if not (isinstance(out, dict) and is_artifact_ref(out)):
+             raise AbstractVisionError("vision_image_to_image expected artifact-ref output; ensure VisionManager.store is set.")
+         return out
+
+     @tool(
+         name="vision_multi_view_image",
+         description="Generate multiple views/angles of a concept and return artifact refs.",
+         tags=["vision", "generate", "image", "multi_view"],
+         when_to_use="Use when you need multiple consistent viewpoints (front/side/back).",
+     )
+     def vision_multi_view_image(
+         prompt: str,
+         reference_image_artifact: Optional[Dict[str, Any]] = None,
+         reference_image_b64: Optional[str] = None,
+         angles: Optional[List[str]] = None,
+         negative_prompt: Optional[str] = None,
+         steps: Optional[int] = 10,
+         guidance_scale: Optional[float] = None,
+         seed: Optional[int] = None,
+     ) -> List[Dict[str, Any]]:
+         reg.require_support(model_id, "multi_view_image")
+         ref_bytes = _resolve_input_bytes(
+             store=store,
+             artifact=reference_image_artifact,
+             b64=reference_image_b64,
+             name="reference_image",
+             required=False,
+         )
+         kwargs: Dict[str, Any] = {}
+         if ref_bytes is not None:
+             kwargs["reference_image"] = ref_bytes
+         if angles is not None:
+             kwargs["angles"] = angles
+         if negative_prompt is not None:
+             kwargs["negative_prompt"] = negative_prompt
+         if steps is not None:
+             kwargs["steps"] = steps
+         if guidance_scale is not None:
+             kwargs["guidance_scale"] = guidance_scale
+         if seed is not None:
+             kwargs["seed"] = seed
+
+         out = vision_manager.generate_angles(prompt, **kwargs)
+         if not (isinstance(out, list) and all(isinstance(x, dict) and is_artifact_ref(x) for x in out)):
+             raise AbstractVisionError("vision_multi_view_image expected a list of artifact refs; ensure VisionManager.store is set.")
+         return out
+
+     @tool(
+         name="vision_text_to_video",
+         description="Generate a video from a text prompt and return an artifact ref.",
+         tags=["vision", "generate", "video"],
+         when_to_use="Use when you need to create a short video from a prompt.",
+     )
+     def vision_text_to_video(
+         prompt: str,
+         negative_prompt: Optional[str] = None,
+         width: Optional[int] = None,
+         height: Optional[int] = None,
+         fps: Optional[int] = None,
+         num_frames: Optional[int] = None,
+         steps: Optional[int] = 10,
+         guidance_scale: Optional[float] = None,
+         seed: Optional[int] = None,
+     ) -> Dict[str, Any]:
+         reg.require_support(model_id, "text_to_video")
+         out = vision_manager.generate_video(
+             prompt,
+             negative_prompt=negative_prompt,
+             width=width,
+             height=height,
+             fps=fps,
+             num_frames=num_frames,
+             steps=steps,
+             guidance_scale=guidance_scale,
+             seed=seed,
+         )
+         if not (isinstance(out, dict) and is_artifact_ref(out)):
+             raise AbstractVisionError("vision_text_to_video expected artifact-ref output; ensure VisionManager.store is set.")
+         return out
+
+     @tool(
+         name="vision_image_to_video",
+         description="Generate a video conditioned on an input image and return an artifact ref.",
+         tags=["vision", "generate", "video"],
+         when_to_use="Use when you need to animate an image into a video (optionally guided by a prompt).",
+     )
+     def vision_image_to_video(
+         image_artifact: Optional[Dict[str, Any]] = None,
+         image_b64: Optional[str] = None,
+         prompt: Optional[str] = None,
+         negative_prompt: Optional[str] = None,
+         width: Optional[int] = None,
+         height: Optional[int] = None,
+         fps: Optional[int] = None,
+         num_frames: Optional[int] = None,
+         steps: Optional[int] = 10,
+         guidance_scale: Optional[float] = None,
+         seed: Optional[int] = None,
+     ) -> Dict[str, Any]:
+         reg.require_support(model_id, "image_to_video")
+         image_bytes = _resolve_input_bytes(store=store, artifact=image_artifact, b64=image_b64, name="image", required=True)
+         out = vision_manager.image_to_video(
+             image=image_bytes or b"",
+             prompt=prompt,
+             negative_prompt=negative_prompt,
+             width=width,
+             height=height,
+             fps=fps,
+             num_frames=num_frames,
+             steps=steps,
+             guidance_scale=guidance_scale,
+             seed=seed,
+         )
+         if not (isinstance(out, dict) and is_artifact_ref(out)):
+             raise AbstractVisionError("vision_image_to_video expected artifact-ref output; ensure VisionManager.store is set.")
+         return out
+
+     return [
+         vision_text_to_image,
+         vision_image_to_image,
+         vision_multi_view_image,
+         vision_text_to_video,
+         vision_image_to_video,
+     ]
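A hedged wiring sketch for the tools above, not part of the diff. It assumes an OpenAI-compatible image endpoint at http://localhost:8000/v1, a `media_store` placeholder implementing the package's MediaStore interface (no concrete implementation appears in this diff), and that the module is importable as abstractvision.integrations.abstractcore_tools (the diff omits file paths, so the module name is a guess); the model id is illustrative.

from abstractvision.artifacts import get_artifact_id
from abstractvision.backends.openai_compatible import (
    OpenAICompatibleBackendConfig,
    OpenAICompatibleVisionBackend,
)
from abstractvision.integrations.abstractcore_tools import make_vision_tools  # assumed module path
from abstractvision.vision_manager import VisionManager

backend = OpenAICompatibleVisionBackend(
    config=OpenAICompatibleBackendConfig(base_url="http://localhost:8000/v1")
)
vm = VisionManager(backend=backend, store=media_store)  # media_store: placeholder MediaStore

# The returned list preserves the order of the module's final return statement.
tools = make_vision_tools(vision_manager=vm, model_id="qwen-image")  # illustrative model id
vision_text_to_image = tools[0]

ref = vision_text_to_image("a watercolor fox")  # -> artifact ref, e.g. {'$artifact': '...'}
image_bytes = vm.store.load_bytes(get_artifact_id(ref))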
@@ -0,0 +1,193 @@
+ from __future__ import annotations
+
+ import os
+ from pathlib import Path
+ from typing import Any, Dict, Optional, Union
+
+ from ..artifacts import RuntimeArtifactStoreAdapter, is_artifact_ref, get_artifact_id
+ from ..errors import AbstractVisionError
+ from ..vision_manager import VisionManager
+
+
+ def _env(key: str, default: Optional[str] = None) -> Optional[str]:
+     v = os.environ.get(str(key), None)
+     if v is None:
+         return default
+     s = str(v).strip()
+     return s if s else default
+
+
+ def _owner_cfg(owner: Any, key: str) -> Optional[str]:
+     try:
+         cfg = getattr(owner, "config", None)
+         if isinstance(cfg, dict):
+             v = cfg.get(key)
+             if v is None:
+                 return None
+             s = str(v).strip()
+             return s if s else None
+     except Exception:
+         return None
+     return None
+
+
+ def _read_bytes_from_path(path: Union[str, Path]) -> bytes:
+     p = Path(str(path)).expanduser()
+     return p.read_bytes()
+
+
+ def _resolve_bytes_input(value: Union[bytes, Dict[str, Any], str], *, artifact_store: Any) -> bytes:
+     if isinstance(value, (bytes, bytearray)):
+         return bytes(value)
+     if isinstance(value, dict):
+         if not is_artifact_ref(value):
+             raise ValueError("Expected an artifact ref dict like {'$artifact': '...'}")
+         if artifact_store is None:
+             raise ValueError("artifact_store is required to resolve artifact refs to bytes")
+         store = RuntimeArtifactStoreAdapter(artifact_store)
+         return store.load_bytes(get_artifact_id(value))
+     if isinstance(value, str):
+         p = Path(value).expanduser()
+         if p.exists() and p.is_file():
+             return p.read_bytes()
+         raise FileNotFoundError(f"File not found: {value}")
+     raise TypeError("Unsupported input type; expected bytes, artifact-ref dict, or file path")
+
+
+ class _AbstractVisionCapability:
+     """AbstractCore VisionCapability backed by AbstractVision."""
+
+     backend_id = "abstractvision:openai-compatible"
+
+     def __init__(self, owner: Any):
+         self._owner = owner
+         self._backend = None
+
+     def _get_backend(self):
+         if self._backend is not None:
+             return self._backend
+
+         # Injection hook (useful for tests and advanced embedding).
+         try:
+             cfg = getattr(self._owner, "config", None)
+             if isinstance(cfg, dict):
+                 inst = cfg.get("vision_backend_instance")
+                 if inst is not None:
+                     self._backend = inst
+                     return self._backend
+                 factory = cfg.get("vision_backend_factory")
+                 if callable(factory):
+                     self._backend = factory(self._owner)
+                     return self._backend
+         except Exception:
+             pass
+
+         # Prefer AbstractCore config keys when present; fall back to AbstractVision env vars.
+         backend_kind = (_owner_cfg(self._owner, "vision_backend") or _env("ABSTRACTVISION_BACKEND", "openai") or "openai").lower()
+
+         if backend_kind not in {"openai", "openai-compatible"}:
+             raise AbstractVisionError(
+                 "Only the OpenAI-compatible HTTP backend is supported via the AbstractCore plugin (v0). "
+                 "Set vision_backend='openai' (or ABSTRACTVISION_BACKEND=openai)."
+             )
+
+         base_url = _owner_cfg(self._owner, "vision_base_url") or _env("ABSTRACTVISION_BASE_URL")
+         api_key = _owner_cfg(self._owner, "vision_api_key") or _env("ABSTRACTVISION_API_KEY")
+         model_id = _owner_cfg(self._owner, "vision_model_id") or _env("ABSTRACTVISION_MODEL_ID")
+         timeout_s_raw = _owner_cfg(self._owner, "vision_timeout_s") or _env("ABSTRACTVISION_TIMEOUT_S")
+         try:
+             timeout_s = float(timeout_s_raw) if timeout_s_raw else 300.0
+         except Exception:
+             timeout_s = 300.0
+
+         if not base_url:
+             raise AbstractVisionError(
+                 "Missing vision_base_url / ABSTRACTVISION_BASE_URL. "
+                 "Configure an OpenAI-compatible endpoint (e.g. http://localhost:8000/v1)."
+             )
+
+         # Optional video endpoints (not standardized; only enabled when configured).
+         t2v_path = _owner_cfg(self._owner, "vision_text_to_video_path") or _env("ABSTRACTVISION_TEXT_TO_VIDEO_PATH")
+         i2v_path = _owner_cfg(self._owner, "vision_image_to_video_path") or _env("ABSTRACTVISION_IMAGE_TO_VIDEO_PATH")
+         i2v_mode = _owner_cfg(self._owner, "vision_image_to_video_mode") or _env("ABSTRACTVISION_IMAGE_TO_VIDEO_MODE", "multipart")
+
+         # Import backend module lazily (keeps plugin import-light).
+         from ..backends.openai_compatible import OpenAICompatibleBackendConfig, OpenAICompatibleVisionBackend
+
+         cfg = OpenAICompatibleBackendConfig(
+             base_url=str(base_url),
+             api_key=str(api_key) if api_key else None,
+             model_id=str(model_id) if model_id else None,
+             timeout_s=float(timeout_s),
+             text_to_video_path=str(t2v_path) if t2v_path else None,
+             image_to_video_path=str(i2v_path) if i2v_path else None,
+             image_to_video_mode=str(i2v_mode or "multipart"),
+         )
+         self._backend = OpenAICompatibleVisionBackend(config=cfg)
+         return self._backend
+
+     def _make_manager(self, *, artifact_store: Any) -> VisionManager:
+         store = RuntimeArtifactStoreAdapter(artifact_store) if artifact_store is not None else None
+         return VisionManager(backend=self._get_backend(), store=store)
+
+     def t2i(self, prompt: str, **kwargs: Any):
+         store = kwargs.pop("artifact_store", None)
+         vm = self._make_manager(artifact_store=store)
+         out = vm.generate_image(str(prompt), **kwargs)
+         if isinstance(out, dict):
+             return out
+         return bytes(getattr(out, "data", b""))
+
+     def i2i(self, prompt: str, image: Union[bytes, Dict[str, Any], str], **kwargs: Any):
+         store = kwargs.pop("artifact_store", None)
+         image_b = _resolve_bytes_input(image, artifact_store=store)
+         mask = kwargs.pop("mask", None)
+         mask_b = None
+         if mask is not None:
+             mask_b = _resolve_bytes_input(mask, artifact_store=store)
+         vm = self._make_manager(artifact_store=store)
+         out = vm.edit_image(str(prompt), image=image_b, mask=mask_b, **kwargs)
+         if isinstance(out, dict):
+             return out
+         return bytes(getattr(out, "data", b""))
+
+     def t2v(self, prompt: str, **kwargs: Any):
+         store = kwargs.pop("artifact_store", None)
+         vm = self._make_manager(artifact_store=store)
+         out = vm.generate_video(str(prompt), **kwargs)
+         if isinstance(out, dict):
+             return out
+         return bytes(getattr(out, "data", b""))
+
+     def i2v(self, image: Union[bytes, Dict[str, Any], str], **kwargs: Any):
+         store = kwargs.pop("artifact_store", None)
+         image_b = _resolve_bytes_input(image, artifact_store=store)
+         vm = self._make_manager(artifact_store=store)
+         out = vm.image_to_video(image=image_b, **kwargs)
+         if isinstance(out, dict):
+             return out
+         return bytes(getattr(out, "data", b""))
+
+
+ def register(registry: Any) -> None:
+     """Register AbstractVision as an AbstractCore capability plugin.
+
+     This function is loaded via the `abstractcore.capabilities_plugins` entry point group.
+     """
+
+     def _factory(owner: Any) -> _AbstractVisionCapability:
+         return _AbstractVisionCapability(owner)
+
+     config_hint = (
+         "Set ABSTRACTVISION_BASE_URL (or pass vision_base_url=...) to point to an OpenAI-compatible /v1 endpoint. "
+         "Example: vision_base_url='http://localhost:8000/v1' (AbstractCore Server vision endpoints) or "
+         "vision_base_url='http://localhost:1234/v1' (LMStudio/vLLM)."
+     )
+
+     registry.register_vision_backend(
+         backend_id=_AbstractVisionCapability.backend_id,
+         factory=_factory,
+         priority=0,
+         description="AbstractVision via OpenAI-compatible HTTP backend (env/config-driven).",
+         config_hint=config_hint,
+     )
+ )