abstractvision-0.1.0-py3-none-any.whl → abstractvision-0.2.1-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
@@ -0,0 +1,19 @@
+ class AbstractVisionError(Exception):
+     """Base exception for the abstractvision package."""
+
+
+ class BackendNotConfiguredError(AbstractVisionError):
+     """Raised when a VisionManager method is called without a configured backend."""
+
+
+ class OptionalDependencyMissingError(AbstractVisionError):
+     """Raised when an optional backend dependency is missing."""
+
+
+ class UnknownModelError(AbstractVisionError):
+     """Raised when a model id is not present in the capability registry."""
+
+
+ class CapabilityNotSupportedError(AbstractVisionError):
+     """Raised when a model/backend cannot satisfy a requested generative capability."""
+
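The hierarchy gives callers a single catch-all (AbstractVisionError) plus specific subclasses for configuration, dependency, and capability problems. A minimal usage sketch, not part of the diff: `vm` is a placeholder for a configured VisionManager, and the `abstractvision.errors` import path is inferred from the relative imports later in this diff.

from abstractvision.errors import (
    AbstractVisionError,
    CapabilityNotSupportedError,
    OptionalDependencyMissingError,
)

try:
    ref = vm.generate_image("a watercolor fox")  # vm: configured VisionManager (placeholder)
except OptionalDependencyMissingError as exc:
    print(f"Missing optional backend dependency: {exc}")
except CapabilityNotSupportedError as exc:
    print(f"The selected model cannot do text-to-image: {exc}")
except AbstractVisionError as exc:
    # The base class catches every package-specific failure.
    print(f"Vision call failed: {exc}")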
@@ -0,0 +1,5 @@
+ """Integration modules for external Abstract ecosystem packages.
+
+ These modules are optional and should not be imported at package import time.
+ """
+
@@ -0,0 +1,263 @@
+ from __future__ import annotations
+
+ import base64
+ from typing import Any, Callable, Dict, List, Optional
+
+ from ..artifacts import MediaStore, get_artifact_id, is_artifact_ref
+ from ..errors import AbstractVisionError, OptionalDependencyMissingError
+ from ..model_capabilities import VisionModelCapabilitiesRegistry
+ from ..vision_manager import VisionManager
+
+
+ def _require_abstractcore_tool():
+     try:
+         from abstractcore import tool  # type: ignore
+     except Exception as e:  # pragma: no cover (covered indirectly by import failures)
+         raise OptionalDependencyMissingError(
+             "AbstractCore is required for this integration. Install it via: pip install abstractcore"
+         ) from e
+     return tool
+
+
+ def _decode_base64_bytes(value: str) -> bytes:
+     raw = str(value or "").strip()
+     if not raw:
+         return b""
+     if raw.startswith("data:") and "," in raw:
+         raw = raw.split(",", 1)[1].strip()
+     # Best-effort: tolerate missing padding/newlines.
+     raw = "".join(raw.split())
+     pad = (-len(raw)) % 4
+     if pad:
+         raw = raw + ("=" * pad)
+     return base64.b64decode(raw, validate=False)
+
+
+ def _require_store(vm: VisionManager) -> MediaStore:
+     store = getattr(vm, "store", None)
+     if store is None:
+         raise AbstractVisionError("VisionManager.store is required for tool integration (artifact-ref outputs).")
+     return store
+
+
+ def _resolve_input_bytes(
+     *,
+     store: MediaStore,
+     artifact: Optional[Dict[str, Any]],
+     b64: Optional[str],
+     name: str,
+     required: bool,
+ ) -> Optional[bytes]:
+     if artifact is not None:
+         if not is_artifact_ref(artifact):
+             raise ValueError(f"{name}: expected an artifact ref dict like {{'$artifact': '...'}}")
+         return store.load_bytes(get_artifact_id(artifact))
+     if b64 is not None:
+         out = _decode_base64_bytes(b64)
+         if required and not out:
+             raise ValueError(f"{name}: base64 payload decoded to empty bytes")
+         return out
+     if required:
+         raise ValueError(f"{name}: either {name}_artifact or {name}_b64 is required")
+     return None
+
+
+ def make_vision_tools(
+     *,
+     vision_manager: VisionManager,
+     model_id: str,
+     registry: Optional[VisionModelCapabilitiesRegistry] = None,
+ ) -> List[Callable[..., Any]]:
+     """Create AbstractCore tools for generative vision (artifact-ref outputs).
+
+     Tools are returned as normal Python callables decorated with `@abstractcore.tool`.
+     """
+     tool = _require_abstractcore_tool()
+     reg = registry or VisionModelCapabilitiesRegistry()
+     store = _require_store(vision_manager)
+     model_id = str(model_id or "").strip()
+     if not model_id:
+         raise ValueError("model_id must be a non-empty string")
+
+     @tool(
+         name="vision_text_to_image",
+         description="Generate an image from a text prompt and return an artifact ref.",
+         tags=["vision", "generate", "image"],
+         when_to_use="Use when you need to create a new image from a prompt.",
+     )
+     def vision_text_to_image(
+         prompt: str,
+         negative_prompt: Optional[str] = None,
+         width: Optional[int] = None,
+         height: Optional[int] = None,
+         steps: Optional[int] = 10,
+         guidance_scale: Optional[float] = None,
+         seed: Optional[int] = None,
+     ) -> Dict[str, Any]:
+         reg.require_support(model_id, "text_to_image")
+         out = vision_manager.generate_image(
+             prompt,
+             negative_prompt=negative_prompt,
+             width=width,
+             height=height,
+             steps=steps,
+             guidance_scale=guidance_scale,
+             seed=seed,
+         )
+         if not (isinstance(out, dict) and is_artifact_ref(out)):
+             raise AbstractVisionError("vision_text_to_image expected artifact-ref output; ensure VisionManager.store is set.")
+         return out
+
+     @tool(
+         name="vision_image_to_image",
+         description="Edit/transform an input image using a prompt and return an artifact ref.",
+         tags=["vision", "edit", "image"],
+         when_to_use="Use when you need to modify an existing image (optionally with a mask).",
+     )
+     def vision_image_to_image(
+         prompt: str,
+         image_artifact: Optional[Dict[str, Any]] = None,
+         image_b64: Optional[str] = None,
+         mask_artifact: Optional[Dict[str, Any]] = None,
+         mask_b64: Optional[str] = None,
+         negative_prompt: Optional[str] = None,
+         steps: Optional[int] = 10,
+         guidance_scale: Optional[float] = None,
+         seed: Optional[int] = None,
+     ) -> Dict[str, Any]:
+         reg.require_support(model_id, "image_to_image")
+         image_bytes = _resolve_input_bytes(store=store, artifact=image_artifact, b64=image_b64, name="image", required=True)
+         mask_bytes = _resolve_input_bytes(store=store, artifact=mask_artifact, b64=mask_b64, name="mask", required=False)
+         out = vision_manager.edit_image(
+             prompt,
+             image=image_bytes or b"",
+             mask=mask_bytes,
+             negative_prompt=negative_prompt,
+             steps=steps,
+             guidance_scale=guidance_scale,
+             seed=seed,
+         )
+         if not (isinstance(out, dict) and is_artifact_ref(out)):
+             raise AbstractVisionError("vision_image_to_image expected artifact-ref output; ensure VisionManager.store is set.")
+         return out
+
+     @tool(
+         name="vision_multi_view_image",
+         description="Generate multiple views/angles of a concept and return artifact refs.",
+         tags=["vision", "generate", "image", "multi_view"],
+         when_to_use="Use when you need multiple consistent viewpoints (front/side/back).",
+     )
+     def vision_multi_view_image(
+         prompt: str,
+         reference_image_artifact: Optional[Dict[str, Any]] = None,
+         reference_image_b64: Optional[str] = None,
+         angles: Optional[List[str]] = None,
+         negative_prompt: Optional[str] = None,
+         steps: Optional[int] = 10,
+         guidance_scale: Optional[float] = None,
+         seed: Optional[int] = None,
+     ) -> List[Dict[str, Any]]:
+         reg.require_support(model_id, "multi_view_image")
+         ref_bytes = _resolve_input_bytes(
+             store=store,
+             artifact=reference_image_artifact,
+             b64=reference_image_b64,
+             name="reference_image",
+             required=False,
+         )
+         kwargs: Dict[str, Any] = {}
+         if ref_bytes is not None:
+             kwargs["reference_image"] = ref_bytes
+         if angles is not None:
+             kwargs["angles"] = angles
+         if negative_prompt is not None:
+             kwargs["negative_prompt"] = negative_prompt
+         if steps is not None:
+             kwargs["steps"] = steps
+         if guidance_scale is not None:
+             kwargs["guidance_scale"] = guidance_scale
+         if seed is not None:
+             kwargs["seed"] = seed
+
+         out = vision_manager.generate_angles(prompt, **kwargs)
+         if not (isinstance(out, list) and all(isinstance(x, dict) and is_artifact_ref(x) for x in out)):
+             raise AbstractVisionError("vision_multi_view_image expected a list of artifact refs; ensure VisionManager.store is set.")
+         return out
+
+     @tool(
+         name="vision_text_to_video",
+         description="Generate a video from a text prompt and return an artifact ref.",
+         tags=["vision", "generate", "video"],
+         when_to_use="Use when you need to create a short video from a prompt.",
+     )
+     def vision_text_to_video(
+         prompt: str,
+         negative_prompt: Optional[str] = None,
+         width: Optional[int] = None,
+         height: Optional[int] = None,
+         fps: Optional[int] = None,
+         num_frames: Optional[int] = None,
+         steps: Optional[int] = 10,
+         guidance_scale: Optional[float] = None,
+         seed: Optional[int] = None,
+     ) -> Dict[str, Any]:
+         reg.require_support(model_id, "text_to_video")
+         out = vision_manager.generate_video(
+             prompt,
+             negative_prompt=negative_prompt,
+             width=width,
+             height=height,
+             fps=fps,
+             num_frames=num_frames,
+             steps=steps,
+             guidance_scale=guidance_scale,
+             seed=seed,
+         )
+         if not (isinstance(out, dict) and is_artifact_ref(out)):
+             raise AbstractVisionError("vision_text_to_video expected artifact-ref output; ensure VisionManager.store is set.")
+         return out
+
+     @tool(
+         name="vision_image_to_video",
+         description="Generate a video conditioned on an input image and return an artifact ref.",
+         tags=["vision", "generate", "video"],
+         when_to_use="Use when you need to animate an image into a video (optionally guided by a prompt).",
+     )
+     def vision_image_to_video(
+         image_artifact: Optional[Dict[str, Any]] = None,
+         image_b64: Optional[str] = None,
+         prompt: Optional[str] = None,
+         negative_prompt: Optional[str] = None,
+         width: Optional[int] = None,
+         height: Optional[int] = None,
+         fps: Optional[int] = None,
+         num_frames: Optional[int] = None,
+         steps: Optional[int] = 10,
+         guidance_scale: Optional[float] = None,
+         seed: Optional[int] = None,
+     ) -> Dict[str, Any]:
+         reg.require_support(model_id, "image_to_video")
+         image_bytes = _resolve_input_bytes(store=store, artifact=image_artifact, b64=image_b64, name="image", required=True)
+         out = vision_manager.image_to_video(
+             image=image_bytes or b"",
+             prompt=prompt,
+             negative_prompt=negative_prompt,
+             width=width,
+             height=height,
+             fps=fps,
+             num_frames=num_frames,
+             steps=steps,
+             guidance_scale=guidance_scale,
+             seed=seed,
+         )
+         if not (isinstance(out, dict) and is_artifact_ref(out)):
+             raise AbstractVisionError("vision_image_to_video expected artifact-ref output; ensure VisionManager.store is set.")
+         return out
+
+     return [
+         vision_text_to_image,
+         vision_image_to_image,
+         vision_multi_view_image,
+         vision_text_to_video,
+         vision_image_to_video,
+     ]
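A hedged wiring sketch for the tools above, not part of the diff. It assumes an OpenAI-compatible image endpoint at http://localhost:8000/v1, a `media_store` placeholder implementing the package's MediaStore interface (no concrete implementation appears in this diff), and that the module is importable as abstractvision.integrations.abstractcore_tools (the diff omits file paths, so the module name is a guess); the model id is illustrative.

from abstractvision.artifacts import get_artifact_id
from abstractvision.backends.openai_compatible import (
    OpenAICompatibleBackendConfig,
    OpenAICompatibleVisionBackend,
)
from abstractvision.integrations.abstractcore_tools import make_vision_tools  # assumed module path
from abstractvision.vision_manager import VisionManager

backend = OpenAICompatibleVisionBackend(
    config=OpenAICompatibleBackendConfig(base_url="http://localhost:8000/v1")
)
vm = VisionManager(backend=backend, store=media_store)  # media_store: placeholder MediaStore

# The returned list preserves the order of the module's final return statement.
tools = make_vision_tools(vision_manager=vm, model_id="qwen-image")  # illustrative model id
vision_text_to_image = tools[0]

ref = vision_text_to_image("a watercolor fox")  # -> artifact ref, e.g. {'$artifact': '...'}
image_bytes = vm.store.load_bytes(get_artifact_id(ref))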
@@ -0,0 +1,193 @@
+ from __future__ import annotations
+
+ import os
+ from pathlib import Path
+ from typing import Any, Dict, Optional, Union
+
+ from ..artifacts import RuntimeArtifactStoreAdapter, is_artifact_ref, get_artifact_id
+ from ..errors import AbstractVisionError
+ from ..vision_manager import VisionManager
+
+
+ def _env(key: str, default: Optional[str] = None) -> Optional[str]:
+     v = os.environ.get(str(key), None)
+     if v is None:
+         return default
+     s = str(v).strip()
+     return s if s else default
+
+
+ def _owner_cfg(owner: Any, key: str) -> Optional[str]:
+     try:
+         cfg = getattr(owner, "config", None)
+         if isinstance(cfg, dict):
+             v = cfg.get(key)
+             if v is None:
+                 return None
+             s = str(v).strip()
+             return s if s else None
+     except Exception:
+         return None
+     return None
+
+
+ def _read_bytes_from_path(path: Union[str, Path]) -> bytes:
+     p = Path(str(path)).expanduser()
+     return p.read_bytes()
+
+
+ def _resolve_bytes_input(value: Union[bytes, Dict[str, Any], str], *, artifact_store: Any) -> bytes:
+     if isinstance(value, (bytes, bytearray)):
+         return bytes(value)
+     if isinstance(value, dict):
+         if not is_artifact_ref(value):
+             raise ValueError("Expected an artifact ref dict like {'$artifact': '...'}")
+         if artifact_store is None:
+             raise ValueError("artifact_store is required to resolve artifact refs to bytes")
+         store = RuntimeArtifactStoreAdapter(artifact_store)
+         return store.load_bytes(get_artifact_id(value))
+     if isinstance(value, str):
+         p = Path(value).expanduser()
+         if p.exists() and p.is_file():
+             return p.read_bytes()
+         raise FileNotFoundError(f"File not found: {value}")
+     raise TypeError("Unsupported input type; expected bytes, artifact-ref dict, or file path")
+
+
+ class _AbstractVisionCapability:
+     """AbstractCore VisionCapability backed by AbstractVision."""
+
+     backend_id = "abstractvision:openai-compatible"
+
+     def __init__(self, owner: Any):
+         self._owner = owner
+         self._backend = None
+
+     def _get_backend(self):
+         if self._backend is not None:
+             return self._backend
+
+         # Injection hook (useful for tests and advanced embedding).
+         try:
+             cfg = getattr(self._owner, "config", None)
+             if isinstance(cfg, dict):
+                 inst = cfg.get("vision_backend_instance")
+                 if inst is not None:
+                     self._backend = inst
+                     return self._backend
+                 factory = cfg.get("vision_backend_factory")
+                 if callable(factory):
+                     self._backend = factory(self._owner)
+                     return self._backend
+         except Exception:
+             pass
+
+         # Prefer AbstractCore config keys when present; fall back to AbstractVision env vars.
+         backend_kind = (_owner_cfg(self._owner, "vision_backend") or _env("ABSTRACTVISION_BACKEND", "openai") or "openai").lower()
+
+         if backend_kind not in {"openai", "openai-compatible"}:
+             raise AbstractVisionError(
+                 "Only the OpenAI-compatible HTTP backend is supported via the AbstractCore plugin (v0). "
+                 "Set vision_backend='openai' (or ABSTRACTVISION_BACKEND=openai)."
+             )
+
+         base_url = _owner_cfg(self._owner, "vision_base_url") or _env("ABSTRACTVISION_BASE_URL")
+         api_key = _owner_cfg(self._owner, "vision_api_key") or _env("ABSTRACTVISION_API_KEY")
+         model_id = _owner_cfg(self._owner, "vision_model_id") or _env("ABSTRACTVISION_MODEL_ID")
+         timeout_s_raw = _owner_cfg(self._owner, "vision_timeout_s") or _env("ABSTRACTVISION_TIMEOUT_S")
+         try:
+             timeout_s = float(timeout_s_raw) if timeout_s_raw else 300.0
+         except Exception:
+             timeout_s = 300.0
+
+         if not base_url:
+             raise AbstractVisionError(
+                 "Missing vision_base_url / ABSTRACTVISION_BASE_URL. "
+                 "Configure an OpenAI-compatible endpoint (e.g. http://localhost:8000/v1)."
+             )
+
+         # Optional video endpoints (not standardized; only enabled when configured).
+         t2v_path = _owner_cfg(self._owner, "vision_text_to_video_path") or _env("ABSTRACTVISION_TEXT_TO_VIDEO_PATH")
+         i2v_path = _owner_cfg(self._owner, "vision_image_to_video_path") or _env("ABSTRACTVISION_IMAGE_TO_VIDEO_PATH")
+         i2v_mode = _owner_cfg(self._owner, "vision_image_to_video_mode") or _env("ABSTRACTVISION_IMAGE_TO_VIDEO_MODE", "multipart")
+
+         # Import backend module lazily (keeps plugin import-light).
+         from ..backends.openai_compatible import OpenAICompatibleBackendConfig, OpenAICompatibleVisionBackend
+
+         cfg = OpenAICompatibleBackendConfig(
+             base_url=str(base_url),
+             api_key=str(api_key) if api_key else None,
+             model_id=str(model_id) if model_id else None,
+             timeout_s=float(timeout_s),
+             text_to_video_path=str(t2v_path) if t2v_path else None,
+             image_to_video_path=str(i2v_path) if i2v_path else None,
+             image_to_video_mode=str(i2v_mode or "multipart"),
+         )
+         self._backend = OpenAICompatibleVisionBackend(config=cfg)
+         return self._backend
+
+     def _make_manager(self, *, artifact_store: Any) -> VisionManager:
+         store = RuntimeArtifactStoreAdapter(artifact_store) if artifact_store is not None else None
+         return VisionManager(backend=self._get_backend(), store=store)
+
+     def t2i(self, prompt: str, **kwargs: Any):
+         store = kwargs.pop("artifact_store", None)
+         vm = self._make_manager(artifact_store=store)
+         out = vm.generate_image(str(prompt), **kwargs)
+         if isinstance(out, dict):
+             return out
+         return bytes(getattr(out, "data", b""))
+
+     def i2i(self, prompt: str, image: Union[bytes, Dict[str, Any], str], **kwargs: Any):
+         store = kwargs.pop("artifact_store", None)
+         image_b = _resolve_bytes_input(image, artifact_store=store)
+         mask = kwargs.pop("mask", None)
+         mask_b = None
+         if mask is not None:
+             mask_b = _resolve_bytes_input(mask, artifact_store=store)
+         vm = self._make_manager(artifact_store=store)
+         out = vm.edit_image(str(prompt), image=image_b, mask=mask_b, **kwargs)
+         if isinstance(out, dict):
+             return out
+         return bytes(getattr(out, "data", b""))
+
+     def t2v(self, prompt: str, **kwargs: Any):
+         store = kwargs.pop("artifact_store", None)
+         vm = self._make_manager(artifact_store=store)
+         out = vm.generate_video(str(prompt), **kwargs)
+         if isinstance(out, dict):
+             return out
+         return bytes(getattr(out, "data", b""))
+
+     def i2v(self, image: Union[bytes, Dict[str, Any], str], **kwargs: Any):
+         store = kwargs.pop("artifact_store", None)
+         image_b = _resolve_bytes_input(image, artifact_store=store)
+         vm = self._make_manager(artifact_store=store)
+         out = vm.image_to_video(image=image_b, **kwargs)
+         if isinstance(out, dict):
+             return out
+         return bytes(getattr(out, "data", b""))
+
+
+ def register(registry: Any) -> None:
+     """Register AbstractVision as an AbstractCore capability plugin.
+
+     This function is loaded via the `abstractcore.capabilities_plugins` entry point group.
+     """
+
+     def _factory(owner: Any) -> _AbstractVisionCapability:
+         return _AbstractVisionCapability(owner)
+
+     config_hint = (
+         "Set ABSTRACTVISION_BASE_URL (or pass vision_base_url=...) to point to an OpenAI-compatible /v1 endpoint. "
+         "Example: vision_base_url='http://localhost:8000/v1' (AbstractCore Server vision endpoints) or "
+         "vision_base_url='http://localhost:1234/v1' (LMStudio/vLLM)."
+     )
+
+     registry.register_vision_backend(
+         backend_id=_AbstractVisionCapability.backend_id,
+         factory=_factory,
+         priority=0,
+         description="AbstractVision via OpenAI-compatible HTTP backend (env/config-driven).",
+         config_hint=config_hint,
+     )
+ )