PyPI - agent-vision-mcp - Versions diffs - 0.0.1__tar.gz - Mend

agent-vision-mcp 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

agent_vision_mcp-0.0.1/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 agent-vision-mcp contributors
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

agent_vision_mcp-0.0.1/PKG-INFO ADDED Viewed

@@ -0,0 +1,103 @@
+Metadata-Version: 2.4
+Name: agent-vision-mcp
+Version: 0.0.1
+Summary: Universal vision tools for AI agents via Model Context Protocol
+License-Expression: MIT
+Project-URL: Homepage, https://github.com/idealizing/agent-vision-mcp
+Project-URL: Repository, https://github.com/idealizing/agent-vision-mcp
+Project-URL: Issues, https://github.com/idealizing/agent-vision-mcp/issues
+Keywords: mcp,vision,vlm,ocr,image-analysis
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: mcp>=1.0
+Requires-Dist: openai
+Requires-Dist: langchain-openai
+Requires-Dist: python-dotenv
+Requires-Dist: pydantic
+Requires-Dist: pillow
+Requires-Dist: httpx
+Provides-Extra: dev
+Requires-Dist: pytest; extra == "dev"
+Requires-Dist: pytest-asyncio; extra == "dev"
+Dynamic: license-file
+# agent-vision-mcp
+<!-- mcp-name: io.github.idealizing/agent-vision-mcp -->
+`agent-vision-mcp` exposes image analysis, inspection, cropping, OCR, and comparison
+tools through the Model Context Protocol.
+## Quickstart
+Run the published package without installing it permanently:
+```bash
+uvx agent-vision-mcp
+```
+Example MCP client configuration:
+```json
+{
+  "mcpServers": {
+    "agent-vision": {
+      "command": "uvx",
+      "args": ["agent-vision-mcp"],
+      "env": {
+        "VISION_API_KEY": "your-api-key",
+        "VISION_BASE_URL": "https://your-provider.example/v1",
+        "VISION_MODEL_ID": "your-vision-model"
+      }
+    }
+  }
+}
+```
+## Development
+```bash
+python -m venv .venv
+.venv/bin/pip install -e ".[dev]"
+cp .env.example .env
+.venv/bin/agent-vision-mcp
+```
+Configure an OpenAI-compatible multimodal endpoint with `VISION_API_KEY`,
+`VISION_BASE_URL`, and `VISION_MODEL_ID`.
+## URL Handling
+`VISION_URL_MODE` controls how remote images are sent to the model:
+- `auto` passes URLs through for analysis and comparison, but downloads them
+  when inspection, cropping, or OCR requires image bytes.
+- `passthrough` prefers URL passthrough, except for tools that require bytes.
+- `download` always downloads and verifies remote images before model calls.
+Downloads are streamed with byte limits, redirects are security checked, and
+all downloaded or encoded inputs are verified as supported images.
+URL passthrough relies on the configured model provider to fetch URLs safely;
+use `download` when the provider is not trusted to enforce outbound-network
+restrictions.
+Dedicated OCR is disabled by default. Set `OCR_ENABLED=true` and configure the
+`OCR_*` variables to use a separate OCR model; otherwise OCR uses the VLM.
+## Run Tests
+```bash
+.venv/bin/python -m unittest discover -s tests -v
+```
+## License
+MIT

agent_vision_mcp-0.0.1/README.md ADDED Viewed

@@ -0,0 +1,72 @@
+# agent-vision-mcp
+<!-- mcp-name: io.github.idealizing/agent-vision-mcp -->
+`agent-vision-mcp` exposes image analysis, inspection, cropping, OCR, and comparison
+tools through the Model Context Protocol.
+## Quickstart
+Run the published package without installing it permanently:
+```bash
+uvx agent-vision-mcp
+```
+Example MCP client configuration:
+```json
+{
+  "mcpServers": {
+    "agent-vision": {
+      "command": "uvx",
+      "args": ["agent-vision-mcp"],
+      "env": {
+        "VISION_API_KEY": "your-api-key",
+        "VISION_BASE_URL": "https://your-provider.example/v1",
+        "VISION_MODEL_ID": "your-vision-model"
+      }
+    }
+  }
+}
+```
+## Development
+```bash
+python -m venv .venv
+.venv/bin/pip install -e ".[dev]"
+cp .env.example .env
+.venv/bin/agent-vision-mcp
+```
+Configure an OpenAI-compatible multimodal endpoint with `VISION_API_KEY`,
+`VISION_BASE_URL`, and `VISION_MODEL_ID`.
+## URL Handling
+`VISION_URL_MODE` controls how remote images are sent to the model:
+- `auto` passes URLs through for analysis and comparison, but downloads them
+  when inspection, cropping, or OCR requires image bytes.
+- `passthrough` prefers URL passthrough, except for tools that require bytes.
+- `download` always downloads and verifies remote images before model calls.
+Downloads are streamed with byte limits, redirects are security checked, and
+all downloaded or encoded inputs are verified as supported images.
+URL passthrough relies on the configured model provider to fetch URLs safely;
+use `download` when the provider is not trusted to enforce outbound-network
+restrictions.
+Dedicated OCR is disabled by default. Set `OCR_ENABLED=true` and configure the
+`OCR_*` variables to use a separate OCR model; otherwise OCR uses the VLM.
+## Run Tests
+```bash
+.venv/bin/python -m unittest discover -s tests -v
+```
+## License
+MIT

agent_vision_mcp-0.0.1/agent_vision_mcp/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""agent-vision-mcp - Universal vision tools for AI agents via MCP"""
+__version__ = "0.0.1"

agent_vision_mcp-0.0.1/agent_vision_mcp/config.py ADDED Viewed

@@ -0,0 +1,107 @@
+"""Configuration management for agent-vision-mcp"""
+import os
+from pathlib import Path
+from typing import List, Optional
+class Settings:
+    """Vision-mcp settings from environment variables"""
+    # VLM Provider
+    vision_api_key: str = ""
+    vision_base_url: str = "https://api.example.com/v1"
+    vision_model_id: str = "glm-4v-flash"
+    # OCR Provider
+    ocr_api_key: str = ""
+    ocr_base_url: str = ""
+    ocr_model_id: str = "DeepSeek-OCR"
+    # Runtime
+    vision_timeout: int = 60
+    vision_max_retries: int = 3
+    vision_default_detail: str = "auto"
+    vision_supports_image_detail: bool = False
+    vision_url_mode: str = "auto"
+    # Input limits
+    vision_max_image_size_mb: int = 10
+    vision_max_image_pixels: int = 40_000_000
+    vision_max_batch_images: int = 10
+    # Security
+    vision_allow_local_files: bool = True
+    vision_allowed_paths: List[str] = ["/data", "/tmp"]
+    vision_block_private_ips: bool = True
+    # Transport
+    vision_transport: str = "stdio"
+    # Optional dedicated OCR provider
+    dedicated_ocr_enabled: bool = False
+    @property
+    def ocr_enabled(self) -> bool:
+        return bool(self.dedicated_ocr_enabled and self.ocr_api_key and self.ocr_base_url)
+    @classmethod
+    def from_env(cls, env_file: Optional[Path] = None) -> "Settings":
+        """Load settings from environment variables"""
+        from dotenv import load_dotenv
+        if env_file is None:
+            env_file = Path(__file__).parent.parent / ".env"
+        load_dotenv(env_file)
+        settings = cls()
+        # VLM Provider
+        settings.vision_api_key = os.getenv("VISION_API_KEY", "")
+        settings.vision_base_url = os.getenv("VISION_BASE_URL", "https://api.example.com/v1")
+        settings.vision_model_id = os.getenv("VISION_MODEL_ID", "glm-4v-flash")
+        # OCR Provider - defaults to VLM credentials if not specified
+        settings.ocr_api_key = os.getenv("OCR_API_KEY", settings.vision_api_key)
+        settings.ocr_base_url = os.getenv("OCR_BASE_URL", settings.vision_base_url)
+        settings.ocr_model_id = os.getenv("OCR_MODEL_ID", "DeepSeek-OCR")
+        # Runtime
+        settings.vision_timeout = int(os.getenv("VISION_TIMEOUT", "60"))
+        settings.vision_max_retries = int(os.getenv("VISION_MAX_RETRIES", "3"))
+        settings.vision_default_detail = os.getenv("VISION_DEFAULT_DETAIL", "auto")
+        settings.vision_supports_image_detail = os.getenv("VISION_SUPPORTS_IMAGE_DETAIL", "false").lower() == "true"
+        settings.vision_url_mode = os.getenv("VISION_URL_MODE", "auto").lower()
+        if settings.vision_url_mode not in {"auto", "passthrough", "download"}:
+            raise ValueError("VISION_URL_MODE must be one of: auto, passthrough, download")
+        # Input limits
+        settings.vision_max_image_size_mb = int(os.getenv("VISION_MAX_IMAGE_SIZE_MB", "10"))
+        settings.vision_max_image_pixels = int(os.getenv("VISION_MAX_IMAGE_PIXELS", "40000000"))
+        settings.vision_max_batch_images = int(os.getenv("VISION_MAX_BATCH_IMAGES", "10"))
+        if settings.vision_timeout <= 0:
+            raise ValueError("VISION_TIMEOUT must be greater than 0")
+        if settings.vision_max_retries <= 0:
+            raise ValueError("VISION_MAX_RETRIES must be greater than 0")
+        if settings.vision_max_image_size_mb <= 0:
+            raise ValueError("VISION_MAX_IMAGE_SIZE_MB must be greater than 0")
+        if settings.vision_max_image_pixels <= 0:
+            raise ValueError("VISION_MAX_IMAGE_PIXELS must be greater than 0")
+        if settings.vision_max_batch_images < 2:
+            raise ValueError("VISION_MAX_BATCH_IMAGES must be at least 2")
+        # Security
+        settings.vision_allow_local_files = os.getenv("VISION_ALLOW_LOCAL_FILES", "true").lower() == "true"
+        allowed_paths_str = os.getenv("VISION_ALLOWED_PATHS", "/data,/tmp")
+        settings.vision_allowed_paths = [p.strip() for p in allowed_paths_str.split(",") if p.strip()]
+        settings.vision_block_private_ips = os.getenv("VISION_BLOCK_PRIVATE_IPS", "true").lower() == "true"
+        settings.dedicated_ocr_enabled = os.getenv("OCR_ENABLED", "false").lower() == "true"
+        # Transport
+        settings.vision_transport = os.getenv("VISION_TRANSPORT", "stdio")
+        return settings
+# Global settings instance, built when this module is imported; Settings.from_env() raises ValueError on invalid values
+settings = Settings.from_env()

agent_vision_mcp-0.0.1/agent_vision_mcp/errors.py ADDED Viewed

@@ -0,0 +1,106 @@
+"""Error handling for agent-vision-mcp"""
+from typing import Optional, Any
+import json
+class VisionMCPError(Exception):
+    """Base exception for agent-vision-mcp"""
+    def __init__(
+        self,
+        message: str,
+        code: str = "INTERNAL_ERROR",
+        retryable: bool = False,
+        details: Optional[dict] = None,
+    ):
+        super().__init__(message)
+        self.message = message
+        self.code = code
+        self.retryable = retryable
+        self.details = details or {}
+    def to_dict(self) -> dict:
+        return {
+            "error": {
+                "code": self.code,
+                "message": self.message,
+                "retryable": self.retryable,
+                "details": self.details,
+            }
+        }
+    def to_json(self) -> str:
+        return json.dumps(self.to_dict(), ensure_ascii=False)
+class InvalidInputError(VisionMCPError):
+    """Invalid input error"""
+    def __init__(self, message: str, details: Optional[dict] = None):
+        super().__init__(message, code="INVALID_INPUT", retryable=False, details=details)
+class ImageTooLargeError(VisionMCPError):
+    """Image exceeds size limit"""
+    def __init__(self, size_mb: float, max_size_mb: int):
+        super().__init__(
+            f"Image exceeds max size {max_size_mb}MB (actual: {size_mb:.1f}MB)",
+            code="IMAGE_TOO_LARGE",
+            retryable=False,
+            details={"size_mb": size_mb, "max_size_mb": max_size_mb},
+        )
+class UnsupportedFormatError(VisionMCPError):
+    """Unsupported image format"""
+    def __init__(self, format: str, supported: list):
+        super().__init__(
+            f"Unsupported image format: {format}. Supported: {', '.join(supported)}",
+            code="UNSUPPORTED_FORMAT",
+            retryable=False,
+            details={"format": format, "supported": supported},
+        )
+class SecurityError(VisionMCPError):
+    """Security violation"""
+    def __init__(self, message: str, details: Optional[dict] = None):
+        super().__init__(message, code="SECURITY_ERROR", retryable=False, details=details)
+class ProviderError(VisionMCPError):
+    """VLM provider error"""
+    def __init__(self, message: str, retryable: bool = True, details: Optional[dict] = None):
+        super().__init__(message, code="PROVIDER_ERROR", retryable=retryable, details=details)
+class TimeoutError(VisionMCPError):
+    """Request timeout"""
+    def __init__(self, timeout: int):
+        super().__init__(
+            f"Request timeout after {timeout}s",
+            code="TIMEOUT",
+            retryable=True,
+            details={"timeout": timeout},
+        )
+def handle_exception(e: Exception) -> str:
+    """Convert exception to JSON error response"""
+    if isinstance(e, VisionMCPError):
+        return e.to_json()
+    # Unknown error
+    error = VisionMCPError(
+        message="Internal error occurred",
+        code="INTERNAL_ERROR",
+        retryable=False,
+        details={"type": type(e).__name__},
+    )
+    return error.to_json()

agent_vision_mcp-0.0.1/agent_vision_mcp/image/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Image processing utilities"""