agent-vision-mcp 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. agent_vision_mcp-0.0.1/LICENSE +21 -0
  2. agent_vision_mcp-0.0.1/PKG-INFO +103 -0
  3. agent_vision_mcp-0.0.1/README.md +72 -0
  4. agent_vision_mcp-0.0.1/agent_vision_mcp/__init__.py +3 -0
  5. agent_vision_mcp-0.0.1/agent_vision_mcp/config.py +107 -0
  6. agent_vision_mcp-0.0.1/agent_vision_mcp/errors.py +106 -0
  7. agent_vision_mcp-0.0.1/agent_vision_mcp/image/__init__.py +1 -0
  8. agent_vision_mcp-0.0.1/agent_vision_mcp/image/input.py +344 -0
  9. agent_vision_mcp-0.0.1/agent_vision_mcp/image/security.py +234 -0
  10. agent_vision_mcp-0.0.1/agent_vision_mcp/providers/__init__.py +1 -0
  11. agent_vision_mcp-0.0.1/agent_vision_mcp/providers/base.py +35 -0
  12. agent_vision_mcp-0.0.1/agent_vision_mcp/providers/ocr_provider.py +125 -0
  13. agent_vision_mcp-0.0.1/agent_vision_mcp/providers/openai_compatible.py +130 -0
  14. agent_vision_mcp-0.0.1/agent_vision_mcp/server.py +515 -0
  15. agent_vision_mcp-0.0.1/agent_vision_mcp.egg-info/PKG-INFO +103 -0
  16. agent_vision_mcp-0.0.1/agent_vision_mcp.egg-info/SOURCES.txt +24 -0
  17. agent_vision_mcp-0.0.1/agent_vision_mcp.egg-info/dependency_links.txt +1 -0
  18. agent_vision_mcp-0.0.1/agent_vision_mcp.egg-info/entry_points.txt +2 -0
  19. agent_vision_mcp-0.0.1/agent_vision_mcp.egg-info/requires.txt +11 -0
  20. agent_vision_mcp-0.0.1/agent_vision_mcp.egg-info/top_level.txt +1 -0
  21. agent_vision_mcp-0.0.1/pyproject.toml +47 -0
  22. agent_vision_mcp-0.0.1/setup.cfg +4 -0
  23. agent_vision_mcp-0.0.1/tests/test_image_input.py +94 -0
  24. agent_vision_mcp-0.0.1/tests/test_provider.py +29 -0
  25. agent_vision_mcp-0.0.1/tests/test_server_helpers.py +22 -0
  26. agent_vision_mcp-0.0.1/tests/test_stdio_server.py +50 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 agent-vision-mcp contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,103 @@
1
+ Metadata-Version: 2.4
2
+ Name: agent-vision-mcp
3
+ Version: 0.0.1
4
+ Summary: Universal vision tools for AI agents via Model Context Protocol
5
+ License-Expression: MIT
6
+ Project-URL: Homepage, https://github.com/idealizing/agent-vision-mcp
7
+ Project-URL: Repository, https://github.com/idealizing/agent-vision-mcp
8
+ Project-URL: Issues, https://github.com/idealizing/agent-vision-mcp/issues
9
+ Keywords: mcp,vision,vlm,ocr,image-analysis
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Requires-Python: >=3.10
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Requires-Dist: mcp>=1.0
21
+ Requires-Dist: openai
22
+ Requires-Dist: langchain-openai
23
+ Requires-Dist: python-dotenv
24
+ Requires-Dist: pydantic
25
+ Requires-Dist: pillow
26
+ Requires-Dist: httpx
27
+ Provides-Extra: dev
28
+ Requires-Dist: pytest; extra == "dev"
29
+ Requires-Dist: pytest-asyncio; extra == "dev"
30
+ Dynamic: license-file
31
+
32
+ # agent-vision-mcp
33
+
34
+ <!-- mcp-name: io.github.idealizing/agent-vision-mcp -->
35
+
36
+ `agent-vision-mcp` exposes image analysis, inspection, cropping, OCR, and comparison
37
+ tools through the Model Context Protocol.
38
+
39
+ ## Quickstart
40
+
41
+ Run the published package without installing it permanently:
42
+
43
+ ```bash
44
+ uvx agent-vision-mcp
45
+ ```
46
+
47
+ Example MCP client configuration:
48
+
49
+ ```json
50
+ {
51
+ "mcpServers": {
52
+ "agent-vision": {
53
+ "command": "uvx",
54
+ "args": ["agent-vision-mcp"],
55
+ "env": {
56
+ "VISION_API_KEY": "your-api-key",
57
+ "VISION_BASE_URL": "https://your-provider.example/v1",
58
+ "VISION_MODEL_ID": "your-vision-model"
59
+ }
60
+ }
61
+ }
62
+ }
63
+ ```
64
+
65
+ ## Development
66
+
67
+ ```bash
68
+ python -m venv .venv
69
+ .venv/bin/pip install -e ".[dev]"
70
+ cp .env.example .env
71
+ .venv/bin/agent-vision-mcp
72
+ ```
73
+
74
+ Configure an OpenAI-compatible multimodal endpoint with `VISION_API_KEY`,
75
+ `VISION_BASE_URL`, and `VISION_MODEL_ID`.
76
+
77
+ ## URL Handling
78
+
79
+ `VISION_URL_MODE` controls how remote images are sent to the model:
80
+
81
+ - `auto` passes URLs through for analysis and comparison, but downloads them
82
+ when inspection, cropping, or OCR requires image bytes.
83
+ - `passthrough` prefers URL passthrough, except for tools that require bytes.
84
+ - `download` always downloads and verifies remote images before model calls.
85
+
86
+ Downloads are streamed with byte limits, redirects are security checked, and
87
+ all downloaded or encoded inputs are verified as supported images.
88
+ URL passthrough relies on the configured model provider to fetch URLs safely;
89
+ use `download` when the provider is not trusted to enforce outbound-network
90
+ restrictions.
91
+
92
+ Dedicated OCR is disabled by default. Set `OCR_ENABLED=true` and configure the
93
+ `OCR_*` variables to use a separate OCR model; otherwise OCR uses the VLM. `OCR_API_KEY` and `OCR_BASE_URL` fall back to the matching `VISION_*` values when unset.
94
+
95
+ ## Run Tests
96
+
97
+ ```bash
98
+ .venv/bin/python -m unittest discover -s tests -v
99
+ ```
100
+
101
+ ## License
102
+
103
+ MIT
@@ -0,0 +1,72 @@
1
+ # agent-vision-mcp
2
+
3
+ <!-- mcp-name: io.github.idealizing/agent-vision-mcp -->
4
+
5
+ `agent-vision-mcp` exposes image analysis, inspection, cropping, OCR, and comparison
6
+ tools through the Model Context Protocol.
7
+
8
+ ## Quickstart
9
+
10
+ Run the published package without installing it permanently:
11
+
12
+ ```bash
13
+ uvx agent-vision-mcp
14
+ ```
15
+
16
+ Example MCP client configuration:
17
+
18
+ ```json
19
+ {
20
+ "mcpServers": {
21
+ "agent-vision": {
22
+ "command": "uvx",
23
+ "args": ["agent-vision-mcp"],
24
+ "env": {
25
+ "VISION_API_KEY": "your-api-key",
26
+ "VISION_BASE_URL": "https://your-provider.example/v1",
27
+ "VISION_MODEL_ID": "your-vision-model"
28
+ }
29
+ }
30
+ }
31
+ }
32
+ ```
33
+
34
+ ## Development
35
+
36
+ ```bash
37
+ python -m venv .venv
38
+ .venv/bin/pip install -e ".[dev]"
39
+ cp .env.example .env
40
+ .venv/bin/agent-vision-mcp
41
+ ```
42
+
43
+ Configure an OpenAI-compatible multimodal endpoint with `VISION_API_KEY`,
44
+ `VISION_BASE_URL`, and `VISION_MODEL_ID`.
45
+
46
+ ## URL Handling
47
+
48
+ `VISION_URL_MODE` controls how remote images are sent to the model:
49
+
50
+ - `auto` passes URLs through for analysis and comparison, but downloads them
51
+ when inspection, cropping, or OCR requires image bytes.
52
+ - `passthrough` prefers URL passthrough, except for tools that require bytes.
53
+ - `download` always downloads and verifies remote images before model calls.
54
+
55
+ Downloads are streamed with byte limits, redirects are security checked, and
56
+ all downloaded or encoded inputs are verified as supported images.
57
+ URL passthrough relies on the configured model provider to fetch URLs safely;
58
+ use `download` when the provider is not trusted to enforce outbound-network
59
+ restrictions.
60
+
61
+ Dedicated OCR is disabled by default. Set `OCR_ENABLED=true` and configure the
62
+ `OCR_*` variables to use a separate OCR model; otherwise OCR uses the VLM. `OCR_API_KEY` and `OCR_BASE_URL` fall back to the matching `VISION_*` values when unset.
63
+
64
+ ## Run Tests
65
+
66
+ ```bash
67
+ .venv/bin/python -m unittest discover -s tests -v
68
+ ```
69
+
70
+ ## License
71
+
72
+ MIT
@@ -0,0 +1,3 @@
1
+ """agent-vision-mcp - Universal vision tools for AI agents via MCP"""
2
+
3
+ __version__ = "0.0.1"
@@ -0,0 +1,107 @@
1
+ """Configuration management for agent-vision-mcp"""
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from typing import List, Optional
6
+
7
+
8
class Settings:
    """Runtime configuration for agent-vision-mcp.

    The class attributes below are the built-in defaults. ``from_env``
    creates an instance and overrides them from environment variables.
    """

    # VLM Provider
    vision_api_key: str = ""
    vision_base_url: str = "https://api.example.com/v1"
    vision_model_id: str = "glm-4v-flash"

    # OCR Provider
    ocr_api_key: str = ""
    ocr_base_url: str = ""
    ocr_model_id: str = "DeepSeek-OCR"

    # Runtime
    vision_timeout: int = 60
    vision_max_retries: int = 3
    vision_default_detail: str = "auto"
    vision_supports_image_detail: bool = False
    vision_url_mode: str = "auto"

    # Input limits
    vision_max_image_size_mb: int = 10
    vision_max_image_pixels: int = 40_000_000
    vision_max_batch_images: int = 10

    # Security
    vision_allow_local_files: bool = True
    vision_allowed_paths: List[str] = ["/data", "/tmp"]
    vision_block_private_ips: bool = True

    # Transport
    vision_transport: str = "stdio"

    # Optional dedicated OCR provider
    dedicated_ocr_enabled: bool = False

    # Accepted spellings for boolean environment variables (compared after
    # stripping whitespace and lower-casing).
    _TRUE_VALUES = frozenset({"1", "true", "yes", "on"})
    _FALSE_VALUES = frozenset({"0", "false", "no", "off"})

    def __init__(self) -> None:
        # Give every instance its own list: the class-level default is a
        # mutable object and would otherwise be shared (and mutated) across
        # all instances that never reassign it.
        self.vision_allowed_paths = list(type(self).vision_allowed_paths)

    @property
    def ocr_enabled(self) -> bool:
        """Return True only when dedicated OCR is switched on AND both the
        OCR API key and OCR base URL are non-empty."""
        return bool(self.dedicated_ocr_enabled and self.ocr_api_key and self.ocr_base_url)

    @staticmethod
    def _env_bool(name: str, default: bool) -> bool:
        """Read a boolean environment variable.

        Args:
            name: Environment variable name.
            default: Value returned when the variable is not set.

        Returns:
            The parsed boolean.

        Raises:
            ValueError: If the variable is set to an unrecognised value.
                Failing loudly avoids silently disabling a security switch
                (e.g. ``VISION_BLOCK_PRIVATE_IPS=1`` used to parse as False).
        """
        raw = os.getenv(name)
        if raw is None:
            return default
        value = raw.strip().lower()
        if value in Settings._TRUE_VALUES:
            return True
        if value in Settings._FALSE_VALUES:
            return False
        raise ValueError(
            f"{name} must be a boolean (true/false, 1/0, yes/no, on/off), got {raw!r}"
        )

    @staticmethod
    def _env_int(name: str, default: int) -> int:
        """Read an integer environment variable.

        Args:
            name: Environment variable name.
            default: Value returned when the variable is not set.

        Raises:
            ValueError: If the variable is set but is not a valid integer;
                the message names the offending variable.
        """
        raw = os.getenv(name)
        if raw is None:
            return default
        try:
            return int(raw)
        except ValueError:
            raise ValueError(f"{name} must be an integer, got {raw!r}") from None

    @classmethod
    def from_env(cls, env_file: Optional[Path] = None) -> "Settings":
        """Build a Settings instance from environment variables.

        Args:
            env_file: Path of a dotenv file to load first. When omitted, a
                ``.env`` file one directory above this module's directory
                is used.

        Returns:
            A populated Settings instance.

        Raises:
            ValueError: If a numeric or boolean variable cannot be parsed,
                ``VISION_URL_MODE`` is not one of the supported modes, or a
                numeric limit is out of range.
        """
        from dotenv import load_dotenv

        if env_file is None:
            env_file = Path(__file__).parent.parent / ".env"
        load_dotenv(env_file)

        settings = cls()

        # VLM Provider
        settings.vision_api_key = os.getenv("VISION_API_KEY", "")
        settings.vision_base_url = os.getenv("VISION_BASE_URL", "https://api.example.com/v1")
        settings.vision_model_id = os.getenv("VISION_MODEL_ID", "glm-4v-flash")

        # OCR Provider - defaults to VLM credentials if not specified
        settings.ocr_api_key = os.getenv("OCR_API_KEY", settings.vision_api_key)
        settings.ocr_base_url = os.getenv("OCR_BASE_URL", settings.vision_base_url)
        settings.ocr_model_id = os.getenv("OCR_MODEL_ID", "DeepSeek-OCR")

        # Runtime
        settings.vision_timeout = cls._env_int("VISION_TIMEOUT", 60)
        settings.vision_max_retries = cls._env_int("VISION_MAX_RETRIES", 3)
        settings.vision_default_detail = os.getenv("VISION_DEFAULT_DETAIL", "auto")
        settings.vision_supports_image_detail = cls._env_bool("VISION_SUPPORTS_IMAGE_DETAIL", False)
        settings.vision_url_mode = os.getenv("VISION_URL_MODE", "auto").lower()
        if settings.vision_url_mode not in {"auto", "passthrough", "download"}:
            raise ValueError("VISION_URL_MODE must be one of: auto, passthrough, download")

        # Input limits
        settings.vision_max_image_size_mb = cls._env_int("VISION_MAX_IMAGE_SIZE_MB", 10)
        settings.vision_max_image_pixels = cls._env_int("VISION_MAX_IMAGE_PIXELS", 40000000)
        settings.vision_max_batch_images = cls._env_int("VISION_MAX_BATCH_IMAGES", 10)
        if settings.vision_timeout <= 0:
            raise ValueError("VISION_TIMEOUT must be greater than 0")
        if settings.vision_max_retries <= 0:
            raise ValueError("VISION_MAX_RETRIES must be greater than 0")
        if settings.vision_max_image_size_mb <= 0:
            raise ValueError("VISION_MAX_IMAGE_SIZE_MB must be greater than 0")
        if settings.vision_max_image_pixels <= 0:
            raise ValueError("VISION_MAX_IMAGE_PIXELS must be greater than 0")
        if settings.vision_max_batch_images < 2:
            raise ValueError("VISION_MAX_BATCH_IMAGES must be at least 2")

        # Security
        settings.vision_allow_local_files = cls._env_bool("VISION_ALLOW_LOCAL_FILES", True)
        allowed_paths_str = os.getenv("VISION_ALLOWED_PATHS", "/data,/tmp")
        settings.vision_allowed_paths = [p.strip() for p in allowed_paths_str.split(",") if p.strip()]
        settings.vision_block_private_ips = cls._env_bool("VISION_BLOCK_PRIVATE_IPS", True)
        settings.dedicated_ocr_enabled = cls._env_bool("OCR_ENABLED", False)

        # Transport
        settings.vision_transport = os.getenv("VISION_TRANSPORT", "stdio")

        return settings
104
+
105
+
106
# Global settings instance, built from the environment when this module is imported.
settings = Settings.from_env()
@@ -0,0 +1,106 @@
1
+ """Error handling for agent-vision-mcp"""
2
+
3
+ from typing import Optional, Any
4
+ import json
5
+
6
+
7
class VisionMCPError(Exception):
    """Base exception for agent-vision-mcp.

    Carries a machine-readable ``code``, a ``retryable`` hint and a free-form
    ``details`` mapping alongside the human-readable message, and can render
    itself as a JSON error payload.
    """

    def __init__(
        self,
        message: str,
        code: str = "INTERNAL_ERROR",
        retryable: bool = False,
        details: Optional[dict] = None,
    ):
        """Create the error.

        Args:
            message: Human-readable description (also used as ``str(exc)``).
            code: Machine-readable error code.
            retryable: Whether the caller may retry the operation.
            details: Optional extra context; stored as ``{}`` when omitted
                or empty.
        """
        super().__init__(message)
        self.message = message
        self.code = code
        self.retryable = retryable
        self.details = details or {}

    def to_dict(self) -> dict:
        """Return the error as a nested ``{"error": {...}}`` dictionary."""
        return {
            "error": {
                "code": self.code,
                "message": self.message,
                "retryable": self.retryable,
                "details": self.details,
            }
        }

    def to_json(self) -> str:
        """Serialize ``to_dict()`` to a JSON string (non-ASCII kept as-is).

        Values in ``details`` that JSON cannot represent natively (for
        example an exception object or a path) are rendered with ``str()``
        so that reporting an error can never itself raise ``TypeError``.
        """
        return json.dumps(self.to_dict(), ensure_ascii=False, default=str)
35
+
36
+
37
class InvalidInputError(VisionMCPError):
    """Raised when supplied input is rejected; reported as INVALID_INPUT and never retryable."""

    def __init__(self, message: str, details: Optional[dict] = None):
        # Positional order matches VisionMCPError(message, code, retryable, details).
        super().__init__(message, "INVALID_INPUT", False, details)
42
+
43
+
44
class ImageTooLargeError(VisionMCPError):
    """Raised when an image is larger than the configured size limit (IMAGE_TOO_LARGE)."""

    def __init__(self, size_mb: float, max_size_mb: int):
        # Both sizes are echoed back in ``details`` for programmatic use.
        size_info = {"size_mb": size_mb, "max_size_mb": max_size_mb}
        text = f"Image exceeds max size {max_size_mb}MB (actual: {size_mb:.1f}MB)"
        super().__init__(text, code="IMAGE_TOO_LARGE", retryable=False, details=size_info)
54
+
55
+
56
class UnsupportedFormatError(VisionMCPError):
    """Raised for an image format outside the supported list (UNSUPPORTED_FORMAT)."""

    def __init__(self, format: str, supported: list):
        # ``format`` shadows the builtin but is part of the public signature.
        supported_text = ", ".join(supported)
        text = f"Unsupported image format: {format}. Supported: {supported_text}"
        context = {"format": format, "supported": supported}
        super().__init__(text, code="UNSUPPORTED_FORMAT", retryable=False, details=context)
66
+
67
+
68
class SecurityError(VisionMCPError):
    """Raised on a security violation; reported as SECURITY_ERROR and never retryable."""

    def __init__(self, message: str, details: Optional[dict] = None):
        # Positional order matches VisionMCPError(message, code, retryable, details).
        super().__init__(message, "SECURITY_ERROR", False, details)
73
+
74
+
75
class ProviderError(VisionMCPError):
    """Raised when the VLM provider fails (PROVIDER_ERROR); retryable unless stated otherwise."""

    def __init__(self, message: str, retryable: bool = True, details: Optional[dict] = None):
        # Positional order matches VisionMCPError(message, code, retryable, details).
        super().__init__(message, "PROVIDER_ERROR", retryable, details)
80
+
81
+
82
class TimeoutError(VisionMCPError):
    """Raised when a request times out (TIMEOUT); always marked retryable.

    NOTE: within this module the name shadows Python's builtin
    ``TimeoutError``; this class derives from VisionMCPError, not OSError.
    """

    def __init__(self, timeout: int):
        text = f"Request timeout after {timeout}s"
        super().__init__(text, code="TIMEOUT", retryable=True, details={"timeout": timeout})
92
+
93
+
94
def handle_exception(e: Exception) -> str:
    """Render any exception as a JSON error response string.

    A VisionMCPError is serialized as-is. Any other exception is replaced by
    a generic INTERNAL_ERROR whose details contain only the exception's
    class name (the original message is not included).
    """
    if not isinstance(e, VisionMCPError):
        # Unknown error: wrap it in a generic, non-retryable internal error.
        e = VisionMCPError(
            "Internal error occurred",
            "INTERNAL_ERROR",
            False,
            {"type": type(e).__name__},
        )
    return e.to_json()
@@ -0,0 +1 @@
1
+ """Image processing utilities"""