mcp-sight 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 nicepkg
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,61 @@
1
+ # mcp-sight
2
+
3
+ Give Claude Code the power of sight — describe images using any vision-capable LLM (OpenAI-compatible API).
4
+
5
+ ## Setup
6
+
7
+ Add to `~/.claude.json` (user scope) or `<project>/.mcp.json` (project scope):
8
+
9
+ ```json
10
+ {
11
+ "mcpServers": {
12
+ "mcp-sight": {
13
+ "command": "bunx",
14
+ "args": ["mcp-sight"],
15
+ "env": {
16
+ "VISION_API_KEY": "sk-your-api-key",
17
+ "VISION_BASE_URL": "https://api.openai.com/v1",
18
+ "VISION_MODEL": "gpt-4o"
19
+ }
20
+ }
21
+ }
22
+ }
23
+ ```
24
+
25
+ Or use the `claude mcp add` command:
26
+
27
+ ```bash
28
+ claude mcp add mcp-sight \
29
+ --scope user \
30
+ --env VISION_API_KEY=sk-your-api-key \
31
+ --env VISION_BASE_URL=https://api.openai.com/v1 \
32
+ --env VISION_MODEL=gpt-4o \
33
+ -- bunx mcp-sight
34
+ ```
35
+
36
+ ## Environment Variables
37
+
38
+ | Variable | Required | Default | Description |
39
+ |---|---|---|---|
40
+ | `VISION_API_KEY` | **Yes** | — | API key for the vision provider |
41
+ | `VISION_BASE_URL` | No | `https://api.openai.com/v1` | OpenAI-compatible API base URL |
42
+ | `VISION_MODEL` | No | `mimo-v2.5` | Model ID for vision requests |
43
+
44
+ ## Tool: `describe_image`
45
+
46
+ When you send an image to Claude Code, it can call this tool to "see" it.
47
+
48
+ | Parameter | Type | Required | Description |
49
+ |---|---|---|---|
50
+ | `image_path` | string | Yes | Absolute path to the image file |
51
+ | `prompt` | string | No | What to focus on in the image |
52
+ | `context` | string | No | Background context from your conversation |
53
+ | `detail_level` | `"brief"` \| `"standard"` \| `"detailed"` | No | How much detail (default: `"standard"`) |
54
+
55
+ ## Supported Image Formats
56
+
57
+ JPEG, PNG, GIF, WebP, BMP, SVG, TIFF, ICO, HEIC, HEIF
58
+
59
+ ## License
60
+
61
+ MIT
@@ -0,0 +1,3 @@
1
#!/usr/bin/env bun
// Bin entry for the `mcp-sight` command.
//
// The server ships as TypeScript source, so it has to be executed by Bun.
// A `node` shebang makes `bunx` spawn Node.js, and Node.js will not load
// `.ts` files that live under `node_modules`; pointing the shebang at `bun`
// keeps the documented `bunx mcp-sight` invocation working.
import("../src/index.ts").catch((err) => {
  // Surface load failures explicitly instead of as an unhandled rejection.
  console.error("Failed to load mcp-sight:", err);
  process.exit(1);
});
package/package.json ADDED
@@ -0,0 +1,41 @@
1
+ {
2
+ "name": "mcp-sight",
3
+ "version": "1.0.0",
4
+ "description": "MCP server that gives Claude Code sight — describe images using a vision-capable LLM",
5
+ "type": "module",
6
+ "main": "./src/index.ts",
7
+ "bin": {
8
+ "mcp-sight": "bin/mcp-vision.js"
9
+ },
10
+ "files": [
11
+ "bin/",
12
+ "src/",
13
+ "README.md",
14
+ "LICENSE"
15
+ ],
16
+ "scripts": {
17
+ "dev": "bun run src/index.ts"
18
+ },
19
+ "dependencies": {
20
+ "@ai-sdk/openai": "^3.0",
21
+ "@modelcontextprotocol/sdk": "^1.29",
22
+ "ai": "^6.0",
23
+ "zod": "^3.25"
24
+ },
25
+ "devDependencies": {
26
+ "@types/bun": "latest",
27
+ "@types/node": "^25.9.1"
28
+ },
29
+ "keywords": [
30
+ "mcp",
31
+ "vision",
32
+ "image-description",
33
+ "model-context-protocol",
34
+ "claude-code"
35
+ ],
36
+ "license": "MIT",
37
+ "repository": {
38
+ "type": "git",
39
+ "url": "git+https://github.com/momo1037/mcp-sight.git"
40
+ }
41
+ }
package/src/index.ts ADDED
@@ -0,0 +1,190 @@
1
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
3
+ import { z } from "zod/v4";
4
+ import { generateText } from "ai";
5
+ import { createOpenAI } from "@ai-sdk/openai";
6
+ import { readFileSync } from "node:fs";
7
+ import { extname } from "node:path";
8
+
9
+ // ─── Configuration from environment ───────────────────────────────
10
+
11
// Read all three settings from the environment at once. The two optional ones
// get a local alias so the public constants below can carry their defaults.
const {
  VISION_API_KEY,
  VISION_BASE_URL: baseUrlOverride,
  VISION_MODEL: modelOverride,
} = process.env;

// `||` (not `??`) on purpose: an empty string counts as "not set".
const VISION_BASE_URL = baseUrlOverride || "https://api.openai.com/v1";
const VISION_MODEL = modelOverride || "mimo-v2.5";

// The API key has no default; without it the process stops with exit code 1.
if (!VISION_API_KEY) {
  console.error("ERROR: VISION_API_KEY environment variable is required");
  process.exit(1);
}
20
+
21
+ // ─── Media type detection ──────────────────────────────────────────
22
+
23
/** Maps a lower-case file extension (including the leading dot) to its MIME type. */
const MIME_MAP: Record<string, string> = {
  ".jpg": "image/jpeg",
  ".jpeg": "image/jpeg",
  ".png": "image/png",
  ".gif": "image/gif",
  ".webp": "image/webp",
  ".bmp": "image/bmp",
  ".svg": "image/svg+xml",
  ".tiff": "image/tiff",
  ".tif": "image/tiff",
  ".ico": "image/x-icon",
  ".heic": "image/heic",
  ".heif": "image/heif",
};

/**
 * Derive the MIME type of an image from its file name.
 *
 * @param filePath Path whose extension is inspected; matching is case-insensitive.
 * @returns The mapped MIME type, or "image/png" when the extension is missing or unknown.
 */
function getMediaType(filePath: string): string {
  const suffix = extname(filePath).toLowerCase();
  const known = MIME_MAP[suffix];
  if (known) {
    return known;
  }
  return "image/png";
}
42
+
43
+ // ─── Detail-level prompts ──────────────────────────────────────────
44
+
45
/**
 * System prompt for each supported detail level ("brief", "standard", "detailed").
 * Each prompt is written as a list of fragments that are joined with single spaces.
 */
const SYSTEM_PROMPTS: Record<string, string> = {
  brief: [
    "You are a concise image describer. Reply in 1-2 short sentences.",
    "Focus only on the most important subject and action in the image.",
    "Be direct and brief.",
  ].join(" "),

  standard: [
    "You are a helpful image describer. Describe the image in detail —",
    "cover main subjects, composition, colors, setting, and any notable elements.",
    "Be thorough but focused. Do not add interpretation beyond what is visible.",
  ].join(" "),

  detailed: [
    "You are an expert visual analyst. Provide an extremely thorough,",
    "comprehensive description. Cover: main subjects, background, composition,",
    "lighting/contrast, colors/palette, textures, mood/atmosphere,",
    "spatial relationships, any text visible (transcribe exactly),",
    "and subtle details that might be overlooked. Be exhaustive.",
  ].join(" "),
};
63
+
64
+ // ─── OpenAI client ─────────────────────────────────────────────────
65
+
66
// Provider settings come straight from the environment-derived constants.
const providerSettings = {
  apiKey: VISION_API_KEY,
  baseURL: VISION_BASE_URL,
};

/** Provider instance used to build the chat model for every vision request. */
const openai = createOpenAI(providerSettings);
70
+
71
+ // ─── MCP Server ────────────────────────────────────────────────────
72
+
73
// Name and version this server is constructed with.
const serverInfo = { name: "mcp-sight", version: "1.0.0" };

/** The MCP server instance on which the `describe_image` tool is registered. */
const server = new McpServer(serverInfo);
77
+
78
/**
 * Turn an arbitrary thrown value into printable text.
 *
 * Anything can be thrown or used as a rejection reason, so the value is
 * narrowed before `.message` is read instead of assuming it is an `Error`.
 *
 * @param err The caught value.
 * @returns `err.message` when one is available, otherwise `String(err)`.
 */
function errorMessage(err: unknown): string {
  if (err instanceof Error) {
    return err.message;
  }
  if (typeof err === "object" && err !== null && "message" in err) {
    return String(err.message);
  }
  return String(err);
}

/**
 * Tool `describe_image`: reads an image from disk and asks the configured
 * vision model to describe it.
 *
 * Inputs:
 *  - `image_path`   path of the image file to read
 *  - `prompt`       optional question/instruction for the model
 *  - `context`      optional background that is prepended to the request
 *  - `detail_level` optional "brief" | "standard" | "detailed" (default "standard")
 *
 * Result: a single text item with the model's answer, or a text item flagged
 * with `isError: true` when reading the file or calling the model fails.
 */
server.tool(
  "describe_image",
  "Describe an image file using a vision-capable large language model. " +
    "Provide the absolute path to an image file on disk, and optionally a " +
    "specific question or context to guide the description.",
  {
    image_path: z.string().describe(
      "Absolute path to the image file to describe (e.g. C:/Users/Admin/pic.png)"
    ),
    prompt: z
      .string()
      .optional()
      .describe(
        "Specific question or instruction for the vision model. " +
          'Defaults to "Describe this image in detail."'
      ),
    context: z
      .string()
      .optional()
      .describe(
        "User's original question or background context. " +
          "This helps the model understand what the user is ultimately trying to accomplish."
      ),
    detail_level: z
      .enum(["brief", "standard", "detailed"])
      .optional()
      .describe(
        "Level of detail in the description. " +
          "'brief' = 1-2 sentences, " +
          "'standard' = thorough description, " +
          "'detailed' = exhaustive visual analysis. " +
          "Defaults to 'standard'."
      ),
  },
  async ({ image_path, prompt, context, detail_level }) => {
    // 1. Read the image file and work out its media type from the extension.
    let buffer: Buffer;
    let mediaType: string;
    try {
      buffer = readFileSync(image_path);
      mediaType = getMediaType(image_path);
    } catch (err: unknown) {
      return {
        content: [
          {
            type: "text",
            text: `Failed to read image file at "${image_path}": ${errorMessage(err)}`,
          },
        ],
        isError: true,
      };
    }

    // 2. Build the prompts.
    const level = detail_level ?? "standard";
    const systemPrompt = SYSTEM_PROMPTS[level];

    // `||` on purpose: an empty prompt falls back to the default request too.
    let userPrompt = prompt || "Describe this image in detail.";
    if (context) {
      // Put the background first so the specific request reads as the final instruction.
      userPrompt = [
        "Background context (the user is asking about this):",
        context,
        "",
        "Specific request:",
        userPrompt,
      ].join("\n");
    }

    // 3. Call the vision model with the text request followed by the image bytes.
    try {
      const result = await generateText({
        model: openai.chat(VISION_MODEL),
        system: systemPrompt,
        messages: [
          {
            role: "user",
            content: [
              { type: "text", text: userPrompt },
              { type: "image", image: buffer, mediaType },
            ],
          },
        ],
      });

      return {
        content: [{ type: "text", text: result.text }],
      };
    } catch (err: unknown) {
      return {
        content: [
          {
            type: "text",
            text: `Vision model API error: ${errorMessage(err)}`,
          },
        ],
        isError: true,
      };
    }
  }
);
178
+
179
+ // ─── Start ─────────────────────────────────────────────────────────
180
+
181
/**
 * Connect the MCP server to a stdio transport and report readiness.
 *
 * The readiness line is written with `console.error`, i.e. to stderr —
 * presumably because stdout is used by the stdio transport (TODO confirm).
 */
async function main(): Promise<void> {
  const transport = new StdioServerTransport();
  await server.connect(transport);
  console.error(`mcp-sight ready (model: ${VISION_MODEL}, base: ${VISION_BASE_URL})`);
}

// Any startup failure is logged under the package's own name and ends the
// process with a non-zero exit code.
main().catch((err: unknown) => {
  console.error("Fatal error starting mcp-sight:", err);
  process.exit(1);
});