npm - @showlotus/opencode-image-vision - Versions diffs - 1.0.0 - Mend

@showlotus/opencode-image-vision 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2025 OpenCode Image Vision
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/README.md ADDED Viewed

@@ -0,0 +1,327 @@
+# opencode-image-vision
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
+[![MCP](https://img.shields.io/badge/MCP-Server-blue.svg)](https://modelcontextprotocol.io/)
+[![Node](https://img.shields.io/badge/Node.js-18%2B-green.svg)](https://nodejs.org/)
+> MCP server that adds vision capabilities to text-only LLMs in OpenCode by reading pasted images from the session database and analyzing them via a vision model.
+---
+## The problem
+Text-only models like **GLM-5**, **DeepSeek V4**, and **MiniMax** are great for code, but they cannot process images. Every time you paste a screenshot, OpenCode throws:
+```
+ERROR: Cannot read "clipboard" (this model does not support image input)
+```
+## The fix
+This MCP server reads images directly from OpenCode's **session SQLite database** — where pasted images are stored before the model rejects them — sends each image to a **vision model** (e.g. GLM-4.6V), and returns a text description the text-only model can reason about.
+**Result: paste → ask → done.** No file saving, no manual paths.
+---
+## Features
+- 🔍 **Session-based image reading** — Reads pasted images directly from OpenCode's SQLite database, no clipboard access needed
+- 🖼️ **Multi-image support** — Analyze multiple images in a single tool call
+- 🔌 **Zero API key configuration** — Automatically reads API keys from OpenCode's `account.json`
+- 🧩 **Extensible provider architecture** — Currently supports GLM/ZhipuAI; easily extendable to OpenAI, Claude, Qwen, etc.
+- 🖥️ **Cross-platform** — Auto-detects OpenCode database path on macOS, Linux, and Windows
+- ⚡ **MCP standard** — Works with OpenCode and any MCP-capable client
+---
+## Requirements
+- **Node.js 18+** (ESM support required)
+- **pnpm** (`npm install -g pnpm`) — only needed for the local-clone setup (Option B)
+- **OpenCode** with a configured text-only model (e.g. GLM-5, DeepSeek V4)
+- A **vision model provider** configured in OpenCode's account (e.g. GLM-4.6V)
+---
+## Quick start
+You can use this MCP server in two ways: **npx** (zero install) or **local clone**.
+### Option A: npx (recommended)
+No clone or install needed. Just add this to your `opencode.jsonc`:
+```jsonc
+{
+  "mcp": {
+    "image-vision": {
+      "type": "local",
+      "command": ["npx", "-y", "@showlotus/opencode-image-vision"],
+      "environment": {
+        "model": "zhipuai-coding-plan/glm-4.6v",
+      },
+    },
+  },
+}
+```
+npx will automatically download and run the server on first use.
+### Option B: Local clone
+For development or custom configurations:
+```bash
+git clone https://github.com/showlotus/opencode-image-vision.git ~/.config/opencode/mcp-servers/opencode-image-vision
+cd ~/.config/opencode/mcp-servers/opencode-image-vision
+pnpm install
+```
+Then wire it with the absolute path:
+```jsonc
+{
+  "mcp": {
+    "image-vision": {
+      "type": "local",
+      "command": [
+        "node",
+        "/Users/YOU/.config/opencode/mcp-servers/opencode-image-vision/src/index.js",
+      ],
+      "environment": {
+        "model": "zhipuai-coding-plan/glm-4.6v",
+      },
+    },
+  },
+}
+```
+> The install location doesn't matter — you'll reference it by absolute path in the config.
+### Add AGENTS.md instructions
+Add this to your `~/.config/opencode/AGENTS.md` so the AI knows when to use the tool:
+```markdown
+# Image Recognition
+1. When the user pastes an image or needs image analysis, and the current model may not
+   support image input, call the image-vision MCP `analyze_images` tool. Pass the current
+   session ID (from error messages or context) and the tool will read images from the database
+   and return vision model descriptions. Supports analyzing multiple images at once.
+2. When encountering "does not support image input" errors, auto-invoke
+   `analyze_images` to obtain image descriptions; do not tell the user recognition
+   is unsupported.
+```
+### Restart OpenCode
+That's it. Paste an image and ask about it — the AI will automatically call `analyze_images` to get a description.
+---
+## How it works
+```
+┌──────────┐    tool call     ┌───────────────────┐    SQL query    ┌────────────┐
+│ OpenCode │ ───────────────> │ opencode-image-   │ ──────────────> │ SQLite DB  │
+│  (MCP    │ <─────────────   │ vision (MCP)      │ <────────────── │ (images)   │
+│  client) │    text result   └────────┬──────────┘    base64 rows  └────────────┘
+└──────────┘                           │
+                                       │ POST base64 image
+                                       ▼
+                              ┌───────────────────┐
+                              │  Vision AI API    │
+                              │  (GLM-4.6V, etc)  │
+                              └───────────────────┘
+```
+1. User pastes an image → OpenCode stores it in the session SQLite database
+2. Text-only model rejects the image (`unsupportedParts()`)
+3. Model calls the `analyze_images` tool with the current `session_id`
+4. Server queries the database for image parts in that session
+5. Each image (base64) is sent to the configured vision AI provider
+6. Text descriptions are returned to the model
+---
+## Tool reference
+### `analyze_images`
+Reads images from an OpenCode session and analyzes them via a vision model.
+| Parameter    | Type   | Required | Default    | Description                          |
+| ------------ | ------ | -------- | ---------- | ------------------------------------ |
+| `session_id` | string | **Yes**  | —          | OpenCode session ID (e.g. `ses_xxx`) |
+| `prompt`     | string | No       | _built-in_ | Custom analysis prompt               |
+| `limit`      | number | No       | `5`        | Max number of images to analyze      |
+**Example output:**
+```
+Analyzed 2 image(s):
+### Image 1: clipboard
+This is a GitHub issue page titled "Image Clipboard Paste Not Working in OpenCode"...
+---
+### Image 2: screenshot.png
+The screenshot shows a terminal with the following error message...
+```
+---
+## Configuration
+### Environment variables
+| Variable    | Required | Default                        | Description                                                                                  |
+| ----------- | -------- | ------------------------------ | -------------------------------------------------------------------------------------------- |
+| `model`     | No       | `zhipuai-coding-plan/glm-4.6v` | Vision model in `provider/model` format. API key auto-resolved from OpenCode `account.json`. |
+| `prompt`    | No       | _built-in English prompt_      | Default analysis prompt sent to the vision model                                             |
+| `timeout`   | No       | `60000`                        | Request timeout in milliseconds                                                              |
+| `limit`     | No       | `5`                            | Default max images per analysis                                                              |
+| `max_limit` | No       | `20`                           | Hard cap on images per analysis                                                              |
+> **No API key needed.** The server reads the API key automatically from OpenCode's `account.json` based on the provider ID in the `model` variable. The database path is auto-detected per OS; set the `OPENCODE_DB_PATH` environment variable to override it.
+### Advanced example
+```jsonc
+{
+  "mcp": {
+    "image-vision": {
+      "type": "local",
+      "command": ["node", "/path/to/opencode-image-vision/src/index.js"],
+      "environment": {
+        "model": "zhipuai-coding-plan/glm-4.6v",
+        "limit": "10",
+        "timeout": "30000",
+        "prompt": "Extract all text from this image and describe the UI layout.",
+      },
+    },
+  },
+}
+```
+### Supported providers
+| Provider ID           | Base URL                               | Models     |
+| --------------------- | -------------------------------------- | ---------- |
+| `zhipuai-coding-plan` | `https://open.bigmodel.cn/api/paas/v4` | `glm-4.6v` |
+| `zai-coding-plan`     | `https://open.bigmodel.cn/api/paas/v4` | `glm-4.6v` |
+| `z-ai`                | `https://open.bigmodel.cn/api/paas/v4` | `glm-4.6v` |
+| `zhipuai`             | `https://open.bigmodel.cn/api/paas/v4` | `glm-4.6v` |
+---
+## Usage example
+```
+You: [paste a screenshot of an error]
+     "What's wrong with this?"
+Model: [calls analyze_images with session_id]
+     → "The error in the screenshot says ECONNREFUSED 127.0.0.1:5432.
+        PostgreSQL isn't running on port 5432. Start it with: brew services start postgresql"
+```
+The text-only model never sees pixels — it reads the description returned by GLM-4.6V and reasons over it.
+---
+## Extending with new providers
+Adding a new vision provider takes 3 steps:
+**1. Add base URL to the registry** (`src/opencode.js`):
+```javascript
+const PROVIDER_REGISTRY = {
+  'zhipuai-coding-plan': { baseUrl: 'https://open.bigmodel.cn/api/paas/v4', format: 'openai' },
+  // Add new provider:
+  openai: { baseUrl: 'https://api.openai.com/v1', format: 'openai' },
+}
+```
+**2. Create a provider class** (`src/providers/openai.js`) — only needed if the API format differs:
+```javascript
+import { VisionProvider } from './base.js'
+export class OpenAIProvider extends VisionProvider {
+  async analyze(base64, mime, prompt) {
+    // Implement provider-specific API call
+  }
+}
+```
+**3. Register the mapping** (`src/providers/index.js`):
+```javascript
+const PROVIDER_MAP = {
+  'zhipuai-coding-plan': GLMProvider,
+  openai: OpenAIProvider,
+}
+```
+Then set the `model` environment variable:
+```jsonc
+"environment": { "model": "openai/gpt-4o" }
+```
+---
+## Troubleshooting
+<details>
+<summary><b>MCP error: Connection closed</b></summary>
+The server crashed on startup. Check:
+1. Use **absolute path** in the `command` array (not `~` or `$HOME`)
+2. Run `node src/index.js` manually to see the error output
+3. Ensure `pnpm install` was run in the project directory
+</details>
+<details>
+<summary><b>"Provider not found in account.json"</b></summary>
+The provider ID in `model` doesn't match any entry in `~/.local/share/opencode/account.json`. Verify you're signed in to that provider in OpenCode. Run `opencode auth` to check.
+</details>
+<details>
+<summary><b>"OpenCode database not found"</b></summary>
+The auto-detection couldn't find the database. Set the `OPENCODE_DB_PATH` environment variable to the full path of your `opencode.db` file.
+</details>
+<details>
+<summary><b>Tools don't appear in OpenCode</b></summary>
+Restart OpenCode completely. Check the MCP server status in the right panel — if it shows an error, the server process failed to start.
+</details>
+---
+## Security
+- **No API keys in source code.** Keys are read from OpenCode's `account.json` at runtime
+- **Read-only database access.** The server opens the SQLite database in `readonly` mode — it never writes or modifies OpenCode data
+- **No network listener.** The server runs as a local stdio process — it only talks to the MCP client over stdin/stdout and to the vision API over HTTPS
+- **No telemetry.** No analytics, no phone-home
+---
+## License
+MIT — see [LICENSE](LICENSE).

package/config.example.json ADDED Viewed

@@ -0,0 +1,7 @@
+{
+  "model": "zhipuai-coding-plan/glm-4.6v",
+  "prompt": "Describe this image in detail, including: text content, UI layout structure, interface elements, color scheme. If there are code or technical details, list them thoroughly.",
+  "timeout": 60000,
+  "limit": 5,
+  "max_limit": 20
+}

package/package.json ADDED Viewed

@@ -0,0 +1,40 @@
+{
+  "name": "@showlotus/opencode-image-vision",
+  "version": "1.0.0",
+  "description": "MCP server that reads images from OpenCode's SQLite database and analyzes them via vision AI providers",
+  "type": "module",
+  "main": "src/index.js",
+  "bin": {
+    "opencode-image-vision": "./src/index.js"
+  },
+  "files": [
+    "src/",
+    "config.example.json"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/showlotus/opencode-image-vision.git"
+  },
+  "homepage": "https://github.com/showlotus/opencode-image-vision",
+  "bugs": {
+    "url": "https://github.com/showlotus/opencode-image-vision/issues"
+  },
+  "author": "showlotus",
+  "keywords": [
+    "mcp",
+    "opencode",
+    "vision",
+    "image-analysis",
+    "glm",
+    "zhipuai"
+  ],
+  "license": "MIT",
+  "dependencies": {
+    "@modelcontextprotocol/sdk": "^1.0.0",
+    "better-sqlite3": "^11.0.0",
+    "zod": "^3.23.0"
+  },
+  "engines": {
+    "node": ">=18"
+  }
+}

package/src/db.js ADDED Viewed

@@ -0,0 +1,62 @@
+import { homedir, platform } from 'node:os'
+import { join } from 'node:path'
+import { existsSync } from 'node:fs'
+import Database from 'better-sqlite3'
+function detectDbPath() {
+  const candidates = []
+  if (process.env.OPENCODE_DB_PATH) {
+    candidates.push(process.env.OPENCODE_DB_PATH)
+  }
+  const opencodeDir = 'opencode'
+  const dbFile = 'opencode.db'
+  if (process.env.XDG_DATA_HOME) {
+    candidates.push(join(process.env.XDG_DATA_HOME, opencodeDir, dbFile))
+  }
+  const home = homedir()
+  if (platform() === 'win32') {
+    const localAppData = process.env.LOCALAPPDATA || join(home, 'AppData', 'Local')
+    candidates.push(join(localAppData, opencodeDir, dbFile))
+  } else {
+    candidates.push(join(home, '.local', 'share', opencodeDir, dbFile))
+  }
+  for (const p of candidates) {
+    if (existsSync(p)) return p
+  }
+  throw new Error(
+    `OpenCode database not found. Searched:\n${candidates.map(p => `  - ${p}`).join('\n')}\nSet OPENCODE_DB_PATH to override.`,
+  )
+}
+export function getDatabase() {
+  return new Database(detectDbPath(), { readonly: true, fileMustExist: true })
+}
+export function getImages(db, sessionId, limit) {
+  const rows = db
+    .prepare(
+      `SELECT data FROM part
+       WHERE json_extract(data, '$.type') = 'file'
+         AND json_extract(data, '$.mime') LIKE 'image/%'
+         AND session_id = ?
+       ORDER BY time_created DESC
+       LIMIT ?`,
+    )
+    .all(sessionId, limit)
+  return rows
+    .map(row => {
+      const d = JSON.parse(row.data)
+      const match = d.url?.match(/^data:([^;]+);base64,(.+)$/)
+      return match
+        ? { mime: d.mime || match[1], base64: match[2], filename: d.filename || 'image.png' }
+        : null
+    })
+    .filter(Boolean)
+}

package/src/index.js ADDED Viewed

@@ -0,0 +1,89 @@
+#!/usr/bin/env node
+import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'
+import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
+import { z } from 'zod'
+import { getDatabase, getImages } from './db.js'
+import { createProvider } from './providers/index.js'
+const DEFAULT_PROMPT =
+  process.env.prompt ||
+  [
+    'Describe this image in detail, including:',
+    'text content, UI layout structure, interface elements, color scheme.',
+    'If there are code or technical details, list them thoroughly.',
+  ].join(' ')
+const DEFAULT_LIMIT = Number(process.env.limit) || 5
+const MAX_LIMIT = Number(process.env.max_limit) || 20
+let provider
+try {
+  provider = createProvider()
+} catch (e) {
+  console.error(`Failed to initialize provider: ${e.message}`)
+  process.exit(1)
+}
+const server = new McpServer({
+  name: 'image-vision',
+  version: '1.0.0',
+})
+server.tool(
+  'analyze_images',
+  'Read images from OpenCode session database and analyze them via a vision AI provider. Returns text descriptions for each image found.',
+  {
+    session_id: z.string().describe('OpenCode session ID (e.g. ses_xxx)'),
+    prompt: z.string().optional().describe('Custom analysis prompt. Defaults to a detailed description prompt.'),
+    limit: z.number().int().positive().max(MAX_LIMIT).optional().describe(`Maximum number of images to analyze. Default: ${DEFAULT_LIMIT}.`),
+  },
+  async ({ session_id, prompt, limit: userLimit }) => {
+    const limit = userLimit || DEFAULT_LIMIT
+    const analysisPrompt = prompt || DEFAULT_PROMPT
+    let db
+    try {
+      db = getDatabase()
+    } catch (e) {
+      return {
+        content: [{ type: 'text', text: `Failed to open database: ${e.message}` }],
+        isError: true,
+      }
+    }
+    try {
+      const images = getImages(db, session_id, limit)
+      if (!images.length) {
+        return {
+          content: [{ type: 'text', text: `No images found in session ${session_id}.` }],
+        }
+      }
+      const results = []
+      for (let i = 0; i < images.length; i++) {
+        const img = images[i]
+        try {
+          const desc = await provider.analyze(img.base64, img.mime, analysisPrompt)
+          results.push(`### Image ${i + 1}: ${img.filename}\n\n${desc}`)
+        } catch (e) {
+          results.push(`### Image ${i + 1}: ${img.filename}\n\n[Analysis failed: ${e.message}]`)
+        }
+      }
+      return {
+        content: [
+          {
+            type: 'text',
+            text: `Analyzed ${images.length} image(s):\n\n${results.join('\n\n---\n\n')}`,
+          },
+        ],
+      }
+    } finally {
+      db.close()
+    }
+  },
+)
+const transport = new StdioServerTransport()
+await server.connect(transport)

package/src/opencode.js ADDED Viewed

@@ -0,0 +1,34 @@
+import { readFileSync } from 'node:fs';
+import { homedir } from 'node:os';
+import { join } from 'node:path';
+// Provider ID → base URL mapping
+// Future providers can be added here
+const PROVIDER_REGISTRY = {
+  'zhipuai-coding-plan': { baseUrl: 'https://open.bigmodel.cn/api/paas/v4', format: 'openai' },
+  'zai-coding-plan':     { baseUrl: 'https://open.bigmodel.cn/api/paas/v4', format: 'openai' },
+  'z-ai':                { baseUrl: 'https://open.bigmodel.cn/api/paas/v4', format: 'openai' },
+  'zhipuai':             { baseUrl: 'https://open.bigmodel.cn/api/paas/v4', format: 'openai' },
+};
+export function resolveProviderConfig(providerId, modelId) {
+  // 从 account.json 中读取 API key
+  const accountPath = join(homedir(), '.local', 'share', 'opencode', 'account.json');
+  const accountJson = JSON.parse(readFileSync(accountPath, 'utf-8'));
+  const accountId = accountJson.active?.[providerId];
+  if (!accountId) throw new Error(`Provider "${providerId}" not found in account.json active list`);
+  const account = accountJson.accounts?.[accountId];
+  if (!account) throw new Error(`Account ${accountId} not found for provider "${providerId}"`);
+  const apiKey = account.credential?.key;
+  if (!apiKey) throw new Error(`No API key found for provider "${providerId}"`);
+  // 从注册表中查找 base URL
+  const registry = PROVIDER_REGISTRY[providerId];
+  if (!registry) throw new Error(`Provider "${providerId}" not in PROVIDER_REGISTRY. Available: ${Object.keys(PROVIDER_REGISTRY).join(', ')}. Please add it to src/opencode.js`);
+  return {
+    apiKey,
+    baseUrl: registry.baseUrl,
+    model: modelId,
+  };
+}

package/src/providers/base.js ADDED Viewed

@@ -0,0 +1,9 @@
+export class VisionProvider {
+  constructor(config) {
+    this.config = config
+  }
+  async analyze(base64, mime, prompt) {
+    throw new Error('Not implemented')
+  }
+}

package/src/providers/glm.js ADDED Viewed

@@ -0,0 +1,60 @@
+import { VisionProvider } from './base.js'
+export class GLMProvider extends VisionProvider {
+  constructor(config) {
+    super(config)
+    this.apiKey = config.apiKey
+    this.baseUrl = config.baseUrl
+    this.model = config.model
+    this.timeout = config.timeout || 60_000
+    if (!this.apiKey) {
+      throw new Error('GLM API key not configured.')
+    }
+    if (!this.baseUrl) {
+      throw new Error('GLM base URL not configured.')
+    }
+    if (!this.model) {
+      throw new Error('GLM model not configured.')
+    }
+  }
+  async analyze(base64, mime, prompt) {
+    const ctrl = new AbortController()
+    const timer = setTimeout(() => ctrl.abort(), this.timeout)
+    try {
+      const res = await fetch(`${this.baseUrl}/chat/completions`, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+          Authorization: `Bearer ${this.apiKey}`,
+        },
+        body: JSON.stringify({
+          model: this.model,
+          messages: [
+            {
+              role: 'user',
+              content: [
+                { type: 'image_url', image_url: { url: `data:${mime};base64,${base64}` } },
+                { type: 'text', text: prompt },
+              ],
+            },
+          ],
+          stream: false,
+        }),
+        signal: ctrl.signal,
+      })
+      if (!res.ok) {
+        const t = await res.text().catch(() => '')
+        throw new Error(`GLM API ${res.status}: ${t.slice(0, 200)}`)
+      }
+      const json = await res.json()
+      return json.choices?.[0]?.message?.content?.trim() || '[No content returned]'
+    } finally {
+      clearTimeout(timer)
+    }
+  }
+}

package/src/providers/index.js ADDED Viewed

@@ -0,0 +1,33 @@
+import { GLMProvider } from './glm.js'
+import { resolveProviderConfig } from '../opencode.js'
+// Provider ID → provider class mapping
+const PROVIDER_MAP = {
+  'zhipuai-coding-plan': GLMProvider,
+  'zai-coding-plan': GLMProvider,
+  'z-ai': GLMProvider,
+  'zhipuai': GLMProvider,
+}
+export function createProvider() {
+  const raw = process.env.model || 'zhipuai-coding-plan/glm-4.6v'
+  const slashIdx = raw.indexOf('/')
+  if (slashIdx === -1) {
+    throw new Error(
+      `Invalid VISION_MODEL format: "${raw}". Expected "provider/model", e.g. "zhipuai-coding-plan/glm-4.6v"`,
+    )
+  }
+  const providerId = raw.slice(0, slashIdx)
+  const modelId = raw.slice(slashIdx + 1)
+  const Provider = PROVIDER_MAP[providerId]
+  if (!Provider) {
+    throw new Error(
+      `Unknown provider: ${providerId}. Available: ${Object.keys(PROVIDER_MAP).join(', ')}`,
+    )
+  }
+  const config = resolveProviderConfig(providerId, modelId)
+  config.timeout = Number(process.env.timeout) || undefined
+  return new Provider(config)
+}