npm - @houtini/gemini-mcp - Versions diffs - 1.4.2 → 2.2.0 - Mend

@houtini/gemini-mcp 1.4.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (171) hide show

package/README.md +314 -784
package/claude_desktop_config_example.json +1 -0
package/dist/config/index.d.ts.map +1 -1
package/dist/config/index.js +8 -4
package/dist/config/index.js.map +1 -1
package/dist/config/types.d.ts +5 -0
package/dist/config/types.d.ts.map +1 -1
package/dist/image-viewer/image-viewer-app.html +180 -0
package/dist/image-viewer/src/ui/image-viewer.html +324 -0
package/dist/index-new.d.ts +3 -0
package/dist/index-new.d.ts.map +1 -0
package/dist/index-new.js +7 -0
package/dist/index-new.js.map +1 -0
package/dist/index.d.ts +3 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +70 -172
package/dist/index.js.map +1 -1
package/dist/landing-page-viewer/src/ui/landing-page-viewer.html +330 -0
package/dist/services/gemini/export.d.ts +5 -0
package/dist/services/gemini/export.d.ts.map +1 -0
package/dist/services/gemini/export.js +5 -0
package/dist/services/gemini/export.js.map +1 -0
package/dist/services/gemini/image-service.d.ts +45 -0
package/dist/services/gemini/image-service.d.ts.map +1 -0
package/dist/services/gemini/image-service.js +248 -0
package/dist/services/gemini/image-service.js.map +1 -0
package/dist/services/gemini/index.d.ts +7 -2
package/dist/services/gemini/index.d.ts.map +1 -1
package/dist/services/gemini/index.js +132 -56
package/dist/services/gemini/index.js.map +1 -1
package/dist/services/gemini/types.d.ts +32 -0
package/dist/services/gemini/types.d.ts.map +1 -1
package/dist/services/gemini/video-service.d.ts +58 -0
package/dist/services/gemini/video-service.d.ts.map +1 -0
package/dist/services/gemini/video-service.js +325 -0
package/dist/services/gemini/video-service.js.map +1 -0
package/dist/services/media-server.d.ts +28 -0
package/dist/services/media-server.d.ts.map +1 -0
package/dist/services/media-server.js +195 -0
package/dist/services/media-server.js.map +1 -0
package/dist/svg-viewer/src/ui/svg-viewer.html +325 -0
package/dist/tools/gemini-chat.d.ts.map +1 -1
package/dist/tools/gemini-chat.js +7 -1
package/dist/tools/gemini-chat.js.map +1 -1
package/dist/tools/gemini-deep-research.d.ts +1 -2
package/dist/tools/gemini-deep-research.d.ts.map +1 -1
package/dist/tools/gemini-deep-research.js +11 -51
package/dist/tools/gemini-deep-research.js.map +1 -1
package/dist/tools/gemini-help.d.ts +3 -0
package/dist/tools/gemini-help.d.ts.map +1 -0
package/dist/tools/gemini-help.js +534 -0
package/dist/tools/gemini-help.js.map +1 -0
package/dist/tools/gemini-prompt-assistant.d.ts +20 -0
package/dist/tools/gemini-prompt-assistant.d.ts.map +1 -0
package/dist/tools/gemini-prompt-assistant.js +129 -0
package/dist/tools/gemini-prompt-assistant.js.map +1 -0
package/dist/tools/generate-landing-page.d.ts +15 -0
package/dist/tools/generate-landing-page.d.ts.map +1 -0
package/dist/tools/generate-landing-page.js +66 -0
package/dist/tools/generate-landing-page.js.map +1 -0
package/dist/tools/generate-svg.d.ts +14 -0
package/dist/tools/generate-svg.d.ts.map +1 -0
package/dist/tools/generate-svg.js +106 -0
package/dist/tools/generate-svg.js.map +1 -0
package/dist/tools/generate-video.d.ts +24 -0
package/dist/tools/generate-video.d.ts.map +1 -0
package/dist/tools/generate-video.js +163 -0
package/dist/tools/generate-video.js.map +1 -0
package/dist/tools/image-prompt-assistant.d.ts +3 -0
package/dist/tools/image-prompt-assistant.d.ts.map +1 -0
package/dist/tools/image-prompt-assistant.js +790 -0
package/dist/tools/image-prompt-assistant.js.map +1 -0
package/dist/tools/load-image-from-path.d.ts +11 -0
package/dist/tools/load-image-from-path.d.ts.map +1 -0
package/dist/tools/load-image-from-path.js +100 -0
package/dist/tools/load-image-from-path.js.map +1 -0
package/dist/tools/prompt-library/charts.d.ts +325 -0
package/dist/tools/prompt-library/charts.d.ts.map +1 -0
package/dist/tools/prompt-library/charts.js +384 -0
package/dist/tools/prompt-library/charts.js.map +1 -0
package/dist/tools/prompt-library/index.d.ts +8 -0
package/dist/tools/prompt-library/index.d.ts.map +1 -0
package/dist/tools/prompt-library/index.js +10 -0
package/dist/tools/prompt-library/index.js.map +1 -0
package/dist/tools/register-analyze-image.d.ts +3 -0
package/dist/tools/register-analyze-image.d.ts.map +1 -0
package/dist/tools/register-analyze-image.js +67 -0
package/dist/tools/register-analyze-image.js.map +1 -0
package/dist/tools/register-chat.d.ts +3 -0
package/dist/tools/register-chat.d.ts.map +1 -0
package/dist/tools/register-chat.js +71 -0
package/dist/tools/register-chat.js.map +1 -0
package/dist/tools/register-deep-research.d.ts +3 -0
package/dist/tools/register-deep-research.d.ts.map +1 -0
package/dist/tools/register-deep-research.js +59 -0
package/dist/tools/register-deep-research.js.map +1 -0
package/dist/tools/register-describe-image.d.ts +3 -0
package/dist/tools/register-describe-image.d.ts.map +1 -0
package/dist/tools/register-describe-image.js +59 -0
package/dist/tools/register-describe-image.js.map +1 -0
package/dist/tools/register-image-gen.d.ts +3 -0
package/dist/tools/register-image-gen.d.ts.map +1 -0
package/dist/tools/register-image-gen.js +235 -0
package/dist/tools/register-image-gen.js.map +1 -0
package/dist/tools/register-landing-page.d.ts +3 -0
package/dist/tools/register-landing-page.d.ts.map +1 -0
package/dist/tools/register-landing-page.js +79 -0
package/dist/tools/register-landing-page.js.map +1 -0
package/dist/tools/register-list-models.d.ts +3 -0
package/dist/tools/register-list-models.d.ts.map +1 -0
package/dist/tools/register-list-models.js +33 -0
package/dist/tools/register-list-models.js.map +1 -0
package/dist/tools/register-load-image.d.ts +3 -0
package/dist/tools/register-load-image.d.ts.map +1 -0
package/dist/tools/register-load-image.js +66 -0
package/dist/tools/register-load-image.js.map +1 -0
package/dist/tools/register-svg.d.ts +3 -0
package/dist/tools/register-svg.d.ts.map +1 -0
package/dist/tools/register-svg.js +84 -0
package/dist/tools/register-svg.js.map +1 -0
package/dist/tools/register-video.d.ts +3 -0
package/dist/tools/register-video.d.ts.map +1 -0
package/dist/tools/register-video.js +118 -0
package/dist/tools/register-video.js.map +1 -0
package/dist/tools/register-viewers.d.ts +8 -0
package/dist/tools/register-viewers.d.ts.map +1 -0
package/dist/tools/register-viewers.js +89 -0
package/dist/tools/register-viewers.js.map +1 -0
package/dist/tools/schemas.d.ts +33 -0
package/dist/tools/schemas.d.ts.map +1 -0
package/dist/tools/schemas.js +39 -0
package/dist/tools/schemas.js.map +1 -0
package/dist/tools/types.d.ts +12 -0
package/dist/tools/types.d.ts.map +1 -0
package/dist/tools/types.js +2 -0
package/dist/tools/types.js.map +1 -0
package/dist/ui/image-viewer.d.ts +2 -0
package/dist/ui/image-viewer.d.ts.map +1 -0
package/dist/ui/image-viewer.js +42 -0
package/dist/ui/image-viewer.js.map +1 -0
package/dist/utils/chart-design-system.d.ts +92 -0
package/dist/utils/chart-design-system.d.ts.map +1 -0
package/dist/utils/chart-design-system.js +235 -0
package/dist/utils/chart-design-system.js.map +1 -0
package/dist/utils/image-compress.d.ts +9 -0
package/dist/utils/image-compress.d.ts.map +1 -0
package/dist/utils/image-compress.js +43 -0
package/dist/utils/image-compress.js.map +1 -0
package/dist/utils/image-utils.d.ts +9 -0
package/dist/utils/image-utils.d.ts.map +1 -0
package/dist/utils/image-utils.js +257 -0
package/dist/utils/image-utils.js.map +1 -0
package/dist/utils/logger.d.ts.map +1 -1
package/dist/utils/logger.js +45 -11
package/dist/utils/logger.js.map +1 -1
package/dist/utils/resolve-images.d.ts +29 -0
package/dist/utils/resolve-images.d.ts.map +1 -0
package/dist/utils/resolve-images.js +56 -0
package/dist/utils/resolve-images.js.map +1 -0
package/dist/utils/tool-wrapper.d.ts +13 -0
package/dist/utils/tool-wrapper.d.ts.map +1 -0
package/dist/utils/tool-wrapper.js +22 -0
package/dist/utils/tool-wrapper.js.map +1 -0
package/dist/utils/video-utils.d.ts +16 -0
package/dist/utils/video-utils.d.ts.map +1 -0
package/dist/utils/video-utils.js +319 -0
package/dist/utils/video-utils.js.map +1 -0
package/dist/video-viewer/src/ui/video-viewer.html +310 -0
package/houtini-logo.jpg +0 -0
package/package.json +24 -8
package/server.json +30 -0

package/README.md CHANGED Viewed

@@ -1,784 +1,314 @@
-# Gemini MCP Server
-[![npm version](https://img.shields.io/npm/v/@houtini/gemini-mcp.svg?style=flat-square)](https://www.npmjs.com/package/@houtini/gemini-mcp)
-[![MCP Registry](https://img.shields.io/badge/MCP-Registry-blue?style=flat-square)](https://registry.modelcontextprotocol.io)
-[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg?style=flat-square)](https://opensource.org/licenses/Apache-2.0)
-[![TypeScript](https://img.shields.io/badge/TypeScript-5.3-blue?style=flat-square&logo=typescript)](https://www.typescriptlang.org/)
-[![MCP](https://img.shields.io/badge/MCP-Compatible-green?style=flat-square)](https://modelcontextprotocol.io)
-A production-ready Model Context Protocol server for Google's Gemini AI models. I've built this with TypeScript and the latest MCP SDK (1.25.3), focusing on real-world reliability rather than feature bloat.
-## What This Does
-This server connects Claude Desktop (or any MCP client) to Google's Gemini models. The integration is straightforward: chat with Gemini, get model information, and run deep research tasks with Google Search grounding built in.
-What I think matters here: the server discovers available models automatically from Google's API, which means you're always working with the latest releases without updating configuration files. No hardcoded model lists that go stale.
-## Quick Start
-The simplest way to use this is with `npx` - no installation required:
-```bash
-# Get your API key from Google AI Studio first
-# https://makersuite.google.com/app/apikey
-# Test it works (optional)
-npx @houtini/gemini-mcp
-# Add to Claude Desktop (configuration below)
-```
-## Installation Options
-### Recommended: npx (No Installation)
-```bash
-npx @houtini/gemini-mcp
-```
-This approach pulls the latest version automatically. I prefer this because you don't clutter your system with global packages, and updates happen transparently.
-### Alternative: Global Installation
-```bash
-npm install -g @houtini/gemini-mcp
-gemini-mcp
-```
-### Alternative: Local Project
-```bash
-npm install @houtini/gemini-mcp
-npx @houtini/gemini-mcp
-```
-### From Source (Developers)
-```bash
-git clone https://github.com/houtini-ai/gemini-mcp.git
-cd gemini-mcp
-npm install
-npm run build
-npm start
-```
-## Configuration
-### Step 1: Get Your API Key
-Visit [Google AI Studio](https://makersuite.google.com/app/apikey) to create a free API key. This takes about 30 seconds.
-### Step 2: Configure Claude Desktop
-Add this to your Claude Desktop config file:
-**Windows**: `%APPDATA%\Claude\claude_desktop_config.json`
-**macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json`
-#### Using npx (Recommended)
-```json
-{
-  "mcpServers": {
-    "gemini": {
-      "command": "npx",
-      "args": ["@houtini/gemini-mcp"],
-      "env": {
-        "GEMINI_API_KEY": "your-api-key-here"
-      }
-    }
-  }
-}
-```
-#### Using Global Installation
-```json
-{
-  "mcpServers": {
-    "gemini": {
-      "command": "gemini-mcp",
-      "env": {
-        "GEMINI_API_KEY": "your-api-key-here"
-      }
-    }
-  }
-}
-```
-Requires `npm install -g @houtini/gemini-mcp` first.
-#### Using Local Build
-```json
-{
-  "mcpServers": {
-    "gemini": {
-      "command": "node",
-      "args": ["./node_modules/@houtini/gemini-mcp/dist/index.js"],
-      "env": {
-        "GEMINI_API_KEY": "your-api-key-here"
-      }
-    }
-  }
-}
-```
-Only works if installed locally in the current directory.
-### Step 3: Restart Claude Desktop
-After updating the config, restart Claude Desktop. The server loads on startup.
-### Optional: Additional Configuration
-```json
-{
-  "mcpServers": {
-    "gemini": {
-      "command": "npx",
-      "args": ["@houtini/gemini-mcp"],
-      "env": {
-        "GEMINI_API_KEY": "your-api-key-here",
-        "LOG_LEVEL": "info",
-        "GEMINI_ALLOW_EXPERIMENTAL": "false"
-      }
-    }
-  }
-}
-```
-**Environment Variables:**
-| Variable | Default | What It Does |
-|----------|---------|--------------|
-| `GEMINI_API_KEY` | *required* | Your Google AI Studio API key |
-| `LOG_LEVEL` | `info` | Logging detail: `debug`, `info`, `warn`, `error` |
-| `GEMINI_ALLOW_EXPERIMENTAL` | `false` | Include experimental models (set `true` to enable) |
-## Dynamic Model Discovery
-The server automatically discovers available Gemini models from Google's API on first use. This happens transparently - you don't need to configure anything.
-### How It Works
-1. Server starts instantly with reliable fallback models
-2. First request triggers model discovery from Google's API (adds 1-2 seconds once)
-3. Subsequent requests use the discovered models (no delay)
-4. If discovery fails, fallback models work immediately
-What I've found: this approach keeps you current with Google's releases whilst maintaining instant startup. The server filters to stable production models by default, which avoids experimental model rate limits.
-### What Gets Discovered
-- All available Gemini models (stable and experimental)
-- Accurate context window sizes directly from Google
-- Model capabilities and recommended use cases
-- Latest releases as soon as Google makes them available
-The default model selection prioritises: stable models over experimental, newest version available, Flash variants for speed, and capability matching for your request type.
-### Performance Impact
-- Startup: 0ms (instant)
-- First request: +1-2 seconds (one-time discovery)
-- Subsequent requests: 0ms overhead
-- Discovery failure: 0ms (uses fallback immediately)
-Check your logs after first request to see what was discovered:
-```
-Models discovered from API (count: 38, defaultModel: gemini-2.5-flash)
-```
-## Experimental Models
-By default, the server uses stable production models. This ensures reliable performance and avoids Google's stricter rate limits on experimental releases.
-### Stable vs Experimental
-**Stable Models** (default behaviour):
-- Production-ready
-- Better rate limits
-- Consistent performance
-- Examples: `gemini-2.5-flash`, `gemini-2.5-pro`, `gemini-2.0-flash`
-**Experimental Models** (opt-in):
-- Latest features before stable release
-- Stricter rate limits
-- Potentially unexpected behaviour
-- Can be deprecated quickly
-- Examples: `gemini-exp-1206`, `gemini-2.0-flash-thinking-exp`
-### Enabling Experimental Models
-Set `GEMINI_ALLOW_EXPERIMENTAL=true` in your configuration:
-```json
-{
-  "mcpServers": {
-    "gemini": {
-      "command": "npx",
-      "args": ["@houtini/gemini-mcp"],
-      "env": {
-        "GEMINI_API_KEY": "your-api-key-here",
-        "GEMINI_ALLOW_EXPERIMENTAL": "true"
-      }
-    }
-  }
-}
-```
-This includes experimental models in discovery and makes them eligible as defaults. You can still explicitly request any model regardless of this setting - the flag only affects which models are used automatically.
-### When to Enable
-Keep experimental models disabled if you need reliable, consistent performance or you're building production applications.
-Enable experimental models if you're testing cutting-edge features, doing research, or you understand the rate limit trade-offs.
-## Usage Examples
-### Basic Chat
-```
-Can you help me understand quantum computing using Gemini?
-```
-Claude automatically uses the `gemini_chat` tool.
-### Creative Writing
-```
-Use Gemini to write a short story about artificial intelligence discovering creativity.
-```
-### Technical Analysis
-```
-Use Gemini Pro to explain the differences between various machine learning algorithms.
-```
-### Model Selection
-```
-Use Gemini 1.5 Pro to analyse this code and suggest improvements.
-```
-### Getting Model Information
-```
-Show me all available Gemini models and their capabilities.
-```
----
-## Complete Prompting Guide
-Check the **[Comprehensive Prompting Guide](PROMPTING_GUIDE.md)** for:
-- Advanced prompting techniques
-- Model selection strategies
-- Parameter tuning (temperature, tokens, system prompts)
-- Using Google Search grounding
-- Creative workflows and use cases
-- Best practices
-- Troubleshooting
-**[Read the Prompting Guide](PROMPTING_GUIDE.md)**
----
-## Google Search Grounding
-Google Search grounding is built in and enabled by default. This gives Gemini models access to current web information, which significantly improves accuracy for questions requiring up-to-date data.
-### What It Does
-When you ask a question that benefits from current information:
-1. Analyses your query to determine if web search helps
-2. Generates relevant search queries automatically
-3. Performs Google searches using targeted queries
-4. Processes results and synthesises information
-5. Provides enhanced response with inline citations
-6. Shows search metadata including queries used
-### Best Use Cases
-**Current Events & News**
-```
-What are the latest developments in AI announced this month?
-Recent breakthroughs in quantum computing research?
-```
-**Real-time Data**
-```
-Current stock prices for major tech companies
-Today's weather forecast for London
-```
-**Recent Developments**
-```
-New software releases this week
-Latest scientific discoveries in medicine
-```
-**Fact Checking**
-```
-Verify recent statements about climate change
-Check the latest statistics on global internet usage
-```
-### Controlling Grounding
-Grounding is enabled by default. Disable it for purely creative or hypothetical responses:
-```
-Use Gemini without web search to write a fictional story about dragons in space.
-```
-For API calls, use the `grounding` parameter:
-```json
-{
-  "message": "Write a creative story about time travel",
-  "grounding": false
-}
-```
-### Understanding Grounded Responses
-Grounded responses include source citations and search transparency:
-```
-Sources: (https://example.com/article1) (https://example.com/article2)
-Search queries used: latest AI developments 2025, OpenAI GPT-5 release
-```
-What I've found: grounding dramatically reduces hallucinations for factual queries whilst maintaining creative flexibility when you need it.
-## Deep Research
-The server includes deep research capability that performs iterative multi-step research on complex topics. This synthesises comprehensive reports with proper citations.
-### How It Works
-Deep research conducts multiple research iterations:
-1. Initial broad exploration
-2. Gap analysis identifying what's missing
-3. Targeted research into specific areas
-4. Synthesis into comprehensive report
-5. Iteration until thorough coverage
-### Using Deep Research
-```
-Use Gemini deep research to investigate the impact of quantum computing on cybersecurity.
-```
-With parameters:
-```
-Use Gemini deep research with 7 iterations to create a comprehensive report on renewable energy trends, focusing on solar and wind power adoption rates.
-```
-### Research Parameters
-| Parameter | Type | Default | What It Does |
-|-----------|------|---------|--------------|
-| `research_question` | string | *required* | The topic to investigate |
-| `max_iterations` | integer | 5 | Research cycles (3-10) |
-| `focus_areas` | array | - | Specific aspects to emphasise |
-| `model` | string | *latest stable* | Which model to use |
-### Best For
-- Academic research and literature reviews
-- Market analysis and competitive intelligence
-- Technology trend analysis
-- Policy research and impact assessments
-- Multi-faceted business problems
-### Configuring Iterations by Environment
-Different AI environments have different timeout tolerances:
-**Claude Desktop (3-5 iterations recommended)**
-- Timeout: ~4 minutes
-- Safe maximum: 5 iterations
-- Use 3-4 for most tasks
-**Agent SDK / IDEs (7-10 iterations recommended)**
-- Timeout: 10+ minutes
-- Maximum: 10 iterations
-- Use 7-10 for comprehensive research
-**AI Platforms like Cline, Roo-Cline (7-10 iterations)**
-- Similar to Agent SDK
-- Can handle longer processes
-### Handling Timeouts
-If you hit timeout or thread limits:
-1. Reduce iterations (start with 3)
-2. Narrow focus using `focus_areas` parameter
-3. Split complex topics into smaller research tasks
-4. Check which environment you're using
-Example with focused research:
-```
-Use Gemini deep research with 3 iterations focusing on cost analysis and market adoption to examine solar panel technology trends.
-```
-Deep research takes several minutes. It's designed for comprehensive analysis rather than quick answers.
-## API Reference
-### gemini_chat
-Chat with Gemini models.
-**Parameters:**
-| Parameter | Type | Required | Default | What It Does |
-|-----------|------|----------|---------|--------------|
-| `message` | string | Yes | - | The message to send |
-| `model` | string | No | *Latest stable* | Which model to use |
-| `temperature` | number | No | 0.7 | Randomness (0.0-1.0) |
-| `max_tokens` | integer | No | 8192 | Maximum response length (1-32768) |
-| `system_prompt` | string | No | - | System instruction |
-| `grounding` | boolean | No | true | Enable Google Search |
-**Example:**
-```json
-{
-  "message": "What are the latest developments in quantum computing?",
-  "model": "gemini-1.5-pro",
-  "temperature": 0.5,
-  "max_tokens": 1000,
-  "system_prompt": "You are a technology expert. Provide current information with sources.",
-  "grounding": true
-}
-```
-### gemini_list_models
-Retrieve information about discovered Gemini models.
-**Parameters:** None required
-**Example:**
-```json
-{}
-```
-**Response includes:**
-- Model names and display names
-- Descriptions of strengths
-- Context window sizes from Google
-- Recommended use cases
-### gemini_deep_research
-Conduct iterative multi-step research.
-**Parameters:**
-| Parameter | Type | Required | Default | What It Does |
-|-----------|------|----------|---------|--------------|
-| `research_question` | string | Yes | - | Topic to research |
-| `max_iterations` | integer | No | 5 | Research cycles (3-10) |
-| `focus_areas` | array | No | - | Specific areas to emphasise |
-| `model` | string | No | *Latest stable* | Model to use |
-**Example:**
-```json
-{
-  "research_question": "Impact of AI on healthcare diagnostics",
-  "max_iterations": 7,
-  "focus_areas": ["accuracy improvements", "cost implications", "regulatory challenges"]
-}
-```
-### Available Models
-Models are dynamically discovered from Google's API. Typical available models:
-| Model | Best For | Description |
-|-------|----------|-------------|
-| **gemini-2.5-flash** | General use | Latest Flash - fast, versatile |
-| **gemini-2.5-pro** | Complex reasoning | Latest Pro - advanced capabilities |
-| **gemini-2.0-flash** | Speed-optimised | Gemini 2.0 Flash - efficient |
-| **gemini-1.5-flash** | Quick responses | Gemini 1.5 Flash - fast |
-| **gemini-1.5-pro** | Large context | 2M token context window |
-Use `gemini_list_models` to see exact available models with current context limits.
-## Development
-### Building from Source
-```bash
-git clone https://github.com/houtini-ai/gemini-mcp.git
-cd gemini-mcp
-npm install
-npm run build
-npm run dev
-```
-### Scripts
-| Command | What It Does |
-|---------|--------------|
-| `npm run build` | Compile TypeScript |
-| `npm run dev` | Development mode with live reload |
-| `npm start` | Run compiled server |
-| `npm test` | Run tests |
-| `npm run lint` | Check code style |
-| `npm run lint:fix` | Fix linting issues |
-### Project Structure
-```
-src/
-├── config/           # Configuration management
-├── services/         # Business logic
-│   └── gemini/       # Gemini API integration
-├── tools/            # MCP tool implementations
-├── utils/            # Logger and error handling
-├── cli.ts            # CLI entry
-└── index.ts          # Main server
-```
-### Architecture
-The server follows clean, layered architecture:
-1. CLI Layer - Command-line interface
-2. Server Layer - MCP protocol handling
-3. Tools Layer - MCP tool implementations
-4. Service Layer - Business logic and API integration
-5. Utility Layer - Logging and error handling
-## Troubleshooting
-### "GEMINI_API_KEY environment variable not set"
-Check your Claude Desktop configuration includes the API key in the `env` section.
-### Server Not Appearing in Claude Desktop
-1. Restart Claude Desktop after configuration changes
-2. Verify config file path:
-   - Windows: `%APPDATA%\Claude\claude_desktop_config.json`
-   - macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`
-3. Validate JSON syntax
-4. Test your API key at [Google AI Studio](https://makersuite.google.com/app/apikey)
-### "Module not found" with npx
-```bash
-# Re-run, auto-confirming npx's install prompt (this does not clear the npx cache)
-npx --yes @houtini/gemini-mcp
-# Or install globally
-npm install -g @houtini/gemini-mcp
-```
-### Node.js Version Issues
-```bash
-# Check version
-node --version
-# Should be v18.0.0 or higher
-# Update from https://nodejs.org
-```
-### Debug Mode
-Enable detailed logging:
-```json
-{
-  "mcpServers": {
-    "gemini": {
-      "command": "npx",
-      "args": ["@houtini/gemini-mcp"],
-      "env": {
-        "GEMINI_API_KEY": "your-api-key-here",
-        "LOG_LEVEL": "debug"
-      }
-    }
-  }
-}
-```
-### Log Files
-Logs are written to:
-- Console output (Claude Desktop developer tools)
-- `logs/combined.log` - All levels
-- `logs/error.log` - Errors only
-### Testing Your Setup
-Test with these queries:
-1. "Can you list the available Gemini models?"
-2. "Use Gemini to explain photosynthesis."
-3. "Use Gemini 1.5 Pro with temperature 0.9 to write a creative poem about coding."
-### Performance Tuning
-For better performance:
-- Adjust token limits based on your use case
-- Use appropriate models (Flash for speed, Pro for complexity)
-- Monitor logs for rate limiting issues
-- Set temperature values appropriately (0.7 balanced, 0.3 focused, 0.9 creative)
-## Contributing
-Contributions welcome. Follow these steps:
-1. Fork the repository
-2. Create a feature branch: `git checkout -b feature/amazing-feature`
-3. Make your changes and add tests
-4. Run tests: `npm test`
-5. Lint: `npm run lint:fix`
-6. Build: `npm run build`
-7. Commit: `git commit -m 'Add amazing feature'`
-8. Push: `git push origin feature/amazing-feature`
-9. Open a Pull Request
-### Development Guidelines
-- Follow TypeScript best practices
-- Add tests for new functionality
-- Update documentation
-- Use conventional commit messages
-- Maintain backwards compatibility
-## Technical Details
-### Migration to MCP SDK 1.25.3
-This server has been migrated to the latest MCP SDK (1.25.3) with ES modules support. Key technical changes:
-**SDK Updates:**
-- Migrated from `Server` class to `McpServer` API
-- Tool registration uses `registerTool` with Zod validation
-- ES modules throughout (`"type": "module"`)
-- TypeScript configured for `nodenext` module resolution
-**Compatibility:**
-- Node.js 18+ (changed from 24+ for broader compatibility)
-- All imports use `.js` extensions for ES module compliance
-- Zod schemas for runtime type validation
-- Modern MCP protocol implementation
-**Build System:**
-- TypeScript compiles to ES2022 modules
-- Clean separation between business logic and MCP interface
-- Preserved all Gemini API client functionality
-What this means practically: the server now follows modern Node.js and MCP standards, which should prevent compatibility issues with future Claude Desktop updates whilst maintaining all existing functionality.
-## Licence
-This project is licensed under the Apache 2.0 Licence - see the [LICENSE](LICENSE) file for details.
-## Disclaimer
-**Use at Your Own Risk**: This software is provided "as is" without warranty. The authors accept no responsibility for damages, data loss, or other issues arising from its use.
-**Content Safety**: This server interfaces with Google's Gemini AI models. Whilst content safety settings are implemented, AI-generated content quality cannot be guaranteed. Users are responsible for reviewing AI output before use and ensuring compliance with applicable laws.
-**API Key Security**: Your Google Gemini API key is sensitive. Keep it confidential, don't commit it to version control, rotate it if exposed, and monitor your API usage costs.
-**Data Privacy**: This server processes data through the Model Context Protocol. Avoid sending sensitive or confidential information. Review Google's privacy policy and implement appropriate data handling.
-**Production Use**: Users deploying in production should conduct security audits, implement monitoring, have incident response procedures, and regularly update dependencies.
-**Third-Party Services**: This software relies on external services (Google Gemini API, npm packages). Service availability, pricing, and functionality may change.
-**No Professional Advice**: AI-generated content should not be considered professional advice (legal, medical, financial) without verification by qualified professionals.
-By using this software, you acknowledge these terms and agree to use it at your own risk.
-## Support
-- **GitHub Issues**: [Report bugs or request features](https://github.com/houtini-ai/gemini-mcp/issues)
-- **GitHub Discussions**: [Ask questions or share ideas](https://github.com/houtini-ai/gemini-mcp/discussions)
-## Changelog
-### v1.3.2 - Node.js 18+ Compatibility & Modern SDK
-**Breaking Changes:** None (all tool interfaces preserved)
-**Technical Updates:**
-- Updated to MCP SDK 1.25.3 (from 1.19.1)
-- Migrated to ES modules (`"type": "module"`)
-- Changed Node.js requirement to >=18.0.0 (from >=24.0.0) for broader compatibility
-- Migrated from `Server` to `McpServer` API
-- Implemented Zod schema validation for all tools
-- Updated TypeScript config to `nodenext` module resolution
-**Fixes:**
-- Resolved Node.js v24 ERR_MODULE_NOT_FOUND errors
-- Fixed TypeScript compilation with DOM types for fetch API
-- All imports now use `.js` extensions for ES module compliance
-**What This Means:**
-The server now works reliably with Node.js 18, 20, 22, and 24. All existing functionality preserved - this is purely a technical infrastructure update for better compatibility.
-### v1.1.0 - Deep Research & Enhanced Discovery
-**New Features:**
-- Added deep research capability for iterative analysis
-- Enhanced model discovery with better filtering
-- Improved default model selection logic
-- Better handling of experimental vs stable models
-### v1.0.4 - Security & Dependencies
-**Updates:**
-- Updated @google/generative-ai to v0.24.1
-- Updated @modelcontextprotocol/sdk to v1.19.1
-- Changed safety settings to BLOCK_MEDIUM_AND_ABOVE
-- Added comprehensive disclaimer
-- Zero vulnerabilities in dependencies
-### v1.0.3 - Enhanced Grounding
-**Improvements:**
-- Fixed grounding metadata field names
-- Enhanced source citation processing
-- Improved grounding reliability
-- Better error handling for grounding
-### v1.0.2 - Google Search Grounding
-**New Features:**
-- Added Google Search grounding (enabled by default)
-- Real-time web search integration
-- Source citations in responses
-- Configurable grounding parameter
-### v1.0.0 - Initial Release
-**Core Features:**
-- Complete TypeScript rewrite
-- Professional modular architecture
-- Comprehensive error handling
-- Full MCP protocol compliance
-- Multiple Gemini model support
-- NPM package distribution
-- Production-ready build system
----
-**Built for the Model Context Protocol community**
-For more about MCP, visit [modelcontextprotocol.io](https://modelcontextprotocol.io)
+# @houtini/gemini-mcp
+[![npm version](https://img.shields.io/npm/v/@houtini/gemini-mcp.svg?style=flat-square)](https://www.npmjs.com/package/@houtini/gemini-mcp)
+[![MCP Registry](https://img.shields.io/badge/MCP-Registry-blue?style=flat-square)](https://registry.modelcontextprotocol.io)
+**I've been running this MCP server in my Claude Desktop setup for several months, and it's one of the few I leave enabled permanently.** Not because Gemini replaces Claude -- it doesn't -- but because grounded search, deep research, image generation, and video are things Gemini does well. Having them as tools inside Claude beats switching between browser tabs.
+Thirteen tools. One `npx` command.
+### MCP App previews
+Generated images and diagrams render inline in Claude Desktop with zoom controls, file paths, and prompt context:
+| Image generation | SVG / diagram generation |
+|:---:|:---:|
+| ![Image preview in MCP App](image-preview-mcp-app.jpg) | ![Diagram preview in MCP App](diagram-preview-mcp-app.jpg) |
+---
+## Get started in two minutes
+**Step 1: Get a Gemini API key**
+Go to [Google AI Studio](https://aistudio.google.com/apikey) and create one. The free tier covers most development use -- you'll hit rate limits on deep research if you're hammering it, but for day-to-day work it's fine.
+**Step 2: Add to your Claude Desktop config**
+Config file locations:
+- Windows: `C:\Users\{username}\AppData\Roaming\Claude\claude_desktop_config.json`
+- macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`
+```json
+{
+  "mcpServers": {
+    "gemini": {
+      "command": "npx",
+      "args": ["@houtini/gemini-mcp"],
+      "env": {
+        "GEMINI_API_KEY": "your-api-key-here"
+      }
+    }
+  }
+}
+```
+**Step 3: Restart Claude Desktop**
+That's it. The tools show up automatically. `npx` pulls the package on first run -- no separate install.
+### Local build instead
+For development, or if you'd rather not rely on npx:
+```bash
+git clone https://github.com/houtini-ai/gemini-mcp
+cd gemini-mcp
+npm install --include=dev
+npm run build
+```
+Then point your config at the local build:
+```json
+{
+  "mcpServers": {
+    "gemini": {
+      "command": "node",
+      "args": ["C:/path/to/gemini-mcp/dist/index.js"],
+      "env": {
+        "GEMINI_API_KEY": "your-api-key-here"
+      }
+    }
+  }
+}
+```
+---
+## What it does
+### Chat with Google Search grounding
+```
+Use gemini:gemini_chat to ask: "What changed in the MCP spec in the last month?"
+```
+Grounding is on by default. Gemini searches Google before answering, so you get current information rather than answers limited by the model's training-data cutoff. Sources come back as markdown links.
+For questions where you want Gemini's own reasoning rather than a live search -- "explain this code" or similar -- set `grounding: false`.
+Supports `thinking_level` on Gemini 3 models: `high` for maximum reasoning depth, `low` to keep it fast, `medium`/`minimal` on Gemini 3 Flash only.
+### Deep research
+```
+Use gemini:gemini_deep_research with:
+  research_question="What are the current approaches to AI agent memory management?"
+  max_iterations=5
+```
+Runs multiple grounded search iterations, then synthesises a full report. Takes 2-5 minutes depending on complexity. Worth it for anything where you need comprehensive coverage rather than a quick answer.
+Set `max_iterations` to 3-4 in Claude Desktop (4-minute tool timeout). In IDEs (Cursor, Windsurf, VS Code) or agent frameworks with longer timeout tolerance, 7-10 iterations produce noticeably better synthesis. Pass `focus_areas` as an array to steer toward specific angles.
+### Image generation with search grounding
+```
+Use gemini:generate_image with:
+  prompt="Stock price chart showing Apple (AAPL) closing prices for the last 5 trading days"
+  use_search=true
+  aspectRatio="16:9"
+```
+Default model is `gemini-3-pro-image-preview` (Nano Banana Pro). Also supports `gemini-2.5-flash-image` for faster generation.
+When `use_search=true`, Gemini searches Google for current data before generating. Financial and news queries work reliably and return 2-5 grounding sources as markdown links. Weather queries are inconsistent (Gemini API limitation, not a code issue).
+### Video generation with Veo 3.1
+```
+Use gemini:generate_video with:
+  prompt="A close-up shot of a futuristic coffee machine brewing a glowing blue espresso, steam rising dramatically. Cinematic lighting."
+  resolution="1080p"
+  durationSeconds=8
+```
+Uses Google's Veo 3.1 model. Generates 4-8 second videos at up to 4K resolution with native synchronised audio. Processing takes 2-5 minutes -- the tool polls automatically until the video is ready.
+Options worth knowing about:
+- `aspectRatio` -- `16:9` (landscape, default) or `9:16` (portrait/vertical)
+- `generateAudio` -- on by default, produces dialogue and sound effects matching the prompt
+- `sampleCount` -- generate up to 4 variations in one call
+- `seed` -- for deterministic output across runs
+- `generateThumbnail` -- extracts a frame via ffmpeg (needs ffmpeg in PATH)
+- `generateHTMLPlayer` -- creates a local HTML player alongside the video
+### SVG generation
+```
+Use gemini:generate_svg with:
+  prompt="Architecture diagram showing a microservices system with API gateway, three services, and a shared database"
+  style="technical"
+  width=1000
+  height=600
+```
+Generates clean, production-ready SVG code for diagrams, illustrations, icons, and data visualisations. Styles: `technical` (diagrams), `artistic` (illustrations), `minimal` (simple), `data-viz` (charts).
+### Image editing and analysis
+**Conversational editing** -- Gemini 3 Pro Image maintains context across editing turns using thought signatures. The server captures these automatically. Pass them back on subsequent edit calls for full continuity:
+```
+Use gemini:edit_image with:
+  prompt="Change the colour scheme to blue and green"
+  images=[{data: imageBase64, mimeType: "image/png", thoughtSignature: "fromPreviousCall"}]
+```
+Skip thought signatures and each edit starts from scratch.
+**Analysis** -- two tools for different purposes:
+- `describe_image` -- Fast general descriptions using Gemini 3 Flash
+- `analyze_image` -- Structured extraction and detailed reasoning using Gemini 3.1 Pro
+**Load local files:**
+```
+Use gemini:load_image_from_path with filePath="C:/screenshots/error.png"
+```
+Returns base64 data ready for any image tool.
+### Media resolution control
+Reduce token usage by up to 75% whilst maintaining quality:
+| Level | Tokens | Savings | Best for |
+|-------|--------|---------|----------|
+| `MEDIA_RESOLUTION_LOW` | 280 | 75% | Simple tasks, bulk operations |
+| `MEDIA_RESOLUTION_MEDIUM` | 560 | 50% | PDFs/documents (OCR saturates here) |
+| `MEDIA_RESOLUTION_HIGH` | 1120 | default | Detailed analysis |
+| `MEDIA_RESOLUTION_ULTRA_HIGH` | 2000+ | per-image only | Maximum detail |
+For PDF OCR, MEDIUM gives identical text extraction quality to HIGH at half the tokens. Set `global_media_resolution` to apply to all images, or override per-image with `mediaResolution`.
+### Landing page generation
+```
+Use gemini:generate_landing_page with:
+  brief="A SaaS tool that helps developers monitor API latency"
+  companyName="PingWatch"
+  primaryColour="#6366F1"
+  style="startup"
+  sections=["hero", "features", "pricing", "cta"]
+```
+Returns a self-contained HTML file -- inline CSS and vanilla JS, no external dependencies. Styles: `minimal`, `bold`, `corporate`, `startup`.
+### Professional chart design systems
+The `gemini_prompt_assistant` tool includes 9 professional chart design systems:
+| System | Inspiration | Best for |
+|--------|------------|----------|
+| **storytelling** | Cole Nussbaumer Knaflic | Executive presentations -- everything muted except one bold highlight |
+| **financial** | Financial Times | Editorial journalism -- FT Pink background, serif titles |
+| **terminal** | Bloomberg / Fintech | High-density dark mode with electric neon |
+| **modernist** | W.E.B. Du Bois | Bold geometric blocks, stark contrasts |
+| **professional** | IBM Carbon / Tailwind | Enterprise dashboards |
+| **editorial** | FiveThirtyEight / Economist | Data journalism |
+| **scientific** | Nature / Science | Academic rigour |
+| **minimal** | Edward Tufte | Maximum data-ink ratio |
+| **dark** | Observable | Modern dark mode |
+```
+Use gemini:gemini_prompt_assistant with:
+  request_type="template"
+  use_case="product"
+  desired_outcome="Generate a professional product comparison chart"
+```
+### Help system
+```
+Use gemini:gemini_help with topic="overview"
+```
+Documentation for all features without leaving Claude. Topics: `overview`, `image_generation`, `image_editing`, `image_analysis`, `chat`, `deep_research`, `grounding`, `media_resolution`, `models`, `all`.
+---
+## Image output and storage
+**Default behaviour:** Images return as inline base64 previews (quality 100, 1024px) rendered directly in Claude.
+**Persistent storage:** Set `GEMINI_IMAGE_OUTPUT_DIR` to auto-save all generated images:
+```json
+"env": {
+  "GEMINI_API_KEY": "your-api-key-here",
+  "GEMINI_IMAGE_OUTPUT_DIR": "C:/Users/username/Pictures/gemini-output"
+}
+```
+Every image is saved with a timestamped filename. The tool returns both the inline preview and the file path.
+**Per-call override:** Pass `outputPath` on any generation tool to save to a specific location.
+The server uses a two-tier compression approach to handle the MCP protocol's ~1MB JSON-RPC limit whilst preserving full-resolution files on disk:
+| Tier | Quality | Max dimension | Purpose |
+|------|---------|---------------|---------|
+| **Full-res** | Original | Original | Saved to disk |
+| **Viewer preview** | 100 | 1024px | MCP App inline preview (~400KB) |
+Gemini returns 2-5MB images. The full image is saved to disk immediately, and a compressed preview is created for the MCP App viewer.
+---
+## Configuration reference
+| Variable | Required | Default | Description |
+|----------|----------|---------|-------------|
+| `GEMINI_API_KEY` | Yes | -- | Google AI API key from [AI Studio](https://aistudio.google.com/apikey) |
+| `GEMINI_DEFAULT_MODEL` | No | `gemini-3.1-pro-preview` | Default model for `gemini_chat` and `analyze_image` |
+| `GEMINI_DEFAULT_GROUNDING` | No | `true` | Enable Google Search grounding by default |
+| `GEMINI_IMAGE_OUTPUT_DIR` | No | -- | Auto-save directory for generated images |
+| `GEMINI_ALLOW_EXPERIMENTAL` | No | `false` | Include experimental/preview models in auto-discovery |
+| `GEMINI_MCP_LOG_FILE` | No | `false` | Write logs to `~/.gemini-mcp/logs/` |
+| `DEBUG_MCP` | No | `false` | Log to stderr for debugging tool calls |
+---
+## Tools reference
+| Tool | Description |
+|------|-------------|
+| `gemini_chat` | Chat with Gemini 3.1 Pro. Google Search grounding on by default. Supports `thinking_level` for Gemini 3 |
+| `gemini_deep_research` | Multi-step iterative research with Google Search. Synthesises comprehensive reports |
+| `gemini_list_models` | Lists available models from the API |
+| `gemini_help` | Documentation for all features without leaving Claude |
+| `gemini_prompt_assistant` | Expert guidance for image generation with 9 chart design systems |
+| `generate_image` | Image generation with search grounding and thought signatures for conversational editing |
+| `edit_image` | Edit images with natural-language instructions. Supports multi-turn continuity |
+| `describe_image` | Fast image descriptions using Gemini 3 Flash |
+| `analyze_image` | Structured extraction and analysis using Gemini 3.1 Pro |
+| `load_image_from_path` | Read a local image file and return base64 for any image tool |
+| `generate_video` | Video generation with Veo 3.1 -- 4-8 seconds at up to 4K with native audio |
+| `generate_svg` | Production-ready SVG graphics for diagrams, illustrations, and data visualisations |
+| `generate_landing_page` | Self-contained HTML landing pages with inline CSS/JS |
+---
+## Model reference
+| Model | Used by | Notes |
+|-------|---------|-------|
+| `gemini-3.1-pro-preview` | `gemini_chat`, `analyze_image` | Default. Advanced reasoning |
+| `gemini-3-pro-image-preview` | `generate_image`, `edit_image` | Nano Banana Pro -- highest quality generation |
+| `gemini-2.5-flash-image` | `generate_image` (optional) | Faster generation, higher volume |
+| `gemini-3-flash-preview` | `describe_image` | Fast general descriptions |
+| `veo-3.1-generate-preview` | `generate_video` | Veo 3.1 -- 4K video with native audio |
+**Gemini 3 notes:** Temperature is forced to 1.0 on Gemini 3 models (Google's requirement -- lower values cause looping). Thought signatures are captured automatically for conversational image editing. Thinking level only applies to `gemini_chat`.
+---
+## Requirements
+- Node.js 18+
+- A Gemini API key from [Google AI Studio](https://aistudio.google.com/apikey)
+- ffmpeg (optional, for video thumbnail extraction)
+## Licence
+Apache-2.0