@rlabs-inc/gemini-mcp 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,33 +4,45 @@ A Model Context Protocol (MCP) server for integrating Google's Gemini 3 models w
4
4
 
5
5
  [![npm version](https://badge.fury.io/js/@rlabs-inc%2Fgemini-mcp.svg)](https://www.npmjs.com/package/@rlabs-inc/gemini-mcp)
6
6
 
7
- ## What's New in v0.4.0
7
+ ## What's New in v0.6.0
8
8
 
9
- **20+ tools** for comprehensive Gemini 3 integration:
9
+ **30+ tools** for comprehensive Gemini 3 integration - the most complete Gemini MCP server available!
10
+
11
+ **Text-to-Speech (NEW!):**
12
+ - **gemini-speak** - Convert text to speech with 30 unique voices
13
+ - **gemini-dialogue** - Generate two-speaker conversations
14
+ - **gemini-list-voices** - Browse all available voices
15
+
16
+ **URL Analysis (NEW!):**
17
+ - **gemini-analyze-url** - Analyze web pages with questions
18
+ - **gemini-compare-urls** - Compare two URLs side by side
19
+ - **gemini-extract-from-url** - Extract structured data from pages
20
+
21
+ **Context Caching (NEW!):**
22
+ - **gemini-create-cache** - Cache large documents for repeated queries
23
+ - **gemini-query-cache** - Query cached content efficiently
24
+ - **gemini-list-caches** / **gemini-delete-cache** - Manage caches
10
25
 
11
26
  **Multimodal Analysis:**
12
27
  - **YouTube Analysis** - Analyze videos by URL with timestamps and clipping
13
28
  - **Document Analysis** - PDFs, DOCX, spreadsheets with table extraction
14
29
 
15
30
  **Generation & Editing:**
16
- - **4K Image Generation** - Up to 4K resolution with Nano Banana Pro
17
- - **10 Aspect Ratios** - 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9
31
+ - **4K Image Generation** - Up to 4K resolution with 10 aspect ratios
18
32
  - **Multi-Turn Image Editing** - Iteratively refine images through conversation
19
- - **Google Search Grounding** - Ground images in real-world information
33
+ - **Video Generation** - Create videos with Veo 2.0
20
34
 
21
35
  **Advanced Tools:**
22
- - **Code Execution** - Gemini writes AND runs Python code (pandas, matplotlib, numpy, etc.)
36
+ - **Code Execution** - Gemini writes AND runs Python code
23
37
  - **Google Search** - Real-time web information with citations
24
38
  - **Structured Output** - JSON schema responses with validation
25
- - **Data Extraction** - Extract entities, facts, sentiment from text
26
-
27
- **Core Improvements:**
28
- - **Thinking Levels** - Control reasoning depth: minimal, low, medium, high
29
- - **Gemini 3 Models** - Updated to latest frontier models
39
+ - **Brainstorming** - Claude + Gemini collaborative problem-solving
30
40
 
31
41
  ### Previous Versions
32
42
 
33
- **v0.3.0:** Phase 2-3 features (thinking levels, code execution, search)
43
+ **v0.5.0:** 30+ tools, TTS, URL analysis, caching
44
+ **v0.4.0:** YouTube, documents, code execution, search
45
+ **v0.3.0:** Thinking levels, structured output
34
46
  **v0.2.0:** Image/Video generation with Veo
35
47
 
36
48
  ---
@@ -39,18 +51,21 @@ A Model Context Protocol (MCP) server for integrating Google's Gemini 3 models w
39
51
 
40
52
  | Feature | Description |
41
53
  |-------------------------------|-----------------------------------------------------------------|
54
+ | **Text-to-Speech** | 30 unique voices, single speaker or two-speaker dialogues |
55
+ | **URL Analysis** | Analyze, compare, and extract data from web pages |
56
+ | **Context Caching** | Cache large documents for efficient repeated queries |
42
57
  | **YouTube Analysis** | Analyze videos by URL with timestamp clipping |
43
58
  | **Document Analysis** | PDFs, DOCX, spreadsheets with table extraction |
44
59
  | **4K Image Generation** | Generate images up to 4K with 10 aspect ratios |
45
60
  | **Multi-Turn Image Editing** | Iteratively refine images through conversation |
46
- | **Video Generation** | Create videos with Veo (async with polling) |
61
+ | **Video Generation** | Create videos with Veo 2.0 (async with polling) |
47
62
  | **Code Execution** | Gemini writes and runs Python code (pandas, numpy, matplotlib) |
48
63
  | **Google Search** | Real-time web information with inline citations |
49
64
  | **Structured Output** | JSON responses with schema validation |
50
65
  | **Data Extraction** | Extract entities, facts, sentiment from text |
51
66
  | **Thinking Levels** | Control reasoning depth (minimal/low/medium/high) |
52
67
  | **Direct Query** | Send prompts to Gemini 3 Pro/Flash models |
53
- | **Brainstorming** | Collaborative problem-solving |
68
+ | **Brainstorming** | Claude + Gemini collaborative problem-solving |
54
69
  | **Code Analysis** | Analyze code for quality, security, performance |
55
70
  | **Summarization** | Summarize content at different detail levels |
56
71
 
@@ -9,6 +9,7 @@
9
9
  * - 4K Image Generation: Up to 4K resolution with Google Search grounding
10
10
  * - Multi-turn Image Editing: Conversational image refinement
11
11
  */
12
+ import { GoogleGenAI } from '@google/genai';
12
13
  /**
13
14
  * Thinking levels for Gemini 3 models
14
15
  * - minimal: Fastest, minimal reasoning (Flash only)
@@ -31,6 +32,7 @@ export type AspectRatio = '1:1' | '2:3' | '3:2' | '3:4' | '4:3' | '4:5' | '5:4'
31
32
  * Image sizes for Nano Banana Pro (Gemini 3 Pro Image)
32
33
  */
33
34
  export type ImageSize = '1K' | '2K' | '4K';
35
+ export declare let genAI: GoogleGenAI;
34
36
  /**
35
37
  * Initialize the Gemini client with configured models
36
38
  */
@@ -13,8 +13,8 @@ import { GoogleGenAI, Modality } from '@google/genai';
13
13
  import { logger } from './utils/logger.js';
14
14
  import * as fs from 'fs';
15
15
  import * as path from 'path';
16
- // Global clients
17
- let genAI;
16
+ // Global clients (exported for use by other modules)
17
+ export let genAI;
18
18
  let proModelName;
19
19
  let flashModelName;
20
20
  let imageModelName;
package/dist/index.js CHANGED
@@ -24,6 +24,8 @@ import { registerDocumentTool } from './tools/document.js';
24
24
  import { registerUrlContextTool } from './tools/url-context.js';
25
25
  import { registerCacheTool } from './tools/cache.js';
26
26
  import { registerSpeechTool } from './tools/speech.js';
27
+ import { registerTokenCountTool } from './tools/token-count.js';
28
+ import { registerDeepResearchTool } from './tools/deep-research.js';
27
29
  // Import Gemini client and logger
28
30
  import { initGeminiClient } from './gemini-client.js';
29
31
  import { setupLogger, logger } from './utils/logger.js';
@@ -111,7 +113,7 @@ async function main() {
111
113
  // Create MCP server
112
114
  const server = new McpServer({
113
115
  name: 'Gemini',
114
- version: '0.5.0',
116
+ version: '0.6.0',
115
117
  });
116
118
  // Register tools
117
119
  registerQueryTool(server);
@@ -129,6 +131,8 @@ async function main() {
129
131
  registerUrlContextTool(server);
130
132
  registerCacheTool(server);
131
133
  registerSpeechTool(server);
134
+ registerTokenCountTool(server);
135
+ registerDeepResearchTool(server);
132
136
  // Start server with stdio transport with enhanced error handling
133
137
  const transport = new StdioServerTransport();
134
138
  // Set up error handling for transport with improved error recovery
@@ -6,6 +6,8 @@
6
6
  import { z } from "zod";
7
7
  import { generateWithGeminiPro } from "../gemini-client.js";
8
8
  import { logger } from "../utils/logger.js";
9
+ /** Consensus threshold - score at which brainstorming is considered complete */
10
+ const CONSENSUS_THRESHOLD = 8;
9
11
  /**
10
12
  * Register brainstorm tool with the MCP server
11
13
  */
@@ -58,7 +60,7 @@ Format this as: "Consensus Score: [NUMBER]"
58
60
  consensusScore: consensusScore
59
61
  });
60
62
  // Check if we already have consensus
61
- if (consensusScore >= 8) {
63
+ if (consensusScore >= CONSENSUS_THRESHOLD) {
62
64
  logger.info(`Consensus reached in first round with score ${consensusScore}`);
63
65
  consensusReached = true;
64
66
  }
@@ -143,7 +145,7 @@ Format: "Consensus Score: [NUMBER]"
143
145
  consensusScore: geminiConsensusScore
144
146
  });
145
147
  // Check if we've reached consensus
146
- if (geminiConsensusScore >= 8 || claudeConsensusScore >= 8) {
148
+ if (geminiConsensusScore >= CONSENSUS_THRESHOLD || claudeConsensusScore >= CONSENSUS_THRESHOLD) {
147
149
  logger.info(`Consensus reached in round ${currentRound} with score ${geminiConsensusScore}`);
148
150
  consensusReached = true;
149
151
  }
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Deep Research Tool - Autonomous multi-step research agent
3
+ *
4
+ * Uses the Gemini Deep Research Agent for complex research tasks.
5
+ * The agent autonomously plans, searches, reads, and synthesizes research.
6
+ */
7
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
8
+ /**
9
+ * Register deep research tools with the MCP server
10
+ */
11
+ export declare function registerDeepResearchTool(server: McpServer): void;
@@ -0,0 +1,236 @@
1
+ /**
2
+ * Deep Research Tool - Autonomous multi-step research agent
3
+ *
4
+ * Uses the Gemini Deep Research Agent for complex research tasks.
5
+ * The agent autonomously plans, searches, reads, and synthesizes research.
6
+ */
7
+ import { z } from "zod";
8
+ import { logger } from "../utils/logger.js";
9
+ import { genAI } from "../gemini-client.js";
10
// Store active research operations for polling.
// Keyed by interaction id; values hold { interactionId, startedAt, prompt }.
// NOTE(review): this map is in-memory only — pending research ids are lost on
// server restart, after which gemini-check-research falls back to elapsed 0s.
const activeResearchOperations = new Map();
// Deep Research agent model
const DEEP_RESEARCH_AGENT = "deep-research-pro-preview-12-2025";
/**
 * Register deep research tools with the MCP server.
 *
 * Registers three tools:
 * - gemini-deep-research:    start a background research task via the
 *   Interactions API (genAI.interactions.create with background: true)
 * - gemini-check-research:   poll a task by id and return results when done
 * - gemini-research-followup: ask a follow-up question chained to a prior
 *   interaction via previousInteractionId
 *
 * @param server - the McpServer instance to register tools on
 */
export function registerDeepResearchTool(server) {
    // Start a deep research task
    server.tool("gemini-deep-research", {
        query: z.string().describe("The research question or topic to investigate"),
        format: z
            .string()
            .optional()
            .describe("Optional output format instructions (e.g., 'technical report with sections')"),
    }, async ({ query, format }) => {
        logger.info(`Starting deep research: ${query.substring(0, 50)}...`);
        try {
            // Build the research prompt with optional formatting
            let researchPrompt = query;
            if (format) {
                researchPrompt = `${query}\n\nFormat the output as: ${format}`;
            }
            // Start the research task in the background
            // The Interactions API is accessed via genAI.interactions
            const interaction = await genAI.interactions.create({
                input: researchPrompt,
                agent: DEEP_RESEARCH_AGENT,
                background: true,
                agentConfig: {
                    type: "deep-research",
                    thinkingSummaries: "auto"
                }
            });
            // NOTE(review): if the API returns no id, the fabricated fallback id
            // cannot be used with genAI.interactions.get() later — polling on it
            // will fail. Confirm whether interaction.id can actually be absent.
            const interactionId = interaction.id || `research-${Date.now()}`;
            // Store for later polling
            activeResearchOperations.set(interactionId, {
                interactionId,
                startedAt: new Date(),
                prompt: query
            });
            logger.info(`Deep research started: ${interactionId}`);
            return {
                content: [{
                        type: "text",
                        text: `**Deep Research Started**

| Field | Value |
|-------|-------|
| **Research ID** | \`${interactionId}\` |
| **Query** | ${query.substring(0, 100)}${query.length > 100 ? '...' : ''} |
| **Status** | In Progress |
| **Started** | ${new Date().toISOString()} |

**What happens now:**
1. The Deep Research Agent is autonomously planning its research approach
2. It will search the web, read sources, and synthesize findings
3. This typically takes 2-10 minutes depending on complexity

**To check progress:**
Use \`gemini-check-research\` with the Research ID above.

**Note:** Deep research tasks run in the background. You can continue working while waiting.`
                    }]
            };
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            logger.error(`Error starting deep research: ${errorMessage}`);
            // Check if it's an API availability issue.
            // Heuristic: match on the error text since the SDK does not expose a
            // dedicated "feature unavailable" error type here.
            if (errorMessage.includes("interactions") || errorMessage.includes("not found")) {
                return {
                    content: [{
                            type: "text",
                            text: `**Deep Research Not Available**

The Interactions API required for Deep Research may not be available yet in your SDK version or API access.

**Error:** ${errorMessage}

**Alternatives:**
- Use \`gemini-search\` for real-time web search
- Use \`gemini-query\` with a detailed research prompt
- Wait for Interactions API to become available in your region`
                        }],
                    isError: true
                };
            }
            return {
                content: [{ type: "text", text: `Error starting deep research: ${errorMessage}` }],
                isError: true
            };
        }
    });
    // Check research status
    server.tool("gemini-check-research", {
        researchId: z.string().describe("The research ID returned from gemini-deep-research")
    }, async ({ researchId }) => {
        logger.info(`Checking research status: ${researchId}`);
        try {
            // Get stored operation info (may be undefined if the server
            // restarted or the id came from another session — elapsed time
            // then reports 0).
            const operationInfo = activeResearchOperations.get(researchId);
            // Get the current status
            const interaction = await genAI.interactions.get(researchId);
            const status = interaction.status || "unknown";
            const elapsedMs = operationInfo
                ? Date.now() - operationInfo.startedAt.getTime()
                : 0;
            const elapsedMinutes = Math.floor(elapsedMs / 60000);
            const elapsedSeconds = Math.floor((elapsedMs % 60000) / 1000);
            if (status === "completed") {
                // Research is done - extract the result
                activeResearchOperations.delete(researchId);
                // The final synthesized report is taken from the last output entry.
                const outputs = interaction.outputs || [];
                const result = outputs.length > 0
                    ? outputs[outputs.length - 1].text || "No text output"
                    : "Research completed but no output found";
                logger.info(`Research completed: ${researchId}`);
                return {
                    content: [{
                            type: "text",
                            text: `**Deep Research Complete**

| Field | Value |
|-------|-------|
| **Research ID** | \`${researchId}\` |
| **Status** | ✅ Completed |
| **Duration** | ${elapsedMinutes}m ${elapsedSeconds}s |

---

## Research Results

${result}`
                        }]
                };
            }
            else if (status === "failed") {
                activeResearchOperations.delete(researchId);
                const errorInfo = interaction.error || "Unknown error";
                logger.error(`Research failed: ${researchId} - ${errorInfo}`);
                return {
                    content: [{
                            type: "text",
                            text: `**Deep Research Failed**

| Field | Value |
|-------|-------|
| **Research ID** | \`${researchId}\` |
| **Status** | ❌ Failed |
| **Error** | ${errorInfo} |

The research task encountered an error. You can try:
- Starting a new research task with a different query
- Using \`gemini-search\` for simpler web searches`
                        }],
                    isError: true
                };
            }
            else {
                // Still in progress (any status other than completed/failed,
                // including "unknown").
                return {
                    content: [{
                            type: "text",
                            text: `**Deep Research In Progress**

| Field | Value |
|-------|-------|
| **Research ID** | \`${researchId}\` |
| **Status** | ⏳ ${status} |
| **Elapsed** | ${elapsedMinutes}m ${elapsedSeconds}s |
| **Query** | ${operationInfo?.prompt.substring(0, 50) || 'Unknown'}... |

The agent is still working. Deep research typically takes 2-10 minutes.

Check again in 30-60 seconds using \`gemini-check-research\`.`
                        }]
                };
            }
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            logger.error(`Error checking research status: ${errorMessage}`);
            return {
                content: [{ type: "text", text: `Error checking research status: ${errorMessage}` }],
                isError: true
            };
        }
    });
    // Follow-up on completed research.
    // NOTE(review): completion of the referenced research is not verified
    // before chaining — behavior when researchId is still in progress depends
    // on the Interactions API; confirm and document.
    server.tool("gemini-research-followup", {
        researchId: z.string().describe("The research ID from a completed research task"),
        question: z.string().describe("Follow-up question about the research results")
    }, async ({ researchId, question }) => {
        logger.info(`Research follow-up on ${researchId}: ${question.substring(0, 50)}...`);
        try {
            // Chain the follow-up onto the prior interaction so the model has
            // the research context available.
            const interaction = await genAI.interactions.create({
                input: question,
                model: "gemini-3-pro-preview",
                previousInteractionId: researchId
            });
            const outputs = interaction.outputs || [];
            const result = outputs.length > 0
                ? outputs[outputs.length - 1].text || "No response"
                : "No response received";
            return {
                content: [{
                        type: "text",
                        text: `**Research Follow-up**

**Question:** ${question}

**Answer:**
${result}`
                    }]
            };
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            logger.error(`Error with research follow-up: ${errorMessage}`);
            return {
                content: [{ type: "text", text: `Error with follow-up: ${errorMessage}` }],
                isError: true
            };
        }
    });
}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Token Counting Tool - Count tokens before making API calls
3
+ *
4
+ * Helps users estimate costs and manage context windows.
5
+ */
6
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
7
+ /**
8
+ * Register token counting tool with the MCP server
9
+ */
10
+ export declare function registerTokenCountTool(server: McpServer): void;
@@ -0,0 +1,70 @@
1
+ /**
2
+ * Token Counting Tool - Count tokens before making API calls
3
+ *
4
+ * Helps users estimate costs and manage context windows.
5
+ */
6
+ import { z } from "zod";
7
+ import { logger } from "../utils/logger.js";
8
+ import { genAI } from "../gemini-client.js";
9
+ /**
10
+ * Register token counting tool with the MCP server
11
+ */
12
/**
 * Register token counting tool with the MCP server.
 *
 * Exposes "gemini-count-tokens", which calls genAI.models.countTokens for the
 * selected model and reports the token total, context-window usage, and a
 * rough input-cost estimate.
 *
 * @param server - the McpServer instance to register the tool on
 */
export function registerTokenCountTool(server) {
    // Both model choices currently report a 1M-token context window.
    // (The original code had `model === "pro" ? 1_000_000 : 1_000_000` — a
    // dead conditional with identical branches; use a single constant.)
    const CONTEXT_WINDOW_TOKENS = 1_000_000;
    // Approximate input pricing in USD per 1M tokens — presumably matches
    // Google AI pricing at time of writing; verify against
    // https://ai.google.dev/pricing before relying on these figures.
    const COST_PER_1M_USD = { pro: 1.25, flash: 0.075 };
    server.tool("gemini-count-tokens", {
        content: z.string().describe("The text content to count tokens for"),
        model: z
            .enum(["pro", "flash"])
            .default("flash")
            .describe("Which model to use for counting (affects tokenization)")
    }, async ({ content, model = "flash" }) => {
        logger.info(`Counting tokens for ${content.length} characters using ${model} model`);
        try {
            // Resolve the concrete model name, honoring environment overrides.
            const modelName = model === "pro"
                ? (process.env.GEMINI_PRO_MODEL || "gemini-3-pro-preview")
                : (process.env.GEMINI_FLASH_MODEL || "gemini-3-flash-preview");
            const result = await genAI.models.countTokens({
                model: modelName,
                contents: content
            });
            const totalTokens = result.totalTokens || 0;
            const costPer1M = COST_PER_1M_USD[model];
            const estimatedCost = (totalTokens / 1_000_000) * costPer1M;
            const percentUsed = (totalTokens / CONTEXT_WINDOW_TOKENS) * 100;
            const response = `**Token Count Results**

| Metric | Value |
|--------|-------|
| **Total Tokens** | ${totalTokens.toLocaleString()} |
| **Characters** | ${content.length.toLocaleString()} |
| **Model** | ${modelName} |

**Context Window Usage:**
- Context window: ${CONTEXT_WINDOW_TOKENS.toLocaleString()} tokens
- Used: ${percentUsed.toFixed(4)}%
- Remaining: ${(CONTEXT_WINDOW_TOKENS - totalTokens).toLocaleString()} tokens

**Estimated Cost:**
- Input cost: ~$${estimatedCost.toFixed(6)} USD
- Per 1M tokens: $${costPer1M} (${model})

*Note: Actual costs may vary. Check [Google AI pricing](https://ai.google.dev/pricing) for current rates.*`;
            logger.info(`Token count: ${totalTokens}`);
            return {
                content: [{ type: "text", text: response }]
            };
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            logger.error(`Error counting tokens: ${errorMessage}`);
            return {
                content: [{ type: "text", text: `Error counting tokens: ${errorMessage}` }],
                isError: true
            };
        }
    });
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rlabs-inc/gemini-mcp",
3
- "version": "0.5.0",
3
+ "version": "0.6.0",
4
4
  "description": "MCP server for Gemini 3 integration with Claude Code - full frontier AI capabilities",
5
5
  "main": "dist/index.js",
6
6
  "type": "module",