@rlabs-inc/gemini-mcp 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -4,33 +4,45 @@ A Model Context Protocol (MCP) server for integrating Google's Gemini 3 models w
4
4
 
5
5
  [![npm version](https://badge.fury.io/js/@rlabs-inc%2Fgemini-mcp.svg)](https://www.npmjs.com/package/@rlabs-inc/gemini-mcp)
6
6
 
7
- ## What's New in v0.4.0
7
+ ## What's New in v0.6.0
8
8
 
9
- **20+ tools** for comprehensive Gemini 3 integration:
9
+ **30+ tools** for comprehensive Gemini 3 integration - the most complete Gemini MCP server available!
10
+
11
+ **Text-to-Speech (NEW!):**
12
+ - **gemini-speak** - Convert text to speech with 30 unique voices
13
+ - **gemini-dialogue** - Generate two-speaker conversations
14
+ - **gemini-list-voices** - Browse all available voices
15
+
16
+ **URL Analysis (NEW!):**
17
+ - **gemini-analyze-url** - Analyze web pages with questions
18
+ - **gemini-compare-urls** - Compare two URLs side by side
19
+ - **gemini-extract-from-url** - Extract structured data from pages
20
+
21
+ **Context Caching (NEW!):**
22
+ - **gemini-create-cache** - Cache large documents for repeated queries
23
+ - **gemini-query-cache** - Query cached content efficiently
24
+ - **gemini-list-caches** / **gemini-delete-cache** - Manage caches
10
25
 
11
26
  **Multimodal Analysis:**
12
27
  - **YouTube Analysis** - Analyze videos by URL with timestamps and clipping
13
28
  - **Document Analysis** - PDFs, DOCX, spreadsheets with table extraction
14
29
 
15
30
  **Generation & Editing:**
16
- - **4K Image Generation** - Up to 4K resolution with Nano Banana Pro
17
- - **10 Aspect Ratios** - 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9
31
+ - **4K Image Generation** - Up to 4K resolution with 10 aspect ratios
18
32
  - **Multi-Turn Image Editing** - Iteratively refine images through conversation
19
- - **Google Search Grounding** - Ground images in real-world information
33
+ - **Video Generation** - Create videos with Veo 2.0
20
34
 
21
35
  **Advanced Tools:**
22
- - **Code Execution** - Gemini writes AND runs Python code (pandas, matplotlib, numpy, etc.)
36
+ - **Code Execution** - Gemini writes AND runs Python code
23
37
  - **Google Search** - Real-time web information with citations
24
38
  - **Structured Output** - JSON schema responses with validation
25
- - **Data Extraction** - Extract entities, facts, sentiment from text
26
-
27
- **Core Improvements:**
28
- - **Thinking Levels** - Control reasoning depth: minimal, low, medium, high
29
- - **Gemini 3 Models** - Updated to latest frontier models
39
+ - **Brainstorming** - Claude + Gemini collaborative problem-solving
30
40
 
31
41
  ### Previous Versions
32
42
 
33
- **v0.3.0:** Phase 2-3 features (thinking levels, code execution, search)
43
+ **v0.5.0:** 30+ tools, TTS, URL analysis, caching
44
+ **v0.4.0:** YouTube, documents, code execution, search
45
+ **v0.3.0:** Thinking levels, structured output
34
46
  **v0.2.0:** Image/Video generation with Veo
35
47
 
36
48
  ---
@@ -39,18 +51,21 @@ A Model Context Protocol (MCP) server for integrating Google's Gemini 3 models w
39
51
 
40
52
  | Feature | Description |
41
53
  |-------------------------------|-----------------------------------------------------------------|
54
+ | **Text-to-Speech** | 30 unique voices, single speaker or two-speaker dialogues |
55
+ | **URL Analysis** | Analyze, compare, and extract data from web pages |
56
+ | **Context Caching** | Cache large documents for efficient repeated queries |
42
57
  | **YouTube Analysis** | Analyze videos by URL with timestamp clipping |
43
58
  | **Document Analysis** | PDFs, DOCX, spreadsheets with table extraction |
44
59
  | **4K Image Generation** | Generate images up to 4K with 10 aspect ratios |
45
60
  | **Multi-Turn Image Editing** | Iteratively refine images through conversation |
46
- | **Video Generation** | Create videos with Veo (async with polling) |
61
+ | **Video Generation** | Create videos with Veo 2.0 (async with polling) |
47
62
  | **Code Execution** | Gemini writes and runs Python code (pandas, numpy, matplotlib) |
48
63
  | **Google Search** | Real-time web information with inline citations |
49
64
  | **Structured Output** | JSON responses with schema validation |
50
65
  | **Data Extraction** | Extract entities, facts, sentiment from text |
51
66
  | **Thinking Levels** | Control reasoning depth (minimal/low/medium/high) |
52
67
  | **Direct Query** | Send prompts to Gemini 3 Pro/Flash models |
53
- | **Brainstorming** | Collaborative problem-solving |
68
+ | **Brainstorming** | Claude + Gemini collaborative problem-solving |
54
69
  | **Code Analysis** | Analyze code for quality, security, performance |
55
70
  | **Summarization** | Summarize content at different detail levels |
56
71
 
@@ -9,6 +9,7 @@
9
9
  * - 4K Image Generation: Up to 4K resolution with Google Search grounding
10
10
  * - Multi-turn Image Editing: Conversational image refinement
11
11
  */
12
+ import { GoogleGenAI } from '@google/genai';
12
13
  /**
13
14
  * Thinking levels for Gemini 3 models
14
15
  * - minimal: Fastest, minimal reasoning (Flash only)
@@ -31,6 +32,7 @@ export type AspectRatio = '1:1' | '2:3' | '3:2' | '3:4' | '4:3' | '4:5' | '5:4'
31
32
  * Image sizes for Nano Banana Pro (Gemini 3 Pro Image)
32
33
  */
33
34
  export type ImageSize = '1K' | '2K' | '4K';
35
+ export declare let genAI: GoogleGenAI;
34
36
  /**
35
37
  * Initialize the Gemini client with configured models
36
38
  */
@@ -13,8 +13,8 @@ import { GoogleGenAI, Modality } from '@google/genai';
13
13
  import { logger } from './utils/logger.js';
14
14
  import * as fs from 'fs';
15
15
  import * as path from 'path';
16
- // Global clients
17
- let genAI;
16
+ // Global clients (exported for use by other modules)
17
+ export let genAI;
18
18
  let proModelName;
19
19
  let flashModelName;
20
20
  let imageModelName;
package/dist/index.js CHANGED
@@ -24,6 +24,8 @@ import { registerDocumentTool } from './tools/document.js';
24
24
  import { registerUrlContextTool } from './tools/url-context.js';
25
25
  import { registerCacheTool } from './tools/cache.js';
26
26
  import { registerSpeechTool } from './tools/speech.js';
27
+ import { registerTokenCountTool } from './tools/token-count.js';
28
+ import { registerDeepResearchTool } from './tools/deep-research.js';
27
29
  // Import Gemini client and logger
28
30
  import { initGeminiClient } from './gemini-client.js';
29
31
  import { setupLogger, logger } from './utils/logger.js';
@@ -111,7 +113,7 @@ async function main() {
111
113
  // Create MCP server
112
114
  const server = new McpServer({
113
115
  name: 'Gemini',
114
- version: '0.5.0',
116
+ version: '0.6.0',
115
117
  });
116
118
  // Register tools
117
119
  registerQueryTool(server);
@@ -129,6 +131,8 @@ async function main() {
129
131
  registerUrlContextTool(server);
130
132
  registerCacheTool(server);
131
133
  registerSpeechTool(server);
134
+ registerTokenCountTool(server);
135
+ registerDeepResearchTool(server);
132
136
  // Start server with stdio transport with enhanced error handling
133
137
  const transport = new StdioServerTransport();
134
138
  // Set up error handling for transport with improved error recovery
@@ -6,6 +6,8 @@
6
6
  import { z } from "zod";
7
7
  import { generateWithGeminiPro } from "../gemini-client.js";
8
8
  import { logger } from "../utils/logger.js";
9
+ /** Consensus threshold - score at which brainstorming is considered complete */
10
+ const CONSENSUS_THRESHOLD = 8;
9
11
  /**
10
12
  * Register brainstorm tool with the MCP server
11
13
  */
@@ -58,7 +60,7 @@ Format this as: "Consensus Score: [NUMBER]"
58
60
  consensusScore: consensusScore
59
61
  });
60
62
  // Check if we already have consensus
61
- if (consensusScore >= 8) {
63
+ if (consensusScore >= CONSENSUS_THRESHOLD) {
62
64
  logger.info(`Consensus reached in first round with score ${consensusScore}`);
63
65
  consensusReached = true;
64
66
  }
@@ -143,7 +145,7 @@ Format: "Consensus Score: [NUMBER]"
143
145
  consensusScore: geminiConsensusScore
144
146
  });
145
147
  // Check if we've reached consensus
146
- if (geminiConsensusScore >= 8 || claudeConsensusScore >= 8) {
148
+ if (geminiConsensusScore >= CONSENSUS_THRESHOLD || claudeConsensusScore >= CONSENSUS_THRESHOLD) {
147
149
  logger.info(`Consensus reached in round ${currentRound} with score ${geminiConsensusScore}`);
148
150
  consensusReached = true;
149
151
  }
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Deep Research Tool - Autonomous multi-step research agent
3
+ *
4
+ * Uses the Gemini Deep Research Agent for complex research tasks.
5
+ * The agent autonomously plans, searches, reads, and synthesizes research.
6
+ */
7
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
8
+ /**
9
+ * Register deep research tools with the MCP server
10
+ */
11
+ export declare function registerDeepResearchTool(server: McpServer): void;
@@ -0,0 +1,236 @@
1
+ /**
2
+ * Deep Research Tool - Autonomous multi-step research agent
3
+ *
4
+ * Uses the Gemini Deep Research Agent for complex research tasks.
5
+ * The agent autonomously plans, searches, reads, and synthesizes research.
6
+ */
7
+ import { z } from "zod";
8
+ import { logger } from "../utils/logger.js";
9
+ import { genAI } from "../gemini-client.js";
10
// Store active research operations for polling.
// Keyed by interaction id; values hold { interactionId, startedAt, prompt }.
// NOTE(review): this map is in-memory only — pending research ids are lost on
// server restart, after which gemini-check-research falls back to elapsed 0s.
const activeResearchOperations = new Map();
// Deep Research agent model
const DEEP_RESEARCH_AGENT = "deep-research-pro-preview-12-2025";
/**
 * Register deep research tools with the MCP server.
 *
 * Registers three tools:
 * - gemini-deep-research:    start a background research task via the
 *   Interactions API (genAI.interactions.create with background: true)
 * - gemini-check-research:   poll a task by id and return results when done
 * - gemini-research-followup: ask a follow-up question chained to a prior
 *   interaction via previousInteractionId
 *
 * @param server - the McpServer instance to register tools on
 */
export function registerDeepResearchTool(server) {
    // Start a deep research task
    server.tool("gemini-deep-research", {
        query: z.string().describe("The research question or topic to investigate"),
        format: z
            .string()
            .optional()
            .describe("Optional output format instructions (e.g., 'technical report with sections')"),
    }, async ({ query, format }) => {
        logger.info(`Starting deep research: ${query.substring(0, 50)}...`);
        try {
            // Build the research prompt with optional formatting
            let researchPrompt = query;
            if (format) {
                researchPrompt = `${query}\n\nFormat the output as: ${format}`;
            }
            // Start the research task in the background
            // The Interactions API is accessed via genAI.interactions
            const interaction = await genAI.interactions.create({
                input: researchPrompt,
                agent: DEEP_RESEARCH_AGENT,
                background: true,
                agentConfig: {
                    type: "deep-research",
                    thinkingSummaries: "auto"
                }
            });
            // NOTE(review): if the API returns no id, the fabricated fallback id
            // cannot be used with genAI.interactions.get() later — polling on it
            // will fail. Confirm whether interaction.id can actually be absent.
            const interactionId = interaction.id || `research-${Date.now()}`;
            // Store for later polling
            activeResearchOperations.set(interactionId, {
                interactionId,
                startedAt: new Date(),
                prompt: query
            });
            logger.info(`Deep research started: ${interactionId}`);
            return {
                content: [{
                        type: "text",
                        text: `**Deep Research Started**

| Field | Value |
|-------|-------|
| **Research ID** | \`${interactionId}\` |
| **Query** | ${query.substring(0, 100)}${query.length > 100 ? '...' : ''} |
| **Status** | In Progress |
| **Started** | ${new Date().toISOString()} |

**What happens now:**
1. The Deep Research Agent is autonomously planning its research approach
2. It will search the web, read sources, and synthesize findings
3. This typically takes 2-10 minutes depending on complexity

**To check progress:**
Use \`gemini-check-research\` with the Research ID above.

**Note:** Deep research tasks run in the background. You can continue working while waiting.`
                    }]
            };
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            logger.error(`Error starting deep research: ${errorMessage}`);
            // Check if it's an API availability issue.
            // Heuristic: match on the error text since the SDK does not expose a
            // dedicated "feature unavailable" error type here.
            if (errorMessage.includes("interactions") || errorMessage.includes("not found")) {
                return {
                    content: [{
                            type: "text",
                            text: `**Deep Research Not Available**

The Interactions API required for Deep Research may not be available yet in your SDK version or API access.

**Error:** ${errorMessage}

**Alternatives:**
- Use \`gemini-search\` for real-time web search
- Use \`gemini-query\` with a detailed research prompt
- Wait for Interactions API to become available in your region`
                        }],
                    isError: true
                };
            }
            return {
                content: [{ type: "text", text: `Error starting deep research: ${errorMessage}` }],
                isError: true
            };
        }
    });
    // Check research status
    server.tool("gemini-check-research", {
        researchId: z.string().describe("The research ID returned from gemini-deep-research")
    }, async ({ researchId }) => {
        logger.info(`Checking research status: ${researchId}`);
        try {
            // Get stored operation info (may be undefined if the server
            // restarted or the id came from another session — elapsed time
            // then reports 0).
            const operationInfo = activeResearchOperations.get(researchId);
            // Get the current status
            const interaction = await genAI.interactions.get(researchId);
            const status = interaction.status || "unknown";
            const elapsedMs = operationInfo
                ? Date.now() - operationInfo.startedAt.getTime()
                : 0;
            const elapsedMinutes = Math.floor(elapsedMs / 60000);
            const elapsedSeconds = Math.floor((elapsedMs % 60000) / 1000);
            if (status === "completed") {
                // Research is done - extract the result
                activeResearchOperations.delete(researchId);
                // The final synthesized report is taken from the last output entry.
                const outputs = interaction.outputs || [];
                const result = outputs.length > 0
                    ? outputs[outputs.length - 1].text || "No text output"
                    : "Research completed but no output found";
                logger.info(`Research completed: ${researchId}`);
                return {
                    content: [{
                            type: "text",
                            text: `**Deep Research Complete**

| Field | Value |
|-------|-------|
| **Research ID** | \`${researchId}\` |
| **Status** | ✅ Completed |
| **Duration** | ${elapsedMinutes}m ${elapsedSeconds}s |

---

## Research Results

${result}`
                        }]
                };
            }
            else if (status === "failed") {
                activeResearchOperations.delete(researchId);
                const errorInfo = interaction.error || "Unknown error";
                logger.error(`Research failed: ${researchId} - ${errorInfo}`);
                return {
                    content: [{
                            type: "text",
                            text: `**Deep Research Failed**

| Field | Value |
|-------|-------|
| **Research ID** | \`${researchId}\` |
| **Status** | ❌ Failed |
| **Error** | ${errorInfo} |

The research task encountered an error. You can try:
- Starting a new research task with a different query
- Using \`gemini-search\` for simpler web searches`
                        }],
                    isError: true
                };
            }
            else {
                // Still in progress (any status other than completed/failed,
                // including "unknown").
                return {
                    content: [{
                            type: "text",
                            text: `**Deep Research In Progress**

| Field | Value |
|-------|-------|
| **Research ID** | \`${researchId}\` |
| **Status** | ⏳ ${status} |
| **Elapsed** | ${elapsedMinutes}m ${elapsedSeconds}s |
| **Query** | ${operationInfo?.prompt.substring(0, 50) || 'Unknown'}... |

The agent is still working. Deep research typically takes 2-10 minutes.

Check again in 30-60 seconds using \`gemini-check-research\`.`
                        }]
                };
            }
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            logger.error(`Error checking research status: ${errorMessage}`);
            return {
                content: [{ type: "text", text: `Error checking research status: ${errorMessage}` }],
                isError: true
            };
        }
    });
    // Follow-up on completed research.
    // NOTE(review): completion of the referenced research is not verified
    // before chaining — behavior when researchId is still in progress depends
    // on the Interactions API; confirm and document.
    server.tool("gemini-research-followup", {
        researchId: z.string().describe("The research ID from a completed research task"),
        question: z.string().describe("Follow-up question about the research results")
    }, async ({ researchId, question }) => {
        logger.info(`Research follow-up on ${researchId}: ${question.substring(0, 50)}...`);
        try {
            // Chain the follow-up onto the prior interaction so the model has
            // the research context available.
            const interaction = await genAI.interactions.create({
                input: question,
                model: "gemini-3-pro-preview",
                previousInteractionId: researchId
            });
            const outputs = interaction.outputs || [];
            const result = outputs.length > 0
                ? outputs[outputs.length - 1].text || "No response"
                : "No response received";
            return {
                content: [{
                        type: "text",
                        text: `**Research Follow-up**

**Question:** ${question}

**Answer:**
${result}`
                    }]
            };
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            logger.error(`Error with research follow-up: ${errorMessage}`);
            return {
                content: [{ type: "text", text: `Error with follow-up: ${errorMessage}` }],
                isError: true
            };
        }
    });
}
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Token Counting Tool - Count tokens before making API calls
3
+ *
4
+ * Helps users estimate costs and manage context windows.
5
+ */
6
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
7
+ /**
8
+ * Register token counting tool with the MCP server
9
+ */
10
+ export declare function registerTokenCountTool(server: McpServer): void;
@@ -0,0 +1,70 @@
1
+ /**
2
+ * Token Counting Tool - Count tokens before making API calls
3
+ *
4
+ * Helps users estimate costs and manage context windows.
5
+ */
6
+ import { z } from "zod";
7
+ import { logger } from "../utils/logger.js";
8
+ import { genAI } from "../gemini-client.js";
9
+ /**
10
+ * Register token counting tool with the MCP server
11
+ */
12
/**
 * Register token counting tool with the MCP server.
 *
 * Exposes "gemini-count-tokens", which calls genAI.models.countTokens for the
 * selected model and reports the token total, context-window usage, and a
 * rough input-cost estimate.
 *
 * @param server - the McpServer instance to register the tool on
 */
export function registerTokenCountTool(server) {
    // Both model choices currently report a 1M-token context window.
    // (The original code had `model === "pro" ? 1_000_000 : 1_000_000` — a
    // dead conditional with identical branches; use a single constant.)
    const CONTEXT_WINDOW_TOKENS = 1_000_000;
    // Approximate input pricing in USD per 1M tokens — presumably matches
    // Google AI pricing at time of writing; verify against
    // https://ai.google.dev/pricing before relying on these figures.
    const COST_PER_1M_USD = { pro: 1.25, flash: 0.075 };
    server.tool("gemini-count-tokens", {
        content: z.string().describe("The text content to count tokens for"),
        model: z
            .enum(["pro", "flash"])
            .default("flash")
            .describe("Which model to use for counting (affects tokenization)")
    }, async ({ content, model = "flash" }) => {
        logger.info(`Counting tokens for ${content.length} characters using ${model} model`);
        try {
            // Resolve the concrete model name, honoring environment overrides.
            const modelName = model === "pro"
                ? (process.env.GEMINI_PRO_MODEL || "gemini-3-pro-preview")
                : (process.env.GEMINI_FLASH_MODEL || "gemini-3-flash-preview");
            const result = await genAI.models.countTokens({
                model: modelName,
                contents: content
            });
            const totalTokens = result.totalTokens || 0;
            const costPer1M = COST_PER_1M_USD[model];
            const estimatedCost = (totalTokens / 1_000_000) * costPer1M;
            const percentUsed = (totalTokens / CONTEXT_WINDOW_TOKENS) * 100;
            const response = `**Token Count Results**

| Metric | Value |
|--------|-------|
| **Total Tokens** | ${totalTokens.toLocaleString()} |
| **Characters** | ${content.length.toLocaleString()} |
| **Model** | ${modelName} |

**Context Window Usage:**
- Context window: ${CONTEXT_WINDOW_TOKENS.toLocaleString()} tokens
- Used: ${percentUsed.toFixed(4)}%
- Remaining: ${(CONTEXT_WINDOW_TOKENS - totalTokens).toLocaleString()} tokens

**Estimated Cost:**
- Input cost: ~$${estimatedCost.toFixed(6)} USD
- Per 1M tokens: $${costPer1M} (${model})

*Note: Actual costs may vary. Check [Google AI pricing](https://ai.google.dev/pricing) for current rates.*`;
            logger.info(`Token count: ${totalTokens}`);
            return {
                content: [{ type: "text", text: response }]
            };
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            logger.error(`Error counting tokens: ${errorMessage}`);
            return {
                content: [{ type: "text", text: `Error counting tokens: ${errorMessage}` }],
                isError: true
            };
        }
    });
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rlabs-inc/gemini-mcp",
3
- "version": "0.5.0",
3
+ "version": "0.6.0",
4
4
  "description": "MCP server for Gemini 3 integration with Claude Code - full frontier AI capabilities",
5
5
  "main": "dist/index.js",
6
6
  "type": "module",