@rlabs-inc/gemini-mcp 0.5.1 → 0.6.1

package/README.md CHANGED
@@ -4,21 +4,29 @@ A Model Context Protocol (MCP) server for integrating Google's Gemini 3 models w
 
  [![npm version](https://badge.fury.io/js/@rlabs-inc%2Fgemini-mcp.svg)](https://www.npmjs.com/package/@rlabs-inc/gemini-mcp)
 
- ## What's New in v0.5.1
+ ## What's New in v0.6.0
 
- **30+ tools** for comprehensive Gemini 3 integration - the most complete Gemini MCP server available!
+ **35+ tools** for comprehensive Gemini 3 integration - the most complete Gemini MCP server available!
 
- **Text-to-Speech (NEW!):**
+ **Deep Research Agent (NEW!):**
+ - **gemini-deep-research** - Autonomous multi-step research with web search
+ - **gemini-check-research** - Poll research status and get results
+ - **gemini-research-followup** - Ask follow-up questions on completed research
+
+ **Token Management (NEW!):**
+ - **gemini-count-tokens** - Count tokens before API calls with cost estimates
+
+ **Text-to-Speech:**
  - **gemini-speak** - Convert text to speech with 30 unique voices
  - **gemini-dialogue** - Generate two-speaker conversations
  - **gemini-list-voices** - Browse all available voices
 
- **URL Analysis (NEW!):**
+ **URL Analysis:**
  - **gemini-analyze-url** - Analyze web pages with questions
  - **gemini-compare-urls** - Compare two URLs side by side
  - **gemini-extract-from-url** - Extract structured data from pages
 
- **Context Caching (NEW!):**
+ **Context Caching:**
  - **gemini-create-cache** - Cache large documents for repeated queries
  - **gemini-query-cache** - Query cached content efficiently
  - **gemini-list-caches** / **gemini-delete-cache** - Manage caches
@@ -40,6 +48,7 @@ A Model Context Protocol (MCP) server for integrating Google's Gemini 3 models w
 
  ### Previous Versions
 
+ **v0.5.1:** Documentation updates, code quality improvements
  **v0.5.0:** 30+ tools, TTS, URL analysis, caching
  **v0.4.0:** YouTube, documents, code execution, search
  **v0.3.0:** Thinking levels, structured output
@@ -51,6 +60,8 @@ A Model Context Protocol (MCP) server for integrating Google's Gemini 3 models w
 
  | Feature | Description |
  |-------------------------------|-----------------------------------------------------------------|
+ | **Deep Research Agent** | Autonomous multi-step research with web search and citations |
+ | **Token Counting** | Count tokens and estimate costs before API calls |
  | **Text-to-Speech** | 30 unique voices, single speaker or two-speaker dialogues |
  | **URL Analysis** | Analyze, compare, and extract data from web pages |
  | **Context Caching** | Cache large documents for efficient repeated queries |
package/dist/gemini-client.d.ts CHANGED
@@ -9,6 +9,7 @@
  * - 4K Image Generation: Up to 4K resolution with Google Search grounding
  * - Multi-turn Image Editing: Conversational image refinement
  */
+ import { GoogleGenAI } from '@google/genai';
  /**
  * Thinking levels for Gemini 3 models
  * - minimal: Fastest, minimal reasoning (Flash only)
@@ -31,6 +32,7 @@ export type AspectRatio = '1:1' | '2:3' | '3:2' | '3:4' | '4:3' | '4:5' | '5:4'
  * Image sizes for Nano Banana Pro (Gemini 3 Pro Image)
  */
  export type ImageSize = '1K' | '2K' | '4K';
+ export declare let genAI: GoogleGenAI;
  /**
  * Initialize the Gemini client with configured models
  */
package/dist/gemini-client.js CHANGED
@@ -13,8 +13,8 @@ import { GoogleGenAI, Modality } from '@google/genai';
  import { logger } from './utils/logger.js';
  import * as fs from 'fs';
  import * as path from 'path';
- // Global clients
- let genAI;
+ // Global clients (exported for use by other modules)
+ export let genAI;
  let proModelName;
  let flashModelName;
  let imageModelName;
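The only behavioral change in this hunk is that `genAI` is now exported, so tool modules can share one client instead of constructing their own. A minimal sketch of a consumer, assuming the module layout above (the `quickCount` helper is hypothetical; the `countTokens` call is the same one the new token-count tool uses):

```ts
// Hypothetical consumer module: reuses the shared client exported above
// instead of constructing its own GoogleGenAI instance.
import { genAI } from "../gemini-client.js";

export async function quickCount(text: string): Promise<number> {
  const result = await genAI.models.countTokens({
    model: process.env.GEMINI_FLASH_MODEL || "gemini-3-flash-preview",
    contents: text,
  });
  return result.totalTokens ?? 0;
}
```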
package/dist/index.js CHANGED
@@ -24,6 +24,8 @@ import { registerDocumentTool } from './tools/document.js';
  import { registerUrlContextTool } from './tools/url-context.js';
  import { registerCacheTool } from './tools/cache.js';
  import { registerSpeechTool } from './tools/speech.js';
+ import { registerTokenCountTool } from './tools/token-count.js';
+ import { registerDeepResearchTool } from './tools/deep-research.js';
  // Import Gemini client and logger
  import { initGeminiClient } from './gemini-client.js';
  import { setupLogger, logger } from './utils/logger.js';
@@ -111,7 +113,7 @@ async function main() {
  // Create MCP server
  const server = new McpServer({
  name: 'Gemini',
- version: '0.5.0',
+ version: '0.6.1',
  });
  // Register tools
  registerQueryTool(server);
@@ -129,6 +131,8 @@ async function main() {
  registerUrlContextTool(server);
  registerCacheTool(server);
  registerSpeechTool(server);
+ registerTokenCountTool(server);
+ registerDeepResearchTool(server);
  // Start server with stdio transport with enhanced error handling
  const transport = new StdioServerTransport();
  // Set up error handling for transport with improved error recovery
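Both new modules follow the registration pattern the existing `register*` functions use: each exports one function that receives the `McpServer` and attaches its tools. A stripped-down sketch of that shape (the `gemini-echo` tool is hypothetical, invented here only for illustration):

```ts
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { z } from "zod";

// Each tool module exports one register function that attaches its tools.
export function registerEchoTool(server: McpServer): void {
  server.tool(
    "gemini-echo", // hypothetical tool name, for illustration only
    { message: z.string().describe("Text to echo back") },
    async ({ message }) => ({
      content: [{ type: "text" as const, text: message }],
    })
  );
}
```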
package/dist/tools/deep-research.d.ts ADDED
@@ -0,0 +1,11 @@
+ /**
+ * Deep Research Tool - Autonomous multi-step research agent
+ *
+ * Uses the Gemini Deep Research Agent for complex research tasks.
+ * The agent autonomously plans, searches, reads, and synthesizes research.
+ */
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+ /**
+ * Register deep research tools with the MCP server
+ */
+ export declare function registerDeepResearchTool(server: McpServer): void;
package/dist/tools/deep-research.js ADDED
@@ -0,0 +1,236 @@
+ /**
+ * Deep Research Tool - Autonomous multi-step research agent
+ *
+ * Uses the Gemini Deep Research Agent for complex research tasks.
+ * The agent autonomously plans, searches, reads, and synthesizes research.
+ */
+ import { z } from "zod";
+ import { logger } from "../utils/logger.js";
+ import { genAI } from "../gemini-client.js";
+ // Store active research operations for polling
+ const activeResearchOperations = new Map();
+ // Deep Research agent model
+ const DEEP_RESEARCH_AGENT = "deep-research-pro-preview-12-2025";
+ /**
+ * Register deep research tools with the MCP server
+ */
+ export function registerDeepResearchTool(server) {
+ // Start a deep research task
+ server.tool("gemini-deep-research", {
+ query: z.string().describe("The research question or topic to investigate"),
+ format: z
+ .string()
+ .optional()
+ .describe("Optional output format instructions (e.g., 'technical report with sections')"),
+ }, async ({ query, format }) => {
+ logger.info(`Starting deep research: ${query.substring(0, 50)}...`);
+ try {
+ // Build the research prompt with optional formatting
+ let researchPrompt = query;
+ if (format) {
+ researchPrompt = `${query}\n\nFormat the output as: ${format}`;
+ }
+ // Start the research task in the background
+ // The Interactions API is accessed via genAI.interactions
+ const interaction = await genAI.interactions.create({
+ input: researchPrompt,
+ agent: DEEP_RESEARCH_AGENT,
+ background: true,
+ agentConfig: {
+ type: "deep-research",
+ thinkingSummaries: "auto"
+ }
+ });
+ const interactionId = interaction.id || `research-${Date.now()}`;
+ // Store for later polling
+ activeResearchOperations.set(interactionId, {
+ interactionId,
+ startedAt: new Date(),
+ prompt: query
+ });
+ logger.info(`Deep research started: ${interactionId}`);
+ return {
+ content: [{
+ type: "text",
+ text: `**Deep Research Started**
+
+ | Field | Value |
+ |-------|-------|
+ | **Research ID** | \`${interactionId}\` |
+ | **Query** | ${query.substring(0, 100)}${query.length > 100 ? '...' : ''} |
+ | **Status** | In Progress |
+ | **Started** | ${new Date().toISOString()} |
+
+ **What happens now:**
+ 1. The Deep Research Agent is autonomously planning its research approach
+ 2. It will search the web, read sources, and synthesize findings
+ 3. This typically takes 2-10 minutes depending on complexity
+
+ **To check progress:**
+ Use \`gemini-check-research\` with the Research ID above.
+
+ **Note:** Deep research tasks run in the background. You can continue working while waiting.`
+ }]
+ };
+ }
+ catch (error) {
+ const errorMessage = error instanceof Error ? error.message : String(error);
+ logger.error(`Error starting deep research: ${errorMessage}`);
+ // Check if it's an API availability issue
+ if (errorMessage.includes("interactions") || errorMessage.includes("not found")) {
+ return {
+ content: [{
+ type: "text",
+ text: `**Deep Research Not Available**
+
+ The Interactions API required for Deep Research may not be available yet in your SDK version or API access.
+
+ **Error:** ${errorMessage}
+
+ **Alternatives:**
+ - Use \`gemini-search\` for real-time web search
+ - Use \`gemini-query\` with a detailed research prompt
+ - Wait for Interactions API to become available in your region`
+ }],
+ isError: true
+ };
+ }
+ return {
+ content: [{ type: "text", text: `Error starting deep research: ${errorMessage}` }],
+ isError: true
+ };
+ }
+ });
+ // Check research status
+ server.tool("gemini-check-research", {
+ researchId: z.string().describe("The research ID returned from gemini-deep-research")
+ }, async ({ researchId }) => {
+ logger.info(`Checking research status: ${researchId}`);
+ try {
+ // Get stored operation info
+ const operationInfo = activeResearchOperations.get(researchId);
+ // Get the current status
+ const interaction = await genAI.interactions.get(researchId);
+ const status = interaction.status || "unknown";
+ const elapsedMs = operationInfo
+ ? Date.now() - operationInfo.startedAt.getTime()
+ : 0;
+ const elapsedMinutes = Math.floor(elapsedMs / 60000);
+ const elapsedSeconds = Math.floor((elapsedMs % 60000) / 1000);
+ if (status === "completed") {
+ // Research is done - extract the result
+ activeResearchOperations.delete(researchId);
+ const outputs = interaction.outputs || [];
+ const result = outputs.length > 0
+ ? outputs[outputs.length - 1].text || "No text output"
+ : "Research completed but no output found";
+ logger.info(`Research completed: ${researchId}`);
+ return {
+ content: [{
+ type: "text",
+ text: `**Deep Research Complete**
+
+ | Field | Value |
+ |-------|-------|
+ | **Research ID** | \`${researchId}\` |
+ | **Status** | ✅ Completed |
+ | **Duration** | ${elapsedMinutes}m ${elapsedSeconds}s |
+
+ ---
+
+ ## Research Results
+
+ ${result}`
+ }]
+ };
+ }
+ else if (status === "failed") {
+ activeResearchOperations.delete(researchId);
+ const errorInfo = interaction.error || "Unknown error";
+ logger.error(`Research failed: ${researchId} - ${errorInfo}`);
+ return {
+ content: [{
+ type: "text",
+ text: `**Deep Research Failed**
+
+ | Field | Value |
+ |-------|-------|
+ | **Research ID** | \`${researchId}\` |
+ | **Status** | ❌ Failed |
+ | **Error** | ${errorInfo} |
+
+ The research task encountered an error. You can try:
+ - Starting a new research task with a different query
+ - Using \`gemini-search\` for simpler web searches`
+ }],
+ isError: true
+ };
+ }
+ else {
+ // Still in progress
+ return {
+ content: [{
+ type: "text",
+ text: `**Deep Research In Progress**
+
+ | Field | Value |
+ |-------|-------|
+ | **Research ID** | \`${researchId}\` |
+ | **Status** | ⏳ ${status} |
+ | **Elapsed** | ${elapsedMinutes}m ${elapsedSeconds}s |
+ | **Query** | ${operationInfo?.prompt.substring(0, 50) || 'Unknown'}... |
+
+ The agent is still working. Deep research typically takes 2-10 minutes.
+
+ Check again in 30-60 seconds using \`gemini-check-research\`.`
+ }]
+ };
+ }
+ }
+ catch (error) {
+ const errorMessage = error instanceof Error ? error.message : String(error);
+ logger.error(`Error checking research status: ${errorMessage}`);
+ return {
+ content: [{ type: "text", text: `Error checking research status: ${errorMessage}` }],
+ isError: true
+ };
+ }
+ });
+ // Follow-up on completed research
+ server.tool("gemini-research-followup", {
+ researchId: z.string().describe("The research ID from a completed research task"),
+ question: z.string().describe("Follow-up question about the research results")
+ }, async ({ researchId, question }) => {
+ logger.info(`Research follow-up on ${researchId}: ${question.substring(0, 50)}...`);
+ try {
+ const interaction = await genAI.interactions.create({
+ input: question,
+ model: "gemini-3-pro-preview",
+ previousInteractionId: researchId
+ });
+ const outputs = interaction.outputs || [];
+ const result = outputs.length > 0
+ ? outputs[outputs.length - 1].text || "No response"
+ : "No response received";
+ return {
+ content: [{
+ type: "text",
+ text: `**Research Follow-up**
+
+ **Question:** ${question}
+
+ **Answer:**
+ ${result}`
+ }]
+ };
+ }
+ catch (error) {
+ const errorMessage = error instanceof Error ? error.message : String(error);
+ logger.error(`Error with research follow-up: ${errorMessage}`);
+ return {
+ content: [{ type: "text", text: `Error with follow-up: ${errorMessage}` }],
+ isError: true
+ };
+ }
+ });
+ }
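Taken together, the three tools above wrap a start-then-poll lifecycle around the Interactions API. A condensed sketch of that flow, using the same `genAI.interactions` calls as the code above and assuming that API surface is available in your `@google/genai` version (`runResearch` is a hypothetical helper):

```ts
import { genAI } from "../gemini-client.js";

// Hypothetical helper: start a background research task and poll to completion.
async function runResearch(query: string): Promise<string> {
  const started = await genAI.interactions.create({
    input: query,
    agent: "deep-research-pro-preview-12-2025",
    background: true,
    agentConfig: { type: "deep-research", thinkingSummaries: "auto" },
  });
  if (!started.id) throw new Error("No interaction id returned");
  for (;;) {
    const current = await genAI.interactions.get(started.id);
    if (current.status === "completed") {
      const outputs = current.outputs ?? [];
      return outputs[outputs.length - 1]?.text ?? "No text output";
    }
    if (current.status === "failed") {
      throw new Error(String(current.error ?? "Unknown error"));
    }
    // Mirror the tool's own guidance: check again after 30-60 seconds.
    await new Promise((resolve) => setTimeout(resolve, 30_000));
  }
}
```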
package/dist/tools/token-count.d.ts ADDED
@@ -0,0 +1,10 @@
+ /**
+ * Token Counting Tool - Count tokens before making API calls
+ *
+ * Helps users estimate costs and manage context windows.
+ */
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+ /**
+ * Register token counting tool with the MCP server
+ */
+ export declare function registerTokenCountTool(server: McpServer): void;
package/dist/tools/token-count.js ADDED
@@ -0,0 +1,70 @@
+ /**
+ * Token Counting Tool - Count tokens before making API calls
+ *
+ * Helps users estimate costs and manage context windows.
+ */
+ import { z } from "zod";
+ import { logger } from "../utils/logger.js";
+ import { genAI } from "../gemini-client.js";
+ /**
+ * Register token counting tool with the MCP server
+ */
+ export function registerTokenCountTool(server) {
+ server.tool("gemini-count-tokens", {
+ content: z.string().describe("The text content to count tokens for"),
+ model: z
+ .enum(["pro", "flash"])
+ .default("flash")
+ .describe("Which model to use for counting (affects tokenization)")
+ }, async ({ content, model = "flash" }) => {
+ logger.info(`Counting tokens for ${content.length} characters using ${model} model`);
+ try {
+ const modelName = model === "pro"
+ ? (process.env.GEMINI_PRO_MODEL || "gemini-3-pro-preview")
+ : (process.env.GEMINI_FLASH_MODEL || "gemini-3-flash-preview");
+ const result = await genAI.models.countTokens({
+ model: modelName,
+ contents: content
+ });
+ const totalTokens = result.totalTokens || 0;
+ // Estimate costs (approximate, based on typical pricing)
+ // Gemini 3 Pro: ~$1.25 per 1M input tokens
+ // Gemini 3 Flash: ~$0.075 per 1M input tokens
+ const costPer1M = model === "pro" ? 1.25 : 0.075;
+ const estimatedCost = (totalTokens / 1_000_000) * costPer1M;
+ // Context window info (both Pro and Flash use a 1M-token window)
+ const contextWindow = 1_000_000;
+ const percentUsed = (totalTokens / contextWindow) * 100;
+ const response = `**Token Count Results**
+
+ | Metric | Value |
+ |--------|-------|
+ | **Total Tokens** | ${totalTokens.toLocaleString()} |
+ | **Characters** | ${content.length.toLocaleString()} |
+ | **Model** | ${modelName} |
+
+ **Context Window Usage:**
+ - Context window: ${contextWindow.toLocaleString()} tokens
+ - Used: ${percentUsed.toFixed(4)}%
+ - Remaining: ${(contextWindow - totalTokens).toLocaleString()} tokens
+
+ **Estimated Cost:**
+ - Input cost: ~$${estimatedCost.toFixed(6)} USD
+ - Per 1M tokens: $${costPer1M} (${model})
+
+ *Note: Actual costs may vary. Check [Google AI pricing](https://ai.google.dev/pricing) for current rates.*`;
+ logger.info(`Token count: ${totalTokens}`);
+ return {
+ content: [{ type: "text", text: response }]
+ };
+ }
+ catch (error) {
+ const errorMessage = error instanceof Error ? error.message : String(error);
+ logger.error(`Error counting tokens: ${errorMessage}`);
+ return {
+ content: [{ type: "text", text: `Error counting tokens: ${errorMessage}` }],
+ isError: true
+ };
+ }
+ });
+ }
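The cost estimate above is a straight proportion of the token count. A worked example at the tool's assumed flash rate:

```ts
// 250,000 tokens counted on the flash model, at the assumed $0.075 per 1M.
const totalTokens = 250_000;
const costPer1M = 0.075;
const estimatedCost = (totalTokens / 1_000_000) * costPer1M; // 0.25 * 0.075
console.log(estimatedCost.toFixed(6)); // "0.018750", i.e. about $0.02 USD

// Context window usage against the 1M-token window the tool reports.
const percentUsed = (totalTokens / 1_000_000) * 100; // 25%
```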
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@rlabs-inc/gemini-mcp",
- "version": "0.5.1",
+ "version": "0.6.1",
  "description": "MCP server for Gemini 3 integration with Claude Code - full frontier AI capabilities",
  "main": "dist/index.js",
  "type": "module",