@rlabs-inc/gemini-mcp 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/LICENCE +21 -0
  2. package/README.md +418 -0
  3. package/dist/gemini-client.d.ts +120 -0
  4. package/dist/gemini-client.js +399 -0
  5. package/dist/index.d.ts +8 -0
  6. package/dist/index.js +220 -0
  7. package/dist/tools/analyze.d.ts +10 -0
  8. package/dist/tools/analyze.js +96 -0
  9. package/dist/tools/brainstorm.d.ts +10 -0
  10. package/dist/tools/brainstorm.js +220 -0
  11. package/dist/tools/cache.d.ts +17 -0
  12. package/dist/tools/cache.js +286 -0
  13. package/dist/tools/code-exec.d.ts +17 -0
  14. package/dist/tools/code-exec.js +135 -0
  15. package/dist/tools/document.d.ts +16 -0
  16. package/dist/tools/document.js +333 -0
  17. package/dist/tools/image-edit.d.ts +16 -0
  18. package/dist/tools/image-edit.js +291 -0
  19. package/dist/tools/image-gen.d.ts +17 -0
  20. package/dist/tools/image-gen.js +148 -0
  21. package/dist/tools/query.d.ts +11 -0
  22. package/dist/tools/query.js +63 -0
  23. package/dist/tools/search.d.ts +15 -0
  24. package/dist/tools/search.js +128 -0
  25. package/dist/tools/speech.d.ts +17 -0
  26. package/dist/tools/speech.js +304 -0
  27. package/dist/tools/structured.d.ts +16 -0
  28. package/dist/tools/structured.js +247 -0
  29. package/dist/tools/summarize.d.ts +10 -0
  30. package/dist/tools/summarize.js +77 -0
  31. package/dist/tools/url-context.d.ts +17 -0
  32. package/dist/tools/url-context.js +226 -0
  33. package/dist/tools/video-gen.d.ts +11 -0
  34. package/dist/tools/video-gen.js +136 -0
  35. package/dist/tools/youtube.d.ts +16 -0
  36. package/dist/tools/youtube.js +218 -0
  37. package/dist/utils/logger.d.ts +33 -0
  38. package/dist/utils/logger.js +82 -0
  39. package/package.json +48 -0
package/LICENCE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2024 RLabs-Inc
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,418 @@
1
+ # MCP Server Gemini
2
+
3
+ A Model Context Protocol (MCP) server that integrates Google's Gemini 3 models with Claude Code, enabling powerful collaboration between the two AI systems.
4
+
5
+ [![npm version](https://badge.fury.io/js/@rlabs-inc%2Fgemini-mcp.svg)](https://www.npmjs.com/package/@rlabs-inc/gemini-mcp)
6
+
7
+ ## What's New in v0.4.0
8
+
9
+ **20+ tools** for comprehensive Gemini 3 integration:
10
+
11
+ **Multimodal Analysis:**
12
+ - **YouTube Analysis** - Analyze videos by URL with timestamps and clipping
13
+ - **Document Analysis** - PDFs, DOCX, spreadsheets with table extraction
14
+
15
+ **Generation & Editing:**
16
+ - **4K Image Generation** - Up to 4K resolution with Nano Banana Pro
17
+ - **10 Aspect Ratios** - 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9
18
+ - **Multi-Turn Image Editing** - Iteratively refine images through conversation
19
+ - **Google Search Grounding** - Ground images in real-world information
20
+
21
+ **Advanced Tools:**
22
+ - **Code Execution** - Gemini writes AND runs Python code (pandas, matplotlib, numpy, etc.)
23
+ - **Google Search** - Real-time web information with citations
24
+ - **Structured Output** - JSON schema responses with validation
25
+ - **Data Extraction** - Extract entities, facts, sentiment from text
26
+
27
+ **Core Improvements:**
28
+ - **Thinking Levels** - Control reasoning depth: minimal, low, medium, high
29
+ - **Gemini 3 Models** - Updated to latest frontier models
30
+
31
+ ### Previous Versions
32
+
33
+ **v0.3.0:** Phase 2-3 features (thinking levels, code execution, search)
34
+ **v0.2.0:** Image/Video generation with Veo
35
+
36
+ ---
37
+
38
+ ## Features
39
+
40
+ | Feature | Description |
41
+ |-------------------------------|-----------------------------------------------------------------|
42
+ | **YouTube Analysis** | Analyze videos by URL with timestamp clipping |
43
+ | **Document Analysis** | PDFs, DOCX, spreadsheets with table extraction |
44
+ | **4K Image Generation** | Generate images up to 4K with 10 aspect ratios |
45
+ | **Multi-Turn Image Editing** | Iteratively refine images through conversation |
46
+ | **Video Generation** | Create videos with Veo (async with polling) |
47
+ | **Code Execution** | Gemini writes and runs Python code (pandas, numpy, matplotlib) |
48
+ | **Google Search** | Real-time web information with inline citations |
49
+ | **Structured Output** | JSON responses with schema validation |
50
+ | **Data Extraction** | Extract entities, facts, sentiment from text |
51
+ | **Thinking Levels** | Control reasoning depth (minimal/low/medium/high) |
52
+ | **Direct Query** | Send prompts to Gemini 3 Pro/Flash models |
53
+ | **Brainstorming** | Collaborative problem-solving |
54
+ | **Code Analysis** | Analyze code for quality, security, performance |
55
+ | **Summarization** | Summarize content at different detail levels |
56
+
57
+ ---
58
+
59
+ ## Quick Installation
60
+
61
+ ### Using npm (Recommended)
62
+
63
+ ```bash
64
+ claude mcp add gemini -s user -- env GEMINI_API_KEY=YOUR_KEY npx -y @rlabs-inc/gemini-mcp
65
+ ```
66
+
67
+ ### Using bun
68
+
69
+ ```bash
70
+ claude mcp add gemini -s user -- env GEMINI_API_KEY=YOUR_KEY bunx @rlabs-inc/gemini-mcp
71
+ ```
72
+
73
+ **Get your API key:** Visit [Google AI Studio](https://aistudio.google.com/apikey) - it's free and takes seconds!
74
+
75
+ ### Installation Options
76
+
77
+ ```bash
78
+ # With verbose logging
79
+ claude mcp add gemini -s user -- env GEMINI_API_KEY=YOUR_KEY VERBOSE=true bunx -y @rlabs-inc/gemini-mcp
80
+
81
+ # With custom output directory for generated images/videos
82
+ claude mcp add gemini -s user -- env GEMINI_API_KEY=YOUR_KEY GEMINI_OUTPUT_DIR=/path/to/output bunx -y @rlabs-inc/gemini-mcp
83
+ ```
84
+
85
+ ---
86
+
87
+ ## Available Tools
88
+
89
+ ### gemini-query
90
+
91
+ Direct queries to Gemini with thinking level control:
92
+
93
+ ```
94
+ prompt: "Explain quantum entanglement"
95
+ model: "pro" or "flash"
96
+ thinkingLevel: "low" | "medium" | "high" (optional)
97
+ ```
98
+
99
+ - **low**: Fast responses, minimal reasoning
100
+ - **medium**: Balanced (Flash only)
101
+ - **high**: Deep reasoning for complex tasks (default)
102
+
103
+ ### gemini-generate-image
104
+
105
+ Generate images with Nano Banana Pro (Claude can SEE them!):
106
+
107
+ ```
108
+ prompt: "a futuristic city at sunset"
109
+ style: "cyberpunk" (optional)
110
+ aspectRatio: "16:9" (1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9)
111
+ imageSize: "2K" (1K, 2K, 4K)
112
+ useGoogleSearch: false (ground in real-world info)
113
+ ```
114
+
115
+ ### gemini-start-image-edit
116
+
117
+ Start a multi-turn image editing session:
118
+
119
+ ```
120
+ prompt: "a cozy cabin in the mountains"
121
+ aspectRatio: "16:9"
122
+ imageSize: "2K"
123
+ useGoogleSearch: false
124
+ ```
125
+
126
+ Returns a session ID; pass it to `gemini-continue-image-edit` to refine the image iteratively.
127
+
128
+ ### gemini-continue-image-edit
129
+
130
+ Continue refining an image:
131
+
132
+ ```
133
+ sessionId: "edit-123456789"
134
+ prompt: "add snow on the roof and make it nighttime"
135
+ ```
136
+
137
+ ### gemini-end-image-edit
138
+
139
+ Close an editing session:
140
+
141
+ ```
142
+ sessionId: "edit-123456789"
143
+ ```
144
+
145
+ ### gemini-list-image-sessions
146
+
147
+ List all active editing sessions.
148
+
149
+ ### gemini-generate-video
150
+
151
+ Generate videos using Veo:
152
+
153
+ ```
154
+ prompt: "a cat playing piano"
155
+ aspectRatio: "16:9" (optional)
156
+ negativePrompt: "blurry, text" (optional)
157
+ ```
158
+
159
+ Video generation is asynchronous and typically takes 1-5 minutes. Use `gemini-check-video` with the returned operation ID to poll for completion.
160
+
161
+ ### gemini-check-video
162
+
163
+ Check video generation status and download when complete:
164
+
165
+ ```
166
+ operationId: "operations/xxx-xxx-xxx"
167
+ ```
168
+
169
+ ### gemini-analyze-code
170
+
171
+ Analyze code for issues:
172
+
173
+ ```
174
+ code: "function foo() { ... }"
175
+ language: "typescript" (optional)
176
+ focus: "quality" | "security" | "performance" | "bugs" | "general"
177
+ ```
178
+
179
+ ### gemini-analyze-text
180
+
181
+ Analyze text content:
182
+
183
+ ```
184
+ text: "Your text here..."
185
+ type: "sentiment" | "summary" | "entities" | "key-points" | "general"
186
+ ```
187
+
188
+ ### gemini-brainstorm
189
+
190
+ Collaborative brainstorming:
191
+
192
+ ```
193
+ prompt: "How could we implement real-time collaboration?"
194
+ claudeThoughts: "I think we should use WebSockets..."
195
+ maxRounds: 3 (optional)
196
+ ```
197
+
198
+ ### gemini-summarize
199
+
200
+ Summarize content:
201
+
202
+ ```
203
+ content: "Long text to summarize..."
204
+ length: "brief" | "moderate" | "detailed"
205
+ format: "paragraph" | "bullet-points" | "outline"
206
+ ```
207
+
208
+ ### gemini-run-code
209
+
210
+ Let Gemini write and execute Python code:
211
+
212
+ ```
213
+ prompt: "Calculate the first 50 prime numbers and plot them"
214
+ data: "optional CSV data to analyze" (optional)
215
+ ```
216
+
217
+ Supports libraries: numpy, pandas, matplotlib, scipy, scikit-learn, tensorflow, and more.
218
+ Generated charts are saved to the output directory and returned as images.
219
+
220
+ ### gemini-search
221
+
222
+ Real-time web search with citations:
223
+
224
+ ```
225
+ query: "What happened in tech news this week?"
226
+ returnCitations: true (default)
227
+ ```
228
+
229
+ Returns grounded responses with inline citations and source URLs.
230
+
231
+ ### gemini-structured
232
+
233
+ Get JSON responses matching a schema:
234
+
235
+ ```
236
+ prompt: "Extract the meeting details from this email..."
237
+ schema: '{"type":"object","properties":{"date":{"type":"string"},"attendees":{"type":"array"}}}'
238
+ useGoogleSearch: false (optional)
239
+ ```
240
+
241
+ ### gemini-extract
242
+
243
+ Convenience tool for common extraction patterns:
244
+
245
+ ```
246
+ text: "Your text to analyze..."
247
+ extractType: "entities" | "facts" | "summary" | "keywords" | "sentiment" | "custom"
248
+ customFields: "name, date, amount" (for custom extraction)
249
+ ```
250
+
251
+ ### gemini-youtube
252
+
253
+ Analyze YouTube videos directly:
254
+
255
+ ```
256
+ url: "https://www.youtube.com/watch?v=..."
257
+ question: "What happens at 2:30?"
258
+ startTime: "1m30s" (optional, for clipping)
259
+ endTime: "5m00s" (optional, for clipping)
260
+ ```
261
+
262
+ ### gemini-youtube-summary
263
+
264
+ Quick video summarization:
265
+
266
+ ```
267
+ url: "https://www.youtube.com/watch?v=..."
268
+ style: "brief" | "detailed" | "bullet-points" | "chapters"
269
+ ```
270
+
271
+ ### gemini-analyze-document
272
+
273
+ Analyze PDFs and documents:
274
+
275
+ ```
276
+ filePath: "/path/to/document.pdf"
277
+ question: "Summarize the key findings"
278
+ mediaResolution: "low" | "medium" | "high"
279
+ ```
280
+
281
+ ### gemini-summarize-pdf
282
+
283
+ Quick PDF summarization:
284
+
285
+ ```
286
+ filePath: "/path/to/document.pdf"
287
+ style: "brief" | "detailed" | "outline" | "key-points"
288
+ ```
289
+
290
+ ### gemini-extract-tables
291
+
292
+ Extract tables from documents:
293
+
294
+ ```
295
+ filePath: "/path/to/document.pdf"
296
+ outputFormat: "markdown" | "csv" | "json"
297
+ ```
298
+
299
+ ---
300
+
301
+ ## Workflow: Claude + Gemini
302
+
303
+ The killer combination for development:
304
+
305
+ | Claude | Gemini |
306
+ |--------|--------|
307
+ | Complex logic | Frontend/UI |
308
+ | Architecture | Visual components |
309
+ | Backend code | Image generation |
310
+ | Integration | React/CSS styling |
311
+ | Reasoning | Creative generation |
312
+
313
+ **Example workflow:**
314
+ 1. Ask Claude to design the backend API
315
+ 2. Use `gemini-generate-image` for UI mockups
316
+ 3. Ask Gemini to generate React components via `gemini-query`
317
+ 4. Use multi-turn editing to refine visuals
318
+ 5. Let Claude wire everything together
319
+
320
+ ---
321
+
322
+ ## Environment Variables
323
+
324
+ | Variable | Required | Default | Description |
325
+ |-------------------------|----------|------------------------------|-------------------------------|
326
+ | `GEMINI_API_KEY` | Yes | - | Your Google Gemini API key |
327
+ | `GEMINI_OUTPUT_DIR` | No | `./gemini-output` | Where to save generated files |
328
+ | `GEMINI_MODEL` | No | - | Override model for init test |
329
+ | `GEMINI_PRO_MODEL` | No | `gemini-3-pro-preview` | Pro model (Gemini 3) |
330
+ | `GEMINI_FLASH_MODEL` | No | `gemini-3-flash-preview` | Flash model (Gemini 3) |
331
+ | `GEMINI_IMAGE_MODEL` | No | `gemini-3-pro-image-preview` | Image model (Nano Banana Pro) |
332
+ | `GEMINI_VIDEO_MODEL` | No | `veo-2.0-generate-001` | Video model |
333
+ | `VERBOSE` | No | `false` | Enable verbose logging |
334
+ | `QUIET` | No | `false` | Minimize logging |
335
+
336
+ ---
337
+
338
+ ## Manual Installation
339
+
340
+ ### Global Install
341
+
342
+ ```bash
343
+ # Using npm
344
+ npm install -g @rlabs-inc/gemini-mcp
345
+
346
+ # Using bun
347
+ bun install -g @rlabs-inc/gemini-mcp
348
+ ```
349
+
350
+ ### Claude Code Configuration
351
+
352
+ ```json
353
+ {
354
+ "gemini": {
355
+ "command": "npx",
356
+ "args": ["-y", "@rlabs-inc/gemini-mcp"],
357
+ "env": {
358
+ "GEMINI_API_KEY": "your-api-key",
359
+ "GEMINI_OUTPUT_DIR": "/path/to/save/files"
360
+ }
361
+ }
362
+ }
363
+ ```
364
+
365
+ ---
366
+
367
+ ## Troubleshooting
368
+
369
+ ### Rate Limits (429 Errors)
370
+
371
+ If you're hitting rate limits on the free tier:
372
+ - Set `GEMINI_MODEL=gemini-3-flash-preview` so the initialization test uses the Flash model, which has higher rate limits
373
+ - Or upgrade to a paid plan
374
+
375
+ ### Connection Issues
376
+
377
+ 1. Verify your API key at [Google AI Studio](https://aistudio.google.com/apikey)
378
+ 2. Check server status: `claude mcp list`
379
+ 3. Try with verbose logging: `VERBOSE=true`
380
+
381
+ ### Image/Video Issues
382
+
383
+ - Ensure your API key has access to image/video generation
384
+ - Check output directory permissions
385
+ - Files save to `GEMINI_OUTPUT_DIR` (default: `./gemini-output`)
386
+ - For 4K images, generation takes longer
387
+
388
+ ---
389
+
390
+ ## Development
391
+
392
+ ```bash
393
+ git clone https://github.com/rlabs-inc/gemini-mcp.git
394
+ cd gemini-mcp
395
+ bun install
396
+ bun run build
397
+ bun run dev -- --verbose
398
+ ```
399
+
400
+ ### Scripts
401
+
402
+ | Command | Description |
403
+ |---------------------|-----------------------------|
404
+ | `bun run build` | Build for production |
405
+ | `bun run dev` | Development mode with watch |
406
+ | `bun run typecheck` | Type check without emitting |
407
+ | `bun run format` | Format with Prettier |
408
+ | `bun run lint` | Lint with ESLint |
409
+
410
+ ---
411
+
412
+ ## License
413
+
414
+ MIT License
415
+
416
+ ---
417
+
418
+ Made with Claude + Gemini working together
package/dist/gemini-client.d.ts ADDED
@@ -0,0 +1,120 @@
1
+ /**
2
+ * Gemini Client - Provides access to Google's Generative AI models
3
+ *
4
+ * This module initializes and manages the connection to Google's Gemini API.
5
+ * Supports Gemini 3 Pro, Flash, image generation (Nano Banana Pro), and video generation (Veo).
6
+ *
7
+ * Key Gemini 3 Features:
8
+ * - Thinking Levels: Control reasoning depth (minimal, low, medium, high)
9
+ * - 4K Image Generation: Up to 4K resolution with Google Search grounding
10
+ * - Multi-turn Image Editing: Conversational image refinement
11
+ */
12
+ /**
13
+ * Thinking levels for Gemini 3 models
14
+ * - minimal: Fastest, minimal reasoning (Flash only)
15
+ * - low: Fast responses, basic reasoning
16
+ * - medium: Balanced reasoning (Flash only)
17
+ * - high: Deep reasoning, best for complex tasks (default)
18
+ */
19
+ export type ThinkingLevel = 'minimal' | 'low' | 'medium' | 'high';
20
+ /**
21
+ * Options for text generation
22
+ */
23
+ export interface GenerateOptions {
24
+ thinkingLevel?: ThinkingLevel;
25
+ }
26
+ /**
27
+ * All supported aspect ratios for Nano Banana Pro
28
+ */
29
+ export type AspectRatio = '1:1' | '2:3' | '3:2' | '3:4' | '4:3' | '4:5' | '5:4' | '9:16' | '16:9' | '21:9';
30
+ /**
31
+ * Image sizes for Nano Banana Pro (Gemini 3 Pro Image)
32
+ */
33
+ export type ImageSize = '1K' | '2K' | '4K';
34
+ /**
35
+ * Initialize the Gemini client with configured models
36
+ */
37
+ export declare function initGeminiClient(): Promise<void>;
38
+ /**
39
+ * Generate content using the Gemini Pro model
40
+ *
41
+ * @param prompt - The prompt to send to Gemini
42
+ * @param options - Generation options including thinking level
43
+ * @returns The generated text response
44
+ *
45
+ * Gemini 3 Pro supports thinking levels: low, high (default)
46
+ */
47
+ export declare function generateWithGeminiPro(prompt: string, options?: GenerateOptions): Promise<string>;
48
+ /**
49
+ * Generate content using the Gemini Flash model
50
+ *
51
+ * @param prompt - The prompt to send to Gemini
52
+ * @param options - Generation options including thinking level
53
+ * @returns The generated text response
54
+ *
55
+ * Gemini 3 Flash supports ALL thinking levels: minimal, low, medium, high (default)
56
+ */
57
+ export declare function generateWithGeminiFlash(prompt: string, options?: GenerateOptions): Promise<string>;
58
+ /**
59
+ * Generate content with a structured chat history
60
+ */
61
+ export declare function generateWithChat(messages: {
62
+ role: 'user' | 'model';
63
+ content: string;
64
+ }[], useProModel?: boolean): Promise<string>;
65
+ /**
66
+ * Image generation result
67
+ */
68
+ export interface ImageGenerationResult {
69
+ base64: string;
70
+ mimeType: string;
71
+ filePath: string;
72
+ description?: string;
73
+ }
74
+ /**
75
+ * Options for image generation with Nano Banana Pro
76
+ */
77
+ export interface ImageGenerationOptions {
78
+ aspectRatio?: AspectRatio;
79
+ imageSize?: ImageSize;
80
+ style?: string;
81
+ saveToFile?: boolean;
82
+ useGoogleSearch?: boolean;
83
+ }
84
+ /**
85
+ * Generate an image using Gemini's Nano Banana Pro model (gemini-3-pro-image-preview)
86
+ *
87
+ * Features:
88
+ * - 4K resolution support (1K, 2K, 4K)
89
+ * - 10 aspect ratios (1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9)
90
+ * - Google Search grounding for real-world accuracy
91
+ * - High-fidelity text rendering
92
+ */
93
+ export declare function generateImage(prompt: string, options?: ImageGenerationOptions): Promise<ImageGenerationResult>;
94
+ /**
95
+ * Video generation operation result
96
+ */
97
+ export interface VideoGenerationResult {
98
+ operationName: string;
99
+ status: 'pending' | 'processing' | 'completed' | 'failed';
100
+ videoUri?: string;
101
+ filePath?: string;
102
+ error?: string;
103
+ }
104
+ /**
105
+ * Start video generation using Gemini's Veo model
106
+ * Returns an operation that can be polled for completion
107
+ */
108
+ export declare function startVideoGeneration(prompt: string, options?: {
109
+ aspectRatio?: '16:9' | '9:16';
110
+ durationSeconds?: number;
111
+ negativePrompt?: string;
112
+ }): Promise<VideoGenerationResult>;
113
+ /**
114
+ * Check the status of a video generation operation
115
+ */
116
+ export declare function checkVideoStatus(operationName: string): Promise<VideoGenerationResult>;
117
+ /**
118
+ * Get the output directory path
119
+ */
120
+ export declare function getOutputDir(): string;