@rlabs-inc/gemini-mcp 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/LICENCE +21 -0
  2. package/README.md +418 -0
  3. package/dist/gemini-client.d.ts +120 -0
  4. package/dist/gemini-client.js +399 -0
  5. package/dist/index.d.ts +8 -0
  6. package/dist/index.js +220 -0
  7. package/dist/tools/analyze.d.ts +10 -0
  8. package/dist/tools/analyze.js +96 -0
  9. package/dist/tools/brainstorm.d.ts +10 -0
  10. package/dist/tools/brainstorm.js +220 -0
  11. package/dist/tools/cache.d.ts +17 -0
  12. package/dist/tools/cache.js +286 -0
  13. package/dist/tools/code-exec.d.ts +17 -0
  14. package/dist/tools/code-exec.js +135 -0
  15. package/dist/tools/document.d.ts +16 -0
  16. package/dist/tools/document.js +333 -0
  17. package/dist/tools/image-edit.d.ts +16 -0
  18. package/dist/tools/image-edit.js +291 -0
  19. package/dist/tools/image-gen.d.ts +17 -0
  20. package/dist/tools/image-gen.js +148 -0
  21. package/dist/tools/query.d.ts +11 -0
  22. package/dist/tools/query.js +63 -0
  23. package/dist/tools/search.d.ts +15 -0
  24. package/dist/tools/search.js +128 -0
  25. package/dist/tools/speech.d.ts +17 -0
  26. package/dist/tools/speech.js +304 -0
  27. package/dist/tools/structured.d.ts +16 -0
  28. package/dist/tools/structured.js +247 -0
  29. package/dist/tools/summarize.d.ts +10 -0
  30. package/dist/tools/summarize.js +77 -0
  31. package/dist/tools/url-context.d.ts +17 -0
  32. package/dist/tools/url-context.js +226 -0
  33. package/dist/tools/video-gen.d.ts +11 -0
  34. package/dist/tools/video-gen.js +136 -0
  35. package/dist/tools/youtube.d.ts +16 -0
  36. package/dist/tools/youtube.js +218 -0
  37. package/dist/utils/logger.d.ts +33 -0
  38. package/dist/utils/logger.js +82 -0
  39. package/package.json +48 -0
@@ -0,0 +1,135 @@
1
+ /**
2
+ * Code Execution Tool - Let Gemini write and run Python code
3
+ *
4
+ * This tool enables Gemini to generate and execute Python code in a sandboxed environment.
5
+ * Useful for:
6
+ * - Data analysis with pandas
7
+ * - Math computations
8
+ * - Chart generation with matplotlib
9
+ * - File processing
10
+ *
11
+ * Supported libraries include: numpy, pandas, matplotlib, scipy, scikit-learn, tensorflow, and more.
12
+ */
13
+ import { z } from 'zod';
14
+ import { GoogleGenAI } from '@google/genai';
15
+ import { logger } from '../utils/logger.js';
16
+ import * as fs from 'fs';
17
+ import * as path from 'path';
18
/**
 * Resolve the directory where generated files (charts, etc.) are written.
 *
 * The GEMINI_OUTPUT_DIR environment variable wins when it is set to a
 * non-empty value; otherwise the `gemini-output` folder under the current
 * working directory is used.
 */
function getOutputDir() {
    const configuredDir = process.env.GEMINI_OUTPUT_DIR;
    if (configuredDir) {
        return configuredDir;
    }
    return path.join(process.cwd(), 'gemini-output');
}
22
/**
 * Derive a safe file extension from a MIME type.
 *
 * Takes the subtype (the text after `/`), drops any `;parameter` or `+suffix`
 * and strips every character that is not a letter or digit so the value can be
 * embedded in a file name. Falls back to `png` when nothing usable remains.
 */
function chartExtensionFor(mimeType) {
    const subtype = (mimeType.split('/')[1] || '').split(/[;+]/)[0];
    return subtype.replace(/[^a-z0-9]/gi, '') || 'png';
}
/**
 * Write a base64-encoded image into the output directory.
 *
 * Saving is best effort: the image is also returned inline to the MCP client,
 * so a disk failure is logged instead of failing the whole tool call.
 */
function saveChartToDisk(base64Data, filename) {
    try {
        const outputDir = getOutputDir();
        // `recursive: true` makes this a no-op when the directory already exists.
        fs.mkdirSync(outputDir, { recursive: true });
        const filePath = path.join(outputDir, filename);
        fs.writeFileSync(filePath, Buffer.from(base64Data, 'base64'));
        logger.info(`Chart saved to: ${filePath}`);
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        logger.error(`Failed to save chart ${filename}: ${reason}`);
    }
}
/**
 * Register code execution tools with the MCP server.
 *
 * Registers `gemini-run-code`, which sends the prompt (plus optional inline
 * data) to Gemini with the code execution tool enabled and returns:
 *   - one text item containing the explanation, every generated code block and
 *     every execution output found in the response parts, and
 *   - one image item per inline image part (also saved to the output directory).
 *
 * Failures are reported as a text item with `isError: true` rather than thrown.
 *
 * @param server MCP server instance the tool is registered on.
 */
export function registerCodeExecTool(server) {
    server.tool('gemini-run-code', {
        prompt: z
            .string()
            .describe('What you want Gemini to compute or analyze. Gemini will write Python code and execute it.'),
        data: z
            .string()
            .optional()
            .describe('Optional CSV or text data to analyze. Will be passed to the code environment.'),
    }, async ({ prompt, data }) => {
        logger.info(`Code execution request: ${prompt.substring(0, 50)}...`);
        try {
            const apiKey = process.env.GEMINI_API_KEY;
            if (!apiKey) {
                throw new Error('GEMINI_API_KEY not set');
            }
            const genAI = new GoogleGenAI({ apiKey });
            const model = process.env.GEMINI_PRO_MODEL || 'gemini-3-pro-preview';
            // Build contents with optional data
            const contents = [];
            if (data) {
                // Add data as inline content
                contents.push({
                    inlineData: {
                        mimeType: 'text/csv',
                        data: Buffer.from(data).toString('base64'),
                    },
                });
            }
            contents.push({ text: prompt });
            // Execute with code execution tool enabled
            const response = await genAI.models.generateContent({
                model,
                contents,
                config: {
                    tools: [{ codeExecution: {} }],
                },
            });
            // Process the response parts
            const parts = response.candidates?.[0]?.content?.parts;
            if (!parts) {
                throw new Error('No response parts from code execution');
            }
            const explanations = [];
            const codeBlocks = [];
            const executionOutputs = [];
            const imageContent = [];
            // One timestamp per response; an index suffix keeps file names unique
            // when the response carries several images.
            const timestamp = Date.now();
            for (const part of parts) {
                if (part.text) {
                    explanations.push(part.text);
                }
                // A response may contain several code/result parts; keep all of
                // them instead of only the last one.
                if (part.executableCode?.code) {
                    codeBlocks.push(part.executableCode.code);
                }
                if (part.codeExecutionResult?.output) {
                    executionOutputs.push(part.codeExecutionResult.output);
                }
                if (part.inlineData?.data) {
                    // This is a generated image (chart/graph)
                    const mimeType = part.inlineData.mimeType || 'image/png';
                    const imageIndex = imageContent.length;
                    imageContent.push({
                        type: 'image',
                        data: part.inlineData.data,
                        mimeType,
                    });
                    const suffix = imageIndex === 0 ? '' : `-${imageIndex}`;
                    const filename = `chart-${timestamp}${suffix}.${chartExtensionFor(mimeType)}`;
                    saveChartToDisk(part.inlineData.data, filename);
                }
            }
            // Build the text response
            let textResponse = '';
            const explanation = explanations.join('\n').trim();
            if (explanation) {
                textResponse += `**Explanation:**\n${explanation}\n\n`;
            }
            if (codeBlocks.length > 0) {
                textResponse += '**Generated Code:**\n';
                textResponse += codeBlocks
                    .map((code) => `\`\`\`python\n${code}\n\`\`\`\n\n`)
                    .join('');
            }
            if (executionOutputs.length > 0) {
                textResponse += '**Execution Output:**\n';
                textResponse += executionOutputs
                    .map((output) => `\`\`\`\n${output}\n\`\`\`\n`)
                    .join('\n');
            }
            const resultContent = [
                {
                    type: 'text',
                    text: textResponse || 'Code execution completed.',
                },
                ...imageContent,
            ];
            logger.info('Code execution completed successfully');
            return { content: resultContent };
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            logger.error(`Error in code execution: ${errorMessage}`);
            return {
                content: [
                    {
                        type: 'text',
                        text: `Error executing code: ${errorMessage}`,
                    },
                ],
                isError: true,
            };
        }
    });
}
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Document Analysis Tool - Analyze PDFs and documents
3
+ *
4
+ * Gemini can process documents including PDFs, enabling:
5
+ * - Document summarization
6
+ * - Information extraction
7
+ * - Q&A about document content
8
+ * - Table and chart understanding
9
+ *
10
+ * Uses the Files API for larger documents.
11
+ */
12
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
13
+ /**
14
+ * Register document analysis tools with the MCP server
+ *
+ * @param server - MCP server instance the document tools are registered on.
15
+ */
16
+ export declare function registerDocumentTool(server: McpServer): void;
@@ -0,0 +1,333 @@
1
+ /**
2
+ * Document Analysis Tool - Analyze PDFs and documents
3
+ *
4
+ * Gemini can process documents including PDFs, enabling:
5
+ * - Document summarization
6
+ * - Information extraction
7
+ * - Q&A about document content
8
+ * - Table and chart understanding
9
+ *
10
+ * Uses the Files API for larger documents.
11
+ */
12
+ import { z } from 'zod';
13
+ import { GoogleGenAI } from '@google/genai';
14
+ import { logger } from '../utils/logger.js';
15
+ import * as fs from 'fs';
16
+ import * as path from 'path';
17
/**
 * Map a file path to a MIME type using its extension.
 *
 * The extension is compared case-insensitively. Paths with an unknown or
 * missing extension resolve to `application/octet-stream`.
 *
 * @param filePath Path (or bare file name) of the document.
 * @returns The MIME type string for the file's extension.
 */
function getMimeType(filePath) {
    const extension = path.extname(filePath).toLowerCase();
    switch (extension) {
        case '.pdf':
            return 'application/pdf';
        case '.txt':
            return 'text/plain';
        case '.csv':
            return 'text/csv';
        case '.html':
        case '.htm':
            return 'text/html';
        case '.xml':
            return 'application/xml';
        case '.json':
            return 'application/json';
        case '.md':
            return 'text/markdown';
        case '.doc':
            return 'application/msword';
        case '.docx':
            return 'application/vnd.openxmlformats-officedocument.wordprocessingml.document';
        case '.xls':
            return 'application/vnd.ms-excel';
        case '.xlsx':
            return 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet';
        case '.ppt':
            return 'application/vnd.ms-powerpoint';
        case '.pptx':
            return 'application/vnd.openxmlformats-officedocument.presentationml.presentation';
        default:
            return 'application/octet-stream';
    }
}
40
/**
 * Files larger than this many bytes are uploaded through the Files API
 * instead of being embedded in the request as base64.
 * NOTE(review): base64 encoding inflates the payload by roughly a third, so
 * files slightly below this limit may still exceed the API request size cap —
 * confirm against the current Gemini limits.
 */
const MAX_INLINE_DOCUMENT_BYTES = 20 * 1024 * 1024;
/**
 * Check the preconditions shared by every document tool and create a client.
 *
 * @throws Error when GEMINI_API_KEY is not set or `filePath` does not exist.
 */
function createDocumentClient(filePath) {
    const apiKey = process.env.GEMINI_API_KEY;
    if (!apiKey) {
        throw new Error('GEMINI_API_KEY not set');
    }
    // Check file exists
    if (!fs.existsSync(filePath)) {
        throw new Error(`File not found: ${filePath}`);
    }
    return new GoogleGenAI({ apiKey });
}
/**
 * Read a document and turn it into a request part.
 *
 * Small files become an `inlineData` part; files above
 * MAX_INLINE_DOCUMENT_BYTES are uploaded first and referenced by `fileData`.
 *
 * @returns `{ part, viaFilesApi }` where `viaFilesApi` tells which path was taken.
 */
async function loadDocumentPart(genAI, filePath) {
    const fileBuffer = fs.readFileSync(filePath);
    const mimeType = getMimeType(filePath);
    const fileSize = fileBuffer.length;
    logger.debug(`File size: ${fileSize} bytes, MIME type: ${mimeType}`);
    if (fileSize > MAX_INLINE_DOCUMENT_BYTES) {
        // Upload using Files API
        logger.info('Large file detected, uploading via Files API...');
        const uploadedFile = await genAI.files.upload({
            file: new Blob([fileBuffer], { type: mimeType }),
            config: { mimeType },
        });
        if (!uploadedFile.uri) {
            throw new Error('File upload did not return a file URI');
        }
        return {
            part: {
                fileData: {
                    fileUri: uploadedFile.uri,
                    mimeType: uploadedFile.mimeType || mimeType,
                },
            },
            viaFilesApi: true,
        };
    }
    return {
        part: {
            inlineData: {
                mimeType,
                data: fileBuffer.toString('base64'),
            },
        },
        viaFilesApi: false,
    };
}
/**
 * Send a document plus a text prompt to Gemini as a single user turn.
 *
 * @param config Optional generation config; `undefined` sends none.
 * @returns `{ text, viaFilesApi }` with the response text (possibly empty).
 */
async function askAboutDocument(filePath, prompt, model, config) {
    const genAI = createDocumentClient(filePath);
    const { part, viaFilesApi } = await loadDocumentPart(genAI, filePath);
    const response = await genAI.models.generateContent({
        model,
        contents: [
            {
                role: 'user',
                parts: [part, { text: prompt }],
            },
        ],
        config,
    });
    return { text: response.text, viaFilesApi };
}
/** Wrap plain text in the MCP tool result shape. */
function documentTextResult(text) {
    return { content: [{ type: 'text', text }] };
}
/**
 * Log a failure and convert it into an MCP error result.
 *
 * @param logLabel  Completes the log line "Error in <logLabel>: ...".
 * @param userLabel Completes the returned text "Error <userLabel>: ...".
 */
function documentErrorResult(logLabel, userLabel, error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    logger.error(`Error in ${logLabel}: ${errorMessage}`);
    return {
        content: [
            {
                type: 'text',
                text: `Error ${userLabel}: ${errorMessage}`,
            },
        ],
        isError: true,
    };
}
/** Pick the summarization prompt for a summary style. */
function summaryPromptFor(style) {
    switch (style) {
        case 'brief':
            return 'Provide a brief summary of this document in 2-3 paragraphs.';
        case 'detailed':
            return 'Provide a comprehensive summary of this document, covering all main sections, key arguments, and conclusions.';
        case 'outline':
            return 'Create an outline of this document showing its structure and main topics with sub-points.';
        case 'key-points':
            return 'Extract the key points and takeaways from this document as a bullet-point list.';
        default:
            return 'Summarize this document.';
    }
}
/** Pick the table-extraction prompt for an output format (markdown by default). */
function tablePromptFor(outputFormat) {
    switch (outputFormat) {
        case 'csv':
            return 'Extract all tables from this document and output them as CSV format. Separate multiple tables with blank lines and add a header comment for each table.';
        case 'json':
            return 'Extract all tables from this document and output them as a JSON array. Each table should be an object with "title" and "rows" (array of objects with column headers as keys).';
        case 'markdown':
        default:
            return 'Extract all tables from this document and output them as markdown tables. Add a title for each table.';
    }
}
/**
 * Register document analysis tools with the MCP server.
 *
 * Registers three tools:
 *   - `gemini-analyze-document`: answer a question about a document.
 *   - `gemini-summarize-pdf`: summarize a document in a chosen style.
 *   - `gemini-extract-tables`: extract tables as markdown, CSV or JSON.
 *
 * All three read the file from disk, send it inline when it is at most
 * MAX_INLINE_DOCUMENT_BYTES and through the Files API otherwise, and report
 * failures as a text item with `isError: true` instead of throwing.
 *
 * @param server MCP server instance the tools are registered on.
 */
export function registerDocumentTool(server) {
    server.tool('gemini-analyze-document', {
        filePath: z
            .string()
            .describe('Path to the document file (PDF, TXT, CSV, DOCX, etc.)'),
        question: z
            .string()
            .describe('Question about the document or task to perform (e.g., "Summarize this document", "Extract all dates mentioned")'),
        mediaResolution: z
            .enum(['low', 'medium', 'high'])
            .default('medium')
            .describe('Resolution for processing: low (faster, less detail), medium (balanced), high (more detail, more tokens)'),
    }, async ({ filePath, question, mediaResolution }) => {
        logger.info(`Document analysis: ${filePath}`);
        try {
            // Map resolution to API parameter
            const resolutionMap = {
                low: 'media_resolution_low',
                medium: 'media_resolution_medium',
                high: 'media_resolution_high',
            };
            // 'medium' is treated as the API default, so no config is sent for it.
            const config = mediaResolution !== 'medium'
                ? { mediaResolution: resolutionMap[mediaResolution] }
                : undefined;
            const model = process.env.GEMINI_PRO_MODEL || 'gemini-3-pro-preview';
            const { text, viaFilesApi } = await askAboutDocument(filePath, question, model, config);
            logger.info(viaFilesApi
                ? 'Document analysis completed (via Files API)'
                : 'Document analysis completed');
            return documentTextResult(text || 'Unable to analyze document.');
        }
        catch (error) {
            return documentErrorResult('document analysis', 'analyzing document', error);
        }
    });
    // Convenience tool for PDF summarization
    server.tool('gemini-summarize-pdf', {
        filePath: z.string().describe('Path to the PDF file'),
        style: z
            .enum(['brief', 'detailed', 'outline', 'key-points'])
            .default('brief')
            .describe('Summary style'),
    }, async ({ filePath, style }) => {
        logger.info(`PDF summary: ${filePath}`);
        try {
            const model = process.env.GEMINI_FLASH_MODEL || 'gemini-3-flash-preview';
            const { text } = await askAboutDocument(filePath, summaryPromptFor(style), model, undefined);
            logger.info('PDF summary completed');
            return documentTextResult(text || 'Unable to summarize document.');
        }
        catch (error) {
            return documentErrorResult('PDF summary', 'summarizing PDF', error);
        }
    });
    // Tool for extracting tables from documents
    server.tool('gemini-extract-tables', {
        filePath: z.string().describe('Path to the document file'),
        outputFormat: z
            .enum(['markdown', 'csv', 'json'])
            .default('markdown')
            .describe('Output format for extracted tables'),
    }, async ({ filePath, outputFormat }) => {
        logger.info(`Table extraction: ${filePath}`);
        try {
            const model = process.env.GEMINI_PRO_MODEL || 'gemini-3-pro-preview';
            const { text } = await askAboutDocument(filePath, tablePromptFor(outputFormat), model, undefined);
            logger.info('Table extraction completed');
            return documentTextResult(text || 'No tables found in the document.');
        }
        catch (error) {
            return documentErrorResult('table extraction', 'extracting tables', error);
        }
    });
}
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Image Editing Tool - Multi-turn conversational image editing with Nano Banana Pro
3
+ *
4
+ * This tool enables iterative image refinement through conversation.
5
+ * Uses Gemini 3's chat sessions to maintain context and thought signatures.
6
+ *
7
+ * Workflow:
8
+ * 1. Start an edit session with an initial image generation
9
+ * 2. Continue refining with follow-up prompts ("make it warmer", "add more clouds")
10
+ * 3. Each response returns the updated image
11
+ */
12
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
13
+ /**
14
+ * Register image editing tools with the MCP server
+ *
+ * @param server - MCP server instance the image editing tools are registered on.
15
+ */
16
+ export declare function registerImageEditTool(server: McpServer): void;