mcp-image 0.7.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/README.md +117 -78
  2. package/bin/install-skills.js +7 -6
  3. package/dist/api/geminiClient.d.ts +5 -3
  4. package/dist/api/geminiClient.d.ts.map +1 -1
  5. package/dist/api/geminiClient.js +42 -39
  6. package/dist/api/geminiClient.js.map +1 -1
  7. package/dist/api/geminiTextClient.d.ts +6 -8
  8. package/dist/api/geminiTextClient.d.ts.map +1 -1
  9. package/dist/api/geminiTextClient.js +19 -22
  10. package/dist/api/geminiTextClient.js.map +1 -1
  11. package/dist/business/fileManager.d.ts +2 -2
  12. package/dist/business/fileManager.d.ts.map +1 -1
  13. package/dist/business/fileManager.js +12 -48
  14. package/dist/business/fileManager.js.map +1 -1
  15. package/dist/business/inputValidator.d.ts +3 -3
  16. package/dist/business/inputValidator.d.ts.map +1 -1
  17. package/dist/business/inputValidator.js +41 -37
  18. package/dist/business/inputValidator.js.map +1 -1
  19. package/dist/business/responseBuilder.d.ts +3 -3
  20. package/dist/business/responseBuilder.d.ts.map +1 -1
  21. package/dist/business/responseBuilder.js +9 -45
  22. package/dist/business/responseBuilder.js.map +1 -1
  23. package/dist/business/structuredPromptGenerator.d.ts +4 -4
  24. package/dist/business/structuredPromptGenerator.d.ts.map +1 -1
  25. package/dist/business/structuredPromptGenerator.js +11 -16
  26. package/dist/business/structuredPromptGenerator.js.map +1 -1
  27. package/dist/index.d.ts +3 -3
  28. package/dist/index.d.ts.map +1 -1
  29. package/dist/index.js +7 -9
  30. package/dist/index.js.map +1 -1
  31. package/dist/server/errorHandler.d.ts +2 -2
  32. package/dist/server/errorHandler.d.ts.map +1 -1
  33. package/dist/server/errorHandler.js +10 -13
  34. package/dist/server/errorHandler.js.map +1 -1
  35. package/dist/server/mcpServer.d.ts +7 -2
  36. package/dist/server/mcpServer.d.ts.map +1 -1
  37. package/dist/server/mcpServer.js +60 -77
  38. package/dist/server/mcpServer.js.map +1 -1
  39. package/dist/server-main.js +6 -8
  40. package/dist/server-main.js.map +1 -1
  41. package/dist/types/mcp.d.ts +24 -2
  42. package/dist/types/mcp.d.ts.map +1 -1
  43. package/dist/types/mcp.js +17 -2
  44. package/dist/types/mcp.js.map +1 -1
  45. package/dist/types/result.js +2 -6
  46. package/dist/types/result.js.map +1 -1
  47. package/dist/utils/config.d.ts +4 -2
  48. package/dist/utils/config.d.ts.map +1 -1
  49. package/dist/utils/config.js +15 -13
  50. package/dist/utils/config.js.map +1 -1
  51. package/dist/utils/errors.d.ts.map +1 -1
  52. package/dist/utils/errors.js +9 -18
  53. package/dist/utils/errors.js.map +1 -1
  54. package/dist/utils/logger.js +2 -39
  55. package/dist/utils/logger.js.map +1 -1
  56. package/dist/utils/security.d.ts +2 -2
  57. package/dist/utils/security.d.ts.map +1 -1
  58. package/dist/utils/security.js +12 -49
  59. package/dist/utils/security.js.map +1 -1
  60. package/package.json +12 -16
package/README.md CHANGED
@@ -1,47 +1,62 @@
1
- # 🍌 MCP Image Generator
2
-
3
- > Powered by Gemini 3 Pro Image - Nano Banana Pro 🍌
4
-
5
- A powerful MCP (Model Context Protocol) server that enables AI assistants to generate and edit images using Google's Gemini 3 Pro Image (Nano Banana Pro 🍌). Seamlessly integrate advanced image generation capabilities into Codex, Cursor, Claude Code, and other MCP-compatible AI tools.
6
-
7
- ## ✨ Features
8
-
9
- - **AI-Powered Image Generation**: Create images from text prompts using Gemini 3 Pro Image (Nano Banana Pro)
10
- - **Intelligent Prompt Enhancement**: Automatically optimizes your prompts using Gemini 2.5 Flash for superior image quality
11
- - Adds photographic and artistic details
12
- - Enriches lighting, composition, and atmosphere descriptions
13
- - Preserves your intent while maximizing generation quality
14
- - **Image Editing**: Transform existing images with natural language instructions
15
- - Context-aware editing that preserves original style
16
- - Maintains visual consistency with source image
17
- - **High-Resolution Output**: Support for 2K and 4K image generation
18
- - Standard quality for fast generation
19
- - 2K resolution for enhanced detail
20
- - 4K resolution for professional-grade images with superior text rendering
21
- - **Flexible Aspect Ratios**: Multiple aspect ratio options (1:1, 16:9, 9:16, 21:9, and more)
22
- - **Advanced Options**:
23
- - Multi-image blending for composite scenes
24
- - Character consistency across generations
25
- - World knowledge integration for accurate context
26
- - **Multiple Output Formats**: PNG, JPEG, WebP support
27
- - **File Output**: Images are saved as files for easy access and integration
1
+ # MCP Image Generator 🍌
2
+
3
+ > AI image generation and editing MCP server for Cursor, Claude Code, Codex, and any MCP-compatible tool — powered by Nano Banana 2 and Nano Banana Pro (Google Gemini).
4
+
5
+ [![npm version](https://badge.fury.io/js/mcp-image.svg)](https://www.npmjs.com/package/mcp-image)
6
+ [![npm downloads](https://img.shields.io/npm/dm/mcp-image.svg)](https://www.npmjs.com/package/mcp-image)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
8
+
9
+ An MCP server that turns simple text prompts into high-quality images. Unlike a simple API wrapper, this server automatically enhances your prompt and configures sensible defaults for generation — you don't need to learn prompt engineering or tune settings. Just describe what you want.
10
+
11
+ ## How It Works
12
+
13
+ ```
14
+ You: "cat on a roof"
15
+
16
+ Your AI assistant infers context
17
+ (purpose, style, mood, resolution...)
18
+
19
+ MCP optimizes your prompt
20
+ (adds lighting, composition, atmosphere, artistic details)
21
+
22
+ Image generation with smart defaults
23
+ (grounding, consistency, resolution all configured automatically)
24
+
25
+ High-quality image, zero effort
26
+ ```
27
+
28
+ Your AI assistant interprets your intent — the style, purpose, and context behind your request. The MCP focuses on output quality by refining the prompt to meet a structured visual clarity standard and selecting appropriate generation settings. You just describe what you want.
28
29
 
29
- ## 🎨 Agent Skill: Image Generation Prompt Guide
30
+ The prompt optimizer uses a **Subject–Context–Style** framework (powered by Gemini 2.5 Flash) to fill in missing visual details — subject characteristics, environment, lighting, camera work — while preserving your original intent. It doesn't blindly add details: prompts that already meet the quality standard are left largely intact.
30
31
 
31
- This project also provides a standalone **Agent Skill** that teaches AI assistants to write better image generation prompts — no MCP server or API key required.
32
+ **Example of what the optimizer does to a short prompt:**
32
33
 
33
- ### What it does
34
+ > **Input:** "cat on a roof"
35
+ >
36
+ > **After optimization:** "A sleek, midnight black cat, perched with poised elegance on the apex of a weathered, terracotta tile roof. Its emerald eyes, narrowed slightly, reflect the warm glow of a setting sun. Each individual tile is distinct, showing subtle variations in color and texture, with patches of moss clinging to the crevices. The cat's fur is sharply defined, catching the golden hour light, highlighting its sleek contours. In the background, the silhouettes of distant, old-world city buildings with ornate spires are softly blurred, bathed in a gradient of fiery orange, soft pink, and deep violet twilight. A gentle, ethereal mist begins to rise from the alleyways below, adding a touch of mystery. The composition is a medium shot, taken from a slightly low angle, emphasizing the cat's commanding presence against the vast sky. Photorealistic style, captured with a prime lens, wide aperture to create a beautiful bokeh, enhancing the depth of field."
37
+
38
+ ## Features
39
+
40
+ - **Built-in Prompt Optimization**: Your simple prompt is automatically enriched with photographic and artistic details — lighting, composition, atmosphere — using Gemini 2.5 Flash. No prompt engineering skills required.
41
+ - **Three Quality Tiers**: Choose between fast iteration, balanced quality, or maximum fidelity with Nano Banana 2 (Gemini 3.1 Flash Image) and Nano Banana Pro (Gemini 3 Pro Image). [See Quality Presets](#quality-presets).
42
+ - **Image Editing**: Transform existing images with natural language instructions (image-to-image) while preserving original style and visual consistency.
43
+ - **High-Resolution Output**: Up to 4K image generation for professional-grade output with superior text rendering and fine details.
44
+ - **Flexible Aspect Ratios**: From square (1:1) to ultra-wide (21:9) and ultra-tall (1:8) formats.
45
+ - **Character Consistency**: Maintain consistent character appearance across multiple generations — ideal for storyboards, product shots, and visual series.
46
+ - **Advanced Capabilities**:
47
+ - Google Search grounding for real-time factual accuracy
48
+ - World knowledge for photorealistic depictions of historical figures, landmarks, and factual scenarios
49
+ - Multi-image blending for composite scenes
50
+ - Purpose-aware generation (e.g., "cookbook cover" produces different results than "social media post")
51
+ - **Multiple Output Formats**: PNG, JPEG, WebP support.
34
52
 
35
- > **Note:** This skill does not generate images itself — it teaches your AI assistant to write better prompts. Your AI tool must already have built-in image generation capabilities (e.g., Cursor's image generation feature).
53
+ ## Agent Skill: Image Generation Prompt Guide
36
54
 
37
- A reference guide that AI assistants use to improve image generation prompts based on the **Subject-Context-Style** framework. Works with any image model (Gemini, DALL-E, Flux, Stable Diffusion, etc.).
55
+ This project also provides a standalone **[Agent Skill](https://agentskills.io)** (`SKILL.md`) that teaches AI assistants to write better image generation prompts — no MCP server or API key required.
38
56
 
39
- Covers:
57
+ > **Note:** This skill does not generate images itself. It teaches your AI assistant to write better prompts for tools that already have built-in image generation (e.g., Cursor's native image generation).
40
58
 
41
- **Prompt structure** — How to build prompts around Subject, Context, and Style
42
- - **Visual details** — Lighting, textures, camera angles, atmosphere, text in images
43
- - **Advanced features** — Character consistency, multi-element composition, factual accuracy, purpose-specific output
44
- - **Image editing** — How to describe edits while keeping the original look intact
59
+ Based on the **Subject-Context-Style** framework, covering prompt structure, visual details (lighting, textures, camera angles), advanced techniques (character consistency, composition), and image editing. Works with any image model (Gemini, GPT Image, Flux, Stable Diffusion, Midjourney, etc.).
45
60
 
46
61
  ### Install
47
62
 
@@ -62,25 +77,25 @@ npx mcp-image skills install --path ~/.codex/skills
62
77
  npx mcp-image skills install --path ~/.claude/skills
63
78
  ```
64
79
 
65
- ### When to use the Skill vs the MCP server
80
+ ### When to Use the Skill vs the MCP Server
66
81
 
67
82
  | | MCP Server | Agent Skill |
68
83
  |---|---|---|
69
84
  | **Use when** | Your AI tool does not have built-in image generation | Your AI tool already generates images natively |
70
85
  | **Requires** | Gemini API key | Nothing |
71
- | **What it does** | Generates images via API | Teaches the AI to write better prompts |
72
- | **Works with** | MCP-compatible tools | Any tool supporting the [Agent Skills](https://agentskills.io) standard |
86
+ | **What it does** | Generates images via Gemini API with automatic prompt optimization | Teaches the AI to write better prompts |
87
+ | **Works with** | MCP-compatible tools (Cursor, Claude Code, Codex, etc.) | Any tool supporting the [Agent Skills](https://agentskills.io) open standard |
73
88
 
74
89
  ---
75
90
 
76
- ## 🔧 Prerequisites
91
+ ## Prerequisites
77
92
 
78
93
  - **Node.js** 20 or higher
79
94
  - **Gemini API Key** - Get yours at [Google AI Studio](https://aistudio.google.com/apikey)
80
- - **Codex**, **Cursor**, or **Claude Code** (file I/O capable AI tools)
95
+ - An MCP-compatible AI tool: **Cursor**, **Claude Code**, **Codex**, or others
81
96
  - Basic terminal/command line knowledge
82
97
 
83
- ## 🚀 Quick Start
98
+ ## Quick Start
84
99
 
85
100
  ### 1. Get Your Gemini API Key
86
101
 
@@ -140,34 +155,53 @@ claude mcp add mcp-image --scope user --env GEMINI_API_KEY=your-api-key --env IM
140
155
 
141
156
  ⚠️ **Security Note**: Never commit your API key to version control. Keep it secure and use environment-specific configuration.
142
157
 
143
- 📁 **Path Requirements**:
158
+ 📁 **Path Requirements**:
144
159
  - `IMAGE_OUTPUT_DIR` must be an absolute path (e.g., `/Users/username/images`, not `./images`)
145
160
  - Defaults to `./output` in the current working directory if not specified
146
161
  - Directory will be created automatically if it doesn't exist
147
162
 
148
- ### Optional: Skip Prompt Enhancement
163
+ ## Quality Presets
149
164
 
150
- Set `SKIP_PROMPT_ENHANCEMENT=true` to disable automatic prompt optimization and send your prompts directly to the image generator. Useful when you need full control over the exact prompt wording.
165
+ Choose the right balance of speed, quality, and cost:
166
+
167
+ | Preset | Model | Best for | Speed |
168
+ |--------|-------|----------|-------|
169
+ | `fast` (default) | Nano Banana 2 (Gemini 3.1 Flash Image) | Quick iterations, drafts, high-volume generation | ~30–40s |
170
+ | `balanced` | Nano Banana 2 + Thinking | Production images, good quality with reasonable speed | Medium |
171
+ | `quality` | Nano Banana Pro (Gemini 3 Pro Image) | Final deliverables, maximum fidelity, critical visuals | Slow |
172
+
173
+ Set the default via `IMAGE_QUALITY` environment variable:
174
+
175
+ ```
176
+ IMAGE_QUALITY=fast # (default) Fastest generation
177
+ IMAGE_QUALITY=balanced # Enhanced thinking for better quality
178
+ IMAGE_QUALITY=quality # Maximum quality output
179
+ ```
180
+
181
+ To override per-request, just tell your AI assistant (e.g., "generate in high quality" or "use balanced quality"). The assistant will pass the appropriate `quality` parameter automatically.
151
182
 
152
183
  **Codex:**
153
184
  ```toml
154
185
  [mcp_servers.mcp-image.env]
155
186
  GEMINI_API_KEY = "your_gemini_api_key_here"
156
- SKIP_PROMPT_ENHANCEMENT = "true"
157
- IMAGE_OUTPUT_DIR = "/absolute/path/to/images"
187
+ IMAGE_QUALITY = "balanced"
158
188
  ```
159
189
 
160
190
  **Cursor:**
161
- Add `"SKIP_PROMPT_ENHANCEMENT": "true"` to the env section in your config.
191
+ Add `"IMAGE_QUALITY": "balanced"` to the env section in your config.
162
192
 
163
193
  **Claude Code:**
164
194
  ```bash
165
- claude mcp add mcp-image --env GEMINI_API_KEY=your-api-key --env SKIP_PROMPT_ENHANCEMENT=true --env IMAGE_OUTPUT_DIR=/absolute/path/to/images -- npx -y mcp-image
195
+ claude mcp add mcp-image --env GEMINI_API_KEY=your-api-key --env IMAGE_QUALITY=balanced --env IMAGE_OUTPUT_DIR=/absolute/path/to/images -- npx -y mcp-image
166
196
  ```
167
197
 
168
- ## 📖 Usage Examples
198
+ ### Skip Prompt Enhancement
199
+
200
+ Set `SKIP_PROMPT_ENHANCEMENT=true` to disable automatic prompt optimization and send your prompts directly to the image generator. Useful when you need full control over the exact prompt wording.
201
+
202
+ ## Usage Examples
169
203
 
170
- Once configured, your AI assistant can generate images using natural language:
204
+ Once configured, just describe what you want in natural language:
171
205
 
172
206
  ### Basic Image Generation
173
207
 
@@ -175,7 +209,7 @@ Once configured, your AI assistant can generate images using natural language:
175
209
  "Generate a serene mountain landscape at sunset with a lake reflection"
176
210
  ```
177
211
 
178
- The system automatically enhances this to include rich details about lighting, materials, composition, and atmosphere for optimal results.
212
+ Your prompt is automatically enhanced with rich details about lighting, materials, composition, and atmosphere.
179
213
 
180
214
  ### Image Editing
181
215
 
@@ -192,7 +226,7 @@ The system automatically enhances this to include rich details about lighting, m
192
226
  (with maintainCharacterConsistency: true)
193
227
  ```
194
228
 
195
- **High-Resolution 4K Generation:**
229
+ **High-Resolution 4K with Text Rendering:**
196
230
  ```
197
231
  "Generate a professional product photo of a smartphone with clear text on the screen"
198
232
  (with imageSize: "4K")
@@ -204,28 +238,29 @@ The system automatically enhances this to include rich details about lighting, m
204
238
  (with aspectRatio: "21:9")
205
239
  ```
206
240
 
207
- ## 🔧 API Reference
241
+ ## API Reference
208
242
 
209
243
  ### `generate_image` Tool
210
244
 
211
- The MCP server exposes a single tool for all image operations. Internally, it uses a two-stage process:
212
- 1. **Prompt Optimization**: Gemini 2.5 Flash analyzes and enriches your prompt
213
- 2. **Image Generation**: Gemini 3 Pro Image creates the final image
245
+ The server uses a two-stage process with separate models for each stage:
246
+ 1. **Prompt Optimization** (Gemini 2.5 Flash): Refines your prompt using the Subject–Context–Style framework. Skippable via `SKIP_PROMPT_ENHANCEMENT`.
247
+ 2. **Image Generation** (Nano Banana 2 or Pro): Creates the final image. Model varies by quality preset.
214
248
 
215
249
  #### Parameters
216
250
 
217
251
  | Parameter | Type | Required | Description |
218
252
  |-----------|------|----------|-------------|
219
253
  | `prompt` | string | ✅ | Text description or editing instruction |
220
- | `inputImagePath` | string | - | Absolute path to input image for editing |
254
+ | `quality` | string | - | Quality preset: `fast` (default), `balanced`, `quality`. Overrides `IMAGE_QUALITY` env var for this request |
255
+ | `inputImagePath` | string | - | Absolute path to input image for image-to-image editing |
221
256
  | `fileName` | string | - | Custom filename for output (auto-generated if not specified) |
222
- | `aspectRatio` | string | - | Aspect ratio for the generated image. Supported values: `1:1` (square, default), `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `21:9` |
223
- | `imageSize` | string | - | Image resolution for high-quality output. Specify `2K` or `4K` for higher resolution images with better text rendering and fine details. Leave unspecified for standard quality. Supported values: `2K`, `4K` |
257
+ | `aspectRatio` | string | - | `1:1` (default), `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `21:9`, `1:4`, `1:8`, `4:1`, `8:1` |
258
+ | `imageSize` | string | - | `1K`, `2K`, `4K`. Leave unspecified for standard quality |
224
259
  | `blendImages` | boolean | - | Enable multi-image blending for combining multiple visual elements naturally |
225
260
  | `maintainCharacterConsistency` | boolean | - | Maintain character appearance consistency across different poses and scenes |
226
- | `useWorldKnowledge` | boolean | - | Use real-world knowledge for accurate context (recommended for historical figures, landmarks, or factual scenarios) |
227
- | `useGoogleSearch` | boolean | - | Enable Google Search grounding to access real-time web information for factually accurate image generation. Use when prompt requires current or time-sensitive data that may have changed since the model's knowledge cutoff. Leave disabled for creative, fictional, historical, or timeless content. |
228
- | `purpose` | string | - | Intended use for the image (e.g., "cookbook cover", "social media post", "presentation slide"). Helps tailor visual style, quality level, and details to match the purpose. |
261
+ | `useWorldKnowledge` | boolean | - | Use real-world knowledge for accurate context (historical figures, landmarks, factual scenarios) |
262
+ | `useGoogleSearch` | boolean | - | Enable Google Search grounding for real-time factual accuracy |
263
+ | `purpose` | string | - | Intended use (e.g., "cookbook cover", "social media post"). Helps tailor visual style and details |
229
264
 
230
265
  #### Response
231
266
 
@@ -238,14 +273,14 @@ The MCP server exposes a single tool for all image operations. Internally, it us
238
273
  "mimeType": "image/png"
239
274
  },
240
275
  "metadata": {
241
- "model": "gemini-3-pro-image-preview",
276
+ "model": "gemini-3.1-flash-image-preview",
242
277
  "processingTime": 5000,
243
- "timestamp": "2024-01-01T12:00:00.000Z"
278
+ "timestamp": "2026-01-01T12:00:00.000Z"
244
279
  }
245
280
  }
246
281
  ```
247
282
 
248
- ## 🛠️ Troubleshooting
283
+ ## Troubleshooting
249
284
 
250
285
  ### Common Issues
251
286
 
@@ -265,27 +300,31 @@ The MCP server exposes a single tool for all image operations. Internally, it us
265
300
 
266
301
  ### Performance Tips
267
302
 
268
- - Image generation: 30-60 seconds typical (includes prompt optimization)
269
- - Image editing: 15-45 seconds typical (includes context analysis)
270
- - High-resolution generation (2K/4K): May take longer but provides superior quality
271
- - Simple prompts work great - the AI automatically adds professional details
303
+ - `fast` preset: ~30–40 seconds typical (includes prompt optimization)
304
+ - `balanced` preset: Slightly longer due to enhanced thinking
305
+ - `quality` preset: Slower but highest fidelity output
306
+ - High-resolution (2K/4K): Additional processing time for superior detail
307
+ - Simple prompts work great — the optimizer automatically adds professional details
272
308
  - Complex prompts are preserved and further enhanced
273
- - Consider enabling `useWorldKnowledge` for historical or factual subjects
309
+ - Consider `useWorldKnowledge` for historical or factual subjects
274
310
  - Use `imageSize: "4K"` when text clarity and fine details are critical
275
311
 
276
- ## 💰 Usage Notes
312
+ ## Usage Notes
277
313
 
278
- - This MCP server uses the paid Gemini API for both prompt optimization and image generation
279
- - Gemini 2.5 Flash for intelligent prompt enhancement (minimal token usage)
280
- - Gemini 3 Pro Image for actual image generation
314
+ - This MCP server uses the paid Gemini API:
315
+ - **Prompt optimization**: Gemini 2.5 Flash (minimal token usage)
316
+ - **Image generation**: Model depends on quality preset
317
+ - `fast` / `balanced`: Nano Banana 2 — Gemini 3.1 Flash Image (lower cost)
318
+ - `quality`: Nano Banana Pro — Gemini 3 Pro Image (higher cost)
319
+ - `balanced` uses additional thinking tokens (slightly higher cost than `fast`)
281
320
  - Check current pricing and rate limits at [Google AI Studio](https://aistudio.google.com/)
282
321
  - Monitor your API usage to avoid unexpected charges
283
322
  - The prompt optimization step adds minimal cost while significantly improving output quality
284
323
 
285
- ## 📄 License
324
+ ## License
286
325
 
287
326
  MIT License - see [LICENSE](LICENSE) for details.
288
327
 
289
328
  ---
290
329
 
291
- **Need help?** [Open an issue](https://github.com/shinpr/mcp-image/issues) or check the [troubleshooting section](#-troubleshooting) above.
330
+ **Need help?** [Open an issue](https://github.com/shinpr/mcp-image/issues) or check the [troubleshooting section](#troubleshooting) above.
@@ -1,8 +1,11 @@
1
1
  #!/usr/bin/env node
2
- 'use strict'
3
2
 
4
- const { cpSync, existsSync, mkdirSync } = require('node:fs')
5
- const { dirname, resolve } = require('node:path')
3
+ import { cpSync, existsSync, mkdirSync } from 'node:fs'
4
+ import { dirname, resolve } from 'node:path'
5
+ import { fileURLToPath } from 'node:url'
6
+
7
+ const __filename = fileURLToPath(import.meta.url)
8
+ const __dirname = dirname(__filename)
6
9
 
7
10
  const SKILLS_SOURCE = resolve(__dirname, '..', 'skills', 'image-generation')
8
11
  const SKILL_DIR_NAME = 'image-generation'
@@ -77,7 +80,7 @@ function install(targetPath) {
77
80
  console.log(`Installed skills to: ${targetPath}`)
78
81
  }
79
82
 
80
- function run(args) {
83
+ export function run(args) {
81
84
  if (args.length === 0) {
82
85
  printHelp()
83
86
  process.exit(0)
@@ -110,5 +113,3 @@ function run(args) {
110
113
  console.log('Installed files:')
111
114
  console.log(' - image-generation/SKILL.md')
112
115
  }
113
-
114
- module.exports = { run }
@@ -3,9 +3,10 @@
3
3
  * Integrates with Google's Gemini AI API using the official SDK
4
4
  * Supports automatic URL Context processing and feature parameters
5
5
  */
6
- import type { Result } from '../types/result';
7
- import type { Config } from '../utils/config';
8
- import { GeminiAPIError, NetworkError } from '../utils/errors';
6
+ import type { ImageQuality } from '../types/mcp.js';
7
+ import type { Result } from '../types/result.js';
8
+ import type { Config } from '../utils/config.js';
9
+ import { GeminiAPIError, NetworkError } from '../utils/errors.js';
9
10
  /**
10
11
  * Metadata for generated images
11
12
  */
@@ -27,6 +28,7 @@ export interface GeminiApiParams {
27
28
  aspectRatio?: string;
28
29
  imageSize?: string;
29
30
  useGoogleSearch?: boolean;
31
+ quality?: ImageQuality;
30
32
  }
31
33
  /**
32
34
  * Result of image generation
@@ -1 +1 @@
1
- {"version":3,"file":"geminiClient.d.ts","sourceRoot":"","sources":["../../src/api/geminiClient.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAA;AAE7C,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAA;AAC7C,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAA;AAkH9D;;GAEG;AACH,MAAM,WAAW,wBAAwB;IACvC,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,SAAS,EAAE,IAAI,CAAA;IACf,kBAAkB,EAAE,OAAO,CAAA;IAE3B,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,UAAU,CAAC,EAAE,MAAM,CAAA;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,MAAM,CAAA;IACd,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,eAAe,CAAC,EAAE,OAAO,CAAA;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,SAAS,EAAE,MAAM,CAAA;IACjB,QAAQ,EAAE,wBAAwB,CAAA;CACnC;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,aAAa,CACX,MAAM,EAAE,eAAe,GACtB,OAAO,CAAC,MAAM,CAAC,oBAAoB,EAAE,cAAc,GAAG,YAAY,CAAC,CAAC,CAAA;CACxE;AA2UD;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,YAAY,EAAE,cAAc,CAAC,CAevF"}
1
+ {"version":3,"file":"geminiClient.d.ts","sourceRoot":"","sources":["../../src/api/geminiClient.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAA;AAEnD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAA;AAEhD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAA;AAChD,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AAsHjE;;GAEG;AACH,MAAM,WAAW,wBAAwB;IACvC,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,SAAS,EAAE,IAAI,CAAA;IACf,kBAAkB,EAAE,OAAO,CAAA;IAE3B,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,UAAU,CAAC,EAAE,MAAM,CAAA;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,MAAM,CAAA;IACd,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,eAAe,CAAC,EAAE,OAAO,CAAA;IACzB,OAAO,CAAC,EAAE,YAAY,CAAA;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,SAAS,EAAE,MAAM,CAAA;IACjB,QAAQ,EAAE,wBAAwB,CAAA;CACnC;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,aAAa,CACX,MAAM,EAAE,eAAe,GACtB,OAAO,CAAC,MAAM,CAAC,oBAAoB,EAAE,cAAc,GAAG,YAAY,CAAC,CAAC,CAAA;CACxE;AAoVD;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,YAAY,EAAE,cAAc,CAAC,CAevF"}
@@ -1,14 +1,12 @@
1
- "use strict";
2
1
  /**
3
2
  * Gemini API client for image generation
4
3
  * Integrates with Google's Gemini AI API using the official SDK
5
4
  * Supports automatic URL Context processing and feature parameters
6
5
  */
7
- Object.defineProperty(exports, "__esModule", { value: true });
8
- exports.createGeminiClient = createGeminiClient;
9
- const genai_1 = require("@google/genai");
10
- const result_1 = require("../types/result");
11
- const errors_1 = require("../utils/errors");
6
+ import { GoogleGenAI } from '@google/genai';
7
+ import { GEMINI_MODELS } from '../types/mcp.js';
8
+ import { Err, Ok } from '../types/result.js';
9
+ import { GeminiAPIError, NetworkError } from '../utils/errors.js';
12
10
  /**
13
11
  * Safely analyze response structure for debugging (removes sensitive data)
14
12
  */
@@ -70,9 +68,9 @@ function isGeminiResponse(obj) {
70
68
  * Implementation of Gemini API client
71
69
  */
72
70
  class GeminiClientImpl {
73
- constructor(genai) {
71
+ constructor(genai, defaultQuality = 'fast') {
74
72
  this.genai = genai;
75
- this.modelName = 'gemini-3-pro-image-preview';
73
+ this.defaultQuality = defaultQuality;
76
74
  }
77
75
  async generateImage(params) {
78
76
  try {
@@ -105,6 +103,10 @@ class GeminiClientImpl {
105
103
  ],
106
104
  });
107
105
  }
106
+ // Determine effective quality
107
+ const effectiveQuality = params.quality ?? this.defaultQuality;
108
+ // Select model based on quality preset
109
+ const modelName = effectiveQuality === 'quality' ? GEMINI_MODELS.PRO : GEMINI_MODELS.FLASH;
108
110
  // Construct config object for generateContent
109
111
  const imageConfig = {};
110
112
  if (params.aspectRatio) {
@@ -113,19 +115,20 @@ class GeminiClientImpl {
113
115
  if (params.imageSize) {
114
116
  imageConfig['imageSize'] = params.imageSize;
115
117
  }
116
- const config = Object.keys(imageConfig).length > 0
117
- ? {
118
- imageConfig,
119
- responseModalities: ['IMAGE'],
120
- }
121
- : {
122
- responseModalities: ['IMAGE'],
123
- };
124
- // Construct tools array for Google Search grounding
125
- const tools = params.useGoogleSearch ? [{ googleSearch: {} }] : undefined;
126
- // Generate content using Gemini API (@google/genai v1.17.0+)
118
+ // Build config with optional thinkingConfig
119
+ const thinkingConfig = effectiveQuality === 'balanced' ? { thinkingConfig: { thinkingLevel: 'high' } } : {};
120
+ const config = {
121
+ ...(Object.keys(imageConfig).length > 0 && { imageConfig }),
122
+ responseModalities: ['IMAGE'],
123
+ ...thinkingConfig,
124
+ };
125
+ // Construct tools array for Google Search grounding (web + image search)
126
+ const tools = params.useGoogleSearch
127
+ ? [{ googleSearch: { searchTypes: { webSearch: {}, imageSearch: {} } } }]
128
+ : undefined;
129
+ // Generate content using Gemini API
127
130
  const rawResponse = await this.genai.models.generateContent({
128
- model: this.modelName,
131
+ model: modelName,
129
132
  contents: requestContent,
130
133
  config,
131
134
  ...(tools && { tools }),
@@ -137,14 +140,14 @@ class GeminiClientImpl {
137
140
  const asRecord = rawResponse;
138
141
  if (asRecord['error']) {
139
142
  const error = asRecord['error'];
140
- return (0, result_1.Err)(new errors_1.GeminiAPIError(`Gemini API Error: ${error['message'] || 'Unknown error'}`, {
143
+ return Err(new GeminiAPIError(`Gemini API Error: ${error['message'] || 'Unknown error'}`, {
141
144
  code: error['code'],
142
145
  status: error['status'],
143
146
  details: error['details'] || responseStructure,
144
147
  stage: 'api_error',
145
148
  }));
146
149
  }
147
- return (0, result_1.Err)(new errors_1.GeminiAPIError('Invalid response structure from Gemini API', {
150
+ return Err(new GeminiAPIError('Invalid response structure from Gemini API', {
148
151
  message: 'The API returned an unexpected response format',
149
152
  responseStructure: responseStructure,
150
153
  stage: 'response_validation',
@@ -160,7 +163,7 @@ class GeminiClientImpl {
160
163
  if (responseAsRecord['promptFeedback']) {
161
164
  const promptFeedback = responseAsRecord['promptFeedback'];
162
165
  if (promptFeedback['blockReason'] === 'SAFETY') {
163
- return (0, result_1.Err)(new errors_1.GeminiAPIError('Image generation blocked for safety reasons', {
166
+ return Err(new GeminiAPIError('Image generation blocked for safety reasons', {
164
167
  stage: 'prompt_analysis',
165
168
  blockReason: promptFeedback['blockReason'],
166
169
  suggestion: 'Rephrase your prompt to avoid potentially sensitive content',
@@ -168,7 +171,7 @@ class GeminiClientImpl {
168
171
  }
169
172
  if (promptFeedback['blockReason'] === 'OTHER' ||
170
173
  promptFeedback['blockReason'] === 'PROHIBITED_CONTENT') {
171
- return (0, result_1.Err)(new errors_1.GeminiAPIError('Image generation blocked due to prohibited content', {
174
+ return Err(new GeminiAPIError('Image generation blocked due to prohibited content', {
172
175
  stage: 'prompt_analysis',
173
176
  blockReason: promptFeedback['blockReason'],
174
177
  suggestion: 'Remove any prohibited content from your prompt and try again',
@@ -177,7 +180,7 @@ class GeminiClientImpl {
177
180
  }
178
181
  // Check for candidates
179
182
  if (!responseData.candidates || responseData.candidates.length === 0) {
180
- return (0, result_1.Err)(new errors_1.GeminiAPIError('No image generated: Content may have been filtered', {
183
+ return Err(new GeminiAPIError('No image generated: Content may have been filtered', {
181
184
  stage: 'generation',
182
185
  candidatesCount: 0,
183
186
  suggestion: 'Try rephrasing your prompt to avoid potentially sensitive content',
@@ -185,7 +188,7 @@ class GeminiClientImpl {
185
188
  }
186
189
  const candidate = responseData.candidates[0];
187
190
  if (!candidate || !candidate.content || !candidate.content.parts) {
188
- return (0, result_1.Err)(new errors_1.GeminiAPIError('No valid content in response', {
191
+ return Err(new GeminiAPIError('No valid content in response', {
189
192
  stage: 'candidate_extraction',
190
193
  suggestion: 'The API response was incomplete. Please try again',
191
194
  }));
@@ -195,7 +198,7 @@ class GeminiClientImpl {
195
198
  if (candidate.finishReason) {
196
199
  const finishReason = candidate.finishReason;
197
200
  if (finishReason === 'IMAGE_SAFETY') {
198
- return (0, result_1.Err)(new errors_1.GeminiAPIError('Image generation stopped for safety reasons', {
201
+ return Err(new GeminiAPIError('Image generation stopped for safety reasons', {
199
202
  finishReason,
200
203
  stage: 'generation_stopped',
201
204
  suggestion: 'Modify your prompt to avoid potentially sensitive content',
@@ -214,7 +217,7 @@ class GeminiClientImpl {
214
217
  }));
215
218
  }
216
219
  if (finishReason === 'MAX_TOKENS') {
217
- return (0, result_1.Err)(new errors_1.GeminiAPIError('Maximum token limit reached during generation', {
220
+ return Err(new GeminiAPIError('Maximum token limit reached during generation', {
218
221
  finishReason,
219
222
  stage: 'generation_stopped',
220
223
  suggestion: 'Try using a shorter or simpler prompt',
@@ -222,7 +225,7 @@ class GeminiClientImpl {
222
225
  }
223
226
  }
224
227
  if (parts.length === 0) {
225
- return (0, result_1.Err)(new errors_1.GeminiAPIError('No content parts in response', {
228
+ return Err(new GeminiAPIError('No content parts in response', {
226
229
  stage: 'content_extraction',
227
230
  suggestion: 'The generation was incomplete. Please try again',
228
231
  }));
@@ -233,7 +236,7 @@ class GeminiClientImpl {
233
236
  if (!imagePart?.inlineData) {
234
237
  // If there's text, it's likely an error message from Gemini
235
238
  const errorMessage = textPart?.text || 'Image generation failed';
236
- return (0, result_1.Err)(new errors_1.GeminiAPIError('Image generation failed due to content filtering', {
239
+ return Err(new GeminiAPIError('Image generation failed due to content filtering', {
237
240
  reason: errorMessage,
238
241
  stage: 'image_extraction',
239
242
  suggestion: 'The prompt was blocked by safety filters. Try rephrasing your prompt to avoid potentially sensitive content.',
@@ -244,7 +247,7 @@ class GeminiClientImpl {
244
247
  const mimeType = imagePart.inlineData.mimeType || 'image/png';
245
248
  // Create metadata
246
249
  const metadata = {
247
- model: this.modelName,
250
+ model: modelName,
248
251
  prompt: params.prompt,
249
252
  mimeType,
250
253
  timestamp: new Date(),
@@ -252,7 +255,7 @@ class GeminiClientImpl {
252
255
  ...(responseData.modelVersion && { modelVersion: responseData.modelVersion }),
253
256
  ...(responseData.responseId && { responseId: responseData.responseId }),
254
257
  };
255
- return (0, result_1.Ok)({
258
+ return Ok({
256
259
  imageData: imageBuffer,
257
260
  metadata,
258
261
  });
@@ -265,14 +268,14 @@ class GeminiClientImpl {
265
268
  const errorMessage = error instanceof Error ? error.message : 'Unknown error';
266
269
  // Check if it's a network error
267
270
  if (this.isNetworkError(error)) {
268
- return (0, result_1.Err)(new errors_1.NetworkError(`Network error during image generation: ${errorMessage}`, 'Check your internet connection and try again', error instanceof Error ? error : undefined));
271
+ return Err(new NetworkError(`Network error during image generation: ${errorMessage}`, 'Check your internet connection and try again', error instanceof Error ? error : undefined));
269
272
  }
270
273
  // Check if it's an API-specific error
271
274
  if (this.isAPIError(error)) {
272
- return (0, result_1.Err)(new errors_1.GeminiAPIError(`Failed to generate image: ${errorMessage}`, this.getAPIErrorSuggestion(errorMessage), this.extractStatusCode(error)));
275
+ return Err(new GeminiAPIError(`Failed to generate image: ${errorMessage}`, this.getAPIErrorSuggestion(errorMessage), this.extractStatusCode(error)));
273
276
  }
274
277
  // Generic API error
275
- return (0, result_1.Err)(new errors_1.GeminiAPIError(`Failed to generate image with prompt "${prompt}": ${errorMessage}`, 'Check your API key, quota, and prompt validity. Try again with a different prompt'));
278
+ return Err(new GeminiAPIError(`Failed to generate image with prompt "${prompt}": ${errorMessage}`, 'Check your API key, quota, and prompt validity. Try again with a different prompt'));
276
279
  }
277
280
  isNetworkError(error) {
278
281
  if (error instanceof Error) {
@@ -313,16 +316,16 @@ class GeminiClientImpl {
313
316
  * @param config Configuration containing API key and other settings
314
317
  * @returns Result containing the client or an error
315
318
  */
316
- function createGeminiClient(config) {
319
+ export function createGeminiClient(config) {
317
320
  try {
318
- const genai = new genai_1.GoogleGenAI({
321
+ const genai = new GoogleGenAI({
319
322
  apiKey: config.geminiApiKey,
320
323
  });
321
- return (0, result_1.Ok)(new GeminiClientImpl(genai));
324
+ return Ok(new GeminiClientImpl(genai, config.imageQuality));
322
325
  }
323
326
  catch (error) {
324
327
  const errorMessage = error instanceof Error ? error.message : 'Unknown error';
325
- return (0, result_1.Err)(new errors_1.GeminiAPIError(`Failed to initialize Gemini client: ${errorMessage}`, 'Verify your GEMINI_API_KEY is valid and the @google/genai package is properly installed'));
328
+ return Err(new GeminiAPIError(`Failed to initialize Gemini client: ${errorMessage}`, 'Verify your GEMINI_API_KEY is valid and the @google/genai package is properly installed'));
326
329
  }
327
330
  }
328
331
  //# sourceMappingURL=geminiClient.js.map