mcp-image 0.7.0 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +117 -78
- package/bin/install-skills.js +7 -6
- package/dist/api/geminiClient.d.ts +5 -3
- package/dist/api/geminiClient.d.ts.map +1 -1
- package/dist/api/geminiClient.js +42 -39
- package/dist/api/geminiClient.js.map +1 -1
- package/dist/api/geminiTextClient.d.ts +6 -8
- package/dist/api/geminiTextClient.d.ts.map +1 -1
- package/dist/api/geminiTextClient.js +19 -22
- package/dist/api/geminiTextClient.js.map +1 -1
- package/dist/business/fileManager.d.ts +2 -2
- package/dist/business/fileManager.d.ts.map +1 -1
- package/dist/business/fileManager.js +12 -48
- package/dist/business/fileManager.js.map +1 -1
- package/dist/business/inputValidator.d.ts +3 -3
- package/dist/business/inputValidator.d.ts.map +1 -1
- package/dist/business/inputValidator.js +41 -37
- package/dist/business/inputValidator.js.map +1 -1
- package/dist/business/responseBuilder.d.ts +3 -3
- package/dist/business/responseBuilder.d.ts.map +1 -1
- package/dist/business/responseBuilder.js +9 -45
- package/dist/business/responseBuilder.js.map +1 -1
- package/dist/business/structuredPromptGenerator.d.ts +4 -4
- package/dist/business/structuredPromptGenerator.d.ts.map +1 -1
- package/dist/business/structuredPromptGenerator.js +11 -16
- package/dist/business/structuredPromptGenerator.js.map +1 -1
- package/dist/index.d.ts +3 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7 -9
- package/dist/index.js.map +1 -1
- package/dist/server/errorHandler.d.ts +2 -2
- package/dist/server/errorHandler.d.ts.map +1 -1
- package/dist/server/errorHandler.js +10 -13
- package/dist/server/errorHandler.js.map +1 -1
- package/dist/server/mcpServer.d.ts +7 -2
- package/dist/server/mcpServer.d.ts.map +1 -1
- package/dist/server/mcpServer.js +60 -77
- package/dist/server/mcpServer.js.map +1 -1
- package/dist/server-main.js +6 -8
- package/dist/server-main.js.map +1 -1
- package/dist/types/mcp.d.ts +24 -2
- package/dist/types/mcp.d.ts.map +1 -1
- package/dist/types/mcp.js +17 -2
- package/dist/types/mcp.js.map +1 -1
- package/dist/types/result.js +2 -6
- package/dist/types/result.js.map +1 -1
- package/dist/utils/config.d.ts +4 -2
- package/dist/utils/config.d.ts.map +1 -1
- package/dist/utils/config.js +15 -13
- package/dist/utils/config.js.map +1 -1
- package/dist/utils/errors.d.ts.map +1 -1
- package/dist/utils/errors.js +9 -18
- package/dist/utils/errors.js.map +1 -1
- package/dist/utils/logger.js +2 -39
- package/dist/utils/logger.js.map +1 -1
- package/dist/utils/security.d.ts +2 -2
- package/dist/utils/security.d.ts.map +1 -1
- package/dist/utils/security.js +12 -49
- package/dist/utils/security.js.map +1 -1
- package/package.json +12 -16
package/README.md
CHANGED
|
@@ -1,47 +1,62 @@
|
|
|
1
|
-
#
|
|
2
|
-
|
|
3
|
-
>
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
-
|
|
26
|
-
|
|
27
|
-
|
|
1
|
+
# MCP Image Generator 🍌
|
|
2
|
+
|
|
3
|
+
> AI image generation and editing MCP server for Cursor, Claude Code, Codex, and any MCP-compatible tool — powered by Nano Banana 2 and Nano Banana Pro (Google Gemini).
|
|
4
|
+
|
|
5
|
+
[npm version](https://www.npmjs.com/package/mcp-image)
|
|
6
|
+
[npm downloads](https://www.npmjs.com/package/mcp-image)
|
|
7
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
8
|
+
|
|
9
|
+
An MCP server that turns simple text prompts into high-quality images. Unlike a simple API wrapper, this server automatically enhances your prompt and configures sensible defaults for generation — you don't need to learn prompt engineering or tune settings. Just describe what you want.
|
|
10
|
+
|
|
11
|
+
## How It Works
|
|
12
|
+
|
|
13
|
+
```
|
|
14
|
+
You: "cat on a roof"
|
|
15
|
+
↓
|
|
16
|
+
Your AI assistant infers context
|
|
17
|
+
(purpose, style, mood, resolution...)
|
|
18
|
+
↓
|
|
19
|
+
MCP optimizes your prompt
|
|
20
|
+
(adds lighting, composition, atmosphere, artistic details)
|
|
21
|
+
↓
|
|
22
|
+
Image generation with smart defaults
|
|
23
|
+
(grounding, consistency, resolution — all configured automatically)
|
|
24
|
+
↓
|
|
25
|
+
High-quality image, zero effort
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Your AI assistant interprets your intent — the style, purpose, and context behind your request. The MCP focuses on output quality by refining the prompt to meet a structured visual clarity standard and selecting appropriate generation settings. You just describe what you want.
|
|
28
29
|
|
|
29
|
-
|
|
30
|
+
The prompt optimizer uses a **Subject–Context–Style** framework (powered by Gemini 2.5 Flash) to fill in missing visual details — subject characteristics, environment, lighting, camera work — while preserving your original intent. It doesn't blindly add details: prompts that already meet the quality standard are left largely intact.
|
|
30
31
|
|
|
31
|
-
|
|
32
|
+
**Example — what the optimizer does to a short prompt:**
|
|
32
33
|
|
|
33
|
-
|
|
34
|
+
> **Input:** "cat on a roof"
|
|
35
|
+
>
|
|
36
|
+
> **After optimization:** "A sleek, midnight black cat, perched with poised elegance on the apex of a weathered, terracotta tile roof. Its emerald eyes, narrowed slightly, reflect the warm glow of a setting sun. Each individual tile is distinct, showing subtle variations in color and texture, with patches of moss clinging to the crevices. The cat's fur is sharply defined, catching the golden hour light, highlighting its sleek contours. In the background, the silhouettes of distant, old-world city buildings with ornate spires are softly blurred, bathed in a gradient of fiery orange, soft pink, and deep violet twilight. A gentle, ethereal mist begins to rise from the alleyways below, adding a touch of mystery. The composition is a medium shot, taken from a slightly low angle, emphasizing the cat's commanding presence against the vast sky. Photorealistic style, captured with a prime lens, wide aperture to create a beautiful bokeh, enhancing the depth of field."
|
|
37
|
+
|
|
38
|
+
## Features
|
|
39
|
+
|
|
40
|
+
- **Built-in Prompt Optimization**: Your simple prompt is automatically enriched with photographic and artistic details — lighting, composition, atmosphere — using Gemini 2.5 Flash. No prompt engineering skills required.
|
|
41
|
+
- **Three Quality Tiers**: Choose between fast iteration, balanced quality, or maximum fidelity with Nano Banana 2 (Gemini 3.1 Flash Image) and Nano Banana Pro (Gemini 3 Pro Image). [See Quality Presets](#quality-presets).
|
|
42
|
+
- **Image Editing**: Transform existing images with natural language instructions (image-to-image) while preserving original style and visual consistency.
|
|
43
|
+
- **High-Resolution Output**: Up to 4K image generation for professional-grade output with superior text rendering and fine details.
|
|
44
|
+
- **Flexible Aspect Ratios**: From square (1:1) to ultra-wide (21:9, up to 8:1) and ultra-tall (down to 1:8) formats.
|
|
45
|
+
- **Character Consistency**: Maintain consistent character appearance across multiple generations — ideal for storyboards, product shots, and visual series.
|
|
46
|
+
- **Advanced Capabilities**:
|
|
47
|
+
- Google Search grounding for real-time factual accuracy
|
|
48
|
+
- World knowledge for photorealistic depictions of historical figures, landmarks, and factual scenarios
|
|
49
|
+
- Multi-image blending for composite scenes
|
|
50
|
+
- Purpose-aware generation (e.g., "cookbook cover" produces different results than "social media post")
|
|
51
|
+
- **Multiple Output Formats**: PNG, JPEG, WebP support.
|
|
34
52
|
|
|
35
|
-
|
|
53
|
+
## Agent Skill: Image Generation Prompt Guide
|
|
36
54
|
|
|
37
|
-
|
|
55
|
+
This project also provides a standalone **[Agent Skill](https://agentskills.io)** (`SKILL.md`) that teaches AI assistants to write better image generation prompts — no MCP server or API key required.
|
|
38
56
|
|
|
39
|
-
|
|
57
|
+
> **Note:** This skill does not generate images itself. It teaches your AI assistant to write better prompts for tools that already have built-in image generation (e.g., Cursor's native image generation).
|
|
40
58
|
|
|
41
|
-
|
|
42
|
-
- **Visual details** — Lighting, textures, camera angles, atmosphere, text in images
|
|
43
|
-
- **Advanced features** — Character consistency, multi-element composition, factual accuracy, purpose-specific output
|
|
44
|
-
- **Image editing** — How to describe edits while keeping the original look intact
|
|
59
|
+
Based on the **Subject-Context-Style** framework, covering prompt structure, visual details (lighting, textures, camera angles), advanced techniques (character consistency, composition), and image editing. Works with any image model (Gemini, GPT Image, Flux, Stable Diffusion, Midjourney, etc.).
|
|
45
60
|
|
|
46
61
|
### Install
|
|
47
62
|
|
|
@@ -62,25 +77,25 @@ npx mcp-image skills install --path ~/.codex/skills
|
|
|
62
77
|
npx mcp-image skills install --path ~/.claude/skills
|
|
63
78
|
```
|
|
64
79
|
|
|
65
|
-
### When to
|
|
80
|
+
### When to Use the Skill vs the MCP Server
|
|
66
81
|
|
|
67
82
|
| | MCP Server | Agent Skill |
|
|
68
83
|
|---|---|---|
|
|
69
84
|
| **Use when** | Your AI tool does not have built-in image generation | Your AI tool already generates images natively |
|
|
70
85
|
| **Requires** | Gemini API key | Nothing |
|
|
71
|
-
| **What it does** | Generates images via API | Teaches the AI to write better prompts |
|
|
72
|
-
| **Works with** | MCP-compatible tools | Any tool supporting the [Agent Skills](https://agentskills.io) standard |
|
|
86
|
+
| **What it does** | Generates images via Gemini API with automatic prompt optimization | Teaches the AI to write better prompts |
|
|
87
|
+
| **Works with** | MCP-compatible tools (Cursor, Claude Code, Codex, etc.) | Any tool supporting the [Agent Skills](https://agentskills.io) open standard |
|
|
73
88
|
|
|
74
89
|
---
|
|
75
90
|
|
|
76
|
-
##
|
|
91
|
+
## Prerequisites
|
|
77
92
|
|
|
78
93
|
- **Node.js** 20 or higher
|
|
79
94
|
- **Gemini API Key** - Get yours at [Google AI Studio](https://aistudio.google.com/apikey)
|
|
80
|
-
-
|
|
95
|
+
- An MCP-compatible AI tool: **Cursor**, **Claude Code**, **Codex**, or others
|
|
81
96
|
- Basic terminal/command line knowledge
|
|
82
97
|
|
|
83
|
-
##
|
|
98
|
+
## Quick Start
|
|
84
99
|
|
|
85
100
|
### 1. Get Your Gemini API Key
|
|
86
101
|
|
|
@@ -140,34 +155,53 @@ claude mcp add mcp-image --scope user --env GEMINI_API_KEY=your-api-key --env IM
|
|
|
140
155
|
|
|
141
156
|
⚠️ **Security Note**: Never commit your API key to version control. Keep it secure and use environment-specific configuration.
|
|
142
157
|
|
|
143
|
-
📁 **Path Requirements**:
|
|
158
|
+
📁 **Path Requirements**:
|
|
144
159
|
- `IMAGE_OUTPUT_DIR` must be an absolute path (e.g., `/Users/username/images`, not `./images`)
|
|
145
160
|
- Defaults to `./output` in the current working directory if not specified
|
|
146
161
|
- Directory will be created automatically if it doesn't exist
|
|
147
162
|
|
|
148
|
-
|
|
163
|
+
## Quality Presets
|
|
149
164
|
|
|
150
|
-
|
|
165
|
+
Choose the right balance of speed, quality, and cost:
|
|
166
|
+
|
|
167
|
+
| Preset | Model | Best for | Speed |
|
|
168
|
+
|--------|-------|----------|-------|
|
|
169
|
+
| `fast` (default) | Nano Banana 2 (Gemini 3.1 Flash Image) | Quick iterations, drafts, high-volume generation | ~30–40s |
|
|
170
|
+
| `balanced` | Nano Banana 2 + Thinking | Production images, good quality with reasonable speed | Medium |
|
|
171
|
+
| `quality` | Nano Banana Pro (Gemini 3 Pro Image) | Final deliverables, maximum fidelity, critical visuals | Slow |
|
|
172
|
+
|
|
173
|
+
Set the default via `IMAGE_QUALITY` environment variable:
|
|
174
|
+
|
|
175
|
+
```
|
|
176
|
+
IMAGE_QUALITY=fast # (default) Fastest generation
|
|
177
|
+
IMAGE_QUALITY=balanced # Enhanced thinking for better quality
|
|
178
|
+
IMAGE_QUALITY=quality # Maximum quality output
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
To override per-request, just tell your AI assistant (e.g., "generate in high quality" or "use balanced quality"). The assistant will pass the appropriate `quality` parameter automatically.
|
|
151
182
|
|
|
152
183
|
**Codex:**
|
|
153
184
|
```toml
|
|
154
185
|
[mcp_servers.mcp-image.env]
|
|
155
186
|
GEMINI_API_KEY = "your_gemini_api_key_here"
|
|
156
|
-
|
|
157
|
-
IMAGE_OUTPUT_DIR = "/absolute/path/to/images"
|
|
187
|
+
IMAGE_QUALITY = "balanced"
|
|
158
188
|
```
|
|
159
189
|
|
|
160
190
|
**Cursor:**
|
|
161
|
-
Add `"
|
|
191
|
+
Add `"IMAGE_QUALITY": "balanced"` to the env section in your config.
|
|
162
192
|
|
|
163
193
|
**Claude Code:**
|
|
164
194
|
```bash
|
|
165
|
-
claude mcp add mcp-image --env GEMINI_API_KEY=your-api-key --env
|
|
195
|
+
claude mcp add mcp-image --env GEMINI_API_KEY=your-api-key --env IMAGE_QUALITY=balanced --env IMAGE_OUTPUT_DIR=/absolute/path/to/images -- npx -y mcp-image
|
|
166
196
|
```
|
|
167
197
|
|
|
168
|
-
|
|
198
|
+
### Skip Prompt Enhancement
|
|
199
|
+
|
|
200
|
+
Set `SKIP_PROMPT_ENHANCEMENT=true` to disable automatic prompt optimization and send your prompts directly to the image generator. Useful when you need full control over the exact prompt wording.
|
|
201
|
+
|
|
202
|
+
## Usage Examples
|
|
169
203
|
|
|
170
|
-
Once configured,
|
|
204
|
+
Once configured, just describe what you want in natural language:
|
|
171
205
|
|
|
172
206
|
### Basic Image Generation
|
|
173
207
|
|
|
@@ -175,7 +209,7 @@ Once configured, your AI assistant can generate images using natural language:
|
|
|
175
209
|
"Generate a serene mountain landscape at sunset with a lake reflection"
|
|
176
210
|
```
|
|
177
211
|
|
|
178
|
-
|
|
212
|
+
Your prompt is automatically enhanced with rich details about lighting, materials, composition, and atmosphere.
|
|
179
213
|
|
|
180
214
|
### Image Editing
|
|
181
215
|
|
|
@@ -192,7 +226,7 @@ The system automatically enhances this to include rich details about lighting, m
|
|
|
192
226
|
(with maintainCharacterConsistency: true)
|
|
193
227
|
```
|
|
194
228
|
|
|
195
|
-
**High-Resolution 4K
|
|
229
|
+
**High-Resolution 4K with Text Rendering:**
|
|
196
230
|
```
|
|
197
231
|
"Generate a professional product photo of a smartphone with clear text on the screen"
|
|
198
232
|
(with imageSize: "4K")
|
|
@@ -204,28 +238,29 @@ The system automatically enhances this to include rich details about lighting, m
|
|
|
204
238
|
(with aspectRatio: "21:9")
|
|
205
239
|
```
|
|
206
240
|
|
|
207
|
-
##
|
|
241
|
+
## API Reference
|
|
208
242
|
|
|
209
243
|
### `generate_image` Tool
|
|
210
244
|
|
|
211
|
-
The
|
|
212
|
-
1. **Prompt Optimization
|
|
213
|
-
2. **Image Generation
|
|
245
|
+
The server uses a two-stage process with separate models for each stage:
|
|
246
|
+
1. **Prompt Optimization** (Gemini 2.5 Flash): Refines your prompt using the Subject–Context–Style framework. Can be skipped by setting `SKIP_PROMPT_ENHANCEMENT=true`.
|
|
247
|
+
2. **Image Generation** (Nano Banana 2 or Pro): Creates the final image. Model varies by quality preset.
|
|
214
248
|
|
|
215
249
|
#### Parameters
|
|
216
250
|
|
|
217
251
|
| Parameter | Type | Required | Description |
|
|
218
252
|
|-----------|------|----------|-------------|
|
|
219
253
|
| `prompt` | string | ✅ | Text description or editing instruction |
|
|
220
|
-
| `
|
|
254
|
+
| `quality` | string | - | Quality preset: `fast` (default), `balanced`, `quality`. Overrides `IMAGE_QUALITY` env var for this request |
|
|
255
|
+
| `inputImagePath` | string | - | Absolute path to input image for image-to-image editing |
|
|
221
256
|
| `fileName` | string | - | Custom filename for output (auto-generated if not specified) |
|
|
222
|
-
| `aspectRatio` | string | - |
|
|
223
|
-
| `imageSize` | string | - |
|
|
257
|
+
| `aspectRatio` | string | - | `1:1` (default), `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `21:9`, `1:4`, `1:8`, `4:1`, `8:1` |
|
|
258
|
+
| `imageSize` | string | - | Output resolution: `1K`, `2K`, or `4K`. Leave unspecified to use the model's default resolution |
|
|
224
259
|
| `blendImages` | boolean | - | Enable multi-image blending for combining multiple visual elements naturally |
|
|
225
260
|
| `maintainCharacterConsistency` | boolean | - | Maintain character appearance consistency across different poses and scenes |
|
|
226
|
-
| `useWorldKnowledge` | boolean | - | Use real-world knowledge for accurate context (
|
|
227
|
-
| `useGoogleSearch` | boolean | - | Enable Google Search grounding
|
|
228
|
-
| `purpose` | string | - | Intended use
|
|
261
|
+
| `useWorldKnowledge` | boolean | - | Use real-world knowledge for accurate context (historical figures, landmarks, factual scenarios) |
|
|
262
|
+
| `useGoogleSearch` | boolean | - | Enable Google Search grounding for real-time factual accuracy |
|
|
263
|
+
| `purpose` | string | - | Intended use (e.g., "cookbook cover", "social media post"). Helps tailor visual style and details |
|
|
229
264
|
|
|
230
265
|
#### Response
|
|
231
266
|
|
|
@@ -238,14 +273,14 @@ The MCP server exposes a single tool for all image operations. Internally, it us
|
|
|
238
273
|
"mimeType": "image/png"
|
|
239
274
|
},
|
|
240
275
|
"metadata": {
|
|
241
|
-
"model": "gemini-3-
|
|
276
|
+
"model": "gemini-3.1-flash-image-preview",
|
|
242
277
|
"processingTime": 5000,
|
|
243
|
-
"timestamp": "
|
|
278
|
+
"timestamp": "2026-01-01T12:00:00.000Z"
|
|
244
279
|
}
|
|
245
280
|
}
|
|
246
281
|
```
|
|
247
282
|
|
|
248
|
-
##
|
|
283
|
+
## Troubleshooting
|
|
249
284
|
|
|
250
285
|
### Common Issues
|
|
251
286
|
|
|
@@ -265,27 +300,31 @@ The MCP server exposes a single tool for all image operations. Internally, it us
|
|
|
265
300
|
|
|
266
301
|
### Performance Tips
|
|
267
302
|
|
|
268
|
-
-
|
|
269
|
-
-
|
|
270
|
-
-
|
|
271
|
-
-
|
|
303
|
+
- `fast` preset: ~30–40 seconds typical (includes prompt optimization)
|
|
304
|
+
- `balanced` preset: Slightly longer due to enhanced thinking
|
|
305
|
+
- `quality` preset: Slower but highest fidelity output
|
|
306
|
+
- High-resolution (2K/4K): Additional processing time for superior detail
|
|
307
|
+
- Simple prompts work great — the optimizer automatically adds professional details
|
|
272
308
|
- Complex prompts are preserved and further enhanced
|
|
273
|
-
- Consider
|
|
309
|
+
- Consider `useWorldKnowledge` for historical or factual subjects
|
|
274
310
|
- Use `imageSize: "4K"` when text clarity and fine details are critical
|
|
275
311
|
|
|
276
|
-
##
|
|
312
|
+
## Usage Notes
|
|
277
313
|
|
|
278
|
-
- This MCP server uses the paid Gemini API
|
|
279
|
-
- Gemini 2.5 Flash
|
|
280
|
-
-
|
|
314
|
+
- This MCP server uses the paid Gemini API:
|
|
315
|
+
- **Prompt optimization**: Gemini 2.5 Flash (minimal token usage)
|
|
316
|
+
- **Image generation**: Model depends on quality preset
|
|
317
|
+
- `fast` / `balanced`: Nano Banana 2 — Gemini 3.1 Flash Image (lower cost)
|
|
318
|
+
- `quality`: Nano Banana Pro — Gemini 3 Pro Image (higher cost)
|
|
319
|
+
- `balanced` uses additional thinking tokens (slightly higher cost than `fast`)
|
|
281
320
|
- Check current pricing and rate limits at [Google AI Studio](https://aistudio.google.com/)
|
|
282
321
|
- Monitor your API usage to avoid unexpected charges
|
|
283
322
|
- The prompt optimization step adds minimal cost while significantly improving output quality
|
|
284
323
|
|
|
285
|
-
##
|
|
324
|
+
## License
|
|
286
325
|
|
|
287
326
|
MIT License - see [LICENSE](LICENSE) for details.
|
|
288
327
|
|
|
289
328
|
---
|
|
290
329
|
|
|
291
|
-
**Need help?** [Open an issue](https://github.com/shinpr/mcp-image/issues) or check the [troubleshooting section](
|
|
330
|
+
**Need help?** [Open an issue](https://github.com/shinpr/mcp-image/issues) or check the [troubleshooting section](#troubleshooting) above.
|
package/bin/install-skills.js
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
'use strict'
|
|
3
2
|
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
import { cpSync, existsSync, mkdirSync } from 'node:fs'
|
|
4
|
+
import { dirname, resolve } from 'node:path'
|
|
5
|
+
import { fileURLToPath } from 'node:url'
|
|
6
|
+
|
|
7
|
+
const __filename = fileURLToPath(import.meta.url)
|
|
8
|
+
const __dirname = dirname(__filename)
|
|
6
9
|
|
|
7
10
|
const SKILLS_SOURCE = resolve(__dirname, '..', 'skills', 'image-generation')
|
|
8
11
|
const SKILL_DIR_NAME = 'image-generation'
|
|
@@ -77,7 +80,7 @@ function install(targetPath) {
|
|
|
77
80
|
console.log(`Installed skills to: ${targetPath}`)
|
|
78
81
|
}
|
|
79
82
|
|
|
80
|
-
function run(args) {
|
|
83
|
+
export function run(args) {
|
|
81
84
|
if (args.length === 0) {
|
|
82
85
|
printHelp()
|
|
83
86
|
process.exit(0)
|
|
@@ -110,5 +113,3 @@ function run(args) {
|
|
|
110
113
|
console.log('Installed files:')
|
|
111
114
|
console.log(' - image-generation/SKILL.md')
|
|
112
115
|
}
|
|
113
|
-
|
|
114
|
-
module.exports = { run }
|
|
@@ -3,9 +3,10 @@
|
|
|
3
3
|
* Integrates with Google's Gemini AI API using the official SDK
|
|
4
4
|
* Supports automatic URL Context processing and feature parameters
|
|
5
5
|
*/
|
|
6
|
-
import type {
|
|
7
|
-
import type {
|
|
8
|
-
import {
|
|
6
|
+
import type { ImageQuality } from '../types/mcp.js';
|
|
7
|
+
import type { Result } from '../types/result.js';
|
|
8
|
+
import type { Config } from '../utils/config.js';
|
|
9
|
+
import { GeminiAPIError, NetworkError } from '../utils/errors.js';
|
|
9
10
|
/**
|
|
10
11
|
* Metadata for generated images
|
|
11
12
|
*/
|
|
@@ -27,6 +28,7 @@ export interface GeminiApiParams {
|
|
|
27
28
|
aspectRatio?: string;
|
|
28
29
|
imageSize?: string;
|
|
29
30
|
useGoogleSearch?: boolean;
|
|
31
|
+
quality?: ImageQuality;
|
|
30
32
|
}
|
|
31
33
|
/**
|
|
32
34
|
* Result of image generation
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"geminiClient.d.ts","sourceRoot":"","sources":["../../src/api/geminiClient.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"geminiClient.d.ts","sourceRoot":"","sources":["../../src/api/geminiClient.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAA;AAEnD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAA;AAEhD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAA;AAChD,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AAsHjE;;GAEG;AACH,MAAM,WAAW,wBAAwB;IACvC,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,SAAS,EAAE,IAAI,CAAA;IACf,kBAAkB,EAAE,OAAO,CAAA;IAE3B,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,UAAU,CAAC,EAAE,MAAM,CAAA;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,MAAM,CAAA;IACd,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,eAAe,CAAC,EAAE,OAAO,CAAA;IACzB,OAAO,CAAC,EAAE,YAAY,CAAA;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,SAAS,EAAE,MAAM,CAAA;IACjB,QAAQ,EAAE,wBAAwB,CAAA;CACnC;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,aAAa,CACX,MAAM,EAAE,eAAe,GACtB,OAAO,CAAC,MAAM,CAAC,oBAAoB,EAAE,cAAc,GAAG,YAAY,CAAC,CAAC,CAAA;CACxE;AAoVD;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,YAAY,EAAE,cAAc,CAAC,CAevF"}
|
package/dist/api/geminiClient.js
CHANGED
|
@@ -1,14 +1,12 @@
|
|
|
1
|
-
"use strict";
|
|
2
1
|
/**
|
|
3
2
|
* Gemini API client for image generation
|
|
4
3
|
* Integrates with Google's Gemini AI API using the official SDK
|
|
5
4
|
* Supports automatic URL Context processing and feature parameters
|
|
6
5
|
*/
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
const errors_1 = require("../utils/errors");
|
|
6
|
+
import { GoogleGenAI } from '@google/genai';
|
|
7
|
+
import { GEMINI_MODELS } from '../types/mcp.js';
|
|
8
|
+
import { Err, Ok } from '../types/result.js';
|
|
9
|
+
import { GeminiAPIError, NetworkError } from '../utils/errors.js';
|
|
12
10
|
/**
|
|
13
11
|
* Safely analyze response structure for debugging (removes sensitive data)
|
|
14
12
|
*/
|
|
@@ -70,9 +68,9 @@ function isGeminiResponse(obj) {
|
|
|
70
68
|
* Implementation of Gemini API client
|
|
71
69
|
*/
|
|
72
70
|
class GeminiClientImpl {
|
|
73
|
-
constructor(genai) {
|
|
71
|
+
constructor(genai, defaultQuality = 'fast') {
|
|
74
72
|
this.genai = genai;
|
|
75
|
-
this.
|
|
73
|
+
this.defaultQuality = defaultQuality;
|
|
76
74
|
}
|
|
77
75
|
async generateImage(params) {
|
|
78
76
|
try {
|
|
@@ -105,6 +103,10 @@ class GeminiClientImpl {
|
|
|
105
103
|
],
|
|
106
104
|
});
|
|
107
105
|
}
|
|
106
|
+
// Determine effective quality
|
|
107
|
+
const effectiveQuality = params.quality ?? this.defaultQuality;
|
|
108
|
+
// Select model based on quality preset
|
|
109
|
+
const modelName = effectiveQuality === 'quality' ? GEMINI_MODELS.PRO : GEMINI_MODELS.FLASH;
|
|
108
110
|
// Construct config object for generateContent
|
|
109
111
|
const imageConfig = {};
|
|
110
112
|
if (params.aspectRatio) {
|
|
@@ -113,19 +115,20 @@ class GeminiClientImpl {
|
|
|
113
115
|
if (params.imageSize) {
|
|
114
116
|
imageConfig['imageSize'] = params.imageSize;
|
|
115
117
|
}
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
118
|
+
// Build config with optional thinkingConfig
|
|
119
|
+
const thinkingConfig = effectiveQuality === 'balanced' ? { thinkingConfig: { thinkingLevel: 'high' } } : {};
|
|
120
|
+
const config = {
|
|
121
|
+
...(Object.keys(imageConfig).length > 0 && { imageConfig }),
|
|
122
|
+
responseModalities: ['IMAGE'],
|
|
123
|
+
...thinkingConfig,
|
|
124
|
+
};
|
|
125
|
+
// Construct tools array for Google Search grounding (web + image search)
|
|
126
|
+
const tools = params.useGoogleSearch
|
|
127
|
+
? [{ googleSearch: { searchTypes: { webSearch: {}, imageSearch: {} } } }]
|
|
128
|
+
: undefined;
|
|
129
|
+
// Generate content using Gemini API
|
|
127
130
|
const rawResponse = await this.genai.models.generateContent({
|
|
128
|
-
model:
|
|
131
|
+
model: modelName,
|
|
129
132
|
contents: requestContent,
|
|
130
133
|
config,
|
|
131
134
|
...(tools && { tools }),
|
|
@@ -137,14 +140,14 @@ class GeminiClientImpl {
|
|
|
137
140
|
const asRecord = rawResponse;
|
|
138
141
|
if (asRecord['error']) {
|
|
139
142
|
const error = asRecord['error'];
|
|
140
|
-
return
|
|
143
|
+
return Err(new GeminiAPIError(`Gemini API Error: ${error['message'] || 'Unknown error'}`, {
|
|
141
144
|
code: error['code'],
|
|
142
145
|
status: error['status'],
|
|
143
146
|
details: error['details'] || responseStructure,
|
|
144
147
|
stage: 'api_error',
|
|
145
148
|
}));
|
|
146
149
|
}
|
|
147
|
-
return
|
|
150
|
+
return Err(new GeminiAPIError('Invalid response structure from Gemini API', {
|
|
148
151
|
message: 'The API returned an unexpected response format',
|
|
149
152
|
responseStructure: responseStructure,
|
|
150
153
|
stage: 'response_validation',
|
|
@@ -160,7 +163,7 @@ class GeminiClientImpl {
|
|
|
160
163
|
if (responseAsRecord['promptFeedback']) {
|
|
161
164
|
const promptFeedback = responseAsRecord['promptFeedback'];
|
|
162
165
|
if (promptFeedback['blockReason'] === 'SAFETY') {
|
|
163
|
-
return
|
|
166
|
+
return Err(new GeminiAPIError('Image generation blocked for safety reasons', {
|
|
164
167
|
stage: 'prompt_analysis',
|
|
165
168
|
blockReason: promptFeedback['blockReason'],
|
|
166
169
|
suggestion: 'Rephrase your prompt to avoid potentially sensitive content',
|
|
@@ -168,7 +171,7 @@ class GeminiClientImpl {
|
|
|
168
171
|
}
|
|
169
172
|
if (promptFeedback['blockReason'] === 'OTHER' ||
|
|
170
173
|
promptFeedback['blockReason'] === 'PROHIBITED_CONTENT') {
|
|
171
|
-
return
|
|
174
|
+
return Err(new GeminiAPIError('Image generation blocked due to prohibited content', {
|
|
172
175
|
stage: 'prompt_analysis',
|
|
173
176
|
blockReason: promptFeedback['blockReason'],
|
|
174
177
|
suggestion: 'Remove any prohibited content from your prompt and try again',
|
|
@@ -177,7 +180,7 @@ class GeminiClientImpl {
|
|
|
177
180
|
}
|
|
178
181
|
// Check for candidates
|
|
179
182
|
if (!responseData.candidates || responseData.candidates.length === 0) {
|
|
180
|
-
return
|
|
183
|
+
return Err(new GeminiAPIError('No image generated: Content may have been filtered', {
|
|
181
184
|
stage: 'generation',
|
|
182
185
|
candidatesCount: 0,
|
|
183
186
|
suggestion: 'Try rephrasing your prompt to avoid potentially sensitive content',
|
|
@@ -185,7 +188,7 @@ class GeminiClientImpl {
|
|
|
185
188
|
}
|
|
186
189
|
const candidate = responseData.candidates[0];
|
|
187
190
|
if (!candidate || !candidate.content || !candidate.content.parts) {
|
|
188
|
-
return
|
|
191
|
+
return Err(new GeminiAPIError('No valid content in response', {
|
|
189
192
|
stage: 'candidate_extraction',
|
|
190
193
|
suggestion: 'The API response was incomplete. Please try again',
|
|
191
194
|
}));
|
|
@@ -195,7 +198,7 @@ class GeminiClientImpl {
|
|
|
195
198
|
if (candidate.finishReason) {
|
|
196
199
|
const finishReason = candidate.finishReason;
|
|
197
200
|
if (finishReason === 'IMAGE_SAFETY') {
|
|
198
|
-
return
|
|
201
|
+
return Err(new GeminiAPIError('Image generation stopped for safety reasons', {
|
|
199
202
|
finishReason,
|
|
200
203
|
stage: 'generation_stopped',
|
|
201
204
|
suggestion: 'Modify your prompt to avoid potentially sensitive content',
|
|
@@ -214,7 +217,7 @@ class GeminiClientImpl {
|
|
|
214
217
|
}));
|
|
215
218
|
}
|
|
216
219
|
if (finishReason === 'MAX_TOKENS') {
|
|
217
|
-
return
|
|
220
|
+
return Err(new GeminiAPIError('Maximum token limit reached during generation', {
|
|
218
221
|
finishReason,
|
|
219
222
|
stage: 'generation_stopped',
|
|
220
223
|
suggestion: 'Try using a shorter or simpler prompt',
|
|
@@ -222,7 +225,7 @@ class GeminiClientImpl {
|
|
|
222
225
|
}
|
|
223
226
|
}
|
|
224
227
|
if (parts.length === 0) {
|
|
225
|
-
return
|
|
228
|
+
return Err(new GeminiAPIError('No content parts in response', {
|
|
226
229
|
stage: 'content_extraction',
|
|
227
230
|
suggestion: 'The generation was incomplete. Please try again',
|
|
228
231
|
}));
|
|
@@ -233,7 +236,7 @@ class GeminiClientImpl {
|
|
|
233
236
|
if (!imagePart?.inlineData) {
|
|
234
237
|
// If there's text, it's likely an error message from Gemini
|
|
235
238
|
const errorMessage = textPart?.text || 'Image generation failed';
|
|
236
|
-
return
|
|
239
|
+
return Err(new GeminiAPIError('Image generation failed due to content filtering', {
|
|
237
240
|
reason: errorMessage,
|
|
238
241
|
stage: 'image_extraction',
|
|
239
242
|
suggestion: 'The prompt was blocked by safety filters. Try rephrasing your prompt to avoid potentially sensitive content.',
|
|
@@ -244,7 +247,7 @@ class GeminiClientImpl {
|
|
|
244
247
|
const mimeType = imagePart.inlineData.mimeType || 'image/png';
|
|
245
248
|
// Create metadata
|
|
246
249
|
const metadata = {
|
|
247
|
-
model:
|
|
250
|
+
model: modelName,
|
|
248
251
|
prompt: params.prompt,
|
|
249
252
|
mimeType,
|
|
250
253
|
timestamp: new Date(),
|
|
@@ -252,7 +255,7 @@ class GeminiClientImpl {
|
|
|
252
255
|
...(responseData.modelVersion && { modelVersion: responseData.modelVersion }),
|
|
253
256
|
...(responseData.responseId && { responseId: responseData.responseId }),
|
|
254
257
|
};
|
|
255
|
-
return
|
|
258
|
+
return Ok({
|
|
256
259
|
imageData: imageBuffer,
|
|
257
260
|
metadata,
|
|
258
261
|
});
|
|
@@ -265,14 +268,14 @@ class GeminiClientImpl {
|
|
|
265
268
|
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
|
266
269
|
// Check if it's a network error
|
|
267
270
|
if (this.isNetworkError(error)) {
|
|
268
|
-
return
|
|
271
|
+
return Err(new NetworkError(`Network error during image generation: ${errorMessage}`, 'Check your internet connection and try again', error instanceof Error ? error : undefined));
|
|
269
272
|
}
|
|
270
273
|
// Check if it's an API-specific error
|
|
271
274
|
if (this.isAPIError(error)) {
|
|
272
|
-
return
|
|
275
|
+
return Err(new GeminiAPIError(`Failed to generate image: ${errorMessage}`, this.getAPIErrorSuggestion(errorMessage), this.extractStatusCode(error)));
|
|
273
276
|
}
|
|
274
277
|
// Generic API error
|
|
275
|
-
return
|
|
278
|
+
return Err(new GeminiAPIError(`Failed to generate image with prompt "${prompt}": ${errorMessage}`, 'Check your API key, quota, and prompt validity. Try again with a different prompt'));
|
|
276
279
|
}
|
|
277
280
|
isNetworkError(error) {
|
|
278
281
|
if (error instanceof Error) {
|
|
@@ -313,16 +316,16 @@ class GeminiClientImpl {
|
|
|
313
316
|
* @param config Configuration containing API key and other settings
|
|
314
317
|
* @returns Result containing the client or an error
|
|
315
318
|
*/
|
|
316
|
-
function createGeminiClient(config) {
|
|
319
|
+
export function createGeminiClient(config) {
|
|
317
320
|
try {
|
|
318
|
-
const genai = new
|
|
321
|
+
const genai = new GoogleGenAI({
|
|
319
322
|
apiKey: config.geminiApiKey,
|
|
320
323
|
});
|
|
321
|
-
return
|
|
324
|
+
return Ok(new GeminiClientImpl(genai, config.imageQuality));
|
|
322
325
|
}
|
|
323
326
|
catch (error) {
|
|
324
327
|
const errorMessage = error instanceof Error ? error.message : 'Unknown error';
|
|
325
|
-
return
|
|
328
|
+
return Err(new GeminiAPIError(`Failed to initialize Gemini client: ${errorMessage}`, 'Verify your GEMINI_API_KEY is valid and the @google/genai package is properly installed'));
|
|
326
329
|
}
|
|
327
330
|
}
|
|
328
331
|
//# sourceMappingURL=geminiClient.js.map
|