mcp-hydrocoder-image 1.1.1 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/README.md +239 -80
  2. package/dist/api/geminiClient.d.ts +3 -30
  3. package/dist/api/geminiClient.d.ts.map +1 -1
  4. package/dist/api/geminiClient.js +13 -57
  5. package/dist/api/geminiClient.js.map +1 -1
  6. package/dist/api/geminiTextClient.js +1 -1
  7. package/dist/api/geminiTextClient.js.map +1 -1
  8. package/dist/api/imageProvider.d.ts +29 -0
  9. package/dist/api/imageProvider.d.ts.map +1 -0
  10. package/dist/api/imageProvider.js +5 -0
  11. package/dist/api/imageProvider.js.map +1 -0
  12. package/dist/api/volcengineClient.d.ts +13 -0
  13. package/dist/api/volcengineClient.d.ts.map +1 -0
  14. package/dist/api/volcengineClient.js +288 -0
  15. package/dist/api/volcengineClient.js.map +1 -0
  16. package/dist/business/inputValidator.d.ts.map +1 -1
  17. package/dist/business/inputValidator.js +69 -1
  18. package/dist/business/inputValidator.js.map +1 -1
  19. package/dist/business/multiImagePrompt.d.ts +13 -0
  20. package/dist/business/multiImagePrompt.d.ts.map +1 -0
  21. package/dist/business/multiImagePrompt.js +135 -0
  22. package/dist/business/multiImagePrompt.js.map +1 -0
  23. package/dist/business/providerResolver.d.ts +4 -0
  24. package/dist/business/providerResolver.d.ts.map +1 -0
  25. package/dist/business/providerResolver.js +45 -0
  26. package/dist/business/providerResolver.js.map +1 -0
  27. package/dist/business/responseBuilder.d.ts +3 -2
  28. package/dist/business/responseBuilder.d.ts.map +1 -1
  29. package/dist/business/responseBuilder.js +69 -48
  30. package/dist/business/responseBuilder.js.map +1 -1
  31. package/dist/index.d.ts +1 -1
  32. package/dist/index.d.ts.map +1 -1
  33. package/dist/server/errorHandler.d.ts.map +1 -1
  34. package/dist/server/errorHandler.js +3 -2
  35. package/dist/server/errorHandler.js.map +1 -1
  36. package/dist/server/mcpServer.d.ts +90 -29
  37. package/dist/server/mcpServer.d.ts.map +1 -1
  38. package/dist/server/mcpServer.js +430 -288
  39. package/dist/server/mcpServer.js.map +1 -1
  40. package/dist/types/mcp.d.ts +62 -15
  41. package/dist/types/mcp.d.ts.map +1 -1
  42. package/dist/types/mcp.js +15 -0
  43. package/dist/types/mcp.js.map +1 -1
  44. package/dist/utils/config.d.ts +6 -2
  45. package/dist/utils/config.d.ts.map +1 -1
  46. package/dist/utils/config.js +43 -13
  47. package/dist/utils/config.js.map +1 -1
  48. package/dist/utils/errors.d.ts +9 -0
  49. package/dist/utils/errors.d.ts.map +1 -1
  50. package/dist/utils/errors.js +50 -1
  51. package/dist/utils/errors.js.map +1 -1
  52. package/package.json +2 -1
package/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # MCP HydroCoder Image 🍌
2
2
 
3
- > AI image generation and editing MCP server for Cursor, Claude Code, Codex, and any MCP-compatible tool — powered by Nano Banana 2 and Nano Banana Pro (Google Gemini).
3
+ > AI image generation and editing MCP server for Cursor, Claude Code, Codex, and any MCP-compatible tool — powered by Volcengine Seedream and Google Gemini.
4
4
 
5
5
  [![npm version](https://badge.fury.io/js/mcp-hydrocoder-image.svg)](https://www.npmjs.com/package/mcp-hydrocoder-image)
6
6
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
@@ -36,6 +36,10 @@ The prompt optimizer uses a **Subject–Context–Style** framework (powered by
36
36
 
37
37
  ## Features
38
38
 
39
+ - **Multi-Provider Support**: Use either Volcengine Seedream or Google Gemini behind the same MCP server, with provider selection via config or per-request override.
40
+ - **Provider-Aware Routing**: If the caller explicitly asks for Volcengine / Seedream / 豆包 / 火山引擎 or Gemini / Nano Banana, the server metadata now nudges agents to pass the correct `provider`, and the server can infer the provider from explicit prompt instructions when the parameter is omitted.
41
+ - **Volcengine-First Production Path**: README and install examples now default to Volcengine because it is the more stable day-to-day path for text-to-image and grouped output in this project.
42
+ - **Dedicated Multi-Image Tooling**: Use `generate_image` for single-image work and `generate_multi_image` for grouped multi-image output with server-side fan-out and prompt normalization.
39
43
  - **Built-in Prompt Optimization**: Your simple prompt is automatically enriched with photographic and artistic details — lighting, composition, atmosphere — using Gemini 2.5 Flash. No prompt engineering skills required.
40
44
  - **Three Quality Tiers**: Choose between fast iteration, balanced quality, or maximum fidelity with Nano Banana 2 (Gemini 3.1 Flash Image) and Nano Banana Pro (Gemini 3 Pro Image). [See Quality Presets](#quality-presets).
41
45
  - **Image Editing**: Transform existing images with natural language instructions (image-to-image) while preserving original style and visual consistency.
@@ -81,24 +85,30 @@ npx mcp-hydrocoder-image skills install --path ~/.claude/skills
81
85
  | | MCP Server | Agent Skill |
82
86
  |---|---|---|
83
87
  | **Use when** | Your AI tool does not have built-in image generation | Your AI tool already generates images natively |
84
- | **Requires** | Gemini API key | Nothing |
85
- | **What it does** | Generates images via Gemini API with automatic prompt optimization | Teaches the AI to write better prompts |
88
+ | **Requires** | Volcengine or Gemini API key | Nothing |
89
+ | **What it does** | Generates images via Volcengine or Gemini API with automatic prompt optimization | Teaches the AI to write better prompts |
86
90
  | **Works with** | MCP-compatible tools (Cursor, Claude Code, Codex, etc.) | Any tool supporting the [Agent Skills](https://agentskills.io) open standard |
87
91
 
88
92
  ---
89
93
 
90
94
  ## Prerequisites
91
95
 
92
- - **Node.js** 20 or higher
93
- - **Gemini API Key** - Get yours at [Google AI Studio](https://aistudio.google.com/apikey)
94
- - An MCP-compatible AI tool: **Cursor**, **Claude Code**, **Codex**, or others
95
- - Basic terminal/command line knowledge
96
+ - **Node.js** 20 or higher
97
+ - **API Key**
98
+ - Volcengine: create an Ark API key in the Volcengine console
99
+ - Gemini: get yours at [Google AI Studio](https://aistudio.google.com/apikey)
100
+ - An MCP-compatible AI tool: **Cursor**, **Claude Code**, **Codex**, or others
101
+ - Basic terminal/command line knowledge
96
102
 
97
103
  ## Quick Start
98
104
 
99
- ### 1. Get Your Gemini API Key
105
+ ### 1. Get Your API Key
100
106
 
101
- Get your API key from [Google AI Studio](https://aistudio.google.com/apikey)
107
+ Choose a provider and create an API key:
108
+ - **Volcengine**: Volcengine Ark console (`ARK_API_KEY`-compatible key)
109
+ - **Gemini**: [Google AI Studio](https://aistudio.google.com/apikey)
110
+
111
+ > **Recommended default**: Start with Volcengine. In this project it is the preferred production path, especially for single-image generation, grouped multi-image workflows, and predictable output sizing.
102
112
 
103
113
  ### 2. MCP Configuration
104
114
 
@@ -111,11 +121,14 @@ Add to `~/.codex/config.toml`:
111
121
  command = "npx"
112
122
  args = ["-y", "mcp-hydrocoder-image"]
113
123
 
114
- [mcp_servers.mcp-hydrocoder-image.env]
115
- GEMINI_API_KEY = "your_gemini_api_key_here"
116
- IMAGE_OUTPUT_DIR = "/absolute/path/to/images"
117
- API_TIMEOUT = "120000" # Optional: timeout in milliseconds (default: 120s)
118
- ```
124
+ [mcp_servers.mcp-hydrocoder-image.env]
125
+ IMAGE_PROVIDER = "volcengine"
126
+ VOLCENGINE_API_KEY = "your_volcengine_api_key_here"
127
+ VOLCENGINE_MODEL = "doubao-seedream-4-5-251128"
128
+ VOLCENGINE_API_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3"
129
+ IMAGE_OUTPUT_DIR = "/absolute/path/to/images"
130
+ API_TIMEOUT = "120000" # Optional: timeout in milliseconds (default: 120s)
131
+ ```
119
132
 
120
133
  #### For Cursor
121
134
 
@@ -128,13 +141,16 @@ Add to your Cursor settings:
128
141
  {
129
142
  "mcpServers": {
130
143
  "mcp-hydrocoder-image": {
131
- "command": "npx",
132
- "args": ["-y", "mcp-hydrocoder-image"],
133
- "env": {
134
- "GEMINI_API_KEY": "your_gemini_api_key_here",
135
- "IMAGE_OUTPUT_DIR": "/absolute/path/to/images",
136
- "API_TIMEOUT": "120000"
137
- }
144
+ "command": "npx",
145
+ "args": ["-y", "mcp-hydrocoder-image"],
146
+ "env": {
147
+ "IMAGE_PROVIDER": "volcengine",
148
+ "VOLCENGINE_API_KEY": "your_volcengine_api_key_here",
149
+ "VOLCENGINE_MODEL": "doubao-seedream-4-5-251128",
150
+ "VOLCENGINE_API_BASE_URL": "https://ark.cn-beijing.volces.com/api/v3",
151
+ "IMAGE_OUTPUT_DIR": "/absolute/path/to/images",
152
+ "API_TIMEOUT": "120000"
153
+ }
138
154
  }
139
155
  }
140
156
  }
@@ -145,13 +161,16 @@ Add to your Cursor settings:
145
161
  {
146
162
  "mcpServers": {
147
163
  "mcp-hydrocoder-image": {
148
- "command": "npx",
149
- "args": ["-y", "mcp-hydrocoder-image"],
150
- "env": {
151
- "GEMINI_API_KEY": "your_gemini_api_key_here",
152
- "IMAGE_OUTPUT_DIR": "C:\\absolute\\path\\to\\images",
153
- "API_TIMEOUT": "120000"
154
- }
164
+ "command": "npx",
165
+ "args": ["-y", "mcp-hydrocoder-image"],
166
+ "env": {
167
+ "IMAGE_PROVIDER": "volcengine",
168
+ "VOLCENGINE_API_KEY": "your_volcengine_api_key_here",
169
+ "VOLCENGINE_MODEL": "doubao-seedream-4-5-251128",
170
+ "VOLCENGINE_API_BASE_URL": "https://ark.cn-beijing.volces.com/api/v3",
171
+ "IMAGE_OUTPUT_DIR": "C:\\absolute\\path\\to\\images",
172
+ "API_TIMEOUT": "120000"
173
+ }
155
174
  }
156
175
  }
157
176
  }
@@ -163,21 +182,27 @@ Run in your project directory to enable for that project:
163
182
 
164
183
  ```bash
165
184
  cd /path/to/your/project
166
- claude mcp add mcp-hydrocoder-image \
167
- --env GEMINI_API_KEY=your-api-key \
168
- --env IMAGE_OUTPUT_DIR=/absolute/path/to/images \
169
- --env API_TIMEOUT=120000 \
170
- -- npx -y mcp-hydrocoder-image
185
+ claude mcp add mcp-hydrocoder-image \
186
+ --env IMAGE_PROVIDER=volcengine \
187
+ --env VOLCENGINE_API_KEY=your-volcengine-api-key \
188
+ --env VOLCENGINE_MODEL=doubao-seedream-4-5-251128 \
189
+ --env VOLCENGINE_API_BASE_URL=https://ark.cn-beijing.volces.com/api/v3 \
190
+ --env IMAGE_OUTPUT_DIR=/absolute/path/to/images \
191
+ --env API_TIMEOUT=120000 \
192
+ -- npx -y mcp-hydrocoder-image
171
193
  ```
172
194
 
173
195
  Or add globally for all projects:
174
196
 
175
197
  ```bash
176
- claude mcp add mcp-hydrocoder-image --scope user \
177
- --env GEMINI_API_KEY=your-api-key \
178
- --env IMAGE_OUTPUT_DIR=/absolute/path/to/images \
179
- --env API_TIMEOUT=120000 \
180
- -- npx -y mcp-hydrocoder-image
198
+ claude mcp add mcp-hydrocoder-image --scope user \
199
+ --env IMAGE_PROVIDER=volcengine \
200
+ --env VOLCENGINE_API_KEY=your-volcengine-api-key \
201
+ --env VOLCENGINE_MODEL=doubao-seedream-4-5-251128 \
202
+ --env VOLCENGINE_API_BASE_URL=https://ark.cn-beijing.volces.com/api/v3 \
203
+ --env IMAGE_OUTPUT_DIR=/absolute/path/to/images \
204
+ --env API_TIMEOUT=120000 \
205
+ -- npx -y mcp-hydrocoder-image
181
206
  ```
182
207
 
183
208
  Or add via JSON config (`~/.claude/settings.json` for global, `.mcp.json` for project):
@@ -187,13 +212,16 @@ Or add via JSON config (`~/.claude/settings.json` for global, `.mcp.json` for pr
187
212
  {
188
213
  "mcpServers": {
189
214
  "mcp-hydrocoder-image": {
190
- "command": "npx",
191
- "args": ["-y", "mcp-hydrocoder-image"],
192
- "env": {
193
- "GEMINI_API_KEY": "your_gemini_api_key_here",
194
- "IMAGE_OUTPUT_DIR": "/absolute/path/to/images",
195
- "API_TIMEOUT": "120000"
196
- }
215
+ "command": "npx",
216
+ "args": ["-y", "mcp-hydrocoder-image"],
217
+ "env": {
218
+ "IMAGE_PROVIDER": "volcengine",
219
+ "VOLCENGINE_API_KEY": "your_volcengine_api_key_here",
220
+ "VOLCENGINE_MODEL": "doubao-seedream-4-5-251128",
221
+ "VOLCENGINE_API_BASE_URL": "https://ark.cn-beijing.volces.com/api/v3",
222
+ "IMAGE_OUTPUT_DIR": "/absolute/path/to/images",
223
+ "API_TIMEOUT": "120000"
224
+ }
197
225
  }
198
226
  }
199
227
  }
@@ -204,26 +232,88 @@ Or add via JSON config (`~/.claude/settings.json` for global, `.mcp.json` for pr
204
232
  {
205
233
  "mcpServers": {
206
234
  "mcp-hydrocoder-image": {
207
- "command": "npx",
208
- "args": ["-y", "mcp-hydrocoder-image"],
209
- "env": {
210
- "GEMINI_API_KEY": "your_gemini_api_key_here",
211
- "IMAGE_OUTPUT_DIR": "C:\\absolute\\path\\to\\images",
212
- "API_TIMEOUT": "120000"
213
- }
235
+ "command": "npx",
236
+ "args": ["-y", "mcp-hydrocoder-image"],
237
+ "env": {
238
+ "IMAGE_PROVIDER": "volcengine",
239
+ "VOLCENGINE_API_KEY": "your_volcengine_api_key_here",
240
+ "VOLCENGINE_MODEL": "doubao-seedream-4-5-251128",
241
+ "VOLCENGINE_API_BASE_URL": "https://ark.cn-beijing.volces.com/api/v3",
242
+ "IMAGE_OUTPUT_DIR": "C:\\absolute\\path\\to\\images",
243
+ "API_TIMEOUT": "120000"
244
+ }
214
245
  }
215
246
  }
216
247
  }
217
248
  ```
218
249
 
219
- ⚠️ **Security Note**: Never commit your API key to version control. Keep it secure and use environment-specific configuration.
220
-
221
- 📁 **Path Requirements**:
222
- - `IMAGE_OUTPUT_DIR` must be an absolute path (e.g., `/Users/username/images`, not `./images`)
223
- - Defaults to `./output` in the current working directory if not specified
224
- - Directory will be created automatically if it doesn't exist
225
-
226
- #### Custom API Base URL (Third-party Proxy)
250
+ ⚠️ **Security Note**: Never commit your API key to version control. Keep it secure and use environment-specific configuration.
251
+
252
+ 📁 **Path Requirements**:
253
+ - `IMAGE_OUTPUT_DIR` must be an absolute path (e.g., `/Users/username/images`, not `./images`)
254
+ - Defaults to `./output` in the current working directory if not specified
255
+ - Directory will be created automatically if it doesn't exist
256
+
257
+ #### Volcengine Local MCP Example
258
+
259
+ For local MCP testing, build the project first and point your MCP client at the local `dist/index.js` entry instead of `npx`.
260
+
261
+ ```bash
262
+ npm install
263
+ npm run build
264
+ ```
265
+
266
+ **Codex (`~/.codex/config.toml` on Windows):**
267
+ ```toml
268
+ [mcp_servers.mcp-hydrocoder-image-local]
269
+ command = "node"
270
+ args = ["C:\\workspace\\develop\\ccExtensions\\mcpBanana\\dist\\index.js"]
271
+
272
+ [mcp_servers.mcp-hydrocoder-image-local.env]
273
+ IMAGE_PROVIDER = "volcengine"
274
+ VOLCENGINE_API_KEY = "your_volcengine_api_key_here"
275
+ VOLCENGINE_MODEL = "doubao-seedream-4-5-251128"
276
+ VOLCENGINE_API_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3"
277
+ IMAGE_OUTPUT_DIR = "C:\\workspace\\develop\\ccExtensions\\mcpBanana\\output"
278
+ API_TIMEOUT = "120000"
279
+ ```
280
+
281
+ **Cursor / Claude Code JSON config (Windows local repo example):**
282
+ ```json
283
+ {
284
+ "mcpServers": {
285
+ "mcp-hydrocoder-image-local": {
286
+ "command": "node",
287
+ "args": [
288
+ "C:\\workspace\\develop\\ccExtensions\\mcpBanana\\dist\\index.js"
289
+ ],
290
+ "env": {
291
+ "IMAGE_PROVIDER": "volcengine",
292
+ "VOLCENGINE_API_KEY": "your_volcengine_api_key_here",
293
+ "VOLCENGINE_MODEL": "doubao-seedream-4-5-251128",
294
+ "VOLCENGINE_API_BASE_URL": "https://ark.cn-beijing.volces.com/api/v3",
295
+ "IMAGE_OUTPUT_DIR": "C:\\workspace\\develop\\ccExtensions\\mcpBanana\\output",
296
+ "API_TIMEOUT": "120000"
297
+ }
298
+ }
299
+ }
300
+ }
301
+ ```
302
+
303
+ If you move the repo, update both the `args` path and `IMAGE_OUTPUT_DIR` to the new absolute path.
304
+
305
+ #### Gemini Example
306
+
307
+ If you prefer Gemini, switch `IMAGE_PROVIDER` to `gemini` and replace the Volcengine variables with `GEMINI_API_KEY`. Gemini remains supported as an optional path; the install examples above default to Volcengine.
308
+
309
+ For standard direct access to the official Gemini endpoint, you only need:
310
+ - `IMAGE_PROVIDER=gemini`
311
+ - `GEMINI_API_KEY=your_gemini_api_key_here`
312
+ - `IMAGE_OUTPUT_DIR=/absolute/path/to/images`
313
+
314
+ `GEMINI_API_BASE_URL` is optional. Add it only when you use a third-party proxy, relay, or custom compatible gateway.
315
+
316
+ #### Gemini Custom API Base URL (Third-party Proxy)
227
317
 
228
318
  To use a third-party API endpoint or proxy, add the `GEMINI_API_BASE_URL` environment variable:
229
319
 
@@ -263,7 +353,19 @@ IMAGE_OUTPUT_DIR = "/absolute/path/to/images"
263
353
 
264
354
  > **Note**: The base URL should be the root domain (e.g., `https://llm.myseek.fun`), without the `/v1` suffix — the SDK will append the API version automatically.
265
355
 
266
- ## Quality Presets
356
+ ### Volcengine Notes
357
+
358
+ Volcengine is the recommended default provider in this project.
359
+
360
+ Current implementation status and behavior:
361
+ - Stable path: text-to-image
362
+ - Reference-image workflows are wired through the OpenAI-compatible image API using the `image` field
363
+ - Base64 image inputs are normalized to the official Volcengine format: `data:image/<format>;base64,<data>`
364
+ - Grouped output (`outputCount`) is best-effort and depends on provider-side behavior
365
+ - If the user does not specify `aspectRatio` or `imageSize`, Gemini and Volcengine default to `16:9` and `4K`
366
+ - If the user specifies `aspectRatio` and/or `imageSize`, the server automatically normalizes the final `WxH` into Volcengine's legal pixel range
367
+ - When the user provides local image paths, they should be passed through `inputImagePath` / `inputImagePaths` instead of being summarized into the prompt
368
+ ## Quality Presets
267
369
 
268
370
  Choose the right balance of speed, quality, and cost:
269
371
 
@@ -330,7 +432,7 @@ Your prompt is automatically enhanced with rich details about lighting, material
330
432
  (with inputImagePath: "/path/to/image.jpg")
331
433
  ```
332
434
 
333
- ### Advanced Features
435
+ ### Advanced Features
334
436
 
335
437
  **Character Consistency:**
336
438
  ```
@@ -344,41 +446,98 @@ Your prompt is automatically enhanced with rich details about lighting, material
344
446
  (with imageSize: "4K")
345
447
  ```
346
448
 
347
- **Custom Aspect Ratio:**
348
- ```
349
- "Generate a cinematic landscape of a desert at golden hour"
350
- (with aspectRatio: "21:9")
351
- ```
352
-
353
- ## API Reference
354
-
355
- ### `generate_image` Tool
449
+ **Custom Aspect Ratio:**
450
+ ```
451
+ "Generate a cinematic landscape of a desert at golden hour"
452
+ (with aspectRatio: "21:9")
453
+ ```
454
+
455
+ ### Grouped Multi-Image Generation
456
+
457
+ For a grouped multi-image task, prefer `generate_multi_image` instead of repeating `generate_image`.
458
+
459
+ **Natural-language grouped request:**
460
+ ```text
461
+ "Use generate_multi_image to create 4 unified e-commerce product images of the same minimalist white thermos cup: hero shot, side-detail shot, handheld lifestyle shot, and desk scene. Return 4 separate images in one run."
462
+ ```
463
+
464
+ **Structured grouped request with inferred numbering:**
465
+ ```json
466
+ {
467
+ "prompt": "同一款极简白色保温杯,整体风格统一,高级感、干净、真实、适合品牌官网和详情页使用。",
468
+ "outputCount": 4,
469
+ "provider": "volcengine"
470
+ }
471
+ ```
472
+
473
+ **Structured grouped request with explicit per-image prompts:**
474
+ ```json
475
+ {
476
+ "prompt": "同一款极简白色保温杯,整体风格统一,高级感、干净、真实、适合品牌官网和详情页使用。",
477
+ "provider": "volcengine",
478
+ "imageRequests": [
479
+ "电商主图,白底,正面展示产品",
480
+ "侧面细节图,突出杯盖和材质纹理",
481
+ "手持使用场景图,突出尺寸感",
482
+ "办公桌场景图,氛围高级"
483
+ ]
484
+ }
485
+ ```
486
+
487
+ ## API Reference
488
+
489
+ ### `generate_image` Tool
356
490
 
357
491
  The server uses a two-stage process with separate models for each stage:
358
492
  1. **Prompt Optimization** (Gemini 2.5 Flash): Refines your prompt using the Subject–Context–Style framework. Skippable via `SKIP_PROMPT_ENHANCEMENT`.
359
- 2. **Image Generation** (Nano Banana 2 or Pro): Creates the final image. Model varies by quality preset.
493
+ 2. **Image Generation** (Nano Banana 2 or Pro on Gemini, or Seedream on Volcengine): Creates the final image. The Gemini model varies by quality preset.
494
+
495
+ Use `generate_image` as the default tool for single-image generation and image editing. If the user wants multiple images in one grouped request, prefer `generate_multi_image`.
360
496
 
361
497
  #### Parameters
362
498
 
363
499
  | Parameter | Type | Required | Description |
364
500
  |-----------|------|----------|-------------|
365
501
  | `prompt` | string | Yes | Text description or editing instruction |
502
+ | `provider` | string | - | Optional provider override: `gemini` or `volcengine`. Defaults to `IMAGE_PROVIDER`. If omitted, the server can still infer the provider from explicit instructions such as "use Volcengine" / "用火山引擎" / "use Gemini" |
366
503
  | `quality` | string | - | Quality preset: `fast` (default), `balanced`, `quality`. Overrides `IMAGE_QUALITY` env var for this request |
367
- | `inputImagePath` | string | - | Absolute path to input image for image-to-image editing |
368
- | `inputImage` | string | - | Base64 encoded image data for image-to-image editing. Alternative to `inputImagePath` |
369
- | `inputImageMimeType` | string | - | MIME type of the input image (`image/jpeg`, `image/png`, `image/webp`, `image/gif`, `image/bmp`). Used with `inputImage` |
370
- | `inputImages` | array | - | Multiple input images for multi-image composition. Each item: `{ data: string, mimeType: string }`. Cannot be used with `inputImage`/`inputImagePath`/`inputImagePaths` |
371
- | `inputImagePaths` | array | - | Multiple input image file paths for multi-image composition. Each item is an absolute path string. Cannot be used with other image input params |
504
+ | `outputFormat` | string | - | Output image format if supported by the provider. Some provider endpoints may ignore or reject format overrides |
505
+ | `outputCount` | integer | - | Backward-compatible grouped output count for `generate_image`. For new multi-image requests, prefer `generate_multi_image` |
506
+ | `inputImagePath` | string | - | Absolute path to input image for image-to-image editing. Supported by Gemini and by Volcengine reference-image workflows |
507
+ | `inputImage` | string | - | Base64 encoded image data for image-to-image editing. Gemini accepts raw base64; Volcengine sends it as `data:image/<format>;base64,<data>` and uses `inputImageMimeType` to build the official request format |
508
+ | `inputImageMimeType` | string | - | MIME type of the input image (`image/jpeg`, `image/png`, `image/webp`, `image/gif`, `image/bmp`). Required for correct Volcengine Data URL formatting when `inputImage` is provided |
509
+ | `inputImages` | array | - | Multiple input images for multi-image composition. Each item uses `{ data, mimeType }`; Volcengine converts them to `data:image/<format>;base64,<data>` entries in the `image` array |
510
+ | `inputImagePaths` | array | - | Multiple input image file paths for multi-image composition. Supported by Gemini and by Volcengine when mapped to reference-image arrays |
372
511
  | `returnBase64` | boolean | - | Return the generated image as base64 data in the response. Image is always saved to disk regardless |
373
512
  | `fileName` | string | - | Custom filename for output (auto-generated if not specified). Extension is auto-appended based on output format if omitted |
374
513
  | `skipPromptEnhancement` | boolean | - | Skip prompt enhancement and use the prompt as-is. Recommended for multi-image blending. Overrides `SKIP_PROMPT_ENHANCEMENT` env var. Default: `false` |
375
- | `aspectRatio` | string | - | `1:1` (default), `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `21:9`, `1:4`, `1:8`, `4:1`, `8:1` |
376
- | `imageSize` | string | - | `1K`, `2K`, `4K`. Leave unspecified for standard quality |
514
+ | `aspectRatio` | string | - | `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `21:9`, `1:4`, `1:8`, `4:1`, `8:1`. When omitted, Gemini and Volcengine default to `16:9` |
515
+ | `imageRequests` | array | - | Backward-compatible per-image prompts for `generate_image`. For new grouped multi-image requests, prefer `generate_multi_image` |
516
+ | `imageSize` | string | - | `1K`, `2K`, `4K`. When omitted, Gemini and Volcengine default to `4K`. Volcengine further normalizes the final `WxH` into the provider legal pixel range |
377
517
  | `blendImages` | boolean | - | Enable multi-image blending for combining multiple visual elements naturally |
378
518
  | `maintainCharacterConsistency` | boolean | - | Maintain character appearance consistency across different poses and scenes |
379
519
  | `useWorldKnowledge` | boolean | - | Use real-world knowledge for accurate context (historical figures, landmarks, factual scenarios) |
380
520
  | `useGoogleSearch` | boolean | - | Enable Google Search grounding for real-time factual accuracy |
381
- | `purpose` | string | - | Intended use (e.g., "cookbook cover", "social media post"). Helps tailor visual style and details |
521
+ | `purpose` | string | - | Intended use (e.g., "cookbook cover", "social media post"). Helps tailor visual style and details |
522
+
523
+ ### `generate_multi_image` Tool
524
+
525
+ Use `generate_multi_image` for grouped multi-image generation in a single tool call. This is the preferred entry point for Notebook planners that might otherwise split one user request into multiple `generate_image` calls.
526
+
527
+ #### Recommended patterns
528
+
529
+ - Use `outputCount` when the user wants multiple images with shared overall constraints.
530
+ - Use `imageRequests` when the user wants several distinct images in one grouped request.
531
+ - If `outputCount` is omitted, the server will try to infer it from phrases such as `4张图`, `四张海报`, or `4 images`.
532
+
533
+ #### Example
534
+
535
+ ```json
536
+ {
537
+ "prompt": "请生成4张统一风格的电商产品图,主题都是同一款极简白色保温杯,分别覆盖主图、侧面细节、手持使用场景、办公桌场景。要求返回4张独立图片,不要拆成多次生成。",
538
+ "provider": "volcengine"
539
+ }
540
+ ```
382
541
 
383
542
  #### Response
384
543
 
@@ -3,22 +3,10 @@
3
3
  * Integrates with Google's Gemini AI API using the official SDK
4
4
  * Supports automatic URL Context processing and feature parameters
5
5
  */
6
- import type { ImageQuality } from '../types/mcp.js';
6
+ import type { GeneratedImageResult, ImageProviderClient } from './imageProvider.js';
7
7
  import type { Result } from '../types/result.js';
8
8
  import type { Config } from '../utils/config.js';
9
9
  import { GeminiAPIError, NetworkError } from '../utils/errors.js';
10
- /**
11
- * Metadata for generated images
12
- */
13
- export interface GeminiGenerationMetadata {
14
- model: string;
15
- prompt: string;
16
- mimeType: string;
17
- timestamp: Date;
18
- inputImageProvided: boolean;
19
- modelVersion?: string;
20
- responseId?: string;
21
- }
22
10
  /**
23
11
  * Parameters for Gemini API image generation
24
12
  */
@@ -33,25 +21,10 @@ export interface GeminiApiParams {
33
21
  aspectRatio?: string;
34
22
  imageSize?: string;
35
23
  useGoogleSearch?: boolean;
36
- quality?: ImageQuality;
24
+ quality?: 'fast' | 'balanced' | 'quality';
37
25
  }
38
- /**
39
- * Result of image generation
40
- */
41
- export interface GeneratedImageResult {
42
- imageData: Buffer;
43
- metadata: GeminiGenerationMetadata;
44
- }
45
- /**
46
- * Gemini API client interface
47
- */
48
- export interface GeminiClient {
26
+ export interface GeminiClient extends ImageProviderClient {
49
27
  generateImage(params: GeminiApiParams): Promise<Result<GeneratedImageResult, GeminiAPIError | NetworkError>>;
50
28
  }
51
- /**
52
- * Creates a new Gemini API client
53
- * @param config Configuration containing API key and other settings
54
- * @returns Result containing the client or an error
55
- */
56
29
  export declare function createGeminiClient(config: Config): Result<GeminiClient, GeminiAPIError>;
57
30
  //# sourceMappingURL=geminiClient.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"geminiClient.d.ts","sourceRoot":"","sources":["../../src/api/geminiClient.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,iBAAiB,CAAA;AAEnD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAA;AAEhD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAA;AAChD,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AAuHjE;;GAEG;AACH,MAAM,WAAW,wBAAwB;IACvC,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,EAAE,MAAM,CAAA;IACd,QAAQ,EAAE,MAAM,CAAA;IAChB,SAAS,EAAE,IAAI,CAAA;IACf,kBAAkB,EAAE,OAAO,CAAA;IAE3B,YAAY,CAAC,EAAE,MAAM,CAAA;IACrB,UAAU,CAAC,EAAE,MAAM,CAAA;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,MAAM,CAAA;IACd,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,kBAAkB,CAAC,EAAE,MAAM,CAAA;IAC3B,WAAW,CAAC,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;IACvD,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,eAAe,CAAC,EAAE,OAAO,CAAA;IACzB,OAAO,CAAC,EAAE,YAAY,CAAA;CACvB;AAED;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,SAAS,EAAE,MAAM,CAAA;IACjB,QAAQ,EAAE,wBAAwB,CAAA;CACnC;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,aAAa,CACX,MAAM,EAAE,eAAe,GACtB,OAAO,CAAC,MAAM,CAAC,oBAAoB,EAAE,cAAc,GAAG,YAAY,CAAC,CAAC,CAAA;CACxE;AA6VD;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,YAAY,EAAE,cAAc,CAAC,CAmDvF"}
1
+ {"version":3,"file":"geminiClient.d.ts","sourceRoot":"","sources":["../../src/api/geminiClient.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,KAAK,EAA0B,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAA;AAE3G,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAA;AAEhD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAA;AAChD,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AAkHjE;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,MAAM,CAAA;IACd,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,kBAAkB,CAAC,EAAE,MAAM,CAAA;IAC3B,WAAW,CAAC,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;IACvD,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,eAAe,CAAC,EAAE,OAAO,CAAA;IACzB,OAAO,CAAC,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS,CAAA;CAC1C;AAED,MAAM,WAAW,YAAa,SAAQ,mBAAmB;IACvD,aAAa,CACX,MAAM,EAAE,eAAe,GACtB,OAAO,CAAC,MAAM,CAAC,oBAAoB,EAAE,cAAc,GAAG,YAAY,CAAC,CAAC,CAAA;CACxE;AA6TD,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,YAAY,EAAE,cAAc,CAAC,CAiDvF"}