mcp-hydrocoder-image 1.1.1 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +239 -80
- package/dist/api/geminiClient.d.ts +3 -30
- package/dist/api/geminiClient.d.ts.map +1 -1
- package/dist/api/geminiClient.js +13 -57
- package/dist/api/geminiClient.js.map +1 -1
- package/dist/api/geminiTextClient.js +1 -1
- package/dist/api/geminiTextClient.js.map +1 -1
- package/dist/api/imageProvider.d.ts +29 -0
- package/dist/api/imageProvider.d.ts.map +1 -0
- package/dist/api/imageProvider.js +5 -0
- package/dist/api/imageProvider.js.map +1 -0
- package/dist/api/volcengineClient.d.ts +13 -0
- package/dist/api/volcengineClient.d.ts.map +1 -0
- package/dist/api/volcengineClient.js +288 -0
- package/dist/api/volcengineClient.js.map +1 -0
- package/dist/business/inputValidator.d.ts.map +1 -1
- package/dist/business/inputValidator.js +69 -1
- package/dist/business/inputValidator.js.map +1 -1
- package/dist/business/multiImagePrompt.d.ts +13 -0
- package/dist/business/multiImagePrompt.d.ts.map +1 -0
- package/dist/business/multiImagePrompt.js +135 -0
- package/dist/business/multiImagePrompt.js.map +1 -0
- package/dist/business/providerResolver.d.ts +4 -0
- package/dist/business/providerResolver.d.ts.map +1 -0
- package/dist/business/providerResolver.js +45 -0
- package/dist/business/providerResolver.js.map +1 -0
- package/dist/business/responseBuilder.d.ts +3 -2
- package/dist/business/responseBuilder.d.ts.map +1 -1
- package/dist/business/responseBuilder.js +69 -48
- package/dist/business/responseBuilder.js.map +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/server/errorHandler.d.ts.map +1 -1
- package/dist/server/errorHandler.js +3 -2
- package/dist/server/errorHandler.js.map +1 -1
- package/dist/server/mcpServer.d.ts +90 -29
- package/dist/server/mcpServer.d.ts.map +1 -1
- package/dist/server/mcpServer.js +430 -288
- package/dist/server/mcpServer.js.map +1 -1
- package/dist/types/mcp.d.ts +62 -15
- package/dist/types/mcp.d.ts.map +1 -1
- package/dist/types/mcp.js +15 -0
- package/dist/types/mcp.js.map +1 -1
- package/dist/utils/config.d.ts +6 -2
- package/dist/utils/config.d.ts.map +1 -1
- package/dist/utils/config.js +43 -13
- package/dist/utils/config.js.map +1 -1
- package/dist/utils/errors.d.ts +9 -0
- package/dist/utils/errors.d.ts.map +1 -1
- package/dist/utils/errors.js +50 -1
- package/dist/utils/errors.js.map +1 -1
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# MCP HydroCoder Image 🍌
|
|
2
2
|
|
|
3
|
-
> AI image generation and editing MCP server for Cursor, Claude Code, Codex, and any MCP-compatible tool — powered by
|
|
3
|
+
> AI image generation and editing MCP server for Cursor, Claude Code, Codex, and any MCP-compatible tool — powered by Volcengine Seedream and Google Gemini.
|
|
4
4
|
|
|
5
5
|
[](https://www.npmjs.com/package/mcp-hydrocoder-image)
|
|
6
6
|
[](https://opensource.org/licenses/MIT)
|
|
@@ -36,6 +36,10 @@ The prompt optimizer uses a **Subject–Context–Style** framework (powered by
|
|
|
36
36
|
|
|
37
37
|
## Features
|
|
38
38
|
|
|
39
|
+
- **Multi-Provider Support**: Use either Volcengine Seedream or Google Gemini behind the same MCP server, with provider selection via config or per-request override.
|
|
40
|
+
- **Provider-Aware Routing**: If the caller explicitly asks for Volcengine / Seedream / 豆包 / 火山引擎 or Gemini / Nano Banana, the server metadata now nudges agents to pass the correct `provider`, and the server can infer the provider from explicit prompt instructions when the parameter is omitted.
|
|
41
|
+
- **Volcengine-First Production Path**: README and install examples now default to Volcengine because it is the more stable day-to-day path for text-to-image and grouped output in this project.
|
|
42
|
+
- **Dedicated Multi-Image Tooling**: Use `generate_image` for single-image work and `generate_multi_image` for grouped multi-image output with server-side fan-out and prompt normalization.
|
|
39
43
|
- **Built-in Prompt Optimization**: Your simple prompt is automatically enriched with photographic and artistic details — lighting, composition, atmosphere — using Gemini 2.5 Flash. No prompt engineering skills required.
|
|
40
44
|
- **Three Quality Tiers**: Choose between fast iteration, balanced quality, or maximum fidelity with Nano Banana 2 (Gemini 3.1 Flash Image) and Nano Banana Pro (Gemini 3 Pro Image). [See Quality Presets](#quality-presets).
|
|
41
45
|
- **Image Editing**: Transform existing images with natural language instructions (image-to-image) while preserving original style and visual consistency.
|
|
@@ -81,24 +85,30 @@ npx mcp-hydrocoder-image skills install --path ~/.claude/skills
|
|
|
81
85
|
| | MCP Server | Agent Skill |
|
|
82
86
|
|---|---|---|
|
|
83
87
|
| **Use when** | Your AI tool does not have built-in image generation | Your AI tool already generates images natively |
|
|
84
|
-
| **Requires** | Gemini API key | Nothing |
|
|
85
|
-
| **What it does** | Generates images via Gemini API with automatic prompt optimization | Teaches the AI to write better prompts |
|
|
88
|
+
| **Requires** | Volcengine or Gemini API key | Nothing |
|
|
89
|
+
| **What it does** | Generates images via Volcengine or Gemini API with automatic prompt optimization | Teaches the AI to write better prompts |
|
|
86
90
|
| **Works with** | MCP-compatible tools (Cursor, Claude Code, Codex, etc.) | Any tool supporting the [Agent Skills](https://agentskills.io) open standard |
|
|
87
91
|
|
|
88
92
|
---
|
|
89
93
|
|
|
90
94
|
## Prerequisites
|
|
91
95
|
|
|
92
|
-
- **Node.js** 20 or higher
|
|
93
|
-
- **
|
|
94
|
-
-
|
|
95
|
-
-
|
|
96
|
+
- **Node.js** 20 or higher
|
|
97
|
+
- **API Key**
|
|
98
|
+
- Volcengine: create an Ark API key in the Volcengine console
|
|
99
|
+
- Gemini: get yours at [Google AI Studio](https://aistudio.google.com/apikey)
|
|
100
|
+
- An MCP-compatible AI tool: **Cursor**, **Claude Code**, **Codex**, or others
|
|
101
|
+
- Basic terminal/command line knowledge
|
|
96
102
|
|
|
97
103
|
## Quick Start
|
|
98
104
|
|
|
99
|
-
### 1. Get Your
|
|
105
|
+
### 1. Get Your API Key
|
|
100
106
|
|
|
101
|
-
|
|
107
|
+
Choose a provider and create an API key:
|
|
108
|
+
- **Volcengine**: Volcengine Ark console (`ARK_API_KEY`-compatible key)
|
|
109
|
+
- **Gemini**: [Google AI Studio](https://aistudio.google.com/apikey)
|
|
110
|
+
|
|
111
|
+
> **Recommended default**: Start with Volcengine. In this project it is the preferred production path, especially for single-image generation, grouped multi-image workflows, and predictable output sizing.
|
|
102
112
|
|
|
103
113
|
### 2. MCP Configuration
|
|
104
114
|
|
|
@@ -111,11 +121,14 @@ Add to `~/.codex/config.toml`:
|
|
|
111
121
|
command = "npx"
|
|
112
122
|
args = ["-y", "mcp-hydrocoder-image"]
|
|
113
123
|
|
|
114
|
-
[mcp_servers.mcp-hydrocoder-image.env]
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
124
|
+
[mcp_servers.mcp-hydrocoder-image.env]
|
|
125
|
+
IMAGE_PROVIDER = "volcengine"
|
|
126
|
+
VOLCENGINE_API_KEY = "your_volcengine_api_key_here"
|
|
127
|
+
VOLCENGINE_MODEL = "doubao-seedream-4-5-251128"
|
|
128
|
+
VOLCENGINE_API_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3"
|
|
129
|
+
IMAGE_OUTPUT_DIR = "/absolute/path/to/images"
|
|
130
|
+
API_TIMEOUT = "120000" # Optional: timeout in milliseconds (default: 120s)
|
|
131
|
+
```
|
|
119
132
|
|
|
120
133
|
#### For Cursor
|
|
121
134
|
|
|
@@ -128,13 +141,16 @@ Add to your Cursor settings:
|
|
|
128
141
|
{
|
|
129
142
|
"mcpServers": {
|
|
130
143
|
"mcp-hydrocoder-image": {
|
|
131
|
-
"command": "npx",
|
|
132
|
-
"args": ["-y", "mcp-hydrocoder-image"],
|
|
133
|
-
"env": {
|
|
134
|
-
"
|
|
135
|
-
"
|
|
136
|
-
"
|
|
137
|
-
|
|
144
|
+
"command": "npx",
|
|
145
|
+
"args": ["-y", "mcp-hydrocoder-image"],
|
|
146
|
+
"env": {
|
|
147
|
+
"IMAGE_PROVIDER": "volcengine",
|
|
148
|
+
"VOLCENGINE_API_KEY": "your_volcengine_api_key_here",
|
|
149
|
+
"VOLCENGINE_MODEL": "doubao-seedream-4-5-251128",
|
|
150
|
+
"VOLCENGINE_API_BASE_URL": "https://ark.cn-beijing.volces.com/api/v3",
|
|
151
|
+
"IMAGE_OUTPUT_DIR": "/absolute/path/to/images",
|
|
152
|
+
"API_TIMEOUT": "120000"
|
|
153
|
+
}
|
|
138
154
|
}
|
|
139
155
|
}
|
|
140
156
|
}
|
|
@@ -145,13 +161,16 @@ Add to your Cursor settings:
|
|
|
145
161
|
{
|
|
146
162
|
"mcpServers": {
|
|
147
163
|
"mcp-hydrocoder-image": {
|
|
148
|
-
"command": "npx",
|
|
149
|
-
"args": ["-y", "mcp-hydrocoder-image"],
|
|
150
|
-
"env": {
|
|
151
|
-
"
|
|
152
|
-
"
|
|
153
|
-
"
|
|
154
|
-
|
|
164
|
+
"command": "npx",
|
|
165
|
+
"args": ["-y", "mcp-hydrocoder-image"],
|
|
166
|
+
"env": {
|
|
167
|
+
"IMAGE_PROVIDER": "volcengine",
|
|
168
|
+
"VOLCENGINE_API_KEY": "your_volcengine_api_key_here",
|
|
169
|
+
"VOLCENGINE_MODEL": "doubao-seedream-4-5-251128",
|
|
170
|
+
"VOLCENGINE_API_BASE_URL": "https://ark.cn-beijing.volces.com/api/v3",
|
|
171
|
+
"IMAGE_OUTPUT_DIR": "C:\\absolute\\path\\to\\images",
|
|
172
|
+
"API_TIMEOUT": "120000"
|
|
173
|
+
}
|
|
155
174
|
}
|
|
156
175
|
}
|
|
157
176
|
}
|
|
@@ -163,21 +182,27 @@ Run in your project directory to enable for that project:
|
|
|
163
182
|
|
|
164
183
|
```bash
|
|
165
184
|
cd /path/to/your/project
|
|
166
|
-
claude mcp add mcp-hydrocoder-image \
|
|
167
|
-
--env
|
|
168
|
-
--env
|
|
169
|
-
--env
|
|
170
|
-
--
|
|
185
|
+
claude mcp add mcp-hydrocoder-image \
|
|
186
|
+
--env IMAGE_PROVIDER=volcengine \
|
|
187
|
+
--env VOLCENGINE_API_KEY=your-volcengine-api-key \
|
|
188
|
+
--env VOLCENGINE_MODEL=doubao-seedream-4-5-251128 \
|
|
189
|
+
--env VOLCENGINE_API_BASE_URL=https://ark.cn-beijing.volces.com/api/v3 \
|
|
190
|
+
--env IMAGE_OUTPUT_DIR=/absolute/path/to/images \
|
|
191
|
+
--env API_TIMEOUT=120000 \
|
|
192
|
+
-- npx -y mcp-hydrocoder-image
|
|
171
193
|
```
|
|
172
194
|
|
|
173
195
|
Or add globally for all projects:
|
|
174
196
|
|
|
175
197
|
```bash
|
|
176
|
-
claude mcp add mcp-hydrocoder-image --scope user \
|
|
177
|
-
--env
|
|
178
|
-
--env
|
|
179
|
-
--env
|
|
180
|
-
--
|
|
198
|
+
claude mcp add mcp-hydrocoder-image --scope user \
|
|
199
|
+
--env IMAGE_PROVIDER=volcengine \
|
|
200
|
+
--env VOLCENGINE_API_KEY=your-volcengine-api-key \
|
|
201
|
+
--env VOLCENGINE_MODEL=doubao-seedream-4-5-251128 \
|
|
202
|
+
--env VOLCENGINE_API_BASE_URL=https://ark.cn-beijing.volces.com/api/v3 \
|
|
203
|
+
--env IMAGE_OUTPUT_DIR=/absolute/path/to/images \
|
|
204
|
+
--env API_TIMEOUT=120000 \
|
|
205
|
+
-- npx -y mcp-hydrocoder-image
|
|
181
206
|
```
|
|
182
207
|
|
|
183
208
|
Or add via JSON config (`~/.claude/settings.json` for global, `.mcp.json` for project):
|
|
@@ -187,13 +212,16 @@ Or add via JSON config (`~/.claude/settings.json` for global, `.mcp.json` for pr
|
|
|
187
212
|
{
|
|
188
213
|
"mcpServers": {
|
|
189
214
|
"mcp-hydrocoder-image": {
|
|
190
|
-
"command": "npx",
|
|
191
|
-
"args": ["-y", "mcp-hydrocoder-image"],
|
|
192
|
-
"env": {
|
|
193
|
-
"
|
|
194
|
-
"
|
|
195
|
-
"
|
|
196
|
-
|
|
215
|
+
"command": "npx",
|
|
216
|
+
"args": ["-y", "mcp-hydrocoder-image"],
|
|
217
|
+
"env": {
|
|
218
|
+
"IMAGE_PROVIDER": "volcengine",
|
|
219
|
+
"VOLCENGINE_API_KEY": "your_volcengine_api_key_here",
|
|
220
|
+
"VOLCENGINE_MODEL": "doubao-seedream-4-5-251128",
|
|
221
|
+
"VOLCENGINE_API_BASE_URL": "https://ark.cn-beijing.volces.com/api/v3",
|
|
222
|
+
"IMAGE_OUTPUT_DIR": "/absolute/path/to/images",
|
|
223
|
+
"API_TIMEOUT": "120000"
|
|
224
|
+
}
|
|
197
225
|
}
|
|
198
226
|
}
|
|
199
227
|
}
|
|
@@ -204,26 +232,88 @@ Or add via JSON config (`~/.claude/settings.json` for global, `.mcp.json` for pr
|
|
|
204
232
|
{
|
|
205
233
|
"mcpServers": {
|
|
206
234
|
"mcp-hydrocoder-image": {
|
|
207
|
-
"command": "npx",
|
|
208
|
-
"args": ["-y", "mcp-hydrocoder-image"],
|
|
209
|
-
"env": {
|
|
210
|
-
"
|
|
211
|
-
"
|
|
212
|
-
"
|
|
213
|
-
|
|
235
|
+
"command": "npx",
|
|
236
|
+
"args": ["-y", "mcp-hydrocoder-image"],
|
|
237
|
+
"env": {
|
|
238
|
+
"IMAGE_PROVIDER": "volcengine",
|
|
239
|
+
"VOLCENGINE_API_KEY": "your_volcengine_api_key_here",
|
|
240
|
+
"VOLCENGINE_MODEL": "doubao-seedream-4-5-251128",
|
|
241
|
+
"VOLCENGINE_API_BASE_URL": "https://ark.cn-beijing.volces.com/api/v3",
|
|
242
|
+
"IMAGE_OUTPUT_DIR": "C:\\absolute\\path\\to\\images",
|
|
243
|
+
"API_TIMEOUT": "120000"
|
|
244
|
+
}
|
|
214
245
|
}
|
|
215
246
|
}
|
|
216
247
|
}
|
|
217
248
|
```
|
|
218
249
|
|
|
219
|
-
⚠️ **Security Note**: Never commit your API key to version control. Keep it secure and use environment-specific configuration.
|
|
220
|
-
|
|
221
|
-
📁 **Path Requirements**:
|
|
222
|
-
- `IMAGE_OUTPUT_DIR` must be an absolute path (e.g., `/Users/username/images`, not `./images`)
|
|
223
|
-
- Defaults to `./output` in the current working directory if not specified
|
|
224
|
-
- Directory will be created automatically if it doesn't exist
|
|
225
|
-
|
|
226
|
-
####
|
|
250
|
+
⚠️ **Security Note**: Never commit your API key to version control. Keep it secure and use environment-specific configuration.
|
|
251
|
+
|
|
252
|
+
📁 **Path Requirements**:
|
|
253
|
+
- `IMAGE_OUTPUT_DIR` must be an absolute path (e.g., `/Users/username/images`, not `./images`)
|
|
254
|
+
- Defaults to `./output` in the current working directory if not specified
|
|
255
|
+
- Directory will be created automatically if it doesn't exist
|
|
256
|
+
|
|
257
|
+
#### Volcengine Local MCP Example
|
|
258
|
+
|
|
259
|
+
For local MCP testing, build the project first and point your MCP client at the local `dist/index.js` entry instead of `npx`.
|
|
260
|
+
|
|
261
|
+
```bash
|
|
262
|
+
npm install
|
|
263
|
+
npm run build
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
**Codex (`~/.codex/config.toml` on Windows):**
|
|
267
|
+
```toml
|
|
268
|
+
[mcp_servers.mcp-hydrocoder-image-local]
|
|
269
|
+
command = "node"
|
|
270
|
+
args = ["C:\\workspace\\develop\\ccExtensions\\mcpBanana\\dist\\index.js"]
|
|
271
|
+
|
|
272
|
+
[mcp_servers.mcp-hydrocoder-image-local.env]
|
|
273
|
+
IMAGE_PROVIDER = "volcengine"
|
|
274
|
+
VOLCENGINE_API_KEY = "your_volcengine_api_key_here"
|
|
275
|
+
VOLCENGINE_MODEL = "doubao-seedream-4-5-251128"
|
|
276
|
+
VOLCENGINE_API_BASE_URL = "https://ark.cn-beijing.volces.com/api/v3"
|
|
277
|
+
IMAGE_OUTPUT_DIR = "C:\\workspace\\develop\\ccExtensions\\mcpBanana\\output"
|
|
278
|
+
API_TIMEOUT = "120000"
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
**Cursor / Claude Code JSON config (Windows local repo example):**
|
|
282
|
+
```json
|
|
283
|
+
{
|
|
284
|
+
"mcpServers": {
|
|
285
|
+
"mcp-hydrocoder-image-local": {
|
|
286
|
+
"command": "node",
|
|
287
|
+
"args": [
|
|
288
|
+
"C:\\workspace\\develop\\ccExtensions\\mcpBanana\\dist\\index.js"
|
|
289
|
+
],
|
|
290
|
+
"env": {
|
|
291
|
+
"IMAGE_PROVIDER": "volcengine",
|
|
292
|
+
"VOLCENGINE_API_KEY": "your_volcengine_api_key_here",
|
|
293
|
+
"VOLCENGINE_MODEL": "doubao-seedream-4-5-251128",
|
|
294
|
+
"VOLCENGINE_API_BASE_URL": "https://ark.cn-beijing.volces.com/api/v3",
|
|
295
|
+
"IMAGE_OUTPUT_DIR": "C:\\workspace\\develop\\ccExtensions\\mcpBanana\\output",
|
|
296
|
+
"API_TIMEOUT": "120000"
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
If you move the repo, update both the `args` path and `IMAGE_OUTPUT_DIR` to the new absolute path.
|
|
304
|
+
|
|
305
|
+
#### Gemini Example
|
|
306
|
+
|
|
307
|
+
If you prefer Gemini, switch `IMAGE_PROVIDER` to `gemini` and replace the Volcengine variables with `GEMINI_API_KEY`. Gemini remains supported, but the README now treats it as the optional path rather than the default install path.
|
|
308
|
+
|
|
309
|
+
For standard direct access to the official Gemini endpoint, you only need:
|
|
310
|
+
- `IMAGE_PROVIDER=gemini`
|
|
311
|
+
- `GEMINI_API_KEY=your_gemini_api_key_here`
|
|
312
|
+
- `IMAGE_OUTPUT_DIR=/absolute/path/to/images`
|
|
313
|
+
|
|
314
|
+
`GEMINI_API_BASE_URL` is optional. Add it only when you use a third-party proxy, relay, or custom compatible gateway.
|
|
315
|
+
|
|
316
|
+
#### Gemini Custom API Base URL (Third-party Proxy)
|
|
227
317
|
|
|
228
318
|
To use a third-party API endpoint or proxy, add the `GEMINI_API_BASE_URL` environment variable:
|
|
229
319
|
|
|
@@ -263,7 +353,19 @@ IMAGE_OUTPUT_DIR = "/absolute/path/to/images"
|
|
|
263
353
|
|
|
264
354
|
> **Note**: The base URL should be the root domain (e.g., `https://llm.myseek.fun`), without the `/v1` suffix — the SDK will append the API version automatically.
|
|
265
355
|
|
|
266
|
-
|
|
356
|
+
### Volcengine Notes
|
|
357
|
+
|
|
358
|
+
Volcengine is the recommended default provider in this project.
|
|
359
|
+
|
|
360
|
+
Current implementation status and behavior:
|
|
361
|
+
- Stable path: text-to-image
|
|
362
|
+
- Reference-image workflows are wired through the OpenAI-compatible image API using the `image` field
|
|
363
|
+
- Base64 image inputs are normalized to the official Volcengine data-URL format: `data:image/<format>;base64,<base64-encoded data>`
|
|
364
|
+
- Grouped output (`outputCount`) is best-effort and depends on provider-side behavior
|
|
365
|
+
- If the user does not specify `aspectRatio` or `imageSize`, both Gemini and Volcengine default to `16:9` and `4K`, respectively
|
|
366
|
+
- If the user specifies `aspectRatio` and/or `imageSize`, the server automatically normalizes the final `WxH` to fit within Volcengine's supported pixel range
|
|
367
|
+
- When the user provides local image paths, they should be passed through `inputImagePath` / `inputImagePaths` instead of being summarized into the prompt
|
|
368
|
+
|
|
267
369
|
|
|
268
370
|
Choose the right balance of speed, quality, and cost:
|
|
269
371
|
|
|
@@ -330,7 +432,7 @@ Your prompt is automatically enhanced with rich details about lighting, material
|
|
|
330
432
|
(with inputImagePath: "/path/to/image.jpg")
|
|
331
433
|
```
|
|
332
434
|
|
|
333
|
-
### Advanced Features
|
|
435
|
+
### Advanced Features
|
|
334
436
|
|
|
335
437
|
**Character Consistency:**
|
|
336
438
|
```
|
|
@@ -344,41 +446,98 @@ Your prompt is automatically enhanced with rich details about lighting, material
|
|
|
344
446
|
(with imageSize: "4K")
|
|
345
447
|
```
|
|
346
448
|
|
|
347
|
-
**Custom Aspect Ratio:**
|
|
348
|
-
```
|
|
349
|
-
"Generate a cinematic landscape of a desert at golden hour"
|
|
350
|
-
(with aspectRatio: "21:9")
|
|
351
|
-
```
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
449
|
+
**Custom Aspect Ratio:**
|
|
450
|
+
```
|
|
451
|
+
"Generate a cinematic landscape of a desert at golden hour"
|
|
452
|
+
(with aspectRatio: "21:9")
|
|
453
|
+
```
|
|
454
|
+
|
|
455
|
+
### Grouped Multi-Image Generation
|
|
456
|
+
|
|
457
|
+
For a grouped multi-image task, prefer `generate_multi_image` instead of repeating `generate_image`.
|
|
458
|
+
|
|
459
|
+
**Natural-language grouped request:**
|
|
460
|
+
```text
|
|
461
|
+
"Use generate_multi_image to create 4 unified e-commerce product images of the same minimalist white thermos cup: hero shot, side-detail shot, handheld lifestyle shot, and desk scene. Return 4 separate images in one run."
|
|
462
|
+
```
|
|
463
|
+
|
|
464
|
+
**Structured grouped request with inferred numbering:**
|
|
465
|
+
```json
|
|
466
|
+
{
|
|
467
|
+
"prompt": "同一款极简白色保温杯,整体风格统一,高级感、干净、真实、适合品牌官网和详情页使用。",
|
|
468
|
+
"outputCount": 4,
|
|
469
|
+
"provider": "volcengine"
|
|
470
|
+
}
|
|
471
|
+
```
|
|
472
|
+
|
|
473
|
+
**Structured grouped request with explicit per-image prompts:**
|
|
474
|
+
```json
|
|
475
|
+
{
|
|
476
|
+
"prompt": "同一款极简白色保温杯,整体风格统一,高级感、干净、真实、适合品牌官网和详情页使用。",
|
|
477
|
+
"provider": "volcengine",
|
|
478
|
+
"imageRequests": [
|
|
479
|
+
"电商主图,白底,正面展示产品",
|
|
480
|
+
"侧面细节图,突出杯盖和材质纹理",
|
|
481
|
+
"手持使用场景图,突出尺寸感",
|
|
482
|
+
"办公桌场景图,氛围高级"
|
|
483
|
+
]
|
|
484
|
+
}
|
|
485
|
+
```
|
|
486
|
+
|
|
487
|
+
## API Reference
|
|
488
|
+
|
|
489
|
+
### `generate_image` Tool
|
|
356
490
|
|
|
357
491
|
The server uses a two-stage process with separate models for each stage:
|
|
358
492
|
1. **Prompt Optimization** (Gemini 2.5 Flash): Refines your prompt using the Subject–Context–Style framework. Skippable via `SKIP_PROMPT_ENHANCEMENT`.
|
|
359
|
-
2. **Image Generation** (Nano Banana 2 or Pro): Creates the final image. Model varies by quality preset.
|
|
493
|
+
2. **Image Generation** (Nano Banana 2 or Pro): Creates the final image. Model varies by quality preset.
|
|
494
|
+
|
|
495
|
+
Use `generate_image` as the default tool for single-image generation and image editing. If the user wants multiple images in one grouped request, prefer `generate_multi_image`.
|
|
360
496
|
|
|
361
497
|
#### Parameters
|
|
362
498
|
|
|
363
499
|
| Parameter | Type | Required | Description |
|
|
364
500
|
|-----------|------|----------|-------------|
|
|
365
501
|
| `prompt` | string | Yes | Text description or editing instruction |
|
|
502
|
+
| `provider` | string | - | Optional provider override: `gemini` or `volcengine`. Defaults to `IMAGE_PROVIDER`. If omitted, the server can still infer the provider from explicit instructions such as "use Volcengine" / "用火山引擎" / "use Gemini" |
|
|
366
503
|
| `quality` | string | - | Quality preset: `fast` (default), `balanced`, `quality`. Overrides `IMAGE_QUALITY` env var for this request |
|
|
367
|
-
| `
|
|
368
|
-
| `
|
|
369
|
-
| `
|
|
370
|
-
| `
|
|
371
|
-
| `
|
|
504
|
+
| `outputFormat` | string | - | Output image format if supported by the provider. Some provider endpoints may ignore or reject format overrides |
|
|
505
|
+
| `outputCount` | integer | - | Backward-compatible grouped output count for `generate_image`. For new multi-image requests, prefer `generate_multi_image` |
|
|
506
|
+
| `inputImagePath` | string | - | Absolute path to input image for image-to-image editing. Supported by Gemini and by Volcengine reference-image workflows |
|
|
507
|
+
| `inputImage` | string | - | Base64 encoded image data for image-to-image editing. Gemini accepts raw base64; Volcengine sends it as `data:image/<format>;base64,<data>` and uses `inputImageMimeType` to build the official request format |
|
|
508
|
+
| `inputImageMimeType` | string | - | MIME type of the input image (`image/jpeg`, `image/png`, `image/webp`, `image/gif`, `image/bmp`). Required for correct Volcengine Data URL formatting when `inputImage` is provided |
|
|
509
|
+
| `inputImages` | array | - | Multiple input images for multi-image composition. Each item uses `{ data, mimeType }`; Volcengine converts them to `data:image/<format>;base64,<data>` entries in the `image` array |
|
|
510
|
+
| `inputImagePaths` | array | - | Multiple input image file paths for multi-image composition. Supported by Gemini and by Volcengine when mapped to reference-image arrays |
|
|
372
511
|
| `returnBase64` | boolean | - | Return the generated image as base64 data in the response. Image is always saved to disk regardless |
|
|
373
512
|
| `fileName` | string | - | Custom filename for output (auto-generated if not specified). Extension is auto-appended based on output format if omitted |
|
|
374
513
|
| `skipPromptEnhancement` | boolean | - | Skip prompt enhancement and use the prompt as-is. Recommended for multi-image blending. Overrides `SKIP_PROMPT_ENHANCEMENT` env var. Default: `false` |
|
|
375
|
-
| `aspectRatio` | string | - | `1:1
|
|
376
|
-
| `
|
|
514
|
+
| `aspectRatio` | string | - | `1:1`, `2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `9:16`, `16:9`, `21:9`, `1:4`, `1:8`, `4:1`, `8:1`. When omitted, Gemini and Volcengine default to `16:9` |
|
|
515
|
+
| `imageRequests` | array | - | Backward-compatible per-image prompts for `generate_image`. For new grouped multi-image requests, prefer `generate_multi_image` |
|
|
516
|
+
| `imageSize` | string | - | `1K`, `2K`, `4K`. When omitted, Gemini and Volcengine default to `4K`. Volcengine further normalizes the final `WxH` to fit within the provider's supported pixel range |
|
|
377
517
|
| `blendImages` | boolean | - | Enable multi-image blending for combining multiple visual elements naturally |
|
|
378
518
|
| `maintainCharacterConsistency` | boolean | - | Maintain character appearance consistency across different poses and scenes |
|
|
379
519
|
| `useWorldKnowledge` | boolean | - | Use real-world knowledge for accurate context (historical figures, landmarks, factual scenarios) |
|
|
380
520
|
| `useGoogleSearch` | boolean | - | Enable Google Search grounding for real-time factual accuracy |
|
|
381
|
-
| `purpose` | string | - | Intended use (e.g., "cookbook cover", "social media post"). Helps tailor visual style and details |
|
|
521
|
+
| `purpose` | string | - | Intended use (e.g., "cookbook cover", "social media post"). Helps tailor visual style and details |
|
|
522
|
+
|
|
523
|
+
### `generate_multi_image` Tool
|
|
524
|
+
|
|
525
|
+
Use `generate_multi_image` for grouped multi-image generation in a single tool call. This is the preferred entry point for Notebook planners that might otherwise split one user request into multiple `generate_image` calls.
|
|
526
|
+
|
|
527
|
+
#### Recommended patterns
|
|
528
|
+
|
|
529
|
+
- Use `outputCount` when the user wants multiple images with shared overall constraints.
|
|
530
|
+
- Use `imageRequests` when the user wants several distinct images in one grouped request.
|
|
531
|
+
- If `outputCount` is omitted, the server will try to infer it from phrases such as `4张图`, `四张海报`, or `4 images`.
|
|
532
|
+
|
|
533
|
+
#### Example
|
|
534
|
+
|
|
535
|
+
```json
|
|
536
|
+
{
|
|
537
|
+
"prompt": "请生成4张统一风格的电商产品图,主题都是同一款极简白色保温杯,分别覆盖主图、侧面细节、手持使用场景、办公桌场景。要求返回4张独立图片,不要拆成多次生成。",
|
|
538
|
+
"provider": "volcengine"
|
|
539
|
+
}
|
|
540
|
+
```
|
|
382
541
|
|
|
383
542
|
#### Response
|
|
384
543
|
|
|
@@ -3,22 +3,10 @@
|
|
|
3
3
|
* Integrates with Google's Gemini AI API using the official SDK
|
|
4
4
|
* Supports automatic URL Context processing and feature parameters
|
|
5
5
|
*/
|
|
6
|
-
import type {
|
|
6
|
+
import type { GeneratedImageResult, ImageProviderClient } from './imageProvider.js';
|
|
7
7
|
import type { Result } from '../types/result.js';
|
|
8
8
|
import type { Config } from '../utils/config.js';
|
|
9
9
|
import { GeminiAPIError, NetworkError } from '../utils/errors.js';
|
|
10
|
-
/**
|
|
11
|
-
* Metadata for generated images
|
|
12
|
-
*/
|
|
13
|
-
export interface GeminiGenerationMetadata {
|
|
14
|
-
model: string;
|
|
15
|
-
prompt: string;
|
|
16
|
-
mimeType: string;
|
|
17
|
-
timestamp: Date;
|
|
18
|
-
inputImageProvided: boolean;
|
|
19
|
-
modelVersion?: string;
|
|
20
|
-
responseId?: string;
|
|
21
|
-
}
|
|
22
10
|
/**
|
|
23
11
|
* Parameters for Gemini API image generation
|
|
24
12
|
*/
|
|
@@ -33,25 +21,10 @@ export interface GeminiApiParams {
|
|
|
33
21
|
aspectRatio?: string;
|
|
34
22
|
imageSize?: string;
|
|
35
23
|
useGoogleSearch?: boolean;
|
|
36
|
-
quality?:
|
|
24
|
+
quality?: 'fast' | 'balanced' | 'quality';
|
|
37
25
|
}
|
|
38
|
-
|
|
39
|
-
* Result of image generation
|
|
40
|
-
*/
|
|
41
|
-
export interface GeneratedImageResult {
|
|
42
|
-
imageData: Buffer;
|
|
43
|
-
metadata: GeminiGenerationMetadata;
|
|
44
|
-
}
|
|
45
|
-
/**
|
|
46
|
-
* Gemini API client interface
|
|
47
|
-
*/
|
|
48
|
-
export interface GeminiClient {
|
|
26
|
+
export interface GeminiClient extends ImageProviderClient {
|
|
49
27
|
generateImage(params: GeminiApiParams): Promise<Result<GeneratedImageResult, GeminiAPIError | NetworkError>>;
|
|
50
28
|
}
|
|
51
|
-
/**
|
|
52
|
-
* Creates a new Gemini API client
|
|
53
|
-
* @param config Configuration containing API key and other settings
|
|
54
|
-
* @returns Result containing the client or an error
|
|
55
|
-
*/
|
|
56
29
|
export declare function createGeminiClient(config: Config): Result<GeminiClient, GeminiAPIError>;
|
|
57
30
|
//# sourceMappingURL=geminiClient.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"geminiClient.d.ts","sourceRoot":"","sources":["../../src/api/geminiClient.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"geminiClient.d.ts","sourceRoot":"","sources":["../../src/api/geminiClient.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,KAAK,EAA0B,oBAAoB,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAA;AAE3G,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAA;AAEhD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAA;AAChD,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAA;AAkHjE;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,MAAM,CAAA;IACd,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,kBAAkB,CAAC,EAAE,MAAM,CAAA;IAC3B,WAAW,CAAC,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,MAAM,CAAA;KAAE,CAAC,CAAA;IACvD,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,SAAS,CAAC,EAAE,MAAM,CAAA;IAClB,eAAe,CAAC,EAAE,OAAO,CAAA;IACzB,OAAO,CAAC,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS,CAAA;CAC1C;AAED,MAAM,WAAW,YAAa,SAAQ,mBAAmB;IACvD,aAAa,CACX,MAAM,EAAE,eAAe,GACtB,OAAO,CAAC,MAAM,CAAC,oBAAoB,EAAE,cAAc,GAAG,YAAY,CAAC,CAAC,CAAA;CACxE;AA6TD,wBAAgB,kBAAkB,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,YAAY,EAAE,cAAc,CAAC,CAiDvF"}
|