mcp-hydrocoder-image 1.1.1 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/README.md +239 -80
  2. package/dist/api/geminiClient.d.ts +3 -30
  3. package/dist/api/geminiClient.d.ts.map +1 -1
  4. package/dist/api/geminiClient.js +13 -57
  5. package/dist/api/geminiClient.js.map +1 -1
  6. package/dist/api/geminiTextClient.js +1 -1
  7. package/dist/api/geminiTextClient.js.map +1 -1
  8. package/dist/api/imageProvider.d.ts +29 -0
  9. package/dist/api/imageProvider.d.ts.map +1 -0
  10. package/dist/api/imageProvider.js +5 -0
  11. package/dist/api/imageProvider.js.map +1 -0
  12. package/dist/api/volcengineClient.d.ts +13 -0
  13. package/dist/api/volcengineClient.d.ts.map +1 -0
  14. package/dist/api/volcengineClient.js +288 -0
  15. package/dist/api/volcengineClient.js.map +1 -0
  16. package/dist/business/inputValidator.d.ts.map +1 -1
  17. package/dist/business/inputValidator.js +69 -1
  18. package/dist/business/inputValidator.js.map +1 -1
  19. package/dist/business/multiImagePrompt.d.ts +13 -0
  20. package/dist/business/multiImagePrompt.d.ts.map +1 -0
  21. package/dist/business/multiImagePrompt.js +135 -0
  22. package/dist/business/multiImagePrompt.js.map +1 -0
  23. package/dist/business/providerResolver.d.ts +4 -0
  24. package/dist/business/providerResolver.d.ts.map +1 -0
  25. package/dist/business/providerResolver.js +45 -0
  26. package/dist/business/providerResolver.js.map +1 -0
  27. package/dist/business/responseBuilder.d.ts +3 -2
  28. package/dist/business/responseBuilder.d.ts.map +1 -1
  29. package/dist/business/responseBuilder.js +69 -48
  30. package/dist/business/responseBuilder.js.map +1 -1
  31. package/dist/index.d.ts +1 -1
  32. package/dist/index.d.ts.map +1 -1
  33. package/dist/server/errorHandler.d.ts.map +1 -1
  34. package/dist/server/errorHandler.js +3 -2
  35. package/dist/server/errorHandler.js.map +1 -1
  36. package/dist/server/mcpServer.d.ts +90 -29
  37. package/dist/server/mcpServer.d.ts.map +1 -1
  38. package/dist/server/mcpServer.js +430 -288
  39. package/dist/server/mcpServer.js.map +1 -1
  40. package/dist/types/mcp.d.ts +62 -15
  41. package/dist/types/mcp.d.ts.map +1 -1
  42. package/dist/types/mcp.js +15 -0
  43. package/dist/types/mcp.js.map +1 -1
  44. package/dist/utils/config.d.ts +6 -2
  45. package/dist/utils/config.d.ts.map +1 -1
  46. package/dist/utils/config.js +43 -13
  47. package/dist/utils/config.js.map +1 -1
  48. package/dist/utils/errors.d.ts +9 -0
  49. package/dist/utils/errors.d.ts.map +1 -1
  50. package/dist/utils/errors.js +50 -1
  51. package/dist/utils/errors.js.map +1 -1
  52. package/package.json +2 -1
@@ -1,201 +1,240 @@
1
1
  /**
2
2
  * MCP Server implementation
3
- * Simplified architecture with direct Gemini integration
3
+ * Supports multiple image providers with Gemini prompt enhancement when applicable
4
4
  */
5
5
  import * as fs from 'node:fs/promises';
6
6
  import * as path from 'node:path';
7
7
  import { Server } from '@modelcontextprotocol/sdk/server/index.js';
8
8
  import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
9
- // API clients
10
9
  import { createGeminiClient } from '../api/geminiClient.js';
11
10
  import { createGeminiTextClient } from '../api/geminiTextClient.js';
12
- // Business logic
11
+ import { createVolcengineClient } from '../api/volcengineClient.js';
13
12
  import { createFileManager } from '../business/fileManager.js';
14
- import { validateGenerateImageParams } from '../business/inputValidator.js';
13
+ import { validateGenerateImageParams, validatePrompt } from '../business/inputValidator.js';
14
+ import { buildIndependentImagePrompt, extractExplicitImageRequests, normalizeMultiImageParams, prepareGenerateMultiImageParams, } from '../business/multiImagePrompt.js';
15
+ import { resolveRequestedProvider } from '../business/providerResolver.js';
15
16
  import { createResponseBuilder } from '../business/responseBuilder.js';
16
17
  import { createStructuredPromptGenerator, } from '../business/structuredPromptGenerator.js';
17
- // Utilities
18
18
  import { getConfig } from '../utils/config.js';
19
19
  import { Logger } from '../utils/logger.js';
20
20
  import { SecurityManager } from '../utils/security.js';
21
21
  import { ErrorHandler } from './errorHandler.js';
22
- /**
23
- * Default MCP server configuration
24
- */
25
22
  const DEFAULT_CONFIG = {
26
23
  name: 'mcp-image-server',
27
24
  version: '0.1.0',
28
25
  defaultOutputDir: './output',
29
26
  };
30
- /**
31
- * Simplified MCP server
32
- */
27
+ function createInputSchema(multiOnly = false) {
28
+ return {
29
+ type: 'object',
30
+ properties: {
31
+ prompt: {
32
+ type: 'string',
33
+ description: multiOnly
34
+ ? 'Shared prompt or overall requirements for a multi-image generation task. Use this tool when the user wants one grouped result containing multiple images. Keep common constraints here and prefer outputCount or imageRequests over multiple tool calls.'
35
+ : 'Shared prompt or overall requirements for single-image generation or editing. If the user wants multiple images in one request, prefer generate_multi_image instead. English recommended for prompt enhancement.',
36
+ },
37
+ provider: {
38
+ type: 'string',
39
+ description: 'Optional provider override. Defaults to IMAGE_PROVIDER when omitted. If the user explicitly asks for Volcengine, Seedream, 豆包, or 火山引擎, set this to `volcengine`. If they explicitly ask for Gemini, Google Gemini, or Nano Banana, set this to `gemini`. Do not probe shell environment variables before calling the tool.',
40
+ enum: ['gemini', 'volcengine'],
41
+ },
42
+ fileName: {
43
+ type: 'string',
44
+ description: 'Custom file name for the output image. Auto-generated if not specified.',
45
+ },
46
+ inputImagePath: {
47
+ type: 'string',
48
+ description: 'Optional absolute path to a source image. If the user provides a local image path, pass it here directly instead of summarizing image contents in the prompt. Supported by Gemini and Volcengine reference-image workflows.',
49
+ },
50
+ inputImage: {
51
+ type: 'string',
52
+ description: 'Optional base64 encoded image data for image-to-image generation. Gemini accepts raw base64; Volcengine sends this as `data:image/<format>;base64,<data>` and requires `inputImageMimeType` for correct formatting.',
53
+ },
54
+ inputImageMimeType: {
55
+ type: 'string',
56
+ description: 'MIME type of the input image provided via inputImage. Required when inputImage is provided for accurate processing',
57
+ enum: ['image/jpeg', 'image/png', 'image/webp', 'image/gif', 'image/bmp'],
58
+ },
59
+ inputImages: {
60
+ type: 'array',
61
+ description: 'Multiple input images for multi-image composition. Supported by Gemini and by Volcengine when mapped to reference-image arrays.',
62
+ items: {
63
+ type: 'object',
64
+ properties: {
65
+ data: {
66
+ type: 'string',
67
+ description: 'Base64 encoded image data. Raw base64 is accepted; for Volcengine it will be sent as `data:image/<format>;base64,<data>` using the paired `mimeType`.',
68
+ },
69
+ mimeType: {
70
+ type: 'string',
71
+ description: 'MIME type of the image',
72
+ enum: ['image/jpeg', 'image/png', 'image/webp', 'image/gif', 'image/bmp'],
73
+ },
74
+ },
75
+ required: ['data', 'mimeType'],
76
+ },
77
+ },
78
+ inputImagePaths: {
79
+ type: 'array',
80
+ description: 'Multiple absolute local image paths for multi-image composition. If the user provides two or more local image paths, pass them here directly instead of summarizing the images in the prompt.',
81
+ items: {
82
+ type: 'string',
83
+ description: 'Absolute path to an image file',
84
+ },
85
+ },
86
+ returnBase64: {
87
+ type: 'boolean',
88
+ description: 'Return the generated image as base64 data in the response. The image is always saved to disk regardless of this setting. Default: false',
89
+ },
90
+ blendImages: {
91
+ type: 'boolean',
92
+ description: 'Enable multi-image blending for combining multiple visual elements naturally. Use when prompt mentions multiple subjects or composite scenes',
93
+ },
94
+ maintainCharacterConsistency: {
95
+ type: 'boolean',
96
+ description: 'Maintain character appearance consistency. Enable when generating same character in different poses/scenes',
97
+ },
98
+ useWorldKnowledge: {
99
+ type: 'boolean',
100
+ description: 'Use real-world knowledge for accurate context. Enable for historical figures, landmarks, or factual scenarios',
101
+ },
102
+ useGoogleSearch: {
103
+ type: 'boolean',
104
+ description: 'Enable Google Search grounding for Gemini. Ignored by providers that do not support it.',
105
+ },
106
+ aspectRatio: {
107
+ type: 'string',
108
+ description: 'Aspect ratio for the generated image. When omitted, the server defaults to 16:9 for Gemini and Volcengine.',
109
+ enum: ['1:1', '1:4', '1:8', '2:3', '3:2', '3:4', '4:1', '4:3', '4:5', '5:4', '8:1', '9:16', '16:9', '21:9'],
110
+ },
111
+ imageSize: {
112
+ type: 'string',
113
+ description: 'Image resolution for high-quality output. Specify "1K", "2K", or "4K" when you need specific resolution. When omitted, the server defaults to 4K for Gemini and Volcengine. Volcengine also normalizes the final size into the provider legal pixel range.',
114
+ enum: ['1K', '2K', '4K'],
115
+ },
116
+ purpose: {
117
+ type: 'string',
118
+ description: 'Intended use for the image (e.g., cookbook cover, social media post, presentation slide). Influences lighting, composition, and detail level to match the context.',
119
+ },
120
+ quality: {
121
+ type: 'string',
122
+ description: 'Quality preset controlling speed/fidelity tradeoff. "fast": drafts, "balanced": better detail, "quality": highest fidelity.',
123
+ enum: ['fast', 'balanced', 'quality'],
124
+ },
125
+ outputFormat: {
126
+ type: 'string',
127
+ description: 'Output image format if supported by the provider. Some provider endpoints may ignore or reject format overrides.',
128
+ enum: ['png', 'jpeg', 'webp'],
129
+ },
130
+ outputCount: {
131
+ type: 'integer',
132
+ description: multiOnly
133
+ ? 'Target number of images to generate in one grouped multi-image call. Prefer values greater than 1. If omitted, the server will try to infer the count from prompts like "4张图" or "4 images".'
134
+ : 'Backward-compatible grouped output count for generate_image. For new multi-image requests, prefer generate_multi_image instead. Currently wired for Volcengine, but final image count still depends on provider behavior.',
135
+ },
136
+ imageRequests: {
137
+ type: 'array',
138
+ description: multiOnly
139
+ ? 'Per-image prompts for one grouped multi-image call. Use this when the user wants multiple distinct images at once. The server rewrites them into explicit 第1张/第2张/... instructions and infers outputCount from the array length when omitted.'
140
+ : 'Backward-compatible per-image prompts for generate_image. For new grouped multi-image requests, prefer generate_multi_image instead.',
141
+ items: {
142
+ type: 'string',
143
+ },
144
+ },
145
+ skipPromptEnhancement: {
146
+ type: 'boolean',
147
+ description: 'Skip prompt enhancement and use the prompt as-is. Enable when your prompt already contains exact instructions.',
148
+ },
149
+ },
150
+ required: ['prompt'],
151
+ };
152
+ }
153
+ function createOutputSchema() {
154
+ return {
155
+ type: 'object',
156
+ properties: {
157
+ type: {
158
+ type: 'string',
159
+ const: 'image_result',
160
+ },
161
+ files: {
162
+ type: 'array',
163
+ items: {
164
+ type: 'object',
165
+ properties: {
166
+ uri: { type: 'string' },
167
+ name: { type: 'string' },
168
+ title: { type: 'string' },
169
+ mimeType: { type: 'string' },
170
+ description: { type: 'string' },
171
+ },
172
+ required: ['uri', 'name', 'mimeType'],
173
+ },
174
+ },
175
+ base64Included: {
176
+ type: 'boolean',
177
+ },
178
+ metadata: {
179
+ type: 'object',
180
+ properties: {
181
+ model: { type: 'string' },
182
+ processingTime: { type: 'number' },
183
+ contextMethod: { type: 'string' },
184
+ timestamp: { type: 'string' },
185
+ imageCount: { type: 'integer' },
186
+ },
187
+ required: ['model', 'processingTime', 'contextMethod', 'timestamp', 'imageCount'],
188
+ },
189
+ },
190
+ required: ['type', 'files', 'metadata'],
191
+ };
192
+ }
33
193
  export class MCPServerImpl {
34
194
  constructor(config = {}) {
35
195
  this.server = null;
36
196
  this.structuredPromptGenerator = null;
37
197
  this.geminiTextClient = null;
38
198
  this.geminiClient = null;
199
+ this.volcengineClient = null;
39
200
  this.config = { ...DEFAULT_CONFIG, ...config };
40
201
  this.logger = new Logger();
41
202
  this.fileManager = createFileManager();
42
203
  this.responseBuilder = createResponseBuilder();
43
204
  this.securityManager = new SecurityManager();
44
205
  }
45
- /**
46
- * Get server info
47
- */
48
206
  getServerInfo() {
49
207
  return {
50
208
  name: this.config.name,
51
209
  version: this.config.version,
52
210
  };
53
211
  }
54
- /**
55
- * Get list of registered tools
56
- */
57
212
  getToolsList() {
58
213
  return {
59
214
  tools: [
60
215
  {
61
216
  name: 'generate_image',
62
- description: 'Generate, edit, blend, or merge images using AI. Supports text-to-image generation, single image editing, and multi-image composition/blending. Use inputImagePaths for merging multiple images from file paths, or inputImages for base64 encoded images.',
63
- inputSchema: {
64
- type: 'object',
65
- properties: {
66
- prompt: {
67
- type: 'string',
68
- description: 'The prompt for image generation (English recommended for optimal structured prompt enhancement)',
69
- },
70
- fileName: {
71
- type: 'string',
72
- description: 'Custom file name for the output image. Auto-generated if not specified.',
73
- },
74
- inputImagePath: {
75
- type: 'string',
76
- description: 'Optional absolute path to source image for image-to-image generation. Use when generating variations, style transfers, or similar images based on an existing image (must be an absolute path)',
77
- },
78
- inputImage: {
79
- type: 'string',
80
- description: 'Optional base64 encoded image data for image-to-image generation. Alternative to inputImagePath when image data is already in memory. Do not include data URI prefix (e.g., "data:image/png;base64,")',
81
- },
82
- inputImageMimeType: {
83
- type: 'string',
84
- description: 'MIME type of the input image provided via inputImage. Required when inputImage is provided for accurate processing',
85
- enum: [
86
- 'image/jpeg',
87
- 'image/png',
88
- 'image/webp',
89
- 'image/gif',
90
- 'image/bmp',
91
- ],
92
- },
93
- inputImages: {
94
- type: 'array',
95
- description: 'Multiple input images for multi-image composition. Cannot be used together with inputImage or inputImagePath. Each item requires base64 data and MIME type.',
96
- items: {
97
- type: 'object',
98
- properties: {
99
- data: {
100
- type: 'string',
101
- description: 'Base64 encoded image data. Do not include data URI prefix.',
102
- },
103
- mimeType: {
104
- type: 'string',
105
- description: 'MIME type of the image',
106
- enum: [
107
- 'image/jpeg',
108
- 'image/png',
109
- 'image/webp',
110
- 'image/gif',
111
- 'image/bmp',
112
- ],
113
- },
114
- },
115
- required: ['data', 'mimeType'],
116
- },
117
- },
118
- inputImagePaths: {
119
- type: 'array',
120
- description: 'Multiple input image file paths for multi-image composition. Cannot be used together with inputImage, inputImagePath, or inputImages. Each path must be absolute.',
121
- items: {
122
- type: 'string',
123
- description: 'Absolute path to an image file',
124
- },
125
- },
126
- returnBase64: {
127
- type: 'boolean',
128
- description: 'Return the generated image as base64 data in the response. The image is always saved to disk regardless of this setting. Default: false',
129
- },
130
- blendImages: {
131
- type: 'boolean',
132
- description: 'Enable multi-image blending for combining multiple visual elements naturally. Use when prompt mentions multiple subjects or composite scenes',
133
- },
134
- maintainCharacterConsistency: {
135
- type: 'boolean',
136
- description: 'Maintain character appearance consistency. Enable when generating same character in different poses/scenes',
137
- },
138
- useWorldKnowledge: {
139
- type: 'boolean',
140
- description: 'Use real-world knowledge for accurate context. Enable for historical figures, landmarks, or factual scenarios',
141
- },
142
- useGoogleSearch: {
143
- type: 'boolean',
144
- description: "Enable Google Search grounding to access real-time web information for factually accurate image generation. Use when prompt requires current or time-sensitive data that may have changed since the model's knowledge cutoff. Leave disabled for creative, fictional, historical, or timeless content.",
145
- },
146
- aspectRatio: {
147
- type: 'string',
148
- description: 'Aspect ratio for the generated image',
149
- enum: [
150
- '1:1',
151
- '1:4',
152
- '1:8',
153
- '2:3',
154
- '3:2',
155
- '3:4',
156
- '4:1',
157
- '4:3',
158
- '4:5',
159
- '5:4',
160
- '8:1',
161
- '9:16',
162
- '16:9',
163
- '21:9',
164
- ],
165
- },
166
- imageSize: {
167
- type: 'string',
168
- description: 'Image resolution for high-quality output. Specify "1K", "2K", or "4K" when you need specific resolution. Leave unspecified for standard quality.',
169
- enum: ['1K', '2K', '4K'],
170
- },
171
- purpose: {
172
- type: 'string',
173
- description: 'Intended use for the image (e.g., cookbook cover, social media post, presentation slide). Influences lighting, composition, and detail level to match the context.',
174
- },
175
- quality: {
176
- type: 'string',
177
- description: 'Quality preset controlling speed/fidelity tradeoff. Only specify when the user explicitly requests a specific quality level; omit to use the server\'s configured default. "fast": best for drafts and rapid iteration. "balanced": better detail and coherence, moderate latency. "quality": highest fidelity, use for final deliverables where quality matters most.',
178
- enum: ['fast', 'balanced', 'quality'],
179
- },
180
- skipPromptEnhancement: {
181
- type: 'boolean',
182
- description: 'Skip prompt enhancement and use the prompt as-is. Enable when your prompt already contains exact instructions (e.g., multi-image blending) that should not be rewritten. Default: false',
183
- },
184
- },
185
- required: ['prompt'],
186
- },
217
+ description: 'Generate, edit, blend, or merge a single image using AI. Use this tool for standard single-image generation and image editing. If the user wants multiple images in one grouped request, prefer generate_multi_image. When the user provides local image file paths, those paths must be passed through inputImagePath or inputImagePaths instead of being summarized by the model. If the user explicitly asks for Volcengine, Seedream, 豆包, or 火山引擎, pass `provider="volcengine"`. If they explicitly ask for Gemini, Google Gemini, or Nano Banana, pass `provider="gemini"`. Do not use shell checks for provider env vars before calling this tool. Gemini handles prompt enhancement; Volcengine supports text-to-image and reference-image workflows through its OpenAI-compatible image API.',
218
+ inputSchema: createInputSchema(false),
219
+ outputSchema: createOutputSchema(),
220
+ },
221
+ {
222
+ name: 'generate_multi_image',
223
+ description: 'Generate multiple images in a single grouped tool call. Use this tool when the user wants a set of images, multiple product shots, multiple scenes, or several variations at once. This tool is preferred for Notebook planners that might otherwise split one request into multiple generate_image calls. If the user explicitly asks for Volcengine, Seedream, 豆包, or 火山引擎, pass `provider="volcengine"`. If they explicitly ask for Gemini, Google Gemini, or Nano Banana, pass `provider="gemini"`. The server will infer outputCount from phrases like "4张图" when possible and will rewrite prompts into explicit 第1张/第2张/... instructions.',
224
+ inputSchema: createInputSchema(true),
225
+ outputSchema: createOutputSchema(),
187
226
  },
188
227
  ],
189
228
  };
190
229
  }
191
- /**
192
- * Tool execution
193
- */
194
230
  async callTool(name, args) {
195
231
  try {
196
232
  if (name === 'generate_image') {
197
233
  return await this.handleGenerateImage(args);
198
234
  }
235
+ if (name === 'generate_multi_image') {
236
+ return await this.handleGenerateMultiImage(args);
237
+ }
199
238
  throw new Error(`Unknown tool: ${name}`);
200
239
  }
201
240
  catch (error) {
@@ -203,17 +242,13 @@ export class MCPServerImpl {
203
242
  return ErrorHandler.handleError(error);
204
243
  }
205
244
  }
206
- /**
207
- * Initialize Gemini clients lazily
208
- */
209
- async initializeClients() {
245
+ async initializeGeminiSupport() {
210
246
  if (this.structuredPromptGenerator && this.geminiClient)
211
247
  return;
212
248
  const configResult = getConfig();
213
249
  if (!configResult.success) {
214
250
  throw configResult.error;
215
251
  }
216
- // Initialize Gemini Text Client for prompt generation
217
252
  if (!this.geminiTextClient) {
218
253
  const textClientResult = createGeminiTextClient(configResult.data);
219
254
  if (!textClientResult.success) {
@@ -221,11 +256,9 @@ export class MCPServerImpl {
221
256
  }
222
257
  this.geminiTextClient = textClientResult.data;
223
258
  }
224
- // Initialize Structured Prompt Generator
225
259
  if (!this.structuredPromptGenerator) {
226
260
  this.structuredPromptGenerator = createStructuredPromptGenerator(this.geminiTextClient);
227
261
  }
228
- // Initialize Gemini Client for image generation
229
262
  if (!this.geminiClient) {
230
263
  const clientResult = createGeminiClient(configResult.data);
231
264
  if (!clientResult.success) {
@@ -233,160 +266,278 @@ export class MCPServerImpl {
233
266
  }
234
267
  this.geminiClient = clientResult.data;
235
268
  }
236
- this.logger.info('mcp-server', 'Gemini clients initialized');
237
269
  }
238
- /**
239
- * Simplified image generation handler
240
- */
241
- async handleGenerateImage(params) {
242
- const result = await ErrorHandler.wrapWithResultType(async () => {
243
- // Validate input
244
- const validationResult = validateGenerateImageParams(params);
245
- if (!validationResult.success) {
246
- throw validationResult.error;
247
- }
248
- // Get configuration
249
- const configResult = getConfig();
250
- if (!configResult.success) {
251
- throw configResult.error;
270
+ async initializeVolcengineSupport() {
271
+ if (this.volcengineClient)
272
+ return;
273
+ const configResult = getConfig();
274
+ if (!configResult.success) {
275
+ throw configResult.error;
276
+ }
277
+ const clientResult = createVolcengineClient(configResult.data);
278
+ if (!clientResult.success) {
279
+ throw clientResult.error;
280
+ }
281
+ this.volcengineClient = clientResult.data;
282
+ }
283
+ getProviderClient(provider) {
284
+ if (provider === 'gemini') {
285
+ if (!this.geminiClient) {
286
+ throw new Error('Gemini client not initialized');
252
287
  }
253
- // Initialize clients
254
- await this.initializeClients();
255
- // Handle input image if provided
256
- let inputImageData;
257
- let inputImageMimeType;
258
- let inputImagesData;
259
- if (params.inputImagePaths && params.inputImagePaths.length > 0) {
260
- // Multi-image from file paths: read each file and derive mimeType from extension
261
- const extToMime = {
262
- '.jpg': 'image/jpeg',
263
- '.jpeg': 'image/jpeg',
264
- '.png': 'image/png',
265
- '.webp': 'image/webp',
266
- '.gif': 'image/gif',
267
- '.bmp': 'image/bmp',
288
+ return this.geminiClient;
289
+ }
290
+ if (!this.volcengineClient) {
291
+ throw new Error('Volcengine client not initialized');
292
+ }
293
+ return this.volcengineClient;
294
+ }
295
+ async prepareInputImages(params) {
296
+ let inputImageData;
297
+ let inputImageMimeType;
298
+ let inputImagesData;
299
+ const extToMime = {
300
+ '.jpg': 'image/jpeg',
301
+ '.jpeg': 'image/jpeg',
302
+ '.png': 'image/png',
303
+ '.webp': 'image/webp',
304
+ '.gif': 'image/gif',
305
+ '.bmp': 'image/bmp',
306
+ };
307
+ if (params.inputImagePaths && params.inputImagePaths.length > 0) {
308
+ inputImagesData = await Promise.all(params.inputImagePaths.map(async (filePath) => {
309
+ const buffer = await fs.readFile(filePath);
310
+ const ext = path.extname(filePath).toLowerCase();
311
+ return {
312
+ data: buffer.toString('base64'),
313
+ mimeType: extToMime[ext] || 'image/jpeg',
268
314
  };
269
- inputImagesData = await Promise.all(params.inputImagePaths.map(async (filePath) => {
270
- const buffer = await fs.readFile(filePath);
271
- const ext = path.extname(filePath).toLowerCase();
272
- return {
273
- data: buffer.toString('base64'),
274
- mimeType: extToMime[ext] || 'image/jpeg',
275
- };
276
- }));
277
- inputImageData = inputImagesData[0]?.data;
278
- inputImageMimeType = inputImagesData[0]?.mimeType;
279
- }
280
- else if (params.inputImages && params.inputImages.length > 0) {
281
- // Multi-image: strip data URI prefix from each image
282
- inputImagesData = params.inputImages.map((img) => ({
283
- data: img.data.replace(/^data:image\/[a-z]+;base64,/, ''),
284
- mimeType: img.mimeType,
285
- }));
286
- // Use first image for prompt enhancement context
287
- inputImageData = inputImagesData[0]?.data;
288
- inputImageMimeType = inputImagesData[0]?.mimeType;
289
- }
290
- else if (params.inputImagePath) {
291
- const imageBuffer = await fs.readFile(params.inputImagePath);
292
- inputImageData = imageBuffer.toString('base64');
293
- }
294
- else if (params.inputImage) {
295
- // Use base64 input directly, stripping data URI prefix if present
296
- inputImageData = params.inputImage.replace(/^data:image\/[a-z]+;base64,/, '');
297
- inputImageMimeType = params.inputImageMimeType;
298
- }
299
- // Generate structured prompt (unless skipped)
300
- let structuredPrompt = params.prompt;
301
- const shouldSkipEnhancement = params.skipPromptEnhancement ?? configResult.data.skipPromptEnhancement;
302
- if (!shouldSkipEnhancement && this.structuredPromptGenerator) {
303
- const features = {};
304
- if (params.maintainCharacterConsistency !== undefined) {
305
- features.maintainCharacterConsistency = params.maintainCharacterConsistency;
306
- }
307
- if (params.blendImages !== undefined) {
308
- features.blendImages = params.blendImages;
309
- }
310
- if (params.useWorldKnowledge !== undefined) {
311
- features.useWorldKnowledge = params.useWorldKnowledge;
312
- }
313
- if (params.useGoogleSearch !== undefined) {
314
- features.useGoogleSearch = params.useGoogleSearch;
315
- }
316
- const promptResult = await this.structuredPromptGenerator.generateStructuredPrompt(params.prompt, features, inputImageData, // Pass image data for context-aware prompt generation
317
- params.purpose // Pass intended use for purpose-aware prompt generation
318
- );
319
- if (promptResult.success) {
320
- structuredPrompt = promptResult.data.structuredPrompt;
321
- this.logger.info('mcp-server', 'Structured prompt generated', {
322
- originalLength: params.prompt.length,
323
- structuredLength: structuredPrompt.length,
324
- selectedPractices: promptResult.data.selectedPractices,
325
- });
326
- }
327
- else {
328
- this.logger.warn('mcp-server', 'Using original prompt', {
329
- error: promptResult.error.message,
330
- });
331
- }
332
- }
333
- else if (shouldSkipEnhancement) {
315
+ }));
316
+ inputImageData = inputImagesData[0]?.data;
317
+ inputImageMimeType = inputImagesData[0]?.mimeType;
318
+ }
319
+ else if (params.inputImages && params.inputImages.length > 0) {
320
+ inputImagesData = params.inputImages.map((img) => ({
321
+ data: img.data.replace(/^data:image\/[a-z0-9.+-]+;base64,/i, ''),
322
+ mimeType: img.mimeType,
323
+ }));
324
+ inputImageData = inputImagesData[0]?.data;
325
+ inputImageMimeType = inputImagesData[0]?.mimeType;
326
+ }
327
+ else if (params.inputImagePath) {
328
+ const imageBuffer = await fs.readFile(params.inputImagePath);
329
+ const ext = path.extname(params.inputImagePath).toLowerCase();
330
+ inputImageData = imageBuffer.toString('base64');
331
+ inputImageMimeType = extToMime[ext] || 'image/jpeg';
332
+ }
333
+ else if (params.inputImage) {
334
+ inputImageData = params.inputImage.replace(/^data:image\/[a-z0-9.+-]+;base64,/i, '');
335
+ inputImageMimeType = params.inputImageMimeType;
336
+ }
337
+ return { inputImageData, inputImageMimeType, inputImagesData };
338
+ }
339
+ async maybeEnhancePrompt(provider, params, inputImageData) {
340
+ const configResult = getConfig();
341
+ if (!configResult.success) {
342
+ throw configResult.error;
343
+ }
344
+ const shouldSkipEnhancement = params.skipPromptEnhancement ?? configResult.data.skipPromptEnhancement;
345
+ if (provider !== 'gemini' || shouldSkipEnhancement || !this.structuredPromptGenerator) {
346
+ if (shouldSkipEnhancement) {
334
347
  this.logger.info('mcp-server', 'Prompt enhancement skipped (SKIP_PROMPT_ENHANCEMENT=true)');
335
348
  }
336
- // Generate image using Gemini API
337
- if (!this.geminiClient) {
338
- throw new Error('Gemini client not initialized');
339
- }
340
- const generationResult = await this.geminiClient.generateImage({
341
- prompt: structuredPrompt,
342
- ...(inputImagesData && { inputImages: inputImagesData }),
343
- ...(!inputImagesData && inputImageData && { inputImage: inputImageData }),
344
- ...(!inputImagesData && inputImageMimeType && { inputImageMimeType }),
345
- ...(params.aspectRatio && { aspectRatio: params.aspectRatio }),
346
- ...(params.imageSize && { imageSize: params.imageSize }),
347
- ...(params.useGoogleSearch !== undefined && { useGoogleSearch: params.useGoogleSearch }),
348
- ...(params.quality !== undefined && { quality: params.quality }),
349
+ return params.prompt;
350
+ }
351
+ const features = {};
352
+ if (params.maintainCharacterConsistency !== undefined) {
353
+ features.maintainCharacterConsistency = params.maintainCharacterConsistency;
354
+ }
355
+ if (params.blendImages !== undefined) {
356
+ features.blendImages = params.blendImages;
357
+ }
358
+ if (params.useWorldKnowledge !== undefined) {
359
+ features.useWorldKnowledge = params.useWorldKnowledge;
360
+ }
361
+ if (params.useGoogleSearch !== undefined) {
362
+ features.useGoogleSearch = params.useGoogleSearch;
363
+ }
364
+ const promptResult = await this.structuredPromptGenerator.generateStructuredPrompt(params.prompt, features, inputImageData, params.purpose);
365
+ if (!promptResult.success) {
366
+ this.logger.warn('mcp-server', 'Using original prompt', {
367
+ error: promptResult.error.message,
349
368
  });
350
- if (!generationResult.success) {
351
- throw generationResult.error;
369
+ return params.prompt;
370
+ }
371
+ this.logger.info('mcp-server', 'Structured prompt generated', {
372
+ originalLength: params.prompt.length,
373
+ structuredLength: promptResult.data.structuredPrompt.length,
374
+ selectedPractices: promptResult.data.selectedPractices,
375
+ });
376
+ return promptResult.data.structuredPrompt;
377
+ }
378
+ buildSuccessResponse(params, generationResult, savedPaths) {
379
+ if (params.returnBase64) {
380
+ return this.responseBuilder.buildBase64SuccessResponse(generationResult, savedPaths);
381
+ }
382
+ return savedPaths.length > 1
383
+ ? this.responseBuilder.buildMultiSuccessResponse(generationResult, savedPaths)
384
+ : this.responseBuilder.buildSuccessResponse(generationResult, savedPaths[0]);
385
+ }
386
+ getGeneratedVariants(generationResult) {
387
+ return generationResult.images?.length
388
+ ? generationResult.images
389
+ : [{ imageData: generationResult.imageData, mimeType: generationResult.metadata.mimeType }];
390
+ }
391
+ async generateAndSave(params) {
392
+ const validationResult = validateGenerateImageParams(params);
393
+ if (!validationResult.success) {
394
+ throw validationResult.error;
395
+ }
396
+ const normalizedParams = normalizeMultiImageParams(validationResult.data);
397
+ const normalizedPromptResult = validatePrompt(normalizedParams.prompt);
398
+ if (!normalizedPromptResult.success) {
399
+ throw normalizedPromptResult.error;
400
+ }
401
+ const configResult = getConfig();
402
+ if (!configResult.success) {
403
+ throw configResult.error;
404
+ }
405
+ const provider = resolveRequestedProvider(normalizedParams.provider, normalizedParams.prompt, configResult.data.imageProvider);
406
+ if (provider === 'gemini') {
407
+ await this.initializeGeminiSupport();
408
+ }
409
+ else {
410
+ await this.initializeVolcengineSupport();
411
+ }
412
+ const { inputImageData, inputImageMimeType, inputImagesData } = await this.prepareInputImages(normalizedParams);
413
+ const prompt = await this.maybeEnhancePrompt(provider, normalizedParams, inputImageData);
414
+ const client = this.getProviderClient(provider);
415
+ const generationResult = await client.generateImage({
416
+ ...normalizedParams,
417
+ provider,
418
+ prompt,
419
+ ...(inputImagesData && { inputImages: inputImagesData }),
420
+ ...(!inputImagesData && inputImageData && { inputImage: inputImageData }),
421
+ ...(!inputImagesData && inputImageMimeType && { inputImageMimeType }),
422
+ });
423
+ if (!generationResult.success) {
424
+ throw generationResult.error;
425
+ }
426
+ const saveTargets = generationResult.data.images?.length
427
+ ? generationResult.data.images
428
+ : [{ imageData: generationResult.data.imageData, mimeType: generationResult.data.metadata.mimeType }];
429
+ const savedPaths = [];
430
+ for (let index = 0; index < saveTargets.length; index++) {
431
+ const target = saveTargets[index];
432
+ if (!target) {
433
+ continue;
352
434
  }
353
- // Save image file
354
- let fileName = params.fileName || this.fileManager.generateFileName();
355
- // Auto-append extension if user-provided fileName has no extension
356
- if (params.fileName && !path.extname(fileName)) {
357
- const mimeToExt = {
435
+ let currentFileName = normalizedParams.fileName || this.fileManager.generateFileName();
436
+ if (normalizedParams.fileName) {
437
+ const ext = path.extname(currentFileName);
438
+ const baseName = ext ? currentFileName.slice(0, -ext.length) : currentFileName;
439
+ const finalExt = ext || ({
358
440
  'image/png': '.png',
359
441
  'image/jpeg': '.jpg',
360
442
  'image/webp': '.webp',
361
443
  'image/gif': '.gif',
362
444
  'image/bmp': '.bmp',
363
- };
364
- fileName += mimeToExt[generationResult.data.metadata.mimeType] || '.png';
445
+ }[target.mimeType] || '.png');
446
+ currentFileName = saveTargets.length > 1 ? `${baseName}-${index + 1}${finalExt}` : `${baseName}${finalExt}`;
365
447
  }
366
- const outputPath = path.join(configResult.data.imageOutputDir, fileName);
448
+ else if (saveTargets.length > 1) {
449
+ const ext = path.extname(currentFileName);
450
+ const baseName = currentFileName.slice(0, -ext.length);
451
+ currentFileName = `${baseName}-${index + 1}${ext}`;
452
+ }
453
+ const outputPath = path.join(configResult.data.imageOutputDir, currentFileName);
367
454
  const sanitizedPath = this.securityManager.sanitizeFilePath(outputPath);
368
455
  if (!sanitizedPath.success) {
369
456
  throw sanitizedPath.error;
370
457
  }
371
- const saveResult = await this.fileManager.saveImage(generationResult.data.imageData, sanitizedPath.data);
458
+ const saveResult = await this.fileManager.saveImage(target.imageData, sanitizedPath.data);
372
459
  if (!saveResult.success) {
373
460
  throw saveResult.error;
374
461
  }
375
- // Build response
376
- if (params.returnBase64) {
377
- const base64Data = generationResult.data.imageData.toString('base64');
378
- return this.responseBuilder.buildBase64SuccessResponse(generationResult.data, saveResult.data, base64Data);
462
+ savedPaths.push(saveResult.data);
463
+ }
464
+ return {
465
+ generationResult: generationResult.data,
466
+ savedPaths,
467
+ normalizedParams,
468
+ };
469
+ }
470
+ async handleGenerateMultiImage(params) {
471
+ const result = await ErrorHandler.wrapWithResultType(async () => {
472
+ const preparedParamsResult = prepareGenerateMultiImageParams(params);
473
+ if (!preparedParamsResult.success) {
474
+ throw preparedParamsResult.error;
475
+ }
476
+ const preparedParams = preparedParamsResult.data;
477
+ const explicitRequests = preparedParams.imageRequests?.length
478
+ ? { sharedPrompt: preparedParams.prompt, imageRequests: preparedParams.imageRequests }
479
+ : extractExplicitImageRequests(preparedParams.prompt);
480
+ if (!explicitRequests.imageRequests.length) {
481
+ const singleRun = await this.generateAndSave(preparedParams);
482
+ return this.buildSuccessResponse(singleRun.normalizedParams, singleRun.generationResult, singleRun.savedPaths);
483
+ }
484
+ const aggregatedPaths = [];
485
+ const aggregatedImages = [];
486
+ let firstGenerationResult;
487
+ for (let index = 0; index < explicitRequests.imageRequests.length; index++) {
488
+ const imageRequest = explicitRequests.imageRequests[index];
489
+ const singleParams = {
490
+ ...preparedParams,
491
+ prompt: buildIndependentImagePrompt(explicitRequests.sharedPrompt, imageRequest),
492
+ skipPromptEnhancement: true,
493
+ };
494
+ delete singleParams.outputCount;
495
+ delete singleParams.imageRequests;
496
+ if (preparedParams.fileName !== undefined) {
497
+ singleParams.fileName = `${preparedParams.fileName.replace(/(\.[^.]+)?$/, '')}-${index + 1}`;
498
+ }
499
+ else {
500
+ delete singleParams.fileName;
501
+ }
502
+ const singleRun = await this.generateAndSave(singleParams);
503
+ firstGenerationResult ?? (firstGenerationResult = singleRun.generationResult);
504
+ aggregatedPaths.push(...singleRun.savedPaths);
505
+ aggregatedImages.push(...this.getGeneratedVariants(singleRun.generationResult));
379
506
  }
380
- return this.responseBuilder.buildSuccessResponse(generationResult.data, saveResult.data);
507
+ const combinedResult = {
508
+ imageData: aggregatedImages[0].imageData,
509
+ images: aggregatedImages,
510
+ metadata: {
511
+ ...(firstGenerationResult?.metadata ?? {
512
+ provider: 'gemini',
513
+ model: 'unknown',
514
+ prompt: preparedParams.prompt,
515
+ mimeType: aggregatedImages[0]?.mimeType ?? 'image/png',
516
+ timestamp: new Date(),
517
+ inputImageProvided: false,
518
+ }),
519
+ prompt: preparedParams.prompt,
520
+ timestamp: new Date(),
521
+ mimeType: aggregatedImages[0]?.mimeType ?? firstGenerationResult?.metadata.mimeType ?? 'image/png',
522
+ },
523
+ };
524
+ return this.buildSuccessResponse(preparedParams, combinedResult, aggregatedPaths);
525
+ }, 'image-generation');
526
+ if (result.ok) {
527
+ return result.value;
528
+ }
529
+ return this.responseBuilder.buildErrorResponse(result.error);
530
+ }
531
+ async handleGenerateImage(params) {
532
+ const result = await ErrorHandler.wrapWithResultType(async () => {
533
+ const execution = await this.generateAndSave(params);
534
+ return this.buildSuccessResponse(execution.normalizedParams, execution.generationResult, execution.savedPaths);
381
535
  }, 'image-generation');
382
536
  if (result.ok) {
383
537
  return result.value;
384
538
  }
385
539
  return this.responseBuilder.buildErrorResponse(result.error);
386
540
  }
387
- /**
388
- * Initialize MCP server with tool handlers
389
- */
390
541
  initialize() {
391
542
  this.server = new Server({
392
543
  name: this.config.name,
@@ -396,22 +547,16 @@ export class MCPServerImpl {
396
547
  tools: {},
397
548
  },
398
549
  });
399
- // Setup tool handlers
400
550
  this.setupHandlers();
401
551
  return this.server;
402
552
  }
403
- /**
404
- * Setup MCP protocol handlers
405
- */
406
553
  setupHandlers() {
407
554
  if (!this.server) {
408
555
  throw new Error('Server not initialized');
409
556
  }
410
- // Register tool list handler
411
557
  this.server.setRequestHandler(ListToolsRequestSchema, async () => {
412
558
  return this.getToolsList();
413
559
  });
414
- // Register tool call handler
415
560
  this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
416
561
  const { name, arguments: args } = request.params;
417
562
  const result = await this.callTool(name, args);
@@ -425,9 +570,6 @@ export class MCPServerImpl {
425
570
  });
426
571
  }
427
572
  }
428
- /**
429
- * Factory function to create MCP server
430
- */
431
573
  export function createMCPServer(config = {}) {
432
574
  return new MCPServerImpl(config);
433
575
  }