mcp-hydrocoder-image 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.md +153 -29
  2. package/dist/api/geminiClient.d.ts +3 -30
  3. package/dist/api/geminiClient.d.ts.map +1 -1
  4. package/dist/api/geminiClient.js +13 -57
  5. package/dist/api/geminiClient.js.map +1 -1
  6. package/dist/api/geminiTextClient.js +1 -1
  7. package/dist/api/geminiTextClient.js.map +1 -1
  8. package/dist/api/imageProvider.d.ts +29 -0
  9. package/dist/api/imageProvider.d.ts.map +1 -0
  10. package/dist/api/imageProvider.js +5 -0
  11. package/dist/api/imageProvider.js.map +1 -0
  12. package/dist/api/volcengineClient.d.ts +13 -0
  13. package/dist/api/volcengineClient.d.ts.map +1 -0
  14. package/dist/api/volcengineClient.js +288 -0
  15. package/dist/api/volcengineClient.js.map +1 -0
  16. package/dist/business/inputValidator.d.ts.map +1 -1
  17. package/dist/business/inputValidator.js +69 -1
  18. package/dist/business/inputValidator.js.map +1 -1
  19. package/dist/business/multiImagePrompt.d.ts +13 -0
  20. package/dist/business/multiImagePrompt.d.ts.map +1 -0
  21. package/dist/business/multiImagePrompt.js +135 -0
  22. package/dist/business/multiImagePrompt.js.map +1 -0
  23. package/dist/business/responseBuilder.d.ts +3 -2
  24. package/dist/business/responseBuilder.d.ts.map +1 -1
  25. package/dist/business/responseBuilder.js +69 -48
  26. package/dist/business/responseBuilder.js.map +1 -1
  27. package/dist/index.d.ts +1 -1
  28. package/dist/index.d.ts.map +1 -1
  29. package/dist/server/errorHandler.d.ts.map +1 -1
  30. package/dist/server/errorHandler.js +3 -2
  31. package/dist/server/errorHandler.js.map +1 -1
  32. package/dist/server/mcpServer.d.ts +90 -29
  33. package/dist/server/mcpServer.d.ts.map +1 -1
  34. package/dist/server/mcpServer.js +429 -288
  35. package/dist/server/mcpServer.js.map +1 -1
  36. package/dist/types/mcp.d.ts +62 -15
  37. package/dist/types/mcp.d.ts.map +1 -1
  38. package/dist/types/mcp.js +15 -0
  39. package/dist/types/mcp.js.map +1 -1
  40. package/dist/utils/config.d.ts +6 -2
  41. package/dist/utils/config.d.ts.map +1 -1
  42. package/dist/utils/config.js +39 -14
  43. package/dist/utils/config.js.map +1 -1
  44. package/dist/utils/errors.d.ts +9 -0
  45. package/dist/utils/errors.d.ts.map +1 -1
  46. package/dist/utils/errors.js +50 -1
  47. package/dist/utils/errors.js.map +1 -1
  48. package/package.json +2 -1
@@ -1,201 +1,239 @@
1
1
  /**
2
2
  * MCP Server implementation
3
- * Simplified architecture with direct Gemini integration
3
+ * Supports multiple image providers with Gemini prompt enhancement when applicable
4
4
  */
5
5
  import * as fs from 'node:fs/promises';
6
6
  import * as path from 'node:path';
7
7
  import { Server } from '@modelcontextprotocol/sdk/server/index.js';
8
8
  import { CallToolRequestSchema, ListToolsRequestSchema, } from '@modelcontextprotocol/sdk/types.js';
9
- // API clients
10
9
  import { createGeminiClient } from '../api/geminiClient.js';
11
10
  import { createGeminiTextClient } from '../api/geminiTextClient.js';
12
- // Business logic
11
+ import { createVolcengineClient } from '../api/volcengineClient.js';
13
12
  import { createFileManager } from '../business/fileManager.js';
14
- import { validateGenerateImageParams } from '../business/inputValidator.js';
13
+ import { validateGenerateImageParams, validatePrompt } from '../business/inputValidator.js';
14
+ import { buildIndependentImagePrompt, extractExplicitImageRequests, normalizeMultiImageParams, prepareGenerateMultiImageParams, } from '../business/multiImagePrompt.js';
15
15
  import { createResponseBuilder } from '../business/responseBuilder.js';
16
16
  import { createStructuredPromptGenerator, } from '../business/structuredPromptGenerator.js';
17
- // Utilities
18
17
  import { getConfig } from '../utils/config.js';
19
18
  import { Logger } from '../utils/logger.js';
20
19
  import { SecurityManager } from '../utils/security.js';
21
20
  import { ErrorHandler } from './errorHandler.js';
22
- /**
23
- * Default MCP server configuration
24
- */
25
21
  const DEFAULT_CONFIG = {
26
22
  name: 'mcp-image-server',
27
23
  version: '0.1.0',
28
24
  defaultOutputDir: './output',
29
25
  };
30
- /**
31
- * Simplified MCP server
32
- */
26
+ function createInputSchema(multiOnly = false) {
27
+ return {
28
+ type: 'object',
29
+ properties: {
30
+ prompt: {
31
+ type: 'string',
32
+ description: multiOnly
33
+ ? 'Shared prompt or overall requirements for a multi-image generation task. Use this tool when the user wants one grouped result containing multiple images. Keep common constraints here and prefer outputCount or imageRequests over multiple tool calls.'
34
+ : 'Shared prompt or overall requirements for single-image generation or editing. If the user wants multiple images in one request, prefer generate_multi_image instead. English recommended for prompt enhancement.',
35
+ },
36
+ provider: {
37
+ type: 'string',
38
+ description: 'Optional provider override. Defaults to IMAGE_PROVIDER environment variable.',
39
+ enum: ['gemini', 'volcengine'],
40
+ },
41
+ fileName: {
42
+ type: 'string',
43
+ description: 'Custom file name for the output image. Auto-generated if not specified.',
44
+ },
45
+ inputImagePath: {
46
+ type: 'string',
47
+ description: 'Optional absolute path to a source image. If the user provides a local image path, pass it here directly instead of summarizing image contents in the prompt. Supported by Gemini and Volcengine reference-image workflows.',
48
+ },
49
+ inputImage: {
50
+ type: 'string',
51
+ description: 'Optional base64 encoded image data for image-to-image generation. Gemini accepts raw base64; Volcengine sends this as `data:image/<format>;base64,<data>` and requires `inputImageMimeType` for correct formatting.',
52
+ },
53
+ inputImageMimeType: {
54
+ type: 'string',
55
+ description: 'MIME type of the input image provided via inputImage. Required when inputImage is provided for accurate processing',
56
+ enum: ['image/jpeg', 'image/png', 'image/webp', 'image/gif', 'image/bmp'],
57
+ },
58
+ inputImages: {
59
+ type: 'array',
60
+ description: 'Multiple input images for multi-image composition. Supported by Gemini and by Volcengine when mapped to reference-image arrays.',
61
+ items: {
62
+ type: 'object',
63
+ properties: {
64
+ data: {
65
+ type: 'string',
66
+ description: 'Base64 encoded image data. Raw base64 is accepted; for Volcengine it will be sent as `data:image/<format>;base64,<data>` using the paired `mimeType`.',
67
+ },
68
+ mimeType: {
69
+ type: 'string',
70
+ description: 'MIME type of the image',
71
+ enum: ['image/jpeg', 'image/png', 'image/webp', 'image/gif', 'image/bmp'],
72
+ },
73
+ },
74
+ required: ['data', 'mimeType'],
75
+ },
76
+ },
77
+ inputImagePaths: {
78
+ type: 'array',
79
+ description: 'Multiple absolute local image paths for multi-image composition. If the user provides two or more local image paths, pass them here directly instead of summarizing the images in the prompt.',
80
+ items: {
81
+ type: 'string',
82
+ description: 'Absolute path to an image file',
83
+ },
84
+ },
85
+ returnBase64: {
86
+ type: 'boolean',
87
+ description: 'Return the generated image as base64 data in the response. The image is always saved to disk regardless of this setting. Default: false',
88
+ },
89
+ blendImages: {
90
+ type: 'boolean',
91
+ description: 'Enable multi-image blending for combining multiple visual elements naturally. Use when prompt mentions multiple subjects or composite scenes',
92
+ },
93
+ maintainCharacterConsistency: {
94
+ type: 'boolean',
95
+ description: 'Maintain character appearance consistency. Enable when generating same character in different poses/scenes',
96
+ },
97
+ useWorldKnowledge: {
98
+ type: 'boolean',
99
+ description: 'Use real-world knowledge for accurate context. Enable for historical figures, landmarks, or factual scenarios',
100
+ },
101
+ useGoogleSearch: {
102
+ type: 'boolean',
103
+ description: 'Enable Google Search grounding for Gemini. Ignored by providers that do not support it.',
104
+ },
105
+ aspectRatio: {
106
+ type: 'string',
107
+ description: 'Aspect ratio for the generated image. When omitted, the server defaults to 16:9 for Gemini and Volcengine.',
108
+ enum: ['1:1', '1:4', '1:8', '2:3', '3:2', '3:4', '4:1', '4:3', '4:5', '5:4', '8:1', '9:16', '16:9', '21:9'],
109
+ },
110
+ imageSize: {
111
+ type: 'string',
112
+ description: 'Image resolution for high-quality output. Specify "1K", "2K", or "4K" when you need specific resolution. When omitted, the server defaults to 4K for Gemini and Volcengine. Volcengine also normalizes the final size into the provider legal pixel range.',
113
+ enum: ['1K', '2K', '4K'],
114
+ },
115
+ purpose: {
116
+ type: 'string',
117
+ description: 'Intended use for the image (e.g., cookbook cover, social media post, presentation slide). Influences lighting, composition, and detail level to match the context.',
118
+ },
119
+ quality: {
120
+ type: 'string',
121
+ description: 'Quality preset controlling speed/fidelity tradeoff. "fast": drafts, "balanced": better detail, "quality": highest fidelity.',
122
+ enum: ['fast', 'balanced', 'quality'],
123
+ },
124
+ outputFormat: {
125
+ type: 'string',
126
+ description: 'Output image format if supported by the provider. Some provider endpoints may ignore or reject format overrides.',
127
+ enum: ['png', 'jpeg', 'webp'],
128
+ },
129
+ outputCount: {
130
+ type: 'integer',
131
+ description: multiOnly
132
+ ? 'Target number of images to generate in one grouped multi-image call. Prefer values greater than 1. If omitted, the server will try to infer the count from prompts like "4张图" or "4 images".'
133
+ : 'Backward-compatible grouped output count for generate_image. For new multi-image requests, prefer generate_multi_image instead. Currently wired for Volcengine, but final image count still depends on provider behavior.',
134
+ },
135
+ imageRequests: {
136
+ type: 'array',
137
+ description: multiOnly
138
+ ? 'Per-image prompts for one grouped multi-image call. Use this when the user wants multiple distinct images at once. The server rewrites them into explicit 第1张/第2张/... instructions and infers outputCount from the array length when omitted.'
139
+ : 'Backward-compatible per-image prompts for generate_image. For new grouped multi-image requests, prefer generate_multi_image instead.',
140
+ items: {
141
+ type: 'string',
142
+ },
143
+ },
144
+ skipPromptEnhancement: {
145
+ type: 'boolean',
146
+ description: 'Skip prompt enhancement and use the prompt as-is. Enable when your prompt already contains exact instructions.',
147
+ },
148
+ },
149
+ required: ['prompt'],
150
+ };
151
+ }
152
+ function createOutputSchema() {
153
+ return {
154
+ type: 'object',
155
+ properties: {
156
+ type: {
157
+ type: 'string',
158
+ const: 'image_result',
159
+ },
160
+ files: {
161
+ type: 'array',
162
+ items: {
163
+ type: 'object',
164
+ properties: {
165
+ uri: { type: 'string' },
166
+ name: { type: 'string' },
167
+ title: { type: 'string' },
168
+ mimeType: { type: 'string' },
169
+ description: { type: 'string' },
170
+ },
171
+ required: ['uri', 'name', 'mimeType'],
172
+ },
173
+ },
174
+ base64Included: {
175
+ type: 'boolean',
176
+ },
177
+ metadata: {
178
+ type: 'object',
179
+ properties: {
180
+ model: { type: 'string' },
181
+ processingTime: { type: 'number' },
182
+ contextMethod: { type: 'string' },
183
+ timestamp: { type: 'string' },
184
+ imageCount: { type: 'integer' },
185
+ },
186
+ required: ['model', 'processingTime', 'contextMethod', 'timestamp', 'imageCount'],
187
+ },
188
+ },
189
+ required: ['type', 'files', 'metadata'],
190
+ };
191
+ }
33
192
  export class MCPServerImpl {
34
193
  constructor(config = {}) {
35
194
  this.server = null;
36
195
  this.structuredPromptGenerator = null;
37
196
  this.geminiTextClient = null;
38
197
  this.geminiClient = null;
198
+ this.volcengineClient = null;
39
199
  this.config = { ...DEFAULT_CONFIG, ...config };
40
200
  this.logger = new Logger();
41
201
  this.fileManager = createFileManager();
42
202
  this.responseBuilder = createResponseBuilder();
43
203
  this.securityManager = new SecurityManager();
44
204
  }
45
- /**
46
- * Get server info
47
- */
48
205
  getServerInfo() {
49
206
  return {
50
207
  name: this.config.name,
51
208
  version: this.config.version,
52
209
  };
53
210
  }
54
- /**
55
- * Get list of registered tools
56
- */
57
211
  getToolsList() {
58
212
  return {
59
213
  tools: [
60
214
  {
61
215
  name: 'generate_image',
62
- description: 'Generate, edit, blend, or merge images using AI. Supports text-to-image generation, single image editing, and multi-image composition/blending. Use inputImagePaths for merging multiple images from file paths, or inputImages for base64 encoded images.',
63
- inputSchema: {
64
- type: 'object',
65
- properties: {
66
- prompt: {
67
- type: 'string',
68
- description: 'The prompt for image generation (English recommended for optimal structured prompt enhancement)',
69
- },
70
- fileName: {
71
- type: 'string',
72
- description: 'Custom file name for the output image. Auto-generated if not specified.',
73
- },
74
- inputImagePath: {
75
- type: 'string',
76
- description: 'Optional absolute path to source image for image-to-image generation. Use when generating variations, style transfers, or similar images based on an existing image (must be an absolute path)',
77
- },
78
- inputImage: {
79
- type: 'string',
80
- description: 'Optional base64 encoded image data for image-to-image generation. Alternative to inputImagePath when image data is already in memory. Do not include data URI prefix (e.g., "data:image/png;base64,")',
81
- },
82
- inputImageMimeType: {
83
- type: 'string',
84
- description: 'MIME type of the input image provided via inputImage. Required when inputImage is provided for accurate processing',
85
- enum: [
86
- 'image/jpeg',
87
- 'image/png',
88
- 'image/webp',
89
- 'image/gif',
90
- 'image/bmp',
91
- ],
92
- },
93
- inputImages: {
94
- type: 'array',
95
- description: 'Multiple input images for multi-image composition. Cannot be used together with inputImage or inputImagePath. Each item requires base64 data and MIME type.',
96
- items: {
97
- type: 'object',
98
- properties: {
99
- data: {
100
- type: 'string',
101
- description: 'Base64 encoded image data. Do not include data URI prefix.',
102
- },
103
- mimeType: {
104
- type: 'string',
105
- description: 'MIME type of the image',
106
- enum: [
107
- 'image/jpeg',
108
- 'image/png',
109
- 'image/webp',
110
- 'image/gif',
111
- 'image/bmp',
112
- ],
113
- },
114
- },
115
- required: ['data', 'mimeType'],
116
- },
117
- },
118
- inputImagePaths: {
119
- type: 'array',
120
- description: 'Multiple input image file paths for multi-image composition. Cannot be used together with inputImage, inputImagePath, or inputImages. Each path must be absolute.',
121
- items: {
122
- type: 'string',
123
- description: 'Absolute path to an image file',
124
- },
125
- },
126
- returnBase64: {
127
- type: 'boolean',
128
- description: 'Return the generated image as base64 data in the response. The image is always saved to disk regardless of this setting. Default: false',
129
- },
130
- blendImages: {
131
- type: 'boolean',
132
- description: 'Enable multi-image blending for combining multiple visual elements naturally. Use when prompt mentions multiple subjects or composite scenes',
133
- },
134
- maintainCharacterConsistency: {
135
- type: 'boolean',
136
- description: 'Maintain character appearance consistency. Enable when generating same character in different poses/scenes',
137
- },
138
- useWorldKnowledge: {
139
- type: 'boolean',
140
- description: 'Use real-world knowledge for accurate context. Enable for historical figures, landmarks, or factual scenarios',
141
- },
142
- useGoogleSearch: {
143
- type: 'boolean',
144
- description: "Enable Google Search grounding to access real-time web information for factually accurate image generation. Use when prompt requires current or time-sensitive data that may have changed since the model's knowledge cutoff. Leave disabled for creative, fictional, historical, or timeless content.",
145
- },
146
- aspectRatio: {
147
- type: 'string',
148
- description: 'Aspect ratio for the generated image',
149
- enum: [
150
- '1:1',
151
- '1:4',
152
- '1:8',
153
- '2:3',
154
- '3:2',
155
- '3:4',
156
- '4:1',
157
- '4:3',
158
- '4:5',
159
- '5:4',
160
- '8:1',
161
- '9:16',
162
- '16:9',
163
- '21:9',
164
- ],
165
- },
166
- imageSize: {
167
- type: 'string',
168
- description: 'Image resolution for high-quality output. Specify "1K", "2K", or "4K" when you need specific resolution. Leave unspecified for standard quality.',
169
- enum: ['1K', '2K', '4K'],
170
- },
171
- purpose: {
172
- type: 'string',
173
- description: 'Intended use for the image (e.g., cookbook cover, social media post, presentation slide). Influences lighting, composition, and detail level to match the context.',
174
- },
175
- quality: {
176
- type: 'string',
177
- description: 'Quality preset controlling speed/fidelity tradeoff. Only specify when the user explicitly requests a specific quality level; omit to use the server\'s configured default. "fast": best for drafts and rapid iteration. "balanced": better detail and coherence, moderate latency. "quality": highest fidelity, use for final deliverables where quality matters most.',
178
- enum: ['fast', 'balanced', 'quality'],
179
- },
180
- skipPromptEnhancement: {
181
- type: 'boolean',
182
- description: 'Skip prompt enhancement and use the prompt as-is. Enable when your prompt already contains exact instructions (e.g., multi-image blending) that should not be rewritten. Default: false',
183
- },
184
- },
185
- required: ['prompt'],
186
- },
216
+ description: 'Generate, edit, blend, or merge a single image using AI. Use this tool for standard single-image generation and image editing. If the user wants multiple images in one grouped request, prefer generate_multi_image. When the user provides local image file paths, those paths must be passed through inputImagePath or inputImagePaths instead of being summarized by the model. Gemini handles prompt enhancement; Volcengine supports text-to-image and reference-image workflows through its OpenAI-compatible image API.',
217
+ inputSchema: createInputSchema(false),
218
+ outputSchema: createOutputSchema(),
219
+ },
220
+ {
221
+ name: 'generate_multi_image',
222
+ description: 'Generate multiple images in a single grouped tool call. Use this tool when the user wants a set of images, multiple product shots, multiple scenes, or several variations at once. This tool is preferred for Notebook planners that might otherwise split one request into multiple generate_image calls. The server will infer outputCount from phrases like "4张图" when possible and will rewrite prompts into explicit 第1张/第2张/... instructions.',
223
+ inputSchema: createInputSchema(true),
224
+ outputSchema: createOutputSchema(),
187
225
  },
188
226
  ],
189
227
  };
190
228
  }
191
- /**
192
- * Tool execution
193
- */
194
229
  async callTool(name, args) {
195
230
  try {
196
231
  if (name === 'generate_image') {
197
232
  return await this.handleGenerateImage(args);
198
233
  }
234
+ if (name === 'generate_multi_image') {
235
+ return await this.handleGenerateMultiImage(args);
236
+ }
199
237
  throw new Error(`Unknown tool: ${name}`);
200
238
  }
201
239
  catch (error) {
@@ -203,17 +241,13 @@ export class MCPServerImpl {
203
241
  return ErrorHandler.handleError(error);
204
242
  }
205
243
  }
206
- /**
207
- * Initialize Gemini clients lazily
208
- */
209
- async initializeClients() {
244
+ async initializeGeminiSupport() {
210
245
  if (this.structuredPromptGenerator && this.geminiClient)
211
246
  return;
212
247
  const configResult = getConfig();
213
248
  if (!configResult.success) {
214
249
  throw configResult.error;
215
250
  }
216
- // Initialize Gemini Text Client for prompt generation
217
251
  if (!this.geminiTextClient) {
218
252
  const textClientResult = createGeminiTextClient(configResult.data);
219
253
  if (!textClientResult.success) {
@@ -221,11 +255,9 @@ export class MCPServerImpl {
221
255
  }
222
256
  this.geminiTextClient = textClientResult.data;
223
257
  }
224
- // Initialize Structured Prompt Generator
225
258
  if (!this.structuredPromptGenerator) {
226
259
  this.structuredPromptGenerator = createStructuredPromptGenerator(this.geminiTextClient);
227
260
  }
228
- // Initialize Gemini Client for image generation
229
261
  if (!this.geminiClient) {
230
262
  const clientResult = createGeminiClient(configResult.data);
231
263
  if (!clientResult.success) {
@@ -233,160 +265,278 @@ export class MCPServerImpl {
233
265
  }
234
266
  this.geminiClient = clientResult.data;
235
267
  }
236
- this.logger.info('mcp-server', 'Gemini clients initialized');
237
268
  }
238
- /**
239
- * Simplified image generation handler
240
- */
241
- async handleGenerateImage(params) {
242
- const result = await ErrorHandler.wrapWithResultType(async () => {
243
- // Validate input
244
- const validationResult = validateGenerateImageParams(params);
245
- if (!validationResult.success) {
246
- throw validationResult.error;
247
- }
248
- // Get configuration
249
- const configResult = getConfig();
250
- if (!configResult.success) {
251
- throw configResult.error;
269
+ async initializeVolcengineSupport() {
270
+ if (this.volcengineClient)
271
+ return;
272
+ const configResult = getConfig();
273
+ if (!configResult.success) {
274
+ throw configResult.error;
275
+ }
276
+ const clientResult = createVolcengineClient(configResult.data);
277
+ if (!clientResult.success) {
278
+ throw clientResult.error;
279
+ }
280
+ this.volcengineClient = clientResult.data;
281
+ }
282
+ getProviderClient(provider) {
283
+ if (provider === 'gemini') {
284
+ if (!this.geminiClient) {
285
+ throw new Error('Gemini client not initialized');
252
286
  }
253
- // Initialize clients
254
- await this.initializeClients();
255
- // Handle input image if provided
256
- let inputImageData;
257
- let inputImageMimeType;
258
- let inputImagesData;
259
- if (params.inputImagePaths && params.inputImagePaths.length > 0) {
260
- // Multi-image from file paths: read each file and derive mimeType from extension
261
- const extToMime = {
262
- '.jpg': 'image/jpeg',
263
- '.jpeg': 'image/jpeg',
264
- '.png': 'image/png',
265
- '.webp': 'image/webp',
266
- '.gif': 'image/gif',
267
- '.bmp': 'image/bmp',
287
+ return this.geminiClient;
288
+ }
289
+ if (!this.volcengineClient) {
290
+ throw new Error('Volcengine client not initialized');
291
+ }
292
+ return this.volcengineClient;
293
+ }
294
+ async prepareInputImages(params) {
295
+ let inputImageData;
296
+ let inputImageMimeType;
297
+ let inputImagesData;
298
+ const extToMime = {
299
+ '.jpg': 'image/jpeg',
300
+ '.jpeg': 'image/jpeg',
301
+ '.png': 'image/png',
302
+ '.webp': 'image/webp',
303
+ '.gif': 'image/gif',
304
+ '.bmp': 'image/bmp',
305
+ };
306
+ if (params.inputImagePaths && params.inputImagePaths.length > 0) {
307
+ inputImagesData = await Promise.all(params.inputImagePaths.map(async (filePath) => {
308
+ const buffer = await fs.readFile(filePath);
309
+ const ext = path.extname(filePath).toLowerCase();
310
+ return {
311
+ data: buffer.toString('base64'),
312
+ mimeType: extToMime[ext] || 'image/jpeg',
268
313
  };
269
- inputImagesData = await Promise.all(params.inputImagePaths.map(async (filePath) => {
270
- const buffer = await fs.readFile(filePath);
271
- const ext = path.extname(filePath).toLowerCase();
272
- return {
273
- data: buffer.toString('base64'),
274
- mimeType: extToMime[ext] || 'image/jpeg',
275
- };
276
- }));
277
- inputImageData = inputImagesData[0]?.data;
278
- inputImageMimeType = inputImagesData[0]?.mimeType;
279
- }
280
- else if (params.inputImages && params.inputImages.length > 0) {
281
- // Multi-image: strip data URI prefix from each image
282
- inputImagesData = params.inputImages.map((img) => ({
283
- data: img.data.replace(/^data:image\/[a-z]+;base64,/, ''),
284
- mimeType: img.mimeType,
285
- }));
286
- // Use first image for prompt enhancement context
287
- inputImageData = inputImagesData[0]?.data;
288
- inputImageMimeType = inputImagesData[0]?.mimeType;
289
- }
290
- else if (params.inputImagePath) {
291
- const imageBuffer = await fs.readFile(params.inputImagePath);
292
- inputImageData = imageBuffer.toString('base64');
293
- }
294
- else if (params.inputImage) {
295
- // Use base64 input directly, stripping data URI prefix if present
296
- inputImageData = params.inputImage.replace(/^data:image\/[a-z]+;base64,/, '');
297
- inputImageMimeType = params.inputImageMimeType;
298
- }
299
- // Generate structured prompt (unless skipped)
300
- let structuredPrompt = params.prompt;
301
- const shouldSkipEnhancement = params.skipPromptEnhancement ?? configResult.data.skipPromptEnhancement;
302
- if (!shouldSkipEnhancement && this.structuredPromptGenerator) {
303
- const features = {};
304
- if (params.maintainCharacterConsistency !== undefined) {
305
- features.maintainCharacterConsistency = params.maintainCharacterConsistency;
306
- }
307
- if (params.blendImages !== undefined) {
308
- features.blendImages = params.blendImages;
309
- }
310
- if (params.useWorldKnowledge !== undefined) {
311
- features.useWorldKnowledge = params.useWorldKnowledge;
312
- }
313
- if (params.useGoogleSearch !== undefined) {
314
- features.useGoogleSearch = params.useGoogleSearch;
315
- }
316
- const promptResult = await this.structuredPromptGenerator.generateStructuredPrompt(params.prompt, features, inputImageData, // Pass image data for context-aware prompt generation
317
- params.purpose // Pass intended use for purpose-aware prompt generation
318
- );
319
- if (promptResult.success) {
320
- structuredPrompt = promptResult.data.structuredPrompt;
321
- this.logger.info('mcp-server', 'Structured prompt generated', {
322
- originalLength: params.prompt.length,
323
- structuredLength: structuredPrompt.length,
324
- selectedPractices: promptResult.data.selectedPractices,
325
- });
326
- }
327
- else {
328
- this.logger.warn('mcp-server', 'Using original prompt', {
329
- error: promptResult.error.message,
330
- });
331
- }
332
- }
333
- else if (shouldSkipEnhancement) {
314
+ }));
315
+ inputImageData = inputImagesData[0]?.data;
316
+ inputImageMimeType = inputImagesData[0]?.mimeType;
317
+ }
318
+ else if (params.inputImages && params.inputImages.length > 0) {
319
+ inputImagesData = params.inputImages.map((img) => ({
320
+ data: img.data.replace(/^data:image\/[a-z0-9.+-]+;base64,/i, ''),
321
+ mimeType: img.mimeType,
322
+ }));
323
+ inputImageData = inputImagesData[0]?.data;
324
+ inputImageMimeType = inputImagesData[0]?.mimeType;
325
+ }
326
+ else if (params.inputImagePath) {
327
+ const imageBuffer = await fs.readFile(params.inputImagePath);
328
+ const ext = path.extname(params.inputImagePath).toLowerCase();
329
+ inputImageData = imageBuffer.toString('base64');
330
+ inputImageMimeType = extToMime[ext] || 'image/jpeg';
331
+ }
332
+ else if (params.inputImage) {
333
+ inputImageData = params.inputImage.replace(/^data:image\/[a-z0-9.+-]+;base64,/i, '');
334
+ inputImageMimeType = params.inputImageMimeType;
335
+ }
336
+ return { inputImageData, inputImageMimeType, inputImagesData };
337
+ }
338
+ async maybeEnhancePrompt(provider, params, inputImageData) {
339
+ const configResult = getConfig();
340
+ if (!configResult.success) {
341
+ throw configResult.error;
342
+ }
343
+ const shouldSkipEnhancement = params.skipPromptEnhancement ?? configResult.data.skipPromptEnhancement;
344
+ if (provider !== 'gemini' || shouldSkipEnhancement || !this.structuredPromptGenerator) {
345
+ if (shouldSkipEnhancement) {
334
346
  this.logger.info('mcp-server', 'Prompt enhancement skipped (SKIP_PROMPT_ENHANCEMENT=true)');
335
347
  }
336
- // Generate image using Gemini API
337
- if (!this.geminiClient) {
338
- throw new Error('Gemini client not initialized');
339
- }
340
- const generationResult = await this.geminiClient.generateImage({
341
- prompt: structuredPrompt,
342
- ...(inputImagesData && { inputImages: inputImagesData }),
343
- ...(!inputImagesData && inputImageData && { inputImage: inputImageData }),
344
- ...(!inputImagesData && inputImageMimeType && { inputImageMimeType }),
345
- ...(params.aspectRatio && { aspectRatio: params.aspectRatio }),
346
- ...(params.imageSize && { imageSize: params.imageSize }),
347
- ...(params.useGoogleSearch !== undefined && { useGoogleSearch: params.useGoogleSearch }),
348
- ...(params.quality !== undefined && { quality: params.quality }),
348
+ return params.prompt;
349
+ }
350
+ const features = {};
351
+ if (params.maintainCharacterConsistency !== undefined) {
352
+ features.maintainCharacterConsistency = params.maintainCharacterConsistency;
353
+ }
354
+ if (params.blendImages !== undefined) {
355
+ features.blendImages = params.blendImages;
356
+ }
357
+ if (params.useWorldKnowledge !== undefined) {
358
+ features.useWorldKnowledge = params.useWorldKnowledge;
359
+ }
360
+ if (params.useGoogleSearch !== undefined) {
361
+ features.useGoogleSearch = params.useGoogleSearch;
362
+ }
363
+ const promptResult = await this.structuredPromptGenerator.generateStructuredPrompt(params.prompt, features, inputImageData, params.purpose);
364
+ if (!promptResult.success) {
365
+ this.logger.warn('mcp-server', 'Using original prompt', {
366
+ error: promptResult.error.message,
349
367
  });
350
- if (!generationResult.success) {
351
- throw generationResult.error;
368
+ return params.prompt;
369
+ }
370
+ this.logger.info('mcp-server', 'Structured prompt generated', {
371
+ originalLength: params.prompt.length,
372
+ structuredLength: promptResult.data.structuredPrompt.length,
373
+ selectedPractices: promptResult.data.selectedPractices,
374
+ });
375
+ return promptResult.data.structuredPrompt;
376
+ }
377
+ buildSuccessResponse(params, generationResult, savedPaths) {
378
+ if (params.returnBase64) {
379
+ return this.responseBuilder.buildBase64SuccessResponse(generationResult, savedPaths);
380
+ }
381
+ return savedPaths.length > 1
382
+ ? this.responseBuilder.buildMultiSuccessResponse(generationResult, savedPaths)
383
+ : this.responseBuilder.buildSuccessResponse(generationResult, savedPaths[0]);
384
+ }
385
+ getGeneratedVariants(generationResult) {
386
+ return generationResult.images?.length
387
+ ? generationResult.images
388
+ : [{ imageData: generationResult.imageData, mimeType: generationResult.metadata.mimeType }];
389
+ }
390
+ async generateAndSave(params) {
391
+ const validationResult = validateGenerateImageParams(params);
392
+ if (!validationResult.success) {
393
+ throw validationResult.error;
394
+ }
395
+ const normalizedParams = normalizeMultiImageParams(validationResult.data);
396
+ const normalizedPromptResult = validatePrompt(normalizedParams.prompt);
397
+ if (!normalizedPromptResult.success) {
398
+ throw normalizedPromptResult.error;
399
+ }
400
+ const configResult = getConfig();
401
+ if (!configResult.success) {
402
+ throw configResult.error;
403
+ }
404
+ const provider = normalizedParams.provider || configResult.data.imageProvider;
405
+ if (provider === 'gemini') {
406
+ await this.initializeGeminiSupport();
407
+ }
408
+ else {
409
+ await this.initializeVolcengineSupport();
410
+ }
411
+ const { inputImageData, inputImageMimeType, inputImagesData } = await this.prepareInputImages(normalizedParams);
412
+ const prompt = await this.maybeEnhancePrompt(provider, normalizedParams, inputImageData);
413
+ const client = this.getProviderClient(provider);
414
+ const generationResult = await client.generateImage({
415
+ ...normalizedParams,
416
+ provider,
417
+ prompt,
418
+ ...(inputImagesData && { inputImages: inputImagesData }),
419
+ ...(!inputImagesData && inputImageData && { inputImage: inputImageData }),
420
+ ...(!inputImagesData && inputImageMimeType && { inputImageMimeType }),
421
+ });
422
+ if (!generationResult.success) {
423
+ throw generationResult.error;
424
+ }
425
+ const saveTargets = generationResult.data.images?.length
426
+ ? generationResult.data.images
427
+ : [{ imageData: generationResult.data.imageData, mimeType: generationResult.data.metadata.mimeType }];
428
+ const savedPaths = [];
429
+ for (let index = 0; index < saveTargets.length; index++) {
430
+ const target = saveTargets[index];
431
+ if (!target) {
432
+ continue;
352
433
  }
353
- // Save image file
354
- let fileName = params.fileName || this.fileManager.generateFileName();
355
- // Auto-append extension if user-provided fileName has no extension
356
- if (params.fileName && !path.extname(fileName)) {
357
- const mimeToExt = {
434
+ let currentFileName = normalizedParams.fileName || this.fileManager.generateFileName();
435
+ if (normalizedParams.fileName) {
436
+ const ext = path.extname(currentFileName);
437
+ const baseName = ext ? currentFileName.slice(0, -ext.length) : currentFileName;
438
+ const finalExt = ext || ({
358
439
  'image/png': '.png',
359
440
  'image/jpeg': '.jpg',
360
441
  'image/webp': '.webp',
361
442
  'image/gif': '.gif',
362
443
  'image/bmp': '.bmp',
363
- };
364
- fileName += mimeToExt[generationResult.data.metadata.mimeType] || '.png';
444
+ }[target.mimeType] || '.png');
445
+ currentFileName = saveTargets.length > 1 ? `${baseName}-${index + 1}${finalExt}` : `${baseName}${finalExt}`;
365
446
  }
366
- const outputPath = path.join(configResult.data.imageOutputDir, fileName);
447
+ else if (saveTargets.length > 1) {
448
+ const ext = path.extname(currentFileName);
449
+ const baseName = currentFileName.slice(0, -ext.length);
450
+ currentFileName = `${baseName}-${index + 1}${ext}`;
451
+ }
452
+ const outputPath = path.join(configResult.data.imageOutputDir, currentFileName);
367
453
  const sanitizedPath = this.securityManager.sanitizeFilePath(outputPath);
368
454
  if (!sanitizedPath.success) {
369
455
  throw sanitizedPath.error;
370
456
  }
371
- const saveResult = await this.fileManager.saveImage(generationResult.data.imageData, sanitizedPath.data);
457
+ const saveResult = await this.fileManager.saveImage(target.imageData, sanitizedPath.data);
372
458
  if (!saveResult.success) {
373
459
  throw saveResult.error;
374
460
  }
375
- // Build response
376
- if (params.returnBase64) {
377
- const base64Data = generationResult.data.imageData.toString('base64');
378
- return this.responseBuilder.buildBase64SuccessResponse(generationResult.data, saveResult.data, base64Data);
461
+ savedPaths.push(saveResult.data);
462
+ }
463
+ return {
464
+ generationResult: generationResult.data,
465
+ savedPaths,
466
+ normalizedParams,
467
+ };
468
+ }
469
+ async handleGenerateMultiImage(params) {
470
+ const result = await ErrorHandler.wrapWithResultType(async () => {
471
+ const preparedParamsResult = prepareGenerateMultiImageParams(params);
472
+ if (!preparedParamsResult.success) {
473
+ throw preparedParamsResult.error;
474
+ }
475
+ const preparedParams = preparedParamsResult.data;
476
+ const explicitRequests = preparedParams.imageRequests?.length
477
+ ? { sharedPrompt: preparedParams.prompt, imageRequests: preparedParams.imageRequests }
478
+ : extractExplicitImageRequests(preparedParams.prompt);
479
+ if (!explicitRequests.imageRequests.length) {
480
+ const singleRun = await this.generateAndSave(preparedParams);
481
+ return this.buildSuccessResponse(singleRun.normalizedParams, singleRun.generationResult, singleRun.savedPaths);
482
+ }
483
+ const aggregatedPaths = [];
484
+ const aggregatedImages = [];
485
+ let firstGenerationResult;
486
+ for (let index = 0; index < explicitRequests.imageRequests.length; index++) {
487
+ const imageRequest = explicitRequests.imageRequests[index];
488
+ const singleParams = {
489
+ ...preparedParams,
490
+ prompt: buildIndependentImagePrompt(explicitRequests.sharedPrompt, imageRequest),
491
+ skipPromptEnhancement: true,
492
+ };
493
+ delete singleParams.outputCount;
494
+ delete singleParams.imageRequests;
495
+ if (preparedParams.fileName !== undefined) {
496
+ singleParams.fileName = `${preparedParams.fileName.replace(/(\.[^.]+)?$/, '')}-${index + 1}`;
497
+ }
498
+ else {
499
+ delete singleParams.fileName;
500
+ }
501
+ const singleRun = await this.generateAndSave(singleParams);
502
+ firstGenerationResult ?? (firstGenerationResult = singleRun.generationResult);
503
+ aggregatedPaths.push(...singleRun.savedPaths);
504
+ aggregatedImages.push(...this.getGeneratedVariants(singleRun.generationResult));
379
505
  }
380
- return this.responseBuilder.buildSuccessResponse(generationResult.data, saveResult.data);
506
+ const combinedResult = {
507
+ imageData: aggregatedImages[0].imageData,
508
+ images: aggregatedImages,
509
+ metadata: {
510
+ ...(firstGenerationResult?.metadata ?? {
511
+ provider: 'gemini',
512
+ model: 'unknown',
513
+ prompt: preparedParams.prompt,
514
+ mimeType: aggregatedImages[0]?.mimeType ?? 'image/png',
515
+ timestamp: new Date(),
516
+ inputImageProvided: false,
517
+ }),
518
+ prompt: preparedParams.prompt,
519
+ timestamp: new Date(),
520
+ mimeType: aggregatedImages[0]?.mimeType ?? firstGenerationResult?.metadata.mimeType ?? 'image/png',
521
+ },
522
+ };
523
+ return this.buildSuccessResponse(preparedParams, combinedResult, aggregatedPaths);
524
+ }, 'image-generation');
525
+ if (result.ok) {
526
+ return result.value;
527
+ }
528
+ return this.responseBuilder.buildErrorResponse(result.error);
529
+ }
530
+ async handleGenerateImage(params) {
531
+ const result = await ErrorHandler.wrapWithResultType(async () => {
532
+ const execution = await this.generateAndSave(params);
533
+ return this.buildSuccessResponse(execution.normalizedParams, execution.generationResult, execution.savedPaths);
381
534
  }, 'image-generation');
382
535
  if (result.ok) {
383
536
  return result.value;
384
537
  }
385
538
  return this.responseBuilder.buildErrorResponse(result.error);
386
539
  }
387
- /**
388
- * Initialize MCP server with tool handlers
389
- */
390
540
  initialize() {
391
541
  this.server = new Server({
392
542
  name: this.config.name,
@@ -396,22 +546,16 @@ export class MCPServerImpl {
396
546
  tools: {},
397
547
  },
398
548
  });
399
- // Setup tool handlers
400
549
  this.setupHandlers();
401
550
  return this.server;
402
551
  }
403
- /**
404
- * Setup MCP protocol handlers
405
- */
406
552
  setupHandlers() {
407
553
  if (!this.server) {
408
554
  throw new Error('Server not initialized');
409
555
  }
410
- // Register tool list handler
411
556
  this.server.setRequestHandler(ListToolsRequestSchema, async () => {
412
557
  return this.getToolsList();
413
558
  });
414
- // Register tool call handler
415
559
  this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
416
560
  const { name, arguments: args } = request.params;
417
561
  const result = await this.callTool(name, args);
@@ -425,9 +569,6 @@ export class MCPServerImpl {
425
569
  });
426
570
  }
427
571
  }
428
- /**
429
- * Factory function to create MCP server
430
- */
431
572
  export function createMCPServer(config = {}) {
432
573
  return new MCPServerImpl(config);
433
574
  }