@goonnguyen/human-mcp 1.3.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. package/README.md +261 -19
  2. package/bin/human-mcp.js +2 -0
  3. package/dist/index.js +65180 -1698
  4. package/package.json +19 -2
  5. package/.claude/agents/code-reviewer.md +0 -140
  6. package/.claude/agents/database-admin.md +0 -86
  7. package/.claude/agents/debugger.md +0 -119
  8. package/.claude/agents/docs-manager.md +0 -113
  9. package/.claude/agents/git-manager.md +0 -59
  10. package/.claude/agents/planner-researcher.md +0 -97
  11. package/.claude/agents/project-manager.md +0 -113
  12. package/.claude/agents/tester.md +0 -95
  13. package/.claude/commands/cook.md +0 -7
  14. package/.claude/commands/debug.md +0 -10
  15. package/.claude/commands/docs/init.md +0 -11
  16. package/.claude/commands/docs/update.md +0 -11
  17. package/.claude/commands/fix/ci.md +0 -8
  18. package/.claude/commands/fix/fast.md +0 -5
  19. package/.claude/commands/fix/hard.md +0 -7
  20. package/.claude/commands/fix/test.md +0 -16
  21. package/.claude/commands/git/cm.md +0 -5
  22. package/.claude/commands/git/cp.md +0 -4
  23. package/.claude/commands/plan/ci.md +0 -12
  24. package/.claude/commands/plan/two.md +0 -13
  25. package/.claude/commands/plan.md +0 -10
  26. package/.claude/commands/test.md +0 -7
  27. package/.claude/commands/watzup.md +0 -8
  28. package/.claude/hooks/telegram_notify.sh +0 -136
  29. package/.claude/send-discord.sh +0 -64
  30. package/.claude/settings.json +0 -7
  31. package/.claude/statusline.sh +0 -143
  32. package/.dockerignore +0 -81
  33. package/.env.example +0 -44
  34. package/.github/workflows/publish.yml +0 -88
  35. package/.opencode/agent/code-reviewer.md +0 -142
  36. package/.opencode/agent/debugger.md +0 -74
  37. package/.opencode/agent/docs-manager.md +0 -119
  38. package/.opencode/agent/git-manager.md +0 -60
  39. package/.opencode/agent/planner-researcher.md +0 -100
  40. package/.opencode/agent/project-manager.md +0 -113
  41. package/.opencode/agent/system-architecture.md +0 -200
  42. package/.opencode/agent/tester.md +0 -96
  43. package/.opencode/agent/ui-ux-developer.md +0 -97
  44. package/.opencode/command/cook.md +0 -7
  45. package/.opencode/command/debug.md +0 -10
  46. package/.opencode/command/fix/ci.md +0 -8
  47. package/.opencode/command/fix/fast.md +0 -5
  48. package/.opencode/command/fix/hard.md +0 -7
  49. package/.opencode/command/fix/test.md +0 -16
  50. package/.opencode/command/git/cm.md +0 -5
  51. package/.opencode/command/git/cp.md +0 -4
  52. package/.opencode/command/plan/ci.md +0 -12
  53. package/.opencode/command/plan/two.md +0 -13
  54. package/.opencode/command/plan.md +0 -10
  55. package/.opencode/command/test.md +0 -7
  56. package/.opencode/command/watzup.md +0 -8
  57. package/.releaserc.json +0 -26
  58. package/.serena/project.yml +0 -68
  59. package/CHANGELOG.md +0 -62
  60. package/CLAUDE.md +0 -141
  61. package/DEPLOYMENT.md +0 -329
  62. package/Dockerfile +0 -52
  63. package/QUICKSTART.md +0 -97
  64. package/bun.lock +0 -1872
  65. package/bunfig.toml +0 -15
  66. package/docker-compose.yaml +0 -128
  67. package/docs/README.md +0 -51
  68. package/docs/codebase-structure-architecture-code-standards.md +0 -428
  69. package/docs/codebase-summary.md +0 -321
  70. package/docs/project-overview-pdr.md +0 -286
  71. package/docs/project-roadmap.md +0 -494
  72. package/examples/debugging-session.ts +0 -96
  73. package/human-mcp.png +0 -0
  74. package/inspector-wrapper.mjs +0 -33
  75. package/plans/001-streamable-http-transport-plan.md +0 -905
  76. package/plans/002-sse-fallback-http-transport-plan.md +0 -161
  77. package/plans/003-fix-test-infrastructure-and-ci-plan.md +0 -699
  78. package/plans/003-http-transport-local-file-access-plan.md +0 -880
  79. package/plans/004-fix-typescript-compilation-errors-plan.md +0 -388
  80. package/plans/005-comprehensive-test-infrastructure-fix-plan.md +0 -854
  81. package/plans/templates/bug-fix-template.md +0 -69
  82. package/plans/templates/feature-implementation-template.md +0 -84
  83. package/plans/templates/refactor-template.md +0 -82
  84. package/plans/templates/template-usage-guide.md +0 -58
  85. package/src/index.ts +0 -49
  86. package/src/prompts/debugging-prompts.ts +0 -149
  87. package/src/prompts/index.ts +0 -55
  88. package/src/resources/documentation.ts +0 -316
  89. package/src/resources/index.ts +0 -49
  90. package/src/server.ts +0 -36
  91. package/src/tools/eyes/index.ts +0 -225
  92. package/src/tools/eyes/processors/gif.ts +0 -137
  93. package/src/tools/eyes/processors/image.ts +0 -213
  94. package/src/tools/eyes/processors/video.ts +0 -135
  95. package/src/tools/eyes/schemas.ts +0 -51
  96. package/src/tools/eyes/utils/formatters.ts +0 -126
  97. package/src/tools/eyes/utils/gemini-client.ts +0 -73
  98. package/src/transports/http/file-interceptor.ts +0 -134
  99. package/src/transports/http/middleware.ts +0 -46
  100. package/src/transports/http/routes.ts +0 -297
  101. package/src/transports/http/server.ts +0 -116
  102. package/src/transports/http/session.ts +0 -93
  103. package/src/transports/http/sse-routes.ts +0 -210
  104. package/src/transports/index.ts +0 -36
  105. package/src/transports/stdio.ts +0 -7
  106. package/src/transports/types.ts +0 -50
  107. package/src/types/index.ts +0 -41
  108. package/src/utils/cloudflare-r2.ts +0 -107
  109. package/src/utils/config.ts +0 -123
  110. package/src/utils/errors.ts +0 -40
  111. package/src/utils/logger.ts +0 -49
  112. package/tests/integration/http-transport-files.test.ts +0 -190
  113. package/tests/integration/server.test.ts +0 -27
  114. package/tests/integration/sse-transport.test.ts +0 -142
  115. package/tests/setup.ts +0 -55
  116. package/tests/types/api-responses.ts +0 -35
  117. package/tests/types/test-types.ts +0 -105
  118. package/tests/unit/cloudflare-r2.test.ts +0 -118
  119. package/tests/unit/config.test.ts +0 -40
  120. package/tests/unit/eyes-analyze.test.ts +0 -150
  121. package/tests/unit/formatters.test.ts +0 -85
  122. package/tests/unit/sse-routes.test.ts +0 -92
  123. package/tests/utils/error-scenarios.ts +0 -198
  124. package/tests/utils/index.ts +0 -3
  125. package/tests/utils/mock-helpers.ts +0 -99
  126. package/tests/utils/test-data-generators.ts +0 -217
  127. package/tests/utils/test-server-manager.ts +0 -172
  128. package/tsconfig.json +0 -26
package/src/resources/documentation.ts DELETED
@@ -1,316 +0,0 @@
1
- export const documentationContent = `# Human MCP API Documentation
2
-
3
- ## Overview
4
-
5
- Human MCP brings human-like visual capabilities to AI coding agents, enabling them to understand and debug visual content like screenshots, recordings, and UI elements.
6
-
7
- ## Available Tools
8
-
9
- ### eyes.analyze
10
-
11
- Comprehensive visual analysis tool for images, videos, and GIFs.
12
-
13
- **Parameters:**
14
- - \`source\` (string, required): URL, file path, or base64 encoded content
15
- - \`type\` (enum, required): "image" | "video" | "gif"
16
- - \`analysis_type\` (enum, optional): "general" | "ui_debug" | "error_detection" | "accessibility" | "performance" | "layout"
17
- - \`detail_level\` (enum, optional): "basic" | "detailed" | "extreme"
18
- - \`specific_focus\` (string, optional): Areas to focus analysis on
19
- - \`extract_text\` (boolean, optional): Extract text from image (default: true)
20
- - \`detect_ui_elements\` (boolean, optional): Detect UI elements (default: true)
21
- - \`analyze_colors\` (boolean, optional): Analyze color scheme (default: false)
22
- - \`check_accessibility\` (boolean, optional): Check accessibility (default: false)
23
-
24
- **Example:**
25
- \`\`\`json
26
- {
27
- "source": "/path/to/screenshot.png",
28
- "type": "image",
29
- "analysis_type": "ui_debug",
30
- "detail_level": "detailed",
31
- "specific_focus": "login form validation errors"
32
- }
33
- \`\`\`
34
-
35
- ### eyes.compare
36
-
37
- Compare two images to identify visual differences.
38
-
39
- **Parameters:**
40
- - \`source1\` (string, required): First image to compare
41
- - \`source2\` (string, required): Second image to compare
42
- - \`comparison_type\` (enum, optional): "pixel" | "structural" | "semantic"
43
-
44
- **Example:**
45
- \`\`\`json
46
- {
47
- "source1": "/path/to/before.png",
48
- "source2": "/path/to/after.png",
49
- "comparison_type": "structural"
50
- }
51
- \`\`\`
52
-
53
- ## Analysis Types
54
-
55
- ### ui_debug
56
- Focus on layout issues, rendering problems, misalignments, and visual bugs.
57
-
58
- ### error_detection
59
- Look for visible error messages, error states, and system failures.
60
-
61
- ### accessibility
62
- Analyze color contrast, readability, and WCAG compliance issues.
63
-
64
- ### performance
65
- Identify performance indicators, loading states, and optimization opportunities.
66
-
67
- ### layout
68
- Examine responsive design, positioning, and visual hierarchy.
69
-
70
- ## Detail Levels
71
-
72
- ### basic
73
- Concise analysis focusing on most important findings.
74
-
75
- ### detailed
76
- Thorough analysis with specific details about each finding.
77
-
78
- ### extreme
79
- Exhaustive analysis with pixel-level precision and comprehensive technical details.
80
-
81
- ## Common Use Cases
82
-
83
- ### Debugging UI Issues
84
- \`\`\`json
85
- {
86
- "source": "screenshot.png",
87
- "type": "image",
88
- "analysis_type": "ui_debug",
89
- "detail_level": "detailed"
90
- }
91
- \`\`\`
92
-
93
- ### Analyzing Error States
94
- \`\`\`json
95
- {
96
- "source": "error-recording.mp4",
97
- "type": "video",
98
- "analysis_type": "error_detection",
99
- "specific_focus": "form submission errors"
100
- }
101
- \`\`\`
102
-
103
- ### Accessibility Audits
104
- \`\`\`json
105
- {
106
- "source": "page.png",
107
- "type": "image",
108
- "analysis_type": "accessibility",
109
- "check_accessibility": true
110
- }
111
- \`\`\`
112
-
113
- ### Performance Analysis
114
- \`\`\`json
115
- {
116
- "source": "loading-screen.gif",
117
- "type": "gif",
118
- "analysis_type": "performance"
119
- }
120
- \`\`\`
121
-
122
- ## Response Format
123
-
124
- All tools return structured analysis including:
125
-
126
- - **analysis**: Detailed text analysis
127
- - **detected_elements**: Array of UI elements with locations
128
- - **debugging_insights**: Technical insights for developers
129
- - **recommendations**: Actionable suggestions
130
- - **metadata**: Processing information and timing
131
-
132
- ## Best Practices
133
-
134
- 1. Use appropriate analysis types for your specific needs
135
- 2. Provide context in \`specific_focus\` for better results
136
- 3. Use "detailed" level for most debugging tasks
137
- 4. Compare images when analyzing changes or regressions
138
- 5. Include error descriptions when analyzing failures
139
-
140
- ## Error Handling
141
-
142
- The server provides detailed error messages for:
143
- - Invalid image formats
144
- - Network failures when fetching URLs
145
- - API key issues with Gemini
146
- - Processing timeouts
147
- - Unsupported file types
148
-
149
- For support and issues: https://github.com/human-mcp/human-mcp/issues
150
- `;
151
-
152
- export const examplesContent = `# Human MCP Debugging Examples
153
-
154
- ## Example 1: Debugging a Broken Login Form
155
-
156
- **Scenario**: Users report login button not working
157
-
158
- **Analysis Request**:
159
- \`\`\`json
160
- {
161
- "source": "/screenshots/broken-login.png",
162
- "type": "image",
163
- "analysis_type": "ui_debug",
164
- "detail_level": "detailed",
165
- "specific_focus": "login button and form validation"
166
- }
167
- \`\`\`
168
-
169
- **Key Findings**:
170
- - Login button appears disabled (grayed out)
171
- - Email field shows red border indicating validation error
172
- - No error message visible to user
173
- - Password field missing required indicator
174
-
175
- **Recommendations**:
176
- - Add clear error messages for validation failures
177
- - Ensure button state reflects form validity
178
- - Improve visual feedback for required fields
179
-
180
- ## Example 2: Performance Issue Investigation
181
-
182
- **Scenario**: Page feels slow and unresponsive
183
-
184
- **Analysis Request**:
185
- \`\`\`json
186
- {
187
- "source": "/recordings/slow-loading.mp4",
188
- "type": "video",
189
- "analysis_type": "performance",
190
- "detail_level": "detailed"
191
- }
192
- \`\`\`
193
-
194
- **Key Findings**:
195
- - 3-second blank screen before content appears
196
- - Images loading progressively causing layout shifts
197
- - Spinner shows for extended periods
198
- - No loading state for dynamic content
199
-
200
- **Recommendations**:
201
- - Implement skeleton loading states
202
- - Optimize image loading strategy
203
- - Add progressive enhancement
204
- - Consider lazy loading for below-fold content
205
-
206
- ## Example 3: Accessibility Audit
207
-
208
- **Scenario**: Ensuring WCAG compliance
209
-
210
- **Analysis Request**:
211
- \`\`\`json
212
- {
213
- "source": "/screenshots/dashboard.png",
214
- "type": "image",
215
- "analysis_type": "accessibility",
216
- "check_accessibility": true,
217
- "detail_level": "detailed"
218
- }
219
- \`\`\`
220
-
221
- **Key Findings**:
222
- - Color contrast ratio below 4.5:1 for secondary text
223
- - No visible focus indicators on interactive elements
224
- - Important actions only indicated by color
225
- - Text size appears below 16px on mobile
226
-
227
- **Recommendations**:
228
- - Increase contrast for all text elements
229
- - Add visible focus outlines
230
- - Use icons or text alongside color coding
231
- - Ensure minimum text size for readability
232
-
233
- ## Example 4: Cross-Browser Layout Issues
234
-
235
- **Scenario**: Layout appears different across browsers
236
-
237
- **Comparison Request**:
238
- \`\`\`json
239
- {
240
- "source1": "/screenshots/chrome-layout.png",
241
- "source2": "/screenshots/firefox-layout.png",
242
- "comparison_type": "structural"
243
- }
244
- \`\`\`
245
-
246
- **Key Differences**:
247
- - Firefox shows additional spacing in navigation
248
- - Button heights vary between browsers
249
- - Font rendering differs affecting line heights
250
- - CSS Grid behavior inconsistent
251
-
252
- **Recommendations**:
253
- - Add CSS reset/normalize stylesheet
254
- - Use explicit sizing for interactive elements
255
- - Test with consistent font loading strategies
256
- - Implement browser-specific CSS if needed
257
-
258
- ## Example 5: Error State Analysis
259
-
260
- **Scenario**: Application crashes under certain conditions
261
-
262
- **Analysis Request**:
263
- \`\`\`json
264
- {
265
- "source": "/recordings/crash-reproduction.mp4",
266
- "type": "video",
267
- "analysis_type": "error_detection",
268
- "detail_level": "extreme",
269
- "specific_focus": "sequence leading to white screen"
270
- }
271
- \`\`\`
272
-
273
- **Key Findings**:
274
- - Error occurs after clicking "Submit" on complex form
275
- - Brief loading state followed by blank page
276
- - No user feedback about what went wrong
277
- - Previous data appears lost
278
-
279
- **Recommendations**:
280
- - Implement proper error boundaries
281
- - Add comprehensive form validation
282
- - Preserve user data during errors
283
- - Show helpful error messages instead of blank screens
284
-
285
- ## Integration Patterns
286
-
287
- ### With Testing Frameworks
288
- \`\`\`typescript
289
- // Example: Automated visual regression testing
290
- async function visualRegressionTest(testName: string) {
291
- const screenshot = await takeScreenshot();
292
-
293
- const analysis = await humanMcp.analyze({
294
- source: screenshot,
295
- type: "image",
296
- analysis_type: "ui_debug",
297
- detail_level: "detailed"
298
- });
299
-
300
- if (analysis.debugging_insights.length > 0) {
301
- throw new Error(\`Visual issues found: \${analysis.debugging_insights.join(', ')}\`);
302
- }
303
- }
304
- \`\`\`
305
-
306
- ### With CI/CD Pipelines
307
- \`\`\`yaml
308
- # Example: GitHub Actions integration
309
- - name: Visual Quality Check
310
- run: |
311
- npm run screenshot
312
- human-mcp analyze screenshot.png --type=image --analysis=ui_debug
313
- \`\`\`
314
-
315
- These examples demonstrate the practical application of Human MCP for common debugging scenarios in web development.
316
- `;
package/src/resources/index.ts DELETED
@@ -1,49 +0,0 @@
1
- import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
- import { documentationContent, examplesContent } from "./documentation.js";
3
- import { logger } from "@/utils/logger.js";
4
-
5
- export async function registerResources(server: McpServer) {
6
- // Register API documentation resource
7
- server.registerResource(
8
- "api-docs",
9
- "humanmcp://docs/api",
10
- {
11
- title: "Human MCP API Documentation",
12
- description: "Complete API reference for all Human MCP tools",
13
- mimeType: "text/markdown"
14
- },
15
- async (uri) => {
16
- logger.debug(`Reading resource: ${uri.href}`);
17
-
18
- return {
19
- contents: [{
20
- uri: uri.href,
21
- mimeType: "text/markdown",
22
- text: documentationContent
23
- }]
24
- };
25
- }
26
- );
27
-
28
- // Register debugging examples resource
29
- server.registerResource(
30
- "debugging-examples",
31
- "humanmcp://examples/debugging",
32
- {
33
- title: "Debugging Examples",
34
- description: "Real-world examples of using Human MCP for debugging",
35
- mimeType: "text/markdown"
36
- },
37
- async (uri) => {
38
- logger.debug(`Reading resource: ${uri.href}`);
39
-
40
- return {
41
- contents: [{
42
- uri: uri.href,
43
- mimeType: "text/markdown",
44
- text: examplesContent
45
- }]
46
- };
47
- }
48
- );
49
- }
package/src/server.ts DELETED
@@ -1,36 +0,0 @@
1
- import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
- import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
3
- import { registerEyesTool } from "./tools/eyes/index.js";
4
- import { registerPrompts } from "./prompts/index.js";
5
- import { registerResources } from "./resources/index.js";
6
- import { logger } from "./utils/logger.js";
7
- import { loadConfig } from "./utils/config.js";
8
-
9
- export async function createServer() {
10
- const config = loadConfig();
11
-
12
- const server = new McpServer({
13
- name: "human-mcp",
14
- version: "1.0.0",
15
- });
16
-
17
- await registerEyesTool(server, config);
18
- await registerPrompts(server);
19
- await registerResources(server);
20
-
21
-
22
- return server;
23
- }
24
-
25
- export async function startStdioServer() {
26
- try {
27
- const server = await createServer();
28
- const transport = new StdioServerTransport();
29
-
30
- await server.connect(transport);
31
- logger.info("Human MCP Server started successfully (stdio transport)");
32
- } catch (error) {
33
- logger.error("Failed to start server:", error);
34
- process.exit(1);
35
- }
36
- }
package/src/tools/eyes/index.ts DELETED
@@ -1,225 +0,0 @@
1
- import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
- import { z } from "zod";
3
- import { processImage } from "./processors/image.js";
4
- import { processVideo } from "./processors/video.js";
5
- import { processGif } from "./processors/gif.js";
6
- import { GeminiClient } from "./utils/gemini-client.js";
7
- import {
8
- EyesInputSchema,
9
- CompareInputSchema,
10
- type EyesInput,
11
- type CompareInput
12
- } from "./schemas.js";
13
- import { logger } from "@/utils/logger.js";
14
- import { handleError } from "@/utils/errors.js";
15
- import type { Config } from "@/utils/config.js";
16
-
17
- export async function registerEyesTool(server: McpServer, config: Config) {
18
- const geminiClient = new GeminiClient(config);
19
-
20
- // Register eyes_analyze tool
21
- server.registerTool(
22
- "eyes_analyze",
23
- {
24
- title: "Vision Analysis Tool",
25
- description: "Analyze images, videos, and GIFs using AI vision capabilities",
26
- inputSchema: {
27
- source: z.string().describe("Path, URL, or base64 data URI of the media to analyze"),
28
- type: z.enum(["image", "video", "gif"]).describe("Type of media to analyze"),
29
- detail_level: z.enum(["quick", "detailed"]).optional().default("detailed").describe("Level of detail in analysis"),
30
- prompt: z.string().optional().describe("Custom prompt for analysis"),
31
- max_frames: z.number().optional().describe("Maximum number of frames to analyze for videos/GIFs")
32
- }
33
- },
34
- async (args) => {
35
- try {
36
- return await handleAnalyze(geminiClient, args, config);
37
- } catch (error) {
38
- const mcpError = handleError(error);
39
- logger.error(`Tool eyes_analyze error:`, mcpError);
40
-
41
- return {
42
- content: [{
43
- type: "text" as const,
44
- text: `Error: ${mcpError.message}`
45
- }],
46
- isError: true
47
- };
48
- }
49
- }
50
- );
51
-
52
- // Register eyes_compare tool
53
- server.registerTool(
54
- "eyes_compare",
55
- {
56
- title: "Image Comparison Tool",
57
- description: "Compare two images and identify differences",
58
- inputSchema: {
59
- source1: z.string().describe("Path, URL, or base64 data URI of the first image"),
60
- source2: z.string().describe("Path, URL, or base64 data URI of the second image"),
61
- comparison_type: z.enum(["pixel", "structural", "semantic"]).optional().default("semantic").describe("Type of comparison to perform")
62
- }
63
- },
64
- async (args) => {
65
- try {
66
- return await handleCompare(geminiClient, args);
67
- } catch (error) {
68
- const mcpError = handleError(error);
69
- logger.error(`Tool eyes_compare error:`, mcpError);
70
-
71
- return {
72
- content: [{
73
- type: "text" as const,
74
- text: `Error: ${mcpError.message}`
75
- }],
76
- isError: true
77
- };
78
- }
79
- }
80
- );
81
- }
82
-
83
- async function handleAnalyze(
84
- geminiClient: GeminiClient,
85
- args: unknown,
86
- config: Config
87
- ) {
88
- const input = EyesInputSchema.parse(args) as EyesInput;
89
- const { source, type, detail_level } = input;
90
-
91
- logger.info(`Analyzing ${type} with detail level: ${detail_level}`);
92
-
93
- const model = geminiClient.getModel(detail_level || "detailed");
94
- const options = {
95
- ...input,
96
- fetchTimeout: config.server.fetchTimeout
97
- };
98
- let result;
99
-
100
- switch (type) {
101
- case "image":
102
- result = await processImage(model, source, options);
103
- break;
104
- case "video":
105
- result = await processVideo(model, source, options);
106
- break;
107
- case "gif":
108
- result = await processGif(model, source, options);
109
- break;
110
- default:
111
- throw new Error(`Unsupported media type: ${type}`);
112
- }
113
-
114
- return {
115
- content: [
116
- {
117
- type: "text" as const,
118
- text: result.analysis
119
- }
120
- ],
121
- isError: false
122
- };
123
- }
124
-
125
- async function handleCompare(
126
- geminiClient: GeminiClient,
127
- args: unknown
128
- ) {
129
- const input = CompareInputSchema.parse(args) as CompareInput;
130
- const { source1, source2, comparison_type } = input;
131
-
132
- logger.info(`Comparing images with type: ${comparison_type}`);
133
-
134
- const model = geminiClient.getModel("detailed");
135
-
136
- const prompt = `Compare these two images and identify the differences. Focus on:
137
-
138
- ${comparison_type === "pixel" ?
139
- "- Exact pixel-level differences\n- Color value changes\n- Any visual artifacts or rendering differences" :
140
- comparison_type === "structural" ?
141
- "- Layout changes\n- Element positioning differences\n- Size and proportion changes\n- Structural modifications" :
142
- "- Semantic meaning differences\n- Content changes\n- Functional differences\n- User experience impact"
143
- }
144
-
145
- Please provide:
146
- 1. SUMMARY: Brief overview of main differences
147
- 2. SPECIFIC DIFFERENCES: Detailed list of changes found
148
- 3. IMPACT ASSESSMENT: How these differences might affect users
149
- 4. RECOMMENDATIONS: Suggested actions based on the differences
150
-
151
- Be precise with locations and measurements where possible.`;
152
-
153
- try {
154
- const [image1Data, image2Data] = await Promise.all([
155
- loadImageForComparison(source1),
156
- loadImageForComparison(source2)
157
- ]);
158
-
159
- const response = await model.generateContent([
160
- { text: prompt },
161
- {
162
- inlineData: {
163
- mimeType: image1Data.mimeType,
164
- data: image1Data.data
165
- }
166
- },
167
- { text: "Image 1 (above) vs Image 2 (below):" },
168
- {
169
- inlineData: {
170
- mimeType: image2Data.mimeType,
171
- data: image2Data.data
172
- }
173
- }
174
- ]);
175
-
176
- const result = response.response;
177
- const comparisonText = result.text();
178
-
179
- return {
180
- content: [
181
- {
182
- type: "text" as const,
183
- text: comparisonText || "No differences detected or analysis failed"
184
- }
185
- ],
186
- isError: false
187
- };
188
-
189
- } catch (error) {
190
- throw new Error(`Failed to compare images: ${error instanceof Error ? error.message : 'Unknown error'}`);
191
- }
192
- }
193
-
194
- async function loadImageForComparison(source: string): Promise<{ data: string; mimeType: string }> {
195
- if (source.startsWith('data:image/')) {
196
- const [header, data] = source.split(',');
197
- if (!header || !data) {
198
- throw new Error("Invalid base64 image format");
199
- }
200
- const mimeMatch = header.match(/data:(image\/[^;]+)/);
201
- if (!mimeMatch || !mimeMatch[1]) {
202
- throw new Error("Invalid base64 image format");
203
- }
204
- return { data, mimeType: mimeMatch[1] };
205
- }
206
-
207
- if (source.startsWith('http://') || source.startsWith('https://')) {
208
- const response = await fetch(source);
209
- if (!response.ok) {
210
- throw new Error(`Failed to fetch image: ${response.statusText}`);
211
- }
212
- const buffer = await response.arrayBuffer();
213
- return {
214
- data: Buffer.from(buffer).toString('base64'),
215
- mimeType: response.headers.get('content-type') || 'image/jpeg'
216
- };
217
- }
218
-
219
- const fs = await import('fs/promises');
220
- const buffer = await fs.readFile(source);
221
- return {
222
- data: buffer.toString('base64'),
223
- mimeType: 'image/jpeg'
224
- };
225
- }