@goonnguyen/human-mcp 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/.claude/agents/code-reviewer.md +140 -0
  2. package/.claude/agents/database-admin.md +86 -0
  3. package/.claude/agents/debugger.md +119 -0
  4. package/.claude/agents/docs-manager.md +113 -0
  5. package/.claude/agents/git-manager.md +59 -0
  6. package/.claude/agents/planner-researcher.md +97 -0
  7. package/.claude/agents/project-manager.md +113 -0
  8. package/.claude/agents/tester.md +95 -0
  9. package/.claude/commands/cook.md +7 -0
  10. package/.claude/commands/debug.md +10 -0
  11. package/.claude/commands/docs/init.md +11 -0
  12. package/.claude/commands/docs/update.md +11 -0
  13. package/.claude/commands/fix/ci.md +8 -0
  14. package/.claude/commands/fix/fast.md +5 -0
  15. package/.claude/commands/fix/hard.md +7 -0
  16. package/.claude/commands/fix/test.md +16 -0
  17. package/.claude/commands/git/cm.md +5 -0
  18. package/.claude/commands/git/cp.md +4 -0
  19. package/.claude/commands/plan/ci.md +12 -0
  20. package/.claude/commands/plan/two.md +13 -0
  21. package/.claude/commands/plan.md +10 -0
  22. package/.claude/commands/test.md +7 -0
  23. package/.claude/commands/watzup.md +8 -0
  24. package/.claude/hooks/telegram_notify.sh +136 -0
  25. package/.claude/send-discord.sh +64 -0
  26. package/.claude/settings.json +7 -0
  27. package/.claude/statusline.sh +143 -0
  28. package/.env.example +17 -0
  29. package/.github/workflows/publish.yml +51 -0
  30. package/.releaserc.json +26 -0
  31. package/.serena/project.yml +68 -0
  32. package/CHANGELOG.md +27 -0
  33. package/CLAUDE.md +139 -0
  34. package/Dockerfile +28 -0
  35. package/LICENSE +21 -0
  36. package/QUICKSTART.md +97 -0
  37. package/README.md +234 -0
  38. package/bun.lock +1555 -0
  39. package/bunfig.toml +15 -0
  40. package/dist/index.js +24568 -0
  41. package/docs/codebase-structure-architecture-code-standards.md +416 -0
  42. package/docs/codebase-summary.md +321 -0
  43. package/docs/project-overview-pdr.md +270 -0
  44. package/examples/debugging-session.ts +96 -0
  45. package/inspector-wrapper.mjs +33 -0
  46. package/package.json +47 -0
  47. package/plans/reports/001-from-qa-engineer-to-development-team-test-suite-report.md +188 -0
  48. package/plans/templates/bug-fix-template.md +69 -0
  49. package/plans/templates/feature-implementation-template.md +84 -0
  50. package/plans/templates/refactor-template.md +82 -0
  51. package/plans/templates/template-usage-guide.md +58 -0
  52. package/src/index.ts +5 -0
  53. package/src/prompts/debugging-prompts.ts +149 -0
  54. package/src/prompts/index.ts +55 -0
  55. package/src/resources/documentation.ts +316 -0
  56. package/src/resources/index.ts +49 -0
  57. package/src/server.ts +36 -0
  58. package/src/tools/eyes/index.ts +225 -0
  59. package/src/tools/eyes/processors/gif.ts +137 -0
  60. package/src/tools/eyes/processors/image.ts +123 -0
  61. package/src/tools/eyes/processors/video.ts +135 -0
  62. package/src/tools/eyes/schemas.ts +51 -0
  63. package/src/tools/eyes/utils/formatters.ts +126 -0
  64. package/src/tools/eyes/utils/gemini-client.ts +73 -0
  65. package/src/types/index.ts +41 -0
  66. package/src/utils/config.ts +51 -0
  67. package/src/utils/errors.ts +40 -0
  68. package/src/utils/logger.ts +49 -0
  69. package/tests/integration/server.test.ts +24 -0
  70. package/tests/setup.ts +11 -0
  71. package/tests/unit/config.test.ts +40 -0
  72. package/tests/unit/formatters.test.ts +85 -0
  73. package/tsconfig.json +26 -0
@@ -0,0 +1,225 @@
1
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
+ import { z } from "zod";
3
+ import { processImage } from "./processors/image.js";
4
+ import { processVideo } from "./processors/video.js";
5
+ import { processGif } from "./processors/gif.js";
6
+ import { GeminiClient } from "./utils/gemini-client.js";
7
+ import {
8
+ EyesInputSchema,
9
+ CompareInputSchema,
10
+ type EyesInput,
11
+ type CompareInput
12
+ } from "./schemas.js";
13
+ import { logger } from "@/utils/logger.js";
14
+ import { handleError } from "@/utils/errors.js";
15
+ import type { Config } from "@/utils/config.js";
16
+
17
+ export async function registerEyesTool(server: McpServer, config: Config) {
18
+ const geminiClient = new GeminiClient(config);
19
+
20
+ // Register eyes.analyze tool
21
+ server.registerTool(
22
+ "eyes.analyze",
23
+ {
24
+ title: "Vision Analysis Tool",
25
+ description: "Analyze images, videos, and GIFs using AI vision capabilities",
26
+ inputSchema: {
27
+ source: z.string().describe("Path, URL, or base64 data URI of the media to analyze"),
28
+ type: z.enum(["image", "video", "gif"]).describe("Type of media to analyze"),
29
+ detail_level: z.enum(["quick", "detailed"]).optional().default("detailed").describe("Level of detail in analysis"),
30
+ prompt: z.string().optional().describe("Custom prompt for analysis"),
31
+ max_frames: z.number().optional().describe("Maximum number of frames to analyze for videos/GIFs")
32
+ }
33
+ },
34
+ async (args) => {
35
+ try {
36
+ return await handleAnalyze(geminiClient, args, config);
37
+ } catch (error) {
38
+ const mcpError = handleError(error);
39
+ logger.error(`Tool eyes.analyze error:`, mcpError);
40
+
41
+ return {
42
+ content: [{
43
+ type: "text" as const,
44
+ text: `Error: ${mcpError.message}`
45
+ }],
46
+ isError: true
47
+ };
48
+ }
49
+ }
50
+ );
51
+
52
+ // Register eyes.compare tool
53
+ server.registerTool(
54
+ "eyes.compare",
55
+ {
56
+ title: "Image Comparison Tool",
57
+ description: "Compare two images and identify differences",
58
+ inputSchema: {
59
+ source1: z.string().describe("Path, URL, or base64 data URI of the first image"),
60
+ source2: z.string().describe("Path, URL, or base64 data URI of the second image"),
61
+ comparison_type: z.enum(["pixel", "structural", "semantic"]).optional().default("semantic").describe("Type of comparison to perform")
62
+ }
63
+ },
64
+ async (args) => {
65
+ try {
66
+ return await handleCompare(geminiClient, args);
67
+ } catch (error) {
68
+ const mcpError = handleError(error);
69
+ logger.error(`Tool eyes.compare error:`, mcpError);
70
+
71
+ return {
72
+ content: [{
73
+ type: "text" as const,
74
+ text: `Error: ${mcpError.message}`
75
+ }],
76
+ isError: true
77
+ };
78
+ }
79
+ }
80
+ );
81
+ }
82
+
83
+ async function handleAnalyze(
84
+ geminiClient: GeminiClient,
85
+ args: unknown,
86
+ config: Config
87
+ ) {
88
+ const input = EyesInputSchema.parse(args) as EyesInput;
89
+ const { source, type, detail_level } = input;
90
+
91
+ logger.info(`Analyzing ${type} with detail level: ${detail_level}`);
92
+
93
+ const model = geminiClient.getModel(detail_level || "detailed");
94
+ const options = {
95
+ ...input,
96
+ fetchTimeout: config.server.fetchTimeout
97
+ };
98
+ let result;
99
+
100
+ switch (type) {
101
+ case "image":
102
+ result = await processImage(model, source, options);
103
+ break;
104
+ case "video":
105
+ result = await processVideo(model, source, options);
106
+ break;
107
+ case "gif":
108
+ result = await processGif(model, source, options);
109
+ break;
110
+ default:
111
+ throw new Error(`Unsupported media type: ${type}`);
112
+ }
113
+
114
+ return {
115
+ content: [
116
+ {
117
+ type: "text" as const,
118
+ text: result.analysis
119
+ }
120
+ ],
121
+ isError: false
122
+ };
123
+ }
124
+
125
+ async function handleCompare(
126
+ geminiClient: GeminiClient,
127
+ args: unknown
128
+ ) {
129
+ const input = CompareInputSchema.parse(args) as CompareInput;
130
+ const { source1, source2, comparison_type } = input;
131
+
132
+ logger.info(`Comparing images with type: ${comparison_type}`);
133
+
134
+ const model = geminiClient.getModel("detailed");
135
+
136
+ const prompt = `Compare these two images and identify the differences. Focus on:
137
+
138
+ ${comparison_type === "pixel" ?
139
+ "- Exact pixel-level differences\n- Color value changes\n- Any visual artifacts or rendering differences" :
140
+ comparison_type === "structural" ?
141
+ "- Layout changes\n- Element positioning differences\n- Size and proportion changes\n- Structural modifications" :
142
+ "- Semantic meaning differences\n- Content changes\n- Functional differences\n- User experience impact"
143
+ }
144
+
145
+ Please provide:
146
+ 1. SUMMARY: Brief overview of main differences
147
+ 2. SPECIFIC DIFFERENCES: Detailed list of changes found
148
+ 3. IMPACT ASSESSMENT: How these differences might affect users
149
+ 4. RECOMMENDATIONS: Suggested actions based on the differences
150
+
151
+ Be precise with locations and measurements where possible.`;
152
+
153
+ try {
154
+ const [image1Data, image2Data] = await Promise.all([
155
+ loadImageForComparison(source1),
156
+ loadImageForComparison(source2)
157
+ ]);
158
+
159
+ const response = await model.generateContent([
160
+ { text: prompt },
161
+ {
162
+ inlineData: {
163
+ mimeType: image1Data.mimeType,
164
+ data: image1Data.data
165
+ }
166
+ },
167
+ { text: "Image 1 (above) vs Image 2 (below):" },
168
+ {
169
+ inlineData: {
170
+ mimeType: image2Data.mimeType,
171
+ data: image2Data.data
172
+ }
173
+ }
174
+ ]);
175
+
176
+ const result = await response.response;
177
+ const comparisonText = result.text();
178
+
179
+ return {
180
+ content: [
181
+ {
182
+ type: "text" as const,
183
+ text: comparisonText || "No differences detected or analysis failed"
184
+ }
185
+ ],
186
+ isError: false
187
+ };
188
+
189
+ } catch (error) {
190
+ throw new Error(`Failed to compare images: ${error instanceof Error ? error.message : 'Unknown error'}`);
191
+ }
192
+ }
193
+
194
+ async function loadImageForComparison(source: string): Promise<{ data: string; mimeType: string }> {
195
+ if (source.startsWith('data:image/')) {
196
+ const [header, data] = source.split(',');
197
+ if (!header || !data) {
198
+ throw new Error("Invalid base64 image format");
199
+ }
200
+ const mimeMatch = header.match(/data:(image\/[^;]+)/);
201
+ if (!mimeMatch || !mimeMatch[1]) {
202
+ throw new Error("Invalid base64 image format");
203
+ }
204
+ return { data, mimeType: mimeMatch[1] };
205
+ }
206
+
207
+ if (source.startsWith('http://') || source.startsWith('https://')) {
208
+ const response = await fetch(source);
209
+ if (!response.ok) {
210
+ throw new Error(`Failed to fetch image: ${response.statusText}`);
211
+ }
212
+ const buffer = await response.arrayBuffer();
213
+ return {
214
+ data: Buffer.from(buffer).toString('base64'),
215
+ mimeType: response.headers.get('content-type') || 'image/jpeg'
216
+ };
217
+ }
218
+
219
+ const fs = await import('fs/promises');
220
+ const buffer = await fs.readFile(source);
221
+ return {
222
+ data: buffer.toString('base64'),
223
+ mimeType: 'image/jpeg'
224
+ };
225
+ }
@@ -0,0 +1,137 @@
1
+ import { GenerativeModel } from "@google/generative-ai";
2
+ import sharp from "sharp";
3
+ import fs from "fs/promises";
4
+ import type { AnalysisOptions, ProcessingResult } from "@/types";
5
+ import { createPrompt, parseAnalysisResponse } from "../utils/formatters.js";
6
+ import { logger } from "@/utils/logger.js";
7
+ import { ProcessingError } from "@/utils/errors.js";
8
+
9
/**
 * Analyzes an animated GIF with Gemini by extracting frames (see
 * extractGifFrames) and sending them with an animation-focused prompt.
 *
 * @param model - Gemini model used for generation.
 * @param source - Data URI, URL, or file path of the GIF.
 * @param options - Analysis options used to build the base prompt.
 * @returns Parsed analysis with processing-time/model/frame-count metadata.
 * @throws ProcessingError when loading, extraction, or analysis fails.
 */
export async function processGif(
  model: GenerativeModel,
  source: string,
  options: AnalysisOptions
): Promise<ProcessingResult> {
  const startTime = Date.now();

  try {
    // Log only a prefix of the source: data URIs can be very large.
    logger.debug(`Processing GIF: ${source.substring(0, 50)}...`);

    const gifData = await loadGif(source);
    const frames = await extractGifFrames(gifData);

    if (frames.length === 0) {
      throw new ProcessingError("No frames could be extracted from GIF");
    }

    // Append GIF-specific guidance to the generic analysis prompt.
    const prompt = createPrompt(options) + `

This is an animated GIF analysis with ${frames.length} frames. Pay attention to:
- Animation timing and smoothness
- UI state transitions
- Loading states or progress indicators
- Error animations or feedback
- Interactive element hover states
- Any visual glitches in the animation`;

    // Frames are base64 PNG strings produced by extractGifFrames.
    const mediaData = frames.map(frame => ({
      mimeType: 'image/png',
      data: frame
    }));

    const response = await model.generateContent([
      { text: prompt },
      ...mediaData.map(data => ({
        inlineData: {
          mimeType: data.mimeType,
          data: data.data
        }
      }))
    ]);

    const result = await response.response;
    const analysisText = result.text();

    if (!analysisText) {
      throw new ProcessingError("No analysis result from Gemini");
    }

    // Split the free-form model output into structured sections.
    const parsed = parseAnalysisResponse(analysisText);
    const processingTime = Date.now() - startTime;

    return {
      description: parsed.description || "GIF analysis completed",
      analysis: parsed.analysis || analysisText,
      elements: parsed.elements || [],
      insights: parsed.insights || [],
      recommendations: parsed.recommendations || [],
      metadata: {
        processing_time_ms: processingTime,
        model_used: model.model,
        frames_analyzed: frames.length
      }
    };

  } catch (error) {
    // Every failure (including rethrown ProcessingErrors) is logged and
    // re-wrapped with GIF-processing context.
    logger.error("GIF processing error:", error);
    throw new ProcessingError(`Failed to process GIF: ${error instanceof Error ? error.message : 'Unknown error'}`);
  }
}
79
+
80
+ async function loadGif(source: string): Promise<Buffer> {
81
+ if (source.startsWith('data:image/gif')) {
82
+ const [, data] = source.split(',');
83
+ if (!data) {
84
+ throw new ProcessingError("Invalid base64 GIF format");
85
+ }
86
+ return Buffer.from(data, 'base64');
87
+ }
88
+
89
+ if (source.startsWith('http://') || source.startsWith('https://')) {
90
+ const response = await fetch(source);
91
+ if (!response.ok) {
92
+ throw new ProcessingError(`Failed to fetch GIF: ${response.statusText}`);
93
+ }
94
+ return Buffer.from(await response.arrayBuffer());
95
+ }
96
+
97
+ try {
98
+ return await fs.readFile(source);
99
+ } catch (error) {
100
+ throw new ProcessingError(`Failed to load GIF file: ${error instanceof Error ? error.message : 'Unknown error'}`);
101
+ }
102
+ }
103
+
104
+ async function extractGifFrames(gifBuffer: Buffer): Promise<string[]> {
105
+ try {
106
+ const image = sharp(gifBuffer, { animated: true });
107
+ const { pages } = await image.metadata();
108
+
109
+ if (!pages || pages <= 1) {
110
+ const singleFrame = await image
111
+ .resize(512, 512, { fit: 'inside', withoutEnlargement: true })
112
+ .png()
113
+ .toBuffer();
114
+ return [singleFrame.toString('base64')];
115
+ }
116
+
117
+ const frames: string[] = [];
118
+ const maxFrames = Math.min(pages, 16);
119
+
120
+ for (let i = 0; i < maxFrames; i++) {
121
+ const frame = await sharp(gifBuffer, {
122
+ animated: true,
123
+ page: i
124
+ })
125
+ .resize(512, 512, { fit: 'inside', withoutEnlargement: true })
126
+ .png()
127
+ .toBuffer();
128
+
129
+ frames.push(frame.toString('base64'));
130
+ }
131
+
132
+ return frames;
133
+
134
+ } catch (error) {
135
+ throw new ProcessingError(`Failed to extract GIF frames: ${error instanceof Error ? error.message : 'Unknown error'}`);
136
+ }
137
+ }
@@ -0,0 +1,123 @@
1
+ import { GenerativeModel } from "@google/generative-ai";
2
+ import sharp from "sharp";
3
+ import fs from "fs/promises";
4
+ import type { AnalysisOptions, ProcessingResult } from "@/types";
5
+ import { createPrompt, parseAnalysisResponse } from "../utils/formatters.js";
6
+ import { logger } from "@/utils/logger.js";
7
+ import { ProcessingError } from "@/utils/errors.js";
8
+
9
+ export async function processImage(
10
+ model: GenerativeModel,
11
+ source: string,
12
+ options: AnalysisOptions
13
+ ): Promise<ProcessingResult> {
14
+ const startTime = Date.now();
15
+
16
+ try {
17
+ logger.debug(`Processing image: ${source.substring(0, 50)}...`);
18
+
19
+ const { imageData, mimeType } = await loadImage(source, options.fetchTimeout);
20
+ const prompt = createPrompt(options);
21
+
22
+ const response = await model.generateContent([
23
+ { text: prompt },
24
+ {
25
+ inlineData: {
26
+ mimeType,
27
+ data: imageData
28
+ }
29
+ }
30
+ ]);
31
+
32
+ const result = await response.response;
33
+ const analysisText = result.text();
34
+
35
+ if (!analysisText) {
36
+ throw new ProcessingError("No analysis result from Gemini");
37
+ }
38
+
39
+ const parsed = parseAnalysisResponse(analysisText);
40
+ const processingTime = Date.now() - startTime;
41
+
42
+ return {
43
+ description: parsed.description || "Image analysis completed",
44
+ analysis: parsed.analysis || analysisText,
45
+ elements: parsed.elements || [],
46
+ insights: parsed.insights || [],
47
+ recommendations: parsed.recommendations || [],
48
+ metadata: {
49
+ processing_time_ms: processingTime,
50
+ model_used: model.model,
51
+ }
52
+ };
53
+
54
+ } catch (error) {
55
+ logger.error("Image processing error:", error);
56
+ throw new ProcessingError(`Failed to process image: ${error instanceof Error ? error.message : 'Unknown error'}`);
57
+ }
58
+ }
59
+
60
+ async function loadImage(source: string, fetchTimeout?: number): Promise<{ imageData: string; mimeType: string }> {
61
+ if (source.startsWith('data:image/')) {
62
+ const [header, data] = source.split(',');
63
+ if (!header || !data) {
64
+ throw new ProcessingError("Invalid base64 image format");
65
+ }
66
+ const mimeMatch = header.match(/data:(image\/[^;]+)/);
67
+ if (!mimeMatch || !mimeMatch[1]) {
68
+ throw new ProcessingError("Invalid base64 image format");
69
+ }
70
+ return {
71
+ imageData: data,
72
+ mimeType: mimeMatch[1]
73
+ };
74
+ }
75
+
76
+ if (source.startsWith('http://') || source.startsWith('https://')) {
77
+ const controller = new AbortController();
78
+ const timeoutId = setTimeout(() => controller.abort(), fetchTimeout || 30000);
79
+
80
+ try {
81
+ const response = await fetch(source, { signal: controller.signal });
82
+ clearTimeout(timeoutId);
83
+
84
+ if (!response.ok) {
85
+ throw new ProcessingError(`Failed to fetch image: ${response.statusText}`);
86
+ }
87
+
88
+ const buffer = await response.arrayBuffer();
89
+ const uint8Array = new Uint8Array(buffer);
90
+
91
+ const processedImage = await sharp(uint8Array)
92
+ .resize(1024, 1024, { fit: 'inside', withoutEnlargement: true })
93
+ .jpeg({ quality: 85 })
94
+ .toBuffer();
95
+
96
+ return {
97
+ imageData: processedImage.toString('base64'),
98
+ mimeType: 'image/jpeg'
99
+ };
100
+ } catch (error) {
101
+ clearTimeout(timeoutId);
102
+ if (error instanceof Error && error.name === 'AbortError') {
103
+ throw new ProcessingError(`Fetch timeout: Failed to download image from ${source}`);
104
+ }
105
+ throw new ProcessingError(`Failed to fetch image: ${error instanceof Error ? error.message : 'Unknown error'}`);
106
+ }
107
+ }
108
+
109
+ try {
110
+ const buffer = await fs.readFile(source);
111
+ const processedImage = await sharp(buffer)
112
+ .resize(1024, 1024, { fit: 'inside', withoutEnlargement: true })
113
+ .jpeg({ quality: 85 })
114
+ .toBuffer();
115
+
116
+ return {
117
+ imageData: processedImage.toString('base64'),
118
+ mimeType: 'image/jpeg'
119
+ };
120
+ } catch (error) {
121
+ throw new ProcessingError(`Failed to load image file: ${error instanceof Error ? error.message : 'Unknown error'}`);
122
+ }
123
+ }
@@ -0,0 +1,135 @@
1
+ import { GenerativeModel } from "@google/generative-ai";
2
+ import ffmpeg from "fluent-ffmpeg";
3
+ import fs from "fs/promises";
4
+ import path from "path";
5
+ import os from "os";
6
+ import sharp from "sharp";
7
+ import type { VideoOptions, ProcessingResult } from "@/types";
8
+ import { createPrompt, parseAnalysisResponse } from "../utils/formatters.js";
9
+ import { logger } from "@/utils/logger.js";
10
+ import { ProcessingError } from "@/utils/errors.js";
11
+
12
/**
 * Analyzes a video with Gemini by sampling frames via ffmpeg (see
 * extractFrames), resizing them, and sending them with a temporal-analysis
 * prompt.
 *
 * @param model - Gemini model used for generation.
 * @param source - Video source passed directly to ffmpeg.
 * @param options - Video options; max_frames caps extracted frames
 *                  (default 32) and sample_rate controls sampling (default 1).
 * @returns Parsed analysis with processing-time/model/frame-count metadata.
 * @throws ProcessingError when extraction or analysis fails.
 */
export async function processVideo(
  model: GenerativeModel,
  source: string,
  options: VideoOptions
): Promise<ProcessingResult> {
  const startTime = Date.now();
  const maxFrames = options.max_frames || 32;
  const sampleRate = options.sample_rate || 1;

  // Extracted frames live in a unique temp dir, removed in `finally` below.
  let tempDir: string | null = null;

  try {
    // Log only a prefix — sources can be long URLs or paths.
    logger.debug(`Processing video: ${source.substring(0, 50)}... (max ${maxFrames} frames)`);

    tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'human-mcp-video-'));
    const frames = await extractFrames(source, tempDir, maxFrames, sampleRate);

    if (frames.length === 0) {
      throw new ProcessingError("No frames could be extracted from video");
    }

    // Append video-specific guidance to the generic analysis prompt.
    const prompt = createPrompt(options) + `

This is a video analysis with ${frames.length} frames extracted. Focus on:
- Temporal changes between frames
- Animation or transition issues
- Error states that appear over time
- UI state changes and interactions
- Any progressive degradation or improvement`;

    // Downscale each extracted frame to fit 512x512 and re-encode as
    // quality-80 JPEG before sending to the model.
    const mediaData = await Promise.all(
      frames.map(async (framePath) => {
        const buffer = await fs.readFile(framePath);
        const processedFrame = await sharp(buffer)
          .resize(512, 512, { fit: 'inside', withoutEnlargement: true })
          .jpeg({ quality: 80 })
          .toBuffer();

        return {
          mimeType: 'image/jpeg',
          data: processedFrame.toString('base64')
        };
      })
    );

    const response = await model.generateContent([
      { text: prompt },
      ...mediaData.map(data => ({
        inlineData: {
          mimeType: data.mimeType,
          data: data.data
        }
      }))
    ]);

    const result = await response.response;
    const analysisText = result.text();

    if (!analysisText) {
      throw new ProcessingError("No analysis result from Gemini");
    }

    // Split the free-form model output into structured sections.
    const parsed = parseAnalysisResponse(analysisText);
    const processingTime = Date.now() - startTime;

    return {
      description: parsed.description || "Video analysis completed",
      analysis: parsed.analysis || analysisText,
      elements: parsed.elements || [],
      insights: parsed.insights || [],
      recommendations: parsed.recommendations || [],
      metadata: {
        processing_time_ms: processingTime,
        model_used: model.model,
        frames_analyzed: frames.length
      }
    };

  } catch (error) {
    logger.error("Video processing error:", error);
    throw new ProcessingError(`Failed to process video: ${error instanceof Error ? error.message : 'Unknown error'}`);
  } finally {
    // Best-effort cleanup of the frame directory; failures are ignored.
    if (tempDir) {
      await fs.rm(tempDir, { recursive: true, force: true }).catch(() => {});
    }
  }
}
99
+
100
+ async function extractFrames(
101
+ videoSource: string,
102
+ outputDir: string,
103
+ maxFrames: number,
104
+ sampleRate: number
105
+ ): Promise<string[]> {
106
+ return new Promise((resolve, reject) => {
107
+ const framePattern = path.join(outputDir, 'frame_%04d.jpg');
108
+ const frames: string[] = [];
109
+
110
+ ffmpeg(videoSource)
111
+ .outputOptions([
112
+ '-vf', `fps=1/${sampleRate}`,
113
+ '-vframes', maxFrames.toString(),
114
+ '-q:v', '2'
115
+ ])
116
+ .output(framePattern)
117
+ .on('end', async () => {
118
+ try {
119
+ const files = await fs.readdir(outputDir);
120
+ const frameFiles = files
121
+ .filter(file => file.startsWith('frame_') && file.endsWith('.jpg'))
122
+ .sort()
123
+ .map(file => path.join(outputDir, file));
124
+
125
+ resolve(frameFiles);
126
+ } catch (error) {
127
+ reject(error);
128
+ }
129
+ })
130
+ .on('error', (error) => {
131
+ reject(new ProcessingError(`FFmpeg error: ${error.message}`));
132
+ })
133
+ .run();
134
+ });
135
+ }
@@ -0,0 +1,51 @@
1
+ import { z } from "zod";
2
+
3
+ export const EyesInputSchema = z.object({
4
+ source: z.string().describe("URL, file path, or base64 encoded content"),
5
+ type: z.enum(["image", "video", "gif"]).describe("Type of visual content"),
6
+ analysis_type: z.enum([
7
+ "general",
8
+ "ui_debug",
9
+ "error_detection",
10
+ "accessibility",
11
+ "performance",
12
+ "layout"
13
+ ]).default("general"),
14
+ detail_level: z.enum(["quick", "detailed"]).default("detailed"),
15
+ specific_focus: z.string().optional().describe("Specific areas or elements to focus on"),
16
+ extract_text: z.boolean().default(true),
17
+ detect_ui_elements: z.boolean().default(true),
18
+ analyze_colors: z.boolean().default(false),
19
+ check_accessibility: z.boolean().default(false)
20
+ });
21
+
22
+ export const EyesOutputSchema = z.object({
23
+ analysis: z.string(),
24
+ detected_elements: z.array(z.object({
25
+ type: z.string(),
26
+ location: z.object({
27
+ x: z.number(),
28
+ y: z.number(),
29
+ width: z.number(),
30
+ height: z.number()
31
+ }),
32
+ properties: z.record(z.any())
33
+ })),
34
+ debugging_insights: z.array(z.string()),
35
+ recommendations: z.array(z.string()),
36
+ metadata: z.object({
37
+ processing_time_ms: z.number(),
38
+ model_used: z.string(),
39
+ frames_analyzed: z.number().optional()
40
+ })
41
+ });
42
+
43
+ export const CompareInputSchema = z.object({
44
+ source1: z.string(),
45
+ source2: z.string(),
46
+ comparison_type: z.enum(["pixel", "structural", "semantic"]).default("semantic")
47
+ });
48
+
49
+ export type EyesInput = z.infer<typeof EyesInputSchema>;
50
+ export type EyesOutput = z.infer<typeof EyesOutputSchema>;
51
+ export type CompareInput = z.infer<typeof CompareInputSchema>;