@goonnguyen/human-mcp 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/code-reviewer.md +140 -0
- package/.claude/agents/database-admin.md +86 -0
- package/.claude/agents/debugger.md +119 -0
- package/.claude/agents/docs-manager.md +113 -0
- package/.claude/agents/git-manager.md +59 -0
- package/.claude/agents/planner-researcher.md +97 -0
- package/.claude/agents/project-manager.md +113 -0
- package/.claude/agents/tester.md +95 -0
- package/.claude/commands/cook.md +7 -0
- package/.claude/commands/debug.md +10 -0
- package/.claude/commands/docs/init.md +11 -0
- package/.claude/commands/docs/update.md +11 -0
- package/.claude/commands/fix/ci.md +8 -0
- package/.claude/commands/fix/fast.md +5 -0
- package/.claude/commands/fix/hard.md +7 -0
- package/.claude/commands/fix/test.md +16 -0
- package/.claude/commands/git/cm.md +5 -0
- package/.claude/commands/git/cp.md +4 -0
- package/.claude/commands/plan/ci.md +12 -0
- package/.claude/commands/plan/two.md +13 -0
- package/.claude/commands/plan.md +10 -0
- package/.claude/commands/test.md +7 -0
- package/.claude/commands/watzup.md +8 -0
- package/.claude/hooks/telegram_notify.sh +136 -0
- package/.claude/send-discord.sh +64 -0
- package/.claude/settings.json +7 -0
- package/.claude/statusline.sh +143 -0
- package/.env.example +17 -0
- package/.github/workflows/publish.yml +51 -0
- package/.releaserc.json +26 -0
- package/.serena/project.yml +68 -0
- package/CHANGELOG.md +27 -0
- package/CLAUDE.md +139 -0
- package/Dockerfile +28 -0
- package/LICENSE +21 -0
- package/QUICKSTART.md +97 -0
- package/README.md +234 -0
- package/bun.lock +1555 -0
- package/bunfig.toml +15 -0
- package/dist/index.js +24568 -0
- package/docs/codebase-structure-architecture-code-standards.md +416 -0
- package/docs/codebase-summary.md +321 -0
- package/docs/project-overview-pdr.md +270 -0
- package/examples/debugging-session.ts +96 -0
- package/inspector-wrapper.mjs +33 -0
- package/package.json +47 -0
- package/plans/reports/001-from-qa-engineer-to-development-team-test-suite-report.md +188 -0
- package/plans/templates/bug-fix-template.md +69 -0
- package/plans/templates/feature-implementation-template.md +84 -0
- package/plans/templates/refactor-template.md +82 -0
- package/plans/templates/template-usage-guide.md +58 -0
- package/src/index.ts +5 -0
- package/src/prompts/debugging-prompts.ts +149 -0
- package/src/prompts/index.ts +55 -0
- package/src/resources/documentation.ts +316 -0
- package/src/resources/index.ts +49 -0
- package/src/server.ts +36 -0
- package/src/tools/eyes/index.ts +225 -0
- package/src/tools/eyes/processors/gif.ts +137 -0
- package/src/tools/eyes/processors/image.ts +123 -0
- package/src/tools/eyes/processors/video.ts +135 -0
- package/src/tools/eyes/schemas.ts +51 -0
- package/src/tools/eyes/utils/formatters.ts +126 -0
- package/src/tools/eyes/utils/gemini-client.ts +73 -0
- package/src/types/index.ts +41 -0
- package/src/utils/config.ts +51 -0
- package/src/utils/errors.ts +40 -0
- package/src/utils/logger.ts +49 -0
- package/tests/integration/server.test.ts +24 -0
- package/tests/setup.ts +11 -0
- package/tests/unit/config.test.ts +40 -0
- package/tests/unit/formatters.test.ts +85 -0
- package/tsconfig.json +26 -0
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
import { processImage } from "./processors/image.js";
|
|
4
|
+
import { processVideo } from "./processors/video.js";
|
|
5
|
+
import { processGif } from "./processors/gif.js";
|
|
6
|
+
import { GeminiClient } from "./utils/gemini-client.js";
|
|
7
|
+
import {
|
|
8
|
+
EyesInputSchema,
|
|
9
|
+
CompareInputSchema,
|
|
10
|
+
type EyesInput,
|
|
11
|
+
type CompareInput
|
|
12
|
+
} from "./schemas.js";
|
|
13
|
+
import { logger } from "@/utils/logger.js";
|
|
14
|
+
import { handleError } from "@/utils/errors.js";
|
|
15
|
+
import type { Config } from "@/utils/config.js";
|
|
16
|
+
|
|
17
|
+
export async function registerEyesTool(server: McpServer, config: Config) {
|
|
18
|
+
const geminiClient = new GeminiClient(config);
|
|
19
|
+
|
|
20
|
+
// Register eyes.analyze tool
|
|
21
|
+
server.registerTool(
|
|
22
|
+
"eyes.analyze",
|
|
23
|
+
{
|
|
24
|
+
title: "Vision Analysis Tool",
|
|
25
|
+
description: "Analyze images, videos, and GIFs using AI vision capabilities",
|
|
26
|
+
inputSchema: {
|
|
27
|
+
source: z.string().describe("Path, URL, or base64 data URI of the media to analyze"),
|
|
28
|
+
type: z.enum(["image", "video", "gif"]).describe("Type of media to analyze"),
|
|
29
|
+
detail_level: z.enum(["quick", "detailed"]).optional().default("detailed").describe("Level of detail in analysis"),
|
|
30
|
+
prompt: z.string().optional().describe("Custom prompt for analysis"),
|
|
31
|
+
max_frames: z.number().optional().describe("Maximum number of frames to analyze for videos/GIFs")
|
|
32
|
+
}
|
|
33
|
+
},
|
|
34
|
+
async (args) => {
|
|
35
|
+
try {
|
|
36
|
+
return await handleAnalyze(geminiClient, args, config);
|
|
37
|
+
} catch (error) {
|
|
38
|
+
const mcpError = handleError(error);
|
|
39
|
+
logger.error(`Tool eyes.analyze error:`, mcpError);
|
|
40
|
+
|
|
41
|
+
return {
|
|
42
|
+
content: [{
|
|
43
|
+
type: "text" as const,
|
|
44
|
+
text: `Error: ${mcpError.message}`
|
|
45
|
+
}],
|
|
46
|
+
isError: true
|
|
47
|
+
};
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
);
|
|
51
|
+
|
|
52
|
+
// Register eyes.compare tool
|
|
53
|
+
server.registerTool(
|
|
54
|
+
"eyes.compare",
|
|
55
|
+
{
|
|
56
|
+
title: "Image Comparison Tool",
|
|
57
|
+
description: "Compare two images and identify differences",
|
|
58
|
+
inputSchema: {
|
|
59
|
+
source1: z.string().describe("Path, URL, or base64 data URI of the first image"),
|
|
60
|
+
source2: z.string().describe("Path, URL, or base64 data URI of the second image"),
|
|
61
|
+
comparison_type: z.enum(["pixel", "structural", "semantic"]).optional().default("semantic").describe("Type of comparison to perform")
|
|
62
|
+
}
|
|
63
|
+
},
|
|
64
|
+
async (args) => {
|
|
65
|
+
try {
|
|
66
|
+
return await handleCompare(geminiClient, args);
|
|
67
|
+
} catch (error) {
|
|
68
|
+
const mcpError = handleError(error);
|
|
69
|
+
logger.error(`Tool eyes.compare error:`, mcpError);
|
|
70
|
+
|
|
71
|
+
return {
|
|
72
|
+
content: [{
|
|
73
|
+
type: "text" as const,
|
|
74
|
+
text: `Error: ${mcpError.message}`
|
|
75
|
+
}],
|
|
76
|
+
isError: true
|
|
77
|
+
};
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
);
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
async function handleAnalyze(
|
|
84
|
+
geminiClient: GeminiClient,
|
|
85
|
+
args: unknown,
|
|
86
|
+
config: Config
|
|
87
|
+
) {
|
|
88
|
+
const input = EyesInputSchema.parse(args) as EyesInput;
|
|
89
|
+
const { source, type, detail_level } = input;
|
|
90
|
+
|
|
91
|
+
logger.info(`Analyzing ${type} with detail level: ${detail_level}`);
|
|
92
|
+
|
|
93
|
+
const model = geminiClient.getModel(detail_level || "detailed");
|
|
94
|
+
const options = {
|
|
95
|
+
...input,
|
|
96
|
+
fetchTimeout: config.server.fetchTimeout
|
|
97
|
+
};
|
|
98
|
+
let result;
|
|
99
|
+
|
|
100
|
+
switch (type) {
|
|
101
|
+
case "image":
|
|
102
|
+
result = await processImage(model, source, options);
|
|
103
|
+
break;
|
|
104
|
+
case "video":
|
|
105
|
+
result = await processVideo(model, source, options);
|
|
106
|
+
break;
|
|
107
|
+
case "gif":
|
|
108
|
+
result = await processGif(model, source, options);
|
|
109
|
+
break;
|
|
110
|
+
default:
|
|
111
|
+
throw new Error(`Unsupported media type: ${type}`);
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
return {
|
|
115
|
+
content: [
|
|
116
|
+
{
|
|
117
|
+
type: "text" as const,
|
|
118
|
+
text: result.analysis
|
|
119
|
+
}
|
|
120
|
+
],
|
|
121
|
+
isError: false
|
|
122
|
+
};
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
async function handleCompare(
|
|
126
|
+
geminiClient: GeminiClient,
|
|
127
|
+
args: unknown
|
|
128
|
+
) {
|
|
129
|
+
const input = CompareInputSchema.parse(args) as CompareInput;
|
|
130
|
+
const { source1, source2, comparison_type } = input;
|
|
131
|
+
|
|
132
|
+
logger.info(`Comparing images with type: ${comparison_type}`);
|
|
133
|
+
|
|
134
|
+
const model = geminiClient.getModel("detailed");
|
|
135
|
+
|
|
136
|
+
const prompt = `Compare these two images and identify the differences. Focus on:
|
|
137
|
+
|
|
138
|
+
${comparison_type === "pixel" ?
|
|
139
|
+
"- Exact pixel-level differences\n- Color value changes\n- Any visual artifacts or rendering differences" :
|
|
140
|
+
comparison_type === "structural" ?
|
|
141
|
+
"- Layout changes\n- Element positioning differences\n- Size and proportion changes\n- Structural modifications" :
|
|
142
|
+
"- Semantic meaning differences\n- Content changes\n- Functional differences\n- User experience impact"
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
Please provide:
|
|
146
|
+
1. SUMMARY: Brief overview of main differences
|
|
147
|
+
2. SPECIFIC DIFFERENCES: Detailed list of changes found
|
|
148
|
+
3. IMPACT ASSESSMENT: How these differences might affect users
|
|
149
|
+
4. RECOMMENDATIONS: Suggested actions based on the differences
|
|
150
|
+
|
|
151
|
+
Be precise with locations and measurements where possible.`;
|
|
152
|
+
|
|
153
|
+
try {
|
|
154
|
+
const [image1Data, image2Data] = await Promise.all([
|
|
155
|
+
loadImageForComparison(source1),
|
|
156
|
+
loadImageForComparison(source2)
|
|
157
|
+
]);
|
|
158
|
+
|
|
159
|
+
const response = await model.generateContent([
|
|
160
|
+
{ text: prompt },
|
|
161
|
+
{
|
|
162
|
+
inlineData: {
|
|
163
|
+
mimeType: image1Data.mimeType,
|
|
164
|
+
data: image1Data.data
|
|
165
|
+
}
|
|
166
|
+
},
|
|
167
|
+
{ text: "Image 1 (above) vs Image 2 (below):" },
|
|
168
|
+
{
|
|
169
|
+
inlineData: {
|
|
170
|
+
mimeType: image2Data.mimeType,
|
|
171
|
+
data: image2Data.data
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
]);
|
|
175
|
+
|
|
176
|
+
const result = await response.response;
|
|
177
|
+
const comparisonText = result.text();
|
|
178
|
+
|
|
179
|
+
return {
|
|
180
|
+
content: [
|
|
181
|
+
{
|
|
182
|
+
type: "text" as const,
|
|
183
|
+
text: comparisonText || "No differences detected or analysis failed"
|
|
184
|
+
}
|
|
185
|
+
],
|
|
186
|
+
isError: false
|
|
187
|
+
};
|
|
188
|
+
|
|
189
|
+
} catch (error) {
|
|
190
|
+
throw new Error(`Failed to compare images: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
async function loadImageForComparison(source: string): Promise<{ data: string; mimeType: string }> {
|
|
195
|
+
if (source.startsWith('data:image/')) {
|
|
196
|
+
const [header, data] = source.split(',');
|
|
197
|
+
if (!header || !data) {
|
|
198
|
+
throw new Error("Invalid base64 image format");
|
|
199
|
+
}
|
|
200
|
+
const mimeMatch = header.match(/data:(image\/[^;]+)/);
|
|
201
|
+
if (!mimeMatch || !mimeMatch[1]) {
|
|
202
|
+
throw new Error("Invalid base64 image format");
|
|
203
|
+
}
|
|
204
|
+
return { data, mimeType: mimeMatch[1] };
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
if (source.startsWith('http://') || source.startsWith('https://')) {
|
|
208
|
+
const response = await fetch(source);
|
|
209
|
+
if (!response.ok) {
|
|
210
|
+
throw new Error(`Failed to fetch image: ${response.statusText}`);
|
|
211
|
+
}
|
|
212
|
+
const buffer = await response.arrayBuffer();
|
|
213
|
+
return {
|
|
214
|
+
data: Buffer.from(buffer).toString('base64'),
|
|
215
|
+
mimeType: response.headers.get('content-type') || 'image/jpeg'
|
|
216
|
+
};
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
const fs = await import('fs/promises');
|
|
220
|
+
const buffer = await fs.readFile(source);
|
|
221
|
+
return {
|
|
222
|
+
data: buffer.toString('base64'),
|
|
223
|
+
mimeType: 'image/jpeg'
|
|
224
|
+
};
|
|
225
|
+
}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import { GenerativeModel } from "@google/generative-ai";
|
|
2
|
+
import sharp from "sharp";
|
|
3
|
+
import fs from "fs/promises";
|
|
4
|
+
import type { AnalysisOptions, ProcessingResult } from "@/types";
|
|
5
|
+
import { createPrompt, parseAnalysisResponse } from "../utils/formatters.js";
|
|
6
|
+
import { logger } from "@/utils/logger.js";
|
|
7
|
+
import { ProcessingError } from "@/utils/errors.js";
|
|
8
|
+
|
|
9
|
+
export async function processGif(
|
|
10
|
+
model: GenerativeModel,
|
|
11
|
+
source: string,
|
|
12
|
+
options: AnalysisOptions
|
|
13
|
+
): Promise<ProcessingResult> {
|
|
14
|
+
const startTime = Date.now();
|
|
15
|
+
|
|
16
|
+
try {
|
|
17
|
+
logger.debug(`Processing GIF: ${source.substring(0, 50)}...`);
|
|
18
|
+
|
|
19
|
+
const gifData = await loadGif(source);
|
|
20
|
+
const frames = await extractGifFrames(gifData);
|
|
21
|
+
|
|
22
|
+
if (frames.length === 0) {
|
|
23
|
+
throw new ProcessingError("No frames could be extracted from GIF");
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
const prompt = createPrompt(options) + `
|
|
27
|
+
|
|
28
|
+
This is an animated GIF analysis with ${frames.length} frames. Pay attention to:
|
|
29
|
+
- Animation timing and smoothness
|
|
30
|
+
- UI state transitions
|
|
31
|
+
- Loading states or progress indicators
|
|
32
|
+
- Error animations or feedback
|
|
33
|
+
- Interactive element hover states
|
|
34
|
+
- Any visual glitches in the animation`;
|
|
35
|
+
|
|
36
|
+
const mediaData = frames.map(frame => ({
|
|
37
|
+
mimeType: 'image/png',
|
|
38
|
+
data: frame
|
|
39
|
+
}));
|
|
40
|
+
|
|
41
|
+
const response = await model.generateContent([
|
|
42
|
+
{ text: prompt },
|
|
43
|
+
...mediaData.map(data => ({
|
|
44
|
+
inlineData: {
|
|
45
|
+
mimeType: data.mimeType,
|
|
46
|
+
data: data.data
|
|
47
|
+
}
|
|
48
|
+
}))
|
|
49
|
+
]);
|
|
50
|
+
|
|
51
|
+
const result = await response.response;
|
|
52
|
+
const analysisText = result.text();
|
|
53
|
+
|
|
54
|
+
if (!analysisText) {
|
|
55
|
+
throw new ProcessingError("No analysis result from Gemini");
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
const parsed = parseAnalysisResponse(analysisText);
|
|
59
|
+
const processingTime = Date.now() - startTime;
|
|
60
|
+
|
|
61
|
+
return {
|
|
62
|
+
description: parsed.description || "GIF analysis completed",
|
|
63
|
+
analysis: parsed.analysis || analysisText,
|
|
64
|
+
elements: parsed.elements || [],
|
|
65
|
+
insights: parsed.insights || [],
|
|
66
|
+
recommendations: parsed.recommendations || [],
|
|
67
|
+
metadata: {
|
|
68
|
+
processing_time_ms: processingTime,
|
|
69
|
+
model_used: model.model,
|
|
70
|
+
frames_analyzed: frames.length
|
|
71
|
+
}
|
|
72
|
+
};
|
|
73
|
+
|
|
74
|
+
} catch (error) {
|
|
75
|
+
logger.error("GIF processing error:", error);
|
|
76
|
+
throw new ProcessingError(`Failed to process GIF: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
async function loadGif(source: string): Promise<Buffer> {
|
|
81
|
+
if (source.startsWith('data:image/gif')) {
|
|
82
|
+
const [, data] = source.split(',');
|
|
83
|
+
if (!data) {
|
|
84
|
+
throw new ProcessingError("Invalid base64 GIF format");
|
|
85
|
+
}
|
|
86
|
+
return Buffer.from(data, 'base64');
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
if (source.startsWith('http://') || source.startsWith('https://')) {
|
|
90
|
+
const response = await fetch(source);
|
|
91
|
+
if (!response.ok) {
|
|
92
|
+
throw new ProcessingError(`Failed to fetch GIF: ${response.statusText}`);
|
|
93
|
+
}
|
|
94
|
+
return Buffer.from(await response.arrayBuffer());
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
try {
|
|
98
|
+
return await fs.readFile(source);
|
|
99
|
+
} catch (error) {
|
|
100
|
+
throw new ProcessingError(`Failed to load GIF file: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
async function extractGifFrames(gifBuffer: Buffer): Promise<string[]> {
|
|
105
|
+
try {
|
|
106
|
+
const image = sharp(gifBuffer, { animated: true });
|
|
107
|
+
const { pages } = await image.metadata();
|
|
108
|
+
|
|
109
|
+
if (!pages || pages <= 1) {
|
|
110
|
+
const singleFrame = await image
|
|
111
|
+
.resize(512, 512, { fit: 'inside', withoutEnlargement: true })
|
|
112
|
+
.png()
|
|
113
|
+
.toBuffer();
|
|
114
|
+
return [singleFrame.toString('base64')];
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
const frames: string[] = [];
|
|
118
|
+
const maxFrames = Math.min(pages, 16);
|
|
119
|
+
|
|
120
|
+
for (let i = 0; i < maxFrames; i++) {
|
|
121
|
+
const frame = await sharp(gifBuffer, {
|
|
122
|
+
animated: true,
|
|
123
|
+
page: i
|
|
124
|
+
})
|
|
125
|
+
.resize(512, 512, { fit: 'inside', withoutEnlargement: true })
|
|
126
|
+
.png()
|
|
127
|
+
.toBuffer();
|
|
128
|
+
|
|
129
|
+
frames.push(frame.toString('base64'));
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
return frames;
|
|
133
|
+
|
|
134
|
+
} catch (error) {
|
|
135
|
+
throw new ProcessingError(`Failed to extract GIF frames: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
136
|
+
}
|
|
137
|
+
}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import { GenerativeModel } from "@google/generative-ai";
|
|
2
|
+
import sharp from "sharp";
|
|
3
|
+
import fs from "fs/promises";
|
|
4
|
+
import type { AnalysisOptions, ProcessingResult } from "@/types";
|
|
5
|
+
import { createPrompt, parseAnalysisResponse } from "../utils/formatters.js";
|
|
6
|
+
import { logger } from "@/utils/logger.js";
|
|
7
|
+
import { ProcessingError } from "@/utils/errors.js";
|
|
8
|
+
|
|
9
|
+
export async function processImage(
|
|
10
|
+
model: GenerativeModel,
|
|
11
|
+
source: string,
|
|
12
|
+
options: AnalysisOptions
|
|
13
|
+
): Promise<ProcessingResult> {
|
|
14
|
+
const startTime = Date.now();
|
|
15
|
+
|
|
16
|
+
try {
|
|
17
|
+
logger.debug(`Processing image: ${source.substring(0, 50)}...`);
|
|
18
|
+
|
|
19
|
+
const { imageData, mimeType } = await loadImage(source, options.fetchTimeout);
|
|
20
|
+
const prompt = createPrompt(options);
|
|
21
|
+
|
|
22
|
+
const response = await model.generateContent([
|
|
23
|
+
{ text: prompt },
|
|
24
|
+
{
|
|
25
|
+
inlineData: {
|
|
26
|
+
mimeType,
|
|
27
|
+
data: imageData
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
]);
|
|
31
|
+
|
|
32
|
+
const result = await response.response;
|
|
33
|
+
const analysisText = result.text();
|
|
34
|
+
|
|
35
|
+
if (!analysisText) {
|
|
36
|
+
throw new ProcessingError("No analysis result from Gemini");
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
const parsed = parseAnalysisResponse(analysisText);
|
|
40
|
+
const processingTime = Date.now() - startTime;
|
|
41
|
+
|
|
42
|
+
return {
|
|
43
|
+
description: parsed.description || "Image analysis completed",
|
|
44
|
+
analysis: parsed.analysis || analysisText,
|
|
45
|
+
elements: parsed.elements || [],
|
|
46
|
+
insights: parsed.insights || [],
|
|
47
|
+
recommendations: parsed.recommendations || [],
|
|
48
|
+
metadata: {
|
|
49
|
+
processing_time_ms: processingTime,
|
|
50
|
+
model_used: model.model,
|
|
51
|
+
}
|
|
52
|
+
};
|
|
53
|
+
|
|
54
|
+
} catch (error) {
|
|
55
|
+
logger.error("Image processing error:", error);
|
|
56
|
+
throw new ProcessingError(`Failed to process image: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
async function loadImage(source: string, fetchTimeout?: number): Promise<{ imageData: string; mimeType: string }> {
|
|
61
|
+
if (source.startsWith('data:image/')) {
|
|
62
|
+
const [header, data] = source.split(',');
|
|
63
|
+
if (!header || !data) {
|
|
64
|
+
throw new ProcessingError("Invalid base64 image format");
|
|
65
|
+
}
|
|
66
|
+
const mimeMatch = header.match(/data:(image\/[^;]+)/);
|
|
67
|
+
if (!mimeMatch || !mimeMatch[1]) {
|
|
68
|
+
throw new ProcessingError("Invalid base64 image format");
|
|
69
|
+
}
|
|
70
|
+
return {
|
|
71
|
+
imageData: data,
|
|
72
|
+
mimeType: mimeMatch[1]
|
|
73
|
+
};
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
if (source.startsWith('http://') || source.startsWith('https://')) {
|
|
77
|
+
const controller = new AbortController();
|
|
78
|
+
const timeoutId = setTimeout(() => controller.abort(), fetchTimeout || 30000);
|
|
79
|
+
|
|
80
|
+
try {
|
|
81
|
+
const response = await fetch(source, { signal: controller.signal });
|
|
82
|
+
clearTimeout(timeoutId);
|
|
83
|
+
|
|
84
|
+
if (!response.ok) {
|
|
85
|
+
throw new ProcessingError(`Failed to fetch image: ${response.statusText}`);
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
const buffer = await response.arrayBuffer();
|
|
89
|
+
const uint8Array = new Uint8Array(buffer);
|
|
90
|
+
|
|
91
|
+
const processedImage = await sharp(uint8Array)
|
|
92
|
+
.resize(1024, 1024, { fit: 'inside', withoutEnlargement: true })
|
|
93
|
+
.jpeg({ quality: 85 })
|
|
94
|
+
.toBuffer();
|
|
95
|
+
|
|
96
|
+
return {
|
|
97
|
+
imageData: processedImage.toString('base64'),
|
|
98
|
+
mimeType: 'image/jpeg'
|
|
99
|
+
};
|
|
100
|
+
} catch (error) {
|
|
101
|
+
clearTimeout(timeoutId);
|
|
102
|
+
if (error instanceof Error && error.name === 'AbortError') {
|
|
103
|
+
throw new ProcessingError(`Fetch timeout: Failed to download image from ${source}`);
|
|
104
|
+
}
|
|
105
|
+
throw new ProcessingError(`Failed to fetch image: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
try {
|
|
110
|
+
const buffer = await fs.readFile(source);
|
|
111
|
+
const processedImage = await sharp(buffer)
|
|
112
|
+
.resize(1024, 1024, { fit: 'inside', withoutEnlargement: true })
|
|
113
|
+
.jpeg({ quality: 85 })
|
|
114
|
+
.toBuffer();
|
|
115
|
+
|
|
116
|
+
return {
|
|
117
|
+
imageData: processedImage.toString('base64'),
|
|
118
|
+
mimeType: 'image/jpeg'
|
|
119
|
+
};
|
|
120
|
+
} catch (error) {
|
|
121
|
+
throw new ProcessingError(`Failed to load image file: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
122
|
+
}
|
|
123
|
+
}
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import { GenerativeModel } from "@google/generative-ai";
|
|
2
|
+
import ffmpeg from "fluent-ffmpeg";
|
|
3
|
+
import fs from "fs/promises";
|
|
4
|
+
import path from "path";
|
|
5
|
+
import os from "os";
|
|
6
|
+
import sharp from "sharp";
|
|
7
|
+
import type { VideoOptions, ProcessingResult } from "@/types";
|
|
8
|
+
import { createPrompt, parseAnalysisResponse } from "../utils/formatters.js";
|
|
9
|
+
import { logger } from "@/utils/logger.js";
|
|
10
|
+
import { ProcessingError } from "@/utils/errors.js";
|
|
11
|
+
|
|
12
|
+
export async function processVideo(
|
|
13
|
+
model: GenerativeModel,
|
|
14
|
+
source: string,
|
|
15
|
+
options: VideoOptions
|
|
16
|
+
): Promise<ProcessingResult> {
|
|
17
|
+
const startTime = Date.now();
|
|
18
|
+
const maxFrames = options.max_frames || 32;
|
|
19
|
+
const sampleRate = options.sample_rate || 1;
|
|
20
|
+
|
|
21
|
+
let tempDir: string | null = null;
|
|
22
|
+
|
|
23
|
+
try {
|
|
24
|
+
logger.debug(`Processing video: ${source.substring(0, 50)}... (max ${maxFrames} frames)`);
|
|
25
|
+
|
|
26
|
+
tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'human-mcp-video-'));
|
|
27
|
+
const frames = await extractFrames(source, tempDir, maxFrames, sampleRate);
|
|
28
|
+
|
|
29
|
+
if (frames.length === 0) {
|
|
30
|
+
throw new ProcessingError("No frames could be extracted from video");
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
const prompt = createPrompt(options) + `
|
|
34
|
+
|
|
35
|
+
This is a video analysis with ${frames.length} frames extracted. Focus on:
|
|
36
|
+
- Temporal changes between frames
|
|
37
|
+
- Animation or transition issues
|
|
38
|
+
- Error states that appear over time
|
|
39
|
+
- UI state changes and interactions
|
|
40
|
+
- Any progressive degradation or improvement`;
|
|
41
|
+
|
|
42
|
+
const mediaData = await Promise.all(
|
|
43
|
+
frames.map(async (framePath) => {
|
|
44
|
+
const buffer = await fs.readFile(framePath);
|
|
45
|
+
const processedFrame = await sharp(buffer)
|
|
46
|
+
.resize(512, 512, { fit: 'inside', withoutEnlargement: true })
|
|
47
|
+
.jpeg({ quality: 80 })
|
|
48
|
+
.toBuffer();
|
|
49
|
+
|
|
50
|
+
return {
|
|
51
|
+
mimeType: 'image/jpeg',
|
|
52
|
+
data: processedFrame.toString('base64')
|
|
53
|
+
};
|
|
54
|
+
})
|
|
55
|
+
);
|
|
56
|
+
|
|
57
|
+
const response = await model.generateContent([
|
|
58
|
+
{ text: prompt },
|
|
59
|
+
...mediaData.map(data => ({
|
|
60
|
+
inlineData: {
|
|
61
|
+
mimeType: data.mimeType,
|
|
62
|
+
data: data.data
|
|
63
|
+
}
|
|
64
|
+
}))
|
|
65
|
+
]);
|
|
66
|
+
|
|
67
|
+
const result = await response.response;
|
|
68
|
+
const analysisText = result.text();
|
|
69
|
+
|
|
70
|
+
if (!analysisText) {
|
|
71
|
+
throw new ProcessingError("No analysis result from Gemini");
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
const parsed = parseAnalysisResponse(analysisText);
|
|
75
|
+
const processingTime = Date.now() - startTime;
|
|
76
|
+
|
|
77
|
+
return {
|
|
78
|
+
description: parsed.description || "Video analysis completed",
|
|
79
|
+
analysis: parsed.analysis || analysisText,
|
|
80
|
+
elements: parsed.elements || [],
|
|
81
|
+
insights: parsed.insights || [],
|
|
82
|
+
recommendations: parsed.recommendations || [],
|
|
83
|
+
metadata: {
|
|
84
|
+
processing_time_ms: processingTime,
|
|
85
|
+
model_used: model.model,
|
|
86
|
+
frames_analyzed: frames.length
|
|
87
|
+
}
|
|
88
|
+
};
|
|
89
|
+
|
|
90
|
+
} catch (error) {
|
|
91
|
+
logger.error("Video processing error:", error);
|
|
92
|
+
throw new ProcessingError(`Failed to process video: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
|
93
|
+
} finally {
|
|
94
|
+
if (tempDir) {
|
|
95
|
+
await fs.rm(tempDir, { recursive: true, force: true }).catch(() => {});
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
async function extractFrames(
|
|
101
|
+
videoSource: string,
|
|
102
|
+
outputDir: string,
|
|
103
|
+
maxFrames: number,
|
|
104
|
+
sampleRate: number
|
|
105
|
+
): Promise<string[]> {
|
|
106
|
+
return new Promise((resolve, reject) => {
|
|
107
|
+
const framePattern = path.join(outputDir, 'frame_%04d.jpg');
|
|
108
|
+
const frames: string[] = [];
|
|
109
|
+
|
|
110
|
+
ffmpeg(videoSource)
|
|
111
|
+
.outputOptions([
|
|
112
|
+
'-vf', `fps=1/${sampleRate}`,
|
|
113
|
+
'-vframes', maxFrames.toString(),
|
|
114
|
+
'-q:v', '2'
|
|
115
|
+
])
|
|
116
|
+
.output(framePattern)
|
|
117
|
+
.on('end', async () => {
|
|
118
|
+
try {
|
|
119
|
+
const files = await fs.readdir(outputDir);
|
|
120
|
+
const frameFiles = files
|
|
121
|
+
.filter(file => file.startsWith('frame_') && file.endsWith('.jpg'))
|
|
122
|
+
.sort()
|
|
123
|
+
.map(file => path.join(outputDir, file));
|
|
124
|
+
|
|
125
|
+
resolve(frameFiles);
|
|
126
|
+
} catch (error) {
|
|
127
|
+
reject(error);
|
|
128
|
+
}
|
|
129
|
+
})
|
|
130
|
+
.on('error', (error) => {
|
|
131
|
+
reject(new ProcessingError(`FFmpeg error: ${error.message}`));
|
|
132
|
+
})
|
|
133
|
+
.run();
|
|
134
|
+
});
|
|
135
|
+
}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
|
|
3
|
+
// Input accepted by the eyes.analyze tool.
// NOTE(review): the tool registration in index.ts advertises extra fields
// (`prompt`, `max_frames`) that are absent here — keep the two in sync.
export const EyesInputSchema = z.object({
  source: z.string().describe("URL, file path, or base64 encoded content"),
  type: z.enum(["image", "video", "gif"]).describe("Type of visual content"),
  // Preset that steers the generated analysis prompt.
  analysis_type: z.enum([
    "general",
    "ui_debug",
    "error_detection",
    "accessibility",
    "performance",
    "layout"
  ]).default("general"),
  detail_level: z.enum(["quick", "detailed"]).default("detailed"),
  specific_focus: z.string().optional().describe("Specific areas or elements to focus on"),
  extract_text: z.boolean().default(true),
  detect_ui_elements: z.boolean().default(true),
  analyze_colors: z.boolean().default(false),
  check_accessibility: z.boolean().default(false)
});

// Shape of a structured analysis result (not enforced at runtime in the
// code visible here — presumably documentation/typing only; verify callers).
export const EyesOutputSchema = z.object({
  analysis: z.string(),
  // Elements detected in the media, each with a bounding box.
  detected_elements: z.array(z.object({
    type: z.string(),
    location: z.object({
      x: z.number(),
      y: z.number(),
      width: z.number(),
      height: z.number()
    }),
    properties: z.record(z.any())
  })),
  debugging_insights: z.array(z.string()),
  recommendations: z.array(z.string()),
  metadata: z.object({
    processing_time_ms: z.number(),
    model_used: z.string(),
    // Only present for multi-frame media (video/GIF).
    frames_analyzed: z.number().optional()
  })
});

// Input accepted by the eyes.compare tool.
export const CompareInputSchema = z.object({
  source1: z.string(),
  source2: z.string(),
  comparison_type: z.enum(["pixel", "structural", "semantic"]).default("semantic")
});

export type EyesInput = z.infer<typeof EyesInputSchema>;
export type EyesOutput = z.infer<typeof EyesOutputSchema>;
export type CompareInput = z.infer<typeof CompareInputSchema>;
|