@goonnguyen/human-mcp 2.3.0 → 2.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +82 -55
- package/dist/index.js +185 -63
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|

|
|
6
6
|
|
|
7
|
-
Human MCP v2.
|
|
7
|
+
Human MCP v2.2.0 is a comprehensive Model Context Protocol server that provides AI coding agents with human-like capabilities including visual analysis, document processing, speech generation, content creation, and advanced reasoning for debugging, understanding, and enhancing multimodal content.
|
|
8
8
|
|
|
9
9
|
## Features
|
|
10
10
|
|
|
@@ -48,13 +48,12 @@ Human MCP v2.0.0 is a comprehensive Model Context Protocol server that provides
|
|
|
48
48
|
- Multi-language support (24 languages)
|
|
49
49
|
- Professional audio export in WAV format
|
|
50
50
|
|
|
51
|
-
🧠 **Advanced Reasoning (Brain) -
|
|
52
|
-
|
|
53
|
-
- Sequential thinking with dynamic problem-solving
|
|
51
|
+
🧠 **Advanced Reasoning (Brain) - ✅ Complete v2.2.0**
|
|
52
|
+
- Sequential thinking with dynamic problem-solving and thought revision
|
|
54
53
|
- Multi-step analysis with hypothesis generation and testing
|
|
55
|
-
-
|
|
56
|
-
-
|
|
57
|
-
- Meta-cognitive
|
|
54
|
+
- Deep analytical reasoning with assumption tracking and alternative perspectives
|
|
55
|
+
- Problem solving with constraint handling and iterative refinement
|
|
56
|
+
- Meta-cognitive reflection and analysis improvement
|
|
58
57
|
- Advanced reasoning patterns for complex technical problems
|
|
59
58
|
|
|
60
59
|
🤖 **AI-Powered**
|
|
@@ -62,6 +61,7 @@ Ref: https://github.com/modelcontextprotocol/servers/blob/main/src/sequentialthi
|
|
|
62
61
|
- Advanced Imagen API for high-quality image generation
|
|
63
62
|
- Cutting-edge Veo 3.0 API for professional video generation
|
|
64
63
|
- Gemini Speech Generation API for natural voice synthesis
|
|
64
|
+
- Advanced reasoning with sequential thinking and meta-cognitive reflection
|
|
65
65
|
- Detailed technical insights for developers
|
|
66
66
|
- Actionable recommendations for fixing issues
|
|
67
67
|
- Structured output with detected elements and coordinates
|
|
@@ -1171,53 +1171,65 @@ Test different voices and styles to find the best fit for your content.
|
|
|
1171
1171
|
|
|
1172
1172
|
### brain_think
|
|
1173
1173
|
|
|
1174
|
-
Advanced sequential thinking with dynamic problem-solving.
|
|
1174
|
+
Advanced sequential thinking with dynamic problem-solving and thought revision.
|
|
1175
1175
|
|
|
1176
1176
|
```json
|
|
1177
1177
|
{
|
|
1178
1178
|
"problem": "Complex technical issue requiring multi-step analysis",
|
|
1179
|
-
"
|
|
1180
|
-
"
|
|
1181
|
-
"
|
|
1182
|
-
|
|
1179
|
+
"initialThoughts": 5,
|
|
1180
|
+
"thinkingStyle": "analytical",
|
|
1181
|
+
"context": {
|
|
1182
|
+
"domain": "software engineering",
|
|
1183
|
+
"constraints": ["limited resources", "tight deadline"]
|
|
1184
|
+
},
|
|
1185
|
+
"options": {
|
|
1186
|
+
"allowRevision": true,
|
|
1187
|
+
"enableBranching": true,
|
|
1188
|
+
"maxThoughts": 10
|
|
1189
|
+
}
|
|
1183
1190
|
}
|
|
1184
1191
|
```
|
|
1185
1192
|
|
|
1186
1193
|
### brain_analyze
|
|
1187
1194
|
|
|
1188
|
-
Deep analytical reasoning with
|
|
1195
|
+
Deep analytical reasoning with assumption tracking and alternative perspectives.
|
|
1189
1196
|
|
|
1190
1197
|
```json
|
|
1191
1198
|
{
|
|
1192
1199
|
"subject": "System architecture design decisions",
|
|
1193
|
-
"
|
|
1194
|
-
"
|
|
1195
|
-
"
|
|
1200
|
+
"analysisDepth": "detailed",
|
|
1201
|
+
"considerAlternatives": true,
|
|
1202
|
+
"trackAssumptions": true,
|
|
1203
|
+
"focusAreas": ["scalability", "security", "maintainability"],
|
|
1204
|
+
"thinkingStyle": "systematic"
|
|
1196
1205
|
}
|
|
1197
1206
|
```
|
|
1198
1207
|
|
|
1199
1208
|
### brain_solve
|
|
1200
1209
|
|
|
1201
|
-
Multi-step problem solving with hypothesis testing.
|
|
1210
|
+
Multi-step problem solving with hypothesis testing and constraint handling.
|
|
1202
1211
|
|
|
1203
1212
|
```json
|
|
1204
1213
|
{
|
|
1205
|
-
"
|
|
1206
|
-
"
|
|
1207
|
-
"
|
|
1208
|
-
"
|
|
1214
|
+
"problemStatement": "Performance bottleneck in distributed system",
|
|
1215
|
+
"solutionApproach": "systematic",
|
|
1216
|
+
"verifyHypotheses": true,
|
|
1217
|
+
"maxIterations": 10,
|
|
1218
|
+
"constraints": ["budget limitations", "existing infrastructure"],
|
|
1219
|
+
"requirements": ["99.9% uptime", "sub-second response"]
|
|
1209
1220
|
}
|
|
1210
1221
|
```
|
|
1211
1222
|
|
|
1212
1223
|
### brain_reflect
|
|
1213
1224
|
|
|
1214
|
-
|
|
1225
|
+
Meta-cognitive reflection and analysis improvement.
|
|
1215
1226
|
|
|
1216
1227
|
```json
|
|
1217
1228
|
{
|
|
1218
|
-
"
|
|
1219
|
-
"
|
|
1220
|
-
"
|
|
1229
|
+
"originalAnalysis": "Previous analysis of system architecture decisions and their implications...",
|
|
1230
|
+
"reflectionFocus": ["assumptions", "logic_gaps", "alternative_approaches"],
|
|
1231
|
+
"improvementGoals": ["reduce bias", "consider edge cases"],
|
|
1232
|
+
"newInformation": "Recent performance metrics show different bottlenecks"
|
|
1221
1233
|
}
|
|
1222
1234
|
```
|
|
1223
1235
|
|
|
@@ -1465,7 +1477,8 @@ Human MCP Server
|
|
|
1465
1477
|
│ ├── Image Analysis
|
|
1466
1478
|
│ ├── Video Processing
|
|
1467
1479
|
│ ├── GIF Frame Extraction
|
|
1468
|
-
│
|
|
1480
|
+
│ ├── Visual Comparison
|
|
1481
|
+
│ └── Document Processing (PDF, DOCX, XLSX, PPTX, etc.)
|
|
1469
1482
|
├── Hands Tool (Content Generation)
|
|
1470
1483
|
│ ├── Image Generation (Imagen API)
|
|
1471
1484
|
│ ├── Video Generation (Veo 3.0 API)
|
|
@@ -1479,13 +1492,15 @@ Human MCP Server
|
|
|
1479
1492
|
│ ├── Long-form Narration
|
|
1480
1493
|
│ ├── Code Explanation
|
|
1481
1494
|
│ └── Voice Customization
|
|
1482
|
-
├── Brain Tool (Advanced Reasoning)
|
|
1495
|
+
├── Brain Tool (Advanced Reasoning) ✅ COMPLETE
|
|
1483
1496
|
│ ├── Sequential Thinking
|
|
1497
|
+
│ ├── Deep Analytical Reasoning
|
|
1498
|
+
│ ├── Problem Solving
|
|
1499
|
+
│ ├── Meta-cognitive Reflection
|
|
1484
1500
|
│ ├── Hypothesis Testing
|
|
1485
1501
|
│ ├── Thought Revision
|
|
1486
|
-
│ ├──
|
|
1487
|
-
│
|
|
1488
|
-
│ └── Problem-solving Workflows
|
|
1502
|
+
│ ├── Assumption Tracking
|
|
1503
|
+
│ └── Context-aware Reasoning
|
|
1489
1504
|
├── Debugging Prompts
|
|
1490
1505
|
└── Documentation Resources
|
|
1491
1506
|
```
|
|
@@ -1498,7 +1513,7 @@ For detailed architecture information and future development plans, see:
|
|
|
1498
1513
|
|
|
1499
1514
|
**Mission**: Transform AI coding agents with complete human-like sensory capabilities, bridging the gap between artificial and human intelligence through sophisticated multimodal analysis.
|
|
1500
1515
|
|
|
1501
|
-
### Current Status: Phase 1-2 Complete ✅ | Phase 4-
|
|
1516
|
+
### Current Status: Phase 1-2 Complete ✅ | Phase 4-6 Complete ✅ | v2.2.0
|
|
1502
1517
|
|
|
1503
1518
|
**Eyes (Visual Analysis + Document Processing)** - Production Ready (v2.0.0)
|
|
1504
1519
|
- ✅ Advanced image, video, and GIF analysis capabilities
|
|
@@ -1529,6 +1544,16 @@ For detailed architecture information and future development plans, see:
|
|
|
1529
1544
|
- ✅ Comprehensive validation and error handling with retry logic
|
|
1530
1545
|
- ✅ Fast generation times with reliable output
|
|
1531
1546
|
|
|
1547
|
+
**Brain (Advanced Reasoning)** - Production Ready (v2.2.0)
|
|
1548
|
+
- ✅ Sequential thinking with dynamic problem-solving and thought revision
|
|
1549
|
+
- ✅ Deep analytical reasoning with assumption tracking and alternative perspectives
|
|
1550
|
+
- ✅ Problem solving with hypothesis testing and constraint handling
|
|
1551
|
+
- ✅ Meta-cognitive reflection and analysis improvement
|
|
1552
|
+
- ✅ Multiple thinking styles (analytical, systematic, creative, scientific, etc.)
|
|
1553
|
+
- ✅ Context-aware reasoning with domain-specific considerations
|
|
1554
|
+
- ✅ Confidence scoring and evidence evaluation
|
|
1555
|
+
- ✅ Comprehensive reasoning workflows for complex technical problems
|
|
1556
|
+
|
|
1532
1557
|
### Remaining Development Phases
|
|
1533
1558
|
|
|
1534
1559
|
#### Phase 3: Audio Processing - Ears (Q1 2025)
|
|
@@ -1539,15 +1564,6 @@ For detailed architecture information and future development plans, see:
|
|
|
1539
1564
|
- Support for 20+ audio formats (WAV, MP3, AAC, OGG, FLAC)
|
|
1540
1565
|
- Real-time audio processing capabilities
|
|
1541
1566
|
|
|
1542
|
-
#### Phase 6: Brain (Thinking/Reasoning) - Q2 2025
|
|
1543
|
-
**Advanced Cognitive Intelligence**
|
|
1544
|
-
- Sequential thinking with dynamic problem-solving
|
|
1545
|
-
- Multi-step analysis with hypothesis generation and testing
|
|
1546
|
-
- Thought revision and reflection capabilities
|
|
1547
|
-
- Branching logic for non-linear problem exploration
|
|
1548
|
-
- Meta-cognitive analysis and process optimization
|
|
1549
|
-
- Advanced reasoning patterns for complex technical problems
|
|
1550
|
-
|
|
1551
1567
|
#### Phase 4: Speech Generation - Mouth ✅ COMPLETE
|
|
1552
1568
|
**AI Voice Capabilities** - Production Ready (v1.3.0)
|
|
1553
1569
|
- ✅ High-quality text-to-speech with 30+ voice options using Gemini Speech API
|
|
@@ -1558,7 +1574,7 @@ For detailed architecture information and future development plans, see:
|
|
|
1558
1574
|
- ✅ Voice customization with style prompts and voice comparison
|
|
1559
1575
|
|
|
1560
1576
|
#### Phase 5: Content Generation - Hands ✅ COMPLETE
|
|
1561
|
-
**Creative Content Creation** - Production Ready (
|
|
1577
|
+
**Creative Content Creation** - Production Ready (v2.0.0)
|
|
1562
1578
|
- ✅ Image generation from text descriptions using Imagen API
|
|
1563
1579
|
- ✅ Video generation from text prompts using Veo 3.0 API
|
|
1564
1580
|
- ✅ Image-to-video generation pipeline combining Imagen + Veo 3.0
|
|
@@ -1571,9 +1587,20 @@ For detailed architecture information and future development plans, see:
|
|
|
1571
1587
|
- Future: Advanced image editing (inpainting, style transfer, enhancement)
|
|
1572
1588
|
- Future: Animation creation with motion graphics
|
|
1573
1589
|
|
|
1574
|
-
|
|
1590
|
+
#### Phase 6: Brain - Advanced Reasoning ✅ COMPLETE
|
|
1591
|
+
**Advanced Cognitive Intelligence** - Production Ready (v2.2.0)
|
|
1592
|
+
- ✅ Sequential thinking with dynamic problem-solving and thought revision
|
|
1593
|
+
- ✅ Deep analytical reasoning with assumption tracking and alternative perspectives
|
|
1594
|
+
- ✅ Problem solving with hypothesis testing and constraint handling
|
|
1595
|
+
- ✅ Meta-cognitive reflection and analysis improvement
|
|
1596
|
+
- ✅ Multiple thinking styles (analytical, systematic, creative, scientific, critical, strategic, intuitive, collaborative)
|
|
1597
|
+
- ✅ Context-aware reasoning with domain-specific considerations
|
|
1598
|
+
- ✅ Confidence scoring and evidence evaluation
|
|
1599
|
+
- ✅ Comprehensive reasoning workflows for complex technical problems
|
|
1600
|
+
|
|
1601
|
+
### Target Architecture (Current v2.2.0 - Almost Complete)
|
|
1575
1602
|
|
|
1576
|
-
The evolution from single-capability visual analysis to comprehensive human-like sensory and cognitive intelligence:
|
|
1603
|
+
The evolution from single-capability visual analysis to comprehensive human-like sensory and cognitive intelligence (5 of 6 phases complete):
|
|
1577
1604
|
|
|
1578
1605
|
```
|
|
1579
1606
|
┌─────────────────┐ ┌──────────────────────┐ ┌─────────────────────────┐
|
|
@@ -1597,9 +1624,9 @@ The evolution from single-capability visual analysis to comprehensive human-like
|
|
|
1597
1624
|
│ • Video Generation ✅│
|
|
1598
1625
|
│ │
|
|
1599
1626
|
│ 🧠 Brain (Reasoning)│
|
|
1600
|
-
│ • Sequential Think
|
|
1601
|
-
│ • Hypothesis Test
|
|
1602
|
-
│ • Reflection
|
|
1627
|
+
│ • Sequential Think ✅│
|
|
1628
|
+
│ • Hypothesis Test ✅│
|
|
1629
|
+
│ • Reflection ✅│
|
|
1603
1630
|
└──────────────────────┘
|
|
1604
1631
|
```
|
|
1605
1632
|
|
|
@@ -1627,15 +1654,15 @@ The evolution from single-capability visual analysis to comprehensive human-like
|
|
|
1627
1654
|
- **Phase 3 (Audio Processing)**: January - March 2025
|
|
1628
1655
|
- **Phase 4 (Speech Generation)**: ✅ Completed September 2025
|
|
1629
1656
|
- **Phase 5 (Content Generation)**: ✅ Completed September 2025
|
|
1630
|
-
- **Phase 6 (Brain/Reasoning)**:
|
|
1657
|
+
- **Phase 6 (Brain/Reasoning)**: ✅ Completed September 2025
|
|
1631
1658
|
|
|
1632
1659
|
**Target Goals:**
|
|
1633
1660
|
- Support 50+ file formats across all modalities
|
|
1634
1661
|
- 99%+ success rate with optimized processing times (images <30s, videos <5min)
|
|
1635
|
-
- Advanced reasoning with 95%+ logical consistency
|
|
1662
|
+
- ✅ Advanced reasoning with 95%+ logical consistency (ACHIEVED)
|
|
1636
1663
|
- 1000+ MCP client integrations and 100K+ monthly API calls
|
|
1637
|
-
- Comprehensive documentation with real-world examples
|
|
1638
|
-
- Professional-grade content generation and reasoning capabilities
|
|
1664
|
+
- ✅ Comprehensive documentation with real-world examples (ACHIEVED)
|
|
1665
|
+
- ✅ Professional-grade content generation and reasoning capabilities (ACHIEVED)
|
|
1639
1666
|
|
|
1640
1667
|
### Getting Involved
|
|
1641
1668
|
|
|
@@ -1670,11 +1697,11 @@ Human MCP is built for the developer community. Whether you're integrating with
|
|
|
1670
1697
|
- **Durations**: 4s, 8s, 12s video lengths
|
|
1671
1698
|
- **Quality**: Professional-grade output with customizable FPS (1-60)
|
|
1672
1699
|
|
|
1673
|
-
**Reasoning Capabilities (
|
|
1674
|
-
- **Thinking Styles**: Analytical, systematic, creative, scientific
|
|
1675
|
-
- **Problem Types**: Technical debugging, architecture decisions, hypothesis testing
|
|
1676
|
-
- **Output Formats**: Structured reasoning chains, hypothesis validation, reflection analysis
|
|
1677
|
-
- **Complexity**: Multi-step analysis with branching logic and
|
|
1700
|
+
**Reasoning Capabilities (v2.2.0)**:
|
|
1701
|
+
- **Thinking Styles**: Analytical, systematic, creative, scientific, critical, strategic, intuitive, collaborative
|
|
1702
|
+
- **Problem Types**: Technical debugging, architecture decisions, hypothesis testing, complex analysis
|
|
1703
|
+
- **Output Formats**: Structured reasoning chains, hypothesis validation, reflection analysis, confidence scoring
|
|
1704
|
+
- **Complexity**: Multi-step analysis with branching logic, thought revision, and meta-cognitive reflection
|
|
1678
1705
|
|
|
1679
1706
|
## Contributing
|
|
1680
1707
|
|
package/dist/index.js
CHANGED
|
@@ -160065,43 +160065,114 @@ function getCloudflareR2() {
|
|
|
160065
160065
|
// src/tools/eyes/processors/image.ts
|
|
160066
160066
|
/**
 * Analyze a single image with a Gemini model, retrying transient failures.
 *
 * Each attempt loads the image via `loadImage`, builds a prompt via
 * `createPrompt`, sends both to `model.generateContent`, and parses the text
 * reply with `parseAnalysisResponse`.
 *
 * Retry policy (at most 3 attempts, exponential backoff capped at 5s):
 *  - An empty/whitespace-only reply is retried; if the last attempt is still
 *    empty, a "partial_success" fallback result is returned instead of throwing.
 *  - A thrown error is retried only when `isRetryableError` accepts it.
 *    Non-retryable errors stop the loop immediately (previously they were
 *    retried back-to-back with no delay, which made the classification
 *    pointless and tripled the latency of permanent failures).
 *
 * @param {object} model - Gemini model handle; `generateContent` is called on
 *   it and its `model` property is echoed into the result metadata.
 * @param {string} source - Image source string handed to `loadImage`.
 * @param {object} options - Analysis options; `fetchTimeout` is forwarded to
 *   `loadImage` and the whole object to `createPrompt`.
 * @returns {Promise<object>} `{ description, analysis, elements, insights,
 *   recommendations, metadata }` where `metadata` carries
 *   `processing_time_ms`, `model_used`, `attempts_made` and `status`.
 * @throws {ProcessingError} When no attempt produced a result.
 */
async function processImage(model, source, options) {
  const startTime = Date.now();
  const maxRetries = 3;
  let lastError = null;
  let attemptsMade = 0;

  // Exponential backoff: 1000ms, 2000ms, 4000ms, ... capped at 5000ms.
  const waitBeforeRetry = async (attempt) => {
    const delay = Math.min(1000 * Math.pow(2, attempt - 1), 5000);
    logger2.debug(`Retrying in ${delay}ms...`);
    await new Promise((resolve) => setTimeout(resolve, delay));
  };

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    attemptsMade = attempt;
    try {
      logger2.debug(`Processing image (attempt ${attempt}/${maxRetries}): ${source.substring(0, 50)}...`);
      const { imageData, mimeType } = await loadImage(source, options.fetchTimeout);
      const prompt = createPrompt(options);
      logger2.debug(`Generated prompt for analysis: ${prompt.substring(0, 100)}...`);
      logger2.debug(`Image data size: ${imageData.length} characters, MIME type: ${mimeType}`);
      const response = await model.generateContent([
        { text: prompt },
        {
          inlineData: {
            mimeType,
            data: imageData
          }
        }
      ]);
      const result = await response.response;
      const analysisText = result.text();
      logger2.debug(`Gemini response received. Text length: ${analysisText ? analysisText.length : 0}`);

      if (!analysisText || analysisText.trim().length === 0) {
        const errorMsg = `Gemini returned empty response on attempt ${attempt}/${maxRetries}`;
        logger2.warn(errorMsg);
        if (attempt === maxRetries) {
          // Out of attempts: degrade gracefully rather than failing the tool call.
          logger2.info("Using fallback analysis due to empty Gemini response");
          const fallbackAnalysis = "Image was processed but detailed analysis is unavailable. This may be due to API limitations or content restrictions.";
          return {
            description: "Image analysis completed with limited results",
            analysis: fallbackAnalysis,
            elements: [],
            insights: ["Gemini API returned empty response", "Consider retrying the analysis"],
            recommendations: ["Check image format and content", "Verify API key and quotas"],
            metadata: {
              processing_time_ms: Date.now() - startTime,
              model_used: model.model,
              attempts_made: maxRetries,
              status: "partial_success"
            }
          };
        }
        await waitBeforeRetry(attempt);
        continue;
      }

      const parsed = parseAnalysisResponse(analysisText);
      const processingTime = Date.now() - startTime;
      logger2.info(`Image analysis successful on attempt ${attempt}. Processing time: ${processingTime}ms`);
      return {
        description: parsed.description || "Image analysis completed",
        analysis: parsed.analysis || analysisText,
        elements: parsed.elements || [],
        insights: parsed.insights || [],
        recommendations: parsed.recommendations || [],
        metadata: {
          processing_time_ms: processingTime,
          model_used: model.model,
          attempts_made: attempt,
          status: "success"
        }
      };
    } catch (error) {
      lastError = error instanceof Error ? error : new Error("Unknown error");
      logger2.warn(`Image processing attempt ${attempt} failed:`, lastError.message);
      // Give up on the last attempt, or as soon as the failure is one that a
      // retry cannot fix (e.g. an unresolvable image reference).
      if (attempt === maxRetries || !isRetryableError(lastError)) {
        break;
      }
      await waitBeforeRetry(attempt);
    }
  }

  logger2.error("Image processing failed after all retries:", lastError);
  // Keep the "Failed to process image after" prefix stable: the eyes_analyze
  // error handler keys its user-facing message off that substring.
  throw new ProcessingError(`Failed to process image after ${attemptsMade} attempt${attemptsMade === 1 ? "" : "s"}: ${lastError?.message || "Unknown error"}`);
}
|
|
160146
|
+
/**
 * Decide whether a failure looks transient and is worth retrying.
 *
 * The decision is a case-insensitive substring match of the error message
 * against a fixed list of markers (timeouts, network problems, rate limiting
 * and 429/5xx status codes embedded in the message text).
 *
 * Total by design: values without a usable message (null, undefined, objects
 * lacking a string `message`) are reported as non-retryable instead of
 * throwing a TypeError from inside a catch handler.
 *
 * @param {unknown} error - Usually an Error; a plain string is also accepted.
 * @returns {boolean} true when the message contains any retryable marker.
 */
function isRetryableError(error) {
  const retryableMessages = [
    "timeout",
    "network",
    "rate limit",
    "temporary",
    "429",
    "500",
    "502",
    "503",
    "504"
  ];
  const rawMessage = typeof error === "string" ? error : error?.message;
  if (typeof rawMessage !== "string") {
    return false;
  }
  const errorMessage = rawMessage.toLowerCase();
  return retryableMessages.some((msg) => errorMessage.includes(msg));
}
|
|
160104
160161
|
async function loadImage(source, fetchTimeout) {
|
|
160162
|
+
if (source.match(/^\[Image #\d+\]$/)) {
|
|
160163
|
+
throw new ProcessingError(`Virtual image reference "${source}" cannot be processed directly.
|
|
160164
|
+
|
|
160165
|
+
` + `This occurs when Claude Code references an uploaded image that hasn't been properly resolved.
|
|
160166
|
+
|
|
160167
|
+
` + `Solutions:
|
|
160168
|
+
` + `1. Use a direct file path instead (e.g., "/path/to/image.png")
|
|
160169
|
+
` + `2. Use a public URL (e.g., "https://example.com/image.png")
|
|
160170
|
+
` + `3. Convert your image to a base64 data URI and pass that instead
|
|
160171
|
+
` + `4. If using HTTP transport, configure Cloudflare R2 for automatic file uploads
|
|
160172
|
+
|
|
160173
|
+
` + `Example of base64 data URI format:
|
|
160174
|
+
` + `"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg=="`);
|
|
160175
|
+
}
|
|
160105
160176
|
if (source.startsWith("/mnt/user-data/") || source.startsWith("/mnt/")) {
|
|
160106
160177
|
logger2.info(`Detected Claude Desktop virtual path: ${source}`);
|
|
160107
160178
|
const filename = source.split("/").pop() || "upload.jpg";
|
|
@@ -162730,35 +162801,45 @@ class GeminiClient {
|
|
|
162730
162801
|
});
|
|
162731
162802
|
}
|
|
162732
162803
|
async analyzeContent(model, prompt, mediaData) {
|
|
162733
|
-
|
|
162734
|
-
|
|
162735
|
-
|
|
162736
|
-
|
|
162737
|
-
|
|
162738
|
-
|
|
162739
|
-
|
|
162740
|
-
|
|
162741
|
-
}
|
|
162742
|
-
|
|
162743
|
-
|
|
162744
|
-
|
|
162745
|
-
|
|
162746
|
-
|
|
162747
|
-
|
|
162748
|
-
|
|
162749
|
-
|
|
162750
|
-
|
|
162751
|
-
|
|
162752
|
-
|
|
162753
|
-
|
|
162754
|
-
|
|
162755
|
-
|
|
162756
|
-
|
|
162757
|
-
|
|
162758
|
-
|
|
162804
|
+
return this.analyzeContentWithRetry(model, prompt, mediaData, 3);
|
|
162805
|
+
}
|
|
162806
|
+
async analyzeContentWithRetry(model, prompt, mediaData, maxRetries = 3) {
|
|
162807
|
+
let lastError = null;
|
|
162808
|
+
for (let attempt = 1;attempt <= maxRetries; attempt++) {
|
|
162809
|
+
try {
|
|
162810
|
+
logger2.debug(`Analyzing content with ${mediaData.length} media files (attempt ${attempt}/${maxRetries})`);
|
|
162811
|
+
const parts = [
|
|
162812
|
+
{ text: prompt },
|
|
162813
|
+
...mediaData.map((media) => ({
|
|
162814
|
+
inlineData: {
|
|
162815
|
+
mimeType: media.mimeType,
|
|
162816
|
+
data: media.data
|
|
162817
|
+
}
|
|
162818
|
+
}))
|
|
162819
|
+
];
|
|
162820
|
+
const analysisPromise = model.generateContent(parts);
|
|
162821
|
+
const timeoutPromise = new Promise((_, reject) => {
|
|
162822
|
+
setTimeout(() => reject(new APIError("Gemini API request timed out")), this.config.server.requestTimeout);
|
|
162823
|
+
});
|
|
162824
|
+
const result = await Promise.race([analysisPromise, timeoutPromise]);
|
|
162825
|
+
const response = await result.response;
|
|
162826
|
+
const text = response.text();
|
|
162827
|
+
if (!text) {
|
|
162828
|
+
throw new APIError("No response from Gemini API");
|
|
162829
|
+
}
|
|
162830
|
+
return text;
|
|
162831
|
+
} catch (error) {
|
|
162832
|
+
lastError = error instanceof Error ? error : new Error("Unknown error");
|
|
162833
|
+
logger2.warn(`Content analysis attempt ${attempt} failed:`, lastError.message);
|
|
162834
|
+
if (!this.isRetryableError(error) || attempt === maxRetries) {
|
|
162835
|
+
break;
|
|
162836
|
+
}
|
|
162837
|
+
const delay = this.createBackoffDelay(attempt);
|
|
162838
|
+
logger2.debug(`Retrying in ${delay}ms...`);
|
|
162839
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
162759
162840
|
}
|
|
162760
|
-
throw new APIError("Unknown Gemini API error");
|
|
162761
162841
|
}
|
|
162842
|
+
this.handleGeminiError(lastError, "Content analysis");
|
|
162762
162843
|
}
|
|
162763
162844
|
getDocumentModel() {
|
|
162764
162845
|
return this.genAI.getGenerativeModel({
|
|
@@ -163399,11 +163480,21 @@ Extract as much metadata as possible from the document properties and content.`;
|
|
|
163399
163480
|
throw new APIError(`${operation}: Gemini API server error - please retry`);
|
|
163400
163481
|
}
|
|
163401
163482
|
if (error?.status === 503) {
|
|
163402
|
-
throw new APIError(`${operation}: Gemini API
|
|
163483
|
+
throw new APIError(`${operation}: Gemini API is currently unavailable (503 Service Unavailable). ` + `This is usually temporary. Please try again in a few moments. ` + `If the issue persists, check Google's Gemini API status page.`);
|
|
163403
163484
|
}
|
|
163404
163485
|
if (error?.code === "ECONNRESET" || error?.code === "ETIMEDOUT") {
|
|
163405
163486
|
throw new APIError(`${operation}: Network error - check connection and retry`);
|
|
163406
163487
|
}
|
|
163488
|
+
if (error?.message?.includes("GoogleGenerativeAI Error")) {
|
|
163489
|
+
const geminiErrorMatch = error.message.match(/\[(\d+)\s+([^\]]+)\]\s+(.+)/);
|
|
163490
|
+
if (geminiErrorMatch) {
|
|
163491
|
+
const [, statusCode, statusText, details] = geminiErrorMatch;
|
|
163492
|
+
if (statusCode === "503") {
|
|
163493
|
+
throw new APIError(`${operation}: Google Gemini API is temporarily unavailable (${statusText}). ` + `This is a service-side issue. Please try again in a few moments.`);
|
|
163494
|
+
}
|
|
163495
|
+
throw new APIError(`${operation}: Gemini API error [${statusCode} ${statusText}] ${details}`);
|
|
163496
|
+
}
|
|
163497
|
+
}
|
|
163407
163498
|
const message = error?.message || "Unknown error occurred";
|
|
163408
163499
|
throw new APIError(`${operation}: ${message}`);
|
|
163409
163500
|
}
|
|
@@ -164014,11 +164105,35 @@ async function registerVisionTools(server, geminiClient, config) {
|
|
|
164014
164105
|
return await handleAnalyze(geminiClient, args, config);
|
|
164015
164106
|
} catch (error) {
|
|
164016
164107
|
const mcpError = handleError(error);
|
|
164017
|
-
logger2.error(`Tool eyes_analyze error:`,
|
|
164108
|
+
logger2.error(`Tool eyes_analyze error:`, {
|
|
164109
|
+
message: mcpError.message,
|
|
164110
|
+
code: mcpError.code,
|
|
164111
|
+
args,
|
|
164112
|
+
timestamp: new Date().toISOString(),
|
|
164113
|
+
stackTrace: error instanceof Error ? error.stack : "No stack trace available"
|
|
164114
|
+
});
|
|
164115
|
+
let userMessage = mcpError.message;
|
|
164116
|
+
if (mcpError.message.includes("No analysis result from Gemini")) {
|
|
164117
|
+
userMessage = `The image analysis service returned an empty response. This can happen due to:
|
|
164118
|
+
` + `• API rate limits or quota exceeded
|
|
164119
|
+
` + `• Image content restrictions
|
|
164120
|
+
` + `• Temporary service issues
|
|
164121
|
+
` + `• Network connectivity problems
|
|
164122
|
+
|
|
164123
|
+
` + "Please try again in a few moments, or check if your image meets the requirements.";
|
|
164124
|
+
} else if (mcpError.message.includes("Failed to process image after")) {
|
|
164125
|
+
userMessage = `Image processing failed after multiple attempts. This could be due to:
|
|
164126
|
+
` + `• Network connectivity issues
|
|
164127
|
+
` + `• API service unavailability
|
|
164128
|
+
` + `• Image format or size issues
|
|
164129
|
+
` + `• Rate limiting
|
|
164130
|
+
|
|
164131
|
+
` + "Please check your internet connection and try again.";
|
|
164132
|
+
}
|
|
164018
164133
|
return {
|
|
164019
164134
|
content: [{
|
|
164020
164135
|
type: "text",
|
|
164021
|
-
text: `Error: ${
|
|
164136
|
+
text: `Error: ${userMessage}`
|
|
164022
164137
|
}],
|
|
164023
164138
|
isError: true
|
|
164024
164139
|
};
|
|
@@ -164182,10 +164297,13 @@ async function registerDocumentTools(server, geminiClient, config) {
|
|
|
164182
164297
|
async function handleAnalyze(geminiClient, args, config) {
|
|
164183
164298
|
const input = EyesInputSchema.parse(args);
|
|
164184
164299
|
const { source, type, detail_level } = input;
|
|
164185
|
-
|
|
164300
|
+
const customPrompt = "prompt" in input ? input.prompt : undefined;
|
|
164301
|
+
logger2.info(`Analyzing ${type} with detail level: ${detail_level}, source: ${source.substring(0, 50)}...`);
|
|
164186
164302
|
const model = geminiClient.getModel(detail_level || "detailed");
|
|
164187
164303
|
const options = {
|
|
164188
|
-
|
|
164304
|
+
analysis_type: "general",
|
|
164305
|
+
detail_level: detail_level || "detailed",
|
|
164306
|
+
specific_focus: customPrompt,
|
|
164189
164307
|
fetchTimeout: config.server.fetchTimeout
|
|
164190
164308
|
};
|
|
164191
164309
|
let result;
|
|
@@ -164202,6 +164320,7 @@ async function handleAnalyze(geminiClient, args, config) {
|
|
|
164202
164320
|
default:
|
|
164203
164321
|
throw new Error(`Unsupported media type: ${type}`);
|
|
164204
164322
|
}
|
|
164323
|
+
logger2.info(`Analysis completed for ${type}. Processing time: ${result.metadata.processing_time_ms}ms`);
|
|
164205
164324
|
return {
|
|
164206
164325
|
content: [
|
|
164207
164326
|
{
|
|
@@ -164273,6 +164392,9 @@ Be precise with locations and measurements where possible.`;
|
|
|
164273
164392
|
}
|
|
164274
164393
|
}
|
|
164275
164394
|
async function loadImageForComparison(source) {
|
|
164395
|
+
if (source.match(/^\[Image #\d+\]$/)) {
|
|
164396
|
+
throw new Error(`Virtual image reference "${source}" cannot be processed. ` + `Please use a direct file path, URL, or base64 data URI instead.`);
|
|
164397
|
+
}
|
|
164276
164398
|
if (source.startsWith("data:image/")) {
|
|
164277
164399
|
const [header, data] = source.split(",");
|
|
164278
164400
|
if (!header || !data) {
|