npm - @goonnguyen/human-mcp - Versions diffs - 1.3.0 → 2.0.0 - Mend

@goonnguyen/human-mcp 1.3.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (128) hide show

package/README.md +261 -19
package/bin/human-mcp.js +2 -0
package/dist/index.js +65180 -1698
package/package.json +19 -2
package/.claude/agents/code-reviewer.md +0 -140
package/.claude/agents/database-admin.md +0 -86
package/.claude/agents/debugger.md +0 -119
package/.claude/agents/docs-manager.md +0 -113
package/.claude/agents/git-manager.md +0 -59
package/.claude/agents/planner-researcher.md +0 -97
package/.claude/agents/project-manager.md +0 -113
package/.claude/agents/tester.md +0 -95
package/.claude/commands/cook.md +0 -7
package/.claude/commands/debug.md +0 -10
package/.claude/commands/docs/init.md +0 -11
package/.claude/commands/docs/update.md +0 -11
package/.claude/commands/fix/ci.md +0 -8
package/.claude/commands/fix/fast.md +0 -5
package/.claude/commands/fix/hard.md +0 -7
package/.claude/commands/fix/test.md +0 -16
package/.claude/commands/git/cm.md +0 -5
package/.claude/commands/git/cp.md +0 -4
package/.claude/commands/plan/ci.md +0 -12
package/.claude/commands/plan/two.md +0 -13
package/.claude/commands/plan.md +0 -10
package/.claude/commands/test.md +0 -7
package/.claude/commands/watzup.md +0 -8
package/.claude/hooks/telegram_notify.sh +0 -136
package/.claude/send-discord.sh +0 -64
package/.claude/settings.json +0 -7
package/.claude/statusline.sh +0 -143
package/.dockerignore +0 -81
package/.env.example +0 -44
package/.github/workflows/publish.yml +0 -88
package/.opencode/agent/code-reviewer.md +0 -142
package/.opencode/agent/debugger.md +0 -74
package/.opencode/agent/docs-manager.md +0 -119
package/.opencode/agent/git-manager.md +0 -60
package/.opencode/agent/planner-researcher.md +0 -100
package/.opencode/agent/project-manager.md +0 -113
package/.opencode/agent/system-architecture.md +0 -200
package/.opencode/agent/tester.md +0 -96
package/.opencode/agent/ui-ux-developer.md +0 -97
package/.opencode/command/cook.md +0 -7
package/.opencode/command/debug.md +0 -10
package/.opencode/command/fix/ci.md +0 -8
package/.opencode/command/fix/fast.md +0 -5
package/.opencode/command/fix/hard.md +0 -7
package/.opencode/command/fix/test.md +0 -16
package/.opencode/command/git/cm.md +0 -5
package/.opencode/command/git/cp.md +0 -4
package/.opencode/command/plan/ci.md +0 -12
package/.opencode/command/plan/two.md +0 -13
package/.opencode/command/plan.md +0 -10
package/.opencode/command/test.md +0 -7
package/.opencode/command/watzup.md +0 -8
package/.releaserc.json +0 -26
package/.serena/project.yml +0 -68
package/CHANGELOG.md +0 -62
package/CLAUDE.md +0 -141
package/DEPLOYMENT.md +0 -329
package/Dockerfile +0 -52
package/QUICKSTART.md +0 -97
package/bun.lock +0 -1872
package/bunfig.toml +0 -15
package/docker-compose.yaml +0 -128
package/docs/README.md +0 -51
package/docs/codebase-structure-architecture-code-standards.md +0 -428
package/docs/codebase-summary.md +0 -321
package/docs/project-overview-pdr.md +0 -286
package/docs/project-roadmap.md +0 -494
package/examples/debugging-session.ts +0 -96
package/human-mcp.png +0 -0
package/inspector-wrapper.mjs +0 -33
package/plans/001-streamable-http-transport-plan.md +0 -905
package/plans/002-sse-fallback-http-transport-plan.md +0 -161
package/plans/003-fix-test-infrastructure-and-ci-plan.md +0 -699
package/plans/003-http-transport-local-file-access-plan.md +0 -880
package/plans/004-fix-typescript-compilation-errors-plan.md +0 -388
package/plans/005-comprehensive-test-infrastructure-fix-plan.md +0 -854
package/plans/templates/bug-fix-template.md +0 -69
package/plans/templates/feature-implementation-template.md +0 -84
package/plans/templates/refactor-template.md +0 -82
package/plans/templates/template-usage-guide.md +0 -58
package/src/index.ts +0 -49
package/src/prompts/debugging-prompts.ts +0 -149
package/src/prompts/index.ts +0 -55
package/src/resources/documentation.ts +0 -316
package/src/resources/index.ts +0 -49
package/src/server.ts +0 -36
package/src/tools/eyes/index.ts +0 -225
package/src/tools/eyes/processors/gif.ts +0 -137
package/src/tools/eyes/processors/image.ts +0 -213
package/src/tools/eyes/processors/video.ts +0 -135
package/src/tools/eyes/schemas.ts +0 -51
package/src/tools/eyes/utils/formatters.ts +0 -126
package/src/tools/eyes/utils/gemini-client.ts +0 -73
package/src/transports/http/file-interceptor.ts +0 -134
package/src/transports/http/middleware.ts +0 -46
package/src/transports/http/routes.ts +0 -297
package/src/transports/http/server.ts +0 -116
package/src/transports/http/session.ts +0 -93
package/src/transports/http/sse-routes.ts +0 -210
package/src/transports/index.ts +0 -36
package/src/transports/stdio.ts +0 -7
package/src/transports/types.ts +0 -50
package/src/types/index.ts +0 -41
package/src/utils/cloudflare-r2.ts +0 -107
package/src/utils/config.ts +0 -123
package/src/utils/errors.ts +0 -40
package/src/utils/logger.ts +0 -49
package/tests/integration/http-transport-files.test.ts +0 -190
package/tests/integration/server.test.ts +0 -27
package/tests/integration/sse-transport.test.ts +0 -142
package/tests/setup.ts +0 -55
package/tests/types/api-responses.ts +0 -35
package/tests/types/test-types.ts +0 -105
package/tests/unit/cloudflare-r2.test.ts +0 -118
package/tests/unit/config.test.ts +0 -40
package/tests/unit/eyes-analyze.test.ts +0 -150
package/tests/unit/formatters.test.ts +0 -85
package/tests/unit/sse-routes.test.ts +0 -92
package/tests/utils/error-scenarios.ts +0 -198
package/tests/utils/index.ts +0 -3
package/tests/utils/mock-helpers.ts +0 -99
package/tests/utils/test-data-generators.ts +0 -217
package/tests/utils/test-server-manager.ts +0 -172
package/tsconfig.json +0 -26

package/docs/project-roadmap.md DELETED Viewed

@@ -1,494 +0,0 @@
-# Human MCP - Project Roadmap
-## Project Vision
-**Human MCP: Bringing Human Capabilities to Coding Agents**
-Transform AI coding agents with human-like sensory capabilities by providing sophisticated multimodal analysis tools through the Model Context Protocol. Our mission is to bridge the gap between AI agents and human perception, enabling comprehensive debugging, analysis, and content understanding workflows.
-## Executive Summary
-Human MCP is a Model Context Protocol server that empowers AI coding agents with advanced multimodal capabilities. Currently focused on visual analysis (Eyes), the project roadmap extends to encompass complete human-like sensory capabilities including document understanding, audio processing, speech generation, and content creation.
-**Current Status**: Version 1.2.1 - Visual Analysis Foundation Complete
-**Next Milestone**: Document Understanding (Eyes Extension)
-**Target Completion**: Q4 2025 for full human capabilities suite
-## Current Capabilities (Phase 1 - COMPLETE)
-### Eyes: Visual Analysis - 100% Complete ✅
-**Status**: Production Ready (v1.2.1)
-**Completion Date**: September 08, 2025
-#### Current Features
-- **Image Analysis**: PNG, JPEG, WebP, GIF static image processing
-- **Video Analysis**: MP4, WebM, MOV, AVI video processing with frame extraction
-- **GIF Analysis**: Animated GIF frame-by-frame analysis
-- **Image Comparison**: Pixel, structural, and semantic comparison capabilities
-- **Analysis Types**: UI debugging, error detection, accessibility, performance, layout analysis
-- **Detail Levels**: Quick (< 10s) and detailed (< 30s) analysis modes
-- **Input Sources**: File paths, URLs, and base64 data URIs
-#### Technical Implementation
-```typescript
-// Current Tools Available
-- eyes_analyze: Primary visual analysis tool
-- eyes_compare: Image comparison and difference detection
-// Architecture Components
-- Gemini API integration with configurable models
-- ffmpeg-based video processing
-- Sharp library for GIF frame extraction
-- Comprehensive error handling and logging
-- MCP protocol compliant server implementation
-```
-#### Performance Metrics (Current)
-- **Image Processing**: < 10s (quick) / < 30s (detailed)
-- **Video Processing**: < 2 minutes for 30-second clips
-- **Success Rate**: 98.5% for supported formats
-- **Memory Usage**: < 100MB for typical operations
-- **API Response Time**: 95th percentile < 30 seconds
-## Development Phases & Roadmap
-### Phase 2: Document Understanding (Q4 2025)
-**Priority**: High | **Status**: Planning | **Progress**: 0%
-#### Objectives
-Extend Eyes capability to read and understand documentation formats including PDFs, Word documents, Excel files, and other structured documents using Gemini's Document Understanding API.
-#### Technical Implementation Plan
-```typescript
-// New Tools to Implement
-- eyes_read_document: Document analysis and extraction
-- eyes_extract_data: Structured data extraction from documents
-- eyes_summarize: Document summarization and key insights
-// Required Dependencies
-- pdf-parse: PDF text extraction
-- mammoth: Word document processing
-- xlsx: Excel spreadsheet handling
-- @google/generative-ai: Document Understanding API
-// Architecture Extensions
-src/tools/eyes/processors/
-├── document.ts        # PDF, DOCX document processing
-├── spreadsheet.ts     # Excel, CSV data processing
-├── presentation.ts    # PowerPoint slide analysis
-└── text.ts           # Plain text and markdown processing
-```
-#### Deliverables
-- [ ] PDF document analysis with text extraction and understanding
-- [ ] Word document processing with formatting preservation
-- [ ] Excel spreadsheet data analysis and insights
-- [ ] PowerPoint presentation content analysis
-- [ ] Multi-format document comparison capabilities
-- [ ] Comprehensive documentation and examples
-#### Success Metrics
-- Support for PDF, DOCX, XLSX, PPTX, TXT, MD formats
-- Text extraction accuracy > 95%
-- Processing time < 60 seconds for typical documents
-- Structured data extraction with schema validation
-- Cross-document comparison and analysis capabilities
-#### Timeline: January 2025 - March 2025
-- **Week 1-2**: Document processing architecture design
-- **Week 3-6**: PDF and Word document processor implementation
-- **Week 7-10**: Excel and PowerPoint processor development
-- **Week 11-12**: Testing, optimization, and documentation
-### Phase 3: Audio Processing - Ears (Q4 2025)
-**Priority**: High | **Status**: Not Started | **Progress**: 0%
-#### Objectives
-Implement comprehensive audio analysis capabilities using Gemini's Audio Understanding API, enabling speech-to-text, audio content analysis, and debugging of audio-related issues.
-#### Technical Implementation Plan
-```typescript
-// New Tools to Implement
-- ears_transcribe: Speech-to-text conversion
-- ears_analyze: Audio content analysis and insights
-- ears_compare: Audio comparison and difference detection
-- ears_extract: Audio feature extraction and metadata
-// Required Dependencies
-- fluent-ffmpeg: Audio format conversion and processing
-- audio-context: Web Audio API compatibility
-- wav-file-info: Audio file metadata extraction
-// Architecture Design
-src/tools/ears/
-├── index.ts           # Tool registration and orchestration
-├── schemas.ts         # Audio input validation schemas
-├── processors/
-│   ├── speech.ts      # Speech-to-text processing
-│   ├── music.ts       # Music analysis and classification
-│   ├── effects.ts     # Audio effects and quality analysis
-│   └── comparison.ts  # Audio comparison utilities
-└── utils/
-    ├── audio-client.ts    # Gemini Audio API client
-    ├── converters.ts      # Audio format conversion
-    └── analyzers.ts       # Audio analysis utilities
-```
-#### Deliverables
-- [ ] Speech-to-text transcription with speaker identification
-- [ ] Audio content analysis (music, speech, noise classification)
-- [ ] Audio quality assessment and debugging capabilities
-- [ ] Audio comparison for A/B testing and regression detection
-- [ ] Multi-format audio support (WAV, MP3, AAC, OGG, FLAC)
-- [ ] Real-time audio processing capabilities (future)
-#### Success Metrics
-- Transcription accuracy > 95% for clear speech
-- Support for 20+ audio formats
-- Processing time < file duration + 30 seconds
-- Speaker identification accuracy > 90%
-- Audio quality assessment with detailed metrics
-#### Timeline: April 2025 - June 2025
-- **Month 1**: Core audio processing infrastructure
-- **Month 2**: Speech-to-text and content analysis implementation
-- **Month 3**: Testing, optimization, and advanced features
-### Phase 4: Speech Generation - Mouth (Q4 2025)
-**Priority**: Medium | **Status**: Not Started | **Progress**: 0%
-#### Objectives
-Implement text-to-speech capabilities using Gemini's Speech Generation API, enabling AI agents to provide audio feedback, generate spoken explanations, and create audio content.
-#### Technical Implementation Plan
-```typescript
-// New Tools to Implement
-- mouth_speak: Text-to-speech generation
-- mouth_narrate: Long-form content narration
-- mouth_explain: Code explanation with speech
-- mouth_customize: Voice customization and tuning
-// Architecture Design
-src/tools/mouth/
-├── index.ts           # Tool registration
-├── schemas.ts         # Speech generation schemas
-├── processors/
-│   ├── synthesis.ts   # Core text-to-speech
-│   ├── narration.ts   # Long-form content
-│   ├── explanation.ts # Technical content speech
-│   └── effects.ts     # Voice effects and modulation
-└── utils/
-    ├── speech-client.ts   # Gemini Speech API client
-    ├── voice-profiles.ts  # Voice customization
-    └── audio-export.ts    # Audio file generation
-```
-#### Deliverables
-- [ ] High-quality text-to-speech with multiple voice options
-- [ ] Code explanation and technical content narration
-- [ ] Customizable voice parameters (speed, pitch, tone)
-- [ ] Long-form content narration with chapter breaks
-- [ ] Multi-language speech generation support
-- [ ] Audio export in multiple formats (MP3, WAV, OGG)
-#### Success Metrics
-- Natural-sounding speech with < 2% word error rate
-- Response time < 10 seconds for typical text inputs
-- Support for 10+ languages
-- Voice customization with 5+ parameters
-- Audio quality suitable for professional use
-#### Timeline: September 2025 - October 2025
-- **Month 1**: Speech synthesis core implementation
-- **Month 2**: Voice customization and multi-language support
-- **Month 3**: Advanced features and integration testing
-### Phase 5: Content Generation - Hands (Q4 2025)
-**Priority**: Medium | **Status**: Not Started | **Progress**: 0%
-#### Objectives
-Implement visual and video content generation capabilities using Google's Imagen (Nano Banana) and Veo3 APIs, enabling AI agents to create images, edit visuals, and generate videos.
-#### Technical Implementation Plan
-```typescript
-// New Tools to Implement
-- hands_draw: Image generation from text prompts
-- hands_edit: Image editing and modification
-- hands_create_video: Video generation from text/images
-- hands_animate: Animation creation and motion graphics
-// Architecture Design
-src/tools/hands/
-├── index.ts           # Tool registration
-├── schemas.ts         # Content generation schemas
-├── processors/
-│   ├── image-gen.ts   # Imagen API integration
-│   ├── image-edit.ts  # Image editing capabilities
-│   ├── video-gen.ts   # Veo3 video generation
-│   └── animation.ts   # Animation and motion graphics
-└── utils/
-    ├── imagen-client.ts   # Google Imagen client
-    ├── veo-client.ts      # Google Veo3 client
-    └── content-utils.ts   # Content processing utilities
-```
-#### Deliverables
-- [ ] High-quality image generation from text descriptions
-- [ ] Image editing capabilities (inpainting, style transfer, enhancement)
-- [ ] Video generation from text prompts and image sequences
-- [ ] Animation creation with motion graphics
-- [ ] Batch content generation for workflow automation
-- [ ] Content customization with style and parameter controls
-#### Success Metrics
-- Image generation quality score > 8/10 (human evaluation)
-- Video generation up to 30 seconds duration
-- Processing time < 5 minutes for typical requests
-- Support for multiple artistic styles and formats
-- Batch processing capabilities for efficiency
-#### Timeline: October 2025 - December 2025
-- **Month 1**: Image generation and editing implementation
-- **Month 2**: Video generation with Veo3 integration
-- **Month 3**: Advanced features, optimization, and testing
-## Technical Architecture Evolution
-### Current Architecture (v1.2.1)
-```
-┌─────────────────┐    ┌──────────────────┐    ┌─────────────────┐
-│   MCP Client    │◄──►│   Human MCP      │◄──►│  Google Gemini  │
-│   (AI Agent)    │    │   Server         │    │   Vision API    │
-└─────────────────┘    └──────────────────┘    └─────────────────┘
-                              │
-                              ▼
-                    ┌──────────────────┐
-                    │ Eyes Processors  │
-                    │(Image/Video/GIF) │
-                    └──────────────────┘
-```
-### Target Architecture (v2.0.0 - End 2025)
-```
-┌─────────────────┐    ┌──────────────────────┐    ┌─────────────────────────┐
-│   MCP Client    │◄──►│    Human MCP         │◄──►│  Google AI Services     │
-│   (AI Agent)    │    │    Server            │    │ ┌─────────────────────┐ │
-└─────────────────┘    │                      │    │ │ Gemini Vision API   │ │
-                       │  ┌─────────────────┐ │    │ │ Gemini Audio API    │ │
-                       │  │ Eyes (Vision)   │ │    │ │ Gemini Speech API   │ │
-                       │  │ • Images/Video  │ │    │ │ Imagen API          │ │
-                       │  │ • Documents     │ │    │ │ Veo3 Video API      │ │
-                       │  └─────────────────┘ │    │ └─────────────────────┘ │
-                       │  ┌─────────────────┐ │    └─────────────────────────┘
-                       │  │ Ears (Audio)    │ │               │
-                       │  │ • Speech-to-Text│ │               │
-                       │  │ • Audio Analysis│ │               ▼
-                       │  └─────────────────┘ │    ┌─────────────────────────┐
-                       │  ┌─────────────────┐ │    │  System Dependencies   │
-                       │  │ Mouth (Speech)  │ │    │ ┌─────────────────────┐ │
-                       │  │ • Text-to-Speech│ │    │ │ ffmpeg (A/V proc)   │ │
-                       │  │ • Narration     │ │    │ │ Sharp (Images)      │ │
-                       │  └─────────────────┘ │    │ │ pdf-parse (Docs)    │ │
-                       │  ┌─────────────────┐ │    │ │ Audio libraries     │ │
-                       │  │ Hands (Creation)│ │    │ └─────────────────────┘ │
-                       │  │ • Image Gen     │ │    └─────────────────────────┘
-                       │  │ • Video Gen     │ │
-                       │  └─────────────────┘ │
-                       └──────────────────────┘
-```
-## Resource Requirements & Dependencies
-### Development Resources
-- **Timeline**: 3 months (September 2025 - December 2025)
-### Technical Dependencies
-```json
-{
-  "current": [
-    "@google/generative-ai": "Gemini Vision API",
-    "ffmpeg": "Video processing",
-    "sharp": "Image processing",
-    "@modelcontextprotocol/sdk": "MCP protocol"
-  ],
-  "phase2": [
-    "pdf-parse": "PDF document processing",
-    "mammoth": "Word document handling",
-    "xlsx": "Excel spreadsheet processing"
-  ],
-  "phase3": [
-    "fluent-ffmpeg": "Enhanced audio processing",
-    "audio-context": "Web Audio API",
-    "wav-file-info": "Audio metadata"
-  ],
-  "phase4": [
-    "@google/speech-api": "Text-to-speech synthesis",
-    "voice-processing": "Audio effects"
-  ],
-  "phase5": [
-    "@google/imagen-api": "Image generation",
-    "@google/veo3-api": "Video generation"
-  ]
-}
-```
-### Infrastructure Requirements
-- **API Access**: Google AI services (Gemini, Imagen, Veo3)
-- **Computing**: Development machines with sufficient RAM (16GB+)
-- **Storage**: Temporary file processing space (10GB+)
-- **Network**: High-bandwidth internet for API calls
-## Success Metrics & KPIs
-### Technical Metrics
-| Metric | Current (Phase 1) | Target (Phase 5) |
-|--------|------------------|------------------|
-| Processing Speed | < 30s (images) | < 60s (any content) |
-| Success Rate | 98.5% | 99%+ |
-| Format Support | 8 formats | 50+ formats |
-| Memory Usage | < 100MB | < 200MB |
-| API Response Time | 95th %ile < 30s | 95th %ile < 45s |
-### Business Metrics
-- **Adoption Rate**: Target 1000+ MCP client integrations by end of 2025
-- **API Usage**: Target 100K+ API calls per month
-- **Community Growth**: Target 500+ GitHub stars, 50+ contributors
-- **Documentation Quality**: 100% API coverage, comprehensive examples
-### Quality Metrics
-- **Test Coverage**: Maintain > 85% code coverage
-- **Bug Rate**: < 5 bugs per 1000 lines of code
-- **Performance**: No regression in processing times
-- **User Satisfaction**: > 4.5/5 star rating in feedback
-## Risk Assessment & Mitigation
-### High-Risk Items
-#### 1. Google API Dependency Risk
-**Risk**: Changes to Google AI APIs or pricing models
-**Impact**: High - Could break functionality or increase costs significantly
-**Mitigation**:
-- Implement adapter pattern for easy API switching
-- Monitor Google AI roadmaps and announcements
-- Develop fallback strategies with alternative providers
-- Maintain API version compatibility layers
-#### 2. Performance Scalability Risk
-**Risk**: Processing large files or high request volumes
-**Impact**: Medium - Could impact user experience
-**Mitigation**:
-- Implement streaming for large files
-- Add request queuing and rate limiting
-- Optimize memory usage and cleanup
-- Provide performance monitoring and alerting
-#### 3. Format Compatibility Risk
-**Risk**: Unsupported media formats or edge cases
-**Impact**: Medium - Limited functionality for some users
-**Mitigation**:
-- Comprehensive format testing matrix
-- Graceful error handling for unsupported formats
-- Clear documentation of supported formats
-- Community feedback loop for new format requests
-### Medium-Risk Items
-#### 4. Development Timeline Risk
-**Risk**: Features taking longer than estimated
-**Impact**: Medium - Delayed roadmap execution
-**Mitigation**:
-- Agile development with monthly milestones
-- Regular progress reviews and timeline adjustments
-- Parallel development tracks where possible
-- MVP approach for each phase
-#### 5. API Cost Management Risk
-**Risk**: Unexpected increase in API usage costs
-**Impact**: Medium - Budget overrun
-**Mitigation**:
-- Implement usage monitoring and alerting
-- Provide cost estimation tools for users
-- Offer different processing tiers (quick vs. detailed)
-- Cache results where appropriate
-### Low-Risk Items
-#### 6. Community Adoption Risk
-**Risk**: Low adoption of new features
-**Impact**: Low - Feature may not justify development cost
-**Mitigation**:
-- User research and feedback collection
-- Beta testing with key integrators
-- Comprehensive documentation and examples
-- Active community engagement
-## Development Methodology
-### Agile Approach
-- **Sprint Duration**: 2-week sprints
-- **Planning**: Monthly planning sessions for each phase
-- **Reviews**: Weekly progress reviews with stakeholders
-- **Retrospectives**: End-of-phase retrospectives for improvement
-### Quality Assurance
-- **Testing Strategy**: Unit tests, integration tests, manual testing
-- **Code Review**: All code reviewed by team lead
-- **Performance Testing**: Automated performance regression testing
-- **Security Review**: Security audit for each major release
-### Release Strategy
-- **Versioning**: Semantic versioning (MAJOR.MINOR.PATCH)
-- **Release Schedule**: Monthly minor releases, quarterly major releases
-- **Beta Testing**: 2-week beta period for major features
-- **Rollback Plan**: Ability to rollback releases if issues discovered
-## Integration Strategy
-### MCP Ecosystem Integration
-- **Client Compatibility**: Ensure compatibility with major MCP clients
-- **Protocol Updates**: Stay current with MCP protocol evolution
-- **Community Tools**: Integration with popular development tools
-- **Documentation**: Comprehensive integration guides
-### External Service Integration
-- **Google AI Services**: Primary integration with Google's AI ecosystem
-- **Alternative Providers**: Future integration with OpenAI, Anthropic, etc.
-- **Local Models**: Support for local AI model deployment
-- **Caching Layer**: Intelligent caching to reduce API calls
-## Future Vision (Beyond 2025)
-### Advanced Capabilities
-- **Real-time Processing**: Live screen capture and analysis
-- **Interactive Debugging**: Conversational debugging workflows
-- **Multi-modal Fusion**: Combined analysis across all sensory modalities
-- **Custom Model Training**: Domain-specific model fine-tuning
-### Enterprise Features
-- **On-premises Deployment**: Air-gapped enterprise installations
-- **SSO Integration**: Enterprise authentication and authorization
-- **Audit Logging**: Comprehensive audit trails for compliance
-- **Scalability**: Horizontal scaling for high-volume usage
-### Research & Development
-- **New AI Models**: Integration with cutting-edge AI research
-- **Performance Optimization**: Advanced caching and preprocessing
-- **Privacy Enhancement**: Local processing capabilities
-- **Accessibility**: Enhanced accessibility features and compliance
-## Conclusion
-The Human MCP project represents a significant advancement in AI-agent capabilities, providing comprehensive human-like sensory analysis through the Model Context Protocol. With the visual analysis foundation complete, the roadmap focuses on expanding to document understanding, audio processing, speech generation, and content creation.
-The phased approach ensures steady progress while maintaining high quality and reliability. Success depends on careful API integration, performance optimization, and active community engagement. By the end of 2025, Human MCP will provide AI agents with a complete suite of human-like capabilities, fundamentally changing how AI systems interact with and understand multimodal content.
-**Key Success Factors**:
-- Maintaining high performance and reliability standards
-- Building strong community adoption and feedback loops
-- Staying ahead of Google AI API evolution
-- Delivering practical value to AI agent developers
-- Comprehensive documentation and developer experience
-The project positions Human MCP as the definitive multimodal analysis solution for AI agents, enabling sophisticated debugging, content analysis, and creation workflows that bridge the gap between artificial and human intelligence.

package/examples/debugging-session.ts DELETED Viewed

@@ -1,96 +0,0 @@
-/**
- * Example: Complete debugging session with Human MCP
- *
- * This demonstrates a typical workflow for debugging UI issues
- * using the Human MCP server's visual analysis capabilities.
- */
-import { createServer } from "../src/server.js";
-async function debuggingSession() {
-  console.log("🔍 Starting Human MCP debugging session...\n");
-  const server = await createServer();
-  // Example 1: Analyze a UI screenshot for layout issues
-  console.log("1️⃣ Analyzing UI screenshot for layout issues...");
-  const uiAnalysis = await server.callTool("eyes.analyze", {
-    source: "/path/to/broken-ui.png",
-    type: "image",
-    analysis_type: "ui_debug",
-    detail_level: "detailed",
-    specific_focus: "navigation menu alignment and button states"
-  });
-  console.log("📊 UI Analysis Results:");
-  console.log(uiAnalysis.content[0].text);
-  console.log("\n" + "=".repeat(50) + "\n");
-  // Example 2: Investigate error in screen recording
-  console.log("2️⃣ Investigating error sequence in recording...");
-  const errorAnalysis = await server.callTool("eyes.analyze", {
-    source: "/path/to/error-recording.mp4",
-    type: "video",
-    analysis_type: "error_detection",
-    detail_level: "detailed",
-    specific_focus: "form submission failure and user feedback"
-  });
-  console.log("🚨 Error Analysis Results:");
-  console.log(errorAnalysis.content[0].text);
-  console.log("\n" + "=".repeat(50) + "\n");
-  // Example 3: Compare before/after layouts
-  console.log("3️⃣ Comparing layouts before and after changes...");
-  const comparison = await server.callTool("eyes.compare", {
-    source1: "/path/to/before-fix.png",
-    source2: "/path/to/after-fix.png",
-    comparison_type: "structural"
-  });
-  console.log("📈 Layout Comparison Results:");
-  console.log(comparison.content[0].text);
-  console.log("\n" + "=".repeat(50) + "\n");
-  // Example 4: Accessibility audit
-  console.log("4️⃣ Performing accessibility audit...");
-  const a11yAnalysis = await server.callTool("eyes.analyze", {
-    source: "/path/to/page-screenshot.png",
-    type: "image",
-    analysis_type: "accessibility",
-    detail_level: "detailed",
-    check_accessibility: true,
-    specific_focus: "color contrast and focus indicators"
-  });
-  console.log("♿ Accessibility Analysis Results:");
-  console.log(a11yAnalysis.content[0].text);
-  console.log("\n" + "=".repeat(50) + "\n");
-  // Example 5: Performance analysis of loading animation
-  console.log("5️⃣ Analyzing loading animation performance...");
-  const perfAnalysis = await server.callTool("eyes.analyze", {
-    source: "/path/to/loading-animation.gif",
-    type: "gif",
-    analysis_type: "performance",
-    detail_level: "detailed",
-    specific_focus: "loading indicators and user feedback timing"
-  });
-  console.log("⚡ Performance Analysis Results:");
-  console.log(perfAnalysis.content[0].text);
-  console.log("\n✅ Debugging session complete!");
-}
-// Run the example if called directly
-if (import.meta.main) {
-  debuggingSession().catch(console.error);
-}
-export { debuggingSession };

package/human-mcp.png DELETED Viewed

Binary file

package/inspector-wrapper.mjs DELETED Viewed

@@ -1,33 +0,0 @@
-#!/usr/bin/env node
-// Workaround for eventsource ESM import issues
-import { createRequire } from 'module';
-import { spawn } from 'child_process';
-const require = createRequire(import.meta.url);
-// Try to fix the eventsource import by patching the module resolution
-const Module = require('module');
-const originalResolveFilename = Module._resolveFilename;
-Module._resolveFilename = function (request, parent, isMain, options) {
-  if (request === 'eventsource' && parent?.filename?.includes('@modelcontextprotocol/inspector')) {
-    // Force CommonJS resolution for eventsource
-    return require.resolve('eventsource');
-  }
-  return originalResolveFilename.call(this, request, parent, isMain, options);
-};
-// Run the inspector with the command line args
-const args = process.argv.slice(2);
-const child = spawn('npx', ['@modelcontextprotocol/inspector', ...args], {
-  stdio: 'inherit',
-  env: {
-    ...process.env,
-    NODE_OPTIONS: '--loader ./inspector-loader.mjs'
-  }
-});
-child.on('close', (code) => {
-  process.exit(code);
-});