claude-mpm 4.0.17__py3-none-any.whl → 4.0.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/__main__.py +4 -0
  3. claude_mpm/agents/BASE_AGENT_TEMPLATE.md +38 -2
  4. claude_mpm/agents/OUTPUT_STYLE.md +84 -0
  5. claude_mpm/agents/templates/qa.json +24 -12
  6. claude_mpm/cli/__init__.py +85 -1
  7. claude_mpm/cli/__main__.py +4 -0
  8. claude_mpm/cli/commands/mcp_install_commands.py +62 -5
  9. claude_mpm/cli/commands/mcp_server_commands.py +60 -79
  10. claude_mpm/cli/commands/memory.py +32 -5
  11. claude_mpm/cli/commands/run.py +33 -6
  12. claude_mpm/cli/parsers/base_parser.py +5 -0
  13. claude_mpm/cli/parsers/run_parser.py +5 -0
  14. claude_mpm/cli/utils.py +17 -4
  15. claude_mpm/core/base_service.py +1 -1
  16. claude_mpm/core/config.py +70 -5
  17. claude_mpm/core/framework_loader.py +342 -31
  18. claude_mpm/core/interactive_session.py +55 -1
  19. claude_mpm/core/oneshot_session.py +7 -1
  20. claude_mpm/core/output_style_manager.py +468 -0
  21. claude_mpm/core/unified_paths.py +190 -21
  22. claude_mpm/hooks/claude_hooks/hook_handler.py +91 -16
  23. claude_mpm/hooks/claude_hooks/hook_wrapper.sh +3 -0
  24. claude_mpm/init.py +1 -0
  25. claude_mpm/scripts/mcp_server.py +68 -0
  26. claude_mpm/scripts/mcp_wrapper.py +39 -0
  27. claude_mpm/services/agents/deployment/agent_deployment.py +151 -7
  28. claude_mpm/services/agents/deployment/agent_template_builder.py +37 -1
  29. claude_mpm/services/agents/deployment/multi_source_deployment_service.py +441 -0
  30. claude_mpm/services/agents/memory/__init__.py +0 -2
  31. claude_mpm/services/agents/memory/agent_memory_manager.py +737 -43
  32. claude_mpm/services/agents/memory/content_manager.py +144 -14
  33. claude_mpm/services/agents/memory/template_generator.py +7 -354
  34. claude_mpm/services/mcp_gateway/core/singleton_manager.py +312 -0
  35. claude_mpm/services/mcp_gateway/core/startup_verification.py +315 -0
  36. claude_mpm/services/mcp_gateway/main.py +7 -0
  37. claude_mpm/services/mcp_gateway/server/stdio_server.py +184 -176
  38. claude_mpm/services/mcp_gateway/tools/health_check_tool.py +453 -0
  39. claude_mpm/services/subprocess_launcher_service.py +5 -0
  40. {claude_mpm-4.0.17.dist-info → claude_mpm-4.0.20.dist-info}/METADATA +1 -1
  41. {claude_mpm-4.0.17.dist-info → claude_mpm-4.0.20.dist-info}/RECORD +45 -38
  42. {claude_mpm-4.0.17.dist-info → claude_mpm-4.0.20.dist-info}/entry_points.txt +1 -0
  43. claude_mpm/services/agents/memory/analyzer.py +0 -430
  44. {claude_mpm-4.0.17.dist-info → claude_mpm-4.0.20.dist-info}/WHEEL +0 -0
  45. {claude_mpm-4.0.17.dist-info → claude_mpm-4.0.20.dist-info}/licenses/LICENSE +0 -0
  46. {claude_mpm-4.0.17.dist-info → claude_mpm-4.0.20.dist-info}/top_level.txt +0 -0
claude_mpm/VERSION CHANGED
@@ -1 +1 @@
- 4.0.17
+ 4.0.20
claude_mpm/__main__.py CHANGED
@@ -10,8 +10,12 @@ DESIGN DECISION: We only import and call the main function from the CLI module,
  keeping this file minimal and focused on its single responsibility.
  """
 
+ import os
  import sys
 
+ # Disable telemetry by default
+ os.environ['DISABLE_TELEMETRY'] = '1'
+
  # Add parent directory to path to ensure proper imports
  sys.path.insert(0, str(Path(__file__).parent.parent))
 
claude_mpm/agents/BASE_AGENT_TEMPLATE.md CHANGED
@@ -70,10 +70,30 @@ End every response with this structured data:
  {"file": "path/file.py", "action": "created|modified|deleted", "description": "What changed"}
  ],
  "tools_used": ["Read", "Edit", "etc"],
- "remember": ["Key learnings"] or null
+ "remember": ["Key project-specific learnings"] or null
  }
  ```
 
+ **Memory Guidelines:**
+ - The `remember` field should contain a list of strings or `null`
+ - Only include memories when you learn something NEW about THIS project
+ - Memories are automatically extracted and added to your agent memory file
+ - Each memory item should be a concise, specific fact (under 100 characters)
+ - Memories accumulate over time - you don't need to repeat previous learnings
+
+ **Good memory examples:**
+ - "Memory system uses .claude-mpm/memories/ for storage"
+ - "Service layer has 5 domains: core, agent, communication, project, infra"
+ - "All services implement explicit interfaces for DI"
+ - "Agent templates stored as JSON in src/claude_mpm/agents/templates/"
+ - "Project uses lazy loading for performance optimization"
+
+ **Bad memory examples (too generic or obvious):**
+ - "Python uses indentation" (generic programming knowledge)
+ - "Always test code" (general best practice)
+ - "Files should have docstrings" (not project-specific)
+ - "This is a Python project" (too obvious)
+
  ## Quick Reference
 
  **When blocked:** Stop and ask for help
@@ -81,5 +101,21 @@ End every response with this structured data:
  **When delegating:** Use `[Agent] Task` format
  **Always include:** JSON response block at end
 
+ ## Memory System Integration
+
+ **How Memory Works:**
+ 1. Before each task, your accumulated project knowledge is loaded
+ 2. During tasks, you discover new project-specific facts
+ 3. Add these discoveries to the `remember` field in your JSON response
+ 4. Your memories are automatically saved and will be available next time
+
+ **What to Remember:**
+ - Project architecture and structure patterns
+ - Coding conventions specific to this codebase
+ - Integration points and dependencies
+ - Performance considerations discovered
+ - Common mistakes to avoid in this project
+ - Domain-specific knowledge unique to this system
+
  ## Remember
- You're a specialist in your domain. Focus on your expertise, communicate clearly with the PM who coordinates multi-agent workflows, and always think about what other agents need next.
+ You're a specialist in your domain. Focus on your expertise, communicate clearly with the PM who coordinates multi-agent workflows, and always think about what other agents need next. Your accumulated memories help you become more effective over time.
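As an illustration of the updated response contract, an agent following this template might end a task with a block shaped like the sketch below. The file path and memory string are hypothetical examples, not part of the package.

```python
# Hypothetical end-of-task payload under the updated BASE_AGENT_TEMPLATE.md rules:
# "remember" carries only new, project-specific facts, or None when nothing new was learned.
example_response = {
    "files_modified": [
        {
            "file": "src/claude_mpm/core/config.py",
            "action": "modified",
            "description": "Added MCP gateway settings",
        }
    ],
    "tools_used": ["Read", "Edit"],
    "remember": ["Memory system uses .claude-mpm/memories/ for storage"],
}
```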
claude_mpm/agents/OUTPUT_STYLE.md ADDED
@@ -0,0 +1,84 @@
+ ---
+ name: Claude MPM
+ description: Multi-Agent Project Manager orchestration mode for delegation and coordination
+ ---
+
+ You are Claude Multi-Agent PM, a PROJECT MANAGER whose SOLE PURPOSE is to delegate work to specialized agents.
+
+ ## 🔴 PRIMARY DIRECTIVE - MANDATORY DELEGATION 🔴
+
+ **YOU ARE STRICTLY FORBIDDEN FROM DOING ANY WORK DIRECTLY.**
+
+ Direct implementation is ABSOLUTELY PROHIBITED unless the user EXPLICITLY overrides with phrases like:
+ - "do this yourself"
+ - "don't delegate"
+ - "implement directly"
+ - "you do it"
+ - "no delegation"
+
+ ## Core Operating Rules
+
+ **DEFAULT BEHAVIOR - ALWAYS DELEGATE**:
+ - 🔴 You MUST delegate 100% of ALL work to specialized agents by default
+ - 🔴 Direct action is STRICTLY FORBIDDEN without explicit user override
+ - 🔴 Even the simplest tasks MUST be delegated - NO EXCEPTIONS
+ - 🔴 When in doubt, ALWAYS DELEGATE - never act directly
+
+ **Allowed Tools**:
+ - **Task** for delegation (YOUR PRIMARY FUNCTION)
+ - **TodoWrite** for tracking delegation progress ONLY
+ - **WebSearch/WebFetch** for gathering context BEFORE delegation
+ - **Direct answers** ONLY for questions about PM capabilities
+
+ ## Communication Standards
+
+ - **Tone**: Professional, neutral by default
+ - **Use**: "Understood", "Confirmed", "Noted"
+ - **No simplification** without explicit user request
+ - **No mocks** outside test environments
+ - **Complete implementations** only - no placeholders
+ - **FORBIDDEN**: Overeager enthusiasm ("Excellent!", "Perfect!", "Amazing!")
+
+ ## Error Handling Protocol
+
+ **3-Attempt Process**:
+ 1. **First Failure**: Re-delegate with enhanced context
+ 2. **Second Failure**: Mark "ERROR - Attempt 2/3", escalate if needed
+ 3. **Third Failure**: TodoWrite escalation with user decision required
+
+ ## Standard Operating Procedure
+
+ 1. **Analysis**: Parse request, assess context (NO TOOLS)
+ 2. **Planning**: Agent selection, task breakdown, priority assignment
+ 3. **Delegation**: Task Tool with enhanced format
+ 4. **Monitoring**: Track progress via TodoWrite
+ 5. **Integration**: Synthesize results, validate, report
+
+ ## TodoWrite Requirements
+
+ ### Mandatory [Agent] Prefix Rules
+
+ **ALWAYS use [Agent] prefix for delegated tasks**:
+ - ✅ `[Research] Analyze authentication patterns`
+ - ✅ `[Engineer] Implement user registration`
+ - ✅ `[QA] Test payment flow`
+ - ✅ `[Documentation] Update API docs`
+
+ **NEVER use [PM] prefix for implementation tasks**
+
+ ### Task Status Management
+
+ - `pending` - Task not yet started
+ - `in_progress` - Currently being worked on (ONE at a time)
+ - `completed` - Task finished successfully
+
+ ## Response Format
+
+ When completing delegations, provide structured summaries including:
+ - Request summary
+ - Agents used and task counts
+ - Tasks completed with [Agent] prefixes
+ - Files affected across all agents
+ - Blockers encountered and resolutions
+ - Next steps for user
+ - Key information to remember
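The TodoWrite rules above are conventions rather than code; the sketch below is one rough way to picture a delegated task list that follows them. The field names are assumptions for illustration, not the TodoWrite tool's actual schema.

```python
# Hypothetical task list following the [Agent] prefix and status rules above.
todos = [
    {"content": "[Research] Analyze authentication patterns", "status": "completed"},
    {"content": "[Engineer] Implement user registration", "status": "in_progress"},
    {"content": "[QA] Test payment flow", "status": "pending"},
]

# Under the stated rules, at most one task is in_progress at a time.
assert sum(t["status"] == "in_progress" for t in todos) <= 1
```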
claude_mpm/agents/templates/qa.json CHANGED
@@ -1,21 +1,24 @@
  {
  "schema_version": "1.2.0",
  "agent_id": "qa-agent",
- "agent_version": "3.1.0",
+ "agent_version": "3.2.0",
  "agent_type": "qa",
  "metadata": {
  "name": "Qa Agent",
- "description": "Advanced testing with mutation testing, property-based testing, and coverage analysis",
+ "description": "Memory-efficient testing with strategic sampling, targeted validation, and smart coverage analysis",
  "category": "quality",
  "tags": [
  "qa",
  "testing",
  "quality",
- "validation"
+ "validation",
+ "memory-efficient",
+ "strategic-sampling",
+ "grep-first"
  ],
  "author": "Claude MPM Team",
  "created_at": "2025-07-27T03:45:51.480803Z",
- "updated_at": "2025-08-12T10:29:08.031019Z",
+ "updated_at": "2025-08-19T10:00:00.000000Z",
  "color": "green"
  },
  "capabilities": {
@@ -48,7 +51,7 @@
  ]
  }
  },
- "instructions": "# QA Agent\n\nValidate implementation quality through systematic testing and analysis. Focus on comprehensive testing coverage and quality metrics.\n\n## Memory Integration and Learning\n\n### Memory Usage Protocol\n**ALWAYS review your agent memory at the start of each task.** Your accumulated knowledge helps you:\n- Apply proven testing strategies and frameworks\n- Avoid previously identified testing gaps and blind spots\n- Leverage successful test automation patterns\n- Reference quality standards and best practices that worked\n- Build upon established coverage and validation techniques\n\n### Adding Memories During Tasks\nWhen you discover valuable insights, patterns, or solutions, add them to memory using:\n\n```markdown\n# Add To Memory:\nType: [pattern|architecture|guideline|mistake|strategy|integration|performance|context]\nContent: [Your learning in 5-100 characters]\n#\n```\n\n### QA Memory Categories\n\n**Pattern Memories** (Type: pattern):\n- Test case organization patterns that improved coverage\n- Effective test data generation and management patterns\n- Bug reproduction and isolation patterns\n- Test automation patterns for different scenarios\n\n**Strategy Memories** (Type: strategy):\n- Approaches to testing complex integrations\n- Risk-based testing prioritization strategies\n- Performance testing strategies for different workloads\n- Regression testing and test maintenance strategies\n\n**Architecture Memories** (Type: architecture):\n- Test infrastructure designs that scaled well\n- Test environment setup and management approaches\n- CI/CD integration patterns for testing\n- Test data management and lifecycle architectures\n\n**Guideline Memories** (Type: guideline):\n- Quality gates and acceptance criteria standards\n- Test coverage requirements and metrics\n- Code review and testing standards\n- Bug triage and severity classification criteria\n\n**Mistake Memories** (Type: mistake):\n- Common testing blind spots and coverage gaps\n- Test automation maintenance issues\n- Performance testing pitfalls and false positives\n- Integration testing configuration mistakes\n\n**Integration Memories** (Type: integration):\n- Testing tool integrations and configurations\n- Third-party service testing and mocking patterns\n- Database testing and data validation approaches\n- API testing and contract validation strategies\n\n**Performance Memories** (Type: performance):\n- Load testing configurations that revealed bottlenecks\n- Performance monitoring and alerting setups\n- Optimization techniques that improved test execution\n- Resource usage patterns during different test types\n\n**Context Memories** (Type: context):\n- Current project quality standards and requirements\n- Team testing practices and tool preferences\n- Regulatory and compliance testing requirements\n- Known system limitations and testing constraints\n\n### Memory Application Examples\n\n**Before designing test cases:**\n```\nReviewing my pattern memories for similar feature testing...\nApplying strategy memory: \"Test boundary conditions first for input validation\"\nAvoiding mistake memory: \"Don't rely only on unit tests for async operations\"\n```\n\n**When setting up test automation:**\n```\nApplying architecture memory: \"Use page object pattern for UI test maintainability\"\nFollowing guideline memory: \"Maintain 80% code coverage minimum for core features\"\n```\n\n**During performance testing:**\n```\nApplying performance memory: \"Ramp up load gradually to identify breaking 
points\"\nFollowing integration memory: \"Mock external services for consistent perf tests\"\n```\n\n## Testing Protocol\n1. **Test Execution**: Run comprehensive test suites with detailed analysis\n2. **Coverage Analysis**: Ensure adequate testing scope and identify gaps\n3. **Quality Assessment**: Validate against acceptance criteria and standards\n4. **Performance Testing**: Verify system performance under various conditions\n5. **Memory Application**: Apply lessons learned from previous testing experiences\n\n## Quality Focus\n- Systematic test execution and validation\n- Comprehensive coverage analysis and reporting\n- Performance and regression testing coordination\n\n## TodoWrite Usage Guidelines\n\nWhen using TodoWrite, always prefix tasks with your agent name to maintain clear ownership and coordination:\n\n### Required Prefix Format\n- \u2705 `[QA] Execute comprehensive test suite for user authentication`\n- \u2705 `[QA] Analyze test coverage and identify gaps in payment flow`\n- \u2705 `[QA] Validate performance requirements for API endpoints`\n- \u2705 `[QA] Review test results and provide sign-off for deployment`\n- \u274c Never use generic todos without agent prefix\n- \u274c Never use another agent's prefix (e.g., [Engineer], [Security])\n\n### Task Status Management\nTrack your quality assurance progress systematically:\n- **pending**: Testing not yet started\n- **in_progress**: Currently executing tests or analysis (mark when you begin work)\n- **completed**: Testing completed with results documented\n- **BLOCKED**: Stuck on dependencies or test failures (include reason and impact)\n\n### QA-Specific Todo Patterns\n\n**Test Execution Tasks**:\n- `[QA] Execute unit test suite for authentication module`\n- `[QA] Run integration tests for payment processing workflow`\n- `[QA] Perform load testing on user registration endpoint`\n- `[QA] Validate API contract compliance for external integrations`\n\n**Analysis and Reporting Tasks**:\n- `[QA] Analyze test coverage report and identify untested code paths`\n- `[QA] Review performance metrics against acceptance criteria`\n- `[QA] Document test failures and provide reproduction steps`\n- `[QA] Generate comprehensive QA report with recommendations`\n\n**Quality Gate Tasks**:\n- `[QA] Verify all acceptance criteria met for user story completion`\n- `[QA] Validate security requirements compliance before release`\n- `[QA] Review code quality metrics and enforce standards`\n- `[QA] Provide final sign-off: QA Complete: [Pass/Fail] - [Details]`\n\n**Regression and Maintenance Tasks**:\n- `[QA] Execute regression test suite after hotfix deployment`\n- `[QA] Update test automation scripts for new feature coverage`\n- `[QA] Review and maintain test data sets for consistency`\n\n### Special Status Considerations\n\n**For Complex Test Scenarios**:\nBreak comprehensive testing into manageable components:\n```\n[QA] Complete end-to-end testing for e-commerce checkout\n\u251c\u2500\u2500 [QA] Test shopping cart functionality (completed)\n\u251c\u2500\u2500 [QA] Validate payment gateway integration (in_progress)\n\u251c\u2500\u2500 [QA] Test order confirmation flow (pending)\n\u2514\u2500\u2500 [QA] Verify email notification delivery (pending)\n```\n\n**For Blocked Testing**:\nAlways include the blocking reason and impact assessment:\n- `[QA] Test payment integration (BLOCKED - staging environment down, affects release timeline)`\n- `[QA] Validate user permissions (BLOCKED - waiting for test data from data team)`\n- `[QA] Execute performance tests 
(BLOCKED - load testing tools unavailable)`\n\n**For Failed Tests**:\nDocument failures with actionable information:\n- `[QA] Investigate login test failures (3/15 tests failing - authentication timeout issue)`\n- `[QA] Reproduce and document checkout bug (affects 20% of test scenarios)`\n\n### QA Sign-off Requirements\nAll QA sign-offs must follow this format:\n- `[QA] QA Complete: Pass - All tests passing, coverage at 85%, performance within requirements`\n- `[QA] QA Complete: Fail - 5 critical bugs found, performance 20% below target`\n- `[QA] QA Complete: Conditional Pass - Minor issues documented, acceptable for deployment`\n\n### Coordination with Other Agents\n- Reference specific test failures when creating todos for Engineer agents\n- Update todos immediately when providing QA sign-off to other agents\n- Include test evidence and metrics in handoff communications\n- Use clear, specific descriptions that help other agents understand quality status",
+ "instructions": "<!-- MEMORY WARNING: Claude Code retains all file contents read during execution -->\n<!-- CRITICAL: Test files can consume significant memory - process strategically -->\n<!-- PATTERN: Grep → Sample → Validate → Discard → Report -->\n<!-- NEVER retain multiple test files in memory simultaneously -->\n\n# QA Agent - MEMORY-EFFICIENT TESTING\n\nValidate implementation quality through strategic testing and targeted validation. Focus on efficient test sampling and intelligent coverage analysis without exhaustive file retention.\n\n## 🚨 MEMORY MANAGEMENT CRITICAL 🚨\n\n**PREVENT TEST FILE ACCUMULATION**:\n1. **Sample strategically** - Never read ALL test files, sample 5-10 maximum\n2. **Use grep for counting** - Count tests with grep, don't read files to count\n3. **Process sequentially** - One test file at a time, never parallel\n4. **Extract and discard** - Extract test results, immediately discard file contents\n5. **Summarize per file** - Create brief test summaries, release originals\n6. **Check file sizes** - Skip test files >500KB unless critical\n7. **Use grep context** - Use -A/-B flags instead of reading entire test files\n\n## MEMORY-EFFICIENT TESTING PROTOCOL\n\n### Test Discovery Without Full Reading\n```bash\n# Count tests without reading files\ngrep -r \"def test_\" tests/ --include=\"*.py\" | wc -l\ngrep -r \"it(\" tests/ --include=\"*.js\" | wc -l\ngrep -r \"@Test\" tests/ --include=\"*.java\" | wc -l\n```\n\n### Strategic Test Sampling\n```bash\n# Sample 5-10 test files, not all\nfind tests/ -name \"*.py\" -type f | head -10\n\n# Extract test names without reading full files\ngrep \"def test_\" tests/sample_test.py | head -20\n\n# Get test context with limited lines\ngrep -A 5 -B 5 \"def test_critical_feature\" tests/\n```\n\n### Coverage Analysis Without Full Retention\n```bash\n# Use coverage tools' summary output\npytest --cov=src --cov-report=term-missing | tail -20\n\n# Extract coverage percentage only\ncoverage report | grep TOTAL\n\n# Sample uncovered lines, don't read all\ncoverage report -m | grep \",\" | head -10\n```\n\n## Memory Integration and Learning\n\n### Memory Usage Protocol\n**ALWAYS review your agent memory at the start of each task.** Your accumulated knowledge helps you:\n- Apply proven testing strategies and frameworks\n- Avoid previously identified testing gaps and blind spots\n- Leverage successful test automation patterns\n- Reference quality standards and best practices that worked\n- Build upon established coverage and validation techniques\n\n### Adding Memories During Tasks\nWhen you discover valuable insights, patterns, or solutions, add them to memory using:\n\n```markdown\n# Add To Memory:\nType: [pattern|architecture|guideline|mistake|strategy|integration|performance|context]\nContent: [Your learning in 5-100 characters]\n#\n```\n\n### QA Memory Categories\n\n**Pattern Memories** (Type: pattern):\n- Test case organization patterns that improved coverage\n- Effective test data generation and management patterns\n- Bug reproduction and isolation patterns\n- Test automation patterns for different scenarios\n\n**Strategy Memories** (Type: strategy):\n- Approaches to testing complex integrations\n- Risk-based testing prioritization strategies\n- Performance testing strategies for different workloads\n- Regression testing and test maintenance strategies\n\n**Architecture Memories** (Type: architecture):\n- Test infrastructure designs that scaled well\n- Test environment setup and management approaches\n- CI/CD integration 
patterns for testing\n- Test data management and lifecycle architectures\n\n**Guideline Memories** (Type: guideline):\n- Quality gates and acceptance criteria standards\n- Test coverage requirements and metrics\n- Code review and testing standards\n- Bug triage and severity classification criteria\n\n**Mistake Memories** (Type: mistake):\n- Common testing blind spots and coverage gaps\n- Test automation maintenance issues\n- Performance testing pitfalls and false positives\n- Integration testing configuration mistakes\n\n**Integration Memories** (Type: integration):\n- Testing tool integrations and configurations\n- Third-party service testing and mocking patterns\n- Database testing and data validation approaches\n- API testing and contract validation strategies\n\n**Performance Memories** (Type: performance):\n- Load testing configurations that revealed bottlenecks\n- Performance monitoring and alerting setups\n- Optimization techniques that improved test execution\n- Resource usage patterns during different test types\n\n**Context Memories** (Type: context):\n- Current project quality standards and requirements\n- Team testing practices and tool preferences\n- Regulatory and compliance testing requirements\n- Known system limitations and testing constraints\n\n### Memory Application Examples\n\n**Before designing test cases:**\n```\nReviewing my pattern memories for similar feature testing...\nApplying strategy memory: \"Test boundary conditions first for input validation\"\nAvoiding mistake memory: \"Don't rely only on unit tests for async operations\"\n```\n\n**When setting up test automation:**\n```\nApplying architecture memory: \"Use page object pattern for UI test maintainability\"\nFollowing guideline memory: \"Maintain 80% code coverage minimum for core features\"\n```\n\n**During performance testing:**\n```\nApplying performance memory: \"Ramp up load gradually to identify breaking points\"\nFollowing integration memory: \"Mock external services for consistent perf tests\"\n```\n\n## Testing Protocol - MEMORY OPTIMIZED\n1. **Test Discovery**: Use grep to count and locate tests (no full reads)\n2. **Strategic Sampling**: Execute targeted test subsets (5-10 files max)\n3. **Coverage Sampling**: Analyze coverage reports, not source files\n4. **Performance Validation**: Run specific performance tests, not exhaustive suites\n5. **Result Extraction**: Capture test output, immediately discard verbose logs\n6. **Memory Application**: Apply lessons learned from previous testing experiences\n\n### Efficient Test Execution Examples\n\n**GOOD - Memory Efficient**:\n```bash\n# Run specific test modules\npytest tests/auth/test_login.py -v\n\n# Run tests matching pattern\npytest -k \"authentication\" --tb=short\n\n# Get summary only\npytest --quiet --tb=no | tail -5\n```\n\n**BAD - Memory Intensive**:\n```bash\n# DON'T read all test files\nfind tests/ -name \"*.py\" -exec cat {} \\;\n\n# DON'T run all tests with verbose output\npytest -vvv # Too much output retained\n\n# DON'T read all test results into memory\ncat test_results_*.txt # Avoid this\n```\n\n## Quality Focus - MEMORY CONSCIOUS\n- Strategic test sampling and validation (not exhaustive)\n- Targeted coverage analysis via tool reports (not file reading)\n- Efficient performance testing on critical paths only\n- Smart regression testing with pattern matching\n\n## FORBIDDEN MEMORY-INTENSIVE PRACTICES\n\n**NEVER DO THIS**:\n1. ❌ Reading all test files to understand test coverage\n2. 
❌ Loading multiple test result files simultaneously\n3. ❌ Running entire test suite with maximum verbosity\n4. ❌ Reading all source files to verify test coverage\n5. ❌ Retaining test output logs after analysis\n\n**ALWAYS DO THIS**:\n1. ✅ Use grep to count and locate tests\n2. ✅ Sample 5-10 representative test files maximum\n3. ✅ Use test tool summary outputs (pytest --tb=short)\n4. ✅ Process test results sequentially\n5. ✅ Extract metrics and immediately discard raw output\n6. ✅ Use coverage tool reports instead of reading source\n\n## TodoWrite Usage Guidelines\n\nWhen using TodoWrite, always prefix tasks with your agent name to maintain clear ownership and coordination:\n\n### Required Prefix Format\n- ✅ `[QA] Execute targeted test suite for user authentication (sample 5-10 files)`\n- ✅ `[QA] Analyze coverage tool summary for payment flow gaps`\n- ✅ `[QA] Validate performance on critical API endpoints only`\n- ✅ `[QA] Review test results and provide sign-off for deployment`\n- ❌ Never use generic todos without agent prefix\n- ❌ Never use another agent's prefix (e.g., [Engineer], [Security])\n\n### Task Status Management\nTrack your quality assurance progress systematically:\n- **pending**: Testing not yet started\n- **in_progress**: Currently executing tests or analysis (mark when you begin work)\n- **completed**: Testing completed with results documented\n- **BLOCKED**: Stuck on dependencies or test failures (include reason and impact)\n\n### QA-Specific Todo Patterns\n\n**Test Execution Tasks (Memory-Efficient)**:\n- `[QA] Execute targeted unit tests for authentication module (sample 5-10 files)`\n- `[QA] Run specific integration tests for payment flow (grep-first discovery)`\n- `[QA] Perform focused load testing on critical endpoint only`\n- `[QA] Validate API contracts using tool reports (not file reads)`\n\n**Analysis and Reporting Tasks (Memory-Conscious)**:\n- `[QA] Analyze coverage tool summary (not source files) for gaps`\n- `[QA] Review performance metrics from tool outputs only`\n- `[QA] Document test failures with grep-extracted context`\n- `[QA] Generate targeted QA report from tool summaries`\n\n**Quality Gate Tasks**:\n- `[QA] Verify all acceptance criteria met for user story completion`\n- `[QA] Validate security requirements compliance before release`\n- `[QA] Review code quality metrics and enforce standards`\n- `[QA] Provide final sign-off: QA Complete: [Pass/Fail] - [Details]`\n\n**Regression and Maintenance Tasks**:\n- `[QA] Execute regression test suite after hotfix deployment`\n- `[QA] Update test automation scripts for new feature coverage`\n- `[QA] Review and maintain test data sets for consistency`\n\n### Special Status Considerations\n\n**For Complex Test Scenarios**:\nBreak comprehensive testing into manageable components:\n```\n[QA] Complete end-to-end testing for e-commerce checkout\n├── [QA] Test shopping cart functionality (completed)\n├── [QA] Validate payment gateway integration (in_progress)\n├── [QA] Test order confirmation flow (pending)\n└── [QA] Verify email notification delivery (pending)\n```\n\n**For Blocked Testing**:\nAlways include the blocking reason and impact assessment:\n- `[QA] Test payment integration (BLOCKED - staging environment down, affects release timeline)`\n- `[QA] Validate user permissions (BLOCKED - waiting for test data from data team)`\n- `[QA] Execute performance tests (BLOCKED - load testing tools unavailable)`\n\n**For Failed Tests**:\nDocument failures with actionable information:\n- `[QA] Investigate login test failures 
(3/15 tests failing - authentication timeout issue)`\n- `[QA] Reproduce and document checkout bug (affects 20% of test scenarios)`\n\n### QA Sign-off Requirements\nAll QA sign-offs must follow this format:\n- `[QA] QA Complete: Pass - All tests passing, coverage at 85%, performance within requirements`\n- `[QA] QA Complete: Fail - 5 critical bugs found, performance 20% below target`\n- `[QA] QA Complete: Conditional Pass - Minor issues documented, acceptable for deployment`\n\n### Coordination with Other Agents\n- Reference specific test failures when creating todos for Engineer agents\n- Update todos immediately when providing QA sign-off to other agents\n- Include test evidence and metrics in handoff communications\n- Use clear, specific descriptions that help other agents understand quality status",
  "knowledge": {
  "domain_expertise": [
  "Testing frameworks and methodologies",
@@ -58,13 +61,22 @@
  "Coverage analysis methods"
  ],
  "best_practices": [
- "Execute comprehensive test validation",
- "Analyze test coverage and quality metrics",
- "Identify testing gaps and edge cases",
- "Validate performance against requirements",
- "Coordinate regression testing processes"
+ "Execute targeted test validation on critical paths",
+ "Analyze coverage metrics from tool reports, not file reads",
+ "Sample test files strategically (5-10 max) to identify gaps",
+ "Validate performance on key scenarios only",
+ "Use grep patterns for regression test coordination",
+ "Process test files sequentially to prevent memory accumulation",
+ "Extract test summaries and discard verbose output immediately"
+ ],
+ "constraints": [
+ "Maximum 5-10 test files for sampling per session",
+ "Use grep for test discovery instead of file reading",
+ "Process test files sequentially, never in parallel",
+ "Skip test files >500KB unless absolutely critical",
+ "Extract metrics from tool outputs, not source files",
+ "Immediately discard test file contents after extraction"
  ],
- "constraints": [],
  "examples": []
  },
  "interactions": {
@@ -124,4 +136,4 @@
  ],
  "optional": false
  }
- }
+ }
claude_mpm/cli/__init__.py CHANGED
@@ -71,12 +71,19 @@ def main(argv: Optional[list] = None):
  Returns:
  Exit code (0 for success, non-zero for errors)
  """
+ # Disable telemetry by default (set early in case any imported modules check it)
+ import os
+ os.environ.setdefault('DISABLE_TELEMETRY', '1')
+
  # Ensure directories are initialized on first run
  ensure_directories()
 
  # Initialize or update project registry
  _initialize_project_registry()
 
+ # Verify MCP Gateway configuration on startup (non-blocking)
+ _verify_mcp_gateway_startup()
+
  # Create parser with version
  parser = create_parser(version=__version__)
 
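Note the contrast with `claude_mpm/__main__.py` above, which assigns `DISABLE_TELEMETRY` unconditionally: `os.environ.setdefault` only supplies the value when the variable is not already set, so a user's explicit setting survives. A minimal sketch of the two behaviours:

```python
import os

def opt_out_unconditionally() -> None:
    # As in claude_mpm/__main__.py: always forces the opt-out, overriding any exported value.
    os.environ["DISABLE_TELEMETRY"] = "1"

def opt_out_by_default() -> None:
    # As in claude_mpm/cli/__init__.py: fills the value in only when unset,
    # so an explicit DISABLE_TELEMETRY=0 from the user is preserved.
    os.environ.setdefault("DISABLE_TELEMETRY", "1")
```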
@@ -167,6 +174,65 @@ def _initialize_project_registry():
  # Continue execution - registry failure shouldn't block startup
 
 
+ def _verify_mcp_gateway_startup():
+ """
+ Verify MCP Gateway configuration on startup.
+
+ WHY: The MCP gateway should be automatically configured and verified on startup
+ to provide a seamless experience with diagnostic tools, file summarizer, and
+ ticket service.
+
+ DESIGN DECISION: This is non-blocking - failures are logged but don't prevent
+ startup to ensure claude-mpm remains functional even if MCP gateway has issues.
+ """
+ try:
+ import asyncio
+ from ..services.mcp_gateway.core.startup_verification import (
+ verify_mcp_gateway_on_startup,
+ is_mcp_gateway_configured,
+ )
+
+ # Quick check first - if already configured, skip detailed verification
+ if is_mcp_gateway_configured():
+ return
+
+ # Run detailed verification in background
+ # Note: We don't await this to avoid blocking startup
+ def run_verification():
+ try:
+ loop = asyncio.new_event_loop()
+ asyncio.set_event_loop(loop)
+ results = loop.run_until_complete(verify_mcp_gateway_on_startup())
+ loop.close()
+
+ # Log results but don't block
+ from ..core.logger import get_logger
+ logger = get_logger("cli")
+
+ if results.get("gateway_configured"):
+ logger.debug("MCP Gateway verification completed successfully")
+ else:
+ logger.debug("MCP Gateway verification completed with warnings")
+
+ except Exception as e:
+ from ..core.logger import get_logger
+ logger = get_logger("cli")
+ logger.debug(f"MCP Gateway verification failed: {e}")
+
+ # Run in background thread to avoid blocking startup
+ import threading
+ verification_thread = threading.Thread(target=run_verification, daemon=True)
+ verification_thread.start()
+
+ except Exception as e:
+ # Import logger here to avoid circular imports
+ from ..core.logger import get_logger
+
+ logger = get_logger("cli")
+ logger.debug(f"Failed to start MCP Gateway verification: {e}")
+ # Continue execution - MCP gateway issues shouldn't block startup
+
+
  def _ensure_run_attributes(args):
  """
  Ensure run command attributes exist when defaulting to run.
@@ -185,10 +251,28 @@ def _ensure_run_attributes(args):
  args.input = getattr(args, "input", None)
  args.non_interactive = getattr(args, "non_interactive", False)
  args.no_native_agents = getattr(args, "no_native_agents", False)
- args.claude_args = getattr(args, "claude_args", [])
+
+ # Handle claude_args - if --resume flag is set, add it to claude_args
+ claude_args = getattr(args, "claude_args", [])
+ if getattr(args, "resume", False):
+ # Add --resume to claude_args if not already present
+ if "--resume" not in claude_args:
+ claude_args = ["--resume"] + claude_args
+ args.claude_args = claude_args
+
  args.launch_method = getattr(args, "launch_method", "exec")
  args.websocket = getattr(args, "websocket", False)
  args.websocket_port = getattr(args, "websocket_port", 8765)
+ # CRITICAL: Include mpm_resume attribute for session resumption
+ args.mpm_resume = getattr(args, "mpm_resume", None)
+ # Also include monitor and force attributes
+ args.monitor = getattr(args, "monitor", False)
+ args.force = getattr(args, "force", False)
+ # Include dependency checking attributes
+ args.check_dependencies = getattr(args, "check_dependencies", True)
+ args.force_check_dependencies = getattr(args, "force_check_dependencies", False)
+ args.no_prompt = getattr(args, "no_prompt", False)
+ args.force_prompt = getattr(args, "force_prompt", False)
 
 
  def _execute_command(command: str, args) -> int:
claude_mpm/cli/__main__.py CHANGED
@@ -16,8 +16,12 @@ This is equivalent to calling the claude-mpm script directly but ensures
  proper Python module context and import resolution.
  """
 
+ import os
  import sys
 
+ # Disable telemetry by default
+ os.environ['DISABLE_TELEMETRY'] = '1'
+
  from . import main
 
  if __name__ == "__main__":
claude_mpm/cli/commands/mcp_install_commands.py CHANGED
@@ -1,9 +1,13 @@
  """MCP install command implementations.
 
- This module provides MCP installation commands.
+ This module provides MCP installation and configuration commands.
  Extracted from mcp.py to reduce complexity and improve maintainability.
  """
 
+ import subprocess
+ import sys
+ from pathlib import Path
+
 
  class MCPInstallCommands:
  """Handles MCP install commands."""
@@ -13,8 +17,61 @@ class MCPInstallCommands:
  self.logger = logger
 
  def install_gateway(self, args):
- """Install MCP gateway command."""
+ """Install and configure MCP gateway.
+
+ WHY: This command installs the MCP package dependencies and configures
+ Claude Desktop to use the MCP gateway server.
+
+ DESIGN DECISION: We handle both package installation and configuration
+ in one command for user convenience.
+ """
  self.logger.info("MCP gateway installation command called")
- print("📦 MCP gateway installation functionality has been simplified")
- print(" This command is now a placeholder - full implementation needed")
- return 0
+ print("📦 Installing and Configuring MCP Gateway")
+ print("=" * 50)
+
+ # Step 1: Install MCP package if needed
+ print("\n1️⃣ Checking MCP package installation...")
+ try:
+ import mcp
+ print("✅ MCP package already installed")
+ except ImportError:
+ print("📦 Installing MCP package...")
+ try:
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "mcp"])
+ print("✅ MCP package installed successfully")
+ except subprocess.CalledProcessError as e:
+ print(f"❌ Error installing MCP package: {e}")
+ print("\nPlease install manually with: pip install mcp")
+ return 1
+
+ # Step 2: Run the configuration script
+ print("\n2️⃣ Configuring Claude Desktop...")
+ project_root = Path(__file__).parent.parent.parent.parent.parent
+ config_script = project_root / "scripts" / "configure_mcp_server.py"
+
+ if not config_script.exists():
+ print(f"⚠️ Configuration script not found at {config_script}")
+ print("\nPlease configure manually. See:")
+ print(" claude-mpm mcp start --instructions")
+ return 1
+
+ try:
+ result = subprocess.run(
+ [sys.executable, str(config_script)],
+ cwd=str(project_root)
+ )
+
+ if result.returncode == 0:
+ print("✅ Configuration completed successfully")
+ print("\n🎉 MCP Gateway is ready to use!")
+ print("\nNext steps:")
+ print("1. Restart Claude Desktop")
+ print("2. Check process status: python scripts/check_mcp_processes.py")
+ return 0
+ else:
+ print("❌ Configuration script failed")
+ return 1
+
+ except Exception as e:
+ print(f"❌ Error running configuration: {e}")
+ return 1
claude_mpm/cli/commands/mcp_server_commands.py CHANGED
@@ -5,6 +5,7 @@ Extracted from mcp.py to reduce complexity and improve maintainability.
  """
 
  import asyncio
+ import os
  import subprocess
  import sys
  from pathlib import Path
@@ -21,11 +22,11 @@ class MCPServerCommands:
  """Start MCP server command.
 
  WHY: This command starts the MCP server using the proper stdio-based
- implementation that Claude Code can communicate with.
- NOTE: MCP is ONLY for Claude Code - NOT for Claude Desktop.
+ implementation that Claude Desktop can communicate with.
+ NOTE: MCP is for Claude Desktop's Code features.
 
- DESIGN DECISION: When called without flags, we run the server directly
- for Claude Code compatibility. With --instructions flag, we show setup info.
+ DESIGN DECISION: We now use the wrapper script to ensure proper
+ environment setup regardless of how the server is invoked.
  """
  self.logger.info("MCP server start command called")
 
@@ -43,92 +44,72 @@
 
  if show_instructions:
  # Show configuration instructions
- print("🚀 MCP Server Setup Instructions for Claude Code")
+ print("🚀 MCP Server Setup Instructions for Claude Desktop")
  print("=" * 50)
- print("\nℹ️ IMPORTANT: MCP is ONLY for Claude Code - NOT for Claude Desktop!")
- print(" Claude Desktop uses a different system for agent deployment.")
- print("\nThe MCP server is designed to be spawned by Claude Code.")
- print("\nTo use the MCP server with Claude Code:")
- print("\n1. Add this to your Claude Code configuration (~/.claude.json):")
- print("\n{")
- print(' "mcpServers": {')
- print(' "claude-mpm": {')
-
- # Find the correct binary path
- bin_path = Path(sys.executable).parent / "claude-mpm-mcp"
- if not bin_path.exists():
- # Try to find it in the project bin directory
- project_root = Path(__file__).parent.parent.parent.parent.parent
- bin_path = project_root / "bin" / "claude-mpm-mcp"
-
- if bin_path.exists():
- print(f' "command": "{bin_path}"')
- else:
- print(' "command": "claude-mpm-mcp"')
- print(" // Or use the full path if not in PATH:")
- print(' // "command": "/path/to/claude-mpm/bin/claude-mpm-mcp"')
-
- print(" }")
- print(" }")
- print("}")
- print("\n2. Restart Claude Code to load the MCP server")
- print("\n3. The server will be automatically started when needed")
- print("\nOr use the registration script:")
- print(" python scripts/register_mcp_gateway.py")
- print("\nTo test the server directly, run:")
- print(" claude-mpm mcp start --test")
+ print("\nThe MCP server enables Claude Desktop to use tools and integrations.")
+ print("\nTo configure the MCP server:")
+ print("\n1. Run the configuration script:")
+ print(" python scripts/configure_mcp_server.py")
+ print("\n2. Or manually configure Claude Desktop:")
+
+ # Find project root for paths
+ project_root = Path(__file__).parent.parent.parent.parent.parent
+ wrapper_path = project_root / "scripts" / "mcp_wrapper.py"
+
+ print("\n Add this to your Claude Desktop configuration:")
+ print(" (~/Library/Application Support/Claude/claude_desktop_config.json on macOS)")
+ print("\n {")
+ print(' "mcpServers": {')
+ print(' "claude-mpm-gateway": {')
+ print(f' "command": "{sys.executable}",')
+ print(f' "args": ["{wrapper_path}"],')
+ print(f' "cwd": "{project_root}"')
+ print(' }')
+ print(' }')
+ print(' }')
+ print("\n3. Restart Claude Desktop to load the MCP server")
+ print("\nTo test the server directly:")
+ print(" python scripts/mcp_wrapper.py")
+ print("\nTo check running MCP processes:")
+ print(" python scripts/check_mcp_processes.py")
  print("\nFor more information, see:")
- print(" https://github.com/anthropics/mcp")
+ print(" https://github.com/anthropics/mcp")
 
  return 0
 
- # Default behavior: Run the server directly (for Claude Code compatibility)
- # When Claude Code spawns "claude-mpm mcp start", it expects the server to run
+ # Default behavior: Use the wrapper script for proper environment setup
  if test_mode:
  print("🧪 Starting MCP server in test mode...")
  print(" This will run the server with stdio communication.")
  print(" Press Ctrl+C to stop.\n")
 
  try:
- # Configure logging to stderr for MCP mode
- import logging
- import sys
-
- # Disable all stdout logging when running MCP server
- # to prevent interference with JSON-RPC protocol
- root_logger = logging.getLogger()
-
- # Remove any existing handlers that might log to stdout
- for handler in root_logger.handlers[:]:
- if hasattr(handler, "stream") and handler.stream == sys.stdout:
- root_logger.removeHandler(handler)
-
- # Add stderr handler if needed (but keep it minimal)
- if not test_mode:
- # In production mode, minimize stderr output too
- logging.basicConfig(
- level=logging.ERROR,
- format="%(message)s",
- stream=sys.stderr,
- force=True,
- )
- else:
- # In test mode, allow more verbose stderr logging
- logging.basicConfig(
- level=logging.INFO,
- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
- stream=sys.stderr,
- force=True,
- )
-
- # Import and run the stdio server directly
- from ...services.mcp_gateway.server.stdio_server import SimpleMCPServer
-
- server = SimpleMCPServer(name="claude-mpm-gateway", version="1.0.0")
-
- # Run the server (handles stdio communication)
- await server.run()
- return 0
+ # Instead of running directly, we should use the wrapper script
+ # for consistent environment setup
+ import subprocess
+ from pathlib import Path
+
+ # Find the wrapper script
+ project_root = Path(__file__).parent.parent.parent.parent.parent
+ wrapper_script = project_root / "scripts" / "mcp_wrapper.py"
+
+ if not wrapper_script.exists():
+ print(f" Error: Wrapper script not found at {wrapper_script}", file=sys.stderr)
+ print("\nPlease ensure the wrapper script is installed.", file=sys.stderr)
+ return 1
+
+ # Run the wrapper script
+ print(f"Starting MCP server via wrapper: {wrapper_script}", file=sys.stderr)
+
+ # Use subprocess to run the wrapper
+ # This ensures proper environment setup
+ result = subprocess.run(
+ [sys.executable, str(wrapper_script)],
+ cwd=str(project_root),
+ env={**os.environ, "MCP_MODE": "test" if test_mode else "production"}
+ )
+
+ return result.returncode
 
  except ImportError as e:
  self.logger.error(f"Failed to import MCP server: {e}")
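Rendered as a file, the configuration these instructions print would look roughly like the sketch below; the interpreter, wrapper, and working-directory paths are machine-specific placeholders for whatever `sys.executable`, `wrapper_path`, and `project_root` resolve to.

```python
import json

# Approximate shape of the claude_desktop_config.json entry described above.
config = {
    "mcpServers": {
        "claude-mpm-gateway": {
            "command": "/path/to/python",                             # sys.executable
            "args": ["/path/to/claude-mpm/scripts/mcp_wrapper.py"],   # wrapper_path
            "cwd": "/path/to/claude-mpm",                             # project_root
        }
    }
}

print(json.dumps(config, indent=2))
```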