massgen 0.0.3__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of massgen might be problematic. Click here for more details.
- massgen/__init__.py +142 -8
- massgen/adapters/__init__.py +29 -0
- massgen/adapters/ag2_adapter.py +483 -0
- massgen/adapters/base.py +183 -0
- massgen/adapters/tests/__init__.py +0 -0
- massgen/adapters/tests/test_ag2_adapter.py +439 -0
- massgen/adapters/tests/test_agent_adapter.py +128 -0
- massgen/adapters/utils/__init__.py +2 -0
- massgen/adapters/utils/ag2_utils.py +236 -0
- massgen/adapters/utils/tests/__init__.py +0 -0
- massgen/adapters/utils/tests/test_ag2_utils.py +138 -0
- massgen/agent_config.py +329 -55
- massgen/api_params_handler/__init__.py +10 -0
- massgen/api_params_handler/_api_params_handler_base.py +99 -0
- massgen/api_params_handler/_chat_completions_api_params_handler.py +176 -0
- massgen/api_params_handler/_claude_api_params_handler.py +113 -0
- massgen/api_params_handler/_response_api_params_handler.py +130 -0
- massgen/backend/__init__.py +39 -4
- massgen/backend/azure_openai.py +385 -0
- massgen/backend/base.py +341 -69
- massgen/backend/base_with_mcp.py +1102 -0
- massgen/backend/capabilities.py +386 -0
- massgen/backend/chat_completions.py +577 -130
- massgen/backend/claude.py +1033 -537
- massgen/backend/claude_code.py +1203 -0
- massgen/backend/cli_base.py +209 -0
- massgen/backend/docs/BACKEND_ARCHITECTURE.md +126 -0
- massgen/backend/{CLAUDE_API_RESEARCH.md → docs/CLAUDE_API_RESEARCH.md} +18 -18
- massgen/backend/{GEMINI_API_DOCUMENTATION.md → docs/GEMINI_API_DOCUMENTATION.md} +9 -9
- massgen/backend/docs/Gemini MCP Integration Analysis.md +1050 -0
- massgen/backend/docs/MCP_IMPLEMENTATION_CLAUDE_BACKEND.md +177 -0
- massgen/backend/docs/MCP_INTEGRATION_RESPONSE_BACKEND.md +352 -0
- massgen/backend/docs/OPENAI_GPT5_MODELS.md +211 -0
- massgen/backend/{OPENAI_RESPONSES_API_FORMAT.md → docs/OPENAI_RESPONSE_API_TOOL_CALLS.md} +3 -3
- massgen/backend/docs/OPENAI_response_streaming.md +20654 -0
- massgen/backend/docs/inference_backend.md +257 -0
- massgen/backend/docs/permissions_and_context_files.md +1085 -0
- massgen/backend/external.py +126 -0
- massgen/backend/gemini.py +1850 -241
- massgen/backend/grok.py +40 -156
- massgen/backend/inference.py +156 -0
- massgen/backend/lmstudio.py +171 -0
- massgen/backend/response.py +1095 -322
- massgen/chat_agent.py +131 -113
- massgen/cli.py +1560 -275
- massgen/config_builder.py +2396 -0
- massgen/configs/BACKEND_CONFIGURATION.md +458 -0
- massgen/configs/README.md +559 -216
- massgen/configs/ag2/ag2_case_study.yaml +27 -0
- massgen/configs/ag2/ag2_coder.yaml +34 -0
- massgen/configs/ag2/ag2_coder_case_study.yaml +36 -0
- massgen/configs/ag2/ag2_gemini.yaml +27 -0
- massgen/configs/ag2/ag2_groupchat.yaml +108 -0
- massgen/configs/ag2/ag2_groupchat_gpt.yaml +118 -0
- massgen/configs/ag2/ag2_single_agent.yaml +21 -0
- massgen/configs/basic/multi/fast_timeout_example.yaml +37 -0
- massgen/configs/basic/multi/gemini_4o_claude.yaml +31 -0
- massgen/configs/basic/multi/gemini_gpt5nano_claude.yaml +36 -0
- massgen/configs/{gemini_4o_claude.yaml → basic/multi/geminicode_4o_claude.yaml} +3 -3
- massgen/configs/basic/multi/geminicode_gpt5nano_claude.yaml +36 -0
- massgen/configs/basic/multi/glm_gemini_claude.yaml +25 -0
- massgen/configs/basic/multi/gpt4o_audio_generation.yaml +30 -0
- massgen/configs/basic/multi/gpt4o_image_generation.yaml +31 -0
- massgen/configs/basic/multi/gpt5nano_glm_qwen.yaml +26 -0
- massgen/configs/basic/multi/gpt5nano_image_understanding.yaml +26 -0
- massgen/configs/{three_agents_default.yaml → basic/multi/three_agents_default.yaml} +8 -4
- massgen/configs/basic/multi/three_agents_opensource.yaml +27 -0
- massgen/configs/basic/multi/three_agents_vllm.yaml +20 -0
- massgen/configs/basic/multi/two_agents_gemini.yaml +19 -0
- massgen/configs/{two_agents.yaml → basic/multi/two_agents_gpt5.yaml} +14 -6
- massgen/configs/basic/multi/two_agents_opensource_lmstudio.yaml +31 -0
- massgen/configs/basic/multi/two_qwen_vllm_sglang.yaml +28 -0
- massgen/configs/{single_agent.yaml → basic/single/single_agent.yaml} +1 -1
- massgen/configs/{single_flash2.5.yaml → basic/single/single_flash2.5.yaml} +1 -2
- massgen/configs/basic/single/single_gemini2.5pro.yaml +16 -0
- massgen/configs/basic/single/single_gpt4o_audio_generation.yaml +22 -0
- massgen/configs/basic/single/single_gpt4o_image_generation.yaml +22 -0
- massgen/configs/basic/single/single_gpt4o_video_generation.yaml +24 -0
- massgen/configs/basic/single/single_gpt5nano.yaml +20 -0
- massgen/configs/basic/single/single_gpt5nano_file_search.yaml +18 -0
- massgen/configs/basic/single/single_gpt5nano_image_understanding.yaml +17 -0
- massgen/configs/basic/single/single_gptoss120b.yaml +15 -0
- massgen/configs/basic/single/single_openrouter_audio_understanding.yaml +15 -0
- massgen/configs/basic/single/single_qwen_video_understanding.yaml +15 -0
- massgen/configs/debug/code_execution/command_filtering_blacklist.yaml +29 -0
- massgen/configs/debug/code_execution/command_filtering_whitelist.yaml +28 -0
- massgen/configs/debug/code_execution/docker_verification.yaml +29 -0
- massgen/configs/debug/skip_coordination_test.yaml +27 -0
- massgen/configs/debug/test_sdk_migration.yaml +17 -0
- massgen/configs/docs/DISCORD_MCP_SETUP.md +208 -0
- massgen/configs/docs/TWITTER_MCP_ENESCINAR_SETUP.md +82 -0
- massgen/configs/providers/azure/azure_openai_multi.yaml +21 -0
- massgen/configs/providers/azure/azure_openai_single.yaml +19 -0
- massgen/configs/providers/claude/claude.yaml +14 -0
- massgen/configs/providers/gemini/gemini_gpt5nano.yaml +28 -0
- massgen/configs/providers/local/lmstudio.yaml +11 -0
- massgen/configs/providers/openai/gpt5.yaml +46 -0
- massgen/configs/providers/openai/gpt5_nano.yaml +46 -0
- massgen/configs/providers/others/grok_single_agent.yaml +19 -0
- massgen/configs/providers/others/zai_coding_team.yaml +108 -0
- massgen/configs/providers/others/zai_glm45.yaml +12 -0
- massgen/configs/{creative_team.yaml → teams/creative/creative_team.yaml} +16 -6
- massgen/configs/{travel_planning.yaml → teams/creative/travel_planning.yaml} +16 -6
- massgen/configs/{news_analysis.yaml → teams/research/news_analysis.yaml} +16 -6
- massgen/configs/{research_team.yaml → teams/research/research_team.yaml} +15 -7
- massgen/configs/{technical_analysis.yaml → teams/research/technical_analysis.yaml} +16 -6
- massgen/configs/tools/code-execution/basic_command_execution.yaml +25 -0
- massgen/configs/tools/code-execution/code_execution_use_case_simple.yaml +41 -0
- massgen/configs/tools/code-execution/docker_claude_code.yaml +32 -0
- massgen/configs/tools/code-execution/docker_multi_agent.yaml +32 -0
- massgen/configs/tools/code-execution/docker_simple.yaml +29 -0
- massgen/configs/tools/code-execution/docker_with_resource_limits.yaml +32 -0
- massgen/configs/tools/code-execution/multi_agent_playwright_automation.yaml +57 -0
- massgen/configs/tools/filesystem/cc_gpt5_gemini_filesystem.yaml +34 -0
- massgen/configs/tools/filesystem/claude_code_context_sharing.yaml +68 -0
- massgen/configs/tools/filesystem/claude_code_flash2.5.yaml +43 -0
- massgen/configs/tools/filesystem/claude_code_flash2.5_gptoss.yaml +49 -0
- massgen/configs/tools/filesystem/claude_code_gpt5nano.yaml +31 -0
- massgen/configs/tools/filesystem/claude_code_single.yaml +40 -0
- massgen/configs/tools/filesystem/fs_permissions_test.yaml +87 -0
- massgen/configs/tools/filesystem/gemini_gemini_workspace_cleanup.yaml +54 -0
- massgen/configs/tools/filesystem/gemini_gpt5_filesystem_casestudy.yaml +30 -0
- massgen/configs/tools/filesystem/gemini_gpt5nano_file_context_path.yaml +43 -0
- massgen/configs/tools/filesystem/gemini_gpt5nano_protected_paths.yaml +45 -0
- massgen/configs/tools/filesystem/gpt5mini_cc_fs_context_path.yaml +31 -0
- massgen/configs/tools/filesystem/grok4_gpt5_gemini_filesystem.yaml +32 -0
- massgen/configs/tools/filesystem/multiturn/grok4_gpt5_claude_code_filesystem_multiturn.yaml +58 -0
- massgen/configs/tools/filesystem/multiturn/grok4_gpt5_gemini_filesystem_multiturn.yaml +58 -0
- massgen/configs/tools/filesystem/multiturn/two_claude_code_filesystem_multiturn.yaml +47 -0
- massgen/configs/tools/filesystem/multiturn/two_gemini_flash_filesystem_multiturn.yaml +48 -0
- massgen/configs/tools/mcp/claude_code_discord_mcp_example.yaml +27 -0
- massgen/configs/tools/mcp/claude_code_simple_mcp.yaml +35 -0
- massgen/configs/tools/mcp/claude_code_twitter_mcp_example.yaml +32 -0
- massgen/configs/tools/mcp/claude_mcp_example.yaml +24 -0
- massgen/configs/tools/mcp/claude_mcp_test.yaml +27 -0
- massgen/configs/tools/mcp/five_agents_travel_mcp_test.yaml +157 -0
- massgen/configs/tools/mcp/five_agents_weather_mcp_test.yaml +103 -0
- massgen/configs/tools/mcp/gemini_mcp_example.yaml +24 -0
- massgen/configs/tools/mcp/gemini_mcp_filesystem_test.yaml +23 -0
- massgen/configs/tools/mcp/gemini_mcp_filesystem_test_sharing.yaml +23 -0
- massgen/configs/tools/mcp/gemini_mcp_filesystem_test_single_agent.yaml +17 -0
- massgen/configs/tools/mcp/gemini_mcp_filesystem_test_with_claude_code.yaml +24 -0
- massgen/configs/tools/mcp/gemini_mcp_test.yaml +27 -0
- massgen/configs/tools/mcp/gemini_notion_mcp.yaml +52 -0
- massgen/configs/tools/mcp/gpt5_nano_mcp_example.yaml +24 -0
- massgen/configs/tools/mcp/gpt5_nano_mcp_test.yaml +27 -0
- massgen/configs/tools/mcp/gpt5mini_claude_code_discord_mcp_example.yaml +38 -0
- massgen/configs/tools/mcp/gpt_oss_mcp_example.yaml +25 -0
- massgen/configs/tools/mcp/gpt_oss_mcp_test.yaml +28 -0
- massgen/configs/tools/mcp/grok3_mini_mcp_example.yaml +24 -0
- massgen/configs/tools/mcp/grok3_mini_mcp_test.yaml +27 -0
- massgen/configs/tools/mcp/multimcp_gemini.yaml +111 -0
- massgen/configs/tools/mcp/qwen_api_mcp_example.yaml +25 -0
- massgen/configs/tools/mcp/qwen_api_mcp_test.yaml +28 -0
- massgen/configs/tools/mcp/qwen_local_mcp_example.yaml +24 -0
- massgen/configs/tools/mcp/qwen_local_mcp_test.yaml +27 -0
- massgen/configs/tools/planning/five_agents_discord_mcp_planning_mode.yaml +140 -0
- massgen/configs/tools/planning/five_agents_filesystem_mcp_planning_mode.yaml +151 -0
- massgen/configs/tools/planning/five_agents_notion_mcp_planning_mode.yaml +151 -0
- massgen/configs/tools/planning/five_agents_twitter_mcp_planning_mode.yaml +155 -0
- massgen/configs/tools/planning/gpt5_mini_case_study_mcp_planning_mode.yaml +73 -0
- massgen/configs/tools/web-search/claude_streamable_http_test.yaml +43 -0
- massgen/configs/tools/web-search/gemini_streamable_http_test.yaml +43 -0
- massgen/configs/tools/web-search/gpt5_mini_streamable_http_test.yaml +43 -0
- massgen/configs/tools/web-search/gpt_oss_streamable_http_test.yaml +44 -0
- massgen/configs/tools/web-search/grok3_mini_streamable_http_test.yaml +43 -0
- massgen/configs/tools/web-search/qwen_api_streamable_http_test.yaml +44 -0
- massgen/configs/tools/web-search/qwen_local_streamable_http_test.yaml +43 -0
- massgen/coordination_tracker.py +708 -0
- massgen/docker/README.md +462 -0
- massgen/filesystem_manager/__init__.py +21 -0
- massgen/filesystem_manager/_base.py +9 -0
- massgen/filesystem_manager/_code_execution_server.py +545 -0
- massgen/filesystem_manager/_docker_manager.py +477 -0
- massgen/filesystem_manager/_file_operation_tracker.py +248 -0
- massgen/filesystem_manager/_filesystem_manager.py +813 -0
- massgen/filesystem_manager/_path_permission_manager.py +1261 -0
- massgen/filesystem_manager/_workspace_tools_server.py +1815 -0
- massgen/formatter/__init__.py +10 -0
- massgen/formatter/_chat_completions_formatter.py +284 -0
- massgen/formatter/_claude_formatter.py +235 -0
- massgen/formatter/_formatter_base.py +156 -0
- massgen/formatter/_response_formatter.py +263 -0
- massgen/frontend/__init__.py +1 -2
- massgen/frontend/coordination_ui.py +471 -286
- massgen/frontend/displays/base_display.py +56 -11
- massgen/frontend/displays/create_coordination_table.py +1956 -0
- massgen/frontend/displays/rich_terminal_display.py +1259 -619
- massgen/frontend/displays/simple_display.py +9 -4
- massgen/frontend/displays/terminal_display.py +27 -68
- massgen/logger_config.py +681 -0
- massgen/mcp_tools/README.md +232 -0
- massgen/mcp_tools/__init__.py +105 -0
- massgen/mcp_tools/backend_utils.py +1035 -0
- massgen/mcp_tools/circuit_breaker.py +195 -0
- massgen/mcp_tools/client.py +894 -0
- massgen/mcp_tools/config_validator.py +138 -0
- massgen/mcp_tools/docs/circuit_breaker.md +646 -0
- massgen/mcp_tools/docs/client.md +950 -0
- massgen/mcp_tools/docs/config_validator.md +478 -0
- massgen/mcp_tools/docs/exceptions.md +1165 -0
- massgen/mcp_tools/docs/security.md +854 -0
- massgen/mcp_tools/exceptions.py +338 -0
- massgen/mcp_tools/hooks.py +212 -0
- massgen/mcp_tools/security.py +780 -0
- massgen/message_templates.py +342 -64
- massgen/orchestrator.py +1515 -241
- massgen/stream_chunk/__init__.py +35 -0
- massgen/stream_chunk/base.py +92 -0
- massgen/stream_chunk/multimodal.py +237 -0
- massgen/stream_chunk/text.py +162 -0
- massgen/tests/mcp_test_server.py +150 -0
- massgen/tests/multi_turn_conversation_design.md +0 -8
- massgen/tests/test_azure_openai_backend.py +156 -0
- massgen/tests/test_backend_capabilities.py +262 -0
- massgen/tests/test_backend_event_loop_all.py +179 -0
- massgen/tests/test_chat_completions_refactor.py +142 -0
- massgen/tests/test_claude_backend.py +15 -28
- massgen/tests/test_claude_code.py +268 -0
- massgen/tests/test_claude_code_context_sharing.py +233 -0
- massgen/tests/test_claude_code_orchestrator.py +175 -0
- massgen/tests/test_cli_backends.py +180 -0
- massgen/tests/test_code_execution.py +679 -0
- massgen/tests/test_external_agent_backend.py +134 -0
- massgen/tests/test_final_presentation_fallback.py +237 -0
- massgen/tests/test_gemini_planning_mode.py +351 -0
- massgen/tests/test_grok_backend.py +7 -10
- massgen/tests/test_http_mcp_server.py +42 -0
- massgen/tests/test_integration_simple.py +198 -0
- massgen/tests/test_mcp_blocking.py +125 -0
- massgen/tests/test_message_context_building.py +29 -47
- massgen/tests/test_orchestrator_final_presentation.py +48 -0
- massgen/tests/test_path_permission_manager.py +2087 -0
- massgen/tests/test_rich_terminal_display.py +14 -13
- massgen/tests/test_timeout.py +133 -0
- massgen/tests/test_v3_3agents.py +11 -12
- massgen/tests/test_v3_simple.py +8 -13
- massgen/tests/test_v3_three_agents.py +11 -18
- massgen/tests/test_v3_two_agents.py +8 -13
- massgen/token_manager/__init__.py +7 -0
- massgen/token_manager/token_manager.py +400 -0
- massgen/utils.py +52 -16
- massgen/v1/agent.py +45 -91
- massgen/v1/agents.py +18 -53
- massgen/v1/backends/gemini.py +50 -153
- massgen/v1/backends/grok.py +21 -54
- massgen/v1/backends/oai.py +39 -111
- massgen/v1/cli.py +36 -93
- massgen/v1/config.py +8 -12
- massgen/v1/logging.py +43 -127
- massgen/v1/main.py +18 -32
- massgen/v1/orchestrator.py +68 -209
- massgen/v1/streaming_display.py +62 -163
- massgen/v1/tools.py +8 -12
- massgen/v1/types.py +9 -23
- massgen/v1/utils.py +5 -23
- massgen-0.1.0.dist-info/METADATA +1245 -0
- massgen-0.1.0.dist-info/RECORD +273 -0
- massgen-0.1.0.dist-info/entry_points.txt +2 -0
- massgen/frontend/logging/__init__.py +0 -9
- massgen/frontend/logging/realtime_logger.py +0 -197
- massgen-0.0.3.dist-info/METADATA +0 -568
- massgen-0.0.3.dist-info/RECORD +0 -76
- massgen-0.0.3.dist-info/entry_points.txt +0 -2
- /massgen/backend/{Function calling openai responses.md → docs/Function calling openai responses.md} +0 -0
- {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/WHEEL +0 -0
- {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/licenses/LICENSE +0 -0
- {massgen-0.0.3.dist-info → massgen-0.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,646 @@
|
|
|
1
|
+
# MCP Circuit Breaker Documentation
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
The `MCPCircuitBreaker` is a resilience pattern implementation designed to handle server failures gracefully and prevent cascading failures in MCP (Model Context Protocol) integrations. It provides automatic failure detection, exponential backoff, and recovery mechanisms to maintain system stability when MCP servers become unreliable or unavailable.
|
|
6
|
+
|
|
7
|
+
### Key Features
|
|
8
|
+
|
|
9
|
+
- **Failure Tracking**: Monitors server health and tracks failure counts
|
|
10
|
+
- **Exponential Backoff**: Implements configurable backoff strategies to avoid overwhelming failing servers
|
|
11
|
+
- **Automatic Recovery**: Allows servers to recover naturally after backoff periods
|
|
12
|
+
- **Circuit Opening**: Temporarily bypasses failing servers to prevent cascading failures
|
|
13
|
+
- **Monitoring**: Provides detailed status information for all tracked servers
|
|
14
|
+
|
|
15
|
+
## Architecture
|
|
16
|
+
|
|
17
|
+
The circuit breaker consists of three main components:
|
|
18
|
+
|
|
19
|
+
1. **CircuitBreakerConfig**: Configuration class defining behavior parameters
|
|
20
|
+
2. **ServerStatus**: Individual server state tracking
|
|
21
|
+
3. **MCPCircuitBreaker**: Main circuit breaker implementation
|
|
22
|
+
|
|
23
|
+
## Configuration
|
|
24
|
+
|
|
25
|
+
### CircuitBreakerConfig
|
|
26
|
+
|
|
27
|
+
The `CircuitBreakerConfig` class defines the behavior of the circuit breaker:
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
from massgen.mcp_tools.circuit_breaker import CircuitBreakerConfig
|
|
31
|
+
|
|
32
|
+
# Default configuration
|
|
33
|
+
config = CircuitBreakerConfig()
|
|
34
|
+
print(f"Max failures: {config.max_failures}") # 3
|
|
35
|
+
print(f"Reset time: {config.reset_time_seconds}") # 300 seconds (5 minutes)
|
|
36
|
+
print(f"Backoff multiplier: {config.backoff_multiplier}") # 2
|
|
37
|
+
print(f"Max backoff: {config.max_backoff_multiplier}") # 8
|
|
38
|
+
|
|
39
|
+
# Custom configuration for high-traffic scenarios
|
|
40
|
+
high_traffic_config = CircuitBreakerConfig(
|
|
41
|
+
max_failures=5, # Allow more failures before circuit opens
|
|
42
|
+
reset_time_seconds=60, # Shorter base reset time
|
|
43
|
+
backoff_multiplier=1.5, # Gentler backoff progression
|
|
44
|
+
max_backoff_multiplier=4 # Lower maximum backoff
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
# Custom configuration for critical services
|
|
48
|
+
critical_config = CircuitBreakerConfig(
|
|
49
|
+
max_failures=1, # Fail fast for critical services
|
|
50
|
+
reset_time_seconds=600, # Longer base reset time (10 minutes)
|
|
51
|
+
backoff_multiplier=3, # Aggressive backoff
|
|
52
|
+
max_backoff_multiplier=16 # Higher maximum backoff
|
|
53
|
+
)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
#### Configuration Parameters
|
|
57
|
+
|
|
58
|
+
| Parameter | Type | Default | Description |
|
|
59
|
+
|-----------|------|---------|-------------|
|
|
60
|
+
| `max_failures` | int | 3 | Number of failures before circuit opens |
|
|
61
|
+
| `reset_time_seconds` | int | 300 | Base reset time in seconds (5 minutes) |
|
|
62
|
+
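| `backoff_multiplier` | int | 2 | Base of the exponential backoff, raised to the number of failures beyond `max_failures` (examples in this document also pass fractional values such as 1.5) |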
|
|
63
|
+
| `max_backoff_multiplier` | int | 8 | Upper bound on the computed backoff multiplier; the longest wait is `reset_time_seconds` × this value |
|
|
64
|
+
|
|
65
|
+
### ServerStatus
|
|
66
|
+
|
|
67
|
+
The `ServerStatus` dataclass tracks the state of individual servers:
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
from massgen.mcp_tools.circuit_breaker import ServerStatus
|
|
71
|
+
|
|
72
|
+
# Server status attributes
|
|
73
|
+
status = ServerStatus(
|
|
74
|
+
failure_count=2,
|
|
75
|
+
last_failure_time=1234567890.0
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
print(f"Is failing: {status.is_failing}") # True (failure_count > 0)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## API Reference
|
|
82
|
+
|
|
83
|
+
### MCPCircuitBreaker
|
|
84
|
+
|
|
85
|
+
#### Constructor
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
from massgen.mcp_tools.circuit_breaker import MCPCircuitBreaker, CircuitBreakerConfig
|
|
89
|
+
|
|
90
|
+
# Default configuration
|
|
91
|
+
circuit_breaker = MCPCircuitBreaker()
|
|
92
|
+
|
|
93
|
+
# Custom configuration
|
|
94
|
+
config = CircuitBreakerConfig(max_failures=5, reset_time_seconds=120)
|
|
95
|
+
circuit_breaker = MCPCircuitBreaker(config)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
#### should_skip_server(server_name: str) -> bool
|
|
99
|
+
|
|
100
|
+
Checks if a server should be skipped due to circuit breaker state.
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
# Check if server should be bypassed
|
|
104
|
+
if circuit_breaker.should_skip_server("weather_server"):
|
|
105
|
+
print("Server is currently failing, skipping...")
|
|
106
|
+
else:
|
|
107
|
+
print("Server is healthy, proceeding with connection...")
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
**Returns**: `True` if server should be skipped, `False` if safe to use.
|
|
111
|
+
|
|
112
|
+
#### record_failure(server_name: str) -> None
|
|
113
|
+
|
|
114
|
+
Records a server failure and updates circuit breaker state.
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
try:
|
|
118
|
+
# Attempt server operation
|
|
119
|
+
await client.connect()
|
|
120
|
+
except MCPConnectionError:
|
|
121
|
+
# Record the failure
|
|
122
|
+
circuit_breaker.record_failure("weather_server")
|
|
123
|
+
print("Failure recorded for weather_server")
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
#### record_success(server_name: str) -> None
|
|
127
|
+
|
|
128
|
+
Records a successful operation and resets failure count.
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
try:
|
|
132
|
+
result = await client.call_tool("get_weather", {"city": "Tokyo"})
|
|
133
|
+
# Record success to reset failure count
|
|
134
|
+
circuit_breaker.record_success("weather_server")
|
|
135
|
+
print("Success recorded, server recovered")
|
|
136
|
+
except Exception as e:
|
|
137
|
+
circuit_breaker.record_failure("weather_server")
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
#### get_server_status(server_name: str) -> Tuple[int, float, bool]
|
|
141
|
+
|
|
142
|
+
Returns detailed status information for a specific server.
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
failure_count, last_failure_time, is_circuit_open = circuit_breaker.get_server_status("weather_server")
|
|
146
|
+
|
|
147
|
+
print(f"Failure count: {failure_count}")
|
|
148
|
+
print(f"Last failure: {last_failure_time}")
|
|
149
|
+
print(f"Circuit open: {is_circuit_open}")
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
**Returns**: Tuple of `(failure_count, last_failure_time, is_circuit_open)`
|
|
153
|
+
|
|
154
|
+
**Note**: This method may reset the server state if the backoff period has expired, as it internally calls `should_skip_server()` to determine circuit status. For read-only inspection without side effects, use `get_all_failing_servers()` instead.
|
|
155
|
+
|
|
156
|
+
#### get_all_failing_servers() -> Dict[str, Dict[str, Any]]
|
|
157
|
+
|
|
158
|
+
Returns status information for all servers with failures.
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
failing_servers = circuit_breaker.get_all_failing_servers()
|
|
162
|
+
|
|
163
|
+
for server_name, status in failing_servers.items():
|
|
164
|
+
print(f"Server: {server_name}")
|
|
165
|
+
print(f" Failures: {status['failure_count']}")
|
|
166
|
+
print(f" Backoff time: {status['backoff_time']:.1f}s")
|
|
167
|
+
print(f" Time remaining: {status['time_remaining']:.1f}s")
|
|
168
|
+
print(f" Circuit open: {status['is_circuit_open']}")
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
#### reset_all_servers() -> None
|
|
172
|
+
|
|
173
|
+
Manually resets circuit breaker state for all servers.
|
|
174
|
+
|
|
175
|
+
```python
|
|
176
|
+
# Reset all servers (useful for maintenance or testing)
|
|
177
|
+
circuit_breaker.reset_all_servers()
|
|
178
|
+
print("All servers reset")
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## Integration Examples
|
|
182
|
+
|
|
183
|
+
### MCPClient Integration
|
|
184
|
+
|
|
185
|
+
`MultiMCPClient` handles circuit breaking internally. When working with a standalone `MCPClient`, you can create your own circuit breaker and drive it manually for custom logic:
|
|
186
|
+
|
|
187
|
+
```python
|
|
188
|
+
import asyncio
|
|
189
|
+
from massgen.mcp_tools.client import MCPClient
|
|
190
|
+
from massgen.mcp_tools.circuit_breaker import MCPCircuitBreaker, CircuitBreakerConfig
|
|
191
|
+
from massgen.mcp_tools.exceptions import MCPConnectionError
|
|
192
|
+
|
|
193
|
+
async def example_single_client():
|
|
194
|
+
# Circuit breaker is handled internally by MultiMCPClient
|
|
195
|
+
# but you can create your own for custom logic
|
|
196
|
+
|
|
197
|
+
config = CircuitBreakerConfig(max_failures=2, reset_time_seconds=60)
|
|
198
|
+
circuit_breaker = MCPCircuitBreaker(config)
|
|
199
|
+
|
|
200
|
+
server_config = {
|
|
201
|
+
"name": "weather_server",
|
|
202
|
+
"type": "stdio",
|
|
203
|
+
"command": "npx",
|
|
204
|
+
"args": ["-y", "@fak111/weather-mcp"]
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
# Check circuit breaker before attempting connection
|
|
208
|
+
if circuit_breaker.should_skip_server("weather_server"):
|
|
209
|
+
print("Server is failing, skipping connection attempt")
|
|
210
|
+
return
|
|
211
|
+
|
|
212
|
+
try:
|
|
213
|
+
async with MCPClient(server_config) as client:
|
|
214
|
+
result = await client.call_tool("get_weather", {"city": "Tokyo"})
|
|
215
|
+
circuit_breaker.record_success("weather_server")
|
|
216
|
+
return result
|
|
217
|
+
except MCPConnectionError as e:
|
|
218
|
+
circuit_breaker.record_failure("weather_server")
|
|
219
|
+
print(f"Connection failed: {e}")
|
|
220
|
+
raise
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
### MultiMCPClient Integration
|
|
224
|
+
|
|
225
|
+
The `MultiMCPClient` has built-in circuit breaker functionality:
|
|
226
|
+
|
|
227
|
+
```python
|
|
228
|
+
import asyncio
|
|
229
|
+
from massgen.mcp_tools.client import MultiMCPClient
|
|
230
|
+
from massgen.mcp_tools.exceptions import MCPConnectionError
|
|
231
|
+
|
|
232
|
+
async def example_multi_client():
|
|
233
|
+
server_configs = [
|
|
234
|
+
{
|
|
235
|
+
"name": "weather_server",
|
|
236
|
+
"type": "stdio",
|
|
237
|
+
"command": "npx",
|
|
238
|
+
"args": ["-y", "@fak111/weather-mcp"]
|
|
239
|
+
},
|
|
240
|
+
{
|
|
241
|
+
"name": "file_server",
|
|
242
|
+
"type": "stdio",
|
|
243
|
+
"command": "python",
|
|
244
|
+
"args": ["-m", "file_mcp_server"]
|
|
245
|
+
}
|
|
246
|
+
]
|
|
247
|
+
|
|
248
|
+
async with MultiMCPClient(server_configs) as multi_client:
|
|
249
|
+
# Circuit breaker automatically handles failing servers
|
|
250
|
+
try:
|
|
251
|
+
# This will skip servers that are currently failing
|
|
252
|
+
result = await multi_client.call_tool("mcp__weather_server__get_weather",
|
|
253
|
+
{"city": "Tokyo"})
|
|
254
|
+
print(f"Weather result: {result}")
|
|
255
|
+
except MCPConnectionError as e:
|
|
256
|
+
print(f"All weather servers are failing: {e}")
|
|
257
|
+
|
|
258
|
+
# Check circuit breaker status
|
|
259
|
+
health_status = await multi_client.health_check_all()
|
|
260
|
+
print(f"Server health: {health_status}")
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
## Exponential Backoff Algorithm
|
|
264
|
+
|
|
265
|
+
The circuit breaker implements exponential backoff to gradually increase wait times for failing servers:
|
|
266
|
+
|
|
267
|
+
### Calculation Formula
|
|
268
|
+
|
|
269
|
+
```
|
|
270
|
+
backoff_time = reset_time_seconds * min(backoff_multiplier^(failures - max_failures), max_backoff_multiplier)
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
### Example Calculations
|
|
274
|
+
|
|
275
|
+
With default configuration (`reset_time_seconds=300`, `backoff_multiplier=2`, `max_backoff_multiplier=8`):
|
|
276
|
+
|
|
277
|
+
```python
|
|
278
|
+
# Failure progression example
|
|
279
|
+
failures = [3, 4, 5, 6, 7, 8, 9, 10]
|
|
280
|
+
backoff_times = []
|
|
281
|
+
|
|
282
|
+
for failure_count in failures:
|
|
283
|
+
if failure_count >= 3: # max_failures
|
|
284
|
+
exponent = failure_count - 3
|
|
285
|
+
multiplier = min(2 ** exponent, 8) # Cap at max_backoff_multiplier
|
|
286
|
+
backoff_time = 300 * multiplier
|
|
287
|
+
backoff_times.append(backoff_time)
|
|
288
|
+
print(f"Failure {failure_count}: {backoff_time}s ({backoff_time/60:.1f} minutes)")
|
|
289
|
+
|
|
290
|
+
# Output (abbreviated; failures 9 and 10 print the same capped 2400s value):
|
|
291
|
+
# Failure 3: 300s (5.0 minutes)
|
|
292
|
+
# Failure 4: 600s (10.0 minutes)
|
|
293
|
+
# Failure 5: 1200s (20.0 minutes)
|
|
294
|
+
# Failure 6: 2400s (40.0 minutes)
|
|
295
|
+
# Failure 7: 2400s (40.0 minutes) - capped
|
|
296
|
+
# Failure 8: 2400s (40.0 minutes) - capped
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
### Custom Backoff Examples
|
|
300
|
+
|
|
301
|
+
```python
|
|
302
|
+
# Gentle backoff for development
|
|
303
|
+
dev_config = CircuitBreakerConfig(
|
|
304
|
+
max_failures=3,
|
|
305
|
+
reset_time_seconds=30, # 30 seconds base
|
|
306
|
+
backoff_multiplier=1.5, # Slower progression
|
|
307
|
+
max_backoff_multiplier=4 # Max 2 minutes
|
|
308
|
+
)
|
|
309
|
+
|
|
310
|
+
# Aggressive backoff for production
|
|
311
|
+
prod_config = CircuitBreakerConfig(
|
|
312
|
+
max_failures=2,
|
|
313
|
+
reset_time_seconds=600, # 10 minutes base
|
|
314
|
+
backoff_multiplier=3, # Faster progression
|
|
315
|
+
max_backoff_multiplier=16 # Max 2.67 hours
|
|
316
|
+
)
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
## Monitoring and Observability
|
|
320
|
+
|
|
321
|
+
### Health Monitoring
|
|
322
|
+
|
|
323
|
+
```python
|
|
324
|
+
import asyncio
|
|
325
|
+
from massgen.mcp_tools.client import MultiMCPClient
|
|
326
|
+
|
|
327
|
+
async def monitor_circuit_breaker():
|
|
328
|
+
async with MultiMCPClient(server_configs) as client:
|
|
329
|
+
# Access internal circuit breaker
|
|
330
|
+
circuit_breaker = client._circuit_breaker
|
|
331
|
+
|
|
332
|
+
while True:
|
|
333
|
+
# Get all failing servers
|
|
334
|
+
failing_servers = circuit_breaker.get_all_failing_servers()
|
|
335
|
+
|
|
336
|
+
if failing_servers:
|
|
337
|
+
print("=== Circuit Breaker Status ===")
|
|
338
|
+
for server_name, status in failing_servers.items():
|
|
339
|
+
print(f"Server: {server_name}")
|
|
340
|
+
print(f" Failures: {status['failure_count']}")
|
|
341
|
+
print(f" Time remaining: {status['time_remaining']:.1f}s")
|
|
342
|
+
print(f" Circuit open: {status['is_circuit_open']}")
|
|
343
|
+
print()
|
|
344
|
+
else:
|
|
345
|
+
print("All servers healthy")
|
|
346
|
+
|
|
347
|
+
await asyncio.sleep(30) # Check every 30 seconds
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
### Logging Integration
|
|
351
|
+
|
|
352
|
+
```python
|
|
353
|
+
import logging
|
|
354
|
+
from massgen.mcp_tools.circuit_breaker import MCPCircuitBreaker
|
|
355
|
+
|
|
356
|
+
# Configure logging
|
|
357
|
+
logging.basicConfig(level=logging.INFO)
|
|
358
|
+
logger = logging.getLogger(__name__)
|
|
359
|
+
|
|
360
|
+
# Circuit breaker automatically logs important events:
|
|
361
|
+
# - Server failures
|
|
362
|
+
# - Circuit opening/closing
|
|
363
|
+
# - Recovery events
|
|
364
|
+
|
|
365
|
+
circuit_breaker = MCPCircuitBreaker()
|
|
366
|
+
|
|
367
|
+
# Example log output:
|
|
368
|
+
# INFO:massgen.mcp_tools.circuit_breaker:Server weather_server failure recorded (1/3)
|
|
369
|
+
# WARNING:massgen.mcp_tools.circuit_breaker:Server weather_server has failed 3 times, will be skipped for 300.0 seconds
|
|
370
|
+
# INFO:massgen.mcp_tools.circuit_breaker:Circuit breaker reset for server weather_server after 300.0s
|
|
371
|
+
# INFO:massgen.mcp_tools.circuit_breaker:Server weather_server recovered after 3 failures
|
|
372
|
+
```
|
|
373
|
+
|
|
374
|
+
## Configuration Examples
|
|
375
|
+
|
|
376
|
+
### Development Environment
|
|
377
|
+
|
|
378
|
+
```python
|
|
379
|
+
# Lenient configuration for development
|
|
380
|
+
dev_config = CircuitBreakerConfig(
|
|
381
|
+
max_failures=5, # Allow more failures
|
|
382
|
+
reset_time_seconds=30, # Quick recovery
|
|
383
|
+
backoff_multiplier=1.2, # Gentle backoff
|
|
384
|
+
max_backoff_multiplier=2 # Short maximum wait
|
|
385
|
+
)
|
|
386
|
+
```
|
|
387
|
+
|
|
388
|
+
### Production Environment
|
|
389
|
+
|
|
390
|
+
```python
|
|
391
|
+
# Strict configuration for production
|
|
392
|
+
prod_config = CircuitBreakerConfig(
|
|
393
|
+
max_failures=2, # Fail fast
|
|
394
|
+
reset_time_seconds=300, # Standard recovery time
|
|
395
|
+
backoff_multiplier=2, # Standard backoff
|
|
396
|
+
max_backoff_multiplier=8 # Reasonable maximum wait
|
|
397
|
+
)
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
### High-Availability Setup
|
|
401
|
+
|
|
402
|
+
```python
|
|
403
|
+
# Configuration for critical systems
|
|
404
|
+
ha_config = CircuitBreakerConfig(
|
|
405
|
+
max_failures=1, # Single failure triggers circuit
|
|
406
|
+
reset_time_seconds=600, # Longer recovery time
|
|
407
|
+
backoff_multiplier=3, # Aggressive backoff
|
|
408
|
+
max_backoff_multiplier=16 # Extended maximum wait
|
|
409
|
+
)
|
|
410
|
+
```
|
|
411
|
+
|
|
412
|
+
## Troubleshooting
|
|
413
|
+
|
|
414
|
+
### Common Issues
|
|
415
|
+
|
|
416
|
+
#### Servers Stuck in Failing State
|
|
417
|
+
|
|
418
|
+
**Symptoms**: Servers remain in a failing state even after the underlying issues are resolved.
|
|
419
|
+
|
|
420
|
+
**Causes**:
|
|
421
|
+
- Network issues are resolved, but the circuit breaker has not yet reset
|
|
422
|
+
- Clock skew affecting time calculations (only relevant if failure timestamps are not taken from a monotonic clock)
|
|
423
|
+
- Configuration with very long backoff times
|
|
424
|
+
|
|
425
|
+
**Solutions**:
|
|
426
|
+
```python
|
|
427
|
+
# Manual reset
|
|
428
|
+
circuit_breaker.reset_all_servers()
|
|
429
|
+
|
|
430
|
+
# Check server status
|
|
431
|
+
failure_count, last_failure, is_open = circuit_breaker.get_server_status("server_name")
|
|
432
|
+
print(f"Circuit open: {is_open}, failures: {failure_count}")
|
|
433
|
+
|
|
434
|
+
# Verify time calculations
|
|
435
|
+
import time
|
|
436
|
+
current_time = time.monotonic()
|
|
437
|
+
time_since_failure = current_time - last_failure
|
|
438
|
+
print(f"Time since last failure: {time_since_failure:.1f}s")
|
|
439
|
+
```
|
|
440
|
+
|
|
441
|
+
#### Premature Circuit Opening
|
|
442
|
+
|
|
443
|
+
**Symptoms**: The circuit opens too quickly in response to transient failures.
|
|
444
|
+
|
|
445
|
+
**Causes**:
|
|
446
|
+
- `max_failures` set too low
|
|
447
|
+
- Transient network issues counted as failures
|
|
448
|
+
- Health check too sensitive
|
|
449
|
+
|
|
450
|
+
**Solutions**:
|
|
451
|
+
```python
|
|
452
|
+
# Increase failure threshold
|
|
453
|
+
config = CircuitBreakerConfig(
|
|
454
|
+
max_failures=5, # Increased from default 3
|
|
455
|
+
reset_time_seconds=60 # Shorter recovery time
|
|
456
|
+
)
|
|
457
|
+
|
|
458
|
+
# Implement retry logic before recording a failure (this snippet also needs `import asyncio`)
|
|
459
|
+
async def robust_operation(client, circuit_breaker, server_name):
|
|
460
|
+
max_retries = 2
|
|
461
|
+
for attempt in range(max_retries + 1):
|
|
462
|
+
try:
|
|
463
|
+
result = await client.call_tool("tool_name", {})
|
|
464
|
+
circuit_breaker.record_success(server_name)
|
|
465
|
+
return result
|
|
466
|
+
except Exception as e:
|
|
467
|
+
if attempt == max_retries:
|
|
468
|
+
circuit_breaker.record_failure(server_name)
|
|
469
|
+
raise
|
|
470
|
+
await asyncio.sleep(1) # Brief retry delay
|
|
471
|
+
```
|
|
472
|
+
|
|
473
|
+
#### Recovery Monitoring Issues
|
|
474
|
+
|
|
475
|
+
**Symptoms**: Difficulty tracking when servers recover.
|
|
476
|
+
|
|
477
|
+
**Solutions**:
|
|
478
|
+
```python
|
|
479
|
+
# Implement recovery monitoring
|
|
480
|
+
async def monitor_recovery():
|
|
481
|
+
while True:
|
|
482
|
+
failing_servers = circuit_breaker.get_all_failing_servers()
|
|
483
|
+
|
|
484
|
+
for server_name, status in failing_servers.items():
|
|
485
|
+
if status['time_remaining'] <= 0:
|
|
486
|
+
print(f"Server {server_name} ready for retry")
|
|
487
|
+
|
|
488
|
+
# Optionally trigger health check
|
|
489
|
+
try:
|
|
490
|
+
# Attempt connection (test_server_connection is a placeholder for your own health-check coroutine)
|
|
491
|
+
success = await test_server_connection(server_name)
|
|
492
|
+
if success:
|
|
493
|
+
circuit_breaker.record_success(server_name)
|
|
494
|
+
print(f"Server {server_name} recovered!")
|
|
495
|
+
except Exception:
|
|
496
|
+
circuit_breaker.record_failure(server_name)
|
|
497
|
+
|
|
498
|
+
await asyncio.sleep(10)
|
|
499
|
+
```
|
|
500
|
+
|
|
501
|
+
### Debugging Tips
|
|
502
|
+
|
|
503
|
+
#### Enable Debug Logging
|
|
504
|
+
|
|
505
|
+
```python
|
|
506
|
+
import logging
|
|
507
|
+
|
|
508
|
+
# Enable debug logging for circuit breaker
|
|
509
|
+
logging.getLogger('massgen.mcp_tools.circuit_breaker').setLevel(logging.DEBUG)
|
|
510
|
+
|
|
511
|
+
# This will show detailed circuit breaker operations:
|
|
512
|
+
# DEBUG:massgen.mcp_tools.circuit_breaker:Server weather_server failure recorded (2/3)
|
|
513
|
+
# DEBUG:massgen.mcp_tools.circuit_breaker:Checking circuit breaker for weather_server: 2 failures, 120.5s since last failure
|
|
514
|
+
```
|
|
515
|
+
|
|
516
|
+
#### Status Inspection
|
|
517
|
+
|
|
518
|
+
```python
|
|
519
|
+
# Get detailed circuit breaker state
|
|
520
|
+
def inspect_circuit_breaker(circuit_breaker):
|
|
521
|
+
print(f"Circuit breaker: {circuit_breaker}")
|
|
522
|
+
|
|
523
|
+
failing_servers = circuit_breaker.get_all_failing_servers()
|
|
524
|
+
if not failing_servers:
|
|
525
|
+
print("No failing servers")
|
|
526
|
+
return
|
|
527
|
+
|
|
528
|
+
for server_name, status in failing_servers.items():
|
|
529
|
+
print(f"\nServer: {server_name}")
|
|
530
|
+
print(f" Failure count: {status['failure_count']}")
|
|
531
|
+
print(f" Last failure: {status['last_failure_time']}")
|
|
532
|
+
print(f" Backoff time: {status['backoff_time']:.1f}s")
|
|
533
|
+
print(f" Time remaining: {status['time_remaining']:.1f}s")
|
|
534
|
+
print(f" Circuit open: {status['is_circuit_open']}")
|
|
535
|
+
```
|
|
536
|
+
|
|
537
|
+
## Best Practices
|
|
538
|
+
|
|
539
|
+
### Configuration Guidelines
|
|
540
|
+
|
|
541
|
+
1. **Development**: Use lenient settings to avoid interrupting development workflow
|
|
542
|
+
2. **Testing**: Use moderate settings that allow testing failure scenarios
|
|
543
|
+
3. **Production**: Use conservative settings that prioritize system stability
|
|
544
|
+
4. **Critical Systems**: Use strict settings with longer backoff times
|
|
545
|
+
|
|
546
|
+
### Integration Patterns
|
|
547
|
+
|
|
548
|
+
1. **Always check the circuit breaker** before attempting operations, so that known-failing servers are skipped
|
|
549
|
+
2. **Record successes** to allow natural recovery
|
|
550
|
+
3. **Implement health checks** to proactively detect server recovery
|
|
551
|
+
4. **Monitor circuit breaker state** in production environments
|
|
552
|
+
5. **Use appropriate logging levels** for different environments
|
|
553
|
+
|
|
554
|
+
### Error Handling
|
|
555
|
+
|
|
556
|
+
```python
|
|
557
|
+
from massgen.mcp_tools.exceptions import MCPConnectionError, MCPTimeoutError
|
|
558
|
+
|
|
559
|
+
async def robust_mcp_operation(client, circuit_breaker, server_name):
|
|
560
|
+
# Check circuit breaker first
|
|
561
|
+
if circuit_breaker.should_skip_server(server_name):
|
|
562
|
+
raise MCPConnectionError(
|
|
563
|
+
f"Server {server_name} is currently failing",
|
|
564
|
+
server_name=server_name
|
|
565
|
+
)
|
|
566
|
+
|
|
567
|
+
try:
|
|
568
|
+
result = await client.call_tool("tool_name", {})
|
|
569
|
+
circuit_breaker.record_success(server_name)
|
|
570
|
+
return result
|
|
571
|
+
except (MCPConnectionError, MCPTimeoutError) as e:
|
|
572
|
+
# Record failure for connection/timeout errors
|
|
573
|
+
circuit_breaker.record_failure(server_name)
|
|
574
|
+
raise
|
|
575
|
+
except Exception as e:
|
|
576
|
+
# Don't record failure for application-level errors
|
|
577
|
+
logger.warning(f"Application error (not recording failure): {e}")
|
|
578
|
+
raise
|
|
579
|
+
```
|
|
580
|
+
|
|
581
|
+
### Monitoring Integration
|
|
582
|
+
|
|
583
|
+
```python
|
|
584
|
+
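# Example Prometheus metrics integration (call update_metrics periodically; note that the failures counter is incremented once per failing server on every call, so it counts polls that observed a failing server rather than individual failures)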
|
|
585
|
+
from prometheus_client import Counter, Gauge, Histogram
|
|
586
|
+
|
|
587
|
+
# Metrics
|
|
588
|
+
circuit_breaker_failures = Counter('mcp_circuit_breaker_failures_total',
|
|
589
|
+
'Total circuit breaker failures', ['server'])
|
|
590
|
+
circuit_breaker_state = Gauge('mcp_circuit_breaker_open',
|
|
591
|
+
'Circuit breaker open state', ['server'])
|
|
592
|
+
circuit_breaker_backoff = Histogram('mcp_circuit_breaker_backoff_seconds',
|
|
593
|
+
'Circuit breaker backoff times', ['server'])
|
|
594
|
+
|
|
595
|
+
def update_metrics(circuit_breaker):
|
|
596
|
+
failing_servers = circuit_breaker.get_all_failing_servers()
|
|
597
|
+
|
|
598
|
+
for server_name, status in failing_servers.items():
|
|
599
|
+
circuit_breaker_failures.labels(server=server_name).inc()
|
|
600
|
+
circuit_breaker_state.labels(server=server_name).set(
|
|
601
|
+
1 if status['is_circuit_open'] else 0
|
|
602
|
+
)
|
|
603
|
+
circuit_breaker_backoff.labels(server=server_name).observe(
|
|
604
|
+
status['backoff_time']
|
|
605
|
+
)
|
|
606
|
+
```
|
|
607
|
+
|
|
608
|
+
## Exception Integration
|
|
609
|
+
|
|
610
|
+
The circuit breaker works seamlessly with the MCP exception system:
|
|
611
|
+
|
|
612
|
+
```python
|
|
613
|
+
from massgen.mcp_tools.exceptions import (
|
|
614
|
+
MCPConnectionError, MCPTimeoutError, MCPServerError
|
|
615
|
+
)
|
|
616
|
+
|
|
617
|
+
# Circuit breaker automatically handles these exception types:
|
|
618
|
+
# - MCPConnectionError: Records failure, triggers circuit breaker
|
|
619
|
+
# - MCPTimeoutError: Records failure, triggers circuit breaker
|
|
620
|
+
# - MCPServerError: May record failure depending on error type
|
|
621
|
+
|
|
622
|
+
async def handle_mcp_errors(client, circuit_breaker, server_name):
|
|
623
|
+
try:
|
|
624
|
+
result = await client.call_tool("tool_name", {})
|
|
625
|
+
circuit_breaker.record_success(server_name)
|
|
626
|
+
return result
|
|
627
|
+
except MCPConnectionError as e:
|
|
628
|
+
# Connection failures always trigger circuit breaker
|
|
629
|
+
circuit_breaker.record_failure(server_name)
|
|
630
|
+
e.log_error() # Use exception's built-in logging
|
|
631
|
+
raise
|
|
632
|
+
except MCPTimeoutError as e:
|
|
633
|
+
# Timeout failures trigger circuit breaker
|
|
634
|
+
circuit_breaker.record_failure(server_name)
|
|
635
|
+
e.log_error()
|
|
636
|
+
raise
|
|
637
|
+
except MCPServerError as e:
|
|
638
|
+
# Server errors may or may not trigger circuit breaker
|
|
639
|
+
if e.http_status and e.http_status >= 500:
|
|
640
|
+
# 5xx errors indicate server problems
|
|
641
|
+
circuit_breaker.record_failure(server_name)
|
|
642
|
+
e.log_error()
|
|
643
|
+
raise
|
|
644
|
+
```
|
|
645
|
+
|
|
646
|
+
This integration ensures that the circuit breaker responds appropriately to different types of failures while maintaining detailed error context through the exception system.
|