mcp-hangar 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. mcp_hangar/__init__.py +139 -0
  2. mcp_hangar/application/__init__.py +1 -0
  3. mcp_hangar/application/commands/__init__.py +67 -0
  4. mcp_hangar/application/commands/auth_commands.py +118 -0
  5. mcp_hangar/application/commands/auth_handlers.py +296 -0
  6. mcp_hangar/application/commands/commands.py +59 -0
  7. mcp_hangar/application/commands/handlers.py +189 -0
  8. mcp_hangar/application/discovery/__init__.py +21 -0
  9. mcp_hangar/application/discovery/discovery_metrics.py +283 -0
  10. mcp_hangar/application/discovery/discovery_orchestrator.py +497 -0
  11. mcp_hangar/application/discovery/lifecycle_manager.py +315 -0
  12. mcp_hangar/application/discovery/security_validator.py +414 -0
  13. mcp_hangar/application/event_handlers/__init__.py +50 -0
  14. mcp_hangar/application/event_handlers/alert_handler.py +191 -0
  15. mcp_hangar/application/event_handlers/audit_handler.py +203 -0
  16. mcp_hangar/application/event_handlers/knowledge_base_handler.py +120 -0
  17. mcp_hangar/application/event_handlers/logging_handler.py +69 -0
  18. mcp_hangar/application/event_handlers/metrics_handler.py +152 -0
  19. mcp_hangar/application/event_handlers/persistent_audit_store.py +217 -0
  20. mcp_hangar/application/event_handlers/security_handler.py +604 -0
  21. mcp_hangar/application/mcp/tooling.py +158 -0
  22. mcp_hangar/application/ports/__init__.py +9 -0
  23. mcp_hangar/application/ports/observability.py +237 -0
  24. mcp_hangar/application/queries/__init__.py +52 -0
  25. mcp_hangar/application/queries/auth_handlers.py +237 -0
  26. mcp_hangar/application/queries/auth_queries.py +118 -0
  27. mcp_hangar/application/queries/handlers.py +227 -0
  28. mcp_hangar/application/read_models/__init__.py +11 -0
  29. mcp_hangar/application/read_models/provider_views.py +139 -0
  30. mcp_hangar/application/sagas/__init__.py +11 -0
  31. mcp_hangar/application/sagas/group_rebalance_saga.py +137 -0
  32. mcp_hangar/application/sagas/provider_failover_saga.py +266 -0
  33. mcp_hangar/application/sagas/provider_recovery_saga.py +172 -0
  34. mcp_hangar/application/services/__init__.py +9 -0
  35. mcp_hangar/application/services/provider_service.py +208 -0
  36. mcp_hangar/application/services/traced_provider_service.py +211 -0
  37. mcp_hangar/bootstrap/runtime.py +328 -0
  38. mcp_hangar/context.py +178 -0
  39. mcp_hangar/domain/__init__.py +117 -0
  40. mcp_hangar/domain/contracts/__init__.py +57 -0
  41. mcp_hangar/domain/contracts/authentication.py +225 -0
  42. mcp_hangar/domain/contracts/authorization.py +229 -0
  43. mcp_hangar/domain/contracts/event_store.py +178 -0
  44. mcp_hangar/domain/contracts/metrics_publisher.py +59 -0
  45. mcp_hangar/domain/contracts/persistence.py +383 -0
  46. mcp_hangar/domain/contracts/provider_runtime.py +146 -0
  47. mcp_hangar/domain/discovery/__init__.py +20 -0
  48. mcp_hangar/domain/discovery/conflict_resolver.py +267 -0
  49. mcp_hangar/domain/discovery/discovered_provider.py +185 -0
  50. mcp_hangar/domain/discovery/discovery_service.py +412 -0
  51. mcp_hangar/domain/discovery/discovery_source.py +192 -0
  52. mcp_hangar/domain/events.py +433 -0
  53. mcp_hangar/domain/exceptions.py +525 -0
  54. mcp_hangar/domain/model/__init__.py +70 -0
  55. mcp_hangar/domain/model/aggregate.py +58 -0
  56. mcp_hangar/domain/model/circuit_breaker.py +152 -0
  57. mcp_hangar/domain/model/event_sourced_api_key.py +413 -0
  58. mcp_hangar/domain/model/event_sourced_provider.py +423 -0
  59. mcp_hangar/domain/model/event_sourced_role_assignment.py +268 -0
  60. mcp_hangar/domain/model/health_tracker.py +183 -0
  61. mcp_hangar/domain/model/load_balancer.py +185 -0
  62. mcp_hangar/domain/model/provider.py +810 -0
  63. mcp_hangar/domain/model/provider_group.py +656 -0
  64. mcp_hangar/domain/model/tool_catalog.py +105 -0
  65. mcp_hangar/domain/policies/__init__.py +19 -0
  66. mcp_hangar/domain/policies/provider_health.py +187 -0
  67. mcp_hangar/domain/repository.py +249 -0
  68. mcp_hangar/domain/security/__init__.py +85 -0
  69. mcp_hangar/domain/security/input_validator.py +710 -0
  70. mcp_hangar/domain/security/rate_limiter.py +387 -0
  71. mcp_hangar/domain/security/roles.py +237 -0
  72. mcp_hangar/domain/security/sanitizer.py +387 -0
  73. mcp_hangar/domain/security/secrets.py +501 -0
  74. mcp_hangar/domain/services/__init__.py +20 -0
  75. mcp_hangar/domain/services/audit_service.py +376 -0
  76. mcp_hangar/domain/services/image_builder.py +328 -0
  77. mcp_hangar/domain/services/provider_launcher.py +1046 -0
  78. mcp_hangar/domain/value_objects.py +1138 -0
  79. mcp_hangar/errors.py +818 -0
  80. mcp_hangar/fastmcp_server.py +1105 -0
  81. mcp_hangar/gc.py +134 -0
  82. mcp_hangar/infrastructure/__init__.py +79 -0
  83. mcp_hangar/infrastructure/async_executor.py +133 -0
  84. mcp_hangar/infrastructure/auth/__init__.py +37 -0
  85. mcp_hangar/infrastructure/auth/api_key_authenticator.py +388 -0
  86. mcp_hangar/infrastructure/auth/event_sourced_store.py +567 -0
  87. mcp_hangar/infrastructure/auth/jwt_authenticator.py +360 -0
  88. mcp_hangar/infrastructure/auth/middleware.py +340 -0
  89. mcp_hangar/infrastructure/auth/opa_authorizer.py +243 -0
  90. mcp_hangar/infrastructure/auth/postgres_store.py +659 -0
  91. mcp_hangar/infrastructure/auth/projections.py +366 -0
  92. mcp_hangar/infrastructure/auth/rate_limiter.py +311 -0
  93. mcp_hangar/infrastructure/auth/rbac_authorizer.py +323 -0
  94. mcp_hangar/infrastructure/auth/sqlite_store.py +624 -0
  95. mcp_hangar/infrastructure/command_bus.py +112 -0
  96. mcp_hangar/infrastructure/discovery/__init__.py +110 -0
  97. mcp_hangar/infrastructure/discovery/docker_source.py +289 -0
  98. mcp_hangar/infrastructure/discovery/entrypoint_source.py +249 -0
  99. mcp_hangar/infrastructure/discovery/filesystem_source.py +383 -0
  100. mcp_hangar/infrastructure/discovery/kubernetes_source.py +247 -0
  101. mcp_hangar/infrastructure/event_bus.py +260 -0
  102. mcp_hangar/infrastructure/event_sourced_repository.py +443 -0
  103. mcp_hangar/infrastructure/event_store.py +396 -0
  104. mcp_hangar/infrastructure/knowledge_base/__init__.py +259 -0
  105. mcp_hangar/infrastructure/knowledge_base/contracts.py +202 -0
  106. mcp_hangar/infrastructure/knowledge_base/memory.py +177 -0
  107. mcp_hangar/infrastructure/knowledge_base/postgres.py +545 -0
  108. mcp_hangar/infrastructure/knowledge_base/sqlite.py +513 -0
  109. mcp_hangar/infrastructure/metrics_publisher.py +36 -0
  110. mcp_hangar/infrastructure/observability/__init__.py +10 -0
  111. mcp_hangar/infrastructure/observability/langfuse_adapter.py +534 -0
  112. mcp_hangar/infrastructure/persistence/__init__.py +33 -0
  113. mcp_hangar/infrastructure/persistence/audit_repository.py +371 -0
  114. mcp_hangar/infrastructure/persistence/config_repository.py +398 -0
  115. mcp_hangar/infrastructure/persistence/database.py +333 -0
  116. mcp_hangar/infrastructure/persistence/database_common.py +330 -0
  117. mcp_hangar/infrastructure/persistence/event_serializer.py +280 -0
  118. mcp_hangar/infrastructure/persistence/event_upcaster.py +166 -0
  119. mcp_hangar/infrastructure/persistence/in_memory_event_store.py +150 -0
  120. mcp_hangar/infrastructure/persistence/recovery_service.py +312 -0
  121. mcp_hangar/infrastructure/persistence/sqlite_event_store.py +386 -0
  122. mcp_hangar/infrastructure/persistence/unit_of_work.py +409 -0
  123. mcp_hangar/infrastructure/persistence/upcasters/README.md +13 -0
  124. mcp_hangar/infrastructure/persistence/upcasters/__init__.py +7 -0
  125. mcp_hangar/infrastructure/query_bus.py +153 -0
  126. mcp_hangar/infrastructure/saga_manager.py +401 -0
  127. mcp_hangar/logging_config.py +209 -0
  128. mcp_hangar/metrics.py +1007 -0
  129. mcp_hangar/models.py +31 -0
  130. mcp_hangar/observability/__init__.py +54 -0
  131. mcp_hangar/observability/health.py +487 -0
  132. mcp_hangar/observability/metrics.py +319 -0
  133. mcp_hangar/observability/tracing.py +433 -0
  134. mcp_hangar/progress.py +542 -0
  135. mcp_hangar/retry.py +613 -0
  136. mcp_hangar/server/__init__.py +120 -0
  137. mcp_hangar/server/__main__.py +6 -0
  138. mcp_hangar/server/auth_bootstrap.py +340 -0
  139. mcp_hangar/server/auth_cli.py +335 -0
  140. mcp_hangar/server/auth_config.py +305 -0
  141. mcp_hangar/server/bootstrap.py +735 -0
  142. mcp_hangar/server/cli.py +161 -0
  143. mcp_hangar/server/config.py +224 -0
  144. mcp_hangar/server/context.py +215 -0
  145. mcp_hangar/server/http_auth_middleware.py +165 -0
  146. mcp_hangar/server/lifecycle.py +467 -0
  147. mcp_hangar/server/state.py +117 -0
  148. mcp_hangar/server/tools/__init__.py +16 -0
  149. mcp_hangar/server/tools/discovery.py +186 -0
  150. mcp_hangar/server/tools/groups.py +75 -0
  151. mcp_hangar/server/tools/health.py +301 -0
  152. mcp_hangar/server/tools/provider.py +939 -0
  153. mcp_hangar/server/tools/registry.py +320 -0
  154. mcp_hangar/server/validation.py +113 -0
  155. mcp_hangar/stdio_client.py +229 -0
  156. mcp_hangar-0.2.0.dist-info/METADATA +347 -0
  157. mcp_hangar-0.2.0.dist-info/RECORD +160 -0
  158. mcp_hangar-0.2.0.dist-info/WHEEL +4 -0
  159. mcp_hangar-0.2.0.dist-info/entry_points.txt +2 -0
  160. mcp_hangar-0.2.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,118 @@
1
+ """Authentication and Authorization queries.
2
+
3
+ Queries represent read operations in CQRS pattern.
4
+ They do not modify state, only retrieve data.
5
+ """
6
+
7
+ from dataclasses import dataclass
8
+
9
+
10
@dataclass(frozen=True)
class Query:
    """Common immutable root type that every query object derives from.

    It declares no fields of its own; subclasses add the parameters of the
    read operation they describe.
    """
15
+
16
+
17
+ # =============================================================================
18
+ # API Key Queries
19
+ # =============================================================================
20
+
21
+
22
@dataclass(frozen=True)
class GetApiKeysByPrincipalQuery(Query):
    """Request for every API key that belongs to one principal.

    Attributes:
        principal_id: Identifier of the principal whose keys are wanted.
        include_revoked: When true, revoked keys are part of the result.
    """

    principal_id: str
    # NOTE: revoked keys are included unless the caller opts out.
    include_revoked: bool = True
33
+
34
+
35
@dataclass(frozen=True)
class GetApiKeyCountQuery(Query):
    """Request for the number of active API keys held by a principal.

    Attributes:
        principal_id: Identifier of the principal whose keys are counted.
    """

    principal_id: str
44
+
45
+
46
+ # =============================================================================
47
+ # Role Queries
48
+ # =============================================================================
49
+
50
+
51
@dataclass(frozen=True)
class GetRolesForPrincipalQuery(Query):
    """Request for the roles that have been assigned to a principal.

    Attributes:
        principal_id: Identifier of the principal whose roles are wanted.
        scope: Scope to filter by; the default ``"*"`` selects all scopes.
    """

    principal_id: str
    scope: str = "*"
62
+
63
+
64
@dataclass(frozen=True)
class GetRoleQuery(Query):
    """Request for a single role, identified by its name.

    Attributes:
        role_name: Name of the role being looked up.
    """

    role_name: str
73
+
74
+
75
@dataclass(frozen=True)
class ListBuiltinRolesQuery(Query):
    """Request for the full list of built-in roles; takes no parameters."""
80
+
81
+
82
@dataclass(frozen=True)
class CheckPermissionQuery(Query):
    """Request asking whether a principal holds a particular permission.

    Attributes:
        principal_id: Identifier of the principal being checked.
        action: The action the principal wants to perform.
        resource_type: Kind of resource the action targets.
        resource_id: Identifier of the specific resource; defaults to ``"*"``.
    """

    principal_id: str
    action: str
    resource_type: str
    resource_id: str = "*"
97
+
98
+
99
+ # =============================================================================
100
+ # Audit Queries
101
+ # =============================================================================
102
+
103
+
104
@dataclass(frozen=True)
class GetAuthAuditLogQuery(Query):
    """Request for entries of the authentication audit log.

    Every filter is optional; a value of ``None`` means "do not filter".

    Attributes:
        principal_id: Restrict entries to this principal, if given.
        event_type: Restrict entries to this event type, if given.
        limit: Upper bound on the number of entries (default 100).
        since_timestamp: Only entries after this time, if given.
    """

    principal_id: str | None = None
    event_type: str | None = None
    limit: int = 100
    since_timestamp: float | None = None
@@ -0,0 +1,227 @@
1
+ """Query handlers implementation."""
2
+
3
+ import time
4
+ from typing import Dict, List
5
+
6
+ from ...domain.exceptions import ProviderNotFoundError
7
+ from ...domain.policies.provider_health import to_health_status_string
8
+ from ...domain.repository import IProviderRepository
9
+ from ...infrastructure.query_bus import (
10
+ GetProviderHealthQuery,
11
+ GetProviderQuery,
12
+ GetProviderToolsQuery,
13
+ GetSystemMetricsQuery,
14
+ ListProvidersQuery,
15
+ QueryBus,
16
+ QueryHandler,
17
+ )
18
+ from ...logging_config import get_logger
19
+ from ..read_models import HealthInfo, ProviderDetails, ProviderSummary, SystemMetrics, ToolInfo
20
+
21
+ logger = get_logger(__name__)
22
+
23
+
24
class BaseQueryHandler(QueryHandler):
    """Shared foundation for the provider query handlers.

    Keeps a reference to the provider repository and offers helpers that
    look up a provider and translate it into read-model objects.
    """

    def __init__(self, repository: IProviderRepository):
        self._repository = repository

    def _get_provider(self, provider_id: str):
        """Return the provider stored under ``provider_id``.

        Raises:
            ProviderNotFoundError: If the repository returns ``None``.
        """
        found = self._repository.get(provider_id)
        if found is not None:
            return found
        raise ProviderNotFoundError(provider_id)

    def _get_health_status(self, provider) -> str:
        """Return the health status string for ``provider``.

        The classification itself lives in the domain policy
        ``to_health_status_string``; this method only forwards the
        provider's state and consecutive failure count to it.
        """
        failures = provider.health.consecutive_failures
        return to_health_status_string(
            state=provider.state,
            consecutive_failures=failures,
        )

    def _build_health_info(self, provider) -> HealthInfo:
        """Create a ``HealthInfo`` read model from the provider's health data.

        The "ago" values are seconds elapsed between now and the recorded
        timestamp; they stay ``None`` when the timestamp is falsy.
        """
        tracker = provider.health
        current = time.time()

        since_success = current - tracker.last_success_at if tracker.last_success_at else None
        since_failure = current - tracker.last_failure_at if tracker.last_failure_at else None

        return HealthInfo(
            consecutive_failures=tracker.consecutive_failures,
            total_invocations=tracker.total_invocations,
            total_failures=tracker.total_failures,
            success_rate=tracker.success_rate,
            can_retry=tracker.can_retry(),
            last_success_ago=since_success,
            last_failure_ago=since_failure,
        )

    def _build_tool_info(self, tool) -> ToolInfo:
        """Create a ``ToolInfo`` read model by copying the tool schema fields."""
        fields = {
            "name": tool.name,
            "description": tool.description,
            "input_schema": tool.input_schema,
            "output_schema": tool.output_schema,
        }
        return ToolInfo(**fields)
79
+
80
+
81
class ListProvidersHandler(BaseQueryHandler):
    """Answers ``ListProvidersQuery`` with one summary per provider."""

    def handle(self, query: ListProvidersQuery) -> List[ProviderSummary]:
        """Summarise all providers, optionally restricted to one state.

        A provider is included when the query has no (truthy) state filter
        or when its state value equals that filter.

        Returns:
            List of ProviderSummary
        """
        summaries: List[ProviderSummary] = []
        for pid, entry in self._repository.get_all().items():
            current_state = entry.state.value

            # Keep the provider only if no filter is set or it matches.
            if not query.state_filter or current_state == query.state_filter:
                summaries.append(
                    ProviderSummary(
                        provider_id=pid,
                        state=current_state,
                        mode=entry.mode.value,
                        is_alive=entry.is_alive,
                        tools_count=entry.tools.count(),
                        health_status=self._get_health_status(entry),
                        description=entry.description,
                        tools_predefined=entry.tools_predefined,
                    )
                )

        return summaries
112
+
113
+
114
class GetProviderHandler(BaseQueryHandler):
    """Answers ``GetProviderQuery`` with the detailed view of one provider."""

    def handle(self, query: GetProviderQuery) -> ProviderDetails:
        """Build the detailed read model for the requested provider.

        Returns:
            ProviderDetails

        Raises:
            ProviderNotFoundError: If the provider lookup yields nothing.
        """
        target = self._get_provider(query.provider_id)

        tool_views = list(map(self._build_tool_info, target.tools))
        health_view = self._build_health_info(target)

        return ProviderDetails(
            provider_id=query.provider_id,
            state=target.state.value,
            mode=target.mode.value,
            is_alive=target.is_alive,
            tools=tool_views,
            health=health_view,
            idle_time=target.idle_time,
            meta=target.meta,
        )
139
+
140
+
141
class GetProviderToolsHandler(BaseQueryHandler):
    """Answers ``GetProviderToolsQuery`` with the tools of one provider."""

    def handle(self, query: GetProviderToolsQuery) -> List[ToolInfo]:
        """Return a ``ToolInfo`` for each tool of the requested provider.

        Returns:
            List of ToolInfo

        Raises:
            ProviderNotFoundError: If the provider lookup yields nothing.
        """
        target = self._get_provider(query.provider_id)
        return list(map(self._build_tool_info, target.tools))
153
+
154
+
155
class GetProviderHealthHandler(BaseQueryHandler):
    """Answers ``GetProviderHealthQuery`` with one provider's health data."""

    def handle(self, query: GetProviderHealthQuery) -> HealthInfo:
        """Return the health read model of the requested provider.

        Returns:
            HealthInfo

        Raises:
            ProviderNotFoundError: If the provider lookup yields nothing.
        """
        return self._build_health_info(self._get_provider(query.provider_id))
167
+
168
+
169
class GetSystemMetricsHandler(BaseQueryHandler):
    """Answers ``GetSystemMetricsQuery`` with totals across all providers."""

    def handle(self, query: GetSystemMetricsQuery) -> SystemMetrics:
        """Aggregate per-provider numbers into system-wide metrics.

        Counts providers per state and sums tool, invocation and failure
        counts. The overall success rate is 1.0 when nothing was invoked.

        Returns:
            SystemMetrics
        """
        all_providers = self._repository.get_all()
        provider_count = len(all_providers)

        state_counts: Dict[str, int] = {}
        tools_sum = 0
        invocations_sum = 0
        failures_sum = 0

        for entry in all_providers.values():
            # Tally this provider under its current state.
            key = entry.state.value
            state_counts.setdefault(key, 0)
            state_counts[key] += 1

            # Accumulate the numeric totals.
            tools_sum += entry.tools.count()
            invocations_sum += entry.health.total_invocations
            failures_sum += entry.health.total_failures

        # Without any invocation there is nothing that could have failed.
        success_ratio = (
            (invocations_sum - failures_sum) / invocations_sum
            if invocations_sum > 0
            else 1.0
        )

        return SystemMetrics(
            total_providers=provider_count,
            providers_by_state=state_counts,
            total_tools=tools_sum,
            total_invocations=invocations_sum,
            total_failures=failures_sum,
            overall_success_rate=success_ratio,
        )
211
+
212
+
213
def register_all_handlers(query_bus: QueryBus, repository: IProviderRepository) -> None:
    """Wire every provider query type to its handler on the query bus.

    Each handler is created with the given repository and registered in
    a fixed order; a log entry is written once all are registered.

    Args:
        query_bus: The query bus to register handlers with
        repository: Provider repository
    """
    bindings = (
        (ListProvidersQuery, ListProvidersHandler),
        (GetProviderQuery, GetProviderHandler),
        (GetProviderToolsQuery, GetProviderToolsHandler),
        (GetProviderHealthQuery, GetProviderHealthHandler),
        (GetSystemMetricsQuery, GetSystemMetricsHandler),
    )
    for query_type, handler_cls in bindings:
        query_bus.register(query_type, handler_cls(repository))

    logger.info("query_handlers_registered")
@@ -0,0 +1,11 @@
1
+ """Read models for optimized queries."""
2
+
3
+ from .provider_views import HealthInfo, ProviderDetails, ProviderSummary, SystemMetrics, ToolInfo
4
+
5
+ __all__ = [
6
+ "ProviderSummary",
7
+ "ProviderDetails",
8
+ "ToolInfo",
9
+ "HealthInfo",
10
+ "SystemMetrics",
11
+ ]
@@ -0,0 +1,139 @@
1
+ """Read models (views) for provider queries.
2
+
3
+ Read models are optimized for specific query use cases.
4
+ They are immutable and contain only the data needed for display.
5
+ """
6
+
7
+ from dataclasses import dataclass, field
8
+ from typing import Any, Dict, List, Optional
9
+
10
+
11
@dataclass(frozen=True)
class ToolInfo:
    """Immutable view of a single tool: name, description and schemas."""

    name: str
    description: str
    input_schema: Dict[str, Any]
    output_schema: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the tool as a dict with camelCase schema keys.

        ``outputSchema`` is only present when an output schema was set
        (that is, when it is not ``None``).
        """
        payload: Dict[str, Any] = {
            "name": self.name,
            "description": self.description,
            "inputSchema": self.input_schema,
        }
        if self.output_schema is None:
            return payload
        payload["outputSchema"] = self.output_schema
        return payload
30
+
31
+
32
@dataclass(frozen=True)
class HealthInfo:
    """Immutable view of a provider's health counters."""

    consecutive_failures: int
    total_invocations: int
    total_failures: int
    success_rate: float
    can_retry: bool
    last_success_ago: Optional[float] = None  # seconds since last success
    last_failure_ago: Optional[float] = None  # seconds since last failure

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a dict keyed by field name."""
        keys = (
            "consecutive_failures",
            "total_invocations",
            "total_failures",
            "success_rate",
            "can_retry",
            "last_success_ago",
            "last_failure_ago",
        )
        return {key: getattr(self, key) for key in keys}
55
+
56
+
57
@dataclass(frozen=True)
class ProviderSummary:
    """Immutable list-view entry for one provider.

    Carries only the small set of fields needed to list providers.
    """

    provider_id: str
    state: str
    mode: str
    is_alive: bool
    tools_count: int
    health_status: str  # healthy, degraded, unhealthy
    description: Optional[str] = None
    tools_predefined: bool = False  # True if tools were defined in config (no startup needed)

    def to_dict(self) -> Dict[str, Any]:
        """Return the summary as a dict.

        ``provider_id`` and ``is_alive`` are emitted as ``provider`` and
        ``alive``. ``description`` is only added when it is truthy.
        """
        payload: Dict[str, Any] = {
            "provider": self.provider_id,
            "state": self.state,
            "mode": self.mode,
            "alive": self.is_alive,
            "tools_count": self.tools_count,
            "health_status": self.health_status,
            "tools_predefined": self.tools_predefined,
        }
        if not self.description:
            return payload
        payload["description"] = self.description
        return payload
87
+
88
+
89
@dataclass(frozen=True)
class ProviderDetails:
    """Immutable detail-view of one provider.

    Bundles the provider's state together with its tools, health data,
    idle time and metadata.
    """

    provider_id: str
    state: str
    mode: str
    is_alive: bool
    tools: List[ToolInfo]
    health: HealthInfo
    idle_time: float
    meta: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the details as a dict, serialising tools and health too.

        ``provider_id`` and ``is_alive`` are emitted as ``provider`` and
        ``alive``; nested read models are converted via their ``to_dict``.
        """
        tool_payloads = [tool.to_dict() for tool in self.tools]
        health_payload = self.health.to_dict()
        return {
            "provider": self.provider_id,
            "state": self.state,
            "mode": self.mode,
            "alive": self.is_alive,
            "tools": tool_payloads,
            "health": health_payload,
            "idle_time": self.idle_time,
            "meta": self.meta,
        }
117
+
118
+
119
@dataclass(frozen=True)
class SystemMetrics:
    """Immutable view of metrics aggregated over all providers."""

    total_providers: int
    providers_by_state: Dict[str, int]
    total_tools: int
    total_invocations: int
    total_failures: int
    overall_success_rate: float

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a dict keyed by field name."""
        keys = (
            "total_providers",
            "providers_by_state",
            "total_tools",
            "total_invocations",
            "total_failures",
            "overall_success_rate",
        )
        return {key: getattr(self, key) for key in keys}
@@ -0,0 +1,11 @@
1
+ """Sagas for orchestrating complex provider workflows."""
2
+
3
+ from .group_rebalance_saga import GroupRebalanceSaga
4
+ from .provider_failover_saga import ProviderFailoverSaga
5
+ from .provider_recovery_saga import ProviderRecoverySaga
6
+
7
+ __all__ = [
8
+ "ProviderRecoverySaga",
9
+ "ProviderFailoverSaga",
10
+ "GroupRebalanceSaga",
11
+ ]
@@ -0,0 +1,137 @@
1
+ """Group Rebalance Saga - automatically rebalances groups based on events.
2
+
3
+ This saga listens for provider health events and updates group member
4
+ rotation status. The actual logic is delegated to ProviderGroup methods.
5
+
6
+ Note: Most of the group health management is already handled by ProviderGroup
7
+ through report_success() and report_failure() calls. This saga primarily
8
+ serves as an event-driven bridge for external events (like health checks)
9
+ that may not flow through the standard invoke path.
10
+ """
11
+
12
+ from typing import Callable, Dict, List, Optional, Type, TYPE_CHECKING
13
+
14
+ from ...domain.events import (
15
+ DomainEvent,
16
+ HealthCheckFailed,
17
+ HealthCheckPassed,
18
+ ProviderDegraded,
19
+ ProviderStarted,
20
+ ProviderStopped,
21
+ )
22
+ from ...infrastructure.saga_manager import EventTriggeredSaga
23
+ from ...logging_config import get_logger
24
+ from ..commands import Command
25
+
26
+ if TYPE_CHECKING:
27
+ from ...domain.model.provider_group import ProviderGroup
28
+
29
+ logger = get_logger(__name__)
30
+
31
+
32
class GroupRebalanceSaga(EventTriggeredSaga):
    """
    Event-driven observer for providers that are members of a group.

    The saga resolves which group a provider belongs to, logs the event,
    and, when a groups mapping was supplied, forwards the outcome to the
    group through ``report_success`` / ``report_failure``. It never emits
    commands of its own.
    """

    def __init__(
        self,
        group_lookup: Optional[Callable[[str], Optional[str]]] = None,
        groups: Optional[Dict[str, "ProviderGroup"]] = None,
    ):
        """
        Set up the saga.

        Args:
            group_lookup: Callable mapping a member_id to its group_id, or
                to None; consulted when no explicit registration exists.
            groups: Mapping of group_id to group used to apply changes
                directly.
        """
        super().__init__()
        self._group_lookup = group_lookup
        self._groups = groups
        # Explicit registrations made through register_member().
        self._member_to_group: Dict[str, str] = {}

    @property
    def saga_type(self) -> str:
        """Identifier of this saga type."""
        return "group_rebalance"

    @property
    def handled_events(self) -> List[Type[DomainEvent]]:
        """Event types this saga wants to receive."""
        return [
            ProviderStarted,
            ProviderStopped,
            ProviderDegraded,
            HealthCheckPassed,
            HealthCheckFailed,
        ]

    def register_member(self, member_id: str, group_id: str) -> None:
        """Record that ``member_id`` belongs to ``group_id``."""
        self._member_to_group[member_id] = group_id

    def unregister_member(self, member_id: str) -> None:
        """Forget the mapping for ``member_id``; unknown ids are ignored."""
        self._member_to_group.pop(member_id, None)

    def _get_group_id(self, member_id: str) -> Optional[str]:
        """Resolve the group of a member.

        Explicit registrations win; otherwise the lookup callable is
        asked, if one was provided.
        """
        registered = self._member_to_group.get(member_id)
        if registered:
            return registered
        return self._group_lookup(member_id) if self._group_lookup else None

    def _get_group(self, group_id: str) -> Optional["ProviderGroup"]:
        """Return the group object for ``group_id`` when groups are known."""
        return self._groups.get(group_id) if self._groups else None

    def handle(self, event: DomainEvent) -> List[Command]:
        """
        React to a provider event that concerns a group member.

        Events without a provider id, or for providers outside any group,
        are ignored. The result is always an empty list because changes
        are applied to the group directly instead of through commands.
        """
        provider_id = getattr(event, "provider_id", None)
        if not provider_id:
            return []

        group_id = self._get_group_id(provider_id)
        if not group_id:
            return []

        group = self._get_group(group_id)

        # Classify the event: log it and decide which report applies.
        if isinstance(event, ProviderStarted):
            logger.info(f"Member {provider_id} started in group {group_id}")
            succeeded = True
        elif isinstance(event, (ProviderStopped, ProviderDegraded)):
            reason = getattr(event, "reason", "unknown")
            logger.info(f"Member {provider_id} unavailable in group {group_id}: {reason}")
            succeeded = False
        elif isinstance(event, HealthCheckPassed):
            logger.debug(f"Health check passed for {provider_id} in group {group_id}")
            succeeded = True
        elif isinstance(event, HealthCheckFailed):
            logger.debug(f"Health check failed for {provider_id} in group {group_id}")
            succeeded = False
        else:
            return []

        if group:
            if succeeded:
                group.report_success(provider_id)
            else:
                group.report_failure(provider_id)

        return []