agent_os_kernel 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_control_plane/__init__.py +662 -0
- agent_control_plane/a2a_adapter.py +543 -0
- agent_control_plane/adapter.py +417 -0
- agent_control_plane/agent_hibernation.py +394 -0
- agent_control_plane/agent_kernel.py +470 -0
- agent_control_plane/compliance.py +720 -0
- agent_control_plane/constraint_graphs.py +478 -0
- agent_control_plane/control_plane.py +854 -0
- agent_control_plane/example_executors.py +195 -0
- agent_control_plane/execution_engine.py +231 -0
- agent_control_plane/flight_recorder.py +846 -0
- agent_control_plane/governance_layer.py +435 -0
- agent_control_plane/hf_utils.py +563 -0
- agent_control_plane/interfaces/__init__.py +55 -0
- agent_control_plane/interfaces/kernel_interface.py +361 -0
- agent_control_plane/interfaces/plugin_interface.py +497 -0
- agent_control_plane/interfaces/protocol_interfaces.py +387 -0
- agent_control_plane/kernel_space.py +1009 -0
- agent_control_plane/langchain_adapter.py +424 -0
- agent_control_plane/lifecycle.py +3113 -0
- agent_control_plane/mcp_adapter.py +653 -0
- agent_control_plane/ml_safety.py +563 -0
- agent_control_plane/multimodal.py +727 -0
- agent_control_plane/mute_agent.py +422 -0
- agent_control_plane/observability.py +787 -0
- agent_control_plane/orchestrator.py +482 -0
- agent_control_plane/plugin_registry.py +750 -0
- agent_control_plane/policy_engine.py +954 -0
- agent_control_plane/process_isolation.py +777 -0
- agent_control_plane/shadow_mode.py +310 -0
- agent_control_plane/signals.py +493 -0
- agent_control_plane/supervisor_agents.py +430 -0
- agent_control_plane/time_travel_debugger.py +557 -0
- agent_control_plane/tool_registry.py +452 -0
- agent_control_plane/vfs.py +697 -0
- agent_kernel/__init__.py +69 -0
- agent_kernel/analyzer.py +435 -0
- agent_kernel/auditor.py +36 -0
- agent_kernel/completeness_auditor.py +237 -0
- agent_kernel/detector.py +203 -0
- agent_kernel/kernel.py +744 -0
- agent_kernel/memory_manager.py +85 -0
- agent_kernel/models.py +374 -0
- agent_kernel/nudge_mechanism.py +263 -0
- agent_kernel/outcome_analyzer.py +338 -0
- agent_kernel/patcher.py +582 -0
- agent_kernel/semantic_analyzer.py +316 -0
- agent_kernel/semantic_purge.py +349 -0
- agent_kernel/simulator.py +449 -0
- agent_kernel/teacher.py +85 -0
- agent_kernel/triage.py +152 -0
- agent_os/__init__.py +409 -0
- agent_os/_adversarial_impl.py +200 -0
- agent_os/_circuit_breaker_impl.py +232 -0
- agent_os/_mcp_metrics.py +193 -0
- agent_os/adversarial.py +20 -0
- agent_os/agents_compat.py +490 -0
- agent_os/audit_logger.py +135 -0
- agent_os/base_agent.py +651 -0
- agent_os/circuit_breaker.py +34 -0
- agent_os/cli/__init__.py +659 -0
- agent_os/cli/cmd_audit.py +128 -0
- agent_os/cli/cmd_init.py +152 -0
- agent_os/cli/cmd_policy.py +41 -0
- agent_os/cli/cmd_policy_gen.py +180 -0
- agent_os/cli/cmd_validate.py +258 -0
- agent_os/cli/mcp_scan.py +265 -0
- agent_os/cli/output.py +192 -0
- agent_os/cli/policy_checker.py +330 -0
- agent_os/compat.py +74 -0
- agent_os/constraint_graph.py +234 -0
- agent_os/content_governance.py +140 -0
- agent_os/context_budget.py +305 -0
- agent_os/credential_redactor.py +224 -0
- agent_os/diff_policy.py +89 -0
- agent_os/egress_policy.py +159 -0
- agent_os/escalation.py +276 -0
- agent_os/event_bus.py +124 -0
- agent_os/exceptions.py +180 -0
- agent_os/execution_context_policy.py +141 -0
- agent_os/github_enterprise.py +96 -0
- agent_os/health.py +20 -0
- agent_os/integrations/__init__.py +279 -0
- agent_os/integrations/a2a_adapter.py +279 -0
- agent_os/integrations/agent_lightning/__init__.py +30 -0
- agent_os/integrations/anthropic_adapter.py +420 -0
- agent_os/integrations/autogen_adapter.py +620 -0
- agent_os/integrations/base.py +1137 -0
- agent_os/integrations/compat.py +229 -0
- agent_os/integrations/config.py +98 -0
- agent_os/integrations/conversation_guardian.py +957 -0
- agent_os/integrations/crewai_adapter.py +467 -0
- agent_os/integrations/drift_detector.py +425 -0
- agent_os/integrations/dry_run.py +124 -0
- agent_os/integrations/escalation.py +582 -0
- agent_os/integrations/gemini_adapter.py +364 -0
- agent_os/integrations/google_adk_adapter.py +633 -0
- agent_os/integrations/guardrails_adapter.py +394 -0
- agent_os/integrations/health.py +197 -0
- agent_os/integrations/langchain_adapter.py +654 -0
- agent_os/integrations/llamafirewall.py +343 -0
- agent_os/integrations/llamaindex_adapter.py +188 -0
- agent_os/integrations/logging.py +191 -0
- agent_os/integrations/maf_adapter.py +631 -0
- agent_os/integrations/mistral_adapter.py +365 -0
- agent_os/integrations/openai_adapter.py +816 -0
- agent_os/integrations/openai_agents_sdk.py +406 -0
- agent_os/integrations/policy_compose.py +171 -0
- agent_os/integrations/profiling.py +144 -0
- agent_os/integrations/pydantic_ai_adapter.py +420 -0
- agent_os/integrations/rate_limiter.py +130 -0
- agent_os/integrations/rbac.py +143 -0
- agent_os/integrations/registry.py +113 -0
- agent_os/integrations/scope_guard.py +303 -0
- agent_os/integrations/semantic_kernel_adapter.py +769 -0
- agent_os/integrations/smolagents_adapter.py +629 -0
- agent_os/integrations/templates.py +178 -0
- agent_os/integrations/token_budget.py +134 -0
- agent_os/integrations/tool_aliases.py +190 -0
- agent_os/integrations/webhooks.py +177 -0
- agent_os/lite.py +208 -0
- agent_os/mcp_gateway.py +385 -0
- agent_os/mcp_message_signer.py +273 -0
- agent_os/mcp_protocols.py +161 -0
- agent_os/mcp_response_scanner.py +232 -0
- agent_os/mcp_security.py +924 -0
- agent_os/mcp_session_auth.py +231 -0
- agent_os/mcp_sliding_rate_limiter.py +184 -0
- agent_os/memory_guard.py +409 -0
- agent_os/metrics.py +134 -0
- agent_os/mute.py +428 -0
- agent_os/mute_agent.py +209 -0
- agent_os/policies/__init__.py +77 -0
- agent_os/policies/async_evaluator.py +275 -0
- agent_os/policies/backends.py +670 -0
- agent_os/policies/bridge.py +169 -0
- agent_os/policies/budget.py +85 -0
- agent_os/policies/cli.py +294 -0
- agent_os/policies/conflict_resolution.py +270 -0
- agent_os/policies/data_classification.py +252 -0
- agent_os/policies/evaluator.py +239 -0
- agent_os/policies/policy_schema.json +228 -0
- agent_os/policies/rate_limiting.py +145 -0
- agent_os/policies/schema.py +115 -0
- agent_os/policies/shared.py +331 -0
- agent_os/prompt_injection.py +694 -0
- agent_os/providers.py +182 -0
- agent_os/py.typed +0 -0
- agent_os/retry.py +81 -0
- agent_os/reversibility.py +251 -0
- agent_os/sandbox.py +432 -0
- agent_os/sandbox_provider.py +140 -0
- agent_os/secure_codegen.py +525 -0
- agent_os/security_skills.py +538 -0
- agent_os/semantic_policy.py +422 -0
- agent_os/server/__init__.py +15 -0
- agent_os/server/__main__.py +25 -0
- agent_os/server/app.py +277 -0
- agent_os/server/models.py +104 -0
- agent_os/shift_left_metrics.py +130 -0
- agent_os/stateless.py +742 -0
- agent_os/supervisor.py +148 -0
- agent_os/task_outcome.py +148 -0
- agent_os/transparency.py +181 -0
- agent_os/trust_root.py +128 -0
- agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
- agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
- agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
- agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
- agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
- agent_os_observability/__init__.py +27 -0
- agent_os_observability/dashboards.py +898 -0
- agent_os_observability/metrics.py +398 -0
- agent_os_observability/server.py +223 -0
- agent_os_observability/tracer.py +232 -0
- agent_primitives/__init__.py +24 -0
- agent_primitives/failures.py +84 -0
- agent_primitives/py.typed +0 -0
- amb_core/__init__.py +177 -0
- amb_core/adapters/__init__.py +57 -0
- amb_core/adapters/aws_sqs_broker.py +376 -0
- amb_core/adapters/azure_servicebus_broker.py +340 -0
- amb_core/adapters/kafka_broker.py +260 -0
- amb_core/adapters/nats_broker.py +285 -0
- amb_core/adapters/rabbitmq_broker.py +235 -0
- amb_core/adapters/redis_broker.py +262 -0
- amb_core/broker.py +145 -0
- amb_core/bus.py +481 -0
- amb_core/cloudevents.py +509 -0
- amb_core/dlq.py +345 -0
- amb_core/hf_utils.py +536 -0
- amb_core/memory_broker.py +410 -0
- amb_core/models.py +141 -0
- amb_core/persistence.py +529 -0
- amb_core/schema.py +294 -0
- amb_core/tracing.py +358 -0
- atr/__init__.py +640 -0
- atr/access.py +348 -0
- atr/composition.py +645 -0
- atr/decorator.py +357 -0
- atr/executor.py +384 -0
- atr/health.py +557 -0
- atr/hf_utils.py +449 -0
- atr/injection.py +422 -0
- atr/metrics.py +440 -0
- atr/policies.py +403 -0
- atr/py.typed +2 -0
- atr/registry.py +452 -0
- atr/schema.py +480 -0
- atr/tools/safe/__init__.py +75 -0
- atr/tools/safe/calculator.py +467 -0
- atr/tools/safe/datetime_tool.py +443 -0
- atr/tools/safe/file_reader.py +402 -0
- atr/tools/safe/http_client.py +316 -0
- atr/tools/safe/json_parser.py +374 -0
- atr/tools/safe/text_tool.py +537 -0
- atr/tools/safe/toolkit.py +175 -0
- caas/__init__.py +162 -0
- caas/api/__init__.py +7 -0
- caas/api/server.py +1328 -0
- caas/caching.py +834 -0
- caas/cli.py +210 -0
- caas/conversation.py +223 -0
- caas/decay.py +72 -0
- caas/detection/__init__.py +9 -0
- caas/detection/detector.py +238 -0
- caas/enrichment.py +130 -0
- caas/gateway/__init__.py +27 -0
- caas/gateway/trust_gateway.py +474 -0
- caas/hf_utils.py +479 -0
- caas/ingestion/__init__.py +23 -0
- caas/ingestion/processors.py +253 -0
- caas/ingestion/structure_parser.py +188 -0
- caas/models.py +356 -0
- caas/pragmatic_truth.py +444 -0
- caas/routing/__init__.py +10 -0
- caas/routing/heuristic_router.py +58 -0
- caas/storage/__init__.py +9 -0
- caas/storage/store.py +389 -0
- caas/triad.py +213 -0
- caas/tuning/__init__.py +9 -0
- caas/tuning/tuner.py +329 -0
- caas/vfs/__init__.py +14 -0
- caas/vfs/filesystem.py +452 -0
- cmvk/__init__.py +218 -0
- cmvk/audit.py +402 -0
- cmvk/benchmarks.py +478 -0
- cmvk/constitutional.py +904 -0
- cmvk/hf_utils.py +301 -0
- cmvk/metrics.py +473 -0
- cmvk/profiles.py +300 -0
- cmvk/py.typed +0 -0
- cmvk/types.py +12 -0
- cmvk/verification.py +956 -0
- emk/__init__.py +89 -0
- emk/causal.py +352 -0
- emk/hf_utils.py +421 -0
- emk/indexer.py +83 -0
- emk/py.typed +0 -0
- emk/schema.py +204 -0
- emk/sleep_cycle.py +347 -0
- emk/store.py +281 -0
- iatp/__init__.py +166 -0
- iatp/attestation.py +461 -0
- iatp/cli.py +317 -0
- iatp/hf_utils.py +472 -0
- iatp/ipc_pipes.py +580 -0
- iatp/main.py +412 -0
- iatp/models/__init__.py +447 -0
- iatp/policy_engine.py +337 -0
- iatp/py.typed +2 -0
- iatp/recovery.py +321 -0
- iatp/security/__init__.py +270 -0
- iatp/sidecar/__init__.py +519 -0
- iatp/telemetry/__init__.py +164 -0
- iatp/tests/__init__.py +1 -0
- iatp/tests/test_attestation.py +370 -0
- iatp/tests/test_cli.py +131 -0
- iatp/tests/test_ed25519_attestation.py +211 -0
- iatp/tests/test_models.py +130 -0
- iatp/tests/test_policy_engine.py +347 -0
- iatp/tests/test_recovery.py +281 -0
- iatp/tests/test_security.py +222 -0
- iatp/tests/test_sidecar.py +167 -0
- iatp/tests/test_telemetry.py +175 -0
- mcp_kernel_server/__init__.py +28 -0
- mcp_kernel_server/cli.py +274 -0
- mcp_kernel_server/resources.py +217 -0
- mcp_kernel_server/server.py +564 -0
- mcp_kernel_server/tools.py +1174 -0
- mute_agent/__init__.py +68 -0
- mute_agent/core/__init__.py +1 -0
- mute_agent/core/execution_agent.py +166 -0
- mute_agent/core/handshake_protocol.py +201 -0
- mute_agent/core/reasoning_agent.py +238 -0
- mute_agent/knowledge_graph/__init__.py +1 -0
- mute_agent/knowledge_graph/graph_elements.py +65 -0
- mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
- mute_agent/knowledge_graph/subgraph.py +224 -0
- mute_agent/listener/__init__.py +43 -0
- mute_agent/listener/adapters/__init__.py +31 -0
- mute_agent/listener/adapters/base_adapter.py +189 -0
- mute_agent/listener/adapters/caas_adapter.py +344 -0
- mute_agent/listener/adapters/control_plane_adapter.py +436 -0
- mute_agent/listener/adapters/iatp_adapter.py +332 -0
- mute_agent/listener/adapters/scak_adapter.py +251 -0
- mute_agent/listener/listener.py +610 -0
- mute_agent/listener/state_observer.py +436 -0
- mute_agent/listener/threshold_config.py +313 -0
- mute_agent/super_system/__init__.py +1 -0
- mute_agent/super_system/router.py +204 -0
- mute_agent/visualization/__init__.py +10 -0
- mute_agent/visualization/graph_debugger.py +502 -0
- nexus/README.md +60 -0
- nexus/__init__.py +51 -0
- nexus/arbiter.py +359 -0
- nexus/client.py +466 -0
- nexus/dmz.py +444 -0
- nexus/escrow.py +430 -0
- nexus/exceptions.py +286 -0
- nexus/pyproject.toml +36 -0
- nexus/registry.py +393 -0
- nexus/reputation.py +425 -0
- nexus/schemas/__init__.py +51 -0
- nexus/schemas/compliance.py +276 -0
- nexus/schemas/escrow.py +251 -0
- nexus/schemas/manifest.py +225 -0
- nexus/schemas/receipt.py +208 -0
- nexus/tests/__init__.py +0 -0
- nexus/tests/conftest.py +146 -0
- nexus/tests/test_arbiter.py +192 -0
- nexus/tests/test_dmz.py +194 -0
- nexus/tests/test_escrow.py +276 -0
- nexus/tests/test_exceptions.py +225 -0
- nexus/tests/test_registry.py +232 -0
- nexus/tests/test_reputation.py +328 -0
- nexus/tests/test_schemas.py +295 -0
atr/health.py
ADDED
|
@@ -0,0 +1,557 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
"""
|
|
4
|
+
Health check functionality for ATR tools.
|
|
5
|
+
|
|
6
|
+
Provides mechanisms to verify external tools and APIs are available
|
|
7
|
+
before execution.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import asyncio
|
|
13
|
+
import threading
|
|
14
|
+
import time
|
|
15
|
+
from abc import ABC, abstractmethod
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from datetime import datetime, timedelta
|
|
18
|
+
from enum import Enum
|
|
19
|
+
from typing import Any, Callable, Dict, Optional, Union
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class HealthStatus(str, Enum):
    """Enumerates the states a health check can report.

    Every member is also a ``str`` whose value is the lowercase form of
    its name, so members compare equal to those plain strings.
    """

    # The checked tool responded as expected.
    HEALTHY = "healthy"
    # The tool responded, but not in the expected way.
    DEGRADED = "degraded"
    # The tool could not be reached or its check failed.
    UNHEALTHY = "unhealthy"
    # No verdict is available (for example, no check is registered).
    UNKNOWN = "unknown"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
class HealthCheckResult:
    """Outcome produced by one run of a health check.

    Attributes:
        status: Overall health status reported by the check.
        message: Human-readable explanation of the status.
        latency_ms: How long the check took, in milliseconds, if measured.
        timestamp: Creation time of the result (defaults to ``datetime.now()``).
        details: Free-form extra data attached by the check.
    """

    status: HealthStatus
    message: str = ""
    latency_ms: Optional[float] = None
    timestamp: datetime = field(default_factory=datetime.now)
    details: Dict[str, Any] = field(default_factory=dict)

    @property
    def is_healthy(self) -> bool:
        """Report whether the tool is usable (status is HEALTHY or DEGRADED)."""
        usable_states = (HealthStatus.HEALTHY, HealthStatus.DEGRADED)
        return self.status in usable_states

    def to_dict(self) -> Dict[str, Any]:
        """Return the result as a plain dictionary.

        The status is emitted as its string value and the timestamp in
        ISO 8601 form; ``details`` is passed through without copying.
        """
        payload: Dict[str, Any] = {}
        payload["status"] = self.status.value
        payload["message"] = self.message
        payload["latency_ms"] = self.latency_ms
        payload["timestamp"] = self.timestamp.isoformat()
        payload["details"] = self.details
        return payload
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class HealthCheck(ABC):
    """Abstract base class for health checks.

    Subclass this and implement :meth:`check` to create custom health
    checks for your tools. :meth:`check_async` works out of the box by
    delegating to :meth:`check`.
    """

    @abstractmethod
    def check(self) -> HealthCheckResult:
        """Perform the health check synchronously.

        Returns:
            HealthCheckResult with status and details.
        """

    async def check_async(self) -> HealthCheckResult:
        """Perform the health check asynchronously.

        The default implementation runs the synchronous :meth:`check` in
        the running loop's default executor. Override for true async
        checks.

        Returns:
            HealthCheckResult with status and details.
        """
        # Inside a coroutine a loop is always running; get_running_loop() is
        # the supported way to obtain it and never creates a new loop.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self.check)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class HttpHealthCheck(HealthCheck):
    """Health check that verifies an HTTP endpoint is responding.

    Outcome mapping:
        * HEALTHY   - the response status equals ``expected_status``
          (including error statuses such as 401 or 503 when that is
          what the caller expects).
        * DEGRADED  - a non-error response arrived with a different status.
        * UNHEALTHY - an unexpected HTTP error status, a connection
          failure, or any other exception.

    Example:
        >>> check = HttpHealthCheck(
        ...     url="https://api.example.com/health",
        ...     timeout=5.0,
        ...     expected_status=200
        ... )
        >>> result = check.check()
        >>> print(result.status)
    """

    def __init__(
        self,
        url: str,
        timeout: float = 5.0,
        expected_status: int = 200,
        method: str = "GET",
        headers: Optional[Dict[str, str]] = None,
    ):
        """Initialize HTTP health check.

        Args:
            url: The URL to check.
            timeout: Request timeout in seconds.
            expected_status: Expected HTTP status code.
            method: HTTP method to use.
            headers: Optional headers to include.
        """
        self.url = url
        self.timeout = timeout
        self.expected_status = expected_status
        self.method = method
        self.headers = headers or {}

    def _result_for_status(self, status_code: int, latency_ms: float) -> HealthCheckResult:
        """Build the result for a received status: HEALTHY on match, else DEGRADED."""
        details = {"url": self.url, "status_code": status_code}
        if status_code == self.expected_status:
            return HealthCheckResult(
                status=HealthStatus.HEALTHY,
                message=f"HTTP {status_code} OK",
                latency_ms=latency_ms,
                details=details,
            )
        return HealthCheckResult(
            status=HealthStatus.DEGRADED,
            message=f"Unexpected status: {status_code}",
            latency_ms=latency_ms,
            details=details,
        )

    def check(self) -> HealthCheckResult:
        """Perform HTTP health check.

        Returns:
            HealthCheckResult whose ``latency_ms`` covers the time from
            the start of the request until the response (or failure).
            This method does not raise; failures are reported as
            UNHEALTHY results.
        """
        import urllib.error
        import urllib.request

        start_time = time.perf_counter()

        try:
            request = urllib.request.Request(self.url, method=self.method, headers=self.headers)

            with urllib.request.urlopen(request, timeout=self.timeout) as response:
                latency_ms = (time.perf_counter() - start_time) * 1000
                status_code = response.status

            return self._result_for_status(status_code, latency_ms)

        except urllib.error.HTTPError as e:
            # urlopen raises HTTPError for error statuses. It is a URLError
            # subclass, so it must be handled first; otherwise an expected
            # 4xx/5xx status could never be reported as healthy.
            latency_ms = (time.perf_counter() - start_time) * 1000
            status_code = e.code
            reason = e.reason
            error_text = str(e)
            if e.fp is not None:
                # The error object wraps the HTTP response; release it.
                e.close()

            if status_code == self.expected_status:
                return self._result_for_status(status_code, latency_ms)
            return HealthCheckResult(
                status=HealthStatus.UNHEALTHY,
                message=f"HTTP error {status_code}: {reason}",
                latency_ms=latency_ms,
                details={"url": self.url, "status_code": status_code, "error": error_text},
            )
        except urllib.error.URLError as e:
            latency_ms = (time.perf_counter() - start_time) * 1000
            return HealthCheckResult(
                status=HealthStatus.UNHEALTHY,
                message=f"Connection failed: {e.reason}",
                latency_ms=latency_ms,
                details={"url": self.url, "error": str(e)},
            )
        except Exception as e:
            # Deliberate catch-all: a health check reports failure instead of raising.
            latency_ms = (time.perf_counter() - start_time) * 1000
            return HealthCheckResult(
                status=HealthStatus.UNHEALTHY,
                message=f"Check failed: {str(e)}",
                latency_ms=latency_ms,
                details={"url": self.url, "error": str(e)},
            )
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class TcpHealthCheck(HealthCheck):
    """Health check that verifies a TCP port is accepting connections.

    The probe opens an IPv4 (``AF_INET``) stream socket, attempts to
    connect, and closes the socket again.

    Example:
        >>> check = TcpHealthCheck(host="localhost", port=5432)
        >>> result = check.check()
    """

    def __init__(self, host: str, port: int, timeout: float = 5.0):
        """Initialize TCP health check.

        Args:
            host: The host to connect to.
            port: The port to check.
            timeout: Connection timeout in seconds.
        """
        self.host = host
        self.port = port
        self.timeout = timeout

    def check(self) -> HealthCheckResult:
        """Perform TCP health check.

        Returns:
            HEALTHY when the connection attempt succeeds, UNHEALTHY
            otherwise. A failed connect carries the error code returned
            by ``connect_ex`` in ``details["error_code"]``. This method
            does not raise; failures are reported as UNHEALTHY results.
        """
        import socket

        start_time = time.perf_counter()

        try:
            # The context manager closes the socket even when settimeout() or
            # connect_ex() raises (e.g. on a name-resolution failure).
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                sock.settimeout(self.timeout)
                result = sock.connect_ex((self.host, self.port))

            latency_ms = (time.perf_counter() - start_time) * 1000

            if result == 0:
                return HealthCheckResult(
                    status=HealthStatus.HEALTHY,
                    message=f"Port {self.port} is open",
                    latency_ms=latency_ms,
                    details={"host": self.host, "port": self.port},
                )
            return HealthCheckResult(
                status=HealthStatus.UNHEALTHY,
                message=f"Port {self.port} is closed",
                latency_ms=latency_ms,
                details={"host": self.host, "port": self.port, "error_code": result},
            )

        except socket.timeout:
            latency_ms = (time.perf_counter() - start_time) * 1000
            return HealthCheckResult(
                status=HealthStatus.UNHEALTHY,
                message="Connection timed out",
                latency_ms=latency_ms,
                details={"host": self.host, "port": self.port},
            )
        except Exception as e:
            # Deliberate catch-all: a health check reports failure instead of raising.
            latency_ms = (time.perf_counter() - start_time) * 1000
            return HealthCheckResult(
                status=HealthStatus.UNHEALTHY,
                message=f"Check failed: {str(e)}",
                latency_ms=latency_ms,
                details={"host": self.host, "port": self.port, "error": str(e)},
            )
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
class CallableHealthCheck(HealthCheck):
    """Health check using a custom callable.

    Example:
        >>> def check_database():
        ...     # Custom check logic
        ...     return True, "Database OK"
        >>>
        >>> check = CallableHealthCheck(check_database)
    """

    def __init__(self, func: Callable[[], Union[bool, tuple]], name: str = "custom"):
        """Initialize callable health check.

        Args:
            func: Callable that returns bool or (bool, message) tuple.
            name: Name for this check.
        """
        self.func = func
        self.name = name

    def check(self) -> HealthCheckResult:
        """Perform the custom health check.

        The callable's return value is interpreted as follows:
            * ``bool`` - the verdict, with a default message.
            * non-empty ``tuple`` - the first item is the verdict; the
              second item, if present, is the message.
            * anything else - its truthiness is the verdict and its
              ``str()`` is the message.

        Returns:
            HEALTHY or UNHEALTHY according to the verdict. An exception
            raised by the callable is reported as UNHEALTHY instead of
            propagating.
        """
        start_time = time.perf_counter()

        try:
            result = self.func()
            latency_ms = (time.perf_counter() - start_time) * 1000

            if isinstance(result, bool):
                healthy = result
                message = "Check passed" if healthy else "Check failed"
            elif isinstance(result, tuple) and result:
                # Judge by the first element. A one-element tuple such as
                # (False,) must not count as healthy merely because the
                # tuple itself is truthy.
                healthy = bool(result[0])
                if len(result) >= 2:
                    message = result[1]
                else:
                    message = "Check passed" if healthy else "Check failed"
            else:
                healthy = bool(result)
                message = str(result)

            return HealthCheckResult(
                status=HealthStatus.HEALTHY if healthy else HealthStatus.UNHEALTHY,
                message=message,
                latency_ms=latency_ms,
                details={"check_name": self.name},
            )

        except Exception as e:
            # Deliberate catch-all: a failing user callable means "unhealthy".
            latency_ms = (time.perf_counter() - start_time) * 1000
            return HealthCheckResult(
                status=HealthStatus.UNHEALTHY,
                message=f"Check raised exception: {str(e)}",
                latency_ms=latency_ms,
                details={"check_name": self.name, "error": str(e)},
            )
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
@dataclass
class CachedHealthResult:
    """A health check result paired with the moment its cache entry lapses."""

    # Outcome of the check that populated this cache entry.
    result: HealthCheckResult
    # The entry is served from cache only while the current time is earlier than this.
    expires_at: datetime
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
class HealthCheckRegistry:
    """Registry for managing tool health checks.

    Provides caching, background checking, and aggregated health status.
    The table of registered checks and the result cache are guarded by a
    re-entrant lock; the checks themselves run outside that lock.

    Example:
        >>> registry = HealthCheckRegistry()
        >>> registry.register("api_tool", HttpHealthCheck("https://api.example.com/health"))
        >>>
        >>> # Check all tools
        >>> status = registry.check_all()
        >>> for name, result in status.items():
        ...     print(f"{name}: {result.status}")
    """

    def __init__(self, cache_ttl: timedelta = timedelta(seconds=30), check_timeout: float = 10.0):
        """Initialize health check registry.

        Args:
            cache_ttl: How long to cache health check results.
            check_timeout: Default timeout for health checks.
        """
        self._checks: Dict[str, HealthCheck] = {}
        self._cache: Dict[str, CachedHealthResult] = {}
        self._cache_ttl = cache_ttl
        # NOTE: stored only; no method of this class applies it to a check.
        self._check_timeout = check_timeout
        self._lock = threading.RLock()
        self._background_thread: Optional[threading.Thread] = None
        self._stop_event = threading.Event()

    def register(self, tool_name: str, check: Union[HealthCheck, Callable[[], bool], str]) -> None:
        """Register a health check for a tool.

        Registering under an existing name replaces the previous check
        and drops any result cached for it.

        Args:
            tool_name: Name of the tool.
            check: HealthCheck instance, callable, or URL string.
        """
        with self._lock:
            if isinstance(check, str):
                # Assume it's a URL for HTTP check
                check = HttpHealthCheck(check)
            elif callable(check) and not isinstance(check, HealthCheck):
                check = CallableHealthCheck(check, name=tool_name)

            self._checks[tool_name] = check
            # A result cached for a previous check no longer describes this tool.
            self._cache.pop(tool_name, None)

    def unregister(self, tool_name: str) -> bool:
        """Unregister a health check.

        Args:
            tool_name: Name of the tool.

        Returns:
            True if was registered, False otherwise.
        """
        with self._lock:
            if tool_name in self._checks:
                del self._checks[tool_name]
                self._cache.pop(tool_name, None)
                return True
            return False

    def _lookup(self, tool_name: str, use_cache: bool) -> tuple:
        """Resolve a tool to either a ready answer or the check to run.

        Returns:
            ``(True, result)`` when the answer is already known (unknown
            tool or fresh cache entry), otherwise ``(False, check)``.
        """
        with self._lock:
            if tool_name not in self._checks:
                return True, HealthCheckResult(
                    status=HealthStatus.UNKNOWN,
                    message=f"No health check registered for '{tool_name}'",
                )

            if use_cache and tool_name in self._cache:
                cached = self._cache[tool_name]
                if datetime.now() < cached.expires_at:
                    return True, cached.result

            return False, self._checks[tool_name]

    def _store(self, tool_name: str, check: HealthCheck, result: HealthCheckResult) -> None:
        """Cache ``result`` unless the tool was unregistered or replaced meanwhile."""
        with self._lock:
            if self._checks.get(tool_name) is check:
                self._cache[tool_name] = CachedHealthResult(
                    result=result, expires_at=datetime.now() + self._cache_ttl
                )

    def check(self, tool_name: str, use_cache: bool = True) -> HealthCheckResult:
        """Check health of a specific tool.

        Args:
            tool_name: Name of the tool.
            use_cache: Whether to use cached result if available.

        Returns:
            HealthCheckResult. The status is UNKNOWN when no check is
            registered under ``tool_name``.

        Raises:
            Exception: Whatever the registered check's ``check()`` raises.
        """
        known, payload = self._lookup(tool_name, use_cache)
        if known:
            return payload

        # Run the check outside the lock so a slow probe does not block others.
        result = payload.check()
        self._store(tool_name, payload, result)
        return result

    async def check_async(self, tool_name: str, use_cache: bool = True) -> HealthCheckResult:
        """Check health of a specific tool asynchronously.

        Args:
            tool_name: Name of the tool.
            use_cache: Whether to use cached result if available.

        Returns:
            HealthCheckResult. The status is UNKNOWN when no check is
            registered under ``tool_name``.

        Raises:
            Exception: Whatever the registered check's ``check_async()`` raises.
        """
        known, payload = self._lookup(tool_name, use_cache)
        if known:
            return payload

        result = await payload.check_async()
        self._store(tool_name, payload, result)
        return result

    def check_all(self, use_cache: bool = True) -> Dict[str, HealthCheckResult]:
        """Check health of all registered tools.

        A check that raises is reported as UNHEALTHY for that tool and
        does not prevent the remaining tools from being checked.

        Args:
            use_cache: Whether to use cached results.

        Returns:
            Dictionary mapping tool names to results.
        """
        with self._lock:
            tool_names = list(self._checks)

        health_results: Dict[str, HealthCheckResult] = {}
        for name in tool_names:
            try:
                health_results[name] = self.check(name, use_cache)
            except Exception as exc:
                # Same conversion check_all_async applies to failed checks.
                health_results[name] = HealthCheckResult(
                    status=HealthStatus.UNHEALTHY, message=f"Check failed: {str(exc)}"
                )
        return health_results

    async def check_all_async(self, use_cache: bool = True) -> Dict[str, HealthCheckResult]:
        """Check health of all registered tools asynchronously.

        All checks are awaited concurrently. A check that raises is
        reported as UNHEALTHY for that tool.

        Args:
            use_cache: Whether to use cached results.

        Returns:
            Dictionary mapping tool names to results.
        """
        with self._lock:
            tool_names = list(self._checks)

        tasks = [self.check_async(name, use_cache) for name in tool_names]
        results = await asyncio.gather(*tasks, return_exceptions=True)

        health_results: Dict[str, HealthCheckResult] = {}
        for name, result in zip(tool_names, results):
            # BaseException also covers cancellation, which gather() returns
            # as a value and must not be stored as if it were a result.
            if isinstance(result, BaseException):
                health_results[name] = HealthCheckResult(
                    status=HealthStatus.UNHEALTHY, message=f"Check failed: {str(result)}"
                )
            else:
                health_results[name] = result

        return health_results

    def get_overall_status(self) -> HealthStatus:
        """Get overall health status across all tools.

        Returns:
            HEALTHY if every tool is healthy; otherwise UNHEALTHY if any
            tool is unhealthy; otherwise DEGRADED if any tool is degraded.
            UNKNOWN when no tools are registered or when none of the
            previous cases applies.
        """
        results = self.check_all()

        if not results:
            return HealthStatus.UNKNOWN

        statuses = [r.status for r in results.values()]

        if all(s == HealthStatus.HEALTHY for s in statuses):
            return HealthStatus.HEALTHY
        if any(s == HealthStatus.UNHEALTHY for s in statuses):
            return HealthStatus.UNHEALTHY
        if any(s == HealthStatus.DEGRADED for s in statuses):
            return HealthStatus.DEGRADED
        return HealthStatus.UNKNOWN

    def start_background_checks(self, interval: timedelta = timedelta(seconds=30)) -> None:
        """Start background health checks.

        Does nothing if a background thread is already running. The
        worker is a daemon thread that refreshes every check (bypassing
        the cache) and then waits ``interval`` before the next round.

        Args:
            interval: How often to run checks.
        """
        with self._lock:
            if self._background_thread is not None and self._background_thread.is_alive():
                return

            # Each worker gets its own stop event, so a previous worker that
            # outlived stop_background_checks() cannot be revived by a restart.
            stop_event = threading.Event()
            self._stop_event = stop_event
            wait_seconds = interval.total_seconds()

            def check_loop():
                import contextlib

                while not stop_event.is_set():
                    # Best effort: a failing round must not kill the worker.
                    with contextlib.suppress(Exception):
                        self.check_all(use_cache=False)
                    stop_event.wait(wait_seconds)

            self._background_thread = threading.Thread(target=check_loop, daemon=True)
            self._background_thread.start()

    def stop_background_checks(self) -> None:
        """Stop background health checks.

        Signals the worker to stop and waits up to five seconds for it
        to finish.
        """
        with self._lock:
            worker = self._background_thread
            self._background_thread = None
            self._stop_event.set()

        # Join outside the lock: the worker needs the lock to finish its round.
        if worker is not None:
            worker.join(timeout=5.0)

    def clear_cache(self) -> None:
        """Clear the health check cache."""
        with self._lock:
            self._cache.clear()
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
# Module-wide default registry, created at import time and exposed through
# get_health_registry() / set_health_registry().
_global_health_registry: HealthCheckRegistry = HealthCheckRegistry()
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
def get_health_registry() -> HealthCheckRegistry:
    """Return the registry currently installed as the module-wide default.

    Returns:
        The global HealthCheckRegistry instance.
    """
    current_registry = _global_health_registry
    return current_registry
|
|
548
|
+
|
|
549
|
+
|
|
550
|
+
def set_health_registry(registry: HealthCheckRegistry) -> None:
    """Install ``registry`` as the module-wide default registry.

    The previously installed registry is simply dropped from the global
    slot; nothing is called on it.

    Args:
        registry: The registry to use globally.
    """
    global _global_health_registry
    _global_health_registry = registry
|