agent_os_kernel 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_control_plane/__init__.py +662 -0
- agent_control_plane/a2a_adapter.py +543 -0
- agent_control_plane/adapter.py +417 -0
- agent_control_plane/agent_hibernation.py +394 -0
- agent_control_plane/agent_kernel.py +470 -0
- agent_control_plane/compliance.py +720 -0
- agent_control_plane/constraint_graphs.py +478 -0
- agent_control_plane/control_plane.py +854 -0
- agent_control_plane/example_executors.py +195 -0
- agent_control_plane/execution_engine.py +231 -0
- agent_control_plane/flight_recorder.py +846 -0
- agent_control_plane/governance_layer.py +435 -0
- agent_control_plane/hf_utils.py +563 -0
- agent_control_plane/interfaces/__init__.py +55 -0
- agent_control_plane/interfaces/kernel_interface.py +361 -0
- agent_control_plane/interfaces/plugin_interface.py +497 -0
- agent_control_plane/interfaces/protocol_interfaces.py +387 -0
- agent_control_plane/kernel_space.py +1009 -0
- agent_control_plane/langchain_adapter.py +424 -0
- agent_control_plane/lifecycle.py +3113 -0
- agent_control_plane/mcp_adapter.py +653 -0
- agent_control_plane/ml_safety.py +563 -0
- agent_control_plane/multimodal.py +727 -0
- agent_control_plane/mute_agent.py +422 -0
- agent_control_plane/observability.py +787 -0
- agent_control_plane/orchestrator.py +482 -0
- agent_control_plane/plugin_registry.py +750 -0
- agent_control_plane/policy_engine.py +954 -0
- agent_control_plane/process_isolation.py +777 -0
- agent_control_plane/shadow_mode.py +310 -0
- agent_control_plane/signals.py +493 -0
- agent_control_plane/supervisor_agents.py +430 -0
- agent_control_plane/time_travel_debugger.py +557 -0
- agent_control_plane/tool_registry.py +452 -0
- agent_control_plane/vfs.py +697 -0
- agent_kernel/__init__.py +69 -0
- agent_kernel/analyzer.py +435 -0
- agent_kernel/auditor.py +36 -0
- agent_kernel/completeness_auditor.py +237 -0
- agent_kernel/detector.py +203 -0
- agent_kernel/kernel.py +744 -0
- agent_kernel/memory_manager.py +85 -0
- agent_kernel/models.py +374 -0
- agent_kernel/nudge_mechanism.py +263 -0
- agent_kernel/outcome_analyzer.py +338 -0
- agent_kernel/patcher.py +582 -0
- agent_kernel/semantic_analyzer.py +316 -0
- agent_kernel/semantic_purge.py +349 -0
- agent_kernel/simulator.py +449 -0
- agent_kernel/teacher.py +85 -0
- agent_kernel/triage.py +152 -0
- agent_os/__init__.py +409 -0
- agent_os/_adversarial_impl.py +200 -0
- agent_os/_circuit_breaker_impl.py +232 -0
- agent_os/_mcp_metrics.py +193 -0
- agent_os/adversarial.py +20 -0
- agent_os/agents_compat.py +490 -0
- agent_os/audit_logger.py +135 -0
- agent_os/base_agent.py +651 -0
- agent_os/circuit_breaker.py +34 -0
- agent_os/cli/__init__.py +659 -0
- agent_os/cli/cmd_audit.py +128 -0
- agent_os/cli/cmd_init.py +152 -0
- agent_os/cli/cmd_policy.py +41 -0
- agent_os/cli/cmd_policy_gen.py +180 -0
- agent_os/cli/cmd_validate.py +258 -0
- agent_os/cli/mcp_scan.py +265 -0
- agent_os/cli/output.py +192 -0
- agent_os/cli/policy_checker.py +330 -0
- agent_os/compat.py +74 -0
- agent_os/constraint_graph.py +234 -0
- agent_os/content_governance.py +140 -0
- agent_os/context_budget.py +305 -0
- agent_os/credential_redactor.py +224 -0
- agent_os/diff_policy.py +89 -0
- agent_os/egress_policy.py +159 -0
- agent_os/escalation.py +276 -0
- agent_os/event_bus.py +124 -0
- agent_os/exceptions.py +180 -0
- agent_os/execution_context_policy.py +141 -0
- agent_os/github_enterprise.py +96 -0
- agent_os/health.py +20 -0
- agent_os/integrations/__init__.py +279 -0
- agent_os/integrations/a2a_adapter.py +279 -0
- agent_os/integrations/agent_lightning/__init__.py +30 -0
- agent_os/integrations/anthropic_adapter.py +420 -0
- agent_os/integrations/autogen_adapter.py +620 -0
- agent_os/integrations/base.py +1137 -0
- agent_os/integrations/compat.py +229 -0
- agent_os/integrations/config.py +98 -0
- agent_os/integrations/conversation_guardian.py +957 -0
- agent_os/integrations/crewai_adapter.py +467 -0
- agent_os/integrations/drift_detector.py +425 -0
- agent_os/integrations/dry_run.py +124 -0
- agent_os/integrations/escalation.py +582 -0
- agent_os/integrations/gemini_adapter.py +364 -0
- agent_os/integrations/google_adk_adapter.py +633 -0
- agent_os/integrations/guardrails_adapter.py +394 -0
- agent_os/integrations/health.py +197 -0
- agent_os/integrations/langchain_adapter.py +654 -0
- agent_os/integrations/llamafirewall.py +343 -0
- agent_os/integrations/llamaindex_adapter.py +188 -0
- agent_os/integrations/logging.py +191 -0
- agent_os/integrations/maf_adapter.py +631 -0
- agent_os/integrations/mistral_adapter.py +365 -0
- agent_os/integrations/openai_adapter.py +816 -0
- agent_os/integrations/openai_agents_sdk.py +406 -0
- agent_os/integrations/policy_compose.py +171 -0
- agent_os/integrations/profiling.py +144 -0
- agent_os/integrations/pydantic_ai_adapter.py +420 -0
- agent_os/integrations/rate_limiter.py +130 -0
- agent_os/integrations/rbac.py +143 -0
- agent_os/integrations/registry.py +113 -0
- agent_os/integrations/scope_guard.py +303 -0
- agent_os/integrations/semantic_kernel_adapter.py +769 -0
- agent_os/integrations/smolagents_adapter.py +629 -0
- agent_os/integrations/templates.py +178 -0
- agent_os/integrations/token_budget.py +134 -0
- agent_os/integrations/tool_aliases.py +190 -0
- agent_os/integrations/webhooks.py +177 -0
- agent_os/lite.py +208 -0
- agent_os/mcp_gateway.py +385 -0
- agent_os/mcp_message_signer.py +273 -0
- agent_os/mcp_protocols.py +161 -0
- agent_os/mcp_response_scanner.py +232 -0
- agent_os/mcp_security.py +924 -0
- agent_os/mcp_session_auth.py +231 -0
- agent_os/mcp_sliding_rate_limiter.py +184 -0
- agent_os/memory_guard.py +409 -0
- agent_os/metrics.py +134 -0
- agent_os/mute.py +428 -0
- agent_os/mute_agent.py +209 -0
- agent_os/policies/__init__.py +77 -0
- agent_os/policies/async_evaluator.py +275 -0
- agent_os/policies/backends.py +670 -0
- agent_os/policies/bridge.py +169 -0
- agent_os/policies/budget.py +85 -0
- agent_os/policies/cli.py +294 -0
- agent_os/policies/conflict_resolution.py +270 -0
- agent_os/policies/data_classification.py +252 -0
- agent_os/policies/evaluator.py +239 -0
- agent_os/policies/policy_schema.json +228 -0
- agent_os/policies/rate_limiting.py +145 -0
- agent_os/policies/schema.py +115 -0
- agent_os/policies/shared.py +331 -0
- agent_os/prompt_injection.py +694 -0
- agent_os/providers.py +182 -0
- agent_os/py.typed +0 -0
- agent_os/retry.py +81 -0
- agent_os/reversibility.py +251 -0
- agent_os/sandbox.py +432 -0
- agent_os/sandbox_provider.py +140 -0
- agent_os/secure_codegen.py +525 -0
- agent_os/security_skills.py +538 -0
- agent_os/semantic_policy.py +422 -0
- agent_os/server/__init__.py +15 -0
- agent_os/server/__main__.py +25 -0
- agent_os/server/app.py +277 -0
- agent_os/server/models.py +104 -0
- agent_os/shift_left_metrics.py +130 -0
- agent_os/stateless.py +742 -0
- agent_os/supervisor.py +148 -0
- agent_os/task_outcome.py +148 -0
- agent_os/transparency.py +181 -0
- agent_os/trust_root.py +128 -0
- agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
- agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
- agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
- agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
- agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
- agent_os_observability/__init__.py +27 -0
- agent_os_observability/dashboards.py +898 -0
- agent_os_observability/metrics.py +398 -0
- agent_os_observability/server.py +223 -0
- agent_os_observability/tracer.py +232 -0
- agent_primitives/__init__.py +24 -0
- agent_primitives/failures.py +84 -0
- agent_primitives/py.typed +0 -0
- amb_core/__init__.py +177 -0
- amb_core/adapters/__init__.py +57 -0
- amb_core/adapters/aws_sqs_broker.py +376 -0
- amb_core/adapters/azure_servicebus_broker.py +340 -0
- amb_core/adapters/kafka_broker.py +260 -0
- amb_core/adapters/nats_broker.py +285 -0
- amb_core/adapters/rabbitmq_broker.py +235 -0
- amb_core/adapters/redis_broker.py +262 -0
- amb_core/broker.py +145 -0
- amb_core/bus.py +481 -0
- amb_core/cloudevents.py +509 -0
- amb_core/dlq.py +345 -0
- amb_core/hf_utils.py +536 -0
- amb_core/memory_broker.py +410 -0
- amb_core/models.py +141 -0
- amb_core/persistence.py +529 -0
- amb_core/schema.py +294 -0
- amb_core/tracing.py +358 -0
- atr/__init__.py +640 -0
- atr/access.py +348 -0
- atr/composition.py +645 -0
- atr/decorator.py +357 -0
- atr/executor.py +384 -0
- atr/health.py +557 -0
- atr/hf_utils.py +449 -0
- atr/injection.py +422 -0
- atr/metrics.py +440 -0
- atr/policies.py +403 -0
- atr/py.typed +2 -0
- atr/registry.py +452 -0
- atr/schema.py +480 -0
- atr/tools/safe/__init__.py +75 -0
- atr/tools/safe/calculator.py +467 -0
- atr/tools/safe/datetime_tool.py +443 -0
- atr/tools/safe/file_reader.py +402 -0
- atr/tools/safe/http_client.py +316 -0
- atr/tools/safe/json_parser.py +374 -0
- atr/tools/safe/text_tool.py +537 -0
- atr/tools/safe/toolkit.py +175 -0
- caas/__init__.py +162 -0
- caas/api/__init__.py +7 -0
- caas/api/server.py +1328 -0
- caas/caching.py +834 -0
- caas/cli.py +210 -0
- caas/conversation.py +223 -0
- caas/decay.py +72 -0
- caas/detection/__init__.py +9 -0
- caas/detection/detector.py +238 -0
- caas/enrichment.py +130 -0
- caas/gateway/__init__.py +27 -0
- caas/gateway/trust_gateway.py +474 -0
- caas/hf_utils.py +479 -0
- caas/ingestion/__init__.py +23 -0
- caas/ingestion/processors.py +253 -0
- caas/ingestion/structure_parser.py +188 -0
- caas/models.py +356 -0
- caas/pragmatic_truth.py +444 -0
- caas/routing/__init__.py +10 -0
- caas/routing/heuristic_router.py +58 -0
- caas/storage/__init__.py +9 -0
- caas/storage/store.py +389 -0
- caas/triad.py +213 -0
- caas/tuning/__init__.py +9 -0
- caas/tuning/tuner.py +329 -0
- caas/vfs/__init__.py +14 -0
- caas/vfs/filesystem.py +452 -0
- cmvk/__init__.py +218 -0
- cmvk/audit.py +402 -0
- cmvk/benchmarks.py +478 -0
- cmvk/constitutional.py +904 -0
- cmvk/hf_utils.py +301 -0
- cmvk/metrics.py +473 -0
- cmvk/profiles.py +300 -0
- cmvk/py.typed +0 -0
- cmvk/types.py +12 -0
- cmvk/verification.py +956 -0
- emk/__init__.py +89 -0
- emk/causal.py +352 -0
- emk/hf_utils.py +421 -0
- emk/indexer.py +83 -0
- emk/py.typed +0 -0
- emk/schema.py +204 -0
- emk/sleep_cycle.py +347 -0
- emk/store.py +281 -0
- iatp/__init__.py +166 -0
- iatp/attestation.py +461 -0
- iatp/cli.py +317 -0
- iatp/hf_utils.py +472 -0
- iatp/ipc_pipes.py +580 -0
- iatp/main.py +412 -0
- iatp/models/__init__.py +447 -0
- iatp/policy_engine.py +337 -0
- iatp/py.typed +2 -0
- iatp/recovery.py +321 -0
- iatp/security/__init__.py +270 -0
- iatp/sidecar/__init__.py +519 -0
- iatp/telemetry/__init__.py +164 -0
- iatp/tests/__init__.py +1 -0
- iatp/tests/test_attestation.py +370 -0
- iatp/tests/test_cli.py +131 -0
- iatp/tests/test_ed25519_attestation.py +211 -0
- iatp/tests/test_models.py +130 -0
- iatp/tests/test_policy_engine.py +347 -0
- iatp/tests/test_recovery.py +281 -0
- iatp/tests/test_security.py +222 -0
- iatp/tests/test_sidecar.py +167 -0
- iatp/tests/test_telemetry.py +175 -0
- mcp_kernel_server/__init__.py +28 -0
- mcp_kernel_server/cli.py +274 -0
- mcp_kernel_server/resources.py +217 -0
- mcp_kernel_server/server.py +564 -0
- mcp_kernel_server/tools.py +1174 -0
- mute_agent/__init__.py +68 -0
- mute_agent/core/__init__.py +1 -0
- mute_agent/core/execution_agent.py +166 -0
- mute_agent/core/handshake_protocol.py +201 -0
- mute_agent/core/reasoning_agent.py +238 -0
- mute_agent/knowledge_graph/__init__.py +1 -0
- mute_agent/knowledge_graph/graph_elements.py +65 -0
- mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
- mute_agent/knowledge_graph/subgraph.py +224 -0
- mute_agent/listener/__init__.py +43 -0
- mute_agent/listener/adapters/__init__.py +31 -0
- mute_agent/listener/adapters/base_adapter.py +189 -0
- mute_agent/listener/adapters/caas_adapter.py +344 -0
- mute_agent/listener/adapters/control_plane_adapter.py +436 -0
- mute_agent/listener/adapters/iatp_adapter.py +332 -0
- mute_agent/listener/adapters/scak_adapter.py +251 -0
- mute_agent/listener/listener.py +610 -0
- mute_agent/listener/state_observer.py +436 -0
- mute_agent/listener/threshold_config.py +313 -0
- mute_agent/super_system/__init__.py +1 -0
- mute_agent/super_system/router.py +204 -0
- mute_agent/visualization/__init__.py +10 -0
- mute_agent/visualization/graph_debugger.py +502 -0
- nexus/README.md +60 -0
- nexus/__init__.py +51 -0
- nexus/arbiter.py +359 -0
- nexus/client.py +466 -0
- nexus/dmz.py +444 -0
- nexus/escrow.py +430 -0
- nexus/exceptions.py +286 -0
- nexus/pyproject.toml +36 -0
- nexus/registry.py +393 -0
- nexus/reputation.py +425 -0
- nexus/schemas/__init__.py +51 -0
- nexus/schemas/compliance.py +276 -0
- nexus/schemas/escrow.py +251 -0
- nexus/schemas/manifest.py +225 -0
- nexus/schemas/receipt.py +208 -0
- nexus/tests/__init__.py +0 -0
- nexus/tests/conftest.py +146 -0
- nexus/tests/test_arbiter.py +192 -0
- nexus/tests/test_dmz.py +194 -0
- nexus/tests/test_escrow.py +276 -0
- nexus/tests/test_exceptions.py +225 -0
- nexus/tests/test_registry.py +232 -0
- nexus/tests/test_reputation.py +328 -0
- nexus/tests/test_schemas.py +295 -0
atr/metrics.py
ADDED
|
@@ -0,0 +1,440 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
"""
|
|
4
|
+
Metrics collection for ATR tools.
|
|
5
|
+
|
|
6
|
+
Provides latency tracking, error rate monitoring, and usage statistics.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import threading
|
|
12
|
+
import time
|
|
13
|
+
from collections import defaultdict
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from datetime import datetime, timedelta
|
|
16
|
+
from enum import Enum
|
|
17
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class MetricType(str, Enum):
    """Names of the measurement kinds recorded for a tool.

    Every member is also a ``str``, so it compares equal to its plain
    string value (for example ``MetricType.LATENCY == "latency"``).
    """

    # Call counters.
    CALL_COUNT = "call_count"
    SUCCESS_COUNT = "success_count"
    ERROR_COUNT = "error_count"
    # Timing.
    LATENCY = "latency"
    # Calls flagged as rate limited.
    RATE_LIMITED = "rate_limited"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class ToolMetrics:
    """Cumulative counters and latency figures for one tool.

    Attributes:
        name: Tool name.
        total_calls: Number of calls recorded.
        successful_calls: Calls counted as successful.
        failed_calls: Calls counted as failed.
        rate_limited_calls: Calls counted as rate limited.
        total_latency_ms: Sum of all recorded latencies, in milliseconds.
        min_latency_ms: Smallest recorded latency, or None if nothing recorded.
        max_latency_ms: Largest recorded latency, or None if nothing recorded.
        last_called: Timestamp of the most recent call, if any.
        last_error: Message of the most recent error, if any.
        error_types: Occurrence count per error type name.
    """

    name: str
    total_calls: int = 0
    successful_calls: int = 0
    failed_calls: int = 0
    rate_limited_calls: int = 0
    total_latency_ms: float = 0.0
    min_latency_ms: Optional[float] = None
    max_latency_ms: Optional[float] = None
    last_called: Optional[datetime] = None
    last_error: Optional[str] = None
    error_types: Dict[str, int] = field(default_factory=dict)

    @property
    def avg_latency_ms(self) -> Optional[float]:
        """Mean latency per call in milliseconds; None when no calls exist."""
        return None if self.total_calls == 0 else self.total_latency_ms / self.total_calls

    @property
    def success_rate(self) -> Optional[float]:
        """Share of successful calls as a percentage (0-100); None when no calls exist."""
        return None if self.total_calls == 0 else (self.successful_calls / self.total_calls) * 100

    @property
    def error_rate(self) -> Optional[float]:
        """Share of failed calls as a percentage (0-100); None when no calls exist."""
        return None if self.total_calls == 0 else (self.failed_calls / self.total_calls) * 100

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict snapshot of the stored and derived values.

        ``last_called`` is rendered with ``isoformat()`` (or None) and
        ``error_types`` is copied, so the result does not alias this object.
        """
        stamp = self.last_called
        snapshot: Dict[str, Any] = {
            "name": self.name,
            "total_calls": self.total_calls,
            "successful_calls": self.successful_calls,
            "failed_calls": self.failed_calls,
            "rate_limited_calls": self.rate_limited_calls,
            "avg_latency_ms": self.avg_latency_ms,
            "min_latency_ms": self.min_latency_ms,
            "max_latency_ms": self.max_latency_ms,
            "success_rate": self.success_rate,
            "error_rate": self.error_rate,
        }
        snapshot["last_called"] = stamp.isoformat() if stamp else None
        snapshot["last_error"] = self.last_error
        snapshot["error_types"] = dict(self.error_types)
        return snapshot
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@dataclass
class TimeWindowMetrics:
    """Aggregated call figures for the interval from window_start to window_end.

    Attributes:
        window_start: Start of the interval.
        window_end: End of the interval.
        call_count: Number of calls in the interval.
        success_count: Number of successful calls in the interval.
        error_count: Number of unsuccessful calls in the interval.
        total_latency_ms: Sum of the call latencies, in milliseconds.
    """

    window_start: datetime
    window_end: datetime
    call_count: int = 0
    success_count: int = 0
    error_count: int = 0
    total_latency_ms: float = 0.0

    @property
    def avg_latency_ms(self) -> Optional[float]:
        """Mean latency per call in milliseconds; None when the window is empty."""
        return None if self.call_count == 0 else self.total_latency_ms / self.call_count
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class MetricsCollector:
    """Collects and aggregates tool metrics.

    Keeps one cumulative ``ToolMetrics`` per tool plus a per-tool list of
    timestamped events that is pruned to ``retention_period`` on every
    recorded call. The metric tables, the event lists and the callback list
    are all accessed under one re-entrant lock.

    Example:
        >>> collector = MetricsCollector()
        >>>
        >>> # Record a successful call
        >>> collector.record_call("my_tool", latency_ms=150.0, success=True)
        >>>
        >>> # Get metrics
        >>> metrics = collector.get_metrics("my_tool")
        >>> print(f"Average latency: {metrics.avg_latency_ms}ms")
    """

    # Counter families written by export_prometheus():
    # (metric name, HELP text, ToolMetrics attribute holding the value).
    _PROMETHEUS_COUNTERS = (
        ("atr_tool_calls_total", "Total calls to tool", "total_calls"),
        ("atr_tool_successes_total", "Successful calls", "successful_calls"),
        ("atr_tool_errors_total", "Failed calls", "failed_calls"),
    )

    def __init__(self, retention_period: timedelta = timedelta(hours=24)):
        """Initialize collector.

        Args:
            retention_period: How long to retain detailed time-series data.
        """
        self._metrics: Dict[str, ToolMetrics] = {}
        self._time_series: Dict[str, List[Tuple[datetime, Dict[str, Any]]]] = defaultdict(list)
        self._retention_period = retention_period
        self._lock = threading.RLock()
        self._callbacks: List[Callable[[str, Dict[str, Any]], None]] = []

    def record_call(
        self,
        tool_name: str,
        latency_ms: float,
        success: bool,
        error: Optional[Exception] = None,
        rate_limited: bool = False,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Record a tool call.

        Updates the cumulative counters, appends an event to the tool's time
        series, prunes expired events and then notifies the callbacks.

        Args:
            tool_name: Name of the tool.
            latency_ms: Call latency in milliseconds.
            success: Whether the call succeeded.
            error: Exception if call failed.
            rate_limited: Whether call was rate limited. A rate-limited call
                is counted only as rate limited, whatever ``success`` says.
            metadata: Additional metadata to record.
        """
        import logging

        now = datetime.now()

        with self._lock:
            # Get or create metrics for this tool
            if tool_name not in self._metrics:
                self._metrics[tool_name] = ToolMetrics(name=tool_name)

            metrics = self._metrics[tool_name]

            # Update counts
            metrics.total_calls += 1
            metrics.last_called = now

            if rate_limited:
                metrics.rate_limited_calls += 1
            elif success:
                metrics.successful_calls += 1
            else:
                metrics.failed_calls += 1
                if error is not None:
                    error_type = type(error).__name__
                    metrics.error_types[error_type] = metrics.error_types.get(error_type, 0) + 1
                    metrics.last_error = str(error)

            # Update latency
            metrics.total_latency_ms += latency_ms
            if metrics.min_latency_ms is None or latency_ms < metrics.min_latency_ms:
                metrics.min_latency_ms = latency_ms
            if metrics.max_latency_ms is None or latency_ms > metrics.max_latency_ms:
                metrics.max_latency_ms = latency_ms

            # Store time series data
            event = {
                "timestamp": now,
                "latency_ms": latency_ms,
                "success": success,
                "rate_limited": rate_limited,
                "error_type": type(error).__name__ if error is not None else None,
                "metadata": metadata,
            }
            self._time_series[tool_name].append((now, event))

            # Cleanup old data
            self._cleanup_old_data(tool_name)

            # Snapshot the callbacks so that add_callback()/remove_callback()
            # on another thread cannot disturb the iteration below.
            callbacks = list(self._callbacks)

        # Notify callbacks without holding the lock. A failing callback must
        # not break metric recording, so its exception is logged and dropped.
        for callback in callbacks:
            try:
                callback(tool_name, event)
            except Exception:
                logging.getLogger(__name__).warning(
                    "Metrics callback %r failed for tool %r",
                    callback,
                    tool_name,
                    exc_info=True,
                )

    def _cleanup_old_data(self, tool_name: str) -> None:
        """Remove data older than retention period.

        Called with the lock already held by record_call().
        """
        cutoff = datetime.now() - self._retention_period
        self._time_series[tool_name] = [
            (ts, event) for ts, event in self._time_series[tool_name] if ts > cutoff
        ]

    def get_metrics(self, tool_name: str) -> Optional[ToolMetrics]:
        """Get metrics for a specific tool.

        Args:
            tool_name: Name of the tool.

        Returns:
            The stored ToolMetrics instance (not a copy) or None if the tool
            is not tracked.
        """
        with self._lock:
            return self._metrics.get(tool_name)

    def get_all_metrics(self) -> Dict[str, ToolMetrics]:
        """Get metrics for all tools.

        Returns:
            New dictionary mapping tool names to the stored metrics objects.
        """
        with self._lock:
            return dict(self._metrics)

    def get_time_window_metrics(
        self, tool_name: str, window: timedelta
    ) -> Optional[TimeWindowMetrics]:
        """Get metrics for a specific time window.

        Args:
            tool_name: Name of the tool.
            window: Time window to aggregate over, ending now.

        Returns:
            TimeWindowMetrics or None if no data.
        """
        now = datetime.now()
        cutoff = now - window

        with self._lock:
            if tool_name not in self._time_series:
                return None

            events = [event for ts, event in self._time_series[tool_name] if ts > cutoff]

        if not events:
            return None

        metrics = TimeWindowMetrics(window_start=cutoff, window_end=now)

        # NOTE(review): events are classified by their "success" flag only, so
        # a rate-limited event lands in success_count or error_count here even
        # though record_call() keeps it out of both cumulative counters.
        # Confirm whether that difference is intended.
        for event in events:
            metrics.call_count += 1
            metrics.total_latency_ms += event["latency_ms"]
            if event["success"]:
                metrics.success_count += 1
            else:
                metrics.error_count += 1

        return metrics

    def get_error_breakdown(
        self, tool_name: str, window: Optional[timedelta] = None
    ) -> Dict[str, int]:
        """Get breakdown of error types.

        Args:
            tool_name: Name of the tool.
            window: Optional time window (None = all time). With a window the
                counts come from the retained time series; without one they
                come from the cumulative per-tool counters.

        Returns:
            Dictionary mapping error types to counts.
        """
        with self._lock:
            if window is None:
                metrics = self._metrics.get(tool_name)
                return dict(metrics.error_types) if metrics else {}

            cutoff = datetime.now() - window
            error_counts: Dict[str, int] = {}

            for ts, event in self._time_series.get(tool_name, []):
                if ts > cutoff and event.get("error_type"):
                    error_type = event["error_type"]
                    error_counts[error_type] = error_counts.get(error_type, 0) + 1

            return error_counts

    def add_callback(self, callback: Callable[[str, Dict[str, Any]], None]) -> None:
        """Add a callback to be notified of new metrics.

        Args:
            callback: Function called with (tool_name, event_data).
        """
        with self._lock:
            self._callbacks.append(callback)

    def remove_callback(self, callback: Callable[[str, Dict[str, Any]], None]) -> None:
        """Remove a callback.

        Removing a callback that is not registered is a no-op.

        Args:
            callback: The callback to remove.
        """
        with self._lock:
            try:
                self._callbacks.remove(callback)
            except ValueError:
                pass

    def reset(self, tool_name: Optional[str] = None) -> None:
        """Reset metrics.

        Args:
            tool_name: Specific tool to reset, or None for all.
        """
        with self._lock:
            if tool_name is None:
                self._metrics.clear()
                self._time_series.clear()
            else:
                self._metrics.pop(tool_name, None)
                self._time_series.pop(tool_name, None)

    @staticmethod
    def _prometheus_label(name: str) -> str:
        """Turn a tool name into an escaped Prometheus label value.

        '-' and '.' are mapped to '_' as before, so tool names differing only
        in those characters share a label. Backslash, double quote and
        newline are then escaped as the exposition format requires.
        """
        label = name.replace("-", "_").replace(".", "_")
        return label.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")

    @staticmethod
    def _append_family(
        lines: List[str],
        metric: str,
        help_text: str,
        kind: str,
        samples: List[Tuple[str, str]],
    ) -> None:
        """Append one metric family: a single HELP/TYPE pair, then its samples."""
        if not samples:
            return
        lines.append(f"# HELP {metric} {help_text}")
        lines.append(f"# TYPE {metric} {kind}")
        lines.extend(f'{metric}{{tool="{label}"}} {value}' for label, value in samples)

    def export_prometheus(self) -> str:
        """Export metrics in Prometheus format.

        Output is grouped by metric family so that each metric name carries
        exactly one ``# HELP`` and one ``# TYPE`` line, followed by one sample
        per tool. The average-latency gauge is written only for tools that
        have an average.

        Returns:
            Prometheus-compatible metrics string (empty if nothing is tracked).
        """
        lines: List[str] = []

        with self._lock:
            tools = [
                (self._prometheus_label(name), metrics)
                for name, metrics in self._metrics.items()
            ]

            for metric, help_text, attr in self._PROMETHEUS_COUNTERS:
                self._append_family(
                    lines,
                    metric,
                    help_text,
                    "counter",
                    [(label, str(getattr(metrics, attr))) for label, metrics in tools],
                )

            self._append_family(
                lines,
                "atr_tool_latency_avg_ms",
                "Average latency",
                "gauge",
                [
                    (label, f"{metrics.avg_latency_ms:.2f}")
                    for label, metrics in tools
                    if metrics.avg_latency_ms is not None
                ],
            )

        return "\n".join(lines)
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
class MetricsContext:
    """Times a ``with`` block and reports the outcome to a collector.

    The body of the block marks the outcome by setting ``success`` (and,
    where relevant, ``rate_limited`` or ``error``) on the context object.
    If an exception leaves the block, it is stored in ``error`` and the call
    is reported as unsuccessful; the exception itself is not suppressed.

    Example:
        >>> with MetricsContext(collector, "my_tool") as ctx:
        ...     result = my_tool()
        ...     ctx.success = True
    """

    def __init__(
        self, collector: MetricsCollector, tool_name: str, metadata: Optional[Dict[str, Any]] = None
    ):
        # Where and under which name the measurement is reported.
        self.collector = collector
        self.tool_name = tool_name
        self.metadata = metadata
        # Outcome flags; pessimistic until the caller says otherwise.
        self.success = False
        self.rate_limited = False
        self.error: Optional[Exception] = None
        # perf_counter() reading taken in __enter__; None until entered.
        self._start_time: Optional[float] = None

    def __enter__(self) -> "MetricsContext":
        self._start_time = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        started = self._start_time
        if started is None:
            # Never entered, so there is nothing to measure or report.
            return

        elapsed_ms = (time.perf_counter() - started) * 1000

        if exc_val is not None:
            # An exception overrides whatever outcome the body had set.
            self.success = False
            self.error = exc_val

        self.collector.record_call(
            tool_name=self.tool_name,
            latency_ms=elapsed_ms,
            success=self.success,
            error=self.error,
            rate_limited=self.rate_limited,
            metadata=self.metadata,
        )
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
# Global metrics collector
|
|
421
|
+
# Module-level default instance: returned by get_collector() and replaced by
# set_collector().
_global_collector: MetricsCollector = MetricsCollector()
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def get_collector() -> MetricsCollector:
    """Return the module-level metrics collector currently in use.

    Returns:
        The MetricsCollector held in the module global ``_global_collector``.
    """
    return _global_collector
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
def set_collector(collector: MetricsCollector) -> None:
    """Replace the module-level metrics collector.

    Args:
        collector: Instance to store in the module global
            ``_global_collector`` from now on.
    """
    global _global_collector
    _global_collector = collector
|