agent_os_kernel 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. agent_control_plane/__init__.py +662 -0
  2. agent_control_plane/a2a_adapter.py +543 -0
  3. agent_control_plane/adapter.py +417 -0
  4. agent_control_plane/agent_hibernation.py +394 -0
  5. agent_control_plane/agent_kernel.py +470 -0
  6. agent_control_plane/compliance.py +720 -0
  7. agent_control_plane/constraint_graphs.py +478 -0
  8. agent_control_plane/control_plane.py +854 -0
  9. agent_control_plane/example_executors.py +195 -0
  10. agent_control_plane/execution_engine.py +231 -0
  11. agent_control_plane/flight_recorder.py +846 -0
  12. agent_control_plane/governance_layer.py +435 -0
  13. agent_control_plane/hf_utils.py +563 -0
  14. agent_control_plane/interfaces/__init__.py +55 -0
  15. agent_control_plane/interfaces/kernel_interface.py +361 -0
  16. agent_control_plane/interfaces/plugin_interface.py +497 -0
  17. agent_control_plane/interfaces/protocol_interfaces.py +387 -0
  18. agent_control_plane/kernel_space.py +1009 -0
  19. agent_control_plane/langchain_adapter.py +424 -0
  20. agent_control_plane/lifecycle.py +3113 -0
  21. agent_control_plane/mcp_adapter.py +653 -0
  22. agent_control_plane/ml_safety.py +563 -0
  23. agent_control_plane/multimodal.py +727 -0
  24. agent_control_plane/mute_agent.py +422 -0
  25. agent_control_plane/observability.py +787 -0
  26. agent_control_plane/orchestrator.py +482 -0
  27. agent_control_plane/plugin_registry.py +750 -0
  28. agent_control_plane/policy_engine.py +954 -0
  29. agent_control_plane/process_isolation.py +777 -0
  30. agent_control_plane/shadow_mode.py +310 -0
  31. agent_control_plane/signals.py +493 -0
  32. agent_control_plane/supervisor_agents.py +430 -0
  33. agent_control_plane/time_travel_debugger.py +557 -0
  34. agent_control_plane/tool_registry.py +452 -0
  35. agent_control_plane/vfs.py +697 -0
  36. agent_kernel/__init__.py +69 -0
  37. agent_kernel/analyzer.py +435 -0
  38. agent_kernel/auditor.py +36 -0
  39. agent_kernel/completeness_auditor.py +237 -0
  40. agent_kernel/detector.py +203 -0
  41. agent_kernel/kernel.py +744 -0
  42. agent_kernel/memory_manager.py +85 -0
  43. agent_kernel/models.py +374 -0
  44. agent_kernel/nudge_mechanism.py +263 -0
  45. agent_kernel/outcome_analyzer.py +338 -0
  46. agent_kernel/patcher.py +582 -0
  47. agent_kernel/semantic_analyzer.py +316 -0
  48. agent_kernel/semantic_purge.py +349 -0
  49. agent_kernel/simulator.py +449 -0
  50. agent_kernel/teacher.py +85 -0
  51. agent_kernel/triage.py +152 -0
  52. agent_os/__init__.py +409 -0
  53. agent_os/_adversarial_impl.py +200 -0
  54. agent_os/_circuit_breaker_impl.py +232 -0
  55. agent_os/_mcp_metrics.py +193 -0
  56. agent_os/adversarial.py +20 -0
  57. agent_os/agents_compat.py +490 -0
  58. agent_os/audit_logger.py +135 -0
  59. agent_os/base_agent.py +651 -0
  60. agent_os/circuit_breaker.py +34 -0
  61. agent_os/cli/__init__.py +659 -0
  62. agent_os/cli/cmd_audit.py +128 -0
  63. agent_os/cli/cmd_init.py +152 -0
  64. agent_os/cli/cmd_policy.py +41 -0
  65. agent_os/cli/cmd_policy_gen.py +180 -0
  66. agent_os/cli/cmd_validate.py +258 -0
  67. agent_os/cli/mcp_scan.py +265 -0
  68. agent_os/cli/output.py +192 -0
  69. agent_os/cli/policy_checker.py +330 -0
  70. agent_os/compat.py +74 -0
  71. agent_os/constraint_graph.py +234 -0
  72. agent_os/content_governance.py +140 -0
  73. agent_os/context_budget.py +305 -0
  74. agent_os/credential_redactor.py +224 -0
  75. agent_os/diff_policy.py +89 -0
  76. agent_os/egress_policy.py +159 -0
  77. agent_os/escalation.py +276 -0
  78. agent_os/event_bus.py +124 -0
  79. agent_os/exceptions.py +180 -0
  80. agent_os/execution_context_policy.py +141 -0
  81. agent_os/github_enterprise.py +96 -0
  82. agent_os/health.py +20 -0
  83. agent_os/integrations/__init__.py +279 -0
  84. agent_os/integrations/a2a_adapter.py +279 -0
  85. agent_os/integrations/agent_lightning/__init__.py +30 -0
  86. agent_os/integrations/anthropic_adapter.py +420 -0
  87. agent_os/integrations/autogen_adapter.py +620 -0
  88. agent_os/integrations/base.py +1137 -0
  89. agent_os/integrations/compat.py +229 -0
  90. agent_os/integrations/config.py +98 -0
  91. agent_os/integrations/conversation_guardian.py +957 -0
  92. agent_os/integrations/crewai_adapter.py +467 -0
  93. agent_os/integrations/drift_detector.py +425 -0
  94. agent_os/integrations/dry_run.py +124 -0
  95. agent_os/integrations/escalation.py +582 -0
  96. agent_os/integrations/gemini_adapter.py +364 -0
  97. agent_os/integrations/google_adk_adapter.py +633 -0
  98. agent_os/integrations/guardrails_adapter.py +394 -0
  99. agent_os/integrations/health.py +197 -0
  100. agent_os/integrations/langchain_adapter.py +654 -0
  101. agent_os/integrations/llamafirewall.py +343 -0
  102. agent_os/integrations/llamaindex_adapter.py +188 -0
  103. agent_os/integrations/logging.py +191 -0
  104. agent_os/integrations/maf_adapter.py +631 -0
  105. agent_os/integrations/mistral_adapter.py +365 -0
  106. agent_os/integrations/openai_adapter.py +816 -0
  107. agent_os/integrations/openai_agents_sdk.py +406 -0
  108. agent_os/integrations/policy_compose.py +171 -0
  109. agent_os/integrations/profiling.py +144 -0
  110. agent_os/integrations/pydantic_ai_adapter.py +420 -0
  111. agent_os/integrations/rate_limiter.py +130 -0
  112. agent_os/integrations/rbac.py +143 -0
  113. agent_os/integrations/registry.py +113 -0
  114. agent_os/integrations/scope_guard.py +303 -0
  115. agent_os/integrations/semantic_kernel_adapter.py +769 -0
  116. agent_os/integrations/smolagents_adapter.py +629 -0
  117. agent_os/integrations/templates.py +178 -0
  118. agent_os/integrations/token_budget.py +134 -0
  119. agent_os/integrations/tool_aliases.py +190 -0
  120. agent_os/integrations/webhooks.py +177 -0
  121. agent_os/lite.py +208 -0
  122. agent_os/mcp_gateway.py +385 -0
  123. agent_os/mcp_message_signer.py +273 -0
  124. agent_os/mcp_protocols.py +161 -0
  125. agent_os/mcp_response_scanner.py +232 -0
  126. agent_os/mcp_security.py +924 -0
  127. agent_os/mcp_session_auth.py +231 -0
  128. agent_os/mcp_sliding_rate_limiter.py +184 -0
  129. agent_os/memory_guard.py +409 -0
  130. agent_os/metrics.py +134 -0
  131. agent_os/mute.py +428 -0
  132. agent_os/mute_agent.py +209 -0
  133. agent_os/policies/__init__.py +77 -0
  134. agent_os/policies/async_evaluator.py +275 -0
  135. agent_os/policies/backends.py +670 -0
  136. agent_os/policies/bridge.py +169 -0
  137. agent_os/policies/budget.py +85 -0
  138. agent_os/policies/cli.py +294 -0
  139. agent_os/policies/conflict_resolution.py +270 -0
  140. agent_os/policies/data_classification.py +252 -0
  141. agent_os/policies/evaluator.py +239 -0
  142. agent_os/policies/policy_schema.json +228 -0
  143. agent_os/policies/rate_limiting.py +145 -0
  144. agent_os/policies/schema.py +115 -0
  145. agent_os/policies/shared.py +331 -0
  146. agent_os/prompt_injection.py +694 -0
  147. agent_os/providers.py +182 -0
  148. agent_os/py.typed +0 -0
  149. agent_os/retry.py +81 -0
  150. agent_os/reversibility.py +251 -0
  151. agent_os/sandbox.py +432 -0
  152. agent_os/sandbox_provider.py +140 -0
  153. agent_os/secure_codegen.py +525 -0
  154. agent_os/security_skills.py +538 -0
  155. agent_os/semantic_policy.py +422 -0
  156. agent_os/server/__init__.py +15 -0
  157. agent_os/server/__main__.py +25 -0
  158. agent_os/server/app.py +277 -0
  159. agent_os/server/models.py +104 -0
  160. agent_os/shift_left_metrics.py +130 -0
  161. agent_os/stateless.py +742 -0
  162. agent_os/supervisor.py +148 -0
  163. agent_os/task_outcome.py +148 -0
  164. agent_os/transparency.py +181 -0
  165. agent_os/trust_root.py +128 -0
  166. agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
  167. agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
  168. agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
  169. agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
  170. agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
  171. agent_os_observability/__init__.py +27 -0
  172. agent_os_observability/dashboards.py +898 -0
  173. agent_os_observability/metrics.py +398 -0
  174. agent_os_observability/server.py +223 -0
  175. agent_os_observability/tracer.py +232 -0
  176. agent_primitives/__init__.py +24 -0
  177. agent_primitives/failures.py +84 -0
  178. agent_primitives/py.typed +0 -0
  179. amb_core/__init__.py +177 -0
  180. amb_core/adapters/__init__.py +57 -0
  181. amb_core/adapters/aws_sqs_broker.py +376 -0
  182. amb_core/adapters/azure_servicebus_broker.py +340 -0
  183. amb_core/adapters/kafka_broker.py +260 -0
  184. amb_core/adapters/nats_broker.py +285 -0
  185. amb_core/adapters/rabbitmq_broker.py +235 -0
  186. amb_core/adapters/redis_broker.py +262 -0
  187. amb_core/broker.py +145 -0
  188. amb_core/bus.py +481 -0
  189. amb_core/cloudevents.py +509 -0
  190. amb_core/dlq.py +345 -0
  191. amb_core/hf_utils.py +536 -0
  192. amb_core/memory_broker.py +410 -0
  193. amb_core/models.py +141 -0
  194. amb_core/persistence.py +529 -0
  195. amb_core/schema.py +294 -0
  196. amb_core/tracing.py +358 -0
  197. atr/__init__.py +640 -0
  198. atr/access.py +348 -0
  199. atr/composition.py +645 -0
  200. atr/decorator.py +357 -0
  201. atr/executor.py +384 -0
  202. atr/health.py +557 -0
  203. atr/hf_utils.py +449 -0
  204. atr/injection.py +422 -0
  205. atr/metrics.py +440 -0
  206. atr/policies.py +403 -0
  207. atr/py.typed +2 -0
  208. atr/registry.py +452 -0
  209. atr/schema.py +480 -0
  210. atr/tools/safe/__init__.py +75 -0
  211. atr/tools/safe/calculator.py +467 -0
  212. atr/tools/safe/datetime_tool.py +443 -0
  213. atr/tools/safe/file_reader.py +402 -0
  214. atr/tools/safe/http_client.py +316 -0
  215. atr/tools/safe/json_parser.py +374 -0
  216. atr/tools/safe/text_tool.py +537 -0
  217. atr/tools/safe/toolkit.py +175 -0
  218. caas/__init__.py +162 -0
  219. caas/api/__init__.py +7 -0
  220. caas/api/server.py +1328 -0
  221. caas/caching.py +834 -0
  222. caas/cli.py +210 -0
  223. caas/conversation.py +223 -0
  224. caas/decay.py +72 -0
  225. caas/detection/__init__.py +9 -0
  226. caas/detection/detector.py +238 -0
  227. caas/enrichment.py +130 -0
  228. caas/gateway/__init__.py +27 -0
  229. caas/gateway/trust_gateway.py +474 -0
  230. caas/hf_utils.py +479 -0
  231. caas/ingestion/__init__.py +23 -0
  232. caas/ingestion/processors.py +253 -0
  233. caas/ingestion/structure_parser.py +188 -0
  234. caas/models.py +356 -0
  235. caas/pragmatic_truth.py +444 -0
  236. caas/routing/__init__.py +10 -0
  237. caas/routing/heuristic_router.py +58 -0
  238. caas/storage/__init__.py +9 -0
  239. caas/storage/store.py +389 -0
  240. caas/triad.py +213 -0
  241. caas/tuning/__init__.py +9 -0
  242. caas/tuning/tuner.py +329 -0
  243. caas/vfs/__init__.py +14 -0
  244. caas/vfs/filesystem.py +452 -0
  245. cmvk/__init__.py +218 -0
  246. cmvk/audit.py +402 -0
  247. cmvk/benchmarks.py +478 -0
  248. cmvk/constitutional.py +904 -0
  249. cmvk/hf_utils.py +301 -0
  250. cmvk/metrics.py +473 -0
  251. cmvk/profiles.py +300 -0
  252. cmvk/py.typed +0 -0
  253. cmvk/types.py +12 -0
  254. cmvk/verification.py +956 -0
  255. emk/__init__.py +89 -0
  256. emk/causal.py +352 -0
  257. emk/hf_utils.py +421 -0
  258. emk/indexer.py +83 -0
  259. emk/py.typed +0 -0
  260. emk/schema.py +204 -0
  261. emk/sleep_cycle.py +347 -0
  262. emk/store.py +281 -0
  263. iatp/__init__.py +166 -0
  264. iatp/attestation.py +461 -0
  265. iatp/cli.py +317 -0
  266. iatp/hf_utils.py +472 -0
  267. iatp/ipc_pipes.py +580 -0
  268. iatp/main.py +412 -0
  269. iatp/models/__init__.py +447 -0
  270. iatp/policy_engine.py +337 -0
  271. iatp/py.typed +2 -0
  272. iatp/recovery.py +321 -0
  273. iatp/security/__init__.py +270 -0
  274. iatp/sidecar/__init__.py +519 -0
  275. iatp/telemetry/__init__.py +164 -0
  276. iatp/tests/__init__.py +1 -0
  277. iatp/tests/test_attestation.py +370 -0
  278. iatp/tests/test_cli.py +131 -0
  279. iatp/tests/test_ed25519_attestation.py +211 -0
  280. iatp/tests/test_models.py +130 -0
  281. iatp/tests/test_policy_engine.py +347 -0
  282. iatp/tests/test_recovery.py +281 -0
  283. iatp/tests/test_security.py +222 -0
  284. iatp/tests/test_sidecar.py +167 -0
  285. iatp/tests/test_telemetry.py +175 -0
  286. mcp_kernel_server/__init__.py +28 -0
  287. mcp_kernel_server/cli.py +274 -0
  288. mcp_kernel_server/resources.py +217 -0
  289. mcp_kernel_server/server.py +564 -0
  290. mcp_kernel_server/tools.py +1174 -0
  291. mute_agent/__init__.py +68 -0
  292. mute_agent/core/__init__.py +1 -0
  293. mute_agent/core/execution_agent.py +166 -0
  294. mute_agent/core/handshake_protocol.py +201 -0
  295. mute_agent/core/reasoning_agent.py +238 -0
  296. mute_agent/knowledge_graph/__init__.py +1 -0
  297. mute_agent/knowledge_graph/graph_elements.py +65 -0
  298. mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
  299. mute_agent/knowledge_graph/subgraph.py +224 -0
  300. mute_agent/listener/__init__.py +43 -0
  301. mute_agent/listener/adapters/__init__.py +31 -0
  302. mute_agent/listener/adapters/base_adapter.py +189 -0
  303. mute_agent/listener/adapters/caas_adapter.py +344 -0
  304. mute_agent/listener/adapters/control_plane_adapter.py +436 -0
  305. mute_agent/listener/adapters/iatp_adapter.py +332 -0
  306. mute_agent/listener/adapters/scak_adapter.py +251 -0
  307. mute_agent/listener/listener.py +610 -0
  308. mute_agent/listener/state_observer.py +436 -0
  309. mute_agent/listener/threshold_config.py +313 -0
  310. mute_agent/super_system/__init__.py +1 -0
  311. mute_agent/super_system/router.py +204 -0
  312. mute_agent/visualization/__init__.py +10 -0
  313. mute_agent/visualization/graph_debugger.py +502 -0
  314. nexus/README.md +60 -0
  315. nexus/__init__.py +51 -0
  316. nexus/arbiter.py +359 -0
  317. nexus/client.py +466 -0
  318. nexus/dmz.py +444 -0
  319. nexus/escrow.py +430 -0
  320. nexus/exceptions.py +286 -0
  321. nexus/pyproject.toml +36 -0
  322. nexus/registry.py +393 -0
  323. nexus/reputation.py +425 -0
  324. nexus/schemas/__init__.py +51 -0
  325. nexus/schemas/compliance.py +276 -0
  326. nexus/schemas/escrow.py +251 -0
  327. nexus/schemas/manifest.py +225 -0
  328. nexus/schemas/receipt.py +208 -0
  329. nexus/tests/__init__.py +0 -0
  330. nexus/tests/conftest.py +146 -0
  331. nexus/tests/test_arbiter.py +192 -0
  332. nexus/tests/test_dmz.py +194 -0
  333. nexus/tests/test_escrow.py +276 -0
  334. nexus/tests/test_exceptions.py +225 -0
  335. nexus/tests/test_registry.py +232 -0
  336. nexus/tests/test_reputation.py +328 -0
  337. nexus/tests/test_schemas.py +295 -0
@@ -0,0 +1,787 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+ """
4
+ Observability and Metrics - Real-time Monitoring and Prometheus Integration
5
+
6
+ This module provides production-grade observability features including real-time
7
+ metrics, Prometheus integration, trace visualization, and monitoring dashboards.
8
+
9
+ Research Foundations:
10
+ - Prometheus monitoring best practices
11
+ - OpenTelemetry for distributed tracing
12
+ - "Observability Engineering" (O'Reilly, 2022) - metrics, logs, traces
13
+ - SRE principles from Google SRE Book
14
+
15
+ See docs/RESEARCH_FOUNDATION.md for complete references.
16
+ """
17
+
18
+ from typing import Any, Dict, List, Optional, Callable
19
+ from dataclasses import dataclass, field
20
+ from enum import Enum
21
+ from datetime import datetime, timedelta
22
+ from collections import defaultdict, deque
23
+ import time
24
+
25
+
26
class MetricType(Enum):
    """Kinds of metric that can be recorded.

    The member value is the lowercase type name; ``PrometheusExporter.export``
    writes it verbatim on the ``# TYPE`` line of each metric.
    """
    COUNTER = "counter"
    GAUGE = "gauge"
    HISTOGRAM = "histogram"
    # Declared for completeness; no recorder in this module emits SUMMARY.
    SUMMARY = "summary"
32
+
33
+
34
class AlertSeverity(Enum):
    """Alert severity levels.

    ``ObservabilityDashboard.get_health_status`` treats CRITICAL and ERROR
    specially when deriving the overall status; the member value is the
    string reported in dashboard data.
    """
    INFO = "info"
    WARNING = "warning"
    ERROR = "error"
    CRITICAL = "critical"
40
+
41
+
42
@dataclass
class Metric:
    """
    A single metric sample for one (name, label set) combination.

    Attributes:
        name: Metric name
        metric_type: Type of metric
        value: Current value
        labels: Key-value labels (defaults to a fresh empty dict per instance)
        timestamp: When measured; defaults to ``datetime.now()`` at
            construction, i.e. a naive local-time datetime
        help_text: Description of metric (empty string when not given)
    """
    name: str
    metric_type: MetricType
    value: float
    labels: Dict[str, str] = field(default_factory=dict)
    timestamp: datetime = field(default_factory=datetime.now)
    help_text: str = ""
61
+
62
+
63
@dataclass
class Alert:
    """
    An alert notification.

    Attributes:
        alert_id: Unique identifier
        name: Alert name
        severity: Severity level
        message: Alert message
        labels: Context labels (defaults to a fresh empty dict per instance)
        firing: Whether alert is currently firing (defaults to True)
        started_at: When alert started firing; defaults to ``datetime.now()``
            at construction
        resolved_at: When the alert stopped firing, or None while it has not
            been resolved
    """
    alert_id: str
    name: str
    severity: AlertSeverity
    message: str
    labels: Dict[str, str] = field(default_factory=dict)
    firing: bool = True
    started_at: datetime = field(default_factory=datetime.now)
    resolved_at: Optional[datetime] = None
85
+
86
+
87
@dataclass
class Trace:
    """
    A distributed trace for a request/operation.

    Attributes:
        trace_id: Unique trace identifier
        spans: List of spans in this trace (``TraceCollector`` appends a span
            here when that span is ended)
        started_at: Trace start time; defaults to ``datetime.now()`` at
            construction
        duration_ms: Total duration in milliseconds, or None until it is set
        metadata: Additional trace metadata
    """
    trace_id: str
    # 'Span' is a forward reference: the Span class is declared after Trace.
    spans: List['Span'] = field(default_factory=list)
    started_at: datetime = field(default_factory=datetime.now)
    duration_ms: Optional[float] = None
    metadata: Dict[str, Any] = field(default_factory=dict)
104
+
105
+
106
@dataclass
class Span:
    """
    A span within a trace.

    Attributes:
        span_id: Unique span identifier
        parent_span_id: Parent span if nested; None marks a root span
        operation_name: Name of operation
        started_at: Span start time; defaults to ``datetime.now()`` at
            construction
        duration_ms: Span duration in milliseconds, or None until it is set
        tags: Span tags
        logs: Span logs (a list of free-form dicts; nothing in this module
            writes to it)
    """
    span_id: str
    parent_span_id: Optional[str]
    operation_name: str
    started_at: datetime = field(default_factory=datetime.now)
    duration_ms: Optional[float] = None
    tags: Dict[str, Any] = field(default_factory=dict)
    logs: List[Dict[str, Any]] = field(default_factory=list)
127
+
128
+
129
class PrometheusExporter:
    """
    Prometheus metrics exporter.

    Keeps metric samples in memory and renders them in the Prometheus text
    exposition format for scraping.

    Features:
    - Counter, gauge and (simplified) histogram metrics
    - Multi-dimensional labels
    - Automatic metric registration on first use
    - Text format export for Prometheus scraping

    Notes:
    - The first call that records a given metric name fixes the TYPE and
      HELP reported for that name; later calls do not change them.
    - Histograms are simplified: only the most recent observation is kept
      per label set, and no ``_bucket``/``_sum``/``_count`` series are
      produced.

    Usage:
        exporter = PrometheusExporter()

        # Record metrics
        exporter.increment_counter(
            "agent_requests_total",
            labels={"agent_id": "agent1", "status": "success"}
        )

        exporter.set_gauge(
            "agent_active_sessions",
            value=5,
            labels={"agent_id": "agent1"}
        )

        # Export for Prometheus
        metrics_text = exporter.export()
    """

    def __init__(self):
        # metric name -> label key -> latest sample for that label set
        self._metrics: Dict[str, Dict[str, Metric]] = defaultdict(dict)
        # metric name -> {"type": MetricType, "help": str}
        self._metric_metadata: Dict[str, Dict[str, Any]] = {}

    def increment_counter(
        self,
        name: str,
        value: float = 1.0,
        labels: Optional[Dict[str, str]] = None,
        help_text: str = ""
    ):
        """
        Increment a counter metric.

        Creates the series with ``value`` on first use; afterwards adds
        ``value`` to the stored sample and refreshes its timestamp.

        Args:
            name: Metric name
            value: Amount to increment
            labels: Metric labels
            help_text: Help text for metric
        """
        # Copy so later mutation of the caller's dict cannot alter the
        # stored sample (whose lookup key is computed once, here).
        labels = dict(labels) if labels else {}
        label_key = self._make_label_key(labels)
        self._register(name, MetricType.COUNTER, help_text, "Counter")

        series = self._metrics[name]
        existing = series.get(label_key)
        if existing is not None:
            existing.value += value
            existing.timestamp = datetime.now()
        else:
            series[label_key] = Metric(
                name=name,
                metric_type=MetricType.COUNTER,
                value=value,
                labels=labels,
                help_text=help_text
            )

    def set_gauge(
        self,
        name: str,
        value: float,
        labels: Optional[Dict[str, str]] = None,
        help_text: str = ""
    ):
        """
        Set a gauge metric, replacing any stored sample for the label set.

        Args:
            name: Metric name
            value: Value to set
            labels: Metric labels
            help_text: Help text
        """
        labels = dict(labels) if labels else {}
        label_key = self._make_label_key(labels)
        self._register(name, MetricType.GAUGE, help_text, "Gauge")

        self._metrics[name][label_key] = Metric(
            name=name,
            metric_type=MetricType.GAUGE,
            value=value,
            labels=labels,
            help_text=help_text
        )

    def observe_histogram(
        self,
        name: str,
        value: float,
        labels: Optional[Dict[str, str]] = None,
        help_text: str = ""
    ):
        """
        Observe a histogram metric.

        Simplified: the observation overwrites the previous one for the
        label set (no buckets are maintained).

        Args:
            name: Metric name
            value: Observed value
            labels: Metric labels
            help_text: Help text
        """
        labels = dict(labels) if labels else {}
        label_key = self._make_label_key(labels)
        self._register(name, MetricType.HISTOGRAM, help_text, "Histogram")

        # Stored like a gauge: last observation wins.
        self._metrics[name][label_key] = Metric(
            name=name,
            metric_type=MetricType.HISTOGRAM,
            value=value,
            labels=labels,
            help_text=help_text
        )

    def export(self) -> str:
        """
        Export metrics in Prometheus text format.

        Label values and HELP text are escaped as the exposition format
        requires, so values containing quotes, backslashes or newlines do
        not corrupt the output.

        Returns:
            Prometheus-formatted metrics text (empty string when nothing
            has been recorded)
        """
        lines: List[str] = []

        for metric_name, metadata in self._metric_metadata.items():
            lines.append(
                f"# HELP {metric_name} {self._escape_help(metadata['help'])}"
            )
            lines.append(f"# TYPE {metric_name} {metadata['type'].value}")

            # .get() avoids creating an entry in the defaultdict on read.
            for metric in self._metrics.get(metric_name, {}).values():
                if metric.labels:
                    label_str = ",".join(
                        f'{k}="{self._escape_label_value(v)}"'
                        for k, v in metric.labels.items()
                    )
                    lines.append(f"{metric_name}{{{label_str}}} {metric.value}")
                else:
                    lines.append(f"{metric_name} {metric.value}")

            lines.append("")  # Blank line between metrics

        return "\n".join(lines)

    def get_metrics(self) -> Dict[str, List[Metric]]:
        """Get all metrics as a mapping of metric name to its samples."""
        return {
            name: list(metrics.values())
            for name, metrics in self._metrics.items()
        }

    def _register(
        self,
        name: str,
        metric_type: MetricType,
        help_text: str,
        kind_label: str
    ) -> None:
        """Record TYPE/HELP metadata for ``name`` unless already registered."""
        if name not in self._metric_metadata:
            self._metric_metadata[name] = {
                "type": metric_type,
                "help": help_text or f"{kind_label} metric {name}"
            }

    @staticmethod
    def _escape_label_value(value: Any) -> str:
        """Escape backslash, double quote and newline in a label value."""
        # Backslash first, so the escapes added below are not re-escaped.
        return (
            str(value)
            .replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
        )

    @staticmethod
    def _escape_help(text: Any) -> str:
        """Escape backslash and newline in HELP text."""
        return str(text).replace("\\", "\\\\").replace("\n", "\\n")

    def _make_label_key(self, labels: Dict[str, str]) -> str:
        """Create a key from labels that does not depend on dict order."""
        if not labels:
            return "default"
        return ",".join(f"{k}={v}" for k, v in sorted(labels.items()))
311
+
312
+
313
class AlertManager:
    """
    Alert management system.

    Rules are callables evaluated against a metrics mapping. A rule whose
    condition is truthy has one active alert, keyed by the rule name; when
    the condition turns falsy the alert is marked resolved and removed from
    the active set. Every alert ever raised is kept in the history list.

    Features:
    - Rule-based alerting
    - Threshold monitoring
    - Deduplication (at most one active alert per rule)

    Usage:
        alert_mgr = AlertManager()

        # Define alert rule
        alert_mgr.add_rule(
            name="high_error_rate",
            condition=lambda metrics: metrics.get("error_rate", 0) > 0.05,
            severity=AlertSeverity.ERROR,
            message="Error rate exceeds 5%"
        )

        # Check alerts
        alerts = alert_mgr.evaluate(current_metrics)
    """

    def __init__(self):
        # rule name -> {"condition", "severity", "message", "labels"}
        self._rules: Dict[str, Dict[str, Any]] = {}
        # rule name -> alert currently firing for that rule
        self._active_alerts: Dict[str, Alert] = {}
        # every alert ever created, in creation order
        self._alert_history: List[Alert] = []

    def add_rule(
        self,
        name: str,
        condition: Callable[[Dict[str, Any]], bool],
        severity: AlertSeverity,
        message: str,
        labels: Optional[Dict[str, str]] = None
    ):
        """
        Add an alerting rule, replacing any existing rule with the same name.

        Args:
            name: Rule name
            condition: Function that evaluates alert condition
            severity: Alert severity
            message: Alert message
            labels: Additional labels
        """
        self._rules[name] = {
            "condition": condition,
            "severity": severity,
            "message": message,
            # Copy so the rule does not alias the caller's dict.
            "labels": dict(labels) if labels else {}
        }

    def evaluate(
        self,
        metrics: Dict[str, Any]
    ) -> List[Alert]:
        """
        Evaluate alert rules against current metrics.

        A rule whose condition raises is logged and skipped; its alert
        state (active or not) is left as it was, and the remaining rules
        are still evaluated.

        Args:
            metrics: Current metrics to evaluate

        Returns:
            List of firing alerts
        """
        # Function-scope import, matching this module's use of local
        # imports, so no file-level dependency change is needed.
        import logging

        for rule_name, rule in self._rules.items():
            try:
                should_fire = rule["condition"](metrics)
            except Exception:
                # A broken rule must not stop alerting, but should be visible.
                logging.getLogger(__name__).exception(
                    "Alert rule %r failed to evaluate", rule_name
                )
                continue

            if should_fire:
                if rule_name not in self._active_alerts:
                    # New alert
                    alert = Alert(
                        alert_id=f"{rule_name}-{int(time.time())}",
                        name=rule_name,
                        severity=rule["severity"],
                        message=rule["message"],
                        # Per-alert copy: alerts must not share one dict.
                        labels=dict(rule["labels"])
                    )
                    self._active_alerts[rule_name] = alert
                    self._alert_history.append(alert)
            else:
                # Alert should resolve; pop() is a no-op when not active.
                resolved = self._active_alerts.pop(rule_name, None)
                if resolved is not None:
                    resolved.firing = False
                    resolved.resolved_at = datetime.now()

        return list(self._active_alerts.values())

    def get_active_alerts(self) -> List[Alert]:
        """Get currently firing alerts"""
        return list(self._active_alerts.values())

    def get_alert_history(
        self,
        hours: int = 24
    ) -> List[Alert]:
        """Get alerts (firing or resolved) that started within the last ``hours`` hours."""
        cutoff = datetime.now() - timedelta(hours=hours)
        return [
            alert for alert in self._alert_history
            if alert.started_at > cutoff
        ]
429
+
430
+
431
class TraceCollector:
    """
    Distributed tracing collector.

    Spans are held in an "active" table while running and are appended to
    their trace's ``spans`` list when ended. ``start_trace`` also opens a
    root span for the trace; that span is closed by ``end_trace``.

    Features:
    - Trace and span collection
    - Parent-child span relationships
    - Trace visualization data

    Usage:
        collector = TraceCollector()

        # Start trace
        trace_id = collector.start_trace("agent_request")

        # Add spans
        span_id = collector.start_span(
            trace_id=trace_id,
            operation_name="policy_check"
        )

        # End span
        collector.end_span(trace_id, span_id)

        # Get trace
        trace = collector.get_trace(trace_id)
    """

    def __init__(self):
        # trace id -> trace record
        self._traces: Dict[str, Trace] = {}
        # trace id -> span id -> span that has been started but not ended
        self._active_spans: Dict[str, Dict[str, Span]] = defaultdict(dict)

    def start_trace(
        self,
        operation_name: str,
        metadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """
        Start a new trace and open its root span.

        Args:
            operation_name: Name of the operation (used for the root span)
            metadata: Additional metadata

        Returns:
            trace_id
        """
        import uuid
        trace_id = str(uuid.uuid4())

        self._traces[trace_id] = Trace(
            trace_id=trace_id,
            metadata=metadata or {}
        )

        # Create root span
        self.start_span(
            trace_id=trace_id,
            operation_name=operation_name
        )

        return trace_id

    def start_span(
        self,
        trace_id: str,
        operation_name: str,
        parent_span_id: Optional[str] = None,
        tags: Optional[Dict[str, Any]] = None
    ) -> str:
        """
        Start a new span within a trace.

        The trace id is not validated: a span started for an unknown trace
        is tracked as active but is discarded when ended, because there is
        no trace to attach it to.

        Args:
            trace_id: Trace ID
            operation_name: Operation name
            parent_span_id: Parent span ID if nested
            tags: Span tags

        Returns:
            span_id
        """
        import uuid
        span_id = str(uuid.uuid4())

        self._active_spans[trace_id][span_id] = Span(
            span_id=span_id,
            parent_span_id=parent_span_id,
            operation_name=operation_name,
            tags=tags or {}
        )

        return span_id

    def end_span(
        self,
        trace_id: str,
        span_id: str,
        tags: Optional[Dict[str, Any]] = None
    ):
        """
        End a span. Unknown trace or span ids are ignored.

        Args:
            trace_id: Trace ID
            span_id: Span ID
            tags: Additional tags to add
        """
        # .get() so that a lookup never creates an entry in the defaultdict.
        active = self._active_spans.get(trace_id)
        if active is None or span_id not in active:
            return

        span = active.pop(span_id)
        span.duration_ms = (
            datetime.now() - span.started_at
        ).total_seconds() * 1000

        if tags:
            span.tags.update(tags)

        # Move to trace
        trace = self._traces.get(trace_id)
        if trace is not None:
            trace.spans.append(span)

        # Drop the per-trace table once empty so finished traces do not
        # leave an empty dict behind for the life of the collector.
        if not active:
            del self._active_spans[trace_id]

    def end_trace(self, trace_id: str):
        """End a trace: set its duration and end any spans still active."""
        trace = self._traces.get(trace_id)
        if trace is None:
            return

        trace.duration_ms = (
            datetime.now() - trace.started_at
        ).total_seconds() * 1000

        # End any remaining active spans (snapshot the ids: end_span
        # mutates the table while we iterate).
        for span_id in list(self._active_spans.get(trace_id, {})):
            self.end_span(trace_id, span_id)

    def get_trace(self, trace_id: str) -> Optional[Trace]:
        """Get a trace by ID, or None when the id is unknown."""
        return self._traces.get(trace_id)

    def list_traces(
        self,
        limit: int = 100
    ) -> List[Trace]:
        """List up to ``limit`` traces, most recently started first."""
        traces = sorted(
            self._traces.values(),
            key=lambda t: t.started_at,
            reverse=True
        )
        return traces[:limit]

    def get_trace_visualization(
        self,
        trace_id: str
    ) -> Dict[str, Any]:
        """
        Get trace data formatted for visualization.

        Args:
            trace_id: Trace ID

        Returns:
            Visualization data with spans in hierarchical format, or an
            empty dict when the trace is unknown
        """
        trace = self.get_trace(trace_id)
        if not trace:
            return {}

        return {
            "trace_id": trace_id,
            "duration_ms": trace.duration_ms,
            "started_at": trace.started_at.isoformat(),
            "span_count": len(trace.spans),
            "span_tree": self._build_span_tree(trace.spans),
            "metadata": trace.metadata
        }

    def _build_span_tree(
        self,
        spans: List[Span]
    ) -> List[Dict[str, Any]]:
        """
        Build hierarchical span tree.

        Spans with no parent are roots. Spans whose parent is not among
        ``spans`` (for example, the parent has not been ended yet) are also
        emitted as roots so they are not silently dropped from the tree.
        """
        known_ids = {span.span_id for span in spans}

        roots: List[Span] = []
        by_parent: Dict[str, List[Span]] = defaultdict(list)
        for span in spans:
            parent_id = span.parent_span_id
            if parent_id is None or parent_id not in known_ids:
                roots.append(span)
            else:
                by_parent[parent_id].append(span)

        def build_node(span: Span) -> Dict[str, Any]:
            children = by_parent.get(span.span_id, [])
            return {
                "span_id": span.span_id,
                "operation_name": span.operation_name,
                "duration_ms": span.duration_ms,
                "tags": span.tags,
                "children": [build_node(child) for child in children]
            }

        return [build_node(span) for span in roots]
645
+
646
+
647
class ObservabilityDashboard:
    """
    Single entry point that combines the metrics exporter, the alert
    manager and the trace collector into dashboard-ready dictionaries.

    Features:
    - Snapshot of all recorded metrics
    - Currently firing alerts
    - Summary of the most recent traces
    - Overall health status derived from active alerts

    Usage:
        dashboard = ObservabilityDashboard(
            prometheus=prometheus_exporter,
            alerts=alert_manager,
            traces=trace_collector
        )

        # Get dashboard data
        data = dashboard.get_dashboard_data()
    """

    def __init__(
        self,
        prometheus: PrometheusExporter,
        alerts: AlertManager,
        traces: TraceCollector
    ):
        self.prometheus = prometheus
        self.alerts = alerts
        self.traces = traces

    def get_dashboard_data(self) -> Dict[str, Any]:
        """
        Collect metrics, active alerts and the ten most recent traces.

        Returns:
            Dict with "timestamp", "metrics", "alerts" and "traces" keys
        """
        all_metrics = self.prometheus.get_metrics()
        firing = self.alerts.get_active_alerts()
        latest = self.traces.list_traces(limit=10)

        # Timestamp is taken after the three collections above.
        snapshot: Dict[str, Any] = {"timestamp": datetime.now().isoformat()}

        metric_rows: Dict[str, Any] = {}
        for metric_name, samples in all_metrics.items():
            metric_rows[metric_name] = [
                {
                    "value": sample.value,
                    "labels": sample.labels,
                    "timestamp": sample.timestamp.isoformat()
                }
                for sample in samples
            ]
        snapshot["metrics"] = metric_rows

        alert_rows = []
        for item in firing:
            alert_rows.append({
                "name": item.name,
                "severity": item.severity.value,
                "message": item.message,
                "started_at": item.started_at.isoformat()
            })
        snapshot["alerts"] = {
            "active_count": len(firing),
            "alerts": alert_rows
        }

        trace_rows = []
        for item in latest:
            trace_rows.append({
                "trace_id": item.trace_id,
                "duration_ms": item.duration_ms,
                "span_count": len(item.spans),
                "started_at": item.started_at.isoformat()
            })
        snapshot["traces"] = {
            "recent_count": len(latest),
            "traces": trace_rows
        }

        return snapshot

    def get_health_status(self) -> Dict[str, Any]:
        """
        Summarise system health from the active alerts.

        Status is "critical" if any CRITICAL alert is active, else
        "degraded" if any ERROR alert is active, else "warning" if any
        alert at all is active, else "healthy".

        Returns:
            Dict with the status, alert counts and check time
        """
        firing = self.alerts.get_active_alerts()

        severities = [item.severity for item in firing]
        n_critical = len([s for s in severities if s == AlertSeverity.CRITICAL])
        n_error = len([s for s in severities if s == AlertSeverity.ERROR])

        status = "healthy"
        if n_critical > 0:
            status = "critical"
        elif n_error > 0:
            status = "degraded"
        elif len(firing) > 0:
            status = "warning"

        return {
            "status": status,
            "active_alerts": len(firing),
            "critical_alerts": n_critical,
            "error_alerts": n_error,
            "checked_at": datetime.now().isoformat()
        }
768
+
769
+
770
def create_observability_suite() -> Dict[str, Any]:
    """
    Build one exporter, one alert manager and one trace collector, plus a
    dashboard wired to those three instances.

    Returns:
        Dictionary with keys "prometheus", "alerts", "traces", "dashboard"
    """
    suite: Dict[str, Any] = {}
    suite["prometheus"] = PrometheusExporter()
    suite["alerts"] = AlertManager()
    suite["traces"] = TraceCollector()
    # The dashboard shares the same component instances returned above.
    suite["dashboard"] = ObservabilityDashboard(
        suite["prometheus"], suite["alerts"], suite["traces"]
    )
    return suite