agent_os_kernel 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. agent_control_plane/__init__.py +662 -0
  2. agent_control_plane/a2a_adapter.py +543 -0
  3. agent_control_plane/adapter.py +417 -0
  4. agent_control_plane/agent_hibernation.py +394 -0
  5. agent_control_plane/agent_kernel.py +470 -0
  6. agent_control_plane/compliance.py +720 -0
  7. agent_control_plane/constraint_graphs.py +478 -0
  8. agent_control_plane/control_plane.py +854 -0
  9. agent_control_plane/example_executors.py +195 -0
  10. agent_control_plane/execution_engine.py +231 -0
  11. agent_control_plane/flight_recorder.py +846 -0
  12. agent_control_plane/governance_layer.py +435 -0
  13. agent_control_plane/hf_utils.py +563 -0
  14. agent_control_plane/interfaces/__init__.py +55 -0
  15. agent_control_plane/interfaces/kernel_interface.py +361 -0
  16. agent_control_plane/interfaces/plugin_interface.py +497 -0
  17. agent_control_plane/interfaces/protocol_interfaces.py +387 -0
  18. agent_control_plane/kernel_space.py +1009 -0
  19. agent_control_plane/langchain_adapter.py +424 -0
  20. agent_control_plane/lifecycle.py +3113 -0
  21. agent_control_plane/mcp_adapter.py +653 -0
  22. agent_control_plane/ml_safety.py +563 -0
  23. agent_control_plane/multimodal.py +727 -0
  24. agent_control_plane/mute_agent.py +422 -0
  25. agent_control_plane/observability.py +787 -0
  26. agent_control_plane/orchestrator.py +482 -0
  27. agent_control_plane/plugin_registry.py +750 -0
  28. agent_control_plane/policy_engine.py +954 -0
  29. agent_control_plane/process_isolation.py +777 -0
  30. agent_control_plane/shadow_mode.py +310 -0
  31. agent_control_plane/signals.py +493 -0
  32. agent_control_plane/supervisor_agents.py +430 -0
  33. agent_control_plane/time_travel_debugger.py +557 -0
  34. agent_control_plane/tool_registry.py +452 -0
  35. agent_control_plane/vfs.py +697 -0
  36. agent_kernel/__init__.py +69 -0
  37. agent_kernel/analyzer.py +435 -0
  38. agent_kernel/auditor.py +36 -0
  39. agent_kernel/completeness_auditor.py +237 -0
  40. agent_kernel/detector.py +203 -0
  41. agent_kernel/kernel.py +744 -0
  42. agent_kernel/memory_manager.py +85 -0
  43. agent_kernel/models.py +374 -0
  44. agent_kernel/nudge_mechanism.py +263 -0
  45. agent_kernel/outcome_analyzer.py +338 -0
  46. agent_kernel/patcher.py +582 -0
  47. agent_kernel/semantic_analyzer.py +316 -0
  48. agent_kernel/semantic_purge.py +349 -0
  49. agent_kernel/simulator.py +449 -0
  50. agent_kernel/teacher.py +85 -0
  51. agent_kernel/triage.py +152 -0
  52. agent_os/__init__.py +409 -0
  53. agent_os/_adversarial_impl.py +200 -0
  54. agent_os/_circuit_breaker_impl.py +232 -0
  55. agent_os/_mcp_metrics.py +193 -0
  56. agent_os/adversarial.py +20 -0
  57. agent_os/agents_compat.py +490 -0
  58. agent_os/audit_logger.py +135 -0
  59. agent_os/base_agent.py +651 -0
  60. agent_os/circuit_breaker.py +34 -0
  61. agent_os/cli/__init__.py +659 -0
  62. agent_os/cli/cmd_audit.py +128 -0
  63. agent_os/cli/cmd_init.py +152 -0
  64. agent_os/cli/cmd_policy.py +41 -0
  65. agent_os/cli/cmd_policy_gen.py +180 -0
  66. agent_os/cli/cmd_validate.py +258 -0
  67. agent_os/cli/mcp_scan.py +265 -0
  68. agent_os/cli/output.py +192 -0
  69. agent_os/cli/policy_checker.py +330 -0
  70. agent_os/compat.py +74 -0
  71. agent_os/constraint_graph.py +234 -0
  72. agent_os/content_governance.py +140 -0
  73. agent_os/context_budget.py +305 -0
  74. agent_os/credential_redactor.py +224 -0
  75. agent_os/diff_policy.py +89 -0
  76. agent_os/egress_policy.py +159 -0
  77. agent_os/escalation.py +276 -0
  78. agent_os/event_bus.py +124 -0
  79. agent_os/exceptions.py +180 -0
  80. agent_os/execution_context_policy.py +141 -0
  81. agent_os/github_enterprise.py +96 -0
  82. agent_os/health.py +20 -0
  83. agent_os/integrations/__init__.py +279 -0
  84. agent_os/integrations/a2a_adapter.py +279 -0
  85. agent_os/integrations/agent_lightning/__init__.py +30 -0
  86. agent_os/integrations/anthropic_adapter.py +420 -0
  87. agent_os/integrations/autogen_adapter.py +620 -0
  88. agent_os/integrations/base.py +1137 -0
  89. agent_os/integrations/compat.py +229 -0
  90. agent_os/integrations/config.py +98 -0
  91. agent_os/integrations/conversation_guardian.py +957 -0
  92. agent_os/integrations/crewai_adapter.py +467 -0
  93. agent_os/integrations/drift_detector.py +425 -0
  94. agent_os/integrations/dry_run.py +124 -0
  95. agent_os/integrations/escalation.py +582 -0
  96. agent_os/integrations/gemini_adapter.py +364 -0
  97. agent_os/integrations/google_adk_adapter.py +633 -0
  98. agent_os/integrations/guardrails_adapter.py +394 -0
  99. agent_os/integrations/health.py +197 -0
  100. agent_os/integrations/langchain_adapter.py +654 -0
  101. agent_os/integrations/llamafirewall.py +343 -0
  102. agent_os/integrations/llamaindex_adapter.py +188 -0
  103. agent_os/integrations/logging.py +191 -0
  104. agent_os/integrations/maf_adapter.py +631 -0
  105. agent_os/integrations/mistral_adapter.py +365 -0
  106. agent_os/integrations/openai_adapter.py +816 -0
  107. agent_os/integrations/openai_agents_sdk.py +406 -0
  108. agent_os/integrations/policy_compose.py +171 -0
  109. agent_os/integrations/profiling.py +144 -0
  110. agent_os/integrations/pydantic_ai_adapter.py +420 -0
  111. agent_os/integrations/rate_limiter.py +130 -0
  112. agent_os/integrations/rbac.py +143 -0
  113. agent_os/integrations/registry.py +113 -0
  114. agent_os/integrations/scope_guard.py +303 -0
  115. agent_os/integrations/semantic_kernel_adapter.py +769 -0
  116. agent_os/integrations/smolagents_adapter.py +629 -0
  117. agent_os/integrations/templates.py +178 -0
  118. agent_os/integrations/token_budget.py +134 -0
  119. agent_os/integrations/tool_aliases.py +190 -0
  120. agent_os/integrations/webhooks.py +177 -0
  121. agent_os/lite.py +208 -0
  122. agent_os/mcp_gateway.py +385 -0
  123. agent_os/mcp_message_signer.py +273 -0
  124. agent_os/mcp_protocols.py +161 -0
  125. agent_os/mcp_response_scanner.py +232 -0
  126. agent_os/mcp_security.py +924 -0
  127. agent_os/mcp_session_auth.py +231 -0
  128. agent_os/mcp_sliding_rate_limiter.py +184 -0
  129. agent_os/memory_guard.py +409 -0
  130. agent_os/metrics.py +134 -0
  131. agent_os/mute.py +428 -0
  132. agent_os/mute_agent.py +209 -0
  133. agent_os/policies/__init__.py +77 -0
  134. agent_os/policies/async_evaluator.py +275 -0
  135. agent_os/policies/backends.py +670 -0
  136. agent_os/policies/bridge.py +169 -0
  137. agent_os/policies/budget.py +85 -0
  138. agent_os/policies/cli.py +294 -0
  139. agent_os/policies/conflict_resolution.py +270 -0
  140. agent_os/policies/data_classification.py +252 -0
  141. agent_os/policies/evaluator.py +239 -0
  142. agent_os/policies/policy_schema.json +228 -0
  143. agent_os/policies/rate_limiting.py +145 -0
  144. agent_os/policies/schema.py +115 -0
  145. agent_os/policies/shared.py +331 -0
  146. agent_os/prompt_injection.py +694 -0
  147. agent_os/providers.py +182 -0
  148. agent_os/py.typed +0 -0
  149. agent_os/retry.py +81 -0
  150. agent_os/reversibility.py +251 -0
  151. agent_os/sandbox.py +432 -0
  152. agent_os/sandbox_provider.py +140 -0
  153. agent_os/secure_codegen.py +525 -0
  154. agent_os/security_skills.py +538 -0
  155. agent_os/semantic_policy.py +422 -0
  156. agent_os/server/__init__.py +15 -0
  157. agent_os/server/__main__.py +25 -0
  158. agent_os/server/app.py +277 -0
  159. agent_os/server/models.py +104 -0
  160. agent_os/shift_left_metrics.py +130 -0
  161. agent_os/stateless.py +742 -0
  162. agent_os/supervisor.py +148 -0
  163. agent_os/task_outcome.py +148 -0
  164. agent_os/transparency.py +181 -0
  165. agent_os/trust_root.py +128 -0
  166. agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
  167. agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
  168. agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
  169. agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
  170. agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
  171. agent_os_observability/__init__.py +27 -0
  172. agent_os_observability/dashboards.py +898 -0
  173. agent_os_observability/metrics.py +398 -0
  174. agent_os_observability/server.py +223 -0
  175. agent_os_observability/tracer.py +232 -0
  176. agent_primitives/__init__.py +24 -0
  177. agent_primitives/failures.py +84 -0
  178. agent_primitives/py.typed +0 -0
  179. amb_core/__init__.py +177 -0
  180. amb_core/adapters/__init__.py +57 -0
  181. amb_core/adapters/aws_sqs_broker.py +376 -0
  182. amb_core/adapters/azure_servicebus_broker.py +340 -0
  183. amb_core/adapters/kafka_broker.py +260 -0
  184. amb_core/adapters/nats_broker.py +285 -0
  185. amb_core/adapters/rabbitmq_broker.py +235 -0
  186. amb_core/adapters/redis_broker.py +262 -0
  187. amb_core/broker.py +145 -0
  188. amb_core/bus.py +481 -0
  189. amb_core/cloudevents.py +509 -0
  190. amb_core/dlq.py +345 -0
  191. amb_core/hf_utils.py +536 -0
  192. amb_core/memory_broker.py +410 -0
  193. amb_core/models.py +141 -0
  194. amb_core/persistence.py +529 -0
  195. amb_core/schema.py +294 -0
  196. amb_core/tracing.py +358 -0
  197. atr/__init__.py +640 -0
  198. atr/access.py +348 -0
  199. atr/composition.py +645 -0
  200. atr/decorator.py +357 -0
  201. atr/executor.py +384 -0
  202. atr/health.py +557 -0
  203. atr/hf_utils.py +449 -0
  204. atr/injection.py +422 -0
  205. atr/metrics.py +440 -0
  206. atr/policies.py +403 -0
  207. atr/py.typed +2 -0
  208. atr/registry.py +452 -0
  209. atr/schema.py +480 -0
  210. atr/tools/safe/__init__.py +75 -0
  211. atr/tools/safe/calculator.py +467 -0
  212. atr/tools/safe/datetime_tool.py +443 -0
  213. atr/tools/safe/file_reader.py +402 -0
  214. atr/tools/safe/http_client.py +316 -0
  215. atr/tools/safe/json_parser.py +374 -0
  216. atr/tools/safe/text_tool.py +537 -0
  217. atr/tools/safe/toolkit.py +175 -0
  218. caas/__init__.py +162 -0
  219. caas/api/__init__.py +7 -0
  220. caas/api/server.py +1328 -0
  221. caas/caching.py +834 -0
  222. caas/cli.py +210 -0
  223. caas/conversation.py +223 -0
  224. caas/decay.py +72 -0
  225. caas/detection/__init__.py +9 -0
  226. caas/detection/detector.py +238 -0
  227. caas/enrichment.py +130 -0
  228. caas/gateway/__init__.py +27 -0
  229. caas/gateway/trust_gateway.py +474 -0
  230. caas/hf_utils.py +479 -0
  231. caas/ingestion/__init__.py +23 -0
  232. caas/ingestion/processors.py +253 -0
  233. caas/ingestion/structure_parser.py +188 -0
  234. caas/models.py +356 -0
  235. caas/pragmatic_truth.py +444 -0
  236. caas/routing/__init__.py +10 -0
  237. caas/routing/heuristic_router.py +58 -0
  238. caas/storage/__init__.py +9 -0
  239. caas/storage/store.py +389 -0
  240. caas/triad.py +213 -0
  241. caas/tuning/__init__.py +9 -0
  242. caas/tuning/tuner.py +329 -0
  243. caas/vfs/__init__.py +14 -0
  244. caas/vfs/filesystem.py +452 -0
  245. cmvk/__init__.py +218 -0
  246. cmvk/audit.py +402 -0
  247. cmvk/benchmarks.py +478 -0
  248. cmvk/constitutional.py +904 -0
  249. cmvk/hf_utils.py +301 -0
  250. cmvk/metrics.py +473 -0
  251. cmvk/profiles.py +300 -0
  252. cmvk/py.typed +0 -0
  253. cmvk/types.py +12 -0
  254. cmvk/verification.py +956 -0
  255. emk/__init__.py +89 -0
  256. emk/causal.py +352 -0
  257. emk/hf_utils.py +421 -0
  258. emk/indexer.py +83 -0
  259. emk/py.typed +0 -0
  260. emk/schema.py +204 -0
  261. emk/sleep_cycle.py +347 -0
  262. emk/store.py +281 -0
  263. iatp/__init__.py +166 -0
  264. iatp/attestation.py +461 -0
  265. iatp/cli.py +317 -0
  266. iatp/hf_utils.py +472 -0
  267. iatp/ipc_pipes.py +580 -0
  268. iatp/main.py +412 -0
  269. iatp/models/__init__.py +447 -0
  270. iatp/policy_engine.py +337 -0
  271. iatp/py.typed +2 -0
  272. iatp/recovery.py +321 -0
  273. iatp/security/__init__.py +270 -0
  274. iatp/sidecar/__init__.py +519 -0
  275. iatp/telemetry/__init__.py +164 -0
  276. iatp/tests/__init__.py +1 -0
  277. iatp/tests/test_attestation.py +370 -0
  278. iatp/tests/test_cli.py +131 -0
  279. iatp/tests/test_ed25519_attestation.py +211 -0
  280. iatp/tests/test_models.py +130 -0
  281. iatp/tests/test_policy_engine.py +347 -0
  282. iatp/tests/test_recovery.py +281 -0
  283. iatp/tests/test_security.py +222 -0
  284. iatp/tests/test_sidecar.py +167 -0
  285. iatp/tests/test_telemetry.py +175 -0
  286. mcp_kernel_server/__init__.py +28 -0
  287. mcp_kernel_server/cli.py +274 -0
  288. mcp_kernel_server/resources.py +217 -0
  289. mcp_kernel_server/server.py +564 -0
  290. mcp_kernel_server/tools.py +1174 -0
  291. mute_agent/__init__.py +68 -0
  292. mute_agent/core/__init__.py +1 -0
  293. mute_agent/core/execution_agent.py +166 -0
  294. mute_agent/core/handshake_protocol.py +201 -0
  295. mute_agent/core/reasoning_agent.py +238 -0
  296. mute_agent/knowledge_graph/__init__.py +1 -0
  297. mute_agent/knowledge_graph/graph_elements.py +65 -0
  298. mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
  299. mute_agent/knowledge_graph/subgraph.py +224 -0
  300. mute_agent/listener/__init__.py +43 -0
  301. mute_agent/listener/adapters/__init__.py +31 -0
  302. mute_agent/listener/adapters/base_adapter.py +189 -0
  303. mute_agent/listener/adapters/caas_adapter.py +344 -0
  304. mute_agent/listener/adapters/control_plane_adapter.py +436 -0
  305. mute_agent/listener/adapters/iatp_adapter.py +332 -0
  306. mute_agent/listener/adapters/scak_adapter.py +251 -0
  307. mute_agent/listener/listener.py +610 -0
  308. mute_agent/listener/state_observer.py +436 -0
  309. mute_agent/listener/threshold_config.py +313 -0
  310. mute_agent/super_system/__init__.py +1 -0
  311. mute_agent/super_system/router.py +204 -0
  312. mute_agent/visualization/__init__.py +10 -0
  313. mute_agent/visualization/graph_debugger.py +502 -0
  314. nexus/README.md +60 -0
  315. nexus/__init__.py +51 -0
  316. nexus/arbiter.py +359 -0
  317. nexus/client.py +466 -0
  318. nexus/dmz.py +444 -0
  319. nexus/escrow.py +430 -0
  320. nexus/exceptions.py +286 -0
  321. nexus/pyproject.toml +36 -0
  322. nexus/registry.py +393 -0
  323. nexus/reputation.py +425 -0
  324. nexus/schemas/__init__.py +51 -0
  325. nexus/schemas/compliance.py +276 -0
  326. nexus/schemas/escrow.py +251 -0
  327. nexus/schemas/manifest.py +225 -0
  328. nexus/schemas/receipt.py +208 -0
  329. nexus/tests/__init__.py +0 -0
  330. nexus/tests/conftest.py +146 -0
  331. nexus/tests/test_arbiter.py +192 -0
  332. nexus/tests/test_dmz.py +194 -0
  333. nexus/tests/test_escrow.py +276 -0
  334. nexus/tests/test_exceptions.py +225 -0
  335. nexus/tests/test_registry.py +232 -0
  336. nexus/tests/test_reputation.py +328 -0
  337. nexus/tests/test_schemas.py +295 -0
@@ -0,0 +1,237 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+
4
+ """
5
+ Completeness Auditor - Detects and fixes agent "Laziness".
6
+
7
+ This implements Differential Auditing: instead of auditing every interaction,
8
+ we only audit when the agent gives up with a "Negative Result".
9
+
10
+ The Shadow Teacher Model attempts the same sub-task, and if it succeeds,
11
+ we generate a "Competence Patch" to prevent future laziness.
12
+ """
13
+
14
+ import logging
15
+ import uuid
16
+ from typing import Optional, List
17
+ from datetime import datetime
18
+
19
+ from .models import AgentOutcome, CompletenessAudit, GiveUpSignal
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
class CompletenessAuditor:
    """
    The Shadow Auditor that detects agent laziness.

    When an agent outputs a "Negative Result" (e.g., "No data found"),
    the system spins up a "Teacher Model" to attempt the same sub-task.

    If the teacher succeeds where the agent gave up, we identify the gap
    and generate a competence patch.

    Note: the teacher model is currently *simulated* with keyword heuristics
    in ``_run_teacher_model``; no external model is invoked by this class.
    """

    def __init__(self, teacher_model: str = "o1-preview"):
        """
        Initialize the Completeness Auditor.

        Args:
            teacher_model: High-reasoning model for auditing (e.g., "o1-preview", "o1", "claude-opus")
        """
        self.teacher_model = teacher_model
        # Every audit produced by audit_give_up(), oldest first.
        self.audit_history: List[CompletenessAudit] = []
        # Counters are tracked separately from the history list.
        self.audit_count = 0
        self.lazy_detection_count = 0

    def audit_give_up(self, outcome: AgentOutcome) -> CompletenessAudit:
        """
        Audit an agent's give-up outcome using the Teacher Model.

        This is "Differential Auditing" - we only audit specific give-up signals,
        not every interaction (which would be too expensive).

        Args:
            outcome: The agent outcome with give-up signal

        Returns:
            CompletenessAudit with findings. The audit is also appended to
            ``self.audit_history`` and the counters are updated.
        """
        audit_id = f"audit-{uuid.uuid4().hex[:8]}"
        self.audit_count += 1

        logger.info(f"🔍 Completeness Audit {audit_id} started")
        logger.info(f" Agent said: '{outcome.agent_response[:60]}...'")
        logger.info(f" Give-up signal: {outcome.give_up_signal.value if outcome.give_up_signal else 'unknown'}")

        # Simulate teacher model attempting the same task
        teacher_result = self._run_teacher_model(outcome)

        # Compare agent vs teacher
        teacher_found_data = teacher_result["found_data"]

        if teacher_found_data:
            # Teacher succeeded where agent gave up - this is LAZINESS
            self.lazy_detection_count += 1
            logger.warning("⚠️ LAZINESS DETECTED: Teacher found data that agent missed!")

            gap_analysis = self._analyze_gap(outcome, teacher_result)
            competence_patch = self._generate_competence_patch(outcome, gap_analysis, teacher_result)
            confidence = teacher_result["confidence"]
        else:
            # Teacher also couldn't find data - agent was correct
            logger.info("✓ Agent was correct: No data available")
            gap_analysis = "Agent response was appropriate. No data available."
            competence_patch = "No patch needed - agent correctly identified unavailability."
            confidence = 0.9

        audit = CompletenessAudit(
            audit_id=audit_id,
            agent_outcome=outcome,
            teacher_model=self.teacher_model,
            teacher_response=teacher_result["response"],
            teacher_found_data=teacher_found_data,
            gap_analysis=gap_analysis,
            competence_patch=competence_patch,
            confidence=confidence,
        )

        self.audit_history.append(audit)

        logger.info(f"🏁 Audit complete. Found data: {teacher_found_data}")

        return audit

    def _run_teacher_model(self, outcome: AgentOutcome) -> dict:
        """
        Simulate running the teacher model on the same task.

        In a real system, this would:
        1. Spin up a high-reasoning model (o1-preview, o1, etc.)
        2. Give it the same user prompt
        3. Give it enhanced context/tools
        4. Capture its response

        For demonstration, we simulate based on keyword patterns in the
        lower-cased user prompt.

        Returns:
            A dict with the keys ``found_data`` (bool), ``response`` (str),
            ``location`` (str) and ``confidence`` (float).
        """
        user_prompt = outcome.user_prompt.lower()

        # Simulate teacher model's superior reasoning
        # In reality, this would be an actual API call to o1-preview or similar

        # Pattern: Looking for logs. All three conditions must hold; otherwise
        # evaluation falls through to the remaining patterns below.
        if (
            any(keyword in user_prompt for keyword in ["log", "error", "trace", "debug"])
            and ("500" in user_prompt or "error" in user_prompt)
            and outcome.give_up_signal == GiveUpSignal.NO_DATA_FOUND
        ):
            # Teacher found it by checking archived partitions
            return {
                "found_data": True,
                "response": "Found logs in archived partition /var/log/archive/2024-01/. The agent missed checking archived partitions.",
                "location": "archived partitions",
                "confidence": 0.92,
            }

        # Pattern: Looking for projects/resources
        if any(keyword in user_prompt for keyword in ["project", "resource", "entity"]):
            if "alpha" in user_prompt or "beta" in user_prompt:
                # Teacher verifies against complete registry
                return {
                    "found_data": True,
                    "response": "Project exists but is archived. Agent should check archived projects registry.",
                    "location": "archived registry",
                    "confidence": 0.88,
                }

        # Pattern: Database queries
        if any(keyword in user_prompt for keyword in ["user", "customer", "record", "data"]):
            if "recent" in user_prompt or "latest" in user_prompt:
                # Teacher uses proper time window
                return {
                    "found_data": True,
                    "response": "Found 247 records using proper time window. Agent may have used incorrect date filter.",
                    "location": "database with corrected filter",
                    "confidence": 0.85,
                }

        # Default: Teacher also couldn't find data
        return {
            "found_data": False,
            "response": "After exhaustive search, confirmed no data available.",
            "location": "none",
            "confidence": 0.9,
        }

    def _analyze_gap(self, outcome: AgentOutcome, teacher_result: dict) -> str:
        """
        Analyze what the agent missed.

        This is the key insight: identifying the specific gap in the agent's
        reasoning or search strategy.

        Args:
            outcome: The agent outcome being audited
            teacher_result: Result dict produced by ``_run_teacher_model``

        Returns:
            A one-line description combining the gap, the first 100 characters
            of the agent response and the first 100 of the teacher response.
        """
        location = teacher_result.get("location", "unknown location")

        # Build gap analysis based on give-up signal type
        if outcome.give_up_signal == GiveUpSignal.NO_DATA_FOUND:
            gap = f"Agent didn't check {location}. "
        elif outcome.give_up_signal == GiveUpSignal.INSUFFICIENT_INFO:
            gap = f"Agent gave up too early. Data exists in {location}. "
        else:
            gap = f"Agent failed to search {location}. "

        gap += f"Agent response: '{outcome.agent_response[:100]}'. "
        gap += f"Teacher found: '{teacher_result['response'][:100]}'"

        return gap

    def _generate_competence_patch(
        self,
        outcome: AgentOutcome,
        gap_analysis: str,
        teacher_result: dict
    ) -> str:
        """
        Generate a "Competence Patch" - a lesson to prevent future laziness.

        This is NOT just correcting the answer; it's a strategic instruction
        that addresses the systematic gap in the agent's behavior.

        Example patches:
        - "When searching logs, always check archived partitions if recent logs are empty."
        - "Before reporting 'not found', verify all registry sources including archived items."
        - "Use proper time windows for 'recent' queries: last 7 days for logs, 30 days for records."

        Args:
            outcome: The agent outcome being audited
            gap_analysis: Gap description (accepted for interface compatibility;
                not used when building the patch text)
            teacher_result: Result dict produced by ``_run_teacher_model``

        Returns:
            The patch sentence followed by a fixed explanatory suffix.
        """
        user_prompt_lower = outcome.user_prompt.lower()
        location = teacher_result.get("location", "additional sources")

        # Generate specific, actionable patch based on the pattern
        if "log" in user_prompt_lower:
            patch = f"When searching logs, always check archived partitions ({location}) if recent logs are empty."
        elif "project" in user_prompt_lower or "resource" in user_prompt_lower:
            patch = f"Before reporting 'not found', verify all registry sources including {location}."
        elif "recent" in user_prompt_lower or "latest" in user_prompt_lower:
            patch = f"For 'recent' queries, use proper time windows and check {location}."
        elif outcome.give_up_signal == GiveUpSignal.NO_DATA_FOUND:
            patch = f"Before reporting 'no data found', exhaustively check all sources including {location}."
        else:
            patch = f"Expand search scope to include {location} before concluding data unavailability."

        # Add context about the specific failure
        patch += " This prevents false negatives when data exists but requires deeper search."

        return patch

    def get_audit_stats(self) -> dict:
        """
        Get statistics about auditing activity.

        Returns:
            Dict with ``total_audits``, ``laziness_detected``, ``laziness_rate``
            (0.0 when nothing has been audited), ``audits_with_data`` and
            ``audits_no_data``.
        """
        # One pass over the history; the complement gives the "no data" count.
        with_data = sum(1 for audit in self.audit_history if audit.teacher_found_data)
        return {
            "total_audits": self.audit_count,
            "laziness_detected": self.lazy_detection_count,
            "laziness_rate": self.lazy_detection_count / self.audit_count if self.audit_count > 0 else 0.0,
            "audits_with_data": with_data,
            "audits_no_data": len(self.audit_history) - with_data,
        }

    def get_audit_history(self, limit: int = 100) -> List[CompletenessAudit]:
        """
        Get the most recent audits, oldest first.

        Args:
            limit: Maximum number of audits to return. Non-positive values
                return an empty list.

        Returns:
            A new list with at most ``limit`` of the latest audits.
        """
        # Guard explicitly: history[-0:] would return the *entire* list and a
        # negative limit would silently skip leading entries instead.
        if limit <= 0:
            return []
        return self.audit_history[-limit:]
@@ -0,0 +1,203 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+
4
+ """
5
+ Failure detection and monitoring system.
6
+ """
7
+
8
+ import logging
9
+ from typing import Optional, Callable, Dict, Any, List
10
+ from datetime import datetime
11
+ from collections import deque
12
+
13
+ from .models import AgentFailure, FailureType, FailureSeverity, FailureTrace
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class FailureQueue:
    """Bounded FIFO buffer of failures that carry full traces and reasoning chains."""

    def __init__(self, max_size: int = 1000):
        """
        Create an empty failure queue.

        Args:
            max_size: Upper bound on the number of failures retained; it is
                passed to the underlying deque as ``maxlen``.
        """
        self.max_size = max_size
        self.queue: deque = deque(maxlen=max_size)

    def enqueue(self, failure: AgentFailure):
        """
        Append a failure (with its trace) to the tail of the queue.

        Args:
            failure: AgentFailure object with trace information
        """
        self.queue.append(failure)
        logger.info(f"Enqueued failure for agent {failure.agent_id}. Queue size: {len(self.queue)}")

    def dequeue(self) -> Optional[AgentFailure]:
        """Pop and return the oldest failure, or None when the queue is empty."""
        try:
            return self.queue.popleft()
        except IndexError:
            # popleft() raises IndexError on an empty deque.
            return None

    def peek(self) -> Optional[AgentFailure]:
        """Return the oldest failure without removing it, or None when empty."""
        try:
            return self.queue[0]
        except IndexError:
            return None

    def get_all(self) -> List[AgentFailure]:
        """Return a list snapshot of every queued failure, oldest first."""
        return [*self.queue]

    def size(self) -> int:
        """Return how many failures are currently queued."""
        return len(self.queue)

    def clear(self):
        """Drop every failure from the queue."""
        self.queue.clear()
64
+
65
+
66
class FailureDetector:
    """
    Detects and classifies agent failures.

    Each detected failure is recorded in ``failure_history``; failures that
    come with a full trace are additionally pushed onto ``failure_queue``.
    """

    def __init__(self):
        # Handlers keyed by failure-type string. This class only stores them;
        # nothing in this class invokes them.
        self.failure_handlers: Dict[str, Callable] = {}
        # Every failure seen by detect_failure(), oldest first.
        self.failure_history: List[AgentFailure] = []
        self.failure_queue = FailureQueue()

    def register_handler(self, failure_type: str, handler: Callable):
        """
        Register a custom handler for a specific failure type.

        Args:
            failure_type: Key under which the handler is stored; an existing
                handler for the same key is replaced
            handler: Callable to associate with the failure type
        """
        self.failure_handlers[failure_type] = handler
        logger.info(f"Registered handler for failure type: {failure_type}")

    def detect_failure(
        self,
        agent_id: str,
        error_message: str,
        context: Optional[Dict[str, Any]] = None,
        stack_trace: Optional[str] = None,
        user_prompt: Optional[str] = None,
        chain_of_thought: Optional[List[str]] = None,
        failed_action: Optional[Dict[str, Any]] = None
    ) -> AgentFailure:
        """
        Detect and classify a failure with full trace capture.

        Args:
            agent_id: Identifier of the agent that failed
            error_message: Error message from the failure
            context: Additional context about the failure
            stack_trace: Stack trace if available
            user_prompt: Original user prompt that led to failure
            chain_of_thought: Agent's reasoning steps
            failed_action: The specific action that failed

        Returns:
            AgentFailure object with classified failure and full trace
        """
        from datetime import timezone

        failure_type = self._classify_failure(error_message, context)
        severity = self._assess_severity(failure_type, context)

        # A trace is only built when both the prompt and the failed action are
        # present (and non-empty).
        failure_trace = None
        if user_prompt and failed_action:
            failure_trace = FailureTrace(
                user_prompt=user_prompt,
                chain_of_thought=chain_of_thought or [],
                failed_action=failed_action,
                error_details=error_message,
            )

        # datetime.utcnow() is deprecated since Python 3.12. Produce the same
        # naive-UTC value so existing consumers of `timestamp` are unaffected.
        timestamp = datetime.now(timezone.utc).replace(tzinfo=None)

        failure = AgentFailure(
            agent_id=agent_id,
            failure_type=failure_type,
            severity=severity,
            error_message=error_message,
            context=context or {},
            stack_trace=stack_trace,
            failure_trace=failure_trace,
            timestamp=timestamp,
        )

        self.failure_history.append(failure)

        # Enqueue failure with full trace for processing
        if failure_trace:
            self.failure_queue.enqueue(failure)
            logger.info(f"Failure with full trace enqueued for agent {agent_id}")

        logger.warning(f"Detected {failure_type} failure for agent {agent_id}: {error_message}")

        return failure

    def _classify_failure(self, error_message: str, context: Optional[Dict[str, Any]]) -> FailureType:
        """
        Classify the type of failure based on the error message.

        Matching is by case-insensitive substring; the first rule whose keyword
        appears in the message wins, so rule order defines precedence.

        Args:
            error_message: Error text to classify
            context: Accepted for interface compatibility; not consulted

        Returns:
            The matching FailureType, or FailureType.UNKNOWN when nothing matches
        """
        error_lower = error_message.lower()

        # Ordered (keywords, failure type) rules - order matters.
        rules = (
            # Control plane blocking (including policy violations)
            (
                (
                    "blocked", "control plane", "policy", "unauthorized", "forbidden",
                    "cannot advise", "cannot provide", "not allowed to",
                ),
                FailureType.BLOCKED_BY_CONTROL_PLANE,
            ),
            # Timeout
            (("timeout", "timed out", "deadline"), FailureType.TIMEOUT),
            # Invalid action (including UUID/parameter type errors)
            (
                (
                    "invalid", "unsupported", "expected", "uuid",
                    "does not exist", "not found", "format", "parameter",
                ),
                FailureType.INVALID_ACTION,
            ),
            # Resource exhaustion
            (
                ("resource", "memory", "disk", "quota", "limit exceeded"),
                FailureType.RESOURCE_EXHAUSTED,
            ),
            # Logic errors
            (
                ("assertion", "null pointer", "index out", "key error", "type error"),
                FailureType.LOGIC_ERROR,
            ),
        )

        for keywords, failure_type in rules:
            if any(keyword in error_lower for keyword in keywords):
                return failure_type

        return FailureType.UNKNOWN

    def _assess_severity(self, failure_type: FailureType, context: Optional[Dict[str, Any]]) -> FailureSeverity:
        """
        Assess the severity of a failure.

        Args:
            failure_type: Classified failure type
            context: Accepted for interface compatibility; not consulted

        Returns:
            HIGH for control-plane blocks and resource exhaustion; MEDIUM for
            everything else (timeouts, logic errors, invalid actions, unknown)
        """
        if failure_type in (
            FailureType.BLOCKED_BY_CONTROL_PLANE,
            FailureType.RESOURCE_EXHAUSTED,
        ):
            return FailureSeverity.HIGH

        return FailureSeverity.MEDIUM

    def get_failure_history(self, agent_id: Optional[str] = None, limit: int = 100) -> List[AgentFailure]:
        """
        Get failure history, optionally filtered by agent_id.

        Args:
            agent_id: When given (and non-empty), only failures of this agent
                are returned
            limit: Maximum number of failures to return. Non-positive values
                return an empty list.

        Returns:
            A new list with at most ``limit`` of the latest matching failures,
            oldest first
        """
        # Guard explicitly: history[-0:] would return the *entire* list and a
        # negative limit would silently skip leading entries instead.
        if limit <= 0:
            return []

        history = self.failure_history

        if agent_id:
            history = [f for f in history if f.agent_id == agent_id]

        return history[-limit:]