agent_os_kernel 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. agent_control_plane/__init__.py +662 -0
  2. agent_control_plane/a2a_adapter.py +543 -0
  3. agent_control_plane/adapter.py +417 -0
  4. agent_control_plane/agent_hibernation.py +394 -0
  5. agent_control_plane/agent_kernel.py +470 -0
  6. agent_control_plane/compliance.py +720 -0
  7. agent_control_plane/constraint_graphs.py +478 -0
  8. agent_control_plane/control_plane.py +854 -0
  9. agent_control_plane/example_executors.py +195 -0
  10. agent_control_plane/execution_engine.py +231 -0
  11. agent_control_plane/flight_recorder.py +846 -0
  12. agent_control_plane/governance_layer.py +435 -0
  13. agent_control_plane/hf_utils.py +563 -0
  14. agent_control_plane/interfaces/__init__.py +55 -0
  15. agent_control_plane/interfaces/kernel_interface.py +361 -0
  16. agent_control_plane/interfaces/plugin_interface.py +497 -0
  17. agent_control_plane/interfaces/protocol_interfaces.py +387 -0
  18. agent_control_plane/kernel_space.py +1009 -0
  19. agent_control_plane/langchain_adapter.py +424 -0
  20. agent_control_plane/lifecycle.py +3113 -0
  21. agent_control_plane/mcp_adapter.py +653 -0
  22. agent_control_plane/ml_safety.py +563 -0
  23. agent_control_plane/multimodal.py +727 -0
  24. agent_control_plane/mute_agent.py +422 -0
  25. agent_control_plane/observability.py +787 -0
  26. agent_control_plane/orchestrator.py +482 -0
  27. agent_control_plane/plugin_registry.py +750 -0
  28. agent_control_plane/policy_engine.py +954 -0
  29. agent_control_plane/process_isolation.py +777 -0
  30. agent_control_plane/shadow_mode.py +310 -0
  31. agent_control_plane/signals.py +493 -0
  32. agent_control_plane/supervisor_agents.py +430 -0
  33. agent_control_plane/time_travel_debugger.py +557 -0
  34. agent_control_plane/tool_registry.py +452 -0
  35. agent_control_plane/vfs.py +697 -0
  36. agent_kernel/__init__.py +69 -0
  37. agent_kernel/analyzer.py +435 -0
  38. agent_kernel/auditor.py +36 -0
  39. agent_kernel/completeness_auditor.py +237 -0
  40. agent_kernel/detector.py +203 -0
  41. agent_kernel/kernel.py +744 -0
  42. agent_kernel/memory_manager.py +85 -0
  43. agent_kernel/models.py +374 -0
  44. agent_kernel/nudge_mechanism.py +263 -0
  45. agent_kernel/outcome_analyzer.py +338 -0
  46. agent_kernel/patcher.py +582 -0
  47. agent_kernel/semantic_analyzer.py +316 -0
  48. agent_kernel/semantic_purge.py +349 -0
  49. agent_kernel/simulator.py +449 -0
  50. agent_kernel/teacher.py +85 -0
  51. agent_kernel/triage.py +152 -0
  52. agent_os/__init__.py +409 -0
  53. agent_os/_adversarial_impl.py +200 -0
  54. agent_os/_circuit_breaker_impl.py +232 -0
  55. agent_os/_mcp_metrics.py +193 -0
  56. agent_os/adversarial.py +20 -0
  57. agent_os/agents_compat.py +490 -0
  58. agent_os/audit_logger.py +135 -0
  59. agent_os/base_agent.py +651 -0
  60. agent_os/circuit_breaker.py +34 -0
  61. agent_os/cli/__init__.py +659 -0
  62. agent_os/cli/cmd_audit.py +128 -0
  63. agent_os/cli/cmd_init.py +152 -0
  64. agent_os/cli/cmd_policy.py +41 -0
  65. agent_os/cli/cmd_policy_gen.py +180 -0
  66. agent_os/cli/cmd_validate.py +258 -0
  67. agent_os/cli/mcp_scan.py +265 -0
  68. agent_os/cli/output.py +192 -0
  69. agent_os/cli/policy_checker.py +330 -0
  70. agent_os/compat.py +74 -0
  71. agent_os/constraint_graph.py +234 -0
  72. agent_os/content_governance.py +140 -0
  73. agent_os/context_budget.py +305 -0
  74. agent_os/credential_redactor.py +224 -0
  75. agent_os/diff_policy.py +89 -0
  76. agent_os/egress_policy.py +159 -0
  77. agent_os/escalation.py +276 -0
  78. agent_os/event_bus.py +124 -0
  79. agent_os/exceptions.py +180 -0
  80. agent_os/execution_context_policy.py +141 -0
  81. agent_os/github_enterprise.py +96 -0
  82. agent_os/health.py +20 -0
  83. agent_os/integrations/__init__.py +279 -0
  84. agent_os/integrations/a2a_adapter.py +279 -0
  85. agent_os/integrations/agent_lightning/__init__.py +30 -0
  86. agent_os/integrations/anthropic_adapter.py +420 -0
  87. agent_os/integrations/autogen_adapter.py +620 -0
  88. agent_os/integrations/base.py +1137 -0
  89. agent_os/integrations/compat.py +229 -0
  90. agent_os/integrations/config.py +98 -0
  91. agent_os/integrations/conversation_guardian.py +957 -0
  92. agent_os/integrations/crewai_adapter.py +467 -0
  93. agent_os/integrations/drift_detector.py +425 -0
  94. agent_os/integrations/dry_run.py +124 -0
  95. agent_os/integrations/escalation.py +582 -0
  96. agent_os/integrations/gemini_adapter.py +364 -0
  97. agent_os/integrations/google_adk_adapter.py +633 -0
  98. agent_os/integrations/guardrails_adapter.py +394 -0
  99. agent_os/integrations/health.py +197 -0
  100. agent_os/integrations/langchain_adapter.py +654 -0
  101. agent_os/integrations/llamafirewall.py +343 -0
  102. agent_os/integrations/llamaindex_adapter.py +188 -0
  103. agent_os/integrations/logging.py +191 -0
  104. agent_os/integrations/maf_adapter.py +631 -0
  105. agent_os/integrations/mistral_adapter.py +365 -0
  106. agent_os/integrations/openai_adapter.py +816 -0
  107. agent_os/integrations/openai_agents_sdk.py +406 -0
  108. agent_os/integrations/policy_compose.py +171 -0
  109. agent_os/integrations/profiling.py +144 -0
  110. agent_os/integrations/pydantic_ai_adapter.py +420 -0
  111. agent_os/integrations/rate_limiter.py +130 -0
  112. agent_os/integrations/rbac.py +143 -0
  113. agent_os/integrations/registry.py +113 -0
  114. agent_os/integrations/scope_guard.py +303 -0
  115. agent_os/integrations/semantic_kernel_adapter.py +769 -0
  116. agent_os/integrations/smolagents_adapter.py +629 -0
  117. agent_os/integrations/templates.py +178 -0
  118. agent_os/integrations/token_budget.py +134 -0
  119. agent_os/integrations/tool_aliases.py +190 -0
  120. agent_os/integrations/webhooks.py +177 -0
  121. agent_os/lite.py +208 -0
  122. agent_os/mcp_gateway.py +385 -0
  123. agent_os/mcp_message_signer.py +273 -0
  124. agent_os/mcp_protocols.py +161 -0
  125. agent_os/mcp_response_scanner.py +232 -0
  126. agent_os/mcp_security.py +924 -0
  127. agent_os/mcp_session_auth.py +231 -0
  128. agent_os/mcp_sliding_rate_limiter.py +184 -0
  129. agent_os/memory_guard.py +409 -0
  130. agent_os/metrics.py +134 -0
  131. agent_os/mute.py +428 -0
  132. agent_os/mute_agent.py +209 -0
  133. agent_os/policies/__init__.py +77 -0
  134. agent_os/policies/async_evaluator.py +275 -0
  135. agent_os/policies/backends.py +670 -0
  136. agent_os/policies/bridge.py +169 -0
  137. agent_os/policies/budget.py +85 -0
  138. agent_os/policies/cli.py +294 -0
  139. agent_os/policies/conflict_resolution.py +270 -0
  140. agent_os/policies/data_classification.py +252 -0
  141. agent_os/policies/evaluator.py +239 -0
  142. agent_os/policies/policy_schema.json +228 -0
  143. agent_os/policies/rate_limiting.py +145 -0
  144. agent_os/policies/schema.py +115 -0
  145. agent_os/policies/shared.py +331 -0
  146. agent_os/prompt_injection.py +694 -0
  147. agent_os/providers.py +182 -0
  148. agent_os/py.typed +0 -0
  149. agent_os/retry.py +81 -0
  150. agent_os/reversibility.py +251 -0
  151. agent_os/sandbox.py +432 -0
  152. agent_os/sandbox_provider.py +140 -0
  153. agent_os/secure_codegen.py +525 -0
  154. agent_os/security_skills.py +538 -0
  155. agent_os/semantic_policy.py +422 -0
  156. agent_os/server/__init__.py +15 -0
  157. agent_os/server/__main__.py +25 -0
  158. agent_os/server/app.py +277 -0
  159. agent_os/server/models.py +104 -0
  160. agent_os/shift_left_metrics.py +130 -0
  161. agent_os/stateless.py +742 -0
  162. agent_os/supervisor.py +148 -0
  163. agent_os/task_outcome.py +148 -0
  164. agent_os/transparency.py +181 -0
  165. agent_os/trust_root.py +128 -0
  166. agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
  167. agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
  168. agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
  169. agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
  170. agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
  171. agent_os_observability/__init__.py +27 -0
  172. agent_os_observability/dashboards.py +898 -0
  173. agent_os_observability/metrics.py +398 -0
  174. agent_os_observability/server.py +223 -0
  175. agent_os_observability/tracer.py +232 -0
  176. agent_primitives/__init__.py +24 -0
  177. agent_primitives/failures.py +84 -0
  178. agent_primitives/py.typed +0 -0
  179. amb_core/__init__.py +177 -0
  180. amb_core/adapters/__init__.py +57 -0
  181. amb_core/adapters/aws_sqs_broker.py +376 -0
  182. amb_core/adapters/azure_servicebus_broker.py +340 -0
  183. amb_core/adapters/kafka_broker.py +260 -0
  184. amb_core/adapters/nats_broker.py +285 -0
  185. amb_core/adapters/rabbitmq_broker.py +235 -0
  186. amb_core/adapters/redis_broker.py +262 -0
  187. amb_core/broker.py +145 -0
  188. amb_core/bus.py +481 -0
  189. amb_core/cloudevents.py +509 -0
  190. amb_core/dlq.py +345 -0
  191. amb_core/hf_utils.py +536 -0
  192. amb_core/memory_broker.py +410 -0
  193. amb_core/models.py +141 -0
  194. amb_core/persistence.py +529 -0
  195. amb_core/schema.py +294 -0
  196. amb_core/tracing.py +358 -0
  197. atr/__init__.py +640 -0
  198. atr/access.py +348 -0
  199. atr/composition.py +645 -0
  200. atr/decorator.py +357 -0
  201. atr/executor.py +384 -0
  202. atr/health.py +557 -0
  203. atr/hf_utils.py +449 -0
  204. atr/injection.py +422 -0
  205. atr/metrics.py +440 -0
  206. atr/policies.py +403 -0
  207. atr/py.typed +2 -0
  208. atr/registry.py +452 -0
  209. atr/schema.py +480 -0
  210. atr/tools/safe/__init__.py +75 -0
  211. atr/tools/safe/calculator.py +467 -0
  212. atr/tools/safe/datetime_tool.py +443 -0
  213. atr/tools/safe/file_reader.py +402 -0
  214. atr/tools/safe/http_client.py +316 -0
  215. atr/tools/safe/json_parser.py +374 -0
  216. atr/tools/safe/text_tool.py +537 -0
  217. atr/tools/safe/toolkit.py +175 -0
  218. caas/__init__.py +162 -0
  219. caas/api/__init__.py +7 -0
  220. caas/api/server.py +1328 -0
  221. caas/caching.py +834 -0
  222. caas/cli.py +210 -0
  223. caas/conversation.py +223 -0
  224. caas/decay.py +72 -0
  225. caas/detection/__init__.py +9 -0
  226. caas/detection/detector.py +238 -0
  227. caas/enrichment.py +130 -0
  228. caas/gateway/__init__.py +27 -0
  229. caas/gateway/trust_gateway.py +474 -0
  230. caas/hf_utils.py +479 -0
  231. caas/ingestion/__init__.py +23 -0
  232. caas/ingestion/processors.py +253 -0
  233. caas/ingestion/structure_parser.py +188 -0
  234. caas/models.py +356 -0
  235. caas/pragmatic_truth.py +444 -0
  236. caas/routing/__init__.py +10 -0
  237. caas/routing/heuristic_router.py +58 -0
  238. caas/storage/__init__.py +9 -0
  239. caas/storage/store.py +389 -0
  240. caas/triad.py +213 -0
  241. caas/tuning/__init__.py +9 -0
  242. caas/tuning/tuner.py +329 -0
  243. caas/vfs/__init__.py +14 -0
  244. caas/vfs/filesystem.py +452 -0
  245. cmvk/__init__.py +218 -0
  246. cmvk/audit.py +402 -0
  247. cmvk/benchmarks.py +478 -0
  248. cmvk/constitutional.py +904 -0
  249. cmvk/hf_utils.py +301 -0
  250. cmvk/metrics.py +473 -0
  251. cmvk/profiles.py +300 -0
  252. cmvk/py.typed +0 -0
  253. cmvk/types.py +12 -0
  254. cmvk/verification.py +956 -0
  255. emk/__init__.py +89 -0
  256. emk/causal.py +352 -0
  257. emk/hf_utils.py +421 -0
  258. emk/indexer.py +83 -0
  259. emk/py.typed +0 -0
  260. emk/schema.py +204 -0
  261. emk/sleep_cycle.py +347 -0
  262. emk/store.py +281 -0
  263. iatp/__init__.py +166 -0
  264. iatp/attestation.py +461 -0
  265. iatp/cli.py +317 -0
  266. iatp/hf_utils.py +472 -0
  267. iatp/ipc_pipes.py +580 -0
  268. iatp/main.py +412 -0
  269. iatp/models/__init__.py +447 -0
  270. iatp/policy_engine.py +337 -0
  271. iatp/py.typed +2 -0
  272. iatp/recovery.py +321 -0
  273. iatp/security/__init__.py +270 -0
  274. iatp/sidecar/__init__.py +519 -0
  275. iatp/telemetry/__init__.py +164 -0
  276. iatp/tests/__init__.py +1 -0
  277. iatp/tests/test_attestation.py +370 -0
  278. iatp/tests/test_cli.py +131 -0
  279. iatp/tests/test_ed25519_attestation.py +211 -0
  280. iatp/tests/test_models.py +130 -0
  281. iatp/tests/test_policy_engine.py +347 -0
  282. iatp/tests/test_recovery.py +281 -0
  283. iatp/tests/test_security.py +222 -0
  284. iatp/tests/test_sidecar.py +167 -0
  285. iatp/tests/test_telemetry.py +175 -0
  286. mcp_kernel_server/__init__.py +28 -0
  287. mcp_kernel_server/cli.py +274 -0
  288. mcp_kernel_server/resources.py +217 -0
  289. mcp_kernel_server/server.py +564 -0
  290. mcp_kernel_server/tools.py +1174 -0
  291. mute_agent/__init__.py +68 -0
  292. mute_agent/core/__init__.py +1 -0
  293. mute_agent/core/execution_agent.py +166 -0
  294. mute_agent/core/handshake_protocol.py +201 -0
  295. mute_agent/core/reasoning_agent.py +238 -0
  296. mute_agent/knowledge_graph/__init__.py +1 -0
  297. mute_agent/knowledge_graph/graph_elements.py +65 -0
  298. mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
  299. mute_agent/knowledge_graph/subgraph.py +224 -0
  300. mute_agent/listener/__init__.py +43 -0
  301. mute_agent/listener/adapters/__init__.py +31 -0
  302. mute_agent/listener/adapters/base_adapter.py +189 -0
  303. mute_agent/listener/adapters/caas_adapter.py +344 -0
  304. mute_agent/listener/adapters/control_plane_adapter.py +436 -0
  305. mute_agent/listener/adapters/iatp_adapter.py +332 -0
  306. mute_agent/listener/adapters/scak_adapter.py +251 -0
  307. mute_agent/listener/listener.py +610 -0
  308. mute_agent/listener/state_observer.py +436 -0
  309. mute_agent/listener/threshold_config.py +313 -0
  310. mute_agent/super_system/__init__.py +1 -0
  311. mute_agent/super_system/router.py +204 -0
  312. mute_agent/visualization/__init__.py +10 -0
  313. mute_agent/visualization/graph_debugger.py +502 -0
  314. nexus/README.md +60 -0
  315. nexus/__init__.py +51 -0
  316. nexus/arbiter.py +359 -0
  317. nexus/client.py +466 -0
  318. nexus/dmz.py +444 -0
  319. nexus/escrow.py +430 -0
  320. nexus/exceptions.py +286 -0
  321. nexus/pyproject.toml +36 -0
  322. nexus/registry.py +393 -0
  323. nexus/reputation.py +425 -0
  324. nexus/schemas/__init__.py +51 -0
  325. nexus/schemas/compliance.py +276 -0
  326. nexus/schemas/escrow.py +251 -0
  327. nexus/schemas/manifest.py +225 -0
  328. nexus/schemas/receipt.py +208 -0
  329. nexus/tests/__init__.py +0 -0
  330. nexus/tests/conftest.py +146 -0
  331. nexus/tests/test_arbiter.py +192 -0
  332. nexus/tests/test_dmz.py +194 -0
  333. nexus/tests/test_escrow.py +276 -0
  334. nexus/tests/test_exceptions.py +225 -0
  335. nexus/tests/test_registry.py +232 -0
  336. nexus/tests/test_reputation.py +328 -0
  337. nexus/tests/test_schemas.py +295 -0
@@ -0,0 +1,263 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+
4
+ """
5
+ Nudge Mechanism - Automatic retry with targeted prompting.
6
+
7
+ When an agent gives up (GIVE_UP outcome), this module automatically
8
+ injects a "nudge" prompt to encourage the agent to try harder.
9
+
10
+ This implements "The Nudge" pattern from industry best practices:
11
+ - Automatic intervention without human involvement
12
+ - Targeted prompting based on the give-up type
13
+ - Tracking of nudge effectiveness
14
+ """
15
+
16
+ import logging
17
+ import uuid
18
+ from typing import Optional, List
19
+ from datetime import datetime
20
+
21
+ from .models import AgentOutcome, NudgeResult, GiveUpSignal
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ class NudgeMechanism:
27
+ """
28
+ Implements automatic nudging when agents give up.
29
+
30
+ The "nudge" is a system prompt injection that asks the agent
31
+ to confirm it executed the task correctly and encourages
32
+ a more thorough attempt.
33
+
34
+ Example nudge:
35
+ "You claimed no data was found. Please confirm you executed the
36
+ search tool with the correct parameters and checked all data sources."
37
+ """
38
+
39
def __init__(self):
    """
    Create a nudge mechanism with an empty history.

    Starts with no recorded nudges and fetches the prompt templates
    from ``_load_nudge_templates``.
    """
    # Chronological log of every nudge recorded through record_nudge_result.
    self.nudge_history: List[NudgeResult] = list()
    # Prompt text keyed by give-up signal.
    self.nudge_templates = self._load_nudge_templates()
43
+
44
def _load_nudge_templates(self) -> dict:
    """
    Build the prompt text used for each kind of give-up signal.

    Returns:
        Dict mapping a GiveUpSignal member to its nudge prompt. Each prompt
        is an opening line, three numbered checks and a closing instruction.
    """
    templates = {}

    templates[GiveUpSignal.NO_DATA_FOUND] = (
        "You claimed no data was found. Please confirm you:\n"
        "1. Executed the search/query tool with the correct parameters\n"
        "2. Checked all relevant data sources including archives\n"
        "3. Used appropriate time ranges and filters\n"
        "Please retry with a more comprehensive search strategy."
    )

    templates[GiveUpSignal.CANNOT_ANSWER] = (
        "You indicated you cannot answer this question. Please confirm you:\n"
        "1. Have access to all necessary tools and resources\n"
        "2. Attempted to use available tools to gather information\n"
        "3. Considered alternative approaches to the problem\n"
        "Please retry with a different strategy."
    )

    templates[GiveUpSignal.NO_RESULTS] = (
        "You reported no results. Please confirm you:\n"
        "1. Used the correct query syntax and parameters\n"
        "2. Checked for typos or incorrect field names\n"
        "3. Tried alternative search terms or filters\n"
        "Please retry with validated parameters."
    )

    templates[GiveUpSignal.NOT_AVAILABLE] = (
        "You indicated the resource is not available. Please confirm you:\n"
        "1. Checked the resource location and accessibility\n"
        "2. Verified you have the correct permissions\n"
        "3. Checked for alternative access methods\n"
        "Please retry with proper access verification."
    )

    templates[GiveUpSignal.INSUFFICIENT_INFO] = (
        "You claimed insufficient information. Please confirm you:\n"
        "1. Attempted to gather additional context from available sources\n"
        "2. Used all available tools to retrieve more information\n"
        "3. Considered what information is actually required vs. nice-to-have\n"
        "Please retry with available information."
    )

    # Catch-all prompt for outcomes without a more specific signal.
    templates[GiveUpSignal.UNKNOWN] = (
        "Your response suggests you may have given up. Please:\n"
        "1. Re-read the user's request carefully\n"
        "2. Use all available tools to attempt the task\n"
        "3. Provide a specific explanation if truly impossible\n"
        "Please retry with full effort."
    )

    return templates
90
+
91
def generate_nudge(
    self,
    outcome: AgentOutcome,
    include_tool_reminder: bool = True
) -> str:
    """
    Generate a nudge prompt for the given outcome.

    The prompt is the template for the outcome's give-up signal (the
    UNKNOWN template when the signal is missing or has no template),
    optionally followed by a note about tool usage, and always followed
    by the original user request. Sections are separated by a blank line.

    Args:
        outcome: The agent outcome that triggered the nudge
        include_tool_reminder: Whether to include tool usage reminder

    Returns:
        Nudge prompt string
    """
    # Local import, in the same way should_nudge imports OutcomeType.
    from .models import ToolExecutionStatus

    signal = outcome.give_up_signal or GiveUpSignal.UNKNOWN
    template = self.nudge_templates.get(signal, self.nudge_templates[GiveUpSignal.UNKNOWN])
    sections = [template]

    if include_tool_reminder:
        # Only count telemetry entries for tools that were actually invoked.
        # Previously every telemetry entry was listed as "used", which made
        # the "no tools were called" note unreachable whenever telemetry
        # existed. Entries without a tool_status attribute are still treated
        # as called, matching the earlier behaviour.
        # NOTE(review): assumes entries marked NOT_CALLED represent tools the
        # agent did not run - confirm against the telemetry producer.
        called_tools = [
            t.tool_name
            for t in (outcome.tool_telemetry or [])
            if getattr(t, "tool_status", None) != ToolExecutionStatus.NOT_CALLED
        ]
        if called_tools:
            tool_list = ", ".join(called_tools)
            sections.append(
                f"Note: You previously used tools: {tool_list}. "
                "Consider using additional tools or different parameters."
            )
        else:
            sections.append(
                "Note: It appears no tools were called. "
                "Please use available tools to complete the task."
            )

    # Always restate the request so the retry has the full task in view.
    sections.append(f"Original request: {outcome.user_prompt}")

    return "\n\n".join(sections)
128
+
129
def should_nudge(
    self,
    outcome: AgentOutcome,
    max_nudges: int = 1
) -> bool:
    """
    Determine if we should nudge for this outcome.

    A nudge is applied only for GIVE_UP outcomes, and only while fewer than
    ``max_nudges`` recorded nudges exist for the same agent whose original
    outcome timestamp lies within the last 5 minutes.

    Args:
        outcome: The agent outcome
        max_nudges: Maximum number of nudges per agent/task

    Returns:
        True if nudge should be applied
    """
    from datetime import timezone
    from .models import OutcomeType

    # Check if outcome is a give-up
    if outcome.outcome_type != OutcomeType.GIVE_UP:
        return False

    # datetime.utcnow() is deprecated; take one aware "now" and derive the
    # naive-UTC equivalent so both naive and aware timestamps can be compared
    # without raising TypeError.
    now_aware = datetime.now(timezone.utc)
    now_naive = now_aware.replace(tzinfo=None)

    def seconds_since(moment: datetime) -> float:
        # Naive timestamps are treated as UTC, as datetime.utcnow() did.
        reference = now_naive if moment.tzinfo is None else now_aware
        return (reference - moment).total_seconds()

    # Check if we've already nudged this agent recently
    recent_nudges = [
        n for n in self.nudge_history
        if n.original_outcome.agent_id == outcome.agent_id
        and seconds_since(n.original_outcome.timestamp) < 300  # 5 min
    ]

    if len(recent_nudges) >= max_nudges:
        logger.info("Max nudges (%s) reached for agent %s", max_nudges, outcome.agent_id)
        return False

    return True
161
+
162
def record_nudge_result(
    self,
    outcome: AgentOutcome,
    nudge_prompt: str,
    retry_response: str,
    retry_successful: bool
) -> NudgeResult:
    """
    Store the outcome of one nudge attempt and return the stored record.

    Args:
        outcome: Original outcome that triggered nudge
        nudge_prompt: The nudge prompt that was used
        retry_response: Agent's response after nudge
        retry_successful: Whether the retry was successful

    Returns:
        The NudgeResult that was appended to ``nudge_history``
    """
    # Short identifier: "nudge-" plus the first 8 hex digits of a UUID4.
    short_token = uuid.uuid4().hex[:8]
    nudge_id = f"nudge-{short_token}"

    # Heuristic comparison of the retry against the original response.
    improved = self._detect_improvement(
        original_response=outcome.agent_response,
        retry_response=retry_response,
    )

    record_fields = dict(
        nudge_id=nudge_id,
        original_outcome=outcome,
        nudge_prompt=nudge_prompt,
        retry_response=retry_response,
        retry_successful=retry_successful,
        improvement_detected=improved,
    )
    result = NudgeResult(**record_fields)
    self.nudge_history.append(result)

    logger.info(f"Nudge {nudge_id}: success={retry_successful}, improvement={improved}")

    return result
203
+
204
+ def _detect_improvement(
205
+ self,
206
+ original_response: str,
207
+ retry_response: str
208
+ ) -> bool:
209
+ """
210
+ Detect if retry response shows improvement over original.
211
+
212
+ Simple heuristic: longer response with less refusal language.
213
+ """
214
+ # Length improvement
215
+ length_improved = len(retry_response) > len(original_response) * 1.2
216
+
217
+ # Refusal language reduction
218
+ refusal_words = ["no data", "cannot", "can't", "unable", "not found"]
219
+ original_refusals = sum(1 for word in refusal_words if word in original_response.lower())
220
+ retry_refusals = sum(1 for word in refusal_words if word in retry_response.lower())
221
+ refusal_reduced = retry_refusals < original_refusals
222
+
223
+ # Check for data/results mention
224
+ data_indicators = ["found", "results", "data shows", "here is", "here are"]
225
+ has_data_now = any(indicator in retry_response.lower() for indicator in data_indicators)
226
+
227
+ return length_improved or refusal_reduced or has_data_now
228
+
229
def get_nudge_stats(self) -> dict:
    """
    Get statistics about nudge effectiveness.

    Returns:
        Dict with the same keys whether or not any nudges were recorded:
        ``total_nudges``, ``successful_nudges``, ``success_rate``,
        ``improvements``, ``improvement_rate`` and ``recent_nudges``
        (the last 10 recorded results). Rates are 0.0 when the history
        is empty.
    """
    history = self.nudge_history
    total = len(history)

    successful = sum(1 for n in history if n.retry_successful)
    improvements = sum(1 for n in history if n.improvement_detected)

    return {
        "total_nudges": total,
        "successful_nudges": successful,
        # Guard the division so an empty history yields 0.0 instead of raising.
        "success_rate": successful / total if total else 0.0,
        "improvements": improvements,
        "improvement_rate": improvements / total if total else 0.0,
        # Present in the empty case as well, so callers can rely on the key.
        "recent_nudges": history[-10:],  # Last 10 nudges
    }
251
+
252
+ def get_nudge_history(
253
+ self,
254
+ agent_id: Optional[str] = None,
255
+ limit: int = 100
256
+ ) -> List[NudgeResult]:
257
+ """Get nudge history with optional filtering."""
258
+ history = self.nudge_history[-limit:]
259
+
260
+ if agent_id:
261
+ history = [n for n in history if n.original_outcome.agent_id == agent_id]
262
+
263
+ return history
@@ -0,0 +1,338 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+
4
+ """
5
+ Outcome Analyzer - Filters agent outcomes for competence issues.
6
+
7
+ This is part of Loop 2 (Alignment Engine) that identifies when agents
8
+ "give up" with negative results instead of delivering value.
9
+
10
+ Enhanced with:
11
+ - Tool execution telemetry to distinguish valid empty results from laziness
12
+ - Semantic analysis for detecting subtle forms of refusal
13
+ """
14
+
15
+ import logging
16
+ import re
17
+ from typing import Optional, List
18
+ from datetime import datetime
19
+
20
+ from .models import (
21
+ AgentOutcome,
22
+ OutcomeType,
23
+ GiveUpSignal,
24
+ ToolExecutionTelemetry,
25
+ ToolExecutionStatus
26
+ )
27
+ from .semantic_analyzer import SemanticAnalyzer
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
+ class OutcomeAnalyzer:
33
+ """
34
+ Analyzes agent outcomes to detect "Give-Up Signals" (Laziness).
35
+
36
+ This filters for competence issues - when agents comply with safety rules
37
+ but fail to deliver value (e.g., "No data found" is safe, but wrong if data exists).
38
+
39
+ Enhanced Features:
40
+ 1. Tool Execution Telemetry - Correlates give-up signals with tool usage
41
+ 2. Semantic Analysis - Goes beyond regex for subtle refusal detection
42
+ 3. False Positive Prevention - Distinguishes valid empty results from laziness
43
+ """
44
+
45
def __init__(self, use_semantic_analysis: bool = True):
    """
    Set up the regex patterns, an empty outcome log and the optional
    semantic analyzer.

    Args:
        use_semantic_analysis: When True a SemanticAnalyzer is created;
            otherwise ``semantic_analyzer`` is left as None
    """
    self.give_up_patterns = self._load_give_up_patterns()
    # Every outcome produced by analyze_outcome, in call order.
    self.outcome_history: List[AgentOutcome] = []
    self.use_semantic_analysis = use_semantic_analysis
    if use_semantic_analysis:
        self.semantic_analyzer = SemanticAnalyzer()
    else:
        self.semantic_analyzer = None
50
+
51
def _load_give_up_patterns(self) -> dict:
    """
    Build the regex patterns that indicate the agent is giving up.

    Returns:
        Dict mapping a GiveUpSignal member to a list of regex strings.
        The patterns are written in lower case; ``_detect_give_up_signal``
        lower-cases the response before searching.
    """
    no_data_found = [
        r"no (?:data|results|logs|records|information) (?:found|available)",
        r"could(?:n't| not) find (?:any |the )?(?:data|logs|records|information)",
        r"(?:data|logs|records) (?:not found|unavailable|missing)",
        r"no matching (?:data|logs|records|results)",
    ]
    cannot_answer = [
        r"(?:i )?cannot answer",
        r"(?:i )?(?:can't|cannot) (?:help|assist|answer) (?:with|you)",
        r"unable to (?:answer|respond|help)",
        r"(?:i )?don't have (?:enough|sufficient) information",
    ]
    no_results = [
        r"no results",
        r"0 results",
        r"zero results",
        r"empty result set",
        r"query returned (?:no|zero) results",
    ]
    not_available = [
        r"(?:not|isn't) (?:currently )?available",
        r"(?:is|are) unavailable",
        r"(?:service|resource|data) (?:not|isn't) available",
    ]
    insufficient_info = [
        r"insufficient (?:data|information)",
        r"not enough (?:data|information|context)",
        r"incomplete (?:data|information)",
        r"missing (?:required|necessary) (?:data|information)",
    ]

    # Pair order is the dict's iteration order, i.e. the order in which
    # _detect_give_up_signal tries the categories.
    ordered_pairs = [
        (GiveUpSignal.NO_DATA_FOUND, no_data_found),
        (GiveUpSignal.CANNOT_ANSWER, cannot_answer),
        (GiveUpSignal.NO_RESULTS, no_results),
        (GiveUpSignal.NOT_AVAILABLE, not_available),
        (GiveUpSignal.INSUFFICIENT_INFO, insufficient_info),
    ]
    return dict(ordered_pairs)
85
+
86
def analyze_outcome(
    self,
    agent_id: str,
    user_prompt: str,
    agent_response: str,
    context: Optional[dict] = None,
    tool_telemetry: Optional[List[ToolExecutionTelemetry]] = None
) -> AgentOutcome:
    """
    Analyze an agent's outcome to determine if it gave up.

    Combines the regex-based give-up detection, the optional semantic
    analysis and the tool telemetry into an outcome type, then appends the
    resulting AgentOutcome to ``outcome_history``.

    Args:
        agent_id: ID of the agent
        user_prompt: Original user request
        agent_response: Agent's response
        context: Additional context (stored as an empty dict when omitted)
        tool_telemetry: Tool execution telemetry data (stored as an empty
            list when omitted)

    Returns:
        AgentOutcome with classification and analysis
    """
    logger.info(f"Analyzing outcome for agent {agent_id}")

    # Check if this is a give-up signal (regex-based)
    give_up_signal = self._detect_give_up_signal(agent_response)

    # Perform semantic analysis if enabled
    semantic_analysis = None
    if self.use_semantic_analysis:
        # The flag is a plain attribute and can be switched on after the
        # instance was built with it off; create the analyzer on demand
        # instead of failing with AttributeError on None.
        if self.semantic_analyzer is None:
            self.semantic_analyzer = SemanticAnalyzer()

        semantic_analysis = self.semantic_analyzer.analyze(
            agent_response=agent_response,
            user_prompt=user_prompt,
            tool_telemetry=tool_telemetry
        )
        logger.debug(f"Semantic analysis: {semantic_analysis.semantic_category} "
                     f"(confidence: {semantic_analysis.refusal_confidence:.2f})")

    # Determine outcome type with enhanced logic
    outcome_type = self._determine_outcome_type(
        agent_response=agent_response,
        give_up_signal=give_up_signal,
        tool_telemetry=tool_telemetry,
        semantic_analysis=semantic_analysis
    )

    if outcome_type == OutcomeType.GIVE_UP:
        # With no regex signal, the give-up was flagged by semantic analysis.
        logger.warning(f"Give-up detected: signal={give_up_signal.value if give_up_signal else 'semantic'}")

    outcome = AgentOutcome(
        agent_id=agent_id,
        outcome_type=outcome_type,
        user_prompt=user_prompt,
        agent_response=agent_response,
        give_up_signal=give_up_signal,
        context=context or {},
        tool_telemetry=tool_telemetry or [],
        semantic_analysis=semantic_analysis
    )

    self.outcome_history.append(outcome)

    return outcome
150
+
151
+ def _detect_give_up_signal(self, response: str) -> Optional[GiveUpSignal]:
152
+ """
153
+ Detect if the response contains a give-up signal.
154
+
155
+ These are "Negative Results" that trigger the Completeness Auditor.
156
+ """
157
+ response_lower = response.lower()
158
+
159
+ # Check each pattern category
160
+ for signal_type, patterns in self.give_up_patterns.items():
161
+ for pattern in patterns:
162
+ if re.search(pattern, response_lower):
163
+ logger.debug(f"Matched pattern '{pattern}' for signal {signal_type.value}")
164
+ return signal_type
165
+
166
+ return None
167
+
168
def _determine_outcome_type(
    self,
    agent_response: str,
    give_up_signal: Optional[GiveUpSignal],
    tool_telemetry: Optional[List[ToolExecutionTelemetry]],
    semantic_analysis: Optional[any]
) -> OutcomeType:
    """
    Classify the outcome from the regex signal, semantic analysis and tool usage.

    Without any give-up indication the result is FAILURE for responses
    shorter than 20 characters (after stripping) and SUCCESS otherwise.
    With a give-up indication the result is SUCCESS only when tools were
    called and all of them returned empty results; every other case is
    GIVE_UP.

    Args:
        agent_response: Agent's response text
        give_up_signal: Regex-detected signal, or None
        tool_telemetry: Tool execution telemetry, or None
        semantic_analysis: Semantic analysis result, or None

    Returns:
        The OutcomeType for this response
    """
    # NOTE(review): the `any` in the annotation above is the builtin
    # function, not typing.Any; left untouched here because changing it
    # needs a new import at the top of the module.
    regex_hit = give_up_signal is not None

    # The semantic result only counts as a refusal above 0.6 confidence.
    semantic_hit = False
    if semantic_analysis is not None and semantic_analysis.is_refusal:
        semantic_hit = semantic_analysis.refusal_confidence > 0.6

    usage = self._analyze_tool_execution(tool_telemetry)

    if not (regex_hit or semantic_hit):
        # No give-up indication: only very short answers are failures.
        too_short = len(agent_response.strip()) < 20
        return OutcomeType.FAILURE if too_short else OutcomeType.SUCCESS

    if not usage["tools_called"]:
        # No tools called -> Clear laziness
        logger.warning("Give-up signal without tool execution - clear laziness")
        return OutcomeType.GIVE_UP

    if usage["all_empty_results"]:
        # Tools ran and all came back empty: a legitimate empty answer.
        logger.info("Give-up signal present but tools returned empty results - valid empty set")
        return OutcomeType.SUCCESS

    if usage["has_errors"]:
        # Tools ran but errored and the agent still gave up.
        logger.warning("Give-up with tool errors - potential laziness or error handling issue")
    else:
        # Mixed or unclear tool results: flag for audit.
        logger.warning("Give-up signal with unclear tool usage - flagging for audit")
    return OutcomeType.GIVE_UP
229
+
230
+ def _analyze_tool_execution(
231
+ self,
232
+ tool_telemetry: Optional[List[ToolExecutionTelemetry]]
233
+ ) -> dict:
234
+ """
235
+ Analyze tool execution telemetry.
236
+
237
+ Returns a dict with:
238
+ - tools_called: bool - Were any tools called?
239
+ - all_empty_results: bool - Did all tools return empty results?
240
+ - has_errors: bool - Did any tools error?
241
+ - tool_count: int - Number of tools called
242
+ """
243
+ if not tool_telemetry:
244
+ return {
245
+ "tools_called": False,
246
+ "all_empty_results": False,
247
+ "has_errors": False,
248
+ "tool_count": 0
249
+ }
250
+
251
+ called_tools = [
252
+ t for t in tool_telemetry
253
+ if t.tool_status != ToolExecutionStatus.NOT_CALLED
254
+ ]
255
+
256
+ if not called_tools:
257
+ return {
258
+ "tools_called": False,
259
+ "all_empty_results": False,
260
+ "has_errors": False,
261
+ "tool_count": 0
262
+ }
263
+
264
+ empty_results = [
265
+ t for t in called_tools
266
+ if t.tool_status == ToolExecutionStatus.EMPTY_RESULT
267
+ ]
268
+
269
+ errored_tools = [
270
+ t for t in called_tools
271
+ if t.tool_status == ToolExecutionStatus.ERROR
272
+ ]
273
+
274
+ return {
275
+ "tools_called": True,
276
+ "all_empty_results": len(empty_results) == len(called_tools),
277
+ "has_errors": len(errored_tools) > 0,
278
+ "tool_count": len(called_tools),
279
+ "empty_count": len(empty_results),
280
+ "error_count": len(errored_tools)
281
+ }
282
+
283
def should_trigger_audit(self, outcome: AgentOutcome) -> bool:
    """
    Decide whether an outcome warrants a Completeness Audit.

    Only give-up outcomes trigger the audit, which avoids expensive
    auditing of every interaction.

    Args:
        outcome: The agent outcome

    Returns:
        True if the outcome type is GIVE_UP
    """
    is_give_up = outcome.outcome_type == OutcomeType.GIVE_UP
    return is_give_up
297
+
298
def get_give_up_rate(self, agent_id: Optional[str] = None, recent_n: int = 100) -> float:
    """
    Calculate the give-up rate for an agent.

    This metric helps identify agents that are consistently lazy.

    Args:
        agent_id: Optional agent ID to filter by
        recent_n: Number of most recent (matching) outcomes to analyze;
            zero or a negative value yields 0.0

    Returns:
        Give-up rate as a float between 0 and 1 (0.0 when there are no
        outcomes to analyze)
    """
    # outcome_history[-0:] would be the whole history, so handle
    # non-positive windows explicitly.
    if recent_n <= 0:
        return 0.0

    outcomes = self.outcome_history

    # Filter first so the window holds the agent's own most recent outcomes
    # instead of whatever share of the global tail belongs to the agent.
    if agent_id:
        outcomes = [o for o in outcomes if o.agent_id == agent_id]

    outcomes = outcomes[-recent_n:]

    if not outcomes:
        return 0.0

    give_ups = sum(1 for o in outcomes if o.outcome_type == OutcomeType.GIVE_UP)

    return give_ups / len(outcomes)
322
+
323
+ def get_outcome_history(
324
+ self,
325
+ agent_id: Optional[str] = None,
326
+ outcome_type: Optional[OutcomeType] = None,
327
+ limit: int = 100
328
+ ) -> List[AgentOutcome]:
329
+ """Get outcome history with optional filters."""
330
+ outcomes = self.outcome_history[-limit:]
331
+
332
+ if agent_id:
333
+ outcomes = [o for o in outcomes if o.agent_id == agent_id]
334
+
335
+ if outcome_type:
336
+ outcomes = [o for o in outcomes if o.outcome_type == outcome_type]
337
+
338
+ return outcomes