agent_os_kernel 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. agent_control_plane/__init__.py +662 -0
  2. agent_control_plane/a2a_adapter.py +543 -0
  3. agent_control_plane/adapter.py +417 -0
  4. agent_control_plane/agent_hibernation.py +394 -0
  5. agent_control_plane/agent_kernel.py +470 -0
  6. agent_control_plane/compliance.py +720 -0
  7. agent_control_plane/constraint_graphs.py +478 -0
  8. agent_control_plane/control_plane.py +854 -0
  9. agent_control_plane/example_executors.py +195 -0
  10. agent_control_plane/execution_engine.py +231 -0
  11. agent_control_plane/flight_recorder.py +846 -0
  12. agent_control_plane/governance_layer.py +435 -0
  13. agent_control_plane/hf_utils.py +563 -0
  14. agent_control_plane/interfaces/__init__.py +55 -0
  15. agent_control_plane/interfaces/kernel_interface.py +361 -0
  16. agent_control_plane/interfaces/plugin_interface.py +497 -0
  17. agent_control_plane/interfaces/protocol_interfaces.py +387 -0
  18. agent_control_plane/kernel_space.py +1009 -0
  19. agent_control_plane/langchain_adapter.py +424 -0
  20. agent_control_plane/lifecycle.py +3113 -0
  21. agent_control_plane/mcp_adapter.py +653 -0
  22. agent_control_plane/ml_safety.py +563 -0
  23. agent_control_plane/multimodal.py +727 -0
  24. agent_control_plane/mute_agent.py +422 -0
  25. agent_control_plane/observability.py +787 -0
  26. agent_control_plane/orchestrator.py +482 -0
  27. agent_control_plane/plugin_registry.py +750 -0
  28. agent_control_plane/policy_engine.py +954 -0
  29. agent_control_plane/process_isolation.py +777 -0
  30. agent_control_plane/shadow_mode.py +310 -0
  31. agent_control_plane/signals.py +493 -0
  32. agent_control_plane/supervisor_agents.py +430 -0
  33. agent_control_plane/time_travel_debugger.py +557 -0
  34. agent_control_plane/tool_registry.py +452 -0
  35. agent_control_plane/vfs.py +697 -0
  36. agent_kernel/__init__.py +69 -0
  37. agent_kernel/analyzer.py +435 -0
  38. agent_kernel/auditor.py +36 -0
  39. agent_kernel/completeness_auditor.py +237 -0
  40. agent_kernel/detector.py +203 -0
  41. agent_kernel/kernel.py +744 -0
  42. agent_kernel/memory_manager.py +85 -0
  43. agent_kernel/models.py +374 -0
  44. agent_kernel/nudge_mechanism.py +263 -0
  45. agent_kernel/outcome_analyzer.py +338 -0
  46. agent_kernel/patcher.py +582 -0
  47. agent_kernel/semantic_analyzer.py +316 -0
  48. agent_kernel/semantic_purge.py +349 -0
  49. agent_kernel/simulator.py +449 -0
  50. agent_kernel/teacher.py +85 -0
  51. agent_kernel/triage.py +152 -0
  52. agent_os/__init__.py +409 -0
  53. agent_os/_adversarial_impl.py +200 -0
  54. agent_os/_circuit_breaker_impl.py +232 -0
  55. agent_os/_mcp_metrics.py +193 -0
  56. agent_os/adversarial.py +20 -0
  57. agent_os/agents_compat.py +490 -0
  58. agent_os/audit_logger.py +135 -0
  59. agent_os/base_agent.py +651 -0
  60. agent_os/circuit_breaker.py +34 -0
  61. agent_os/cli/__init__.py +659 -0
  62. agent_os/cli/cmd_audit.py +128 -0
  63. agent_os/cli/cmd_init.py +152 -0
  64. agent_os/cli/cmd_policy.py +41 -0
  65. agent_os/cli/cmd_policy_gen.py +180 -0
  66. agent_os/cli/cmd_validate.py +258 -0
  67. agent_os/cli/mcp_scan.py +265 -0
  68. agent_os/cli/output.py +192 -0
  69. agent_os/cli/policy_checker.py +330 -0
  70. agent_os/compat.py +74 -0
  71. agent_os/constraint_graph.py +234 -0
  72. agent_os/content_governance.py +140 -0
  73. agent_os/context_budget.py +305 -0
  74. agent_os/credential_redactor.py +224 -0
  75. agent_os/diff_policy.py +89 -0
  76. agent_os/egress_policy.py +159 -0
  77. agent_os/escalation.py +276 -0
  78. agent_os/event_bus.py +124 -0
  79. agent_os/exceptions.py +180 -0
  80. agent_os/execution_context_policy.py +141 -0
  81. agent_os/github_enterprise.py +96 -0
  82. agent_os/health.py +20 -0
  83. agent_os/integrations/__init__.py +279 -0
  84. agent_os/integrations/a2a_adapter.py +279 -0
  85. agent_os/integrations/agent_lightning/__init__.py +30 -0
  86. agent_os/integrations/anthropic_adapter.py +420 -0
  87. agent_os/integrations/autogen_adapter.py +620 -0
  88. agent_os/integrations/base.py +1137 -0
  89. agent_os/integrations/compat.py +229 -0
  90. agent_os/integrations/config.py +98 -0
  91. agent_os/integrations/conversation_guardian.py +957 -0
  92. agent_os/integrations/crewai_adapter.py +467 -0
  93. agent_os/integrations/drift_detector.py +425 -0
  94. agent_os/integrations/dry_run.py +124 -0
  95. agent_os/integrations/escalation.py +582 -0
  96. agent_os/integrations/gemini_adapter.py +364 -0
  97. agent_os/integrations/google_adk_adapter.py +633 -0
  98. agent_os/integrations/guardrails_adapter.py +394 -0
  99. agent_os/integrations/health.py +197 -0
  100. agent_os/integrations/langchain_adapter.py +654 -0
  101. agent_os/integrations/llamafirewall.py +343 -0
  102. agent_os/integrations/llamaindex_adapter.py +188 -0
  103. agent_os/integrations/logging.py +191 -0
  104. agent_os/integrations/maf_adapter.py +631 -0
  105. agent_os/integrations/mistral_adapter.py +365 -0
  106. agent_os/integrations/openai_adapter.py +816 -0
  107. agent_os/integrations/openai_agents_sdk.py +406 -0
  108. agent_os/integrations/policy_compose.py +171 -0
  109. agent_os/integrations/profiling.py +144 -0
  110. agent_os/integrations/pydantic_ai_adapter.py +420 -0
  111. agent_os/integrations/rate_limiter.py +130 -0
  112. agent_os/integrations/rbac.py +143 -0
  113. agent_os/integrations/registry.py +113 -0
  114. agent_os/integrations/scope_guard.py +303 -0
  115. agent_os/integrations/semantic_kernel_adapter.py +769 -0
  116. agent_os/integrations/smolagents_adapter.py +629 -0
  117. agent_os/integrations/templates.py +178 -0
  118. agent_os/integrations/token_budget.py +134 -0
  119. agent_os/integrations/tool_aliases.py +190 -0
  120. agent_os/integrations/webhooks.py +177 -0
  121. agent_os/lite.py +208 -0
  122. agent_os/mcp_gateway.py +385 -0
  123. agent_os/mcp_message_signer.py +273 -0
  124. agent_os/mcp_protocols.py +161 -0
  125. agent_os/mcp_response_scanner.py +232 -0
  126. agent_os/mcp_security.py +924 -0
  127. agent_os/mcp_session_auth.py +231 -0
  128. agent_os/mcp_sliding_rate_limiter.py +184 -0
  129. agent_os/memory_guard.py +409 -0
  130. agent_os/metrics.py +134 -0
  131. agent_os/mute.py +428 -0
  132. agent_os/mute_agent.py +209 -0
  133. agent_os/policies/__init__.py +77 -0
  134. agent_os/policies/async_evaluator.py +275 -0
  135. agent_os/policies/backends.py +670 -0
  136. agent_os/policies/bridge.py +169 -0
  137. agent_os/policies/budget.py +85 -0
  138. agent_os/policies/cli.py +294 -0
  139. agent_os/policies/conflict_resolution.py +270 -0
  140. agent_os/policies/data_classification.py +252 -0
  141. agent_os/policies/evaluator.py +239 -0
  142. agent_os/policies/policy_schema.json +228 -0
  143. agent_os/policies/rate_limiting.py +145 -0
  144. agent_os/policies/schema.py +115 -0
  145. agent_os/policies/shared.py +331 -0
  146. agent_os/prompt_injection.py +694 -0
  147. agent_os/providers.py +182 -0
  148. agent_os/py.typed +0 -0
  149. agent_os/retry.py +81 -0
  150. agent_os/reversibility.py +251 -0
  151. agent_os/sandbox.py +432 -0
  152. agent_os/sandbox_provider.py +140 -0
  153. agent_os/secure_codegen.py +525 -0
  154. agent_os/security_skills.py +538 -0
  155. agent_os/semantic_policy.py +422 -0
  156. agent_os/server/__init__.py +15 -0
  157. agent_os/server/__main__.py +25 -0
  158. agent_os/server/app.py +277 -0
  159. agent_os/server/models.py +104 -0
  160. agent_os/shift_left_metrics.py +130 -0
  161. agent_os/stateless.py +742 -0
  162. agent_os/supervisor.py +148 -0
  163. agent_os/task_outcome.py +148 -0
  164. agent_os/transparency.py +181 -0
  165. agent_os/trust_root.py +128 -0
  166. agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
  167. agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
  168. agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
  169. agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
  170. agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
  171. agent_os_observability/__init__.py +27 -0
  172. agent_os_observability/dashboards.py +898 -0
  173. agent_os_observability/metrics.py +398 -0
  174. agent_os_observability/server.py +223 -0
  175. agent_os_observability/tracer.py +232 -0
  176. agent_primitives/__init__.py +24 -0
  177. agent_primitives/failures.py +84 -0
  178. agent_primitives/py.typed +0 -0
  179. amb_core/__init__.py +177 -0
  180. amb_core/adapters/__init__.py +57 -0
  181. amb_core/adapters/aws_sqs_broker.py +376 -0
  182. amb_core/adapters/azure_servicebus_broker.py +340 -0
  183. amb_core/adapters/kafka_broker.py +260 -0
  184. amb_core/adapters/nats_broker.py +285 -0
  185. amb_core/adapters/rabbitmq_broker.py +235 -0
  186. amb_core/adapters/redis_broker.py +262 -0
  187. amb_core/broker.py +145 -0
  188. amb_core/bus.py +481 -0
  189. amb_core/cloudevents.py +509 -0
  190. amb_core/dlq.py +345 -0
  191. amb_core/hf_utils.py +536 -0
  192. amb_core/memory_broker.py +410 -0
  193. amb_core/models.py +141 -0
  194. amb_core/persistence.py +529 -0
  195. amb_core/schema.py +294 -0
  196. amb_core/tracing.py +358 -0
  197. atr/__init__.py +640 -0
  198. atr/access.py +348 -0
  199. atr/composition.py +645 -0
  200. atr/decorator.py +357 -0
  201. atr/executor.py +384 -0
  202. atr/health.py +557 -0
  203. atr/hf_utils.py +449 -0
  204. atr/injection.py +422 -0
  205. atr/metrics.py +440 -0
  206. atr/policies.py +403 -0
  207. atr/py.typed +2 -0
  208. atr/registry.py +452 -0
  209. atr/schema.py +480 -0
  210. atr/tools/safe/__init__.py +75 -0
  211. atr/tools/safe/calculator.py +467 -0
  212. atr/tools/safe/datetime_tool.py +443 -0
  213. atr/tools/safe/file_reader.py +402 -0
  214. atr/tools/safe/http_client.py +316 -0
  215. atr/tools/safe/json_parser.py +374 -0
  216. atr/tools/safe/text_tool.py +537 -0
  217. atr/tools/safe/toolkit.py +175 -0
  218. caas/__init__.py +162 -0
  219. caas/api/__init__.py +7 -0
  220. caas/api/server.py +1328 -0
  221. caas/caching.py +834 -0
  222. caas/cli.py +210 -0
  223. caas/conversation.py +223 -0
  224. caas/decay.py +72 -0
  225. caas/detection/__init__.py +9 -0
  226. caas/detection/detector.py +238 -0
  227. caas/enrichment.py +130 -0
  228. caas/gateway/__init__.py +27 -0
  229. caas/gateway/trust_gateway.py +474 -0
  230. caas/hf_utils.py +479 -0
  231. caas/ingestion/__init__.py +23 -0
  232. caas/ingestion/processors.py +253 -0
  233. caas/ingestion/structure_parser.py +188 -0
  234. caas/models.py +356 -0
  235. caas/pragmatic_truth.py +444 -0
  236. caas/routing/__init__.py +10 -0
  237. caas/routing/heuristic_router.py +58 -0
  238. caas/storage/__init__.py +9 -0
  239. caas/storage/store.py +389 -0
  240. caas/triad.py +213 -0
  241. caas/tuning/__init__.py +9 -0
  242. caas/tuning/tuner.py +329 -0
  243. caas/vfs/__init__.py +14 -0
  244. caas/vfs/filesystem.py +452 -0
  245. cmvk/__init__.py +218 -0
  246. cmvk/audit.py +402 -0
  247. cmvk/benchmarks.py +478 -0
  248. cmvk/constitutional.py +904 -0
  249. cmvk/hf_utils.py +301 -0
  250. cmvk/metrics.py +473 -0
  251. cmvk/profiles.py +300 -0
  252. cmvk/py.typed +0 -0
  253. cmvk/types.py +12 -0
  254. cmvk/verification.py +956 -0
  255. emk/__init__.py +89 -0
  256. emk/causal.py +352 -0
  257. emk/hf_utils.py +421 -0
  258. emk/indexer.py +83 -0
  259. emk/py.typed +0 -0
  260. emk/schema.py +204 -0
  261. emk/sleep_cycle.py +347 -0
  262. emk/store.py +281 -0
  263. iatp/__init__.py +166 -0
  264. iatp/attestation.py +461 -0
  265. iatp/cli.py +317 -0
  266. iatp/hf_utils.py +472 -0
  267. iatp/ipc_pipes.py +580 -0
  268. iatp/main.py +412 -0
  269. iatp/models/__init__.py +447 -0
  270. iatp/policy_engine.py +337 -0
  271. iatp/py.typed +2 -0
  272. iatp/recovery.py +321 -0
  273. iatp/security/__init__.py +270 -0
  274. iatp/sidecar/__init__.py +519 -0
  275. iatp/telemetry/__init__.py +164 -0
  276. iatp/tests/__init__.py +1 -0
  277. iatp/tests/test_attestation.py +370 -0
  278. iatp/tests/test_cli.py +131 -0
  279. iatp/tests/test_ed25519_attestation.py +211 -0
  280. iatp/tests/test_models.py +130 -0
  281. iatp/tests/test_policy_engine.py +347 -0
  282. iatp/tests/test_recovery.py +281 -0
  283. iatp/tests/test_security.py +222 -0
  284. iatp/tests/test_sidecar.py +167 -0
  285. iatp/tests/test_telemetry.py +175 -0
  286. mcp_kernel_server/__init__.py +28 -0
  287. mcp_kernel_server/cli.py +274 -0
  288. mcp_kernel_server/resources.py +217 -0
  289. mcp_kernel_server/server.py +564 -0
  290. mcp_kernel_server/tools.py +1174 -0
  291. mute_agent/__init__.py +68 -0
  292. mute_agent/core/__init__.py +1 -0
  293. mute_agent/core/execution_agent.py +166 -0
  294. mute_agent/core/handshake_protocol.py +201 -0
  295. mute_agent/core/reasoning_agent.py +238 -0
  296. mute_agent/knowledge_graph/__init__.py +1 -0
  297. mute_agent/knowledge_graph/graph_elements.py +65 -0
  298. mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
  299. mute_agent/knowledge_graph/subgraph.py +224 -0
  300. mute_agent/listener/__init__.py +43 -0
  301. mute_agent/listener/adapters/__init__.py +31 -0
  302. mute_agent/listener/adapters/base_adapter.py +189 -0
  303. mute_agent/listener/adapters/caas_adapter.py +344 -0
  304. mute_agent/listener/adapters/control_plane_adapter.py +436 -0
  305. mute_agent/listener/adapters/iatp_adapter.py +332 -0
  306. mute_agent/listener/adapters/scak_adapter.py +251 -0
  307. mute_agent/listener/listener.py +610 -0
  308. mute_agent/listener/state_observer.py +436 -0
  309. mute_agent/listener/threshold_config.py +313 -0
  310. mute_agent/super_system/__init__.py +1 -0
  311. mute_agent/super_system/router.py +204 -0
  312. mute_agent/visualization/__init__.py +10 -0
  313. mute_agent/visualization/graph_debugger.py +502 -0
  314. nexus/README.md +60 -0
  315. nexus/__init__.py +51 -0
  316. nexus/arbiter.py +359 -0
  317. nexus/client.py +466 -0
  318. nexus/dmz.py +444 -0
  319. nexus/escrow.py +430 -0
  320. nexus/exceptions.py +286 -0
  321. nexus/pyproject.toml +36 -0
  322. nexus/registry.py +393 -0
  323. nexus/reputation.py +425 -0
  324. nexus/schemas/__init__.py +51 -0
  325. nexus/schemas/compliance.py +276 -0
  326. nexus/schemas/escrow.py +251 -0
  327. nexus/schemas/manifest.py +225 -0
  328. nexus/schemas/receipt.py +208 -0
  329. nexus/tests/__init__.py +0 -0
  330. nexus/tests/conftest.py +146 -0
  331. nexus/tests/test_arbiter.py +192 -0
  332. nexus/tests/test_dmz.py +194 -0
  333. nexus/tests/test_escrow.py +276 -0
  334. nexus/tests/test_exceptions.py +225 -0
  335. nexus/tests/test_registry.py +232 -0
  336. nexus/tests/test_reputation.py +328 -0
  337. nexus/tests/test_schemas.py +295 -0
@@ -0,0 +1,316 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+
4
+ """
5
+ Semantic Analyzer - Advanced refusal detection beyond regex patterns.
6
+
7
+ This module provides semantic analysis of agent responses to detect
8
+ "refusal" vs "compliance" behavior using contextual understanding
9
+ rather than just pattern matching.
10
+ """
11
+
12
+ import logging
13
+ from typing import Optional, List
14
+
15
+ from .models import SemanticAnalysis
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class SemanticAnalyzer:
    """
    Analyzes agent responses to detect refusal ("give-up") patterns.

    Detection combines two phrase lists (refusal and compliance indicators,
    matched as case-insensitive substrings) with tool-execution telemetry
    and response length. It is meant to catch subtle forms of giving up like:
    - "I'm afraid those records are elusive at the moment."
    - "The information seems to be unavailable."
    - "It appears there's nothing to show."

    This is inspired by "Refusal Benchmarking" in AI safety research.
    """

    def __init__(self):
        """Initialize the semantic analyzer with lowercase indicator lists."""
        # Responses are lowercased before matching, so an indicator containing
        # an uppercase letter (e.g. "I'm afraid") could never match. Normalize
        # the lists once here instead of on every call.
        self.refusal_indicators = [
            indicator.lower() for indicator in self._load_refusal_indicators()
        ]
        self.compliance_indicators = [
            indicator.lower() for indicator in self._load_compliance_indicators()
        ]

    def _load_refusal_indicators(self) -> List[str]:
        """Load phrase indicators of refusal/giving up."""
        return [
            # Direct refusal phrases
            "cannot", "can't", "unable", "impossible", "won't",
            "don't have", "doesn't exist", "not available", "unavailable",

            # Evasive/elusive language
            "elusive", "appears to be", "seems to be", "might be",
            "could be", "perhaps", "possibly", "unclear",

            # Uncertainty/hedging
            "I'm afraid", "unfortunately", "regrettably", "sadly",
            "it seems", "it appears", "looks like",

            # Empty/negative results
            "no data", "no results", "no information", "nothing found",
            "zero results", "empty", "none available",

            # Insufficient effort indicators
            "not sure", "uncertain", "don't know", "can't tell",
            "hard to say", "difficult to determine"
        ]

    def _load_compliance_indicators(self) -> List[str]:
        """Load phrase indicators of compliance/success."""
        return [
            # Action completion
            "found", "discovered", "located", "identified", "retrieved",
            "obtained", "extracted", "collected",

            # Data presentation
            "here is", "here are", "the results", "the data shows",
            "according to", "based on", "from the",

            # Quantity indicators
            "total", "count", "number of", "records", "entries",
            "items", "results show",

            # Confidence indicators
            "successfully", "confirmed", "verified", "validated"
        ]

    def analyze(
        self,
        agent_response: str,
        user_prompt: str,
        tool_telemetry: Optional[List] = None
    ) -> "SemanticAnalysis":
        """
        Perform refusal analysis on an agent response.

        Args:
            agent_response: The agent's response to analyze. ``None`` or an
                empty string is treated as an empty response.
            user_prompt: The original user prompt. Currently not used by the
                scoring; accepted so the call signature stays stable.
            tool_telemetry: Optional tool execution telemetry

        Returns:
            SemanticAnalysis with classification, confidence, category and
            a human-readable reasoning string
        """
        response_lower = (agent_response or "").lower()

        # Phrase-based scores, each in the range 0.0-1.0
        refusal_score = self._calculate_refusal_score(response_lower)
        compliance_score = self._calculate_compliance_score(response_lower)

        # Laziness score derived from tool telemetry
        tool_context_score = self._analyze_tool_context(tool_telemetry)

        is_refusal = self._determine_refusal(
            refusal_score,
            compliance_score,
            tool_context_score,
            response_lower
        )

        confidence = self._calculate_confidence(
            refusal_score,
            compliance_score,
            tool_context_score,
            response_lower
        )

        category = self._determine_category(
            is_refusal,
            refusal_score,
            compliance_score
        )

        reasoning = self._generate_reasoning(
            is_refusal,
            refusal_score,
            compliance_score,
            tool_context_score,
            response_lower
        )

        return SemanticAnalysis(
            is_refusal=is_refusal,
            refusal_confidence=confidence,
            semantic_category=category,
            reasoning=reasoning
        )

    @staticmethod
    def _indicator_score(response: str, indicators: List[str]) -> float:
        """
        Score a response by how many indicators occur in it as substrings.

        The response is lowercased here so direct callers do not have to
        pre-normalize it. Each indicator counts at most once.

        Returns:
            ``matches / 3`` capped at 1.0, i.e. three or more matches = 1.0
        """
        text = response.lower()
        matches = sum(1 for indicator in indicators if indicator in text)
        return min(matches / 3.0, 1.0)

    def _calculate_refusal_score(self, response: str) -> float:
        """Calculate refusal score (0.0-1.0) from refusal indicators present."""
        return self._indicator_score(response, self.refusal_indicators)

    def _calculate_compliance_score(self, response: str) -> float:
        """Calculate compliance score (0.0-1.0) from compliance indicators present."""
        return self._indicator_score(response, self.compliance_indicators)

    def _analyze_tool_context(self, tool_telemetry: Optional[List]) -> float:
        """
        Analyze tool execution context.

        Returns a score indicating likelihood of laziness:
        - 0.7: No telemetry supplied (``None`` or empty) - possible laziness
        - 1.0: Telemetry supplied but every entry is NOT_CALLED
        - 0.3: Every called tool returned EMPTY_RESULT - likely legitimate
        - 0.5: Anything else, including all called tools returning data

        NOTE(review): no path returns a value below 0.3, so the
        ``tool_context_score < 0.3`` checks in ``_calculate_confidence`` and
        ``_generate_reasoning`` never fire. Likewise 0.7 does not satisfy the
        ``> 0.7`` checks. Confirm the intended thresholds before changing them.
        """
        if not tool_telemetry:
            return 0.7  # No telemetry suggests possible laziness

        # Imported lazily, as in the original module
        from .models import ToolExecutionStatus

        # Entries whose tool was actually invoked
        called_tools = [
            t for t in tool_telemetry
            if t.tool_status != ToolExecutionStatus.NOT_CALLED
        ]

        if not called_tools:
            return 1.0  # No tools called - clear laziness

        # Invoked tools that came back empty
        empty_results = [
            t for t in called_tools
            if t.tool_status == ToolExecutionStatus.EMPTY_RESULT
        ]

        if len(empty_results) == len(called_tools):
            return 0.3  # All tools returned empty - likely legitimate

        # Mix of results (or no empty results at all)
        return 0.5

    def _determine_refusal(
        self,
        refusal_score: float,
        compliance_score: float,
        tool_context_score: float,
        response: str
    ) -> bool:
        """
        Determine if response indicates refusal.

        Returns True as soon as any one of these rules matches:
        - Short response (< 50 chars) with refusal language
        - High refusal score with low compliance score
        - Tool score above 0.7 plus some refusal language
        - Moderate refusal score with zero compliance score
        """
        # Short responses with refusal language
        if len(response) < 50 and refusal_score > 0.3:
            return True

        # High refusal score and low compliance
        if refusal_score > 0.5 and compliance_score < 0.2:
            return True

        # High tool laziness (not called) + some refusal language
        if tool_context_score > 0.7 and refusal_score > 0.2:
            return True

        # Moderate refusal with no compliance
        if refusal_score > 0.3 and compliance_score == 0.0:
            return True

        return False

    def _calculate_confidence(
        self,
        refusal_score: float,
        compliance_score: float,
        tool_context_score: float,
        response: str
    ) -> float:
        """
        Calculate confidence in the refusal detection.

        Starts from ``|refusal - compliance| + 0.5`` (capped at 1.0), then:
        - +0.10 when the tool score is below 0.3 or above 0.7
        - x0.8 when the response is shorter than 20 characters
        - +0.15 when either phrase score exceeds 0.7

        Returns:
            Confidence rounded to two decimals, never above 1.0
        """
        # Base confidence from score differences
        score_diff = abs(refusal_score - compliance_score)
        base_confidence = min(score_diff + 0.5, 1.0)

        # Boost confidence if tool context is clear
        if tool_context_score < 0.3 or tool_context_score > 0.7:
            base_confidence = min(base_confidence + 0.1, 1.0)

        # Reduce confidence for very short responses (ambiguous)
        if len(response) < 20:
            base_confidence *= 0.8

        # Boost confidence for very clear patterns
        if refusal_score > 0.7 or compliance_score > 0.7:
            base_confidence = min(base_confidence + 0.15, 1.0)

        return round(base_confidence, 2)

    def _determine_category(
        self,
        is_refusal: bool,
        refusal_score: float,
        compliance_score: float
    ) -> str:
        """
        Determine semantic category of response.

        Returns one of "refusal", "compliance", "unclear" or "error";
        "error" is the fallback when no other rule matches.
        """
        if is_refusal:
            return "refusal"

        if compliance_score > 0.5:
            return "compliance"

        if refusal_score > 0.2 and compliance_score > 0.2:
            return "unclear"

        return "error"

    def _generate_reasoning(
        self,
        is_refusal: bool,
        refusal_score: float,
        compliance_score: float,
        tool_context_score: float,
        response: str
    ) -> str:
        """
        Generate human-readable reasoning for the classification.

        Returns a fixed prefix followed by "; "-joined reasons chosen from
        the scores and the response length.
        """
        reasons = []

        if is_refusal:
            if refusal_score > 0.5:
                reasons.append(f"Strong refusal language detected (score: {refusal_score:.2f})")
            elif refusal_score > 0.3:
                reasons.append(f"Moderate refusal indicators present (score: {refusal_score:.2f})")

            if compliance_score < 0.2:
                reasons.append(f"Low compliance indicators (score: {compliance_score:.2f})")

            # NOTE(review): with the scores _analyze_tool_context produces,
            # the "empty results" label is attached to 0.5 (mixed) and
            # 0.7 (no telemetry), not to 0.3 (all empty). Confirm intent.
            if tool_context_score > 0.7:
                reasons.append("Tools not called or minimal usage")
            elif tool_context_score > 0.4:
                reasons.append("Tools returned empty results")

            if len(response) < 50:
                reasons.append("Response is brief, suggesting minimal effort")

            return "Response indicates refusal/give-up: " + "; ".join(reasons)

        if compliance_score > 0.5:
            reasons.append(f"Strong compliance indicators (score: {compliance_score:.2f})")
        elif compliance_score > 0.2:
            reasons.append(f"Some compliance indicators present (score: {compliance_score:.2f})")

        if tool_context_score < 0.3:
            reasons.append("Tools executed and returned data")

        if refusal_score < 0.2:
            reasons.append("Minimal refusal language")

        return "Response indicates compliance/success: " + "; ".join(reasons)
@@ -0,0 +1,349 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+
4
+ """
5
+ Semantic Purge - Scale by Subtraction for patch lifecycle management.
6
+
7
+ Implements the "Taxonomy of Lessons" to prevent context bloat:
8
+ - Type A (Syntax/Capability): High decay - likely model defects, purge on upgrade
9
+ - Type B (Business/Context): Zero decay - world truths, retain forever
10
+
11
+ This allows reducing context usage by 40-60% over the agent's lifetime.
12
+ """
13
+
14
+ import logging
15
+ from typing import List, Dict, Optional
16
+ from datetime import datetime
17
+
18
+ from .models import (
19
+ CorrectionPatch, ClassifiedPatch, PatchDecayType,
20
+ CognitiveGlitch, CompletenessAudit
21
+ )
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
class PatchClassifier:
    """
    Classifies patches into Type A (Syntax) vs Type B (Business).

    This is the "Taxonomy of Lessons" that determines lifecycle: Type A
    patches are marked for purging on model upgrade, Type B are retained.
    """

    def __init__(self):
        """Set up the lowercase phrase lists used for content-based scoring."""
        self.syntax_indicators = [
            "output json", "format", "syntax", "parse", "validation error",
            "type mismatch", "parameter type", "limit 10", "use uuid",
            "tool definition", "schema injection", "parameter checking"
        ]

        self.business_indicators = [
            "fiscal year", "project", "entity", "business rule", "policy",
            "archived", "deprecated", "does not exist", "negative constraint",
            "company", "organization", "domain", "customer", "workflow"
        ]

    def classify_patch(
        self,
        patch: "CorrectionPatch",
        current_model_version: str
    ) -> "ClassifiedPatch":
        """
        Classify a patch as Type A or Type B.

        Args:
            patch: The correction patch to classify
            current_model_version: Current model version (e.g., "gpt-4o", "gpt-5")

        Returns:
            ClassifiedPatch with decay type and metadata
        """
        logger.info("Classifying patch %s", patch.patch_id)

        # Analyze patch content to determine type
        decay_type = self._determine_decay_type(patch)

        # Only Type A (syntax/capability) patches are purged on upgrade
        should_purge = (decay_type == PatchDecayType.SYNTAX_CAPABILITY)

        metadata = self._build_decay_metadata(patch, decay_type)

        classified = ClassifiedPatch(
            base_patch=patch,
            decay_type=decay_type,
            created_at_model_version=current_model_version,
            decay_metadata=metadata,
            should_purge_on_upgrade=should_purge
        )

        logger.info(
            "Classified as %s (purge on upgrade: %s)",
            decay_type.value, should_purge
        )

        return classified

    @staticmethod
    def _rule_text(patch_content) -> str:
        """
        Return the lowercased "rule" entry of a patch's content.

        Yields "" when the content has no ``get`` method, the key is missing,
        or the value is empty/None. Non-string values are stringified so a
        malformed rule cannot raise AttributeError on ``.lower()``.
        """
        getter = getattr(patch_content, "get", None)
        rule = getter("rule") if callable(getter) else None
        return str(rule).lower() if rule else ""

    def _determine_decay_type(self, patch: "CorrectionPatch") -> "PatchDecayType":
        """
        Determine if patch is Type A (Syntax) or Type B (Business).

        Type A - Syntax/Capability (HIGH DECAY):
        - Model-specific issues (JSON formatting, type errors)
        - Tool usage errors (wrong parameter types)
        - Syntax errors, validation issues
        - Query construction problems
        - These are likely fixed in newer model versions

        Type B - Business/Context (ZERO DECAY):
        - Company-specific rules ("Fiscal year starts in July")
        - Entity existence ("Project_Alpha is deprecated")
        - Policy violations (medical advice restrictions)
        - Domain knowledge (archived locations, workflows)
        - These are world truths that models can't learn

        Checks run in this order, first match wins: diagnosis glitch,
        patch-type specific rules, indicator scores, then the Type B default.
        """
        # Lowercased string form of the whole content, used for substring checks
        content_str = str(patch.patch_content).lower()

        # Check diagnosis first (most reliable indicator)
        if patch.diagnosis:
            glitch = patch.diagnosis.cognitive_glitch

            # Tool misuse is almost always Type A (model capability issue)
            if glitch == CognitiveGlitch.TOOL_MISUSE:
                return PatchDecayType.SYNTAX_CAPABILITY

            # Policy violations are Type B (business rules)
            if glitch == CognitiveGlitch.POLICY_VIOLATION:
                return PatchDecayType.BUSINESS_CONTEXT

            # Hallucinations about entities are Type B (world knowledge)
            if glitch == CognitiveGlitch.HALLUCINATION:
                return PatchDecayType.BUSINESS_CONTEXT

            # Schema mismatches depend on content: company-specific schema is Type B
            if glitch == CognitiveGlitch.SCHEMA_MISMATCH:
                if any(indicator in content_str for indicator in self.business_indicators):
                    return PatchDecayType.BUSINESS_CONTEXT
                return PatchDecayType.SYNTAX_CAPABILITY

        # Check for specific patterns
        if patch.patch_type == "system_prompt":
            rule_lower = self._rule_text(patch.patch_content)

            # Schema injection and parameter checking are Type A
            if "schema injection" in rule_lower or "parameter type" in rule_lower:
                return PatchDecayType.SYNTAX_CAPABILITY

            # Constitutional rules about domains are Type B
            if "constitutional" in rule_lower or "refuse" in rule_lower:
                return PatchDecayType.BUSINESS_CONTEXT

            # Entity-specific negative constraints are Type B
            if "does not exist" in rule_lower or "deprecated" in rule_lower:
                return PatchDecayType.BUSINESS_CONTEXT

        # RAG memory patches are typically Type B (business context)
        if patch.patch_type == "rag_memory":
            negative_constraint = patch.patch_content.get("negative_constraint")
            if negative_constraint:
                return PatchDecayType.BUSINESS_CONTEXT

        # Score-based classification: count indicators found in the content
        syntax_score = sum(1 for ind in self.syntax_indicators if ind in content_str)
        business_score = sum(1 for ind in self.business_indicators if ind in content_str)

        if business_score > syntax_score:
            return PatchDecayType.BUSINESS_CONTEXT
        elif syntax_score > 0:
            return PatchDecayType.SYNTAX_CAPABILITY

        # Default to business context (safer - won't accidentally purge important rules)
        return PatchDecayType.BUSINESS_CONTEXT

    def _build_decay_metadata(self, patch: "CorrectionPatch", decay_type: "PatchDecayType") -> Dict:
        """
        Build metadata for decay management.

        Returns:
            Dict with the classification reason, estimated lifetime, priority
            and purge trigger; Type A adds "expected_fix_in", Type B adds
            "rag_storage_recommended".
        """
        is_syntax = decay_type == PatchDecayType.SYNTAX_CAPABILITY

        metadata = {
            "classification_reason": self._get_classification_reason(patch, decay_type),
            "estimated_lifetime": "until_upgrade" if is_syntax else "permanent",
            "priority": "low" if is_syntax else "high"
        }

        if is_syntax:
            metadata["purge_trigger"] = "model_version_upgrade"
            metadata["expected_fix_in"] = "next_model_generation"
        else:
            metadata["purge_trigger"] = "manual_review_only"
            metadata["rag_storage_recommended"] = True

        return metadata

    def _get_classification_reason(self, patch: "CorrectionPatch", decay_type: "PatchDecayType") -> str:
        """
        Get human-readable reason for classification.

        Mentions the diagnosed glitch when the patch has a diagnosis;
        otherwise reports that classification was content-based.
        """
        if patch.diagnosis:
            glitch = patch.diagnosis.cognitive_glitch.value
            if decay_type == PatchDecayType.SYNTAX_CAPABILITY:
                return f"Model capability issue ({glitch}) - likely fixed in upgraded models"
            else:
                return f"Domain/business knowledge ({glitch}) - requires permanent retention"
        return "Content-based classification"
191
+
192
+
193
+ class SemanticPurge:
194
+ """
195
+ Manages patch lifecycle and purging.
196
+
197
+ This is "Scale by Subtraction" - reducing context by purging temporary wisdom.
198
+ """
199
+
200
def __init__(self):
    """Start with no registered patches, no purge history and a fresh classifier."""
    # Live patches, stored under their patch_id.
    self.classified_patches: Dict[str, ClassifiedPatch] = {}
    # One record per purge event, in the order the events happened.
    self.purge_history: List[Dict] = []
    # Classifier used to categorise patches as they are registered.
    self.classifier = PatchClassifier()
204
+
205
def register_patch(
    self,
    patch: CorrectionPatch,
    current_model_version: str
) -> ClassifiedPatch:
    """
    Classify a patch and start tracking it for lifecycle management.

    Args:
        patch: The correction patch to track; it is stored under its patch_id.
        current_model_version: Model version handed to the classifier.

    Returns:
        The ClassifiedPatch produced by the classifier.
    """
    result = self.classifier.classify_patch(patch, current_model_version)

    # A later registration with the same patch_id replaces the earlier entry.
    patch_key = patch.patch_id
    self.classified_patches[patch_key] = result

    logger.info(f"Registered patch {patch_key} as {result.decay_type.value}")
    return result
226
+
227
def purge_on_upgrade(
    self,
    old_model_version: str,
    new_model_version: str
) -> Dict[str, List[str]]:
    """
    Drop every patch flagged for purge-on-upgrade and record the event.

    This is the "Purge Event" that reclaims tokens after a model upgrade.
    The work is done inline in this call. The two version strings are only
    logged and stored in the history record; they are not compared.

    Args:
        old_model_version: Previous model version
        new_model_version: New model version

    Returns:
        Dictionary with "purged" and "retained" patch-ID lists, plus a
        "stats" dict holding the counts and the estimated tokens reclaimed.
    """
    logger.info(f"🗑️ PURGE EVENT: Model upgrade {old_model_version} → {new_model_version}")

    purged_patches = []
    retained_patches = []

    # Partition first; removal happens afterwards so the dict is not
    # modified while it is being iterated.
    for patch_id, classified in self.classified_patches.items():
        if not classified.should_purge_on_upgrade:
            # Type B (Business) - stays registered
            retained_patches.append(patch_id)
            continue
        # Type A (Syntax) - likely fixed in new model
        purged_patches.append(patch_id)
        logger.info(f" Purging Type A patch {patch_id}: {classified.decay_metadata.get('classification_reason', '')}")

    purged_count = len(purged_patches)
    retained_count = len(retained_patches)

    # History record for this purge event
    purge_event = dict(
        timestamp=datetime.utcnow(),
        old_version=old_model_version,
        new_version=new_model_version,
        purged_count=purged_count,
        retained_count=retained_count,
        purged_patches=purged_patches,
        tokens_reclaimed=self._estimate_tokens_reclaimed(purged_patches),
    )
    self.purge_history.append(purge_event)
    tokens_reclaimed = purge_event["tokens_reclaimed"]

    # Actually remove the purged entries from the registry
    for patch_id in purged_patches:
        self.classified_patches.pop(patch_id)

    logger.info(f"✓ Purged {purged_count} Type A patches")
    logger.info(f"✓ Retained {retained_count} Type B patches")
    logger.info(f"✓ Estimated tokens reclaimed: {tokens_reclaimed}")

    return {
        "purged": purged_patches,
        "retained": retained_patches,
        "stats": {
            "purged_count": purged_count,
            "retained_count": retained_count,
            "tokens_reclaimed": tokens_reclaimed,
        },
    }
288
+
289
+ def _estimate_tokens_reclaimed(self, purged_patch_ids: List[str]) -> int:
290
+ """
291
+ Estimate tokens reclaimed by purging patches.
292
+
293
+ Rough estimate: each patch uses 50-200 tokens depending on complexity.
294
+ """
295
+ return len(purged_patch_ids) * 100 # Average 100 tokens per patch
296
+
297
def get_purge_stats(self) -> Dict:
    """
    Summarise the current registry and all recorded purge events.

    Returns:
        Dict with the number of live patches, the per-type counts, the
        number of purge events, the totals purged and reclaimed across the
        history, and a formatted "estimated_savings" string giving the
        share of live patches that are syntax/capability patches.
    """
    registry = self.classified_patches
    current_total = len(registry)

    syntax_total = len(
        [p for p in registry.values() if p.decay_type == PatchDecayType.SYNTAX_CAPABILITY]
    )
    business_total = len(
        [p for p in registry.values() if p.decay_type == PatchDecayType.BUSINESS_CONTEXT]
    )

    # Fold the purge history into running totals.
    purged_total = 0
    tokens_total = 0
    for event in self.purge_history:
        purged_total += event["purged_count"]
        tokens_total += event["tokens_reclaimed"]

    # "or 1" keeps the percentage at 0.0 instead of dividing by zero
    # when no patches are registered.
    purgeable_share = (syntax_total / (current_total or 1)) * 100

    return {
        "current_patches": current_total,
        "type_a_syntax": syntax_total,
        "type_b_business": business_total,
        "purge_events": len(self.purge_history),
        "total_purged": purged_total,
        "total_tokens_reclaimed": tokens_total,
        "estimated_savings": f"{purgeable_share:.1f}% can be purged on upgrade",
    }
317
+
318
def get_purgeable_patches(self) -> List[ClassifiedPatch]:
    """Return the registered patches whose purge-on-upgrade flag is set, in registry order."""
    return list(
        filter(
            lambda candidate: candidate.should_purge_on_upgrade,
            self.classified_patches.values(),
        )
    )
324
+
325
def get_permanent_patches(self) -> List[ClassifiedPatch]:
    """Return the registered patches that are not flagged for purge on upgrade, in registry order."""
    permanent = []
    for candidate in self.classified_patches.values():
        if candidate.should_purge_on_upgrade:
            # Flagged for purge, so not permanent.
            continue
        permanent.append(candidate)
    return permanent
331
+
332
+ def register_completeness_audit(
333
+ self,
334
+ audit: CompletenessAudit,
335
+ current_model_version: str
336
+ ):
337
+ """
338
+ Register a competence patch from completeness audit.
339
+
340
+ Competence patches are always Type B (business context) because they
341
+ represent gaps in domain knowledge, not model defects.
342
+ """
343
+ # Create a synthetic patch for the competence lesson
344
+ # In a real system, this would be integrated with the patcher
345
+ logger.info(f"Registering competence patch from audit {audit.audit_id}")
346
+ logger.info(f" Lesson: {audit.competence_patch[:80]}...")
347
+
348
+ # Competence patches are always Type B - domain knowledge
349
+ # These represent what the agent didn't know about the domain/business