agent_os_kernel 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_control_plane/__init__.py +662 -0
- agent_control_plane/a2a_adapter.py +543 -0
- agent_control_plane/adapter.py +417 -0
- agent_control_plane/agent_hibernation.py +394 -0
- agent_control_plane/agent_kernel.py +470 -0
- agent_control_plane/compliance.py +720 -0
- agent_control_plane/constraint_graphs.py +478 -0
- agent_control_plane/control_plane.py +854 -0
- agent_control_plane/example_executors.py +195 -0
- agent_control_plane/execution_engine.py +231 -0
- agent_control_plane/flight_recorder.py +846 -0
- agent_control_plane/governance_layer.py +435 -0
- agent_control_plane/hf_utils.py +563 -0
- agent_control_plane/interfaces/__init__.py +55 -0
- agent_control_plane/interfaces/kernel_interface.py +361 -0
- agent_control_plane/interfaces/plugin_interface.py +497 -0
- agent_control_plane/interfaces/protocol_interfaces.py +387 -0
- agent_control_plane/kernel_space.py +1009 -0
- agent_control_plane/langchain_adapter.py +424 -0
- agent_control_plane/lifecycle.py +3113 -0
- agent_control_plane/mcp_adapter.py +653 -0
- agent_control_plane/ml_safety.py +563 -0
- agent_control_plane/multimodal.py +727 -0
- agent_control_plane/mute_agent.py +422 -0
- agent_control_plane/observability.py +787 -0
- agent_control_plane/orchestrator.py +482 -0
- agent_control_plane/plugin_registry.py +750 -0
- agent_control_plane/policy_engine.py +954 -0
- agent_control_plane/process_isolation.py +777 -0
- agent_control_plane/shadow_mode.py +310 -0
- agent_control_plane/signals.py +493 -0
- agent_control_plane/supervisor_agents.py +430 -0
- agent_control_plane/time_travel_debugger.py +557 -0
- agent_control_plane/tool_registry.py +452 -0
- agent_control_plane/vfs.py +697 -0
- agent_kernel/__init__.py +69 -0
- agent_kernel/analyzer.py +435 -0
- agent_kernel/auditor.py +36 -0
- agent_kernel/completeness_auditor.py +237 -0
- agent_kernel/detector.py +203 -0
- agent_kernel/kernel.py +744 -0
- agent_kernel/memory_manager.py +85 -0
- agent_kernel/models.py +374 -0
- agent_kernel/nudge_mechanism.py +263 -0
- agent_kernel/outcome_analyzer.py +338 -0
- agent_kernel/patcher.py +582 -0
- agent_kernel/semantic_analyzer.py +316 -0
- agent_kernel/semantic_purge.py +349 -0
- agent_kernel/simulator.py +449 -0
- agent_kernel/teacher.py +85 -0
- agent_kernel/triage.py +152 -0
- agent_os/__init__.py +409 -0
- agent_os/_adversarial_impl.py +200 -0
- agent_os/_circuit_breaker_impl.py +232 -0
- agent_os/_mcp_metrics.py +193 -0
- agent_os/adversarial.py +20 -0
- agent_os/agents_compat.py +490 -0
- agent_os/audit_logger.py +135 -0
- agent_os/base_agent.py +651 -0
- agent_os/circuit_breaker.py +34 -0
- agent_os/cli/__init__.py +659 -0
- agent_os/cli/cmd_audit.py +128 -0
- agent_os/cli/cmd_init.py +152 -0
- agent_os/cli/cmd_policy.py +41 -0
- agent_os/cli/cmd_policy_gen.py +180 -0
- agent_os/cli/cmd_validate.py +258 -0
- agent_os/cli/mcp_scan.py +265 -0
- agent_os/cli/output.py +192 -0
- agent_os/cli/policy_checker.py +330 -0
- agent_os/compat.py +74 -0
- agent_os/constraint_graph.py +234 -0
- agent_os/content_governance.py +140 -0
- agent_os/context_budget.py +305 -0
- agent_os/credential_redactor.py +224 -0
- agent_os/diff_policy.py +89 -0
- agent_os/egress_policy.py +159 -0
- agent_os/escalation.py +276 -0
- agent_os/event_bus.py +124 -0
- agent_os/exceptions.py +180 -0
- agent_os/execution_context_policy.py +141 -0
- agent_os/github_enterprise.py +96 -0
- agent_os/health.py +20 -0
- agent_os/integrations/__init__.py +279 -0
- agent_os/integrations/a2a_adapter.py +279 -0
- agent_os/integrations/agent_lightning/__init__.py +30 -0
- agent_os/integrations/anthropic_adapter.py +420 -0
- agent_os/integrations/autogen_adapter.py +620 -0
- agent_os/integrations/base.py +1137 -0
- agent_os/integrations/compat.py +229 -0
- agent_os/integrations/config.py +98 -0
- agent_os/integrations/conversation_guardian.py +957 -0
- agent_os/integrations/crewai_adapter.py +467 -0
- agent_os/integrations/drift_detector.py +425 -0
- agent_os/integrations/dry_run.py +124 -0
- agent_os/integrations/escalation.py +582 -0
- agent_os/integrations/gemini_adapter.py +364 -0
- agent_os/integrations/google_adk_adapter.py +633 -0
- agent_os/integrations/guardrails_adapter.py +394 -0
- agent_os/integrations/health.py +197 -0
- agent_os/integrations/langchain_adapter.py +654 -0
- agent_os/integrations/llamafirewall.py +343 -0
- agent_os/integrations/llamaindex_adapter.py +188 -0
- agent_os/integrations/logging.py +191 -0
- agent_os/integrations/maf_adapter.py +631 -0
- agent_os/integrations/mistral_adapter.py +365 -0
- agent_os/integrations/openai_adapter.py +816 -0
- agent_os/integrations/openai_agents_sdk.py +406 -0
- agent_os/integrations/policy_compose.py +171 -0
- agent_os/integrations/profiling.py +144 -0
- agent_os/integrations/pydantic_ai_adapter.py +420 -0
- agent_os/integrations/rate_limiter.py +130 -0
- agent_os/integrations/rbac.py +143 -0
- agent_os/integrations/registry.py +113 -0
- agent_os/integrations/scope_guard.py +303 -0
- agent_os/integrations/semantic_kernel_adapter.py +769 -0
- agent_os/integrations/smolagents_adapter.py +629 -0
- agent_os/integrations/templates.py +178 -0
- agent_os/integrations/token_budget.py +134 -0
- agent_os/integrations/tool_aliases.py +190 -0
- agent_os/integrations/webhooks.py +177 -0
- agent_os/lite.py +208 -0
- agent_os/mcp_gateway.py +385 -0
- agent_os/mcp_message_signer.py +273 -0
- agent_os/mcp_protocols.py +161 -0
- agent_os/mcp_response_scanner.py +232 -0
- agent_os/mcp_security.py +924 -0
- agent_os/mcp_session_auth.py +231 -0
- agent_os/mcp_sliding_rate_limiter.py +184 -0
- agent_os/memory_guard.py +409 -0
- agent_os/metrics.py +134 -0
- agent_os/mute.py +428 -0
- agent_os/mute_agent.py +209 -0
- agent_os/policies/__init__.py +77 -0
- agent_os/policies/async_evaluator.py +275 -0
- agent_os/policies/backends.py +670 -0
- agent_os/policies/bridge.py +169 -0
- agent_os/policies/budget.py +85 -0
- agent_os/policies/cli.py +294 -0
- agent_os/policies/conflict_resolution.py +270 -0
- agent_os/policies/data_classification.py +252 -0
- agent_os/policies/evaluator.py +239 -0
- agent_os/policies/policy_schema.json +228 -0
- agent_os/policies/rate_limiting.py +145 -0
- agent_os/policies/schema.py +115 -0
- agent_os/policies/shared.py +331 -0
- agent_os/prompt_injection.py +694 -0
- agent_os/providers.py +182 -0
- agent_os/py.typed +0 -0
- agent_os/retry.py +81 -0
- agent_os/reversibility.py +251 -0
- agent_os/sandbox.py +432 -0
- agent_os/sandbox_provider.py +140 -0
- agent_os/secure_codegen.py +525 -0
- agent_os/security_skills.py +538 -0
- agent_os/semantic_policy.py +422 -0
- agent_os/server/__init__.py +15 -0
- agent_os/server/__main__.py +25 -0
- agent_os/server/app.py +277 -0
- agent_os/server/models.py +104 -0
- agent_os/shift_left_metrics.py +130 -0
- agent_os/stateless.py +742 -0
- agent_os/supervisor.py +148 -0
- agent_os/task_outcome.py +148 -0
- agent_os/transparency.py +181 -0
- agent_os/trust_root.py +128 -0
- agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
- agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
- agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
- agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
- agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
- agent_os_observability/__init__.py +27 -0
- agent_os_observability/dashboards.py +898 -0
- agent_os_observability/metrics.py +398 -0
- agent_os_observability/server.py +223 -0
- agent_os_observability/tracer.py +232 -0
- agent_primitives/__init__.py +24 -0
- agent_primitives/failures.py +84 -0
- agent_primitives/py.typed +0 -0
- amb_core/__init__.py +177 -0
- amb_core/adapters/__init__.py +57 -0
- amb_core/adapters/aws_sqs_broker.py +376 -0
- amb_core/adapters/azure_servicebus_broker.py +340 -0
- amb_core/adapters/kafka_broker.py +260 -0
- amb_core/adapters/nats_broker.py +285 -0
- amb_core/adapters/rabbitmq_broker.py +235 -0
- amb_core/adapters/redis_broker.py +262 -0
- amb_core/broker.py +145 -0
- amb_core/bus.py +481 -0
- amb_core/cloudevents.py +509 -0
- amb_core/dlq.py +345 -0
- amb_core/hf_utils.py +536 -0
- amb_core/memory_broker.py +410 -0
- amb_core/models.py +141 -0
- amb_core/persistence.py +529 -0
- amb_core/schema.py +294 -0
- amb_core/tracing.py +358 -0
- atr/__init__.py +640 -0
- atr/access.py +348 -0
- atr/composition.py +645 -0
- atr/decorator.py +357 -0
- atr/executor.py +384 -0
- atr/health.py +557 -0
- atr/hf_utils.py +449 -0
- atr/injection.py +422 -0
- atr/metrics.py +440 -0
- atr/policies.py +403 -0
- atr/py.typed +2 -0
- atr/registry.py +452 -0
- atr/schema.py +480 -0
- atr/tools/safe/__init__.py +75 -0
- atr/tools/safe/calculator.py +467 -0
- atr/tools/safe/datetime_tool.py +443 -0
- atr/tools/safe/file_reader.py +402 -0
- atr/tools/safe/http_client.py +316 -0
- atr/tools/safe/json_parser.py +374 -0
- atr/tools/safe/text_tool.py +537 -0
- atr/tools/safe/toolkit.py +175 -0
- caas/__init__.py +162 -0
- caas/api/__init__.py +7 -0
- caas/api/server.py +1328 -0
- caas/caching.py +834 -0
- caas/cli.py +210 -0
- caas/conversation.py +223 -0
- caas/decay.py +72 -0
- caas/detection/__init__.py +9 -0
- caas/detection/detector.py +238 -0
- caas/enrichment.py +130 -0
- caas/gateway/__init__.py +27 -0
- caas/gateway/trust_gateway.py +474 -0
- caas/hf_utils.py +479 -0
- caas/ingestion/__init__.py +23 -0
- caas/ingestion/processors.py +253 -0
- caas/ingestion/structure_parser.py +188 -0
- caas/models.py +356 -0
- caas/pragmatic_truth.py +444 -0
- caas/routing/__init__.py +10 -0
- caas/routing/heuristic_router.py +58 -0
- caas/storage/__init__.py +9 -0
- caas/storage/store.py +389 -0
- caas/triad.py +213 -0
- caas/tuning/__init__.py +9 -0
- caas/tuning/tuner.py +329 -0
- caas/vfs/__init__.py +14 -0
- caas/vfs/filesystem.py +452 -0
- cmvk/__init__.py +218 -0
- cmvk/audit.py +402 -0
- cmvk/benchmarks.py +478 -0
- cmvk/constitutional.py +904 -0
- cmvk/hf_utils.py +301 -0
- cmvk/metrics.py +473 -0
- cmvk/profiles.py +300 -0
- cmvk/py.typed +0 -0
- cmvk/types.py +12 -0
- cmvk/verification.py +956 -0
- emk/__init__.py +89 -0
- emk/causal.py +352 -0
- emk/hf_utils.py +421 -0
- emk/indexer.py +83 -0
- emk/py.typed +0 -0
- emk/schema.py +204 -0
- emk/sleep_cycle.py +347 -0
- emk/store.py +281 -0
- iatp/__init__.py +166 -0
- iatp/attestation.py +461 -0
- iatp/cli.py +317 -0
- iatp/hf_utils.py +472 -0
- iatp/ipc_pipes.py +580 -0
- iatp/main.py +412 -0
- iatp/models/__init__.py +447 -0
- iatp/policy_engine.py +337 -0
- iatp/py.typed +2 -0
- iatp/recovery.py +321 -0
- iatp/security/__init__.py +270 -0
- iatp/sidecar/__init__.py +519 -0
- iatp/telemetry/__init__.py +164 -0
- iatp/tests/__init__.py +1 -0
- iatp/tests/test_attestation.py +370 -0
- iatp/tests/test_cli.py +131 -0
- iatp/tests/test_ed25519_attestation.py +211 -0
- iatp/tests/test_models.py +130 -0
- iatp/tests/test_policy_engine.py +347 -0
- iatp/tests/test_recovery.py +281 -0
- iatp/tests/test_security.py +222 -0
- iatp/tests/test_sidecar.py +167 -0
- iatp/tests/test_telemetry.py +175 -0
- mcp_kernel_server/__init__.py +28 -0
- mcp_kernel_server/cli.py +274 -0
- mcp_kernel_server/resources.py +217 -0
- mcp_kernel_server/server.py +564 -0
- mcp_kernel_server/tools.py +1174 -0
- mute_agent/__init__.py +68 -0
- mute_agent/core/__init__.py +1 -0
- mute_agent/core/execution_agent.py +166 -0
- mute_agent/core/handshake_protocol.py +201 -0
- mute_agent/core/reasoning_agent.py +238 -0
- mute_agent/knowledge_graph/__init__.py +1 -0
- mute_agent/knowledge_graph/graph_elements.py +65 -0
- mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
- mute_agent/knowledge_graph/subgraph.py +224 -0
- mute_agent/listener/__init__.py +43 -0
- mute_agent/listener/adapters/__init__.py +31 -0
- mute_agent/listener/adapters/base_adapter.py +189 -0
- mute_agent/listener/adapters/caas_adapter.py +344 -0
- mute_agent/listener/adapters/control_plane_adapter.py +436 -0
- mute_agent/listener/adapters/iatp_adapter.py +332 -0
- mute_agent/listener/adapters/scak_adapter.py +251 -0
- mute_agent/listener/listener.py +610 -0
- mute_agent/listener/state_observer.py +436 -0
- mute_agent/listener/threshold_config.py +313 -0
- mute_agent/super_system/__init__.py +1 -0
- mute_agent/super_system/router.py +204 -0
- mute_agent/visualization/__init__.py +10 -0
- mute_agent/visualization/graph_debugger.py +502 -0
- nexus/README.md +60 -0
- nexus/__init__.py +51 -0
- nexus/arbiter.py +359 -0
- nexus/client.py +466 -0
- nexus/dmz.py +444 -0
- nexus/escrow.py +430 -0
- nexus/exceptions.py +286 -0
- nexus/pyproject.toml +36 -0
- nexus/registry.py +393 -0
- nexus/reputation.py +425 -0
- nexus/schemas/__init__.py +51 -0
- nexus/schemas/compliance.py +276 -0
- nexus/schemas/escrow.py +251 -0
- nexus/schemas/manifest.py +225 -0
- nexus/schemas/receipt.py +208 -0
- nexus/tests/__init__.py +0 -0
- nexus/tests/conftest.py +146 -0
- nexus/tests/test_arbiter.py +192 -0
- nexus/tests/test_dmz.py +194 -0
- nexus/tests/test_escrow.py +276 -0
- nexus/tests/test_exceptions.py +225 -0
- nexus/tests/test_registry.py +232 -0
- nexus/tests/test_reputation.py +328 -0
- nexus/tests/test_schemas.py +295 -0
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Completeness Auditor - Detects and fixes agent "Laziness".
|
|
6
|
+
|
|
7
|
+
This implements Differential Auditing: instead of auditing every interaction,
|
|
8
|
+
we only audit when the agent gives up with a "Negative Result".
|
|
9
|
+
|
|
10
|
+
The Shadow Teacher Model attempts the same sub-task, and if it succeeds,
|
|
11
|
+
we generate a "Competence Patch" to prevent future laziness.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import logging
|
|
15
|
+
import uuid
|
|
16
|
+
from typing import Optional, List
|
|
17
|
+
from datetime import datetime
|
|
18
|
+
|
|
19
|
+
from .models import AgentOutcome, CompletenessAudit, GiveUpSignal
|
|
20
|
+
|
|
21
|
+
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class CompletenessAuditor:
    """
    The Shadow Auditor that detects agent laziness.

    When an agent outputs a "Negative Result" (e.g., "No data found"),
    the system spins up a "Teacher Model" to attempt the same sub-task.

    If the teacher succeeds where the agent gave up, we identify the gap
    and generate a competence patch.

    Note: in this class the teacher run is simulated with keyword matching
    on the user prompt (see ``_run_teacher_model``); no model API is called.

    Attributes:
        teacher_model: Name of the teacher model, recorded on every audit.
        audit_history: Every CompletenessAudit produced so far, oldest first.
        audit_count: Number of audits started.
        lazy_detection_count: Number of audits in which the teacher found data.
    """

    def __init__(self, teacher_model: str = "o1-preview"):
        """
        Initialize the Completeness Auditor.

        Args:
            teacher_model: High-reasoning model for auditing (e.g., "o1-preview", "o1", "claude-opus")
        """
        self.teacher_model = teacher_model
        self.audit_history: List[CompletenessAudit] = []
        self.audit_count = 0
        self.lazy_detection_count = 0

    def audit_give_up(self, outcome: AgentOutcome) -> CompletenessAudit:
        """
        Audit an agent's give-up outcome using the Teacher Model.

        This is "Differential Auditing" - we only audit specific give-up signals,
        not every interaction (which would be too expensive).

        Args:
            outcome: The agent outcome with give-up signal

        Returns:
            CompletenessAudit with findings. The audit is also appended to
            ``audit_history`` and the counters are updated.
        """
        audit_id = f"audit-{uuid.uuid4().hex[:8]}"
        self.audit_count += 1

        logger.info("🔍 Completeness Audit %s started", audit_id)
        logger.info(" Agent said: '%s...'", outcome.agent_response[:60])
        logger.info(
            " Give-up signal: %s",
            outcome.give_up_signal.value if outcome.give_up_signal else "unknown",
        )

        # Simulate teacher model attempting the same task
        teacher_result = self._run_teacher_model(outcome)

        # Compare agent vs teacher
        teacher_found_data = teacher_result["found_data"]

        if teacher_found_data:
            # Teacher succeeded where agent gave up - this is LAZINESS
            self.lazy_detection_count += 1
            logger.warning("⚠️ LAZINESS DETECTED: Teacher found data that agent missed!")

            gap_analysis = self._analyze_gap(outcome, teacher_result)
            competence_patch = self._generate_competence_patch(outcome, gap_analysis, teacher_result)
            confidence = teacher_result["confidence"]
        else:
            # Teacher also couldn't find data - agent was correct
            logger.info("✓ Agent was correct: No data available")
            gap_analysis = "Agent response was appropriate. No data available."
            competence_patch = "No patch needed - agent correctly identified unavailability."
            confidence = 0.9

        audit = CompletenessAudit(
            audit_id=audit_id,
            agent_outcome=outcome,
            teacher_model=self.teacher_model,
            teacher_response=teacher_result["response"],
            teacher_found_data=teacher_found_data,
            gap_analysis=gap_analysis,
            competence_patch=competence_patch,
            confidence=confidence,
        )

        self.audit_history.append(audit)

        logger.info("🏁 Audit complete. Found data: %s", teacher_found_data)

        return audit

    def _run_teacher_model(self, outcome: AgentOutcome) -> dict:
        """
        Simulate running the teacher model on the same task.

        In a real system, this would:
        1. Spin up a high-reasoning model (o1-preview, o1, etc.)
        2. Give it the same user prompt
        3. Give it enhanced context/tools
        4. Capture its response

        For demonstration, we simulate based on patterns: the lower-cased user
        prompt is checked for keyword groups in a fixed order (logs, then
        projects/resources, then database queries) and the first matching
        group decides the result.

        Returns:
            Dict with keys "found_data" (bool), "response" (str),
            "location" (str) and "confidence" (float).
        """
        # Only the prompt drives the simulation; the agent's response text is
        # deliberately not read here.
        user_prompt = outcome.user_prompt.lower()

        # Pattern: Looking for logs. The teacher "finds" them in archived
        # partitions, but only when the agent reported NO_DATA_FOUND.
        if (
            any(keyword in user_prompt for keyword in ("log", "error", "trace", "debug"))
            and ("500" in user_prompt or "error" in user_prompt)
            and outcome.give_up_signal == GiveUpSignal.NO_DATA_FOUND
        ):
            return {
                "found_data": True,
                "response": "Found logs in archived partition /var/log/archive/2024-01/. The agent missed checking archived partitions.",
                "location": "archived partitions",
                "confidence": 0.92,
            }

        # Pattern: Looking for projects/resources (verified against the complete registry)
        if any(keyword in user_prompt for keyword in ("project", "resource", "entity")) and (
            "alpha" in user_prompt or "beta" in user_prompt
        ):
            return {
                "found_data": True,
                "response": "Project exists but is archived. Agent should check archived projects registry.",
                "location": "archived registry",
                "confidence": 0.88,
            }

        # Pattern: Database queries (teacher uses a proper time window)
        if any(keyword in user_prompt for keyword in ("user", "customer", "record", "data")) and (
            "recent" in user_prompt or "latest" in user_prompt
        ):
            return {
                "found_data": True,
                "response": "Found 247 records using proper time window. Agent may have used incorrect date filter.",
                "location": "database with corrected filter",
                "confidence": 0.85,
            }

        # Default: Teacher also couldn't find data
        return {
            "found_data": False,
            "response": "After exhaustive search, confirmed no data available.",
            "location": "none",
            "confidence": 0.9,
        }

    def _analyze_gap(self, outcome: AgentOutcome, teacher_result: dict) -> str:
        """
        Analyze what the agent missed.

        This is the key insight: identifying the specific gap in the agent's
        reasoning or search strategy.

        Returns:
            A sentence chosen by give-up signal type, followed by the first
            100 characters of the agent response and of the teacher response.
        """
        location = teacher_result.get("location", "unknown location")

        # Build gap analysis based on give-up signal type
        if outcome.give_up_signal == GiveUpSignal.NO_DATA_FOUND:
            gap = f"Agent didn't check {location}. "
        elif outcome.give_up_signal == GiveUpSignal.INSUFFICIENT_INFO:
            gap = f"Agent gave up too early. Data exists in {location}. "
        else:
            gap = f"Agent failed to search {location}. "

        gap += f"Agent response: '{outcome.agent_response[:100]}'. "
        gap += f"Teacher found: '{teacher_result['response'][:100]}'"

        return gap

    def _generate_competence_patch(
        self,
        outcome: AgentOutcome,
        gap_analysis: str,
        teacher_result: dict
    ) -> str:
        """
        Generate a "Competence Patch" - a lesson to prevent future laziness.

        This is NOT just correcting the answer; it's a strategic instruction
        that addresses the systematic gap in the agent's behavior.

        Example patches:
        - "When searching logs, always check archived partitions if recent logs are empty."
        - "Before reporting 'not found', verify all registry sources including archived items."
        - "Use proper time windows for 'recent' queries: last 7 days for logs, 30 days for records."

        Args:
            outcome: The audited agent outcome; its prompt and give-up signal pick the template.
            gap_analysis: Accepted for interface compatibility; not used when building the patch.
            teacher_result: Teacher result dict; its "location" is embedded in the patch.

        Returns:
            The patch text.
        """
        user_prompt_lower = outcome.user_prompt.lower()
        location = teacher_result.get("location", "additional sources")

        # Generate specific, actionable patch based on the pattern
        if "log" in user_prompt_lower:
            patch = f"When searching logs, always check archived partitions ({location}) if recent logs are empty."
        elif "project" in user_prompt_lower or "resource" in user_prompt_lower:
            patch = f"Before reporting 'not found', verify all registry sources including {location}."
        elif "recent" in user_prompt_lower or "latest" in user_prompt_lower:
            patch = f"For 'recent' queries, use proper time windows and check {location}."
        elif outcome.give_up_signal == GiveUpSignal.NO_DATA_FOUND:
            patch = f"Before reporting 'no data found', exhaustively check all sources including {location}."
        else:
            patch = f"Expand search scope to include {location} before concluding data unavailability."

        # Add context about the specific failure
        patch += " This prevents false negatives when data exists but requires deeper search."

        return patch

    def get_audit_stats(self) -> dict:
        """
        Get statistics about auditing activity.

        Returns:
            Dict with "total_audits", "laziness_detected", "laziness_rate"
            (0.0 when no audit has run), "audits_with_data" and "audits_no_data".
        """
        # One pass over the history; the "no data" count is the remainder.
        with_data = sum(1 for audit in self.audit_history if audit.teacher_found_data)
        return {
            "total_audits": self.audit_count,
            "laziness_detected": self.lazy_detection_count,
            "laziness_rate": self.lazy_detection_count / self.audit_count if self.audit_count > 0 else 0.0,
            "audits_with_data": with_data,
            "audits_no_data": len(self.audit_history) - with_data,
        }

    def get_audit_history(self, limit: int = 100) -> List[CompletenessAudit]:
        """
        Get audit history.

        Args:
            limit: Maximum number of most recent audits to return.

        Returns:
            Up to ``limit`` most recent audits, oldest first; an empty list
            when ``limit`` is zero or negative.
        """
        # Guard needed because a slice of [-0:] is the whole list, not an empty one.
        if limit <= 0:
            return []
        return self.audit_history[-limit:]
|
agent_kernel/detector.py
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Failure detection and monitoring system.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from typing import Optional, Callable, Dict, Any, List
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from collections import deque
|
|
12
|
+
|
|
13
|
+
from .models import AgentFailure, FailureType, FailureSeverity, FailureTrace
|
|
14
|
+
|
|
15
|
+
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class FailureQueue:
    """
    Queue for storing full failure traces with reasoning chains.

    Backed by a ``collections.deque`` created with ``maxlen=max_size``, so
    appending to a full queue discards an entry from the opposite (oldest) end.

    Attributes:
        queue: The underlying deque, oldest failure first.
        max_size: The capacity passed at construction.
    """

    def __init__(self, max_size: int = 1000):
        """
        Initialize failure queue.

        Args:
            max_size: Maximum number of failures to store in queue
        """
        self.queue: deque = deque(maxlen=max_size)
        self.max_size = max_size

    def enqueue(self, failure: AgentFailure):
        """
        Add a failure with full trace to the queue.

        If the queue is already at capacity a warning is logged first, because
        the bounded deque would otherwise drop an unprocessed failure silently.

        Args:
            failure: AgentFailure object with trace information
        """
        capacity = self.queue.maxlen
        if capacity is not None and len(self.queue) >= capacity:
            logger.warning(
                "Failure queue at capacity (max_size=%s); an unprocessed failure trace will be dropped",
                capacity,
            )
        self.queue.append(failure)
        logger.info("Enqueued failure for agent %s. Queue size: %s", failure.agent_id, len(self.queue))

    def dequeue(self) -> Optional[AgentFailure]:
        """Remove and return the oldest failure from queue, or None if it is empty."""
        if self.queue:
            return self.queue.popleft()
        return None

    def peek(self) -> Optional[AgentFailure]:
        """View the oldest failure without removing it; None if the queue is empty."""
        if self.queue:
            return self.queue[0]
        return None

    def get_all(self) -> List[AgentFailure]:
        """Get all failures in the queue as a new list, oldest first."""
        return list(self.queue)

    def size(self) -> int:
        """Get current queue size."""
        return len(self.queue)

    def clear(self):
        """Clear all failures from queue."""
        self.queue.clear()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class FailureDetector:
    """
    Detects and classifies agent failures.

    Attributes:
        failure_handlers: Handlers registered per failure type name. No method
            visible in this class invokes them.
        failure_history: Every detected failure, oldest first.
        failure_queue: FailureQueue holding only failures that carry a full trace.
    """

    def __init__(self):
        self.failure_handlers: Dict[str, Callable] = {}
        self.failure_history: List[AgentFailure] = []
        self.failure_queue = FailureQueue()

    def register_handler(self, failure_type: str, handler: Callable):
        """Register a custom handler for a specific failure type (replaces any previous one)."""
        self.failure_handlers[failure_type] = handler
        logger.info(f"Registered handler for failure type: {failure_type}")

    def detect_failure(
        self,
        agent_id: str,
        error_message: str,
        context: Optional[Dict[str, Any]] = None,
        stack_trace: Optional[str] = None,
        user_prompt: Optional[str] = None,
        chain_of_thought: Optional[List[str]] = None,
        failed_action: Optional[Dict[str, Any]] = None
    ) -> AgentFailure:
        """
        Detect and classify a failure with full trace capture.

        Args:
            agent_id: Identifier of the agent that failed
            error_message: Error message from the failure
            context: Additional context about the failure
            stack_trace: Stack trace if available
            user_prompt: Original user prompt that led to failure
            chain_of_thought: Agent's reasoning steps
            failed_action: The specific action that failed

        Returns:
            AgentFailure object with classified failure and full trace. The
            failure is always appended to ``failure_history``; it is enqueued
            on ``failure_queue`` only when a trace could be built.
        """
        # Function-scope import so the file's top-level imports stay untouched.
        from datetime import timezone

        failure_type = self._classify_failure(error_message, context)
        severity = self._assess_severity(failure_type, context)

        # A trace needs both the prompt and the failed action to be present (truthy)
        failure_trace = None
        if user_prompt and failed_action:
            failure_trace = FailureTrace(
                user_prompt=user_prompt,
                chain_of_thought=chain_of_thought or [],
                failed_action=failed_action,
                error_details=error_message,
            )

        failure = AgentFailure(
            agent_id=agent_id,
            failure_type=failure_type,
            severity=severity,
            error_message=error_message,
            context=context or {},
            stack_trace=stack_trace,
            failure_trace=failure_trace,
            # Naive UTC timestamp, same value shape as the deprecated
            # datetime.utcnow() this replaces.
            timestamp=datetime.now(timezone.utc).replace(tzinfo=None),
        )

        self.failure_history.append(failure)

        # Enqueue failure with full trace for processing
        if failure_trace:
            self.failure_queue.enqueue(failure)
            logger.info("Failure with full trace enqueued for agent %s", agent_id)

        # f-string kept on purpose: enum members can render differently under
        # format() and str(), and the log text must stay as it was.
        logger.warning(f"Detected {failure_type} failure for agent {agent_id}: {error_message}")

        return failure

    def _classify_failure(self, error_message: str, context: Optional[Dict[str, Any]]) -> FailureType:
        """
        Classify the type of failure based on the error message.

        Matching is case-insensitive substring search, so a keyword also
        matches inside longer words (e.g. "format" inside "information").
        Groups are tried in order and the first hit wins. ``context`` is
        accepted but not consulted.

        Returns:
            The matching FailureType, or FailureType.UNKNOWN when no keyword matches.
        """
        error_lower = error_message.lower()

        # Ordered (type, keywords) rules; order is significant.
        rules = (
            # Control plane blocking (including policy violations)
            (
                FailureType.BLOCKED_BY_CONTROL_PLANE,
                (
                    "blocked", "control plane", "policy", "unauthorized", "forbidden",
                    "cannot advise", "cannot provide", "not allowed to",
                ),
            ),
            # Timeout
            (FailureType.TIMEOUT, ("timeout", "timed out", "deadline")),
            # Invalid action (including UUID/parameter type errors)
            (
                FailureType.INVALID_ACTION,
                (
                    "invalid", "unsupported", "expected", "uuid",
                    "does not exist", "not found", "format", "parameter",
                ),
            ),
            # Resource exhaustion
            (
                FailureType.RESOURCE_EXHAUSTED,
                ("resource", "memory", "disk", "quota", "limit exceeded"),
            ),
            # Logic errors
            (
                FailureType.LOGIC_ERROR,
                ("assertion", "null pointer", "index out", "key error", "type error"),
            ),
        )

        for failure_type, keywords in rules:
            if any(keyword in error_lower for keyword in keywords):
                return failure_type

        return FailureType.UNKNOWN

    def _assess_severity(self, failure_type: FailureType, context: Optional[Dict[str, Any]]) -> FailureSeverity:
        """
        Assess the severity of a failure.

        Control plane blocks and resource exhaustion are HIGH; every other
        type (timeouts, logic errors, invalid actions, unknown) is MEDIUM.
        ``context`` is accepted but not consulted.
        """
        if failure_type in (FailureType.BLOCKED_BY_CONTROL_PLANE, FailureType.RESOURCE_EXHAUSTED):
            return FailureSeverity.HIGH

        return FailureSeverity.MEDIUM

    def get_failure_history(self, agent_id: Optional[str] = None, limit: int = 100) -> List[AgentFailure]:
        """
        Get failure history, optionally filtered by agent_id.

        Args:
            agent_id: When truthy, only failures of this agent are returned.
            limit: Maximum number of most recent failures to return.

        Returns:
            Up to ``limit`` most recent matching failures, oldest first; an
            empty list when ``limit`` is zero or negative.
        """
        # Guard needed because a slice of [-0:] is the whole list, not an empty one.
        if limit <= 0:
            return []

        history = self.failure_history

        if agent_id:
            history = [f for f in history if f.agent_id == agent_id]

        return history[-limit:]
|