agent_os_kernel 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_control_plane/__init__.py +662 -0
- agent_control_plane/a2a_adapter.py +543 -0
- agent_control_plane/adapter.py +417 -0
- agent_control_plane/agent_hibernation.py +394 -0
- agent_control_plane/agent_kernel.py +470 -0
- agent_control_plane/compliance.py +720 -0
- agent_control_plane/constraint_graphs.py +478 -0
- agent_control_plane/control_plane.py +854 -0
- agent_control_plane/example_executors.py +195 -0
- agent_control_plane/execution_engine.py +231 -0
- agent_control_plane/flight_recorder.py +846 -0
- agent_control_plane/governance_layer.py +435 -0
- agent_control_plane/hf_utils.py +563 -0
- agent_control_plane/interfaces/__init__.py +55 -0
- agent_control_plane/interfaces/kernel_interface.py +361 -0
- agent_control_plane/interfaces/plugin_interface.py +497 -0
- agent_control_plane/interfaces/protocol_interfaces.py +387 -0
- agent_control_plane/kernel_space.py +1009 -0
- agent_control_plane/langchain_adapter.py +424 -0
- agent_control_plane/lifecycle.py +3113 -0
- agent_control_plane/mcp_adapter.py +653 -0
- agent_control_plane/ml_safety.py +563 -0
- agent_control_plane/multimodal.py +727 -0
- agent_control_plane/mute_agent.py +422 -0
- agent_control_plane/observability.py +787 -0
- agent_control_plane/orchestrator.py +482 -0
- agent_control_plane/plugin_registry.py +750 -0
- agent_control_plane/policy_engine.py +954 -0
- agent_control_plane/process_isolation.py +777 -0
- agent_control_plane/shadow_mode.py +310 -0
- agent_control_plane/signals.py +493 -0
- agent_control_plane/supervisor_agents.py +430 -0
- agent_control_plane/time_travel_debugger.py +557 -0
- agent_control_plane/tool_registry.py +452 -0
- agent_control_plane/vfs.py +697 -0
- agent_kernel/__init__.py +69 -0
- agent_kernel/analyzer.py +435 -0
- agent_kernel/auditor.py +36 -0
- agent_kernel/completeness_auditor.py +237 -0
- agent_kernel/detector.py +203 -0
- agent_kernel/kernel.py +744 -0
- agent_kernel/memory_manager.py +85 -0
- agent_kernel/models.py +374 -0
- agent_kernel/nudge_mechanism.py +263 -0
- agent_kernel/outcome_analyzer.py +338 -0
- agent_kernel/patcher.py +582 -0
- agent_kernel/semantic_analyzer.py +316 -0
- agent_kernel/semantic_purge.py +349 -0
- agent_kernel/simulator.py +449 -0
- agent_kernel/teacher.py +85 -0
- agent_kernel/triage.py +152 -0
- agent_os/__init__.py +409 -0
- agent_os/_adversarial_impl.py +200 -0
- agent_os/_circuit_breaker_impl.py +232 -0
- agent_os/_mcp_metrics.py +193 -0
- agent_os/adversarial.py +20 -0
- agent_os/agents_compat.py +490 -0
- agent_os/audit_logger.py +135 -0
- agent_os/base_agent.py +651 -0
- agent_os/circuit_breaker.py +34 -0
- agent_os/cli/__init__.py +659 -0
- agent_os/cli/cmd_audit.py +128 -0
- agent_os/cli/cmd_init.py +152 -0
- agent_os/cli/cmd_policy.py +41 -0
- agent_os/cli/cmd_policy_gen.py +180 -0
- agent_os/cli/cmd_validate.py +258 -0
- agent_os/cli/mcp_scan.py +265 -0
- agent_os/cli/output.py +192 -0
- agent_os/cli/policy_checker.py +330 -0
- agent_os/compat.py +74 -0
- agent_os/constraint_graph.py +234 -0
- agent_os/content_governance.py +140 -0
- agent_os/context_budget.py +305 -0
- agent_os/credential_redactor.py +224 -0
- agent_os/diff_policy.py +89 -0
- agent_os/egress_policy.py +159 -0
- agent_os/escalation.py +276 -0
- agent_os/event_bus.py +124 -0
- agent_os/exceptions.py +180 -0
- agent_os/execution_context_policy.py +141 -0
- agent_os/github_enterprise.py +96 -0
- agent_os/health.py +20 -0
- agent_os/integrations/__init__.py +279 -0
- agent_os/integrations/a2a_adapter.py +279 -0
- agent_os/integrations/agent_lightning/__init__.py +30 -0
- agent_os/integrations/anthropic_adapter.py +420 -0
- agent_os/integrations/autogen_adapter.py +620 -0
- agent_os/integrations/base.py +1137 -0
- agent_os/integrations/compat.py +229 -0
- agent_os/integrations/config.py +98 -0
- agent_os/integrations/conversation_guardian.py +957 -0
- agent_os/integrations/crewai_adapter.py +467 -0
- agent_os/integrations/drift_detector.py +425 -0
- agent_os/integrations/dry_run.py +124 -0
- agent_os/integrations/escalation.py +582 -0
- agent_os/integrations/gemini_adapter.py +364 -0
- agent_os/integrations/google_adk_adapter.py +633 -0
- agent_os/integrations/guardrails_adapter.py +394 -0
- agent_os/integrations/health.py +197 -0
- agent_os/integrations/langchain_adapter.py +654 -0
- agent_os/integrations/llamafirewall.py +343 -0
- agent_os/integrations/llamaindex_adapter.py +188 -0
- agent_os/integrations/logging.py +191 -0
- agent_os/integrations/maf_adapter.py +631 -0
- agent_os/integrations/mistral_adapter.py +365 -0
- agent_os/integrations/openai_adapter.py +816 -0
- agent_os/integrations/openai_agents_sdk.py +406 -0
- agent_os/integrations/policy_compose.py +171 -0
- agent_os/integrations/profiling.py +144 -0
- agent_os/integrations/pydantic_ai_adapter.py +420 -0
- agent_os/integrations/rate_limiter.py +130 -0
- agent_os/integrations/rbac.py +143 -0
- agent_os/integrations/registry.py +113 -0
- agent_os/integrations/scope_guard.py +303 -0
- agent_os/integrations/semantic_kernel_adapter.py +769 -0
- agent_os/integrations/smolagents_adapter.py +629 -0
- agent_os/integrations/templates.py +178 -0
- agent_os/integrations/token_budget.py +134 -0
- agent_os/integrations/tool_aliases.py +190 -0
- agent_os/integrations/webhooks.py +177 -0
- agent_os/lite.py +208 -0
- agent_os/mcp_gateway.py +385 -0
- agent_os/mcp_message_signer.py +273 -0
- agent_os/mcp_protocols.py +161 -0
- agent_os/mcp_response_scanner.py +232 -0
- agent_os/mcp_security.py +924 -0
- agent_os/mcp_session_auth.py +231 -0
- agent_os/mcp_sliding_rate_limiter.py +184 -0
- agent_os/memory_guard.py +409 -0
- agent_os/metrics.py +134 -0
- agent_os/mute.py +428 -0
- agent_os/mute_agent.py +209 -0
- agent_os/policies/__init__.py +77 -0
- agent_os/policies/async_evaluator.py +275 -0
- agent_os/policies/backends.py +670 -0
- agent_os/policies/bridge.py +169 -0
- agent_os/policies/budget.py +85 -0
- agent_os/policies/cli.py +294 -0
- agent_os/policies/conflict_resolution.py +270 -0
- agent_os/policies/data_classification.py +252 -0
- agent_os/policies/evaluator.py +239 -0
- agent_os/policies/policy_schema.json +228 -0
- agent_os/policies/rate_limiting.py +145 -0
- agent_os/policies/schema.py +115 -0
- agent_os/policies/shared.py +331 -0
- agent_os/prompt_injection.py +694 -0
- agent_os/providers.py +182 -0
- agent_os/py.typed +0 -0
- agent_os/retry.py +81 -0
- agent_os/reversibility.py +251 -0
- agent_os/sandbox.py +432 -0
- agent_os/sandbox_provider.py +140 -0
- agent_os/secure_codegen.py +525 -0
- agent_os/security_skills.py +538 -0
- agent_os/semantic_policy.py +422 -0
- agent_os/server/__init__.py +15 -0
- agent_os/server/__main__.py +25 -0
- agent_os/server/app.py +277 -0
- agent_os/server/models.py +104 -0
- agent_os/shift_left_metrics.py +130 -0
- agent_os/stateless.py +742 -0
- agent_os/supervisor.py +148 -0
- agent_os/task_outcome.py +148 -0
- agent_os/transparency.py +181 -0
- agent_os/trust_root.py +128 -0
- agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
- agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
- agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
- agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
- agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
- agent_os_observability/__init__.py +27 -0
- agent_os_observability/dashboards.py +898 -0
- agent_os_observability/metrics.py +398 -0
- agent_os_observability/server.py +223 -0
- agent_os_observability/tracer.py +232 -0
- agent_primitives/__init__.py +24 -0
- agent_primitives/failures.py +84 -0
- agent_primitives/py.typed +0 -0
- amb_core/__init__.py +177 -0
- amb_core/adapters/__init__.py +57 -0
- amb_core/adapters/aws_sqs_broker.py +376 -0
- amb_core/adapters/azure_servicebus_broker.py +340 -0
- amb_core/adapters/kafka_broker.py +260 -0
- amb_core/adapters/nats_broker.py +285 -0
- amb_core/adapters/rabbitmq_broker.py +235 -0
- amb_core/adapters/redis_broker.py +262 -0
- amb_core/broker.py +145 -0
- amb_core/bus.py +481 -0
- amb_core/cloudevents.py +509 -0
- amb_core/dlq.py +345 -0
- amb_core/hf_utils.py +536 -0
- amb_core/memory_broker.py +410 -0
- amb_core/models.py +141 -0
- amb_core/persistence.py +529 -0
- amb_core/schema.py +294 -0
- amb_core/tracing.py +358 -0
- atr/__init__.py +640 -0
- atr/access.py +348 -0
- atr/composition.py +645 -0
- atr/decorator.py +357 -0
- atr/executor.py +384 -0
- atr/health.py +557 -0
- atr/hf_utils.py +449 -0
- atr/injection.py +422 -0
- atr/metrics.py +440 -0
- atr/policies.py +403 -0
- atr/py.typed +2 -0
- atr/registry.py +452 -0
- atr/schema.py +480 -0
- atr/tools/safe/__init__.py +75 -0
- atr/tools/safe/calculator.py +467 -0
- atr/tools/safe/datetime_tool.py +443 -0
- atr/tools/safe/file_reader.py +402 -0
- atr/tools/safe/http_client.py +316 -0
- atr/tools/safe/json_parser.py +374 -0
- atr/tools/safe/text_tool.py +537 -0
- atr/tools/safe/toolkit.py +175 -0
- caas/__init__.py +162 -0
- caas/api/__init__.py +7 -0
- caas/api/server.py +1328 -0
- caas/caching.py +834 -0
- caas/cli.py +210 -0
- caas/conversation.py +223 -0
- caas/decay.py +72 -0
- caas/detection/__init__.py +9 -0
- caas/detection/detector.py +238 -0
- caas/enrichment.py +130 -0
- caas/gateway/__init__.py +27 -0
- caas/gateway/trust_gateway.py +474 -0
- caas/hf_utils.py +479 -0
- caas/ingestion/__init__.py +23 -0
- caas/ingestion/processors.py +253 -0
- caas/ingestion/structure_parser.py +188 -0
- caas/models.py +356 -0
- caas/pragmatic_truth.py +444 -0
- caas/routing/__init__.py +10 -0
- caas/routing/heuristic_router.py +58 -0
- caas/storage/__init__.py +9 -0
- caas/storage/store.py +389 -0
- caas/triad.py +213 -0
- caas/tuning/__init__.py +9 -0
- caas/tuning/tuner.py +329 -0
- caas/vfs/__init__.py +14 -0
- caas/vfs/filesystem.py +452 -0
- cmvk/__init__.py +218 -0
- cmvk/audit.py +402 -0
- cmvk/benchmarks.py +478 -0
- cmvk/constitutional.py +904 -0
- cmvk/hf_utils.py +301 -0
- cmvk/metrics.py +473 -0
- cmvk/profiles.py +300 -0
- cmvk/py.typed +0 -0
- cmvk/types.py +12 -0
- cmvk/verification.py +956 -0
- emk/__init__.py +89 -0
- emk/causal.py +352 -0
- emk/hf_utils.py +421 -0
- emk/indexer.py +83 -0
- emk/py.typed +0 -0
- emk/schema.py +204 -0
- emk/sleep_cycle.py +347 -0
- emk/store.py +281 -0
- iatp/__init__.py +166 -0
- iatp/attestation.py +461 -0
- iatp/cli.py +317 -0
- iatp/hf_utils.py +472 -0
- iatp/ipc_pipes.py +580 -0
- iatp/main.py +412 -0
- iatp/models/__init__.py +447 -0
- iatp/policy_engine.py +337 -0
- iatp/py.typed +2 -0
- iatp/recovery.py +321 -0
- iatp/security/__init__.py +270 -0
- iatp/sidecar/__init__.py +519 -0
- iatp/telemetry/__init__.py +164 -0
- iatp/tests/__init__.py +1 -0
- iatp/tests/test_attestation.py +370 -0
- iatp/tests/test_cli.py +131 -0
- iatp/tests/test_ed25519_attestation.py +211 -0
- iatp/tests/test_models.py +130 -0
- iatp/tests/test_policy_engine.py +347 -0
- iatp/tests/test_recovery.py +281 -0
- iatp/tests/test_security.py +222 -0
- iatp/tests/test_sidecar.py +167 -0
- iatp/tests/test_telemetry.py +175 -0
- mcp_kernel_server/__init__.py +28 -0
- mcp_kernel_server/cli.py +274 -0
- mcp_kernel_server/resources.py +217 -0
- mcp_kernel_server/server.py +564 -0
- mcp_kernel_server/tools.py +1174 -0
- mute_agent/__init__.py +68 -0
- mute_agent/core/__init__.py +1 -0
- mute_agent/core/execution_agent.py +166 -0
- mute_agent/core/handshake_protocol.py +201 -0
- mute_agent/core/reasoning_agent.py +238 -0
- mute_agent/knowledge_graph/__init__.py +1 -0
- mute_agent/knowledge_graph/graph_elements.py +65 -0
- mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
- mute_agent/knowledge_graph/subgraph.py +224 -0
- mute_agent/listener/__init__.py +43 -0
- mute_agent/listener/adapters/__init__.py +31 -0
- mute_agent/listener/adapters/base_adapter.py +189 -0
- mute_agent/listener/adapters/caas_adapter.py +344 -0
- mute_agent/listener/adapters/control_plane_adapter.py +436 -0
- mute_agent/listener/adapters/iatp_adapter.py +332 -0
- mute_agent/listener/adapters/scak_adapter.py +251 -0
- mute_agent/listener/listener.py +610 -0
- mute_agent/listener/state_observer.py +436 -0
- mute_agent/listener/threshold_config.py +313 -0
- mute_agent/super_system/__init__.py +1 -0
- mute_agent/super_system/router.py +204 -0
- mute_agent/visualization/__init__.py +10 -0
- mute_agent/visualization/graph_debugger.py +502 -0
- nexus/README.md +60 -0
- nexus/__init__.py +51 -0
- nexus/arbiter.py +359 -0
- nexus/client.py +466 -0
- nexus/dmz.py +444 -0
- nexus/escrow.py +430 -0
- nexus/exceptions.py +286 -0
- nexus/pyproject.toml +36 -0
- nexus/registry.py +393 -0
- nexus/reputation.py +425 -0
- nexus/schemas/__init__.py +51 -0
- nexus/schemas/compliance.py +276 -0
- nexus/schemas/escrow.py +251 -0
- nexus/schemas/manifest.py +225 -0
- nexus/schemas/receipt.py +208 -0
- nexus/tests/__init__.py +0 -0
- nexus/tests/conftest.py +146 -0
- nexus/tests/test_arbiter.py +192 -0
- nexus/tests/test_dmz.py +194 -0
- nexus/tests/test_escrow.py +276 -0
- nexus/tests/test_exceptions.py +225 -0
- nexus/tests/test_registry.py +232 -0
- nexus/tests/test_reputation.py +328 -0
- nexus/tests/test_schemas.py +295 -0
|
@@ -0,0 +1,1009 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Kernel Space - Protected core that survives agent crashes.
|
|
6
|
+
|
|
7
|
+
This module implements the kernel/user space separation for Agent OS.
|
|
8
|
+
The kernel space contains critical infrastructure that MUST survive
|
|
9
|
+
even when user-space agents crash or hallucinate.
|
|
10
|
+
|
|
11
|
+
Kernel Space Components:
|
|
12
|
+
- Policy Engine (enforcement)
|
|
13
|
+
- Flight Recorder (audit)
|
|
14
|
+
- Signal Dispatcher (control)
|
|
15
|
+
- VFS Mount Manager (memory)
|
|
16
|
+
- IPC Router (communication)
|
|
17
|
+
|
|
18
|
+
User Space Components:
|
|
19
|
+
- LLM Generation
|
|
20
|
+
- Tool Execution
|
|
21
|
+
- Agent Logic
|
|
22
|
+
- Custom Handlers
|
|
23
|
+
|
|
24
|
+
Design Philosophy:
|
|
25
|
+
- Kernel survives agent crashes (isolation)
|
|
26
|
+
- Policy violations trigger kernel panic (0% tolerance)
|
|
27
|
+
- All agent actions pass through kernel syscalls
|
|
28
|
+
- Kernel state is checkpointed independently
|
|
29
|
+
|
|
30
|
+
Comparison with AIOS:
|
|
31
|
+
AIOS focuses on EFFICIENCY (GPU throughput, scheduling)
|
|
32
|
+
We focus on SAFETY (isolation, policy enforcement, audit)
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
from abc import ABC, abstractmethod
|
|
36
|
+
from dataclasses import dataclass, field
|
|
37
|
+
from datetime import datetime, timezone
|
|
38
|
+
from enum import Enum, auto
|
|
39
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar, Generic, Union
|
|
40
|
+
import asyncio
|
|
41
|
+
import logging
|
|
42
|
+
import traceback
|
|
43
|
+
from contextlib import asynccontextmanager
|
|
44
|
+
|
|
45
|
+
# Import kernel components
|
|
46
|
+
from .signals import (
|
|
47
|
+
SignalDispatcher, AgentSignal, SignalInfo, AgentKernelPanic,
|
|
48
|
+
policy_violation, kill_agent, pause_agent
|
|
49
|
+
)
|
|
50
|
+
from .vfs import AgentVFS, create_agent_vfs
|
|
51
|
+
|
|
52
|
+
logger = logging.getLogger(__name__)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class ProtectionRing(Enum):
    """Privilege levels for Agent OS, modelled on x86 protection rings.

    The numeric value of each member is its ring number; the lower the
    number, the more privileged the ring:

    * Ring 0 -- Kernel: most privileged (policy, audit, signals)
    * Ring 1 -- Drivers: backend drivers (VFS backends, tool executors)
    * Ring 2 -- Services: system services (monitoring, health checks)
    * Ring 3 -- User: agent code (least privileged)
    """

    RING_0_KERNEL = 0
    RING_1_DRIVERS = 1
    RING_2_SERVICES = 2
    RING_3_USER = 3
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class SyscallType(Enum):
    """Enumeration of the system calls user space may issue to the kernel.

    Every agent action is expected to be expressed as one of these
    syscalls. Member values are assigned by ``auto()`` in declaration
    order, so the order below must not be changed casually.
    """

    # --- Process control ---
    SYS_FORK = auto()  # Spawn child agent
    SYS_EXIT = auto()  # Terminate self
    SYS_WAIT = auto()  # Wait for child
    SYS_EXEC = auto()  # Execute tool

    # --- File operations (VFS) ---
    SYS_OPEN = auto()
    SYS_CLOSE = auto()
    SYS_READ = auto()
    SYS_WRITE = auto()
    SYS_STAT = auto()

    # --- Memory operations ---
    SYS_MMAP = auto()  # Map memory region
    SYS_MUNMAP = auto()  # Unmap memory region
    SYS_BRK = auto()  # Extend heap (context window)

    # --- IPC operations ---
    SYS_PIPE = auto()  # Create pipe
    SYS_SEND = auto()  # Send message
    SYS_RECV = auto()  # Receive message

    # --- Signal operations ---
    SYS_SIGNAL = auto()  # Send signal
    SYS_SIGACTION = auto()  # Set signal handler
    SYS_SIGPROCMASK = auto()  # Block signals

    # --- Policy operations (read-only from user space) ---
    SYS_GETPOLICY = auto()  # Get policy for action
    SYS_CHECKPOLICY = auto()  # Check if action allowed
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@dataclass
class SyscallRequest:
    """Describes one system call issued by user space.

    Fields:
        syscall: Which syscall is being requested.
        args: Keyword-style arguments for the syscall handler.
        caller_ring: Protection ring of the caller; defaults to Ring 3 (user).
        timestamp: Creation time of the request (timezone-aware, UTC).
        trace_id: Optional trace identifier; ``None`` when not supplied.
    """

    syscall: SyscallType
    args: Dict[str, Any]
    caller_ring: ProtectionRing = ProtectionRing.RING_3_USER
    # A factory is used so each request gets its own "now" rather than the
    # class-definition time.
    timestamp: datetime = field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    trace_id: Optional[str] = None
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@dataclass
class SyscallResult:
    """Outcome of a system call.

    Fields:
        success: ``True`` when the syscall completed, ``False`` otherwise.
        return_value: Handler output on success; ``None`` by default.
        error_code: Numeric error code on failure; ``None`` by default.
        error_message: Human-readable failure description; ``None`` by default.
        execution_time_ms: Elapsed time in milliseconds; ``0.0`` until set.
    """

    success: bool
    return_value: Any = None
    error_code: Optional[int] = None
    error_message: Optional[str] = None
    execution_time_ms: float = 0.0
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class KernelState(Enum):
    """Lifecycle states the kernel can be in.

    Values are assigned by ``auto()`` in declaration order.
    """

    BOOTING = auto()
    RUNNING = auto()
    DEGRADED = auto()  # Some components failed
    PANIC = auto()  # Unrecoverable error
    SHUTDOWN = auto()
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
@dataclass
class KernelMetrics:
    """Counters and gauges describing kernel activity.

    Fields:
        syscall_count: Number of syscalls handled.
        policy_checks: Number of policy checks performed.
        policy_violations: Number of policy checks that were denied.
        agent_crashes: Number of syscall handlers that raised unexpectedly.
        kernel_panics: Number of kernel panics observed.
        uptime_seconds: Seconds since the kernel started.
        active_agents: Number of currently registered agents.
    """

    syscall_count: int = 0
    policy_checks: int = 0
    policy_violations: int = 0
    agent_crashes: int = 0
    kernel_panics: int = 0
    uptime_seconds: float = 0.0
    active_agents: int = 0

    def to_dict(self) -> Dict[str, Any]:
        """Return the metrics as a plain dictionary keyed by field name.

        Returns:
            A new dict holding the current value of every metric field.
        """
        return {
            "syscall_count": self.syscall_count,
            "policy_checks": self.policy_checks,
            "policy_violations": self.policy_violations,
            # Report the counter value itself; the builtin type ``int`` was
            # previously emitted here by mistake.
            "agent_crashes": self.agent_crashes,
            "kernel_panics": self.kernel_panics,
            "uptime_seconds": self.uptime_seconds,
            "active_agents": self.active_agents,
        }
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
class KernelSpace:
|
|
162
|
+
"""The Kernel Space — protected core of Agent OS.
|
|
163
|
+
|
|
164
|
+
KernelSpace implements Ring 0, the most privileged execution layer in the
|
|
165
|
+
Agent OS architecture. Inspired by operating system kernel design, it
|
|
166
|
+
provides strict isolation between the trusted kernel and untrusted agent
|
|
167
|
+
(user-space) code.
|
|
168
|
+
|
|
169
|
+
Responsibilities:
|
|
170
|
+
- **Policy enforcement**: All agent actions pass through kernel
|
|
171
|
+
syscalls where policies are checked before execution.
|
|
172
|
+
- **Flight recording**: Every syscall is logged to the
|
|
173
|
+
``FlightRecorder`` for forensic audit and compliance.
|
|
174
|
+
- **Signal management**: Agents communicate through POSIX-style
|
|
175
|
+
signals (``SIGTERM``, ``SIGKILL``, ``SIGPAUSE``, etc.).
|
|
176
|
+
- **VFS management**: Each agent gets an isolated virtual filesystem.
|
|
177
|
+
- **Tool execution**: Tools are registered in the kernel and executed
|
|
178
|
+
through the ``SYS_EXEC`` syscall with full policy governance.
|
|
179
|
+
|
|
180
|
+
The kernel SURVIVES agent crashes. If an agent hallucinates, throws an
|
|
181
|
+
exception, or violates policy, the kernel remains stable and can recover
|
|
182
|
+
or terminate the agent.
|
|
183
|
+
|
|
184
|
+
Args:
|
|
185
|
+
policy_engine: Optional policy engine for syscall authorization.
|
|
186
|
+
When ``None``, the kernel runs in permissive mode (all syscalls
|
|
187
|
+
allowed).
|
|
188
|
+
flight_recorder: Optional ``FlightRecorder`` instance for audit
|
|
189
|
+
logging. When ``None``, audit logging is disabled.
|
|
190
|
+
|
|
191
|
+
Example:
|
|
192
|
+
Basic kernel lifecycle::
|
|
193
|
+
|
|
194
|
+
kernel = KernelSpace()
|
|
195
|
+
|
|
196
|
+
# Register an agent in user space
|
|
197
|
+
agent_ctx = kernel.create_agent_context("agent-001")
|
|
198
|
+
|
|
199
|
+
# Agent makes syscalls through the kernel
|
|
200
|
+
result = await kernel.syscall(SyscallRequest(
|
|
201
|
+
syscall=SyscallType.SYS_WRITE,
|
|
202
|
+
args={"path": "/mem/working/notes.txt", "data": "Hello"},
|
|
203
|
+
), agent_ctx)
|
|
204
|
+
|
|
205
|
+
Using the context manager for automatic isolation::
|
|
206
|
+
|
|
207
|
+
async with user_space_execution(kernel, "agent-001") as ctx:
|
|
208
|
+
await ctx.write("/mem/working/task.txt", "Hello World")
|
|
209
|
+
"""
|
|
210
|
+
|
|
211
|
+
def __init__(
    self,
    policy_engine: Optional[Any] = None,
    flight_recorder: Optional[Any] = None,
):
    """Boot the kernel and set up its empty registries.

    Args:
        policy_engine: Optional policy engine consulted for syscall
            authorization. Stored as-is; may be ``None``.
        flight_recorder: Optional recorder used for audit traces. Stored
            as-is; may be ``None``.
    """
    # The kernel is BOOTING until every structure below exists.
    self._state = KernelState.BOOTING
    self._start_time = datetime.now(timezone.utc)
    self._metrics = KernelMetrics()

    # Ring 0 components supplied by the caller.
    self._flight_recorder = flight_recorder
    self._policy_engine = policy_engine

    # Per-agent registries, all keyed by agent id.
    self._agents: Dict[str, "AgentContext"] = {}
    self._signal_dispatchers: Dict[str, SignalDispatcher] = {}
    self._vfs_instances: Dict[str, AgentVFS] = {}

    # Tool name -> callable executor.
    self._tool_registry: Dict[str, Callable[..., Any]] = {}

    # Syscall type -> handler; filled in by the helper below.
    self._syscall_handlers: Dict[SyscallType, Callable] = {}
    self._init_syscall_handlers()

    # Everything is wired up: switch to RUNNING and announce it.
    self._state = KernelState.RUNNING
    logger.info("[Kernel] Booted successfully")
|
|
239
|
+
|
|
240
|
+
def _init_syscall_handlers(self) -> None:
    """Register the built-in handler for each supported syscall type."""
    self._syscall_handlers.update(
        {
            # File operations
            SyscallType.SYS_READ: self._sys_read,
            SyscallType.SYS_WRITE: self._sys_write,
            SyscallType.SYS_OPEN: self._sys_open,
            SyscallType.SYS_CLOSE: self._sys_close,
            # Signal operations
            SyscallType.SYS_SIGNAL: self._sys_signal,
            # Policy operations
            SyscallType.SYS_CHECKPOLICY: self._sys_checkpolicy,
            # Process operations
            SyscallType.SYS_EXIT: self._sys_exit,
            SyscallType.SYS_EXEC: self._sys_exec,
        }
    )
|
|
257
|
+
|
|
258
|
+
@property
|
|
259
|
+
def state(self) -> KernelState:
|
|
260
|
+
return self._state
|
|
261
|
+
|
|
262
|
+
@property
|
|
263
|
+
def metrics(self) -> KernelMetrics:
|
|
264
|
+
self._metrics.uptime_seconds = (
|
|
265
|
+
datetime.now(timezone.utc) - self._start_time
|
|
266
|
+
).total_seconds()
|
|
267
|
+
self._metrics.active_agents = len(self._agents)
|
|
268
|
+
return self._metrics
|
|
269
|
+
|
|
270
|
+
def create_agent_context(self, agent_id: str) -> "AgentContext":
    """Register a new user-space agent and return its context.

    A signal dispatcher and a VFS instance are created for the agent and
    stored in the kernel's registries, followed by the ``AgentContext``
    itself.

    Args:
        agent_id: Unique identifier for the agent; must not already be
            registered with this kernel.

    Returns:
        The new ``AgentContext``, bound to this kernel at
        ``ProtectionRing.RING_3_USER``.

    Raises:
        ValueError: If ``agent_id`` is already registered.
    """
    already_known = agent_id in self._agents
    if already_known:
        raise ValueError(f"Agent {agent_id} already registered")

    # Build both resources first so nothing is registered if either fails.
    new_dispatcher = SignalDispatcher(agent_id)
    new_vfs = create_agent_vfs(agent_id)
    self._signal_dispatchers[agent_id] = new_dispatcher
    self._vfs_instances[agent_id] = new_vfs

    agent_ctx = AgentContext(
        agent_id=agent_id,
        kernel=self,
        ring=ProtectionRing.RING_3_USER,
    )
    self._agents[agent_id] = agent_ctx

    logger.info(f"[Kernel] Created context for agent: {agent_id}")
    return agent_ctx
|
|
310
|
+
|
|
311
|
+
def destroy_agent_context(self, agent_id: str) -> None:
    """Unregister an agent and drop its kernel-side resources.

    The agent's context, signal dispatcher, and VFS instance are removed
    from their registries. Entries that do not exist are ignored, so the
    call is safe for unknown agents; the log line is emitted either way.

    Args:
        agent_id: Identifier of the agent to remove.
    """
    registries = (
        self._agents,
        self._signal_dispatchers,
        self._vfs_instances,
    )
    for registry in registries:
        # pop-with-default is a no-op when the agent has no entry.
        registry.pop(agent_id, None)

    logger.info(f"[Kernel] Destroyed context for agent: {agent_id}")
|
|
328
|
+
|
|
329
|
+
async def syscall(
    self,
    request: SyscallRequest,
    ctx: "AgentContext",
) -> SyscallResult:
    """Handle a system call from user space.

    All agent actions MUST go through this interface. The kernel logs the
    attempt to the flight recorder (if any), checks agent liveness,
    enforces policy (if a policy engine is configured), and dispatches to
    the registered syscall handler.

    Args:
        request: The syscall request describing the operation and its
            arguments.
        ctx: The calling agent's context, used for identity and
            permission checks.

    Returns:
        A ``SyscallResult`` indicating success or failure. On success,
        ``return_value`` contains the handler's output and
        ``execution_time_ms`` is filled in. On failure, ``error_code`` and
        ``error_message`` describe what went wrong:

        - ``-1``: Agent has been terminated.
        - ``-2``: Policy violation (action blocked).
        - ``-3``: Unknown / unregistered syscall type.
        - ``-4``: Handler raised an unexpected exception.

    Raises:
        AgentKernelPanic: Propagated unchanged when raised during the
            policy check / violation signalling or by the handler.
    """
    start_time = datetime.now(timezone.utc)
    self._metrics.syscall_count += 1

    # Open an audit trace for this syscall. The returned trace id is not
    # needed in this method, so it is not kept.
    if self._flight_recorder:
        self._flight_recorder.start_trace(
            agent_id=ctx.agent_id,
            tool_name=f"syscall_{request.syscall.name}",
            tool_args=request.args,
        )

    # Refuse calls from agents whose dispatcher reports termination.
    dispatcher = self._signal_dispatchers.get(ctx.agent_id)
    if dispatcher and dispatcher.is_terminated:
        return SyscallResult(
            success=False,
            error_code=-1,
            error_message="Agent has been terminated",
        )

    # Policy check (if policy engine available)
    if self._policy_engine:
        try:
            # NOTE: _check_policy itself updates the policy_checks and
            # policy_violations counters. They must not be incremented
            # here as well, otherwise every check/violation is counted
            # twice.
            allowed, policy_error = await self._check_policy(request, ctx)
            if not allowed:
                # Build actionable error message
                error_msg = f"Policy '{request.syscall.name}' blocked: {policy_error or 'Access denied'}"

                # Signal the violation to the agent. This stays inside
                # the try block so a panic raised while signalling is
                # counted below.
                if dispatcher:
                    policy_violation(
                        dispatcher,
                        policy_name="syscall_policy",
                        details=f"Syscall {request.syscall.name} not allowed",
                        context={"args": request.args},
                    )

                return SyscallResult(
                    success=False,
                    error_code=-2,
                    error_message=error_msg,
                )
        except AgentKernelPanic:
            # Count the panic, then let it propagate to the caller.
            self._metrics.kernel_panics += 1
            raise

    # Look up the handler for this syscall type.
    handler = self._syscall_handlers.get(request.syscall)
    if not handler:
        return SyscallResult(
            success=False,
            error_code=-3,
            error_message=f"Unknown syscall: {request.syscall.name}",
        )

    try:
        result = await handler(request, ctx)
        elapsed = datetime.now(timezone.utc) - start_time
        result.execution_time_ms = elapsed.total_seconds() * 1000
        return result
    except AgentKernelPanic:
        # Kernel panics are never converted into error results.
        raise
    except Exception as e:
        # Any other handler failure is contained and reported as -4 so
        # the kernel keeps running.
        logger.error(f"[Kernel] Syscall {request.syscall.name} failed: {e}")
        self._metrics.agent_crashes += 1
        return SyscallResult(
            success=False,
            error_code=-4,
            error_message=str(e),
        )
|
|
435
|
+
|
|
436
|
+
async def _check_policy(
    self,
    request: SyscallRequest,
    ctx: "AgentContext",
) -> Tuple[bool, Optional[str]]:
    """Ask the policy engine whether a syscall may proceed.

    Increments ``policy_checks`` on every call and ``policy_violations``
    whenever the engine reports a violation. A reported violation is also
    written to the flight recorder when one is configured.

    Args:
        request: The syscall being evaluated.
        ctx: Context of the calling agent.

    Returns:
        ``(True, None)`` when the syscall is allowed (including when no
        policy engine is configured), otherwise ``(False, violation)``
        where ``violation`` is whatever the policy engine returned.
    """
    self._metrics.policy_checks += 1

    engine = self._policy_engine
    if not engine:
        # Permissive mode: nothing to consult.
        logger.debug(f"[Kernel] No policy engine - allowing {request.syscall.name}")
        return (True, None)

    # SYS_EXEC is checked under the actual tool name (not "code_execute")
    # so the syscall and tool levels are not both checked.
    if request.syscall == SyscallType.SYS_EXEC:
        tool_name = request.args.get("tool", "unknown_tool")
    else:
        tool_name = self._syscall_to_tool_name(request.syscall)

    # Pass a copy of the caller's args, tagged with syscall and ring.
    tool_args = request.args.copy()
    tool_args.update(
        _syscall=request.syscall.name,
        _ring=request.caller_ring.name,
    )

    # Positional order expected here: agent_role, tool_name, args.
    violation = engine.check_violation(ctx.agent_id, tool_name, tool_args)
    if not violation:
        return (True, None)

    self._metrics.policy_violations += 1
    logger.warning(f"[Kernel] Policy violation for {ctx.agent_id}: {violation}")

    recorder = self._flight_recorder
    if recorder:
        trace_id = recorder.start_trace(
            agent_id=ctx.agent_id,
            tool_name=tool_name,
            tool_args=tool_args,
        )
        recorder.log_violation(trace_id, violation)

    return (False, violation)
|
|
491
|
+
|
|
492
|
+
def _syscall_to_tool_name(self, syscall: SyscallType) -> str:
    """Translate a syscall type into the tool name handed to the policy engine.

    Syscalls without an explicit entry in the table fall back to
    ``"syscall_<lower-cased enum name>"``.
    """
    known_names = {
        SyscallType.SYS_READ: "file_read",
        SyscallType.SYS_WRITE: "file_write",
        SyscallType.SYS_EXEC: "code_execute",
        SyscallType.SYS_OPEN: "file_open",
        SyscallType.SYS_CLOSE: "file_close",
        SyscallType.SYS_FORK: "agent_spawn",
        SyscallType.SYS_EXIT: "agent_exit",
        SyscallType.SYS_SIGNAL: "signal_send",
        SyscallType.SYS_SEND: "ipc_send",
        SyscallType.SYS_RECV: "ipc_recv",
        SyscallType.SYS_MMAP: "memory_map",
        SyscallType.SYS_MUNMAP: "memory_unmap",
    }
    if syscall in known_names:
        return known_names[syscall]
    # Unmapped syscall: derive a generic name from the enum member.
    return f"syscall_{syscall.name.lower()}"
|
|
509
|
+
|
|
510
|
+
# ========== Syscall Implementations ==========
|
|
511
|
+
|
|
512
|
+
async def _sys_read(
    self,
    request: SyscallRequest,
    ctx: "AgentContext",
) -> SyscallResult:
    """SYS_READ: read ``args["path"]`` from the calling agent's VFS.

    Error codes in the returned result:
        1 - no ``path`` argument was supplied
        2 - no VFS instance is registered for ``ctx.agent_id``
        3 - the VFS read raised; the message is ``str(exception)``
    """
    target_path = request.args.get("path")
    if not target_path:
        return SyscallResult(success=False, error_code=1, error_message="No path specified")

    agent_vfs = self._vfs_instances.get(ctx.agent_id)
    if not agent_vfs:
        return SyscallResult(success=False, error_code=2, error_message="No VFS for agent")

    try:
        contents = agent_vfs.read(target_path)
        return SyscallResult(success=True, return_value=contents)
    except Exception as exc:
        # VFS failures are reported to the caller rather than raised.
        return SyscallResult(success=False, error_code=3, error_message=str(exc))
|
|
531
|
+
|
|
532
|
+
async def _sys_write(
    self,
    request: SyscallRequest,
    ctx: "AgentContext",
) -> SyscallResult:
    """SYS_WRITE: write ``args["data"]`` to ``args["path"]`` in the agent's VFS.

    On success the result carries whatever ``vfs.write`` returned.

    Error codes in the returned result:
        1 - ``path`` is missing/empty or ``data`` is ``None``
        2 - no VFS instance is registered for ``ctx.agent_id``
        3 - the VFS write raised; the message is ``str(exception)``
    """
    syscall_args = request.args
    target_path = syscall_args.get("path")
    payload = syscall_args.get("data")

    # Empty payloads ("" / b"") are allowed; only a missing one is rejected.
    if payload is None or not target_path:
        return SyscallResult(success=False, error_code=1, error_message="Missing path or data")

    agent_vfs = self._vfs_instances.get(ctx.agent_id)
    if not agent_vfs:
        return SyscallResult(success=False, error_code=2, error_message="No VFS for agent")

    try:
        written = agent_vfs.write(target_path, payload)
        return SyscallResult(success=True, return_value=written)
    except Exception as exc:
        return SyscallResult(success=False, error_code=3, error_message=str(exc))
|
|
553
|
+
|
|
554
|
+
async def _sys_open(
    self,
    request: SyscallRequest,
    ctx: "AgentContext",
) -> SyscallResult:
    """SYS_OPEN: open a file descriptor in the calling agent's VFS.

    Reads ``path`` and ``mode`` (default ``"r"``) from ``request.args``.
    On success the result carries the value returned by ``vfs.open``.

    Error codes in the returned result:
        1 - no ``path`` argument was supplied
        2 - no VFS instance is registered for ``ctx.agent_id``
        3 - resolving the mode or opening the file raised; the message is
            ``str(exception)``
    """
    path = request.args.get("path")
    mode = request.args.get("mode", "r")

    # Validate the path up front, matching _sys_read / _sys_write, so a
    # missing argument gets a specific error instead of a VFS exception.
    if not path:
        return SyscallResult(success=False, error_code=1, error_message="No path specified")

    vfs = self._vfs_instances.get(ctx.agent_id)
    if not vfs:
        return SyscallResult(success=False, error_code=2, error_message="No VFS for agent")

    try:
        from .vfs import FileMode

        # Any mode containing "r" opens for reading; everything else opens
        # for writing. A non-string mode raises here and is reported as 3.
        file_mode = FileMode.READ if "r" in mode else FileMode.WRITE
        fd = vfs.open(path, file_mode)
        return SyscallResult(success=True, return_value=fd)
    except Exception as e:
        return SyscallResult(success=False, error_code=3, error_message=str(e))
|
|
574
|
+
|
|
575
|
+
async def _sys_close(
    self,
    request: SyscallRequest,
    ctx: "AgentContext",
) -> SyscallResult:
    """SYS_CLOSE: close a file descriptor in the calling agent's VFS.

    Reads ``fd`` from ``request.args``.

    Error codes in the returned result:
        1 - no ``fd`` argument was supplied
        2 - no VFS instance is registered for ``ctx.agent_id``
        3 - ``vfs.close`` raised; the message is ``str(exception)``
    """
    fd = request.args.get("fd")

    # Compare against None explicitly: 0 may be a legitimate descriptor.
    if fd is None:
        return SyscallResult(
            success=False,
            error_code=1,
            error_message="No file descriptor specified",
        )

    vfs = self._vfs_instances.get(ctx.agent_id)
    if not vfs:
        return SyscallResult(success=False, error_code=2, error_message="No VFS for agent")

    try:
        vfs.close(fd)
        return SyscallResult(success=True)
    except Exception as e:
        return SyscallResult(success=False, error_code=3, error_message=str(e))
|
|
592
|
+
|
|
593
|
+
async def _sys_signal(
    self,
    request: SyscallRequest,
    ctx: "AgentContext",
) -> SyscallResult:
    """SYS_SIGNAL: deliver a signal to an agent's signal dispatcher.

    ``args["target"]`` defaults to the calling agent, ``args["signal"]`` is
    converted with ``AgentSignal(...)``, and ``args["reason"]`` defaults to
    an empty string.

    Error codes in the returned result:
        1 - no dispatcher is registered for the target agent
        2 - building the signal or dispatching it raised; the message is
            ``str(exception)``
    """
    syscall_args = request.args
    recipient = syscall_args.get("target", ctx.agent_id)
    raw_signal = syscall_args.get("signal")
    why = syscall_args.get("reason", "")

    recipient_dispatcher = self._signal_dispatchers.get(recipient)
    if not recipient_dispatcher:
        return SyscallResult(success=False, error_code=1, error_message="Target agent not found")

    try:
        parsed_signal = AgentSignal(raw_signal)
        recipient_dispatcher.signal(parsed_signal, source=ctx.agent_id, reason=why)
        return SyscallResult(success=True)
    except Exception as exc:
        return SyscallResult(success=False, error_code=2, error_message=str(exc))
|
|
613
|
+
|
|
614
|
+
async def _sys_checkpolicy(
    self,
    request: SyscallRequest,
    ctx: "AgentContext",
) -> SyscallResult:
    """SYS_CHECKPOLICY: ask whether an action would be allowed, without doing it.

    Reads ``action``, ``target`` and ``args`` from ``request.args``. The
    syscall itself succeeds whenever the check could be performed; the
    verdict is in ``return_value["allowed"]``. A denial additionally carries
    ``"reason"``. Only a missing ``action`` yields a failed result
    (error code 1).
    """
    syscall_args = request.args
    action = syscall_args.get("action")
    target = syscall_args.get("target")
    extra_args = syscall_args.get("args", {})

    if not action:
        return SyscallResult(
            success=False,
            error_code=1,
            error_message="No action specified",
        )

    # Permissive mode: without a policy engine everything is allowed.
    if not self._policy_engine:
        return SyscallResult(success=True, return_value={"allowed": True})

    # Fold the target into the argument dict only when one was given.
    if target:
        policy_args = {**extra_args, "target": target}
    else:
        policy_args = extra_args

    # check_violation takes positional args: agent_role, tool_name, args.
    violation = self._policy_engine.check_violation(ctx.agent_id, action, policy_args)

    if violation:
        # The check succeeded, but the action would be denied.
        verdict = {
            "allowed": False,
            "reason": violation,
            "action": action,
            "agent_id": ctx.agent_id,
        }
    else:
        verdict = {
            "allowed": True,
            "action": action,
            "agent_id": ctx.agent_id,
        }
    return SyscallResult(success=True, return_value=verdict)
|
|
662
|
+
|
|
663
|
+
async def _sys_exit(
    self,
    request: SyscallRequest,
    ctx: "AgentContext",
) -> SyscallResult:
    """SYS_EXIT: tear down the calling agent at its own request.

    Logs the exit, destroys the agent's context, and returns a successful
    result whose value is ``args["code"]`` (default ``0``).
    """
    status = request.args.get("code", 0)
    logger.info(f"[Kernel] Agent {ctx.agent_id} exiting with code {status}")

    # Release everything the kernel holds for this agent.
    self.destroy_agent_context(ctx.agent_id)

    return SyscallResult(success=True, return_value=status)
|
|
677
|
+
|
|
678
|
+
async def _sys_exec(
    self,
    request: SyscallRequest,
    ctx: "AgentContext",
) -> SyscallResult:
    """SYS_EXEC: execute a registered tool through the kernel.

    Steps performed here:
        1. Start a flight-recorder trace (when a recorder is configured).
        2. Look the tool up in the kernel's tool registry.
        3. Call the executor with ``args["args"]`` as keyword arguments,
           awaiting the result if it is awaitable.
        4. Log success or failure to the trace and return the result.

    The policy check is not repeated here; per the note below it is
    expected to have happened already in ``syscall()``.

    Error codes in the returned result:
        1    - no ``tool`` argument was supplied
        -404 - the tool is not registered
        -500 - the executor raised; the message is ``"<Type>: <text>"``
    """
    # Function-scope import so the block does not rely on a module-level one.
    import inspect

    tool_name = request.args.get("tool")
    # Treat an explicit ``args=None`` like "no arguments".
    tool_args = request.args.get("args") or {}
    input_prompt = request.args.get("input_prompt")

    if not tool_name:
        return SyscallResult(
            success=False,
            error_code=1,
            error_message="No tool specified",
        )

    # Start trace in flight recorder
    trace_id = None
    if self._flight_recorder:
        trace_id = self._flight_recorder.start_trace(
            agent_id=ctx.agent_id,
            tool_name=tool_name,
            tool_args=tool_args,
            input_prompt=input_prompt,
        )

    # NOTE: Policy check already happened at syscall level in syscall()
    # We skip the double-check here for efficiency

    # Look up tool in registry
    executor = self._tool_registry.get(tool_name)
    if not executor:
        error_msg = f"Tool '{tool_name}' not registered in kernel"
        if self._flight_recorder and trace_id:
            self._flight_recorder.log_error(trace_id, error_msg)

        return SyscallResult(
            success=False,
            error_code=-404,
            error_message=error_msg,
        )

    # Execute the tool
    start_time = datetime.now(timezone.utc)
    try:
        # Call first, then await if needed. Checking the *result* rather
        # than the callable also covers lambdas, wrappers and callable
        # objects that return a coroutine without being ``async def``.
        result = executor(**tool_args)
        if inspect.isawaitable(result):
            result = await result

        execution_time_ms = (datetime.now(timezone.utc) - start_time).total_seconds() * 1000

        if self._flight_recorder and trace_id:
            self._flight_recorder.log_success(trace_id, result, execution_time_ms)

        logger.info(f"[Kernel] ALLOWED: {ctx.agent_id} executed {tool_name}")

        return SyscallResult(
            success=True,
            return_value=result,
            execution_time_ms=execution_time_ms,
        )

    except Exception as e:
        # Tool failures are contained and reported, not propagated.
        execution_time_ms = (datetime.now(timezone.utc) - start_time).total_seconds() * 1000
        error_msg = f"{type(e).__name__}: {str(e)}"

        if self._flight_recorder and trace_id:
            self._flight_recorder.log_error(trace_id, error_msg)

        logger.error(f"[Kernel] Tool execution failed: {error_msg}")

        return SyscallResult(
            success=False,
            error_code=-500,
            error_message=error_msg,
            execution_time_ms=execution_time_ms,
        )
|
|
767
|
+
|
|
768
|
+
def register_tool(
    self,
    tool_name: str,
    executor: Callable[..., Any],
    description: Optional[str] = None,
) -> None:
    """Add a tool to the kernel's tool registry.

    ``SYS_EXEC`` looks tools up in this registry by name.

    Args:
        tool_name: Name to register the tool under (e.g. ``"read_file"``,
            ``"web_search"``). An existing entry with the same name is
            replaced without warning.
        executor: Sync or async callable implementing the tool. It is
            invoked with the syscall's ``args["args"]`` dictionary
            expanded as keyword arguments.
        description: Optional human-readable description. This method
            accepts it but does not store or log it.
    """
    registry = self._tool_registry
    registry[tool_name] = executor
    logger.info(f"[Kernel] Registered tool: {tool_name}")
|
|
792
|
+
|
|
793
|
+
def unregister_tool(self, tool_name: str) -> bool:
    """Remove a tool from the kernel's tool registry.

    Args:
        tool_name: Name of the tool to remove.

    Returns:
        ``True`` when the tool was registered and has been removed,
        ``False`` when no tool with that name was registered.
    """
    registry = self._tool_registry
    if tool_name not in registry:
        return False

    del registry[tool_name]
    logger.info(f"[Kernel] Unregistered tool: {tool_name}")
    return True
|
|
808
|
+
|
|
809
|
+
def list_tools(self) -> List[str]:
    """Return the names of all currently registered tools.

    Returns:
        A new list holding the registry's keys in registry order.
    """
    return [*self._tool_registry]
|
|
816
|
+
|
|
817
|
+
# ========== Kernel Control ==========
|
|
818
|
+
|
|
819
|
+
def panic(self, reason: str) -> None:
    """Trigger a kernel panic.

    Sets the kernel state to ``KernelState.PANIC``, increments the panic
    counter, logs at critical level, records the panic in the flight
    recorder (when one is configured), and raises ``AgentKernelPanic``.

    Args:
        reason: Human-readable description of why the panic occurred.

    Raises:
        AgentKernelPanic: Always raised to unwind the call stack, even if
            recording the panic in the flight recorder fails.
    """
    self._state = KernelState.PANIC
    self._metrics.kernel_panics += 1

    logger.critical(f"[KERNEL PANIC] {reason}")

    # Record to flight recorder using error API
    if self._flight_recorder:
        try:
            trace_id = self._flight_recorder.start_trace(
                agent_id="kernel",
                tool_name="kernel_panic",
                tool_args={"reason": reason, "metrics": self._metrics.to_dict()},
            )
            self._flight_recorder.log_error(trace_id, f"KERNEL PANIC: {reason}")
        except Exception:
            # A failing recorder must not replace the panic with its own
            # exception; log it and continue to the raise below.
            logger.exception("[KERNEL PANIC] Failed to record panic in flight recorder")

    raise AgentKernelPanic(
        agent_id="kernel",
        signal=SignalInfo(signal=AgentSignal.SIGKILL, reason=reason),
        message=f"Kernel panic: {reason}",
    )
|
|
852
|
+
|
|
853
|
+
def shutdown(self) -> None:
    """Perform a graceful kernel shutdown.

    Transitions the kernel to ``KernelState.SHUTDOWN``, sends ``SIGTERM``
    to every registered signal dispatcher on a best-effort basis, and then
    destroys all agent contexts.
    """
    logger.info("[Kernel] Initiating shutdown")
    self._state = KernelState.SHUTDOWN

    # Send SIGTERM to all agents. Iterate over a snapshot: a signal handler
    # that removes an agent would otherwise change the dict mid-iteration.
    for agent_id, dispatcher in list(self._signal_dispatchers.items()):
        try:
            dispatcher.signal(
                AgentSignal.SIGTERM,
                source="kernel",
                reason="Kernel shutdown",
            )
        except Exception as exc:
            # Delivery stays best-effort, but the failure is no longer silent.
            logger.debug(
                f"[Kernel] SIGTERM delivery to {agent_id} failed during shutdown: {exc}"
            )

    # Clean up all agents (snapshot for the same reason as above).
    for agent_id in list(self._agents.keys()):
        self.destroy_agent_context(agent_id)

    logger.info("[Kernel] Shutdown complete")
|
|
879
|
+
|
|
880
|
+
|
|
881
|
+
@dataclass
class AgentContext:
    """
    Context for an agent running in user space.

    This is the agent's view of the kernel - it can only
    interact with the kernel through syscalls. Every convenience method
    below builds a ``SyscallRequest`` via :meth:`syscall`.
    """
    agent_id: str
    kernel: KernelSpace
    ring: ProtectionRing = ProtectionRing.RING_3_USER

    # Runtime state
    # NOTE(review): id() of a temporary object is only unique while that
    # object is alive, so values may repeat across contexts - confirm
    # whether pid is required to be unique.
    pid: int = field(default_factory=lambda: id(object()))
    created: datetime = field(default_factory=lambda: datetime.now(timezone.utc))

    async def syscall(self, syscall_type: SyscallType, **kwargs) -> SyscallResult:
        """Make a system call to the kernel.

        ``kwargs`` become the request's ``args`` and the context's ring is
        sent as ``caller_ring``.
        """
        request = SyscallRequest(
            syscall=syscall_type,
            args=kwargs,
            caller_ring=self.ring,
        )
        return await self.kernel.syscall(request, self)

    # ========== Convenience Methods ==========

    async def read(self, path: str) -> bytes:
        """Read from VFS.

        Raises:
            IOError: If the syscall result is unsuccessful.
        """
        result = await self.syscall(SyscallType.SYS_READ, path=path)
        if not result.success:
            raise IOError(result.error_message)
        return result.return_value

    async def write(self, path: str, data: Union[bytes, str]) -> int:
        """Write to VFS and return the syscall's return value.

        Raises:
            IOError: If the syscall result is unsuccessful.
        """
        result = await self.syscall(SyscallType.SYS_WRITE, path=path, data=data)
        if not result.success:
            raise IOError(result.error_message)
        return result.return_value

    async def exit(self, code: int = 0) -> None:
        """Request termination. The syscall result is not inspected."""
        await self.syscall(SyscallType.SYS_EXIT, code=code)

    async def signal(
        self,
        target: str,
        signal: AgentSignal,
        reason: str = "",
    ) -> bool:
        """Send a signal to another agent; return whether the syscall succeeded."""
        result = await self.syscall(
            SyscallType.SYS_SIGNAL,
            target=target,
            signal=signal.value,
            reason=reason,
        )
        return result.success

    async def check_policy(self, action: str, target: str) -> bool:
        """Check if an action is allowed.

        Returns:
            ``True`` only when the syscall succeeded and its verdict says
            the action is allowed; ``False`` otherwise.
        """
        result = await self.syscall(
            SyscallType.SYS_CHECKPOLICY,
            action=action,
            target=target,
        )
        if not result.success:
            return False

        verdict = result.return_value
        # SYS_CHECKPOLICY answers with a dict such as {"allowed": False, ...}.
        # A non-empty dict is always truthy, so the "allowed" flag must be
        # read explicitly instead of returning the dict itself.
        if isinstance(verdict, dict):
            return bool(verdict.get("allowed", False))
        return bool(verdict)
|
|
949
|
+
|
|
950
|
+
|
|
951
|
+
@asynccontextmanager
async def user_space_execution(kernel: KernelSpace, agent_id: str):
    """
    Async context manager that runs agent code inside a kernel-managed context.

    An agent context is created on entry and always destroyed on exit.
    ``AgentKernelPanic`` raised by the body propagates; any other
    ``Exception`` is logged, counted as an agent crash, answered with a
    ``SIGKILL`` to the agent's dispatcher (if it still exists), and then
    suppressed so the kernel keeps running.

    Example:
        kernel = KernelSpace()

        async with user_space_execution(kernel, "agent-001") as ctx:
            # Agent code runs here - isolated from kernel
            await ctx.write("/mem/working/task.txt", "Hello World")

            # If this raises, kernel catches it
            result = await some_llm_call()
    """
    agent_ctx = kernel.create_agent_context(agent_id)

    try:
        yield agent_ctx
    except AgentKernelPanic:
        # Kernel panics are never contained here.
        raise
    except Exception as crash:
        # Anything else is treated as a user-space crash and contained.
        logger.error(f"[UserSpace] Agent {agent_id} crashed: {crash}")
        logger.debug(traceback.format_exc())

        kernel._metrics.agent_crashes += 1

        # Notify the agent's dispatcher, provided it is still registered.
        crashed_dispatcher = kernel._signal_dispatchers.get(agent_id)
        if crashed_dispatcher:
            try:
                crashed_dispatcher.signal(
                    AgentSignal.SIGKILL,
                    source="kernel",
                    reason=f"Agent crash: {crash}",
                )
            except AgentKernelPanic:
                # A panic raised while delivering the kill is ignored.
                pass
    finally:
        # The context is torn down on every exit path.
        kernel.destroy_agent_context(agent_id)
|
|
997
|
+
|
|
998
|
+
|
|
999
|
+
# ========== Factory Functions ==========
|
|
1000
|
+
|
|
1001
|
+
def create_kernel(
    policy_engine: Optional[Any] = None,
    flight_recorder: Optional[Any] = None,
) -> KernelSpace:
    """Build a ``KernelSpace`` wired to the given optional collaborators.

    Args:
        policy_engine: Passed through as ``KernelSpace(policy_engine=...)``.
        flight_recorder: Passed through as ``KernelSpace(flight_recorder=...)``.

    Returns:
        The newly constructed kernel.
    """
    kernel = KernelSpace(policy_engine=policy_engine, flight_recorder=flight_recorder)
    return kernel
|