agent_os_kernel 3.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_control_plane/__init__.py +662 -0
- agent_control_plane/a2a_adapter.py +543 -0
- agent_control_plane/adapter.py +417 -0
- agent_control_plane/agent_hibernation.py +394 -0
- agent_control_plane/agent_kernel.py +470 -0
- agent_control_plane/compliance.py +720 -0
- agent_control_plane/constraint_graphs.py +478 -0
- agent_control_plane/control_plane.py +854 -0
- agent_control_plane/example_executors.py +195 -0
- agent_control_plane/execution_engine.py +231 -0
- agent_control_plane/flight_recorder.py +846 -0
- agent_control_plane/governance_layer.py +435 -0
- agent_control_plane/hf_utils.py +563 -0
- agent_control_plane/interfaces/__init__.py +55 -0
- agent_control_plane/interfaces/kernel_interface.py +361 -0
- agent_control_plane/interfaces/plugin_interface.py +497 -0
- agent_control_plane/interfaces/protocol_interfaces.py +387 -0
- agent_control_plane/kernel_space.py +1009 -0
- agent_control_plane/langchain_adapter.py +424 -0
- agent_control_plane/lifecycle.py +3113 -0
- agent_control_plane/mcp_adapter.py +653 -0
- agent_control_plane/ml_safety.py +563 -0
- agent_control_plane/multimodal.py +727 -0
- agent_control_plane/mute_agent.py +422 -0
- agent_control_plane/observability.py +787 -0
- agent_control_plane/orchestrator.py +482 -0
- agent_control_plane/plugin_registry.py +750 -0
- agent_control_plane/policy_engine.py +954 -0
- agent_control_plane/process_isolation.py +777 -0
- agent_control_plane/shadow_mode.py +310 -0
- agent_control_plane/signals.py +493 -0
- agent_control_plane/supervisor_agents.py +430 -0
- agent_control_plane/time_travel_debugger.py +557 -0
- agent_control_plane/tool_registry.py +452 -0
- agent_control_plane/vfs.py +697 -0
- agent_kernel/__init__.py +69 -0
- agent_kernel/analyzer.py +435 -0
- agent_kernel/auditor.py +36 -0
- agent_kernel/completeness_auditor.py +237 -0
- agent_kernel/detector.py +203 -0
- agent_kernel/kernel.py +744 -0
- agent_kernel/memory_manager.py +85 -0
- agent_kernel/models.py +374 -0
- agent_kernel/nudge_mechanism.py +263 -0
- agent_kernel/outcome_analyzer.py +338 -0
- agent_kernel/patcher.py +582 -0
- agent_kernel/semantic_analyzer.py +316 -0
- agent_kernel/semantic_purge.py +349 -0
- agent_kernel/simulator.py +449 -0
- agent_kernel/teacher.py +85 -0
- agent_kernel/triage.py +152 -0
- agent_os/__init__.py +409 -0
- agent_os/_adversarial_impl.py +200 -0
- agent_os/_circuit_breaker_impl.py +232 -0
- agent_os/_mcp_metrics.py +193 -0
- agent_os/adversarial.py +20 -0
- agent_os/agents_compat.py +490 -0
- agent_os/audit_logger.py +135 -0
- agent_os/base_agent.py +651 -0
- agent_os/circuit_breaker.py +34 -0
- agent_os/cli/__init__.py +659 -0
- agent_os/cli/cmd_audit.py +128 -0
- agent_os/cli/cmd_init.py +152 -0
- agent_os/cli/cmd_policy.py +41 -0
- agent_os/cli/cmd_policy_gen.py +180 -0
- agent_os/cli/cmd_validate.py +258 -0
- agent_os/cli/mcp_scan.py +265 -0
- agent_os/cli/output.py +192 -0
- agent_os/cli/policy_checker.py +330 -0
- agent_os/compat.py +74 -0
- agent_os/constraint_graph.py +234 -0
- agent_os/content_governance.py +140 -0
- agent_os/context_budget.py +305 -0
- agent_os/credential_redactor.py +224 -0
- agent_os/diff_policy.py +89 -0
- agent_os/egress_policy.py +159 -0
- agent_os/escalation.py +276 -0
- agent_os/event_bus.py +124 -0
- agent_os/exceptions.py +180 -0
- agent_os/execution_context_policy.py +141 -0
- agent_os/github_enterprise.py +96 -0
- agent_os/health.py +20 -0
- agent_os/integrations/__init__.py +279 -0
- agent_os/integrations/a2a_adapter.py +279 -0
- agent_os/integrations/agent_lightning/__init__.py +30 -0
- agent_os/integrations/anthropic_adapter.py +420 -0
- agent_os/integrations/autogen_adapter.py +620 -0
- agent_os/integrations/base.py +1137 -0
- agent_os/integrations/compat.py +229 -0
- agent_os/integrations/config.py +98 -0
- agent_os/integrations/conversation_guardian.py +957 -0
- agent_os/integrations/crewai_adapter.py +467 -0
- agent_os/integrations/drift_detector.py +425 -0
- agent_os/integrations/dry_run.py +124 -0
- agent_os/integrations/escalation.py +582 -0
- agent_os/integrations/gemini_adapter.py +364 -0
- agent_os/integrations/google_adk_adapter.py +633 -0
- agent_os/integrations/guardrails_adapter.py +394 -0
- agent_os/integrations/health.py +197 -0
- agent_os/integrations/langchain_adapter.py +654 -0
- agent_os/integrations/llamafirewall.py +343 -0
- agent_os/integrations/llamaindex_adapter.py +188 -0
- agent_os/integrations/logging.py +191 -0
- agent_os/integrations/maf_adapter.py +631 -0
- agent_os/integrations/mistral_adapter.py +365 -0
- agent_os/integrations/openai_adapter.py +816 -0
- agent_os/integrations/openai_agents_sdk.py +406 -0
- agent_os/integrations/policy_compose.py +171 -0
- agent_os/integrations/profiling.py +144 -0
- agent_os/integrations/pydantic_ai_adapter.py +420 -0
- agent_os/integrations/rate_limiter.py +130 -0
- agent_os/integrations/rbac.py +143 -0
- agent_os/integrations/registry.py +113 -0
- agent_os/integrations/scope_guard.py +303 -0
- agent_os/integrations/semantic_kernel_adapter.py +769 -0
- agent_os/integrations/smolagents_adapter.py +629 -0
- agent_os/integrations/templates.py +178 -0
- agent_os/integrations/token_budget.py +134 -0
- agent_os/integrations/tool_aliases.py +190 -0
- agent_os/integrations/webhooks.py +177 -0
- agent_os/lite.py +208 -0
- agent_os/mcp_gateway.py +385 -0
- agent_os/mcp_message_signer.py +273 -0
- agent_os/mcp_protocols.py +161 -0
- agent_os/mcp_response_scanner.py +232 -0
- agent_os/mcp_security.py +924 -0
- agent_os/mcp_session_auth.py +231 -0
- agent_os/mcp_sliding_rate_limiter.py +184 -0
- agent_os/memory_guard.py +409 -0
- agent_os/metrics.py +134 -0
- agent_os/mute.py +428 -0
- agent_os/mute_agent.py +209 -0
- agent_os/policies/__init__.py +77 -0
- agent_os/policies/async_evaluator.py +275 -0
- agent_os/policies/backends.py +670 -0
- agent_os/policies/bridge.py +169 -0
- agent_os/policies/budget.py +85 -0
- agent_os/policies/cli.py +294 -0
- agent_os/policies/conflict_resolution.py +270 -0
- agent_os/policies/data_classification.py +252 -0
- agent_os/policies/evaluator.py +239 -0
- agent_os/policies/policy_schema.json +228 -0
- agent_os/policies/rate_limiting.py +145 -0
- agent_os/policies/schema.py +115 -0
- agent_os/policies/shared.py +331 -0
- agent_os/prompt_injection.py +694 -0
- agent_os/providers.py +182 -0
- agent_os/py.typed +0 -0
- agent_os/retry.py +81 -0
- agent_os/reversibility.py +251 -0
- agent_os/sandbox.py +432 -0
- agent_os/sandbox_provider.py +140 -0
- agent_os/secure_codegen.py +525 -0
- agent_os/security_skills.py +538 -0
- agent_os/semantic_policy.py +422 -0
- agent_os/server/__init__.py +15 -0
- agent_os/server/__main__.py +25 -0
- agent_os/server/app.py +277 -0
- agent_os/server/models.py +104 -0
- agent_os/shift_left_metrics.py +130 -0
- agent_os/stateless.py +742 -0
- agent_os/supervisor.py +148 -0
- agent_os/task_outcome.py +148 -0
- agent_os/transparency.py +181 -0
- agent_os/trust_root.py +128 -0
- agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
- agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
- agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
- agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
- agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
- agent_os_observability/__init__.py +27 -0
- agent_os_observability/dashboards.py +898 -0
- agent_os_observability/metrics.py +398 -0
- agent_os_observability/server.py +223 -0
- agent_os_observability/tracer.py +232 -0
- agent_primitives/__init__.py +24 -0
- agent_primitives/failures.py +84 -0
- agent_primitives/py.typed +0 -0
- amb_core/__init__.py +177 -0
- amb_core/adapters/__init__.py +57 -0
- amb_core/adapters/aws_sqs_broker.py +376 -0
- amb_core/adapters/azure_servicebus_broker.py +340 -0
- amb_core/adapters/kafka_broker.py +260 -0
- amb_core/adapters/nats_broker.py +285 -0
- amb_core/adapters/rabbitmq_broker.py +235 -0
- amb_core/adapters/redis_broker.py +262 -0
- amb_core/broker.py +145 -0
- amb_core/bus.py +481 -0
- amb_core/cloudevents.py +509 -0
- amb_core/dlq.py +345 -0
- amb_core/hf_utils.py +536 -0
- amb_core/memory_broker.py +410 -0
- amb_core/models.py +141 -0
- amb_core/persistence.py +529 -0
- amb_core/schema.py +294 -0
- amb_core/tracing.py +358 -0
- atr/__init__.py +640 -0
- atr/access.py +348 -0
- atr/composition.py +645 -0
- atr/decorator.py +357 -0
- atr/executor.py +384 -0
- atr/health.py +557 -0
- atr/hf_utils.py +449 -0
- atr/injection.py +422 -0
- atr/metrics.py +440 -0
- atr/policies.py +403 -0
- atr/py.typed +2 -0
- atr/registry.py +452 -0
- atr/schema.py +480 -0
- atr/tools/safe/__init__.py +75 -0
- atr/tools/safe/calculator.py +467 -0
- atr/tools/safe/datetime_tool.py +443 -0
- atr/tools/safe/file_reader.py +402 -0
- atr/tools/safe/http_client.py +316 -0
- atr/tools/safe/json_parser.py +374 -0
- atr/tools/safe/text_tool.py +537 -0
- atr/tools/safe/toolkit.py +175 -0
- caas/__init__.py +162 -0
- caas/api/__init__.py +7 -0
- caas/api/server.py +1328 -0
- caas/caching.py +834 -0
- caas/cli.py +210 -0
- caas/conversation.py +223 -0
- caas/decay.py +72 -0
- caas/detection/__init__.py +9 -0
- caas/detection/detector.py +238 -0
- caas/enrichment.py +130 -0
- caas/gateway/__init__.py +27 -0
- caas/gateway/trust_gateway.py +474 -0
- caas/hf_utils.py +479 -0
- caas/ingestion/__init__.py +23 -0
- caas/ingestion/processors.py +253 -0
- caas/ingestion/structure_parser.py +188 -0
- caas/models.py +356 -0
- caas/pragmatic_truth.py +444 -0
- caas/routing/__init__.py +10 -0
- caas/routing/heuristic_router.py +58 -0
- caas/storage/__init__.py +9 -0
- caas/storage/store.py +389 -0
- caas/triad.py +213 -0
- caas/tuning/__init__.py +9 -0
- caas/tuning/tuner.py +329 -0
- caas/vfs/__init__.py +14 -0
- caas/vfs/filesystem.py +452 -0
- cmvk/__init__.py +218 -0
- cmvk/audit.py +402 -0
- cmvk/benchmarks.py +478 -0
- cmvk/constitutional.py +904 -0
- cmvk/hf_utils.py +301 -0
- cmvk/metrics.py +473 -0
- cmvk/profiles.py +300 -0
- cmvk/py.typed +0 -0
- cmvk/types.py +12 -0
- cmvk/verification.py +956 -0
- emk/__init__.py +89 -0
- emk/causal.py +352 -0
- emk/hf_utils.py +421 -0
- emk/indexer.py +83 -0
- emk/py.typed +0 -0
- emk/schema.py +204 -0
- emk/sleep_cycle.py +347 -0
- emk/store.py +281 -0
- iatp/__init__.py +166 -0
- iatp/attestation.py +461 -0
- iatp/cli.py +317 -0
- iatp/hf_utils.py +472 -0
- iatp/ipc_pipes.py +580 -0
- iatp/main.py +412 -0
- iatp/models/__init__.py +447 -0
- iatp/policy_engine.py +337 -0
- iatp/py.typed +2 -0
- iatp/recovery.py +321 -0
- iatp/security/__init__.py +270 -0
- iatp/sidecar/__init__.py +519 -0
- iatp/telemetry/__init__.py +164 -0
- iatp/tests/__init__.py +1 -0
- iatp/tests/test_attestation.py +370 -0
- iatp/tests/test_cli.py +131 -0
- iatp/tests/test_ed25519_attestation.py +211 -0
- iatp/tests/test_models.py +130 -0
- iatp/tests/test_policy_engine.py +347 -0
- iatp/tests/test_recovery.py +281 -0
- iatp/tests/test_security.py +222 -0
- iatp/tests/test_sidecar.py +167 -0
- iatp/tests/test_telemetry.py +175 -0
- mcp_kernel_server/__init__.py +28 -0
- mcp_kernel_server/cli.py +274 -0
- mcp_kernel_server/resources.py +217 -0
- mcp_kernel_server/server.py +564 -0
- mcp_kernel_server/tools.py +1174 -0
- mute_agent/__init__.py +68 -0
- mute_agent/core/__init__.py +1 -0
- mute_agent/core/execution_agent.py +166 -0
- mute_agent/core/handshake_protocol.py +201 -0
- mute_agent/core/reasoning_agent.py +238 -0
- mute_agent/knowledge_graph/__init__.py +1 -0
- mute_agent/knowledge_graph/graph_elements.py +65 -0
- mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
- mute_agent/knowledge_graph/subgraph.py +224 -0
- mute_agent/listener/__init__.py +43 -0
- mute_agent/listener/adapters/__init__.py +31 -0
- mute_agent/listener/adapters/base_adapter.py +189 -0
- mute_agent/listener/adapters/caas_adapter.py +344 -0
- mute_agent/listener/adapters/control_plane_adapter.py +436 -0
- mute_agent/listener/adapters/iatp_adapter.py +332 -0
- mute_agent/listener/adapters/scak_adapter.py +251 -0
- mute_agent/listener/listener.py +610 -0
- mute_agent/listener/state_observer.py +436 -0
- mute_agent/listener/threshold_config.py +313 -0
- mute_agent/super_system/__init__.py +1 -0
- mute_agent/super_system/router.py +204 -0
- mute_agent/visualization/__init__.py +10 -0
- mute_agent/visualization/graph_debugger.py +502 -0
- nexus/README.md +60 -0
- nexus/__init__.py +51 -0
- nexus/arbiter.py +359 -0
- nexus/client.py +466 -0
- nexus/dmz.py +444 -0
- nexus/escrow.py +430 -0
- nexus/exceptions.py +286 -0
- nexus/pyproject.toml +36 -0
- nexus/registry.py +393 -0
- nexus/reputation.py +425 -0
- nexus/schemas/__init__.py +51 -0
- nexus/schemas/compliance.py +276 -0
- nexus/schemas/escrow.py +251 -0
- nexus/schemas/manifest.py +225 -0
- nexus/schemas/receipt.py +208 -0
- nexus/tests/__init__.py +0 -0
- nexus/tests/conftest.py +146 -0
- nexus/tests/test_arbiter.py +192 -0
- nexus/tests/test_dmz.py +194 -0
- nexus/tests/test_escrow.py +276 -0
- nexus/tests/test_exceptions.py +225 -0
- nexus/tests/test_registry.py +232 -0
- nexus/tests/test_reputation.py +328 -0
- nexus/tests/test_schemas.py +295 -0
|
@@ -0,0 +1,777 @@
|
|
|
1
|
+
# Copyright (c) Microsoft Corporation.
|
|
2
|
+
# Licensed under the MIT License.
|
|
3
|
+
"""
|
|
4
|
+
Process-Level Agent Isolation
|
|
5
|
+
|
|
6
|
+
Provides real OS-level process isolation for agent execution, addressing
|
|
7
|
+
the limitation that in-process SIGKILL (AgentKernelPanic) is catchable
|
|
8
|
+
by a broad ``except BaseException`` in Python.
|
|
9
|
+
|
|
10
|
+
This module runs agents in separate processes where ``os.kill(SIGKILL)``
|
|
11
|
+
is truly non-catchable by agent code.
|
|
12
|
+
|
|
13
|
+
Architecture::
|
|
14
|
+
|
|
15
|
+
+-------------------------------------------+
|
|
16
|
+
| Supervisor Process (Kernel Space) |
|
|
17
|
+
| - ProcessIsolationManager |
|
|
18
|
+
| - IsolatedSignalDispatcher |
|
|
19
|
+
| |
|
|
20
|
+
| +------------+ +------------+ |
|
|
21
|
+
| | Agent A | | Agent B | |
|
|
22
|
+
| | (Process) | | (Process) | |
|
|
23
|
+
| +------------+ +------------+ |
|
|
24
|
+
+-------------------------------------------+
|
|
25
|
+
|
|
26
|
+
Isolation Levels:
|
|
27
|
+
COOPERATIVE -- In-process, exception-based (current behaviour)
|
|
28
|
+
PROCESS -- Separate process via multiprocessing.Process
|
|
29
|
+
SUBPROCESS -- Separate process via subprocess.Popen
|
|
30
|
+
|
|
31
|
+
Security Model:
|
|
32
|
+
- In-process signals = cooperative path (can be caught)
|
|
33
|
+
- Process-level kill = enforcement path (non-catchable)
|
|
34
|
+
|
|
35
|
+
See Also:
|
|
36
|
+
signals.py -- In-process cooperative signal handling
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
from __future__ import annotations
|
|
40
|
+
|
|
41
|
+
import base64
|
|
42
|
+
import hashlib
|
|
43
|
+
import hmac
|
|
44
|
+
import json
|
|
45
|
+
import logging
|
|
46
|
+
import multiprocessing
|
|
47
|
+
import os
|
|
48
|
+
import signal as _signal
|
|
49
|
+
import subprocess
|
|
50
|
+
import sys
|
|
51
|
+
import threading
|
|
52
|
+
import time
|
|
53
|
+
from dataclasses import dataclass, field
|
|
54
|
+
from enum import Enum
|
|
55
|
+
from typing import Any, Callable, Dict, List, Optional
|
|
56
|
+
|
|
57
|
+
from .signals import AgentKernelPanic, AgentSignal, SignalDispatcher, SignalInfo
|
|
58
|
+
|
|
59
|
+
logger = logging.getLogger(__name__)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
# ================================================================
|
|
63
|
+
# Enums
|
|
64
|
+
# ================================================================
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class IsolationLevel(str, Enum):
    """How strongly an agent's execution is separated from the supervisor.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    # Runs in-process; stopping the agent relies on exceptions (current behaviour).
    COOPERATIVE = "cooperative"
    # Runs in a separate process started through ``multiprocessing``.
    PROCESS = "process"
    # Runs in a separate process started through ``subprocess.Popen``.
    SUBPROCESS = "subprocess"
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class AgentProcessState(str, Enum):
    """Where an isolated agent process is in its lifecycle.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    PENDING = "pending"
    RUNNING = "running"
    COMPLETED = "completed"
    TERMINATED = "terminated"
    FAILED = "failed"
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# ================================================================
|
|
86
|
+
# Result dataclass
|
|
87
|
+
# ================================================================
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@dataclass
class AgentProcessResult:
    """Outcome record for one isolated agent process.

    Only ``agent_id`` and ``state`` are required; every other field has a
    neutral default so that partial outcomes can still be reported.
    """

    # Identifier of the agent this outcome belongs to.
    agent_id: str
    # Final lifecycle state recorded for the process.
    state: AgentProcessState
    # Value reported back by the agent, if any.
    return_value: Any = None
    # Human-readable failure or termination reason, if any.
    error: Optional[str] = None
    # Exit status of the process when one is known.
    exit_code: Optional[int] = None
    # How long the run took, in seconds.
    duration_seconds: float = 0.0
    # True when the process was stopped by a signal rather than exiting itself.
    terminated_by_signal: bool = False
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# ================================================================
|
|
104
|
+
# Worker function (module top-level so multiprocessing can pickle it)
|
|
105
|
+
# ================================================================
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _agent_worker(
|
|
109
|
+
target: Callable,
|
|
110
|
+
args: tuple,
|
|
111
|
+
kwargs: dict,
|
|
112
|
+
result_queue: multiprocessing.Queue,
|
|
113
|
+
) -> None:
|
|
114
|
+
"""Execute *target* inside the child process, sending the outcome via *result_queue*."""
|
|
115
|
+
start = time.monotonic()
|
|
116
|
+
try:
|
|
117
|
+
rv = target(*args, **(kwargs or {}))
|
|
118
|
+
result_queue.put({
|
|
119
|
+
"state": "completed",
|
|
120
|
+
"return_value": rv,
|
|
121
|
+
"error": None,
|
|
122
|
+
"exit_code": 0,
|
|
123
|
+
"duration": time.monotonic() - start,
|
|
124
|
+
})
|
|
125
|
+
except SystemExit as exc:
|
|
126
|
+
code = exc.code if isinstance(exc.code, int) else 1
|
|
127
|
+
result_queue.put({
|
|
128
|
+
"state": "failed",
|
|
129
|
+
"return_value": None,
|
|
130
|
+
"error": f"SystemExit({exc.code})",
|
|
131
|
+
"exit_code": code,
|
|
132
|
+
"duration": time.monotonic() - start,
|
|
133
|
+
})
|
|
134
|
+
except BaseException as exc:
|
|
135
|
+
result_queue.put({
|
|
136
|
+
"state": "failed",
|
|
137
|
+
"return_value": None,
|
|
138
|
+
"error": f"{type(exc).__name__}: {exc}",
|
|
139
|
+
"exit_code": 1,
|
|
140
|
+
"duration": time.monotonic() - start,
|
|
141
|
+
})
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# Bootstrap script executed inside a ``subprocess.Popen`` child.
|
|
145
|
+
# The parent sends: base64(hmac_key + b"|" + hmac_sig + b"|" + json_payload)
|
|
146
|
+
# The child verifies the HMAC before deserializing.
|
|
147
|
+
# The JSON payload contains {"module": "...", "qualname": "...", "args": [...], "kwargs": {...}}
|
|
148
|
+
# and the target function is resolved via importlib, avoiding pickle deserialization.
|
|
149
|
+
_SUBPROCESS_BOOTSTRAP = """\
|
|
150
|
+
import base64, hashlib, hmac, importlib, json, sys, time
|
|
151
|
+
raw = base64.b64decode(sys.stdin.buffer.read())
|
|
152
|
+
parts = raw.split(b"|", 2)
|
|
153
|
+
if len(parts) != 3:
|
|
154
|
+
json.dump({"state": "failed", "error": "Invalid bootstrap payload format", "exit_code": 1, "duration": 0}, sys.stdout)
|
|
155
|
+
sys.exit(1)
|
|
156
|
+
_key, _expected_sig, _payload = parts
|
|
157
|
+
_actual_sig = hmac.new(_key, _payload, hashlib.sha256).digest()
|
|
158
|
+
if not hmac.compare_digest(_actual_sig, _expected_sig):
|
|
159
|
+
json.dump({"state": "failed", "error": "HMAC verification failed — payload tampered", "exit_code": 1, "duration": 0}, sys.stdout)
|
|
160
|
+
sys.exit(1)
|
|
161
|
+
_data = json.loads(_payload)
|
|
162
|
+
_mod = importlib.import_module(_data["module"])
|
|
163
|
+
_obj = _mod
|
|
164
|
+
for _attr in _data["qualname"].split("."):
|
|
165
|
+
_obj = getattr(_obj, _attr)
|
|
166
|
+
target = _obj
|
|
167
|
+
args = tuple(_data.get("args", ()))
|
|
168
|
+
kwargs = _data.get("kwargs", {})
|
|
169
|
+
_start = time.monotonic()
|
|
170
|
+
try:
|
|
171
|
+
_rv = target(*args, **kwargs)
|
|
172
|
+
json.dump({
|
|
173
|
+
"state": "completed",
|
|
174
|
+
"return_value": repr(_rv),
|
|
175
|
+
"error": None,
|
|
176
|
+
"exit_code": 0,
|
|
177
|
+
"duration": time.monotonic() - _start,
|
|
178
|
+
}, sys.stdout)
|
|
179
|
+
except SystemExit as _e:
|
|
180
|
+
json.dump({
|
|
181
|
+
"state": "failed",
|
|
182
|
+
"error": f"SystemExit({_e.code})",
|
|
183
|
+
"exit_code": getattr(_e, "code", 1),
|
|
184
|
+
"duration": time.monotonic() - _start,
|
|
185
|
+
}, sys.stdout)
|
|
186
|
+
except Exception as _e:
|
|
187
|
+
json.dump({
|
|
188
|
+
"state": "failed",
|
|
189
|
+
"error": f"{type(_e).__name__}: {_e}",
|
|
190
|
+
"exit_code": 1,
|
|
191
|
+
"duration": time.monotonic() - _start,
|
|
192
|
+
}, sys.stdout)
|
|
193
|
+
"""
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
# ================================================================
|
|
197
|
+
# AgentProcessHandle
|
|
198
|
+
# ================================================================
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
@dataclass
|
|
202
|
+
class AgentProcessHandle:
|
|
203
|
+
"""Handle to a running agent process.
|
|
204
|
+
|
|
205
|
+
Provides real process-level control including non-catchable termination.
|
|
206
|
+
"""
|
|
207
|
+
|
|
208
|
+
agent_id: str
|
|
209
|
+
pid: Optional[int] = None
|
|
210
|
+
state: AgentProcessState = AgentProcessState.PENDING
|
|
211
|
+
isolation_level: IsolationLevel = IsolationLevel.PROCESS
|
|
212
|
+
|
|
213
|
+
# ---- internal fields (hidden from repr) ----
|
|
214
|
+
_process: Any = field(default=None, repr=False)
|
|
215
|
+
_result_queue: Any = field(default=None, repr=False)
|
|
216
|
+
_start_time: float = field(default=0.0, repr=False)
|
|
217
|
+
_result: Optional[AgentProcessResult] = field(default=None, repr=False)
|
|
218
|
+
_killed: bool = field(default=False, repr=False)
|
|
219
|
+
|
|
220
|
+
# --------------------------------------------------------------
|
|
221
|
+
# Public API
|
|
222
|
+
# --------------------------------------------------------------
|
|
223
|
+
|
|
224
|
+
def terminate(self) -> bool:
|
|
225
|
+
"""Send SIGTERM (graceful shutdown request)."""
|
|
226
|
+
if self._process is None or not self.is_alive():
|
|
227
|
+
return False
|
|
228
|
+
try:
|
|
229
|
+
self._process.terminate()
|
|
230
|
+
self.state = AgentProcessState.TERMINATED
|
|
231
|
+
elapsed = time.monotonic() - self._start_time
|
|
232
|
+
self._result = AgentProcessResult(
|
|
233
|
+
agent_id=self.agent_id,
|
|
234
|
+
state=AgentProcessState.TERMINATED,
|
|
235
|
+
error="Terminated by SIGTERM",
|
|
236
|
+
exit_code=-15 if os.name != "nt" else 1,
|
|
237
|
+
duration_seconds=elapsed,
|
|
238
|
+
terminated_by_signal=True,
|
|
239
|
+
)
|
|
240
|
+
logger.info(
|
|
241
|
+
f"[ProcessIsolation] SIGTERM -> agent {self.agent_id} "
|
|
242
|
+
f"(pid={self.pid})"
|
|
243
|
+
)
|
|
244
|
+
return True
|
|
245
|
+
except (OSError, ProcessLookupError) as exc:
|
|
246
|
+
logger.warning(
|
|
247
|
+
f"[ProcessIsolation] terminate failed for "
|
|
248
|
+
f"{self.agent_id}: {exc}"
|
|
249
|
+
)
|
|
250
|
+
return False
|
|
251
|
+
|
|
252
|
+
def kill(self) -> bool:
|
|
253
|
+
"""Send real OS SIGKILL -- truly non-catchable.
|
|
254
|
+
|
|
255
|
+
On Unix: ``os.kill(pid, signal.SIGKILL)``
|
|
256
|
+
On Windows: ``TerminateProcess`` via ``process.kill()``
|
|
257
|
+
This is the real deal -- the OS scheduler handles it.
|
|
258
|
+
"""
|
|
259
|
+
if self._process is None or not self.is_alive():
|
|
260
|
+
return False
|
|
261
|
+
|
|
262
|
+
# Flag early so a concurrent wait() sees it immediately.
|
|
263
|
+
self._killed = True
|
|
264
|
+
|
|
265
|
+
try:
|
|
266
|
+
if os.name != "nt" and self.pid is not None:
|
|
267
|
+
os.kill(self.pid, _signal.SIGKILL)
|
|
268
|
+
else:
|
|
269
|
+
# Windows: process.kill() calls TerminateProcess
|
|
270
|
+
self._process.kill()
|
|
271
|
+
|
|
272
|
+
# Wait briefly for the OS to reap the process.
|
|
273
|
+
if self.isolation_level == IsolationLevel.PROCESS:
|
|
274
|
+
self._process.join(timeout=5)
|
|
275
|
+
elif self.isolation_level == IsolationLevel.SUBPROCESS:
|
|
276
|
+
try:
|
|
277
|
+
self._process.wait(timeout=5)
|
|
278
|
+
except subprocess.TimeoutExpired:
|
|
279
|
+
pass
|
|
280
|
+
|
|
281
|
+
self.state = AgentProcessState.TERMINATED
|
|
282
|
+
elapsed = time.monotonic() - self._start_time
|
|
283
|
+
self._result = AgentProcessResult(
|
|
284
|
+
agent_id=self.agent_id,
|
|
285
|
+
state=AgentProcessState.TERMINATED,
|
|
286
|
+
error="Killed by SIGKILL",
|
|
287
|
+
exit_code=-9 if os.name != "nt" else 1,
|
|
288
|
+
duration_seconds=elapsed,
|
|
289
|
+
terminated_by_signal=True,
|
|
290
|
+
)
|
|
291
|
+
logger.critical(
|
|
292
|
+
f"[ProcessIsolation] SIGKILL -> agent {self.agent_id} "
|
|
293
|
+
f"(pid={self.pid})"
|
|
294
|
+
)
|
|
295
|
+
return True
|
|
296
|
+
except (OSError, ProcessLookupError) as exc:
|
|
297
|
+
logger.warning(
|
|
298
|
+
f"[ProcessIsolation] kill failed for {self.agent_id}: {exc}"
|
|
299
|
+
)
|
|
300
|
+
return False
|
|
301
|
+
|
|
302
|
+
def is_alive(self) -> bool:
|
|
303
|
+
"""Check whether the underlying OS process is still running."""
|
|
304
|
+
if self._process is None:
|
|
305
|
+
return False
|
|
306
|
+
if self.isolation_level == IsolationLevel.PROCESS:
|
|
307
|
+
return self._process.is_alive()
|
|
308
|
+
if self.isolation_level == IsolationLevel.SUBPROCESS:
|
|
309
|
+
return self._process.poll() is None
|
|
310
|
+
return False
|
|
311
|
+
|
|
312
|
+
def wait(self, timeout: Optional[float] = None) -> AgentProcessResult:
|
|
313
|
+
"""Block until the process finishes (or *timeout* expires) and return its result."""
|
|
314
|
+
# Fast path -- already resolved.
|
|
315
|
+
if self._result is not None and not self.is_alive():
|
|
316
|
+
return self._result
|
|
317
|
+
|
|
318
|
+
if self._process is None:
|
|
319
|
+
return AgentProcessResult(
|
|
320
|
+
agent_id=self.agent_id,
|
|
321
|
+
state=AgentProcessState.FAILED,
|
|
322
|
+
error="No process to wait on",
|
|
323
|
+
)
|
|
324
|
+
|
|
325
|
+
if self.isolation_level == IsolationLevel.PROCESS:
|
|
326
|
+
return self._wait_process(timeout)
|
|
327
|
+
|
|
328
|
+
if self.isolation_level == IsolationLevel.SUBPROCESS:
|
|
329
|
+
return self._wait_subprocess(timeout)
|
|
330
|
+
|
|
331
|
+
return AgentProcessResult(
|
|
332
|
+
agent_id=self.agent_id,
|
|
333
|
+
state=AgentProcessState.FAILED,
|
|
334
|
+
error=f"Unsupported isolation level: {self.isolation_level}",
|
|
335
|
+
)
|
|
336
|
+
|
|
337
|
+
# --------------------------------------------------------------
|
|
338
|
+
# Internal helpers
|
|
339
|
+
# --------------------------------------------------------------
|
|
340
|
+
|
|
341
|
+
def _wait_process(self, timeout: Optional[float]) -> AgentProcessResult:
|
|
342
|
+
"""Wait logic for ``IsolationLevel.PROCESS``."""
|
|
343
|
+
self._process.join(timeout=timeout)
|
|
344
|
+
|
|
345
|
+
if self._process.is_alive():
|
|
346
|
+
# Timed out -- forcibly kill.
|
|
347
|
+
self.kill()
|
|
348
|
+
|
|
349
|
+
# Killed (by us, or by an external timeout timer)?
|
|
350
|
+
if self._result is not None:
|
|
351
|
+
return self._result
|
|
352
|
+
|
|
353
|
+
return self._read_queue_result()
|
|
354
|
+
|
|
355
|
+
def _wait_subprocess(self, timeout: Optional[float]) -> AgentProcessResult:
|
|
356
|
+
"""Wait logic for ``IsolationLevel.SUBPROCESS``."""
|
|
357
|
+
try:
|
|
358
|
+
stdout, stderr = self._process.communicate(timeout=timeout)
|
|
359
|
+
except subprocess.TimeoutExpired:
|
|
360
|
+
self.kill()
|
|
361
|
+
if self._result is not None:
|
|
362
|
+
return self._result
|
|
363
|
+
elapsed = time.monotonic() - self._start_time
|
|
364
|
+
self._result = AgentProcessResult(
|
|
365
|
+
agent_id=self.agent_id,
|
|
366
|
+
state=AgentProcessState.TERMINATED,
|
|
367
|
+
error="Timed out",
|
|
368
|
+
duration_seconds=elapsed,
|
|
369
|
+
terminated_by_signal=True,
|
|
370
|
+
)
|
|
371
|
+
self.state = AgentProcessState.TERMINATED
|
|
372
|
+
return self._result
|
|
373
|
+
|
|
374
|
+
if self._result is not None:
|
|
375
|
+
return self._result
|
|
376
|
+
|
|
377
|
+
return self._parse_subprocess_output(stdout, stderr)
|
|
378
|
+
|
|
379
|
+
def _read_queue_result(self) -> AgentProcessResult:
|
|
380
|
+
"""Read the result dict from the multiprocessing Queue."""
|
|
381
|
+
# Killed by another thread (e.g. timeout timer)?
|
|
382
|
+
if self._killed:
|
|
383
|
+
if self._result is not None:
|
|
384
|
+
return self._result
|
|
385
|
+
elapsed = time.monotonic() - self._start_time
|
|
386
|
+
self._result = AgentProcessResult(
|
|
387
|
+
agent_id=self.agent_id,
|
|
388
|
+
state=AgentProcessState.TERMINATED,
|
|
389
|
+
error="Killed",
|
|
390
|
+
exit_code=-9 if os.name != "nt" else 1,
|
|
391
|
+
duration_seconds=elapsed,
|
|
392
|
+
terminated_by_signal=True,
|
|
393
|
+
)
|
|
394
|
+
self.state = AgentProcessState.TERMINATED
|
|
395
|
+
return self._result
|
|
396
|
+
|
|
397
|
+
try:
|
|
398
|
+
if self._result_queue is not None and not self._result_queue.empty():
|
|
399
|
+
data = self._result_queue.get_nowait()
|
|
400
|
+
state = (
|
|
401
|
+
AgentProcessState.COMPLETED
|
|
402
|
+
if data["state"] == "completed"
|
|
403
|
+
else AgentProcessState.FAILED
|
|
404
|
+
)
|
|
405
|
+
self._result = AgentProcessResult(
|
|
406
|
+
agent_id=self.agent_id,
|
|
407
|
+
state=state,
|
|
408
|
+
return_value=data.get("return_value"),
|
|
409
|
+
error=data.get("error"),
|
|
410
|
+
exit_code=data.get("exit_code"),
|
|
411
|
+
duration_seconds=data.get("duration", 0.0),
|
|
412
|
+
)
|
|
413
|
+
else:
|
|
414
|
+
# Process exited without writing to the queue.
|
|
415
|
+
elapsed = time.monotonic() - self._start_time
|
|
416
|
+
exit_code = getattr(self._process, "exitcode", None)
|
|
417
|
+
if exit_code is not None and exit_code < 0:
|
|
418
|
+
self._result = AgentProcessResult(
|
|
419
|
+
agent_id=self.agent_id,
|
|
420
|
+
state=AgentProcessState.TERMINATED,
|
|
421
|
+
error=f"Terminated by signal {-exit_code}",
|
|
422
|
+
exit_code=exit_code,
|
|
423
|
+
duration_seconds=elapsed,
|
|
424
|
+
terminated_by_signal=True,
|
|
425
|
+
)
|
|
426
|
+
else:
|
|
427
|
+
self._result = AgentProcessResult(
|
|
428
|
+
agent_id=self.agent_id,
|
|
429
|
+
state=AgentProcessState.FAILED,
|
|
430
|
+
error=f"Process exited with code {exit_code}",
|
|
431
|
+
exit_code=exit_code,
|
|
432
|
+
duration_seconds=elapsed,
|
|
433
|
+
)
|
|
434
|
+
except Exception as exc: # noqa: BLE001
|
|
435
|
+
elapsed = time.monotonic() - self._start_time
|
|
436
|
+
self._result = AgentProcessResult(
|
|
437
|
+
agent_id=self.agent_id,
|
|
438
|
+
state=AgentProcessState.FAILED,
|
|
439
|
+
error=str(exc),
|
|
440
|
+
duration_seconds=elapsed,
|
|
441
|
+
)
|
|
442
|
+
|
|
443
|
+
self.state = self._result.state
|
|
444
|
+
return self._result
|
|
445
|
+
|
|
446
|
+
def _parse_subprocess_output(
    self, stdout: bytes, stderr: bytes,
) -> AgentProcessResult:
    """Build the agent result from a finished subprocess's captured output.

    The child is expected to print a single JSON *object* on stdout with
    the keys ``state``, ``return_value``, ``error``, ``exit_code`` and
    ``duration``.  When stdout is anything else (empty, plain text, or
    valid JSON that is not an object such as ``42`` or ``[1, 2]``) the
    raw text becomes the return value and the process exit code decides
    between ``COMPLETED`` (0) and ``FAILED`` (non-zero).

    Args:
        stdout: Raw bytes captured from the child's standard output.
        stderr: Raw bytes captured from the child's standard error.

    Returns:
        The parsed result.  It is also stored on ``self._result`` and
        ``self.state`` is updated to the result's state.
    """
    exit_code = self._process.returncode
    elapsed = time.monotonic() - self._start_time

    # Decode once; ``errors="replace"`` guarantees this cannot raise.
    stdout_txt = stdout.decode("utf-8", errors="replace") if stdout else ""

    try:
        data = json.loads(stdout_txt)
    except json.JSONDecodeError:
        data = None

    if isinstance(data, dict):
        state = (
            AgentProcessState.COMPLETED
            if data.get("state") == "completed"
            else AgentProcessState.FAILED
        )
        self._result = AgentProcessResult(
            agent_id=self.agent_id,
            state=state,
            return_value=data.get("return_value"),
            error=data.get("error"),
            exit_code=data.get("exit_code", exit_code),
            duration_seconds=data.get("duration", elapsed),
        )
    else:
        # Not a structured result: json.loads may succeed yet yield a
        # scalar/list/None, which has no ``.get``.  Treat it exactly like
        # unparseable output instead of crashing with AttributeError.
        stderr_txt = (
            stderr.decode("utf-8", errors="replace") if stderr else ""
        )
        self._result = AgentProcessResult(
            agent_id=self.agent_id,
            state=(
                AgentProcessState.COMPLETED
                if exit_code == 0
                else AgentProcessState.FAILED
            ),
            return_value=stdout_txt or None,
            error=stderr_txt or None,
            exit_code=exit_code,
            duration_seconds=elapsed,
        )

    self.state = self._result.state
    return self._result
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
# ================================================================
|
|
492
|
+
# ProcessIsolationManager
|
|
493
|
+
# ================================================================
|
|
494
|
+
|
|
495
|
+
|
|
496
|
+
class ProcessIsolationManager:
    """Manages agent processes with real OS-level isolation.

    Unlike in-process ``AgentKernelPanic`` (which can be caught with
    ``try/except``), this runs agents in separate processes where
    ``os.kill(SIGKILL)`` is truly non-catchable by the agent.

    Handles are tracked by agent id in ``self._handles``.  Access to that
    mapping and to the auto-id counter is serialized by ``self._lock``.
    """

    def __init__(
        self,
        default_isolation: IsolationLevel = IsolationLevel.PROCESS,
    ) -> None:
        """Create a manager with no tracked agents.

        Args:
            default_isolation: Isolation level used by :meth:`spawn` when
                the caller does not pass ``isolation`` explicitly.
        """
        self._default_isolation = default_isolation
        self._handles: Dict[str, AgentProcessHandle] = {}
        self._lock = threading.Lock()
        self._counter = 0

    # ----------------------------------------------------------
    # Spawn
    # ----------------------------------------------------------

    def spawn(
        self,
        target: Callable,
        agent_id: Optional[str] = None,
        args: tuple = (),
        kwargs: Optional[dict] = None,
        isolation: Optional[IsolationLevel] = None,
        timeout: Optional[float] = None,
    ) -> AgentProcessHandle:
        """Spawn an agent function in an isolated process.

        Args:
            target: Callable to run in the child process.
            agent_id: Identifier to track the process under.  When omitted
                an ``agent-NNNN`` id is generated.
            args: Positional arguments forwarded to ``target``.
            kwargs: Keyword arguments forwarded to ``target``.
            isolation: Isolation level for this agent; falls back to the
                manager's default when ``None``.
            timeout: If given, seconds after which a watchdog timer kills
                the process if it is still alive.

        Returns:
            The handle of the newly started process.

        Raises:
            ValueError: If the level is ``COOPERATIVE`` or otherwise
                unsupported, or (subprocess isolation) if ``target`` cannot
                be referenced by module and qualified name.
        """
        if agent_id is None:
            agent_id = self._next_id()

        # Explicit None test so only "not provided" triggers the default.
        level = isolation if isolation is not None else self._default_isolation

        if level == IsolationLevel.COOPERATIVE:
            raise ValueError(
                "Cooperative isolation is in-process only. "
                "Use SignalDispatcher directly for cooperative mode."
            )

        if level == IsolationLevel.PROCESS:
            handle = self._spawn_multiprocessing(
                agent_id, target, args, kwargs,
            )
        elif level == IsolationLevel.SUBPROCESS:
            handle = self._spawn_subprocess(
                agent_id, target, args, kwargs,
            )
        else:
            raise ValueError(f"Unsupported isolation level: {level}")

        with self._lock:
            self._handles[agent_id] = handle

        # Optional watchdog timer.  It is bound to *this* handle rather
        # than looked up by id when it fires, so a stale timer can never
        # kill a different process that later reuses the same agent id.
        if timeout is not None:
            timer = threading.Timer(
                timeout,
                self._on_timeout,
                args=(agent_id,),
                kwargs={"expected": handle},
            )
            timer.daemon = True
            timer.start()

        logger.info(
            f"[ProcessIsolation] Spawned {agent_id} "
            f"(pid={handle.pid}, isolation={level.value})"
        )
        return handle

    # ----------------------------------------------------------
    # Kill / Terminate
    # ----------------------------------------------------------

    def kill(self, agent_id: str, reason: str = "") -> bool:
        """Forcefully kill the agent's process via its handle.

        Args:
            agent_id: Id of the agent to kill.
            reason: Free-text reason, written to the log only.

        Returns:
            ``False`` if the agent id is unknown, otherwise whatever the
            handle's ``kill()`` returns.
        """
        handle = self.get_handle(agent_id)
        if handle is None:
            logger.warning(
                f"[ProcessIsolation] kill: unknown agent {agent_id}"
            )
            return False
        logger.info(f"[ProcessIsolation] kill({agent_id}): {reason}")
        return handle.kill()

    def terminate(self, agent_id: str, reason: str = "") -> bool:
        """Request a graceful shutdown of the agent's process via its handle.

        Args:
            agent_id: Id of the agent to terminate.
            reason: Free-text reason, written to the log only.

        Returns:
            ``False`` if the agent id is unknown, otherwise whatever the
            handle's ``terminate()`` returns.
        """
        handle = self.get_handle(agent_id)
        if handle is None:
            logger.warning(
                f"[ProcessIsolation] terminate: unknown agent {agent_id}"
            )
            return False
        logger.info(f"[ProcessIsolation] terminate({agent_id}): {reason}")
        return handle.terminate()

    def kill_all(self, reason: str = "") -> int:
        """Kill all running agents. Returns count killed.

        The handle list is snapshotted under the lock and the kills are
        issued outside it, so a slow kill does not block other callers.
        """
        killed = 0
        with self._lock:
            handles = list(self._handles.values())
        for h in handles:
            if h.is_alive() and h.kill():
                killed += 1
        logger.info(
            f"[ProcessIsolation] kill_all: {killed} agents killed"
            + (f" -- {reason}" if reason else "")
        )
        return killed

    # ----------------------------------------------------------
    # Queries
    # ----------------------------------------------------------

    def get_handle(self, agent_id: str) -> Optional[AgentProcessHandle]:
        """Retrieve the handle for a specific agent, or ``None`` if unknown."""
        with self._lock:
            return self._handles.get(agent_id)

    def list_agents(self) -> List[AgentProcessHandle]:
        """Return a snapshot of all tracked agent handles."""
        with self._lock:
            return list(self._handles.values())

    # ----------------------------------------------------------
    # Maintenance
    # ----------------------------------------------------------

    def cleanup(self) -> None:
        """Remove completed / terminated / failed processes from tracking.

        A handle is dropped only when its process is no longer alive *and*
        its recorded state is one of the three final states.
        """
        finished_states = (
            AgentProcessState.COMPLETED,
            AgentProcessState.TERMINATED,
            AgentProcessState.FAILED,
        )
        with self._lock:
            remove = [
                aid
                for aid, h in self._handles.items()
                if not h.is_alive() and h.state in finished_states
            ]
            for aid in remove:
                del self._handles[aid]
        if remove:
            logger.debug(
                f"[ProcessIsolation] Cleaned up {len(remove)} processes"
            )

    # ----------------------------------------------------------
    # Internal helpers
    # ----------------------------------------------------------

    def _next_id(self) -> str:
        """Return the next auto-generated agent id (``agent-0001``, ...)."""
        # Guard the read-modify-write so concurrent spawns cannot be handed
        # the same id.  Callers must not already hold ``self._lock``.
        with self._lock:
            self._counter += 1
            return f"agent-{self._counter:04d}"

    def _spawn_multiprocessing(
        self,
        agent_id: str,
        target: Callable,
        args: tuple,
        kwargs: Optional[dict],
    ) -> AgentProcessHandle:
        """Start ``target`` in a daemon ``multiprocessing.Process``.

        The child runs ``_agent_worker``, which receives the target, its
        arguments and a queue; the same queue is attached to the returned
        handle as ``_result_queue``.
        """
        q: multiprocessing.Queue = multiprocessing.Queue()
        p = multiprocessing.Process(
            target=_agent_worker,
            args=(target, args, kwargs or {}, q),
            daemon=True,
        )
        p.start()
        return AgentProcessHandle(
            agent_id=agent_id,
            pid=p.pid,
            state=AgentProcessState.RUNNING,
            isolation_level=IsolationLevel.PROCESS,
            _process=p,
            _result_queue=q,
            _start_time=time.monotonic(),
        )

    def _spawn_subprocess(
        self,
        agent_id: str,
        target: Callable,
        args: tuple,
        kwargs: Optional[dict],
    ) -> AgentProcessHandle:
        """Start ``target`` in a fresh Python interpreter.

        The target is sent to the child by reference (module + qualified
        name) together with JSON-encoded arguments on the child's stdin.

        Raises:
            ValueError: If ``target`` is a lambda, a nested function, or
                otherwise lacks a usable ``__module__``/``__qualname__``.
            TypeError: If ``args``/``kwargs`` are not JSON-serializable
                (raised by ``json.dumps``).
            OSError: If the payload cannot be written to the child.
        """
        # Validate target is an importable function (not a lambda/closure).
        # ``hasattr`` alone is not enough: lambdas and nested functions do
        # have both attributes, but their qualified names contain
        # ``<lambda>`` / ``<locals>`` and cannot be resolved by name.
        module_name = getattr(target, "__module__", None)
        qualname = getattr(target, "__qualname__", None)
        if (
            not module_name
            or not qualname
            or "<lambda>" in qualname
            or "<locals>" in qualname
        ):
            raise ValueError(
                f"Target callable {target!r} must be a module-level function "
                "with __module__ and __qualname__ for subprocess isolation"
            )
        # Serialize as JSON with function reference instead of pickling callables
        payload = json.dumps({
            "module": module_name,
            "qualname": qualname,
            "args": list(args),
            "kwargs": kwargs or {},
        }).encode('utf-8')
        # Sign payload with HMAC to prevent tampering
        # NOTE(review): the key travels in the same message as the
        # signature, so this detects corruption but not a party able to
        # rewrite the whole message.  Also, the random key/signature bytes
        # may themselves contain b"|" -- confirm the bootstrap parses the
        # frame by fixed offsets (32-byte key, 32-byte digest) and not by
        # splitting on the separator.
        hmac_key = os.urandom(32)
        sig = hmac.new(hmac_key, payload, hashlib.sha256).digest()
        encoded = base64.b64encode(hmac_key + b"|" + sig + b"|" + payload)
        proc = subprocess.Popen(
            [sys.executable, "-c", _SUBPROCESS_BOOTSTRAP],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        try:
            proc.stdin.write(encoded)  # type: ignore[union-attr]
            proc.stdin.close()  # type: ignore[union-attr]
        except OSError:
            # The child went away before consuming its payload (e.g.
            # BrokenPipeError).  Reap it and release the pipes so nothing
            # is leaked, then surface the original error to the caller.
            proc.kill()
            proc.wait()
            for stream in (proc.stdout, proc.stderr):
                if stream is not None:
                    stream.close()
            raise
        return AgentProcessHandle(
            agent_id=agent_id,
            pid=proc.pid,
            state=AgentProcessState.RUNNING,
            isolation_level=IsolationLevel.SUBPROCESS,
            _process=proc,
            _start_time=time.monotonic(),
        )

    def _on_timeout(
        self,
        agent_id: str,
        expected: Optional[AgentProcessHandle] = None,
    ) -> None:
        """Watchdog callback: kill the agent if it is still alive.

        Args:
            agent_id: Id the watchdog was armed for (used for logging and,
                when ``expected`` is not given, for the lookup).
            expected: The exact handle the watchdog was armed for.  When
                provided it is used directly, so a process that was later
                registered under the same id is never affected.
        """
        handle = expected if expected is not None else self.get_handle(agent_id)
        if handle is not None and handle.is_alive():
            logger.warning(
                f"[ProcessIsolation] Timeout -> killing {agent_id}"
            )
            handle.kill()
|
|
725
|
+
|
|
726
|
+
|
|
727
|
+
# ================================================================
|
|
728
|
+
# IsolatedSignalDispatcher
|
|
729
|
+
# ================================================================
|
|
730
|
+
|
|
731
|
+
|
|
732
|
+
class IsolatedSignalDispatcher(SignalDispatcher):
    """Signal dispatcher whose kill handler targets a real OS process.

    Subclass of :class:`SignalDispatcher` that overrides ``_handle_kill``:
    when the :class:`ProcessIsolationManager` tracks a live process for
    this agent, the kill is carried out on that process; in every other
    case the inherited handler runs unchanged.
    """

    def __init__(
        self,
        agent_id: str,
        process_manager: Optional[ProcessIsolationManager] = None,
    ) -> None:
        """Initialise the dispatcher.

        Args:
            agent_id: Id of the agent this dispatcher serves.
            process_manager: Manager used to look up the agent's process.
                A fresh manager is created when none is supplied.
        """
        super().__init__(agent_id)
        manager = process_manager if process_manager else ProcessIsolationManager()
        self._process_manager = manager

    def _handle_kill(self, info: SignalInfo) -> None:
        """Override: route SIGKILL through real process isolation."""
        proc_handle = self._process_manager.get_handle(self.agent_id)

        # Nothing isolated to kill (untracked or already dead): defer to
        # the inherited, in-process behaviour.
        if proc_handle is None or not proc_handle.is_alive():
            super()._handle_kill(info)
            return

        logger.critical(
            f"[IsolatedSignalDispatcher] SIGKILL -> os.kill for "
            f"{self.agent_id} (pid={proc_handle.pid})"
        )
        proc_handle.kill()
        # Record locally that the agent is both terminated and stopped.
        self._is_terminated = True
        self._is_stopped = True
|
|
762
|
+
|
|
763
|
+
|
|
764
|
+
# ================================================================
|
|
765
|
+
# Factory
|
|
766
|
+
# ================================================================
|
|
767
|
+
|
|
768
|
+
|
|
769
|
+
def create_isolated_signal_dispatcher(
    agent_id: str,
    **kwargs: Any,
) -> IsolatedSignalDispatcher:
    """Build an :class:`IsolatedSignalDispatcher` for ``agent_id``.

    Only the ``process_manager`` keyword is consulted; it is forwarded to
    the dispatcher (``None`` when absent).  Any other keyword arguments
    are accepted and ignored.
    """
    manager = kwargs.get("process_manager")
    dispatcher = IsolatedSignalDispatcher(
        agent_id=agent_id,
        process_manager=manager,
    )
    return dispatcher
|