agent_os_kernel 3.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (337) hide show
  1. agent_control_plane/__init__.py +662 -0
  2. agent_control_plane/a2a_adapter.py +543 -0
  3. agent_control_plane/adapter.py +417 -0
  4. agent_control_plane/agent_hibernation.py +394 -0
  5. agent_control_plane/agent_kernel.py +470 -0
  6. agent_control_plane/compliance.py +720 -0
  7. agent_control_plane/constraint_graphs.py +478 -0
  8. agent_control_plane/control_plane.py +854 -0
  9. agent_control_plane/example_executors.py +195 -0
  10. agent_control_plane/execution_engine.py +231 -0
  11. agent_control_plane/flight_recorder.py +846 -0
  12. agent_control_plane/governance_layer.py +435 -0
  13. agent_control_plane/hf_utils.py +563 -0
  14. agent_control_plane/interfaces/__init__.py +55 -0
  15. agent_control_plane/interfaces/kernel_interface.py +361 -0
  16. agent_control_plane/interfaces/plugin_interface.py +497 -0
  17. agent_control_plane/interfaces/protocol_interfaces.py +387 -0
  18. agent_control_plane/kernel_space.py +1009 -0
  19. agent_control_plane/langchain_adapter.py +424 -0
  20. agent_control_plane/lifecycle.py +3113 -0
  21. agent_control_plane/mcp_adapter.py +653 -0
  22. agent_control_plane/ml_safety.py +563 -0
  23. agent_control_plane/multimodal.py +727 -0
  24. agent_control_plane/mute_agent.py +422 -0
  25. agent_control_plane/observability.py +787 -0
  26. agent_control_plane/orchestrator.py +482 -0
  27. agent_control_plane/plugin_registry.py +750 -0
  28. agent_control_plane/policy_engine.py +954 -0
  29. agent_control_plane/process_isolation.py +777 -0
  30. agent_control_plane/shadow_mode.py +310 -0
  31. agent_control_plane/signals.py +493 -0
  32. agent_control_plane/supervisor_agents.py +430 -0
  33. agent_control_plane/time_travel_debugger.py +557 -0
  34. agent_control_plane/tool_registry.py +452 -0
  35. agent_control_plane/vfs.py +697 -0
  36. agent_kernel/__init__.py +69 -0
  37. agent_kernel/analyzer.py +435 -0
  38. agent_kernel/auditor.py +36 -0
  39. agent_kernel/completeness_auditor.py +237 -0
  40. agent_kernel/detector.py +203 -0
  41. agent_kernel/kernel.py +744 -0
  42. agent_kernel/memory_manager.py +85 -0
  43. agent_kernel/models.py +374 -0
  44. agent_kernel/nudge_mechanism.py +263 -0
  45. agent_kernel/outcome_analyzer.py +338 -0
  46. agent_kernel/patcher.py +582 -0
  47. agent_kernel/semantic_analyzer.py +316 -0
  48. agent_kernel/semantic_purge.py +349 -0
  49. agent_kernel/simulator.py +449 -0
  50. agent_kernel/teacher.py +85 -0
  51. agent_kernel/triage.py +152 -0
  52. agent_os/__init__.py +409 -0
  53. agent_os/_adversarial_impl.py +200 -0
  54. agent_os/_circuit_breaker_impl.py +232 -0
  55. agent_os/_mcp_metrics.py +193 -0
  56. agent_os/adversarial.py +20 -0
  57. agent_os/agents_compat.py +490 -0
  58. agent_os/audit_logger.py +135 -0
  59. agent_os/base_agent.py +651 -0
  60. agent_os/circuit_breaker.py +34 -0
  61. agent_os/cli/__init__.py +659 -0
  62. agent_os/cli/cmd_audit.py +128 -0
  63. agent_os/cli/cmd_init.py +152 -0
  64. agent_os/cli/cmd_policy.py +41 -0
  65. agent_os/cli/cmd_policy_gen.py +180 -0
  66. agent_os/cli/cmd_validate.py +258 -0
  67. agent_os/cli/mcp_scan.py +265 -0
  68. agent_os/cli/output.py +192 -0
  69. agent_os/cli/policy_checker.py +330 -0
  70. agent_os/compat.py +74 -0
  71. agent_os/constraint_graph.py +234 -0
  72. agent_os/content_governance.py +140 -0
  73. agent_os/context_budget.py +305 -0
  74. agent_os/credential_redactor.py +224 -0
  75. agent_os/diff_policy.py +89 -0
  76. agent_os/egress_policy.py +159 -0
  77. agent_os/escalation.py +276 -0
  78. agent_os/event_bus.py +124 -0
  79. agent_os/exceptions.py +180 -0
  80. agent_os/execution_context_policy.py +141 -0
  81. agent_os/github_enterprise.py +96 -0
  82. agent_os/health.py +20 -0
  83. agent_os/integrations/__init__.py +279 -0
  84. agent_os/integrations/a2a_adapter.py +279 -0
  85. agent_os/integrations/agent_lightning/__init__.py +30 -0
  86. agent_os/integrations/anthropic_adapter.py +420 -0
  87. agent_os/integrations/autogen_adapter.py +620 -0
  88. agent_os/integrations/base.py +1137 -0
  89. agent_os/integrations/compat.py +229 -0
  90. agent_os/integrations/config.py +98 -0
  91. agent_os/integrations/conversation_guardian.py +957 -0
  92. agent_os/integrations/crewai_adapter.py +467 -0
  93. agent_os/integrations/drift_detector.py +425 -0
  94. agent_os/integrations/dry_run.py +124 -0
  95. agent_os/integrations/escalation.py +582 -0
  96. agent_os/integrations/gemini_adapter.py +364 -0
  97. agent_os/integrations/google_adk_adapter.py +633 -0
  98. agent_os/integrations/guardrails_adapter.py +394 -0
  99. agent_os/integrations/health.py +197 -0
  100. agent_os/integrations/langchain_adapter.py +654 -0
  101. agent_os/integrations/llamafirewall.py +343 -0
  102. agent_os/integrations/llamaindex_adapter.py +188 -0
  103. agent_os/integrations/logging.py +191 -0
  104. agent_os/integrations/maf_adapter.py +631 -0
  105. agent_os/integrations/mistral_adapter.py +365 -0
  106. agent_os/integrations/openai_adapter.py +816 -0
  107. agent_os/integrations/openai_agents_sdk.py +406 -0
  108. agent_os/integrations/policy_compose.py +171 -0
  109. agent_os/integrations/profiling.py +144 -0
  110. agent_os/integrations/pydantic_ai_adapter.py +420 -0
  111. agent_os/integrations/rate_limiter.py +130 -0
  112. agent_os/integrations/rbac.py +143 -0
  113. agent_os/integrations/registry.py +113 -0
  114. agent_os/integrations/scope_guard.py +303 -0
  115. agent_os/integrations/semantic_kernel_adapter.py +769 -0
  116. agent_os/integrations/smolagents_adapter.py +629 -0
  117. agent_os/integrations/templates.py +178 -0
  118. agent_os/integrations/token_budget.py +134 -0
  119. agent_os/integrations/tool_aliases.py +190 -0
  120. agent_os/integrations/webhooks.py +177 -0
  121. agent_os/lite.py +208 -0
  122. agent_os/mcp_gateway.py +385 -0
  123. agent_os/mcp_message_signer.py +273 -0
  124. agent_os/mcp_protocols.py +161 -0
  125. agent_os/mcp_response_scanner.py +232 -0
  126. agent_os/mcp_security.py +924 -0
  127. agent_os/mcp_session_auth.py +231 -0
  128. agent_os/mcp_sliding_rate_limiter.py +184 -0
  129. agent_os/memory_guard.py +409 -0
  130. agent_os/metrics.py +134 -0
  131. agent_os/mute.py +428 -0
  132. agent_os/mute_agent.py +209 -0
  133. agent_os/policies/__init__.py +77 -0
  134. agent_os/policies/async_evaluator.py +275 -0
  135. agent_os/policies/backends.py +670 -0
  136. agent_os/policies/bridge.py +169 -0
  137. agent_os/policies/budget.py +85 -0
  138. agent_os/policies/cli.py +294 -0
  139. agent_os/policies/conflict_resolution.py +270 -0
  140. agent_os/policies/data_classification.py +252 -0
  141. agent_os/policies/evaluator.py +239 -0
  142. agent_os/policies/policy_schema.json +228 -0
  143. agent_os/policies/rate_limiting.py +145 -0
  144. agent_os/policies/schema.py +115 -0
  145. agent_os/policies/shared.py +331 -0
  146. agent_os/prompt_injection.py +694 -0
  147. agent_os/providers.py +182 -0
  148. agent_os/py.typed +0 -0
  149. agent_os/retry.py +81 -0
  150. agent_os/reversibility.py +251 -0
  151. agent_os/sandbox.py +432 -0
  152. agent_os/sandbox_provider.py +140 -0
  153. agent_os/secure_codegen.py +525 -0
  154. agent_os/security_skills.py +538 -0
  155. agent_os/semantic_policy.py +422 -0
  156. agent_os/server/__init__.py +15 -0
  157. agent_os/server/__main__.py +25 -0
  158. agent_os/server/app.py +277 -0
  159. agent_os/server/models.py +104 -0
  160. agent_os/shift_left_metrics.py +130 -0
  161. agent_os/stateless.py +742 -0
  162. agent_os/supervisor.py +148 -0
  163. agent_os/task_outcome.py +148 -0
  164. agent_os/transparency.py +181 -0
  165. agent_os/trust_root.py +128 -0
  166. agent_os_kernel-3.1.0.dist-info/METADATA +1269 -0
  167. agent_os_kernel-3.1.0.dist-info/RECORD +337 -0
  168. agent_os_kernel-3.1.0.dist-info/WHEEL +4 -0
  169. agent_os_kernel-3.1.0.dist-info/entry_points.txt +2 -0
  170. agent_os_kernel-3.1.0.dist-info/licenses/LICENSE +21 -0
  171. agent_os_observability/__init__.py +27 -0
  172. agent_os_observability/dashboards.py +898 -0
  173. agent_os_observability/metrics.py +398 -0
  174. agent_os_observability/server.py +223 -0
  175. agent_os_observability/tracer.py +232 -0
  176. agent_primitives/__init__.py +24 -0
  177. agent_primitives/failures.py +84 -0
  178. agent_primitives/py.typed +0 -0
  179. amb_core/__init__.py +177 -0
  180. amb_core/adapters/__init__.py +57 -0
  181. amb_core/adapters/aws_sqs_broker.py +376 -0
  182. amb_core/adapters/azure_servicebus_broker.py +340 -0
  183. amb_core/adapters/kafka_broker.py +260 -0
  184. amb_core/adapters/nats_broker.py +285 -0
  185. amb_core/adapters/rabbitmq_broker.py +235 -0
  186. amb_core/adapters/redis_broker.py +262 -0
  187. amb_core/broker.py +145 -0
  188. amb_core/bus.py +481 -0
  189. amb_core/cloudevents.py +509 -0
  190. amb_core/dlq.py +345 -0
  191. amb_core/hf_utils.py +536 -0
  192. amb_core/memory_broker.py +410 -0
  193. amb_core/models.py +141 -0
  194. amb_core/persistence.py +529 -0
  195. amb_core/schema.py +294 -0
  196. amb_core/tracing.py +358 -0
  197. atr/__init__.py +640 -0
  198. atr/access.py +348 -0
  199. atr/composition.py +645 -0
  200. atr/decorator.py +357 -0
  201. atr/executor.py +384 -0
  202. atr/health.py +557 -0
  203. atr/hf_utils.py +449 -0
  204. atr/injection.py +422 -0
  205. atr/metrics.py +440 -0
  206. atr/policies.py +403 -0
  207. atr/py.typed +2 -0
  208. atr/registry.py +452 -0
  209. atr/schema.py +480 -0
  210. atr/tools/safe/__init__.py +75 -0
  211. atr/tools/safe/calculator.py +467 -0
  212. atr/tools/safe/datetime_tool.py +443 -0
  213. atr/tools/safe/file_reader.py +402 -0
  214. atr/tools/safe/http_client.py +316 -0
  215. atr/tools/safe/json_parser.py +374 -0
  216. atr/tools/safe/text_tool.py +537 -0
  217. atr/tools/safe/toolkit.py +175 -0
  218. caas/__init__.py +162 -0
  219. caas/api/__init__.py +7 -0
  220. caas/api/server.py +1328 -0
  221. caas/caching.py +834 -0
  222. caas/cli.py +210 -0
  223. caas/conversation.py +223 -0
  224. caas/decay.py +72 -0
  225. caas/detection/__init__.py +9 -0
  226. caas/detection/detector.py +238 -0
  227. caas/enrichment.py +130 -0
  228. caas/gateway/__init__.py +27 -0
  229. caas/gateway/trust_gateway.py +474 -0
  230. caas/hf_utils.py +479 -0
  231. caas/ingestion/__init__.py +23 -0
  232. caas/ingestion/processors.py +253 -0
  233. caas/ingestion/structure_parser.py +188 -0
  234. caas/models.py +356 -0
  235. caas/pragmatic_truth.py +444 -0
  236. caas/routing/__init__.py +10 -0
  237. caas/routing/heuristic_router.py +58 -0
  238. caas/storage/__init__.py +9 -0
  239. caas/storage/store.py +389 -0
  240. caas/triad.py +213 -0
  241. caas/tuning/__init__.py +9 -0
  242. caas/tuning/tuner.py +329 -0
  243. caas/vfs/__init__.py +14 -0
  244. caas/vfs/filesystem.py +452 -0
  245. cmvk/__init__.py +218 -0
  246. cmvk/audit.py +402 -0
  247. cmvk/benchmarks.py +478 -0
  248. cmvk/constitutional.py +904 -0
  249. cmvk/hf_utils.py +301 -0
  250. cmvk/metrics.py +473 -0
  251. cmvk/profiles.py +300 -0
  252. cmvk/py.typed +0 -0
  253. cmvk/types.py +12 -0
  254. cmvk/verification.py +956 -0
  255. emk/__init__.py +89 -0
  256. emk/causal.py +352 -0
  257. emk/hf_utils.py +421 -0
  258. emk/indexer.py +83 -0
  259. emk/py.typed +0 -0
  260. emk/schema.py +204 -0
  261. emk/sleep_cycle.py +347 -0
  262. emk/store.py +281 -0
  263. iatp/__init__.py +166 -0
  264. iatp/attestation.py +461 -0
  265. iatp/cli.py +317 -0
  266. iatp/hf_utils.py +472 -0
  267. iatp/ipc_pipes.py +580 -0
  268. iatp/main.py +412 -0
  269. iatp/models/__init__.py +447 -0
  270. iatp/policy_engine.py +337 -0
  271. iatp/py.typed +2 -0
  272. iatp/recovery.py +321 -0
  273. iatp/security/__init__.py +270 -0
  274. iatp/sidecar/__init__.py +519 -0
  275. iatp/telemetry/__init__.py +164 -0
  276. iatp/tests/__init__.py +1 -0
  277. iatp/tests/test_attestation.py +370 -0
  278. iatp/tests/test_cli.py +131 -0
  279. iatp/tests/test_ed25519_attestation.py +211 -0
  280. iatp/tests/test_models.py +130 -0
  281. iatp/tests/test_policy_engine.py +347 -0
  282. iatp/tests/test_recovery.py +281 -0
  283. iatp/tests/test_security.py +222 -0
  284. iatp/tests/test_sidecar.py +167 -0
  285. iatp/tests/test_telemetry.py +175 -0
  286. mcp_kernel_server/__init__.py +28 -0
  287. mcp_kernel_server/cli.py +274 -0
  288. mcp_kernel_server/resources.py +217 -0
  289. mcp_kernel_server/server.py +564 -0
  290. mcp_kernel_server/tools.py +1174 -0
  291. mute_agent/__init__.py +68 -0
  292. mute_agent/core/__init__.py +1 -0
  293. mute_agent/core/execution_agent.py +166 -0
  294. mute_agent/core/handshake_protocol.py +201 -0
  295. mute_agent/core/reasoning_agent.py +238 -0
  296. mute_agent/knowledge_graph/__init__.py +1 -0
  297. mute_agent/knowledge_graph/graph_elements.py +65 -0
  298. mute_agent/knowledge_graph/multidimensional_graph.py +170 -0
  299. mute_agent/knowledge_graph/subgraph.py +224 -0
  300. mute_agent/listener/__init__.py +43 -0
  301. mute_agent/listener/adapters/__init__.py +31 -0
  302. mute_agent/listener/adapters/base_adapter.py +189 -0
  303. mute_agent/listener/adapters/caas_adapter.py +344 -0
  304. mute_agent/listener/adapters/control_plane_adapter.py +436 -0
  305. mute_agent/listener/adapters/iatp_adapter.py +332 -0
  306. mute_agent/listener/adapters/scak_adapter.py +251 -0
  307. mute_agent/listener/listener.py +610 -0
  308. mute_agent/listener/state_observer.py +436 -0
  309. mute_agent/listener/threshold_config.py +313 -0
  310. mute_agent/super_system/__init__.py +1 -0
  311. mute_agent/super_system/router.py +204 -0
  312. mute_agent/visualization/__init__.py +10 -0
  313. mute_agent/visualization/graph_debugger.py +502 -0
  314. nexus/README.md +60 -0
  315. nexus/__init__.py +51 -0
  316. nexus/arbiter.py +359 -0
  317. nexus/client.py +466 -0
  318. nexus/dmz.py +444 -0
  319. nexus/escrow.py +430 -0
  320. nexus/exceptions.py +286 -0
  321. nexus/pyproject.toml +36 -0
  322. nexus/registry.py +393 -0
  323. nexus/reputation.py +425 -0
  324. nexus/schemas/__init__.py +51 -0
  325. nexus/schemas/compliance.py +276 -0
  326. nexus/schemas/escrow.py +251 -0
  327. nexus/schemas/manifest.py +225 -0
  328. nexus/schemas/receipt.py +208 -0
  329. nexus/tests/__init__.py +0 -0
  330. nexus/tests/conftest.py +146 -0
  331. nexus/tests/test_arbiter.py +192 -0
  332. nexus/tests/test_dmz.py +194 -0
  333. nexus/tests/test_escrow.py +276 -0
  334. nexus/tests/test_exceptions.py +225 -0
  335. nexus/tests/test_registry.py +232 -0
  336. nexus/tests/test_reputation.py +328 -0
  337. nexus/tests/test_schemas.py +295 -0
@@ -0,0 +1,777 @@
1
+ # Copyright (c) Microsoft Corporation.
2
+ # Licensed under the MIT License.
3
+ """
4
+ Process-Level Agent Isolation
5
+
6
+ Provides real OS-level process isolation for agent execution, addressing
7
+ the limitation that in-process SIGKILL (AgentKernelPanic) is catchable
8
+ by a broad ``except BaseException`` in Python.
9
+
10
+ This module runs agents in separate processes where ``os.kill(SIGKILL)``
11
+ is truly non-catchable by agent code.
12
+
13
+ Architecture::
14
+
15
+ +-------------------------------------------+
16
+ | Supervisor Process (Kernel Space) |
17
+ | - ProcessIsolationManager |
18
+ | - IsolatedSignalDispatcher |
19
+ | |
20
+ | +------------+ +------------+ |
21
+ | | Agent A | | Agent B | |
22
+ | | (Process) | | (Process) | |
23
+ | +------------+ +------------+ |
24
+ +-------------------------------------------+
25
+
26
+ Isolation Levels:
27
+ COOPERATIVE -- In-process, exception-based (current behaviour)
28
+ PROCESS -- Separate process via multiprocessing.Process
29
+ SUBPROCESS -- Separate process via subprocess.Popen
30
+
31
+ Security Model:
32
+ - In-process signals = cooperative path (can be caught)
33
+ - Process-level kill = enforcement path (non-catchable)
34
+
35
+ See Also:
36
+ signals.py -- In-process cooperative signal handling
37
+ """
38
+
39
+ from __future__ import annotations
40
+
41
+ import base64
42
+ import hashlib
43
+ import hmac
44
+ import json
45
+ import logging
46
+ import multiprocessing
47
+ import os
48
+ import signal as _signal
49
+ import subprocess
50
+ import sys
51
+ import threading
52
+ import time
53
+ from dataclasses import dataclass, field
54
+ from enum import Enum
55
+ from typing import Any, Callable, Dict, List, Optional
56
+
57
+ from .signals import AgentKernelPanic, AgentSignal, SignalDispatcher, SignalInfo
58
+
59
+ logger = logging.getLogger(__name__)
60
+
61
+
62
+ # ================================================================
63
+ # Enums
64
+ # ================================================================
65
+
66
+
67
class IsolationLevel(str, Enum):
    """How strongly an agent's execution is separated from the supervisor.

    Members also subclass ``str``, so each one compares equal to its plain
    string value (``IsolationLevel.PROCESS == "process"``).
    """

    # In-process, exception-based (current behaviour).
    COOPERATIVE = "cooperative"
    # Separate process via multiprocessing.
    PROCESS = "process"
    # Separate process via subprocess.Popen.
    SUBPROCESS = "subprocess"
73
+
74
+
75
class AgentProcessState(str, Enum):
    """Lifecycle states an isolated agent process can be reported in.

    Members also subclass ``str``, so each one compares equal to its plain
    string value.
    """

    # Handle exists but has not been marked as running yet.
    PENDING = "pending"
    # Process is executing.
    RUNNING = "running"
    # Target returned and reported a "completed" outcome.
    COMPLETED = "completed"
    # Process was stopped by a signal / forced termination.
    TERMINATED = "terminated"
    # Target raised, exited abnormally, or produced no usable result.
    FAILED = "failed"
83
+
84
+
85
+ # ================================================================
86
+ # Result dataclass
87
+ # ================================================================
88
+
89
+
90
@dataclass
class AgentProcessResult:
    """Outcome record for a single isolated agent process.

    Only ``agent_id`` and ``state`` are required; every other field has a
    neutral default so partial results (e.g. "no process to wait on") can be
    built with just those two.
    """

    # Identifier of the agent this result belongs to.
    agent_id: str
    # Final lifecycle state reported for the process.
    state: AgentProcessState
    # Value produced by the target, when one was reported.
    return_value: Any = None
    # Human-readable failure / termination description, if any.
    error: Optional[str] = None
    # Exit code reported for the process, if known.
    exit_code: Optional[int] = None
    # Run time in seconds (measured with a monotonic clock by the producers).
    duration_seconds: float = 0.0
    # True when the process was stopped by a signal rather than finishing.
    terminated_by_signal: bool = False
101
+
102
+
103
+ # ================================================================
104
+ # Worker function (module top-level so multiprocessing can pickle it)
105
+ # ================================================================
106
+
107
+
108
def _agent_worker(
    target: Callable,
    args: tuple,
    kwargs: dict,
    result_queue: multiprocessing.Queue,
) -> None:
    """Run *target* in the child process and report the outcome on *result_queue*.

    Exactly one dict is put on the queue per successful report, with the keys
    ``state``, ``return_value``, ``error``, ``exit_code`` and ``duration``.

    Args:
        target: Callable to execute.
        args: Positional arguments for *target*.
        kwargs: Keyword arguments for *target*; a falsy value means "none".
        result_queue: Queue-like object whose ``put`` receives the outcome.
    """
    began = time.monotonic()

    def _report(state, return_value, error, exit_code):
        # Duration is sampled when the dict is built, right before the put.
        result_queue.put({
            "state": state,
            "return_value": return_value,
            "error": error,
            "exit_code": exit_code,
            "duration": time.monotonic() - began,
        })

    try:
        outcome = target(*args, **(kwargs or {}))
        _report("completed", outcome, None, 0)
    except SystemExit as exit_request:
        # Non-integer exit codes (None, strings, ...) are reported as 1.
        requested = exit_request.code
        numeric_code = requested if isinstance(requested, int) else 1
        _report("failed", None, f"SystemExit({requested})", numeric_code)
    except BaseException as failure:
        # BaseException on purpose: KeyboardInterrupt & co. are reported too.
        _report("failed", None, f"{type(failure).__name__}: {failure}", 1)
142
+
143
+
144
# Bootstrap script executed inside a ``subprocess.Popen`` child.
# The parent sends: base64(hmac_key + b"|" + hmac_sig + b"|" + json_payload)
# where hmac_sig is the raw HMAC-SHA256 digest of json_payload under hmac_key.
# The child verifies the HMAC before deserializing.
# The JSON payload contains {"module": "...", "qualname": "...", "args": [...], "kwargs": {...}}
# and the target function is resolved via importlib, avoiding pickle deserialization.
#
# Framing note: the signature is raw digest bytes and may itself contain the
# b"|" separator byte, so the frame cannot be parsed with a plain
# ``split(b"|", 2)``.  Instead every b"|" is tried as the end of the key; a
# candidate is accepted only if a second b"|" follows exactly one digest
# length later AND the HMAC over the remainder verifies.  Frames that the
# naive split parsed correctly are parsed identically.
#
# The child always writes a single JSON object to stdout.  ``exit_code`` in
# that object is always an int (non-int ``SystemExit`` codes are reported as
# 1, matching ``_agent_worker``), and any ``BaseException`` raised by the
# target is reported as a failure, again matching ``_agent_worker``.
_SUBPROCESS_BOOTSTRAP = """\
import base64, hashlib, hmac, importlib, json, sys, time
raw = base64.b64decode(sys.stdin.buffer.read())
if raw.count(b"|") < 2:
    json.dump({"state": "failed", "error": "Invalid bootstrap payload format", "exit_code": 1, "duration": 0}, sys.stdout)
    sys.exit(1)
_sig_len = hashlib.sha256().digest_size
_payload = None
_sep = raw.find(b"|")
while _sep != -1 and _payload is None:
    _sig_end = _sep + 1 + _sig_len
    if raw[_sig_end:_sig_end + 1] == b"|":
        _candidate = raw[_sig_end + 1:]
        _actual_sig = hmac.new(raw[:_sep], _candidate, hashlib.sha256).digest()
        if hmac.compare_digest(_actual_sig, raw[_sep + 1:_sig_end]):
            _payload = _candidate
    _sep = raw.find(b"|", _sep + 1)
if _payload is None:
    json.dump({"state": "failed", "error": "HMAC verification failed — payload tampered", "exit_code": 1, "duration": 0}, sys.stdout)
    sys.exit(1)
_data = json.loads(_payload)
_mod = importlib.import_module(_data["module"])
_obj = _mod
for _attr in _data["qualname"].split("."):
    _obj = getattr(_obj, _attr)
target = _obj
args = tuple(_data.get("args") or ())
kwargs = _data.get("kwargs") or {}
_start = time.monotonic()
try:
    _rv = target(*args, **kwargs)
    json.dump({
        "state": "completed",
        "return_value": repr(_rv),
        "error": None,
        "exit_code": 0,
        "duration": time.monotonic() - _start,
    }, sys.stdout)
except SystemExit as _e:
    json.dump({
        "state": "failed",
        "error": f"SystemExit({_e.code})",
        "exit_code": _e.code if isinstance(_e.code, int) else 1,
        "duration": time.monotonic() - _start,
    }, sys.stdout)
except BaseException as _e:
    json.dump({
        "state": "failed",
        "error": f"{type(_e).__name__}: {_e}",
        "exit_code": 1,
        "duration": time.monotonic() - _start,
    }, sys.stdout)
"""
194
+
195
+
196
+ # ================================================================
197
+ # AgentProcessHandle
198
+ # ================================================================
199
+
200
+
201
@dataclass
class AgentProcessHandle:
    """Handle to a running agent process.

    Provides real process-level control including non-catchable termination.

    The handle wraps either a ``multiprocessing.Process``-like object
    (``IsolationLevel.PROCESS``; uses ``join``/``is_alive``/``exitcode``) or a
    ``subprocess.Popen``-like object (``IsolationLevel.SUBPROCESS``; uses
    ``poll``/``wait``/``communicate``/``returncode``).  Once a final
    :class:`AgentProcessResult` has been determined it is cached in
    ``_result`` and returned by later ``wait()`` calls.
    """

    agent_id: str
    pid: Optional[int] = None
    state: AgentProcessState = AgentProcessState.PENDING
    isolation_level: IsolationLevel = IsolationLevel.PROCESS

    # ---- internal fields (hidden from repr) ----
    # Underlying process object (multiprocessing.Process or subprocess.Popen).
    _process: Any = field(default=None, repr=False)
    # Queue the PROCESS-level worker reports its outcome on.
    _result_queue: Any = field(default=None, repr=False)
    # time.monotonic() reference used to compute elapsed durations.
    _start_time: float = field(default=0.0, repr=False)
    # Cached final result, once known.
    _result: Optional[AgentProcessResult] = field(default=None, repr=False)
    # Set by kill() so a concurrent wait() reports the kill, not queue data.
    _killed: bool = field(default=False, repr=False)
    # Outcome dict already pulled off ``_result_queue`` while waiting.
    _queued_data: Any = field(default=None, repr=False)

    # --------------------------------------------------------------
    # Public API
    # --------------------------------------------------------------

    def terminate(self) -> bool:
        """Send SIGTERM (graceful shutdown request).

        The handle is marked ``TERMINATED`` and a result is recorded as soon
        as the request has been sent; this method does not wait for the
        process to actually exit.

        Returns:
            True if the request was sent, False if there is no live process
            or the call failed with an ``OSError``.
        """
        if self._process is None or not self.is_alive():
            return False
        try:
            self._process.terminate()
            self.state = AgentProcessState.TERMINATED
            elapsed = time.monotonic() - self._start_time
            self._result = AgentProcessResult(
                agent_id=self.agent_id,
                state=AgentProcessState.TERMINATED,
                error="Terminated by SIGTERM",
                exit_code=-15 if os.name != "nt" else 1,
                duration_seconds=elapsed,
                terminated_by_signal=True,
            )
            logger.info(
                f"[ProcessIsolation] SIGTERM -> agent {self.agent_id} "
                f"(pid={self.pid})"
            )
            return True
        except OSError as exc:  # includes ProcessLookupError
            logger.warning(
                f"[ProcessIsolation] terminate failed for "
                f"{self.agent_id}: {exc}"
            )
            return False

    def kill(self) -> bool:
        """Send real OS SIGKILL -- truly non-catchable.

        On Unix: ``os.kill(pid, signal.SIGKILL)``
        On Windows: ``TerminateProcess`` via ``process.kill()``
        This is the real deal -- the OS scheduler handles it.

        After sending the signal the method waits up to five seconds for the
        process to be reaped, then records a ``TERMINATED`` result.

        Returns:
            True if the kill was delivered, False if there is no live process
            or the kill call failed with an ``OSError``.
        """
        if self._process is None or not self.is_alive():
            return False

        # Flag early so a concurrent wait() sees it immediately.
        self._killed = True

        try:
            if os.name != "nt" and self.pid is not None:
                os.kill(self.pid, _signal.SIGKILL)
            else:
                # Windows: process.kill() calls TerminateProcess
                self._process.kill()

            # Wait briefly for the OS to reap the process.
            if self.isolation_level == IsolationLevel.PROCESS:
                self._process.join(timeout=5)
            elif self.isolation_level == IsolationLevel.SUBPROCESS:
                try:
                    self._process.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    pass

            self.state = AgentProcessState.TERMINATED
            elapsed = time.monotonic() - self._start_time
            self._result = AgentProcessResult(
                agent_id=self.agent_id,
                state=AgentProcessState.TERMINATED,
                error="Killed by SIGKILL",
                exit_code=-9 if os.name != "nt" else 1,
                duration_seconds=elapsed,
                terminated_by_signal=True,
            )
            logger.critical(
                f"[ProcessIsolation] SIGKILL -> agent {self.agent_id} "
                f"(pid={self.pid})"
            )
            return True
        except OSError as exc:  # includes ProcessLookupError
            # The kill was NOT delivered: clear the flag so the process's own
            # outcome is not later misreported as "Killed".
            self._killed = False
            logger.warning(
                f"[ProcessIsolation] kill failed for {self.agent_id}: {exc}"
            )
            return False

    def is_alive(self) -> bool:
        """Check whether the underlying OS process is still running."""
        if self._process is None:
            return False
        if self.isolation_level == IsolationLevel.PROCESS:
            return self._process.is_alive()
        if self.isolation_level == IsolationLevel.SUBPROCESS:
            return self._process.poll() is None
        return False

    def wait(self, timeout: Optional[float] = None) -> AgentProcessResult:
        """Block until the process finishes (or *timeout* expires) and return its result.

        If *timeout* expires while the process is still running, the process
        is killed and the kill/timeout result is returned.

        Args:
            timeout: Maximum number of seconds to wait; ``None`` waits
                indefinitely.

        Returns:
            The final :class:`AgentProcessResult`.  A ``FAILED`` result is
            returned when there is no process or the isolation level is not
            one this handle can wait on.
        """
        # Fast path -- already resolved.
        if self._result is not None and not self.is_alive():
            return self._result

        if self._process is None:
            return AgentProcessResult(
                agent_id=self.agent_id,
                state=AgentProcessState.FAILED,
                error="No process to wait on",
            )

        if self.isolation_level == IsolationLevel.PROCESS:
            return self._wait_process(timeout)

        if self.isolation_level == IsolationLevel.SUBPROCESS:
            return self._wait_subprocess(timeout)

        return AgentProcessResult(
            agent_id=self.agent_id,
            state=AgentProcessState.FAILED,
            error=f"Unsupported isolation level: {self.isolation_level}",
        )

    # --------------------------------------------------------------
    # Internal helpers
    # --------------------------------------------------------------

    def _wait_process(self, timeout: Optional[float]) -> AgentProcessResult:
        """Wait logic for ``IsolationLevel.PROCESS``.

        The result queue is drained *while* waiting rather than after
        ``join()``: a child that has put a large item on a
        ``multiprocessing.Queue`` does not exit until that item has been
        consumed, so joining first can block until the timeout (or forever
        when *timeout* is ``None``).
        """
        deadline = None if timeout is None else time.monotonic() + timeout
        self._drain_queue_while_running(deadline)

        remaining = (
            None if deadline is None
            else max(0.0, deadline - time.monotonic())
        )
        self._process.join(timeout=remaining)

        if self._process.is_alive():
            # Timed out -- forcibly kill.
            self.kill()

        # Killed (by us, or by an external timeout timer)?
        if self._result is not None:
            return self._result

        return self._read_queue_result()

    def _drain_queue_while_running(self, deadline: Optional[float]) -> None:
        """Poll the result queue until data arrives, the process ends, or *deadline* passes.

        Any outcome dict received is stored in ``_queued_data`` for
        ``_read_queue_result``.  Polling is done in short slices so that a
        process that dies without reporting (e.g. killed by another thread)
        is noticed promptly.
        """
        # Local import: keeps the module's top-level import block unchanged.
        from queue import Empty

        if self._result_queue is None:
            return

        poll_seconds = 0.05
        while (
            self._queued_data is None
            and self._result is None
            and self._process.is_alive()
        ):
            wait_for = poll_seconds
            if deadline is not None:
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    return
                wait_for = min(poll_seconds, remaining)
            try:
                self._queued_data = self._result_queue.get(timeout=wait_for)
            except Empty:
                continue
            except Exception as exc:  # noqa: BLE001
                # Unreadable payload: stop draining and let the normal
                # exit-code based path produce the result.
                logger.warning(
                    f"[ProcessIsolation] could not read result for "
                    f"{self.agent_id}: {exc}"
                )
                return

    def _wait_subprocess(self, timeout: Optional[float]) -> AgentProcessResult:
        """Wait logic for ``IsolationLevel.SUBPROCESS``."""
        try:
            stdout, stderr = self._process.communicate(timeout=timeout)
        except subprocess.TimeoutExpired:
            self.kill()
            if self._result is not None:
                return self._result
            # kill() did not record a result (e.g. the process had already
            # gone): report the timeout ourselves.
            elapsed = time.monotonic() - self._start_time
            self._result = AgentProcessResult(
                agent_id=self.agent_id,
                state=AgentProcessState.TERMINATED,
                error="Timed out",
                duration_seconds=elapsed,
                terminated_by_signal=True,
            )
            self.state = AgentProcessState.TERMINATED
            return self._result

        # A concurrent terminate()/kill() may already have recorded a result.
        if self._result is not None:
            return self._result

        return self._parse_subprocess_output(stdout, stderr)

    def _read_queue_result(self) -> AgentProcessResult:
        """Build the final result from the worker's outcome dict.

        Uses data already drained into ``_queued_data`` if present, otherwise
        makes one non-blocking attempt to read the queue.  If nothing was
        reported, the result is derived from the process exit code.
        """
        # Killed by another thread (e.g. timeout timer)?
        if self._killed:
            if self._result is not None:
                return self._result
            elapsed = time.monotonic() - self._start_time
            self._result = AgentProcessResult(
                agent_id=self.agent_id,
                state=AgentProcessState.TERMINATED,
                error="Killed",
                exit_code=-9 if os.name != "nt" else 1,
                duration_seconds=elapsed,
                terminated_by_signal=True,
            )
            self.state = AgentProcessState.TERMINATED
            return self._result

        try:
            data = self._queued_data
            if (
                data is None
                and self._result_queue is not None
                and not self._result_queue.empty()
            ):
                data = self._result_queue.get_nowait()

            if data is not None:
                state = (
                    AgentProcessState.COMPLETED
                    if data["state"] == "completed"
                    else AgentProcessState.FAILED
                )
                self._result = AgentProcessResult(
                    agent_id=self.agent_id,
                    state=state,
                    return_value=data.get("return_value"),
                    error=data.get("error"),
                    exit_code=data.get("exit_code"),
                    duration_seconds=data.get("duration", 0.0),
                )
            else:
                # Process exited without writing to the queue.
                elapsed = time.monotonic() - self._start_time
                exit_code = getattr(self._process, "exitcode", None)
                if exit_code is not None and exit_code < 0:
                    # Negative exit code: the signal number, negated.
                    self._result = AgentProcessResult(
                        agent_id=self.agent_id,
                        state=AgentProcessState.TERMINATED,
                        error=f"Terminated by signal {-exit_code}",
                        exit_code=exit_code,
                        duration_seconds=elapsed,
                        terminated_by_signal=True,
                    )
                else:
                    self._result = AgentProcessResult(
                        agent_id=self.agent_id,
                        state=AgentProcessState.FAILED,
                        error=f"Process exited with code {exit_code}",
                        exit_code=exit_code,
                        duration_seconds=elapsed,
                    )
        except Exception as exc:  # noqa: BLE001
            elapsed = time.monotonic() - self._start_time
            self._result = AgentProcessResult(
                agent_id=self.agent_id,
                state=AgentProcessState.FAILED,
                error=str(exc),
                duration_seconds=elapsed,
            )

        self.state = self._result.state
        return self._result

    def _parse_subprocess_output(
        self, stdout: bytes, stderr: bytes,
    ) -> AgentProcessResult:
        """Parse JSON result from subprocess stdout.

        When stdout is a JSON object it is taken as the child's outcome
        report.  Otherwise (empty/missing stdout, invalid JSON, or JSON that
        is not an object) the result falls back to the raw stdout text as the
        return value, stderr as the error, and the process return code to
        decide between ``COMPLETED`` and ``FAILED``.
        """
        exit_code = self._process.returncode
        elapsed = time.monotonic() - self._start_time

        report = None
        try:
            parsed = json.loads((stdout or b"").decode("utf-8", errors="replace"))
        except (json.JSONDecodeError, UnicodeDecodeError):
            parsed = None
        if isinstance(parsed, dict):
            report = parsed

        if report is not None:
            state = (
                AgentProcessState.COMPLETED
                if report.get("state") == "completed"
                else AgentProcessState.FAILED
            )
            self._result = AgentProcessResult(
                agent_id=self.agent_id,
                state=state,
                return_value=report.get("return_value"),
                error=report.get("error"),
                exit_code=report.get("exit_code", exit_code),
                duration_seconds=report.get("duration", elapsed),
            )
        else:
            stderr_txt = (
                stderr.decode("utf-8", errors="replace") if stderr else ""
            )
            self._result = AgentProcessResult(
                agent_id=self.agent_id,
                state=(
                    AgentProcessState.COMPLETED
                    if exit_code == 0
                    else AgentProcessState.FAILED
                ),
                return_value=(
                    stdout.decode("utf-8", errors="replace") if stdout else None
                ),
                error=stderr_txt or None,
                exit_code=exit_code,
                duration_seconds=elapsed,
            )

        self.state = self._result.state
        return self._result
489
+
490
+
491
+ # ================================================================
492
+ # ProcessIsolationManager
493
+ # ================================================================
494
+
495
+
496
+ class ProcessIsolationManager:
497
+ """Manages agent processes with real OS-level isolation.
498
+
499
+ Unlike in-process ``AgentKernelPanic`` (which can be caught with
500
+ ``try/except``), this runs agents in separate processes where
501
+ ``os.kill(SIGKILL)`` is truly non-catchable by the agent.
502
+ """
503
+
504
def __init__(
    self,
    default_isolation: IsolationLevel = IsolationLevel.PROCESS,
) -> None:
    """Create an empty manager.

    Args:
        default_isolation: Isolation level ``spawn`` falls back to when the
            caller does not pass one explicitly.
    """
    # Running counter, starts at zero (presumably feeds generated agent
    # ids -- the generator is not visible from this method).
    self._counter = 0
    # Guards access to the handle registry.
    self._lock = threading.Lock()
    # agent_id -> handle for every process registered by this manager.
    self._handles: Dict[str, AgentProcessHandle] = {}
    self._default_isolation = default_isolation
512
+
513
+ # ----------------------------------------------------------
514
+ # Spawn
515
+ # ----------------------------------------------------------
516
+
517
+ def spawn(
518
+ self,
519
+ target: Callable,
520
+ agent_id: Optional[str] = None,
521
+ args: tuple = (),
522
+ kwargs: Optional[dict] = None,
523
+ isolation: Optional[IsolationLevel] = None,
524
+ timeout: Optional[float] = None,
525
+ ) -> AgentProcessHandle:
526
+ """Spawn an agent function in an isolated process."""
527
+ if agent_id is None:
528
+ agent_id = self._next_id()
529
+
530
+ level = isolation or self._default_isolation
531
+
532
+ if level == IsolationLevel.COOPERATIVE:
533
+ raise ValueError(
534
+ "Cooperative isolation is in-process only. "
535
+ "Use SignalDispatcher directly for cooperative mode."
536
+ )
537
+
538
+ if level == IsolationLevel.PROCESS:
539
+ handle = self._spawn_multiprocessing(
540
+ agent_id, target, args, kwargs,
541
+ )
542
+ elif level == IsolationLevel.SUBPROCESS:
543
+ handle = self._spawn_subprocess(
544
+ agent_id, target, args, kwargs,
545
+ )
546
+ else:
547
+ raise ValueError(f"Unsupported isolation level: {level}")
548
+
549
+ with self._lock:
550
+ self._handles[agent_id] = handle
551
+
552
+ # Optional watchdog timer.
553
+ if timeout is not None:
554
+ timer = threading.Timer(
555
+ timeout, self._on_timeout, args=(agent_id,),
556
+ )
557
+ timer.daemon = True
558
+ timer.start()
559
+
560
+ logger.info(
561
+ f"[ProcessIsolation] Spawned {agent_id} "
562
+ f"(pid={handle.pid}, isolation={level.value})"
563
+ )
564
+ return handle
565
+
566
+ # ----------------------------------------------------------
567
+ # Kill / Terminate
568
+ # ----------------------------------------------------------
569
+
570
+ def kill(self, agent_id: str, reason: str = "") -> bool:
571
+ """Send SIGKILL to agent process -- truly non-catchable."""
572
+ handle = self.get_handle(agent_id)
573
+ if handle is None:
574
+ logger.warning(
575
+ f"[ProcessIsolation] kill: unknown agent {agent_id}"
576
+ )
577
+ return False
578
+ logger.info(f"[ProcessIsolation] kill({agent_id}): {reason}")
579
+ return handle.kill()
580
+
581
+ def terminate(self, agent_id: str, reason: str = "") -> bool:
582
+ """Send SIGTERM for graceful shutdown."""
583
+ handle = self.get_handle(agent_id)
584
+ if handle is None:
585
+ logger.warning(
586
+ f"[ProcessIsolation] terminate: unknown agent {agent_id}"
587
+ )
588
+ return False
589
+ logger.info(f"[ProcessIsolation] terminate({agent_id}): {reason}")
590
+ return handle.terminate()
591
+
592
+ def kill_all(self, reason: str = "") -> int:
593
+ """Kill all running agents. Returns count killed."""
594
+ killed = 0
595
+ with self._lock:
596
+ handles = list(self._handles.values())
597
+ for h in handles:
598
+ if h.is_alive() and h.kill():
599
+ killed += 1
600
+ logger.info(
601
+ f"[ProcessIsolation] kill_all: {killed} agents killed"
602
+ + (f" -- {reason}" if reason else "")
603
+ )
604
+ return killed
605
+
606
+ # ----------------------------------------------------------
607
+ # Queries
608
+ # ----------------------------------------------------------
609
+
610
+ def get_handle(self, agent_id: str) -> Optional[AgentProcessHandle]:
611
+ """Retrieve the handle for a specific agent."""
612
+ with self._lock:
613
+ return self._handles.get(agent_id)
614
+
615
+ def list_agents(self) -> List[AgentProcessHandle]:
616
+ """Return a snapshot of all tracked agent handles."""
617
+ with self._lock:
618
+ return list(self._handles.values())
619
+
620
+ # ----------------------------------------------------------
621
+ # Maintenance
622
+ # ----------------------------------------------------------
623
+
624
+ def cleanup(self) -> None:
625
+ """Remove completed / terminated / failed processes from tracking."""
626
+ with self._lock:
627
+ remove = [
628
+ aid
629
+ for aid, h in self._handles.items()
630
+ if not h.is_alive()
631
+ and h.state
632
+ in (
633
+ AgentProcessState.COMPLETED,
634
+ AgentProcessState.TERMINATED,
635
+ AgentProcessState.FAILED,
636
+ )
637
+ ]
638
+ for aid in remove:
639
+ del self._handles[aid]
640
+ if remove:
641
+ logger.debug(
642
+ f"[ProcessIsolation] Cleaned up {len(remove)} processes"
643
+ )
644
+
645
+ # ----------------------------------------------------------
646
+ # Internal helpers
647
+ # ----------------------------------------------------------
648
+
649
+ def _next_id(self) -> str:
650
+ self._counter += 1
651
+ return f"agent-{self._counter:04d}"
652
+
653
+ def _spawn_multiprocessing(
654
+ self,
655
+ agent_id: str,
656
+ target: Callable,
657
+ args: tuple,
658
+ kwargs: Optional[dict],
659
+ ) -> AgentProcessHandle:
660
+ q: multiprocessing.Queue = multiprocessing.Queue()
661
+ p = multiprocessing.Process(
662
+ target=_agent_worker,
663
+ args=(target, args, kwargs or {}, q),
664
+ daemon=True,
665
+ )
666
+ p.start()
667
+ return AgentProcessHandle(
668
+ agent_id=agent_id,
669
+ pid=p.pid,
670
+ state=AgentProcessState.RUNNING,
671
+ isolation_level=IsolationLevel.PROCESS,
672
+ _process=p,
673
+ _result_queue=q,
674
+ _start_time=time.monotonic(),
675
+ )
676
+
677
+ def _spawn_subprocess(
678
+ self,
679
+ agent_id: str,
680
+ target: Callable,
681
+ args: tuple,
682
+ kwargs: Optional[dict],
683
+ ) -> AgentProcessHandle:
684
+ # Validate target is an importable function (not a lambda/closure)
685
+ if not hasattr(target, '__module__') or not hasattr(target, '__qualname__'):
686
+ raise ValueError(
687
+ f"Target callable {target!r} must be a module-level function "
688
+ "with __module__ and __qualname__ for subprocess isolation"
689
+ )
690
+ # Serialize as JSON with function reference instead of pickling callables
691
+ payload = json.dumps({
692
+ "module": target.__module__,
693
+ "qualname": target.__qualname__,
694
+ "args": list(args),
695
+ "kwargs": kwargs or {},
696
+ }).encode('utf-8')
697
+ # Sign payload with HMAC to prevent tampering
698
+ hmac_key = os.urandom(32)
699
+ sig = hmac.new(hmac_key, payload, hashlib.sha256).digest()
700
+ encoded = base64.b64encode(hmac_key + b"|" + sig + b"|" + payload)
701
+ proc = subprocess.Popen(
702
+ [sys.executable, "-c", _SUBPROCESS_BOOTSTRAP],
703
+ stdin=subprocess.PIPE,
704
+ stdout=subprocess.PIPE,
705
+ stderr=subprocess.PIPE,
706
+ )
707
+ proc.stdin.write(encoded) # type: ignore[union-attr]
708
+ proc.stdin.close() # type: ignore[union-attr]
709
+ return AgentProcessHandle(
710
+ agent_id=agent_id,
711
+ pid=proc.pid,
712
+ state=AgentProcessState.RUNNING,
713
+ isolation_level=IsolationLevel.SUBPROCESS,
714
+ _process=proc,
715
+ _start_time=time.monotonic(),
716
+ )
717
+
718
+ def _on_timeout(self, agent_id: str) -> None:
719
+ handle = self.get_handle(agent_id)
720
+ if handle is not None and handle.is_alive():
721
+ logger.warning(
722
+ f"[ProcessIsolation] Timeout -> killing {agent_id}"
723
+ )
724
+ handle.kill()
725
+
726
+
727
+ # ================================================================
728
+ # IsolatedSignalDispatcher
729
+ # ================================================================
730
+
731
+
732
class IsolatedSignalDispatcher(SignalDispatcher):
    """Dispatcher whose SIGKILL goes through a real process kill.

    Subclass of :class:`SignalDispatcher` that overrides only the kill
    handler: when the agent has a live process tracked by the
    :class:`ProcessIsolationManager`, that process is killed via its
    handle; otherwise the inherited cooperative handler runs.
    """

    def __init__(
        self,
        agent_id: str,
        process_manager: Optional[ProcessIsolationManager] = None,
    ) -> None:
        """Bind the dispatcher to ``agent_id`` and a process manager.

        A new :class:`ProcessIsolationManager` is created when none (or a
        falsy value) is supplied.
        """
        super().__init__(agent_id)
        if not process_manager:
            process_manager = ProcessIsolationManager()
        self._process_manager = process_manager

    def _handle_kill(self, info: SignalInfo) -> None:
        """Override: route SIGKILL through real process isolation."""
        handle = self._process_manager.get_handle(self.agent_id)

        if handle is None or not handle.is_alive():
            # No isolated process -- fall back to cooperative exception.
            super()._handle_kill(info)
            return

        logger.critical(
            f"[IsolatedSignalDispatcher] SIGKILL -> os.kill for "
            f"{self.agent_id} (pid={handle.pid})"
        )
        handle.kill()
        # Mark the dispatcher itself as finished once the kill was issued.
        self._is_terminated = True
        self._is_stopped = True
762
+
763
+
764
+ # ================================================================
765
+ # Factory
766
+ # ================================================================
767
+
768
+
769
def create_isolated_signal_dispatcher(
    agent_id: str,
    **kwargs: Any,
) -> IsolatedSignalDispatcher:
    """Build an :class:`IsolatedSignalDispatcher` for ``agent_id``.

    Only the ``process_manager`` keyword is consulted; it is forwarded
    as-is (``None`` when absent). Any other keyword is ignored.
    """
    manager = kwargs.get("process_manager")
    return IsolatedSignalDispatcher(agent_id=agent_id, process_manager=manager)