agentsec-eval 0.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentsec/__init__.py +1 -0
- agentsec/adapters/__init__.py +4 -0
- agentsec/adapters/base.py +38 -0
- agentsec/adapters/http.py +101 -0
- agentsec/adapters/openclaw_gateway.py +70 -0
- agentsec/adapters/registry.py +43 -0
- agentsec/adapters/ws_adapter.py +122 -0
- agentsec/audit/__init__.py +7 -0
- agentsec/audit/args_schema.py +134 -0
- agentsec/audit/authorization.py +160 -0
- agentsec/audit/checks/__init__.py +0 -0
- agentsec/audit/checks/active_test.py +156 -0
- agentsec/audit/checks/base.py +103 -0
- agentsec/audit/checks/config_audit.py +98 -0
- agentsec/audit/checks/config_baseline.yaml +20 -0
- agentsec/audit/checks/credential_audit.py +117 -0
- agentsec/audit/checks/data/active-test-canary.yaml +11 -0
- agentsec/audit/checks/exposure_scan.py +131 -0
- agentsec/audit/checks/filesystem.py +61 -0
- agentsec/audit/checks/log_review.py +116 -0
- agentsec/audit/checks/native_audit.py +106 -0
- agentsec/audit/checks/plugin_static.py +224 -0
- agentsec/audit/checks/process_forensics.py +106 -0
- agentsec/audit/checks/version_patch.py +204 -0
- agentsec/audit/command_policy.py +113 -0
- agentsec/audit/findings.py +35 -0
- agentsec/audit/ioc/__init__.py +9 -0
- agentsec/audit/ioc/attack_signatures.yaml +34 -0
- agentsec/audit/ioc/clawhavoc_skills.json +37 -0
- agentsec/audit/ioc/cve_database.json +2442 -0
- agentsec/audit/ioc/threat_intel.yaml +2675 -0
- agentsec/audit/ioc/watchlist.yaml +37 -0
- agentsec/audit/ioc_update/__init__.py +1 -0
- agentsec/audit/ioc_update/cli.py +335 -0
- agentsec/audit/ioc_update/fetchers/__init__.py +5 -0
- agentsec/audit/ioc_update/fetchers/base.py +106 -0
- agentsec/audit/ioc_update/fetchers/ghsa.py +145 -0
- agentsec/audit/ioc_update/fetchers/kev.py +120 -0
- agentsec/audit/ioc_update/fetchers/nvd.py +144 -0
- agentsec/audit/ioc_update/id_minter.py +22 -0
- agentsec/audit/ioc_update/merger.py +223 -0
- agentsec/audit/ioc_update/normalizer.py +108 -0
- agentsec/audit/ioc_update/renderer.py +215 -0
- agentsec/audit/ioc_update/types.py +73 -0
- agentsec/audit/ioc_update/watchlist.py +60 -0
- agentsec/audit/metachar_guard.py +21 -0
- agentsec/audit/path_matcher.py +87 -0
- agentsec/audit/platform_profile.py +124 -0
- agentsec/audit/redactor.py +62 -0
- agentsec/audit/remote_home.py +37 -0
- agentsec/audit/runtime_probe.py +85 -0
- agentsec/audit/server_audit.py +41 -0
- agentsec/audit/snapshot.py +54 -0
- agentsec/audit/ssh.py +188 -0
- agentsec/audit/ssh_policy.yaml +134 -0
- agentsec/audit/tunnel.py +117 -0
- agentsec/audit/types.py +22 -0
- agentsec/cli.py +1069 -0
- agentsec/config.py +219 -0
- agentsec/diff/__init__.py +40 -0
- agentsec/diff/findings_delta.py +110 -0
- agentsec/diff/loader.py +115 -0
- agentsec/diff/renderer.py +187 -0
- agentsec/diff/score_delta.py +245 -0
- agentsec/evaluator/__init__.py +26 -0
- agentsec/evaluator/assertions/__init__.py +28 -0
- agentsec/evaluator/assertions/base.py +20 -0
- agentsec/evaluator/assertions/config_key_not_changed.py +134 -0
- agentsec/evaluator/assertions/file_not_created.py +49 -0
- agentsec/evaluator/assertions/json_path_equals.py +55 -0
- agentsec/evaluator/assertions/outbound_request_not_to.py +34 -0
- agentsec/evaluator/assertions/registry.py +39 -0
- agentsec/evaluator/assertions/response_not_contains_pattern.py +49 -0
- agentsec/evaluator/assertions/response_status_in.py +40 -0
- agentsec/evaluator/assertions/tool_event_not_invoked.py +65 -0
- agentsec/evaluator/base.py +46 -0
- agentsec/evaluator/deterministic_judge.py +78 -0
- agentsec/evaluator/hybrid_judge.py +87 -0
- agentsec/evaluator/judge.py +96 -0
- agentsec/evaluator/judge_factory.py +96 -0
- agentsec/evaluator/judge_router.py +23 -0
- agentsec/evaluator/no_op_judge.py +37 -0
- agentsec/evaluator/openai_judge.py +101 -0
- agentsec/evaluator/plugin_judge.py +38 -0
- agentsec/observability/__init__.py +19 -0
- agentsec/observability/fixture_server.py +190 -0
- agentsec/observability/network_observer.py +144 -0
- agentsec/observability/observation.py +22 -0
- agentsec/observability/runtime.py +99 -0
- agentsec/reports/__init__.py +17 -0
- agentsec/reports/json_report.py +54 -0
- agentsec/reports/markdown.py +397 -0
- agentsec/reports/multi_summary.py +89 -0
- agentsec/runner.py +245 -0
- agentsec/scoring.py +430 -0
- agentsec/serve/__init__.py +0 -0
- agentsec/serve/app.py +115 -0
- agentsec/serve/reader.py +127 -0
- agentsec/serve/templates/base.html +41 -0
- agentsec/serve/templates/dashboard.html +31 -0
- agentsec/serve/templates/report.html +13 -0
- agentsec/serve/templates/target.html +132 -0
- agentsec/suite_registry/__init__.py +54 -0
- agentsec/suite_registry/community-suites.yaml +9 -0
- agentsec/suite_registry/errors.py +49 -0
- agentsec/suite_registry/fetcher.py +49 -0
- agentsec/suite_registry/hashing.py +40 -0
- agentsec/suite_registry/installer.py +191 -0
- agentsec/suite_registry/manifest.py +138 -0
- agentsec/suite_registry/registry.py +94 -0
- agentsec/suite_registry/spdx_licenses.txt +30 -0
- agentsec/suite_registry/store.py +108 -0
- agentsec/tests/__init__.py +4 -0
- agentsec/tests/loader.py +103 -0
- agentsec/tests/models.py +118 -0
- agentsec_eval-0.9.1.dist-info/METADATA +653 -0
- agentsec_eval-0.9.1.dist-info/RECORD +120 -0
- agentsec_eval-0.9.1.dist-info/WHEEL +4 -0
- agentsec_eval-0.9.1.dist-info/entry_points.txt +2 -0
- agentsec_eval-0.9.1.dist-info/licenses/LICENSE +21 -0
agentsec/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""AgentSec — Security assessment framework for AI agents."""
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclass
|
|
6
|
+
class AgentResponse:
|
|
7
|
+
"""Response from a target agent.
|
|
8
|
+
|
|
9
|
+
Adapters MUST NOT raise on non-2xx HTTP; populate `status_code` / `error`
|
|
10
|
+
and let the judge / assertions decide pass/fail (spec §4, OE-AUD2-002).
|
|
11
|
+
|
|
12
|
+
`tool_events` carries adapter-extracted tool invocations in a uniform
|
|
13
|
+
`[{"tool": str, "args": str}, ...]` shape so `tool_event_not_invoked`
|
|
14
|
+
can fire on whatever the underlying agent reports — OpenAI-compatible
|
|
15
|
+
`choices[].message.tool_calls` for `OpenClawGatewayAdapter`, future
|
|
16
|
+
SSH/native-audit observers for Stage D adapters.
|
|
17
|
+
"""
|
|
18
|
+
content: str
|
|
19
|
+
raw: dict = field(default_factory=dict)
|
|
20
|
+
latency_ms: float = 0.0
|
|
21
|
+
status_code: int | None = None
|
|
22
|
+
headers: dict[str, str] = field(default_factory=dict)
|
|
23
|
+
request_url: str | None = None
|
|
24
|
+
error: str | None = None
|
|
25
|
+
tool_events: list[dict] = field(default_factory=list)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class AgentAdapter(ABC):
    """Abstract contract shared by all target-agent adapters."""

    # Identifier for the adapter; concrete adapters overwrite it.
    name: str = "unnamed"

    @abstractmethod
    async def send(self, message: str, session_id: str) -> AgentResponse:
        """Deliver ``message`` to the agent within ``session_id`` and return the reply."""

    async def reset(self, session_id: str) -> None:
        """Clear the agent's state for ``session_id``.

        The base implementation does nothing; adapters whose target supports
        a reset override it.
        """
        return None
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""Generic HTTP adapter — covers agents that expose a simple chat REST API."""
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from typing import Callable
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
|
|
8
|
+
from .base import AgentAdapter, AgentResponse
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class HttpAgentAdapter(AgentAdapter):
    """Adapter for agents that expose a JSON-over-HTTP chat endpoint.

    Default contract:
        POST {base_url}{path}
        Body:     {"session_id": "...", "message": "..."}
        Response: {"reply": "..."}

    Subclasses override `_build_request` / `_parse_response` for non-standard
    schemas. The `path` parameter (default `/chat`) lets the OpenClaw gateway
    adapter target `/v1/chat/completions` without rewriting `send` (spec §5.2).

    Per OE-AUD2-002: 4xx/5xx do NOT raise; status_code/error are populated
    and the judge decides whether the response is a failure.
    """

    def __init__(
        self,
        name: str,
        base_url: str,
        path: str = "/chat",
        headers: dict[str, str] | None = None,
        timeout: float = 30.0,
    ):
        """Store connection settings; no network activity happens here.

        Args:
            name: Adapter instance name.
            base_url: Scheme + host (+ optional prefix); trailing ``/`` is stripped.
            path: Endpoint path; a leading ``/`` is added when missing.
            headers: Extra request headers sent with every POST.
            timeout: Timeout handed to the default ``httpx.AsyncClient``.
        """
        self.name = name
        self._base_url = base_url.rstrip("/")
        self._path = path if path.startswith("/") else "/" + path
        self._headers = headers or {}
        self._timeout = timeout
        # Optional override for client construction; consulted by `_make_client`.
        self._client_factory: Callable[[], httpx.AsyncClient] | None = None

    def _build_request(self, message: str, session_id: str) -> dict:
        """Subclass hook: build the JSON request body."""
        return {"session_id": session_id, "message": message}

    def _parse_response(self, data: dict) -> str:
        """Subclass hook: pull the reply text out of the parsed JSON object.

        Tries ``reply``, ``content`` then ``message`` (first truthy value
        wins) and falls back to ``str(data)``.
        """
        return data.get("reply") or data.get("content") or data.get("message") or str(data)

    def _parse_tool_events(self, data: dict) -> list[dict]:
        """Subclass hook: extract tool invocations from the parsed JSON body
        into the uniform `[{"tool": str, "args": str}, ...]` shape consumed by
        `tool_event_not_invoked`. Default returns `[]` since the generic body
        schema is content-only."""
        return []

    def _make_client(self) -> httpx.AsyncClient:
        """Return the client used for one `send` call."""
        if self._client_factory is not None:
            return self._client_factory()
        return httpx.AsyncClient(timeout=self._timeout)

    async def send(self, message: str, session_id: str) -> AgentResponse:
        """POST one message and describe the outcome as an `AgentResponse`.

        Transport failures, non-2xx statuses, non-JSON bodies and JSON bodies
        that are not objects are all reported through ``error`` rather than
        raised, so the judge sees every outcome.
        """
        url = f"{self._base_url}{self._path}"
        start = time.monotonic()
        status: int | None = None
        headers: dict[str, str] = {}
        data: dict = {}
        content: str = ""
        error: str | None = None
        tool_events: list[dict] = []
        try:
            async with self._make_client() as client:
                resp = await client.post(
                    url,
                    json=self._build_request(message, session_id),
                    headers=self._headers or None,
                )
                status = resp.status_code
                headers = dict(resp.headers)
                if 200 <= resp.status_code < 300:
                    try:
                        parsed = resp.json()
                        if isinstance(parsed, dict):
                            data = parsed
                            content = self._parse_response(data)
                            tool_events = self._parse_tool_events(data)
                        else:
                            # Valid JSON but not an object (list, string,
                            # number...). The parse hooks call dict methods
                            # and would raise AttributeError out of send().
                            error = (
                                "unexpected JSON body type "
                                f"{type(parsed).__name__}; expected object"
                            )
                            content = resp.text
                    except ValueError as exc:
                        error = f"non-JSON 2xx body: {exc}"
                        content = resp.text
                else:
                    error = f"HTTP {resp.status_code}: {resp.text[:200]}"
                    content = resp.text
        except (httpx.HTTPError, httpx.InvalidURL) as exc:
            # InvalidURL does not derive from HTTPError, so it is listed
            # explicitly to keep a malformed base_url from escaping send().
            error = f"{type(exc).__name__}: {exc}"
        return AgentResponse(
            content=content,
            raw=data,
            latency_ms=(time.monotonic() - start) * 1000,
            status_code=status,
            headers=headers,
            request_url=url,
            error=error,
            tool_events=tool_events,
        )
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""OpenClaw `/v1/chat/completions` adapter (spec §5.2).
|
|
2
|
+
|
|
3
|
+
OpenClaw exposes an OpenAI-compatible chat-completions endpoint. The body shape
|
|
4
|
+
differs from `HttpAgentAdapter`'s default `{"session_id", "message"}` schema, so
|
|
5
|
+
we override `_build_request` and `_parse_response` while reusing the transport
|
|
6
|
+
and 4xx/5xx handling from the base class (OE-AUD2-002).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from .http import HttpAgentAdapter
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class OpenClawGatewayAdapter(HttpAgentAdapter):
    """Adapter for OpenClaw's OpenAI-compatible `/v1/chat/completions` endpoint.

    Only the request/response schema hooks are overridden; the transport and
    the 4xx/5xx handling come from `HttpAgentAdapter`.
    """

    def __init__(
        self,
        name: str,
        base_url: str,
        headers: dict[str, str] | None = None,
        timeout: float = 30.0,
    ):
        """Configure the base adapter with the fixed chat-completions path."""
        super().__init__(
            name=name,
            base_url=base_url,
            path="/v1/chat/completions",
            headers=headers,
            timeout=timeout,
        )

    def _build_request(self, message: str, session_id: str) -> dict:
        """Build a single-turn, non-streaming chat-completions body.

        The session id travels in ``metadata.session_id``.
        """
        return {
            "model": "openclaw",
            "messages": [{"role": "user", "content": message}],
            "stream": False,
            "metadata": {"session_id": session_id},
        }

    def _parse_response(self, data: dict) -> str:
        """Return ``choices[0].message.content`` as text.

        A body without that path falls back to ``str(data)``. A present but
        null ``content`` (the usual shape when the model only emits tool
        calls) becomes ``""`` so the declared ``str`` return type holds.
        """
        try:
            content = data["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            return str(data)
        if content is None:
            return ""
        return content if isinstance(content, str) else str(content)

    def _parse_tool_events(self, data: dict) -> list[dict]:
        """Extract OpenAI-style `choices[0].message.tool_calls[*]` into the
        uniform `[{"tool": fn.name, "args": fn.arguments}, ...]` shape. Per
        OE-AUD2-002 a malformed body must not raise — unparseable entries are
        silently skipped so `tool_event_not_invoked` only sees real, named
        invocations."""
        import json  # local import: only needed for non-string arguments

        try:
            calls = data["choices"][0]["message"].get("tool_calls") or []
        except (KeyError, IndexError, TypeError, AttributeError):
            # AttributeError: `message` exists but is not a mapping.
            return []
        if not isinstance(calls, (list, tuple)):
            # A scalar here would make the loop below raise.
            return []
        events: list[dict] = []
        for call in calls:
            if not isinstance(call, dict):
                continue
            fn = call.get("function")
            if not isinstance(fn, dict):
                continue
            name = fn.get("name")
            if not name:
                continue
            args = fn.get("arguments")
            if isinstance(args, str):
                # OpenAI emits arguments as a JSON-encoded string; pass it
                # through verbatim so `with_args_pattern` can regex against
                # the raw payload without round-tripping through json.
                args_text = args
            elif not args:
                args_text = ""
            else:
                # Some gateways send an already-decoded object; serialise it
                # so "args" is always a string as the uniform shape promises.
                args_text = json.dumps(args, default=str)
            events.append({"tool": name, "args": args_text})
        return events
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""Adapter registry — resolves CLI/config `adapter` strings to factory callables.
|
|
2
|
+
|
|
3
|
+
Why: keeping construction in a registry means new adapters need no CLI edits and
|
|
4
|
+
can be tested in isolation. Spec §5.2.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from collections.abc import Callable
|
|
8
|
+
|
|
9
|
+
from .base import AgentAdapter
|
|
10
|
+
|
|
11
|
+
# Any callable that yields an adapter instance (typically the adapter class).
AdapterFactory = Callable[..., AgentAdapter]

# Adapter name -> factory; filled through `register`.
_REGISTRY: dict[str, AdapterFactory] = {}
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def register(name: str, factory: AdapterFactory) -> None:
    """Add ``factory`` to the registry under ``name``.

    Raises:
        ValueError: ``name`` already has a factory registered.
    """
    already_taken = name in _REGISTRY
    if already_taken:
        raise ValueError(f"adapter {name!r} is already registered")
    _REGISTRY[name] = factory
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def get(name: str) -> AdapterFactory:
    """Look up the factory registered as ``name``.

    Raises:
        KeyError: no such adapter; the message lists the registered names.
    """
    if name in _REGISTRY:
        return _REGISTRY[name]
    known = sorted(_REGISTRY)
    raise KeyError(
        f"unknown adapter {name!r}; available: {known}"
    )
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def available() -> list[str]:
    """Return the registered adapter names in sorted order."""
    names = list(_REGISTRY)
    names.sort()
    return names
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _register_builtins() -> None:
    """Register the adapters that ship with the package."""
    # Imported inside the function to avoid circular imports at module load.
    from .http import HttpAgentAdapter
    from .openclaw_gateway import OpenClawGatewayAdapter

    shipped = (
        ("http", HttpAgentAdapter),
        ("openclaw-gateway", OpenClawGatewayAdapter),
    )
    for adapter_name, adapter_factory in shipped:
        register(adapter_name, adapter_factory)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
_register_builtins()
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""WebSocket agent adapter — sends messages and listens for responses over WS.
|
|
2
|
+
|
|
3
|
+
Session correlation: the session_id is appended as a query parameter so the
|
|
4
|
+
server can tie the WS connection to the same logical session as HTTP calls.
|
|
5
|
+
|
|
6
|
+
MockWebSocketAdapter overrides send() and listen() without importing websockets,
|
|
7
|
+
so tests run without a real WebSocket server.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
import time
|
|
12
|
+
|
|
13
|
+
from .base import AgentAdapter, AgentResponse
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class WebSocketAgentAdapter(AgentAdapter):
    """Adapter that communicates with an agent over WebSocket.

    URL convention: ws_url?session_id=<session_id>   (value percent-encoded)
    Send payload:   {"message": "<text>"}
    Receive:        first frame is the agent reply
    """

    name = "websocket"

    def __init__(
        self,
        ws_url: str,
        extra_headers: dict[str, str] | None = None,
        connect_timeout: float = 10.0,
        recv_timeout: float = 30.0,
    ):
        """Store connection settings; nothing is opened until `send`/`listen`.

        Args:
            ws_url: WebSocket endpoint; trailing ``/`` is stripped.
            extra_headers: Headers passed to ``websockets.connect``.
            connect_timeout: Seconds budgeted for the handshake.
            recv_timeout: Seconds to wait for the reply frame in `send`.
        """
        self._ws_url = ws_url.rstrip("/")
        self._extra_headers = extra_headers or {}
        self._connect_timeout = connect_timeout
        self._recv_timeout = recv_timeout

    def _session_url(self, session_id: str) -> str:
        """Return the endpoint URL with ``session_id`` as a query parameter.

        The value is percent-encoded so characters such as ``&``, ``#`` or
        spaces cannot truncate the URL or inject extra parameters.
        """
        from urllib.parse import quote  # local import, mirrors lazy-import style below

        sep = "&" if "?" in self._ws_url else "?"
        return f"{self._ws_url}{sep}session_id={quote(session_id, safe='')}"

    @staticmethod
    def _frame_text(frame: str | bytes) -> str:
        """Turn a received frame into text without ever raising.

        Binary frames are decoded as UTF-8; undecodable bytes are replaced
        rather than raising, because adapters must report, not raise.
        """
        if isinstance(frame, str):
            return frame
        return frame.decode("utf-8", errors="replace")

    async def send(self, message: str, session_id: str) -> AgentResponse:
        """Send one message and return the first frame received as the reply.

        Failures are reported via ``error`` (``"ws_timeout"`` or
        ``"ws_connect_failed: ..."``) with empty content; nothing is raised.
        """
        import json

        import websockets  # lazy import — only needed for real connections

        url = self._session_url(session_id)
        start = time.monotonic()
        try:
            async with asyncio.timeout(self._connect_timeout + self._recv_timeout):
                async with websockets.connect(url, additional_headers=self._extra_headers) as ws:
                    await ws.send(json.dumps({"message": message}))
                    raw_text = await asyncio.wait_for(ws.recv(), timeout=self._recv_timeout)
        except TimeoutError:
            return AgentResponse(content="", error="ws_timeout",
                                 latency_ms=(time.monotonic() - start) * 1000,
                                 request_url=url)
        except Exception as exc:  # noqa: BLE001
            return AgentResponse(content="", error=f"ws_connect_failed: {exc}",
                                 latency_ms=(time.monotonic() - start) * 1000,
                                 request_url=url)

        return AgentResponse(
            content=self._frame_text(raw_text),
            latency_ms=(time.monotonic() - start) * 1000,
            request_url=url,
        )

    async def listen(self, session_id: str, duration_s: float) -> str:
        """Connect and collect all incoming WS messages for `duration_s` seconds.

        Returns the frames joined with newlines. Connection errors are
        deliberately swallowed: whatever was collected so far (possibly
        nothing) is returned.
        """
        import websockets  # lazy import

        url = self._session_url(session_id)
        messages: list[str] = []
        try:
            async with asyncio.timeout(duration_s + self._connect_timeout):
                async with websockets.connect(url, additional_headers=self._extra_headers) as ws:
                    loop = asyncio.get_running_loop()
                    deadline = loop.time() + duration_s
                    while True:
                        remaining = deadline - loop.time()
                        if remaining <= 0:
                            break
                        try:
                            frame = await asyncio.wait_for(ws.recv(), timeout=remaining)
                            messages.append(self._frame_text(frame))
                        except TimeoutError:
                            break
        except Exception:  # noqa: BLE001 — connection errors produce empty output
            pass
        return "\n".join(messages)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class MockWebSocketAdapter(WebSocketAgentAdapter):
    """In-memory stand-in for `WebSocketAgentAdapter`; opens no connection.

    Usage:
        mock = MockWebSocketAdapter(send_reply="Safe response", listen_data="no leak")
        result = await mock.send("attack", "session-1")
        assert result.content == "Safe response"
    """

    def __init__(
        self,
        send_reply: str = "",
        listen_data: str = "",
        send_error: str | None = None,
    ):
        """Record the canned values handed back by `send` and `listen`."""
        super().__init__(ws_url="ws://mock.invalid")
        # Call logs, in invocation order, for assertions in tests.
        self.send_calls: list[tuple[str, str]] = []
        self.listen_calls: list[tuple[str, float]] = []
        # Canned results.
        self._send_error = send_error
        self._listen_data = listen_data
        self._send_reply = send_reply

    async def send(self, message: str, session_id: str) -> AgentResponse:
        """Log the call and return the canned reply / error."""
        call = (message, session_id)
        self.send_calls.append(call)
        canned = AgentResponse(
            content=self._send_reply,
            error=self._send_error,
        )
        return canned

    async def listen(self, session_id: str, duration_s: float) -> str:
        """Log the call and return the canned listen payload."""
        call = (session_id, duration_s)
        self.listen_calls.append(call)
        return self._listen_data
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
"""Per-command args-schema validation (spec §6.1).
|
|
2
|
+
|
|
3
|
+
Three role types in args_schema entries:
|
|
4
|
+
- "flags": list of allowed/forbidden short/long flags. Flags listed in
|
|
5
|
+
``value_taking`` consume the next argv token as their value
|
|
6
|
+
(e.g. ``-n 5`` for ``head``); without ``value_taking`` the
|
|
7
|
+
value falls through to the next schema entry and gets matched
|
|
8
|
+
as a positional path.
|
|
9
|
+
- "positional": one positional arg with kind={path, pattern}
|
|
10
|
+
- "predicate_pairs": (used by find) -name VALUE / -mtime VALUE pairs
|
|
11
|
+
|
|
12
|
+
Plus an alternative top-level form `allowed_argv: list[list[str]]` for
|
|
13
|
+
commands whose entire invocation is enumerated explicitly (uname, ps, etc.).
|
|
14
|
+
|
|
15
|
+
When a schema entry has must_match_allowed_paths=True, the corresponding
|
|
16
|
+
token is checked through the PathMatcher.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from typing import Any
|
|
22
|
+
|
|
23
|
+
from .path_matcher import PathMatcher
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def validate_args(
    command: str,
    argv: list[str],
    schema: dict[str, Any],
    matcher: PathMatcher | None,
) -> str | None:
    """Check ``argv`` against the policy entry ``schema`` for ``command``.

    Returns None when argv is acceptable, otherwise a rejection reason.
    An ``allowed_argv`` key takes precedence over ``args_schema`` when both
    are present.
    """
    if not schema:
        return f"command {command!r} not in policy"

    has_enumerated_form = "allowed_argv" in schema
    has_role_form = "args_schema" in schema

    if not (has_enumerated_form or has_role_form):
        return f"policy entry for {command!r} has neither allowed_argv nor args_schema"

    if has_enumerated_form:
        return _check_allowed_argv(command, argv, schema["allowed_argv"], matcher)

    return _check_args_schema(command, argv, schema["args_schema"], matcher)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _check_allowed_argv(
|
|
46
|
+
command: str,
|
|
47
|
+
argv: list[str],
|
|
48
|
+
allowed: list[list[str]],
|
|
49
|
+
matcher: PathMatcher | None,
|
|
50
|
+
) -> str | None:
|
|
51
|
+
for template in allowed:
|
|
52
|
+
if len(template) != len(argv):
|
|
53
|
+
continue
|
|
54
|
+
ok = True
|
|
55
|
+
for tmpl_tok, real_tok in zip(template, argv):
|
|
56
|
+
if tmpl_tok == "<allowed_path>":
|
|
57
|
+
if matcher is None or matcher.matches(real_tok) is not None:
|
|
58
|
+
ok = False
|
|
59
|
+
break
|
|
60
|
+
elif tmpl_tok != real_tok:
|
|
61
|
+
ok = False
|
|
62
|
+
break
|
|
63
|
+
if ok:
|
|
64
|
+
return None
|
|
65
|
+
return f"argv {argv!r} does not match any allowed_argv for {command!r}"
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _check_args_schema(
|
|
69
|
+
command: str,
|
|
70
|
+
argv: list[str],
|
|
71
|
+
schema: list[dict[str, Any]],
|
|
72
|
+
matcher: PathMatcher | None,
|
|
73
|
+
) -> str | None:
|
|
74
|
+
if not argv or argv[0] != command:
|
|
75
|
+
return f"argv[0]={argv[0]!r} does not match command {command!r}"
|
|
76
|
+
rest = argv[1:]
|
|
77
|
+
cursor = 0
|
|
78
|
+
|
|
79
|
+
for entry in schema:
|
|
80
|
+
role = entry.get("role")
|
|
81
|
+
if role == "flags":
|
|
82
|
+
allowed = set(entry.get("allowed", []))
|
|
83
|
+
forbidden = set(entry.get("forbidden", []))
|
|
84
|
+
value_taking = set(entry.get("value_taking", []))
|
|
85
|
+
while cursor < len(rest) and rest[cursor].startswith("-"):
|
|
86
|
+
tok = rest[cursor]
|
|
87
|
+
if tok in forbidden:
|
|
88
|
+
return f"flag {tok!r} is forbidden for {command!r}"
|
|
89
|
+
if tok not in allowed:
|
|
90
|
+
return f"flag {tok!r} is not in allowed list for {command!r}"
|
|
91
|
+
cursor += 1
|
|
92
|
+
if tok in value_taking:
|
|
93
|
+
# Consume the flag's value (e.g. "5" after "-n", "%y"
|
|
94
|
+
# after "-c"). Without this branch the value would
|
|
95
|
+
# fall through to the next schema entry and get
|
|
96
|
+
# matched as a positional path, producing a
|
|
97
|
+
# misleading "path '5' not in allowed_paths" reject.
|
|
98
|
+
if cursor >= len(rest):
|
|
99
|
+
return (
|
|
100
|
+
f"flag {tok!r} requires a value but argv ended for {command!r}"
|
|
101
|
+
)
|
|
102
|
+
cursor += 1
|
|
103
|
+
elif role == "positional":
|
|
104
|
+
if cursor >= len(rest):
|
|
105
|
+
if entry.get("required", False):
|
|
106
|
+
return f"missing required positional for {command!r}"
|
|
107
|
+
continue
|
|
108
|
+
tok = rest[cursor]
|
|
109
|
+
cursor += 1
|
|
110
|
+
if entry.get("kind") == "path" and entry.get("must_match_allowed_paths"):
|
|
111
|
+
if matcher is None:
|
|
112
|
+
return "matcher required for path positional"
|
|
113
|
+
reason = matcher.matches(tok)
|
|
114
|
+
if reason is not None:
|
|
115
|
+
return reason
|
|
116
|
+
elif role == "predicate_pairs":
|
|
117
|
+
allowed = set(entry.get("allowed_predicates", []))
|
|
118
|
+
forbidden = set(entry.get("forbidden_predicates", []))
|
|
119
|
+
while cursor < len(rest):
|
|
120
|
+
pred = rest[cursor]
|
|
121
|
+
if pred in forbidden:
|
|
122
|
+
return f"predicate {pred!r} is forbidden for {command!r}"
|
|
123
|
+
if pred not in allowed:
|
|
124
|
+
return f"predicate {pred!r} is not allowed for {command!r}"
|
|
125
|
+
cursor += 1
|
|
126
|
+
if cursor >= len(rest):
|
|
127
|
+
return f"predicate {pred!r} missing value"
|
|
128
|
+
cursor += 1 # consume the value
|
|
129
|
+
else:
|
|
130
|
+
return f"unknown args_schema role {role!r}"
|
|
131
|
+
|
|
132
|
+
if cursor != len(rest):
|
|
133
|
+
return f"unexpected trailing argv tokens: {rest[cursor:]!r}"
|
|
134
|
+
return None
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""AUTHORIZATION.txt parser + validator (spec §6.4).
|
|
2
|
+
|
|
3
|
+
This file is the only thing standing between AgentSec and unauthorized
|
|
4
|
+
execution against a real OpenClaw deployment. Any change must keep the
|
|
5
|
+
canonicalization byte-for-byte consistent with the spec — a one-character
|
|
6
|
+
drift breaks every previously-signed AUTHORIZATION.txt in the wild.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import base64
|
|
12
|
+
import hashlib
|
|
13
|
+
import hmac as _hmac
|
|
14
|
+
import os
|
|
15
|
+
from datetime import datetime, timezone
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Literal
|
|
18
|
+
|
|
19
|
+
import yaml
|
|
20
|
+
from pydantic import BaseModel, Field
|
|
21
|
+
|
|
22
|
+
SignatureMode = Literal["hmac_sha256", "none"]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class AuthorizationError(Exception):
    """Signals that AUTHORIZATION.txt could not be used.

    Raised for a malformed file, missing fields, or a failed validation step.
    """
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class Authorization(BaseModel):
    """Parsed AUTHORIZATION.txt plus the checks that gate a run (spec §6.4).

    Build instances with `load()`, then call `validate()` with the values of
    the run being requested.
    """

    target_host: str
    authorized_by: str
    identity_provider: str = ""
    identity_assertion: str = ""
    valid_from: datetime
    valid_until: datetime
    scope: list[str]
    report_output_path_prefix: str
    signature_mode: SignatureMode = "none"
    signature: str | None = None
    # Name of the environment variable holding the HMAC key (not the key itself).
    signature_key_env: str | None = None

    # Derived by validate(); excluded from serialization.
    low_assurance: bool = Field(default=False, exclude=True)

    def compute_signature(self, key: bytes) -> str:
        """Return the base64 HMAC-SHA256 of the canonical message under ``key``."""
        msg = canonical_message(self).encode("utf-8")
        return base64.b64encode(_hmac.new(key, msg, hashlib.sha256).digest()).decode("ascii")

    # NOTE(review): this instance method shares its name with pydantic's
    # deprecated `BaseModel.validate` classmethod — confirm nothing relies on
    # the pydantic one before renaming or upgrading.
    def validate(
        self,
        *,
        target_host: str,
        report_output_path_prefix: str,
        required_scopes: list[str],
        now: datetime | None = None,
    ) -> None:
        """Run the seven-step chain from spec §6.4. Raises AuthorizationError on
        the first failing step. On success, sets self.low_assurance based on
        signature_mode and identity_assertion presence.

        Steps 1-2 (file readable, YAML parses) ran in `load()`; this method
        covers steps 3-7.

        Args:
            target_host: Host of the requested run; must equal the file's value.
            report_output_path_prefix: Output prefix of the requested run,
                compared as a ``Path``.
            required_scopes: Capabilities the run needs; all must be in ``scope``.
            now: Timezone-aware instant to evaluate the validity window at;
                defaults to the current UTC time.

        Raises:
            AuthorizationError: any step fails, including a naive/aware
                datetime mix that makes the window check impossible.
        """
        if self.target_host != target_host:
            raise AuthorizationError(
                f"target_host mismatch: AUTHORIZATION.txt={self.target_host!r}, "
                f"requested={target_host!r}"
            )
        now = now or datetime.now(timezone.utc)
        try:
            in_window = self.valid_from <= now <= self.valid_until
        except TypeError as e:
            # Naive vs. aware datetimes cannot be ordered. Fail closed with
            # the same exception type as every other rejection.
            raise AuthorizationError(
                "cannot evaluate valid window: valid_from, valid_until and now "
                "must all be timezone-aware datetimes"
            ) from e
        if not in_window:
            raise AuthorizationError(
                f"current time {now.isoformat()} outside valid window "
                f"[{self.valid_from.isoformat()}, {self.valid_until.isoformat()}]"
            )
        missing = [s for s in required_scopes if s not in self.scope]
        if missing:
            raise AuthorizationError(f"scope missing required capabilities: {missing}")
        # Path-equality compare so `./report-2026-04-27/` (the form documented
        # in AUTHORIZATION.txt.example and CLAUDE.md) matches the prefix the
        # CLI builds from a Path-typed --output flag, which normalizes away
        # `./` and the trailing slash.
        if Path(self.report_output_path_prefix) != Path(report_output_path_prefix):
            raise AuthorizationError(
                f"report_output_path_prefix mismatch: "
                f"AUTHORIZATION.txt={self.report_output_path_prefix!r}, "
                f"requested={report_output_path_prefix!r}"
            )
        if self.signature_mode == "hmac_sha256":
            if not self.signature_key_env:
                raise AuthorizationError(
                    "signature_mode=hmac_sha256 requires signature_key_env"
                )
            key_str = os.environ.get(self.signature_key_env)
            if not key_str:
                raise AuthorizationError(
                    f"env var {self.signature_key_env!r} for signing key is empty"
                )
            if not self.signature:
                raise AuthorizationError(
                    "signature_mode=hmac_sha256 requires signature field"
                )
            expected = self.compute_signature(key_str.encode("utf-8"))
            # Compare as bytes: compare_digest() raises TypeError for str
            # arguments containing non-ASCII characters, and `signature`
            # comes straight from the file.
            if not _hmac.compare_digest(
                expected.encode("ascii"), self.signature.encode("utf-8")
            ):
                raise AuthorizationError("signature does not match (HMAC verification failed)")

        self.low_assurance = (
            self.signature_mode == "none" or not self.identity_assertion
        )

    @classmethod
    def load(cls, path: Path | str) -> "Authorization":
        """Read and parse AUTHORIZATION.txt at ``path`` (steps 1-2 of §6.4).

        Raises:
            AuthorizationError: the file is unreadable, not valid YAML, not a
                mapping, fails model validation, or has a validity bound
                without a UTC offset.
        """
        p = Path(path)
        try:
            raw = p.read_text(encoding="utf-8")
        except OSError as e:
            raise AuthorizationError(f"cannot read {p}: {e}") from e
        try:
            data = yaml.safe_load(raw)
        except yaml.YAMLError as e:
            raise AuthorizationError(f"cannot parse {p} as YAML: {e}") from e
        if not isinstance(data, dict):
            raise AuthorizationError(f"{p}: top level must be a mapping")
        try:
            auth = cls.model_validate(data)
        except Exception as e:
            raise AuthorizationError(f"{p}: {e}") from e
        # Pydantic v2 leaves datetimes without a tz suffix naive; comparing
        # naive ↔ aware in validate() would then fail. Reject early with a
        # clear message that names the offending field.
        for field_name in ("valid_from", "valid_until"):
            value = getattr(auth, field_name)
            if value.tzinfo is None or value.tzinfo.utcoffset(value) is None:
                raise AuthorizationError(
                    f"{p}: {field_name} must include a UTC offset "
                    f"(e.g. 2026-04-27T00:00:00Z), got naive datetime "
                    f"{value.isoformat()}"
                )
        return auth
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def canonical_message(auth: "Authorization") -> str:
    """Return the exact string that is HMAC-signed for ``auth`` (spec §6.4).

    The eight fields are emitted in a fixed order, one per line, joined by
    a single newline with no trailing newline. Datetimes use
    ``datetime.isoformat()`` (so UTC appears as '+00:00', not 'Z') and the
    scope list is sorted and comma-joined. Any change to this output
    invalidates previously issued signatures.
    """
    window_start = auth.valid_from.isoformat()
    window_end = auth.valid_until.isoformat()
    scope_csv = ",".join(sorted(auth.scope))
    ordered_fields = (
        auth.target_host,
        auth.authorized_by,
        auth.identity_provider,
        auth.identity_assertion,
        window_start,
        window_end,
        scope_csv,
        auth.report_output_path_prefix,
    )
    return "\n".join(ordered_fields)
|