agentsec-eval 0.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. agentsec/__init__.py +1 -0
  2. agentsec/adapters/__init__.py +4 -0
  3. agentsec/adapters/base.py +38 -0
  4. agentsec/adapters/http.py +101 -0
  5. agentsec/adapters/openclaw_gateway.py +70 -0
  6. agentsec/adapters/registry.py +43 -0
  7. agentsec/adapters/ws_adapter.py +122 -0
  8. agentsec/audit/__init__.py +7 -0
  9. agentsec/audit/args_schema.py +134 -0
  10. agentsec/audit/authorization.py +160 -0
  11. agentsec/audit/checks/__init__.py +0 -0
  12. agentsec/audit/checks/active_test.py +156 -0
  13. agentsec/audit/checks/base.py +103 -0
  14. agentsec/audit/checks/config_audit.py +98 -0
  15. agentsec/audit/checks/config_baseline.yaml +20 -0
  16. agentsec/audit/checks/credential_audit.py +117 -0
  17. agentsec/audit/checks/data/active-test-canary.yaml +11 -0
  18. agentsec/audit/checks/exposure_scan.py +131 -0
  19. agentsec/audit/checks/filesystem.py +61 -0
  20. agentsec/audit/checks/log_review.py +116 -0
  21. agentsec/audit/checks/native_audit.py +106 -0
  22. agentsec/audit/checks/plugin_static.py +224 -0
  23. agentsec/audit/checks/process_forensics.py +106 -0
  24. agentsec/audit/checks/version_patch.py +204 -0
  25. agentsec/audit/command_policy.py +113 -0
  26. agentsec/audit/findings.py +35 -0
  27. agentsec/audit/ioc/__init__.py +9 -0
  28. agentsec/audit/ioc/attack_signatures.yaml +34 -0
  29. agentsec/audit/ioc/clawhavoc_skills.json +37 -0
  30. agentsec/audit/ioc/cve_database.json +2442 -0
  31. agentsec/audit/ioc/threat_intel.yaml +2675 -0
  32. agentsec/audit/ioc/watchlist.yaml +37 -0
  33. agentsec/audit/ioc_update/__init__.py +1 -0
  34. agentsec/audit/ioc_update/cli.py +335 -0
  35. agentsec/audit/ioc_update/fetchers/__init__.py +5 -0
  36. agentsec/audit/ioc_update/fetchers/base.py +106 -0
  37. agentsec/audit/ioc_update/fetchers/ghsa.py +145 -0
  38. agentsec/audit/ioc_update/fetchers/kev.py +120 -0
  39. agentsec/audit/ioc_update/fetchers/nvd.py +144 -0
  40. agentsec/audit/ioc_update/id_minter.py +22 -0
  41. agentsec/audit/ioc_update/merger.py +223 -0
  42. agentsec/audit/ioc_update/normalizer.py +108 -0
  43. agentsec/audit/ioc_update/renderer.py +215 -0
  44. agentsec/audit/ioc_update/types.py +73 -0
  45. agentsec/audit/ioc_update/watchlist.py +60 -0
  46. agentsec/audit/metachar_guard.py +21 -0
  47. agentsec/audit/path_matcher.py +87 -0
  48. agentsec/audit/platform_profile.py +124 -0
  49. agentsec/audit/redactor.py +62 -0
  50. agentsec/audit/remote_home.py +37 -0
  51. agentsec/audit/runtime_probe.py +85 -0
  52. agentsec/audit/server_audit.py +41 -0
  53. agentsec/audit/snapshot.py +54 -0
  54. agentsec/audit/ssh.py +188 -0
  55. agentsec/audit/ssh_policy.yaml +134 -0
  56. agentsec/audit/tunnel.py +117 -0
  57. agentsec/audit/types.py +22 -0
  58. agentsec/cli.py +1069 -0
  59. agentsec/config.py +219 -0
  60. agentsec/diff/__init__.py +40 -0
  61. agentsec/diff/findings_delta.py +110 -0
  62. agentsec/diff/loader.py +115 -0
  63. agentsec/diff/renderer.py +187 -0
  64. agentsec/diff/score_delta.py +245 -0
  65. agentsec/evaluator/__init__.py +26 -0
  66. agentsec/evaluator/assertions/__init__.py +28 -0
  67. agentsec/evaluator/assertions/base.py +20 -0
  68. agentsec/evaluator/assertions/config_key_not_changed.py +134 -0
  69. agentsec/evaluator/assertions/file_not_created.py +49 -0
  70. agentsec/evaluator/assertions/json_path_equals.py +55 -0
  71. agentsec/evaluator/assertions/outbound_request_not_to.py +34 -0
  72. agentsec/evaluator/assertions/registry.py +39 -0
  73. agentsec/evaluator/assertions/response_not_contains_pattern.py +49 -0
  74. agentsec/evaluator/assertions/response_status_in.py +40 -0
  75. agentsec/evaluator/assertions/tool_event_not_invoked.py +65 -0
  76. agentsec/evaluator/base.py +46 -0
  77. agentsec/evaluator/deterministic_judge.py +78 -0
  78. agentsec/evaluator/hybrid_judge.py +87 -0
  79. agentsec/evaluator/judge.py +96 -0
  80. agentsec/evaluator/judge_factory.py +96 -0
  81. agentsec/evaluator/judge_router.py +23 -0
  82. agentsec/evaluator/no_op_judge.py +37 -0
  83. agentsec/evaluator/openai_judge.py +101 -0
  84. agentsec/evaluator/plugin_judge.py +38 -0
  85. agentsec/observability/__init__.py +19 -0
  86. agentsec/observability/fixture_server.py +190 -0
  87. agentsec/observability/network_observer.py +144 -0
  88. agentsec/observability/observation.py +22 -0
  89. agentsec/observability/runtime.py +99 -0
  90. agentsec/reports/__init__.py +17 -0
  91. agentsec/reports/json_report.py +54 -0
  92. agentsec/reports/markdown.py +397 -0
  93. agentsec/reports/multi_summary.py +89 -0
  94. agentsec/runner.py +245 -0
  95. agentsec/scoring.py +430 -0
  96. agentsec/serve/__init__.py +0 -0
  97. agentsec/serve/app.py +115 -0
  98. agentsec/serve/reader.py +127 -0
  99. agentsec/serve/templates/base.html +41 -0
  100. agentsec/serve/templates/dashboard.html +31 -0
  101. agentsec/serve/templates/report.html +13 -0
  102. agentsec/serve/templates/target.html +132 -0
  103. agentsec/suite_registry/__init__.py +54 -0
  104. agentsec/suite_registry/community-suites.yaml +9 -0
  105. agentsec/suite_registry/errors.py +49 -0
  106. agentsec/suite_registry/fetcher.py +49 -0
  107. agentsec/suite_registry/hashing.py +40 -0
  108. agentsec/suite_registry/installer.py +191 -0
  109. agentsec/suite_registry/manifest.py +138 -0
  110. agentsec/suite_registry/registry.py +94 -0
  111. agentsec/suite_registry/spdx_licenses.txt +30 -0
  112. agentsec/suite_registry/store.py +108 -0
  113. agentsec/tests/__init__.py +4 -0
  114. agentsec/tests/loader.py +103 -0
  115. agentsec/tests/models.py +118 -0
  116. agentsec_eval-0.9.1.dist-info/METADATA +653 -0
  117. agentsec_eval-0.9.1.dist-info/RECORD +120 -0
  118. agentsec_eval-0.9.1.dist-info/WHEEL +4 -0
  119. agentsec_eval-0.9.1.dist-info/entry_points.txt +2 -0
  120. agentsec_eval-0.9.1.dist-info/licenses/LICENSE +21 -0
agentsec/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """AgentSec — Security assessment framework for AI agents."""
@@ -0,0 +1,4 @@
1
+ from .base import AgentAdapter, AgentResponse
2
+ from .registry import available, get, register
3
+
4
+ __all__ = ["AgentAdapter", "AgentResponse", "available", "get", "register"]
@@ -0,0 +1,38 @@
1
+ from abc import ABC, abstractmethod
2
+ from dataclasses import dataclass, field
3
+
4
+
5
+ @dataclass
6
+ class AgentResponse:
7
+ """Response from a target agent.
8
+
9
+ Adapters MUST NOT raise on non-2xx HTTP; populate `status_code` / `error`
10
+ and let the judge / assertions decide pass/fail (spec §4, OE-AUD2-002).
11
+
12
+ `tool_events` carries adapter-extracted tool invocations in a uniform
13
+ `[{"tool": str, "args": str}, ...]` shape so `tool_event_not_invoked`
14
+ can fire on whatever the underlying agent reports — OpenAI-compatible
15
+ `choices[].message.tool_calls` for `OpenClawGatewayAdapter`, future
16
+ SSH/native-audit observers for Stage D adapters.
17
+ """
18
+ content: str
19
+ raw: dict = field(default_factory=dict)
20
+ latency_ms: float = 0.0
21
+ status_code: int | None = None
22
+ headers: dict[str, str] = field(default_factory=dict)
23
+ request_url: str | None = None
24
+ error: str | None = None
25
+ tool_events: list[dict] = field(default_factory=list)
26
+
27
+
28
class AgentAdapter(ABC):
    """Contract that every target-agent adapter fulfils."""

    # Identifier for the adapter; implementations are expected to override it.
    name: str = "unnamed"

    @abstractmethod
    async def send(self, message: str, session_id: str) -> AgentResponse:
        """Deliver `message` to the agent within `session_id` and return its reply."""
        ...

    async def reset(self, session_id: str) -> None:
        """Clear the agent's state for `session_id`.

        The default implementation does nothing; adapters whose agent
        supports resets override it.
        """
        return None
@@ -0,0 +1,101 @@
1
+ """Generic HTTP adapter — covers agents that expose a simple chat REST API."""
2
+
3
+ import time
4
+ from typing import Callable
5
+
6
+ import httpx
7
+
8
+ from .base import AgentAdapter, AgentResponse
9
+
10
+
11
class HttpAgentAdapter(AgentAdapter):
    """Adapter for agents that expose a JSON-over-HTTP chat endpoint.

    Default contract:
        POST {base_url}{path}
        Body:     {"session_id": "...", "message": "..."}
        Response: {"reply": "..."}

    Subclasses override `_build_request` / `_parse_response` /
    `_parse_tool_events` for non-standard schemas. The `path` parameter
    (default `/chat`) lets the OpenClaw gateway adapter target
    `/v1/chat/completions` without rewriting `send` (spec §5.2).

    Per OE-AUD2-002: 4xx/5xx do NOT raise; status_code/error are populated
    and the judge decides whether the response is a failure. The same holds
    for transport errors, invalid URLs, and 2xx bodies that are not a JSON
    object.
    """

    def __init__(
        self,
        name: str,
        base_url: str,
        path: str = "/chat",
        headers: dict[str, str] | None = None,
        timeout: float = 30.0,
    ):
        """Store connection settings.

        Args:
            name: Adapter name, stored on the instance.
            base_url: Scheme + host (+ optional prefix); trailing slashes are
                stripped so it joins cleanly with `path`.
            path: Endpoint path; a leading slash is added when missing.
            headers: Extra request headers sent with every call.
            timeout: Timeout handed to `httpx.AsyncClient`.
        """
        self.name = name
        self._base_url = base_url.rstrip("/")
        self._path = path if path.startswith("/") else "/" + path
        self._headers = headers or {}
        self._timeout = timeout
        # Optional override used by `_make_client`; None means "build a
        # default httpx.AsyncClient".
        self._client_factory: Callable[[], httpx.AsyncClient] | None = None

    def _build_request(self, message: str, session_id: str) -> dict:
        """Subclass hook: build the JSON request body."""
        return {"session_id": session_id, "message": message}

    def _parse_response(self, data: dict) -> str:
        """Subclass hook: pull the reply text out of the parsed JSON object.

        Tries `reply`, then `content`, then `message`; falls back to the
        stringified body when none of them is present or truthy.
        """
        return data.get("reply") or data.get("content") or data.get("message") or str(data)

    def _parse_tool_events(self, data: dict) -> list[dict]:
        """Subclass hook: extract tool invocations from the parsed JSON body
        into the uniform `[{"tool": str, "args": str}, ...]` shape consumed by
        `tool_event_not_invoked`. Default returns `[]` since the generic body
        schema is content-only."""
        return []

    def _make_client(self) -> httpx.AsyncClient:
        """Return the client used for one `send` call."""
        if self._client_factory is not None:
            return self._client_factory()
        return httpx.AsyncClient(timeout=self._timeout)

    async def send(self, message: str, session_id: str) -> AgentResponse:
        """POST one message and describe the outcome as an `AgentResponse`.

        Returns:
            An `AgentResponse` whose `error` is set (and `content` holds the
            raw body text, when one was received) for non-2xx statuses,
            unparseable or non-object 2xx bodies, and transport failures.
        """
        url = f"{self._base_url}{self._path}"
        start = time.monotonic()
        status: int | None = None
        headers: dict[str, str] = {}
        data: dict = {}
        content: str = ""
        error: str | None = None
        tool_events: list[dict] = []
        try:
            async with self._make_client() as client:
                resp = await client.post(
                    url,
                    json=self._build_request(message, session_id),
                    headers=self._headers or None,
                )
                status = resp.status_code
                headers = dict(resp.headers)
                if 200 <= resp.status_code < 300:
                    try:
                        body = resp.json()
                        if isinstance(body, dict):
                            data = body
                            content = self._parse_response(data)
                            tool_events = self._parse_tool_events(data)
                        else:
                            # Valid JSON but not an object (list, string,
                            # number...). The parse hooks expect a dict and
                            # would raise AttributeError, so report it as an
                            # error instead of letting it escape.
                            error = f"non-object JSON 2xx body: {type(body).__name__}"
                            content = resp.text
                    except ValueError as exc:
                        error = f"non-JSON 2xx body: {exc}"
                        content = resp.text
                else:
                    error = f"HTTP {resp.status_code}: {resp.text[:200]}"
                    content = resp.text
        except (httpx.HTTPError, httpx.InvalidURL) as exc:
            # InvalidURL does not derive from HTTPError, so it is listed
            # explicitly to keep a malformed base_url from raising.
            error = f"{type(exc).__name__}: {exc}"
        return AgentResponse(
            content=content,
            raw=data,
            latency_ms=(time.monotonic() - start) * 1000,
            status_code=status,
            headers=headers,
            request_url=url,
            error=error,
            tool_events=tool_events,
        )
@@ -0,0 +1,70 @@
1
+ """OpenClaw `/v1/chat/completions` adapter (spec §5.2).
2
+
3
+ OpenClaw exposes an OpenAI-compatible chat-completions endpoint. The body shape
4
+ differs from `HttpAgentAdapter`'s default `{"session_id", "message"}` schema, so
5
+ we override `_build_request` and `_parse_response` while reusing the transport
6
+ and 4xx/5xx handling from the base class (OE-AUD2-002).
7
+ """
8
+
9
+ from .http import HttpAgentAdapter
10
+
11
+
12
class OpenClawGatewayAdapter(HttpAgentAdapter):
    """`HttpAgentAdapter` specialised for the OpenAI-compatible
    `/v1/chat/completions` endpoint.

    Only the request/response schema hooks are overridden; transport and
    4xx/5xx handling come from the base class.
    """

    def __init__(
        self,
        name: str,
        base_url: str,
        headers: dict[str, str] | None = None,
        timeout: float = 30.0,
    ):
        """Configure the base adapter with the fixed chat-completions path."""
        super().__init__(
            name=name,
            base_url=base_url,
            path="/v1/chat/completions",
            headers=headers,
            timeout=timeout,
        )

    def _build_request(self, message: str, session_id: str) -> dict:
        """Build a single-turn, non-streaming chat-completions body.

        The session id travels in `metadata.session_id`.
        """
        return {
            "model": "openclaw",
            "messages": [{"role": "user", "content": message}],
            "stream": False,
            "metadata": {"session_id": session_id},
        }

    def _parse_response(self, data: dict) -> str:
        """Return `choices[0].message.content`.

        A body that lacks that path falls back to `str(data)`. A null
        `content` (as in a reply that only carries `tool_calls`) becomes an
        empty string so the result is always a `str`.
        """
        try:
            content = data["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            return str(data)
        return "" if content is None else content

    def _parse_tool_events(self, data: dict) -> list[dict]:
        """Extract OpenAI-style `choices[0].message.tool_calls[*]` into the
        uniform `[{"tool": fn.name, "args": fn.arguments}, ...]` shape. Per
        OE-AUD2-002 a malformed body must not raise — unparseable entries are
        silently skipped so `tool_event_not_invoked` only sees real, named
        invocations."""
        import json  # local import: only needed for non-string arguments

        try:
            calls = data["choices"][0]["message"].get("tool_calls") or []
        except (KeyError, IndexError, TypeError, AttributeError):
            # AttributeError covers a `message` that is not a mapping.
            return []
        if not isinstance(calls, list):
            # A non-list `tool_calls` is malformed; iterating it could raise.
            return []
        events: list[dict] = []
        for call in calls:
            if not isinstance(call, dict):
                continue
            fn = call.get("function")
            if not isinstance(fn, dict):
                continue
            name = fn.get("name")
            if not isinstance(name, str) or not name:
                continue
            args = fn.get("arguments")
            if isinstance(args, str):
                # OpenAI emits arguments as a JSON-encoded string; pass it
                # through verbatim so `with_args_pattern` can regex against
                # the raw payload without round-tripping through json.
                args_text = args
            elif not args:
                args_text = ""
            else:
                # Some servers send an already-decoded object. Serialise it
                # so `args` keeps its documented string type.
                try:
                    args_text = json.dumps(args)
                except (TypeError, ValueError):
                    args_text = str(args)
            events.append({"tool": name, "args": args_text})
        return events
@@ -0,0 +1,43 @@
1
+ """Adapter registry — resolves CLI/config `adapter` strings to factory callables.
2
+
3
+ Why: keeping construction in a registry means new adapters need no CLI edits and
4
+ can be tested in isolation. Spec §5.2.
5
+ """
6
+
7
+ from collections.abc import Callable
8
+
9
+ from .base import AgentAdapter
10
+
11
+ AdapterFactory = Callable[..., AgentAdapter]
12
+
13
+ _REGISTRY: dict[str, AdapterFactory] = {}
14
+
15
+
16
def register(name: str, factory: AdapterFactory) -> None:
    """Add `factory` to the registry under `name`.

    Raises:
        ValueError: if `name` is already taken; existing entries are never
            overwritten.
    """
    already_taken = name in _REGISTRY
    if already_taken:
        raise ValueError(f"adapter {name!r} is already registered")
    _REGISTRY[name] = factory
20
+
21
+
22
def get(name: str) -> AdapterFactory:
    """Look up the factory registered under `name`.

    Raises:
        KeyError: if no adapter has that name; the message lists the
            registered names in sorted order.
    """
    try:
        return _REGISTRY[name]
    except KeyError:
        known = sorted(_REGISTRY)
        raise KeyError(
            f"unknown adapter {name!r}; available: {known}"
        ) from None
28
+
29
+
30
def available() -> list[str]:
    """Return the registered adapter names in sorted order."""
    names = list(_REGISTRY)
    names.sort()
    return names
32
+
33
+
34
def _register_builtins() -> None:
    """Register the adapters that ship with the package."""
    # Deferred to call time so loading this module does not trigger a
    # circular import.
    from .http import HttpAgentAdapter
    from .openclaw_gateway import OpenClawGatewayAdapter

    builtin_factories = (
        ("http", HttpAgentAdapter),
        ("openclaw-gateway", OpenClawGatewayAdapter),
    )
    for adapter_name, factory in builtin_factories:
        register(adapter_name, factory)


# Populate the registry once, at import time.
_register_builtins()
@@ -0,0 +1,122 @@
1
+ """WebSocket agent adapter — sends messages and listens for responses over WS.
2
+
3
+ Session correlation: the session_id is appended as a query parameter so the
4
+ server can tie the WS connection to the same logical session as HTTP calls.
5
+
6
+ MockWebSocketAdapter overrides send() and listen() without importing websockets,
7
+ so tests run without a real WebSocket server.
8
+ """
9
+
10
+ import asyncio
11
+ import time
12
+
13
+ from .base import AgentAdapter, AgentResponse
14
+
15
+
16
class WebSocketAgentAdapter(AgentAdapter):
    """Adapter that communicates with an agent over WebSocket.

    URL convention: ws_url?session_id=<percent-encoded session_id>
    Send payload:   {"message": "<text>"}
    Receive:        first frame is the agent reply
    """

    name = "websocket"

    def __init__(
        self,
        ws_url: str,
        extra_headers: dict[str, str] | None = None,
        connect_timeout: float = 10.0,
        recv_timeout: float = 30.0,
    ):
        """Store connection settings.

        Args:
            ws_url: WebSocket endpoint; trailing slashes are stripped.
            extra_headers: Headers passed to `websockets.connect`.
            connect_timeout: Seconds budgeted for connecting.
            recv_timeout: Seconds to wait for the reply frame in `send`.
        """
        self._ws_url = ws_url.rstrip("/")
        self._extra_headers = extra_headers or {}
        self._connect_timeout = connect_timeout
        self._recv_timeout = recv_timeout

    @staticmethod
    def _frame_text(frame) -> str:
        """Return a received frame as text.

        Binary frames are decoded as UTF-8 with replacement characters so an
        undecodable payload cannot raise out of `send` / `listen`.
        """
        if isinstance(frame, str):
            return frame
        return frame.decode("utf-8", errors="replace")

    def _session_url(self, session_id: str) -> str:
        """Return the connection URL with `session_id` as a query parameter."""
        from urllib.parse import quote  # local import, used only here

        sep = "&" if "?" in self._ws_url else "?"
        # Percent-encode so characters such as "&", "#", "?" or spaces in the
        # session id cannot split or truncate the query string.
        encoded_id = quote(session_id, safe="")
        return f"{self._ws_url}{sep}session_id={encoded_id}"

    async def send(self, message: str, session_id: str) -> AgentResponse:
        """Send one message and return the first frame received as the reply.

        Failures are reported through `AgentResponse.error` (`ws_timeout` or
        `ws_connect_failed: ...`) rather than raised.
        """
        import json

        import websockets  # lazy import — only needed for real connections

        url = self._session_url(session_id)
        start = time.monotonic()
        try:
            # The outer timeout bounds connect + send + receive as a whole.
            async with asyncio.timeout(self._connect_timeout + self._recv_timeout):
                async with websockets.connect(url, additional_headers=self._extra_headers) as ws:
                    await ws.send(json.dumps({"message": message}))
                    raw_text = await asyncio.wait_for(ws.recv(), timeout=self._recv_timeout)
        except TimeoutError:
            return AgentResponse(content="", error="ws_timeout",
                                 latency_ms=(time.monotonic() - start) * 1000)
        except Exception as exc:  # noqa: BLE001
            return AgentResponse(content="", error=f"ws_connect_failed: {exc}",
                                 latency_ms=(time.monotonic() - start) * 1000)

        return AgentResponse(
            content=self._frame_text(raw_text),
            latency_ms=(time.monotonic() - start) * 1000,
        )

    async def listen(self, session_id: str, duration_s: float) -> str:
        """Connect and collect all incoming WS messages for `duration_s` seconds.

        Returns the collected frames joined with newlines. Connection errors
        are swallowed, so the result is whatever was received before the
        failure (possibly the empty string).
        """
        import websockets  # lazy import

        url = self._session_url(session_id)
        messages: list[str] = []
        try:
            async with asyncio.timeout(duration_s + self._connect_timeout):
                async with websockets.connect(url, additional_headers=self._extra_headers) as ws:
                    loop = asyncio.get_running_loop()
                    deadline = loop.time() + duration_s
                    while True:
                        remaining = deadline - loop.time()
                        if remaining <= 0:
                            break
                        try:
                            frame = await asyncio.wait_for(ws.recv(), timeout=remaining)
                            messages.append(self._frame_text(frame))
                        except TimeoutError:
                            break
        except Exception:  # noqa: BLE001 — connection errors produce empty output
            pass
        return "\n".join(messages)
89
+
90
+
91
class MockWebSocketAdapter(WebSocketAgentAdapter):
    """In-memory stand-in for `WebSocketAgentAdapter`; opens no connection.

    Every call is recorded in `send_calls` / `listen_calls`, and the canned
    values given to the constructor are returned.

    Usage:
        mock = MockWebSocketAdapter(send_reply="Safe response", listen_data="no leak")
        result = await mock.send("attack", "session-1")
        assert result.content == "Safe response"
    """

    def __init__(
        self,
        send_reply: str = "",
        listen_data: str = "",
        send_error: str | None = None,
    ):
        """Remember the canned outputs and start with empty call logs."""
        super().__init__(ws_url="ws://mock.invalid")
        # Call logs: (message, session_id) and (session_id, duration_s).
        self.send_calls: list[tuple[str, str]] = []
        self.listen_calls: list[tuple[str, float]] = []
        # Canned outputs.
        self._send_reply = send_reply
        self._send_error = send_error
        self._listen_data = listen_data

    async def send(self, message: str, session_id: str) -> AgentResponse:
        """Log the call and return the canned reply / error."""
        call = (message, session_id)
        self.send_calls.append(call)
        return AgentResponse(content=self._send_reply, error=self._send_error)

    async def listen(self, session_id: str, duration_s: float) -> str:
        """Log the call and return the canned listen data without waiting."""
        call = (session_id, duration_s)
        self.listen_calls.append(call)
        return self._listen_data
@@ -0,0 +1,7 @@
1
+ """Audit module — SSH policy, redaction, server-audit checks."""
2
+
3
+ from .findings import Finding, Severity
4
+ from .redactor import Redactor
5
+ from .types import RunResult
6
+
7
+ __all__ = ["Finding", "Redactor", "RunResult", "Severity"]
@@ -0,0 +1,134 @@
1
+ """Per-command args-schema validation (spec §6.1).
2
+
3
+ Three role types in args_schema entries:
4
+ - "flags": list of allowed/forbidden short/long flags. Flags listed in
5
+ ``value_taking`` consume the next argv token as their value
6
+ (e.g. ``-n 5`` for ``head``); without ``value_taking`` the
7
+ value falls through to the next schema entry and gets matched
8
+ as a positional path.
9
+ - "positional": one positional arg with kind={path, pattern}
10
+ - "predicate_pairs": (used by find) -name VALUE / -mtime VALUE pairs
11
+
12
+ Plus an alternative top-level form `allowed_argv: list[list[str]]` for
13
+ commands whose entire invocation is enumerated explicitly (uname, ps, etc.).
14
+
15
+ When a schema entry has must_match_allowed_paths=True, the corresponding
16
+ token is checked through the PathMatcher.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from typing import Any
22
+
23
+ from .path_matcher import PathMatcher
24
+
25
+
26
def validate_args(
    command: str,
    argv: list[str],
    schema: dict[str, Any],
    matcher: PathMatcher | None,
) -> str | None:
    """Check `argv` against the policy entry for `command`.

    Returns None when the invocation is acceptable, otherwise a string
    explaining the rejection. An entry with `allowed_argv` is checked in
    that form even when it also carries `args_schema`.
    """
    if not schema:
        return f"command {command!r} not in policy"

    enumerated = "allowed_argv" in schema
    if not enumerated and "args_schema" not in schema:
        return f"policy entry for {command!r} has neither allowed_argv nor args_schema"

    if enumerated:
        return _check_allowed_argv(command, argv, schema["allowed_argv"], matcher)
    return _check_args_schema(command, argv, schema["args_schema"], matcher)
43
+
44
+
45
def _check_allowed_argv(
    command: str,
    argv: list[str],
    allowed: list[list[str]],
    matcher: PathMatcher | None,
) -> str | None:
    """Accept `argv` when it equals one of the enumerated templates.

    A template token matches either literally or, when it is the
    placeholder `<allowed_path>`, when `matcher.matches()` returns None for
    the real token. Without a matcher the placeholder never matches.

    Returns None on a match, else a reason string.
    """

    def _token_ok(expected: str, actual: str) -> bool:
        if expected == "<allowed_path>":
            return matcher is not None and matcher.matches(actual) is None
        return expected == actual

    for candidate in allowed:
        if len(candidate) != len(argv):
            continue
        # all() stops at the first mismatching token.
        if all(_token_ok(exp, act) for exp, act in zip(candidate, argv)):
            return None
    return f"argv {argv!r} does not match any allowed_argv for {command!r}"
66
+
67
+
68
def _check_args_schema(
    command: str,
    argv: list[str],
    schema: list[dict[str, Any]],
    matcher: PathMatcher | None,
) -> str | None:
    """Walk `argv[1:]` through the ordered `args_schema` entries.

    Each entry consumes tokens according to its `role`:
      - "flags": leading `-`-prefixed tokens; each must be in `allowed` and
        not in `forbidden`; flags in `value_taking` also consume the next
        token as their value.
      - "positional": one token; when `kind == "path"` and
        `must_match_allowed_paths` is set, it must pass `matcher.matches()`.
      - "predicate_pairs": the remaining tokens as PREDICATE VALUE pairs.

    Returns None when every token is consumed and accepted, else a reason
    string. An empty `argv` is rejected with a reason rather than raising.
    """
    if not argv:
        # Guard separately: the mismatch message below indexes argv[0].
        return f"empty argv does not match command {command!r}"
    if argv[0] != command:
        return f"argv[0]={argv[0]!r} does not match command {command!r}"
    rest = argv[1:]
    cursor = 0

    for entry in schema:
        role = entry.get("role")
        if role == "flags":
            allowed = set(entry.get("allowed", []))
            forbidden = set(entry.get("forbidden", []))
            value_taking = set(entry.get("value_taking", []))
            while cursor < len(rest) and rest[cursor].startswith("-"):
                tok = rest[cursor]
                if tok in forbidden:
                    return f"flag {tok!r} is forbidden for {command!r}"
                if tok not in allowed:
                    return f"flag {tok!r} is not in allowed list for {command!r}"
                cursor += 1
                if tok in value_taking:
                    # Consume the flag's value (e.g. "5" after "-n", "%y"
                    # after "-c"). Without this branch the value would
                    # fall through to the next schema entry and get
                    # matched as a positional path, producing a
                    # misleading "path '5' not in allowed_paths" reject.
                    if cursor >= len(rest):
                        return (
                            f"flag {tok!r} requires a value but argv ended for {command!r}"
                        )
                    cursor += 1
        elif role == "positional":
            if cursor >= len(rest):
                if entry.get("required", False):
                    return f"missing required positional for {command!r}"
                continue
            tok = rest[cursor]
            cursor += 1
            if entry.get("kind") == "path" and entry.get("must_match_allowed_paths"):
                if matcher is None:
                    return "matcher required for path positional"
                reason = matcher.matches(tok)
                if reason is not None:
                    return reason
        elif role == "predicate_pairs":
            allowed = set(entry.get("allowed_predicates", []))
            forbidden = set(entry.get("forbidden_predicates", []))
            while cursor < len(rest):
                pred = rest[cursor]
                if pred in forbidden:
                    return f"predicate {pred!r} is forbidden for {command!r}"
                if pred not in allowed:
                    return f"predicate {pred!r} is not allowed for {command!r}"
                cursor += 1
                if cursor >= len(rest):
                    return f"predicate {pred!r} missing value"
                cursor += 1  # consume the value
        else:
            return f"unknown args_schema role {role!r}"

    # Anything left over was not described by the schema.
    if cursor != len(rest):
        return f"unexpected trailing argv tokens: {rest[cursor:]!r}"
    return None
@@ -0,0 +1,160 @@
1
+ """AUTHORIZATION.txt parser + validator (spec §6.4).
2
+
3
+ This file is the only thing standing between AgentSec and unauthorized
4
+ execution against a real OpenClaw deployment. Any change must keep the
5
+ canonicalization byte-for-byte consistent with the spec — a one-character
6
+ drift breaks every previously-signed AUTHORIZATION.txt in the wild.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import base64
12
+ import hashlib
13
+ import hmac as _hmac
14
+ import os
15
+ from datetime import datetime, timezone
16
+ from pathlib import Path
17
+ from typing import Literal
18
+
19
+ import yaml
20
+ from pydantic import BaseModel, Field
21
+
22
+ SignatureMode = Literal["hmac_sha256", "none"]
23
+
24
+
25
class AuthorizationError(Exception):
    """Signals that AUTHORIZATION.txt could not be accepted.

    Covers an unreadable or malformed file, missing fields, and any failed
    validation step.
    """
27
+
28
+
29
class Authorization(BaseModel):
    """Parsed contents of an AUTHORIZATION.txt file (spec §6.4).

    Build instances with `load()`, then call `validate()` with the
    parameters of the run that is about to start.
    """

    target_host: str
    authorized_by: str
    identity_provider: str = ""
    identity_assertion: str = ""
    valid_from: datetime
    valid_until: datetime
    scope: list[str]
    report_output_path_prefix: str
    signature_mode: SignatureMode = "none"
    signature: str | None = None
    signature_key_env: str | None = None

    # Set by validate(); excluded from serialization.
    low_assurance: bool = Field(default=False, exclude=True)

    def compute_signature(self, key: bytes) -> str:
        """Return the base64 HMAC-SHA256 of the canonical message under `key`."""
        msg = canonical_message(self).encode("utf-8")
        return base64.b64encode(_hmac.new(key, msg, hashlib.sha256).digest()).decode("ascii")

    def validate(
        self,
        *,
        target_host: str,
        report_output_path_prefix: str,
        required_scopes: list[str],
        now: datetime | None = None,
    ) -> None:
        """Run the seven-step chain from spec §6.4. Raises AuthorizationError on
        the first failing step. On success, sets self.low_assurance based on
        signature_mode and identity_assertion presence.

        Steps 1-2 (file readable, YAML parses) ran in `load()`; this method
        covers steps 3-7.

        Args:
            target_host: Host the run targets; must equal the authorized host.
            report_output_path_prefix: Output prefix of the run; compared as
                a `Path` against the authorized prefix.
            required_scopes: Capabilities the run needs; all must be in
                `scope`.
            now: Timezone-aware clock override; defaults to the current UTC
                time.

        Raises:
            AuthorizationError: on any mismatch, an expired or not-yet-valid
                window, missing scopes, or a failed signature check.
        """
        if self.target_host != target_host:
            raise AuthorizationError(
                f"target_host mismatch: AUTHORIZATION.txt={self.target_host!r}, "
                f"requested={target_host!r}"
            )
        now = now or datetime.now(timezone.utc)
        try:
            in_window = self.valid_from <= now <= self.valid_until
        except TypeError as exc:
            # Comparing naive and aware datetimes raises TypeError. load()
            # rejects naive file values, but a naive `now` or a directly
            # constructed model can still get here; report it like every
            # other rejection.
            raise AuthorizationError(
                "valid_from, valid_until and now must all be timezone-aware datetimes"
            ) from exc
        if not in_window:
            raise AuthorizationError(
                f"current time {now.isoformat()} outside valid window "
                f"[{self.valid_from.isoformat()}, {self.valid_until.isoformat()}]"
            )
        missing = [s for s in required_scopes if s not in self.scope]
        if missing:
            raise AuthorizationError(f"scope missing required capabilities: {missing}")
        # Path-equality compare so `./report-2026-04-27/` (the form documented
        # in AUTHORIZATION.txt.example and CLAUDE.md) matches the prefix the
        # CLI builds from a Path-typed --output flag, which normalizes away
        # `./` and the trailing slash.
        if Path(self.report_output_path_prefix) != Path(report_output_path_prefix):
            raise AuthorizationError(
                f"report_output_path_prefix mismatch: "
                f"AUTHORIZATION.txt={self.report_output_path_prefix!r}, "
                f"requested={report_output_path_prefix!r}"
            )
        if self.signature_mode == "hmac_sha256":
            if not self.signature_key_env:
                raise AuthorizationError(
                    "signature_mode=hmac_sha256 requires signature_key_env"
                )
            key_str = os.environ.get(self.signature_key_env)
            if not key_str:
                raise AuthorizationError(
                    f"env var {self.signature_key_env!r} for signing key is empty"
                )
            if not self.signature:
                raise AuthorizationError(
                    "signature_mode=hmac_sha256 requires signature field"
                )
            expected = self.compute_signature(key_str.encode("utf-8"))
            # Compare as bytes: compare_digest() only accepts ASCII-only str,
            # so a file-supplied signature with non-ASCII characters would
            # raise TypeError instead of failing verification.
            if not _hmac.compare_digest(
                expected.encode("ascii"), self.signature.encode("utf-8")
            ):
                raise AuthorizationError("signature does not match (HMAC verification failed)")

        self.low_assurance = (
            self.signature_mode == "none" or not self.identity_assertion
        )

    @classmethod
    def load(cls, path: Path | str) -> "Authorization":
        """Read and parse an AUTHORIZATION.txt file.

        Raises:
            AuthorizationError: if the file cannot be read, is not valid
                YAML, is not a mapping, fails model validation, or carries
                a `valid_from` / `valid_until` without a UTC offset.
        """
        p = Path(path)
        try:
            raw = p.read_text(encoding="utf-8")
        except OSError as e:
            raise AuthorizationError(f"cannot read {p}: {e}") from e
        try:
            data = yaml.safe_load(raw)
        except yaml.YAMLError as e:
            raise AuthorizationError(f"cannot parse {p} as YAML: {e}") from e
        if not isinstance(data, dict):
            raise AuthorizationError(f"{p}: top level must be a mapping")
        try:
            auth = cls.model_validate(data)
        except Exception as e:
            raise AuthorizationError(f"{p}: {e}") from e
        # Pydantic v2 leaves datetimes without a tz suffix naive; comparing
        # naive ↔ aware in validate() then raises a bare TypeError that the
        # CLI doesn't catch, so a missing `Z` would surface as a Python
        # stack trace instead of the friendly AuthorizationError every other
        # rejection branch produces. Reject early with a clear message.
        for field_name in ("valid_from", "valid_until"):
            value = getattr(auth, field_name)
            if value.tzinfo is None or value.tzinfo.utcoffset(value) is None:
                raise AuthorizationError(
                    f"{p}: {field_name} must include a UTC offset "
                    f"(e.g. 2026-04-27T00:00:00Z), got naive datetime "
                    f"{value.isoformat()}"
                )
        return auth
140
+
141
+
142
def canonical_message(auth: "Authorization") -> str:
    """Return the newline-joined string that gets HMAC-signed (spec §6.4).

    The field order below is part of the signature format and must not
    change. Datetimes are rendered with `datetime.isoformat()`, so a UTC
    value appears with a '+00:00' suffix rather than the 'Z' used in the
    spec text. The spec's wording allows either as long as the
    implementation is consistent; '+00:00' is the standard here so all
    signatures verify against the same canonical bytes. Scopes are sorted
    and comma-joined, so their order in the file does not affect the
    signature.
    """
    window_start = auth.valid_from.isoformat()
    window_end = auth.valid_until.isoformat()
    scope_csv = ",".join(sorted(auth.scope))
    ordered_fields = (
        auth.target_host,
        auth.authorized_by,
        auth.identity_provider,
        auth.identity_assertion,
        window_start,
        window_end,
        scope_csv,
        auth.report_output_path_prefix,
    )
    return "\n".join(ordered_fields)