agentcloak 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentcloak/__init__.py +7 -0
- agentcloak/__main__.py +5 -0
- agentcloak/adapters/__init__.py +16 -0
- agentcloak/adapters/analyzer.py +252 -0
- agentcloak/adapters/context.py +76 -0
- agentcloak/adapters/discovery.py +78 -0
- agentcloak/adapters/executor.py +66 -0
- agentcloak/adapters/generator.py +156 -0
- agentcloak/adapters/pipeline/__init__.py +6 -0
- agentcloak/adapters/pipeline/engine.py +40 -0
- agentcloak/adapters/pipeline/steps.py +182 -0
- agentcloak/adapters/pipeline/template.py +58 -0
- agentcloak/adapters/registry.py +132 -0
- agentcloak/adapters/sites/__init__.py +3 -0
- agentcloak/adapters/sites/_example.py +51 -0
- agentcloak/adapters/types.py +71 -0
- agentcloak/bridge/__init__.py +1 -0
- agentcloak/bridge/__main__.py +44 -0
- agentcloak/bridge/config.py +51 -0
- agentcloak/bridge/extension/background.js +447 -0
- agentcloak/bridge/extension/manifest.json +23 -0
- agentcloak/bridge/extension/options.html +153 -0
- agentcloak/bridge/extension/options.js +85 -0
- agentcloak/bridge/server.py +422 -0
- agentcloak/bridge/start_bridge.bat +12 -0
- agentcloak/browser/__init__.py +61 -0
- agentcloak/browser/_profile_writer.py +57 -0
- agentcloak/browser/cloak_ctx.py +117 -0
- agentcloak/browser/extensions/turnstile_patch/manifest.json +15 -0
- agentcloak/browser/extensions/turnstile_patch/script.js +22 -0
- agentcloak/browser/patchright_ctx.py +1189 -0
- agentcloak/browser/protocol.py +48 -0
- agentcloak/browser/remote_ctx.py +310 -0
- agentcloak/browser/secure_ctx.py +214 -0
- agentcloak/browser/state.py +103 -0
- agentcloak/browser/xvfb.py +124 -0
- agentcloak/cli/__init__.py +3 -0
- agentcloak/cli/app.py +178 -0
- agentcloak/cli/client.py +325 -0
- agentcloak/cli/commands/__init__.py +3 -0
- agentcloak/cli/commands/action.py +196 -0
- agentcloak/cli/commands/bridge_cmd.py +93 -0
- agentcloak/cli/commands/browser.py +109 -0
- agentcloak/cli/commands/capture_cmd.py +82 -0
- agentcloak/cli/commands/cdp.py +29 -0
- agentcloak/cli/commands/cookies_cmd.py +45 -0
- agentcloak/cli/commands/daemon_cmd.py +145 -0
- agentcloak/cli/commands/doctor.py +182 -0
- agentcloak/cli/commands/fetch.py +81 -0
- agentcloak/cli/commands/js.py +34 -0
- agentcloak/cli/commands/network.py +33 -0
- agentcloak/cli/commands/profile.py +229 -0
- agentcloak/cli/commands/site_cmd.py +207 -0
- agentcloak/cli/commands/tab.py +65 -0
- agentcloak/cli/output.py +38 -0
- agentcloak/core/__init__.py +3 -0
- agentcloak/core/capture.py +151 -0
- agentcloak/core/config.py +170 -0
- agentcloak/core/discovery.py +142 -0
- agentcloak/core/errors.py +60 -0
- agentcloak/core/har.py +89 -0
- agentcloak/core/resume.py +120 -0
- agentcloak/core/security.py +152 -0
- agentcloak/core/seq.py +68 -0
- agentcloak/core/types.py +27 -0
- agentcloak/daemon/__init__.py +3 -0
- agentcloak/daemon/__main__.py +37 -0
- agentcloak/daemon/middleware.py +64 -0
- agentcloak/daemon/routes.py +922 -0
- agentcloak/daemon/server.py +339 -0
- agentcloak/mcp/__init__.py +3 -0
- agentcloak/mcp/__main__.py +5 -0
- agentcloak/mcp/client.py +193 -0
- agentcloak/mcp/server.py +91 -0
- agentcloak/mcp/tools/__init__.py +3 -0
- agentcloak/mcp/tools/capture.py +86 -0
- agentcloak/mcp/tools/content.py +94 -0
- agentcloak/mcp/tools/interaction.py +62 -0
- agentcloak/mcp/tools/management.py +422 -0
- agentcloak/mcp/tools/navigation.py +89 -0
- agentcloak/mcp/tools/network.py +32 -0
- agentcloak-0.1.0.dist-info/METADATA +827 -0
- agentcloak-0.1.0.dist-info/RECORD +85 -0
- agentcloak-0.1.0.dist-info/WHEEL +4 -0
- agentcloak-0.1.0.dist-info/entry_points.txt +4 -0
agentcloak/__init__.py
ADDED
agentcloak/__main__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Site adapters — reusable site-specific automation."""
|
|
2
|
+
|
|
3
|
+
from agentcloak.adapters.context import AdapterContext
|
|
4
|
+
from agentcloak.adapters.executor import execute_adapter
|
|
5
|
+
from agentcloak.adapters.registry import adapter, get_registry
|
|
6
|
+
from agentcloak.adapters.types import AdapterEntry, AdapterMeta, Arg
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"AdapterContext",
|
|
10
|
+
"AdapterEntry",
|
|
11
|
+
"AdapterMeta",
|
|
12
|
+
"Arg",
|
|
13
|
+
"adapter",
|
|
14
|
+
"execute_adapter",
|
|
15
|
+
"get_registry",
|
|
16
|
+
]
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
"""API pattern recognition from captured network traffic."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
9
|
+
from urllib.parse import parse_qs, urlparse
|
|
10
|
+
|
|
11
|
+
from agentcloak.core.types import Strategy
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from agentcloak.core.capture import CaptureEntry
|
|
15
|
+
|
|
16
|
+
__all__ = ["EndpointPattern", "PatternAnalyzer"]
|
|
17
|
+
|
|
18
|
+
# Shapes of variable path parts that get collapsed into placeholders.
_UUID_RE = re.compile(
    r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}",
    re.IGNORECASE,
)
_LONG_ID_RE = re.compile(r"/\d{4,}\b")
_HEX_HASH_RE = re.compile(r"/[0-9a-f]{24,}", re.IGNORECASE)
_DATE_RE = re.compile(r"/\d{4}-\d{2}-\d{2}")

# Lower-case request header names that count as an authentication signal.
_AUTH_HEADERS = frozenset(
    (
        "authorization",
        "x-csrf-token",
        "x-api-key",
        "api-key",
        "x-auth-token",
        "x-session-id",
    )
)

# Hosts whose traffic is never analyzed.
_SKIP_DOMAINS = frozenset(
    (
        "fonts.googleapis.com",
        "cdn.jsdelivr.net",
        "www.google-analytics.com",
        "www.googletagmanager.com",
        "connect.facebook.net",
    )
)

# Query parameter names grouped by role.
_PAGINATION_PARAMS = frozenset(
    ("page", "offset", "limit", "cursor", "after", "before", "per_page", "pagesize")
)
_FILTER_PARAMS = frozenset(
    ("filter", "q", "query", "search", "status", "type", "category", "keyword")
)
_SORT_PARAMS = frozenset(("sort", "order", "sort_by", "order_by"))
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass
class EndpointPattern:
    """Recognized API endpoint pattern.

    One instance describes one (method, domain, normalized path) combination
    together with whatever was aggregated for it from captured traffic.
    """

    # Identity of the endpoint.
    method: str
    path: str
    domain: str
    # Number of captured calls that were grouped under this pattern.
    call_count: int = 0
    # Names of query parameters seen on any call.
    query_params: list[str] = field(default_factory=list[str])
    # Response status code -> number of occurrences.
    status_codes: dict[int, int] = field(default_factory=dict[int, int])
    # Names of auth-related request headers seen on any call.
    auth_headers: list[str] = field(default_factory=list[str])
    content_type: str = ""
    # Category label; defaults to "read".
    category: str = "read"
    # Schema sketches of a request / response body, when one could be parsed.
    request_schema: dict[str, Any] | None = None
    response_schema: dict[str, Any] | None = None
    strategy: Strategy = Strategy.PUBLIC
    # A few concrete URLs that matched this pattern.
    example_urls: list[str] = field(default_factory=list[str])
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _normalize_path(path: str) -> str:
    """Replace variable-looking parts of *path* with placeholders.

    UUIDs become ``:uuid``; date, long-hex and long-number segments become
    ``/:date``, ``/:hash`` and ``/:id``.
    """
    # Order matters: a date or an all-digit hex run would otherwise be
    # claimed (partly) by the generic long-number rule.
    substitutions = (
        (_UUID_RE, ":uuid"),
        (_DATE_RE, "/:date"),
        (_HEX_HASH_RE, "/:hash"),
        (_LONG_ID_RE, "/:id"),
    )
    for regex, placeholder in substitutions:
        path = regex.sub(placeholder, path)
    return path
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _extract_schema(obj: Any, max_depth: int = 2) -> Any:
    """Sketch the structure of a decoded JSON value.

    Args:
        obj: The decoded value (dict, list, scalar or ``None``).
        max_depth: How many levels of dict nesting to expand. Lists do not
            consume depth; only their first element is inspected.

    Returns:
        A dict mapping keys to sub-schemas, a list holding the schema of the
        first element (or an empty list), or a type label string. Scalars are
        labelled ``"bool"``, ``"int"``, ``"float"``, ``"str"`` and ``"null"``.
        Containers below the depth cut-off and any other type are labelled
        with their type name.
    """
    # Scalars are classified before the depth check so that a value gets the
    # same label at every nesting level (None is always "null", never
    # "NoneType").
    if obj is None:
        return "null"
    # bool must be tested before int because bool is a subclass of int.
    if isinstance(obj, bool):
        return "bool"
    if isinstance(obj, int):
        return "int"
    if isinstance(obj, float):
        return "float"
    if isinstance(obj, str):
        return "str"

    if max_depth <= 0:
        return type(obj).__name__
    if isinstance(obj, dict):
        return {
            str(k): _extract_schema(v, max_depth - 1)
            for k, v in cast("dict[str, Any]", obj).items()
        }
    if isinstance(obj, list):
        return [_extract_schema(obj[0], max_depth)] if obj else []
    return type(obj).__name__
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _infer_category(method: str, path: str) -> str:
    """Classify an endpoint as auth, search, telemetry, read or write.

    Keyword rules are plain substring checks against the lower-cased path and
    are tried in order, so an earlier category wins. The HTTP method only
    decides between read and write when no keyword matches.
    """
    lowered = path.lower()
    keyword_rules = (
        ("auth", ("auth", "login", "token", "oauth", "signin", "signup")),
        ("search", ("search", "query", "find")),
        ("telemetry", ("log", "track", "event", "beacon", "metric", "telemetry")),
    )
    for category, keywords in keyword_rules:
        for keyword in keywords:
            if keyword in lowered:
                return category
    return "read" if method in ("GET", "HEAD", "OPTIONS") else "write"
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _infer_strategy(auth_headers: list[str]) -> Strategy:
    """Pick a strategy from the auth header names seen on an endpoint.

    No headers gives PUBLIC, an ``Authorization`` header (any case) gives
    HEADER, and any other non-empty list gives COOKIE.
    """
    if any(name.lower() == "authorization" for name in auth_headers):
        return Strategy.HEADER
    return Strategy.COOKIE if auth_headers else Strategy.PUBLIC
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class PatternAnalyzer:
    """Analyze captured traffic to extract API endpoint patterns.

    Entries are filtered (skipped hosts, non-JSON content types and status 0
    are dropped), grouped by method, host and normalized path, and each group
    is summarized into one ``EndpointPattern``.
    """

    def __init__(self, entries: list[CaptureEntry]) -> None:
        self._entries = entries

    def analyze(self) -> list[EndpointPattern]:
        """Return one pattern per endpoint, most frequently called first."""
        # Keyed by a tuple rather than a joined string so that no component
        # has to survive a split, and a missing host stays an empty domain
        # instead of turning into the text "None".
        groups: dict[tuple[str, str, str], list[CaptureEntry]] = {}
        for entry in self._entries:
            parsed = urlparse(entry.url)
            host = parsed.hostname or ""
            if host in _SKIP_DOMAINS:
                continue
            media_type = entry.content_type.split(";", 1)[0].strip().lower()
            if media_type not in ("application/json", "text/json"):
                continue
            if entry.status == 0:
                continue
            key = (entry.method, host, _normalize_path(parsed.path))
            groups.setdefault(key, []).append(entry)

        patterns = [
            self._build_pattern(method, domain, path, members)
            for (method, domain, path), members in groups.items()
        ]
        patterns.sort(key=lambda p: p.call_count, reverse=True)
        return patterns

    def _build_pattern(
        self,
        method: str,
        domain: str,
        path: str,
        entries: list[CaptureEntry],
    ) -> EndpointPattern:
        """Summarize all captured calls of one endpoint into a pattern.

        Status codes, query parameter names and auth header names are
        aggregated over every entry. The content type and the request and
        response schemas come from the first entry that yields a usable
        value. At most three example URLs are kept.
        """
        status_codes: dict[int, int] = {}
        query_params: set[str] = set()
        auth_seen: set[str] = set()
        example_urls: list[str] = []
        primary_ct = ""
        req_schema: dict[str, Any] | None = None
        resp_schema: dict[str, Any] | None = None

        for entry in entries:
            status_codes[entry.status] = status_codes.get(entry.status, 0) + 1

            # Parameter names are taken verbatim (not URL-decoded).
            for part in urlparse(entry.url).query.split("&"):
                name = part.split("=", 1)[0]
                if name:
                    query_params.add(name)

            for header_name in entry.request_headers:
                header = str(header_name)
                if header.lower() in _AUTH_HEADERS:
                    auth_seen.add(header)

            if not primary_ct:
                primary_ct = entry.content_type.split(";", 1)[0].strip()

            if len(example_urls) < 3:
                example_urls.append(entry.url)

            # Only the first usable schema is reported, so stop parsing
            # bodies once one has been found.
            if req_schema is None and entry.request_body:
                req_schema = self._request_schema(entry)
            if resp_schema is None and entry.response_body:
                resp_schema = self._response_schema(entry)

        auth_list = sorted(auth_seen)
        return EndpointPattern(
            method=method,
            path=path,
            domain=domain,
            call_count=len(entries),
            query_params=sorted(query_params),
            status_codes=status_codes,
            auth_headers=auth_list,
            content_type=primary_ct,
            category=_infer_category(method, path),
            request_schema=req_schema,
            response_schema=resp_schema,
            strategy=_infer_strategy(auth_list),
            example_urls=example_urls,
        )

    @staticmethod
    def _request_schema(entry: CaptureEntry) -> dict[str, Any] | None:
        """Return the schema of a request body, or None if it is unusable.

        Form-encoded bodies are decoded with ``parse_qs`` (single values are
        unwrapped); everything else is parsed as JSON.
        """
        raw_type = next(
            (v for k, v in entry.request_headers.items() if k.lower() == "content-type"),
            "",
        )
        media_type = raw_type.split(";", 1)[0].strip().lower()
        try:
            if media_type == "application/x-www-form-urlencoded":
                fields = parse_qs(entry.request_body, keep_blank_values=True)
                body: Any = {
                    k: v[0] if len(v) == 1 else v for k, v in fields.items()
                }
            else:
                body = json.loads(entry.request_body)
        except ValueError:
            # json.JSONDecodeError is a ValueError; a body that cannot be
            # parsed simply contributes no schema.
            return None
        schema: Any = _extract_schema(body)
        if isinstance(schema, dict):
            return cast("dict[str, Any]", schema)
        return None

    @staticmethod
    def _response_schema(entry: CaptureEntry) -> dict[str, Any] | None:
        """Return the schema of a JSON response body, or None if unusable."""
        try:
            body: Any = json.loads(entry.response_body)
        except ValueError:
            return None
        schema: Any = _extract_schema(body)
        if isinstance(schema, dict):
            return cast("dict[str, Any]", schema)
        return None
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""AdapterContext — unified runtime interface for adapter handlers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from agentcloak.adapters.types import AdapterMeta
|
|
9
|
+
from agentcloak.browser.protocol import (
|
|
10
|
+
ActionResult,
|
|
11
|
+
BrowserContext,
|
|
12
|
+
NetworkRequest,
|
|
13
|
+
)
|
|
14
|
+
from agentcloak.browser.state import PageSnapshot
|
|
15
|
+
|
|
16
|
+
__all__ = ["AdapterContext"]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class AdapterContext:
    """Runtime handle passed to adapter handlers.

    Holds the adapter metadata, the argument dict and an optional browser
    context, and forwards the common browser calls so that a handler can
    invoke them on the context directly.
    """

    def __init__(
        self,
        *,
        meta: AdapterMeta,
        args: dict[str, Any],
        browser: BrowserContext | None = None,
    ) -> None:
        self._meta, self._args, self._browser = meta, args, browser

    @property
    def meta(self) -> AdapterMeta:
        """Metadata of the adapter being executed."""
        return self._meta

    @property
    def args(self) -> dict[str, Any]:
        """Arguments the adapter was invoked with."""
        return self._args

    @property
    def browser(self) -> BrowserContext:
        """The browser context; raises RuntimeError when none was supplied."""
        attached = self._browser
        if attached is not None:
            return attached
        msg = "browser context not available for this adapter"
        raise RuntimeError(msg)

    @property
    def has_browser(self) -> bool:
        """Whether a browser context was supplied."""
        return self._browser is not None

    # -- Convenience proxies to BrowserContext --
    # Each one goes through the ``browser`` property, so it raises
    # RuntimeError when the context has no browser.

    async def navigate(self, url: str, *, timeout: float = 30.0) -> ActionResult:
        """Forward to ``browser.navigate``."""
        result = await self.browser.navigate(url, timeout=timeout)
        return result

    async def snapshot(self, *, mode: str = "accessible") -> PageSnapshot:
        """Forward to ``browser.snapshot``."""
        page = await self.browser.snapshot(mode=mode)
        return page

    async def action(self, kind: str, target: str, **kw: Any) -> ActionResult:
        """Forward to ``browser.action``, passing extra keywords through."""
        result = await self.browser.action(kind, target, **kw)
        return result

    async def evaluate(self, js: str) -> Any:
        """Forward to ``browser.evaluate``."""
        value = await self.browser.evaluate(js)
        return value

    async def network(
        self, *, since: int | str = "last_action"
    ) -> list[NetworkRequest]:
        """Forward to ``browser.network``."""
        requests = await self.browser.network(since=since)
        return requests

    async def screenshot(self, *, full_page: bool = False) -> bytes:
        """Forward to ``browser.screenshot``."""
        image = await self.browser.screenshot(full_page=full_page)
        return image

    @property
    def seq(self) -> int:
        """The browser's ``seq`` value."""
        return self.browser.seq
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Adapter discovery — scan built-in and user directories for adapters."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import importlib
|
|
6
|
+
import importlib.util
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from types import ModuleType
|
|
13
|
+
|
|
14
|
+
import structlog
|
|
15
|
+
|
|
16
|
+
from agentcloak.adapters.registry import get_registry
|
|
17
|
+
|
|
18
|
+
__all__ = ["discover_adapters"]
|
|
19
|
+
|
|
20
|
+
log = structlog.get_logger()
|
|
21
|
+
|
|
22
|
+
_USER_ADAPTER_DIR = Path.home() / ".config" / "agentcloak" / "adapters"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _import_module_from_path(name: str, path: Path) -> ModuleType | None:
    """Import a Python module from a file path.

    The module is registered in ``sys.modules`` under *name* before it is
    executed and removed again if execution fails.

    Returns:
        The imported module, or ``None`` when no spec/loader could be created
        or the module raised while executing (logged as
        ``adapter.import_failed``).
    """
    spec = importlib.util.spec_from_file_location(name, path)
    if spec is None or spec.loader is None:
        return None
    module = importlib.util.module_from_spec(spec)
    sys.modules[name] = module
    try:
        spec.loader.exec_module(module)  # type: ignore[union-attr]
    except Exception as exc:
        # Best effort: a broken adapter file must not abort discovery, but
        # the cause is recorded so the file can be fixed.
        log.warning("adapter.import_failed", path=str(path), error=repr(exc))
        sys.modules.pop(name, None)
        return None
    return module
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _discover_builtin() -> int:
    """Import the built-in adapter module and report registry growth.

    Only ``agentcloak.adapters.sites._example`` is imported. If it is already
    loaded it is reloaded instead.

    Returns:
        The difference in registry size before and after the import.
    """
    count_before = len(get_registry())
    builtin_module = "agentcloak.adapters.sites._example"
    try:
        if builtin_module in sys.modules:
            importlib.reload(sys.modules[builtin_module])
        else:
            importlib.import_module(builtin_module)
    except Exception as exc:
        # Best effort: discovery continues without the built-in adapters,
        # but the cause is recorded instead of being dropped.
        log.warning(
            "adapter.builtin_import_failed",
            module=builtin_module,
            error=repr(exc),
        )
    return len(get_registry()) - count_before
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _discover_user() -> int:
    """Import the ``*.py`` files found in the user adapter directory.

    Files whose name starts with ``_`` are skipped; the rest are imported in
    sorted order as ``agentcloak_user_adapters.<stem>``.

    Returns:
        The difference in registry size before and after the scan, or 0 when
        the directory does not exist.
    """
    registry_size = len(get_registry())
    if not _USER_ADAPTER_DIR.is_dir():
        return 0
    candidates = (
        source
        for source in sorted(_USER_ADAPTER_DIR.glob("*.py"))
        if not source.name.startswith("_")
    )
    for source in candidates:
        _import_module_from_path(f"agentcloak_user_adapters.{source.stem}", source)
    return len(get_registry()) - registry_size
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def discover_adapters() -> dict[str, int]:
    """Run built-in and user discovery and return the counts.

    Returns:
        A dict with the keys ``builtin`` and ``user`` (what each discovery
        step reported) and ``total`` (current registry size).
    """
    # Dict literals evaluate in order, so built-ins are discovered first.
    counts = {"builtin": _discover_builtin(), "user": _discover_user()}
    counts["total"] = len(get_registry())
    log.debug("adapter.discovery_complete", **counts)
    return counts
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Adapter execution — dispatches to function handler or pipeline engine."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, Any
|
|
6
|
+
|
|
7
|
+
import structlog
|
|
8
|
+
|
|
9
|
+
from agentcloak.adapters.context import AdapterContext
|
|
10
|
+
from agentcloak.adapters.pipeline.engine import execute_pipeline
|
|
11
|
+
from agentcloak.core.errors import AgentBrowserError
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from agentcloak.adapters.types import AdapterEntry
|
|
15
|
+
|
|
16
|
+
__all__ = ["execute_adapter"]
|
|
17
|
+
|
|
18
|
+
log = structlog.get_logger()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
async def execute_adapter(
    entry: AdapterEntry,
    *,
    args: dict[str, Any],
    browser: Any | None = None,
) -> list[dict[str, Any]]:
    """Run an adapter and return its result rows.

    Args:
        entry: The registered adapter to run.
        args: Caller-supplied arguments. Declared defaults are filled in on a
            copy; the caller's dict is left untouched.
        browser: Browser context, required when ``meta.needs_browser`` is set.

    Returns:
        For pipeline adapters, the pipeline result as a list (a single
        non-list result is wrapped, ``None`` becomes an empty list). For
        handler adapters, whatever the handler returns.

    Raises:
        AgentBrowserError: If the adapter needs a browser and none was given,
            or it has neither a pipeline nor a handler.
        RuntimeError: If the entry is flagged as a pipeline but carries no
            pipeline definition.
    """
    meta = entry.meta
    log.info("adapter.execute", adapter=meta.full_name, strategy=meta.strategy)

    # Work on a copy so that filling in defaults never leaks into the
    # caller's dict.
    args = dict(args)
    for arg_def in meta.args:
        if arg_def.name not in args and arg_def.default is not None:
            args[arg_def.name] = arg_def.default

    if meta.needs_browser and browser is None:
        raise AgentBrowserError(
            error="adapter_no_browser",
            hint=f"Adapter '{meta.full_name}' requires a browser context "
            f"(strategy={meta.strategy}).",
            action="start a browser session first",
        )

    if meta.navigate_before and browser is not None:
        log.debug("adapter.pre_navigate", url=meta.navigate_before)
        await browser.navigate(meta.navigate_before)

    if entry.is_pipeline:
        if meta.pipeline is None:
            raise RuntimeError(
                f"Adapter '{meta.full_name}' marked as pipeline"
                " but has no pipeline definition"
            )
        raw = await execute_pipeline(meta.pipeline, args=args, browser=browser)
        if isinstance(raw, list):
            return raw  # type: ignore[return-value]
        return [raw] if raw is not None else []

    if entry.handler is None:
        raise AgentBrowserError(
            error="adapter_no_handler",
            hint=f"Adapter '{meta.full_name}' has neither pipeline nor handler.",
            action="add a handler function or pipeline definition",
        )

    ctx = AdapterContext(meta=meta, args=args, browser=browser)
    return await entry.handler(ctx)
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""Adapter code generation from API endpoint patterns."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
from agentcloak.core.types import Strategy
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from agentcloak.adapters.analyzer import EndpointPattern
|
|
12
|
+
|
|
13
|
+
__all__ = ["generate_adapter", "generate_adapters"]
|
|
14
|
+
|
|
15
|
+
_PARAM_RE = re.compile(r":(\w+)")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _path_params(path: str) -> list[str]:
    """Return the names of the ``:name`` placeholders in *path*, in order."""
    return [match.group(1) for match in _PARAM_RE.finditer(path)]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _slugify(text: str) -> str:
    """Lower-case *text* and squash every run outside ``[a-z0-9]`` into ``_``.

    Leading and trailing underscores are removed; ``"unnamed"`` is returned
    when nothing is left.
    """
    collapsed = re.sub(r"[^a-z0-9]+", "_", text.lower())
    trimmed = collapsed.strip("_")
    if not trimmed:
        return "unnamed"
    return trimmed
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _derive_name(pattern: EndpointPattern) -> str:
    """Build an adapter name from the last two meaningful path segments.

    Placeholder segments (``:name``) and the prefixes ``api``/``v1``..``v4``
    are ignored; if no segment remains, the whole path is slugified instead.
    """
    ignored = ("api", "v1", "v2", "v3", "v4")
    meaningful: list[str] = []
    for segment in pattern.path.strip("/").split("/"):
        if segment.startswith(":") or segment in ignored:
            continue
        meaningful.append(segment)
    source = "_".join(meaningful[-2:]) if meaningful else pattern.path
    return _slugify(source)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _build_args_code(pattern: EndpointPattern) -> str:
    """Render the ``args=[...]`` keyword for a generated adapter.

    Path placeholders become required string args and every query parameter
    becomes an arg with ``default=None``. Returns an empty string when there
    is nothing to declare.
    """
    required = [
        f' Arg("{param}", type=str,'
        f' required=True, help="{param.title()} parameter"),'
        for param in _path_params(pattern.path)
    ]
    optional = [
        f' Arg("{qp}", default=None, help="{qp} query parameter"),'
        for qp in pattern.query_params
    ]
    entries = required + optional
    if not entries:
        return ""
    return " args=[\n" + "\n".join(entries) + "\n ],\n"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _build_pipeline_code(pattern: EndpointPattern) -> str:
    """Render the pipeline step lines for a generated adapter.

    The result is an optional ``navigate`` step (for COOKIE and HEADER
    strategies) followed by one ``evaluate`` step that calls ``fetch`` on the
    endpoint URL. Path placeholders and query parameters appear in the URL as
    ``{args.<name>}`` templates. For non-GET methods a JSON body hint is built
    from at most the first five keys of the request schema.
    """
    templated_path = pattern.path
    for param in _path_params(pattern.path):
        templated_path = templated_path.replace(f":{param}", f"{{args.{param}}}")

    url = f"https://{pattern.domain}{templated_path}"
    query = "&".join(f"{qp}={{args.{qp}}}" for qp in pattern.query_params)
    if query:
        url += "?" + query

    steps: list[str] = []
    if pattern.strategy in (Strategy.COOKIE, Strategy.HEADER):
        steps.append(f' {{"navigate": "https://{pattern.domain}"}},')

    if pattern.method == "GET":
        js = f"fetch('{url}', {{credentials: 'include'}}).then(r => r.json())"
    else:
        body_hint = "{}"
        if pattern.request_schema:
            pairs = ", ".join(
                f'\\"{key}\\": {{args.{key}}}'
                for key in list(pattern.request_schema)[:5]
            )
            body_hint = "{" + pairs + "}"
        js = (
            f"fetch('{url}', "
            f"{{method: '{pattern.method}', "
            "credentials: 'include', "
            "headers: {'Content-Type': 'application/json'}, "
            f"body: JSON.stringify({body_hint})"
            "}).then(r => r.json())"
        )
    # The JavaScript is embedded as the value of the "evaluate" step.
    steps.append(f' {{"evaluate": "{js}"}},')

    return "\n".join(steps)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def generate_adapter(
    site: str,
    pattern: EndpointPattern,
    *,
    name: str | None = None,
) -> str:
    """Generate Python adapter source code from an EndpointPattern.

    Args:
        site: Site identifier written into the ``@adapter`` decorator.
        pattern: The endpoint to generate an adapter for.
        name: Adapter name; derived from the pattern's path when omitted.

    Returns:
        Source text of one decorated stub function, ending with a newline.
        The module-level imports the snippet needs are not included.
    """
    import json  # local import: only used to quote generated literals

    def quoted(value: str) -> str:
        # JSON string syntax is also valid Python string syntax, so this
        # gives a double-quoted literal with quotes, backslashes and control
        # characters escaped. Plain values render exactly as before.
        return json.dumps(value, ensure_ascii=False)

    adapter_name = name or _derive_name(pattern)
    func_name = f"{_slugify(site)}_{_slugify(adapter_name)}"
    if func_name[0].isdigit():
        # A site such as "4chan" would otherwise give an invalid identifier.
        func_name = f"adapter_{func_name}"
    access = "read" if pattern.method in ("GET", "HEAD", "OPTIONS") else "write"

    args_code = _build_args_code(pattern)
    pipeline_code = _build_pipeline_code(pattern)

    lines: list[str] = [
        "@adapter(",
        f" site={quoted(site)},",
        f" name={quoted(adapter_name)},",
        f" strategy=Strategy.{pattern.strategy.name},",
    ]
    if pattern.domain:
        lines.append(f" domain={quoted(pattern.domain)},")
    lines.append(f" description={quoted(f'{pattern.method} {pattern.path}')},")
    lines.append(f" access={quoted(access)},")
    if args_code:
        lines.append(args_code.rstrip())
    lines.extend(
        [
            " pipeline=[",
            pipeline_code,
            " ],",
            ")",
            f"def {func_name}() -> None:",
            ' """Generated adapter — review before use."""',
            "",
        ]
    )

    return "\n".join(lines)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def generate_adapters(
    site: str,
    patterns: list[EndpointPattern],
) -> str:
    """Generate a complete adapter module from multiple patterns.

    Patterns categorized as ``telemetry`` are skipped. Adapter names are kept
    unique within the module: when a derived name is already taken (for
    example GET and POST on the same path), the HTTP method is appended, and
    a numeric suffix is used if that is taken as well.

    Args:
        site: Site identifier passed to every generated adapter.
        patterns: Endpoint patterns to generate adapters for.

    Returns:
        Module source text: a header with the docstring and imports followed
        by the generated adapters.
    """
    header_lines = [
        f'"""Auto-generated adapters for {site} — review before use."""',
        "",
        "from __future__ import annotations",
        "",
        "from agentcloak.adapters.registry import adapter",
        "from agentcloak.adapters.types import Arg",
        "from agentcloak.core.types import Strategy",
        "",
    ]
    header = "\n".join(header_lines) + "\n"

    used_names: set[str] = set()
    body_parts: list[str] = []
    for pattern in patterns:
        if pattern.category == "telemetry":
            continue
        base = _derive_name(pattern)
        candidate = base
        if candidate in used_names:
            candidate = f"{base}_{_slugify(pattern.method)}"
        suffix = 2
        while candidate in used_names:
            candidate = f"{base}_{suffix}"
            suffix += 1
        used_names.add(candidate)
        # A name that did not collide equals the derived default, so the
        # output for collision-free input is unchanged.
        body_parts.append(generate_adapter(site, pattern, name=candidate))

    return header + "\n\n".join(body_parts)
|