openhack 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openhack/__init__.py +2 -0
- openhack/__main__.py +225 -0
- openhack/agents/__init__.py +30 -0
- openhack/agents/base.py +230 -0
- openhack/agents/browser_verifier.py +679 -0
- openhack/agents/browser_verifier_swarm.py +256 -0
- openhack/agents/checkpoint.py +89 -0
- openhack/agents/context_manager.py +356 -0
- openhack/agents/coordinator.py +1105 -0
- openhack/agents/endpoint_analyst.py +307 -0
- openhack/agents/feature_hunter.py +93 -0
- openhack/agents/hunter.py +481 -0
- openhack/agents/hunter_swarm.py +385 -0
- openhack/agents/llm.py +334 -0
- openhack/agents/recon.py +19 -0
- openhack/agents/sandbox_verifier.py +396 -0
- openhack/agents/sandbox_verifier_swarm.py +250 -0
- openhack/agents/session.py +286 -0
- openhack/agents/validator.py +217 -0
- openhack/agents/validator_swarm.py +106 -0
- openhack/auth.py +175 -0
- openhack/browser/__init__.py +12 -0
- openhack/browser/runner.py +385 -0
- openhack/categories.py +130 -0
- openhack/config.py +201 -0
- openhack/deterministic_recon.py +464 -0
- openhack/entry_points.py +745 -0
- openhack/framework_classifier.py +515 -0
- openhack/framework_detection.py +269 -0
- openhack/headless_scan.py +179 -0
- openhack/prompts/__init__.py +108 -0
- openhack/prompts/browser_verifier.py +171 -0
- openhack/prompts/coordinator.py +31 -0
- openhack/prompts/django/__init__.py +32 -0
- openhack/prompts/django/auth_bypass.py +76 -0
- openhack/prompts/django/csrf.py +62 -0
- openhack/prompts/django/data_exposure.py +67 -0
- openhack/prompts/django/idor.py +74 -0
- openhack/prompts/django/injection.py +67 -0
- openhack/prompts/django/misconfiguration.py +70 -0
- openhack/prompts/django/ssrf.py +64 -0
- openhack/prompts/endpoint_analyst.py +122 -0
- openhack/prompts/express/__init__.py +29 -0
- openhack/prompts/express/auth_bypass.py +71 -0
- openhack/prompts/express/data_exposure.py +77 -0
- openhack/prompts/express/idor.py +69 -0
- openhack/prompts/express/injection.py +75 -0
- openhack/prompts/express/misconfiguration.py +72 -0
- openhack/prompts/express/ssrf.py +63 -0
- openhack/prompts/feature_hunter.py +140 -0
- openhack/prompts/flask/__init__.py +29 -0
- openhack/prompts/flask/auth_bypass.py +86 -0
- openhack/prompts/flask/data_exposure.py +78 -0
- openhack/prompts/flask/idor.py +83 -0
- openhack/prompts/flask/injection.py +77 -0
- openhack/prompts/flask/misconfiguration.py +73 -0
- openhack/prompts/flask/ssrf.py +65 -0
- openhack/prompts/hunter.py +362 -0
- openhack/prompts/hunter_continuation_loop.py +12 -0
- openhack/prompts/hunter_continuation_no_findings.py +19 -0
- openhack/prompts/hunter_continuation_no_progress.py +22 -0
- openhack/prompts/hunter_tool_instructions.py +55 -0
- openhack/prompts/nextjs/__init__.py +42 -0
- openhack/prompts/nextjs/auth_bypass.py +80 -0
- openhack/prompts/nextjs/csrf.py +71 -0
- openhack/prompts/nextjs/data_exposure.py +88 -0
- openhack/prompts/nextjs/idor.py +64 -0
- openhack/prompts/nextjs/injection.py +65 -0
- openhack/prompts/nextjs/middleware_bypass.py +75 -0
- openhack/prompts/nextjs/misconfiguration.py +92 -0
- openhack/prompts/nextjs/server_actions.py +97 -0
- openhack/prompts/nextjs/ssrf.py +66 -0
- openhack/prompts/nextjs/xss.py +69 -0
- openhack/prompts/pr_analysis_system.py +80 -0
- openhack/prompts/pr_analysis_user.py +11 -0
- openhack/prompts/project_context.py +89 -0
- openhack/prompts/recon.py +199 -0
- openhack/prompts/reporter.py +88 -0
- openhack/prompts/researchers.py +434 -0
- openhack/prompts/sandbox_verifier.py +128 -0
- openhack/prompts/supabase/__init__.py +39 -0
- openhack/prompts/supabase/auth_tokens.py +131 -0
- openhack/prompts/supabase/edge_functions.py +150 -0
- openhack/prompts/supabase/graphql.py +102 -0
- openhack/prompts/supabase/postgrest.py +99 -0
- openhack/prompts/supabase/realtime.py +93 -0
- openhack/prompts/supabase/rls.py +110 -0
- openhack/prompts/supabase/rpc_functions.py +127 -0
- openhack/prompts/supabase/storage.py +110 -0
- openhack/prompts/supabase/tenant_isolation.py +118 -0
- openhack/prompts/validator.py +319 -0
- openhack/prompts/validator_continuation_incomplete.py +12 -0
- openhack/prompts/validator_tool_instructions.py +29 -0
- openhack/quality.py +231 -0
- openhack/sandbox/__init__.py +12 -0
- openhack/sandbox/orchestrator.py +517 -0
- openhack/sandbox/runner.py +177 -0
- openhack/scan_session.py +245 -0
- openhack/setup.py +452 -0
- openhack/static_validator.py +612 -0
- openhack/tools/__init__.py +1 -0
- openhack/tools/ast_tools.py +307 -0
- openhack/tools/coverage.py +1078 -0
- openhack/tools/filesystem.py +404 -0
- openhack/tools/nextjs.py +258 -0
- openhack/tools/registry.py +52 -0
- openhack/tui.py +3450 -0
- openhack/updates.py +170 -0
- openhack-0.1.0.dist-info/METADATA +189 -0
- openhack-0.1.0.dist-info/RECORD +113 -0
- openhack-0.1.0.dist-info/WHEEL +4 -0
- openhack-0.1.0.dist-info/entry_points.txt +2 -0
- openhack-0.1.0.dist-info/licenses/LICENSE +661 -0
|
@@ -0,0 +1,679 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Browser verifier agent.
|
|
3
|
+
|
|
4
|
+
Takes a confirmed finding and drives a real Chromium browser to
|
|
5
|
+
verify the exploit, capturing screenshot evidence along the way.
|
|
6
|
+
Handles login flows, CSRF tokens, and multi-step UI interactions.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
from typing import Optional
|
|
12
|
+
|
|
13
|
+
from .base import BaseAgent
|
|
14
|
+
from .llm import Message, ToolResult
|
|
15
|
+
from ..browser.runner import BrowserRunner, BrowserContext
|
|
16
|
+
from openhack.prompts import format_project_context
|
|
17
|
+
from openhack.prompts.browser_verifier import (
|
|
18
|
+
BROWSER_VERIFIER_PROMPT,
|
|
19
|
+
BROWSER_VERIFIER_TOOL_INSTRUCTIONS,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# ── Tool definitions for the browser verifier ────────────────────
|
|
26
|
+
|
|
27
|
+
# Tool schema: load a URL (or sandbox-relative path) in the browser.
BROWSER_NAVIGATE_TOOL = dict(
    name="browser_navigate",
    description=(
        "Navigate the browser to a URL. Returns the page title, URL, "
        "and text content after navigation completes."
    ),
    parameters=dict(
        type="object",
        properties=dict(
            url=dict(
                type="string",
                description="URL or path to navigate to (e.g., /login, /api/users). Paths are prefixed with the sandbox base URL.",
            ),
            wait_until=dict(
                type="string",
                enum=["load", "domcontentloaded", "networkidle"],
                description="When to consider navigation complete (default: networkidle)",
                default="networkidle",
            ),
        ),
        required=["url"],
    ),
)
|
|
50
|
+
|
|
51
|
+
# Tool schema: click one element, located by CSS, visible text, or ARIA role.
BROWSER_CLICK_TOOL = dict(
    name="browser_click",
    description=(
        "Click an element on the page. Returns the updated page state after the click. "
        "Use selector_type to choose how to find the element."
    ),
    parameters=dict(
        type="object",
        properties=dict(
            selector=dict(
                type="string",
                description="The selector to find the element (CSS selector, visible text, or ARIA role)",
            ),
            selector_type=dict(
                type="string",
                enum=["css", "text", "role"],
                description="How to interpret the selector (default: css)",
                default="css",
            ),
        ),
        required=["selector"],
    ),
)
|
|
74
|
+
|
|
75
|
+
# Tool schema: clear a form field and type a value into it.
BROWSER_FILL_TOOL = dict(
    name="browser_fill",
    description=(
        "Type text into a form field. Clears the field first, then types the value. "
        "Use CSS selectors to identify the input field."
    ),
    parameters=dict(
        type="object",
        properties=dict(
            selector=dict(
                type="string",
                description="CSS selector for the input field (e.g., input[name='email'], #password)",
            ),
            value=dict(
                type="string",
                description="The text to type into the field",
            ),
        ),
        required=["selector", "value"],
    ),
)
|
|
96
|
+
|
|
97
|
+
# Tool schema: capture the current page as a named evidence screenshot.
BROWSER_SCREENSHOT_TOOL = dict(
    name="browser_screenshot",
    description=(
        "Take a screenshot of the current page. Screenshots are saved as evidence. "
        "Use descriptive names like 'login_page', 'after_xss_injection', 'exploit_confirmed'."
    ),
    parameters=dict(
        type="object",
        properties=dict(
            name=dict(
                type="string",
                description="Descriptive name for the screenshot (used in filename)",
            ),
        ),
        required=["name"],
    ),
)
|
|
114
|
+
|
|
115
|
+
# Tool schema: read page (or element) content as text or raw HTML.
# Both parameters are optional, so the schema declares no "required" list.
BROWSER_GET_CONTENT_TOOL = dict(
    name="browser_get_content",
    description=(
        "Read the page content — either the full page or a specific element. "
        "Use format 'html' to see raw HTML (useful for checking if XSS payloads are unescaped), "
        "or 'text' for readable text content."
    ),
    parameters=dict(
        type="object",
        properties=dict(
            selector=dict(
                type="string",
                description="CSS selector for a specific element (omit for full page)",
            ),
            format=dict(
                type="string",
                enum=["text", "html"],
                description="Output format: 'text' for readable content, 'html' for raw HTML (default: text)",
                default="text",
            ),
        ),
    ),
)
|
|
138
|
+
|
|
139
|
+
# Tool schema: evaluate a JavaScript snippet in the page and return its result.
BROWSER_EXECUTE_JS_TOOL = dict(
    name="browser_execute_js",
    description=(
        "Execute JavaScript in the page context. Use this to inspect the DOM, "
        "check for XSS payload execution, read localStorage, or interact with "
        "the page programmatically. Returns the evaluation result."
    ),
    parameters=dict(
        type="object",
        properties=dict(
            script=dict(
                type="string",
                description="JavaScript code to execute (e.g., 'document.title', 'document.cookie', 'document.querySelector(\"#secret\").textContent')",
            ),
        ),
        required=["script"],
    ),
)
|
|
157
|
+
|
|
158
|
+
# Tool schema: block until an element reaches the requested state or a timeout expires.
BROWSER_WAIT_FOR_TOOL = dict(
    name="browser_wait_for",
    description=(
        "Wait for an element to appear, disappear, or reach a specific state. "
        "Useful after navigation or form submission when content loads asynchronously."
    ),
    parameters=dict(
        type="object",
        properties=dict(
            selector=dict(
                type="string",
                description="CSS selector to wait for",
            ),
            timeout_ms=dict(
                type="integer",
                description="Max time to wait in milliseconds (default: 5000)",
                default=5000,
            ),
            state=dict(
                type="string",
                enum=["visible", "hidden", "attached", "detached"],
                description="What state to wait for (default: visible)",
                default="visible",
            ),
        ),
        required=["selector"],
    ),
)
|
|
186
|
+
|
|
187
|
+
# Tool schema: list the cookies of the current page (takes no arguments).
BROWSER_GET_COOKIES_TOOL = dict(
    name="browser_get_cookies",
    description=(
        "Get all cookies for the current page. Useful for inspecting session cookies, "
        "checking HttpOnly/Secure/SameSite flags, and verifying authentication state."
    ),
    parameters=dict(
        type="object",
        properties={},
    ),
)
|
|
198
|
+
|
|
199
|
+
# Tool schema: tag interactive elements with @eN refs and return a compact map
# (takes no arguments).
BROWSER_SNAPSHOT_TOOL = dict(
    name="browser_snapshot",
    description=(
        "Tag every interactive element on the current page with a stable @eN ref "
        "and return a compact map. ALWAYS call this BEFORE click/fill — it eliminates "
        "the need to guess CSS selectors. Returns lines like:\n"
        " @e1 <button name='submit'> \"Sign In\"\n"
        " @e2 <input type='email' name='email'>\n"
        "Then use the ref directly: browser_fill(selector='@e2', value='...') "
        "or browser_click(selector='@e1'). Refs persist until the next snapshot or navigation."
    ),
    parameters=dict(
        type="object",
        properties={},
    ),
)
|
|
215
|
+
|
|
216
|
+
# Tool schema: the terminal tool the model calls to record its verdict.
REPORT_BROWSER_RESULT_TOOL = dict(
    name="report_browser_result",
    description=(
        "Report the final result of your browser-based exploit verification. "
        "Call this when you have either confirmed the exploit works "
        "or determined it is not exploitable after multiple attempts."
    ),
    parameters=dict(
        type="object",
        properties=dict(
            status=dict(
                type="string",
                enum=["exploitable", "not_exploitable"],
                description="Whether the vulnerability was successfully exploited",
            ),
            confidence=dict(
                type="string",
                enum=["high", "medium", "low"],
                description="Confidence in the result",
            ),
            evidence=dict(
                type="string",
                description="Description of what you observed — response data, DOM state, behavior proving exploitation or explaining failure",
            ),
            attempts_made=dict(
                type="integer",
                description="How many exploit attempts were made",
            ),
            screenshots=dict(
                type="array",
                items={"type": "string"},
                description="List of screenshot filenames captured as evidence",
            ),
            dom_evidence=dict(
                type="string",
                description="Relevant HTML/DOM snippets proving the exploit (for XSS, injection, etc.)",
            ),
            console_evidence=dict(
                type="string",
                description="Relevant browser console output",
            ),
            network_evidence=dict(
                type="string",
                description="Relevant network activity (redirects, requests to attacker URLs, etc.)",
            ),
            reason=dict(
                type="string",
                description="For not_exploitable: why the exploit cannot work in practice",
            ),
        ),
        required=["status", "confidence", "evidence", "attempts_made"],
    ),
)
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
# Every browser-specific tool schema, in the order it is offered to the model.
BROWSER_TOOLS: list[dict] = [
    # Page loading and element discovery
    BROWSER_NAVIGATE_TOOL,
    BROWSER_SNAPSHOT_TOOL,
    # Interaction
    BROWSER_CLICK_TOOL,
    BROWSER_FILL_TOOL,
    # Evidence capture and inspection
    BROWSER_SCREENSHOT_TOOL,
    BROWSER_GET_CONTENT_TOOL,
    BROWSER_EXECUTE_JS_TOOL,
    BROWSER_WAIT_FOR_TOOL,
    BROWSER_GET_COOKIES_TOOL,
    # Final verdict
    REPORT_BROWSER_RESULT_TOOL,
]
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
class BrowserVerifierAgent(BaseAgent):
|
|
286
|
+
"""Agent that verifies vulnerabilities using a real browser."""
|
|
287
|
+
|
|
288
|
+
name = "browser_verifier"
|
|
289
|
+
description = "Verifying exploit in browser"
|
|
290
|
+
|
|
291
|
+
def __init__(
    self,
    *args,
    sandbox_url: str = "",
    browser_runner: Optional[BrowserRunner] = None,
    sandbox_orchestrator=None,
    finding_index: int = 0,
    max_attempts: int = 7,
    **kwargs,
):
    """Create a verifier bound to a single finding.

    Positional arguments and any extra keyword arguments are forwarded
    untouched to the parent initializer.

    Args:
        sandbox_url: Value interpolated into the system prompt and used as
            the "internal" URL prefix when the trace is inspected.
        browser_runner: Object whose methods perform the browser operations.
        sandbox_orchestrator: Stored on the instance; not used in this class.
        finding_index: Index of the finding being verified; also embedded in
            the agent name and description.
        max_attempts: Attempt budget shown in the prompt; the run loop allows
            four iterations per attempt.
    """
    super().__init__(*args, **kwargs)

    # Collaborators and configuration supplied by the caller.
    self.browser_runner = browser_runner
    self.sandbox_orchestrator = sandbox_orchestrator
    self.sandbox_url = sandbox_url
    self.max_attempts = max_attempts
    self.finding_index = finding_index

    # Per-run state; run() resets the first two at the start of every run.
    self.attempt_count = 0
    self.browser_result: Optional[dict] = None
    self._browser_ctx: Optional[BrowserContext] = None

    # Instance-level identity replaces the class-level defaults.
    self.description = f"Browser-verifying finding {finding_index}"
    self.name = f"browser_verifier:finding_{finding_index}"
|
|
313
|
+
|
|
314
|
+
def get_system_prompt(self, context: dict) -> str:
    """Render the system prompt for the finding carried in ``context``.

    Reads ``context["finding"]`` and ``context["project_context"]`` (both
    default to an empty dict), formats the finding as a markdown section, and
    fills the browser-verifier prompt template. The tool instructions are
    appended to the rendered template.
    """
    finding = context.get("finding", {})
    lookup = finding.get  # shorthand; the second argument is the placeholder
    rendered_project_context = format_project_context(context.get("project_context", {}))

    finding_details = f"""
### Vulnerability: {lookup('category', 'Unknown')}
- **Severity**: {lookup('severity', 'Unknown')}
- **File**: {lookup('file_path', 'Unknown')}
- **Line**: {lookup('line_number', 'Unknown')}
- **Description**: {lookup('description', 'No description')}
- **Code**:
```
{lookup('code_snippet', 'No code snippet')}
```
- **Original PoC**:
```
{lookup('poc', 'No PoC provided')}
```
- **Confidence**: {lookup('confidence', 'Unknown')}
- **CVSS Score**: {lookup('cvss_score', 'N/A')}
"""

    template_fields = {
        "project_context": rendered_project_context,
        "sandbox_url": self.sandbox_url,
        "finding_details": finding_details,
        "max_attempts": self.max_attempts,
    }
    return BROWSER_VERIFIER_PROMPT.format(**template_fields) + BROWSER_VERIFIER_TOOL_INSTRUCTIONS
|
|
345
|
+
|
|
346
|
+
def get_tools(self) -> list[dict]:
    """Return the parent's tool schemas followed by the browser tool schemas."""
    inherited_tools = super().get_tools()
    return inherited_tools + BROWSER_TOOLS
|
|
348
|
+
|
|
349
|
+
async def _handle_browser_navigate(self, args: dict) -> dict:
|
|
350
|
+
self.attempt_count += 1
|
|
351
|
+
result = await self.browser_runner.navigate(
|
|
352
|
+
self._browser_ctx,
|
|
353
|
+
url=args.get("url", "/"),
|
|
354
|
+
wait_until=args.get("wait_until", "networkidle"),
|
|
355
|
+
)
|
|
356
|
+
return result.to_dict()
|
|
357
|
+
|
|
358
|
+
async def _handle_browser_click(self, args: dict) -> dict:
|
|
359
|
+
result = await self.browser_runner.click(
|
|
360
|
+
self._browser_ctx,
|
|
361
|
+
selector=args.get("selector", ""),
|
|
362
|
+
selector_type=args.get("selector_type", "css"),
|
|
363
|
+
)
|
|
364
|
+
return result.to_dict()
|
|
365
|
+
|
|
366
|
+
async def _handle_browser_fill(self, args: dict) -> dict:
|
|
367
|
+
result = await self.browser_runner.fill(
|
|
368
|
+
self._browser_ctx,
|
|
369
|
+
selector=args.get("selector", ""),
|
|
370
|
+
value=args.get("value", ""),
|
|
371
|
+
)
|
|
372
|
+
return result.to_dict()
|
|
373
|
+
|
|
374
|
+
async def _handle_browser_screenshot(self, args: dict) -> dict:
|
|
375
|
+
return await self.browser_runner.screenshot(
|
|
376
|
+
self._browser_ctx,
|
|
377
|
+
name=args.get("name", "screenshot"),
|
|
378
|
+
)
|
|
379
|
+
|
|
380
|
+
async def _handle_browser_get_content(self, args: dict) -> dict:
|
|
381
|
+
return await self.browser_runner.get_content(
|
|
382
|
+
self._browser_ctx,
|
|
383
|
+
selector=args.get("selector"),
|
|
384
|
+
fmt=args.get("format", "text"),
|
|
385
|
+
)
|
|
386
|
+
|
|
387
|
+
async def _handle_browser_execute_js(self, args: dict) -> dict:
|
|
388
|
+
return await self.browser_runner.execute_js(
|
|
389
|
+
self._browser_ctx,
|
|
390
|
+
script=args.get("script", ""),
|
|
391
|
+
)
|
|
392
|
+
|
|
393
|
+
async def _handle_browser_wait_for(self, args: dict) -> dict:
|
|
394
|
+
return await self.browser_runner.wait_for(
|
|
395
|
+
self._browser_ctx,
|
|
396
|
+
selector=args.get("selector", ""),
|
|
397
|
+
timeout=args.get("timeout_ms", 5000),
|
|
398
|
+
state=args.get("state", "visible"),
|
|
399
|
+
)
|
|
400
|
+
|
|
401
|
+
async def _handle_browser_get_cookies(self, args: dict) -> dict:
|
|
402
|
+
return await self.browser_runner.get_cookies(self._browser_ctx)
|
|
403
|
+
|
|
404
|
+
async def _handle_browser_snapshot(self, args: dict) -> dict:
|
|
405
|
+
return await self.browser_runner.snapshot(self._browser_ctx)
|
|
406
|
+
|
|
407
|
+
def _handle_report_browser_result(self, args: dict) -> dict:
|
|
408
|
+
self.browser_result = {
|
|
409
|
+
"finding_index": self.finding_index,
|
|
410
|
+
"status": args.get("status", "not_exploitable"),
|
|
411
|
+
"confidence": args.get("confidence", "medium"),
|
|
412
|
+
"evidence": args.get("evidence", ""),
|
|
413
|
+
"attempts_made": args.get("attempts_made", self.attempt_count),
|
|
414
|
+
"screenshots": args.get("screenshots", []),
|
|
415
|
+
"dom_evidence": args.get("dom_evidence"),
|
|
416
|
+
"console_evidence": args.get("console_evidence"),
|
|
417
|
+
"network_evidence": args.get("network_evidence"),
|
|
418
|
+
"reason": args.get("reason"),
|
|
419
|
+
}
|
|
420
|
+
return {"status": "recorded", "finding_index": self.finding_index}
|
|
421
|
+
|
|
422
|
+
async def run(self, task: str, context: Optional[dict] = None) -> dict:
    """Drive the LLM/tool loop for one finding and return the verification result.

    A browser context is created for this finding and is always closed on the
    way out. The loop allows ``max_attempts * 4`` LLM turns and stops early
    when the session is cancelled, when the model answers without tool calls,
    or when the model calls ``report_browser_result``. If the budget runs out
    (or the session is cancelled) without a reported verdict, one is inferred
    from the session trace.

    Args:
        task: Initial user message for the model.
        context: Optional dict passed to ``get_system_prompt``.

    Returns:
        The dict produced by ``_build_result``.
    """
    context = context or {}
    self.session.current_agent = self.name
    self.browser_result = None
    self.attempt_count = 0

    self._browser_ctx = await self.browser_runner.create_context(self.finding_index)

    try:
        system_prompt = self.get_system_prompt(context)
        self.messages = [Message(role="user", content=task)]
        self._seed_existing_instructions()

        max_iterations = self.max_attempts * 4

        for _ in range(max_iterations):
            if self.session.cancelled:
                break

            self._inject_pending_instructions()

            response = await self.llm.chat(
                messages=self.messages, tools=self.get_tools(), system=system_prompt,
            )

            self.session.total_cost += response.cost
            if response.usage:
                self.session.total_tokens += response.usage.get("total_tokens", 0)
                self.context_manager.update_usage(response.usage.get("input_tokens", 0))

            if response.content:
                self.session.add_trace(
                    agent=self.name, event_type="thinking", content=response.content,
                )

            # A plain-text answer ends the run with whatever has been recorded.
            if not response.tool_calls:
                return self._build_result(response.content or "")

            self.messages.append(Message(
                role="assistant", content=response.content,
                tool_calls=[
                    {"id": tc.id, "type": "function", "function": {"name": tc.name, "arguments": json.dumps(tc.arguments)}}
                    for tc in response.tool_calls
                ],
                reasoning_content=getattr(response, 'reasoning_content', None),
            ))

            should_finish = False
            for tool_call in response.tool_calls:
                self.session.add_trace(
                    agent=self.name, event_type="tool_call",
                    content=f"Calling {tool_call.name}",
                    tool_name=tool_call.name, tool_input=tool_call.arguments,
                )

                result, is_final_report = await self._execute_tool_call(tool_call)
                should_finish = should_finish or is_final_report

                self.session.add_trace(
                    agent=self.name, event_type="tool_result",
                    content=f"Result from {tool_call.name}",
                    tool_name=tool_call.name, tool_output=result,
                )

                # default=str keeps one non-JSON-serialisable value in a tool
                # result from aborting the whole verification run.
                raw_content = json.dumps(result, default=str) if isinstance(result, dict) else str(result)
                truncated_content = self.context_manager.truncate_tool_result(tool_call.name, raw_content)
                tool_result = ToolResult(tool_call_id=tool_call.id, content=truncated_content)
                self.messages.append(tool_result.to_message())

            if should_finish:
                return self._build_result(response.content or "")

            if self.context_manager.needs_compaction():
                self.messages = self.context_manager.compact_messages(self.messages)
                logger.info("[%s] Compacted message history", self.name)

        if not self.browser_result:
            self.browser_result = self._infer_result_from_trace()

        return self._build_result("Max iterations reached")

    finally:
        # Detach the context first so a closed context never lingers on the
        # instance, then close best-effort: an error raised from a finally
        # block would otherwise replace the result (or exception) in flight.
        ctx, self._browser_ctx = self._browser_ctx, None
        if ctx:
            try:
                await ctx.close()
            except Exception:
                logger.warning("[%s] Failed to close browser context", self.name, exc_info=True)


async def _execute_tool_call(self, tool_call) -> tuple:
    """Execute one tool call and return ``(result, is_final_report)``.

    Browser tools go to their ``_handle_*`` coroutine, ``report_browser_result``
    is handled synchronously, and any other name is delegated to
    ``self.tools.execute_tool``. A raised exception is turned into an
    ``{"error": ...}`` result so the model can react to it; in that case
    ``is_final_report`` is ``False`` even for ``report_browser_result``.
    """
    browser_handlers = {
        "browser_navigate": self._handle_browser_navigate,
        "browser_click": self._handle_browser_click,
        "browser_fill": self._handle_browser_fill,
        "browser_screenshot": self._handle_browser_screenshot,
        "browser_get_content": self._handle_browser_get_content,
        "browser_execute_js": self._handle_browser_execute_js,
        "browser_wait_for": self._handle_browser_wait_for,
        "browser_get_cookies": self._handle_browser_get_cookies,
        "browser_snapshot": self._handle_browser_snapshot,
    }
    tool_name = tool_call.name
    try:
        if tool_name == "report_browser_result":
            return self._handle_report_browser_result(tool_call.arguments), True
        handler = browser_handlers.get(tool_name)
        if handler is not None:
            return await handler(tool_call.arguments), False
        return self.tools.execute_tool(tool_name, tool_call.arguments), False
    except Exception as e:
        logger.debug("[%s] Tool %s failed", self.name, tool_name, exc_info=True)
        return {"error": f"Tool execution failed: {str(e)}"}, False
|
|
533
|
+
|
|
534
|
+
def _infer_result_from_trace(self) -> dict:
|
|
535
|
+
"""Infer the verdict from the trace when the agent ran out of iterations
|
|
536
|
+
without calling report_browser_result.
|
|
537
|
+
|
|
538
|
+
Looks for evidence patterns that strongly indicate exploitation:
|
|
539
|
+
- browser_fill with payload-looking content
|
|
540
|
+
- browser_click after the fill (form submission)
|
|
541
|
+
- success message in subsequent page content (Saved/Updated/Created/Successful)
|
|
542
|
+
- screenshots taken at the right moments
|
|
543
|
+
"""
|
|
544
|
+
my_trace = [
|
|
545
|
+
t for t in self.session.trace
|
|
546
|
+
if t.agent == self.name and t.event_type in ("tool_call", "tool_result")
|
|
547
|
+
]
|
|
548
|
+
|
|
549
|
+
# Walk the trace and gather signals
|
|
550
|
+
payload_substrings = self._payload_signatures()
|
|
551
|
+
evidence: list[str] = []
|
|
552
|
+
screenshots: list[str] = []
|
|
553
|
+
fill_with_payload = False
|
|
554
|
+
click_after_payload_fill = False
|
|
555
|
+
success_after_payload = False
|
|
556
|
+
navigated_to_external = False
|
|
557
|
+
last_fill_was_payload = False
|
|
558
|
+
|
|
559
|
+
for entry in my_trace:
|
|
560
|
+
tool = entry.tool_name or ""
|
|
561
|
+
args = entry.tool_input or {}
|
|
562
|
+
out = entry.tool_output or {}
|
|
563
|
+
|
|
564
|
+
if tool == "browser_fill" and entry.event_type == "tool_call":
|
|
565
|
+
value = str(args.get("value", ""))
|
|
566
|
+
if any(sig in value for sig in payload_substrings):
|
|
567
|
+
fill_with_payload = True
|
|
568
|
+
last_fill_was_payload = True
|
|
569
|
+
evidence.append(f"browser_fill with payload content: {value[:120]}")
|
|
570
|
+
else:
|
|
571
|
+
last_fill_was_payload = False
|
|
572
|
+
|
|
573
|
+
elif tool == "browser_click" and entry.event_type == "tool_call":
|
|
574
|
+
if last_fill_was_payload:
|
|
575
|
+
click_after_payload_fill = True
|
|
576
|
+
evidence.append(f"browser_click after payload fill: {args.get('selector', '')}")
|
|
577
|
+
|
|
578
|
+
elif tool == "browser_navigate" and entry.event_type == "tool_call":
|
|
579
|
+
url = str(args.get("url", ""))
|
|
580
|
+
if url.startswith("http") and ("evil" in url or "wikipedia.org" in url
|
|
581
|
+
or "example.com" in url or "google.com" in url
|
|
582
|
+
or "169.254.169.254" in url):
|
|
583
|
+
navigated_to_external = True
|
|
584
|
+
evidence.append(f"browser_navigate to external/SSRF target: {url}")
|
|
585
|
+
|
|
586
|
+
elif tool == "browser_screenshot" and entry.event_type == "tool_call":
|
|
587
|
+
screenshots.append(str(args.get("name", "")))
|
|
588
|
+
|
|
589
|
+
elif tool in ("browser_navigate", "browser_click", "browser_get_content",
|
|
590
|
+
"browser_snapshot") and entry.event_type == "tool_result":
|
|
591
|
+
if isinstance(out, dict):
|
|
592
|
+
haystack = (str(out.get("page_content", "")) + " "
|
|
593
|
+
+ str(out.get("snapshot", "")) + " "
|
|
594
|
+
+ str(out.get("content", "")) + " "
|
|
595
|
+
+ str(out.get("page_title", ""))).lower()
|
|
596
|
+
success_markers = (
|
|
597
|
+
"successfully", "updated successfully", "saved successfully",
|
|
598
|
+
"created successfully", "profile updated", "successful",
|
|
599
|
+
"logged in", "welcome back", "dashboard",
|
|
600
|
+
)
|
|
601
|
+
if fill_with_payload and any(m in haystack for m in success_markers):
|
|
602
|
+
success_after_payload = True
|
|
603
|
+
marker = next(m for m in success_markers if m in haystack)
|
|
604
|
+
evidence.append(f"page showed success marker after payload submission: '{marker}'")
|
|
605
|
+
|
|
606
|
+
# Check for redirect chain to attacker-controlled URL
|
|
607
|
+
if (tool == "browser_navigate" and entry.event_type == "tool_result"
|
|
608
|
+
and isinstance(out, dict)):
|
|
609
|
+
page_url = str(out.get("page_url", ""))
|
|
610
|
+
if page_url and not page_url.startswith(self.sandbox_url):
|
|
611
|
+
if "wikipedia.org" in page_url or "evil" in page_url or "google.com" in page_url:
|
|
612
|
+
navigated_to_external = True
|
|
613
|
+
evidence.append(f"page navigated to external URL after redirect: {page_url}")
|
|
614
|
+
|
|
615
|
+
# Decision rules
|
|
616
|
+
if (fill_with_payload and click_after_payload_fill and success_after_payload):
|
|
617
|
+
return {
|
|
618
|
+
"finding_index": self.finding_index,
|
|
619
|
+
"status": "exploitable",
|
|
620
|
+
"confidence": "medium",
|
|
621
|
+
"evidence": (
|
|
622
|
+
"Inferred from trace (agent exhausted iteration budget before reporting). "
|
|
623
|
+
+ " | ".join(evidence[:8])
|
|
624
|
+
),
|
|
625
|
+
"attempts_made": self.attempt_count,
|
|
626
|
+
"screenshots": screenshots,
|
|
627
|
+
"reason": "Trace shows payload was submitted and the app accepted it (success message observed). "
|
|
628
|
+
"DOM-level execution was not separately confirmed.",
|
|
629
|
+
"inferred_from_trace": True,
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
if navigated_to_external:
|
|
633
|
+
return {
|
|
634
|
+
"finding_index": self.finding_index,
|
|
635
|
+
"status": "exploitable",
|
|
636
|
+
"confidence": "medium",
|
|
637
|
+
"evidence": (
|
|
638
|
+
"Inferred from trace (open-redirect-style evidence). "
|
|
639
|
+
+ " | ".join(evidence[:8])
|
|
640
|
+
),
|
|
641
|
+
"attempts_made": self.attempt_count,
|
|
642
|
+
"screenshots": screenshots,
|
|
643
|
+
"reason": "Trace shows the browser was redirected to an external/attacker-controlled URL.",
|
|
644
|
+
"inferred_from_trace": True,
|
|
645
|
+
}
|
|
646
|
+
|
|
647
|
+
return {
|
|
648
|
+
"finding_index": self.finding_index,
|
|
649
|
+
"status": "not_exploitable",
|
|
650
|
+
"confidence": "low",
|
|
651
|
+
"evidence": "Max iterations reached without confirming exploit. Trace did not contain payload-submission + success-marker pattern.",
|
|
652
|
+
"attempts_made": self.attempt_count,
|
|
653
|
+
"screenshots": screenshots,
|
|
654
|
+
"reason": "Agent exhausted iteration budget without conclusive evidence in trace.",
|
|
655
|
+
"inferred_from_trace": True,
|
|
656
|
+
}
|
|
657
|
+
|
|
658
|
+
def _payload_signatures(self) -> list[str]:
|
|
659
|
+
"""Substrings that strongly suggest a fill value is an exploit payload."""
|
|
660
|
+
return [
|
|
661
|
+
"<script", "</script>", "onerror=", "onload=", "onclick=",
|
|
662
|
+
"javascript:", "<img src=x", "<svg",
|
|
663
|
+
"' OR '", "' or '", '" OR "', "1=1", "UNION SELECT", "union select",
|
|
664
|
+
"../", "..\\", "/etc/passwd", "/proc/self",
|
|
665
|
+
"127.0.0.1", "169.254.169.254", "localhost:",
|
|
666
|
+
"${", "{{", "<%",
|
|
667
|
+
"; cat ", "| cat ", "$(", "`",
|
|
668
|
+
]
|
|
669
|
+
|
|
670
|
+
def _build_result(self, summary: str) -> dict:
|
|
671
|
+
return {
|
|
672
|
+
"raw_output": summary,
|
|
673
|
+
"browser_result": self.browser_result,
|
|
674
|
+
"attempts_made": self.attempt_count,
|
|
675
|
+
"type": "browser_verification_complete",
|
|
676
|
+
}
|
|
677
|
+
|
|
678
|
+
def _parse_final_response(self, content: str) -> dict:
|
|
679
|
+
return self._build_result(content)
|