openhack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. openhack/__init__.py +2 -0
  2. openhack/__main__.py +225 -0
  3. openhack/agents/__init__.py +30 -0
  4. openhack/agents/base.py +230 -0
  5. openhack/agents/browser_verifier.py +679 -0
  6. openhack/agents/browser_verifier_swarm.py +256 -0
  7. openhack/agents/checkpoint.py +89 -0
  8. openhack/agents/context_manager.py +356 -0
  9. openhack/agents/coordinator.py +1105 -0
  10. openhack/agents/endpoint_analyst.py +307 -0
  11. openhack/agents/feature_hunter.py +93 -0
  12. openhack/agents/hunter.py +481 -0
  13. openhack/agents/hunter_swarm.py +385 -0
  14. openhack/agents/llm.py +334 -0
  15. openhack/agents/recon.py +19 -0
  16. openhack/agents/sandbox_verifier.py +396 -0
  17. openhack/agents/sandbox_verifier_swarm.py +250 -0
  18. openhack/agents/session.py +286 -0
  19. openhack/agents/validator.py +217 -0
  20. openhack/agents/validator_swarm.py +106 -0
  21. openhack/auth.py +175 -0
  22. openhack/browser/__init__.py +12 -0
  23. openhack/browser/runner.py +385 -0
  24. openhack/categories.py +130 -0
  25. openhack/config.py +201 -0
  26. openhack/deterministic_recon.py +464 -0
  27. openhack/entry_points.py +745 -0
  28. openhack/framework_classifier.py +515 -0
  29. openhack/framework_detection.py +269 -0
  30. openhack/headless_scan.py +179 -0
  31. openhack/prompts/__init__.py +108 -0
  32. openhack/prompts/browser_verifier.py +171 -0
  33. openhack/prompts/coordinator.py +31 -0
  34. openhack/prompts/django/__init__.py +32 -0
  35. openhack/prompts/django/auth_bypass.py +76 -0
  36. openhack/prompts/django/csrf.py +62 -0
  37. openhack/prompts/django/data_exposure.py +67 -0
  38. openhack/prompts/django/idor.py +74 -0
  39. openhack/prompts/django/injection.py +67 -0
  40. openhack/prompts/django/misconfiguration.py +70 -0
  41. openhack/prompts/django/ssrf.py +64 -0
  42. openhack/prompts/endpoint_analyst.py +122 -0
  43. openhack/prompts/express/__init__.py +29 -0
  44. openhack/prompts/express/auth_bypass.py +71 -0
  45. openhack/prompts/express/data_exposure.py +77 -0
  46. openhack/prompts/express/idor.py +69 -0
  47. openhack/prompts/express/injection.py +75 -0
  48. openhack/prompts/express/misconfiguration.py +72 -0
  49. openhack/prompts/express/ssrf.py +63 -0
  50. openhack/prompts/feature_hunter.py +140 -0
  51. openhack/prompts/flask/__init__.py +29 -0
  52. openhack/prompts/flask/auth_bypass.py +86 -0
  53. openhack/prompts/flask/data_exposure.py +78 -0
  54. openhack/prompts/flask/idor.py +83 -0
  55. openhack/prompts/flask/injection.py +77 -0
  56. openhack/prompts/flask/misconfiguration.py +73 -0
  57. openhack/prompts/flask/ssrf.py +65 -0
  58. openhack/prompts/hunter.py +362 -0
  59. openhack/prompts/hunter_continuation_loop.py +12 -0
  60. openhack/prompts/hunter_continuation_no_findings.py +19 -0
  61. openhack/prompts/hunter_continuation_no_progress.py +22 -0
  62. openhack/prompts/hunter_tool_instructions.py +55 -0
  63. openhack/prompts/nextjs/__init__.py +42 -0
  64. openhack/prompts/nextjs/auth_bypass.py +80 -0
  65. openhack/prompts/nextjs/csrf.py +71 -0
  66. openhack/prompts/nextjs/data_exposure.py +88 -0
  67. openhack/prompts/nextjs/idor.py +64 -0
  68. openhack/prompts/nextjs/injection.py +65 -0
  69. openhack/prompts/nextjs/middleware_bypass.py +75 -0
  70. openhack/prompts/nextjs/misconfiguration.py +92 -0
  71. openhack/prompts/nextjs/server_actions.py +97 -0
  72. openhack/prompts/nextjs/ssrf.py +66 -0
  73. openhack/prompts/nextjs/xss.py +69 -0
  74. openhack/prompts/pr_analysis_system.py +80 -0
  75. openhack/prompts/pr_analysis_user.py +11 -0
  76. openhack/prompts/project_context.py +89 -0
  77. openhack/prompts/recon.py +199 -0
  78. openhack/prompts/reporter.py +88 -0
  79. openhack/prompts/researchers.py +434 -0
  80. openhack/prompts/sandbox_verifier.py +128 -0
  81. openhack/prompts/supabase/__init__.py +39 -0
  82. openhack/prompts/supabase/auth_tokens.py +131 -0
  83. openhack/prompts/supabase/edge_functions.py +150 -0
  84. openhack/prompts/supabase/graphql.py +102 -0
  85. openhack/prompts/supabase/postgrest.py +99 -0
  86. openhack/prompts/supabase/realtime.py +93 -0
  87. openhack/prompts/supabase/rls.py +110 -0
  88. openhack/prompts/supabase/rpc_functions.py +127 -0
  89. openhack/prompts/supabase/storage.py +110 -0
  90. openhack/prompts/supabase/tenant_isolation.py +118 -0
  91. openhack/prompts/validator.py +319 -0
  92. openhack/prompts/validator_continuation_incomplete.py +12 -0
  93. openhack/prompts/validator_tool_instructions.py +29 -0
  94. openhack/quality.py +231 -0
  95. openhack/sandbox/__init__.py +12 -0
  96. openhack/sandbox/orchestrator.py +517 -0
  97. openhack/sandbox/runner.py +177 -0
  98. openhack/scan_session.py +245 -0
  99. openhack/setup.py +452 -0
  100. openhack/static_validator.py +612 -0
  101. openhack/tools/__init__.py +1 -0
  102. openhack/tools/ast_tools.py +307 -0
  103. openhack/tools/coverage.py +1078 -0
  104. openhack/tools/filesystem.py +404 -0
  105. openhack/tools/nextjs.py +258 -0
  106. openhack/tools/registry.py +52 -0
  107. openhack/tui.py +3450 -0
  108. openhack/updates.py +170 -0
  109. openhack-0.1.0.dist-info/METADATA +189 -0
  110. openhack-0.1.0.dist-info/RECORD +113 -0
  111. openhack-0.1.0.dist-info/WHEEL +4 -0
  112. openhack-0.1.0.dist-info/entry_points.txt +2 -0
  113. openhack-0.1.0.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,679 @@
1
+ """
2
+ Browser verifier agent.
3
+
4
+ Takes a confirmed finding and drives a real Chromium browser to
5
+ verify the exploit, capturing screenshot evidence along the way.
6
+ Handles login flows, CSRF tokens, and multi-step UI interactions.
7
+ """
8
+
9
+ import json
10
+ import logging
11
+ from typing import Optional
12
+
13
+ from .base import BaseAgent
14
+ from .llm import Message, ToolResult
15
+ from ..browser.runner import BrowserRunner, BrowserContext
16
+ from openhack.prompts import format_project_context
17
+ from openhack.prompts.browser_verifier import (
18
+ BROWSER_VERIFIER_PROMPT,
19
+ BROWSER_VERIFIER_TOOL_INSTRUCTIONS,
20
+ )
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ # ── Tool definitions for the browser verifier ────────────────────
26
+
27
# Tool schema: load a URL (or a path relative to the sandbox) in the browser.
BROWSER_NAVIGATE_TOOL = dict(
    name="browser_navigate",
    description=(
        "Navigate the browser to a URL. Returns the page title, URL, "
        "and text content after navigation completes."
    ),
    parameters=dict(
        type="object",
        properties=dict(
            url=dict(
                type="string",
                description=(
                    "URL or path to navigate to (e.g., /login, /api/users). "
                    "Paths are prefixed with the sandbox base URL."
                ),
            ),
            wait_until=dict(
                type="string",
                enum=["load", "domcontentloaded", "networkidle"],
                description="When to consider navigation complete (default: networkidle)",
                default="networkidle",
            ),
        ),
        required=["url"],
    ),
)
50
+
51
# Tool schema: click an element located by CSS selector, visible text, or role.
BROWSER_CLICK_TOOL = dict(
    name="browser_click",
    description=(
        "Click an element on the page. "
        "Returns the updated page state after the click. "
        "Use selector_type to choose how to find the element."
    ),
    parameters=dict(
        type="object",
        properties=dict(
            selector=dict(
                type="string",
                description=(
                    "The selector to find the element "
                    "(CSS selector, visible text, or ARIA role)"
                ),
            ),
            selector_type=dict(
                type="string",
                enum=["css", "text", "role"],
                description="How to interpret the selector (default: css)",
                default="css",
            ),
        ),
        required=["selector"],
    ),
)
74
+
75
# Tool schema: replace the contents of a form field with the given text.
BROWSER_FILL_TOOL = dict(
    name="browser_fill",
    description=(
        "Type text into a form field. "
        "Clears the field first, then types the value. "
        "Use CSS selectors to identify the input field."
    ),
    parameters=dict(
        type="object",
        properties=dict(
            selector=dict(
                type="string",
                description=(
                    "CSS selector for the input field "
                    "(e.g., input[name='email'], #password)"
                ),
            ),
            value=dict(
                type="string",
                description="The text to type into the field",
            ),
        ),
        required=["selector", "value"],
    ),
)
96
+
97
# Tool schema: capture the current page as a named screenshot.
BROWSER_SCREENSHOT_TOOL = dict(
    name="browser_screenshot",
    description=(
        "Take a screenshot of the current page. "
        "Screenshots are saved as evidence. "
        "Use descriptive names like 'login_page', 'after_xss_injection', "
        "'exploit_confirmed'."
    ),
    parameters=dict(
        type="object",
        properties=dict(
            name=dict(
                type="string",
                description="Descriptive name for the screenshot (used in filename)",
            ),
        ),
        required=["name"],
    ),
)
114
+
115
# Tool schema: read the page (or one element) as text or raw HTML.
# Both parameters are optional, so the schema carries no "required" list.
BROWSER_GET_CONTENT_TOOL = dict(
    name="browser_get_content",
    description=(
        "Read the page content — either the full page or a specific element. "
        "Use format 'html' to see raw HTML "
        "(useful for checking if XSS payloads are unescaped), "
        "or 'text' for readable text content."
    ),
    parameters=dict(
        type="object",
        properties=dict(
            selector=dict(
                type="string",
                description="CSS selector for a specific element (omit for full page)",
            ),
            format=dict(
                type="string",
                enum=["text", "html"],
                description=(
                    "Output format: 'text' for readable content, "
                    "'html' for raw HTML (default: text)"
                ),
                default="text",
            ),
        ),
    ),
)
138
+
139
# Tool schema: evaluate a JavaScript snippet in the page and return its value.
BROWSER_EXECUTE_JS_TOOL = dict(
    name="browser_execute_js",
    description=(
        "Execute JavaScript in the page context. "
        "Use this to inspect the DOM, check for XSS payload execution, "
        "read localStorage, or interact with the page programmatically. "
        "Returns the evaluation result."
    ),
    parameters=dict(
        type="object",
        properties=dict(
            script=dict(
                type="string",
                description=(
                    "JavaScript code to execute (e.g., 'document.title', "
                    "'document.cookie', "
                    "'document.querySelector(\"#secret\").textContent')"
                ),
            ),
        ),
        required=["script"],
    ),
)
157
+
158
# Tool schema: block until an element reaches the requested state or the
# timeout (milliseconds) elapses.
BROWSER_WAIT_FOR_TOOL = dict(
    name="browser_wait_for",
    description=(
        "Wait for an element to appear, disappear, or reach a specific state. "
        "Useful after navigation or form submission "
        "when content loads asynchronously."
    ),
    parameters=dict(
        type="object",
        properties=dict(
            selector=dict(
                type="string",
                description="CSS selector to wait for",
            ),
            timeout_ms=dict(
                type="integer",
                description="Max time to wait in milliseconds (default: 5000)",
                default=5000,
            ),
            state=dict(
                type="string",
                enum=["visible", "hidden", "attached", "detached"],
                description="What state to wait for (default: visible)",
                default="visible",
            ),
        ),
        required=["selector"],
    ),
)
186
+
187
# Tool schema: list the cookies for the current page (takes no arguments).
BROWSER_GET_COOKIES_TOOL = dict(
    name="browser_get_cookies",
    description=(
        "Get all cookies for the current page. "
        "Useful for inspecting session cookies, "
        "checking HttpOnly/Secure/SameSite flags, "
        "and verifying authentication state."
    ),
    parameters=dict(type="object", properties={}),
)
198
+
199
# Tool schema: tag interactive elements with @eN refs and return the map
# (takes no arguments). The description doubles as usage guidance for the model.
BROWSER_SNAPSHOT_TOOL = dict(
    name="browser_snapshot",
    description=(
        "Tag every interactive element on the current page "
        "with a stable @eN ref and return a compact map. "
        "ALWAYS call this BEFORE click/fill — it eliminates "
        "the need to guess CSS selectors. "
        "Returns lines like:\n"
        " @e1 <button name='submit'> \"Sign In\"\n"
        " @e2 <input type='email' name='email'>\n"
        "Then use the ref directly: "
        "browser_fill(selector='@e2', value='...') "
        "or browser_click(selector='@e1'). "
        "Refs persist until the next snapshot or navigation."
    ),
    parameters=dict(type="object", properties={}),
)
215
+
216
# Tool schema: the terminal call through which the model records its verdict.
# Only status, confidence, evidence and attempts_made are mandatory; the
# remaining fields carry optional supporting evidence.
REPORT_BROWSER_RESULT_TOOL = dict(
    name="report_browser_result",
    description=(
        "Report the final result of your browser-based exploit verification. "
        "Call this when you have either confirmed the exploit works "
        "or determined it is not exploitable after multiple attempts."
    ),
    parameters=dict(
        type="object",
        properties=dict(
            status=dict(
                type="string",
                enum=["exploitable", "not_exploitable"],
                description="Whether the vulnerability was successfully exploited",
            ),
            confidence=dict(
                type="string",
                enum=["high", "medium", "low"],
                description="Confidence in the result",
            ),
            evidence=dict(
                type="string",
                description=(
                    "Description of what you observed — response data, DOM state, "
                    "behavior proving exploitation or explaining failure"
                ),
            ),
            attempts_made=dict(
                type="integer",
                description="How many exploit attempts were made",
            ),
            screenshots=dict(
                type="array",
                items=dict(type="string"),
                description="List of screenshot filenames captured as evidence",
            ),
            dom_evidence=dict(
                type="string",
                description=(
                    "Relevant HTML/DOM snippets proving the exploit "
                    "(for XSS, injection, etc.)"
                ),
            ),
            console_evidence=dict(
                type="string",
                description="Relevant browser console output",
            ),
            network_evidence=dict(
                type="string",
                description=(
                    "Relevant network activity "
                    "(redirects, requests to attacker URLs, etc.)"
                ),
            ),
            reason=dict(
                type="string",
                description="For not_exploitable: why the exploit cannot work in practice",
            ),
        ),
        required=["status", "confidence", "evidence", "attempts_made"],
    ),
)
269
+
270
+
271
# Every browser-specific tool schema, in the order it is offered to the model.
BROWSER_TOOLS = [
    # navigation and element discovery
    BROWSER_NAVIGATE_TOOL, BROWSER_SNAPSHOT_TOOL,
    # interaction
    BROWSER_CLICK_TOOL, BROWSER_FILL_TOOL,
    # evidence capture and inspection
    BROWSER_SCREENSHOT_TOOL, BROWSER_GET_CONTENT_TOOL, BROWSER_EXECUTE_JS_TOOL,
    BROWSER_WAIT_FOR_TOOL, BROWSER_GET_COOKIES_TOOL,
    # final verdict
    REPORT_BROWSER_RESULT_TOOL,
]
283
+
284
+
285
class BrowserVerifierAgent(BaseAgent):
    """Agent that verifies vulnerabilities using a real browser.

    ``run`` opens one browser context for the finding, lets the LLM drive it
    through the ``browser_*`` tools, and stops when the model calls
    ``report_browser_result``, answers without any tool call, the session is
    cancelled, or the iteration budget (``max_attempts * 4``) is used up.
    When the loop ends without a reported verdict, one is inferred from the
    session trace.

    NOTE(review): ``session``, ``llm``, ``tools``, ``context_manager``,
    ``messages`` and the ``_seed_existing_instructions`` /
    ``_inject_pending_instructions`` hooks are not defined in this class;
    presumably they are provided by ``BaseAgent`` — confirm.
    """

    name = "browser_verifier"
    description = "Verifying exploit in browser"

    # Lower-cased page-text fragments treated as "the app accepted the input".
    # Order matters: the first match is the one quoted in the evidence.
    _SUCCESS_MARKERS = (
        "successfully", "updated successfully", "saved successfully",
        "created successfully", "profile updated", "successful",
        "logged in", "welcome back", "dashboard",
    )

    # Tools whose results carry page text worth scanning for success markers.
    _PAGE_TEXT_TOOLS = (
        "browser_navigate", "browser_click", "browser_get_content",
        "browser_snapshot",
    )

    def __init__(
        self,
        *args,
        sandbox_url: str = "",
        browser_runner: Optional[BrowserRunner] = None,
        sandbox_orchestrator=None,
        finding_index: int = 0,
        max_attempts: int = 7,
        **kwargs,
    ):
        """Store the browser wiring; remaining arguments go to ``BaseAgent``.

        Args:
            sandbox_url: Base URL of the sandboxed app under test.
            browser_runner: Runner used to create the browser context and to
                execute every ``browser_*`` tool.
            sandbox_orchestrator: Stored as-is; not used inside this class.
            finding_index: Index of the finding being verified; also used to
                derive the per-instance agent name.
            max_attempts: Attempt budget shown to the model; the loop in
                ``run`` allows four iterations per attempt.
        """
        super().__init__(*args, **kwargs)
        self.sandbox_url = sandbox_url
        self.browser_runner = browser_runner
        self.sandbox_orchestrator = sandbox_orchestrator
        self.finding_index = finding_index
        self.max_attempts = max_attempts
        self.browser_result: Optional[dict] = None
        self.attempt_count = 0
        self._browser_ctx: Optional[BrowserContext] = None

        # Per-instance name so trace entries of parallel verifiers stay apart.
        self.name = f"browser_verifier:finding_{finding_index}"
        self.description = f"Browser-verifying finding {finding_index}"

    def get_system_prompt(self, context: dict) -> str:
        """Build the system prompt from the finding and project context.

        ``context["finding"]`` may be missing or ``None``; every field then
        falls back to its placeholder text.
        """
        finding = context.get("finding") or {}
        project_context = context.get("project_context", {})
        project_context_str = format_project_context(project_context)

        # NOTE(review): the indentation of this template inside the original
        # source could not be recovered; it is written flush-left so the
        # markdown headings and fences render as such — confirm.
        finding_details = f"""
### Vulnerability: {finding.get('category', 'Unknown')}
- **Severity**: {finding.get('severity', 'Unknown')}
- **File**: {finding.get('file_path', 'Unknown')}
- **Line**: {finding.get('line_number', 'Unknown')}
- **Description**: {finding.get('description', 'No description')}
- **Code**:
```
{finding.get('code_snippet', 'No code snippet')}
```
- **Original PoC**:
```
{finding.get('poc', 'No PoC provided')}
```
- **Confidence**: {finding.get('confidence', 'Unknown')}
- **CVSS Score**: {finding.get('cvss_score', 'N/A')}
"""

        prompt = BROWSER_VERIFIER_PROMPT.format(
            project_context=project_context_str,
            sandbox_url=self.sandbox_url,
            finding_details=finding_details,
            max_attempts=self.max_attempts,
        )
        prompt += BROWSER_VERIFIER_TOOL_INSTRUCTIONS
        return prompt

    def get_tools(self) -> list[dict]:
        """Return the base agent's tools followed by the browser tools."""
        return super().get_tools() + BROWSER_TOOLS

    async def _handle_browser_navigate(self, args: dict) -> dict:
        """Navigate the browser; each navigation counts as one attempt."""
        self.attempt_count += 1
        result = await self.browser_runner.navigate(
            self._browser_ctx,
            url=args.get("url", "/"),
            wait_until=args.get("wait_until", "networkidle"),
        )
        return result.to_dict()

    async def _handle_browser_click(self, args: dict) -> dict:
        """Click the element named by ``selector`` / ``selector_type``."""
        result = await self.browser_runner.click(
            self._browser_ctx,
            selector=args.get("selector", ""),
            selector_type=args.get("selector_type", "css"),
        )
        return result.to_dict()

    async def _handle_browser_fill(self, args: dict) -> dict:
        """Fill the field named by ``selector`` with ``value``."""
        result = await self.browser_runner.fill(
            self._browser_ctx,
            selector=args.get("selector", ""),
            value=args.get("value", ""),
        )
        return result.to_dict()

    async def _handle_browser_screenshot(self, args: dict) -> dict:
        """Take a screenshot under the given name (default ``screenshot``)."""
        return await self.browser_runner.screenshot(
            self._browser_ctx,
            name=args.get("name", "screenshot"),
        )

    async def _handle_browser_get_content(self, args: dict) -> dict:
        """Read page or element content in the requested format."""
        return await self.browser_runner.get_content(
            self._browser_ctx,
            selector=args.get("selector"),
            fmt=args.get("format", "text"),
        )

    async def _handle_browser_execute_js(self, args: dict) -> dict:
        """Run the supplied script through the browser runner."""
        return await self.browser_runner.execute_js(
            self._browser_ctx,
            script=args.get("script", ""),
        )

    async def _handle_browser_wait_for(self, args: dict) -> dict:
        """Wait for ``selector`` to reach ``state`` within ``timeout_ms``."""
        return await self.browser_runner.wait_for(
            self._browser_ctx,
            selector=args.get("selector", ""),
            timeout=args.get("timeout_ms", 5000),
            state=args.get("state", "visible"),
        )

    async def _handle_browser_get_cookies(self, args: dict) -> dict:
        """Return the cookies reported by the browser runner."""
        return await self.browser_runner.get_cookies(self._browser_ctx)

    async def _handle_browser_snapshot(self, args: dict) -> dict:
        """Return the runner's snapshot of the current page."""
        return await self.browser_runner.snapshot(self._browser_ctx)

    def _handle_report_browser_result(self, args: dict) -> dict:
        """Record the model's verdict in ``self.browser_result``.

        Missing fields fall back to conservative defaults
        (``not_exploitable`` / ``medium`` / the counted attempts).
        """
        self.browser_result = {
            "finding_index": self.finding_index,
            "status": args.get("status", "not_exploitable"),
            "confidence": args.get("confidence", "medium"),
            "evidence": args.get("evidence", ""),
            "attempts_made": args.get("attempts_made", self.attempt_count),
            "screenshots": args.get("screenshots", []),
            "dom_evidence": args.get("dom_evidence"),
            "console_evidence": args.get("console_evidence"),
            "network_evidence": args.get("network_evidence"),
            "reason": args.get("reason"),
        }
        return {"status": "recorded", "finding_index": self.finding_index}

    def _browser_tool_handlers(self) -> dict:
        """Map each awaitable browser tool name to its handler."""
        return {
            "browser_navigate": self._handle_browser_navigate,
            "browser_click": self._handle_browser_click,
            "browser_fill": self._handle_browser_fill,
            "browser_screenshot": self._handle_browser_screenshot,
            "browser_get_content": self._handle_browser_get_content,
            "browser_execute_js": self._handle_browser_execute_js,
            "browser_wait_for": self._handle_browser_wait_for,
            "browser_get_cookies": self._handle_browser_get_cookies,
            "browser_snapshot": self._handle_browser_snapshot,
        }

    @staticmethod
    def _serialize_tool_result(result) -> str:
        """Render a tool result as text for the model without ever raising.

        Dicts are JSON-encoded; values JSON cannot represent are stringified,
        and anything still unencodable falls back to ``str(result)`` so one
        odd tool output cannot abort the whole verification.
        """
        if not isinstance(result, dict):
            return str(result)
        try:
            return json.dumps(result, default=str)
        except (TypeError, ValueError):
            return str(result)

    async def run(self, task: str, context: Optional[dict] = None) -> dict:
        """Drive the LLM/browser loop for one finding and return the outcome.

        Args:
            task: Text of the initial user message.
            context: Optional dict; ``finding`` and ``project_context`` are
                read when building the system prompt.

        Returns:
            The dict built by ``_build_result``. ``browser_result`` is the
            reported verdict, a verdict inferred from the trace when the loop
            ended without a report, or ``None`` when the model replied
            without calling any tool.
        """
        context = context or {}
        self.session.current_agent = self.name
        self.browser_result = None
        self.attempt_count = 0

        self._browser_ctx = await self.browser_runner.create_context(self.finding_index)

        try:
            system_prompt = self.get_system_prompt(context)
            self.messages = [Message(role="user", content=task)]
            self._seed_existing_instructions()

            handlers = self._browser_tool_handlers()
            max_iterations = self.max_attempts * 4
            cancelled = False

            for _ in range(max_iterations):
                if self.session.cancelled:
                    cancelled = True
                    break

                self._inject_pending_instructions()

                response = await self.llm.chat(
                    messages=self.messages, tools=self.get_tools(), system=system_prompt,
                )

                self.session.total_cost += response.cost
                if response.usage:
                    self.session.total_tokens += response.usage.get("total_tokens", 0)
                    self.context_manager.update_usage(response.usage.get("input_tokens", 0))

                if response.content:
                    self.session.add_trace(
                        agent=self.name, event_type="thinking", content=response.content,
                    )

                # A plain-text reply ends the run with whatever was recorded.
                if not response.tool_calls:
                    return self._build_result(response.content or "")

                assistant_msg = Message(
                    role="assistant", content=response.content,
                    tool_calls=[
                        {
                            "id": tc.id,
                            "type": "function",
                            "function": {"name": tc.name, "arguments": json.dumps(tc.arguments)},
                        }
                        for tc in response.tool_calls
                    ],
                    reasoning_content=getattr(response, 'reasoning_content', None),
                )
                self.messages.append(assistant_msg)

                should_finish = False
                for tool_call in response.tool_calls:
                    self.session.add_trace(
                        agent=self.name, event_type="tool_call",
                        content=f"Calling {tool_call.name}",
                        tool_name=tool_call.name, tool_input=tool_call.arguments,
                    )

                    try:
                        if tool_call.name == "report_browser_result":
                            result = self._handle_report_browser_result(tool_call.arguments)
                            should_finish = True
                        elif tool_call.name in handlers:
                            result = await handlers[tool_call.name](tool_call.arguments)
                        else:
                            # Not a browser tool: defer to the shared registry.
                            result = self.tools.execute_tool(tool_call.name, tool_call.arguments)
                    except Exception as e:
                        # Surface the failure to the model instead of aborting.
                        result = {"error": f"Tool execution failed: {str(e)}"}

                    self.session.add_trace(
                        agent=self.name, event_type="tool_result",
                        content=f"Result from {tool_call.name}",
                        tool_name=tool_call.name, tool_output=result,
                    )

                    raw_content = self._serialize_tool_result(result)
                    truncated_content = self.context_manager.truncate_tool_result(
                        tool_call.name, raw_content,
                    )
                    tool_result = ToolResult(tool_call_id=tool_call.id, content=truncated_content)
                    self.messages.append(tool_result.to_message())

                # Finish only after every tool call in this turn was answered.
                if should_finish:
                    return self._build_result(response.content or "")

                if self.context_manager.needs_compaction():
                    self.messages = self.context_manager.compact_messages(self.messages)
                    logger.info("[%s] Compacted message history", self.name)

            if not self.browser_result:
                self.browser_result = self._infer_result_from_trace()

            if cancelled:
                return self._build_result("Cancelled before a result was reported")
            return self._build_result("Max iterations reached")

        finally:
            if self._browser_ctx:
                try:
                    await self._browser_ctx.close()
                except Exception:
                    # A cleanup failure must not replace the computed verdict.
                    logger.warning(
                        "[%s] Failed to close browser context", self.name, exc_info=True,
                    )

    def _inferred_result(self, status: str, confidence: str, evidence: str,
                         screenshots: list[str], reason: str) -> dict:
        """Assemble a verdict dict flagged as inferred from the trace."""
        return {
            "finding_index": self.finding_index,
            "status": status,
            "confidence": confidence,
            "evidence": evidence,
            "attempts_made": self.attempt_count,
            "screenshots": screenshots,
            "reason": reason,
            "inferred_from_trace": True,
        }

    def _infer_result_from_trace(self) -> dict:
        """Infer the verdict from the trace when the loop ended without a
        ``report_browser_result`` call.

        Signals gathered from this agent's tool calls and results:
        - ``browser_fill`` whose value contains a payload signature
        - ``browser_click`` following such a fill (treated as submission)
        - a success marker in page content seen after that submission
        - navigation to, or a landing page on, a well-known external host
        - names of the screenshots taken

        Returns:
            ``exploitable`` (medium confidence) for fill + click + success
            marker, or for external-navigation evidence; otherwise
            ``not_exploitable`` (low confidence).
        """
        my_trace = [
            t for t in self.session.trace
            if t.agent == self.name and t.event_type in ("tool_call", "tool_result")
        ]

        payload_substrings = self._payload_signatures()
        evidence: list[str] = []
        screenshots: list[str] = []
        fill_with_payload = False
        click_after_payload_fill = False
        success_after_payload = False
        navigated_to_external = False
        last_fill_was_payload = False

        for entry in my_trace:
            tool = entry.tool_name or ""

            if entry.event_type == "tool_call":
                args = entry.tool_input or {}

                if tool == "browser_fill":
                    value = str(args.get("value", ""))
                    last_fill_was_payload = any(sig in value for sig in payload_substrings)
                    if last_fill_was_payload:
                        fill_with_payload = True
                        evidence.append(f"browser_fill with payload content: {value[:120]}")

                elif tool == "browser_click":
                    if last_fill_was_payload:
                        click_after_payload_fill = True
                        evidence.append(
                            f"browser_click after payload fill: {args.get('selector', '')}"
                        )

                elif tool == "browser_navigate":
                    url = str(args.get("url", ""))
                    # NOTE(review): this also matches sandbox URLs that merely
                    # carry an external host in a query parameter — confirm
                    # that the requested URL alone is meant to count.
                    if url.startswith("http") and any(
                        host in url
                        for host in ("evil", "wikipedia.org", "example.com",
                                     "google.com", "169.254.169.254")
                    ):
                        navigated_to_external = True
                        evidence.append(f"browser_navigate to external/SSRF target: {url}")

                elif tool == "browser_screenshot":
                    screenshots.append(str(args.get("name", "")))

                continue

            # From here on the entry is a tool_result.
            out = entry.tool_output or {}
            if not isinstance(out, dict):
                continue

            # Only content seen after the payload was actually submitted (a
            # click following the payload fill) counts as acceptance; a marker
            # already on the page before submission proves nothing.
            if tool in self._PAGE_TEXT_TOOLS and click_after_payload_fill:
                haystack = (str(out.get("page_content", "")) + " "
                            + str(out.get("snapshot", "")) + " "
                            + str(out.get("content", "")) + " "
                            + str(out.get("page_title", ""))).lower()
                marker = next((m for m in self._SUCCESS_MARKERS if m in haystack), None)
                if marker is not None:
                    success_after_payload = True
                    evidence.append(
                        f"page showed success marker after payload submission: '{marker}'"
                    )

            # Redirect chain that left the sandbox for a well-known external host.
            # NOTE(review): with an empty sandbox_url every URL counts as
            # inside the sandbox, so this check never fires — confirm intended.
            if tool == "browser_navigate":
                page_url = str(out.get("page_url", ""))
                if page_url and not page_url.startswith(self.sandbox_url):
                    if ("wikipedia.org" in page_url or "evil" in page_url
                            or "google.com" in page_url):
                        navigated_to_external = True
                        evidence.append(
                            f"page navigated to external URL after redirect: {page_url}"
                        )

        # Decision rules, strongest evidence first.
        if fill_with_payload and click_after_payload_fill and success_after_payload:
            return self._inferred_result(
                status="exploitable",
                confidence="medium",
                evidence=(
                    "Inferred from trace (agent exhausted iteration budget before reporting). "
                    + " | ".join(evidence[:8])
                ),
                screenshots=screenshots,
                reason="Trace shows payload was submitted and the app accepted it (success message observed). "
                       "DOM-level execution was not separately confirmed.",
            )

        if navigated_to_external:
            return self._inferred_result(
                status="exploitable",
                confidence="medium",
                evidence=(
                    "Inferred from trace (open-redirect-style evidence). "
                    + " | ".join(evidence[:8])
                ),
                screenshots=screenshots,
                reason="Trace shows the browser was redirected to an external/attacker-controlled URL.",
            )

        return self._inferred_result(
            status="not_exploitable",
            confidence="low",
            evidence="Max iterations reached without confirming exploit. Trace did not contain payload-submission + success-marker pattern.",
            screenshots=screenshots,
            reason="Agent exhausted iteration budget without conclusive evidence in trace.",
        )

    def _payload_signatures(self) -> list[str]:
        """Substrings that strongly suggest a fill value is an exploit payload."""
        return [
            # XSS
            "<script", "</script>", "onerror=", "onload=", "onclick=",
            "javascript:", "<img src=x", "<svg",
            # SQL injection
            "' OR '", "' or '", '" OR "', "1=1", "UNION SELECT", "union select",
            # path traversal / local file read
            "../", "..\\", "/etc/passwd", "/proc/self",
            # SSRF targets
            "127.0.0.1", "169.254.169.254", "localhost:",
            # template injection
            "${", "{{", "<%",
            # command injection
            "; cat ", "| cat ", "$(", "`",
        ]

    def _build_result(self, summary: str) -> dict:
        """Wrap the summary text and current verdict in the result envelope."""
        return {
            "raw_output": summary,
            "browser_result": self.browser_result,
            "attempts_made": self.attempt_count,
            "type": "browser_verification_complete",
        }

    def _parse_final_response(self, content: str) -> dict:
        """Return the standard result envelope for a final text response."""
        return self._build_result(content)