openhack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113):
  1. openhack/__init__.py +2 -0
  2. openhack/__main__.py +225 -0
  3. openhack/agents/__init__.py +30 -0
  4. openhack/agents/base.py +230 -0
  5. openhack/agents/browser_verifier.py +679 -0
  6. openhack/agents/browser_verifier_swarm.py +256 -0
  7. openhack/agents/checkpoint.py +89 -0
  8. openhack/agents/context_manager.py +356 -0
  9. openhack/agents/coordinator.py +1105 -0
  10. openhack/agents/endpoint_analyst.py +307 -0
  11. openhack/agents/feature_hunter.py +93 -0
  12. openhack/agents/hunter.py +481 -0
  13. openhack/agents/hunter_swarm.py +385 -0
  14. openhack/agents/llm.py +334 -0
  15. openhack/agents/recon.py +19 -0
  16. openhack/agents/sandbox_verifier.py +396 -0
  17. openhack/agents/sandbox_verifier_swarm.py +250 -0
  18. openhack/agents/session.py +286 -0
  19. openhack/agents/validator.py +217 -0
  20. openhack/agents/validator_swarm.py +106 -0
  21. openhack/auth.py +175 -0
  22. openhack/browser/__init__.py +12 -0
  23. openhack/browser/runner.py +385 -0
  24. openhack/categories.py +130 -0
  25. openhack/config.py +201 -0
  26. openhack/deterministic_recon.py +464 -0
  27. openhack/entry_points.py +745 -0
  28. openhack/framework_classifier.py +515 -0
  29. openhack/framework_detection.py +269 -0
  30. openhack/headless_scan.py +179 -0
  31. openhack/prompts/__init__.py +108 -0
  32. openhack/prompts/browser_verifier.py +171 -0
  33. openhack/prompts/coordinator.py +31 -0
  34. openhack/prompts/django/__init__.py +32 -0
  35. openhack/prompts/django/auth_bypass.py +76 -0
  36. openhack/prompts/django/csrf.py +62 -0
  37. openhack/prompts/django/data_exposure.py +67 -0
  38. openhack/prompts/django/idor.py +74 -0
  39. openhack/prompts/django/injection.py +67 -0
  40. openhack/prompts/django/misconfiguration.py +70 -0
  41. openhack/prompts/django/ssrf.py +64 -0
  42. openhack/prompts/endpoint_analyst.py +122 -0
  43. openhack/prompts/express/__init__.py +29 -0
  44. openhack/prompts/express/auth_bypass.py +71 -0
  45. openhack/prompts/express/data_exposure.py +77 -0
  46. openhack/prompts/express/idor.py +69 -0
  47. openhack/prompts/express/injection.py +75 -0
  48. openhack/prompts/express/misconfiguration.py +72 -0
  49. openhack/prompts/express/ssrf.py +63 -0
  50. openhack/prompts/feature_hunter.py +140 -0
  51. openhack/prompts/flask/__init__.py +29 -0
  52. openhack/prompts/flask/auth_bypass.py +86 -0
  53. openhack/prompts/flask/data_exposure.py +78 -0
  54. openhack/prompts/flask/idor.py +83 -0
  55. openhack/prompts/flask/injection.py +77 -0
  56. openhack/prompts/flask/misconfiguration.py +73 -0
  57. openhack/prompts/flask/ssrf.py +65 -0
  58. openhack/prompts/hunter.py +362 -0
  59. openhack/prompts/hunter_continuation_loop.py +12 -0
  60. openhack/prompts/hunter_continuation_no_findings.py +19 -0
  61. openhack/prompts/hunter_continuation_no_progress.py +22 -0
  62. openhack/prompts/hunter_tool_instructions.py +55 -0
  63. openhack/prompts/nextjs/__init__.py +42 -0
  64. openhack/prompts/nextjs/auth_bypass.py +80 -0
  65. openhack/prompts/nextjs/csrf.py +71 -0
  66. openhack/prompts/nextjs/data_exposure.py +88 -0
  67. openhack/prompts/nextjs/idor.py +64 -0
  68. openhack/prompts/nextjs/injection.py +65 -0
  69. openhack/prompts/nextjs/middleware_bypass.py +75 -0
  70. openhack/prompts/nextjs/misconfiguration.py +92 -0
  71. openhack/prompts/nextjs/server_actions.py +97 -0
  72. openhack/prompts/nextjs/ssrf.py +66 -0
  73. openhack/prompts/nextjs/xss.py +69 -0
  74. openhack/prompts/pr_analysis_system.py +80 -0
  75. openhack/prompts/pr_analysis_user.py +11 -0
  76. openhack/prompts/project_context.py +89 -0
  77. openhack/prompts/recon.py +199 -0
  78. openhack/prompts/reporter.py +88 -0
  79. openhack/prompts/researchers.py +434 -0
  80. openhack/prompts/sandbox_verifier.py +128 -0
  81. openhack/prompts/supabase/__init__.py +39 -0
  82. openhack/prompts/supabase/auth_tokens.py +131 -0
  83. openhack/prompts/supabase/edge_functions.py +150 -0
  84. openhack/prompts/supabase/graphql.py +102 -0
  85. openhack/prompts/supabase/postgrest.py +99 -0
  86. openhack/prompts/supabase/realtime.py +93 -0
  87. openhack/prompts/supabase/rls.py +110 -0
  88. openhack/prompts/supabase/rpc_functions.py +127 -0
  89. openhack/prompts/supabase/storage.py +110 -0
  90. openhack/prompts/supabase/tenant_isolation.py +118 -0
  91. openhack/prompts/validator.py +319 -0
  92. openhack/prompts/validator_continuation_incomplete.py +12 -0
  93. openhack/prompts/validator_tool_instructions.py +29 -0
  94. openhack/quality.py +231 -0
  95. openhack/sandbox/__init__.py +12 -0
  96. openhack/sandbox/orchestrator.py +517 -0
  97. openhack/sandbox/runner.py +177 -0
  98. openhack/scan_session.py +245 -0
  99. openhack/setup.py +452 -0
  100. openhack/static_validator.py +612 -0
  101. openhack/tools/__init__.py +1 -0
  102. openhack/tools/ast_tools.py +307 -0
  103. openhack/tools/coverage.py +1078 -0
  104. openhack/tools/filesystem.py +404 -0
  105. openhack/tools/nextjs.py +258 -0
  106. openhack/tools/registry.py +52 -0
  107. openhack/tui.py +3450 -0
  108. openhack/updates.py +170 -0
  109. openhack-0.1.0.dist-info/METADATA +189 -0
  110. openhack-0.1.0.dist-info/RECORD +113 -0
  111. openhack-0.1.0.dist-info/WHEEL +4 -0
  112. openhack-0.1.0.dist-info/entry_points.txt +2 -0
  113. openhack-0.1.0.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,1105 @@
1
+ """
2
+ Coordinator agent that orchestrates the full vulnerability scan pipeline.
3
+ """
4
+
5
+ import asyncio
6
+ import json
7
+ import logging
8
+ import re
9
+ from typing import Optional
10
+
11
+ from .base import BaseAgent
12
+ from .recon import ReconAgent
13
+ from .hunter_swarm import HunterSwarmAgent
14
+ from .validator_swarm import ValidatorSwarmAgent
15
+ from .hunter import HunterAgent
16
+ from .feature_hunter import FeatureHunterAgent
17
+ from .sandbox_verifier_swarm import SandboxVerifierSwarmAgent
18
+ from .browser_verifier_swarm import BrowserVerifierSwarmAgent
19
+ from .session import Session, Finding, SessionStatus
20
+ from .llm import LLMClient, Message
21
+ from .checkpoint import CheckpointManager
22
+ from openhack.sandbox.orchestrator import SandboxConfig
23
+ from openhack.prompts import COORDINATOR_PROMPT
24
+ from openhack.prompts.feature_hunter import FEATURE_EXTRACTION_PROMPT
25
+ from openhack.prompts.researchers import (
26
+ HARDCODED_RESEARCHERS, C_RESEARCHERS, JAVA_RESEARCHERS,
27
+ DOTNET_RESEARCHERS, RUST_RESEARCHERS, PROTOCOL_RESEARCHERS,
28
+ RESEARCH_MANAGER_PROMPT,
29
+ )
30
+ from openhack.tools.registry import ToolRegistry
31
+ from openhack.tools.coverage import (
32
+ discover_attack_surface,
33
+ compute_coverage,
34
+ enrich_missed_endpoints,
35
+ build_second_pass_tasks,
36
+ build_researcher_zones,
37
+ )
38
+ from openhack.categories import normalize_category, normalize_severity
39
+ from openhack.framework_detection import detect_frameworks
40
+ from openhack.quality import run_quality_gates
41
+ # Static validator removed — line number correction in hunter, semantic validation by LLM
42
+ from openhack.config import settings
43
+
44
+ logger = logging.getLogger(__name__)
45
+
46
+
47
+ # _RESEARCHER_TASKS removed — now using HARDCODED_RESEARCHERS + manager-written tasks
48
+
49
+
50
+ class CoordinatorAgent(BaseAgent):
51
+ name = "coordinator"
52
+ description = "Orchestrating security scan"
53
+
54
def __init__(
    self,
    llm: LLMClient,
    tools: ToolRegistry,
    session: Session,
    resume_from: Optional[str] = None,
):
    """Set up the coordinator's shared state.

    Args:
        llm: Client handed to the base agent; its model/provider are reused
            when per-agent clients are created later.
        tools: Tool registry handed to the base agent.
        session: Scan session; ``session.id`` keys the checkpoint manager.
        resume_from: Name of the checkpoint to resume from, or ``None``.
    """
    super().__init__(llm, tools, session)
    self.context: dict = {}
    # A checkpoint manager only exists when checkpointing is switched on.
    if settings.checkpoint_enabled:
        self.checkpoint_mgr = CheckpointManager(session.id)
    else:
        self.checkpoint_mgr = None
    self.resume_from = resume_from
59
+
60
def get_system_prompt(self, context: dict) -> str:
    """Render ``COORDINATOR_PROMPT`` for the given scan context.

    The framework phrase names every entry of ``context["detected_frameworks"]``
    (each entry's ``"framework"`` value) when that list is non-empty, and falls
    back to a generic phrase otherwise.
    """
    frameworks = context.get("detected_frameworks", [])
    framework_context = "an application"
    if frameworks:
        names = ", ".join(entry["framework"] for entry in frameworks)
        framework_context = "an application using " + names
    return COORDINATOR_PROMPT.format(
        framework_context=framework_context,
        context=str(context),
        task="Coordinate the security scan",
    )
72
+
73
def _create_llm_for_agent(self, agent_type: str) -> LLMClient:
    """Build a fresh ``LLMClient`` for one sub-agent.

    The model is ``settings.<agent_type>_model_id`` when that attribute exists
    and is truthy; otherwise the coordinator's own model is used. Provider and
    prompt cache key are always copied from the coordinator's client.
    """
    override = getattr(settings, f"{agent_type}_model_id", None)
    chosen_model = override if override else self.llm.model
    return LLMClient(
        model=chosen_model,
        temperature=0.0,
        max_tokens=8192,
        provider=self.llm.provider,
        prompt_cache_key=self.llm.prompt_cache_key,
    )
77
+
78
@staticmethod
def _deduplicate_validated(validated, potential_findings):
    """Keep one validated entry per (file path, normalized category) pair.

    Each validated entry points at its source finding through
    ``original_index``. Entries whose index is missing or out of range are
    dropped. When two entries share a key, the one whose finding has the more
    severe ``severity`` wins; ties go to the longer ``description``, and a full
    tie keeps the entry seen first. Inputs with at most one entry are returned
    unchanged.
    """
    if len(validated) <= 1:
        return validated

    severity_rank = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}

    def rank(finding):
        # Smaller tuples are "better": lower severity rank, then longer description.
        level = severity_rank.get((finding.get("severity") or "info").lower(), 4)
        return (level, -len(finding.get("description") or ""))

    winners = {}
    for entry in validated:
        index = entry.get("original_index")
        if index is None or index < 0 or index >= len(potential_findings):
            continue
        finding = potential_findings[index]
        # Anything after the first ':' in the path is ignored when building the key.
        path = (finding.get("file_path") or "").strip().lower().split(":")[0]
        category = normalize_category(finding.get("category", "")).lower()
        bucket = f"{path}::{category}"

        if bucket not in winners:
            winners[bucket] = entry
            continue

        held_index = winners[bucket].get("original_index", 0)
        if 0 <= held_index < len(potential_findings):
            held_finding = potential_findings[held_index]
        else:
            held_finding = {}
        if rank(finding) < rank(held_finding):
            winners[bucket] = entry

    return list(winners.values())
105
+
106
+ @staticmethod
107
+ def _cap_findings_per_file(validated, potential_findings, max_per_file=3):
108
+ if len(validated) <= max_per_file:
109
+ return validated
110
+
111
+ SEVERITY_ORDER = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
112
+ by_file = {}
113
+ for v in validated:
114
+ idx = v.get("original_index")
115
+ if idx is None or idx < 0 or idx >= len(potential_findings):
116
+ continue
117
+ orig = potential_findings[idx]
118
+ file_path = (orig.get("file_path") or "").strip().lower().split(":")[0]
119
+ by_file.setdefault(file_path, []).append(v)
120
+
121
+ result = []
122
+ for file_path, items in by_file.items():
123
+ if len(items) <= max_per_file:
124
+ result.extend(items)
125
+ else:
126
+ items.sort(key=lambda v: SEVERITY_ORDER.get(
127
+ (potential_findings[v["original_index"]].get("severity") or "info").lower(), 4
128
+ ))
129
+ result.extend(items[:max_per_file])
130
+ return result
131
+
132
+ def _build_checkpoint_data(
133
+ self, total_cost: float, total_tokens: int,
134
+ total_input_tokens: int, total_output_tokens: int,
135
+ potential_findings: Optional[list] = None,
136
+ all_files_analyzed: Optional[list] = None,
137
+ ) -> dict:
138
+ """Build a checkpoint data dict from current state."""
139
+ data = {
140
+ "context": self.context,
141
+ "total_cost": total_cost,
142
+ "total_tokens": total_tokens,
143
+ "total_input_tokens": total_input_tokens,
144
+ "total_output_tokens": total_output_tokens,
145
+ "step_costs": dict(self.session.step_costs),
146
+ "step_tokens": dict(self.session.step_tokens),
147
+ "step_input_tokens": dict(self.session.step_input_tokens),
148
+ "step_output_tokens": dict(self.session.step_output_tokens),
149
+ }
150
+ if potential_findings is not None:
151
+ data["potential_findings"] = potential_findings
152
+ if all_files_analyzed is not None:
153
+ data["all_files_analyzed"] = all_files_analyzed
154
+ return data
155
+
156
+ @staticmethod
157
+ def _parse_json_array(raw: Optional[str], label: str = "response") -> list:
158
+ """Extract a JSON array from an LLM response, handling common failures."""
159
+ content = raw or ""
160
+ if "```json" in content:
161
+ content = content.split("```json", 1)[1].split("```", 1)[0]
162
+ elif "```" in content:
163
+ content = content.split("```", 1)[1].split("```", 1)[0]
164
+ content = content.strip()
165
+
166
+ # Direct parse
167
+ try:
168
+ result = json.loads(content)
169
+ if isinstance(result, list):
170
+ return result
171
+ except (json.JSONDecodeError, ValueError):
172
+ pass
173
+
174
+ # Fix common issues: unescaped newlines, trailing commas
175
+ fixed = re.sub(r'(?<!\\)\n', ' ', content)
176
+ fixed = re.sub(r',\s*([}\]])', r'\1', fixed)
177
+ try:
178
+ result = json.loads(fixed)
179
+ if isinstance(result, list):
180
+ return result
181
+ except (json.JSONDecodeError, ValueError):
182
+ pass
183
+
184
+ # Model returned reasoning text with JSON embedded — find the array
185
+ bracket_pos = content.find("[")
186
+ if bracket_pos > 0:
187
+ candidate = content[bracket_pos:]
188
+ depth = 0
189
+ for i, ch in enumerate(candidate):
190
+ if ch == "[":
191
+ depth += 1
192
+ elif ch == "]":
193
+ depth -= 1
194
+ if depth == 0:
195
+ try:
196
+ result = json.loads(candidate[: i + 1])
197
+ if isinstance(result, list):
198
+ return result
199
+ except (json.JSONDecodeError, ValueError):
200
+ break
201
+
202
+ logger.warning(f"Failed to parse {label} JSON: {content[:200]}")
203
+ return []
204
+
205
async def _extract_high_risk_features(self, recon_summary: str, attack_surface: Optional[dict] = None) -> list[dict]:
    """Ask the LLM once for a short list of high-risk features from recon output.

    Args:
        recon_summary: Recon text; summaries over 2000 characters are reduced
            to a few known sections and capped at 3000 characters.
        attack_surface: Optional attack-surface dict; up to 20 ``file`` values
            from each of three known keys are listed in the prompt.

    Returns:
        At most ``settings.max_feature_hunters`` feature dicts. Non-dict items
        in the model's array are wrapped as ``{"name": ..., "description": ...}``.
        The call's cost and total tokens are added to the session.
    """
    no_surface_text = "No attack surface data available."
    surface_lines = []
    if attack_surface:
        # Summarize key attack surface info for the extraction prompt
        for bucket in ("route_handlers", "api_routes", "danger_files"):
            bucket_entries = attack_surface.get(bucket, [])
            if not bucket_entries:
                continue
            joined_files = ", ".join(entry.get("file", "") for entry in bucket_entries[:20])
            surface_lines.append(f"{bucket}: {joined_files}")
    attack_surface_str = "\n".join(surface_lines) if surface_lines else no_surface_text

    # Long recon summaries make some models return empty or truncated JSON,
    # so only a handful of named sections are forwarded.
    condensed = recon_summary
    if len(recon_summary) > 2000:
        kept_sections = []
        for header in ("## High-Risk Areas", "## Application Overview",
                       "## Attacker Model Context"):
            start = recon_summary.find(header)
            if start == -1:
                continue
            stop = recon_summary.find("\n## ", start + len(header))
            if stop == -1:
                # Last section: take at most 1000 characters from its header.
                stop = min(start + 1000, len(recon_summary))
            kept_sections.append(recon_summary[start:stop].strip())
        condensed = "\n\n".join(kept_sections) if kept_sections else recon_summary[:2000]
        # Hard cap
        condensed = condensed[:3000]

    prompt = FEATURE_EXTRACTION_PROMPT.format(
        recon_summary=condensed,
        attack_surface=attack_surface_str,
    )

    extraction_llm = LLMClient(
        model=settings.hunter_model_id or self.llm.model,
        temperature=0.0,
        max_tokens=4096,
        provider=self.llm.provider,
        prompt_cache_key=self.llm.prompt_cache_key,
    )
    preamble = (
        "You are a security analyst. Extract 3-5 high-risk features from the recon summary below.\n"
        "Return ONLY a valid JSON array. No markdown, no explanation, no code fences.\n"
        "Keep descriptions SHORT (under 20 words each). Keep risk_reason SHORT (under 20 words).\n"
        "entry_files should list 2-3 likely file paths.\n\n"
    )
    system_text = (
        "You are a JSON-only responder. Output ONLY a raw JSON array, nothing else. "
        "Do NOT include any reasoning, thinking, preamble, or explanation. "
        "The very first character of your response must be [."
    )
    response = await extraction_llm.chat(
        messages=[Message(role="user", content=preamble + prompt)],
        tools=[],
        system=system_text,
    )

    parsed = self._parse_json_array(response.content, "feature extraction")

    # Cap to configured max
    parsed = parsed[:settings.max_feature_hunters]

    # Track the extraction cost
    self.session.total_cost += response.cost
    if response.usage:
        self.session.total_tokens += response.usage.get("total_tokens", 0)

    features = []
    for item in parsed:
        if isinstance(item, dict):
            features.append(item)
        else:
            features.append({"name": str(item), "description": str(item)})

    feature_names = [feature.get('name', '?') for feature in features]
    logger.info(f"Extracted {len(features)} high-risk features: {feature_names}")
    return features
280
+
281
async def _write_app_specific_researchers(self, recon_summary: str) -> list[dict]:
    """Manager agent: reads recon and writes app-specific researcher tasks.

    Args:
        recon_summary: Recon text; summaries over 3000 characters are reduced
            to a few known sections before being placed in the prompt.

    Returns:
        The task dicts parsed from the model's JSON array. Non-dict elements
        are discarded here, so one malformed element can no longer raise
        while the result is being logged and cost the caller every valid task.
        The call's cost and total tokens are added to the session.
    """
    condensed = recon_summary
    if len(recon_summary) > 3000:
        sections = []
        for header in ["## High-Risk Areas", "## Application Overview",
                       "## Attacker Model Context"]:
            if header in recon_summary:
                start = recon_summary.index(header)
                next_header = recon_summary.find("\n## ", start + len(header))
                # Last section: take at most 1000 characters from its header.
                end = next_header if next_header != -1 else min(start + 1000, len(recon_summary))
                sections.append(recon_summary[start:end].strip())
        condensed = "\n\n".join(sections) if sections else recon_summary[:3000]

    prompt = RESEARCH_MANAGER_PROMPT.format(recon_summary=condensed)

    llm = LLMClient(
        model=settings.hunter_model_id or self.llm.model,
        temperature=0.0, max_tokens=4096, provider=self.llm.provider,
        prompt_cache_key=self.llm.prompt_cache_key,
    )
    full_prompt = (
        "You are a security research manager. Write 2-3 app-specific researcher tasks. "
        "Return ONLY a valid JSON array. No markdown, no code fences.\n\n" + prompt
    )
    response = await llm.chat(
        messages=[Message(role="user", content=full_prompt)],
        tools=[],
        system=(
            "You are a JSON-only responder. Output ONLY a raw JSON array, nothing else. "
            "Do NOT include any reasoning, thinking, preamble, or explanation. "
            "The very first character of your response must be [."
        ),
    )

    parsed = self._parse_json_array(response.content, "manager")

    self.session.total_cost += response.cost
    if response.usage:
        self.session.total_tokens += response.usage.get("total_tokens", 0)

    # The model may emit bare strings or numbers inside the array; keep only
    # dicts so the return value matches the annotation.
    tasks = [item for item in parsed if isinstance(item, dict)]
    dropped = len(parsed) - len(tasks)
    if dropped:
        logger.warning(f"Manager returned {dropped} non-object task entries; ignoring them")

    logger.info(f"Manager wrote {len(tasks)} app-specific researchers: {[t.get('name', '?') for t in tasks]}")
    return tasks
324
+
325
async def _run_feature_deep_dive(self, features: list[dict], context: dict) -> dict:  # noqa: C901
    """Spawn feature hunters concurrently and collect their findings.

    If features is non-empty, spawns one hunter per feature (legacy mode).
    If features is empty, spawns researcher agents that pick their own targets:
    zone-scoped researchers when ``build_researcher_zones`` returns zones,
    otherwise a specialization-based researcher set chosen from the detected
    frameworks. In both researcher modes, manager-written app-specific tasks
    are requested only while fewer than ``settings.max_feature_hunters`` slots
    are filled, so no LLM call is spent on tasks that would never be run.

    When zone-scoped mode is active, returns a 'zone_results' list mapping
    each zone to its researcher's findings and analyzed files — used by
    callers (headless_scan) to update ScanSession zone coverage.

    Args:
        features: Feature dicts (``name``, optional ``description`` and
            ``risk_reason``); an empty list selects researcher mode.
        context: Scan context passed to every hunter; also read for
            ``attack_surface``, ``detected_frameworks`` and ``recon``.

    Returns:
        Dict with deduplicated ``findings``, sorted ``files_analyzed``, the
        summed cost/token counters of all hunter LLM clients, and
        ``zone_results`` when at least one zone researcher ran.

    Raises:
        asyncio.CancelledError: Re-raised after all hunter tasks have been
            cancelled and awaited.
    """
    semaphore = asyncio.Semaphore(settings.max_concurrent_feature_hunters)
    max_hunters = settings.max_feature_hunters
    total_cost = 0.0
    total_tokens = 0
    total_input_tokens = 0
    total_output_tokens = 0
    zone_map: dict[int, dict] = {}  # hunter_id -> zone metadata
    # Bound before run_hunter is defined so the closure always has a value,
    # whichever mode is taken below.
    researcher_tasks: dict[int, str] = {}

    async def run_hunter(feature: dict = None, hunter_id: int = 0):
        async with semaphore:
            model = settings.feature_hunter_model_id or settings.hunter_model_id or self.llm.model
            llm = LLMClient(model=model, temperature=0.0, max_tokens=8192, provider=self.llm.provider, prompt_cache_key=self.llm.prompt_cache_key)
            hunter = FeatureHunterAgent(llm, self.tools, self.session, feature=feature, hunter_id=hunter_id)
            name = hunter.name
            try:
                if feature:
                    task_text = (
                        f"Deep security audit of the {feature['name']} feature. "
                        f"Description: {feature.get('description', '')}. "
                        f"Risk: {feature.get('risk_reason', '')}."
                    )
                elif hunter_id in researcher_tasks:
                    task_text = researcher_tasks[hunter_id]
                else:
                    # Unknown id: fall back to the first task. With no tasks at
                    # all this raises and is reported as a failed hunter below.
                    task_text = list(researcher_tasks.values())[0]
                result = await hunter.run(task_text, context=context)
                return name, result, llm, hunter_id
            except Exception as e:
                # Best-effort: one failing hunter must not abort the others.
                logger.error(f"Feature hunter {name} failed: {e}")
                return name, {"findings": [], "files_analyzed": []}, llm, hunter_id

    if features:
        # NOTE(review): every feature hunter is created with the default
        # hunter_id=0 — confirm FeatureHunterAgent does not need unique ids here.
        tasks = [asyncio.create_task(run_hunter(feature=f)) for f in features]
    else:
        idx = 0

        # Try zone-scoped mode for large repos
        attack_surface = context.get("attack_surface") or self.context.get("attack_surface")
        zones = []
        if attack_surface:
            zones = build_researcher_zones(attack_surface, num_zones=max_hunters)

        if zones:
            # Zone-scoped mode: each researcher gets a dedicated file zone
            total_zone_files = sum(z["file_count"] for z in zones)
            logger.info(f"Zone-scoped researchers: {len(zones)} zones, {total_zone_files} files")
            self.session.add_trace(
                agent="coordinator", event_type="status",
                content=f"Zone-scoped mode: {len(zones)} zones covering {total_zone_files} files",
            )

            for zone in zones:
                task_text = (
                    zone["scope_text"] + "\n\n---\n\n"
                    "Hunt for ALL vulnerability types in these files:\n"
                    "- Injection (SQL, command, template/SSTI, LDAP)\n"
                    "- XSS (stored, reflected, DOM, dangerouslySetInnerHTML, |safe)\n"
                    "- SSRF (user-controlled outbound requests, webhooks, URL fetching)\n"
                    "- Auth/Authz bypass, IDOR (missing ownership checks), privilege escalation\n"
                    "- Path traversal and file inclusion\n"
                    "- Data exposure, hardcoded secrets, verbose errors\n"
                    "- Business logic flaws, race conditions, non-atomic operations\n"
                    "- Framework-specific: ORM escape hatches, unsafe deserialization, mass assignment\n\n"
                    "For each file: read it fully, check authorization, trace user input to sinks, "
                    "follow imports to understand validation logic, and report confirmed vulnerabilities."
                )
                researcher_tasks[idx] = task_text
                zone_map[idx] = {"name": zone["name"], "file_paths": zone.get("file_paths", set())}
                idx += 1
        else:
            # Small repo mode: specialization-based researchers
            detected_frameworks = context.get("detected_frameworks", context.get("recon", {}).get("frameworks", []))
            framework_names = set(
                f.get("framework", "")
                for f in (detected_frameworks if isinstance(detected_frameworks, list) else [])
            )

            if framework_names & {"c", "cpp"}:
                base_researchers = C_RESEARCHERS
            elif framework_names & {"java", "spring", "springboot"}:
                base_researchers = JAVA_RESEARCHERS
            elif framework_names & {"dotnet", "csharp", "aspnet"}:
                base_researchers = DOTNET_RESEARCHERS
            elif framework_names & {"rust"}:
                base_researchers = RUST_RESEARCHERS
            else:
                base_researchers = HARDCODED_RESEARCHERS

            for task_text in base_researchers.values():
                researcher_tasks[idx] = task_text
                idx += 1

            recon_features = context.get("recon", {}).get("features", {})
            if isinstance(recon_features, dict):
                for protocol in ("websocket", "grpc"):
                    mentioned = protocol in recon_features or any(
                        protocol in str(v).lower() for v in recon_features.values()
                    )
                    protocol_task = PROTOCOL_RESEARCHERS.get(protocol, "")
                    # Skip protocols without a prompt rather than spawning a
                    # hunter with an empty task.
                    if mentioned and protocol_task:
                        researcher_tasks[idx] = protocol_task
                        idx += 1

        # Fill remaining slots with manager-written app-specific researchers.
        # Only tasks with id < max_hunters are ever run, so the manager LLM
        # is not called when every slot is already taken.
        recon_summary = context.get("recon", {}).get("summary", "")
        if recon_summary and idx < max_hunters:
            try:
                app_specific = await self._write_app_specific_researchers(recon_summary)
                for task_def in app_specific:
                    if isinstance(task_def, dict) and "task" in task_def and idx < max_hunters:
                        researcher_tasks[idx] = task_def["task"]
                        logger.info(f"Manager-written researcher {idx}: {task_def.get('name', '?')}")
                        idx += 1
            except Exception as e:
                # The manager is optional; keep the researchers gathered so far.
                logger.warning(f"Manager agent failed: {e}")

        num_researchers = min(len(researcher_tasks), max_hunters)
        tasks = [asyncio.create_task(run_hunter(hunter_id=i)) for i in range(num_researchers)]

    try:
        results = await asyncio.gather(*tasks)
    except asyncio.CancelledError:
        # Propagate cancellation to every hunter and wait for them to unwind.
        for t in tasks:
            t.cancel()
        await asyncio.gather(*tasks, return_exceptions=True)
        raise

    all_findings = []
    all_files = set()
    zone_results = []
    for name, result, llm_client, hunter_id in results:
        findings = result.get("findings", [])
        files_analyzed = result.get("files_analyzed", [])
        all_findings.extend(findings)
        all_files.update(files_analyzed)
        total_cost += llm_client.total_cost
        total_tokens += llm_client.total_tokens
        total_input_tokens += llm_client.total_input_tokens
        total_output_tokens += llm_client.total_output_tokens
        logger.info(f"Feature hunter {name}: {len(findings)} findings")

        if hunter_id in zone_map:
            zone_results.append({
                "zone_name": zone_map[hunter_id]["name"],
                "zone_file_paths": list(zone_map[hunter_id].get("file_paths", [])),
                "files_analyzed": files_analyzed,
                "findings_count": len(findings),
            })

    # Deduplicate
    all_findings = HunterSwarmAgent._deduplicate_findings(all_findings)

    result_dict = {
        "findings": all_findings,
        "files_analyzed": sorted(all_files),
        "total_cost": total_cost,
        "total_tokens": total_tokens,
        "total_input_tokens": total_input_tokens,
        "total_output_tokens": total_output_tokens,
    }
    if zone_results:
        result_dict["zone_results"] = zone_results
    return result_dict
507
+
508
+ async def run_full_scan(self) -> dict:
509
+ self.session.status = SessionStatus.RUNNING
510
+
511
+ total_cost = 0.0
512
+ total_tokens = 0
513
+ total_input_tokens = 0
514
+ total_output_tokens = 0
515
+ potential_findings: list = []
516
+ all_files_analyzed: list = []
517
+
518
+ # ── Resume from checkpoint ──────────────────────────────────────
519
+ skip_to: Optional[str] = None
520
+ if self.resume_from and self.checkpoint_mgr:
521
+ checkpoint = self.checkpoint_mgr.load(self.resume_from)
522
+ if checkpoint:
523
+ data = checkpoint["data"]
524
+ self.context = data.get("context", {})
525
+ self.session.context = dict(self.context)
526
+ self.session.restore_from_checkpoint(data)
527
+ total_cost = data.get("total_cost", 0.0)
528
+ total_tokens = data.get("total_tokens", 0)
529
+ total_input_tokens = data.get("total_input_tokens", 0)
530
+ total_output_tokens = data.get("total_output_tokens", 0)
531
+
532
+ if self.resume_from == "recon":
533
+ skip_to = "hunter"
534
+ elif self.resume_from == "hunter":
535
+ skip_to = "validator"
536
+ potential_findings = data.get("potential_findings", [])
537
+ all_files_analyzed = data.get("all_files_analyzed", [])
538
+ elif self.resume_from == "feature_hunt":
539
+ skip_to = "validator"
540
+ potential_findings = data.get("potential_findings", [])
541
+ all_files_analyzed = data.get("all_files_analyzed", [])
542
+
543
+ logger.info(f"Resuming from checkpoint '{self.resume_from}', skipping to: {skip_to}")
544
+ self.session.add_trace(
545
+ agent="coordinator", event_type="resume",
546
+ content={"from_checkpoint": self.resume_from, "skip_to": skip_to},
547
+ )
548
+
549
+ # Project context: always use the current session's value (survives resume)
550
+ if self.session.project_context:
551
+ self.context["project_context"] = self.session.project_context
552
+
553
+ # Framework detection (deterministic) — skip if restored from checkpoint
554
+ if "detected_frameworks" not in self.context:
555
+ detected_frameworks = detect_frameworks(self.tools.fs_tools)
556
+ self.context["detected_frameworks"] = detected_frameworks
557
+ self.session.context["detected_frameworks"] = detected_frameworks
558
+ logger.info(f"Detected frameworks: {[f['framework'] for f in detected_frameworks]}")
559
+
560
+ try:
561
+ # Attack surface discovery (deterministic)
562
+ attack_surface = self.context.get("attack_surface")
563
+ if attack_surface is None:
564
+ try:
565
+ attack_surface = discover_attack_surface(self.tools.fs_tools, nextjs_tools=self.tools.nextjs_tools)
566
+ self.context["attack_surface"] = attack_surface
567
+ logger.info(f"Attack surface: {attack_surface['total_endpoints']} endpoints")
568
+ except Exception as e:
569
+ logger.warning(f"Attack surface discovery failed: {e}")
570
+
571
+ # Step 1: Reconnaissance
572
+ if skip_to is None:
573
+ self.session.add_trace(agent="coordinator", event_type="step_start", content="Step 1: Reconnaissance")
574
+ recon_llm = self._create_llm_for_agent("recon")
575
+ recon_agent = ReconAgent(recon_llm, self.tools, self.session)
576
+ recon_result = await recon_agent.run(
577
+ "Perform reconnaissance on this application. Map out the structure, "
578
+ "identify authentication mechanisms, API surface, and high-risk areas.",
579
+ context=self.context,
580
+ )
581
+ self.context["recon"] = recon_result
582
+ self.session.context["recon"] = recon_result
583
+
584
+ recon_cost = recon_llm.total_cost
585
+ recon_tokens = recon_llm.total_tokens
586
+ self.session.record_step_cost("recon", recon_cost, recon_tokens,
587
+ input_tokens=recon_llm.total_input_tokens, output_tokens=recon_llm.total_output_tokens)
588
+ total_cost += recon_cost
589
+ total_tokens += recon_tokens
590
+ total_input_tokens += recon_llm.total_input_tokens
591
+ total_output_tokens += recon_llm.total_output_tokens
592
+ self.session.total_cost = total_cost
593
+ self.session.total_tokens = total_tokens
594
+ self.session.add_trace(agent="coordinator", event_type="step_complete",
595
+ content={"step": "recon", "cost": recon_cost, "tokens": recon_tokens,
596
+ "input_tokens": recon_llm.total_input_tokens, "output_tokens": recon_llm.total_output_tokens})
597
+
598
+ # Checkpoint: recon complete
599
+ if self.checkpoint_mgr:
600
+ self.checkpoint_mgr.save("recon", self._build_checkpoint_data(
601
+ total_cost, total_tokens, total_input_tokens, total_output_tokens))
602
+
603
+ # Step 2: Hunting (swarm)
604
+ if skip_to in (None, "hunter"):
605
+ self.session.add_trace(agent="coordinator", event_type="step_start", content="Step 2: Hunting (swarm)")
606
+ hunter_llm = self._create_llm_for_agent("hunter")
607
+ hunter_swarm = HunterSwarmAgent(hunter_llm, self.tools, self.session)
608
+ hunter_result = await hunter_swarm.run(
609
+ "Hunt for security vulnerabilities in this application.", context=self.context)
610
+ self.context["hunter"] = hunter_result
611
+ self.session.context["hunter"] = hunter_result
612
+
613
+ hunter_cost = hunter_swarm.total_cost
614
+ hunter_tokens = hunter_swarm.total_tokens
615
+ self.session.record_step_cost("hunter", hunter_cost, hunter_tokens,
616
+ input_tokens=hunter_swarm.total_input_tokens, output_tokens=hunter_swarm.total_output_tokens)
617
+ total_cost += hunter_cost
618
+ total_tokens += hunter_tokens
619
+ total_input_tokens += hunter_swarm.total_input_tokens
620
+ total_output_tokens += hunter_swarm.total_output_tokens
621
+ self.session.total_cost = total_cost
622
+ self.session.total_tokens = total_tokens
623
+
624
+ potential_findings = hunter_result.get("findings", [])
625
+ all_files_analyzed = list(hunter_result.get("files_analyzed", []))
626
+ self.session.add_trace(agent="coordinator", event_type="step_complete",
627
+ content={"step": "hunter_swarm", "cost": hunter_cost, "tokens": hunter_tokens,
628
+ "potential_findings": len(potential_findings)})
629
+
630
+ # Step 2.5: Coverage-guided second pass
631
+ if attack_surface:
632
+ pass1_coverage = compute_coverage(attack_surface, all_files_analyzed)
633
+ missed_endpoints = pass1_coverage.get("missed", [])
634
+
635
+ if missed_endpoints:
636
+ self.session.add_trace(agent="coordinator", event_type="step_start",
637
+ content=f"Step 2.5: Coverage second pass ({len(missed_endpoints)} missed)")
638
+
639
+ enriched = enrich_missed_endpoints(missed_endpoints, self.tools.fs_tools)
640
+ second_pass_tasks = build_second_pass_tasks(enriched)
641
+
642
+ pass2_findings = []
643
+ pass2_files = set()
644
+ pass2_cost = 0.0
645
+ pass2_tokens = 0
646
+ pass2_input = 0
647
+ pass2_output = 0
648
+
649
+ sem = asyncio.Semaphore(settings.max_concurrent_hunters)
650
+
651
+ async def run_pass2(task_text, batch_idx):
652
+ async with sem:
653
+ llm = self._create_llm_for_agent("hunter")
654
+ hunter = HunterAgent(llm, self.tools, self.session,
655
+ vuln_categories=["xss", "injection", "ssrf", "open_redirect", "idor", "auth_bypass"],
656
+ group_name=f"second_pass_{batch_idx}")
657
+ try:
658
+ result = await hunter.run(task_text, self.context)
659
+ return result, hunter.llm
660
+ except Exception as e:
661
+ logger.error(f"Second pass hunter {batch_idx} failed: {e}")
662
+ return {"findings": [], "files_analyzed": []}, hunter.llm
663
+
664
+ pass2_tasks = [
665
+ asyncio.create_task(run_pass2(t, i))
666
+ for i, t in enumerate(second_pass_tasks)
667
+ ]
668
+ try:
669
+ pass2_results = await asyncio.gather(*pass2_tasks)
670
+ except asyncio.CancelledError:
671
+ for t in pass2_tasks:
672
+ t.cancel()
673
+ await asyncio.gather(*pass2_tasks, return_exceptions=True)
674
+ raise
675
+
676
+ for result, llm_client in pass2_results:
677
+ pass2_findings.extend(result.get("findings", []))
678
+ pass2_files.update(result.get("files_analyzed", []))
679
+ pass2_cost += llm_client.total_cost
680
+ pass2_tokens += llm_client.total_tokens
681
+ pass2_input += llm_client.total_input_tokens
682
+ pass2_output += llm_client.total_output_tokens
683
+
684
+ self.session.record_step_cost("hunter_second_pass", pass2_cost, pass2_tokens,
685
+ input_tokens=pass2_input, output_tokens=pass2_output)
686
+ total_cost += pass2_cost
687
+ total_tokens += pass2_tokens
688
+ self.session.total_cost = total_cost
689
+ self.session.total_tokens = total_tokens
690
+
691
+ potential_findings.extend(pass2_findings)
692
+ all_files_analyzed = sorted(set(all_files_analyzed) | pass2_files)
693
+ hunter_result["findings"] = potential_findings
694
+ hunter_result["files_analyzed"] = all_files_analyzed
695
+ self.context["hunter"] = hunter_result
696
+
697
+ self.session.add_trace(agent="coordinator", event_type="step_complete",
698
+ content={"step": "hunter_second_pass", "cost": pass2_cost, "tokens": pass2_tokens,
699
+ "new_findings": len(pass2_findings), "total_findings": len(potential_findings)})
700
+
701
+ # Checkpoint: hunter complete (includes second pass)
702
+ if self.checkpoint_mgr:
703
+ self.checkpoint_mgr.save("hunter", self._build_checkpoint_data(
704
+ total_cost, total_tokens, total_input_tokens, total_output_tokens,
705
+ potential_findings=potential_findings, all_files_analyzed=all_files_analyzed))
706
+
707
+ # Step 2.25: Feature Deep Dive
708
+ if settings.feature_hunt_enabled and skip_to in (None, "hunter"):
709
+ recon_summary = self.context.get("recon", {}).get("summary", "")
710
+ if recon_summary:
711
+ self.session.add_trace(
712
+ agent="coordinator", event_type="step_start",
713
+ content="Step 2.25: Feature deep dive — extracting high-risk features",
714
+ )
715
+
716
+ features = await self._extract_high_risk_features(
717
+ recon_summary, attack_surface,
718
+ )
719
+
720
+ if features:
721
+ self.session.add_trace(
722
+ agent="coordinator", event_type="status",
723
+ content=f"Feature deep dive: {len(features)} features — "
724
+ + ", ".join(f.get("name", "?") for f in features),
725
+ )
726
+ else:
727
+ # Researcher mode: agents pick their own targets
728
+ logger.info("No features extracted — spawning researcher agents")
729
+ self.session.add_trace(
730
+ agent="coordinator", event_type="status",
731
+ content="Feature deep dive: researcher mode — agents pick their own targets",
732
+ )
733
+
734
+ feature_result = await self._run_feature_deep_dive(features, self.context)
735
+
736
+ feature_findings = feature_result.get("findings", [])
737
+ feature_cost = feature_result["total_cost"]
738
+ feature_tokens = feature_result["total_tokens"]
739
+
740
+ self.session.record_step_cost(
741
+ "feature_hunt", feature_cost, feature_tokens,
742
+ input_tokens=feature_result["total_input_tokens"],
743
+ output_tokens=feature_result["total_output_tokens"],
744
+ )
745
+ total_cost += feature_cost
746
+ total_tokens += feature_tokens
747
+ total_input_tokens += feature_result["total_input_tokens"]
748
+ total_output_tokens += feature_result["total_output_tokens"]
749
+ self.session.total_cost = total_cost
750
+ self.session.total_tokens = total_tokens
751
+
752
+ # Merge and deduplicate with category hunter findings
753
+ potential_findings.extend(feature_findings)
754
+ potential_findings = HunterSwarmAgent._deduplicate_findings(potential_findings)
755
+ all_files_analyzed = sorted(
756
+ set(all_files_analyzed) | set(feature_result.get("files_analyzed", []))
757
+ )
758
+
759
+ if "hunter" not in self.context:
760
+ self.context["hunter"] = {}
761
+ self.context["hunter"]["findings"] = potential_findings
762
+ self.context["hunter"]["files_analyzed"] = all_files_analyzed
763
+
764
+ self.session.add_trace(
765
+ agent="coordinator", event_type="step_complete",
766
+ content={
767
+ "step": "feature_hunt",
768
+ "features_analyzed": len(features),
769
+ "new_findings": len(feature_findings),
770
+ "total_findings": len(potential_findings),
771
+ "cost": feature_cost,
772
+ "tokens": feature_tokens,
773
+ },
774
+ )
775
+
776
+ # Checkpoint: feature hunt complete
777
+ if self.checkpoint_mgr:
778
+ self.checkpoint_mgr.save("feature_hunt", self._build_checkpoint_data(
779
+ total_cost, total_tokens, total_input_tokens, total_output_tokens,
780
+ potential_findings=potential_findings, all_files_analyzed=all_files_analyzed))
781
+
782
+ # Pass findings directly to LLM validation (static validator removed —
783
+ # line number correction now happens in _handle_report_finding,
784
+ # and all semantic judgment is left to the LLM validator)
785
+ if "hunter" not in self.context:
786
+ self.context["hunter"] = {}
787
+ self.context["hunter"]["findings"] = potential_findings
788
+
789
+ # Step 3: Validation (swarm)
790
+ if potential_findings:
791
+ self.session.add_trace(agent="coordinator", event_type="step_start", content="Step 3: Validation (swarm)")
792
+ validator_llm = self._create_llm_for_agent("validator")
793
+ validator_swarm = ValidatorSwarmAgent(validator_llm, self.tools, self.session)
794
+ validator_result = await validator_swarm.run(
795
+ "Validate each potential vulnerability.", context=self.context)
796
+ self.context["validator"] = validator_result
797
+
798
+ validator_cost = validator_swarm.total_cost
799
+ validator_tokens = validator_swarm.total_tokens
800
+ self.session.record_step_cost("validator", validator_cost, validator_tokens,
801
+ input_tokens=validator_swarm.total_input_tokens, output_tokens=validator_swarm.total_output_tokens)
802
+ total_cost += validator_cost
803
+ total_tokens += validator_tokens
804
+ self.session.total_cost = total_cost
805
+ self.session.total_tokens = total_tokens
806
+
807
+ validated = validator_result.get("validated_findings", [])
808
+ self.session.add_trace(agent="coordinator", event_type="step_complete",
809
+ content={"step": "validator_swarm", "cost": validator_cost, "tokens": validator_tokens,
810
+ "validated_findings": len(validated)})
811
+
812
+ # Post-processing
813
+ validated = self._deduplicate_validated(validated, potential_findings)
814
+ validated = self._cap_findings_per_file(validated, potential_findings, max_per_file=3)
815
+
816
+ # Severity normalization
817
+ orig_for_norm = []
818
+ for v in validated:
819
+ idx = v.get("original_index")
820
+ if idx is not None and 0 <= idx < len(potential_findings):
821
+ orig_for_norm.append(potential_findings[idx])
822
+ else:
823
+ orig_for_norm.append({})
824
+ normalised = normalize_severity(orig_for_norm)
825
+ for i, v in enumerate(validated):
826
+ idx = v.get("original_index")
827
+ if idx is not None and 0 <= idx < len(potential_findings):
828
+ potential_findings[idx]["severity"] = normalised[i].get("severity", potential_findings[idx].get("severity", "medium"))
829
+
830
+ # Quality gates
831
+ validated, quality_stats = run_quality_gates(validated, potential_findings, fs_tools=self.tools.fs_tools)
832
+
833
+ # Create Finding objects
834
+ for finding_data in validated:
835
+ original_index = finding_data.get("original_index")
836
+ if original_index is None or original_index < 0 or original_index >= len(potential_findings):
837
+ continue
838
+ orig = potential_findings[original_index]
839
+
840
+ finding = Finding(
841
+ category=orig.get("category", "unknown"),
842
+ severity=orig.get("severity", "medium"),
843
+ title=f"{orig.get('category', 'Unknown')} in {orig.get('file_path', 'unknown')}",
844
+ description=orig.get("description", ""),
845
+ file_path=orig.get("file_path", ""),
846
+ line_number=orig.get("line_number"),
847
+ code_snippet=orig.get("code_snippet"),
848
+ poc=finding_data.get("poc"),
849
+ fix=finding_data.get("fix"),
850
+ cvss_score=finding_data.get("cvss_score"),
851
+ confidence=finding_data.get("confidence", "medium"),
852
+ validated=True,
853
+ )
854
+ self.session.add_finding(finding)
855
+ self.session.add_trace(agent="coordinator", event_type="finding_added",
856
+ content={"title": finding.title, "category": finding.category,
857
+ "severity": finding.severity, "file_path": finding.file_path})
858
+ else:
859
+ self.context["validator"] = {"validated_findings": [], "false_positives": []}
860
+
861
+ # Step 4: Sandbox Verification (optional)
862
+ if settings.sandbox_enabled and self.session.findings:
863
+ self.session.add_trace(
864
+ agent="coordinator", event_type="step_start",
865
+ content=f"Step 4: Sandbox verification ({len(self.session.findings)} findings)",
866
+ )
867
+
868
+ # Build confirmed findings list for the sandbox swarm
869
+ confirmed_findings = []
870
+ for finding in self.session.findings:
871
+ confirmed_findings.append({
872
+ "category": finding.category,
873
+ "severity": finding.severity,
874
+ "title": finding.title,
875
+ "description": finding.description,
876
+ "file_path": finding.file_path,
877
+ "line_number": finding.line_number,
878
+ "code_snippet": finding.code_snippet,
879
+ "poc": finding.poc,
880
+ "fix": finding.fix,
881
+ "cvss_score": finding.cvss_score,
882
+ "confidence": finding.confidence,
883
+ })
884
+
885
+ sandbox_config = SandboxConfig(
886
+ health_check_path=settings.sandbox_health_check_path,
887
+ health_check_timeout=settings.sandbox_health_check_timeout,
888
+ teardown_on_complete=settings.sandbox_teardown_on_complete,
889
+ )
890
+
891
+ sandbox_llm = self._create_llm_for_agent("validator")
892
+ sandbox_swarm = SandboxVerifierSwarmAgent(
893
+ sandbox_llm, self.tools, self.session,
894
+ sandbox_config=sandbox_config,
895
+ )
896
+
897
+ sandbox_context = {
898
+ "confirmed_findings": confirmed_findings,
899
+ "project_context": self.context.get("project_context", {}),
900
+ }
901
+
902
+ try:
903
+ sandbox_result = await sandbox_swarm.run(
904
+ "Verify confirmed findings by exploiting them in the sandbox.",
905
+ context=sandbox_context,
906
+ )
907
+ self.context["sandbox_verification"] = sandbox_result
908
+
909
+ sandbox_cost = sandbox_swarm.total_cost
910
+ sandbox_tokens = sandbox_swarm.total_tokens
911
+ self.session.record_step_cost(
912
+ "sandbox_verification", sandbox_cost, sandbox_tokens,
913
+ input_tokens=sandbox_swarm.total_input_tokens,
914
+ output_tokens=sandbox_swarm.total_output_tokens,
915
+ )
916
+ total_cost += sandbox_cost
917
+ total_tokens += sandbox_tokens
918
+ self.session.total_cost = total_cost
919
+ self.session.total_tokens = total_tokens
920
+
921
+ # Update findings with sandbox verification results
922
+ exploitable = sandbox_result.get("exploitable", [])
923
+ not_exploitable = sandbox_result.get("not_exploitable", [])
924
+
925
+ exploitable_indices = {
926
+ r.get("finding_index") for r in exploitable
927
+ }
928
+
929
+ for r in exploitable:
930
+ idx = r.get("finding_index")
931
+ if idx is not None and idx < len(self.session.findings):
932
+ finding = self.session.findings[idx]
933
+ # Upgrade the PoC with the working exploit
934
+ if r.get("working_poc"):
935
+ finding.poc = r["working_poc"]
936
+ finding.validated = True
937
+ finding.source = "sandbox_verified"
938
+
939
+ # Remove findings that couldn't be exploited in sandbox
940
+ if not_exploitable:
941
+ not_exploitable_indices = {
942
+ r.get("finding_index") for r in not_exploitable
943
+ if r.get("confidence") == "high"
944
+ }
945
+ # Only remove high-confidence non-exploitable findings
946
+ self.session.findings = [
947
+ f for i, f in enumerate(self.session.findings)
948
+ if i not in not_exploitable_indices
949
+ ]
950
+
951
+ self.session.add_trace(
952
+ agent="coordinator", event_type="step_complete",
953
+ content={
954
+ "step": "sandbox_verification",
955
+ "exploitable": len(exploitable),
956
+ "not_exploitable": len(not_exploitable),
957
+ "cost": sandbox_cost,
958
+ "tokens": sandbox_tokens,
959
+ },
960
+ )
961
+
962
+ except Exception as e:
963
+ logger.debug(f"Sandbox verification failed: {e}", exc_info=True)
964
+ self.session.add_trace(
965
+ agent="coordinator", event_type="sandbox_error",
966
+ content=f"Sandbox verification failed: {str(e)}. Findings preserved without sandbox verification.",
967
+ )
968
+
969
+ # Step 5: Browser Verification (optional)
970
+ if settings.browser_verification_enabled and self.session.findings:
971
+ self.session.add_trace(
972
+ agent="coordinator", event_type="step_start",
973
+ content=f"Step 5: Browser verification ({len(self.session.findings)} findings)",
974
+ )
975
+
976
+ confirmed_findings = []
977
+ for finding in self.session.findings:
978
+ confirmed_findings.append({
979
+ "category": finding.category,
980
+ "severity": finding.severity,
981
+ "title": finding.title,
982
+ "description": finding.description,
983
+ "file_path": finding.file_path,
984
+ "line_number": finding.line_number,
985
+ "code_snippet": finding.code_snippet,
986
+ "poc": finding.poc,
987
+ "fix": finding.fix,
988
+ "cvss_score": finding.cvss_score,
989
+ "confidence": finding.confidence,
990
+ })
991
+
992
+ sandbox_config = SandboxConfig(
993
+ health_check_path=settings.sandbox_health_check_path,
994
+ health_check_timeout=settings.sandbox_health_check_timeout,
995
+ teardown_on_complete=settings.sandbox_teardown_on_complete,
996
+ )
997
+
998
+ browser_llm = self._create_llm_for_agent("validator")
999
+ browser_swarm = BrowserVerifierSwarmAgent(
1000
+ browser_llm, self.tools, self.session,
1001
+ sandbox_config=sandbox_config,
1002
+ )
1003
+
1004
+ browser_context = {
1005
+ "confirmed_findings": confirmed_findings,
1006
+ "project_context": self.context.get("project_context", {}),
1007
+ }
1008
+
1009
+ try:
1010
+ browser_result = await browser_swarm.run(
1011
+ "Verify confirmed findings using browser-based exploit verification.",
1012
+ context=browser_context,
1013
+ )
1014
+ self.context["browser_verification"] = browser_result
1015
+
1016
+ browser_cost = browser_swarm.total_cost
1017
+ browser_tokens = browser_swarm.total_tokens
1018
+ self.session.record_step_cost(
1019
+ "browser_verification", browser_cost, browser_tokens,
1020
+ input_tokens=browser_swarm.total_input_tokens,
1021
+ output_tokens=browser_swarm.total_output_tokens,
1022
+ )
1023
+ total_cost += browser_cost
1024
+ total_tokens += browser_tokens
1025
+ self.session.total_cost = total_cost
1026
+ self.session.total_tokens = total_tokens
1027
+
1028
+ exploitable = browser_result.get("exploitable", [])
1029
+ not_exploitable = browser_result.get("not_exploitable", [])
1030
+
1031
+ for r in exploitable:
1032
+ idx = r.get("finding_index")
1033
+ if idx is not None and idx < len(self.session.findings):
1034
+ finding = self.session.findings[idx]
1035
+ finding.validated = True
1036
+ finding.source = "browser_verified"
1037
+
1038
+ if not_exploitable:
1039
+ not_exploitable_indices = {
1040
+ r.get("finding_index") for r in not_exploitable
1041
+ if r.get("confidence") == "high"
1042
+ }
1043
+ self.session.findings = [
1044
+ f for i, f in enumerate(self.session.findings)
1045
+ if i not in not_exploitable_indices
1046
+ ]
1047
+
1048
+ self.session.add_trace(
1049
+ agent="coordinator", event_type="step_complete",
1050
+ content={
1051
+ "step": "browser_verification",
1052
+ "exploitable": len(exploitable),
1053
+ "not_exploitable": len(not_exploitable),
1054
+ "evidence_dir": browser_result.get("evidence_dir", ""),
1055
+ "cost": browser_cost,
1056
+ "tokens": browser_tokens,
1057
+ },
1058
+ )
1059
+
1060
+ except ImportError as e:
1061
+ logger.warning(f"Browser verification skipped: {e}")
1062
+ self.session.add_trace(
1063
+ agent="coordinator", event_type="browser_skip",
1064
+ content=f"Browser verification skipped: {str(e)}",
1065
+ )
1066
+ except Exception as e:
1067
+ logger.debug(f"Browser verification failed: {e}", exc_info=True)
1068
+ self.session.add_trace(
1069
+ agent="coordinator", event_type="browser_error",
1070
+ content=f"Browser verification failed: {str(e)}. Findings preserved.",
1071
+ )
1072
+
1073
+ self.session.status = SessionStatus.COMPLETED
1074
+
1075
+ # Clean up checkpoints on successful completion
1076
+ if self.checkpoint_mgr:
1077
+ self.checkpoint_mgr.cleanup()
1078
+
1079
+ coverage_data = None
1080
+ if attack_surface:
1081
+ coverage_data = compute_coverage(attack_surface, all_files_analyzed)
1082
+
1083
+ cost_breakdown = self.session.get_cost_breakdown()
1084
+ self.session.add_trace(agent="coordinator", event_type="scan_complete",
1085
+ content={"findings_count": len(self.session.findings), "total_cost": self.session.total_cost,
1086
+ "total_tokens": self.session.total_tokens, "cost_breakdown": cost_breakdown,
1087
+ "coverage_pct": coverage_data["coverage_pct"] if coverage_data else None})
1088
+
1089
+ return {
1090
+ "status": "completed",
1091
+ "findings": self.session.get_findings_dict(),
1092
+ "context": self.context,
1093
+ "total_cost": self.session.total_cost,
1094
+ "total_tokens": self.session.total_tokens,
1095
+ "cost_breakdown": cost_breakdown,
1096
+ "coverage_data": coverage_data,
1097
+ }
1098
+
1099
+ except Exception as e:
1100
+ self.session.status = SessionStatus.FAILED
1101
+ logger.debug(f"Scan failed: {e}", exc_info=True)
1102
+ raise
1103
+
1104
async def run(self, task: str, context: Optional[dict] = None) -> dict:
    """Run the complete scan pipeline and return its result dictionary.

    Args:
        task: Accepted but not read by this method.
        context: Accepted but not read by this method.

    Returns:
        Whatever ``self.run_full_scan()`` returns.

    NOTE(review): the unused parameters presumably exist to match a shared
    agent ``run`` interface — confirm against the base class.
    """
    # Pure delegation: the scan itself takes no arguments from this call.
    scan_outcome = await self.run_full_scan()
    return scan_outcome