openhack 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openhack/__init__.py +2 -0
- openhack/__main__.py +225 -0
- openhack/agents/__init__.py +30 -0
- openhack/agents/base.py +230 -0
- openhack/agents/browser_verifier.py +679 -0
- openhack/agents/browser_verifier_swarm.py +256 -0
- openhack/agents/checkpoint.py +89 -0
- openhack/agents/context_manager.py +356 -0
- openhack/agents/coordinator.py +1105 -0
- openhack/agents/endpoint_analyst.py +307 -0
- openhack/agents/feature_hunter.py +93 -0
- openhack/agents/hunter.py +481 -0
- openhack/agents/hunter_swarm.py +385 -0
- openhack/agents/llm.py +334 -0
- openhack/agents/recon.py +19 -0
- openhack/agents/sandbox_verifier.py +396 -0
- openhack/agents/sandbox_verifier_swarm.py +250 -0
- openhack/agents/session.py +286 -0
- openhack/agents/validator.py +217 -0
- openhack/agents/validator_swarm.py +106 -0
- openhack/auth.py +175 -0
- openhack/browser/__init__.py +12 -0
- openhack/browser/runner.py +385 -0
- openhack/categories.py +130 -0
- openhack/config.py +201 -0
- openhack/deterministic_recon.py +464 -0
- openhack/entry_points.py +745 -0
- openhack/framework_classifier.py +515 -0
- openhack/framework_detection.py +269 -0
- openhack/headless_scan.py +179 -0
- openhack/prompts/__init__.py +108 -0
- openhack/prompts/browser_verifier.py +171 -0
- openhack/prompts/coordinator.py +31 -0
- openhack/prompts/django/__init__.py +32 -0
- openhack/prompts/django/auth_bypass.py +76 -0
- openhack/prompts/django/csrf.py +62 -0
- openhack/prompts/django/data_exposure.py +67 -0
- openhack/prompts/django/idor.py +74 -0
- openhack/prompts/django/injection.py +67 -0
- openhack/prompts/django/misconfiguration.py +70 -0
- openhack/prompts/django/ssrf.py +64 -0
- openhack/prompts/endpoint_analyst.py +122 -0
- openhack/prompts/express/__init__.py +29 -0
- openhack/prompts/express/auth_bypass.py +71 -0
- openhack/prompts/express/data_exposure.py +77 -0
- openhack/prompts/express/idor.py +69 -0
- openhack/prompts/express/injection.py +75 -0
- openhack/prompts/express/misconfiguration.py +72 -0
- openhack/prompts/express/ssrf.py +63 -0
- openhack/prompts/feature_hunter.py +140 -0
- openhack/prompts/flask/__init__.py +29 -0
- openhack/prompts/flask/auth_bypass.py +86 -0
- openhack/prompts/flask/data_exposure.py +78 -0
- openhack/prompts/flask/idor.py +83 -0
- openhack/prompts/flask/injection.py +77 -0
- openhack/prompts/flask/misconfiguration.py +73 -0
- openhack/prompts/flask/ssrf.py +65 -0
- openhack/prompts/hunter.py +362 -0
- openhack/prompts/hunter_continuation_loop.py +12 -0
- openhack/prompts/hunter_continuation_no_findings.py +19 -0
- openhack/prompts/hunter_continuation_no_progress.py +22 -0
- openhack/prompts/hunter_tool_instructions.py +55 -0
- openhack/prompts/nextjs/__init__.py +42 -0
- openhack/prompts/nextjs/auth_bypass.py +80 -0
- openhack/prompts/nextjs/csrf.py +71 -0
- openhack/prompts/nextjs/data_exposure.py +88 -0
- openhack/prompts/nextjs/idor.py +64 -0
- openhack/prompts/nextjs/injection.py +65 -0
- openhack/prompts/nextjs/middleware_bypass.py +75 -0
- openhack/prompts/nextjs/misconfiguration.py +92 -0
- openhack/prompts/nextjs/server_actions.py +97 -0
- openhack/prompts/nextjs/ssrf.py +66 -0
- openhack/prompts/nextjs/xss.py +69 -0
- openhack/prompts/pr_analysis_system.py +80 -0
- openhack/prompts/pr_analysis_user.py +11 -0
- openhack/prompts/project_context.py +89 -0
- openhack/prompts/recon.py +199 -0
- openhack/prompts/reporter.py +88 -0
- openhack/prompts/researchers.py +434 -0
- openhack/prompts/sandbox_verifier.py +128 -0
- openhack/prompts/supabase/__init__.py +39 -0
- openhack/prompts/supabase/auth_tokens.py +131 -0
- openhack/prompts/supabase/edge_functions.py +150 -0
- openhack/prompts/supabase/graphql.py +102 -0
- openhack/prompts/supabase/postgrest.py +99 -0
- openhack/prompts/supabase/realtime.py +93 -0
- openhack/prompts/supabase/rls.py +110 -0
- openhack/prompts/supabase/rpc_functions.py +127 -0
- openhack/prompts/supabase/storage.py +110 -0
- openhack/prompts/supabase/tenant_isolation.py +118 -0
- openhack/prompts/validator.py +319 -0
- openhack/prompts/validator_continuation_incomplete.py +12 -0
- openhack/prompts/validator_tool_instructions.py +29 -0
- openhack/quality.py +231 -0
- openhack/sandbox/__init__.py +12 -0
- openhack/sandbox/orchestrator.py +517 -0
- openhack/sandbox/runner.py +177 -0
- openhack/scan_session.py +245 -0
- openhack/setup.py +452 -0
- openhack/static_validator.py +612 -0
- openhack/tools/__init__.py +1 -0
- openhack/tools/ast_tools.py +307 -0
- openhack/tools/coverage.py +1078 -0
- openhack/tools/filesystem.py +404 -0
- openhack/tools/nextjs.py +258 -0
- openhack/tools/registry.py +52 -0
- openhack/tui.py +3450 -0
- openhack/updates.py +170 -0
- openhack-0.1.0.dist-info/METADATA +189 -0
- openhack-0.1.0.dist-info/RECORD +113 -0
- openhack-0.1.0.dist-info/WHEEL +4 -0
- openhack-0.1.0.dist-info/entry_points.txt +2 -0
- openhack-0.1.0.dist-info/licenses/LICENSE +661 -0
|
@@ -0,0 +1,1105 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Coordinator agent that orchestrates the full vulnerability scan pipeline.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import json
|
|
7
|
+
import logging
|
|
8
|
+
import re
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
from .base import BaseAgent
|
|
12
|
+
from .recon import ReconAgent
|
|
13
|
+
from .hunter_swarm import HunterSwarmAgent
|
|
14
|
+
from .validator_swarm import ValidatorSwarmAgent
|
|
15
|
+
from .hunter import HunterAgent
|
|
16
|
+
from .feature_hunter import FeatureHunterAgent
|
|
17
|
+
from .sandbox_verifier_swarm import SandboxVerifierSwarmAgent
|
|
18
|
+
from .browser_verifier_swarm import BrowserVerifierSwarmAgent
|
|
19
|
+
from .session import Session, Finding, SessionStatus
|
|
20
|
+
from .llm import LLMClient, Message
|
|
21
|
+
from .checkpoint import CheckpointManager
|
|
22
|
+
from openhack.sandbox.orchestrator import SandboxConfig
|
|
23
|
+
from openhack.prompts import COORDINATOR_PROMPT
|
|
24
|
+
from openhack.prompts.feature_hunter import FEATURE_EXTRACTION_PROMPT
|
|
25
|
+
from openhack.prompts.researchers import (
|
|
26
|
+
HARDCODED_RESEARCHERS, C_RESEARCHERS, JAVA_RESEARCHERS,
|
|
27
|
+
DOTNET_RESEARCHERS, RUST_RESEARCHERS, PROTOCOL_RESEARCHERS,
|
|
28
|
+
RESEARCH_MANAGER_PROMPT,
|
|
29
|
+
)
|
|
30
|
+
from openhack.tools.registry import ToolRegistry
|
|
31
|
+
from openhack.tools.coverage import (
|
|
32
|
+
discover_attack_surface,
|
|
33
|
+
compute_coverage,
|
|
34
|
+
enrich_missed_endpoints,
|
|
35
|
+
build_second_pass_tasks,
|
|
36
|
+
build_researcher_zones,
|
|
37
|
+
)
|
|
38
|
+
from openhack.categories import normalize_category, normalize_severity
|
|
39
|
+
from openhack.framework_detection import detect_frameworks
|
|
40
|
+
from openhack.quality import run_quality_gates
|
|
41
|
+
# Static validator removed — line number correction in hunter, semantic validation by LLM
|
|
42
|
+
from openhack.config import settings
|
|
43
|
+
|
|
44
|
+
logger = logging.getLogger(__name__)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# _RESEARCHER_TASKS removed — now using HARDCODED_RESEARCHERS + manager-written tasks
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class CoordinatorAgent(BaseAgent):
|
|
51
|
+
name = "coordinator"
|
|
52
|
+
description = "Orchestrating security scan"
|
|
53
|
+
|
|
54
|
+
def __init__(
    self,
    llm: LLMClient,
    tools: ToolRegistry,
    session: Session,
    resume_from: Optional[str] = None,
):
    """Set up the coordinator on top of the shared agent state.

    Args:
        llm: Client passed through to the base agent.
        tools: Tool registry passed through to the base agent.
        session: Scan session; its ``id`` keys the checkpoint manager.
        resume_from: Name of the checkpoint to resume from, or ``None``
            to start a fresh scan.
    """
    super().__init__(llm, tools, session)
    self.resume_from = resume_from
    # Shared scan context, filled in as the pipeline progresses.
    self.context: dict = {}
    # Checkpointing is optional; when disabled no manager is created.
    if settings.checkpoint_enabled:
        self.checkpoint_mgr = CheckpointManager(session.id)
    else:
        self.checkpoint_mgr = None
|
|
59
|
+
|
|
60
|
+
def get_system_prompt(self, context: dict) -> str:
    """Render the coordinator system prompt for ``context``.

    When ``context["detected_frameworks"]`` is a non-empty list, the
    prompt names each entry's ``"framework"`` value; otherwise it falls
    back to the generic phrase "an application".
    """
    frameworks = context.get("detected_frameworks", [])
    framework_context = "an application"
    if frameworks:
        joined_names = ", ".join(entry["framework"] for entry in frameworks)
        framework_context = "an application using " + joined_names

    return COORDINATOR_PROMPT.format(
        task="Coordinate the security scan",
        context=str(context),
        framework_context=framework_context,
    )
|
|
72
|
+
|
|
73
|
+
def _create_llm_for_agent(self, agent_type: str) -> LLMClient:
    """Build a fresh LLM client for the sub-agent named ``agent_type``.

    The model comes from ``settings.<agent_type>_model_id`` when that
    attribute exists and is truthy; otherwise the coordinator's own model
    is reused. Provider and prompt cache key are always inherited.
    """
    override = getattr(settings, f"{agent_type}_model_id", None)
    return LLMClient(
        model=override if override else self.llm.model,
        temperature=0.0,
        max_tokens=8192,
        provider=self.llm.provider,
        prompt_cache_key=self.llm.prompt_cache_key,
    )
|
|
77
|
+
|
|
78
|
+
@staticmethod
def _deduplicate_validated(validated, potential_findings):
    """Collapse validated findings that share a file and category.

    Each validated entry points at ``potential_findings`` through its
    ``original_index``. Entries are keyed by lower-cased file path (text
    before the first ``:``) plus normalized category. Within a key the
    entry whose original finding is more severe wins; ties go to the
    longer description. Entries with a missing or out-of-range index are
    dropped. Inputs of length 0 or 1 are returned untouched.
    """
    if len(validated) <= 1:
        return validated

    severity_rank = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
    total = len(potential_findings)

    def priority(finding):
        # Smaller tuples win: more severe first, then longer description.
        level = (finding.get("severity") or "info").lower()
        return severity_rank.get(level, 4), -len(finding.get("description") or "")

    best_by_key = {}
    for entry in validated:
        index = entry.get("original_index")
        if index is None or not (0 <= index < total):
            continue

        finding = potential_findings[index]
        path = (finding.get("file_path") or "").strip().lower().split(":")[0]
        category = normalize_category(finding.get("category", "")).lower()
        key = f"{path}::{category}"

        kept = best_by_key.get(key)
        if kept is None:
            best_by_key[key] = entry
            continue

        kept_index = kept.get("original_index", 0)
        kept_finding = potential_findings[kept_index] if 0 <= kept_index < total else {}
        if priority(finding) < priority(kept_finding):
            # Replacing the value keeps the key's original position.
            best_by_key[key] = entry

    return list(best_by_key.values())
|
|
105
|
+
|
|
106
|
+
@staticmethod
def _cap_findings_per_file(validated, potential_findings, max_per_file=3):
    """Keep at most ``max_per_file`` validated findings for each file.

    Entries are grouped by the lower-cased file path of the finding they
    reference (text before the first ``:``). Groups over the cap are
    stably sorted by severity and truncated; smaller groups keep their
    input order. Entries with a missing or out-of-range
    ``original_index`` are dropped. If the whole input is already within
    the cap it is returned as-is.
    """
    if len(validated) <= max_per_file:
        return validated

    severity_rank = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
    total = len(potential_findings)

    def rank(entry):
        level = potential_findings[entry["original_index"]].get("severity") or "info"
        return severity_rank.get(level.lower(), 4)

    grouped = {}
    for entry in validated:
        index = entry.get("original_index")
        if index is None or not (0 <= index < total):
            continue
        raw_path = potential_findings[index].get("file_path") or ""
        path = raw_path.strip().lower().split(":")[0]
        if path not in grouped:
            grouped[path] = []
        grouped[path].append(entry)

    capped = []
    for group in grouped.values():
        if len(group) > max_per_file:
            # sorted() is stable, so equal severities keep input order.
            group = sorted(group, key=rank)[:max_per_file]
        capped += group
    return capped
|
|
131
|
+
|
|
132
|
+
def _build_checkpoint_data(
    self, total_cost: float, total_tokens: int,
    total_input_tokens: int, total_output_tokens: int,
    potential_findings: Optional[list] = None,
    all_files_analyzed: Optional[list] = None,
) -> dict:
    """Assemble the dict persisted as a checkpoint.

    Contains the coordinator context, the running cost/token totals and
    shallow copies of the session's per-step counters. The findings and
    analyzed-files lists are included only when they are not ``None``.
    """
    session = self.session
    snapshot = {
        "context": self.context,
        "total_cost": total_cost,
        "total_tokens": total_tokens,
        "total_input_tokens": total_input_tokens,
        "total_output_tokens": total_output_tokens,
    }
    # Copy the per-step counters so later session updates do not leak in.
    for counter in ("step_costs", "step_tokens", "step_input_tokens", "step_output_tokens"):
        snapshot[counter] = dict(getattr(session, counter))

    optional_fields = (
        ("potential_findings", potential_findings),
        ("all_files_analyzed", all_files_analyzed),
    )
    for key, value in optional_fields:
        if value is not None:
            snapshot[key] = value
    return snapshot
|
|
155
|
+
|
|
156
|
+
@staticmethod
def _parse_json_array(raw: Optional[str], label: str = "response") -> list:
    """Extract a JSON array from an LLM response, handling common failures.

    Strategies, in order:
      1. Strip a Markdown code fence (```json or plain ```), then parse
         the remaining text directly.
      2. Repair raw newlines and trailing commas, then parse again.
      3. Decode the array that starts at the first ``[`` in the text,
         ignoring any prose before or after it. This is tried on both
         the original and the repaired text.

    Args:
        raw: The model output; ``None`` is treated as an empty string.
        label: Short name used in the warning logged on failure.

    Returns:
        The parsed list, or ``[]`` if no array could be recovered.
    """
    content = raw or ""
    if "```json" in content:
        content = content.split("```json", 1)[1].split("```", 1)[0]
    elif "```" in content:
        content = content.split("```", 1)[1].split("```", 1)[0]
    content = content.strip()

    # Fix common issues: unescaped newlines, trailing commas
    fixed = re.sub(r'(?<!\\)\n', ' ', content)
    fixed = re.sub(r',\s*([}\]])', r'\1', fixed)
    candidates = (content, fixed)

    # Whole-text parse, raw first and then repaired.
    for text in candidates:
        try:
            result = json.loads(text)
        except (json.JSONDecodeError, ValueError):
            continue
        if isinstance(result, list):
            return result

    # Model returned reasoning text with JSON embedded — find the array.
    # raw_decode parses one JSON value starting at the bracket and ignores
    # whatever follows, so brackets inside string values are handled and
    # an array at position 0 followed by trailing prose is recovered too.
    decoder = json.JSONDecoder()
    for text in candidates:
        bracket_pos = text.find("[")
        if bracket_pos == -1:
            continue
        try:
            result, _end = decoder.raw_decode(text, bracket_pos)
        except (json.JSONDecodeError, ValueError):
            continue
        if isinstance(result, list):
            return result

    logger.warning(f"Failed to parse {label} JSON: {content[:200]}")
    return []
|
|
204
|
+
|
|
205
|
+
async def _extract_high_risk_features(self, recon_summary: str, attack_surface: Optional[dict] = None) -> list[dict]:
    """Ask the LLM once for the high-risk features described by recon.

    The prompt combines a condensed recon summary with a short listing of
    attack-surface files. The parsed result is capped at
    ``settings.max_feature_hunters`` entries, non-dict entries are wrapped
    into ``{"name", "description"}`` dicts, and the call's cost and token
    usage are added to the session totals.
    """
    # Summarize up to 20 files per attack-surface bucket for the prompt.
    surface_lines = []
    if attack_surface:
        for bucket in ("route_handlers", "api_routes", "danger_files"):
            entries = attack_surface.get(bucket, [])
            if not entries:
                continue
            joined_files = ", ".join(e.get("file", "") for e in entries[:20])
            surface_lines.append(f"{bucket}: {joined_files}")
    if surface_lines:
        attack_surface_str = "\n".join(surface_lines)
    else:
        attack_surface_str = "No attack surface data available."

    # Long summaries (10k+ chars) make some models return empty or
    # truncated JSON, so keep only the key sections and cap the length.
    condensed = recon_summary
    if len(recon_summary) > 2000:
        picked = []
        for header in ("## High-Risk Areas", "## Application Overview",
                       "## Attacker Model Context"):
            start = recon_summary.find(header)
            if start == -1:
                continue
            boundary = recon_summary.find("\n## ", start + len(header))
            if boundary == -1:
                boundary = min(start + 1000, len(recon_summary))
            picked.append(recon_summary[start:boundary].strip())
        condensed = "\n\n".join(picked) if picked else recon_summary[:2000]
        # Hard cap
        condensed = condensed[:3000]

    prompt = FEATURE_EXTRACTION_PROMPT.format(
        recon_summary=condensed,
        attack_surface=attack_surface_str,
    )

    llm = LLMClient(
        model=settings.hunter_model_id or self.llm.model,
        temperature=0.0,
        max_tokens=4096,
        provider=self.llm.provider,
        prompt_cache_key=self.llm.prompt_cache_key,
    )
    preamble = (
        "You are a security analyst. Extract 3-5 high-risk features from the recon summary below.\n"
        "Return ONLY a valid JSON array. No markdown, no explanation, no code fences.\n"
        "Keep descriptions SHORT (under 20 words each). Keep risk_reason SHORT (under 20 words).\n"
        "entry_files should list 2-3 likely file paths.\n\n"
    )
    system_prompt = (
        "You are a JSON-only responder. Output ONLY a raw JSON array, nothing else. "
        "Do NOT include any reasoning, thinking, preamble, or explanation. "
        "The very first character of your response must be [."
    )
    response = await llm.chat(
        messages=[Message(role="user", content=preamble + prompt)],
        tools=[],
        system=system_prompt,
    )

    # Parse, then cap to the configured maximum.
    parsed = self._parse_json_array(response.content, "feature extraction")
    parsed = parsed[:settings.max_feature_hunters]

    # Track the extraction cost on the session.
    self.session.total_cost += response.cost
    if response.usage:
        self.session.total_tokens += response.usage.get("total_tokens", 0)

    features = []
    for item in parsed:
        if isinstance(item, dict):
            features.append(item)
        else:
            features.append({"name": str(item), "description": str(item)})

    names = [f.get('name', '?') for f in features]
    logger.info(f"Extracted {len(features)} high-risk features: {names}")
    return features
|
|
280
|
+
|
|
281
|
+
async def _write_app_specific_researchers(self, recon_summary: str) -> list[dict]:
    """Manager agent: reads recon and writes app-specific researcher tasks.

    A recon summary longer than 3000 characters is condensed to its
    "High-Risk Areas", "Application Overview" and "Attacker Model
    Context" sections (or its first 3000 characters if none are found).
    The LLM is asked for a JSON array of task definitions, and the call's
    cost and token usage are added to the session totals.

    Returns:
        The task definitions that are JSON objects. Non-object entries in
        the model output are dropped, so one malformed entry can no longer
        raise in the log line below and cost the caller every valid task.
    """
    condensed = recon_summary
    if len(recon_summary) > 3000:
        sections = []
        for header in ["## High-Risk Areas", "## Application Overview",
                       "## Attacker Model Context"]:
            if header in recon_summary:
                start = recon_summary.index(header)
                next_header = recon_summary.find("\n## ", start + len(header))
                # Without a following header, take at most 1000 characters.
                end = next_header if next_header != -1 else min(start + 1000, len(recon_summary))
                sections.append(recon_summary[start:end].strip())
        condensed = "\n\n".join(sections) if sections else recon_summary[:3000]

    prompt = RESEARCH_MANAGER_PROMPT.format(recon_summary=condensed)

    llm = LLMClient(
        model=settings.hunter_model_id or self.llm.model,
        temperature=0.0, max_tokens=4096, provider=self.llm.provider,
        prompt_cache_key=self.llm.prompt_cache_key,
    )
    full_prompt = (
        "You are a security research manager. Write 2-3 app-specific researcher tasks. "
        "Return ONLY a valid JSON array. No markdown, no code fences.\n\n" + prompt
    )
    response = await llm.chat(
        messages=[Message(role="user", content=full_prompt)],
        tools=[],
        system=(
            "You are a JSON-only responder. Output ONLY a raw JSON array, nothing else. "
            "Do NOT include any reasoning, thinking, preamble, or explanation. "
            "The very first character of your response must be [."
        ),
    )

    parsed = self._parse_json_array(response.content, "manager")

    self.session.total_cost += response.cost
    if response.usage:
        self.session.total_tokens += response.usage.get("total_tokens", 0)

    # The model may return strings or numbers instead of objects; keep
    # only dicts so the return value matches the declared list[dict].
    tasks = [t for t in parsed if isinstance(t, dict)]
    dropped = len(parsed) - len(tasks)
    if dropped:
        logger.warning(f"Manager returned {dropped} non-object entries; ignoring them")

    logger.info(f"Manager wrote {len(tasks)} app-specific researchers: {[t.get('name', '?') for t in tasks]}")
    return tasks
|
|
324
|
+
|
|
325
|
+
async def _run_feature_deep_dive(self, features: list[dict], context: dict) -> dict:  # noqa: C901
    """Spawn feature hunters concurrently and collect their findings.

    If features is non-empty, spawns one hunter per feature (legacy mode).
    If features is empty, spawns researcher agents that pick their own targets.

    When zone-scoped mode is active, returns a 'zone_results' list mapping
    each zone to its researcher's findings and analyzed files — used by
    callers (headless_scan) to update ScanSession zone coverage.

    Returns:
        A dict with the deduplicated ``findings``, the sorted
        ``files_analyzed``, and cost/token totals summed over every
        hunter's LLM client; ``zone_results`` is added only when at least
        one hunter was mapped to a zone.
    """
    # Bounds how many hunters run at the same time.
    semaphore = asyncio.Semaphore(settings.max_concurrent_feature_hunters)
    total_cost = 0.0
    total_tokens = 0
    total_input_tokens = 0
    total_output_tokens = 0
    zone_map: dict[int, dict] = {}  # hunter_id -> zone metadata

    async def run_hunter(feature: dict = None, hunter_id: int = 0):
        # Runs one hunter and returns (name, result, llm, hunter_id).
        # `researcher_tasks` is read from the enclosing scope and is only
        # assigned in the researcher branch below.
        async with semaphore:
            model = settings.feature_hunter_model_id or settings.hunter_model_id or self.llm.model
            # Each hunter gets its own client so usage can be summed per hunter.
            llm = LLMClient(model=model, temperature=0.0, max_tokens=8192, provider=self.llm.provider, prompt_cache_key=self.llm.prompt_cache_key)
            hunter = FeatureHunterAgent(llm, self.tools, self.session, feature=feature, hunter_id=hunter_id)
            name = hunter.name
            try:
                if feature:
                    task_text = (
                        f"Deep security audit of the {feature['name']} feature. "
                        f"Description: {feature.get('description', '')}. "
                        f"Risk: {feature.get('risk_reason', '')}."
                    )
                else:
                    # Falls back to the first researcher task when this id has none.
                    task_text = researcher_tasks.get(hunter_id, list(researcher_tasks.values())[0])
                result = await hunter.run(task_text, context=context)
                return name, result, llm, hunter_id
            except Exception as e:
                # A failed hunter yields an empty result instead of
                # propagating, so the gather below still completes.
                logger.error(f"Feature hunter {name} failed: {e}")
                return name, {"findings": [], "files_analyzed": []}, llm, hunter_id

    if features:
        # NOTE(review): hunter_id is not passed here, so every legacy-mode
        # hunter runs with the default id 0 — confirm this is intended.
        tasks = [asyncio.create_task(run_hunter(feature=f)) for f in features]
    else:
        researcher_tasks: dict[int, str] = {}
        idx = 0

        # Try zone-scoped mode for large repos
        attack_surface = context.get("attack_surface") or self.context.get("attack_surface")
        zones = []
        if attack_surface:
            zones = build_researcher_zones(attack_surface, num_zones=settings.max_feature_hunters)

        if zones:
            # Zone-scoped mode: each researcher gets a dedicated file zone
            total_zone_files = sum(z["file_count"] for z in zones)
            logger.info(f"Zone-scoped researchers: {len(zones)} zones, {total_zone_files} files")
            self.session.add_trace(
                agent="coordinator", event_type="status",
                content=f"Zone-scoped mode: {len(zones)} zones covering {total_zone_files} files",
            )

            for zone in zones:
                task_text = (
                    zone["scope_text"] + "\n\n---\n\n"
                    "Hunt for ALL vulnerability types in these files:\n"
                    "- Injection (SQL, command, template/SSTI, LDAP)\n"
                    "- XSS (stored, reflected, DOM, dangerouslySetInnerHTML, |safe)\n"
                    "- SSRF (user-controlled outbound requests, webhooks, URL fetching)\n"
                    "- Auth/Authz bypass, IDOR (missing ownership checks), privilege escalation\n"
                    "- Path traversal and file inclusion\n"
                    "- Data exposure, hardcoded secrets, verbose errors\n"
                    "- Business logic flaws, race conditions, non-atomic operations\n"
                    "- Framework-specific: ORM escape hatches, unsafe deserialization, mass assignment\n\n"
                    "For each file: read it fully, check authorization, trace user input to sinks, "
                    "follow imports to understand validation logic, and report confirmed vulnerabilities."
                )
                researcher_tasks[idx] = task_text
                # Remember which hunter id owns which zone for zone_results.
                zone_map[idx] = {"name": zone["name"], "file_paths": zone.get("file_paths", set())}
                idx += 1

            # Fill remaining slots with manager-written app-specific researchers
            recon_summary = context.get("recon", {}).get("summary", "")
            if recon_summary and idx < settings.max_feature_hunters:
                try:
                    app_specific = await self._write_app_specific_researchers(recon_summary)
                    for task_def in app_specific:
                        if isinstance(task_def, dict) and "task" in task_def and idx < settings.max_feature_hunters:
                            researcher_tasks[idx] = task_def["task"]
                            logger.info(f"Manager-written researcher {idx}: {task_def.get('name', '?')}")
                            idx += 1
                except Exception as e:
                    # Best effort: the zone researchers still run without these.
                    logger.warning(f"Manager agent failed: {e}")
        else:
            # Small repo mode: specialization-based researchers
            detected_frameworks = context.get("detected_frameworks", context.get("recon", {}).get("frameworks", []))
            framework_names = set(
                f.get("framework", "")
                for f in (detected_frameworks if isinstance(detected_frameworks, list) else [])
            )

            # The first matching language family selects the researcher set.
            if framework_names & {"c", "cpp"}:
                base_researchers = C_RESEARCHERS
            elif framework_names & {"java", "spring", "springboot"}:
                base_researchers = JAVA_RESEARCHERS
            elif framework_names & {"dotnet", "csharp", "aspnet"}:
                base_researchers = DOTNET_RESEARCHERS
            elif framework_names & {"rust"}:
                base_researchers = RUST_RESEARCHERS
            else:
                base_researchers = HARDCODED_RESEARCHERS

            for name, task_text in base_researchers.items():
                researcher_tasks[idx] = task_text
                idx += 1

            # Add protocol researchers when recon features mention them,
            # either as a key or anywhere in a stringified value.
            recon_features = context.get("recon", {}).get("features", {})
            if isinstance(recon_features, dict):
                feature_keys = set(recon_features.keys())
                if "websocket" in feature_keys or any("websocket" in str(v).lower() for v in recon_features.values()):
                    researcher_tasks[idx] = PROTOCOL_RESEARCHERS.get("websocket", "")
                    idx += 1
                if "grpc" in feature_keys or any("grpc" in str(v).lower() for v in recon_features.values()):
                    researcher_tasks[idx] = PROTOCOL_RESEARCHERS.get("grpc", "")
                    idx += 1

            recon_summary = context.get("recon", {}).get("summary", "")
            if recon_summary:
                try:
                    app_specific = await self._write_app_specific_researchers(recon_summary)
                    for task_def in app_specific:
                        if isinstance(task_def, dict) and "task" in task_def:
                            researcher_tasks[idx] = task_def["task"]
                            logger.info(f"Manager-written researcher {idx}: {task_def.get('name', '?')}")
                            idx += 1
                except Exception as e:
                    # Best effort: the base researchers still run without these.
                    logger.warning(f"Manager agent failed: {e}")

        # Only ids below the configured maximum are started; tasks
        # registered beyond that limit are never run.
        num_researchers = min(len(researcher_tasks), settings.max_feature_hunters)
        tasks = [asyncio.create_task(run_hunter(hunter_id=i)) for i in range(num_researchers)]

    try:
        results = await asyncio.gather(*tasks)
    except asyncio.CancelledError:
        # Cancel every child task and wait for them before re-raising.
        for t in tasks:
            t.cancel()
        await asyncio.gather(*tasks, return_exceptions=True)
        raise

    all_findings = []
    all_files = set()
    zone_results = []
    for name, result, llm_client, hunter_id in results:
        findings = result.get("findings", [])
        files_analyzed = result.get("files_analyzed", [])
        all_findings.extend(findings)
        all_files.update(files_analyzed)
        total_cost += llm_client.total_cost
        total_tokens += llm_client.total_tokens
        total_input_tokens += llm_client.total_input_tokens
        total_output_tokens += llm_client.total_output_tokens
        logger.info(f"Feature hunter {name}: {len(findings)} findings")

        # Only hunters that were assigned a zone contribute a zone result.
        if hunter_id in zone_map:
            zone_results.append({
                "zone_name": zone_map[hunter_id]["name"],
                "zone_file_paths": list(zone_map[hunter_id].get("file_paths", [])),
                "files_analyzed": files_analyzed,
                "findings_count": len(findings),
            })

    # Deduplicate
    all_findings = HunterSwarmAgent._deduplicate_findings(all_findings)

    result_dict = {
        "findings": all_findings,
        "files_analyzed": sorted(all_files),
        "total_cost": total_cost,
        "total_tokens": total_tokens,
        "total_input_tokens": total_input_tokens,
        "total_output_tokens": total_output_tokens,
    }
    if zone_results:
        result_dict["zone_results"] = zone_results
    return result_dict
|
|
507
|
+
|
|
508
|
+
async def run_full_scan(self) -> dict:
    """Run the whole scan pipeline and return a summary of the results.

    Stages, in order: optional checkpoint resume, framework detection,
    attack-surface discovery, reconnaissance, the hunter swarm (plus a
    coverage-guided second pass over missed endpoints), the optional
    feature deep dive, LLM validation with post-processing, and the
    optional sandbox and browser verification steps.  Cost and token usage
    are recorded on ``self.session`` after every stage, and a checkpoint is
    saved after recon, hunting and the feature deep dive when a checkpoint
    manager is configured.

    Returns:
        A dict with the keys ``status``, ``findings``, ``context``,
        ``total_cost``, ``total_tokens``, ``cost_breakdown`` and
        ``coverage_data`` (``None`` when no attack surface is available).

    Raises:
        Exception: any error escaping a stage is re-raised after the
            session status has been set to ``SessionStatus.FAILED``.
            Failures of attack-surface discovery and of the sandbox/browser
            verification steps are logged and do not abort the scan.
    """

    def _is_valid_index(idx, size: int) -> bool:
        # Indices are read from swarm results: reject None, non-integers and
        # negatives (a negative index would silently address the wrong item).
        return isinstance(idx, int) and 0 <= idx < size

    def _confirmed_findings() -> list:
        # Plain-dict snapshot of the session findings for the verifier swarms.
        return [
            {
                "category": finding.category,
                "severity": finding.severity,
                "title": finding.title,
                "description": finding.description,
                "file_path": finding.file_path,
                "line_number": finding.line_number,
                "code_snippet": finding.code_snippet,
                "poc": finding.poc,
                "fix": finding.fix,
                "cvss_score": finding.cvss_score,
                "confidence": finding.confidence,
            }
            for finding in self.session.findings
        ]

    def _sandbox_config():
        # Sandbox settings shared by the sandbox and browser verifiers.
        return SandboxConfig(
            health_check_path=settings.sandbox_health_check_path,
            health_check_timeout=settings.sandbox_health_check_timeout,
            teardown_on_complete=settings.sandbox_teardown_on_complete,
        )

    self.session.status = SessionStatus.RUNNING

    total_cost = 0.0
    total_tokens = 0
    total_input_tokens = 0
    total_output_tokens = 0
    potential_findings: list = []
    all_files_analyzed: list = []

    # ── Resume from checkpoint ──────────────────────────────────────
    skip_to: Optional[str] = None
    if self.resume_from and self.checkpoint_mgr:
        checkpoint = self.checkpoint_mgr.load(self.resume_from)
        if checkpoint:
            data = checkpoint["data"]
            self.context = data.get("context", {})
            self.session.context = dict(self.context)
            self.session.restore_from_checkpoint(data)
            total_cost = data.get("total_cost", 0.0)
            total_tokens = data.get("total_tokens", 0)
            total_input_tokens = data.get("total_input_tokens", 0)
            total_output_tokens = data.get("total_output_tokens", 0)

            if self.resume_from == "recon":
                skip_to = "hunter"
            elif self.resume_from in ("hunter", "feature_hunt"):
                # Both checkpoints carry the hunting results, so the scan
                # continues directly with validation.
                skip_to = "validator"
                potential_findings = data.get("potential_findings", [])
                all_files_analyzed = data.get("all_files_analyzed", [])

            logger.info(f"Resuming from checkpoint '{self.resume_from}', skipping to: {skip_to}")
            self.session.add_trace(
                agent="coordinator", event_type="resume",
                content={"from_checkpoint": self.resume_from, "skip_to": skip_to},
            )

    # Project context: always use the current session's value (survives resume)
    if self.session.project_context:
        self.context["project_context"] = self.session.project_context

    # Framework detection (deterministic) — skip if restored from checkpoint
    if "detected_frameworks" not in self.context:
        detected_frameworks = detect_frameworks(self.tools.fs_tools)
        self.context["detected_frameworks"] = detected_frameworks
        self.session.context["detected_frameworks"] = detected_frameworks
        logger.info(f"Detected frameworks: {[f['framework'] for f in detected_frameworks]}")

    try:
        # Attack surface discovery (deterministic)
        attack_surface = self.context.get("attack_surface")
        if attack_surface is None:
            try:
                attack_surface = discover_attack_surface(
                    self.tools.fs_tools, nextjs_tools=self.tools.nextjs_tools)
                self.context["attack_surface"] = attack_surface
                logger.info(f"Attack surface: {attack_surface['total_endpoints']} endpoints")
            except Exception as e:
                # Best effort: coverage-dependent steps are skipped without it.
                logger.warning(f"Attack surface discovery failed: {e}")

        # Step 1: Reconnaissance
        if skip_to is None:
            self.session.add_trace(agent="coordinator", event_type="step_start", content="Step 1: Reconnaissance")
            recon_llm = self._create_llm_for_agent("recon")
            recon_agent = ReconAgent(recon_llm, self.tools, self.session)
            recon_result = await recon_agent.run(
                "Perform reconnaissance on this application. Map out the structure, "
                "identify authentication mechanisms, API surface, and high-risk areas.",
                context=self.context,
            )
            self.context["recon"] = recon_result
            self.session.context["recon"] = recon_result

            recon_cost = recon_llm.total_cost
            recon_tokens = recon_llm.total_tokens
            self.session.record_step_cost(
                "recon", recon_cost, recon_tokens,
                input_tokens=recon_llm.total_input_tokens,
                output_tokens=recon_llm.total_output_tokens)
            total_cost += recon_cost
            total_tokens += recon_tokens
            total_input_tokens += recon_llm.total_input_tokens
            total_output_tokens += recon_llm.total_output_tokens
            self.session.total_cost = total_cost
            self.session.total_tokens = total_tokens
            self.session.add_trace(
                agent="coordinator", event_type="step_complete",
                content={"step": "recon", "cost": recon_cost, "tokens": recon_tokens,
                         "input_tokens": recon_llm.total_input_tokens,
                         "output_tokens": recon_llm.total_output_tokens})

            # Checkpoint: recon complete
            if self.checkpoint_mgr:
                self.checkpoint_mgr.save("recon", self._build_checkpoint_data(
                    total_cost, total_tokens, total_input_tokens, total_output_tokens))

        # Step 2: Hunting (swarm)
        if skip_to in (None, "hunter"):
            self.session.add_trace(agent="coordinator", event_type="step_start", content="Step 2: Hunting (swarm)")
            hunter_llm = self._create_llm_for_agent("hunter")
            hunter_swarm = HunterSwarmAgent(hunter_llm, self.tools, self.session)
            hunter_result = await hunter_swarm.run(
                "Hunt for security vulnerabilities in this application.", context=self.context)
            self.context["hunter"] = hunter_result
            self.session.context["hunter"] = hunter_result

            hunter_cost = hunter_swarm.total_cost
            hunter_tokens = hunter_swarm.total_tokens
            self.session.record_step_cost(
                "hunter", hunter_cost, hunter_tokens,
                input_tokens=hunter_swarm.total_input_tokens,
                output_tokens=hunter_swarm.total_output_tokens)
            total_cost += hunter_cost
            total_tokens += hunter_tokens
            total_input_tokens += hunter_swarm.total_input_tokens
            total_output_tokens += hunter_swarm.total_output_tokens
            self.session.total_cost = total_cost
            self.session.total_tokens = total_tokens

            potential_findings = hunter_result.get("findings", [])
            all_files_analyzed = list(hunter_result.get("files_analyzed", []))
            self.session.add_trace(
                agent="coordinator", event_type="step_complete",
                content={"step": "hunter_swarm", "cost": hunter_cost, "tokens": hunter_tokens,
                         "potential_findings": len(potential_findings)})

            # Step 2.5: Coverage-guided second pass
            if attack_surface:
                pass1_coverage = compute_coverage(attack_surface, all_files_analyzed)
                missed_endpoints = pass1_coverage.get("missed", [])

                if missed_endpoints:
                    self.session.add_trace(
                        agent="coordinator", event_type="step_start",
                        content=f"Step 2.5: Coverage second pass ({len(missed_endpoints)} missed)")

                    enriched = enrich_missed_endpoints(missed_endpoints, self.tools.fs_tools)
                    second_pass_tasks = build_second_pass_tasks(enriched)

                    pass2_findings = []
                    pass2_files = set()
                    pass2_cost = 0.0
                    pass2_tokens = 0
                    pass2_input = 0
                    pass2_output = 0

                    # Bounds how many second-pass hunters run at once.
                    sem = asyncio.Semaphore(settings.max_concurrent_hunters)

                    async def run_pass2(task_text, batch_idx):
                        async with sem:
                            llm = self._create_llm_for_agent("hunter")
                            hunter = HunterAgent(
                                llm, self.tools, self.session,
                                vuln_categories=["xss", "injection", "ssrf", "open_redirect", "idor", "auth_bypass"],
                                group_name=f"second_pass_{batch_idx}")
                            try:
                                result = await hunter.run(task_text, self.context)
                                return result, hunter.llm
                            except Exception as e:
                                # One failed batch must not sink the others;
                                # its LLM client is still returned for cost accounting.
                                logger.error(f"Second pass hunter {batch_idx} failed: {e}")
                                return {"findings": [], "files_analyzed": []}, hunter.llm

                    pass2_tasks = [
                        asyncio.create_task(run_pass2(t, i))
                        for i, t in enumerate(second_pass_tasks)
                    ]
                    try:
                        pass2_results = await asyncio.gather(*pass2_tasks)
                    except asyncio.CancelledError:
                        # Propagate cancellation to the children and wait for
                        # them to finish before re-raising.
                        for t in pass2_tasks:
                            t.cancel()
                        await asyncio.gather(*pass2_tasks, return_exceptions=True)
                        raise

                    for result, llm_client in pass2_results:
                        pass2_findings.extend(result.get("findings", []))
                        pass2_files.update(result.get("files_analyzed", []))
                        pass2_cost += llm_client.total_cost
                        pass2_tokens += llm_client.total_tokens
                        pass2_input += llm_client.total_input_tokens
                        pass2_output += llm_client.total_output_tokens

                    self.session.record_step_cost(
                        "hunter_second_pass", pass2_cost, pass2_tokens,
                        input_tokens=pass2_input, output_tokens=pass2_output)
                    total_cost += pass2_cost
                    total_tokens += pass2_tokens
                    # Keep the input/output totals in step with the other
                    # stages so the checkpoints saved below are complete.
                    total_input_tokens += pass2_input
                    total_output_tokens += pass2_output
                    self.session.total_cost = total_cost
                    self.session.total_tokens = total_tokens

                    potential_findings.extend(pass2_findings)
                    all_files_analyzed = sorted(set(all_files_analyzed) | pass2_files)
                    hunter_result["findings"] = potential_findings
                    hunter_result["files_analyzed"] = all_files_analyzed
                    self.context["hunter"] = hunter_result

                    self.session.add_trace(
                        agent="coordinator", event_type="step_complete",
                        content={"step": "hunter_second_pass", "cost": pass2_cost, "tokens": pass2_tokens,
                                 "new_findings": len(pass2_findings), "total_findings": len(potential_findings)})

            # Checkpoint: hunter complete (includes second pass)
            if self.checkpoint_mgr:
                self.checkpoint_mgr.save("hunter", self._build_checkpoint_data(
                    total_cost, total_tokens, total_input_tokens, total_output_tokens,
                    potential_findings=potential_findings, all_files_analyzed=all_files_analyzed))

        # Step 2.25: Feature Deep Dive
        if settings.feature_hunt_enabled and skip_to in (None, "hunter"):
            recon_summary = self.context.get("recon", {}).get("summary", "")
            if recon_summary:
                self.session.add_trace(
                    agent="coordinator", event_type="step_start",
                    content="Step 2.25: Feature deep dive — extracting high-risk features",
                )

                features = await self._extract_high_risk_features(
                    recon_summary, attack_surface,
                )

                if features:
                    self.session.add_trace(
                        agent="coordinator", event_type="status",
                        content=f"Feature deep dive: {len(features)} features — "
                        + ", ".join(f.get("name", "?") for f in features),
                    )
                else:
                    # Researcher mode: agents pick their own targets
                    logger.info("No features extracted — spawning researcher agents")
                    self.session.add_trace(
                        agent="coordinator", event_type="status",
                        content="Feature deep dive: researcher mode — agents pick their own targets",
                    )

                feature_result = await self._run_feature_deep_dive(features, self.context)

                feature_findings = feature_result.get("findings", [])
                feature_cost = feature_result["total_cost"]
                feature_tokens = feature_result["total_tokens"]

                self.session.record_step_cost(
                    "feature_hunt", feature_cost, feature_tokens,
                    input_tokens=feature_result["total_input_tokens"],
                    output_tokens=feature_result["total_output_tokens"],
                )
                total_cost += feature_cost
                total_tokens += feature_tokens
                total_input_tokens += feature_result["total_input_tokens"]
                total_output_tokens += feature_result["total_output_tokens"]
                self.session.total_cost = total_cost
                self.session.total_tokens = total_tokens

                # Merge and deduplicate with category hunter findings
                potential_findings.extend(feature_findings)
                potential_findings = HunterSwarmAgent._deduplicate_findings(potential_findings)
                all_files_analyzed = sorted(
                    set(all_files_analyzed) | set(feature_result.get("files_analyzed", []))
                )

                if "hunter" not in self.context:
                    self.context["hunter"] = {}
                self.context["hunter"]["findings"] = potential_findings
                self.context["hunter"]["files_analyzed"] = all_files_analyzed

                self.session.add_trace(
                    agent="coordinator", event_type="step_complete",
                    content={
                        "step": "feature_hunt",
                        # Researcher mode runs with no extracted features.
                        "features_analyzed": len(features) if features else 0,
                        "new_findings": len(feature_findings),
                        "total_findings": len(potential_findings),
                        "cost": feature_cost,
                        "tokens": feature_tokens,
                    },
                )

                # Checkpoint: feature hunt complete
                if self.checkpoint_mgr:
                    self.checkpoint_mgr.save("feature_hunt", self._build_checkpoint_data(
                        total_cost, total_tokens, total_input_tokens, total_output_tokens,
                        potential_findings=potential_findings, all_files_analyzed=all_files_analyzed))

        # Pass findings directly to LLM validation (static validator removed —
        # line number correction now happens in _handle_report_finding,
        # and all semantic judgment is left to the LLM validator)
        if "hunter" not in self.context:
            self.context["hunter"] = {}
        self.context["hunter"]["findings"] = potential_findings

        # Step 3: Validation (swarm)
        if potential_findings:
            self.session.add_trace(agent="coordinator", event_type="step_start", content="Step 3: Validation (swarm)")
            validator_llm = self._create_llm_for_agent("validator")
            validator_swarm = ValidatorSwarmAgent(validator_llm, self.tools, self.session)
            validator_result = await validator_swarm.run(
                "Validate each potential vulnerability.", context=self.context)
            self.context["validator"] = validator_result

            validator_cost = validator_swarm.total_cost
            validator_tokens = validator_swarm.total_tokens
            self.session.record_step_cost(
                "validator", validator_cost, validator_tokens,
                input_tokens=validator_swarm.total_input_tokens,
                output_tokens=validator_swarm.total_output_tokens)
            total_cost += validator_cost
            total_tokens += validator_tokens
            self.session.total_cost = total_cost
            self.session.total_tokens = total_tokens

            validated = validator_result.get("validated_findings", [])
            self.session.add_trace(
                agent="coordinator", event_type="step_complete",
                content={"step": "validator_swarm", "cost": validator_cost, "tokens": validator_tokens,
                         "validated_findings": len(validated)})

            # Post-processing
            validated = self._deduplicate_validated(validated, potential_findings)
            validated = self._cap_findings_per_file(validated, potential_findings, max_per_file=3)

            # Severity normalization: build a list parallel to ``validated``
            # (``{}`` where the index is unusable) so positions line up.
            orig_for_norm = []
            for v in validated:
                idx = v.get("original_index")
                if _is_valid_index(idx, len(potential_findings)):
                    orig_for_norm.append(potential_findings[idx])
                else:
                    orig_for_norm.append({})
            normalised = normalize_severity(orig_for_norm)
            for i, v in enumerate(validated):
                idx = v.get("original_index")
                if _is_valid_index(idx, len(potential_findings)):
                    potential_findings[idx]["severity"] = normalised[i].get(
                        "severity", potential_findings[idx].get("severity", "medium"))

            # Quality gates
            validated, quality_stats = run_quality_gates(validated, potential_findings, fs_tools=self.tools.fs_tools)

            # Create Finding objects
            for finding_data in validated:
                original_index = finding_data.get("original_index")
                if not _is_valid_index(original_index, len(potential_findings)):
                    continue
                orig = potential_findings[original_index]

                finding = Finding(
                    category=orig.get("category", "unknown"),
                    severity=orig.get("severity", "medium"),
                    title=f"{orig.get('category', 'Unknown')} in {orig.get('file_path', 'unknown')}",
                    description=orig.get("description", ""),
                    file_path=orig.get("file_path", ""),
                    line_number=orig.get("line_number"),
                    code_snippet=orig.get("code_snippet"),
                    poc=finding_data.get("poc"),
                    fix=finding_data.get("fix"),
                    cvss_score=finding_data.get("cvss_score"),
                    confidence=finding_data.get("confidence", "medium"),
                    validated=True,
                )
                self.session.add_finding(finding)
                self.session.add_trace(
                    agent="coordinator", event_type="finding_added",
                    content={"title": finding.title, "category": finding.category,
                             "severity": finding.severity, "file_path": finding.file_path})
        else:
            self.context["validator"] = {"validated_findings": [], "false_positives": []}

        # Step 4: Sandbox Verification (optional)
        if settings.sandbox_enabled and self.session.findings:
            self.session.add_trace(
                agent="coordinator", event_type="step_start",
                content=f"Step 4: Sandbox verification ({len(self.session.findings)} findings)",
            )

            # Build confirmed findings list for the sandbox swarm
            confirmed_findings = _confirmed_findings()
            sandbox_config = _sandbox_config()

            sandbox_llm = self._create_llm_for_agent("validator")
            sandbox_swarm = SandboxVerifierSwarmAgent(
                sandbox_llm, self.tools, self.session,
                sandbox_config=sandbox_config,
            )

            sandbox_context = {
                "confirmed_findings": confirmed_findings,
                "project_context": self.context.get("project_context", {}),
            }

            try:
                sandbox_result = await sandbox_swarm.run(
                    "Verify confirmed findings by exploiting them in the sandbox.",
                    context=sandbox_context,
                )
                self.context["sandbox_verification"] = sandbox_result

                sandbox_cost = sandbox_swarm.total_cost
                sandbox_tokens = sandbox_swarm.total_tokens
                self.session.record_step_cost(
                    "sandbox_verification", sandbox_cost, sandbox_tokens,
                    input_tokens=sandbox_swarm.total_input_tokens,
                    output_tokens=sandbox_swarm.total_output_tokens,
                )
                total_cost += sandbox_cost
                total_tokens += sandbox_tokens
                self.session.total_cost = total_cost
                self.session.total_tokens = total_tokens

                # Update findings with sandbox verification results
                exploitable = sandbox_result.get("exploitable", [])
                not_exploitable = sandbox_result.get("not_exploitable", [])

                for r in exploitable:
                    idx = r.get("finding_index")
                    if _is_valid_index(idx, len(self.session.findings)):
                        finding = self.session.findings[idx]
                        # Upgrade the PoC with the working exploit
                        if r.get("working_poc"):
                            finding.poc = r["working_poc"]
                        finding.validated = True
                        finding.source = "sandbox_verified"

                # Remove findings that couldn't be exploited in sandbox
                if not_exploitable:
                    not_exploitable_indices = {
                        r.get("finding_index") for r in not_exploitable
                        if r.get("confidence") == "high"
                    }
                    # Only remove high-confidence non-exploitable findings
                    self.session.findings = [
                        f for i, f in enumerate(self.session.findings)
                        if i not in not_exploitable_indices
                    ]

                self.session.add_trace(
                    agent="coordinator", event_type="step_complete",
                    content={
                        "step": "sandbox_verification",
                        "exploitable": len(exploitable),
                        "not_exploitable": len(not_exploitable),
                        "cost": sandbox_cost,
                        "tokens": sandbox_tokens,
                    },
                )

            except Exception as e:
                # Verification is optional: keep the findings and carry on.
                logger.debug(f"Sandbox verification failed: {e}", exc_info=True)
                self.session.add_trace(
                    agent="coordinator", event_type="sandbox_error",
                    content=f"Sandbox verification failed: {str(e)}. Findings preserved without sandbox verification.",
                )

        # Step 5: Browser Verification (optional)
        if settings.browser_verification_enabled and self.session.findings:
            self.session.add_trace(
                agent="coordinator", event_type="step_start",
                content=f"Step 5: Browser verification ({len(self.session.findings)} findings)",
            )

            confirmed_findings = _confirmed_findings()
            sandbox_config = _sandbox_config()

            browser_llm = self._create_llm_for_agent("validator")
            browser_swarm = BrowserVerifierSwarmAgent(
                browser_llm, self.tools, self.session,
                sandbox_config=sandbox_config,
            )

            browser_context = {
                "confirmed_findings": confirmed_findings,
                "project_context": self.context.get("project_context", {}),
            }

            try:
                browser_result = await browser_swarm.run(
                    "Verify confirmed findings using browser-based exploit verification.",
                    context=browser_context,
                )
                self.context["browser_verification"] = browser_result

                browser_cost = browser_swarm.total_cost
                browser_tokens = browser_swarm.total_tokens
                self.session.record_step_cost(
                    "browser_verification", browser_cost, browser_tokens,
                    input_tokens=browser_swarm.total_input_tokens,
                    output_tokens=browser_swarm.total_output_tokens,
                )
                total_cost += browser_cost
                total_tokens += browser_tokens
                self.session.total_cost = total_cost
                self.session.total_tokens = total_tokens

                exploitable = browser_result.get("exploitable", [])
                not_exploitable = browser_result.get("not_exploitable", [])

                for r in exploitable:
                    idx = r.get("finding_index")
                    if _is_valid_index(idx, len(self.session.findings)):
                        finding = self.session.findings[idx]
                        finding.validated = True
                        finding.source = "browser_verified"

                if not_exploitable:
                    # Only high-confidence "not exploitable" verdicts drop a finding.
                    not_exploitable_indices = {
                        r.get("finding_index") for r in not_exploitable
                        if r.get("confidence") == "high"
                    }
                    self.session.findings = [
                        f for i, f in enumerate(self.session.findings)
                        if i not in not_exploitable_indices
                    ]

                self.session.add_trace(
                    agent="coordinator", event_type="step_complete",
                    content={
                        "step": "browser_verification",
                        "exploitable": len(exploitable),
                        "not_exploitable": len(not_exploitable),
                        "evidence_dir": browser_result.get("evidence_dir", ""),
                        "cost": browser_cost,
                        "tokens": browser_tokens,
                    },
                )

            except ImportError as e:
                logger.warning(f"Browser verification skipped: {e}")
                self.session.add_trace(
                    agent="coordinator", event_type="browser_skip",
                    content=f"Browser verification skipped: {str(e)}",
                )
            except Exception as e:
                # Verification is optional: keep the findings and carry on.
                logger.debug(f"Browser verification failed: {e}", exc_info=True)
                self.session.add_trace(
                    agent="coordinator", event_type="browser_error",
                    content=f"Browser verification failed: {str(e)}. Findings preserved.",
                )

        self.session.status = SessionStatus.COMPLETED

        # Clean up checkpoints on successful completion
        if self.checkpoint_mgr:
            self.checkpoint_mgr.cleanup()

        coverage_data = None
        if attack_surface:
            coverage_data = compute_coverage(attack_surface, all_files_analyzed)

        cost_breakdown = self.session.get_cost_breakdown()
        self.session.add_trace(
            agent="coordinator", event_type="scan_complete",
            content={"findings_count": len(self.session.findings), "total_cost": self.session.total_cost,
                     "total_tokens": self.session.total_tokens, "cost_breakdown": cost_breakdown,
                     "coverage_pct": coverage_data["coverage_pct"] if coverage_data else None})

        return {
            "status": "completed",
            "findings": self.session.get_findings_dict(),
            "context": self.context,
            "total_cost": self.session.total_cost,
            "total_tokens": self.session.total_tokens,
            "cost_breakdown": cost_breakdown,
            "coverage_data": coverage_data,
        }

    except Exception as e:
        self.session.status = SessionStatus.FAILED
        logger.debug(f"Scan failed: {e}", exc_info=True)
        raise
|
|
1103
|
+
|
|
1104
|
+
async def run(self, task: str, context: Optional[dict] = None) -> dict:
    """Agent entry point: execute the full scan and return its result dict.

    ``task`` and ``context`` are accepted but not used here; the call is
    forwarded unchanged to ``run_full_scan``.
    """
    scan_result = await self.run_full_scan()
    return scan_result
|