openhack 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openhack/__init__.py +2 -0
- openhack/__main__.py +225 -0
- openhack/agents/__init__.py +30 -0
- openhack/agents/base.py +230 -0
- openhack/agents/browser_verifier.py +679 -0
- openhack/agents/browser_verifier_swarm.py +256 -0
- openhack/agents/checkpoint.py +89 -0
- openhack/agents/context_manager.py +356 -0
- openhack/agents/coordinator.py +1105 -0
- openhack/agents/endpoint_analyst.py +307 -0
- openhack/agents/feature_hunter.py +93 -0
- openhack/agents/hunter.py +481 -0
- openhack/agents/hunter_swarm.py +385 -0
- openhack/agents/llm.py +334 -0
- openhack/agents/recon.py +19 -0
- openhack/agents/sandbox_verifier.py +396 -0
- openhack/agents/sandbox_verifier_swarm.py +250 -0
- openhack/agents/session.py +286 -0
- openhack/agents/validator.py +217 -0
- openhack/agents/validator_swarm.py +106 -0
- openhack/auth.py +175 -0
- openhack/browser/__init__.py +12 -0
- openhack/browser/runner.py +385 -0
- openhack/categories.py +130 -0
- openhack/config.py +201 -0
- openhack/deterministic_recon.py +464 -0
- openhack/entry_points.py +745 -0
- openhack/framework_classifier.py +515 -0
- openhack/framework_detection.py +269 -0
- openhack/headless_scan.py +179 -0
- openhack/prompts/__init__.py +108 -0
- openhack/prompts/browser_verifier.py +171 -0
- openhack/prompts/coordinator.py +31 -0
- openhack/prompts/django/__init__.py +32 -0
- openhack/prompts/django/auth_bypass.py +76 -0
- openhack/prompts/django/csrf.py +62 -0
- openhack/prompts/django/data_exposure.py +67 -0
- openhack/prompts/django/idor.py +74 -0
- openhack/prompts/django/injection.py +67 -0
- openhack/prompts/django/misconfiguration.py +70 -0
- openhack/prompts/django/ssrf.py +64 -0
- openhack/prompts/endpoint_analyst.py +122 -0
- openhack/prompts/express/__init__.py +29 -0
- openhack/prompts/express/auth_bypass.py +71 -0
- openhack/prompts/express/data_exposure.py +77 -0
- openhack/prompts/express/idor.py +69 -0
- openhack/prompts/express/injection.py +75 -0
- openhack/prompts/express/misconfiguration.py +72 -0
- openhack/prompts/express/ssrf.py +63 -0
- openhack/prompts/feature_hunter.py +140 -0
- openhack/prompts/flask/__init__.py +29 -0
- openhack/prompts/flask/auth_bypass.py +86 -0
- openhack/prompts/flask/data_exposure.py +78 -0
- openhack/prompts/flask/idor.py +83 -0
- openhack/prompts/flask/injection.py +77 -0
- openhack/prompts/flask/misconfiguration.py +73 -0
- openhack/prompts/flask/ssrf.py +65 -0
- openhack/prompts/hunter.py +362 -0
- openhack/prompts/hunter_continuation_loop.py +12 -0
- openhack/prompts/hunter_continuation_no_findings.py +19 -0
- openhack/prompts/hunter_continuation_no_progress.py +22 -0
- openhack/prompts/hunter_tool_instructions.py +55 -0
- openhack/prompts/nextjs/__init__.py +42 -0
- openhack/prompts/nextjs/auth_bypass.py +80 -0
- openhack/prompts/nextjs/csrf.py +71 -0
- openhack/prompts/nextjs/data_exposure.py +88 -0
- openhack/prompts/nextjs/idor.py +64 -0
- openhack/prompts/nextjs/injection.py +65 -0
- openhack/prompts/nextjs/middleware_bypass.py +75 -0
- openhack/prompts/nextjs/misconfiguration.py +92 -0
- openhack/prompts/nextjs/server_actions.py +97 -0
- openhack/prompts/nextjs/ssrf.py +66 -0
- openhack/prompts/nextjs/xss.py +69 -0
- openhack/prompts/pr_analysis_system.py +80 -0
- openhack/prompts/pr_analysis_user.py +11 -0
- openhack/prompts/project_context.py +89 -0
- openhack/prompts/recon.py +199 -0
- openhack/prompts/reporter.py +88 -0
- openhack/prompts/researchers.py +434 -0
- openhack/prompts/sandbox_verifier.py +128 -0
- openhack/prompts/supabase/__init__.py +39 -0
- openhack/prompts/supabase/auth_tokens.py +131 -0
- openhack/prompts/supabase/edge_functions.py +150 -0
- openhack/prompts/supabase/graphql.py +102 -0
- openhack/prompts/supabase/postgrest.py +99 -0
- openhack/prompts/supabase/realtime.py +93 -0
- openhack/prompts/supabase/rls.py +110 -0
- openhack/prompts/supabase/rpc_functions.py +127 -0
- openhack/prompts/supabase/storage.py +110 -0
- openhack/prompts/supabase/tenant_isolation.py +118 -0
- openhack/prompts/validator.py +319 -0
- openhack/prompts/validator_continuation_incomplete.py +12 -0
- openhack/prompts/validator_tool_instructions.py +29 -0
- openhack/quality.py +231 -0
- openhack/sandbox/__init__.py +12 -0
- openhack/sandbox/orchestrator.py +517 -0
- openhack/sandbox/runner.py +177 -0
- openhack/scan_session.py +245 -0
- openhack/setup.py +452 -0
- openhack/static_validator.py +612 -0
- openhack/tools/__init__.py +1 -0
- openhack/tools/ast_tools.py +307 -0
- openhack/tools/coverage.py +1078 -0
- openhack/tools/filesystem.py +404 -0
- openhack/tools/nextjs.py +258 -0
- openhack/tools/registry.py +52 -0
- openhack/tui.py +3450 -0
- openhack/updates.py +170 -0
- openhack-0.1.0.dist-info/METADATA +189 -0
- openhack-0.1.0.dist-info/RECORD +113 -0
- openhack-0.1.0.dist-info/WHEEL +4 -0
- openhack-0.1.0.dist-info/entry_points.txt +2 -0
- openhack-0.1.0.dist-info/licenses/LICENSE +661 -0
|
@@ -0,0 +1,481 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Hunter agent for finding security vulnerabilities.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import fnmatch
|
|
6
|
+
import json
|
|
7
|
+
import logging
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
import openai
|
|
11
|
+
|
|
12
|
+
from .base import BaseAgent
|
|
13
|
+
from .llm import Message, ToolResult
|
|
14
|
+
from openhack.config import settings
|
|
15
|
+
from openhack.prompts import (
|
|
16
|
+
HUNTER_PROMPT,
|
|
17
|
+
ALL_FRAMEWORK_PROMPTS,
|
|
18
|
+
HUNTER_TOOL_INSTRUCTIONS,
|
|
19
|
+
HUNTER_CONTINUATION_NO_FINDINGS,
|
|
20
|
+
HUNTER_CONTINUATION_LOOP,
|
|
21
|
+
HUNTER_CONTINUATION_NO_PROGRESS,
|
|
22
|
+
format_project_context,
|
|
23
|
+
)
|
|
24
|
+
from openhack.categories import CATEGORIES, normalize_category
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Tool schema offered to the LLM for recording a single vulnerability.
# HunterAgent.run dispatches calls with this name to
# HunterAgent._handle_report_finding instead of the shared tool registry.
REPORT_FINDING_TOOL = {
    "name": "report_finding",
    "description": "Report a potential security vulnerability found during analysis. You MUST call this tool for EACH vulnerability you discover.",
    "parameters": {
        "type": "object",
        "properties": {
            "category": {
                "type": "string",
                # Allowed values come from openhack.categories.CATEGORIES.
                "enum": CATEGORIES,
                "description": "Category of the vulnerability. MUST be one of the allowed values."
            },
            "severity": {
                "type": "string",
                "enum": ["critical", "high", "medium", "low", "info"],
                "description": "Severity level"
            },
            "file_path": {"type": "string", "description": "Path to the vulnerable file"},
            "line_number": {"type": "integer", "description": "Line number"},
            "description": {"type": "string", "description": "Detailed description of the vulnerability"},
            "code_snippet": {"type": "string", "description": "The vulnerable code snippet"},
            "confidence": {
                "type": "string",
                "enum": ["high", "medium", "low"],
                "description": "Confidence level"
            }
        },
        # line_number, code_snippet and confidence are optional; the handler
        # falls back to "medium" when confidence is omitted.
        "required": ["category", "severity", "file_path", "description"]
    }
}
|
|
58
|
+
|
|
59
|
+
# Tool schema the LLM calls to end the hunt.
# HunterAgent.run dispatches calls with this name to
# HunterAgent._handle_finish_hunt, which ignores the arguments and reports
# the number of findings actually recorded on the agent.
FINISH_HUNT_TOOL = {
    "name": "finish_hunt",
    "description": "Call this tool ONLY after you have reported ALL vulnerabilities. Signals hunt completion.",
    "parameters": {
        "type": "object",
        "properties": {
            "summary": {"type": "string", "description": "Brief summary of findings"},
            "total_findings": {"type": "integer", "description": "Total vulnerabilities found"},
            "critical_count": {"type": "integer", "description": "Critical findings count"},
            "high_count": {"type": "integer", "description": "High findings count"}
        },
        "required": ["summary", "total_findings"]
    }
}
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class HunterAgent(BaseAgent):
    """Agent that drives an LLM tool loop to find and record vulnerabilities.

    Findings are collected through the ``report_finding`` tool and the loop
    ends when the model calls ``finish_hunt``, stops calling tools, the
    session is cancelled, or ``max_iterations`` is reached.
    """

    # Default identity; __init__ overrides both per instance when a
    # group_name is supplied.
    name = "hunter"
    description = "Vulnerability hunting"
    # Upper bound on loop iterations (LLM round-trips) in a single run().
    max_iterations: int = 50

    # Categories used when the caller does not pass vuln_categories.
    DEFAULT_CATEGORIES = [
        "idor", "xss", "csrf", "ssrf", "injection",
        "auth_bypass", "data_exposure", "middleware_bypass",
        "server_actions", "misconfiguration",
    ]
|
|
85
|
+
|
|
86
|
+
def __init__(self, *args, vuln_categories=None, group_name=None, framework=None, **kwargs):
    """Create a hunter, optionally scoped to a category group and framework.

    Args:
        *args: Forwarded unchanged to ``BaseAgent.__init__``.
        vuln_categories: Categories whose framework-specific guidance is
            appended to the system prompt. A falsy value selects a copy of
            ``DEFAULT_CATEGORIES``.
        group_name: Optional label. When set, the instance ``name`` becomes
            ``"hunter:<group_name>"`` and ``description`` mentions the group.
        framework: Optional key into ``ALL_FRAMEWORK_PROMPTS``; ``None``
            disables framework-specific guidance.
        **kwargs: Forwarded unchanged to ``BaseAgent.__init__``.
    """
    super().__init__(*args, **kwargs)
    # Copy the class-level default so that mutating one agent's category
    # list can never change DEFAULT_CATEGORIES for every other agent.
    self.vuln_categories = vuln_categories or list(self.DEFAULT_CATEGORIES)
    self.group_name = group_name
    self.framework = framework
    self.findings: list[dict] = []
    self._files_read: set[str] = set()

    if group_name:
        # Instance attributes shadow the class-level name/description.
        self.name = f"hunter:{group_name}"
        self.description = f"Vulnerability hunting ({group_name})"
|
|
97
|
+
|
|
98
|
+
def get_system_prompt(self, context: dict) -> str:
    """Assemble the system prompt for this hunt.

    The prompt is ``HUNTER_PROMPT`` filled with the recon summary and the
    formatted project context, followed by per-category guidance for the
    configured framework (when there is any) and finally the tool
    instructions.

    Args:
        context: Run context; ``context["recon"]["summary"]`` and
            ``context["project_context"]`` are read when present.

    Returns:
        The complete system prompt text.
    """
    recon_summary = context.get("recon", {}).get("summary", "No recon data available")
    rendered_project = format_project_context(context.get("project_context", {}))

    # Guidance is only looked up when a framework was configured; unknown
    # frameworks and categories without an entry simply contribute nothing.
    guidance = []
    if self.framework is not None:
        per_framework = ALL_FRAMEWORK_PROMPTS.get(self.framework, {})
        guidance = [
            per_framework[category]
            for category in self.vuln_categories
            if category in per_framework
        ]

    sections = [
        HUNTER_PROMPT.format(
            recon_context=recon_summary,
            project_context=rendered_project,
        )
    ]
    if guidance:
        sections.append("\n\n## Detailed Vulnerability Guidance\n\n")
        sections.append("\n\n---\n\n".join(guidance))
    sections.append(HUNTER_TOOL_INSTRUCTIONS)
    return "".join(sections)
|
|
120
|
+
|
|
121
|
+
def get_tools(self) -> list[dict]:
    """Return the inherited tool schemas followed by the two hunter tools."""
    hunter_only = [REPORT_FINDING_TOOL, FINISH_HUNT_TOOL]
    return super().get_tools() + hunter_only
|
|
124
|
+
|
|
125
|
+
@staticmethod
|
|
126
|
+
def _is_excluded_path(file_path: str) -> bool:
|
|
127
|
+
"""Check if a file path matches any scan exclude pattern."""
|
|
128
|
+
for pattern in settings.scan_exclude_patterns:
|
|
129
|
+
if fnmatch.fnmatch(file_path, pattern):
|
|
130
|
+
return True
|
|
131
|
+
# Also check without leading ./ or /
|
|
132
|
+
normalized = file_path.lstrip("./")
|
|
133
|
+
if fnmatch.fnmatch(normalized, pattern):
|
|
134
|
+
return True
|
|
135
|
+
# Check if any path segment matches (for patterns like "**/test/**")
|
|
136
|
+
if "**" in pattern:
|
|
137
|
+
# Convert glob to work with fnmatch on full paths
|
|
138
|
+
simple_pattern = pattern.replace("**/", "*/")
|
|
139
|
+
if fnmatch.fnmatch(file_path, simple_pattern) or fnmatch.fnmatch(normalized, simple_pattern):
|
|
140
|
+
return True
|
|
141
|
+
# Direct substring check for directory patterns
|
|
142
|
+
dir_part = pattern.replace("**/", "").replace("/**", "").replace("*", "")
|
|
143
|
+
if dir_part and f"/{dir_part}/" in f"/{normalized}":
|
|
144
|
+
return True
|
|
145
|
+
return False
|
|
146
|
+
|
|
147
|
+
def _correct_line_number(self, file_path: str, line_number: int, code_snippet: str) -> int:
|
|
148
|
+
"""Correct the reported line number by searching for the code snippet in the file.
|
|
149
|
+
|
|
150
|
+
Models often get the code snippet right but report the wrong line number
|
|
151
|
+
due to context window drift. This does a simple search to fix it.
|
|
152
|
+
"""
|
|
153
|
+
if not file_path or not code_snippet:
|
|
154
|
+
return line_number
|
|
155
|
+
|
|
156
|
+
try:
|
|
157
|
+
result = self.tools.execute_tool("read_file", {"path": file_path})
|
|
158
|
+
except Exception:
|
|
159
|
+
return line_number
|
|
160
|
+
|
|
161
|
+
content = result.get("content", "") if isinstance(result, dict) else str(result)
|
|
162
|
+
if not content:
|
|
163
|
+
return line_number
|
|
164
|
+
|
|
165
|
+
lines = content.split("\n")
|
|
166
|
+
# Strip line-number prefix if present (from read_file format: "123\tcontent")
|
|
167
|
+
clean_lines = []
|
|
168
|
+
for line in lines:
|
|
169
|
+
if "\t" in line:
|
|
170
|
+
clean_lines.append(line.split("\t", 1)[1])
|
|
171
|
+
else:
|
|
172
|
+
clean_lines.append(line)
|
|
173
|
+
|
|
174
|
+
# Clean up the snippet for matching — take the most distinctive line
|
|
175
|
+
snippet_lines = [s.strip() for s in code_snippet.strip().split("\n") if s.strip()]
|
|
176
|
+
if not snippet_lines:
|
|
177
|
+
return line_number
|
|
178
|
+
|
|
179
|
+
# Try to find the first non-trivial line of the snippet in the file
|
|
180
|
+
search_line = None
|
|
181
|
+
for sl in snippet_lines:
|
|
182
|
+
# Skip trivial lines (just braces, empty, common keywords)
|
|
183
|
+
if sl in ("{", "}", "});", ");", "*/", "/*", "//"):
|
|
184
|
+
continue
|
|
185
|
+
if len(sl) > 10: # Needs to be distinctive enough
|
|
186
|
+
search_line = sl
|
|
187
|
+
break
|
|
188
|
+
if not search_line:
|
|
189
|
+
search_line = snippet_lines[0]
|
|
190
|
+
|
|
191
|
+
# Search for the line in the file
|
|
192
|
+
for i, file_line in enumerate(clean_lines):
|
|
193
|
+
if search_line in file_line.strip():
|
|
194
|
+
corrected = i + 1 # 1-indexed
|
|
195
|
+
if corrected != line_number:
|
|
196
|
+
logger.debug(f"Corrected line number {line_number} → {corrected} for {file_path}")
|
|
197
|
+
return corrected
|
|
198
|
+
|
|
199
|
+
# Snippet not found — return original
|
|
200
|
+
return line_number
|
|
201
|
+
|
|
202
|
+
def _handle_report_finding(self, args: dict) -> dict:
    """Record one finding reported through the ``report_finding`` tool.

    Findings located in excluded paths are rejected. Otherwise the line
    number is re-checked against the file, the finding is appended to
    ``self.findings`` with ``validated`` set to False, and a 1-based
    ``finding_id`` is returned.

    Args:
        args: Tool-call arguments produced by the model.

    Returns:
        ``{"status": "rejected", "reason": ...}`` for excluded paths,
        otherwise ``{"status": "recorded", "finding_id": ...}``.
    """
    file_path = args.get("file_path", "")

    # Pre-filter: reject findings in excluded paths
    if file_path and self._is_excluded_path(file_path):
        logger.info(f"Finding rejected (excluded path): {args.get('category', '?')} in {file_path}")
        return {"status": "rejected", "reason": f"File is in an excluded path (test/CLI/docs/examples): {file_path}"}

    # Correct line number by searching for the code snippet in the file
    line_number = args.get("line_number")
    code_snippet = args.get("code_snippet")
    if line_number and code_snippet and file_path:
        line_number = self._correct_line_number(file_path, line_number, code_snippet)

    # The model may send an explicit null (or a non-string) for optional
    # fields; dict.get's default only covers a *missing* key, so coerce
    # before calling .lower() instead of crashing the whole hunt.
    severity = str(args.get("severity") or "medium").lower()
    confidence = str(args.get("confidence") or "medium").lower()

    finding = {
        "category": normalize_category(args.get("category", "unknown")),
        "severity": severity,
        "file_path": file_path,
        "line_number": line_number,
        "description": args.get("description", ""),
        "code_snippet": code_snippet,
        "confidence": confidence,
        "validated": False,
    }
    self.findings.append(finding)
    logger.info(f"Finding reported: {finding['category']} in {finding['file_path']}")
    return {"status": "recorded", "finding_id": len(self.findings)}
|
|
229
|
+
|
|
230
|
+
def _handle_finish_hunt(self, args: dict) -> dict:
    """Acknowledge a ``finish_hunt`` call.

    The model-supplied ``args`` are not consulted; the count returned is the
    number of findings actually recorded on this agent.
    """
    recorded = len(self.findings)
    logger.info(f"Hunt finished: {recorded} findings")
    return {"status": "hunt_complete", "findings_reported": recorded}
|
|
233
|
+
|
|
234
|
+
async def run(self, task: str, context: Optional[dict] = None) -> dict:
    """Run the hunting loop until the model finishes or a limit is hit.

    Each iteration sends the conversation to the LLM, executes the tool
    calls it returns, and appends the results to the conversation. Several
    nudges keep the model productive:

    * text-only replies with no findings trigger a continuation prompt;
    * six identical ``list_dir``/``glob`` calls in a row trigger a loop
      warning;
    * five read-only turns without any finding switch to "analysis mode",
      where only ``report_finding``/``finish_hunt`` are offered and a tool
      call is required;
    * fifteen iterations without a finding trigger a no-progress prompt.

    Args:
        task: Initial user message for the model.
        context: Optional run context used to build the system prompt.

    Returns:
        The dict produced by ``_build_result``.

    Raises:
        openai.BadRequestError: When the LLM rejects the request for a
            reason other than context overflow, or overflow persists after
            emergency compaction.

    NOTE(review): block nesting was reconstructed from a de-indented copy of
    this file — confirm the loop-detection and tool-dispatch nesting against
    the packaged source.
    """
    context = context or {}
    self.session.current_agent = self.name
    self.findings = []
    self._files_read = set()

    system_prompt = self.get_system_prompt(context)
    self.messages = [Message(role="user", content=task)]
    self._seed_existing_instructions()

    max_iterations = self.max_iterations
    iteration = 0
    recent_tools: list[str] = []
    iterations_since_finding = 0
    consecutive_reads = 0
    analysis_checkpoints_sent = 0
    analysis_mode = False
    analysis_mode_turns = 0
    continuation_prompts_sent = 0
    max_continuation_prompts = 3
    LOOP_DETECTION_THRESHOLD = 6
    NO_PROGRESS_THRESHOLD = 15
    ANALYSIS_CHECKPOINT_INTERVAL = 5

    while iteration < max_iterations:
        # Check cancellation both before and after a potential pause.
        if self.session.cancelled:
            break
        await self.session.wait_if_paused()
        if self.session.cancelled:
            break
        iteration += 1
        iterations_since_finding += 1

        self._inject_pending_instructions()

        if analysis_mode:
            # Only the two reporting tools are offered and one must be used.
            tools = [REPORT_FINDING_TOOL, FINISH_HUNT_TOOL]
            forced_tool_choice = "required"
        else:
            tools = self.get_tools()
            forced_tool_choice = None

        self._preflight_compact(system_prompt)

        response = await self._chat_with_overflow_recovery(tools, system_prompt, forced_tool_choice)

        self.session.total_cost += response.cost
        if response.usage:
            self.session.total_tokens += response.usage.get("total_tokens", 0)
            self.context_manager.update_usage(response.usage.get("input_tokens", 0))

        if response.content:
            self.session.add_trace(agent=self.name, event_type="thinking", content=response.content)

        if not response.tool_calls:
            if analysis_mode:
                analysis_mode_turns += 1
                if analysis_mode_turns >= 3:
                    # Give up on forcing a report and return to normal mode.
                    analysis_mode = False
                    analysis_mode_turns = 0
                    continue
                self.messages.append(Message(
                    role="user",
                    content=(
                        "You responded with text but did not call any tools. "
                        "You MUST call report_finding or finish_hunt. "
                        "Do NOT respond with text — use the tools."
                    ),
                ))
                continue
            if len(self.findings) == 0 and continuation_prompts_sent < max_continuation_prompts:
                continuation_prompts_sent += 1
                self.messages.append(Message(role="user", content=HUNTER_CONTINUATION_NO_FINDINGS))
                continue
            return self._build_result(response.content or "")

        current_tools = [tc.name for tc in response.tool_calls]
        recent_tools.extend(current_tools)
        recent_tools = recent_tools[-10:]

        if len(recent_tools) >= LOOP_DETECTION_THRESHOLD:
            last_n = recent_tools[-LOOP_DETECTION_THRESHOLD:]
            if len(set(last_n)) == 1 and last_n[0] in ("list_dir", "glob"):
                if continuation_prompts_sent < max_continuation_prompts:
                    continuation_prompts_sent += 1
                    self.messages.append(Message(role="user", content=HUNTER_CONTINUATION_LOOP.format(tool_name=last_n[0])))
                    recent_tools = []
                    # The looping tool calls are dropped, not executed.
                    continue

        assistant_msg = Message(
            role="assistant",
            content=response.content,
            tool_calls=[
                {"id": tc.id, "type": "function", "function": {"name": tc.name, "arguments": json.dumps(tc.arguments)}}
                for tc in response.tool_calls
            ],
            reasoning_content=getattr(response, 'reasoning_content', None),
        )
        self.messages.append(assistant_msg)

        should_finish = False
        has_only_reads = all(tc.name == "read_file" for tc in response.tool_calls)
        for tool_call in response.tool_calls:
            self.session.add_trace(
                agent=self.name, event_type="tool_call",
                content=f"Calling {tool_call.name}",
                tool_name=tool_call.name, tool_input=tool_call.arguments,
            )

            if tool_call.name == "report_finding":
                result = self._handle_report_finding(tool_call.arguments)
                iterations_since_finding = 0
                consecutive_reads = 0
                analysis_mode = False
            elif tool_call.name == "finish_hunt":
                result = self._handle_finish_hunt(tool_call.arguments)
                should_finish = True
            else:
                try:
                    result = self.tools.execute_tool(tool_call.name, tool_call.arguments)
                except Exception as e:
                    # Surface the failure to the model instead of aborting.
                    result = {"error": f"Tool execution failed: {e}"}
                    logger.warning(f"[{self.name}] Tool {tool_call.name} failed: {e}")
                if tool_call.name == "read_file":
                    file_path = tool_call.arguments.get("path", "")
                    if file_path:
                        self._files_read.add(file_path)

            self.session.add_trace(
                agent=self.name, event_type="tool_result",
                content=f"Result from {tool_call.name}",
                tool_name=tool_call.name, tool_output=result,
            )

            raw_content = json.dumps(result) if isinstance(result, dict) else str(result)
            truncated_content = self.context_manager.truncate_tool_result(tool_call.name, raw_content)
            tool_result = ToolResult(
                tool_call_id=tool_call.id,
                content=truncated_content,
            )
            self.messages.append(tool_result.to_message())

        if should_finish:
            return self._build_result(response.content or "")

        if has_only_reads:
            consecutive_reads += 1

        if analysis_mode and response.tool_calls:
            has_action = any(tc.name in ("report_finding", "finish_hunt") for tc in response.tool_calls)
            if has_action:
                analysis_mode = False
                analysis_mode_turns = 0

        if (consecutive_reads >= ANALYSIS_CHECKPOINT_INTERVAL
                and len(self.findings) == 0
                and analysis_checkpoints_sent < 5):
            analysis_checkpoints_sent += 1
            analysis_mode = True
            analysis_mode_turns = 0
            files_list = ", ".join(sorted(self._files_read)[-10:])
            self.messages.append(Message(
                role="user",
                content=(
                    f"[ANALYSIS CHECKPOINT] You have read {len(self._files_read)} files without reporting "
                    f"any findings. STOP reading new files and call report_finding NOW.\n\n"
                    f"Files read: {files_list}\n\n"
                    f"You MUST call report_finding (not just describe in text) for EACH of these checks:\n"
                    f"1. IDOR: Does any endpoint load an object by ID without verifying the requesting "
                    f"user owns it? (e.g., GET /api/thing/<id> checks auth but not ownership)\n"
                    f"2. ORM escape hatches: Any .raw(), .extra(), RawSQL, cursor.execute() with "
                    f"string formatting instead of parameterized queries?\n"
                    f"3. Mass assignment: Can request.data set privileged fields (is_staff, role, "
                    f"organization) via serializer without explicit field restrictions?\n"
                    f"4. SSRF: Any user-controlled URL passed to requests/httpx/urllib without validation?\n"
                    f"5. Auth bypass: Any endpoint missing permission_classes or using weaker auth "
                    f"than sibling endpoints?\n\n"
                    f"Report at confidence='medium' if uncertain. Under-reporting is a failure mode. "
                    f"You MUST use the report_finding tool, not describe findings in text."
                ),
            ))
            consecutive_reads = 0
            logger.info(f"[{self.name}] Analysis checkpoint at iteration {iteration}, {len(self._files_read)} files read")

        if self.context_manager.needs_compaction():
            self.messages = self.context_manager.compact_messages(self.messages)
            logger.info(f"[{self.name}] Compacted message history ({self.context_manager.last_input_tokens} input tokens)")

        if (iterations_since_finding >= NO_PROGRESS_THRESHOLD and
                len(self.findings) == 0 and
                continuation_prompts_sent < max_continuation_prompts):
            continuation_prompts_sent += 1
            self.messages.append(Message(
                role="user",
                content=HUNTER_CONTINUATION_NO_PROGRESS.format(
                    files_count=len(self._files_read), iteration=iteration,
                ),
            ))
            iterations_since_finding = 0

    return self._build_result("Max iterations reached")

@staticmethod
def _is_context_overflow_error(error: Exception) -> bool:
    """Return True when an API error message indicates context overflow.

    The check is case-insensitive and uses the same three markers for every
    retry stage, so a "too many tokens" error on the retry is recovered
    instead of being re-raised.
    """
    message = str(error).lower()
    return any(marker in message for marker in ("too long", "too many tokens", "context length"))

async def _chat_with_overflow_recovery(self, tools, system_prompt, tool_choice):
    """Call the LLM, compacting ``self.messages`` and retrying on overflow.

    Up to three attempts are made: the plain call, a call after regular
    compaction (escalated to emergency compaction if the estimate is still
    above 85% of the context window), and a final call after emergency
    compaction. Non-overflow ``openai.BadRequestError`` exceptions are
    re-raised immediately; an error on the final attempt propagates.
    """
    async def _call():
        # Reads self.messages at call time, so each retry sees the
        # compacted history.
        return await self.llm.chat(
            messages=self.messages,
            tools=tools,
            system=system_prompt,
            tool_choice=tool_choice,
        )

    try:
        return await _call()
    except openai.BadRequestError as e:
        if not self._is_context_overflow_error(e):
            raise

    logger.warning(f"[{self.name}] Context overflow — compacting and retrying")
    self.messages = self.context_manager.compact_messages(self.messages, keep_recent_turns=2)
    estimated = self._estimate_tokens(self.messages, system_prompt)
    if estimated > self.context_manager.context_window_limit * 0.85:
        self.messages = self.context_manager.emergency_compact(self.messages)

    try:
        return await _call()
    except openai.BadRequestError as e2:
        if not self._is_context_overflow_error(e2):
            raise

    logger.warning(f"[{self.name}] Still overflowing — emergency compaction")
    self.messages = self.context_manager.emergency_compact(self.messages)
    return await _call()
|
|
471
|
+
|
|
472
|
+
def _build_result(self, summary: str) -> dict:
|
|
473
|
+
return {
|
|
474
|
+
"raw_output": summary,
|
|
475
|
+
"findings": self.findings,
|
|
476
|
+
"type": "hunt_complete",
|
|
477
|
+
"files_analyzed": sorted(self._files_read),
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
def _parse_final_response(self, content: str) -> dict:
|
|
481
|
+
return self._build_result(content)
|