openhack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. openhack/__init__.py +2 -0
  2. openhack/__main__.py +225 -0
  3. openhack/agents/__init__.py +30 -0
  4. openhack/agents/base.py +230 -0
  5. openhack/agents/browser_verifier.py +679 -0
  6. openhack/agents/browser_verifier_swarm.py +256 -0
  7. openhack/agents/checkpoint.py +89 -0
  8. openhack/agents/context_manager.py +356 -0
  9. openhack/agents/coordinator.py +1105 -0
  10. openhack/agents/endpoint_analyst.py +307 -0
  11. openhack/agents/feature_hunter.py +93 -0
  12. openhack/agents/hunter.py +481 -0
  13. openhack/agents/hunter_swarm.py +385 -0
  14. openhack/agents/llm.py +334 -0
  15. openhack/agents/recon.py +19 -0
  16. openhack/agents/sandbox_verifier.py +396 -0
  17. openhack/agents/sandbox_verifier_swarm.py +250 -0
  18. openhack/agents/session.py +286 -0
  19. openhack/agents/validator.py +217 -0
  20. openhack/agents/validator_swarm.py +106 -0
  21. openhack/auth.py +175 -0
  22. openhack/browser/__init__.py +12 -0
  23. openhack/browser/runner.py +385 -0
  24. openhack/categories.py +130 -0
  25. openhack/config.py +201 -0
  26. openhack/deterministic_recon.py +464 -0
  27. openhack/entry_points.py +745 -0
  28. openhack/framework_classifier.py +515 -0
  29. openhack/framework_detection.py +269 -0
  30. openhack/headless_scan.py +179 -0
  31. openhack/prompts/__init__.py +108 -0
  32. openhack/prompts/browser_verifier.py +171 -0
  33. openhack/prompts/coordinator.py +31 -0
  34. openhack/prompts/django/__init__.py +32 -0
  35. openhack/prompts/django/auth_bypass.py +76 -0
  36. openhack/prompts/django/csrf.py +62 -0
  37. openhack/prompts/django/data_exposure.py +67 -0
  38. openhack/prompts/django/idor.py +74 -0
  39. openhack/prompts/django/injection.py +67 -0
  40. openhack/prompts/django/misconfiguration.py +70 -0
  41. openhack/prompts/django/ssrf.py +64 -0
  42. openhack/prompts/endpoint_analyst.py +122 -0
  43. openhack/prompts/express/__init__.py +29 -0
  44. openhack/prompts/express/auth_bypass.py +71 -0
  45. openhack/prompts/express/data_exposure.py +77 -0
  46. openhack/prompts/express/idor.py +69 -0
  47. openhack/prompts/express/injection.py +75 -0
  48. openhack/prompts/express/misconfiguration.py +72 -0
  49. openhack/prompts/express/ssrf.py +63 -0
  50. openhack/prompts/feature_hunter.py +140 -0
  51. openhack/prompts/flask/__init__.py +29 -0
  52. openhack/prompts/flask/auth_bypass.py +86 -0
  53. openhack/prompts/flask/data_exposure.py +78 -0
  54. openhack/prompts/flask/idor.py +83 -0
  55. openhack/prompts/flask/injection.py +77 -0
  56. openhack/prompts/flask/misconfiguration.py +73 -0
  57. openhack/prompts/flask/ssrf.py +65 -0
  58. openhack/prompts/hunter.py +362 -0
  59. openhack/prompts/hunter_continuation_loop.py +12 -0
  60. openhack/prompts/hunter_continuation_no_findings.py +19 -0
  61. openhack/prompts/hunter_continuation_no_progress.py +22 -0
  62. openhack/prompts/hunter_tool_instructions.py +55 -0
  63. openhack/prompts/nextjs/__init__.py +42 -0
  64. openhack/prompts/nextjs/auth_bypass.py +80 -0
  65. openhack/prompts/nextjs/csrf.py +71 -0
  66. openhack/prompts/nextjs/data_exposure.py +88 -0
  67. openhack/prompts/nextjs/idor.py +64 -0
  68. openhack/prompts/nextjs/injection.py +65 -0
  69. openhack/prompts/nextjs/middleware_bypass.py +75 -0
  70. openhack/prompts/nextjs/misconfiguration.py +92 -0
  71. openhack/prompts/nextjs/server_actions.py +97 -0
  72. openhack/prompts/nextjs/ssrf.py +66 -0
  73. openhack/prompts/nextjs/xss.py +69 -0
  74. openhack/prompts/pr_analysis_system.py +80 -0
  75. openhack/prompts/pr_analysis_user.py +11 -0
  76. openhack/prompts/project_context.py +89 -0
  77. openhack/prompts/recon.py +199 -0
  78. openhack/prompts/reporter.py +88 -0
  79. openhack/prompts/researchers.py +434 -0
  80. openhack/prompts/sandbox_verifier.py +128 -0
  81. openhack/prompts/supabase/__init__.py +39 -0
  82. openhack/prompts/supabase/auth_tokens.py +131 -0
  83. openhack/prompts/supabase/edge_functions.py +150 -0
  84. openhack/prompts/supabase/graphql.py +102 -0
  85. openhack/prompts/supabase/postgrest.py +99 -0
  86. openhack/prompts/supabase/realtime.py +93 -0
  87. openhack/prompts/supabase/rls.py +110 -0
  88. openhack/prompts/supabase/rpc_functions.py +127 -0
  89. openhack/prompts/supabase/storage.py +110 -0
  90. openhack/prompts/supabase/tenant_isolation.py +118 -0
  91. openhack/prompts/validator.py +319 -0
  92. openhack/prompts/validator_continuation_incomplete.py +12 -0
  93. openhack/prompts/validator_tool_instructions.py +29 -0
  94. openhack/quality.py +231 -0
  95. openhack/sandbox/__init__.py +12 -0
  96. openhack/sandbox/orchestrator.py +517 -0
  97. openhack/sandbox/runner.py +177 -0
  98. openhack/scan_session.py +245 -0
  99. openhack/setup.py +452 -0
  100. openhack/static_validator.py +612 -0
  101. openhack/tools/__init__.py +1 -0
  102. openhack/tools/ast_tools.py +307 -0
  103. openhack/tools/coverage.py +1078 -0
  104. openhack/tools/filesystem.py +404 -0
  105. openhack/tools/nextjs.py +258 -0
  106. openhack/tools/registry.py +52 -0
  107. openhack/tui.py +3450 -0
  108. openhack/updates.py +170 -0
  109. openhack-0.1.0.dist-info/METADATA +189 -0
  110. openhack-0.1.0.dist-info/RECORD +113 -0
  111. openhack-0.1.0.dist-info/WHEEL +4 -0
  112. openhack-0.1.0.dist-info/entry_points.txt +2 -0
  113. openhack-0.1.0.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,481 @@
1
+ """
2
+ Hunter agent for finding security vulnerabilities.
3
+ """
4
+
5
+ import fnmatch
6
+ import json
7
+ import logging
8
+ from typing import Optional
9
+
10
+ import openai
11
+
12
+ from .base import BaseAgent
13
+ from .llm import Message, ToolResult
14
+ from openhack.config import settings
15
+ from openhack.prompts import (
16
+ HUNTER_PROMPT,
17
+ ALL_FRAMEWORK_PROMPTS,
18
+ HUNTER_TOOL_INSTRUCTIONS,
19
+ HUNTER_CONTINUATION_NO_FINDINGS,
20
+ HUNTER_CONTINUATION_LOOP,
21
+ HUNTER_CONTINUATION_NO_PROGRESS,
22
+ format_project_context,
23
+ )
24
+ from openhack.categories import CATEGORIES, normalize_category
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
# Tool schema offered to the model for recording a single vulnerability.
# Only category, severity, file_path and description are mandatory; the
# remaining properties are optional extras the model may supply.
REPORT_FINDING_TOOL = dict(
    name="report_finding",
    description="Report a potential security vulnerability found during analysis. You MUST call this tool for EACH vulnerability you discover.",
    parameters=dict(
        type="object",
        properties=dict(
            category={
                "type": "string",
                "enum": CATEGORIES,
                "description": "Category of the vulnerability. MUST be one of the allowed values.",
            },
            severity={
                "type": "string",
                "enum": ["critical", "high", "medium", "low", "info"],
                "description": "Severity level",
            },
            file_path={"type": "string", "description": "Path to the vulnerable file"},
            line_number={"type": "integer", "description": "Line number"},
            description={"type": "string", "description": "Detailed description of the vulnerability"},
            code_snippet={"type": "string", "description": "The vulnerable code snippet"},
            confidence={
                "type": "string",
                "enum": ["high", "medium", "low"],
                "description": "Confidence level",
            },
        ),
        required=["category", "severity", "file_path", "description"],
    ),
)
58
+
59
# Tool schema the model calls to signal that the hunt is over. The properties
# are generated from a (name, JSON type, description) table; only "summary"
# and "total_findings" are mandatory.
FINISH_HUNT_TOOL = {
    "name": "finish_hunt",
    "description": "Call this tool ONLY after you have reported ALL vulnerabilities. Signals hunt completion.",
    "parameters": {
        "type": "object",
        "properties": {
            field: {"type": json_type, "description": help_text}
            for field, json_type, help_text in (
                ("summary", "string", "Brief summary of findings"),
                ("total_findings", "integer", "Total vulnerabilities found"),
                ("critical_count", "integer", "Critical findings count"),
                ("high_count", "integer", "High findings count"),
            )
        },
        "required": ["summary", "total_findings"],
    },
}
73
+
74
+
75
class HunterAgent(BaseAgent):
    """Agent that drives an LLM tool loop to find and record security vulnerabilities."""

    # Agent identity; __init__ replaces both values per instance when a
    # group_name is supplied.
    name = "hunter"
    description = "Vulnerability hunting"
    # Upper bound on the number of loop iterations performed by run().
    max_iterations: int = 50

    # Categories used when the caller does not pass vuln_categories.
    DEFAULT_CATEGORIES = [
        "idor", "xss", "csrf", "ssrf", "injection",
        "auth_bypass", "data_exposure", "middleware_bypass",
        "server_actions", "misconfiguration",
    ]
85
+
86
def __init__(self, *args, vuln_categories=None, group_name=None, framework=None, **kwargs):
    """Set up a hunter, optionally scoped to a category group and framework.

    Args:
        *args: Positional arguments forwarded to the parent initializer.
        vuln_categories: Categories to hunt for; a falsy value selects
            ``DEFAULT_CATEGORIES``.
        group_name: Optional label; when given it is embedded in the
            instance's ``name`` and ``description``.
        framework: Optional framework key used to pick framework-specific
            prompts in ``get_system_prompt``.
        **kwargs: Keyword arguments forwarded to the parent initializer.
    """
    super().__init__(*args, **kwargs)

    # Per-run state; run() resets both of these again on entry.
    self.findings: list[dict] = []
    self._files_read: set[str] = set()

    self.framework = framework
    self.group_name = group_name
    self.vuln_categories = vuln_categories if vuln_categories else self.DEFAULT_CATEGORIES

    if not group_name:
        return
    self.name = f"hunter:{group_name}"
    self.description = f"Vulnerability hunting ({group_name})"
97
+
98
def get_system_prompt(self, context: dict) -> str:
    """Assemble the system prompt for this hunter.

    The prompt is ``HUNTER_PROMPT`` filled with the recon summary and the
    formatted project context, followed (when a framework is set and has
    prompts for any of this hunter's categories) by a guidance section, and
    always ends with ``HUNTER_TOOL_INSTRUCTIONS``.

    Args:
        context: Run context; the ``"recon"`` and ``"project_context"`` keys
            are read, both optional.

    Returns:
        The complete system prompt text.
    """
    recon_summary = context.get("recon", {}).get("summary", "No recon data available")
    project_summary = format_project_context(context.get("project_context", {}))

    # Framework-specific guidance, in the order the categories are configured.
    guidance: list = []
    if self.framework is not None:
        per_framework = ALL_FRAMEWORK_PROMPTS.get(self.framework, {})
        guidance = [
            per_framework[category]
            for category in self.vuln_categories
            if category in per_framework
        ]

    sections = [
        HUNTER_PROMPT.format(
            recon_context=recon_summary,
            project_context=project_summary,
        )
    ]
    if guidance:
        sections.append("\n\n## Detailed Vulnerability Guidance\n\n")
        sections.append("\n\n---\n\n".join(guidance))
    sections.append(HUNTER_TOOL_INSTRUCTIONS)
    return "".join(sections)
120
+
121
def get_tools(self) -> list[dict]:
    """Return the parent's tools followed by the two hunt-specific tool schemas."""
    return [*super().get_tools(), REPORT_FINDING_TOOL, FINISH_HUNT_TOOL]
124
+
125
+ @staticmethod
126
+ def _is_excluded_path(file_path: str) -> bool:
127
+ """Check if a file path matches any scan exclude pattern."""
128
+ for pattern in settings.scan_exclude_patterns:
129
+ if fnmatch.fnmatch(file_path, pattern):
130
+ return True
131
+ # Also check without leading ./ or /
132
+ normalized = file_path.lstrip("./")
133
+ if fnmatch.fnmatch(normalized, pattern):
134
+ return True
135
+ # Check if any path segment matches (for patterns like "**/test/**")
136
+ if "**" in pattern:
137
+ # Convert glob to work with fnmatch on full paths
138
+ simple_pattern = pattern.replace("**/", "*/")
139
+ if fnmatch.fnmatch(file_path, simple_pattern) or fnmatch.fnmatch(normalized, simple_pattern):
140
+ return True
141
+ # Direct substring check for directory patterns
142
+ dir_part = pattern.replace("**/", "").replace("/**", "").replace("*", "")
143
+ if dir_part and f"/{dir_part}/" in f"/{normalized}":
144
+ return True
145
+ return False
146
+
147
+ def _correct_line_number(self, file_path: str, line_number: int, code_snippet: str) -> int:
148
+ """Correct the reported line number by searching for the code snippet in the file.
149
+
150
+ Models often get the code snippet right but report the wrong line number
151
+ due to context window drift. This does a simple search to fix it.
152
+ """
153
+ if not file_path or not code_snippet:
154
+ return line_number
155
+
156
+ try:
157
+ result = self.tools.execute_tool("read_file", {"path": file_path})
158
+ except Exception:
159
+ return line_number
160
+
161
+ content = result.get("content", "") if isinstance(result, dict) else str(result)
162
+ if not content:
163
+ return line_number
164
+
165
+ lines = content.split("\n")
166
+ # Strip line-number prefix if present (from read_file format: "123\tcontent")
167
+ clean_lines = []
168
+ for line in lines:
169
+ if "\t" in line:
170
+ clean_lines.append(line.split("\t", 1)[1])
171
+ else:
172
+ clean_lines.append(line)
173
+
174
+ # Clean up the snippet for matching — take the most distinctive line
175
+ snippet_lines = [s.strip() for s in code_snippet.strip().split("\n") if s.strip()]
176
+ if not snippet_lines:
177
+ return line_number
178
+
179
+ # Try to find the first non-trivial line of the snippet in the file
180
+ search_line = None
181
+ for sl in snippet_lines:
182
+ # Skip trivial lines (just braces, empty, common keywords)
183
+ if sl in ("{", "}", "});", ");", "*/", "/*", "//"):
184
+ continue
185
+ if len(sl) > 10: # Needs to be distinctive enough
186
+ search_line = sl
187
+ break
188
+ if not search_line:
189
+ search_line = snippet_lines[0]
190
+
191
+ # Search for the line in the file
192
+ for i, file_line in enumerate(clean_lines):
193
+ if search_line in file_line.strip():
194
+ corrected = i + 1 # 1-indexed
195
+ if corrected != line_number:
196
+ logger.debug(f"Corrected line number {line_number} → {corrected} for {file_path}")
197
+ return corrected
198
+
199
+ # Snippet not found — return original
200
+ return line_number
201
+
202
def _handle_report_finding(self, args: dict) -> dict:
    """Record one finding reported through the ``report_finding`` tool.

    Findings whose file is in an excluded path are rejected. Otherwise the
    line number is re-checked against the file contents and the finding is
    appended to ``self.findings``.

    Tool arguments come from the model, so optional fields may be missing or
    explicitly null; both cases fall back to the same defaults instead of
    raising.

    Args:
        args: Arguments of the ``report_finding`` tool call.

    Returns:
        ``{"status": "rejected", "reason": ...}`` for excluded paths, else
        ``{"status": "recorded", "finding_id": <1-based index>}``.
    """
    file_path = args.get("file_path") or ""

    # Pre-filter: reject findings in excluded paths
    if file_path and self._is_excluded_path(file_path):
        logger.info(f"Finding rejected (excluded path): {args.get('category', '?')} in {file_path}")
        return {"status": "rejected", "reason": f"File is in an excluded path (test/CLI/docs/examples): {file_path}"}

    # Correct line number by searching for the code snippet in the file
    line_number = args.get("line_number")
    code_snippet = args.get("code_snippet")
    if line_number and code_snippet and file_path:
        line_number = self._correct_line_number(file_path, line_number, code_snippet)

    finding = {
        "category": normalize_category(args.get("category") or "unknown"),
        # `or` (rather than a .get default) also covers an explicit null,
        # which would otherwise crash on .lower().
        "severity": str(args.get("severity") or "medium").lower(),
        "file_path": file_path,
        "line_number": line_number,
        "description": args.get("description") or "",
        "code_snippet": code_snippet,
        "confidence": str(args.get("confidence") or "medium").lower(),
        "validated": False,
    }
    self.findings.append(finding)
    logger.info(f"Finding reported: {finding['category']} in {finding['file_path']}")
    return {"status": "recorded", "finding_id": len(self.findings)}
229
+
230
def _handle_finish_hunt(self, args: dict) -> dict:
    """Acknowledge a ``finish_hunt`` tool call.

    The tool arguments are not used; the reply reports how many findings
    have been recorded on this agent.
    """
    reported = len(self.findings)
    logger.info(f"Hunt finished: {reported} findings")
    return {"status": "hunt_complete", "findings_reported": reported}
233
+
234
async def run(self, task: str, context: Optional[dict] = None) -> dict:
    """Run the hunt loop until the model finishes, is cancelled, or the iteration cap is hit.

    Each iteration sends the conversation to the LLM, executes the tool calls
    it returns and appends their results to the conversation. Nudging
    messages are injected when the model answers without tools, repeats
    directory listings, reads many files without reporting, or makes no
    progress.

    Args:
        task: Initial user message for the conversation.
        context: Optional run context passed to ``get_system_prompt``.

    Returns:
        The dict produced by ``_build_result``.

    Raises:
        openai.BadRequestError: When the LLM rejects a request for a reason
            other than context overflow, or still rejects it after both
            compaction retries.
    """
    context = context or {}
    self.session.current_agent = self.name
    self.findings = []
    self._files_read = set()

    system_prompt = self.get_system_prompt(context)
    self.messages = [Message(role="user", content=task)]
    self._seed_existing_instructions()

    max_iterations = self.max_iterations
    iteration = 0
    recent_tools: list[str] = []
    iterations_since_finding = 0
    consecutive_reads = 0
    analysis_checkpoints_sent = 0
    analysis_mode = False
    analysis_mode_turns = 0
    continuation_prompts_sent = 0
    max_continuation_prompts = 3
    LOOP_DETECTION_THRESHOLD = 6
    NO_PROGRESS_THRESHOLD = 15
    ANALYSIS_CHECKPOINT_INTERVAL = 5

    while iteration < max_iterations:
        # Cancellation is checked both before and after a possible pause.
        if self.session.cancelled:
            break
        await self.session.wait_if_paused()
        if self.session.cancelled:
            break
        iteration += 1
        iterations_since_finding += 1

        self._inject_pending_instructions()

        # In analysis mode only the two reporting tools are offered and a
        # tool call is required.
        if analysis_mode:
            tools = [REPORT_FINDING_TOOL, FINISH_HUNT_TOOL]
            forced_tool_choice = "required"
        else:
            tools = self.get_tools()
            forced_tool_choice = None

        self._preflight_compact(system_prompt)

        response = await self._chat_with_overflow_recovery(tools, system_prompt, forced_tool_choice)

        self.session.total_cost += response.cost
        if response.usage:
            self.session.total_tokens += response.usage.get("total_tokens", 0)
            self.context_manager.update_usage(response.usage.get("input_tokens", 0))

        if response.content:
            self.session.add_trace(agent=self.name, event_type="thinking", content=response.content)

        if not response.tool_calls:
            if analysis_mode:
                analysis_mode_turns += 1
                if analysis_mode_turns >= 3:
                    # Give up forcing a report after three text-only turns.
                    analysis_mode = False
                    analysis_mode_turns = 0
                    continue
                self.messages.append(Message(
                    role="user",
                    content=(
                        "You responded with text but did not call any tools. "
                        "You MUST call report_finding or finish_hunt. "
                        "Do NOT respond with text — use the tools."
                    ),
                ))
                continue
            if len(self.findings) == 0 and continuation_prompts_sent < max_continuation_prompts:
                continuation_prompts_sent += 1
                self.messages.append(Message(role="user", content=HUNTER_CONTINUATION_NO_FINDINGS))
                continue
            # A text-only answer outside analysis mode ends the hunt.
            return self._build_result(response.content or "")

        current_tools = [tc.name for tc in response.tool_calls]
        recent_tools.extend(current_tools)
        recent_tools = recent_tools[-10:]

        # Loop detection: the last N tool calls are all the same listing tool.
        if len(recent_tools) >= LOOP_DETECTION_THRESHOLD:
            last_n = recent_tools[-LOOP_DETECTION_THRESHOLD:]
            if len(set(last_n)) == 1 and last_n[0] in ("list_dir", "glob"):
                if continuation_prompts_sent < max_continuation_prompts:
                    continuation_prompts_sent += 1
                    self.messages.append(Message(role="user", content=HUNTER_CONTINUATION_LOOP.format(tool_name=last_n[0])))
                    recent_tools = []
                    # The looping tool calls of this turn are dropped, not executed.
                    continue

        assistant_msg = Message(
            role="assistant",
            content=response.content,
            tool_calls=[
                {"id": tc.id, "type": "function", "function": {"name": tc.name, "arguments": json.dumps(tc.arguments)}}
                for tc in response.tool_calls
            ],
            reasoning_content=getattr(response, 'reasoning_content', None),
        )
        self.messages.append(assistant_msg)

        should_finish = False
        has_only_reads = all(tc.name == "read_file" for tc in response.tool_calls)
        for tool_call in response.tool_calls:
            self.session.add_trace(
                agent=self.name, event_type="tool_call",
                content=f"Calling {tool_call.name}",
                tool_name=tool_call.name, tool_input=tool_call.arguments,
            )

            if tool_call.name == "report_finding":
                result = self._handle_report_finding(tool_call.arguments)
                iterations_since_finding = 0
                consecutive_reads = 0
                analysis_mode = False
            elif tool_call.name == "finish_hunt":
                result = self._handle_finish_hunt(tool_call.arguments)
                should_finish = True
            else:
                try:
                    result = self.tools.execute_tool(tool_call.name, tool_call.arguments)
                except Exception as e:
                    # A failing tool is reported back to the model, not raised.
                    result = {"error": f"Tool execution failed: {e}"}
                    logger.warning(f"[{self.name}] Tool {tool_call.name} failed: {e}")
                if tool_call.name == "read_file":
                    file_path = tool_call.arguments.get("path", "")
                    if file_path:
                        self._files_read.add(file_path)

            self.session.add_trace(
                agent=self.name, event_type="tool_result",
                content=f"Result from {tool_call.name}",
                tool_name=tool_call.name, tool_output=result,
            )

            raw_content = json.dumps(result) if isinstance(result, dict) else str(result)
            truncated_content = self.context_manager.truncate_tool_result(tool_call.name, raw_content)
            tool_result = ToolResult(
                tool_call_id=tool_call.id,
                content=truncated_content,
            )
            self.messages.append(tool_result.to_message())

        # Finish only after every tool call of the turn has been answered.
        if should_finish:
            return self._build_result(response.content or "")

        # NOTE(review): this counter is reset only by report_finding or by a
        # checkpoint, not by turns that use other tools — confirm that
        # "read-only turns since the last finding" is the intended meaning.
        if has_only_reads:
            consecutive_reads += 1

        if analysis_mode and response.tool_calls:
            has_action = any(tc.name in ("report_finding", "finish_hunt") for tc in response.tool_calls)
            if has_action:
                analysis_mode = False
                analysis_mode_turns = 0

        if (consecutive_reads >= ANALYSIS_CHECKPOINT_INTERVAL
                and len(self.findings) == 0
                and analysis_checkpoints_sent < 5):
            analysis_checkpoints_sent += 1
            analysis_mode = True
            analysis_mode_turns = 0
            # Last ten paths in sorted order (not the ten most recently read).
            files_list = ", ".join(sorted(self._files_read)[-10:])
            self.messages.append(Message(
                role="user",
                content=(
                    f"[ANALYSIS CHECKPOINT] You have read {len(self._files_read)} files without reporting "
                    f"any findings. STOP reading new files and call report_finding NOW.\n\n"
                    f"Files read: {files_list}\n\n"
                    f"You MUST call report_finding (not just describe in text) for EACH of these checks:\n"
                    f"1. IDOR: Does any endpoint load an object by ID without verifying the requesting "
                    f"user owns it? (e.g., GET /api/thing/<id> checks auth but not ownership)\n"
                    f"2. ORM escape hatches: Any .raw(), .extra(), RawSQL, cursor.execute() with "
                    f"string formatting instead of parameterized queries?\n"
                    f"3. Mass assignment: Can request.data set privileged fields (is_staff, role, "
                    f"organization) via serializer without explicit field restrictions?\n"
                    f"4. SSRF: Any user-controlled URL passed to requests/httpx/urllib without validation?\n"
                    f"5. Auth bypass: Any endpoint missing permission_classes or using weaker auth "
                    f"than sibling endpoints?\n\n"
                    f"Report at confidence='medium' if uncertain. Under-reporting is a failure mode. "
                    f"You MUST use the report_finding tool, not describe findings in text."
                ),
            ))
            consecutive_reads = 0
            logger.info(f"[{self.name}] Analysis checkpoint at iteration {iteration}, {len(self._files_read)} files read")

        if self.context_manager.needs_compaction():
            self.messages = self.context_manager.compact_messages(self.messages)
            logger.info(f"[{self.name}] Compacted message history ({self.context_manager.last_input_tokens} input tokens)")

        if (iterations_since_finding >= NO_PROGRESS_THRESHOLD and
                len(self.findings) == 0 and
                continuation_prompts_sent < max_continuation_prompts):
            continuation_prompts_sent += 1
            self.messages.append(Message(
                role="user",
                content=HUNTER_CONTINUATION_NO_PROGRESS.format(
                    files_count=len(self._files_read), iteration=iteration,
                ),
            ))
            iterations_since_finding = 0

    return self._build_result("Max iterations reached")

@staticmethod
def _is_context_overflow(error: Exception) -> bool:
    """Return True when an LLM error message indicates the context window was exceeded."""
    message = str(error).lower()
    return any(marker in message for marker in ("too long", "too many tokens", "context length"))

async def _chat_with_overflow_recovery(self, tools, system_prompt, tool_choice):
    """Call the LLM, compacting ``self.messages`` and retrying on context overflow.

    Up to three attempts are made: as-is, after a regular compaction
    (escalated to an emergency compaction when the estimate is still above
    85% of the context window), and after an emergency compaction. The same
    overflow test is applied to every failure, so a retry that fails with
    "too many tokens" is recovered just like the first attempt.
    """
    async def attempt():
        # Reads self.messages at call time, so compaction takes effect.
        return await self.llm.chat(
            messages=self.messages,
            tools=tools,
            system=system_prompt,
            tool_choice=tool_choice,
        )

    try:
        return await attempt()
    except openai.BadRequestError as first_error:
        if not self._is_context_overflow(first_error):
            raise

    logger.warning(f"[{self.name}] Context overflow — compacting and retrying")
    self.messages = self.context_manager.compact_messages(self.messages, keep_recent_turns=2)
    estimated = self._estimate_tokens(self.messages, system_prompt)
    if estimated > self.context_manager.context_window_limit * 0.85:
        self.messages = self.context_manager.emergency_compact(self.messages)

    try:
        return await attempt()
    except openai.BadRequestError as second_error:
        if not self._is_context_overflow(second_error):
            raise

    logger.warning(f"[{self.name}] Still overflowing — emergency compaction")
    self.messages = self.context_manager.emergency_compact(self.messages)
    # Last attempt: any error now propagates to the caller.
    return await attempt()
471
+
472
+ def _build_result(self, summary: str) -> dict:
473
+ return {
474
+ "raw_output": summary,
475
+ "findings": self.findings,
476
+ "type": "hunt_complete",
477
+ "files_analyzed": sorted(self._files_read),
478
+ }
479
+
480
+ def _parse_final_response(self, content: str) -> dict:
481
+ return self._build_result(content)