openhack 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openhack/__init__.py +2 -0
- openhack/__main__.py +225 -0
- openhack/agents/__init__.py +30 -0
- openhack/agents/base.py +230 -0
- openhack/agents/browser_verifier.py +679 -0
- openhack/agents/browser_verifier_swarm.py +256 -0
- openhack/agents/checkpoint.py +89 -0
- openhack/agents/context_manager.py +356 -0
- openhack/agents/coordinator.py +1105 -0
- openhack/agents/endpoint_analyst.py +307 -0
- openhack/agents/feature_hunter.py +93 -0
- openhack/agents/hunter.py +481 -0
- openhack/agents/hunter_swarm.py +385 -0
- openhack/agents/llm.py +334 -0
- openhack/agents/recon.py +19 -0
- openhack/agents/sandbox_verifier.py +396 -0
- openhack/agents/sandbox_verifier_swarm.py +250 -0
- openhack/agents/session.py +286 -0
- openhack/agents/validator.py +217 -0
- openhack/agents/validator_swarm.py +106 -0
- openhack/auth.py +175 -0
- openhack/browser/__init__.py +12 -0
- openhack/browser/runner.py +385 -0
- openhack/categories.py +130 -0
- openhack/config.py +201 -0
- openhack/deterministic_recon.py +464 -0
- openhack/entry_points.py +745 -0
- openhack/framework_classifier.py +515 -0
- openhack/framework_detection.py +269 -0
- openhack/headless_scan.py +179 -0
- openhack/prompts/__init__.py +108 -0
- openhack/prompts/browser_verifier.py +171 -0
- openhack/prompts/coordinator.py +31 -0
- openhack/prompts/django/__init__.py +32 -0
- openhack/prompts/django/auth_bypass.py +76 -0
- openhack/prompts/django/csrf.py +62 -0
- openhack/prompts/django/data_exposure.py +67 -0
- openhack/prompts/django/idor.py +74 -0
- openhack/prompts/django/injection.py +67 -0
- openhack/prompts/django/misconfiguration.py +70 -0
- openhack/prompts/django/ssrf.py +64 -0
- openhack/prompts/endpoint_analyst.py +122 -0
- openhack/prompts/express/__init__.py +29 -0
- openhack/prompts/express/auth_bypass.py +71 -0
- openhack/prompts/express/data_exposure.py +77 -0
- openhack/prompts/express/idor.py +69 -0
- openhack/prompts/express/injection.py +75 -0
- openhack/prompts/express/misconfiguration.py +72 -0
- openhack/prompts/express/ssrf.py +63 -0
- openhack/prompts/feature_hunter.py +140 -0
- openhack/prompts/flask/__init__.py +29 -0
- openhack/prompts/flask/auth_bypass.py +86 -0
- openhack/prompts/flask/data_exposure.py +78 -0
- openhack/prompts/flask/idor.py +83 -0
- openhack/prompts/flask/injection.py +77 -0
- openhack/prompts/flask/misconfiguration.py +73 -0
- openhack/prompts/flask/ssrf.py +65 -0
- openhack/prompts/hunter.py +362 -0
- openhack/prompts/hunter_continuation_loop.py +12 -0
- openhack/prompts/hunter_continuation_no_findings.py +19 -0
- openhack/prompts/hunter_continuation_no_progress.py +22 -0
- openhack/prompts/hunter_tool_instructions.py +55 -0
- openhack/prompts/nextjs/__init__.py +42 -0
- openhack/prompts/nextjs/auth_bypass.py +80 -0
- openhack/prompts/nextjs/csrf.py +71 -0
- openhack/prompts/nextjs/data_exposure.py +88 -0
- openhack/prompts/nextjs/idor.py +64 -0
- openhack/prompts/nextjs/injection.py +65 -0
- openhack/prompts/nextjs/middleware_bypass.py +75 -0
- openhack/prompts/nextjs/misconfiguration.py +92 -0
- openhack/prompts/nextjs/server_actions.py +97 -0
- openhack/prompts/nextjs/ssrf.py +66 -0
- openhack/prompts/nextjs/xss.py +69 -0
- openhack/prompts/pr_analysis_system.py +80 -0
- openhack/prompts/pr_analysis_user.py +11 -0
- openhack/prompts/project_context.py +89 -0
- openhack/prompts/recon.py +199 -0
- openhack/prompts/reporter.py +88 -0
- openhack/prompts/researchers.py +434 -0
- openhack/prompts/sandbox_verifier.py +128 -0
- openhack/prompts/supabase/__init__.py +39 -0
- openhack/prompts/supabase/auth_tokens.py +131 -0
- openhack/prompts/supabase/edge_functions.py +150 -0
- openhack/prompts/supabase/graphql.py +102 -0
- openhack/prompts/supabase/postgrest.py +99 -0
- openhack/prompts/supabase/realtime.py +93 -0
- openhack/prompts/supabase/rls.py +110 -0
- openhack/prompts/supabase/rpc_functions.py +127 -0
- openhack/prompts/supabase/storage.py +110 -0
- openhack/prompts/supabase/tenant_isolation.py +118 -0
- openhack/prompts/validator.py +319 -0
- openhack/prompts/validator_continuation_incomplete.py +12 -0
- openhack/prompts/validator_tool_instructions.py +29 -0
- openhack/quality.py +231 -0
- openhack/sandbox/__init__.py +12 -0
- openhack/sandbox/orchestrator.py +517 -0
- openhack/sandbox/runner.py +177 -0
- openhack/scan_session.py +245 -0
- openhack/setup.py +452 -0
- openhack/static_validator.py +612 -0
- openhack/tools/__init__.py +1 -0
- openhack/tools/ast_tools.py +307 -0
- openhack/tools/coverage.py +1078 -0
- openhack/tools/filesystem.py +404 -0
- openhack/tools/nextjs.py +258 -0
- openhack/tools/registry.py +52 -0
- openhack/tui.py +3450 -0
- openhack/updates.py +170 -0
- openhack-0.1.0.dist-info/METADATA +189 -0
- openhack-0.1.0.dist-info/RECORD +113 -0
- openhack-0.1.0.dist-info/WHEEL +4 -0
- openhack-0.1.0.dist-info/entry_points.txt +2 -0
- openhack-0.1.0.dist-info/licenses/LICENSE +661 -0
|
@@ -0,0 +1,404 @@
|
|
|
1
|
+
"""
|
|
2
|
+
File system tools for vulnerability scanning.
|
|
3
|
+
Provides safe, jailed access to the target directory.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import fnmatch
|
|
7
|
+
import inspect
|
|
8
|
+
import os
|
|
9
|
+
import re
|
|
10
|
+
import subprocess
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
# Directory-name patterns (fnmatch syntax) that recursive searches never
# descend into. Passed to grep as --exclude-dir and applied by glob().
_GREP_EXCLUDE_DIRS = [
    ".git", "node_modules", "__pycache__", ".venv", "venv",
    "dist", "build", ".next", ".nuxt", ".output",
    "vendor", "target", "coverage", ".mypy_cache",
    ".pytest_cache", ".tox", "eggs", "*.egg-info",
]

# File globs searched by grep() when the caller gives no ``include`` filter.
_GREP_SOURCE_INCLUDES = [
    "*.py", "*.js", "*.ts", "*.tsx", "*.jsx",
    "*.rb", "*.go", "*.rs", "*.java", "*.php",
    "*.c", "*.cpp", "*.h",
    "*.vue", "*.svelte",
]


class FileSystemTools:
    """File system tools with path safety enforcement.

    Every public method takes paths relative to ``jail_dir`` and returns a
    plain dict. Failures are reported as ``{"error": "..."}`` instead of
    being raised, so results can be handed straight back to the caller.
    """

    # Extensions for which read_file() returns a placeholder instead of the
    # file content (binary formats plus bulk data such as .sql/.csv/.log).
    BINARY_EXTENSIONS = frozenset({
        ".zip", ".gz", ".tar", ".bz2", ".xz", ".7z", ".rar",
        ".jar", ".war", ".ear", ".class",
        ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".webp", ".svg",
        ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
        ".woff", ".woff2", ".ttf", ".eot", ".otf",
        ".exe", ".dll", ".so", ".dylib", ".o", ".a",
        ".pyc", ".pyo", ".wasm",
        ".sqlite", ".db", ".sqlite3",
        ".mp3", ".mp4", ".avi", ".mov", ".wav", ".flac",
        ".bin", ".dat", ".iso", ".img",
        ".sql", ".csv", ".tsv", ".log", ".dump",
    })
    MAX_FILE_SIZE = 200_000  # 200KB — files larger than this are truncated
    MAX_LINES_DEFAULT = 1000
    MAX_GLOB_MATCHES = 500
    MAX_GREP_MATCHES = 100
    GREP_TIMEOUT_SECONDS = 30

    # First ``{a,b,c}`` group in a glob pattern.
    _BRACE_GROUP = re.compile(r"\{([^}]+)\}")

    def __init__(self, jail_dir: Path):
        """Bind the tools to ``jail_dir``; nothing outside it is reachable."""
        self.jail_dir = jail_dir.resolve()

    def _resolve_safe_path(self, path: str) -> Path:
        """Resolve ``path`` inside the jail directory.

        Args:
            path: Path relative to the jail (absolute paths are only accepted
                when they already point inside the jail).

        Returns:
            The fully resolved absolute path.

        Raises:
            PermissionError: If the resolved path is outside the jail.
        """
        requested = (self.jail_dir / path).resolve()
        # Compare path components, not string prefixes: a prefix test would
        # accept a sibling such as "<jail>-backup/secret".
        try:
            requested.relative_to(self.jail_dir)
        except ValueError:
            raise PermissionError(
                f"Access denied: {path} is outside the allowed directory"
            ) from None
        return requested

    def read_file(self, path: str, offset: int = 0, limit: Optional[int] = None) -> dict:
        """Read the contents of a file with line numbers.

        Args:
            path: File path relative to the jail directory.
            offset: Number of leading lines to skip (0-indexed). Negative
                values are treated as 0.
            limit: Maximum number of lines to return. ``None`` or a
                non-positive value means "no limit", except that files larger
                than ``MAX_FILE_SIZE`` are capped at ``MAX_LINES_DEFAULT``.

        Returns:
            ``{"path", "content", "total_lines", "offset", "lines_returned"}``
            on success, a placeholder dict with ``"binary": True`` for
            extensions in ``BINARY_EXTENSIONS``, or ``{"error": ...}``.
        """
        try:
            resolved = self._resolve_safe_path(path)
            if not resolved.exists():
                return {"error": f"File not found: {path}"}
            if not resolved.is_file():
                return {"error": f"Not a file: {path}"}

            rel_path = str(resolved.relative_to(self.jail_dir))
            file_size = resolved.stat().st_size

            if resolved.suffix.lower() in self.BINARY_EXTENSIONS:
                return {
                    "path": rel_path,
                    "content": f"[Binary file: {resolved.suffix}, {file_size:,} bytes — cannot read]",
                    "total_lines": 0,
                    "binary": True,
                }

            offset = max(0, offset or 0)
            if limit is not None and limit <= 0:
                limit = None

            oversized = file_size > self.MAX_FILE_SIZE
            effective_limit = (limit or self.MAX_LINES_DEFAULT) if oversized else limit
            end = None if effective_limit is None else offset + effective_limit

            # Single streaming pass: keep only the requested window while
            # counting every line, so total_lines is exact even when offset
            # is past the end of the file.
            window = []
            total_lines = 0
            with open(resolved, "r", encoding="utf-8", errors="replace") as f:
                for index, line in enumerate(f):
                    total_lines = index + 1
                    if index >= offset and (end is None or index < end):
                        window.append(line)

            numbered_lines = [
                f"{number:6}\t{line.rstrip()}"
                for number, line in enumerate(window, start=offset + 1)
            ]
            # Count before the notice is appended: it is not a file line.
            lines_returned = len(numbered_lines)

            if oversized and end is not None and total_lines > end:
                numbered_lines.append(f"\n[... file truncated: {total_lines:,} total lines, {file_size:,} bytes — use offset/limit to read more ...]")

            return {
                "path": rel_path,
                "content": "\n".join(numbered_lines),
                "total_lines": total_lines,
                "offset": offset,
                "lines_returned": lines_returned,
            }
        except PermissionError as e:
            return {"error": str(e)}
        except Exception as e:
            return {"error": f"Error reading file: {e}"}

    def list_dir(self, path: str = ".", ignore: Optional[list[str]] = None) -> dict:
        """List contents of a directory.

        Args:
            path: Directory path relative to the jail directory.
            ignore: fnmatch patterns; an entry is skipped when either its
                jail-relative path or its bare name matches one of them.

        Returns:
            ``{"path", "entries"}`` where each entry is
            ``{"name", "type": "dir" | "file", "size"}`` (size is ``None``
            for anything that is not a regular file), or ``{"error": ...}``.
        """
        try:
            resolved = self._resolve_safe_path(path)
            if not resolved.exists():
                return {"error": f"Directory not found: {path}"}
            if not resolved.is_dir():
                return {"error": f"Not a directory: {path}"}

            patterns = ignore or []
            entries = []
            for entry in sorted(resolved.iterdir()):
                rel_path = str(entry.relative_to(self.jail_dir))
                if any(
                    fnmatch.fnmatch(rel_path, pat) or fnmatch.fnmatch(entry.name, pat)
                    for pat in patterns
                ):
                    continue
                entries.append({
                    "name": entry.name,
                    "type": "dir" if entry.is_dir() else "file",
                    "size": entry.stat().st_size if entry.is_file() else None,
                })

            return {
                "path": str(resolved.relative_to(self.jail_dir)),
                "entries": entries,
            }
        except PermissionError as e:
            return {"error": str(e)}
        except Exception as e:
            return {"error": f"Error listing directory: {e}"}

    def _expand_braces(self, pattern: str) -> list[str]:
        """Expand brace patterns like {js,jsx,ts,tsx} into multiple patterns.

        Groups are expanded left to right, recursively, so a pattern with
        several groups yields their cartesian product. A pattern without a
        brace group is returned unchanged as a one-element list.
        """
        match = self._BRACE_GROUP.search(pattern)
        if not match:
            return [pattern]

        prefix = pattern[:match.start()]
        suffix = pattern[match.end():]

        expanded = []
        for alt in match.group(1).split(","):
            expanded.extend(self._expand_braces(prefix + alt.strip() + suffix))
        return expanded

    @staticmethod
    def _is_excluded_dir(name: str) -> bool:
        """Return True when a directory name matches ``_GREP_EXCLUDE_DIRS``."""
        return any(fnmatch.fnmatch(name, pat) for pat in _GREP_EXCLUDE_DIRS)

    @staticmethod
    def _glob_match(filename: str, rel_path: str, search: str,
                    recursive: bool, match_path: bool) -> bool:
        """Test one file against one pre-parsed glob alternative.

        Args:
            filename: Bare file name.
            rel_path: "/"-separated path relative to the search root.
            search: Pattern with any leading ``**/`` already removed.
            recursive: Whether the pattern started with ``**/``.
            match_path: Whether ``search`` contains a ``/``.
        """
        if not match_path:
            return fnmatch.fnmatch(filename, search)
        if fnmatch.fnmatch(rel_path, search):
            return True
        if not recursive:
            return False
        # "**/a/b.ts" may match at any depth: try every trailing sub-path.
        parts = rel_path.split("/")
        return any(
            fnmatch.fnmatch("/".join(parts[i:]), search)
            for i in range(1, len(parts))
        )

    def glob(self, pattern: str, path: str = ".") -> dict:
        """Find files matching a glob pattern recursively.

        Supports brace expansion like {js,jsx,ts,tsx}.
        Skips known non-source directories for performance.

        Args:
            pattern: fnmatch-style pattern. Without a ``/`` it is matched
                against file names at any depth; with a ``/`` it is matched
                against the path relative to ``path`` (and, when it starts
                with ``**/``, against every trailing sub-path as well).
            path: Directory to search, relative to the jail directory.

        Returns:
            ``{"pattern", "matches"}`` with at most ``MAX_GLOB_MATCHES``
            sorted, jail-relative paths, or ``{"error": ...}``.
        """
        try:
            resolved = self._resolve_safe_path(path)
            if not resolved.exists():
                return {"error": f"Directory not found: {path}"}

            # Parse every brace alternative once so the tree is walked a
            # single time regardless of how many alternatives there are.
            specs = []
            for expanded in self._expand_braces(pattern):
                recursive = expanded.startswith("**/")
                search = expanded[3:] if recursive else expanded
                specs.append((search, recursive, "/" in search))
            needs_path = any(spec[2] for spec in specs)

            matches = set()
            for root, dirs, files in os.walk(resolved):
                # Prune in place so os.walk does not descend into them.
                dirs[:] = [d for d in dirs if not self._is_excluded_dir(d)]
                root_path = Path(root)
                for filename in files:
                    full = root_path / filename
                    rel_from_base = (
                        full.relative_to(resolved).as_posix() if needs_path else filename
                    )
                    if not any(
                        self._glob_match(filename, rel_from_base, *spec)
                        for spec in specs
                    ):
                        continue
                    matches.add(str(full.relative_to(self.jail_dir)))
                    if len(matches) >= self.MAX_GLOB_MATCHES:
                        return {"pattern": pattern, "matches": sorted(matches)}

            return {"pattern": pattern, "matches": sorted(matches)}
        except PermissionError as e:
            return {"error": str(e)}
        except Exception as e:
            return {"error": f"Error during glob: {e}"}

    def grep(self, pattern: str, path: str = ".", include: Optional[str] = None) -> dict:
        """Search for a regex pattern in files using system grep for speed.

        A file ``path`` is searched line by line in Python and yields
        line-level matches. A directory ``path`` is searched with
        ``grep -rEl`` and yields one entry per matching file, with
        ``"line": 0`` and empty ``"content"``.

        NOTE(review): the directory search is case-sensitive extended regex,
        while the single-file search is a case-insensitive Python regex;
        confirm which behaviour callers expect before unifying them.

        Args:
            pattern: Regular expression to search for.
            path: File or directory, relative to the jail directory.
            include: Optional file glob for the directory search; defaults
                to ``_GREP_SOURCE_INCLUDES``.

        Returns:
            ``{"pattern", "matches"}`` with at most ``MAX_GREP_MATCHES``
            entries, or ``{"error": ...}``. ``"timed_out": True`` is added
            when grep exceeded the timeout, and ``"warning"`` carries grep's
            first stderr line when it exited with an error status.
        """
        try:
            resolved = self._resolve_safe_path(path)
            if not resolved.exists():
                return {"error": f"Path not found: {path}"}

            if resolved.is_file():
                return self._grep_single_file(resolved, pattern)

            cmd = ["grep", "-rEl", "--max-count=3", "--binary-files=without-match"]
            cmd.extend(f"--exclude-dir={d}" for d in _GREP_EXCLUDE_DIRS)
            includes = [include] if include else _GREP_SOURCE_INCLUDES
            cmd.extend(f"--include={glob_pat}" for glob_pat in includes)
            # "-e" and "--" keep a pattern that starts with "-" from being
            # parsed as a grep option.
            cmd.extend(["-e", pattern, "--", str(resolved)])

            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                errors="replace",
                timeout=self.GREP_TIMEOUT_SECONDS,
            )

            matches = []
            for raw in result.stdout.splitlines():
                fp = raw.strip()
                if not fp:
                    continue
                try:
                    rel = str(Path(fp).relative_to(self.jail_dir))
                except ValueError:
                    rel = fp
                # Substring heuristic that drops dependency and test files.
                # Applied before the cap so skipped files do not use up slots.
                if "node_modules" in rel or "test" in rel.lower():
                    continue
                matches.append({"file": rel, "line": 0, "content": ""})
                if len(matches) >= self.MAX_GREP_MATCHES:
                    break

            response = {"pattern": pattern, "matches": matches}
            # grep exits 0 (match) or 1 (no match); anything higher is an
            # error such as an invalid regex. Surface it, but keep matches.
            stderr = result.stderr.strip()
            if result.returncode > 1 and stderr:
                response["warning"] = stderr.splitlines()[0][:200]
            return response
        except subprocess.TimeoutExpired:
            return {"pattern": pattern, "matches": [], "timed_out": True}
        except PermissionError as e:
            return {"error": str(e)}
        except Exception as e:
            return {"error": f"Error during grep: {e}"}

    def _grep_single_file(self, file_path: Path, pattern: str) -> dict:
        """Grep a single file using Python (for when we need line-level results).

        Args:
            file_path: Absolute path of a file inside the jail directory.
            pattern: Python regular expression, matched case-insensitively.

        Returns:
            ``{"pattern", "matches"}`` with up to ``MAX_GREP_MATCHES``
            entries of ``{"file", "line", "content"}`` (content trimmed to
            200 characters). If the file cannot be read to the end, the
            matches found so far are returned together with a ``"warning"``.

        Raises:
            re.error: If ``pattern`` is not a valid regular expression.
        """
        regex = re.compile(pattern, re.IGNORECASE)
        rel = str(file_path.relative_to(self.jail_dir))
        matches = []
        response = {"pattern": pattern, "matches": matches}
        try:
            with open(file_path, "r", encoding="utf-8", errors="replace") as f:
                for line_num, line in enumerate(f, 1):
                    if not regex.search(line):
                        continue
                    matches.append({
                        "file": rel,
                        "line": line_num,
                        "content": line.strip()[:200],
                    })
                    if len(matches) >= self.MAX_GREP_MATCHES:
                        break
        except OSError as e:
            # Best effort: keep the partial result but say it is partial.
            response["warning"] = f"Could not read file completely: {e}"
        return response

    def get_tool_definitions(self) -> list[dict]:
        """Return OpenAI-compatible tool definitions."""
        return [
            {
                "name": "read_file",
                "description": "Read the contents of a file. Returns line-numbered content.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "path": {
                            "type": "string",
                            "description": "Path to the file (relative to target directory)",
                        },
                        "offset": {
                            "type": "integer",
                            "description": "Line number to start reading from (0-indexed)",
                            "default": 0,
                        },
                        "limit": {
                            "type": "integer",
                            "description": "Maximum number of lines to read",
                        },
                    },
                    "required": ["path"],
                },
            },
            {
                "name": "list_dir",
                "description": "List contents of a directory.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "path": {
                            "type": "string",
                            "description": "Path to directory (relative to target)",
                            "default": ".",
                        },
                        "ignore": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "Glob patterns to ignore",
                        },
                    },
                },
            },
            {
                "name": "glob",
                "description": "Find files matching a glob pattern recursively.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "pattern": {
                            "type": "string",
                            "description": "Glob pattern (e.g., '*.ts', '**/*.tsx')",
                        },
                        "path": {
                            "type": "string",
                            "description": "Starting directory",
                            "default": ".",
                        },
                    },
                    "required": ["pattern"],
                },
            },
            {
                "name": "grep",
                "description": "Search for a regex pattern in files.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "pattern": {
                            "type": "string",
                            "description": "Regex pattern to search for",
                        },
                        "path": {
                            "type": "string",
                            "description": "Starting path",
                            "default": ".",
                        },
                        "include": {
                            "type": "string",
                            "description": "Glob pattern for files to include (e.g., '*.ts')",
                        },
                    },
                    "required": ["pattern"],
                },
            },
        ]

    def execute_tool(self, name: str, arguments: dict) -> dict:
        """Execute a tool by name with the given arguments.

        Filters out unexpected keyword arguments that the LLM may hallucinate
        (e.g., passing 'include' to glob when it only belongs on grep).

        Args:
            name: One of ``read_file``, ``list_dir``, ``glob``, ``grep``.
            arguments: Keyword arguments for the tool; ``None`` is treated
                as an empty dict.

        Returns:
            The tool's result dict, or ``{"error": ...}`` when the tool is
            unknown or a required argument is missing.
        """
        tools = {
            "read_file": self.read_file,
            "list_dir": self.list_dir,
            "glob": self.glob,
            "grep": self.grep,
        }
        if name not in tools:
            return {"error": f"Unknown tool: {name}"}

        func = tools[name]
        sig = inspect.signature(func)
        filtered_args = {
            k: v for k, v in (arguments or {}).items() if k in sig.parameters
        }
        # Check the call shape first so a missing required argument becomes
        # an error dict like every other failure, not an uncaught TypeError.
        try:
            sig.bind(**filtered_args)
        except TypeError as e:
            return {"error": f"Invalid arguments for {name}: {e}"}
        return func(**filtered_args)
|
openhack/tools/nextjs.py
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Next.js specific analysis tools for vulnerability scanning.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
from .filesystem import FileSystemTools
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class NextJSTools:
    """Tools for analyzing Next.js application structure and patterns.

    All file access goes through the ``FileSystemTools`` instance passed to
    the constructor; this class only reads what ``read_file``, ``list_dir``
    and ``glob`` return.
    """

    # Locations probed for a middleware file, in priority order.
    _MIDDLEWARE_LOCATIONS = (
        "middleware.ts", "middleware.js", "src/middleware.ts", "src/middleware.js",
    )

    # Package names reported by check_dependencies(), grouped by concern.
    _SECURITY_RELEVANT = {
        "auth": ["next-auth", "@auth/core", "lucia", "clerk", "@clerk/nextjs", "supabase", "@supabase/supabase-js"],
        "database": ["prisma", "@prisma/client", "drizzle-orm", "mongoose", "pg", "mysql2", "better-sqlite3"],
        "validation": ["zod", "yup", "joi", "superstruct", "valibot"],
        "sanitization": ["dompurify", "xss", "sanitize-html", "isomorphic-dompurify"],
        "csrf": ["csrf", "csurf"],
        "rate_limiting": ["rate-limiter-flexible", "express-rate-limit", "upstash"],
    }

    _ASYNC_FUNCTION = re.compile(r"(?:export\s+)?async\s+function\s+(\w+)")
    _ASYNC_ARROW = re.compile(r"(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*async\b")

    def __init__(self, fs_tools: "FileSystemTools"):
        """Wrap ``fs_tools`` and start with empty result caches."""
        self.fs = fs_tools
        self._route_cache: Optional[dict] = None
        self._project_info_cache: Optional[dict] = None

    @staticmethod
    def _parse_numbered_json(numbered: str) -> Optional[dict]:
        """Parse JSON out of line-numbered ``read_file`` content.

        Each line looks like ``"<number>\\t<text>"``; the prefix up to the
        first tab is dropped before parsing.

        Returns:
            The parsed object, or ``None`` when the text is not valid JSON
            or its top-level value is not an object.
        """
        raw = "\n".join(
            line.split("\t", 1)[1] if "\t" in line else line
            for line in numbered.split("\n")
        )
        try:
            parsed = json.loads(raw)
        except json.JSONDecodeError:
            return None
        return parsed if isinstance(parsed, dict) else None

    @staticmethod
    def _merged_dependencies(pkg: dict) -> dict:
        """Merge ``dependencies`` and ``devDependencies`` (dev entries win).

        Sections that are missing or not objects (e.g. ``null``) are ignored.
        """
        merged = {}
        for section_name in ("dependencies", "devDependencies"):
            section = pkg.get(section_name)
            if isinstance(section, dict):
                merged.update(section)
        return merged

    def get_project_info(self) -> dict:
        """Get Next.js project information including router type, TypeScript usage, and version.

        Returns:
            A dict with ``framework``, ``router_type`` (``"app"``,
            ``"pages"`` or ``None``), ``has_src_dir``, ``typescript``,
            ``nextjs_version``, ``has_middleware``, ``has_app_dir`` and
            ``has_pages_dir``. The result is cached on the instance.
        """
        if self._project_info_cache is not None:
            return self._project_info_cache

        info = {
            "framework": "nextjs",
            "router_type": None,
            "has_src_dir": False,
            "typescript": False,
            "nextjs_version": None,
            "has_middleware": False,
            "has_app_dir": False,
            "has_pages_dir": False,
        }

        pkg_result = self.fs.read_file("package.json")
        if "content" in pkg_result:
            pkg = self._parse_numbered_json(pkg_result["content"])
            if pkg is not None:
                deps = self._merged_dependencies(pkg)
                if "next" in deps:
                    info["nextjs_version"] = deps["next"]
                info["typescript"] = "typescript" in deps

        info["has_src_dir"] = "error" not in self.fs.list_dir("src")
        base = "src" if info["has_src_dir"] else "."

        info["has_app_dir"] = "error" not in self.fs.list_dir(f"{base}/app")
        info["has_pages_dir"] = "error" not in self.fs.list_dir(f"{base}/pages")

        if info["has_app_dir"]:
            info["router_type"] = "app"
        elif info["has_pages_dir"]:
            info["router_type"] = "pages"

        # Generator + any() stops at the first middleware file that exists.
        info["has_middleware"] = any(
            "error" not in self.fs.read_file(loc)
            for loc in self._MIDDLEWARE_LOCATIONS
        )

        self._project_info_cache = info
        return info

    def get_route_map(self) -> dict:
        """Extract all routes from the Next.js application (pages, API routes, route handlers).

        Returns:
            ``{"app_routes", "page_routes", "api_routes"}``, each a list of
            ``{"file", "route", "type"}`` dicts. The result is cached on the
            instance.
        """
        if self._route_cache is not None:
            return self._route_cache

        info = self.get_project_info()
        routes = {"app_routes": [], "page_routes": [], "api_routes": []}

        base = "src" if info["has_src_dir"] else "."

        if info["has_app_dir"]:
            app_dir = f"{base}/app"
            app_files = self.fs.glob("**/page.{js,jsx,ts,tsx}", app_dir)
            for f in app_files.get("matches", []):
                route = self._file_to_route(f, app_dir, "app")
                routes["app_routes"].append({"file": f, "route": route, "type": "page"})

            route_files = self.fs.glob("**/route.{js,ts}", app_dir)
            for f in route_files.get("matches", []):
                route = self._file_to_route(f, app_dir, "app")
                routes["api_routes"].append({"file": f, "route": route, "type": "route_handler"})

        if info["has_pages_dir"]:
            pages_dir = f"{base}/pages"
            page_files = self.fs.glob("**/*.{js,jsx,ts,tsx}", pages_dir)
            for f in page_files.get("matches", []):
                if "/api/" in f or f.startswith("api/"):
                    route = self._file_to_route(f, pages_dir, "pages")
                    routes["api_routes"].append({"file": f, "route": route, "type": "api_route"})
                elif not f.startswith("_") and "/_" not in f:
                    # Underscore-prefixed files (_app, _document, ...) are skipped.
                    route = self._file_to_route(f, pages_dir, "pages")
                    routes["page_routes"].append({"file": f, "route": route, "type": "page"})

        self._route_cache = routes
        return routes

    def _file_to_route(self, file_path: str, base_dir: str, router_type: str) -> str:
        """Convert a file path to a route path.

        Args:
            file_path: Project-relative path as returned by ``glob``
                (e.g. ``"app/users/[id]/page.tsx"``).
            base_dir: Router root as passed to ``glob`` (e.g. ``"./app"``
                or ``"src/pages"``); a leading ``./`` is ignored.
            router_type: ``"app"`` or ``"pages"``. Selects which file names
                are route markers (``page``/``route`` vs ``index``) and, for
                the app router, drops route groups ``(name)`` and parallel
                route slots ``@name``, which are not part of the URL.

        Returns:
            A route starting with ``/`` in which ``[param]`` becomes
            ``:param`` and catch-all segments become ``*``.
        """
        route = file_path
        if route.startswith("./"):
            route = route[2:]

        # glob() returns normalised paths ("app/x"), while callers pass
        # "./app" as the base, so normalise the base before comparing.
        prefix = base_dir
        while prefix.startswith("./"):
            prefix = prefix[2:]
        prefix = prefix.strip("/")
        if prefix and prefix != "." and (route == prefix or route.startswith(prefix + "/")):
            route = route[len(prefix):]
        route = route.lstrip("/")

        route = re.sub(r"\.(js|jsx|ts|tsx)$", "", route)

        if router_type == "app":
            marker = r"(?:^|/)(?:page|route)$"
        elif router_type == "pages":
            marker = r"(?:^|/)index$"
        else:
            marker = r"(?:^|/)(?:page|route|index)$"
        # "(?:^|/)" also matches a marker at the very root, e.g. "page" -> "".
        route = re.sub(marker, "", route)

        if router_type == "app":
            route = "/".join(
                segment
                for segment in route.split("/")
                if not (segment.startswith("(") and segment.endswith(")"))
                and not segment.startswith("@")
            )

        # Optional catch-all must be rewritten before the plain catch-all,
        # otherwise "[[...slug]]" would become "[*]".
        route = re.sub(r"\[\[\.\.\.(\w+)\]\]", "*", route)
        route = re.sub(r"\[\.\.\.(\w+)\]", "*", route)
        route = re.sub(r"\[(\w+)\]", r":\1", route)

        if not route.startswith("/"):
            route = "/" + route

        return "/" if route == "/" else route.rstrip("/")

    def get_server_actions(self) -> dict:
        """Find all server actions ('use server') in the codebase.

        Every TypeScript/JavaScript file whose content contains a
        ``"use server"`` / ``'use server'`` string is scanned line by line
        for ``async function name`` declarations and for
        ``const name = async ...`` assignments.

        Returns:
            ``{"server_actions": [{"file", "function", "line"}, ...]}``.
        """
        actions = []

        ts_files = self.fs.glob("**/*.{ts,tsx}", ".")
        js_files = self.fs.glob("**/*.{js,jsx}", ".")
        all_files = ts_files.get("matches", []) + js_files.get("matches", [])

        for file_path in all_files:
            if "node_modules" in file_path:
                continue

            content_result = self.fs.read_file(file_path)
            if "error" in content_result:
                continue

            content = content_result["content"]
            if '"use server"' not in content and "'use server'" not in content:
                continue

            for index, line in enumerate(content.split("\n")):
                if "async" not in line:
                    continue
                match = self._ASYNC_FUNCTION.search(line) or self._ASYNC_ARROW.search(line)
                if match:
                    actions.append({
                        "file": file_path,
                        "function": match.group(1),
                        "line": index + 1,
                    })

        return {"server_actions": actions}

    def get_middleware_config(self) -> dict:
        """Get the middleware configuration and matcher patterns.

        Returns:
            ``{"file", "content", "matcher"}`` for the first middleware file
            found, where ``matcher`` is the raw source text of the matcher
            value or ``None``; ``{"error": "No middleware found"}`` otherwise.
        """
        for loc in self._MIDDLEWARE_LOCATIONS:
            result = self.fs.read_file(loc)
            if "error" in result:
                continue

            content = result["content"]
            config = {"file": loc, "content": content, "matcher": None}

            matcher_match = re.search(r"matcher\s*[=:]\s*(\[[\s\S]*?\]|['\"][^'\"]+['\"])", content)
            if matcher_match:
                config["matcher"] = matcher_match.group(1)

            return config

        return {"error": "No middleware found"}

    def check_dependencies(self) -> dict:
        """Check package.json for security-relevant dependencies.

        Returns:
            ``{"all_dependencies", "security_relevant"}`` where
            ``security_relevant`` maps each category to the packages from
            ``_SECURITY_RELEVANT`` that are present, or ``{"error": ...}``
            when package.json cannot be read or parsed.
        """
        result = self.fs.read_file("package.json")
        if "error" in result:
            return {"error": "Could not read package.json"}

        pkg = self._parse_numbered_json(result.get("content", ""))
        if pkg is None:
            return {"error": "Could not parse package.json"}

        deps = self._merged_dependencies(pkg)
        found = {
            category: [p for p in packages if p in deps]
            for category, packages in self._SECURITY_RELEVANT.items()
        }

        return {
            "all_dependencies": deps,
            "security_relevant": found,
        }

    def get_tool_definitions(self) -> list[dict]:
        """Return OpenAI-compatible tool definitions."""
        return [
            {
                "name": "get_project_info",
                "description": "Get Next.js project information including router type, TypeScript usage, and version.",
                "parameters": {"type": "object", "properties": {}},
            },
            {
                "name": "get_route_map",
                "description": "Extract all routes from the Next.js application (pages, API routes, route handlers).",
                "parameters": {"type": "object", "properties": {}},
            },
            {
                "name": "get_server_actions",
                "description": "Find all server actions ('use server') in the codebase.",
                "parameters": {"type": "object", "properties": {}},
            },
            {
                "name": "get_middleware_config",
                "description": "Get the middleware configuration and matcher patterns.",
                "parameters": {"type": "object", "properties": {}},
            },
            {
                "name": "check_dependencies",
                "description": "Check package.json for security-relevant dependencies.",
                "parameters": {"type": "object", "properties": {}},
            },
        ]

    def execute_tool(self, name: str, arguments: dict) -> dict:
        """Execute a tool by name with the given arguments.

        Args:
            name: Name of one of the tools from ``get_tool_definitions``.
            arguments: Ignored; accepted only for a uniform call signature.

        Returns:
            The tool's result dict, or ``{"error": ...}`` for an unknown name.
        """
        tools = {
            "get_project_info": self.get_project_info,
            "get_route_map": self.get_route_map,
            "get_server_actions": self.get_server_actions,
            "get_middleware_config": self.get_middleware_config,
            "check_dependencies": self.check_dependencies,
        }
        if name not in tools:
            return {"error": f"Unknown tool: {name}"}
        # These tools take no arguments - ignore any hallucinated arguments from LLM
        return tools[name]()
|