openhack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113):
  1. openhack/__init__.py +2 -0
  2. openhack/__main__.py +225 -0
  3. openhack/agents/__init__.py +30 -0
  4. openhack/agents/base.py +230 -0
  5. openhack/agents/browser_verifier.py +679 -0
  6. openhack/agents/browser_verifier_swarm.py +256 -0
  7. openhack/agents/checkpoint.py +89 -0
  8. openhack/agents/context_manager.py +356 -0
  9. openhack/agents/coordinator.py +1105 -0
  10. openhack/agents/endpoint_analyst.py +307 -0
  11. openhack/agents/feature_hunter.py +93 -0
  12. openhack/agents/hunter.py +481 -0
  13. openhack/agents/hunter_swarm.py +385 -0
  14. openhack/agents/llm.py +334 -0
  15. openhack/agents/recon.py +19 -0
  16. openhack/agents/sandbox_verifier.py +396 -0
  17. openhack/agents/sandbox_verifier_swarm.py +250 -0
  18. openhack/agents/session.py +286 -0
  19. openhack/agents/validator.py +217 -0
  20. openhack/agents/validator_swarm.py +106 -0
  21. openhack/auth.py +175 -0
  22. openhack/browser/__init__.py +12 -0
  23. openhack/browser/runner.py +385 -0
  24. openhack/categories.py +130 -0
  25. openhack/config.py +201 -0
  26. openhack/deterministic_recon.py +464 -0
  27. openhack/entry_points.py +745 -0
  28. openhack/framework_classifier.py +515 -0
  29. openhack/framework_detection.py +269 -0
  30. openhack/headless_scan.py +179 -0
  31. openhack/prompts/__init__.py +108 -0
  32. openhack/prompts/browser_verifier.py +171 -0
  33. openhack/prompts/coordinator.py +31 -0
  34. openhack/prompts/django/__init__.py +32 -0
  35. openhack/prompts/django/auth_bypass.py +76 -0
  36. openhack/prompts/django/csrf.py +62 -0
  37. openhack/prompts/django/data_exposure.py +67 -0
  38. openhack/prompts/django/idor.py +74 -0
  39. openhack/prompts/django/injection.py +67 -0
  40. openhack/prompts/django/misconfiguration.py +70 -0
  41. openhack/prompts/django/ssrf.py +64 -0
  42. openhack/prompts/endpoint_analyst.py +122 -0
  43. openhack/prompts/express/__init__.py +29 -0
  44. openhack/prompts/express/auth_bypass.py +71 -0
  45. openhack/prompts/express/data_exposure.py +77 -0
  46. openhack/prompts/express/idor.py +69 -0
  47. openhack/prompts/express/injection.py +75 -0
  48. openhack/prompts/express/misconfiguration.py +72 -0
  49. openhack/prompts/express/ssrf.py +63 -0
  50. openhack/prompts/feature_hunter.py +140 -0
  51. openhack/prompts/flask/__init__.py +29 -0
  52. openhack/prompts/flask/auth_bypass.py +86 -0
  53. openhack/prompts/flask/data_exposure.py +78 -0
  54. openhack/prompts/flask/idor.py +83 -0
  55. openhack/prompts/flask/injection.py +77 -0
  56. openhack/prompts/flask/misconfiguration.py +73 -0
  57. openhack/prompts/flask/ssrf.py +65 -0
  58. openhack/prompts/hunter.py +362 -0
  59. openhack/prompts/hunter_continuation_loop.py +12 -0
  60. openhack/prompts/hunter_continuation_no_findings.py +19 -0
  61. openhack/prompts/hunter_continuation_no_progress.py +22 -0
  62. openhack/prompts/hunter_tool_instructions.py +55 -0
  63. openhack/prompts/nextjs/__init__.py +42 -0
  64. openhack/prompts/nextjs/auth_bypass.py +80 -0
  65. openhack/prompts/nextjs/csrf.py +71 -0
  66. openhack/prompts/nextjs/data_exposure.py +88 -0
  67. openhack/prompts/nextjs/idor.py +64 -0
  68. openhack/prompts/nextjs/injection.py +65 -0
  69. openhack/prompts/nextjs/middleware_bypass.py +75 -0
  70. openhack/prompts/nextjs/misconfiguration.py +92 -0
  71. openhack/prompts/nextjs/server_actions.py +97 -0
  72. openhack/prompts/nextjs/ssrf.py +66 -0
  73. openhack/prompts/nextjs/xss.py +69 -0
  74. openhack/prompts/pr_analysis_system.py +80 -0
  75. openhack/prompts/pr_analysis_user.py +11 -0
  76. openhack/prompts/project_context.py +89 -0
  77. openhack/prompts/recon.py +199 -0
  78. openhack/prompts/reporter.py +88 -0
  79. openhack/prompts/researchers.py +434 -0
  80. openhack/prompts/sandbox_verifier.py +128 -0
  81. openhack/prompts/supabase/__init__.py +39 -0
  82. openhack/prompts/supabase/auth_tokens.py +131 -0
  83. openhack/prompts/supabase/edge_functions.py +150 -0
  84. openhack/prompts/supabase/graphql.py +102 -0
  85. openhack/prompts/supabase/postgrest.py +99 -0
  86. openhack/prompts/supabase/realtime.py +93 -0
  87. openhack/prompts/supabase/rls.py +110 -0
  88. openhack/prompts/supabase/rpc_functions.py +127 -0
  89. openhack/prompts/supabase/storage.py +110 -0
  90. openhack/prompts/supabase/tenant_isolation.py +118 -0
  91. openhack/prompts/validator.py +319 -0
  92. openhack/prompts/validator_continuation_incomplete.py +12 -0
  93. openhack/prompts/validator_tool_instructions.py +29 -0
  94. openhack/quality.py +231 -0
  95. openhack/sandbox/__init__.py +12 -0
  96. openhack/sandbox/orchestrator.py +517 -0
  97. openhack/sandbox/runner.py +177 -0
  98. openhack/scan_session.py +245 -0
  99. openhack/setup.py +452 -0
  100. openhack/static_validator.py +612 -0
  101. openhack/tools/__init__.py +1 -0
  102. openhack/tools/ast_tools.py +307 -0
  103. openhack/tools/coverage.py +1078 -0
  104. openhack/tools/filesystem.py +404 -0
  105. openhack/tools/nextjs.py +258 -0
  106. openhack/tools/registry.py +52 -0
  107. openhack/tui.py +3450 -0
  108. openhack/updates.py +170 -0
  109. openhack-0.1.0.dist-info/METADATA +189 -0
  110. openhack-0.1.0.dist-info/RECORD +113 -0
  111. openhack-0.1.0.dist-info/WHEEL +4 -0
  112. openhack-0.1.0.dist-info/entry_points.txt +2 -0
  113. openhack-0.1.0.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,404 @@
1
+ """
2
+ File system tools for vulnerability scanning.
3
+ Provides safe, jailed access to the target directory.
4
+ """
5
+
6
+ import fnmatch
7
+ import inspect
8
+ import os
9
+ import re
10
+ import subprocess
11
+ from pathlib import Path
12
+ from typing import Optional
13
+
14
# Directory-name patterns passed to grep as --exclude-dir and also used by
# FileSystemTools.glob to decide which directories to skip while walking.
_GREP_EXCLUDE_DIRS = [
    # version control / dependency trees / virtualenvs
    ".git",
    "node_modules",
    "__pycache__",
    ".venv",
    "venv",
    # build output
    "dist",
    "build",
    ".next",
    ".nuxt",
    ".output",
    "vendor",
    "target",
    # tool caches and packaging artifacts
    "coverage",
    ".mypy_cache",
    ".pytest_cache",
    ".tox",
    "eggs",
    "*.egg-info",
]

# File-name globs passed to grep as --include when the caller does not
# supply an explicit include pattern.
_GREP_SOURCE_INCLUDES = [
    "*.py",
    "*.js",
    "*.ts",
    "*.tsx",
    "*.jsx",
    "*.rb",
    "*.go",
    "*.rs",
    "*.java",
    "*.php",
    "*.c",
    "*.cpp",
    "*.h",
    "*.vue",
    "*.svelte",
]
27
+
28
+
29
class FileSystemTools:
    """File system tools with path safety enforcement.

    Every public tool resolves its ``path`` argument against ``jail_dir`` and
    returns a plain ``dict``. Failures are reported as ``{"error": "..."}``
    instead of being raised to the caller.
    """

    # Suffixes that read_file refuses to open and reports as binary.
    BINARY_EXTENSIONS = frozenset({
        ".zip", ".gz", ".tar", ".bz2", ".xz", ".7z", ".rar",
        ".jar", ".war", ".ear", ".class",
        ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".ico", ".webp", ".svg",
        ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
        ".woff", ".woff2", ".ttf", ".eot", ".otf",
        ".exe", ".dll", ".so", ".dylib", ".o", ".a",
        ".pyc", ".pyo", ".wasm",
        ".sqlite", ".db", ".sqlite3",
        ".mp3", ".mp4", ".avi", ".mov", ".wav", ".flac",
        ".bin", ".dat", ".iso", ".img",
        ".sql", ".csv", ".tsv", ".log", ".dump",
    })
    MAX_FILE_SIZE = 200_000  # 200KB — files larger than this are truncated
    MAX_LINES_DEFAULT = 1000

    _MAX_GLOB_MATCHES = 500
    _MAX_GREP_MATCHES = 100
    # First (innermost-left) ``{a,b,c}`` group of a glob pattern.
    _BRACE_RE = re.compile(r"\{([^}]+)\}")

    def __init__(self, jail_dir: Path):
        """Store the resolved jail directory all tool paths are confined to."""
        self.jail_dir = Path(jail_dir).resolve()

    def _resolve_safe_path(self, path: str) -> Path:
        """Resolve ``path`` against the jail directory.

        Returns the resolved absolute path.

        Raises:
            PermissionError: if the resolved path is not the jail directory
                itself or located underneath it.
        """
        requested = (self.jail_dir / path).resolve()
        # Compare path components, not string prefixes: a plain startswith()
        # check would accept a sibling such as "<jail>-other/secret".
        try:
            requested.relative_to(self.jail_dir)
        except ValueError:
            raise PermissionError(
                f"Access denied: {path} is outside the allowed directory"
            ) from None
        return requested

    def read_file(self, path: str, offset: int = 0, limit: Optional[int] = None) -> dict:
        """Read the contents of a file with line numbers.

        Args:
            path: File path relative to the jail directory.
            offset: Number of leading lines to skip (0-indexed start line).
                Negative values are treated as 0.
            limit: Maximum number of lines to return. ``None``, 0 or a
                negative value means "no explicit limit"; files larger than
                ``MAX_FILE_SIZE`` are then capped at ``MAX_LINES_DEFAULT``.

        Returns:
            A dict with ``path``, ``content`` (``"{n:6}\\t{line}"`` rows),
            ``total_lines``, ``offset`` and ``lines_returned``; a dict with
            ``binary: True`` for files whose suffix is in
            ``BINARY_EXTENSIONS``; or ``{"error": ...}``.
        """
        try:
            resolved = self._resolve_safe_path(path)
            if not resolved.exists():
                return {"error": f"File not found: {path}"}
            if not resolved.is_file():
                return {"error": f"Not a file: {path}"}

            rel_path = str(resolved.relative_to(self.jail_dir))
            file_size = resolved.stat().st_size

            if resolved.suffix.lower() in self.BINARY_EXTENSIONS:
                return {
                    "path": rel_path,
                    "content": f"[Binary file: {resolved.suffix}, {file_size:,} bytes — cannot read]",
                    "total_lines": 0,
                    "binary": True,
                }

            offset = max(0, offset or 0)
            if limit is not None and limit <= 0:
                limit = None

            is_large = file_size > self.MAX_FILE_SIZE
            truncated = False

            with open(resolved, "r", encoding="utf-8", errors="replace") as f:
                if is_large:
                    # Stream large files so only the requested window is
                    # kept in memory.
                    effective_limit = limit or self.MAX_LINES_DEFAULT
                    skipped = 0
                    while skipped < offset and f.readline():
                        skipped += 1
                    lines = []
                    while len(lines) < effective_limit:
                        line = f.readline()
                        if not line:
                            break
                        lines.append(line)
                    remaining = sum(1 for _ in f)
                    # Use the number of lines actually skipped so an offset
                    # past EOF does not inflate the total.
                    total_lines = skipped + len(lines) + remaining
                    truncated = remaining > 0
                else:
                    all_lines = f.readlines()
                    total_lines = len(all_lines)
                    end = offset + limit if limit else None
                    lines = all_lines[offset:end]

            numbered_lines = [
                f"{i:6}\t{line.rstrip()}"
                for i, line in enumerate(lines, start=offset + 1)
            ]
            # Count before the notice is appended: the notice is not a line
            # of the file.
            lines_returned = len(numbered_lines)

            if truncated:
                numbered_lines.append(f"\n[... file truncated: {total_lines:,} total lines, {file_size:,} bytes — use offset/limit to read more ...]")

            return {
                "path": rel_path,
                "content": "\n".join(numbered_lines),
                "total_lines": total_lines,
                "offset": offset,
                "lines_returned": lines_returned,
            }
        except PermissionError as e:
            return {"error": str(e)}
        except Exception as e:
            return {"error": f"Error reading file: {e}"}

    def list_dir(self, path: str = ".", ignore: Optional[list[str]] = None) -> dict:
        """List contents of a directory.

        Args:
            path: Directory path relative to the jail directory.
            ignore: fnmatch patterns; an entry is skipped when either its
                jail-relative path or its bare name matches one of them.

        Returns:
            ``{"path": ..., "entries": [{"name", "type", "size"}, ...]}``
            sorted by path, or ``{"error": ...}``. ``size`` is ``None`` for
            anything that is not a regular file.
        """
        try:
            resolved = self._resolve_safe_path(path)
            if not resolved.exists():
                return {"error": f"Directory not found: {path}"}
            if not resolved.is_dir():
                return {"error": f"Not a directory: {path}"}

            ignore = ignore or []
            entries = []
            for entry in sorted(resolved.iterdir()):
                rel_path = str(entry.relative_to(self.jail_dir))
                if any(
                    fnmatch.fnmatch(rel_path, pat) or fnmatch.fnmatch(entry.name, pat)
                    for pat in ignore
                ):
                    continue
                entry_type = "dir" if entry.is_dir() else "file"
                size = entry.stat().st_size if entry.is_file() else None
                entries.append({"name": entry.name, "type": entry_type, "size": size})

            return {
                "path": str(resolved.relative_to(self.jail_dir)),
                "entries": entries,
            }
        except PermissionError as e:
            return {"error": str(e)}
        except Exception as e:
            return {"error": f"Error listing directory: {e}"}

    def _expand_braces(self, pattern: str) -> list[str]:
        """Expand brace patterns like {js,jsx,ts,tsx} into multiple patterns.

        Groups are expanded left to right, recursively; a pattern without a
        brace group is returned as a single-element list. Nested groups are
        not supported.
        """
        match = self._BRACE_RE.search(pattern)
        if not match:
            return [pattern]

        prefix = pattern[:match.start()]
        suffix = pattern[match.end():]

        expanded = []
        for alt in match.group(1).split(","):
            expanded.extend(self._expand_braces(prefix + alt.strip() + suffix))
        return expanded

    @staticmethod
    def _is_excluded_dir(name: str) -> bool:
        """Return True when a directory name matches an exclude pattern."""
        # Entries such as "*.egg-info" are glob patterns, so they must be
        # matched with fnmatch rather than compared for equality.
        return any(fnmatch.fnmatch(name, pat) for pat in _GREP_EXCLUDE_DIRS)

    def glob(self, pattern: str, path: str = ".") -> dict:
        """Find files matching a glob pattern recursively.

        Supports brace expansion like {js,jsx,ts,tsx}.
        Skips known non-source directories for performance.

        A pattern without ``/`` (after an optional leading ``**/``) is
        matched against file names at any depth. A pattern containing ``/``
        is matched against the path relative to ``path``; with a leading
        ``**/`` it may also match any trailing part of that path.

        Returns:
            ``{"pattern": ..., "matches": [...]}`` with at most 500 sorted,
            jail-relative paths, or ``{"error": ...}``.
        """
        try:
            resolved = self._resolve_safe_path(path)
            if not resolved.exists():
                return {"error": f"Directory not found: {path}"}

            matches = set()

            for exp_pattern in self._expand_braces(pattern):
                search_pattern = exp_pattern
                recursive = search_pattern.startswith("**/")
                if recursive:
                    search_pattern = search_pattern[3:]

                match_path = "/" in search_pattern

                for root, dirs, files in os.walk(resolved):
                    # Prune in place so os.walk does not descend into them.
                    dirs[:] = [d for d in dirs if not self._is_excluded_dir(d)]
                    root_path = Path(root)
                    for filename in files:
                        full = root_path / filename
                        if match_path:
                            # as_posix() keeps "/"-based matching correct
                            # regardless of the platform separator.
                            rel_from_base = full.relative_to(resolved).as_posix()
                            matched = fnmatch.fnmatch(rel_from_base, search_pattern)
                            if not matched and recursive:
                                parts = rel_from_base.split("/")
                                matched = any(
                                    fnmatch.fnmatch("/".join(parts[i:]), search_pattern)
                                    for i in range(1, len(parts))
                                )
                        else:
                            matched = fnmatch.fnmatch(filename, search_pattern)

                        if matched:
                            matches.add(str(full.relative_to(self.jail_dir)))
                            if len(matches) >= self._MAX_GLOB_MATCHES:
                                return {"pattern": pattern, "matches": sorted(matches)}

            return {
                "pattern": pattern,
                "matches": sorted(matches)[:self._MAX_GLOB_MATCHES],
            }
        except PermissionError as e:
            return {"error": str(e)}
        except Exception as e:
            return {"error": f"Error during glob: {e}"}

    def grep(self, pattern: str, path: str = ".", include: Optional[str] = None) -> dict:
        """Search for a regex pattern in files using system grep for speed.

        For a single file the search is done in Python and returns one entry
        per matching line. For a directory the system ``grep -rEl`` is used
        and only matching file names are returned (``line`` is 0 and
        ``content`` is empty).

        Args:
            pattern: Regular expression to search for.
            path: File or directory relative to the jail directory.
            include: Optional file-name glob (brace groups are expanded);
                defaults to the common source-file extensions.

        Returns:
            ``{"pattern": ..., "matches": [...]}`` with at most 100 entries.
            ``"timed_out": True`` is added when grep exceeded its 30 second
            budget and ``"warning"`` carries grep's stderr when it exited
            with an error status. ``{"error": ...}`` on failure.
        """
        try:
            resolved = self._resolve_safe_path(path)
            if not resolved.exists():
                return {"error": f"Path not found: {path}"}

            if resolved.is_file():
                return self._grep_single_file(resolved, pattern)

            cmd = ["grep", "-rEl", "--max-count=3", "--binary-files=without-match"]
            cmd.extend(f"--exclude-dir={d}" for d in _GREP_EXCLUDE_DIRS)
            if include:
                # grep's --include has no brace support, so expand here.
                cmd.extend(f"--include={inc}" for inc in self._expand_braces(include))
            else:
                cmd.extend(f"--include={ext}" for ext in _GREP_SOURCE_INCLUDES)
            # "-e" and "--" keep a pattern or path that begins with "-" from
            # being parsed as a grep option.
            cmd.extend(["-e", pattern, "--", str(resolved)])

            result = subprocess.run(
                cmd, capture_output=True, text=True, timeout=30
            )
            file_paths = [
                p.strip() for p in result.stdout.strip().split("\n") if p.strip()
            ][:self._MAX_GREP_MATCHES]

            matches = []
            for fp in file_paths:
                try:
                    rel = str(Path(fp).relative_to(self.jail_dir))
                except ValueError:
                    rel = fp
                # NOTE(review): this substring filter also drops paths such
                # as "latest/" or "contest.py", and directory searches are
                # case-sensitive while single-file searches are not —
                # confirm both are intended.
                if "node_modules" in rel or "test" in rel.lower():
                    continue
                matches.append({"file": rel, "line": 0, "content": ""})
                if len(matches) >= self._MAX_GREP_MATCHES:
                    break

            response = {"pattern": pattern, "matches": matches}
            # grep exits 0 on match, 1 on no match, >1 on error (for example
            # an invalid regular expression).
            if result.returncode > 1 and result.stderr.strip():
                response["warning"] = result.stderr.strip()[:200]
            return response
        except subprocess.TimeoutExpired:
            return {"pattern": pattern, "matches": [], "timed_out": True}
        except PermissionError as e:
            return {"error": str(e)}
        except Exception as e:
            return {"error": f"Error during grep: {e}"}

    def _grep_single_file(self, file_path: Path, pattern: str) -> dict:
        """Grep a single file using Python (for when we need line-level results).

        The pattern is compiled case-insensitively. At most 100 matching
        lines are returned, each truncated to 200 characters.

        Raises:
            re.error: if ``pattern`` is not a valid regular expression.
        """
        regex = re.compile(pattern, re.IGNORECASE)
        rel = str(file_path.relative_to(self.jail_dir))
        matches = []
        try:
            with open(file_path, "r", encoding="utf-8", errors="replace") as f:
                for line_num, line in enumerate(f, 1):
                    if regex.search(line):
                        matches.append({
                            "file": rel,
                            "line": line_num,
                            "content": line.strip()[:200],
                        })
                        if len(matches) >= self._MAX_GREP_MATCHES:
                            break
        except OSError:
            # Best effort: an unreadable file yields whatever was collected.
            pass
        return {"pattern": pattern, "matches": matches}

    def get_tool_definitions(self) -> list[dict]:
        """Return OpenAI-compatible tool definitions."""
        return [
            {
                "name": "read_file",
                "description": "Read the contents of a file. Returns line-numbered content.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "path": {
                            "type": "string",
                            "description": "Path to the file (relative to target directory)",
                        },
                        "offset": {
                            "type": "integer",
                            "description": "Line number to start reading from (0-indexed)",
                            "default": 0,
                        },
                        "limit": {
                            "type": "integer",
                            "description": "Maximum number of lines to read",
                        },
                    },
                    "required": ["path"],
                },
            },
            {
                "name": "list_dir",
                "description": "List contents of a directory.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "path": {
                            "type": "string",
                            "description": "Path to directory (relative to target)",
                            "default": ".",
                        },
                        "ignore": {
                            "type": "array",
                            "items": {"type": "string"},
                            "description": "Glob patterns to ignore",
                        },
                    },
                },
            },
            {
                "name": "glob",
                "description": "Find files matching a glob pattern recursively.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "pattern": {
                            "type": "string",
                            "description": "Glob pattern (e.g., '*.ts', '**/*.tsx')",
                        },
                        "path": {
                            "type": "string",
                            "description": "Starting directory",
                            "default": ".",
                        },
                    },
                    "required": ["pattern"],
                },
            },
            {
                "name": "grep",
                "description": "Search for a regex pattern in files.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "pattern": {
                            "type": "string",
                            "description": "Regex pattern to search for",
                        },
                        "path": {
                            "type": "string",
                            "description": "Starting path",
                            "default": ".",
                        },
                        "include": {
                            "type": "string",
                            "description": "Glob pattern for files to include (e.g., '*.ts')",
                        },
                    },
                    "required": ["pattern"],
                },
            },
        ]

    def execute_tool(self, name: str, arguments: dict) -> dict:
        """Execute a tool by name with the given arguments.

        Filters out unexpected keyword arguments that the LLM may hallucinate
        (e.g., passing 'include' to glob when it only belongs on grep).

        Returns the tool's result dict, or ``{"error": ...}`` when the tool
        name is unknown or a required argument is missing.
        """
        tools = {
            "read_file": self.read_file,
            "list_dir": self.list_dir,
            "glob": self.glob,
            "grep": self.grep,
        }
        if name not in tools:
            return {"error": f"Unknown tool: {name}"}

        func = tools[name]
        sig = inspect.signature(func)
        if not isinstance(arguments, dict):
            arguments = {}
        filtered_args = {k: v for k, v in arguments.items() if k in sig.parameters}
        # Validate the binding up front so a missing required argument
        # becomes an error result instead of an uncaught TypeError.
        try:
            sig.bind(**filtered_args)
        except TypeError as e:
            return {"error": f"Invalid arguments for {name}: {e}"}
        return func(**filtered_args)
@@ -0,0 +1,258 @@
1
+ """
2
+ Next.js specific analysis tools for vulnerability scanning.
3
+ """
4
+
5
+ import json
6
+ import re
7
+ from pathlib import Path
8
+ from typing import Optional
9
+
10
+ from .filesystem import FileSystemTools
11
+
12
+
13
class NextJSTools:
    """Tools for analyzing Next.js application structure and patterns.

    All file access goes through the ``FileSystemTools`` instance supplied
    at construction; its ``read_file`` output is line-numbered
    (``"{n:6}\\t{line}"``), which the helpers here account for.
    """

    _SOURCE_EXT_RE = re.compile(r"\.(js|jsx|ts|tsx)$")
    _ASYNC_FUNCTION_RE = re.compile(r"(?:export\s+)?async\s+function\s+(\w+)")
    _ASYNC_ARROW_RE = re.compile(r"(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*async\b")
    _MIDDLEWARE_LOCATIONS = (
        "middleware.ts",
        "middleware.js",
        "src/middleware.ts",
        "src/middleware.js",
    )

    def __init__(self, fs_tools: "FileSystemTools"):
        """Keep the file system tools and start with empty result caches."""
        self.fs = fs_tools
        self._route_cache: Optional[dict] = None
        self._project_info_cache: Optional[dict] = None

    @staticmethod
    def _parse_package_json(numbered_content: str) -> Optional[dict]:
        """Parse line-numbered package.json text; return None if unusable."""
        raw = "\n".join(
            line.split("\t", 1)[1] if "\t" in line else line
            for line in numbered_content.split("\n")
        )
        try:
            pkg = json.loads(raw)
        except json.JSONDecodeError:
            return None
        return pkg if isinstance(pkg, dict) else None

    @staticmethod
    def _merged_dependencies(pkg: dict) -> dict:
        """Merge dependencies and devDependencies, tolerating null/invalid maps."""
        merged = {}
        for key in ("dependencies", "devDependencies"):
            section = pkg.get(key)
            if isinstance(section, dict):
                merged.update(section)
        return merged

    @staticmethod
    def _strip_base(file_path: str, base_dir: str) -> str:
        """Return ``file_path`` relative to ``base_dir`` using ``/`` separators.

        ``base_dir`` may carry a leading ``./`` (or be ``.``); the base is
        only removed on a whole path-segment boundary.
        """
        rel = file_path.replace("\\", "/")
        base = base_dir.replace("\\", "/")
        while rel.startswith("./"):
            rel = rel[2:]
        while base.startswith("./"):
            base = base[2:]
        base = base.strip("/")
        if base in ("", "."):
            return rel.lstrip("/")
        if rel == base:
            return ""
        if rel.startswith(base + "/"):
            return rel[len(base) + 1:]
        return rel.lstrip("/")

    def _locate_router_dir(self, name: str) -> Optional[str]:
        """Return the jail-relative router directory (``app``/``pages``) or None."""
        # Check the project root before src/: Next.js ignores src/app and
        # src/pages when the same directory exists at the root.
        for candidate in (name, f"src/{name}"):
            if "error" not in self.fs.list_dir(candidate):
                return candidate
        return None

    def get_project_info(self) -> dict:
        """Get Next.js project information including router type, TypeScript usage, and version.

        Returns a dict with ``framework``, ``router_type`` (``"app"``,
        ``"pages"`` or ``None``), ``has_src_dir``, ``typescript``,
        ``nextjs_version``, ``has_middleware``, ``has_app_dir``,
        ``has_pages_dir``, plus ``app_dir``/``pages_dir`` holding the
        directory that was found (or ``None``). The result is cached.
        """
        if self._project_info_cache is not None:
            return self._project_info_cache

        info = {
            "framework": "nextjs",
            "router_type": None,
            "has_src_dir": False,
            "typescript": False,
            "nextjs_version": None,
            "has_middleware": False,
            "has_app_dir": False,
            "has_pages_dir": False,
            "app_dir": None,
            "pages_dir": None,
        }

        pkg_result = self.fs.read_file("package.json")
        if "content" in pkg_result:
            pkg = self._parse_package_json(pkg_result["content"])
            if pkg is not None:
                deps = self._merged_dependencies(pkg)
                if "next" in deps:
                    info["nextjs_version"] = deps["next"]
                info["typescript"] = "typescript" in deps

        info["has_src_dir"] = "error" not in self.fs.list_dir("src")

        info["app_dir"] = self._locate_router_dir("app")
        info["has_app_dir"] = info["app_dir"] is not None

        info["pages_dir"] = self._locate_router_dir("pages")
        info["has_pages_dir"] = info["pages_dir"] is not None

        if info["has_app_dir"]:
            info["router_type"] = "app"
        elif info["has_pages_dir"]:
            info["router_type"] = "pages"

        info["has_middleware"] = any(
            "error" not in self.fs.read_file(loc) for loc in self._MIDDLEWARE_LOCATIONS
        )

        self._project_info_cache = info
        return info

    def get_route_map(self) -> dict:
        """Extract all routes from the Next.js application (pages, API routes, route handlers).

        Returns ``{"app_routes": [...], "page_routes": [...], "api_routes":
        [...]}`` where each entry has ``file``, ``route`` and ``type``. The
        result is cached.
        """
        if self._route_cache is not None:
            return self._route_cache

        info = self.get_project_info()
        routes = {"app_routes": [], "page_routes": [], "api_routes": []}

        app_dir = info.get("app_dir")
        if app_dir:
            app_files = self.fs.glob("**/page.{js,jsx,ts,tsx}", app_dir)
            for f in app_files.get("matches", []):
                route = self._file_to_route(f, app_dir, "app")
                routes["app_routes"].append({"file": f, "route": route, "type": "page"})

            route_files = self.fs.glob("**/route.{js,ts}", app_dir)
            for f in route_files.get("matches", []):
                route = self._file_to_route(f, app_dir, "app")
                routes["api_routes"].append({"file": f, "route": route, "type": "route_handler"})

        pages_dir = info.get("pages_dir")
        if pages_dir:
            page_files = self.fs.glob("**/*.{js,jsx,ts,tsx}", pages_dir)
            for f in page_files.get("matches", []):
                # Classify on the path inside the pages directory so a page
                # such as pages/docs/api/intro.tsx is not taken for an API
                # route.
                rel = self._strip_base(f, pages_dir)
                if rel.startswith("api/"):
                    route = self._file_to_route(f, pages_dir, "pages")
                    routes["api_routes"].append({"file": f, "route": route, "type": "api_route"})
                elif not rel.startswith("_") and "/_" not in rel:
                    route = self._file_to_route(f, pages_dir, "pages")
                    routes["page_routes"].append({"file": f, "route": route, "type": "page"})

        self._route_cache = routes
        return routes

    def _file_to_route(self, file_path: str, base_dir: str, router_type: str) -> str:
        """Convert a file path to a route path.

        Args:
            file_path: Source file path (as returned by ``glob``).
            base_dir: Router directory the file lives under, e.g. ``app``,
                ``./app`` or ``src/pages``.
            router_type: ``"app"`` drops a trailing ``page``/``route`` file
                name plus ``(group)`` and ``@slot`` segments; ``"pages"``
                drops a trailing ``index``; any other value drops all three
                file names.

        Returns:
            The route with a leading ``/``; ``[param]`` becomes ``:param``
            and ``[...param]`` / ``[[...param]]`` become ``*``.
        """
        rel = self._strip_base(file_path, base_dir)
        rel = self._SOURCE_EXT_RE.sub("", rel)
        segments = [seg for seg in rel.split("/") if seg]

        if router_type == "app":
            special_names = {"page", "route"}
        elif router_type == "pages":
            special_names = {"index"}
        else:
            special_names = {"page", "route", "index"}
        if segments and segments[-1] in special_names:
            segments.pop()

        if router_type == "app":
            # Route groups and parallel-route slots organise files but are
            # not part of the URL.
            segments = [
                seg for seg in segments
                if not (seg.startswith("(") and seg.endswith(")"))
                and not seg.startswith("@")
            ]

        route = "/" + "/".join(segments)
        # Optional catch-all must be rewritten before the plain catch-all,
        # otherwise "[[...x]]" would be left as "[*]".
        route = re.sub(r"\[\[\.\.\.(\w+)\]\]", "*", route)
        route = re.sub(r"\[\.\.\.(\w+)\]", "*", route)
        route = re.sub(r"\[(\w+)\]", r":\1", route)
        return route

    def get_server_actions(self) -> dict:
        """Find all server actions ('use server') in the codebase.

        Every file containing a ``"use server"`` / ``'use server'`` literal
        is scanned line by line for named async functions, both
        ``async function name`` declarations and ``const name = async``
        arrow/function expressions.

        Returns ``{"server_actions": [{"file", "function", "line"}, ...]}``.
        """
        actions = []

        ts_files = self.fs.glob("**/*.{ts,tsx}", ".")
        js_files = self.fs.glob("**/*.{js,jsx}", ".")

        all_files = ts_files.get("matches", []) + js_files.get("matches", [])

        for file_path in all_files:
            if "node_modules" in file_path:
                continue

            content_result = self.fs.read_file(file_path)
            if "error" in content_result:
                continue

            content = content_result["content"]
            if '"use server"' not in content and "'use server'" not in content:
                continue

            for i, line in enumerate(content.split("\n")):
                if "async" not in line:
                    continue
                match = self._ASYNC_FUNCTION_RE.search(line) or self._ASYNC_ARROW_RE.search(line)
                if match:
                    actions.append({
                        "file": file_path,
                        "function": match.group(1),
                        "line": i + 1,
                    })

        return {"server_actions": actions}

    def get_middleware_config(self) -> dict:
        """Get the middleware configuration and matcher patterns.

        Returns ``{"file", "content", "matcher"}`` for the first middleware
        file found (``matcher`` is the raw matched text or ``None``), or
        ``{"error": "No middleware found"}``.
        """
        for loc in self._MIDDLEWARE_LOCATIONS:
            result = self.fs.read_file(loc)
            if "error" in result:
                continue

            content = result["content"]
            config = {"file": loc, "content": content, "matcher": None}

            matcher_match = re.search(r"matcher\s*[=:]\s*(\[[\s\S]*?\]|['\"][^'\"]+['\"])", content)
            if matcher_match:
                config["matcher"] = matcher_match.group(1)

            return config

        return {"error": "No middleware found"}

    def check_dependencies(self) -> dict:
        """Check package.json for security-relevant dependencies.

        Returns ``{"all_dependencies": {...}, "security_relevant":
        {category: [package, ...]}}`` or ``{"error": ...}`` when
        package.json cannot be read or parsed.
        """
        result = self.fs.read_file("package.json")
        if "error" in result:
            return {"error": "Could not read package.json"}

        pkg = self._parse_package_json(result.get("content", ""))
        if pkg is None:
            return {"error": "Could not parse package.json"}

        deps = self._merged_dependencies(pkg)

        security_relevant = {
            "auth": ["next-auth", "@auth/core", "lucia", "clerk", "@clerk/nextjs", "supabase", "@supabase/supabase-js"],
            "database": ["prisma", "@prisma/client", "drizzle-orm", "mongoose", "pg", "mysql2", "better-sqlite3"],
            "validation": ["zod", "yup", "joi", "superstruct", "valibot"],
            "sanitization": ["dompurify", "xss", "sanitize-html", "isomorphic-dompurify"],
            "csrf": ["csrf", "csurf"],
            "rate_limiting": ["rate-limiter-flexible", "express-rate-limit", "upstash"],
        }

        found = {
            category: [p for p in packages if p in deps]
            for category, packages in security_relevant.items()
        }

        return {
            "all_dependencies": deps,
            "security_relevant": found,
        }

    def get_tool_definitions(self) -> list[dict]:
        """Return OpenAI-compatible tool definitions."""
        return [
            {
                "name": "get_project_info",
                "description": "Get Next.js project information including router type, TypeScript usage, and version.",
                "parameters": {"type": "object", "properties": {}},
            },
            {
                "name": "get_route_map",
                "description": "Extract all routes from the Next.js application (pages, API routes, route handlers).",
                "parameters": {"type": "object", "properties": {}},
            },
            {
                "name": "get_server_actions",
                "description": "Find all server actions ('use server') in the codebase.",
                "parameters": {"type": "object", "properties": {}},
            },
            {
                "name": "get_middleware_config",
                "description": "Get the middleware configuration and matcher patterns.",
                "parameters": {"type": "object", "properties": {}},
            },
            {
                "name": "check_dependencies",
                "description": "Check package.json for security-relevant dependencies.",
                "parameters": {"type": "object", "properties": {}},
            },
        ]

    def execute_tool(self, name: str, arguments: dict) -> dict:
        """Execute a tool by name with the given arguments."""
        tools = {
            "get_project_info": self.get_project_info,
            "get_route_map": self.get_route_map,
            "get_server_actions": self.get_server_actions,
            "get_middleware_config": self.get_middleware_config,
            "check_dependencies": self.check_dependencies,
        }
        if name not in tools:
            return {"error": f"Unknown tool: {name}"}
        # These tools take no arguments - ignore any hallucinated arguments from LLM
        return tools[name]()