openhack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113):
  1. openhack/__init__.py +2 -0
  2. openhack/__main__.py +225 -0
  3. openhack/agents/__init__.py +30 -0
  4. openhack/agents/base.py +230 -0
  5. openhack/agents/browser_verifier.py +679 -0
  6. openhack/agents/browser_verifier_swarm.py +256 -0
  7. openhack/agents/checkpoint.py +89 -0
  8. openhack/agents/context_manager.py +356 -0
  9. openhack/agents/coordinator.py +1105 -0
  10. openhack/agents/endpoint_analyst.py +307 -0
  11. openhack/agents/feature_hunter.py +93 -0
  12. openhack/agents/hunter.py +481 -0
  13. openhack/agents/hunter_swarm.py +385 -0
  14. openhack/agents/llm.py +334 -0
  15. openhack/agents/recon.py +19 -0
  16. openhack/agents/sandbox_verifier.py +396 -0
  17. openhack/agents/sandbox_verifier_swarm.py +250 -0
  18. openhack/agents/session.py +286 -0
  19. openhack/agents/validator.py +217 -0
  20. openhack/agents/validator_swarm.py +106 -0
  21. openhack/auth.py +175 -0
  22. openhack/browser/__init__.py +12 -0
  23. openhack/browser/runner.py +385 -0
  24. openhack/categories.py +130 -0
  25. openhack/config.py +201 -0
  26. openhack/deterministic_recon.py +464 -0
  27. openhack/entry_points.py +745 -0
  28. openhack/framework_classifier.py +515 -0
  29. openhack/framework_detection.py +269 -0
  30. openhack/headless_scan.py +179 -0
  31. openhack/prompts/__init__.py +108 -0
  32. openhack/prompts/browser_verifier.py +171 -0
  33. openhack/prompts/coordinator.py +31 -0
  34. openhack/prompts/django/__init__.py +32 -0
  35. openhack/prompts/django/auth_bypass.py +76 -0
  36. openhack/prompts/django/csrf.py +62 -0
  37. openhack/prompts/django/data_exposure.py +67 -0
  38. openhack/prompts/django/idor.py +74 -0
  39. openhack/prompts/django/injection.py +67 -0
  40. openhack/prompts/django/misconfiguration.py +70 -0
  41. openhack/prompts/django/ssrf.py +64 -0
  42. openhack/prompts/endpoint_analyst.py +122 -0
  43. openhack/prompts/express/__init__.py +29 -0
  44. openhack/prompts/express/auth_bypass.py +71 -0
  45. openhack/prompts/express/data_exposure.py +77 -0
  46. openhack/prompts/express/idor.py +69 -0
  47. openhack/prompts/express/injection.py +75 -0
  48. openhack/prompts/express/misconfiguration.py +72 -0
  49. openhack/prompts/express/ssrf.py +63 -0
  50. openhack/prompts/feature_hunter.py +140 -0
  51. openhack/prompts/flask/__init__.py +29 -0
  52. openhack/prompts/flask/auth_bypass.py +86 -0
  53. openhack/prompts/flask/data_exposure.py +78 -0
  54. openhack/prompts/flask/idor.py +83 -0
  55. openhack/prompts/flask/injection.py +77 -0
  56. openhack/prompts/flask/misconfiguration.py +73 -0
  57. openhack/prompts/flask/ssrf.py +65 -0
  58. openhack/prompts/hunter.py +362 -0
  59. openhack/prompts/hunter_continuation_loop.py +12 -0
  60. openhack/prompts/hunter_continuation_no_findings.py +19 -0
  61. openhack/prompts/hunter_continuation_no_progress.py +22 -0
  62. openhack/prompts/hunter_tool_instructions.py +55 -0
  63. openhack/prompts/nextjs/__init__.py +42 -0
  64. openhack/prompts/nextjs/auth_bypass.py +80 -0
  65. openhack/prompts/nextjs/csrf.py +71 -0
  66. openhack/prompts/nextjs/data_exposure.py +88 -0
  67. openhack/prompts/nextjs/idor.py +64 -0
  68. openhack/prompts/nextjs/injection.py +65 -0
  69. openhack/prompts/nextjs/middleware_bypass.py +75 -0
  70. openhack/prompts/nextjs/misconfiguration.py +92 -0
  71. openhack/prompts/nextjs/server_actions.py +97 -0
  72. openhack/prompts/nextjs/ssrf.py +66 -0
  73. openhack/prompts/nextjs/xss.py +69 -0
  74. openhack/prompts/pr_analysis_system.py +80 -0
  75. openhack/prompts/pr_analysis_user.py +11 -0
  76. openhack/prompts/project_context.py +89 -0
  77. openhack/prompts/recon.py +199 -0
  78. openhack/prompts/reporter.py +88 -0
  79. openhack/prompts/researchers.py +434 -0
  80. openhack/prompts/sandbox_verifier.py +128 -0
  81. openhack/prompts/supabase/__init__.py +39 -0
  82. openhack/prompts/supabase/auth_tokens.py +131 -0
  83. openhack/prompts/supabase/edge_functions.py +150 -0
  84. openhack/prompts/supabase/graphql.py +102 -0
  85. openhack/prompts/supabase/postgrest.py +99 -0
  86. openhack/prompts/supabase/realtime.py +93 -0
  87. openhack/prompts/supabase/rls.py +110 -0
  88. openhack/prompts/supabase/rpc_functions.py +127 -0
  89. openhack/prompts/supabase/storage.py +110 -0
  90. openhack/prompts/supabase/tenant_isolation.py +118 -0
  91. openhack/prompts/validator.py +319 -0
  92. openhack/prompts/validator_continuation_incomplete.py +12 -0
  93. openhack/prompts/validator_tool_instructions.py +29 -0
  94. openhack/quality.py +231 -0
  95. openhack/sandbox/__init__.py +12 -0
  96. openhack/sandbox/orchestrator.py +517 -0
  97. openhack/sandbox/runner.py +177 -0
  98. openhack/scan_session.py +245 -0
  99. openhack/setup.py +452 -0
  100. openhack/static_validator.py +612 -0
  101. openhack/tools/__init__.py +1 -0
  102. openhack/tools/ast_tools.py +307 -0
  103. openhack/tools/coverage.py +1078 -0
  104. openhack/tools/filesystem.py +404 -0
  105. openhack/tools/nextjs.py +258 -0
  106. openhack/tools/registry.py +52 -0
  107. openhack/tui.py +3450 -0
  108. openhack/updates.py +170 -0
  109. openhack-0.1.0.dist-info/METADATA +189 -0
  110. openhack-0.1.0.dist-info/RECORD +113 -0
  111. openhack-0.1.0.dist-info/WHEEL +4 -0
  112. openhack-0.1.0.dist-info/entry_points.txt +2 -0
  113. openhack-0.1.0.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,464 @@
1
+ """
2
+ Deterministic reconnaissance — no LLM, same output every time.
3
+
4
+ Replaces the LLM-based recon agent with pure static analysis:
5
+ - Framework detection
6
+ - Attack surface discovery (routes, controllers, danger patterns)
7
+ - Feature detection (file uploads, outbound requests, auth patterns)
8
+ - Auth middleware mapping
9
+ - Dependency analysis
10
+
11
+ Produces a structured summary string that researchers use as context.
12
+ """
13
+
14
+ import logging
15
+ import os
16
+ import re
17
+ import subprocess
18
+ from concurrent.futures import ThreadPoolExecutor, as_completed
19
+ from pathlib import Path
20
+ from typing import Optional
21
+
22
+ from .tools.filesystem import FileSystemTools, _GREP_EXCLUDE_DIRS
23
+ from .tools.registry import ToolRegistry
24
+ from .tools.coverage import discover_attack_surface
25
+ from .framework_detection import detect_frameworks
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
# Patterns that indicate specific features exist in the codebase.
#
# Structure: feature category -> list of (regex, human-readable description).
# _detect_features_fast() ORs all regexes of one category together with "|"
# and hands the result to `grep -E`, so every pattern must be valid POSIX
# extended-regex syntax. Only the regex half of each tuple is read by the
# scan in this module; the description documents what the pattern targets.
_FEATURE_INDICATORS: dict[str, list[tuple[str, str]]] = {
    # Upload handling and processing of uploaded content.
    "file_uploads": [
        (r"multer|busboy|formidable|multipart|upload", "File upload library/pattern"),
        (r"req\.files|req\.file|request\.files", "Request file access"),
        (r"sharp|jimp|imagemagick|gm\(|Pillow|PIL", "Image processing"),
        (r"Content-Disposition|content-disposition", "Content-Disposition header handling"),
        # NOTE(review): the "." in "content.type" is unescaped and so matches
        # any single character — presumably deliberate; confirm.
        (r"mimeType|mimetype|content.type|contentType", "MIME type handling"),
    ],
    # Code paths that make outbound HTTP requests.
    "outbound_requests": [
        (r"fetch\(|axios|got\(|request\(|urllib|httpx|aiohttp", "HTTP client usage"),
        (r"webhook|Webhook|WEBHOOK", "Webhook feature"),
        (r"notification|Notification|apprise|Apprise", "Notification service"),
        (r"favicon|Favicon", "Favicon fetching"),
        (r"scrape|scraper|crawl", "URL scraping"),
    ],
    # Authentication libraries, login flows, sessions and auth middleware.
    "auth_system": [
        (r"passport|Passport|bcrypt|argon2|jwt|JWT|jsonwebtoken", "Auth library"),
        (r"login|Login|signIn|sign_in|authenticate", "Login functionality"),
        (r"session|Session|cookie|Cookie", "Session management"),
        (r"oauth|OAuth|oidc|OIDC|openid", "OAuth/OIDC integration"),
        (r"middleware.*auth|auth.*middleware|is.?authenticated|is.?admin", "Auth middleware"),
    ],
    # HTML/template output and sanitization.
    "template_rendering": [
        (r"dangerouslySetInnerHTML|v-html|innerHTML", "Raw HTML rendering"),
        (r"markdown|Markdown|marked|remarkable|markdown-it", "Markdown processing"),
        (r"ejs|pug|handlebars|jinja|nunjucks|mustache", "Template engine"),
        (r"sanitize|DOMPurify|xss|bleach", "Sanitization library"),
    ],
    # Raw SQL, ORMs and cache stores.
    "database": [
        (r"\.raw\(|\.query\(|execute\(|cursor\.", "Raw SQL usage"),
        (r"prisma|sequelize|typeorm|knex|waterline|mongoose|sqlalchemy|django\.db", "ORM"),
        (r"redis|Redis|memcache|Memcache", "Cache/session store"),
    ],
    "graphql": [
        (r"graphql|GraphQL|gql`|typeDefs|resolvers", "GraphQL usage"),
        (r"__schema|introspection|buildSchema", "GraphQL schema/introspection"),
        (r"apollo|ApolloServer|express-graphql|mercurius", "GraphQL server library"),
    ],
    "websocket": [
        (r"WebSocket|ws\(|socket\.io|Socket\.IO|sockjs", "WebSocket library"),
        (r"wss://|ws://|upgrade.*websocket", "WebSocket connection"),
        (r"\.on\('message'|\.on\('connection'", "WebSocket event handlers"),
    ],
    "grpc": [
        (r"grpc|protobuf|\.proto|grpc-js", "gRPC/protobuf usage"),
        (r"ServiceImpl|addService|grpc\.Server", "gRPC server"),
    ],
    # Overlaps with the OAuth pattern in "auth_system" but is reported as its
    # own category.
    "oauth_oidc": [
        (r"oauth|OAuth|oauth2|OAuth2", "OAuth usage"),
        (r"oidc|OIDC|openid|OpenID", "OIDC usage"),
        (r"id_token|access_token|refresh_token|authorization_code", "OAuth token handling"),
        (r"passport|next-auth|lucia|authjs", "Auth library with OAuth"),
    ],
    # Deserialization entry points, grouped by language ecosystem.
    "deserialization": [
        (r"ObjectInputStream|readObject|XMLDecoder|SnakeYAML\.load", "Java deserialization"),
        (r"BinaryFormatter|TypeNameHandling|DataContractSerializer", ".NET deserialization"),
        (r"pickle\.load|yaml\.load|marshal\.loads", "Python deserialization"),
        (r"unserialize|json_decode.*class", "PHP deserialization"),
    ],
}
91
+
92
# C/C++ specific feature indicators.
#
# Same structure as _FEATURE_INDICATORS: feature category -> list of
# (extended regex, description). run_deterministic_recon() passes this table
# to _detect_features_fast() instead of _FEATURE_INDICATORS when its fallback
# detection classifies the target as a C/C++ project.
_C_FEATURE_INDICATORS: dict[str, list[tuple[str, str]]] = {
    # Memory/string primitives and allocation calls.
    "memory_operations": [
        (r"memcpy|memmove|memset|bcopy", "Memory copy functions"),
        (r"strcpy|strncpy|strcat|strncat", "String copy functions"),
        (r"sprintf|snprintf|vsprintf|vsnprintf", "String format functions"),
        (r"gets\(|fgets\(|read\(|recv\(|recvfrom\(", "Input reading functions"),
        (r"malloc\(|calloc\(|realloc\(|free\(", "Dynamic memory allocation"),
    ],
    # Socket, TLS and protocol-parsing code.
    "network_parsing": [
        (r"htons|htonl|ntohs|ntohl", "Network byte order conversion"),
        (r"accept\(|listen\(|bind\(|connect\(|socket\(", "Socket operations"),
        (r"SSL_read|SSL_write|SSL_accept|SSL_connect", "TLS operations"),
        (r"parse.*header|parse.*packet|parse.*message|parse.*request", "Protocol parsing"),
        (r"BIO_read|BIO_write|BIO_new", "OpenSSL BIO operations"),
    ],
    # Cryptographic primitives, randomness and certificate handling.
    "crypto_operations": [
        (r"EVP_.*Init|EVP_.*Update|EVP_.*Final", "OpenSSL EVP crypto"),
        (r"AES_|DES_|RSA_|EC_|HMAC_|SHA256_|MD5_", "Crypto algorithm usage"),
        (r"RAND_bytes|RAND_pseudo_bytes|rand\(\)|srand\(", "Random number generation"),
        (r"X509_|SSL_CTX_|SSL_new|SSL_free", "Certificate/TLS handling"),
        (r"CRYPTO_memcmp|timingsafe_bcmp|constant_time", "Constant-time comparison"),
    ],
    # String inspection, numeric parsing and formatted output.
    "string_handling": [
        (r"strlen\(|strcmp\(|strncmp\(|strstr\(", "String comparison/search"),
        (r"strtol\(|strtoul\(|atoi\(|atol\(", "String to integer conversion"),
        (r"printf\(|fprintf\(|syslog\(", "Output/logging functions"),
    ],
}
121
+
122
+ _SOURCE_EXTENSIONS = (
123
+ ".py", ".js", ".ts", ".tsx", ".jsx",
124
+ ".rb", ".go", ".rs", ".java", ".php",
125
+ ".c", ".cpp", ".h", ".vue", ".svelte",
126
+ )
127
+
128
+
129
# Fixed-string keywords for the cheap large-repo scan, keyed by feature
# category. Deliberately a small subset of the regex tables so that
# `grep -F` can be used instead of `grep -E`.
_QUICK_FEATURE_KEYWORDS: dict[str, list[str]] = {
    "file_uploads": ["multer", "busboy", "formidable", "multipart", "request.files"],
    "outbound_requests": ["webhook", "Webhook", "httpx", "aiohttp", "apprise"],
    "auth_system": ["bcrypt", "argon2", "jsonwebtoken", "passport", "login_required"],
    "template_rendering": ["dangerouslySetInnerHTML", "mark_safe", "render_template_string", "DOMPurify"],
    "database": ["cursor.", "RawSQL", ".raw(", "execute("],
    "graphql": ["graphql", "GraphQL", "ApolloServer"],
    "websocket": ["WebSocket", "socket.io", "Socket.IO"],
    "grpc": ["grpc", "protobuf"],
    "deserialization": ["pickle.load", "yaml.load", "unserialize"],
}

# Above this many source files the fixed-string scan is preferred.
_LARGE_REPO_FILE_THRESHOLD = 5000


def _feature_label(feature_name: str, file_count: int) -> list[str]:
    """Return the one-element indicator list reported for a detected feature."""
    readable = feature_name.replace("_", " ").title()
    return [f"{readable} ({file_count} files)"]


def _grep_file_list(grep_args: list[str], file_list: str, timeout: int) -> list[str]:
    """Run ``grep -l`` over a NUL-delimited file list and return matching paths.

    The list is fed to ``xargs -0`` so paths containing spaces or quotes reach
    grep intact. Raises whatever ``subprocess.run`` raises (e.g.
    ``subprocess.TimeoutExpired``); callers decide how to degrade.
    """
    cmd = ["xargs", "-0", "grep", "-l", "--max-count=1",
           "--binary-files=without-match", *grep_args]
    proc = subprocess.run(
        cmd, input=file_list, capture_output=True, text=True, timeout=timeout,
    )
    return [line.strip() for line in proc.stdout.split("\n") if line.strip()]


def _quick_feature_scan(
    file_list: str,
    checks: dict[str, list[str]],
) -> dict[str, list[str]]:
    """Fixed-string scan of each category in *checks*, run in parallel threads."""

    def _check(item):
        name, keywords = item
        grep_args = ["-F"]
        for keyword in keywords:
            grep_args.extend(["-e", keyword])
        try:
            matches = _grep_file_list(grep_args, file_list, timeout=15)
        except Exception as exc:
            # Best effort: a failed category is simply reported as absent.
            logger.debug("Quick feature scan for %s failed: %s", name, exc)
            return name, None
        return name, _feature_label(name, len(matches)) if matches else None

    found: dict[str, list[str]] = {}
    with ThreadPoolExecutor(max_workers=4) as pool:
        # map() yields results in submission order, so the returned dict has
        # the same key order on every run regardless of thread timing.
        for name, value in pool.map(_check, checks.items()):
            if value:
                found[name] = value
    return found


def _regex_feature_scan(
    file_list: str,
    target_dir: str,
    feature_indicators: dict[str, list[tuple[str, str]]],
) -> dict[str, list[str]]:
    """Extended-regex scan: one grep per category, all its patterns ORed together."""
    found: dict[str, list[str]] = {}
    for feature_name, patterns in feature_indicators.items():
        combined = "|".join(p for p, _ in patterns)
        try:
            paths = _grep_file_list(["-E", "-e", combined], file_list, timeout=30)
        except Exception as exc:
            # Includes subprocess.TimeoutExpired; skip this category only.
            logger.debug("Regex feature scan for %s failed: %s", feature_name, exc)
            continue

        files = []
        for fp in paths:
            try:
                rel = str(Path(fp).relative_to(target_dir))
            except ValueError:
                rel = fp
            # Prefix a slash so a top-level "test/" or "tests/" directory is
            # excluded just like a nested one.
            if "node_modules" not in rel and "/test" not in f"/{rel}".lower():
                files.append(rel)
        if files:
            found[feature_name] = _feature_label(feature_name, len(files))
    return found


def _detect_features_fast(
    fs: FileSystemTools,
    feature_indicators: dict[str, list[tuple[str, str]]],
) -> dict[str, list[str]]:
    """Detect features with one ``find`` walk followed by ``grep -l`` per category.

    Source files under ``fs.jail_dir`` are listed once (directories from
    ``_GREP_EXCLUDE_DIRS`` are skipped). That list is then piped to grep once
    per feature category; ``--max-count=1`` makes grep stop reading a file at
    its first matching line.

    For repositories above ``_LARGE_REPO_FILE_THRESHOLD`` files, categories
    that have fixed-string keywords are scanned with ``grep -F`` in parallel.
    If none of the requested categories has quick keywords (as with the C/C++
    indicator table), the regex scan is used regardless of size so that the
    caller's indicator set is always honoured.

    Args:
        fs: Filesystem tools; only ``jail_dir`` is read here.
        feature_indicators: Category -> list of (regex, description).

    Returns:
        Category -> single-element list ``["<Title> (<n> files)"]`` for each
        category with at least one matching file. Empty dict when ``find``
        fails or lists no files.
    """
    target_dir = str(fs.jail_dir)

    find_cmd = ["find", target_dir]
    for d in _GREP_EXCLUDE_DIRS:
        clean = d.rstrip("*").rstrip(".")
        find_cmd.extend(["-not", "-path", f"*/{clean}/*"])
    find_cmd.extend(["-type", "f", "("])
    for i, ext in enumerate(_SOURCE_EXTENSIONS):
        if i > 0:
            find_cmd.append("-o")
        find_cmd.extend(["-name", f"*{ext}"])
    # NUL-terminate each path so whitespace in file names survives xargs.
    find_cmd.extend([")", "-print0"])

    try:
        find_result = subprocess.run(
            find_cmd, capture_output=True, text=True, timeout=30
        )
        file_list = find_result.stdout
    except Exception as e:
        logger.warning("Find failed: %s", e)
        return {}

    if not file_list:
        return {}

    file_count = file_list.count("\0")

    if file_count > _LARGE_REPO_FILE_THRESHOLD:
        checks = {
            name: keywords
            for name, keywords in _QUICK_FEATURE_KEYWORDS.items()
            if name in feature_indicators
        }
        if checks:
            return _quick_feature_scan(file_list, checks)

    return _regex_feature_scan(file_list, target_dir, feature_indicators)
240
+
241
+
242
# Substrings that mark a dependency name as security-relevant for the summary.
_SECURITY_DEP_KEYWORDS = (
    "auth", "jwt", "bcrypt", "csrf", "helmet", "cors", "sanitize",
    "passport", "session", "crypto", "apprise", "webhook",
)


def _guess_frameworks_from_root(fs: FileSystemTools) -> tuple[list[dict], bool]:
    """Guess the framework from marker files in the scan root.

    Returns:
        ``(frameworks, is_c_project)``. ``frameworks`` is empty when no marker
        matched, or when build files were found but too few C/C++ sources.
    """
    root_files: set = set()
    try:
        listing = fs.list_dir(".")
        entries = listing.get("entries", [])
        if isinstance(entries, list):
            root_files = {e.get("name", "") for e in entries}
    except Exception as exc:
        # Best effort: with no listing, no marker can match below.
        logger.debug("Root listing failed during framework fallback: %s", exc)

    def _single(name: str) -> list[dict]:
        return [{"framework": name, "root": "."}]

    # Order matters: the first matching marker wins.
    if "server" in root_files or "app.js" in root_files:
        return _single("express"), False
    if "manage.py" in root_files:
        return _single("django"), False
    if "requirements.txt" in root_files or "pyproject.toml" in root_files:
        return _single("flask"), False
    if "package.json" in root_files:
        return _single("nextjs"), False
    if "pom.xml" in root_files or "build.gradle" in root_files or "build.gradle.kts" in root_files:
        return _single("java"), False
    if any(f.endswith(".csproj") or f.endswith(".sln") for f in root_files):
        return _single("dotnet"), False
    if "Cargo.toml" in root_files:
        return _single("rust"), False
    if "Makefile" in root_files or "CMakeLists.txt" in root_files or "configure" in root_files or "Makefile.am" in root_files:
        # Build files alone are ambiguous; require more than 10 sources.
        frameworks: list[dict] = []
        is_c_project = False
        c_count = len(fs.glob("**/*.c", ".").get("matches", []))
        h_count = len(fs.glob("**/*.h", ".").get("matches", []))
        if c_count > 10 or h_count > 10:
            is_c_project = True
            frameworks = _single("c")
        cpp_count = len(fs.glob("**/*.cpp", ".").get("matches", []))
        if cpp_count > 10:
            # C++ takes precedence over C when both thresholds are met.
            is_c_project = True
            frameworks = _single("cpp")
        return frameworks, is_c_project
    return [], False


def run_deterministic_recon(tools: ToolRegistry) -> dict:
    """Run deterministic recon and return structured results.

    Args:
        tools: Tool registry; ``fs_tools``, ``nextjs_tools`` and
            ``execute_tool`` are used.

    Returns:
        dict with keys "summary" (formatted string suitable for researcher
        system prompts), "type" (always "recon_complete"), "frameworks",
        "attack_surface" and "features".
    """
    fs = tools.fs_tools

    # 1. Framework detection, with a marker-file fallback
    frameworks = detect_frameworks(fs)
    is_c_project = False
    if not frameworks:
        guessed, is_c_project = _guess_frameworks_from_root(fs)
        if guessed:
            frameworks = guessed

    # 2. Attack surface discovery
    try:
        attack_surface = discover_attack_surface(fs, nextjs_tools=tools.nextjs_tools)
    except Exception as e:
        logger.warning(f"Attack surface discovery failed: {e}")
        attack_surface = {"total_endpoints": 0}

    # 3. Feature detection — indicator table depends on the project type
    feature_indicators = _C_FEATURE_INDICATORS if is_c_project else _FEATURE_INDICATORS
    detected_features: dict[str, list[str]] = _detect_features_fast(fs, feature_indicators)

    # 4. Read key config files for auth/route info
    auth_info = _detect_auth_config(fs)
    route_info = _detect_routes(fs, attack_surface)

    # 5. Dependencies
    deps_info = ""
    try:
        result = tools.execute_tool("check_dependencies", {})
        if isinstance(result, dict) and "dependencies" in result:
            security_deps = [
                d for d in result["dependencies"]
                if any(kw in d.get("name", "").lower() for kw in _SECURITY_DEP_KEYWORDS)
            ]
            if security_deps:
                # Cap the list so the summary stays short.
                deps_info = "Security-relevant dependencies: " + ", ".join(
                    d.get("name", "") for d in security_deps[:15]
                )
    except Exception as exc:
        # Dependency info is optional; the summary just omits it.
        logger.debug("Dependency analysis failed: %s", exc)

    # 6. Build structured summary
    summary = _build_summary(frameworks, attack_surface, detected_features,
                             auth_info, route_info, deps_info)

    return {
        "summary": summary,
        "type": "recon_complete",
        "frameworks": frameworks,
        "attack_surface": attack_surface,
        "features": detected_features,
    }
339
+
340
+
341
def _detect_auth_config(fs: FileSystemTools) -> str:
    """Summarize auth configuration found in the target project.

    Probes a fixed list of well-known config paths (reporting only the first
    one that reads without an "error" key) and greps for auth middleware
    identifiers. Both probes are best effort: any exception is ignored.

    Returns:
        The findings joined with "; ", or "No auth config detected".
    """
    findings = []

    candidate_paths = (
        "server/config/policies.js",  # Sails.js
        "server/config/security.js",  # Sails.js
        "config/policies.js",
        "src/middleware.ts",          # Next.js
        "middleware.ts",
        "app/middleware.py",          # Django
        "config/routes.rb",           # Rails
    )

    for path in candidate_paths:
        try:
            outcome = fs.read_file(path)
            if "error" in outcome:
                continue
            text = outcome.get("content", "")
            # Line count gives a rough sense of how much config there is.
            n_lines = len(text.split("\n"))
            findings.append(f"Auth config found: {path} ({n_lines} lines)")
            break  # one hit is enough for the summary
        except Exception:
            continue

    try:
        hits = fs.grep(r"is.?authenticated|is.?admin|requireAuth|login_required", ".").get("matches", [])
        if hits:
            # A match may be a plain path string or a dict with a "file" key.
            paths = (m if isinstance(m, str) else m.get("file", "") for m in hits)
            auth_files = {
                fp for fp in paths
                if fp and "node_modules" not in fp and "test" not in fp.lower()
            }
            if auth_files:
                findings.append(f"Auth middleware in {len(auth_files)} files")
    except Exception:
        pass

    return "; ".join(findings) or "No auth config detected"
384
+
385
+
386
def _detect_routes(fs: FileSystemTools, attack_surface: dict) -> str:
    """Render the attack-surface counts as a "; "-separated summary string.

    Each known bucket is listed only when non-empty; the total endpoint count
    is always appended last. ``fs`` is accepted but not used.
    """
    # (attack_surface key, label), in the order they appear in the output.
    labelled_buckets = (
        ("route_handlers", "Express/Node route handlers"),
        ("api_routes", "API routes"),
        ("django_views", "Django views"),
        ("flask_routes", "Flask routes"),
        ("danger_files", "files with dangerous patterns"),
    )

    segments = []
    for key, label in labelled_buckets:
        count = len(attack_surface.get(key, []))
        if count:
            segments.append(f"{count} {label}")

    segments.append(f"{attack_surface.get('total_endpoints', 0)} total endpoints")
    return "; ".join(segments)
411
+
412
+
413
+ def _build_summary(
414
+ frameworks: list[dict],
415
+ attack_surface: dict,
416
+ features: dict[str, list[str]],
417
+ auth_info: str,
418
+ route_info: str,
419
+ deps_info: str,
420
+ ) -> str:
421
+ """Build a formatted summary string for researcher system prompts."""
422
+ lines = []
423
+
424
+ # Frameworks
425
+ if frameworks:
426
+ fw_names = [f"{f['framework']} at {f['root']}/" for f in frameworks]
427
+ lines.append(f"## Application Overview\n- Frameworks: {', '.join(fw_names)}")
428
+ else:
429
+ lines.append("## Application Overview\n- Framework: unknown")
430
+
431
+ # Routes
432
+ lines.append(f"- Routes: {route_info}")
433
+
434
+ # Auth
435
+ lines.append(f"- Auth: {auth_info}")
436
+
437
+ # Dependencies
438
+ if deps_info:
439
+ lines.append(f"- {deps_info}")
440
+
441
+ # Detected features
442
+ if features:
443
+ lines.append("\n## Detected Features")
444
+ for feature_name, indicators in features.items():
445
+ readable = feature_name.replace("_", " ").title()
446
+ lines.append(f"\n### {readable}")
447
+ for indicator in indicators:
448
+ lines.append(f"- {indicator}")
449
+
450
+ # Key files from attack surface
451
+ route_handlers = attack_surface.get("route_handlers", [])
452
+ if route_handlers:
453
+ lines.append("\n## Route Handler Files")
454
+ for ep in route_handlers[:20]:
455
+ lines.append(f"- `{ep['file']}`")
456
+
457
+ danger_files = attack_surface.get("danger_files", [])
458
+ if danger_files:
459
+ lines.append("\n## High-Signal Files (dangerous patterns)")
460
+ for ep in danger_files[:15]:
461
+ trigger = ep.get("trigger", "")
462
+ lines.append(f"- `{ep['file']}` — {trigger}")
463
+
464
+ return "\n".join(lines)