openhack 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openhack/__init__.py +2 -0
- openhack/__main__.py +225 -0
- openhack/agents/__init__.py +30 -0
- openhack/agents/base.py +230 -0
- openhack/agents/browser_verifier.py +679 -0
- openhack/agents/browser_verifier_swarm.py +256 -0
- openhack/agents/checkpoint.py +89 -0
- openhack/agents/context_manager.py +356 -0
- openhack/agents/coordinator.py +1105 -0
- openhack/agents/endpoint_analyst.py +307 -0
- openhack/agents/feature_hunter.py +93 -0
- openhack/agents/hunter.py +481 -0
- openhack/agents/hunter_swarm.py +385 -0
- openhack/agents/llm.py +334 -0
- openhack/agents/recon.py +19 -0
- openhack/agents/sandbox_verifier.py +396 -0
- openhack/agents/sandbox_verifier_swarm.py +250 -0
- openhack/agents/session.py +286 -0
- openhack/agents/validator.py +217 -0
- openhack/agents/validator_swarm.py +106 -0
- openhack/auth.py +175 -0
- openhack/browser/__init__.py +12 -0
- openhack/browser/runner.py +385 -0
- openhack/categories.py +130 -0
- openhack/config.py +201 -0
- openhack/deterministic_recon.py +464 -0
- openhack/entry_points.py +745 -0
- openhack/framework_classifier.py +515 -0
- openhack/framework_detection.py +269 -0
- openhack/headless_scan.py +179 -0
- openhack/prompts/__init__.py +108 -0
- openhack/prompts/browser_verifier.py +171 -0
- openhack/prompts/coordinator.py +31 -0
- openhack/prompts/django/__init__.py +32 -0
- openhack/prompts/django/auth_bypass.py +76 -0
- openhack/prompts/django/csrf.py +62 -0
- openhack/prompts/django/data_exposure.py +67 -0
- openhack/prompts/django/idor.py +74 -0
- openhack/prompts/django/injection.py +67 -0
- openhack/prompts/django/misconfiguration.py +70 -0
- openhack/prompts/django/ssrf.py +64 -0
- openhack/prompts/endpoint_analyst.py +122 -0
- openhack/prompts/express/__init__.py +29 -0
- openhack/prompts/express/auth_bypass.py +71 -0
- openhack/prompts/express/data_exposure.py +77 -0
- openhack/prompts/express/idor.py +69 -0
- openhack/prompts/express/injection.py +75 -0
- openhack/prompts/express/misconfiguration.py +72 -0
- openhack/prompts/express/ssrf.py +63 -0
- openhack/prompts/feature_hunter.py +140 -0
- openhack/prompts/flask/__init__.py +29 -0
- openhack/prompts/flask/auth_bypass.py +86 -0
- openhack/prompts/flask/data_exposure.py +78 -0
- openhack/prompts/flask/idor.py +83 -0
- openhack/prompts/flask/injection.py +77 -0
- openhack/prompts/flask/misconfiguration.py +73 -0
- openhack/prompts/flask/ssrf.py +65 -0
- openhack/prompts/hunter.py +362 -0
- openhack/prompts/hunter_continuation_loop.py +12 -0
- openhack/prompts/hunter_continuation_no_findings.py +19 -0
- openhack/prompts/hunter_continuation_no_progress.py +22 -0
- openhack/prompts/hunter_tool_instructions.py +55 -0
- openhack/prompts/nextjs/__init__.py +42 -0
- openhack/prompts/nextjs/auth_bypass.py +80 -0
- openhack/prompts/nextjs/csrf.py +71 -0
- openhack/prompts/nextjs/data_exposure.py +88 -0
- openhack/prompts/nextjs/idor.py +64 -0
- openhack/prompts/nextjs/injection.py +65 -0
- openhack/prompts/nextjs/middleware_bypass.py +75 -0
- openhack/prompts/nextjs/misconfiguration.py +92 -0
- openhack/prompts/nextjs/server_actions.py +97 -0
- openhack/prompts/nextjs/ssrf.py +66 -0
- openhack/prompts/nextjs/xss.py +69 -0
- openhack/prompts/pr_analysis_system.py +80 -0
- openhack/prompts/pr_analysis_user.py +11 -0
- openhack/prompts/project_context.py +89 -0
- openhack/prompts/recon.py +199 -0
- openhack/prompts/reporter.py +88 -0
- openhack/prompts/researchers.py +434 -0
- openhack/prompts/sandbox_verifier.py +128 -0
- openhack/prompts/supabase/__init__.py +39 -0
- openhack/prompts/supabase/auth_tokens.py +131 -0
- openhack/prompts/supabase/edge_functions.py +150 -0
- openhack/prompts/supabase/graphql.py +102 -0
- openhack/prompts/supabase/postgrest.py +99 -0
- openhack/prompts/supabase/realtime.py +93 -0
- openhack/prompts/supabase/rls.py +110 -0
- openhack/prompts/supabase/rpc_functions.py +127 -0
- openhack/prompts/supabase/storage.py +110 -0
- openhack/prompts/supabase/tenant_isolation.py +118 -0
- openhack/prompts/validator.py +319 -0
- openhack/prompts/validator_continuation_incomplete.py +12 -0
- openhack/prompts/validator_tool_instructions.py +29 -0
- openhack/quality.py +231 -0
- openhack/sandbox/__init__.py +12 -0
- openhack/sandbox/orchestrator.py +517 -0
- openhack/sandbox/runner.py +177 -0
- openhack/scan_session.py +245 -0
- openhack/setup.py +452 -0
- openhack/static_validator.py +612 -0
- openhack/tools/__init__.py +1 -0
- openhack/tools/ast_tools.py +307 -0
- openhack/tools/coverage.py +1078 -0
- openhack/tools/filesystem.py +404 -0
- openhack/tools/nextjs.py +258 -0
- openhack/tools/registry.py +52 -0
- openhack/tui.py +3450 -0
- openhack/updates.py +170 -0
- openhack-0.1.0.dist-info/METADATA +189 -0
- openhack-0.1.0.dist-info/RECORD +113 -0
- openhack-0.1.0.dist-info/WHEEL +4 -0
- openhack-0.1.0.dist-info/entry_points.txt +2 -0
- openhack-0.1.0.dist-info/licenses/LICENSE +661 -0
|
@@ -0,0 +1,464 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Deterministic reconnaissance — no LLM, same output every time.
|
|
3
|
+
|
|
4
|
+
Replaces the LLM-based recon agent with pure static analysis:
|
|
5
|
+
- Framework detection
|
|
6
|
+
- Attack surface discovery (routes, controllers, danger patterns)
|
|
7
|
+
- Feature detection (file uploads, outbound requests, auth patterns)
|
|
8
|
+
- Auth middleware mapping
|
|
9
|
+
- Dependency analysis
|
|
10
|
+
|
|
11
|
+
Produces a structured summary string that researchers use as context.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import logging
|
|
15
|
+
import os
|
|
16
|
+
import re
|
|
17
|
+
import subprocess
|
|
18
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Optional
|
|
21
|
+
|
|
22
|
+
from .tools.filesystem import FileSystemTools, _GREP_EXCLUDE_DIRS
|
|
23
|
+
from .tools.registry import ToolRegistry
|
|
24
|
+
from .tools.coverage import discover_attack_surface
|
|
25
|
+
from .framework_detection import detect_frameworks
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Patterns that indicate specific features exist in the codebase.
#
# Maps a feature category name to a list of (regex, label) pairs.
# _detect_features_fast joins the regexes of one category with "|" and passes
# the result to `grep -E`, so every pattern must be valid POSIX extended-regex
# syntax. The label element is discarded by that function; no other reader of
# the labels is visible in this module.
_FEATURE_INDICATORS: dict[str, list[tuple[str, str]]] = {
    "file_uploads": [
        (r"multer|busboy|formidable|multipart|upload", "File upload library/pattern"),
        (r"req\.files|req\.file|request\.files", "Request file access"),
        (r"sharp|jimp|imagemagick|gm\(|Pillow|PIL", "Image processing"),
        (r"Content-Disposition|content-disposition", "Content-Disposition header handling"),
        # NOTE(review): the "." in `content.type` is an unescaped wildcard —
        # presumably meant to cover content-type / content_type; confirm.
        (r"mimeType|mimetype|content.type|contentType", "MIME type handling"),
    ],
    "outbound_requests": [
        (r"fetch\(|axios|got\(|request\(|urllib|httpx|aiohttp", "HTTP client usage"),
        (r"webhook|Webhook|WEBHOOK", "Webhook feature"),
        (r"notification|Notification|apprise|Apprise", "Notification service"),
        (r"favicon|Favicon", "Favicon fetching"),
        (r"scrape|scraper|crawl", "URL scraping"),
    ],
    "auth_system": [
        (r"passport|Passport|bcrypt|argon2|jwt|JWT|jsonwebtoken", "Auth library"),
        (r"login|Login|signIn|sign_in|authenticate", "Login functionality"),
        (r"session|Session|cookie|Cookie", "Session management"),
        (r"oauth|OAuth|oidc|OIDC|openid", "OAuth/OIDC integration"),
        (r"middleware.*auth|auth.*middleware|is.?authenticated|is.?admin", "Auth middleware"),
    ],
    "template_rendering": [
        (r"dangerouslySetInnerHTML|v-html|innerHTML", "Raw HTML rendering"),
        (r"markdown|Markdown|marked|remarkable|markdown-it", "Markdown processing"),
        (r"ejs|pug|handlebars|jinja|nunjucks|mustache", "Template engine"),
        (r"sanitize|DOMPurify|xss|bleach", "Sanitization library"),
    ],
    "database": [
        (r"\.raw\(|\.query\(|execute\(|cursor\.", "Raw SQL usage"),
        (r"prisma|sequelize|typeorm|knex|waterline|mongoose|sqlalchemy|django\.db", "ORM"),
        (r"redis|Redis|memcache|Memcache", "Cache/session store"),
    ],
    "graphql": [
        (r"graphql|GraphQL|gql`|typeDefs|resolvers", "GraphQL usage"),
        (r"__schema|introspection|buildSchema", "GraphQL schema/introspection"),
        (r"apollo|ApolloServer|express-graphql|mercurius", "GraphQL server library"),
    ],
    "websocket": [
        (r"WebSocket|ws\(|socket\.io|Socket\.IO|sockjs", "WebSocket library"),
        (r"wss://|ws://|upgrade.*websocket", "WebSocket connection"),
        (r"\.on\('message'|\.on\('connection'", "WebSocket event handlers"),
    ],
    "grpc": [
        (r"grpc|protobuf|\.proto|grpc-js", "gRPC/protobuf usage"),
        (r"ServiceImpl|addService|grpc\.Server", "gRPC server"),
    ],
    # This category has no entry in the large-repo keyword table inside
    # _detect_features_fast, so it is only evaluated on the regex-scan path.
    "oauth_oidc": [
        (r"oauth|OAuth|oauth2|OAuth2", "OAuth usage"),
        (r"oidc|OIDC|openid|OpenID", "OIDC usage"),
        (r"id_token|access_token|refresh_token|authorization_code", "OAuth token handling"),
        (r"passport|next-auth|lucia|authjs", "Auth library with OAuth"),
    ],
    "deserialization": [
        (r"ObjectInputStream|readObject|XMLDecoder|SnakeYAML\.load", "Java deserialization"),
        (r"BinaryFormatter|TypeNameHandling|DataContractSerializer", ".NET deserialization"),
        (r"pickle\.load|yaml\.load|marshal\.loads", "Python deserialization"),
        (r"unserialize|json_decode.*class", "PHP deserialization"),
    ],
}
|
|
91
|
+
|
|
92
|
+
# C/C++ specific feature indicators.
#
# Same shape as _FEATURE_INDICATORS: category name -> list of (regex, label)
# pairs, with the regexes joined by "|" and passed to `grep -E`.
# run_deterministic_recon passes this table instead of _FEATURE_INDICATORS
# when its root-file fallback heuristic flags the target as a C/C++ project.
_C_FEATURE_INDICATORS: dict[str, list[tuple[str, str]]] = {
    "memory_operations": [
        (r"memcpy|memmove|memset|bcopy", "Memory copy functions"),
        (r"strcpy|strncpy|strcat|strncat", "String copy functions"),
        (r"sprintf|snprintf|vsprintf|vsnprintf", "String format functions"),
        (r"gets\(|fgets\(|read\(|recv\(|recvfrom\(", "Input reading functions"),
        (r"malloc\(|calloc\(|realloc\(|free\(", "Dynamic memory allocation"),
    ],
    "network_parsing": [
        (r"htons|htonl|ntohs|ntohl", "Network byte order conversion"),
        (r"accept\(|listen\(|bind\(|connect\(|socket\(", "Socket operations"),
        (r"SSL_read|SSL_write|SSL_accept|SSL_connect", "TLS operations"),
        (r"parse.*header|parse.*packet|parse.*message|parse.*request", "Protocol parsing"),
        (r"BIO_read|BIO_write|BIO_new", "OpenSSL BIO operations"),
    ],
    "crypto_operations": [
        (r"EVP_.*Init|EVP_.*Update|EVP_.*Final", "OpenSSL EVP crypto"),
        (r"AES_|DES_|RSA_|EC_|HMAC_|SHA256_|MD5_", "Crypto algorithm usage"),
        (r"RAND_bytes|RAND_pseudo_bytes|rand\(\)|srand\(", "Random number generation"),
        (r"X509_|SSL_CTX_|SSL_new|SSL_free", "Certificate/TLS handling"),
        (r"CRYPTO_memcmp|timingsafe_bcmp|constant_time", "Constant-time comparison"),
    ],
    "string_handling": [
        (r"strlen\(|strcmp\(|strncmp\(|strstr\(", "String comparison/search"),
        (r"strtol\(|strtoul\(|atoi\(|atol\(", "String to integer conversion"),
        (r"printf\(|fprintf\(|syslog\(", "Output/logging functions"),
    ],
}
|
|
121
|
+
|
|
122
|
+
# File suffixes that count as "source files" for feature detection.
# _detect_features_fast turns each entry into a `-name "*<ext>"` test of its
# `find` command, so only files with these suffixes are ever grepped.
# NOTE(review): the tables above contain .NET and C++ oriented patterns, but
# ".cs", ".cc" and ".hpp" are not listed here — confirm whether that is
# intentional.
_SOURCE_EXTENSIONS = (
    ".py", ".js", ".ts", ".tsx", ".jsx",
    ".rb", ".go", ".rs", ".java", ".php",
    ".c", ".cpp", ".h", ".vue", ".svelte",
)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _detect_features_fast(
    fs: FileSystemTools,
    feature_indicators: dict[str, list[tuple[str, str]]],
) -> dict[str, list[str]]:
    """Detect feature categories with one ``find`` walk plus ``grep -l`` per category.

    Source files under ``fs.jail_dir`` (suffixes from ``_SOURCE_EXTENSIONS``,
    minus the directories in ``_GREP_EXCLUDE_DIRS``) are listed once. That list
    is then piped through ``xargs grep -l --max-count=1`` once per category,
    so grep stops reading each file at its first matching line.

    Two scan modes:

    - More than 5000 files: fixed-string keyword checks (``grep -F``) run in
      four threads, restricted to the categories present in
      ``feature_indicators``. If none of those categories has a keyword list
      (for example the C/C++ table), the regex scan below is used instead.
    - Otherwise: one extended-regex scan (``grep -E``) per category, with
      hits under ``node_modules`` or a ``/test*`` path segment discarded.

    Args:
        fs: Filesystem tools; only ``jail_dir`` is read.
        feature_indicators: Category name -> list of ``(regex, label)`` pairs.

    Returns:
        Mapping of detected category name to a single-element list of the form
        ``"Readable Name (N files)"``. Categories without hits are omitted.
        An empty dict is returned when ``find`` fails or finds no files.
    """
    target_dir = str(fs.jail_dir)

    find_cmd = ["find", target_dir, "-type", "f", "("]
    for i, ext in enumerate(_SOURCE_EXTENSIONS):
        if i > 0:
            find_cmd.append("-o")
        find_cmd.extend(["-name", f"*{ext}"])
    # NUL-terminate the paths: plain xargs splits its input on whitespace and
    # interprets quotes, which breaks on paths such as "my app/server.js".
    find_cmd.extend([")", "-print0"])
    for d in _GREP_EXCLUDE_DIRS:
        clean = d.rstrip("*").rstrip(".")
        # Inserted right after the start directory, ahead of the other tests.
        find_cmd[2:2] = ["-not", "-path", f"*/{clean}/*"]

    try:
        find_result = subprocess.run(
            find_cmd, capture_output=True, text=True, timeout=30
        )
        file_list = find_result.stdout
    except Exception as e:
        logger.warning(f"Find failed: {e}")
        return {}

    if not file_list:
        return {}

    file_count = file_list.count("\0")

    def _matching_files(grep_args: list[str], timeout: int) -> list[str]:
        """Run grep over the collected file list and return the matching paths."""
        proc = subprocess.run(
            ["xargs", "-0", "grep", *grep_args],
            input=file_list, capture_output=True, text=True, timeout=timeout,
        )
        return [line.strip() for line in proc.stdout.split("\n") if line.strip()]

    if file_count > 5000:
        # Large repo: fixed-string grep in parallel threads
        quick_checks = {
            "file_uploads": ["multer", "busboy", "formidable", "multipart", "request.files"],
            "outbound_requests": ["webhook", "Webhook", "httpx", "aiohttp", "apprise"],
            "auth_system": ["bcrypt", "argon2", "jsonwebtoken", "passport", "login_required"],
            "template_rendering": ["dangerouslySetInnerHTML", "mark_safe", "render_template_string", "DOMPurify"],
            "database": ["cursor.", "RawSQL", ".raw(", "execute("],
            "graphql": ["graphql", "GraphQL", "ApolloServer"],
            "websocket": ["WebSocket", "socket.io", "Socket.IO"],
            "grpc": ["grpc", "protobuf"],
            "deserialization": ["pickle.load", "yaml.load", "unserialize"],
        }
        # Only check categories the caller asked about; previously this branch
        # ignored feature_indicators and reported web features for any table.
        applicable = [
            (name, keywords)
            for name, keywords in quick_checks.items()
            if name in feature_indicators
        ]

        if applicable:
            def _check_feature(name_and_keywords):
                fname, keywords = name_and_keywords
                grep_args = ["-Fl", "--max-count=1", "--binary-files=without-match"]
                for kw in keywords:
                    grep_args.extend(["-e", kw])
                try:
                    matches = _matching_files(grep_args, timeout=15)
                except Exception as e:
                    # Best effort: a failed or timed-out check counts as no hit.
                    logger.debug("Keyword scan for %s failed: %s", fname, e)
                    return fname, None
                if matches:
                    readable = fname.replace("_", " ").title()
                    return fname, [f"{readable} ({len(matches)} files)"]
                return fname, None

            result: dict[str, list[str]] = {}
            with ThreadPoolExecutor(max_workers=4) as pool:
                # pool.map yields in submission order, so the result dict (and
                # the summary built from it) is ordered the same on every run.
                for fname, value in pool.map(_check_feature, applicable):
                    if value:
                        result[fname] = value
            return result
        # No keyword list applies to this table: fall through to the regex scan.

    # Regex scan per category
    result: dict[str, list[str]] = {}
    for feature_name, patterns in feature_indicators.items():
        combined = "|".join(p for p, _ in patterns)
        if not combined:
            # An empty pattern would match every file.
            continue
        try:
            matched = _matching_files(
                ["-El", "--max-count=1", "--binary-files=without-match", "-e", combined],
                timeout=30,
            )
        except Exception as e:
            # Covers subprocess.TimeoutExpired as well; skip this category.
            logger.debug("Regex scan for %s failed: %s", feature_name, e)
            continue

        files = []
        for fp in matched:
            try:
                rel = str(Path(fp).relative_to(target_dir))
            except ValueError:
                rel = fp
            if "node_modules" not in rel and "/test" not in rel.lower():
                files.append(rel)
        if files:
            readable = feature_name.replace("_", " ").title()
            result[feature_name] = [f"{readable} ({len(files)} files)"]

    return result
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def run_deterministic_recon(tools: ToolRegistry) -> dict:
    """Run deterministic recon and return structured results.

    Steps, in order: framework detection (with a root-directory fallback
    heuristic when ``detect_frameworks`` finds nothing), attack surface
    discovery, feature detection, auth/route summarization, dependency
    filtering, and summary assembly. Attack surface discovery, the root
    listing and the dependency check are best effort: their failures are
    logged and the run continues.

    Args:
        tools: Tool registry; ``fs_tools``, ``nextjs_tools`` and
            ``execute_tool`` are used.

    Returns:
        dict with these keys:

        - "summary": formatted string suitable for researcher system prompts.
        - "type": always ``"recon_complete"``.
        - "frameworks": the ``detect_frameworks`` result, or the fallback list
          of ``{"framework": ..., "root": "."}`` dicts (may be empty).
        - "attack_surface": the ``discover_attack_surface`` result, or
          ``{"total_endpoints": 0}`` if discovery raised.
        - "features": category name -> list of indicator strings.
    """
    fs = tools.fs_tools

    # 1. Framework detection
    frameworks = detect_frameworks(fs)
    is_c_project = False

    if not frameworks:
        # Fallback: check for common indicator files in the project root
        root_files = set()
        try:
            listing = fs.list_dir(".")
            entries = listing.get("entries", [])
            if isinstance(entries, list):
                root_files = {e.get("name", "") for e in entries}
        except Exception as e:
            logger.debug("Root directory listing failed: %s", e)

        if "server" in root_files or "app.js" in root_files:
            frameworks = [{"framework": "express", "root": "."}]
        elif "manage.py" in root_files:
            frameworks = [{"framework": "django", "root": "."}]
        elif "requirements.txt" in root_files or "pyproject.toml" in root_files:
            frameworks = [{"framework": "flask", "root": "."}]
        elif "package.json" in root_files:
            frameworks = [{"framework": "nextjs", "root": "."}]
        elif "pom.xml" in root_files or "build.gradle" in root_files or "build.gradle.kts" in root_files:
            frameworks = [{"framework": "java", "root": "."}]
        elif any(f.endswith(".csproj") or f.endswith(".sln") for f in root_files):
            frameworks = [{"framework": "dotnet", "root": "."}]
        elif "Cargo.toml" in root_files:
            frameworks = [{"framework": "rust", "root": "."}]
        elif "Makefile" in root_files or "CMakeLists.txt" in root_files or "configure" in root_files or "Makefile.am" in root_files:
            # C/C++ project detection: a build file alone is not enough, so
            # require more than 10 matching source/header files.
            c_files = fs.glob("**/*.c", ".")
            h_files = fs.glob("**/*.h", ".")
            c_count = len(c_files.get("matches", []))
            h_count = len(h_files.get("matches", []))
            if c_count > 10 or h_count > 10:
                is_c_project = True
                frameworks = [{"framework": "c", "root": "."}]
            # Checked second so that a sizeable C++ code base wins over "c".
            cpp_files = fs.glob("**/*.cpp", ".")
            cpp_count = len(cpp_files.get("matches", []))
            if cpp_count > 10:
                is_c_project = True
                frameworks = [{"framework": "cpp", "root": "."}]

    # 2. Attack surface discovery
    try:
        attack_surface = discover_attack_surface(fs, nextjs_tools=tools.nextjs_tools)
    except Exception as e:
        logger.warning(f"Attack surface discovery failed: {e}")
        attack_surface = {"total_endpoints": 0}

    # 3. Feature detection (C/C++ projects get their own indicator table)
    feature_indicators = _C_FEATURE_INDICATORS if is_c_project else _FEATURE_INDICATORS
    detected_features: dict[str, list[str]] = _detect_features_fast(fs, feature_indicators)

    # 4. Read key config files for auth/route info
    auth_info = _detect_auth_config(fs)
    route_info = _detect_routes(fs, attack_surface)

    # 5. Dependencies: keep only names containing a security-related keyword
    deps_info = ""
    try:
        deps_result = tools.execute_tool("check_dependencies", {})
        if isinstance(deps_result, dict) and "dependencies" in deps_result:
            security_deps = [
                d for d in deps_result["dependencies"]
                if any(kw in d.get("name", "").lower() for kw in
                       ["auth", "jwt", "bcrypt", "csrf", "helmet", "cors", "sanitize",
                        "passport", "session", "crypto", "apprise", "webhook"])
            ]
            if security_deps:
                # Cap at 15 names to keep the summary short.
                deps_info = "Security-relevant dependencies: " + ", ".join(
                    d.get("name", "") for d in security_deps[:15]
                )
    except Exception as e:
        logger.debug("Dependency check failed: %s", e)

    # 6. Build structured summary
    summary = _build_summary(frameworks, attack_surface, detected_features,
                             auth_info, route_info, deps_info)

    return {
        "summary": summary,
        "type": "recon_complete",
        "frameworks": frameworks,
        "attack_surface": attack_surface,
        "features": detected_features,
    }
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def _detect_auth_config(fs: FileSystemTools) -> str:
|
|
342
|
+
"""Detect auth configuration by reading common config files."""
|
|
343
|
+
auth_lines = []
|
|
344
|
+
|
|
345
|
+
# Check for common auth config files
|
|
346
|
+
config_files = [
|
|
347
|
+
"server/config/policies.js", # Sails.js
|
|
348
|
+
"server/config/security.js", # Sails.js
|
|
349
|
+
"config/policies.js",
|
|
350
|
+
"src/middleware.ts", # Next.js
|
|
351
|
+
"middleware.ts",
|
|
352
|
+
"app/middleware.py", # Django
|
|
353
|
+
"config/routes.rb", # Rails
|
|
354
|
+
]
|
|
355
|
+
|
|
356
|
+
for config_file in config_files:
|
|
357
|
+
try:
|
|
358
|
+
result = fs.read_file(config_file)
|
|
359
|
+
if "error" not in result:
|
|
360
|
+
content = result.get("content", "")
|
|
361
|
+
# Count lines to gauge complexity
|
|
362
|
+
line_count = len(content.split("\n"))
|
|
363
|
+
auth_lines.append(f"Auth config found: {config_file} ({line_count} lines)")
|
|
364
|
+
break # Found one, that's enough for the summary
|
|
365
|
+
except Exception:
|
|
366
|
+
pass
|
|
367
|
+
|
|
368
|
+
# Check for auth middleware patterns
|
|
369
|
+
try:
|
|
370
|
+
result = fs.grep(r"is.?authenticated|is.?admin|requireAuth|login_required", ".")
|
|
371
|
+
matches = result.get("matches", [])
|
|
372
|
+
if matches:
|
|
373
|
+
files = set()
|
|
374
|
+
for m in matches:
|
|
375
|
+
fp = m if isinstance(m, str) else m.get("file", "")
|
|
376
|
+
if fp and "node_modules" not in fp and "test" not in fp.lower():
|
|
377
|
+
files.add(fp)
|
|
378
|
+
if files:
|
|
379
|
+
auth_lines.append(f"Auth middleware in {len(files)} files")
|
|
380
|
+
except Exception:
|
|
381
|
+
pass
|
|
382
|
+
|
|
383
|
+
return "; ".join(auth_lines) if auth_lines else "No auth config detected"
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def _detect_routes(fs: FileSystemTools, attack_surface: dict) -> str:
|
|
387
|
+
"""Summarize route information from attack surface."""
|
|
388
|
+
parts = []
|
|
389
|
+
|
|
390
|
+
route_count = len(attack_surface.get("route_handlers", []))
|
|
391
|
+
api_count = len(attack_surface.get("api_routes", []))
|
|
392
|
+
django_count = len(attack_surface.get("django_views", []))
|
|
393
|
+
flask_count = len(attack_surface.get("flask_routes", []))
|
|
394
|
+
danger_count = len(attack_surface.get("danger_files", []))
|
|
395
|
+
|
|
396
|
+
if route_count:
|
|
397
|
+
parts.append(f"{route_count} Express/Node route handlers")
|
|
398
|
+
if api_count:
|
|
399
|
+
parts.append(f"{api_count} API routes")
|
|
400
|
+
if django_count:
|
|
401
|
+
parts.append(f"{django_count} Django views")
|
|
402
|
+
if flask_count:
|
|
403
|
+
parts.append(f"{flask_count} Flask routes")
|
|
404
|
+
if danger_count:
|
|
405
|
+
parts.append(f"{danger_count} files with dangerous patterns")
|
|
406
|
+
|
|
407
|
+
total = attack_surface.get("total_endpoints", 0)
|
|
408
|
+
parts.append(f"{total} total endpoints")
|
|
409
|
+
|
|
410
|
+
return "; ".join(parts)
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
def _build_summary(
|
|
414
|
+
frameworks: list[dict],
|
|
415
|
+
attack_surface: dict,
|
|
416
|
+
features: dict[str, list[str]],
|
|
417
|
+
auth_info: str,
|
|
418
|
+
route_info: str,
|
|
419
|
+
deps_info: str,
|
|
420
|
+
) -> str:
|
|
421
|
+
"""Build a formatted summary string for researcher system prompts."""
|
|
422
|
+
lines = []
|
|
423
|
+
|
|
424
|
+
# Frameworks
|
|
425
|
+
if frameworks:
|
|
426
|
+
fw_names = [f"{f['framework']} at {f['root']}/" for f in frameworks]
|
|
427
|
+
lines.append(f"## Application Overview\n- Frameworks: {', '.join(fw_names)}")
|
|
428
|
+
else:
|
|
429
|
+
lines.append("## Application Overview\n- Framework: unknown")
|
|
430
|
+
|
|
431
|
+
# Routes
|
|
432
|
+
lines.append(f"- Routes: {route_info}")
|
|
433
|
+
|
|
434
|
+
# Auth
|
|
435
|
+
lines.append(f"- Auth: {auth_info}")
|
|
436
|
+
|
|
437
|
+
# Dependencies
|
|
438
|
+
if deps_info:
|
|
439
|
+
lines.append(f"- {deps_info}")
|
|
440
|
+
|
|
441
|
+
# Detected features
|
|
442
|
+
if features:
|
|
443
|
+
lines.append("\n## Detected Features")
|
|
444
|
+
for feature_name, indicators in features.items():
|
|
445
|
+
readable = feature_name.replace("_", " ").title()
|
|
446
|
+
lines.append(f"\n### {readable}")
|
|
447
|
+
for indicator in indicators:
|
|
448
|
+
lines.append(f"- {indicator}")
|
|
449
|
+
|
|
450
|
+
# Key files from attack surface
|
|
451
|
+
route_handlers = attack_surface.get("route_handlers", [])
|
|
452
|
+
if route_handlers:
|
|
453
|
+
lines.append("\n## Route Handler Files")
|
|
454
|
+
for ep in route_handlers[:20]:
|
|
455
|
+
lines.append(f"- `{ep['file']}`")
|
|
456
|
+
|
|
457
|
+
danger_files = attack_surface.get("danger_files", [])
|
|
458
|
+
if danger_files:
|
|
459
|
+
lines.append("\n## High-Signal Files (dangerous patterns)")
|
|
460
|
+
for ep in danger_files[:15]:
|
|
461
|
+
trigger = ep.get("trigger", "")
|
|
462
|
+
lines.append(f"- `{ep['file']}` — {trigger}")
|
|
463
|
+
|
|
464
|
+
return "\n".join(lines)
|