openhack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. openhack/__init__.py +2 -0
  2. openhack/__main__.py +225 -0
  3. openhack/agents/__init__.py +30 -0
  4. openhack/agents/base.py +230 -0
  5. openhack/agents/browser_verifier.py +679 -0
  6. openhack/agents/browser_verifier_swarm.py +256 -0
  7. openhack/agents/checkpoint.py +89 -0
  8. openhack/agents/context_manager.py +356 -0
  9. openhack/agents/coordinator.py +1105 -0
  10. openhack/agents/endpoint_analyst.py +307 -0
  11. openhack/agents/feature_hunter.py +93 -0
  12. openhack/agents/hunter.py +481 -0
  13. openhack/agents/hunter_swarm.py +385 -0
  14. openhack/agents/llm.py +334 -0
  15. openhack/agents/recon.py +19 -0
  16. openhack/agents/sandbox_verifier.py +396 -0
  17. openhack/agents/sandbox_verifier_swarm.py +250 -0
  18. openhack/agents/session.py +286 -0
  19. openhack/agents/validator.py +217 -0
  20. openhack/agents/validator_swarm.py +106 -0
  21. openhack/auth.py +175 -0
  22. openhack/browser/__init__.py +12 -0
  23. openhack/browser/runner.py +385 -0
  24. openhack/categories.py +130 -0
  25. openhack/config.py +201 -0
  26. openhack/deterministic_recon.py +464 -0
  27. openhack/entry_points.py +745 -0
  28. openhack/framework_classifier.py +515 -0
  29. openhack/framework_detection.py +269 -0
  30. openhack/headless_scan.py +179 -0
  31. openhack/prompts/__init__.py +108 -0
  32. openhack/prompts/browser_verifier.py +171 -0
  33. openhack/prompts/coordinator.py +31 -0
  34. openhack/prompts/django/__init__.py +32 -0
  35. openhack/prompts/django/auth_bypass.py +76 -0
  36. openhack/prompts/django/csrf.py +62 -0
  37. openhack/prompts/django/data_exposure.py +67 -0
  38. openhack/prompts/django/idor.py +74 -0
  39. openhack/prompts/django/injection.py +67 -0
  40. openhack/prompts/django/misconfiguration.py +70 -0
  41. openhack/prompts/django/ssrf.py +64 -0
  42. openhack/prompts/endpoint_analyst.py +122 -0
  43. openhack/prompts/express/__init__.py +29 -0
  44. openhack/prompts/express/auth_bypass.py +71 -0
  45. openhack/prompts/express/data_exposure.py +77 -0
  46. openhack/prompts/express/idor.py +69 -0
  47. openhack/prompts/express/injection.py +75 -0
  48. openhack/prompts/express/misconfiguration.py +72 -0
  49. openhack/prompts/express/ssrf.py +63 -0
  50. openhack/prompts/feature_hunter.py +140 -0
  51. openhack/prompts/flask/__init__.py +29 -0
  52. openhack/prompts/flask/auth_bypass.py +86 -0
  53. openhack/prompts/flask/data_exposure.py +78 -0
  54. openhack/prompts/flask/idor.py +83 -0
  55. openhack/prompts/flask/injection.py +77 -0
  56. openhack/prompts/flask/misconfiguration.py +73 -0
  57. openhack/prompts/flask/ssrf.py +65 -0
  58. openhack/prompts/hunter.py +362 -0
  59. openhack/prompts/hunter_continuation_loop.py +12 -0
  60. openhack/prompts/hunter_continuation_no_findings.py +19 -0
  61. openhack/prompts/hunter_continuation_no_progress.py +22 -0
  62. openhack/prompts/hunter_tool_instructions.py +55 -0
  63. openhack/prompts/nextjs/__init__.py +42 -0
  64. openhack/prompts/nextjs/auth_bypass.py +80 -0
  65. openhack/prompts/nextjs/csrf.py +71 -0
  66. openhack/prompts/nextjs/data_exposure.py +88 -0
  67. openhack/prompts/nextjs/idor.py +64 -0
  68. openhack/prompts/nextjs/injection.py +65 -0
  69. openhack/prompts/nextjs/middleware_bypass.py +75 -0
  70. openhack/prompts/nextjs/misconfiguration.py +92 -0
  71. openhack/prompts/nextjs/server_actions.py +97 -0
  72. openhack/prompts/nextjs/ssrf.py +66 -0
  73. openhack/prompts/nextjs/xss.py +69 -0
  74. openhack/prompts/pr_analysis_system.py +80 -0
  75. openhack/prompts/pr_analysis_user.py +11 -0
  76. openhack/prompts/project_context.py +89 -0
  77. openhack/prompts/recon.py +199 -0
  78. openhack/prompts/reporter.py +88 -0
  79. openhack/prompts/researchers.py +434 -0
  80. openhack/prompts/sandbox_verifier.py +128 -0
  81. openhack/prompts/supabase/__init__.py +39 -0
  82. openhack/prompts/supabase/auth_tokens.py +131 -0
  83. openhack/prompts/supabase/edge_functions.py +150 -0
  84. openhack/prompts/supabase/graphql.py +102 -0
  85. openhack/prompts/supabase/postgrest.py +99 -0
  86. openhack/prompts/supabase/realtime.py +93 -0
  87. openhack/prompts/supabase/rls.py +110 -0
  88. openhack/prompts/supabase/rpc_functions.py +127 -0
  89. openhack/prompts/supabase/storage.py +110 -0
  90. openhack/prompts/supabase/tenant_isolation.py +118 -0
  91. openhack/prompts/validator.py +319 -0
  92. openhack/prompts/validator_continuation_incomplete.py +12 -0
  93. openhack/prompts/validator_tool_instructions.py +29 -0
  94. openhack/quality.py +231 -0
  95. openhack/sandbox/__init__.py +12 -0
  96. openhack/sandbox/orchestrator.py +517 -0
  97. openhack/sandbox/runner.py +177 -0
  98. openhack/scan_session.py +245 -0
  99. openhack/setup.py +452 -0
  100. openhack/static_validator.py +612 -0
  101. openhack/tools/__init__.py +1 -0
  102. openhack/tools/ast_tools.py +307 -0
  103. openhack/tools/coverage.py +1078 -0
  104. openhack/tools/filesystem.py +404 -0
  105. openhack/tools/nextjs.py +258 -0
  106. openhack/tools/registry.py +52 -0
  107. openhack/tui.py +3450 -0
  108. openhack/updates.py +170 -0
  109. openhack-0.1.0.dist-info/METADATA +189 -0
  110. openhack-0.1.0.dist-info/RECORD +113 -0
  111. openhack-0.1.0.dist-info/WHEEL +4 -0
  112. openhack-0.1.0.dist-info/entry_points.txt +2 -0
  113. openhack-0.1.0.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,307 @@
1
+ """
2
+ Endpoint analyst agent — per-entry-point security analysis.
3
+
4
+ Instead of category-based researchers that each scan the whole codebase for
5
+ one type of vulnerability, this agent receives specific endpoints and checks
6
+ them against a comprehensive security checklist.
7
+ """
8
+
9
+ import asyncio
10
+ import logging
11
+ from collections import defaultdict
12
+ from typing import Optional
13
+
14
+ from .hunter import HunterAgent
15
+ from .hunter_swarm import HunterSwarmAgent
16
+ from .llm import LLMClient
17
+ from .session import Session
18
+ from openhack.prompts import format_project_context
19
+ from openhack.prompts.endpoint_analyst import ENDPOINT_ANALYST_PROMPT
20
+ from openhack.tools.registry import ToolRegistry
21
+ from openhack.config import settings
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
class EndpointAnalystAgent(HunterAgent):
    """Hunter variant that is handed a fixed list of endpoints to audit.

    The endpoint list is rendered into the system prompt so the model knows
    exactly which handlers it is responsible for.
    """

    max_iterations = settings.feature_hunter_max_iterations

    DEFAULT_CATEGORIES = [
        "idor", "xss", "csrf", "ssrf", "injection",
        "auth_bypass", "data_exposure", "middleware_bypass",
        "server_actions", "misconfiguration", "path_traversal",
        "command_injection", "rce", "open_redirect",
        "xxe", "insecure_deserialization", "race_condition",
        "cors_misconfiguration", "business_logic", "mass_assignment",
    ]

    def __init__(
        self,
        llm: LLMClient,
        tools: ToolRegistry,
        session: Session,
        endpoints: list[dict],
        group_name: str,
        **kwargs,
    ):
        """Create an analyst for one endpoint group.

        Args:
            llm: Client used for model calls.
            tools: Tool registry forwarded to the base agent.
            session: Session forwarded to the base agent.
            endpoints: Endpoint dicts; the keys read later are ``method``,
                ``path``, ``file``, ``line`` and ``auth``.
            group_name: Label of the group, used in the agent name.
            **kwargs: Forwarded unchanged to ``HunterAgent.__init__``.
        """
        super().__init__(
            llm,
            tools,
            session,
            vuln_categories=self.DEFAULT_CATEGORIES,
            group_name=group_name,
            framework=None,
            **kwargs,
        )
        self.endpoints = endpoints
        self.name = f"analyst:{group_name}"
        self.description = f"Endpoint analyst ({group_name})"

    def get_system_prompt(self, context: dict) -> str:
        """Fill ``ENDPOINT_ANALYST_PROMPT`` with recon data and the endpoint list.

        Args:
            context: Shared scan context; ``recon.summary`` and
                ``project_context`` are read from it when present.

        Returns:
            The formatted system prompt.
        """

        def render(ep: dict) -> str:
            # One markdown bullet per endpoint: "METHOD path → `file` (line N) [auth: X]".
            verb = ep.get("method", "ALL")
            # Fall back to the file name when the endpoint has no route path.
            route = ep.get("path", ep.get("file", "unknown"))
            source_file = ep.get("file", "unknown")
            line_no = ep.get("line")
            auth = ep.get("auth")

            location = f"`{source_file}`"
            if line_no:
                location = f"{location} (line {line_no})"
            auth_suffix = f" [auth: {auth}]" if auth else ""
            return f"- **{verb} {route}** → {location}{auth_suffix}"

        return ENDPOINT_ANALYST_PROMPT.format(
            recon_context=context.get("recon", {}).get("summary", "No recon data available"),
            project_context=format_project_context(context.get("project_context", {})),
            endpoint_assignments="\n".join(render(ep) for ep in self.endpoints),
        )
85
+
86
+
87
def group_entry_points(entry_points: list[dict], max_groups: int = 12) -> dict[str, list[dict]]:
    """Group entry points by directory for analyst assignment.

    Endpoints whose files share the path segment directly after a routing
    directory (``api``, ``routes``, ``controllers``, ``views``, ``handlers``)
    land in the same group, e.g. everything under ``api/auth/`` becomes the
    ``auth`` group. When more than ``max_groups`` groups result, the largest
    ``max_groups - 1`` groups are kept as-is and all remaining (smaller)
    groups are merged into a single ``"misc"`` group.

    Args:
        entry_points: Endpoint dicts; only the ``file`` key is read. A missing
            or ``None`` file is treated as an empty path.
        max_groups: Upper bound on the number of groups returned.

    Returns:
        Mapping of group name to the endpoints assigned to it.
    """
    routing_dirs = ("api", "routes", "controllers", "views", "handlers")
    by_dir: dict[str, list[dict]] = defaultdict(list)

    for ep in entry_points:
        # ``or ""`` guards against an explicit ``None`` file value.
        file_path = ep.get("file") or ""
        parts = file_path.replace("\\", "/").split("/")

        # Index of the first routing directory in the path, if any.
        # "src/app/api/auth/login/route.ts"  -> "auth"
        # "src/app/api/orders/[id]/route.ts" -> "orders"
        api_idx = next(
            (i for i, part in enumerate(parts) if part in routing_dirs),
            None,
        )

        if api_idx is not None and api_idx + 1 < len(parts):
            group_key = parts[api_idx + 1]
            # Dynamic segments such as "[id]" or ":id" make poor group names;
            # fall back to the routing directory itself.
            if group_key.startswith(("[", ":")):
                group_key = parts[api_idx] if api_idx > 0 else "root"
        elif len(parts) >= 2:
            # No routing directory: use the parent dir for "route.*" files,
            # otherwise the file stem.
            if parts[-1].startswith("route"):
                group_key = parts[-2]
            else:
                group_key = parts[-1].split(".")[0]
        else:
            group_key = "root"

        by_dir[group_key].append(ep)

    if len(by_dir) <= max_groups:
        return dict(by_dir)

    # Too many groups: keep the biggest ones separate and fold the small ones
    # together. Sorting largest-first ensures it is the *small* groups that
    # end up in the overflow bucket.
    largest_first = sorted(by_dir.items(), key=lambda item: len(item[1]), reverse=True)
    merged: dict[str, list[dict]] = {}
    overflow: list[dict] = []

    for name, endpoints in largest_first:
        if len(merged) < max_groups - 1:
            merged[name] = endpoints
        else:
            overflow.extend(endpoints)

    if overflow:
        # Append to a kept group that is already called "misc" instead of
        # overwriting it, so no endpoint is dropped.
        merged["misc"] = merged.get("misc", []) + overflow

    return merged
138
+
139
+
140
def _find_cross_cutting_files(tools: ToolRegistry) -> list[dict]:
    """Collect files that are not endpoints but affect many of them.

    Three passes are made through ``tools.fs_tools``: fixed middleware paths,
    auth helper globs, and JSX/TSX components whose source mentions
    ``dangerouslySetInnerHTML`` or ``innerHTML``.

    Returns:
        Endpoint-shaped dicts (``path``, ``method``, ``file``, ``line``,
        ``auth``) so the results can be handed to an analyst like any other
        endpoint group.
    """
    fs = tools.fs_tools
    found: list[dict] = []

    def record(tag: str, method: str, file_path: str) -> None:
        # Same dict shape as a real entry point; line/auth are unknown here.
        found.append({
            "path": f"[{tag}] {file_path}",
            "method": method,
            "file": file_path,
            "line": None,
            "auth": None,
        })

    # Pass 1: middleware at the conventional locations. A result without an
    # "error" key is taken to mean the file was read successfully.
    middleware_candidates = (
        "middleware.ts",
        "middleware.js",
        "src/middleware.ts",
        "src/middleware.js",
    )
    for candidate in middleware_candidates:
        if "error" not in fs.read_file(candidate):
            record("Middleware", "MIDDLEWARE", candidate)

    # Pass 2: auth helper modules, ignoring the listed directories.
    helper_ignored = [".deepsec/", "node_modules/", ".next/"]
    for helper_glob in ["**/lib/auth.*", "**/utils/auth.*", "**/helpers/auth.*"]:
        for hit in fs.glob(helper_glob).get("matches", []):
            if not any(skip in hit for skip in helper_ignored):
                record("Auth Helper", "HELPER", hit)

    # Pass 3: components that inject raw HTML.
    component_ignored = ["node_modules/", ".next/", "test/"]
    for component_glob in ["**/*.tsx", "**/*.jsx"]:
        for hit in fs.glob(component_glob).get("matches", []):
            if any(skip in hit for skip in component_ignored):
                continue
            source = fs.read_file(hit).get("content", "")
            if "dangerouslySetInnerHTML" in source or "innerHTML" in source:
                record("Component", "RENDER", hit)

    return found
191
+
192
+
193
async def run_endpoint_analysts(
    entry_points: list[dict],
    llm_template: LLMClient,
    tools: ToolRegistry,
    session: Session,
    context: dict,
    max_concurrent: int = 3,
) -> dict:
    """Run one ``EndpointAnalystAgent`` per endpoint group and merge the results.

    Args:
        entry_points: Endpoint dicts to be grouped via ``group_entry_points``.
        llm_template: Supplies the provider, prompt cache key and fallback
            model for the per-analyst ``LLMClient`` instances.
        tools: Tool registry shared by all analysts.
        session: Session shared by all analysts.
        context: Context dict passed to each analyst's ``run``.
        max_concurrent: Number of analysts allowed to run at the same time.

    Returns:
        Dict with deduplicated ``findings``, sorted ``files_analyzed`` and the
        summed ``total_cost`` / ``total_tokens`` / ``total_input_tokens`` /
        ``total_output_tokens`` of every analyst's LLM client.
    """
    groups = group_entry_points(entry_points)

    shared_files = _find_cross_cutting_files(tools)
    if shared_files:
        groups["middleware_and_shared"] = shared_files
        logger.info(f"Added cross-cutting group with {len(shared_files)} files")

    # Running usage totals; the keys double as LLMClient attribute names.
    usage = {
        "total_cost": 0.0,
        "total_tokens": 0,
        "total_input_tokens": 0,
        "total_output_tokens": 0,
    }

    if not groups:
        return {"findings": [], "files_analyzed": [], **usage}

    group_sizes = ", ".join(f"{name}({len(eps)})" for name, eps in groups.items())
    logger.info(f"Endpoint analyst groups ({len(groups)}): " + group_sizes)

    semaphore = asyncio.Semaphore(max_concurrent)

    async def run_analyst(group_name: str, endpoints: list[dict]):
        # The semaphore caps how many analysts are active at once.
        async with semaphore:
            model = (
                settings.feature_hunter_model_id
                or settings.hunter_model_id
                or llm_template.model
            )
            # Each analyst gets its own client so usage can be read per group.
            llm = LLMClient(
                model=model,
                temperature=0.0,
                max_tokens=8192,
                provider=llm_template.provider,
                prompt_cache_key=llm_template.prompt_cache_key,
            )
            analyst = EndpointAnalystAgent(
                llm,
                tools,
                session,
                endpoints=endpoints,
                group_name=group_name,
            )

            # Short task description: name at most five endpoints explicitly.
            preview = [
                f"{ep.get('method', 'ALL')} {ep.get('path', '?')}"
                for ep in endpoints[:5]
            ]
            ep_summary = ", ".join(preview)
            remaining = len(endpoints) - 5
            if remaining > 0:
                ep_summary += f" (+{remaining} more)"

            task_text = (
                f"Analyze these {len(endpoints)} endpoint(s) for security vulnerabilities: "
                f"{ep_summary}. "
                f"Read each handler file, trace dependencies, and check against the full "
                f"security checklist. Report every real vulnerability you find."
            )

            try:
                outcome = await analyst.run(task_text, context=context)
            except Exception as e:
                # One failing analyst must not abort the whole batch.
                logger.error(f"Endpoint analyst {group_name} failed: {e}")
                outcome = {"findings": [], "files_analyzed": []}
            return group_name, outcome, llm

    tasks = [
        asyncio.create_task(run_analyst(name, eps))
        for name, eps in groups.items()
    ]

    try:
        results = await asyncio.gather(*tasks)
    except asyncio.CancelledError:
        # Propagate cancellation to the children and wait for them to finish
        # before re-raising.
        for pending in tasks:
            pending.cancel()
        await asyncio.gather(*tasks, return_exceptions=True)
        raise

    collected = []
    touched_files = set()

    for group_name, result, llm_client in results:
        group_findings = result.get("findings", [])
        collected.extend(group_findings)
        touched_files.update(result.get("files_analyzed", []))
        for counter in usage:
            usage[counter] += getattr(llm_client, counter)
        logger.info(f"Analyst {group_name}: {len(group_findings)} findings")

    collected = HunterSwarmAgent._deduplicate_findings(collected)

    return {
        "findings": collected,
        "files_analyzed": sorted(touched_files),
        **usage,
    }
@@ -0,0 +1,93 @@
1
+ """
2
+ Feature Deep Dive hunter agent.
3
+
4
+ Works like a human security researcher: reads the codebase, decides what's
5
+ interesting, goes deep on the riskiest features. No pre-assigned feature list —
6
+ the agent reads the route map, picks its own targets, and audits them.
7
+ """
8
+
9
+ import logging
10
+ from typing import Optional
11
+
12
+ from .hunter import HunterAgent
13
+ from .llm import LLMClient
14
+ from .session import Session
15
+ from openhack.prompts import format_project_context
16
+ from openhack.prompts.feature_hunter import FEATURE_HUNTER_PROMPT
17
+ from openhack.tools.registry import ToolRegistry
18
+ from openhack.config import settings
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
class FeatureHunterAgent(HunterAgent):
    """Security researcher agent that picks its own targets and goes deep.

    Runs in one of two modes: with a pre-assigned ``feature`` dict (legacy),
    or without one, in which case the prompt leaves target selection to the
    model.
    """

    max_iterations = settings.feature_hunter_max_iterations

    # Vulnerability categories passed to the base hunter.
    DEFAULT_CATEGORIES = [
        "idor", "xss", "csrf", "ssrf", "injection",
        "auth_bypass", "data_exposure", "middleware_bypass",
        "server_actions", "misconfiguration", "path_traversal",
        "command_injection", "rce", "open_redirect",
    ]

    def __init__(
        self,
        llm: LLMClient,
        tools: ToolRegistry,
        session: Session,
        feature: Optional[dict] = None,
        hunter_id: int = 0,
        **kwargs,
    ):
        """Create a feature hunter.

        Args:
            llm: Client used for model calls.
            tools: Tool registry forwarded to the base agent.
            session: Session forwarded to the base agent.
            feature: Optional pre-assigned feature; the keys read are
                ``name``, ``description``, ``entry_files`` and ``risk_reason``.
            hunter_id: Numeric id used for naming when no feature is given.
            **kwargs: Forwarded unchanged to ``HunterAgent.__init__``.
        """
        # Use the same "unknown" fallback as get_system_prompt so a feature
        # dict without a name does not raise KeyError here.
        feature_name = feature.get("name", "unknown") if feature else None
        group = f"feature:{feature_name}" if feature else f"researcher:{hunter_id}"
        super().__init__(
            llm, tools, session,
            vuln_categories=self.DEFAULT_CATEGORIES,
            group_name=group,
            framework=None,
            **kwargs,
        )
        self.feature = feature
        self.hunter_id = hunter_id

        if feature:
            self.name = f"feature_hunter:{feature_name}"
            self.description = f"Deep dive on {feature_name}"
        else:
            self.name = f"researcher:{hunter_id}"
            self.description = f"Security researcher #{hunter_id}"

    def get_system_prompt(self, context: dict) -> str:
        """Fill ``FEATURE_HUNTER_PROMPT`` with recon and project context.

        When a feature was assigned, a section describing it is placed in
        front of the recon summary.

        Args:
            context: Shared scan context; ``recon.summary`` and
                ``project_context`` are read from it when present.

        Returns:
            The formatted system prompt.
        """
        recon_context = context.get("recon", {}).get("summary", "No recon data available")
        project_context_str = format_project_context(context.get("project_context", {}))

        if self.feature:
            # Legacy mode: pre-assigned feature.
            entry_files = self.feature.get("entry_files", [])
            if isinstance(entry_files, list):
                files_str = "\n".join(f"- `{f}`" for f in entry_files)
            else:
                files_str = str(entry_files)

            feature_section = (
                f"\n## Your Assigned Target Feature\n\n"
                f"**Feature**: {self.feature.get('name', 'unknown')}\n"
                f"**Description**: {self.feature.get('description', '')}\n"
                f"**Key Files**: \n{files_str}\n"
                f"**Why High-Risk**: {self.feature.get('risk_reason', '')}\n"
            )
            recon_context = (
                feature_section + "\n\n## Full Application Context\n\n" + recon_context
            )

        # Without a feature the recon summary is used as-is and the agent
        # picks its own targets.
        return FEATURE_HUNTER_PROMPT.format(
            recon_context=recon_context,
            project_context=project_context_str,
        )