openhack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. openhack/__init__.py +2 -0
  2. openhack/__main__.py +225 -0
  3. openhack/agents/__init__.py +30 -0
  4. openhack/agents/base.py +230 -0
  5. openhack/agents/browser_verifier.py +679 -0
  6. openhack/agents/browser_verifier_swarm.py +256 -0
  7. openhack/agents/checkpoint.py +89 -0
  8. openhack/agents/context_manager.py +356 -0
  9. openhack/agents/coordinator.py +1105 -0
  10. openhack/agents/endpoint_analyst.py +307 -0
  11. openhack/agents/feature_hunter.py +93 -0
  12. openhack/agents/hunter.py +481 -0
  13. openhack/agents/hunter_swarm.py +385 -0
  14. openhack/agents/llm.py +334 -0
  15. openhack/agents/recon.py +19 -0
  16. openhack/agents/sandbox_verifier.py +396 -0
  17. openhack/agents/sandbox_verifier_swarm.py +250 -0
  18. openhack/agents/session.py +286 -0
  19. openhack/agents/validator.py +217 -0
  20. openhack/agents/validator_swarm.py +106 -0
  21. openhack/auth.py +175 -0
  22. openhack/browser/__init__.py +12 -0
  23. openhack/browser/runner.py +385 -0
  24. openhack/categories.py +130 -0
  25. openhack/config.py +201 -0
  26. openhack/deterministic_recon.py +464 -0
  27. openhack/entry_points.py +745 -0
  28. openhack/framework_classifier.py +515 -0
  29. openhack/framework_detection.py +269 -0
  30. openhack/headless_scan.py +179 -0
  31. openhack/prompts/__init__.py +108 -0
  32. openhack/prompts/browser_verifier.py +171 -0
  33. openhack/prompts/coordinator.py +31 -0
  34. openhack/prompts/django/__init__.py +32 -0
  35. openhack/prompts/django/auth_bypass.py +76 -0
  36. openhack/prompts/django/csrf.py +62 -0
  37. openhack/prompts/django/data_exposure.py +67 -0
  38. openhack/prompts/django/idor.py +74 -0
  39. openhack/prompts/django/injection.py +67 -0
  40. openhack/prompts/django/misconfiguration.py +70 -0
  41. openhack/prompts/django/ssrf.py +64 -0
  42. openhack/prompts/endpoint_analyst.py +122 -0
  43. openhack/prompts/express/__init__.py +29 -0
  44. openhack/prompts/express/auth_bypass.py +71 -0
  45. openhack/prompts/express/data_exposure.py +77 -0
  46. openhack/prompts/express/idor.py +69 -0
  47. openhack/prompts/express/injection.py +75 -0
  48. openhack/prompts/express/misconfiguration.py +72 -0
  49. openhack/prompts/express/ssrf.py +63 -0
  50. openhack/prompts/feature_hunter.py +140 -0
  51. openhack/prompts/flask/__init__.py +29 -0
  52. openhack/prompts/flask/auth_bypass.py +86 -0
  53. openhack/prompts/flask/data_exposure.py +78 -0
  54. openhack/prompts/flask/idor.py +83 -0
  55. openhack/prompts/flask/injection.py +77 -0
  56. openhack/prompts/flask/misconfiguration.py +73 -0
  57. openhack/prompts/flask/ssrf.py +65 -0
  58. openhack/prompts/hunter.py +362 -0
  59. openhack/prompts/hunter_continuation_loop.py +12 -0
  60. openhack/prompts/hunter_continuation_no_findings.py +19 -0
  61. openhack/prompts/hunter_continuation_no_progress.py +22 -0
  62. openhack/prompts/hunter_tool_instructions.py +55 -0
  63. openhack/prompts/nextjs/__init__.py +42 -0
  64. openhack/prompts/nextjs/auth_bypass.py +80 -0
  65. openhack/prompts/nextjs/csrf.py +71 -0
  66. openhack/prompts/nextjs/data_exposure.py +88 -0
  67. openhack/prompts/nextjs/idor.py +64 -0
  68. openhack/prompts/nextjs/injection.py +65 -0
  69. openhack/prompts/nextjs/middleware_bypass.py +75 -0
  70. openhack/prompts/nextjs/misconfiguration.py +92 -0
  71. openhack/prompts/nextjs/server_actions.py +97 -0
  72. openhack/prompts/nextjs/ssrf.py +66 -0
  73. openhack/prompts/nextjs/xss.py +69 -0
  74. openhack/prompts/pr_analysis_system.py +80 -0
  75. openhack/prompts/pr_analysis_user.py +11 -0
  76. openhack/prompts/project_context.py +89 -0
  77. openhack/prompts/recon.py +199 -0
  78. openhack/prompts/reporter.py +88 -0
  79. openhack/prompts/researchers.py +434 -0
  80. openhack/prompts/sandbox_verifier.py +128 -0
  81. openhack/prompts/supabase/__init__.py +39 -0
  82. openhack/prompts/supabase/auth_tokens.py +131 -0
  83. openhack/prompts/supabase/edge_functions.py +150 -0
  84. openhack/prompts/supabase/graphql.py +102 -0
  85. openhack/prompts/supabase/postgrest.py +99 -0
  86. openhack/prompts/supabase/realtime.py +93 -0
  87. openhack/prompts/supabase/rls.py +110 -0
  88. openhack/prompts/supabase/rpc_functions.py +127 -0
  89. openhack/prompts/supabase/storage.py +110 -0
  90. openhack/prompts/supabase/tenant_isolation.py +118 -0
  91. openhack/prompts/validator.py +319 -0
  92. openhack/prompts/validator_continuation_incomplete.py +12 -0
  93. openhack/prompts/validator_tool_instructions.py +29 -0
  94. openhack/quality.py +231 -0
  95. openhack/sandbox/__init__.py +12 -0
  96. openhack/sandbox/orchestrator.py +517 -0
  97. openhack/sandbox/runner.py +177 -0
  98. openhack/scan_session.py +245 -0
  99. openhack/setup.py +452 -0
  100. openhack/static_validator.py +612 -0
  101. openhack/tools/__init__.py +1 -0
  102. openhack/tools/ast_tools.py +307 -0
  103. openhack/tools/coverage.py +1078 -0
  104. openhack/tools/filesystem.py +404 -0
  105. openhack/tools/nextjs.py +258 -0
  106. openhack/tools/registry.py +52 -0
  107. openhack/tui.py +3450 -0
  108. openhack/updates.py +170 -0
  109. openhack-0.1.0.dist-info/METADATA +189 -0
  110. openhack-0.1.0.dist-info/RECORD +113 -0
  111. openhack-0.1.0.dist-info/WHEEL +4 -0
  112. openhack-0.1.0.dist-info/entry_points.txt +2 -0
  113. openhack-0.1.0.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,256 @@
1
+ """
2
+ Browser verifier swarm agent.
3
+
4
+ Spawns one browser verifier per confirmed finding and runs them concurrently
5
+ against the live sandboxed application. All verifiers share the same Playwright
6
+ browser instance but get isolated browser contexts.
7
+ """
8
+
9
+ import asyncio
10
+ import logging
11
+ from pathlib import Path
12
+ from typing import Optional
13
+
14
+ from .browser_verifier import BrowserVerifierAgent
15
+ from .llm import LLMClient
16
+ from .session import Session
17
+ from ..sandbox.orchestrator import SandboxOrchestrator, SandboxConfig
18
+ from ..browser.runner import BrowserRunner
19
+ from openhack.tools.registry import ToolRegistry
20
+ from openhack.config import settings
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
class BrowserVerifierSwarmAgent:
    """Run browser-based verification for all confirmed findings concurrently.

    One ``BrowserVerifierAgent`` is created per finding. All of them are handed
    the same ``BrowserRunner`` and the same ``SandboxOrchestrator``; concurrency
    is bounded by ``settings.max_concurrent_validators``. LLM usage of every
    per-finding client is accumulated on the ``total_*`` attributes.
    """

    name = "browser_verifier_swarm"
    description = "Browser exploit verification swarm"

    def __init__(
        self,
        llm: LLMClient,
        tools: ToolRegistry,
        session: Session,
        sandbox_config: Optional[SandboxConfig] = None,
    ):
        """Store collaborators and reset the usage counters.

        Args:
            llm: Template client; its model, provider and prompt cache key are
                reused when per-verifier clients are created.
            tools: Tool registry; ``tools.target_dir`` is the sandbox target.
            session: Session used for trace events and for the evidence
                directory name.
            sandbox_config: Optional configuration forwarded to the
                ``SandboxOrchestrator``.
        """
        self.llm = llm
        self.tools = tools
        self.session = session
        self.sandbox_config = sandbox_config
        self.total_cost: float = 0.0
        self.total_tokens: int = 0
        self.total_input_tokens: int = 0
        self.total_output_tokens: int = 0

    def _create_llm_for_verifier(self) -> LLMClient:
        """Create a fresh LLM client for a single verifier.

        Uses ``settings.browser_verifier_model_id`` when set, otherwise the
        model of the swarm's own client.
        """
        model = settings.browser_verifier_model_id or self.llm.model
        return LLMClient(
            model=model,
            temperature=0.0,
            max_tokens=8192,
            provider=self.llm.provider,
            prompt_cache_key=self.llm.prompt_cache_key,
        )

    @staticmethod
    def _skipped_result(idx: int, reason: Optional[str]) -> dict:
        """Build the result payload for a finding skipped after a swarm abort."""
        return {
            "browser_result": {
                "finding_index": idx, "status": "skipped",
                "confidence": "none", "evidence": "Aborted",
                "attempts_made": 0, "reason": reason or "Aborted",
            },
            "type": "browser_verification_skipped",
        }

    async def run(self, task: str, context: Optional[dict] = None) -> dict:
        """Verify every finding in ``context["confirmed_findings"]`` in a browser.

        Starts the sandbox, opens one ``BrowserRunner``, runs one verifier task
        per finding, and always stops the sandbox afterwards. When the last
        ``FAIL_FAST_THRESHOLD`` recorded verifier errors carry the same message,
        the swarm aborts and remaining findings are reported as skipped.

        Args:
            task: Accepted for interface compatibility; not used by this method.
            context: Optional dict; ``confirmed_findings`` (list of finding
                dicts) and ``project_context`` are read from it.

        Returns:
            A dict with ``raw_output``, ``exploitable``, ``not_exploitable``,
            ``evidence_dir`` and ``type``; ``fatal_error`` is added when the
            swarm aborted.
        """
        context = context or {}
        findings = context.get("confirmed_findings", [])

        if not findings:
            return {
                "raw_output": "No findings to verify in browser",
                "exploitable": [],
                "not_exploitable": [],
                "evidence_dir": "",
                "type": "browser_verification_complete",
            }

        self.session.add_trace(
            agent=self.name, event_type="swarm_start",
            content={"findings_count": len(findings)},
        )

        session_id = getattr(self.session, "trace_id", None) or getattr(self.session, "id", "default")
        # str() so a non-string identifier (e.g. a UUID) can be used as a path part.
        evidence_dir = Path.home() / ".openhack" / "evidence" / str(session_id)
        evidence_dir.mkdir(parents=True, exist_ok=True)

        target_dir = self.tools.target_dir
        orchestrator = SandboxOrchestrator(target_dir, self.sandbox_config)

        self.session.add_trace(
            agent=self.name, event_type="sandbox_starting",
            content="Building and starting sandbox containers…",
        )

        try:
            sandbox_status = await orchestrator.start()
            sandbox_url = sandbox_status.base_url

            self.session.add_trace(
                agent=self.name, event_type="sandbox_ready",
                content={"base_url": sandbox_url, "host_port": sandbox_status.host_port},
            )

            async with BrowserRunner(
                base_url=sandbox_url,
                evidence_dir=evidence_dir,
                headless=settings.browser_headless,
                timeout=settings.browser_timeout_ms,
            ) as runner:
                semaphore = asyncio.Semaphore(settings.max_concurrent_validators)
                FAIL_FAST_THRESHOLD = 3
                abort_event = asyncio.Event()
                error_streak: list[str] = []
                fatal_error: Optional[str] = None

                def skip(idx: int, verifier_name: str) -> tuple[int, dict, LLMClient]:
                    """Trace and return the 'skipped' outcome for one finding."""
                    self.session.add_trace(
                        agent=verifier_name, event_type="skipped",
                        content="Skipped — swarm aborted due to repeated failures",
                    )
                    # A fresh client keeps the result tuple uniform for the
                    # usage accounting done after all tasks finish.
                    llm = self._create_llm_for_verifier()
                    return idx, self._skipped_result(idx, fatal_error), llm

                async def run_verifier(idx: int, finding: dict) -> tuple[int, dict, LLMClient]:
                    nonlocal fatal_error
                    verifier_name = f"browser_verifier:finding_{idx}"
                    self.session.add_trace(
                        agent=verifier_name, event_type="queued",
                        content={"finding_index": idx, "title": finding.get("title", "")},
                    )

                    if abort_event.is_set():
                        return skip(idx, verifier_name)

                    async with semaphore:
                        # Re-check: the abort may have happened while this
                        # task was waiting for a semaphore slot.
                        if abort_event.is_set():
                            return skip(idx, verifier_name)

                        llm = self._create_llm_for_verifier()
                        verifier = BrowserVerifierAgent(
                            llm, self.tools, self.session,
                            sandbox_url=sandbox_url,
                            browser_runner=runner,
                            sandbox_orchestrator=orchestrator,
                            finding_index=idx,
                            max_attempts=settings.browser_max_exploit_attempts,
                        )
                        try:
                            sub_context = {
                                "finding": finding,
                                "project_context": context.get("project_context", {}),
                            }
                            result = await verifier.run(
                                "Verify this vulnerability by exploiting it in the browser.",
                                context=sub_context,
                            )
                            # A success resets the streak of recorded errors.
                            error_streak.clear()
                            return idx, result, llm
                        except Exception as e:
                            error_msg = str(e)
                            logger.error("Browser verifier for finding %s failed: %s", idx, e)
                            self.session.add_trace(
                                agent=verifier_name, event_type="error",
                                content=f"Verifier crashed: {e}",
                            )
                            error_streak.append(error_msg)
                            # Abort once the most recent errors are all identical.
                            if (
                                len(error_streak) >= FAIL_FAST_THRESHOLD
                                and len(set(error_streak[-FAIL_FAST_THRESHOLD:])) == 1
                            ):
                                fatal_error = error_msg
                                abort_event.set()
                                self.session.add_trace(
                                    agent=self.name, event_type="swarm_aborted",
                                    content=(
                                        f"Aborting: {FAIL_FAST_THRESHOLD} consecutive "
                                        f"verifiers failed with: {error_msg}"
                                    ),
                                )
                            return idx, {
                                "browser_result": {
                                    "finding_index": idx,
                                    "status": "not_exploitable",
                                    "confidence": "low",
                                    "evidence": f"Verifier crashed: {error_msg}",
                                    "attempts_made": 0,
                                    "reason": "Internal error",
                                },
                                "type": "browser_verification_failed",
                            }, llm

                tasks = [
                    asyncio.create_task(run_verifier(idx, finding))
                    for idx, finding in enumerate(findings)
                ]

                try:
                    results = await asyncio.gather(*tasks)
                except BaseException:
                    # gather() propagates the first failure without cancelling
                    # the other tasks; stop them before the runner and the
                    # sandbox are torn down underneath them.
                    for t in tasks:
                        t.cancel()
                    await asyncio.gather(*tasks, return_exceptions=True)
                    raise

                exploitable = []
                not_exploitable = []

                for idx, result, llm_client in results:
                    self.total_cost += llm_client.total_cost
                    self.total_tokens += llm_client.total_tokens
                    self.total_input_tokens += llm_client.total_input_tokens
                    self.total_output_tokens += llm_client.total_output_tokens

                    browser_result = result.get("browser_result") if result else None
                    if not browser_result:
                        not_exploitable.append({"finding_index": idx, "status": "error", "confidence": "low"})
                        continue
                    if browser_result.get("status") == "exploitable":
                        exploitable.append(browser_result)
                    else:
                        # Includes skipped and crashed verifiers.
                        not_exploitable.append(browser_result)

                self.session.add_trace(
                    agent=self.name, event_type="swarm_complete",
                    content={
                        "total_exploitable": len(exploitable),
                        "total_not_exploitable": len(not_exploitable),
                        "total_cost": self.total_cost,
                        "total_tokens": self.total_tokens,
                        "evidence_dir": str(evidence_dir),
                        "fatal_error": fatal_error,
                    },
                )

                result_dict = {
                    "raw_output": (
                        f"Browser verification complete: {len(exploitable)} exploitable, "
                        f"{len(not_exploitable)} not exploitable out of {len(findings)} findings"
                    ),
                    "exploitable": exploitable,
                    "not_exploitable": not_exploitable,
                    "evidence_dir": str(evidence_dir),
                    "type": "browser_verification_complete",
                }
                if fatal_error:
                    result_dict["fatal_error"] = fatal_error
                return result_dict

        finally:
            self.session.add_trace(
                agent=self.name, event_type="sandbox_teardown",
                content="Stopping sandbox containers",
            )
            await orchestrator.stop()
@@ -0,0 +1,89 @@
1
+ """
2
+ Intermediate state checkpointing for the scan pipeline.
3
+
4
+ Saves pipeline state after each major step so that a failed scan
5
+ can be resumed without re-running expensive earlier stages.
6
+ """
7
+
8
+ import json
9
+ import logging
10
+ import shutil
11
+ import time
12
+ from pathlib import Path
13
+ from typing import Optional
14
+
15
logger = logging.getLogger(__name__)

CHECKPOINT_BASE_DIR = Path.home() / ".openhack" / "checkpoints"

# Pipeline steps in execution order; later entries are "more advanced".
STEP_ORDER = ["recon", "hunter", "static_validation"]


class CheckpointManager:
    """Manages checkpoint files for a single scan session.

    Each step is stored as ``<checkpoint_dir>/<step_name>.json`` where
    ``checkpoint_dir`` is ``<base_dir>/<session_id>``.
    """

    def __init__(self, session_id: str, base_dir: Optional[Path] = None):
        """Bind the manager to a session.

        Args:
            session_id: Name of the per-session checkpoint directory.
            base_dir: Root directory for checkpoints; defaults to
                ``CHECKPOINT_BASE_DIR``.
        """
        self.session_id = session_id
        self.checkpoint_dir = (base_dir or CHECKPOINT_BASE_DIR) / session_id

    def save(self, step_name: str, data: dict) -> None:
        """Save a checkpoint after a pipeline step completes.

        The payload is written to a temporary sibling file and then renamed
        over the final path, so an interrupted write never leaves a truncated
        ``<step_name>.json`` behind.

        Args:
            step_name: Pipeline step the checkpoint belongs to.
            data: Step state; values that are not JSON-serializable are
                stringified via ``default=str``.
        """
        self.checkpoint_dir.mkdir(parents=True, exist_ok=True)
        checkpoint = {
            "step": step_name,
            "session_id": self.session_id,
            "timestamp": time.time(),
            "data": data,
        }
        serialized = json.dumps(checkpoint, indent=2, default=str)
        path = self.checkpoint_dir / f"{step_name}.json"
        # The ".tmp" suffix keeps the partial file invisible to
        # get_latest_step(), which only looks for "<step>.json".
        tmp_path = path.with_name(f"{path.name}.tmp")
        try:
            tmp_path.write_text(serialized, encoding="utf-8")
            tmp_path.replace(path)
        except BaseException:
            tmp_path.unlink(missing_ok=True)
            raise
        print(f"  Checkpoint saved: {step_name} — resume with: openhack --resume {self.session_id}")
        logger.info("Checkpoint saved: %s -> %s", step_name, path)

    def load(self, step_name: str) -> Optional[dict]:
        """Load a checkpoint for a given step.

        Returns:
            The checkpoint dict, or ``None`` when the file does not exist,
            cannot be read or decoded, or does not contain a JSON object.
        """
        path = self.checkpoint_dir / f"{step_name}.json"
        try:
            payload = json.loads(path.read_text(encoding="utf-8"))
        except FileNotFoundError:
            return None
        except (ValueError, OSError) as e:
            # ValueError covers both json.JSONDecodeError and
            # UnicodeDecodeError (corrupted bytes).
            logger.warning("Failed to load checkpoint %s: %s", path, e)
            return None
        if not isinstance(payload, dict):
            logger.warning(
                "Failed to load checkpoint %s: expected a JSON object, got %s",
                path, type(payload).__name__,
            )
            return None
        return payload

    def get_latest_step(self) -> Optional[str]:
        """Return the last step in ``STEP_ORDER`` whose checkpoint file exists.

        Returns ``None`` when no checkpoint file is present.
        """
        for step in reversed(STEP_ORDER):
            if (self.checkpoint_dir / f"{step}.json").exists():
                return step
        return None

    def cleanup(self) -> None:
        """Remove all checkpoints for this session (called on successful completion)."""
        if self.checkpoint_dir.exists():
            shutil.rmtree(self.checkpoint_dir, ignore_errors=True)
            logger.info("Checkpoints cleaned up for session %s", self.session_id)

    @classmethod
    def list_resumable_sessions(cls, base_dir: Optional[Path] = None) -> list[dict]:
        """List all sessions that have checkpoints available for resume.

        Args:
            base_dir: Root directory to scan; defaults to
                ``CHECKPOINT_BASE_DIR``.

        Returns:
            One dict per session directory (sorted by path) that has at least
            one known step checkpoint, with keys ``session_id``,
            ``latest_step``, ``timestamp`` and ``checkpoint_dir``.
            ``timestamp`` is ``None`` when the latest checkpoint is unreadable.
        """
        root = base_dir or CHECKPOINT_BASE_DIR
        sessions: list[dict] = []
        if not root.exists():
            return sessions
        for session_dir in sorted(root.iterdir()):
            if not session_dir.is_dir():
                continue
            mgr = cls(session_dir.name, base_dir=root)
            latest = mgr.get_latest_step()
            if not latest:
                continue
            # Read timestamp from the latest checkpoint
            checkpoint = mgr.load(latest)
            ts = checkpoint.get("timestamp") if checkpoint else None
            sessions.append({
                "session_id": session_dir.name,
                "latest_step": latest,
                "timestamp": ts,
                "checkpoint_dir": str(session_dir),
            })
        return sessions