PyPI - openhack - Versions diffs - 0.1.0__py3-none-any.whl - Mend

openhack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (113) hide show

openhack/__init__.py +2 -0
openhack/__main__.py +225 -0
openhack/agents/__init__.py +30 -0
openhack/agents/base.py +230 -0
openhack/agents/browser_verifier.py +679 -0
openhack/agents/browser_verifier_swarm.py +256 -0
openhack/agents/checkpoint.py +89 -0
openhack/agents/context_manager.py +356 -0
openhack/agents/coordinator.py +1105 -0
openhack/agents/endpoint_analyst.py +307 -0
openhack/agents/feature_hunter.py +93 -0
openhack/agents/hunter.py +481 -0
openhack/agents/hunter_swarm.py +385 -0
openhack/agents/llm.py +334 -0
openhack/agents/recon.py +19 -0
openhack/agents/sandbox_verifier.py +396 -0
openhack/agents/sandbox_verifier_swarm.py +250 -0
openhack/agents/session.py +286 -0
openhack/agents/validator.py +217 -0
openhack/agents/validator_swarm.py +106 -0
openhack/auth.py +175 -0
openhack/browser/__init__.py +12 -0
openhack/browser/runner.py +385 -0
openhack/categories.py +130 -0
openhack/config.py +201 -0
openhack/deterministic_recon.py +464 -0
openhack/entry_points.py +745 -0
openhack/framework_classifier.py +515 -0
openhack/framework_detection.py +269 -0
openhack/headless_scan.py +179 -0
openhack/prompts/__init__.py +108 -0
openhack/prompts/browser_verifier.py +171 -0
openhack/prompts/coordinator.py +31 -0
openhack/prompts/django/__init__.py +32 -0
openhack/prompts/django/auth_bypass.py +76 -0
openhack/prompts/django/csrf.py +62 -0
openhack/prompts/django/data_exposure.py +67 -0
openhack/prompts/django/idor.py +74 -0
openhack/prompts/django/injection.py +67 -0
openhack/prompts/django/misconfiguration.py +70 -0
openhack/prompts/django/ssrf.py +64 -0
openhack/prompts/endpoint_analyst.py +122 -0
openhack/prompts/express/__init__.py +29 -0
openhack/prompts/express/auth_bypass.py +71 -0
openhack/prompts/express/data_exposure.py +77 -0
openhack/prompts/express/idor.py +69 -0
openhack/prompts/express/injection.py +75 -0
openhack/prompts/express/misconfiguration.py +72 -0
openhack/prompts/express/ssrf.py +63 -0
openhack/prompts/feature_hunter.py +140 -0
openhack/prompts/flask/__init__.py +29 -0
openhack/prompts/flask/auth_bypass.py +86 -0
openhack/prompts/flask/data_exposure.py +78 -0
openhack/prompts/flask/idor.py +83 -0
openhack/prompts/flask/injection.py +77 -0
openhack/prompts/flask/misconfiguration.py +73 -0
openhack/prompts/flask/ssrf.py +65 -0
openhack/prompts/hunter.py +362 -0
openhack/prompts/hunter_continuation_loop.py +12 -0
openhack/prompts/hunter_continuation_no_findings.py +19 -0
openhack/prompts/hunter_continuation_no_progress.py +22 -0
openhack/prompts/hunter_tool_instructions.py +55 -0
openhack/prompts/nextjs/__init__.py +42 -0
openhack/prompts/nextjs/auth_bypass.py +80 -0
openhack/prompts/nextjs/csrf.py +71 -0
openhack/prompts/nextjs/data_exposure.py +88 -0
openhack/prompts/nextjs/idor.py +64 -0
openhack/prompts/nextjs/injection.py +65 -0
openhack/prompts/nextjs/middleware_bypass.py +75 -0
openhack/prompts/nextjs/misconfiguration.py +92 -0
openhack/prompts/nextjs/server_actions.py +97 -0
openhack/prompts/nextjs/ssrf.py +66 -0
openhack/prompts/nextjs/xss.py +69 -0
openhack/prompts/pr_analysis_system.py +80 -0
openhack/prompts/pr_analysis_user.py +11 -0
openhack/prompts/project_context.py +89 -0
openhack/prompts/recon.py +199 -0
openhack/prompts/reporter.py +88 -0
openhack/prompts/researchers.py +434 -0
openhack/prompts/sandbox_verifier.py +128 -0
openhack/prompts/supabase/__init__.py +39 -0
openhack/prompts/supabase/auth_tokens.py +131 -0
openhack/prompts/supabase/edge_functions.py +150 -0
openhack/prompts/supabase/graphql.py +102 -0
openhack/prompts/supabase/postgrest.py +99 -0
openhack/prompts/supabase/realtime.py +93 -0
openhack/prompts/supabase/rls.py +110 -0
openhack/prompts/supabase/rpc_functions.py +127 -0
openhack/prompts/supabase/storage.py +110 -0
openhack/prompts/supabase/tenant_isolation.py +118 -0
openhack/prompts/validator.py +319 -0
openhack/prompts/validator_continuation_incomplete.py +12 -0
openhack/prompts/validator_tool_instructions.py +29 -0
openhack/quality.py +231 -0
openhack/sandbox/__init__.py +12 -0
openhack/sandbox/orchestrator.py +517 -0
openhack/sandbox/runner.py +177 -0
openhack/scan_session.py +245 -0
openhack/setup.py +452 -0
openhack/static_validator.py +612 -0
openhack/tools/__init__.py +1 -0
openhack/tools/ast_tools.py +307 -0
openhack/tools/coverage.py +1078 -0
openhack/tools/filesystem.py +404 -0
openhack/tools/nextjs.py +258 -0
openhack/tools/registry.py +52 -0
openhack/tui.py +3450 -0
openhack/updates.py +170 -0
openhack-0.1.0.dist-info/METADATA +189 -0
openhack-0.1.0.dist-info/RECORD +113 -0
openhack-0.1.0.dist-info/WHEEL +4 -0
openhack-0.1.0.dist-info/entry_points.txt +2 -0
openhack-0.1.0.dist-info/licenses/LICENSE +661 -0

openhack/agents/browser_verifier_swarm.py ADDED Viewed

@@ -0,0 +1,256 @@
+"""
+Browser verifier swarm agent.
+Spawns one browser verifier per confirmed finding and runs them concurrently
+against the live sandboxed application. All verifiers share the same Playwright
+browser instance but get isolated browser contexts.
+"""
+import asyncio
+import logging
+from pathlib import Path
+from typing import Optional
+from .browser_verifier import BrowserVerifierAgent
+from .llm import LLMClient
+from .session import Session
+from ..sandbox.orchestrator import SandboxOrchestrator, SandboxConfig
+from ..browser.runner import BrowserRunner
+from openhack.tools.registry import ToolRegistry
+from openhack.config import settings
+logger = logging.getLogger(__name__)
+class BrowserVerifierSwarmAgent:
+    """Runs browser-based verification for all confirmed findings concurrently.
+    ``run`` starts a sandbox through ``SandboxOrchestrator``, opens a single
+    ``BrowserRunner`` against the sandbox URL, launches one
+    ``BrowserVerifierAgent`` per finding (bounded by a semaphore), aggregates
+    the per-finding results and LLM usage, and stops the sandbox in a
+    ``finally`` block.
+    """
+    name = "browser_verifier_swarm"
+    description = "Browser exploit verification swarm"
+    def __init__(
+        self,
+        llm: LLMClient,
+        tools: ToolRegistry,
+        session: Session,
+        sandbox_config: Optional[SandboxConfig] = None,
+    ):
+        """Store the collaborators and zero the usage counters.
+        Args:
+            llm: Template client; its ``model``, ``provider`` and
+                ``prompt_cache_key`` are read when building per-verifier clients.
+            tools: Tool registry handed to every verifier; ``tools.target_dir``
+                is passed to the sandbox orchestrator.
+            session: Session used for ``add_trace`` calls and for deriving the
+                evidence directory name.
+            sandbox_config: Optional config forwarded to ``SandboxOrchestrator``.
+        """
+        self.llm = llm
+        self.tools = tools
+        self.session = session
+        self.sandbox_config = sandbox_config
+        # Running totals, summed from every per-verifier LLM client in ``run``.
+        self.total_cost: float = 0.0
+        self.total_tokens: int = 0
+        self.total_input_tokens: int = 0
+        self.total_output_tokens: int = 0
+    def _create_llm_for_verifier(self) -> LLMClient:
+        """Build a fresh ``LLMClient`` for one verifier.
+        Uses ``settings.browser_verifier_model_id`` when it is truthy, otherwise
+        the swarm's own model, and reuses the swarm client's provider and
+        prompt cache key. A separate client per verifier lets ``run`` read each
+        client's usage totals individually.
+        """
+        model = settings.browser_verifier_model_id or self.llm.model
+        return LLMClient(model=model, temperature=0.0, max_tokens=8192, provider=self.llm.provider, prompt_cache_key=self.llm.prompt_cache_key)
+    async def run(self, task: str, context: Optional[dict] = None) -> dict:
+        """Verify every confirmed finding in the browser and summarise the outcome.
+        Args:
+            task: Not read anywhere in this method; each verifier is given a
+                fixed instruction string instead.
+            context: Optional dict. ``"confirmed_findings"`` (default ``[]``) is
+                the list of findings to verify; ``"project_context"`` (default
+                ``{}``) is forwarded to every verifier.
+        Returns:
+            A dict with keys ``raw_output``, ``exploitable``, ``not_exploitable``,
+            ``evidence_dir`` and ``type`` (``"browser_verification_complete"``),
+            plus ``fatal_error`` when the fail-fast abort was triggered.
+        Raises:
+            asyncio.CancelledError: Re-raised after all child tasks have been
+                cancelled and awaited.
+            Exception: Anything raised outside a verifier (for example by
+                ``orchestrator.start()``) propagates; only the ``finally``
+                teardown runs.
+        """
+        context = context or {}
+        findings = context.get("confirmed_findings", [])
+        # Nothing to verify: return the empty summary without starting a sandbox.
+        if not findings:
+            return {
+                "raw_output": "No findings to verify in browser",
+                "exploitable": [],
+                "not_exploitable": [],
+                "evidence_dir": "",
+                "type": "browser_verification_complete",
+            }
+        self.session.add_trace(
+            agent=self.name, event_type="swarm_start",
+            content={"findings_count": len(findings)},
+        )
+        # Evidence directory name: session.trace_id if truthy, else session.id,
+        # else the literal "default".
+        session_id = getattr(self.session, "trace_id", None) or getattr(self.session, "id", "default")
+        evidence_dir = Path.home() / ".openhack" / "evidence" / session_id
+        evidence_dir.mkdir(parents=True, exist_ok=True)
+        target_dir = self.tools.target_dir
+        orchestrator = SandboxOrchestrator(target_dir, self.sandbox_config)
+        self.session.add_trace(
+            agent=self.name, event_type="sandbox_starting",
+            content="Building and starting sandbox containers…",
+        )
+        # start() is inside the try so the finally below calls stop() even when
+        # start() itself raises.
+        try:
+            sandbox_status = await orchestrator.start()
+            sandbox_url = sandbox_status.base_url
+            self.session.add_trace(
+                agent=self.name, event_type="sandbox_ready",
+                content={"base_url": sandbox_url, "host_port": sandbox_status.host_port},
+            )
+            # One runner is shared by every verifier created below.
+            async with BrowserRunner(
+                base_url=sandbox_url,
+                evidence_dir=evidence_dir,
+                headless=settings.browser_headless,
+                timeout=settings.browser_timeout_ms,
+            ) as runner:
+                # NOTE(review): concurrency is capped by the *validator* setting
+                # (max_concurrent_validators) — confirm this reuse is intended.
+                semaphore = asyncio.Semaphore(settings.max_concurrent_validators)
+                # Abort once this many of the most recent errors share one message.
+                FAIL_FAST_THRESHOLD = 3
+                abort_event = asyncio.Event()
+                # Error messages since the last successful verifier run.
+                error_streak: list[str] = []
+                fatal_error: Optional[str] = None
+                async def run_verifier(idx: int, finding: dict) -> tuple[int, dict, LLMClient]:
+                    """Verify one finding; return ``(idx, result, llm)``.
+                    Exceptions from the verifier are converted into a
+                    ``browser_verification_failed`` result rather than raised.
+                    """
+                    nonlocal fatal_error
+                    verifier_name = f"browser_verifier:finding_{idx}"
+                    self.session.add_trace(
+                        agent=verifier_name, event_type="queued",
+                        content={"finding_index": idx, "title": finding.get("title", "")},
+                    )
+                    # First abort check: skip without waiting on the semaphore.
+                    if abort_event.is_set():
+                        self.session.add_trace(
+                            agent=verifier_name, event_type="skipped",
+                            content="Skipped — swarm aborted due to repeated failures",
+                        )
+                        # An unused client is still returned so the aggregation
+                        # loop can read usage totals from every tuple.
+                        llm = self._create_llm_for_verifier()
+                        return idx, {
+                            "browser_result": {
+                                "finding_index": idx, "status": "skipped",
+                                "confidence": "none", "evidence": "Aborted",
+                                "attempts_made": 0, "reason": fatal_error or "Aborted",
+                            },
+                            "type": "browser_verification_skipped",
+                        }, llm
+                    async with semaphore:
+                        # Second abort check: the abort may have fired while this
+                        # task was waiting for the semaphore.
+                        if abort_event.is_set():
+                            self.session.add_trace(
+                                agent=verifier_name, event_type="skipped",
+                                content="Skipped — swarm aborted due to repeated failures",
+                            )
+                            llm = self._create_llm_for_verifier()
+                            return idx, {
+                                "browser_result": {
+                                    "finding_index": idx, "status": "skipped",
+                                    "confidence": "none", "evidence": "Aborted",
+                                    "attempts_made": 0, "reason": fatal_error or "Aborted",
+                                },
+                                "type": "browser_verification_skipped",
+                            }, llm
+                        llm = self._create_llm_for_verifier()
+                        verifier = BrowserVerifierAgent(
+                            llm, self.tools, self.session,
+                            sandbox_url=sandbox_url,
+                            browser_runner=runner,
+                            sandbox_orchestrator=orchestrator,
+                            finding_index=idx,
+                            max_attempts=settings.browser_max_exploit_attempts,
+                        )
+                        try:
+                            sub_context = {
+                                "finding": finding,
+                                "project_context": context.get("project_context", {}),
+                            }
+                            result = await verifier.run(
+                                "Verify this vulnerability by exploiting it in the browser.",
+                                context=sub_context,
+                            )
+                            # Any success resets the streak, so the fail-fast rule
+                            # only counts errors recorded since the last success.
+                            error_streak.clear()
+                            return idx, result, llm
+                        # ``except Exception`` does not catch asyncio.CancelledError,
+                        # so cancellation still propagates out of this coroutine.
+                        except Exception as e:
+                            error_msg = str(e)
+                            logger.error(f"Browser verifier for finding {idx} failed: {e}")
+                            self.session.add_trace(
+                                agent=verifier_name, event_type="error",
+                                content=f"Verifier crashed: {e}",
+                            )
+                            error_streak.append(error_msg)
+                            # Fail fast when the last FAIL_FAST_THRESHOLD errors
+                            # all carry the same message text.
+                            if (
+                                len(error_streak) >= FAIL_FAST_THRESHOLD
+                                and len(set(error_streak[-FAIL_FAST_THRESHOLD:])) == 1
+                            ):
+                                fatal_error = error_msg
+                                abort_event.set()
+                                self.session.add_trace(
+                                    agent=self.name, event_type="swarm_aborted",
+                                    content=(
+                                        f"Aborting: {FAIL_FAST_THRESHOLD} consecutive "
+                                        f"verifiers failed with: {error_msg}"
+                                    ),
+                                )
+                            # A crashed verifier is reported as "not_exploitable"
+                            # with low confidence, not as a distinct error status.
+                            return idx, {
+                                "browser_result": {
+                                    "finding_index": idx,
+                                    "status": "not_exploitable",
+                                    "confidence": "low",
+                                    "evidence": f"Verifier crashed: {error_msg}",
+                                    "attempts_made": 0,
+                                    "reason": "Internal error",
+                                },
+                                "type": "browser_verification_failed",
+                            }, llm
+                # One task per finding; idx is the position in ``findings``.
+                tasks = [
+                    asyncio.create_task(run_verifier(idx, finding))
+                    for idx, finding in enumerate(findings)
+                ]
+                try:
+                    results = await asyncio.gather(*tasks)
+                except asyncio.CancelledError:
+                    # Cancel every child and wait for them to finish before
+                    # re-raising, so none is still running when the runner
+                    # context and the sandbox are torn down.
+                    for t in tasks:
+                        t.cancel()
+                    await asyncio.gather(*tasks, return_exceptions=True)
+                    raise
+            # From here on the BrowserRunner context has been exited; only the
+            # collected results are processed.
+            exploitable = []
+            not_exploitable = []
+            for idx, result, llm_client in results:
+                # Fold each verifier client's usage into the swarm totals.
+                self.total_cost += llm_client.total_cost
+                self.total_tokens += llm_client.total_tokens
+                self.total_input_tokens += llm_client.total_input_tokens
+                self.total_output_tokens += llm_client.total_output_tokens
+                browser_result = result.get("browser_result") if result else None
+                # A missing/empty browser_result is recorded as an "error" entry.
+                if not browser_result:
+                    not_exploitable.append({"finding_index": idx, "status": "error", "confidence": "low"})
+                    continue
+                # Every status other than "exploitable" (including "skipped")
+                # lands in the not_exploitable bucket.
+                if browser_result.get("status") == "exploitable":
+                    exploitable.append(browser_result)
+                else:
+                    not_exploitable.append(browser_result)
+            self.session.add_trace(
+                agent=self.name, event_type="swarm_complete",
+                content={
+                    "total_exploitable": len(exploitable),
+                    "total_not_exploitable": len(not_exploitable),
+                    "total_cost": self.total_cost,
+                    "total_tokens": self.total_tokens,
+                    "evidence_dir": str(evidence_dir),
+                    "fatal_error": fatal_error,
+                },
+            )
+            result_dict = {
+                "raw_output": (
+                    f"Browser verification complete: {len(exploitable)} exploitable, "
+                    f"{len(not_exploitable)} not exploitable out of {len(findings)} findings"
+                ),
+                "exploitable": exploitable,
+                "not_exploitable": not_exploitable,
+                "evidence_dir": str(evidence_dir),
+                "type": "browser_verification_complete",
+            }
+            # The key is only present when the fail-fast abort fired.
+            if fatal_error:
+                result_dict["fatal_error"] = fatal_error
+            return result_dict
+        finally:
+            # Runs on success, failure and cancellation alike.
+            self.session.add_trace(
+                agent=self.name, event_type="sandbox_teardown",
+                content="Stopping sandbox containers",
+            )
+            await orchestrator.stop()

openhack/agents/checkpoint.py ADDED Viewed

@@ -0,0 +1,89 @@
+"""
+Intermediate state checkpointing for the scan pipeline.
+Saves pipeline state after each major step so that a failed scan
+can be resumed without re-running expensive earlier stages.
+"""
+import json
+import logging
+import shutil
+import time
+from pathlib import Path
+from typing import Optional
+logger = logging.getLogger(__name__)
+CHECKPOINT_BASE_DIR = Path.home() / ".openhack" / "checkpoints"
+STEP_ORDER = ["recon", "hunter", "static_validation"]
+class CheckpointManager:
+    """Manages checkpoint files for a single scan session.
+    Each pipeline step is stored as ``<checkpoint_dir>/<step_name>.json`` where
+    ``checkpoint_dir`` is ``<base_dir>/<session_id>``.
+    """
+    def __init__(self, session_id: str, base_dir: Optional[Path] = None):
+        """Bind the manager to one session directory.
+        Args:
+            session_id: Name of the per-session sub-directory.
+            base_dir: Root holding all session directories; falls back to
+                ``CHECKPOINT_BASE_DIR`` when falsy.
+        """
+        self.session_id = session_id
+        self.checkpoint_dir = (base_dir or CHECKPOINT_BASE_DIR) / session_id
+    def save(self, step_name: str, data: dict) -> None:
+        """Save a checkpoint after a pipeline step completes.
+        The JSON document holds ``step``, ``session_id``, ``timestamp``
+        (``time.time()``) and ``data``; values json cannot encode natively are
+        stringified via ``default=str``.
+        Args:
+            step_name: Step identifier; also the file stem.
+            data: Step payload to persist.
+        Raises:
+            OSError: If the directory or file cannot be written.
+        """
+        self.checkpoint_dir.mkdir(parents=True, exist_ok=True)
+        checkpoint = {
+            "step": step_name,
+            "session_id": self.session_id,
+            "timestamp": time.time(),
+            "data": data,
+        }
+        path = self.checkpoint_dir / f"{step_name}.json"
+        # Serialise before touching the disk so an encoding failure cannot leave
+        # a partial file behind.
+        payload = json.dumps(checkpoint, indent=2, default=str)
+        # Write to a sibling temp file and rename it into place: an interrupted
+        # write must never leave a truncated "<step>.json", because
+        # get_latest_step() treats the mere existence of that file as proof
+        # that the step completed.
+        tmp_path = path.with_name(f"{path.name}.tmp")
+        try:
+            tmp_path.write_text(payload, encoding="utf-8")
+            tmp_path.replace(path)
+        except OSError:
+            tmp_path.unlink(missing_ok=True)
+            raise
+        print(f"    Checkpoint saved: {step_name} — resume with: openhack --resume {self.session_id}")
+        logger.info("Checkpoint saved: %s -> %s", step_name, path)
+    def load(self, step_name: str) -> Optional[dict]:
+        """Load a checkpoint for a given step.
+        Returns:
+            The checkpoint dict, or ``None`` when the file is missing,
+            unreadable, not valid JSON/UTF-8, or not a JSON object.
+        """
+        path = self.checkpoint_dir / f"{step_name}.json"
+        try:
+            checkpoint = json.loads(path.read_text(encoding="utf-8"))
+        except FileNotFoundError:
+            # A missing checkpoint is the normal "not reached yet" case.
+            return None
+        except (ValueError, OSError) as e:
+            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
+            logger.warning("Failed to load checkpoint %s: %s", path, e)
+            return None
+        if not isinstance(checkpoint, dict):
+            # Callers use dict methods on the result; reject other JSON values.
+            logger.warning(
+                "Failed to load checkpoint %s: expected a JSON object, got %s",
+                path, type(checkpoint).__name__,
+            )
+            return None
+        return checkpoint
+    def get_latest_step(self) -> Optional[str]:
+        """Return the last step in ``STEP_ORDER`` whose checkpoint file exists, else ``None``."""
+        # Walk from the most advanced step backwards and stop at the first hit.
+        for step in reversed(STEP_ORDER):
+            if (self.checkpoint_dir / f"{step}.json").exists():
+                return step
+        return None
+    def cleanup(self) -> None:
+        """Remove all checkpoints for this session (called on successful completion)."""
+        if self.checkpoint_dir.exists():
+            # Best effort: deletion errors are deliberately ignored.
+            shutil.rmtree(self.checkpoint_dir, ignore_errors=True)
+            logger.info("Checkpoints cleaned up for session %s", self.session_id)
+    @classmethod
+    def list_resumable_sessions(cls, base_dir: Optional[Path] = None) -> list[dict]:
+        """List all sessions that have checkpoints available for resume.
+        Returns:
+            One dict per session directory (sorted by path) containing
+            ``session_id``, ``latest_step``, ``timestamp`` (``None`` when the
+            latest checkpoint cannot be loaded) and ``checkpoint_dir``.
+        """
+        root = base_dir or CHECKPOINT_BASE_DIR
+        sessions: list[dict] = []
+        # is_dir() also covers a root path that exists but is a regular file.
+        if not root.is_dir():
+            return sessions
+        for session_dir in sorted(root.iterdir()):
+            if not session_dir.is_dir():
+                continue
+            mgr = cls(session_dir.name, base_dir=root)
+            latest = mgr.get_latest_step()
+            if latest is None:
+                continue
+            # Read timestamp from the latest checkpoint
+            checkpoint = mgr.load(latest)
+            ts = checkpoint.get("timestamp") if checkpoint else None
+            sessions.append({
+                "session_id": session_dir.name,
+                "latest_step": latest,
+                "timestamp": ts,
+                "checkpoint_dir": str(session_dir),
+            })
+        return sessions