PyPI - openhack - Versions diffs - 0.1.0__py3-none-any.whl - Mend

openhack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (113) hide show

openhack/__init__.py +2 -0
openhack/__main__.py +225 -0
openhack/agents/__init__.py +30 -0
openhack/agents/base.py +230 -0
openhack/agents/browser_verifier.py +679 -0
openhack/agents/browser_verifier_swarm.py +256 -0
openhack/agents/checkpoint.py +89 -0
openhack/agents/context_manager.py +356 -0
openhack/agents/coordinator.py +1105 -0
openhack/agents/endpoint_analyst.py +307 -0
openhack/agents/feature_hunter.py +93 -0
openhack/agents/hunter.py +481 -0
openhack/agents/hunter_swarm.py +385 -0
openhack/agents/llm.py +334 -0
openhack/agents/recon.py +19 -0
openhack/agents/sandbox_verifier.py +396 -0
openhack/agents/sandbox_verifier_swarm.py +250 -0
openhack/agents/session.py +286 -0
openhack/agents/validator.py +217 -0
openhack/agents/validator_swarm.py +106 -0
openhack/auth.py +175 -0
openhack/browser/__init__.py +12 -0
openhack/browser/runner.py +385 -0
openhack/categories.py +130 -0
openhack/config.py +201 -0
openhack/deterministic_recon.py +464 -0
openhack/entry_points.py +745 -0
openhack/framework_classifier.py +515 -0
openhack/framework_detection.py +269 -0
openhack/headless_scan.py +179 -0
openhack/prompts/__init__.py +108 -0
openhack/prompts/browser_verifier.py +171 -0
openhack/prompts/coordinator.py +31 -0
openhack/prompts/django/__init__.py +32 -0
openhack/prompts/django/auth_bypass.py +76 -0
openhack/prompts/django/csrf.py +62 -0
openhack/prompts/django/data_exposure.py +67 -0
openhack/prompts/django/idor.py +74 -0
openhack/prompts/django/injection.py +67 -0
openhack/prompts/django/misconfiguration.py +70 -0
openhack/prompts/django/ssrf.py +64 -0
openhack/prompts/endpoint_analyst.py +122 -0
openhack/prompts/express/__init__.py +29 -0
openhack/prompts/express/auth_bypass.py +71 -0
openhack/prompts/express/data_exposure.py +77 -0
openhack/prompts/express/idor.py +69 -0
openhack/prompts/express/injection.py +75 -0
openhack/prompts/express/misconfiguration.py +72 -0
openhack/prompts/express/ssrf.py +63 -0
openhack/prompts/feature_hunter.py +140 -0
openhack/prompts/flask/__init__.py +29 -0
openhack/prompts/flask/auth_bypass.py +86 -0
openhack/prompts/flask/data_exposure.py +78 -0
openhack/prompts/flask/idor.py +83 -0
openhack/prompts/flask/injection.py +77 -0
openhack/prompts/flask/misconfiguration.py +73 -0
openhack/prompts/flask/ssrf.py +65 -0
openhack/prompts/hunter.py +362 -0
openhack/prompts/hunter_continuation_loop.py +12 -0
openhack/prompts/hunter_continuation_no_findings.py +19 -0
openhack/prompts/hunter_continuation_no_progress.py +22 -0
openhack/prompts/hunter_tool_instructions.py +55 -0
openhack/prompts/nextjs/__init__.py +42 -0
openhack/prompts/nextjs/auth_bypass.py +80 -0
openhack/prompts/nextjs/csrf.py +71 -0
openhack/prompts/nextjs/data_exposure.py +88 -0
openhack/prompts/nextjs/idor.py +64 -0
openhack/prompts/nextjs/injection.py +65 -0
openhack/prompts/nextjs/middleware_bypass.py +75 -0
openhack/prompts/nextjs/misconfiguration.py +92 -0
openhack/prompts/nextjs/server_actions.py +97 -0
openhack/prompts/nextjs/ssrf.py +66 -0
openhack/prompts/nextjs/xss.py +69 -0
openhack/prompts/pr_analysis_system.py +80 -0
openhack/prompts/pr_analysis_user.py +11 -0
openhack/prompts/project_context.py +89 -0
openhack/prompts/recon.py +199 -0
openhack/prompts/reporter.py +88 -0
openhack/prompts/researchers.py +434 -0
openhack/prompts/sandbox_verifier.py +128 -0
openhack/prompts/supabase/__init__.py +39 -0
openhack/prompts/supabase/auth_tokens.py +131 -0
openhack/prompts/supabase/edge_functions.py +150 -0
openhack/prompts/supabase/graphql.py +102 -0
openhack/prompts/supabase/postgrest.py +99 -0
openhack/prompts/supabase/realtime.py +93 -0
openhack/prompts/supabase/rls.py +110 -0
openhack/prompts/supabase/rpc_functions.py +127 -0
openhack/prompts/supabase/storage.py +110 -0
openhack/prompts/supabase/tenant_isolation.py +118 -0
openhack/prompts/validator.py +319 -0
openhack/prompts/validator_continuation_incomplete.py +12 -0
openhack/prompts/validator_tool_instructions.py +29 -0
openhack/quality.py +231 -0
openhack/sandbox/__init__.py +12 -0
openhack/sandbox/orchestrator.py +517 -0
openhack/sandbox/runner.py +177 -0
openhack/scan_session.py +245 -0
openhack/setup.py +452 -0
openhack/static_validator.py +612 -0
openhack/tools/__init__.py +1 -0
openhack/tools/ast_tools.py +307 -0
openhack/tools/coverage.py +1078 -0
openhack/tools/filesystem.py +404 -0
openhack/tools/nextjs.py +258 -0
openhack/tools/registry.py +52 -0
openhack/tui.py +3450 -0
openhack/updates.py +170 -0
openhack-0.1.0.dist-info/METADATA +189 -0
openhack-0.1.0.dist-info/RECORD +113 -0
openhack-0.1.0.dist-info/WHEEL +4 -0
openhack-0.1.0.dist-info/entry_points.txt +2 -0
openhack-0.1.0.dist-info/licenses/LICENSE +661 -0

openhack/agents/endpoint_analyst.py ADDED Viewed

@@ -0,0 +1,307 @@
+"""
+Endpoint analyst agent — per-entry-point security analysis.
+Instead of category-based researchers that each scan the whole codebase for
+one type of vulnerability, this agent receives specific endpoints and checks
+them against a comprehensive security checklist.
+"""
+import asyncio
+import logging
+from collections import defaultdict
+from typing import Optional
+from .hunter import HunterAgent
+from .hunter_swarm import HunterSwarmAgent
+from .llm import LLMClient
+from .session import Session
+from openhack.prompts import format_project_context
+from openhack.prompts.endpoint_analyst import ENDPOINT_ANALYST_PROMPT
+from openhack.tools.registry import ToolRegistry
+from openhack.config import settings
+logger = logging.getLogger(__name__)
+class EndpointAnalystAgent(HunterAgent):
+    """Analyst that audits specific endpoints against a full security checklist."""
+    max_iterations = settings.feature_hunter_max_iterations
+    DEFAULT_CATEGORIES = [
+        "idor", "xss", "csrf", "ssrf", "injection",
+        "auth_bypass", "data_exposure", "middleware_bypass",
+        "server_actions", "misconfiguration", "path_traversal",
+        "command_injection", "rce", "open_redirect",
+        "xxe", "insecure_deserialization", "race_condition",
+        "cors_misconfiguration", "business_logic", "mass_assignment",
+    ]
+    def __init__(
+        self,
+        llm: LLMClient,
+        tools: ToolRegistry,
+        session: Session,
+        endpoints: list[dict],
+        group_name: str,
+        **kwargs,
+    ):
+        super().__init__(
+            llm, tools, session,
+            vuln_categories=self.DEFAULT_CATEGORIES,
+            group_name=group_name,
+            framework=None,
+            **kwargs,
+        )
+        self.endpoints = endpoints
+        self.name = f"analyst:{group_name}"
+        self.description = f"Endpoint analyst ({group_name})"
+    def get_system_prompt(self, context: dict) -> str:
+        recon_context = context.get("recon", {}).get("summary", "No recon data available")
+        project_context = context.get("project_context", {})
+        project_context_str = format_project_context(project_context)
+        endpoint_lines = []
+        for ep in self.endpoints:
+            method = ep.get("method", "ALL")
+            path = ep.get("path", ep.get("file", "unknown"))
+            file = ep.get("file", "unknown")
+            line = ep.get("line")
+            auth = ep.get("auth")
+            loc = f"`{file}`"
+            if line:
+                loc += f" (line {line})"
+            auth_str = f" [auth: {auth}]" if auth else ""
+            endpoint_lines.append(f"- **{method} {path}** → {loc}{auth_str}")
+        endpoint_assignments = "\n".join(endpoint_lines)
+        return ENDPOINT_ANALYST_PROMPT.format(
+            recon_context=recon_context,
+            project_context=project_context_str,
+            endpoint_assignments=endpoint_assignments,
+        )
+def group_entry_points(entry_points: list[dict], max_groups: int = 12) -> dict[str, list[dict]]:
+    """Group entry points by directory for analyst assignment.
+    Groups endpoints that share a parent directory (e.g., all /api/auth/* endpoints
+    go to the same analyst). Merges small groups to stay within max_groups.
+    """
+    by_dir: dict[str, list[dict]] = defaultdict(list)
+    for ep in entry_points:
+        file_path = ep.get("file", "")
+        parts = file_path.replace("\\", "/").split("/")
+        # Find a meaningful grouping key — use the first 3-4 path segments
+        # For "src/app/api/auth/login/route.ts" → "api/auth"
+        # For "src/app/api/orders/[id]/route.ts" → "api/orders"
+        api_idx = None
+        for i, part in enumerate(parts):
+            if part in ("api", "routes", "controllers", "views", "handlers"):
+                api_idx = i
+                break
+        if api_idx is not None and api_idx + 1 < len(parts):
+            # Group by the first path segment after "api/"
+            group_key = parts[api_idx + 1]
+            # Skip dynamic segments like [id]
+            if group_key.startswith("[") or group_key.startswith(":"):
+                group_key = parts[api_idx] if api_idx > 0 else "root"
+        elif len(parts) >= 2:
+            group_key = parts[-2] if parts[-1].startswith("route") else parts[-1].split(".")[0]
+        else:
+            group_key = "root"
+        by_dir[group_key].append(ep)
+    # If we have too many groups, merge the smallest ones
+    if len(by_dir) > max_groups:
+        groups_sorted = sorted(by_dir.items(), key=lambda x: len(x[1]))
+        merged: dict[str, list[dict]] = {}
+        overflow: list[dict] = []
+        for name, endpoints in groups_sorted:
+            if len(merged) < max_groups - 1:
+                merged[name] = endpoints
+            else:
+                overflow.extend(endpoints)
+        if overflow:
+            merged["misc"] = overflow
+        by_dir = merged
+    return dict(by_dir)
+def _find_cross_cutting_files(tools: ToolRegistry) -> list[dict]:
+    """Find middleware, auth helpers, and components that render user input."""
+    cross_cutting = []
+    fs = tools.fs_tools
+    patterns = [
+        ("middleware.ts", "Middleware"),
+        ("middleware.js", "Middleware"),
+        ("src/middleware.ts", "Middleware"),
+        ("src/middleware.js", "Middleware"),
+    ]
+    for path, label in patterns:
+        result = fs.read_file(path)
+        if "error" not in result:
+            cross_cutting.append({
+                "path": f"[{label}] {path}",
+                "method": "MIDDLEWARE",
+                "file": path,
+                "line": None,
+                "auth": None,
+            })
+    for pattern in ["**/lib/auth.*", "**/utils/auth.*", "**/helpers/auth.*"]:
+        result = fs.glob(pattern)
+        for match in result.get("matches", []):
+            if any(skip in match for skip in [".deepsec/", "node_modules/", ".next/"]):
+                continue
+            cross_cutting.append({
+                "path": f"[Auth Helper] {match}",
+                "method": "HELPER",
+                "file": match,
+                "line": None,
+                "auth": None,
+            })
+    for pattern in ["**/*.tsx", "**/*.jsx"]:
+        result = fs.glob(pattern)
+        for match in result.get("matches", []):
+            if any(skip in match for skip in ["node_modules/", ".next/", "test/"]):
+                continue
+            content = fs.read_file(match).get("content", "")
+            if "dangerouslySetInnerHTML" in content or "innerHTML" in content:
+                cross_cutting.append({
+                    "path": f"[Component] {match}",
+                    "method": "RENDER",
+                    "file": match,
+                    "line": None,
+                    "auth": None,
+                })
+    return cross_cutting
+async def run_endpoint_analysts(
+    entry_points: list[dict],
+    llm_template: LLMClient,
+    tools: ToolRegistry,
+    session: Session,
+    context: dict,
+    max_concurrent: int = 3,
+) -> dict:
+    """Spawn per-endpoint-group analysts and collect findings."""
+    groups = group_entry_points(entry_points)
+    cross_cutting = _find_cross_cutting_files(tools)
+    if cross_cutting:
+        groups["middleware_and_shared"] = cross_cutting
+        logger.info(f"Added cross-cutting group with {len(cross_cutting)} files")
+    if not groups:
+        return {
+            "findings": [],
+            "files_analyzed": [],
+            "total_cost": 0.0,
+            "total_tokens": 0,
+            "total_input_tokens": 0,
+            "total_output_tokens": 0,
+        }
+    logger.info(
+        f"Endpoint analyst groups ({len(groups)}): "
+        + ", ".join(f"{name}({len(eps)})" for name, eps in groups.items())
+    )
+    semaphore = asyncio.Semaphore(max_concurrent)
+    total_cost = 0.0
+    total_tokens = 0
+    total_input_tokens = 0
+    total_output_tokens = 0
+    async def run_analyst(group_name: str, endpoints: list[dict]):
+        async with semaphore:
+            model = (
+                settings.feature_hunter_model_id
+                or settings.hunter_model_id
+                or llm_template.model
+            )
+            llm = LLMClient(
+                model=model,
+                temperature=0.0,
+                max_tokens=8192,
+                provider=llm_template.provider,
+                prompt_cache_key=llm_template.prompt_cache_key,
+            )
+            analyst = EndpointAnalystAgent(
+                llm, tools, session,
+                endpoints=endpoints,
+                group_name=group_name,
+            )
+            # Build task description listing the endpoints
+            ep_summary = ", ".join(
+                f"{ep.get('method', 'ALL')} {ep.get('path', '?')}"
+                for ep in endpoints[:5]
+            )
+            if len(endpoints) > 5:
+                ep_summary += f" (+{len(endpoints) - 5} more)"
+            task_text = (
+                f"Analyze these {len(endpoints)} endpoint(s) for security vulnerabilities: "
+                f"{ep_summary}. "
+                f"Read each handler file, trace dependencies, and check against the full "
+                f"security checklist. Report every real vulnerability you find."
+            )
+            try:
+                result = await analyst.run(task_text, context=context)
+                return group_name, result, llm
+            except Exception as e:
+                logger.error(f"Endpoint analyst {group_name} failed: {e}")
+                return group_name, {"findings": [], "files_analyzed": []}, llm
+    tasks = [
+        asyncio.create_task(run_analyst(name, eps))
+        for name, eps in groups.items()
+    ]
+    try:
+        results = await asyncio.gather(*tasks)
+    except asyncio.CancelledError:
+        for t in tasks:
+            t.cancel()
+        await asyncio.gather(*tasks, return_exceptions=True)
+        raise
+    all_findings = []
+    all_files = set()
+    for group_name, result, llm_client in results:
+        findings = result.get("findings", [])
+        all_findings.extend(findings)
+        all_files.update(result.get("files_analyzed", []))
+        total_cost += llm_client.total_cost
+        total_tokens += llm_client.total_tokens
+        total_input_tokens += llm_client.total_input_tokens
+        total_output_tokens += llm_client.total_output_tokens
+        logger.info(f"Analyst {group_name}: {len(findings)} findings")
+    all_findings = HunterSwarmAgent._deduplicate_findings(all_findings)
+    return {
+        "findings": all_findings,
+        "files_analyzed": sorted(all_files),
+        "total_cost": total_cost,
+        "total_tokens": total_tokens,
+        "total_input_tokens": total_input_tokens,
+        "total_output_tokens": total_output_tokens,
+    }

openhack/agents/feature_hunter.py ADDED Viewed

@@ -0,0 +1,93 @@
+"""
+Feature Deep Dive hunter agent.
+Works like a human security researcher: reads the codebase, decides what's
+interesting, goes deep on the riskiest features. No pre-assigned feature list —
+the agent reads the route map, picks its own targets, and audits them.
+"""
+import logging
+from typing import Optional
+from .hunter import HunterAgent
+from .llm import LLMClient
+from .session import Session
+from openhack.prompts import format_project_context
+from openhack.prompts.feature_hunter import FEATURE_HUNTER_PROMPT
+from openhack.tools.registry import ToolRegistry
+from openhack.config import settings
+logger = logging.getLogger(__name__)
+class FeatureHunterAgent(HunterAgent):
+    """Security researcher agent that picks its own targets and goes deep."""
+    max_iterations = settings.feature_hunter_max_iterations
+    # Check all categories — not limited to a subset
+    DEFAULT_CATEGORIES = [
+        "idor", "xss", "csrf", "ssrf", "injection",
+        "auth_bypass", "data_exposure", "middleware_bypass",
+        "server_actions", "misconfiguration", "path_traversal",
+        "command_injection", "rce", "open_redirect",
+    ]
+    def __init__(
+        self,
+        llm: LLMClient,
+        tools: ToolRegistry,
+        session: Session,
+        feature: Optional[dict] = None,
+        hunter_id: int = 0,
+        **kwargs,
+    ):
+        name = f"feature:{feature['name']}" if feature else f"researcher:{hunter_id}"
+        super().__init__(
+            llm, tools, session,
+            vuln_categories=self.DEFAULT_CATEGORIES,
+            group_name=name,
+            framework=None,
+            **kwargs,
+        )
+        self.feature = feature
+        self.hunter_id = hunter_id
+        if feature:
+            self.name = f"feature_hunter:{feature['name']}"
+            self.description = f"Deep dive on {feature['name']}"
+        else:
+            self.name = f"researcher:{hunter_id}"
+            self.description = f"Security researcher #{hunter_id}"
+    def get_system_prompt(self, context: dict) -> str:
+        recon_context = context.get("recon", {}).get("summary", "No recon data available")
+        project_context = context.get("project_context", {})
+        project_context_str = format_project_context(project_context)
+        if self.feature:
+            # Legacy mode: pre-assigned feature
+            entry_files = self.feature.get("entry_files", [])
+            if isinstance(entry_files, list):
+                files_str = "\n".join(f"- `{f}`" for f in entry_files)
+            else:
+                files_str = str(entry_files)
+            feature_section = (
+                f"\n## Your Assigned Target Feature\n\n"
+                f"**Feature**: {self.feature.get('name', 'unknown')}\n"
+                f"**Description**: {self.feature.get('description', '')}\n"
+                f"**Key Files**: \n{files_str}\n"
+                f"**Why High-Risk**: {self.feature.get('risk_reason', '')}\n"
+            )
+            return FEATURE_HUNTER_PROMPT.format(
+                recon_context=feature_section + "\n\n## Full Application Context\n\n" + recon_context,
+                project_context=project_context_str,
+            )
+        else:
+            # New mode: agent picks its own targets
+            return FEATURE_HUNTER_PROMPT.format(
+                recon_context=recon_context,
+                project_context=project_context_str,
+            )