openhack 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. openhack/__init__.py +2 -0
  2. openhack/__main__.py +225 -0
  3. openhack/agents/__init__.py +30 -0
  4. openhack/agents/base.py +230 -0
  5. openhack/agents/browser_verifier.py +679 -0
  6. openhack/agents/browser_verifier_swarm.py +256 -0
  7. openhack/agents/checkpoint.py +89 -0
  8. openhack/agents/context_manager.py +356 -0
  9. openhack/agents/coordinator.py +1105 -0
  10. openhack/agents/endpoint_analyst.py +307 -0
  11. openhack/agents/feature_hunter.py +93 -0
  12. openhack/agents/hunter.py +481 -0
  13. openhack/agents/hunter_swarm.py +385 -0
  14. openhack/agents/llm.py +334 -0
  15. openhack/agents/recon.py +19 -0
  16. openhack/agents/sandbox_verifier.py +396 -0
  17. openhack/agents/sandbox_verifier_swarm.py +250 -0
  18. openhack/agents/session.py +286 -0
  19. openhack/agents/validator.py +217 -0
  20. openhack/agents/validator_swarm.py +106 -0
  21. openhack/auth.py +175 -0
  22. openhack/browser/__init__.py +12 -0
  23. openhack/browser/runner.py +385 -0
  24. openhack/categories.py +130 -0
  25. openhack/config.py +201 -0
  26. openhack/deterministic_recon.py +464 -0
  27. openhack/entry_points.py +745 -0
  28. openhack/framework_classifier.py +515 -0
  29. openhack/framework_detection.py +269 -0
  30. openhack/headless_scan.py +179 -0
  31. openhack/prompts/__init__.py +108 -0
  32. openhack/prompts/browser_verifier.py +171 -0
  33. openhack/prompts/coordinator.py +31 -0
  34. openhack/prompts/django/__init__.py +32 -0
  35. openhack/prompts/django/auth_bypass.py +76 -0
  36. openhack/prompts/django/csrf.py +62 -0
  37. openhack/prompts/django/data_exposure.py +67 -0
  38. openhack/prompts/django/idor.py +74 -0
  39. openhack/prompts/django/injection.py +67 -0
  40. openhack/prompts/django/misconfiguration.py +70 -0
  41. openhack/prompts/django/ssrf.py +64 -0
  42. openhack/prompts/endpoint_analyst.py +122 -0
  43. openhack/prompts/express/__init__.py +29 -0
  44. openhack/prompts/express/auth_bypass.py +71 -0
  45. openhack/prompts/express/data_exposure.py +77 -0
  46. openhack/prompts/express/idor.py +69 -0
  47. openhack/prompts/express/injection.py +75 -0
  48. openhack/prompts/express/misconfiguration.py +72 -0
  49. openhack/prompts/express/ssrf.py +63 -0
  50. openhack/prompts/feature_hunter.py +140 -0
  51. openhack/prompts/flask/__init__.py +29 -0
  52. openhack/prompts/flask/auth_bypass.py +86 -0
  53. openhack/prompts/flask/data_exposure.py +78 -0
  54. openhack/prompts/flask/idor.py +83 -0
  55. openhack/prompts/flask/injection.py +77 -0
  56. openhack/prompts/flask/misconfiguration.py +73 -0
  57. openhack/prompts/flask/ssrf.py +65 -0
  58. openhack/prompts/hunter.py +362 -0
  59. openhack/prompts/hunter_continuation_loop.py +12 -0
  60. openhack/prompts/hunter_continuation_no_findings.py +19 -0
  61. openhack/prompts/hunter_continuation_no_progress.py +22 -0
  62. openhack/prompts/hunter_tool_instructions.py +55 -0
  63. openhack/prompts/nextjs/__init__.py +42 -0
  64. openhack/prompts/nextjs/auth_bypass.py +80 -0
  65. openhack/prompts/nextjs/csrf.py +71 -0
  66. openhack/prompts/nextjs/data_exposure.py +88 -0
  67. openhack/prompts/nextjs/idor.py +64 -0
  68. openhack/prompts/nextjs/injection.py +65 -0
  69. openhack/prompts/nextjs/middleware_bypass.py +75 -0
  70. openhack/prompts/nextjs/misconfiguration.py +92 -0
  71. openhack/prompts/nextjs/server_actions.py +97 -0
  72. openhack/prompts/nextjs/ssrf.py +66 -0
  73. openhack/prompts/nextjs/xss.py +69 -0
  74. openhack/prompts/pr_analysis_system.py +80 -0
  75. openhack/prompts/pr_analysis_user.py +11 -0
  76. openhack/prompts/project_context.py +89 -0
  77. openhack/prompts/recon.py +199 -0
  78. openhack/prompts/reporter.py +88 -0
  79. openhack/prompts/researchers.py +434 -0
  80. openhack/prompts/sandbox_verifier.py +128 -0
  81. openhack/prompts/supabase/__init__.py +39 -0
  82. openhack/prompts/supabase/auth_tokens.py +131 -0
  83. openhack/prompts/supabase/edge_functions.py +150 -0
  84. openhack/prompts/supabase/graphql.py +102 -0
  85. openhack/prompts/supabase/postgrest.py +99 -0
  86. openhack/prompts/supabase/realtime.py +93 -0
  87. openhack/prompts/supabase/rls.py +110 -0
  88. openhack/prompts/supabase/rpc_functions.py +127 -0
  89. openhack/prompts/supabase/storage.py +110 -0
  90. openhack/prompts/supabase/tenant_isolation.py +118 -0
  91. openhack/prompts/validator.py +319 -0
  92. openhack/prompts/validator_continuation_incomplete.py +12 -0
  93. openhack/prompts/validator_tool_instructions.py +29 -0
  94. openhack/quality.py +231 -0
  95. openhack/sandbox/__init__.py +12 -0
  96. openhack/sandbox/orchestrator.py +517 -0
  97. openhack/sandbox/runner.py +177 -0
  98. openhack/scan_session.py +245 -0
  99. openhack/setup.py +452 -0
  100. openhack/static_validator.py +612 -0
  101. openhack/tools/__init__.py +1 -0
  102. openhack/tools/ast_tools.py +307 -0
  103. openhack/tools/coverage.py +1078 -0
  104. openhack/tools/filesystem.py +404 -0
  105. openhack/tools/nextjs.py +258 -0
  106. openhack/tools/registry.py +52 -0
  107. openhack/tui.py +3450 -0
  108. openhack/updates.py +170 -0
  109. openhack-0.1.0.dist-info/METADATA +189 -0
  110. openhack-0.1.0.dist-info/RECORD +113 -0
  111. openhack-0.1.0.dist-info/WHEEL +4 -0
  112. openhack-0.1.0.dist-info/entry_points.txt +2 -0
  113. openhack-0.1.0.dist-info/licenses/LICENSE +661 -0
openhack/categories.py ADDED
@@ -0,0 +1,130 @@
1
+ """
2
+ Canonical vulnerability categories.
3
+
4
+ Every finding reported by the scanner MUST use one of these exact category
5
+ strings. This eliminates the cross-scan drift where the LLM invents
6
+ slightly different labels for the same vulnerability class.
7
+
8
+ The `normalize_category` function maps freeform LLM output to the closest
9
+ canonical category using keyword matching.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
# The closed set of category labels a finding may carry, in display order.
CATEGORIES: list[str] = [
    # Injection-style and request-forgery classes
    "SQL Injection", "Command Injection", "XSS", "SSRF",
    "Open Redirect", "Path Traversal",
    # Access-control classes
    "IDOR", "Authentication Bypass", "Authorization Bypass", "CSRF",
    # Exposure and configuration classes
    "Data Exposure", "Information Disclosure", "Hardcoded Secret",
    "Security Misconfiguration",
    # Database / storage policy classes
    "Missing RLS", "RPC Function Abuse", "Storage Misconfiguration",
    # Everything else
    "Mass Assignment", "Business Logic Flaw", "Denial of Service", "RCE",
]

# Lowercased label -> canonical spelling, for case-insensitive exact matches.
_CANONICAL_LOWER: dict[str, str] = dict(zip(map(str.lower, CATEGORIES), CATEGORIES))
39
+
40
+ _KEYWORD_MAP: list[tuple[set[str], str]] = [
41
+ ({"sqli", "sql injection", "sql inject"}, "SQL Injection"),
42
+ ({"command injection", "command inject", "child_process", "exec injection", "rce", "remote code"}, "RCE"),
43
+ ({"xss", "cross-site scripting", "cross site scripting", "dangerouslysetinnerhtml", "innerhtml"}, "XSS"),
44
+ ({"ssrf", "server-side request", "server side request"}, "SSRF"),
45
+ ({"open redirect", "redirect", "returnto", "redirectto", "callbackurl"}, "Open Redirect"),
46
+ ({"path traversal", "directory traversal", "lfi", "local file inclusion"}, "Path Traversal"),
47
+ ({"idor", "insecure direct object", "broken object level"}, "IDOR"),
48
+ ({"authentication bypass", "auth bypass", "broken authentication"}, "Authentication Bypass"),
49
+ ({"authorization bypass", "broken access", "missing authorization", "privilege escalation", "access control"}, "Authorization Bypass"),
50
+ ({"csrf", "cross-site request forgery", "cross site request forgery"}, "CSRF"),
51
+ ({"missing rls", "row level security", "rls policy", "cross-tenant", "tenant isolation", "missing delete policy"}, "Missing RLS"),
52
+ ({"data exposure", "data leak", "pii exposure", "sensitive data", "token exposure",
53
+ "credential exposure", "plaintext", "write access"}, "Data Exposure"),
54
+ ({"information disclosure", "info disclosure", "verbose error", "error message", "stack trace"}, "Information Disclosure"),
55
+ ({"hardcoded secret", "hardcoded credential", "hardcoded key", "hardcoded password", "embedded secret"}, "Hardcoded Secret"),
56
+ ({"misconfiguration", "security header", "cors", "missing header", "insecure config",
57
+ "insecure documentation", "auth misconfiguration"}, "Security Misconfiguration"),
58
+ ({"rpc function", "rpc abuse", "security definer"}, "RPC Function Abuse"),
59
+ ({"storage misconfiguration", "storage bucket", "storage policy", "public bucket", "insecure storage"}, "Storage Misconfiguration"),
60
+ ({"mass assignment", "over-posting", "parameter pollution"}, "Mass Assignment"),
61
+ ({"business logic", "logic flaw", "logic error", "race condition",
62
+ "broken functionality"}, "Business Logic Flaw"),
63
+ ({"denial of service", "dos", "redos", "resource exhaustion",
64
+ "rate limit", "missing rate"}, "Denial of Service"),
65
+ ]
66
+
67
# Default severity for each canonical category. Built from per-level groups;
# the resulting key order is: critical, high, medium, low, then the trailing
# "Security Misconfiguration" (medium) entry.
CATEGORY_SEVERITY: dict[str, str] = {
    category: level
    for level, members in (
        ("critical", (
            "SQL Injection", "Command Injection", "RCE",
            "Authentication Bypass", "Missing RLS",
        )),
        ("high", (
            "SSRF", "Path Traversal", "IDOR", "Authorization Bypass",
            "Hardcoded Secret", "Data Exposure", "RPC Function Abuse",
            "Storage Misconfiguration",
        )),
        ("medium", (
            "Open Redirect", "XSS", "CSRF", "Mass Assignment",
            "Business Logic Flaw", "Denial of Service",
        )),
        ("low", ("Information Disclosure",)),
        ("medium", ("Security Misconfiguration",)),
    )
    for category in members
}
90
+
91
+
92
def normalize_severity(
    findings: list[dict],
    *,
    use_category_default: bool = True,
) -> list[dict]:
    """Return copies of *findings* with severity raised to the category default.

    Each finding is shallow-copied; the input dicts are never mutated. When
    ``use_category_default`` is true, a finding whose severity is less severe
    than the default for its (normalized) category is bumped up to that
    default. Severities are never lowered. Categories without a default are
    treated as "medium"; a missing or unrecognized severity is treated as
    "info".

    The severity label is stripped and lowercased before lookup, so values
    such as " High " are recognized rather than being treated as unknown and
    overwritten with the category default. Non-string severities are coerced
    with ``str()`` instead of raising.
    """
    severity_rank = {"critical": 0, "high": 1, "medium": 2, "low": 3, "info": 4}
    rank_to_name = {rank: name for name, rank in severity_rank.items()}

    out = [dict(f) for f in findings]
    if not use_category_default:
        # Nothing to adjust; the copies are returned untouched.
        return out

    for finding in out:
        canonical = normalize_category(finding.get("category") or "")
        label = str(finding.get("severity") or "info").strip().lower()
        current = severity_rank.get(label, 4)
        floor = severity_rank.get(
            CATEGORY_SEVERITY.get(canonical, "medium").lower(), 2
        )
        # A larger rank means *less* severe, so this only ever escalates.
        if current > floor:
            finding["severity"] = rank_to_name[floor]

    return out
114
+
115
+
116
# Keywords this short are acronyms ("rce", "dos", "xss", ...). Plain substring
# matching on them produces false positives inside ordinary words.
_ACRONYM_MAX_LEN = 5


def _keyword_matches(keyword: str, text: str) -> bool:
    """Return True if *keyword* occurs in *text*; acronyms must start a word."""
    if len(keyword) > _ACRONYM_MAX_LEN:
        return keyword in text
    start = text.find(keyword)
    while start != -1:
        if start == 0 or not text[start - 1].isalnum():
            return True
        start = text.find(keyword, start + 1)
    return False


def normalize_category(raw: str) -> str:
    """Map a freeform category label to its canonical category string.

    Resolution order:

    1. Empty or whitespace-only input -> "Security Misconfiguration".
    2. Case-insensitive exact match against the canonical labels.
    3. First ``_KEYWORD_MAP`` entry with a matching keyword. Phrases match as
       substrings; short acronym keywords must begin at a word boundary, so
       "rce" no longer matches inside "resource", "force" or "source"
       (previously "Resource exhaustion" and "Brute force" were classified as
       "RCE").
    4. Otherwise the stripped input, title-cased.
    """
    if not raw:
        return "Security Misconfiguration"

    cleaned = raw.strip()
    if not cleaned:
        # Whitespace-only labels carry no information; avoid returning "".
        return "Security Misconfiguration"

    lower = cleaned.lower()

    exact = _CANONICAL_LOWER.get(lower)
    if exact is not None:
        return exact

    for keywords, canonical in _KEYWORD_MAP:
        if any(_keyword_matches(kw, lower) for kw in keywords):
            return canonical

    return cleaned.title()
openhack/config.py ADDED
@@ -0,0 +1,201 @@
1
+ import json
2
+ import os
3
+ from pathlib import Path
4
+ from typing import Optional
5
+
6
+ from pydantic_settings import BaseSettings, SettingsConfigDict
7
+
8
# Per-user state lives in ~/.openhack; the persisted settings file is "config".
CONFIG_DIR = Path.home().joinpath(".openhack")
CONFIG_PATH = CONFIG_DIR.joinpath("config")

# Provider name -> name of the field that holds that provider's API key.
_PROVIDER_KEY_FIELDS = dict(openhack="openhack_api_key")
14
+
15
+
16
def _dotenv_nonempty_keys(path: Path) -> set[str]:
    """Return the uppercased keys that have a non-empty value in a dotenv file.

    Blank lines, ``#`` comment lines and lines without ``=`` are skipped. A
    leading ``export `` is tolerated, and one layer of surrounding quotes is
    stripped from the value before the emptiness check.

    The file is decoded as UTF-8, matching the ``env_file_encoding`` that
    ``Settings`` uses for the same file. A missing, unreadable or undecodable
    file yields an empty set instead of raising.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return set()

    keys: set[str] = set()
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        if line.startswith("export "):
            line = line[len("export "):].strip()
        key, sep, val = line.partition("=")
        if not sep:
            continue
        key = key.strip()
        val = val.strip().strip('"').strip("'")
        if key and val:
            keys.add(key.upper())
    return keys
38
+
39
+
40
def load_user_config() -> dict:
    """Load the persistent config stored at ``CONFIG_PATH``.

    Returns an empty dict when the file is missing, unreadable, not valid
    UTF-8/JSON, or when its top-level JSON value is not an object. Callers
    rely on getting a dict back (they call ``.update()`` / ``.items()`` on it).
    """
    try:
        parsed = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {}
    return parsed if isinstance(parsed, dict) else {}
48
+
49
+
50
def save_user_config(data: dict) -> None:
    """Merge *data* into the persistent config at ``CONFIG_PATH`` and save it.

    Existing keys not present in *data* are preserved. The config directory is
    restricted to the owner (0o700) on a best-effort basis.

    The config holds long-lived bearer tokens, so the file is created with
    owner-only permissions (0o600) *before* any content is written. Writing
    first and tightening afterwards would leave a window in which the secrets
    are readable under the default umask. Raises ``OSError`` if the directory
    or file cannot be created or written.
    """
    CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    try:
        os.chmod(CONFIG_DIR, 0o700)
    except OSError:
        pass

    merged = load_user_config()
    if not isinstance(merged, dict):
        merged = {}
    merged.update(data)
    payload = json.dumps(merged, indent=2) + "\n"

    # O_CREAT with mode 0o600 covers the new-file case atomically.
    fd = os.open(CONFIG_PATH, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w", encoding="utf-8") as fh:
        # A pre-existing file keeps its old mode; tighten it while it is still
        # empty (just truncated). Best-effort, as before.
        try:
            os.chmod(CONFIG_PATH, 0o600)
        except OSError:
            pass
        fh.write(payload)
65
+
66
+
67
def resolve_provider(name: str) -> str:
    """Return the provider name to use for *name*.

    Currently an identity mapping: the argument is returned unchanged.
    """
    provider = name
    return provider
70
+
71
+
72
# Production endpoints: the web app and the API base URL.
PROD_APP_URL = "https://app.openhack.com"
PROD_BASE_URL = "https://api.openhack.com/v1"
# Local development endpoints, used when Settings.openhack_dev is true and the
# corresponding URL setting is left empty.
DEV_APP_URL = "http://localhost:9080"
DEV_BASE_URL = "http://localhost:8787/v1"
76
+
77
+
78
class Settings(BaseSettings):
    """Minimal settings for the standalone scanner.

    Values are read case-insensitively from the process environment and from a
    ``.env`` file in the current working directory. Keys in ``.env`` that do
    not correspond to a field are ignored (``extra="ignore"``). pydantic-settings
    otherwise forbids extras from dotenv files, so an unrelated variable in the
    ``.env`` of the directory the tool is run from would make construction fail
    with a ``ValidationError``.
    """

    # Set OPENHACK_DEV=1 to point both URLs at local dev (Next.js app on :9080,
    # wrangler dev inference on :8787) instead of production.
    openhack_dev: bool = False

    llm_provider: str = "openhack"

    openhack_api_key: Optional[str] = None
    # Empty URL values are filled in by model_post_init based on openhack_dev.
    openhack_base_url: str = ""
    openhack_app_url: str = ""
    openhack_model_id: str = "kimi-k2.5"

    openhack_org_id: Optional[str] = None
    openhack_org_slug: Optional[str] = None
    openhack_org_name: Optional[str] = None
    openhack_user_email: Optional[str] = None
    openhack_user_first_name: Optional[str] = None
    openhack_user_last_name: Optional[str] = None
    openhack_read_timeout: int = 600
    openhack_connect_timeout: int = 30
    openhack_max_retries: int = 5

    # Optional per-agent model overrides.
    recon_model_id: Optional[str] = None
    hunter_model_id: Optional[str] = None
    validator_model_id: Optional[str] = None
    browser_verifier_model_id: Optional[str] = None

    max_concurrent_hunters: int = 3
    max_concurrent_validators: int = 5

    compaction_threshold: float = 0.70
    tool_result_max_lines: int = 200
    checkpoint_enabled: bool = True

    # Scan scoping — exclude paths that are never production web attack surface
    scan_exclude_patterns: list[str] = [
        "**/test/**", "**/tests/**", "**/__tests__/**", "**/spec/**",
        "**/__mocks__/**", "**/fixtures/**", "**/__fixtures__/**",
        "**/e2e/**", "**/cypress/**", "**/playwright/**",
        "**/cli/**", "**/CLI/**",
        "**/docs/**", "**/documentation/**",
        "**/examples/**", "**/example/**", "**/samples/**", "**/demo/**", "**/demos/**",
        "**/tutorial/**", "**/tutorials/**", "**/playground/**", "**/sandbox/**",
        "**/mock/**", "**/mocks/**", "**/stub/**", "**/stubs/**",
        "**/scripts/**", "**/tools/**", "**/devtools/**",
        "**/benchmarks/**", "**/benchmark/**",
        "**/integration-tests/**",
        "**/*.test.*", "**/*.spec.*", "**/test_*",
        "**/conftest.py", "**/jest.config.*", "**/vitest.config.*",
        "**/.storybook/**", "**/stories/**",
    ]

    # Feature deep dive
    feature_hunt_enabled: bool = True
    max_feature_hunters: int = 7
    feature_hunter_max_iterations: int = 75
    max_concurrent_feature_hunters: int = 2
    feature_hunter_model_id: Optional[str] = None

    # Sandbox verification
    sandbox_enabled: bool = False
    sandbox_max_exploit_attempts: int = 7
    sandbox_health_check_timeout: int = 120
    sandbox_health_check_path: str = "/"
    sandbox_teardown_on_complete: bool = True

    # Browser verification
    browser_verification_enabled: bool = False
    browser_headless: bool = True
    browser_max_exploit_attempts: int = 7
    browser_timeout_ms: int = 30000

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        # Tolerate unrelated variables in the working directory's .env file.
        extra="ignore",
    )

    def model_post_init(self, __context) -> None:
        """Fill empty app/base URLs with the dev or production defaults."""
        if not self.openhack_app_url:
            self.openhack_app_url = DEV_APP_URL if self.openhack_dev else PROD_APP_URL
        if not self.openhack_base_url:
            self.openhack_base_url = DEV_BASE_URL if self.openhack_dev else PROD_BASE_URL
164
+
165
+
166
def _build_settings() -> Settings:
    """Build Settings, overlaying ~/.openhack/config values as env-like overrides.

    Precedence, highest first: non-empty process environment variables,
    non-empty ``.env`` entries, then the persisted user config. The overlay is
    applied by temporarily exporting config values into ``os.environ`` while
    ``Settings()`` is constructed; the previous environment is always restored
    afterwards, even if construction raises.

    List and dict config values are exported as JSON, because pydantic-settings
    parses complex fields from the environment as JSON. ``str()`` would produce
    a Python repr (single quotes) that fails to parse. Scalars are exported
    with ``str()`` as before.
    """
    user_cfg = load_user_config()
    env_overrides = {}
    for key, val in user_cfg.items():
        if val is None or val == "":
            continue
        if isinstance(val, (list, dict)):
            env_overrides[key.upper()] = json.dumps(val)
        else:
            env_overrides[key.upper()] = str(val)

    dotenv_keys = _dotenv_nonempty_keys(Path(".env"))
    old_env = {}
    for k, v in env_overrides.items():
        # Respect explicit non-empty environment variables, but allow persisted
        # config to fill missing or blank values. Also let .env values win.
        current = os.environ.get(k)
        if (current is None or current == "") and k not in dotenv_keys:
            old_env[k] = current
            os.environ[k] = v

    try:
        s = Settings()
    finally:
        # Undo only the variables this function set or overwrote.
        for k, prev in old_env.items():
            if prev is None:
                os.environ.pop(k, None)
            else:
                os.environ[k] = prev
    return s
193
+
194
+
195
# Module-level settings instance, built once when this module is imported.
# reload_settings() rebinds this name to a freshly built instance.
settings = _build_settings()
196
+
197
+
198
def reload_settings() -> None:
    """Rebuild the module-level ``settings`` from the user config and environment.

    The module global is rebound to a new instance; code holding a reference
    to the previous object keeps seeing the old values.
    """
    global settings
    refreshed = _build_settings()
    settings = refreshed