opencode-api-security-testing 2.1.0 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/SKILL.md +1797 -0
  2. package/core/advanced_recon.py +788 -0
  3. package/core/agentic_analyzer.py +445 -0
  4. package/core/analyzers/api_parser.py +210 -0
  5. package/core/analyzers/response_analyzer.py +212 -0
  6. package/core/analyzers/sensitive_finder.py +184 -0
  7. package/core/api_fuzzer.py +422 -0
  8. package/core/api_interceptor.py +525 -0
  9. package/core/api_parser.py +955 -0
  10. package/core/browser_tester.py +479 -0
  11. package/core/cloud_storage_tester.py +1330 -0
  12. package/core/collectors/__init__.py +23 -0
  13. package/core/collectors/api_path_finder.py +300 -0
  14. package/core/collectors/browser_collect.py +645 -0
  15. package/core/collectors/browser_collector.py +411 -0
  16. package/core/collectors/http_client.py +111 -0
  17. package/core/collectors/js_collector.py +490 -0
  18. package/core/collectors/js_parser.py +780 -0
  19. package/core/collectors/url_collector.py +319 -0
  20. package/core/context_manager.py +682 -0
  21. package/core/deep_api_tester_v35.py +844 -0
  22. package/core/deep_api_tester_v55.py +366 -0
  23. package/core/dynamic_api_analyzer.py +532 -0
  24. package/core/http_client.py +179 -0
  25. package/core/models.py +296 -0
  26. package/core/orchestrator.py +890 -0
  27. package/core/prerequisite.py +227 -0
  28. package/core/reasoning_engine.py +1042 -0
  29. package/core/response_classifier.py +606 -0
  30. package/core/runner.py +938 -0
  31. package/core/scan_engine.py +599 -0
  32. package/core/skill_executor.py +435 -0
  33. package/core/skill_executor_v2.py +670 -0
  34. package/core/skill_executor_v3.py +704 -0
  35. package/core/smart_analyzer.py +687 -0
  36. package/core/strategy_pool.py +707 -0
  37. package/core/testers/auth_tester.py +264 -0
  38. package/core/testers/idor_tester.py +200 -0
  39. package/core/testers/sqli_tester.py +211 -0
  40. package/core/testing_loop.py +655 -0
  41. package/core/utils/base_path_dict.py +255 -0
  42. package/core/utils/payload_lib.py +167 -0
  43. package/core/utils/ssrf_detector.py +220 -0
  44. package/core/verifiers/vuln_verifier.py +536 -0
  45. package/package.json +17 -13
  46. package/references/asset-discovery.md +119 -612
  47. package/references/graphql-guidance.md +65 -641
  48. package/references/intake.md +84 -0
  49. package/references/report-template.md +131 -38
  50. package/references/rest-guidance.md +55 -526
  51. package/references/severity-model.md +52 -264
  52. package/references/test-matrix.md +65 -263
  53. package/references/validation.md +53 -400
  54. package/scripts/postinstall.js +46 -0
  55. package/agents/cyber-supervisor.md +0 -55
  56. package/agents/probing-miner.md +0 -42
  57. package/agents/resource-specialist.md +0 -31
  58. package/commands/api-security-testing-scan.md +0 -59
  59. package/commands/api-security-testing-test.md +0 -49
  60. package/commands/api-security-testing.md +0 -72
  61. package/tsconfig.json +0 -17
@@ -0,0 +1,319 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ URL Collector - 域名/URL 采集器
4
+ 从 HTML、JS、响应中发现域名、URL、Base URL
5
+ """
6
+
7
+ import re
8
+ from typing import Dict, List, Set, Tuple, Optional
9
+ from urllib.parse import urljoin, urlparse, parse_qs
10
+ from dataclasses import dataclass, field
11
+ import requests
12
+
13
+
14
@dataclass
class URLCollectionResult:
    """Aggregated result of one URL-collection pass."""
    # registrable base domains and bare IPv4 hosts (e.g. "example.com", "10.0.0.1")
    domains: Set[str] = field(default_factory=set)
    # full hostnames as seen in discovered URLs (e.g. "api.example.com")
    subdomains: Set[str] = field(default_factory=set)
    # discovered service base URLs / API path prefixes (microservice roots)
    base_urls: Set[str] = field(default_factory=set)
    # URLs of static assets (.js/.css/images/fonts/…)
    static_urls: Set[str] = field(default_factory=set)
    # URLs classified as API endpoints
    api_urls: Set[str] = field(default_factory=set)
    # remaining URLs found inline in page/JS content
    inline_urls: Set[str] = field(default_factory=set)
    # URLs reached via redirects — not populated anywhere in this module;
    # presumably filled in by callers (TODO confirm)
    redirected_urls: Set[str] = field(default_factory=set)
24
+
25
+
26
class URLCollector:
    """Collects domains, subdomains, base URLs and API/static URLs from
    HTML documents, JavaScript sources and API response bodies.

    Capabilities:
    - domain / subdomain collection
    - base-URL discovery (microservice name extraction)
    - static-resource URL collection
    - cross-origin URL collection
    - API URL collection
    - inline URL extraction

    All ``collect_from_*`` methods accumulate into ``self.result`` and
    return it, so repeated calls merge their findings.
    """

    def __init__(self, session: Optional["requests.Session"] = None):
        # String annotation keeps the class importable even when `requests`
        # is not yet in scope; a Session is only created lazily here.
        self.session = session or requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        })
        self.result = URLCollectionResult()

    def collect_from_html(self, html_content: str, base_url: str) -> "URLCollectionResult":
        """Harvest URLs from an HTML document into ``self.result``.

        :param html_content: raw HTML text to scan
        :param base_url: page URL used to resolve relative references
        :return: the shared :class:`URLCollectionResult`
        """
        # 1. href/src attributes and CSS url(...) references.
        #    NOTE: the original list repeated equivalent patterns
        #    (href/src with swapped quote order, url(...) twice) — the
        #    character classes already accept both quote styles.
        link_patterns = [
            r'href=["\']([^"\']+)["\']',
            r'src=["\']([^"\']+)["\']',
            r'url\(["\']?([^"\'()]+)["\']?\)',
        ]
        for pattern in link_patterns:
            for url in re.findall(pattern, html_content, re.IGNORECASE):
                self._process_url(url, base_url)

        # 2. URLs embedded in <meta content="..."> / <link href="...">
        meta_patterns = [
            r'<meta[^>]+content=["\']([^"\']+)["\']',
            r'<link[^>]+href=["\']([^"\']+)["\']',
        ]
        for pattern in meta_patterns:
            for content in re.findall(pattern, html_content, re.IGNORECASE):
                for url in re.findall(r'https?://[^\s"\'<>]+', content):
                    self._process_url(url, base_url)

        # 3. URLs inside inline JSON / JS configuration.
        #    FIX: the scheme alternation must be non-capturing — with two
        #    capture groups re.findall returns tuples, which crashed
        #    _process_url (AttributeError on tuple.strip()).
        json_patterns = [
            r'["\']((?:https?|wss?)://[^\s"\'<>]+)["\']',
            r'["\'](/[a-zA-Z0-9_/-]+\.json)["\']',
            r'api[Uu]rl\s*[:=]\s*["\']([^"\']+)["\']',
            r'base[Uu]rl\s*[:=]\s*["\']([^"\']+)["\']',
            r'endpoint\s*[:=]\s*["\']([^"\']+)["\']',
        ]
        for pattern in json_patterns:
            for url in re.findall(pattern, html_content, re.IGNORECASE):
                self._process_url(url, base_url)

        return self.result

    def collect_from_js(self, js_content: str, base_url: str) -> "URLCollectionResult":
        """Harvest URLs from JavaScript source into ``self.result``."""
        # 1. String literals holding absolute or root-relative URLs.
        #    FIX: non-capturing scheme group (see collect_from_html).
        url_patterns = [
            r'["\']((?:https?|wss?)://[^\s"\'<>]+)["\']',
            r'["\'](/[a-zA-Z0-9_/.-]+)["\']',
        ]
        for pattern in url_patterns:
            for url in re.findall(pattern, js_content, re.IGNORECASE):
                self._process_url(url, base_url)

        # 2. Configuration objects (baseURL, API_URL, process.env.X = "...").
        config_patterns = [
            r'(?:baseURL|apiURL|apiUrl|endpoint|BaseUrl)\s*[:=]\s*["\']([^"\']+)["\']',
            r'(?:BASE_URL|API_URL|API_ENDPOINT)\s*[:=]\s*["\']([^"\']+)["\']',
            r'process\.env\.([A-Z_]+)\s*[:=]\s*["\']([^"\']+)["\']',
        ]
        for pattern in config_patterns:
            for match in re.findall(pattern, js_content, re.IGNORECASE):
                # The process.env pattern has two groups; the value is last.
                url = match[-1] if isinstance(match, tuple) else match
                self._process_url(url, base_url)

        # 3. WebSocket endpoints.
        ws_patterns = [
            r'new\s+WebSocket\s*\(\s*["\']([^"\']+)["\']',
            r'wss?://[^\s"\'<>]+',
        ]
        for pattern in ws_patterns:
            for url in re.findall(pattern, js_content, re.IGNORECASE):
                self._process_url(url, base_url)

        return self.result

    def collect_from_response(self, response_text: str, base_url: str, content_type: str = "") -> "URLCollectionResult":
        """Harvest URLs from an API response body.

        JSON bodies are parsed and re-serialised (normalising escapes)
        before being scanned with the HTML/JS extractors.
        """
        text = response_text
        if 'json' in content_type.lower() or response_text.strip().startswith('{'):
            try:
                import json
                text = json.dumps(json.loads(response_text))
            except ValueError:
                # Not valid JSON after all — scan the raw text instead.
                # (Narrowed from a bare `except:`.)
                text = response_text
        return self.collect_from_html(text, base_url)

    def _process_url(self, url: str, base_url: str) -> None:
        """Normalise one discovered URL and file it into the result buckets."""
        if not url:
            return
        url = url.strip()

        # Protocol-relative URL: assume https.
        if url.startswith('//'):
            url = 'https:' + url
        # Root-relative path: resolve against the page URL.
        if url.startswith('/'):
            url = urljoin(base_url, url)

        parsed = urlparse(url)
        # Relative path like "a/b" — try resolving it too.
        if not parsed.scheme or not parsed.netloc:
            if '/' in url:
                url = urljoin(base_url, url)
                parsed = urlparse(url)
        if not parsed.scheme or not parsed.netloc:
            return

        domain = parsed.netloc.lower()
        if self._is_ip(domain):
            self.result.domains.add(domain)
        else:
            self.result.subdomains.add(domain)
            # Naive registrable domain: last two labels (wrong for
            # multi-label public suffixes like .co.uk — known limitation).
            parts = domain.split('.')
            if len(parts) >= 2:
                self.result.domains.add('.'.join(parts[-2:]))

        path = parsed.path
        if path:
            # FIX: the old `'/v' in path` test misclassified paths such as
            # /video or /version as API URLs; require a version segment.
            if '/api/' in path or re.search(r'/v\d+(?:/|$)', path):
                self.result.api_urls.add(url)
            elif self._is_static_resource(path):
                self.result.static_urls.add(url)
            else:
                self.result.inline_urls.add(url)

            if '/api/' in path:
                # FIX: `base` was computed but never used and only the bare
                # path landed in base_urls; a base URL should include origin.
                origin = parsed.scheme + '://' + parsed.netloc
                api_prefix = path.split('/api/')[0] + '/api'
                self.result.base_urls.add(origin + api_prefix)

    def _is_ip(self, host: str) -> bool:
        """Return True if *host* looks like a dotted-quad IPv4 address."""
        return bool(re.match(r'^(\d{1,3}\.){3}\d{1,3}$', host))

    def _is_static_resource(self, path: str) -> bool:
        """Return True if *path* ends with a known static-asset extension."""
        static_extensions = (
            '.js', '.css', '.jpg', '.jpeg', '.png', '.gif', '.svg', '.ico',
            '.woff', '.woff2', '.ttf', '.eot', '.otf', '.map',
            '.html', '.htm', '.xml', '.json',
        )
        # str.endswith accepts a tuple — one C-level call instead of a loop.
        return path.lower().endswith(static_extensions)

    def discover_base_urls(self, urls: Set[str]) -> Set[str]:
        """Derive candidate service base paths (microservice prefixes).

        ``https://h/svc/api/v1/u`` yields ``/svc/api``; a bare versioned
        path yields its ``/vN`` segment.
        """
        base_urls = set()
        for url in urls:
            path = urlparse(url).path
            if '/api/' in path:
                # '/api/' in path guarantees an 'api' segment exists and
                # parts[0] == '' (leading slash), so the join below already
                # starts with '/'. FIX: the original prepended another '/',
                # producing doubled-slash prefixes like '//svc/api'.
                parts = path.split('/')
                idx = parts.index('api')
                base_urls.add('/'.join(parts[:idx + 1]))
            elif '/v' in path:
                match = re.search(r'(/v\d+)', path)
                if match:
                    base_urls.add(match.group(1))
        return base_urls

    def get_all_collectors_results(self) -> Dict[str, Set[str]]:
        """Return the accumulated result buckets keyed by category name."""
        return {
            'domains': self.result.domains,
            'subdomains': self.result.subdomains,
            'base_urls': self.result.base_urls,
            'static_urls': self.result.static_urls,
            'api_urls': self.result.api_urls,
            'inline_urls': self.result.inline_urls,
        }
252
+
253
+
254
class DomainURLCollector:
    """Discovers related hostnames and historical URLs for a domain via
    DNS CNAME records, TLS certificate SANs and the Wayback Machine.

    Every collector is best-effort: any failure (missing optional
    dependency, DNS error, unreachable host, HTTP error) yields an
    empty set rather than raising.
    """

    def __init__(self, session: Optional["requests.Session"] = None):
        # String annotation keeps the class importable without `requests`.
        self.session = session or requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (compatible; SecurityBot/1.0)'
        })

    def collect_from_cname(self, domain: str) -> Set[str]:
        """Return subdomains discovered through *domain*'s CNAME record.

        Requires the optional ``dnspython`` package; returns an empty set
        when it is unavailable or resolution fails.
        """
        subdomains: Set[str] = set()
        try:
            import dns.resolver
            resolver = dns.resolver.Resolver()
            resolver.timeout = 2
            answers = resolver.resolve(domain, 'CNAME')
            for rdata in answers:
                cname = str(rdata.target).rstrip('.')
                # Only keep targets related to the queried domain.
                if domain in cname:
                    subdomains.add(cname)
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate; the best-effort contract is preserved.
            pass
        return subdomains

    def collect_from_certificate(self, domain: str) -> Set[str]:
        """Return hostnames listed in the TLS certificate's subjectAltName."""
        subdomains: Set[str] = set()
        try:
            import socket
            import ssl
            ctx = ssl.create_default_context()
            with socket.create_connection((domain, 443), timeout=3) as sock:
                with ctx.wrap_socket(sock, server_hostname=domain) as ssock:
                    cert = ssock.getpeercert()
                    for san_type, san_value in cert.get('subjectAltName', ()):
                        if san_type == 'DNS':
                            subdomains.add(san_value.lower())
        except Exception:
            pass
        return subdomains

    def collect_from_wayback(self, domain: str) -> Set[str]:
        """Return historical URLs for ``*.domain`` from the Wayback Machine
        CDX API (capped at 100 rows)."""
        urls: Set[str] = set()
        try:
            resp = self.session.get(
                f"https://web.archive.org/cdx/search/cdx?url=*.{domain}/*&output=json&limit=100",
                timeout=10
            )
            if resp.status_code == 200:
                data = resp.json()
                # data[0] is the CDX header row; each following row is
                # [urlkey, timestamp, original, ...] — we want index 2.
                for row in data[1:]:
                    # FIX: the original guarded with len(row) >= 2 but then
                    # read row[2], raising IndexError on 2-column rows.
                    if len(row) >= 3:
                        urls.add(row[2])
        except Exception:
            pass
        return urls