opencode-api-security-testing 2.1.0 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/SKILL.md +1797 -0
- package/core/advanced_recon.py +788 -0
- package/core/agentic_analyzer.py +445 -0
- package/core/analyzers/api_parser.py +210 -0
- package/core/analyzers/response_analyzer.py +212 -0
- package/core/analyzers/sensitive_finder.py +184 -0
- package/core/api_fuzzer.py +422 -0
- package/core/api_interceptor.py +525 -0
- package/core/api_parser.py +955 -0
- package/core/browser_tester.py +479 -0
- package/core/cloud_storage_tester.py +1330 -0
- package/core/collectors/__init__.py +23 -0
- package/core/collectors/api_path_finder.py +300 -0
- package/core/collectors/browser_collect.py +645 -0
- package/core/collectors/browser_collector.py +411 -0
- package/core/collectors/http_client.py +111 -0
- package/core/collectors/js_collector.py +490 -0
- package/core/collectors/js_parser.py +780 -0
- package/core/collectors/url_collector.py +319 -0
- package/core/context_manager.py +682 -0
- package/core/deep_api_tester_v35.py +844 -0
- package/core/deep_api_tester_v55.py +366 -0
- package/core/dynamic_api_analyzer.py +532 -0
- package/core/http_client.py +179 -0
- package/core/models.py +296 -0
- package/core/orchestrator.py +890 -0
- package/core/prerequisite.py +227 -0
- package/core/reasoning_engine.py +1042 -0
- package/core/response_classifier.py +606 -0
- package/core/runner.py +938 -0
- package/core/scan_engine.py +599 -0
- package/core/skill_executor.py +435 -0
- package/core/skill_executor_v2.py +670 -0
- package/core/skill_executor_v3.py +704 -0
- package/core/smart_analyzer.py +687 -0
- package/core/strategy_pool.py +707 -0
- package/core/testers/auth_tester.py +264 -0
- package/core/testers/idor_tester.py +200 -0
- package/core/testers/sqli_tester.py +211 -0
- package/core/testing_loop.py +655 -0
- package/core/utils/base_path_dict.py +255 -0
- package/core/utils/payload_lib.py +167 -0
- package/core/utils/ssrf_detector.py +220 -0
- package/core/verifiers/vuln_verifier.py +536 -0
- package/package.json +17 -13
- package/references/asset-discovery.md +119 -612
- package/references/graphql-guidance.md +65 -641
- package/references/intake.md +84 -0
- package/references/report-template.md +131 -38
- package/references/rest-guidance.md +55 -526
- package/references/severity-model.md +52 -264
- package/references/test-matrix.md +65 -263
- package/references/validation.md +53 -400
- package/scripts/postinstall.js +46 -0
- package/agents/cyber-supervisor.md +0 -55
- package/agents/probing-miner.md +0 -42
- package/agents/resource-specialist.md +0 -31
- package/commands/api-security-testing-scan.md +0 -59
- package/commands/api-security-testing-test.md +0 -49
- package/commands/api-security-testing.md +0 -72
- package/tsconfig.json +0 -17
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
URL Collector - 域名/URL 采集器
|
|
4
|
+
从 HTML、JS、响应中发现域名、URL、Base URL
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
from typing import Dict, List, Set, Tuple, Optional
|
|
9
|
+
from urllib.parse import urljoin, urlparse, parse_qs
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
import requests
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class URLCollectionResult:
    """Aggregated output of a URL-collection pass.

    Each bucket is an independent set; instances never share state thanks
    to the ``default_factory`` defaults.
    """
    # Registrable domains (last-two-labels heuristic) and bare IPv4 hosts.
    domains: Set[str] = field(default_factory=set)
    # Full hostnames as seen in collected URLs (e.g. api.example.com).
    subdomains: Set[str] = field(default_factory=set)
    # Service base URLs / path prefixes ('.../api' style).
    base_urls: Set[str] = field(default_factory=set)
    # URLs pointing at static assets (js/css/images/fonts/...).
    static_urls: Set[str] = field(default_factory=set)
    # URLs that look like API endpoints.
    api_urls: Set[str] = field(default_factory=set)
    # Everything else found inline in pages and scripts.
    inline_urls: Set[str] = field(default_factory=set)
    # URLs reached via redirects — reserved; not populated by the
    # collectors in this module (confirm against callers before relying on it).
    redirected_urls: Set[str] = field(default_factory=set)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class URLCollector:
    """Collects URLs from HTML, JavaScript, and API responses.

    Capabilities:
    - domain / subdomain harvesting
    - base-URL discovery (micro-service name extraction)
    - static-resource URL collection
    - cross-origin URL collection
    - API URL collection
    - inline URL extraction

    Results accumulate in ``self.result`` across calls.
    """

    def __init__(self, session: "requests.Session" = None):
        # Reuse the caller's session when given so cookies/proxies carry over.
        self.session = session or requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        })
        self.result = URLCollectionResult()

    def collect_from_html(self, html_content: str, base_url: str) -> "URLCollectionResult":
        """Collect URLs from an HTML document.

        Relative URLs are resolved against *base_url*.
        """
        # 1. Plain links: href/src attributes and CSS url(...) references.
        # ["\'] already matches either quote, so one pattern per attribute
        # suffices (the original listed each pattern twice).
        link_patterns = [
            r'href=["\']([^"\']+)["\']',
            r'src=["\']([^"\']+)["\']',
            r'url\(["\']?([^"\'()]+)["\']?\)',
        ]
        for pattern in link_patterns:
            for url in re.findall(pattern, html_content, re.IGNORECASE):
                self._process_url(url, base_url)

        # 2. URLs embedded in <meta>/<link> attribute values.
        meta_patterns = [
            r'<meta[^>]+content=["\']([^"\']+)["\']',
            r'<link[^>]+href=["\']([^"\']+)["\']',
        ]
        for pattern in meta_patterns:
            for content in re.findall(pattern, html_content, re.IGNORECASE):
                for url in re.findall(r'https?://[^\s"\'<>]+', content):
                    self._process_url(url, base_url)

        # 3. URLs inside inline JSON / JS configuration.
        # (?:...) keeps the scheme alternation non-capturing so re.findall
        # returns plain strings; the original's nested capturing group made
        # it return (url, scheme) tuples, crashing _process_url.
        json_patterns = [
            r'["\']((?:https?|wss?)://[^\s"\'<>]+)["\']',
            r'["\'](/[a-zA-Z0-9_/-]+\.json)["\']',
            r'api[Uu]rl\s*[:=]\s*["\']([^"\']+)["\']',
            r'base[Uu]rl\s*[:=]\s*["\']([^"\']+)["\']',
            r'endpoint\s*[:=]\s*["\']([^"\']+)["\']',
        ]
        for pattern in json_patterns:
            for url in re.findall(pattern, html_content, re.IGNORECASE):
                self._process_url(url, base_url)

        return self.result

    def collect_from_js(self, js_content: str, base_url: str) -> "URLCollectionResult":
        """Collect URLs from JavaScript source text."""
        # 1. Absolute and root-relative URLs inside string literals.
        # Non-capturing scheme group — see collect_from_html for rationale.
        url_patterns = [
            r'["\']((?:https?|wss?)://[^\s"\'<>]+)["\']',
            r'["\'](/[a-zA-Z0-9_/.-]+)["\']',
        ]
        for pattern in url_patterns:
            for url in re.findall(pattern, js_content, re.IGNORECASE):
                self._process_url(url, base_url)

        # 2. Common config-object keys (baseURL, API_URL, process.env.X, ...).
        config_patterns = [
            r'(?:baseURL|apiURL|apiUrl|endpoint|BaseUrl)\s*[:=]\s*["\']([^"\']+)["\']',
            r'(?:BASE_URL|API_URL|API_ENDPOINT)\s*[:=]\s*["\']([^"\']+)["\']',
            r'process\.env\.([A-Z_]+)\s*[:=]\s*["\']([^"\']+)["\']',
        ]
        for pattern in config_patterns:
            for match in re.findall(pattern, js_content, re.IGNORECASE):
                # Multi-group patterns yield tuples; the URL is the last group.
                url = match[-1] if isinstance(match, tuple) else match
                self._process_url(url, base_url)

        # 3. WebSocket endpoints.
        ws_patterns = [
            r'new\s+WebSocket\s*\(\s*["\']([^"\']+)["\']',
            r'wss?://[^\s"\'<>]+',
        ]
        for pattern in ws_patterns:
            for url in re.findall(pattern, js_content, re.IGNORECASE):
                self._process_url(url, base_url)

        return self.result

    def collect_from_response(self, response_text: str, base_url: str, content_type: str = "") -> "URLCollectionResult":
        """Collect URLs from an API response body.

        JSON bodies are parsed and re-serialised so extraction sees a
        normalised string; unparseable bodies fall back to the raw text.
        """
        text = response_text
        if 'json' in content_type.lower() or response_text.strip().startswith('{'):
            import json
            try:
                text = json.dumps(json.loads(response_text))
            except ValueError:
                # json.JSONDecodeError subclasses ValueError; narrowed from
                # a bare `except:` so unrelated errors are not swallowed.
                pass
        # The HTML extractor's regexes work on any text, so reuse it.
        return self.collect_from_html(text, base_url)

    def _process_url(self, url: str, base_url: str):
        """Normalise one URL, then classify it into the result buckets."""
        if not url:
            return

        url = url.strip()

        # Protocol-relative URL (//host/path) — assume https.
        if url.startswith('//'):
            url = 'https:' + url

        # Root-relative path — resolve against the page's base URL.
        if url.startswith('/'):
            url = urljoin(base_url, url)

        parsed = urlparse(url)

        # Still no scheme/host: try resolving a relative path, then give up.
        if not parsed.scheme or not parsed.netloc:
            if '/' in url:
                url = urljoin(base_url, url)
                parsed = urlparse(url)
            if not parsed.scheme or not parsed.netloc:
                return

        domain = parsed.netloc.lower()

        if self._is_ip(domain):
            self.result.domains.add(domain)
        else:
            self.result.subdomains.add(domain)
            # Registrable base domain. Naive last-two-labels heuristic
            # (wrong for multi-part TLDs like .co.uk). Only for hostnames:
            # the original ran this for IPs too, adding junk like "0.1".
            parts = domain.split('.')
            if len(parts) >= 2:
                self.result.domains.add('.'.join(parts[-2:]))

        path = parsed.path
        if path:
            # Classify: API endpoint, static asset, or generic inline URL.
            # Require a digit after '/v' so '/video' is no longer
            # misclassified (the original tested the substring '/v').
            if '/api/' in path or re.search(r'/v\d+(?:/|$)', path):
                self.result.api_urls.add(url)
            elif self._is_static_resource(path):
                self.result.static_urls.add(url)
            else:
                self.result.inline_urls.add(url)

            # Record the service base URL up to and including '/api'.
            # The original computed `base` but never used it, storing only
            # a path fragment; store the absolute base URL instead.
            if '/api/' in path:
                base = parsed.scheme + '://' + parsed.netloc
                api_path = path.split('/api/')[0] + '/api'
                self.result.base_urls.add(base + api_path)

    def _is_ip(self, host: str) -> bool:
        """Return True if *host* looks like a dotted-quad IPv4 address.

        Purely syntactic: octet ranges are not validated (999.1.1.1 passes).
        """
        return bool(re.match(r'^(\d{1,3}\.){3}\d{1,3}$', host))

    def _is_static_resource(self, path: str) -> bool:
        """Return True if *path* ends in a known static-asset extension."""
        static_extensions = (
            '.js', '.css', '.jpg', '.jpeg', '.png', '.gif', '.svg', '.ico',
            '.woff', '.woff2', '.ttf', '.eot', '.otf', '.map',
            '.html', '.htm', '.xml', '.json',
        )
        # str.endswith accepts a tuple — one C-level call instead of a loop.
        return path.lower().endswith(static_extensions)

    def discover_base_urls(self, urls: Set[str]) -> Set[str]:
        """Derive base-URL path prefixes (micro-service names) from *urls*.

        '/svc/api/users' -> '/svc/api'; '/v2/items' -> '/v2'.
        Returns path prefixes only (no scheme/host).
        """
        base_urls = set()
        for url in urls:
            path = urlparse(url).path
            if '/api/' in path:
                # '/svc/api/users' splits to ['', 'svc', 'api', 'users'];
                # joining up to and including 'api' yields '/svc/api'.
                parts = path.split('/')
                if len(parts) >= 3:
                    idx = parts.index('api')
                    if idx >= 1:
                        # parts[0] is '' so the join already carries the
                        # leading slash; the original prepended another '/',
                        # producing '//svc/api'.
                        base_urls.add('/'.join(parts[:idx + 1]))
            elif '/v' in path:
                match = re.search(r'(/v\d+)', path)
                if match:
                    base_urls.add(match.group(1))
        return base_urls

    def get_all_collectors_results(self) -> Dict[str, Set[str]]:
        """Return every collected bucket, keyed by category name."""
        return {
            'domains': self.result.domains,
            'subdomains': self.result.subdomains,
            'base_urls': self.result.base_urls,
            'static_urls': self.result.static_urls,
            'api_urls': self.result.api_urls,
            'inline_urls': self.result.inline_urls,
            # Included for consistency with URLCollectionResult; the
            # original omitted this bucket.
            'redirected_urls': self.result.redirected_urls,
        }
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
class DomainURLCollector:
    """Domain/URL-focused collector using passive sources: DNS CNAME
    records, TLS certificate SANs, and the Wayback Machine CDX API.

    Every collector is best-effort: failures yield an empty result rather
    than raising.
    """

    def __init__(self, session: "requests.Session" = None):
        self.session = session or requests.Session()
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (compatible; SecurityBot/1.0)'
        })

    def collect_from_cname(self, domain: str) -> Set[str]:
        """Discover related hostnames via *domain*'s CNAME record.

        Returns an empty set when dnspython is unavailable or resolution
        fails.
        """
        subdomains = set()
        try:
            import dns.resolver  # optional third-party dependency (dnspython)
            resolver = dns.resolver.Resolver()
            resolver.timeout = 2
            # `timeout` is per-nameserver; `lifetime` bounds the whole query.
            resolver.lifetime = 2
            answers = resolver.resolve(domain, 'CNAME')
            for rdata in answers:
                cname = str(rdata.target).rstrip('.')
                # Keep only targets related to the queried domain.
                if domain in cname:
                    subdomains.add(cname)
        except Exception:
            # Best-effort: NXDOMAIN, timeouts, or a missing dnspython simply
            # yield no results. Narrowed from a bare `except:` so
            # KeyboardInterrupt/SystemExit still propagate.
            pass
        return subdomains

    def collect_from_certificate(self, domain: str) -> Set[str]:
        """Discover hostnames from the subjectAltName entries of *domain*'s
        TLS certificate (port 443)."""
        subdomains = set()
        try:
            import socket
            import ssl
            ctx = ssl.create_default_context()
            with socket.create_connection((domain, 443), timeout=3) as sock:
                with ctx.wrap_socket(sock, server_hostname=domain) as ssock:
                    cert = ssock.getpeercert()
                    for san_type, san_value in cert.get('subjectAltName', ()):
                        if san_type == 'DNS':
                            subdomains.add(san_value.lower())
        except Exception:
            # Best-effort: connection/TLS failures yield no results
            # (narrowed from a bare `except:`).
            pass
        return subdomains

    def collect_from_wayback(self, domain: str) -> Set[str]:
        """Discover historical URLs for *domain* via the Wayback Machine
        CDX API (capped at 100 rows)."""
        urls = set()
        try:
            resp = self.session.get(
                f"https://web.archive.org/cdx/search/cdx?url=*.{domain}/*&output=json&limit=100",
                timeout=10
            )
            if resp.status_code == 200:
                data = resp.json()
                # CDX JSON output: row 0 is the header; each data row is
                # [urlkey, timestamp, original, ...] — the URL is column 2.
                for row in data[1:]:
                    # The original checked len(row) >= 2 but indexed row[2],
                    # raising IndexError on 2-column rows.
                    if len(row) > 2:
                        urls.add(row[2])
        except Exception:
            # Best-effort source; swallow network/JSON errors (narrowed
            # from a bare `except:`).
            pass
        return urls
|