opencode-api-security-testing 3.0.10 → 3.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -0
- package/SKILL.md +1797 -0
- package/core/advanced_recon.py +788 -0
- package/core/agentic_analyzer.py +445 -0
- package/core/analyzers/api_parser.py +210 -0
- package/core/analyzers/response_analyzer.py +212 -0
- package/core/analyzers/sensitive_finder.py +184 -0
- package/core/api_fuzzer.py +422 -0
- package/core/api_interceptor.py +525 -0
- package/core/api_parser.py +955 -0
- package/core/browser_tester.py +479 -0
- package/core/cloud_storage_tester.py +1330 -0
- package/core/collectors/__init__.py +23 -0
- package/core/collectors/api_path_finder.py +300 -0
- package/core/collectors/browser_collect.py +645 -0
- package/core/collectors/browser_collector.py +411 -0
- package/core/collectors/http_client.py +111 -0
- package/core/collectors/js_collector.py +490 -0
- package/core/collectors/js_parser.py +780 -0
- package/core/collectors/url_collector.py +319 -0
- package/core/context_manager.py +682 -0
- package/core/deep_api_tester_v35.py +844 -0
- package/core/deep_api_tester_v55.py +366 -0
- package/core/dynamic_api_analyzer.py +532 -0
- package/core/http_client.py +179 -0
- package/core/models.py +296 -0
- package/core/orchestrator.py +890 -0
- package/core/prerequisite.py +227 -0
- package/core/reasoning_engine.py +1042 -0
- package/core/response_classifier.py +606 -0
- package/core/runner.py +938 -0
- package/core/scan_engine.py +599 -0
- package/core/skill_executor.py +435 -0
- package/core/skill_executor_v2.py +670 -0
- package/core/skill_executor_v3.py +704 -0
- package/core/smart_analyzer.py +687 -0
- package/core/strategy_pool.py +707 -0
- package/core/testers/auth_tester.py +264 -0
- package/core/testers/idor_tester.py +200 -0
- package/core/testers/sqli_tester.py +211 -0
- package/core/testing_loop.py +655 -0
- package/core/utils/base_path_dict.py +255 -0
- package/core/utils/payload_lib.py +167 -0
- package/core/utils/ssrf_detector.py +220 -0
- package/core/verifiers/vuln_verifier.py +536 -0
- package/package.json +1 -1
- package/references/README.md +72 -0
- package/references/asset-discovery.md +119 -0
- package/references/fuzzing-patterns.md +129 -0
- package/references/graphql-guidance.md +108 -0
- package/references/intake.md +84 -0
- package/references/pua-agent.md +192 -0
- package/references/report-template.md +156 -0
- package/references/rest-guidance.md +76 -0
- package/references/severity-model.md +76 -0
- package/references/test-matrix.md +86 -0
- package/references/validation.md +78 -0
- package/references/vulnerabilities/01-sqli-tests.md +1128 -0
- package/references/vulnerabilities/02-user-enum-tests.md +423 -0
- package/references/vulnerabilities/03-jwt-tests.md +499 -0
- package/references/vulnerabilities/04-idor-tests.md +362 -0
- package/references/vulnerabilities/05-sensitive-data-tests.md +466 -0
- package/references/vulnerabilities/06-biz-logic-tests.md +501 -0
- package/references/vulnerabilities/07-security-config-tests.md +511 -0
- package/references/vulnerabilities/08-brute-force-tests.md +457 -0
- package/references/vulnerabilities/09-vulnerability-chains.md +465 -0
- package/references/vulnerabilities/10-auth-tests.md +537 -0
- package/references/vulnerabilities/11-graphql-tests.md +355 -0
- package/references/vulnerabilities/12-ssrf-tests.md +396 -0
- package/references/vulnerabilities/README.md +148 -0
- package/references/workflows.md +192 -0
|
@@ -0,0 +1,645 @@
|
|
|
1
|
+
"""
|
|
2
|
+
无头浏览器采集 - 使用Playwright进行动态采集
|
|
3
|
+
输入: {url, wait_until, interact, intercept_api}
|
|
4
|
+
输出: {apis, storage, forms, page_title, js_files, tech_stack, sensitive_urls, ip_addresses}
|
|
5
|
+
|
|
6
|
+
【重要】SPA采集完整流程:
|
|
7
|
+
1. browser_collect 采集JS文件、API请求、外部URL、IP
|
|
8
|
+
2. js_parser 分析JS提取API端点和baseURL配置
|
|
9
|
+
3. sensitive_finder 提取敏感信息
|
|
10
|
+
4. http_client 测试发现的API
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import asyncio
|
|
14
|
+
import re
|
|
15
|
+
import json
|
|
16
|
+
import requests
|
|
17
|
+
from urllib.parse import urlparse, parse_qs
|
|
18
|
+
|
|
19
|
+
try:
|
|
20
|
+
from playwright.sync_api import sync_playwright
|
|
21
|
+
PLAYWRIGHT_AVAILABLE = True
|
|
22
|
+
except ImportError:
|
|
23
|
+
PLAYWRIGHT_AVAILABLE = False
|
|
24
|
+
|
|
25
|
+
requests.packages.urllib3.disable_warnings()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def browser_collect(config: dict) -> dict:
    """
    Collect APIs and page information using a headless browser (synchronous Playwright).

    Input (keys of ``config``):
        url: string - target URL
        wait_until?: "networkidle" | "domcontentloaded"
        interact?: boolean - whether to simulate interaction (form fill / login click)
        intercept_api?: boolean - whether to intercept API requests
        extract_js_files?: boolean - whether to extract the JS file list
        extract_external_urls?: boolean - whether to extract external URLs / domains
        extract_ip_addresses?: boolean - whether to extract IP addresses

    Output dict:
        apis: Array<{method, url, post_data, headers}>
        storage: {localStorage, cookies}
        forms: Array<{action, method, inputs}>
        page_title: string
        js_files: Array<string> - JS file paths found in the rendered HTML
        tech_stack: Array<string> - detected frontend technologies
        sensitive_urls: Array<string> - discovered sensitive URLs (APIs, admin panels, ...)
        ip_addresses: Array<string> - discovered IPv4 addresses
        domains: Array<string> - discovered related domains
        (plus status_code / response_headers / error / js_extract_error /
        login_test_hint keys when applicable)
    """
    # Graceful degradation: without Playwright, return the same shape as a
    # successful run plus an 'error' marker so callers need no special case.
    if not PLAYWRIGHT_AVAILABLE:
        return {
            'error': 'playwright_not_available',
            'apis': [],
            'storage': {},
            'forms': [],
            'js_files': [],
            'tech_stack': [],
            'sensitive_urls': [],
            'ip_addresses': [],
            'domains': []
        }

    url = config.get('url')
    wait_until = config.get('wait_until', 'networkidle')
    interact = config.get('interact', False)
    intercept_api = config.get('intercept_api', True)
    extract_js_files = config.get('extract_js_files', True)
    extract_external_urls = config.get('extract_external_urls', True)
    extract_ip_addresses = config.get('extract_ip_addresses', True)

    result = {
        'apis': [],
        'storage': {},
        'forms': [],
        'page_title': '',
        'js_files': [],
        'tech_stack': [],
        'sensitive_urls': [],
        'ip_addresses': [],
        'domains': []
    }

    # Used to tell "same site" from external domains below.
    target_domain = urlparse(url).netloc

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=['--no-sandbox', '--disable-dev-shm-usage']
            )
            context = browser.new_context(
                viewport={'width': 1920, 'height': 1080},
                ignore_https_errors=True
            )
            page = context.new_page()

            # --- API interception ---
            captured_apis = []
            captured_urls = []   # full URLs of every request observed
            all_responses = []   # metadata of every response observed

            if intercept_api:
                def on_request(request):
                    # Only API-like / code-bearing resource types count as "apis";
                    # every request URL is still recorded for URL/IP mining.
                    if request.resource_type in ['xhr', 'fetch', 'document', 'script']:
                        captured_apis.append({
                            'method': request.method,
                            'url': request.url,
                            'post_data': request.post_data,
                            'headers': dict(request.headers)
                        })
                    captured_urls.append(request.url)

                def on_response(response):
                    all_responses.append({
                        'url': response.url,
                        'status': response.status,
                        'headers': dict(response.headers),
                        'content_type': response.headers.get('content-type', '')
                    })

                page.on('request', on_request)
                page.on('response', on_response)

            # --- Navigate to the page ---
            try:
                response = page.goto(url, timeout=60000, wait_until=wait_until)
                result['status_code'] = response.status if response else None
                result['response_headers'] = dict(response.headers) if response else {}
            except Exception as e:
                # Navigation failure is recorded but collection continues
                # with whatever traffic was captured.
                result['error'] = str(e)

            # Give client-side JS time to execute (essential for SPAs).
            page.wait_for_timeout(5000)

            # --- Extract the JS file list from the rendered HTML ---
            if extract_js_files:
                try:
                    html_content = page.content()
                    js_files = re.findall(r'<script[^>]+src=["\']([^"\']+\.js[^"\']*)["\']', html_content)
                    result['js_files'] = js_files

                    # Naive tech-stack fingerprinting via substring search.
                    tech = []
                    if 'vue' in html_content.lower(): tech.append('Vue')
                    if 'react' in html_content.lower(): tech.append('React')
                    if 'angular' in html_content.lower(): tech.append('Angular')
                    if 'webpack' in html_content.lower(): tech.append('Webpack')
                    if 'element-ui' in html_content.lower(): tech.append('ElementUI')
                    if 'ant-design' in html_content.lower(): tech.append('AntDesign')
                    result['tech_stack'] = tech

                    # Sensitive URLs found directly in the HTML markup.
                    html_urls = extract_urls_from_html(html_content, target_domain)
                    result['sensitive_urls'].extend(html_urls)

                except Exception as e:
                    result['js_extract_error'] = str(e)

            # --- Extract external URLs and IPs from observed traffic ---
            if extract_external_urls or extract_ip_addresses:
                all_urls = set()
                all_ips = set()
                all_domains = set()

                # Mine every request URL.
                for req_url in captured_urls:
                    parsed = urlparse(req_url)

                    # Collect third-party domains only.
                    if parsed.netloc and parsed.netloc != target_domain:
                        all_domains.add(parsed.netloc)

                    # Collect the full URL.
                    all_urls.add(req_url)

                    # Collect embedded IPv4 addresses.
                    if extract_ip_addresses:
                        ips = extract_ip_addresses_from_string(req_url)
                        all_ips.update(ips)

                # Mine response headers (redirect targets, CORS origins, ...).
                for resp in all_responses:
                    headers_str = json.dumps(resp.get('headers', {}))

                    if extract_external_urls:
                        # URLs embedded in header values.
                        url_in_headers = re.findall(r'https?://[^\s"\'<>]+', headers_str)
                        all_urls.update(url_in_headers)

                        # Domains of those URLs.
                        # NOTE(review): this rebinds ``p`` (the sync_playwright
                        # handle); harmless here since ``p`` is not used again,
                        # but a distinct name would be safer.
                        for u in url_in_headers:
                            p = urlparse(u)
                            if p.netloc:
                                all_domains.add(p.netloc)

                    if extract_ip_addresses:
                        ips = extract_ip_addresses_from_string(headers_str)
                        all_ips.update(ips)

                # NOTE(review): this assignment REPLACES the HTML-derived
                # sensitive_urls extended above — possibly unintended; confirm
                # whether a merge was meant.
                result['sensitive_urls'] = list(all_urls)
                result['ip_addresses'] = list(all_ips)
                result['domains'] = list(all_domains)

            # --- Simulate interaction (tries a login to trigger APIs) ---
            if interact:
                try:
                    # 1. Find and fill login-looking inputs.
                    inputs = page.query_selector_all('input')
                    for inp in inputs[:10]:
                        try:
                            inp_type = inp.get_attribute('type')
                            inp_name = inp.get_attribute('name')
                            inp_id = inp.get_attribute('id')

                            # Fill a username-looking field.
                            if inp_type == 'text' or inp_name in ['username', 'user', 'account', 'uname'] or inp_id in ['username', 'user']:
                                inp.fill('admin')
                            # Fill a password field.
                            elif inp_type == 'password':
                                inp.fill('admin123')
                        except:
                            # Best-effort: skip elements that are gone/detached.
                            pass

                    # 2. Find a login-looking button and click it.
                    buttons = page.query_selector_all('button')
                    for btn in buttons[:5]:
                        try:
                            btn_text = btn.inner_text()
                            if any(k in btn_text.lower() for k in ['login', '登录', 'submit', '确定']):
                                btn.click()
                                page.wait_for_timeout(2000)  # wait for the login request
                                break
                        except:
                            pass

                    # 3. Directly submit any form that has a password input.
                    try:
                        page.evaluate("""
                            () => {
                                const forms = document.querySelectorAll('form');
                                forms.forEach(f => {
                                    if (f.querySelector('input[type="password"]')) {
                                        f.submit();
                                    }
                                });
                            }
                        """)
                        page.wait_for_timeout(2000)
                    except:
                        pass

                    # 4. Let post-login API requests land in the capture lists.
                    page.wait_for_timeout(3000)

                except:
                    pass

            # --- Collect localStorage ---
            try:
                ls = page.evaluate("""
                    () => {
                        const data = {};
                        try {
                            for (let i = 0; i < localStorage.length; i++) {
                                const key = localStorage.key(i);
                                data[key] = localStorage.getItem(key);
                            }
                        } catch (e) {}
                        return data;
                    }
                """)
                result['storage']['localStorage'] = ls

                # Mine localStorage for sensitive material.
                if ls:
                    for key, value in ls.items():
                        # Flag token-/secret-looking keys.
                        if any(k in key.lower() for k in ['token', 'key', 'secret', 'auth']):
                            result['sensitive_urls'].append(f"localStorage:{key}")
                        # Extract URLs from the stored value.
                        urls = extract_urls_from_string(str(value))
                        result['sensitive_urls'].extend(urls)
                        # Extract IPv4 addresses from the stored value.
                        ips = extract_ip_addresses_from_string(str(value))
                        result['ip_addresses'].extend(ips)

            except:
                pass

            # --- Collect cookies (values truncated to 50 chars) ---
            try:
                cookies = context.cookies()
                result['storage']['cookies'] = [
                    {'name': c['name'], 'value': c['value'][:50]}
                    for c in cookies
                ]
            except:
                pass

            # --- Collect forms ---
            try:
                forms = page.evaluate("""
                    () => {
                        const forms = [];
                        document.querySelectorAll('form').forEach(f => {
                            const formData = {
                                action: f.action,
                                method: f.method,
                                inputs: []
                            };
                            f.querySelectorAll('input').forEach(inp => {
                                formData.inputs.push({
                                    name: inp.name,
                                    type: inp.type,
                                    id: inp.id
                                });
                            });
                            forms.push(formData);
                        });
                        return forms;
                    }
                """)
                result['forms'] = forms
            except:
                pass

            # --- Collect the page title ---
            try:
                result['page_title'] = page.title()
            except:
                pass

            # --- Expose captured API requests ---
            result['apis'] = captured_apis

            # Test-as-you-find: if a login request was captured, attach
            # immediate testing hints for it.
            login_test = analyze_login_requests(captured_apis, url)
            if login_test:
                result['login_test_hint'] = login_test

            browser.close()

    except Exception as e:
        result['error'] = str(e)

    return result
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def analyze_login_requests(captured_apis, target_url):
    """
    Scan captured browser requests for the first login-looking request and
    build immediate test hints for it.

    Args:
        captured_apis: list of dicts shaped like {'url', 'method', 'post_data', ...}
            as produced by browser_collect().
        target_url: the page URL being collected (kept for interface
            compatibility; not used by the current heuristics).

    Returns:
        dict with keys 'found_login', 'url', 'method', 'test_hints' for the
        first login-related request found, or None when none was captured.
    """
    login_keywords = ['login', 'signin', 'auth', 'token', 'pwd', 'password']

    for api in captured_apis:
        url = api.get('url', '')
        method = api.get('method', 'GET')
        post_data = api.get('post_data', '')

        # A request is login-related when a keyword appears in its URL or body.
        is_login = any(k in url.lower() for k in login_keywords)
        if post_data and any(k in str(post_data).lower() for k in login_keywords):
            is_login = True

        if not is_login:
            continue

        # Build test hints for this request.
        test_hints = []

        # GET login with the password exposed in the query string.
        if method == 'GET' and 'password' in url:
            test_hints.append({
                'type': 'GET_login_with_password_in_url',
                'url': url,
                'risk': 'HIGH',
                'description': '密码可能暴露在URL中'
            })

        # POST login: suggest immediate SQLi / weak-password testing.
        if method == 'POST' and post_data:
            test_hints.append({
                'type': 'POST_login_test',
                'url': url,
                'method': 'POST',
                'body': post_data,
                'risk': 'MEDIUM',
                'description': '立即测试SQL注入、弱密码'
            })

        # Attach SQL-injection payloads to the first hint.
        # BUGFIX: the original indexed test_hints[0] unconditionally, raising
        # IndexError whenever neither branch above appended a hint (e.g. a GET
        # login URL that does not contain 'password').
        if test_hints:
            test_hints[0]['sql_payloads'] = [
                {"username": "admin'--", "password": "any"},
                {"username": "admin' OR '1'='1", "password": "any"},
            ]

        return {
            'found_login': True,
            'url': url,
            'method': method,
            'test_hints': test_hints
        }

    return None
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
def extract_urls_from_html(html_content, target_domain):
    """
    Harvest candidate URLs out of raw HTML markup.

    Keeps: external absolute hrefs (different netloc than *target_domain*),
    relative hrefs starting with '/' or './', absolute src attributes, and any
    quoted http(s) literal anywhere in the document.

    Returns a de-duplicated list of URL strings.
    """
    found = set()

    # href attributes: external absolutes and site-relative paths.
    for link in re.findall(r'href=["\']([^"\']+)["\']', html_content):
        if link.startswith('http'):
            if urlparse(link).netloc != target_domain:
                found.add(link)
        elif link.startswith(('/', './')):
            found.add(link)

    # src attributes: absolute URLs only.
    found.update(
        asset for asset in re.findall(r'src=["\']([^"\']+)["\']', html_content)
        if asset.startswith('http')
    )

    # Any quoted http(s) literal (JS config, templates, ...).
    found.update(re.findall(r'["\'](https?://[^"\']+)["\']', html_content))

    return list(found)
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
def extract_urls_from_string(content):
    """
    Return every distinct http(s) URL literal found in *content* as a list.
    """
    # A single pass with the shared URL pattern, de-duplicated via a set.
    return list({hit for hit in re.findall(r'https?://[^\s"\'<>]+', content)})
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
def extract_ip_addresses_from_string(content):
    """
    Return every distinct IPv4 address literal found in *content* as a list.

    Each octet is constrained to 0-255 by the pattern itself.
    """
    ipv4_pattern = r'\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b'
    return list(set(re.findall(ipv4_pattern, content)))
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
def extract_apis_from_browser(result):
    """
    Return the captured API entries from a browser_collect() result with
    duplicate URLs removed; the first entry for each URL wins.
    """
    seen_urls = set()
    deduped = []
    for entry in result.get('apis', []):
        key = entry['url']
        if key in seen_urls:
            continue
        seen_urls.add(key)
        deduped.append(entry)
    return deduped
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
def extract_js_api_patterns(js_content):
    """
    Extract API endpoint patterns and configuration from JS source text using
    regex scanning (see extract_with_ast for the AST-based companion).

    Args:
        js_content: the JavaScript source as a single string.

    Returns a dict:
        base_url: str | None - first baseURL configuration found
        api_paths: list[str] - discovered API path literals
        env_vars: dict - discovered build-time environment variables
        sensitive_urls: list[str] - discovered sensitive URLs / credential values
        ip_addresses: list[str] - discovered IPv4 addresses
    """
    base_url = None
    api_paths = set()
    env_vars = {}
    sensitive_urls = set()
    ip_addresses = set()

    # baseURL configuration (plain assignment or axios.create options).
    baseurl_patterns = [
        r'baseURL\s*[:=]\s*["\']([^"\']+)["\']',
        r'axios\.create\s*\(\s*\{[^}]*baseURL\s*[:=]\s*["\']([^"\']+)["\']',
    ]
    for pattern in baseurl_patterns:
        match = re.search(pattern, js_content)
        if match:
            base_url = match.group(1)
            break

    # API path literals and HTTP-call arguments.
    api_patterns = [
        r'["\'](/(?:user|auth|admin|login|logout|api|v\d|frame|hszh|table|dashboard|supplement|attach|code|module|file)[a-zA-Z0-9_/?=&-]*)["\']',
        r'axios\.[a-z]+\(["\']([^"\']+)["\']',
        r'fetch\(["\']([^"\']+)["\']',
        r'\.get\(["\']([^"\']+)["\']',
        r'\.post\(["\']([^"\']+)["\']',
    ]
    for pattern in api_patterns:
        matches = re.findall(pattern, js_content, re.IGNORECASE)
        for m in matches:
            # Keep only plausibly-sized path strings.
            if isinstance(m, str) and len(m) > 2 and len(m) < 200:
                api_paths.add(m)

    # Build-time environment variables (Vue CLI / webpack DefinePlugin style).
    env_patterns = [
        r'(VUE_APP_\w+)\s*[:=]\s*["\']([^"\']+)["\']',
        r'process\.env\.(\w+)\s*[:=]\s*["\']([^"\']+)["\']',
    ]
    for pattern in env_patterns:
        matches = re.findall(pattern, js_content)
        for var_name, var_value in matches:
            env_vars[var_name] = var_value
            # Env values often embed backend URLs or IPs.
            sensitive_urls.update(extract_urls_from_string(var_value))
            ip_addresses.update(extract_ip_addresses_from_string(var_value))

    # Hard-coded credentials.
    # BUGFIX: the original first pattern contained an unbalanced '(' (three
    # opens, two closes), so re.findall raised re.error ("missing )") and the
    # whole function crashed. The key alternation is now non-capturing and
    # only the value is captured.
    credential_patterns = [
        r'["\'](?:api[_-]?key|secret[_-]?key|access[_-]?token|private[_-]?key)["\']\s*[:=]\s*["\']([^"\']+)["\']',
        r'(?:password|passwd|pwd)\s*[:=]\s*["\']([^"\']+)["\']',
        r'["\']https?://[^"\']*[:@][^"\']+@[^"\']+["\']',  # URL with inline credentials
    ]
    for pattern in credential_patterns:
        for m in re.findall(pattern, js_content, re.IGNORECASE):
            # findall yields tuples when a pattern has multiple groups; keep
            # only non-empty strings so sensitive_urls stays homogeneous.
            if isinstance(m, tuple):
                m = next((part for part in m if part), '')
            if m:
                sensitive_urls.add(m)

    # Bare IPv4 addresses anywhere in the JS.
    ip_addresses.update(extract_ip_addresses_from_string(js_content))

    # External URL literals anywhere in the JS.
    sensitive_urls.update(extract_urls_from_string(js_content))

    return {
        'base_url': base_url,
        'api_paths': list(api_paths),
        'env_vars': env_vars,
        'sensitive_urls': list(sensitive_urls),
        'ip_addresses': list(ip_addresses)
    }
|
|
554
|
+
|
|
555
|
+
|
|
556
|
+
# 【新增】AST模式解析(使用esprima)
|
|
557
|
+
def extract_with_ast(js_content):
    """
    Parse JS with an AST (esprima) and collect coarse structural facts.

    Requires ``pip install esprima``; when esprima is missing this returns an
    error dict so callers can fall back to the regex path
    (extract_js_api_patterns).

    Returns:
        dict with keys string_literals, object_properties, function_calls,
        import_sources — or {'error': ...} on any failure.
    """
    try:
        import esprima

        # Parse the JS into an AST.
        ast = esprima.parse(js_content, sourceType='script', range=True)

        result = {
            'string_literals': [],
            'object_properties': {},
            'function_calls': [],
            'import_sources': []
        }

        # Walk the AST, recording interesting node kinds.
        def traverse(node, depth=0):
            if depth > 20:  # guard against pathological nesting
                return

            if hasattr(node, 'type'):
                # String literal
                if node.type == 'Literal' and isinstance(node.value, str):
                    result['string_literals'].append(node.value)

                # Object property (key -> literal value, if any)
                elif node.type == 'Property':
                    key = getattr(node, 'key', None)
                    value = getattr(node, 'value', None)
                    if key and hasattr(key, 'value'):
                        result['object_properties'][key.value] = getattr(value, 'value', None)

                # Function call by simple name
                elif node.type == 'CallExpression':
                    callee = getattr(node, 'callee', None)
                    if callee and hasattr(callee, 'name'):
                        result['function_calls'].append(callee.name)

                # import ... from '...'
                elif node.type == 'ImportDeclaration':
                    source = getattr(node, 'source', None)
                    if source and hasattr(source, 'value'):
                        result['import_sources'].append(source.value)

            # Recurse into child attributes (lists of nodes or single nodes).
            for child in node.__dict__.values():
                if isinstance(child, list):
                    for item in child:
                        if hasattr(item, 'type'):
                            traverse(item, depth + 1)
                elif hasattr(child, 'type'):
                    traverse(child, depth + 1)

        # BUGFIX: the original called traverse(ast.body); ast.body is a plain
        # Python list (no __dict__), so traversal raised AttributeError and
        # every call returned an error dict even with esprima installed.
        # Start from the root Program node instead; its __dict__ contains the
        # body list and recursion proceeds normally.
        traverse(ast)

        # De-duplicate the flat lists.
        result['string_literals'] = list(set(result['string_literals']))
        result['function_calls'] = list(set(result['function_calls']))
        result['import_sources'] = list(set(result['import_sources']))

        return result

    except ImportError:
        return {'error': 'esprima not installed, use regex fallback'}
    except Exception as e:
        return {'error': str(e)}
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
if __name__ == '__main__':
    # Manual smoke test: collect a public page and print summary counts.
    # NOTE: performs real browser/network I/O when Playwright is installed;
    # with Playwright missing it prints zeros from the degraded result.
    result = browser_collect({
        'url': 'https://example.com',
        'wait_until': 'networkidle',
        'interact': True
    })
    print(f"APIs: {len(result.get('apis', []))}")
    print(f"JS Files: {len(result.get('js_files', []))}")
    print(f"Tech Stack: {result.get('tech_stack', [])}")
    print(f"Sensitive URLs: {len(result.get('sensitive_urls', []))}")
    print(f"IP Addresses: {len(result.get('ip_addresses', []))}")
    print(f"Domains: {len(result.get('domains', []))}")
    print(f"Storage: {len(result.get('storage', {}))}")