opencode-api-security-testing 3.0.10 → 3.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -0
- package/SKILL.md +1797 -0
- package/core/advanced_recon.py +788 -0
- package/core/agentic_analyzer.py +445 -0
- package/core/analyzers/api_parser.py +210 -0
- package/core/analyzers/response_analyzer.py +212 -0
- package/core/analyzers/sensitive_finder.py +184 -0
- package/core/api_fuzzer.py +422 -0
- package/core/api_interceptor.py +525 -0
- package/core/api_parser.py +955 -0
- package/core/browser_tester.py +479 -0
- package/core/cloud_storage_tester.py +1330 -0
- package/core/collectors/__init__.py +23 -0
- package/core/collectors/api_path_finder.py +300 -0
- package/core/collectors/browser_collect.py +645 -0
- package/core/collectors/browser_collector.py +411 -0
- package/core/collectors/http_client.py +111 -0
- package/core/collectors/js_collector.py +490 -0
- package/core/collectors/js_parser.py +780 -0
- package/core/collectors/url_collector.py +319 -0
- package/core/context_manager.py +682 -0
- package/core/deep_api_tester_v35.py +844 -0
- package/core/deep_api_tester_v55.py +366 -0
- package/core/dynamic_api_analyzer.py +532 -0
- package/core/http_client.py +179 -0
- package/core/models.py +296 -0
- package/core/orchestrator.py +890 -0
- package/core/prerequisite.py +227 -0
- package/core/reasoning_engine.py +1042 -0
- package/core/response_classifier.py +606 -0
- package/core/runner.py +938 -0
- package/core/scan_engine.py +599 -0
- package/core/skill_executor.py +435 -0
- package/core/skill_executor_v2.py +670 -0
- package/core/skill_executor_v3.py +704 -0
- package/core/smart_analyzer.py +687 -0
- package/core/strategy_pool.py +707 -0
- package/core/testers/auth_tester.py +264 -0
- package/core/testers/idor_tester.py +200 -0
- package/core/testers/sqli_tester.py +211 -0
- package/core/testing_loop.py +655 -0
- package/core/utils/base_path_dict.py +255 -0
- package/core/utils/payload_lib.py +167 -0
- package/core/utils/ssrf_detector.py +220 -0
- package/core/verifiers/vuln_verifier.py +536 -0
- package/package.json +1 -1
- package/references/README.md +72 -0
- package/references/asset-discovery.md +119 -0
- package/references/fuzzing-patterns.md +129 -0
- package/references/graphql-guidance.md +108 -0
- package/references/intake.md +84 -0
- package/references/pua-agent.md +192 -0
- package/references/report-template.md +156 -0
- package/references/rest-guidance.md +76 -0
- package/references/severity-model.md +76 -0
- package/references/test-matrix.md +86 -0
- package/references/validation.md +78 -0
- package/references/vulnerabilities/01-sqli-tests.md +1128 -0
- package/references/vulnerabilities/02-user-enum-tests.md +423 -0
- package/references/vulnerabilities/03-jwt-tests.md +499 -0
- package/references/vulnerabilities/04-idor-tests.md +362 -0
- package/references/vulnerabilities/05-sensitive-data-tests.md +466 -0
- package/references/vulnerabilities/06-biz-logic-tests.md +501 -0
- package/references/vulnerabilities/07-security-config-tests.md +511 -0
- package/references/vulnerabilities/08-brute-force-tests.md +457 -0
- package/references/vulnerabilities/09-vulnerability-chains.md +465 -0
- package/references/vulnerabilities/10-auth-tests.md +537 -0
- package/references/vulnerabilities/11-graphql-tests.md +355 -0
- package/references/vulnerabilities/12-ssrf-tests.md +396 -0
- package/references/vulnerabilities/README.md +148 -0
- package/references/workflows.md +192 -0
|
@@ -0,0 +1,780 @@
|
|
|
1
|
+
"""
|
|
2
|
+
JS源码解析 - 从HTML/JS中提取API配置
|
|
3
|
+
|
|
4
|
+
【重要】使用多模式解析:
|
|
5
|
+
1. 正则模式:快速提取API路径、baseURL、凭证等
|
|
6
|
+
2. Agent模式:下载JS供Agent解析(用于混淆JS)
|
|
7
|
+
|
|
8
|
+
输入: {html, js_urls, base_url}
|
|
9
|
+
输出: {
|
|
10
|
+
api_patterns: API路径,
|
|
11
|
+
base_urls: baseURL配置,
|
|
12
|
+
tokens: token,
|
|
13
|
+
endpoints: 完整端点,
|
|
14
|
+
sensitive_urls: 敏感URL,
|
|
15
|
+
ip_addresses: IP地址,
|
|
16
|
+
domains: 相关域名,
|
|
17
|
+
credentials: 发现的凭证,
|
|
18
|
+
js_for_agent: [JS文件路径列表,供Agent解析]
|
|
19
|
+
}
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import re
|
|
23
|
+
import requests
|
|
24
|
+
from urllib.parse import urljoin, urlparse
|
|
25
|
+
|
|
26
|
+
requests.packages.urllib3.disable_warnings()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def js_parser(config):
    """
    Extract API configuration from a page's HTML and its JavaScript files.

    Regex extraction is always applied; AST extraction (``extract_with_ast``)
    is additionally applied to every fetched script when ``use_ast`` is true.

    Args:
        config: dict with
            html (str): page HTML.
            js_urls (list[str], optional): script URLs; discovered from
                ``html`` when missing or empty.
            base_url (str): URL used to resolve relative script URLs.
            use_ast (bool, optional): also run AST extraction (default True).

    Returns:
        dict with the list keys ``api_patterns``, ``base_urls``, ``tokens``,
        ``endpoints``, ``sensitive_urls``, ``ip_addresses`` and ``domains``
        (de-duplicated, first-seen order) and ``credentials`` (dict mapping
        credential type to value).
    """
    html = config.get('html') or ''
    js_urls = config.get('js_urls') or []
    base_url = config.get('base_url') or ''
    use_ast = config.get('use_ast', True)

    result = {
        'api_patterns': [],
        'base_urls': [],
        'tokens': [],
        'endpoints': [],
        'sensitive_urls': [],
        'ip_addresses': [],
        'domains': [],
        'credentials': {}
    }

    # Discover script URLs from the HTML when the caller supplied none.
    if not js_urls:
        js_urls = extract_js_urls(html)

    # Inline configuration found directly in the HTML.
    result['base_urls'].extend(extract_base_urls(html))
    result['api_patterns'].extend(extract_api_patterns(html))

    html_sensitive = extract_sensitive_from_string(html)
    result['sensitive_urls'].extend(html_sensitive.get('urls', []))
    result['ip_addresses'].extend(html_sensitive.get('ips', []))
    result['domains'].extend(html_sensitive.get('domains', []))
    # Inline scripts can hard-code credentials just like external ones.
    if html_sensitive.get('credentials'):
        result['credentials'].update(html_sensitive['credentials'])

    # Analyse at most 15 scripts to bound the number of HTTP requests.
    for js_url in js_urls[:15]:
        full_url = resolve_js_url(js_url, base_url)
        if not full_url:
            continue

        try:
            js_content = fetch_js_content(full_url)
            if not js_content:
                continue

            # AST mode: inspect every string literal found by the parser.
            if use_ast:
                ast_result = extract_with_ast(js_content)
                if 'error' not in ast_result:
                    for literal in ast_result.get('string_literals', []):
                        if is_api_path(literal):
                            result['api_patterns'].append(literal)
                        result['sensitive_urls'].extend(
                            extract_urls_from_string(literal))
                        result['ip_addresses'].extend(
                            extract_ip_from_string(literal))

            # Regex mode always runs; it also covers scripts the AST parser
            # rejected.
            result['api_patterns'].extend(extract_api_patterns(js_content))
            result['base_urls'].extend(extract_base_urls(js_content))
            result['tokens'].extend(extract_tokens(js_content))

            sensitive = extract_sensitive_from_string(js_content)
            result['sensitive_urls'].extend(sensitive.get('urls', []))
            result['ip_addresses'].extend(sensitive.get('ips', []))
            result['domains'].extend(sensitive.get('domains', []))
            if sensitive.get('credentials'):
                result['credentials'].update(sensitive['credentials'])

        except Exception:
            # Deliberate best effort: one unparsable script must not abort
            # the whole scan. Unlike a bare ``except`` this still lets
            # KeyboardInterrupt / SystemExit propagate.
            continue

    # De-duplicate while keeping first-seen order so output is reproducible.
    for key in ('api_patterns', 'base_urls', 'tokens',
                'sensitive_urls', 'ip_addresses', 'domains'):
        result[key] = list(dict.fromkeys(result[key]))

    # Build full endpoints from every base URL / API path combination.
    endpoints = []
    for base in result['base_urls']:
        for pattern in result['api_patterns']:
            if pattern.startswith(('http://', 'https://')):
                # Already absolute: appending it to a base would be garbage.
                endpoints.append(pattern)
            else:
                # Exactly one slash between base and path.
                endpoints.append(base.rstrip('/') + '/' + pattern.lstrip('/'))

    result['endpoints'] = list(dict.fromkeys(endpoints))

    return result
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def extract_with_ast(js_content):
    """
    Parse JavaScript with esprima and collect values of interest from the AST.

    Requires the third-party ``esprima`` package (``pip install esprima``).
    It is imported lazily so the rest of the module works without it.

    Args:
        js_content: JavaScript source text.

    Returns:
        On success, a dict with
            string_literals: unique string literal values,
            object_properties: dict of property key -> string value,
            function_calls: unique callee names,
            import_sources: unique import sources.
        On failure, a dict with an ``error`` message and a ``fix_command``
        hint (plus ``fallback_available`` when parsing itself failed).
    """
    try:
        import esprima
    except ImportError:
        return {
            'error': 'esprima not installed. Run: pip install esprima',
            'fix_command': 'pip install esprima'
        }

    max_depth = 30  # nodes nested deeper than this are ignored

    try:
        # Try classic script mode first; ES modules (import/export) only
        # parse in module mode, so retry before giving up.
        try:
            ast = esprima.parse(js_content, sourceType='script', range=True)
        except Exception:
            ast = esprima.parse(js_content, sourceType='module', range=True)

        result = {
            'string_literals': [],
            'object_properties': {},
            'function_calls': [],
            'import_sources': []
        }

        def child_nodes(node):
            """Yield the direct AST children (objects carrying ``type``)."""
            for child in getattr(node, '__dict__', {}).values():
                if isinstance(child, list):
                    for item in child:
                        if hasattr(item, 'type'):
                            yield item
                elif hasattr(child, 'type'):
                    yield child

        def record(node):
            """Store whatever this single node contributes to ``result``."""
            node_type = node.type

            if node_type == 'Literal':
                if isinstance(node.value, str):
                    result['string_literals'].append(node.value)

            elif node_type == 'Property':
                key_node = getattr(node, 'key', None)
                value_node = getattr(node, 'value', None)
                if key_node is None or getattr(node, 'computed', False):
                    return
                # Identifier keys ({baseURL: ...}) expose ``name``; quoted
                # keys ({"baseURL": ...}) expose ``value``.
                key = getattr(key_node, 'name', None)
                if key is None:
                    key = getattr(key_node, 'value', None)
                value = getattr(value_node, 'value', None) if value_node else None
                if key is not None and value and isinstance(value, str):
                    result['object_properties'][key] = value

            elif node_type == 'CallExpression':
                callee = getattr(node, 'callee', None)
                if callee:
                    if hasattr(callee, 'name'):
                        result['function_calls'].append(callee.name)
                    elif hasattr(callee, 'value'):
                        result['function_calls'].append(callee.value)

            elif node_type == 'ImportDeclaration':
                source = getattr(node, 'source', None)
                if source and hasattr(source, 'value'):
                    result['import_sources'].append(source.value)

        # Walk from the root node. The original passed ``ast.body`` (a plain
        # list), which has neither ``type`` nor ``__dict__`` and therefore
        # always raised. An explicit stack avoids Python recursion limits.
        stack = [(ast, 0)]
        while stack:
            node, depth = stack.pop()
            if depth > max_depth:
                continue
            if hasattr(node, 'type'):
                record(node)
            children = list(child_nodes(node))
            # Reverse so nodes are visited in source order.
            stack.extend((child, depth + 1) for child in reversed(children))

        # De-duplicate, keeping first-seen order.
        result['string_literals'] = list(dict.fromkeys(result['string_literals']))
        result['function_calls'] = list(dict.fromkeys(result['function_calls']))
        result['import_sources'] = list(dict.fromkeys(result['import_sources']))

        return result

    except Exception as e:
        return {
            'error': f'AST parse failed: {str(e)[:100]}',
            'fix_command': 'pip install --upgrade esprima',
            'fallback_available': True
        }
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def extract_simplified(content):
    """
    Regex-only string extraction, intended as a fallback when AST parsing fails.

    Args:
        content: JavaScript source text. Anything that is not a non-empty
            ``str`` yields an empty result instead of raising.

    Returns:
        dict with
            string_literals: quoted strings of 3-150 characters (double-quoted
                first, then single-quoted),
            api_paths: the literals that contain an API keyword and either
                start with ``/`` or mention ``axios`` / ``fetch``,
            error: always ``'fallback_mode'``, marking the result as degraded.
    """
    result = {
        'string_literals': [],
        'api_paths': [],
        'error': 'fallback_mode'
    }

    # Explicit guard instead of a bare ``except`` around each findall: the
    # only realistic failure is a non-string input.
    if not isinstance(content, str) or not content:
        return result

    for quoted in (r'"([^"]{3,150})"', r"'([^']{3,150})'"):
        result['string_literals'].extend(re.findall(quoted, content))

    # Keep literals that look like API paths.
    api_keywords = ['user', 'auth', 'login', 'logout', 'api', 'frame', 'admin', 'info', 'list', 'supplement', 'dashboard', 'module', 'code', 'attach', 'v1', 'v2', 'v3']
    for s in result['string_literals']:
        lowered = s.lower()
        if not any(k in lowered for k in api_keywords):
            continue
        if s.startswith('/') or 'axios' in lowered or 'fetch' in lowered:
            result['api_paths'].append(s)

    return result
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def extract_sensitive_from_string(content):
    """
    Extract URLs, host names, IPv4 addresses and credentials from text.

    Args:
        content: text to scan (HTML or JavaScript). Empty or non-string
            input yields an empty result.

    Returns:
        dict with
            urls: unique http(s) URLs in first-seen order,
            ips: unique IPv4 addresses in first-seen order,
            domains: unique network locations (host[:port]) of the URLs,
            credentials: dict of credential type -> value; when a type
                matches several times the last accepted match wins.
    """
    if not isinstance(content, str) or not content:
        return {'urls': [], 'ips': [], 'domains': [], 'credentials': {}}

    # HTTP/HTTPS URLs.
    urls = re.findall(r'https?://[^\s"\'<>]+', content)

    # Host part of every URL.
    domains = []
    for url in urls:
        try:
            netloc = urlparse(url).netloc
        except ValueError:
            # urlparse rejects malformed bracketed hosts ("http://[bad");
            # skip that URL's domain instead of aborting the whole scan.
            continue
        if netloc:
            domains.append(netloc)

    # IPv4 addresses.
    ipv4_pattern = r'\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b'
    ips = re.findall(ipv4_pattern, content)

    # Hard-coded credentials.
    credential_patterns = [
        (r'(?:api[_-]?key|API[_-]?KEY)\s*[:=]\s*["\']([^"\']+)["\']', 'api_key'),
        (r'(?:secret[_-]?key|SECRET[_-]?KEY)\s*[:=]\s*["\']([^"\']+)["\']', 'secret_key'),
        (r'(?:access[_-]?token|ACCESS[_-]?TOKEN)\s*[:=]\s*["\']([^"\']+)["\']', 'access_token'),
        (r'(?:password|passwd|pwd)\s*[:=]\s*["\']([^"\']+)["\']', 'password'),
        (r'Bearer\s+([a-zA-Z0-9\-_\.]+)', 'bearer_token'),
        (r'Basic\s+([a-zA-Z0-9\-_\.+]+=*)', 'basic_auth'),
    ]

    credentials = {}
    for pattern, cred_type in credential_patterns:
        for match in re.findall(pattern, content, re.IGNORECASE):
            # Drop placeholders: very short values and "undefined".
            if len(match) > 3 and 'undefined' not in match.lower():
                credentials[cred_type] = match

    # dict.fromkeys de-duplicates while keeping a reproducible order.
    return {
        'urls': list(dict.fromkeys(urls)),
        'ips': list(dict.fromkeys(ips)),
        'domains': list(dict.fromkeys(domains)),
        'credentials': credentials
    }
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def extract_urls_from_string(content):
    """
    Return the unique http(s) URLs found in ``content``.

    Results keep first-seen order so repeated runs give identical output.
    Empty or non-string input returns an empty list.
    """
    if not isinstance(content, str) or not content:
        return []

    http_urls = re.findall(r'https?://[^\s"\'<>]+', content)
    return list(dict.fromkeys(http_urls))
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def extract_ip_from_string(content):
    """
    Return the unique IPv4 addresses found in ``content``.

    Results keep first-seen order so repeated runs give identical output.
    Empty or non-string input returns an empty list.
    """
    if not isinstance(content, str) or not content:
        return []

    ipv4_pattern = r'\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b'
    return list(dict.fromkeys(re.findall(ipv4_pattern, content)))
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def extract_js_urls(html):
    """
    Return the JavaScript URLs referenced by ``<script src>`` and
    ``<link href>`` tags in ``html``.

    URLs carrying a query string or fragment after ``.js`` (for example the
    cache-busting ``app.js?v=123``) are included. Duplicates are removed,
    keeping first-seen order. Empty input returns an empty list.
    """
    if not html:
        return []

    # ``.js`` optionally followed by ?query or #fragment, up to the closing quote.
    js_ref = r'([^"\']+\.js(?:[?#][^"\']*)?)'
    tag_patterns = (
        r'<script[^>]+src=["\']' + js_ref + r'["\']',
        # <link> tags (preload / modulepreload) may reference scripts too.
        r'<link[^>]+href=["\']' + js_ref + r'["\']',
    )

    js_urls = []
    for tag_pattern in tag_patterns:
        js_urls.extend(re.findall(tag_pattern, html, re.I))

    return list(dict.fromkeys(js_urls))
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def extract_base_urls(content):
    """
    Collect quoted values assigned to well-known base-URL settings.

    Every regex is applied in turn and all captures are returned in match
    order; duplicates are not removed here.
    """
    setting_regexes = (
        r'baseURL\s*[:=]\s*["\']([^"\']+)["\']',
        r'apiBase\s*[:=]\s*["\']([^"\']+)["\']',
        r'API_BASE\s*[:=]\s*["\']([^"\']+)["\']',
        r'VUE_APP_API\s*[:=]\s*["\']([^"\']+)["\']',
        r'REACT_APP_API\s*[:=]\s*["\']([^"\']+)["\']',
        r'NEXT_PUBLIC_API\s*[:=]\s*["\']([^"\']+)["\']',
        r'axios\.defaults\.baseURL\s*=\s*["\']([^"\']+)["\']',
    )

    return [
        value
        for regex in setting_regexes
        for value in re.findall(regex, content)
    ]
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
def extract_api_patterns(content):
    """
    Find candidate API paths in ``content`` using regular expressions.

    All captures from the generic and business-specific rules are gathered
    in rule order and then filtered through ``is_api_path``.
    """
    # Generic REST-style rules.
    generic_rules = [
        r'["\'](/api/[^"\']+)["\']',
        r'["\'](/v\d+/[^"\']+)["\']',
        r'["\'](/api\.php/[^"\']+)["\']',
        r'url\s*[:=]\s*["\']([^"\']*api[^"\']*)["\']',
        r'endpoint\s*[:=]\s*["\']([^"\']+)["\']',
        r'path\s*[:=]\s*["\']([^"\']+)["\']',
    ]

    # Business-module rules that widen coverage.
    business_rules = [
        # user / authentication
        r'["\'](/(?:user|auth|login|logout|oauth|supplement|userinfo)[a-zA-Z0-9_/?=&-]*)["\']',
        # framework management
        r'["\'](/(?:frame|module|code|attach|file)[a-zA-Z0-9_/?=&-]*)["\']',
        # dashboard / statistics
        r'["\'](/(?:dashboard|table|dash|board|stats|statistics)[a-zA-Z0-9_/?=&-]*)["\']',
        # WeChat related
        r'["\'](/(?:wx|wechat|wxapi|hszh)[a-zA-Z0-9_/?=&-]*)["\']',
        # axios / fetch style calls
        r'axios\.[a-z]+\(["\']([^"\']+)["\']',
        r'fetch\(["\']([^"\']+)["\']',
        r'\.get\(["\']([^"\']+)["\']',
        r'\.post\(["\']([^"\']+)["\']',
        r'\.put\(["\']([^"\']+)["\']',
        r'\.delete\(["\']([^"\']+)["\']',
    ]

    candidates = []
    for rule in generic_rules + business_rules:
        candidates.extend(
            hit for hit in re.findall(rule, content, re.I)
            if isinstance(hit, str)
        )

    # Discard everything that does not look like an API path.
    return [candidate for candidate in candidates if is_api_path(candidate)]
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
def extract_tokens(content):
    """
    Extract probable authentication tokens from ``content``.

    Looks at ``token = "..."`` style assignments, ``Bearer <token>`` and
    quoted ``Authorization`` header values. Candidates of 10 characters or
    fewer, or containing ``test``, are dropped as likely placeholders.

    Returns:
        Unique tokens in first-seen order. Empty input returns ``[]``.
    """
    if not content:
        return []

    patterns = [
        r'(?:token|Token|TOKEN)\s*[:=]\s*["\']([a-zA-Z0-9\-_\.]+)["\']',
        r'Bearer\s+([a-zA-Z0-9\-_\.]+)',
        # Optional scheme word ("Bearer ", "Basic "), then the token. The
        # former greedy ``[^"\']*`` prefix consumed the token itself and left
        # a one-character capture that the length filter always discarded.
        r'Authorization["\']?\s*[:=]\s*["\'](?:\w+\s+)?([a-zA-Z0-9\-_\.]+)',
    ]

    tokens = []
    for pattern in patterns:
        tokens.extend(re.findall(pattern, content, re.I))

    # Filter out short values and obvious test tokens.
    filtered = [
        token for token in tokens
        if len(token) > 10 and 'test' not in token.lower()
    ]
    return list(dict.fromkeys(filtered))
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
def is_api_path(path):
    """
    Decide whether ``path`` looks like an API path.

    Static assets are rejected first: a static file extension at the end of
    the path (query string and fragment ignored), a static directory, or a
    bundle-style file name. The remaining paths are accepted when they
    contain one of the known API indicators.

    In the original the static-asset list was only consulted after the
    positive check had already returned, so it never had any effect.

    Args:
        path: candidate path or URL string.

    Returns:
        True when the path looks like an API path, otherwise False.
    """
    if not isinstance(path, str) or len(path) < 2:
        return False

    lowered = path.lower()
    # Judge the extension on the path only, so "/a.js?v=1" counts as static
    # while "/api/data.json" does not.
    bare = lowered.split('?', 1)[0].split('#', 1)[0]

    static_extensions = ('.css', '.js', '.html', '.png', '.jpg', '.gif')
    static_dirs = ('/static/', '/public/', '/assets/', '/images/')
    bundle_prefixes = ('chunk-', 'app.', 'vendor.')

    if bare.endswith(static_extensions):
        return False
    if any(directory in lowered for directory in static_dirs):
        return False
    if bare.rsplit('/', 1)[-1].startswith(bundle_prefixes):
        return False

    api_indicators = [
        '/api/', '/v1/', '/v2/', '/v3/', '/rest/',
        '/user', '/auth', '/login', '/logout', '/oauth',
        '/frame', '/module', '/code', '/attach', '/file',
        '/dashboard', '/table', '/supplement',
        '/wx', '/wechat', '/hszh', '/api',
    ]
    return any(indicator in lowered for indicator in api_indicators)
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
def resolve_js_url(js_url, base_url):
    """
    Resolve a script reference to an absolute URL.

    Args:
        js_url: URL as written in the page; absolute, protocol-relative
            (``//host/x.js``), root-relative (``/x.js``) or relative.
        base_url: URL of the page the reference came from.

    Returns:
        The absolute URL, or None when ``js_url`` is empty or cannot be made
        absolute (for example a relative path with no usable ``base_url``).
    """
    if not js_url:
        return None

    # Only a real scheme counts as absolute; a bare ``startswith('http')``
    # also matched relative files such as "httpclient.js".
    if js_url.lower().startswith(('http://', 'https://')):
        return js_url

    try:
        if js_url.startswith('//'):
            # Protocol-relative: reuse the page's scheme, defaulting to https
            # when the base URL does not provide one.
            scheme = urlparse(base_url).scheme if base_url else ''
            return f"{scheme or 'https'}:{js_url}"

        # urljoin handles both root-relative and relative references.
        resolved = urljoin(base_url or '', js_url)
        parsed = urlparse(resolved)
    except ValueError:
        # Malformed base or reference (e.g. a broken bracketed host).
        return None

    if not (parsed.scheme and parsed.netloc):
        return None
    return resolved
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def fetch_js_content(js_url):
    """
    Download a JavaScript file and return its text.

    The request uses a 10 second timeout and, as in the original, skips TLS
    certificate verification (``verify=False``).

    Args:
        js_url: absolute URL of the script.

    Returns:
        The response body as text when the server answers 200, otherwise
        None (empty URL, request failure or any other status code).
    """
    if not js_url:
        return None

    try:
        resp = requests.get(js_url, timeout=10, verify=False)
        if resp.status_code == 200:
            return resp.text
    except (requests.RequestException, ValueError):
        # Best-effort fetch: connection errors, timeouts and invalid URLs
        # all mean "no content". Unlike a bare ``except`` this does not
        # swallow KeyboardInterrupt or programming errors.
        pass
    return None
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
if __name__ == '__main__':
    # Manual smoke test: parse a minimal page and print what was found.
    demo_config = {
        'html': '<script src="/static/js/app.js"></script>',
        'base_url': 'https://example.com',
    }
    result = js_parser(demo_config)
    print(f"API Patterns: {result['api_patterns']}")
    print(f"Base URLs: {result['base_urls']}")
|
|
543
|
+
|
|
544
|
+
|
|
545
|
+
def prepare_js_for_agent_analysis(js_url, base_url):
    """
    Download a JavaScript file and package it for analysis by an Agent/LLM.

    Intended for obfuscated scripts that esprima cannot parse.

    Args:
        js_url: script URL (absolute or relative to ``base_url``).
        base_url: URL used to resolve ``js_url``.

    Returns:
        On failure: ``{'error': ..., 'js_url': ...}``.
        On success a dict with
            js_url: the URL as given,
            full_url: the resolved absolute URL,
            js_content: the script text, cut to the first 20000 characters,
            js_content_full: the whole text when it was not cut, else None,
            content_hash: SHA-256 hex digest of the whole text,
            lines: number of lines in the whole text,
            truncated: whether ``js_content`` was cut,
            agent_prompt: analysis prompt embedding the first 5000
                characters, only set when the content was cut, else None,
            fetch_success: True.
    """
    import hashlib

    # resolve_js_url returns absolute URLs unchanged and tolerates empty
    # or None input, so no separate startswith('http') check is needed.
    full_url = resolve_js_url(js_url, base_url)

    js_content = fetch_js_content(full_url) if full_url else None

    if not js_content:
        return {
            'error': f'Failed to fetch JS: {js_url}',
            'js_url': js_url
        }

    # Keep only the first 20000 characters of very long scripts.
    truncated = len(js_content) > 20000
    display_content = js_content[:20000] if truncated else js_content

    prompt_template = f"""请分析以下JavaScript代码,提取API接口:

1. baseURL/basePath配置
2. 所有API路径(如 /user/login, /api/v1/user/info)
3. 请求方法(GET/POST/PUT/DELETE)
4. 参数名和参数位置(query/path/body)
5. 敏感信息(token、apiKey、硬编码凭证)
6. 外部URL或域名
7. IP地址

--- JS文件 ---
{js_content[:5000]}...
(共 {len(js_content)} 字符,已截断)

请返回JSON格式:
{{
"base_url": "发现的baseURL或空",
"api_paths": ["路径1", "路径2"],
"sensitive": ["敏感信息"],
"external_urls": ["外部URL"],
"ips": ["IP地址"]
}}"""

    # The built-in hash() is salted per process for str, so it cannot
    # identify content across runs; a SHA-256 digest is stable.
    content_hash = hashlib.sha256(
        js_content.encode('utf-8', errors='replace')).hexdigest()

    return {
        'js_url': js_url,
        'full_url': full_url,
        'js_content': display_content,
        'js_content_full': js_content if not truncated else None,
        'content_hash': content_hash,
        'lines': len(js_content.split('\n')),
        'truncated': truncated,
        'agent_prompt': prompt_template if truncated else None,
        'fetch_success': True
    }
|
|
612
|
+
|
|
613
|
+
|
|
614
|
+
def batch_prepare_js_for_agent(js_urls, base_url):
    """
    Prepare several JavaScript files for Agent analysis.

    Only the first five URLs are processed. Entries whose preparation
    reported an ``error`` are left out.

    Args:
        js_urls: list of script URLs.
        base_url: URL used to resolve relative script URLs.

    Returns:
        List of the successfully prepared result dicts, in input order.
    """
    attempts = (
        prepare_js_for_agent_analysis(url, base_url)
        for url in js_urls[:5]  # cap the number of downloads
    )
    return [outcome for outcome in attempts if 'error' not in outcome]
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
def extract_oauth_credentials(js_content):
    """
    Extract hard-coded OAuth client credentials from JavaScript source.

    Handles ``key: "value"`` and ``key = "value"`` forms (any quote style),
    URL query strings (``client_id=value``) and, as a last resort, the loose
    unquoted patterns of the original implementation. For ``client_id`` and
    ``client_secret`` every occurrence is examined until one passes the
    placeholder filter (longer than 3 characters, not containing
    ``undefined``).

    Args:
        js_content: JavaScript source text.

    Returns:
        dict with ``client_id``, ``client_secret``, ``grant_type`` and
        ``token_url`` (each a string or None), or None when neither a client
        id nor a client secret was found.
    """
    if not js_content:
        return None

    def looks_real(value):
        """Reject placeholders such as 'x' or 'undefined'."""
        return len(value) > 3 and 'undefined' not in value.lower()

    def first_capture(patterns, accept=None):
        """Return the first captured group, in pattern order, that ``accept`` allows."""
        for pattern in patterns:
            for match in re.finditer(pattern, js_content, re.I):
                value = match.group(1)
                if accept is None or accept(value):
                    return value
        return None

    def key_patterns(key_regex, loose_keys):
        """Build the quoted, query-string and loose patterns for one key."""
        patterns = [
            # key: "value" / "key": 'value' / key = "value"
            key_regex + r'["\']?\s*[:=]\s*["\']([^"\']+)["\']',
            # ...?key=value&...
            key_regex + r'=([^&\s"\']+)',
        ]
        # Loose originals, kept as fallbacks.
        patterns.extend(k + r'[\s:\"]+([^\s\"\']+)' for k in loose_keys)
        return patterns

    result = {
        'client_id': first_capture(
            key_patterns(r'client_?id', ('client_id', 'clientId')),
            looks_real),
        'client_secret': first_capture(
            key_patterns(r'client_?secret', ('client_secret', 'clientSecret')),
            looks_real),
        'grant_type': first_capture(
            key_patterns(r'grant_type', ('grant_type',))),
        'token_url': first_capture([
            r'[\"\'](/auth/oauth/token[^"\']*)["\']',
            r'[\"\'](https?://[^\s\"\']+/oauth/token[^"\']*)["\']',
            r'token[\sUrl]*[\":\s]+[\"\']([^\s\"\']+token[^\s\"\']*)["\']',
        ]),
    }

    # Only report when an actual credential was found.
    if result['client_id'] or result['client_secret']:
        return result
    return None
|
|
718
|
+
|
|
719
|
+
|
|
720
|
+
def extract_all_api_endpoints(js_content):
    """
    Extract API endpoints from JavaScript source using regular expressions.

    Args:
        js_content: JavaScript source text.

    Returns:
        List of dicts, unique per (method, path) and in first-seen order:
            path: the URL or path string,
            method: upper-case HTTP method, or 'UNKNOWN' when the call site
                does not reveal it,
            source: 'axios_pattern', 'fetch_pattern' or 'url_pattern'.
    """
    if not js_content:
        return []

    endpoints = []

    # <client>.<method>("path"): both patterns capture (method, path).
    method_call_patterns = [
        r'axios\.(get|post|put|delete|patch)\(["\']([^"\']+)["\']',
        r'\.(get|post|put|delete|patch)\(["\']([^"\']+)["\']',
    ]
    for pattern in method_call_patterns:
        for method, path in re.findall(pattern, js_content, re.I):
            endpoints.append({
                'path': path,
                'method': method.upper(),
                'source': 'axios_pattern'
            })

    # fetch("path") has a single capture group, so findall yields plain
    # strings. The old ``len(m) == 2`` tuple check therefore compared the
    # URL's length and silently dropped every fetch() endpoint. The method
    # lives in the options argument, which is not parsed here.
    for path in re.findall(r'fetch\(["\']([^"\']+)["\']', js_content, re.I):
        endpoints.append({
            'path': path,
            'method': 'UNKNOWN',
            'source': 'fetch_pattern'
        })

    # url / path / endpoint configuration values.
    url_patterns = [
        r'url\s*[:=]\s*["\']([^"\']+)["\']',
        r'path\s*[:=]\s*["\']([^"\']+)["\']',
        r'endpoint\s*[:=]\s*["\']([^"\']+)["\']',
    ]
    for pattern in url_patterns:
        for value in re.findall(pattern, js_content, re.I):
            if '/' in value and len(value) > 3:
                endpoints.append({
                    'path': value,
                    'method': 'UNKNOWN',
                    'source': 'url_pattern'
                })

    # De-duplicate on method + path, keeping the first occurrence.
    unique = {}
    for ep in endpoints:
        unique.setdefault(f"{ep['method']}:{ep['path']}", ep)
    return list(unique.values())
|