opencode-api-security-testing 2.1.0 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/SKILL.md +1797 -0
  2. package/core/advanced_recon.py +788 -0
  3. package/core/agentic_analyzer.py +445 -0
  4. package/core/analyzers/api_parser.py +210 -0
  5. package/core/analyzers/response_analyzer.py +212 -0
  6. package/core/analyzers/sensitive_finder.py +184 -0
  7. package/core/api_fuzzer.py +422 -0
  8. package/core/api_interceptor.py +525 -0
  9. package/core/api_parser.py +955 -0
  10. package/core/browser_tester.py +479 -0
  11. package/core/cloud_storage_tester.py +1330 -0
  12. package/core/collectors/__init__.py +23 -0
  13. package/core/collectors/api_path_finder.py +300 -0
  14. package/core/collectors/browser_collect.py +645 -0
  15. package/core/collectors/browser_collector.py +411 -0
  16. package/core/collectors/http_client.py +111 -0
  17. package/core/collectors/js_collector.py +490 -0
  18. package/core/collectors/js_parser.py +780 -0
  19. package/core/collectors/url_collector.py +319 -0
  20. package/core/context_manager.py +682 -0
  21. package/core/deep_api_tester_v35.py +844 -0
  22. package/core/deep_api_tester_v55.py +366 -0
  23. package/core/dynamic_api_analyzer.py +532 -0
  24. package/core/http_client.py +179 -0
  25. package/core/models.py +296 -0
  26. package/core/orchestrator.py +890 -0
  27. package/core/prerequisite.py +227 -0
  28. package/core/reasoning_engine.py +1042 -0
  29. package/core/response_classifier.py +606 -0
  30. package/core/runner.py +938 -0
  31. package/core/scan_engine.py +599 -0
  32. package/core/skill_executor.py +435 -0
  33. package/core/skill_executor_v2.py +670 -0
  34. package/core/skill_executor_v3.py +704 -0
  35. package/core/smart_analyzer.py +687 -0
  36. package/core/strategy_pool.py +707 -0
  37. package/core/testers/auth_tester.py +264 -0
  38. package/core/testers/idor_tester.py +200 -0
  39. package/core/testers/sqli_tester.py +211 -0
  40. package/core/testing_loop.py +655 -0
  41. package/core/utils/base_path_dict.py +255 -0
  42. package/core/utils/payload_lib.py +167 -0
  43. package/core/utils/ssrf_detector.py +220 -0
  44. package/core/verifiers/vuln_verifier.py +536 -0
  45. package/package.json +17 -13
  46. package/references/asset-discovery.md +119 -612
  47. package/references/graphql-guidance.md +65 -641
  48. package/references/intake.md +84 -0
  49. package/references/report-template.md +131 -38
  50. package/references/rest-guidance.md +55 -526
  51. package/references/severity-model.md +52 -264
  52. package/references/test-matrix.md +65 -263
  53. package/references/validation.md +53 -400
  54. package/scripts/postinstall.js +46 -0
  55. package/agents/cyber-supervisor.md +0 -55
  56. package/agents/probing-miner.md +0 -42
  57. package/agents/resource-specialist.md +0 -31
  58. package/commands/api-security-testing-scan.md +0 -59
  59. package/commands/api-security-testing-test.md +0 -49
  60. package/commands/api-security-testing.md +0 -72
  61. package/tsconfig.json +0 -17
@@ -0,0 +1,780 @@
1
+ """
2
+ JS源码解析 - 从HTML/JS中提取API配置
3
+
4
+ 【重要】使用多模式解析:
5
+ 1. 正则模式:快速提取API路径、baseURL、凭证等
6
+ 2. Agent模式:下载JS供Agent解析(用于混淆JS)
7
+
8
+ 输入: {html, js_urls, base_url}
9
+ 输出: {
10
+ api_patterns: API路径,
11
+ base_urls: baseURL配置,
12
+ tokens: token,
13
+ endpoints: 完整端点,
14
+ sensitive_urls: 敏感URL,
15
+ ip_addresses: IP地址,
16
+ domains: 相关域名,
17
+ credentials: 发现的凭证,
18
+ js_for_agent: [JS文件路径列表,供Agent解析]
19
+ }
20
+ """
21
+
22
+ import re
23
+ import requests
24
+ from urllib.parse import urljoin, urlparse
25
+
26
+ requests.packages.urllib3.disable_warnings()
27
+
28
+
29
def js_parser(config):
    """
    Extract API configuration from a page's HTML and its JavaScript files.

    Input (``config`` dict):
        html: string - page HTML
        js_urls?: string[] - JS URLs; extracted from ``html`` when empty
        base_url: string - base URL used to resolve relative JS URLs
        use_ast?: boolean - also run the AST pass on each file (default True)

    Output (dict):
        api_patterns: string[] - API paths
        base_urls: string[] - API base URLs
        tokens: string[] - possible tokens
        endpoints: string[] - base URL x API path combinations
        sensitive_urls: string[] - URLs found in the HTML/JS
        ip_addresses: string[] - IPv4 addresses
        domains: string[] - hosts of the discovered URLs
        credentials: object - credential type -> value

    Only the first 15 JS URLs are fetched. A failure while processing one
    file skips that file so the remaining ones are still analysed.
    """
    html = config.get('html', '')
    js_urls = config.get('js_urls', [])
    base_url = config.get('base_url', '')
    use_ast = config.get('use_ast', True)

    result = {
        'api_patterns': [],
        'base_urls': [],
        'tokens': [],
        'endpoints': [],
        'sensitive_urls': [],
        'ip_addresses': [],
        'domains': [],
        'credentials': {}
    }

    # Fall back to the <script>/<link> references found in the HTML.
    if not js_urls:
        js_urls = extract_js_urls(html)

    # Inline configuration found directly in the HTML.
    result['base_urls'] = list(extract_base_urls(html))
    result['api_patterns'] = list(extract_api_patterns(html))

    html_sensitive = extract_sensitive_from_string(html)
    result['sensitive_urls'].extend(html_sensitive.get('urls', []))
    result['ip_addresses'].extend(html_sensitive.get('ips', []))
    result['domains'].extend(html_sensitive.get('domains', []))
    # Credentials hard-coded in inline scripts are as relevant as those in JS files.
    if html_sensitive.get('credentials'):
        result['credentials'].update(html_sensitive['credentials'])

    for js_url in js_urls[:15]:
        try:
            full_url = resolve_js_url(js_url, base_url)
            if not full_url:
                continue

            js_content = fetch_js_content(full_url)
            if not js_content:
                continue

            # AST pass: inspect every string literal of the parsed script.
            if use_ast:
                ast_result = extract_with_ast(js_content)
                if 'error' not in ast_result:
                    for literal in ast_result.get('string_literals', []):
                        if is_api_path(literal):
                            result['api_patterns'].append(literal)
                        result['sensitive_urls'].extend(extract_urls_from_string(literal))
                        result['ip_addresses'].extend(extract_ip_from_string(literal))

            # Regex pass: always runs, independent of the AST outcome.
            result['api_patterns'].extend(extract_api_patterns(js_content))
            result['base_urls'].extend(extract_base_urls(js_content))
            result['tokens'].extend(extract_tokens(js_content))

            sensitive = extract_sensitive_from_string(js_content)
            result['sensitive_urls'].extend(sensitive.get('urls', []))
            result['ip_addresses'].extend(sensitive.get('ips', []))
            result['domains'].extend(sensitive.get('domains', []))
            if sensitive.get('credentials'):
                result['credentials'].update(sensitive['credentials'])

        except Exception:
            # Best effort: one broken file must not abort the whole scan,
            # but KeyboardInterrupt/SystemExit are allowed to propagate.
            continue

    # De-duplicate while keeping first-seen order so output is reproducible.
    for key in ('api_patterns', 'base_urls', 'tokens',
                'sensitive_urls', 'ip_addresses', 'domains'):
        result[key] = list(dict.fromkeys(result[key]))

    # Combine every base URL with every API path.
    endpoints = []
    for base in result['base_urls']:
        base_clean = base.rstrip('/')
        for pattern in result['api_patterns']:
            if pattern.lower().startswith(('http://', 'https://')):
                # Already absolute: prefixing a base would produce garbage.
                endpoints.append(pattern)
            elif pattern.startswith('/'):
                endpoints.append(base_clean + pattern)
            else:
                # Use the stripped base so "x/" + "y" does not become "x//y".
                endpoints.append(base_clean + '/' + pattern)

    result['endpoints'] = list(dict.fromkeys(endpoints))

    return result
152
+
153
+
154
def extract_with_ast(js_content):
    """
    Parse JavaScript with esprima and collect literals, properties and calls.

    Requires the optional ``esprima`` package (``pip install esprima``).

    Returns on success:
        {
            string_literals: every string literal value,
            object_properties: property key -> string value,
            function_calls: names of directly called functions,
            import_sources: sources of import declarations
        }

    Returns a dict containing an ``error`` key (plus ``fix_command``) when
    esprima is missing or the source cannot be parsed; callers detect
    failure by checking for ``'error'`` in the result.
    """
    try:
        import esprima
    except ImportError:
        return {
            'error': 'esprima not installed. Run: pip install esprima',
            'fix_command': 'pip install esprima'
        }

    # Upper bound on nesting depth; the walk below is iterative, so this
    # only limits work on pathological input, not Python's recursion depth.
    max_depth = 100

    try:
        # Parse the JS into an AST (with position information).
        ast = esprima.parse(js_content, sourceType='script', range=True)

        result = {
            'string_literals': [],
            'object_properties': {},
            'function_calls': [],
            'import_sources': []
        }

        # Start from the root node itself. The previous code passed
        # ``ast.body`` (a list), and a list has no ``__dict__``, so the walk
        # raised AttributeError and every call ended in the error branch.
        stack = [(ast, 0)]
        while stack:
            node, depth = stack.pop()
            if depth > max_depth:
                continue

            node_type = getattr(node, 'type', None)

            if node_type == 'Literal':
                value = getattr(node, 'value', None)
                if isinstance(value, str):
                    result['string_literals'].append(value)

            elif node_type == 'Property':
                key_node = getattr(node, 'key', None)
                value_node = getattr(node, 'value', None)
                # Literal keys carry ``value``; identifier keys carry ``name``.
                key = getattr(key_node, 'value', None)
                if key is None:
                    key = getattr(key_node, 'name', None)
                value = getattr(value_node, 'value', None)
                if key is not None and value and isinstance(value, str):
                    result['object_properties'][key] = value

            elif node_type == 'CallExpression':
                callee = getattr(node, 'callee', None)
                name = getattr(callee, 'name', None)
                if name is None:
                    name = getattr(callee, 'value', None)
                # Only record real names (member-expression callees have none).
                if isinstance(name, str):
                    result['function_calls'].append(name)

            elif node_type == 'ImportDeclaration':
                source = getattr(node, 'source', None)
                source_value = getattr(source, 'value', None)
                if isinstance(source_value, str):
                    result['import_sources'].append(source_value)

            # Queue child nodes: any attribute (or list item) that has a type.
            children = []
            for child in getattr(node, '__dict__', {}).values():
                if isinstance(child, list):
                    children.extend(
                        item for item in child
                        if getattr(item, 'type', None) is not None
                    )
                elif getattr(child, 'type', None) is not None:
                    children.append(child)
            # Reversed so that popping from the stack visits in source order.
            stack.extend((child, depth + 1) for child in reversed(children))

        # De-duplicate, keeping first-seen order.
        result['string_literals'] = list(dict.fromkeys(result['string_literals']))
        result['function_calls'] = list(dict.fromkeys(result['function_calls']))
        result['import_sources'] = list(dict.fromkeys(result['import_sources']))

        return result

    except Exception as e:
        return {
            'error': f'AST parse failed: {str(e)[:100]}',
            'fix_command': 'pip install --upgrade esprima',
            'fallback_available': True
        }
248
+
249
+
250
def extract_simplified(content):
    """
    Simplified string extraction, intended as a fallback when AST parsing fails.

    Collects double- and single-quoted strings of 3 to 150 characters, then
    keeps as API paths those that contain an API keyword and either start
    with '/' or mention axios/fetch.

    Returns:
        {
            string_literals: all quoted strings found,
            api_paths: the subset that looks like API paths,
            error: always 'fallback_mode' (marks the result as coming
                   from this fallback)
        }
    """
    result = {
        'string_literals': [],
        'api_paths': [],
        'error': 'fallback_mode'
    }

    # Double-quoted strings first, then single-quoted ones.
    for quoted_regex in (r'"([^"]{3,150})"', r"'([^']{3,150})'"):
        try:
            result['string_literals'].extend(re.findall(quoted_regex, content))
        except TypeError:
            # Non-text input (e.g. None or bytes): stay best-effort and
            # return whatever was collected instead of raising.
            pass

    # Keep only strings that look like API paths.
    api_keywords = ['user', 'auth', 'login', 'logout', 'api', 'frame', 'admin', 'info', 'list', 'supplement', 'dashboard', 'module', 'code', 'attach', 'v1', 'v2', 'v3']
    for literal in result['string_literals']:
        lowered = literal.lower()
        if not any(keyword in lowered for keyword in api_keywords):
            continue
        if literal.startswith('/') or 'axios' in lowered or 'fetch' in lowered:
            result['api_paths'].append(literal)

    return result
284
+
285
+
286
def extract_sensitive_from_string(content):
    """
    Extract URLs, hosts, IPv4 addresses and credentials from a string.

    Returns:
        {
            urls: http/https URLs found (first-seen order, no duplicates),
            ips: IPv4 addresses found,
            domains: network locations (host[:port]) of the URLs,
            credentials: credential type -> value; when a type matches
                         several times the last valid match wins
        }
    """
    # HTTP/HTTPS URLs, de-duplicated in first-seen order.
    urls = list(dict.fromkeys(re.findall(r'https?://[^\s"\'<>]+', content)))

    # Hosts of those URLs.
    domains = []
    for url in urls:
        try:
            netloc = urlparse(url).netloc
        except ValueError:
            # urlparse rejects malformed bracketed hosts such as "http://[x";
            # skip that URL's host instead of aborting the whole extraction.
            continue
        if netloc and netloc not in domains:
            domains.append(netloc)

    # IPv4 addresses (each octet limited to 0-255).
    ipv4_pattern = r'\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b'
    ips = list(dict.fromkeys(re.findall(ipv4_pattern, content)))

    # Credentials assigned to well-known key names or used in auth headers.
    credential_patterns = [
        (r'(?:api[_-]?key|API[_-]?KEY)\s*[:=]\s*["\']([^"\']+)["\']', 'api_key'),
        (r'(?:secret[_-]?key|SECRET[_-]?KEY)\s*[:=]\s*["\']([^"\']+)["\']', 'secret_key'),
        (r'(?:access[_-]?token|ACCESS[_-]?TOKEN)\s*[:=]\s*["\']([^"\']+)["\']', 'access_token'),
        (r'(?:password|passwd|pwd)\s*[:=]\s*["\']([^"\']+)["\']', 'password'),
        (r'Bearer\s+([a-zA-Z0-9\-_\.]+)', 'bearer_token'),
        (r'Basic\s+([a-zA-Z0-9\-_\.+]+=*)', 'basic_auth'),
    ]

    credentials = {}
    for pattern, cred_type in credential_patterns:
        for match in re.findall(pattern, content, re.IGNORECASE):
            # Skip placeholders: very short values and anything mentioning "undefined".
            if len(match) > 3 and 'undefined' not in match.lower():
                credentials[cred_type] = match

    return {
        'urls': urls,
        'ips': ips,
        'domains': domains,
        'credentials': credentials
    }
342
+
343
+
344
def extract_urls_from_string(content):
    """Return the distinct http/https URLs found in ``content`` as a list."""
    # A URL runs until whitespace, a quote or an angle bracket.
    found = re.findall(r'https?://[^\s"\'<>]+', content)
    return list(set(found))
352
+
353
+
354
def extract_ip_from_string(content):
    """Return the distinct IPv4 addresses found in ``content`` as a list."""
    # Four dot-separated octets, each limited to 0-255.
    ipv4_pattern = r'\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b'
    return list(set(re.findall(ipv4_pattern, content)))
363
+
364
+
365
def extract_js_urls(html):
    """
    Extract JavaScript file URLs referenced by an HTML document.

    Looks at ``<script src=...>`` and ``<link href=...>`` attributes whose
    value ends in ``.js``, optionally followed by a query string or fragment
    (cache-busting references such as ``app.js?v=42`` are common).

    Returns the URLs as written in the HTML (script matches first, then link
    matches); returns an empty list for empty or missing HTML.
    """
    if not html:
        return []

    # ".js" plus an optional "?query" / "#fragment" up to the closing quote.
    js_reference = r'([^"\']+\.js(?:[?#][^"\']*)?)'
    tag_patterns = (
        r'<script[^>]+src=["\']' + js_reference + r'["\']',
        # <link> tags can reference JS too (e.g. preload hints).
        r'<link[^>]+href=["\']' + js_reference + r'["\']',
    )

    js_urls = []
    for tag_pattern in tag_patterns:
        js_urls.extend(re.findall(tag_pattern, html, re.I))

    return js_urls
380
+
381
+
382
def extract_base_urls(content):
    """
    Collect quoted values assigned to well-known API base URL settings.

    Matching is case-sensitive and the result keeps duplicates: a value is
    listed once per pattern that matches it.
    """
    config_key_regexes = (
        r'baseURL\s*[:=]\s*["\']([^"\']+)["\']',
        r'apiBase\s*[:=]\s*["\']([^"\']+)["\']',
        r'API_BASE\s*[:=]\s*["\']([^"\']+)["\']',
        r'VUE_APP_API\s*[:=]\s*["\']([^"\']+)["\']',
        r'REACT_APP_API\s*[:=]\s*["\']([^"\']+)["\']',
        r'NEXT_PUBLIC_API\s*[:=]\s*["\']([^"\']+)["\']',
        r'axios\.defaults\.baseURL\s*=\s*["\']([^"\']+)["\']',
    )

    # Pattern order first, then match order within each pattern.
    return [
        value
        for regex in config_key_regexes
        for value in re.findall(regex, content)
    ]
401
+
402
+
403
def extract_api_patterns(content):
    """
    Extract candidate API paths from HTML/JS text.

    Every regex below is applied case-insensitively; the captured strings
    are then kept only if ``is_api_path`` accepts them. Duplicates are not
    removed.
    """
    # Generic REST-style references and url/endpoint/path assignments.
    rest_regexes = [
        r'["\'](/api/[^"\']+)["\']',
        r'["\'](/v\d+/[^"\']+)["\']',
        r'["\'](/api\.php/[^"\']+)["\']',
        r'url\s*[:=]\s*["\']([^"\']*api[^"\']*)["\']',
        r'endpoint\s*[:=]\s*["\']([^"\']+)["\']',
        r'path\s*[:=]\s*["\']([^"\']+)["\']',
    ]

    # Business-module prefixes and HTTP client call sites.
    business_regexes = [
        # User / authentication
        r'["\'](/(?:user|auth|login|logout|oauth|supplement|userinfo)[a-zA-Z0-9_/?=&-]*)["\']',
        # Framework management
        r'["\'](/(?:frame|module|code|attach|file)[a-zA-Z0-9_/?=&-]*)["\']',
        # Dashboard / statistics
        r'["\'](/(?:dashboard|table|dash|board|stats|statistics)[a-zA-Z0-9_/?=&-]*)["\']',
        # WeChat related
        r'["\'](/(?:wx|wechat|wxapi|hszh)[a-zA-Z0-9_/?=&-]*)["\']',
        # axios / fetch calls
        r'axios\.[a-z]+\(["\']([^"\']+)["\']',
        r'fetch\(["\']([^"\']+)["\']',
        r'\.get\(["\']([^"\']+)["\']',
        r'\.post\(["\']([^"\']+)["\']',
        r'\.put\(["\']([^"\']+)["\']',
        r'\.delete\(["\']([^"\']+)["\']',
    ]

    candidates = [
        hit
        for regex in rest_regexes + business_regexes
        for hit in re.findall(regex, content, re.I)
        if isinstance(hit, str)
    ]

    # Drop everything that does not look like an API path.
    return [candidate for candidate in candidates if is_api_path(candidate)]
449
+
450
+
451
def extract_tokens(content):
    """
    Extract strings that look like hard-coded tokens.

    Sources, all matched case-insensitively: quoted values assigned to a
    ``token`` key, ``Bearer <token>`` strings, and quoted ``Authorization``
    values (with or without a leading scheme word such as "Bearer").

    Returns the distinct tokens in first-seen order. Tokens of 10 characters
    or fewer and tokens containing "test" are dropped.
    """
    if not content:
        return []

    patterns = [
        r'(?:token|Token|TOKEN)\s*[:=]\s*["\']([a-zA-Z0-9\-_\.]+)["\']',
        r'Bearer\s+([a-zA-Z0-9\-_\.]+)',
        # Skip only an optional scheme word before the token. The previous
        # greedy "[^\"']*" swallowed the token itself and left the capture
        # group a single character, so this source never produced a result.
        r'Authorization["\']?\s*[:=]\s*["\'](?:[A-Za-z]+\s+)?([a-zA-Z0-9\-_\.]+)',
    ]

    tokens = []
    for pattern in patterns:
        tokens.extend(re.findall(pattern, content, re.I))

    # Filter out short values and obvious test tokens; de-duplicate in order.
    kept = (
        token for token in tokens
        if len(token) > 10 and 'test' not in token.lower()
    )
    return list(dict.fromkeys(kept))
472
+
473
+
474
def is_api_path(path):
    """
    Decide whether a string looks like an API path.

    Static resources are rejected first: files with a static extension,
    bundle-style file names (``chunk-``, ``app.``, ``vendor.``) and paths
    that start with an asset directory. Anything else is accepted when it
    contains one of the API indicators (case-insensitive).

    Previously the static-resource list was only consulted after the
    indicator check had already returned, so it had no effect and paths
    such as ``/static/js/user.js`` were reported as API paths.
    """
    if not path or len(path) < 2:
        return False

    api_indicators = [
        '/api/', '/v1/', '/v2/', '/v3/', '/rest/',
        '/user', '/auth', '/login', '/logout', '/oauth',
        '/frame', '/module', '/code', '/attach', '/file',
        '/dashboard', '/table', '/supplement',
        '/wx', '/wechat', '/hszh', '/api',
    ]
    static_extensions = ('.css', '.js', '.html', '.png', '.jpg', '.gif')
    asset_dirs = ('/static/', '/public/', '/assets/', '/images/')
    bundle_prefixes = ('chunk-', 'app.', 'vendor.')

    lowered = path.lower()

    # Look at the resource part only: no query string, fragment or scheme/host.
    resource = lowered.split('?', 1)[0].split('#', 1)[0]
    if '://' in resource:
        resource = '/' + resource.split('://', 1)[1].partition('/')[2]
    filename = resource.rsplit('/', 1)[-1]

    # Extensions are matched at the end of the file name so that ".js" does
    # not also reject ".json" responses.
    if filename.endswith(static_extensions):
        return False
    if filename.startswith(bundle_prefixes):
        return False
    # Only a leading asset directory counts; "/api/public/login" stays an API path.
    if resource.startswith(asset_dirs):
        return False

    return any(indicator in lowered for indicator in api_indicators)
503
+
504
+
505
def resolve_js_url(js_url, base_url):
    """
    Resolve a script reference to an absolute URL.

    - absolute http(s) URLs are returned unchanged;
    - protocol-relative URLs (``//host/x.js``) take the scheme of
      ``base_url``, or ``https`` when the base has none;
    - everything else is joined onto ``base_url``.

    Returns None when ``js_url`` is empty or when no absolute URL can be
    built (for example a relative reference with an empty base URL), so
    callers never receive an unusable value such as ``:///x.js``.
    """
    js_url = (js_url or '').strip()
    if not js_url:
        return None

    base_url = base_url or ''

    # Match the full scheme so relative names like "httpclient.js" are still joined.
    if js_url.lower().startswith(('http://', 'https://')):
        return js_url

    if js_url.startswith('//'):
        scheme = urlparse(base_url).scheme or 'https'
        return f"{scheme}:{js_url}"

    # urljoin handles both root-relative ("/x.js") and relative ("x.js") forms.
    resolved = urljoin(base_url, js_url)
    if not urlparse(resolved).netloc:
        return None
    return resolved
522
+
523
+
524
def fetch_js_content(js_url):
    """
    Download a JavaScript file and return its text.

    Returns None when the URL is empty, the request fails, or the response
    status is not 200. The request uses a 10 second timeout.
    """
    if not js_url:
        return None

    try:
        # NOTE(review): certificate verification is disabled here, presumably
        # so targets with self-signed certificates can be scanned - confirm.
        resp = requests.get(js_url, timeout=10, verify=False)
    except (requests.RequestException, ValueError):
        # Network errors and malformed URLs are treated as "no content";
        # programming errors and KeyboardInterrupt are no longer swallowed.
        return None

    if resp.status_code != 200:
        return None
    return resp.text
533
+
534
+
535
if __name__ == '__main__':
    # Manual smoke test: parse a minimal page and print what was found.
    demo_config = {
        'html': '<script src="/static/js/app.js"></script>',
        'base_url': 'https://example.com'
    }
    result = js_parser(demo_config)
    print(f"API Patterns: {result['api_patterns']}")
    print(f"Base URLs: {result['base_urls']}")
543
+
544
+
545
def prepare_js_for_agent_analysis(js_url, base_url):
    """
    Download a JS file and package it for analysis by an Agent/LLM.

    Intended for obfuscated JS that the AST pass cannot handle.

    Input:
        js_url: string - JS file URL (absolute or relative)
        base_url: string - base URL used to resolve a relative ``js_url``

    Output on success:
        {
            js_url: the URL as passed in,
            full_url: the resolved absolute URL,
            js_content: JS source, cut to the first 20000 characters,
            js_content_full: full source, or None when it was truncated,
            content_hash: SHA-256 hex digest of the full source,
            lines: number of lines in the full source,
            truncated: whether js_content was cut,
            agent_prompt: analysis prompt, or None when not truncated,
            fetch_success: True
        }

    Output on failure: ``{error, js_url}``.
    """
    # Function-scope import keeps this block self-contained.
    import hashlib

    # resolve_js_url returns absolute URLs unchanged and None for empty input.
    full_url = resolve_js_url(js_url, base_url)

    js_content = fetch_js_content(full_url) if full_url else None

    if not js_content:
        return {
            'error': f'Failed to fetch JS: {js_url}',
            'js_url': js_url
        }

    # Truncate overly long JS (keep the first 20000 characters).
    truncated = len(js_content) > 20000
    display_content = js_content[:20000] if truncated else js_content

    prompt_template = f"""请分析以下JavaScript代码,提取API接口:

1. baseURL/basePath配置
2. 所有API路径(如 /user/login, /api/v1/user/info)
3. 请求方法(GET/POST/PUT/DELETE)
4. 参数名和参数位置(query/path/body)
5. 敏感信息(token、apiKey、硬编码凭证)
6. 外部URL或域名
7. IP地址

--- JS文件 ---
{js_content[:5000]}...
(共 {len(js_content)} 字符,已截断)

请返回JSON格式:
{{
"base_url": "发现的baseURL或空",
"api_paths": ["路径1", "路径2"],
"sensitive": ["敏感信息"],
"external_urls": ["外部URL"],
"ips": ["IP地址"]
}}"""

    # hash() of a str is salted per process, so it cannot identify content
    # across runs; a SHA-256 digest is stable.
    content_hash = hashlib.sha256(
        js_content.encode('utf-8', errors='replace')
    ).hexdigest()

    return {
        'js_url': js_url,
        'full_url': full_url,
        'js_content': display_content,
        'js_content_full': js_content if not truncated else None,
        'content_hash': content_hash,
        'lines': len(js_content.split('\n')),
        'truncated': truncated,
        # NOTE(review): the prompt is only attached for truncated files -
        # confirm whether small files should receive one as well.
        'agent_prompt': prompt_template if truncated else None,
        'fetch_success': True
    }
612
+
613
+
614
def batch_prepare_js_for_agent(js_urls, base_url):
    """
    Prepare several JS files for Agent analysis.

    Input:
        js_urls: string[] - JS file URLs (only the first 5 are processed)
        base_url: string - base URL used to resolve relative URLs

    Output:
        prepared: object[] - results of prepare_js_for_agent_analysis,
                  excluding those that reported an error
    """
    # Lazily evaluated in order, one download per URL.
    outcomes = (
        prepare_js_for_agent_analysis(candidate, base_url)
        for candidate in js_urls[:5]
    )
    return [outcome for outcome in outcomes if 'error' not in outcome]
633
+
634
+
635
def extract_oauth_credentials(js_content):
    """
    Extract hard-coded OAuth client settings from JS source.

    Input:
        js_content: string - JS file content

    Output:
        {
            client_id: string or None,
            client_secret: string or None,
            grant_type: string or None,
            token_url: string or None
        }
        or None when neither a client_id nor a client_secret was found
        (including empty input).

    All patterns are matched case-insensitively. client_id and client_secret
    values must be longer than 3 characters and must not contain
    "undefined".
    """
    if not js_content:
        return None

    def first_value(patterns, require_real_value=False):
        # First acceptable capture, trying the patterns in priority order.
        for pattern in patterns:
            for match in re.finditer(pattern, js_content, re.I):
                value = match.group(1)
                if not require_real_value:
                    return value
                if len(value) > 3 and 'undefined' not in value.lower():
                    return value
        return None

    def key_patterns(*names):
        alternatives = '|'.join(names)
        structured = [
            # key: 'value' / "key": "value" / key = "value"
            r'["\']?(?:' + alternatives + r')["\']?\s*[:=]\s*["\']([^"\']+)["\']',
            # ...&key=value inside a query string or form body
            r'[?&](?:' + alternatives + r')=([^&\s"\']+)',
        ]
        # Original loose patterns kept as a last resort for compatibility.
        loose = [name + r'[\s:\"]+([^\s\"\']+)' for name in names]
        return structured + loose

    result = {
        'client_id': first_value(
            key_patterns('client_id', 'clientId'), require_real_value=True),
        'client_secret': first_value(
            key_patterns('client_secret', 'clientSecret'), require_real_value=True),
        'grant_type': first_value(key_patterns('grant_type')),
        'token_url': first_value([
            r'[\"\'](/auth/oauth/token[^"\']*)["\']',
            r'[\"\'](https?://[^\s\"\']+/oauth/token[^"\']*)["\']',
            r'token[\sUrl]*[\":\s]+[\"\']([^\s\"\']+token[^\s\"\']*)["\']',
        ]),
    }

    # Only report when an actual credential was found.
    if result['client_id'] or result['client_secret']:
        return result
    return None
718
+
719
+
720
def extract_all_api_endpoints(js_content):
    """
    Extract API endpoints from JS source.

    Input:
        js_content: string - JS file content

    Output:
        api_endpoints: [{
            path: string,
            method: string - upper-cased HTTP method, or 'UNKNOWN',
            source: 'axios_pattern' | 'fetch_pattern' | 'url_pattern'
        }]

    Entries are unique per (method, path) and keep first-seen order.
    Matching is case-insensitive. Empty input yields an empty list.
    """
    if not js_content:
        return []

    endpoints = []

    # axios.<method>('...') and generic <client>.<method>('...') calls.
    http_call_patterns = [
        r'axios\.(get|post|put|delete|patch)\(["\']([^"\']+)["\']',
        r'\.(get|post|put|delete|patch)\(["\']([^"\']+)["\']',
    ]
    for pattern in http_call_patterns:
        for method, path in re.findall(pattern, js_content, re.I):
            endpoints.append({
                'path': path,
                'method': method.upper(),
                'source': 'axios_pattern'
            })

    # fetch('...') carries its method, if any, in an inline options object.
    # The previous single-group pattern made findall return plain strings,
    # so the two-element check dropped (or mis-split) every fetch call.
    fetch_pattern = (
        r'fetch\(["\']([^"\']+)["\']'
        r'(?:\s*,\s*\{[^}]*?["\']?method["\']?\s*:\s*["\']([A-Za-z]+)["\'])?'
    )
    for path, method in re.findall(fetch_pattern, js_content, re.I):
        endpoints.append({
            'path': path,
            # Options may also be passed as a variable, so do not assume GET.
            'method': method.upper() or 'UNKNOWN',
            'source': 'fetch_pattern'
        })

    # url / path / endpoint configuration values.
    url_patterns = [
        r'url\s*[:=]\s*["\']([^"\']+)["\']',
        r'path\s*[:=]\s*["\']([^"\']+)["\']',
        r'endpoint\s*[:=]\s*["\']([^"\']+)["\']',
    ]
    for pattern in url_patterns:
        for value in re.findall(pattern, js_content, re.I):
            if '/' in value and len(value) > 3:
                endpoints.append({
                    'path': value,
                    'method': 'UNKNOWN',
                    'source': 'url_pattern'
                })

    # De-duplicate on method + path; the first occurrence wins.
    unique = {}
    for ep in endpoints:
        unique.setdefault(f"{ep['method']}:{ep['path']}", ep)
    return list(unique.values())