opencode-api-security-testing 2.1.0 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/SKILL.md +1797 -0
  2. package/core/advanced_recon.py +788 -0
  3. package/core/agentic_analyzer.py +445 -0
  4. package/core/analyzers/api_parser.py +210 -0
  5. package/core/analyzers/response_analyzer.py +212 -0
  6. package/core/analyzers/sensitive_finder.py +184 -0
  7. package/core/api_fuzzer.py +422 -0
  8. package/core/api_interceptor.py +525 -0
  9. package/core/api_parser.py +955 -0
  10. package/core/browser_tester.py +479 -0
  11. package/core/cloud_storage_tester.py +1330 -0
  12. package/core/collectors/__init__.py +23 -0
  13. package/core/collectors/api_path_finder.py +300 -0
  14. package/core/collectors/browser_collect.py +645 -0
  15. package/core/collectors/browser_collector.py +411 -0
  16. package/core/collectors/http_client.py +111 -0
  17. package/core/collectors/js_collector.py +490 -0
  18. package/core/collectors/js_parser.py +780 -0
  19. package/core/collectors/url_collector.py +319 -0
  20. package/core/context_manager.py +682 -0
  21. package/core/deep_api_tester_v35.py +844 -0
  22. package/core/deep_api_tester_v55.py +366 -0
  23. package/core/dynamic_api_analyzer.py +532 -0
  24. package/core/http_client.py +179 -0
  25. package/core/models.py +296 -0
  26. package/core/orchestrator.py +890 -0
  27. package/core/prerequisite.py +227 -0
  28. package/core/reasoning_engine.py +1042 -0
  29. package/core/response_classifier.py +606 -0
  30. package/core/runner.py +938 -0
  31. package/core/scan_engine.py +599 -0
  32. package/core/skill_executor.py +435 -0
  33. package/core/skill_executor_v2.py +670 -0
  34. package/core/skill_executor_v3.py +704 -0
  35. package/core/smart_analyzer.py +687 -0
  36. package/core/strategy_pool.py +707 -0
  37. package/core/testers/auth_tester.py +264 -0
  38. package/core/testers/idor_tester.py +200 -0
  39. package/core/testers/sqli_tester.py +211 -0
  40. package/core/testing_loop.py +655 -0
  41. package/core/utils/base_path_dict.py +255 -0
  42. package/core/utils/payload_lib.py +167 -0
  43. package/core/utils/ssrf_detector.py +220 -0
  44. package/core/verifiers/vuln_verifier.py +536 -0
  45. package/package.json +17 -13
  46. package/references/asset-discovery.md +119 -612
  47. package/references/graphql-guidance.md +65 -641
  48. package/references/intake.md +84 -0
  49. package/references/report-template.md +131 -38
  50. package/references/rest-guidance.md +55 -526
  51. package/references/severity-model.md +52 -264
  52. package/references/test-matrix.md +65 -263
  53. package/references/validation.md +53 -400
  54. package/scripts/postinstall.js +46 -0
  55. package/agents/cyber-supervisor.md +0 -55
  56. package/agents/probing-miner.md +0 -42
  57. package/agents/resource-specialist.md +0 -31
  58. package/commands/api-security-testing-scan.md +0 -59
  59. package/commands/api-security-testing-test.md +0 -49
  60. package/commands/api-security-testing.md +0 -72
  61. package/tsconfig.json +0 -17
@@ -0,0 +1,645 @@
1
+ """
2
+ 无头浏览器采集 - 使用Playwright进行动态采集
3
+ 输入: {url, wait_until, interact, intercept_api}
4
+ 输出: {apis, storage, forms, page_title, js_files, tech_stack, sensitive_urls, ip_addresses}
5
+
6
+ 【重要】SPA采集完整流程:
7
+ 1. browser_collect 采集JS文件、API请求、外部URL、IP
8
+ 2. js_parser 分析JS提取API端点和baseURL配置
9
+ 3. sensitive_finder 提取敏感信息
10
+ 4. http_client 测试发现的API
11
+ """
12
+
13
+ import asyncio
14
+ import re
15
+ import json
16
+ import requests
17
+ from urllib.parse import urlparse, parse_qs
18
+
19
+ try:
20
+ from playwright.sync_api import sync_playwright
21
+ PLAYWRIGHT_AVAILABLE = True
22
+ except ImportError:
23
+ PLAYWRIGHT_AVAILABLE = False
24
+
25
+ requests.packages.urllib3.disable_warnings()
26
+
27
+
28
def browser_collect(config):
    """
    Collect APIs and page information using a headless browser (sync version).

    Input (config dict):
        url: string - target URL
        wait_until?: "networkidle" | "domcontentloaded"
        interact?: boolean - simulate user interaction
        intercept_api?: boolean - intercept API requests
        extract_js_files?: boolean - extract the list of JS files
        extract_external_urls?: boolean - extract external URLs/domains
        extract_ip_addresses?: boolean - extract IP addresses

    Output (dict):
        apis: Array<{method, url, post_data}>
        storage: {localStorage, cookies}
        forms: Array<{action, method, inputs}>
        page_title: string
        js_files: Array<string> - list of JS file paths
        tech_stack: Array<string> - detected technologies
        sensitive_urls: Array<string> - discovered sensitive URLs (APIs, admin panels, ...)
        ip_addresses: Array<string> - discovered IP addresses
        domains: Array<string> - discovered related domains
    """
    # Graceful degradation when Playwright is not installed.
    if not PLAYWRIGHT_AVAILABLE:
        return {
            'error': 'playwright_not_available',
            'apis': [],
            'storage': {},
            'forms': [],
            'js_files': [],
            'tech_stack': [],
            'sensitive_urls': [],
            'ip_addresses': [],
            'domains': []
        }

    url = config.get('url')
    wait_until = config.get('wait_until', 'networkidle')
    interact = config.get('interact', False)
    intercept_api = config.get('intercept_api', True)
    extract_js_files = config.get('extract_js_files', True)
    extract_external_urls = config.get('extract_external_urls', True)
    extract_ip_addresses = config.get('extract_ip_addresses', True)

    result = {
        'apis': [],
        'storage': {},
        'forms': [],
        'page_title': '',
        'js_files': [],
        'tech_stack': [],
        'sensitive_urls': [],
        'ip_addresses': [],
        'domains': []
    }

    # Used to tell first-party requests apart from external ones.
    target_domain = urlparse(url).netloc

    try:
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=['--no-sandbox', '--disable-dev-shm-usage']
            )
            context = browser.new_context(
                viewport={'width': 1920, 'height': 1080},
                ignore_https_errors=True
            )
            page = context.new_page()

            # API interception: record requests/responses as the page loads.
            captured_apis = []
            captured_urls = []  # full URLs of every intercepted request
            all_responses = []  # every response (status + headers)

            if intercept_api:
                def on_request(request):
                    # Only request types likely to reveal API traffic.
                    if request.resource_type in ['xhr', 'fetch', 'document', 'script']:
                        captured_apis.append({
                            'method': request.method,
                            'url': request.url,
                            'post_data': request.post_data,
                            'headers': dict(request.headers)
                        })
                        captured_urls.append(request.url)

                def on_response(response):
                    all_responses.append({
                        'url': response.url,
                        'status': response.status,
                        'headers': dict(response.headers),
                        'content_type': response.headers.get('content-type', '')
                    })

                page.on('request', on_request)
                page.on('response', on_response)

            # Navigate to the page.
            try:
                response = page.goto(url, timeout=60000, wait_until=wait_until)
                result['status_code'] = response.status if response else None
                result['response_headers'] = dict(response.headers) if response else {}
            except Exception as e:
                # Record navigation failure but continue collecting what we can.
                result['error'] = str(e)

            # Wait for JS to execute (critical — SPAs render after load).
            page.wait_for_timeout(5000)

            # Extract the list of JS files referenced by the page (critical).
            if extract_js_files:
                try:
                    html_content = page.content()
                    js_files = re.findall(r'<script[^>]+src=["\']([^"\']+\.js[^"\']*)["\']', html_content)
                    result['js_files'] = js_files

                    # Detect the frontend tech stack by naive substring checks.
                    tech = []
                    if 'vue' in html_content.lower(): tech.append('Vue')
                    if 'react' in html_content.lower(): tech.append('React')
                    if 'angular' in html_content.lower(): tech.append('Angular')
                    if 'webpack' in html_content.lower(): tech.append('Webpack')
                    if 'element-ui' in html_content.lower(): tech.append('ElementUI')
                    if 'ant-design' in html_content.lower(): tech.append('AntDesign')
                    result['tech_stack'] = tech

                    # [NEW] Extract sensitive URLs from the rendered HTML.
                    html_urls = extract_urls_from_html(html_content, target_domain)
                    result['sensitive_urls'].extend(html_urls)

                except Exception as e:
                    result['js_extract_error'] = str(e)

            # [NEW] Extract external URLs and IP addresses.
            if extract_external_urls or extract_ip_addresses:
                all_urls = set()
                all_ips = set()
                all_domains = set()

                # From the intercepted request URLs.
                for req_url in captured_urls:
                    parsed = urlparse(req_url)

                    # Collect off-target domains.
                    if parsed.netloc and parsed.netloc != target_domain:
                        all_domains.add(parsed.netloc)

                    # Collect the full URL.
                    all_urls.add(req_url)

                    # Extract any IPs embedded in the URL.
                    if extract_ip_addresses:
                        ips = extract_ip_addresses_from_string(req_url)
                        all_ips.update(ips)

                # From the response headers (Location, Link, CSP, ...).
                for resp in all_responses:
                    headers_str = json.dumps(resp.get('headers', {}))

                    if extract_external_urls:
                        # Pull URLs out of the serialized headers.
                        url_in_headers = re.findall(r'https?://[^\s"\'<>]+', headers_str)
                        all_urls.update(url_in_headers)

                        # Extract their domains.
                        # NOTE(review): 'p' here shadows the sync_playwright
                        # handle bound above; harmless since the handle is not
                        # used again, but worth renaming.
                        for u in url_in_headers:
                            p = urlparse(u)
                            if p.netloc:
                                all_domains.add(p.netloc)

                    if extract_ip_addresses:
                        ips = extract_ip_addresses_from_string(headers_str)
                        all_ips.update(ips)

                # NOTE(review): this assignment REPLACES the sensitive_urls
                # collected from the HTML above instead of extending them —
                # confirm whether that loss is intended.
                result['sensitive_urls'] = list(all_urls)
                result['ip_addresses'] = list(all_ips)
                result['domains'] = list(all_domains)

            # Simulate interaction (enhanced: attempt a login to trigger APIs).
            if interact:
                try:
                    # 1. Locate login form fields and fill dummy credentials.
                    inputs = page.query_selector_all('input')
                    for inp in inputs[:10]:
                        try:
                            inp_type = inp.get_attribute('type')
                            inp_name = inp.get_attribute('name')
                            inp_id = inp.get_attribute('id')

                            # Fill in a username.
                            if inp_type == 'text' or inp_name in ['username', 'user', 'account', 'uname'] or inp_id in ['username', 'user']:
                                inp.fill('admin')
                            # Fill in a password.
                            elif inp_type == 'password':
                                inp.fill('admin123')
                        except:
                            pass

                    # 2. Find a login-looking button and click it.
                    buttons = page.query_selector_all('button')
                    for btn in buttons[:5]:
                        try:
                            btn_text = btn.inner_text()
                            if any(k in btn_text.lower() for k in ['login', '登录', 'submit', '确定']):
                                btn.click()
                                page.wait_for_timeout(2000)  # wait for the login request
                                break
                        except:
                            pass

                    # 3. Directly submit any form that contains a password field.
                    try:
                        page.evaluate("""
                            () => {
                                const forms = document.querySelectorAll('form');
                                forms.forEach(f => {
                                    if (f.querySelector('input[type="password"]')) {
                                        f.submit();
                                    }
                                });
                            }
                        """)
                        page.wait_for_timeout(2000)
                    except:
                        pass

                    # 4. Let the post-login API requests be captured.
                    page.wait_for_timeout(3000)

                except:
                    pass

            # Collect localStorage.
            try:
                ls = page.evaluate("""
                    () => {
                        const data = {};
                        try {
                            for (let i = 0; i < localStorage.length; i++) {
                                const key = localStorage.key(i);
                                data[key] = localStorage.getItem(key);
                            }
                        } catch (e) {}
                        return data;
                    }
                """)
                result['storage']['localStorage'] = ls

                # [NEW] Pull sensitive material out of localStorage.
                if ls:
                    for key, value in ls.items():
                        if any(k in key.lower() for k in ['token', 'key', 'secret', 'auth']):
                            result['sensitive_urls'].append(f"localStorage:{key}")
                        # Extract URLs from the stored value.
                        urls = extract_urls_from_string(str(value))
                        result['sensitive_urls'].extend(urls)
                        # Extract IPs from the stored value.
                        ips = extract_ip_addresses_from_string(str(value))
                        result['ip_addresses'].extend(ips)

            except:
                pass

            # Collect cookies (values truncated to 50 chars to limit exposure).
            try:
                cookies = context.cookies()
                result['storage']['cookies'] = [
                    {'name': c['name'], 'value': c['value'][:50]}
                    for c in cookies
                ]
            except:
                pass

            # Collect forms (action/method plus every input's name/type/id).
            try:
                forms = page.evaluate("""
                    () => {
                        const forms = [];
                        document.querySelectorAll('form').forEach(f => {
                            const formData = {
                                action: f.action,
                                method: f.method,
                                inputs: []
                            };
                            f.querySelectorAll('input').forEach(inp => {
                                formData.inputs.push({
                                    name: inp.name,
                                    type: inp.type,
                                    id: inp.id
                                });
                            });
                            forms.push(formData);
                        });
                        return forms;
                    }
                """)
                result['forms'] = forms
            except:
                pass

            # Collect the page title.
            try:
                result['page_title'] = page.title()
            except:
                pass

            # Expose the captured API requests.
            result['apis'] = captured_apis

            # [NEW] Test-on-login: analyze login requests as soon as found.
            login_test = analyze_login_requests(captured_apis, url)
            if login_test:
                result['login_test_hint'] = login_test

            browser.close()

    except Exception as e:
        result['error'] = str(e)

    return result
348
+
349
+
350
def analyze_login_requests(captured_apis, target_url):
    """
    Analyze captured browser requests for login endpoints and build test hints.

    Args:
        captured_apis: list of {'url', 'method', 'post_data', ...} dicts as
            recorded by the browser request interceptor.
        target_url: the page URL being scanned (currently unused; kept for
            interface compatibility).

    Returns:
        dict {'found_login', 'url', 'method', 'test_hints'} for the FIRST
        login-looking request, or None when none is seen.
    """
    login_keywords = ['login', 'signin', 'auth', 'token', 'pwd', 'password']

    for api in captured_apis:
        url = api.get('url', '')
        method = api.get('method', 'GET')
        post_data = api.get('post_data', '')

        # A request is login-related if a keyword appears in the URL or body.
        is_login = any(k in url.lower() for k in login_keywords)
        if post_data and any(k in str(post_data).lower() for k in login_keywords):
            is_login = True

        if not is_login:
            continue

        # Build test hints for this login request.
        test_hints = []

        # GET login with credentials leaking through the query string.
        if method == 'GET' and 'password' in url:
            test_hints.append({
                'type': 'GET_login_with_password_in_url',
                'url': url,
                'risk': 'HIGH',
                'description': '密码可能暴露在URL中'
            })

        # POST login: candidate for SQL injection / weak-password testing.
        if method == 'POST' and post_data:
            test_hints.append({
                'type': 'POST_login_test',
                'url': url,
                'method': 'POST',
                'body': post_data,
                'risk': 'MEDIUM',
                'description': '立即测试SQL注入、弱密码'
            })

        # Attach SQL injection payloads to the first hint.
        # BUGFIX: the original indexed test_hints[0] unconditionally, which
        # raised IndexError for login requests that produced no hints (e.g. a
        # GET login whose URL does not contain 'password').
        if test_hints:
            test_hints[0]['sql_payloads'] = [
                {"username": "admin'--", "password": "any"},
                {"username": "admin' OR '1'='1", "password": "any"},
            ]

        return {
            'found_login': True,
            'url': url,
            'method': method,
            'test_hints': test_hints
        }

    return None
407
+
408
+
409
def extract_urls_from_html(html_content, target_domain):
    """Collect candidate URLs from raw HTML: off-site hrefs, relative link
    paths, absolute src values, and any quoted http(s) URL literal."""
    found = set()

    # href attributes: keep absolute links to other hosts plus relative paths.
    for link in re.findall(r'href=["\']([^"\']+)["\']', html_content):
        if link.startswith('http'):
            if urlparse(link).netloc != target_domain:
                found.add(link)
        elif link.startswith(('/', './')):
            found.add(link)

    # src attributes: keep absolute URLs regardless of host.
    found.update(
        asset
        for asset in re.findall(r'src=["\']([^"\']+)["\']', html_content)
        if asset.startswith('http')
    )

    # Any quoted absolute URL anywhere in the document (inline JS, templates).
    found.update(re.findall(r'["\'](https?://[^"\']+)["\']', html_content))

    return list(found)
434
+
435
+
436
def extract_urls_from_string(content):
    """Return the distinct http(s) URLs embedded in *content* (unordered)."""
    matched = re.findall(r'https?://[^\s"\'<>]+', content)
    return list(set(matched))
445
+
446
+
447
def extract_ip_addresses_from_string(content):
    """Return the distinct IPv4 addresses (dotted quads with 0-255 octets)
    found in *content* (unordered)."""
    octet = r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
    dotted_quad = r'\b(?:' + octet + r'\.){3}' + octet + r'\b'
    return list(set(re.findall(dotted_quad, content)))
457
+
458
+
459
def extract_apis_from_browser(result):
    """Return the captured API list from a browser_collect() result,
    de-duplicated by URL (first occurrence wins)."""
    seen = {}
    for entry in result.get('apis', []):
        # setdefault keeps the first record seen for each URL.
        seen.setdefault(entry['url'], entry)
    return list(seen.values())
471
+
472
+
473
def extract_js_api_patterns(js_content):
    """
    Extract API endpoint patterns and configuration from JS source (regex mode;
    see extract_with_ast for the AST mode).

    Args:
        js_content: the JavaScript source text to scan.

    Returns dict:
        base_url: str|None - discovered baseURL configuration
        api_paths: Array<string> - discovered API paths
        env_vars: object - discovered environment variables
        sensitive_urls: Array<string> - discovered sensitive URLs/credentials
        ip_addresses: Array<string> - discovered IP addresses
    """
    # Inlined URL/IP patterns so this function is self-contained.
    url_re = r'https?://[^\s"\'<>]+'
    ipv4_re = r'\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b'

    base_url = None
    api_paths = set()
    env_vars = {}
    sensitive_urls = set()
    ip_addresses = set()

    # baseURL configuration (plain assignment or axios.create options).
    baseurl_patterns = [
        r'baseURL\s*[:=]\s*["\']([^"\']+)["\']',
        r'axios\.create\s*\(\s*\{[^}]*baseURL\s*[:=]\s*["\']([^"\']+)["\']',
    ]
    for pattern in baseurl_patterns:
        match = re.search(pattern, js_content)
        if match:
            base_url = match.group(1)
            break

    # API paths: quoted path literals and HTTP client call arguments.
    api_patterns = [
        r'["\'](/(?:user|auth|admin|login|logout|api|v\d|frame|hszh|table|dashboard|supplement|attach|code|module|file)[a-zA-Z0-9_/?=&-]*)["\']',
        r'axios\.[a-z]+\(["\']([^"\']+)["\']',
        r'fetch\(["\']([^"\']+)["\']',
        r'\.get\(["\']([^"\']+)["\']',
        r'\.post\(["\']([^"\']+)["\']',
    ]
    for pattern in api_patterns:
        matches = re.findall(pattern, js_content, re.IGNORECASE)
        for m in matches:
            # Length bounds filter out junk matches.
            if isinstance(m, str) and 2 < len(m) < 200:
                api_paths.add(m)

    # Environment variables (Vue build-time vars, process.env assignments).
    env_patterns = [
        r'(VUE_APP_\w+)\s*[:=]\s*["\']([^"\']+)["\']',
        r'process\.env\.(\w+)\s*[:=]\s*["\']([^"\']+)["\']',
    ]
    for pattern in env_patterns:
        for var_name, var_value in re.findall(pattern, js_content):
            env_vars[var_name] = var_value
            # Env values may themselves contain sensitive URLs or IPs.
            sensitive_urls.update(re.findall(url_re, var_value))
            ip_addresses.update(re.findall(ipv4_re, var_value))

    # Credential patterns.
    # BUGFIX: the original first pattern had an unbalanced '(' which made
    # re.findall raise re.error, crashing this function on every call; it
    # also mixed tuple results from multi-group patterns into the string set.
    credential_patterns = [
        r'["\'](?:api[_-]?key|secret[_-]?key|access[_-]?token|private[_-]?key)["\']\s*[:=]\s*["\']([^"\']+)["\']',
        r'(?:password|passwd|pwd)\s*[:=]\s*["\']([^"\']+)["\']',
        r'["\'](https?://[^"\']*[:@][^"\']+@[^"\']+)["\']',  # URL with embedded credentials
    ]
    for pattern in credential_patterns:
        for m in re.findall(pattern, js_content, re.IGNORECASE):
            if isinstance(m, str) and m:
                sensitive_urls.add(m)

    # IPs anywhere in the JS.
    ip_addresses.update(re.findall(ipv4_re, js_content))

    # External URLs anywhere in the JS.
    sensitive_urls.update(re.findall(url_re, js_content))

    return {
        'base_url': base_url,
        'api_paths': list(api_paths),
        'env_vars': env_vars,
        'sensitive_urls': list(sensitive_urls),
        'ip_addresses': list(ip_addresses)
    }
554
+
555
+
556
# [NEW] AST-mode parsing (via esprima)
def extract_with_ast(js_content):
    """
    Parse JS source into an AST with esprima and extract string literals,
    object properties, function-call names and import sources.

    Requires: pip install esprima (optional; extract_js_api_patterns is the
    regex fallback).

    Args:
        js_content: JavaScript source text.

    Returns:
        dict with keys string_literals / object_properties / function_calls /
        import_sources, or {'error': ...} when esprima is missing or parsing
        fails.
    """
    try:
        import esprima

        tree = esprima.parse(js_content, sourceType='script', range=True)

        info = {
            'string_literals': [],
            'object_properties': {},
            'function_calls': [],
            'import_sources': []
        }

        def walk(node, depth=0):
            # Cap recursion depth to stay safe on pathological inputs.
            if depth > 20:
                return

            node_type = getattr(node, 'type', None)

            # String literals.
            if node_type == 'Literal' and isinstance(node.value, str):
                info['string_literals'].append(node.value)

            # Object properties (key -> literal value when present).
            elif node_type == 'Property':
                key = getattr(node, 'key', None)
                value = getattr(node, 'value', None)
                if key is not None and hasattr(key, 'value'):
                    info['object_properties'][key.value] = getattr(value, 'value', None)

            # Direct function calls (named callees only).
            elif node_type == 'CallExpression':
                callee = getattr(node, 'callee', None)
                if callee is not None and hasattr(callee, 'name'):
                    info['function_calls'].append(callee.name)

            # Import declarations.
            elif node_type == 'ImportDeclaration':
                source = getattr(node, 'source', None)
                if source is not None and hasattr(source, 'value'):
                    info['import_sources'].append(source.value)

            # Recurse into children; attributes may be nodes or lists of nodes.
            for child in getattr(node, '__dict__', {}).values():
                if isinstance(child, list):
                    for item in child:
                        if hasattr(item, 'type'):
                            walk(item, depth + 1)
                elif hasattr(child, 'type'):
                    walk(child, depth + 1)

        # BUGFIX: the original called traverse(ast.body) on the body *list*,
        # which has neither .type nor .__dict__, so the walk aborted with an
        # AttributeError and the function always returned an error dict even
        # when esprima was installed. Iterate the top-level nodes instead.
        for top_level in tree.body:
            walk(top_level)

        # De-duplicate the flat lists.
        info['string_literals'] = list(set(info['string_literals']))
        info['function_calls'] = list(set(info['function_calls']))
        info['import_sources'] = list(set(info['import_sources']))

        return info

    except ImportError:
        return {'error': 'esprima not installed, use regex fallback'}
    except Exception as e:
        return {'error': str(e)}
630
+
631
+
632
if __name__ == '__main__':
    # Manual smoke test: run the collector against a public demo page and
    # print summary counts of everything it gathered.
    demo = browser_collect({
        'url': 'https://example.com',
        'wait_until': 'networkidle',
        'interact': True
    })
    print(f"APIs: {len(demo.get('apis', []))}")
    print(f"JS Files: {len(demo.get('js_files', []))}")
    print(f"Tech Stack: {demo.get('tech_stack', [])}")
    print(f"Sensitive URLs: {len(demo.get('sensitive_urls', []))}")
    print(f"IP Addresses: {len(demo.get('ip_addresses', []))}")
    print(f"Domains: {len(demo.get('domains', []))}")
    print(f"Storage: {len(demo.get('storage', {}))}")