opencode-api-security-testing 2.0.0 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/README.md +30 -24
  2. package/SKILL.md +1797 -0
  3. package/core/advanced_recon.py +788 -0
  4. package/core/agentic_analyzer.py +445 -0
  5. package/core/analyzers/api_parser.py +210 -0
  6. package/core/analyzers/response_analyzer.py +212 -0
  7. package/core/analyzers/sensitive_finder.py +184 -0
  8. package/core/api_fuzzer.py +422 -0
  9. package/core/api_interceptor.py +525 -0
  10. package/core/api_parser.py +955 -0
  11. package/core/browser_tester.py +479 -0
  12. package/core/cloud_storage_tester.py +1330 -0
  13. package/core/collectors/__init__.py +23 -0
  14. package/core/collectors/api_path_finder.py +300 -0
  15. package/core/collectors/browser_collect.py +645 -0
  16. package/core/collectors/browser_collector.py +411 -0
  17. package/core/collectors/http_client.py +111 -0
  18. package/core/collectors/js_collector.py +490 -0
  19. package/core/collectors/js_parser.py +780 -0
  20. package/core/collectors/url_collector.py +319 -0
  21. package/core/context_manager.py +682 -0
  22. package/core/deep_api_tester_v35.py +844 -0
  23. package/core/deep_api_tester_v55.py +366 -0
  24. package/core/dynamic_api_analyzer.py +532 -0
  25. package/core/http_client.py +179 -0
  26. package/core/models.py +296 -0
  27. package/core/orchestrator.py +890 -0
  28. package/core/prerequisite.py +227 -0
  29. package/core/reasoning_engine.py +1042 -0
  30. package/core/response_classifier.py +606 -0
  31. package/core/runner.py +938 -0
  32. package/core/scan_engine.py +599 -0
  33. package/core/skill_executor.py +435 -0
  34. package/core/skill_executor_v2.py +670 -0
  35. package/core/skill_executor_v3.py +704 -0
  36. package/core/smart_analyzer.py +687 -0
  37. package/core/strategy_pool.py +707 -0
  38. package/core/testers/auth_tester.py +264 -0
  39. package/core/testers/idor_tester.py +200 -0
  40. package/core/testers/sqli_tester.py +211 -0
  41. package/core/testing_loop.py +655 -0
  42. package/core/utils/base_path_dict.py +255 -0
  43. package/core/utils/payload_lib.py +167 -0
  44. package/core/utils/ssrf_detector.py +220 -0
  45. package/core/verifiers/vuln_verifier.py +536 -0
  46. package/package.json +17 -13
  47. package/references/asset-discovery.md +119 -612
  48. package/references/graphql-guidance.md +65 -641
  49. package/references/intake.md +84 -0
  50. package/references/report-template.md +131 -38
  51. package/references/rest-guidance.md +55 -526
  52. package/references/severity-model.md +52 -264
  53. package/references/test-matrix.md +65 -263
  54. package/references/validation.md +53 -400
  55. package/scripts/postinstall.js +46 -0
  56. package/src/index.ts +259 -275
  57. package/agents/cyber-supervisor.md +0 -55
  58. package/agents/probing-miner.md +0 -42
  59. package/agents/resource-specialist.md +0 -31
  60. package/commands/api-security-testing-scan.md +0 -59
  61. package/commands/api-security-testing-test.md +0 -49
  62. package/commands/api-security-testing.md +0 -72
  63. package/tsconfig.json +0 -17
@@ -0,0 +1,955 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ 增强版 API 端点解析器
5
+
6
+ 功能:
7
+ 1. 使用 AST 和正则提取 API 端点和参数
8
+ 2. 识别参数类型 (path, query, body)
9
+ 3. 识别参数约束 (required, optional, enum)
10
+ 4. 父路径探测
11
+ 5. API 语义分析
12
+
13
+ 使用方式:
14
+ from core.api_parser import APIEndpointParser
15
+
16
+ parser = APIEndpointParser(target, session)
17
+ result = parser.parse_js_files(js_files)
18
+ result = parser.probe_parent_paths()
19
+ """
20
+
21
+ import re
22
+ import sys
23
+ import json
24
+ import requests
25
+ from typing import Dict, List, Set, Tuple, Optional, Any
26
+ from dataclasses import dataclass, field
27
+ from urllib.parse import urljoin, urlparse, parse_qs
28
+ from enum import Enum
29
+
30
+ sys.path.insert(0, '/workspace/skill-play/API-Security-Testing-Optimized')
31
+
32
+
33
class ParamType(Enum):
    """Kind of API parameter, classified by the part of the request that carries it."""

    # Embedded in the URL path, e.g. /users/{id}
    PATH = "path"
    # Part of the query string, e.g. /users?id=1
    QUERY = "query"
    # Carried in the request body (POST data)
    BODY = "body"
    # Sent as an HTTP header, e.g. Authorization
    HEADER = "header"
39
+
40
+
41
class ParamLocation(Enum):
    """Concrete place in the client call where a parameter was found."""

    URL = "url"
    FORM = "form"
    JSON = "json"
    # The ``params`` option of an axios request config
    PARAMS = "params"
    HEADER = "header"
48
+
49
+
50
@dataclass
class APIParam:
    """One parameter of an API endpoint.

    ``param_type`` and ``location`` are annotated as forward references to the
    ``ParamType`` and ``ParamLocation`` enums defined in this module.
    """

    name: str
    param_type: "ParamType"
    location: "ParamLocation"
    required: bool = True
    # One of: string, number, boolean, object, array
    data_type: str = "string"
    # Each instance gets its own list (default_factory), never a shared one
    enum_values: List[Any] = field(default_factory=list)
    description: str = ""
    example: Any = None
61
+
62
+
63
@dataclass
class ParsedEndpoint:
    """An API endpoint recovered from client-side code.

    ``params`` holds ``APIParam`` objects (annotated as a forward reference to
    the dataclass defined in this module).
    """

    path: str
    method: str = "GET"
    params: List["APIParam"] = field(default_factory=list)
    source: str = ""
    raw_url: str = ""
    auth_required: bool = False
    description: str = ""
    # Coarse category such as user_query, file_upload, auth, etc.
    semantic_type: str = ""

    def get_path_params(self) -> List["APIParam"]:
        """Return the parameters whose type is ``ParamType.PATH``."""
        selected = []
        for param in self.params:
            if param.param_type == ParamType.PATH:
                selected.append(param)
        return selected

    def get_query_params(self) -> List["APIParam"]:
        """Return the parameters whose type is ``ParamType.QUERY``."""
        selected = []
        for param in self.params:
            if param.param_type == ParamType.QUERY:
                selected.append(param)
        return selected

    def has_params(self) -> bool:
        """Return True when at least one parameter is attached."""
        return len(self.params) != 0

    def to_dict(self) -> Dict:
        """Return a plain-dict summary: path, method, params, source, semantic_type.

        Each parameter is reduced to its name, the enum value of its type, and
        its ``required`` flag.
        """
        param_rows = []
        for param in self.params:
            param_rows.append({
                'name': param.name,
                'type': param.param_type.value,
                'required': param.required,
            })
        summary = {}
        summary['path'] = self.path
        summary['method'] = self.method
        summary['params'] = param_rows
        summary['source'] = self.source
        summary['semantic_type'] = self.semantic_type
        return summary
92
+
93
+
94
+ class APIEndpointParser:
95
+ """API 端点解析器"""
96
+
97
def __init__(self, target: str, session: "requests.Session" = None):
    """Set up the parser for one target.

    Args:
        target: Base URL of the site being analysed.
        session: HTTP session to reuse. When falsy, a new ``requests.Session``
            is created. In both cases the session's ``User-Agent`` header is
            overwritten with a desktop-browser value.
    """
    http = session if session else requests.Session()
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }

    self.target = target
    self.session = http
    self.session.headers.update(browser_headers)

    # Results accumulated by the parsing methods
    self.parsed_endpoints: List[ParsedEndpoint] = []
    self.parent_paths: Set[str] = set()
    self.js_files: List[str] = []
107
+
108
def discover_js_files(self) -> List[str]:
    """Discover JavaScript files referenced by the target page.

    Fetches ``self.target``, collects every ``<script src=...>`` value that
    ends in ``.js``, resolves it against the target URL and appends it to
    ``self.js_files`` when it is served by the target host (or one of its
    sub-domains) and has not been recorded yet.

    Returns:
        The accumulated ``self.js_files`` list, or an empty list when the
        page cannot be fetched or processed (a warning is printed).
    """
    script_src = r'<script[^>]+src=["\']?([^"\'>\s]+\.js)["\']?'

    try:
        resp = self.session.get(self.target, timeout=10)
        target_host = urlparse(self.target).netloc

        for src in re.findall(script_src, resp.text):
            # urljoin resolves absolute, protocol-relative (//host/x.js),
            # root-relative (/x.js) and document-relative (js/x.js)
            # references the same way a browser does.
            url = urljoin(self.target, src)

            # Compare parsed hosts instead of searching the whole URL for
            # the host name, which would also accept foreign URLs that
            # merely mention the target host in their path or query.
            host = urlparse(url).netloc
            same_site = host == target_host or host.endswith('.' + target_host)
            if not same_site:
                continue

            if url not in self.js_files:
                self.js_files.append(url)

        return self.js_files

    except Exception as e:
        # Discovery is best-effort: report the failure and yield nothing.
        print(f" [WARN] JS 文件发现失败: {e}")
        return []
143
+
144
def parse_js_files(self, js_files: List[str] = None) -> "List[ParsedEndpoint]":
    """Download JS files and collect the API endpoints they mention.

    Args:
        js_files: URLs of the scripts to analyse. When ``None`` the list is
            obtained from ``discover_js_files()``.

    Returns:
        ``self.parsed_endpoints`` after the new findings were appended,
        de-duplicated and the parent paths were recomputed.
    """
    sources = self.discover_js_files() if js_files is None else js_files

    print(f" [API Parser] 解析 {len(sources)} 个 JS 文件...")

    for js_url in sources:
        try:
            body = self.session.get(js_url, timeout=10).text

            # Run every extraction strategy over the same script text
            found = []
            for extract in (
                self._extract_axios_endpoints,
                self._extract_fetch_endpoints,
                self._extract_path_patterns,
                self._extract_api_definition,
            ):
                found.extend(extract(body, js_url))

            self.parsed_endpoints.extend(found)

        except Exception as e:
            # A failing script must not stop the remaining ones
            print(f" [WARN] 解析 {js_url}: {e}")

    self._deduplicate()
    self._extract_parent_paths()

    print(f" [API Parser] 发现 {len(self.parsed_endpoints)} 个端点")
    print(f" [API Parser] 发现 {len(self.parent_paths)} 个父路径")

    return self.parsed_endpoints
177
+
178
def _extract_axios_endpoints(self, content: str, source: str) -> List[ParsedEndpoint]:
    """Extract endpoints from axios calls with a literal URL.

    Recognises ``axios.<verb>('...')``, ``this.$axios.<verb>('...')`` and
    ``axios.<verb>(`...`)`` for the verbs get/post/put/delete/patch
    (case-insensitive).

    Args:
        content: JavaScript source text.
        source: URL of the script (not used by this extractor).

    Returns:
        One ``ParsedEndpoint`` per regex match; duplicates are not removed here.
    """
    known_verbs = ['get', 'post', 'put', 'delete', 'patch', 'head', 'options']
    call_patterns = [
        (r'axios\.(get|post|put|delete|patch)\s*\(\s*["\']([^"\']+)["\']', 'axios'),
        (r'this\.\$axios\.(get|post|put|delete|patch)\s*\(\s*["\']([^"\']+)["\']', 'vue_axios'),
        (r'axios\.(get|post|put|delete|patch)\s*\(\s*`([^`]+)`', 'axios_template'),
    ]

    found = []
    for regex, label in call_patterns:
        for verb, url in re.findall(regex, content, re.IGNORECASE):
            # Fall back to GET for anything that is not a known verb
            method = verb.upper() if verb.lower() in known_verbs else 'GET'

            # Parameters visible in the URL first, then those found in the
            # surrounding call configuration; the URL-derived ones win on
            # name clashes.
            merged = list(self._extract_params_from_url(url))
            for extra in self._extract_params_from_js_context(content, url):
                if all(extra.name != kept.name for kept in merged):
                    merged.append(extra)

            found.append(ParsedEndpoint(
                path=url,
                method=method,
                params=merged,
                source=f'axios_{label}',
                raw_url=url,
                semantic_type=self._infer_semantic_type(url),
            ))

    return found
220
+
221
def _extract_fetch_endpoints(self, content: str, source: str) -> List[ParsedEndpoint]:
    """Extract endpoints from ``fetch('...')`` calls with a quoted URL.

    Every endpoint is recorded with method ``GET``; the request options of
    the call are not inspected.

    Args:
        content: JavaScript source text.
        source: URL of the script (not used by this extractor).
    """
    quoted_fetch = r'fetch\s*\(\s*["\']([^"\']+)["\']'

    return [
        ParsedEndpoint(
            path=url,
            method='GET',
            params=self._extract_params_from_url(url),
            source='fetch',
            raw_url=url,
            semantic_type=self._infer_semantic_type(url),
        )
        for url in re.findall(quoted_fetch, content, re.IGNORECASE)
    ]
243
+
244
def _extract_path_patterns(self, content: str, source: str) -> List[ParsedEndpoint]:
    """Extract quoted string literals that look like API paths.

    Each pattern is tried over the whole text; a hit is kept only when
    ``_is_valid_api_path`` accepts it. All endpoints found this way are
    recorded with method ``GET`` and the pattern label as their source.

    Args:
        content: JavaScript source text.
        source: URL of the script (not used by this extractor).
    """
    path_patterns = [
        # /api/users/{id}
        (r'["\'](/api/[a-zA-Z0-9_/{}?-]+)["\']', 'GET', 'api_path'),
        # /users/{id}
        (r'["\'](/users/[a-zA-Z0-9_/{}?-]+)["\']', 'GET', 'users_path'),
        # /v1/admin/*
        (r'["\'](/v\d+/[a-zA-Z0-9_/{}?-]+)["\']', 'GET', 'versioned_api'),
        # Generic RESTful shape
        (r'["\'](/[a-z]+/[a-z]+/[a-zA-Z0-9_/{}?-]+)["\']', 'GET', 'restful'),
        # /xxx-server/api/xxx (e.g. /auth-server/api/xxx)
        (r'["\'](/[a-zA-Z0-9]+-server/api/[a-zA-Z0-9_/{}?-]+)["\']', 'GET', 'server_api'),
        # /idbd-api/xxx
        (r'["\'](/idbd-api/[a-zA-Z0-9_/{}?-]+)["\']', 'GET', 'idbd_api'),
        # /user-server/api/xxx
        (r'["\'](/user-server/api/[a-zA-Z0-9_/{}?-]+)["\']', 'GET', 'user_server_api'),
        # /auth-server/api/xxx
        (r'["\'](/auth-server/api/[a-zA-Z0-9_/{}?-]+)["\']', 'GET', 'auth_server_api'),
    ]

    found = []
    for regex, default_method, label in path_patterns:
        for url in re.findall(regex, content):
            if not self._is_valid_api_path(url):
                continue

            # URL-derived parameters first, then context-derived ones that
            # do not clash by name.
            merged = list(self._extract_params_from_url(url))
            for extra in self._extract_params_from_js_context(content, url):
                if all(extra.name != kept.name for kept in merged):
                    merged.append(extra)

            found.append(ParsedEndpoint(
                path=url,
                method=default_method,
                params=merged,
                source=label,
                raw_url=url,
                semantic_type=self._infer_semantic_type(url),
            ))

    return found
293
+
294
def _extract_api_definition(self, content: str, source: str) -> List[ParsedEndpoint]:
    """Extract endpoints from request-config style definitions.

    Looks for ``method: '<verb>'`` / ``http_method: '<verb>'`` entries
    (e.g. ``{ method: 'get', url: '/api/users' }``) and pairs each one with
    the first valid API path quoted within the 200 characters that follow it.

    Args:
        content: JavaScript source text.
        source: URL of the script (not used by this extractor).

    Returns:
        One ``ParsedEndpoint`` (source ``'api_definition'``) per method entry
        for which a path was found.
    """
    valid_methods = ('GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'HEAD', 'OPTIONS')

    # A single pattern is enough: "method" is one of its alternatives.
    method_entry = r'["\']?(method|http_method)["\']?\s*:\s*["\'](\w+)["\']'
    quoted_token = r'["\']([/a-zA-Z0-9_{}?-]+)["\']'

    endpoints = []
    for hit in re.finditer(method_entry, content, re.IGNORECASE):
        method_str = hit.group(2).upper()
        if method_str not in valid_methods:
            continue

        # Search right after THIS match. Using content.find() on the key
        # name would always land on the first occurrence in the file, and
        # starting at the key would pick up the verb literal as the "URL".
        window = content[hit.end():hit.end() + 200]
        url = next(
            (token for token in re.findall(quoted_token, window)
             if self._is_valid_api_path(token)),
            None,
        )
        if url is None:
            continue

        endpoints.append(ParsedEndpoint(
            path=url,
            method=method_str,
            params=self._extract_params_from_url(url),
            source='api_definition',
            raw_url=url,
            semantic_type=self._infer_semantic_type(url),
        ))

    return endpoints
332
+
333
def _extract_params_from_url(self, url: str) -> List[APIParam]:
    """Derive parameters from the text of a URL.

    Three sources are combined, in this order:

    1. Explicit placeholders ``{name}`` and ``:name`` (path parameters).
    2. Parameters guessed from well-known REST shapes such as
       ``/users/<value>`` or ``/page/<n>``; a guess is skipped when a
       parameter of that name already exists.
    3. Keys of the query string after the first ``?`` (optional, string).
    """
    params = []

    # 1. Explicit path placeholders
    for placeholder in (
        r'\{([a-zA-Z_][a-zA-Z0-9_]*)\}',  # {id}
        r':([a-zA-Z_][a-zA-Z0-9_]*)',  # :id
    ):
        for name in re.findall(placeholder, url):
            params.append(APIParam(
                name=name,
                param_type=ParamType.PATH,
                location=ParamLocation.URL,
                required=True,
                data_type=self._infer_param_type(name),
            ))

    # 2. REST-style guesses: (pattern, parameter name, kind tag).
    #    The kind tag is stored as data_type; the tag 'id' marks a path
    #    parameter, every other tag a query parameter.
    restful_patterns = [
        (r'/users?/([a-zA-Z0-9_-]+)/?', 'user_id', 'id'),
        (r'/orders?/([a-zA-Z0-9_-]+)/?', 'order_id', 'id'),
        (r'/products?/([a-zA-Z0-9_-]+)/?', 'product_id', 'id'),
        (r'/categories?/([a-zA-Z0-9_-]+)/?', 'category_id', 'id'),
        (r'/files?/([a-zA-Z0-9_-]+)/?', 'file_id', 'id'),
        (r'/records?/([a-zA-Z0-9_-]+)/?', 'record_id', 'id'),
        (r'/ids?/([a-zA-Z0-9_-]+)/?', 'ids', 'id'),
        (r'/ids,([a-zA-Z0-9_-]+)', 'ids', 'array'),
        (r'/page/(\d+)', 'page', 'page'),
        (r'/p/(\d+)', 'page', 'page'),
        (r'/(\d+)/page', 'page', 'page'),
        (r'/size/(\d+)', 'size', 'size'),
        (r'/limit/(\d+)', 'limit', 'limit'),
        (r'/offset/(\d+)', 'offset', 'offset'),
    ]

    known_names = {p.name for p in params}
    for regex, name, tag in restful_patterns:
        if name in known_names or not re.search(regex, url):
            continue
        params.append(APIParam(
            name=name,
            param_type=ParamType.PATH if tag == 'id' else ParamType.QUERY,
            location=ParamLocation.URL,
            required=True,
            data_type=tag,
        ))
        known_names.add(name)

    # 3. Query-string keys (?key=value&...)
    if '?' in url:
        for pair in url.split('?')[1].split('&'):
            if '=' not in pair:
                continue
            key = pair.split('=')[0]
            if not key or key in known_names:
                continue
            params.append(APIParam(
                name=key,
                param_type=ParamType.QUERY,
                location=ParamLocation.URL,
                required=False,
                data_type='string',
            ))
            known_names.add(key)

    return params
406
+
407
def _extract_params_from_js_context(self, content: str, api_path: str) -> List[APIParam]:
    """Collect parameters for ``api_path`` from the surrounding JS code.

    Simplified, regex-only strategy:

    * axios calls of the form ``axios.<verb>('<path>', { ... })`` whose path
      contains ``api_path`` (or is contained in it): every ``key:`` found in
      the captured config text becomes a parameter. It is a query parameter
      when the text mentions ``params`` or ``query``, otherwise a body one.
    * ``fetch('<url>?a=1&b=2')`` calls related to ``api_path`` in the same
      way: every query-string key becomes an optional query parameter.

    Parameters are unique by name; the first occurrence wins.
    """
    collected = []

    def is_new(name):
        return all(existing.name != name for existing in collected)

    # 1. axios call configuration, e.g.
    #    axios.get('/path', { params: { id: 1, page: 1 } })
    #    axios.post('/path', { data: { name: 'xxx' } })
    axios_patterns = [
        (r'axios\.\w+\s*\(\s*["\']([^"\']+)["\']\s*,\s*\{([^}]+)\}', 'axios_config'),
        (r'axios\.\w+\s*\(\s*["\']([^"\']+)["\']\s*,\s*\{([^}]+)\}\s*\)', 'axios_config'),
    ]

    for regex, _label in axios_patterns:
        for hit in re.findall(regex, content, re.DOTALL):
            if not (isinstance(hit, tuple) and len(hit) >= 2):
                continue
            call_path, config_str = hit[0], hit[1]

            # Only configs whose path is related to the requested one
            if api_path not in call_path and call_path not in api_path:
                continue

            for key in re.findall(r'(\w+)\s*:', config_str):
                if not is_new(key):
                    continue
                mentions_params = 'params' in config_str
                collected.append(APIParam(
                    name=key,
                    param_type=(ParamType.QUERY
                                if mentions_params or 'query' in config_str
                                else ParamType.BODY),
                    location=ParamLocation.PARAMS if mentions_params else ParamLocation.JSON,
                    required=True,
                    data_type=self._infer_param_type(key),
                ))

    # 2. fetch('/path?param1=value1&param2=value2')
    if 'fetch' in content.lower():
        for fetch_url in re.findall(r'fetch\s*\(\s*["\']([^"\']+)["\']', content):
            related = api_path in fetch_url or fetch_url in api_path
            if not related or '?' not in fetch_url:
                continue
            for pair in fetch_url.split('?')[1].split('&'):
                if '=' not in pair:
                    continue
                key = pair.split('=')[0]
                if key and is_new(key):
                    collected.append(APIParam(
                        name=key,
                        param_type=ParamType.QUERY,
                        location=ParamLocation.URL,
                        required=False,
                        data_type='string',
                    ))

    return collected
470
+
471
def _is_valid_api_path(self, path: str) -> bool:
    """Tell whether ``path`` looks like an API path.

    A path qualifies when it is at least two characters long, starts with
    ``/`` followed by an ASCII letter, does not end in a static-asset
    extension and does not live under a static-content directory.
    """
    if not path or len(path) < 2 or not path.startswith('/'):
        return False

    # Obvious non-API locations: static assets and their directories
    static_markers = (
        r'\.(css|js|jpg|jpeg|png|gif|ico|svg|woff|ttf)$',
        r'^/static/',
        r'^/public/',
        r'^/assets/',
        r'^/images/',
        r'^/styles/',
    )
    if any(re.search(marker, path, re.I) for marker in static_markers):
        return False

    # The first segment has to begin with a letter
    return re.match(r'^/[a-zA-Z]', path) is not None
498
+
499
def _infer_param_type(self, param_name: str) -> str:
    """Guess a parameter's data type from its name.

    Resolution order:

    1. The whole (lower-cased) name equals a hint, e.g. ``ids`` -> array.
    2. The name starts with the word ``is`` or ``has`` followed by another
       word (``is_valid``, ``hasChildren``) -> boolean.
    3. One of the name's words equals a hint; the last word is checked
       first, so ``user_ids`` -> array and ``data_count`` -> number.
    4. Fallback: the first hint (in table order) contained anywhere in the
       name, which keeps names without separators such as ``userid``
       working.

    Returns:
        One of ``number``, ``boolean``, ``array``, ``object`` or ``string``
        (the default when nothing matches).
    """
    name_lower = param_name.lower()

    type_hints = {
        'id': 'number',
        'user_id': 'number',
        'order_id': 'number',
        'page': 'number',
        'limit': 'number',
        'size': 'number',
        'count': 'number',
        'page_size': 'number',
        'is_': 'boolean',
        'has_': 'boolean',
        'enable': 'boolean',
        'active': 'boolean',
        'enabled': 'boolean',
        'list': 'array',
        'ids': 'array',
        'data': 'object',
        'info': 'object',
        'params': 'object',
        'options': 'object',
    }

    # 1. Exact match on the whole name
    if name_lower in type_hints:
        return type_hints[name_lower]

    # Split snake_case / camelCase names into lower-cased words
    words = [w.lower() for w in
             re.findall(r'[A-Z]?[a-z]+|[A-Z]+(?![a-z])|\d+', param_name)]

    # 2. Boolean naming convention: is_xxx / hasXxx
    if len(words) > 1 and words[0] in ('is', 'has'):
        return 'boolean'

    # 3. Whole-word match, most significant (last) word first. A plain
    #    substring scan would let 'id' shadow 'ids' and match inside
    #    unrelated words.
    for word in reversed(words):
        if word in type_hints:
            return type_hints[word]

    # 4. Substring fallback in table order
    for hint, dtype in type_hints.items():
        if hint in name_lower:
            return dtype

    return 'string'
530
+
531
def _infer_semantic_type(self, path: str) -> str:
    """Classify an API path into a coarse semantic category.

    The lower-cased path is checked against the keyword lists below in
    table order; the first category with a keyword contained in the path
    wins. Returns ``'unknown'`` when no keyword matches.
    """
    lowered = path.lower()

    semantic_mappings = {
        'auth': ['/auth', '/login', '/logout', '/token', '/signin', '/signup'],
        'user': ['/user', '/profile', '/account', '/avatar'],
        'admin': ['/admin', '/manage', '/system', '/config'],
        'file': ['/file', '/upload', '/download', '/attachment', '/image', '/avatar'],
        'order': ['/order', '/cart', '/checkout', '/payment'],
        'product': ['/product', '/goods', '/item', '/sku'],
        'data': ['/data', '/statistics', '/report', '/analytics'],
        'api': ['/api', '/v1', '/v2', '/rest'],
        'search': ['/search', '/query', '/find'],
        'list': ['/list', '/items', '/records'],
        'detail': ['/detail', '/info', '/view'],
        'create': ['/create', '/add', '/new'],
        'update': ['/update', '/edit', '/modify', '/put'],
        'delete': ['/delete', '/remove', '/del'],
    }

    return next(
        (category
         for category, keywords in semantic_mappings.items()
         if any(keyword in lowered for keyword in keywords)),
        'unknown',
    )
558
+
559
def _extract_parent_paths(self):
    """Rebuild ``self.parent_paths`` from the parsed endpoints.

    For every endpoint all proper prefixes of its path (``/a``, ``/a/b``,
    ... for ``/a/b/c``) plus its first segment are added to the existing
    set. The set is then reduced to the entries accepted by
    ``_is_valid_api_path``.
    """
    for endpoint in self.parsed_endpoints:
        full_path = endpoint.path
        segments = full_path.strip('/').split('/')

        # Every prefix shorter than the path itself
        for depth in range(1, len(segments)):
            candidate = '/' + '/'.join(segments[:depth])
            if candidate != full_path:
                self.parent_paths.add(candidate)

        # The top-level segment is always recorded
        if segments:
            self.parent_paths.add('/' + segments[0])

    self.parent_paths = {
        candidate for candidate in self.parent_paths
        if self._is_valid_api_path(candidate)
    }
583
+
584
def _deduplicate(self):
    """Drop repeated endpoints from ``self.parsed_endpoints``.

    Two endpoints are duplicates when method and path are equal; the first
    occurrence is kept and the original order is preserved.
    """
    first_by_key = {}
    for endpoint in self.parsed_endpoints:
        # setdefault keeps the earliest endpoint stored under a key
        first_by_key.setdefault(f"{endpoint.method}:{endpoint.path}", endpoint)

    self.parsed_endpoints = list(first_by_key.values())
596
+
597
def probe_parent_paths(self, known_api_paths: list = None) -> Dict[str, Any]:
    """Probe the collected parent paths and report how they respond.

    First the backend API prefix is detected: each candidate path is
    requested at the site root, and the first one answering with a JSON
    content type and a status other than 404 contributes its first segment
    as prefix (``/prod-api/auth/login`` -> ``/prod-api/``). Then every
    entry of ``self.parent_paths`` is requested, under that prefix at the
    site root when one was found, otherwise directly below ``self.target``.

    Args:
        known_api_paths: Known backend API paths used for prefix detection
            (e.g. ``['/prod-api/auth/login']``). When ``None`` or empty, a
            built-in list of common paths is tried.

    Returns:
        Mapping of probed path -> details (``path``, ``url``,
        ``original_path``, ``prefix``, ``status``, ``content_type``,
        ``is_api``, ``content_length``) for every path that returned a
        response. Paths whose request failed are omitted.
    """
    print(f" [API Parser] 探测父路径 ({len(self.parent_paths)} 个)...")

    accessible_paths = {}

    parsed = urlparse(self.target)
    root_origin = f"{parsed.scheme}://{parsed.netloc}"
    subpath = parsed.path.rstrip('/')

    # A non-root target path indicates a front-end mounted below the root
    if '/' in subpath:
        print(f" [INFO] 检测到前端子路径: {subpath}")

    # Candidates for prefix detection: caller-supplied or common defaults
    candidates = known_api_paths if known_api_paths else [
        '/prod-api/auth/login',
        '/prod-api/system/user/info',
        '/prod-api/captcha',
        '/api/auth/login',
        '/api/system/user/info',
        '/admin-api/auth/login',
    ]

    api_prefix = None
    for path in candidates:
        path = '/' + path.lstrip('/')
        try:
            r = self.session.get(root_origin + path, timeout=5, allow_redirects=False)
            ct = r.headers.get('Content-Type', '').lower()
        except Exception:
            # Unreachable candidate: try the next one. Unlike a bare
            # except, this lets KeyboardInterrupt/SystemExit through.
            continue

        if 'json' in ct and r.status_code != 404:
            api_prefix = '/' + path.strip('/').split('/')[0] + '/'
            print(f" [API Prefix] 检测到后端前缀: {api_prefix}")
            break

    for parent in self.parent_paths:
        if api_prefix:
            # The backend lives at the site root behind the prefix, both
            # with and without a front-end sub-path.
            test_path = api_prefix + parent.lstrip('/')
            url = root_origin + test_path
        else:
            # No prefix detected: probe the path directly below the target
            test_path = parent
            url = self.target.rstrip('/') + parent

        try:
            r = self.session.get(url, timeout=5, allow_redirects=False)

            ct = r.headers.get('Content-Type', '').lower()
            # JSON content type, or a body that starts like a JSON object
            is_api = 'json' in ct or '{' in r.text[:100]

            result = {
                'path': test_path,
                'url': url,
                'original_path': parent,
                'prefix': api_prefix,
                'status': r.status_code,
                'content_type': ct,
                'is_api': is_api,
                'content_length': len(r.text),
            }

            if is_api:
                print(f" [API] {test_path}: {r.status_code}")
            accessible_paths[test_path] = result

        except Exception:
            # Probing is best-effort; an unreachable path is simply skipped
            continue

    return accessible_paths
691
+
692
def get_endpoints_summary(self) -> str:
    """Build a multi-line text summary of the parsed endpoints.

    The summary lists the total number of endpoints, the count per HTTP
    method (sorted by method name), the count per semantic type (most
    frequent first; an empty type is counted as ``unknown``) and the number
    of endpoints that have at least one parameter.
    """
    endpoints = self.parsed_endpoints

    per_method = {}
    per_semantic = {}
    parametrised = 0
    for endpoint in endpoints:
        per_method[endpoint.method] = per_method.get(endpoint.method, 0) + 1
        label = endpoint.semantic_type or 'unknown'
        per_semantic[label] = per_semantic.get(label, 0) + 1
        if endpoint.has_params():
            parametrised += 1

    lines = [f"端点总数: {len(endpoints)}", "按方法:"]
    lines += [f" {name}: {total}" for name, total in sorted(per_method.items())]

    lines.append("按语义类型:")
    # Stable sort: equally frequent types stay in first-seen order
    ranked = sorted(per_semantic.items(), key=lambda item: -item[1])
    lines += [f" {name}: {total}" for name, total in ranked]

    lines.append(f"带参数端点: {parametrised}")

    return "\n".join(lines)
722
+
723
+
724
+ class APIFuzzer:
725
+ """API Fuzzer - 对发现的端点进行模糊测试"""
726
+
727
def __init__(self, target: str, session: "requests.Session" = None):
    """Set up the fuzzer for one target.

    Args:
        target: Base URL that endpoint paths are appended to.
        session: HTTP session to reuse. When falsy, a new ``requests.Session``
            is created. In both cases the session's ``User-Agent`` header is
            overwritten with a desktop-browser value.
    """
    http = session if session else requests.Session()
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }

    self.target = target
    self.session = http
    self.session.headers.update(browser_headers)
    # Findings of the most recent fuzz_endpoints() run
    self.fuzz_results = []
734
+
735
def fuzz_endpoints(self, endpoints: "List[ParsedEndpoint]", parent_probe_result: Dict = None) -> List[Dict]:
    """Fuzz the discovered endpoints and, optionally, the probed parent paths.

    Every endpoint is tested with the first two SQL-injection payloads, the
    first XSS payload and the first path-traversal payload. For the parent
    probe result, every entry flagged ``is_api`` or answered with status 200
    is requested with three query-string variations; when no entry
    qualifies, the first five entries are still tested with two payloads.

    Args:
        endpoints: Endpoints to fuzz; only ``path`` is read here, the
            objects are handed on to the individual test methods.
        parent_probe_result: Mapping of path -> probe details. An entry's
            ``url`` value, when present, is used as the request URL.

    Returns:
        The list of findings, also stored in ``self.fuzz_results``.
    """
    print(f"\n [Fuzzer] Fuzzing {len(endpoints)} 个端点...")

    base = self.target.rstrip('/')

    def build_url(path: str) -> str:
        # Absolute URLs are used as they are; relative paths are joined
        # with exactly one slash so 'api/x' does not fuse with the host.
        if re.match(r'^https?://', path, re.I):
            return path
        return base + (path if path.startswith('/') else '/' + path)

    fuzz_results = []

    # Common fuzzing payloads (only part of them is used below)
    fuzz_payloads = {
        'sqli': [
            "' OR '1'='1",
            "' OR 1=1--",
            "admin'--",
            "' UNION SELECT NULL--",
            "1' AND '1'='1",
        ],
        'xss': [
            "<script>alert(1)</script>",
            "<img src=x onerror=alert(1)>",
            "javascript:alert(1)",
            "<svg onload=alert(1)>",
        ],
        'path_traversal': [
            "../../../etc/passwd",
            "..\\..\\..\\windows\\win.ini",
            "%2e%2e%2f%2e%2e%2fetc%2fpasswd",
            "....//....//etc/passwd",
        ],
        'cmd_injection': [
            "; ls",
            "| cat /etc/passwd",
            "`whoami`",
            "$(whoami)",
        ],
        'ssti': [
            "{{7*7}}",
            "${7*7}",
            "<%= 7*7 %>",
        ],
    }

    # (test method, payloads) in execution order
    test_plan = (
        (self._test_sqli, fuzz_payloads['sqli'][:2]),
        (self._test_xss, fuzz_payloads['xss'][:1]),
        (self._test_path_traversal, fuzz_payloads['path_traversal'][:1]),
    )

    for ep in endpoints:
        url = build_url(ep.path)
        for run_test, payloads in test_plan:
            for payload in payloads:
                result = run_test(url, ep, payload)
                if result:
                    fuzz_results.append(result)

    # Fuzz the probed parent paths
    if parent_probe_result:
        probed_api_count = 0
        for path, info in parent_probe_result.items():
            # Non-JSON responses are fuzzed too when they answered 200
            # (a web-server fallback page is worth testing as well).
            if not (info.get('is_api') or info.get('status') == 200):
                continue
            probed_api_count += 1
            # Prefer the URL that was actually probed, when available
            url = info.get('url') or build_url(path)

            for test_url in (
                url + '?id=1',
                url + '?id=1 OR 1=1',
                url + '?q=<script>alert(1)</script>',
            ):
                result = self._test_url(test_url, path)
                if result:
                    fuzz_results.append(result)

        if probed_api_count == 0:
            print(f" [WARN] 所有父路径返回 HTML (nginx fallback),无法进行有效 fuzzing")
            # Even for fallback pages, try a few URLs
            for path, info in list(parent_probe_result.items())[:5]:
                url = info.get('url') or build_url(path)
                for payload in ['<script>alert(1)</script>', "' OR '1'='1"]:
                    result = self._test_url(url + '?q=' + payload, path)
                    if result:
                        fuzz_results.append(result)

    self.fuzz_results = fuzz_results
    print(f" [Fuzzer] 发现 {len(fuzz_results)} 个问题")

    return fuzz_results
832
+
833
def _test_sqli(self, url: str, ep: "ParsedEndpoint", payload: str) -> Optional[Dict]:
    """Send one SQL-injection payload and look for database error text.

    The payload replaces every ``{name}`` / ``:name`` placeholder of the
    endpoint's path parameters and is additionally appended as an ``id``
    query parameter.

    Args:
        url: Full URL of the endpoint, possibly containing placeholders.
        ep: Endpoint under test; ``get_path_params()`` and ``path`` are used.
        payload: Injection string, inserted verbatim.

    Returns:
        A finding dict when the response body contains one of the error
        indicators, otherwise ``None`` (also when the request fails).

    NOTE(review): the indicators are plain substrings ('sql', 'syntax', ...)
    and no baseline response is compared, so pages that mention these words
    for unrelated reasons are reported as well.
    """
    sqli_indicators = [
        'sql', 'syntax', 'mysql', 'oracle', 'error in your sql',
        'postgresql', 'sqlite', 'mariadb', 'sqlstate', 'odbc'
    ]

    try:
        test_url = url
        for param in ep.get_path_params():
            # Literal replacement: the payload must not be interpreted as a
            # regex replacement template (backslashes would raise), and the
            # parameter name must not be interpreted as a pattern.
            test_url = test_url.replace('{' + param.name + '}', payload)
            test_url = re.sub(
                ':' + re.escape(param.name) + r'\b',
                lambda _match: payload,
                test_url,
            )

        # Always deliver the payload through the query string as well,
        # extending an existing query instead of skipping it.
        separator = '&' if '?' in test_url else '?'
        test_url = test_url + separator + 'id=' + payload

        r = self.session.get(test_url, timeout=5)

        resp_lower = r.text.lower()
        for indicator in sqli_indicators:
            if indicator in resp_lower:
                return {
                    'type': 'SQL Injection',
                    'severity': 'CRITICAL',
                    'url': test_url,
                    'payload': payload,
                    'endpoint': ep.path,
                    'evidence': f'SQL error indicator: {indicator}',
                }

    except Exception:
        # Best-effort probe: a failed request means "nothing found"
        return None

    return None
869
+
870
def _test_xss(self, url: str, ep: "ParsedEndpoint", payload: str) -> Optional[Dict]:
    """Send one XSS payload and check whether it is reflected verbatim.

    The payload replaces every ``{name}`` / ``:name`` placeholder of the
    endpoint's path parameters and is appended as a ``q`` query parameter.

    Args:
        url: Full URL of the endpoint, possibly containing placeholders.
        ep: Endpoint under test; ``get_path_params()`` and ``path`` are used.
        payload: Markup to inject, inserted verbatim.

    Returns:
        A finding dict when the response body contains the payload,
        otherwise ``None`` (also when the request fails).
    """
    try:
        test_url = url
        for param in ep.get_path_params():
            # Literal replacement so payload backslashes are not treated as
            # regex replacement escapes; ':name' placeholders are handled
            # the same way as in the SQL-injection test.
            test_url = test_url.replace('{' + param.name + '}', payload)
            test_url = re.sub(
                ':' + re.escape(param.name) + r'\b',
                lambda _match: payload,
                test_url,
            )

        separator = '&' if '?' in test_url else '?'
        test_url = test_url + separator + 'q=' + payload

        r = self.session.get(test_url, timeout=5)

        if payload in r.text:
            return {
                'type': 'XSS (Reflected)',
                'severity': 'HIGH',
                'url': test_url,
                'payload': payload,
                'endpoint': ep.path,
                'evidence': 'Payload reflected in response',
            }

    except Exception:
        # Best-effort probe: a failed request means "nothing found"
        return None

    return None
898
+
899
def _test_path_traversal(self, url: str, ep: "ParsedEndpoint", payload: str) -> Optional[Dict]:
    """Append a traversal payload to the URL and look for leaked file content.

    Args:
        url: Full URL of the endpoint; a trailing slash is dropped before
            ``/<payload>`` is appended.
        ep: Endpoint under test; only ``path`` is used, for the report.
        payload: Traversal sequence to append.

    Returns:
        A finding dict when the response body contains ``root:``,
        ``[extensions]`` or ``boot.ini``; otherwise ``None`` (also when the
        request fails).
    """
    leak_markers = ('root:', '[extensions]', 'boot.ini')

    try:
        test_url = url.rstrip('/') + '/' + payload
        response = self.session.get(test_url, timeout=5)

        if any(marker in response.text for marker in leak_markers):
            finding = {}
            finding['type'] = 'Path Traversal'
            finding['severity'] = 'HIGH'
            finding['url'] = test_url
            finding['payload'] = payload
            finding['endpoint'] = ep.path
            finding['evidence'] = 'Sensitive file content exposed'
            return finding

    except Exception as e:
        # Best-effort probe: errors are deliberately ignored
        pass

    return None
920
+
921
def _test_url(self, url: str, base_path: str) -> Optional[Dict]:
    """Request a prepared URL and check the response for reflected payloads.

    Args:
        url: Complete URL, already carrying the payload.
        base_path: Path reported as ``endpoint`` in a finding.

    Returns:
        A ``SQL Injection`` finding when one of the known SQL payloads is
        contained in the body and the body also mentions ``sql``; otherwise
        an ``XSS (Reflected)`` finding when one of the known XSS payloads is
        contained in the body; otherwise ``None`` (also when the request
        fails).
    """
    # (payloads, extra condition on the body, finding template)
    sqli_payloads = ["' OR '1'='1", "1 OR 1=1"]
    xss_payloads = ["<script>alert(1)</script>", "<img src=x onerror=alert(1)>"]

    def finding(kind, severity, payload, evidence):
        return {
            'type': kind,
            'severity': severity,
            'url': url,
            'payload': payload,
            'endpoint': base_path,
            'evidence': evidence,
        }

    try:
        response = self.session.get(url, timeout=5)

        # SQL payloads count only when the body also talks about SQL
        for candidate in sqli_payloads:
            if candidate in response.text and 'sql' in response.text.lower():
                return finding('SQL Injection', 'CRITICAL', candidate,
                               'Potential SQL injection')

        # XSS payloads count as soon as they are reflected
        for candidate in xss_payloads:
            if candidate in response.text:
                return finding('XSS (Reflected)', 'HIGH', candidate,
                               'Payload reflected')

    except Exception as e:
        # Best-effort probe: errors are deliberately ignored
        pass

    return None