opencode-api-security-testing 3.0.8 → 3.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/api-cyber-supervisor.md +9 -3
- package/agents/api-probing-miner.md +10 -2
- package/agents/api-resource-specialist.md +44 -35
- package/agents/api-vuln-verifier.md +56 -24
- package/package.json +1 -1
- package/postinstall.mjs +1 -0
- package/preuninstall.mjs +43 -32
- package/src/index.ts +3 -100
- package/README.md +0 -74
- package/SKILL.md +0 -1797
- package/core/advanced_recon.py +0 -788
- package/core/agentic_analyzer.py +0 -445
- package/core/analyzers/api_parser.py +0 -210
- package/core/analyzers/response_analyzer.py +0 -212
- package/core/analyzers/sensitive_finder.py +0 -184
- package/core/api_fuzzer.py +0 -422
- package/core/api_interceptor.py +0 -525
- package/core/api_parser.py +0 -955
- package/core/browser_tester.py +0 -479
- package/core/cloud_storage_tester.py +0 -1330
- package/core/collectors/__init__.py +0 -23
- package/core/collectors/api_path_finder.py +0 -300
- package/core/collectors/browser_collect.py +0 -645
- package/core/collectors/browser_collector.py +0 -411
- package/core/collectors/http_client.py +0 -111
- package/core/collectors/js_collector.py +0 -490
- package/core/collectors/js_parser.py +0 -780
- package/core/collectors/url_collector.py +0 -319
- package/core/context_manager.py +0 -682
- package/core/deep_api_tester_v35.py +0 -844
- package/core/deep_api_tester_v55.py +0 -366
- package/core/dynamic_api_analyzer.py +0 -532
- package/core/http_client.py +0 -179
- package/core/models.py +0 -296
- package/core/orchestrator.py +0 -890
- package/core/prerequisite.py +0 -227
- package/core/reasoning_engine.py +0 -1042
- package/core/response_classifier.py +0 -606
- package/core/runner.py +0 -938
- package/core/scan_engine.py +0 -599
- package/core/skill_executor.py +0 -435
- package/core/skill_executor_v2.py +0 -670
- package/core/skill_executor_v3.py +0 -704
- package/core/smart_analyzer.py +0 -687
- package/core/strategy_pool.py +0 -707
- package/core/testers/auth_tester.py +0 -264
- package/core/testers/idor_tester.py +0 -200
- package/core/testers/sqli_tester.py +0 -211
- package/core/testing_loop.py +0 -655
- package/core/utils/base_path_dict.py +0 -255
- package/core/utils/payload_lib.py +0 -167
- package/core/utils/ssrf_detector.py +0 -220
- package/core/verifiers/vuln_verifier.py +0 -536
- package/references/README.md +0 -72
- package/references/asset-discovery.md +0 -119
- package/references/fuzzing-patterns.md +0 -129
- package/references/graphql-guidance.md +0 -108
- package/references/intake.md +0 -84
- package/references/pua-agent.md +0 -192
- package/references/report-template.md +0 -156
- package/references/rest-guidance.md +0 -76
- package/references/severity-model.md +0 -76
- package/references/test-matrix.md +0 -86
- package/references/validation.md +0 -78
- package/references/vulnerabilities/01-sqli-tests.md +0 -1128
- package/references/vulnerabilities/02-user-enum-tests.md +0 -423
- package/references/vulnerabilities/03-jwt-tests.md +0 -499
- package/references/vulnerabilities/04-idor-tests.md +0 -362
- package/references/vulnerabilities/05-sensitive-data-tests.md +0 -466
- package/references/vulnerabilities/06-biz-logic-tests.md +0 -501
- package/references/vulnerabilities/07-security-config-tests.md +0 -511
- package/references/vulnerabilities/08-brute-force-tests.md +0 -457
- package/references/vulnerabilities/09-vulnerability-chains.md +0 -465
- package/references/vulnerabilities/10-auth-tests.md +0 -537
- package/references/vulnerabilities/11-graphql-tests.md +0 -355
- package/references/vulnerabilities/12-ssrf-tests.md +0 -396
- package/references/vulnerabilities/README.md +0 -148
- package/references/workflows.md +0 -192
- package/src/hooks/directory-agents-injector.ts +0 -106
|
@@ -1,490 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
JS Collector - JavaScript 指纹缓存 + Webpack 分析
|
|
4
|
-
从 JS 文件中发现 API 路径、参数、前端路由等
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import re
|
|
8
|
-
import hashlib
|
|
9
|
-
import asyncio
|
|
10
|
-
from typing import Dict, List, Set, Optional, Tuple
|
|
11
|
-
from urllib.parse import urljoin, urlparse
|
|
12
|
-
from dataclasses import dataclass, field
|
|
13
|
-
from concurrent.futures import ThreadPoolExecutor
|
|
14
|
-
import requests
|
|
15
|
-
|
|
16
|
-
try:
|
|
17
|
-
import esprima
|
|
18
|
-
HAS_ESPRIMA = True
|
|
19
|
-
except ImportError:
|
|
20
|
-
HAS_ESPRIMA = False
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
@dataclass
class ParsedJSResult:
    """Result of parsing one JavaScript file.

    Aggregates everything the collector mines out of a single JS source:
    API endpoints, parameter names, WebSocket URLs, environment config
    values, front-end routes and derived path fragments.
    """

    js_url: str          # URL the JS file was fetched from
    content_hash: str    # MD5 of the file content, used for cache invalidation
    # Discovered API endpoints: dicts with 'method', 'path' and 'source' keys.
    endpoints: List[Dict[str, str]] = field(default_factory=list)
    # Parameter names observed in the JS (query/body/path parameters).
    parameter_names: Set[str] = field(default_factory=set)
    # WebSocket endpoint URLs (ws:// or wss://).
    websocket_endpoints: List[str] = field(default_factory=list)
    # Environment-style configuration values (BASE_URL, API_KEY, ...).
    env_configs: Dict[str, str] = field(default_factory=dict)
    # Front-end (Vue/React) router paths.
    routes: List[str] = field(default_factory=list)
    # Parent path prefixes derived from the discovered endpoints.
    parent_paths: Set[str] = field(default_factory=set)
    # Last path segments of endpoints (candidate suffixes for path fuzzing).
    extracted_suffixes: List[str] = field(default_factory=list)
    # Well-known resource words (user, order, ...) present in endpoint paths.
    resource_fragments: List[str] = field(default_factory=list)
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
class JSFingerprintCache:
    """Fingerprint cache of parsed JS files so an unchanged file is never
    re-parsed (the parse is the expensive AST/regex pass)."""

    def __init__(self):
        self._cache: Dict[str, ParsedJSResult] = {}
        self._content_hashes: Dict[str, str] = {}

    def get(self, js_url: str, content: str) -> Optional[ParsedJSResult]:
        """Return the cached result for *js_url* when the content hash still
        matches; otherwise drop the stale entry, record the new hash and
        return None (signalling the caller must re-parse)."""
        digest = hashlib.md5(content.encode()).hexdigest()

        entry = self._cache.get(js_url)
        if entry is not None:
            if entry.content_hash == digest:
                return entry
            # Content changed since it was cached: invalidate.
            del self._cache[js_url]

        self._content_hashes[js_url] = digest
        return None

    def put(self, js_url: str, result: ParsedJSResult):
        """Store a freshly parsed result under its URL."""
        self._cache[js_url] = result

    def get_all_parent_paths(self) -> Set[str]:
        """Union of the parent paths discovered across all cached JS files."""
        all_paths: Set[str] = set()
        for parsed in self._cache.values():
            all_paths |= parsed.parent_paths
        return all_paths
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
class JSCollector:
    """
    JS collector.

    Features:
    - Regex-extract <script src="*.js"> URLs from the main HTML page
    - Recursive JS extraction (Webpack dynamic import/require)
    - Dual-engine parsing: AST + regex
    - Webpack chunk analysis
    """

    # Recognised HTTP method names (lowercase; upper-cased when reported).
    HTTP_METHODS = ['get', 'post', 'put', 'delete', 'patch', 'head', 'options']

    # API path regexes (25+ rules)
    API_PATH_PATTERNS = [
        r"/api/[a-zA-Z0-9_/-]+",
        r"/[a-zA-Z0-9_/-]+/[a-zA-Z0-9_/-]+",
        r"fetch\s*\(\s*['\"](/[^'\"]+)['\"]",
        r"axios\.(get|post|put|delete)\s*\(\s*['\"](/[^'\"]+)['\"]",
        r"\$\.ajax\s*\(\s*\{[^}]*url\s*:\s*['\"](/[^'\"]+)['\"]",
        r"request\s*\(\s*\{[^}]*url\s*:\s*['\"](/[^'\"]+)['\"]",
        r"http[s]?://[a-zA-Z0-9.-]+(:\d+)?(/[a-zA-Z0-9_/.-]*)?['\"]",
    ]

    # Parameter-name regexes
    PARAM_PATTERNS = [
        r'(?:id|userId|user_id|page|token|key|secret|password|email|username|name|type|category|search|query|filter|sort|order|limit|offset|pageSize|page_size)',
        r'\{([a-zA-Z_][a-zA-Z0-9_]*)\}',
    ]

    # WebSocket endpoint regexes
    WS_PATTERNS = [
        r'new\s+WebSocket\s*\(\s*[\'"]([^\'"]+)[\'"]',
        r'ws[s]?://[^\s\'"<>]+',
    ]

    # Environment-configuration regexes
    ENV_PATTERNS = [
        r'(?:BASE_URL|API_URL|API_ENDPOINT|API_KEY|SECRET_KEY|TOKEN|AUTH_TOKEN)\s*[:=]\s*[\'"]([^\'"]+)',
        r'process\.env\.([A-Z_]+)',
    ]

    # Vue/React router regexes
    ROUTE_PATTERNS = [
        r'/user/:id',
        r'/product/:productId',
        r'/admin/:action',
        r'router\.(?:push|replace|go)\s*\(\s*[\'"](/[^\'"]+)[\'"]',
        r'<Route\s+(?:path|component)=[\'"](/[^\'"]+)[\'"]',
        r'path\s*:\s*[\'"](/[^\'"]+)[\'"]',
    ]

    # Webpack chunk filename regexes
    WEBPACK_CHUNK_PATTERNS = [
        r'chunk-[a-f0-9]+\.js',
        r'\.[a-f0-9]{8}\.js',
    ]
|
|
128
|
-
|
|
129
|
-
def __init__(self, session: requests.Session = None, max_depth: int = 3, max_js_per_depth: int = 50):
    """Set up the collector.

    Args:
        session: optional shared requests.Session; a fresh one is created
            when omitted. A browser-like User-Agent is installed either way.
        max_depth: maximum recursion depth when following JS imports.
        max_js_per_depth: cap on how many JS files are fetched per depth level.
    """
    self.session = session or requests.Session()
    self.session.headers.update({
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    })
    self.cache = JSFingerprintCache()
    self.max_depth = max_depth
    self.max_js_per_depth = max_js_per_depth
    # JS URLs already fetched (or attempted) during recursion.
    self.visited_urls: Set[str] = set()
    # Every JS URL successfully downloaded, in discovery order.
    self.all_js_urls: List[str] = []
|
|
139
|
-
|
|
140
|
-
def extract_js_from_html(self, html_content: str, base_url: str) -> List[str]:
    """Extract JavaScript file URLs referenced by <script src=...> tags.

    Args:
        html_content: raw HTML of the page.
        base_url: URL the HTML was fetched from; used to resolve relative srcs.

    Returns:
        De-duplicated list of absolute JS URLs (order unspecified).
    """
    js_urls = []

    # One pattern suffices: the class ["\'] already accepts both quote
    # styles — the original second pattern was an exact duplicate with
    # the quote characters merely reordered inside the classes.
    pattern = r'<script[^>]+src=["\']([^"\']+\.js[^"\']*)["\']'

    # Scheme-relative URLs (//cdn/x.js) inherit the page's scheme instead
    # of being forced to https, so plain-http targets resolve correctly.
    scheme = urlparse(base_url).scheme or 'https'

    for match in re.findall(pattern, html_content, re.IGNORECASE):
        if match.startswith('//'):
            js_urls.append(f'{scheme}:{match}')
        elif match.startswith('http'):
            js_urls.append(match)
        else:
            js_urls.append(urljoin(base_url, match))

    return list(set(js_urls))
|
|
161
|
-
|
|
162
|
-
def extract_js_imports(self, js_content: str) -> List[str]:
    """Find further JS files referenced from a JS source via
    import/require/export statements, plus webpackChunkName hints.

    Returns a de-duplicated list (order unspecified)."""
    patterns = [
        r'import\s+.*?from\s+[\'"]([^\'"]+\.js[^\'"]*)[\'"]',
        r'import\s+[\'"]([^\'"]+\.js[^\'"]*)[\'"]',
        r'require\s*\(\s*[\'"]([^\'"]+\.js[^\'"]*)[\'"]',
        r'export\s+.*?from\s+[\'"]([^\'"]+\.js[^\'"]*)[\'"]',
        r'webpackChunkName:\s*["\']([^"\']+)["\']',
    ]

    discovered = set()
    for rx in patterns:
        discovered.update(re.findall(rx, js_content, re.IGNORECASE))
    return list(discovered)
|
|
179
|
-
|
|
180
|
-
def parse_js_content(self, js_url: str, content: str) -> ParsedJSResult:
    """Parse one JS file into a ParsedJSResult.

    Results are cached by content hash via self.cache, so an unchanged
    file is only parsed once.
    """
    cached = self.cache.get(js_url, content)
    if cached:
        return cached

    result = ParsedJSResult(
        js_url=js_url,
        content_hash=hashlib.md5(content.encode()).hexdigest()
    )

    # 1. API endpoints
    result.endpoints = self._extract_endpoints(content)

    # 2. Parameter names
    result.parameter_names = self._extract_parameters(content)

    # 3. WebSocket endpoints
    result.websocket_endpoints = self._extract_websocket(content)

    # 4. Environment configuration values
    result.env_configs = self._extract_env_configs(content)

    # 5. Front-end routes
    result.routes = self._extract_routes(content)

    # 6. Parent paths (derived from the endpoints found in step 1)
    result.parent_paths = self._extract_parent_paths(result.endpoints)

    # 7. Path suffixes and resource fragments (also endpoint-derived)
    result.extracted_suffixes = self._extract_suffixes(result.endpoints)
    result.resource_fragments = self._extract_resource_fragments(result.endpoints)

    self.cache.put(js_url, result)
    return result
|
|
215
|
-
|
|
216
|
-
def _extract_endpoints(self, content: str) -> List[Dict[str, str]]:
    """Extract candidate API endpoints from JS source.

    Two passes: (1) per-HTTP-method regexes, (2) fetch/axios/$.ajax call
    sites. Each hit is de-duplicated by path and recorded as a dict with
    'method', 'path' and 'source' keys.
    """
    endpoints = []
    found = set()  # paths already recorded, for de-duplication across passes

    # Pass 1: HTTP method name + quoted path.
    for method in self.HTTP_METHODS:
        patterns = [
            rf"{method}\s*\(\s*['\"]([^'\"]+)['\"]",
            rf"\.{method}\s*\(\s*['\"]([^'\"]+)['\"]",
            # NOTE(review): this third rule matches ANY quoted token that
            # merely contains the method name as a substring — a noisy
            # heuristic; confirm the false-positive rate is acceptable.
            rf"['\"]([/a-zA-Z0-9_-]*{method}[/a-zA-Z0-9_-]*)['\"]",
        ]
        for pattern in patterns:
            matches = re.findall(pattern, content, re.IGNORECASE)
            for path in matches:
                if self._is_api_path(path) and path not in found:
                    found.add(path)
                    endpoints.append({
                        'method': method.upper(),
                        'path': path,
                        'source': 'regex'
                    })

    # Pass 2: fetch/axios/$.ajax call sites.
    patterns = [
        (r'fetch\s*\(\s*[\'"]([^\'"]+)[\'"]', 'GET'),
        (r'axios\.(get|post|put|delete)\s*\(\s*[\'"]([^\'"]+)[\'"]', None),
        (r'\$\.ajax\s*\(\s*\{[^}]*url\s*:\s*[\'"](/[^\'"]+)[\'"]', None),
    ]

    for pattern, default_method in patterns:
        matches = re.findall(pattern, content, re.IGNORECASE)
        for match in matches:
            # The axios pattern has two capture groups, so findall yields
            # (method, path) tuples; single-group patterns yield strings.
            if isinstance(match, tuple):
                method = match[0].upper() if match[0].lower() in self.HTTP_METHODS else default_method or 'GET'
                path = match[1] if len(match) > 1 else match[0]
            else:
                method = 'GET'
                path = match

            if self._is_api_path(path) and path not in found:
                found.add(path)
                endpoints.append({
                    'method': method,
                    'path': path,
                    'source': 'http_client'
                })

    return endpoints
|
|
265
|
-
|
|
266
|
-
def _is_api_path(self, path: str) -> bool:
    """Heuristic filter: does *path* look like an API route rather than a
    static asset (stylesheet, image, font, html/json document)?"""
    # Too short (or empty) to be a meaningful route.
    if not path or len(path) < 2:
        return False

    skip_patterns = [
        r'\.css', r'\.jpg', r'\.png', r'\.gif', r'\.svg',
        r'\.woff', r'\.ttf', r'\.eot', r'\.ico',
        r'html?', r'\.json[^\w]',
    ]

    # Reject the path as soon as any asset pattern matches.
    return not any(
        re.search(rx, path, re.IGNORECASE) for rx in skip_patterns
    )
|
|
282
|
-
|
|
283
|
-
def _extract_parameters(self, content: str) -> Set[str]:
    """Collect likely request-parameter names from JS source.

    Two sources: ``{placeholder}`` tokens in URL templates, and a fixed
    vocabulary of common parameter names that appear delimited in code.
    """
    # {param} placeholders inside URL templates.
    found: Set[str] = set(re.findall(r'\{([a-zA-Z_][a-zA-Z0-9_]*)\}', content))

    # Common parameter names, accepted only when surrounded by typical
    # delimiters (whitespace, parens, braces, comma, equals).
    common_names = [
        'id', 'userId', 'user_id', 'page', 'pageNum', 'page_size',
        'token', 'key', 'secret', 'password', 'email', 'username',
        'name', 'type', 'category', 'search', 'query', 'filter',
        'sort', 'order', 'limit', 'offset', 'pageSize',
        'status', 'action', 'method', 'callback', 'data', 'params',
    ]
    found.update(
        name for name in common_names
        if re.search(rf'[\s\({{]{name}[\s\)}},=]', content, re.IGNORECASE)
    )

    return found
|
|
305
|
-
|
|
306
|
-
def _extract_websocket(self, content: str) -> List[str]:
    """Return the de-duplicated WebSocket endpoints matched by WS_PATTERNS."""
    hits: Set[str] = set()
    for rx in self.WS_PATTERNS:
        hits.update(re.findall(rx, content, re.IGNORECASE))
    return list(hits)
|
|
315
|
-
|
|
316
|
-
def _extract_env_configs(self, content: str) -> Dict[str, str]:
    """Extract environment-style configuration values (BASE_URL, API keys,
    process.env references) from JS source.

    NOTE(review): both ENV_PATTERNS contain exactly one capturing group,
    so re.findall returns plain strings and the tuple branch below is
    dead code. The string branch then derives the dict key from the
    pattern's own source text (e.g. 'BASE_URL|API_URL|...' for the first
    pattern, '[A-Z_]+' for the second), losing which config name actually
    matched — confirm intended behavior before relying on the keys.
    """
    configs = {}

    for pattern in self.ENV_PATTERNS:
        matches = re.findall(pattern, content)
        for match in matches:
            if isinstance(match, tuple) and len(match) == 2:
                configs[match[0]] = match[1]
            elif isinstance(match, str):
                # Key reconstructed from the regex source text (see NOTE).
                configs[pattern.split('(')[1].split(')')[0].replace('?:', '')] = match

    return configs
|
|
329
|
-
|
|
330
|
-
def _extract_routes(self, content: str) -> List[str]:
    """Extract front-end (Vue/React) router paths from JS source.

    NOTE: the last pattern has no capture group, so its hits include the
    surrounding quote characters — behavior kept as-is.
    """
    route_patterns = [
        r'path\s*:\s*[\'"]([/a-zA-Z0-9_:-]*:[\w]+[/a-zA-Z0-9_-]*)[\'"]',
        r'router\.push\s*\(\s*[\'"](/[^\'"]+)[\'"]',
        r'<Route\s+[^>]*path=[\'"](/[^\'"]+)[\'"]',
        r'["\']/(?:user|admin|product|order|api)[:/][a-zA-Z0-9_]+["\']',
    ]

    seen: Set[str] = set()
    for rx in route_patterns:
        seen.update(re.findall(rx, content, re.IGNORECASE))
    return list(seen)
|
|
347
|
-
|
|
348
|
-
def _extract_parent_paths(self, endpoints: List[Dict[str, str]]) -> Set[str]:
    """Derive parent path prefixes (one and two levels up) from the
    discovered endpoints, e.g. /api/v1/users -> /api/v1 and /api."""
    parents: Set[str] = set()

    for endpoint in endpoints:
        path = endpoint.get('path', '')
        if not path:
            continue

        segments = path.strip('/').split('/')
        # One level up; two levels up when the path is deep enough.
        for cut in (1, 2):
            if len(segments) > cut:
                parents.add('/' + '/'.join(segments[:-cut]))

    return parents
|
|
367
|
-
|
|
368
|
-
def _extract_suffixes(self, endpoints: List[Dict[str, str]]) -> List[str]:
    """Collect the last path segment of every endpoint (de-duplicated),
    as candidate suffixes for path fuzzing.

    Fix: the original guarded with ``len(parts) > 0``, which is always
    true (str.split never returns an empty list), and consequently
    recorded a useless empty-string "suffix" for empty or root paths.
    Empty segments are now skipped.
    """
    suffixes = set()

    for ep in endpoints:
        parts = ep.get('path', '').strip('/').split('/')
        if parts[-1]:  # skip '' produced by empty or '/'-only paths
            suffixes.add(parts[-1])

    return list(suffixes)
|
|
379
|
-
|
|
380
|
-
def _extract_resource_fragments(self, endpoints: List[Dict[str, str]]) -> List[str]:
    """Report which well-known resource words occur (as substrings) in
    the endpoint paths."""
    resource_names = [
        'user', 'users', 'product', 'products', 'order', 'orders',
        'admin', 'auth', 'login', 'logout', 'register', 'profile',
        'config', 'setting', 'menu', 'role', 'permission',
    ]

    present = {
        word
        for ep in endpoints
        for word in resource_names
        if word in ep.get('path', '').lower()
    }
    return list(present)
|
|
397
|
-
|
|
398
|
-
async def _recursive_js_extract(self, initial_js_urls: List[str], base_url: str) -> List[str]:
    """Breadth-first download of JS files, following import/require links.

    Starts from *initial_js_urls* and walks up to ``self.max_depth``
    levels, fetching at most ``self.max_js_per_depth`` files per level.
    Successfully downloaded URLs are appended to ``self.all_js_urls``,
    which is also returned.

    NOTE: declared async for interface compatibility, but every HTTP call
    goes through the synchronous requests session.
    """
    pending_urls = list(set(initial_js_urls))

    for _depth in range(self.max_depth):
        if not pending_urls:
            break

        # Take at most max_js_per_depth URLs for this level.
        current_batch = pending_urls[:self.max_js_per_depth]
        pending_urls = pending_urls[self.max_js_per_depth:]

        for js_url in current_batch:
            if js_url in self.visited_urls:
                continue
            self.visited_urls.add(js_url)

            try:
                if not js_url.startswith('http'):
                    js_url = urljoin(base_url, js_url)

                resp = self.session.get(js_url, timeout=10)
                if resp.status_code != 200:
                    continue

                content = resp.text
                self.all_js_urls.append(js_url)

                # Queue JS referenced from this file. Compare the
                # *resolved* URL against visited_urls — the original
                # compared the raw import string, so one file could be
                # queued repeatedly under different relative spellings.
                for imp in self.extract_js_imports(content):
                    normalized = urljoin(js_url, imp)
                    if normalized not in self.visited_urls:
                        pending_urls.append(normalized)
            except Exception:
                # Best-effort crawl: skip files that fail to download,
                # but no longer swallow KeyboardInterrupt/SystemExit
                # (the original used a bare `except`).
                continue

    return self.all_js_urls
|
|
435
|
-
|
|
436
|
-
def collect(self, target_url: str) -> JSFingerprintCache:
    """Run the full JS collection pipeline against *target_url*.

    Fetches the page, discovers JS files (directly and recursively),
    parses each one, and returns the populated fingerprint cache. On a
    fetch failure the (possibly empty) cache is returned unchanged.
    """
    try:
        resp = self.session.get(target_url, timeout=10)
        html = resp.text
    except Exception as e:
        print(f"[!] Failed to fetch target: {e}")
        return self.cache

    # 1. JS referenced directly from the HTML page.
    js_urls = self.extract_js_from_html(html, target_url)
    print(f"[*] Found {len(js_urls)} JS files in HTML")

    # 2. Recursive extraction. asyncio.run creates AND closes its event
    # loop; the original built one with new_event_loop() and never closed
    # it, leaking a loop per collect() call. The redundant local
    # `import asyncio` is gone too (it is imported at module level).
    asyncio.run(self._recursive_js_extract(js_urls, target_url))

    # 3. Parse each downloaded JS file.
    # NOTE(review): this re-downloads every file that step 2 already
    # fetched; caching the bodies there would halve the request count.
    for js_url in self.all_js_urls:
        try:
            resp = self.session.get(js_url, timeout=10)
            if resp.status_code == 200:
                self.parse_js_content(js_url, resp.text)
        except Exception:
            # Best-effort: one failing download must not abort the whole
            # collection (bare `except` narrowed to Exception).
            continue

    print(f"[*] Parsed {len(self.cache._cache)} JS files")
    return self.cache
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
# CLI interface: run a collection against --target and print a summary.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="JS Collector")
    parser.add_argument("--target", required=True, help="Target URL")
    parser.add_argument("--depth", type=int, default=3, help="Max recursion depth")
    # NOTE(review): --output is accepted but never used anywhere below.
    parser.add_argument("--output", help="Output file")

    args = parser.parse_args()

    collector = JSCollector(max_depth=args.depth)
    cache = collector.collect(args.target)

    # Summary (reaches into the cache's private _cache dict).
    print("\n=== Results ===")
    print(f"Total JS files: {len(cache._cache)}")
    print(f"Parent paths: {cache.get_all_parent_paths()}")

    # Show up to five endpoints per JS file that yielded any.
    for js_url, result in cache._cache.items():
        if result.endpoints:
            print(f"\n{js_url}:")
            for ep in result.endpoints[:5]:
                print(f"  - {ep['method']} {ep['path']}")
|