opencode-api-security-testing 3.0.10 → 3.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/README.md +74 -0
  2. package/SKILL.md +1797 -0
  3. package/core/advanced_recon.py +788 -0
  4. package/core/agentic_analyzer.py +445 -0
  5. package/core/analyzers/api_parser.py +210 -0
  6. package/core/analyzers/response_analyzer.py +212 -0
  7. package/core/analyzers/sensitive_finder.py +184 -0
  8. package/core/api_fuzzer.py +422 -0
  9. package/core/api_interceptor.py +525 -0
  10. package/core/api_parser.py +955 -0
  11. package/core/browser_tester.py +479 -0
  12. package/core/cloud_storage_tester.py +1330 -0
  13. package/core/collectors/__init__.py +23 -0
  14. package/core/collectors/api_path_finder.py +300 -0
  15. package/core/collectors/browser_collect.py +645 -0
  16. package/core/collectors/browser_collector.py +411 -0
  17. package/core/collectors/http_client.py +111 -0
  18. package/core/collectors/js_collector.py +490 -0
  19. package/core/collectors/js_parser.py +780 -0
  20. package/core/collectors/url_collector.py +319 -0
  21. package/core/context_manager.py +682 -0
  22. package/core/deep_api_tester_v35.py +844 -0
  23. package/core/deep_api_tester_v55.py +366 -0
  24. package/core/dynamic_api_analyzer.py +532 -0
  25. package/core/http_client.py +179 -0
  26. package/core/models.py +296 -0
  27. package/core/orchestrator.py +890 -0
  28. package/core/prerequisite.py +227 -0
  29. package/core/reasoning_engine.py +1042 -0
  30. package/core/response_classifier.py +606 -0
  31. package/core/runner.py +938 -0
  32. package/core/scan_engine.py +599 -0
  33. package/core/skill_executor.py +435 -0
  34. package/core/skill_executor_v2.py +670 -0
  35. package/core/skill_executor_v3.py +704 -0
  36. package/core/smart_analyzer.py +687 -0
  37. package/core/strategy_pool.py +707 -0
  38. package/core/testers/auth_tester.py +264 -0
  39. package/core/testers/idor_tester.py +200 -0
  40. package/core/testers/sqli_tester.py +211 -0
  41. package/core/testing_loop.py +655 -0
  42. package/core/utils/base_path_dict.py +255 -0
  43. package/core/utils/payload_lib.py +167 -0
  44. package/core/utils/ssrf_detector.py +220 -0
  45. package/core/verifiers/vuln_verifier.py +536 -0
  46. package/package.json +1 -1
  47. package/references/README.md +72 -0
  48. package/references/asset-discovery.md +119 -0
  49. package/references/fuzzing-patterns.md +129 -0
  50. package/references/graphql-guidance.md +108 -0
  51. package/references/intake.md +84 -0
  52. package/references/pua-agent.md +192 -0
  53. package/references/report-template.md +156 -0
  54. package/references/rest-guidance.md +76 -0
  55. package/references/severity-model.md +76 -0
  56. package/references/test-matrix.md +86 -0
  57. package/references/validation.md +78 -0
  58. package/references/vulnerabilities/01-sqli-tests.md +1128 -0
  59. package/references/vulnerabilities/02-user-enum-tests.md +423 -0
  60. package/references/vulnerabilities/03-jwt-tests.md +499 -0
  61. package/references/vulnerabilities/04-idor-tests.md +362 -0
  62. package/references/vulnerabilities/05-sensitive-data-tests.md +466 -0
  63. package/references/vulnerabilities/06-biz-logic-tests.md +501 -0
  64. package/references/vulnerabilities/07-security-config-tests.md +511 -0
  65. package/references/vulnerabilities/08-brute-force-tests.md +457 -0
  66. package/references/vulnerabilities/09-vulnerability-chains.md +465 -0
  67. package/references/vulnerabilities/10-auth-tests.md +537 -0
  68. package/references/vulnerabilities/11-graphql-tests.md +355 -0
  69. package/references/vulnerabilities/12-ssrf-tests.md +396 -0
  70. package/references/vulnerabilities/README.md +148 -0
  71. package/references/workflows.md +192 -0
@@ -0,0 +1,606 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Response Classifier - 响应综合分类器
4
+ 多维度判断响应类型,像渗透测试工程师一样思考
5
+ """
6
+
7
+ import re
8
+ import json
9
+ import hashlib
10
+ from typing import Dict, List, Tuple, Optional, Any
11
+ from dataclasses import dataclass
12
+ from enum import Enum
13
+ import requests
14
+
15
+
16
class ResponseType(Enum):
    """Categories a classified HTTP response can fall into."""

    # A genuine API description document (Swagger / OpenAPI).
    REAL_API_DOC = "real_api_doc"
    # A Vue.js / React single-page-app shell served as a fallback.
    SPA_FALLBACK = "spa_fallback"
    # A static asset.
    STATIC_RESOURCE = "static_resource"
    # A REST API endpoint.
    REST_API_ENDPOINT = "rest_api_endpoint"
    # A GraphQL endpoint.
    GRAPHQL_ENDPOINT = "graphql_endpoint"
    # An error page.
    ERROR_PAGE = "error_page"
    # A login page.
    LOGIN_PAGE = "login_page"
    # An administration back-office page.
    ADMIN_PAGE = "admin_page"
    # None of the above could be established.
    UNKNOWN = "unknown"
27
+
28
+
29
class Confidence(Enum):
    """Confidence label attached to a classification.

    The percentages below are the author's intended meaning of each label;
    they are documentation only.
    """

    # Intended for >= 90% certainty.
    HIGH = "high"
    # Intended for >= 70% certainty.
    MEDIUM = "medium"
    # Intended for >= 50% certainty.
    LOW = "low"
    # No meaningful confidence could be assigned.
    UNCERTAIN = "uncertain"
35
+
36
+
37
@dataclass
class ResponseAnalysis:
    """Result of analysing a single HTTP response.

    Only ``url``, ``response_type`` and ``confidence`` are required; every
    other field has a neutral default so a result can also be built for a
    request that failed before any response was received.
    """

    url: str
    response_type: ResponseType
    confidence: Confidence

    # --- Raw response data ---
    status_code: int = 0
    content_type: str = ""
    content_length: int = 0
    content_hash: str = ""

    # --- Detected body format ---
    is_json: bool = False
    is_yaml: bool = False
    is_html: bool = False
    is_xml: bool = False

    # --- Content features ---
    has_swagger: bool = False
    has_openapi: bool = False
    has_paths: bool = False
    has_api_paths: bool = False
    has_api_keyword: bool = False
    has_login_form: bool = False
    has_admin_keyword: bool = False
    has_error_keyword: bool = False
    has_graphql_keyword: bool = False
    has_websocket_keyword: bool = False

    # --- JSON-specific ---
    # Both default to None (a mutable literal cannot be a dataclass default)
    # and are replaced by a fresh empty container in __post_init__, so the
    # annotations are Optional to match what the constructor accepts.
    json_structure: Optional[Dict[str, Any]] = None
    json_endpoints: Optional[List[str]] = None
    json_has_servers: bool = False
    json_has_components: bool = False

    # --- Miscellaneous ---
    content_preview: str = ""
    reasoning: str = ""

    def __post_init__(self):
        """Give each instance its own empty dict/list when none was supplied."""
        if self.json_structure is None:
            self.json_structure = {}
        if self.json_endpoints is None:
            self.json_endpoints = []
83
+
84
+
85
class ResponseClassifier:
    """
    Multi-signal HTTP response classifier.

    Instead of trusting the status code alone, a response is judged from
    several signals taken together:
    1. the Content-Type header (recorded on the result),
    2. the response body itself,
    3. structural features of a JSON body,
    4. characteristic keywords,
    5. the response size (recorded on the result).
    """

    # Login page markers
    LOGIN_PATTERNS = [
        r'<form[^>]*login',
        r'<input[^>]*name=["\']username',
        r'<input[^>]*name=["\']password',
        r'login.*username',
        r'username.*password',
        r'doLogin',
        r'loginForm',
        r'signin',
        r'登录',
        r'用户名',
        r'密码',
    ]

    # Admin back-office markers
    ADMIN_PATTERNS = [
        r'admin.*panel',
        r'dashboard',
        r'control.*panel',
        r'management',
        r'后台管理',
        r'管理员',
    ]

    # Error page markers
    ERROR_PATTERNS = [
        r'404.*not.*found',
        r'500.*error',
        r'403.*forbidden',
        r'access.*denied',
        r'unauthorized',
        r'error.*occurred',
        r'页面不存在',
        r'访问被拒绝',
    ]

    # GraphQL markers
    GRAPHQL_PATTERNS = [
        r'__schema',
        r'__type',
        r'graphiql',
        r'graphql',
        r'application/graphql',
    ]

    # Swagger/OpenAPI markers (kept for external users of the constant;
    # analyze() inspects the parsed document's top-level keys instead).
    SWAGGER_PATTERNS = [
        r'"swagger"',
        r'"openapi"',
        r'"paths"',
        r'"info"',
        r'"components"',
        r'"schemas"',
        r'"security"',
    ]

    # REST API markers
    REST_PATTERNS = [
        r'/api/v',
        r'/rest/',
        r'"method"',
        r'"endpoint"',
        r'"route"',
        r'"status"',
        r'"message"',
        r'"code"',
        r'"data"',
        r'"result"',
    ]

    # API-related keywords
    API_KEYWORDS = [
        'api', 'endpoint', 'route', 'resource', 'collection',
        'user', 'product', 'order', 'admin', 'auth', 'login',
        'get', 'post', 'put', 'delete', 'patch',
    ]

    # URL path suffixes treated as static assets by _classify().
    _STATIC_EXTENSIONS = ('.js', '.css', '.png', '.jpg', '.svg', '.woff', '.woff2')

    def __init__(self, session: Optional["requests.Session"] = None):
        """Create a classifier.

        Args:
            session: HTTP session used for any request the classifier issues
                itself; a new ``requests.Session`` is created when omitted.
        """
        self.session = session or requests.Session()
        # Fingerprint of the baseline response; empty until set_baseline() succeeds.
        self.baseline_hash = ""
        self.baseline_length = 0

    def analyze(self, url: str, response: Optional["requests.Response"] = None) -> "ResponseAnalysis":
        """
        Analyse one response and classify it.

        Args:
            url: The requested URL.
            response: An already-fetched response. When omitted, ``url`` is
                fetched through the classifier's session (10 s timeout,
                redirects followed).

        Returns:
            A populated ResponseAnalysis. ``has_websocket_keyword`` is not
            populated by this method.

        Raises:
            Whatever ``session.get`` raises when the response has to be
            fetched here; the exception is not caught.
        """
        if response is None:
            response = self.session.get(url, timeout=10, allow_redirects=True)

        # Read the decoded body once and reuse it for the preview and all checks.
        content = response.text
        content_lower = content.lower()

        analysis = ResponseAnalysis(
            url=url,
            response_type=ResponseType.UNKNOWN,
            confidence=Confidence.UNCERTAIN,
            status_code=response.status_code,
            content_type=response.headers.get('Content-Type', ''),
            content_length=len(response.content),
            content_hash=hashlib.md5(response.content).hexdigest(),
            content_preview=content[:200],
        )

        # 1. Body format detection
        analysis.is_json = self._is_json(content)
        analysis.is_yaml = self._is_yaml(content)
        analysis.is_html = self._is_html(content)
        analysis.is_xml = self._is_xml(content)

        # 2. JSON-specific analysis
        if analysis.is_json:
            parsed = self._parse_json(content)
            analysis.json_structure = parsed

            # Structural features only make sense for a JSON object; a
            # top-level array is stored but not inspected further.
            if isinstance(parsed, dict):
                analysis.json_has_servers = 'servers' in parsed
                analysis.json_has_components = 'components' in parsed

                # Decide from the document's own top-level keys. A plain
                # substring search for words such as "info" or "security"
                # flags almost any JSON payload as an API description.
                analysis.has_openapi = 'openapi' in parsed
                analysis.has_paths = isinstance(parsed.get('paths'), dict)
                analysis.has_swagger = (
                    'swagger' in parsed
                    or analysis.has_openapi
                    or analysis.has_paths
                )

                analysis.json_endpoints = self._extract_json_endpoints(parsed)

        # 3. HTML-specific analysis
        if analysis.is_html:
            analysis.has_login_form = self._match_patterns(content, self.LOGIN_PATTERNS)
            analysis.has_admin_keyword = self._match_patterns(content, self.ADMIN_PATTERNS)
            analysis.has_error_keyword = self._match_patterns(content, self.ERROR_PATTERNS)

        # 4. Format-independent content analysis
        analysis.has_api_paths = bool(re.search(r'/api/v\d+', content))
        analysis.has_api_keyword = any(kw in content_lower for kw in self.API_KEYWORDS)
        analysis.has_graphql_keyword = self._match_patterns(content, self.GRAPHQL_PATTERNS)

        # 5. Final verdict
        analysis.response_type, analysis.confidence, analysis.reasoning = self._classify(analysis)

        return analysis

    def _is_json(self, content: str) -> bool:
        """Return True when ``content`` is a JSON object or array that parses."""
        if not content:
            return False

        stripped = content.strip()
        if not stripped.startswith(('{', '[')):
            return False

        try:
            json.loads(stripped)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError; RecursionError covers
            # pathologically deep nesting.
            return False
        return True

    def _is_yaml(self, content: str) -> bool:
        """Return True when ``content`` contains any YAML/OpenAPI-YAML marker.

        NOTE(review): this is a plain substring test, so any body containing
        '---' matches; the flag is informational only in this class.
        """
        if not content:
            return False

        yaml_indicators = ['---', 'openapi:', 'swagger:', 'paths:', 'components:']
        return any(indicator in content for indicator in yaml_indicators)

    def _is_html(self, content: str) -> bool:
        """Return True when the first 200 characters contain an HTML marker."""
        if not content:
            return False

        html_indicators = ['<!doctype', '<html', '<head>', '<body>', '<div']
        head = content[:200].lower()
        return any(indicator in head for indicator in html_indicators)

    def _is_xml(self, content: str) -> bool:
        """Return True when the body, ignoring leading whitespace, starts with '<'.

        NOTE(review): this also holds for HTML documents; the flag is
        informational only in this class.
        """
        return bool(content) and content.lstrip().startswith('<')

    def _parse_json(self, content: str) -> Any:
        """Parse ``content`` as JSON, returning ``{}`` when it cannot be parsed.

        The result is whatever ``json.loads`` yields, so a body that is a
        JSON array comes back as a list.
        """
        try:
            return json.loads(content)
        except (ValueError, TypeError, RecursionError):
            return {}

    def _extract_json_endpoints(self, data: Dict) -> List[str]:
        """Collect endpoint paths and server URLs from an API description.

        Args:
            data: Parsed JSON document. Anything that is not a dict yields an
                empty list instead of raising.

        Returns:
            The keys of ``data['paths']`` (when it is a mapping), followed by
            ``"server: <url>"`` for each entry of ``data['servers']`` that is
            a dict with a ``url`` key.
        """
        if not isinstance(data, dict):
            return []

        endpoints = []

        paths = data.get('paths')
        if isinstance(paths, dict):
            endpoints.extend(paths)

        servers = data.get('servers')
        if isinstance(servers, (list, tuple)):
            endpoints.extend(
                f"server: {server['url']}"
                for server in servers
                if isinstance(server, dict) and 'url' in server
            )

        return endpoints

    def _match_patterns(self, content: str, patterns: List[str]) -> bool:
        """Return True when any regex in ``patterns`` matches, ignoring case."""
        lowered = content.lower()
        return any(re.search(pattern, lowered, re.IGNORECASE) for pattern in patterns)

    def _classify(self, analysis: "ResponseAnalysis") -> Tuple["ResponseType", "Confidence", str]:
        """
        Derive the final verdict from the collected features.

        The checks run in priority order and the first match wins.

        Returns:
            (ResponseType, Confidence, reasoning)
        """
        # === 1. Real API document (highest priority) ===
        if analysis.is_json and analysis.has_swagger:
            if analysis.json_endpoints:
                return (
                    ResponseType.REAL_API_DOC,
                    Confidence.HIGH,
                    f"JSON with Swagger/OpenAPI, {len(analysis.json_endpoints)} endpoints found"
                )
            return (
                ResponseType.REAL_API_DOC,
                Confidence.MEDIUM,
                "JSON with Swagger/OpenAPI structure"
            )

        # === 2. GraphQL endpoint ===
        if analysis.has_graphql_keyword and analysis.is_json:
            return (
                ResponseType.GRAPHQL_ENDPOINT,
                Confidence.HIGH,
                "GraphQL __schema or __type found"
            )

        # === 3. REST API endpoint ===
        if analysis.is_json:
            weighted_signals = (
                (analysis.has_api_paths, 3),
                (analysis.has_api_keyword, 2),
                (analysis.json_has_servers, 3),
                (analysis.json_endpoints, 2),
            )
            rest_score = sum(weight for present, weight in weighted_signals if present)

            if rest_score >= 4:
                return (
                    ResponseType.REST_API_ENDPOINT,
                    Confidence.HIGH if rest_score >= 6 else Confidence.MEDIUM,
                    f"JSON REST API structure (score: {rest_score})"
                )

        # === 4. Login page ===
        if analysis.is_html and analysis.has_login_form:
            return (
                ResponseType.LOGIN_PAGE,
                Confidence.HIGH,
                "HTML login form detected"
            )

        # === 5. Admin back office ===
        if analysis.is_html and analysis.has_admin_keyword:
            return (
                ResponseType.ADMIN_PAGE,
                Confidence.MEDIUM,
                "Admin dashboard keywords found"
            )

        # === 6. Error page ===
        if analysis.status_code >= 400 and analysis.is_html and analysis.has_error_keyword:
            return (
                ResponseType.ERROR_PAGE,
                Confidence.HIGH,
                f"Error page ({analysis.status_code})"
            )

        # === 7. SPA fallback ===
        if analysis.is_html:
            # Framework fingerprints, looked up (case-sensitively) in the
            # 200-character preview only; the first matching group wins.
            spa_signatures = (
                (('chunk-vendors', '__VUE__', 'vue', 'react'),
                 "Vue.js/React SPA fallback detected"),
                (('__NEXT_DATA__', '_NEXT'),
                 "Next.js SPA fallback detected"),
                (('ng-version', 'angular'),
                 "Angular SPA fallback detected"),
                (('<div id="app">', '<div id="root">'),
                 "SPA div#app or div#root detected"),
            )
            preview = analysis.content_preview
            for markers, reason in spa_signatures:
                if any(marker in preview for marker in markers):
                    return (ResponseType.SPA_FALLBACK, Confidence.HIGH, reason)

            # Any remaining HTML is assumed to be an SPA shell.
            return (
                ResponseType.SPA_FALLBACK,
                Confidence.MEDIUM,
                "HTML content (likely SPA)"
            )

        # === 8. Static resource ===
        # Compare against the path only, so a cache-busting query string or
        # fragment (app.js?v=3) and upper-case extensions are still recognised.
        path_only = analysis.url.split('#', 1)[0].split('?', 1)[0].lower()
        if path_only.endswith(self._STATIC_EXTENSIONS):
            return (
                ResponseType.STATIC_RESOURCE,
                Confidence.HIGH,
                "Static resource file"
            )

        # === 9. Unknown ===
        return (
            ResponseType.UNKNOWN,
            Confidence.UNCERTAIN,
            f"Cannot determine (status={analysis.status_code}, type={analysis.content_type})"
        )

    def set_baseline(self, url: str):
        """Fetch ``url`` and remember its body hash and length as the baseline.

        Best effort: if the request fails the previous baseline is left
        untouched and no exception is raised.
        """
        try:
            resp = self.session.get(url, timeout=10)
        except Exception:
            # Deliberately tolerant, but no longer swallows KeyboardInterrupt
            # or SystemExit the way a bare ``except`` would.
            return

        self.baseline_hash = hashlib.md5(resp.content).hexdigest()
        self.baseline_length = len(resp.content)

    def is_different_from_baseline(self, resp: "requests.Response") -> bool:
        """Return True when ``resp`` differs from the baseline body.

        With no baseline recorded every response counts as different.
        """
        if not self.baseline_hash:
            return True

        content_hash = hashlib.md5(resp.content).hexdigest()
        return content_hash != self.baseline_hash

    def classify_batch(self, urls: List[str]) -> List["ResponseAnalysis"]:
        """Analyse every URL in order.

        A URL whose analysis raises still yields an entry: an UNKNOWN /
        UNCERTAIN result whose ``reasoning`` carries the error text.
        """
        results = []
        for url in urls:
            try:
                analysis = self.analyze(url)
            except Exception as e:
                analysis = ResponseAnalysis(
                    url=url,
                    response_type=ResponseType.UNKNOWN,
                    confidence=Confidence.UNCERTAIN,
                    reasoning=f"Error: {str(e)}"
                )
            results.append(analysis)
        return results
470
+
471
+
472
def smart_discover_and_classify(target_url: str, session: Optional["requests.Session"] = None) -> Dict:
    """
    Discover candidate API-documentation URLs for a target and classify them.

    Steps:
    1. Fetch ``target_url`` and collect the ``.js`` URLs referenced by its
       ``<script src=...>`` tags (the first five are used).
    2. Pass each script to ``SmartAPIAnalyzer.analyze_js_file`` and record
       every proper parent prefix of the endpoint paths it reports.
    3. For up to 15 parent paths (in sorted order), request each well-known
       Swagger/OpenAPI suffix beneath it and classify the response, so that
       SPA fallbacks are told apart from real API documents.

    Args:
        target_url: Base URL of the application to inspect.
        session: HTTP session to use; a new ``requests.Session`` is created
            when omitted.

    Returns:
        A dict with the keys ``target``, ``parent_paths_count``,
        ``classified`` (results grouped per category) and ``summary``
        (number of results per category).

    Raises:
        Whatever ``session.get`` raises for the initial fetch of
        ``target_url``. Failures for individual scripts are skipped and
        failures for individual probes are recorded under ``unknowns``.
    """
    from urllib.parse import urljoin

    session = session or requests.Session()
    classifier = ResponseClassifier(session)

    print("[*] Starting smart discovery and classification...")

    # Extract parent paths from the page's JavaScript.
    from smart_analyzer import SmartAPIAnalyzer
    analyzer = SmartAPIAnalyzer(session)

    resp = session.get(target_url, timeout=10)
    js_url_pattern = r'<script[^>]+src=["\']([^"\']+\.js[^"\']*)["\']'
    js_urls = re.findall(js_url_pattern, resp.text)

    # Resolve script references the way a browser does, against the URL the
    # page was actually served from. This handles absolute, root-relative
    # ("/js/app.js") and protocol-relative ("//cdn/app.js") references.
    base_url = getattr(resp, 'url', None) or target_url

    parent_paths = set()
    for js_url in js_urls[:5]:
        js_url = urljoin(base_url, js_url)

        try:
            js_resp = session.get(js_url, timeout=10)
            endpoints = analyzer.analyze_js_file(js_url, js_resp.text)

            for ep in endpoints:
                parts = ep.path.strip('/').split('/')
                # Every proper prefix of the path, never the full path itself.
                for i in range(1, len(parts)):
                    parent_paths.add('/' + '/'.join(parts[:i]))
        except Exception:
            # Best effort: one unreachable or unparsable script must not
            # abort discovery (Ctrl-C is no longer swallowed).
            continue

    print(f"[*] Found {len(parent_paths)} parent paths")

    # Classification probes
    swagger_suffixes = [
        '/swagger.json', '/v3/api-docs', '/v2/api-docs',
        '/api-docs', '/openapi.json', '/swagger.yaml',
    ]

    classified = {
        'real_api_docs': [],
        'rest_endpoints': [],
        'spa_fallbacks': [],
        'login_pages': [],
        'admin_pages': [],
        'unknowns': [],
    }

    base = target_url.rstrip('/')
    # Sorted so the 15-parent cap selects the same parents on every run.
    for parent in sorted(parent_paths)[:15]:
        for suffix in swagger_suffixes:
            url = base + parent + suffix

            try:
                resp = session.get(url, timeout=5, allow_redirects=True)
                analysis = classifier.analyze(url, resp)
            except Exception as e:
                classified['unknowns'].append({
                    'url': url,
                    'reasoning': f"Request error: {e}"
                })
                continue

            kind = analysis.response_type
            if kind == ResponseType.REAL_API_DOC:
                classified['real_api_docs'].append({
                    'url': url,
                    'endpoints': analysis.json_endpoints,
                    'confidence': analysis.confidence.value
                })
            elif kind == ResponseType.REST_API_ENDPOINT:
                classified['rest_endpoints'].append({
                    'url': url,
                    'confidence': analysis.confidence.value
                })
            elif kind == ResponseType.SPA_FALLBACK:
                classified['spa_fallbacks'].append({
                    'url': url,
                    'reasoning': analysis.reasoning
                })
            elif kind == ResponseType.LOGIN_PAGE:
                classified['login_pages'].append(url)
            elif kind == ResponseType.ADMIN_PAGE:
                classified['admin_pages'].append(url)
            else:
                classified['unknowns'].append({
                    'url': url,
                    'reasoning': analysis.reasoning
                })

    return {
        'target': target_url,
        'parent_paths_count': len(parent_paths),
        'classified': classified,
        'summary': {
            'real_api_docs': len(classified['real_api_docs']),
            'rest_endpoints': len(classified['rest_endpoints']),
            'spa_fallbacks': len(classified['spa_fallbacks']),
            'login_pages': len(classified['login_pages']),
            'admin_pages': len(classified['admin_pages']),
            'unknowns': len(classified['unknowns']),
        }
    }
579
+
580
+
581
if __name__ == "__main__":
    import sys

    # The target must be named explicitly. Falling back to a built-in
    # address would send scan traffic to a host the operator never chose.
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <target_url>")
        sys.exit(1)

    target = sys.argv[1]

    result = smart_discover_and_classify(target)

    print("\n" + "=" * 70)
    print(" Smart Discovery & Classification Results")
    print("=" * 70)

    print(f"\n[*] Target: {result['target']}")
    print(f"[*] Parent paths tested: {result['parent_paths_count']}")

    summary = result['summary']
    print("\n[*] Summary:")
    print(f" Real API Docs: {summary['real_api_docs']}")
    print(f" REST Endpoints: {summary['rest_endpoints']}")
    print(f" SPA Fallbacks: {summary['spa_fallbacks']}")
    print(f" Login Pages: {summary['login_pages']}")
    print(f" Admin Pages: {summary['admin_pages']}")

    # List at most five discovered documents, each with up to five endpoints.
    real_docs = result['classified']['real_api_docs']
    if real_docs:
        print("\n[*] Real API Docs Found:")
        for doc in real_docs[:5]:
            print(f" - {doc['url']}")
            print(f" Endpoints: {doc['endpoints'][:5] if doc['endpoints'] else 'N/A'}")