aiptx 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiptx might be problematic. Click here for more details.

Files changed (165) hide show
  1. aipt_v2/__init__.py +110 -0
  2. aipt_v2/__main__.py +24 -0
  3. aipt_v2/agents/AIPTxAgent/__init__.py +10 -0
  4. aipt_v2/agents/AIPTxAgent/aiptx_agent.py +211 -0
  5. aipt_v2/agents/__init__.py +24 -0
  6. aipt_v2/agents/base.py +520 -0
  7. aipt_v2/agents/ptt.py +406 -0
  8. aipt_v2/agents/state.py +168 -0
  9. aipt_v2/app.py +960 -0
  10. aipt_v2/browser/__init__.py +31 -0
  11. aipt_v2/browser/automation.py +458 -0
  12. aipt_v2/browser/crawler.py +453 -0
  13. aipt_v2/cli.py +321 -0
  14. aipt_v2/compliance/__init__.py +71 -0
  15. aipt_v2/compliance/compliance_report.py +449 -0
  16. aipt_v2/compliance/framework_mapper.py +424 -0
  17. aipt_v2/compliance/nist_mapping.py +345 -0
  18. aipt_v2/compliance/owasp_mapping.py +330 -0
  19. aipt_v2/compliance/pci_mapping.py +297 -0
  20. aipt_v2/config.py +288 -0
  21. aipt_v2/core/__init__.py +43 -0
  22. aipt_v2/core/agent.py +630 -0
  23. aipt_v2/core/llm.py +395 -0
  24. aipt_v2/core/memory.py +305 -0
  25. aipt_v2/core/ptt.py +329 -0
  26. aipt_v2/database/__init__.py +14 -0
  27. aipt_v2/database/models.py +232 -0
  28. aipt_v2/database/repository.py +384 -0
  29. aipt_v2/docker/__init__.py +23 -0
  30. aipt_v2/docker/builder.py +260 -0
  31. aipt_v2/docker/manager.py +222 -0
  32. aipt_v2/docker/sandbox.py +371 -0
  33. aipt_v2/evasion/__init__.py +58 -0
  34. aipt_v2/evasion/request_obfuscator.py +272 -0
  35. aipt_v2/evasion/tls_fingerprint.py +285 -0
  36. aipt_v2/evasion/ua_rotator.py +301 -0
  37. aipt_v2/evasion/waf_bypass.py +439 -0
  38. aipt_v2/execution/__init__.py +23 -0
  39. aipt_v2/execution/executor.py +302 -0
  40. aipt_v2/execution/parser.py +544 -0
  41. aipt_v2/execution/terminal.py +337 -0
  42. aipt_v2/health.py +437 -0
  43. aipt_v2/intelligence/__init__.py +85 -0
  44. aipt_v2/intelligence/auth.py +520 -0
  45. aipt_v2/intelligence/chaining.py +775 -0
  46. aipt_v2/intelligence/cve_aipt.py +334 -0
  47. aipt_v2/intelligence/cve_info.py +1111 -0
  48. aipt_v2/intelligence/rag.py +239 -0
  49. aipt_v2/intelligence/scope.py +442 -0
  50. aipt_v2/intelligence/searchers/__init__.py +5 -0
  51. aipt_v2/intelligence/searchers/exploitdb_searcher.py +523 -0
  52. aipt_v2/intelligence/searchers/github_searcher.py +467 -0
  53. aipt_v2/intelligence/searchers/google_searcher.py +281 -0
  54. aipt_v2/intelligence/tools.json +443 -0
  55. aipt_v2/intelligence/triage.py +670 -0
  56. aipt_v2/interface/__init__.py +5 -0
  57. aipt_v2/interface/cli.py +230 -0
  58. aipt_v2/interface/main.py +501 -0
  59. aipt_v2/interface/tui.py +1276 -0
  60. aipt_v2/interface/utils.py +583 -0
  61. aipt_v2/llm/__init__.py +39 -0
  62. aipt_v2/llm/config.py +26 -0
  63. aipt_v2/llm/llm.py +514 -0
  64. aipt_v2/llm/memory.py +214 -0
  65. aipt_v2/llm/request_queue.py +89 -0
  66. aipt_v2/llm/utils.py +89 -0
  67. aipt_v2/models/__init__.py +15 -0
  68. aipt_v2/models/findings.py +295 -0
  69. aipt_v2/models/phase_result.py +224 -0
  70. aipt_v2/models/scan_config.py +207 -0
  71. aipt_v2/monitoring/grafana/dashboards/aipt-dashboard.json +355 -0
  72. aipt_v2/monitoring/grafana/dashboards/default.yml +17 -0
  73. aipt_v2/monitoring/grafana/datasources/prometheus.yml +17 -0
  74. aipt_v2/monitoring/prometheus.yml +60 -0
  75. aipt_v2/orchestration/__init__.py +52 -0
  76. aipt_v2/orchestration/pipeline.py +398 -0
  77. aipt_v2/orchestration/progress.py +300 -0
  78. aipt_v2/orchestration/scheduler.py +296 -0
  79. aipt_v2/orchestrator.py +2284 -0
  80. aipt_v2/payloads/__init__.py +27 -0
  81. aipt_v2/payloads/cmdi.py +150 -0
  82. aipt_v2/payloads/sqli.py +263 -0
  83. aipt_v2/payloads/ssrf.py +204 -0
  84. aipt_v2/payloads/templates.py +222 -0
  85. aipt_v2/payloads/traversal.py +166 -0
  86. aipt_v2/payloads/xss.py +204 -0
  87. aipt_v2/prompts/__init__.py +60 -0
  88. aipt_v2/proxy/__init__.py +29 -0
  89. aipt_v2/proxy/history.py +352 -0
  90. aipt_v2/proxy/interceptor.py +452 -0
  91. aipt_v2/recon/__init__.py +44 -0
  92. aipt_v2/recon/dns.py +241 -0
  93. aipt_v2/recon/osint.py +367 -0
  94. aipt_v2/recon/subdomain.py +372 -0
  95. aipt_v2/recon/tech_detect.py +311 -0
  96. aipt_v2/reports/__init__.py +17 -0
  97. aipt_v2/reports/generator.py +313 -0
  98. aipt_v2/reports/html_report.py +378 -0
  99. aipt_v2/runtime/__init__.py +44 -0
  100. aipt_v2/runtime/base.py +30 -0
  101. aipt_v2/runtime/docker.py +401 -0
  102. aipt_v2/runtime/local.py +346 -0
  103. aipt_v2/runtime/tool_server.py +205 -0
  104. aipt_v2/scanners/__init__.py +28 -0
  105. aipt_v2/scanners/base.py +273 -0
  106. aipt_v2/scanners/nikto.py +244 -0
  107. aipt_v2/scanners/nmap.py +402 -0
  108. aipt_v2/scanners/nuclei.py +273 -0
  109. aipt_v2/scanners/web.py +454 -0
  110. aipt_v2/scripts/security_audit.py +366 -0
  111. aipt_v2/telemetry/__init__.py +7 -0
  112. aipt_v2/telemetry/tracer.py +347 -0
  113. aipt_v2/terminal/__init__.py +28 -0
  114. aipt_v2/terminal/executor.py +400 -0
  115. aipt_v2/terminal/sandbox.py +350 -0
  116. aipt_v2/tools/__init__.py +44 -0
  117. aipt_v2/tools/active_directory/__init__.py +78 -0
  118. aipt_v2/tools/active_directory/ad_config.py +238 -0
  119. aipt_v2/tools/active_directory/bloodhound_wrapper.py +447 -0
  120. aipt_v2/tools/active_directory/kerberos_attacks.py +430 -0
  121. aipt_v2/tools/active_directory/ldap_enum.py +533 -0
  122. aipt_v2/tools/active_directory/smb_attacks.py +505 -0
  123. aipt_v2/tools/agents_graph/__init__.py +19 -0
  124. aipt_v2/tools/agents_graph/agents_graph_actions.py +69 -0
  125. aipt_v2/tools/api_security/__init__.py +76 -0
  126. aipt_v2/tools/api_security/api_discovery.py +608 -0
  127. aipt_v2/tools/api_security/graphql_scanner.py +622 -0
  128. aipt_v2/tools/api_security/jwt_analyzer.py +577 -0
  129. aipt_v2/tools/api_security/openapi_fuzzer.py +761 -0
  130. aipt_v2/tools/browser/__init__.py +5 -0
  131. aipt_v2/tools/browser/browser_actions.py +238 -0
  132. aipt_v2/tools/browser/browser_instance.py +535 -0
  133. aipt_v2/tools/browser/tab_manager.py +344 -0
  134. aipt_v2/tools/cloud/__init__.py +70 -0
  135. aipt_v2/tools/cloud/cloud_config.py +273 -0
  136. aipt_v2/tools/cloud/cloud_scanner.py +639 -0
  137. aipt_v2/tools/cloud/prowler_tool.py +571 -0
  138. aipt_v2/tools/cloud/scoutsuite_tool.py +359 -0
  139. aipt_v2/tools/executor.py +307 -0
  140. aipt_v2/tools/parser.py +408 -0
  141. aipt_v2/tools/proxy/__init__.py +5 -0
  142. aipt_v2/tools/proxy/proxy_actions.py +103 -0
  143. aipt_v2/tools/proxy/proxy_manager.py +789 -0
  144. aipt_v2/tools/registry.py +196 -0
  145. aipt_v2/tools/scanners/__init__.py +343 -0
  146. aipt_v2/tools/scanners/acunetix_tool.py +712 -0
  147. aipt_v2/tools/scanners/burp_tool.py +631 -0
  148. aipt_v2/tools/scanners/config.py +156 -0
  149. aipt_v2/tools/scanners/nessus_tool.py +588 -0
  150. aipt_v2/tools/scanners/zap_tool.py +612 -0
  151. aipt_v2/tools/terminal/__init__.py +5 -0
  152. aipt_v2/tools/terminal/terminal_actions.py +37 -0
  153. aipt_v2/tools/terminal/terminal_manager.py +153 -0
  154. aipt_v2/tools/terminal/terminal_session.py +449 -0
  155. aipt_v2/tools/tool_processing.py +108 -0
  156. aipt_v2/utils/__init__.py +17 -0
  157. aipt_v2/utils/logging.py +201 -0
  158. aipt_v2/utils/model_manager.py +187 -0
  159. aipt_v2/utils/searchers/__init__.py +269 -0
  160. aiptx-2.0.2.dist-info/METADATA +324 -0
  161. aiptx-2.0.2.dist-info/RECORD +165 -0
  162. aiptx-2.0.2.dist-info/WHEEL +5 -0
  163. aiptx-2.0.2.dist-info/entry_points.txt +7 -0
  164. aiptx-2.0.2.dist-info/licenses/LICENSE +21 -0
  165. aiptx-2.0.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,608 @@
1
+ """
2
+ API Endpoint Discovery
3
+
4
+ Automatic API endpoint detection and enumeration:
5
+ - OpenAPI/Swagger spec detection
6
+ - GraphQL endpoint detection
7
+ - Common API path fuzzing
8
+ - Version enumeration
9
+ - Documentation discovery
10
+ - Hidden endpoint detection
11
+
12
+ Usage:
13
+ from aipt_v2.tools.api_security import APIDiscovery
14
+
15
+ discovery = APIDiscovery("https://target.com")
16
+ result = await discovery.discover()  # APIDiscoveryResult; endpoints are in result.endpoints
17
+ """
18
+
19
+ import asyncio
20
+ import json
21
+ import re
22
+ from dataclasses import dataclass, field
23
+ from datetime import datetime, timezone
24
+ from typing import List, Dict, Any, Optional, Set
25
+ from urllib.parse import urljoin, urlparse
26
+
27
+ try:
28
+ import aiohttp
29
+ except ImportError:
30
+ aiohttp = None
31
+
32
+
33
@dataclass
class DiscoveredEndpoint:
    """One URL that answered a discovery probe.

    ``timestamp`` is filled in with the current UTC time (ISO 8601) when
    the caller leaves it empty.
    """

    # What was requested and what came back.
    url: str
    method: str
    status_code: int
    endpoint_type: str  # rest, graphql, swagger, soap, grpc
    content_type: str
    response_size: int
    auth_required: bool

    # Optional extras; all default to the empty string.
    documentation_url: str = ""
    version: str = ""
    timestamp: str = ""

    def __post_init__(self):
        # Keep a caller-supplied timestamp; otherwise stamp the record now.
        self.timestamp = self.timestamp or datetime.now(timezone.utc).isoformat()
50
+
51
+
52
@dataclass
class APIDiscoveryConfig:
    """Settings that control an API discovery run."""

    base_url: str

    # Which discovery techniques to run.
    discover_swagger: bool = True
    discover_graphql: bool = True
    discover_common_paths: bool = True
    discover_versions: bool = True

    # Credentials and extra headers attached to requests.
    auth_token: str = ""
    api_key: str = ""
    headers: Dict[str, str] = field(default_factory=dict)

    # Throttling: parallel probes, per-request timeout (seconds) and the
    # pause before each probe (milliseconds).
    max_concurrent: int = 10
    timeout: int = 10
    delay_ms: int = 50
72
+
73
+
74
@dataclass
class APIDiscoveryResult:
    """Summary of a completed API discovery run."""

    # Run bookkeeping.
    base_url: str
    status: str
    started_at: str
    finished_at: str
    duration: float

    # Findings.
    endpoints: List["DiscoveredEndpoint"]
    swagger_specs: List[str]
    graphql_endpoints: List[str]
    api_versions: List[str]

    # Free-form extra information; empty unless the caller supplies it.
    metadata: Dict[str, Any] = field(default_factory=dict)
87
+
88
+
89
class APIDiscovery:
    """
    API Endpoint Discovery Tool.

    Automatically discovers and enumerates API endpoints
    through various techniques including spec detection,
    common path fuzzing, and version enumeration, plus harvesting of
    candidate paths from the landing page, robots.txt and sitemaps.

    Probing is best-effort: a network error on a single URL is swallowed
    and counts as "not found". Every request is made with TLS certificate
    verification disabled (``ssl=False``). A missing ``aiohttp`` install
    is the one failure that is always raised (``ImportError``).
    """

    # OpenAPI/Swagger spec locations
    SWAGGER_PATHS = [
        "/openapi.json", "/openapi.yaml",
        "/swagger.json", "/swagger.yaml",
        "/api-docs", "/api-docs.json",
        "/v1/api-docs", "/v2/api-docs", "/v3/api-docs",
        "/swagger/v1/swagger.json",
        "/swagger-resources",
        "/api/swagger.json", "/api/openapi.json",
        "/.well-known/openapi.json",
        "/docs", "/docs/", "/redoc",
        "/api/docs", "/api/documentation"
    ]

    # GraphQL common endpoints
    GRAPHQL_PATHS = [
        "/graphql", "/graphiql", "/graphql/console",
        "/api/graphql", "/v1/graphql",
        "/gql", "/query",
        "/graphql/v1", "/graphql/v2"
    ]

    # Common API paths to enumerate
    COMMON_API_PATHS = [
        # Authentication
        "/api/auth", "/api/login", "/api/logout", "/api/register",
        "/api/oauth", "/api/token", "/api/refresh",
        "/auth/login", "/auth/token", "/oauth/token",

        # User management
        "/api/users", "/api/user", "/api/me", "/api/profile",
        "/api/account", "/api/accounts",
        "/users", "/user", "/me", "/profile",

        # Common resources
        "/api/items", "/api/products", "/api/orders",
        "/api/customers", "/api/data", "/api/resources",

        # Admin endpoints
        "/api/admin", "/admin/api", "/api/internal",
        "/api/management", "/api/config", "/api/settings",

        # Health/Status
        "/api/health", "/api/status", "/api/ping",
        "/health", "/healthz", "/ready", "/status",
        "/_health", "/_status",

        # Info/Debug
        "/api/info", "/api/version", "/api/debug",
        "/info", "/version", "/debug",
        "/actuator", "/actuator/health", "/actuator/info",

        # SOAP/Legacy
        "/soap", "/wsdl", "/service", "/services",
        "/ws", "/webservice"
    ]

    # API version patterns
    VERSION_PATTERNS = [
        "/v1", "/v2", "/v3", "/v4",
        "/api/v1", "/api/v2", "/api/v3",
        "/api/1.0", "/api/2.0", "/api/3.0",
        "/1.0", "/2.0", "/3.0"
    ]

    # GraphQL introspection query
    GRAPHQL_INTROSPECTION = """
    query { __schema { queryType { name } } }
    """

    # Status codes treated as "something answers at this URL".
    _INTERESTING_STATUSES = frozenset({200, 201, 301, 302, 401, 403})

    # A version path segment such as /v1 or /v2.1, ending at a URL
    # delimiter or the end of the string (so /videos does not match).
    _VERSION_SEGMENT_RE = re.compile(r"/v\d+(?:\.\d+)*(?=[/?#]|$)")

    def __init__(self, base_url: str, config: Optional["APIDiscoveryConfig"] = None):
        """
        Initialize API discovery.

        Args:
            base_url: Target base URL (a trailing slash is removed)
            config: Discovery configuration; a default one is built
                for ``base_url`` when omitted
        """
        self.base_url = base_url.rstrip("/")
        self.config = config or APIDiscoveryConfig(base_url=base_url)
        # URLs that already produced a hit; _check_url skips them.
        self.discovered: Set[str] = set()
        self.endpoints: List[DiscoveredEndpoint] = []
        self.swagger_specs: List[str] = []
        self.graphql_endpoints: List[str] = []
        self.api_versions: List[str] = []

    @staticmethod
    def _require_aiohttp() -> None:
        """Raise ImportError when the optional aiohttp dependency is missing."""
        if aiohttp is None:
            raise ImportError("aiohttp required. Install with: pip install aiohttp")

    @classmethod
    def _looks_like_api_path(cls, text: str) -> bool:
        """Return True when ``text`` contains ``/api`` or a ``/v<N>`` segment."""
        if "/api" in text.lower():
            return True
        return cls._VERSION_SEGMENT_RE.search(text) is not None

    @classmethod
    def _parse_robots_api_paths(cls, body: str) -> List[str]:
        """Return the API-looking ``Disallow`` paths found in a robots.txt body."""
        paths = []
        for raw_line in body.splitlines():
            # Drop inline comments and surrounding whitespace first.
            line = raw_line.split("#", 1)[0].strip()
            directive, sep, value = line.partition(":")
            if not sep or directive.strip().lower() != "disallow":
                continue
            path = value.strip()
            if cls._looks_like_api_path(path):
                paths.append(path)
        return paths

    def _get_headers(self) -> Dict[str, str]:
        """Build request headers.

        Configured headers override the defaults; the bearer token and
        API key, when set, override any configured header of the same name.
        """
        headers = {
            "User-Agent": "AIPTX-API-Discovery/1.0",
            "Accept": "application/json, text/html, */*"
        }
        headers.update(self.config.headers)

        if self.config.auth_token:
            headers["Authorization"] = f"Bearer {self.config.auth_token}"

        if self.config.api_key:
            headers["X-API-Key"] = self.config.api_key

        return headers

    async def _fetch_text(self, url: str, only_ok: bool = True) -> Optional[str]:
        """GET ``url`` and return its body as text.

        Args:
            url: Absolute URL to fetch
            only_ok: When True, any status other than 200 yields None

        Returns:
            The decoded body, or None on a network error (or a non-200
            status when ``only_ok`` is set).

        Raises:
            ImportError: aiohttp is not installed
        """
        self._require_aiohttp()
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(
                    url,
                    headers=self._get_headers(),
                    timeout=aiohttp.ClientTimeout(total=self.config.timeout),
                    ssl=False
                ) as response:
                    if only_ok and response.status != 200:
                        return None
                    # Undecodable bytes must not abort the harvest.
                    return await response.text(errors="replace")
        except Exception:
            # Best-effort: an unreachable helper page is not an error.
            return None

    async def _check_url(self, path: str, method: str = "GET") -> Optional["DiscoveredEndpoint"]:
        """Check if URL exists and is accessible.

        Args:
            path: Path joined onto the base URL with ``urljoin``
            method: HTTP method to use

        Returns:
            A DiscoveredEndpoint when the status is one of 200, 201, 301,
            302, 401 or 403; None when the URL was already recorded, the
            status is anything else, or the request failed.

        Raises:
            ImportError: aiohttp is not installed
        """
        self._require_aiohttp()

        url = urljoin(self.base_url, path)

        if url in self.discovered:
            return None

        try:
            await asyncio.sleep(self.config.delay_ms / 1000)

            async with aiohttp.ClientSession() as session:
                async with session.request(
                    method,
                    url,
                    headers=self._get_headers(),
                    timeout=aiohttp.ClientTimeout(total=self.config.timeout),
                    ssl=False,
                    allow_redirects=False
                ) as response:
                    if response.status not in self._INTERESTING_STATUSES:
                        return None

                    content_type = response.headers.get("Content-Type", "")
                    # Binary or mis-labelled bodies must not lose the hit.
                    body = await response.text(errors="replace")

                    endpoint = DiscoveredEndpoint(
                        url=url,
                        method=method,
                        status_code=response.status,
                        endpoint_type=self._detect_endpoint_type(path, content_type, body),
                        content_type=content_type,
                        response_size=len(body),
                        auth_required=response.status in [401, 403]
                    )
                    # Record only once the endpoint was built, so a failed
                    # read does not block a later retry of the same URL.
                    self.discovered.add(url)
                    return endpoint

        except Exception:
            # Best-effort probing: connection errors and timeouts mean "not found".
            return None

    async def _check_many(self, paths: List[str]) -> List["DiscoveredEndpoint"]:
        """Probe ``paths`` concurrently, at most ``max_concurrent`` at a time.

        Returns:
            The hits, in the same order as ``paths``.

        Raises:
            ImportError: aiohttp is not installed
        """
        # Semaphore(0) would block forever and a negative value raises.
        semaphore = asyncio.Semaphore(max(1, self.config.max_concurrent))

        async def probe(path: str):
            async with semaphore:
                return await self._check_url(path)

        results = await asyncio.gather(
            *(probe(path) for path in paths), return_exceptions=True
        )

        hits = []
        for result in results:
            if isinstance(result, ImportError):
                # A missing dependency is not a per-URL failure; surface it.
                raise result
            if isinstance(result, DiscoveredEndpoint):
                hits.append(result)
        return hits

    def _detect_endpoint_type(self, path: str, content_type: str, body: str) -> str:
        """Detect the type of API endpoint.

        Checks run in order: GraphQL, OpenAPI/Swagger, SOAP, gRPC; anything
        unmatched is reported as ``"rest"``.
        """
        path_lower = path.lower()
        content_lower = content_type.lower()
        body_lower = body.lower()

        # GraphQL detection
        if "graphql" in path_lower or "graphiql" in path_lower:
            return "graphql"
        if '"data"' in body and '"__schema"' in body:
            return "graphql"

        # OpenAPI/Swagger detection
        if "swagger" in path_lower or "openapi" in path_lower or "api-docs" in path_lower:
            return "swagger"
        if '"openapi"' in body_lower or '"swagger"' in body_lower:
            return "swagger"

        # SOAP detection
        if "soap" in path_lower or "wsdl" in path_lower:
            return "soap"
        if "xml" in content_lower and ("<wsdl:" in body_lower or "<soap:" in body_lower):
            return "soap"

        # gRPC detection
        if "grpc" in path_lower or "application/grpc" in content_lower:
            return "grpc"

        # Default REST
        return "rest"

    async def discover_swagger(self) -> List[str]:
        """Discover OpenAPI/Swagger specifications.

        Every hit is added to ``self.endpoints``; only hits classified as
        ``"swagger"`` are returned and stored in ``self.swagger_specs``.
        """
        specs = []

        for endpoint in await self._check_many(self.SWAGGER_PATHS):
            if endpoint.endpoint_type == "swagger":
                specs.append(endpoint.url)
            self.endpoints.append(endpoint)

        self.swagger_specs = specs
        return specs

    async def discover_graphql(self) -> List[str]:
        """Discover GraphQL endpoints.

        Each candidate path is probed with GET first; when that does not
        classify it as GraphQL, a POST introspection query is tried.
        """
        graphql_eps = []

        for path in self.GRAPHQL_PATHS:
            # Try GET request
            endpoint = await self._check_url(path)
            if endpoint:
                self.endpoints.append(endpoint)
                if endpoint.endpoint_type == "graphql":
                    graphql_eps.append(endpoint.url)
                    continue

            # Try POST with introspection
            url = urljoin(self.base_url, path)
            try:
                async with aiohttp.ClientSession() as session:
                    async with session.post(
                        url,
                        json={"query": self.GRAPHQL_INTROSPECTION},
                        headers={"Content-Type": "application/json", **self._get_headers()},
                        timeout=aiohttp.ClientTimeout(total=self.config.timeout),
                        ssl=False
                    ) as response:
                        if response.status == 200:
                            body = await response.text(errors="replace")
                            if "__schema" in body or "queryType" in body:
                                graphql_eps.append(url)
                                self.discovered.add(url)
                                self.endpoints.append(DiscoveredEndpoint(
                                    url=url,
                                    method="POST",
                                    status_code=200,
                                    endpoint_type="graphql",
                                    content_type="application/json",
                                    response_size=len(body),
                                    auth_required=False
                                ))
            except Exception:
                # Best-effort: a failed introspection attempt is just a miss.
                pass

        self.graphql_endpoints = graphql_eps
        return graphql_eps

    async def discover_common_paths(self) -> List["DiscoveredEndpoint"]:
        """Discover common API paths, probing up to ``max_concurrent`` at once."""
        found = await self._check_many(self.COMMON_API_PATHS)
        self.endpoints.extend(found)
        return found

    async def discover_versions(self) -> List[str]:
        """Discover API versions.

        A version prefix is reported when either the prefix itself or one
        of a few common sub-paths under it answers.
        """
        versions = []

        for version_path in self.VERSION_PATTERNS:
            # Test version root
            endpoint = await self._check_url(version_path)
            if endpoint:
                versions.append(version_path)
                self.endpoints.append(endpoint)

            # Test version with common endpoints
            for suffix in ["/users", "/health", "/status", "/info"]:
                full_path = f"{version_path}{suffix}"
                endpoint = await self._check_url(full_path)
                if endpoint:
                    if version_path not in versions:
                        versions.append(version_path)
                    self.endpoints.append(endpoint)

        self.api_versions = versions
        return versions

    async def discover_from_html(self) -> List["DiscoveredEndpoint"]:
        """Extract API endpoints from HTML/JavaScript.

        Fetches the base URL, pulls URL-looking strings out of the body
        and probes those that live on the same host as the base URL.
        """
        found = []

        # The landing page is parsed whatever its status code.
        body = await self._fetch_text(self.base_url, only_ok=False)
        if body is None:
            return found

        # Extract URLs from HTML/JS
        url_patterns = [
            r'["\'](/api/[^"\']+)["\']',
            r'["\'](/v\d+/[^"\']+)["\']',
            r'["\'](https?://[^"\']*api[^"\']*)["\']',
            r'fetch\(["\']([^"\']+)["\']',
            r'axios\.\w+\(["\']([^"\']+)["\']'
        ]

        extracted_urls = set()
        for pattern in url_patterns:
            extracted_urls.update(re.findall(pattern, body))

        base_netloc = urlparse(self.base_url).netloc

        # Sorted so the probing order is deterministic.
        for url in sorted(extracted_urls):
            # Normalize URL
            if url.startswith("/"):
                url = urljoin(self.base_url, url)
            elif not url.startswith("http"):
                continue

            try:
                parsed = urlparse(url)
            except ValueError:
                # Malformed URL in the page; skip it and keep going.
                continue

            # Only check URLs from same domain
            if parsed.netloc == base_netloc:
                endpoint = await self._check_url(parsed.path)
                if endpoint:
                    found.append(endpoint)
                    self.endpoints.append(endpoint)

        return found

    async def discover_from_robots(self) -> List[str]:
        """Check robots.txt for API paths.

        Returns:
            The ``Disallow`` paths that contain ``/api`` or a ``/v<N>``
            segment; each is also probed and hits go to ``self.endpoints``.
        """
        body = await self._fetch_text(urljoin(self.base_url, "/robots.txt"))
        if body is None:
            return []

        paths = self._parse_robots_api_paths(body)
        for path in paths:
            endpoint = await self._check_url(path)
            if endpoint:
                self.endpoints.append(endpoint)

        return paths

    async def discover_from_sitemap(self) -> List[str]:
        """Check sitemap for API paths.

        Returns:
            Paths of sitemap ``<loc>`` URLs that contain ``/api`` or a
            ``/v<N>`` segment; each is also probed against the base URL.
        """
        paths = []

        # Each sitemap is handled independently so one failure does not
        # prevent the other from being read.
        for sitemap_path in ["/sitemap.xml", "/sitemap_index.xml"]:
            body = await self._fetch_text(urljoin(self.base_url, sitemap_path))
            if body is None:
                continue

            # Extract URLs from sitemap
            for found_url in re.findall(r"<loc>([^<]+)</loc>", body):
                # <loc> values are often wrapped in whitespace/newlines.
                found_url = found_url.strip()
                if not self._looks_like_api_path(found_url):
                    continue

                try:
                    parsed = urlparse(found_url)
                except ValueError:
                    continue

                paths.append(parsed.path)
                endpoint = await self._check_url(parsed.path)
                if endpoint:
                    self.endpoints.append(endpoint)

        return paths

    async def discover(self) -> "APIDiscoveryResult":
        """
        Run full API discovery.

        Runs the techniques enabled in the configuration, then always
        harvests paths from the landing page, robots.txt and sitemaps.

        Returns:
            APIDiscoveryResult with all discovered endpoints
        """
        started_at = datetime.now(timezone.utc).isoformat()
        # We are inside a coroutine, so a running loop always exists.
        loop = asyncio.get_running_loop()
        start_time = loop.time()

        # Run discovery tasks
        if self.config.discover_swagger:
            await self.discover_swagger()

        if self.config.discover_graphql:
            await self.discover_graphql()

        if self.config.discover_common_paths:
            await self.discover_common_paths()

        if self.config.discover_versions:
            await self.discover_versions()

        # Additional discovery
        await self.discover_from_html()
        await self.discover_from_robots()
        await self.discover_from_sitemap()

        finished_at = datetime.now(timezone.utc).isoformat()
        duration = loop.time() - start_time

        # Deduplicate endpoints on (method, url), keeping the first seen.
        seen = set()
        unique_endpoints = []
        for ep in self.endpoints:
            key = f"{ep.method}:{ep.url}"
            if key not in seen:
                seen.add(key)
                unique_endpoints.append(ep)

        return APIDiscoveryResult(
            base_url=self.base_url,
            status="completed",
            started_at=started_at,
            finished_at=finished_at,
            duration=duration,
            endpoints=unique_endpoints,
            swagger_specs=self.swagger_specs,
            graphql_endpoints=self.graphql_endpoints,
            api_versions=self.api_versions,
            metadata={
                # Counts URLs that produced a hit (the ``discovered`` set).
                "urls_checked": len(self.discovered),
                "endpoints_found": len(unique_endpoints)
            }
        )
539
+
540
+
541
+ # Convenience function
542
async def discover_api(
    base_url: str,
    auth_token: Optional[str] = None,
    full_scan: bool = True
) -> APIDiscoveryResult:
    """
    Run API discovery against a target with a single call.

    Spec and GraphQL detection always run; common-path fuzzing and
    version enumeration run only when ``full_scan`` is set.

    Args:
        base_url: Target base URL
        auth_token: Optional bearer token
        full_scan: Run comprehensive discovery

    Returns:
        APIDiscoveryResult
    """
    # A missing/empty token is passed on as the empty string.
    token = auth_token if auth_token else ""

    options = {
        "discover_swagger": True,
        "discover_graphql": True,
        "discover_common_paths": full_scan,
        "discover_versions": full_scan,
    }
    settings = APIDiscoveryConfig(base_url=base_url, auth_token=token, **options)

    scanner = APIDiscovery(base_url, settings)
    outcome = await scanner.discover()
    return outcome
569
+
570
+
571
async def quick_api_check(base_url: str) -> Dict[str, Any]:
    """
    Quick check for API presence.

    Runs spec detection, GraphQL detection and version enumeration
    (in that order) with the default configuration.

    Args:
        base_url: Target URL

    Returns:
        Dict with API detection results:
            has_swagger / swagger_url: first spec URL found, if any
            has_graphql / graphql_url: first GraphQL endpoint found, if any
            api_version: first version prefix that answered, if any
            has_api: True when any of the three checks found something
    """
    discovery = APIDiscovery(base_url)

    # Quick checks
    swagger = await discovery.discover_swagger()
    graphql = await discovery.discover_graphql()
    versions = await discovery.discover_versions()

    return {
        "has_swagger": bool(swagger),
        "has_graphql": bool(graphql),
        # A published spec or a GraphQL endpoint is itself evidence of an
        # API, so it counts even when no version prefix answered.
        "has_api": bool(swagger or graphql or versions),
        "swagger_url": swagger[0] if swagger else None,
        "graphql_url": graphql[0] if graphql else None,
        "api_version": versions[0] if versions else None
    }