aiptx-2.0.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. aipt_v2/__init__.py +110 -0
  2. aipt_v2/__main__.py +24 -0
  3. aipt_v2/agents/AIPTxAgent/__init__.py +10 -0
  4. aipt_v2/agents/AIPTxAgent/aiptx_agent.py +211 -0
  5. aipt_v2/agents/__init__.py +46 -0
  6. aipt_v2/agents/base.py +520 -0
  7. aipt_v2/agents/exploit_agent.py +688 -0
  8. aipt_v2/agents/ptt.py +406 -0
  9. aipt_v2/agents/state.py +168 -0
  10. aipt_v2/app.py +957 -0
  11. aipt_v2/browser/__init__.py +31 -0
  12. aipt_v2/browser/automation.py +458 -0
  13. aipt_v2/browser/crawler.py +453 -0
  14. aipt_v2/cli.py +2933 -0
  15. aipt_v2/compliance/__init__.py +71 -0
  16. aipt_v2/compliance/compliance_report.py +449 -0
  17. aipt_v2/compliance/framework_mapper.py +424 -0
  18. aipt_v2/compliance/nist_mapping.py +345 -0
  19. aipt_v2/compliance/owasp_mapping.py +330 -0
  20. aipt_v2/compliance/pci_mapping.py +297 -0
  21. aipt_v2/config.py +341 -0
  22. aipt_v2/core/__init__.py +43 -0
  23. aipt_v2/core/agent.py +630 -0
  24. aipt_v2/core/llm.py +395 -0
  25. aipt_v2/core/memory.py +305 -0
  26. aipt_v2/core/ptt.py +329 -0
  27. aipt_v2/database/__init__.py +14 -0
  28. aipt_v2/database/models.py +232 -0
  29. aipt_v2/database/repository.py +384 -0
  30. aipt_v2/docker/__init__.py +23 -0
  31. aipt_v2/docker/builder.py +260 -0
  32. aipt_v2/docker/manager.py +222 -0
  33. aipt_v2/docker/sandbox.py +371 -0
  34. aipt_v2/evasion/__init__.py +58 -0
  35. aipt_v2/evasion/request_obfuscator.py +272 -0
  36. aipt_v2/evasion/tls_fingerprint.py +285 -0
  37. aipt_v2/evasion/ua_rotator.py +301 -0
  38. aipt_v2/evasion/waf_bypass.py +439 -0
  39. aipt_v2/execution/__init__.py +23 -0
  40. aipt_v2/execution/executor.py +302 -0
  41. aipt_v2/execution/parser.py +544 -0
  42. aipt_v2/execution/terminal.py +337 -0
  43. aipt_v2/health.py +437 -0
  44. aipt_v2/intelligence/__init__.py +194 -0
  45. aipt_v2/intelligence/adaptation.py +474 -0
  46. aipt_v2/intelligence/auth.py +520 -0
  47. aipt_v2/intelligence/chaining.py +775 -0
  48. aipt_v2/intelligence/correlation.py +536 -0
  49. aipt_v2/intelligence/cve_aipt.py +334 -0
  50. aipt_v2/intelligence/cve_info.py +1111 -0
  51. aipt_v2/intelligence/knowledge_graph.py +590 -0
  52. aipt_v2/intelligence/learning.py +626 -0
  53. aipt_v2/intelligence/llm_analyzer.py +502 -0
  54. aipt_v2/intelligence/llm_tool_selector.py +518 -0
  55. aipt_v2/intelligence/payload_generator.py +562 -0
  56. aipt_v2/intelligence/rag.py +239 -0
  57. aipt_v2/intelligence/scope.py +442 -0
  58. aipt_v2/intelligence/searchers/__init__.py +5 -0
  59. aipt_v2/intelligence/searchers/exploitdb_searcher.py +523 -0
  60. aipt_v2/intelligence/searchers/github_searcher.py +467 -0
  61. aipt_v2/intelligence/searchers/google_searcher.py +281 -0
  62. aipt_v2/intelligence/tools.json +443 -0
  63. aipt_v2/intelligence/triage.py +670 -0
  64. aipt_v2/interactive_shell.py +559 -0
  65. aipt_v2/interface/__init__.py +5 -0
  66. aipt_v2/interface/cli.py +230 -0
  67. aipt_v2/interface/main.py +501 -0
  68. aipt_v2/interface/tui.py +1276 -0
  69. aipt_v2/interface/utils.py +583 -0
  70. aipt_v2/llm/__init__.py +39 -0
  71. aipt_v2/llm/config.py +26 -0
  72. aipt_v2/llm/llm.py +514 -0
  73. aipt_v2/llm/memory.py +214 -0
  74. aipt_v2/llm/request_queue.py +89 -0
  75. aipt_v2/llm/utils.py +89 -0
  76. aipt_v2/local_tool_installer.py +1467 -0
  77. aipt_v2/models/__init__.py +15 -0
  78. aipt_v2/models/findings.py +295 -0
  79. aipt_v2/models/phase_result.py +224 -0
  80. aipt_v2/models/scan_config.py +207 -0
  81. aipt_v2/monitoring/grafana/dashboards/aipt-dashboard.json +355 -0
  82. aipt_v2/monitoring/grafana/dashboards/default.yml +17 -0
  83. aipt_v2/monitoring/grafana/datasources/prometheus.yml +17 -0
  84. aipt_v2/monitoring/prometheus.yml +60 -0
  85. aipt_v2/orchestration/__init__.py +52 -0
  86. aipt_v2/orchestration/pipeline.py +398 -0
  87. aipt_v2/orchestration/progress.py +300 -0
  88. aipt_v2/orchestration/scheduler.py +296 -0
  89. aipt_v2/orchestrator.py +2427 -0
  90. aipt_v2/payloads/__init__.py +27 -0
  91. aipt_v2/payloads/cmdi.py +150 -0
  92. aipt_v2/payloads/sqli.py +263 -0
  93. aipt_v2/payloads/ssrf.py +204 -0
  94. aipt_v2/payloads/templates.py +222 -0
  95. aipt_v2/payloads/traversal.py +166 -0
  96. aipt_v2/payloads/xss.py +204 -0
  97. aipt_v2/prompts/__init__.py +60 -0
  98. aipt_v2/proxy/__init__.py +29 -0
  99. aipt_v2/proxy/history.py +352 -0
  100. aipt_v2/proxy/interceptor.py +452 -0
  101. aipt_v2/recon/__init__.py +44 -0
  102. aipt_v2/recon/dns.py +241 -0
  103. aipt_v2/recon/osint.py +367 -0
  104. aipt_v2/recon/subdomain.py +372 -0
  105. aipt_v2/recon/tech_detect.py +311 -0
  106. aipt_v2/reports/__init__.py +17 -0
  107. aipt_v2/reports/generator.py +313 -0
  108. aipt_v2/reports/html_report.py +378 -0
  109. aipt_v2/runtime/__init__.py +53 -0
  110. aipt_v2/runtime/base.py +30 -0
  111. aipt_v2/runtime/docker.py +401 -0
  112. aipt_v2/runtime/local.py +346 -0
  113. aipt_v2/runtime/tool_server.py +205 -0
  114. aipt_v2/runtime/vps.py +830 -0
  115. aipt_v2/scanners/__init__.py +28 -0
  116. aipt_v2/scanners/base.py +273 -0
  117. aipt_v2/scanners/nikto.py +244 -0
  118. aipt_v2/scanners/nmap.py +402 -0
  119. aipt_v2/scanners/nuclei.py +273 -0
  120. aipt_v2/scanners/web.py +454 -0
  121. aipt_v2/scripts/security_audit.py +366 -0
  122. aipt_v2/setup_wizard.py +941 -0
  123. aipt_v2/skills/__init__.py +80 -0
  124. aipt_v2/skills/agents/__init__.py +14 -0
  125. aipt_v2/skills/agents/api_tester.py +706 -0
  126. aipt_v2/skills/agents/base.py +477 -0
  127. aipt_v2/skills/agents/code_review.py +459 -0
  128. aipt_v2/skills/agents/security_agent.py +336 -0
  129. aipt_v2/skills/agents/web_pentest.py +818 -0
  130. aipt_v2/skills/prompts/__init__.py +647 -0
  131. aipt_v2/system_detector.py +539 -0
  132. aipt_v2/telemetry/__init__.py +7 -0
  133. aipt_v2/telemetry/tracer.py +347 -0
  134. aipt_v2/terminal/__init__.py +28 -0
  135. aipt_v2/terminal/executor.py +400 -0
  136. aipt_v2/terminal/sandbox.py +350 -0
  137. aipt_v2/tools/__init__.py +44 -0
  138. aipt_v2/tools/active_directory/__init__.py +78 -0
  139. aipt_v2/tools/active_directory/ad_config.py +238 -0
  140. aipt_v2/tools/active_directory/bloodhound_wrapper.py +447 -0
  141. aipt_v2/tools/active_directory/kerberos_attacks.py +430 -0
  142. aipt_v2/tools/active_directory/ldap_enum.py +533 -0
  143. aipt_v2/tools/active_directory/smb_attacks.py +505 -0
  144. aipt_v2/tools/agents_graph/__init__.py +19 -0
  145. aipt_v2/tools/agents_graph/agents_graph_actions.py +69 -0
  146. aipt_v2/tools/api_security/__init__.py +76 -0
  147. aipt_v2/tools/api_security/api_discovery.py +608 -0
  148. aipt_v2/tools/api_security/graphql_scanner.py +622 -0
  149. aipt_v2/tools/api_security/jwt_analyzer.py +577 -0
  150. aipt_v2/tools/api_security/openapi_fuzzer.py +761 -0
  151. aipt_v2/tools/browser/__init__.py +5 -0
  152. aipt_v2/tools/browser/browser_actions.py +238 -0
  153. aipt_v2/tools/browser/browser_instance.py +535 -0
  154. aipt_v2/tools/browser/tab_manager.py +344 -0
  155. aipt_v2/tools/cloud/__init__.py +70 -0
  156. aipt_v2/tools/cloud/cloud_config.py +273 -0
  157. aipt_v2/tools/cloud/cloud_scanner.py +639 -0
  158. aipt_v2/tools/cloud/prowler_tool.py +571 -0
  159. aipt_v2/tools/cloud/scoutsuite_tool.py +359 -0
  160. aipt_v2/tools/executor.py +307 -0
  161. aipt_v2/tools/parser.py +408 -0
  162. aipt_v2/tools/proxy/__init__.py +5 -0
  163. aipt_v2/tools/proxy/proxy_actions.py +103 -0
  164. aipt_v2/tools/proxy/proxy_manager.py +789 -0
  165. aipt_v2/tools/registry.py +196 -0
  166. aipt_v2/tools/scanners/__init__.py +343 -0
  167. aipt_v2/tools/scanners/acunetix_tool.py +712 -0
  168. aipt_v2/tools/scanners/burp_tool.py +631 -0
  169. aipt_v2/tools/scanners/config.py +156 -0
  170. aipt_v2/tools/scanners/nessus_tool.py +588 -0
  171. aipt_v2/tools/scanners/zap_tool.py +612 -0
  172. aipt_v2/tools/terminal/__init__.py +5 -0
  173. aipt_v2/tools/terminal/terminal_actions.py +37 -0
  174. aipt_v2/tools/terminal/terminal_manager.py +153 -0
  175. aipt_v2/tools/terminal/terminal_session.py +449 -0
  176. aipt_v2/tools/tool_processing.py +108 -0
  177. aipt_v2/utils/__init__.py +17 -0
  178. aipt_v2/utils/logging.py +202 -0
  179. aipt_v2/utils/model_manager.py +187 -0
  180. aipt_v2/utils/searchers/__init__.py +269 -0
  181. aipt_v2/verify_install.py +793 -0
  182. aiptx-2.0.7.dist-info/METADATA +345 -0
  183. aiptx-2.0.7.dist-info/RECORD +187 -0
  184. aiptx-2.0.7.dist-info/WHEEL +5 -0
  185. aiptx-2.0.7.dist-info/entry_points.txt +7 -0
  186. aiptx-2.0.7.dist-info/licenses/LICENSE +21 -0
  187. aiptx-2.0.7.dist-info/top_level.txt +1 -0
aipt_v2/scanners/web.py
@@ -0,0 +1,454 @@
+"""
+AIPT Web Scanner
+
+Built-in web vulnerability scanner for common issues.
+"""
+from __future__ import annotations
+
+import asyncio
+import logging
+import re
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Optional
+from urllib.parse import urljoin, urlparse
+
+import httpx
+
+from .base import BaseScanner, ScanFinding, ScanResult, ScanSeverity
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class WebScanConfig:
+    """Web scanner configuration"""
+    # Checks to perform
+    check_headers: bool = True
+    check_ssl: bool = True
+    check_directories: bool = True
+    check_methods: bool = True
+    check_robots: bool = True
+    check_security_txt: bool = True
+
+    # Request settings
+    timeout: float = 10.0
+    user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
+    follow_redirects: bool = True
+
+    # Authentication
+    cookies: dict[str, str] = field(default_factory=dict)
+    headers: dict[str, str] = field(default_factory=dict)
+
+    # Directory enumeration
+    common_dirs: list[str] = field(default_factory=lambda: [
+        "admin", "administrator", "wp-admin", "login", "dashboard",
+        "api", "api/v1", "api/v2", "graphql",
+        "backup", "backups", "db", "database",
+        "config", "configuration", "settings",
+        "test", "testing", "dev", "development", "staging",
+        "uploads", "upload", "files", "media",
+        ".git", ".svn", ".env", ".htaccess",
+        "phpinfo.php", "info.php", "test.php",
+        "server-status", "server-info",
+    ])
+
+
+class WebScanner(BaseScanner):
+    """
+    Built-in web vulnerability scanner.
+
+    Performs common security checks:
+    - Security header analysis
+    - SSL/TLS configuration
+    - Directory enumeration
+    - HTTP method testing
+    - robots.txt/security.txt analysis
+
+    Example:
+        scanner = WebScanner(WebScanConfig(
+            check_directories=True,
+            check_headers=True,
+        ))
+        result = await scanner.scan("https://target.com")
+    """
+
+    def __init__(self, config: Optional[WebScanConfig] = None):
+        super().__init__()
+        self.config = config or WebScanConfig()
+        self._client: Optional[httpx.AsyncClient] = None
+
+    def is_available(self) -> bool:
+        """Always available - uses httpx"""
+        return True
+
+    async def scan(self, target: str, **kwargs) -> ScanResult:
+        """
+        Run web security scan.
+
+        Args:
+            target: Target URL
+            **kwargs: Override config options
+
+        Returns:
+            ScanResult with findings
+        """
+        result = ScanResult(scanner="web_scanner", target=target)
+        result.start_time = datetime.utcnow()
+        result.status = "running"
+
+        # Ensure URL has scheme
+        if not target.startswith(("http://", "https://")):
+            target = f"https://{target}"
+
+        # Create HTTP client
+        headers = {"User-Agent": self.config.user_agent}
+        headers.update(self.config.headers)
+
+        self._client = httpx.AsyncClient(
+            timeout=self.config.timeout,
+            follow_redirects=self.config.follow_redirects,
+            headers=headers,
+            cookies=self.config.cookies,
+            verify=False,  # For testing sites with self-signed certs
+        )
+
+        try:
+            # Run checks concurrently
+            tasks = []
+
+            if self.config.check_headers:
+                tasks.append(self._check_security_headers(target))
+
+            if self.config.check_ssl:
+                tasks.append(self._check_ssl(target))
+
+            if self.config.check_methods:
+                tasks.append(self._check_http_methods(target))
+
+            if self.config.check_robots:
+                tasks.append(self._check_robots_txt(target))
+
+            if self.config.check_security_txt:
+                tasks.append(self._check_security_txt(target))
+
+            if self.config.check_directories:
+                tasks.append(self._check_directories(target))
+
+            # Gather results
+            findings_lists = await asyncio.gather(*tasks, return_exceptions=True)
+
+            for findings in findings_lists:
+                if isinstance(findings, list):
+                    result.findings.extend(findings)
+                elif isinstance(findings, Exception):
+                    result.errors.append(str(findings))
+
+            result.status = "completed"
+
+        except Exception as e:
+            result.status = "failed"
+            result.errors.append(str(e))
+        finally:
+            await self._client.aclose()
+            result.end_time = datetime.utcnow()
+            result.duration_seconds = (result.end_time - result.start_time).total_seconds()
+
+        logger.info(f"Web scan complete: {len(result.findings)} findings")
+        return result
+
+    def parse_output(self, output: str) -> list[ScanFinding]:
+        """Not used - findings created directly"""
+        return []
+
+    async def _check_security_headers(self, url: str) -> list[ScanFinding]:
+        """Check for missing security headers"""
+        findings = []
+
+        try:
+            response = await self._client.get(url)
+
+            # Required security headers
+            security_headers = {
+                "Strict-Transport-Security": {
+                    "severity": ScanSeverity.MEDIUM,
+                    "description": "HSTS header not set. Site may be vulnerable to protocol downgrade attacks.",
+                },
+                "X-Content-Type-Options": {
+                    "severity": ScanSeverity.LOW,
+                    "description": "X-Content-Type-Options header not set. Browser MIME sniffing not prevented.",
+                },
+                "X-Frame-Options": {
+                    "severity": ScanSeverity.MEDIUM,
+                    "description": "X-Frame-Options header not set. Site may be vulnerable to clickjacking.",
+                },
+                "Content-Security-Policy": {
+                    "severity": ScanSeverity.MEDIUM,
+                    "description": "CSP header not set. Site may be vulnerable to XSS attacks.",
+                },
+                "X-XSS-Protection": {
+                    "severity": ScanSeverity.LOW,
+                    "description": "X-XSS-Protection header not set (legacy browsers).",
+                },
+                "Referrer-Policy": {
+                    "severity": ScanSeverity.LOW,
+                    "description": "Referrer-Policy header not set. Referrer information may leak.",
+                },
+                "Permissions-Policy": {
+                    "severity": ScanSeverity.LOW,
+                    "description": "Permissions-Policy header not set. Browser features not restricted.",
+                },
+            }
+
+            for header, info in security_headers.items():
+                if header.lower() not in [h.lower() for h in response.headers.keys()]:
+                    findings.append(ScanFinding(
+                        title=f"Missing Security Header: {header}",
+                        severity=info["severity"],
+                        description=info["description"],
+                        url=url,
+                        scanner="web_scanner",
+                        tags=["header", "security"],
+                    ))
+
+            # Check for information disclosure in headers
+            sensitive_headers = ["X-Powered-By", "Server", "X-AspNet-Version"]
+            for header in sensitive_headers:
+                if header.lower() in [h.lower() for h in response.headers.keys()]:
+                    value = response.headers.get(header, "")
+                    findings.append(ScanFinding(
+                        title=f"Information Disclosure: {header}",
+                        severity=ScanSeverity.LOW,
+                        description=f"Header reveals: {value}",
+                        url=url,
+                        evidence=f"{header}: {value}",
+                        scanner="web_scanner",
+                        tags=["header", "disclosure"],
+                    ))
+
+        except Exception as e:
+            logger.debug(f"Header check error: {e}")
+
+        return findings
+
+    async def _check_ssl(self, url: str) -> list[ScanFinding]:
+        """Check SSL/TLS configuration"""
+        findings = []
+
+        parsed = urlparse(url)
+        if parsed.scheme != "https":
+            findings.append(ScanFinding(
+                title="Site Not Using HTTPS",
+                severity=ScanSeverity.HIGH,
+                description="Site is not using HTTPS. All traffic is unencrypted.",
+                url=url,
+                scanner="web_scanner",
+                tags=["ssl", "encryption"],
+            ))
+            return findings
+
+        # Check for SSL issues
+        try:
+            import ssl
+            import socket
+
+            hostname = parsed.netloc.split(":")[0]
+            port = int(parsed.port) if parsed.port else 443
+
+            context = ssl.create_default_context()
+            with socket.create_connection((hostname, port), timeout=10) as sock:
+                with context.wrap_socket(sock, server_hostname=hostname) as ssock:
+                    cert = ssock.getpeercert()
+
+                    # Check expiry
+                    from datetime import datetime
+                    not_after = datetime.strptime(cert["notAfter"], "%b %d %H:%M:%S %Y %Z")
+                    days_until_expiry = (not_after - datetime.utcnow()).days
+
+                    if days_until_expiry < 0:
+                        findings.append(ScanFinding(
+                            title="SSL Certificate Expired",
+                            severity=ScanSeverity.HIGH,
+                            description=f"Certificate expired {abs(days_until_expiry)} days ago",
+                            url=url,
+                            scanner="web_scanner",
+                            tags=["ssl", "certificate"],
+                        ))
+                    elif days_until_expiry < 30:
+                        findings.append(ScanFinding(
+                            title="SSL Certificate Expiring Soon",
+                            severity=ScanSeverity.MEDIUM,
+                            description=f"Certificate expires in {days_until_expiry} days",
+                            url=url,
+                            scanner="web_scanner",
+                            tags=["ssl", "certificate"],
+                        ))
+
+        except Exception as e:
+            logger.debug(f"SSL check error: {e}")
+
+        return findings
+
+    async def _check_http_methods(self, url: str) -> list[ScanFinding]:
+        """Check for dangerous HTTP methods"""
+        findings = []
+        dangerous_methods = ["PUT", "DELETE", "TRACE", "CONNECT"]
+
+        try:
+            # OPTIONS request
+            response = await self._client.options(url)
+            allowed = response.headers.get("Allow", "")
+
+            for method in dangerous_methods:
+                if method in allowed.upper():
+                    findings.append(ScanFinding(
+                        title=f"Dangerous HTTP Method Allowed: {method}",
+                        severity=ScanSeverity.MEDIUM,
+                        description=f"HTTP {method} method is enabled on the server",
+                        url=url,
+                        evidence=f"Allow: {allowed}",
+                        scanner="web_scanner",
+                        tags=["method", "configuration"],
+                    ))
+
+            # Check TRACE specifically
+            try:
+                response = await self._client.request("TRACE", url)
+                if response.status_code == 200:
+                    findings.append(ScanFinding(
+                        title="HTTP TRACE Method Enabled",
+                        severity=ScanSeverity.MEDIUM,
+                        description="TRACE method is enabled, potential XST vulnerability",
+                        url=url,
+                        scanner="web_scanner",
+                        tags=["method", "xst"],
+                    ))
+            except Exception:
+                pass
+
+        except Exception as e:
+            logger.debug(f"Method check error: {e}")
+
+        return findings
+
+    async def _check_robots_txt(self, url: str) -> list[ScanFinding]:
+        """Analyze robots.txt for sensitive paths"""
+        findings = []
+
+        try:
+            robots_url = urljoin(url, "/robots.txt")
+            response = await self._client.get(robots_url)
+
+            if response.status_code == 200:
+                content = response.text
+
+                # Look for sensitive paths
+                sensitive_patterns = [
+                    r"disallow:\s*/admin",
+                    r"disallow:\s*/backup",
+                    r"disallow:\s*/private",
+                    r"disallow:\s*/config",
+                    r"disallow:\s*/api",
+                    r"disallow:\s*/\*password",
+                    r"disallow:\s*/\*secret",
+                ]
+
+                found_paths = []
+                for pattern in sensitive_patterns:
+                    matches = re.findall(pattern, content, re.IGNORECASE)
+                    found_paths.extend(matches)
+
+                if found_paths:
+                    findings.append(ScanFinding(
+                        title="Sensitive Paths in robots.txt",
+                        severity=ScanSeverity.LOW,
+                        description="robots.txt reveals potentially sensitive paths",
+                        url=robots_url,
+                        evidence="\n".join(found_paths[:10]),
+                        scanner="web_scanner",
+                        tags=["robots", "disclosure"],
+                    ))
+
+        except Exception as e:
+            logger.debug(f"robots.txt check error: {e}")
+
+        return findings
+
+    async def _check_security_txt(self, url: str) -> list[ScanFinding]:
+        """Check for security.txt"""
+        findings = []
+
+        try:
+            # Check both locations
+            locations = [
+                urljoin(url, "/.well-known/security.txt"),
+                urljoin(url, "/security.txt"),
+            ]
+
+            found = False
+            for security_url in locations:
+                response = await self._client.get(security_url)
+                if response.status_code == 200 and "contact" in response.text.lower():
+                    found = True
+                    break
+
+            if not found:
+                findings.append(ScanFinding(
+                    title="Missing security.txt",
+                    severity=ScanSeverity.INFO,
+                    description="No security.txt found. Consider adding one for vulnerability disclosure.",
+                    url=url,
+                    scanner="web_scanner",
+                    tags=["security.txt", "best-practice"],
+                ))
+
+        except Exception as e:
+            logger.debug(f"security.txt check error: {e}")
+
+        return findings
+
+    async def _check_directories(self, url: str) -> list[ScanFinding]:
+        """Check for exposed directories"""
+        findings = []
+        semaphore = asyncio.Semaphore(10)  # Limit concurrent requests
+
+        async def check_dir(path: str) -> Optional[ScanFinding]:
+            async with semaphore:
+                try:
+                    check_url = urljoin(url, f"/{path}")
+                    response = await self._client.get(check_url)
+
+                    if response.status_code == 200:
+                        severity = ScanSeverity.MEDIUM
+                        if any(s in path for s in [".git", ".env", "backup", "config"]):
+                            severity = ScanSeverity.HIGH
+
+                        return ScanFinding(
+                            title=f"Exposed Path: /{path}",
+                            severity=severity,
+                            description=f"Path /{path} is accessible (HTTP {response.status_code})",
+                            url=check_url,
+                            scanner="web_scanner",
+                            tags=["directory", "enumeration"],
+                        )
+                    elif response.status_code == 403:
+                        return ScanFinding(
+                            title=f"Protected Path Found: /{path}",
+                            severity=ScanSeverity.INFO,
+                            description=f"Path /{path} exists but is forbidden",
+                            url=check_url,
+                            scanner="web_scanner",
+                            tags=["directory", "forbidden"],
+                        )
+                except Exception:
+                    pass
+                return None
+
+        tasks = [check_dir(path) for path in self.config.common_dirs]
+        results = await asyncio.gather(*tasks)
+
+        for result in results:
+            if result:
+                findings.append(result)
+
+        return findings
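
The WebScanner introduced in aipt_v2/scanners/web.py exposes an async scan() API, so a caller needs an event loop around it. The following is a minimal usage sketch based on the Example in the class docstring above: the import path is assumed from the file location, the target URL is a placeholder, and the printed fields are taken from the ScanFinding and ScanResult attributes visible in the diff. Only point it at systems you are authorized to test.

    # Minimal usage sketch (import path assumed from the package layout above).
    import asyncio

    from aipt_v2.scanners.web import WebScanConfig, WebScanner


    async def main() -> None:
        # Keep the noisy directory brute-force off for a quick baseline pass.
        config = WebScanConfig(
            check_headers=True,
            check_ssl=True,
            check_directories=False,
            timeout=5.0,
        )
        scanner = WebScanner(config)

        # Placeholder target; scan only hosts you are authorized to test.
        result = await scanner.scan("https://example.com")

        print(f"status={result.status} findings={len(result.findings)}")
        for finding in result.findings:
            print(f"[{finding.severity}] {finding.title} -> {finding.url}")


    if __name__ == "__main__":
        asyncio.run(main())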