aiptx 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiptx might be problematic. Click here for more details.
- aipt_v2/__init__.py +110 -0
- aipt_v2/__main__.py +24 -0
- aipt_v2/agents/AIPTxAgent/__init__.py +10 -0
- aipt_v2/agents/AIPTxAgent/aiptx_agent.py +211 -0
- aipt_v2/agents/__init__.py +24 -0
- aipt_v2/agents/base.py +520 -0
- aipt_v2/agents/ptt.py +406 -0
- aipt_v2/agents/state.py +168 -0
- aipt_v2/app.py +960 -0
- aipt_v2/browser/__init__.py +31 -0
- aipt_v2/browser/automation.py +458 -0
- aipt_v2/browser/crawler.py +453 -0
- aipt_v2/cli.py +321 -0
- aipt_v2/compliance/__init__.py +71 -0
- aipt_v2/compliance/compliance_report.py +449 -0
- aipt_v2/compliance/framework_mapper.py +424 -0
- aipt_v2/compliance/nist_mapping.py +345 -0
- aipt_v2/compliance/owasp_mapping.py +330 -0
- aipt_v2/compliance/pci_mapping.py +297 -0
- aipt_v2/config.py +288 -0
- aipt_v2/core/__init__.py +43 -0
- aipt_v2/core/agent.py +630 -0
- aipt_v2/core/llm.py +395 -0
- aipt_v2/core/memory.py +305 -0
- aipt_v2/core/ptt.py +329 -0
- aipt_v2/database/__init__.py +14 -0
- aipt_v2/database/models.py +232 -0
- aipt_v2/database/repository.py +384 -0
- aipt_v2/docker/__init__.py +23 -0
- aipt_v2/docker/builder.py +260 -0
- aipt_v2/docker/manager.py +222 -0
- aipt_v2/docker/sandbox.py +371 -0
- aipt_v2/evasion/__init__.py +58 -0
- aipt_v2/evasion/request_obfuscator.py +272 -0
- aipt_v2/evasion/tls_fingerprint.py +285 -0
- aipt_v2/evasion/ua_rotator.py +301 -0
- aipt_v2/evasion/waf_bypass.py +439 -0
- aipt_v2/execution/__init__.py +23 -0
- aipt_v2/execution/executor.py +302 -0
- aipt_v2/execution/parser.py +544 -0
- aipt_v2/execution/terminal.py +337 -0
- aipt_v2/health.py +437 -0
- aipt_v2/intelligence/__init__.py +85 -0
- aipt_v2/intelligence/auth.py +520 -0
- aipt_v2/intelligence/chaining.py +775 -0
- aipt_v2/intelligence/cve_aipt.py +334 -0
- aipt_v2/intelligence/cve_info.py +1111 -0
- aipt_v2/intelligence/rag.py +239 -0
- aipt_v2/intelligence/scope.py +442 -0
- aipt_v2/intelligence/searchers/__init__.py +5 -0
- aipt_v2/intelligence/searchers/exploitdb_searcher.py +523 -0
- aipt_v2/intelligence/searchers/github_searcher.py +467 -0
- aipt_v2/intelligence/searchers/google_searcher.py +281 -0
- aipt_v2/intelligence/tools.json +443 -0
- aipt_v2/intelligence/triage.py +670 -0
- aipt_v2/interface/__init__.py +5 -0
- aipt_v2/interface/cli.py +230 -0
- aipt_v2/interface/main.py +501 -0
- aipt_v2/interface/tui.py +1276 -0
- aipt_v2/interface/utils.py +583 -0
- aipt_v2/llm/__init__.py +39 -0
- aipt_v2/llm/config.py +26 -0
- aipt_v2/llm/llm.py +514 -0
- aipt_v2/llm/memory.py +214 -0
- aipt_v2/llm/request_queue.py +89 -0
- aipt_v2/llm/utils.py +89 -0
- aipt_v2/models/__init__.py +15 -0
- aipt_v2/models/findings.py +295 -0
- aipt_v2/models/phase_result.py +224 -0
- aipt_v2/models/scan_config.py +207 -0
- aipt_v2/monitoring/grafana/dashboards/aipt-dashboard.json +355 -0
- aipt_v2/monitoring/grafana/dashboards/default.yml +17 -0
- aipt_v2/monitoring/grafana/datasources/prometheus.yml +17 -0
- aipt_v2/monitoring/prometheus.yml +60 -0
- aipt_v2/orchestration/__init__.py +52 -0
- aipt_v2/orchestration/pipeline.py +398 -0
- aipt_v2/orchestration/progress.py +300 -0
- aipt_v2/orchestration/scheduler.py +296 -0
- aipt_v2/orchestrator.py +2284 -0
- aipt_v2/payloads/__init__.py +27 -0
- aipt_v2/payloads/cmdi.py +150 -0
- aipt_v2/payloads/sqli.py +263 -0
- aipt_v2/payloads/ssrf.py +204 -0
- aipt_v2/payloads/templates.py +222 -0
- aipt_v2/payloads/traversal.py +166 -0
- aipt_v2/payloads/xss.py +204 -0
- aipt_v2/prompts/__init__.py +60 -0
- aipt_v2/proxy/__init__.py +29 -0
- aipt_v2/proxy/history.py +352 -0
- aipt_v2/proxy/interceptor.py +452 -0
- aipt_v2/recon/__init__.py +44 -0
- aipt_v2/recon/dns.py +241 -0
- aipt_v2/recon/osint.py +367 -0
- aipt_v2/recon/subdomain.py +372 -0
- aipt_v2/recon/tech_detect.py +311 -0
- aipt_v2/reports/__init__.py +17 -0
- aipt_v2/reports/generator.py +313 -0
- aipt_v2/reports/html_report.py +378 -0
- aipt_v2/runtime/__init__.py +44 -0
- aipt_v2/runtime/base.py +30 -0
- aipt_v2/runtime/docker.py +401 -0
- aipt_v2/runtime/local.py +346 -0
- aipt_v2/runtime/tool_server.py +205 -0
- aipt_v2/scanners/__init__.py +28 -0
- aipt_v2/scanners/base.py +273 -0
- aipt_v2/scanners/nikto.py +244 -0
- aipt_v2/scanners/nmap.py +402 -0
- aipt_v2/scanners/nuclei.py +273 -0
- aipt_v2/scanners/web.py +454 -0
- aipt_v2/scripts/security_audit.py +366 -0
- aipt_v2/telemetry/__init__.py +7 -0
- aipt_v2/telemetry/tracer.py +347 -0
- aipt_v2/terminal/__init__.py +28 -0
- aipt_v2/terminal/executor.py +400 -0
- aipt_v2/terminal/sandbox.py +350 -0
- aipt_v2/tools/__init__.py +44 -0
- aipt_v2/tools/active_directory/__init__.py +78 -0
- aipt_v2/tools/active_directory/ad_config.py +238 -0
- aipt_v2/tools/active_directory/bloodhound_wrapper.py +447 -0
- aipt_v2/tools/active_directory/kerberos_attacks.py +430 -0
- aipt_v2/tools/active_directory/ldap_enum.py +533 -0
- aipt_v2/tools/active_directory/smb_attacks.py +505 -0
- aipt_v2/tools/agents_graph/__init__.py +19 -0
- aipt_v2/tools/agents_graph/agents_graph_actions.py +69 -0
- aipt_v2/tools/api_security/__init__.py +76 -0
- aipt_v2/tools/api_security/api_discovery.py +608 -0
- aipt_v2/tools/api_security/graphql_scanner.py +622 -0
- aipt_v2/tools/api_security/jwt_analyzer.py +577 -0
- aipt_v2/tools/api_security/openapi_fuzzer.py +761 -0
- aipt_v2/tools/browser/__init__.py +5 -0
- aipt_v2/tools/browser/browser_actions.py +238 -0
- aipt_v2/tools/browser/browser_instance.py +535 -0
- aipt_v2/tools/browser/tab_manager.py +344 -0
- aipt_v2/tools/cloud/__init__.py +70 -0
- aipt_v2/tools/cloud/cloud_config.py +273 -0
- aipt_v2/tools/cloud/cloud_scanner.py +639 -0
- aipt_v2/tools/cloud/prowler_tool.py +571 -0
- aipt_v2/tools/cloud/scoutsuite_tool.py +359 -0
- aipt_v2/tools/executor.py +307 -0
- aipt_v2/tools/parser.py +408 -0
- aipt_v2/tools/proxy/__init__.py +5 -0
- aipt_v2/tools/proxy/proxy_actions.py +103 -0
- aipt_v2/tools/proxy/proxy_manager.py +789 -0
- aipt_v2/tools/registry.py +196 -0
- aipt_v2/tools/scanners/__init__.py +343 -0
- aipt_v2/tools/scanners/acunetix_tool.py +712 -0
- aipt_v2/tools/scanners/burp_tool.py +631 -0
- aipt_v2/tools/scanners/config.py +156 -0
- aipt_v2/tools/scanners/nessus_tool.py +588 -0
- aipt_v2/tools/scanners/zap_tool.py +612 -0
- aipt_v2/tools/terminal/__init__.py +5 -0
- aipt_v2/tools/terminal/terminal_actions.py +37 -0
- aipt_v2/tools/terminal/terminal_manager.py +153 -0
- aipt_v2/tools/terminal/terminal_session.py +449 -0
- aipt_v2/tools/tool_processing.py +108 -0
- aipt_v2/utils/__init__.py +17 -0
- aipt_v2/utils/logging.py +201 -0
- aipt_v2/utils/model_manager.py +187 -0
- aipt_v2/utils/searchers/__init__.py +269 -0
- aiptx-2.0.2.dist-info/METADATA +324 -0
- aiptx-2.0.2.dist-info/RECORD +165 -0
- aiptx-2.0.2.dist-info/WHEEL +5 -0
- aiptx-2.0.2.dist-info/entry_points.txt +7 -0
- aiptx-2.0.2.dist-info/licenses/LICENSE +21 -0
- aiptx-2.0.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AIPT Browser Module
|
|
3
|
+
|
|
4
|
+
Browser automation for penetration testing:
|
|
5
|
+
- Playwright-based headless browsing
|
|
6
|
+
- Screenshot capture
|
|
7
|
+
- Form interaction
|
|
8
|
+
- JavaScript injection
|
|
9
|
+
- Cookie/session management
|
|
10
|
+
- DOM analysis
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from .automation import (
|
|
14
|
+
BrowserAutomation,
|
|
15
|
+
BrowserConfig,
|
|
16
|
+
PageResult,
|
|
17
|
+
)
|
|
18
|
+
from .crawler import (
|
|
19
|
+
WebCrawler,
|
|
20
|
+
CrawlConfig,
|
|
21
|
+
CrawlResult,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
"BrowserAutomation",
|
|
26
|
+
"BrowserConfig",
|
|
27
|
+
"PageResult",
|
|
28
|
+
"WebCrawler",
|
|
29
|
+
"CrawlConfig",
|
|
30
|
+
"CrawlResult",
|
|
31
|
+
]
|
|
@@ -0,0 +1,458 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AIPT Browser Automation
|
|
3
|
+
|
|
4
|
+
Playwright-based browser automation for security testing.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import asyncio
|
|
9
|
+
import base64
|
|
10
|
+
import logging
|
|
11
|
+
import re
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from typing import Any, Optional
|
|
15
|
+
from urllib.parse import urljoin, urlparse
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
# Playwright import with fallback
|
|
20
|
+
try:
|
|
21
|
+
from playwright.async_api import async_playwright, Browser, Page, BrowserContext
|
|
22
|
+
PLAYWRIGHT_AVAILABLE = True
|
|
23
|
+
except ImportError:
|
|
24
|
+
PLAYWRIGHT_AVAILABLE = False
|
|
25
|
+
logger.warning("Playwright not installed. Install with: pip install playwright && playwright install")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class BrowserConfig:
    """Browser automation configuration"""
    headless: bool = True  # run without a visible browser window
    browser_type: str = "chromium"  # chromium, firefox, webkit
    viewport_width: int = 1920
    viewport_height: int = 1080
    timeout: float = 30000  # milliseconds (Playwright default-timeout units)
    user_agent: Optional[str] = None  # custom UA; Playwright's default when None

    # Proxy settings (passed to the browser launch options)
    proxy_server: Optional[str] = None
    proxy_username: Optional[str] = None
    proxy_password: Optional[str] = None

    # Security testing options
    ignore_https_errors: bool = True  # tolerate invalid/self-signed certificates
    disable_javascript: bool = False  # block script resources via request routing

    # Performance
    slow_mo: int = 0  # Slow down actions (ms)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class PageResult:
    """Result of a page interaction"""
    url: str  # the URL that was requested
    status_code: int = 0  # 0 when no response object was received
    content: str = ""  # rendered page content (page.content())
    title: str = ""
    cookies: list[dict] = field(default_factory=list)
    headers: dict[str, str] = field(default_factory=dict)
    screenshot_base64: Optional[str] = None
    console_logs: list[str] = field(default_factory=list)
    network_requests: list[dict] = field(default_factory=list)
    forms: list[dict] = field(default_factory=list)
    links: list[str] = field(default_factory=list)
    scripts: list[str] = field(default_factory=list)
    load_time_ms: float = 0.0
    error: Optional[str] = None  # set instead of raising on navigation failure

    def to_dict(self) -> dict:
        """Return a compact summary (counts in place of the full payloads)."""
        return {
            "url": self.url,
            "status_code": self.status_code,
            "title": self.title,
            "cookies_count": len(self.cookies),
            "forms_count": len(self.forms),
            "links_count": len(self.links),
            "load_time_ms": self.load_time_ms,
            "error": self.error,
        }
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class BrowserAutomation:
    """
    Browser automation for penetration testing.

    Features:
    - Headless browser control
    - Screenshot capture
    - Form submission
    - JavaScript execution
    - Cookie/session management
    - Network request interception
    - DOM analysis

    Example:
        async with BrowserAutomation() as browser:
            result = await browser.navigate("https://target.com/login")
            await browser.fill_form({
                "username": "admin",
                "password": "password123"
            })
            await browser.click("button[type=submit]")
            await browser.screenshot("login_result.png")
    """

    def __init__(self, config: Optional[BrowserConfig] = None):
        """Initialize with optional config; the browser is launched in start().

        Raises:
            ImportError: when Playwright is not installed.
        """
        if not PLAYWRIGHT_AVAILABLE:
            raise ImportError("Playwright is required. Install with: pip install playwright && playwright install")

        self.config = config or BrowserConfig()
        self._playwright = None
        self._browser: Optional[Browser] = None
        self._context: Optional[BrowserContext] = None
        self._page: Optional[Page] = None
        # Capture buffers filled by the page event handlers; reset per navigate().
        self._console_logs: list[str] = []
        self._network_requests: list[dict] = []

    async def __aenter__(self) -> "BrowserAutomation":
        await self.start()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()

    async def start(self) -> None:
        """Start browser instance"""
        self._playwright = await async_playwright().start()

        # Select browser type; anything unrecognized falls back to chromium.
        if self.config.browser_type == "firefox":
            browser_launcher = self._playwright.firefox
        elif self.config.browser_type == "webkit":
            browser_launcher = self._playwright.webkit
        else:
            browser_launcher = self._playwright.chromium

        # Launch options
        launch_options = {
            "headless": self.config.headless,
            "slow_mo": self.config.slow_mo,
        }

        if self.config.proxy_server:
            launch_options["proxy"] = {
                "server": self.config.proxy_server,
            }
            if self.config.proxy_username:
                launch_options["proxy"]["username"] = self.config.proxy_username
                launch_options["proxy"]["password"] = self.config.proxy_password or ""

        self._browser = await browser_launcher.launch(**launch_options)

        # Create context
        context_options = {
            "viewport": {
                "width": self.config.viewport_width,
                "height": self.config.viewport_height,
            },
            "ignore_https_errors": self.config.ignore_https_errors,
        }

        if self.config.user_agent:
            context_options["user_agent"] = self.config.user_agent

        self._context = await self._browser.new_context(**context_options)

        # Create page
        self._page = await self._context.new_page()
        self._page.set_default_timeout(self.config.timeout)

        # Event listeners feed the console/network capture buffers.
        self._page.on("console", self._on_console)
        self._page.on("request", self._on_request)

        if self.config.disable_javascript:
            # Emulate disabled JavaScript by serving empty bodies for every
            # script resource; all other requests continue untouched.
            await self._context.route("**/*", lambda route: route.fulfill(body="") if route.request.resource_type == "script" else route.continue_())

        logger.info(f"Browser started: {self.config.browser_type}")

    async def close(self) -> None:
        """Close browser and stop Playwright.

        Handles are reset to None afterwards so close() is idempotent and a
        closed instance cannot accidentally reuse stale Playwright objects.
        """
        if self._browser:
            await self._browser.close()
            self._browser = None
        if self._playwright:
            await self._playwright.stop()
            self._playwright = None
        self._context = None
        self._page = None
        logger.info("Browser closed")

    async def navigate(self, url: str, wait_until: str = "networkidle") -> PageResult:
        """
        Navigate to URL and analyze page.

        Args:
            url: Target URL
            wait_until: load, domcontentloaded, networkidle

        Returns:
            PageResult with page analysis. Failures are recorded in
            PageResult.error rather than raised.
        """
        # Fresh capture buffers for this navigation only.
        self._console_logs.clear()
        self._network_requests.clear()

        result = PageResult(url=url)
        # Monotonic clock for load timing: immune to wall-clock adjustments
        # and avoids the deprecated datetime.utcnow().
        import time
        start_time = time.perf_counter()

        try:
            response = await self._page.goto(url, wait_until=wait_until)

            # goto() can return None; leave status_code=0 in that case.
            if response:
                result.status_code = response.status
                result.headers = dict(response.headers)

            result.content = await self._page.content()
            result.title = await self._page.title()
            result.cookies = await self._context.cookies()
            result.console_logs = self._console_logs.copy()
            result.network_requests = self._network_requests.copy()

            # Extract forms
            result.forms = await self._extract_forms()

            # Extract links
            result.links = await self._extract_links()

            # Extract scripts
            result.scripts = await self._extract_scripts()

        except Exception as e:
            result.error = str(e)
            logger.error(f"Navigation error: {e}")
        finally:
            result.load_time_ms = (time.perf_counter() - start_time) * 1000

        return result

    async def screenshot(
        self,
        path: Optional[str] = None,
        full_page: bool = False,
    ) -> Optional[str]:
        """
        Take screenshot.

        Args:
            path: Save path (optional)
            full_page: Capture full scrollable page

        Returns:
            Base64 encoded screenshot if no path specified, the path when
            saved to disk, or None on error.
        """
        try:
            screenshot_bytes = await self._page.screenshot(
                path=path,
                full_page=full_page,
            )
            if not path:
                return base64.b64encode(screenshot_bytes).decode()
            return path
        except Exception as e:
            logger.error(f"Screenshot error: {e}")
            return None

    async def fill_form(
        self,
        form_data: dict[str, str],
        form_selector: Optional[str] = None,
    ) -> bool:
        """
        Fill form fields.

        Args:
            form_data: Field name/id -> value mapping
            form_selector: Optional form selector

        Returns:
            True if successful. Fields that match no selector are skipped
            (with a warning) rather than treated as a failure.
        """
        try:
            for field_name, value in form_data.items():
                # Try multiple selector strategies, best match first.
                selectors = [
                    f'[name="{field_name}"]',
                    f'#{field_name}',
                    f'[id="{field_name}"]',
                    f'[placeholder*="{field_name}" i]',
                ]

                if form_selector:
                    selectors = [f'{form_selector} {s}' for s in selectors]

                filled = False
                for selector in selectors:
                    try:
                        element = await self._page.query_selector(selector)
                        if element:
                            await element.fill(value)
                            filled = True
                            break
                    except Exception:
                        continue

                if not filled:
                    # Previously this was silent; surface it for debugging.
                    logger.warning(f"fill_form: no matching input for field '{field_name}'")

            return True
        except Exception as e:
            logger.error(f"Form fill error: {e}")
            return False

    async def click(self, selector: str) -> bool:
        """Click element; returns True on success"""
        try:
            await self._page.click(selector)
            return True
        except Exception as e:
            logger.error(f"Click error: {e}")
            return False

    async def execute_js(self, script: str) -> Any:
        """Execute JavaScript and return result (None on error)"""
        try:
            return await self._page.evaluate(script)
        except Exception as e:
            logger.error(f"JS execution error: {e}")
            return None

    async def inject_script(self, script: str) -> bool:
        """Inject script into page as a <script> tag"""
        try:
            await self._page.add_script_tag(content=script)
            return True
        except Exception as e:
            logger.error(f"Script injection error: {e}")
            return False

    async def set_cookies(self, cookies: list[dict]) -> None:
        """Set cookies on the browser context"""
        await self._context.add_cookies(cookies)

    async def get_cookies(self) -> list[dict]:
        """Get all cookies from the browser context"""
        return await self._context.cookies()

    async def clear_cookies(self) -> None:
        """Clear all cookies"""
        await self._context.clear_cookies()

    async def wait_for_selector(self, selector: str, timeout: float = 30000) -> bool:
        """Wait for element to appear; False on timeout"""
        try:
            await self._page.wait_for_selector(selector, timeout=timeout)
            return True
        except Exception:
            return False

    async def get_text(self, selector: str) -> Optional[str]:
        """Get text content of element (None when absent or on error)"""
        try:
            element = await self._page.query_selector(selector)
            if element:
                return await element.text_content()
        except Exception as e:
            # Best-effort lookup; log for debugging instead of swallowing.
            logger.debug(f"get_text error: {e}")
        return None

    async def get_attribute(self, selector: str, attribute: str) -> Optional[str]:
        """Get element attribute (None when absent or on error)"""
        try:
            element = await self._page.query_selector(selector)
            if element:
                return await element.get_attribute(attribute)
        except Exception as e:
            logger.debug(f"get_attribute error: {e}")
        return None

    async def _extract_forms(self) -> list[dict]:
        """Extract all forms from page (action/method/id plus input fields)"""
        forms = []
        try:
            form_elements = await self._page.query_selector_all("form")
            for form in form_elements:
                form_data = {
                    "action": await form.get_attribute("action") or "",
                    "method": await form.get_attribute("method") or "GET",
                    "id": await form.get_attribute("id") or "",
                    "inputs": [],
                }

                inputs = await form.query_selector_all("input, textarea, select")
                for input_elem in inputs:
                    input_data = {
                        "name": await input_elem.get_attribute("name") or "",
                        "type": await input_elem.get_attribute("type") or "text",
                        "value": await input_elem.get_attribute("value") or "",
                    }
                    form_data["inputs"].append(input_data)

                forms.append(form_data)
        except Exception as e:
            logger.debug(f"Form extraction error: {e}")

        return forms

    async def _extract_links(self) -> list[str]:
        """Extract all links from page, resolved to absolute URLs, deduplicated"""
        links = []
        try:
            anchors = await self._page.query_selector_all("a[href]")
            base_url = self._page.url

            for anchor in anchors:
                href = await anchor.get_attribute("href")
                if href:
                    # Resolve relative URLs against the current page URL.
                    full_url = urljoin(base_url, href)
                    if full_url not in links:
                        links.append(full_url)
        except Exception as e:
            logger.debug(f"Link extraction error: {e}")

        return links

    async def _extract_scripts(self) -> list[str]:
        """Extract script sources from page.

        External scripts are returned as absolute URLs; inline scripts are
        recorded as size placeholders, not their content.
        """
        scripts = []
        try:
            script_elements = await self._page.query_selector_all("script[src]")
            for script in script_elements:
                src = await script.get_attribute("src")
                if src:
                    scripts.append(urljoin(self._page.url, src))

            # Also note inline scripts (length only; >10 chars to skip stubs).
            inline_scripts = await self._page.query_selector_all("script:not([src])")
            for script in inline_scripts:
                content = await script.text_content()
                if content and len(content) > 10:
                    scripts.append(f"[inline:{len(content)} chars]")
        except Exception as e:
            # Consistent with the other extractors: best-effort, log at debug.
            logger.debug(f"Script extraction error: {e}")

        return scripts

    def _on_console(self, message) -> None:
        """Handle console messages (buffered for the current navigation)"""
        self._console_logs.append(f"[{message.type}] {message.text}")

    def _on_request(self, request) -> None:
        """Handle network requests (buffered for the current navigation)"""
        self._network_requests.append({
            "url": request.url,
            "method": request.method,
            "resource_type": request.resource_type,
        })

    @property
    def page(self) -> Optional[Page]:
        """Get underlying Playwright page"""
        return self._page

    @property
    def current_url(self) -> str:
        """Get current page URL ("" before start()/after close())"""
        return self._page.url if self._page else ""
|