atomicshop 2.11.47__py3-none-any.whl → 3.10.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atomicshop/__init__.py +1 -1
- atomicshop/{addons/mains → a_mains}/FACT/update_extract.py +3 -2
- atomicshop/a_mains/addons/process_list/compile.cmd +7 -0
- atomicshop/a_mains/addons/process_list/compiled/Win10x64/process_list.dll +0 -0
- atomicshop/a_mains/addons/process_list/compiled/Win10x64/process_list.exp +0 -0
- atomicshop/a_mains/addons/process_list/compiled/Win10x64/process_list.lib +0 -0
- atomicshop/{addons → a_mains/addons}/process_list/process_list.cpp +8 -1
- atomicshop/a_mains/dns_gateway_setting.py +11 -0
- atomicshop/a_mains/get_local_tcp_ports.py +85 -0
- atomicshop/a_mains/github_wrapper.py +11 -0
- atomicshop/a_mains/install_ca_certificate.py +172 -0
- atomicshop/{addons/mains → a_mains}/msi_unpacker.py +3 -1
- atomicshop/a_mains/process_from_port.py +119 -0
- atomicshop/a_mains/set_default_dns_gateway.py +90 -0
- atomicshop/a_mains/update_config_toml.py +38 -0
- atomicshop/appointment_management.py +5 -3
- atomicshop/basics/ansi_escape_codes.py +3 -1
- atomicshop/basics/argparse_template.py +2 -0
- atomicshop/basics/booleans.py +27 -30
- atomicshop/basics/bytes_arrays.py +43 -0
- atomicshop/basics/classes.py +149 -1
- atomicshop/basics/dicts.py +12 -0
- atomicshop/basics/enums.py +2 -2
- atomicshop/basics/exceptions.py +5 -1
- atomicshop/basics/list_of_classes.py +29 -0
- atomicshop/basics/list_of_dicts.py +69 -5
- atomicshop/basics/lists.py +14 -0
- atomicshop/basics/multiprocesses.py +374 -50
- atomicshop/basics/package_module.py +10 -0
- atomicshop/basics/strings.py +160 -7
- atomicshop/basics/threads.py +14 -0
- atomicshop/basics/tracebacks.py +13 -4
- atomicshop/certificates.py +153 -52
- atomicshop/config_init.py +12 -7
- atomicshop/console_user_response.py +7 -14
- atomicshop/consoles.py +9 -0
- atomicshop/datetimes.py +98 -0
- atomicshop/diff_check.py +340 -40
- atomicshop/dns.py +128 -12
- atomicshop/etws/_pywintrace_fix.py +17 -0
- atomicshop/etws/const.py +38 -0
- atomicshop/etws/providers.py +21 -0
- atomicshop/etws/sessions.py +43 -0
- atomicshop/etws/trace.py +168 -0
- atomicshop/etws/traces/trace_dns.py +162 -0
- atomicshop/etws/traces/trace_sysmon_process_creation.py +126 -0
- atomicshop/etws/traces/trace_tcp.py +130 -0
- atomicshop/file_io/csvs.py +222 -24
- atomicshop/file_io/docxs.py +35 -18
- atomicshop/file_io/file_io.py +35 -19
- atomicshop/file_io/jsons.py +49 -0
- atomicshop/file_io/tomls.py +139 -0
- atomicshop/filesystem.py +864 -293
- atomicshop/get_process_list.py +133 -0
- atomicshop/{process_name_cmd.py → get_process_name_cmd_dll.py} +52 -19
- atomicshop/http_parse.py +149 -93
- atomicshop/ip_addresses.py +6 -1
- atomicshop/mitm/centered_settings.py +132 -0
- atomicshop/mitm/config_static.py +207 -0
- atomicshop/mitm/config_toml_editor.py +55 -0
- atomicshop/mitm/connection_thread_worker.py +875 -357
- atomicshop/mitm/engines/__parent/parser___parent.py +4 -17
- atomicshop/mitm/engines/__parent/recorder___parent.py +108 -51
- atomicshop/mitm/engines/__parent/requester___parent.py +116 -0
- atomicshop/mitm/engines/__parent/responder___parent.py +75 -114
- atomicshop/mitm/engines/__reference_general/parser___reference_general.py +10 -7
- atomicshop/mitm/engines/__reference_general/recorder___reference_general.py +5 -5
- atomicshop/mitm/engines/__reference_general/requester___reference_general.py +47 -0
- atomicshop/mitm/engines/__reference_general/responder___reference_general.py +95 -13
- atomicshop/mitm/engines/create_module_template.py +58 -14
- atomicshop/mitm/import_config.py +359 -139
- atomicshop/mitm/initialize_engines.py +160 -74
- atomicshop/mitm/message.py +64 -23
- atomicshop/mitm/mitm_main.py +892 -0
- atomicshop/mitm/recs_files.py +183 -0
- atomicshop/mitm/shared_functions.py +4 -10
- atomicshop/mitm/ssh_tester.py +82 -0
- atomicshop/mitm/statistic_analyzer.py +257 -166
- atomicshop/mitm/statistic_analyzer_helper/analyzer_helper.py +136 -0
- atomicshop/mitm/statistic_analyzer_helper/moving_average_helper.py +525 -0
- atomicshop/monitor/change_monitor.py +96 -120
- atomicshop/monitor/checks/dns.py +139 -70
- atomicshop/monitor/checks/file.py +77 -0
- atomicshop/monitor/checks/network.py +81 -77
- atomicshop/monitor/checks/process_running.py +33 -34
- atomicshop/monitor/checks/url.py +94 -0
- atomicshop/networks.py +671 -0
- atomicshop/on_exit.py +205 -0
- atomicshop/package_mains_processor.py +84 -0
- atomicshop/permissions/permissions.py +22 -0
- atomicshop/permissions/ubuntu_permissions.py +239 -0
- atomicshop/permissions/win_permissions.py +33 -0
- atomicshop/print_api.py +24 -41
- atomicshop/process.py +63 -17
- atomicshop/process_poller/__init__.py +0 -0
- atomicshop/process_poller/pollers/__init__.py +0 -0
- atomicshop/process_poller/pollers/psutil_pywin32wmi_dll.py +95 -0
- atomicshop/process_poller/process_pool.py +207 -0
- atomicshop/process_poller/simple_process_pool.py +311 -0
- atomicshop/process_poller/tracer_base.py +45 -0
- atomicshop/process_poller/tracers/__init__.py +0 -0
- atomicshop/process_poller/tracers/event_log.py +46 -0
- atomicshop/process_poller/tracers/sysmon_etw.py +68 -0
- atomicshop/python_file_patcher.py +1 -1
- atomicshop/python_functions.py +27 -75
- atomicshop/question_answer_engine.py +2 -2
- atomicshop/scheduling.py +24 -5
- atomicshop/sound.py +4 -2
- atomicshop/speech_recognize.py +8 -0
- atomicshop/ssh_remote.py +158 -172
- atomicshop/startup/__init__.py +0 -0
- atomicshop/startup/win/__init__.py +0 -0
- atomicshop/startup/win/startup_folder.py +53 -0
- atomicshop/startup/win/task_scheduler.py +119 -0
- atomicshop/system_resource_monitor.py +61 -46
- atomicshop/system_resources.py +8 -8
- atomicshop/tempfiles.py +1 -2
- atomicshop/timer.py +30 -11
- atomicshop/urls.py +41 -0
- atomicshop/venvs.py +28 -0
- atomicshop/versioning.py +27 -0
- atomicshop/web.py +110 -25
- atomicshop/web_apis/__init__.py +0 -0
- atomicshop/web_apis/google_custom_search.py +44 -0
- atomicshop/web_apis/google_llm.py +188 -0
- atomicshop/websocket_parse.py +450 -0
- atomicshop/wrappers/certauthw/certauth.py +1 -0
- atomicshop/wrappers/cryptographyw.py +29 -8
- atomicshop/wrappers/ctyping/etw_winapi/__init__.py +0 -0
- atomicshop/wrappers/ctyping/etw_winapi/const.py +335 -0
- atomicshop/wrappers/ctyping/etw_winapi/etw_functions.py +393 -0
- atomicshop/wrappers/ctyping/file_details_winapi.py +67 -0
- atomicshop/wrappers/ctyping/msi_windows_installer/cabs.py +2 -1
- atomicshop/wrappers/ctyping/msi_windows_installer/extract_msi_main.py +13 -9
- atomicshop/wrappers/ctyping/msi_windows_installer/tables.py +35 -0
- atomicshop/wrappers/ctyping/setup_device.py +466 -0
- atomicshop/wrappers/ctyping/win_console.py +39 -0
- atomicshop/wrappers/dockerw/dockerw.py +113 -2
- atomicshop/wrappers/elasticsearchw/config_basic.py +0 -12
- atomicshop/wrappers/elasticsearchw/elastic_infra.py +75 -0
- atomicshop/wrappers/elasticsearchw/elasticsearchw.py +2 -20
- atomicshop/wrappers/factw/get_file_data.py +12 -5
- atomicshop/wrappers/factw/install/install_after_restart.py +89 -5
- atomicshop/wrappers/factw/install/pre_install_and_install_before_restart.py +20 -14
- atomicshop/wrappers/factw/postgresql/firmware.py +4 -6
- atomicshop/wrappers/githubw.py +583 -51
- atomicshop/wrappers/loggingw/consts.py +49 -0
- atomicshop/wrappers/loggingw/filters.py +102 -0
- atomicshop/wrappers/loggingw/formatters.py +58 -71
- atomicshop/wrappers/loggingw/handlers.py +459 -40
- atomicshop/wrappers/loggingw/loggers.py +19 -0
- atomicshop/wrappers/loggingw/loggingw.py +1010 -178
- atomicshop/wrappers/loggingw/reading.py +344 -19
- atomicshop/wrappers/mongodbw/__init__.py +0 -0
- atomicshop/wrappers/mongodbw/mongo_infra.py +31 -0
- atomicshop/wrappers/mongodbw/mongodbw.py +1432 -0
- atomicshop/wrappers/netshw.py +271 -0
- atomicshop/wrappers/playwrightw/engine.py +34 -19
- atomicshop/wrappers/playwrightw/infra.py +5 -0
- atomicshop/wrappers/playwrightw/javascript.py +7 -3
- atomicshop/wrappers/playwrightw/keyboard.py +14 -0
- atomicshop/wrappers/playwrightw/scenarios.py +172 -5
- atomicshop/wrappers/playwrightw/waits.py +9 -7
- atomicshop/wrappers/powershell_networking.py +80 -0
- atomicshop/wrappers/psutilw/processes.py +81 -0
- atomicshop/wrappers/psutilw/psutil_networks.py +85 -0
- atomicshop/wrappers/psutilw/psutilw.py +9 -0
- atomicshop/wrappers/pyopensslw.py +9 -2
- atomicshop/wrappers/pywin32w/__init__.py +0 -0
- atomicshop/wrappers/pywin32w/cert_store.py +116 -0
- atomicshop/wrappers/pywin32w/console.py +34 -0
- atomicshop/wrappers/pywin32w/win_event_log/__init__.py +0 -0
- atomicshop/wrappers/pywin32w/win_event_log/fetch.py +174 -0
- atomicshop/wrappers/pywin32w/win_event_log/subscribe.py +212 -0
- atomicshop/wrappers/pywin32w/win_event_log/subscribes/__init__.py +0 -0
- atomicshop/wrappers/pywin32w/win_event_log/subscribes/process_create.py +57 -0
- atomicshop/wrappers/pywin32w/win_event_log/subscribes/process_terminate.py +49 -0
- atomicshop/wrappers/pywin32w/win_event_log/subscribes/schannel_logging.py +97 -0
- atomicshop/wrappers/pywin32w/winshell.py +19 -0
- atomicshop/wrappers/pywin32w/wmis/__init__.py +0 -0
- atomicshop/wrappers/pywin32w/wmis/msft_netipaddress.py +113 -0
- atomicshop/wrappers/pywin32w/wmis/win32_networkadapterconfiguration.py +259 -0
- atomicshop/wrappers/pywin32w/wmis/win32networkadapter.py +112 -0
- atomicshop/wrappers/pywin32w/wmis/wmi_helpers.py +236 -0
- atomicshop/wrappers/socketw/accepter.py +21 -7
- atomicshop/wrappers/socketw/certificator.py +216 -150
- atomicshop/wrappers/socketw/creator.py +190 -50
- atomicshop/wrappers/socketw/dns_server.py +500 -173
- atomicshop/wrappers/socketw/exception_wrapper.py +45 -52
- atomicshop/wrappers/socketw/process_getter.py +86 -0
- atomicshop/wrappers/socketw/receiver.py +144 -102
- atomicshop/wrappers/socketw/sender.py +65 -35
- atomicshop/wrappers/socketw/sni.py +334 -165
- atomicshop/wrappers/socketw/socket_base.py +134 -0
- atomicshop/wrappers/socketw/socket_client.py +137 -95
- atomicshop/wrappers/socketw/socket_server_tester.py +14 -9
- atomicshop/wrappers/socketw/socket_wrapper.py +717 -116
- atomicshop/wrappers/socketw/ssl_base.py +15 -14
- atomicshop/wrappers/socketw/statistics_csv.py +148 -17
- atomicshop/wrappers/sysmonw.py +157 -0
- atomicshop/wrappers/ubuntu_terminal.py +65 -26
- atomicshop/wrappers/win_auditw.py +189 -0
- atomicshop/wrappers/winregw/__init__.py +0 -0
- atomicshop/wrappers/winregw/winreg_installed_software.py +58 -0
- atomicshop/wrappers/winregw/winreg_network.py +232 -0
- {atomicshop-2.11.47.dist-info → atomicshop-3.10.5.dist-info}/METADATA +31 -49
- atomicshop-3.10.5.dist-info/RECORD +306 -0
- {atomicshop-2.11.47.dist-info → atomicshop-3.10.5.dist-info}/WHEEL +1 -1
- atomicshop/_basics_temp.py +0 -101
- atomicshop/addons/a_setup_scripts/install_psycopg2_ubuntu.sh +0 -3
- atomicshop/addons/a_setup_scripts/install_pywintrace_0.3.cmd +0 -2
- atomicshop/addons/mains/install_docker_rootless_ubuntu.py +0 -11
- atomicshop/addons/mains/install_docker_ubuntu_main_sudo.py +0 -11
- atomicshop/addons/mains/install_elastic_search_and_kibana_ubuntu.py +0 -10
- atomicshop/addons/mains/install_wsl_ubuntu_lts_admin.py +0 -9
- atomicshop/addons/package_setup/CreateWheel.cmd +0 -7
- atomicshop/addons/package_setup/Setup in Edit mode.cmd +0 -6
- atomicshop/addons/package_setup/Setup.cmd +0 -7
- atomicshop/addons/process_list/compile.cmd +0 -2
- atomicshop/addons/process_list/compiled/Win10x64/process_list.dll +0 -0
- atomicshop/addons/process_list/compiled/Win10x64/process_list.exp +0 -0
- atomicshop/addons/process_list/compiled/Win10x64/process_list.lib +0 -0
- atomicshop/archiver/_search_in_zip.py +0 -189
- atomicshop/archiver/archiver.py +0 -34
- atomicshop/archiver/search_in_archive.py +0 -250
- atomicshop/archiver/sevenz_app_w.py +0 -86
- atomicshop/archiver/sevenzs.py +0 -44
- atomicshop/archiver/zips.py +0 -293
- atomicshop/etw/dns_trace.py +0 -118
- atomicshop/etw/etw.py +0 -61
- atomicshop/file_types.py +0 -24
- atomicshop/mitm/engines/create_module_template_example.py +0 -13
- atomicshop/mitm/initialize_mitm_server.py +0 -240
- atomicshop/monitor/checks/hash.py +0 -44
- atomicshop/monitor/checks/hash_checks/file.py +0 -55
- atomicshop/monitor/checks/hash_checks/url.py +0 -62
- atomicshop/pbtkmultifile_argparse.py +0 -88
- atomicshop/permissions.py +0 -110
- atomicshop/process_poller.py +0 -237
- atomicshop/script_as_string_processor.py +0 -38
- atomicshop/ssh_scripts/process_from_ipv4.py +0 -37
- atomicshop/ssh_scripts/process_from_port.py +0 -27
- atomicshop/wrappers/_process_wrapper_curl.py +0 -27
- atomicshop/wrappers/_process_wrapper_tar.py +0 -21
- atomicshop/wrappers/dockerw/install_docker.py +0 -209
- atomicshop/wrappers/elasticsearchw/infrastructure.py +0 -265
- atomicshop/wrappers/elasticsearchw/install_elastic.py +0 -232
- atomicshop/wrappers/ffmpegw.py +0 -125
- atomicshop/wrappers/loggingw/checks.py +0 -20
- atomicshop/wrappers/nodejsw/install_nodejs.py +0 -139
- atomicshop/wrappers/process_wrapper_pbtk.py +0 -16
- atomicshop/wrappers/socketw/base.py +0 -59
- atomicshop/wrappers/socketw/get_process.py +0 -107
- atomicshop/wrappers/wslw.py +0 -191
- atomicshop-2.11.47.dist-info/RECORD +0 -251
- /atomicshop/{addons/mains → a_mains}/FACT/factw_fact_extractor_docker_image_main_sudo.py +0 -0
- /atomicshop/{addons → a_mains/addons}/PlayWrightCodegen.cmd +0 -0
- /atomicshop/{addons → a_mains/addons}/ScriptExecution.cmd +0 -0
- /atomicshop/{addons/mains → a_mains/addons}/inits/init_to_import_all_modules.py +0 -0
- /atomicshop/{addons → a_mains/addons}/process_list/ReadMe.txt +0 -0
- /atomicshop/{addons/mains → a_mains}/search_for_hyperlinks_in_docx.py +0 -0
- /atomicshop/{archiver → etws}/__init__.py +0 -0
- /atomicshop/{etw → etws/traces}/__init__.py +0 -0
- /atomicshop/{monitor/checks/hash_checks → mitm/statistic_analyzer_helper}/__init__.py +0 -0
- /atomicshop/{wrappers/nodejsw → permissions}/__init__.py +0 -0
- /atomicshop/wrappers/pywin32w/{wmi_win32process.py → wmis/win32process.py} +0 -0
- {atomicshop-2.11.47.dist-info → atomicshop-3.10.5.dist-info/licenses}/LICENSE.txt +0 -0
- {atomicshop-2.11.47.dist-info → atomicshop-3.10.5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
import re
|
|
3
|
+
from typing import List, Dict, Any, Optional
|
|
4
|
+
|
|
5
|
+
# ── regex helpers ─────────────────────────────────────────────────────────
|
|
6
|
+
IP_PATTERN = r'(?:\d{1,3}\.){3}\d{1,3}'
|
|
7
|
+
RE_ADAPTER_HEADER = re.compile(r'Configuration for interface +"([^"]+)"', re.I)
|
|
8
|
+
RE_NUMERIC = re.compile(r'\d+')
|
|
9
|
+
RE_SUBNET = re.compile(rf'(?P<prefix>{IP_PATTERN}/\d+)\s+\(mask\s+(?P<mask>{IP_PATTERN})', re.I)
|
|
10
|
+
RE_IP = re.compile(IP_PATTERN)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def get_netsh_show_config() -> str:
    """
    Execute `netsh interface ipv4 show config` and hand back its output.

    The command's stdout is decoded as UTF-8; bytes that cannot be decoded
    are dropped rather than raising (``errors="ignore"``).

    Raises:
        subprocess.CalledProcessError: if netsh exits with a non-zero code.
    """
    command = ["netsh", "interface", "ipv4", "show", "config"]
    raw_text = subprocess.check_output(
        command,
        text=True,
        encoding="utf-8",
        errors="ignore",
    )
    return raw_text
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# noinspection PyUnresolvedReferences
|
|
22
|
+
def get_netsh_ipv4() -> List[Dict[str, Any]]:
    """
    Parse *all* data from `netsh interface ipv4 show config`.

    Returns a list of dicts – one per adapter, in the order netsh printed
    them – with keys:
        interface_name, dhcp_enabled, ip_addresses, subnet_prefixes, subnet_masks,
        default_gateways, gateway_metric, interface_metric,
        dns_mode, dns_servers, wins_mode, wins_servers

    Value notes:
        dhcp_enabled                      True / False, or None if the line was not seen.
        gateway_metric, interface_metric  int, or None if the line was not seen.
        dns_mode, wins_mode               'dynamic', 'static' or 'unknown'; when several
                                          headers appear for one adapter the last one wins
                                          ('mixed' detection is not implemented).
        all *_addresses / *_servers / subnet_* / default_gateways values are lists of strings.
    """
    config_text = get_netsh_show_config()

    adapters: List[Dict[str, Any]] = []
    adapter: Optional[Dict[str, Any]] = None

    # Key of the server list ('dns_servers' / 'wins_servers') that bare
    # continuation lines (an address with no label) currently belong to.
    # A single variable guarantees that the most recent header wins, so WINS
    # continuation lines are never mis-filed under DNS.
    continuation_key: Optional[str] = None

    for raw_line in config_text.splitlines():
        line = raw_line.strip()

        # 1) New adapter block ------------------------------------------------
        header_match = RE_ADAPTER_HEADER.search(line)
        if header_match:
            # Flush the previous adapter, if any
            if adapter is not None:
                adapters.append(adapter)

            adapter = {
                'interface_name': header_match.group(1),
                'dhcp_enabled': None,
                'gateway_metric': None,
                'interface_metric': None,
                'dns_mode': 'unknown',
                'wins_mode': 'unknown',
                'dns_servers': [],
                'wins_servers': [],
                'ip_addresses': [],
                'subnet_prefixes': [],
                'subnet_masks': [],
                'default_gateways': [],
            }
            continuation_key = None
            continue

        if adapter is None:  # skip prologue lines before the first adapter header
            continue

        # 2) DHCP flag -------------------------------------------------------
        if line.startswith("DHCP enabled"):
            adapter['dhcp_enabled'] = "yes" in line.lower()
            continue

        # 3) IP addresses ----------------------------------------------------
        if line.startswith("IP Address"):
            adapter['ip_addresses'].extend(RE_IP.findall(line))
            continue

        # 4) Subnet prefix & mask -------------------------------------------
        if line.startswith("Subnet Prefix"):
            subnet_match = RE_SUBNET.search(line)
            if subnet_match:
                adapter['subnet_prefixes'].append(subnet_match.group('prefix'))
                adapter['subnet_masks'].append(subnet_match.group('mask'))
            continue

        # 5) Gateway & metrics ----------------------------------------------
        if line.startswith("Default Gateway"):
            adapter['default_gateways'].extend(RE_IP.findall(line))
            continue
        if line.startswith("Gateway Metric"):
            metric = RE_NUMERIC.search(line)
            if metric:
                adapter['gateway_metric'] = int(metric.group())
            continue
        if line.startswith("InterfaceMetric"):
            metric = RE_NUMERIC.search(line)
            if metric:
                adapter['interface_metric'] = int(metric.group())
            continue

        # 6) DNS header lines -----------------------------------------------
        if "DNS servers configured through DHCP" in line:
            adapter['dns_mode'] = 'dynamic'
            adapter['dns_servers'].extend(RE_IP.findall(line))
            continuation_key = 'dns_servers'
            continue
        if "Statically Configured DNS Servers" in line:
            adapter['dns_mode'] = 'static'
            adapter['dns_servers'].extend(RE_IP.findall(line))
            continuation_key = 'dns_servers'
            continue

        # 7) WINS header lines ----------------------------------------------
        if "WINS servers configured through DHCP" in line:
            adapter['wins_mode'] = 'dynamic'
            adapter['wins_servers'].extend(RE_IP.findall(line))
            continuation_key = 'wins_servers'
            continue
        if line.startswith(("Primary WINS Server", "Secondary WINS Server")):
            adapter['wins_mode'] = 'static'
            adapter['wins_servers'].extend(RE_IP.findall(line))
            continuation_key = 'wins_servers'
            continue

        # 8) Continuation lines for DNS / WINS -------------------------------
        # Any other line carrying an address extends the list opened by the
        # most recent DNS / WINS header.
        if continuation_key is not None:
            extra_addresses = RE_IP.findall(line)
            if extra_addresses:
                adapter[continuation_key].extend(extra_addresses)

    # Flush the final adapter block
    if adapter is not None:
        adapters.append(adapter)

    return adapters
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def run_netsh(*args: str) -> subprocess.CompletedProcess:
    """
    Run a netsh command and return the completed process.

    stdout and stderr are captured as text. A non-zero exit code does NOT raise
    (``check=False``); inspect ``returncode``, ``stdout`` and ``stderr`` on the
    returned object to find out how the command went.

    Args:
        *args: netsh arguments, one string per token (no shell quoting needed).

    Returns:
        subprocess.CompletedProcess with text ``stdout`` / ``stderr``.

    Example:
        result = run_netsh("interface", "ipv4", "show", "interfaces")
        print(result.stdout)
    """
    return subprocess.run(
        ["netsh", *args],
        capture_output=True,
        text=True,
        check=False,
    )
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def enable_dhcp_static_coexistence(interface_name: str) -> subprocess.CompletedProcess:
    """
    Turn on DHCP + static IP coexistence for an interface.

    Runs the same command as:
        netsh interface ipv4 set interface "Ethernet0" dhcpstaticipcoexistence=enabled

    Args:
        interface_name: Interface name, e.g. "Ethernet0"

    Returns:
        Whatever `run_netsh` returns for the command.
    """
    netsh_args = (
        "interface", "ipv4", "set", "interface",
        interface_name,
        "dhcpstaticipcoexistence=enabled",
    )
    return run_netsh(*netsh_args)
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def disable_dhcp_static_coexistence(interface_name: str) -> subprocess.CompletedProcess:
    """
    Turn off DHCP + static IP coexistence for an interface (optional).

    Runs the same command as:
        netsh interface ipv4 set interface "Ethernet0" dhcpstaticipcoexistence=disabled

    Args:
        interface_name: Interface name, e.g. "Ethernet0"

    Returns:
        Whatever `run_netsh` returns for the command.
    """
    netsh_args = (
        "interface", "ipv4", "set", "interface",
        interface_name,
        "dhcpstaticipcoexistence=disabled",
    )
    return run_netsh(*netsh_args)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def add_virtual_ip(
        interface_name: str,
        ip: str,
        mask: str,
        skip_as_source: bool = True
) -> subprocess.CompletedProcess:
    """
    Add a static 'virtual' IP to an interface (intended for DHCP interfaces,
    leaving the DHCP configuration in place).

    Runs the same command as:
        netsh interface ipv4 add address "Ethernet0" 192.168.1.201 255.255.255.0 skipassource=true

    Args:
        interface_name: Interface name, e.g. "Ethernet0"
        ip:             IP to add, e.g. "192.168.1.201"
        mask:           Subnet mask, e.g. "255.255.255.0"
        skip_as_source: If True, 'skipassource=true' is appended so Windows does
                        not prefer this IP as the outbound source address.

    Returns:
        Whatever `run_netsh` returns for the command.
    """
    # The flag is only present on the command line when requested.
    source_flag = ["skipassource=true"] if skip_as_source else []

    return run_netsh(
        "interface", "ipv4", "add", "address",
        interface_name,
        ip,
        mask,
        *source_flag,
    )
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def remove_virtual_ip(
        interface_name: str,
        ip: str
) -> subprocess.CompletedProcess:
    """
    Delete an IP address (e.g. one added earlier as a virtual IP) from an interface.

    Runs the same command as:
        netsh interface ipv4 delete address "Ethernet0" addr=192.168.1.201

    Args:
        interface_name: Interface name, e.g. "Ethernet0"
        ip:             IP to remove, e.g. "192.168.1.201"

    Returns:
        Whatever `run_netsh` returns for the command.
    """
    address_arg = f"addr={ip}"
    return run_netsh("interface", "ipv4", "delete", "address", interface_name, address_arg)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def show_interface_config(
        interface_name: Optional[str] = None
) -> subprocess.CompletedProcess:
    """
    Show the IPv4 configuration of every interface, or of a single one.

    Runs the same command as:
        netsh interface ipv4 show config
    or, when an interface name is given:
        netsh interface ipv4 show config "Ethernet0"

    Args:
        interface_name: Interface name, e.g. "Ethernet0". None or an empty
                        string means "all interfaces".

    Returns:
        Whatever `run_netsh` returns for the command.
    """
    netsh_args = ["interface", "ipv4", "show", "config"]
    if interface_name:
        netsh_args.append(interface_name)
    return run_netsh(*netsh_args)
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def list_ipv4_interfaces() -> subprocess.CompletedProcess:
    """
    List the IPv4 interfaces known to netsh.

    Runs the same command as:
        netsh interface ipv4 show interfaces

    Returns:
        Whatever `run_netsh` returns for the command.
    """
    netsh_args = ("interface", "ipv4", "show", "interfaces")
    return run_netsh(*netsh_args)
|
|
@@ -5,14 +5,15 @@ import random
|
|
|
5
5
|
import getpass
|
|
6
6
|
from tempfile import gettempdir
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
# noinspection PyPackageRequirements
|
|
9
|
+
from playwright.sync_api import sync_playwright, Error
|
|
10
|
+
# Stealth options for playwright. External.
|
|
11
|
+
# from playwright_stealth import stealth_sync
|
|
12
|
+
|
|
9
13
|
from ...keyboard_press import send_alt_tab
|
|
10
|
-
from ... import filesystem
|
|
14
|
+
from ... import filesystem, print_api
|
|
11
15
|
|
|
12
|
-
|
|
13
|
-
from playwright.sync_api import sync_playwright
|
|
14
|
-
# Stealth options for playwright. External.
|
|
15
|
-
from playwright_stealth import stealth_sync
|
|
16
|
+
from . import infra
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
class PlaywrightEngine:
|
|
@@ -22,8 +23,11 @@ class PlaywrightEngine:
|
|
|
22
23
|
|
|
23
24
|
def __init__(
|
|
24
25
|
self,
|
|
25
|
-
browser: str = 'chromium',
|
|
26
|
-
|
|
26
|
+
browser: str = 'chromium',
|
|
27
|
+
headless: bool = False,
|
|
28
|
+
incognito_mode: bool = True,
|
|
29
|
+
browser_content_working_directory: str = None
|
|
30
|
+
):
|
|
27
31
|
"""
|
|
28
32
|
:param browser: string, specifies which browser will be executed. Playwright default is 'chromium'.
|
|
29
33
|
:param headless: boolean, specifies if browser will be executed in headless mode. Default is 'False'.
|
|
@@ -70,7 +74,15 @@ class PlaywrightEngine:
|
|
|
70
74
|
# Also, 'sync_playwright()' has 'start()' function that executes the '__enter__()' function.
|
|
71
75
|
# So, we can execute only that.
|
|
72
76
|
self.playwright = sync_playwright().start()
|
|
73
|
-
|
|
77
|
+
|
|
78
|
+
try:
|
|
79
|
+
self.execute_browser()
|
|
80
|
+
except Error as e:
|
|
81
|
+
if "BrowserType.launch: Executable doesn't exist" in e.message:
|
|
82
|
+
infra.install_playwright()
|
|
83
|
+
self.execute_browser()
|
|
84
|
+
else:
|
|
85
|
+
raise e
|
|
74
86
|
|
|
75
87
|
def stop(self):
|
|
76
88
|
# Close browser objects. You can only close browser without stopping playwright.
|
|
@@ -80,8 +92,7 @@ class PlaywrightEngine:
|
|
|
80
92
|
# creates '.stop' attribute for 'playwright' object, which gets the '__exit__' function reference name.
|
|
81
93
|
# playwright.stop = self.__exit__
|
|
82
94
|
# So, we can call 'playwright.stop()' in order to close the object without 'with' statement.
|
|
83
|
-
|
|
84
|
-
self.playwright.stop
|
|
95
|
+
self.playwright.stop()
|
|
85
96
|
|
|
86
97
|
def execute_browser(self) -> None:
|
|
87
98
|
""" Execute browser based on mode """
|
|
@@ -139,7 +150,7 @@ class PlaywrightEngine:
|
|
|
139
150
|
self.page = self.browser.new_page()
|
|
140
151
|
|
|
141
152
|
# Making playwright stealthier with less footprint of automation.
|
|
142
|
-
stealth_sync(self.page)
|
|
153
|
+
# stealth_sync(self.page)
|
|
143
154
|
|
|
144
155
|
def close_browser(self) -> None:
|
|
145
156
|
self.page.close()
|
|
@@ -170,9 +181,13 @@ class PlaywrightEngine:
|
|
|
170
181
|
self.page.wait_for_selector(f'{element}[{attribute}="{value}"]')
|
|
171
182
|
|
|
172
183
|
def login(
|
|
173
|
-
self,
|
|
174
|
-
|
|
175
|
-
|
|
184
|
+
self,
|
|
185
|
+
url_login: str,
|
|
186
|
+
user_box_text: str = str(),
|
|
187
|
+
pass_box_text: str = str(),
|
|
188
|
+
submit_button_text: str = str(),
|
|
189
|
+
username: str = str(),
|
|
190
|
+
password: str = str(),
|
|
176
191
|
credential_single_usage: bool = False
|
|
177
192
|
) -> None:
|
|
178
193
|
"""
|
|
@@ -242,7 +257,7 @@ class PlaywrightEngine:
|
|
|
242
257
|
for i in range(element_count):
|
|
243
258
|
string_current = string_current + element.nth(i).text_content()
|
|
244
259
|
|
|
245
|
-
print_api(f'Current element text of [{locator_string}]: {string_current}', rtl=True)
|
|
260
|
+
print_api.print_api(f'Current element text of [{locator_string}]: {string_current}', rtl=True)
|
|
246
261
|
|
|
247
262
|
# If text from previous cycle isn't the same as text from current cycle, then put the new value to the
|
|
248
263
|
# previous one and return 'True' since the text really changed.
|
|
@@ -312,7 +327,7 @@ class PlaywrightEngine:
|
|
|
312
327
|
# Nullifying 'string_previous', so new loop will not have the same one as previous loop in case of error.
|
|
313
328
|
self.string_previous = str()
|
|
314
329
|
|
|
315
|
-
print_api('Finished execution Time: ' + str(datetime.datetime.now()), **kwargs)
|
|
316
|
-
print_api('Waiting minutes: ' + str(time_to_sleep_minutes), **kwargs)
|
|
330
|
+
print_api.print_api('Finished execution Time: ' + str(datetime.datetime.now()), **kwargs)
|
|
331
|
+
print_api.print_api('Waiting minutes: ' + str(time_to_sleep_minutes), **kwargs)
|
|
317
332
|
time.sleep(time_to_sleep_minutes * 60)
|
|
318
|
-
print_api('-----------------------------------------', **kwargs)
|
|
333
|
+
print_api.print_api('-----------------------------------------', **kwargs)
|
|
@@ -44,8 +44,12 @@ def get_page_text_content(page) -> str:
|
|
|
44
44
|
:return: string, text content of the page.
|
|
45
45
|
"""
|
|
46
46
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
47
|
+
# Full javascript.
|
|
48
|
+
# text_content: str = page.evaluate('''() => {
|
|
49
|
+
# return document.body.innerText;
|
|
50
|
+
# }''')
|
|
51
|
+
|
|
52
|
+
# Short javascript.
|
|
53
|
+
text_content: str = page.evaluate("document.body.innerText")
|
|
50
54
|
|
|
51
55
|
return text_content
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
def type_text(page, text):
    """
    Type text with the page's keyboard.

    Nothing is clicked or focused first, so the text goes to whatever field
    already has the cursor.

    :param page: playwright page
    :param text: str, the text to type.
    :return:
    """
    keyboard = page.keyboard
    keyboard.type(text)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def press_key(page, key: str):
    """
    Press a single key with the page's keyboard.

    :param page: playwright page
    :param key: str, the key to press. Example: 'Enter'.
    :return:
    """
    keyboard = page.keyboard
    keyboard.press(key)
|
|
@@ -3,8 +3,16 @@ Scenarios file contains full execution scenarios of playwright wrapper.
|
|
|
3
3
|
For example: run playwright, navigate to URL, get text from a locator.
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
7
|
+
from typing import Literal
|
|
8
|
+
|
|
9
|
+
from playwright.sync_api import sync_playwright
|
|
10
|
+
from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
|
|
11
|
+
from bs4 import BeautifulSoup
|
|
12
|
+
|
|
6
13
|
from . import engine, base, combos
|
|
7
14
|
from ...basics import threads, multiprocesses
|
|
15
|
+
from ... import web
|
|
8
16
|
|
|
9
17
|
|
|
10
18
|
def get_text_from_html_tag(url: str, tag_name: str, attribute: str, value: str) -> str:
|
|
@@ -43,8 +51,12 @@ def get_text_from_html_tag(url: str, tag_name: str, attribute: str, value: str)
|
|
|
43
51
|
|
|
44
52
|
|
|
45
53
|
def get_page_content(
|
|
46
|
-
url: str,
|
|
47
|
-
|
|
54
|
+
url: str,
|
|
55
|
+
page_format: str = 'html',
|
|
56
|
+
path: str = None,
|
|
57
|
+
pdf_format: str = 'A4',
|
|
58
|
+
html_txt_convert_to_bytes: bool = True,
|
|
59
|
+
print_kwargs: dict = None
|
|
48
60
|
) -> any:
|
|
49
61
|
"""
|
|
50
62
|
The function receives playwright engine and page object, navigates to URL, gets page content in specified format,
|
|
@@ -57,10 +69,10 @@ def get_page_content(
|
|
|
57
69
|
'png' - returns png binary.
|
|
58
70
|
'jpeg' - returns jpeg binary.
|
|
59
71
|
:param path: string of path to save the file to. Default is None.
|
|
60
|
-
:param print_kwargs: dict, that contains all the arguments for 'print_api' function.
|
|
61
72
|
:param pdf_format: string of pdf format, applicable only if 'page_format=pdf'. Default is 'A4'.
|
|
62
73
|
:param html_txt_convert_to_bytes: boolean, applicable only if 'page_format=html' or 'page_format=txt'.
|
|
63
74
|
Default is True.
|
|
75
|
+
:param print_kwargs: dict, that contains all the arguments for 'print_api' function.
|
|
64
76
|
|
|
65
77
|
:return: any page content in specified format.
|
|
66
78
|
"""
|
|
@@ -95,8 +107,13 @@ def get_page_content(
|
|
|
95
107
|
|
|
96
108
|
|
|
97
109
|
def get_page_content_in_thread(
|
|
98
|
-
url: str,
|
|
99
|
-
|
|
110
|
+
url: str,
|
|
111
|
+
page_format: str = 'html',
|
|
112
|
+
path: str = None,
|
|
113
|
+
pdf_format: str = 'A4',
|
|
114
|
+
html_txt_convert_to_bytes: bool = True,
|
|
115
|
+
print_kwargs: dict = None
|
|
116
|
+
):
|
|
100
117
|
"""
|
|
101
118
|
The function uses 'threads.thread_wrap_var' function in order to wrap the function 'get_page_content' and
|
|
102
119
|
execute it in a thread with arguments and return the result.
|
|
@@ -130,3 +147,153 @@ def _get_page_content_in_process(
|
|
|
130
147
|
html_txt_convert_to_bytes=html_txt_convert_to_bytes,
|
|
131
148
|
print_kwargs=print_kwargs
|
|
132
149
|
)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def fetch_urls_content_in_threads(
        urls: list[str],
        number_of_characters_per_link: int,
        text_fetch_method: Literal[
            'playwright_text',
            'js_text',
            'playwright_html',
            'playwright_html_to_text',
            'js_html',
            'js_html_to_text',
            'playwright_copypaste'
        ]
) -> list[str]:
    """
    The function to fetch all URLs concurrently using a thread pool.

    :param urls: list of strings, the URLs to fetch.
    :param number_of_characters_per_link: int, passed as is to '_fetch_content' for every URL.
    :param text_fetch_method: string, passed as is to '_fetch_content' for every URL.
        The accepted values are the same ones that '_fetch_content' accepts.
    :return: list of strings, one per URL that was fetched successfully.
        The order is the order of completion, not the order of 'urls'.
        A URL whose fetch raised an exception is reported with 'print' and left out of the list.
    """
    contents: list[str] = []

    with ThreadPoolExecutor() as executor:
        # Map each future back to its URL, so a failure can be reported with the URL that caused it.
        future_to_url = {
            executor.submit(_fetch_content, url, number_of_characters_per_link, text_fetch_method): url
            for url in urls
        }

        # Collect results as they complete.
        for future in as_completed(future_to_url):
            url = future_to_url[future]
            try:
                contents.append(future.result())
            except Exception as exc:
                # Best effort: one failed URL must not discard the content of the others.
                print(f"An error occurred when fetching {url}: {exc}")

    return contents
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def fetch_urls_content(
        urls: list[str],
        number_of_characters_per_link: int,
        text_fetch_method: Literal[
            'playwright_text',
            'js_text',
            'playwright_html',
            'playwright_html_to_text',
            'js_html',
            'js_html_to_text',
            'playwright_copypaste'
        ],
) -> list[str]:
    """
    The function to fetch all URLs one after another, without using threads.

    :param urls: list of strings, the URLs to fetch.
    :param number_of_characters_per_link: int, passed as is to '_fetch_content' for every URL.
    :param text_fetch_method: string, passed as is to '_fetch_content' for every URL.
        The accepted values are the same ones that '_fetch_content' accepts.
    :return: list of strings, the content of each URL in the order of 'urls'.
        An exception raised while fetching any URL is not caught here.
    """
    return [
        _fetch_content(url, number_of_characters_per_link, text_fetch_method)
        for url in urls
    ]
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _fetch_content(
        url,
        number_of_characters_per_link,
        text_fetch_method: Literal[
            'playwright_text',
            'js_text',
            'playwright_html',
            'playwright_html_to_text',
            'js_html',
            'js_html_to_text',
            'playwright_copypaste'
        ],
        headless: bool = True):
    """
    Function to fetch content from a single URL using the synchronous Playwright API.

    :param url: string, the URL to navigate to.
    :param number_of_characters_per_link: int, only this many leading characters of the content are returned.
    :param text_fetch_method: string, the way the content is extracted from the loaded page:
        'playwright_text' - 'page.inner_text' of the 'body' element.
        'js_text' - 'document.body.innerText' evaluated in the page.
        'playwright_html' - 'page.content()'.
        'js_html' - 'document.documentElement.outerHTML' evaluated in the page.
        'playwright_html_to_text', 'js_html_to_text' - same as the html variants, then the HTML is converted
            to plain text with BeautifulSoup.
        'playwright_copypaste' - presses Control+a, Control+c and reads the clipboard.
    :param headless: boolean, passed to the chromium launch. Default is True.
    :return: string, the first 'number_of_characters_per_link' characters of the extracted content.
    :raises ValueError: if 'text_fetch_method' is not supported. Raised before the browser is started.
    """

    # Validate before any browser work, so a wrong method name does not cost a launch and a page load.
    # The html methods are matched by substring, exactly as in the dispatch below.
    is_html_method: bool = "playwright_html" in text_fetch_method or "js_html" in text_fetch_method
    if not is_html_method and text_fetch_method not in ("playwright_text", "js_text", "playwright_copypaste"):
        raise ValueError(f"Invalid text_fetch_method: {text_fetch_method}")

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=headless)
        try:
            user_agent: str = web.USER_AGENTS['Chrome 142.0.0 Windows 10/11 x64']

            if text_fetch_method == "playwright_copypaste":
                # Reading the copied text back requires the clipboard permissions.
                context = browser.new_context(
                    permissions=["clipboard-read", "clipboard-write"], user_agent=user_agent)
            else:
                context = browser.new_context(user_agent=user_agent)

            page = context.new_page()
            page.goto(url)

            # Wait for the page to load using all possible methods, since there is no specific method
            # that will work for all websites.
            page.wait_for_load_state("load", timeout=5000)
            page.wait_for_load_state("domcontentloaded", timeout=5000)

            # The above is not enough, wait for network to stop loading files.
            # NOTE(review): a page that keeps receiving a response at least every 2 seconds never leaves
            # this loop - confirm whether an overall deadline is needed.
            while True:
                try:
                    # "**/*" is the wildcard for all URLs.
                    # 'page.expect_response' waits for the next response. When no response arrives within
                    # the timeout, it raises a TimeoutError, which breaks the while loop.
                    with page.expect_response("**/*", timeout=2000) as response_info:
                        _ = response_info.value
                except PlaywrightTimeoutError:
                    break

            if text_fetch_method == "playwright_text":
                text_content = page.inner_text('body')
            elif text_fetch_method == "js_text":
                # Use JavaScript to extract only the visible text from the page.
                text_content = page.evaluate("document.body.innerText")
            elif "playwright_html" in text_fetch_method:
                # Get the full HTML content of the page.
                text_content = page.content()
            elif "js_html" in text_fetch_method:
                # Use JavaScript to extract the full HTML of the page.
                text_content = page.evaluate('document.documentElement.outerHTML')
            else:
                # Only 'playwright_copypaste' reaches this branch, the method was validated above.
                page.keyboard.press("Control+a")  # Select all text
                page.keyboard.press("Control+c")  # Copy text to clipboard
                # Retrieve copied text from the clipboard.
                text_content = page.evaluate("navigator.clipboard.readText()")

            if "to_text" in text_fetch_method:
                # Convert HTML to plain text using BeautifulSoup.
                soup = BeautifulSoup(text_content, "html.parser")
                text_content = soup.get_text()
        finally:
            # Close the browser also when navigation, waiting or extraction raised.
            browser.close()

    # Return only the first X characters of the text content to not overload the LLM.
    return text_content[:number_of_characters_per_link]
|
|
@@ -1,7 +1,9 @@
|
|
|
1
|
-
from
|
|
1
|
+
from ... import print_api
|
|
2
2
|
|
|
3
|
+
# noinspection PyPackageRequirements
|
|
3
4
|
from playwright.sync_api import expect
|
|
4
5
|
# This is from official docs: https://playwright.dev/python/docs/api/class-timeouterror
|
|
6
|
+
# noinspection PyPackageRequirements
|
|
5
7
|
from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
|
|
6
8
|
|
|
7
9
|
|
|
@@ -124,7 +126,7 @@ def network_fully_idle(page, timeout: int = 2000, print_kwargs: dict = None) ->
|
|
|
124
126
|
# 'page.expect_response' will wait for the response to be received, and then return the response object.
|
|
125
127
|
# When timeout is reached, it will raise a TimeoutError, which will break the while loop.
|
|
126
128
|
with page.expect_response("**/*", timeout=timeout) as response_info:
|
|
127
|
-
print_api(response_info.value, **print_kwargs)
|
|
129
|
+
print_api.print_api(response_info.value, **(print_kwargs or {}))
|
|
128
130
|
except PlaywrightTimeoutError:
|
|
129
131
|
break
|
|
130
132
|
|
|
@@ -151,13 +153,13 @@ def maximum_idle(page, print_kwargs: dict = None) -> None:
|
|
|
151
153
|
:return: None
|
|
152
154
|
"""
|
|
153
155
|
|
|
154
|
-
print_api('Before wait_for_load', **print_kwargs)
|
|
156
|
+
print_api.print_api('Before wait_for_load', **(print_kwargs or {}))
|
|
155
157
|
load(page)
|
|
156
|
-
print_api('After wait_for_load, Before wait_for_domcontentloaded', **print_kwargs)
|
|
158
|
+
print_api.print_api('After wait_for_load, Before wait_for_domcontentloaded', **(print_kwargs or {}))
|
|
157
159
|
domcontentloaded(page)
|
|
158
|
-
print_api('After wait_for_domcontentloaded', **print_kwargs)
|
|
160
|
+
print_api.print_api('After wait_for_domcontentloaded', **(print_kwargs or {}))
|
|
159
161
|
# For some reason 'networkidle' can result in timeout errors, so currently this is disabled.
|
|
160
162
|
# networkidle(page)
|
|
161
|
-
print_api('Before wait_for_network_fully_idle', **print_kwargs)
|
|
163
|
+
print_api.print_api('Before wait_for_network_fully_idle', **(print_kwargs or {}))
|
|
162
164
|
network_fully_idle(page, print_kwargs=print_kwargs)
|
|
163
|
-
print_api('After wait_for_network_fully_idle', **print_kwargs)
|
|
165
|
+
print_api.print_api('After wait_for_network_fully_idle', **(print_kwargs or {}))
|