atomicshop 2.11.47__py3-none-any.whl → 3.10.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atomicshop/__init__.py +1 -1
- atomicshop/{addons/mains → a_mains}/FACT/update_extract.py +3 -2
- atomicshop/a_mains/addons/process_list/compile.cmd +7 -0
- atomicshop/a_mains/addons/process_list/compiled/Win10x64/process_list.dll +0 -0
- atomicshop/a_mains/addons/process_list/compiled/Win10x64/process_list.exp +0 -0
- atomicshop/a_mains/addons/process_list/compiled/Win10x64/process_list.lib +0 -0
- atomicshop/{addons → a_mains/addons}/process_list/process_list.cpp +8 -1
- atomicshop/a_mains/dns_gateway_setting.py +11 -0
- atomicshop/a_mains/get_local_tcp_ports.py +85 -0
- atomicshop/a_mains/github_wrapper.py +11 -0
- atomicshop/a_mains/install_ca_certificate.py +172 -0
- atomicshop/{addons/mains → a_mains}/msi_unpacker.py +3 -1
- atomicshop/a_mains/process_from_port.py +119 -0
- atomicshop/a_mains/set_default_dns_gateway.py +90 -0
- atomicshop/a_mains/update_config_toml.py +38 -0
- atomicshop/appointment_management.py +5 -3
- atomicshop/basics/ansi_escape_codes.py +3 -1
- atomicshop/basics/argparse_template.py +2 -0
- atomicshop/basics/booleans.py +27 -30
- atomicshop/basics/bytes_arrays.py +43 -0
- atomicshop/basics/classes.py +149 -1
- atomicshop/basics/dicts.py +12 -0
- atomicshop/basics/enums.py +2 -2
- atomicshop/basics/exceptions.py +5 -1
- atomicshop/basics/list_of_classes.py +29 -0
- atomicshop/basics/list_of_dicts.py +69 -5
- atomicshop/basics/lists.py +14 -0
- atomicshop/basics/multiprocesses.py +374 -50
- atomicshop/basics/package_module.py +10 -0
- atomicshop/basics/strings.py +160 -7
- atomicshop/basics/threads.py +14 -0
- atomicshop/basics/tracebacks.py +13 -4
- atomicshop/certificates.py +153 -52
- atomicshop/config_init.py +12 -7
- atomicshop/console_user_response.py +7 -14
- atomicshop/consoles.py +9 -0
- atomicshop/datetimes.py +98 -0
- atomicshop/diff_check.py +340 -40
- atomicshop/dns.py +128 -12
- atomicshop/etws/_pywintrace_fix.py +17 -0
- atomicshop/etws/const.py +38 -0
- atomicshop/etws/providers.py +21 -0
- atomicshop/etws/sessions.py +43 -0
- atomicshop/etws/trace.py +168 -0
- atomicshop/etws/traces/trace_dns.py +162 -0
- atomicshop/etws/traces/trace_sysmon_process_creation.py +126 -0
- atomicshop/etws/traces/trace_tcp.py +130 -0
- atomicshop/file_io/csvs.py +222 -24
- atomicshop/file_io/docxs.py +35 -18
- atomicshop/file_io/file_io.py +35 -19
- atomicshop/file_io/jsons.py +49 -0
- atomicshop/file_io/tomls.py +139 -0
- atomicshop/filesystem.py +864 -293
- atomicshop/get_process_list.py +133 -0
- atomicshop/{process_name_cmd.py → get_process_name_cmd_dll.py} +52 -19
- atomicshop/http_parse.py +149 -93
- atomicshop/ip_addresses.py +6 -1
- atomicshop/mitm/centered_settings.py +132 -0
- atomicshop/mitm/config_static.py +207 -0
- atomicshop/mitm/config_toml_editor.py +55 -0
- atomicshop/mitm/connection_thread_worker.py +875 -357
- atomicshop/mitm/engines/__parent/parser___parent.py +4 -17
- atomicshop/mitm/engines/__parent/recorder___parent.py +108 -51
- atomicshop/mitm/engines/__parent/requester___parent.py +116 -0
- atomicshop/mitm/engines/__parent/responder___parent.py +75 -114
- atomicshop/mitm/engines/__reference_general/parser___reference_general.py +10 -7
- atomicshop/mitm/engines/__reference_general/recorder___reference_general.py +5 -5
- atomicshop/mitm/engines/__reference_general/requester___reference_general.py +47 -0
- atomicshop/mitm/engines/__reference_general/responder___reference_general.py +95 -13
- atomicshop/mitm/engines/create_module_template.py +58 -14
- atomicshop/mitm/import_config.py +359 -139
- atomicshop/mitm/initialize_engines.py +160 -74
- atomicshop/mitm/message.py +64 -23
- atomicshop/mitm/mitm_main.py +892 -0
- atomicshop/mitm/recs_files.py +183 -0
- atomicshop/mitm/shared_functions.py +4 -10
- atomicshop/mitm/ssh_tester.py +82 -0
- atomicshop/mitm/statistic_analyzer.py +257 -166
- atomicshop/mitm/statistic_analyzer_helper/analyzer_helper.py +136 -0
- atomicshop/mitm/statistic_analyzer_helper/moving_average_helper.py +525 -0
- atomicshop/monitor/change_monitor.py +96 -120
- atomicshop/monitor/checks/dns.py +139 -70
- atomicshop/monitor/checks/file.py +77 -0
- atomicshop/monitor/checks/network.py +81 -77
- atomicshop/monitor/checks/process_running.py +33 -34
- atomicshop/monitor/checks/url.py +94 -0
- atomicshop/networks.py +671 -0
- atomicshop/on_exit.py +205 -0
- atomicshop/package_mains_processor.py +84 -0
- atomicshop/permissions/permissions.py +22 -0
- atomicshop/permissions/ubuntu_permissions.py +239 -0
- atomicshop/permissions/win_permissions.py +33 -0
- atomicshop/print_api.py +24 -41
- atomicshop/process.py +63 -17
- atomicshop/process_poller/__init__.py +0 -0
- atomicshop/process_poller/pollers/__init__.py +0 -0
- atomicshop/process_poller/pollers/psutil_pywin32wmi_dll.py +95 -0
- atomicshop/process_poller/process_pool.py +207 -0
- atomicshop/process_poller/simple_process_pool.py +311 -0
- atomicshop/process_poller/tracer_base.py +45 -0
- atomicshop/process_poller/tracers/__init__.py +0 -0
- atomicshop/process_poller/tracers/event_log.py +46 -0
- atomicshop/process_poller/tracers/sysmon_etw.py +68 -0
- atomicshop/python_file_patcher.py +1 -1
- atomicshop/python_functions.py +27 -75
- atomicshop/question_answer_engine.py +2 -2
- atomicshop/scheduling.py +24 -5
- atomicshop/sound.py +4 -2
- atomicshop/speech_recognize.py +8 -0
- atomicshop/ssh_remote.py +158 -172
- atomicshop/startup/__init__.py +0 -0
- atomicshop/startup/win/__init__.py +0 -0
- atomicshop/startup/win/startup_folder.py +53 -0
- atomicshop/startup/win/task_scheduler.py +119 -0
- atomicshop/system_resource_monitor.py +61 -46
- atomicshop/system_resources.py +8 -8
- atomicshop/tempfiles.py +1 -2
- atomicshop/timer.py +30 -11
- atomicshop/urls.py +41 -0
- atomicshop/venvs.py +28 -0
- atomicshop/versioning.py +27 -0
- atomicshop/web.py +110 -25
- atomicshop/web_apis/__init__.py +0 -0
- atomicshop/web_apis/google_custom_search.py +44 -0
- atomicshop/web_apis/google_llm.py +188 -0
- atomicshop/websocket_parse.py +450 -0
- atomicshop/wrappers/certauthw/certauth.py +1 -0
- atomicshop/wrappers/cryptographyw.py +29 -8
- atomicshop/wrappers/ctyping/etw_winapi/__init__.py +0 -0
- atomicshop/wrappers/ctyping/etw_winapi/const.py +335 -0
- atomicshop/wrappers/ctyping/etw_winapi/etw_functions.py +393 -0
- atomicshop/wrappers/ctyping/file_details_winapi.py +67 -0
- atomicshop/wrappers/ctyping/msi_windows_installer/cabs.py +2 -1
- atomicshop/wrappers/ctyping/msi_windows_installer/extract_msi_main.py +13 -9
- atomicshop/wrappers/ctyping/msi_windows_installer/tables.py +35 -0
- atomicshop/wrappers/ctyping/setup_device.py +466 -0
- atomicshop/wrappers/ctyping/win_console.py +39 -0
- atomicshop/wrappers/dockerw/dockerw.py +113 -2
- atomicshop/wrappers/elasticsearchw/config_basic.py +0 -12
- atomicshop/wrappers/elasticsearchw/elastic_infra.py +75 -0
- atomicshop/wrappers/elasticsearchw/elasticsearchw.py +2 -20
- atomicshop/wrappers/factw/get_file_data.py +12 -5
- atomicshop/wrappers/factw/install/install_after_restart.py +89 -5
- atomicshop/wrappers/factw/install/pre_install_and_install_before_restart.py +20 -14
- atomicshop/wrappers/factw/postgresql/firmware.py +4 -6
- atomicshop/wrappers/githubw.py +583 -51
- atomicshop/wrappers/loggingw/consts.py +49 -0
- atomicshop/wrappers/loggingw/filters.py +102 -0
- atomicshop/wrappers/loggingw/formatters.py +58 -71
- atomicshop/wrappers/loggingw/handlers.py +459 -40
- atomicshop/wrappers/loggingw/loggers.py +19 -0
- atomicshop/wrappers/loggingw/loggingw.py +1010 -178
- atomicshop/wrappers/loggingw/reading.py +344 -19
- atomicshop/wrappers/mongodbw/__init__.py +0 -0
- atomicshop/wrappers/mongodbw/mongo_infra.py +31 -0
- atomicshop/wrappers/mongodbw/mongodbw.py +1432 -0
- atomicshop/wrappers/netshw.py +271 -0
- atomicshop/wrappers/playwrightw/engine.py +34 -19
- atomicshop/wrappers/playwrightw/infra.py +5 -0
- atomicshop/wrappers/playwrightw/javascript.py +7 -3
- atomicshop/wrappers/playwrightw/keyboard.py +14 -0
- atomicshop/wrappers/playwrightw/scenarios.py +172 -5
- atomicshop/wrappers/playwrightw/waits.py +9 -7
- atomicshop/wrappers/powershell_networking.py +80 -0
- atomicshop/wrappers/psutilw/processes.py +81 -0
- atomicshop/wrappers/psutilw/psutil_networks.py +85 -0
- atomicshop/wrappers/psutilw/psutilw.py +9 -0
- atomicshop/wrappers/pyopensslw.py +9 -2
- atomicshop/wrappers/pywin32w/__init__.py +0 -0
- atomicshop/wrappers/pywin32w/cert_store.py +116 -0
- atomicshop/wrappers/pywin32w/console.py +34 -0
- atomicshop/wrappers/pywin32w/win_event_log/__init__.py +0 -0
- atomicshop/wrappers/pywin32w/win_event_log/fetch.py +174 -0
- atomicshop/wrappers/pywin32w/win_event_log/subscribe.py +212 -0
- atomicshop/wrappers/pywin32w/win_event_log/subscribes/__init__.py +0 -0
- atomicshop/wrappers/pywin32w/win_event_log/subscribes/process_create.py +57 -0
- atomicshop/wrappers/pywin32w/win_event_log/subscribes/process_terminate.py +49 -0
- atomicshop/wrappers/pywin32w/win_event_log/subscribes/schannel_logging.py +97 -0
- atomicshop/wrappers/pywin32w/winshell.py +19 -0
- atomicshop/wrappers/pywin32w/wmis/__init__.py +0 -0
- atomicshop/wrappers/pywin32w/wmis/msft_netipaddress.py +113 -0
- atomicshop/wrappers/pywin32w/wmis/win32_networkadapterconfiguration.py +259 -0
- atomicshop/wrappers/pywin32w/wmis/win32networkadapter.py +112 -0
- atomicshop/wrappers/pywin32w/wmis/wmi_helpers.py +236 -0
- atomicshop/wrappers/socketw/accepter.py +21 -7
- atomicshop/wrappers/socketw/certificator.py +216 -150
- atomicshop/wrappers/socketw/creator.py +190 -50
- atomicshop/wrappers/socketw/dns_server.py +500 -173
- atomicshop/wrappers/socketw/exception_wrapper.py +45 -52
- atomicshop/wrappers/socketw/process_getter.py +86 -0
- atomicshop/wrappers/socketw/receiver.py +144 -102
- atomicshop/wrappers/socketw/sender.py +65 -35
- atomicshop/wrappers/socketw/sni.py +334 -165
- atomicshop/wrappers/socketw/socket_base.py +134 -0
- atomicshop/wrappers/socketw/socket_client.py +137 -95
- atomicshop/wrappers/socketw/socket_server_tester.py +14 -9
- atomicshop/wrappers/socketw/socket_wrapper.py +717 -116
- atomicshop/wrappers/socketw/ssl_base.py +15 -14
- atomicshop/wrappers/socketw/statistics_csv.py +148 -17
- atomicshop/wrappers/sysmonw.py +157 -0
- atomicshop/wrappers/ubuntu_terminal.py +65 -26
- atomicshop/wrappers/win_auditw.py +189 -0
- atomicshop/wrappers/winregw/__init__.py +0 -0
- atomicshop/wrappers/winregw/winreg_installed_software.py +58 -0
- atomicshop/wrappers/winregw/winreg_network.py +232 -0
- {atomicshop-2.11.47.dist-info → atomicshop-3.10.5.dist-info}/METADATA +31 -49
- atomicshop-3.10.5.dist-info/RECORD +306 -0
- {atomicshop-2.11.47.dist-info → atomicshop-3.10.5.dist-info}/WHEEL +1 -1
- atomicshop/_basics_temp.py +0 -101
- atomicshop/addons/a_setup_scripts/install_psycopg2_ubuntu.sh +0 -3
- atomicshop/addons/a_setup_scripts/install_pywintrace_0.3.cmd +0 -2
- atomicshop/addons/mains/install_docker_rootless_ubuntu.py +0 -11
- atomicshop/addons/mains/install_docker_ubuntu_main_sudo.py +0 -11
- atomicshop/addons/mains/install_elastic_search_and_kibana_ubuntu.py +0 -10
- atomicshop/addons/mains/install_wsl_ubuntu_lts_admin.py +0 -9
- atomicshop/addons/package_setup/CreateWheel.cmd +0 -7
- atomicshop/addons/package_setup/Setup in Edit mode.cmd +0 -6
- atomicshop/addons/package_setup/Setup.cmd +0 -7
- atomicshop/addons/process_list/compile.cmd +0 -2
- atomicshop/addons/process_list/compiled/Win10x64/process_list.dll +0 -0
- atomicshop/addons/process_list/compiled/Win10x64/process_list.exp +0 -0
- atomicshop/addons/process_list/compiled/Win10x64/process_list.lib +0 -0
- atomicshop/archiver/_search_in_zip.py +0 -189
- atomicshop/archiver/archiver.py +0 -34
- atomicshop/archiver/search_in_archive.py +0 -250
- atomicshop/archiver/sevenz_app_w.py +0 -86
- atomicshop/archiver/sevenzs.py +0 -44
- atomicshop/archiver/zips.py +0 -293
- atomicshop/etw/dns_trace.py +0 -118
- atomicshop/etw/etw.py +0 -61
- atomicshop/file_types.py +0 -24
- atomicshop/mitm/engines/create_module_template_example.py +0 -13
- atomicshop/mitm/initialize_mitm_server.py +0 -240
- atomicshop/monitor/checks/hash.py +0 -44
- atomicshop/monitor/checks/hash_checks/file.py +0 -55
- atomicshop/monitor/checks/hash_checks/url.py +0 -62
- atomicshop/pbtkmultifile_argparse.py +0 -88
- atomicshop/permissions.py +0 -110
- atomicshop/process_poller.py +0 -237
- atomicshop/script_as_string_processor.py +0 -38
- atomicshop/ssh_scripts/process_from_ipv4.py +0 -37
- atomicshop/ssh_scripts/process_from_port.py +0 -27
- atomicshop/wrappers/_process_wrapper_curl.py +0 -27
- atomicshop/wrappers/_process_wrapper_tar.py +0 -21
- atomicshop/wrappers/dockerw/install_docker.py +0 -209
- atomicshop/wrappers/elasticsearchw/infrastructure.py +0 -265
- atomicshop/wrappers/elasticsearchw/install_elastic.py +0 -232
- atomicshop/wrappers/ffmpegw.py +0 -125
- atomicshop/wrappers/loggingw/checks.py +0 -20
- atomicshop/wrappers/nodejsw/install_nodejs.py +0 -139
- atomicshop/wrappers/process_wrapper_pbtk.py +0 -16
- atomicshop/wrappers/socketw/base.py +0 -59
- atomicshop/wrappers/socketw/get_process.py +0 -107
- atomicshop/wrappers/wslw.py +0 -191
- atomicshop-2.11.47.dist-info/RECORD +0 -251
- /atomicshop/{addons/mains → a_mains}/FACT/factw_fact_extractor_docker_image_main_sudo.py +0 -0
- /atomicshop/{addons → a_mains/addons}/PlayWrightCodegen.cmd +0 -0
- /atomicshop/{addons → a_mains/addons}/ScriptExecution.cmd +0 -0
- /atomicshop/{addons/mains → a_mains/addons}/inits/init_to_import_all_modules.py +0 -0
- /atomicshop/{addons → a_mains/addons}/process_list/ReadMe.txt +0 -0
- /atomicshop/{addons/mains → a_mains}/search_for_hyperlinks_in_docx.py +0 -0
- /atomicshop/{archiver → etws}/__init__.py +0 -0
- /atomicshop/{etw → etws/traces}/__init__.py +0 -0
- /atomicshop/{monitor/checks/hash_checks → mitm/statistic_analyzer_helper}/__init__.py +0 -0
- /atomicshop/{wrappers/nodejsw → permissions}/__init__.py +0 -0
- /atomicshop/wrappers/pywin32w/{wmi_win32process.py → wmis/win32process.py} +0 -0
- {atomicshop-2.11.47.dist-info → atomicshop-3.10.5.dist-info/licenses}/LICENSE.txt +0 -0
- {atomicshop-2.11.47.dist-info → atomicshop-3.10.5.dist-info}/top_level.txt +0 -0
atomicshop/web.py
CHANGED
|
@@ -1,20 +1,29 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import urllib.request
|
|
3
|
+
import urllib.error
|
|
4
|
+
import ssl
|
|
5
|
+
from typing import Any
|
|
6
|
+
import http.client
|
|
7
|
+
|
|
8
|
+
# noinspection PyPackageRequirements
|
|
9
|
+
import certifi
|
|
10
|
+
from dkarchiver.arch_wrappers import zips
|
|
3
11
|
|
|
4
|
-
from .print_api import print_api
|
|
5
|
-
from .archiver import zips
|
|
6
12
|
from .urls import url_parser
|
|
7
13
|
from .file_io import file_io
|
|
8
14
|
from .wrappers.playwrightw import scenarios
|
|
9
|
-
from . import filesystem
|
|
15
|
+
from . import filesystem, print_api
|
|
10
16
|
|
|
11
17
|
|
|
12
18
|
# https://www.useragents.me/
|
|
13
19
|
# https://user-agents.net/
|
|
14
20
|
USER_AGENTS = {
|
|
15
|
-
'
|
|
16
|
-
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
|
|
17
|
-
|
|
21
|
+
'Chrome 111.0.0 Windows_10/11 x64':
|
|
22
|
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
|
|
23
|
+
'Chrome 132.0.0 Windows 10/11 x64':
|
|
24
|
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
|
|
25
|
+
'Chrome 142.0.0 Windows 10/11 x64':
|
|
26
|
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36'
|
|
18
27
|
}
|
|
19
28
|
|
|
20
29
|
|
|
@@ -26,10 +35,10 @@ def is_status_ok(status_code: int, **kwargs) -> bool:
|
|
|
26
35
|
"""
|
|
27
36
|
|
|
28
37
|
if status_code != 200:
|
|
29
|
-
print_api(f'URL Error, status code: {str(status_code)}', error_type=True, **kwargs)
|
|
38
|
+
print_api.print_api(f'URL Error, status code: {str(status_code)}', error_type=True, **kwargs)
|
|
30
39
|
return False
|
|
31
40
|
else:
|
|
32
|
-
print_api('URL Status: 200 OK', color="green", **kwargs)
|
|
41
|
+
print_api.print_api('URL Status: 200 OK', color="green", **kwargs)
|
|
33
42
|
return True
|
|
34
43
|
|
|
35
44
|
|
|
@@ -68,7 +77,7 @@ def get_page_bytes(
|
|
|
68
77
|
raise ValueError('ERROR: [user_agent] specified and [chrome_user_agent] usage is [True]. Choose one.')
|
|
69
78
|
|
|
70
79
|
if chrome_user_agent:
|
|
71
|
-
user_agent = USER_AGENTS['
|
|
80
|
+
user_agent = USER_AGENTS['Chrome 142.0.0 Windows 10/11 x64']
|
|
72
81
|
|
|
73
82
|
if user_agent:
|
|
74
83
|
# Create a 'Request' object with the URL and user agent.
|
|
@@ -88,10 +97,13 @@ def get_page_bytes(
|
|
|
88
97
|
|
|
89
98
|
|
|
90
99
|
def get_page_content(
|
|
91
|
-
url: str,
|
|
100
|
+
url: str,
|
|
101
|
+
get_method: str = 'urllib',
|
|
102
|
+
path: str = None,
|
|
92
103
|
playwright_pdf_format: str = 'A4',
|
|
93
104
|
playwright_html_txt_convert_to_bytes: bool = True,
|
|
94
|
-
print_kwargs: dict = None
|
|
105
|
+
print_kwargs: dict = None
|
|
106
|
+
) -> Any:
|
|
95
107
|
"""
|
|
96
108
|
Function returns the page content from the given URL.
|
|
97
109
|
|
|
@@ -144,23 +156,41 @@ def get_page_content(
|
|
|
144
156
|
return result
|
|
145
157
|
|
|
146
158
|
|
|
147
|
-
def download(
|
|
159
|
+
def download(
|
|
160
|
+
file_url: str,
|
|
161
|
+
target_directory: str = None,
|
|
162
|
+
file_name: str = None,
|
|
163
|
+
headers: dict = None,
|
|
164
|
+
overwrite: bool = False,
|
|
165
|
+
# use_certifi_ca_repository: bool = False,
|
|
166
|
+
**kwargs
|
|
167
|
+
) -> str | None:
|
|
148
168
|
"""
|
|
149
169
|
The function receives url and target filesystem directory to download the file.
|
|
150
170
|
|
|
171
|
+
Note: Install 'pip-system-certs' package if you want to use system's CA store for SSL context
|
|
172
|
+
in an environment where 'certifi' package is installed.
|
|
173
|
+
|
|
151
174
|
:param file_url: full URL to download the file.
|
|
152
175
|
:param target_directory: The directory on the filesystem to save the file to.
|
|
176
|
+
If not specified, temporary directory will be used.
|
|
153
177
|
:param file_name: string, file name (example: file.zip) that you want the downloaded file to be saved as.
|
|
154
178
|
If not specified, the default filename from 'file_url' will be used.
|
|
179
|
+
:param headers: dictionary, HTTP headers to use when downloading the file.
|
|
180
|
+
:param overwrite: boolean, if True, the file will be overwritten if it already exists.
|
|
181
|
+
If False, the file will not be overwritten and the function will return None if the file already exists.
|
|
182
|
+
Default is False.
|
|
183
|
+
:param use_certifi_ca_repository: boolean, if True, the certifi CA store will be used for SSL context
|
|
184
|
+
instead of the system's default CA store.
|
|
155
185
|
:return: string, full file path of downloaded file. If download failed, 'None' will be returned.
|
|
156
186
|
"""
|
|
157
187
|
|
|
158
188
|
def print_to_console(print_end=None):
|
|
159
189
|
if file_size_bytes_int:
|
|
160
|
-
print_api(
|
|
190
|
+
print_api.print_api(
|
|
161
191
|
f'Downloaded bytes: {aggregated_bytes_int} / {file_size_bytes_int}', print_end=print_end, **kwargs)
|
|
162
192
|
else:
|
|
163
|
-
print_api(f'Downloaded bytes: {aggregated_bytes_int}', print_end=print_end, **kwargs)
|
|
193
|
+
print_api.print_api(f'Downloaded bytes: {aggregated_bytes_int}', print_end=print_end, **kwargs)
|
|
164
194
|
|
|
165
195
|
# Size of the buffer to read each time from url.
|
|
166
196
|
buffer_size: int = 4096
|
|
@@ -170,20 +200,66 @@ def download(file_url: str, target_directory: str, file_name: str = None, **kwar
|
|
|
170
200
|
# Get only the filename from URL.
|
|
171
201
|
file_name = get_filename_from_url(file_url=file_url)
|
|
172
202
|
|
|
203
|
+
# If 'target_directory' wasn't specified, we will use the temporary directory.
|
|
204
|
+
if not target_directory:
|
|
205
|
+
target_directory = filesystem.get_temp_directory()
|
|
206
|
+
|
|
173
207
|
# Build full path to file.
|
|
174
208
|
file_path: str = f'{target_directory}{os.sep}{file_name}'
|
|
175
209
|
|
|
176
|
-
|
|
210
|
+
if os.path.exists(file_path):
|
|
211
|
+
if overwrite:
|
|
212
|
+
print_api.print_api(f'File already exists: {file_path}. Overwriting...', **kwargs)
|
|
213
|
+
else:
|
|
214
|
+
print_api.print_api(f'File already exists: {file_path}. Skipping download.', **kwargs)
|
|
215
|
+
return file_path
|
|
216
|
+
|
|
217
|
+
print_api.print_api(f'Downloading: {file_url}', **kwargs)
|
|
218
|
+
print_api.print_api(f'To: {file_path}', **kwargs)
|
|
219
|
+
|
|
220
|
+
# Open the URL for data gathering with SSL context.
|
|
221
|
+
# if not use_certifi_ca_repository:
|
|
222
|
+
# # Create a default SSL context using the system's CA store.
|
|
223
|
+
# ssl_context = ssl.create_default_context()
|
|
224
|
+
# else:
|
|
225
|
+
|
|
226
|
+
# Create a default SSL context using the certifi CA store.
|
|
227
|
+
# This is useful for environments where the system's CA store is not available or not trusted.
|
|
228
|
+
# 'certifi.where()' returns the path to the certifi CA bundle.
|
|
229
|
+
ssl_context: ssl.SSLContext = ssl.create_default_context(cafile=certifi.where())
|
|
177
230
|
|
|
178
231
|
# In order to use 'urllib.request', it is not enough to 'import urllib', you need to 'import urllib.request'.
|
|
179
|
-
#
|
|
180
|
-
|
|
232
|
+
# Build a Request object with headers if provided.
|
|
233
|
+
req = urllib.request.Request(file_url, headers=headers or {})
|
|
234
|
+
|
|
235
|
+
def do_urlopen(ssl_context_internal: ssl.SSLContext) -> http.client.HTTPResponse | None:
|
|
236
|
+
try:
|
|
237
|
+
response: http.client.HTTPResponse = urllib.request.urlopen(req, context=ssl_context_internal)
|
|
238
|
+
return response
|
|
239
|
+
except urllib.error.URLError as e:
|
|
240
|
+
if getattr(e, 'reason', None) and isinstance(e.reason, ssl.SSLCertVerificationError):
|
|
241
|
+
if getattr(e.reason, 'reason', None) and e.reason.reason == 'CERTIFICATE_VERIFY_FAILED':
|
|
242
|
+
if getattr(e.reason, 'verify_message', None) and e.reason.verify_message == 'unable to get local issuer certificate':
|
|
243
|
+
return None
|
|
244
|
+
|
|
245
|
+
raise e
|
|
246
|
+
|
|
247
|
+
# Try to open the URL with the created SSL context with certifi.
|
|
248
|
+
file_to_download = do_urlopen(ssl_context_internal=ssl_context)
|
|
249
|
+
if not file_to_download:
|
|
250
|
+
# If failed, try to open the URL with the system's default SSL context.
|
|
251
|
+
ssl_context = ssl.create_default_context()
|
|
252
|
+
file_to_download = do_urlopen(ssl_context_internal=ssl_context)
|
|
253
|
+
if not file_to_download:
|
|
254
|
+
print_api.print_api(
|
|
255
|
+
'ERROR: URL open failed with both certifi and system\'s default SSL context.', error_type=True, **kwargs)
|
|
256
|
+
return None
|
|
181
257
|
|
|
182
258
|
# Check status of url.
|
|
183
259
|
if not is_status_ok(status_code=file_to_download.status, **kwargs):
|
|
184
260
|
return None
|
|
185
261
|
|
|
186
|
-
file_size_bytes_int: int = None
|
|
262
|
+
file_size_bytes_int: int | None = None
|
|
187
263
|
# Get file size. For some reason doesn't show for GitHub branch downloads.
|
|
188
264
|
if file_to_download.headers['Content-Length']:
|
|
189
265
|
file_size_bytes_int = int(file_to_download.headers['Content-Length'])
|
|
@@ -208,19 +284,27 @@ def download(file_url: str, target_directory: str, file_name: str = None, **kwar
|
|
|
208
284
|
else:
|
|
209
285
|
print_to_console()
|
|
210
286
|
break
|
|
287
|
+
|
|
211
288
|
if aggregated_bytes_int == file_size_bytes_int:
|
|
212
|
-
print_api(f'Successfully Downloaded to: {file_path}', color="green", **kwargs)
|
|
289
|
+
print_api.print_api(f'Successfully Downloaded to: {file_path}', color="green", **kwargs)
|
|
290
|
+
elif file_size_bytes_int is None:
|
|
291
|
+
pass
|
|
213
292
|
else:
|
|
214
293
|
message = f'Download failed: {aggregated_bytes_int} / {file_size_bytes_int}. File: {file_path}'
|
|
215
|
-
print_api(
|
|
294
|
+
print_api.print_api(
|
|
216
295
|
message, error_type=True, color="red", **kwargs)
|
|
217
296
|
|
|
218
297
|
return file_path
|
|
219
298
|
|
|
220
299
|
|
|
221
300
|
def download_and_extract_file(
|
|
222
|
-
file_url: str,
|
|
223
|
-
|
|
301
|
+
file_url: str,
|
|
302
|
+
target_directory: str,
|
|
303
|
+
file_name: str = str(),
|
|
304
|
+
archive_remove_first_directory: bool = False,
|
|
305
|
+
headers: dict = None,
|
|
306
|
+
**kwargs
|
|
307
|
+
):
|
|
224
308
|
"""
|
|
225
309
|
This function will download the branch file from GitHub, extract the file and remove the file, leaving
|
|
226
310
|
only the extracted folder.
|
|
@@ -230,18 +314,19 @@ def download_and_extract_file(
|
|
|
230
314
|
Default is empty. If it is empty, then the filename will be extracted from 'file_url'.
|
|
231
315
|
:param target_directory: string, target directory where to save the file.
|
|
232
316
|
:param archive_remove_first_directory: boolean, sets if archive extract function will extract the archive without
|
|
233
|
-
first directory in the archive. Check reference in the 'extract_archive_with_zipfile' function.
|
|
317
|
+
first directory in the archive. Check reference in the 'dkarchiver.arch_wrappers.zips.extract_archive_with_zipfile' function.
|
|
318
|
+
:param headers: dictionary, HTTP headers to use when downloading the file.
|
|
234
319
|
:return:
|
|
235
320
|
"""
|
|
236
321
|
|
|
237
322
|
# Download the repo to current working directory and return full file path of downloaded file.
|
|
238
323
|
file_path = download(
|
|
239
|
-
file_url=file_url, target_directory=target_directory, file_name=file_name, **kwargs)
|
|
324
|
+
file_url=file_url, target_directory=target_directory, file_name=file_name, headers=headers, **kwargs)
|
|
240
325
|
|
|
241
326
|
# Extract the archive and remove the first directory.
|
|
242
327
|
zips.extract_archive_with_zipfile(
|
|
243
328
|
archive_path=f'{file_path}', extract_directory=target_directory,
|
|
244
|
-
remove_first_directory=archive_remove_first_directory
|
|
329
|
+
remove_first_directory=archive_remove_first_directory)
|
|
245
330
|
|
|
246
331
|
# Remove the archive file.
|
|
247
332
|
filesystem.remove_file(file_path=f'{file_path}', **kwargs)
|
|
File without changes
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
|
|
3
|
+
from googleapiclient.discovery import build
|
|
4
|
+
import googleapiclient.errors
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def search_google(
|
|
8
|
+
query: str,
|
|
9
|
+
api_key: str,
|
|
10
|
+
search_engine_id: str
|
|
11
|
+
) -> tuple[
|
|
12
|
+
Union[list[str], None],
|
|
13
|
+
str]:
|
|
14
|
+
"""
|
|
15
|
+
Function to search Google using Google Custom Search API for links related to a query.
|
|
16
|
+
:param query: string, the search query to search on Google Custom Search.
|
|
17
|
+
:param api_key: string, the API key for the Google Custom Search API.
|
|
18
|
+
:param search_engine_id: string, the search engine ID for the Google Custom Search API.
|
|
19
|
+
|
|
20
|
+
:return: tuple(list of strings - the links related to the query, string - the error message if any)
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
# noinspection PyTypeChecker
|
|
24
|
+
error: str = None
|
|
25
|
+
|
|
26
|
+
try:
|
|
27
|
+
service = build("customsearch", "v1", developerKey=api_key)
|
|
28
|
+
result = service.cse().list(
|
|
29
|
+
q=query,
|
|
30
|
+
cx=search_engine_id,
|
|
31
|
+
# gl="us", # Country code
|
|
32
|
+
# lr="lang_en", # Language restriction
|
|
33
|
+
# safe="off", # Safe search off
|
|
34
|
+
# dateRestrict="m1" # Restrict results to the last month
|
|
35
|
+
).execute()
|
|
36
|
+
items = result.get('items', [])
|
|
37
|
+
links = [item['link'] for item in items if 'link' in item]
|
|
38
|
+
return links, error
|
|
39
|
+
except googleapiclient.errors.HttpError as e:
|
|
40
|
+
# In case of rate limit error, return the error message.
|
|
41
|
+
if e.status_code == 429:
|
|
42
|
+
return None, str(e.reason)
|
|
43
|
+
else:
|
|
44
|
+
raise e
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
from typing import Literal
|
|
2
|
+
|
|
3
|
+
from google import genai
|
|
4
|
+
from google.genai.types import GenerateContentConfig
|
|
5
|
+
|
|
6
|
+
from . import google_custom_search
|
|
7
|
+
from ..wrappers.playwrightw import scenarios
|
|
8
|
+
from .. import urls
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class GoogleCustomSearchError(Exception):
|
|
12
|
+
pass
|
|
13
|
+
|
|
14
|
+
class GoogleLLMNoContentError(Exception):
|
|
15
|
+
pass
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
"""
|
|
19
|
+
Rate Limits and Quotas: https://ai.google.dev/gemini-api/docs/rate-limits
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class GoogleLLM:
|
|
24
|
+
def __init__(
|
|
25
|
+
self,
|
|
26
|
+
llm_api_key: str,
|
|
27
|
+
search_api_key: str,
|
|
28
|
+
search_engine_id: str
|
|
29
|
+
) -> None:
|
|
30
|
+
"""
|
|
31
|
+
Constructor for the GoogleLLM class.
|
|
32
|
+
:param llm_api_key: str, the API key for the Gemini API.
|
|
33
|
+
:param search_api_key: str, the API key for the Google Custom Search API.
|
|
34
|
+
:param search_engine_id: str, the search engine ID for the Google Custom Search API.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
self.client = genai.Client(api_key=llm_api_key)
|
|
38
|
+
self.search_api_key: str = search_api_key
|
|
39
|
+
self.search_engine_id: str = search_engine_id
|
|
40
|
+
|
|
41
|
+
def get_current_models(
|
|
42
|
+
self,
|
|
43
|
+
full_info: bool = False,
|
|
44
|
+
model_type: str = None,
|
|
45
|
+
verbose: bool = False
|
|
46
|
+
) -> list:
|
|
47
|
+
"""
|
|
48
|
+
Function to get the current models available in the Gemini API
|
|
49
|
+
|
|
50
|
+
:param full_info: bool, if True, returns the full information about the models, otherwise only the names for API usage.
|
|
51
|
+
:param model_type: str, the type of models to filter by. None, for all models.
|
|
52
|
+
Examples of known types: 'gemini', 'veo', 'imagen', 'deep-research', 'nano-banana'.
|
|
53
|
+
:param verbose: bool, if True, prints the models information to the console.
|
|
54
|
+
"""
|
|
55
|
+
result_list: list = []
|
|
56
|
+
for model in self.client.models.list():
|
|
57
|
+
if model_type and model_type not in model.name:
|
|
58
|
+
continue
|
|
59
|
+
|
|
60
|
+
if full_info:
|
|
61
|
+
result_list.append(model)
|
|
62
|
+
else:
|
|
63
|
+
result_list.append(model.name)
|
|
64
|
+
|
|
65
|
+
if verbose:
|
|
66
|
+
for model in result_list:
|
|
67
|
+
print(model)
|
|
68
|
+
|
|
69
|
+
return result_list
|
|
70
|
+
|
|
71
|
+
def get_answer_online(
        self,
        search_query_or_url: str,
        text_fetch_method: Literal[
            'playwright_text',
            'js_text',
            'playwright_html',
            'js_html',
            'playwright_copypaste'
        ],
        llm_query: str,
        llm_post_instructions: str,
        number_of_top_links: int = 2,
        number_of_characters_per_link: int = 15000,
        temperature: float = 0,
        model_name: str = 'gemini-2.5-pro'
) -> str:
    """
    Fetch web content for a URL or a Google Custom Search query and ask Gemini a question about it.

    :param search_query_or_url: string, checked with 'urls.is_valid_url'.
        URL: its content is fetched directly, Google Custom Search is not used.
        Search query: it is sent to Google Custom Search and the top links are fetched.
    :param text_fetch_method: string, the method to fetch text from the URL(s). It is passed unchanged to
        'scenarios.fetch_urls_content_in_threads'.
        playwright_text: uses native Playwright to fetch text from the URL.
        js_text: uses Playwright and JavaScript evaluation to fetch text from the URL.
        playwright_html: uses native Playwright to fetch HTML from the URL and then parse it to text
            using beautiful soup.
        js_html: uses Playwright and JavaScript evaluation to fetch HTML from the URL and then parse it to text
            using beautiful soup.
        playwright_copypaste: uses native Playwright to fetch text from the URL by copying and pasting the text
            from the rendered page using the clipboard.
    :param llm_query: string, the question to ask the LLM about the fetched text content.
    :param llm_post_instructions: string, additional instructions for the LLM, placed after the question.
    :param number_of_top_links: integer, how many of the first search result links to fetch content from.
        Not used when a URL was provided.
    :param number_of_characters_per_link: integer, the number of characters to fetch from each link.
    :param temperature: float, the temperature parameter for the LLM.
    :param model_name: string, the name of the model to use for the LLM.

    NOTE: 'max_output_tokens' is not a parameter of this function at the moment, it is disabled.

    :return: string, the answer by LLM to the question.
    :raises GoogleCustomSearchError: when the Google Custom Search call reports an error.
    :raises GoogleLLMNoContentError: when nothing was fetched from the URL(s).
    """

    # Decide which URLs to read: the given URL itself, or the first search hits for the query.
    if urls.is_valid_url(search_query_or_url):
        target_urls = [search_query_or_url]
    else:
        links, search_error = google_custom_search.search_google(
            query=search_query_or_url, api_key=self.search_api_key, search_engine_id=self.search_engine_id)

        if search_error:
            raise GoogleCustomSearchError(f"Error occurred when searching Google: {search_error}")

        # Only the first X links are used to not overload the LLM.
        target_urls = links[:number_of_top_links]

    contents = scenarios.fetch_urls_content_in_threads(
        urls=target_urls, number_of_characters_per_link=number_of_characters_per_link,
        text_fetch_method=text_fetch_method)

    if not contents:
        raise GoogleLLMNoContentError("No content was fetched from the provided URL(s).")

    # Every fetched page, including the last one, is followed by the same visual separator.
    page_separator = '\n\n\n\n================================================================'
    combined_content = ''.join(f'{content}{page_separator}' for content in contents)

    # The prompt sections are separated by one empty line each.
    final_question = '\n\n'.join([
        f'Answer this question: {llm_query}',
        f'Follow these instructions: {llm_post_instructions}',
        'Based on these data contents:',
        combined_content
    ])

    # Ask Gemini to process the combined content.
    return self.ask_gemini(final_question, temperature, model_name)
|
|
148
|
+
|
|
149
|
+
def ask_gemini(
        self,
        question: str,
        temperature: float,
        model_name: str = 'gemini-2.5-pro'
) -> str:
    r"""
    Send a single question to the Gemini API and return the text of the response.

    :param question: str, the question to ask the Gemini API.
    :param temperature: float, the temperature parameter for the LLM.
        While 0 is deterministic, higher values can lead to more creative responses.
    :param model_name: str, the name of the model to use for the LLM.

    NOTE: 'max_output_tokens' (int, the maximum number of tokens to generate in the LLM response)
        is disabled, since it gave exceptions in some situations.
        Example:
            File ".\Lib\site-packages\google\generativeai\types\generation_types.py", line 464, in text
                parts = self.parts
                        ^^^^^^^^^^
            File ".\Lib\site-packages\google\generativeai\types\generation_types.py", line 447, in parts
                raise ValueError(msg)
            ValueError: Invalid operation: The `response.parts` quick accessor requires a single candidate, but but `response.candidates` is empty.

    :return: str, the 'text' attribute of the response returned by the Gemini API client.
    """

    # Sampling options: only the temperature comes from the caller, 'top_p' and 'top_k' are fixed.
    sampling_options = dict(temperature=temperature, top_p=0.99, top_k=0)

    generate = self.client.models.generate_content
    reply = generate(
        model=model_name,
        contents=question,
        config=GenerateContentConfig(**sampling_options))

    return reply.text
|