atomicshop 2.15.11__py3-none-any.whl → 3.10.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- atomicshop/__init__.py +1 -1
- atomicshop/{addons/mains → a_mains}/FACT/update_extract.py +3 -2
- atomicshop/a_mains/dns_gateway_setting.py +11 -0
- atomicshop/a_mains/get_local_tcp_ports.py +85 -0
- atomicshop/a_mains/github_wrapper.py +11 -0
- atomicshop/a_mains/install_ca_certificate.py +172 -0
- atomicshop/a_mains/process_from_port.py +119 -0
- atomicshop/a_mains/set_default_dns_gateway.py +90 -0
- atomicshop/a_mains/update_config_toml.py +38 -0
- atomicshop/basics/ansi_escape_codes.py +3 -1
- atomicshop/basics/argparse_template.py +2 -0
- atomicshop/basics/booleans.py +27 -30
- atomicshop/basics/bytes_arrays.py +43 -0
- atomicshop/basics/classes.py +149 -1
- atomicshop/basics/enums.py +2 -2
- atomicshop/basics/exceptions.py +5 -1
- atomicshop/basics/list_of_classes.py +29 -0
- atomicshop/basics/multiprocesses.py +374 -50
- atomicshop/basics/strings.py +72 -3
- atomicshop/basics/threads.py +14 -0
- atomicshop/basics/tracebacks.py +13 -3
- atomicshop/certificates.py +153 -52
- atomicshop/config_init.py +11 -6
- atomicshop/console_user_response.py +7 -14
- atomicshop/consoles.py +9 -0
- atomicshop/datetimes.py +1 -1
- atomicshop/diff_check.py +3 -3
- atomicshop/dns.py +128 -3
- atomicshop/etws/_pywintrace_fix.py +17 -0
- atomicshop/etws/trace.py +40 -42
- atomicshop/etws/traces/trace_dns.py +56 -44
- atomicshop/etws/traces/trace_tcp.py +130 -0
- atomicshop/file_io/csvs.py +27 -5
- atomicshop/file_io/docxs.py +34 -17
- atomicshop/file_io/file_io.py +31 -17
- atomicshop/file_io/jsons.py +49 -0
- atomicshop/file_io/tomls.py +139 -0
- atomicshop/filesystem.py +616 -291
- atomicshop/get_process_list.py +3 -3
- atomicshop/http_parse.py +149 -93
- atomicshop/ip_addresses.py +6 -1
- atomicshop/mitm/centered_settings.py +132 -0
- atomicshop/mitm/config_static.py +207 -0
- atomicshop/mitm/config_toml_editor.py +55 -0
- atomicshop/mitm/connection_thread_worker.py +875 -357
- atomicshop/mitm/engines/__parent/parser___parent.py +4 -17
- atomicshop/mitm/engines/__parent/recorder___parent.py +108 -51
- atomicshop/mitm/engines/__parent/requester___parent.py +116 -0
- atomicshop/mitm/engines/__parent/responder___parent.py +75 -114
- atomicshop/mitm/engines/__reference_general/parser___reference_general.py +10 -7
- atomicshop/mitm/engines/__reference_general/recorder___reference_general.py +5 -5
- atomicshop/mitm/engines/__reference_general/requester___reference_general.py +47 -0
- atomicshop/mitm/engines/__reference_general/responder___reference_general.py +95 -13
- atomicshop/mitm/engines/create_module_template.py +58 -14
- atomicshop/mitm/import_config.py +359 -139
- atomicshop/mitm/initialize_engines.py +160 -80
- atomicshop/mitm/message.py +64 -23
- atomicshop/mitm/mitm_main.py +892 -0
- atomicshop/mitm/recs_files.py +183 -0
- atomicshop/mitm/shared_functions.py +4 -10
- atomicshop/mitm/ssh_tester.py +82 -0
- atomicshop/mitm/statistic_analyzer.py +136 -40
- atomicshop/mitm/statistic_analyzer_helper/moving_average_helper.py +265 -83
- atomicshop/monitor/checks/dns.py +1 -1
- atomicshop/networks.py +671 -0
- atomicshop/on_exit.py +39 -9
- atomicshop/package_mains_processor.py +84 -0
- atomicshop/permissions/permissions.py +22 -0
- atomicshop/permissions/ubuntu_permissions.py +239 -0
- atomicshop/permissions/win_permissions.py +33 -0
- atomicshop/print_api.py +24 -42
- atomicshop/process.py +24 -6
- atomicshop/process_poller/process_pool.py +0 -1
- atomicshop/process_poller/simple_process_pool.py +204 -5
- atomicshop/python_file_patcher.py +1 -1
- atomicshop/python_functions.py +27 -75
- atomicshop/speech_recognize.py +8 -0
- atomicshop/ssh_remote.py +158 -172
- atomicshop/system_resource_monitor.py +61 -47
- atomicshop/system_resources.py +8 -8
- atomicshop/tempfiles.py +1 -2
- atomicshop/urls.py +6 -0
- atomicshop/venvs.py +28 -0
- atomicshop/versioning.py +27 -0
- atomicshop/web.py +98 -27
- atomicshop/web_apis/google_custom_search.py +44 -0
- atomicshop/web_apis/google_llm.py +188 -0
- atomicshop/websocket_parse.py +450 -0
- atomicshop/wrappers/certauthw/certauth.py +1 -0
- atomicshop/wrappers/cryptographyw.py +29 -8
- atomicshop/wrappers/ctyping/etw_winapi/const.py +97 -47
- atomicshop/wrappers/ctyping/etw_winapi/etw_functions.py +178 -49
- atomicshop/wrappers/ctyping/file_details_winapi.py +67 -0
- atomicshop/wrappers/ctyping/msi_windows_installer/cabs.py +2 -1
- atomicshop/wrappers/ctyping/msi_windows_installer/extract_msi_main.py +2 -2
- atomicshop/wrappers/ctyping/setup_device.py +466 -0
- atomicshop/wrappers/ctyping/win_console.py +39 -0
- atomicshop/wrappers/dockerw/dockerw.py +113 -2
- atomicshop/wrappers/elasticsearchw/config_basic.py +0 -12
- atomicshop/wrappers/elasticsearchw/elastic_infra.py +75 -0
- atomicshop/wrappers/elasticsearchw/elasticsearchw.py +2 -20
- atomicshop/wrappers/factw/get_file_data.py +12 -5
- atomicshop/wrappers/factw/install/install_after_restart.py +89 -5
- atomicshop/wrappers/factw/install/pre_install_and_install_before_restart.py +20 -14
- atomicshop/wrappers/githubw.py +537 -54
- atomicshop/wrappers/loggingw/consts.py +1 -1
- atomicshop/wrappers/loggingw/filters.py +23 -0
- atomicshop/wrappers/loggingw/formatters.py +12 -0
- atomicshop/wrappers/loggingw/handlers.py +214 -107
- atomicshop/wrappers/loggingw/loggers.py +19 -0
- atomicshop/wrappers/loggingw/loggingw.py +860 -22
- atomicshop/wrappers/loggingw/reading.py +134 -112
- atomicshop/wrappers/mongodbw/mongo_infra.py +31 -0
- atomicshop/wrappers/mongodbw/mongodbw.py +1324 -36
- atomicshop/wrappers/netshw.py +271 -0
- atomicshop/wrappers/playwrightw/engine.py +34 -19
- atomicshop/wrappers/playwrightw/infra.py +5 -0
- atomicshop/wrappers/playwrightw/javascript.py +7 -3
- atomicshop/wrappers/playwrightw/keyboard.py +14 -0
- atomicshop/wrappers/playwrightw/scenarios.py +172 -5
- atomicshop/wrappers/playwrightw/waits.py +9 -7
- atomicshop/wrappers/powershell_networking.py +80 -0
- atomicshop/wrappers/psutilw/processes.py +37 -1
- atomicshop/wrappers/psutilw/psutil_networks.py +85 -0
- atomicshop/wrappers/pyopensslw.py +9 -2
- atomicshop/wrappers/pywin32w/cert_store.py +116 -0
- atomicshop/wrappers/pywin32w/win_event_log/fetch.py +174 -0
- atomicshop/wrappers/pywin32w/win_event_log/subscribes/process_create.py +3 -105
- atomicshop/wrappers/pywin32w/win_event_log/subscribes/process_terminate.py +3 -57
- atomicshop/wrappers/pywin32w/wmis/msft_netipaddress.py +113 -0
- atomicshop/wrappers/pywin32w/wmis/win32_networkadapterconfiguration.py +259 -0
- atomicshop/wrappers/pywin32w/wmis/win32networkadapter.py +112 -0
- atomicshop/wrappers/pywin32w/wmis/wmi_helpers.py +236 -0
- atomicshop/wrappers/socketw/accepter.py +21 -7
- atomicshop/wrappers/socketw/certificator.py +216 -150
- atomicshop/wrappers/socketw/creator.py +190 -50
- atomicshop/wrappers/socketw/dns_server.py +491 -182
- atomicshop/wrappers/socketw/exception_wrapper.py +45 -52
- atomicshop/wrappers/socketw/process_getter.py +86 -0
- atomicshop/wrappers/socketw/receiver.py +144 -102
- atomicshop/wrappers/socketw/sender.py +65 -35
- atomicshop/wrappers/socketw/sni.py +334 -165
- atomicshop/wrappers/socketw/socket_base.py +134 -0
- atomicshop/wrappers/socketw/socket_client.py +137 -95
- atomicshop/wrappers/socketw/socket_server_tester.py +11 -7
- atomicshop/wrappers/socketw/socket_wrapper.py +717 -116
- atomicshop/wrappers/socketw/ssl_base.py +15 -14
- atomicshop/wrappers/socketw/statistics_csv.py +148 -17
- atomicshop/wrappers/sysmonw.py +1 -1
- atomicshop/wrappers/ubuntu_terminal.py +65 -26
- atomicshop/wrappers/win_auditw.py +189 -0
- atomicshop/wrappers/winregw/__init__.py +0 -0
- atomicshop/wrappers/winregw/winreg_installed_software.py +58 -0
- atomicshop/wrappers/winregw/winreg_network.py +232 -0
- {atomicshop-2.15.11.dist-info → atomicshop-3.10.5.dist-info}/METADATA +31 -51
- atomicshop-3.10.5.dist-info/RECORD +306 -0
- {atomicshop-2.15.11.dist-info → atomicshop-3.10.5.dist-info}/WHEEL +1 -1
- atomicshop/_basics_temp.py +0 -101
- atomicshop/a_installs/win/fibratus.py +0 -9
- atomicshop/a_installs/win/mongodb.py +0 -9
- atomicshop/a_installs/win/pycharm.py +0 -9
- atomicshop/addons/a_setup_scripts/install_psycopg2_ubuntu.sh +0 -3
- atomicshop/addons/a_setup_scripts/install_pywintrace_0.3.cmd +0 -2
- atomicshop/addons/mains/__pycache__/install_fibratus_windows.cpython-312.pyc +0 -0
- atomicshop/addons/mains/__pycache__/msi_unpacker.cpython-312.pyc +0 -0
- atomicshop/addons/mains/install_docker_rootless_ubuntu.py +0 -11
- atomicshop/addons/mains/install_docker_ubuntu_main_sudo.py +0 -11
- atomicshop/addons/mains/install_elastic_search_and_kibana_ubuntu.py +0 -10
- atomicshop/addons/mains/install_wsl_ubuntu_lts_admin.py +0 -9
- atomicshop/addons/package_setup/CreateWheel.cmd +0 -7
- atomicshop/addons/package_setup/Setup in Edit mode.cmd +0 -6
- atomicshop/addons/package_setup/Setup.cmd +0 -7
- atomicshop/archiver/_search_in_zip.py +0 -189
- atomicshop/archiver/archiver.py +0 -34
- atomicshop/archiver/search_in_archive.py +0 -250
- atomicshop/archiver/sevenz_app_w.py +0 -86
- atomicshop/archiver/sevenzs.py +0 -44
- atomicshop/archiver/zips.py +0 -293
- atomicshop/file_types.py +0 -24
- atomicshop/mitm/config_editor.py +0 -37
- atomicshop/mitm/engines/create_module_template_example.py +0 -13
- atomicshop/mitm/initialize_mitm_server.py +0 -268
- atomicshop/pbtkmultifile_argparse.py +0 -88
- atomicshop/permissions.py +0 -151
- atomicshop/script_as_string_processor.py +0 -38
- atomicshop/ssh_scripts/process_from_ipv4.py +0 -37
- atomicshop/ssh_scripts/process_from_port.py +0 -27
- atomicshop/wrappers/_process_wrapper_curl.py +0 -27
- atomicshop/wrappers/_process_wrapper_tar.py +0 -21
- atomicshop/wrappers/dockerw/install_docker.py +0 -209
- atomicshop/wrappers/elasticsearchw/infrastructure.py +0 -265
- atomicshop/wrappers/elasticsearchw/install_elastic.py +0 -232
- atomicshop/wrappers/ffmpegw.py +0 -125
- atomicshop/wrappers/fibratusw/install.py +0 -81
- atomicshop/wrappers/mongodbw/infrastructure.py +0 -53
- atomicshop/wrappers/mongodbw/install_mongodb.py +0 -190
- atomicshop/wrappers/msiw.py +0 -149
- atomicshop/wrappers/nodejsw/install_nodejs.py +0 -139
- atomicshop/wrappers/process_wrapper_pbtk.py +0 -16
- atomicshop/wrappers/psutilw/networks.py +0 -45
- atomicshop/wrappers/pycharmw.py +0 -81
- atomicshop/wrappers/socketw/base.py +0 -59
- atomicshop/wrappers/socketw/get_process.py +0 -107
- atomicshop/wrappers/wslw.py +0 -191
- atomicshop-2.15.11.dist-info/RECORD +0 -302
- /atomicshop/{addons/mains → a_mains}/FACT/factw_fact_extractor_docker_image_main_sudo.py +0 -0
- /atomicshop/{addons → a_mains/addons}/PlayWrightCodegen.cmd +0 -0
- /atomicshop/{addons → a_mains/addons}/ScriptExecution.cmd +0 -0
- /atomicshop/{addons → a_mains/addons}/inits/init_to_import_all_modules.py +0 -0
- /atomicshop/{addons → a_mains/addons}/process_list/ReadMe.txt +0 -0
- /atomicshop/{addons → a_mains/addons}/process_list/compile.cmd +0 -0
- /atomicshop/{addons → a_mains/addons}/process_list/compiled/Win10x64/process_list.dll +0 -0
- /atomicshop/{addons → a_mains/addons}/process_list/compiled/Win10x64/process_list.exp +0 -0
- /atomicshop/{addons → a_mains/addons}/process_list/compiled/Win10x64/process_list.lib +0 -0
- /atomicshop/{addons → a_mains/addons}/process_list/process_list.cpp +0 -0
- /atomicshop/{archiver → permissions}/__init__.py +0 -0
- /atomicshop/{wrappers/fibratusw → web_apis}/__init__.py +0 -0
- /atomicshop/wrappers/{nodejsw → pywin32w/wmis}/__init__.py +0 -0
- /atomicshop/wrappers/pywin32w/{wmi_win32process.py → wmis/win32process.py} +0 -0
- {atomicshop-2.15.11.dist-info → atomicshop-3.10.5.dist-info/licenses}/LICENSE.txt +0 -0
- {atomicshop-2.15.11.dist-info → atomicshop-3.10.5.dist-info}/top_level.txt +0 -0
atomicshop/urls.py
CHANGED
|
@@ -8,6 +8,11 @@ def url_parser(url):
|
|
|
8
8
|
directories = parts.path.strip('/').split('/')
|
|
9
9
|
queries = parts.query.strip('&').split('&')
|
|
10
10
|
|
|
11
|
+
if len(directories) > 1 and '.' in directories[-1]:
|
|
12
|
+
file = directories[-1]
|
|
13
|
+
else:
|
|
14
|
+
file = ''
|
|
15
|
+
|
|
11
16
|
elements = {
|
|
12
17
|
'scheme': parts.scheme,
|
|
13
18
|
'netloc': parts.netloc,
|
|
@@ -17,6 +22,7 @@ def url_parser(url):
|
|
|
17
22
|
'fragment': parts.fragment,
|
|
18
23
|
'directories': directories,
|
|
19
24
|
'queries': queries,
|
|
25
|
+
'file': file
|
|
20
26
|
}
|
|
21
27
|
|
|
22
28
|
return elements
|
atomicshop/venvs.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import os
|
|
3
|
+
from typing import Union
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def is_running_venv() -> Union[str, None]:
    """
    Tell whether the current interpreter was started from a virtual environment.

    :return: string, 'sys.prefix' (the root directory of the environment) when a virtual environment
        is detected, None otherwise.
    """
    # Two independent markers are accepted: the 'sys.real_prefix' attribute (added by older
    # 'virtualenv' releases) or a 'sys.base_prefix' that differs from 'sys.prefix' (stdlib 'venv').
    has_real_prefix = hasattr(sys, 'real_prefix')
    prefix_differs = hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix

    if not (has_real_prefix or prefix_differs):
        return None

    return sys.prefix
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def add_venv_to_path():
    """
    Prepend the executables directory of the active virtual environment to the PATH environment variable.

    Nothing is changed when the interpreter is not running inside a virtual environment.
    The directory name and the PATH separator are chosen for the current platform, so the
    function works on both POSIX systems and Windows.
    """

    venv_environment = is_running_venv()
    if not venv_environment:
        return

    # Virtual environments keep their executables in 'Scripts' on Windows and in 'bin' elsewhere.
    venv_bin = os.path.join(venv_environment, 'Scripts' if os.name == 'nt' else 'bin')

    # 'PATH' may be missing or empty; then the venv directory becomes the only entry instead of
    # raising KeyError or leaving a dangling separator.
    current_path = os.environ.get('PATH', '')
    if current_path:
        # 'os.pathsep' is ':' on POSIX and ';' on Windows.
        os.environ['PATH'] = f"{venv_bin}{os.pathsep}{current_path}"
    else:
        os.environ['PATH'] = venv_bin
|
atomicshop/versioning.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def is_target_newer(
        base_version: Union[str, tuple],
        target_version: Union[str, tuple]
) -> bool:
    """
    Check if the target version is newer than the base version.
    Example: is_target_newer('1.0.0', '1.0.1') -> True
    Example: is_target_newer('1.0.0', '1.0.0') -> False
    Example: is_target_newer('1.0.1', '1.0.0') -> False
    Example: is_target_newer('1.0.1', '1.0.2') -> True
    Example: is_target_newer((1,0,1), (1,1,0)) -> True
    Example: is_target_newer('1.0', '1.0.0') -> False (missing components count as 0)

    :param base_version: The base version to compare against.
        String of dot separated integers ('1.0.0') or tuple of integers ((1, 0, 0)).
    :param target_version: The target version to compare. Same formats as 'base_version'.
    :return: bool, True if 'target_version' is strictly newer than 'base_version', False otherwise.
    :raises ValueError: if a version string contains a component that is not an integer.
    """

    # Convert string to tuple if string was passed.
    if isinstance(base_version, str):
        base_version = tuple(map(int, base_version.split('.')))
    if isinstance(target_version, str):
        target_version = tuple(map(int, target_version.split('.')))

    # Pad the shorter version with zeros, so that '1.0' and '1.0.0' are treated as the same version.
    # Without the padding plain tuple comparison reports (1, 0, 0) as greater than (1, 0).
    if isinstance(base_version, tuple) and isinstance(target_version, tuple):
        width = max(len(base_version), len(target_version))
        base_version = base_version + (0,) * (width - len(base_version))
        target_version = target_version + (0,) * (width - len(target_version))

    # Compare the versions.
    return target_version > base_version
|
atomicshop/web.py
CHANGED
|
@@ -1,22 +1,29 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import urllib.request
|
|
3
|
+
import urllib.error
|
|
3
4
|
import ssl
|
|
5
|
+
from typing import Any
|
|
6
|
+
import http.client
|
|
7
|
+
|
|
8
|
+
# noinspection PyPackageRequirements
|
|
4
9
|
import certifi
|
|
10
|
+
from dkarchiver.arch_wrappers import zips
|
|
5
11
|
|
|
6
|
-
from .print_api import print_api
|
|
7
|
-
from .archiver import zips
|
|
8
12
|
from .urls import url_parser
|
|
9
13
|
from .file_io import file_io
|
|
10
14
|
from .wrappers.playwrightw import scenarios
|
|
11
|
-
from . import filesystem
|
|
15
|
+
from . import filesystem, print_api
|
|
12
16
|
|
|
13
17
|
|
|
14
18
|
# https://www.useragents.me/
|
|
15
19
|
# https://user-agents.net/
|
|
16
20
|
USER_AGENTS = {
|
|
17
|
-
'
|
|
18
|
-
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
|
|
19
|
-
|
|
21
|
+
'Chrome 111.0.0 Windows_10/11 x64':
|
|
22
|
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
|
|
23
|
+
'Chrome 132.0.0 Windows 10/11 x64':
|
|
24
|
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
|
|
25
|
+
'Chrome 142.0.0 Windows 10/11 x64':
|
|
26
|
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36'
|
|
20
27
|
}
|
|
21
28
|
|
|
22
29
|
|
|
@@ -28,10 +35,10 @@ def is_status_ok(status_code: int, **kwargs) -> bool:
|
|
|
28
35
|
"""
|
|
29
36
|
|
|
30
37
|
if status_code != 200:
|
|
31
|
-
print_api(f'URL Error, status code: {str(status_code)}', error_type=True, **kwargs)
|
|
38
|
+
print_api.print_api(f'URL Error, status code: {str(status_code)}', error_type=True, **kwargs)
|
|
32
39
|
return False
|
|
33
40
|
else:
|
|
34
|
-
print_api('URL Status: 200 OK', color="green", **kwargs)
|
|
41
|
+
print_api.print_api('URL Status: 200 OK', color="green", **kwargs)
|
|
35
42
|
return True
|
|
36
43
|
|
|
37
44
|
|
|
@@ -70,7 +77,7 @@ def get_page_bytes(
|
|
|
70
77
|
raise ValueError('ERROR: [user_agent] specified and [chrome_user_agent] usage is [True]. Choose one.')
|
|
71
78
|
|
|
72
79
|
if chrome_user_agent:
|
|
73
|
-
user_agent = USER_AGENTS['
|
|
80
|
+
user_agent = USER_AGENTS['Chrome 142.0.0 Windows 10/11 x64']
|
|
74
81
|
|
|
75
82
|
if user_agent:
|
|
76
83
|
# Create a 'Request' object with the URL and user agent.
|
|
@@ -90,10 +97,13 @@ def get_page_bytes(
|
|
|
90
97
|
|
|
91
98
|
|
|
92
99
|
def get_page_content(
|
|
93
|
-
url: str,
|
|
100
|
+
url: str,
|
|
101
|
+
get_method: str = 'urllib',
|
|
102
|
+
path: str = None,
|
|
94
103
|
playwright_pdf_format: str = 'A4',
|
|
95
104
|
playwright_html_txt_convert_to_bytes: bool = True,
|
|
96
|
-
print_kwargs: dict = None
|
|
105
|
+
print_kwargs: dict = None
|
|
106
|
+
) -> Any:
|
|
97
107
|
"""
|
|
98
108
|
Function returns the page content from the given URL.
|
|
99
109
|
|
|
@@ -150,25 +160,37 @@ def download(
|
|
|
150
160
|
file_url: str,
|
|
151
161
|
target_directory: str = None,
|
|
152
162
|
file_name: str = None,
|
|
163
|
+
headers: dict = None,
|
|
164
|
+
overwrite: bool = False,
|
|
165
|
+
# use_certifi_ca_repository: bool = False,
|
|
153
166
|
**kwargs
|
|
154
|
-
) -> str:
|
|
167
|
+
) -> str | None:
|
|
155
168
|
"""
|
|
156
169
|
The function receives url and target filesystem directory to download the file.
|
|
157
170
|
|
|
171
|
+
Note: Install 'pip-system-certs' package if you want to use system's CA store for SSL context
|
|
172
|
+
in an environment where 'certifi' package is installed.
|
|
173
|
+
|
|
158
174
|
:param file_url: full URL to download the file.
|
|
159
175
|
:param target_directory: The directory on the filesystem to save the file to.
|
|
160
176
|
If not specified, temporary directory will be used.
|
|
161
177
|
:param file_name: string, file name (example: file.zip) that you want the downloaded file to be saved as.
|
|
162
178
|
If not specified, the default filename from 'file_url' will be used.
|
|
179
|
+
:param headers: dictionary, HTTP headers to use when downloading the file.
|
|
180
|
+
:param overwrite: boolean, if True, the file will be overwritten if it already exists.
|
|
181
|
+
If False, the file will not be overwritten and the function will return the existing file path if the file already exists.
|
|
182
|
+
Default is False.
|
|
183
|
+
:param use_certifi_ca_repository: boolean, if True, the certifi CA store will be used for SSL context
|
|
184
|
+
instead of the system's default CA store.
|
|
163
185
|
:return: string, full file path of downloaded file. If download failed, 'None' will be returned.
|
|
164
186
|
"""
|
|
165
187
|
|
|
166
188
|
def print_to_console(print_end=None):
|
|
167
189
|
if file_size_bytes_int:
|
|
168
|
-
print_api(
|
|
190
|
+
print_api.print_api(
|
|
169
191
|
f'Downloaded bytes: {aggregated_bytes_int} / {file_size_bytes_int}', print_end=print_end, **kwargs)
|
|
170
192
|
else:
|
|
171
|
-
print_api(f'Downloaded bytes: {aggregated_bytes_int}', print_end=print_end, **kwargs)
|
|
193
|
+
print_api.print_api(f'Downloaded bytes: {aggregated_bytes_int}', print_end=print_end, **kwargs)
|
|
172
194
|
|
|
173
195
|
# Size of the buffer to read each time from url.
|
|
174
196
|
buffer_size: int = 4096
|
|
@@ -185,19 +207,59 @@ def download(
|
|
|
185
207
|
# Build full path to file.
|
|
186
208
|
file_path: str = f'{target_directory}{os.sep}{file_name}'
|
|
187
209
|
|
|
188
|
-
|
|
189
|
-
|
|
210
|
+
if os.path.exists(file_path):
|
|
211
|
+
if overwrite:
|
|
212
|
+
print_api.print_api(f'File already exists: {file_path}. Overwriting...', **kwargs)
|
|
213
|
+
else:
|
|
214
|
+
print_api.print_api(f'File already exists: {file_path}. Skipping download.', **kwargs)
|
|
215
|
+
return file_path
|
|
216
|
+
|
|
217
|
+
print_api.print_api(f'Downloading: {file_url}', **kwargs)
|
|
218
|
+
print_api.print_api(f'To: {file_path}', **kwargs)
|
|
219
|
+
|
|
220
|
+
# Open the URL for data gathering with SSL context.
|
|
221
|
+
# if not use_certifi_ca_repository:
|
|
222
|
+
# # Create a default SSL context using the system's CA store.
|
|
223
|
+
# ssl_context = ssl.create_default_context()
|
|
224
|
+
# else:
|
|
225
|
+
|
|
226
|
+
# Create a default SSL context using the certifi CA store.
|
|
227
|
+
# This is useful for environments where the system's CA store is not available or not trusted.
|
|
228
|
+
# 'certifi.where()' returns the path to the certifi CA bundle.
|
|
229
|
+
ssl_context: ssl.SSLContext = ssl.create_default_context(cafile=certifi.where())
|
|
190
230
|
|
|
191
231
|
# In order to use 'urllib.request', it is not enough to 'import urllib', you need to 'import urllib.request'.
|
|
192
|
-
#
|
|
193
|
-
|
|
194
|
-
|
|
232
|
+
# Build a Request object with headers if provided.
|
|
233
|
+
req = urllib.request.Request(file_url, headers=headers or {})
|
|
234
|
+
|
|
235
|
+
def do_urlopen(ssl_context_internal: ssl.SSLContext) -> http.client.HTTPResponse | None:
|
|
236
|
+
try:
|
|
237
|
+
response: http.client.HTTPResponse = urllib.request.urlopen(req, context=ssl_context_internal)
|
|
238
|
+
return response
|
|
239
|
+
except urllib.error.URLError as e:
|
|
240
|
+
if getattr(e, 'reason', None) and isinstance(e.reason, ssl.SSLCertVerificationError):
|
|
241
|
+
if getattr(e.reason, 'reason', None) and e.reason.reason == 'CERTIFICATE_VERIFY_FAILED':
|
|
242
|
+
if getattr(e.reason, 'verify_message', None) and e.reason.verify_message == 'unable to get local issuer certificate':
|
|
243
|
+
return None
|
|
244
|
+
|
|
245
|
+
raise e
|
|
246
|
+
|
|
247
|
+
# Try to open the URL with the created SSL context with certifi.
|
|
248
|
+
file_to_download = do_urlopen(ssl_context_internal=ssl_context)
|
|
249
|
+
if not file_to_download:
|
|
250
|
+
# If failed, try to open the URL with the system's default SSL context.
|
|
251
|
+
ssl_context = ssl.create_default_context()
|
|
252
|
+
file_to_download = do_urlopen(ssl_context_internal=ssl_context)
|
|
253
|
+
if not file_to_download:
|
|
254
|
+
print_api.print_api(
|
|
255
|
+
'ERROR: URL open failed with both certifi and system\'s default SSL context.', error_type=True, **kwargs)
|
|
256
|
+
return None
|
|
195
257
|
|
|
196
258
|
# Check status of url.
|
|
197
259
|
if not is_status_ok(status_code=file_to_download.status, **kwargs):
|
|
198
260
|
return None
|
|
199
261
|
|
|
200
|
-
file_size_bytes_int: int = None
|
|
262
|
+
file_size_bytes_int: int | None = None
|
|
201
263
|
# Get file size. For some reason doesn't show for GitHub branch downloads.
|
|
202
264
|
if file_to_download.headers['Content-Length']:
|
|
203
265
|
file_size_bytes_int = int(file_to_download.headers['Content-Length'])
|
|
@@ -222,19 +284,27 @@ def download(
|
|
|
222
284
|
else:
|
|
223
285
|
print_to_console()
|
|
224
286
|
break
|
|
287
|
+
|
|
225
288
|
if aggregated_bytes_int == file_size_bytes_int:
|
|
226
|
-
print_api(f'Successfully Downloaded to: {file_path}', color="green", **kwargs)
|
|
289
|
+
print_api.print_api(f'Successfully Downloaded to: {file_path}', color="green", **kwargs)
|
|
290
|
+
elif file_size_bytes_int is None:
|
|
291
|
+
pass
|
|
227
292
|
else:
|
|
228
293
|
message = f'Download failed: {aggregated_bytes_int} / {file_size_bytes_int}. File: {file_path}'
|
|
229
|
-
print_api(
|
|
294
|
+
print_api.print_api(
|
|
230
295
|
message, error_type=True, color="red", **kwargs)
|
|
231
296
|
|
|
232
297
|
return file_path
|
|
233
298
|
|
|
234
299
|
|
|
235
300
|
def download_and_extract_file(
|
|
236
|
-
file_url: str,
|
|
237
|
-
|
|
301
|
+
file_url: str,
|
|
302
|
+
target_directory: str,
|
|
303
|
+
file_name: str = str(),
|
|
304
|
+
archive_remove_first_directory: bool = False,
|
|
305
|
+
headers: dict = None,
|
|
306
|
+
**kwargs
|
|
307
|
+
):
|
|
238
308
|
"""
|
|
239
309
|
This function will download the branch file from GitHub, extract the file and remove the file, leaving
|
|
240
310
|
only the extracted folder.
|
|
@@ -244,18 +314,19 @@ def download_and_extract_file(
|
|
|
244
314
|
Default is empty. If it is empty, then the filename will be extracted from 'file_url'.
|
|
245
315
|
:param target_directory: string, target directory where to save the file.
|
|
246
316
|
:param archive_remove_first_directory: boolean, sets if archive extract function will extract the archive without
|
|
247
|
-
first directory in the archive. Check reference in the 'extract_archive_with_zipfile' function.
|
|
317
|
+
first directory in the archive. Check reference in the 'dkarchiver.arch_wrappers.zips.extract_archive_with_zipfile' function.
|
|
318
|
+
:param headers: dictionary, HTTP headers to use when downloading the file.
|
|
248
319
|
:return:
|
|
249
320
|
"""
|
|
250
321
|
|
|
251
322
|
# Download the repo to current working directory and return full file path of downloaded file.
|
|
252
323
|
file_path = download(
|
|
253
|
-
file_url=file_url, target_directory=target_directory, file_name=file_name, **kwargs)
|
|
324
|
+
file_url=file_url, target_directory=target_directory, file_name=file_name, headers=headers, **kwargs)
|
|
254
325
|
|
|
255
326
|
# Extract the archive and remove the first directory.
|
|
256
327
|
zips.extract_archive_with_zipfile(
|
|
257
328
|
archive_path=f'{file_path}', extract_directory=target_directory,
|
|
258
|
-
remove_first_directory=archive_remove_first_directory
|
|
329
|
+
remove_first_directory=archive_remove_first_directory)
|
|
259
330
|
|
|
260
331
|
# Remove the archive file.
|
|
261
332
|
filesystem.remove_file(file_path=f'{file_path}', **kwargs)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
|
|
3
|
+
from googleapiclient.discovery import build
|
|
4
|
+
import googleapiclient.errors
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def search_google(
        query: str,
        api_key: str,
        search_engine_id: str
) -> tuple[
    Union[list[str], None],
    str]:
    """
    Query the Google Custom Search API and collect the result links for the query.
    :param query: string, the search query to search on Google Custom Search.
    :param api_key: string, the API key for the Google Custom Search API.
    :param search_engine_id: string, the search engine ID for the Google Custom Search API.

    :return: tuple(list of strings - the links related to the query, string - the error message if any).
        On success the error part is None. On HTTP status 429 the links part is None and
        the error part holds the reason reported by the API.
    :raises googleapiclient.errors.HttpError: for any HTTP error other than status 429.
    """

    try:
        search_service = build("customsearch", "v1", developerKey=api_key)
        response = search_service.cse().list(
            q=query,
            cx=search_engine_id,
            # Optional request filters, currently not sent:
            # gl="us",  # Country code
            # lr="lang_en",  # Language restriction
            # safe="off",  # Safe search off
            # dateRestrict="m1"  # Restrict results to the last month
        ).execute()
    except googleapiclient.errors.HttpError as e:
        # Only the rate limit error is reported through the return value, anything else propagates.
        if e.status_code != 429:
            raise e
        return None, str(e.reason)

    # Result entries without a 'link' key are skipped.
    links = []
    for entry in response.get('items', []):
        if 'link' in entry:
            links.append(entry['link'])

    # noinspection PyTypeChecker
    return links, None
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
from typing import Literal
|
|
2
|
+
|
|
3
|
+
from google import genai
|
|
4
|
+
from google.genai.types import GenerateContentConfig
|
|
5
|
+
|
|
6
|
+
from . import google_custom_search
|
|
7
|
+
from ..wrappers.playwrightw import scenarios
|
|
8
|
+
from .. import urls
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class GoogleCustomSearchError(Exception):
    """Error type for Google Custom Search problems; a plain 'Exception' subclass that adds no behavior."""
|
|
13
|
+
|
|
14
|
+
class GoogleLLMNoContentError(Exception):
    """Error type for the 'no content' case of the Google LLM; a plain 'Exception' subclass that adds no behavior."""
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
"""
|
|
19
|
+
Rate Limits and Quotas: https://ai.google.dev/gemini-api/docs/rate-limits
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class GoogleLLM:
|
|
24
|
+
def __init__(
        self,
        llm_api_key: str,
        search_api_key: str,
        search_engine_id: str
) -> None:
    """
    Create the Gemini API client and remember the Google Custom Search credentials.

    :param llm_api_key: str, the API key for the Gemini API.
    :param search_api_key: str, the API key for the Google Custom Search API.
    :param search_engine_id: str, the search engine ID for the Google Custom Search API.
    """

    # The Gemini client is built right away from the LLM key.
    self.client = genai.Client(api_key=llm_api_key)

    # The Custom Search credentials are only stored on the instance here.
    self.search_engine_id: str = search_engine_id
    self.search_api_key: str = search_api_key
|
|
40
|
+
|
|
41
|
+
def get_current_models(
        self,
        full_info: bool = False,
        model_type: str = None,
        verbose: bool = False
) -> list:
    """
    List the models that the Gemini API client currently reports.

    :param full_info: bool, if True, the model objects are returned as received from the client,
        otherwise only their names (for API usage).
    :param model_type: str, substring that the model name must contain to be included. None, for all models.
        Examples of known types: 'gemini', 'veo', 'imagen', 'deep-research', 'nano-banana'.
    :param verbose: bool, if True, each returned entry is also printed to the console.
    :return: list of model names, or list of model objects if 'full_info' is True.
    """
    # An empty / None 'model_type' disables the name filter.
    selected: list = [
        model if full_info else model.name
        for model in self.client.models.list()
        if not model_type or model_type in model.name
    ]

    if verbose:
        for entry in selected:
            print(entry)

    return selected
|
|
70
|
+
|
|
71
|
+
def get_answer_online(
|
|
72
|
+
self,
|
|
73
|
+
search_query_or_url: str,
|
|
74
|
+
text_fetch_method: Literal[
|
|
75
|
+
'playwright_text',
|
|
76
|
+
'js_text',
|
|
77
|
+
'playwright_html',
|
|
78
|
+
'js_html',
|
|
79
|
+
'playwright_copypaste'
|
|
80
|
+
],
|
|
81
|
+
llm_query: str,
|
|
82
|
+
llm_post_instructions: str,
|
|
83
|
+
number_of_top_links: int = 2,
|
|
84
|
+
number_of_characters_per_link: int = 15000,
|
|
85
|
+
temperature: float = 0,
|
|
86
|
+
# max_output_tokens: int = 4096,
|
|
87
|
+
# model_name: str = 'gemini-2.0-flash-thinking-exp-01-21'
|
|
88
|
+
model_name: str = 'gemini-2.5-pro'
|
|
89
|
+
) -> str:
|
|
90
|
+
"""
|
|
91
|
+
Function to get the answer to a question by searching Google Custom Console API and processing the content using Gemini API.
|
|
92
|
+
|
|
93
|
+
:param search_query_or_url: string, is checked if it is a URL or a search query.
|
|
94
|
+
Search query: the search query to search on Google Custom Search.
|
|
95
|
+
URL: the URL to fetch content from without using Google Custom Search.
|
|
96
|
+
:param text_fetch_method: string, the method to fetch text from the URL.
|
|
97
|
+
playwright_text: uses native Playwright to fetch text from the URL.
|
|
98
|
+
js_text: uses Playwright and JavaScript evaluation to fetch text from the URL.
|
|
99
|
+
playwright_html: uses native Playwright to fetch HTML from the URL and then parse it to text using beautiful soup.
|
|
100
|
+
js_html: uses Playwright and JavaScript evaluation to fetch HTML from the URL and then parse it to text using beautiful soup.
|
|
101
|
+
playwright_copypaste: uses native Playwright to fetch text from the URL by copying and pasting the text from rendered page using clipboard.
|
|
102
|
+
:param llm_query: string, the question to ask the LLM about the text content that is returned from the search query or the URL.
|
|
103
|
+
:param llm_post_instructions: string, additional instructions to provide to the LLM on the answer it provided after the llm_query.
|
|
104
|
+
:param number_of_top_links: integer, the number of top links to fetch content from.
|
|
105
|
+
:param number_of_characters_per_link: integer, the number of characters to fetch from each link.
|
|
106
|
+
:param temperature: float, the temperature parameter for the LLM.
|
|
107
|
+
:param max_output_tokens: integer, the maximum number of tokens to generate in the LLM response.
|
|
108
|
+
:param model_name: string, the name of the model to use for the LLM.
|
|
109
|
+
|
|
110
|
+
:return: string, the answer by LLM to the question.
|
|
111
|
+
"""
|
|
112
|
+
|
|
113
|
+
# Check if the search query is a URL.
|
|
114
|
+
if urls.is_valid_url(search_query_or_url):
|
|
115
|
+
# Fetch content from the URL
|
|
116
|
+
contents = scenarios.fetch_urls_content_in_threads(
|
|
117
|
+
urls=[search_query_or_url], number_of_characters_per_link=number_of_characters_per_link,
|
|
118
|
+
text_fetch_method=text_fetch_method)
|
|
119
|
+
# If not a URL, Search Google for links related to the query
|
|
120
|
+
else:
|
|
121
|
+
links, search_error = google_custom_search.search_google(
|
|
122
|
+
query=search_query_or_url, api_key=self.search_api_key, search_engine_id=self.search_engine_id)
|
|
123
|
+
|
|
124
|
+
if search_error:
|
|
125
|
+
raise GoogleCustomSearchError(f"Error occurred when searching Google: {search_error}")
|
|
126
|
+
|
|
127
|
+
# Get only the first X links to not overload the LLM.
|
|
128
|
+
contents = scenarios.fetch_urls_content_in_threads(
|
|
129
|
+
urls=links[:number_of_top_links], number_of_characters_per_link=number_of_characters_per_link,
|
|
130
|
+
text_fetch_method=text_fetch_method)
|
|
131
|
+
|
|
132
|
+
if not contents:
|
|
133
|
+
raise GoogleLLMNoContentError("No content was fetched from the provided URL(s).")
|
|
134
|
+
|
|
135
|
+
combined_content = ""
|
|
136
|
+
for content in contents:
|
|
137
|
+
combined_content += f'{content}\n\n\n\n================================================================'
|
|
138
|
+
|
|
139
|
+
final_question = (f'Answer this question: {llm_query}\n\n'
|
|
140
|
+
f'Follow these instructions: {llm_post_instructions}\n\n'
|
|
141
|
+
f'Based on these data contents:\n\n'
|
|
142
|
+
f'{combined_content}')
|
|
143
|
+
|
|
144
|
+
# Ask Gemini to process the combined content
|
|
145
|
+
# gemini_response = self.ask_gemini(final_question, temperature, max_output_tokens, model_name)
|
|
146
|
+
gemini_response = self.ask_gemini(final_question, temperature, model_name)
|
|
147
|
+
return gemini_response
|
|
148
|
+
|
|
149
|
+
def ask_gemini(
|
|
150
|
+
self,
|
|
151
|
+
question: str,
|
|
152
|
+
temperature: float,
|
|
153
|
+
# max_output_tokens: int,
|
|
154
|
+
model_name: str = 'gemini-2.5-pro'
|
|
155
|
+
) -> str:
|
|
156
|
+
r"""
|
|
157
|
+
Function to ask the Gemini API a question and get the response.
|
|
158
|
+
:param question: str, the question to ask the Gemini API.
|
|
159
|
+
:param temperature: float, the temperature parameter for the LLM.
|
|
160
|
+
While 0 is deterministic, higher values can lead to more creative responses.
|
|
161
|
+
:param model_name: str, the name of the model to use for the LLM.
|
|
162
|
+
|
|
163
|
+
max_output_tokens: int, the maximum number of tokens to generate in the LLM response.
|
|
164
|
+
UPDATE: Disabled this feature since it gave exceptions in some situations.
|
|
165
|
+
Example:
|
|
166
|
+
File ".\Lib\site-packages\google\generativeai\types\generation_types.py", line 464, in text
|
|
167
|
+
parts = self.parts
|
|
168
|
+
^^^^^^^^^^
|
|
169
|
+
File ".\Lib\site-packages\google\generativeai\types\generation_types.py", line 447, in parts
|
|
170
|
+
raise ValueError(msg)
|
|
171
|
+
ValueError: Invalid operation: The `response.parts` quick accessor requires a single candidate, but but `response.candidates` is empty.
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
:return: str, the response from the Gemini API.
|
|
175
|
+
"""
|
|
176
|
+
# Model Configuration
|
|
177
|
+
model_config = {
|
|
178
|
+
"temperature": temperature,
|
|
179
|
+
"top_p": 0.99,
|
|
180
|
+
"top_k": 0,
|
|
181
|
+
# "max_output_tokens": max_output_tokens,
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
response = self.client.models.generate_content(
|
|
185
|
+
model=model_name,
|
|
186
|
+
contents=question,
|
|
187
|
+
config=GenerateContentConfig(**model_config))
|
|
188
|
+
return response.text
|