atomicshop 2.15.11__py3-none-any.whl → 3.10.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (221) hide show
  1. atomicshop/__init__.py +1 -1
  2. atomicshop/{addons/mains → a_mains}/FACT/update_extract.py +3 -2
  3. atomicshop/a_mains/dns_gateway_setting.py +11 -0
  4. atomicshop/a_mains/get_local_tcp_ports.py +85 -0
  5. atomicshop/a_mains/github_wrapper.py +11 -0
  6. atomicshop/a_mains/install_ca_certificate.py +172 -0
  7. atomicshop/a_mains/process_from_port.py +119 -0
  8. atomicshop/a_mains/set_default_dns_gateway.py +90 -0
  9. atomicshop/a_mains/update_config_toml.py +38 -0
  10. atomicshop/basics/ansi_escape_codes.py +3 -1
  11. atomicshop/basics/argparse_template.py +2 -0
  12. atomicshop/basics/booleans.py +27 -30
  13. atomicshop/basics/bytes_arrays.py +43 -0
  14. atomicshop/basics/classes.py +149 -1
  15. atomicshop/basics/enums.py +2 -2
  16. atomicshop/basics/exceptions.py +5 -1
  17. atomicshop/basics/list_of_classes.py +29 -0
  18. atomicshop/basics/multiprocesses.py +374 -50
  19. atomicshop/basics/strings.py +72 -3
  20. atomicshop/basics/threads.py +14 -0
  21. atomicshop/basics/tracebacks.py +13 -3
  22. atomicshop/certificates.py +153 -52
  23. atomicshop/config_init.py +11 -6
  24. atomicshop/console_user_response.py +7 -14
  25. atomicshop/consoles.py +9 -0
  26. atomicshop/datetimes.py +1 -1
  27. atomicshop/diff_check.py +3 -3
  28. atomicshop/dns.py +128 -3
  29. atomicshop/etws/_pywintrace_fix.py +17 -0
  30. atomicshop/etws/trace.py +40 -42
  31. atomicshop/etws/traces/trace_dns.py +56 -44
  32. atomicshop/etws/traces/trace_tcp.py +130 -0
  33. atomicshop/file_io/csvs.py +27 -5
  34. atomicshop/file_io/docxs.py +34 -17
  35. atomicshop/file_io/file_io.py +31 -17
  36. atomicshop/file_io/jsons.py +49 -0
  37. atomicshop/file_io/tomls.py +139 -0
  38. atomicshop/filesystem.py +616 -291
  39. atomicshop/get_process_list.py +3 -3
  40. atomicshop/http_parse.py +149 -93
  41. atomicshop/ip_addresses.py +6 -1
  42. atomicshop/mitm/centered_settings.py +132 -0
  43. atomicshop/mitm/config_static.py +207 -0
  44. atomicshop/mitm/config_toml_editor.py +55 -0
  45. atomicshop/mitm/connection_thread_worker.py +875 -357
  46. atomicshop/mitm/engines/__parent/parser___parent.py +4 -17
  47. atomicshop/mitm/engines/__parent/recorder___parent.py +108 -51
  48. atomicshop/mitm/engines/__parent/requester___parent.py +116 -0
  49. atomicshop/mitm/engines/__parent/responder___parent.py +75 -114
  50. atomicshop/mitm/engines/__reference_general/parser___reference_general.py +10 -7
  51. atomicshop/mitm/engines/__reference_general/recorder___reference_general.py +5 -5
  52. atomicshop/mitm/engines/__reference_general/requester___reference_general.py +47 -0
  53. atomicshop/mitm/engines/__reference_general/responder___reference_general.py +95 -13
  54. atomicshop/mitm/engines/create_module_template.py +58 -14
  55. atomicshop/mitm/import_config.py +359 -139
  56. atomicshop/mitm/initialize_engines.py +160 -80
  57. atomicshop/mitm/message.py +64 -23
  58. atomicshop/mitm/mitm_main.py +892 -0
  59. atomicshop/mitm/recs_files.py +183 -0
  60. atomicshop/mitm/shared_functions.py +4 -10
  61. atomicshop/mitm/ssh_tester.py +82 -0
  62. atomicshop/mitm/statistic_analyzer.py +136 -40
  63. atomicshop/mitm/statistic_analyzer_helper/moving_average_helper.py +265 -83
  64. atomicshop/monitor/checks/dns.py +1 -1
  65. atomicshop/networks.py +671 -0
  66. atomicshop/on_exit.py +39 -9
  67. atomicshop/package_mains_processor.py +84 -0
  68. atomicshop/permissions/permissions.py +22 -0
  69. atomicshop/permissions/ubuntu_permissions.py +239 -0
  70. atomicshop/permissions/win_permissions.py +33 -0
  71. atomicshop/print_api.py +24 -42
  72. atomicshop/process.py +24 -6
  73. atomicshop/process_poller/process_pool.py +0 -1
  74. atomicshop/process_poller/simple_process_pool.py +204 -5
  75. atomicshop/python_file_patcher.py +1 -1
  76. atomicshop/python_functions.py +27 -75
  77. atomicshop/speech_recognize.py +8 -0
  78. atomicshop/ssh_remote.py +158 -172
  79. atomicshop/system_resource_monitor.py +61 -47
  80. atomicshop/system_resources.py +8 -8
  81. atomicshop/tempfiles.py +1 -2
  82. atomicshop/urls.py +6 -0
  83. atomicshop/venvs.py +28 -0
  84. atomicshop/versioning.py +27 -0
  85. atomicshop/web.py +98 -27
  86. atomicshop/web_apis/google_custom_search.py +44 -0
  87. atomicshop/web_apis/google_llm.py +188 -0
  88. atomicshop/websocket_parse.py +450 -0
  89. atomicshop/wrappers/certauthw/certauth.py +1 -0
  90. atomicshop/wrappers/cryptographyw.py +29 -8
  91. atomicshop/wrappers/ctyping/etw_winapi/const.py +97 -47
  92. atomicshop/wrappers/ctyping/etw_winapi/etw_functions.py +178 -49
  93. atomicshop/wrappers/ctyping/file_details_winapi.py +67 -0
  94. atomicshop/wrappers/ctyping/msi_windows_installer/cabs.py +2 -1
  95. atomicshop/wrappers/ctyping/msi_windows_installer/extract_msi_main.py +2 -2
  96. atomicshop/wrappers/ctyping/setup_device.py +466 -0
  97. atomicshop/wrappers/ctyping/win_console.py +39 -0
  98. atomicshop/wrappers/dockerw/dockerw.py +113 -2
  99. atomicshop/wrappers/elasticsearchw/config_basic.py +0 -12
  100. atomicshop/wrappers/elasticsearchw/elastic_infra.py +75 -0
  101. atomicshop/wrappers/elasticsearchw/elasticsearchw.py +2 -20
  102. atomicshop/wrappers/factw/get_file_data.py +12 -5
  103. atomicshop/wrappers/factw/install/install_after_restart.py +89 -5
  104. atomicshop/wrappers/factw/install/pre_install_and_install_before_restart.py +20 -14
  105. atomicshop/wrappers/githubw.py +537 -54
  106. atomicshop/wrappers/loggingw/consts.py +1 -1
  107. atomicshop/wrappers/loggingw/filters.py +23 -0
  108. atomicshop/wrappers/loggingw/formatters.py +12 -0
  109. atomicshop/wrappers/loggingw/handlers.py +214 -107
  110. atomicshop/wrappers/loggingw/loggers.py +19 -0
  111. atomicshop/wrappers/loggingw/loggingw.py +860 -22
  112. atomicshop/wrappers/loggingw/reading.py +134 -112
  113. atomicshop/wrappers/mongodbw/mongo_infra.py +31 -0
  114. atomicshop/wrappers/mongodbw/mongodbw.py +1324 -36
  115. atomicshop/wrappers/netshw.py +271 -0
  116. atomicshop/wrappers/playwrightw/engine.py +34 -19
  117. atomicshop/wrappers/playwrightw/infra.py +5 -0
  118. atomicshop/wrappers/playwrightw/javascript.py +7 -3
  119. atomicshop/wrappers/playwrightw/keyboard.py +14 -0
  120. atomicshop/wrappers/playwrightw/scenarios.py +172 -5
  121. atomicshop/wrappers/playwrightw/waits.py +9 -7
  122. atomicshop/wrappers/powershell_networking.py +80 -0
  123. atomicshop/wrappers/psutilw/processes.py +37 -1
  124. atomicshop/wrappers/psutilw/psutil_networks.py +85 -0
  125. atomicshop/wrappers/pyopensslw.py +9 -2
  126. atomicshop/wrappers/pywin32w/cert_store.py +116 -0
  127. atomicshop/wrappers/pywin32w/win_event_log/fetch.py +174 -0
  128. atomicshop/wrappers/pywin32w/win_event_log/subscribes/process_create.py +3 -105
  129. atomicshop/wrappers/pywin32w/win_event_log/subscribes/process_terminate.py +3 -57
  130. atomicshop/wrappers/pywin32w/wmis/msft_netipaddress.py +113 -0
  131. atomicshop/wrappers/pywin32w/wmis/win32_networkadapterconfiguration.py +259 -0
  132. atomicshop/wrappers/pywin32w/wmis/win32networkadapter.py +112 -0
  133. atomicshop/wrappers/pywin32w/wmis/wmi_helpers.py +236 -0
  134. atomicshop/wrappers/socketw/accepter.py +21 -7
  135. atomicshop/wrappers/socketw/certificator.py +216 -150
  136. atomicshop/wrappers/socketw/creator.py +190 -50
  137. atomicshop/wrappers/socketw/dns_server.py +491 -182
  138. atomicshop/wrappers/socketw/exception_wrapper.py +45 -52
  139. atomicshop/wrappers/socketw/process_getter.py +86 -0
  140. atomicshop/wrappers/socketw/receiver.py +144 -102
  141. atomicshop/wrappers/socketw/sender.py +65 -35
  142. atomicshop/wrappers/socketw/sni.py +334 -165
  143. atomicshop/wrappers/socketw/socket_base.py +134 -0
  144. atomicshop/wrappers/socketw/socket_client.py +137 -95
  145. atomicshop/wrappers/socketw/socket_server_tester.py +11 -7
  146. atomicshop/wrappers/socketw/socket_wrapper.py +717 -116
  147. atomicshop/wrappers/socketw/ssl_base.py +15 -14
  148. atomicshop/wrappers/socketw/statistics_csv.py +148 -17
  149. atomicshop/wrappers/sysmonw.py +1 -1
  150. atomicshop/wrappers/ubuntu_terminal.py +65 -26
  151. atomicshop/wrappers/win_auditw.py +189 -0
  152. atomicshop/wrappers/winregw/__init__.py +0 -0
  153. atomicshop/wrappers/winregw/winreg_installed_software.py +58 -0
  154. atomicshop/wrappers/winregw/winreg_network.py +232 -0
  155. {atomicshop-2.15.11.dist-info → atomicshop-3.10.5.dist-info}/METADATA +31 -51
  156. atomicshop-3.10.5.dist-info/RECORD +306 -0
  157. {atomicshop-2.15.11.dist-info → atomicshop-3.10.5.dist-info}/WHEEL +1 -1
  158. atomicshop/_basics_temp.py +0 -101
  159. atomicshop/a_installs/win/fibratus.py +0 -9
  160. atomicshop/a_installs/win/mongodb.py +0 -9
  161. atomicshop/a_installs/win/pycharm.py +0 -9
  162. atomicshop/addons/a_setup_scripts/install_psycopg2_ubuntu.sh +0 -3
  163. atomicshop/addons/a_setup_scripts/install_pywintrace_0.3.cmd +0 -2
  164. atomicshop/addons/mains/__pycache__/install_fibratus_windows.cpython-312.pyc +0 -0
  165. atomicshop/addons/mains/__pycache__/msi_unpacker.cpython-312.pyc +0 -0
  166. atomicshop/addons/mains/install_docker_rootless_ubuntu.py +0 -11
  167. atomicshop/addons/mains/install_docker_ubuntu_main_sudo.py +0 -11
  168. atomicshop/addons/mains/install_elastic_search_and_kibana_ubuntu.py +0 -10
  169. atomicshop/addons/mains/install_wsl_ubuntu_lts_admin.py +0 -9
  170. atomicshop/addons/package_setup/CreateWheel.cmd +0 -7
  171. atomicshop/addons/package_setup/Setup in Edit mode.cmd +0 -6
  172. atomicshop/addons/package_setup/Setup.cmd +0 -7
  173. atomicshop/archiver/_search_in_zip.py +0 -189
  174. atomicshop/archiver/archiver.py +0 -34
  175. atomicshop/archiver/search_in_archive.py +0 -250
  176. atomicshop/archiver/sevenz_app_w.py +0 -86
  177. atomicshop/archiver/sevenzs.py +0 -44
  178. atomicshop/archiver/zips.py +0 -293
  179. atomicshop/file_types.py +0 -24
  180. atomicshop/mitm/config_editor.py +0 -37
  181. atomicshop/mitm/engines/create_module_template_example.py +0 -13
  182. atomicshop/mitm/initialize_mitm_server.py +0 -268
  183. atomicshop/pbtkmultifile_argparse.py +0 -88
  184. atomicshop/permissions.py +0 -151
  185. atomicshop/script_as_string_processor.py +0 -38
  186. atomicshop/ssh_scripts/process_from_ipv4.py +0 -37
  187. atomicshop/ssh_scripts/process_from_port.py +0 -27
  188. atomicshop/wrappers/_process_wrapper_curl.py +0 -27
  189. atomicshop/wrappers/_process_wrapper_tar.py +0 -21
  190. atomicshop/wrappers/dockerw/install_docker.py +0 -209
  191. atomicshop/wrappers/elasticsearchw/infrastructure.py +0 -265
  192. atomicshop/wrappers/elasticsearchw/install_elastic.py +0 -232
  193. atomicshop/wrappers/ffmpegw.py +0 -125
  194. atomicshop/wrappers/fibratusw/install.py +0 -81
  195. atomicshop/wrappers/mongodbw/infrastructure.py +0 -53
  196. atomicshop/wrappers/mongodbw/install_mongodb.py +0 -190
  197. atomicshop/wrappers/msiw.py +0 -149
  198. atomicshop/wrappers/nodejsw/install_nodejs.py +0 -139
  199. atomicshop/wrappers/process_wrapper_pbtk.py +0 -16
  200. atomicshop/wrappers/psutilw/networks.py +0 -45
  201. atomicshop/wrappers/pycharmw.py +0 -81
  202. atomicshop/wrappers/socketw/base.py +0 -59
  203. atomicshop/wrappers/socketw/get_process.py +0 -107
  204. atomicshop/wrappers/wslw.py +0 -191
  205. atomicshop-2.15.11.dist-info/RECORD +0 -302
  206. /atomicshop/{addons/mains → a_mains}/FACT/factw_fact_extractor_docker_image_main_sudo.py +0 -0
  207. /atomicshop/{addons → a_mains/addons}/PlayWrightCodegen.cmd +0 -0
  208. /atomicshop/{addons → a_mains/addons}/ScriptExecution.cmd +0 -0
  209. /atomicshop/{addons → a_mains/addons}/inits/init_to_import_all_modules.py +0 -0
  210. /atomicshop/{addons → a_mains/addons}/process_list/ReadMe.txt +0 -0
  211. /atomicshop/{addons → a_mains/addons}/process_list/compile.cmd +0 -0
  212. /atomicshop/{addons → a_mains/addons}/process_list/compiled/Win10x64/process_list.dll +0 -0
  213. /atomicshop/{addons → a_mains/addons}/process_list/compiled/Win10x64/process_list.exp +0 -0
  214. /atomicshop/{addons → a_mains/addons}/process_list/compiled/Win10x64/process_list.lib +0 -0
  215. /atomicshop/{addons → a_mains/addons}/process_list/process_list.cpp +0 -0
  216. /atomicshop/{archiver → permissions}/__init__.py +0 -0
  217. /atomicshop/{wrappers/fibratusw → web_apis}/__init__.py +0 -0
  218. /atomicshop/wrappers/{nodejsw → pywin32w/wmis}/__init__.py +0 -0
  219. /atomicshop/wrappers/pywin32w/{wmi_win32process.py → wmis/win32process.py} +0 -0
  220. {atomicshop-2.15.11.dist-info → atomicshop-3.10.5.dist-info/licenses}/LICENSE.txt +0 -0
  221. {atomicshop-2.15.11.dist-info → atomicshop-3.10.5.dist-info}/top_level.txt +0 -0
atomicshop/urls.py CHANGED
@@ -8,6 +8,11 @@ def url_parser(url):
8
8
  directories = parts.path.strip('/').split('/')
9
9
  queries = parts.query.strip('&').split('&')
10
10
 
11
+ if len(directories) > 1 and '.' in directories[-1]:
12
+ file = directories[-1]
13
+ else:
14
+ file = ''
15
+
11
16
  elements = {
12
17
  'scheme': parts.scheme,
13
18
  'netloc': parts.netloc,
@@ -17,6 +22,7 @@ def url_parser(url):
17
22
  'fragment': parts.fragment,
18
23
  'directories': directories,
19
24
  'queries': queries,
25
+ 'file': file
20
26
  }
21
27
 
22
28
  return elements
atomicshop/venvs.py ADDED
@@ -0,0 +1,28 @@
1
+ import sys
2
+ import os
3
+ from typing import Union
4
+
5
+
6
def is_running_venv() -> Union[str, None]:
    """
    Tell whether the current interpreter was started from a virtual environment.

    :return: string, 'sys.prefix' (the root of the environment) when a virtual environment is active,
        None otherwise.
    """
    # Two independent markers are checked: the presence of 'sys.real_prefix' (presumably set by older
    # 'virtualenv' releases) and 'sys.base_prefix' differing from 'sys.prefix' (the stdlib 'venv' behavior).
    has_real_prefix = hasattr(sys, 'real_prefix')
    base_prefix_differs = hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix

    return sys.prefix if (has_real_prefix or base_prefix_differs) else None
16
+
17
+
18
def add_venv_to_path():
    """
    Prepend the active virtual environment's executables directory to the PATH environment variable.

    Nothing is changed when the interpreter is not running inside a virtual environment.
    The executables directory is 'Scripts' on Windows and 'bin' elsewhere, and PATH entries are joined
    with 'os.pathsep', so the resulting value is valid on every platform.
    """

    venv_environment = is_running_venv()
    if not venv_environment:
        return

    # Virtual environments keep their executables in 'Scripts' on Windows and in 'bin' on POSIX systems.
    venv_bin = os.path.join(venv_environment, 'Scripts' if os.name == 'nt' else 'bin')

    # PATH can be unset or empty: avoid a KeyError and do not leave a dangling separator
    # (an empty PATH entry is interpreted as the current directory on POSIX).
    current_path = os.environ.get('PATH', '')
    if current_path:
        os.environ['PATH'] = f"{venv_bin}{os.pathsep}{current_path}"
    else:
        os.environ['PATH'] = venv_bin
atomicshop/versioning.py ADDED
@@ -0,0 +1,27 @@
1
+ from typing import Union
2
+
3
+
4
def is_target_newer(
        base_version: Union[str, tuple],
        target_version: Union[str, tuple]
):
    """
    Check if the target version is newer than the base version.
    Example: is_target_newer('1.0.0', '1.0.1') -> True
    Example: is_target_newer('1.0.0', '1.0.0') -> False
    Example: is_target_newer('1.0.1', '1.0.0') -> False
    Example: is_target_newer('1.0.1', '1.0.2') -> True
    Example: is_target_newer((1,0,1), (1,1,0)) -> True
    Example: is_target_newer('1.0', '1.0.0') -> False (missing trailing components count as 0)

    :param base_version: The base version to compare against.
        Either a dotted string of integers ('1.0.1') or a tuple of integers (1, 0, 1).
    :param target_version: The target version to compare. Same formats as 'base_version'.
    :return: bool, True if 'target_version' is strictly newer than 'base_version', False otherwise.
    :raises ValueError: if a string version contains a component that is not an integer.
    """

    def to_tuple(version) -> tuple:
        # Convert string to tuple if string was passed, otherwise normalize the sequence to a tuple.
        if isinstance(version, str):
            return tuple(map(int, version.split('.')))
        return tuple(version)

    base_parts = to_tuple(base_version)
    target_parts = to_tuple(target_version)

    # Pad the shorter version with zeros. Plain tuple comparison treats a longer tuple with an equal
    # prefix as greater, which would wrongly report '1.0.0' as newer than '1.0'.
    width = max(len(base_parts), len(target_parts))
    base_parts += (0,) * (width - len(base_parts))
    target_parts += (0,) * (width - len(target_parts))

    # Compare the versions.
    return target_parts > base_parts
atomicshop/web.py CHANGED
@@ -1,22 +1,29 @@
1
1
  import os
2
2
  import urllib.request
3
+ import urllib.error
3
4
  import ssl
5
+ from typing import Any
6
+ import http.client
7
+
8
+ # noinspection PyPackageRequirements
4
9
  import certifi
10
+ from dkarchiver.arch_wrappers import zips
5
11
 
6
- from .print_api import print_api
7
- from .archiver import zips
8
12
  from .urls import url_parser
9
13
  from .file_io import file_io
10
14
  from .wrappers.playwrightw import scenarios
11
- from . import filesystem
15
+ from . import filesystem, print_api
12
16
 
13
17
 
14
18
  # https://www.useragents.me/
15
19
  # https://user-agents.net/
16
20
  USER_AGENTS = {
17
- 'Chrome_111.0.0_Windows_10-11_x64':
18
- 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
19
- 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'
21
+ 'Chrome 111.0.0 Windows_10/11 x64':
22
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
23
+ 'Chrome 132.0.0 Windows 10/11 x64':
24
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36',
25
+ 'Chrome 142.0.0 Windows 10/11 x64':
26
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36'
20
27
  }
21
28
 
22
29
 
@@ -28,10 +35,10 @@ def is_status_ok(status_code: int, **kwargs) -> bool:
28
35
  """
29
36
 
30
37
  if status_code != 200:
31
- print_api(f'URL Error, status code: {str(status_code)}', error_type=True, **kwargs)
38
+ print_api.print_api(f'URL Error, status code: {str(status_code)}', error_type=True, **kwargs)
32
39
  return False
33
40
  else:
34
- print_api('URL Status: 200 OK', color="green", **kwargs)
41
+ print_api.print_api('URL Status: 200 OK', color="green", **kwargs)
35
42
  return True
36
43
 
37
44
 
@@ -70,7 +77,7 @@ def get_page_bytes(
70
77
  raise ValueError('ERROR: [user_agent] specified and [chrome_user_agent] usage is [True]. Choose one.')
71
78
 
72
79
  if chrome_user_agent:
73
- user_agent = USER_AGENTS['Chrome_111.0.0_Windows_10-11_x64']
80
+ user_agent = USER_AGENTS['Chrome 142.0.0 Windows 10/11 x64']
74
81
 
75
82
  if user_agent:
76
83
  # Create a 'Request' object with the URL and user agent.
@@ -90,10 +97,13 @@ def get_page_bytes(
90
97
 
91
98
 
92
99
  def get_page_content(
93
- url: str, get_method: str = 'urllib', path: str = None,
100
+ url: str,
101
+ get_method: str = 'urllib',
102
+ path: str = None,
94
103
  playwright_pdf_format: str = 'A4',
95
104
  playwright_html_txt_convert_to_bytes: bool = True,
96
- print_kwargs: dict = None) -> any:
105
+ print_kwargs: dict = None
106
+ ) -> Any:
97
107
  """
98
108
  Function returns the page content from the given URL.
99
109
 
@@ -150,25 +160,37 @@ def download(
150
160
  file_url: str,
151
161
  target_directory: str = None,
152
162
  file_name: str = None,
163
+ headers: dict = None,
164
+ overwrite: bool = False,
165
+ # use_certifi_ca_repository: bool = False,
153
166
  **kwargs
154
- ) -> str:
167
+ ) -> str | None:
155
168
  """
156
169
  The function receives url and target filesystem directory to download the file.
157
170
 
171
+ Note: Install 'pip-system-certs' package if you want to use system's CA store for SSL context
172
+ in an environment where 'certifi' package is installed.
173
+
158
174
  :param file_url: full URL to download the file.
159
175
  :param target_directory: The directory on the filesystem to save the file to.
160
176
  If not specified, temporary directory will be used.
161
177
  :param file_name: string, file name (example: file.zip) that you want the downloaded file to be saved as.
162
178
  If not specified, the default filename from 'file_url' will be used.
179
+ :param headers: dictionary, HTTP headers to use when downloading the file.
180
+ :param overwrite: boolean, if True, the file will be overwritten if it already exists.
181
+ If False, the file will not be overwritten and the function will return None if the file already exists.
182
+ Default is False.
183
+ :param use_certifi_ca_repository: boolean, if True, the certifi CA store will be used for SSL context
184
+ instead of the system's default CA store.
163
185
  :return: string, full file path of downloaded file. If download failed, 'None' will be returned.
164
186
  """
165
187
 
166
188
  def print_to_console(print_end=None):
167
189
  if file_size_bytes_int:
168
- print_api(
190
+ print_api.print_api(
169
191
  f'Downloaded bytes: {aggregated_bytes_int} / {file_size_bytes_int}', print_end=print_end, **kwargs)
170
192
  else:
171
- print_api(f'Downloaded bytes: {aggregated_bytes_int}', print_end=print_end, **kwargs)
193
+ print_api.print_api(f'Downloaded bytes: {aggregated_bytes_int}', print_end=print_end, **kwargs)
172
194
 
173
195
  # Size of the buffer to read each time from url.
174
196
  buffer_size: int = 4096
@@ -185,19 +207,59 @@ def download(
185
207
  # Build full path to file.
186
208
  file_path: str = f'{target_directory}{os.sep}{file_name}'
187
209
 
188
- print_api(f'Downloading: {file_url}', **kwargs)
189
- print_api(f'To: {file_path}', **kwargs)
210
+ if os.path.exists(file_path):
211
+ if overwrite:
212
+ print_api.print_api(f'File already exists: {file_path}. Overwriting...', **kwargs)
213
+ else:
214
+ print_api.print_api(f'File already exists: {file_path}. Skipping download.', **kwargs)
215
+ return file_path
216
+
217
+ print_api.print_api(f'Downloading: {file_url}', **kwargs)
218
+ print_api.print_api(f'To: {file_path}', **kwargs)
219
+
220
+ # Open the URL for data gathering with SSL context.
221
+ # if not use_certifi_ca_repository:
222
+ # # Create a default SSL context using the system's CA store.
223
+ # ssl_context = ssl.create_default_context()
224
+ # else:
225
+
226
+ # Create a default SSL context using the certifi CA store.
227
+ # This is useful for environments where the system's CA store is not available or not trusted.
228
+ # 'certifi.where()' returns the path to the certifi CA bundle.
229
+ ssl_context: ssl.SSLContext = ssl.create_default_context(cafile=certifi.where())
190
230
 
191
231
  # In order to use 'urllib.request', it is not enough to 'import urllib', you need to 'import urllib.request'.
192
- # Open the URL for data gathering with SSL context from certifi
193
- ssl_context = ssl.create_default_context(cafile=certifi.where())
194
- file_to_download = urllib.request.urlopen(file_url, context=ssl_context)
232
+ # Build a Request object with headers if provided.
233
+ req = urllib.request.Request(file_url, headers=headers or {})
234
+
235
+ def do_urlopen(ssl_context_internal: ssl.SSLContext) -> http.client.HTTPResponse | None:
236
+ try:
237
+ response: http.client.HTTPResponse = urllib.request.urlopen(req, context=ssl_context_internal)
238
+ return response
239
+ except urllib.error.URLError as e:
240
+ if getattr(e, 'reason', None) and isinstance(e.reason, ssl.SSLCertVerificationError):
241
+ if getattr(e.reason, 'reason', None) and e.reason.reason == 'CERTIFICATE_VERIFY_FAILED':
242
+ if getattr(e.reason, 'verify_message', None) and e.reason.verify_message == 'unable to get local issuer certificate':
243
+ return None
244
+
245
+ raise e
246
+
247
+ # Try to open the URL with the created SSL context with certifi.
248
+ file_to_download = do_urlopen(ssl_context_internal=ssl_context)
249
+ if not file_to_download:
250
+ # If failed, try to open the URL with the system's default SSL context.
251
+ ssl_context = ssl.create_default_context()
252
+ file_to_download = do_urlopen(ssl_context_internal=ssl_context)
253
+ if not file_to_download:
254
+ print_api.print_api(
255
+ 'ERROR: URL open failed with both certifi and system\'s default SSL context.', error_type=True, **kwargs)
256
+ return None
195
257
 
196
258
  # Check status of url.
197
259
  if not is_status_ok(status_code=file_to_download.status, **kwargs):
198
260
  return None
199
261
 
200
- file_size_bytes_int: int = None
262
+ file_size_bytes_int: int | None = None
201
263
  # Get file size. For some reason doesn't show for GitHub branch downloads.
202
264
  if file_to_download.headers['Content-Length']:
203
265
  file_size_bytes_int = int(file_to_download.headers['Content-Length'])
@@ -222,19 +284,27 @@ def download(
222
284
  else:
223
285
  print_to_console()
224
286
  break
287
+
225
288
  if aggregated_bytes_int == file_size_bytes_int:
226
- print_api(f'Successfully Downloaded to: {file_path}', color="green", **kwargs)
289
+ print_api.print_api(f'Successfully Downloaded to: {file_path}', color="green", **kwargs)
290
+ elif file_size_bytes_int is None:
291
+ pass
227
292
  else:
228
293
  message = f'Download failed: {aggregated_bytes_int} / {file_size_bytes_int}. File: {file_path}'
229
- print_api(
294
+ print_api.print_api(
230
295
  message, error_type=True, color="red", **kwargs)
231
296
 
232
297
  return file_path
233
298
 
234
299
 
235
300
  def download_and_extract_file(
236
- file_url: str, target_directory: str, file_name: str = str(), archive_remove_first_directory: bool = False,
237
- **kwargs):
301
+ file_url: str,
302
+ target_directory: str,
303
+ file_name: str = str(),
304
+ archive_remove_first_directory: bool = False,
305
+ headers: dict = None,
306
+ **kwargs
307
+ ):
238
308
  """
239
309
  This function will download the branch file from GitHub, extract the file and remove the file, leaving
240
310
  only the extracted folder.
@@ -244,18 +314,19 @@ def download_and_extract_file(
244
314
  Default is empty. If it is empty, then the filename will be extracted from 'file_url'.
245
315
  :param target_directory: string, target directory where to save the file.
246
316
  :param archive_remove_first_directory: boolean, sets if archive extract function will extract the archive without
247
- first directory in the archive. Check reference in the 'extract_archive_with_zipfile' function.
317
+ first directory in the archive. Check reference in the 'dkarchiver.arch_wrappers.zips.extract_archive_with_zipfile' function.
318
+ :param headers: dictionary, HTTP headers to use when downloading the file.
248
319
  :return:
249
320
  """
250
321
 
251
322
  # Download the repo to current working directory and return full file path of downloaded file.
252
323
  file_path = download(
253
- file_url=file_url, target_directory=target_directory, file_name=file_name, **kwargs)
324
+ file_url=file_url, target_directory=target_directory, file_name=file_name, headers=headers, **kwargs)
254
325
 
255
326
  # Extract the archive and remove the first directory.
256
327
  zips.extract_archive_with_zipfile(
257
328
  archive_path=f'{file_path}', extract_directory=target_directory,
258
- remove_first_directory=archive_remove_first_directory, **kwargs)
329
+ remove_first_directory=archive_remove_first_directory)
259
330
 
260
331
  # Remove the archive file.
261
332
  filesystem.remove_file(file_path=f'{file_path}', **kwargs)
atomicshop/web_apis/google_custom_search.py ADDED
@@ -0,0 +1,44 @@
1
+ from typing import Union
2
+
3
+ from googleapiclient.discovery import build
4
+ import googleapiclient.errors
5
+
6
+
7
def search_google(
        query: str,
        api_key: str,
        search_engine_id: str
) -> tuple[
    Union[list[str], None],
    str]:
    """
    Query the Google Custom Search API and collect the result links.

    :param query: string, the search query to search on Google Custom Search.
    :param api_key: string, the API key for the Google Custom Search API.
    :param search_engine_id: string, the search engine ID for the Google Custom Search API.

    :return: tuple(links, error).
        On success: (list of link strings, None).
        On a rate limit response (HTTP 429): (None, the reason reported by the API).
    :raises googleapiclient.errors.HttpError: for any HTTP error other than 429.
    """

    try:
        service = build("customsearch", "v1", developerKey=api_key)
        # Only the query and the engine ID are sent. The original author noted optional filters that
        # can be added to this call: gl (country code), lr (language restriction), safe, dateRestrict.
        request = service.cse().list(
            q=query,
            cx=search_engine_id,
        )
        result = request.execute()
    except googleapiclient.errors.HttpError as e:
        # Only the rate limit error is turned into a return value; everything else propagates.
        if e.status_code != 429:
            raise e
        return None, str(e.reason)

    links = []
    for item in result.get('items', []):
        # Entries without a 'link' key are skipped.
        if 'link' in item:
            links.append(item['link'])

    # noinspection PyTypeChecker
    return links, None
atomicshop/web_apis/google_llm.py ADDED
@@ -0,0 +1,188 @@
1
+ from typing import Literal
2
+
3
+ from google import genai
4
+ from google.genai.types import GenerateContentConfig
5
+
6
+ from . import google_custom_search
7
+ from ..wrappers.playwrightw import scenarios
8
+ from .. import urls
9
+
10
+
11
class GoogleCustomSearchError(Exception):
    """Raised by 'GoogleLLM.get_answer_online' when the Google Custom Search call reports an error."""
13
+
14
class GoogleLLMNoContentError(Exception):
    """Raised by 'GoogleLLM.get_answer_online' when no content could be fetched from the URL(s)."""
16
+
17
+
18
+ """
19
+ Rate Limits and Quotas: https://ai.google.dev/gemini-api/docs/rate-limits
20
+ """
21
+
22
+
23
class GoogleLLM:
    """
    Helper that answers questions with the Gemini API, optionally grounding the answer on web pages
    found through the Google Custom Search API or on a directly supplied URL.
    """

    def __init__(
            self,
            llm_api_key: str,
            search_api_key: str,
            search_engine_id: str
    ) -> None:
        """
        Constructor for the GoogleLLM class.
        :param llm_api_key: str, the API key for the Gemini API.
        :param search_api_key: str, the API key for the Google Custom Search API.
        :param search_engine_id: str, the search engine ID for the Google Custom Search API.
        """

        self.client = genai.Client(api_key=llm_api_key)
        self.search_api_key: str = search_api_key
        self.search_engine_id: str = search_engine_id

    def get_current_models(
            self,
            full_info: bool = False,
            model_type: str = None,
            verbose: bool = False
    ) -> list:
        """
        Function to get the current models available in the Gemini API.

        :param full_info: bool, if True, returns the full model objects, otherwise only the names for API usage.
        :param model_type: str, substring that must appear in the model name. None, for all models.
            Examples of known types: 'gemini', 'veo', 'imagen', 'deep-research', 'nano-banana'.
        :param verbose: bool, if True, prints the selected models to the console.
        :return: list of model objects (full_info=True) or list of model name strings (full_info=False).
        """
        result_list: list = [
            model if full_info else model.name
            for model in self.client.models.list()
            if not model_type or model_type in model.name
        ]

        if verbose:
            for model in result_list:
                print(model)

        return result_list

    def get_answer_online(
            self,
            search_query_or_url: str,
            text_fetch_method: Literal[
                'playwright_text',
                'js_text',
                'playwright_html',
                'js_html',
                'playwright_copypaste'
            ],
            llm_query: str,
            llm_post_instructions: str,
            number_of_top_links: int = 2,
            number_of_characters_per_link: int = 15000,
            temperature: float = 0,
            model_name: str = 'gemini-2.5-pro'
    ) -> str:
        """
        Function to get the answer to a question by searching Google Custom Search API and processing
        the fetched content using Gemini API.

        :param search_query_or_url: string, is checked if it is a URL or a search query.
            Search query: the search query to search on Google Custom Search.
            URL: the URL to fetch content from without using Google Custom Search.
        :param text_fetch_method: string, the method to fetch text from the URL.
            playwright_text: uses native Playwright to fetch text from the URL.
            js_text: uses Playwright and JavaScript evaluation to fetch text from the URL.
            playwright_html: uses native Playwright to fetch HTML from the URL and then parse it to text
                using beautiful soup.
            js_html: uses Playwright and JavaScript evaluation to fetch HTML from the URL and then parse it
                to text using beautiful soup.
            playwright_copypaste: uses native Playwright to fetch text from the URL by copying and pasting
                the text from rendered page using clipboard.
        :param llm_query: string, the question to ask the LLM about the text content that is returned from
            the search query or the URL.
        :param llm_post_instructions: string, additional instructions to provide to the LLM on the answer
            it provided after the llm_query.
        :param number_of_top_links: integer, the number of top links to fetch content from.
            Used only when 'search_query_or_url' is a search query.
        :param number_of_characters_per_link: integer, the number of characters to fetch from each link.
        :param temperature: float, the temperature parameter for the LLM.
        :param model_name: string, the name of the model to use for the LLM.

        :return: string, the answer by LLM to the question.
        :raises GoogleCustomSearchError: if the Google Custom Search call returned an error message.
        :raises GoogleLLMNoContentError: if no content was fetched from the URL(s).
        """

        # Check if the search query is a URL.
        if urls.is_valid_url(search_query_or_url):
            links_to_fetch = [search_query_or_url]
        # If not a URL, search Google for links related to the query.
        else:
            links, search_error = google_custom_search.search_google(
                query=search_query_or_url, api_key=self.search_api_key, search_engine_id=self.search_engine_id)

            if search_error:
                raise GoogleCustomSearchError(f"Error occurred when searching Google: {search_error}")

            # Get only the first X links to not overload the LLM.
            links_to_fetch = links[:number_of_top_links]

        contents = scenarios.fetch_urls_content_in_threads(
            urls=links_to_fetch, number_of_characters_per_link=number_of_characters_per_link,
            text_fetch_method=text_fetch_method)

        if not contents:
            raise GoogleLLMNoContentError("No content was fetched from the provided URL(s).")

        # Every page is followed by a visual separator so the LLM can tell the sources apart.
        combined_content = ''.join(
            f'{content}\n\n\n\n================================================================'
            for content in contents
        )

        final_question = (f'Answer this question: {llm_query}\n\n'
                          f'Follow these instructions: {llm_post_instructions}\n\n'
                          f'Based on these data contents:\n\n'
                          f'{combined_content}')

        # Ask Gemini to process the combined content.
        return self.ask_gemini(final_question, temperature, model_name)

    def ask_gemini(
            self,
            question: str,
            temperature: float,
            model_name: str = 'gemini-2.5-pro'
    ) -> str:
        """
        Function to ask the Gemini API a question and get the response.
        :param question: str, the question to ask the Gemini API.
        :param temperature: float, the temperature parameter for the LLM.
            While 0 is deterministic, higher values can lead to more creative responses.
        :param model_name: str, the name of the model to use for the LLM.

        NOTE: 'max_output_tokens' is intentionally not sent. The original author disabled it because it
        gave exceptions in some situations: the response came back with empty candidates and reading its
        text raised "ValueError: Invalid operation: The `response.parts` quick accessor requires a single
        candidate, but `response.candidates` is empty."

        :return: str, the text of the response from the Gemini API.
        """
        # Model Configuration
        model_config = {
            "temperature": temperature,
            "top_p": 0.99,
            "top_k": 0,
        }

        response = self.client.models.generate_content(
            model=model_name,
            contents=question,
            config=GenerateContentConfig(**model_config))
        return response.text