atomicshop 2.18.3__py3-none-any.whl → 2.18.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of atomicshop might be problematic. Click here for more details.
- atomicshop/__init__.py +1 -1
- atomicshop/mitm/mitm_main.py +0 -2
- atomicshop/web_apis/google_custom_search.py +37 -0
- atomicshop/web_apis/google_llm.py +87 -11
- atomicshop/wrappers/factw/install/install_after_restart.py +1 -1
- atomicshop/wrappers/mongodbw/install_mongodb_win.py +132 -79
- atomicshop/wrappers/playwrightw/scenarios.py +70 -0
- {atomicshop-2.18.3.dist-info → atomicshop-2.18.5.dist-info}/METADATA +1 -1
- {atomicshop-2.18.3.dist-info → atomicshop-2.18.5.dist-info}/RECORD +12 -12
- {atomicshop-2.18.3.dist-info → atomicshop-2.18.5.dist-info}/LICENSE.txt +0 -0
- {atomicshop-2.18.3.dist-info → atomicshop-2.18.5.dist-info}/WHEEL +0 -0
- {atomicshop-2.18.3.dist-info → atomicshop-2.18.5.dist-info}/top_level.txt +0 -0
atomicshop/__init__.py
CHANGED
atomicshop/mitm/mitm_main.py
CHANGED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
|
|
3
|
+
from googleapiclient.discovery import build
|
|
4
|
+
import googleapiclient.errors
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def search_google(
        query: str,
        api_key: str,
        search_engine_id: str
) -> tuple[
    Union[list[str], None],
    Union[str, None]]:
    """
    Function to search Google using Google Custom Search API for links related to a query.

    :param query: string, the search query to search on Google Custom Search.
    :param api_key: string, the API key for the Google Custom Search API.
    :param search_engine_id: string, the search engine ID for the Google Custom Search API.

    :return: tuple(links, error).
        On success: (list of strings - the links related to the query, None).
        On HTTP 429 (rate limit): (None, string - the reason reported by the API).
    :raises googleapiclient.errors.HttpError: for any HTTP error other than 429.
    """

    # Only the API calls can raise HttpError, so only they live inside the 'try'.
    try:
        service = build("customsearch", "v1", developerKey=api_key)
        result = service.cse().list(q=query, cx=search_engine_id).execute()
    except googleapiclient.errors.HttpError as e:
        # In case of rate limit error, return the error message instead of raising,
        # so the caller can decide how to react.
        if e.status_code == 429:
            return None, str(e.reason)
        # Anything else is unexpected: re-raise with the original traceback intact.
        raise

    # A response without 'items' means no results; entries without a 'link' key are skipped.
    items = result.get('items', [])
    links = [item['link'] for item in items if 'link' in item]
    return links, None
|
|
@@ -2,13 +2,31 @@ import os
|
|
|
2
2
|
|
|
3
3
|
import google.generativeai as genai
|
|
4
4
|
|
|
5
|
+
from . import google_custom_search
|
|
6
|
+
from ..wrappers.playwrightw import scenarios
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class GoogleCustomSearchError(Exception):
    """Raised when the Google Custom Search step reports an error."""
|
|
11
|
+
|
|
5
12
|
|
|
6
13
|
class GoogleLLM:
|
|
7
14
|
def __init__(
|
|
8
15
|
self,
|
|
9
|
-
llm_api_key: str
|
|
16
|
+
llm_api_key: str,
|
|
17
|
+
search_api_key: str,
|
|
18
|
+
search_engine_id: str
|
|
10
19
|
) -> None:
|
|
20
|
+
"""
|
|
21
|
+
Constructor for the GoogleLLM class.
|
|
22
|
+
:param llm_api_key: str, the API key for the Gemini API.
|
|
23
|
+
:param search_api_key: str, the API key for the Google Custom Search API.
|
|
24
|
+
:param search_engine_id: str, the search engine ID for the Google Custom Search API.
|
|
25
|
+
"""
|
|
26
|
+
|
|
11
27
|
self.genai = genai
|
|
28
|
+
self.search_api_key: str = search_api_key
|
|
29
|
+
self.search_engine_id: str = search_engine_id
|
|
12
30
|
|
|
13
31
|
os.environ["API_KEY"] = llm_api_key
|
|
14
32
|
genai.configure(api_key=os.environ["API_KEY"])
|
|
@@ -28,15 +46,73 @@ class GoogleLLM:
|
|
|
28
46
|
number_of_top_links: int = 2,
|
|
29
47
|
number_of_characters_per_link: int = 15000,
|
|
30
48
|
temperature: float = 0,
|
|
31
|
-
max_output_tokens: int = 4096
|
|
32
|
-
|
|
49
|
+
max_output_tokens: int = 4096,
|
|
50
|
+
model_name: str = 'gemini-pro'
|
|
51
|
+
) -> str:
|
|
33
52
|
"""
|
|
34
53
|
Function to get the answer to a question by searching Google Custom Console API and processing the content using Gemini API.
|
|
35
|
-
|
|
36
|
-
:param
|
|
37
|
-
:param
|
|
38
|
-
:param
|
|
39
|
-
:param
|
|
40
|
-
:param
|
|
41
|
-
:
|
|
42
|
-
|
|
54
|
+
|
|
55
|
+
:param search_query: string, the search query to search on Google Custom Search.
|
|
56
|
+
:param additional_llm_instructions: string, additional instructions to provide to the LLM.
|
|
57
|
+
:param number_of_top_links: integer, the number of top links to fetch content from.
|
|
58
|
+
:param number_of_characters_per_link: integer, the number of characters to fetch from each link.
|
|
59
|
+
:param temperature: float, the temperature parameter for the LLM.
|
|
60
|
+
:param max_output_tokens: integer, the maximum number of tokens to generate in the LLM response.
|
|
61
|
+
:param model_name: string, the name of the model to use for the LLM.
|
|
62
|
+
|
|
63
|
+
:return: string, the answer by LLM to the question.
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
# Search Google for links related to the query
|
|
67
|
+
links, search_error = google_custom_search.search_google(
|
|
68
|
+
query=search_query, api_key=self.search_api_key, search_engine_id=self.search_engine_id)
|
|
69
|
+
|
|
70
|
+
if search_error:
|
|
71
|
+
raise GoogleCustomSearchError(f"Error occurred when searching Google: {search_error}")
|
|
72
|
+
|
|
73
|
+
# Get only the first X links to not overload the LLM.
|
|
74
|
+
contents = scenarios.fetch_urls_content_in_threads(links[:number_of_top_links], number_of_characters_per_link)
|
|
75
|
+
|
|
76
|
+
combined_content = ""
|
|
77
|
+
for content in contents:
|
|
78
|
+
combined_content += f'{content}\n\n\n\n================================================================'
|
|
79
|
+
|
|
80
|
+
final_question = (f'Answer this question: {search_query}\n\n'
|
|
81
|
+
f'Follow these instructions: {additional_llm_instructions}\n\n'
|
|
82
|
+
f'Based on these data contents:\n\n'
|
|
83
|
+
f'{combined_content}')
|
|
84
|
+
|
|
85
|
+
# Ask Gemini to process the combined content
|
|
86
|
+
gemini_response = self.ask_gemini(final_question, temperature, max_output_tokens, model_name)
|
|
87
|
+
return gemini_response
|
|
88
|
+
|
|
89
|
+
@staticmethod
def ask_gemini(
        question: str,
        temperature: float,
        max_output_tokens: int,
        model_name: str = 'gemini-pro'
) -> str:
    """
    Send one prompt to a Gemini model and hand back the text of its reply.

    :param question: str, the prompt to send to the Gemini API.
    :param temperature: float, the temperature parameter for the LLM.
        0 is deterministic; higher values can lead to more creative responses.
    :param max_output_tokens: int, the upper bound on tokens generated in the reply.
    :param model_name: str, the name of the Gemini model to use.

    :return: str, the 'text' attribute of the response returned by the model.
    """

    # Generation settings: temperature and the token cap come from the caller,
    # 'top_p' and 'top_k' are fixed.
    generation_settings = dict(
        temperature=temperature,
        top_p=0.99,
        top_k=0,
        max_output_tokens=max_output_tokens,
    )

    # noinspection PyTypeChecker
    gemini_model = genai.GenerativeModel(model_name, generation_config=generation_settings)
    return gemini_model.generate_content(question).text
|
|
@@ -103,7 +103,7 @@ def analyze_log_file(log_file: str):
|
|
|
103
103
|
|
|
104
104
|
for plugin in PLUGIN_LIST:
|
|
105
105
|
if f'{FINISHED_INSTALLING_STRINGS[0]} {plugin} {FINISHED_INSTALLING_STRINGS[1]}' not in log_content:
|
|
106
|
-
message = (f'Error: [{plugin}]
|
|
106
|
+
message = (f'Error: [{plugin}] plugin missing in the log. Installation failed.\n'
|
|
107
107
|
f'Check the log file: {log_file}\n'
|
|
108
108
|
f'Exiting...')
|
|
109
109
|
print_api.print_api(message, color='red')
|
|
@@ -33,7 +33,7 @@ class MongoDBInstallationError(Exception):
|
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
def get_latest_mongodb_download_url(
|
|
36
|
-
|
|
36
|
+
rc_version: bool = True,
|
|
37
37
|
major_specific: int = None
|
|
38
38
|
):
|
|
39
39
|
response = requests.get(MONGODB_DOWNLOAD_PAGE_URL)
|
|
@@ -48,7 +48,7 @@ def get_latest_mongodb_download_url(
|
|
|
48
48
|
windows_urls: list = []
|
|
49
49
|
for url in urls_in_page:
|
|
50
50
|
if 'windows' in url and 'x86_64' in url and url.endswith('.msi'):
|
|
51
|
-
if
|
|
51
|
+
if not rc_version and '-rc' in url:
|
|
52
52
|
continue
|
|
53
53
|
windows_urls.append(url)
|
|
54
54
|
|
|
@@ -68,109 +68,144 @@ def get_latest_mongodb_download_url(
|
|
|
68
68
|
|
|
69
69
|
def parse_args():
    """
    Parse the command line arguments of the MongoDB installer script.

    Options:
        -er / --exclude-rc: install the latest version, skipping release candidates.
        -ir / --include-rc: install the latest version, release candidates included.
            ('-er' and '-ir' are mutually exclusive.)
        -c / --compass: install MongoDB Compass.
        -m / --major: int, restrict the download to the latest release of this major version.
        -f / --force: install even if MongoDB is already installed.

    At least one of '-er', '-ir' or '-c' must be given; otherwise argparse prints
    an error and exits the process.

    :return: argparse.Namespace with the attributes
        'exclude_rc', 'include_rc', 'compass', 'major' and 'force'.
    """
    parser = argparse.ArgumentParser(description='Install MongoDB Community Server.')

    # Mutually exclusive group for 'ir' and 'er'
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        '-er', '--exclude-rc',
        action='store_true',
        help='Install latest version, but exclude release candidate versions.'
    )
    group.add_argument(
        '-ir', '--include-rc',
        action='store_true',
        help='Install the latest version of MongoDB including release candidates.'
    )

    # Optional arguments
    parser.add_argument(
        '-c', '--compass',
        action='store_true',
        help='Install MongoDB Compass.'
    )
    parser.add_argument(
        '-m', '--major',
        type=int,
        help='Download the latest version of the specified major version.'
    )
    parser.add_argument(
        '-f', '--force',
        action='store_true',
        help='Force the installation even if MongoDB is already installed.'
    )

    args = parser.parse_args()

    # Validation logic: At least one of '-er', '-ir', or '-c' must be provided.
    # argparse stores '--exclude-rc' under the attribute 'exclude_rc'.
    if not (args.exclude_rc or args.include_rc or args.compass):
        parser.error("At least one of '-er', '-ir', or '-c' must be specified.")

    return args
|
|
81
108
|
|
|
82
109
|
|
|
83
|
-
def
|
|
84
|
-
|
|
110
|
+
def download_install_process(
|
|
111
|
+
rc_version: bool = False,
|
|
112
|
+
no_rc_version: bool = True,
|
|
85
113
|
major_specific: int = None,
|
|
86
114
|
compass: bool = False,
|
|
87
115
|
force: bool = False
|
|
88
|
-
):
|
|
116
|
+
) -> int:
|
|
89
117
|
"""
|
|
90
118
|
Download and install the latest version of MongoDB Community Server.
|
|
119
|
+
|
|
120
|
+
:param rc_version: bool, if True, the latest RC version will be downloaded.
|
|
91
121
|
:param no_rc_version: bool, if True, the latest non-RC version will be downloaded.
|
|
92
122
|
:param major_specific: int, if set, the latest version of the specified major version will be downloaded.
|
|
93
123
|
:param compass: bool, if True, MongoDB Compass will be installed.
|
|
94
124
|
:param force: bool, if True, MongoDB will be installed even if it is already installed.
|
|
95
|
-
:return:
|
|
125
|
+
:return: int, 0 if successful, 1 if failed.
|
|
96
126
|
"""
|
|
97
127
|
|
|
98
|
-
args = parse_args()
|
|
99
|
-
|
|
100
|
-
# Set the args only if they were used.
|
|
101
|
-
if args.no_rc:
|
|
102
|
-
no_rc_version = args.no_rc
|
|
103
|
-
if args.major:
|
|
104
|
-
major_specific = args.major
|
|
105
|
-
if args.compass:
|
|
106
|
-
compass = args.compass
|
|
107
|
-
if args.force:
|
|
108
|
-
force = args.force
|
|
109
|
-
|
|
110
128
|
if not permissions.is_admin():
|
|
111
129
|
print_api("This function requires administrator privileges.", color='red')
|
|
112
130
|
return 1
|
|
113
131
|
|
|
114
|
-
if
|
|
115
|
-
print_api("
|
|
116
|
-
|
|
117
|
-
if not force:
|
|
118
|
-
return 0
|
|
119
|
-
else:
|
|
120
|
-
print_api("MongoDB is service is not running.")
|
|
121
|
-
|
|
122
|
-
mongo_is_installed: Union[str, None] = mongo_infra.is_installed()
|
|
123
|
-
if mongo_infra.is_installed():
|
|
124
|
-
message = f"MongoDB is installed in: {mongo_is_installed}\n" \
|
|
125
|
-
f"The service is not running. Fix the service or use the 'force' parameter to reinstall."
|
|
126
|
-
print_api(message, color='yellow')
|
|
127
|
-
|
|
128
|
-
if not force:
|
|
129
|
-
return 0
|
|
130
|
-
|
|
131
|
-
print_api("Fetching the latest MongoDB download URL...")
|
|
132
|
-
mongo_installer_url = get_latest_mongodb_download_url(no_rc_version=no_rc_version, major_specific=major_specific)
|
|
133
|
-
|
|
134
|
-
print_api(f"Downloading MongoDB installer from: {mongo_installer_url}")
|
|
135
|
-
installer_file_path: str = web.download(mongo_installer_url)
|
|
136
|
-
|
|
137
|
-
print_api("Installing MongoDB...")
|
|
138
|
-
try:
|
|
139
|
-
msiw.install_msi(
|
|
140
|
-
installer_file_path,
|
|
141
|
-
silent_no_gui=True,
|
|
142
|
-
no_restart=True,
|
|
143
|
-
terminate_required_processes=True,
|
|
144
|
-
create_log_near_msi=True,
|
|
145
|
-
scan_log_for_errors=True,
|
|
146
|
-
additional_args='ADDLOCAL="ServerService"'
|
|
147
|
-
)
|
|
148
|
-
except msiw.MsiInstallationError as e:
|
|
149
|
-
print_api(f'{e} Exiting...', color='red')
|
|
132
|
+
if rc_version and no_rc_version:
|
|
133
|
+
print_api("Both 'rc_version' and 'no_rc_version' cannot be True at the same time.", color='red')
|
|
150
134
|
return 1
|
|
151
135
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
if not mongo_is_installed:
|
|
156
|
-
message += "MongoDB Executable not found.\n"
|
|
136
|
+
if not (rc_version or no_rc_version) and not compass:
|
|
137
|
+
print_api("At least one of 'rc_version', 'no_rc_version', or 'compass' must be True.", color='red')
|
|
138
|
+
return 1
|
|
157
139
|
|
|
158
|
-
|
|
159
|
-
|
|
140
|
+
# If we need to install mongo db.
|
|
141
|
+
if rc_version or no_rc_version:
|
|
142
|
+
if rc_version:
|
|
143
|
+
download_rc_version: bool = True
|
|
144
|
+
elif no_rc_version:
|
|
145
|
+
download_rc_version: bool = False
|
|
146
|
+
else:
|
|
147
|
+
raise ValueError("Invalid value for 'rc_version' and 'no_rc_version'.")
|
|
160
148
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
print_api(message, color='red')
|
|
164
|
-
return 1
|
|
165
|
-
else:
|
|
166
|
-
success_message: str = f"MongoDB installed successfully to: {mongo_is_installed}\n" \
|
|
167
|
-
f"Service is running."
|
|
168
|
-
print_api(success_message, color='green')
|
|
149
|
+
if mongo_infra.is_service_running():
|
|
150
|
+
print_api("MongoDB service is running - already installed. Use [-f] to reinstall.", color='blue')
|
|
169
151
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
152
|
+
if not force:
|
|
153
|
+
return 0
|
|
154
|
+
else:
|
|
155
|
+
print_api("MongoDB is service is not running.")
|
|
156
|
+
|
|
157
|
+
mongo_is_installed: Union[str, None] = mongo_infra.is_installed()
|
|
158
|
+
if mongo_infra.is_installed():
|
|
159
|
+
message = f"MongoDB is installed in: {mongo_is_installed}\n" \
|
|
160
|
+
f"The service is not running. Fix the service or use the 'force' parameter to reinstall."
|
|
161
|
+
print_api(message, color='yellow')
|
|
162
|
+
|
|
163
|
+
if not force:
|
|
164
|
+
return 0
|
|
165
|
+
|
|
166
|
+
print_api("Fetching the latest MongoDB download URL...")
|
|
167
|
+
mongo_installer_url = get_latest_mongodb_download_url(rc_version=download_rc_version, major_specific=major_specific)
|
|
168
|
+
|
|
169
|
+
print_api(f"Downloading MongoDB installer from: {mongo_installer_url}")
|
|
170
|
+
installer_file_path: str = web.download(mongo_installer_url)
|
|
171
|
+
|
|
172
|
+
print_api("Installing MongoDB...")
|
|
173
|
+
try:
|
|
174
|
+
msiw.install_msi(
|
|
175
|
+
installer_file_path,
|
|
176
|
+
silent_no_gui=True,
|
|
177
|
+
no_restart=True,
|
|
178
|
+
terminate_required_processes=True,
|
|
179
|
+
create_log_near_msi=True,
|
|
180
|
+
scan_log_for_errors=True,
|
|
181
|
+
additional_args='ADDLOCAL="ServerService"'
|
|
182
|
+
)
|
|
183
|
+
except msiw.MsiInstallationError as e:
|
|
184
|
+
print_api(f'{e} Exiting...', color='red')
|
|
185
|
+
return 1
|
|
186
|
+
|
|
187
|
+
# Check if MongoDB is installed.
|
|
188
|
+
message: str = ''
|
|
189
|
+
mongo_is_installed = mongo_infra.is_installed()
|
|
190
|
+
if not mongo_is_installed:
|
|
191
|
+
message += "MongoDB Executable not found.\n"
|
|
192
|
+
|
|
193
|
+
if not mongo_infra.is_service_running():
|
|
194
|
+
message += "MongoDB service is not running.\n"
|
|
195
|
+
|
|
196
|
+
if message:
|
|
197
|
+
message += f"MSI Path: {installer_file_path}"
|
|
198
|
+
print_api(message, color='red')
|
|
199
|
+
return 1
|
|
200
|
+
else:
|
|
201
|
+
success_message: str = f"MongoDB installed successfully to: {mongo_is_installed}\n" \
|
|
202
|
+
f"Service is running."
|
|
203
|
+
print_api(success_message, color='green')
|
|
204
|
+
|
|
205
|
+
# Clean up the installer file
|
|
206
|
+
if os.path.exists(installer_file_path):
|
|
207
|
+
os.remove(installer_file_path)
|
|
208
|
+
print_api("Cleaned up the installer file.")
|
|
174
209
|
|
|
175
210
|
if not compass:
|
|
176
211
|
return 0
|
|
@@ -189,3 +224,21 @@ def download_install_latest_main(
|
|
|
189
224
|
print_api("Cleaned up the Compass installer file.")
|
|
190
225
|
|
|
191
226
|
return 0
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def download_install_latest_main() -> int:
    """
    Command line entry point: read the CLI arguments and run the download/install process with them.

    :return: int, the value returned by 'download_install_process'
        (documented there as 0 if successful, 1 if failed).
    """

    cli_args = parse_args()

    # Map the CLI flags one-to-one onto the keyword arguments of the worker function.
    exit_code: int = download_install_process(
        rc_version=cli_args.include_rc,
        no_rc_version=cli_args.exclude_rc,
        major_specific=cli_args.major,
        compass=cli_args.compass,
        force=cli_args.force
    )
    return exit_code
|
|
@@ -3,6 +3,11 @@ Scenarios file contains full execution scenarios of playwright wrapper.
|
|
|
3
3
|
For example: run playwright, navigate to URL, get text from a locator.
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
7
|
+
|
|
8
|
+
from playwright.sync_api import sync_playwright
|
|
9
|
+
from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
|
|
10
|
+
|
|
6
11
|
from . import engine, base, combos
|
|
7
12
|
from ...basics import threads, multiprocesses
|
|
8
13
|
|
|
@@ -139,3 +144,68 @@ def _get_page_content_in_process(
|
|
|
139
144
|
html_txt_convert_to_bytes=html_txt_convert_to_bytes,
|
|
140
145
|
print_kwargs=print_kwargs
|
|
141
146
|
)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def fetch_urls_content_in_threads(
        urls: list[str],
        number_of_characters_per_link: int
) -> list[str]:
    """
    Fetch the content of several URLs concurrently, one thread pool task per URL.

    :param urls: list of strings, the URLs to fetch.
    :param number_of_characters_per_link: integer, passed through to '_fetch_content' for each URL.

    :return: list of strings, the fetched contents in the order the tasks completed
        (not necessarily the order of 'urls'). A URL whose fetch raised is reported
        with 'print' and left out of the result.
    """
    fetched_texts = []

    with ThreadPoolExecutor() as pool:
        # Remember which URL each task belongs to, so a failure can be reported by URL.
        task_to_link = {}
        for link in urls:
            task = pool.submit(_fetch_content, link, number_of_characters_per_link)
            task_to_link[task] = link

        # Handle each task as soon as it finishes.
        for finished_task in as_completed(task_to_link):
            link = task_to_link[finished_task]
            try:
                fetched_texts.append(finished_task.result())
            except Exception as exc:
                print(f"An error occurred when fetching {link}: {exc}")

    return fetched_texts
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _fetch_content(url, number_of_characters_per_link):
    """
    Open a single URL in headless Chromium (synchronous Playwright API) and return its body text.

    :param url: the URL to navigate to.
    :param number_of_characters_per_link: the maximum number of characters of text to return.

    :return: the first 'number_of_characters_per_link' characters of 'document.body.innerText'.
    """
    with sync_playwright() as playwright:
        chromium = playwright.chromium.launch(headless=True)
        tab = chromium.new_page()
        tab.goto(url)

        # No single load state works for every website, so wait for each of them in turn.
        for load_state in ("load", "domcontentloaded"):
            tab.wait_for_load_state(load_state, timeout=5000)

        # The load states alone are not enough: keep waiting for network responses
        # ("**/*" matches every URL) until none arrives within 2 seconds.
        # The resulting Playwright timeout is the signal to stop.
        received_responses: list = []
        network_active = True
        while network_active:
            try:
                with tab.expect_response("**/*", timeout=2000) as pending_response:
                    received_responses.append(pending_response.value)
            except PlaywrightTimeoutError:
                network_active = False

        # Use JavaScript to extract only the visible text from the page.
        visible_text: str = tab.evaluate("document.body.innerText")

        chromium.close()
        # Return only the first X characters of the text content to not overload the LLM.
        return visible_text[:number_of_characters_per_link]
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
atomicshop/__init__.py,sha256=
|
|
1
|
+
atomicshop/__init__.py,sha256=djTLkiQczxSrBfseOQFp0kfiVNR7-UsGEJSqlifxWfA,123
|
|
2
2
|
atomicshop/_basics_temp.py,sha256=6cu2dd6r2dLrd1BRNcVDKTHlsHs_26Gpw8QS6v32lQ0,3699
|
|
3
3
|
atomicshop/_create_pdf_demo.py,sha256=Yi-PGZuMg0RKvQmLqVeLIZYadqEZwUm-4A9JxBl_vYA,3713
|
|
4
4
|
atomicshop/_patch_import.py,sha256=ENp55sKVJ0e6-4lBvZnpz9PQCt3Otbur7F6aXDlyje4,6334
|
|
@@ -131,7 +131,7 @@ atomicshop/mitm/connection_thread_worker.py,sha256=Of-QmuIbUO8Qd1N_BXdGQk2TnPpZd
|
|
|
131
131
|
atomicshop/mitm/import_config.py,sha256=0Ij14aISTllTOiWYJpIUMOWobQqGofD6uafui5uWllE,9272
|
|
132
132
|
atomicshop/mitm/initialize_engines.py,sha256=NWz0yBErSrYBn0xWkJDBcHStBJ-kcsv9VtorcSP9x5M,8258
|
|
133
133
|
atomicshop/mitm/message.py,sha256=mNo4Lphr_Jo6IlNX5mPJzABpogWGkjOhwI4meAivwHw,2987
|
|
134
|
-
atomicshop/mitm/mitm_main.py,sha256=
|
|
134
|
+
atomicshop/mitm/mitm_main.py,sha256=Uko4lFG96ZeZ1yVJD5CT4c48NhfX_Hu1g0-THEiZfAc,23454
|
|
135
135
|
atomicshop/mitm/recs_files.py,sha256=ZAAD0twun-FtmbSniXe3XQhIlawvANNB_HxwbHj7kwI,3151
|
|
136
136
|
atomicshop/mitm/shared_functions.py,sha256=0lzeyINd44sVEfFbahJxQmz6KAMWbYrW5ou3UYfItvw,1777
|
|
137
137
|
atomicshop/mitm/statistic_analyzer.py,sha256=5_sAYGX2Xunzo_pS2W5WijNCwr_BlGJbbOO462y_wN4,27533
|
|
@@ -177,8 +177,8 @@ atomicshop/startup/win/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
|
|
|
177
177
|
atomicshop/startup/win/startup_folder.py,sha256=2RZEyF-Mf8eWPlt_-OaoGKKnMs6YhELEzJZ376EI0E0,1891
|
|
178
178
|
atomicshop/startup/win/task_scheduler.py,sha256=qALe-8sfthYxsdCViH2r8OsH3x-WauDqteg5RzElPdk,4348
|
|
179
179
|
atomicshop/web_apis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
180
|
-
atomicshop/web_apis/google_custom_search.py,sha256=
|
|
181
|
-
atomicshop/web_apis/google_llm.py,sha256=
|
|
180
|
+
atomicshop/web_apis/google_custom_search.py,sha256=evixI7y8JYyGwurRZH03nAWdD-417VFaNe1mAtuKPNA,1310
|
|
181
|
+
atomicshop/web_apis/google_llm.py,sha256=UzZkPtyNA03g0xcb4vJ8imdjdNvyjUvmPUtxf9J9HnY,4898
|
|
182
182
|
atomicshop/wrappers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
183
183
|
atomicshop/wrappers/_process_wrapper_curl.py,sha256=XkZZXYl7D0Q6UfdWqy-18AvpU0yVp9i2BVD2qRcXlkk,841
|
|
184
184
|
atomicshop/wrappers/_process_wrapper_tar.py,sha256=WUMZFKNrlG4nJP9tWZ51W7BR1j_pIjsjgyAStmWjRGs,655
|
|
@@ -233,7 +233,7 @@ atomicshop/wrappers/factw/fact_extractor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5
|
|
|
233
233
|
atomicshop/wrappers/factw/fact_extractor/docker_image.py,sha256=2FyYjnw8gxFNwISQ83OwH-iGivkFi6EAluyCZ0loHEQ,2501
|
|
234
234
|
atomicshop/wrappers/factw/fact_extractor/get_extractor.py,sha256=2mfOAftHIlCcGt1s7MWdq7DsDCuI6wX3MtvcEZ4SK-0,756
|
|
235
235
|
atomicshop/wrappers/factw/install/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
236
|
-
atomicshop/wrappers/factw/install/install_after_restart.py,sha256=
|
|
236
|
+
atomicshop/wrappers/factw/install/install_after_restart.py,sha256=4dHn2XMbYaPJlhRCmZqqwsgHQBlG2mT7aW50pQCPtp4,4345
|
|
237
237
|
atomicshop/wrappers/factw/install/pre_install_and_install_before_restart.py,sha256=GFsO9MTH0czKoxkiPJtjalilUwsmFLBCcx9Znv37S4M,5945
|
|
238
238
|
atomicshop/wrappers/factw/postgresql/__init__.py,sha256=xMBn2d3Exo23IPP2F_9-SXmOlhFbwWDgS9KwozSTjA0,162
|
|
239
239
|
atomicshop/wrappers/factw/postgresql/analysis.py,sha256=2Rxzy2jyq3zEKIo53z8VkjuslKE_i5mq2ZpmJAvyd6U,716
|
|
@@ -260,7 +260,7 @@ atomicshop/wrappers/loggingw/loggingw.py,sha256=uLY7DJS-3xIYQBRvI--9eFvdcnvsWSXm
|
|
|
260
260
|
atomicshop/wrappers/loggingw/reading.py,sha256=sCNlgqLNH5XdKqOOjjEox7CvViMHzs6h7-hwCnx4NKk,17566
|
|
261
261
|
atomicshop/wrappers/mongodbw/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
262
262
|
atomicshop/wrappers/mongodbw/install_mongodb_ubuntu.py,sha256=pmI9AwWJ2cv5h8GionSpSJkllg6kfp0M381pk6y4Y5U,4015
|
|
263
|
-
atomicshop/wrappers/mongodbw/install_mongodb_win.py,sha256=
|
|
263
|
+
atomicshop/wrappers/mongodbw/install_mongodb_win.py,sha256=64EUQYx7VuMC3ndO2x3nSErh5NZ_BsqMwGvPcybfC-Q,8499
|
|
264
264
|
atomicshop/wrappers/mongodbw/mongo_infra.py,sha256=IjEF0jPzQz866MpTm7rnksnyyWQeUT_B2h2DA9ryAio,2034
|
|
265
265
|
atomicshop/wrappers/mongodbw/mongodbw.py,sha256=IkEw86QFyVRU-5p5s6_6yupvSxmaQxr59GKNgSEkAm4,52617
|
|
266
266
|
atomicshop/wrappers/nodejsw/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -275,7 +275,7 @@ atomicshop/wrappers/playwrightw/javascript.py,sha256=_bW7CAtm0Y8IHYrAalg5HpPFnk6
|
|
|
275
275
|
atomicshop/wrappers/playwrightw/keyboard.py,sha256=zN3YddGO-qUkn6C0CRVFejP4cTuaUwXLDNFhFREjERY,422
|
|
276
276
|
atomicshop/wrappers/playwrightw/locators.py,sha256=6wsLywZxDuii7mwv-zQsRbqQC8r7j96Bma5b5_7ZoVo,2411
|
|
277
277
|
atomicshop/wrappers/playwrightw/mouse.py,sha256=-2FZbQtjgH7tdXWld6ZPGqlKFUdf5in0ujN0hewxa50,656
|
|
278
|
-
atomicshop/wrappers/playwrightw/scenarios.py,sha256=
|
|
278
|
+
atomicshop/wrappers/playwrightw/scenarios.py,sha256=RY56hH7UKvDoBr5j1JwP5xRoQtaz0AnCAkA602MurPk,8396
|
|
279
279
|
atomicshop/wrappers/playwrightw/waits.py,sha256=PBFdz_PoM7Fo7O8hLqMrxNPzBEYgPoXwZceFFCGGeu8,7182
|
|
280
280
|
atomicshop/wrappers/psutilw/cpus.py,sha256=w6LPBMINqS-T_X8vzdYkLS2Wzuve28Ydp_GafTCngrc,236
|
|
281
281
|
atomicshop/wrappers/psutilw/disks.py,sha256=3ZSVoommKH1TWo37j_83frB-NqXF4Nf5q5mBCX8G4jE,9221
|
|
@@ -319,8 +319,8 @@ atomicshop/wrappers/socketw/ssl_base.py,sha256=kmiif84kMhBr5yjQW17p935sfjR5JKG0L
|
|
|
319
319
|
atomicshop/wrappers/socketw/statistics_csv.py,sha256=fgMzDXI0cybwUEqAxprRmY3lqbh30KAV-jOpoFKT-m8,3395
|
|
320
320
|
atomicshop/wrappers/winregw/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
321
321
|
atomicshop/wrappers/winregw/winreg_network.py,sha256=zZQfps-CdODQaTUADbHAwKHr5RUg7BLafnKWBbKaLN4,8728
|
|
322
|
-
atomicshop-2.18.
|
|
323
|
-
atomicshop-2.18.
|
|
324
|
-
atomicshop-2.18.
|
|
325
|
-
atomicshop-2.18.
|
|
326
|
-
atomicshop-2.18.
|
|
322
|
+
atomicshop-2.18.5.dist-info/LICENSE.txt,sha256=lLU7EYycfYcK2NR_1gfnhnRC8b8ccOTElACYplgZN88,1094
|
|
323
|
+
atomicshop-2.18.5.dist-info/METADATA,sha256=jFNipJDLDZS4YVzhPI5lVuaMh1xDHuH2Fohc6wyNjy8,10499
|
|
324
|
+
atomicshop-2.18.5.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
325
|
+
atomicshop-2.18.5.dist-info/top_level.txt,sha256=EgKJB-7xcrAPeqTRF2laD_Np2gNGYkJkd4OyXqpJphA,11
|
|
326
|
+
atomicshop-2.18.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|